X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Flod%2Flod_internal.h;h=0cc8d8ba8ae7a2c1d47a5399a3b22326b5bfcd6c;hb=c39d13d3ccae15f99eb50b90aa4a5613fb59af8f;hp=a07201fa853c89157f17d0d51e8b9613a0226b82;hpb=7ff7b6e6f32e5937bd33f184fc0026d9da756565;p=fs%2Flustre-release.git diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index a07201f..0cc8d8b 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -6,13 +6,13 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 only, * as published by the Free Software Foundation. - + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License version 2 for more details. A copy is * included in the COPYING file that accompanied this code. - + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA @@ -23,7 +23,7 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved * Use is subject to license terms. * - * Copyright (c) 2012, Intel Corporation. + * Copyright (c) 2012, 2016, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -39,12 +39,82 @@ #define _LOD_INTERNAL_H #include +#include #include #include #define LOV_USES_ASSIGNED_STRIPE 0 #define LOV_USES_DEFAULT_STRIPE 1 +/* Special values to remove LOV EA from disk */ +#define LOVEA_DELETE_VALUES(size, count, offset, pool) \ + ((size) == 0 && (count) == 0 && \ + (offset) == (typeof(offset))(-1) && (pool) == NULL) + +#define LMVEA_DELETE_VALUES(count, offset) \ + ((count) == 0 && (offset) == (typeof(offset))(-1)) + +#define LOV_OFFSET_DEFAULT ((__u16)-1) + +struct lod_qos_rr { + spinlock_t lqr_alloc; /* protect allocation index */ + __u32 lqr_start_idx; /* start index of new inode */ + __u32 lqr_offset_idx;/* aliasing for start_idx */ + int lqr_start_count;/* reseed counter */ + struct ost_pool lqr_pool; /* round-robin optimized list */ + unsigned long lqr_dirty:1; /* recalc round-robin list */ +}; + +struct pool_desc { + char pool_name[LOV_MAXPOOLNAME + 1]; + struct ost_pool pool_obds; /* pool members */ + atomic_t pool_refcount; + struct lod_qos_rr pool_rr; + struct hlist_node pool_hash; /* access by poolname */ + struct list_head pool_list; + struct proc_dir_entry *pool_proc_entry; + struct obd_device *pool_lobd; /* owner */ +}; + +#define pool_tgt_size(p) ((p)->pool_obds.op_size) +#define pool_tgt_count(p) ((p)->pool_obds.op_count) +#define pool_tgt_array(p) ((p)->pool_obds.op_array) +#define pool_tgt_rw_sem(p) ((p)->pool_obds.op_rw_sem) + +struct lod_qos { + struct list_head lq_oss_list; + struct rw_semaphore lq_rw_sem; + __u32 lq_active_oss_count; + unsigned int lq_prio_free; /* priority for free space */ + unsigned int lq_threshold_rr;/* priority for rr */ + struct lod_qos_rr lq_rr; /* round robin qos data */ + bool lq_dirty:1, /* recalc qos data */ + lq_same_space:1,/* the ost's all have approx. + the same space avail */ + lq_reset:1; /* zero current penalties */ +}; + +struct lod_qos_oss { + struct obd_uuid lqo_uuid; /* ptlrpc's c_remote_uuid */ + struct list_head lqo_oss_list; /* link to lov_qos */ + __u64 lqo_bavail; /* total bytes avail on OSS */ + __u64 lqo_penalty; /* current penalty */ + __u64 lqo_penalty_per_obj; /* penalty decrease + every obj*/ + time_t lqo_used; /* last used time, seconds */ + __u32 lqo_ost_count; /* number of osts on this oss */ +}; + +struct ltd_qos { + struct lod_qos_oss *ltq_oss; /* oss info */ + __u64 ltq_penalty; /* current penalty */ + __u64 ltq_penalty_per_obj; /* penalty decrease + every obj*/ + __u64 ltq_weight; /* net weighting */ + time_t ltq_used; /* last used time, seconds */ + bool ltq_usable:1; /* usable for striping */ +}; + struct lod_tgt_desc { struct dt_device *ltd_tgt; struct list_head ltd_kill; @@ -54,9 +124,12 @@ struct lod_tgt_desc { __u32 ltd_index; struct ltd_qos ltd_qos; /* qos info per target */ struct obd_statfs ltd_statfs; + struct ptlrpc_thread *ltd_recovery_thread; unsigned long ltd_active:1,/* is this target up for requests */ ltd_activate:1,/* should target be activated */ - ltd_reap:1; /* should this target be deleted */ + ltd_reap:1, /* should this target be deleted */ + ltd_got_update_log:1, /* Already got update log */ + ltd_connecting:1; /* target is connecting */ }; #define TGT_PTRS 256 /* number of pointers at 1st level */ @@ -71,15 +144,16 @@ struct lod_tgt_desc_idx { TGT_PTRS_PER_BLOCK]->ldi_tgt[(index) % TGT_PTRS_PER_BLOCK]) #define OST_TGT(lod, index) LTD_TGT(&lod->lod_ost_descs, index) +#define MDT_TGT(lod, index) LTD_TGT(&lod->lod_mdt_descs, index) struct lod_tgt_descs { /* list of known TGTs */ struct lod_tgt_desc_idx *ltd_tgt_idx[TGT_PTRS]; /* Size of the lod_tgts array, granted to be a power of 2 */ __u32 ltd_tgts_size; /* number of registered TGTs */ - int ltd_tgtnr; + __u32 ltd_tgtnr; /* bitmap of TGTs available */ - cfs_bitmap_t *ltd_tgt_bitmap; + struct cfs_bitmap *ltd_tgt_bitmap; /* TGTs scheduled to be deleted */ __u32 ltd_death_row; /* Table refcount used for delayed deletion */ @@ -94,24 +168,29 @@ struct lod_device { struct dt_device lod_dt_dev; struct obd_export *lod_child_exp; struct dt_device *lod_child; - cfs_proc_dir_entry_t *lod_proc_entry; + struct proc_dir_entry *lod_proc_entry; struct lprocfs_stats *lod_stats; spinlock_t lod_connects_lock; int lod_connects; unsigned int lod_recovery_completed:1, - lod_initialized:1; + lod_initialized:1, + lod_lmv_failout:1, + lod_child_got_update_log:1; /* lov settings descriptor storing static information */ struct lov_desc lod_desc; - /* use to protect ld_active_tgt_count and all ltd_active */ - spinlock_t lod_desc_lock; + /* protect ld_active_tgt_count, ltd_active and lod_md_root */ + spinlock_t lod_lock; /* Description of OST */ struct lod_tgt_descs lod_ost_descs; /* Description of MDT */ struct lod_tgt_descs lod_mdt_descs; + /* Recovery thread for lod_child */ + struct ptlrpc_thread lod_child_recovery_thread; + /* maximum EA size underlied OSD may have */ unsigned int lod_osd_max_easize; @@ -119,18 +198,21 @@ struct lod_device { * structure should be moved to lod_tgt_descs as well. */ /* QoS info per LOD */ - struct lov_qos lod_qos; /* qos info per lod */ + struct lod_qos lod_qos; /* qos info per lod */ /* OST pool data */ - struct ost_pool lod_pool_info; /* all OSTs in a packed array */ - int lod_pool_count; - cfs_hash_t *lod_pools_hash_body; /* used for key access */ - cfs_list_t lod_pool_list; /* used for sequential access */ - cfs_proc_dir_entry_t *lod_pool_proc_entry; + struct ost_pool lod_pool_info; /* all OSTs in a packed array */ + int lod_pool_count; + struct cfs_hash *lod_pools_hash_body; /* used for key access */ + struct list_head lod_pool_list; /* used for sequential access */ + struct proc_dir_entry *lod_pool_proc_entry; enum lustre_sec_part lod_sp_me; - cfs_proc_dir_entry_t *lod_symlink; + struct proc_dir_entry *lod_symlink; + + /* ROOT object, used to fetch FS default striping */ + struct lod_object *lod_md_root; }; #define lod_osts lod_ost_descs.ltd_tgts @@ -140,47 +222,117 @@ struct lod_device { #define ltd_ost ltd_tgt #define lod_ost_desc lod_tgt_desc -/* - * XXX: shrink this structure, currently it's 72bytes on 32bit arch, - * so, slab will be allocating 128bytes - */ +#define lod_mdts lod_mdt_descs.ltd_tgts +#define lod_mdt_bitmap lod_mdt_descs.ltd_tgt_bitmap +#define lod_remote_mdt_count lod_mdt_descs.ltd_tgtnr +#define lod_mdts_size lod_mdt_descs.ltd_tgts_size +#define ltd_mdt ltd_tgt +#define lod_mdt_desc lod_tgt_desc + +struct lod_default_striping { + /* default LOV */ + __u32 lds_def_stripe_size; + __u16 lds_def_stripenr; + __u16 lds_def_stripe_offset; + char lds_def_pool[LOV_MAXPOOLNAME + 1]; + /* default LMV */ + __u32 lds_dir_def_stripenr; + __u32 lds_dir_def_stripe_offset; + __u32 lds_dir_def_hash_type; + /* flags whether default striping is set */ + __u32 lds_def_striping_set:1, + lds_dir_def_striping_set:1; +}; + struct lod_object { - struct dt_object ldo_obj; - - /* if object is striped, then the next fields describe stripes */ - __u16 ldo_stripenr; - __u16 ldo_layout_gen; - __u32 ldo_stripe_size; - char *ldo_pool; - struct dt_object **ldo_stripe; - /* to know how much memory to free, ldo_stripenr can be less */ - int ldo_stripes_allocated; - /* default striping for directory represented by this object - * is cached in stripenr/stripe_size */ - int ldo_striping_cached:1, - ldo_def_striping_set:1; - __u32 ldo_def_stripe_size; - __u16 ldo_def_stripenr; - __u16 ldo_def_stripe_offset; - mdsno_t ldo_mds_num; + struct dt_object ldo_obj; + union { + /* file stripe */ + struct { + /* + * don't change field order, because both file and + * directory use ldo_stripenr/ldo_stripes_allocated + * to access stripe number. + */ + __u16 ldo_stripenr; + __u16 ldo_stripes_allocated; + __u16 ldo_layout_gen; + __u16 ldo_released_stripenr; + __u32 ldo_pattern; + __u32 ldo_stripe_size; + __u16 ldo_stripe_offset; + char *ldo_pool; + }; + /* directory stripe */ + struct { + __u16 ldo_dir_stripenr; + __u16 ldo_dir_stripes_allocated; + __u32 ldo_dir_stripe_offset; + __u32 ldo_dir_hash_type; + __u32 ldo_dir_slave_stripe:1, + ldo_dir_striped:1; + /* + * default striping is not cached, so this field is + * invalid after create, make sure it's used by + * lod_dir_striping_create_internal() only. + */ + struct lod_default_striping *ldo_def_striping; + }; + }; + struct dt_object **ldo_stripe; }; +static inline int lod_object_set_pool(struct lod_object *lo, const char *pool) +{ + int len; + + if (lo->ldo_pool != NULL) { + len = strlen(lo->ldo_pool) + 1; + OBD_FREE(lo->ldo_pool, len); + lo->ldo_pool = NULL; + } + if (pool != NULL) { + len = strlen(pool) + 1; + OBD_ALLOC(lo->ldo_pool, len); + if (lo->ldo_pool == NULL) + return -ENOMEM; + strlcpy(lo->ldo_pool, pool, len); + } + return 0; +} struct lod_it { struct dt_object *lit_obj; /* object from the layer below */ + /* stripe offset of iteration */ + __u32 lit_stripe_index; + __u32 lit_attr; struct dt_it *lit_it; /* iterator from the layer below */ }; struct lod_thread_info { - /* per-thread buffer for LOV EA */ - void *lti_ea_store; - int lti_ea_store_size; - struct lu_buf lti_buf; - struct ost_id lti_ostid; - struct lu_fid lti_fid; - struct obd_statfs lti_osfs; - struct lu_attr lti_attr; - struct lod_it lti_it; + /* per-thread buffer for LOV EA, may be vmalloc'd */ + void *lti_ea_store; + __u32 lti_ea_store_size; + /* per-thread buffer for LMV EA */ + struct lu_buf lti_buf; + struct ost_id lti_ostid; + struct lu_fid lti_fid; + struct obd_statfs lti_osfs; + struct lu_attr lti_attr; + struct lod_it lti_it; + struct ldlm_res_id lti_res_id; + /* used to hold lu_dirent, sizeof(struct lu_dirent) + NAME_MAX */ + char lti_key[sizeof(struct lu_dirent) + + NAME_MAX]; + struct dt_object_format lti_format; + struct lu_name lti_name; + struct lu_buf lti_linkea_buf; + struct dt_insert_rec lti_dt_rec; + struct llog_catid lti_cid; + struct llog_cookie lti_cookie; + struct lustre_cfg lti_lustre_cfg; + /* used to store parent default striping in create */ + struct lod_default_striping lti_def_striping; }; extern const struct lu_device_operations lod_lu_ops; @@ -240,18 +392,6 @@ static inline struct dt_object* lod_object_child(struct lod_object *o) struct dt_object, do_lu); } -static inline struct dt_object *lu2dt_obj(struct lu_object *o) -{ - LASSERT(ergo(o != NULL, lu_device_is_dt(o->lo_dev))); - return container_of0(o, struct dt_object, do_lu); -} - -static inline struct dt_object *dt_object_child(struct dt_object *o) -{ - return container_of0(lu_object_next(&(o)->do_lu), - struct dt_object, do_lu); -} - extern struct lu_context_key lod_thread_key; static inline struct lod_thread_info *lod_env_info(const struct lu_env *env) @@ -262,24 +402,69 @@ static inline struct lod_thread_info *lod_env_info(const struct lu_env *env) return info; } +static inline struct lu_name * +lod_name_get(const struct lu_env *env, const void *area, int len) +{ + struct lu_name *lname; + + lname = &lod_env_info(env)->lti_name; + lname->ln_name = area; + lname->ln_namelen = len; + return lname; +} + #define lod_foreach_ost(__dev, index) \ if ((__dev)->lod_osts_size > 0) \ cfs_foreach_bit((__dev)->lod_ost_bitmap, (index)) +#define lod_foreach_mdt(mdt_dev, index) \ + cfs_foreach_bit((mdt_dev)->lod_mdt_bitmap, (index)) + /* lod_dev.c */ +extern struct kmem_cache *lod_object_kmem; int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, - const struct lu_fid *fid, mdsno_t *tgt, int flags); + const struct lu_fid *fid, __u32 *tgt, int *flags); +int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod, + struct dt_device *dt); +void lod_sub_fini_llog(const struct lu_env *env, + struct dt_device *dt, struct ptlrpc_thread *thread); +int lodname2mdt_index(char *lodname, __u32 *mdt_index); +extern void target_recovery_fini(struct obd_device *obd); + /* lod_lov.c */ void lod_getref(struct lod_tgt_descs *ltd); void lod_putref(struct lod_device *lod, struct lod_tgt_descs *ltd); -int lod_add_device(const struct lu_env *env, struct lod_device *m, - char *osp, unsigned index, unsigned gen, int active); +int lod_add_device(const struct lu_env *env, struct lod_device *lod, + char *osp, unsigned index, unsigned gen, int mdt_index, + char *type, int active); int lod_del_device(const struct lu_env *env, struct lod_device *lod, struct lod_tgt_descs *ltd, char *osp, unsigned idx, - unsigned gen); -int lod_fini_tgt(struct lod_device *lod, struct lod_tgt_descs *ltd); -int lod_load_striping(const struct lu_env *env, struct lod_object *mo); -int lod_get_lov_ea(const struct lu_env *env, struct lod_object *mo); + unsigned gen, bool for_ost); +int lod_fini_tgt(const struct lu_env *env, struct lod_device *lod, + struct lod_tgt_descs *ltd, bool for_ost); +int lod_load_striping_locked(const struct lu_env *env, struct lod_object *lo); +int lod_load_striping(const struct lu_env *env, struct lod_object *lo); + +int lod_get_ea(const struct lu_env *env, struct lod_object *lo, + const char *name); +static inline int +lod_get_lov_ea(const struct lu_env *env, struct lod_object *lo) +{ + return lod_get_ea(env, lo, XATTR_NAME_LOV); +} + +static inline int +lod_get_lmv_ea(const struct lu_env *env, struct lod_object *lo) +{ + return lod_get_ea(env, lo, XATTR_NAME_LMV); +} + +static inline int +lod_get_default_lmv_ea(const struct lu_env *env, struct lod_object *lo) +{ + return lod_get_ea(env, lo, XATTR_NAME_DEFAULT_LMV); +} + void lod_fix_desc(struct lov_desc *desc); void lod_fix_desc_qos_maxage(__u32 *val); void lod_fix_desc_pattern(__u32 *val); @@ -289,14 +474,15 @@ int lod_pools_init(struct lod_device *m, struct lustre_cfg *cfg); int lod_pools_fini(struct lod_device *m); int lod_parse_striping(const struct lu_env *env, struct lod_object *mo, const struct lu_buf *buf); +int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo, + const struct lu_buf *buf); int lod_initialize_objects(const struct lu_env *env, struct lod_object *mo, struct lov_ost_data_v1 *objs); -int lod_store_def_striping(const struct lu_env *env, struct dt_object *dt, - struct thandle *th); -int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, int specific); +int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, + bool is_from_disk); int lod_generate_and_set_lovea(const struct lu_env *env, struct lod_object *mo, struct thandle *th); - +int lod_ea_store_resize(struct lod_thread_info *info, size_t size); /* lod_pool.c */ int lod_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count); int lod_ost_pool_remove(struct ost_pool *op, __u32 idx); @@ -306,7 +492,7 @@ void lod_pool_putref(struct pool_desc *pool); int lod_ost_pool_free(struct ost_pool *op); int lod_pool_del(struct obd_device *obd, char *poolname); int lod_ost_pool_init(struct ost_pool *op, unsigned int count); -extern cfs_hash_ops_t pool_hash_operations; +extern struct cfs_hash_ops pool_hash_operations; int lod_check_index_in_pool(__u32 idx, struct pool_desc *pool); int lod_pool_new(struct obd_device *obd, char *poolname); int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname); @@ -318,14 +504,17 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo, struct thandle *th); int qos_add_tgt(struct lod_device*, struct lod_tgt_desc *); int qos_del_tgt(struct lod_device *, struct lod_tgt_desc *); +void lod_qos_rr_init(struct lod_qos_rr *lqr); /* lproc_lod.c */ -void lprocfs_lod_init_vars(struct lprocfs_static_vars *lvars); int lod_procfs_init(struct lod_device *lod); void lod_procfs_fini(struct lod_device *lod); /* lod_object.c */ -int lod_object_set_pool(struct lod_object *o, char *pool); +extern struct dt_object_operations lod_obj_ops; +extern struct lu_object_operations lod_lu_obj_ops; +int lod_load_lmv_shards(const struct lu_env *env, struct lod_object *lo, + struct lu_buf *buf, bool resize); int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt, struct lu_attr *attr, const struct lu_buf *lovea, struct thandle *th); @@ -334,5 +523,90 @@ int lod_striping_create(const struct lu_env *env, struct dt_object *dt, struct thandle *th); void lod_object_free_striping(const struct lu_env *env, struct lod_object *lo); +/* lod_sub_object.c */ +struct thandle *lod_sub_get_thandle(const struct lu_env *env, + struct thandle *th, + const struct dt_object *sub_obj, + bool *record_update); +int lod_sub_object_declare_create(const struct lu_env *env, + struct dt_object *dt, + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct dt_object_format *dof, + struct thandle *th); +int lod_sub_object_create(const struct lu_env *env, struct dt_object *dt, + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct dt_object_format *dof, + struct thandle *th); +int lod_sub_object_declare_ref_add(const struct lu_env *env, + struct dt_object *dt, + struct thandle *th); +int lod_sub_object_ref_add(const struct lu_env *env, struct dt_object *dt, + struct thandle *th); +int lod_sub_object_declare_ref_del(const struct lu_env *env, + struct dt_object *dt, + struct thandle *th); +int lod_sub_object_ref_del(const struct lu_env *env, struct dt_object *dt, + struct thandle *th); +int lod_sub_object_declare_destroy(const struct lu_env *env, + struct dt_object *dt, + struct thandle *th); +int lod_sub_object_destroy(const struct lu_env *env, struct dt_object *dt, + struct thandle *th); +int lod_sub_object_declare_insert(const struct lu_env *env, + struct dt_object *dt, + const struct dt_rec *rec, + const struct dt_key *key, + struct thandle *th); +int lod_sub_object_index_insert(const struct lu_env *env, struct dt_object *dt, + const struct dt_rec *rec, + const struct dt_key *key, struct thandle *th, + int ign); +int lod_sub_object_declare_delete(const struct lu_env *env, + struct dt_object *dt, + const struct dt_key *key, + struct thandle *th); +int lod_sub_object_delete(const struct lu_env *env, struct dt_object *dt, + const struct dt_key *name, struct thandle *th); +int lod_sub_object_declare_xattr_set(const struct lu_env *env, + struct dt_object *dt, + const struct lu_buf *buf, + const char *name, int fl, + struct thandle *th); +int lod_sub_object_xattr_set(const struct lu_env *env, struct dt_object *dt, + const struct lu_buf *buf, const char *name, int fl, + struct thandle *th); +int lod_sub_object_declare_attr_set(const struct lu_env *env, + struct dt_object *dt, + const struct lu_attr *attr, + struct thandle *th); +int lod_sub_object_attr_set(const struct lu_env *env, + struct dt_object *dt, + const struct lu_attr *attr, + struct thandle *th); +int lod_sub_object_declare_xattr_del(const struct lu_env *env, + struct dt_object *dt, + const char *name, + struct thandle *th); +int lod_sub_object_xattr_del(const struct lu_env *env, + struct dt_object *dt, + const char *name, + struct thandle *th); +int lod_sub_object_declare_write(const struct lu_env *env, + struct dt_object *dt, + const struct lu_buf *buf, loff_t pos, + struct thandle *th); +ssize_t lod_sub_object_write(const struct lu_env *env, struct dt_object *dt, + const struct lu_buf *buf, loff_t *pos, + struct thandle *th, int rq); +int lod_sub_object_declare_punch(const struct lu_env *env, + struct dt_object *dt, + __u64 start, __u64 end, + struct thandle *th); +int lod_sub_object_punch(const struct lu_env *env, struct dt_object *dt, + __u64 start, __u64 end, struct thandle *th); + +int lod_sub_prep_llog(const struct lu_env *env, struct lod_device *lod, + struct dt_device *dt, int index); #endif -