/* Review note: this is a unified diff against lustre/lod/lod_internal.h whose line breaks were collapsed; "+" and "-" inside the text are diff markers. The patch renames struct lod_ost_desc to lod_tgt_desc, moves the target-table fields of struct lod_device into struct lod_tgt_descs (instantiated once for OSTs and once for MDTs), and adds QoS, pool, directory-striping and iterator declarations. Review edits touch added ("+") lines only and do not change hunk line counts. */
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flod%2Flod_internal.h;h=7d9b95fc51d2d9005b00952ea1ec3e7fbbd555c8;hp=a27110112512005837672e0bdeb05d988c98bf18;hb=de2d5808bd2987f76d2486272e1a9c192ba277d4;hpb=c7f2e70a27e872ddf61f254fd6d9b299d9745aa6 diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index a271101..7d9b95f 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -22,6 +22,8 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved * Use is subject to license terms. + * + * Copyright (c) 2012, 2013, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -43,8 +45,66 @@ #define LOV_USES_ASSIGNED_STRIPE 0 #define LOV_USES_DEFAULT_STRIPE 1 -struct lod_ost_desc { - struct dt_device *ltd_ost; +struct lod_qos_rr { + __u32 lqr_start_idx; /* start index of new inode */ + __u32 lqr_offset_idx; /* aliasing for start_idx */ + int lqr_start_count; /* reseed counter */ + struct ost_pool lqr_pool; /* round-robin optimized list */ + unsigned long lqr_dirty:1; /* recalc round-robin list */ +}; + +struct pool_desc { + char pool_name[LOV_MAXPOOLNAME + 1]; + struct ost_pool pool_obds; /* pool members */ + atomic_t pool_refcount; + struct lod_qos_rr pool_rr; + cfs_hlist_node_t pool_hash; /* access by poolname */ + struct list_head pool_list; + struct proc_dir_entry *pool_proc_entry; + struct obd_device *pool_lobd; /* owner */ +}; + +#define pool_tgt_size(p) ((p)->pool_obds.op_size) +#define pool_tgt_count(p) ((p)->pool_obds.op_count) +#define pool_tgt_array(p) ((p)->pool_obds.op_array) +#define pool_tgt_rw_sem(p) ((p)->pool_obds.op_rw_sem) + +struct lod_qos { + struct list_head lq_oss_list; + struct rw_semaphore lq_rw_sem; + __u32 lq_active_oss_count; + unsigned int lq_prio_free; /* priority for free space */ + unsigned int lq_threshold_rr;/* priority for rr */ + struct lod_qos_rr lq_rr; /* round robin qos data */ + bool lq_dirty:1, /* recalc qos data */ + 
lq_same_space:1,/* the OSTs all have approx. + the same space avail */ + lq_reset:1; /* zero current penalties */ +}; + +struct lod_qos_oss { + struct obd_uuid lqo_uuid; /* ptlrpc's c_remote_uuid */ + struct list_head lqo_oss_list; /* link to lod_qos */ + __u64 lqo_bavail; /* total bytes avail on OSS */ + __u64 lqo_penalty; /* current penalty */ + __u64 lqo_penalty_per_obj; /* penalty decrease + every obj */ + time_t lqo_used; /* last used time, seconds */ + __u32 lqo_ost_count; /* number of osts on this oss */ +}; + +struct ltd_qos { + struct lod_qos_oss *ltq_oss; /* oss info */ + __u64 ltq_penalty; /* current penalty */ + __u64 ltq_penalty_per_obj; /* penalty decrease + every obj */ + __u64 ltq_weight; /* net weighting */ + time_t ltq_used; /* last used time, seconds */ + bool ltq_usable:1; /* usable for striping */ +}; + +struct lod_tgt_desc { + struct dt_device *ltd_tgt; struct list_head ltd_kill; struct obd_export *ltd_exp; struct obd_uuid ltd_uuid; @@ -57,16 +117,37 @@ struct lod_ost_desc { ltd_reap:1; /* should this target be deleted */ }; -#define OST_PTRS 256 /* number of pointers at 1st level */ -#define OST_PTRS_PER_BLOCK 256 /* number of pointers at 2nd level */ +#define TGT_PTRS 256 /* number of pointers at 1st level */ +#define TGT_PTRS_PER_BLOCK 256 /* number of pointers at 2nd level */ -struct lod_ost_desc_idx { - struct lod_ost_desc *ldi_ost[OST_PTRS_PER_BLOCK]; +struct lod_tgt_desc_idx { + struct lod_tgt_desc *ldi_tgt[TGT_PTRS_PER_BLOCK]; }; -#define OST_TGT(dev,index) \ - ((dev)->lod_ost_idx[(index) / \ - OST_PTRS_PER_BLOCK]->ldi_ost[(index)%OST_PTRS_PER_BLOCK]) +#define LTD_TGT(ltd, index) \ + ((ltd)->ltd_tgt_idx[(index) / \ + TGT_PTRS_PER_BLOCK]->ldi_tgt[(index) % TGT_PTRS_PER_BLOCK]) + +#define OST_TGT(lod, index) LTD_TGT(&(lod)->lod_ost_descs, index) +#define MDT_TGT(lod, index) LTD_TGT(&(lod)->lod_mdt_descs, index) +struct lod_tgt_descs { + /* list of known TGTs */ + struct lod_tgt_desc_idx *ltd_tgt_idx[TGT_PTRS]; + /* Size of the lod_tgts array, 
guaranteed to be a power of 2 */ + __u32 ltd_tgts_size; + /* number of registered TGTs */ + int ltd_tgtnr; + /* bitmap of TGTs available */ + cfs_bitmap_t *ltd_tgt_bitmap; + /* TGTs scheduled to be deleted */ + __u32 ltd_death_row; + /* Table refcount used for delayed deletion */ + int ltd_refcount; + /* mutex to serialize concurrent updates to the tgt table */ + struct mutex ltd_mutex; + /* read/write semaphore used for array relocation */ + struct rw_semaphore ltd_rw_sem; +}; struct lod_device { struct dt_device lod_dt_dev; @@ -74,39 +155,31 @@ struct lod_device { struct dt_device *lod_child; cfs_proc_dir_entry_t *lod_proc_entry; struct lprocfs_stats *lod_stats; + spinlock_t lod_connects_lock; int lod_connects; - int lod_recovery_completed; + unsigned int lod_recovery_completed:1, + lod_initialized:1, + lod_lmv_failout:1; /* lov settings descriptor storing static information */ struct lov_desc lod_desc; /* use to protect ld_active_tgt_count and all ltd_active */ - cfs_spinlock_t lod_desc_lock; - - /* list of known OSTs */ - struct lod_ost_desc_idx *lod_ost_idx[OST_PTRS]; + spinlock_t lod_desc_lock; - /* Size of the lod_osts array, granted to be a power of 2 */ - __u32 lod_osts_size; - /* number of registered OSTs */ - int lod_ostnr; - /* OSTs scheduled to be deleted */ - __u32 lod_death_row; - /* bitmap of OSTs available */ - cfs_bitmap_t *lod_ost_bitmap; + /* Description of OST */ + struct lod_tgt_descs lod_ost_descs; + /* Description of MDT */ + struct lod_tgt_descs lod_mdt_descs; /* maximum EA size underlied OSD may have */ unsigned int lod_osd_max_easize; - /* Table refcount used for delayed deletion */ - int lod_refcount; - /* mutex to serialize concurrent updates to the ost table */ - cfs_mutex_t lod_mutex; - /* read/write semaphore used for array relocation */ - cfs_rw_semaphore_t lod_rw_sem; - + /* FIXME: When QoS and pools are implemented for MDT, these + * structures should probably be moved to lod_tgt_descs as well. 
+ */ /* QoS info per LOD */ - struct lov_qos lod_qos; /* qos info per lod */ + struct lod_qos lod_qos; /* qos info per lod */ /* OST pool data */ struct ost_pool lod_pool_info; /* all OSTs in a packed array */ @@ -120,6 +193,32 @@ struct lod_device { cfs_proc_dir_entry_t *lod_symlink; }; +#define lod_osts lod_ost_descs.ltd_tgts /* NOTE(review): struct lod_tgt_descs as added by this patch has no ltd_tgts member; confirm this alias is unused */ +#define lod_ost_bitmap lod_ost_descs.ltd_tgt_bitmap +#define lod_ostnr lod_ost_descs.ltd_tgtnr +#define lod_osts_size lod_ost_descs.ltd_tgts_size +#define ltd_ost ltd_tgt +#define lod_ost_desc lod_tgt_desc + +#define lod_mdts lod_mdt_descs.ltd_tgts /* NOTE(review): struct lod_tgt_descs as added by this patch has no ltd_tgts member; confirm this alias is unused */ +#define lod_mdt_bitmap lod_mdt_descs.ltd_tgt_bitmap +#define lod_remote_mdt_count lod_mdt_descs.ltd_tgtnr +#define lod_mdts_size lod_mdt_descs.ltd_tgts_size +#define ltd_mdt ltd_tgt +#define lod_mdt_desc lod_tgt_desc + +struct lod_dir_stripe_info { + __u32 ldsi_stripe_offset; + __u32 ldsi_def_stripenr; + __u32 ldsi_def_stripe_offset; + __u32 ldsi_def_hash_type; + __u32 ldsi_hash_type; + + unsigned int ldsi_striping_cached:1, + ldsi_def_striping_set:1, + ldsi_striped:1; +}; + /* * XXX: shrink this structure, currently it's 72bytes on 32bit arch, * so, slab will be allocating 128bytes @@ -128,32 +227,63 @@ struct lod_object { struct dt_object ldo_obj; /* if object is striped, then the next fields describe stripes */ + /* For striped directory, ldo_stripenr == slave stripe count */ __u16 ldo_stripenr; __u16 ldo_layout_gen; __u32 ldo_stripe_size; + __u32 ldo_pattern; + __u16 ldo_released_stripenr; char *ldo_pool; struct dt_object **ldo_stripe; /* to know how much memory to free, ldo_stripenr can be less */ - int ldo_stripes_allocated; /* default striping for directory represented by this object * is cached in stripenr/stripe_size */ - int ldo_striping_cached:1; - int 
ldo_def_striping_set:1; + unsigned int ldo_stripes_allocated:16, + ldo_striping_cached:1, + ldo_def_striping_set:1, + /* ldo_dir_slave_stripe indicates this is a slave stripe of + * a striped dir */ + ldo_dir_slave_stripe:1; __u32 
ldo_def_stripe_size; __u16 ldo_def_stripenr; __u16 ldo_def_stripe_offset; + struct lod_dir_stripe_info *ldo_dir_stripe; }; +#define ldo_dir_stripe_offset ldo_dir_stripe->ldsi_stripe_offset +#define ldo_dir_def_stripenr ldo_dir_stripe->ldsi_def_stripenr +#define ldo_dir_hash_type ldo_dir_stripe->ldsi_hash_type +#define ldo_dir_def_hash_type ldo_dir_stripe->ldsi_def_hash_type +#define ldo_dir_striping_cached ldo_dir_stripe->ldsi_striping_cached +#define ldo_dir_striped ldo_dir_stripe->ldsi_striped +#define ldo_dir_def_striping_set ldo_dir_stripe->ldsi_def_striping_set +#define ldo_dir_def_stripe_offset ldo_dir_stripe->ldsi_def_stripe_offset + +struct lod_it { + struct dt_object *lit_obj; /* object from the layer below */ + /* stripe offset of iteration */ + __u32 lit_stripe_index; + __u32 lit_attr; + struct dt_it *lit_it; /* iterator from the layer below */ +}; struct lod_thread_info { /* per-thread buffer for LOV EA */ void *lti_ea_store; int lti_ea_store_size; + /* per-thread buffer for LMV EA */ struct lu_buf lti_buf; struct ost_id lti_ostid; struct lu_fid lti_fid; struct obd_statfs lti_osfs; struct lu_attr lti_attr; + struct lod_it lti_it; + struct ldlm_res_id lti_res_id; + /* used to hold lu_dirent, sizeof(struct lu_dirent) + NAME_MAX */ + char lti_key[sizeof(struct lu_dirent) + NAME_MAX]; + struct dt_object_format lti_format; + struct lu_name lti_name; + struct lu_buf lti_linkea_buf; }; extern const struct lu_device_operations lod_lu_ops; @@ -213,12 +343,6 @@ static inline struct dt_object* lod_object_child(struct lod_object *o) struct dt_object, do_lu); } -static inline struct dt_object *lu2dt_obj(struct lu_object *o) -{ - LASSERT(ergo(o != NULL, lu_device_is_dt(o->lo_dev))); - return container_of0(o, struct dt_object, do_lu); -} - static inline struct dt_object *dt_object_child(struct dt_object *o) { return container_of0(lu_object_next(&(o)->do_lu), @@ -235,19 +359,58 @@ static inline struct lod_thread_info *lod_env_info(const struct lu_env *env) return info; 
} +static inline struct lu_name * +lod_name_get(const struct lu_env *env, const void *area, int len) +{ + struct lu_name *lname; + + lname = &lod_env_info(env)->lti_name; + lname->ln_name = area; + lname->ln_namelen = len; + return lname; +} + #define lod_foreach_ost(__dev, index) \ if ((__dev)->lod_osts_size > 0) \ cfs_foreach_bit((__dev)->lod_ost_bitmap, (index)) +/* lod_dev.c */ +int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, + const struct lu_fid *fid, __u32 *tgt, int *flags); /* lod_lov.c */ -void lod_getref(struct lod_device *lod); -void lod_putref(struct lod_device *lod); -int lod_add_device(const struct lu_env *env, struct lod_device *m, - char *osp, unsigned index, unsigned gen, int active); -int lod_del_device(const struct lu_env *env, struct lod_device *m, - char *osp, unsigned index, unsigned gen); -int lod_load_striping(const struct lu_env *env, struct lod_object *mo); -int lod_get_lov_ea(const struct lu_env *env, struct lod_object *mo); +void lod_getref(struct lod_tgt_descs *ltd); +void lod_putref(struct lod_device *lod, struct lod_tgt_descs *ltd); +int lod_add_device(const struct lu_env *env, struct lod_device *lod, + char *osp, unsigned index, unsigned gen, int mdt_index, + char *type, int active); +int lod_del_device(const struct lu_env *env, struct lod_device *lod, + struct lod_tgt_descs *ltd, char *osp, unsigned idx, + unsigned gen, bool for_ost); +int lod_fini_tgt(const struct lu_env *env, struct lod_device *lod, + struct lod_tgt_descs *ltd, bool for_ost); +int lod_load_striping_locked(const struct lu_env *env, struct lod_object *lo); +int lod_load_striping(const struct lu_env *env, struct lod_object *lo); + +int lod_get_ea(const struct lu_env *env, struct lod_object *lo, + const char *name); +static inline int +lod_get_lov_ea(const struct lu_env *env, struct lod_object *lo) +{ + return lod_get_ea(env, lo, XATTR_NAME_LOV); +} + +static inline int +lod_get_lmv_ea(const struct lu_env *env, struct lod_object *lo) +{ + return 
lod_get_ea(env, lo, XATTR_NAME_LMV); +} + +static inline int +lod_get_default_lmv_ea(const struct lu_env *env, struct lod_object *lo) +{ + return lod_get_ea(env, lo, XATTR_NAME_DEFAULT_LMV); +} + void lod_fix_desc(struct lov_desc *desc); void lod_fix_desc_qos_maxage(__u32 *val); void lod_fix_desc_pattern(__u32 *val); @@ -257,14 +420,17 @@ int lod_pools_init(struct lod_device *m, struct lustre_cfg *cfg); int lod_pools_fini(struct lod_device *m); int lod_parse_striping(const struct lu_env *env, struct lod_object *mo, const struct lu_buf *buf); +int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo, + const struct lu_buf *buf); int lod_initialize_objects(const struct lu_env *env, struct lod_object *mo, struct lov_ost_data_v1 *objs); int lod_store_def_striping(const struct lu_env *env, struct dt_object *dt, struct thandle *th); -int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, int specific); +int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, + bool is_from_disk); int lod_generate_and_set_lovea(const struct lu_env *env, struct lod_object *mo, struct thandle *th); - +int lod_ea_store_resize(struct lod_thread_info *info, int size); /* lod_pool.c */ int lod_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count); int lod_ost_pool_remove(struct ost_pool *op, __u32 idx); @@ -284,14 +450,16 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname); int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo, struct lu_attr *attr, const struct lu_buf *buf, struct thandle *th); -int qos_add_tgt(struct lod_device*, struct lod_ost_desc *); -int qos_del_tgt(struct lod_device *, struct lod_ost_desc *); +int qos_add_tgt(struct lod_device *lod, struct lod_tgt_desc *tgt_desc); +int qos_del_tgt(struct lod_device *lod, struct lod_tgt_desc *tgt_desc); /* lproc_lod.c */ -extern struct file_operations lod_proc_target_fops; -void lprocfs_lod_init_vars(struct lprocfs_static_vars 
*lvars); +int 
lod_procfs_init(struct lod_device *lod); +void lod_procfs_fini(struct lod_device *lod); /* lod_object.c */ +int lod_load_lmv_shards(const struct lu_env *env, struct lod_object *lo, + struct lu_buf *buf, bool resize); int lod_object_set_pool(struct lod_object *o, char *pool); int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt, struct lu_attr *attr,