/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved
* Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define LOV_USES_ASSIGNED_STRIPE 0
#define LOV_USES_DEFAULT_STRIPE 1
-struct lod_ost_desc {
- struct dt_device *ltd_ost;
+struct lod_qos_rr { /* round-robin allocation state; embedded in pool_desc (pool_rr) and lod_qos (lq_rr) */
+ __u32 lqr_start_idx; /* start index of new inode */
+ __u32 lqr_offset_idx; /* aliasing for start_idx */
+ int lqr_start_count; /* reseed counter */
+ struct ost_pool lqr_pool; /* round-robin optimized list */
+ unsigned long lqr_dirty:1; /* set when the round-robin list must be recalculated */
+};
+
+struct pool_desc { /* descriptor of one named pool */
+ char pool_name[LOV_MAXPOOLNAME + 1]; /* pool name; the +1 is presumably room for the NUL terminator */
+ struct ost_pool pool_obds; /* pool members */
+ atomic_t pool_refcount; /* reference count */
+ struct lod_qos_rr pool_rr; /* round-robin state of this pool */
+ cfs_hlist_node_t pool_hash; /* access by poolname */
+ struct list_head pool_list; /* list linkage; the owning list head is not visible in this header */
+ struct proc_dir_entry *pool_proc_entry; /* procfs entry, presumably the one exported for this pool */
+ struct obd_device *pool_lobd; /* owner */
+};
+
+#define pool_tgt_size(p) ((p)->pool_obds.op_size) /* pool_tgt_*(p): shorthand for the fields of (p)->pool_obds */
+#define pool_tgt_count(p) ((p)->pool_obds.op_count)
+#define pool_tgt_array(p) ((p)->pool_obds.op_array)
+#define pool_tgt_rw_sem(p) ((p)->pool_obds.op_rw_sem)
+
+struct lod_qos { /* QoS allocation state; struct lod_device holds one as lod_qos */
+ struct list_head lq_oss_list; /* NOTE(review): presumably the head that lod_qos_oss::lqo_oss_list links into -- confirm */
+ struct rw_semaphore lq_rw_sem;
+ __u32 lq_active_oss_count;
+ unsigned int lq_prio_free; /* priority for free space */
+ unsigned int lq_threshold_rr;/* priority for rr (NOTE(review): field name says "threshold" -- confirm) */
+ struct lod_qos_rr lq_rr; /* round robin qos data */
+ bool lq_dirty:1, /* recalc qos data */
+ lq_same_space:1,/* all OSTs have approximately
+ the same space available */
+ lq_reset:1; /* zero current penalties */
+};
+
+struct lod_qos_oss { /* QoS accounting for one OSS; referenced from ltd_qos::ltq_oss */
+ struct obd_uuid lqo_uuid; /* ptlrpc's c_remote_uuid */
+ struct list_head lqo_oss_list; /* link to lod_qos (presumably its lq_oss_list -- confirm) */
+ __u64 lqo_bavail; /* total bytes avail on OSS */
+ __u64 lqo_penalty; /* current penalty */
+ __u64 lqo_penalty_per_obj; /* penalty decrease
+ every obj */
+ time_t lqo_used; /* last used time, seconds */
+ __u32 lqo_ost_count; /* number of osts on this oss */
+};
+
+struct ltd_qos { /* QoS data, presumably kept per target (ltd_ prefix) -- confirm against struct lod_tgt_desc */
+ struct lod_qos_oss *ltq_oss; /* oss info */
+ __u64 ltq_penalty; /* current penalty */
+ __u64 ltq_penalty_per_obj; /* penalty decrease
+ every obj */
+ __u64 ltq_weight; /* net weighting */
+ time_t ltq_used; /* last used time, seconds */
+ bool ltq_usable:1; /* usable for striping */
+};
+
+struct lod_tgt_desc {
+ struct dt_device *ltd_tgt;
struct list_head ltd_kill;
struct obd_export *ltd_exp;
struct obd_uuid ltd_uuid;
ltd_reap:1; /* should this target be deleted */
};
-#define OST_PTRS 256 /* number of pointers at 1st level */
-#define OST_PTRS_PER_BLOCK 256 /* number of pointers at 2nd level */
+#define TGT_PTRS 256 /* number of pointers at 1st level (size of ltd_tgt_idx[]) */
+#define TGT_PTRS_PER_BLOCK 256 /* number of pointers at 2nd level (size of ldi_tgt[]) */
-struct lod_ost_desc_idx {
- struct lod_ost_desc *ldi_ost[OST_PTRS_PER_BLOCK];
+struct lod_tgt_desc_idx { /* one 2nd-level block of target descriptor pointers */
+ struct lod_tgt_desc *ldi_tgt[TGT_PTRS_PER_BLOCK]; /* slot chosen by (index % TGT_PTRS_PER_BLOCK) in LTD_TGT() */
};
-#define OST_TGT(dev,index) \
- ((dev)->lod_ost_idx[(index) / \
- OST_PTRS_PER_BLOCK]->ldi_ost[(index)%OST_PTRS_PER_BLOCK])
+/*
+ * Look up the target descriptor pointer stored for \a index in the two-level
+ * table \a ltd (a pointer to struct lod_tgt_descs): the first-level entry is
+ * selected by index / TGT_PTRS_PER_BLOCK and the slot inside that block by
+ * index % TGT_PTRS_PER_BLOCK.  The first-level block pointer is dereferenced
+ * unconditionally and \a index is evaluated twice, so do not pass an
+ * expression with side effects.
+ */
+#define LTD_TGT(ltd, index) \
+ ((ltd)->ltd_tgt_idx[(index) / \
+ TGT_PTRS_PER_BLOCK]->ldi_tgt[(index) % TGT_PTRS_PER_BLOCK])
+
+/*
+ * Same lookup in the OST / MDT table of \a lod (a pointer to struct
+ * lod_device).  The argument is parenthesized so that any pointer
+ * expression, not just a plain identifier, expands correctly.
+ */
+#define OST_TGT(lod, index) LTD_TGT(&(lod)->lod_ost_descs, index)
+#define MDT_TGT(lod, index) LTD_TGT(&(lod)->lod_mdt_descs, index)
+struct lod_tgt_descs { /* target table; struct lod_device keeps one for OSTs and one for MDTs */
+ /* two-level table of known TGTs, looked up with LTD_TGT() */
+ struct lod_tgt_desc_idx *ltd_tgt_idx[TGT_PTRS];
+ /* Size of the target array, guaranteed to be a power of 2 */
+ __u32 ltd_tgts_size;
+ /* number of registered TGTs */
+ int ltd_tgtnr;
+ /* bitmap of TGTs available */
+ cfs_bitmap_t *ltd_tgt_bitmap;
+ /* TGTs scheduled to be deleted */
+ __u32 ltd_death_row;
+ /* Table refcount used for delayed deletion */
+ int ltd_refcount;
+ /* mutex to serialize concurrent updates to the tgt table */
+ struct mutex ltd_mutex;
+ /* read/write semaphore used for array relocation */
+ struct rw_semaphore ltd_rw_sem;
+};
struct lod_device {
struct dt_device lod_dt_dev;
struct dt_device *lod_child;
cfs_proc_dir_entry_t *lod_proc_entry;
struct lprocfs_stats *lod_stats;
+ spinlock_t lod_connects_lock; /* NOTE(review): presumably guards lod_connects -- confirm */
int lod_connects;
- int lod_recovery_completed;
+ unsigned int lod_recovery_completed:1, /* was a plain int before this change; now a 1-bit flag */
+ lod_initialized:1,
+ lod_lmv_failout:1;
/* lov settings descriptor storing static information */
struct lov_desc lod_desc;
/* use to protect ld_active_tgt_count and all ltd_active */
- cfs_spinlock_t lod_desc_lock;
-
- /* list of known OSTs */
- struct lod_ost_desc_idx *lod_ost_idx[OST_PTRS];
+ spinlock_t lod_desc_lock;
- /* Size of the lod_osts array, granted to be a power of 2 */
- __u32 lod_osts_size;
- /* number of registered OSTs */
- int lod_ostnr;
- /* OSTs scheduled to be deleted */
- __u32 lod_death_row;
- /* bitmap of OSTs available */
- cfs_bitmap_t *lod_ost_bitmap;
+ /* OST target table (accessed through OST_TGT() and the lod_ost* aliases) */
+ struct lod_tgt_descs lod_ost_descs;
+ /* MDT target table (accessed through MDT_TGT() and the lod_mdt* aliases) */
+ struct lod_tgt_descs lod_mdt_descs;
/* maximum EA size underlied OSD may have */
unsigned int lod_osd_max_easize;
- /* Table refcount used for delayed deletion */
- int lod_refcount;
- /* mutex to serialize concurrent updates to the ost table */
- cfs_mutex_t lod_mutex;
- /* read/write semaphore used for array relocation */
- cfs_rw_semaphore_t lod_rw_sem;
-
+ /* FIXME: When QoS and pools are implemented for MDTs, these
+ * structures should probably be moved to lod_tgt_descs as well.
+ */
/* QoS info per LOD */
- struct lov_qos lod_qos; /* qos info per lod */
+ struct lod_qos lod_qos; /* qos info per lod */
/* OST pool data */
struct ost_pool lod_pool_info; /* all OSTs in a packed array */
cfs_proc_dir_entry_t *lod_symlink;
};
+#define lod_osts lod_ost_descs.ltd_tgts /* NOTE(review): struct lod_tgt_descs in this header has no ltd_tgts member; any use of this alias will not compile -- confirm intent */
+#define lod_ost_bitmap lod_ost_descs.ltd_tgt_bitmap /* lod_ost*: old lod_device field names mapped onto lod_ost_descs */
+#define lod_ostnr lod_ost_descs.ltd_tgtnr
+#define lod_osts_size lod_ost_descs.ltd_tgts_size
+#define ltd_ost ltd_tgt /* plain token alias: applies to every identifier spelled ltd_ost after this point */
+#define lod_ost_desc lod_tgt_desc /* old struct tag kept as an alias of lod_tgt_desc */
+
+#define lod_mdts lod_mdt_descs.ltd_tgts /* NOTE(review): same missing ltd_tgts member as lod_osts -- confirm intent */
+#define lod_mdt_bitmap lod_mdt_descs.ltd_tgt_bitmap /* lod_mdt*: the same aliases for the MDT table */
+#define lod_remote_mdt_count lod_mdt_descs.ltd_tgtnr
+#define lod_mdts_size lod_mdt_descs.ltd_tgts_size
+#define ltd_mdt ltd_tgt
+#define lod_mdt_desc lod_tgt_desc
+
+struct lod_dir_stripe_info { /* directory striping data; reached through the ldo_dir_stripe pointer and the ldo_dir_* macros */
+ __u32 ldsi_stripe_offset;
+ __u32 ldsi_def_stripenr; /* ldsi_def_*: presumably the default values applied to new entries -- confirm */
+ __u32 ldsi_def_stripe_offset;
+ __u32 ldsi_def_hash_type;
+ __u32 ldsi_hash_type;
+
+ unsigned int ldsi_striping_cached:1, /* three 1-bit flags packed into one unsigned int */
+ ldsi_def_striping_set:1,
+ ldsi_striped:1;
+};
+
/*
* XXX: shrink this structure, currently it's 72bytes on 32bit arch,
* so, slab will be allocating 128bytes
struct dt_object ldo_obj;
/* if object is striped, then the next fields describe stripes */
+ /* For striped directory, ldo_stripenr == slave stripe count */
__u16 ldo_stripenr;
__u16 ldo_layout_gen;
__u32 ldo_stripe_size;
+ __u32 ldo_pattern;
+ __u16 ldo_released_stripenr;
char *ldo_pool;
struct dt_object **ldo_stripe;
/* to know how much memory to free, ldo_stripenr can be less */
- int ldo_stripes_allocated;
/* default striping for directory represented by this object
* is cached in stripenr/stripe_size */
- int ldo_striping_cached:1;
- int ldo_def_striping_set:1;
+ unsigned int ldo_stripes_allocated:16,
+ ldo_striping_cached:1,
+ ldo_def_striping_set:1,
+ /* ldo_dir_slave_stripe indicate this is a slave stripe of
+ * a striped dir */
+ ldo_dir_slave_stripe:1;
__u32 ldo_def_stripe_size;
__u16 ldo_def_stripenr;
__u16 ldo_def_stripe_offset;
+ struct lod_dir_stripe_info *ldo_dir_stripe;
};
+#define ldo_dir_stripe_offset ldo_dir_stripe->ldsi_stripe_offset /* ldo_dir_*: field shorthands that dereference ldo_dir_stripe, so the pointer must be non-NULL when they are used */
+#define ldo_dir_def_stripenr ldo_dir_stripe->ldsi_def_stripenr
+#define ldo_dir_hash_type ldo_dir_stripe->ldsi_hash_type
+#define ldo_dir_def_hash_type ldo_dir_stripe->ldsi_def_hash_type
+#define ldo_dir_striping_cached ldo_dir_stripe->ldsi_striping_cached
+#define ldo_dir_striped ldo_dir_stripe->ldsi_striped
+#define ldo_dir_def_striping_set ldo_dir_stripe->ldsi_def_striping_set
+#define ldo_dir_def_stripe_offset ldo_dir_stripe->ldsi_def_stripe_offset
+
+struct lod_it { /* iterator state; struct lod_thread_info embeds one as lti_it */
+ struct dt_object *lit_obj; /* object from the layer below */
+ /* stripe offset of iteration */
+ __u32 lit_stripe_index;
+ __u32 lit_attr; /* NOTE(review): meaning not visible in this header -- confirm */
+ struct dt_it *lit_it; /* iterator from the layer below */
+};
struct lod_thread_info {
/* per-thread buffer for LOV EA */
void *lti_ea_store;
int lti_ea_store_size;
+ /* per-thread buffer for LMV EA */
struct lu_buf lti_buf;
struct ost_id lti_ostid;
struct lu_fid lti_fid;
struct obd_statfs lti_osfs;
struct lu_attr lti_attr;
+ struct lod_it lti_it; /* iterator state, see struct lod_it */
+ struct ldlm_res_id lti_res_id;
+ /* buffer sized to hold a struct lu_dirent plus NAME_MAX bytes */
+ char lti_key[sizeof(struct lu_dirent) + NAME_MAX];
+ struct dt_object_format lti_format;
+ struct lu_name lti_name; /* filled in and returned by lod_name_get() */
+ struct lu_buf lti_linkea_buf;
+ struct dt_insert_rec lti_dt_rec;
};
extern const struct lu_device_operations lod_lu_ops;
struct dt_object, do_lu);
}
-static inline struct dt_object *lu2dt_obj(struct lu_object *o)
-{
- LASSERT(ergo(o != NULL, lu_device_is_dt(o->lo_dev)));
- return container_of0(o, struct dt_object, do_lu);
-}
-
static inline struct dt_object *dt_object_child(struct dt_object *o)
{
return container_of0(lu_object_next(&(o)->do_lu),
return info;
}
+static inline struct lu_name * /* returns the lti_name slot of lod_env_info(env), set to (area, len) */
+lod_name_get(const struct lu_env *env, const void *area, int len)
+{
+ struct lu_name *lname;
+
+ lname = &lod_env_info(env)->lti_name; /* shared slot: every call overwrites the previous result */
+ lname->ln_name = area; /* the pointer is stored, the bytes are not copied */
+ lname->ln_namelen = len;
+ return lname;
+}
+
#define lod_foreach_ost(__dev, index) \
if ((__dev)->lod_osts_size > 0) \
cfs_foreach_bit((__dev)->lod_ost_bitmap, (index))
+/* lod_dev.c */
+int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod,
+ const struct lu_fid *fid, __u32 *tgt, int *flags); /* NOTE(review): tgt and flags are non-const pointers, presumably outputs -- confirm */
/* lod_lov.c */
-void lod_getref(struct lod_device *lod);
-void lod_putref(struct lod_device *lod);
-int lod_add_device(const struct lu_env *env, struct lod_device *m,
- char *osp, unsigned index, unsigned gen, int active);
-int lod_del_device(const struct lu_env *env, struct lod_device *m,
- char *osp, unsigned index, unsigned gen);
+void lod_getref(struct lod_tgt_descs *ltd); /* now takes the target table instead of the lod_device; presumably pins ltd_refcount -- confirm */
+void lod_putref(struct lod_device *lod, struct lod_tgt_descs *ltd); /* counterpart of lod_getref() by name; takes the device as well as the table */
+int lod_add_device(const struct lu_env *env, struct lod_device *lod,
+ char *osp, unsigned index, unsigned gen, int mdt_index,
+ char *type, int active); /* gained mdt_index and type compared with the removed prototype */
+int lod_del_device(const struct lu_env *env, struct lod_device *lod,
+ struct lod_tgt_descs *ltd, char *osp, unsigned idx,
+ unsigned gen, bool for_ost); /* NOTE(review): for_ost presumably selects OST vs. MDT handling -- confirm */
+int lod_fini_tgt(const struct lu_env *env, struct lod_device *lod,
+ struct lod_tgt_descs *ltd, bool for_ost);
+int lod_load_striping_locked(const struct lu_env *env, struct lod_object *lo); /* NOTE(review): "_locked" suggests the caller already holds a lock; which one is not visible here */
+int lod_load_striping(const struct lu_env *env, struct lod_object *lo);
+
+int lod_get_ea(const struct lu_env *env, struct lod_object *lo,
+ const char *name); /* name: EA name; the wrappers below pass the XATTR_NAME_* constants */
+static inline int /* lod_get_ea() with XATTR_NAME_LOV; returns its result unchanged */
+lod_get_lov_ea(const struct lu_env *env, struct lod_object *lo)
+{
+ return lod_get_ea(env, lo, XATTR_NAME_LOV);
+}
+
+static inline int /* lod_get_ea() with XATTR_NAME_LMV; returns its result unchanged */
+lod_get_lmv_ea(const struct lu_env *env, struct lod_object *lo)
+{
+ return lod_get_ea(env, lo, XATTR_NAME_LMV);
+}
+
+static inline int /* lod_get_ea() with XATTR_NAME_DEFAULT_LMV; returns its result unchanged */
+lod_get_default_lmv_ea(const struct lu_env *env, struct lod_object *lo)
+{
+ return lod_get_ea(env, lo, XATTR_NAME_DEFAULT_LMV);
+}
+
void lod_fix_desc(struct lov_desc *desc);
void lod_fix_desc_qos_maxage(__u32 *val);
void lod_fix_desc_pattern(__u32 *val);
void lod_fix_desc_stripe_size(__u64 *val);
int lod_pools_init(struct lod_device *m, struct lustre_cfg *cfg);
int lod_pools_fini(struct lod_device *m);
-
+int lod_parse_striping(const struct lu_env *env, struct lod_object *mo,
+ const struct lu_buf *buf); /* buf is const: read-only input to the parser */
+int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo,
+ const struct lu_buf *buf); /* directory counterpart of lod_parse_striping() by name; same parameter shape */
+int lod_initialize_objects(const struct lu_env *env, struct lod_object *mo,
+ struct lov_ost_data_v1 *objs);
+int lod_store_def_striping(const struct lu_env *env, struct dt_object *dt,
+ struct thandle *th);
+int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
+ bool is_from_disk); /* NOTE(review): is_from_disk presumably distinguishes on-disk from caller-supplied data -- confirm */
+int lod_generate_and_set_lovea(const struct lu_env *env,
+ struct lod_object *mo, struct thandle *th);
+int lod_ea_store_resize(struct lod_thread_info *info, int size); /* NOTE(review): presumably resizes info->lti_ea_store / lti_ea_store_size -- confirm */
/* lod_pool.c */
int lod_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count);
int lod_ost_pool_remove(struct ost_pool *op, __u32 idx);
int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname);
int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
+/* lod_qos.c */
+int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
+ struct lu_attr *attr, const struct lu_buf *buf,
+ struct thandle *th);
+/*
+ * NOTE(review): going by the names, these add/remove target \a ltd to/from
+ * the QoS data of \a lod -- confirm against lod_qos.c.  Parameter names are
+ * spelled out so the prototypes document themselves.
+ */
+int qos_add_tgt(struct lod_device *lod, struct lod_tgt_desc *ltd);
+int qos_del_tgt(struct lod_device *lod, struct lod_tgt_desc *ltd);
+
/* lproc_lod.c */
-extern struct file_operations lod_proc_target_fops;
-void lprocfs_lod_init_vars(struct lprocfs_static_vars *lvars);
+int lod_procfs_init(struct lod_device *lod); /* declared in place of the removed lod_proc_target_fops / lprocfs_lod_init_vars() exports */
+void lod_procfs_fini(struct lod_device *lod); /* counterpart of lod_procfs_init() by name */
/* lod_object.c */
+int lod_load_lmv_shards(const struct lu_env *env, struct lod_object *lo,
+ struct lu_buf *buf, bool resize); /* NOTE(review): buf is non-const and resize is a flag; presumably buf may be reallocated -- confirm */
int lod_object_set_pool(struct lod_object *o, char *pool);
+int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
+ struct lu_attr *attr,
+ const struct lu_buf *lovea, struct thandle *th);
+int lod_striping_create(const struct lu_env *env, struct dt_object *dt,
+ struct lu_attr *attr, struct dt_object_format *dof,
+ struct thandle *th);
+void lod_object_free_striping(const struct lu_env *env, struct lod_object *lo); /* returns void: no error is reported to the caller */
#endif