4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2015, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/lod/lod_internal.h
34 * Author: Alex Zhuravlev <alexey.zhuravlev@intel.com>
35 * Author: Mikhail Pershin <mike.pershin@intel.com>
38 #ifndef _LOD_INTERNAL_H
39 #define _LOD_INTERNAL_H
41 #include <libcfs/libcfs.h>
42 #include <lustre_cfg.h>
44 #include <dt_object.h>
/*
 * Two-valued selector: 0 for an assigned stripe, 1 for the default stripe.
 * NOTE(review): no user of these constants is visible in this header;
 * confirm the exact meaning against the callers.
 */
46 #define LOV_USES_ASSIGNED_STRIPE 0
47 #define LOV_USES_DEFAULT_STRIPE 1
49 /* Special values to remove LOV EA from disk */
/*
 * Evaluates to true only when all four arguments hold the sentinel
 * combination: size == 0, count == 0, offset equal to -1 converted to
 * offset's own type (via typeof, so the comparison works whatever integer
 * type the caller uses), and pool == NULL.
 */
50 #define LOVEA_DELETE_VALUES(size, count, offset, pool) \
51 ((size) == 0 && (count) == 0 && \
52 (offset) == (typeof(offset))(-1) && (pool) == NULL)
/*
 * Evaluates to true when count == 0 and offset equals -1 converted to
 * offset's own type (via typeof).
 * NOTE(review): presumably the LMV EA counterpart of LOVEA_DELETE_VALUES,
 * i.e. the values that request removal of the LMV EA; confirm with users.
 */
54 #define LMVEA_DELETE_VALUES(count, offset) \
55 ((count) == 0 && (offset) == (typeof(offset))(-1))
/*
 * -1 converted to __u16, i.e. the all-ones 16-bit value.
 * NOTE(review): presumably means "no explicit stripe offset requested";
 * confirm against the code that compares offsets with this constant.
 */
57 #define LOV_OFFSET_DEFAULT ((__u16)-1)
60 spinlock_t lqr_alloc; /* protect allocation index */
61 __u32 lqr_start_idx; /* start index of new inode */
62 __u32 lqr_offset_idx;/* aliasing for start_idx */
63 int lqr_start_count;/* reseed counter */
64 struct ost_pool lqr_pool; /* round-robin optimized list */
65 unsigned long lqr_dirty:1; /* recalc round-robin list */
69 char pool_name[LOV_MAXPOOLNAME + 1];
70 struct ost_pool pool_obds; /* pool members */
71 atomic_t pool_refcount;
72 struct lod_qos_rr pool_rr;
73 struct hlist_node pool_hash; /* access by poolname */
74 struct list_head pool_list;
75 struct proc_dir_entry *pool_proc_entry;
76 struct obd_device *pool_lobd; /* owner */
/*
 * Shorthand accessors for the fields of the pool_obds member (a
 * struct ost_pool, commented "pool members" where it is declared).
 * `p` must be a pointer to a structure that has a pool_obds member.  Each
 * macro expands to the field itself (not a copy), so the result can be read,
 * assigned to, or have its address taken.
 */
79 #define pool_tgt_size(p) ((p)->pool_obds.op_size)
80 #define pool_tgt_count(p) ((p)->pool_obds.op_count)
81 #define pool_tgt_array(p) ((p)->pool_obds.op_array)
82 #define pool_tgt_rw_sem(p) ((p)->pool_obds.op_rw_sem)
85 struct list_head lq_oss_list;
86 struct rw_semaphore lq_rw_sem;
87 __u32 lq_active_oss_count;
88 unsigned int lq_prio_free; /* priority for free space */
89 unsigned int lq_threshold_rr;/* priority for rr */
90 struct lod_qos_rr lq_rr; /* round robin qos data */
91 bool lq_dirty:1, /* recalc qos data */
92 lq_same_space:1,/* the ost's all have approx.
93 the same space avail */
94 lq_reset:1; /* zero current penalties */
98 struct obd_uuid lqo_uuid; /* ptlrpc's c_remote_uuid */
99 struct list_head lqo_oss_list; /* link to lov_qos */
100 __u64 lqo_bavail; /* total bytes avail on OSS */
101 __u64 lqo_penalty; /* current penalty */
102 __u64 lqo_penalty_per_obj; /* penalty decrease
104 time_t lqo_used; /* last used time, seconds */
105 __u32 lqo_ost_count; /* number of osts on this oss */
109 struct lod_qos_oss *ltq_oss; /* oss info */
110 __u64 ltq_penalty; /* current penalty */
111 __u64 ltq_penalty_per_obj; /* penalty decrease
113 __u64 ltq_weight; /* net weighting */
114 time_t ltq_used; /* last used time, seconds */
115 bool ltq_usable:1; /* usable for striping */
118 struct lod_tgt_desc {
119 struct dt_device *ltd_tgt;
120 struct list_head ltd_kill;
121 struct obd_export *ltd_exp;
122 struct obd_uuid ltd_uuid;
125 struct ltd_qos ltd_qos; /* qos info per target */
126 struct obd_statfs ltd_statfs;
127 struct ptlrpc_thread *ltd_recovery_thread;
128 unsigned long ltd_active:1,/* is this target up for requests */
129 ltd_activate:1,/* should target be activated */
130 ltd_reap:1, /* should this target be deleted */
131 ltd_got_update_log:1, /* Already got update log */
132 ltd_connecting:1; /* target is connecting */
/*
 * Dimensions of the two-level target table: ltd_tgt_idx[] holds TGT_PTRS
 * block pointers and each block's ldi_tgt[] holds TGT_PTRS_PER_BLOCK target
 * pointers, giving 256 * 256 = 65536 addressable slots in total.
 */
135 #define TGT_PTRS 256 /* number of pointers at 1st level */
136 #define TGT_PTRS_PER_BLOCK 256 /* number of pointers at 2nd level */
138 struct lod_tgt_desc_idx {
139 struct lod_tgt_desc *ldi_tgt[TGT_PTRS_PER_BLOCK];
/*
 * Two-level lookup of the target pointer stored for `index`:
 * index / TGT_PTRS_PER_BLOCK selects the block in (ltd)->ltd_tgt_idx[], and
 * index % TGT_PTRS_PER_BLOCK selects the slot in that block's ldi_tgt[].
 * The block pointer is dereferenced unconditionally, so the block covering
 * `index` must already exist.  The macro performs no range check on `index`.
 * `index` is expanded twice; avoid arguments with side effects.
 */
142 #define LTD_TGT(ltd, index) \
143 ((ltd)->ltd_tgt_idx[(index) / \
144 TGT_PTRS_PER_BLOCK]->ldi_tgt[(index) % TGT_PTRS_PER_BLOCK])
/*
 * Convenience wrappers around LTD_TGT() that look up `index` in the OST
 * (lod_ost_descs) or MDT (lod_mdt_descs) table of a device.
 * NOTE(review): `lod` is expanded without parentheses (&lod->...), so it
 * must be a plain pointer expression; something like OST_TGT(a + 1, i)
 * would parse incorrectly.  Consider &(lod)->... when this is next touched.
 */
146 #define OST_TGT(lod, index) LTD_TGT(&lod->lod_ost_descs, index)
147 #define MDT_TGT(lod, index) LTD_TGT(&lod->lod_mdt_descs, index)
148 struct lod_tgt_descs {
149 /* list of known TGTs */
150 struct lod_tgt_desc_idx *ltd_tgt_idx[TGT_PTRS];
151 /* Size of the lod_tgts array, guaranteed to be a power of 2 */
153 /* number of registered TGTs */
155 /* bitmap of TGTs available */
156 struct cfs_bitmap *ltd_tgt_bitmap;
157 /* TGTs scheduled to be deleted */
159 /* Table refcount used for delayed deletion */
161 /* mutex to serialize concurrent updates to the tgt table */
162 struct mutex ltd_mutex;
163 /* read/write semaphore used for array relocation */
164 struct rw_semaphore ltd_rw_sem;
168 struct dt_device lod_dt_dev;
169 struct obd_export *lod_child_exp;
170 struct dt_device *lod_child;
171 struct proc_dir_entry *lod_proc_entry;
172 struct lprocfs_stats *lod_stats;
173 spinlock_t lod_connects_lock;
175 unsigned int lod_recovery_completed:1,
178 lod_child_got_update_log:1;
180 /* lov settings descriptor storing static information */
181 struct lov_desc lod_desc;
183 /* protect ld_active_tgt_count, ltd_active and lod_md_root */
186 /* Description of OST */
187 struct lod_tgt_descs lod_ost_descs;
188 /* Description of MDT */
189 struct lod_tgt_descs lod_mdt_descs;
191 /* Recovery thread for lod_child */
192 struct ptlrpc_thread lod_child_recovery_thread;
194 /* maximum EA size the underlying OSD may have */
195 unsigned int lod_osd_max_easize;
197 /* FIXME: When QoS and pools are implemented for MDT, these
198 * structures should probably be moved to lod_tgt_descs as well.
200 /* QoS info per LOD */
201 struct lod_qos lod_qos; /* qos info per lod */
204 struct ost_pool lod_pool_info; /* all OSTs in a packed array */
206 struct cfs_hash *lod_pools_hash_body; /* used for key access */
207 struct list_head lod_pool_list; /* used for sequential access */
208 struct proc_dir_entry *lod_pool_proc_entry;
210 enum lustre_sec_part lod_sp_me;
212 struct proc_dir_entry *lod_symlink;
214 /* ROOT object, used to fetch FS default striping */
215 struct lod_object *lod_md_root;
/*
 * OST-flavoured aliases.  These are object-like macros, so every later
 * occurrence of the left-hand identifier is textually replaced: the first
 * four map to members of the lod_ost_descs table, ltd_ost maps to the
 * ltd_tgt member name, and lod_ost_desc maps to the lod_tgt_desc struct tag.
 * NOTE(review): ltd_tgts, ltd_tgtnr and ltd_tgts_size are not visible in
 * this view of struct lod_tgt_descs; verify each alias still resolves.
 */
218 #define lod_osts lod_ost_descs.ltd_tgts
219 #define lod_ost_bitmap lod_ost_descs.ltd_tgt_bitmap
220 #define lod_ostnr lod_ost_descs.ltd_tgtnr
221 #define lod_osts_size lod_ost_descs.ltd_tgts_size
222 #define ltd_ost ltd_tgt
223 #define lod_ost_desc lod_tgt_desc
/*
 * MDT-flavoured aliases, parallel to the OST set: the first four map to
 * members of the lod_mdt_descs table (the target count is exposed as
 * lod_remote_mdt_count), ltd_mdt maps to the ltd_tgt member name, and
 * lod_mdt_desc maps to the lod_tgt_desc struct tag.
 * NOTE(review): ltd_tgts, ltd_tgtnr and ltd_tgts_size are not visible in
 * this view of struct lod_tgt_descs; verify each alias still resolves.
 */
225 #define lod_mdts lod_mdt_descs.ltd_tgts
226 #define lod_mdt_bitmap lod_mdt_descs.ltd_tgt_bitmap
227 #define lod_remote_mdt_count lod_mdt_descs.ltd_tgtnr
228 #define lod_mdts_size lod_mdt_descs.ltd_tgts_size
229 #define ltd_mdt ltd_tgt
230 #define lod_mdt_desc lod_tgt_desc
232 struct lod_default_striping {
234 __u32 lds_def_stripe_size;
235 __u16 lds_def_stripenr;
236 __u16 lds_def_stripe_offset;
237 char lds_def_pool[LOV_MAXPOOLNAME + 1];
239 __u32 lds_dir_def_stripenr;
240 __u32 lds_dir_def_stripe_offset;
241 __u32 lds_dir_def_hash_type;
242 /* flags whether default striping is set */
243 __u32 lds_def_striping_set:1,
244 lds_dir_def_striping_set:1;
248 struct dt_object ldo_obj;
253 * don't change field order, because both file and
254 * directory use ldo_stripenr/ldo_stripes_allocated
255 * to access stripe number.
258 __u16 ldo_stripes_allocated;
259 __u16 ldo_layout_gen;
260 __u16 ldo_released_stripenr;
262 __u32 ldo_stripe_size;
263 __u16 ldo_stripe_offset;
266 /* directory stripe */
268 __u16 ldo_dir_stripenr;
269 __u16 ldo_dir_stripes_allocated;
270 __u32 ldo_dir_stripe_offset;
271 __u32 ldo_dir_hash_type;
272 __u32 ldo_dir_slave_stripe:1,
275 * default striping is not cached, so this field is
276 * invalid after create, make sure it's used by
277 * lod_dir_striping_create_internal() only.
279 struct lod_default_striping *ldo_def_striping;
282 struct dt_object **ldo_stripe;
285 static inline int lod_object_set_pool(struct lod_object *lo, const char *pool)
289 if (lo->ldo_pool != NULL) {
290 len = strlen(lo->ldo_pool) + 1;
291 OBD_FREE(lo->ldo_pool, len);
295 len = strlen(pool) + 1;
296 OBD_ALLOC(lo->ldo_pool, len);
297 if (lo->ldo_pool == NULL)
299 strlcpy(lo->ldo_pool, pool, len);
305 struct dt_object *lit_obj; /* object from the layer below */
306 /* stripe offset of iteration */
307 __u32 lit_stripe_index;
309 struct dt_it *lit_it; /* iterator from the layer below */
312 struct lod_thread_info {
313 /* per-thread buffer for LOV EA, may be vmalloc'd */
315 __u32 lti_ea_store_size;
316 /* per-thread buffer for LMV EA */
317 struct lu_buf lti_buf;
318 struct ost_id lti_ostid;
319 struct lu_fid lti_fid;
320 struct obd_statfs lti_osfs;
321 struct lu_attr lti_attr;
322 struct lod_it lti_it;
323 struct ldlm_res_id lti_res_id;
324 /* used to hold lu_dirent, sizeof(struct lu_dirent) + NAME_MAX */
325 char lti_key[sizeof(struct lu_dirent) +
327 struct dt_object_format lti_format;
328 struct lu_name lti_name;
329 struct lu_buf lti_linkea_buf;
330 struct dt_insert_rec lti_dt_rec;
331 struct llog_catid lti_cid;
332 struct llog_cookie lti_cookie;
333 struct lustre_cfg lti_lustre_cfg;
334 /* used to store parent default striping in create */
335 struct lod_default_striping lti_def_striping;
338 extern const struct lu_device_operations lod_lu_ops;
340 static inline int lu_device_is_lod(struct lu_device *d)
342 return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &lod_lu_ops);
345 static inline struct lod_device* lu2lod_dev(struct lu_device *d)
347 LASSERT(lu_device_is_lod(d));
348 return container_of0(d, struct lod_device, lod_dt_dev.dd_lu_dev);
351 static inline struct lu_device *lod2lu_dev(struct lod_device *d)
353 return &d->lod_dt_dev.dd_lu_dev;
356 static inline struct obd_device *lod2obd(struct lod_device *d)
358 return d->lod_dt_dev.dd_lu_dev.ld_obd;
361 static inline struct lod_device *dt2lod_dev(struct dt_device *d)
363 LASSERT(lu_device_is_lod(&d->dd_lu_dev));
364 return container_of0(d, struct lod_device, lod_dt_dev);
367 static inline struct lod_object *lu2lod_obj(struct lu_object *o)
369 LASSERT(ergo(o != NULL, lu_device_is_lod(o->lo_dev)));
370 return container_of0(o, struct lod_object, ldo_obj.do_lu);
373 static inline struct lu_object *lod2lu_obj(struct lod_object *obj)
375 return &obj->ldo_obj.do_lu;
378 static inline struct lod_object *lod_obj(const struct lu_object *o)
380 LASSERT(lu_device_is_lod(o->lo_dev));
381 return container_of0(o, struct lod_object, ldo_obj.do_lu);
384 static inline struct lod_object *lod_dt_obj(const struct dt_object *d)
386 return lod_obj(&d->do_lu);
389 static inline struct dt_object* lod_object_child(struct lod_object *o)
391 return container_of0(lu_object_next(lod2lu_obj(o)),
392 struct dt_object, do_lu);
395 extern struct lu_context_key lod_thread_key;
397 static inline struct lod_thread_info *lod_env_info(const struct lu_env *env)
399 struct lod_thread_info *info;
400 info = lu_context_key_get(&env->le_ctx, &lod_thread_key);
405 static inline struct lu_name *
406 lod_name_get(const struct lu_env *env, const void *area, int len)
408 struct lu_name *lname;
410 lname = &lod_env_info(env)->lti_name;
411 lname->ln_name = area;
412 lname->ln_namelen = len;
/*
 * Loop header: iterates `index` over the bits of (__dev)->lod_ost_bitmap
 * using cfs_foreach_bit(), but only when (__dev)->lod_osts_size > 0.
 * The statement or block following the macro is the loop body.
 * Because the expansion starts with an unbraced `if`, an `else` written
 * right after the loop body at the call site would bind to this hidden `if`;
 * wrap the use in braces when it sits inside an if/else.
 * cfs_foreach_bit() is not defined in this view.
 */
416 #define lod_foreach_ost(__dev, index) \
417 if ((__dev)->lod_osts_size > 0) \
418 cfs_foreach_bit((__dev)->lod_ost_bitmap, (index))
/*
 * Loop header: iterates `index` over the bits of (mdt_dev)->lod_mdt_bitmap
 * using cfs_foreach_bit().  Unlike lod_foreach_ost() there is no size guard,
 * so the bitmap is passed to cfs_foreach_bit() unconditionally.
 */
420 #define lod_foreach_mdt(mdt_dev, index) \
421 cfs_foreach_bit((mdt_dev)->lod_mdt_bitmap, (index))
424 extern struct kmem_cache *lod_object_kmem;
425 int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod,
426 const struct lu_fid *fid, __u32 *tgt, int *flags);
427 int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod,
428 struct dt_device *dt);
429 void lod_sub_fini_llog(const struct lu_env *env,
430 struct dt_device *dt, struct ptlrpc_thread *thread);
431 int lodname2mdt_index(char *lodname, __u32 *mdt_index);
434 void lod_getref(struct lod_tgt_descs *ltd);
435 void lod_putref(struct lod_device *lod, struct lod_tgt_descs *ltd);
436 int lod_add_device(const struct lu_env *env, struct lod_device *lod,
437 char *osp, unsigned index, unsigned gen, int mdt_index,
438 char *type, int active);
439 int lod_del_device(const struct lu_env *env, struct lod_device *lod,
440 struct lod_tgt_descs *ltd, char *osp, unsigned idx,
441 unsigned gen, bool for_ost);
442 int lod_fini_tgt(const struct lu_env *env, struct lod_device *lod,
443 struct lod_tgt_descs *ltd, bool for_ost);
444 int lod_load_striping_locked(const struct lu_env *env, struct lod_object *lo);
445 int lod_load_striping(const struct lu_env *env, struct lod_object *lo);
447 int lod_get_ea(const struct lu_env *env, struct lod_object *lo,
450 lod_get_lov_ea(const struct lu_env *env, struct lod_object *lo)
452 return lod_get_ea(env, lo, XATTR_NAME_LOV);
456 lod_get_lmv_ea(const struct lu_env *env, struct lod_object *lo)
458 return lod_get_ea(env, lo, XATTR_NAME_LMV);
462 lod_get_default_lmv_ea(const struct lu_env *env, struct lod_object *lo)
464 return lod_get_ea(env, lo, XATTR_NAME_DEFAULT_LMV);
/*
 * Descriptor fix-up helpers (declarations only; the implementations are not
 * in this header).  lod_fix_desc() takes a whole lov_desc; the other four
 * take a pointer to a single value (__u32, or __u64 for the stripe size).
 * All return void and receive non-const pointers.
 * NOTE(review): presumably each one adjusts the value in place through the
 * pointer; confirm against the definitions.
 */
467 void lod_fix_desc(struct lov_desc *desc);
468 void lod_fix_desc_qos_maxage(__u32 *val);
469 void lod_fix_desc_pattern(__u32 *val);
470 void lod_fix_desc_stripe_count(__u32 *val);
471 void lod_fix_desc_stripe_size(__u64 *val);
472 int lod_pools_init(struct lod_device *m, struct lustre_cfg *cfg);
473 int lod_pools_fini(struct lod_device *m);
474 int lod_parse_striping(const struct lu_env *env, struct lod_object *mo,
475 const struct lu_buf *buf);
476 int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo,
477 const struct lu_buf *buf);
478 int lod_initialize_objects(const struct lu_env *env, struct lod_object *mo,
479 struct lov_ost_data_v1 *objs);
480 int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
482 int lod_generate_and_set_lovea(const struct lu_env *env,
483 struct lod_object *mo, struct thandle *th);
484 int lod_ea_store_resize(struct lod_thread_info *info, size_t size);
486 int lod_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count);
487 int lod_ost_pool_remove(struct ost_pool *op, __u32 idx);
488 int lod_ost_pool_extend(struct ost_pool *op, unsigned int min_count);
489 struct pool_desc *lod_find_pool(struct lod_device *lod, char *poolname);
490 void lod_pool_putref(struct pool_desc *pool);
491 int lod_ost_pool_free(struct ost_pool *op);
492 int lod_pool_del(struct obd_device *obd, char *poolname);
493 int lod_ost_pool_init(struct ost_pool *op, unsigned int count);
494 extern struct cfs_hash_ops pool_hash_operations;
495 int lod_check_index_in_pool(__u32 idx, struct pool_desc *pool);
496 int lod_pool_new(struct obd_device *obd, char *poolname);
497 int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname);
498 int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname);
501 int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
502 struct lu_attr *attr, const struct lu_buf *buf,
/*
 * Declarations only.  Both functions take a lod_device and a lod_tgt_desc
 * and return int.
 * NOTE(review): the parameters are unnamed here, unlike the other prototypes
 * in this header; presumably these add/remove the target to/from the
 * device's QoS bookkeeping -- confirm against the definitions.
 */
504 int qos_add_tgt(struct lod_device*, struct lod_tgt_desc *);
505 int qos_del_tgt(struct lod_device *, struct lod_tgt_desc *);
506 void lod_qos_rr_init(struct lod_qos_rr *lqr);
509 int lod_procfs_init(struct lod_device *lod);
510 void lod_procfs_fini(struct lod_device *lod);
513 extern struct dt_object_operations lod_obj_ops;
514 extern struct lu_object_operations lod_lu_obj_ops;
515 int lod_load_lmv_shards(const struct lu_env *env, struct lod_object *lo,
516 struct lu_buf *buf, bool resize);
517 int lod_declare_striped_object(const struct lu_env *env, struct dt_object *dt,
518 struct lu_attr *attr,
519 const struct lu_buf *lovea, struct thandle *th);
520 int lod_striping_create(const struct lu_env *env, struct dt_object *dt,
521 struct lu_attr *attr, struct dt_object_format *dof,
523 void lod_object_free_striping(const struct lu_env *env, struct lod_object *lo);
525 /* lod_sub_object.c */
526 struct thandle *lod_sub_get_thandle(const struct lu_env *env,
528 const struct dt_object *sub_obj,
529 bool *record_update);
530 int lod_sub_object_declare_create(const struct lu_env *env,
531 struct dt_object *dt,
532 struct lu_attr *attr,
533 struct dt_allocation_hint *hint,
534 struct dt_object_format *dof,
536 int lod_sub_object_create(const struct lu_env *env, struct dt_object *dt,
537 struct lu_attr *attr,
538 struct dt_allocation_hint *hint,
539 struct dt_object_format *dof,
541 int lod_sub_object_declare_ref_add(const struct lu_env *env,
542 struct dt_object *dt,
544 int lod_sub_object_ref_add(const struct lu_env *env, struct dt_object *dt,
546 int lod_sub_object_declare_ref_del(const struct lu_env *env,
547 struct dt_object *dt,
549 int lod_sub_object_ref_del(const struct lu_env *env, struct dt_object *dt,
551 int lod_sub_object_declare_destroy(const struct lu_env *env,
552 struct dt_object *dt,
554 int lod_sub_object_destroy(const struct lu_env *env, struct dt_object *dt,
556 int lod_sub_object_declare_insert(const struct lu_env *env,
557 struct dt_object *dt,
558 const struct dt_rec *rec,
559 const struct dt_key *key,
561 int lod_sub_object_index_insert(const struct lu_env *env, struct dt_object *dt,
562 const struct dt_rec *rec,
563 const struct dt_key *key, struct thandle *th,
565 int lod_sub_object_declare_delete(const struct lu_env *env,
566 struct dt_object *dt,
567 const struct dt_key *key,
569 int lod_sub_object_delete(const struct lu_env *env, struct dt_object *dt,
570 const struct dt_key *name, struct thandle *th);
571 int lod_sub_object_declare_xattr_set(const struct lu_env *env,
572 struct dt_object *dt,
573 const struct lu_buf *buf,
574 const char *name, int fl,
576 int lod_sub_object_xattr_set(const struct lu_env *env, struct dt_object *dt,
577 const struct lu_buf *buf, const char *name, int fl,
579 int lod_sub_object_declare_attr_set(const struct lu_env *env,
580 struct dt_object *dt,
581 const struct lu_attr *attr,
583 int lod_sub_object_attr_set(const struct lu_env *env,
584 struct dt_object *dt,
585 const struct lu_attr *attr,
587 int lod_sub_object_declare_xattr_del(const struct lu_env *env,
588 struct dt_object *dt,
591 int lod_sub_object_xattr_del(const struct lu_env *env,
592 struct dt_object *dt,
595 int lod_sub_object_declare_write(const struct lu_env *env,
596 struct dt_object *dt,
597 const struct lu_buf *buf, loff_t pos,
599 ssize_t lod_sub_object_write(const struct lu_env *env, struct dt_object *dt,
600 const struct lu_buf *buf, loff_t *pos,
601 struct thandle *th, int rq);
602 int lod_sub_object_declare_punch(const struct lu_env *env,
603 struct dt_object *dt,
604 __u64 start, __u64 end,
606 int lod_sub_object_punch(const struct lu_env *env, struct dt_object *dt,
607 __u64 start, __u64 end, struct thandle *th);
609 int lod_sub_prep_llog(const struct lu_env *env, struct lod_device *lod,
610 struct dt_device *dt, int index);