1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * Copyright (c) 2011 Whamcloud, Inc.
36 * This file is part of Lustre, http://www.lustre.org/
37 * Lustre is a trademark of Sun Microsystems, Inc.
40 #ifndef __LUSTRE_DT_OBJECT_H
41 #define __LUSTRE_DT_OBJECT_H
44 * Sub-class of lu_object with methods common for "data" objects in OST stack.
46 * Data objects behave like regular files: you can read/write them, get and
47 * set their attributes. Implementation of dt interface is supposed to
48 * implement some form of garbage collection, normally reference counting
51 * Examples: osd (lustre/osd) is an implementation of dt interface.
57 * super-class definitions.
59 #include <lu_object.h>
61 #include <libcfs/libcfs.h>
64 struct proc_dir_entry;
71 struct dt_index_features;
75 MNTOPT_USERXATTR = 0x00000001,
76 MNTOPT_ACL = 0x00000002,
/**
 * Device-wide configuration values; this is the structure handed to the
 * dt_conf_get() method of struct dt_device_operations.
 */
struct dt_device_param {
        /* NOTE(review): presumably the longest supported name -- confirm */
        unsigned int ddp_max_name_len;
        /* NOTE(review): presumably the highest supported link count -- confirm */
        unsigned int ddp_max_nlink;
        /* NOTE(review): presumably log2 of the device block size -- confirm */
        unsigned int ddp_block_shift;
};
87 * Basic transaction credit op
97 DTO_LOG_REC, /**< XXX temporary: dt layer knows nothing about llog. */
106 * Operations on dt device.
108 struct dt_device_operations {
110 * Return device-wide statistics.
112 int (*dt_statfs)(const struct lu_env *env,
113 struct dt_device *dev, cfs_kstatfs_t *sfs);
115 * Start transaction, described by \a param.
117 struct thandle *(*dt_trans_start)(const struct lu_env *env,
118 struct dt_device *dev,
119 struct txn_param *param);
121 * Finish previously started transaction.
123 void (*dt_trans_stop)(const struct lu_env *env,
126 * Return fid of root index object.
128 int (*dt_root_get)(const struct lu_env *env,
129 struct dt_device *dev, struct lu_fid *f);
131 * Return device configuration data.
133 void (*dt_conf_get)(const struct lu_env *env,
134 const struct dt_device *dev,
135 struct dt_device_param *param);
137 * handling device state, mostly for tests
139 int (*dt_sync)(const struct lu_env *env, struct dt_device *dev);
140 void (*dt_ro)(const struct lu_env *env, struct dt_device *dev);
142 * Start a transaction commit asynchronously
144 * \param env environment
145 * \param dev dt_device to start commit on
147 * \return 0 success, negative value if error
149 int (*dt_commit_async)(const struct lu_env *env,
150 struct dt_device *dev);
152 * Initialize capability context.
154 int (*dt_init_capa_ctxt)(const struct lu_env *env,
155 struct dt_device *dev,
156 int mode, unsigned long timeout,
157 __u32 alg, struct lustre_capa_key *keys);
159 * Initialize quota context.
161 void (*dt_init_quota_ctxt)(const struct lu_env *env,
162 struct dt_device *dev,
163 struct dt_quota_ctxt *ctxt, void *data);
166 * get transaction credits for given \a op.
168 int (*dt_credit_get)(const struct lu_env *env, struct dt_device *dev,
172 struct dt_index_features {
173 /** required feature flags from enum dt_index_flags */
175 /** minimal required key size */
176 size_t dif_keysize_min;
177 /** maximal required key size, 0 if no limit */
178 size_t dif_keysize_max;
179 /** minimal required record size */
180 size_t dif_recsize_min;
181 /** maximal required record size, 0 if no limit */
182 size_t dif_recsize_max;
183 /** pointer size for record */
/**
 * Feature bits of an index. Each constant occupies its own bit, so they can
 * be combined into the "required feature flags" of struct dt_index_features.
 */
enum dt_index_flags {
        /** keys may have variable size */
        DT_IND_VARKEY = 0x1,
        /** records may have variable size */
        DT_IND_VARREC = 0x2,
        /** the index is modifiable */
        DT_IND_UPDATE = 0x4,
        /** several records may share the same (non-unique) key */
        DT_IND_NONUNQ = 0x8
};
199 * Features, required from index to support file system directories (mapping
202 extern const struct dt_index_features dt_directory_features;
205 * This is a general purpose dt allocation hint.
206 * It now contains the parent object.
207 * It can contain any allocation hint in the future.
/**
 * Allocation hint passed to object creation; filled in by the do_ah_init()
 * method of struct dt_object_operations.
 */
struct dt_allocation_hint {
        /** parent of the object being created (do_ah_init() allows NULL) */
        struct dt_object *dah_parent;
};
215 * object type specifier.
218 enum dt_format_type {
223 /** for special index */
225 /** for symbolic link */
230 * object format specifier.
232 struct dt_object_format {
233 /** type for dt object */
234 enum dt_format_type dof_type;
243 * special index needs feature as parameter to create
247 const struct dt_index_features *di_feat;
252 enum dt_format_type dt_mode_to_dft(__u32 mode);
254 /** Version type. May differ in DMU and ldiskfs */
255 typedef __u64 dt_obj_version_t;
258 * Per-dt-object operations.
260 struct dt_object_operations {
261 void (*do_read_lock)(const struct lu_env *env,
262 struct dt_object *dt, unsigned role);
263 void (*do_write_lock)(const struct lu_env *env,
264 struct dt_object *dt, unsigned role);
265 void (*do_read_unlock)(const struct lu_env *env,
266 struct dt_object *dt);
267 void (*do_write_unlock)(const struct lu_env *env,
268 struct dt_object *dt);
269 int (*do_write_locked)(const struct lu_env *env,
270 struct dt_object *dt);
272 * Note: following ->do_{x,}attr_{set,get}() operations are very
273 * similar to ->moo_{x,}attr_{set,get}() operations in struct
274 * md_object_operations (see md_object.h). These operations are not in
275 * lu_object_operations, because ->do_{x,}attr_set() versions take
276 * transaction handle as an argument (this transaction is started by
277 * caller). We might factor ->do_{x,}attr_get() into
278 * lu_object_operations, but that would break existing symmetry.
282 * Return standard attributes.
284 * precondition: lu_object_exists(&dt->do_lu);
286 int (*do_attr_get)(const struct lu_env *env,
287 struct dt_object *dt, struct lu_attr *attr,
288 struct lustre_capa *capa);
290 * Set standard attributes.
292 * precondition: dt_object_exists(dt);
294 int (*do_attr_set)(const struct lu_env *env,
295 struct dt_object *dt,
296 const struct lu_attr *attr,
297 struct thandle *handle,
298 struct lustre_capa *capa);
300 * Return a value of an extended attribute.
302 * precondition: dt_object_exists(dt);
304 int (*do_xattr_get)(const struct lu_env *env, struct dt_object *dt,
305 struct lu_buf *buf, const char *name,
306 struct lustre_capa *capa);
308 * Set value of an extended attribute.
310 * \a fl - flags from enum lu_xattr_flags
312 * precondition: dt_object_exists(dt);
314 int (*do_xattr_set)(const struct lu_env *env,
315 struct dt_object *dt, const struct lu_buf *buf,
316 const char *name, int fl, struct thandle *handle,
317 struct lustre_capa *capa);
319 * Delete existing extended attribute.
321 * precondition: dt_object_exists(dt);
323 int (*do_xattr_del)(const struct lu_env *env,
324 struct dt_object *dt,
325 const char *name, struct thandle *handle,
326 struct lustre_capa *capa);
328 * Place list of existing extended attributes into \a buf (which has
331 * precondition: dt_object_exists(dt);
333 int (*do_xattr_list)(const struct lu_env *env,
334 struct dt_object *dt, struct lu_buf *buf,
335 struct lustre_capa *capa);
337 * Init allocation hint using parent object and child mode.
338 * (1) The \a parent might be NULL if this is a partial creation for
340 * (2) The type of child is in \a child_mode.
341 * (3) The result hint is stored in \a ah;
343 void (*do_ah_init)(const struct lu_env *env,
344 struct dt_allocation_hint *ah,
345 struct dt_object *parent,
346 cfs_umode_t child_mode);
348 * Create new object on this device.
350 * precondition: !dt_object_exists(dt);
351 * postcondition: ergo(result == 0, dt_object_exists(dt));
353 int (*do_create)(const struct lu_env *env, struct dt_object *dt,
354 struct lu_attr *attr,
355 struct dt_allocation_hint *hint,
356 struct dt_object_format *dof,
360 * Announce that this object is going to be used as an index. This
361 * operation checks that object supports indexing operations and
362 * installs appropriate dt_index_operations vector on success.
364 * Also probes for features. Operation is successful if all required
365 * features are supported.
367 int (*do_index_try)(const struct lu_env *env,
368 struct dt_object *dt,
369 const struct dt_index_features *feat);
371 * Add nlink of the object
372 * precondition: dt_object_exists(dt);
374 void (*do_ref_add)(const struct lu_env *env,
375 struct dt_object *dt, struct thandle *th);
377 * Del nlink of the object
378 * precondition: dt_object_exists(dt);
380 void (*do_ref_del)(const struct lu_env *env,
381 struct dt_object *dt, struct thandle *th);
383 struct obd_capa *(*do_capa_get)(const struct lu_env *env,
384 struct dt_object *dt,
385 struct lustre_capa *old,
387 int (*do_object_sync)(const struct lu_env *, struct dt_object *);
388 dt_obj_version_t (*do_version_get)(const struct lu_env *env,
389 struct dt_object *dt);
390 void (*do_version_set)(const struct lu_env *env, struct dt_object *dt,
391 dt_obj_version_t new_version);
393 * Get object info of next level. Currently, only get inode from osd.
394 * This is only used by quota b=16542
395 * precondition: dt_object_exists(dt);
397 int (*do_data_get)(const struct lu_env *env, struct dt_object *dt,
402 * Per-dt-object operations on "file body".
404 struct dt_body_operations {
406 * precondition: dt_object_exists(dt);
408 ssize_t (*dbo_read)(const struct lu_env *env, struct dt_object *dt,
409 struct lu_buf *buf, loff_t *pos,
410 struct lustre_capa *capa);
412 * precondition: dt_object_exists(dt);
414 ssize_t (*dbo_write)(const struct lu_env *env, struct dt_object *dt,
415 const struct lu_buf *buf, loff_t *pos,
416 struct thandle *handle, struct lustre_capa *capa,
421 * Incomplete type of index record.
426 * Incomplete type of index key.
431 * Incomplete type of dt iterator.
436 * Per-dt-object operations on object as index.
438 struct dt_index_operations {
440 * precondition: dt_object_exists(dt);
442 int (*dio_lookup)(const struct lu_env *env, struct dt_object *dt,
443 struct dt_rec *rec, const struct dt_key *key,
444 struct lustre_capa *capa);
446 * precondition: dt_object_exists(dt);
448 int (*dio_insert)(const struct lu_env *env, struct dt_object *dt,
449 const struct dt_rec *rec, const struct dt_key *key,
450 struct thandle *handle, struct lustre_capa *capa,
453 * precondition: dt_object_exists(dt);
455 int (*dio_delete)(const struct lu_env *env, struct dt_object *dt,
456 const struct dt_key *key, struct thandle *handle,
457 struct lustre_capa *capa);
463 * Allocate and initialize new iterator.
465 * precondition: dt_object_exists(dt);
467 struct dt_it *(*init)(const struct lu_env *env,
468 struct dt_object *dt,
470 struct lustre_capa *capa);
471 void (*fini)(const struct lu_env *env,
473 int (*get)(const struct lu_env *env,
475 const struct dt_key *key);
476 void (*put)(const struct lu_env *env,
478 int (*next)(const struct lu_env *env,
480 struct dt_key *(*key)(const struct lu_env *env,
481 const struct dt_it *di);
482 int (*key_size)(const struct lu_env *env,
483 const struct dt_it *di);
484 int (*rec)(const struct lu_env *env,
485 const struct dt_it *di,
486 struct lu_dirent *lde,
488 __u64 (*store)(const struct lu_env *env,
489 const struct dt_it *di);
490 int (*load)(const struct lu_env *env,
491 const struct dt_it *di, __u64 hash);
496 struct lu_device dd_lu_dev;
497 const struct dt_device_operations *dd_ops;
500 * List of dt_txn_callback (see below). This is not protected in any
501 * way, because callbacks are supposed to be added/deleted only during
502 * single-threaded start-up/shut-down procedures.
504 cfs_list_t dd_txn_callbacks;
/**
 * Set up and tear down the dt part of a device.
 *
 * NOTE(review): the bodies are not in this header. Presumably
 * dt_device_init() prepares dev->dd_lu_dev for type \a t together with the
 * dd_txn_callbacks list, and dt_device_fini() undoes that -- confirm against
 * the implementation.
 */
507 int dt_device_init(struct dt_device *dev, struct lu_device_type *t);
508 void dt_device_fini(struct dt_device *dev);
510 static inline int lu_device_is_dt(const struct lu_device *d)
512 return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_DT);
515 static inline struct dt_device * lu2dt_dev(struct lu_device *l)
517 LASSERT(lu_device_is_dt(l));
518 return container_of0(l, struct dt_device, dd_lu_dev);
522 struct lu_object do_lu;
523 const struct dt_object_operations *do_ops;
524 const struct dt_body_operations *do_body_ops;
525 const struct dt_index_operations *do_index_ops;
/**
 * Initialize and finalize the generic part of a dt object.
 *
 * NOTE(review): the bodies are not in this header. Presumably
 * dt_object_init() sets up obj->do_lu for header \a h on device \a d and
 * dt_object_fini() releases it -- confirm against the implementation.
 */
528 int dt_object_init(struct dt_object *obj,
529 struct lu_object_header *h, struct lu_device *d);
531 void dt_object_fini(struct dt_object *obj);
533 static inline int dt_object_exists(const struct dt_object *dt)
535 return lu_object_exists(&dt->do_lu);
539 /** number of blocks this transaction will modify */
540 unsigned int tp_credits;
541 /** sync transaction is needed */
545 static inline void txn_param_init(struct txn_param *p, unsigned int credits)
547 memset(p, 0, sizeof(*p));
548 p->tp_credits = credits;
551 static inline void txn_param_credit_add(struct txn_param *p,
552 unsigned int credits)
554 p->tp_credits += credits;
557 static inline void txn_param_sync(struct txn_param *p)
563 * This is the general purpose transaction handle.
564 * 1. Transaction Life Cycle
565 * This transaction handle is allocated upon starting a new transaction,
566 * and deallocated after this transaction is committed.
567 * 2. Transaction Nesting
568 * We do _NOT_ support nested transactions. So, every thread should only
569 * have one active transaction, and a transaction only belongs to one
570 * thread. Due to this, transaction handle needs no reference count.
571 * 3. Transaction & dt_object locking
572 * dt_object locks should be taken inside transaction.
573 * 4. Transaction & RPC
574 * No RPC request should be issued inside transaction.
577 /** the dt device on which the transactions are executed */
578 struct dt_device *th_dev;
580 /** context for this transaction, tag is LCT_TX_HANDLE */
581 struct lu_context th_ctx;
583 /** the last operation result in this transaction.
584 * this value is used in recovery */
589 * Transaction call-backs.
591 * These are invoked by osd (or underlying transaction engine) when
592 * transaction changes state.
594 * Call-backs are used by upper layers to modify transaction parameters and to
595 * perform some actions on each transaction state transition. Typical
596 * example is mdt registering call-back to write into last-received file
597 * before each transaction commit.
599 struct dt_txn_callback {
600 int (*dtc_txn_start)(const struct lu_env *env,
601 struct txn_param *param, void *cookie);
602 int (*dtc_txn_stop)(const struct lu_env *env,
603 struct thandle *txn, void *cookie);
604 int (*dtc_txn_commit)(const struct lu_env *env,
605 struct thandle *txn, void *cookie);
608 cfs_list_t dtc_linkage;
/**
 * Register / unregister transaction call-back \a cb on \a dev.
 *
 * NOTE(review): presumably links cb->dtc_linkage into dev->dd_txn_callbacks.
 * That list is documented as unprotected, so these should only be called
 * during single-threaded start-up or shut-down -- confirm.
 */
611 void dt_txn_callback_add(struct dt_device *dev, struct dt_txn_callback *cb);
612 void dt_txn_callback_del(struct dt_device *dev, struct dt_txn_callback *cb);
/**
 * Hooks for the transaction engine, one per transaction state change.
 *
 * NOTE(review): presumably each one runs the matching dtc_txn_start /
 * dtc_txn_stop / dtc_txn_commit method of every registered
 * dt_txn_callback -- confirm against the implementation.
 */
614 int dt_txn_hook_start(const struct lu_env *env,
615 struct dt_device *dev, struct txn_param *param);
616 int dt_txn_hook_stop(const struct lu_env *env, struct thandle *txn);
617 int dt_txn_hook_commit(const struct lu_env *env, struct thandle *txn);
/**
 * NOTE(review): presumably tries to use \a obj as an index with
 * dt_directory_features via the do_index_try() method -- confirm.
 */
619 int dt_try_as_dir(const struct lu_env *env, struct dt_object *obj);
622 * Callback function used for parsing path.
623 * \see llo_store_resolve
625 typedef int (*dt_entry_func_t)(const struct lu_env *env,
629 #define DT_MAX_PATH 1024
631 int dt_path_parser(const struct lu_env *env,
632 char *local, dt_entry_func_t entry_func,
635 struct dt_object *dt_store_open(const struct lu_env *env,
636 struct dt_device *dt,
638 const char *filename,
641 struct dt_object *dt_locate(const struct lu_env *env,
642 struct dt_device *dev,
643 const struct lu_fid *fid);
645 static inline dt_obj_version_t do_version_get(const struct lu_env *env,
648 LASSERT(o->do_ops->do_version_get);
649 return o->do_ops->do_version_get(env, o);
652 static inline void do_version_set(const struct lu_env *env,
653 struct dt_object *o, dt_obj_version_t v)
655 LASSERT(o->do_ops->do_version_set);
656 return o->do_ops->do_version_set(env, o, v);
/**
 * Read / write the record described by \a buf at offset \a *pos of \a dt.
 *
 * Both return int rather than the ssize_t of the dbo_read() / dbo_write()
 * body methods; dt_record_write() also takes the transaction handle \a th.
 *
 * NOTE(review): the bodies are not in this header. Presumably these wrap
 * dbo_read() / dbo_write() and treat a short transfer as an error --
 * confirm against the implementation.
 */
659 int dt_record_read(const struct lu_env *env, struct dt_object *dt,
660 struct lu_buf *buf, loff_t *pos);
661 int dt_record_write(const struct lu_env *env, struct dt_object *dt,
662 const struct lu_buf *buf, loff_t *pos, struct thandle *th);
665 static inline struct thandle *dt_trans_start(const struct lu_env *env,
669 LASSERT(d->dd_ops->dt_trans_start);
670 return d->dd_ops->dt_trans_start(env, d, p);
673 static inline void dt_trans_stop(const struct lu_env *env,
677 LASSERT(d->dd_ops->dt_trans_stop);
678 return d->dd_ops->dt_trans_stop(env, th);
681 #endif /* __LUSTRE_DT_OBJECT_H */