1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * Copyright (c) 2011 Whamcloud, Inc.
36 * This file is part of Lustre, http://www.lustre.org/
37 * Lustre is a trademark of Sun Microsystems, Inc.
40 #ifndef __LUSTRE_DT_OBJECT_H
41 #define __LUSTRE_DT_OBJECT_H
44 * Sub-class of lu_object with methods common for "data" objects in OST stack.
46 * Data objects behave like regular files: you can read/write them, get and
47 * set their attributes. Implementation of dt interface is supposed to
48 * implement some form of garbage collection, normally reference counting
51 * Examples: osd (lustre/osd) is an implementation of dt interface.
57 * super-class definitions.
59 #include <lu_object.h>
61 #include <libcfs/libcfs.h>
64 struct proc_dir_entry;
71 struct dt_index_features;
75 MNTOPT_USERXATTR = 0x00000001,
76 MNTOPT_ACL = 0x00000002,
79 struct dt_device_param {
80 unsigned ddp_max_name_len;
81 unsigned ddp_max_nlink;
82 unsigned ddp_block_shift;
84 unsigned ddp_max_ea_size;
88 * Per-transaction commit callback function
90 struct dt_txn_commit_cb;
91 typedef void (*dt_cb_t)(struct lu_env *env, struct thandle *th,
92 struct dt_txn_commit_cb *cb, int err);
94 * Special per-transaction callback for cases when only a commit callback
95 * is needed and per-device callbacks are not convenient to use
97 struct dt_txn_commit_cb {
98 cfs_list_t dcb_linkage;
103 * Basic transaction credit op
113 DTO_LOG_REC, /**< XXX temporary: dt layer knows nothing about llog. */
122 * Operations on dt device.
124 struct dt_device_operations {
126 * Return device-wide statistics.
128 int (*dt_statfs)(const struct lu_env *env,
129 struct dt_device *dev, cfs_kstatfs_t *sfs);
131 * Start transaction, described by \a param.
133 struct thandle *(*dt_trans_start)(const struct lu_env *env,
134 struct dt_device *dev,
135 struct txn_param *param);
137 * Finish previously started transaction.
139 void (*dt_trans_stop)(const struct lu_env *env,
142 * Add commit callback to the transaction.
144 int (*dt_trans_cb_add)(struct thandle *th,
145 struct dt_txn_commit_cb *dcb);
147 * Return fid of root index object.
149 int (*dt_root_get)(const struct lu_env *env,
150 struct dt_device *dev, struct lu_fid *f);
152 * Return device configuration data.
154 void (*dt_conf_get)(const struct lu_env *env,
155 const struct dt_device *dev,
156 struct dt_device_param *param);
158 * handling device state, mostly for tests
160 int (*dt_sync)(const struct lu_env *env, struct dt_device *dev);
161 void (*dt_ro)(const struct lu_env *env, struct dt_device *dev);
163 * Start a transaction commit asynchronously
165 * \param env environment
166 * \param dev dt_device to start commit on
168 * \return 0 success, negative value if error
170 int (*dt_commit_async)(const struct lu_env *env,
171 struct dt_device *dev);
173 * Initialize capability context.
175 int (*dt_init_capa_ctxt)(const struct lu_env *env,
176 struct dt_device *dev,
177 int mode, unsigned long timeout,
178 __u32 alg, struct lustre_capa_key *keys);
180 * Initialize quota context.
182 void (*dt_init_quota_ctxt)(const struct lu_env *env,
183 struct dt_device *dev,
184 struct dt_quota_ctxt *ctxt, void *data);
187 * get transaction credits for given \a op.
189 int (*dt_credit_get)(const struct lu_env *env, struct dt_device *dev,
193 struct dt_index_features {
194 /** required feature flags from enum dt_index_flags */
196 /** minimal required key size */
197 size_t dif_keysize_min;
198 /** maximal required key size, 0 if no limit */
199 size_t dif_keysize_max;
200 /** minimal required record size */
201 size_t dif_recsize_min;
202 /** maximal required record size, 0 if no limit */
203 size_t dif_recsize_max;
204 /** pointer size for record */
/**
 * Feature bits of an index. Each enumerator is a distinct single bit;
 * struct dt_index_features carries the required bits in its flags field.
 */
enum dt_index_flags {
        /** keys may have variable size */
        DT_IND_VARKEY = 0x01,
        /** records may have variable size */
        DT_IND_VARREC = 0x02,
        /** the index can be modified */
        DT_IND_UPDATE = 0x04,
        /** records with non-unique (duplicate) keys are supported */
        DT_IND_NONUNQ = 0x08
};
220 * Features, required from index to support file system directories (mapping
223 extern const struct dt_index_features dt_directory_features;
226 * This is a general purpose dt allocation hint.
227 * It now contains the parent object.
228 * It can contain any allocation hint in the future.
230 struct dt_allocation_hint {
231 struct dt_object *dah_parent;
236 * object type specifier.
239 enum dt_format_type {
244 /** for special index */
246 /** for symbolic link */
251 * object format specifier.
253 struct dt_object_format {
254 /** type for dt object */
255 enum dt_format_type dof_type;
264 * special index needs feature as parameter to create
268 const struct dt_index_features *di_feat;
273 enum dt_format_type dt_mode_to_dft(__u32 mode);
275 /** Version type. May differ in DMU and ldiskfs */
276 typedef __u64 dt_obj_version_t;
279 * Per-dt-object operations.
281 struct dt_object_operations {
282 void (*do_read_lock)(const struct lu_env *env,
283 struct dt_object *dt, unsigned role);
284 void (*do_write_lock)(const struct lu_env *env,
285 struct dt_object *dt, unsigned role);
286 void (*do_read_unlock)(const struct lu_env *env,
287 struct dt_object *dt);
288 void (*do_write_unlock)(const struct lu_env *env,
289 struct dt_object *dt);
290 int (*do_write_locked)(const struct lu_env *env,
291 struct dt_object *dt);
293 * Note: following ->do_{x,}attr_{set,get}() operations are very
294 * similar to ->moo_{x,}attr_{set,get}() operations in struct
295 * md_object_operations (see md_object.h). These operations are not in
296 * lu_object_operations, because ->do_{x,}attr_set() versions take
297 * transaction handle as an argument (this transaction is started by
298 * caller). We might factor ->do_{x,}attr_get() into
299 * lu_object_operations, but that would break existing symmetry.
303 * Return standard attributes.
305 * precondition: lu_object_exists(&dt->do_lu);
307 int (*do_attr_get)(const struct lu_env *env,
308 struct dt_object *dt, struct lu_attr *attr,
309 struct lustre_capa *capa);
311 * Set standard attributes.
313 * precondition: dt_object_exists(dt);
315 int (*do_attr_set)(const struct lu_env *env,
316 struct dt_object *dt,
317 const struct lu_attr *attr,
318 struct thandle *handle,
319 struct lustre_capa *capa);
321 * Return a value of an extended attribute.
323 * precondition: dt_object_exists(dt);
325 int (*do_xattr_get)(const struct lu_env *env, struct dt_object *dt,
326 struct lu_buf *buf, const char *name,
327 struct lustre_capa *capa);
329 * Set value of an extended attribute.
331 * \a fl - flags from enum lu_xattr_flags
333 * precondition: dt_object_exists(dt);
335 int (*do_xattr_set)(const struct lu_env *env,
336 struct dt_object *dt, const struct lu_buf *buf,
337 const char *name, int fl, struct thandle *handle,
338 struct lustre_capa *capa);
340 * Delete existing extended attribute.
342 * precondition: dt_object_exists(dt);
344 int (*do_xattr_del)(const struct lu_env *env,
345 struct dt_object *dt,
346 const char *name, struct thandle *handle,
347 struct lustre_capa *capa);
349 * Place list of existing extended attributes into \a buf (which has
352 * precondition: dt_object_exists(dt);
354 int (*do_xattr_list)(const struct lu_env *env,
355 struct dt_object *dt, struct lu_buf *buf,
356 struct lustre_capa *capa);
358 * Init allocation hint using parent object and child mode.
359 * (1) The \a parent might be NULL if this is a partial creation for
361 * (2) The type of child is in \a child_mode.
362 * (3) The result hint is stored in \a ah;
364 void (*do_ah_init)(const struct lu_env *env,
365 struct dt_allocation_hint *ah,
366 struct dt_object *parent,
367 cfs_umode_t child_mode);
369 * Create new object on this device.
371 * precondition: !dt_object_exists(dt);
372 * postcondition: ergo(result == 0, dt_object_exists(dt));
374 int (*do_create)(const struct lu_env *env, struct dt_object *dt,
375 struct lu_attr *attr,
376 struct dt_allocation_hint *hint,
377 struct dt_object_format *dof,
381 * Announce that this object is going to be used as an index. This
382 * operation checks that object supports indexing operations and
383 * installs appropriate dt_index_operations vector on success.
385 * Also probes for features. Operation is successful if all required
386 * features are supported.
388 int (*do_index_try)(const struct lu_env *env,
389 struct dt_object *dt,
390 const struct dt_index_features *feat);
392 * Add nlink of the object
393 * precondition: dt_object_exists(dt);
395 void (*do_ref_add)(const struct lu_env *env,
396 struct dt_object *dt, struct thandle *th);
398 * Del nlink of the object
399 * precondition: dt_object_exists(dt);
401 void (*do_ref_del)(const struct lu_env *env,
402 struct dt_object *dt, struct thandle *th);
404 struct obd_capa *(*do_capa_get)(const struct lu_env *env,
405 struct dt_object *dt,
406 struct lustre_capa *old,
408 int (*do_object_sync)(const struct lu_env *, struct dt_object *);
409 dt_obj_version_t (*do_version_get)(const struct lu_env *env,
410 struct dt_object *dt);
411 void (*do_version_set)(const struct lu_env *env, struct dt_object *dt,
412 dt_obj_version_t new_version);
414 * Get object info of next level. Currently, only get inode from osd.
415 * This is only used by quota b=16542
416 * precondition: dt_object_exists(dt);
418 int (*do_data_get)(const struct lu_env *env, struct dt_object *dt,
423 * Per-dt-object operations on "file body".
425 struct dt_body_operations {
427 * precondition: dt_object_exists(dt);
429 ssize_t (*dbo_read)(const struct lu_env *env, struct dt_object *dt,
430 struct lu_buf *buf, loff_t *pos,
431 struct lustre_capa *capa);
433 * precondition: dt_object_exists(dt);
435 ssize_t (*dbo_write)(const struct lu_env *env, struct dt_object *dt,
436 const struct lu_buf *buf, loff_t *pos,
437 struct thandle *handle, struct lustre_capa *capa,
442 * Incomplete type of index record.
447 * Incomplete type of index key.
452 * Incomplete type of dt iterator.
457 * Per-dt-object operations on object as index.
459 struct dt_index_operations {
461 * precondition: dt_object_exists(dt);
463 int (*dio_lookup)(const struct lu_env *env, struct dt_object *dt,
464 struct dt_rec *rec, const struct dt_key *key,
465 struct lustre_capa *capa);
467 * precondition: dt_object_exists(dt);
469 int (*dio_insert)(const struct lu_env *env, struct dt_object *dt,
470 const struct dt_rec *rec, const struct dt_key *key,
471 struct thandle *handle, struct lustre_capa *capa,
474 * precondition: dt_object_exists(dt);
476 int (*dio_delete)(const struct lu_env *env, struct dt_object *dt,
477 const struct dt_key *key, struct thandle *handle,
478 struct lustre_capa *capa);
484 * Allocate and initialize new iterator.
486 * precondition: dt_object_exists(dt);
488 struct dt_it *(*init)(const struct lu_env *env,
489 struct dt_object *dt,
491 struct lustre_capa *capa);
492 void (*fini)(const struct lu_env *env,
494 int (*get)(const struct lu_env *env,
496 const struct dt_key *key);
497 void (*put)(const struct lu_env *env,
499 int (*next)(const struct lu_env *env,
501 struct dt_key *(*key)(const struct lu_env *env,
502 const struct dt_it *di);
503 int (*key_size)(const struct lu_env *env,
504 const struct dt_it *di);
505 int (*rec)(const struct lu_env *env,
506 const struct dt_it *di,
507 struct lu_dirent *lde,
509 __u64 (*store)(const struct lu_env *env,
510 const struct dt_it *di);
511 int (*load)(const struct lu_env *env,
512 const struct dt_it *di, __u64 hash);
517 struct lu_device dd_lu_dev;
518 const struct dt_device_operations *dd_ops;
521 * List of dt_txn_callback (see below). This is not protected in any
522 * way, because callbacks are supposed to be added/deleted only during
523 * single-threaded start-up and shut-down procedures.
525 cfs_list_t dd_txn_callbacks;
528 int dt_device_init(struct dt_device *dev, struct lu_device_type *t);
529 void dt_device_fini(struct dt_device *dev);
531 static inline int lu_device_is_dt(const struct lu_device *d)
533 return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_DT);
536 static inline struct dt_device * lu2dt_dev(struct lu_device *l)
538 LASSERT(lu_device_is_dt(l));
539 return container_of0(l, struct dt_device, dd_lu_dev);
543 struct lu_object do_lu;
544 const struct dt_object_operations *do_ops;
545 const struct dt_body_operations *do_body_ops;
546 const struct dt_index_operations *do_index_ops;
549 int dt_object_init(struct dt_object *obj,
550 struct lu_object_header *h, struct lu_device *d);
552 void dt_object_fini(struct dt_object *obj);
554 static inline int dt_object_exists(const struct dt_object *dt)
556 return lu_object_exists(&dt->do_lu);
560 /** number of blocks this transaction will modify */
561 unsigned int tp_credits;
564 static inline void txn_param_init(struct txn_param *p, unsigned int credits)
566 memset(p, 0, sizeof(*p));
567 p->tp_credits = credits;
570 static inline void txn_param_credit_add(struct txn_param *p,
571 unsigned int credits)
573 p->tp_credits += credits;
577 * This is the general purpose transaction handle.
578 * 1. Transaction Life Cycle
579 * This transaction handle is allocated upon starting a new transaction,
580 * and deallocated after this transaction is committed.
581 * 2. Transaction Nesting
582 * We do _NOT_ support nested transaction. So, every thread should only
583 * have one active transaction, and a transaction only belongs to one
584 * thread. Due to this, transaction handle needs no reference count.
585 * 3. Transaction & dt_object locking
586 * dt_object locks should be taken inside transaction.
587 * 4. Transaction & RPC
588 * No RPC request should be issued inside transaction.
591 /** the dt device on which the transactions are executed */
592 struct dt_device *th_dev;
594 /** context for this transaction, tag is LCT_TX_HANDLE */
595 struct lu_context th_ctx;
597 /** the last operation result in this transaction.
598 * this value is used in recovery */
600 /** whether we need sync commit */
605 * Transaction call-backs.
607 * These are invoked by osd (or underlying transaction engine) when
608 * transaction changes state.
610 * Call-backs are used by upper layers to modify transaction parameters and to
611 * perform some actions on each transaction state transition. Typical
612 * example is mdt registering call-back to write into last-received file
613 * before each transaction commit.
615 struct dt_txn_callback {
616 int (*dtc_txn_start)(const struct lu_env *env,
617 struct txn_param *param, void *cookie);
618 int (*dtc_txn_stop)(const struct lu_env *env,
619 struct thandle *txn, void *cookie);
620 void (*dtc_txn_commit)(struct thandle *txn, void *cookie);
623 cfs_list_t dtc_linkage;
/*
 * Add a transaction call-back to a device, or remove it again.
 *
 * NOTE(review): presumably these link/unlink \a cb (through dtc_linkage)
 * on dev->dd_txn_callbacks; the definitions are not visible here -- confirm.
 * That list is documented as unprotected, with additions and removals
 * expected only during single-threaded start-up/shut-down.
 */
626 void dt_txn_callback_add(struct dt_device *dev, struct dt_txn_callback *cb);
627 void dt_txn_callback_del(struct dt_device *dev, struct dt_txn_callback *cb);
/*
 * Hooks for the three transaction state changes: start, stop and commit.
 *
 * NOTE(review): presumably each one runs the matching dtc_txn_start /
 * dtc_txn_stop / dtc_txn_commit method of the call-backs registered on the
 * device -- confirm against the definitions.
 */
629 int dt_txn_hook_start(const struct lu_env *env,
630 struct dt_device *dev, struct txn_param *param);
631 int dt_txn_hook_stop(const struct lu_env *env, struct thandle *txn);
632 void dt_txn_hook_commit(struct thandle *txn);
634 int dt_try_as_dir(const struct lu_env *env, struct dt_object *obj);
637 * Callback function used for parsing path.
638 * \see llo_store_resolve
640 typedef int (*dt_entry_func_t)(const struct lu_env *env,
644 #define DT_MAX_PATH 1024
646 int dt_path_parser(const struct lu_env *env,
647 char *local, dt_entry_func_t entry_func,
650 struct dt_object *dt_store_open(const struct lu_env *env,
651 struct dt_device *dt,
653 const char *filename,
656 struct dt_object *dt_locate(const struct lu_env *env,
657 struct dt_device *dev,
658 const struct lu_fid *fid);
/**
 * Thin wrapper returning the object's version: calls the do_version_get
 * method of o->do_ops with (env, o) and returns its result. The method
 * pointer is asserted to be non-NULL first.
 *
 * NOTE(review): the line declaring parameter `o` is not visible in this
 * view; presumably a struct dt_object *, as in do_version_set() -- confirm.
 */
660 static inline dt_obj_version_t do_version_get(const struct lu_env *env,
663 LASSERT(o->do_ops->do_version_get);
664 return o->do_ops->do_version_get(env, o);
667 static inline void do_version_set(const struct lu_env *env,
668 struct dt_object *o, dt_obj_version_t v)
670 LASSERT(o->do_ops->do_version_set);
671 return o->do_ops->do_version_set(env, o, v);
674 int dt_record_read(const struct lu_env *env, struct dt_object *dt,
675 struct lu_buf *buf, loff_t *pos);
676 int dt_record_write(const struct lu_env *env, struct dt_object *dt,
677 const struct lu_buf *buf, loff_t *pos, struct thandle *th);
/**
 * Thin wrapper that starts a transaction: calls the dt_trans_start method
 * of d->dd_ops with (env, d, p) and returns the handle it yields. The
 * method pointer is asserted to be non-NULL first.
 *
 * NOTE(review): the lines declaring parameters `d` and `p` are not visible
 * in this view; presumably a struct dt_device * and a struct txn_param *,
 * matching the method's prototype -- confirm.
 */
680 static inline struct thandle *dt_trans_start(const struct lu_env *env,
684 LASSERT(d->dd_ops->dt_trans_start);
685 return d->dd_ops->dt_trans_start(env, d, p);
688 static inline void dt_trans_stop(const struct lu_env *env,
689 struct dt_device *d, struct thandle *th)
691 LASSERT(d->dd_ops->dt_trans_stop);
692 return d->dd_ops->dt_trans_stop(env, th);
695 static inline int dt_trans_cb_add(struct thandle *th,
696 struct dt_txn_commit_cb *dcb)
698 LASSERT(th->th_dev->dd_ops->dt_trans_cb_add);
699 return th->th_dev->dd_ops->dt_trans_cb_add(th, dcb);
702 #endif /* __LUSTRE_DT_OBJECT_H */