1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2006 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #ifndef __LUSTRE_DT_OBJECT_H
24 #define __LUSTRE_DT_OBJECT_H
27 * Sub-class of lu_object with methods common for "data" objects in OST stack.
29 * Data objects behave like regular files: you can read/write them, get and
30 * set their attributes. Implementation of dt interface is supposed to
31 * implement some form of garbage collection, normally reference counting
34 * Examples: osd (lustre/osd) is an implementation of dt interface.
39 * super-class definitions.
41 #include <lu_object.h>
43 #include <libcfs/libcfs.h>
46 struct proc_dir_entry;
53 struct dt_index_features;
/*
 * Device configuration data, as handed back through the ->dt_conf_get()
 * device method.
 *
 * NOTE(review): field meanings below are read off the field names only;
 * confirm units against the implementation that fills them in.
 */
struct dt_device_param {
        /* presumably the longest name the device accepts */
        unsigned int ddp_max_name_len;
        /* presumably the largest link count the device supports */
        unsigned int ddp_max_nlink;
        /* presumably log2 of the device block size */
        unsigned int ddp_block_shift;
};
62 * Basic transaction credit op
72 DTO_LOG_REC, /* XXX temporary: dt layer knows nothing about llog. */
80 * Operations on dt device.
82 struct dt_device_operations {
84 * Return device-wide statistics.
86 int (*dt_statfs)(const struct lu_env *env,
87 struct dt_device *dev, struct kstatfs *sfs);
89 * Start transaction, described by @param.
91 struct thandle *(*dt_trans_start)(const struct lu_env *env,
92 struct dt_device *dev,
93 struct txn_param *param);
95 * Finish previously started transaction.
97 void (*dt_trans_stop)(const struct lu_env *env,
100 * Return fid of root index object.
102 int (*dt_root_get)(const struct lu_env *env,
103 struct dt_device *dev, struct lu_fid *f);
105 * Return device configuration data.
107 void (*dt_conf_get)(const struct lu_env *env,
108 const struct dt_device *dev,
109 struct dt_device_param *param);
111 * handling device state, mostly for tests
113 int (*dt_sync)(const struct lu_env *env, struct dt_device *dev);
114 void (*dt_ro)(const struct lu_env *env, struct dt_device *dev);
116 * Initialize capability context.
118 int (*dt_init_capa_ctxt)(const struct lu_env *env,
119 struct dt_device *dev,
120 int mode, unsigned long timeout,
121 __u32 alg, struct lustre_capa_key *keys);
124 * get transaction credits for given @op.
126 int (*dt_credit_get)(const struct lu_env *env, struct dt_device *dev,
130 struct dt_index_features {
131 /* required feature flags from enum dt_index_flags */
133 /* minimal required key size */
134 size_t dif_keysize_min;
135 /* maximal required key size, 0 if no limit */
136 size_t dif_keysize_max;
137 /* minimal required record size */
138 size_t dif_recsize_min;
139 /* maximal required record size, 0 if no limit */
140 size_t dif_recsize_max;
/*
 * Feature bits an index may be required to provide. Each constant is a
 * distinct single bit, so values can be OR-ed together into one mask.
 */
enum dt_index_flags {
        /* keys may vary in size */
        DT_IND_VARKEY = 0x01,
        /* records may vary in size */
        DT_IND_VARREC = 0x02,
        /* the index accepts modifications */
        DT_IND_UPDATE = 0x04,
        /* several records may share the same (duplicate) key */
        DT_IND_NONUNQ = 0x08
};
/*
 * Features required from an index to support file system directories
 * (presumably mapping names to fids).
 */
158 extern const struct dt_index_features dt_directory_features;
161 * This is a general purpose dt allocation hint.
162 * It now contains the parent object.
163 * It can contain any allocation hint in the future.
165 struct dt_allocation_hint {
166 struct dt_object *dah_parent;
171 * Per-dt-object operations.
173 struct dt_object_operations {
174 void (*do_read_lock)(const struct lu_env *env,
175 struct dt_object *dt);
176 void (*do_write_lock)(const struct lu_env *env,
177 struct dt_object *dt);
178 void (*do_read_unlock)(const struct lu_env *env,
179 struct dt_object *dt);
180 void (*do_write_unlock)(const struct lu_env *env,
181 struct dt_object *dt);
183 * Note: following ->do_{x,}attr_{set,get}() operations are very
184 * similar to ->moo_{x,}attr_{set,get}() operations in struct
185 * md_object_operations (see md_object.h). These operations are not in
186 * lu_object_operations, because ->do_{x,}attr_set() versions take
187 * transaction handle as an argument (this transaction is started by
188 * caller). We might factor ->do_{x,}attr_get() into
189 * lu_object_operations, but that would break existing symmetry.
193 * Return standard attributes.
195 * precondition: lu_object_exists(&dt->do_lu);
197 int (*do_attr_get)(const struct lu_env *env,
198 struct dt_object *dt, struct lu_attr *attr,
199 struct lustre_capa *capa);
201 * Set standard attributes.
203 * precondition: dt_object_exists(dt);
205 int (*do_attr_set)(const struct lu_env *env,
206 struct dt_object *dt,
207 const struct lu_attr *attr,
208 struct thandle *handle,
209 struct lustre_capa *capa);
211 * Return a value of an extended attribute.
213 * precondition: dt_object_exists(dt);
215 int (*do_xattr_get)(const struct lu_env *env, struct dt_object *dt,
216 struct lu_buf *buf, const char *name,
217 struct lustre_capa *capa);
219 * Set value of an extended attribute.
221 * @fl - flags from enum lu_xattr_flags
223 * precondition: dt_object_exists(dt);
225 int (*do_xattr_set)(const struct lu_env *env,
226 struct dt_object *dt, const struct lu_buf *buf,
227 const char *name, int fl, struct thandle *handle,
228 struct lustre_capa *capa);
230 * Delete existing extended attribute.
232 * precondition: dt_object_exists(dt);
234 int (*do_xattr_del)(const struct lu_env *env,
235 struct dt_object *dt,
236 const char *name, struct thandle *handle,
237 struct lustre_capa *capa);
239 * Place list of existing extended attributes into @buf (which has
242 * precondition: dt_object_exists(dt);
244 int (*do_xattr_list)(const struct lu_env *env,
245 struct dt_object *dt, struct lu_buf *buf,
246 struct lustre_capa *capa);
248 * Init allocation hint using parent object and child mode.
249 * (1) The @parent might be NULL if this is a partial creation for
251 * (2) The type of child is in @child_mode.
252 * (3) The result hint is stored in @ah;
254 void (*do_ah_init)(const struct lu_env *env,
255 struct dt_allocation_hint *ah,
256 struct dt_object *parent,
259 * Create new object on this device.
261 * precondition: !dt_object_exists(dt);
262 * postcondition: ergo(result == 0, dt_object_exists(dt));
264 int (*do_create)(const struct lu_env *env, struct dt_object *dt,
265 struct lu_attr *attr,
266 struct dt_allocation_hint *hint,
        /*
         * Announce that this object is going to be used as an index. This
         * operation checks that the object supports indexing operations and
         * installs the appropriate dt_index_operations vector on success.
         *
         * Also probes for features. Operation is successful if all required
         * features are supported.
         */
277 int (*do_index_try)(const struct lu_env *env,
278 struct dt_object *dt,
279 const struct dt_index_features *feat);
281 * Add nlink of the object
282 * precondition: dt_object_exists(dt);
284 void (*do_ref_add)(const struct lu_env *env,
285 struct dt_object *dt, struct thandle *th);
287 * Del nlink of the object
288 * precondition: dt_object_exists(dt);
290 void (*do_ref_del)(const struct lu_env *env,
291 struct dt_object *dt, struct thandle *th);
293 struct obd_capa *(*do_capa_get)(const struct lu_env *env,
294 struct dt_object *dt,
295 struct lustre_capa *old,
300 * Per-dt-object operations on "file body".
302 struct dt_body_operations {
304 * precondition: dt_object_exists(dt);
306 ssize_t (*dbo_read)(const struct lu_env *env, struct dt_object *dt,
307 struct lu_buf *buf, loff_t *pos,
308 struct lustre_capa *capa);
310 * precondition: dt_object_exists(dt);
312 ssize_t (*dbo_write)(const struct lu_env *env, struct dt_object *dt,
313 const struct lu_buf *buf, loff_t *pos,
314 struct thandle *handle, struct lustre_capa *capa);
318 * Incomplete type of index record.
323 * Incomplete type of index key.
328 * Incomplete type of dt iterator.
333 * Per-dt-object operations on object as index.
335 struct dt_index_operations {
337 * precondition: dt_object_exists(dt);
339 int (*dio_lookup)(const struct lu_env *env, struct dt_object *dt,
340 struct dt_rec *rec, const struct dt_key *key,
341 struct lustre_capa *capa);
343 * precondition: dt_object_exists(dt);
345 int (*dio_insert)(const struct lu_env *env, struct dt_object *dt,
346 const struct dt_rec *rec, const struct dt_key *key,
347 struct thandle *handle, struct lustre_capa *capa);
349 * precondition: dt_object_exists(dt);
351 int (*dio_delete)(const struct lu_env *env, struct dt_object *dt,
352 const struct dt_key *key, struct thandle *handle,
353 struct lustre_capa *capa);
359 * Allocate and initialize new iterator.
361 * precondition: dt_object_exists(dt);
363 struct dt_it *(*init)(const struct lu_env *env,
364 struct dt_object *dt, int writable,
365 struct lustre_capa *capa);
366 void (*fini)(const struct lu_env *env,
368 int (*get)(const struct lu_env *env,
370 const struct dt_key *key);
371 void (*put)(const struct lu_env *env,
373 int (*del)(const struct lu_env *env,
374 struct dt_it *di, struct thandle *th);
375 int (*next)(const struct lu_env *env,
377 struct dt_key *(*key)(const struct lu_env *env,
378 const struct dt_it *di);
379 int (*key_size)(const struct lu_env *env,
380 const struct dt_it *di);
381 struct dt_rec *(*rec)(const struct lu_env *env,
382 const struct dt_it *di);
383 __u64 (*store)(const struct lu_env *env,
384 const struct dt_it *di);
385 int (*load)(const struct lu_env *env,
386 const struct dt_it *di, __u64 hash);
391 struct lu_device dd_lu_dev;
392 struct dt_device_operations *dd_ops;
395 * List of dt_txn_callback (see below). This is not protected in any
396 * way, because callbacks are supposed to be added/deleted only during
397 * single-threaded start-up shut-down procedures.
399 struct list_head dd_txn_callbacks;
/*
 * Set-up and tear-down of a dt_device.
 *
 * dt_device_init() takes the device and its lu_device_type and returns an
 * int (presumably 0 on success, negative errno on failure -- TODO confirm);
 * dt_device_fini() returns nothing.
 */
int dt_device_init(struct dt_device *dev,
                   struct lu_device_type *t);

void dt_device_fini(struct dt_device *dev);
405 static inline int lu_device_is_dt(const struct lu_device *d)
407 return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_DT);
410 static inline struct dt_device * lu2dt_dev(struct lu_device *l)
412 LASSERT(lu_device_is_dt(l));
413 return container_of0(l, struct dt_device, dd_lu_dev);
417 struct lu_object do_lu;
418 struct dt_object_operations *do_ops;
419 struct dt_body_operations *do_body_ops;
420 struct dt_index_operations *do_index_ops;
/*
 * Set-up and tear-down of a dt_object.
 *
 * dt_object_init() takes the object, an lu_object_header and the owning
 * lu_device, and returns an int (presumably 0 or a negative errno -- TODO
 * confirm); dt_object_fini() returns nothing.
 */
int dt_object_init(struct dt_object *obj,
                   struct lu_object_header *h,
                   struct lu_device *d);

void dt_object_fini(struct dt_object *obj);
428 static inline int dt_object_exists(const struct dt_object *dt)
430 return lu_object_exists(&dt->do_lu);
434 /* number of blocks this transaction will modify */
435 unsigned int tp_credits;
436 /* sync transaction is needed */
440 static inline void txn_param_init(struct txn_param *p, unsigned int credits)
442 memset(p, 0, sizeof(*p));
443 p->tp_credits = credits;
/*
 * This is the general purpose transaction handle.
 * 1. Transaction Life Cycle
 *      This transaction handle is allocated upon starting a new transaction,
 *      and deallocated after this transaction is committed.
 * 2. Transaction Nesting
 *      We do _NOT_ support nested transactions. So, every thread should only
 *      have one active transaction, and a transaction only belongs to one
 *      thread. Due to this, the transaction handle needs no reference count.
 * 3. Transaction & dt_object locking
 *      dt_object locks should be taken inside transaction.
 * 4. Transaction & RPC
 *      No RPC request should be issued inside transaction.
 */
461 /* the dt device on which the transactions are executed */
462 struct dt_device *th_dev;
464 /* context for this transaction, tag is LCT_TX_HANDLE */
465 struct lu_context th_ctx;
467 /* the last operation result in this transaction.
468 * this value is used in recovery */
/*
 * Transaction call-backs.
 *
 * These are invoked by osd (or underlying transaction engine) when
 * transaction changes state.
 *
 * Call-backs are used by upper layers to modify transaction parameters and to
 * perform some actions on each transaction state transition. Typical
 * example is mdt registering call-back to write into last-received file
 * before each transaction commit.
 */
483 struct dt_txn_callback {
484 int (*dtc_txn_start)(const struct lu_env *env,
485 struct txn_param *param, void *cookie);
486 int (*dtc_txn_stop)(const struct lu_env *env,
487 struct thandle *txn, void *cookie);
488 int (*dtc_txn_commit)(const struct lu_env *env,
489 struct thandle *txn, void *cookie);
491 struct list_head dtc_linkage;
/*
 * Registration of transaction call-backs on a device.
 * NOTE(review): presumably these link/unlink @cb on the device's
 * ->dd_txn_callbacks list, which is documented as unprotected -- confirm.
 */
void dt_txn_callback_add(struct dt_device *dev,
                         struct dt_txn_callback *cb);
void dt_txn_callback_del(struct dt_device *dev,
                         struct dt_txn_callback *cb);

/*
 * Transaction state-change hooks, one per state (start, stop, commit).
 * NOTE(review): presumably each runs the matching ->dtc_txn_*() method of
 * the registered call-backs -- confirm against the implementation.
 */
int dt_txn_hook_start(const struct lu_env *env,
                      struct dt_device *dev,
                      struct txn_param *param);
int dt_txn_hook_stop(const struct lu_env *env,
                     struct thandle *txn);
int dt_txn_hook_commit(const struct lu_env *env,
                       struct thandle *txn);

/*
 * NOTE(review): judging by the name, tries to use @obj as a directory
 * index; return convention is not visible here -- confirm.
 */
int dt_try_as_dir(const struct lu_env *env,
                  struct dt_object *obj);
503 struct dt_object *dt_store_open(const struct lu_env *env,
504 struct dt_device *dt, const char *name,
507 #endif /* __LUSTRE_DT_OBJECT_H */