1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2006 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #ifndef __LINUX_LU_OBJECT_H
24 #define __LINUX_LU_OBJECT_H
29 #include <linux/lustre_idl.h>
31 #include <libcfs/list.h>
32 #include <libcfs/kp30.h>
35 * Layered objects support for CMD3/C5.
40 struct proc_dir_entry;
44 * lu_* data-types represent server-side entities shared by data and meta-data
49 * 0. support for layering.
51 * Server side object is split into layers, one per device in the
52 * corresponding device stack. Individual layer is represented by struct
53 * lu_object. Compound layered object --- by struct lu_object_header. Most
54 * interface functions take lu_object as an argument and operate on the
55 * whole compound object. This decision was made due to the following
58 * - it's envisaged that lu_object will be used much more often than
61 * - we want lower (non-top) layers to be able to initiate operations
62 * on the whole object.
64 * Generic code supports layering more complex than simple stacking, e.g.,
65 * it is possible that at some layer object "spawns" multiple sub-objects
68 * 1. fid-based identification.
70 * Compound object is uniquely identified by its fid. Objects are indexed
71 * by their fids (hash table is used for index).
73 * 2. caching and life-cycle management.
75 * Object's life-time is controlled by reference counting. When reference
76 * count drops to 0, object is returned to cache. Cached objects still
77 * retain their identity (i.e., fid), and can be recovered from cache.
79 * Objects are kept in the global LRU list, and lu_site_purge() function
80 * can be used to reclaim given number of unused objects from the tail of
83 * 3. avoiding recursion.
85 * Generic code tries to replace recursion through layers by iterations
101 struct lu_object_header;
104 * Operations common for data and meta-data devices.
/*
 * Operations shared by data and meta-data devices.
 *
 * Object creation deliberately avoids recursion: first the top-level
 * device's ->ldo_object_alloc() builds the top object (including its
 * lu_object_header), then lu_object_alloc() stores the fid in that
 * header and calls ->ldo_object_init() on every newly inserted layer,
 * repeating until no layer produces further sub-objects.
 */
struct lu_device_operations {
        /*
         * Allocate the lower-layer parts of the object by invoking
         * ->ldo_object_alloc() of the underlying device(s).  Called
         * once for each object inserted into the stack; it is this
         * method's responsibility to insert the object(s) it creates
         * into the appropriate places of the stack.
         */
        int (*ldo_object_init)(struct lu_object *);
        /*
         * Allocate an object for this device only, without any
         * lower-layer parts; invoked by ->ldo_object_init() of the
         * layer above.
         */
        struct lu_object *(*ldo_object_alloc)(struct lu_device *);
        /*
         * Dual to ->ldo_object_alloc(): release the per-layer object
         * when it is removed from the stack.
         */
        void (*ldo_object_free)(struct lu_object *o);
        /*
         * Invoked when the last active reference is dropped and the
         * object returns to the cache.
         */
        void (*ldo_object_release)(struct lu_object *o);
        /*
         * Debugging helper: print the given object into @f.
         */
        int (*ldo_object_print)(struct seq_file *f, const struct lu_object *o);
};
168 struct lu_device_type;
171 * Device: a layer in the server side abstraction stacking.
175 * reference count. This is incremented, in particular, on each object
176 * created at this layer.
178 * XXX which means that atomic_t is probably too small.
181 struct lu_device_type *ld_type;
182 struct lu_device_operations *ld_ops;
183 struct lu_site *ld_site;
184 struct proc_dir_entry *ld_proc_entry;
186 /* XXX: temporary back pointer into obd. */
187 struct obd_device *ld_obd;
190 struct lu_device_type_operations;
193 /* this is meta-data device */
194 LU_DEVICE_MD = (1 << 0),
195 /* this is data device */
196 LU_DEVICE_DT = (1 << 1)
199 struct lu_device_type {
202 struct lu_device_type_operations *ldt_ops;
/*
 * Per-type operations: create/destroy device instances of a type, and
 * one-time set-up/tear-down for the type as a whole.
 */
struct lu_device_type_operations {
        /* Create a device instance of this type from configuration. */
        struct lu_device *(*ldto_device_alloc)(struct lu_device_type *t,
                                               struct lustre_cfg *lcfg);
        /* Dual to ->ldto_device_alloc(): destroy a device instance. */
        void (*ldto_device_free)(struct lu_device *d);

        /* One-time initialization for the type; presumably called when
         * the type is registered — TODO confirm against callers. */
        int (*ldto_init)(struct lu_device_type *t);
        /* Dual to ->ldto_init(). */
        void (*ldto_fini)(struct lu_device_type *t);
};
/*
 * Flags for an individual object layer.
 */
enum lu_object_flags {
        /*
         * Set once ->ldo_object_init() has been called for this layer;
         * used by lu_object_alloc() to detect layers that still need
         * initialization.
         */
        LU_OBJECT_ALLOCATED = (1 << 0)
};
226 * Layer in the layered object.
230 * Header for this object.
232 struct lu_object_header *lo_header;
234 * Device for this layer.
236 struct lu_device *lo_dev;
238 * Linkage into list of all layers.
240 struct list_head lo_linkage;
242 * Depth. Top level layer depth is 0.
246 * Flags from enum lu_object_flags.
248 unsigned long lo_flags;
/*
 * Flags kept in the compound object header (bit numbers, used with
 * test_bit()/set_bit() on ->loh_flags).
 */
enum lu_object_header_flags {
        /*
         * Do not keep this object in cache: it is destroyed as soon as
         * the last reference to it is released.  Once set, this flag
         * cannot be cleared.
         */
        LU_OBJECT_HEARD_BANSHEE = 0
};
261 * "Compound" object, consisting of multiple layers.
263 struct lu_object_header {
265 * Object flags from enum lu_object_header_flags. Set and checked
268 unsigned long loh_flags;
270 * Object reference count. Protected by site guard lock.
274 * Fid, uniquely identifying this object.
276 struct ll_fid loh_fid;
278 * Linkage into per-site hash table. Protected by site guard lock.
280 struct hlist_node loh_hash;
282 * Linkage into per-site LRU list. Protected by site guard lock.
284 struct list_head loh_lru;
286 * Linkage into list of layers. Never modified once set (except lately
287 * during object destruction). No locking is necessary.
289 struct list_head loh_layers;
293 * lu_site is a "compartment" within which objects are unique, and LRU
294 * discipline is maintained.
296 * lu_site exists so that multiple layered stacks can co-exist in the same
304 * - ->ls_hash hash table (and its linkages in objects);
306 * - ->ls_lru list (and its linkages in objects);
308 * - 0/1 transitions of object ->loh_ref reference count;
314 * Hash-table where objects are indexed by fid.
316 struct hlist_head *ls_hash;
318 * Bit-mask for hash-table size.
324 * LRU list, updated on each access to object. Protected by
327 * "Cold" end of LRU is ->ls_lru.next. Accessed object are moved to
328 * the ->ls_lru.prev (this is due to the non-existence of
329 * list_for_each_entry_safe_reverse()).
331 struct list_head ls_lru;
333 * Total number of objects in this site. Protected by ->ls_guard.
337 * Total number of objects in this site with reference counter greater
338 * than 0. Protected by ->ls_guard.
343 * Top-level device for this stack.
345 struct lu_device *ls_top_dev;
347 /* statistical counters. Protected by nothing, races are accepted. */
353 * Number of hash-table entry checks made.
355 * ->s_cache_check / (->s_cache_miss + ->s_cache_hit)
357 * is an average number of hash slots inspected during single
361 /* raced cache insertions */
370 static inline struct lu_device_operations *
371 lu_object_ops(const struct lu_object *o)
373 return o->lo_dev->ld_ops;
376 static inline struct lu_object *lu_object_next(const struct lu_object *o)
378 return container_of(o->lo_linkage.next, struct lu_object, lo_linkage);
381 static inline struct ll_fid *lu_object_fid(const struct lu_object *o)
383 return &o->lo_header->loh_fid;
386 static inline struct lu_object *lu_object_top(struct lu_object_header *h)
388 LASSERT(!list_empty(&h->loh_layers));
389 return container_of(h->loh_layers.next, struct lu_object, lo_linkage);
392 static inline void lu_object_get(struct lu_object *o)
394 LASSERT(o->lo_header->loh_ref > 0);
395 spin_lock(&o->lo_dev->ld_site->ls_guard);
396 o->lo_header->loh_ref ++;
397 spin_unlock(&o->lo_dev->ld_site->ls_guard);
400 static inline int lu_object_is_dying(struct lu_object_header *h)
402 return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags);
/* Drop a reference on @o; on last put the object returns to the cache
 * (or is destroyed if LU_OBJECT_HEARD_BANSHEE is set). */
405 void lu_object_put(struct lu_object *o);
/* Reclaim up to @nr unused objects from the cold end of the site LRU. */
406 void lu_site_purge(struct lu_site *s, int nr);
/* Debugging helper: print @o into seq_file @f. */
407 int lu_object_print(struct seq_file *f, const struct lu_object *o);
/* Look up the object with fid @f in site @s (objects are indexed by
 * fid in the site hash table); presumably allocates on a cache miss —
 * confirm against the implementation. */
408 struct lu_object *lu_object_find(struct lu_site *s, const struct ll_fid *f);
/* Set up / tear down a site with @top as its top-level device. */
410 int lu_site_init(struct lu_site *s, struct lu_device *top);
411 void lu_site_fini(struct lu_site *s);
/* Device reference counting. */
413 void lu_device_get(struct lu_device *d);
414 void lu_device_put(struct lu_device *d);
/* Initialize / finalize a device of type @t. */
416 int lu_device_init(struct lu_device *d, struct lu_device_type *t);
417 void lu_device_fini(struct lu_device *d);
/* Initialize / finalize a single object layer bound to header @h and
 * device @d. */
419 int lu_object_init(struct lu_object *o,
420 struct lu_object_header *h, struct lu_device *d);
421 void lu_object_fini(struct lu_object *o);
/* Insert layer @o as the top of @h, or after layer @before,
 * respectively. */
422 void lu_object_add_top(struct lu_object_header *h, struct lu_object *o);
423 void lu_object_add(struct lu_object *before, struct lu_object *o);
/* Initialize / finalize a compound object header. */
425 int lu_object_header_init(struct lu_object_header *h);
426 void lu_object_header_fini(struct lu_object_header *h);
428 #endif /* __LINUX_LU_OBJECT_H */