1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * Copyright (c) 2011 Whamcloud, Inc.
36 * This file is part of Lustre, http://www.lustre.org/
37 * Lustre is a trademark of Sun Microsystems, Inc.
39 * lustre/osd/osd_internal.h
41 * Shared definitions and declarations for osd module
43 * Author: Nikita Danilov <nikita@clusterfs.com>
46 #ifndef _OSD_INTERNAL_H
47 #define _OSD_INTERNAL_H
49 #if defined(__KERNEL__)
51 /* struct rw_semaphore */
52 #include <linux/rwsem.h>
54 #include <linux/dcache.h>
56 #include <linux/dirent.h>
58 #ifdef HAVE_EXT4_LDISKFS
59 #include <ldiskfs/ldiskfs.h>
60 #include <ldiskfs/ldiskfs_jbd2.h>
61 # ifdef HAVE_LDISKFS_JOURNAL_CALLBACK_ADD
62 # define journal_callback ldiskfs_journal_cb_entry
63 # define osd_journal_callback_set(handle, func, jcb) ldiskfs_journal_callback_add(handle, func, jcb)
65 # define osd_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb)
68 #include <linux/jbd.h>
69 #include <linux/ldiskfs_fs.h>
70 #include <linux/ldiskfs_jbd.h>
71 #define osd_journal_callback_set(handle, func, jcb) journal_callback_set(handle, func, jcb)
77 /* class_register_type(), class_unregister_type(), class_get_type() */
78 #include <obd_class.h>
79 #include <lustre_disk.h>
81 #include <dt_object.h>
87 #define OSD_OII_NOGEN (0)
88 #define OSD_COUNTERS (0)
90 /** Enable thandle usage statistics */
91 #define OSD_THANDLE_STATS (0)
93 #ifdef HAVE_QUOTA_SUPPORT
97 cfs_kernel_cap_t oc_cap;
101 #ifdef HAVE_LDISKFS_PDO
/*
 * HAVE_LDISKFS_PDO variant: the htree lock argument supplied by the caller
 * is forwarded unchanged to ll_ldiskfs_find_entry() / ldiskfs_add_entry().
 */
103 #define osd_ldiskfs_find_entry(dir, dentry, de, lock) \
104 ll_ldiskfs_find_entry(dir, dentry, de, lock)
105 #define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \
106 ldiskfs_add_entry(handle, child, cinode, hlock)
108 #else /* HAVE_LDISKFS_PDO */
114 struct htree_lock_head {
118 #define ldiskfs_htree_lock(lock, head, inode, op) do { LBUG(); } while (0)
119 #define ldiskfs_htree_unlock(lock) do { LBUG(); } while (0)
121 static inline struct htree_lock_head *ldiskfs_htree_lock_head_alloc(int dep)
127 #define ldiskfs_htree_lock_head_free(lh) do { LBUG(); } while (0)
129 #define LDISKFS_DUMMY_HTREE_LOCK 0xbabecafe
/*
 * Non-PDO stub: nothing is allocated here; the LDISKFS_DUMMY_HTREE_LOCK
 * sentinel value is returned cast to a lock pointer.
 */
131 static inline struct htree_lock *ldiskfs_htree_lock_alloc(void)
133 return (struct htree_lock *)LDISKFS_DUMMY_HTREE_LOCK;
/*
 * Non-PDO stub: asserts that \a lk is the LDISKFS_DUMMY_HTREE_LOCK sentinel
 * value, i.e. not a real lock pointer.
 */
136 static inline void ldiskfs_htree_lock_free(struct htree_lock *lk)
138 LASSERT((unsigned long)lk == LDISKFS_DUMMY_HTREE_LOCK);
141 #define HTREE_HBITS_DEF 0
/*
 * Variant used when HAVE_LDISKFS_PDO is not defined: the trailing lock
 * argument is accepted so call sites look the same in both builds, but it is
 * dropped and not passed on to ll_ldiskfs_find_entry() / ldiskfs_add_entry().
 */
143 #define osd_ldiskfs_find_entry(dir, dentry, de, lock) \
144 ll_ldiskfs_find_entry(dir, dentry, de)
145 #define osd_ldiskfs_add_entry(handle, child, cinode, lock) \
146 ldiskfs_add_entry(handle, child, cinode)
148 #endif /* HAVE_LDISKFS_PDO */
155 struct dt_device od_dt_dev;
156 /* information about underlying file system */
157 struct lustre_mount_info *od_mount;
159 * XXX temporary stuff for object index: directory where every object
160 * is named by its fid.
162 struct dt_object *od_obj_area;
164 struct osd_oi *od_oi_table;
165 /* total number of OI containers */
170 unsigned int od_fl_capa:1;
171 unsigned long od_capa_timeout;
173 struct lustre_capa_key *od_capa_keys;
174 cfs_hlist_head_t *od_capa_hash;
176 cfs_proc_dir_entry_t *od_proc_entry;
177 struct lprocfs_stats *od_stats;
179 * statfs optimization: we cache a bit.
181 cfs_time_t od_osfs_age;
182 cfs_kstatfs_t od_kstatfs;
183 cfs_spinlock_t od_osfs_lock;
186 * The following flag indicates whether interop mode is in effect.
187 * It is initialized from a mount parameter.
198 #if OSD_THANDLE_STATS
199 LPROC_OSD_THANDLE_STARTING,
200 LPROC_OSD_THANDLE_OPEN,
201 LPROC_OSD_THANDLE_CLOSING,
208 * Storage representation for fids.
210 * Variable size, first byte contains the length of the whole record.
212 struct osd_fid_pack {
213 unsigned char fp_len;
214 char fp_area[sizeof(struct lu_fid)];
217 struct osd_it_ea_dirent {
218 struct lu_fid oied_fid;
221 unsigned short oied_namelen;
222 unsigned int oied_type;
224 } __attribute__((packed));
227 * Because osd_it_ea_dirent (the in-memory dirent struct for osd) is larger
228 * than struct lu_dirent, osd readdir reads fewer dirents than are required
229 * for an mdd dir page. The buffer size therefore needs to be increased so
230 * that there is one ext3 readdir for every mdd readdir page.
233 #define OSD_IT_EA_BUFSIZE (CFS_PAGE_SIZE + CFS_PAGE_SIZE/4)
236 * This is the iterator's in-memory data structure in interoperability
237 * mode (i.e. an iterator over an ldiskfs-style directory).
240 struct osd_object *oie_obj;
241 /** used in the ldiskfs iterator to store the file pointer */
242 struct file oie_file;
243 /** how many entries have been read-cached from storage */
245 /** the current entry being iterated by the caller */
247 /** current processing entry */
248 struct osd_it_ea_dirent *oie_dirent;
249 /** buffer to hold entries, size == OSD_IT_EA_BUFSIZE */
254 * Iterator's in-memory data structure for IAM mode.
257 struct osd_object *oi_obj;
258 struct iam_path_descr *oi_ipd;
259 struct iam_iterator oi_it;
262 struct osd_thread_info {
263 const struct lu_env *oti_env;
265 * used for index operations.
267 struct dentry oti_obj_dentry;
268 struct dentry oti_child_dentry;
270 /** dentry for Iterator context. */
271 struct dentry oti_it_dentry;
272 struct htree_lock *oti_hlock;
274 struct lu_fid oti_fid;
275 struct osd_inode_id oti_id;
277 * XXX temporary: for ->i_op calls.
279 struct timespec oti_time;
281 * XXX temporary: fake struct file for osd_object_sync
283 struct file oti_file;
285 * XXX temporary: for capa operations.
287 struct lustre_capa_key oti_capa_key;
288 struct lustre_capa oti_capa;
290 /** osd_device reference, initialized in osd_trans_start() and
291 used in osd_trans_stop() */
292 struct osd_device *oti_dev;
295 * The following ipd and it structures are used for osd_index_iam_lookup().
296 * They are defined separately because an index operation may be performed
297 * inside an open iterator session.
300 /** osd iterator context used for iterator session */
303 struct osd_it_iam oti_it;
304 /** ldiskfs iterator data structure, see osd_it_ea_{init, fini} */
305 struct osd_it_ea oti_it_ea;
308 /** pre-allocated buffer used by oti_it_ea, size OSD_IT_EA_BUFSIZE */
311 /** IAM iterator for index operation. */
312 struct iam_iterator oti_idx_it;
314 /** union to guarantee that ->oti_ipd[] has proper alignment. */
316 char oti_it_ipd[DX_IPD_MAX_SIZE];
317 long long oti_alignment_lieutenant;
321 char oti_idx_ipd[DX_IPD_MAX_SIZE];
322 long long oti_alignment_lieutenant_colonel;
329 /** used in osd_fid_set() to put xattr */
330 struct lu_buf oti_buf;
331 /** used in osd_ea_fid_set() to set fid into common ea */
332 struct lustre_mdt_attrs oti_mdt_attrs;
333 #ifdef HAVE_QUOTA_SUPPORT
334 struct osd_ctxt oti_ctxt;
336 struct lu_env oti_obj_delete_tx_env;
337 #define OSD_FID_REC_SZ 32
338 char oti_ldp[OSD_FID_REC_SZ];
339 char oti_ldp2[OSD_FID_REC_SZ];
342 extern int ldiskfs_pdo;
346 void lprocfs_osd_init_vars(struct lprocfs_static_vars *lvars);
347 int osd_procfs_init(struct osd_device *osd, const char *name);
348 int osd_procfs_fini(struct osd_device *osd);
349 void osd_lprocfs_time_start(const struct lu_env *env);
350 void osd_lprocfs_time_end(const struct lu_env *env,
351 struct osd_device *osd, int op);
353 int osd_statfs(const struct lu_env *env, struct dt_device *dev,
357 * Invariants, assertions.
361 * XXX: do not enable this, until invariant checking code is made thread safe
362 * in the face of pdirops locking.
364 #define OSD_INVARIANT_CHECKS (0)
366 #if OSD_INVARIANT_CHECKS
/*
 * Invariant check for an osd_object, expressed as the ergo() implications
 * below. This version is compiled only when OSD_INVARIANT_CHECKS is non-zero.
 * NOTE(review): `od_conationer` looks like a misspelling (possibly of
 * `od_container`) -- confirm the field name before enabling these checks.
 */
367 static inline int osd_invariant(const struct osd_object *obj)
371 ergo(obj->oo_inode != NULL,
372 obj->oo_inode->i_sb == osd_sb(osd_obj2dev(obj)) &&
373 atomic_read(&obj->oo_inode->i_count) > 0) &&
374 ergo(obj->oo_dir != NULL &&
375 obj->oo_dir->od_conationer.ic_object != NULL,
376 obj->oo_dir->od_conationer.ic_object == obj->oo_inode);
379 #define osd_invariant(obj) (1)
382 /* The on-disk extN format reserves inodes 0-11 for internal filesystem
383 * use, and these inodes are invisible on the client side, so the valid
384 * sequence range for an IGIF fid is 12-0xffffffff. The root inode (#2),
385 * however, is visible on the server side (osd), so it must be valid here too.
387 #define OSD_ROOT_SEQ 2
/** Return non-zero iff the sequence of \a fid equals OSD_ROOT_SEQ. */
388 static inline int osd_fid_is_root(const struct lu_fid *fid)
390 return fid_seq(fid) == OSD_ROOT_SEQ;
/**
 * Return non-zero if \a fid is accepted by fid_is_igif() or, failing that,
 * by osd_fid_is_root().
 */
393 static inline int osd_fid_is_igif(const struct lu_fid *fid)
395 return fid_is_igif(fid) || osd_fid_is_root(fid);
/**
 * Select the OI table slot for \a fid: osd->od_oi_table is indexed by
 * fid->f_seq masked with (od_oi_count - 1).
 * NOTE(review): the mask only reaches every slot when od_oi_count is a
 * power of two; the LASSERT below checks only that it is >= 1 -- confirm
 * where od_oi_count is set.
 */
398 static inline struct osd_oi *
399 osd_fid2oi(struct osd_device *osd, const struct lu_fid *fid)
401 if (!fid_is_norm(fid))
404 LASSERT(osd->od_oi_table != NULL && osd->od_oi_count >= 1);
405 /* This works even when od_oi_count equals 1, although that is unexpected;
406 * the only reason to set it to 1 is for performance measurement. */
407 return &osd->od_oi_table[fid->f_seq & (osd->od_oi_count - 1)];
410 #endif /* __KERNEL__ */
411 #endif /* _OSD_INTERNAL_H */