2 * linux/include/linux/jfs.h
4 * Written by Stephen C. Tweedie <sct@redhat.com>
6 * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
12 * Definitions for transaction data structures for the buffer cache
13 * filesystem journaling support.
19 /* Allow this file to be included directly into e2fsprogs */
21 #include "jfs_compat.h"
25 * Debug code enabled by default for kernel builds
33 extern int journal_enable_debug;
/*
 * jfs_debug(n, f, a...): debug-build tracing macro.  In the lines visible
 * here, when n <= journal_enable_debug it prints a KERN_DEBUG prefix
 * carrying __FILE__, __LINE__ and __FUNCTION__.
 * NOTE(review): the listing's own numbering skips 37 and 41-43, so the
 * remainder of the macro (presumably the printk of f/a and the closing
 * braces) is not visible here -- confirm against the full header.
 */
36 #define jfs_debug(n, f, a...) \
38 if ((n) <= journal_enable_debug) { \
39 printk (KERN_DEBUG "JFS DEBUG: (%s, %d): %s: ", \
40 __FILE__, __LINE__, __FUNCTION__); \
/*
 * Debugging disabled: jfs_debug() expands to nothing, so none of its
 * arguments are evaluated.  The parameter list mirrors the debug-enabled
 * variant (level n, format f, optional arguments a...) so that a call
 * which is malformed for the debug build is also rejected in this one.
 */
#define jfs_debug(n, f, a...)	/**/
48 #define JFS_MIN_JOURNAL_BLOCKS 1024
51 * Internal structures used by the logging mechanism:
54 #define JFS_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */
62 * Descriptor block types:
/*
 * Block type codes, numbered consecutively from 1.  The two superblock
 * format versions carry distinct codes.
 */
#define JFS_DESCRIPTOR_BLOCK	1
#define JFS_COMMIT_BLOCK	2
#define JFS_SUPERBLOCK_V1	3
#define JFS_SUPERBLOCK_V2	4
#define JFS_REVOKE_BLOCK	5
72 * Standard header for all descriptor blocks:
74 typedef struct journal_header_s
83 * The block tag: used to describe a single buffer in the journal
85 typedef struct journal_block_tag_s
87 __u32 t_blocknr; /* The on-disk block number */
88 __u32 t_flags; /* See below */
89 } journal_block_tag_t;
92 * The revoke descriptor: used on disk to describe a series of blocks to
93 * be revoked from the log
95 typedef struct journal_revoke_header_s
97 journal_header_t r_header;
98 int r_count; /* Count of bytes used in the block */
99 } journal_revoke_header_t;
/*
 * Bit masks for the journal tag flags word (journal_block_tag_t.t_flags).
 * Each flag occupies its own bit, so they may be OR-ed together.
 */
#define JFS_FLAG_ESCAPE		0x1	/* on-disk block is escaped */
#define JFS_FLAG_SAME_UUID	0x2	/* block has same uuid as previous */
#define JFS_FLAG_DELETED	0x4	/* block deleted by this transaction */
#define JFS_FLAG_LAST_TAG	0x8	/* last tag in this descriptor block */
110 * The journal superblock. All fields are in big-endian byte order.
112 typedef struct journal_superblock_s
115 journal_header_t s_header;
118 /* Static information describing the journal */
119 __u32 s_blocksize; /* journal device blocksize */
120 __u32 s_maxlen; /* total blocks in journal file */
121 __u32 s_first; /* first block of log information */
124 /* Dynamic information describing the current state of the log */
125 __u32 s_sequence; /* first commit ID expected in log */
126 __u32 s_start; /* blocknr of start of log */
129 /* Error value, as set by journal_abort(). */
133 /* Remaining fields are only valid in a version-2 superblock */
134 __u32 s_feature_compat; /* compatible feature set */
135 __u32 s_feature_incompat; /* incompatible feature set */
136 __u32 s_feature_ro_compat; /* readonly-compatible feature set */
138 __u8 s_uuid[16]; /* 128-bit uuid for journal */
141 __u32 s_nr_users; /* Nr of filesystems sharing log */
143 __u32 s_dynsuper; /* Blocknr of dynamic superblock copy*/
146 __u32 s_max_transaction; /* Limit of journal blocks per trans.*/
147 __u32 s_max_trans_data; /* Limit of data blocks per trans. */
153 __u8 s_users[16*48]; /* ids of all fs'es sharing the log */
155 } journal_superblock_t;
/*
 * Feature tests.  Each JFS_HAS_*_FEATURE(j, mask) yields 1 when the
 * journal's j_format_version is at least 2 AND at least one bit of `mask'
 * is set in the matching superblock feature word; otherwise it yields 0.
 * `mask' is passed through cpu_to_be32() before being ANDed with the
 * stored word.  Note that `j' is evaluated twice.
 *
 * JFS_TEST_FEATURE_WORD is the shared implementation; `word' names the
 * journal_superblock_t member to test.
 */
#define JFS_TEST_FEATURE_WORD(j, word, mask)				\
	((j)->j_format_version >= 2 &&					\
	 ((j)->j_superblock->word & cpu_to_be32((mask))))

#define JFS_HAS_COMPAT_FEATURE(j,mask)					\
	JFS_TEST_FEATURE_WORD(j, s_feature_compat, mask)
#define JFS_HAS_RO_COMPAT_FEATURE(j,mask)				\
	JFS_TEST_FEATURE_WORD(j, s_feature_ro_compat, mask)
#define JFS_HAS_INCOMPAT_FEATURE(j,mask)				\
	JFS_TEST_FEATURE_WORD(j, s_feature_incompat, mask)
/* Incompatible-feature bit for revoke support. */
#define JFS_FEATURE_INCOMPAT_REVOKE	0x00000001

/*
 * Feature masks understood by this kernel version: no compat or
 * ro-compat features are known, and revoke is the only incompat one.
 */
#define JFS_KNOWN_COMPAT_FEATURES	0
#define JFS_KNOWN_ROCOMPAT_FEATURES	0
#define JFS_KNOWN_INCOMPAT_FEATURES	JFS_FEATURE_INCOMPAT_REVOKE
176 #include <linux/fs.h>
/*
 * J_ASSERT(assert): when the asserted expression is false, the visible
 * lines supply a message naming the function, file, line and the
 * stringified expression, and then store 0 through a null char pointer.
 * NOTE(review): the null store is presumably there to force a kernel
 * oops -- confirm.  The listing's numbering skips 181, 183 and 186+, so
 * the call consuming the format string and the end of the macro are not
 * visible here.
 */
179 #define J_ASSERT(assert) \
180 do { if (!(assert)) { \
182 "Assertion failure in %s() at %s line %d: " \
184 __FUNCTION__, __FILE__, __LINE__, # assert); \
185 * ((char *) 0) = 0; \
189 struct jfs_revoke_table_s;
191 /* The handle_t type represents a single atomic update being performed
192 * by some process. All filesystem modifications made by the process go
193 * through this handle. Recursive operations (such as quota operations)
194 * are gathered into a single update.
196 * The buffer credits field is used to account for journaled buffers
197 * being modified by the running process. To ensure that there is
198 * enough log space for all outstanding operations, we need to limit the
199 * number of outstanding buffers possible at any time. When the
200 * operation completes, any buffer credits not used are credited back to
201 * the transaction, so that at all times we know how many buffers the
202 * outstanding updates on a transaction might possibly touch. */
206 /* Which compound transaction is this update a part of? */
207 transaction_t * h_transaction;
209 /* Number of remaining buffers we are allowed to dirty: */
210 int h_buffer_credits;
212 /* Reference count on this handle */
216 unsigned int h_sync : 1; /* sync-on-close */
217 unsigned int h_jdata : 1; /* force data journaling */
221 /* The transaction_t type is the guts of the journaling mechanism. It
222 * tracks a compound transaction through its various states:
224 * RUNNING: accepting new updates
225 * LOCKED: Updates still running but we don't accept new ones
226 * RUNDOWN: Updates are tidying up but have finished requesting
227 * new buffers to modify (state not used for now)
228 * FLUSH: All updates complete, but we are still writing to disk
229 * COMMIT: All data on disk, writing commit record
230 * FINISHED: We still have to keep the transaction for checkpointing.
232 * The transaction keeps track of all of the buffers modified by a
233 * running transaction, and all of the buffers committed but not yet
234 * flushed to home for finished transactions.
239 /* Pointer to the journal for this transaction. */
240 journal_t * t_journal;
242 /* Sequence number for this transaction */
245 /* Transaction's current state */
255 /* Where in the log does this transaction's commit start? */
256 unsigned long t_log_start;
258 /* Doubly-linked circular list of all inodes owned by this
260 struct inode * t_ilist;
262 /* Number of buffers on the t_buffers list */
265 /* Doubly-linked circular list of all buffers reserved but not
266 yet modified by this transaction */
267 struct buffer_head * t_reserved_list;
269 /* Doubly-linked circular list of all metadata buffers owned by this
271 struct buffer_head * t_buffers;
273 /* Doubly-linked circular list of all data buffers still to be
274 flushed before this transaction can be committed */
275 struct buffer_head * t_datalist;
277 /* Doubly-linked circular list of all forget buffers (superseded
278 buffers which we can un-checkpoint once this transaction
280 struct buffer_head * t_forget;
282 /* Doubly-linked circular list of all buffers still to be
283 flushed before this transaction can be checkpointed */
284 struct buffer_head * t_checkpoint_list;
286 /* Doubly-linked circular list of temporary buffers currently
287 undergoing IO in the log */
288 struct buffer_head * t_iobuf_list;
290 /* Doubly-linked circular list of metadata buffers being
291 shadowed by log IO. The IO buffers on the iobuf list and the
292 shadow buffers on this list match each other one for one at
294 struct buffer_head * t_shadow_list;
296 /* Doubly-linked circular list of control buffers being written
298 struct buffer_head * t_log_list;
300 /* Number of outstanding updates running on this transaction */
303 /* Number of buffers reserved for use by all handles in this
304 * transaction but not yet modified. */
305 int t_outstanding_credits;
307 /* Wait queue to wait for updates to complete */
308 struct wait_queue * t_wait;
310 /* Forward and backward links for the circular list of all
311 * transactions awaiting checkpoint */
312 transaction_t *t_cpnext, *t_cpprev;
314 /* When will the transaction expire (become due for commit), in
316 unsigned long t_expires;
320 /* The journal_t maintains all of the journaling state information for a
321 * single filesystem. It is linked to from the fs superblock structure.
323 * We use the journal_t to keep track of all outstanding transaction
324 * activity on the filesystem, and to manage the state of the log
325 * writing process. */
329 /* General journaling state flags */
330 unsigned long j_flags;
332 /* Is there an outstanding uncleared error on the journal (from
336 /* The superblock buffer */
337 struct buffer_head * j_sb_buffer;
338 journal_superblock_t * j_superblock;
340 /* Version of the superblock format */
341 int j_format_version;
343 /* Transactions: The current running transaction... */
344 transaction_t * j_running_transaction;
346 /* ... the transaction we are pushing to disk ... */
347 transaction_t * j_committing_transaction;
349 /* ... and a linked circular list of all transactions waiting
350 * for checkpointing. */
351 transaction_t * j_checkpoint_transactions;
353 /* Wait queue for locking of the journal structure. */
354 struct wait_queue * j_wait_lock;
356 /* Wait queue for waiting for a locked transaction to start
358 struct wait_queue * j_wait_transaction_locked;
360 /* Wait queue for waiting for checkpointing to complete */
361 struct wait_queue * j_wait_logspace;
363 /* Wait queue for waiting for commit to complete */
364 struct wait_queue * j_wait_done_commit;
366 /* Wait queue to trigger checkpointing */
367 struct wait_queue * j_wait_checkpoint;
369 /* Wait queue to trigger commit */
370 struct wait_queue * j_wait_commit;
372 /* Semaphore for locking against concurrent checkpoints */
373 struct semaphore j_checkpoint_sem;
375 /* Journal running state: */
376 /* The lock flag is *NEVER* touched from interrupts. */
377 unsigned int j_locked : 1;
379 /* Journal head: identifies the first unused block in the journal. */
380 unsigned long j_head;
382 /* Journal tail: identifies the oldest still-used block in the
384 unsigned long j_tail;
386 /* Journal free: how many free blocks are there in the journal? */
387 unsigned long j_free;
389 /* Journal start and end: the block numbers of the first usable
390 * block and one beyond the last usable block in the journal. */
391 unsigned long j_first, j_last;
393 /* Device, blocksize and starting block offset for the location
394 * where we store the journal. */
397 unsigned int j_blk_offset;
399 /* Total maximum capacity of the journal region on disk. */
400 unsigned int j_maxlen;
402 /* Optional inode where we store the journal. If present, all
403 * journal block numbers are mapped into this inode via
405 struct inode * j_inode;
407 /* Sequence number of the oldest transaction in the log */
408 tid_t j_tail_sequence;
409 /* Sequence number of the next transaction to grant */
410 tid_t j_transaction_sequence;
411 /* Sequence number of the most recently committed transaction */
412 tid_t j_commit_sequence;
413 /* Sequence number of the most recent transaction wanting commit */
414 tid_t j_commit_request;
416 /* Journal uuid: identifies the object (filesystem, LVM volume
417 * etc) backed by this journal. This will eventually be
418 * replaced by an array of uuids, allowing us to index multiple
419 * devices within a single journal and to perform atomic updates
424 /* Pointer to the current commit thread for this journal */
425 struct task_struct * j_task;
427 /* Maximum number of metadata buffers to allow in a single
428 * compound commit transaction */
429 int j_max_transaction_buffers;
431 /* What is the maximum transaction lifetime before we begin a
433 unsigned long j_commit_interval;
435 /* The timer used to wakeup the commit thread: */
436 struct timer_list * j_commit_timer;
437 int j_commit_timer_active;
439 /* The revoke table: maintains the list of revoked blocks in the
440 current transaction. */
441 struct jfs_revoke_table_s *j_revoke;
445 * Journal flag definitions
/*
 * One bit per flag.  JFS_ABORT is tested against journal_t.j_flags by
 * is_journal_abort(); the remaining flags presumably share that word.
 */
#define JFS_UNMOUNT	0x1	/* Journal thread is being destroyed */
#define JFS_SYNC	0x2	/* Perform synchronous transaction commits */
#define JFS_ABORT	0x4	/* Journaling has been aborted for errors. */
#define JFS_ACK_ERR	0x8	/* The errno in the sb has been acked */
453 * Journaling internal variables/parameters
456 extern int journal_flush_nr_buffers;
460 * Function declarations for the journaling transaction and buffer
465 extern void journal_unfile_buffer(struct buffer_head *);
466 extern void journal_refile_buffer(struct buffer_head *);
467 extern void journal_file_buffer(struct buffer_head *, transaction_t *, int);
468 extern void journal_clean_data_list(transaction_t *transaction);
470 /* Log buffer allocation */
471 extern struct buffer_head * journal_get_descriptor_buffer(journal_t *);
472 extern unsigned long journal_next_log_block(journal_t *);
474 /* Commit management */
475 extern void journal_commit_transaction(journal_t *);
477 /* Checkpoint list management */
478 extern void journal_remove_checkpoint(struct buffer_head *);
479 extern void journal_insert_checkpoint(struct buffer_head *, transaction_t *);
483 journal_write_metadata_buffer(transaction_t *transaction,
484 struct buffer_head *bh_in,
485 struct buffer_head **bh_out,
488 /* Create and destroy transactions */
489 extern transaction_t * get_transaction (journal_t *);
490 extern void put_transaction (transaction_t *);
492 /* Notify state transitions (called by the log writer thread): */
493 extern int set_transaction_state (transaction_t *, int);
496 /* Transaction locking */
497 extern void __wait_on_journal (journal_t *);
499 /* Journal locking. In 2.2, we assume that the kernel lock is already
/*
 * lock_journal: take the journal lock flag.  The visible statements
 * assert current->lock_depth >= 0, call __wait_on_journal() if j_locked
 * is already set, and then set j_locked to 1.
 * NOTE(review): the listing's numbering skips 502, 503, 505 and 509, so
 * the braces and possibly conditional-compilation lines around the
 * assertion are not visible here -- confirm against the full header.
 */
501 static inline void lock_journal (journal_t * journal)
504 J_ASSERT(current->lock_depth >= 0);
506 if (journal->j_locked)
507 __wait_on_journal(journal);
508 journal->j_locked = 1;
/*
 * try_lock_journal: non-waiting variant of the journal lock.  The visible
 * statements test j_locked and set it to 1.
 * NOTE(review): the listing's numbering skips 512, 514, 516 and 517;
 * presumably those lines hold the braces and the two return statements
 * (failure when already locked, success otherwise) -- the actual return
 * values cannot be confirmed from here.
 */
511 static inline int try_lock_journal (journal_t * journal)
513 if (journal->j_locked)
515 journal->j_locked = 1;
519 static inline void unlock_journal (journal_t * journal)
521 J_ASSERT (journal->j_locked);
522 journal->j_locked = 0;
523 wake_up(&journal->j_wait_lock);
526 /* This function is gross, but unfortunately we need it as long as
527 * existing filesystems want to guard against races by testing
528 * bh->b_count. @@@ Remove this? We no longer abuse b_count so badly!
/*
 * journal_is_buffer_shared: reads bh->b_count into a local and asserts
 * that it is at least 1.
 * NOTE(review): the listing's numbering skips 532, 535 and 536, so the
 * braces and the return statement are not visible; presumably the result
 * is derived from `count', but the exact expression must be confirmed
 * against the full header.
 */
531 static inline int journal_is_buffer_shared(struct buffer_head *bh)
533 int count = bh->b_count;
534 J_ASSERT (count >= 1);
538 /* The journaling code user interface:
540 * Create and destroy handles
541 * Register buffer modifications against the current transaction.
544 extern handle_t *journal_start (journal_t *, int nblocks);
545 extern int journal_restart (handle_t *, int nblocks);
546 extern int journal_extend (handle_t *, int nblocks);
547 extern int journal_get_write_access (handle_t *, struct buffer_head *);
548 extern int journal_get_create_access (handle_t *, struct buffer_head *);
549 extern int journal_get_undo_access (handle_t *, struct buffer_head *);
550 extern int journal_dirty_data (handle_t *, struct buffer_head *);
551 extern int journal_dirty_metadata (handle_t *, struct buffer_head *);
552 extern void journal_release_buffer (handle_t *, struct buffer_head *);
553 extern void journal_forget (handle_t *, struct buffer_head *);
554 extern void journal_sync_buffer (struct buffer_head *);
555 extern int journal_stop (handle_t *);
556 extern int journal_flush (journal_t *);
558 extern journal_t * journal_init_dev (kdev_t, int start, int len, int bsize);
559 extern journal_t * journal_init_inode (struct inode *);
560 extern int journal_update_format (journal_t *);
561 extern int journal_check_used_features
562 (journal_t *, unsigned long, unsigned long, unsigned long);
563 extern int journal_check_available_features
564 (journal_t *, unsigned long, unsigned long, unsigned long);
565 extern int journal_set_features
566 (journal_t *, unsigned long, unsigned long, unsigned long);
567 extern int journal_create (journal_t *);
568 extern int journal_load (journal_t *);
569 extern void journal_release (journal_t *);
570 extern int journal_recover (journal_t *);
571 extern void journal_update_superblock (journal_t *, int);
572 extern void __journal_abort (journal_t *);
573 extern void journal_abort (journal_t *, int);
574 extern int journal_errno (journal_t *);
575 extern void journal_ack_err (journal_t *);
576 extern int journal_clear_err (journal_t *);
578 /* Primary revoke support */
579 #define JOURNAL_REVOKE_DEFAULT_HASH 256
580 extern int journal_init_revoke(journal_t *, int);
581 extern void journal_destroy_revoke(journal_t *);
582 extern int journal_revoke (handle_t *, unsigned long, struct buffer_head *);
583 extern void journal_cancel_revoke(handle_t *, struct buffer_head *);
584 extern void journal_write_revoke_records(journal_t *, transaction_t *);
586 /* Recovery revoke support */
587 extern int journal_set_revoke(journal_t *, unsigned long, tid_t);
588 extern int journal_test_revoke(journal_t *, unsigned long, tid_t);
589 extern void journal_clear_revoke(journal_t *);
592 /* The log thread user interface:
594 * Request space in the current transaction, and force transaction commit
595 * transitions on demand.
598 extern int log_space_left (journal_t *); /* Called with journal locked */
599 extern void log_start_commit (journal_t *, transaction_t *);
600 extern void log_wait_commit (journal_t *, tid_t);
601 extern int log_do_checkpoint (journal_t *, int);
603 extern void log_wait_for_space(journal_t *, int nblocks);
604 extern void journal_drop_transaction(journal_t *, transaction_t *);
605 extern int cleanup_journal_tail(journal_t *);
608 /* Debugging code only: */
/*
 * jfs_ENOSYS(): the visible lines log a KERN_ERR message naming the
 * calling function and set the current task's state to
 * TASK_UNINTERRUPTIBLE.
 * NOTE(review): the message is built by placing __FUNCTION__ next to a
 * string literal.  Modern GCC treats __FUNCTION__ as a variable rather
 * than a literal and rejects this concatenation; a "%s" conversion with
 * __FUNCTION__ as an argument is the portable form.
 * NOTE(review): the listing's numbering skips 611 and 614+, so the start
 * and end of the macro body are not visible here.
 */
610 #define jfs_ENOSYS() \
612 printk (KERN_ERR "JFS unimplemented function " __FUNCTION__); \
613 current->state = TASK_UNINTERRUPTIBLE; \
620 * Simple test wrapper function to test the JFS_ABORT state flag. This
621 * bit, when set, indicates that we have had a fatal error somewhere,
622 * either inside the journaling layer or indicated to us by the client
623 * (e.g. ext3), and that we should not commit any further
627 static inline int is_journal_abort(journal_t *journal)
629 return journal->j_flags & JFS_ABORT;
632 #endif /* __KERNEL__ */
634 /* Comparison functions for transaction IDs: perform comparisons using
635 * modulo arithmetic so that they work over sequence number wraps. */
637 static inline int tid_ge(tid_t x, tid_t y)
639 int difference = (x - y);
640 return (difference > 0);
643 static inline int tid_geq(tid_t x, tid_t y)
645 int difference = (x - y);
646 return (difference >= 0);
650 #endif /* _LINUX_JFS_H */