From 31b78903c40cd74874e3db24762e0ace3f7f0d5d Mon Sep 17 00:00:00 2001 From: wangdi Date: Wed, 16 Apr 2003 12:36:05 +0000 Subject: [PATCH] fix a flaw of smbfs/inode.c --- .../kernel_patches/patches/invalidate_show.patch | 8217 +++++++++++++++++++- 1 file changed, 8137 insertions(+), 80 deletions(-) diff --git a/lustre/kernel_patches/patches/invalidate_show.patch b/lustre/kernel_patches/patches/invalidate_show.patch index f4612fa..9f044a5 100644 --- a/lustre/kernel_patches/patches/invalidate_show.patch +++ b/lustre/kernel_patches/patches/invalidate_show.patch @@ -1,103 +1,8160 @@ - fs/inode.c | 21 ++++++++++++++------- - fs/super.c | 4 ++-- - include/linux/fs.h | 2 +- - 3 files changed, 17 insertions(+), 10 deletions(-) + fs/block_dev.c | 695 ++++++++++ + fs/devfs/base.c | 3544 +++++++++++++++++++++++++++++++++++++++++++++++++++++ + fs/exec.c | 2 + fs/inode.c | 1247 ++++++++++++++++++ + fs/super.c | 926 +++++++++++++ + include/linux/fs.h | 1709 +++++++++++++++++++++++++ + 6 files changed, 8122 insertions(+), 1 deletion(-) ---- linux-2.4.20/fs/inode.c~invalidate_show 2003-04-08 23:34:36.000000000 -0600 -+++ linux-2.4.20-braam/fs/inode.c 2003-04-08 23:34:36.000000000 -0600 -@@ -553,7 +553,8 @@ static void dispose_list(struct list_hea - /* - * Invalidate all inodes for a device. - */ --static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose) +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.19-hp3_pnnl1-root/fs/inode.c 2003-04-15 13:21:57.000000000 +0800 +@@ -0,0 +1,1247 @@ ++/* ++ * linux/fs/inode.c ++ * ++ * (C) 1997 Linus Torvalds ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * New inode.c implementation. ++ * ++ * This implementation has the basic premise of trying ++ * to be extremely low-overhead and SMP-safe, yet be ++ * simple enough to be "obviously correct". 
++ * ++ * Famous last words. ++ */ ++ ++/* inode dynamic allocation 1999, Andrea Arcangeli */ ++ ++/* #define INODE_PARANOIA 1 */ ++/* #define INODE_DEBUG 1 */ ++ ++/* ++ * Inode lookup is no longer as critical as it used to be: ++ * most of the lookups are going to be through the dcache. ++ */ ++#define I_HASHBITS i_hash_shift ++#define I_HASHMASK i_hash_mask ++ ++static unsigned int i_hash_mask; ++static unsigned int i_hash_shift; ++ ++/* ++ * Each inode can be on two separate lists. One is ++ * the hash list of the inode, used for lookups. The ++ * other linked list is the "type" list: ++ * "in_use" - valid inode, i_count > 0, i_nlink > 0 ++ * "dirty" - as "in_use" but also dirty ++ * "unused" - valid inode, i_count = 0 ++ * ++ * A "dirty" list is maintained for each super block, ++ * allowing for low-overhead inode sync() operations. ++ */ ++ ++static LIST_HEAD(inode_in_use); ++static LIST_HEAD(inode_unused); ++static struct list_head *inode_hashtable; ++static LIST_HEAD(anon_hash_chain); /* for inodes with NULL i_sb */ ++ ++/* ++ * A simple spinlock to protect the list manipulations. ++ * ++ * NOTE! You also have to own the lock if you change ++ * the i_state of an inode while it is in use.. ++ */ ++static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; ++ ++/* ++ * Statistics gathering.. 
++ */ ++struct inodes_stat_t inodes_stat; ++ ++static kmem_cache_t * inode_cachep; ++ ++static struct inode *alloc_inode(struct super_block *sb) ++{ ++ static struct address_space_operations empty_aops; ++ static struct inode_operations empty_iops; ++ static struct file_operations empty_fops; ++ struct inode *inode; ++ ++ if (sb->s_op->alloc_inode) ++ inode = sb->s_op->alloc_inode(sb); ++ else { ++ inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL); ++ /* will die */ ++ if (inode) ++ memset(&inode->u, 0, sizeof(inode->u)); ++ } ++ ++ if (inode) { ++ struct address_space * const mapping = &inode->i_data; ++ ++ inode->i_sb = sb; ++ inode->i_dev = sb->s_dev; ++ inode->i_blkbits = sb->s_blocksize_bits; ++ inode->i_flags = 0; ++ atomic_set(&inode->i_count, 1); ++ inode->i_sock = 0; ++ inode->i_op = &empty_iops; ++ inode->i_fop = &empty_fops; ++ inode->i_nlink = 1; ++ atomic_set(&inode->i_writecount, 0); ++ inode->i_size = 0; ++ inode->i_blocks = 0; ++ inode->i_bytes = 0; ++ inode->i_generation = 0; ++ memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); ++ inode->i_pipe = NULL; ++ inode->i_bdev = NULL; ++ inode->i_cdev = NULL; ++ ++ mapping->a_ops = &empty_aops; ++ mapping->host = inode; ++ mapping->gfp_mask = GFP_HIGHUSER; ++ inode->i_mapping = mapping; ++ } ++ return inode; ++} ++ ++static void destroy_inode(struct inode *inode) ++{ ++ if (inode_has_buffers(inode)) ++ BUG(); ++ if (inode->i_sb->s_op->destroy_inode) ++ inode->i_sb->s_op->destroy_inode(inode); ++ else ++ kmem_cache_free(inode_cachep, inode); ++} ++ ++ ++/* ++ * These are initializations that only need to be done ++ * once, because the fields are idempotent across use ++ * of the inode, so let the slab aware of that. 
++ */ ++void inode_init_once(struct inode *inode) ++{ ++ memset(inode, 0, sizeof(*inode)); ++ init_waitqueue_head(&inode->i_wait); ++ INIT_LIST_HEAD(&inode->i_hash); ++ INIT_LIST_HEAD(&inode->i_data.clean_pages); ++ INIT_LIST_HEAD(&inode->i_data.dirty_pages); ++ INIT_LIST_HEAD(&inode->i_data.locked_pages); ++ INIT_LIST_HEAD(&inode->i_dentry); ++ INIT_LIST_HEAD(&inode->i_dirty_buffers); ++ INIT_LIST_HEAD(&inode->i_dirty_data_buffers); ++ INIT_LIST_HEAD(&inode->i_devices); ++ sema_init(&inode->i_sem, 1); ++ sema_init(&inode->i_zombie, 1); ++ spin_lock_init(&inode->i_data.i_shared_lock); ++} ++ ++static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) ++{ ++ struct inode * inode = (struct inode *) foo; ++ ++ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == ++ SLAB_CTOR_CONSTRUCTOR) ++ inode_init_once(inode); ++} ++ ++/* ++ * Put the inode on the super block's dirty list. ++ * ++ * CAREFUL! We mark it dirty unconditionally, but ++ * move it onto the dirty list only if it is hashed. ++ * If it was not hashed, it will never be added to ++ * the dirty list even if it is later hashed, as it ++ * will have been marked dirty already. ++ * ++ * In short, make sure you hash any inodes _before_ ++ * you start marking them dirty.. ++ */ ++ ++/** ++ * __mark_inode_dirty - internal function ++ * @inode: inode to mark ++ * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) ++ * Mark an inode as dirty. Callers should use mark_inode_dirty or ++ * mark_inode_dirty_sync. 
++ */ ++ ++void __mark_inode_dirty(struct inode *inode, int flags) ++{ ++ struct super_block * sb = inode->i_sb; ++ ++ if (!sb) ++ return; ++ ++ /* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */ ++ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { ++ if (sb->s_op && sb->s_op->dirty_inode) ++ sb->s_op->dirty_inode(inode); ++ } ++ ++ /* avoid the locking if we can */ ++ if ((inode->i_state & flags) == flags) ++ return; ++ ++ spin_lock(&inode_lock); ++ if ((inode->i_state & flags) != flags) { ++ inode->i_state |= flags; ++ /* Only add valid (ie hashed) inodes to the dirty list */ ++ if (!(inode->i_state & I_LOCK) && !list_empty(&inode->i_hash)) { ++ list_del(&inode->i_list); ++ list_add(&inode->i_list, &sb->s_dirty); ++ } ++ } ++ spin_unlock(&inode_lock); ++} ++ ++static void __wait_on_inode(struct inode * inode) ++{ ++ DECLARE_WAITQUEUE(wait, current); ++ ++ add_wait_queue(&inode->i_wait, &wait); ++repeat: ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ if (inode->i_state & I_LOCK) { ++ schedule(); ++ goto repeat; ++ } ++ remove_wait_queue(&inode->i_wait, &wait); ++ current->state = TASK_RUNNING; ++} ++ ++static inline void wait_on_inode(struct inode *inode) ++{ ++ if (inode->i_state & I_LOCK) ++ __wait_on_inode(inode); ++} ++ ++ ++static inline void write_inode(struct inode *inode, int sync) ++{ ++ if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) ++ inode->i_sb->s_op->write_inode(inode, sync); ++} ++ ++static inline void __iget(struct inode * inode) ++{ ++ if (atomic_read(&inode->i_count)) { ++ atomic_inc(&inode->i_count); ++ return; ++ } ++ atomic_inc(&inode->i_count); ++ if (!(inode->i_state & (I_DIRTY|I_LOCK))) { ++ list_del(&inode->i_list); ++ list_add(&inode->i_list, &inode_in_use); ++ } ++ inodes_stat.nr_unused--; ++} ++ ++static inline void __sync_one(struct inode *inode, int sync) ++{ ++ unsigned dirty; ++ ++ list_del(&inode->i_list); ++ list_add(&inode->i_list, 
&inode->i_sb->s_locked_inodes); ++ ++ if (inode->i_state & I_LOCK) ++ BUG(); ++ ++ /* Set I_LOCK, reset I_DIRTY */ ++ dirty = inode->i_state & I_DIRTY; ++ inode->i_state |= I_LOCK; ++ inode->i_state &= ~I_DIRTY; ++ spin_unlock(&inode_lock); ++ ++ filemap_fdatasync(inode->i_mapping); ++ ++ /* Don't write the inode if only I_DIRTY_PAGES was set */ ++ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ++ write_inode(inode, sync); ++ ++ filemap_fdatawait(inode->i_mapping); ++ ++ spin_lock(&inode_lock); ++ inode->i_state &= ~I_LOCK; ++ if (!(inode->i_state & I_FREEING)) { ++ struct list_head *to; ++ if (inode->i_state & I_DIRTY) ++ to = &inode->i_sb->s_dirty; ++ else if (atomic_read(&inode->i_count)) ++ to = &inode_in_use; ++ else ++ to = &inode_unused; ++ list_del(&inode->i_list); ++ list_add(&inode->i_list, to); ++ } ++ wake_up(&inode->i_wait); ++} ++ ++static inline void sync_one(struct inode *inode, int sync) ++{ ++ while (inode->i_state & I_LOCK) { ++ __iget(inode); ++ spin_unlock(&inode_lock); ++ __wait_on_inode(inode); ++ iput(inode); ++ spin_lock(&inode_lock); ++ } ++ ++ __sync_one(inode, sync); ++} ++ ++static inline void sync_list(struct list_head *head) ++{ ++ struct list_head * tmp; ++ ++ while ((tmp = head->prev) != head) ++ __sync_one(list_entry(tmp, struct inode, i_list), 0); ++} ++ ++static inline void wait_on_locked(struct list_head *head) ++{ ++ struct list_head * tmp; ++ while ((tmp = head->prev) != head) { ++ struct inode *inode = list_entry(tmp, struct inode, i_list); ++ __iget(inode); ++ spin_unlock(&inode_lock); ++ __wait_on_inode(inode); ++ iput(inode); ++ spin_lock(&inode_lock); ++ } ++} ++ ++static inline int try_to_sync_unused_list(struct list_head *head, int nr_inodes) ++{ ++ struct list_head *tmp = head; ++ struct inode *inode; ++ ++ while (nr_inodes && (tmp = tmp->prev) != head) { ++ inode = list_entry(tmp, struct inode, i_list); ++ ++ if (!atomic_read(&inode->i_count)) { ++ __sync_one(inode, 0); ++ nr_inodes--; ++ ++ /* ++ * __sync_one moved 
the inode to another list, ++ * so we have to start looking from the list head. ++ */ ++ tmp = head; ++ } ++ } ++ ++ return nr_inodes; ++} ++ ++void sync_inodes_sb(struct super_block *sb) ++{ ++ spin_lock(&inode_lock); ++ while (!list_empty(&sb->s_dirty)||!list_empty(&sb->s_locked_inodes)) { ++ sync_list(&sb->s_dirty); ++ wait_on_locked(&sb->s_locked_inodes); ++ } ++ spin_unlock(&inode_lock); ++} ++ ++/* ++ * Note: ++ * We don't need to grab a reference to superblock here. If it has non-empty ++ * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed ++ * past sync_inodes_sb() until both ->s_dirty and ->s_locked_inodes are ++ * empty. Since __sync_one() regains inode_lock before it finally moves ++ * inode from superblock lists we are OK. ++ */ ++ ++void sync_unlocked_inodes(void) ++{ ++ struct super_block * sb; ++ spin_lock(&inode_lock); ++ spin_lock(&sb_lock); ++ sb = sb_entry(super_blocks.next); ++ for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) { ++ if (!list_empty(&sb->s_dirty)) { ++ spin_unlock(&sb_lock); ++ sync_list(&sb->s_dirty); ++ spin_lock(&sb_lock); ++ } ++ } ++ spin_unlock(&sb_lock); ++ spin_unlock(&inode_lock); ++} ++ ++/* ++ * Find a superblock with inodes that need to be synced ++ */ ++ ++static struct super_block *get_super_to_sync(void) ++{ ++ struct list_head *p; ++restart: ++ spin_lock(&inode_lock); ++ spin_lock(&sb_lock); ++ list_for_each(p, &super_blocks) { ++ struct super_block *s = list_entry(p,struct super_block,s_list); ++ if (list_empty(&s->s_dirty) && list_empty(&s->s_locked_inodes)) ++ continue; ++ s->s_count++; ++ spin_unlock(&sb_lock); ++ spin_unlock(&inode_lock); ++ down_read(&s->s_umount); ++ if (!s->s_root) { ++ drop_super(s); ++ goto restart; ++ } ++ return s; ++ } ++ spin_unlock(&sb_lock); ++ spin_unlock(&inode_lock); ++ return NULL; ++} ++ ++/** ++ * sync_inodes ++ * @dev: device to sync the inodes from. 
++ * ++ * sync_inodes goes through the super block's dirty list, ++ * writes them out, and puts them back on the normal list. ++ */ ++ ++void sync_inodes(kdev_t dev) ++{ ++ struct super_block * s; ++ ++ /* ++ * Search the super_blocks array for the device(s) to sync. ++ */ ++ if (dev) { ++ if ((s = get_super(dev)) != NULL) { ++ sync_inodes_sb(s); ++ drop_super(s); ++ } ++ } else { ++ while ((s = get_super_to_sync()) != NULL) { ++ sync_inodes_sb(s); ++ drop_super(s); ++ } ++ } ++} ++ ++static void try_to_sync_unused_inodes(void * arg) ++{ ++ struct super_block * sb; ++ int nr_inodes = inodes_stat.nr_unused; ++ ++ spin_lock(&inode_lock); ++ spin_lock(&sb_lock); ++ sb = sb_entry(super_blocks.next); ++ for (; nr_inodes && sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) { ++ if (list_empty(&sb->s_dirty)) ++ continue; ++ spin_unlock(&sb_lock); ++ nr_inodes = try_to_sync_unused_list(&sb->s_dirty, nr_inodes); ++ spin_lock(&sb_lock); ++ } ++ spin_unlock(&sb_lock); ++ spin_unlock(&inode_lock); ++} ++ ++static struct tq_struct unused_inodes_flush_task; ++ ++/** ++ * write_inode_now - write an inode to disk ++ * @inode: inode to write to disk ++ * @sync: whether the write should be synchronous or not ++ * ++ * This function commits an inode to disk immediately if it is ++ * dirty. This is primarily needed by knfsd. ++ */ ++ ++void write_inode_now(struct inode *inode, int sync) ++{ ++ struct super_block * sb = inode->i_sb; ++ ++ if (sb) { ++ spin_lock(&inode_lock); ++ while (inode->i_state & I_DIRTY) ++ sync_one(inode, sync); ++ spin_unlock(&inode_lock); ++ if (sync) ++ wait_on_inode(inode); ++ } ++ else ++ printk(KERN_ERR "write_inode_now: no super block\n"); ++} ++ ++/** ++ * generic_osync_inode - flush all dirty data for a given inode to disk ++ * @inode: inode to write ++ * @datasync: if set, don't bother flushing timestamps ++ * ++ * This can be called by file_write functions for files which have the ++ * O_SYNC flag set, to flush dirty writes to disk. 
++ */ ++ ++int generic_osync_inode(struct inode *inode, int what) ++{ ++ int err = 0, err2 = 0, need_write_inode_now = 0; ++ ++ /* ++ * WARNING ++ * ++ * Currently, the filesystem write path does not pass the ++ * filp down to the low-level write functions. Therefore it ++ * is impossible for (say) __block_commit_write to know if ++ * the operation is O_SYNC or not. ++ * ++ * Ideally, O_SYNC writes would have the filesystem call ++ * ll_rw_block as it went to kick-start the writes, and we ++ * could call osync_inode_buffers() here to wait only for ++ * those IOs which have already been submitted to the device ++ * driver layer. As it stands, if we did this we'd not write ++ * anything to disk since our writes have not been queued by ++ * this point: they are still on the dirty LRU. ++ * ++ * So, currently we will call fsync_inode_buffers() instead, ++ * to flush _all_ dirty buffers for this inode to disk on ++ * every O_SYNC write, not just the synchronous I/Os. --sct ++ */ ++ ++ if (what & OSYNC_METADATA) ++ err = fsync_inode_buffers(inode); ++ if (what & OSYNC_DATA) ++ err2 = fsync_inode_data_buffers(inode); ++ if (!err) ++ err = err2; ++ ++ spin_lock(&inode_lock); ++ if ((inode->i_state & I_DIRTY) && ++ ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC))) ++ need_write_inode_now = 1; ++ spin_unlock(&inode_lock); ++ ++ if (need_write_inode_now) ++ write_inode_now(inode, 1); ++ else ++ wait_on_inode(inode); ++ ++ return err; ++} ++ ++/** ++ * clear_inode - clear an inode ++ * @inode: inode to clear ++ * ++ * This is called by the filesystem to tell us ++ * that the inode is no longer useful. We just ++ * terminate it with extreme prejudice. 
++ */ ++ ++void clear_inode(struct inode *inode) ++{ ++ invalidate_inode_buffers(inode); ++ ++ if (inode->i_data.nrpages) ++ BUG(); ++ if (!(inode->i_state & I_FREEING)) ++ BUG(); ++ if (inode->i_state & I_CLEAR) ++ BUG(); ++ wait_on_inode(inode); ++ DQUOT_DROP(inode); ++ if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode) ++ inode->i_sb->s_op->clear_inode(inode); ++ if (inode->i_bdev) ++ bd_forget(inode); ++ else if (inode->i_cdev) { ++ cdput(inode->i_cdev); ++ inode->i_cdev = NULL; ++ } ++ inode->i_state = I_CLEAR; ++} ++ ++/* ++ * Dispose-list gets a local list with local inodes in it, so it doesn't ++ * need to worry about list corruption and SMP locks. ++ */ ++static void dispose_list(struct list_head * head) ++{ ++ struct list_head * inode_entry; ++ struct inode * inode; ++ ++ while ((inode_entry = head->next) != head) ++ { ++ list_del(inode_entry); ++ ++ inode = list_entry(inode_entry, struct inode, i_list); ++ if (inode->i_data.nrpages) ++ truncate_inode_pages(&inode->i_data, 0); ++ clear_inode(inode); ++ destroy_inode(inode); ++ inodes_stat.nr_inodes--; ++ } ++} ++ ++/* ++ * Invalidate all inodes for a device. 
++ */ +static int invalidate_list(struct list_head *head, struct super_block * sb, + struct list_head * dispose, int show) - { - struct list_head *next; - int busy = 0, count = 0; -@@ -578,6 +579,11 @@ static int invalidate_list(struct list_h - count++; - continue; - } ++{ ++ struct list_head *next; ++ int busy = 0, count = 0; ++ ++ next = head->next; ++ for (;;) { ++ struct list_head * tmp = next; ++ struct inode * inode; ++ ++ next = next->next; ++ if (tmp == head) ++ break; ++ inode = list_entry(tmp, struct inode, i_list); ++ if (inode->i_sb != sb) ++ continue; ++ invalidate_inode_buffers(inode); ++ if (!atomic_read(&inode->i_count)) { ++ list_del_init(&inode->i_hash); ++ list_del(&inode->i_list); ++ list_add(&inode->i_list, dispose); ++ inode->i_state |= I_FREEING; ++ count++; ++ continue; ++ } + if (show) + printk(KERN_ERR + "inode busy: dev %s:%lu (%p) mode %o count %u\n", + kdevname(sb->s_dev), inode->i_ino, inode, + inode->i_mode, atomic_read(&inode->i_count)); - busy = 1; - } - /* only unused inodes may be cached with i_count zero */ -@@ -596,22 +602,23 @@ static int invalidate_list(struct list_h - /** - * invalidate_inodes - discard the inodes on a device - * @sb: superblock ++ busy = 1; ++ } ++ /* only unused inodes may be cached with i_count zero */ ++ inodes_stat.nr_unused -= count; ++ return busy; ++} ++ ++/* ++ * This is a two-stage process. First we collect all ++ * offending inodes onto the throw-away list, and in ++ * the second stage we actually dispose of them. This ++ * is because we don't want to sleep while messing ++ * with the global lists.. ++ */ ++ ++/** ++ * invalidate_inodes - discard the inodes on a device ++ * @sb: superblock + * @show: whether we should display any busy inodes found - * - * Discard all of the inodes for a given superblock. If the discard - * fails because there are busy inodes then a non zero value is returned. - * If the discard is successful all the inodes have been discarded. 
- */ - --int invalidate_inodes(struct super_block * sb) ++ * ++ * Discard all of the inodes for a given superblock. If the discard ++ * fails because there are busy inodes then a non zero value is returned. ++ * If the discard is successful all the inodes have been discarded. ++ */ ++ +int invalidate_inodes(struct super_block * sb, int show) - { - int busy; - LIST_HEAD(throw_away); - - spin_lock(&inode_lock); -- busy = invalidate_list(&inode_in_use, sb, &throw_away); -- busy |= invalidate_list(&inode_unused, sb, &throw_away); -- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); -- busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away); ++{ ++ int busy; ++ LIST_HEAD(throw_away); ++ ++ spin_lock(&inode_lock); + busy = invalidate_list(&inode_in_use, sb, &throw_away, show); + busy |= invalidate_list(&inode_unused, sb, &throw_away, show); + busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show); + busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show); - spin_unlock(&inode_lock); - - dispose_list(&throw_away); -@@ -637,7 +644,7 @@ int invalidate_device(kdev_t dev, int do - * hold). - */ - shrink_dcache_sb(sb); -- res = invalidate_inodes(sb); ++ spin_unlock(&inode_lock); ++ ++ dispose_list(&throw_away); ++ ++ return busy; ++} ++ ++int invalidate_device(kdev_t dev, int do_sync) ++{ ++ struct super_block *sb; ++ int res; ++ ++ if (do_sync) ++ fsync_dev(dev); ++ ++ res = 0; ++ sb = get_super(dev); ++ if (sb) { ++ /* ++ * no need to lock the super, get_super holds the ++ * read semaphore so the filesystem cannot go away ++ * under us (->put_super runs with the write lock ++ * hold). 
++ */ ++ shrink_dcache_sb(sb); + res = invalidate_inodes(sb, 0); - drop_super(sb); - } - invalidate_buffers(dev); ---- linux-2.4.20/fs/super.c~invalidate_show 2003-04-08 23:34:36.000000000 -0600 -+++ linux-2.4.20-braam/fs/super.c 2003-04-08 23:34:36.000000000 -0600 -@@ -835,7 +835,7 @@ void kill_super(struct super_block *sb) - lock_super(sb); - lock_kernel(); - sb->s_flags &= ~MS_ACTIVE; -- invalidate_inodes(sb); /* bad name - it should be evict_inodes() */ ++ drop_super(sb); ++ } ++ invalidate_buffers(dev); ++ return res; ++} ++ ++ ++/* ++ * This is called with the inode lock held. It searches ++ * the in-use for freeable inodes, which are moved to a ++ * temporary list and then placed on the unused list by ++ * dispose_list. ++ * ++ * We don't expect to have to call this very often. ++ * ++ * N.B. The spinlock is released during the call to ++ * dispose_list. ++ */ ++#define CAN_UNUSE(inode) \ ++ ((((inode)->i_state | (inode)->i_data.nrpages) == 0) && \ ++ !inode_has_buffers(inode)) ++#define INODE(entry) (list_entry(entry, struct inode, i_list)) ++ ++void prune_icache(int goal) ++{ ++ LIST_HEAD(list); ++ struct list_head *entry, *freeable = &list; ++ int count; ++ struct inode * inode; ++ ++ spin_lock(&inode_lock); ++ ++ count = 0; ++ entry = inode_unused.prev; ++ while (entry != &inode_unused) ++ { ++ struct list_head *tmp = entry; ++ ++ entry = entry->prev; ++ inode = INODE(tmp); ++ if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK)) ++ continue; ++ if (!CAN_UNUSE(inode)) ++ continue; ++ if (atomic_read(&inode->i_count)) ++ continue; ++ list_del(tmp); ++ list_del(&inode->i_hash); ++ INIT_LIST_HEAD(&inode->i_hash); ++ list_add(tmp, freeable); ++ inode->i_state |= I_FREEING; ++ count++; ++ if (!--goal) ++ break; ++ } ++ inodes_stat.nr_unused -= count; ++ spin_unlock(&inode_lock); ++ ++ dispose_list(freeable); ++ ++ /* ++ * If we didn't freed enough clean inodes schedule ++ * a sync of the dirty inodes, we cannot do it ++ * from here or we're either synchronously 
dogslow ++ * or we deadlock with oom. ++ */ ++ if (goal) ++ schedule_task(&unused_inodes_flush_task); ++} ++ ++int shrink_icache_memory(int priority, int gfp_mask) ++{ ++ int count = 0; ++ ++ /* ++ * Nasty deadlock avoidance.. ++ * ++ * We may hold various FS locks, and we don't ++ * want to recurse into the FS that called us ++ * in clear_inode() and friends.. ++ */ ++ if (!(gfp_mask & __GFP_FS)) ++ return 0; ++ ++ count = inodes_stat.nr_unused / priority; ++ ++ prune_icache(count); ++ return kmem_cache_shrink(inode_cachep); ++} ++ ++/* ++ * Called with the inode lock held. ++ * NOTE: we are not increasing the inode-refcount, you must call __iget() ++ * by hand after calling find_inode now! This simplifies iunique and won't ++ * add any additional branch in the common code. ++ */ ++static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque) ++{ ++ struct list_head *tmp; ++ struct inode * inode; ++ ++ tmp = head; ++ for (;;) { ++ tmp = tmp->next; ++ inode = NULL; ++ if (tmp == head) ++ break; ++ inode = list_entry(tmp, struct inode, i_hash); ++ if (inode->i_ino != ino) ++ continue; ++ if (inode->i_sb != sb) ++ continue; ++ if (find_actor && !find_actor(inode, ino, opaque)) ++ continue; ++ break; ++ } ++ return inode; ++} ++ ++/** ++ * new_inode - obtain an inode ++ * @sb: superblock ++ * ++ * Allocates a new inode for given superblock. ++ */ ++ ++struct inode * new_inode(struct super_block *sb) ++{ ++ static unsigned long last_ino; ++ struct inode * inode; ++ ++ spin_lock_prefetch(&inode_lock); ++ ++ inode = alloc_inode(sb); ++ if (inode) { ++ spin_lock(&inode_lock); ++ inodes_stat.nr_inodes++; ++ list_add(&inode->i_list, &inode_in_use); ++ inode->i_ino = ++last_ino; ++ inode->i_state = 0; ++ spin_unlock(&inode_lock); ++ } ++ return inode; ++} ++ ++void unlock_new_inode(struct inode *inode) ++{ ++ /* ++ * This is special! 
We do not need the spinlock ++ * when clearing I_LOCK, because we're guaranteed ++ * that nobody else tries to do anything about the ++ * state of the inode when it is locked, as we ++ * just created it (so there can be no old holders ++ * that haven't tested I_LOCK). ++ */ ++ inode->i_state &= ~(I_LOCK|I_NEW); ++ wake_up(&inode->i_wait); ++} ++ ++/* ++ * This is called without the inode lock held.. Be careful. ++ * ++ * We no longer cache the sb_flags in i_flags - see fs.h ++ * -- rmk@arm.uk.linux.org ++ */ ++static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque) ++{ ++ struct inode * inode; ++ ++ inode = alloc_inode(sb); ++ if (inode) { ++ struct inode * old; ++ ++ spin_lock(&inode_lock); ++ /* We released the lock, so.. */ ++ old = find_inode(sb, ino, head, find_actor, opaque); ++ if (!old) { ++ inodes_stat.nr_inodes++; ++ list_add(&inode->i_list, &inode_in_use); ++ list_add(&inode->i_hash, head); ++ inode->i_ino = ino; ++ inode->i_state = I_LOCK|I_NEW; ++ spin_unlock(&inode_lock); ++ ++ /* ++ * Return the locked inode with I_NEW set, the ++ * caller is responsible for filling in the contents ++ */ ++ return inode; ++ } ++ ++ /* ++ * Uhhuh, somebody else created the same inode under ++ * us. Use the old inode instead of the one we just ++ * allocated. ++ */ ++ __iget(old); ++ spin_unlock(&inode_lock); ++ destroy_inode(inode); ++ inode = old; ++ wait_on_inode(inode); ++ } ++ return inode; ++} ++ ++static inline unsigned long hash(struct super_block *sb, unsigned long i_ino) ++{ ++ unsigned long tmp = i_ino + ((unsigned long) sb / L1_CACHE_BYTES); ++ tmp = tmp + (tmp >> I_HASHBITS); ++ return tmp & I_HASHMASK; ++} ++ ++/* Yeah, I know about quadratic hash. Maybe, later. 
*/ ++ ++/** ++ * iunique - get a unique inode number ++ * @sb: superblock ++ * @max_reserved: highest reserved inode number ++ * ++ * Obtain an inode number that is unique on the system for a given ++ * superblock. This is used by file systems that have no natural ++ * permanent inode numbering system. An inode number is returned that ++ * is higher than the reserved limit but unique. ++ * ++ * BUGS: ++ * With a large number of inodes live on the file system this function ++ * currently becomes quite slow. ++ */ ++ ++ino_t iunique(struct super_block *sb, ino_t max_reserved) ++{ ++ static ino_t counter = 0; ++ struct inode *inode; ++ struct list_head * head; ++ ino_t res; ++ spin_lock(&inode_lock); ++retry: ++ if (counter > max_reserved) { ++ head = inode_hashtable + hash(sb,counter); ++ inode = find_inode(sb, res = counter++, head, NULL, NULL); ++ if (!inode) { ++ spin_unlock(&inode_lock); ++ return res; ++ } ++ } else { ++ counter = max_reserved + 1; ++ } ++ goto retry; ++ ++} ++ ++struct inode *igrab(struct inode *inode) ++{ ++ spin_lock(&inode_lock); ++ if (!(inode->i_state & I_FREEING)) ++ __iget(inode); ++ else ++ /* ++ * Handle the case where s_op->clear_inode is not been ++ * called yet, and somebody is calling igrab ++ * while the inode is getting freed. ++ */ ++ inode = NULL; ++ spin_unlock(&inode_lock); ++ return inode; ++} ++ ++struct inode *iget4_locked(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) ++{ ++ struct list_head * head = inode_hashtable + hash(sb,ino); ++ struct inode * inode; ++ ++ spin_lock(&inode_lock); ++ inode = find_inode(sb, ino, head, find_actor, opaque); ++ if (inode) { ++ __iget(inode); ++ spin_unlock(&inode_lock); ++ wait_on_inode(inode); ++ return inode; ++ } ++ spin_unlock(&inode_lock); ++ ++ /* ++ * get_new_inode() will do the right thing, re-trying the search ++ * in case it had to block at any point. 
++ */ ++ return get_new_inode(sb, ino, head, find_actor, opaque); ++} ++ ++/** ++ * insert_inode_hash - hash an inode ++ * @inode: unhashed inode ++ * ++ * Add an inode to the inode hash for this superblock. If the inode ++ * has no superblock it is added to a separate anonymous chain. ++ */ ++ ++void insert_inode_hash(struct inode *inode) ++{ ++ struct list_head *head = &anon_hash_chain; ++ if (inode->i_sb) ++ head = inode_hashtable + hash(inode->i_sb, inode->i_ino); ++ spin_lock(&inode_lock); ++ list_add(&inode->i_hash, head); ++ spin_unlock(&inode_lock); ++} ++ ++/** ++ * remove_inode_hash - remove an inode from the hash ++ * @inode: inode to unhash ++ * ++ * Remove an inode from the superblock or anonymous hash. ++ */ ++ ++void remove_inode_hash(struct inode *inode) ++{ ++ spin_lock(&inode_lock); ++ list_del(&inode->i_hash); ++ INIT_LIST_HEAD(&inode->i_hash); ++ spin_unlock(&inode_lock); ++} ++ ++/** ++ * iput - put an inode ++ * @inode: inode to put ++ * ++ * Puts an inode, dropping its usage count. If the inode use count hits ++ * zero the inode is also then freed and may be destroyed. 
++ */ ++ ++void iput(struct inode *inode) ++{ ++ if (inode) { ++ struct super_block *sb = inode->i_sb; ++ struct super_operations *op = NULL; ++ ++ if (inode->i_state == I_CLEAR) ++ BUG(); ++ ++ if (sb && sb->s_op) ++ op = sb->s_op; ++ if (op && op->put_inode) ++ op->put_inode(inode); ++ ++ if (!atomic_dec_and_lock(&inode->i_count, &inode_lock)) ++ return; ++ ++ if (!inode->i_nlink) { ++ list_del(&inode->i_hash); ++ INIT_LIST_HEAD(&inode->i_hash); ++ list_del(&inode->i_list); ++ INIT_LIST_HEAD(&inode->i_list); ++ inode->i_state|=I_FREEING; ++ inodes_stat.nr_inodes--; ++ spin_unlock(&inode_lock); ++ ++ if (inode->i_data.nrpages) ++ truncate_inode_pages(&inode->i_data, 0); ++ ++ if (op && op->delete_inode) { ++ void (*delete)(struct inode *) = op->delete_inode; ++ if (!is_bad_inode(inode)) ++ DQUOT_INIT(inode); ++ /* s_op->delete_inode internally recalls clear_inode() */ ++ delete(inode); ++ } else ++ clear_inode(inode); ++ if (inode->i_state != I_CLEAR) ++ BUG(); ++ } else { ++ if (!list_empty(&inode->i_hash)) { ++ if (!(inode->i_state & (I_DIRTY|I_LOCK))) { ++ list_del(&inode->i_list); ++ list_add(&inode->i_list, &inode_unused); ++ } ++ inodes_stat.nr_unused++; ++ spin_unlock(&inode_lock); ++ if (!sb || (sb->s_flags & MS_ACTIVE)) ++ return; ++ write_inode_now(inode, 1); ++ spin_lock(&inode_lock); ++ inodes_stat.nr_unused--; ++ list_del_init(&inode->i_hash); ++ } ++ list_del_init(&inode->i_list); ++ inode->i_state|=I_FREEING; ++ inodes_stat.nr_inodes--; ++ spin_unlock(&inode_lock); ++ if (inode->i_data.nrpages) ++ truncate_inode_pages(&inode->i_data, 0); ++ clear_inode(inode); ++ } ++ destroy_inode(inode); ++ } ++} ++ ++void force_delete(struct inode *inode) ++{ ++ /* ++ * Kill off unused inodes ... iput() will unhash and ++ * delete the inode if we set i_nlink to zero. 
++ */ ++ if (atomic_read(&inode->i_count) == 1) ++ inode->i_nlink = 0; ++} ++ ++/** ++ * bmap - find a block number in a file ++ * @inode: inode of file ++ * @block: block to find ++ * ++ * Returns the block number on the device holding the inode that ++ * is the disk block number for the block of the file requested. ++ * That is, asked for block 4 of inode 1 the function will return the ++ * disk block relative to the disk start that holds that block of the ++ * file. ++ */ ++ ++int bmap(struct inode * inode, int block) ++{ ++ int res = 0; ++ if (inode->i_mapping->a_ops->bmap) ++ res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block); ++ return res; ++} ++ ++/* ++ * Initialize the hash tables. ++ */ ++void __init inode_init(unsigned long mempages) ++{ ++ struct list_head *head; ++ unsigned long order; ++ unsigned int nr_hash; ++ int i; ++ ++ mempages >>= (14 - PAGE_SHIFT); ++ mempages *= sizeof(struct list_head); ++ for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++) ++ ; ++ ++ do { ++ unsigned long tmp; ++ ++ nr_hash = (1UL << order) * PAGE_SIZE / ++ sizeof(struct list_head); ++ i_hash_mask = (nr_hash - 1); ++ ++ tmp = nr_hash; ++ i_hash_shift = 0; ++ while ((tmp >>= 1UL) != 0UL) ++ i_hash_shift++; ++ ++ inode_hashtable = (struct list_head *) ++ __get_free_pages(GFP_ATOMIC, order); ++ } while (inode_hashtable == NULL && --order >= 0); ++ ++ printk(KERN_INFO "Inode cache hash table entries: %d (order: %ld, %ld bytes)\n", ++ nr_hash, order, (PAGE_SIZE << order)); ++ ++ if (!inode_hashtable) ++ panic("Failed to allocate inode hash table\n"); ++ ++ head = inode_hashtable; ++ i = nr_hash; ++ do { ++ INIT_LIST_HEAD(head); ++ head++; ++ i--; ++ } while (i); ++ ++ /* inode slab cache */ ++ inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), ++ 0, SLAB_HWCACHE_ALIGN, init_once, ++ NULL); ++ if (!inode_cachep) ++ panic("cannot create inode slab cache"); ++ ++ unused_inodes_flush_task.routine = try_to_sync_unused_inodes; ++} ++ ++/** 
++ * update_atime - update the access time ++ * @inode: inode accessed ++ * ++ * Update the accessed time on an inode and mark it for writeback. ++ * This function automatically handles read only file systems and media, ++ * as well as the "noatime" flag and inode specific "noatime" markers. ++ */ ++ ++void update_atime (struct inode *inode) ++{ ++ if (inode->i_atime == CURRENT_TIME) /* already current: do not dirty the inode again */ ++ return; ++ if ( IS_NOATIME (inode) ) return; ++ if ( IS_NODIRATIME (inode) && S_ISDIR (inode->i_mode) ) return; ++ if ( IS_RDONLY (inode) ) return; ++ inode->i_atime = CURRENT_TIME; ++ mark_inode_dirty_sync (inode); ++} /* End Function update_atime */ ++ ++ ++/* ++ * Quota functions that want to walk the inode lists.. ++ */ ++#ifdef CONFIG_QUOTA ++ ++/* Functions back in dquot.c */ ++void put_dquot_list(struct list_head *); ++int remove_inode_dquot_ref(struct inode *, int, struct list_head *); ++ ++void remove_dquot_ref(struct super_block *sb, int type) ++{ ++ struct inode *inode; ++ struct list_head *act_head; ++ LIST_HEAD(tofree_head); ++ ++ if (!sb->dq_op) ++ return; /* nothing to do */ ++ /* We have to be protected against other CPUs */ ++ lock_kernel(); /* This lock is for quota code */ ++ spin_lock(&inode_lock); /* This lock is for inodes code */ ++ ++ list_for_each(act_head, &inode_in_use) { ++ inode = list_entry(act_head, struct inode, i_list); ++ if (inode->i_sb == sb && IS_QUOTAINIT(inode)) ++ remove_inode_dquot_ref(inode, type, &tofree_head); ++ } ++ list_for_each(act_head, &inode_unused) { ++ inode = list_entry(act_head, struct inode, i_list); ++ if (inode->i_sb == sb && IS_QUOTAINIT(inode)) ++ remove_inode_dquot_ref(inode, type, &tofree_head); ++ } ++ list_for_each(act_head, &sb->s_dirty) { ++ inode = list_entry(act_head, struct inode, i_list); ++ if (IS_QUOTAINIT(inode)) ++ remove_inode_dquot_ref(inode, type, &tofree_head); ++ } ++ list_for_each(act_head, &sb->s_locked_inodes) { ++ inode = list_entry(act_head, struct inode, i_list); ++ if (IS_QUOTAINIT(inode)) ++ 
remove_inode_dquot_ref(inode, type, &tofree_head); ++ } ++ spin_unlock(&inode_lock); ++ unlock_kernel(); ++ ++ put_dquot_list(&tofree_head); /* called only after both locks are dropped */ ++} ++ ++#endif +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.19-hp3_pnnl1-root/fs/block_dev.c 2002-08-03 08:39:45.000000000 +0800 +@@ -0,0 +1,695 @@ ++/* ++ * linux/fs/block_dev.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ * Copyright (C) 2001 Andrea Arcangeli SuSE ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++static unsigned long max_block(kdev_t dev) /* blk_size[] entry rescaled from BLOCK_SIZE units to block_size(dev) units; ~0U when no size is recorded */ ++{ ++ unsigned int retval = ~0U; ++ int major = MAJOR(dev); ++ ++ if (blk_size[major]) { ++ int minor = MINOR(dev); ++ unsigned int blocks = blk_size[major][minor]; ++ if (blocks) { ++ unsigned int size = block_size(dev); ++ unsigned int sizebits = blksize_bits(size); ++ blocks += (size-1) >> BLOCK_SIZE_BITS; ++ retval = (sizebits > BLOCK_SIZE_BITS) /* choose the direction first so the shift count is never out of range */ ++ ? blocks >> (sizebits - BLOCK_SIZE_BITS) ++ : blocks << (BLOCK_SIZE_BITS - sizebits); ++ } ++ } ++ return retval; ++} ++ ++static loff_t blkdev_size(kdev_t dev) /* device size in bytes: blk_size[] entry (or ~0U if absent) shifted by BLOCK_SIZE_BITS */ ++{ ++ unsigned int blocks = ~0U; ++ int major = MAJOR(dev); ++ ++ if (blk_size[major]) { ++ int minor = MINOR(dev); ++ blocks = blk_size[major][minor]; ++ } ++ return (loff_t) blocks << BLOCK_SIZE_BITS; ++} ++ ++/* Kill _all_ buffers, dirty or not.. */ ++static void kill_bdev(struct block_device *bdev) ++{ ++ invalidate_bdev(bdev, 1); ++ truncate_inode_pages(bdev->bd_inode->i_mapping, 0); ++} ++ ++int set_blocksize(kdev_t dev, int size) /* returns 0 on success, -EINVAL for an unsupported size, -ENOMEM if no bdev could be obtained */ ++{ ++ int oldsize; ++ struct block_device *bdev; ++ ++ /* Size must be a power of two, and between 512 and PAGE_SIZE */ ++ if (size > PAGE_SIZE || size < 512 || (size & (size-1))) ++ return -EINVAL; ++ ++ /* Size cannot be smaller than the size supported by the device */ ++ if (size < get_hardsect_size(dev)) ++ return -EINVAL; ++ ++ /* No blocksize array? 
Implies hardcoded BLOCK_SIZE */ ++ if (!blksize_size[MAJOR(dev)]) { ++ if (size == BLOCK_SIZE) ++ return 0; ++ return -EINVAL; ++ } ++ ++ oldsize = blksize_size[MAJOR(dev)][MINOR(dev)]; ++ if (oldsize == size) ++ return 0; ++ ++ if (!oldsize && size == BLOCK_SIZE) { ++ blksize_size[MAJOR(dev)][MINOR(dev)] = size; ++ return 0; ++ } ++ ++ bdev = bdget(dev); /* Ok, we're actually changing the blocksize.. */ ++ if (!bdev) return -ENOMEM; /* bdget() returns NULL when it cannot allocate; bail out before touching any state */ ++ sync_buffers(dev, 2); ++ blksize_size[MAJOR(dev)][MINOR(dev)] = size; ++ bdev->bd_inode->i_blkbits = blksize_bits(size); ++ kill_bdev(bdev); ++ bdput(bdev); ++ return 0; ++} ++ ++int sb_set_blocksize(struct super_block *sb, int size) /* returns the new block size, or 0 if set_blocksize() refused it */ ++{ ++ int bits; ++ if (set_blocksize(sb->s_dev, size) < 0) ++ return 0; ++ sb->s_blocksize = size; ++ for (bits = 9, size >>= 9; size >>= 1; bits++) ++ ; ++ sb->s_blocksize_bits = bits; ++ return sb->s_blocksize; ++} ++ ++int sb_min_blocksize(struct super_block *sb, int size) /* like sb_set_blocksize(), but never below the hardware sector size */ ++{ ++ int minsize = get_hardsect_size(sb->s_dev); ++ if (size < minsize) ++ size = minsize; ++ return sb_set_blocksize(sb, size); ++} ++ ++static int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh, int create) /* identity mapping: file block == device block; -EIO past max_block() */ ++{ ++ if (iblock >= max_block(inode->i_rdev)) ++ return -EIO; ++ ++ bh->b_dev = inode->i_rdev; ++ bh->b_blocknr = iblock; ++ bh->b_state |= 1UL << BH_Mapped; ++ return 0; ++} ++ ++static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize) ++{ ++ return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, blkdev_get_block); ++} ++ ++static int blkdev_writepage(struct page * page) ++{ ++ return block_write_full_page(page, blkdev_get_block); ++} ++ ++static int blkdev_readpage(struct file * file, struct page * page) ++{ ++ return block_read_full_page(page, blkdev_get_block); ++} ++ ++static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) ++{ ++ return block_prepare_write(page, from, to, blkdev_get_block); ++} ++ 
++static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) /* delegates to block_commit_write(); file is unused */ ++{ ++ return block_commit_write(page, from, to); ++} ++ ++/* ++ * private llseek: ++ * for a block special file file->f_dentry->d_inode->i_size is zero ++ * so we compute the size by hand (just as in block_read/write above) ++ * Returns the new position, or -EINVAL when it would fall outside [0, size]. ++ */ ++static loff_t block_llseek(struct file *file, loff_t offset, int origin) ++{ ++ /* ewww */ ++ loff_t size = file->f_dentry->d_inode->i_bdev->bd_inode->i_size; ++ loff_t retval; ++ ++ switch (origin) { ++ case 2: /* offset is relative to the device size */ ++ offset += size; ++ break; ++ case 1: /* offset is relative to the current position; any other origin uses offset as given */ ++ offset += file->f_pos; ++ } ++ retval = -EINVAL; ++ if (offset >= 0 && offset <= size) { ++ if (offset != file->f_pos) { ++ file->f_pos = offset; ++ file->f_reada = 0; ++ file->f_version = ++event; ++ } ++ retval = offset; ++ } ++ return retval; ++} ++ ++ ++static int __block_fsync(struct inode * inode) /* runs all three sync steps regardless of errors and returns the first non-zero result, 0 if none */ ++{ ++ int ret, err; ++ ++ ret = filemap_fdatasync(inode->i_mapping); ++ err = sync_buffers(inode->i_rdev, 1); ++ if (err && !ret) ++ ret = err; ++ err = filemap_fdatawait(inode->i_mapping); ++ if (err && !ret) ++ ret = err; ++ ++ return ret; ++} ++ ++/* ++ * Filp may be NULL when we are called by an msync of a vma ++ * since the vma has no handle. 
++ */ ++ ++static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) /* filp and datasync are not used; syncs dentry->d_inode */ ++{ ++ struct inode * inode = dentry->d_inode; ++ ++ return __block_fsync(inode); ++} ++ ++/* ++ * pseudo-fs: bd_read_super() gives the superblock a root directory inode and a "bdev:" root dentry; it returns NULL if either allocation fails ++ */ ++ ++static struct super_block *bd_read_super(struct super_block *sb, void *data, int silent) ++{ ++ static struct super_operations sops = {}; ++ struct inode *root = new_inode(sb); ++ if (!root) ++ return NULL; ++ root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; ++ root->i_uid = root->i_gid = 0; ++ root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; ++ sb->s_maxbytes = ~0ULL; ++ sb->s_blocksize = 1024; ++ sb->s_blocksize_bits = 10; ++ sb->s_magic = 0x62646576; /* the bytes spell "bdev" in ASCII */ ++ sb->s_op = &sops; ++ sb->s_root = d_alloc(NULL, &(const struct qstr) { "bdev:", 5, 0 }); ++ if (!sb->s_root) { ++ iput(root); /* drop the root inode allocated above */ ++ return NULL; ++ } ++ sb->s_root->d_sb = sb; ++ sb->s_root->d_parent = sb->s_root; /* the root dentry is its own parent */ ++ d_instantiate(sb->s_root, root); ++ return sb; ++} ++ ++static DECLARE_FSTYPE(bd_type, "bdev", bd_read_super, FS_NOMOUNT); ++ ++static struct vfsmount *bd_mnt; /* set by kern_mount(&bd_type) in bdev_cache_init() */ ++ ++/* ++ * bdev cache handling - shamelessly stolen from inode.c ++ * We use smaller hashtable, though. 
++ */ ++ ++#define HASH_BITS 6 /* 1 << 6 = 64 hash buckets */ ++#define HASH_SIZE (1UL << HASH_BITS) ++#define HASH_MASK (HASH_SIZE-1) ++static struct list_head bdev_hashtable[HASH_SIZE]; ++static spinlock_t bdev_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; ++static kmem_cache_t * bdev_cachep; ++ ++#define alloc_bdev() \ ++ ((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL)) ++#define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev)) ++ ++static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) /* constructor passed to kmem_cache_create() for bdev_cache objects */ ++{ ++ struct block_device * bdev = (struct block_device *) foo; ++ ++ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == ++ SLAB_CTOR_CONSTRUCTOR) ++ { ++ memset(bdev, 0, sizeof(*bdev)); ++ sema_init(&bdev->bd_sem, 1); ++ INIT_LIST_HEAD(&bdev->bd_inodes); ++ } ++} ++ ++void __init bdev_cache_init(void) /* sets up the hash buckets, the slab cache and the bdev pseudo-fs mount; panics on any failure */ ++{ ++ int i, err; ++ struct list_head *head = bdev_hashtable; ++ ++ i = HASH_SIZE; ++ do { ++ INIT_LIST_HEAD(head); ++ head++; ++ i--; ++ } while (i); ++ ++ bdev_cachep = kmem_cache_create("bdev_cache", ++ sizeof(struct block_device), ++ 0, SLAB_HWCACHE_ALIGN, init_once, ++ NULL); ++ if (!bdev_cachep) ++ panic("Cannot create bdev_cache SLAB cache"); ++ err = register_filesystem(&bd_type); ++ if (err) ++ panic("Cannot register bdev pseudo-fs"); ++ bd_mnt = kern_mount(&bd_type); ++ err = PTR_ERR(bd_mnt); /* NOTE(review): err is not read after this assignment */ ++ if (IS_ERR(bd_mnt)) ++ panic("Cannot create bdev pseudo-fs"); ++} ++ ++/* ++ * Most likely _very_ bad one - but then it's hardly critical for small ++ * /dev and can be fixed when somebody will need really large one. 
++ */ ++static inline unsigned long hash(dev_t dev) /* bucket index in [0, HASH_MASK] */ ++{ ++ unsigned long tmp = dev; ++ tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2); ++ return tmp & HASH_MASK; ++} ++ ++static struct block_device *bdfind(dev_t dev, struct list_head *head) /* both callers in this file hold bdev_lock */ ++{ ++ struct list_head *p; ++ struct block_device *bdev; ++ for (p=head->next; p!=head; p=p->next) { ++ bdev = list_entry(p, struct block_device, bd_hash); ++ if (bdev->bd_dev != dev) ++ continue; ++ atomic_inc(&bdev->bd_count); /* a match is returned with an extra reference */ ++ return bdev; ++ } ++ return NULL; ++} ++ ++struct block_device *bdget(dev_t dev) /* returns a referenced block_device for dev, or NULL if allocation fails */ ++{ ++ struct list_head * head = bdev_hashtable + hash(dev); ++ struct block_device *bdev, *new_bdev; ++ spin_lock(&bdev_lock); ++ bdev = bdfind(dev, head); ++ spin_unlock(&bdev_lock); ++ if (bdev) ++ return bdev; ++ new_bdev = alloc_bdev(); ++ if (new_bdev) { ++ struct inode *inode = new_inode(bd_mnt->mnt_sb); ++ if (inode) { ++ kdev_t kdev = to_kdev_t(dev); ++ atomic_set(&new_bdev->bd_count,1); ++ new_bdev->bd_dev = dev; ++ new_bdev->bd_op = NULL; ++ new_bdev->bd_inode = inode; ++ inode->i_rdev = kdev; ++ inode->i_dev = kdev; ++ inode->i_bdev = new_bdev; ++ inode->i_data.a_ops = &def_blk_aops; ++ inode->i_data.gfp_mask = GFP_USER; ++ inode->i_mode = S_IFBLK; ++ spin_lock(&bdev_lock); ++ bdev = bdfind(dev, head); /* look again under the lock: the lock was dropped while allocating */ ++ if (!bdev) { ++ list_add(&new_bdev->bd_hash, head); ++ spin_unlock(&bdev_lock); ++ return new_bdev; ++ } ++ spin_unlock(&bdev_lock); ++ iput(new_bdev->bd_inode); /* an entry already exists: discard the one just built */ ++ } ++ destroy_bdev(new_bdev); ++ } ++ return bdev; /* the existing entry found on the second lookup, or NULL */ ++} ++ ++static inline void __bd_forget(struct inode *inode) /* detaches inode from its bdev; both callers in this file hold bdev_lock */ ++{ ++ list_del_init(&inode->i_devices); ++ inode->i_bdev = NULL; ++ inode->i_mapping = &inode->i_data; ++} ++ ++void bdput(struct block_device *bdev) /* drops one reference; the last one unhashes and frees the bdev */ ++{ ++ if (atomic_dec_and_lock(&bdev->bd_count, &bdev_lock)) { ++ struct list_head *p; ++ if (bdev->bd_openers) ++ BUG(); ++ list_del(&bdev->bd_hash); ++ while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { ++ __bd_forget(list_entry(p, struct inode, i_devices)); ++ } ++ 
spin_unlock(&bdev_lock); ++ iput(bdev->bd_inode); ++ destroy_bdev(bdev); ++ } ++} ++ ++int bd_acquire(struct inode *inode) /* takes a bdev reference for inode, attaching one if needed; returns 0 or -ENOMEM */ ++{ ++ struct block_device *bdev; ++ spin_lock(&bdev_lock); ++ if (inode->i_bdev) { ++ atomic_inc(&inode->i_bdev->bd_count); ++ spin_unlock(&bdev_lock); ++ return 0; ++ } ++ spin_unlock(&bdev_lock); ++ bdev = bdget(kdev_t_to_nr(inode->i_rdev)); ++ if (!bdev) ++ return -ENOMEM; ++ spin_lock(&bdev_lock); ++ if (!inode->i_bdev) { ++ inode->i_bdev = bdev; ++ inode->i_mapping = bdev->bd_inode->i_mapping; ++ list_add(&inode->i_devices, &bdev->bd_inodes); ++ } else if (inode->i_bdev != bdev) ++ BUG(); ++ spin_unlock(&bdev_lock); ++ return 0; ++} ++ ++/* Call when you free inode */ ++ ++void bd_forget(struct inode *inode) ++{ ++ spin_lock(&bdev_lock); ++ if (inode->i_bdev) ++ __bd_forget(inode); ++ spin_unlock(&bdev_lock); ++} ++ ++static struct { ++ const char *name; ++ struct block_device_operations *bdops; ++} blkdevs[MAX_BLKDEV]; /* indexed by major number; bdops == NULL marks a free slot */ ++ ++int get_blkdev_list(char * p) /* writes one line per registered major into p and returns the length written; p's size is not checked here */ ++{ ++ int i; ++ int len; ++ ++ len = sprintf(p, "\nBlock devices:\n"); ++ for (i = 0; i < MAX_BLKDEV ; i++) { ++ if (blkdevs[i].bdops) { ++ len += sprintf(p+len, "%3d %s\n", i, blkdevs[i].name); ++ } ++ } ++ return len; ++} ++ ++/* ++ Return the function table of a device. ++ Load the driver if needed. 
++*/ ++const struct block_device_operations * get_blkfops(unsigned int major) ++{ ++ const struct block_device_operations *ret = NULL; ++ ++ /* major 0 is used for non-device mounts */ ++ if (major && major < MAX_BLKDEV) { ++#ifdef CONFIG_KMOD ++ if (!blkdevs[major].bdops) { ++ char name[20]; ++ sprintf(name, "block-major-%d", major); ++ request_module(name); ++ } ++#endif ++ ret = blkdevs[major].bdops; /* still NULL if no driver is registered for this major */ ++ } ++ return ret; ++} ++ ++int register_blkdev(unsigned int major, const char * name, struct block_device_operations *bdops) /* returns 0, or the chosen major when major == 0, or -EBUSY / -EINVAL */ ++{ ++ if (major == 0) { /* dynamic allocation: take the highest-numbered free slot */ ++ for (major = MAX_BLKDEV-1; major > 0; major--) { ++ if (blkdevs[major].bdops == NULL) { ++ blkdevs[major].name = name; ++ blkdevs[major].bdops = bdops; ++ return major; ++ } ++ } ++ return -EBUSY; ++ } ++ if (major >= MAX_BLKDEV) ++ return -EINVAL; ++ if (blkdevs[major].bdops && blkdevs[major].bdops != bdops) /* re-registering the same bdops is allowed */ ++ return -EBUSY; ++ blkdevs[major].name = name; ++ blkdevs[major].bdops = bdops; ++ return 0; ++} ++ ++int unregister_blkdev(unsigned int major, const char * name) /* returns 0, or -EINVAL if major is out of range, unregistered, or registered under another name */ ++{ ++ if (major >= MAX_BLKDEV) ++ return -EINVAL; ++ if (!blkdevs[major].bdops) ++ return -EINVAL; ++ if (strcmp(blkdevs[major].name, name)) ++ return -EINVAL; ++ blkdevs[major].name = NULL; ++ blkdevs[major].bdops = NULL; ++ return 0; ++} ++ ++/* ++ * This routine checks whether a removable media has been changed, ++ * and invalidates all buffer-cache-entries in that case. This ++ * is a relatively slow routine, so we have to try to minimize using ++ * it. Thus it is called only upon a 'mount' or 'open'. This ++ * is the best way of combining speed and utility, I think. 
++ * People changing diskettes in the middle of an operation deserve ++ * to lose :-) Returns 1 if the media changed, 0 otherwise. ++ */ ++int check_disk_change(kdev_t dev) ++{ ++ int i; ++ const struct block_device_operations * bdops = NULL; ++ ++ i = MAJOR(dev); ++ if (i < MAX_BLKDEV) ++ bdops = blkdevs[i].bdops; ++ if (bdops == NULL) { ++ devfs_handle_t de; ++ ++ de = devfs_find_handle (NULL, NULL, i, MINOR (dev), ++ DEVFS_SPECIAL_BLK, 0); ++ if (de) { ++ bdops = devfs_get_ops (de); ++ devfs_put_ops (de); /* We're running in owner module */ ++ } ++ } ++ if (bdops == NULL) ++ return 0; ++ if (bdops->check_media_change == NULL) ++ return 0; ++ if (!bdops->check_media_change(dev)) ++ return 0; ++ ++ if (invalidate_device(dev, 0)) ++ printk("VFS: busy inodes on changed media.\n"); ++ ++ if (bdops->revalidate) ++ bdops->revalidate(dev); ++ return 1; ++} ++ ++int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) /* calls the driver ioctl under KERNEL_DS; NOTE(review): bdev->bd_op is dereferenced unchecked, so presumably the device must be open - confirm against callers */ ++{ ++ int res; ++ mm_segment_t old_fs = get_fs(); ++ ++ if (!bdev->bd_op->ioctl) ++ return -EINVAL; ++ set_fs(KERNEL_DS); ++ res = bdev->bd_op->ioctl(bdev->bd_inode, NULL, cmd, arg); ++ set_fs(old_fs); ++ return res; ++} ++ ++static int do_open(struct block_device *bdev, struct inode *inode, struct file *file) /* bdev must be non-NULL; on failure the caller's bdev reference is dropped here */ ++{ ++ int ret = -ENXIO; ++ kdev_t dev = to_kdev_t(bdev->bd_dev); ++ ++ down(&bdev->bd_sem); ++ lock_kernel(); ++ if (!bdev->bd_op) ++ bdev->bd_op = get_blkfops(MAJOR(dev)); ++ if (bdev->bd_op) { ++ ret = 0; ++ if (bdev->bd_op->owner) ++ __MOD_INC_USE_COUNT(bdev->bd_op->owner); ++ if (bdev->bd_op->open) ++ ret = bdev->bd_op->open(inode, file); ++ if (!ret) { ++ bdev->bd_openers++; ++ bdev->bd_inode->i_size = blkdev_size(dev); ++ bdev->bd_inode->i_blkbits = blksize_bits(block_size(dev)); ++ } else { ++ if (bdev->bd_op->owner) ++ __MOD_DEC_USE_COUNT(bdev->bd_op->owner); ++ if (!bdev->bd_openers) ++ bdev->bd_op = NULL; ++ } ++ } ++ unlock_kernel(); ++ up(&bdev->bd_sem); ++ if (ret) ++ bdput(bdev); ++ return ret; ++} ++ ++int blkdev_get(struct block_device *bdev, mode_t mode, 
unsigned flags, int kind) ++{ ++ /* ++ * This crockload is due to bad choice of ->open() type. ++ * It will go away. ++ * For now, block device ->open() routine must _not_ ++ * examine anything in 'inode' argument except ->i_rdev. ++ */ ++ struct file fake_file = {}; ++ struct dentry fake_dentry = {}; ++ fake_file.f_mode = mode; ++ fake_file.f_flags = flags; ++ fake_file.f_dentry = &fake_dentry; ++ fake_dentry.d_inode = bdev->bd_inode; ++ ++ return do_open(bdev, bdev->bd_inode, &fake_file); ++} ++ ++int blkdev_open(struct inode * inode, struct file * filp) ++{ ++ struct block_device *bdev; ++ ++ /* ++ * Preserve backwards compatibility and allow large file access ++ * even if userspace doesn't ask for it explicitly. Some mkfs ++ * binary needs it. We might want to drop this workaround ++ * during an unstable branch. ++ */ ++ filp->f_flags |= O_LARGEFILE; ++ ++ if (bd_acquire(inode) < 0) /* on failure no bdev reference was taken, and do_open() would dereference a NULL bdev */ ++ return -ENOMEM; ++ bdev = inode->i_bdev; ++ return do_open(bdev, inode, filp); ++} ++ ++int blkdev_put(struct block_device *bdev, int kind) /* undoes one successful open: syncs, calls ->release and drops the bdev reference */ ++{ ++ int ret = 0; ++ kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */ ++ struct inode *bd_inode = bdev->bd_inode; ++ ++ down(&bdev->bd_sem); ++ lock_kernel(); ++ if (kind == BDEV_FILE && bdev->bd_openers == 1) ++ __block_fsync(bd_inode); ++ else if (kind == BDEV_FS) ++ fsync_no_super(rdev); ++ if (!--bdev->bd_openers) ++ kill_bdev(bdev); ++ if (bdev->bd_op->release) ++ ret = bdev->bd_op->release(bd_inode, NULL); ++ if (bdev->bd_op->owner) ++ __MOD_DEC_USE_COUNT(bdev->bd_op->owner); ++ if (!bdev->bd_openers) ++ bdev->bd_op = NULL; ++ unlock_kernel(); ++ up(&bdev->bd_sem); ++ bdput(bdev); ++ return ret; ++} ++ ++int blkdev_close(struct inode * inode, struct file * filp) ++{ ++ return blkdev_put(inode->i_bdev, BDEV_FILE); ++} ++ ++static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, ++ unsigned long arg) ++{ ++ if (inode->i_bdev->bd_op->ioctl) ++ return inode->i_bdev->bd_op->ioctl(inode, file, cmd, arg); ++ return -EINVAL; 
++} ++ ++struct address_space_operations def_blk_aops = { ++ readpage: blkdev_readpage, ++ writepage: blkdev_writepage, ++ sync_page: block_sync_page, ++ prepare_write: blkdev_prepare_write, ++ commit_write: blkdev_commit_write, ++ direct_IO: blkdev_direct_IO, ++}; ++ ++struct file_operations def_blk_fops = { ++ open: blkdev_open, ++ release: blkdev_close, ++ llseek: block_llseek, ++ read: generic_file_read, ++ write: generic_file_write, ++ mmap: generic_file_mmap, ++ fsync: block_fsync, ++ ioctl: blkdev_ioctl, ++}; ++ ++const char * bdevname(kdev_t dev) /* formats "name(major,minor)", using "unknown-block" when no name is registered */ ++{ ++ static char buffer[32]; /* shared static buffer: the result is overwritten by the next call */ ++ const char * name = MAJOR(dev) < MAX_BLKDEV ? blkdevs[MAJOR(dev)].name : NULL; /* same bound check as check_disk_change() and get_blkfops() */ ++ ++ if (!name) ++ name = "unknown-block"; ++ ++ sprintf(buffer, "%s(%d,%d)", name, MAJOR(dev), MINOR(dev)); ++ return buffer; ++} +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.19-hp3_pnnl1-root/fs/devfs/base.c 2002-08-03 08:39:45.000000000 +0800 +@@ -0,0 +1,3544 @@ ++/* devfs (Device FileSystem) driver. ++ ++ Copyright (C) 1998-2002 Richard Gooch ++ ++ This library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Library General Public ++ License as published by the Free Software Foundation; either ++ version 2 of the License, or (at your option) any later version. ++ ++ This library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Library General Public License for more details. ++ ++ You should have received a copy of the GNU Library General Public ++ License along with this library; if not, write to the Free ++ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ ++ Richard Gooch may be reached by email at rgooch@atnf.csiro.au ++ The postal address is: ++ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. ++ ++ ChangeLog ++ ++ 19980110 Richard Gooch ++ Original version. 
++ v0.1 ++ 19980111 Richard Gooch ++ Created per-fs inode table rather than using inode->u.generic_ip ++ v0.2 ++ 19980111 Richard Gooch ++ Created .epoch inode which has a ctime of 0. ++ Fixed loss of named pipes when dentries lost. ++ Fixed loss of inode data when devfs_register() follows mknod(). ++ v0.3 ++ 19980111 Richard Gooch ++ Fix for when compiling with CONFIG_KERNELD. ++ 19980112 Richard Gooch ++ Fix for readdir() which sometimes didn't show entries. ++ Added <> option to . ++ v0.4 ++ 19980113 Richard Gooch ++ Created function. ++ v0.5 ++ 19980115 Richard Gooch ++ Added subdirectory support. Major restructuring. ++ 19980116 Richard Gooch ++ Fixed to not search major=0,minor=0. ++ Added symlink support. ++ v0.6 ++ 19980120 Richard Gooch ++ Created function and support directory unregister ++ 19980120 Richard Gooch ++ Auto-ownership uses real uid/gid rather than effective uid/gid. ++ v0.7 ++ 19980121 Richard Gooch ++ Supported creation of sockets. ++ v0.8 ++ 19980122 Richard Gooch ++ Added DEVFS_FL_HIDE_UNREG flag. ++ Interface change to . ++ Created to support symlink(2). ++ v0.9 ++ 19980123 Richard Gooch ++ Added check to to check inode is in devfs. ++ Added optional traversal of symlinks. ++ v0.10 ++ 19980124 Richard Gooch ++ Created and . ++ v0.11 ++ 19980125 C. Scott Ananian ++ Created . ++ 19980125 Richard Gooch ++ Allow removal of symlinks. ++ v0.12 ++ 19980125 Richard Gooch ++ Created . ++ 19980126 Richard Gooch ++ Moved DEVFS_SUPER_MAGIC into header file. ++ Added DEVFS_FL_HIDE flag. ++ Created . ++ Created . ++ Fixed minor bug in . ++ 19980127 Richard Gooch ++ Changed interface to , , ++ , and . ++ Fixed inode times when symlink created with symlink(2). ++ v0.13 ++ 19980129 C. Scott Ananian ++ Exported , ++ and . ++ 19980129 Richard Gooch ++ Created to support unlink(2). ++ v0.14 ++ 19980129 Richard Gooch ++ Fixed kerneld support for entries in devfs subdirectories. ++ 19980130 Richard Gooch ++ Bugfixes in . 
++ v0.15 ++ 19980207 Richard Gooch ++ Call kerneld when looking up unregistered entries. ++ v0.16 ++ 19980326 Richard Gooch ++ Modified interface to for symlink traversal. ++ v0.17 ++ 19980331 Richard Gooch ++ Fixed persistence bug with device numbers for manually created ++ device files. ++ Fixed problem with recreating symlinks with different content. ++ v0.18 ++ 19980401 Richard Gooch ++ Changed to CONFIG_KMOD. ++ Hide entries which are manually unlinked. ++ Always invalidate devfs dentry cache when registering entries. ++ Created to support rmdir(2). ++ Ensure directories created by are visible. ++ v0.19 ++ 19980402 Richard Gooch ++ Invalidate devfs dentry cache when making directories. ++ Invalidate devfs dentry cache when removing entries. ++ Fixed persistence bug with fifos. ++ v0.20 ++ 19980421 Richard Gooch ++ Print process command when debugging kerneld/kmod. ++ Added debugging for register/unregister/change operations. ++ 19980422 Richard Gooch ++ Added "devfs=" boot options. ++ v0.21 ++ 19980426 Richard Gooch ++ No longer lock/unlock superblock in . ++ Drop negative dentries when they are released. ++ Manage dcache more efficiently. ++ v0.22 ++ 19980427 Richard Gooch ++ Added DEVFS_FL_AUTO_DEVNUM flag. ++ v0.23 ++ 19980430 Richard Gooch ++ No longer set unnecessary methods. ++ v0.24 ++ 19980504 Richard Gooch ++ Added PID display to debugging message. ++ Added "after" debugging message to . ++ 19980519 Richard Gooch ++ Added "diread" and "diwrite" boot options. ++ 19980520 Richard Gooch ++ Fixed persistence problem with permissions. ++ v0.25 ++ 19980602 Richard Gooch ++ Support legacy device nodes. ++ Fixed bug where recreated inodes were hidden. ++ v0.26 ++ 19980602 Richard Gooch ++ Improved debugging in . ++ 19980607 Richard Gooch ++ No longer free old dentries in . ++ Free all dentries for a given entry when deleting inodes. ++ v0.27 ++ 19980627 Richard Gooch ++ Limit auto-device numbering to majors 128 to 239. 
++ v0.28 ++ 19980629 Richard Gooch ++ Fixed inode times persistence problem. ++ v0.29 ++ 19980704 Richard Gooch ++ Fixed spelling in debug. ++ Fixed bug in parsing "dilookup". ++ v0.30 ++ 19980705 Richard Gooch ++ Fixed devfs inode leak when manually recreating inodes. ++ Fixed permission persistence problem when recreating inodes. ++ v0.31 ++ 19980727 Richard Gooch ++ Removed harmless "unused variable" compiler warning. ++ Fixed modes for manually recreated device nodes. ++ v0.32 ++ 19980728 Richard Gooch ++ Added NULL devfs inode warning in . ++ Force all inode nlink values to 1. ++ v0.33 ++ 19980730 Richard Gooch ++ Added "dimknod" boot option. ++ Set inode nlink to 0 when freeing dentries. ++ Fixed modes for manually recreated symlinks. ++ v0.34 ++ 19980802 Richard Gooch ++ Fixed bugs in recreated directories and symlinks. ++ v0.35 ++ 19980806 Richard Gooch ++ Fixed bugs in recreated device nodes. ++ 19980807 Richard Gooch ++ Fixed bug in currently unused . ++ Defined new type. ++ Improved debugging when getting entries. ++ Fixed bug where directories could be emptied. ++ v0.36 ++ 19980809 Richard Gooch ++ Replaced dummy .epoch inode with .devfsd character device. ++ 19980810 Richard Gooch ++ Implemented devfsd protocol revision 0. ++ v0.37 ++ 19980819 Richard Gooch ++ Added soothing message to warning in . ++ v0.38 ++ 19980829 Richard Gooch ++ Use GCC extensions for structure initialisations. ++ Implemented async open notification. ++ Incremented devfsd protocol revision to 1. ++ v0.39 ++ 19980908 Richard Gooch ++ Moved async open notification to end of . ++ v0.40 ++ 19980910 Richard Gooch ++ Prepended "/dev/" to module load request. ++ Renamed to . ++ v0.41 ++ 19980910 Richard Gooch ++ Fixed typo "AYSNC" -> "ASYNC". ++ v0.42 ++ 19980910 Richard Gooch ++ Added open flag for files. ++ v0.43 ++ 19980927 Richard Gooch ++ Set i_blocks=0 and i_blksize=1024 in . ++ v0.44 ++ 19981005 Richard Gooch ++ Added test for empty <> in . ++ Renamed to and published. 
++ v0.45 ++ 19981006 Richard Gooch ++ Created . ++ v0.46 ++ 19981007 Richard Gooch ++ Limit auto-device numbering to majors 144 to 239. ++ v0.47 ++ 19981010 Richard Gooch ++ Updated for VFS change in 2.1.125. ++ v0.48 ++ 19981022 Richard Gooch ++ Created DEVFS_ FL_COMPAT flag. ++ v0.49 ++ 19981023 Richard Gooch ++ Created "nocompat" boot option. ++ v0.50 ++ 19981025 Richard Gooch ++ Replaced "mount" boot option with "nomount". ++ v0.51 ++ 19981110 Richard Gooch ++ Created "only" boot option. ++ v0.52 ++ 19981112 Richard Gooch ++ Added DEVFS_FL_REMOVABLE flag. ++ v0.53 ++ 19981114 Richard Gooch ++ Only call on first call to ++ . ++ v0.54 ++ 19981205 Richard Gooch ++ Updated for VFS change in 2.1.131. ++ v0.55 ++ 19981218 Richard Gooch ++ Created . ++ 19981220 Richard Gooch ++ Check for partitions on removable media in . ++ v0.56 ++ 19990118 Richard Gooch ++ Added support for registering regular files. ++ Created . ++ Update devfs inodes from entries if not changed through FS. ++ v0.57 ++ 19990124 Richard Gooch ++ Fixed to only initialise temporary inodes. ++ Trap for NULL fops in . ++ Return -ENODEV in for non-driver inodes. ++ v0.58 ++ 19990126 Richard Gooch ++ Switched from PATH_MAX to DEVFS_PATHLEN. ++ v0.59 ++ 19990127 Richard Gooch ++ Created "nottycompat" boot option. ++ v0.60 ++ 19990318 Richard Gooch ++ Fixed to not overrun event buffer. ++ v0.61 ++ 19990329 Richard Gooch ++ Created . ++ v0.62 ++ 19990330 Richard Gooch ++ Don't return unregistered entries in . ++ Panic in if entry unregistered. ++ 19990401 Richard Gooch ++ Don't panic in for duplicates. ++ v0.63 ++ 19990402 Richard Gooch ++ Don't unregister already unregistered entries in . ++ v0.64 ++ 19990510 Richard Gooch ++ Disable warning messages when unable to read partition table for ++ removable media. ++ v0.65 ++ 19990512 Richard Gooch ++ Updated for VFS change in 2.3.1-pre1. ++ Created "oops-on-panic" boot option. ++ Improved debugging in and . 
++ v0.66 ++ 19990519 Richard Gooch ++ Added documentation for some functions. ++ 19990525 Richard Gooch ++ Removed "oops-on-panic" boot option: now always Oops. ++ v0.67 ++ 19990531 Richard Gooch ++ Improved debugging in . ++ v0.68 ++ 19990604 Richard Gooch ++ Added "diunlink" and "nokmod" boot options. ++ Removed superfluous warning message in . ++ v0.69 ++ 19990611 Richard Gooch ++ Took account of change to . ++ v0.70 ++ 19990614 Richard Gooch ++ Created separate event queue for each mounted devfs. ++ Removed . ++ Created new ioctl()s. ++ Incremented devfsd protocol revision to 3. ++ Fixed bug when re-creating directories: contents were lost. ++ Block access to inodes until devfsd updates permissions. ++ 19990615 Richard Gooch ++ Support 2.2.x kernels. ++ v0.71 ++ 19990623 Richard Gooch ++ Switched to sending process uid/gid to devfsd. ++ Renamed to . ++ Added DEVFSD_NOTIFY_LOOKUP event. ++ 19990624 Richard Gooch ++ Added DEVFSD_NOTIFY_CHANGE event. ++ Incremented devfsd protocol revision to 4. ++ v0.72 ++ 19990713 Richard Gooch ++ Return EISDIR rather than EINVAL for read(2) on directories. ++ v0.73 ++ 19990809 Richard Gooch ++ Changed to new __init scheme. ++ v0.74 ++ 19990901 Richard Gooch ++ Changed remaining function declarations to new __init scheme. ++ v0.75 ++ 19991013 Richard Gooch ++ Created , , ++ and . ++ Added <> parameter to , , ++ and . ++ Work sponsored by SGI. ++ v0.76 ++ 19991017 Richard Gooch ++ Allow multiple unregistrations. ++ Work sponsored by SGI. ++ v0.77 ++ 19991026 Richard Gooch ++ Added major and minor number to devfsd protocol. ++ Incremented devfsd protocol revision to 5. ++ Work sponsored by SGI. ++ v0.78 ++ 19991030 Richard Gooch ++ Support info pointer for all devfs entry types. ++ Added <> parameter to and ++ . ++ Work sponsored by SGI. ++ v0.79 ++ 19991031 Richard Gooch ++ Support "../" when searching devfs namespace. ++ Work sponsored by SGI. ++ v0.80 ++ 19991101 Richard Gooch ++ Created . ++ Work sponsored by SGI. 
++ v0.81 ++ 19991103 Richard Gooch ++ Exported . ++ Work sponsored by SGI. ++ v0.82 ++ 19991104 Richard Gooch ++ Removed unused . ++ 19991105 Richard Gooch ++ Do not hide entries from devfsd or children. ++ Removed DEVFS_ FL_TTY_COMPAT flag. ++ Removed "nottycompat" boot option. ++ Removed . ++ Work sponsored by SGI. ++ v0.83 ++ 19991107 Richard Gooch ++ Added DEVFS_FL_WAIT flag. ++ Work sponsored by SGI. ++ v0.84 ++ 19991107 Richard Gooch ++ Support new "disc" naming scheme in . ++ Allow NULL fops in . ++ Work sponsored by SGI. ++ v0.85 ++ 19991110 Richard Gooch ++ Fall back to major table if NULL fops given to . ++ Work sponsored by SGI. ++ v0.86 ++ 19991204 Richard Gooch ++ Support fifos when unregistering. ++ Work sponsored by SGI. ++ v0.87 ++ 19991209 Richard Gooch ++ Removed obsolete DEVFS_ FL_COMPAT and DEVFS_ FL_TOLERANT flags. ++ Work sponsored by SGI. ++ v0.88 ++ 19991214 Richard Gooch ++ Removed kmod support. ++ Work sponsored by SGI. ++ v0.89 ++ 19991216 Richard Gooch ++ Improved debugging in . ++ Ensure dentries created by devfsd will be cleaned up. ++ Work sponsored by SGI. ++ v0.90 ++ 19991223 Richard Gooch ++ Created . ++ Work sponsored by SGI. ++ v0.91 ++ 20000203 Richard Gooch ++ Ported to kernel 2.3.42. ++ Removed . ++ Work sponsored by SGI. ++ v0.92 ++ 20000306 Richard Gooch ++ Added DEVFS_ FL_NO_PERSISTENCE flag. ++ Removed unnecessary call to in ++ . ++ Work sponsored by SGI. ++ v0.93 ++ 20000413 Richard Gooch ++ Set inode->i_size to correct size for symlinks. ++ 20000414 Richard Gooch ++ Only give lookup() method to directories to comply with new VFS ++ assumptions. ++ Work sponsored by SGI. ++ 20000415 Richard Gooch ++ Remove unnecessary tests in symlink methods. ++ Don't kill existing block ops in . ++ Work sponsored by SGI. ++ v0.94 ++ 20000424 Richard Gooch ++ Don't create missing directories in . ++ Work sponsored by SGI. ++ v0.95 ++ 20000430 Richard Gooch ++ Added CONFIG_DEVFS_MOUNT. ++ Work sponsored by SGI. 
++ v0.96 ++ 20000608 Richard Gooch ++ Disabled multi-mount capability (use VFS bindings instead). ++ Work sponsored by SGI. ++ v0.97 ++ 20000610 Richard Gooch ++ Switched to FS_SINGLE to disable multi-mounts. ++ 20000612 Richard Gooch ++ Removed module support. ++ Removed multi-mount code. ++ Removed compatibility macros: VFS has changed too much. ++ Work sponsored by SGI. ++ v0.98 ++ 20000614 Richard Gooch ++ Merged devfs inode into devfs entry. ++ Work sponsored by SGI. ++ v0.99 ++ 20000619 Richard Gooch ++ Removed dead code in which used to call ++ . ++ Work sponsored by SGI. ++ v0.100 ++ 20000621 Richard Gooch ++ Changed interface to . ++ Work sponsored by SGI. ++ v0.101 ++ 20000622 Richard Gooch ++ Simplified interface to and . ++ Simplified interface to . ++ Work sponsored by SGI. ++ v0.102 ++ 20010519 Richard Gooch ++ Ensure terminates string for root entry. ++ Exported to modules. ++ 20010520 Richard Gooch ++ Make send events to devfsd. ++ Cleaned up option processing in . ++ 20010521 Richard Gooch ++ Fixed bugs in handling symlinks: could leak or cause Oops. ++ 20010522 Richard Gooch ++ Cleaned up directory handling by separating fops. ++ v0.103 ++ 20010601 Richard Gooch ++ Fixed handling of inverted options in . ++ v0.104 ++ 20010604 Richard Gooch ++ Adjusted to account for fix. ++ v0.105 ++ 20010617 Richard Gooch ++ Answered question posed by Al Viro and removed his comments. ++ Moved setting of registered flag after other fields are changed. ++ Fixed race between and . ++ Global VFS changes added bogus BKL to : removed. ++ Widened locking in and . ++ Replaced stack usage with kmalloc. ++ Simplified locking in and fixed memory leak. ++ v0.106 ++ 20010709 Richard Gooch ++ Removed broken devnum allocation and use . ++ Fixed old devnum leak by calling new . ++ v0.107 ++ 20010712 Richard Gooch ++ Fixed bug in which could hang boot process. ++ v0.108 ++ 20010730 Richard Gooch ++ Added DEVFSD_NOTIFY_DELETE event. ++ 20010801 Richard Gooch ++ Removed #include . 
++ v0.109 ++ 20010807 Richard Gooch ++ Fixed inode table races by removing it and using ++ inode->u.generic_ip instead. ++ Moved into . ++ Moved into . ++ v0.110 ++ 20010808 Richard Gooch ++ Fixed race in for uni-processor. ++ v0.111 ++ 20010818 Richard Gooch ++ Removed remnant of multi-mount support in . ++ Removed unused DEVFS_FL_SHOW_UNREG flag. ++ v0.112 ++ 20010820 Richard Gooch ++ Removed nlink field from struct devfs_inode. ++ v0.113 ++ 20010823 Richard Gooch ++ Replaced BKL with global rwsem to protect symlink data (quick ++ and dirty hack). ++ v0.114 ++ 20010827 Richard Gooch ++ Replaced global rwsem for symlink with per-link refcount. ++ v0.115 ++ 20010919 Richard Gooch ++ Set inode->i_mapping->a_ops for block nodes in . ++ v0.116 ++ 20011008 Richard Gooch ++ Fixed overrun in by removing function (not needed). ++ 20011009 Richard Gooch ++ Fixed buffer underrun in . ++ 20011029 Richard Gooch ++ Fixed race in when setting event mask. ++ 20011114 Richard Gooch ++ First release of new locking code. ++ v1.0 ++ 20011117 Richard Gooch ++ Discard temporary buffer, now use "%s" for dentry names. ++ 20011118 Richard Gooch ++ Don't generate path in : use fake entry instead. ++ Use "existing" directory in <_devfs_make_parent_for_leaf>. ++ 20011122 Richard Gooch ++ Use slab cache rather than fixed buffer for devfsd events. ++ v1.1 ++ 20011125 Richard Gooch ++ Send DEVFSD_NOTIFY_REGISTERED events in . ++ 20011127 Richard Gooch ++ Fixed locking bug in due to typo. ++ Do not send CREATE, CHANGE, ASYNC_OPEN or DELETE events from ++ devfsd or children. ++ v1.2 ++ 20011202 Richard Gooch ++ Fixed bug in : was dereferencing freed pointer. ++ v1.3 ++ 20011203 Richard Gooch ++ Fixed bug in : was dereferencing freed pointer. ++ Added process group check for devfsd privileges. ++ v1.4 ++ 20011204 Richard Gooch ++ Use SLAB_ATOMIC in from . ++ v1.5 ++ 20011211 Richard Gooch ++ Return old entry in for 2.4.x kernels. ++ 20011212 Richard Gooch ++ Increment refcount on module in . 
++ 20011215 Richard Gooch ++ Created and exported . ++ Increment refcount on module in . ++ Created . ++ v1.6 ++ 20011216 Richard Gooch ++ Added poisoning to . ++ Improved debugging messages. ++ v1.7 ++ 20011221 Richard Gooch ++ Corrected (made useful) debugging message in . ++ Moved in to ++ 20011224 Richard Gooch ++ Added magic number to guard against scribbling drivers. ++ 20011226 Richard Gooch ++ Only return old entry in if a directory. ++ Defined macros for error and debug messages. ++ v1.8 ++ 20020113 Richard Gooch ++ Fixed (rare, old) race in . ++ v1.9 ++ 20020120 Richard Gooch ++ Fixed deadlock bug in . ++ Tag VFS deletable in if handle ignored. ++ v1.10 ++ 20020129 Richard Gooch ++ Added KERN_* to remaining messages. ++ Cleaned up declaration of . ++ v1.11 ++ 20020219 Richard Gooch ++ Changed to allow later additions if not yet empty. ++ v1.12 ++ 20020514 Richard Gooch ++ Added BKL to because drivers still need it. ++ Protected and ++ from changing directory contents. ++ v1.12a ++*/ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DEVFS_VERSION "1.12a (20020514)" ++ ++#define DEVFS_NAME "devfs" ++ ++#define FIRST_INODE 1 ++ ++#define STRING_LENGTH 256 ++#define FAKE_BLOCK_SIZE 1024 ++#define POISON_PTR ( *(void **) poison_array ) ++#define MAGIC_VALUE 0x327db823 ++ ++#ifndef TRUE ++# define TRUE 1 ++# define FALSE 0 ++#endif ++ ++#define MODE_DIR (S_IFDIR | S_IWUSR | S_IRUGO | S_IXUGO) ++ ++#define IS_HIDDEN(de) ( (de)->hide && !is_devfsd_or_child(fs_info) ) ++ ++#define DEBUG_NONE 0x0000000 ++#define DEBUG_MODULE_LOAD 0x0000001 ++#define DEBUG_REGISTER 0x0000002 ++#define DEBUG_UNREGISTER 0x0000004 ++#define DEBUG_FREE 0x0000008 ++#define DEBUG_SET_FLAGS 0x0000010 ++#define 
DEBUG_S_READ 0x0000100 /* Break */ ++#define DEBUG_I_LOOKUP 0x0001000 /* Break */ ++#define DEBUG_I_CREATE 0x0002000 ++#define DEBUG_I_GET 0x0004000 ++#define DEBUG_I_CHANGE 0x0008000 ++#define DEBUG_I_UNLINK 0x0010000 ++#define DEBUG_I_RLINK 0x0020000 ++#define DEBUG_I_FLINK 0x0040000 ++#define DEBUG_I_MKNOD 0x0080000 ++#define DEBUG_F_READDIR 0x0100000 /* Break */ ++#define DEBUG_D_DELETE 0x1000000 /* Break */ ++#define DEBUG_D_RELEASE 0x2000000 ++#define DEBUG_D_IPUT 0x4000000 ++#define DEBUG_ALL 0xfffffff ++#define DEBUG_DISABLED DEBUG_NONE ++ ++#define OPTION_NONE 0x00 ++#define OPTION_MOUNT 0x01 ++#define OPTION_ONLY 0x02 ++ ++#define PRINTK(format, args...) \ ++ {printk (KERN_ERR "%s" format, __FUNCTION__ , ## args);} ++ ++#define OOPS(format, args...) \ ++ {printk (KERN_CRIT "%s" format, __FUNCTION__ , ## args); \ ++ printk ("Forcing Oops\n"); \ ++ BUG();} ++ ++#ifdef CONFIG_DEVFS_DEBUG ++# define VERIFY_ENTRY(de) \ ++ {if ((de) && (de)->magic_number != MAGIC_VALUE) \ ++ OOPS ("(%p): bad magic value: %x\n", (de), (de)->magic_number);} ++# define WRITE_ENTRY_MAGIC(de,magic) (de)->magic_number = (magic) ++# define DPRINTK(flag, format, args...) \ ++ {if (devfs_debug & flag) \ ++ printk (KERN_INFO "%s" format, __FUNCTION__ , ## args);} ++#else ++# define VERIFY_ENTRY(de) ++# define WRITE_ENTRY_MAGIC(de,magic) ++# define DPRINTK(flag, format, args...) 
++#endif ++ ++ ++struct directory_type ++{ ++ rwlock_t lock; /* Lock for searching(R)/updating(W) */ ++ struct devfs_entry *first; ++ struct devfs_entry *last; ++ unsigned short num_removable; /* Lock for writing but not reading */ ++ unsigned char no_more_additions:1; ++}; ++ ++struct file_type ++{ ++ unsigned long size; ++}; ++ ++struct device_type ++{ ++ unsigned short major; ++ unsigned short minor; ++}; ++ ++struct fcb_type /* File, char, block type */ ++{ ++ void *ops; ++ union ++ { ++ struct file_type file; ++ struct device_type device; ++ } ++ u; ++ unsigned char auto_owner:1; ++ unsigned char aopen_notify:1; ++ unsigned char removable:1; /* Belongs in device_type, but save space */ ++ unsigned char open:1; /* Not entirely correct */ ++ unsigned char autogen:1; /* Belongs in device_type, but save space */ ++}; ++ ++struct symlink_type ++{ ++ unsigned int length; /* Not including the NULL-termimator */ ++ char *linkname; /* This is NULL-terminated */ ++}; ++ ++struct devfs_inode /* This structure is for "persistent" inode storage */ ++{ ++ struct dentry *dentry; ++ time_t atime; ++ time_t mtime; ++ time_t ctime; ++ unsigned int ino; /* Inode number as seen in the VFS */ ++ uid_t uid; ++ gid_t gid; ++}; ++ ++struct devfs_entry ++{ ++#ifdef CONFIG_DEVFS_DEBUG ++ unsigned int magic_number; ++#endif ++ void *info; ++ atomic_t refcount; /* When this drops to zero, it's unused */ ++ union ++ { ++ struct directory_type dir; ++ struct fcb_type fcb; ++ struct symlink_type symlink; ++ const char *name; /* Only used for (mode == 0) */ ++ } ++ u; ++ struct devfs_entry *prev; /* Previous entry in the parent directory */ ++ struct devfs_entry *next; /* Next entry in the parent directory */ ++ struct devfs_entry *parent; /* The parent directory */ ++ struct devfs_entry *slave; /* Another entry to unregister */ ++ struct devfs_inode inode; ++ umode_t mode; ++ unsigned short namelen; /* I think 64k+ filenames are a way off... 
*/ ++ unsigned char hide:1; ++ unsigned char vfs_deletable:1;/* Whether the VFS may delete the entry */ ++ char name[1]; /* This is just a dummy: the allocated array ++ is bigger. This is NULL-terminated */ ++}; ++ ++/* The root of the device tree */ ++static struct devfs_entry *root_entry; ++ ++struct devfsd_buf_entry ++{ ++ struct devfs_entry *de; /* The name is generated with this */ ++ unsigned short type; /* The type of event */ ++ umode_t mode; ++ uid_t uid; ++ gid_t gid; ++ struct devfsd_buf_entry *next; ++}; ++ ++struct fs_info /* This structure is for the mounted devfs */ ++{ ++ struct super_block *sb; ++ spinlock_t devfsd_buffer_lock; /* Lock when inserting/deleting events */ ++ struct devfsd_buf_entry *devfsd_first_event; ++ struct devfsd_buf_entry *devfsd_last_event; ++ volatile int devfsd_sleeping; ++ volatile struct task_struct *devfsd_task; ++ volatile pid_t devfsd_pgrp; ++ volatile struct file *devfsd_file; ++ struct devfsd_notify_struct *devfsd_info; ++ volatile unsigned long devfsd_event_mask; ++ atomic_t devfsd_overrun_count; ++ wait_queue_head_t devfsd_wait_queue; /* Wake devfsd on input */ ++ wait_queue_head_t revalidate_wait_queue; /* Wake when devfsd sleeps */ ++}; ++ ++static struct fs_info fs_info = {devfsd_buffer_lock: SPIN_LOCK_UNLOCKED}; ++static kmem_cache_t *devfsd_buf_cache; ++#ifdef CONFIG_DEVFS_DEBUG ++static unsigned int devfs_debug_init __initdata = DEBUG_NONE; ++static unsigned int devfs_debug = DEBUG_NONE; ++static spinlock_t stat_lock = SPIN_LOCK_UNLOCKED; ++static unsigned int stat_num_entries; ++static unsigned int stat_num_bytes; ++#endif ++static unsigned char poison_array[8] = ++ {0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a}; ++ ++#ifdef CONFIG_DEVFS_MOUNT ++static unsigned int boot_options = OPTION_MOUNT; ++#else ++static unsigned int boot_options = OPTION_NONE; ++#endif ++ ++/* Forward function declarations */ ++static devfs_handle_t _devfs_walk_path (struct devfs_entry *dir, ++ const char *name, int namelen, ++ int 
traverse_symlink); ++static ssize_t devfsd_read (struct file *file, char *buf, size_t len, ++ loff_t *ppos); ++static int devfsd_ioctl (struct inode *inode, struct file *file, ++ unsigned int cmd, unsigned long arg); ++static int devfsd_close (struct inode *inode, struct file *file); ++#ifdef CONFIG_DEVFS_DEBUG ++static ssize_t stat_read (struct file *file, char *buf, size_t len, ++ loff_t *ppos); ++static struct file_operations stat_fops = ++{ ++ read: stat_read, ++}; ++#endif ++ ++ ++/* Devfs daemon file operations */ ++static struct file_operations devfsd_fops = ++{ ++ read: devfsd_read, ++ ioctl: devfsd_ioctl, ++ release: devfsd_close, ++}; ++ ++ ++/* Support functions follow */ ++ ++ ++/** ++ * devfs_get - Get a reference to a devfs entry. ++ * @de: The devfs entry. ++ */ ++ ++static struct devfs_entry *devfs_get (struct devfs_entry *de) ++{ ++ VERIFY_ENTRY (de); ++ if (de) atomic_inc (&de->refcount); ++ return de; ++} /* End Function devfs_get */ ++ ++/** ++ * devfs_put - Put (release) a reference to a devfs entry. ++ * @de: The handle to the devfs entry. ++ */ ++ ++void devfs_put (devfs_handle_t de) ++{ ++ if (!de) return; ++ VERIFY_ENTRY (de); ++ if (de->info == POISON_PTR) OOPS ("(%p): poisoned pointer\n", de); ++ if ( !atomic_dec_and_test (&de->refcount) ) return; ++ if (de == root_entry) OOPS ("(%p): root entry being freed\n", de); ++ DPRINTK (DEBUG_FREE, "(%s): de: %p, parent: %p \"%s\"\n", ++ de->name, de, de->parent, ++ de->parent ? de->parent->name : "no parent"); ++ if ( S_ISLNK (de->mode) ) kfree (de->u.symlink.linkname); ++ if ( ( S_ISCHR (de->mode) || S_ISBLK (de->mode) ) && de->u.fcb.autogen ) ++ { ++ devfs_dealloc_devnum ( S_ISCHR (de->mode) ? 
DEVFS_SPECIAL_CHR : ++ DEVFS_SPECIAL_BLK, ++ mk_kdev (de->u.fcb.u.device.major, ++ de->u.fcb.u.device.minor) ); ++ } ++ WRITE_ENTRY_MAGIC (de, 0); ++#ifdef CONFIG_DEVFS_DEBUG ++ spin_lock (&stat_lock); ++ --stat_num_entries; ++ stat_num_bytes -= sizeof *de + de->namelen; ++ if ( S_ISLNK (de->mode) ) stat_num_bytes -= de->u.symlink.length + 1; ++ spin_unlock (&stat_lock); ++#endif ++ de->info = POISON_PTR; ++ kfree (de); ++} /* End Function devfs_put */ ++ ++/** ++ * _devfs_search_dir - Search for a devfs entry in a directory. ++ * @dir: The directory to search. ++ * @name: The name of the entry to search for. ++ * @namelen: The number of characters in @name. ++ * ++ * Search for a devfs entry in a directory and returns a pointer to the entry ++ * on success, else %NULL. The directory must be locked already. ++ * An implicit devfs_get() is performed on the returned entry. ++ */ ++ ++static struct devfs_entry *_devfs_search_dir (struct devfs_entry *dir, ++ const char *name, ++ unsigned int namelen) ++{ ++ struct devfs_entry *curr; ++ ++ if ( !S_ISDIR (dir->mode) ) ++ { ++ PRINTK ("(%s): not a directory\n", dir->name); ++ return NULL; ++ } ++ for (curr = dir->u.dir.first; curr != NULL; curr = curr->next) ++ { ++ if (curr->namelen != namelen) continue; ++ if (memcmp (curr->name, name, namelen) == 0) break; ++ /* Not found: try the next one */ ++ } ++ return devfs_get (curr); ++} /* End Function _devfs_search_dir */ ++ ++ ++/** ++ * _devfs_alloc_entry - Allocate a devfs entry. ++ * @name: The name of the entry. ++ * @namelen: The number of characters in @name. ++ * ++ * Allocate a devfs entry and returns a pointer to the entry on success, else ++ * %NULL. 
++ */ ++ ++static struct devfs_entry *_devfs_alloc_entry (const char *name, ++ unsigned int namelen, ++ umode_t mode) ++{ ++ struct devfs_entry *new; ++ static unsigned long inode_counter = FIRST_INODE; ++ static spinlock_t counter_lock = SPIN_LOCK_UNLOCKED; ++ ++ if ( name && (namelen < 1) ) namelen = strlen (name); ++ if ( ( new = kmalloc (sizeof *new + namelen, GFP_KERNEL) ) == NULL ) ++ return NULL; ++ memset (new, 0, sizeof *new + namelen); /* Will set '\0' on name */ ++ new->mode = mode; ++ if ( S_ISDIR (mode) ) rwlock_init (&new->u.dir.lock); ++ atomic_set (&new->refcount, 1); ++ spin_lock (&counter_lock); ++ new->inode.ino = inode_counter++; ++ spin_unlock (&counter_lock); ++ if (name) memcpy (new->name, name, namelen); ++ new->namelen = namelen; ++ WRITE_ENTRY_MAGIC (new, MAGIC_VALUE); ++#ifdef CONFIG_DEVFS_DEBUG ++ spin_lock (&stat_lock); ++ ++stat_num_entries; ++ stat_num_bytes += sizeof *new + namelen; ++ spin_unlock (&stat_lock); ++#endif ++ return new; ++} /* End Function _devfs_alloc_entry */ ++ ++ ++/** ++ * _devfs_append_entry - Append a devfs entry to a directory's child list. ++ * @dir: The directory to add to. ++ * @de: The devfs entry to append. ++ * @removable: If TRUE, increment the count of removable devices for %dir. ++ * @old_de: If an existing entry exists, it will be written here. This may ++ * be %NULL. An implicit devfs_get() is performed on this entry. ++ * ++ * Append a devfs entry to a directory's list of children, checking first to ++ * see if an entry of the same name exists. The directory will be locked. ++ * The value 0 is returned on success, else a negative error code. ++ * On failure, an implicit devfs_put() is performed on %de. 
++ */ ++ ++static int _devfs_append_entry (devfs_handle_t dir, devfs_handle_t de, ++ int removable, devfs_handle_t *old_de) ++{ ++ int retval; ++ ++ if (old_de) *old_de = NULL; ++ if ( !S_ISDIR (dir->mode) ) ++ { ++ PRINTK ("(%s): dir: \"%s\" is not a directory\n", de->name, dir->name); ++ devfs_put (de); ++ return -ENOTDIR; ++ } ++ write_lock (&dir->u.dir.lock); ++ if (dir->u.dir.no_more_additions) retval = -ENOENT; ++ else ++ { ++ struct devfs_entry *old; ++ ++ old = _devfs_search_dir (dir, de->name, de->namelen); ++ if (old_de) *old_de = old; ++ else devfs_put (old); ++ if (old == NULL) ++ { ++ de->parent = dir; ++ de->prev = dir->u.dir.last; ++ /* Append to the directory's list of children */ ++ if (dir->u.dir.first == NULL) dir->u.dir.first = de; ++ else dir->u.dir.last->next = de; ++ dir->u.dir.last = de; ++ if (removable) ++dir->u.dir.num_removable; ++ retval = 0; ++ } ++ else retval = -EEXIST; ++ } ++ write_unlock (&dir->u.dir.lock); ++ if (retval) devfs_put (de); ++ return retval; ++} /* End Function _devfs_append_entry */ ++ ++ ++/** ++ * _devfs_get_root_entry - Get the root devfs entry. ++ * ++ * Returns the root devfs entry on success, else %NULL. 
++ */ ++ ++static struct devfs_entry *_devfs_get_root_entry (void) ++{ ++ kdev_t devnum; ++ struct devfs_entry *new; ++ static spinlock_t root_lock = SPIN_LOCK_UNLOCKED; ++ ++ /* Always ensure the root is created */ ++ if (root_entry) return root_entry; ++ if ( ( new = _devfs_alloc_entry (NULL, 0,MODE_DIR) ) == NULL ) return NULL; ++ spin_lock (&root_lock); ++ if (root_entry) ++ { ++ spin_unlock (&root_lock); ++ devfs_put (new); ++ return (root_entry); ++ } ++ root_entry = new; ++ spin_unlock (&root_lock); ++ /* And create the entry for ".devfsd" */ ++ if ( ( new = _devfs_alloc_entry (".devfsd", 0, S_IFCHR |S_IRUSR |S_IWUSR) ) ++ == NULL ) return NULL; ++ devnum = devfs_alloc_devnum (DEVFS_SPECIAL_CHR); ++ new->u.fcb.u.device.major = major (devnum); ++ new->u.fcb.u.device.minor = minor (devnum); ++ new->u.fcb.ops = &devfsd_fops; ++ _devfs_append_entry (root_entry, new, FALSE, NULL); ++#ifdef CONFIG_DEVFS_DEBUG ++ if ( ( new = _devfs_alloc_entry (".stat", 0, S_IFCHR | S_IRUGO | S_IWUGO) ) ++ == NULL ) return NULL; ++ devnum = devfs_alloc_devnum (DEVFS_SPECIAL_CHR); ++ new->u.fcb.u.device.major = major (devnum); ++ new->u.fcb.u.device.minor = minor (devnum); ++ new->u.fcb.ops = &stat_fops; ++ _devfs_append_entry (root_entry, new, FALSE, NULL); ++#endif ++ return root_entry; ++} /* End Function _devfs_get_root_entry */ ++ ++ ++/** ++ * _devfs_descend - Descend down a tree using the next component name. ++ * @dir: The directory to search. ++ * @name: The component name to search for. ++ * @namelen: The length of %name. ++ * @next_pos: The position of the next '/' or '\0' is written here. ++ * ++ * Descend into a directory, searching for a component. This function forms ++ * the core of a tree-walking algorithm. The directory will be locked. ++ * The devfs entry corresponding to the component is returned. If there is ++ * no matching entry, %NULL is returned. ++ * An implicit devfs_get() is performed on the returned entry. 
++ */ ++ ++static struct devfs_entry *_devfs_descend (struct devfs_entry *dir, ++ const char *name, int namelen, ++ int *next_pos) ++{ ++ const char *stop, *ptr; ++ struct devfs_entry *entry; ++ ++ if ( (namelen >= 3) && (strncmp (name, "../", 3) == 0) ) ++ { /* Special-case going to parent directory */ ++ *next_pos = 3; ++ return devfs_get (dir->parent); ++ } ++ stop = name + namelen; ++ /* Search for a possible '/' */ ++ for (ptr = name; (ptr < stop) && (*ptr != '/'); ++ptr); ++ *next_pos = ptr - name; ++ read_lock (&dir->u.dir.lock); ++ entry = _devfs_search_dir (dir, name, *next_pos); ++ read_unlock (&dir->u.dir.lock); ++ return entry; ++} /* End Function _devfs_descend */ ++ ++ ++static devfs_handle_t _devfs_make_parent_for_leaf (struct devfs_entry *dir, ++ const char *name, ++ int namelen, int *leaf_pos) ++{ ++ int next_pos = 0; ++ ++ if (dir == NULL) dir = _devfs_get_root_entry (); ++ if (dir == NULL) return NULL; ++ devfs_get (dir); ++ /* Search for possible trailing component and ignore it */ ++ for (--namelen; (namelen > 0) && (name[namelen] != '/'); --namelen); ++ *leaf_pos = (name[namelen] == '/') ? 
(namelen + 1) : 0; ++ for (; namelen > 0; name += next_pos, namelen -= next_pos) ++ { ++ struct devfs_entry *de, *old; ++ ++ if ( ( de = _devfs_descend (dir, name, namelen, &next_pos) ) == NULL ) ++ { ++ de = _devfs_alloc_entry (name, next_pos, MODE_DIR); ++ devfs_get (de); ++ if ( !de || _devfs_append_entry (dir, de, FALSE, &old) ) ++ { ++ devfs_put (de); ++ if ( !old || !S_ISDIR (old->mode) ) ++ { ++ devfs_put (old); ++ devfs_put (dir); ++ return NULL; ++ } ++ de = old; /* Use the existing directory */ ++ } ++ } ++ if (de == dir->parent) ++ { ++ devfs_put (dir); ++ devfs_put (de); ++ return NULL; ++ } ++ devfs_put (dir); ++ dir = de; ++ if (name[next_pos] == '/') ++next_pos; ++ } ++ return dir; ++} /* End Function _devfs_make_parent_for_leaf */ ++ ++ ++static devfs_handle_t _devfs_prepare_leaf (devfs_handle_t *dir, ++ const char *name, umode_t mode) ++{ ++ int namelen, leaf_pos; ++ struct devfs_entry *de; ++ ++ namelen = strlen (name); ++ if ( ( *dir = _devfs_make_parent_for_leaf (*dir, name, namelen, ++ &leaf_pos) ) == NULL ) ++ { ++ PRINTK ("(%s): could not create parent path\n", name); ++ return NULL; ++ } ++ if ( ( de = _devfs_alloc_entry (name + leaf_pos, namelen - leaf_pos,mode) ) ++ == NULL ) ++ { ++ PRINTK ("(%s): could not allocate entry\n", name); ++ devfs_put (*dir); ++ return NULL; ++ } ++ return de; ++} /* End Function _devfs_prepare_leaf */ ++ ++ ++static devfs_handle_t _devfs_walk_path (struct devfs_entry *dir, ++ const char *name, int namelen, ++ int traverse_symlink) ++{ ++ int next_pos = 0; ++ ++ if (dir == NULL) dir = _devfs_get_root_entry (); ++ if (dir == NULL) return NULL; ++ devfs_get (dir); ++ for (; namelen > 0; name += next_pos, namelen -= next_pos) ++ { ++ struct devfs_entry *de, *link; ++ ++ if ( ( de = _devfs_descend (dir, name, namelen, &next_pos) ) == NULL ) ++ { ++ devfs_put (dir); ++ return NULL; ++ } ++ if (S_ISLNK (de->mode) && traverse_symlink) ++ { /* Need to follow the link: this is a stack chomper */ ++ link = 
_devfs_walk_path (dir, de->u.symlink.linkname, ++ de->u.symlink.length, TRUE); ++ devfs_put (de); ++ if (!link) ++ { ++ devfs_put (dir); ++ return NULL; ++ } ++ de = link; ++ } ++ devfs_put (dir); ++ dir = de; ++ if (name[next_pos] == '/') ++next_pos; ++ } ++ return dir; ++} /* End Function _devfs_walk_path */ ++ ++ ++/** ++ * _devfs_find_by_dev - Find a devfs entry in a directory. ++ * @dir: The directory where to search ++ * @major: The major number to search for. ++ * @minor: The minor number to search for. ++ * @type: The type of special file to search for. This may be either ++ * %DEVFS_SPECIAL_CHR or %DEVFS_SPECIAL_BLK. ++ * ++ * Returns the devfs_entry pointer on success, else %NULL. An implicit ++ * devfs_get() is performed. ++ */ ++ ++static struct devfs_entry *_devfs_find_by_dev (struct devfs_entry *dir, ++ unsigned int major, ++ unsigned int minor, char type) ++{ ++ struct devfs_entry *entry, *de; ++ ++ devfs_get (dir); ++ if (dir == NULL) return NULL; ++ if ( !S_ISDIR (dir->mode) ) ++ { ++ PRINTK ("(%p): not a directory\n", dir); ++ devfs_put (dir); ++ return NULL; ++ } ++ /* First search files in this directory */ ++ read_lock (&dir->u.dir.lock); ++ for (entry = dir->u.dir.first; entry != NULL; entry = entry->next) ++ { ++ if ( !S_ISCHR (entry->mode) && !S_ISBLK (entry->mode) ) continue; ++ if ( S_ISCHR (entry->mode) && (type != DEVFS_SPECIAL_CHR) ) continue; ++ if ( S_ISBLK (entry->mode) && (type != DEVFS_SPECIAL_BLK) ) continue; ++ if ( (entry->u.fcb.u.device.major == major) && ++ (entry->u.fcb.u.device.minor == minor) ) ++ { ++ devfs_get (entry); ++ read_unlock (&dir->u.dir.lock); ++ devfs_put (dir); ++ return entry; ++ } ++ /* Not found: try the next one */ ++ } ++ /* Now recursively search the subdirectories: this is a stack chomper */ ++ for (entry = dir->u.dir.first; entry != NULL; entry = entry->next) ++ { ++ if ( !S_ISDIR (entry->mode) ) continue; ++ de = _devfs_find_by_dev (entry, major, minor, type); ++ if (de) ++ { ++ read_unlock 
(&dir->u.dir.lock); ++ devfs_put (dir); ++ return de; ++ } ++ } ++ read_unlock (&dir->u.dir.lock); ++ devfs_put (dir); ++ return NULL; ++} /* End Function _devfs_find_by_dev */ ++ ++ ++/** ++ * _devfs_find_entry - Find a devfs entry. ++ * @dir: The handle to the parent devfs directory entry. If this is %NULL the ++ * name is relative to the root of the devfs. ++ * @name: The name of the entry. This may be %NULL. ++ * @major: The major number. This is used if lookup by @name fails. ++ * @minor: The minor number. This is used if lookup by @name fails. ++ * NOTE: If @major and @minor are both 0, searching by major and minor ++ * numbers is disabled. ++ * @type: The type of special file to search for. This may be either ++ * %DEVFS_SPECIAL_CHR or %DEVFS_SPECIAL_BLK. ++ * @traverse_symlink: If %TRUE then symbolic links are traversed. ++ * ++ * Returns the devfs_entry pointer on success, else %NULL. An implicit ++ * devfs_get() is performed. ++ */ ++ ++static struct devfs_entry *_devfs_find_entry (devfs_handle_t dir, ++ const char *name, ++ unsigned int major, ++ unsigned int minor, ++ char type, int traverse_symlink) ++{ ++ struct devfs_entry *entry; ++ ++ if (name != NULL) ++ { ++ unsigned int namelen = strlen (name); ++ ++ if (name[0] == '/') ++ { ++ /* Skip leading pathname component */ ++ if (namelen < 2) ++ { ++ PRINTK ("(%s): too short\n", name); ++ return NULL; ++ } ++ for (++name, --namelen; (*name != '/') && (namelen > 0); ++ ++name, --namelen); ++ if (namelen < 2) ++ { ++ PRINTK ("(%s): too short\n", name); ++ return NULL; ++ } ++ ++name; ++ --namelen; ++ } ++ entry = _devfs_walk_path (dir, name, namelen, traverse_symlink); ++ if (entry != NULL) return entry; ++ } ++ /* Have to search by major and minor: slow */ ++ if ( (major == 0) && (minor == 0) ) return NULL; ++ return _devfs_find_by_dev (root_entry, major, minor, type); ++} /* End Function _devfs_find_entry */ ++ ++static struct devfs_entry *get_devfs_entry_from_vfs_inode (struct inode *inode) ++{ ++ if 
(inode == NULL) return NULL; ++ VERIFY_ENTRY ( (struct devfs_entry *) inode->u.generic_ip ); ++ return inode->u.generic_ip; ++} /* End Function get_devfs_entry_from_vfs_inode */ ++ ++ ++/** ++ * free_dentry - Free the dentry for a device entry and invalidate inode. ++ * @de: The entry. ++ * ++ * This must only be called after the entry has been unhooked from it's ++ * parent directory. ++ */ ++ ++static void free_dentry (struct devfs_entry *de) ++{ ++ struct dentry *dentry = de->inode.dentry; ++ ++ if (!dentry) return; ++ spin_lock (&dcache_lock); ++ dget_locked (dentry); ++ spin_unlock (&dcache_lock); ++ /* Forcefully remove the inode */ ++ if (dentry->d_inode != NULL) dentry->d_inode->i_nlink = 0; ++ d_drop (dentry); ++ dput (dentry); ++} /* End Function free_dentry */ ++ ++ ++/** ++ * is_devfsd_or_child - Test if the current process is devfsd or one of its children. ++ * @fs_info: The filesystem information. ++ * ++ * Returns %TRUE if devfsd or child, else %FALSE. ++ */ ++ ++static int is_devfsd_or_child (struct fs_info *fs_info) ++{ ++ struct task_struct *p; ++ ++ if (current == fs_info->devfsd_task) return (TRUE); ++ if (current->pgrp == fs_info->devfsd_pgrp) return (TRUE); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,1) ++ for (p = current->p_opptr; p != &init_task; p = p->p_opptr) ++ { ++ if (p == fs_info->devfsd_task) return (TRUE); ++ } ++#endif ++ return (FALSE); ++} /* End Function is_devfsd_or_child */ ++ ++ ++/** ++ * devfsd_queue_empty - Test if devfsd has work pending in its event queue. ++ * @fs_info: The filesystem information. ++ * ++ * Returns %TRUE if the queue is empty, else %FALSE. ++ */ ++ ++static inline int devfsd_queue_empty (struct fs_info *fs_info) ++{ ++ return (fs_info->devfsd_last_event) ? FALSE : TRUE; ++} /* End Function devfsd_queue_empty */ ++ ++ ++/** ++ * wait_for_devfsd_finished - Wait for devfsd to finish processing its event queue. ++ * @fs_info: The filesystem information. 
++ * ++ * Returns %TRUE if no more waiting will be required, else %FALSE. ++ */ ++ ++static int wait_for_devfsd_finished (struct fs_info *fs_info) ++{ ++ DECLARE_WAITQUEUE (wait, current); ++ ++ if (fs_info->devfsd_task == NULL) return (TRUE); ++ if (devfsd_queue_empty (fs_info) && fs_info->devfsd_sleeping) return TRUE; ++ if ( is_devfsd_or_child (fs_info) ) return (FALSE); ++ add_wait_queue (&fs_info->revalidate_wait_queue, &wait); ++ current->state = TASK_UNINTERRUPTIBLE; ++ if (!devfsd_queue_empty (fs_info) || !fs_info->devfsd_sleeping) ++ if (fs_info->devfsd_task) schedule (); ++ remove_wait_queue (&fs_info->revalidate_wait_queue, &wait); ++ current->state = TASK_RUNNING; ++ return (TRUE); ++} /* End Function wait_for_devfsd_finished */ ++ ++ ++/** ++ * devfsd_notify_de - Notify the devfsd daemon of a change. ++ * @de: The devfs entry that has changed. This and all parent entries will ++ * have their reference counts incremented if the event was queued. ++ * @type: The type of change. ++ * @mode: The mode of the entry. ++ * @uid: The user ID. ++ * @gid: The group ID. ++ * @fs_info: The filesystem info. ++ * ++ * Returns %TRUE if an event was queued and devfsd woken up, else %FALSE. ++ */ ++ ++static int devfsd_notify_de (struct devfs_entry *de, ++ unsigned short type, umode_t mode, ++ uid_t uid, gid_t gid, struct fs_info *fs_info, ++ int atomic) ++{ ++ struct devfsd_buf_entry *entry; ++ struct devfs_entry *curr; ++ ++ if ( !( fs_info->devfsd_event_mask & (1 << type) ) ) return (FALSE); ++ if ( ( entry = kmem_cache_alloc (devfsd_buf_cache, ++ atomic ? 
SLAB_ATOMIC : SLAB_KERNEL) ) ++ == NULL ) ++ { ++ atomic_inc (&fs_info->devfsd_overrun_count); ++ return (FALSE); ++ } ++ for (curr = de; curr != NULL; curr = curr->parent) devfs_get (curr); ++ entry->de = de; ++ entry->type = type; ++ entry->mode = mode; ++ entry->uid = uid; ++ entry->gid = gid; ++ entry->next = NULL; ++ spin_lock (&fs_info->devfsd_buffer_lock); ++ if (!fs_info->devfsd_first_event) fs_info->devfsd_first_event = entry; ++ if (fs_info->devfsd_last_event) fs_info->devfsd_last_event->next = entry; ++ fs_info->devfsd_last_event = entry; ++ spin_unlock (&fs_info->devfsd_buffer_lock); ++ wake_up_interruptible (&fs_info->devfsd_wait_queue); ++ return (TRUE); ++} /* End Function devfsd_notify_de */ ++ ++ ++/** ++ * devfsd_notify - Notify the devfsd daemon of a change. ++ * @de: The devfs entry that has changed. ++ * @type: The type of change event. ++ * @wait: If TRUE, the function waits for the daemon to finish processing ++ * the event. ++ */ ++ ++static void devfsd_notify (struct devfs_entry *de,unsigned short type,int wait) ++{ ++ if (devfsd_notify_de (de, type, de->mode, current->euid, ++ current->egid, &fs_info, 0) && wait) ++ wait_for_devfsd_finished (&fs_info); ++} /* End Function devfsd_notify */ ++ ++ ++/** ++ * devfs_register - Register a device entry. ++ * @dir: The handle to the parent devfs directory entry. If this is %NULL the ++ * new name is relative to the root of the devfs. ++ * @name: The name of the entry. ++ * @flags: A set of bitwise-ORed flags (DEVFS_FL_*). ++ * @major: The major number. Not needed for regular files. ++ * @minor: The minor number. Not needed for regular files. ++ * @mode: The default file mode. ++ * @ops: The &file_operations or &block_device_operations structure. ++ * This must not be externally deallocated. ++ * @info: An arbitrary pointer which will be written to the @private_data ++ * field of the &file structure passed to the device driver. 
You can set ++ * this to whatever you like, and change it once the file is opened (the next ++ * file opened will not see this change). ++ * ++ * Returns a handle which may later be used in a call to devfs_unregister(). ++ * On failure %NULL is returned. ++ */ ++ ++devfs_handle_t devfs_register (devfs_handle_t dir, const char *name, ++ unsigned int flags, ++ unsigned int major, unsigned int minor, ++ umode_t mode, void *ops, void *info) ++{ ++ char devtype = S_ISCHR (mode) ? DEVFS_SPECIAL_CHR : DEVFS_SPECIAL_BLK; ++ int err; ++ kdev_t devnum = NODEV; ++ struct devfs_entry *de; ++ ++ if (name == NULL) ++ { ++ PRINTK ("(): NULL name pointer\n"); ++ return NULL; ++ } ++ if (ops == NULL) ++ { ++ if ( S_ISBLK (mode) ) ops = (void *) get_blkfops (major); ++ if (ops == NULL) ++ { ++ PRINTK ("(%s): NULL ops pointer\n", name); ++ return NULL; ++ } ++ PRINTK ("(%s): NULL ops, got %p from major table\n", name, ops); ++ } ++ if ( S_ISDIR (mode) ) ++ { ++ PRINTK ("(%s): creating directories is not allowed\n", name); ++ return NULL; ++ } ++ if ( S_ISLNK (mode) ) ++ { ++ PRINTK ("(%s): creating symlinks is not allowed\n", name); ++ return NULL; ++ } ++ if ( ( S_ISCHR (mode) || S_ISBLK (mode) ) && ++ (flags & DEVFS_FL_AUTO_DEVNUM) ) ++ { ++ if ( kdev_none ( devnum = devfs_alloc_devnum (devtype) ) ) ++ { ++ PRINTK ("(%s): exhausted %s device numbers\n", ++ name, S_ISCHR (mode) ? "char" : "block"); ++ return NULL; ++ } ++ major = major (devnum); ++ minor = minor (devnum); ++ } ++ if ( ( de = _devfs_prepare_leaf (&dir, name, mode) ) == NULL ) ++ { ++ PRINTK ("(%s): could not prepare leaf\n", name); ++ if ( !kdev_none (devnum) ) devfs_dealloc_devnum (devtype, devnum); ++ return NULL; ++ } ++ if ( S_ISCHR (mode) || S_ISBLK (mode) ) ++ { ++ de->u.fcb.u.device.major = major; ++ de->u.fcb.u.device.minor = minor; ++ de->u.fcb.autogen = kdev_none (devnum) ? 
FALSE : TRUE; ++ } ++ else if ( !S_ISREG (mode) ) ++ { ++ PRINTK ("(%s): illegal mode: %x\n", name, mode); ++ devfs_put (de); ++ devfs_put (dir); ++ return (NULL); ++ } ++ de->info = info; ++ if (flags & DEVFS_FL_CURRENT_OWNER) ++ { ++ de->inode.uid = current->uid; ++ de->inode.gid = current->gid; ++ } ++ else ++ { ++ de->inode.uid = 0; ++ de->inode.gid = 0; ++ } ++ de->u.fcb.ops = ops; ++ de->u.fcb.auto_owner = (flags & DEVFS_FL_AUTO_OWNER) ? TRUE : FALSE; ++ de->u.fcb.aopen_notify = (flags & DEVFS_FL_AOPEN_NOTIFY) ? TRUE : FALSE; ++ de->hide = (flags & DEVFS_FL_HIDE) ? TRUE : FALSE; ++ if (flags & DEVFS_FL_REMOVABLE) de->u.fcb.removable = TRUE; ++ if ( ( err = _devfs_append_entry (dir, de, de->u.fcb.removable, NULL) ) ++ != 0 ) ++ { ++ PRINTK ("(%s): could not append to parent, err: %d\n", name, err); ++ devfs_put (dir); ++ if ( !kdev_none (devnum) ) devfs_dealloc_devnum (devtype, devnum); ++ return NULL; ++ } ++ DPRINTK (DEBUG_REGISTER, "(%s): de: %p dir: %p \"%s\" pp: %p\n", ++ name, de, dir, dir->name, dir->parent); ++ devfsd_notify (de, DEVFSD_NOTIFY_REGISTERED, flags & DEVFS_FL_WAIT); ++ devfs_put (dir); ++ return de; ++} /* End Function devfs_register */ ++ ++ ++/** ++ * _devfs_unhook - Unhook a device entry from its parents list ++ * @de: The entry to unhook. ++ * ++ * Returns %TRUE if the entry was unhooked, else %FALSE if it was ++ * previously unhooked. ++ * The caller must have a write lock on the parent directory. 
++ */
++
++static int _devfs_unhook (struct devfs_entry *de)
++{
++    struct devfs_entry *parent;
++
++    if ( !de || (de->prev == de) ) return FALSE; /* NULL entry, or prev points at itself: already unhooked */
++    parent = de->parent;
++    if (de->prev == NULL) parent->u.dir.first = de->next; /* de was the first entry */
++    else de->prev->next = de->next;
++    if (de->next == NULL) parent->u.dir.last = de->prev; /* de was the last entry */
++    else de->next->prev = de->prev;
++    de->prev = de; /* Indicate we're unhooked: a self-referencing prev is the marker tested above */
++    de->next = NULL; /* Force early termination for anyone walking the sibling list via ->next */
++    if ( ( S_ISREG (de->mode) || S_ISCHR (de->mode) || S_ISBLK (de->mode) ) &&
++         de->u.fcb.removable )
++        --parent->u.dir.num_removable;
++    return TRUE;
++} /* End Function _devfs_unhook */
++
++
++/**
++ * _devfs_unregister - Unregister a device entry from its parent.
++ * @dir: The parent directory.
++ * @de: The entry to unregister. May be %NULL, in which case only the lock is released.
++ *
++ * The caller must have a write lock on the parent directory, which is
++ * unlocked by this function. If @de is a directory its children are unregistered too.
++ */
++
++static void _devfs_unregister (struct devfs_entry *dir, struct devfs_entry *de)
++{
++    int unhooked = _devfs_unhook (de);
++
++    write_unlock (&dir->u.dir.lock); /* Released unconditionally, even if nothing was unhooked */
++    if (!unhooked) return;
++    devfs_get (dir); /* Hold the parent until the devfs_put() below */
++    devfs_unregister (de->slave); /* Let it handle the locking */
++    devfsd_notify (de, DEVFSD_NOTIFY_UNREGISTERED, 0);
++    free_dentry (de);
++    devfs_put (dir);
++    if ( !S_ISDIR (de->mode) ) return;
++    while (TRUE) /* Recursively unregister: this is a stack chomper */
++    {
++        struct devfs_entry *child;
++
++        write_lock (&de->u.dir.lock); /* Dropped again by the recursive call below */
++        de->u.dir.no_more_additions = TRUE;
++        child = de->u.dir.first;
++        VERIFY_ENTRY (child);
++        _devfs_unregister (de, child); /* Called even for a NULL child so that the lock is released */
++        if (!child) break; /* Directory is empty: done */
++        DPRINTK (DEBUG_UNREGISTER, "(%s): child: %p refcount: %d\n",
++                 child->name, child, atomic_read (&child->refcount) );
++        devfs_put (child);
++    }
++} /* End Function _devfs_unregister */
++
++
++/**
++ * devfs_unregister - Unregister a device entry.
++ * @de: A handle previously created by devfs_register() or returned from
++ * devfs_get_handle().
If this is %NULL the routine does nothing.
++ */
++
++void devfs_unregister (devfs_handle_t de)
++{
++    VERIFY_ENTRY (de);
++    if ( (de == NULL) || (de->parent == NULL) ) return; /* Nothing to do for NULL or for an entry without a parent */
++    DPRINTK (DEBUG_UNREGISTER, "(%s): de: %p refcount: %d\n",
++             de->name, de, atomic_read (&de->refcount) );
++    write_lock (&de->parent->u.dir.lock); /* Released inside _devfs_unregister() */
++    _devfs_unregister (de->parent, de);
++    devfs_put (de);
++} /* End Function devfs_unregister */
++
++static int devfs_do_symlink (devfs_handle_t dir, const char *name,
++                             unsigned int flags, const char *link,
++                             devfs_handle_t *handle, void *info)
++{ /* Worker for devfs_mk_symlink(): copies @link, creates the entry and appends it; returns 0 or a negative errno */
++    int err;
++    unsigned int linklength;
++    char *newlink;
++    struct devfs_entry *de;
++
++    if (handle != NULL) *handle = NULL; /* Defined result on every failure path */
++    if (name == NULL)
++    {
++        PRINTK ("(): NULL name pointer\n");
++        return -EINVAL;
++    }
++    if (link == NULL)
++    {
++        PRINTK ("(%s): NULL link pointer\n", name);
++        return -EINVAL;
++    }
++    linklength = strlen (link);
++    if ( ( newlink = kmalloc (linklength + 1, GFP_KERNEL) ) == NULL )
++        return -ENOMEM;
++    memcpy (newlink, link, linklength);
++    newlink[linklength] = '\0';
++    if ( ( de = _devfs_prepare_leaf (&dir, name, S_IFLNK | S_IRUGO | S_IXUGO) )
++         == NULL )
++    {
++        PRINTK ("(%s): could not prepare leaf\n", name);
++        kfree (newlink); /* Not yet attached to any entry, so free it here */
++        return -ENOTDIR;
++    }
++    de->info = info;
++    de->hide = (flags & DEVFS_FL_HIDE) ? TRUE : FALSE;
++    de->u.symlink.linkname = newlink; /* The entry now refers to the copied link text */
++    de->u.symlink.length = linklength;
++    if ( ( err = _devfs_append_entry (dir, de, FALSE, NULL) ) != 0 )
++    {
++        PRINTK ("(%s): could not append to parent, err: %d\n", name, err);
++        devfs_put (dir); /* NOTE(review): de/newlink are not released here; presumably _devfs_append_entry() drops de on failure - confirm */
++        return err;
++    }
++    devfs_put (dir);
++#ifdef CONFIG_DEVFS_DEBUG
++    spin_lock (&stat_lock);
++    stat_num_bytes += linklength + 1;
++    spin_unlock (&stat_lock);
++#endif
++    if (handle != NULL) *handle = de;
++    return 0;
++} /* End Function devfs_do_symlink */
++
++
++/**
++ * devfs_mk_symlink - Create a symbolic link in the devfs namespace.
++ * @dir: The handle to the parent devfs directory entry.
If this is %NULL the ++ * new name is relative to the root of the devfs. ++ * @name: The name of the entry. ++ * @flags: A set of bitwise-ORed flags (DEVFS_FL_*). ++ * @link: The destination name. ++ * @handle: The handle to the symlink entry is written here. This may be %NULL. ++ * @info: An arbitrary pointer which will be associated with the entry. ++ * ++ * Returns 0 on success, else a negative error code is returned. ++ */ ++ ++int devfs_mk_symlink (devfs_handle_t dir, const char *name, unsigned int flags, ++ const char *link, devfs_handle_t *handle, void *info) ++{ ++ int err; ++ devfs_handle_t de; ++ ++ if (handle != NULL) *handle = NULL; ++ DPRINTK (DEBUG_REGISTER, "(%s)\n", name); ++ err = devfs_do_symlink (dir, name, flags, link, &de, info); ++ if (err) return err; ++ if (handle == NULL) de->vfs_deletable = TRUE; ++ else *handle = de; ++ devfsd_notify (de, DEVFSD_NOTIFY_REGISTERED, flags & DEVFS_FL_WAIT); ++ return 0; ++} /* End Function devfs_mk_symlink */ ++ ++ ++/** ++ * devfs_mk_dir - Create a directory in the devfs namespace. ++ * @dir: The handle to the parent devfs directory entry. If this is %NULL the ++ * new name is relative to the root of the devfs. ++ * @name: The name of the entry. ++ * @info: An arbitrary pointer which will be associated with the entry. ++ * ++ * Use of this function is optional. The devfs_register() function ++ * will automatically create intermediate directories as needed. This function ++ * is provided for efficiency reasons, as it provides a handle to a directory. ++ * Returns a handle which may later be used in a call to devfs_unregister(). ++ * On failure %NULL is returned. 
++ */ ++ ++devfs_handle_t devfs_mk_dir (devfs_handle_t dir, const char *name, void *info) ++{ ++ int err; ++ struct devfs_entry *de, *old; ++ ++ if (name == NULL) ++ { ++ PRINTK ("(): NULL name pointer\n"); ++ return NULL; ++ } ++ if ( ( de = _devfs_prepare_leaf (&dir, name, MODE_DIR) ) == NULL ) ++ { ++ PRINTK ("(%s): could not prepare leaf\n", name); ++ return NULL; ++ } ++ de->info = info; ++ if ( ( err = _devfs_append_entry (dir, de, FALSE, &old) ) != 0 ) ++ { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,1) ++ if ( old && S_ISDIR (old->mode) ) ++ { ++ PRINTK ("(%s): using old entry in dir: %p \"%s\"\n", ++ name, dir, dir->name); ++ old->vfs_deletable = FALSE; ++ devfs_put (dir); ++ return old; ++ } ++#endif ++ PRINTK ("(%s): could not append to dir: %p \"%s\", err: %d\n", ++ name, dir, dir->name, err); ++ devfs_put (old); ++ devfs_put (dir); ++ return NULL; ++ } ++ DPRINTK (DEBUG_REGISTER, "(%s): de: %p dir: %p \"%s\"\n", ++ name, de, dir, dir->name); ++ devfsd_notify (de, DEVFSD_NOTIFY_REGISTERED, 0); ++ devfs_put (dir); ++ return de; ++} /* End Function devfs_mk_dir */ ++ ++ ++/** ++ * devfs_get_handle - Find the handle of a devfs entry. ++ * @dir: The handle to the parent devfs directory entry. If this is %NULL the ++ * name is relative to the root of the devfs. ++ * @name: The name of the entry. ++ * @major: The major number. This is used if @name is %NULL. ++ * @minor: The minor number. This is used if @name is %NULL. ++ * @type: The type of special file to search for. This may be either ++ * %DEVFS_SPECIAL_CHR or %DEVFS_SPECIAL_BLK. ++ * @traverse_symlinks: If %TRUE then symlink entries in the devfs namespace are ++ * traversed. Symlinks pointing out of the devfs namespace will cause a ++ * failure. Symlink traversal consumes stack space. ++ * ++ * Returns a handle which may later be used in a call to ++ * devfs_unregister(), devfs_get_flags(), or devfs_set_flags(). A ++ * subsequent devfs_put() is required to decrement the refcount. 
++ * On failure %NULL is returned.
++ */
++
++devfs_handle_t devfs_get_handle (devfs_handle_t dir, const char *name,
++                                 unsigned int major, unsigned int minor,
++                                 char type, int traverse_symlinks)
++{
++    if ( (name != NULL) && (name[0] == '\0') ) name = NULL; /* Treat an empty name like a NULL name */
++    return _devfs_find_entry (dir, name, major, minor, type,traverse_symlinks);
++} /* End Function devfs_get_handle */
++
++
++/* Compatibility function. Will be removed sometime in 2.5. Unlike devfs_get_handle() the reference is dropped before returning, so the handle is not pinned. */
++
++devfs_handle_t devfs_find_handle (devfs_handle_t dir, const char *name,
++                                  unsigned int major, unsigned int minor,
++                                  char type, int traverse_symlinks)
++{
++    devfs_handle_t de;
++
++    de = devfs_get_handle (dir, name, major, minor, type, traverse_symlinks);
++    devfs_put (de); /* Drop the reference devfs_get_handle() is documented to take */
++    return de;
++} /* End Function devfs_find_handle */
++
++
++/**
++ * devfs_get_flags - Get the flags for a devfs entry.
++ * @de: The handle to the device entry.
++ * @flags: The flags are written here. Must not be %NULL (it is dereferenced unconditionally).
++ *
++ * Returns 0 on success, else a negative error code (-EINVAL if @de is %NULL).
++ */
++
++int devfs_get_flags (devfs_handle_t de, unsigned int *flags)
++{
++    unsigned int fl = 0;
++
++    if (de == NULL) return -EINVAL;
++    VERIFY_ENTRY (de);
++    if (de->hide) fl |= DEVFS_FL_HIDE;
++    if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) || S_ISREG (de->mode) )
++    { /* The u.fcb flags are only read for char, block and regular entries */
++        if (de->u.fcb.auto_owner) fl |= DEVFS_FL_AUTO_OWNER;
++        if (de->u.fcb.aopen_notify) fl |= DEVFS_FL_AOPEN_NOTIFY;
++        if (de->u.fcb.removable) fl |= DEVFS_FL_REMOVABLE;
++    }
++    *flags = fl;
++    return 0;
++} /* End Function devfs_get_flags */
++
++
++/**
++ * devfs_set_flags - Set the flags for a devfs entry.
++ * @de: The handle to the device entry.
++ * @flags: The flags to set. Unset flags are cleared (only the hide, auto-owner and aopen-notify flags are written).
++ *
++ * Returns 0 on success, else a negative error code.
++ */
++
++int devfs_set_flags (devfs_handle_t de, unsigned int flags)
++{
++    if (de == NULL) return -EINVAL;
++    VERIFY_ENTRY (de);
++    DPRINTK (DEBUG_SET_FLAGS, "(%s): flags: %x\n", de->name, flags);
++    de->hide = (flags & DEVFS_FL_HIDE) ?
TRUE : FALSE; ++ if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) || S_ISREG (de->mode) ) ++ { ++ de->u.fcb.auto_owner = (flags & DEVFS_FL_AUTO_OWNER) ? TRUE : FALSE; ++ de->u.fcb.aopen_notify = (flags & DEVFS_FL_AOPEN_NOTIFY) ? TRUE:FALSE; ++ } ++ return 0; ++} /* End Function devfs_set_flags */ ++ ++ ++/** ++ * devfs_get_maj_min - Get the major and minor numbers for a devfs entry. ++ * @de: The handle to the device entry. ++ * @major: The major number is written here. This may be %NULL. ++ * @minor: The minor number is written here. This may be %NULL. ++ * ++ * Returns 0 on success, else a negative error code. ++ */ ++ ++int devfs_get_maj_min (devfs_handle_t de, unsigned int *major, ++ unsigned int *minor) ++{ ++ if (de == NULL) return -EINVAL; ++ VERIFY_ENTRY (de); ++ if ( S_ISDIR (de->mode) ) return -EISDIR; ++ if ( !S_ISCHR (de->mode) && !S_ISBLK (de->mode) ) return -EINVAL; ++ if (major != NULL) *major = de->u.fcb.u.device.major; ++ if (minor != NULL) *minor = de->u.fcb.u.device.minor; ++ return 0; ++} /* End Function devfs_get_maj_min */ ++ ++ ++/** ++ * devfs_get_handle_from_inode - Get the devfs handle for a VFS inode. ++ * @inode: The VFS inode. ++ * ++ * Returns the devfs handle on success, else %NULL. ++ */ ++ ++devfs_handle_t devfs_get_handle_from_inode (struct inode *inode) ++{ ++ if (!inode || !inode->i_sb) return NULL; ++ if (inode->i_sb->s_magic != DEVFS_SUPER_MAGIC) return NULL; ++ return get_devfs_entry_from_vfs_inode (inode); ++} /* End Function devfs_get_handle_from_inode */ ++ ++ ++/** ++ * devfs_generate_path - Generate a pathname for an entry, relative to the devfs root. ++ * @de: The devfs entry. ++ * @path: The buffer to write the pathname to. The pathname and '\0' ++ * terminator will be written at the end of the buffer. ++ * @buflen: The length of the buffer. ++ * ++ * Returns the offset in the buffer where the pathname starts on success, ++ * else a negative error code. 
++ */ ++ ++int devfs_generate_path (devfs_handle_t de, char *path, int buflen) ++{ ++ int pos; ++#define NAMEOF(de) ( (de)->mode ? (de)->name : (de)->u.name ) ++ ++ if (de == NULL) return -EINVAL; ++ VERIFY_ENTRY (de); ++ if (de->namelen >= buflen) return -ENAMETOOLONG; /* Must be first */ ++ path[buflen - 1] = '\0'; ++ if (de->parent == NULL) return buflen - 1; /* Don't prepend root */ ++ pos = buflen - de->namelen - 1; ++ memcpy (path + pos, NAMEOF (de), de->namelen); ++ for (de = de->parent; de->parent != NULL; de = de->parent) ++ { ++ if (pos - de->namelen - 1 < 0) return -ENAMETOOLONG; ++ path[--pos] = '/'; ++ pos -= de->namelen; ++ memcpy (path + pos, NAMEOF (de), de->namelen); ++ } ++ return pos; ++} /* End Function devfs_generate_path */ ++ ++ ++/** ++ * devfs_get_ops - Get the device operations for a devfs entry. ++ * @de: The handle to the device entry. ++ * ++ * Returns a pointer to the device operations on success, else NULL. ++ * The use count for the module owning the operations will be incremented. 
++ */ ++ ++void *devfs_get_ops (devfs_handle_t de) ++{ ++ struct module *owner; ++ ++ if (de == NULL) return NULL; ++ VERIFY_ENTRY (de); ++ if ( !S_ISCHR (de->mode) && !S_ISBLK (de->mode) && !S_ISREG (de->mode) ) ++ return NULL; ++ if (de->u.fcb.ops == NULL) return NULL; ++ read_lock (&de->parent->u.dir.lock); /* Prevent module from unloading */ ++ if (de->next == de) owner = NULL; /* Ops pointer is already stale */ ++ else if ( S_ISCHR (de->mode) || S_ISREG (de->mode) ) ++ owner = ( (struct file_operations *) de->u.fcb.ops )->owner; ++ else owner = ( (struct block_device_operations *) de->u.fcb.ops )->owner; ++ if ( (de->next == de) || !try_inc_mod_count (owner) ) ++ { /* Entry is already unhooked or module is unloading */ ++ read_unlock (&de->parent->u.dir.lock); ++ return NULL; ++ } ++ read_unlock (&de->parent->u.dir.lock); /* Module can continue unloading*/ ++ return de->u.fcb.ops; ++} /* End Function devfs_get_ops */ ++ ++ ++/** ++ * devfs_put_ops - Put the device operations for a devfs entry. ++ * @de: The handle to the device entry. ++ * ++ * The use count for the module owning the operations will be decremented. ++ */ ++ ++void devfs_put_ops (devfs_handle_t de) ++{ ++ struct module *owner; ++ ++ if (de == NULL) return; ++ VERIFY_ENTRY (de); ++ if ( !S_ISCHR (de->mode) && !S_ISBLK (de->mode) && !S_ISREG (de->mode) ) ++ return; ++ if (de->u.fcb.ops == NULL) return; ++ if ( S_ISCHR (de->mode) || S_ISREG (de->mode) ) ++ owner = ( (struct file_operations *) de->u.fcb.ops )->owner; ++ else owner = ( (struct block_device_operations *) de->u.fcb.ops )->owner; ++ if (owner) __MOD_DEC_USE_COUNT (owner); ++} /* End Function devfs_put_ops */ ++ ++ ++/** ++ * devfs_set_file_size - Set the file size for a devfs regular file. ++ * @de: The handle to the device entry. ++ * @size: The new file size. ++ * ++ * Returns 0 on success, else a negative error code. 
++ */ ++ ++int devfs_set_file_size (devfs_handle_t de, unsigned long size) ++{ ++ if (de == NULL) return -EINVAL; ++ VERIFY_ENTRY (de); ++ if ( !S_ISREG (de->mode) ) return -EINVAL; ++ if (de->u.fcb.u.file.size == size) return 0; ++ de->u.fcb.u.file.size = size; ++ if (de->inode.dentry == NULL) return 0; ++ if (de->inode.dentry->d_inode == NULL) return 0; ++ de->inode.dentry->d_inode->i_size = size; ++ return 0; ++} /* End Function devfs_set_file_size */ ++ ++ ++/** ++ * devfs_get_info - Get the info pointer written to private_data of @de upon open. ++ * @de: The handle to the device entry. ++ * ++ * Returns the info pointer. ++ */ ++void *devfs_get_info (devfs_handle_t de) ++{ ++ if (de == NULL) return NULL; ++ VERIFY_ENTRY (de); ++ return de->info; ++} /* End Function devfs_get_info */ ++ ++ ++/** ++ * devfs_set_info - Set the info pointer written to private_data upon open. ++ * @de: The handle to the device entry. ++ * @info: pointer to the data ++ * ++ * Returns 0 on success, else a negative error code. ++ */ ++int devfs_set_info (devfs_handle_t de, void *info) ++{ ++ if (de == NULL) return -EINVAL; ++ VERIFY_ENTRY (de); ++ de->info = info; ++ return 0; ++} /* End Function devfs_set_info */ ++ ++ ++/** ++ * devfs_get_parent - Get the parent device entry. ++ * @de: The handle to the device entry. ++ * ++ * Returns the parent device entry if it exists, else %NULL. ++ */ ++devfs_handle_t devfs_get_parent (devfs_handle_t de) ++{ ++ if (de == NULL) return NULL; ++ VERIFY_ENTRY (de); ++ return de->parent; ++} /* End Function devfs_get_parent */ ++ ++ ++/** ++ * devfs_get_first_child - Get the first leaf node in a directory. ++ * @de: The handle to the device entry. ++ * ++ * Returns the leaf node device entry if it exists, else %NULL. 
++ */
++
++devfs_handle_t devfs_get_first_child (devfs_handle_t de)
++{
++    if (de == NULL) return NULL;
++    VERIFY_ENTRY (de);
++    if ( !S_ISDIR (de->mode) ) return NULL; /* Only directories have children */
++    return de->u.dir.first; /* Read without taking the directory lock or a reference */
++} /* End Function devfs_get_first_child */
++
++
++/**
++ * devfs_get_next_sibling - Get the next sibling leaf node for a device entry.
++ * @de: The handle to the device entry.
++ *
++ * Returns the leaf node device entry if it exists, else %NULL. No reference is taken on it.
++ */
++
++devfs_handle_t devfs_get_next_sibling (devfs_handle_t de)
++{
++    if (de == NULL) return NULL;
++    VERIFY_ENTRY (de);
++    return de->next;
++} /* End Function devfs_get_next_sibling */
++
++
++/**
++ * devfs_auto_unregister - Configure a devfs entry to be automatically unregistered.
++ * @master: The master devfs entry. Only one slave may be registered.
++ * @slave: The devfs entry which will be automatically unregistered when the
++ * master entry is unregistered. It is illegal to call devfs_unregister()
++ * on this entry.
++ */
++
++void devfs_auto_unregister (devfs_handle_t master, devfs_handle_t slave)
++{
++    if (master == NULL) return;
++    VERIFY_ENTRY (master);
++    VERIFY_ENTRY (slave);
++    if (master->slave != NULL)
++    {
++        /* Because of the dumbness of the layers above, ignore duplicates */
++        if (master->slave == slave) return;
++        PRINTK ("(%s): only one slave allowed\n", master->name);
++        OOPS ("(): old slave: \"%s\" new slave: \"%s\"\n",
++              master->slave->name, slave->name);
++    }
++    master->slave = slave; /* Consumed by _devfs_unregister(), which unregisters de->slave */
++} /* End Function devfs_auto_unregister */
++
++
++/**
++ * devfs_get_unregister_slave - Get the slave entry which will be automatically unregistered.
++ * @master: The master devfs entry.
++ *
++ * Returns the slave which will be unregistered when @master is unregistered.
++ */ ++ ++devfs_handle_t devfs_get_unregister_slave (devfs_handle_t master) ++{ ++ if (master == NULL) return NULL; ++ VERIFY_ENTRY (master); ++ return master->slave; ++} /* End Function devfs_get_unregister_slave */ ++ ++ ++/** ++ * devfs_get_name - Get the name for a device entry in its parent directory. ++ * @de: The handle to the device entry. ++ * @namelen: The length of the name is written here. This may be %NULL. ++ * ++ * Returns the name on success, else %NULL. ++ */ ++ ++const char *devfs_get_name (devfs_handle_t de, unsigned int *namelen) ++{ ++ if (de == NULL) return NULL; ++ VERIFY_ENTRY (de); ++ if (namelen != NULL) *namelen = de->namelen; ++ return de->name; ++} /* End Function devfs_get_name */ ++ ++ ++/** ++ * devfs_register_chrdev - Optionally register a conventional character driver. ++ * @major: The major number for the driver. ++ * @name: The name of the driver (as seen in /proc/devices). ++ * @fops: The &file_operations structure pointer. ++ * ++ * This function will register a character driver provided the "devfs=only" ++ * option was not provided at boot time. ++ * Returns 0 on success, else a negative error code on failure. ++ */ ++ ++int devfs_register_chrdev (unsigned int major, const char *name, ++ struct file_operations *fops) ++{ ++ if (boot_options & OPTION_ONLY) return 0; ++ return register_chrdev (major, name, fops); ++} /* End Function devfs_register_chrdev */ ++ ++ ++/** ++ * devfs_register_blkdev - Optionally register a conventional block driver. ++ * @major: The major number for the driver. ++ * @name: The name of the driver (as seen in /proc/devices). ++ * @bdops: The &block_device_operations structure pointer. ++ * ++ * This function will register a block driver provided the "devfs=only" ++ * option was not provided at boot time. ++ * Returns 0 on success, else a negative error code on failure. 
++ */ ++ ++int devfs_register_blkdev (unsigned int major, const char *name, ++ struct block_device_operations *bdops) ++{ ++ if (boot_options & OPTION_ONLY) return 0; ++ return register_blkdev (major, name, bdops); ++} /* End Function devfs_register_blkdev */ ++ ++ ++/** ++ * devfs_unregister_chrdev - Optionally unregister a conventional character driver. ++ * @major: The major number for the driver. ++ * @name: The name of the driver (as seen in /proc/devices). ++ * ++ * This function will unregister a character driver provided the "devfs=only" ++ * option was not provided at boot time. ++ * Returns 0 on success, else a negative error code on failure. ++ */ ++ ++int devfs_unregister_chrdev (unsigned int major, const char *name) ++{ ++ if (boot_options & OPTION_ONLY) return 0; ++ return unregister_chrdev (major, name); ++} /* End Function devfs_unregister_chrdev */ ++ ++ ++/** ++ * devfs_unregister_blkdev - Optionally unregister a conventional block driver. ++ * @major: The major number for the driver. ++ * @name: The name of the driver (as seen in /proc/devices). ++ * ++ * This function will unregister a block driver provided the "devfs=only" ++ * option was not provided at boot time. ++ * Returns 0 on success, else a negative error code on failure. ++ */ ++ ++int devfs_unregister_blkdev (unsigned int major, const char *name) ++{ ++ if (boot_options & OPTION_ONLY) return 0; ++ return unregister_blkdev (major, name); ++} /* End Function devfs_unregister_blkdev */ ++ ++/** ++ * devfs_setup - Process kernel boot options. ++ * @str: The boot options after the "devfs=". 
++ */ ++ ++static int __init devfs_setup (char *str) ++{ ++ static struct ++ { ++ char *name; ++ unsigned int mask; ++ unsigned int *opt; ++ } devfs_options_tab[] __initdata = ++ { ++#ifdef CONFIG_DEVFS_DEBUG ++ {"dall", DEBUG_ALL, &devfs_debug_init}, ++ {"dmod", DEBUG_MODULE_LOAD, &devfs_debug_init}, ++ {"dreg", DEBUG_REGISTER, &devfs_debug_init}, ++ {"dunreg", DEBUG_UNREGISTER, &devfs_debug_init}, ++ {"dfree", DEBUG_FREE, &devfs_debug_init}, ++ {"diget", DEBUG_I_GET, &devfs_debug_init}, ++ {"dchange", DEBUG_SET_FLAGS, &devfs_debug_init}, ++ {"dsread", DEBUG_S_READ, &devfs_debug_init}, ++ {"dichange", DEBUG_I_CHANGE, &devfs_debug_init}, ++ {"dimknod", DEBUG_I_MKNOD, &devfs_debug_init}, ++ {"dilookup", DEBUG_I_LOOKUP, &devfs_debug_init}, ++ {"diunlink", DEBUG_I_UNLINK, &devfs_debug_init}, ++#endif /* CONFIG_DEVFS_DEBUG */ ++ {"only", OPTION_ONLY, &boot_options}, ++ {"mount", OPTION_MOUNT, &boot_options}, ++ {NULL, 0, NULL} ++ }; ++ ++ while ( (*str != '\0') && !isspace (*str) ) ++ { ++ int i, found = 0, invert = 0; ++ ++ if (strncmp (str, "no", 2) == 0) ++ { ++ invert = 1; ++ str += 2; ++ } ++ for (i = 0; devfs_options_tab[i].name != NULL; i++) ++ { ++ int len = strlen (devfs_options_tab[i].name); ++ ++ if (strncmp (str, devfs_options_tab[i].name, len) == 0) ++ { ++ if (invert) ++ *devfs_options_tab[i].opt &= ~devfs_options_tab[i].mask; ++ else ++ *devfs_options_tab[i].opt |= devfs_options_tab[i].mask; ++ str += len; ++ found = 1; ++ break; ++ } ++ } ++ if (!found) return 0; /* No match */ ++ if (*str != ',') return 0; /* No more options */ ++ ++str; ++ } ++ return 1; ++} /* End Function devfs_setup */ ++ ++__setup("devfs=", devfs_setup); ++ ++EXPORT_SYMBOL(devfs_put); ++EXPORT_SYMBOL(devfs_register); ++EXPORT_SYMBOL(devfs_unregister); ++EXPORT_SYMBOL(devfs_mk_symlink); ++EXPORT_SYMBOL(devfs_mk_dir); ++EXPORT_SYMBOL(devfs_get_handle); ++EXPORT_SYMBOL(devfs_find_handle); ++EXPORT_SYMBOL(devfs_get_flags); ++EXPORT_SYMBOL(devfs_set_flags); 
++EXPORT_SYMBOL(devfs_get_maj_min); ++EXPORT_SYMBOL(devfs_get_handle_from_inode); ++EXPORT_SYMBOL(devfs_generate_path); ++EXPORT_SYMBOL(devfs_get_ops); ++EXPORT_SYMBOL(devfs_set_file_size); ++EXPORT_SYMBOL(devfs_get_info); ++EXPORT_SYMBOL(devfs_set_info); ++EXPORT_SYMBOL(devfs_get_parent); ++EXPORT_SYMBOL(devfs_get_first_child); ++EXPORT_SYMBOL(devfs_get_next_sibling); ++EXPORT_SYMBOL(devfs_auto_unregister); ++EXPORT_SYMBOL(devfs_get_unregister_slave); ++EXPORT_SYMBOL(devfs_get_name); ++EXPORT_SYMBOL(devfs_register_chrdev); ++EXPORT_SYMBOL(devfs_register_blkdev); ++EXPORT_SYMBOL(devfs_unregister_chrdev); ++EXPORT_SYMBOL(devfs_unregister_blkdev); ++ ++ ++/** ++ * try_modload - Notify devfsd of an inode lookup by a non-devfsd process. ++ * @parent: The parent devfs entry. ++ * @fs_info: The filesystem info. ++ * @name: The device name. ++ * @namelen: The number of characters in @name. ++ * @buf: A working area that will be used. This must not go out of scope ++ * until devfsd is idle again. ++ * ++ * Returns 0 on success (event was queued), else a negative error code. ++ */ ++ ++static int try_modload (struct devfs_entry *parent, struct fs_info *fs_info, ++ const char *name, unsigned namelen, ++ struct devfs_entry *buf) ++{ ++ if ( !( fs_info->devfsd_event_mask & (1 << DEVFSD_NOTIFY_LOOKUP) ) ) ++ return -ENOENT; ++ if ( is_devfsd_or_child (fs_info) ) return -ENOENT; ++ memset (buf, 0, sizeof *buf); ++ atomic_set (&buf->refcount, 1); ++ buf->parent = parent; ++ buf->namelen = namelen; ++ buf->u.name = name; ++ WRITE_ENTRY_MAGIC (buf, MAGIC_VALUE); ++ if ( !devfsd_notify_de (buf, DEVFSD_NOTIFY_LOOKUP, 0, ++ current->euid, current->egid, fs_info, 0) ) ++ return -ENOENT; ++ /* Possible success: event has been queued */ ++ return 0; ++} /* End Function try_modload */ ++ ++ ++/** ++ * check_disc_changed - Check if a removable disc was changed. ++ * @de: The device. ++ * ++ * Returns 1 if the media was changed, else 0. 
++ * ++ * This function may block, and may indirectly cause the parent directory ++ * contents to be changed due to partition re-reading. ++ */ ++ ++static int check_disc_changed (struct devfs_entry *de) ++{ ++ int tmp; ++ int retval = 0; ++ kdev_t dev = mk_kdev (de->u.fcb.u.device.major, de->u.fcb.u.device.minor); ++ struct block_device_operations *bdops; ++ extern int warn_no_part; ++ ++ if ( !S_ISBLK (de->mode) ) return 0; ++ bdops = devfs_get_ops (de); ++ if (!bdops) return 0; ++ if (bdops->check_media_change == NULL) goto out; ++ if ( !bdops->check_media_change (dev) ) goto out; ++ retval = 1; ++ printk (KERN_DEBUG "VFS: Disk change detected on device %s\n", ++ kdevname (dev) ); ++ if ( invalidate_device (dev, 0) ) ++ printk (KERN_WARNING "VFS: busy inodes on changed media..\n"); ++ /* Ugly hack to disable messages about unable to read partition table */ ++ tmp = warn_no_part; ++ warn_no_part = 0; ++ if (bdops->revalidate) bdops->revalidate (dev); ++ warn_no_part = tmp; ++out: ++ devfs_put_ops (de); ++ return retval; ++} /* End Function check_disc_changed */ ++ ++ ++/** ++ * scan_dir_for_removable - Scan a directory for removable media devices and check media. ++ * @dir: The directory. ++ * ++ * This function may block, and may indirectly cause the directory ++ * contents to be changed due to partition re-reading. The directory will ++ * be locked for reading. ++ */ ++ ++static void scan_dir_for_removable (struct devfs_entry *dir) ++{ ++ struct devfs_entry *de; ++ ++ read_lock (&dir->u.dir.lock); ++ if (dir->u.dir.num_removable < 1) de = NULL; ++ else ++ { ++ for (de = dir->u.dir.first; de != NULL; de = de->next) ++ { ++ if (S_ISBLK (de->mode) && de->u.fcb.removable) break; ++ } ++ devfs_get (de); ++ } ++ read_unlock (&dir->u.dir.lock); ++ if (de) check_disc_changed (de); ++ devfs_put (de); ++} /* End Function scan_dir_for_removable */ ++ ++/** ++ * get_removable_partition - Get removable media partition. ++ * @dir: The parent directory. 
++ * @name: The name of the entry.
++ * @namelen: The number of characters in @name.
++ *
++ * Returns 1 if the media was changed, else 0.
++ *
++ * This function may block, and may indirectly cause the directory
++ * contents to be changed due to partition re-reading. The directory must
++ * be locked for reading upon entry, and will be unlocked upon exit.
++ */
++
++static int get_removable_partition (struct devfs_entry *dir, const char *name,
++                                    unsigned int namelen)
++{
++    int retval;
++    struct devfs_entry *de;
++
++    if (dir->u.dir.num_removable < 1)
++    { /* No removable entries: just honour the unlock-on-exit contract */
++        read_unlock (&dir->u.dir.lock);
++        return 0;
++    }
++    for (de = dir->u.dir.first; de != NULL; de = de->next)
++    {
++        if (!S_ISBLK (de->mode) || !de->u.fcb.removable) continue;
++        if (strcmp (de->name, "disc") == 0) break;
++        /* Support for names where the partition is appended to the disc name
++         */
++        if (de->namelen >= namelen) continue; /* Entry name must be a strict prefix of @name */
++        if (strncmp (de->name, name, de->namelen) == 0) break;
++    }
++    devfs_get (de); /* Take the reference before the lock is dropped; de is NULL if nothing matched */
++    read_unlock (&dir->u.dir.lock);
++    retval = de ?
check_disc_changed (de) : 0; ++ devfs_put (de); ++ return retval; ++} /* End Function get_removable_partition */ ++ ++ ++/* Superblock operations follow */ ++ ++static struct inode_operations devfs_iops; ++static struct inode_operations devfs_dir_iops; ++static struct file_operations devfs_fops; ++static struct file_operations devfs_dir_fops; ++static struct inode_operations devfs_symlink_iops; ++ ++static int devfs_notify_change (struct dentry *dentry, struct iattr *iattr) ++{ ++ int retval; ++ struct devfs_entry *de; ++ struct inode *inode = dentry->d_inode; ++ struct fs_info *fs_info = inode->i_sb->u.generic_sbp; ++ ++ de = get_devfs_entry_from_vfs_inode (inode); ++ if (de == NULL) return -ENODEV; ++ retval = inode_change_ok (inode, iattr); ++ if (retval != 0) return retval; ++ retval = inode_setattr (inode, iattr); ++ if (retval != 0) return retval; ++ DPRINTK (DEBUG_I_CHANGE, "(%d): VFS inode: %p devfs_entry: %p\n", ++ (int) inode->i_ino, inode, de); ++ DPRINTK (DEBUG_I_CHANGE, "(): mode: 0%o uid: %d gid: %d\n", ++ (int) inode->i_mode, (int) inode->i_uid, (int) inode->i_gid); ++ /* Inode is not on hash chains, thus must save permissions here rather ++ than in a write_inode() method */ ++ if ( ( !S_ISREG (inode->i_mode) && !S_ISCHR (inode->i_mode) && ++ !S_ISBLK (inode->i_mode) ) || !de->u.fcb.auto_owner ) ++ { ++ de->mode = inode->i_mode; ++ de->inode.uid = inode->i_uid; ++ de->inode.gid = inode->i_gid; ++ } ++ de->inode.atime = inode->i_atime; ++ de->inode.mtime = inode->i_mtime; ++ de->inode.ctime = inode->i_ctime; ++ if ( ( iattr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID) ) && ++ !is_devfsd_or_child (fs_info) ) ++ devfsd_notify_de (de, DEVFSD_NOTIFY_CHANGE, inode->i_mode, ++ inode->i_uid, inode->i_gid, fs_info, 0); ++ return 0; ++} /* End Function devfs_notify_change */ ++ ++static int devfs_statfs (struct super_block *sb, struct statfs *buf) ++{ ++ buf->f_type = DEVFS_SUPER_MAGIC; ++ buf->f_bsize = FAKE_BLOCK_SIZE; ++ buf->f_bfree = 0; ++ buf->f_bavail 
= 0; ++ buf->f_ffree = 0; ++ buf->f_namelen = NAME_MAX; ++ return 0; ++} /* End Function devfs_statfs */ ++ ++static void devfs_clear_inode (struct inode *inode) ++{ ++ if ( S_ISBLK (inode->i_mode) ) bdput (inode->i_bdev); ++} /* End Function devfs_clear_inode */ ++ ++static struct super_operations devfs_sops = ++{ ++ put_inode: force_delete, ++ clear_inode: devfs_clear_inode, ++ statfs: devfs_statfs, ++}; ++ ++ ++/** ++ * _devfs_get_vfs_inode - Get a VFS inode. ++ * @sb: The super block. ++ * @de: The devfs inode. ++ * @dentry: The dentry to register with the devfs inode. ++ * ++ * Returns the inode on success, else %NULL. An implicit devfs_get() is ++ * performed if the inode is created. ++ */ ++ ++static struct inode *_devfs_get_vfs_inode (struct super_block *sb, ++ struct devfs_entry *de, ++ struct dentry *dentry) ++{ ++ int is_fcb = FALSE; ++ struct inode *inode; ++ ++ if (de->prev == de) return NULL; /* Quick check to see if unhooked */ ++ if ( ( inode = new_inode (sb) ) == NULL ) ++ { ++ PRINTK ("(%s): new_inode() failed, de: %p\n", de->name, de); ++ return NULL; ++ } ++ if (de->parent) ++ { ++ read_lock (&de->parent->u.dir.lock); ++ if (de->prev != de) de->inode.dentry = dentry; /* Not unhooked */ ++ read_unlock (&de->parent->u.dir.lock); ++ } ++ else de->inode.dentry = dentry; /* Root: no locking needed */ ++ if (de->inode.dentry != dentry) ++ { /* Must have been unhooked */ ++ iput (inode); ++ return NULL; ++ } ++ inode->u.generic_ip = devfs_get (de); ++ inode->i_ino = de->inode.ino; ++ DPRINTK (DEBUG_I_GET, "(%d): VFS inode: %p devfs_entry: %p\n", ++ (int) inode->i_ino, inode, de); ++ inode->i_blocks = 0; ++ inode->i_blksize = FAKE_BLOCK_SIZE; ++ inode->i_op = &devfs_iops; ++ inode->i_fop = &devfs_fops; ++ inode->i_rdev = NODEV; ++ if ( S_ISCHR (de->mode) ) ++ { ++ inode->i_rdev = mk_kdev (de->u.fcb.u.device.major, ++ de->u.fcb.u.device.minor); ++ inode->i_cdev = cdget ( kdev_t_to_nr (inode->i_rdev) ); ++ is_fcb = TRUE; ++ } ++ else if ( S_ISBLK 
(de->mode) ) ++ { ++ inode->i_rdev = mk_kdev (de->u.fcb.u.device.major, ++ de->u.fcb.u.device.minor); ++ if (bd_acquire (inode) == 0) ++ { ++ if (!inode->i_bdev->bd_op && de->u.fcb.ops) ++ inode->i_bdev->bd_op = de->u.fcb.ops; ++ } ++ else PRINTK ("(%d): no block device from bdget()\n",(int)inode->i_ino); ++ is_fcb = TRUE; ++ } ++ else if ( S_ISFIFO (de->mode) ) inode->i_fop = &def_fifo_fops; ++ else if ( S_ISREG (de->mode) ) ++ { ++ inode->i_size = de->u.fcb.u.file.size; ++ is_fcb = TRUE; ++ } ++ else if ( S_ISDIR (de->mode) ) ++ { ++ inode->i_op = &devfs_dir_iops; ++ inode->i_fop = &devfs_dir_fops; ++ } ++ else if ( S_ISLNK (de->mode) ) ++ { ++ inode->i_op = &devfs_symlink_iops; ++ inode->i_size = de->u.symlink.length; ++ } ++ if (is_fcb && de->u.fcb.auto_owner) ++ inode->i_mode = (de->mode & S_IFMT) | S_IRUGO | S_IWUGO; ++ else inode->i_mode = de->mode; ++ inode->i_uid = de->inode.uid; ++ inode->i_gid = de->inode.gid; ++ inode->i_atime = de->inode.atime; ++ inode->i_mtime = de->inode.mtime; ++ inode->i_ctime = de->inode.ctime; ++ DPRINTK (DEBUG_I_GET, "(): mode: 0%o uid: %d gid: %d\n", ++ (int) inode->i_mode, (int) inode->i_uid, (int) inode->i_gid); ++ return inode; ++} /* End Function _devfs_get_vfs_inode */ ++ ++ ++/* File operations for device entries follow */ ++ ++static int devfs_readdir (struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err, count; ++ int stored = 0; ++ struct fs_info *fs_info; ++ struct devfs_entry *parent, *de, *next = NULL; ++ struct inode *inode = file->f_dentry->d_inode; ++ ++ fs_info = inode->i_sb->u.generic_sbp; ++ parent = get_devfs_entry_from_vfs_inode (file->f_dentry->d_inode); ++ if ( (long) file->f_pos < 0 ) return -EINVAL; ++ DPRINTK (DEBUG_F_READDIR, "(%s): fs_info: %p pos: %ld\n", ++ parent->name, fs_info, (long) file->f_pos); ++ switch ( (long) file->f_pos ) ++ { ++ case 0: ++ scan_dir_for_removable (parent); ++ err = (*filldir) (dirent, "..", 2, file->f_pos, ++ file->f_dentry->d_parent->d_inode->i_ino, 
DT_DIR); ++ if (err == -EINVAL) break; ++ if (err < 0) return err; ++ file->f_pos++; ++ ++stored; ++ /* Fall through */ ++ case 1: ++ err = (*filldir) (dirent, ".", 1, file->f_pos, inode->i_ino, DT_DIR); ++ if (err == -EINVAL) break; ++ if (err < 0) return err; ++ file->f_pos++; ++ ++stored; ++ /* Fall through */ ++ default: ++ /* Skip entries */ ++ count = file->f_pos - 2; ++ read_lock (&parent->u.dir.lock); ++ for (de = parent->u.dir.first; de && (count > 0); de = de->next) ++ if ( !IS_HIDDEN (de) ) --count; ++ devfs_get (de); ++ read_unlock (&parent->u.dir.lock); ++ /* Now add all remaining entries */ ++ while (de) ++ { ++ if ( IS_HIDDEN (de) ) err = 0; ++ else ++ { ++ err = (*filldir) (dirent, de->name, de->namelen, ++ file->f_pos, de->inode.ino, de->mode >> 12); ++ if (err >= 0) ++ { ++ file->f_pos++; ++ ++stored; ++ } ++ } ++ read_lock (&parent->u.dir.lock); ++ next = devfs_get (de->next); ++ read_unlock (&parent->u.dir.lock); ++ devfs_put (de); ++ de = next; ++ if (err == -EINVAL) break; ++ if (err < 0) return err; ++ } ++ break; ++ } ++ return stored; ++} /* End Function devfs_readdir */ ++ ++static int devfs_open (struct inode *inode, struct file *file) ++{ ++ int err; ++ struct fcb_type *df; ++ struct devfs_entry *de; ++ struct fs_info *fs_info = inode->i_sb->u.generic_sbp; ++ ++ de = get_devfs_entry_from_vfs_inode (inode); ++ if (de == NULL) return -ENODEV; ++ if ( S_ISDIR (de->mode) ) return 0; ++ df = &de->u.fcb; ++ file->private_data = de->info; ++ if ( S_ISBLK (inode->i_mode) ) ++ { ++ file->f_op = &def_blk_fops; ++ if (df->ops) inode->i_bdev->bd_op = df->ops; ++ err = def_blk_fops.open (inode, file); ++ } ++ else ++ { ++ file->f_op = fops_get ( (struct file_operations *) df->ops ); ++ if (file->f_op) ++ { ++ lock_kernel (); ++ err = file->f_op->open ? 
(*file->f_op->open) (inode, file) : 0; ++ unlock_kernel (); ++ } ++ else ++ { /* Fallback to legacy scheme */ ++ if ( S_ISCHR (inode->i_mode) ) err = chrdev_open (inode, file); ++ else err = -ENODEV; ++ } ++ } ++ if (err < 0) return err; ++ /* Open was successful */ ++ if (df->open) return 0; ++ df->open = TRUE; /* This is the first open */ ++ if (df->auto_owner) ++ { ++ /* Change the ownership/protection to what driver specified */ ++ inode->i_mode = de->mode; ++ inode->i_uid = current->euid; ++ inode->i_gid = current->egid; ++ } ++ if ( df->aopen_notify && !is_devfsd_or_child (fs_info) ) ++ devfsd_notify_de (de, DEVFSD_NOTIFY_ASYNC_OPEN, inode->i_mode, ++ current->euid, current->egid, fs_info, 0); ++ return 0; ++} /* End Function devfs_open */ ++ ++static struct file_operations devfs_fops = ++{ ++ open: devfs_open, ++}; ++ ++static struct file_operations devfs_dir_fops = ++{ ++ read: generic_read_dir, ++ readdir: devfs_readdir, ++ open: devfs_open, ++}; ++ ++ ++/* Dentry operations for device entries follow */ ++ ++ ++/** ++ * devfs_d_release - Callback for when a dentry is freed. ++ * @dentry: The dentry. ++ */ ++ ++static void devfs_d_release (struct dentry *dentry) ++{ ++ DPRINTK (DEBUG_D_RELEASE, "(%p): inode: %p\n", dentry, dentry->d_inode); ++} /* End Function devfs_d_release */ ++ ++/** ++ * devfs_d_iput - Callback for when a dentry loses its inode. ++ * @dentry: The dentry. ++ * @inode: The inode. 
++ */ ++ ++static void devfs_d_iput (struct dentry *dentry, struct inode *inode) ++{ ++ struct devfs_entry *de; ++ ++ de = get_devfs_entry_from_vfs_inode (inode); ++ DPRINTK (DEBUG_D_IPUT,"(%s): dentry: %p inode: %p de: %p de->dentry: %p\n", ++ de->name, dentry, inode, de, de->inode.dentry); ++ if ( de->inode.dentry && (de->inode.dentry != dentry) ) ++ OOPS ("(%s): de: %p dentry: %p de->dentry: %p\n", ++ de->name, de, dentry, de->inode.dentry); ++ de->inode.dentry = NULL; ++ iput (inode); ++ devfs_put (de); ++} /* End Function devfs_d_iput */ ++ ++static int devfs_d_delete (struct dentry *dentry); ++ ++static struct dentry_operations devfs_dops = ++{ ++ d_delete: devfs_d_delete, ++ d_release: devfs_d_release, ++ d_iput: devfs_d_iput, ++}; ++ ++static int devfs_d_revalidate_wait (struct dentry *dentry, int flags); ++ ++static struct dentry_operations devfs_wait_dops = ++{ ++ d_delete: devfs_d_delete, ++ d_release: devfs_d_release, ++ d_iput: devfs_d_iput, ++ d_revalidate: devfs_d_revalidate_wait, ++}; ++ ++/** ++ * devfs_d_delete - Callback for when all files for a dentry are closed. ++ * @dentry: The dentry. 
++ */ ++ ++static int devfs_d_delete (struct dentry *dentry) ++{ ++ struct inode *inode = dentry->d_inode; ++ struct devfs_entry *de; ++ struct fs_info *fs_info; ++ ++ if (dentry->d_op == &devfs_wait_dops) dentry->d_op = &devfs_dops; ++ /* Unhash dentry if negative (has no inode) */ ++ if (inode == NULL) ++ { ++ DPRINTK (DEBUG_D_DELETE, "(%p): dropping negative dentry\n", dentry); ++ return 1; ++ } ++ fs_info = inode->i_sb->u.generic_sbp; ++ de = get_devfs_entry_from_vfs_inode (inode); ++ DPRINTK (DEBUG_D_DELETE, "(%p): inode: %p devfs_entry: %p\n", ++ dentry, inode, de); ++ if (de == NULL) return 0; ++ if ( !S_ISCHR (de->mode) && !S_ISBLK (de->mode) && !S_ISREG (de->mode) ) ++ return 0; ++ if (!de->u.fcb.open) return 0; ++ de->u.fcb.open = FALSE; ++ if (de->u.fcb.aopen_notify) ++ devfsd_notify_de (de, DEVFSD_NOTIFY_CLOSE, inode->i_mode, ++ current->euid, current->egid, fs_info, 1); ++ if (!de->u.fcb.auto_owner) return 0; ++ /* Change the ownership/protection back */ ++ inode->i_mode = (de->mode & S_IFMT) | S_IRUGO | S_IWUGO; ++ inode->i_uid = de->inode.uid; ++ inode->i_gid = de->inode.gid; ++ return 0; ++} /* End Function devfs_d_delete */ ++ ++struct devfs_lookup_struct ++{ ++ devfs_handle_t de; ++ wait_queue_head_t wait_queue; ++}; ++ ++static int devfs_d_revalidate_wait (struct dentry *dentry, int flags) ++{ ++ struct inode *dir = dentry->d_parent->d_inode; ++ struct fs_info *fs_info = dir->i_sb->u.generic_sbp; ++ devfs_handle_t parent = get_devfs_entry_from_vfs_inode (dir); ++ struct devfs_lookup_struct *lookup_info = dentry->d_fsdata; ++ DECLARE_WAITQUEUE (wait, current); ++ ++ if ( is_devfsd_or_child (fs_info) ) ++ { ++ devfs_handle_t de = lookup_info->de; ++ struct inode *inode; ++ ++ DPRINTK (DEBUG_I_LOOKUP, ++ "(%s): dentry: %p inode: %p de: %p by: \"%s\"\n", ++ dentry->d_name.name, dentry, dentry->d_inode, de, ++ current->comm); ++ if (dentry->d_inode) return 1; ++ if (de == NULL) ++ { ++ read_lock (&parent->u.dir.lock); ++ de = _devfs_search_dir 
(parent, dentry->d_name.name, ++ dentry->d_name.len); ++ read_unlock (&parent->u.dir.lock); ++ if (de == NULL) return 1; ++ lookup_info->de = de; ++ } ++ /* Create an inode, now that the driver information is available */ ++ inode = _devfs_get_vfs_inode (dir->i_sb, de, dentry); ++ if (!inode) return 1; ++ DPRINTK (DEBUG_I_LOOKUP, ++ "(%s): new VFS inode(%u): %p de: %p by: \"%s\"\n", ++ de->name, de->inode.ino, inode, de, current->comm); ++ d_instantiate (dentry, inode); ++ return 1; ++ } ++ if (lookup_info == NULL) return 1; /* Early termination */ ++ read_lock (&parent->u.dir.lock); ++ if (dentry->d_fsdata) ++ { ++ add_wait_queue (&lookup_info->wait_queue, &wait); ++ current->state = TASK_UNINTERRUPTIBLE; ++ read_unlock (&parent->u.dir.lock); ++ schedule (); ++ } ++ else read_unlock (&parent->u.dir.lock); ++ return 1; ++} /* End Function devfs_d_revalidate_wait */ ++ ++ ++/* Inode operations for device entries follow */ ++ ++static struct dentry *devfs_lookup (struct inode *dir, struct dentry *dentry) ++{ ++ struct devfs_entry tmp; /* Must stay in scope until devfsd idle again */ ++ struct devfs_lookup_struct lookup_info; ++ struct fs_info *fs_info = dir->i_sb->u.generic_sbp; ++ struct devfs_entry *parent, *de; ++ struct inode *inode; ++ struct dentry *retval = NULL; ++ ++ /* Set up the dentry operations before anything else, to ensure cleaning ++ up on any error */ ++ dentry->d_op = &devfs_dops; ++ /* First try to get the devfs entry for this directory */ ++ parent = get_devfs_entry_from_vfs_inode (dir); ++ DPRINTK (DEBUG_I_LOOKUP, "(%s): dentry: %p parent: %p by: \"%s\"\n", ++ dentry->d_name.name, dentry, parent, current->comm); ++ if (parent == NULL) return ERR_PTR (-ENOENT); ++ read_lock (&parent->u.dir.lock); ++ de = _devfs_search_dir (parent, dentry->d_name.name, dentry->d_name.len); ++ if (de) read_unlock (&parent->u.dir.lock); ++ else ++ { /* Try re-reading the partition (media may have changed) */ ++ if ( get_removable_partition (parent, 
dentry->d_name.name, ++ dentry->d_name.len) ) /* Unlocks */ ++ { /* Media did change */ ++ read_lock (&parent->u.dir.lock); ++ de = _devfs_search_dir (parent, dentry->d_name.name, ++ dentry->d_name.len); ++ read_unlock (&parent->u.dir.lock); ++ } ++ } ++ lookup_info.de = de; ++ init_waitqueue_head (&lookup_info.wait_queue); ++ dentry->d_fsdata = &lookup_info; ++ if (de == NULL) ++ { /* Try with devfsd. For any kind of failure, leave a negative dentry ++ so someone else can deal with it (in the case where the sysadmin ++ does a mknod()). It's important to do this before hashing the ++ dentry, so that the devfsd queue is filled before revalidates ++ can start */ ++ if (try_modload (parent, fs_info, ++ dentry->d_name.name, dentry->d_name.len, &tmp) < 0) ++ { /* Lookup event was not queued to devfsd */ ++ d_add (dentry, NULL); ++ return NULL; ++ } ++ } ++ dentry->d_op = &devfs_wait_dops; ++ d_add (dentry, NULL); /* Open the floodgates */ ++ /* Unlock directory semaphore, which will release any waiters. 
They ++ will get the hashed dentry, and may be forced to wait for ++ revalidation */ ++ up (&dir->i_sem); ++ wait_for_devfsd_finished (fs_info); /* If I'm not devfsd, must wait */ ++ down (&dir->i_sem); /* Grab it again because them's the rules */ ++ de = lookup_info.de; ++ /* If someone else has been so kind as to make the inode, we go home ++ early */ ++ if (dentry->d_inode) goto out; ++ if (de == NULL) ++ { ++ read_lock (&parent->u.dir.lock); ++ de = _devfs_search_dir (parent, dentry->d_name.name, ++ dentry->d_name.len); ++ read_unlock (&parent->u.dir.lock); ++ if (de == NULL) goto out; ++ /* OK, there's an entry now, but no VFS inode yet */ ++ } ++ /* Create an inode, now that the driver information is available */ ++ inode = _devfs_get_vfs_inode (dir->i_sb, de, dentry); ++ if (!inode) ++ { ++ retval = ERR_PTR (-ENOMEM); ++ goto out; ++ } ++ DPRINTK (DEBUG_I_LOOKUP, "(%s): new VFS inode(%u): %p de: %p by: \"%s\"\n", ++ de->name, de->inode.ino, inode, de, current->comm); ++ d_instantiate (dentry, inode); ++out: ++ dentry->d_op = &devfs_dops; ++ dentry->d_fsdata = NULL; ++ write_lock (&parent->u.dir.lock); ++ wake_up (&lookup_info.wait_queue); ++ write_unlock (&parent->u.dir.lock); ++ devfs_put (de); ++ return retval; ++} /* End Function devfs_lookup */ ++ ++static int devfs_unlink (struct inode *dir, struct dentry *dentry) ++{ ++ int unhooked; ++ struct devfs_entry *de; ++ struct inode *inode = dentry->d_inode; ++ struct fs_info *fs_info = dir->i_sb->u.generic_sbp; ++ ++ de = get_devfs_entry_from_vfs_inode (inode); ++ DPRINTK (DEBUG_I_UNLINK, "(%s): de: %p\n", dentry->d_name.name, de); ++ if (de == NULL) return -ENOENT; ++ if (!de->vfs_deletable) return -EPERM; ++ write_lock (&de->parent->u.dir.lock); ++ unhooked = _devfs_unhook (de); ++ write_unlock (&de->parent->u.dir.lock); ++ if (!unhooked) return -ENOENT; ++ if ( !is_devfsd_or_child (fs_info) ) ++ devfsd_notify_de (de, DEVFSD_NOTIFY_DELETE, inode->i_mode, ++ inode->i_uid, inode->i_gid, fs_info, 0); ++ 
free_dentry (de); ++ devfs_put (de); ++ return 0; ++} /* End Function devfs_unlink */ ++ ++static int devfs_symlink (struct inode *dir, struct dentry *dentry, ++ const char *symname) ++{ ++ int err; ++ struct fs_info *fs_info = dir->i_sb->u.generic_sbp; ++ struct devfs_entry *parent, *de; ++ struct inode *inode; ++ ++ /* First try to get the devfs entry for this directory */ ++ parent = get_devfs_entry_from_vfs_inode (dir); ++ if (parent == NULL) return -ENOENT; ++ err = devfs_do_symlink (parent, dentry->d_name.name, DEVFS_FL_NONE, ++ symname, &de, NULL); ++ DPRINTK (DEBUG_DISABLED, "(%s): errcode from : %d\n", ++ dentry->d_name.name, err); ++ if (err < 0) return err; ++ de->vfs_deletable = TRUE; ++ de->inode.uid = current->euid; ++ de->inode.gid = current->egid; ++ de->inode.atime = CURRENT_TIME; ++ de->inode.mtime = CURRENT_TIME; ++ de->inode.ctime = CURRENT_TIME; ++ if ( ( inode = _devfs_get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) ++ return -ENOMEM; ++ DPRINTK (DEBUG_DISABLED, "(%s): new VFS inode(%u): %p dentry: %p\n", ++ dentry->d_name.name, de->inode.ino, inode, dentry); ++ d_instantiate (dentry, inode); ++ if ( !is_devfsd_or_child (fs_info) ) ++ devfsd_notify_de (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, ++ inode->i_uid, inode->i_gid, fs_info, 0); ++ return 0; ++} /* End Function devfs_symlink */ ++ ++static int devfs_mkdir (struct inode *dir, struct dentry *dentry, int mode) ++{ ++ int err; ++ struct fs_info *fs_info = dir->i_sb->u.generic_sbp; ++ struct devfs_entry *parent, *de; ++ struct inode *inode; ++ ++ mode = (mode & ~S_IFMT) | S_IFDIR; /* VFS doesn't pass S_IFMT part */ ++ parent = get_devfs_entry_from_vfs_inode (dir); ++ if (parent == NULL) return -ENOENT; ++ de = _devfs_alloc_entry (dentry->d_name.name, dentry->d_name.len, mode); ++ if (!de) return -ENOMEM; ++ de->vfs_deletable = TRUE; ++ if ( ( err = _devfs_append_entry (parent, de, FALSE, NULL) ) != 0 ) ++ return err; ++ de->inode.uid = current->euid; ++ de->inode.gid = current->egid; ++ 
de->inode.atime = CURRENT_TIME; ++ de->inode.mtime = CURRENT_TIME; ++ de->inode.ctime = CURRENT_TIME; ++ if ( ( inode = _devfs_get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) ++ return -ENOMEM; ++ DPRINTK (DEBUG_DISABLED, "(%s): new VFS inode(%u): %p dentry: %p\n", ++ dentry->d_name.name, de->inode.ino, inode, dentry); ++ d_instantiate (dentry, inode); ++ if ( !is_devfsd_or_child (fs_info) ) ++ devfsd_notify_de (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, ++ inode->i_uid, inode->i_gid, fs_info, 0); ++ return 0; ++} /* End Function devfs_mkdir */ ++ ++static int devfs_rmdir (struct inode *dir, struct dentry *dentry) ++{ ++ int err = 0; ++ struct devfs_entry *de; ++ struct fs_info *fs_info = dir->i_sb->u.generic_sbp; ++ struct inode *inode = dentry->d_inode; ++ ++ if (dir->i_sb->u.generic_sbp != inode->i_sb->u.generic_sbp) return -EINVAL; ++ de = get_devfs_entry_from_vfs_inode (inode); ++ if (de == NULL) return -ENOENT; ++ if ( !S_ISDIR (de->mode) ) return -ENOTDIR; ++ if (!de->vfs_deletable) return -EPERM; ++ /* First ensure the directory is empty and will stay that way */ ++ write_lock (&de->u.dir.lock); ++ if (de->u.dir.first) err = -ENOTEMPTY; ++ else de->u.dir.no_more_additions = TRUE; ++ write_unlock (&de->u.dir.lock); ++ if (err) return err; ++ /* Now unhook the directory from it's parent */ ++ write_lock (&de->parent->u.dir.lock); ++ if ( !_devfs_unhook (de) ) err = -ENOENT; ++ write_unlock (&de->parent->u.dir.lock); ++ if (err) return err; ++ if ( !is_devfsd_or_child (fs_info) ) ++ devfsd_notify_de (de, DEVFSD_NOTIFY_DELETE, inode->i_mode, ++ inode->i_uid, inode->i_gid, fs_info, 0); ++ free_dentry (de); ++ devfs_put (de); ++ return 0; ++} /* End Function devfs_rmdir */ ++ ++static int devfs_mknod (struct inode *dir, struct dentry *dentry, int mode, ++ int rdev) ++{ ++ int err; ++ struct fs_info *fs_info = dir->i_sb->u.generic_sbp; ++ struct devfs_entry *parent, *de; ++ struct inode *inode; ++ ++ DPRINTK (DEBUG_I_MKNOD, "(%s): mode: 0%o dev: %d\n", ++ 
dentry->d_name.name, mode, rdev); ++ parent = get_devfs_entry_from_vfs_inode (dir); ++ if (parent == NULL) return -ENOENT; ++ de = _devfs_alloc_entry (dentry->d_name.name, dentry->d_name.len, mode); ++ if (!de) return -ENOMEM; ++ de->vfs_deletable = TRUE; ++ if ( S_ISBLK (mode) || S_ISCHR (mode) ) ++ { ++ de->u.fcb.u.device.major = MAJOR (rdev); ++ de->u.fcb.u.device.minor = MINOR (rdev); ++ } ++ if ( ( err = _devfs_append_entry (parent, de, FALSE, NULL) ) != 0 ) ++ return err; ++ de->inode.uid = current->euid; ++ de->inode.gid = current->egid; ++ de->inode.atime = CURRENT_TIME; ++ de->inode.mtime = CURRENT_TIME; ++ de->inode.ctime = CURRENT_TIME; ++ if ( ( inode = _devfs_get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) ++ return -ENOMEM; ++ DPRINTK (DEBUG_I_MKNOD, ": new VFS inode(%u): %p dentry: %p\n", ++ de->inode.ino, inode, dentry); ++ d_instantiate (dentry, inode); ++ if ( !is_devfsd_or_child (fs_info) ) ++ devfsd_notify_de (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, ++ inode->i_uid, inode->i_gid, fs_info, 0); ++ return 0; ++} /* End Function devfs_mknod */ ++ ++static int devfs_readlink (struct dentry *dentry, char *buffer, int buflen) ++{ ++ int err; ++ struct devfs_entry *de; ++ ++ de = get_devfs_entry_from_vfs_inode (dentry->d_inode); ++ if (!de) return -ENODEV; ++ err = vfs_readlink (dentry, buffer, buflen, de->u.symlink.linkname); ++ return err; ++} /* End Function devfs_readlink */ ++ ++static int devfs_follow_link (struct dentry *dentry, struct nameidata *nd) ++{ ++ int err; ++ struct devfs_entry *de; ++ ++ de = get_devfs_entry_from_vfs_inode (dentry->d_inode); ++ if (!de) return -ENODEV; ++ err = vfs_follow_link (nd, de->u.symlink.linkname); ++ return err; ++} /* End Function devfs_follow_link */ ++ ++static struct inode_operations devfs_iops = ++{ ++ setattr: devfs_notify_change, ++}; ++ ++static struct inode_operations devfs_dir_iops = ++{ ++ lookup: devfs_lookup, ++ unlink: devfs_unlink, ++ symlink: devfs_symlink, ++ mkdir: devfs_mkdir, ++ rmdir: 
devfs_rmdir, ++ mknod: devfs_mknod, ++ setattr: devfs_notify_change, ++}; ++ ++static struct inode_operations devfs_symlink_iops = ++{ ++ readlink: devfs_readlink, ++ follow_link: devfs_follow_link, ++ setattr: devfs_notify_change, ++}; ++ ++static struct super_block *devfs_read_super (struct super_block *sb, ++ void *data, int silent) ++{ ++ struct inode *root_inode = NULL; ++ ++ if (_devfs_get_root_entry () == NULL) goto out_no_root; ++ atomic_set (&fs_info.devfsd_overrun_count, 0); ++ init_waitqueue_head (&fs_info.devfsd_wait_queue); ++ init_waitqueue_head (&fs_info.revalidate_wait_queue); ++ fs_info.sb = sb; ++ sb->u.generic_sbp = &fs_info; ++ sb->s_blocksize = 1024; ++ sb->s_blocksize_bits = 10; ++ sb->s_magic = DEVFS_SUPER_MAGIC; ++ sb->s_op = &devfs_sops; ++ if ( ( root_inode = _devfs_get_vfs_inode (sb, root_entry, NULL) ) == NULL ) ++ goto out_no_root; ++ sb->s_root = d_alloc_root (root_inode); ++ if (!sb->s_root) goto out_no_root; ++ DPRINTK (DEBUG_S_READ, "(): made devfs ptr: %p\n", sb->u.generic_sbp); ++ return sb; ++ ++out_no_root: ++ PRINTK ("(): get root inode failed\n"); ++ if (root_inode) iput (root_inode); ++ return NULL; ++} /* End Function devfs_read_super */ ++ ++ ++static DECLARE_FSTYPE (devfs_fs_type, DEVFS_NAME, devfs_read_super, FS_SINGLE); ++ ++ ++/* File operations for devfsd follow */ ++ ++static ssize_t devfsd_read (struct file *file, char *buf, size_t len, ++ loff_t *ppos) ++{ ++ int done = FALSE; ++ int ival; ++ loff_t pos, devname_offset, tlen, rpos; ++ devfs_handle_t de; ++ struct devfsd_buf_entry *entry; ++ struct fs_info *fs_info = file->f_dentry->d_inode->i_sb->u.generic_sbp; ++ struct devfsd_notify_struct *info = fs_info->devfsd_info; ++ DECLARE_WAITQUEUE (wait, current); ++ ++ /* Can't seek (pread) on this device */ ++ if (ppos != &file->f_pos) return -ESPIPE; ++ /* Verify the task has grabbed the queue */ ++ if (fs_info->devfsd_task != current) return -EPERM; ++ info->major = 0; ++ info->minor = 0; ++ /* Block for a new entry 
*/ ++ add_wait_queue (&fs_info->devfsd_wait_queue, &wait); ++ current->state = TASK_INTERRUPTIBLE; ++ while ( devfsd_queue_empty (fs_info) ) ++ { ++ fs_info->devfsd_sleeping = TRUE; ++ wake_up (&fs_info->revalidate_wait_queue); ++ schedule (); ++ fs_info->devfsd_sleeping = FALSE; ++ if ( signal_pending (current) ) ++ { ++ remove_wait_queue (&fs_info->devfsd_wait_queue, &wait); ++ current->state = TASK_RUNNING; ++ return -EINTR; ++ } ++ set_current_state (TASK_INTERRUPTIBLE); ++ } ++ remove_wait_queue (&fs_info->devfsd_wait_queue, &wait); ++ current->state = TASK_RUNNING; ++ /* Now play with the data */ ++ ival = atomic_read (&fs_info->devfsd_overrun_count); ++ info->overrun_count = ival; ++ entry = fs_info->devfsd_first_event; ++ info->type = entry->type; ++ info->mode = entry->mode; ++ info->uid = entry->uid; ++ info->gid = entry->gid; ++ de = entry->de; ++ if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) ) ++ { ++ info->major = de->u.fcb.u.device.major; ++ info->minor = de->u.fcb.u.device.minor; ++ } ++ pos = devfs_generate_path (de, info->devname, DEVFS_PATHLEN); ++ if (pos < 0) return pos; ++ info->namelen = DEVFS_PATHLEN - pos - 1; ++ if (info->mode == 0) info->mode = de->mode; ++ devname_offset = info->devname - (char *) info; ++ rpos = *ppos; ++ if (rpos < devname_offset) ++ { ++ /* Copy parts of the header */ ++ tlen = devname_offset - rpos; ++ if (tlen > len) tlen = len; ++ if ( copy_to_user (buf, (char *) info + rpos, tlen) ) ++ { ++ return -EFAULT; ++ } ++ rpos += tlen; ++ buf += tlen; ++ len -= tlen; ++ } ++ if ( (rpos >= devname_offset) && (len > 0) ) ++ { ++ /* Copy the name */ ++ tlen = info->namelen + 1; ++ if (tlen > len) tlen = len; ++ else done = TRUE; ++ if ( copy_to_user (buf, info->devname + pos + rpos - devname_offset, ++ tlen) ) ++ { ++ return -EFAULT; ++ } ++ rpos += tlen; ++ } ++ tlen = rpos - *ppos; ++ if (done) ++ { ++ devfs_handle_t parent; ++ ++ spin_lock (&fs_info->devfsd_buffer_lock); ++ fs_info->devfsd_first_event = entry->next; ++ if 
(entry->next == NULL) fs_info->devfsd_last_event = NULL; ++ spin_unlock (&fs_info->devfsd_buffer_lock); ++ for (; de != NULL; de = parent) ++ { ++ parent = de->parent; ++ devfs_put (de); ++ } ++ kmem_cache_free (devfsd_buf_cache, entry); ++ if (ival > 0) atomic_sub (ival, &fs_info->devfsd_overrun_count); ++ *ppos = 0; ++ } ++ else *ppos = rpos; ++ return tlen; ++} /* End Function devfsd_read */ ++ ++static int devfsd_ioctl (struct inode *inode, struct file *file, ++ unsigned int cmd, unsigned long arg) ++{ ++ int ival; ++ struct fs_info *fs_info = inode->i_sb->u.generic_sbp; ++ ++ switch (cmd) ++ { ++ case DEVFSDIOC_GET_PROTO_REV: ++ ival = DEVFSD_PROTOCOL_REVISION_KERNEL; ++ if ( copy_to_user ( (void *)arg, &ival, sizeof ival ) ) return -EFAULT; ++ break; ++ case DEVFSDIOC_SET_EVENT_MASK: ++ /* Ensure only one reader has access to the queue. This scheme will ++ work even if the global kernel lock were to be removed, because it ++ doesn't matter who gets in first, as long as only one gets it */ ++ if (fs_info->devfsd_task == NULL) ++ { ++ static spinlock_t lock = SPIN_LOCK_UNLOCKED; ++ ++ if ( !spin_trylock (&lock) ) return -EBUSY; ++ if (fs_info->devfsd_task != NULL) ++ { /* We lost the race... */ ++ spin_unlock (&lock); ++ return -EBUSY; ++ } ++ fs_info->devfsd_task = current; ++ spin_unlock (&lock); ++ fs_info->devfsd_pgrp = (current->pgrp == current->pid) ? 
++ current->pgrp : 0; ++ fs_info->devfsd_file = file; ++ fs_info->devfsd_info = kmalloc (sizeof *fs_info->devfsd_info, ++ GFP_KERNEL); ++ if (!fs_info->devfsd_info) ++ { ++ devfsd_close (inode, file); ++ return -ENOMEM; ++ } ++ } ++ else if (fs_info->devfsd_task != current) return -EBUSY; ++ fs_info->devfsd_event_mask = arg; /* Let the masses come forth */ ++ break; ++ case DEVFSDIOC_RELEASE_EVENT_QUEUE: ++ if (fs_info->devfsd_file != file) return -EPERM; ++ return devfsd_close (inode, file); ++ /*break;*/ ++#ifdef CONFIG_DEVFS_DEBUG ++ case DEVFSDIOC_SET_DEBUG_MASK: ++ if ( copy_from_user (&ival, (void *) arg, sizeof ival) )return -EFAULT; ++ devfs_debug = ival; ++ break; ++#endif ++ default: ++ return -ENOIOCTLCMD; ++ } ++ return 0; ++} /* End Function devfsd_ioctl */ ++ ++static int devfsd_close (struct inode *inode, struct file *file) ++{ ++ struct devfsd_buf_entry *entry, *next; ++ struct fs_info *fs_info = inode->i_sb->u.generic_sbp; ++ ++ if (fs_info->devfsd_file != file) return 0; ++ fs_info->devfsd_event_mask = 0; ++ fs_info->devfsd_file = NULL; ++ spin_lock (&fs_info->devfsd_buffer_lock); ++ entry = fs_info->devfsd_first_event; ++ fs_info->devfsd_first_event = NULL; ++ fs_info->devfsd_last_event = NULL; ++ if (fs_info->devfsd_info) ++ { ++ kfree (fs_info->devfsd_info); ++ fs_info->devfsd_info = NULL; ++ } ++ spin_unlock (&fs_info->devfsd_buffer_lock); ++ fs_info->devfsd_pgrp = 0; ++ fs_info->devfsd_task = NULL; ++ wake_up (&fs_info->revalidate_wait_queue); ++ for (; entry; entry = next) ++ { ++ next = entry->next; ++ kmem_cache_free (devfsd_buf_cache, entry); ++ } ++ return 0; ++} /* End Function devfsd_close */ ++ ++#ifdef CONFIG_DEVFS_DEBUG ++static ssize_t stat_read (struct file *file, char *buf, size_t len, ++ loff_t *ppos) ++{ ++ ssize_t num; ++ char txt[80]; ++ ++ num = sprintf (txt, "Number of entries: %u number of bytes: %u\n", ++ stat_num_entries, stat_num_bytes) + 1; ++ /* Can't seek (pread) on this device */ ++ if (ppos != &file->f_pos) return 
-ESPIPE; ++ if (*ppos >= num) return 0; ++ if (*ppos + len > num) len = num - *ppos; ++ if ( copy_to_user (buf, txt + *ppos, len) ) return -EFAULT; ++ *ppos += len; ++ return len; ++} /* End Function stat_read */ ++#endif ++ ++ ++static int __init init_devfs_fs (void) ++{ ++ int err; ++ ++ printk (KERN_INFO "%s: v%s Richard Gooch (rgooch@atnf.csiro.au)\n", ++ DEVFS_NAME, DEVFS_VERSION); ++ devfsd_buf_cache = kmem_cache_create ("devfsd_event", ++ sizeof (struct devfsd_buf_entry), ++ 0, 0, NULL, NULL); ++ if (!devfsd_buf_cache) OOPS ("(): unable to allocate event slab\n"); ++#ifdef CONFIG_DEVFS_DEBUG ++ devfs_debug = devfs_debug_init; ++ printk (KERN_INFO "%s: devfs_debug: 0x%0x\n", DEVFS_NAME, devfs_debug); ++#endif ++ printk (KERN_INFO "%s: boot_options: 0x%0x\n", DEVFS_NAME, boot_options); ++ err = register_filesystem (&devfs_fs_type); ++ if (!err) ++ { ++ struct vfsmount *devfs_mnt = kern_mount (&devfs_fs_type); ++ err = PTR_ERR (devfs_mnt); ++ if ( !IS_ERR (devfs_mnt) ) err = 0; ++ } ++ return err; ++} /* End Function init_devfs_fs */ ++ ++void __init mount_devfs_fs (void) ++{ ++ int err; ++ ++ if ( !(boot_options & OPTION_MOUNT) ) return; ++ err = do_mount ("none", "/dev", "devfs", 0, ""); ++ if (err == 0) printk (KERN_INFO "Mounted devfs on /dev\n"); ++ else PRINTK ("(): unable to mount devfs, err: %d\n", err); ++} /* End Function mount_devfs_fs */ ++ ++module_init(init_devfs_fs) +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.19-hp3_pnnl1-root/fs/super.c 2003-04-15 13:21:57.000000000 +0800 +@@ -0,0 +1,926 @@ ++/* ++ * linux/fs/super.c ++ * ++ * Copyright (C) 1991, 1992 Linus Torvalds ++ * ++ * super.c contains code to handle: - mount structures ++ * - super-block tables ++ * - filesystem drivers list ++ * - mount system call ++ * - umount system call ++ * - ustat system call ++ * ++ * GK 2/5/95 - Changed to support mounting the root fs via NFS ++ * ++ * Added kerneld support: Jacques Gelinas and Bjorn Ekwall ++ * Added change_root: Werner 
Almesberger & Hans Lermen, Feb '96 ++ * Added options to /proc/mounts: ++ * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996. ++ * Added devfs support: Richard Gooch , 13-JAN-1998 ++ * Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000 ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++#define __NO_VERSION__ ++#include ++ ++LIST_HEAD(super_blocks); ++spinlock_t sb_lock = SPIN_LOCK_UNLOCKED; ++ ++/* ++ * lock/unlockfs grab a read lock on s_umount, but you need this lock to ++ * make sure no lockfs runs are in progress before inserting/removing ++ * supers from the list. ++ */ ++static DECLARE_MUTEX(lockfs_sem); ++ ++/* ++ * Handling of filesystem drivers list. ++ * Rules: ++ * Inclusion to/removals from/scanning of list are protected by spinlock. ++ * During the unload module must call unregister_filesystem(). ++ * We can access the fields of list element if: ++ * 1) spinlock is held or ++ * 2) we hold the reference to the module. ++ * The latter can be guaranteed by call of try_inc_mod_count(); if it ++ * returned 0 we must skip the element, otherwise we got the reference. ++ * Once the reference is obtained we can drop the spinlock. 
++ */ ++ ++static struct file_system_type *file_systems; ++static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED; ++ ++/* WARNING: This can be used only if we _already_ own a reference */ ++static void get_filesystem(struct file_system_type *fs) ++{ ++ if (fs->owner) ++ __MOD_INC_USE_COUNT(fs->owner); ++} ++ ++static void put_filesystem(struct file_system_type *fs) ++{ ++ if (fs->owner) ++ __MOD_DEC_USE_COUNT(fs->owner); ++} ++ ++static struct file_system_type **find_filesystem(const char *name) ++{ ++ struct file_system_type **p; ++ for (p=&file_systems; *p; p=&(*p)->next) ++ if (strcmp((*p)->name,name) == 0) ++ break; ++ return p; ++} ++ ++/** ++ * register_filesystem - register a new filesystem ++ * @fs: the file system structure ++ * ++ * Adds the file system passed to the list of file systems the kernel ++ * is aware of for mount and other syscalls. Returns 0 on success, ++ * or a negative errno code on an error. ++ * ++ * The &struct file_system_type that is passed is linked into the kernel ++ * structures and must not be freed until the file system has been ++ * unregistered. ++ */ ++ ++int register_filesystem(struct file_system_type * fs) ++{ ++ int res = 0; ++ struct file_system_type ** p; ++ ++ if (!fs) ++ return -EINVAL; ++ if (fs->next) ++ return -EBUSY; ++ INIT_LIST_HEAD(&fs->fs_supers); ++ write_lock(&file_systems_lock); ++ p = find_filesystem(fs->name); ++ if (*p) ++ res = -EBUSY; ++ else ++ *p = fs; ++ write_unlock(&file_systems_lock); ++ return res; ++} ++ ++/** ++ * unregister_filesystem - unregister a file system ++ * @fs: filesystem to unregister ++ * ++ * Remove a file system that was previously successfully registered ++ * with the kernel. An error is returned if the file system is not found. ++ * Zero is returned on a success. ++ * ++ * Once this function has returned the &struct file_system_type structure ++ * may be freed or reused. 
++ */ ++ ++int unregister_filesystem(struct file_system_type * fs) ++{ ++ struct file_system_type ** tmp; ++ ++ write_lock(&file_systems_lock); ++ tmp = &file_systems; ++ while (*tmp) { ++ if (fs == *tmp) { ++ *tmp = fs->next; ++ fs->next = NULL; ++ write_unlock(&file_systems_lock); ++ return 0; ++ } ++ tmp = &(*tmp)->next; ++ } ++ write_unlock(&file_systems_lock); ++ return -EINVAL; ++} ++ ++static int fs_index(const char * __name) ++{ ++ struct file_system_type * tmp; ++ char * name; ++ int err, index; ++ ++ name = getname(__name); ++ err = PTR_ERR(name); ++ if (IS_ERR(name)) ++ return err; ++ ++ err = -EINVAL; ++ read_lock(&file_systems_lock); ++ for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) { ++ if (strcmp(tmp->name,name) == 0) { ++ err = index; ++ break; ++ } ++ } ++ read_unlock(&file_systems_lock); ++ putname(name); ++ return err; ++} ++ ++static int fs_name(unsigned int index, char * buf) ++{ ++ struct file_system_type * tmp; ++ int len, res; ++ ++ read_lock(&file_systems_lock); ++ for (tmp = file_systems; tmp; tmp = tmp->next, index--) ++ if (index <= 0 && try_inc_mod_count(tmp->owner)) ++ break; ++ read_unlock(&file_systems_lock); ++ if (!tmp) ++ return -EINVAL; ++ ++ /* OK, we got the reference, so we can safely block */ ++ len = strlen(tmp->name) + 1; ++ res = copy_to_user(buf, tmp->name, len) ? -EFAULT : 0; ++ put_filesystem(tmp); ++ return res; ++} ++ ++static int fs_maxindex(void) ++{ ++ struct file_system_type * tmp; ++ int index; ++ ++ read_lock(&file_systems_lock); ++ for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++) ++ ; ++ read_unlock(&file_systems_lock); ++ return index; ++} ++ ++/* ++ * Whee.. Weird sysv syscall. 
++ */ ++asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2) ++{ ++ int retval = -EINVAL; ++ ++ switch (option) { ++ case 1: ++ retval = fs_index((const char *) arg1); ++ break; ++ ++ case 2: ++ retval = fs_name(arg1, (char *) arg2); ++ break; ++ ++ case 3: ++ retval = fs_maxindex(); ++ break; ++ } ++ return retval; ++} ++ ++int get_filesystem_list(char * buf) ++{ ++ int len = 0; ++ struct file_system_type * tmp; ++ ++ read_lock(&file_systems_lock); ++ tmp = file_systems; ++ while (tmp && len < PAGE_SIZE - 80) { ++ len += sprintf(buf+len, "%s\t%s\n", ++ (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev", ++ tmp->name); ++ tmp = tmp->next; ++ } ++ read_unlock(&file_systems_lock); ++ return len; ++} ++ ++struct file_system_type *get_fs_type(const char *name) ++{ ++ struct file_system_type *fs; ++ ++ read_lock(&file_systems_lock); ++ fs = *(find_filesystem(name)); ++ if (fs && !try_inc_mod_count(fs->owner)) ++ fs = NULL; ++ read_unlock(&file_systems_lock); ++ if (!fs && (request_module(name) == 0)) { ++ read_lock(&file_systems_lock); ++ fs = *(find_filesystem(name)); ++ if (fs && !try_inc_mod_count(fs->owner)) ++ fs = NULL; ++ read_unlock(&file_systems_lock); ++ } ++ return fs; ++} ++ ++/** ++ * alloc_super - create new superblock ++ * ++ * Allocates and initializes a new &struct super_block. alloc_super() ++ * returns a pointer new superblock or %NULL if allocation had failed. 
++ */ ++static struct super_block *alloc_super(void) ++{ ++ static struct super_operations empty_sops = {}; ++ struct super_block *s = kmalloc(sizeof(struct super_block), GFP_USER); ++ if (s) { ++ memset(s, 0, sizeof(struct super_block)); ++ INIT_LIST_HEAD(&s->s_dirty); ++ INIT_LIST_HEAD(&s->s_locked_inodes); ++ INIT_LIST_HEAD(&s->s_files); ++ INIT_LIST_HEAD(&s->s_instances); ++ init_rwsem(&s->s_umount); ++ sema_init(&s->s_lock, 1); ++ down_write(&s->s_umount); ++ s->s_count = S_BIAS; ++ atomic_set(&s->s_active, 1); ++ sema_init(&s->s_vfs_rename_sem,1); ++ sema_init(&s->s_nfsd_free_path_sem,1); ++ sema_init(&s->s_dquot.dqio_sem, 1); ++ sema_init(&s->s_dquot.dqoff_sem, 1); ++ s->s_maxbytes = MAX_NON_LFS; ++ s->s_op = &empty_sops; ++ s->dq_op = sb_dquot_ops; ++ s->s_qcop = sb_quotactl_ops; ++ } ++ return s; ++} ++ ++/** ++ * destroy_super - frees a superblock ++ * @s: superblock to free ++ * ++ * Frees a superblock. ++ */ ++static inline void destroy_super(struct super_block *s) ++{ ++ kfree(s); ++} ++ ++/* Superblock refcounting */ ++ ++/** ++ * deactivate_super - turn an active reference into temporary ++ * @s: superblock to deactivate ++ * ++ * Turns an active reference into temporary one. Returns 0 if there are ++ * other active references, 1 if we had deactivated the last one. ++ */ ++static inline int deactivate_super(struct super_block *s) ++{ ++ if (!atomic_dec_and_lock(&s->s_active, &sb_lock)) ++ return 0; ++ s->s_count -= S_BIAS-1; ++ spin_unlock(&sb_lock); ++ return 1; ++} ++ ++/** ++ * put_super - drop a temporary reference to superblock ++ * @s: superblock in question ++ * ++ * Drops a temporary reference, frees superblock if there's no ++ * references left. 
++ */ ++static inline void put_super(struct super_block *s) ++{ ++ spin_lock(&sb_lock); ++ if (!--s->s_count) ++ destroy_super(s); ++ spin_unlock(&sb_lock); ++} ++ ++/** ++ * grab_super - acquire an active reference ++ * @s - reference we are trying to make active ++ * ++ * Tries to acquire an active reference. grab_super() is used when we ++ * had just found a superblock in super_blocks or fs_type->fs_supers ++ * and want to turn it into a full-blown active reference. grab_super() ++ * is called with sb_lock held and drops it. Returns 1 in case of ++ * success, 0 if we had failed (superblock contents was already dead or ++ * dying when grab_super() had been called). ++ */ ++static int grab_super(struct super_block *s) ++{ ++ s->s_count++; ++ spin_unlock(&sb_lock); ++ down_write(&s->s_umount); ++ if (s->s_root) { ++ spin_lock(&sb_lock); ++ if (s->s_count > S_BIAS) { ++ atomic_inc(&s->s_active); ++ s->s_count--; ++ spin_unlock(&sb_lock); ++ return 1; ++ } ++ spin_unlock(&sb_lock); ++ } ++ up_write(&s->s_umount); ++ put_super(s); ++ return 0; ++} ++ ++/** ++ * insert_super - put superblock on the lists ++ * @s: superblock in question ++ * @type: filesystem type it will belong to ++ * ++ * Associates superblock with fs type and puts it on per-type and global ++ * superblocks' lists. Should be called with sb_lock held; drops it. ++ */ ++static void insert_super(struct super_block *s, struct file_system_type *type) ++{ ++ s->s_type = type; ++ list_add(&s->s_list, super_blocks.prev); ++ list_add(&s->s_instances, &type->fs_supers); ++ spin_unlock(&sb_lock); ++ get_filesystem(type); ++} ++ ++static void put_anon_dev(kdev_t dev); ++ ++/** ++ * remove_super - makes superblock unreachable ++ * @s: superblock in question ++ * ++ * Removes superblock from the lists, unlocks it, drop the reference ++ * and releases the hosting device. 
@s should have no active ++ * references by that time and after remove_super() it's essentially ++ * in rundown mode - all remaining references are temporary, no new ++ * reference of any sort are going to appear and all holders of ++ * temporary ones will eventually drop them. At that point superblock ++ * itself will be destroyed; all its contents is already gone. ++ */ ++static void remove_super(struct super_block *s) ++{ ++ kdev_t dev = s->s_dev; ++ struct block_device *bdev = s->s_bdev; ++ struct file_system_type *fs = s->s_type; ++ ++ spin_lock(&sb_lock); ++ list_del(&s->s_list); ++ list_del(&s->s_instances); ++ spin_unlock(&sb_lock); ++ up_write(&s->s_umount); ++ put_super(s); ++ put_filesystem(fs); ++ if (bdev) ++ blkdev_put(bdev, BDEV_FS); ++ else ++ put_anon_dev(dev); ++} ++ ++struct vfsmount *alloc_vfsmnt(char *name); ++void free_vfsmnt(struct vfsmount *mnt); ++ ++static inline struct super_block * find_super(kdev_t dev) ++{ ++ struct list_head *p; ++ ++ list_for_each(p, &super_blocks) { ++ struct super_block * s = sb_entry(p); ++ if (s->s_dev == dev) { ++ s->s_count++; ++ return s; ++ } ++ } ++ return NULL; ++} ++ ++void drop_super(struct super_block *sb) ++{ ++ up_read(&sb->s_umount); ++ put_super(sb); ++} ++ ++static void write_super_lockfs(struct super_block *sb) ++{ ++ lock_super(sb); ++ if (sb->s_root && sb->s_op) { ++ if (sb->s_dirt && sb->s_op->write_super) ++ sb->s_op->write_super(sb); ++ if (sb->s_op->write_super_lockfs) { ++ sb->s_op->write_super_lockfs(sb); ++ } ++ } ++ unlock_super(sb); ++} ++ ++static inline void write_super(struct super_block *sb) ++{ ++ lock_super(sb); ++ if (sb->s_root && sb->s_dirt) ++ if (sb->s_op && sb->s_op->write_super) ++ sb->s_op->write_super(sb); ++ unlock_super(sb); ++} ++ ++/* ++ * Note: check the dirty flag before waiting, so we don't ++ * hold up the sync while mounting a device. (The newly ++ * mounted device won't need syncing.) 
++ */ ++void sync_supers(kdev_t dev) ++{ ++ struct super_block * sb; ++ ++ if (dev) { ++ sb = get_super(dev); ++ if (sb) { ++ if (sb->s_dirt) ++ write_super(sb); ++ drop_super(sb); ++ } ++ return; ++ } ++restart: ++ spin_lock(&sb_lock); ++ sb = sb_entry(super_blocks.next); ++ while (sb != sb_entry(&super_blocks)) ++ if (sb->s_dirt) { ++ sb->s_count++; ++ spin_unlock(&sb_lock); ++ down_read(&sb->s_umount); ++ write_super(sb); ++ drop_super(sb); ++ goto restart; ++ } else ++ sb = sb_entry(sb->s_list.next); ++ spin_unlock(&sb_lock); ++} ++ ++/* ++ * Note: don't check the dirty flag before waiting, we want the lock ++ * to happen every time this is called. dev must be non-zero ++ */ ++void sync_supers_lockfs(kdev_t dev) ++{ ++ struct super_block * sb; ++ ++ down(&lockfs_sem) ; ++ if (dev) { ++ sb = get_super(dev); ++ if (sb) { ++ write_super_lockfs(sb); ++ drop_super(sb); ++ } ++ } ++} ++ ++void unlockfs(kdev_t dev) ++{ ++ struct super_block * sb; ++ ++ if (dev) { ++ sb = get_super(dev); ++ if (sb) { ++ if (sb->s_op && sb->s_op->unlockfs) ++ sb->s_op->unlockfs(sb) ; ++ drop_super(sb); ++ } ++ } ++ up(&lockfs_sem) ; ++} ++ ++/** ++ * get_super - get the superblock of a device ++ * @dev: device to get the superblock for ++ * ++ * Scans the superblock list and finds the superblock of the file system ++ * mounted on the device given. %NULL is returned if no match is found. 
++ */ ++ ++struct super_block * get_super(kdev_t dev) ++{ ++ struct super_block * s; ++ ++ if (!dev) ++ return NULL; ++restart: ++ spin_lock(&sb_lock); ++ s = find_super(dev); ++ if (s) { ++ spin_unlock(&sb_lock); ++ down_read(&s->s_umount); ++ if (s->s_root) ++ return s; ++ drop_super(s); ++ goto restart; ++ } ++ spin_unlock(&sb_lock); ++ return NULL; ++} ++ ++asmlinkage long sys_ustat(dev_t dev, struct ustat * ubuf) ++{ ++ struct super_block *s; ++ struct ustat tmp; ++ struct statfs sbuf; ++ int err = -EINVAL; ++ ++ s = get_super(to_kdev_t(dev)); ++ if (s == NULL) ++ goto out; ++ err = vfs_statfs(s, &sbuf); ++ drop_super(s); ++ if (err) ++ goto out; ++ ++ memset(&tmp,0,sizeof(struct ustat)); ++ tmp.f_tfree = sbuf.f_bfree; ++ tmp.f_tinode = sbuf.f_ffree; ++ ++ err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0; ++out: ++ return err; ++} ++ ++/** ++ * do_remount_sb - asks filesystem to change mount options. ++ * @sb: superblock in question ++ * @flags: numeric part of options ++ * @data: the rest of options ++ * ++ * Alters the mount options of a mounted file system. ++ */ ++int do_remount_sb(struct super_block *sb, int flags, void *data) ++{ ++ int retval; ++ ++ if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev)) ++ return -EACCES; ++ /*flags |= MS_RDONLY;*/ ++ if (flags & MS_RDONLY) ++ acct_auto_close(sb->s_dev); ++ shrink_dcache_sb(sb); ++ fsync_super(sb); ++ /* If we are remounting RDONLY, make sure there are no rw files open */ ++ if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) ++ if (!fs_may_remount_ro(sb)) ++ return -EBUSY; ++ if (sb->s_op && sb->s_op->remount_fs) { ++ lock_super(sb); ++ retval = sb->s_op->remount_fs(sb, &flags, data); ++ unlock_super(sb); ++ if (retval) ++ return retval; ++ } ++ sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); ++ return 0; ++} ++ ++/* ++ * Unnamed block devices are dummy devices used by virtual ++ * filesystems which don't use real block-devices. 
-- jrs ++ */ ++ ++enum {Max_anon = 256}; ++static unsigned long unnamed_dev_in_use[Max_anon/(8*sizeof(unsigned long))]; ++static spinlock_t unnamed_dev_lock = SPIN_LOCK_UNLOCKED;/* protects the above */ ++ ++/** ++ * put_anon_dev - release anonymous device number. ++ * @dev: device in question ++ */ ++static void put_anon_dev(kdev_t dev) ++{ ++ spin_lock(&unnamed_dev_lock); ++ clear_bit(MINOR(dev), unnamed_dev_in_use); ++ spin_unlock(&unnamed_dev_lock); ++} ++ ++/** ++ * get_anon_super - allocate a superblock for non-device fs ++ * @type: filesystem type ++ * @compare: check if existing superblock is what we want ++ * @data: argument for @compare. ++ * ++ * get_anon_super is a helper for non-blockdevice filesystems. ++ * It either finds and returns one of the superblocks of given type ++ * (if it can find one that would satisfy caller) or creates a new ++ * one. In the either case we return an active reference to superblock ++ * with ->s_umount locked. If superblock is new it gets a new ++ * anonymous device allocated for it and is inserted into lists - ++ * other initialization is left to caller. ++ * ++ * Rather than duplicating all that logics every time when ++ * we want something that doesn't fit "nodev" and "single" we pull ++ * the relevant code into common helper and let get_sb_...() call ++ * it. ++ * ++ * NB: get_sb_...() is going to become an fs type method, with ++ * current ->read_super() becoming a callback used by common instances. 
++ */ ++struct super_block *get_anon_super(struct file_system_type *type, ++ int (*compare)(struct super_block *,void *), void *data) ++{ ++ struct super_block *s = alloc_super(); ++ kdev_t dev; ++ struct list_head *p; ++ ++ if (!s) ++ return ERR_PTR(-ENOMEM); ++ ++retry: ++ spin_lock(&sb_lock); ++ if (compare) list_for_each(p, &type->fs_supers) { ++ struct super_block *old; ++ old = list_entry(p, struct super_block, s_instances); ++ if (!compare(old, data)) ++ continue; ++ if (!grab_super(old)) ++ goto retry; ++ destroy_super(s); ++ return old; ++ } ++ ++ spin_lock(&unnamed_dev_lock); ++ dev = find_first_zero_bit(unnamed_dev_in_use, Max_anon); ++ if (dev == Max_anon) { ++ spin_unlock(&unnamed_dev_lock); ++ spin_unlock(&sb_lock); ++ destroy_super(s); ++ return ERR_PTR(-EMFILE); ++ } ++ set_bit(dev, unnamed_dev_in_use); ++ spin_unlock(&unnamed_dev_lock); ++ ++ s->s_dev = dev; ++ insert_super(s, type); ++ return s; ++} ++ ++static struct super_block *get_sb_bdev(struct file_system_type *fs_type, ++ int flags, char *dev_name, void * data) ++{ ++ struct inode *inode; ++ struct block_device *bdev; ++ struct block_device_operations *bdops; ++ devfs_handle_t de; ++ struct super_block * s; ++ struct nameidata nd; ++ struct list_head *p; ++ kdev_t dev; ++ int error = 0; ++ mode_t mode = FMODE_READ; /* we always need it ;-) */ ++ ++ /* What device it is? 
*/ ++ if (!dev_name || !*dev_name) ++ return ERR_PTR(-EINVAL); ++ if (path_init(dev_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd)) ++ error = path_walk(dev_name, &nd); ++ if (error) ++ return ERR_PTR(error); ++ inode = nd.dentry->d_inode; ++ error = -ENOTBLK; ++ if (!S_ISBLK(inode->i_mode)) ++ goto out; ++ error = -EACCES; ++ if (nd.mnt->mnt_flags & MNT_NODEV) ++ goto out; ++ bd_acquire(inode); ++ bdev = inode->i_bdev; ++ de = devfs_get_handle_from_inode (inode); ++ bdops = devfs_get_ops (de); /* Increments module use count */ ++ if (bdops) bdev->bd_op = bdops; ++ /* Done with lookups, semaphore down */ ++ dev = to_kdev_t(bdev->bd_dev); ++ if (!(flags & MS_RDONLY)) ++ mode |= FMODE_WRITE; ++ error = blkdev_get(bdev, mode, 0, BDEV_FS); ++ devfs_put_ops (de); /* Decrement module use count now we're safe */ ++ if (error) ++ goto out; ++ check_disk_change(dev); ++ error = -EACCES; ++ if (!(flags & MS_RDONLY) && is_read_only(dev)) ++ goto out1; ++ ++ error = -ENOMEM; ++ s = alloc_super(); ++ if (!s) ++ goto out1; ++ ++ error = -EBUSY; ++ down(&lockfs_sem); ++restart: ++ spin_lock(&sb_lock); ++ ++ list_for_each(p, &super_blocks) { ++ struct super_block *old = sb_entry(p); ++ if (old->s_dev != dev) ++ continue; ++ if (old->s_type != fs_type || ++ ((flags ^ old->s_flags) & MS_RDONLY)) { ++ spin_unlock(&sb_lock); ++ destroy_super(s); ++ up(&lockfs_sem); ++ goto out1; ++ } ++ if (!grab_super(old)) ++ goto restart; ++ destroy_super(s); ++ blkdev_put(bdev, BDEV_FS); ++ path_release(&nd); ++ up(&lockfs_sem); ++ return old; ++ } ++ s->s_dev = dev; ++ s->s_bdev = bdev; ++ s->s_flags = flags; ++ insert_super(s, fs_type); ++ up(&lockfs_sem); ++ if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 
1 : 0)) ++ goto Einval; ++ s->s_flags |= MS_ACTIVE; ++ path_release(&nd); ++ return s; ++ ++Einval: ++ deactivate_super(s); ++ remove_super(s); ++ error = -EINVAL; ++ goto out; ++out1: ++ blkdev_put(bdev, BDEV_FS); ++out: ++ path_release(&nd); ++ return ERR_PTR(error); ++} ++ ++static struct super_block *get_sb_nodev(struct file_system_type *fs_type, ++ int flags, char *dev_name, void *data) ++{ ++ struct super_block *s = get_anon_super(fs_type, NULL, NULL); ++ ++ if (IS_ERR(s)) ++ return s; ++ ++ s->s_flags = flags; ++ if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0)) { ++ deactivate_super(s); ++ remove_super(s); ++ return ERR_PTR(-EINVAL); ++ } ++ s->s_flags |= MS_ACTIVE; ++ return s; ++} ++ ++static int compare_single(struct super_block *s, void *p) ++{ ++ return 1; ++} ++ ++static struct super_block *get_sb_single(struct file_system_type *fs_type, ++ int flags, char *dev_name, void *data) ++{ ++ struct super_block *s = get_anon_super(fs_type, compare_single, NULL); ++ ++ if (IS_ERR(s)) ++ return s; ++ if (!s->s_root) { ++ s->s_flags = flags; ++ if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 
1 : 0)) { ++ deactivate_super(s); ++ remove_super(s); ++ return ERR_PTR(-EINVAL); ++ } ++ s->s_flags |= MS_ACTIVE; ++ } ++ do_remount_sb(s, flags, data); ++ return s; ++} ++ ++struct vfsmount * ++do_kern_mount(const char *fstype, int flags, char *name, void *data) ++{ ++ struct file_system_type *type = get_fs_type(fstype); ++ struct super_block *sb = ERR_PTR(-ENOMEM); ++ struct vfsmount *mnt; ++ ++ if (!type) ++ return ERR_PTR(-ENODEV); ++ ++ mnt = alloc_vfsmnt(name); ++ if (!mnt) ++ goto out; ++ if (type->fs_flags & FS_REQUIRES_DEV) ++ sb = get_sb_bdev(type, flags, name, data); ++ else if (type->fs_flags & FS_SINGLE) ++ sb = get_sb_single(type, flags, name, data); ++ else ++ sb = get_sb_nodev(type, flags, name, data); ++ if (IS_ERR(sb)) ++ goto out_mnt; ++ if (type->fs_flags & FS_NOMOUNT) ++ sb->s_flags |= MS_NOUSER; ++ mnt->mnt_sb = sb; ++ mnt->mnt_root = dget(sb->s_root); ++ mnt->mnt_mountpoint = sb->s_root; ++ mnt->mnt_parent = mnt; ++ up_write(&sb->s_umount); ++ put_filesystem(type); ++ return mnt; ++out_mnt: ++ free_vfsmnt(mnt); ++out: ++ put_filesystem(type); ++ return (struct vfsmount *)sb; ++} ++ ++void kill_super(struct super_block *sb) ++{ ++ struct dentry *root = sb->s_root; ++ struct file_system_type *fs = sb->s_type; ++ struct super_operations *sop = sb->s_op; ++ ++ if (!deactivate_super(sb)) ++ return; ++ ++ down(&lockfs_sem); ++ down_write(&sb->s_umount); ++ up(&lockfs_sem); ++ ++ sb->s_root = NULL; ++ /* Need to clean after the sucker */ ++ if (fs->fs_flags & FS_LITTER) ++ d_genocide(root); ++ shrink_dcache_parent(root); ++ dput(root); ++ fsync_super(sb); ++ lock_super(sb); ++ lock_kernel(); ++ sb->s_flags &= ~MS_ACTIVE; + invalidate_inodes(sb, 0); /* bad name - it should be evict_inodes() */ - if (sop) { - if (sop->write_super && sb->s_dirt) - sop->write_super(sb); -@@ -844,7 +844,7 @@ void kill_super(struct super_block *sb) - } - - /* Forget any remaining inodes */ -- if (invalidate_inodes(sb)) { ++ if (sop) { ++ if (sop->write_super && 
sb->s_dirt) ++ sop->write_super(sb); ++ if (sop->put_super) ++ sop->put_super(sb); ++ } ++ ++ /* Forget any remaining inodes */ + if (invalidate_inodes(sb, 1)) { - printk(KERN_ERR "VFS: Busy inodes after unmount. " - "Self-destruct in 5 seconds. Have a nice day...\n"); - } ---- linux-2.4.20/include/linux/fs.h~invalidate_show 2003-04-08 23:34:36.000000000 -0600 -+++ linux-2.4.20-braam/include/linux/fs.h 2003-04-08 23:34:36.000000000 -0600 -@@ -1237,7 +1237,7 @@ static inline void mark_buffer_dirty_ino - extern void set_buffer_flushtime(struct buffer_head *); - extern void balance_dirty(void); - extern int check_disk_change(kdev_t); --extern int invalidate_inodes(struct super_block *); ++ printk(KERN_ERR "VFS: Busy inodes after unmount. " ++ "Self-destruct in 5 seconds. Have a nice day...\n"); ++ } ++ ++ unlock_kernel(); ++ unlock_super(sb); ++ remove_super(sb); ++} ++ ++struct vfsmount *kern_mount(struct file_system_type *type) ++{ ++ return do_kern_mount(type->name, 0, (char *)type->name, NULL); ++} +--- /dev/null 2003-01-30 18:24:37.000000000 +0800 ++++ linux-2.4.19-hp3_pnnl1-root/include/linux/fs.h 2003-04-15 13:21:57.000000000 +0800 +@@ -0,0 +1,1709 @@ ++#ifndef _LINUX_FS_H ++#define _LINUX_FS_H ++ ++/* ++ * This file has definitions for some important file table ++ * structures etc. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++struct poll_table_struct; ++ ++ ++/* ++ * It's silly to have NR_OPEN bigger than NR_FILE, but you can change ++ * the file limit at runtime and only root can increase the per-process ++ * nr_file rlimit, so it's safe to set up a ridiculously high absolute ++ * upper limit on files-per-process. ++ * ++ * Some programs (notably those using select()) may have to be ++ * recompiled to take full advantage of the new limits.. 
++ */ ++ ++/* Fixed constants first: */ ++#undef NR_OPEN ++#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */ ++#define INR_OPEN 1024 /* Initial setting for nfile rlimits */ ++ ++#define BLOCK_SIZE_BITS 10 ++#define BLOCK_SIZE (1<i_sb->s_flags & (flg)) ++ ++#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) ++#define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || ((inode)->i_flags & S_SYNC)) ++#define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) ++ ++#define IS_QUOTAINIT(inode) ((inode)->i_flags & S_QUOTA) ++#define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) ++#define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) ++#define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) ++#define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME)) ++#define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME) ++#define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) ++ ++#define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) ++ ++/* the read-only stuff doesn't really belong here, but any other place is ++ probably as bad and I don't want to create yet another include file. 
*/ ++ ++#define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ ++#define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */ ++#define BLKRRPART _IO(0x12,95) /* re-read partition table */ ++#define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ ++#define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ ++#define BLKRASET _IO(0x12,98) /* Set read ahead for block device */ ++#define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ ++#define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ ++#define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ ++#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ ++#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ ++#define BLKSSZGET _IO(0x12,104)/* get block device sector size */ ++#if 0 ++#define BLKPG _IO(0x12,105)/* See blkpg.h */ ++#define BLKELVGET _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t))/* elevator get */ ++#define BLKELVSET _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t))/* elevator set */ ++/* This was here just to show that the number is taken - ++ probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ ++#endif ++/* A jump here: 108-111 have been used for various private purposes. 
*/ ++#define BLKBSZGET _IOR(0x12,112,sizeof(int)) ++#define BLKBSZSET _IOW(0x12,113,sizeof(int)) ++#define BLKGETSIZE64 _IOR(0x12,114,sizeof(u64)) /* return device size in bytes (u64 *arg) */ ++ ++#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ ++#define FIBMAP _IO(0x00,1) /* bmap access */ ++#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++ ++extern void update_atime (struct inode *); ++#define UPDATE_ATIME(inode) update_atime (inode) ++ ++extern void buffer_init(unsigned long); ++extern void inode_init(unsigned long); ++extern void mnt_init(unsigned long); ++ ++/* bh state bits */ ++enum bh_state_bits { ++ BH_Uptodate, /* 1 if the buffer contains valid data */ ++ BH_Dirty, /* 1 if the buffer is dirty */ ++ BH_Lock, /* 1 if the buffer is locked */ ++ BH_Req, /* 0 if the buffer has been invalidated */ ++ BH_Mapped, /* 1 if the buffer has a disk mapping */ ++ BH_New, /* 1 if the buffer is new and not yet written out */ ++ BH_Async, /* 1 if the buffer is under end_buffer_io_async I/O */ ++ BH_Wait_IO, /* 1 if we should write out this buffer */ ++ BH_Launder, /* 1 if we can throttle on this buffer */ ++ BH_JBD, /* 1 if it has an attached journal_head */ ++ BH_Delay, /* 1 if the buffer is delayed allocate */ ++ ++ BH_PrivateStart,/* not a state bit, but the first bit available ++ * for private allocation by other entities ++ */ ++}; ++ ++#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512) ++ ++/* ++ * Try to keep the most commonly used fields in single cache lines (16 ++ * bytes) to improve performance. This ordering should be ++ * particularly beneficial on 32-bit processors. ++ * ++ * We use the first 16 bytes for the data which is used in searches ++ * over the block hash lists (ie. getblk() and friends). ++ * ++ * The second 16 bytes we use for lru buffer scans, as used by ++ * sync_buffers() and refill_freelist(). 
-- sct ++ */ ++struct buffer_head { ++ /* First cache line: */ ++ struct buffer_head *b_next; /* Hash queue list */ ++ unsigned long b_blocknr; /* block number */ ++ unsigned short b_size; /* block size */ ++ unsigned short b_list; /* List that this buffer appears */ ++ kdev_t b_dev; /* device (B_FREE = free) */ ++ ++ atomic_t b_count; /* users using this block */ ++ kdev_t b_rdev; /* Real device */ ++ unsigned long b_state; /* buffer state bitmap (see above) */ ++ unsigned long b_flushtime; /* Time when (dirty) buffer should be written */ ++ ++ struct buffer_head *b_next_free;/* lru/free list linkage */ ++ struct buffer_head *b_prev_free;/* doubly linked list of buffers */ ++ struct buffer_head *b_this_page;/* circular list of buffers in one page */ ++ struct buffer_head *b_reqnext; /* request queue */ ++ ++ struct buffer_head **b_pprev; /* doubly linked list of hash-queue */ ++ char * b_data; /* pointer to data block */ ++ struct page *b_page; /* the page this bh is mapped to */ ++ void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ ++ void *b_private; /* reserved for b_end_io */ ++ ++ unsigned long b_rsector; /* Real buffer location on disk */ ++ wait_queue_head_t b_wait; ++ ++ struct inode * b_inode; ++ struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ ++}; ++ ++typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); ++void init_buffer(struct buffer_head *, bh_end_io_t *, void *); ++ ++#define __buffer_state(bh, state) (((bh)->b_state & (1UL << BH_##state)) != 0) ++ ++#define buffer_uptodate(bh) __buffer_state(bh,Uptodate) ++#define buffer_dirty(bh) __buffer_state(bh,Dirty) ++#define buffer_locked(bh) __buffer_state(bh,Lock) ++#define buffer_req(bh) __buffer_state(bh,Req) ++#define buffer_mapped(bh) __buffer_state(bh,Mapped) ++#define buffer_new(bh) __buffer_state(bh,New) ++#define buffer_async(bh) __buffer_state(bh,Async) ++#define buffer_launder(bh) __buffer_state(bh,Launder) ++#define 
buffer_delay(bh) __buffer_state(bh,Delay) ++ ++#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) ++ ++extern void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); ++ ++#define touch_buffer(bh) mark_page_accessed(bh->b_page) ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Attribute flags. These should be or-ed together to figure out what ++ * has been changed! ++ */ ++#define ATTR_MODE 1 ++#define ATTR_UID 2 ++#define ATTR_GID 4 ++#define ATTR_SIZE 8 ++#define ATTR_ATIME 16 ++#define ATTR_MTIME 32 ++#define ATTR_CTIME 64 ++#define ATTR_ATIME_SET 128 ++#define ATTR_MTIME_SET 256 ++#define ATTR_FORCE 512 /* Not a change, but a change it */ ++#define ATTR_ATTR_FLAG 1024 ++#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */ ++#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */ ++ ++/* ++ * This is the Inode Attributes structure, used for notify_change(). It ++ * uses the above definitions as flags, to know which values have changed. ++ * Also, in this manner, a Filesystem can look at only the values it cares ++ * about. Basically, these are the attributes that the VFS layer can ++ * request to change from the FS layer. 
++ * ++ * Derek Atkins 94-10-20 ++ */ ++struct iattr { ++ unsigned int ia_valid; ++ umode_t ia_mode; ++ uid_t ia_uid; ++ gid_t ia_gid; ++ loff_t ia_size; ++ time_t ia_atime; ++ time_t ia_mtime; ++ time_t ia_ctime; ++ unsigned int ia_attr_flags; ++}; ++ ++/* ++ * This is the inode attributes flag definitions ++ */ ++#define ATTR_FLAG_SYNCRONOUS 1 /* Syncronous write */ ++#define ATTR_FLAG_NOATIME 2 /* Don't update atime */ ++#define ATTR_FLAG_APPEND 4 /* Append-only file */ ++#define ATTR_FLAG_IMMUTABLE 8 /* Immutable file */ ++#define ATTR_FLAG_NODIRATIME 16 /* Don't update atime for directory */ ++ ++/* ++ * Includes for diskquotas and mount structures. ++ */ ++#include ++#include ++ ++/* ++ * oh the beauties of C type declarations. ++ */ ++struct page; ++struct address_space; ++struct kiobuf; ++ ++struct address_space_operations { ++ int (*writepage)(struct page *); ++ int (*readpage)(struct file *, struct page *); ++ int (*sync_page)(struct page *); ++ /* ++ * ext3 requires that a successful prepare_write() call be followed ++ * by a commit_write() call - they must be balanced ++ */ ++ int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); ++ int (*commit_write)(struct file *, struct page *, unsigned, unsigned); ++ /* Unfortunately this kludge is needed for FIBMAP. 
Don't use it */ ++ int (*bmap)(struct address_space *, long); ++ int (*flushpage) (struct page *, unsigned long); ++ int (*releasepage) (struct page *, int); ++#define KERNEL_HAS_O_DIRECT /* this is for modules out of the kernel */ ++ int (*direct_IO)(int, struct inode *, struct kiobuf *, unsigned long, int); ++}; ++ ++struct address_space { ++ struct list_head clean_pages; /* list of clean pages */ ++ struct list_head dirty_pages; /* list of dirty pages */ ++ struct list_head locked_pages; /* list of locked pages */ ++ unsigned long nrpages; /* number of total pages */ ++ struct address_space_operations *a_ops; /* methods */ ++ struct inode *host; /* owner: inode, block_device */ ++ struct vm_area_struct *i_mmap; /* list of private mappings */ ++ struct vm_area_struct *i_mmap_shared; /* list of shared mappings */ ++ spinlock_t i_shared_lock; /* and spinlock protecting it */ ++ int gfp_mask; /* how to allocate the pages */ ++}; ++ ++struct char_device { ++ struct list_head hash; ++ atomic_t count; ++ dev_t dev; ++ atomic_t openers; ++ struct semaphore sem; ++}; ++ ++struct block_device { ++ struct list_head bd_hash; ++ atomic_t bd_count; ++ struct inode * bd_inode; ++ dev_t bd_dev; /* not a kdev_t - it's a search key */ ++ int bd_openers; ++ const struct block_device_operations *bd_op; ++ struct semaphore bd_sem; /* open/close mutex */ ++ struct list_head bd_inodes; ++}; ++ ++struct inode { ++ struct list_head i_hash; ++ struct list_head i_list; ++ struct list_head i_dentry; ++ ++ struct list_head i_dirty_buffers; ++ struct list_head i_dirty_data_buffers; ++ ++ unsigned long i_ino; ++ atomic_t i_count; ++ kdev_t i_dev; ++ umode_t i_mode; ++ nlink_t i_nlink; ++ uid_t i_uid; ++ gid_t i_gid; ++ kdev_t i_rdev; ++ loff_t i_size; ++ time_t i_atime; ++ time_t i_mtime; ++ time_t i_ctime; ++ unsigned int i_blkbits; ++ unsigned long i_blksize; ++ unsigned long i_blocks; ++ unsigned long i_version; ++ unsigned short i_bytes; ++ struct semaphore i_sem; ++ struct semaphore 
i_zombie; ++ struct inode_operations *i_op; ++ struct file_operations *i_fop; /* former ->i_op->default_file_ops */ ++ struct super_block *i_sb; ++ wait_queue_head_t i_wait; ++ struct file_lock *i_flock; ++ struct address_space *i_mapping; ++ struct address_space i_data; ++ struct dquot *i_dquot[MAXQUOTAS]; ++ /* These three should probably be a union */ ++ struct list_head i_devices; ++ struct pipe_inode_info *i_pipe; ++ struct block_device *i_bdev; ++ struct char_device *i_cdev; ++ ++ unsigned long i_dnotify_mask; /* Directory notify events */ ++ struct dnotify_struct *i_dnotify; /* for directory notifications */ ++ ++ unsigned long i_state; ++ ++ unsigned int i_flags; ++ unsigned char i_sock; ++ ++ atomic_t i_writecount; ++ unsigned int i_attr_flags; ++ __u32 i_generation; ++ union { ++ struct minix_inode_info minix_i; ++ struct ext2_inode_info ext2_i; ++ struct ext3_inode_info ext3_i; ++ struct hpfs_inode_info hpfs_i; ++ struct ntfs_inode_info ntfs_i; ++ struct msdos_inode_info msdos_i; ++ struct umsdos_inode_info umsdos_i; ++ struct iso_inode_info isofs_i; ++ struct nfs_inode_info nfs_i; ++ struct sysv_inode_info sysv_i; ++ struct affs_inode_info affs_i; ++ struct ufs_inode_info ufs_i; ++ struct efs_inode_info efs_i; ++ struct romfs_inode_info romfs_i; ++ struct shmem_inode_info shmem_i; ++ struct coda_inode_info coda_i; ++ struct smb_inode_info smbfs_i; ++ struct hfs_inode_info hfs_i; ++ struct adfs_inode_info adfs_i; ++ struct qnx4_inode_info qnx4_i; ++ struct reiserfs_inode_info reiserfs_i; ++ struct bfs_inode_info bfs_i; ++ struct udf_inode_info udf_i; ++ struct ncp_inode_info ncpfs_i; ++ struct proc_inode_info proc_i; ++ struct socket socket_i; ++ struct usbdev_inode_info usbdev_i; ++ struct jffs2_inode_info jffs2_i; ++ void *generic_ip; ++ } u; ++}; ++ ++static inline void inode_add_bytes(struct inode *inode, loff_t bytes) ++{ ++ inode->i_blocks += bytes >> 9; ++ bytes &= 511; ++ inode->i_bytes += bytes; ++ if (inode->i_bytes >= 512) { ++ 
inode->i_blocks++; ++ inode->i_bytes -= 512; ++ } ++} ++ ++static inline void inode_sub_bytes(struct inode *inode, loff_t bytes) ++{ ++ inode->i_blocks -= bytes >> 9; ++ bytes &= 511; ++ if (inode->i_bytes < bytes) { ++ inode->i_blocks--; ++ inode->i_bytes += 512; ++ } ++ inode->i_bytes -= bytes; ++} ++ ++static inline loff_t inode_get_bytes(struct inode *inode) ++{ ++ return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes; ++} ++ ++static inline void inode_set_bytes(struct inode *inode, loff_t bytes) ++{ ++ inode->i_blocks = bytes >> 9; ++ inode->i_bytes = bytes & 511; ++} ++ ++struct fown_struct { ++ int pid; /* pid or -pgrp where SIGIO should be sent */ ++ uid_t uid, euid; /* uid/euid of process setting the owner */ ++ int signum; /* posix.1b rt signal to be delivered on IO */ ++}; ++ ++struct file { ++ struct list_head f_list; ++ struct dentry *f_dentry; ++ struct vfsmount *f_vfsmnt; ++ struct file_operations *f_op; ++ atomic_t f_count; ++ unsigned int f_flags; ++ mode_t f_mode; ++ loff_t f_pos; ++ unsigned long f_reada, f_ramax, f_raend, f_ralen, f_rawin; ++ struct fown_struct f_owner; ++ unsigned int f_uid, f_gid; ++ int f_error; ++ ++ unsigned long f_version; ++ ++ /* needed for tty driver, and maybe others */ ++ void *private_data; ++ struct lookup_intent *f_intent; ++ ++ /* preallocated helper kiobuf to speedup O_DIRECT */ ++ struct kiobuf *f_iobuf; ++ long f_iobuf_lock; ++}; ++extern spinlock_t files_lock; ++#define file_list_lock() spin_lock(&files_lock); ++#define file_list_unlock() spin_unlock(&files_lock); ++ ++#define get_file(x) atomic_inc(&(x)->f_count) ++#define file_count(x) atomic_read(&(x)->f_count) ++ ++extern int init_private_file(struct file *, struct dentry *, int); ++ ++#define MAX_NON_LFS ((1UL<<31) - 1) ++ ++/* Page cache limit. The filesystems should put that into their s_maxbytes ++ limits, otherwise bad things can happen in VM. 
*/ ++#if BITS_PER_LONG==32 ++#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) ++#elif BITS_PER_LONG==64 ++#define MAX_LFS_FILESIZE 0x7fffffffffffffff ++#endif ++#define BLKGETLASTSECT _IO(0x12,108) /* get last sector of block device */ ++#define BLKSETLASTSECT _IO(0x12,109) /* get last sector of block device */ ++ ++#define FL_POSIX 1 ++#define FL_FLOCK 2 ++#define FL_BROKEN 4 /* broken flock() emulation */ ++#define FL_ACCESS 8 /* for processes suspended by mandatory locking */ ++#define FL_LOCKD 16 /* lock held by rpc.lockd */ ++#define FL_LEASE 32 /* lease held on this file */ ++ ++/* ++ * The POSIX file lock owner is determined by ++ * the "struct files_struct" in the thread group ++ * (or NULL for no owner - BSD locks). ++ * ++ * Lockd stuffs a "host" pointer into this. ++ */ ++typedef struct files_struct *fl_owner_t; ++ ++struct file_lock { ++ struct file_lock *fl_next; /* singly linked list for this inode */ ++ struct list_head fl_link; /* doubly linked list of all locks */ ++ struct list_head fl_block; /* circular list of blocked processes */ ++ fl_owner_t fl_owner; ++ unsigned int fl_pid; ++ wait_queue_head_t fl_wait; ++ struct file *fl_file; ++ unsigned char fl_flags; ++ unsigned char fl_type; ++ loff_t fl_start; ++ loff_t fl_end; ++ ++ void (*fl_notify)(struct file_lock *); /* unblock callback */ ++ void (*fl_insert)(struct file_lock *); /* lock insertion callback */ ++ void (*fl_remove)(struct file_lock *); /* lock removal callback */ ++ ++ struct fasync_struct * fl_fasync; /* for lease break notifications */ ++ ++ union { ++ struct nfs_lock_info nfs_fl; ++ } fl_u; ++}; ++ ++/* The following constant reflects the upper bound of the file/locking space */ ++#ifndef OFFSET_MAX ++#define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) ++#define OFFSET_MAX INT_LIMIT(loff_t) ++#define OFFT_OFFSET_MAX INT_LIMIT(off_t) ++#endif ++ ++extern struct list_head file_lock_list; ++ ++#include ++ ++extern int fcntl_getlk(unsigned int, struct flock 
*); ++extern int fcntl_setlk(unsigned int, unsigned int, struct flock *); ++ ++extern int fcntl_getlk64(unsigned int, struct flock64 *); ++extern int fcntl_setlk64(unsigned int, unsigned int, struct flock64 *); ++ ++/* fs/locks.c */ ++extern void locks_init_lock(struct file_lock *); ++extern void locks_copy_lock(struct file_lock *, struct file_lock *); ++extern void locks_remove_posix(struct file *, fl_owner_t); ++extern void locks_remove_flock(struct file *); ++extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); ++extern int posix_lock_file(struct file *, struct file_lock *, unsigned int); ++extern void posix_block_lock(struct file_lock *, struct file_lock *); ++extern void posix_unblock_lock(struct file_lock *); ++extern int posix_locks_deadlock(struct file_lock *, struct file_lock *); ++extern int __get_lease(struct inode *inode, unsigned int flags); ++extern time_t lease_get_mtime(struct inode *); ++extern int lock_may_read(struct inode *, loff_t start, unsigned long count); ++extern int lock_may_write(struct inode *, loff_t start, unsigned long count); ++ ++struct fasync_struct { ++ int magic; ++ int fa_fd; ++ struct fasync_struct *fa_next; /* singly linked list */ ++ struct file *fa_file; ++}; ++ ++#define FASYNC_MAGIC 0x4601 ++ ++/* SMP safe fasync helpers: */ ++extern int fasync_helper(int, struct file *, int, struct fasync_struct **); ++/* can be called from interrupts */ ++extern void kill_fasync(struct fasync_struct **, int, int); ++/* only for net: no internal synchronization */ ++extern void __kill_fasync(struct fasync_struct *, int, int); ++ ++struct nameidata { ++ struct dentry *dentry; ++ struct vfsmount *mnt; ++ struct qstr last; ++ unsigned int flags; ++ int last_type; ++}; ++ ++/* ++ * Umount options ++ */ ++ ++#define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ ++#define MNT_DETACH 0x00000002 /* Just detach from the tree */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include 
++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern struct list_head super_blocks; ++extern spinlock_t sb_lock; ++ ++#define sb_entry(list) list_entry((list), struct super_block, s_list) ++#define S_BIAS (1<<30) ++struct super_block { ++ struct list_head s_list; /* Keep this first */ ++ kdev_t s_dev; ++ unsigned long s_blocksize; ++ unsigned char s_blocksize_bits; ++ unsigned char s_dirt; ++ unsigned long long s_maxbytes; /* Max file size */ ++ struct file_system_type *s_type; ++ struct super_operations *s_op; ++ struct dquot_operations *dq_op; ++ struct quotactl_ops *s_qcop; ++ unsigned long s_flags; ++ unsigned long s_magic; ++ struct dentry *s_root; ++ struct rw_semaphore s_umount; ++ struct semaphore s_lock; ++ int s_count; ++ atomic_t s_active; ++ ++ struct list_head s_dirty; /* dirty inodes */ ++ struct list_head s_locked_inodes;/* inodes being synced */ ++ struct list_head s_files; ++ ++ struct block_device *s_bdev; ++ struct list_head s_instances; ++ struct quota_info s_dquot; /* Diskquota specific options */ ++ ++ union { ++ struct minix_sb_info minix_sb; ++ struct ext2_sb_info ext2_sb; ++ struct ext3_sb_info ext3_sb; ++ struct hpfs_sb_info hpfs_sb; ++ struct ntfs_sb_info ntfs_sb; ++ struct msdos_sb_info msdos_sb; ++ struct isofs_sb_info isofs_sb; ++ struct nfs_sb_info nfs_sb; ++ struct sysv_sb_info sysv_sb; ++ struct affs_sb_info affs_sb; ++ struct ufs_sb_info ufs_sb; ++ struct efs_sb_info efs_sb; ++ struct shmem_sb_info shmem_sb; ++ struct romfs_sb_info romfs_sb; ++ struct smb_sb_info smbfs_sb; ++ struct hfs_sb_info hfs_sb; ++ struct adfs_sb_info adfs_sb; ++ struct qnx4_sb_info qnx4_sb; ++ struct reiserfs_sb_info reiserfs_sb; ++ struct bfs_sb_info bfs_sb; ++ struct udf_sb_info udf_sb; ++ struct ncp_sb_info ncpfs_sb; ++ struct usbdev_sb_info usbdevfs_sb; ++ struct jffs2_sb_info jffs2_sb; ++ struct 
cramfs_sb_info cramfs_sb; ++ void *generic_sbp; ++ } u; ++ /* ++ * The next field is for VFS *only*. No filesystems have any business ++ * even looking at it. You had been warned. ++ */ ++ struct semaphore s_vfs_rename_sem; /* Kludge */ ++ ++ /* The next field is used by knfsd when converting a (inode number based) ++ * file handle into a dentry. As it builds a path in the dcache tree from ++ * the bottom up, there may for a time be a subpath of dentrys which is not ++ * connected to the main tree. This semaphore ensure that there is only ever ++ * one such free path per filesystem. Note that unconnected files (or other ++ * non-directories) are allowed, but not unconnected diretories. ++ */ ++ struct semaphore s_nfsd_free_path_sem; ++}; ++ ++/* ++ * VFS helper functions.. ++ */ ++extern int vfs_create(struct inode *, struct dentry *, int); ++extern int vfs_mkdir(struct inode *, struct dentry *, int); ++extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); ++extern int vfs_symlink(struct inode *, struct dentry *, const char *); ++extern int vfs_link(struct dentry *, struct inode *, struct dentry *); ++extern int vfs_rmdir(struct inode *, struct dentry *); ++extern int vfs_unlink(struct inode *, struct dentry *); ++int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry, ++ struct lookup_intent *it); ++ ++/* ++ * File types ++ */ ++#define DT_UNKNOWN 0 ++#define DT_FIFO 1 ++#define DT_CHR 2 ++#define DT_DIR 4 ++#define DT_BLK 6 ++#define DT_REG 8 ++#define DT_LNK 10 ++#define DT_SOCK 12 ++#define DT_WHT 14 ++ ++/* ++ * This is the "filldir" function type, used by readdir() to let ++ * the kernel specify what kind of dirent layout it wants to have. ++ * This allows the kernel to read directories into kernel space or ++ * to have different dirent layouts depending on the binary type. 
++ */ ++typedef int (*filldir_t)(void *, const char *, int, loff_t, ino_t, unsigned); ++ ++struct block_device_operations { ++ int (*open) (struct inode *, struct file *); ++ int (*release) (struct inode *, struct file *); ++ int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); ++ int (*check_media_change) (kdev_t); ++ int (*revalidate) (kdev_t); ++ struct module *owner; ++}; ++ ++/* ++ * NOTE: ++ * read, write, poll, fsync, readv, writev can be called ++ * without the big kernel lock held in all filesystems. ++ */ ++struct file_operations { ++ struct module *owner; ++ loff_t (*llseek) (struct file *, loff_t, int); ++ ssize_t (*read) (struct file *, char *, size_t, loff_t *); ++ ssize_t (*write) (struct file *, const char *, size_t, loff_t *); ++ int (*readdir) (struct file *, void *, filldir_t); ++ unsigned int (*poll) (struct file *, struct poll_table_struct *); ++ int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); ++ int (*mmap) (struct file *, struct vm_area_struct *); ++ int (*open) (struct inode *, struct file *); ++ int (*flush) (struct file *); ++ int (*release) (struct inode *, struct file *); ++ int (*fsync) (struct file *, struct dentry *, int datasync); ++ int (*fasync) (int, struct file *, int); ++ int (*lock) (struct file *, int, struct file_lock *); ++ ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *); ++ ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *); ++ ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); ++ unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); ++}; ++ ++struct inode_operations { ++ int (*create) (struct inode *,struct dentry *,int); ++ struct dentry * (*lookup) (struct inode *,struct dentry *); ++ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *); ++ int (*link) (struct dentry *,struct inode *,struct dentry *); 
++ int (*link2) (struct inode *,struct inode *, const char *, int); ++ int (*unlink) (struct inode *,struct dentry *); ++ int (*unlink2) (struct inode *, const char *, int); ++ int (*symlink) (struct inode *,struct dentry *,const char *); ++ int (*symlink2) (struct inode *, const char *, int, const char *); ++ int (*mkdir) (struct inode *,struct dentry *,int); ++ int (*mkdir2) (struct inode *, const char *, int,int); ++ int (*rmdir) (struct inode *,struct dentry *); ++ int (*rmdir2) (struct inode *, const char *, int); ++ int (*mknod) (struct inode *,struct dentry *,int,int); ++ int (*mknod2) (struct inode *, const char *, int,int,int); ++ int (*rename) (struct inode *, struct dentry *, ++ struct inode *, struct dentry *); ++ int (*rename2) (struct inode *, struct inode *, ++ const char *oldname, int oldlen, ++ const char *newname, int newlen); ++ int (*readlink) (struct dentry *, char *,int); ++ int (*follow_link) (struct dentry *, struct nameidata *); ++ int (*follow_link2) (struct dentry *, struct nameidata *, ++ struct lookup_intent *it); ++ void (*truncate) (struct inode *); ++ int (*permission) (struct inode *, int); ++ int (*revalidate) (struct dentry *); ++ int (*setattr) (struct dentry *, struct iattr *); ++ int (*setattr_raw) (struct inode *, struct iattr *); ++ int (*getattr) (struct dentry *, struct iattr *); ++ int (*setxattr) (struct dentry *, const char *, void *, size_t, int); ++ ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ++ ssize_t (*listxattr) (struct dentry *, char *, size_t); ++ int (*removexattr) (struct dentry *, const char *); ++}; ++ ++struct seq_file; ++ ++/* ++ * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called ++ * without the big kernel lock held in all filesystems. ++ */ ++struct super_operations { ++ struct inode *(*alloc_inode)(struct super_block *sb); ++ void (*destroy_inode)(struct inode *); ++ ++ void (*read_inode) (struct inode *); ++ ++ /* reiserfs kludge. 
reiserfs needs 64 bits of information to ++ ** find an inode. We are using the read_inode2 call to get ++ ** that information. We don't like this, and are waiting on some ++ ** VFS changes for the real solution. ++ ** iget4 calls read_inode2, iff it is defined ++ */ ++ void (*read_inode2) (struct inode *, void *) ; ++ void (*dirty_inode) (struct inode *); ++ void (*write_inode) (struct inode *, int); ++ void (*put_inode) (struct inode *); ++ void (*delete_inode) (struct inode *); ++ void (*put_super) (struct super_block *); ++ void (*write_super) (struct super_block *); ++ void (*write_super_lockfs) (struct super_block *); ++ void (*unlockfs) (struct super_block *); ++ int (*statfs) (struct super_block *, struct statfs *); ++ int (*remount_fs) (struct super_block *, int *, char *); ++ void (*clear_inode) (struct inode *); ++ void (*umount_begin) (struct super_block *); ++ ++ /* Following are for knfsd to interact with "interesting" filesystems ++ * Currently just reiserfs, but possibly FAT and others later ++ * ++ * fh_to_dentry is given a filehandle fragement with length, and a type flag ++ * and must return a dentry for the referenced object or, if "parent" is ++ * set, a dentry for the parent of the object. ++ * If a dentry cannot be found, a "root" dentry should be created and ++ * flaged as DCACHE_NFSD_DISCONNECTED. nfsd_iget is an example implementation. ++ * ++ * dentry_to_fh is given a dentry and must generate the filesys specific ++ * part of the file handle. Available length is passed in *lenp and used ++ * length should be returned therein. ++ * If need_parent is set, then dentry_to_fh should encode sufficient information ++ * to find the (current) parent. ++ * dentry_to_fh should return a 1byte "type" which will be passed back in ++ * the fhtype arguement to fh_to_dentry. Type of 0 is reserved. ++ * If filesystem was exportable before the introduction of fh_to_dentry, ++ * types 1 and 2 should be used is that same way as the generic code. 
++ * Type 255 means error. ++ * ++ * Lengths are in units of 4bytes, not bytes. ++ */ ++ struct dentry * (*fh_to_dentry)(struct super_block *sb, __u32 *fh, int len, int fhtype, int parent); ++ int (*dentry_to_fh)(struct dentry *, __u32 *fh, int *lenp, int need_parent); ++ int (*show_options)(struct seq_file *, struct vfsmount *); ++}; ++ ++/* Inode state bits.. */ ++#define I_DIRTY_SYNC 1 /* Not dirty enough for O_DATASYNC */ ++#define I_DIRTY_DATASYNC 2 /* Data-related inode changes pending */ ++#define I_DIRTY_PAGES 4 /* Data-related inode changes pending */ ++#define I_LOCK 8 ++#define I_FREEING 16 ++#define I_CLEAR 32 ++#define I_NEW 64 ++ ++#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) ++ ++extern void __mark_inode_dirty(struct inode *, int); ++static inline void mark_inode_dirty(struct inode *inode) ++{ ++ __mark_inode_dirty(inode, I_DIRTY); ++} ++ ++static inline void mark_inode_dirty_sync(struct inode *inode) ++{ ++ __mark_inode_dirty(inode, I_DIRTY_SYNC); ++} ++ ++static inline void mark_inode_dirty_pages(struct inode *inode) ++{ ++ __mark_inode_dirty(inode, I_DIRTY_PAGES); ++} ++ ++struct file_system_type { ++ const char *name; ++ int fs_flags; ++ struct super_block *(*read_super) (struct super_block *, void *, int); ++ struct module *owner; ++ struct file_system_type * next; ++ struct list_head fs_supers; ++}; ++ ++#define DECLARE_FSTYPE(var,type,read,flags) \ ++struct file_system_type var = { \ ++ name: type, \ ++ read_super: read, \ ++ fs_flags: flags, \ ++ owner: THIS_MODULE, \ ++} ++ ++#define DECLARE_FSTYPE_DEV(var,type,read) \ ++ DECLARE_FSTYPE(var,type,read,FS_REQUIRES_DEV) ++ ++/* Alas, no aliases. Too much hassle with bringing module.h everywhere */ ++#define fops_get(fops) \ ++ (((fops) && (fops)->owner) \ ++ ? ( try_inc_mod_count((fops)->owner) ? 
(fops) : NULL ) \ ++ : (fops)) ++ ++#define fops_put(fops) \ ++do { \ ++ if ((fops) && (fops)->owner) \ ++ __MOD_DEC_USE_COUNT((fops)->owner); \ ++} while(0) ++ ++extern int register_filesystem(struct file_system_type *); ++extern int unregister_filesystem(struct file_system_type *); ++extern struct vfsmount *kern_mount(struct file_system_type *); ++extern int may_umount(struct vfsmount *); ++extern long do_mount(char *, char *, char *, unsigned long, void *); ++struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data); ++extern void umount_tree(struct vfsmount *); ++ ++#define kern_umount mntput ++ ++extern int vfs_statfs(struct super_block *, struct statfs *); ++ ++/* Return value for VFS lock functions - tells locks.c to lock conventionally ++ * REALLY kosha for root NFS and nfs_lock ++ */ ++#define LOCK_USE_CLNT 1 ++ ++#define FLOCK_VERIFY_READ 1 ++#define FLOCK_VERIFY_WRITE 2 ++ ++extern int locks_mandatory_locked(struct inode *); ++extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); ++ ++/* ++ * Candidates for mandatory locking have the setgid bit set ++ * but no group execute bit - an otherwise meaningless combination. ++ */ ++#define MANDATORY_LOCK(inode) \ ++ (IS_MANDLOCK(inode) && ((inode)->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) ++ ++static inline int locks_verify_locked(struct inode *inode) ++{ ++ if (MANDATORY_LOCK(inode)) ++ return locks_mandatory_locked(inode); ++ return 0; ++} ++ ++static inline int locks_verify_area(int read_write, struct inode *inode, ++ struct file *filp, loff_t offset, ++ size_t count) ++{ ++ if (inode->i_flock && MANDATORY_LOCK(inode)) ++ return locks_mandatory_area(read_write, inode, filp, offset, count); ++ return 0; ++} ++ ++static inline int locks_verify_truncate(struct inode *inode, ++ struct file *filp, ++ loff_t size) ++{ ++ if (inode->i_flock && MANDATORY_LOCK(inode)) ++ return locks_mandatory_area( ++ FLOCK_VERIFY_WRITE, inode, filp, ++ size < inode->i_size ? 
size : inode->i_size, ++ (size < inode->i_size ? inode->i_size - size ++ : size - inode->i_size) ++ ); ++ return 0; ++} ++ ++static inline int get_lease(struct inode *inode, unsigned int mode) ++{ ++ if (inode->i_flock && (inode->i_flock->fl_flags & FL_LEASE)) ++ return __get_lease(inode, mode); ++ return 0; ++} ++ ++/* fs/open.c */ ++ ++asmlinkage long sys_open(const char *, int, int); ++asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ ++extern int do_truncate(struct dentry *, loff_t start, int called_from_open); ++ ++extern struct file *filp_open(const char *, int, int); ++extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); ++extern int open_namei_it(const char *filename, int namei_flags, int mode, ++ struct nameidata *nd, struct lookup_intent *it); ++extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, ++ int flags, struct lookup_intent *it); ++extern int filp_close(struct file *, fl_owner_t id); ++extern char * getname(const char *); ++ ++/* fs/dcache.c */ ++extern void vfs_caches_init(unsigned long); ++ ++#define __getname() kmem_cache_alloc(names_cachep, SLAB_KERNEL) ++#define putname(name) kmem_cache_free(names_cachep, (void *)(name)) ++ ++enum {BDEV_FILE, BDEV_SWAP, BDEV_FS, BDEV_RAW}; ++extern int register_blkdev(unsigned int, const char *, struct block_device_operations *); ++extern int unregister_blkdev(unsigned int, const char *); ++extern struct block_device *bdget(dev_t); ++extern int bd_acquire(struct inode *inode); ++extern void bd_forget(struct inode *inode); ++extern void bdput(struct block_device *); ++extern struct char_device *cdget(dev_t); ++extern void cdput(struct char_device *); ++extern int blkdev_open(struct inode *, struct file *); ++extern int blkdev_close(struct inode *, struct file *); ++extern struct file_operations def_blk_fops; ++extern struct address_space_operations def_blk_aops; ++extern struct file_operations def_fifo_fops; ++extern int 
ioctl_by_bdev(struct block_device *, unsigned, unsigned long); ++extern int blkdev_get(struct block_device *, mode_t, unsigned, int); ++extern int blkdev_put(struct block_device *, int); ++ ++/* fs/devices.c */ ++extern const struct block_device_operations *get_blkfops(unsigned int); ++extern int register_chrdev(unsigned int, const char *, struct file_operations *); ++extern int unregister_chrdev(unsigned int, const char *); ++extern int chrdev_open(struct inode *, struct file *); ++extern const char * bdevname(kdev_t); ++extern const char * cdevname(kdev_t); ++extern const char * kdevname(kdev_t); ++extern void init_special_inode(struct inode *, umode_t, int); ++ ++/* Invalid inode operations -- fs/bad_inode.c */ ++extern void make_bad_inode(struct inode *); ++extern int is_bad_inode(struct inode *); ++ ++extern struct file_operations read_fifo_fops; ++extern struct file_operations write_fifo_fops; ++extern struct file_operations rdwr_fifo_fops; ++extern struct file_operations read_pipe_fops; ++extern struct file_operations write_pipe_fops; ++extern struct file_operations rdwr_pipe_fops; ++ ++extern int fs_may_remount_ro(struct super_block *); ++ ++extern int FASTCALL(try_to_free_buffers(struct page *, unsigned int)); ++extern void refile_buffer(struct buffer_head * buf); ++extern void create_empty_buffers(struct page *, kdev_t, unsigned long); ++extern void end_buffer_io_sync(struct buffer_head *bh, int uptodate); ++ ++/* reiserfs_writepage needs this */ ++extern void set_buffer_async_io(struct buffer_head *bh) ; ++ ++#define BUF_CLEAN 0 ++#define BUF_LOCKED 1 /* Buffers scheduled for write */ ++#define BUF_DIRTY 2 /* Dirty buffers, not yet scheduled for write */ ++#define NR_LIST 3 ++ ++static inline void get_bh(struct buffer_head * bh) ++{ ++ atomic_inc(&(bh)->b_count); ++} ++ ++static inline void put_bh(struct buffer_head *bh) ++{ ++ smp_mb__before_atomic_dec(); ++ atomic_dec(&bh->b_count); ++} ++ ++/* ++ * This is called by bh->b_end_io() handlers when I/O 
has completed. ++ */ ++static inline void mark_buffer_uptodate(struct buffer_head * bh, int on) ++{ ++ if (on) ++ set_bit(BH_Uptodate, &bh->b_state); ++ else ++ clear_bit(BH_Uptodate, &bh->b_state); ++} ++ ++#define atomic_set_buffer_clean(bh) test_and_clear_bit(BH_Dirty, &(bh)->b_state) ++ ++static inline void __mark_buffer_clean(struct buffer_head *bh) ++{ ++ refile_buffer(bh); ++} ++ ++static inline void mark_buffer_clean(struct buffer_head * bh) ++{ ++ if (atomic_set_buffer_clean(bh)) ++ __mark_buffer_clean(bh); ++} ++ ++extern void FASTCALL(__mark_dirty(struct buffer_head *bh)); ++extern void FASTCALL(__mark_buffer_dirty(struct buffer_head *bh)); ++extern void FASTCALL(mark_buffer_dirty(struct buffer_head *bh)); ++extern void FASTCALL(buffer_insert_inode_queue(struct buffer_head *, struct inode *)); ++extern void FASTCALL(buffer_insert_inode_data_queue(struct buffer_head *, struct inode *)); ++ ++static inline int atomic_set_buffer_dirty(struct buffer_head *bh) ++{ ++ return test_and_set_bit(BH_Dirty, &bh->b_state); ++} ++ ++static inline void mark_buffer_async(struct buffer_head * bh, int on) ++{ ++ if (on) ++ set_bit(BH_Async, &bh->b_state); ++ else ++ clear_bit(BH_Async, &bh->b_state); ++} ++ ++/* ++ * If an error happens during the make_request, this function ++ * has to be recalled. It marks the buffer as clean and not ++ * uptodate, and it notifys the upper layer about the end ++ * of the I/O. ++ */ ++static inline void buffer_IO_error(struct buffer_head * bh) ++{ ++ mark_buffer_clean(bh); ++ /* ++ * b_end_io has to clear the BH_Uptodate bitflag in the error case! 
++ */ ++ bh->b_end_io(bh, 0); ++} ++ ++static inline void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) ++{ ++ mark_buffer_dirty(bh); ++ buffer_insert_inode_queue(bh, inode); ++} ++ ++extern void set_buffer_flushtime(struct buffer_head *); ++extern void balance_dirty(void); ++extern int check_disk_change(kdev_t); +extern int invalidate_inodes(struct super_block *, int); - extern int invalidate_device(kdev_t, int); - extern void invalidate_inode_pages(struct inode *); - extern void invalidate_inode_pages2(struct address_space *); ++extern int invalidate_device(kdev_t, int); ++extern void invalidate_inode_pages(struct inode *); ++extern void invalidate_inode_pages2(struct address_space *); ++extern void invalidate_inode_buffers(struct inode *); ++#define invalidate_buffers(dev) __invalidate_buffers((dev), 0) ++#define destroy_buffers(dev) __invalidate_buffers((dev), 1) ++extern void invalidate_bdev(struct block_device *, int); ++extern void __invalidate_buffers(kdev_t dev, int); ++extern void sync_inodes(kdev_t); ++extern void sync_unlocked_inodes(void); ++extern void write_inode_now(struct inode *, int); ++extern int sync_buffers(kdev_t, int); ++extern void sync_dev(kdev_t); ++extern int fsync_dev(kdev_t); ++extern int fsync_dev_lockfs(kdev_t); ++extern int fsync_super(struct super_block *); ++extern int fsync_no_super(kdev_t); ++extern void sync_inodes_sb(struct super_block *); ++extern int fsync_buffers_list(struct list_head *); ++static inline int fsync_inode_buffers(struct inode *inode) ++{ ++ return fsync_buffers_list(&inode->i_dirty_buffers); ++} ++static inline int fsync_inode_data_buffers(struct inode *inode) ++{ ++ return fsync_buffers_list(&inode->i_dirty_data_buffers); ++} ++extern int inode_has_buffers(struct inode *); ++extern int filemap_fdatasync(struct address_space *); ++extern int filemap_fdatawait(struct address_space *); ++extern void sync_supers(kdev_t); ++extern void sync_supers_lockfs(kdev_t); ++extern void 
unlockfs(kdev_t); ++extern int bmap(struct inode *, int); ++extern int notify_change(struct dentry *, struct iattr *); ++extern int permission(struct inode *, int); ++extern int vfs_permission(struct inode *, int); ++extern int get_write_access(struct inode *); ++extern int deny_write_access(struct file *); ++static inline void put_write_access(struct inode * inode) ++{ ++ atomic_dec(&inode->i_writecount); ++} ++static inline void allow_write_access(struct file *file) ++{ ++ if (file) ++ atomic_inc(&file->f_dentry->d_inode->i_writecount); ++} ++extern int do_pipe(int *); ++ ++extern int open_namei(const char *, int, int, struct nameidata *); ++ ++extern int kernel_read(struct file *, unsigned long, char *, unsigned long); ++extern struct file * open_exec(const char *); ++ ++/* fs/dcache.c -- generic fs support functions */ ++extern int is_subdir(struct dentry *, struct dentry *); ++extern ino_t find_inode_number(struct dentry *, struct qstr *); ++ ++/* ++ * Kernel pointers have redundant information, so we can use a ++ * scheme where we can return either an error code or a dentry ++ * pointer with the same return value. ++ * ++ * This should be a per-architecture thing, to allow different ++ * error and pointer decisions. 
++ */ ++static inline void *ERR_PTR(long error) ++{ ++ return (void *) error; ++} ++ ++static inline long PTR_ERR(const void *ptr) ++{ ++ return (long) ptr; ++} ++ ++static inline long IS_ERR(const void *ptr) ++{ ++ return (unsigned long)ptr > (unsigned long)-1000L; ++} ++ ++/* ++ * The bitmask for a lookup event: ++ * - follow links at the end ++ * - require a directory ++ * - ending slashes ok even for nonexistent files ++ * - internal "there are more path compnents" flag ++ */ ++#define LOOKUP_FOLLOW (1) ++#define LOOKUP_DIRECTORY (2) ++#define LOOKUP_CONTINUE (4) ++#define LOOKUP_POSITIVE (8) ++#define LOOKUP_PARENT (16) ++#define LOOKUP_NOALT (32) ++/* ++ * Type of the last component on LOOKUP_PARENT ++ */ ++enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; ++ ++/* ++ * "descriptor" for what we're up to with a read for sendfile(). ++ * This allows us to use the same read code yet ++ * have multiple different users of the data that ++ * we read from a file. ++ * ++ * The simplest case just copies the data to user ++ * mode. 
++ */ ++typedef struct { ++ size_t written; ++ size_t count; ++ char * buf; ++ int error; ++} read_descriptor_t; ++ ++typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long); ++ ++/* needed for stackable file system support */ ++extern loff_t default_llseek(struct file *file, loff_t offset, int origin); ++ ++extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); ++extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it)); ++extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); ++extern int FASTCALL(path_walk(const char *, struct nameidata *)); ++extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); ++extern void path_release(struct nameidata *); ++extern int follow_down(struct vfsmount **, struct dentry **); ++extern int follow_up(struct vfsmount **, struct dentry **); ++extern struct dentry * lookup_one_len(const char *, struct dentry *, int); ++extern struct dentry * lookup_hash(struct qstr *, struct dentry *); ++#define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) ++#define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) ++#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it) ++#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it) ++ ++extern void inode_init_once(struct inode *); ++extern void iput(struct inode *); ++extern void force_delete(struct inode *); ++extern struct inode * igrab(struct inode *); ++extern ino_t iunique(struct super_block *, ino_t); ++extern void unlock_new_inode(struct inode *); ++ ++typedef int (*find_inode_t)(struct inode *, unsigned long, void *); ++ ++extern struct inode * iget4_locked(struct super_block *, unsigned long, ++ find_inode_t, void *); ++ ++static inline struct inode *iget4(struct super_block *sb, unsigned long ino, ++ find_inode_t find_actor, void 
*opaque) ++{ ++ struct inode *inode = iget4_locked(sb, ino, find_actor, opaque); ++ ++ if (inode && (inode->i_state & I_NEW)) { /* only an inode still flagged I_NEW is read in and unlocked here */ ++ /* ++ * reiserfs-specific kludge that is expected to go away ASAP. ++ */ ++ if (sb->s_op->read_inode2) ++ sb->s_op->read_inode2(inode, opaque); ++ else ++ sb->s_op->read_inode(inode); ++ unlock_new_inode(inode); ++ } ++ ++ return inode; ++} ++/* iget: iget4_locked() with NULL find actor and NULL opaque pointer; an inode flagged I_NEW is passed to s_op->read_inode() and then to unlock_new_inode() */ ++static inline struct inode *iget(struct super_block *sb, unsigned long ino) ++{ ++ struct inode *inode = iget4_locked(sb, ino, NULL, NULL); ++ ++ if (inode && (inode->i_state & I_NEW)) { ++ sb->s_op->read_inode(inode); ++ unlock_new_inode(inode); ++ } ++ ++ return inode; ++} ++/* iget_locked: returns the iget4_locked() result as-is; unlike iget() it calls neither read_inode() nor unlock_new_inode() (NOTE(review): presumably left to the caller - confirm) */ ++static inline struct inode *iget_locked(struct super_block *sb, unsigned long ino) ++{ ++ return iget4_locked(sb, ino, NULL, NULL); ++} ++ ++extern void clear_inode(struct inode *); ++extern struct inode *new_inode(struct super_block *sb); ++extern void remove_suid(struct inode *inode); ++ ++extern void insert_inode_hash(struct inode *); ++extern void remove_inode_hash(struct inode *); ++extern struct file * get_empty_filp(void); ++extern void file_move(struct file *f, struct list_head *list); ++extern struct buffer_head * get_hash_table(kdev_t, int, int); ++extern struct buffer_head * getblk(kdev_t, int, int); ++extern void ll_rw_block(int, int, struct buffer_head * bh[]); ++extern void submit_bh(int, struct buffer_head *); ++extern int is_read_only(kdev_t); ++extern void __brelse(struct buffer_head *); ++static inline void brelse(struct buffer_head *buf) /* forwards to __brelse(); does nothing when buf is NULL */ ++{ ++ if (buf) ++ __brelse(buf); ++} ++extern void __bforget(struct buffer_head *); ++static inline void bforget(struct buffer_head *buf) /* forwards to __bforget(); does nothing when buf is NULL */ ++{ ++ if (buf) ++ __bforget(buf); ++} ++extern int set_blocksize(kdev_t, int); ++extern int sb_set_blocksize(struct super_block *, int); ++extern int sb_min_blocksize(struct super_block *, int); ++extern struct buffer_head * bread(kdev_t, int, int); ++static inline struct buffer_head * sb_bread(struct super_block *sb, int block) /* bread() using sb->s_dev and sb->s_blocksize */
++{ ++ return bread(sb->s_dev, block, sb->s_blocksize); ++} ++static inline struct buffer_head * sb_getblk(struct super_block *sb, int block) /* getblk() using sb->s_dev and sb->s_blocksize */ ++{ ++ return getblk(sb->s_dev, block, sb->s_blocksize); ++} ++static inline struct buffer_head * sb_get_hash_table(struct super_block *sb, int block) /* get_hash_table() using sb->s_dev and sb->s_blocksize */ ++{ ++ return get_hash_table(sb->s_dev, block, sb->s_blocksize); ++} ++extern void wakeup_bdflush(void); ++extern void put_unused_buffer_head(struct buffer_head * bh); ++extern struct buffer_head * get_unused_buffer_head(int async); ++ ++extern int brw_page(int, struct page *, kdev_t, int [], int); ++ ++typedef int (get_block_t)(struct inode*,long,struct buffer_head*,int); /* a function type, not a pointer type: the prototypes below take get_block_t* */ ++ ++/* Generic buffer handling for block filesystems.. */ ++extern int try_to_release_page(struct page * page, int gfp_mask); ++extern int discard_bh_page(struct page *, unsigned long, int); ++#define block_flushpage(page, offset) discard_bh_page(page, offset, 1) /* discard_bh_page() at the given offset with last argument 1 */ ++#define block_invalidate_page(page) discard_bh_page(page, 0, 0) /* discard_bh_page() at offset 0 with last argument 0 */ ++extern int block_symlink(struct inode *, const char *, int); ++extern int block_write_full_page(struct page*, get_block_t*); ++extern int block_read_full_page(struct page*, get_block_t*); ++extern int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); ++extern int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, ++ unsigned long *); ++extern int generic_cont_expand(struct inode *inode, loff_t size) ; ++extern int block_commit_write(struct page *page, unsigned from, unsigned to); ++extern int block_sync_page(struct page *); ++ ++int generic_block_bmap(struct address_space *, long, get_block_t *); ++int generic_commit_write(struct file *, struct page *, unsigned, unsigned); ++int block_truncate_page(struct address_space *, loff_t, get_block_t *); ++extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *); ++extern int waitfor_one_page(struct page *); ++extern int writeout_one_page(struct page *); ++
++extern int generic_file_mmap(struct file *, struct vm_area_struct *); ++extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); /* return type and parameter list match the read_actor_t type taken by do_generic_file_read() */ ++extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *); ++extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *); ++extern ssize_t generic_file_write_nolock(struct file *, const char *, size_t, loff_t *); ++extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t); ++extern loff_t no_llseek(struct file *file, loff_t offset, int origin); ++extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); ++extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *); ++extern int generic_file_open(struct inode * inode, struct file * filp); ++ ++extern struct file_operations generic_ro_fops; ++ ++extern int vfs_readlink(struct dentry *, char *, int, const char *); ++extern int vfs_follow_link(struct nameidata *, const char *); ++extern int vfs_follow_link_it(struct nameidata *, const char *, ++ struct lookup_intent *it); /* vfs_follow_link() parameter list plus a lookup_intent pointer */ ++extern int page_readlink(struct dentry *, char *, int); ++extern int page_follow_link(struct dentry *, struct nameidata *); ++extern struct inode_operations page_symlink_inode_operations; ++ ++extern int vfs_readdir(struct file *, filldir_t, void *); ++extern int dcache_dir_open(struct inode *, struct file *); ++extern int dcache_dir_close(struct inode *, struct file *); ++extern loff_t dcache_dir_lseek(struct file *, loff_t, int); ++extern int dcache_dir_fsync(struct file *, struct dentry *, int); ++extern int dcache_readdir(struct file *, void *, filldir_t); ++extern struct file_operations dcache_dir_ops; ++ ++extern struct file_system_type *get_fs_type(const char *name); ++extern struct super_block *get_super(kdev_t); ++extern void drop_super(struct super_block *sb); ++static inline int is_mounted(kdev_t dev) /* returns 1 when get_super(dev) yields a super block (released again with drop_super()), otherwise 0 */ ++{ ++ struct super_block *sb =
get_super(dev); ++ if (sb) { /* a super block was found: hand it back with drop_super() and report 1 */ ++ drop_super(sb); ++ return 1; ++ } ++ return 0; ++} ++unsigned long generate_cluster(kdev_t, int b[], int); ++unsigned long generate_cluster_swab32(kdev_t, int b[], int); ++extern kdev_t ROOT_DEV; ++extern char root_device_name[]; ++ ++ ++extern void show_buffers(void); ++ ++#ifdef CONFIG_BLK_DEV_INITRD ++extern unsigned int real_root_dev; ++#endif /* CONFIG_BLK_DEV_INITRD */ ++ ++extern ssize_t char_read(struct file *, char *, size_t, loff_t *); ++extern ssize_t block_read(struct file *, char *, size_t, loff_t *); ++extern int read_ahead[]; ++ ++extern ssize_t char_write(struct file *, const char *, size_t, loff_t *); ++extern ssize_t block_write(struct file *, const char *, size_t, loff_t *); ++ ++extern int file_fsync(struct file *, struct dentry *, int); ++extern int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsigned long end_idx); ++extern int generic_osync_inode(struct inode *, int); ++#define OSYNC_METADATA (1<<0) /* the three OSYNC_* values are distinct single bits, so they can be OR-ed together; presumably the int argument of generic_osync_inode() - TODO confirm */ ++#define OSYNC_DATA (1<<1) ++#define OSYNC_INODE (1<<2) ++ ++extern int inode_change_ok(struct inode *, struct iattr *); ++extern int inode_setattr(struct inode *, struct iattr *); ++ ++/* ++ * Common dentry functions for inclusion in the VFS ++ * or in other stackable file systems. Some of these ++ * functions were in linux/fs/ C (VFS) files. ++ * ++ */ ++ ++/* ++ * Locking the parent is needed to: ++ * - serialize directory operations ++ * - make sure the parent doesn't change from ++ * under us in the middle of an operation. ++ * ++ * NOTE! Right now we'd rather use a "struct inode" ++ * for this, but as I expect things to move toward ++ * using dentries instead for most things it is ++ * probably better to start with the conceptually ++ * better interface of relying on a path of dentries.
++ */ ++static inline struct dentry *lock_parent(struct dentry *dentry) /* returns the parent dentry after dget() on it and down() on its inode i_sem */ ++{ ++ struct dentry *dir = dget(dentry->d_parent); ++ ++ down(&dir->d_inode->i_sem); ++ return dir; ++} ++/* get_parent: only does the dget() on d_parent; i_sem is not touched */ ++static inline struct dentry *get_parent(struct dentry *dentry) ++{ ++ return dget(dentry->d_parent); ++} ++/* unlock_dir: up() on the directory inode i_sem followed by dput(), the reverse of the two steps in lock_parent() */ ++static inline void unlock_dir(struct dentry *dir) ++{ ++ up(&dir->d_inode->i_sem); ++ dput(dir); ++} ++ ++/* ++ * Whee.. Deadlock country. Happily there are only two VFS ++ * operations that do this.. ++ */ ++static inline void double_down(struct semaphore *s1, struct semaphore *s2) ++{ ++ if (s1 != s2) { ++ if ((unsigned long) s1 < (unsigned long) s2) { /* swap so that s1 holds the higher address */ ++ struct semaphore *tmp = s2; ++ s2 = s1; s1 = tmp; ++ } ++ down(s1); /* higher address is always taken first */ ++ } ++ down(s2); /* the only down() when both arguments are the same semaphore */ ++} ++ ++/* ++ * Ewwwwwwww... _triple_ lock. We are guaranteed that the 3rd argument is ++ * not equal to 1st and not equal to 2nd - the first case (target is parent of ++ * source) would be already caught, the second is plain impossible (target is ++ * its own parent and that case would be caught even earlier). Very messy. ++ * I _think_ that it works, but no warranties - please, look it through. ++ * Pox on bloody lusers who mandated overwriting rename() for directories...
++ */ ++ ++static inline void triple_down(struct semaphore *s1, ++ struct semaphore *s2, ++ struct semaphore *s3) ++{ ++ if (s1 != s2) { /* three distinct semaphores: the swaps below leave them in descending address order s1, s2, s3 */ ++ if ((unsigned long) s1 < (unsigned long) s2) { ++ if ((unsigned long) s1 < (unsigned long) s3) { ++ struct semaphore *tmp = s3; ++ s3 = s1; s1 = tmp; ++ } ++ if ((unsigned long) s1 < (unsigned long) s2) { ++ struct semaphore *tmp = s2; ++ s2 = s1; s1 = tmp; ++ } ++ } else { ++ if ((unsigned long) s1 < (unsigned long) s3) { ++ struct semaphore *tmp = s3; ++ s3 = s1; s1 = tmp; ++ } ++ if ((unsigned long) s2 < (unsigned long) s3) { ++ struct semaphore *tmp = s3; ++ s3 = s2; s2 = tmp; ++ } ++ } ++ down(s1); /* highest address first */ ++ } else if ((unsigned long) s2 < (unsigned long) s3) { /* s1 and s2 are the same semaphore: only s2 and s3 need ordering, and s1 is not downed separately */ ++ struct semaphore *tmp = s3; ++ s3 = s2; s2 = tmp; ++ } ++ down(s2); ++ down(s3); ++} ++/* double_up: up() on s1, and on s2 only when it is a different semaphore */ ++static inline void double_up(struct semaphore *s1, struct semaphore *s2) ++{ ++ up(s1); ++ if (s1 != s2) ++ up(s2); ++} ++/* triple_up: up() on s1, on s2 unless it is the same semaphore as s1, and then on s3 */ ++static inline void triple_up(struct semaphore *s1, ++ struct semaphore *s2, ++ struct semaphore *s3) ++{ ++ up(s1); ++ if (s1 != s2) ++ up(s2); ++ up(s3); ++} ++/* double_lock: double_down() on the i_sem of the inodes behind d1 and d2 */ ++static inline void double_lock(struct dentry *d1, struct dentry *d2) ++{ ++ double_down(&d1->d_inode->i_sem, &d2->d_inode->i_sem); ++} ++/* double_unlock: double_up() on the same two i_sem, then dput() on both dentries */ ++static inline void double_unlock(struct dentry *d1, struct dentry *d2) ++{ ++ double_up(&d1->d_inode->i_sem,&d2->d_inode->i_sem); ++ dput(d1); ++ dput(d2); ++} ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* _LINUX_FS_H */ +--- linux-2.4.19-hp3_pnnl1/fs/exec.c~invalidate_show 2003-04-15 13:17:46.000000000 +0800 ++++ linux-2.4.19-hp3_pnnl1-root/fs/exec.c 2003-04-15 13:22:26.000000000 +0800 +@@ -1058,7 +1058,7 @@ int do_coredump(long signr, struct pt_re + goto close_fail; + if (!file->f_op->write) + goto close_fail; +- if (do_truncate(file->f_dentry, 0) != 0) ++ if (do_truncate(file->f_dentry, 0, 0) != 0) + goto close_fail; + + retval = binfmt->core_dump(signr, regs, file); _ -- 1.8.3.1