Index: mmp/fs/ext3/al.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ mmp/fs/ext3/al.h	2006-07-24 10:39:26.000000000 +0800
@@ -0,0 +1,20 @@
+/*
+ * (C) 2006 Qi Yong
+ */
+
+#ifndef EXT3_AL_H
+#define EXT3_AL_H
+
+#define ALIVE_MAGIC 0xA1153C29
+
+/*
+ * Contents of the alive block as stored on disk.
+ */
+struct alive_struct {
+	__le32 al_magic;	/* ALIVE_MAGIC */
+	__le32 al_seq;		/* heartbeat sequence number, 0 = not in use */
+	__le32 al_time;		/* get_seconds() at the last update */
+	char al_nodename[65];	/* nodename of the last writer */
+};
+
+#endif /* EXT3_AL_H */
Index: mmp/fs/ext3/namei.c
===================================================================
--- mmp.orig/fs/ext3/namei.c	2006-07-24 10:34:41.000000000 +0800
+++ mmp/fs/ext3/namei.c	2006-07-24 10:39:26.000000000 +0800
@@ -805,7 +805,7 @@ static inline int search_dirblock(struct
  * The returned buffer_head has ->b_count elevated. The caller is expected
  * to brelse() it when appropriate.
  */
-static struct buffer_head * ext3_find_entry (struct dentry *dentry,
+struct buffer_head * ext3_find_entry (struct dentry *dentry,
 					struct ext3_dir_entry_2 ** res_dir)
 {
 	struct super_block * sb;
Index: mmp/fs/ext3/super.c
===================================================================
--- mmp.orig/fs/ext3/super.c	2006-07-24 10:34:41.000000000 +0800
+++ mmp/fs/ext3/super.c	2006-07-24 10:45:19.000000000 +0800
@@ -36,12 +36,14 @@
 #include
 #include
 #include
+#include
 
 #include
 
 #include "xattr.h"
 #include "acl.h"
 #include "namei.h"
+#include "al.h"
 
 static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
 			     unsigned long journal_devnum);
@@ -62,6 +64,8 @@ static int ext3_statfs (struct super_blo
 static void ext3_unlockfs(struct super_block *sb);
 static void ext3_write_super (struct super_block * sb);
 static void ext3_write_super_lockfs(struct super_block *sb);
+struct buffer_head * ext3_find_entry (struct dentry *dentry,
+					struct ext3_dir_entry_2 ** res_dir);
 
 /*
  * Wrappers for journal_start/end.
@@ -435,6 +439,9 @@ static void ext3_put_super (struct super
 		invalidate_bdev(sbi->journal_bdev, 0);
 		ext3_blkdev_remove(sbi);
 	}
+	if (sbi->s_alive_tsk)
+		kthread_stop(sbi->s_alive_tsk);
+
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 	return;
@@ -1369,6 +1376,329 @@ static unsigned long descriptor_loc(stru
 	return (first_data_block + has_super + (bg * sbi->s_blocks_per_group));
 }
 
+/*
+ * Synchronously write the alive block held in @bh to disk.
+ *
+ * The caller's reference on @bh is never released here, not even on
+ * failure.  Returns 0 on success, 1 if the buffer is not uptodate after the
+ * write has completed.
+ */
+static int write_alive(struct buffer_head * bh)
+{
+	lock_buffer(bh);
+	bh->b_end_io = end_buffer_write_sync;
+	get_bh(bh);
+	submit_bh(WRITE, bh);
+	wait_on_buffer(bh);
+	if (unlikely(!buffer_uptodate(bh)))
+		return 1;
+	return 0;
+}
+
+/*
+ * Synchronously re-read the alive block held in @bh from disk.
+ *
+ * Returns 0 on success.  On failure returns 1 and releases the caller's
+ * reference with brelse(), so @bh must not be touched afterwards.
+ */
+static int read_alive_again(struct buffer_head * bh)
+{
+	lock_buffer(bh);
+	bh->b_end_io = end_buffer_read_sync;
+	get_bh(bh);
+	submit_bh(READ, bh);
+	wait_on_buffer(bh);
+	if (!buffer_uptodate(bh)) {
+		brelse(bh);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Heartbeat thread.  Until kthread_stop() is called it rewrites the alive
+ * block every 5 seconds with an incremented sequence number and the current
+ * time; on exit it writes al_seq = 0 to mark the block as released.
+ *
+ * @data is the buffer_head of the alive block.  The caller must have a ref
+ * on it; that ref is dropped here before the thread returns.
+ */
+static int kalived(void *data)
+{
+	struct buffer_head * bh;
+	struct alive_struct * alive;
+	char b[BDEVNAME_SIZE];
+	u32 seq = 0;
+
+	bh = (struct buffer_head *)data;
+	bdevname(bh->b_bdev, b);
+
+	alive = (struct alive_struct *)(bh->b_data);
+	alive->al_magic = cpu_to_le32(ALIVE_MAGIC);
+	alive->al_time = cpu_to_le32(get_seconds());
+
+	down_read(&uts_sem);
+	memcpy(alive->al_nodename, system_utsname.nodename,
+	       sizeof(alive->al_nodename));
+	up_read(&uts_sem);
+
+	while (!kthread_should_stop()) {
+		/* 0 is what a stopped thread leaves behind: skip it on wrap */
+		if (++seq == 0)
+			++seq;
+
+		alive->al_seq = cpu_to_le32(seq);
+		alive->al_time = cpu_to_le32(get_seconds());
+
+		/*
+		 * A failed write still falls through to the sleep below.
+		 * Retrying at once would spin and flood the log for as long
+		 * as the device keeps failing.
+		 */
+		if (unlikely(write_alive(bh))) {
+			/* panic here? */
+			printk(KERN_ERR "Alive (device %s): "
+				"can't write alive block\n", b);
+		}
+
+		schedule_timeout_interruptible(5 * HZ);
+	}
+
+	alive->al_seq = 0;
+	alive->al_time = cpu_to_le32(get_seconds());
+
+	if (unlikely(write_alive(bh)))
+		printk(KERN_ERR "Alive (device %s): "
+			"can't reset alive block\n", b);
+	brelse(bh);
+	return 0;
+}
+
+/*
+ * Look up the entry named ".alive" in the root directory of @sb.
+ *
+ * Returns its inode number, or 0 if the root dentry could not be set up or
+ * the entry was not found.
+ */
+static unsigned long get_alive_ino(struct super_block *sb)
+{
+	unsigned long ino = 0;
+	struct dentry alive;
+	struct dentry * root;
+	struct inode * root_inode;
+	struct ext3_dir_entry_2 * de;
+	struct buffer_head * bh;
+
+	root_inode = iget(sb, EXT3_ROOT_INO);
+	root = d_alloc_root(root_inode);
+	if (!root) {
+		printk(KERN_ERR "Alive (device %s): get root inode failed\n",
+			sb->s_id);
+		iput(root_inode);
+		goto out;
+	}
+
+	/*
+	 * Throw-away dentry on the stack: only the name and the parent are
+	 * filled in.  NOTE(review): this relies on ext3_find_entry() reading
+	 * nothing else from it.
+	 */
+	alive.d_name.name = ".alive";
+	alive.d_name.len = 6;
+	alive.d_parent = root;
+
+	bh = ext3_find_entry(&alive, &de);
+	dput(root);
+
+	if (!bh) {
+		printk(KERN_WARNING "Alive (device %s): alive lookup failed\n",
+			sb->s_id);
+		goto out;
+	}
+
+	ino = le32_to_cpu(de->inode);
+	brelse (bh);
+	pr_debug("Alive (device %s): alive_ino=%lu\n", sb->s_id, ino);
+out:
+	return ino;
+}
+
+/*
+ * Check the alive block before mounting and start the heartbeat thread.
+ *
+ * The block is the first data block of the ".alive" file in the root
+ * directory.  If its sequence number is non-zero, wait 8 seconds and re-read
+ * it: a changed number means another node is still updating it.  Then write
+ * a random sequence number, wait 6 seconds and re-read it, presumably to
+ * catch a node that started the same check concurrently.
+ *
+ * On success the reference on the block's buffer_head is handed over to
+ * kalived() and the thread is recorded in @sbi->s_alive_tsk.  On every
+ * failure path the reference is released and s_alive_tsk is left untouched.
+ *
+ * Returns 0 if the check passed and the thread was started, 1 otherwise.
+ */
+static int check_alive(struct super_block *sb, struct ext3_sb_info *sbi)
+{
+	unsigned long ino;
+	struct buffer_head * bh;
+	struct ext3_inode_info * ei;
+	struct inode * alive_inode;
+	struct alive_struct * alive;
+	struct task_struct * tsk;
+	u32 alive_block;
+	u32 seq;
+
+	ino = get_alive_ino(sb);
+	if (!ino)
+		goto failed;
+
+	alive_inode = iget(sb, ino);
+	/* iput() below is a no-op for NULL and drops a bad inode */
+	if (!alive_inode || is_bad_inode(alive_inode)) {
+		iput(alive_inode);
+		printk(KERN_ERR "Alive (device %s): get alive inode failed\n",
+			sb->s_id);
+		goto failed;
+	}
+	if (!alive_inode->i_nlink) {
+		make_bad_inode(alive_inode);
+		iput(alive_inode);
+		printk(KERN_ERR "Alive (device %s): alive inode is deleted\n",
+			sb->s_id);
+		goto failed;
+	}
+	if (!S_ISREG(alive_inode->i_mode)) {
+		iput(alive_inode);
+		printk(KERN_ERR "Alive (device %s): invalid alive inode\n",
+			sb->s_id);
+		goto failed;
+	}
+	if (EXT3_I(alive_inode)->i_flags & EXT3_EXTENTS_FL) {
+		iput(alive_inode);
+		printk(KERN_ERR "Alive (device %s): invalid alive inode, "
+			"in extents format\n", sb->s_id);
+		goto failed;
+	}
+
+	ei = EXT3_I(alive_inode);
+	/* i_data[] holds block numbers in on-disk (little-endian) order */
+	alive_block = le32_to_cpu(ei->i_data[0]);
+	iput(alive_inode);
+
+	pr_debug("Alive (device %s): read in alive block #%u\n",
+		sb->s_id, alive_block);
+
+	/* first read */
+	bh = sb_bread(sb, alive_block);
+	if (!bh) {
+		printk(KERN_ERR "Alive (device %s): "
+			"can't read alive block #%u\n", sb->s_id, alive_block);
+		goto failed;
+	}
+
+	alive = (struct alive_struct *)(bh->b_data);
+	if (le32_to_cpu(alive->al_magic) != ALIVE_MAGIC) {
+		printk(KERN_ERR "Alive (device %s): "
+			"magic mismatch\n", sb->s_id);
+		brelse(bh);
+		goto failed;
+	}
+
+	seq = le32_to_cpu(alive->al_seq);
+	pr_debug("Alive (device %s): seq=%u\n", sb->s_id, seq);
+	pr_info ("Alive (device %s): last touched by node: %s, "
+		"%li seconds ago\n", sb->s_id, alive->al_nodename,
+		get_seconds() - le32_to_cpu(alive->al_time));
+
+	/* 0 is what kalived() writes on exit: nobody is updating the block */
+	if (seq == 0)
+		goto skip;
+
+	/* wait 8s */
+	pr_info("Alive (device %s): wait for 8 seconds...\n", sb->s_id);
+	schedule_timeout_uninterruptible(HZ * 8);
+
+	/* read again; on failure read_alive_again() has released bh */
+	if (read_alive_again(bh)) {
+		printk(KERN_ERR "Alive (device %s): "
+			"can't read alive block #%u\n",
+			sb->s_id, alive_block);
+		goto failed;
+	}
+
+	alive = (struct alive_struct *)(bh->b_data);
+	pr_debug("Alive (device %s): seq=%u\n",
+		sb->s_id, le32_to_cpu(alive->al_seq));
+
+	if (seq != le32_to_cpu(alive->al_seq)) {
+		printk(KERN_WARNING "Alive (device %s): "
+			"still active on node %s\n",
+			sb->s_id, alive->al_nodename);
+		brelse(bh);
+		goto failed;
+	}
+skip:
+	/* write a new random seq */
+	get_random_bytes(&seq, sizeof(u32));
+	alive->al_seq = cpu_to_le32(seq);
+	if (unlikely(write_alive(bh))) {
+		printk(KERN_ERR "Alive (device %s): "
+			"can't write alive block\n", sb->s_id);
+		/* write_alive() keeps our reference, so drop it here */
+		brelse(bh);
+		goto failed;
+	}
+	pr_debug("Alive (device %s): write random seq=%u\n", sb->s_id, seq);
+
+	/* wait 6s */
+	pr_info("Alive (device %s): wait for 6 seconds...\n", sb->s_id);
+	schedule_timeout_uninterruptible(HZ * 6);
+
+	/* read again; on failure read_alive_again() has released bh */
+	if (read_alive_again(bh)) {
+		printk(KERN_ERR "Alive (device %s): "
+			"can't read alive block #%u\n",
+			sb->s_id, alive_block);
+		goto failed;
+	}
+
+	alive = (struct alive_struct *)(bh->b_data);
+	pr_debug("Alive (device %s): seq=%u\n",
+		sb->s_id, le32_to_cpu(alive->al_seq));
+
+	if (seq != le32_to_cpu(alive->al_seq)) {
+		printk(KERN_WARNING "Alive (device %s): "
+			"still active on node %s\n",
+			sb->s_id, alive->al_nodename);
+		brelse(bh);
+		goto failed;
+	}
+
+	/*
+	 * kthread_run() returns an ERR_PTR() on failure.  Never store that in
+	 * s_alive_tsk: the teardown paths pass any non-NULL value to
+	 * kthread_stop().
+	 */
+	tsk = kthread_run(kalived, bh, "kalived");
+	if (IS_ERR(tsk)) {
+		printk(KERN_ERR "Alive (device %s): "
+			"can't start kalived\n", sb->s_id);
+		brelse(bh);
+		goto failed;
+	}
+	sbi->s_alive_tsk = tsk;
+
+	/* succeed */
+	pr_info("Alive (device %s): alive check passed!\n", sb->s_id);
+	return 0;
+
+failed:
+	printk(KERN_WARNING "Alive (device %s): alive check failed!\n",
+		sb->s_id);
+	return 1;
+}
+
 
 static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 {
@@ -1668,6 +1998,10 @@ static int ext3_fill_super (struct super
 				    EXT3_HAS_INCOMPAT_FEATURE(sb,
 				    EXT3_FEATURE_INCOMPAT_RECOVER));
 
+	if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_ALIVE))
+		if (check_alive(sb, sbi))
+			goto failed_mount2;
+
 	/*
 	 * The first inode we look at is the journal inode. Don't try
 	 * root first: it may be modified in the journal!
@@ -1785,6 +2119,14 @@ cantfind_ext3:
 
 failed_mount3:
 	journal_destroy(sbi->s_journal);
 failed_mount2:
+	/*
+	 * Stop kalived here rather than under failed_mount3, so that a jump
+	 * straight to failed_mount2 cannot leave the thread running.
+	 * NOTE(review): relies on s_alive_tsk being NULL unless check_alive()
+	 * succeeded.
+	 */
+	if (sbi->s_alive_tsk)
+		kthread_stop(sbi->s_alive_tsk);
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
Index: mmp/include/linux/ext3_fs.h
===================================================================
--- mmp.orig/include/linux/ext3_fs.h	2006-07-24 10:34:41.000000000 +0800
+++ mmp/include/linux/ext3_fs.h	2006-07-24 10:39:26.000000000 +0800
@@ -581,12 +581,14 @@ static inline struct ext3_inode_info *EX
 #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV	0x0008 /* Journal device */
 #define EXT3_FEATURE_INCOMPAT_META_BG		0x0010
 #define EXT3_FEATURE_INCOMPAT_EXTENTS		0x0040 /* extents support */
+#define EXT3_FEATURE_INCOMPAT_ALIVE		0x0080
 
 #define EXT3_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT3_FEATURE_INCOMPAT_SUPP	(EXT3_FEATURE_INCOMPAT_FILETYPE| \
 					 EXT3_FEATURE_INCOMPAT_RECOVER| \
 					 EXT3_FEATURE_INCOMPAT_META_BG| \
-					 EXT3_FEATURE_INCOMPAT_EXTENTS)
+					 EXT3_FEATURE_INCOMPAT_EXTENTS| \
+					 EXT3_FEATURE_INCOMPAT_ALIVE)
 #define EXT3_FEATURE_RO_COMPAT_SUPP	(EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
 					 EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
Index: mmp/include/linux/ext3_fs_sb.h
===================================================================
--- mmp.orig/include/linux/ext3_fs_sb.h	2006-07-24 10:34:41.000000000 +0800
+++ mmp/include/linux/ext3_fs_sb.h	2006-07-24 10:39:26.000000000 +0800
@@ -86,6 +86,7 @@ struct ext3_sb_info {
 	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
 	int s_jquota_fmt;			/* Format of quota to use */
 #endif
+	struct task_struct * s_alive_tsk;	/* kalived thread, if running */
 
 	/* for buddy allocator */
 	struct ext3_group_info **s_group_info;