-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011 Whamcloud, Inc.
- *
- */
-/*
- * Copyright (c) 2011 Whamcloud, Inc.
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#ifdef HAVE_LINUX_EXPORTFS_H
#include <linux/exportfs.h>
#endif
-#ifdef HAVE_EXT4_LDISKFS
#include <ext4/ext4.h>
#include <ext4/ext4_jbd2.h>
-#else
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
-#endif
#include <linux/version.h>
#include <linux/bitops.h>
#include <linux/quota.h>
# include <quota/quotaio_v2.h>
# include <quota/quota_tree.h>
# define V2_DQTREEOFF QT_TREEOFF
-#elif defined(HAVE_FS_QUOTAIO_V1_H)
+#elif defined(HAVE_FS_QUOTAIO_H)
# include <quotaio_v2.h>
# include <quota_tree.h>
# define V2_DQTREEOFF QT_TREEOFF
#include <linux/lustre_compat25.h>
#include <linux/lprocfs_status.h>
-#ifdef HAVE_EXT4_LDISKFS
#include <ext4/ext4_extents.h>
-#else
-#include <linux/ext3_extents.h>
-#endif
#include "lustre_quota_fmt.h"
#define ext3_mb_discard_inode_preallocations(inode) \
ext3_discard_preallocations(inode)
-#ifdef HAVE_EXT4_LDISKFS
#define fsfilt_log_start_commit(journal, tid) jbd2_log_start_commit(journal, tid)
#define fsfilt_log_wait_commit(journal, tid) jbd2_log_wait_commit(journal, tid)
-#else
-#define fsfilt_log_start_commit(journal, tid) log_start_commit(journal, tid)
-#define fsfilt_log_wait_commit(journal, tid) log_wait_commit(journal, tid)
-#define ext_pblock(ex) le32_to_cpu((ex)->ee_start)
-#define ext3_ext_store_pblock(ex, pblock) ((ex)->ee_start = cpu_to_le32(pblock))
-#define ext3_inode_bitmap(sb,desc) le32_to_cpu((desc)->bg_inode_bitmap)
-#endif
#ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD
# define journal_callback ext4_journal_cb_entry
nblocks += 3;
/* no break */
case FSFILT_OP_CREATE: {
-#if defined(EXT3_EXTENTS_FL) && defined(EXT3_INDEX_FL) && !defined(HAVE_EXT4_LDISKFS)
- static int warned;
- if (!warned) {
- if (!test_opt(inode->i_sb, EXTENTS)) {
- warned = 1;
- } else if (((EXT3_I(inode)->i_flags &
- cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL)) ==
- cpu_to_le32(EXT3_EXTENTS_FL | EXT3_INDEX_FL))) {
- CWARN("extent-mapped directory found with "
- "ext3-based ldiskfs - contact "
- "http://bugs.whamcloud.com/\n");
- warned = 1;
- }
- }
-#endif
/* no break */
}
case FSFILT_OP_MKDIR:
struct inode *inode = dentry->d_inode;
int rc = 0;
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2,7,50,0)
+ /* Try to correct for a bug in 2.1.0 (LU-221) that caused negative
+ * timestamps to appear to be in the far future, due to the old timestamp
+ * being stored on disk as an unsigned value. This fixes up any
+ * bad values held by the client before storing them on disk,
+ * and ensures any timestamp updates are correct. LU-1042 */
+ if (unlikely(LTIME_S(inode->i_atime) == LU221_BAD_TIME &&
+ !(iattr->ia_valid & ATTR_ATIME))) {
+ iattr->ia_valid |= ATTR_ATIME;
+ LTIME_S(iattr->ia_atime) = 0;
+ }
+ if (unlikely(LTIME_S(inode->i_mtime) == LU221_BAD_TIME &&
+ !(iattr->ia_valid & ATTR_MTIME))) {
+ iattr->ia_valid |= ATTR_MTIME;
+ LTIME_S(iattr->ia_mtime) = 0;
+ }
+ if (unlikely((LTIME_S(inode->i_ctime) == LU221_BAD_TIME ||
+ LTIME_S(inode->i_ctime) == 0) &&
+ !(iattr->ia_valid & ATTR_CTIME))) {
+ iattr->ia_valid |= ATTR_CTIME;
+ LTIME_S(iattr->ia_ctime) = 0;
+ }
+#else
+#warning "remove old LU-221/LU-1042 workaround code"
+#endif
+
+ /* When initializing timestamps for new inodes, use the filesystem
+ * mkfs time for ctime to avoid e2fsck ibadness incorrectly thinking
+ * that this is potentially an invalid inode. Files with an old ctime
+ * migrated to a newly-formatted OST with a newer s_mkfs_time will not
+ * hit this check, since it is only for ctime == 0. LU-1010/LU-1042 */
+ if ((iattr->ia_valid & ATTR_CTIME) && LTIME_S(iattr->ia_ctime) == 0)
+ LTIME_S(iattr->ia_ctime) =
+ EXT4_SB(inode->i_sb)->s_es->s_mkfs_time;
+
/* Avoid marking the inode dirty on the superblock list unnecessarily.
* We are already writing the inode to disk as part of this
* transaction and want to avoid a lot of extra inode writeout
/* We set these flags on the client, but have already checked perms
* so don't confuse inode_change_ok. */
- iattr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
+ iattr->ia_valid &= ~TIMES_SET_FLAGS;
if (inode->i_op->setattr) {
rc = inode->i_op->setattr(dentry, iattr);
} else {
+#ifndef HAVE_SIMPLE_SETATTR /* simple_setattr() already calls it */
rc = inode_change_ok(inode, iattr);
if (!rc)
- rc = inode_setattr(inode, iattr);
+#endif
+ rc = simple_setattr(dentry, iattr);
}
out:
*(int *)arg |= EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL;
}
-#ifdef HAVE_EXT4_LDISKFS
/* ext4_ioctl does not have a inode argument */
if (inode->i_fop->unlocked_ioctl)
rc = inode->i_fop->unlocked_ioctl(file, cmd, arg);
-#else
- if (inode->i_fop->ioctl)
- rc = inode->i_fop->ioctl(inode, file, cmd, arg);
-#endif
else
RETURN(-ENOTTY);
# define fsfilt_up_truncate_sem(inode) up(&LDISKFS_I(inode)->truncate_sem);
# define fsfilt_down_truncate_sem(inode) down(&LDISKFS_I(inode)->truncate_sem);
#else
-# ifdef HAVE_EXT4_LDISKFS
-# define fsfilt_up_truncate_sem(inode) do{ }while(0)
-# define fsfilt_down_truncate_sem(inode) do{ }while(0)
-# else
-# define fsfilt_up_truncate_sem(inode) mutex_unlock(&EXT3_I(inode)->truncate_mutex)
-# define fsfilt_down_truncate_sem(inode) mutex_lock(&EXT3_I(inode)->truncate_mutex)
-# endif
+# define fsfilt_up_truncate_sem(inode) do{ }while(0)
+# define fsfilt_down_truncate_sem(inode) do{ }while(0)
#endif
#ifndef EXT_ASSERT
#ifdef EXT3_EXT_HAS_NO_TREE
/* for kernels 2.6.18 and later */
-#ifdef HAVE_EXT4_LDISKFS
#define EXT_GENERATION(inode) (EXT4_I(inode)->i_ext_generation)
-#else
-#define EXT_GENERATION(inode) ext_generation(inode)
-#endif
#define ext3_ext_base inode
#define ext3_ext_base2inode(inode) (inode)
#define EXT_DEPTH(inode) ext_depth(inode)
ext3_ext_walk_space(tree, block, num, cb);
#endif
-#include <linux/lustre_version.h>
-
struct bpointers {
unsigned long *blocks;
int *created;
#endif
struct inode *inode = ext3_ext_base2inode(base);
struct ext3_extent nex;
-#if defined(HAVE_EXT4_LDISKFS)
- struct ext4_ext_path *tmppath = NULL;
- struct ext4_extent *tmpex;
-#endif
unsigned long pblock;
unsigned long tgen;
- int err, i, depth;
+ int err, i;
unsigned long count;
handle_t *handle;
- i = depth = EXT_DEPTH(base);
- EXT_ASSERT(i == path->p_depth);
- EXT_ASSERT(path[i].p_hdr);
-
if (cex->ec_type == EXT3_EXT_CACHE_EXTENT) {
err = EXT_CONTINUE;
goto map;
return EXT_REPEAT;
}
-#if defined(HAVE_EXT4_LDISKFS)
/* In 2.6.32 kernel, ext4_ext_walk_space()'s callback func is not
- * protected by i_data_sem, we need revalidate extent to be created */
+ * protected by i_data_sem as a whole, so we patch it to store the
+ * generation in the path and now verify the tree hasn't changed */
down_write((&EXT4_I(inode)->i_data_sem));
/* validate extent, make sure the extent tree does not changed */
- tmppath = ext4_ext_find_extent(inode, cex->ec_block, NULL);
- if (IS_ERR(tmppath)) {
- up_write(&EXT4_I(inode)->i_data_sem);
- ext3_journal_stop(handle);
- return PTR_ERR(tmppath);
- }
- tmpex = tmppath[depth].p_ext;
- if (tmpex != ex) {
+ if (EXT_GENERATION(base) != path[0].p_generation) {
/* cex is invalid, try again */
- ext4_ext_drop_refs(tmppath);
- kfree(tmppath);
up_write(&EXT4_I(inode)->i_data_sem);
ext3_journal_stop(handle);
return EXT_REPEAT;
}
-#endif
count = cex->ec_len;
pblock = new_blocks(handle, base, path, cex->ec_block, &count, &err);
BUG_ON(le32_to_cpu(nex.ee_block) != cex->ec_block);
out:
-#if defined(HAVE_EXT4_LDISKFS)
- ext4_ext_drop_refs(tmppath);
- kfree(tmppath);
up_write((&EXT4_I(inode)->i_data_sem));
-#endif
ext3_journal_stop(handle);
map:
if (err >= 0) {
int fsfilt_ext3_map_inode_pages(struct inode *inode, struct page **page,
int pages, unsigned long *blocks,
int *created, int create,
- cfs_semaphore_t *optional_sem)
+ cfs_mutex_t *optional_mutex)
{
int rc;
blocks, created, create);
return rc;
}
- if (optional_sem != NULL)
- cfs_down(optional_sem);
+ if (optional_mutex != NULL)
+ cfs_mutex_lock(optional_mutex);
rc = fsfilt_ext3_map_bm_inode_pages(inode, page, pages, blocks,
created, create);
- if (optional_sem != NULL)
- cfs_up(optional_sem);
+ if (optional_mutex != NULL)
+ cfs_mutex_unlock(optional_mutex);
return rc;
}
int err, blocksize, csize, boffs, osize = size;
/* prevent reading after eof */
- cfs_lock_kernel();
+ spin_lock(&inode->i_lock);
if (i_size_read(inode) < *offs + size) {
size = i_size_read(inode) - *offs;
- cfs_unlock_kernel();
+ spin_unlock(&inode->i_lock);
if (size < 0) {
CDEBUG(D_EXT2, "size %llu is too short for read @%llu\n",
i_size_read(inode), *offs);
return 0;
}
} else {
- cfs_unlock_kernel();
+ spin_unlock(&inode->i_lock);
}
blocksize = 1 << inode->i_blkbits;
/* correct in-core and on-disk sizes */
if (new_size > i_size_read(inode)) {
- cfs_lock_kernel();
+ spin_lock(&inode->i_lock);
if (new_size > i_size_read(inode))
i_size_write(inode, new_size);
if (i_size_read(inode) > EXT3_I(inode)->i_disksize)
EXT3_I(inode)->i_disksize = i_size_read(inode);
- if (i_size_read(inode) > old_size)
+ if (i_size_read(inode) > old_size) {
+ spin_unlock(&inode->i_lock);
mark_inode_dirty(inode);
- cfs_unlock_kernel();
+ } else {
+ spin_unlock(&inode->i_lock);
+ }
}
if (err == 0)
sbi->dx_unlock = fsfilt_ext3_dx_unlock;
#endif
#endif
+ if (!EXT3_HAS_COMPAT_FEATURE(sb,
+ EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
+ CERROR("ext3 mounted without journal\n");
+ return -EINVAL;
+ }
+
#ifdef S_PDIROPS
CWARN("Enabling PDIROPS\n");
set_opt(sbi->s_mount_opt, PDIROPS);
RETURN(rc);
}
-#ifndef HAVE_EXT4_LDISKFS
-static inline struct ext3_group_desc *
-get_group_desc(struct super_block *sb, int group, struct buffer_head **bh)
-{
- unsigned long desc_block, desc;
- struct ext3_group_desc *gdp;
-
- desc_block = group / EXT3_DESC_PER_BLOCK(sb);
- desc = group % EXT3_DESC_PER_BLOCK(sb);
- gdp = (struct ext3_group_desc *)
- EXT3_SB(sb)->s_group_desc[desc_block]->b_data;
-
- return gdp + desc;
-}
-
-static inline struct buffer_head *
-ext3_read_inode_bitmap(struct super_block *sb, unsigned long group)
-{
- struct ext3_group_desc *desc;
- struct buffer_head *bh;
-
- desc = get_group_desc(sb, group, NULL);
- bh = sb_bread(sb, ext3_inode_bitmap(sb, desc));
- return bh;
-}
-
-static __u32 ext3_itable_unused_count(struct super_block *sb,
- struct ext3_group_desc *bg) {
- return le16_to_cpu(bg->bg_itable_unused);
-}
-#else
-#define get_group_desc ext3_get_group_desc
-#endif
-
struct qchk_ctxt {
cfs_hlist_head_t qckt_hash[NR_DQHASH]; /* quotacheck hash */
cfs_list_t qckt_list; /* quotacheck list */
if (uninit_feat) {
struct ext3_group_desc *desc;
- desc = get_group_desc(sb, group, NULL);
+ desc = ext3_get_group_desc(sb, group, NULL);
if (!desc)
GOTO(out, -EIO);
/* we don't really need to take the group lock here,
* but it may be useful if one day we support online
* quotacheck */
-#ifdef HAVE_EXT4_LDISKFS
ext4_lock_group(sb, group);
-#else
- spin_lock(sb_bgl_lock(sbi, group));
-#endif
if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
/* no inode in use in this group, just skip it */
-#ifdef HAVE_EXT4_LDISKFS
ext3_unlock_group(sb, group);
-#else
- spin_unlock(sb_bgl_lock(sbi, group));
-#endif
continue;
}
used_count -= ext3_itable_unused_count(sb, desc);
-#ifdef HAVE_EXT4_LDISKFS
ext3_unlock_group(sb, group);
-#else
- spin_unlock(sb_bgl_lock(sbi, group));
-#endif
}
ino = group * sbi->s_inodes_per_group + 1;
ino = i + group * sbi->s_inodes_per_group;
if (ino < sbi->s_first_ino)
continue;
-#if defined(HAVE_EXT4_LDISKFS) || !defined(HAVE_READ_INODE_IN_SBOPS)
inode = ext3_iget(sb, ino);
-#else
- inode = iget(sb, ino);
-#endif
if (!inode || IS_ERR(inode))
continue;
continue;
- LASSERT(sb_dqopt(sb)->files[i] != NULL);
- CFS_INIT_LIST_HEAD(&id_list);
-#ifndef KERNEL_SUPPORTS_QUOTA_READ
- rc = lustre_get_qids(sb_dqopt(sb)->files[i], NULL, i, &id_list);
-#else
- rc = lustre_get_qids(NULL, sb_dqopt(sb)->files[i], i, &id_list);
-#endif
- if (rc)
- CERROR("read old limits failed. (rc:%d)\n", rc);
+ LASSERT(sb_dqopt(sb)->files[i] != NULL);
+ CFS_INIT_LIST_HEAD(&id_list);
+ rc = lustre_get_qids(NULL, sb_dqopt(sb)->files[i], i, &id_list);
+ if (rc)
+ CERROR("read old limits failed. (rc:%d)\n", rc);
cfs_list_for_each_entry_safe(dqid, tmp, &id_list, di_link) {
cfs_list_del_init(&dqid->di_link);