#include <ldiskfs/ldiskfs.h>
#include <ldiskfs/xattr.h>
+#include <ldiskfs/ldiskfs_extents.h>
#undef ENTRY
/*
* struct OBD_{ALLOC,FREE}*()
#include <lustre_linkea.h>
int ldiskfs_pdo = 1;
-CFS_MODULE_PARM(ldiskfs_pdo, "i", int, 0644,
- "ldiskfs with parallel directory operations");
+module_param(ldiskfs_pdo, int, 0644);
+MODULE_PARM_DESC(ldiskfs_pdo, "ldiskfs with parallel directory operations");
int ldiskfs_track_declares_assert;
-CFS_MODULE_PARM(ldiskfs_track_declares_assert, "i", int, 0644,
- "LBUG during tracking of declares");
+module_param(ldiskfs_track_declares_assert, int, 0644);
+MODULE_PARM_DESC(ldiskfs_track_declares_assert, "LBUG during tracking of declares");
/* Slab to allocate dynlocks */
struct kmem_cache *dynlock_cachep;
result = osd_fid_lookup(env, obj, lu_object_fid(l), conf);
obj->oo_dt.do_body_ops = &osd_body_ops_new;
- if (result == 0 && obj->oo_inode != NULL)
+ if (result == 0 && obj->oo_inode != NULL) {
+ struct osd_thread_info *oti = osd_oti_get(env);
+ struct lustre_mdt_attrs *lma = &oti->oti_mdt_attrs;
+
osd_object_init0(obj);
+ result = osd_get_lma(oti, obj->oo_inode,
+ &oti->oti_obj_dentry, lma);
+ if (result == 0) {
+ /* Convert LMAI flags to lustre LMA flags
+ * and cache it to oo_lma_flags */
+ obj->oo_lma_flags =
+ lma_to_lustre_flags(lma->lma_incompat);
+ } else if (result == -ENODATA) {
+ result = 0;
+ }
+ }
LINVRNT(osd_invariant(obj));
return result;
static unsigned long last_printed;
static int last_credits;
- CWARN("%.16s: too many transaction credits (%d > %d)\n",
- LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name,
- oh->ot_credits,
- osd_journal(dev)->j_max_transaction_buffers);
-
- osd_trans_dump_creds(env, th);
-
+ /* don't make noise on tiny testing systems;
+ * actual credit misuse will be caught anyway */
if (last_credits != oh->ot_credits &&
time_after(jiffies, last_printed +
- msecs_to_jiffies(60 * MSEC_PER_SEC))) {
+ msecs_to_jiffies(60 * MSEC_PER_SEC)) &&
+ osd_transaction_size(dev) > 512) {
+ osd_trans_dump_creds(env, th);
libcfs_debug_dumpstack(NULL);
last_credits = oh->ot_credits;
last_printed = jiffies;
static void osd_object_release(const struct lu_env *env,
struct lu_object *l)
{
+ struct osd_object *o = osd_obj(l);
+ /* Sanity check on release: nobody should be releasing a non-destroyed
+ * object whose inode has nlink == 0. The API allows this, but ldiskfs
+ * does not like it and then reports this inode as deleted, so hitting
+ * this state is treated as a fatal bug (LBUG). Objects without an
+ * inode attached are not checked. */
+ if (unlikely(!o->oo_destroyed && o->oo_inode && o->oo_inode->i_nlink == 0))
+ LBUG();
}
/*
d ? d->id_ops->id_name : "plain");
}
-#define GRANT_FOR_LOCAL_OIDS 32 /* 128kB for last_rcvd, quota files, ... */
-
/*
* Concurrency: shouldn't matter.
*/
int osd_statfs(const struct lu_env *env, struct dt_device *d,
struct obd_statfs *sfs)
{
- struct osd_device *osd = osd_dt_dev(d);
- struct super_block *sb = osd_sb(osd);
- struct kstatfs *ksfs;
- int result = 0;
+ struct osd_device *osd = osd_dt_dev(d);
+ struct super_block *sb = osd_sb(osd);
+ struct kstatfs *ksfs;
+ __u64 reserved;
+ int result = 0;
if (unlikely(osd->od_mnt == NULL))
return -EINPROGRESS;
ksfs = &osd_oti_get(env)->oti_ksfs;
}
- spin_lock(&osd->od_osfs_lock);
result = sb->s_op->statfs(sb->s_root, ksfs);
- if (likely(result == 0)) { /* N.B. statfs can't really fail */
- statfs_pack(sfs, ksfs);
- if (unlikely(sb->s_flags & MS_RDONLY))
- sfs->os_state = OS_STATE_READONLY;
- if (LDISKFS_HAS_INCOMPAT_FEATURE(sb,
- LDISKFS_FEATURE_INCOMPAT_EXTENTS))
- sfs->os_maxbytes = sb->s_maxbytes;
- else
- sfs->os_maxbytes = LDISKFS_SB(sb)->s_bitmap_maxbytes;
- }
- spin_unlock(&osd->od_osfs_lock);
+ if (result)
+ goto out;
+
+ statfs_pack(sfs, ksfs);
+ if (unlikely(sb->s_flags & MS_RDONLY))
+ sfs->os_state = OS_STATE_READONLY;
+ if (LDISKFS_HAS_INCOMPAT_FEATURE(sb,
+ LDISKFS_FEATURE_INCOMPAT_EXTENTS))
+ sfs->os_maxbytes = sb->s_maxbytes;
+ else
+ sfs->os_maxbytes = LDISKFS_SB(sb)->s_bitmap_maxbytes;
- if (unlikely(env == NULL))
- OBD_FREE_PTR(ksfs);
+ /*
+ * Reserve some space so as to avoid fragmenting the filesystem too much.
+ * Fragmentation not only impacts performance, but can also increase
+ * metadata overhead significantly, causing grant calculation to be
+ * wrong.
+ *
+ * Reserve 0.78% of total space, at least 8MB for small filesystems.
+ */
+ CLASSERT(OSD_STATFS_RESERVED > LDISKFS_MAX_BLOCK_SIZE);
+ reserved = OSD_STATFS_RESERVED >> sb->s_blocksize_bits;
+ if (likely(sfs->os_blocks >= reserved << OSD_STATFS_RESERVED_SHIFT))
+ reserved = sfs->os_blocks >> OSD_STATFS_RESERVED_SHIFT;
- /* Reserve a small amount of space for local objects like last_rcvd,
- * llog, quota files, ... */
- if (sfs->os_bavail <= GRANT_FOR_LOCAL_OIDS) {
- sfs->os_bavail = 0;
- } else {
- sfs->os_bavail -= GRANT_FOR_LOCAL_OIDS;
- /** Take out metadata overhead for indirect blocks */
- sfs->os_bavail -= sfs->os_bavail >> (sb->s_blocksize_bits - 3);
- }
+ sfs->os_blocks -= reserved;
+ sfs->os_bfree -= min(reserved, sfs->os_bfree);
+ sfs->os_bavail -= min(reserved, sfs->os_bavail);
- return result;
+out:
+ if (unlikely(env == NULL))
+ OBD_FREE_PTR(ksfs);
+ return result;
}
/**
*/
param->ddp_max_name_len = LDISKFS_NAME_LEN;
param->ddp_max_nlink = LDISKFS_LINK_MAX;
- param->ddp_block_shift = sb->s_blocksize_bits;
+ param->ddp_symlink_max = sb->s_blocksize;
param->ddp_mount_type = LDD_MT_LDISKFS;
if (LDISKFS_HAS_INCOMPAT_FEATURE(sb, LDISKFS_FEATURE_INCOMPAT_EXTENTS))
param->ddp_maxbytes = sb->s_maxbytes;
else
param->ddp_maxbytes = LDISKFS_SB(sb)->s_bitmap_maxbytes;
- /* Overhead estimate should be fairly accurate, so we really take a tiny
- * error margin which also avoids fragmenting the filesystem too much */
- param->ddp_grant_reserved = 2; /* end up to be 1.9% after conversion */
/* inode are statically allocated, so per-inode space consumption
* is the space consumed by the directory entry */
param->ddp_inodespace = PER_OBJ_USAGE;
- /* per-fragment overhead to be used by the client code */
- param->ddp_grant_frag = 6 * LDISKFS_BLOCK_SIZE(sb);
- param->ddp_mntopts = 0;
+ /* EXT_INIT_MAX_LEN is the theoretical maximum extent size (32k blocks
+ * = 128MB) which is unlikely to be hit in real life. Report a smaller
+ * maximum length to not under count the actual number of extents
+ * needed for writing a file. */
+ param->ddp_max_extent_blks = EXT_INIT_MAX_LEN >> 2;
+ /* worst-case extent insertion metadata overhead */
+ param->ddp_extent_tax = 6 * LDISKFS_BLOCK_SIZE(sb);
+ param->ddp_mntopts = 0;
if (test_opt(sb, XATTR_USER))
param->ddp_mntopts |= MNTOPT_USERXATTR;
if (test_opt(sb, POSIX_ACL))
return t;
}
-
static void osd_inode_getattr(const struct lu_env *env,
struct inode *inode, struct lu_attr *attr)
{
attr->la_blocks = inode->i_blocks;
attr->la_uid = i_uid_read(inode);
attr->la_gid = i_gid_read(inode);
- attr->la_flags = LDISKFS_I(inode)->i_flags;
+ attr->la_flags = ll_inode_to_ext_flags(inode->i_flags);
attr->la_nlink = inode->i_nlink;
attr->la_rdev = inode->i_rdev;
attr->la_blksize = 1 << inode->i_blkbits;
{
struct osd_object *obj = osd_dt_obj(dt);
- if (!dt_object_exists(dt))
+ if (unlikely(!dt_object_exists(dt)))
+ return -ENOENT;
+ if (unlikely(obj->oo_destroyed))
return -ENOENT;
LASSERT(!dt_object_remote(dt));
spin_lock(&obj->oo_guard);
osd_inode_getattr(env, obj->oo_inode, attr);
+ if (obj->oo_lma_flags & LUSTRE_ORPHAN_FL)
+ attr->la_flags |= LUSTRE_ORPHAN_FL;
spin_unlock(&obj->oo_guard);
+
return 0;
}
osd_trans_declare_op(env, oh, OSD_OT_ATTR_SET,
osd_dto_credits_noquota[DTO_ATTR_SET_BASE]);
+ osd_trans_declare_op(env, oh, OSD_OT_XATTR_SET,
+ osd_dto_credits_noquota[DTO_XATTR_SET]);
+
if (attr == NULL || obj->oo_inode == NULL)
RETURN(rc);
spin_lock(&obj->oo_guard);
rc = osd_inode_setattr(env, inode, attr);
spin_unlock(&obj->oo_guard);
+ if (rc != 0)
+ GOTO(out, rc);
- if (!rc)
- ll_dirty_inode(inode, I_DIRTY_DATASYNC);
+ ll_dirty_inode(inode, I_DIRTY_DATASYNC);
+
+ if (!(attr->la_valid & LA_FLAGS))
+ GOTO(out, rc);
+
+ /* Let's check if there are extra flags need to be set into LMA */
+ if (attr->la_flags & LUSTRE_LMA_FL_MASKS) {
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+
+ rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
+ if (rc != 0)
+ GOTO(out, rc);
+
+ lma->lma_incompat |=
+ lustre_to_lma_flags(attr->la_flags);
+ lustre_lma_swab(lma);
+ rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA,
+ lma, sizeof(*lma), XATTR_REPLACE);
+ if (rc != 0) {
+ struct osd_device *osd = osd_obj2dev(obj);
+ CWARN("%s: set "DFID" lma flags %u failed: rc = %d\n",
+ osd_name(osd), PFID(lu_object_fid(&dt->do_lu)),
+ lma->lma_incompat, rc);
+ } else {
+ obj->oo_lma_flags =
+ attr->la_flags & LUSTRE_LMA_FL_MASKS;
+ }
+ osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
+ }
+out:
osd_trans_exec_check(env, handle, OSD_OT_ATTR_SET);
return rc;
if (result)
return;
- if (attr->la_valid != 0) {
- result = osd_inode_setattr(info->oti_env, inode, attr);
- /*
- * The osd_inode_setattr() should always succeed here. The
- * only error that could be returned is EDQUOT when we are
- * trying to change the UID or GID of the inode. However, this
- * should not happen since quota enforcement is no longer
- * enabled on ldiskfs (lquota takes care of it).
- */
+ if (attr->la_valid != 0) {
+ result = osd_inode_setattr(info->oti_env, inode, attr);
+ /*
+ * The osd_inode_setattr() should always succeed here. The
+ * only error that could be returned is EDQUOT when we are
+ * trying to change the UID or GID of the inode. However, this
+ * should not happen since quota enforcement is no longer
+ * enabled on ldiskfs (lquota takes care of it).
+ */
LASSERTF(result == 0, "%d\n", result);
ll_dirty_inode(inode, I_DIRTY_DATASYNC);
- }
+ }
- attr->la_valid = valid;
+ attr->la_valid = valid;
}
/**
struct osd_thandle *oh;
int rc = 0;
- if (!dt_object_exists(dt))
+ if (!dt_object_exists(dt) || obj->oo_destroyed)
return -ENOENT;
LINVRNT(osd_invariant(obj));
* \retval 0 on success
* \retval 1 on buffer full
*/
+#ifdef HAVE_FILLDIR_USE_CTX
+static int osd_ldiskfs_filldir(struct dir_context *buf,
+ const char *name, int namelen,
+#else
static int osd_ldiskfs_filldir(void *buf, const char *name, int namelen,
+#endif
loff_t offset, __u64 ino,
unsigned d_type)
{
- struct osd_it_ea *it = ((struct osd_filldir_cbs *)buf)->it;
+ struct osd_it_ea *it =
+ ((struct osd_filldir_cbs *)buf)->it;
struct osd_object *obj = it->oie_obj;
struct osd_it_ea_dirent *ent = it->oie_dirent;
struct lu_fid *fid = &ent->oied_fid;
struct osd_thread_info *info = osd_oti_get(env);
struct lu_fid *fid = &info->oti_fid;
struct inode *inode;
- int rc = 0, force_over_128tb = 0;
+ int rc = 0, force_over_256tb = 0;
ENTRY;
if (o->od_mnt != NULL)
RETURN(-EINVAL);
}
#endif
- if (opts != NULL && strstr(opts, "force_over_128tb") != NULL)
- force_over_128tb = 1;
+	/*
+	 * "force_over_128tb" is still recognized, but only so that the
+	 * administrator can be told it is deprecated: this branch emits a
+	 * warning and sets no override flag.
+	 *
+	 * Adjacent string literals are concatenated verbatim by the
+	 * compiler, so each piece must carry its own trailing space or the
+	 * words run together in the log.
+	 */
+	if (opts != NULL && strstr(opts, "force_over_128tb") != NULL) {
+		CWARN("force_over_128tb option is deprecated. "
+		      "Filesystems less than 256TB can be created without any "
+		      "force options. Use force_over_256tb option for "
+		      "filesystems greater than 256TB.\n");
+	}
+
+ if (opts != NULL && strstr(opts, "force_over_256tb") != NULL)
+ force_over_256tb = 1;
__page = alloc_page(GFP_IOFS);
if (__page == NULL)
"noextents",
/* strip out option we processed in osd */
"bigendian_extents",
- "force_over_128tb",
+#if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(3,0,53,0)
+#warning "remove force_over_128 option"
+#else
+ "force_over_128tb (deprecated)",
+#endif
+ "force_over_256tb",
NULL
};
strcat(options, opts);
GOTO(out, rc);
}
- if (ldiskfs_blocks_count(LDISKFS_SB(osd_sb(o))->s_es) > (8ULL << 32) &&
- force_over_128tb == 0) {
+ if (ldiskfs_blocks_count(LDISKFS_SB(osd_sb(o))->s_es) > (64ULL << 30) &&
+ force_over_256tb == 0) {
CERROR("%s: device %s LDISKFS does not support filesystems "
- "greater than 128TB and can cause data corruption. "
- "Use \"force_over_128tb\" mount option to override.\n",
+ "greater than 256TB and can cause data corruption. "
+ "Use \"force_over_256tb\" mount option to override.\n",
name, dev);
GOTO(out, rc = -EINVAL);
}