*/
/* LASSERT(current->journal_info == NULL); */
- inode = ldiskfs_iget(osd_sb(dev), id->oii_ino);
+ inode = osd_ldiskfs_iget(osd_sb(dev), id->oii_ino);
if (IS_ERR(inode)) {
CDEBUG(D_INODE, "no inode: ino = %u, rc = %ld\n",
id->oii_ino, PTR_ERR(inode));
*/
again:
- inode = ldiskfs_iget(osd_sb(dev), id->oii_ino);
+ inode = osd_ldiskfs_iget(osd_sb(dev), id->oii_ino);
if (IS_ERR(inode)) {
rc = PTR_ERR(inode);
if (!trusted && (rc == -ENOENT || rc == -ESTALE))
goto trigger;
}
+ /* -ESTALE is returned if inode of OST object doesn't exist */
+ if (result == -ESTALE &&
+ fid_is_on_ost(info, dev, fid, OI_CHECK_FLD)) {
+ GOTO(out, result = 0);
+ }
+
if (result)
GOTO(out, result);
LASSERT(!updated);
+ /*
+ * if two OST objects map to the same inode, and inode mode is
+ * (S_IFREG | S_ISUID | S_ISGID | S_ISVTX | 0666), which means it's
+ * reserved by precreate, and not written yet, in this case, don't
+ * set inode for the object whose FID mismatch, so that it can create
+ * inode and not block precreate.
+ */
+ if (fid_is_on_ost(info, dev, fid, OI_CHECK_FLD) &&
+ inode->i_mode == (S_IFREG | S_ISUID | S_ISGID | S_ISVTX | 0666)) {
+ obj->oo_inode = NULL;
+ GOTO(out, result = 0);
+ }
+
result = osd_oi_lookup(info, dev, fid, id, OI_CHECK_FLD);
/*
* "result == -ENOENT" means the cached OI mapping has been removed
if (error)
CERROR("transaction @0x%p commit error: %d\n", th, error);
- dt_txn_hook_commit(th);
-
+ OBD_FAIL_TIMEOUT(OBD_FAIL_OST_DELAY_TRANS, 40);
/* call per-transaction callbacks if any */
list_for_each_entry_safe(dcb, tmp, &oh->ot_commit_dcb_list,
dcb_linkage) {
*/
if (last_credits != oh->ot_credits &&
time_after(jiffies, last_printed +
- msecs_to_jiffies(60 * MSEC_PER_SEC)) &&
+ cfs_time_seconds(60)) &&
osd_transaction_size(dev) > 512) {
CWARN("%s: credits %u > trans_max %u\n", osd_name(dev),
oh->ot_credits, osd_transaction_size(dev));
static void osd_object_delete(const struct lu_env *env, struct lu_object *l)
{
struct osd_object *obj = osd_obj(l);
+ struct qsd_instance *qsd = osd_def_qsd(osd_obj2dev(obj));
struct inode *inode = obj->oo_inode;
+ __u64 projid;
+ qid_t uid;
+ qid_t gid;
LINVRNT(osd_invariant(obj));
*/
osd_index_fini(obj);
- if (inode != NULL) {
- struct qsd_instance *qsd = osd_def_qsd(osd_obj2dev(obj));
- qid_t uid = i_uid_read(inode);
- qid_t gid = i_gid_read(inode);
- obj->oo_inode = NULL;
- iput(inode);
- if (!obj->oo_header && qsd) {
- struct osd_thread_info *info = osd_oti_get(env);
- struct lquota_id_info *qi = &info->oti_qi;
+ if (!inode)
+ return;
+
+ uid = i_uid_read(inode);
+ gid = i_gid_read(inode);
+ projid = i_projid_read(inode);
- /* Release granted quota to master if necessary */
- qi->lqi_id.qid_uid = uid;
- qsd_op_adjust(env, qsd, &qi->lqi_id, USRQUOTA);
+ obj->oo_inode = NULL;
+ iput(inode);
- qi->lqi_id.qid_uid = gid;
- qsd_op_adjust(env, qsd, &qi->lqi_id, GRPQUOTA);
+ /* Do not rebalance quota if the caller needs to release memory,
+ * otherwise qsd_refresh_usage() may enter a new ldiskfs
+ * transaction and risk a deadlock - LU-12178 */
+ if (current->flags & (PF_MEMALLOC | PF_KSWAPD))
+ return;
- qi->lqi_id.qid_uid = i_projid_read(inode);
- qsd_op_adjust(env, qsd, &qi->lqi_id, PRJQUOTA);
- }
+ if (!obj->oo_header && qsd) {
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct lquota_id_info *qi = &info->oti_qi;
+
+ /* Release granted quota to master if necessary */
+ qi->lqi_id.qid_uid = uid;
+ qsd_op_adjust(env, qsd, &qi->lqi_id, USRQUOTA);
+
+ qi->lqi_id.qid_uid = gid;
+ qsd_op_adjust(env, qsd, &qi->lqi_id, GRPQUOTA);
+
+ qi->lqi_id.qid_uid = projid;
+ qsd_op_adjust(env, qsd, &qi->lqi_id, PRJQUOTA);
}
}
* Concurrency: shouldn't matter.
*/
int osd_statfs(const struct lu_env *env, struct dt_device *d,
- struct obd_statfs *sfs)
+ struct obd_statfs *sfs, struct obd_statfs_info *info)
{
struct osd_device *osd = osd_dt_dev(d);
struct super_block *sb = osd_sb(osd);
goto out;
statfs_pack(sfs, ksfs);
- if (unlikely(sb->s_flags & MS_RDONLY))
+ if (unlikely(sb->s_flags & SB_RDONLY))
sfs->os_state |= OS_STATE_READONLY;
+
+ sfs->os_state |= osd->od_nonrotational ? OS_STATE_NONROT : 0;
+
if (ldiskfs_has_feature_extents(sb))
sfs->os_maxbytes = sb->s_maxbytes;
else
*/
param->ddp_inodespace = PER_OBJ_USAGE;
/*
- * EXT_INIT_MAX_LEN is the theoretical maximum extent size (32k blocks
- * = 128MB) which is unlikely to be hit in real life. Report a smaller
- * maximum length to not under count the actual number of extents
- * needed for writing a file.
+ * EXT_INIT_MAX_LEN is the theoretical maximum extent size (32k blocks
+ * is 128MB) which is unlikely to be hit in real life. Report a smaller
+ * maximum length to not under-count the actual number of extents
+ * needed for writing a file if there are sub-optimal block allocations.
*/
- param->ddp_max_extent_blks = EXT_INIT_MAX_LEN >> 2;
+ param->ddp_max_extent_blks = EXT_INIT_MAX_LEN >> 1;
/* worst-case extent insertion metadata overhead */
param->ddp_extent_tax = 6 * LDISKFS_BLOCK_SIZE(sb);
param->ddp_mntopts = 0;
#endif
param->ddp_max_ea_size = sb->s_blocksize - ea_overhead;
- if (param->ddp_max_ea_size > OSD_MAX_EA_SIZE)
- param->ddp_max_ea_size = OSD_MAX_EA_SIZE;
+ if (param->ddp_max_ea_size > OBD_MAX_EA_SIZE)
+ param->ddp_max_ea_size = OBD_MAX_EA_SIZE;
/*
* Preferred RPC size for efficient disk IO. 4MB shows good
}
}
+/* Return the super block that osd_sb() reports for the OSD device behind @d. */
+static struct super_block *osd_mnt_sb_get(const struct dt_device *d)
+{
+ struct osd_device *osd = osd_dt_dev(d);
+
+ return osd_sb(osd);
+}
+
/*
* Concurrency: shouldn't matter.
*/
.dt_trans_stop = osd_trans_stop,
.dt_trans_cb_add = osd_trans_cb_add,
.dt_conf_get = osd_conf_get,
+ .dt_mnt_sb_get = osd_mnt_sb_get,
.dt_sync = osd_sync,
.dt_ro = osd_ro,
.dt_commit_async = osd_commit_async,
return obj->oo_owner == env;
}
-static struct timespec *osd_inode_time(const struct lu_env *env,
- struct inode *inode, __u64 seconds)
-{
- struct osd_thread_info *oti = osd_oti_get(env);
- struct timespec *t = &oti->oti_time;
-
- t->tv_sec = seconds;
- t->tv_nsec = 0;
- *t = timespec_trunc(*t, inode->i_sb->s_time_gran);
- return t;
-}
-
static void osd_inode_getattr(const struct lu_env *env,
struct inode *inode, struct lu_attr *attr)
{
return 0;
if (bits & LA_ATIME)
- inode->i_atime = *osd_inode_time(env, inode, attr->la_atime);
+ inode->i_atime = osd_inode_time(inode, attr->la_atime);
if (bits & LA_CTIME)
- inode->i_ctime = *osd_inode_time(env, inode, attr->la_ctime);
+ inode->i_ctime = osd_inode_time(inode, attr->la_ctime);
if (bits & LA_MTIME)
- inode->i_mtime = *osd_inode_time(env, inode, attr->la_mtime);
+ inode->i_mtime = osd_inode_time(inode, attr->la_mtime);
if (bits & LA_SIZE) {
spin_lock(&inode->i_lock);
LDISKFS_I(inode)->i_disksize = attr->la_size;
static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj,
umode_t mode, struct dt_allocation_hint *hint,
- struct thandle *th)
+ struct thandle *th, struct lu_attr *attr)
{
int result;
struct osd_device *osd = osd_obj2dev(obj);
struct osd_thandle *oth;
struct dt_object *parent = NULL;
struct inode *inode;
+ uid_t owner[2] = {0, 0};
+
+ if (attr->la_valid & LA_UID)
+ owner[0] = attr->la_uid;
+ if (attr->la_valid & LA_GID)
+ owner[1] = attr->la_gid;
LINVRNT(osd_invariant(obj));
LASSERT(obj->oo_inode == NULL);
inode = ldiskfs_create_inode(oth->ot_handle,
parent ? osd_dt_obj(parent)->oo_inode :
osd_sb(osd)->s_root->d_inode,
- mode);
+ mode, owner);
if (!IS_ERR(inode)) {
/* Do not update file c/mtime in ldiskfs. */
inode->i_flags |= S_NOCMTIME;
oth = container_of(th, struct osd_thandle, ot_super);
LASSERT(oth->ot_handle->h_transaction != NULL);
- result = osd_mkfile(info, obj, mode, hint, th);
+ result = osd_mkfile(info, obj, mode, hint, th, attr);
return result;
}
oth = container_of(th, struct osd_thandle, ot_super);
LASSERT(oth->ot_handle->h_transaction != NULL);
- result = osd_mkfile(info, obj, mode, hint, th);
+ result = osd_mkfile(info, obj, mode, hint, th, attr);
if (result == 0) {
LASSERT(obj->oo_inode != NULL);
if (feat->dif_flags & DT_IND_VARKEY)
{
LASSERT(S_ISREG(attr->la_mode));
return osd_mkfile(info, obj, (attr->la_mode &
- (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th);
+ (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th,
+ attr);
}
static int osd_mksym(struct osd_thread_info *info, struct osd_object *obj,
{
LASSERT(S_ISLNK(attr->la_mode));
return osd_mkfile(info, obj, (attr->la_mode &
- (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th);
+ (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th,
+ attr);
}
static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj,
LASSERT(S_ISCHR(mode) || S_ISBLK(mode) ||
S_ISFIFO(mode) || S_ISSOCK(mode));
- result = osd_mkfile(info, obj, mode, hint, th);
+ result = osd_mkfile(info, obj, mode, hint, th, attr);
if (result == 0) {
LASSERT(obj->oo_inode != NULL);
/*
osd_id_gen(id, obj->oo_inode->i_ino, obj->oo_inode->i_generation);
rc = osd_oi_insert(info, osd, fid, id, oh->ot_handle,
OI_CHECK_FLD, NULL);
+ if (CFS_FAIL_CHECK(OBD_FAIL_OSD_DUPLICATE_MAP) && osd->od_is_ost) {
+ struct lu_fid next_fid = *fid;
+
+ /* insert next object in advance, and map to the same inode */
+ next_fid.f_oid++;
+ if (next_fid.f_oid != 0) {
+ osd_trans_exec_op(env, th, OSD_OT_INSERT);
+ osd_oi_insert(info, osd, &next_fid, id, oh->ot_handle,
+ OI_CHECK_FLD, NULL);
+ osd_trans_exec_check(env, th, OSD_OT_INSERT);
+ }
+ }
+
osd_trans_exec_check(env, th, OSD_OT_INSERT);
return rc;
*/
osd_trans_declare_op(env, oh, OSD_OT_INSERT,
osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
+ if (CFS_FAIL_CHECK(OBD_FAIL_OSD_DUPLICATE_MAP))
+ osd_trans_declare_op(env, oh, OSD_OT_INSERT,
+ osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
/* will help to find FID->ino mapping at dt_insert() */
rc = osd_idc_find_and_init(env, osd_obj2dev(osd_dt_obj(dt)),
oh = container_of(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle->h_transaction != NULL);
- local = ldiskfs_create_inode(oh->ot_handle, pobj->oo_inode, type);
+ local = ldiskfs_create_inode(oh->ot_handle, pobj->oo_inode, type,
+ NULL);
if (IS_ERR(local)) {
CERROR("%s: create local error %d\n", osd_name(osd),
(int)PTR_ERR(local));
file->f_op = inode->i_fop;
set_file_inode(file, inode);
- rc = ll_vfs_fsync_range(file, start, end, 0);
+ rc = vfs_fsync_range(file, start, end, 0);
RETURN(rc);
}
"force_over_512tb",
NULL
};
- strcat(options, opts);
+ /*
+ * strlcat() bounds the total length of @options to PAGE_SIZE;
+ * strncat()'s count only limits the bytes taken from the source.
+ */
+ strlcat(options, opts, PAGE_SIZE);
for (rc = 0, str = options; sout[rc]; ) {
char *op = strstr(str, sout[rc]);
;
}
} else {
- strncat(options, "user_xattr,acl", 14);
+ strlcat(options, "user_xattr,acl", PAGE_SIZE);
}
/* Glom up mount options */
if (*options != '\0')
- strcat(options, ",");
- strlcat(options, "no_mbcache,nodelalloc", PAGE_SIZE);
+ strlcat(options, ",", PAGE_SIZE);
+ strlcat(options, "no_mbcache,nodelalloc", PAGE_SIZE);
type = get_fs_type("ldiskfs");
if (!type) {
o->od_read_cache = 1;
o->od_writethrough_cache = 1;
o->od_readcache_max_filesize = OSD_MAX_CACHE_SIZE;
+
o->od_auto_scrub_interval = AS_DEFAULT;
cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4),
if (rc != 0)
GOTO(out, rc);
+ /* Can only check block device after mount */
+ o->od_nonrotational =
+ blk_queue_nonrot(bdev_get_queue(osd_sb(o)->s_bdev));
+
rc = osd_obj_map_init(env, o);
if (rc != 0)
GOTO(out_mnt, rc);
struct lu_device *d, struct lustre_cfg *cfg)
{
struct osd_device *o = osd_dev(d);
+ ssize_t count;
int rc;
ENTRY;
break;
case LCFG_PARAM:
LASSERT(&o->od_dt_dev);
- rc = class_process_proc_param(PARAM_OSD, lprocfs_osd_obd_vars,
- cfg, &o->od_dt_dev);
- if (rc > 0 || rc == -ENOSYS) {
- rc = class_process_proc_param(PARAM_OST,
- lprocfs_osd_obd_vars,
- cfg, &o->od_dt_dev);
- if (rc > 0)
- rc = 0;
- }
+ count = class_modify_config(cfg, PARAM_OSD,
+ &o->od_dt_dev.dd_kobj);
+ if (count < 0)
+ count = class_modify_config(cfg, PARAM_OST,
+ &o->od_dt_dev.dd_kobj);
+ rc = count > 0 ? 0 : count;
break;
case LCFG_PRE_CLEANUP:
osd_scrub_stop(o);
struct osd_device *osd = osd_dev(obd->obd_lu_dev);
struct super_block *sb = osd_sb(osd);
- return (osd->od_mnt == NULL || sb->s_flags & MS_RDONLY);
+ return (osd->od_mnt == NULL || sb->s_flags & SB_RDONLY);
}
/*
.o_health_check = osd_health_check,
};
+/*
+ * sysfs "show" handler: format the current value of
+ * ldiskfs_track_declares_assert into @buf as a decimal number followed by
+ * a newline, and return the number of characters written.
+ */
+static ssize_t track_declares_assert_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ int len = sprintf(buf, "%d\n", ldiskfs_track_declares_assert);
+
+ return len;
+}
+
+/*
+ * sysfs "store" handler: parse @buffer as a boolean with kstrtobool() and
+ * assign the result to ldiskfs_track_declares_assert. Returns @count on
+ * success, or the kstrtobool() error code when parsing fails (the global
+ * is left untouched in that case).
+ */
+static ssize_t track_declares_assert_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ bool enable;
+ int rc = kstrtobool(buffer, &enable);
+
+ if (rc != 0)
+ return rc;
+
+ ldiskfs_track_declares_assert = enable;
+ return count;
+}
+LUSTRE_RW_ATTR(track_declares_assert);
+
static int __init osd_init(void)
{
+ struct kobject *kobj;
int rc;
CLASSERT(BH_DXLock < sizeof(((struct buffer_head *)0)->b_state) * 8);
(void *)kallsyms_lookup_name("dev_check_rdonly");
#endif
- rc = class_register_type(&osd_obd_device_ops, NULL, true,
- lprocfs_osd_module_vars,
+ rc = class_register_type(&osd_obd_device_ops, NULL, true, NULL,
LUSTRE_OSD_LDISKFS_NAME, &osd_device_type);
- if (rc)
+ if (rc) {
lu_kmem_fini(ldiskfs_caches);
+ return rc;
+ }
+
+ kobj = kset_find_obj(lustre_kset, LUSTRE_OSD_LDISKFS_NAME);
+ if (kobj) {
+ rc = sysfs_create_file(kobj,
+ &lustre_attr_track_declares_assert.attr);
+ kobject_put(kobj);
+ if (rc) {
+ CWARN("osd-ldiskfs: track_declares_assert failed to register with sysfs\n");
+ rc = 0;
+ }
+ }
return rc;
}
static void __exit osd_exit(void)
{
+ /* find the module's kobject so the attribute created in osd_init() can be removed */
+ struct kobject *kobj = kset_find_obj(lustre_kset,
+ LUSTRE_OSD_LDISKFS_NAME);
+
+ if (kobj != NULL) {
+ sysfs_remove_file(kobj,
+ &lustre_attr_track_declares_assert.attr);
+ /* drop the reference obtained from kset_find_obj() */
+ kobject_put(kobj);
+ }
class_unregister_type(LUSTRE_OSD_LDISKFS_NAME);
lu_kmem_fini(ldiskfs_caches);
}