X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd-ldiskfs%2Fosd_handler.c;h=6ae6a6409a23968d069e43197aab241e08689642;hb=72a84970e6d2a2d4b3a35f2ee058511be2fda82e;hp=73d0f02cc30fb786e0be0a4ea80bbbb12195fe0f;hpb=077570483e75e0610fd45149b926097547c434b8;p=fs%2Flustre-release.git diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 73d0f02..6ae6a64 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -423,7 +423,7 @@ struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, */ /* LASSERT(current->journal_info == NULL); */ - inode = ldiskfs_iget(osd_sb(dev), id->oii_ino); + inode = osd_ldiskfs_iget(osd_sb(dev), id->oii_ino); if (IS_ERR(inode)) { CDEBUG(D_INODE, "no inode: ino = %u, rc = %ld\n", id->oii_ino, PTR_ERR(inode)); @@ -559,7 +559,7 @@ static struct inode *osd_iget_check(struct osd_thread_info *info, */ again: - inode = ldiskfs_iget(osd_sb(dev), id->oii_ino); + inode = osd_ldiskfs_iget(osd_sb(dev), id->oii_ino); if (IS_ERR(inode)) { rc = PTR_ERR(inode); if (!trusted && (rc == -ENOENT || rc == -ESTALE)) @@ -1128,6 +1128,12 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj, goto trigger; } + /* -ESTALE is returned if inode of OST object doesn't exist */ + if (result == -ESTALE && + fid_is_on_ost(info, dev, fid, OI_CHECK_FLD)) { + GOTO(out, result = 0); + } + if (result) GOTO(out, result); @@ -1290,6 +1296,19 @@ check_lma: LASSERT(!updated); + /* + * if two OST objects map to the same inode, and inode mode is + * (S_IFREG | S_ISUID | S_ISGID | S_ISVTX | 0666), which means it's + * reserved by precreate, and not written yet, in this case, don't + * set inode for the object whose FID mismatch, so that it can create + * inode and not block precreate. + */ + if (fid_is_on_ost(info, dev, fid, OI_CHECK_FLD) && + inode->i_mode == (S_IFREG | S_ISUID | S_ISGID | S_ISVTX | 0666)) { + obj->oo_inode = NULL; + GOTO(out, result = 0); + } + result = osd_oi_lookup(info, dev, fid, id, OI_CHECK_FLD); /* * "result == -ENOENT" means the cached OI mapping has been removed @@ -1707,8 +1726,7 @@ static void osd_trans_commit_cb(struct super_block *sb, if (error) CERROR("transaction @0x%p commit error: %d\n", th, error); - dt_txn_hook_commit(th); - + OBD_FAIL_TIMEOUT(OBD_FAIL_OST_DELAY_TRANS, 40); /* call per-transaction callbacks if any */ list_for_each_entry_safe(dcb, tmp, &oh->ot_commit_dcb_list, dcb_linkage) { @@ -1865,7 +1883,7 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d, */ if (last_credits != oh->ot_credits && time_after(jiffies, last_printed + - msecs_to_jiffies(60 * MSEC_PER_SEC)) && + cfs_time_seconds(60)) && osd_transaction_size(dev) > 512) { CWARN("%s: credits %u > trans_max %u\n", osd_name(dev), oh->ot_credits, osd_transaction_size(dev)); @@ -2084,7 +2102,11 @@ static int osd_trans_cb_add(struct thandle *th, struct dt_txn_commit_cb *dcb) static void osd_object_delete(const struct lu_env *env, struct lu_object *l) { struct osd_object *obj = osd_obj(l); + struct qsd_instance *qsd = osd_def_qsd(osd_obj2dev(obj)); struct inode *inode = obj->oo_inode; + __u64 projid; + qid_t uid; + qid_t gid; LINVRNT(osd_invariant(obj)); @@ -2093,27 +2115,36 @@ static void osd_object_delete(const struct lu_env *env, struct lu_object *l) */ osd_index_fini(obj); - if (inode != NULL) { - struct qsd_instance *qsd = osd_def_qsd(osd_obj2dev(obj)); - qid_t uid = i_uid_read(inode); - qid_t gid = i_gid_read(inode); - obj->oo_inode = NULL; - iput(inode); - if (!obj->oo_header && qsd) { - struct osd_thread_info *info = osd_oti_get(env); - struct lquota_id_info *qi = &info->oti_qi; + if (!inode) + return; + + uid = i_uid_read(inode); + gid = i_gid_read(inode); + projid = i_projid_read(inode); - /* Release granted quota to master if necessary */ - qi->lqi_id.qid_uid = uid; - qsd_op_adjust(env, qsd, &qi->lqi_id, USRQUOTA); + obj->oo_inode = NULL; + iput(inode); - qi->lqi_id.qid_uid = gid; - qsd_op_adjust(env, qsd, &qi->lqi_id, GRPQUOTA); + /* do not rebalance quota if the caller needs to release memory + * otherwise qsd_refresh_usage() may went into a new ldiskfs + * transaction and risk to deadlock - LU-12178 */ + if (current->flags & (PF_MEMALLOC | PF_KSWAPD)) + return; - qi->lqi_id.qid_uid = i_projid_read(inode); - qsd_op_adjust(env, qsd, &qi->lqi_id, PRJQUOTA); - } + if (!obj->oo_header && qsd) { + struct osd_thread_info *info = osd_oti_get(env); + struct lquota_id_info *qi = &info->oti_qi; + + /* Release granted quota to master if necessary */ + qi->lqi_id.qid_uid = uid; + qsd_op_adjust(env, qsd, &qi->lqi_id, USRQUOTA); + + qi->lqi_id.qid_uid = gid; + qsd_op_adjust(env, qsd, &qi->lqi_id, GRPQUOTA); + + qi->lqi_id.qid_uid = projid; + qsd_op_adjust(env, qsd, &qi->lqi_id, PRJQUOTA); } } @@ -2159,7 +2190,7 @@ static int osd_object_print(const struct lu_env *env, void *cookie, * Concurrency: shouldn't matter. */ int osd_statfs(const struct lu_env *env, struct dt_device *d, - struct obd_statfs *sfs) + struct obd_statfs *sfs, struct obd_statfs_info *info) { struct osd_device *osd = osd_dt_dev(d); struct super_block *sb = osd_sb(osd); @@ -2184,8 +2215,11 @@ int osd_statfs(const struct lu_env *env, struct dt_device *d, goto out; statfs_pack(sfs, ksfs); - if (unlikely(sb->s_flags & MS_RDONLY)) + if (unlikely(sb->s_flags & SB_RDONLY)) sfs->os_state |= OS_STATE_READONLY; + + sfs->os_state |= osd->od_nonrotational ? OS_STATE_NONROT : 0; + if (ldiskfs_has_feature_extents(sb)) sfs->os_maxbytes = sb->s_maxbytes; else @@ -2255,12 +2289,12 @@ static void osd_conf_get(const struct lu_env *env, */ param->ddp_inodespace = PER_OBJ_USAGE; /* - * EXT_INIT_MAX_LEN is the theoretical maximum extent size (32k blocks - * = 128MB) which is unlikely to be hit in real life. Report a smaller - * maximum length to not under count the actual number of extents - * needed for writing a file. + * EXT_INIT_MAX_LEN is the theoretical maximum extent size (32k blocks + * is 128MB) which is unlikely to be hit in real life. Report a smaller + * maximum length to not under-count the actual number of extents + * needed for writing a file if there are sub-optimal block allocations. */ - param->ddp_max_extent_blks = EXT_INIT_MAX_LEN >> 2; + param->ddp_max_extent_blks = EXT_INIT_MAX_LEN >> 1; /* worst-case extent insertion metadata overhead */ param->ddp_extent_tax = 6 * LDISKFS_BLOCK_SIZE(sb); param->ddp_mntopts = 0; @@ -2285,8 +2319,8 @@ static void osd_conf_get(const struct lu_env *env, #endif param->ddp_max_ea_size = sb->s_blocksize - ea_overhead; - if (param->ddp_max_ea_size > OSD_MAX_EA_SIZE) - param->ddp_max_ea_size = OSD_MAX_EA_SIZE; + if (param->ddp_max_ea_size > OBD_MAX_EA_SIZE) + param->ddp_max_ea_size = OBD_MAX_EA_SIZE; /* * Preferred RPC size for efficient disk IO. 4MB shows good @@ -2348,6 +2382,11 @@ static void osd_conf_get(const struct lu_env *env, } } +static struct super_block *osd_mnt_sb_get(const struct dt_device *d) +{ + return osd_sb(osd_dt_dev(d)); +} + /* * Concurrency: shouldn't matter. */ @@ -2510,6 +2549,7 @@ static const struct dt_device_operations osd_dt_ops = { .dt_trans_stop = osd_trans_stop, .dt_trans_cb_add = osd_trans_cb_add, .dt_conf_get = osd_conf_get, + .dt_mnt_sb_get = osd_mnt_sb_get, .dt_sync = osd_sync, .dt_ro = osd_ro, .dt_commit_async = osd_commit_async, @@ -2581,18 +2621,6 @@ static int osd_write_locked(const struct lu_env *env, struct dt_object *dt) return obj->oo_owner == env; } -static struct timespec *osd_inode_time(const struct lu_env *env, - struct inode *inode, __u64 seconds) -{ - struct osd_thread_info *oti = osd_oti_get(env); - struct timespec *t = &oti->oti_time; - - t->tv_sec = seconds; - t->tv_nsec = 0; - *t = timespec_trunc(*t, inode->i_sb->s_time_gran); - return t; -} - static void osd_inode_getattr(const struct lu_env *env, struct inode *inode, struct lu_attr *attr) { @@ -2808,11 +2836,11 @@ static int osd_inode_setattr(const struct lu_env *env, return 0; if (bits & LA_ATIME) - inode->i_atime = *osd_inode_time(env, inode, attr->la_atime); + inode->i_atime = osd_inode_time(inode, attr->la_atime); if (bits & LA_CTIME) - inode->i_ctime = *osd_inode_time(env, inode, attr->la_ctime); + inode->i_ctime = osd_inode_time(inode, attr->la_ctime); if (bits & LA_MTIME) - inode->i_mtime = *osd_inode_time(env, inode, attr->la_mtime); + inode->i_mtime = osd_inode_time(inode, attr->la_mtime); if (bits & LA_SIZE) { spin_lock(&inode->i_lock); LDISKFS_I(inode)->i_disksize = attr->la_size; @@ -3057,13 +3085,19 @@ static struct dentry *osd_child_dentry_get(const struct lu_env *env, static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj, umode_t mode, struct dt_allocation_hint *hint, - struct thandle *th) + struct thandle *th, struct lu_attr *attr) { int result; struct osd_device *osd = osd_obj2dev(obj); struct osd_thandle *oth; struct dt_object *parent = NULL; struct inode *inode; + uid_t owner[2] = {0, 0}; + + if (attr->la_valid & LA_UID) + owner[0] = attr->la_uid; + if (attr->la_valid & LA_GID) + owner[1] = attr->la_gid; LINVRNT(osd_invariant(obj)); LASSERT(obj->oo_inode == NULL); @@ -3086,7 +3120,7 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj, inode = ldiskfs_create_inode(oth->ot_handle, parent ? osd_dt_obj(parent)->oo_inode : osd_sb(osd)->s_root->d_inode, - mode); + mode, owner); if (!IS_ERR(inode)) { /* Do not update file c/mtime in ldiskfs. */ inode->i_flags |= S_NOCMTIME; @@ -3128,7 +3162,7 @@ static int osd_mkdir(struct osd_thread_info *info, struct osd_object *obj, oth = container_of(th, struct osd_thandle, ot_super); LASSERT(oth->ot_handle->h_transaction != NULL); - result = osd_mkfile(info, obj, mode, hint, th); + result = osd_mkfile(info, obj, mode, hint, th, attr); return result; } @@ -3150,7 +3184,7 @@ static int osd_mk_index(struct osd_thread_info *info, struct osd_object *obj, oth = container_of(th, struct osd_thandle, ot_super); LASSERT(oth->ot_handle->h_transaction != NULL); - result = osd_mkfile(info, obj, mode, hint, th); + result = osd_mkfile(info, obj, mode, hint, th, attr); if (result == 0) { LASSERT(obj->oo_inode != NULL); if (feat->dif_flags & DT_IND_VARKEY) @@ -3177,7 +3211,8 @@ static int osd_mkreg(struct osd_thread_info *info, struct osd_object *obj, { LASSERT(S_ISREG(attr->la_mode)); return osd_mkfile(info, obj, (attr->la_mode & - (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th); + (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th, + attr); } static int osd_mksym(struct osd_thread_info *info, struct osd_object *obj, @@ -3188,7 +3223,8 @@ static int osd_mksym(struct osd_thread_info *info, struct osd_object *obj, { LASSERT(S_ISLNK(attr->la_mode)); return osd_mkfile(info, obj, (attr->la_mode & - (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th); + (S_IFMT | S_IALLUGO | S_ISVTX)), hint, th, + attr); } static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj, @@ -3205,7 +3241,7 @@ static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj, LASSERT(S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)); - result = osd_mkfile(info, obj, mode, hint, th); + result = osd_mkfile(info, obj, mode, hint, th, attr); if (result == 0) { LASSERT(obj->oo_inode != NULL); /* @@ -3375,6 +3411,19 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj, osd_id_gen(id, obj->oo_inode->i_ino, obj->oo_inode->i_generation); rc = osd_oi_insert(info, osd, fid, id, oh->ot_handle, OI_CHECK_FLD, NULL); + if (CFS_FAIL_CHECK(OBD_FAIL_OSD_DUPLICATE_MAP) && osd->od_is_ost) { + struct lu_fid next_fid = *fid; + + /* insert next object in advance, and map to the same inode */ + next_fid.f_oid++; + if (next_fid.f_oid != 0) { + osd_trans_exec_op(env, th, OSD_OT_INSERT); + osd_oi_insert(info, osd, &next_fid, id, oh->ot_handle, + OI_CHECK_FLD, NULL); + osd_trans_exec_check(env, th, OSD_OT_INSERT); + } + } + osd_trans_exec_check(env, th, OSD_OT_INSERT); return rc; @@ -3439,6 +3488,9 @@ static int osd_declare_create(const struct lu_env *env, struct dt_object *dt, */ osd_trans_declare_op(env, oh, OSD_OT_INSERT, osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1); + if (CFS_FAIL_CHECK(OBD_FAIL_OSD_DUPLICATE_MAP)) + osd_trans_declare_op(env, oh, OSD_OT_INSERT, + osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1); /* will help to find FID->ino mapping at dt_insert() */ rc = osd_idc_find_and_init(env, osd_obj2dev(osd_dt_obj(dt)), @@ -3756,7 +3808,8 @@ static struct inode *osd_create_local_agent_inode(const struct lu_env *env, oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle->h_transaction != NULL); - local = ldiskfs_create_inode(oh->ot_handle, pobj->oo_inode, type); + local = ldiskfs_create_inode(oh->ot_handle, pobj->oo_inode, type, + NULL); if (IS_ERR(local)) { CERROR("%s: create local error %d\n", osd_name(osd), (int)PTR_ERR(local)); @@ -4723,7 +4776,7 @@ static int osd_object_sync(const struct lu_env *env, struct dt_object *dt, file->f_op = inode->i_fop; set_file_inode(file, inode); - rc = ll_vfs_fsync_range(file, start, end, 0); + rc = vfs_fsync_range(file, start, end, 0); RETURN(rc); } @@ -7565,7 +7618,7 @@ static int osd_mount(const struct lu_env *env, "force_over_512tb", NULL }; - strcat(options, opts); + strncat(options, opts, PAGE_SIZE); for (rc = 0, str = options; sout[rc]; ) { char *op = strstr(str, sout[rc]); @@ -7585,13 +7638,13 @@ static int osd_mount(const struct lu_env *env, ; } } else { - strncat(options, "user_xattr,acl", 14); + strncat(options, "user_xattr,acl", PAGE_SIZE); } /* Glom up mount options */ if (*options != '\0') - strcat(options, ","); - strlcat(options, "no_mbcache,nodelalloc", PAGE_SIZE); + strncat(options, ",", PAGE_SIZE); + strncat(options, "no_mbcache,nodelalloc", PAGE_SIZE); type = get_fs_type("ldiskfs"); if (!type) { @@ -7730,6 +7783,7 @@ static int osd_device_init0(const struct lu_env *env, o->od_read_cache = 1; o->od_writethrough_cache = 1; o->od_readcache_max_filesize = OSD_MAX_CACHE_SIZE; + o->od_auto_scrub_interval = AS_DEFAULT; cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4), @@ -7751,6 +7805,10 @@ static int osd_device_init0(const struct lu_env *env, if (rc != 0) GOTO(out, rc); + /* Can only check block device after mount */ + o->od_nonrotational = + blk_queue_nonrot(bdev_get_queue(osd_sb(o)->s_bdev)); + rc = osd_obj_map_init(env, o); if (rc != 0) GOTO(out_mnt, rc); @@ -7878,6 +7936,7 @@ static int osd_process_config(const struct lu_env *env, struct lu_device *d, struct lustre_cfg *cfg) { struct osd_device *o = osd_dev(d); + ssize_t count; int rc; ENTRY; @@ -7897,15 +7956,12 @@ static int osd_process_config(const struct lu_env *env, break; case LCFG_PARAM: LASSERT(&o->od_dt_dev); - rc = class_process_proc_param(PARAM_OSD, lprocfs_osd_obd_vars, - cfg, &o->od_dt_dev); - if (rc > 0 || rc == -ENOSYS) { - rc = class_process_proc_param(PARAM_OST, - lprocfs_osd_obd_vars, - cfg, &o->od_dt_dev); - if (rc > 0) - rc = 0; - } + count = class_modify_config(cfg, PARAM_OSD, + &o->od_dt_dev.dd_kobj); + if (count < 0) + count = class_modify_config(cfg, PARAM_OST, + &o->od_dt_dev.dd_kobj); + rc = count > 0 ? 0 : count; break; case LCFG_PRE_CLEANUP: osd_scrub_stop(o); @@ -8091,7 +8147,7 @@ static int osd_health_check(const struct lu_env *env, struct obd_device *obd) struct osd_device *osd = osd_dev(obd->obd_lu_dev); struct super_block *sb = osd_sb(osd); - return (osd->od_mnt == NULL || sb->s_flags & MS_RDONLY); + return (osd->od_mnt == NULL || sb->s_flags & SB_RDONLY); } /* @@ -8105,8 +8161,33 @@ static struct obd_ops osd_obd_device_ops = { .o_health_check = osd_health_check, }; +static ssize_t track_declares_assert_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", ldiskfs_track_declares_assert); +} + +static ssize_t track_declares_assert_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + bool track_declares_assert; + int rc; + + rc = kstrtobool(buffer, &track_declares_assert); + if (rc) + return rc; + + ldiskfs_track_declares_assert = track_declares_assert; + + return count; +} +LUSTRE_RW_ATTR(track_declares_assert); + static int __init osd_init(void) { + struct kobject *kobj; int rc; CLASSERT(BH_DXLock < sizeof(((struct buffer_head *)0)->b_state) * 8); @@ -8127,16 +8208,36 @@ static int __init osd_init(void) (void *)kallsyms_lookup_name("dev_check_rdonly"); #endif - rc = class_register_type(&osd_obd_device_ops, NULL, true, - lprocfs_osd_module_vars, + rc = class_register_type(&osd_obd_device_ops, NULL, true, NULL, LUSTRE_OSD_LDISKFS_NAME, &osd_device_type); - if (rc) + if (rc) { lu_kmem_fini(ldiskfs_caches); + return rc; + } + + kobj = kset_find_obj(lustre_kset, LUSTRE_OSD_LDISKFS_NAME); + if (kobj) { + rc = sysfs_create_file(kobj, + &lustre_attr_track_declares_assert.attr); + kobject_put(kobj); + if (rc) { + CWARN("osd-ldiskfs: track_declares_assert failed to register with sysfs\n"); + rc = 0; + } + } return rc; } static void __exit osd_exit(void) { + struct kobject *kobj; + + kobj = kset_find_obj(lustre_kset, LUSTRE_OSD_LDISKFS_NAME); + if (kobj) { + sysfs_remove_file(kobj, + &lustre_attr_track_declares_assert.attr); + kobject_put(kobj); + } class_unregister_type(LUSTRE_OSD_LDISKFS_NAME); lu_kmem_fini(ldiskfs_caches); }