X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd-ldiskfs%2Fosd_handler.c;h=de725c67f0db249b6817701baa400cf04d92bdd1;hb=47b457491889278ef17f7dc4c72435e313829bdd;hp=f3ea4873d7a97084444e68037ac0125666acd455;hpb=8bd064273746bc51826af4a795be79a5cedef265;p=fs%2Flustre-release.git diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index f3ea487..de725c6 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -43,9 +41,6 @@ * Pravin Shelar : Added fid in dirent */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_MDS #include @@ -248,43 +243,101 @@ static struct lu_object *osd_object_alloc(const struct lu_env *env, } } +static int osd_get_lma(struct inode *inode, struct dentry *dentry, + struct lustre_mdt_attrs *lma) +{ + int rc; + + dentry->d_inode = inode; + rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA, (void *)lma, + sizeof(*lma)); + if (rc > 0) { + /* Check LMA compatibility */ + if (lma->lma_incompat & ~cpu_to_le32(LMA_INCOMPAT_SUPP)) { + CWARN("%.16s: unsupported incompat LMA feature(s) " + "%lx/%#x\n", + LDISKFS_SB(inode->i_sb)->s_es->s_volume_name, + inode->i_ino, le32_to_cpu(lma->lma_incompat) & + ~LMA_INCOMPAT_SUPP); + rc = -ENOSYS; + } else { + lustre_lma_swab(lma); + rc = 0; + } + } else if (rc == 0) { + rc = -ENODATA; + } + + return rc; +} + /* * retrieve object from backend ext fs. **/ -struct inode *osd_iget(struct osd_thread_info *info, - struct osd_device *dev, - const struct osd_inode_id *id) -{ - struct inode *inode = NULL; - - inode = ldiskfs_iget(osd_sb(dev), id->oii_ino); - if (IS_ERR(inode)) { - CERROR("Cannot get inode, rc = %li\n", PTR_ERR(inode)); - } else if (id->oii_gen != OSD_OII_NOGEN && - inode->i_generation != id->oii_gen) { - iput(inode); - inode = ERR_PTR(-ESTALE); - } else if (inode->i_nlink == 0) { - /* due to parallel readdir and unlink, - * we can have dead inode here. */ - CWARN("stale inode\n"); - make_bad_inode(inode); - iput(inode); - inode = ERR_PTR(-ESTALE); - } else if (is_bad_inode(inode)) { - CERROR("bad inode %lx\n",inode->i_ino); - iput(inode); - inode = ERR_PTR(-ENOENT); - } else { - /* Do not update file c/mtime in ldiskfs. - * NB: we don't have any lock to protect this because we don't - * have reference on osd_object now, but contention with - * another lookup + attr_set can't happen in the tiny window - * between if (...) and set S_NOCMTIME. */ - if (!(inode->i_flags & S_NOCMTIME)) - inode->i_flags |= S_NOCMTIME; - } - return inode; +struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, + struct osd_inode_id *id) +{ + struct inode *inode = NULL; + + inode = ldiskfs_iget(osd_sb(dev), id->oii_ino); + if (IS_ERR(inode)) { + CDEBUG(D_INODE, "no inode: ino = %u, rc = %ld\n", + id->oii_ino, PTR_ERR(inode)); + } else if (id->oii_gen != OSD_OII_NOGEN && + inode->i_generation != id->oii_gen) { + CDEBUG(D_INODE, "unmatched inode: ino = %u, gen0 = %u, " + "gen1 = %u\n", + id->oii_ino, id->oii_gen, inode->i_generation); + iput(inode); + inode = ERR_PTR(-ESTALE); + } else if (inode->i_nlink == 0) { + /* due to parallel readdir and unlink, + * we can have dead inode here. */ + CDEBUG(D_INODE, "stale inode: ino = %u\n", id->oii_ino); + make_bad_inode(inode); + iput(inode); + inode = ERR_PTR(-ESTALE); + } else if (is_bad_inode(inode)) { + CWARN("%s: bad inode: ino = %u\n", + dev->od_dt_dev.dd_lu_dev.ld_obd->obd_name, id->oii_ino); + iput(inode); + inode = ERR_PTR(-ENOENT); + } else { + if (id->oii_gen == OSD_OII_NOGEN) + osd_id_gen(id, inode->i_ino, inode->i_generation); + + /* Do not update file c/mtime in ldiskfs. + * NB: we don't have any lock to protect this because we don't + * have reference on osd_object now, but contention with + * another lookup + attr_set can't happen in the tiny window + * between if (...) and set S_NOCMTIME. */ + if (!(inode->i_flags & S_NOCMTIME)) + inode->i_flags |= S_NOCMTIME; + } + return inode; +} + +struct inode *osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev, + struct osd_inode_id *id, struct lu_fid *fid) +{ + struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; + struct inode *inode; + int rc; + + inode = osd_iget(info, dev, id); + if (IS_ERR(inode)) + return inode; + + rc = osd_get_lma(inode, &info->oti_obj_dentry, lma); + if (rc == 0) { + *fid = lma->lma_self_fid; + } else if (rc == -ENODATA) { + LU_IGIF_BUILD(fid, inode->i_ino, inode->i_generation); + } else { + iput(inode); + inode = ERR_PTR(rc); + } + return inode; } static int osd_fid_lookup(const struct lu_env *env, @@ -345,18 +398,20 @@ static int osd_fid_lookup(const struct lu_env *env, } if (!S_ISDIR(inode->i_mode) || !ldiskfs_pdo) /* done */ - goto out; + GOTO(out, result = 0); + + LASSERT(obj->oo_hl_head == NULL); + obj->oo_hl_head = ldiskfs_htree_lock_head_alloc(HTREE_HBITS_DEF); + if (obj->oo_hl_head == NULL) { + obj->oo_inode = NULL; + iput(inode); + GOTO(out, result = -ENOMEM); + } + GOTO(out, result = 0); - LASSERT(obj->oo_hl_head == NULL); - obj->oo_hl_head = ldiskfs_htree_lock_head_alloc(HTREE_HBITS_DEF); - if (obj->oo_hl_head == NULL) { - obj->oo_inode = NULL; - iput(inode); - result = -ENOMEM; - } out: - LINVRNT(osd_invariant(obj)); - RETURN(result); + LINVRNT(osd_invariant(obj)); + return result; } /* @@ -556,9 +611,11 @@ static void osd_trans_commit_cb(struct journal_callback *jcb, int error) dt_txn_hook_commit(th); - /* call per-transaction callbacks if any */ - cfs_list_for_each_entry_safe(dcb, tmp, &oh->ot_dcb_list, dcb_linkage) - dcb->dcb_func(NULL, th, dcb, error); + /* call per-transaction callbacks if any */ + cfs_list_for_each_entry_safe(dcb, tmp, &oh->ot_dcb_list, dcb_linkage) { + cfs_list_del_init(&dcb->dcb_linkage); + dcb->dcb_func(NULL, th, dcb, error); + } lu_ref_del_at(&lud->ld_reference, oh->ot_dev_link, "osd-tx", th); lu_device_put(lud); @@ -873,7 +930,7 @@ static void osd_conf_get(const struct lu_env *env, */ param->ddp_max_name_len = LDISKFS_NAME_LEN; param->ddp_max_nlink = LDISKFS_LINK_MAX; - param->ddp_block_shift = osd_sb(osd_dt_dev(dev))->s_blocksize_bits; + param->ddp_block_shift = sb->s_blocksize_bits; param->ddp_mntopts = 0; if (test_opt(sb, XATTR_USER)) param->ddp_mntopts |= MNTOPT_USERXATTR; @@ -1000,7 +1057,7 @@ const int osd_dto_credits_noquota[DTO_NR] = { [DTO_INDEX_INSERT] = 16, [DTO_INDEX_DELETE] = 16, /** - * Unused now + * Used for OI scrub */ [DTO_INDEX_UPDATE] = 16, /** @@ -1484,6 +1541,7 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj, * NB: don't need any lock because no contention at this * early stage */ inode->i_flags |= S_NOCMTIME; + inode->i_state |= I_LUSTRE_NOSCRUB; obj->oo_inode = inode; result = 0; } else { @@ -1697,11 +1755,8 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj, LASSERT(obj->oo_inode != NULL); LASSERT(uc != NULL); - id->oii_ino = obj->oo_inode->i_ino; - id->oii_gen = obj->oo_inode->i_generation; - - return osd_oi_insert(info, osd, fid, id, th, - uc->mu_cap & CFS_CAP_SYS_RESOURCE_MASK); + osd_id_gen(id, obj->oo_inode->i_ino, obj->oo_inode->i_generation); + return osd_oi_insert(info, osd, fid, id, th); } static int osd_declare_object_create(const struct lu_env *env, @@ -1820,6 +1875,9 @@ static int osd_object_destroy(const struct lu_env *env, LASSERT(inode); LASSERT(!lu_object_is_dying(dt->do_lu.lo_header)); + /* Parallel control for OI scrub. For most of cases, there is no + * lock contention. So it will not affect unlink performance. */ + cfs_mutex_lock(&inode->i_mutex); if (S_ISDIR(inode->i_mode)) { LASSERT(osd_inode_unlinked(inode) || inode->i_nlink == 1); @@ -1834,6 +1892,7 @@ static int osd_object_destroy(const struct lu_env *env, OSD_EXEC_OP(th, destroy); result = osd_oi_delete(osd_oti_get(env), osd, fid, th); + cfs_mutex_unlock(&inode->i_mutex); /* XXX: add to ext3 orphan list */ /* rc = ext3_orphan_add(handle_t *handle, struct inode *inode) */ @@ -1859,7 +1918,6 @@ static int __osd_xattr_set(const struct lu_env *env, struct dt_object *dt, LASSERT(dt_object_exists(dt)); LASSERT(inode->i_op != NULL && inode->i_op->setxattr != NULL); - LASSERT(osd_write_locked(env, obj)); if (fl & LU_XATTR_REPLACE) fs_flags |= XATTR_REPLACE; @@ -1898,15 +1956,6 @@ static int osd_ea_fid_set(const struct lu_env *env, struct dt_object *dt, } /** - * Helper function to form igif - */ -static inline void osd_igif_get(const struct lu_env *env, struct inode *inode, - struct lu_fid *fid) -{ - LU_IGIF_BUILD(fid, inode->i_ino, inode->i_generation); -} - -/** * ldiskfs supports fid in dirent, it is passed in dentry->d_fsdata. * lustre 1.8 also uses d_fsdata for passing other info to ldiskfs. * To have compatilibility with 1.8 ldiskfs driver we need to have @@ -1933,54 +1982,20 @@ void osd_get_ldiskfs_dirent_param(struct ldiskfs_dentry_param *param, * \retval 0 on success */ static int osd_ea_fid_get(const struct lu_env *env, struct osd_object *obj, - __u32 ino, struct lu_fid *fid) + __u32 ino, struct lu_fid *fid) { - struct osd_thread_info *info = osd_oti_get(env); - struct lustre_mdt_attrs *mdt_attrs = &info->oti_mdt_attrs; - struct lu_device *ldev = obj->oo_dt.do_lu.lo_dev; - struct dentry *dentry = &info->oti_child_dentry; - struct osd_inode_id *id = &info->oti_id; - struct osd_device *dev; - struct inode *inode; - int rc; - - ENTRY; - dev = osd_dev(ldev); - - id->oii_ino = ino; - id->oii_gen = OSD_OII_NOGEN; + struct osd_thread_info *info = osd_oti_get(env); + struct osd_inode_id *id = &info->oti_id; + struct inode *inode; + ENTRY; - inode = osd_iget(info, dev, id); - if (IS_ERR(inode)) { - rc = PTR_ERR(inode); - GOTO(out,rc); - } - dentry->d_inode = inode; - - LASSERT(inode->i_op != NULL && inode->i_op->getxattr != NULL); - rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA, (void *)mdt_attrs, - sizeof *mdt_attrs); - - /* Check LMA compatibility */ - if (rc > 0 && - (mdt_attrs->lma_incompat & ~cpu_to_le32(LMA_INCOMPAT_SUPP))) { - CWARN("Inode %lx: Unsupported incompat LMA feature(s) %#x\n", - inode->i_ino, le32_to_cpu(mdt_attrs->lma_incompat) & - ~LMA_INCOMPAT_SUPP); - return -ENOSYS; - } + osd_id_gen(id, ino, OSD_OII_NOGEN); + inode = osd_iget_fid(info, osd_obj2dev(obj), id, fid); + if (IS_ERR(inode)) + RETURN(PTR_ERR(inode)); - if (rc > 0) { - lustre_lma_swab(mdt_attrs); - memcpy(fid, &mdt_attrs->lma_self_fid, sizeof(*fid)); - rc = 0; - } else if (rc == -ENODATA) { - osd_igif_get(env, inode, fid); - rc = 0; - } - iput(inode); -out: - RETURN(rc); + iput(inode); + RETURN(0); } /** @@ -2075,7 +2090,7 @@ static int osd_object_ref_add(const struct lu_env *env, inode->i_nlink == 2) inode->i_nlink = 1; } - LASSERT(inode->i_nlink < LDISKFS_LINK_MAX); + LASSERT(inode->i_nlink <= LDISKFS_LINK_MAX); cfs_spin_unlock(&obj->oo_guard); inode->i_sb->s_op->dirty_inode(inode); LINVRNT(osd_invariant(obj)); @@ -2914,7 +2929,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt, LASSERT(th != NULL); if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_INSERT)) - return -EACCES; + RETURN(-EACCES); OSD_EXEC_OP(th, insert); @@ -3176,7 +3191,7 @@ struct osd_object *osd_object_find(const struct lu_env *env, else LU_OBJECT_DEBUG(D_ERROR, env, luch, "lu_object can't be located" - ""DFID"\n", PFID(fid)); + DFID"\n", PFID(fid)); if (child == NULL) { lu_object_put(env, luch); @@ -3187,6 +3202,7 @@ struct osd_object *osd_object_find(const struct lu_env *env, LU_OBJECT_DEBUG(D_ERROR, env, luch, "lu_object does not exists "DFID"\n", PFID(fid)); + lu_object_put(env, luch); child = ERR_PTR(-ENOENT); } } else @@ -3977,7 +3993,7 @@ static void osd_key_exit(const struct lu_context *ctx, LU_TYPE_INIT_FINI(osd, &osd_key); struct lu_context_key osd_key = { - .lct_tags = LCT_DT_THREAD | LCT_MD_THREAD, + .lct_tags = LCT_DT_THREAD | LCT_MD_THREAD | LCT_MG_THREAD | LCT_LOCAL, .lct_init = osd_key_init, .lct_fini = osd_key_fini, .lct_exit = osd_key_exit @@ -3992,19 +4008,16 @@ static int osd_device_init(const struct lu_env *env, struct lu_device *d, static int osd_shutdown(const struct lu_env *env, struct osd_device *o) { - struct osd_thread_info *info = osd_oti_get(env); + ENTRY; - ENTRY; + osd_scrub_cleanup(env, o); - if (o->od_oi_table != NULL) - osd_oi_fini(info, o); + if (o->od_fsops) { + fsfilt_put_ops(o->od_fsops); + o->od_fsops = NULL; + } - if (o->od_fsops) { - fsfilt_put_ops(o->od_fsops); - o->od_fsops = NULL; - } - - RETURN(0); + RETURN(0); } static int osd_mount(const struct lu_env *env, @@ -4101,6 +4114,7 @@ static struct lu_device *osd_device_alloc(const struct lu_env *env, l->ld_ops = &osd_lu_ops; o->od_dt_dev.dd_ops = &osd_dt_ops; cfs_spin_lock_init(&o->od_osfs_lock); + cfs_mutex_init(&o->od_otable_mutex); o->od_osfs_age = cfs_time_shift_64(-1000); o->od_capa_hash = init_capa_hash(); if (o->od_capa_hash == NULL) { @@ -4159,14 +4173,12 @@ static int osd_recovery_complete(const struct lu_env *env, static int osd_prepare(const struct lu_env *env, struct lu_device *pdev, struct lu_device *dev) { - struct osd_device *osd = osd_dev(dev); - struct osd_thread_info *oti = osd_oti_get(env); - int result; - - ENTRY; + struct osd_device *osd = osd_dev(dev); + int result; + ENTRY; - /* 1. initialize oi before any file create or file open */ - result = osd_oi_init(oti, osd); + /* 1. setup scrub, including OI files initialization */ + result = osd_scrub_setup(env, osd); if (result < 0) RETURN(result); @@ -4212,7 +4224,7 @@ static struct lu_device_type osd_device_type = { .ldt_tags = LU_DEVICE_DT, .ldt_name = LUSTRE_OSD_NAME, .ldt_ops = &osd_device_type_ops, - .ldt_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD + .ldt_ctx_tags = LCT_LOCAL, }; /*