X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosd-ldiskfs%2Fosd_oi.c;h=cd1db06d302de1a16f35b3290e3ffc1b686b7745;hp=0918c75be6b09ca69ab9074ccdbeb2935a1fe6c7;hb=2272a301f5e365c0ca02b5c939934c1051d910d3;hpb=4980567857699c7f902ebda336ea98fdc4b83100 diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c index 0918c75..cd1db06 100644 --- a/lustre/osd-ldiskfs/osd_oi.c +++ b/lustre/osd-ldiskfs/osd_oi.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -29,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, Whamcloud, Inc. + * Copyright (c) 2012, 2016, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -42,19 +36,7 @@ * Author: Nikita Danilov */ -/* - * oi uses two mechanisms to implement fid->cookie mapping: - * - * - persistent index, where cookie is a record and fid is a key, and - * - * - algorithmic mapping for "igif" fids. - * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_MDS +#define DEBUG_SUBSYSTEM S_OSD #include @@ -69,23 +51,19 @@ /* fid_cpu_to_be() */ #include +#include #include "osd_oi.h" /* osd_lookup(), struct osd_thread_info */ #include "osd_internal.h" -#include "osd_igif.h" -#include "dt_object.h" - -#define OSD_OI_FID_NR (1UL << OSD_OI_FID_OID_BITS) -#define OSD_OI_FID_NR_MAX (1UL << OSD_OI_FID_OID_BITS_MAX) +#include "osd_scrub.h" static unsigned int osd_oi_count = OSD_OI_FID_NR; -CFS_MODULE_PARM(osd_oi_count, "i", int, 0444, - "Number of Object Index containers to be created, " - "it's only valid for new filesystem."); +module_param(osd_oi_count, int, 0444); +MODULE_PARM_DESC(osd_oi_count, "Number of Object Index containers to be created, it's only valid for new filesystem."); /** to serialize concurrent OI index initialization */ -static cfs_mutex_t oi_init_lock; +static struct mutex oi_init_lock; static struct dt_index_features oi_feat = { .dif_flags = DT_IND_UPDATE, @@ -97,81 +75,81 @@ static struct dt_index_features oi_feat = { #define OSD_OI_NAME_BASE "oi.16" static void osd_oi_table_put(struct osd_thread_info *info, - struct osd_oi **oi_table, unsigned oi_count) + struct osd_oi **oi_table, unsigned oi_count) { - struct iam_container *bag; - int i; - - for (i = 0; i < oi_count; i++) { - LASSERT(oi_table[i] != NULL); - LASSERT(oi_table[i]->oi_inode != NULL); - - bag = &(oi_table[i]->oi_dir.od_container); - if (bag->ic_object == oi_table[i]->oi_inode) - iam_container_fini(bag); - iput(oi_table[i]->oi_inode); - oi_table[i]->oi_inode = NULL; - OBD_FREE_PTR(oi_table[i]); - } + struct iam_container *bag; + int i; + + for (i = 0; i < oi_count; i++) { + if (oi_table[i] == NULL) + continue; + + LASSERT(oi_table[i]->oi_inode != NULL); + + bag = &(oi_table[i]->oi_dir.od_container); + if (bag->ic_object == oi_table[i]->oi_inode) + iam_container_fini(bag); + iput(oi_table[i]->oi_inode); + oi_table[i]->oi_inode = NULL; + OBD_FREE_PTR(oi_table[i]); + oi_table[i] = NULL; + } } static int osd_oi_index_create_one(struct osd_thread_info *info, - struct osd_device *osd, const char *name, - struct dt_index_features *feat) + struct osd_device *osd, const char *name, + struct dt_index_features *feat) { - const struct lu_env *env = info->oti_env; - struct osd_inode_id *id = &info->oti_id; - struct buffer_head *bh; - struct inode *inode; - struct ldiskfs_dir_entry_2 *de; - struct dentry *dentry; - struct inode *dir; - handle_t *jh; - int rc; - - dentry = osd_child_dentry_by_inode(env, osd_sb(osd)->s_root->d_inode, - name, strlen(name)); - dir = osd_sb(osd)->s_root->d_inode; - bh = osd_ldiskfs_find_entry(dir, dentry, &de, NULL); - if (bh) { - brelse(bh); - - id->oii_ino = le32_to_cpu(de->inode); - id->oii_gen = OSD_OII_NOGEN; - - inode = osd_iget(info, osd, id); - if (!IS_ERR(inode)) { - iput(inode); - RETURN(-EEXIST); - } - RETURN(PTR_ERR(inode)); - } - - jh = ldiskfs_journal_start_sb(osd_sb(osd), 100); - LASSERT(!IS_ERR(jh)); - - inode = ldiskfs_create_inode(jh, osd_sb(osd)->s_root->d_inode, - (S_IFREG | S_IRUGO | S_IWUSR)); - LASSERT(!IS_ERR(inode)); - - if (feat->dif_flags & DT_IND_VARKEY) - rc = iam_lvar_create(inode, feat->dif_keysize_max, - feat->dif_ptrsize, feat->dif_recsize_max, - jh); - else - rc = iam_lfix_create(inode, feat->dif_keysize_max, - feat->dif_ptrsize, feat->dif_recsize_max, - jh); - - dentry = osd_child_dentry_by_inode(env, osd_sb(osd)->s_root->d_inode, - name, strlen(name)); - rc = osd_ldiskfs_add_entry(jh, dentry, inode, NULL); - LASSERT(rc == 0); - - ldiskfs_journal_stop(jh); - iput(inode); - - return rc; + const struct lu_env *env = info->oti_env; + struct osd_inode_id *id = &info->oti_id; + struct buffer_head *bh; + struct inode *inode; + struct ldiskfs_dir_entry_2 *de; + struct dentry *dentry; + struct super_block *sb = osd_sb(osd); + struct inode *dir = sb->s_root->d_inode; + handle_t *jh; + int rc; + + dentry = osd_child_dentry_by_inode(env, dir, name, strlen(name)); + bh = osd_ldiskfs_find_entry(dir, &dentry->d_name, &de, NULL, NULL); + if (!IS_ERR(bh)) { + osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN); + brelse(bh); + inode = osd_iget(info, osd, id); + if (!IS_ERR(inode)) { + iput(inode); + inode = ERR_PTR(-EEXIST); + } + return PTR_ERR(inode); + } + + jh = osd_journal_start_sb(sb, LDISKFS_HT_MISC, 100); + if (IS_ERR(jh)) + return PTR_ERR(jh); + + inode = ldiskfs_create_inode(jh, dir, (S_IFREG | S_IRUGO | S_IWUSR)); + if (IS_ERR(inode)) { + ldiskfs_journal_stop(jh); + return PTR_ERR(inode); + } + + ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB); + unlock_new_inode(inode); + + if (feat->dif_flags & DT_IND_VARKEY) + rc = iam_lvar_create(inode, feat->dif_keysize_max, + feat->dif_ptrsize, feat->dif_recsize_max, + jh); + else + rc = iam_lfix_create(inode, feat->dif_keysize_max, + feat->dif_ptrsize, feat->dif_recsize_max, + jh); + dentry = osd_child_dentry_by_inode(env, dir, name, strlen(name)); + rc = osd_ldiskfs_add_entry(info, osd, jh, dentry, inode, NULL); + ldiskfs_journal_stop(jh); + iput(inode); + return rc; } static struct inode *osd_oi_index_open(struct osd_thread_info *info, @@ -204,7 +182,7 @@ static struct inode *osd_oi_index_open(struct osd_thread_info *info, rc = osd_oi_index_create_one(info, osd, name, f); if (rc) - RETURN(ERR_PTR(rc)); + return ERR_PTR(rc); dentry = ll_lookup_one_len(name, osd_sb(osd)->s_root, strlen(name)); if (IS_ERR(dentry)) @@ -248,6 +226,13 @@ static int osd_oi_open(struct osd_thread_info *info, struct osd_device *osd, if (IS_ERR(inode)) RETURN(PTR_ERR(inode)); + /* 'What the @fid is' is not imporatant, because these objects + * have no OI mappings, and only are visible inside the OSD.*/ + lu_igif_build(&info->oti_fid, inode->i_ino, inode->i_generation); + rc = osd_ea_fid_set(info, inode, &info->oti_fid, LMAC_NOT_IN_OI, 0); + if (rc != 0) + GOTO(out_inode, rc); + OBD_ALLOC_PTR(oi); if (oi == NULL) GOTO(out_inode, rc = -ENOMEM); @@ -295,93 +280,216 @@ out_inode: */ static int osd_oi_table_open(struct osd_thread_info *info, struct osd_device *osd, - struct osd_oi **oi_table, unsigned oi_count, bool create) + struct osd_oi **oi_table, unsigned oi_count, bool create) { - struct dt_device *dev = &osd->od_dt_dev; - int count = 0; - int rc = 0; - int i; - - /* NB: oi_count != 0 means that we have already created/known all OIs - * and have known exact number of OIs. */ - LASSERT(oi_count <= OSD_OI_FID_NR_MAX); - - for (i = 0; i < (oi_count != 0 ? oi_count : OSD_OI_FID_NR_MAX); i++) { - char name[12]; - - sprintf(name, "%s.%d", OSD_OI_NAME_BASE, i); - rc = osd_oi_open(info, osd, name, &oi_table[i], create); - if (rc == 0) { - count++; - continue; - } - - if (rc == -ENOENT && oi_count == 0) - return count; - - CERROR("%s: can't open %s: rc = %d\n", - dev->dd_lu_dev.ld_obd->obd_name, name, rc); - if (oi_count > 0) { - CERROR("%s: expect to open total %d OI files.\n", - dev->dd_lu_dev.ld_obd->obd_name, oi_count); - } - break; - } - - if (rc < 0) { - osd_oi_table_put(info, oi_table, count); - return rc; - } + struct scrub_file *sf = &osd->od_scrub.os_file; + int count = 0; + int rc = 0; + int i; + ENTRY; + + /* NB: oi_count != 0 means that we have already created/known all OIs + * and have known exact number of OIs. */ + LASSERT(oi_count <= OSD_OI_FID_NR_MAX); + + for (i = 0; i < (oi_count != 0 ? oi_count : OSD_OI_FID_NR_MAX); i++) { + char name[12]; + + if (oi_table[i] != NULL) { + count++; + continue; + } + + sprintf(name, "%s.%d", OSD_OI_NAME_BASE, i); + rc = osd_oi_open(info, osd, name, &oi_table[i], create); + if (rc == 0) { + count++; + continue; + } + + if (rc == -ENOENT && create == false) { + if (oi_count == 0) + return count; + + rc = 0; + ldiskfs_set_bit(i, sf->sf_oi_bitmap); + continue; + } + + CERROR("%s: can't open %s: rc = %d\n", + osd_dev2name(osd), name, rc); + if (oi_count > 0) + CERROR("%s: expect to open total %d OI files.\n", + osd_dev2name(osd), oi_count); + break; + } + + if (rc < 0) { + osd_oi_table_put(info, oi_table, oi_count > 0 ? oi_count : i); + count = rc; + } + + RETURN(count); +} - return count; +static int osd_remove_oi_one(struct dentry *parent, const char *name, + int namelen) +{ + struct dentry *child; + int rc; + + child = ll_lookup_one_len(name, parent, namelen); + if (IS_ERR(child)) { + rc = PTR_ERR(child); + } else { + rc = ll_vfs_unlink(parent->d_inode, child); + dput(child); + } + + return rc == -ENOENT ? 0 : rc; } -int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd) +static int osd_remove_ois(struct osd_thread_info *info, struct osd_device *osd) { - struct dt_device *dev = &osd->od_dt_dev; - struct osd_oi **oi; - int rc; + char name[16]; + int namelen; + int rc; + int i; + + for (i = 0; i < osd->od_scrub.os_file.sf_oi_count; i++) { + namelen = snprintf(name, sizeof(name), "%s.%d", + OSD_OI_NAME_BASE, i); + rc = osd_remove_oi_one(osd_sb(osd)->s_root, name, namelen); + if (rc != 0) { + CERROR("%s: fail to remove the stale OI file %s: " + "rc = %d\n", osd_dev2name(osd), name, rc); + return rc; + } + } + + namelen = snprintf(name, sizeof(name), "%s", OSD_OI_NAME_BASE); + rc = osd_remove_oi_one(osd_sb(osd)->s_root, name, namelen); + if (rc != 0) + CERROR("%s: fail to remove the stale OI file %s: rc = %d\n", + osd_dev2name(osd), name, rc); + + return rc; +} - OBD_ALLOC(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX); - if (oi == NULL) - return -ENOMEM; - - cfs_mutex_lock(&oi_init_lock); - /* try to open existing multiple OIs first */ - rc = osd_oi_table_open(info, osd, oi, 0, false); - if (rc != 0) - goto out; - - /* if previous failed then try found single OI from old filesystem */ - rc = osd_oi_open(info, osd, OSD_OI_NAME_BASE, &oi[0], false); - if (rc == 0) { /* found single OI from old filesystem */ - rc = 1; - goto out; - } else if (rc != -ENOENT) { - CERROR("%s: can't open %s: rc = %d\n", - dev->dd_lu_dev.ld_obd->obd_name, OSD_OI_NAME_BASE, rc); - goto out; - } +int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd, + bool restored) +{ + struct osd_scrub *scrub = &osd->od_scrub; + struct scrub_file *sf = &scrub->os_file; + struct osd_oi **oi; + int rc; + ENTRY; + + if (restored) { + rc = osd_remove_ois(info, osd); + if (rc != 0) + return rc; + } + + OBD_ALLOC(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX); + if (oi == NULL) + RETURN(-ENOMEM); + + mutex_lock(&oi_init_lock); + /* try to open existing multiple OIs first */ + rc = osd_oi_table_open(info, osd, oi, sf->sf_oi_count, false); + if (rc < 0) + GOTO(out, rc); + + if (rc > 0) { + if (rc == sf->sf_oi_count || sf->sf_oi_count == 0) + GOTO(out, rc); + + osd_scrub_file_reset(scrub, + LDISKFS_SB(osd_sb(osd))->s_es->s_uuid, + SF_RECREATED); + osd_oi_count = sf->sf_oi_count; + goto create; + } + + /* if previous failed then try found single OI from old filesystem */ + rc = osd_oi_open(info, osd, OSD_OI_NAME_BASE, &oi[0], false); + if (rc == 0) { /* found single OI from old filesystem */ + ldiskfs_clear_bit(0, sf->sf_oi_bitmap); + if (sf->sf_success_count == 0) + /* XXX: There is one corner case that if the OI_scrub + * file crashed or lost and we regard it upgrade, + * then we allow IGIF lookup to bypass OI files. + * + * The risk is that osd_fid_lookup() may found + * a wrong inode with the given IGIF especially + * when the MDT has performed file-level backup + * and restored after former upgrading from 1.8 + * to 2.x. Fortunately, the osd_fid_lookup()can + * verify the inode to decrease the risk. */ + osd_scrub_file_reset(scrub, + LDISKFS_SB(osd_sb(osd))->s_es->s_uuid, + SF_UPGRADE); + GOTO(out, rc = 1); + } else if (rc != -ENOENT) { + CERROR("%s: can't open %s: rc = %d\n", + osd_dev2name(osd), OSD_OI_NAME_BASE, rc); + GOTO(out, rc); + } + + if (sf->sf_oi_count > 0) { + int i; + + memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE); + for (i = 0; i < osd_oi_count; i++) + ldiskfs_set_bit(i, sf->sf_oi_bitmap); + osd_scrub_file_reset(scrub, + LDISKFS_SB(osd_sb(osd))->s_es->s_uuid, + SF_RECREATED); + } + sf->sf_oi_count = osd_oi_count; + +create: + rc = osd_scrub_file_store(scrub); + if (rc < 0) { + osd_oi_table_put(info, oi, sf->sf_oi_count); + GOTO(out, rc); + } + + /* No OIs exist, new filesystem, create OI objects */ + rc = osd_oi_table_open(info, osd, oi, osd_oi_count, true); + LASSERT(ergo(rc >= 0, rc == osd_oi_count)); + + GOTO(out, rc); - /* No OIs exist, new filesystem, create OI objects */ - rc = osd_oi_table_open(info, osd, oi, osd_oi_count, true); - LASSERT(ergo(rc >= 0, rc == osd_oi_count)); out: - if (rc < 0) { - OBD_FREE(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX); - } else { - LASSERT((rc & (rc - 1)) == 0); - osd->od_oi_table = oi; - osd->od_oi_count = rc; - rc = 0; - } - - cfs_mutex_unlock(&oi_init_lock); - return rc; + if (rc < 0) { + OBD_FREE(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX); + } else { + LASSERT((rc & (rc - 1)) == 0); + osd->od_oi_table = oi; + osd->od_oi_count = rc; + if (sf->sf_oi_count != rc) { + sf->sf_oi_count = rc; + rc = osd_scrub_file_store(scrub); + if (rc < 0) { + osd_oi_table_put(info, oi, sf->sf_oi_count); + OBD_FREE(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX); + } + } else { + rc = 0; + } + } + + mutex_unlock(&oi_init_lock); + return rc; } void osd_oi_fini(struct osd_thread_info *info, struct osd_device *osd) { + if (unlikely(osd->od_oi_table == NULL)) + return; + osd_oi_table_put(info, osd->od_oi_table, osd->od_oi_count); OBD_FREE(osd->od_oi_table, @@ -402,7 +510,6 @@ static int osd_oi_iam_lookup(struct osd_thread_info *oti, { struct iam_container *bag; struct iam_iterator *it = &oti->oti_idx_it; - struct iam_rec *iam_rec; struct iam_path_descr *ipd; int rc; ENTRY; @@ -419,17 +526,8 @@ static int osd_oi_iam_lookup(struct osd_thread_info *oti, iam_it_init(it, bag, 0, ipd); rc = iam_it_get(it, (struct iam_key *)key); - if (rc >= 0) { - if (S_ISDIR(oi->oi_inode->i_mode)) - iam_rec = (struct iam_rec *)oti->oti_ldp; - else - iam_rec = (struct iam_rec *)rec; - - iam_reccpy(&it->ii_path.ip_leaf, (struct iam_rec *)iam_rec); - if (S_ISDIR(oi->oi_inode->i_mode)) - osd_fid_unpack((struct lu_fid *)rec, - (struct osd_fid_pack *)iam_rec); - } + if (rc > 0) + iam_reccpy(&it->ii_path.ip_leaf, (struct iam_rec *)rec); iam_it_put(it); iam_it_fini(it); osd_ipd_put(oti->oti_env, bag, ipd); @@ -439,173 +537,296 @@ static int osd_oi_iam_lookup(struct osd_thread_info *oti, RETURN(rc); } -int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, - const struct lu_fid *fid, struct osd_inode_id *id) +int fid_is_on_ost(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, enum oi_check_flags flags) { - struct lu_fid *oi_fid = &info->oti_fid; - const struct dt_key *key; - int rc = 0; - - if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG) { - /* old OSD obj id */ - rc = osd_compat_objid_lookup(info, osd, fid, id); - } else if (fid_is_igif(fid)) { - lu_igif_to_id(fid, id); - rc = 0; - } else if (fid_is_fs_root(fid)) { - struct inode *inode = osd_sb(osd)->s_root->d_inode; - - id->oii_ino = inode->i_ino; - id->oii_gen = inode->i_generation; - } else { - if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) { - rc = osd_compat_spec_lookup(info, osd, fid, id); - if (rc == 0 || rc != -ERESTART) - goto out; - } - - fid_cpu_to_be(oi_fid, fid); - key = (struct dt_key *)oi_fid; - - rc = osd_oi_iam_lookup(info, osd_fid2oi(osd, fid), - (struct dt_rec *)id, key); - - if (rc > 0) { - id->oii_ino = be32_to_cpu(id->oii_ino); - id->oii_gen = be32_to_cpu(id->oii_gen); - rc = 0; - } else if (rc == 0) { - rc = -ENOENT; - } - } - -out: - return rc; + struct lu_seq_range *range = &info->oti_seq_range; + int rc; + ENTRY; + + if (flags & OI_KNOWN_ON_OST) + RETURN(1); + + if (unlikely(fid_is_local_file(fid) || fid_is_igif(fid) || + fid_is_llog(fid)) || fid_is_name_llog(fid) || + fid_is_quota(fid)) + RETURN(0); + + if (fid_is_idif(fid) || fid_is_last_id(fid)) + RETURN(1); + + if (!(flags & OI_CHECK_FLD)) + RETURN(0); + + if (osd_seq_site(osd)->ss_server_fld == NULL) + RETURN(0); + + rc = osd_fld_lookup(info->oti_env, osd, fid_seq(fid), range); + if (rc != 0) { + /* During upgrade, OST FLDB might not be loaded because + * OST FLDB is not created until 2.6, so if some DNE + * filesystem upgrade from 2.5 to 2.7/2.8, they will + * not be able to find the sequence from local FLDB + * cache see fld_index_init(). */ + if (rc == -ENOENT && osd->od_is_ost) + RETURN(1); + + if (rc != -ENOENT) + CERROR("%s: lookup FLD "DFID": rc = %d\n", + osd_name(osd), PFID(fid), rc); + RETURN(0); + } + + if (fld_range_is_ost(range)) + RETURN(1); + + RETURN(0); } -static int osd_oi_iam_insert(struct osd_thread_info *oti, struct osd_oi *oi, - const struct dt_rec *rec, const struct dt_key *key, - struct thandle *th, int ignore_quota) +static int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id) { - struct iam_container *bag; - struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_ldp; - struct iam_path_descr *ipd; - struct osd_thandle *oh; - int rc; -#ifdef HAVE_QUOTA_SUPPORT - cfs_cap_t save = cfs_curproc_cap_pack(); -#endif - ENTRY; - - LASSERT(oi); - LASSERT(oi->oi_inode); + struct lu_fid *oi_fid = &info->oti_fid2; + int rc; + + fid_cpu_to_be(oi_fid, fid); + rc = osd_oi_iam_lookup(info, osd_fid2oi(osd, fid), (struct dt_rec *)id, + (const struct dt_key *)oi_fid); + if (rc > 0) { + osd_id_unpack(id, id); + rc = 0; + } else if (rc == 0) { + rc = -ENOENT; + } + return rc; +} - bag = &oi->oi_dir.od_container; - ipd = osd_idx_ipd_get(oti->oti_env, bag); - if (unlikely(ipd == NULL)) - RETURN(-ENOMEM); +int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id, + enum oi_check_flags flags) +{ + if (unlikely(fid_is_last_id(fid))) + return osd_obj_spec_lookup(info, osd, fid, id); + + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) + return osd_obj_map_lookup(info, osd, fid, id); + + + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) { + int rc; + if (fid_is_fs_root(fid)) { + osd_id_gen(id, osd_sb(osd)->s_root->d_inode->i_ino, + osd_sb(osd)->s_root->d_inode->i_generation); + return 0; + } + if (unlikely(fid_is_acct(fid))) + return osd_acct_obj_lookup(info, osd, fid, id); + + /* For other special FIDs, try OI first, then do spec lookup */ + rc = __osd_oi_lookup(info, osd, fid, id); + if (rc == -ENOENT) + return osd_obj_spec_lookup(info, osd, fid, id); + return rc; + } + + if (!osd->od_igif_inoi && fid_is_igif(fid)) { + osd_id_gen(id, lu_igif_ino(fid), lu_igif_gen(fid)); + return 0; + } + + return __osd_oi_lookup(info, osd, fid, id); +} - oh = container_of0(th, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle != NULL); - LASSERT(oh->ot_handle->h_transaction != NULL); -#ifdef HAVE_QUOTA_SUPPORT - if (ignore_quota) - cfs_cap_raise(CFS_CAP_SYS_RESOURCE); - else - cfs_cap_lower(CFS_CAP_SYS_RESOURCE); -#endif - if (S_ISDIR(oi->oi_inode->i_mode)) - osd_fid_pack((struct osd_fid_pack *)iam_rec, rec, - &oti->oti_fid); - else - iam_rec = (struct iam_rec *) rec; - rc = iam_insert(oh->ot_handle, bag, (const struct iam_key *)key, - iam_rec, ipd); -#ifdef HAVE_QUOTA_SUPPORT - cfs_curproc_cap_unpack(save); -#endif - osd_ipd_put(oti->oti_env, bag, ipd); - LINVRNT(osd_invariant(obj)); - RETURN(rc); +static int osd_oi_iam_refresh(struct osd_thread_info *oti, struct osd_oi *oi, + const struct dt_rec *rec, const struct dt_key *key, + handle_t *th, bool insert) +{ + struct iam_container *bag; + struct iam_path_descr *ipd; + int rc; + ENTRY; + + LASSERT(oi); + LASSERT(oi->oi_inode); + ll_vfs_dq_init(oi->oi_inode); + + bag = &oi->oi_dir.od_container; + ipd = osd_idx_ipd_get(oti->oti_env, bag); + if (unlikely(ipd == NULL)) + RETURN(-ENOMEM); + + LASSERT(th != NULL); + LASSERT(th->h_transaction != NULL); + if (insert) + rc = iam_insert(th, bag, (const struct iam_key *)key, + (const struct iam_rec *)rec, ipd); + else + rc = iam_update(th, bag, (const struct iam_key *)key, + (const struct iam_rec *)rec, ipd); + osd_ipd_put(oti->oti_env, bag, ipd); + LINVRNT(osd_invariant(obj)); + RETURN(rc); } int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd, - const struct lu_fid *fid, const struct osd_inode_id *id0, - struct thandle *th, int ignore_quota) + const struct lu_fid *fid, const struct osd_inode_id *id, + handle_t *th, enum oi_check_flags flags, bool *exist) { - struct lu_fid *oi_fid = &info->oti_fid; - struct osd_inode_id *id; - const struct dt_key *key; - - if (fid_is_igif(fid)) - return 0; - - if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG) - return osd_compat_objid_insert(info, osd, fid, id0, th); - - /* notice we don't return immediately, but continue to get into OI */ - if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) - osd_compat_spec_insert(info, osd, fid, id0, th); - - fid_cpu_to_be(oi_fid, fid); - key = (struct dt_key *)oi_fid; - - id = &info->oti_id; - id->oii_ino = cpu_to_be32(id0->oii_ino); - id->oii_gen = cpu_to_be32(id0->oii_gen); - - return osd_oi_iam_insert(info, osd_fid2oi(osd, fid), - (struct dt_rec *)id, key, th, ignore_quota); + struct lu_fid *oi_fid = &info->oti_fid2; + struct osd_inode_id *oi_id = &info->oti_id2; + int rc = 0; + + if (unlikely(fid_is_last_id(fid))) + return osd_obj_spec_insert(info, osd, fid, id, th); + + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) + return osd_obj_map_insert(info, osd, fid, id, th); + + fid_cpu_to_be(oi_fid, fid); + osd_id_pack(oi_id, id); + rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid), + (const struct dt_rec *)oi_id, + (const struct dt_key *)oi_fid, th, true); + if (rc != 0) { + struct inode *inode; + struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma; + + if (rc != -EEXIST) + return rc; + + rc = osd_oi_lookup(info, osd, fid, oi_id, 0); + if (rc != 0) + return rc; + + if (unlikely(osd_id_eq(id, oi_id))) + return 1; + + /* Check whether the mapping for oi_id is valid or not. */ + inode = osd_iget(info, osd, oi_id); + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + if (rc == -ENOENT || rc == -ESTALE) + goto update; + return rc; + } + + /* The EA inode should NOT be in OI, old OI scrub may added + * such OI mapping by wrong, replace it. */ + if (unlikely(osd_is_ea_inode(inode))) { + iput(inode); + goto update; + } + + rc = osd_get_lma(info, inode, &info->oti_obj_dentry, + &info->oti_ost_attrs); + iput(inode); + if (rc == -ENODATA) + goto update; + + if (rc != 0) + return rc; + + if (!(lma->lma_compat & LMAC_NOT_IN_OI) && + lu_fid_eq(fid, &lma->lma_self_fid)) { + CERROR("%s: the FID "DFID" is used by two objects: " + "%u/%u %u/%u\n", osd_dev2name(osd), + PFID(fid), oi_id->oii_ino, oi_id->oii_gen, + id->oii_ino, id->oii_gen); + return -EEXIST; + } + +update: + osd_id_pack(oi_id, id); + rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid), + (const struct dt_rec *)oi_id, + (const struct dt_key *)oi_fid, th, false); + if (rc != 0) + return rc; + + if (exist != NULL) + *exist = true; + } + + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) + rc = osd_obj_spec_insert(info, osd, fid, id, th); + return rc; } static int osd_oi_iam_delete(struct osd_thread_info *oti, struct osd_oi *oi, - const struct dt_key *key, struct thandle *handle) + const struct dt_key *key, handle_t *th) { - struct iam_container *bag; - struct iam_path_descr *ipd; - struct osd_thandle *oh; - int rc; - ENTRY; - - LASSERT(oi); - - bag = &oi->oi_dir.od_container; - ipd = osd_idx_ipd_get(oti->oti_env, bag); - if (unlikely(ipd == NULL)) - RETURN(-ENOMEM); - - oh = container_of0(handle, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle != NULL); - LASSERT(oh->ot_handle->h_transaction != NULL); - - rc = iam_delete(oh->ot_handle, bag, (const struct iam_key *)key, ipd); - osd_ipd_put(oti->oti_env, bag, ipd); - LINVRNT(osd_invariant(obj)); - RETURN(rc); + struct iam_container *bag; + struct iam_path_descr *ipd; + int rc; + ENTRY; + + LASSERT(oi); + LASSERT(oi->oi_inode); + ll_vfs_dq_init(oi->oi_inode); + + bag = &oi->oi_dir.od_container; + ipd = osd_idx_ipd_get(oti->oti_env, bag); + if (unlikely(ipd == NULL)) + RETURN(-ENOMEM); + + LASSERT(th != NULL); + LASSERT(th->h_transaction != NULL); + + rc = iam_delete(th, bag, (const struct iam_key *)key, ipd); + osd_ipd_put(oti->oti_env, bag, ipd); + LINVRNT(osd_invariant(obj)); + RETURN(rc); } int osd_oi_delete(struct osd_thread_info *info, - struct osd_device *osd, const struct lu_fid *fid, - struct thandle *th) + struct osd_device *osd, const struct lu_fid *fid, + handle_t *th, enum oi_check_flags flags) { - struct lu_fid *oi_fid = &info->oti_fid; - const struct dt_key *key; + struct lu_fid *oi_fid = &info->oti_fid2; - if (!fid_is_norm(fid)) - return 0; + /* clear idmap cache */ + if (lu_fid_eq(fid, &info->oti_cache.oic_fid)) + fid_zero(&info->oti_cache.oic_fid); - LASSERT(fid_seq(fid) != FID_SEQ_LOCAL_FILE); + if (fid_is_last_id(fid)) + return 0; - if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG) - return osd_compat_objid_delete(info, osd, fid, th); + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) + return osd_obj_map_delete(info, osd, fid, th); - fid_cpu_to_be(oi_fid, fid); - key = (struct dt_key *)oi_fid; + fid_cpu_to_be(oi_fid, fid); + return osd_oi_iam_delete(info, osd_fid2oi(osd, fid), + (const struct dt_key *)oi_fid, th); +} - return osd_oi_iam_delete(info, osd_fid2oi(osd, fid), key, th); +int osd_oi_update(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, const struct osd_inode_id *id, + handle_t *th, enum oi_check_flags flags) +{ + struct lu_fid *oi_fid = &info->oti_fid2; + struct osd_inode_id *oi_id = &info->oti_id2; + int rc = 0; + + if (unlikely(fid_is_last_id(fid))) + return osd_obj_spec_update(info, osd, fid, id, th); + + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) + return osd_obj_map_update(info, osd, fid, id, th); + + fid_cpu_to_be(oi_fid, fid); + osd_id_pack(oi_id, id); + rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid), + (const struct dt_rec *)oi_id, + (const struct dt_key *)oi_fid, th, false); + if (rc != 0) + return rc; + + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) + rc = osd_obj_spec_update(info, osd, fid, id, th); + return rc; } -int osd_oi_mod_init() +int osd_oi_mod_init(void) { if (osd_oi_count == 0 || osd_oi_count > OSD_OI_FID_NR_MAX) osd_oi_count = OSD_OI_FID_NR; @@ -616,6 +837,6 @@ int osd_oi_mod_init() osd_oi_count = size_roundup_power2(osd_oi_count); } - cfs_mutex_init(&oi_init_lock); + mutex_init(&oi_init_lock); return 0; }