X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd-ldiskfs%2Fosd_oi.c;h=a6fed15322fb6e3c48df7fa1ccfa69d358745747;hb=85c6c099d83b19480dd4160de57e7ffac5b312af;hp=c63b7f59cf192299b9d5c0c94dcf8b1250f3a1d7;hpb=93e457796a1643a6ebf104ea1fc638d4add9e1b5;p=fs%2Flustre-release.git diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c index c63b7f5..a6fed15 100644 --- a/lustre/osd-ldiskfs/osd_oi.c +++ b/lustre/osd-ldiskfs/osd_oi.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -28,6 +26,8 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2012, 2014, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -40,19 +40,7 @@ * Author: Nikita Danilov */ -/* - * oi uses two mechanisms to implement fid->cookie mapping: - * - * - persistent index, where cookie is a record and fid is a key, and - * - * - algorithmic mapping for "igif" fids. - * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_MDS +#define DEBUG_SUBSYSTEM S_OSD #include @@ -67,21 +55,20 @@ /* fid_cpu_to_be() */ #include +#include #include "osd_oi.h" /* osd_lookup(), struct osd_thread_info */ #include "osd_internal.h" -#include "osd_igif.h" -#include "dt_object.h" +#include "osd_scrub.h" -struct oi_descr { - int fid_size; - char *name; - __u32 oid; -}; +static unsigned int osd_oi_count = OSD_OI_FID_NR; +CFS_MODULE_PARM(osd_oi_count, "i", int, 0444, + "Number of Object Index containers to be created, " + "it's only valid for new filesystem."); /** to serialize concurrent OI index initialization */ -static cfs_mutex_t oi_init_lock; +static struct mutex oi_init_lock; static struct dt_index_features oi_feat = { .dif_flags = DT_IND_UPDATE, @@ -90,190 +77,704 @@ static struct dt_index_features oi_feat = { .dif_ptrsize = 4 }; -static const struct oi_descr oi_descr[OSD_OI_FID_NR] = { - [OSD_OI_FID_16] = { - .fid_size = sizeof(struct lu_fid), - .name = "oi.16", - .oid = OSD_OI_FID_16_OID - } -}; +#define OSD_OI_NAME_BASE "oi.16" -static int osd_oi_index_create(struct osd_thread_info *info, - struct dt_device *dev, - struct md_device *mdev) +static void osd_oi_table_put(struct osd_thread_info *info, + struct osd_oi **oi_table, unsigned oi_count) { - const struct lu_env *env; - struct lu_fid *oi_fid = &info->oti_fid; - struct md_object *mdo; - int i; - int rc; + struct iam_container *bag; + int i; + + for (i = 0; i < oi_count; i++) { + if (oi_table[i] == NULL) + continue; + + LASSERT(oi_table[i]->oi_inode != NULL); + + bag = &(oi_table[i]->oi_dir.od_container); + if (bag->ic_object == oi_table[i]->oi_inode) + iam_container_fini(bag); + iput(oi_table[i]->oi_inode); + oi_table[i]->oi_inode = NULL; + OBD_FREE_PTR(oi_table[i]); + oi_table[i] = NULL; + } +} - env = info->oti_env; +static int osd_oi_index_create_one(struct osd_thread_info *info, + struct osd_device *osd, const char *name, + struct dt_index_features *feat) +{ + const struct lu_env *env = info->oti_env; + struct osd_inode_id *id = &info->oti_id; + struct buffer_head *bh; + struct inode *inode; + struct ldiskfs_dir_entry_2 *de; + struct dentry *dentry; + struct super_block *sb = osd_sb(osd); + struct inode *dir = sb->s_root->d_inode; + handle_t *jh; + int rc; + + dentry = osd_child_dentry_by_inode(env, dir, name, strlen(name)); + bh = osd_ldiskfs_find_entry(dir, &dentry->d_name, &de, NULL, NULL); + if (bh) { + osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN); + brelse(bh); + inode = osd_iget(info, osd, id); + if (!IS_ERR(inode)) { + iput(inode); + inode = ERR_PTR(-EEXIST); + } + return PTR_ERR(inode); + } + + jh = osd_journal_start_sb(sb, LDISKFS_HT_MISC, 100); + if (IS_ERR(jh)) + return PTR_ERR(jh); + + inode = ldiskfs_create_inode(jh, dir, (S_IFREG | S_IRUGO | S_IWUSR)); + if (IS_ERR(inode)) { + ldiskfs_journal_stop(jh); + return PTR_ERR(inode); + } + + ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB); + unlock_new_inode(inode); + + if (feat->dif_flags & DT_IND_VARKEY) + rc = iam_lvar_create(inode, feat->dif_keysize_max, + feat->dif_ptrsize, feat->dif_recsize_max, + jh); + else + rc = iam_lfix_create(inode, feat->dif_keysize_max, + feat->dif_ptrsize, feat->dif_recsize_max, + jh); + dentry = osd_child_dentry_by_inode(env, dir, name, strlen(name)); + rc = osd_ldiskfs_add_entry(jh, dentry, inode, NULL); + ldiskfs_journal_stop(jh); + iput(inode); + return rc; +} - for (i = rc = 0; i < OSD_OI_FID_NR && rc == 0; ++i) { - char *name; - name = oi_descr[i].name; - lu_local_obj_fid(oi_fid, oi_descr[i].oid); - oi_feat.dif_keysize_min = oi_descr[i].fid_size, - oi_feat.dif_keysize_max = oi_descr[i].fid_size, +static struct inode *osd_oi_index_open(struct osd_thread_info *info, + struct osd_device *osd, + const char *name, + struct dt_index_features *f, + bool create) +{ + struct dentry *dentry; + struct inode *inode; + int rc; + + dentry = ll_lookup_one_len(name, osd_sb(osd)->s_root, strlen(name)); + if (IS_ERR(dentry)) + return (void *) dentry; + + if (dentry->d_inode) { + LASSERT(!is_bad_inode(dentry->d_inode)); + inode = dentry->d_inode; + atomic_inc(&inode->i_count); + dput(dentry); + return inode; + } - mdo = llo_store_create_index(env, mdev, dev, - "", name, - oi_fid, &oi_feat); + /* create */ + dput(dentry); + shrink_dcache_parent(osd_sb(osd)->s_root); + if (!create) + return ERR_PTR(-ENOENT); + + rc = osd_oi_index_create_one(info, osd, name, f); + if (rc) + return ERR_PTR(rc); + + dentry = ll_lookup_one_len(name, osd_sb(osd)->s_root, strlen(name)); + if (IS_ERR(dentry)) + return (void *) dentry; + + if (dentry->d_inode) { + LASSERT(!is_bad_inode(dentry->d_inode)); + inode = dentry->d_inode; + atomic_inc(&inode->i_count); + dput(dentry); + return inode; + } - if (IS_ERR(mdo)) - RETURN(PTR_ERR(mdo)); + return ERR_PTR(-ENOENT); +} - lu_object_put(env, &mdo->mo_lu); - } - return 0; +/** + * Open an OI(Ojbect Index) container. + * + * \param name Name of OI container + * \param objp Pointer of returned OI + * + * \retval 0 success + * \retval -ve failure + */ +static int osd_oi_open(struct osd_thread_info *info, struct osd_device *osd, + char *name, struct osd_oi **oi_slot, bool create) +{ + struct osd_directory *dir; + struct iam_container *bag; + struct inode *inode; + struct osd_oi *oi; + int rc; + + ENTRY; + + oi_feat.dif_keysize_min = sizeof(struct lu_fid); + oi_feat.dif_keysize_max = sizeof(struct lu_fid); + + inode = osd_oi_index_open(info, osd, name, &oi_feat, create); + if (IS_ERR(inode)) + RETURN(PTR_ERR(inode)); + + /* 'What the @fid is' is not imporatant, because these objects + * have no OI mappings, and only are visible inside the OSD.*/ + lu_igif_build(&info->oti_fid, inode->i_ino, inode->i_generation); + rc = osd_ea_fid_set(info, inode, &info->oti_fid, LMAC_NOT_IN_OI, 0); + if (rc != 0) + GOTO(out_inode, rc); + + OBD_ALLOC_PTR(oi); + if (oi == NULL) + GOTO(out_inode, rc = -ENOMEM); + + oi->oi_inode = inode; + dir = &oi->oi_dir; + + bag = &dir->od_container; + rc = iam_container_init(bag, &dir->od_descr, inode); + if (rc < 0) + GOTO(out_free, rc); + + rc = iam_container_setup(bag); + if (rc < 0) + GOTO(out_container, rc); + + *oi_slot = oi; + RETURN(0); + +out_container: + iam_container_fini(bag); +out_free: + OBD_FREE_PTR(oi); +out_inode: + iput(inode); + return rc; } -int osd_oi_init(struct osd_thread_info *info, - struct osd_oi *oi, - struct dt_device *dev, - struct md_device *mdev) +/** + * Open OI(Object Index) table. + * If \a oi_count is zero, which means caller doesn't know how many OIs there + * will be, this function can either return 0 for new filesystem, or number + * of OIs on existed filesystem. + * + * If \a oi_count is non-zero, which means caller does know number of OIs on + * filesystem, this function should return the exactly same number on + * success, or error code in failure. + * + * \param oi_count Number of expected OI containers + * \param create Create OIs if doesn't exist + * + * \retval +ve number of opened OI containers + * \retval 0 no OI containers found + * \retval -ve failure + */ +static int +osd_oi_table_open(struct osd_thread_info *info, struct osd_device *osd, + struct osd_oi **oi_table, unsigned oi_count, bool create) { - const struct lu_env *env; - int rc; - int i; - - env = info->oti_env; - cfs_mutex_lock(&oi_init_lock); - memset(oi, 0, sizeof *oi); -retry: - for (i = rc = 0; i < OSD_OI_FID_NR && rc == 0; ++i) { - const char *name; - struct dt_object *obj; - - name = oi_descr[i].name; - oi_feat.dif_keysize_min = oi_descr[i].fid_size, - oi_feat.dif_keysize_max = oi_descr[i].fid_size, - - obj = dt_store_open(env, dev, "", name, &info->oti_fid); - if (!IS_ERR(obj)) { - rc = obj->do_ops->do_index_try(env, obj, &oi_feat); - if (rc == 0) { - LASSERT(obj->do_index_ops != NULL); - oi->oi_dir = obj; - } else { - CERROR("Wrong index \"%s\": %d\n", name, rc); - lu_object_put(env, &obj->do_lu); - } - } else { - rc = PTR_ERR(obj); - if (rc == -ENOENT) { - rc = osd_oi_index_create(info, dev, mdev); - if (!rc) - goto retry; - } - CERROR("Cannot open \"%s\": %d\n", name, rc); - } - } - if (rc != 0) - osd_oi_fini(info, oi); + struct scrub_file *sf = &osd->od_scrub.os_file; + int count = 0; + int rc = 0; + int i; + ENTRY; + + /* NB: oi_count != 0 means that we have already created/known all OIs + * and have known exact number of OIs. */ + LASSERT(oi_count <= OSD_OI_FID_NR_MAX); + + for (i = 0; i < (oi_count != 0 ? oi_count : OSD_OI_FID_NR_MAX); i++) { + char name[12]; + + if (oi_table[i] != NULL) { + count++; + continue; + } + + sprintf(name, "%s.%d", OSD_OI_NAME_BASE, i); + rc = osd_oi_open(info, osd, name, &oi_table[i], create); + if (rc == 0) { + count++; + continue; + } + + if (rc == -ENOENT && create == false) { + if (oi_count == 0) + return count; + + rc = 0; + ldiskfs_set_bit(i, sf->sf_oi_bitmap); + continue; + } + + CERROR("%.16s: can't open %s: rc = %d\n", + LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name, name, rc); + if (oi_count > 0) + CERROR("%.16s: expect to open total %d OI files.\n", + LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name, + oi_count); + break; + } + + if (rc < 0) { + osd_oi_table_put(info, oi_table, oi_count > 0 ? oi_count : i); + count = rc; + } + + RETURN(count); +} - cfs_mutex_unlock(&oi_init_lock); - return rc; +int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd) +{ + struct osd_scrub *scrub = &osd->od_scrub; + struct scrub_file *sf = &scrub->os_file; + struct osd_oi **oi; + int rc; + ENTRY; + + OBD_ALLOC(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX); + if (oi == NULL) + RETURN(-ENOMEM); + + mutex_lock(&oi_init_lock); + /* try to open existing multiple OIs first */ + rc = osd_oi_table_open(info, osd, oi, sf->sf_oi_count, false); + if (rc < 0) + GOTO(out, rc); + + if (rc > 0) { + if (rc == sf->sf_oi_count || sf->sf_oi_count == 0) + GOTO(out, rc); + + osd_scrub_file_reset(scrub, + LDISKFS_SB(osd_sb(osd))->s_es->s_uuid, + SF_RECREATED); + osd_oi_count = sf->sf_oi_count; + goto create; + } + + /* if previous failed then try found single OI from old filesystem */ + rc = osd_oi_open(info, osd, OSD_OI_NAME_BASE, &oi[0], false); + if (rc == 0) { /* found single OI from old filesystem */ + ldiskfs_clear_bit(0, sf->sf_oi_bitmap); + if (sf->sf_success_count == 0) + /* XXX: There is one corner case that if the OI_scrub + * file crashed or lost and we regard it upgrade, + * then we allow IGIF lookup to bypass OI files. + * + * The risk is that osd_fid_lookup() may found + * a wrong inode with the given IGIF especially + * when the MDT has performed file-level backup + * and restored after former upgrading from 1.8 + * to 2.x. Fortunately, the osd_fid_lookup()can + * verify the inode to decrease the risk. */ + osd_scrub_file_reset(scrub, + LDISKFS_SB(osd_sb(osd))->s_es->s_uuid, + SF_UPGRADE); + GOTO(out, rc = 1); + } else if (rc != -ENOENT) { + CERROR("%.16s: can't open %s: rc = %d\n", + LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name, + OSD_OI_NAME_BASE, rc); + GOTO(out, rc); + } + + if (sf->sf_oi_count > 0) { + int i; + + memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE); + for (i = 0; i < osd_oi_count; i++) + ldiskfs_set_bit(i, sf->sf_oi_bitmap); + osd_scrub_file_reset(scrub, + LDISKFS_SB(osd_sb(osd))->s_es->s_uuid, + SF_RECREATED); + } + sf->sf_oi_count = osd_oi_count; + +create: + rc = osd_scrub_file_store(scrub); + if (rc < 0) { + osd_oi_table_put(info, oi, sf->sf_oi_count); + GOTO(out, rc); + } + + /* No OIs exist, new filesystem, create OI objects */ + rc = osd_oi_table_open(info, osd, oi, osd_oi_count, true); + LASSERT(ergo(rc >= 0, rc == osd_oi_count)); + + GOTO(out, rc); + +out: + if (rc < 0) { + OBD_FREE(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX); + } else { + LASSERT((rc & (rc - 1)) == 0); + osd->od_oi_table = oi; + osd->od_oi_count = rc; + if (sf->sf_oi_count != rc) { + sf->sf_oi_count = rc; + rc = osd_scrub_file_store(scrub); + if (rc < 0) { + osd_oi_table_put(info, oi, sf->sf_oi_count); + OBD_FREE(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX); + } + } else { + rc = 0; + } + } + + mutex_unlock(&oi_init_lock); + return rc; } -void osd_oi_fini(struct osd_thread_info *info, struct osd_oi *oi) +void osd_oi_fini(struct osd_thread_info *info, struct osd_device *osd) { - if (oi->oi_dir != NULL) { - lu_object_put(info->oti_env, &oi->oi_dir->do_lu); - oi->oi_dir = NULL; - } + if (unlikely(osd->od_oi_table == NULL)) + return; + + osd_oi_table_put(info, osd->od_oi_table, osd->od_oi_count); + + OBD_FREE(osd->od_oi_table, + sizeof(*(osd->od_oi_table)) * OSD_OI_FID_NR_MAX); + osd->od_oi_table = NULL; } -static inline int fid_is_oi_fid(const struct lu_fid *fid) +static inline int fid_is_fs_root(const struct lu_fid *fid) { - /* We need to filter-out oi obj's fid. As we can not store it, while - * oi-index create operation. - */ + /* Map root inode to special local object FID */ return (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE && - fid_oid(fid) == OSD_OI_FID_16_OID)); + fid_oid(fid) == OSD_FS_ROOT_OID)); } -int osd_oi_lookup(struct osd_thread_info *info, struct osd_oi *oi, - const struct lu_fid *fid, struct osd_inode_id *id) +static int osd_oi_iam_lookup(struct osd_thread_info *oti, + struct osd_oi *oi, struct dt_rec *rec, + const struct dt_key *key) { - struct lu_fid *oi_fid = &info->oti_fid; - int rc; - - if (osd_fid_is_igif(fid)) { - lu_igif_to_id(fid, id); - rc = 0; - } else { - struct dt_object *idx; - const struct dt_key *key; - - if (fid_is_oi_fid(fid)) - return -ENOENT; - - idx = oi->oi_dir; - fid_cpu_to_be(oi_fid, fid); - key = (struct dt_key *) oi_fid; - rc = idx->do_index_ops->dio_lookup(info->oti_env, idx, - (struct dt_rec *)id, key, - BYPASS_CAPA); - if (rc > 0) { - id->oii_ino = be32_to_cpu(id->oii_ino); - id->oii_gen = be32_to_cpu(id->oii_gen); - rc = 0; - } else if (rc == 0) - rc = -ENOENT; - } - return rc; + struct iam_container *bag; + struct iam_iterator *it = &oti->oti_idx_it; + struct iam_path_descr *ipd; + int rc; + ENTRY; + + LASSERT(oi); + LASSERT(oi->oi_inode); + + bag = &oi->oi_dir.od_container; + ipd = osd_idx_ipd_get(oti->oti_env, bag); + if (IS_ERR(ipd)) + RETURN(-ENOMEM); + + /* got ipd now we can start iterator. */ + iam_it_init(it, bag, 0, ipd); + + rc = iam_it_get(it, (struct iam_key *)key); + if (rc > 0) + iam_reccpy(&it->ii_path.ip_leaf, (struct iam_rec *)rec); + iam_it_put(it); + iam_it_fini(it); + osd_ipd_put(oti->oti_env, bag, ipd); + + LINVRNT(osd_invariant(obj)); + + RETURN(rc); +} + +int fid_is_on_ost(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, enum oi_check_flags flags) +{ + struct lu_seq_range *range = &info->oti_seq_range; + int rc; + ENTRY; + + if (flags & OI_KNOWN_ON_OST) + RETURN(1); + + if (unlikely(fid_is_local_file(fid) || fid_is_igif(fid) || + fid_is_llog(fid)) || fid_is_name_llog(fid) || + fid_is_quota(fid)) + RETURN(0); + + if (fid_is_idif(fid) || fid_is_last_id(fid)) + RETURN(1); + + if (!(flags & OI_CHECK_FLD)) + RETURN(0); + + if (osd_seq_site(osd)->ss_server_fld == NULL) + RETURN(0); + + rc = osd_fld_lookup(info->oti_env, osd, fid_seq(fid), range); + if (rc != 0) { + if (rc != -ENOENT) + CERROR("%s: lookup FLD "DFID": rc = %d\n", + osd_name(osd), PFID(fid), rc); + RETURN(0); + } + + if (fld_range_is_ost(range)) + RETURN(1); + + RETURN(0); +} + +static int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id) +{ + struct lu_fid *oi_fid = &info->oti_fid2; + int rc; + + fid_cpu_to_be(oi_fid, fid); + rc = osd_oi_iam_lookup(info, osd_fid2oi(osd, fid), (struct dt_rec *)id, + (const struct dt_key *)oi_fid); + if (rc > 0) { + osd_id_unpack(id, id); + rc = 0; + } else if (rc == 0) { + rc = -ENOENT; + } + return rc; } -int osd_oi_insert(struct osd_thread_info *info, struct osd_oi *oi, - const struct lu_fid *fid, const struct osd_inode_id *id0, - struct thandle *th, int ignore_quota) +int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id, + enum oi_check_flags flags) { - struct lu_fid *oi_fid = &info->oti_fid; - struct dt_object *idx; - struct osd_inode_id *id; - const struct dt_key *key; - - if (osd_fid_is_igif(fid)) - return 0; - - if (fid_is_oi_fid(fid)) - return 0; - - idx = oi->oi_dir; - fid_cpu_to_be(oi_fid, fid); - key = (struct dt_key *) oi_fid; - - id = &info->oti_id; - id->oii_ino = cpu_to_be32(id0->oii_ino); - id->oii_gen = cpu_to_be32(id0->oii_gen); - return idx->do_index_ops->dio_insert(info->oti_env, idx, - (struct dt_rec *)id, - key, th, BYPASS_CAPA, - ignore_quota); + if (unlikely(fid_is_last_id(fid))) + return osd_obj_spec_lookup(info, osd, fid, id); + + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) + return osd_obj_map_lookup(info, osd, fid, id); + + + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) { + int rc; + if (fid_is_fs_root(fid)) { + osd_id_gen(id, osd_sb(osd)->s_root->d_inode->i_ino, + osd_sb(osd)->s_root->d_inode->i_generation); + return 0; + } + if (unlikely(fid_is_acct(fid))) + return osd_acct_obj_lookup(info, osd, fid, id); + + /* For other special FIDs, try OI first, then do spec lookup */ + rc = __osd_oi_lookup(info, osd, fid, id); + if (rc == -ENOENT) + return osd_obj_spec_lookup(info, osd, fid, id); + return rc; + } + + if (!osd->od_igif_inoi && fid_is_igif(fid)) { + osd_id_gen(id, lu_igif_ino(fid), lu_igif_gen(fid)); + return 0; + } + + return __osd_oi_lookup(info, osd, fid, id); +} + +static int osd_oi_iam_refresh(struct osd_thread_info *oti, struct osd_oi *oi, + const struct dt_rec *rec, const struct dt_key *key, + handle_t *th, bool insert) +{ + struct iam_container *bag; + struct iam_path_descr *ipd; + int rc; + ENTRY; + + LASSERT(oi); + LASSERT(oi->oi_inode); + ll_vfs_dq_init(oi->oi_inode); + + bag = &oi->oi_dir.od_container; + ipd = osd_idx_ipd_get(oti->oti_env, bag); + if (unlikely(ipd == NULL)) + RETURN(-ENOMEM); + + LASSERT(th != NULL); + LASSERT(th->h_transaction != NULL); + if (insert) + rc = iam_insert(th, bag, (const struct iam_key *)key, + (const struct iam_rec *)rec, ipd); + else + rc = iam_update(th, bag, (const struct iam_key *)key, + (const struct iam_rec *)rec, ipd); + osd_ipd_put(oti->oti_env, bag, ipd); + LINVRNT(osd_invariant(obj)); + RETURN(rc); +} + +int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, const struct osd_inode_id *id, + handle_t *th, enum oi_check_flags flags) +{ + struct lu_fid *oi_fid = &info->oti_fid2; + struct osd_inode_id *oi_id = &info->oti_id2; + int rc = 0; + + if (unlikely(fid_is_last_id(fid))) + return osd_obj_spec_insert(info, osd, fid, id, th); + + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) + return osd_obj_map_insert(info, osd, fid, id, th); + + fid_cpu_to_be(oi_fid, fid); + osd_id_pack(oi_id, id); + rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid), + (const struct dt_rec *)oi_id, + (const struct dt_key *)oi_fid, th, true); + if (rc != 0) { + struct inode *inode; + struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; + + if (rc != -EEXIST) + return rc; + + rc = osd_oi_lookup(info, osd, fid, oi_id, 0); + if (rc != 0) + return rc; + + if (unlikely(osd_id_eq(id, oi_id))) + return 0; + + /* Check whether the mapping for oi_id is valid or not. */ + inode = osd_iget(info, osd, oi_id); + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + if (rc == -ENOENT || rc == -ESTALE) + goto update; + return rc; + } + + rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma); + iput(inode); + if (rc == -ENODATA) + goto update; + + if (rc != 0) + return rc; + + if (!(lma->lma_compat & LMAC_NOT_IN_OI) && + lu_fid_eq(fid, &lma->lma_self_fid)) { + CERROR("%.16s: the FID "DFID" is used by two objects: " + "%u/%u %u/%u\n", + LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name, + PFID(fid), oi_id->oii_ino, oi_id->oii_gen, + id->oii_ino, id->oii_gen); + return -EEXIST; + } + +update: + osd_id_pack(oi_id, id); + rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid), + (const struct dt_rec *)oi_id, + (const struct dt_key *)oi_fid, th, false); + if (rc != 0) + return rc; + } + + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) + rc = osd_obj_spec_insert(info, osd, fid, id, th); + return rc; +} + +static int osd_oi_iam_delete(struct osd_thread_info *oti, struct osd_oi *oi, + const struct dt_key *key, handle_t *th) +{ + struct iam_container *bag; + struct iam_path_descr *ipd; + int rc; + ENTRY; + + LASSERT(oi); + LASSERT(oi->oi_inode); + ll_vfs_dq_init(oi->oi_inode); + + bag = &oi->oi_dir.od_container; + ipd = osd_idx_ipd_get(oti->oti_env, bag); + if (unlikely(ipd == NULL)) + RETURN(-ENOMEM); + + LASSERT(th != NULL); + LASSERT(th->h_transaction != NULL); + + rc = iam_delete(th, bag, (const struct iam_key *)key, ipd); + osd_ipd_put(oti->oti_env, bag, ipd); + LINVRNT(osd_invariant(obj)); + RETURN(rc); } int osd_oi_delete(struct osd_thread_info *info, - struct osd_oi *oi, const struct lu_fid *fid, - struct thandle *th) + struct osd_device *osd, const struct lu_fid *fid, + handle_t *th, enum oi_check_flags flags) { - struct lu_fid *oi_fid = &info->oti_fid; - struct dt_object *idx; - const struct dt_key *key; - - if (osd_fid_is_igif(fid)) - return 0; - - idx = oi->oi_dir; - fid_cpu_to_be(oi_fid, fid); - key = (struct dt_key *) oi_fid; - return idx->do_index_ops->dio_delete(info->oti_env, idx, - key, th, BYPASS_CAPA); + struct lu_fid *oi_fid = &info->oti_fid2; + + /* clear idmap cache */ + if (lu_fid_eq(fid, &info->oti_cache.oic_fid)) + fid_zero(&info->oti_cache.oic_fid); + + if (fid_is_last_id(fid)) + return 0; + + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) + return osd_obj_map_delete(info, osd, fid, th); + + fid_cpu_to_be(oi_fid, fid); + return osd_oi_iam_delete(info, osd_fid2oi(osd, fid), + (const struct dt_key *)oi_fid, th); } -int osd_oi_mod_init() +int osd_oi_update(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, const struct osd_inode_id *id, + handle_t *th, enum oi_check_flags flags) { - cfs_mutex_init(&oi_init_lock); + struct lu_fid *oi_fid = &info->oti_fid2; + struct osd_inode_id *oi_id = &info->oti_id2; + int rc = 0; + + if (unlikely(fid_is_last_id(fid))) + return osd_obj_spec_update(info, osd, fid, id, th); + + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) + return osd_obj_map_update(info, osd, fid, id, th); + + fid_cpu_to_be(oi_fid, fid); + osd_id_pack(oi_id, id); + rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid), + (const struct dt_rec *)oi_id, + (const struct dt_key *)oi_fid, th, false); + if (rc != 0) + return rc; + + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) + rc = osd_obj_spec_update(info, osd, fid, id, th); + return rc; +} + +int osd_oi_mod_init(void) +{ + if (osd_oi_count == 0 || osd_oi_count > OSD_OI_FID_NR_MAX) + osd_oi_count = OSD_OI_FID_NR; + + if ((osd_oi_count & (osd_oi_count - 1)) != 0) { + LCONSOLE_WARN("Round up oi_count %d to power2 %d\n", + osd_oi_count, size_roundup_power2(osd_oi_count)); + osd_oi_count = size_roundup_power2(osd_oi_count); + } + + mutex_init(&oi_init_lock); return 0; }