X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosd-ldiskfs%2Fosd_oi.c;h=ae3d5c10a6cc829f2ad40d9cbd2c5949eef61f94;hp=b625d826c2c1f3c772ec17777cd1bcdf232fb941;hb=07660ad33a7d109cced29b6400f99f25adab3f54;hpb=b36763df4f2b3695766e90598f9435a814cc10d0 diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c index b625d82..ae3d5c1 100644 --- a/lustre/osd-ldiskfs/osd_oi.c +++ b/lustre/osd-ldiskfs/osd_oi.c @@ -27,7 +27,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Intel Corporation. + * Copyright (c) 2012, 2015, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -40,16 +40,7 @@ * Author: Nikita Danilov */ -/* - * oi uses two mechanisms to implement fid->cookie mapping: - * - * - persistent index, where cookie is a record and fid is a key, and - * - * - algorithmic mapping for "igif" fids. - * - */ - -#define DEBUG_SUBSYSTEM S_MDS +#define DEBUG_SUBSYSTEM S_OSD #include @@ -69,7 +60,6 @@ #include "osd_oi.h" /* osd_lookup(), struct osd_thread_info */ #include "osd_internal.h" -#include "osd_igif.h" #include "osd_scrub.h" static unsigned int osd_oi_count = OSD_OI_FID_NR; @@ -127,7 +117,7 @@ static int osd_oi_index_create_one(struct osd_thread_info *info, int rc; dentry = osd_child_dentry_by_inode(env, dir, name, strlen(name)); - bh = osd_ldiskfs_find_entry(dir, dentry, &de, NULL); + bh = osd_ldiskfs_find_entry(dir, &dentry->d_name, &de, NULL, NULL); if (bh) { osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN); brelse(bh); @@ -139,7 +129,7 @@ static int osd_oi_index_create_one(struct osd_thread_info *info, return PTR_ERR(inode); } - jh = ldiskfs_journal_start_sb(sb, 100); + jh = osd_journal_start_sb(sb, LDISKFS_HT_MISC, 100); if (IS_ERR(jh)) return PTR_ERR(jh); @@ -149,6 +139,9 @@ static int osd_oi_index_create_one(struct osd_thread_info *info, return PTR_ERR(inode); } + ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB); + unlock_new_inode(inode); + if (feat->dif_flags & DT_IND_VARKEY) rc = iam_lvar_create(inode, feat->dif_keysize_max, feat->dif_ptrsize, feat->dif_recsize_max, @@ -158,7 +151,7 @@ static int osd_oi_index_create_one(struct osd_thread_info *info, feat->dif_ptrsize, feat->dif_recsize_max, jh); dentry = osd_child_dentry_by_inode(env, dir, name, strlen(name)); - rc = osd_ldiskfs_add_entry(jh, dentry, inode, NULL); + rc = osd_ldiskfs_add_entry(info, jh, dentry, inode, NULL); ldiskfs_journal_stop(jh); iput(inode); return rc; @@ -238,6 +231,13 @@ static int osd_oi_open(struct osd_thread_info *info, struct osd_device *osd, if (IS_ERR(inode)) RETURN(PTR_ERR(inode)); + /* 'What the @fid is' is not imporatant, because these objects + * have no OI mappings, and only are visible inside the OSD.*/ + lu_igif_build(&info->oti_fid, inode->i_ino, inode->i_generation); + rc = osd_ea_fid_set(info, inode, &info->oti_fid, LMAC_NOT_IN_OI, 0); + if (rc != 0) + GOTO(out_inode, rc); + OBD_ALLOC_PTR(oi); if (oi == NULL) GOTO(out_inode, rc = -ENOMEM); @@ -338,7 +338,54 @@ osd_oi_table_open(struct osd_thread_info *info, struct osd_device *osd, RETURN(count); } -int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd) +static int osd_remove_oi_one(struct dentry *parent, const char *name, + int namelen) +{ + struct dentry *child; + int rc; + + child = ll_lookup_one_len(name, parent, namelen); + if (IS_ERR(child)) { + rc = PTR_ERR(child); + } else { + rc = ll_vfs_unlink(parent->d_inode, child); + dput(child); + } + + return rc == -ENOENT ? 0 : rc; +} + +static int osd_remove_ois(struct osd_thread_info *info, struct osd_device *osd) +{ + char name[16]; + int namelen; + int rc; + int i; + + for (i = 0; i < osd->od_scrub.os_file.sf_oi_count; i++) { + namelen = snprintf(name, sizeof(name), "%s.%d", + OSD_OI_NAME_BASE, i); + rc = osd_remove_oi_one(osd_sb(osd)->s_root, name, namelen); + if (rc != 0) { + CERROR("%.16s: fail to remove the stale OI file %s: " + "rc = %d\n", + LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name, + name, rc); + return rc; + } + } + + namelen = snprintf(name, sizeof(name), "%s", OSD_OI_NAME_BASE); + rc = osd_remove_oi_one(osd_sb(osd)->s_root, name, namelen); + if (rc != 0) + CERROR("%.16s: fail to remove the stale OI file %s: rc = %d\n", + LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name, name, rc); + + return rc; +} + +int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd, + bool restored) { struct osd_scrub *scrub = &osd->od_scrub; struct scrub_file *sf = &scrub->os_file; @@ -346,6 +393,12 @@ int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd) int rc; ENTRY; + if (restored) { + rc = osd_remove_ois(info, osd); + if (rc != 0) + return rc; + } + OBD_ALLOC(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX); if (oi == NULL) RETURN(-ENOMEM); @@ -370,6 +423,21 @@ int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd) /* if previous failed then try found single OI from old filesystem */ rc = osd_oi_open(info, osd, OSD_OI_NAME_BASE, &oi[0], false); if (rc == 0) { /* found single OI from old filesystem */ + ldiskfs_clear_bit(0, sf->sf_oi_bitmap); + if (sf->sf_success_count == 0) + /* XXX: There is one corner case that if the OI_scrub + * file crashed or lost and we regard it upgrade, + * then we allow IGIF lookup to bypass OI files. + * + * The risk is that osd_fid_lookup() may found + * a wrong inode with the given IGIF especially + * when the MDT has performed file-level backup + * and restored after former upgrading from 1.8 + * to 2.x. Fortunately, the osd_fid_lookup()can + * verify the inode to decrease the risk. */ + osd_scrub_file_reset(scrub, + LDISKFS_SB(osd_sb(osd))->s_es->s_uuid, + SF_UPGRADE); GOTO(out, rc = 1); } else if (rc != -ENOENT) { CERROR("%.16s: can't open %s: rc = %d\n", @@ -410,7 +478,16 @@ out: LASSERT((rc & (rc - 1)) == 0); osd->od_oi_table = oi; osd->od_oi_count = rc; - rc = 0; + if (sf->sf_oi_count != rc) { + sf->sf_oi_count = rc; + rc = osd_scrub_file_store(scrub); + if (rc < 0) { + osd_oi_table_put(info, oi, sf->sf_oi_count); + OBD_FREE(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX); + } + } else { + rc = 0; + } } mutex_unlock(&oi_init_lock); @@ -442,7 +519,6 @@ static int osd_oi_iam_lookup(struct osd_thread_info *oti, { struct iam_container *bag; struct iam_iterator *it = &oti->oti_idx_it; - struct iam_rec *iam_rec; struct iam_path_descr *ipd; int rc; ENTRY; @@ -459,17 +535,8 @@ static int osd_oi_iam_lookup(struct osd_thread_info *oti, iam_it_init(it, bag, 0, ipd); rc = iam_it_get(it, (struct iam_key *)key); - if (rc >= 0) { - if (S_ISDIR(oi->oi_inode->i_mode)) - iam_rec = (struct iam_rec *)oti->oti_ldp; - else - iam_rec = (struct iam_rec *)rec; - - iam_reccpy(&it->ii_path.ip_leaf, (struct iam_rec *)iam_rec); - if (S_ISDIR(oi->oi_inode->i_mode)) - osd_fid_unpack((struct lu_fid *)rec, - (struct osd_fid_pack *)iam_rec); - } + if (rc > 0) + iam_reccpy(&it->ii_path.ip_leaf, (struct iam_rec *)rec); iam_it_put(it); iam_it_fini(it); osd_ipd_put(oti->oti_env, bag, ipd); @@ -480,33 +547,45 @@ static int osd_oi_iam_lookup(struct osd_thread_info *oti, } int fid_is_on_ost(struct osd_thread_info *info, struct osd_device *osd, - const struct lu_fid *fid) + const struct lu_fid *fid, enum oi_check_flags flags) { - struct lu_seq_range *range = &info->oti_seq_range; - int rc; + struct lu_seq_range *range = &info->oti_seq_range; + int rc; ENTRY; + if (flags & OI_KNOWN_ON_OST) + RETURN(1); + + if (unlikely(fid_is_local_file(fid) || fid_is_igif(fid) || + fid_is_llog(fid)) || fid_is_name_llog(fid) || + fid_is_quota(fid)) + RETURN(0); + if (fid_is_idif(fid) || fid_is_last_id(fid)) RETURN(1); - rc = osd_fld_lookup(info->oti_env, osd, fid, range); + if (!(flags & OI_CHECK_FLD)) + RETURN(0); + + if (osd_seq_site(osd)->ss_server_fld == NULL) + RETURN(0); + + rc = osd_fld_lookup(info->oti_env, osd, fid_seq(fid), range); if (rc != 0) { - CERROR("%s: Can not lookup fld for "DFID"\n", - osd2lu_dev(osd)->ld_obd->obd_name, PFID(fid)); - RETURN(rc); + if (rc != -ENOENT) + CERROR("%s: lookup FLD "DFID": rc = %d\n", + osd_name(osd), PFID(fid), rc); + RETURN(0); } - CDEBUG(D_INFO, "fid "DFID" range "DRANGE"\n", PFID(fid), - PRANGE(range)); - - if (range->lsr_flags == LU_SEQ_RANGE_OST) + if (fld_range_is_ost(range)) RETURN(1); RETURN(0); } -int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, - const struct lu_fid *fid, struct osd_inode_id *id) +static int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id) { struct lu_fid *oi_fid = &info->oti_fid2; int rc; @@ -524,133 +603,193 @@ int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, } int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, - const struct lu_fid *fid, struct osd_inode_id *id) + const struct lu_fid *fid, struct osd_inode_id *id, + enum oi_check_flags flags) { - int rc = 0; - - if ((!fid_is_last_id(fid) && fid_is_on_ost(info, osd, fid)) || - fid_is_llog(fid)) { - /* old OSD obj id */ - /* FIXME: actually for all of the OST object */ - rc = osd_obj_map_lookup(info, osd, fid, id); - } else if (fid_is_igif(fid)) { - lu_igif_to_id(fid, id); - } else if (fid_is_fs_root(fid)) { - osd_id_gen(id, osd_sb(osd)->s_root->d_inode->i_ino, - osd_sb(osd)->s_root->d_inode->i_generation); - } else { + if (unlikely(fid_is_last_id(fid))) + return osd_obj_spec_lookup(info, osd, fid, id); + + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) + return osd_obj_map_lookup(info, osd, fid, id); + + + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) { + int rc; + if (fid_is_fs_root(fid)) { + osd_id_gen(id, osd_sb(osd)->s_root->d_inode->i_ino, + osd_sb(osd)->s_root->d_inode->i_generation); + return 0; + } if (unlikely(fid_is_acct(fid))) return osd_acct_obj_lookup(info, osd, fid, id); - else if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE) || - fid_is_last_id(fid)) - return osd_obj_spec_lookup(info, osd, fid, id); + /* For other special FIDs, try OI first, then do spec lookup */ rc = __osd_oi_lookup(info, osd, fid, id); + if (rc == -ENOENT) + return osd_obj_spec_lookup(info, osd, fid, id); + return rc; } - return rc; + + if (!osd->od_igif_inoi && fid_is_igif(fid)) { + osd_id_gen(id, lu_igif_ino(fid), lu_igif_gen(fid)); + return 0; + } + + return __osd_oi_lookup(info, osd, fid, id); } -static int osd_oi_iam_insert(struct osd_thread_info *oti, struct osd_oi *oi, +static int osd_oi_iam_refresh(struct osd_thread_info *oti, struct osd_oi *oi, const struct dt_rec *rec, const struct dt_key *key, - struct thandle *th) + handle_t *th, bool insert) { - struct iam_container *bag; - struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_ldp; - struct iam_path_descr *ipd; - struct osd_thandle *oh; - int rc; - ENTRY; + struct iam_container *bag; + struct iam_path_descr *ipd; + int rc; + ENTRY; - LASSERT(oi); - LASSERT(oi->oi_inode); + LASSERT(oi); + LASSERT(oi->oi_inode); ll_vfs_dq_init(oi->oi_inode); - bag = &oi->oi_dir.od_container; - ipd = osd_idx_ipd_get(oti->oti_env, bag); - if (unlikely(ipd == NULL)) - RETURN(-ENOMEM); + bag = &oi->oi_dir.od_container; + ipd = osd_idx_ipd_get(oti->oti_env, bag); + if (unlikely(ipd == NULL)) + RETURN(-ENOMEM); - oh = container_of0(th, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle != NULL); - LASSERT(oh->ot_handle->h_transaction != NULL); - if (S_ISDIR(oi->oi_inode->i_mode)) - osd_fid_pack((struct osd_fid_pack *)iam_rec, rec, - &oti->oti_fid); - else - iam_rec = (struct iam_rec *) rec; - rc = iam_insert(oh->ot_handle, bag, (const struct iam_key *)key, - iam_rec, ipd); - osd_ipd_put(oti->oti_env, bag, ipd); - LINVRNT(osd_invariant(obj)); - RETURN(rc); + LASSERT(th != NULL); + LASSERT(th->h_transaction != NULL); + if (insert) + rc = iam_insert(th, bag, (const struct iam_key *)key, + (const struct iam_rec *)rec, ipd); + else + rc = iam_update(th, bag, (const struct iam_key *)key, + (const struct iam_rec *)rec, ipd); + osd_ipd_put(oti->oti_env, bag, ipd); + LINVRNT(osd_invariant(obj)); + RETURN(rc); } int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd, const struct lu_fid *fid, const struct osd_inode_id *id, - struct thandle *th) + handle_t *th, enum oi_check_flags flags) { struct lu_fid *oi_fid = &info->oti_fid2; - struct osd_inode_id *oi_id = &info->oti_id2; + struct osd_inode_id *oi_id = &info->oti_id2; + int rc = 0; - if (fid_is_igif(fid) || unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE)) - return 0; + if (unlikely(fid_is_last_id(fid))) + return osd_obj_spec_insert(info, osd, fid, id, th); - if ((fid_is_on_ost(info, osd, fid) && !fid_is_last_id(fid)) || - fid_is_llog(fid)) + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) return osd_obj_map_insert(info, osd, fid, id, th); - /* Server mount should not depends on OI files */ - if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE) || - fid_is_last_id(fid)) - return osd_obj_spec_insert(info, osd, fid, id, th); - fid_cpu_to_be(oi_fid, fid); osd_id_pack(oi_id, id); - return osd_oi_iam_insert(info, osd_fid2oi(osd, fid), - (const struct dt_rec *)oi_id, - (const struct dt_key *)oi_fid, th); + rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid), + (const struct dt_rec *)oi_id, + (const struct dt_key *)oi_fid, th, true); + if (rc != 0) { + struct inode *inode; + struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; + + if (rc != -EEXIST) + return rc; + + rc = osd_oi_lookup(info, osd, fid, oi_id, 0); + if (rc != 0) + return rc; + + if (unlikely(osd_id_eq(id, oi_id))) + return 1; + + /* Check whether the mapping for oi_id is valid or not. */ + inode = osd_iget(info, osd, oi_id); + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + if (rc == -ENOENT || rc == -ESTALE) + goto update; + return rc; + } + + /* The EA inode should NOT be in OI, old OI scrub may added + * such OI mapping by wrong, replace it. */ + if (unlikely(osd_is_ea_inode(inode))) { + iput(inode); + goto update; + } + + rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma); + iput(inode); + if (rc == -ENODATA) + goto update; + + if (rc != 0) + return rc; + + if (!(lma->lma_compat & LMAC_NOT_IN_OI) && + lu_fid_eq(fid, &lma->lma_self_fid)) { + CERROR("%.16s: the FID "DFID" is used by two objects: " + "%u/%u %u/%u\n", + LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name, + PFID(fid), oi_id->oii_ino, oi_id->oii_gen, + id->oii_ino, id->oii_gen); + return -EEXIST; + } + +update: + osd_id_pack(oi_id, id); + rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid), + (const struct dt_rec *)oi_id, + (const struct dt_key *)oi_fid, th, false); + if (rc != 0) + return rc; + } + + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) + rc = osd_obj_spec_insert(info, osd, fid, id, th); + return rc; } static int osd_oi_iam_delete(struct osd_thread_info *oti, struct osd_oi *oi, - const struct dt_key *key, struct thandle *handle) + const struct dt_key *key, handle_t *th) { - struct iam_container *bag; - struct iam_path_descr *ipd; - struct osd_thandle *oh; - int rc; - ENTRY; + struct iam_container *bag; + struct iam_path_descr *ipd; + int rc; + ENTRY; - LASSERT(oi); + LASSERT(oi); LASSERT(oi->oi_inode); ll_vfs_dq_init(oi->oi_inode); - bag = &oi->oi_dir.od_container; - ipd = osd_idx_ipd_get(oti->oti_env, bag); - if (unlikely(ipd == NULL)) - RETURN(-ENOMEM); + bag = &oi->oi_dir.od_container; + ipd = osd_idx_ipd_get(oti->oti_env, bag); + if (unlikely(ipd == NULL)) + RETURN(-ENOMEM); - oh = container_of0(handle, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle != NULL); - LASSERT(oh->ot_handle->h_transaction != NULL); + LASSERT(th != NULL); + LASSERT(th->h_transaction != NULL); - rc = iam_delete(oh->ot_handle, bag, (const struct iam_key *)key, ipd); - osd_ipd_put(oti->oti_env, bag, ipd); - LINVRNT(osd_invariant(obj)); - RETURN(rc); + rc = iam_delete(th, bag, (const struct iam_key *)key, ipd); + osd_ipd_put(oti->oti_env, bag, ipd); + LINVRNT(osd_invariant(obj)); + RETURN(rc); } int osd_oi_delete(struct osd_thread_info *info, struct osd_device *osd, const struct lu_fid *fid, - struct thandle *th) + handle_t *th, enum oi_check_flags flags) { struct lu_fid *oi_fid = &info->oti_fid2; - if (fid_is_igif(fid) || fid_is_last_id(fid)) - return 0; + /* clear idmap cache */ + if (lu_fid_eq(fid, &info->oti_cache.oic_fid)) + fid_zero(&info->oti_cache.oic_fid); - LASSERT(fid_seq(fid) != FID_SEQ_LOCAL_FILE); + if (fid_is_last_id(fid)) + return 0; - if (fid_is_on_ost(info, osd, fid) || fid_is_llog(fid)) + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) return osd_obj_map_delete(info, osd, fid, th); fid_cpu_to_be(oi_fid, fid); @@ -658,6 +797,33 @@ int osd_oi_delete(struct osd_thread_info *info, (const struct dt_key *)oi_fid, th); } +int osd_oi_update(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, const struct osd_inode_id *id, + handle_t *th, enum oi_check_flags flags) +{ + struct lu_fid *oi_fid = &info->oti_fid2; + struct osd_inode_id *oi_id = &info->oti_id2; + int rc = 0; + + if (unlikely(fid_is_last_id(fid))) + return osd_obj_spec_update(info, osd, fid, id, th); + + if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid)) + return osd_obj_map_update(info, osd, fid, id, th); + + fid_cpu_to_be(oi_fid, fid); + osd_id_pack(oi_id, id); + rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid), + (const struct dt_rec *)oi_id, + (const struct dt_key *)oi_fid, th, false); + if (rc != 0) + return rc; + + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) + rc = osd_obj_spec_update(info, osd, fid, id, th); + return rc; +} + int osd_oi_mod_init(void) { if (osd_oi_count == 0 || osd_oi_count > OSD_OI_FID_NR_MAX)