X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Fosd-zfs%2Fosd_oi.c;h=538385825dfd679df14f8811a4757ecf7259614a;hb=226fdfbd8d177587787f473f4fb48714e1ffad91;hp=427bde0afae4f4d7b5b4b29f3eb53a55e66b47b9;hpb=aafe85fac4aa0589185048c57a0cce2b8c6618ee;p=fs%2Flustre-release.git diff --git a/lustre/osd-zfs/osd_oi.c b/lustre/osd-zfs/osd_oi.c index 427bde0..5383858 100644 --- a/lustre/osd-zfs/osd_oi.c +++ b/lustre/osd-zfs/osd_oi.c @@ -15,21 +15,15 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. - */ -/* - * Copyright (c) 2012, Intel Corporation. - * Use is subject to license terms. + * + * Copyright (c) 2012, 2016, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -43,9 +37,6 @@ * Author: Di Wang */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_OSD #include @@ -72,8 +63,6 @@ #include #include -static char *oi_tag = "osd_mount, oi"; - #define OSD_OI_FID_NR (1UL << 7) #define OSD_OI_FID_NR_MAX (1UL << OSD_OI_FID_OID_BITS_MAX) unsigned int osd_oi_count = OSD_OI_FID_NR; @@ -90,21 +79,19 @@ struct named_oid { }; static const struct named_oid oids[] = { - { LAST_RECV_OID, LAST_RCVD }, - { OFD_LAST_GROUP_OID, "LAST_GROUP" }, - { LLOG_CATALOGS_OID, "CATALOGS" }, - { MGS_CONFIGS_OID, NULL /*MOUNT_CONFIGS_DIR*/ }, - { FID_SEQ_SRV_OID, "seq_srv" }, - { FID_SEQ_CTL_OID, "seq_ctl" }, - { MDD_CAPA_KEYS_OID, NULL /*CAPA_KEYS*/ }, - { FLD_INDEX_OID, "fld" }, - { MDD_LOV_OBJ_OID, LOV_OBJID }, - { OFD_HEALTH_CHECK_OID, HEALTH_CHECK }, - { ACCT_USER_OID, "acct_usr_inode" }, - { ACCT_GROUP_OID, "acct_grp_inode" }, - { MDD_ROOT_INDEX_OID, NULL }, - { MDD_ORPHAN_OID, NULL }, - { 0, NULL } + { .oid = LAST_RECV_OID, .name = LAST_RCVD }, + { .oid = OFD_LAST_GROUP_OID, .name = "LAST_GROUP" }, + { .oid = LLOG_CATALOGS_OID, .name = "CATALOGS" }, + { .oid = MGS_CONFIGS_OID, /*MOUNT_CONFIGS_DIR*/ }, + { .oid = FID_SEQ_SRV_OID, .name = "seq_srv" }, + { .oid = FID_SEQ_CTL_OID, .name = "seq_ctl" }, + { .oid = FLD_INDEX_OID, .name = "fld" }, + { .oid = MDD_LOV_OBJ_OID, .name = LOV_OBJID }, + { .oid = OFD_HEALTH_CHECK_OID, .name = HEALTH_CHECK }, + { .oid = ACCT_USER_OID, .name = "acct_usr_inode" }, + { .oid = ACCT_GROUP_OID, .name = "acct_grp_inode" }, + { .oid = REPLY_DATA_OID, .name = REPLY_DATA }, + { .oid = 0 } }; static char *oid2name(const unsigned long oid) @@ -129,14 +116,17 @@ osd_oi_lookup(const struct lu_env *env, struct osd_device *o, struct zpl_direntry *zde = &osd_oti_get(env)->oti_zde.lzd_reg; int rc; - rc = -zap_lookup(o->od_objset.os, parent, name, 8, 1, (void *)zde); + rc = -zap_lookup(o->od_os, parent, name, 8, 1, (void *)zde); if (rc) return rc; - strncpy(oi->oi_name, name, OSD_OI_NAME_SIZE - 1); + rc = strlcpy(oi->oi_name, name, sizeof(oi->oi_name)); + if (rc >= sizeof(oi->oi_name)) + return -E2BIG; + oi->oi_zapid = zde->zde_dnode; - return rc; + return 0; } /** @@ -148,17 +138,18 @@ osd_oi_create(const struct lu_env *env, struct osd_device *o, { struct zpl_direntry *zde = &osd_oti_get(env)->oti_zde.lzd_reg; struct lu_attr *la = &osd_oti_get(env)->oti_la; - dmu_buf_t *db; + sa_handle_t *sa_hdl = NULL; dmu_tx_t *tx; + uint64_t oid; int rc; /* verify it doesn't already exist */ - rc = -zap_lookup(o->od_objset.os, parent, name, 8, 1, (void *)zde); + rc = -zap_lookup(o->od_os, parent, name, 8, 1, (void *)zde); if (rc == 0) return -EEXIST; /* create fid-to-dnode index */ - tx = dmu_tx_create(o->od_objset.os); + tx = dmu_tx_create(o->od_os); if (tx == NULL) return -ENOMEM; @@ -174,21 +165,36 @@ osd_oi_create(const struct lu_env *env, struct osd_device *o, return rc; } + oid = zap_create_flags(o->od_os, 0, ZAP_FLAG_HASH64, + DMU_OT_DIRECTORY_CONTENTS, + 14, /* == ZFS fzap_default_block_shift */ + DN_MAX_INDBLKSHIFT, /* indirect block shift */ + DMU_OT_SA, DN_MAX_BONUSLEN, tx); + + rc = -sa_handle_get(o->od_os, oid, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (rc) + goto commit; la->la_valid = LA_MODE | LA_UID | LA_GID; la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; la->la_uid = la->la_gid = 0; - __osd_zap_create(env, &o->od_objset, &db, tx, la, parent, oi_tag, 0); + rc = __osd_attr_init(env, o, sa_hdl, tx, la, parent); + sa_handle_destroy(sa_hdl); + if (rc) + goto commit; - zde->zde_dnode = db->db_object; + zde->zde_dnode = oid; zde->zde_pad = 0; zde->zde_type = IFTODT(S_IFDIR); - rc = -zap_add(o->od_objset.os, parent, name, 8, 1, (void *)zde, tx); + rc = -zap_add(o->od_os, parent, name, 8, 1, (void *)zde, tx); +commit: + if (rc) + dmu_object_free(o->od_os, oid, tx); dmu_tx_commit(tx); - *child = db->db_object; - sa_buf_rele(db, oi_tag); + if (rc == 0) + *child = oid; return rc; } @@ -214,74 +220,72 @@ osd_oi_find_or_create(const struct lu_env *env, struct osd_device *o, * the object is located (tgt index) and it is MDT or OST object. */ int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, struct lu_seq_range *range) + u64 seq, struct lu_seq_range *range) { struct seq_server_site *ss = osd_seq_site(osd); - int rc; - if (fid_is_idif(fid)) { - range->lsr_flags = LU_SEQ_RANGE_OST; - range->lsr_index = fid_idif_ost_idx(fid); + if (fid_seq_is_idif(seq)) { + fld_range_set_ost(range); + range->lsr_index = idif_ost_idx(seq); return 0; } - if (!fid_is_norm(fid)) { - range->lsr_flags = LU_SEQ_RANGE_MDT; + if (!fid_seq_in_fldb(seq)) { + fld_range_set_mdt(range); if (ss != NULL) + /* FIXME: If ss is NULL, it suppose not get lsr_index + * at all */ range->lsr_index = ss->ss_node_id; return 0; } LASSERT(ss != NULL); - range->lsr_flags = -1; - rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), range); - if (rc != 0) { - CERROR("%s can not find "DFID": rc = %d\n", - osd2lu_dev(osd)->ld_obd->obd_name, PFID(fid), rc); - } - return rc; + fld_range_set_any(range); + /* OSD will only do local fld lookup */ + return fld_local_lookup(env, ss->ss_server_fld, seq, range); } int fid_is_on_ost(const struct lu_env *env, struct osd_device *osd, const struct lu_fid *fid) { - struct lu_seq_range *range = &osd_oti_get(env)->oti_seq_range; - int rc; + struct lu_seq_range *range = &osd_oti_get(env)->oti_seq_range; + int rc; ENTRY; if (fid_is_idif(fid)) RETURN(1); - rc = osd_fld_lookup(env, osd, fid, range); + if (unlikely(fid_is_local_file(fid) || fid_is_llog(fid)) || + fid_is_name_llog(fid) || fid_is_quota(fid)) + RETURN(0); + + rc = osd_fld_lookup(env, osd, fid_seq(fid), range); if (rc != 0) { - CERROR("%s: Can not lookup fld for "DFID"\n", - osd2lu_dev(osd)->ld_obd->obd_name, PFID(fid)); - RETURN(rc); + if (rc != -ENOENT) + CERROR("%s: "DFID" lookup failed: rc = %d\n", + osd_name(osd), PFID(fid), rc); + RETURN(0); } - CDEBUG(D_INFO, "fid "DFID" range "DRANGE"\n", PFID(fid), - PRANGE(range)); - - if (range->lsr_flags == LU_SEQ_RANGE_OST) + if (fld_range_is_ost(range)) RETURN(1); RETURN(0); } static struct osd_seq *osd_seq_find_locked(struct osd_seq_list *seq_list, - obd_seq seq) + u64 seq) { struct osd_seq *osd_seq; - cfs_list_for_each_entry(osd_seq, &seq_list->osl_seq_list, os_seq_list) { + list_for_each_entry(osd_seq, &seq_list->osl_seq_list, os_seq_list) { if (osd_seq->os_seq == seq) return osd_seq; } return NULL; } -static struct osd_seq *osd_seq_find(struct osd_seq_list *seq_list, - obd_seq seq) +static struct osd_seq *osd_seq_find(struct osd_seq_list *seq_list, u64 seq) { struct osd_seq *osd_seq; @@ -293,7 +297,7 @@ static struct osd_seq *osd_seq_find(struct osd_seq_list *seq_list, } static struct osd_seq *osd_find_or_add_seq(const struct lu_env *env, - struct osd_device *osd, obd_seq seq) + struct osd_device *osd, u64 seq) { struct osd_seq_list *seq_list = &osd->od_seq_list; struct osd_seq *osd_seq; @@ -320,7 +324,7 @@ static struct osd_seq *osd_find_or_add_seq(const struct lu_env *env, if (osd_seq == NULL) GOTO(out, rc = -ENOMEM); - CFS_INIT_LIST_HEAD(&osd_seq->os_seq_list); + INIT_LIST_HEAD(&osd_seq->os_seq_list); osd_seq->os_seq = seq; /* Init subdir count to be 32, but each seq can have @@ -331,14 +335,9 @@ static struct osd_seq *osd_find_or_add_seq(const struct lu_env *env, if (osd_seq->os_compat_dirs == NULL) GOTO(out, rc = -ENOMEM); - rc = osd_oi_lookup(env, osd, osd->od_root, "O", &oi); - if (rc != 0) { - CERROR("%s: Can not find O: rc = %d\n", osd_name(osd), rc); - GOTO(out, rc); - } - + oi.oi_zapid = osd->od_O_id; sprintf(seq_name, (fid_seq_is_rsvd(seq) || - fid_seq_is_mdt0(seq)) ? LPU64 : LPX64i, + fid_seq_is_mdt0(seq)) ? "%llu" : "%llx", fid_seq_is_idif(seq) ? 0 : seq); rc = osd_oi_find_or_create(env, osd, oi.oi_zapid, seq_name, &odb); @@ -348,19 +347,16 @@ static struct osd_seq *osd_find_or_add_seq(const struct lu_env *env, GOTO(out, rc); } - if (seq == 0) - osd->od_ost_compat_grp0 = odb; - for (i = 0; i < OSD_OST_MAP_SIZE; i++) { sprintf(key, "d%d", i); rc = osd_oi_find_or_create(env, osd, odb, key, &sdb); if (rc) - GOTO(out, osd_seq = ERR_PTR(rc)); + GOTO(out, rc); osd_seq->os_compat_dirs[i] = sdb; } write_lock(&seq_list->osl_seq_list_lock); - cfs_list_add(&osd_seq->os_seq_list, &seq_list->osl_seq_list); + list_add(&osd_seq->os_seq_list, &seq_list->osl_seq_list); write_unlock(&seq_list->osl_seq_list_lock); out: up(&seq_list->osl_seq_init_sem); @@ -382,10 +378,11 @@ out: */ static uint64_t osd_get_idx_for_ost_obj(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, char *buf) + const struct lu_fid *fid, char *buf, int bufsize) { struct osd_seq *osd_seq; unsigned long b; + u64 id; int rc; osd_seq = osd_find_or_add_seq(env, osd, fid_seq(fid)); @@ -394,12 +391,20 @@ osd_get_idx_for_ost_obj(const struct lu_env *env, struct osd_device *osd, PFID(fid)); return PTR_ERR(osd_seq); } - rc = fid_ostid_pack(fid, &osd_oti_get(env)->oti_ostid); - LASSERT(rc == 0); /* we should not get here with IGIF */ - b = osd_oti_get(env)->oti_ostid.oi_id % OSD_OST_MAP_SIZE; + + if (fid_is_last_id(fid)) { + id = 0; + } else { + rc = fid_to_ostid(fid, &osd_oti_get(env)->oti_ostid); + LASSERT(rc == 0); /* we should not get here with IGIF */ + id = ostid_id(&osd_oti_get(env)->oti_ostid); + } + + b = id % OSD_OST_MAP_SIZE; LASSERT(osd_seq->os_compat_dirs[b]); - sprintf(buf, LPU64, osd_oti_get(env)->oti_ostid.oi_id); + if (buf) + snprintf(buf, bufsize, "%llu", id); return osd_seq->os_compat_dirs[b]; } @@ -424,30 +429,29 @@ osd_get_idx_for_fid(struct osd_device *osd, const struct lu_fid *fid, LASSERT(osd->od_oi_table != NULL); oi = osd->od_oi_table[fid_seq(fid) & (osd->od_oi_count - 1)]; - osd_fid2str(buf, fid); + if (buf) + osd_fid2str(buf, fid); return oi->oi_zapid; } uint64_t osd_get_name_n_idx(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, char *buf) + const struct lu_fid *fid, char *buf, int bufsize) { uint64_t zapid; LASSERT(fid); - LASSERT(buf); - if (fid_is_on_ost(env, osd, fid) == 1) { - zapid = osd_get_idx_for_ost_obj(env, osd, fid, buf); - } else if (fid_is_last_id(fid)) { - zapid = osd->od_ost_compat_grp0; + if (fid_is_on_ost(env, osd, fid) == 1 || fid_seq(fid) == FID_SEQ_ECHO) { + zapid = osd_get_idx_for_ost_obj(env, osd, fid, buf, bufsize); } else if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) { /* special objects with fixed known fids get their name */ char *name = oid2name(fid_oid(fid)); if (name) { zapid = osd->od_root; - strcpy(buf, name); + if (buf) + strncpy(buf, name, bufsize); if (fid_is_acct(fid)) zapid = MASTER_NODE_OBJ; } else { @@ -476,7 +480,7 @@ int osd_fid_lookup(const struct lu_env *env, struct osd_device *dev, int rc = 0; ENTRY; - if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) + if (OBD_FAIL_CHECK(OBD_FAIL_SRV_ENOENT)) RETURN(-ENOENT); if (unlikely(fid_is_acct(fid))) { @@ -487,15 +491,19 @@ int osd_fid_lookup(const struct lu_env *env, struct osd_device *dev, } else if (unlikely(fid_is_fs_root(fid))) { *oid = dev->od_root; } else { - zapid = osd_get_name_n_idx(env, dev, fid, buf); - - rc = -zap_lookup(dev->od_objset.os, zapid, buf, + zapid = osd_get_name_n_idx(env, dev, fid, buf, + sizeof(info->oti_buf)); + rc = -zap_lookup(dev->od_os, zapid, buf, 8, 1, &info->oti_zde); if (rc) RETURN(rc); *oid = info->oti_zde.lzd_reg.zde_dnode; } + if (rc == 0) + osd_dmu_prefetch(dev->od_os, *oid, 0, 0, 0, + ZIO_PRIORITY_ASYNC_READ); + RETURN(rc); } @@ -511,6 +519,8 @@ osd_oi_remove_table(const struct lu_env *env, struct osd_device *o, int key) oi = o->od_oi_table[key]; if (oi) { + if (oi->oi_db) + sa_buf_rele(oi->oi_db, osd_obj_tag); OBD_FREE_PTR(oi); o->od_oi_table[key] = NULL; } @@ -540,6 +550,7 @@ osd_oi_add_table(const struct lu_env *env, struct osd_device *o, } o->od_oi_table[key] = oi; + __osd_obj2dbuf(env, o->od_os, oi->oi_zapid, &oi->oi_db); return 0; } @@ -623,24 +634,15 @@ osd_oi_probe(const struct lu_env *env, struct osd_device *o, int *count) RETURN(0); } -static void osd_ost_seq_init(const struct lu_env *env, struct osd_device *osd) -{ - struct osd_seq_list *osl = &osd->od_seq_list; - - CFS_INIT_LIST_HEAD(&osl->osl_seq_list); - rwlock_init(&osl->osl_seq_list_lock); - sema_init(&osl->osl_seq_init_sem, 1); -} - static void osd_ost_seq_fini(const struct lu_env *env, struct osd_device *osd) { struct osd_seq_list *osl = &osd->od_seq_list; struct osd_seq *osd_seq, *tmp; write_lock(&osl->osl_seq_list_lock); - cfs_list_for_each_entry_safe(osd_seq, tmp, &osl->osl_seq_list, - os_seq_list) { - cfs_list_del(&osd_seq->os_seq_list); + list_for_each_entry_safe(osd_seq, tmp, &osl->osl_seq_list, + os_seq_list) { + list_del(&osd_seq->os_seq_list); OBD_FREE(osd_seq->os_compat_dirs, sizeof(uint64_t) * osd_seq->os_subdir_count); OBD_FREE(osd_seq, sizeof(*osd_seq)); @@ -664,7 +666,8 @@ osd_oi_init_compat(const struct lu_env *env, struct osd_device *o) if (rc) RETURN(rc); - osd_ost_seq_init(env, o); + o->od_O_id = sdb; + /* Create on-disk indexes to maintain per-UID/GID inode usage. * Those new indexes are created in the top-level ZAP outside the * namespace in order not to confuse ZPL which might interpret those @@ -684,107 +687,6 @@ osd_oi_init_compat(const struct lu_env *env, struct osd_device *o) RETURN(rc); } -static char *root2convert = "ROOT"; -/* - * due to DNE requirements we have to change sequence of /ROOT object - * so that it doesn't belong to the local sequence FID_SEQ_LOCAL_FILE - * but a normal sequence living on MDS#0 - * this is the sole purpose of this function. - * - * This is only needed for pre-production 2.4 ZFS filesystems, and - * can be removed in the future. - */ -int osd_convert_root_to_new_seq(const struct lu_env *env, - struct osd_device *o) -{ - struct luz_direntry *lze = &osd_oti_get(env)->oti_zde; - char *buf = osd_oti_get(env)->oti_str; - struct lu_fid newfid; - uint64_t zapid; - dmu_tx_t *tx = NULL; - int rc; - ENTRY; - - /* ignore OSTs */ - if (strstr(o->od_svname, "MDT") == NULL) - RETURN(0); - - /* lookup /ROOT */ - rc = -zap_lookup(o->od_objset.os, o->od_root, root2convert, 8, - sizeof(*lze) / 8, (void *)lze); - /* doesn't exist or let actual user to handle the error */ - if (rc) - RETURN(0); - - CDEBUG(D_OTHER, "%s: /ROOT -> "DFID" -> "LPU64"\n", o->od_svname, - PFID(&lze->lzd_fid), (long long int) lze->lzd_reg.zde_dnode); - - /* already right one? */ - if (fid_seq(&lze->lzd_fid) == FID_SEQ_ROOT) - return 0; - - tx = dmu_tx_create(o->od_objset.os); - if (tx == NULL) - return -ENOMEM; - - dmu_tx_hold_bonus(tx, o->od_root); - - /* declare delete/insert of the name */ - dmu_tx_hold_zap(tx, o->od_root, TRUE, root2convert); - dmu_tx_hold_zap(tx, o->od_root, FALSE, root2convert); - - /* declare that we'll remove object from fid-dnode mapping */ - zapid = osd_get_name_n_idx(env, o, &lze->lzd_fid, buf); - dmu_tx_hold_bonus(tx, zapid); - dmu_tx_hold_zap(tx, zapid, FALSE, buf); - - /* declare that we'll add object to fid-dnode mapping */ - newfid.f_seq = FID_SEQ_ROOT; - newfid.f_oid = 1; - newfid.f_ver = 0; - zapid = osd_get_name_n_idx(env, o, &newfid, buf); - dmu_tx_hold_bonus(tx, zapid); - dmu_tx_hold_zap(tx, zapid, TRUE, buf); - - rc = -dmu_tx_assign(tx, TXG_WAIT); - if (rc) - GOTO(err, rc); - - rc = -zap_remove(o->od_objset.os, o->od_root, root2convert, tx); - if (rc) - GOTO(err, rc); - - /* remove from OI */ - zapid = osd_get_name_n_idx(env, o, &lze->lzd_fid, buf); - rc = -zap_remove(o->od_objset.os, zapid, buf, tx); - if (rc) - GOTO(err, rc); - - lze->lzd_fid = newfid; - rc = -zap_add(o->od_objset.os, o->od_root, root2convert, - 8, sizeof(*lze) / 8, (void *)lze, tx); - if (rc) - GOTO(err, rc); - - /* add to OI with the new fid */ - zapid = osd_get_name_n_idx(env, o, &newfid, buf); - rc = -zap_add(o->od_objset.os, zapid, buf, 8, 1, &lze->lzd_reg, tx); - if (rc) - GOTO(err, rc); - - - /* LMA will be updated in mdd_compat_fixes */ - dmu_tx_commit(tx); - - RETURN(rc); - -err: - if (tx) - dmu_tx_abort(tx); - CERROR("%s: can't convert to new fid: rc = %d\n", o->od_svname, rc); - RETURN(rc); -} - /** * Initialize the OIs by either opening or creating them as needed. */ @@ -862,5 +764,3 @@ int osd_options_init(void) return 0; } - -