X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_oi.c;h=0802bf46699d99abf54b5bb25c1f555734e8d3c1;hp=427bde0afae4f4d7b5b4b29f3eb53a55e66b47b9;hb=60270c6488b01db756eb216548f83f2826972854;hpb=aafe85fac4aa0589185048c57a0cce2b8c6618ee diff --git a/lustre/osd-zfs/osd_oi.c b/lustre/osd-zfs/osd_oi.c index 427bde0..0802bf4 100644 --- a/lustre/osd-zfs/osd_oi.c +++ b/lustre/osd-zfs/osd_oi.c @@ -26,10 +26,8 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. - */ -/* - * Copyright (c) 2012, Intel Corporation. - * Use is subject to license terms. + * + * Copyright (c) 2012, 2015, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -43,9 +41,6 @@ * Author: Di Wang */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_OSD #include @@ -72,8 +67,6 @@ #include #include -static char *oi_tag = "osd_mount, oi"; - #define OSD_OI_FID_NR (1UL << 7) #define OSD_OI_FID_NR_MAX (1UL << OSD_OI_FID_OID_BITS_MAX) unsigned int osd_oi_count = OSD_OI_FID_NR; @@ -96,14 +89,12 @@ static const struct named_oid oids[] = { { MGS_CONFIGS_OID, NULL /*MOUNT_CONFIGS_DIR*/ }, { FID_SEQ_SRV_OID, "seq_srv" }, { FID_SEQ_CTL_OID, "seq_ctl" }, - { MDD_CAPA_KEYS_OID, NULL /*CAPA_KEYS*/ }, { FLD_INDEX_OID, "fld" }, { MDD_LOV_OBJ_OID, LOV_OBJID }, { OFD_HEALTH_CHECK_OID, HEALTH_CHECK }, { ACCT_USER_OID, "acct_usr_inode" }, { ACCT_GROUP_OID, "acct_grp_inode" }, - { MDD_ROOT_INDEX_OID, NULL }, - { MDD_ORPHAN_OID, NULL }, + { REPLY_DATA_OID, REPLY_DATA }, { 0, NULL } }; @@ -129,14 +120,17 @@ osd_oi_lookup(const struct lu_env *env, struct osd_device *o, struct zpl_direntry *zde = &osd_oti_get(env)->oti_zde.lzd_reg; int rc; - rc = -zap_lookup(o->od_objset.os, parent, name, 8, 1, (void *)zde); + rc = -zap_lookup(o->od_os, parent, name, 8, 1, (void *)zde); if (rc) return rc; - strncpy(oi->oi_name, name, OSD_OI_NAME_SIZE - 1); + rc = strlcpy(oi->oi_name, name, sizeof(oi->oi_name)); + if (rc >= sizeof(oi->oi_name)) + return -E2BIG; + oi->oi_zapid = zde->zde_dnode; - return rc; + return 0; } /** @@ -153,12 +147,12 @@ osd_oi_create(const struct lu_env *env, struct osd_device *o, int rc; /* verify it doesn't already exist */ - rc = -zap_lookup(o->od_objset.os, parent, name, 8, 1, (void *)zde); + rc = -zap_lookup(o->od_os, parent, name, 8, 1, (void *)zde); if (rc == 0) return -EEXIST; /* create fid-to-dnode index */ - tx = dmu_tx_create(o->od_objset.os); + tx = dmu_tx_create(o->od_os); if (tx == NULL) return -ENOMEM; @@ -177,18 +171,18 @@ osd_oi_create(const struct lu_env *env, struct osd_device *o, la->la_valid = LA_MODE | LA_UID | LA_GID; la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; la->la_uid = la->la_gid = 0; - __osd_zap_create(env, &o->od_objset, &db, tx, la, parent, oi_tag, 0); + __osd_zap_create(env, o, &db, tx, la, parent, 0); zde->zde_dnode = db->db_object; zde->zde_pad = 0; zde->zde_type = IFTODT(S_IFDIR); - rc = -zap_add(o->od_objset.os, parent, name, 8, 1, (void *)zde, tx); + rc = -zap_add(o->od_os, parent, name, 8, 1, (void *)zde, tx); dmu_tx_commit(tx); *child = db->db_object; - sa_buf_rele(db, oi_tag); + sa_buf_rele(db, osd_obj_tag); return rc; } @@ -214,74 +208,72 @@ osd_oi_find_or_create(const struct lu_env *env, struct osd_device *o, * the object is located (tgt index) and it is MDT or OST object. */ int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, struct lu_seq_range *range) + u64 seq, struct lu_seq_range *range) { struct seq_server_site *ss = osd_seq_site(osd); - int rc; - if (fid_is_idif(fid)) { - range->lsr_flags = LU_SEQ_RANGE_OST; - range->lsr_index = fid_idif_ost_idx(fid); + if (fid_seq_is_idif(seq)) { + fld_range_set_ost(range); + range->lsr_index = idif_ost_idx(seq); return 0; } - if (!fid_is_norm(fid)) { - range->lsr_flags = LU_SEQ_RANGE_MDT; + if (!fid_seq_in_fldb(seq)) { + fld_range_set_mdt(range); if (ss != NULL) + /* FIXME: If ss is NULL, it suppose not get lsr_index + * at all */ range->lsr_index = ss->ss_node_id; return 0; } LASSERT(ss != NULL); - range->lsr_flags = -1; - rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), range); - if (rc != 0) { - CERROR("%s can not find "DFID": rc = %d\n", - osd2lu_dev(osd)->ld_obd->obd_name, PFID(fid), rc); - } - return rc; + fld_range_set_any(range); + /* OSD will only do local fld lookup */ + return fld_local_lookup(env, ss->ss_server_fld, seq, range); } int fid_is_on_ost(const struct lu_env *env, struct osd_device *osd, const struct lu_fid *fid) { - struct lu_seq_range *range = &osd_oti_get(env)->oti_seq_range; - int rc; + struct lu_seq_range *range = &osd_oti_get(env)->oti_seq_range; + int rc; ENTRY; if (fid_is_idif(fid)) RETURN(1); - rc = osd_fld_lookup(env, osd, fid, range); + if (unlikely(fid_is_local_file(fid) || fid_is_llog(fid)) || + fid_is_name_llog(fid) || fid_is_quota(fid)) + RETURN(0); + + rc = osd_fld_lookup(env, osd, fid_seq(fid), range); if (rc != 0) { - CERROR("%s: Can not lookup fld for "DFID"\n", - osd2lu_dev(osd)->ld_obd->obd_name, PFID(fid)); - RETURN(rc); + if (rc != -ENOENT) + CERROR("%s: "DFID" lookup failed: rc = %d\n", + osd_name(osd), PFID(fid), rc); + RETURN(0); } - CDEBUG(D_INFO, "fid "DFID" range "DRANGE"\n", PFID(fid), - PRANGE(range)); - - if (range->lsr_flags == LU_SEQ_RANGE_OST) + if (fld_range_is_ost(range)) RETURN(1); RETURN(0); } static struct osd_seq *osd_seq_find_locked(struct osd_seq_list *seq_list, - obd_seq seq) + u64 seq) { struct osd_seq *osd_seq; - cfs_list_for_each_entry(osd_seq, &seq_list->osl_seq_list, os_seq_list) { + list_for_each_entry(osd_seq, &seq_list->osl_seq_list, os_seq_list) { if (osd_seq->os_seq == seq) return osd_seq; } return NULL; } -static struct osd_seq *osd_seq_find(struct osd_seq_list *seq_list, - obd_seq seq) +static struct osd_seq *osd_seq_find(struct osd_seq_list *seq_list, u64 seq) { struct osd_seq *osd_seq; @@ -293,7 +285,7 @@ static struct osd_seq *osd_seq_find(struct osd_seq_list *seq_list, } static struct osd_seq *osd_find_or_add_seq(const struct lu_env *env, - struct osd_device *osd, obd_seq seq) + struct osd_device *osd, u64 seq) { struct osd_seq_list *seq_list = &osd->od_seq_list; struct osd_seq *osd_seq; @@ -320,7 +312,7 @@ static struct osd_seq *osd_find_or_add_seq(const struct lu_env *env, if (osd_seq == NULL) GOTO(out, rc = -ENOMEM); - CFS_INIT_LIST_HEAD(&osd_seq->os_seq_list); + INIT_LIST_HEAD(&osd_seq->os_seq_list); osd_seq->os_seq = seq; /* Init subdir count to be 32, but each seq can have @@ -331,12 +323,7 @@ static struct osd_seq *osd_find_or_add_seq(const struct lu_env *env, if (osd_seq->os_compat_dirs == NULL) GOTO(out, rc = -ENOMEM); - rc = osd_oi_lookup(env, osd, osd->od_root, "O", &oi); - if (rc != 0) { - CERROR("%s: Can not find O: rc = %d\n", osd_name(osd), rc); - GOTO(out, rc); - } - + oi.oi_zapid = osd->od_O_id; sprintf(seq_name, (fid_seq_is_rsvd(seq) || fid_seq_is_mdt0(seq)) ? LPU64 : LPX64i, fid_seq_is_idif(seq) ? 0 : seq); @@ -348,19 +335,16 @@ static struct osd_seq *osd_find_or_add_seq(const struct lu_env *env, GOTO(out, rc); } - if (seq == 0) - osd->od_ost_compat_grp0 = odb; - for (i = 0; i < OSD_OST_MAP_SIZE; i++) { sprintf(key, "d%d", i); rc = osd_oi_find_or_create(env, osd, odb, key, &sdb); if (rc) - GOTO(out, osd_seq = ERR_PTR(rc)); + GOTO(out, rc); osd_seq->os_compat_dirs[i] = sdb; } write_lock(&seq_list->osl_seq_list_lock); - cfs_list_add(&osd_seq->os_seq_list, &seq_list->osl_seq_list); + list_add(&osd_seq->os_seq_list, &seq_list->osl_seq_list); write_unlock(&seq_list->osl_seq_list_lock); out: up(&seq_list->osl_seq_init_sem); @@ -386,6 +370,7 @@ osd_get_idx_for_ost_obj(const struct lu_env *env, struct osd_device *osd, { struct osd_seq *osd_seq; unsigned long b; + u64 id; int rc; osd_seq = osd_find_or_add_seq(env, osd, fid_seq(fid)); @@ -394,12 +379,19 @@ osd_get_idx_for_ost_obj(const struct lu_env *env, struct osd_device *osd, PFID(fid)); return PTR_ERR(osd_seq); } - rc = fid_ostid_pack(fid, &osd_oti_get(env)->oti_ostid); - LASSERT(rc == 0); /* we should not get here with IGIF */ - b = osd_oti_get(env)->oti_ostid.oi_id % OSD_OST_MAP_SIZE; + + if (fid_is_last_id(fid)) { + id = 0; + } else { + rc = fid_to_ostid(fid, &osd_oti_get(env)->oti_ostid); + LASSERT(rc == 0); /* we should not get here with IGIF */ + id = ostid_id(&osd_oti_get(env)->oti_ostid); + } + + b = id % OSD_OST_MAP_SIZE; LASSERT(osd_seq->os_compat_dirs[b]); - sprintf(buf, LPU64, osd_oti_get(env)->oti_ostid.oi_id); + sprintf(buf, LPU64, id); return osd_seq->os_compat_dirs[b]; } @@ -437,10 +429,8 @@ uint64_t osd_get_name_n_idx(const struct lu_env *env, struct osd_device *osd, LASSERT(fid); LASSERT(buf); - if (fid_is_on_ost(env, osd, fid) == 1) { + if (fid_is_on_ost(env, osd, fid) == 1 || fid_seq(fid) == FID_SEQ_ECHO) { zapid = osd_get_idx_for_ost_obj(env, osd, fid, buf); - } else if (fid_is_last_id(fid)) { - zapid = osd->od_ost_compat_grp0; } else if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) { /* special objects with fixed known fids get their name */ char *name = oid2name(fid_oid(fid)); @@ -476,7 +466,7 @@ int osd_fid_lookup(const struct lu_env *env, struct osd_device *dev, int rc = 0; ENTRY; - if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) + if (OBD_FAIL_CHECK(OBD_FAIL_SRV_ENOENT)) RETURN(-ENOENT); if (unlikely(fid_is_acct(fid))) { @@ -489,13 +479,16 @@ int osd_fid_lookup(const struct lu_env *env, struct osd_device *dev, } else { zapid = osd_get_name_n_idx(env, dev, fid, buf); - rc = -zap_lookup(dev->od_objset.os, zapid, buf, + rc = -zap_lookup(dev->od_os, zapid, buf, 8, 1, &info->oti_zde); if (rc) RETURN(rc); *oid = info->oti_zde.lzd_reg.zde_dnode; } + if (rc == 0) + dmu_prefetch(dev->od_os, *oid, 0, 0); + RETURN(rc); } @@ -511,6 +504,8 @@ osd_oi_remove_table(const struct lu_env *env, struct osd_device *o, int key) oi = o->od_oi_table[key]; if (oi) { + if (oi->oi_db) + sa_buf_rele(oi->oi_db, osd_obj_tag); OBD_FREE_PTR(oi); o->od_oi_table[key] = NULL; } @@ -540,6 +535,7 @@ osd_oi_add_table(const struct lu_env *env, struct osd_device *o, } o->od_oi_table[key] = oi; + __osd_obj2dbuf(env, o->od_os, oi->oi_zapid, &oi->oi_db); return 0; } @@ -623,24 +619,15 @@ osd_oi_probe(const struct lu_env *env, struct osd_device *o, int *count) RETURN(0); } -static void osd_ost_seq_init(const struct lu_env *env, struct osd_device *osd) -{ - struct osd_seq_list *osl = &osd->od_seq_list; - - CFS_INIT_LIST_HEAD(&osl->osl_seq_list); - rwlock_init(&osl->osl_seq_list_lock); - sema_init(&osl->osl_seq_init_sem, 1); -} - static void osd_ost_seq_fini(const struct lu_env *env, struct osd_device *osd) { struct osd_seq_list *osl = &osd->od_seq_list; struct osd_seq *osd_seq, *tmp; write_lock(&osl->osl_seq_list_lock); - cfs_list_for_each_entry_safe(osd_seq, tmp, &osl->osl_seq_list, - os_seq_list) { - cfs_list_del(&osd_seq->os_seq_list); + list_for_each_entry_safe(osd_seq, tmp, &osl->osl_seq_list, + os_seq_list) { + list_del(&osd_seq->os_seq_list); OBD_FREE(osd_seq->os_compat_dirs, sizeof(uint64_t) * osd_seq->os_subdir_count); OBD_FREE(osd_seq, sizeof(*osd_seq)); @@ -664,7 +651,8 @@ osd_oi_init_compat(const struct lu_env *env, struct osd_device *o) if (rc) RETURN(rc); - osd_ost_seq_init(env, o); + o->od_O_id = sdb; + /* Create on-disk indexes to maintain per-UID/GID inode usage. * Those new indexes are created in the top-level ZAP outside the * namespace in order not to confuse ZPL which might interpret those @@ -684,107 +672,6 @@ osd_oi_init_compat(const struct lu_env *env, struct osd_device *o) RETURN(rc); } -static char *root2convert = "ROOT"; -/* - * due to DNE requirements we have to change sequence of /ROOT object - * so that it doesn't belong to the local sequence FID_SEQ_LOCAL_FILE - * but a normal sequence living on MDS#0 - * this is the sole purpose of this function. - * - * This is only needed for pre-production 2.4 ZFS filesystems, and - * can be removed in the future. - */ -int osd_convert_root_to_new_seq(const struct lu_env *env, - struct osd_device *o) -{ - struct luz_direntry *lze = &osd_oti_get(env)->oti_zde; - char *buf = osd_oti_get(env)->oti_str; - struct lu_fid newfid; - uint64_t zapid; - dmu_tx_t *tx = NULL; - int rc; - ENTRY; - - /* ignore OSTs */ - if (strstr(o->od_svname, "MDT") == NULL) - RETURN(0); - - /* lookup /ROOT */ - rc = -zap_lookup(o->od_objset.os, o->od_root, root2convert, 8, - sizeof(*lze) / 8, (void *)lze); - /* doesn't exist or let actual user to handle the error */ - if (rc) - RETURN(0); - - CDEBUG(D_OTHER, "%s: /ROOT -> "DFID" -> "LPU64"\n", o->od_svname, - PFID(&lze->lzd_fid), (long long int) lze->lzd_reg.zde_dnode); - - /* already right one? */ - if (fid_seq(&lze->lzd_fid) == FID_SEQ_ROOT) - return 0; - - tx = dmu_tx_create(o->od_objset.os); - if (tx == NULL) - return -ENOMEM; - - dmu_tx_hold_bonus(tx, o->od_root); - - /* declare delete/insert of the name */ - dmu_tx_hold_zap(tx, o->od_root, TRUE, root2convert); - dmu_tx_hold_zap(tx, o->od_root, FALSE, root2convert); - - /* declare that we'll remove object from fid-dnode mapping */ - zapid = osd_get_name_n_idx(env, o, &lze->lzd_fid, buf); - dmu_tx_hold_bonus(tx, zapid); - dmu_tx_hold_zap(tx, zapid, FALSE, buf); - - /* declare that we'll add object to fid-dnode mapping */ - newfid.f_seq = FID_SEQ_ROOT; - newfid.f_oid = 1; - newfid.f_ver = 0; - zapid = osd_get_name_n_idx(env, o, &newfid, buf); - dmu_tx_hold_bonus(tx, zapid); - dmu_tx_hold_zap(tx, zapid, TRUE, buf); - - rc = -dmu_tx_assign(tx, TXG_WAIT); - if (rc) - GOTO(err, rc); - - rc = -zap_remove(o->od_objset.os, o->od_root, root2convert, tx); - if (rc) - GOTO(err, rc); - - /* remove from OI */ - zapid = osd_get_name_n_idx(env, o, &lze->lzd_fid, buf); - rc = -zap_remove(o->od_objset.os, zapid, buf, tx); - if (rc) - GOTO(err, rc); - - lze->lzd_fid = newfid; - rc = -zap_add(o->od_objset.os, o->od_root, root2convert, - 8, sizeof(*lze) / 8, (void *)lze, tx); - if (rc) - GOTO(err, rc); - - /* add to OI with the new fid */ - zapid = osd_get_name_n_idx(env, o, &newfid, buf); - rc = -zap_add(o->od_objset.os, zapid, buf, 8, 1, &lze->lzd_reg, tx); - if (rc) - GOTO(err, rc); - - - /* LMA will be updated in mdd_compat_fixes */ - dmu_tx_commit(tx); - - RETURN(rc); - -err: - if (tx) - dmu_tx_abort(tx); - CERROR("%s: can't convert to new fid: rc = %d\n", o->od_svname, rc); - RETURN(rc); -} - /** * Initialize the OIs by either opening or creating them as needed. */ @@ -862,5 +749,3 @@ int osd_options_init(void) return 0; } - -