X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_oi.c;h=794035ef0bff27a9a18f44f592e34cf98733098d;hp=e629821120101482d3ac1c775155a71fb731980b;hb=42bedf90159bcaf0c8415f19a821ba11b2957155;hpb=33c7936e9175f5cb151ad0cfc8c9694751eb4430

diff --git a/lustre/osd-zfs/osd_oi.c b/lustre/osd-zfs/osd_oi.c
index e629821..794035e 100644
--- a/lustre/osd-zfs/osd_oi.c
+++ b/lustre/osd-zfs/osd_oi.c
@@ -40,6 +40,7 @@
  *
  * Author: Alex Zhuravlev
  * Author: Mike Pershin
+ * Author: Di Wang
  */
 
 #ifndef EXPORT_SYMTAB
@@ -89,7 +90,7 @@ struct named_oid {
 };
 
 static const struct named_oid oids[] = {
-	{ OFD_LAST_RECV_OID,		LAST_RCVD },
+	{ LAST_RECV_OID,		LAST_RCVD },
 	{ OFD_LAST_GROUP_OID,		"LAST_GROUP" },
 	{ LLOG_CATALOGS_OID,		"CATALOGS" },
 	{ MGS_CONFIGS_OID,		NULL /*MOUNT_CONFIGS_DIR*/ },
@@ -98,7 +99,6 @@ static const struct named_oid oids[] = {
 	{ MDD_CAPA_KEYS_OID,		NULL /*CAPA_KEYS*/ },
 	{ FLD_INDEX_OID,		"fld" },
 	{ MDD_LOV_OBJ_OID,		LOV_OBJID },
-	{ MDT_LAST_RECV_OID,		LAST_RCVD },
 	{ OFD_HEALTH_CHECK_OID,		HEALTH_CHECK },
 	{ ACCT_USER_OID,		"acct_usr_inode" },
 	{ ACCT_GROUP_OID,		"acct_grp_inode" },
@@ -119,6 +119,262 @@ static char *oid2name(const unsigned long oid)
 	return NULL;
 }
 
+/**
+ * Look up the entry \a name in the ZAP object \a parent and describe it
+ * in \a oi.
+ *
+ * \param[in]  env	execution environment; supplies the per-thread
+ *			zpl_direntry buffer the entry is read into
+ * \param[in]  o	OSD device whose objset is searched
+ * \param[in]  parent	object id of the ZAP to search
+ * \param[in]  name	name of the entry
+ * \param[out] oi	receives a copy of \a name (truncated to fit) and
+ *			the dnode number stored in the entry
+ *
+ * \retval 0	entry found, \a oi filled in
+ * \retval <0	negated zap_lookup() error, \a oi is not modified
+ */
+static int
+osd_oi_lookup(const struct lu_env *env, struct osd_device *o,
+	      uint64_t parent, const char *name, struct osd_oi *oi)
+{
+	struct zpl_direntry	*zde = &osd_oti_get(env)->oti_zde.lzd_reg;
+	int			 rc;
+
+	/* the entry value is read as a single 8-byte integer */
+	rc = -zap_lookup(o->od_objset.os, parent, name, 8, 1, (void *)zde);
+	if (rc)
+		return rc;
+
+	/* strncpy() does not terminate a truncated copy, and callers in
+	 * this file pass an uninitialized on-stack osd_oi, so terminate
+	 * explicitly (assumes oi_name holds OSD_OI_NAME_SIZE bytes, as
+	 * the copy bound implies) */
+	strncpy(oi->oi_name, name, OSD_OI_NAME_SIZE - 1);
+	oi->oi_name[OSD_OI_NAME_SIZE - 1] = '\0';
+	oi->oi_zapid = zde->zde_dnode;
+
+	return rc;
+}
+
+/**
+ * Create a new OI with the given name.
+ *
+ * A new object is created with __osd_zap_create() using directory mode
+ * and uid/gid 0, and an entry \a name referring to it is added to the ZAP
+ * \a parent. Both steps run in one DMU transaction.
+ *
+ * \param[in]  env	execution environment; supplies the per-thread
+ *			zpl_direntry and lu_attr buffers
+ * \param[in]  o	OSD device whose objset is modified
+ * \param[in]  parent	object id of the ZAP that receives the new entry
+ * \param[in]  name	name of the new entry
+ * \param[out] child	receives the object id of the new object; it is
+ *			written even when zap_add() fails
+ *
+ * \retval 0		entry created
+ * \retval -EEXIST	\a name is already present in \a parent
+ * \retval -ENOMEM	dmu_tx_create() returned NULL
+ * \retval <0		negated dmu_tx_assign() or zap_add() error
+ */
+static int
+osd_oi_create(const struct lu_env *env, struct osd_device *o,
+	      uint64_t parent, const char *name, uint64_t *child)
+{
+	struct zpl_direntry	*zde = &osd_oti_get(env)->oti_zde.lzd_reg;
+	struct lu_attr		*la = &osd_oti_get(env)->oti_la;
+	dmu_buf_t		*db;
+	dmu_tx_t		*tx;
+	int			 rc;
+
+	/* verify it doesn't already exist */
+	rc = -zap_lookup(o->od_objset.os, parent, name, 8, 1, (void *)zde);
+	if (rc == 0)
+		return -EEXIST;
+
+	/* create fid-to-dnode index */
+	tx = dmu_tx_create(o->od_objset.os);
+	if (tx == NULL)
+		return -ENOMEM;
+
+	/* declare everything the transaction will touch: the new ZAP,
+	 * the parent's bonus buffer, the new entry in the parent and the
+	 * SA of the new object */
+	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, 1, NULL);
+	dmu_tx_hold_bonus(tx, parent);
+	dmu_tx_hold_zap(tx, parent, TRUE, name);
+	LASSERT(tx->tx_objset->os_sa);
+	dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
+
+	/* an unassigned transaction is aborted, an assigned one must be
+	 * committed */
+	rc = -dmu_tx_assign(tx, TXG_WAIT);
+	if (rc) {
+		dmu_tx_abort(tx);
+		return rc;
+	}
+
+	la->la_valid = LA_MODE | LA_UID | LA_GID;
+	la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
+	la->la_uid = la->la_gid = 0;
+	/* NOTE(review): the result of __osd_zap_create() is not checked;
+	 * if it can fail, db is used uninitialized below -- confirm
+	 * against its definition */
+	__osd_zap_create(env, &o->od_objset, &db, tx, la, oi_tag, 0);
+
+	zde->zde_dnode = db->db_object;
+	zde->zde_pad = 0;
+	zde->zde_type = IFTODT(S_IFDIR);
+
+	rc = -zap_add(o->od_objset.os, parent, name, 8, 1, (void *)zde, tx);
+
+	/* committed whether or not zap_add() succeeded */
+	dmu_tx_commit(tx);
+
+	*child = db->db_object;
+	sa_buf_rele(db, oi_tag);
+
+	return rc;
+}
+
+/**
+ * Return in \a child the object id stored under \a name in \a parent,
+ * creating the entry with osd_oi_create() when the lookup reports -ENOENT.
+ *
+ * \retval 0	\a child is valid
+ * \retval <0	error from osd_oi_lookup() (other than -ENOENT) or from
+ *		osd_oi_create()
+ */
+static int
+osd_oi_find_or_create(const struct lu_env *env, struct osd_device *o,
+		      uint64_t parent, const char *name, uint64_t *child)
+{
+	struct osd_oi	oi;
+	int		rc;
+
+	rc = osd_oi_lookup(env, o, parent, name, &oi);
+	if (rc == 0)
+		*child = oi.oi_zapid;
+	else if (rc == -ENOENT)
+		rc = osd_oi_create(env, o, parent, name, child);
+
+	return rc;
+}
+
+/**
+ * Lookup the target index/flags of the fid, so it will know where
+ * the object is located (tgt index) and it is MDT or OST object.
+ *
+ * IDIF FIDs are resolved from the FID itself and flagged as OST objects.
+ * Other non-normal FIDs are flagged as MDT objects; lsr_index is set only
+ * when the device has a seq_server_site. Normal FIDs are resolved with
+ * fld_server_lookup(), which requires a seq_server_site.
+ *
+ * \param[in]  env	execution environment
+ * \param[in]  osd	OSD device
+ * \param[in]  fid	FID to locate
+ * \param[out] range	receives lsr_flags and lsr_index
+ *
+ * \retval 0		\a range describes \a fid
+ * \retval non-zero	value returned by a failed fld_server_lookup()
+ */
+int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
+		   const struct lu_fid *fid, struct lu_seq_range *range)
+{
+	struct seq_server_site	*ss = osd_seq_site(osd);
+	int			rc;
+
+	if (fid_is_idif(fid)) {
+		range->lsr_flags = LU_SEQ_RANGE_OST;
+		range->lsr_index = fid_idif_ost_idx(fid);
+		return 0;
+	}
+
+	if (!fid_is_norm(fid)) {
+		range->lsr_flags = LU_SEQ_RANGE_MDT;
+		if (ss != NULL)
+			range->lsr_index = ss->ss_node_id;
+		return 0;
+	}
+
+	LASSERT(ss != NULL);
+	range->lsr_flags = -1;
+	rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), range);
+	if (rc != 0) {
+		CERROR("%s can not find "DFID": rc = %d\n",
+		       osd2lu_dev(osd)->ld_obd->obd_name, PFID(fid), rc);
+	}
+	return rc;
+}
+
+/**
+ * Tell whether \a fid names an OST object.
+ *
+ * \retval 1	\a fid is IDIF, or its range is flagged LU_SEQ_RANGE_OST
+ * \retval 0	the range of \a fid carries any other flag
+ * \retval rc	non-zero result of a failed osd_fld_lookup()
+ */
+int fid_is_on_ost(const struct lu_env *env, struct osd_device *osd,
+		  const struct lu_fid *fid)
+{
+	struct lu_seq_range	*range = &osd_oti_get(env)->oti_seq_range;
+	int			rc;
+	ENTRY;
+
+	if (fid_is_idif(fid))
+		RETURN(1);
+
+	rc = osd_fld_lookup(env, osd, fid, range);
+	if (rc != 0) {
+		CERROR("%s: Can not lookup fld for "DFID"\n",
+		       osd2lu_dev(osd)->ld_obd->obd_name, PFID(fid));
+		RETURN(rc);
+	}
+
+	CDEBUG(D_INFO, "fid "DFID" range "DRANGE"\n", PFID(fid),
+	       PRANGE(range));
+
+	if (range->lsr_flags == LU_SEQ_RANGE_OST)
+		RETURN(1);
+
+	RETURN(0);
+}
+
+/**
+ * Walk \a seq_list and return the entry whose os_seq equals \a seq, or
+ * NULL. No lock is taken here; osd_seq_find() calls this with
+ * osl_seq_list_lock held for reading.
+ */
+static struct osd_seq *osd_seq_find_locked(struct osd_seq_list *seq_list,
+					   obd_seq seq)
+{
+	struct osd_seq *osd_seq;
+
+	cfs_list_for_each_entry(osd_seq, &seq_list->osl_seq_list, os_seq_list) {
+		if (osd_seq->os_seq == seq)
+			return osd_seq;
+	}
+	return NULL;
+}
+
+/**
+ * Same search as osd_seq_find_locked(), performed under the read side of
+ * osl_seq_list_lock. The lock is dropped before the result is returned.
+ */
+static struct osd_seq *osd_seq_find(struct osd_seq_list *seq_list,
+				    obd_seq seq)
+{
+	struct osd_seq *osd_seq;
+
+	read_lock(&seq_list->osl_seq_list_lock);
+	osd_seq = osd_seq_find_locked(seq_list, seq);
+	read_unlock(&seq_list->osl_seq_list_lock);
+
+	return osd_seq;
+}
+
+/**
+ * Return the osd_seq describing \a seq, building it on first use.
+ *
+ * Building an entry means: look up "O" under the device root, find or
+ * create the per-sequence directory below it, find or create the
+ * OSD_OST_MAP_SIZE sub-directories "d0", "d1", ... below that, record
+ * their object ids in os_compat_dirs[] and link the entry into the
+ * device list. osl_seq_init_sem serializes builders, so the list is
+ * searched again once the semaphore is held.
+ *
+ * The per-sequence directory is named after \a seq, printed with LPU64
+ * for reserved and MDT0 sequences and with LPX64i otherwise; IDIF
+ * sequences use the value 0.
+ *
+ * \param[in] env	execution environment; supplies the name buffers
+ * \param[in] osd	OSD device owning the sequence list
+ * \param[in] seq	sequence to look up
+ *
+ * \retval pointer	entry for \a seq
+ * \retval ERR_PTR(rc)	allocation, lookup or creation failed; nothing is
+ *			added to the list
+ */
+static struct osd_seq *osd_find_or_add_seq(const struct lu_env *env,
+					   struct osd_device *osd, obd_seq seq)
+{
+	struct osd_seq_list	*seq_list = &osd->od_seq_list;
+	struct osd_seq		*osd_seq;
+	char			*key = osd_oti_get(env)->oti_buf;
+	char			*seq_name = osd_oti_get(env)->oti_str;
+	struct osd_oi		oi;
+	uint64_t		sdb, odb;
+	int			i;
+	int			rc = 0;
+	ENTRY;
+
+	osd_seq = osd_seq_find(seq_list, seq);
+	if (osd_seq != NULL)
+		RETURN(osd_seq);
+
+	down(&seq_list->osl_seq_init_sem);
+	/* Check again, in case someone else has already added it
+	 * to the list */
+	osd_seq = osd_seq_find(seq_list, seq);
+	if (osd_seq != NULL)
+		GOTO(out, rc = 0);
+
+	OBD_ALLOC_PTR(osd_seq);
+	if (osd_seq == NULL)
+		GOTO(out, rc = -ENOMEM);
+
+	CFS_INIT_LIST_HEAD(&osd_seq->os_seq_list);
+	osd_seq->os_seq = seq;
+
+	/* Init subdir count to be 32, but each seq can have
+	 * different subdir count */
+	osd_seq->os_subdir_count = OSD_OST_MAP_SIZE;
+	OBD_ALLOC(osd_seq->os_compat_dirs,
+		  sizeof(uint64_t) * osd_seq->os_subdir_count);
+	if (osd_seq->os_compat_dirs == NULL)
+		GOTO(out, rc = -ENOMEM);
+
+	rc = osd_oi_lookup(env, osd, osd->od_root, "O", &oi);
+	if (rc != 0) {
+		CERROR("%s: Can not find O: rc = %d\n", osd_name(osd), rc);
+		GOTO(out, rc);
+	}
+
+	sprintf(seq_name, (fid_seq_is_rsvd(seq) ||
+		fid_seq_is_mdt0(seq)) ? LPU64 : LPX64i,
+		fid_seq_is_idif(seq) ? 0 : seq);
+
+	rc = osd_oi_find_or_create(env, osd, oi.oi_zapid, seq_name, &odb);
+	if (rc != 0) {
+		CERROR("%s: Can not create %s : rc = %d\n",
+		       osd_name(osd), seq_name, rc);
+		GOTO(out, rc);
+	}
+
+	if (seq == 0)
+		osd->od_ost_compat_grp0 = odb;
+
+	for (i = 0; i < OSD_OST_MAP_SIZE; i++) {
+		sprintf(key, "d%d", i);
+		rc = osd_oi_find_or_create(env, osd, odb, key, &sdb);
+		/* osd_seq must keep pointing at the allocation here: the
+		 * cleanup under "out" dereferences and frees it, and only
+		 * then turns rc into the ERR_PTR() return value */
+		if (rc)
+			GOTO(out, rc);
+		osd_seq->os_compat_dirs[i] = sdb;
+	}
+
+	write_lock(&seq_list->osl_seq_list_lock);
+	cfs_list_add(&osd_seq->os_seq_list, &seq_list->osl_seq_list);
+	write_unlock(&seq_list->osl_seq_list_lock);
+out:
+	up(&seq_list->osl_seq_init_sem);
+	if (rc != 0) {
+		/* rc is non-zero only for an entry that was never linked
+		 * into the list, so it can be freed without the lock */
+		if (osd_seq != NULL && osd_seq->os_compat_dirs != NULL)
+			OBD_FREE(osd_seq->os_compat_dirs,
+				 sizeof(uint64_t) * osd_seq->os_subdir_count);
+		if (osd_seq != NULL)
+			OBD_FREE_PTR(osd_seq);
+		osd_seq = ERR_PTR(rc);
+	}
+	RETURN(osd_seq);
+}
+
 /*
  * objects w/o a natural reference (unlike a file on a MDS)
  * are put under a special hierarchy /O/<seq>/d0..dXX
@@ -128,17 +384,24 @@ static uint64_t osd_get_idx_for_ost_obj(const struct lu_env *env,
 					struct osd_device *osd,
 					const struct lu_fid *fid, char *buf)
 {
+	struct osd_seq *osd_seq;
 	unsigned long b;
 	int rc;
 
+	osd_seq = osd_find_or_add_seq(env, osd, fid_seq(fid));
+	if (IS_ERR(osd_seq)) {
+		CERROR("%s: Can not find seq group "DFID"\n", osd_name(osd),
+		       PFID(fid));
+		/* NOTE(review): the negative errno is returned through
+		 * uint64_t, so callers see it as a very large object id
+		 * -- confirm that they can tell it from a valid ZAP id */
+		return PTR_ERR(osd_seq);
+	}
 	rc = fid_ostid_pack(fid, &osd_oti_get(env)->oti_ostid);
 	LASSERT(rc == 0); /* we should not get here with IGIF */
 	b = osd_oti_get(env)->oti_ostid.oi_id % OSD_OST_MAP_SIZE;
-	LASSERT(osd->od_ost_compat_dirs[b]);
+	LASSERT(osd_seq->os_compat_dirs[b]);
 
 	sprintf(buf, LPU64, osd_oti_get(env)->oti_ostid.oi_id);
 
-	return osd->od_ost_compat_dirs[b];
+	return osd_seq->os_compat_dirs[b];
 }
 
 /* XXX: f_ver is not counted, but may differ too */
@@ -174,7 +437,7 @@ uint64_t osd_get_name_n_idx(const struct lu_env *env, struct osd_device *osd,
 	LASSERT(fid);
 	LASSERT(buf);
 
-	if
(fid_is_idif(fid)) {
+	if (fid_is_on_ost(env, osd, fid) == 1) {
 		zapid = osd_get_idx_for_ost_obj(env, osd, fid, buf);
 	} else if (fid_is_last_id(fid)) {
 		zapid = osd->od_ost_compat_grp0;
@@ -237,97 +500,6 @@ int osd_fid_lookup(const struct lu_env *env, struct osd_device *dev,
 }
 
 /**
- * Lookup an existing OI by the given name.
- */
-static int
-osd_oi_lookup(const struct lu_env *env, struct osd_device *o,
-	      uint64_t parent, const char *name, struct osd_oi *oi)
-{
-	struct zpl_direntry	*zde = &osd_oti_get(env)->oti_zde.lzd_reg;
-	int			 rc;
-
-	rc = -zap_lookup(o->od_objset.os, parent, name, 8, 1, (void *)zde);
-	if (rc)
-		return rc;
-
-	strncpy(oi->oi_name, name, OSD_OI_NAME_SIZE - 1);
-	oi->oi_zapid = zde->zde_dnode;
-
-	return rc;
-}
-
-/**
- * Create a new OI with the given name.
- */
-static int
-osd_oi_create(const struct lu_env *env, struct osd_device *o,
-	      uint64_t parent, const char *name, uint64_t *child)
-{
-	struct zpl_direntry	*zde = &osd_oti_get(env)->oti_zde.lzd_reg;
-	struct lu_attr		*la = &osd_oti_get(env)->oti_la;
-	dmu_buf_t		*db;
-	dmu_tx_t		*tx;
-	int			 rc;
-
-	/* verify it doesn't already exist */
-	rc = -zap_lookup(o->od_objset.os, parent, name, 8, 1, (void *)zde);
-	if (rc == 0)
-		return -EEXIST;
-
-	/* create fid-to-dnode index */
-	tx = dmu_tx_create(o->od_objset.os);
-	if (tx == NULL)
-		return -ENOMEM;
-
-	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, 1, NULL);
-	dmu_tx_hold_bonus(tx, parent);
-	dmu_tx_hold_zap(tx, parent, TRUE, name);
-	LASSERT(tx->tx_objset->os_sa);
-	dmu_tx_hold_sa_create(tx, ZFS_SA_BASE_ATTR_SIZE);
-
-	rc = -dmu_tx_assign(tx, TXG_WAIT);
-	if (rc) {
-		dmu_tx_abort(tx);
-		return rc;
-	}
-
-	la->la_valid = LA_MODE | LA_UID | LA_GID;
-	la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
-	la->la_uid = la->la_gid = 0;
-	__osd_zap_create(env, &o->od_objset, &db, tx, la, oi_tag, 0);
-
-	zde->zde_dnode = db->db_object;
-	zde->zde_pad = 0;
-	zde->zde_type = IFTODT(S_IFDIR);
-
-	rc = -zap_add(o->od_objset.os, parent, name, 8, 1, (void *)zde, tx);
-
-	dmu_tx_commit(tx);
-
-	*child = db->db_object;
-	sa_buf_rele(db, oi_tag);
-
-	return rc;
-}
-
-static int
-osd_oi_find_or_create(const struct lu_env *env, struct osd_device *o,
-		      uint64_t parent, const char *name, uint64_t *child)
-{
-	struct osd_oi	oi;
-	int		rc;
-
-	rc = osd_oi_lookup(env, o, parent, name, &oi);
-	if (rc == 0) {
-		*child = oi.oi_zapid;
-	} else if (rc == -ENOENT) {
-		rc = osd_oi_create(env, o, parent, name, child);
-	}
-
-	return rc;
-}
-
-/**
  * Close an entry in a specific slot.
  */
 static void
@@ -451,37 +623,48 @@ osd_oi_probe(const struct lu_env *env, struct osd_device *o, int *count)
 	RETURN(0);
 }
 
+/**
+ * Prepare the per-device sequence list of \a osd: an empty list head, its
+ * rwlock, and the semaphore (initial count 1) taken by
+ * osd_find_or_add_seq() while it builds a new entry. \a env is not used.
+ */
+static void osd_ost_seq_init(const struct lu_env *env, struct osd_device *osd)
+{
+	struct osd_seq_list *osl = &osd->od_seq_list;
+
+	CFS_INIT_LIST_HEAD(&osl->osl_seq_list);
+	rwlock_init(&osl->osl_seq_list_lock);
+	sema_init(&osl->osl_seq_init_sem, 1);
+}
+
+/**
+ * Empty the per-device sequence list of \a osd: with the list write lock
+ * held, every osd_seq is unlinked and freed together with its
+ * os_compat_dirs array. \a env is not used.
+ */
+static void osd_ost_seq_fini(const struct lu_env *env, struct osd_device *osd)
+{
+	struct osd_seq_list	*osl = &osd->od_seq_list;
+	struct osd_seq		*osd_seq, *tmp;
+
+	write_lock(&osl->osl_seq_list_lock);
+	/* the _safe iterator is needed because entries are freed in the
+	 * loop body */
+	cfs_list_for_each_entry_safe(osd_seq, tmp, &osl->osl_seq_list,
+				     os_seq_list) {
+		cfs_list_del(&osd_seq->os_seq_list);
+		OBD_FREE(osd_seq->os_compat_dirs,
+			 sizeof(uint64_t) * osd_seq->os_subdir_count);
+		OBD_FREE(osd_seq, sizeof(*osd_seq));
+	}
+	write_unlock(&osl->osl_seq_list_lock);
+
+	return;
+}
+
 /**
  * Create /O subdirectory to map legacy OST objects for compatibility.
*/ static int osd_oi_init_compat(const struct lu_env *env, struct osd_device *o) { - char *key = osd_oti_get(env)->oti_buf; uint64_t odb, sdb; - int i, rc; + int rc; ENTRY; rc = osd_oi_find_or_create(env, o, o->od_root, "O", &sdb); if (rc) RETURN(rc); - /* create /O/0 subdirectory to map legacy OST objects */ - rc = osd_oi_find_or_create(env, o, sdb, "0", &odb); - if (rc) - RETURN(rc); - - o->od_ost_compat_grp0 = odb; - - for (i = 0; i < OSD_OST_MAP_SIZE; i++) { - sprintf(key, "d%d", i); - rc = osd_oi_find_or_create(env, o, odb, key, &sdb); - if (rc) - RETURN(rc); - - o->od_ost_compat_dirs[i] = sdb; - } - + osd_ost_seq_init(env, o); /* Create on-disk indexes to maintain per-UID/GID inode usage. * Those new indexes are created in the top-level ZAP outside the * namespace in order not to confuse ZPL which might interpret those @@ -551,6 +734,8 @@ void osd_oi_fini(const struct lu_env *env, struct osd_device *o) { ENTRY; + osd_ost_seq_fini(env, o); + if (o->od_oi_table != NULL) { (void) osd_oi_close_table(env, o); OBD_FREE(o->od_oi_table,