X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_index.c;h=92f22da7b4b6d4e101e2615dd5c934923e3a83e4;hb=90a1b772d26c86c8c08317bc594372500273cb0d;hp=92a272389dce762bf1ea6fb4351238f2325fab79;hpb=2acb75c36511aca9f30ca381562dc8f1ea451bd4;p=fs%2Flustre-release.git diff --git a/lustre/osd-zfs/osd_index.c b/lustre/osd-zfs/osd_index.c index 92a2723..92f22da 100644 --- a/lustre/osd-zfs/osd_index.c +++ b/lustre/osd-zfs/osd_index.c @@ -28,7 +28,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2012, Intel Corporation. + * Copyright (c) 2012, 2013, Intel Corporation. * Use is subject to license terms. */ /* @@ -48,7 +48,6 @@ #include #include -#include #include #include #include @@ -190,13 +189,100 @@ static inline void osd_it_append_attrs(struct lu_dirent *ent, __u32 attr, len = (len + align) & ~align; lt = (void *)ent->lde_name + len; - lt->lt_type = cpu_to_le16(CFS_DTTOIF(type)); + lt->lt_type = cpu_to_le16(DTTOIF(type)); ent->lde_attrs |= LUDA_TYPE; } ent->lde_attrs = cpu_to_le32(ent->lde_attrs); } +/* + * As we don't know the FID, we can't use the LU object, so this function + * partially duplicates __osd_xattr_get(), which is built around + * the LU object and uses it to cache data like the regular EA dnode, etc. + */ +static int osd_find_parent_by_dnode(const struct lu_env *env, + struct dt_object *o, + struct lu_fid *fid) +{ + struct lustre_mdt_attrs *lma; + udmu_objset_t *uos = &osd_obj2dev(osd_dt_obj(o))->od_objset; + struct lu_buf buf; + sa_handle_t *sa_hdl; + nvlist_t *nvbuf = NULL; + uchar_t *value; + uint64_t dnode; + int rc, size; + ENTRY; + + /* first of all, get parent dnode from own attributes */ + LASSERT(osd_dt_obj(o)->oo_db); + rc = -sa_handle_get(uos->os, osd_dt_obj(o)->oo_db->db_object, + NULL, SA_HDL_PRIVATE, &sa_hdl); + if (rc) + RETURN(rc); + + dnode = ZFS_NO_OBJECT; + rc = -sa_lookup(sa_hdl, SA_ZPL_PARENT(uos), &dnode, 8); + sa_handle_destroy(sa_hdl); + if (rc) + RETURN(rc); + + /* now get EA buffer */ + rc = 
__osd_xattr_load(uos, dnode, &nvbuf); + if (rc) + GOTO(regular, rc); + + /* XXX: if we get that far.. should we cache the result? */ + + /* try to find LMA attribute */ + LASSERT(nvbuf != NULL); + rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA, &value, &size); + if (rc == 0 && size >= sizeof(*lma)) { + lma = (struct lustre_mdt_attrs *)value; + lustre_lma_swab(lma); + *fid = lma->lma_self_fid; + GOTO(out, rc = 0); + } + +regular: + /* no LMA attribute in SA, let's try regular EA */ + + /* first of all, get parent dnode storing regular EA */ + rc = -sa_handle_get(uos->os, dnode, NULL, SA_HDL_PRIVATE, &sa_hdl); + if (rc) + GOTO(out, rc); + + dnode = ZFS_NO_OBJECT; + rc = -sa_lookup(sa_hdl, SA_ZPL_XATTR(uos), &dnode, 8); + sa_handle_destroy(sa_hdl); + if (rc) + GOTO(out, rc); + + CLASSERT(sizeof(*lma) <= sizeof(osd_oti_get(env)->oti_buf)); + buf.lb_buf = osd_oti_get(env)->oti_buf; + buf.lb_len = sizeof(osd_oti_get(env)->oti_buf); + + /* now try to find LMA */ + rc = __osd_xattr_get_large(env, uos, dnode, &buf, + XATTR_NAME_LMA, &size); + if (rc == 0 && size >= sizeof(*lma)) { + lma = buf.lb_buf; + lustre_lma_swab(lma); + *fid = lma->lma_self_fid; + GOTO(out, rc = 0); + } else if (rc < 0) { + GOTO(out, rc); + } else { + GOTO(out, rc = -EIO); + } + +out: + if (nvbuf != NULL) + nvlist_free(nvbuf); + RETURN(rc); +} + static int osd_find_parent_fid(const struct lu_env *env, struct dt_object *o, struct lu_fid *fid) { @@ -245,6 +331,25 @@ static int osd_find_parent_fid(const struct lu_env *env, struct dt_object *o, out: if (buf.lb_buf != osd_oti_get(env)->oti_buf) OBD_FREE(buf.lb_buf, buf.lb_len); + +#if 0 + /* this block can be enabled for additional verification + * it's trying to match FID from LinkEA vs. 
FID from LMA */ + if (rc == 0) { + struct lu_fid fid2; + int rc2; + rc2 = osd_find_parent_by_dnode(env, o, &fid2); + if (rc2 == 0) + if (lu_fid_eq(fid, &fid2) == 0) + CERROR("wrong parent: "DFID" != "DFID"\n", + PFID(fid), PFID(&fid2)); + } +#endif + + /* no LinkEA is found, let's try to find the fid in parent's LMA */ + if (unlikely(rc != 0)) + rc = osd_find_parent_by_dnode(env, o, fid); + RETURN(rc); } @@ -370,6 +475,27 @@ static inline void osd_object_put(const struct lu_env *env, lu_object_put(env, &obj->oo_dt.do_lu); } +static int osd_remote_fid(const struct lu_env *env, struct osd_device *osd, + struct lu_fid *fid) +{ + struct lu_seq_range *range = &osd_oti_get(env)->oti_seq_range; + struct seq_server_site *ss = osd_seq_site(osd); + int rc; + ENTRY; + + if (!fid_is_norm(fid) && !fid_is_root(fid)) + RETURN(0); + + rc = osd_fld_lookup(env, osd, fid, range); + if (rc != 0) { + CERROR("%s: Can not lookup fld for "DFID"\n", + osd_name(osd), PFID(fid)); + RETURN(rc); + } + + RETURN(ss->ss_node_id != range->lsr_index); +} + /** * Inserts (key, value) pair in \a directory object. * @@ -393,7 +519,7 @@ static int osd_dir_insert(const struct lu_env *env, struct dt_object *dt, struct osd_device *osd = osd_obj2dev(parent); struct lu_fid *fid = (struct lu_fid *)rec; struct osd_thandle *oh; - struct osd_object *child; + struct osd_object *child = NULL; __u32 attr; char *name = (char *)key; int rc; @@ -408,56 +534,60 @@ static int osd_dir_insert(const struct lu_env *env, struct dt_object *dt, LASSERT(th != NULL); oh = container_of0(th, struct osd_thandle, ot_super); - child = osd_object_find(env, dt, fid); - if (IS_ERR(child)) - RETURN(PTR_ERR(child)); - -/* - * to simulate old Orion setups with ./.. 
stored in the directories - */ -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 3, 91, 0) -#define OSD_ZFS_INSERT_DOTS_FOR_TESTING__ -#endif + rc = osd_remote_fid(env, osd, fid); + if (rc < 0) { + CERROR("%s: Can not find object "DFID": rc = %d\n", + osd->od_svname, PFID(fid), rc); + RETURN(rc); + } - LASSERT(child->oo_db); - if (name[0] == '.') { - if (name[1] == 0) { - /* do not store ".", instead generate it - * during iteration */ -#ifndef OSD_ZFS_INSERT_DOTS_FOR_TESTING - GOTO(out, rc = 0); -#endif - } else if (name[1] == '.' && name[2] == 0) { - /* update parent dnode in the child. - * later it will be used to generate ".." */ - udmu_objset_t *uos = &osd->od_objset; - rc = osd_object_sa_update(child, - SA_ZPL_PARENT(uos), - &parent->oo_db->db_object, - 8, oh); - -#ifndef OSD_ZFS_INSERT_DOTS_FOR_TESTING - GOTO(out, rc); -#endif + if (unlikely(rc == 1)) { + /* Insert remote entry */ + memset(&oti->oti_zde.lzd_reg, 0, sizeof(oti->oti_zde.lzd_reg)); + oti->oti_zde.lzd_reg.zde_type = IFTODT(S_IFDIR & S_IFMT); + } else { + /* + * "." and ".." are not added to the directory ZAP here: "." is + * skipped and ".." only updates SA_ZPL_PARENT. + */ + /* Insert local entry */ + child = osd_object_find(env, dt, fid); + if (IS_ERR(child)) + RETURN(PTR_ERR(child)); + + LASSERT(child->oo_db); + if (name[0] == '.') { + if (name[1] == 0) { + /* do not store ".", instead generate it + * during iteration */ + GOTO(out, rc = 0); + } else if (name[1] == '.' && name[2] == 0) { + /* update parent dnode in the child. + * later it will be used to generate ".." 
*/ + udmu_objset_t *uos = &osd->od_objset; + rc = osd_object_sa_update(parent, + SA_ZPL_PARENT(uos), + &child->oo_db->db_object, + 8, oh); + GOTO(out, rc); + } } + CLASSERT(sizeof(oti->oti_zde.lzd_reg) == 8); + CLASSERT(sizeof(oti->oti_zde) % 8 == 0); + attr = child->oo_dt.do_lu.lo_header ->loh_attr; + oti->oti_zde.lzd_reg.zde_type = IFTODT(attr & S_IFMT); + oti->oti_zde.lzd_reg.zde_dnode = child->oo_db->db_object; } - CLASSERT(sizeof(oti->oti_zde.lzd_reg) == 8); - CLASSERT(sizeof(oti->oti_zde) % 8 == 0); - attr = child->oo_dt.do_lu.lo_header ->loh_attr; - oti->oti_zde.lzd_reg.zde_type = IFTODT(attr & S_IFMT); - oti->oti_zde.lzd_reg.zde_dnode = child->oo_db->db_object; oti->oti_zde.lzd_fid = *fid; - /* Insert (key,oid) into ZAP */ rc = -zap_add(osd->od_objset.os, parent->oo_db->db_object, (char *)key, 8, sizeof(oti->oti_zde) / 8, (void *)&oti->oti_zde, oh->ot_tx); -#ifndef OSD_ZFS_INSERT_DOTS_FOR_TESTING out: -#endif - osd_object_put(env, child); + if (child != NULL) + osd_object_put(env, child); RETURN(rc); } @@ -503,10 +633,9 @@ static int osd_dir_delete(const struct lu_env *env, struct dt_object *dt, LASSERT(th != NULL); oh = container_of0(th, struct osd_thandle, ot_super); -#ifndef OSD_ZFS_INSERT_DOTS_FOR_TESTING /* - * in Orion . and .. were stored in the directory (not generated up on - * request as now. we preserve them for backward compatibility + * In Orion . and .. were stored in the directory (not generated upon + * request as now). 
We preserve them for backward compatibility */ if (name[0] == '.') { if (name[1] == 0) { @@ -515,7 +644,6 @@ static int osd_dir_delete(const struct lu_env *env, struct dt_object *dt, RETURN(0); } } -#endif /* Remove key from the ZAP */ rc = -zap_remove(osd->od_objset.os, zap_db->db_object, @@ -702,7 +830,7 @@ static struct dt_key *osd_dir_it_key(const struct lu_env *env, strcpy(it->ozi_name, za->za_name); -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 3, 91, 0) +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 3, 90, 0) if (za->za_name[0] == '.') { if (za->za_name[1] == 0 || (za->za_name[1] == '.' && za->za_name[2] == 0)) { @@ -735,7 +863,7 @@ static int osd_dir_it_key_size(const struct lu_env *env, const struct dt_it *di) if ((rc = -zap_cursor_retrieve(it->ozi_zc, za)) == 0) rc = strlen(za->za_name); -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 3, 99, 0) +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 3, 90, 0) if (rc == 0 && za->za_name[0] == '.') { if (za->za_name[1] == 0 || (za->za_name[1] == '.' && za->za_name[2] == 0)) { @@ -859,14 +987,10 @@ static int osd_dir_it_load(const struct lu_env *env, int rc; ENTRY; - if (it->ozi_pos != 0) { - /* the cursor wasn't at the beginning - * so we should reset ZAP cursor as well */ - udmu_zap_cursor_fini(it->ozi_zc); - if (udmu_zap_cursor_init(&it->ozi_zc, &osd->od_objset, - obj->oo_db->db_object, hash)) - RETURN(-ENOMEM); - } + udmu_zap_cursor_fini(it->ozi_zc); + if (udmu_zap_cursor_init(&it->ozi_zc, &osd->od_objset, + obj->oo_db->db_object, hash)) + RETURN(-ENOMEM); if (hash <= 2) { it->ozi_pos = hash;