From 360c7908539f7443abb890717f4e2666dd2e522d Mon Sep 17 00:00:00 2001 From: nikita Date: Sat, 9 Dec 2006 20:51:32 +0000 Subject: [PATCH] mdd,osd,libiam,fid,lu,mkfs: store fids on disk as variable sized records. --- lustre/fid/fid_lib.c | 31 +------------ lustre/include/lustre/lustre_idl.h | 57 ++++++++++++++++++++--- lustre/include/lustre_fid.h | 3 +- lustre/mdd/mdd_dir.c | 13 +++--- lustre/mdd/mdd_internal.h | 1 + lustre/mdd/mdd_object.c | 18 ++++---- lustre/obdclass/dt_object.c | 13 ++++-- lustre/obdclass/lu_object.c | 93 ++++++++++++++++++++++++++++++++++---- lustre/osd/osd_handler.c | 28 +++++++----- lustre/utils/libiam.c | 3 ++ lustre/utils/mkfs_lustre.c | 4 +- 11 files changed, 186 insertions(+), 78 deletions(-) diff --git a/lustre/fid/fid_lib.c b/lustre/fid/fid_lib.c index 9759489..5074393 100644 --- a/lustre/fid/fid_lib.c +++ b/lustre/fid/fid_lib.c @@ -62,7 +62,7 @@ const struct lu_range LUSTRE_SEQ_ZERO_RANGE = { EXPORT_SYMBOL(LUSTRE_SEQ_ZERO_RANGE); /* Lustre Big Fs Lock fid. */ -const struct lu_fid LUSTRE_BFL_FID = { .f_seq = 0x0000000000000003, +const struct lu_fid LUSTRE_BFL_FID = { .f_seq = 0x0000000000000003, .f_oid = 0x0000000000000001, .f_ver = 0x0000000000000000 }; EXPORT_SYMBOL(LUSTRE_BFL_FID); @@ -93,34 +93,6 @@ void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src) } EXPORT_SYMBOL(fid_le_to_cpu); -#ifdef __KERNEL__ -void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src) -{ - /* check that all fields are converted */ - CLASSERT(sizeof *src == - sizeof fid_seq(src) + - sizeof fid_oid(src) + sizeof fid_ver(src)); - LASSERTF(fid_is_igif(src) || fid_ver(src) == 0, DFID"\n", PFID(src)); - dst->f_seq = cpu_to_be64(fid_seq(src)); - dst->f_oid = cpu_to_be32(fid_oid(src)); - dst->f_ver = cpu_to_be32(fid_ver(src)); -} -EXPORT_SYMBOL(fid_cpu_to_be); - -void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src) -{ - /* check that all fields are converted */ - CLASSERT(sizeof *src == - sizeof fid_seq(src) + - sizeof fid_oid(src) + sizeof fid_ver(src)); - dst->f_seq = be64_to_cpu(fid_seq(src)); - dst->f_oid = be32_to_cpu(fid_oid(src)); - dst->f_ver = be32_to_cpu(fid_ver(src)); - LASSERTF(fid_is_igif(dst) || fid_ver(dst) == 0, DFID"\n", PFID(dst)); -} -EXPORT_SYMBOL(fid_be_to_cpu); -#endif - void range_cpu_to_le(struct lu_range *dst, const struct lu_range *src) { /* check that all fields are converted */ @@ -165,4 +137,5 @@ void range_be_to_cpu(struct lu_range *dst, const struct lu_range *src) dst->lr_end = be64_to_cpu(src->lr_end); } EXPORT_SYMBOL(range_be_to_cpu); + #endif diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index d11d25d..d1322ae 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -239,6 +239,56 @@ static inline int fid_is_igif(const struct lu_fid *fid) return fid_seq(fid) == LUSTRE_ROOT_FID_SEQ; } +#define DFID "[0x%16.16"LPF64"x/0x%8.8x:0x%8.8x]" + +#define PFID(fid) \ + fid_seq(fid), \ + fid_oid(fid), \ + fid_ver(fid) + +#ifdef __KERNEL__ +static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + LASSERTF(fid_is_igif(src) || fid_ver(src) == 0, DFID"\n", PFID(src)); + dst->f_seq = cpu_to_be64(fid_seq(src)); + dst->f_oid = cpu_to_be32(fid_oid(src)); + dst->f_ver = cpu_to_be32(fid_ver(src)); +} + +static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + dst->f_seq = be64_to_cpu(fid_seq(src)); + dst->f_oid = be32_to_cpu(fid_oid(src)); + dst->f_ver = be32_to_cpu(fid_ver(src)); + LASSERTF(fid_is_igif(dst) || fid_ver(dst) == 0, DFID"\n", PFID(dst)); +} + +/* + * Storage representation for fids. + * + * Variable size, first byte contains the length of the whole record. + */ + +struct lu_fid_pack { + char fp_len; + char fp_area[sizeof(struct lu_fid)]; +}; + +void fid_pack(struct lu_fid_pack *pack, const struct lu_fid *fid, + struct lu_fid *befider); +void fid_unpack(const struct lu_fid_pack *pack, struct lu_fid *fid); + +/* __KERNEL__ */ +#endif + static inline int fid_is_sane(const struct lu_fid *fid) { return @@ -253,13 +303,6 @@ static inline int fid_is_zero(const struct lu_fid *fid) return fid_seq(fid) == 0 && fid_oid(fid) == 0; } -#define DFID "[0x%16.16"LPF64"x/0x%8.8x:0x%8.8x]" - -#define PFID(fid) \ - fid_seq(fid), \ - fid_oid(fid), \ - fid_ver(fid) - extern void lustre_swab_lu_fid(struct lu_fid *fid); extern void lustre_swab_lu_range(struct lu_range *range); diff --git a/lustre/include/lustre_fid.h b/lustre/include/lustre_fid.h index 62abbe2..e30f4db 100644 --- a/lustre/include/lustre_fid.h +++ b/lustre/include/lustre_fid.h @@ -191,10 +191,9 @@ int seq_client_alloc_fid(struct lu_client_seq *seq, /* Fids common stuff */ int fid_is_local(struct lu_site *site, const struct lu_fid *fid); + void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src); -void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src); void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src); -void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src); /* fid locking */ diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index de5b001..0815466 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -318,10 +318,10 @@ int mdd_link_sanity_check(const struct lu_env *env, const struct dt_rec *__mdd_fid_rec(const struct lu_env *env, const struct lu_fid *fid) { - struct mdd_thread_info *info = mdd_env_info(env); + struct lu_fid_pack *pack = &mdd_env_info(env)->mti_pack; - fid_cpu_to_be(&info->mti_fid2, fid); - return (const struct dt_rec *)&info->mti_fid2; + fid_pack(pack, fid, &mdd_env_info(env)->mti_fid2); + return (const struct dt_rec *)pack; } @@ -960,7 +960,7 @@ __mdd_lookup(const struct lu_env *env, struct md_object *pobj, struct mdd_object *mdd_obj = md2mdd_obj(pobj); struct mdd_device *m = mdo2mdd(pobj); struct dt_object *dir = mdd_object_child(mdd_obj); - struct dt_rec *rec = (struct dt_rec *)fid; + struct lu_fid_pack *pack = &mdd_env_info(env)->mti_pack; struct timeval start; int rc; ENTRY; @@ -987,10 +987,11 @@ __mdd_lookup(const struct lu_env *env, struct md_object *pobj, RETURN(rc); if (S_ISDIR(mdd_object_type(mdd_obj)) && dt_try_as_dir(env, dir)) { - rc = dir->do_index_ops->dio_lookup(env, dir, rec, key, + rc = dir->do_index_ops->dio_lookup(env, dir, + (struct dt_rec *)pack, key, mdd_object_capa(env, mdd_obj)); if (rc == 0) - fid_be_to_cpu(fid, fid); + fid_unpack(pack, fid); } else rc = -ENOTDIR; diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index c26b53c..890981e 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -112,6 +112,7 @@ struct mdd_thread_info { struct obdo mti_oa; char mti_xattr_buf[LUSTRE_POSIX_ACL_MAX_SIZE]; struct lu_fid mti_fid2; /* used for be & cpu converting */ + struct lu_fid_pack mti_pack; }; int mdd_init_obd(const struct lu_env *env, struct mdd_device *mdd, diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index 984a1b2..2de0c21 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -1260,9 +1260,9 @@ static int mdd_dir_page_build(const struct lu_env *env, int first, struct dt_it *it, __u32 *start, __u32 *end, struct lu_dirent **last) { - struct lu_fid *fid2 = &mdd_env_info(env)->mti_fid2; + struct lu_fid *fid = &mdd_env_info(env)->mti_fid2; struct mdd_thread_info *info = mdd_env_info(env); - struct lu_fid *fid = &info->mti_fid; + struct lu_fid_pack *pack = &info->mti_pack; int result; struct lu_dirent *ent; @@ -1285,18 +1285,18 @@ static int mdd_dir_page_build(const struct lu_env *env, int first, name = (char *)iops->key(env, it); len = iops->key_size(env, it); - fid = (struct lu_fid *)iops->rec(env, it); - fid_be_to_cpu(fid2, fid); + pack = (struct lu_fid_pack *)iops->rec(env, it); + fid_unpack(pack, fid); recsize = (sizeof(*ent) + len + 3) & ~3; hash = iops->store(env, it); *end = hash; CDEBUG(D_INFO, "%p %p %d "DFID": %#8.8x (%d) \"%*.*s\"\n", - name, ent, nob, PFID(fid2), hash, len, len, len, name); + name, ent, nob, PFID(fid), hash, len, len, len, name); if (nob >= recsize) { - fid_be_to_cpu(&ent->lde_fid, fid); + ent->lde_fid = *fid; fid_cpu_to_le(&ent->lde_fid, &ent->lde_fid); ent->lde_hash = hash; ent->lde_namelen = cpu_to_le16(len); @@ -1427,16 +1427,16 @@ static int mdd_readpage(const struct lu_env *env, struct md_object *obj, rc = mdd_readpage_sanity_check(env, mdd_obj); if (rc) GOTO(out_unlock, rc); - + if (mdd_is_dead_obj(mdd_obj)) { struct page *pg; struct lu_dirpage *dp; - /* + /* * According to POSIX, please do not return any entry to client: * even dot and dotdot should not be returned. */ - CWARN("readdir from dead object: "DFID"\n", + CWARN("readdir from dead object: "DFID"\n", PFID(lu_object_fid(mdd2lu_obj(mdd_obj)))); if (rdpg->rp_count <= 0) diff --git a/lustre/obdclass/dt_object.c b/lustre/obdclass/dt_object.c index a6111ac..07ea54b 100644 --- a/lustre/obdclass/dt_object.c +++ b/lustre/obdclass/dt_object.c @@ -35,6 +35,8 @@ #include #include #include +/* fid_be_to_cpu() */ +#include /* no lock is necessary to protect the list, because call-backs * are added during system startup. Please refer to "struct dt_device". @@ -141,17 +143,22 @@ int dt_try_as_dir(const struct lu_env *env, struct dt_object *obj) } EXPORT_SYMBOL(dt_try_as_dir); +extern struct lu_context_key lu_global_key; + static int dt_lookup(const struct lu_env *env, struct dt_object *dir, const char *name, struct lu_fid *fid) { - struct dt_rec *rec = (struct dt_rec *)fid; + struct lu_fid_pack *pack = lu_context_key_get(&env->le_ctx, + &lu_global_key); + struct dt_rec *rec = (struct dt_rec *)pack; const struct dt_key *key = (const struct dt_key *)name; int result; - if (dt_try_as_dir(env, dir)) + if (dt_try_as_dir(env, dir)) { result = dir->do_index_ops->dio_lookup(env, dir, rec, key, BYPASS_CAPA); - else + fid_unpack(pack, fid); + } else result = -ENOTDIR; return result; } diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c index 0c9774f..a353e53 100644 --- a/lustre/obdclass/lu_object.c +++ b/lustre/obdclass/lu_object.c @@ -282,9 +282,13 @@ struct lu_cdebug_data { * Temporary buffer. */ char lck_area[LU_CDEBUG_LINE]; + /* + * fid staging area used by dt_store_open(). + */ + struct lu_fid_pack lck_pack; }; -static void *lu_cdebug_key_init(const struct lu_context *ctx, +static void *lu_global_key_init(const struct lu_context *ctx, struct lu_context_key *key) { struct lu_cdebug_data *value; @@ -295,7 +299,7 @@ static void *lu_cdebug_key_init(const struct lu_context *ctx, return value; } -static void lu_cdebug_key_fini(const struct lu_context *ctx, +static void lu_global_key_fini(const struct lu_context *ctx, struct lu_context_key *key, void *data) { struct lu_cdebug_data *value = data; @@ -306,10 +310,10 @@ static void lu_cdebug_key_fini(const struct lu_context *ctx, * Key, holding temporary buffer. This key is registered very early by * lu_global_init(). */ -static struct lu_context_key lu_cdebug_key = { +struct lu_context_key lu_global_key = { .lct_tags = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD, - .lct_init = lu_cdebug_key_init, - .lct_fini = lu_cdebug_key_fini + .lct_init = lu_global_key_init, + .lct_fini = lu_global_key_fini }; /* @@ -326,7 +330,7 @@ int lu_cdebug_printer(const struct lu_env *env, va_start(args, format); - key = lu_context_key_get(&env->le_ctx, &lu_cdebug_key); + key = lu_context_key_get(&env->le_ctx, &lu_global_key); LASSERT(key != NULL); used = strlen(key->lck_area); @@ -1046,7 +1050,7 @@ int lu_global_init(void) { int result; - result = lu_context_key_register(&lu_cdebug_key); + result = lu_context_key_register(&lu_global_key); if (result == 0) { /* * At this level, we don't know what tags are needed, so @@ -1073,11 +1077,11 @@ void lu_global_fini(void) lu_site_shrinker = NULL; } - lu_context_key_degister(&lu_cdebug_key); + lu_context_key_degister(&lu_global_key); /* * Tear shrinker environment down _after_ de-registering - * lu_cdebug_key, because the latter has a value in the former. + * lu_global_key, because the latter has a value in the former. */ down(&lu_sites_guard); lu_env_fini(&lu_shrink_env); @@ -1089,3 +1093,74 @@ struct lu_buf LU_BUF_NULL = { .lb_len = 0 }; EXPORT_SYMBOL(LU_BUF_NULL); + +/* + * XXX: Functions below logically belong to fid module, but they are used by + * dt_store_open(). Put them here until better place is found. + */ + +void fid_pack(struct lu_fid_pack *pack, const struct lu_fid *fid, + struct lu_fid *befider) +{ + int recsize; + __u64 seq; + __u32 oid; + + seq = fid_seq(fid); + oid = fid_oid(fid); + + /* + * Two cases: compact 6 bytes representation for a common case, and + * full 17 byte representation for "unusual" fid. + */ + + /* + * Check that usual case is really usual. + */ + CLASSERT(LUSTRE_SEQ_MAX_WIDTH < 0xffffull); + + if (fid_is_igif(fid) || + seq > 0xffffffull || oid > 0xffff || fid_ver(fid) != 0) { + fid_cpu_to_be(befider, fid); + recsize = sizeof *befider; + } else { + unsigned char *small_befider; + + small_befider = (char *)befider; + + small_befider[0] = seq >> 16; + small_befider[1] = seq >> 8; + small_befider[2] = seq; + + small_befider[3] = oid >> 8; + small_befider[4] = oid; + + recsize = 5; + } + memcpy(pack->fp_area, befider, recsize); + pack->fp_len = recsize + 1; +} +EXPORT_SYMBOL(fid_pack); + +void fid_unpack(const struct lu_fid_pack *pack, struct lu_fid *fid) +{ + switch (pack->fp_len) { + case sizeof *fid + 1: + memcpy(fid, pack->fp_area, sizeof *fid); + fid_be_to_cpu(fid, fid); + break; + case 6: { + const unsigned char *area; + + area = pack->fp_area; + fid->f_seq = (area[0] << 16) | (area[1] << 8) | area[2]; + fid->f_oid = (area[3] << 8) | area[4]; + fid->f_ver = 0; + break; + } + default: + CERROR("Unexpected packed fid size: %d\n", pack->fp_len); + LBUG(); + } +} +EXPORT_SYMBOL(fid_unpack); diff --git a/lustre/osd/osd_handler.c b/lustre/osd/osd_handler.c index c8e022a..8f86854 100644 --- a/lustre/osd/osd_handler.c +++ b/lustre/osd/osd_handler.c @@ -1087,7 +1087,7 @@ static int osd_mkdir(struct osd_thread_info *info, struct osd_object *obj, * XXX uh-oh... call low-level iam function directly. */ result = iam_lvar_create(obj->oo_inode, OSD_NAME_LEN, 4, - sizeof (struct lu_fid), + sizeof (struct lu_fid_pack), oth->ot_handle); } return result; @@ -1520,7 +1520,7 @@ static int osd_index_probe(const struct lu_env *env, struct osd_object *o, if (feat == &dt_directory_features) return osd_sb(osd_obj2dev(o))->s_root->d_inode == o->oo_inode || descr == &iam_htree_compat_param || - (descr->id_rec_size == sizeof(struct lu_fid) && + (descr->id_rec_size == sizeof(struct lu_fid_pack) && 1 /* * XXX check that index looks like directory. */ @@ -1845,13 +1845,16 @@ static int osd_index_compat_delete(const struct lu_env *env, */ -static int osd_build_fid(struct osd_device *osd, - struct dentry *dentry, struct lu_fid *fid) +static void osd_build_pack(const struct lu_env *env, struct osd_device *osd, + struct dentry *dentry, struct lu_fid_pack *pack) { struct inode *inode = dentry->d_inode; + struct lu_fid *fid = &osd_oti_get(env)->oti_fid; lu_igif_build(fid, inode->i_ino, inode->i_generation); - return 0; + fid_cpu_to_be(fid, fid); + pack->fp_len = sizeof *fid + 1; + memcpy(pack->fp_area, fid, sizeof *fid); } static int osd_index_compat_lookup(const struct lu_env *env, @@ -1903,10 +1906,11 @@ static int osd_index_compat_lookup(const struct lu_env *env, /* * normal case, result is in @dentry. */ - if (dentry->d_inode != NULL) - result = osd_build_fid(osd, dentry, - (struct lu_fid *)rec); - else + if (dentry->d_inode != NULL) { + osd_build_pack(env, osd, dentry, + (struct lu_fid_pack *)rec); + result = 0; + } else result = -ENOENT; } else { /* What? Disconnected alias? Ppheeeww... */ @@ -1976,13 +1980,14 @@ static int osd_index_compat_insert(const struct lu_env *env, { struct osd_object *obj = osd_dt_obj(dt); - const struct lu_fid *fid = (const struct lu_fid *)rec; const char *name = (const char *)key; struct lu_device *ludev = dt->do_lu.lo_dev; struct lu_object *luch; - struct osd_thread_info *info = osd_oti_get(env); + struct osd_thread_info *info = osd_oti_get(env); + const struct lu_fid_pack *pack = (const struct lu_fid_pack *)rec; + struct lu_fid *fid = &osd_oti_get(env)->oti_fid; int result; @@ -1993,6 +1998,7 @@ static int osd_index_compat_insert(const struct lu_env *env, if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_INSERT)) return -EACCES; + fid_unpack(pack, fid); luch = lu_object_find(env, ludev->ld_site, fid); if (!IS_ERR(luch)) { if (lu_object_exists(luch)) { diff --git a/lustre/utils/libiam.c b/lustre/utils/libiam.c index 1480bda..688400f 100644 --- a/lustre/utils/libiam.c +++ b/lustre/utils/libiam.c @@ -243,6 +243,7 @@ static void lvar_leaf(void *buf, int blocksize, int keysize, int ptrsize, int recsize) { struct lvar_leaf_header *head; + char *rec; /* form leaf */ head = buf; @@ -250,6 +251,8 @@ static void lvar_leaf(void *buf, .vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC), .vlh_used = cpu_to_le16(sizeof *head + lvar_esize(0, recsize)) }; + rec = (head + 1); + rec[offsetof(struct lvar_leaf_entry, vle_key)] = recsize; } diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index ce97961..be235df 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -1282,7 +1282,7 @@ static int mkfs_mdt(struct mkfs_opts *mop) } snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "root"); - ret = iam_creat(filepnm, FMT_LVAR, L_BLOCK_SIZE, 4, 16, 4); + ret = iam_creat(filepnm, FMT_LVAR, L_BLOCK_SIZE, 4, 17, 4); if (ret) { goto out_umount; } @@ -1352,7 +1352,7 @@ static int mkfs_mdt(struct mkfs_opts *mop) goto out_umount; } - snprintf(recbuf, sizeof(recbuf) - 1, "0000000000000001%8.8x%8.8x", + snprintf(recbuf, sizeof(recbuf) - 1, "110000000000000001%8.8x%8.8x", (unsigned int)st.st_ino, (unsigned int)generation); ret = mkfs_iam_insert(0, ".", 1, recbuf, filepnm); if (ret) { -- 1.8.3.1