From 0dfeb470b19746701b275573308bf4ecbe54bbfb Mon Sep 17 00:00:00 2001 From: nikita Date: Sun, 10 Dec 2006 15:17:33 +0000 Subject: [PATCH] One of the main bottlenecks on the b_new_cmd md server is the necessity to keep additional data structures, such as the object index (oi) and fids in directory entries. To minimize this overhead, the on-disk structures should be as compact as possible. For directory entries this was achieved by "compressing" fids, but this approach cannot be applied to the oi as is, because here fids are *keys* rather than records, and iam doesn't support variable-sized keys. Instead of complicating iam with such support, this patch splits the object index into multiple indices with different key sizes. Currently two indices are created: oi.16 --- with the full-sized 16-byte fid as a key, and oi.5 --- with a compact 5-byte representation of a "usual" fid as a key. A "usual fid" is defined as a fid with fid_seq(fid) < 0xffffff, fid_oid(fid) < 0xffff, and fid_ver(fid) == 0 (which gives 5 bytes of data). It is expected that under a usual workload the majority of fids fall into the second (oi.5) category. 
--- lustre/osd/osd_internal.h | 2 + lustre/osd/osd_oi.c | 151 +++++++++++++++++++++++++++------------------ lustre/osd/osd_oi.h | 10 ++- lustre/utils/mkfs_lustre.c | 10 ++- 4 files changed, 108 insertions(+), 65 deletions(-) diff --git a/lustre/osd/osd_internal.h b/lustre/osd/osd_internal.h index 38edf3b..1216598 100644 --- a/lustre/osd/osd_internal.h +++ b/lustre/osd/osd_internal.h @@ -73,6 +73,8 @@ struct osd_thread_info { */ struct lustre_capa_key oti_capa_key; struct lustre_capa oti_capa; + + struct lu_fid_pack oti_pack; }; #endif /* __KERNEL__ */ diff --git a/lustre/osd/osd_oi.c b/lustre/osd/osd_oi.c index 24672a9..d4bb09c 100644 --- a/lustre/osd/osd_oi.c +++ b/lustre/osd/osd_oi.c @@ -59,57 +59,101 @@ #include "osd_igif.h" #include "dt_object.h" -static const struct dt_key *oi_fid_key(struct osd_thread_info *info, - const struct lu_fid *fid); -static const char oi_dirname[] = "oi"; - -static const struct dt_index_features oi_index_features = { - .dif_flags = DT_IND_UPDATE, - .dif_keysize_min = sizeof(struct lu_fid), - .dif_keysize_max = sizeof(struct lu_fid), - .dif_recsize_min = sizeof(struct osd_inode_id), - .dif_recsize_max = sizeof(struct osd_inode_id) +struct oi_descr { + int fid_size; + char *name; +}; + +static const struct oi_descr oi_descr[OSD_OI_FID_NR] = { + [OSD_OI_FID_SMALL] = { + .fid_size = 5, + .name = "oi.5" + }, + [OSD_OI_FID_OTHER] = { + .fid_size = sizeof(struct lu_fid), + .name = "oi.16" + } }; int osd_oi_init(struct osd_thread_info *info, struct osd_oi *oi, struct dt_device *dev) { int rc; - struct dt_object *obj; + int i; const struct lu_env *env; + CLASSERT(ARRAY_SIZE(oi->oi_dir) == ARRAY_SIZE(oi_descr)); + env = info->oti_env; - obj = dt_store_open(env, dev, oi_dirname, &info->oti_fid); - if (!IS_ERR(obj)) { - rc = obj->do_ops->do_index_try(env, obj, &oi_index_features); - if (rc == 0) { - LASSERT(obj->do_index_ops != NULL); - oi->oi_dir = obj; + memset(oi, 0, sizeof *oi); + + for (i = rc = 0; i < ARRAY_SIZE(oi->oi_dir) && rc == 0; 
++i) { + const char *name; + /* + * Allocate on stack---this is initialization. + */ + const struct dt_index_features feat = { + .dif_flags = DT_IND_UPDATE, + .dif_keysize_min = oi_descr[i].fid_size, + .dif_keysize_max = oi_descr[i].fid_size, + .dif_recsize_min = sizeof(struct osd_inode_id), + .dif_recsize_max = sizeof(struct osd_inode_id) + }; + struct dt_object *obj; + + name = oi_descr[i].name; + obj = dt_store_open(env, dev, name, &info->oti_fid); + if (!IS_ERR(obj)) { + rc = obj->do_ops->do_index_try(env, obj, &feat); + if (rc == 0) { + LASSERT(obj->do_index_ops != NULL); + oi->oi_dir[i] = obj; + } else { + CERROR("Wrong index \"%s\": %d\n", name, rc); + lu_object_put(env, &obj->do_lu); + } } else { - CERROR("Wrong index \"%s\": %d\n", oi_dirname, rc); - lu_object_put(env, &obj->do_lu); + rc = PTR_ERR(obj); + CERROR("Cannot open \"%s\": %d\n", name, rc); } - } else { - rc = PTR_ERR(obj); - CERROR("Cannot open \"%s\": %d\n", oi_dirname, rc); } + if (rc != 0) + osd_oi_fini(info, oi); return rc; } void osd_oi_fini(struct osd_thread_info *info, struct osd_oi *oi) { - if (oi->oi_dir != NULL) { - lu_object_put(info->oti_env, &oi->oi_dir->do_lu); - oi->oi_dir = NULL; + int i; + for (i = 0; i < ARRAY_SIZE(oi->oi_dir); ++i) { + if (oi->oi_dir[i] != NULL) { + lu_object_put(info->oti_env, &oi->oi_dir[i]->do_lu); + oi->oi_dir[i] = NULL; + } } } static const struct dt_key *oi_fid_key(struct osd_thread_info *info, - const struct lu_fid *fid) + struct osd_oi *oi, + const struct lu_fid *fid, + struct dt_object **idx) { - fid_cpu_to_be(&info->oti_fid, fid); - return (const struct dt_key *)&info->oti_fid; + int i; + struct lu_fid_pack *pack; + + pack = &info->oti_pack; + fid_pack(pack, fid, &info->oti_fid); + for (i = 0; i < ARRAY_SIZE(oi->oi_dir); ++i) { + if (pack->fp_len == oi_descr[i].fid_size + sizeof pack->fp_len){ + *idx = oi->oi_dir[i]; + return (const struct dt_key *)&pack->fp_area; + } + } + CERROR("Unsupported packed fid size: %d ("DFID")\n", + pack->fp_len, 
PFID(fid)); + LBUG(); + return NULL; } enum { @@ -117,20 +161,6 @@ enum { OI_TXN_DELETE_CREDITS = 20 }; -static inline void oid_lookup_init(struct osd_inode_id *id, - __u64 ino, __u32 gen) -{ - id->oii_ino = be64_to_cpu(ino); - id->oii_gen = be32_to_cpu(gen); -} - -static inline void oid_insert_init(struct osd_inode_id *id, - __u64 ino, __u32 gen) -{ - id->oii_ino = cpu_to_be64(ino); - id->oii_gen = cpu_to_be32(gen); -} - int osd_oi_lookup(struct osd_thread_info *info, struct osd_oi *oi, const struct lu_fid *fid, struct osd_inode_id *id) { @@ -140,11 +170,15 @@ int osd_oi_lookup(struct osd_thread_info *info, struct osd_oi *oi, lu_igif_to_id(fid, id); rc = 0; } else { - rc = oi->oi_dir->do_index_ops->dio_lookup - (info->oti_env, oi->oi_dir, - (struct dt_rec *)id, oi_fid_key(info, fid), - BYPASS_CAPA); - oid_lookup_init(id, id->oii_ino, id->oii_gen); + struct dt_object *idx; + const struct dt_key *key; + + key = oi_fid_key(info, oi, fid, &idx); + rc = idx->do_index_ops->dio_lookup(info->oti_env, idx, + (struct dt_rec *)id, key, + BYPASS_CAPA); + id->oii_ino = be32_to_cpu(id->oii_ino); + id->oii_gen = be32_to_cpu(id->oii_gen); } return rc; } @@ -154,36 +188,33 @@ int osd_oi_insert(struct osd_thread_info *info, struct osd_oi *oi, struct thandle *th) { struct dt_object *idx; - struct dt_device *dev; struct osd_inode_id *id; + const struct dt_key *key; if (fid_is_igif(fid)) return 0; - idx = oi->oi_dir; - dev = lu2dt_dev(idx->do_lu.lo_dev); - id = &info->oti_id; - oid_insert_init(id, id0->oii_ino, id0->oii_gen); + key = oi_fid_key(info, oi, fid, &idx); + id = &info->oti_id; + id->oii_ino = cpu_to_be32(id0->oii_ino); + id->oii_gen = cpu_to_be32(id0->oii_gen); return idx->do_index_ops->dio_insert(info->oti_env, idx, (const struct dt_rec *)id, - oi_fid_key(info, fid), th, - BYPASS_CAPA); + key, th, BYPASS_CAPA); } int osd_oi_delete(struct osd_thread_info *info, struct osd_oi *oi, const struct lu_fid *fid, struct thandle *th) { - struct dt_object *idx; - struct dt_device 
*dev; + struct dt_object *idx; + const struct dt_key *key; if (fid_is_igif(fid)) return 0; - idx = oi->oi_dir; - dev = lu2dt_dev(idx->do_lu.lo_dev); + key = oi_fid_key(info, oi, fid, &idx); return idx->do_index_ops->dio_delete(info->oti_env, idx, - oi_fid_key(info, fid), th, - BYPASS_CAPA); + key, th, BYPASS_CAPA); } diff --git a/lustre/osd/osd_oi.h b/lustre/osd/osd_oi.h index a6811e4..66799b5 100644 --- a/lustre/osd/osd_oi.h +++ b/lustre/osd/osd_oi.h @@ -47,6 +47,11 @@ struct thandle; struct dt_device; +enum { + OSD_OI_FID_SMALL, + OSD_OI_FID_OTHER, + OSD_OI_FID_NR +}; /* * Object Index (oi) instance. @@ -55,7 +60,7 @@ struct osd_oi { /* * underlying index object, where fid->id mapping in stored. */ - struct dt_object *oi_dir; + struct dt_object *oi_dir[OSD_OI_FID_NR]; }; /* @@ -66,9 +71,8 @@ struct osd_oi { * should be generalized to work with other local file systems. */ struct osd_inode_id { - __u64 oii_ino; /* inode number */ + __u32 oii_ino; /* inode number */ __u32 oii_gen; /* inode generation */ - __u32 oii_pad; /* alignment padding */ }; int osd_oi_init(struct osd_thread_info *info, diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index be235df..7ee3bcc 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -1299,8 +1299,14 @@ static int mkfs_mdt(struct mkfs_opts *mop) goto out_umount; } - snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "oi"); - ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 16, 16, 4); + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "oi.16"); + ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 16, 8, 4); + if (ret) { + goto out_umount; + } + + snprintf(filepnm, sizeof(filepnm) - 1, "%s/%s", mntpt, "oi.5"); + ret = iam_creat(filepnm, FMT_LFIX, L_BLOCK_SIZE, 5, 8, 4); if (ret) { goto out_umount; } -- 1.8.3.1