1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see [sun.com URL with a
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
37 * implementation of iam format for fixed size records.
39 * Author: Wang Di <wangdi@clusterfs.com>
40 * Author: Nikita Danilov <nikita@clusterfs.com>
43 #include <linux/types.h>
44 #include <linux/jbd.h>
46 #include <linux/ldiskfs_fs.h>
47 #include "osd_internal.h"
54 IAM_LEAF_HEADER_MAGIC = 0x1976 /* This is duplicated in
55 * lustre/utils/create_iam.c */
58 /* This is duplicated in lustre/utils/create_iam.c */
59 struct iam_leaf_head {
64 static inline int iam_lfix_entry_size(const struct iam_leaf *l)
66 return iam_leaf_descr(l)->id_key_size + iam_leaf_descr(l)->id_rec_size;
69 static inline struct iam_lentry *
70 iam_lfix_shift(const struct iam_leaf *l, struct iam_lentry *entry, int shift)
72 return (void *)entry + shift * iam_lfix_entry_size(l);
75 static inline struct iam_key *iam_leaf_key_at(struct iam_lentry *entry)
77 return (struct iam_key *)entry;
80 static inline int lfix_keycmp(const struct iam_container *c,
81 const struct iam_key *k1,
82 const struct iam_key *k2)
84 return memcmp(k1, k2, c->ic_descr->id_key_size);
87 static struct iam_leaf_head *iam_get_head(const struct iam_leaf *l)
89 return (struct iam_leaf_head *)l->il_bh->b_data;
92 static struct iam_lentry *iam_entries(const struct buffer_head *bh)
94 return (void *)bh->b_data + sizeof(struct iam_leaf_head);
97 static struct iam_lentry *iam_get_lentries(const struct iam_leaf *l)
99 return iam_entries(l->il_bh);
102 static int leaf_count_limit(const struct iam_leaf *leaf)
106 free_space = iam_leaf_container(leaf)->ic_object->i_sb->s_blocksize;
107 free_space -= sizeof(struct iam_leaf_head);
108 return free_space / iam_lfix_entry_size(leaf);
111 static int lentry_count_get(const struct iam_leaf *leaf)
113 return le16_to_cpu(iam_get_head(leaf)->ill_count);
116 static void lentry_count_set(struct iam_leaf *leaf, unsigned count)
118 assert_corr(0 <= count && count <= leaf_count_limit(leaf));
119 iam_get_head(leaf)->ill_count = cpu_to_le16(count);
122 static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l);
124 #if LDISKFS_CORRECTNESS_ON || LDISKFS_INVARIANT_ON
125 static int iam_leaf_at_rec(const struct iam_leaf *folio)
128 iam_get_lentries(folio) <= folio->il_at &&
129 folio->il_at < iam_lfix_get_end(folio);
133 static struct iam_ikey *iam_lfix_ikey(const struct iam_leaf *l,
134 struct iam_ikey *key)
137 assert_corr(iam_leaf_at_rec(l));
138 return (struct iam_ikey*)ie;
141 static struct iam_key *iam_lfix_key(const struct iam_leaf *l)
144 assert_corr(iam_leaf_at_rec(l));
145 return (struct iam_key*)ie;
148 static int iam_lfix_key_size(const struct iam_leaf *l)
150 return iam_leaf_descr(l)->id_key_size;
153 static void iam_lfix_start(struct iam_leaf *l)
155 l->il_at = iam_get_lentries(l);
158 static inline ptrdiff_t iam_lfix_diff(const struct iam_leaf *l,
159 const struct iam_lentry *e1,
160 const struct iam_lentry *e2)
165 esize = iam_lfix_entry_size(l);
166 diff = (void *)e1 - (void *)e2;
167 assert_corr(diff / esize * esize == diff);
171 static int iam_lfix_init(struct iam_leaf *l)
174 struct iam_leaf_head *ill;
177 assert_corr(l->il_bh != NULL);
179 ill = iam_get_head(l);
180 count = le16_to_cpu(ill->ill_count);
181 if (ill->ill_magic == le16_to_cpu(IAM_LEAF_HEADER_MAGIC) &&
182 0 <= count && count <= leaf_count_limit(l)) {
183 l->il_at = l->il_entries = iam_get_lentries(l);
188 obj = iam_leaf_container(l)->ic_object;
189 CERROR("Wrong magic in node %llu (#%lu): %#x != %#x or "
190 "wrong count: %i (%i)",
191 (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
192 ill->ill_magic, le16_to_cpu(IAM_LEAF_HEADER_MAGIC),
193 count, leaf_count_limit(l));
199 static void iam_lfix_fini(struct iam_leaf *l)
201 l->il_entries = l->il_at = NULL;
204 static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l)
206 int count = lentry_count_get(l);
207 struct iam_lentry *ile = iam_lfix_shift(l, l->il_entries, count);
212 struct iam_rec *iam_lfix_rec(const struct iam_leaf *l)
215 assert_corr(iam_leaf_at_rec(l));
216 return e + iam_leaf_descr(l)->id_key_size;
219 static void iam_lfix_next(struct iam_leaf *l)
221 assert_corr(iam_leaf_at_rec(l));
222 l->il_at = iam_lfix_shift(l, l->il_at, 1);
229 EXPORT_SYMBOL(lfix_dump);
231 static char hdigit(char ch)
233 static char d[] = "0123456789abcdef";
237 static char *hex(char ch, char *area)
239 area[0] = hdigit(ch >> 4);
240 area[1] = hdigit(ch);
245 static void l_print(struct iam_leaf *leaf, struct iam_lentry *entry)
251 area = (char *)entry;
252 printk(KERN_EMERG "[");
253 for (i = iam_lfix_key_size(leaf); i > 0; --i, ++area)
254 printk("%s", hex(*area, h));
256 for (i = iam_leaf_descr(leaf)->id_rec_size; i > 0; --i, ++area)
257 printk("%s", hex(*area, h));
261 static void lfix_print(struct iam_leaf *leaf)
263 struct iam_lentry *entry;
267 entry = leaf->il_entries;
268 count = lentry_count_get(leaf);
269 printk(KERN_EMERG "lfix: %p %p %d\n", leaf, leaf->il_at, count);
270 for (i = 0; i < count; ++i, entry = iam_lfix_shift(leaf, entry, 1))
271 l_print(leaf, entry);
274 static int iam_lfix_lookup(struct iam_leaf *l, const struct iam_key *k)
276 struct iam_lentry *p, *q, *m, *t;
277 struct iam_container *c;
281 count = lentry_count_get(l);
283 return IAM_LOOKUP_EMPTY;
285 result = IAM_LOOKUP_OK;
286 c = iam_leaf_container(l);
289 q = iam_lfix_shift(l, p, count - 1);
290 if (lfix_keycmp(c, k, iam_leaf_key_at(p)) < 0) {
292 * @k is less than the least key in the leaf
295 result = IAM_LOOKUP_BEFORE;
296 } else if (lfix_keycmp(c, iam_leaf_key_at(q), k) <= 0) {
302 while (iam_lfix_shift(l, p, 1) != q) {
303 m = iam_lfix_shift(l, p, iam_lfix_diff(l, q, p) / 2);
304 assert_corr(p < m && m < q);
305 if (lfix_keycmp(c, iam_leaf_key_at(m), k) <= 0)
310 assert_corr(lfix_keycmp(c, iam_leaf_key_at(p), k) <= 0 &&
311 lfix_keycmp(c, k, iam_leaf_key_at(q)) < 0);
313 * skip over records with duplicate keys.
315 while (p > l->il_entries) {
316 t = iam_lfix_shift(l, p, -1);
317 if (lfix_keycmp(c, iam_leaf_key_at(t), k) == 0)
324 assert_corr(iam_leaf_at_rec(l));
326 if (lfix_keycmp(c, iam_leaf_key_at(l->il_at), k) == 0)
327 result = IAM_LOOKUP_EXACT;
335 static int iam_lfix_ilookup(struct iam_leaf *l, const struct iam_ikey *ik)
338 return IAM_LOOKUP_OK;
341 static void iam_lfix_key_set(struct iam_leaf *l, const struct iam_key *k)
343 assert_corr(iam_leaf_at_rec(l));
344 memcpy(iam_leaf_key_at(l->il_at), k, iam_leaf_descr(l)->id_key_size);
347 static int iam_lfix_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
349 return lfix_keycmp(iam_leaf_container(l), iam_leaf_key_at(l->il_at), k);
352 static int iam_lfix_key_eq(const struct iam_leaf *l, const struct iam_key *k)
354 return !lfix_keycmp(iam_leaf_container(l),
355 iam_leaf_key_at(l->il_at), k);
358 static void iam_lfix_rec_set(struct iam_leaf *l, const struct iam_rec *r)
360 assert_corr(iam_leaf_at_rec(l));
361 memcpy(iam_lfix_rec(l), r, iam_leaf_descr(l)->id_rec_size);
364 static void iam_lfix_rec_get(const struct iam_leaf *l, struct iam_rec *r)
366 assert_corr(iam_leaf_at_rec(l));
367 memcpy(r, iam_lfix_rec(l), iam_leaf_descr(l)->id_rec_size);
370 static void iam_lfix_rec_add(struct iam_leaf *leaf,
371 const struct iam_key *k, const struct iam_rec *r)
373 struct iam_lentry *end;
374 struct iam_lentry *cur;
375 struct iam_lentry *start;
379 assert_corr(iam_leaf_can_add(leaf, k, r));
381 count = lentry_count_get(leaf);
383 * This branch handles two exceptional cases:
385 * - leaf positioned beyond last record, and
389 if (!iam_leaf_at_end(leaf)) {
390 end = iam_lfix_get_end(leaf);
392 if (lfix_keycmp(iam_leaf_container(leaf),
393 k, iam_leaf_key_at(cur)) >= 0)
397 * Another exceptional case: insertion with the key
398 * less than least key in the leaf.
400 assert_corr(cur == leaf->il_entries);
403 diff = (void *)end - (void *)start;
404 assert_corr(diff >= 0);
405 memmove(iam_lfix_shift(leaf, start, 1), start, diff);
407 lentry_count_set(leaf, count + 1);
408 iam_lfix_key_set(leaf, k);
409 iam_lfix_rec_set(leaf, r);
410 assert_corr(iam_leaf_at_rec(leaf));
413 static void iam_lfix_rec_del(struct iam_leaf *leaf, int shift)
415 struct iam_lentry *next, *end;
419 assert_corr(iam_leaf_at_rec(leaf));
421 count = lentry_count_get(leaf);
422 end = iam_lfix_get_end(leaf);
423 next = iam_lfix_shift(leaf, leaf->il_at, 1);
424 diff = (void *)end - (void *)next;
425 memmove(leaf->il_at, next, diff);
427 lentry_count_set(leaf, count - 1);
430 static int iam_lfix_can_add(const struct iam_leaf *l,
431 const struct iam_key *k, const struct iam_rec *r)
433 return lentry_count_get(l) < leaf_count_limit(l);
436 static int iam_lfix_at_end(const struct iam_leaf *folio)
438 return folio->il_at == iam_lfix_get_end(folio);
441 static void iam_lfix_init_new(struct iam_container *c, struct buffer_head *bh)
443 struct iam_leaf_head *hdr;
445 hdr = (struct iam_leaf_head*)bh->b_data;
446 hdr->ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC);
447 hdr->ill_count = cpu_to_le16(0);
450 static void iam_lfix_split(struct iam_leaf *l, struct buffer_head **bh,
453 struct iam_path *path;
454 struct iam_leaf_head *hdr;
455 const struct iam_ikey *pivot;
456 struct buffer_head *new_leaf;
465 path = iam_leaf_path(l);
467 hdr = (void *)new_leaf->b_data;
469 count = lentry_count_get(l);
472 start = iam_lfix_shift(l, iam_get_lentries(l), split);
473 finis = iam_lfix_shift(l, iam_get_lentries(l), count);
475 pivot = (const struct iam_ikey *)iam_leaf_key_at(start);
477 memmove(iam_entries(new_leaf), start, finis - start);
478 hdr->ill_count = count - split;
479 lentry_count_set(l, split);
480 if ((void *)l->il_at >= start) {
482 * insertion point moves into new leaf.
487 shift = iam_lfix_diff(l, l->il_at, start);
490 l->il_curidx = new_blknr;
491 result = iam_lfix_init(l);
493 * init cannot fail, as node was just initialized.
495 assert_corr(result == 0);
496 l->il_at = iam_lfix_shift(l, iam_get_lentries(l), shift);
499 * Insert pointer to the new node (together with the least key in
500 * the node) into index node.
502 iam_insert_key_lock(path, path->ip_frame, pivot, new_blknr);
505 static struct iam_leaf_operations iam_lfix_leaf_ops = {
506 .init = iam_lfix_init,
507 .init_new = iam_lfix_init_new,
508 .fini = iam_lfix_fini,
509 .start = iam_lfix_start,
510 .next = iam_lfix_next,
512 .ikey = iam_lfix_ikey,
514 .key_set = iam_lfix_key_set,
515 .key_cmp = iam_lfix_key_cmp,
516 .key_eq = iam_lfix_key_eq,
517 .key_size = iam_lfix_key_size,
518 .rec_set = iam_lfix_rec_set,
519 .rec_get = iam_lfix_rec_get,
520 .lookup = iam_lfix_lookup,
521 .ilookup = iam_lfix_ilookup,
522 .at_end = iam_lfix_at_end,
523 .rec_add = iam_lfix_rec_add,
524 .rec_del = iam_lfix_rec_del,
525 .can_add = iam_lfix_can_add,
526 .split = iam_lfix_split
534 /* This is duplicated in lustre/utils/create_iam.c */
536 * Then shalt thou see the dew-BEDABBLED wretch
537 * Turn, and return, indenting with the way;
538 * Each envious brier his weary legs doth scratch,
539 * Each shadow makes him stop, each murmur stay:
540 * For misery is trodden on by many,
541 * And being low never relieved by any.
543 IAM_LFIX_ROOT_MAGIC = 0xbedabb1edULL // d01efull
546 /* This is duplicated in lustre/utils/create_iam.c */
547 struct iam_lfix_root {
552 u8 ilr_indirect_levels;
556 static __u32 iam_lfix_root_ptr(struct iam_container *c)
561 static int iam_lfix_node_init(struct iam_container *c, struct buffer_head *bh,
567 static struct iam_entry *iam_lfix_root_inc(struct iam_container *c,
568 struct iam_path *path,
569 struct iam_frame *frame)
571 struct iam_lfix_root *root;
572 struct iam_entry *entries;
574 entries = frame->entries;
576 dx_set_count(entries, 2);
577 assert_corr(dx_get_limit(entries) == dx_root_limit(path));
579 root = (void *)frame->bh->b_data;
580 assert_corr(le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC);
581 root->ilr_indirect_levels ++;
582 frame->at = entries = iam_entry_shift(path, entries, 1);
583 memset(iam_ikey_at(path, entries), 0,
584 iam_path_descr(path)->id_ikey_size);
588 static int iam_lfix_node_check(struct iam_path *path, struct iam_frame *frame)
592 unsigned limit_correct;
593 struct iam_entry *entries;
595 entries = dx_node_get_entries(path, frame);
597 if (frame == path->ip_frames) {
598 struct iam_lfix_root *root;
600 root = (void *)frame->bh->b_data;
601 if (le64_to_cpu(root->ilr_magic) != IAM_LFIX_ROOT_MAGIC) {
604 limit_correct = dx_root_limit(path);
606 limit_correct = dx_node_limit(path);
607 count = dx_get_count(entries);
608 limit = dx_get_limit(entries);
612 if (limit != limit_correct) {
618 static int iam_lfix_node_load(struct iam_path *path, struct iam_frame *frame)
620 struct iam_entry *entries;
622 entries = dx_node_get_entries(path, frame);
624 data = frame->bh->b_data;
626 if (frame == path->ip_frames) {
627 struct iam_lfix_root *root;
630 path->ip_indirect = root->ilr_indirect_levels;
631 if (path->ip_ikey_target == NULL)
632 path->ip_ikey_target =
633 (struct iam_ikey *)path->ip_key_target;
635 frame->entries = frame->at = entries;
639 static int iam_lfix_ikeycmp(const struct iam_container *c,
640 const struct iam_ikey *k1,
641 const struct iam_ikey *k2)
643 return memcmp(k1, k2, c->ic_descr->id_ikey_size);
646 static struct iam_path_descr *iam_lfix_ipd_alloc(const struct iam_container *c,
649 return iam_ipd_alloc(area, c->ic_descr->id_ikey_size);
652 static struct iam_operations iam_lfix_ops = {
653 .id_root_ptr = iam_lfix_root_ptr,
654 .id_node_read = iam_node_read,
655 .id_node_init = iam_lfix_node_init,
656 .id_node_check = iam_lfix_node_check,
657 .id_node_load = iam_lfix_node_load,
658 .id_ikeycmp = iam_lfix_ikeycmp,
659 .id_root_inc = iam_lfix_root_inc,
660 .id_ipd_alloc = iam_lfix_ipd_alloc,
661 .id_ipd_free = iam_ipd_free,
665 static int iam_lfix_guess(struct iam_container *c)
668 struct buffer_head *bh;
669 const struct iam_lfix_root *root;
671 assert_corr(c->ic_object != NULL);
673 result = iam_node_read(c, iam_lfix_root_ptr(c), NULL, &bh);
675 root = (void *)bh->b_data;
676 if (le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC) {
677 struct iam_descr *descr;
680 descr->id_key_size = le16_to_cpu(root->ilr_keysize);
681 descr->id_ikey_size = le16_to_cpu(root->ilr_keysize);
682 descr->id_rec_size = le16_to_cpu(root->ilr_recsize);
683 descr->id_ptr_size = le16_to_cpu(root->ilr_ptrsize);
684 descr->id_root_gap = sizeof(struct iam_lfix_root);
685 descr->id_node_gap = 0;
686 descr->id_ops = &iam_lfix_ops;
687 descr->id_leaf_ops = &iam_lfix_leaf_ops;
695 static struct iam_format iam_lfix_format = {
696 .if_guess = iam_lfix_guess
699 void iam_lfix_format_init(void)
701 iam_format_register(&iam_lfix_format);
712 #define LFIX_ROOT_RECNO \
713 ((4096 - sizeof(struct iam_lfix_root)) / (KEYSIZE + PTRSIZE))
715 #define LFIX_INDEX_RECNO (4096 / (KEYSIZE + PTRSIZE))
717 #define LFIX_LEAF_RECNO \
718 ((4096 - sizeof(struct iam_leaf_head)) / (KEYSIZE + RECSIZE))
721 struct iam_lfix_root lr_root;
725 } lr_entry[LFIX_ROOT_RECNO];
729 struct dx_countlimit li_cl;
730 char li_padding[KEYSIZE + PTRSIZE - sizeof(struct dx_countlimit)];
734 } li_entry[LFIX_INDEX_RECNO - 1];
738 struct iam_leaf_head ll_head;
742 } ll_entry[LFIX_LEAF_RECNO];
745 #define STORE_UNALIGNED(val, dst) \
747 typeof(val) __val = (val); \
748 CLASSERT(sizeof(val) == sizeof(*(dst))); \
749 memcpy(dst, &__val, sizeof(*(dst))); \
752 #include <linux/jbd.h>
753 #include <linux/ldiskfs_fs.h>
754 #include <linux/ldiskfs_jbd.h>
756 static void lfix_root(void *buf,
757 int blocksize, int keysize, int ptrsize, int recsize)
759 struct iam_lfix_root *root;
760 struct dx_countlimit *limit;
764 *root = (typeof(*root)) {
765 .ilr_magic = cpu_to_le64(IAM_LFIX_ROOT_MAGIC),
766 .ilr_keysize = cpu_to_le16(keysize),
767 .ilr_recsize = cpu_to_le16(recsize),
768 .ilr_ptrsize = cpu_to_le16(ptrsize),
769 .ilr_indirect_levels = 0
772 limit = (void *)(root + 1);
773 *limit = (typeof(*limit)){
775 * limit itself + one pointer to the leaf.
777 .count = cpu_to_le16(2),
778 .limit = iam_root_limit(sizeof(struct iam_lfix_root),
779 blocksize, keysize + ptrsize)
786 entry += keysize + ptrsize;
789 * Entry format is <key> followed by <ptr>. In the minimal tree
790 * consisting of a root and single node, <key> is a minimal possible
793 * XXX: this key is hard-coded to be a sequence of 0's.
797 /* now @entry points to <ptr> */
799 STORE_UNALIGNED(cpu_to_le32(1), (u_int32_t *)entry);
801 STORE_UNALIGNED(cpu_to_le64(1), (u_int64_t *)entry);
804 static void lfix_leaf(void *buf,
805 int blocksize, int keysize, int ptrsize, int recsize)
807 struct iam_leaf_head *head;
811 *head = (struct iam_leaf_head) {
812 .ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC),
814 * Leaf contains an entry with the smallest possible key
815 * (created by zeroing).
817 .ill_count = cpu_to_le16(1),
821 int iam_lfix_create(struct inode *obj,
822 int keysize, int ptrsize, int recsize, handle_t *handle)
824 struct buffer_head *root_node;
825 struct buffer_head *leaf_node;
826 struct super_block *sb;
832 assert_corr(obj->i_size == 0);
835 bsize = sb->s_blocksize;
836 root_node = ldiskfs_append(handle, obj, &blknr, &result);
837 leaf_node = ldiskfs_append(handle, obj, &blknr, &result);
838 if (root_node != NULL && leaf_node != NULL) {
839 lfix_root(root_node->b_data, bsize, keysize, ptrsize, recsize);
840 lfix_leaf(leaf_node->b_data, bsize, keysize, ptrsize, recsize);
841 ldiskfs_mark_inode_dirty(handle, obj);
842 result = ldiskfs_journal_dirty_metadata(handle, root_node);
844 result = ldiskfs_journal_dirty_metadata(handle, leaf_node);
846 ldiskfs_std_error(sb, result);
852 EXPORT_SYMBOL(iam_lfix_create);