4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2015, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
32 * implementation of iam format for fixed size records.
34 * Author: Wang Di <wangdi@clusterfs.com>
35 * Author: Nikita Danilov <nikita@clusterfs.com>
38 #include <linux/types.h>
39 #include "osd_internal.h"
46 IAM_LEAF_HEADER_MAGIC = 0x1976 /* This is duplicated in
47 * lustre/utils/create_iam.c */
50 /* This is duplicated in lustre/utils/create_iam.c */
51 struct iam_leaf_head {
56 static inline int iam_lfix_entry_size(const struct iam_leaf *l)
58 return iam_leaf_descr(l)->id_key_size + iam_leaf_descr(l)->id_rec_size;
/* Entry located @shift positions (possibly negative) away from @entry. */
static inline struct iam_lentry *
iam_lfix_shift(const struct iam_leaf *l, struct iam_lentry *entry, int shift)
{
	void *base = entry;

	return base + shift * iam_lfix_entry_size(l);
}
/* A leaf entry begins with its key, so the entry pointer doubles as the
 * key pointer. */
static inline struct iam_key *iam_leaf_key_at(struct iam_lentry *entry)
{
	void *key = entry;

	return key;
}
72 static inline int lfix_keycmp(const struct iam_container *c,
73 const struct iam_key *k1,
74 const struct iam_key *k2)
76 return memcmp(k1, k2, c->ic_descr->id_key_size);
79 static struct iam_leaf_head *iam_get_head(const struct iam_leaf *l)
81 return (struct iam_leaf_head *)l->il_bh->b_data;
84 static struct iam_lentry *iam_entries(const struct buffer_head *bh)
86 return (void *)bh->b_data + sizeof(struct iam_leaf_head);
89 static struct iam_lentry *iam_get_lentries(const struct iam_leaf *l)
91 return iam_entries(l->il_bh);
94 static int leaf_count_limit(const struct iam_leaf *leaf)
98 free_space = iam_leaf_container(leaf)->ic_object->i_sb->s_blocksize;
99 free_space -= sizeof(struct iam_leaf_head);
100 return free_space / iam_lfix_entry_size(leaf);
103 static int lentry_count_get(const struct iam_leaf *leaf)
105 return le16_to_cpu(iam_get_head(leaf)->ill_count);
108 static void lentry_count_set(struct iam_leaf *leaf, unsigned count)
110 assert_corr(0 <= count && count <= leaf_count_limit(leaf));
111 iam_get_head(leaf)->ill_count = cpu_to_le16(count);
114 static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l);
116 #if LDISKFS_CORRECTNESS_ON || LDISKFS_INVARIANT_ON
117 static int iam_leaf_at_rec(const struct iam_leaf *folio)
119 return iam_get_lentries(folio) <= folio->il_at &&
120 folio->il_at < iam_lfix_get_end(folio);
124 static struct iam_ikey *iam_lfix_ikey(const struct iam_leaf *l,
125 struct iam_ikey *key)
129 assert_corr(iam_leaf_at_rec(l));
130 return (struct iam_ikey *)ie;
133 static struct iam_key *iam_lfix_key(const struct iam_leaf *l)
137 assert_corr(iam_leaf_at_rec(l));
138 return (struct iam_key *)ie;
141 static int iam_lfix_key_size(const struct iam_leaf *l)
143 return iam_leaf_descr(l)->id_key_size;
146 static void iam_lfix_start(struct iam_leaf *l)
148 l->il_at = iam_get_lentries(l);
/* Distance between two entries, measured in whole entries (e1 - e2). */
static inline ptrdiff_t iam_lfix_diff(const struct iam_leaf *l,
				      const struct iam_lentry *e1,
				      const struct iam_lentry *e2)
{
	ptrdiff_t bytes;
	int esize;

	esize = iam_lfix_entry_size(l);
	bytes = (void *)e1 - (void *)e2;
	/* entries are fixed-size: the byte distance must divide evenly */
	assert_corr(bytes / esize * esize == bytes);
	return bytes / esize;
}
/*
 * Attach leaf cursor @l to its block buffer: validate the on-disk leaf
 * header (magic value and entry count within [0, capacity]) and, on
 * success, point il_at/il_entries at the first entry.  On a corrupt
 * header an error is reported against the container's inode.
 * NOTE(review): several lines of this function (declarations, error
 * branch, return) are not visible in this view of the file.
 */
static int iam_lfix_init(struct iam_leaf *l)
	struct iam_leaf_head *ill;

	/* leaf must already have its block buffer attached */
	assert_corr(l->il_bh != NULL);

	ill = iam_get_head(l);
	count = le16_to_cpu(ill->ill_count);
	if (le16_to_cpu(ill->ill_magic) == IAM_LEAF_HEADER_MAGIC &&
	    0 <= count && count <= leaf_count_limit(l)) {
		/* header is sane: cursor starts at the first entry */
		l->il_at = l->il_entries = iam_get_lentries(l);

	/* error path: report the bad header against the owning inode */
	obj = iam_leaf_container(l)->ic_object;
	"Bad magic in node %llu #%lu: %#x != %#x or bad cnt: %d %d: rc = %d\n",
	(unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
	le16_to_cpu(ill->ill_magic), IAM_LEAF_HEADER_MAGIC,
	count, leaf_count_limit(l), result);
192 static void iam_lfix_fini(struct iam_leaf *l)
194 l->il_entries = l->il_at = NULL;
197 static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l)
199 int count = lentry_count_get(l);
200 struct iam_lentry *ile = iam_lfix_shift(l, l->il_entries, count);
205 static struct iam_rec *iam_lfix_rec(const struct iam_leaf *l)
209 assert_corr(iam_leaf_at_rec(l));
210 return e + iam_leaf_descr(l)->id_key_size;
213 static void iam_lfix_next(struct iam_leaf *l)
215 assert_corr(iam_leaf_at_rec(l));
216 l->il_at = iam_lfix_shift(l, l->il_at, 1);
/* Lowercase hex digit for the low nibble of @ch. */
static char hdigit(char ch)
{
	int nibble = ch & 0xf;

	return nibble < 10 ? '0' + nibble : 'a' + (nibble - 10);
}
/* Render byte @ch as two lowercase hex digits into @area, NUL-terminate,
 * and return @area (convenient for printk("%s", ...)). */
static char *hex(char ch, char *area)
{
	area[0] = hdigit(ch >> 4);
	area[1] = hdigit(ch);
	area[2] = 0;
	return area;
}
/*
 * Debugging helper: dump one leaf entry (key bytes, then record bytes)
 * in hex to the console.
 * NOTE(review): local declarations and the closing printk are not
 * visible in this view of the file.
 */
static void l_print(struct iam_leaf *leaf, struct iam_lentry *entry)
	area = (char *)entry;
	printk(KERN_EMERG "[");
	/* key bytes first ... */
	for (i = iam_lfix_key_size(leaf); i > 0; --i, ++area)
		printk("%s", hex(*area, h));
	/* ... then record bytes */
	for (i = iam_leaf_descr(leaf)->id_rec_size; i > 0; --i, ++area)
		printk("%s", hex(*area, h));
/*
 * Debugging helper: dump every entry of @leaf via l_print().
 * NOTE(review): some local declarations are not visible in this view.
 */
static void lfix_print(struct iam_leaf *leaf)
	struct iam_lentry *entry;

	entry = leaf->il_entries;
	count = lentry_count_get(leaf);
	printk(KERN_EMERG "lfix: %p %p %d\n", leaf, leaf->il_at, count);
	for (i = 0; i < count; ++i, entry = iam_lfix_shift(leaf, entry, 1))
		l_print(leaf, entry);
/*
 * Position the cursor for key @k by binary search over the sorted
 * fixed-size entries, then rewind over duplicate keys so the cursor lands
 * on the FIRST matching entry.  Result values seen here:
 * IAM_LOOKUP_EMPTY (no entries), IAM_LOOKUP_BEFORE (@k precedes every
 * key), IAM_LOOKUP_EXACT (cursor key equals @k) or IAM_LOOKUP_OK.
 * NOTE(review): several lines (braces, loop bodies, return) are not
 * visible in this view of the file.
 */
static int iam_lfix_lookup(struct iam_leaf *l, const struct iam_key *k)
	struct iam_lentry *p, *q, *m, *t;
	struct iam_container *c;

	count = lentry_count_get(l);
		return IAM_LOOKUP_EMPTY;

	result = IAM_LOOKUP_OK;
	c = iam_leaf_container(l);

	q = iam_lfix_shift(l, p, count - 1);	/* q: last entry */
	if (lfix_keycmp(c, k, iam_leaf_key_at(p)) < 0) {
		/* @k is less than the least key in the leaf */
		result = IAM_LOOKUP_BEFORE;
	} else if (lfix_keycmp(c, iam_leaf_key_at(q), k) <= 0) {
		/* binary search with invariant key(p) <= @k < key(q) */
		while (iam_lfix_shift(l, p, 1) != q) {
			m = iam_lfix_shift(l, p, iam_lfix_diff(l, q, p) / 2);
			assert_corr(p < m && m < q);
			if (lfix_keycmp(c, iam_leaf_key_at(m), k) <= 0)

		assert_corr(lfix_keycmp(c, iam_leaf_key_at(p), k) <= 0 &&
			    lfix_keycmp(c, k, iam_leaf_key_at(q)) < 0);
		/* skip over records with duplicate keys. */
		while (p > l->il_entries) {
			t = iam_lfix_shift(l, p, -1);
			if (lfix_keycmp(c, iam_leaf_key_at(t), k) == 0)

	assert_corr(iam_leaf_at_rec(l));

	if (lfix_keycmp(c, iam_leaf_key_at(l->il_at), k) == 0)
		result = IAM_LOOKUP_EXACT;
/* lfix index keys and leaf keys share one representation, so an ikey
 * lookup is just a key lookup. */
static int iam_lfix_ilookup(struct iam_leaf *l, const struct iam_ikey *ik)
{
	const struct iam_key *key = (const void *)ik;

	return iam_lfix_lookup(l, key);
}
334 static void iam_lfix_key_set(struct iam_leaf *l, const struct iam_key *k)
336 assert_corr(iam_leaf_at_rec(l));
337 memcpy(iam_leaf_key_at(l->il_at), k, iam_leaf_descr(l)->id_key_size);
340 static int iam_lfix_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
342 return lfix_keycmp(iam_leaf_container(l), iam_leaf_key_at(l->il_at), k);
345 static int iam_lfix_key_eq(const struct iam_leaf *l, const struct iam_key *k)
347 return !lfix_keycmp(iam_leaf_container(l),
348 iam_leaf_key_at(l->il_at), k);
351 static void iam_lfix_rec_set(struct iam_leaf *l, const struct iam_rec *r)
353 assert_corr(iam_leaf_at_rec(l));
354 memcpy(iam_lfix_rec(l), r, iam_leaf_descr(l)->id_rec_size);
357 static inline int lfix_reccmp(const struct iam_container *c,
358 const struct iam_rec *r1,
359 const struct iam_rec *r2)
361 return memcmp(r1, r2, c->ic_descr->id_rec_size);
/* True iff the record at the cursor equals @r. */
static int iam_lfix_rec_eq(const struct iam_leaf *l, const struct iam_rec *r)
{
	return lfix_reccmp(iam_leaf_container(l), iam_lfix_rec(l), r) == 0;
}
369 static void iam_lfix_rec_get(const struct iam_leaf *l, struct iam_rec *r)
371 assert_corr(iam_leaf_at_rec(l));
372 memcpy(r, iam_lfix_rec(l), iam_leaf_descr(l)->id_rec_size);
/*
 * Insert the pair (@k, @r) at the position chosen by a preceding lookup,
 * shifting the tail of the leaf up by one entry to make room.  The caller
 * must have verified capacity via iam_leaf_can_add().
 * NOTE(review): several lines (declarations, loop body, braces) are not
 * visible in this view of the file.
 */
static void iam_lfix_rec_add(struct iam_leaf *leaf,
			     const struct iam_key *k, const struct iam_rec *r)
	struct iam_lentry *end;
	struct iam_lentry *cur;
	struct iam_lentry *start;

	assert_corr(iam_leaf_can_add(leaf, k, r));

	count = lentry_count_get(leaf);
	/*
	 * This branch handles two exceptional cases:
	 *
	 * - leaf positioned beyond last record, and
	 */
	if (!iam_leaf_at_end(leaf)) {
		end = iam_lfix_get_end(leaf);
		if (lfix_keycmp(iam_leaf_container(leaf),
				k, iam_leaf_key_at(cur)) >= 0)
	/*
	 * Another exceptional case: insertion with the key
	 * less than least key in the leaf.
	 */
	assert_corr(cur == leaf->il_entries);

	diff = (void *)end - (void *)start;
	assert_corr(diff >= 0);
	/* open a one-entry gap at @start (regions overlap: memmove) */
	memmove(iam_lfix_shift(leaf, start, 1), start, diff);

	lentry_count_set(leaf, count + 1);
	iam_lfix_key_set(leaf, k);
	iam_lfix_rec_set(leaf, r);
	assert_corr(iam_leaf_at_rec(leaf));
418 static void iam_lfix_rec_del(struct iam_leaf *leaf, int shift)
420 struct iam_lentry *next, *end;
424 assert_corr(iam_leaf_at_rec(leaf));
426 count = lentry_count_get(leaf);
427 end = iam_lfix_get_end(leaf);
428 next = iam_lfix_shift(leaf, leaf->il_at, 1);
429 diff = (void *)end - (void *)next;
430 memmove(leaf->il_at, next, diff);
432 lentry_count_set(leaf, count - 1);
/* Entries are fixed-size, so capacity depends only on the current count;
 * @k and @r are intentionally unused. */
static int iam_lfix_can_add(const struct iam_leaf *l,
			    const struct iam_key *k, const struct iam_rec *r)
{
	return lentry_count_get(l) < leaf_count_limit(l);
}
441 static int iam_lfix_at_end(const struct iam_leaf *folio)
443 return folio->il_at == iam_lfix_get_end(folio);
446 static void iam_lfix_init_new(struct iam_container *c, struct buffer_head *bh)
448 struct iam_leaf_head *hdr;
450 hdr = (struct iam_leaf_head *)bh->b_data;
451 hdr->ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC);
452 hdr->ill_count = cpu_to_le16(0);
/*
 * Split leaf @l: move the entries from index @split upward into the new
 * block @*bh, update both headers, migrate the cursor if it fell into the
 * moved range, and insert the pivot key (least key of the new leaf) into
 * the parent index node.
 * NOTE(review): several lines (second signature line, declarations,
 * braces) are not visible in this view of the file.
 */
static void iam_lfix_split(struct iam_leaf *l, struct buffer_head **bh,
	struct iam_path *path;
	struct iam_leaf_head *hdr;
	const struct iam_ikey *pivot;
	struct buffer_head *new_leaf;

	path = iam_leaf_path(l);
	hdr = (void *)new_leaf->b_data;

	count = lentry_count_get(l);

	start = iam_lfix_shift(l, iam_get_lentries(l), split);
	finis = iam_lfix_shift(l, iam_get_lentries(l), count);

	/* pivot: least key that moves to the new leaf */
	pivot = (const struct iam_ikey *)iam_leaf_key_at(start);

	memmove(iam_entries(new_leaf), start, finis - start);
	hdr->ill_count = cpu_to_le16(count - split);
	lentry_count_set(l, split);
	if ((void *)l->il_at >= start) {
		/*
		 * insertion point moves into new leaf.
		 */
		shift = iam_lfix_diff(l, l->il_at, start);
		l->il_curidx = new_blknr;
		/*
		 * init cannot fail, as node was just initialized.
		 */
		assert_corr(result == 0);
		l->il_at = iam_lfix_shift(l, iam_get_lentries(l), shift);
	/*
	 * Insert pointer to the new node (together with the least key in
	 * the node) into index node.
	 */
	iam_insert_key_lock(path, path->ip_frame, pivot, new_blknr);
/* True iff the leaf holds no entries at all. */
static int iam_lfix_leaf_empty(struct iam_leaf *leaf)
{
	return !lentry_count_get(leaf);
}
/*
 * Leaf-level operations vector for the fixed-size-record ("lfix") format;
 * installed as descr->id_leaf_ops by iam_lfix_guess().
 * NOTE(review): some initializer lines (e.g. a .key entry) and the closing
 * brace are not visible in this view of the file.
 */
static const struct iam_leaf_operations iam_lfix_leaf_ops = {
	.init = iam_lfix_init,
	.init_new = iam_lfix_init_new,
	.fini = iam_lfix_fini,
	.start = iam_lfix_start,
	.next = iam_lfix_next,
	.ikey = iam_lfix_ikey,
	.key_set = iam_lfix_key_set,
	.key_cmp = iam_lfix_key_cmp,
	.key_eq = iam_lfix_key_eq,
	.key_size = iam_lfix_key_size,
	.rec_set = iam_lfix_rec_set,
	.rec_eq = iam_lfix_rec_eq,
	.rec_get = iam_lfix_rec_get,
	.lookup = iam_lfix_lookup,
	.ilookup = iam_lfix_ilookup,
	.at_end = iam_lfix_at_end,
	.rec_add = iam_lfix_rec_add,
	.rec_del = iam_lfix_rec_del,
	.can_add = iam_lfix_can_add,
	.split = iam_lfix_split,
	.leaf_empty = iam_lfix_leaf_empty,
545 /* This is duplicated in lustre/utils/create_iam.c */
547 * Then shalt thou see the dew-BEDABBLED wretch
548 * Turn, and return, indenting with the way;
549 * Each envious brier his weary legs doth scratch,
550 * Each shadow makes him stop, each murmur stay:
551 * For misery is trodden on by many,
552 * And being low never relieved by any.
554 IAM_LFIX_ROOT_MAGIC = 0xbedabb1edULL /* d01efull */
557 /* This is duplicated in lustre/utils/create_iam.c */
558 struct iam_lfix_root {
563 u8 ilr_indirect_levels;
567 static __u32 iam_lfix_root_ptr(struct iam_container *c)
572 static int iam_lfix_node_init(struct iam_container *c, struct buffer_head *bh,
/*
 * Grow the tree by one level at the root: bump ilr_indirect_levels, reset
 * the root's entry array to two slots, and zero the least key of the new
 * level.
 * NOTE(review): some lines (braces, return) are not visible in this view.
 */
static struct iam_entry *iam_lfix_root_inc(struct iam_container *c,
					   struct iam_path *path,
					   struct iam_frame *frame)
	struct iam_lfix_root *root;
	struct iam_entry *entries;

	entries = frame->entries;

	dx_set_count(entries, 2);
	assert_corr(dx_get_limit(entries) == dx_root_limit(path));

	root = (void *)frame->bh->b_data;
	assert_corr(le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC);
	root->ilr_indirect_levels++;
	frame->at = entries = iam_entry_shift(path, entries, 1);
	/* least key of the new level is all zeroes */
	memset(iam_ikey_at(path, entries), 0,
	       iam_path_descr(path)->id_ikey_size);
/*
 * Sanity-check one index node on @path: the root node must carry the lfix
 * root magic, and the node's stored limit must match the computed
 * root/interior limit.
 * NOTE(review): several lines (declarations, returns, braces) are not
 * visible in this view of the file.
 */
static int iam_lfix_node_check(struct iam_path *path, struct iam_frame *frame)
	unsigned int limit_correct;
	struct iam_entry *entries;

	entries = dx_node_get_entries(path, frame);

	/* ip_frames is the topmost frame, i.e. the root node */
	if (frame == path->ip_frames) {
		struct iam_lfix_root *root;

		root = (void *)frame->bh->b_data;
		if (le64_to_cpu(root->ilr_magic) != IAM_LFIX_ROOT_MAGIC)
		limit_correct = dx_root_limit(path);
		limit_correct = dx_node_limit(path);
	count = dx_get_count(entries);
	limit = dx_get_limit(entries);

	if (limit != limit_correct)
/*
 * Load an index node into @frame.  For the root node, additionally record
 * the number of indirect levels in the path and default ip_ikey_target to
 * the key target (lfix keys and ikeys share a representation).
 * NOTE(review): some lines (declarations, return, braces) are not visible
 * in this view of the file.
 */
static int iam_lfix_node_load(struct iam_path *path, struct iam_frame *frame)
	struct iam_entry *entries;

	entries = dx_node_get_entries(path, frame);
	data = frame->bh->b_data;

	if (frame == path->ip_frames) {
		struct iam_lfix_root *root;

		path->ip_indirect = root->ilr_indirect_levels;
		if (path->ip_ikey_target == NULL)
			path->ip_ikey_target =
				(struct iam_ikey *)path->ip_key_target;
	frame->entries = frame->at = entries;
648 static int iam_lfix_ikeycmp(const struct iam_container *c,
649 const struct iam_ikey *k1,
650 const struct iam_ikey *k2)
652 return memcmp(k1, k2, c->ic_descr->id_ikey_size);
655 static struct iam_path_descr *iam_lfix_ipd_alloc(const struct iam_container *c,
658 return iam_ipd_alloc(area, c->ic_descr->id_ikey_size);
/*
 * Container-level operations vector for the lfix format: root pointer,
 * node read/init/check/load, ikey comparison, root growth and path
 * descriptor alloc/free.
 * NOTE(review): the closing brace is not visible in this view.
 */
static const struct iam_operations iam_lfix_ops = {
	.id_root_ptr = iam_lfix_root_ptr,
	.id_node_read = iam_node_read,
	.id_node_init = iam_lfix_node_init,
	.id_node_check = iam_lfix_node_check,
	.id_node_load = iam_lfix_node_load,
	.id_ikeycmp = iam_lfix_ikeycmp,
	.id_root_inc = iam_lfix_root_inc,
	.id_ipd_alloc = iam_lfix_ipd_alloc,
	.id_ipd_free = iam_ipd_free,
/*
 * Probe the root block of container @c and, if it carries the lfix root
 * magic, populate the container descriptor (key/ikey/rec/ptr sizes, gaps
 * and operation vectors) from the on-disk root.
 * NOTE(review): several lines (declarations, error handling, brelse,
 * return) are not visible in this view of the file.
 */
int iam_lfix_guess(struct iam_container *c)
	struct buffer_head *bh;
	const struct iam_lfix_root *root;

	assert_corr(c->ic_object != NULL);

	result = iam_node_read(c, iam_lfix_root_ptr(c), NULL, &bh);
	root = (void *)bh->b_data;
	if (le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC) {
		struct iam_descr *descr;

		descr->id_key_size = le16_to_cpu(root->ilr_keysize);
		/* lfix uses the key itself as the index key */
		descr->id_ikey_size = le16_to_cpu(root->ilr_keysize);
		descr->id_rec_size = le16_to_cpu(root->ilr_recsize);
		descr->id_ptr_size = le16_to_cpu(root->ilr_ptrsize);
		/* root block reserves room for struct iam_lfix_root */
		descr->id_root_gap = sizeof(struct iam_lfix_root);
		descr->id_node_gap = 0;
		descr->id_ops = &iam_lfix_ops;
		descr->id_leaf_ops = &iam_lfix_leaf_ops;
714 #define LFIX_ROOT_RECNO \
715 ((4096 - sizeof(struct iam_lfix_root)) / (KEYSIZE + PTRSIZE))
717 #define LFIX_INDEX_RECNO (4096 / (KEYSIZE + PTRSIZE))
719 #define LFIX_LEAF_RECNO \
720 ((4096 - sizeof(struct iam_leaf_head)) / (KEYSIZE + RECSIZE))
723 struct iam_lfix_root lr_root;
727 } lr_entry[LFIX_ROOT_RECNO];
731 struct dx_countlimit li_cl;
732 char li_padding[KEYSIZE + PTRSIZE - sizeof(struct dx_countlimit)];
736 } li_entry[LFIX_INDEX_RECNO - 1];
740 struct iam_leaf_head ll_head;
744 } ll_entry[LFIX_LEAF_RECNO];
/*
 * Store @val into a possibly-misaligned @dst via memcpy(); the
 * BUILD_BUG_ON enforces that source and destination have the same size.
 * NOTE(review): the do/while(0) wrapper lines of this macro are not
 * visible in this view of the file.
 */
#define STORE_UNALIGNED(val, dst) \
	typeof(val) __val = (val); \
	BUILD_BUG_ON(sizeof(val) != sizeof(*(dst))); \
	memcpy(dst, &__val, sizeof(*(dst))); \
/*
 * Format @buf as the root block of a new lfix tree: lfix root header,
 * dx_countlimit (count = 2: the limit slot plus one leaf pointer), a
 * reserved "idle_blocks" slot inside the padding, then one minimal
 * (all-zero key, pointer) entry referring to the first leaf.
 * NOTE(review): several lines (declarations, closing braces of the
 * compound literals, ptrsize dispatch) are not visible in this view.
 */
static void lfix_root(void *buf,
		      int blocksize, int keysize, int ptrsize, int recsize)
	struct iam_lfix_root *root;
	struct dx_countlimit *limit;

	*root = (typeof(*root)) {
		.ilr_magic = cpu_to_le64(IAM_LFIX_ROOT_MAGIC),
		.ilr_keysize = cpu_to_le16(keysize),
		.ilr_recsize = cpu_to_le16(recsize),
		.ilr_ptrsize = cpu_to_le16(ptrsize),
		.ilr_indirect_levels = 0

	limit = (void *)(root + 1);
	*limit = (typeof(*limit)){
		/*
		 * limit itself + one pointer to the leaf.
		 */
		.count = cpu_to_le16(2),
		.limit = iam_root_limit(sizeof(struct iam_lfix_root),
					blocksize, keysize + ptrsize)

	/* To guarantee that the padding "keysize + ptrsize"
	 * covers the "dx_countlimit" and the "idle_blocks". */
	LASSERT((keysize + ptrsize) >=
		(sizeof(struct dx_countlimit) + sizeof(__u32)));

	entry = (void *)(limit + 1);
	/* Put "idle_blocks" just after the limit. There was padding after
	 * the limit, the "idle_blocks" re-uses part of the padding, so no
	 * compatibility issues with old layout. */
	entry = (void *)(root + 1) + keysize + ptrsize;

	/*
	 * Entry format is <key> followed by <ptr>. In the minimal tree
	 * consisting of a root and single node, <key> is a minimal possible
	 * key.
	 * XXX: this key is hard-coded to be a sequence of 0's.
	 */
	memset(entry, 0, keysize);

	/* now @entry points to <ptr>; width depends on ptrsize */
	STORE_UNALIGNED(cpu_to_le32(1), (u_int32_t *)entry);
	STORE_UNALIGNED(cpu_to_le64(1), (u_int64_t *)entry);
/*
 * Format @buf as the initial lfix leaf: header with the leaf magic and a
 * single all-zero entry (the minimal key, created by zeroing).
 * NOTE(review): declarations and closing braces are not visible in this
 * view of the file.
 */
static void lfix_leaf(void *buf,
		      int blocksize, int keysize, int ptrsize, int recsize)
	struct iam_leaf_head *head;

	*head = (struct iam_leaf_head) {
		.ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC),
		/*
		 * Leaf contains an entry with the smallest possible key
		 * (created by zeroing).
		 */
		.ill_count = cpu_to_le16(1),

	entry = (void *)(head + 1);
	memset(entry, 0, keysize + recsize);
/*
 * Create a new lfix tree inside (empty) inode @obj under journal handle
 * @handle: append a root block and a leaf block, format both via
 * lfix_root()/lfix_leaf(), and mark inode and blocks dirty.  Errors from
 * block allocation or metadata dirtying propagate to the caller.
 * NOTE(review): several lines (declarations, labels, brelse/cleanup,
 * return) are not visible in this view of the file.
 */
int iam_lfix_create(struct inode *obj,
		    int keysize, int ptrsize, int recsize, handle_t *handle)
	struct buffer_head *root_node;
	struct buffer_head *leaf_node;
	struct super_block *sb;

	/* the index must be created in a fresh, zero-length object */
	assert_corr(obj->i_size == 0);

	bsize = sb->s_blocksize;
	root_node = osd_ldiskfs_append(handle, obj, &blknr);
	if (IS_ERR(root_node))
		GOTO(out, result = PTR_ERR(root_node));

	leaf_node = osd_ldiskfs_append(handle, obj, &blknr);
	if (IS_ERR(leaf_node))
		GOTO(out_root, result = PTR_ERR(leaf_node));

	lfix_root(root_node->b_data, bsize, keysize, ptrsize, recsize);
	lfix_leaf(leaf_node->b_data, bsize, keysize, ptrsize, recsize);
	ldiskfs_mark_inode_dirty(handle, obj);
	result = ldiskfs_handle_dirty_metadata(handle, NULL, root_node);
		result = ldiskfs_handle_dirty_metadata(handle, NULL, leaf_node);
		ldiskfs_std_error(sb, result);

	GOTO(out_root, result);