/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2012, 2014, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/fld/fld_cache.c
 *
 * FLD (Fids Location Database)
 *
 * Author: Pravin Shelar <pravin.shelar@sun.com>
 * Author: Yury Umanets <umka@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_FLD

#include <libcfs/libcfs.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <obd_support.h>
#include <lustre_fld.h>
#include "fld_internal.h"
/**
 * Create fld cache.
 */
struct fld_cache *fld_cache_init(const char *name,
                                 int cache_size, int cache_threshold)
{
        struct fld_cache *cache;

        LASSERT(name != NULL);
        LASSERT(cache_threshold < cache_size);

        OBD_ALLOC_PTR(cache);
        if (cache == NULL)
                RETURN(ERR_PTR(-ENOMEM));

        INIT_LIST_HEAD(&cache->fci_entries_head);
        INIT_LIST_HEAD(&cache->fci_lru);

        cache->fci_cache_count = 0;
        rwlock_init(&cache->fci_lock);

        strlcpy(cache->fci_name, name, sizeof(cache->fci_name));

        cache->fci_cache_size = cache_size;
        cache->fci_threshold = cache_threshold;

        /* Init fld cache info. */
        memset(&cache->fci_stat, 0, sizeof(cache->fci_stat));

        CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n",
               cache->fci_name, cache_size, cache_threshold);

        RETURN(cache);
}
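
/*
 * Usage sketch (added for illustration, not part of the original file): a
 * typical caller pairs fld_cache_init() with fld_cache_fini(). The constants
 * FLD_CLIENT_CACHE_SIZE and FLD_CLIENT_CACHE_THRESHOLD are assumed to come
 * from fld_internal.h; the real callers live in the fld client/server code.
 *
 *      struct fld_cache *cache;
 *
 *      cache = fld_cache_init("example-fld-cache", FLD_CLIENT_CACHE_SIZE,
 *                             FLD_CLIENT_CACHE_THRESHOLD);
 *      if (IS_ERR(cache))
 *              return PTR_ERR(cache);
 *      ...
 *      fld_cache_fini(cache);
 */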
void fld_cache_fini(struct fld_cache *cache)
{
        __u64 pct = 0;

        LASSERT(cache != NULL);
        fld_cache_flush(cache);

        if (cache->fci_stat.fst_count > 0) {
                pct = cache->fci_stat.fst_cache * 100;
                do_div(pct, cache->fci_stat.fst_count);
        }

        CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name);
        CDEBUG(D_INFO, "  Total reqs: %llu\n", cache->fci_stat.fst_count);
        CDEBUG(D_INFO, "  Cache reqs: %llu\n", cache->fci_stat.fst_cache);
        CDEBUG(D_INFO, "  Cache hits: %llu%%\n", pct);

        OBD_FREE_PTR(cache);
}
/**
 * Delete given node from list.
 */
void fld_cache_entry_delete(struct fld_cache *cache,
                            struct fld_cache_entry *node)
{
        list_del(&node->fce_list);
        list_del(&node->fce_lru);
        cache->fci_cache_count--;
        OBD_FREE_PTR(node);
}
/**
 * Fix the list by checking the new entry against the NEXT entry in order.
 */
static void fld_fix_new_list(struct fld_cache *cache)
{
        struct fld_cache_entry *f_curr;
        struct fld_cache_entry *f_next;
        struct lu_seq_range *c_range;
        struct lu_seq_range *n_range;
        struct list_head *head = &cache->fci_entries_head;

restart_fixup:
        list_for_each_entry_safe(f_curr, f_next, head, fce_list) {
                c_range = &f_curr->fce_range;
                n_range = &f_next->fce_range;

                LASSERT(lu_seq_range_is_sane(c_range));
                if (&f_next->fce_list == head)
                        break;

                if (c_range->lsr_flags != n_range->lsr_flags)
                        continue;

                LASSERTF(c_range->lsr_start <= n_range->lsr_start,
                         "cur lsr_start "DRANGE" next lsr_start "DRANGE"\n",
                         PRANGE(c_range), PRANGE(n_range));

                /* check merge possibility with next range */
                if (c_range->lsr_end == n_range->lsr_start) {
                        if (c_range->lsr_index != n_range->lsr_index)
                                continue;
                        n_range->lsr_start = c_range->lsr_start;
                        fld_cache_entry_delete(cache, f_curr);
                        continue;
                }

                /* check if current range overlaps with next range. */
                if (n_range->lsr_start < c_range->lsr_end) {
                        if (c_range->lsr_index == n_range->lsr_index) {
                                n_range->lsr_start = c_range->lsr_start;
                                n_range->lsr_end = max(c_range->lsr_end,
                                                       n_range->lsr_end);
                                fld_cache_entry_delete(cache, f_curr);
                        } else if (n_range->lsr_end <= c_range->lsr_end) {
                                *n_range = *c_range;
                                fld_cache_entry_delete(cache, f_curr);
                        } else {
                                n_range->lsr_start = c_range->lsr_end;
                        }

                        /* we could have overlap over the next
                         * range too. better restart. */
                        goto restart_fixup;
                }

                /* kill duplicates */
                if (c_range->lsr_start == n_range->lsr_start &&
                    c_range->lsr_end == n_range->lsr_end)
                        fld_cache_entry_delete(cache, f_curr);
        }
}
/**
 * Add node to fld cache.
 */
static inline void fld_cache_entry_add(struct fld_cache *cache,
                                       struct fld_cache_entry *f_new,
                                       struct list_head *pos)
{
        list_add(&f_new->fce_list, pos);
        list_add(&f_new->fce_lru, &cache->fci_lru);

        cache->fci_cache_count++;
        fld_fix_new_list(cache);
}
/**
 * Check if cache needs to be shrunk. If so - do it.
 * Remove one entry from the LRU list at a time until the cache is small enough.
 */
static int fld_cache_shrink(struct fld_cache *cache)
{
        struct fld_cache_entry *flde;
        struct list_head *curr;
        int num = 0;

        LASSERT(cache != NULL);
        if (cache->fci_cache_count < cache->fci_cache_size)
                RETURN(0);

        curr = cache->fci_lru.prev;
        while (cache->fci_cache_count + cache->fci_threshold >
               cache->fci_cache_size && curr != &cache->fci_lru) {
                flde = list_entry(curr, struct fld_cache_entry, fce_lru);
                curr = curr->prev;
                fld_cache_entry_delete(cache, flde);
                num++;
        }

        CDEBUG(D_INFO, "%s: FLD cache - Shrunk by %d entries\n",
               cache->fci_name, num);
        RETURN(0);
}
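
/*
 * Worked example (added for clarity, not in the original file): with
 * fci_cache_size = 100 and fci_threshold = 10, fld_cache_shrink() is a no-op
 * while fci_cache_count < 100. Once the count reaches 100 it walks the LRU
 * list from the tail and deletes entries until count + 10 <= 100, i.e. down
 * to 90 entries, leaving threshold-sized headroom before the next shrink.
 */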
/**
 * Kill all fld cache entries.
 */
void fld_cache_flush(struct fld_cache *cache)
{
        write_lock(&cache->fci_lock);
        cache->fci_cache_size = 0;
        fld_cache_shrink(cache);
        write_unlock(&cache->fci_lock);
}
/**
 * Punch hole in existing range. Divide this range and add new
 * entry accordingly.
 */
static void fld_cache_punch_hole(struct fld_cache *cache,
                                 struct fld_cache_entry *f_curr,
                                 struct fld_cache_entry *f_new)
{
        const struct lu_seq_range *range = &f_new->fce_range;
        const u64 new_start = range->lsr_start;
        const u64 new_end = range->lsr_end;
        struct fld_cache_entry *fldt;

        OBD_ALLOC_GFP(fldt, sizeof(*fldt), GFP_ATOMIC);
        if (fldt == NULL) {
                OBD_FREE_PTR(f_new);
                /* overlap is not allowed, so don't mess up list. */
                return;
        }

        /* break f_curr RANGE into three RANGES:
         * f_curr, f_new, fldt
         */

        /* fldt keeps the tail of the original range */
        fldt->fce_range.lsr_start = new_end;
        fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end;
        fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index;

        /* f_curr keeps the head of the original range */
        f_curr->fce_range.lsr_end = new_start;

        /* add these two entries to list */
        fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
        fld_cache_entry_add(cache, fldt, &f_new->fce_list);

        /* no need to fixup */
}
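
/*
 * Illustrative example (not in the original file): if the cache holds
 * [0, 100) -> MDT0 and a new entry [40, 60) -> MDT1 arrives,
 * fld_cache_punch_hole() rewrites the list as three consecutive ranges:
 * f_curr = [0, 40) -> MDT0, f_new = [40, 60) -> MDT1, fldt = [60, 100) -> MDT0.
 */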
/**
 * Handle range overlap in fld cache.
 */
static void fld_cache_overlap_handle(struct fld_cache *cache,
                                     struct fld_cache_entry *f_curr,
                                     struct fld_cache_entry *f_new)
{
        const struct lu_seq_range *range = &f_new->fce_range;
        const u64 new_start = range->lsr_start;
        const u64 new_end = range->lsr_end;
        const u32 mdt = range->lsr_index;

        /* This is the overlap case; it only checks overlap with the
         * previous range. The fixup pass handles overlap with the next
         * range. */

        if (f_curr->fce_range.lsr_index == mdt) {
                /* same MDT: just extend the existing range */
                f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start,
                                                  new_start);
                f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end,
                                                new_end);
                OBD_FREE_PTR(f_new);
                fld_fix_new_list(cache);

        } else if (new_start <= f_curr->fce_range.lsr_start &&
                   f_curr->fce_range.lsr_end <= new_end) {
                /* case 1: new range completely overshadows the existing
                 * range, e.g. the whole range migrated. Update the fld
                 * cache entry. */
                f_curr->fce_range = *range;
                OBD_FREE_PTR(f_new);
                fld_fix_new_list(cache);

        } else if (f_curr->fce_range.lsr_start < new_start &&
                   new_end < f_curr->fce_range.lsr_end) {
                /* case 2: new range fits within the existing range. */
                fld_cache_punch_hole(cache, f_curr, f_new);

        } else if (new_end <= f_curr->fce_range.lsr_end) {
                /* case 3: overlap:
                 *         [new_start [c_start new_end) c_end)
                 */
                LASSERT(new_start <= f_curr->fce_range.lsr_start);

                f_curr->fce_range.lsr_start = new_end;
                fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev);

        } else if (f_curr->fce_range.lsr_start <= new_start) {
                /* case 4: overlap:
                 *         [c_start [new_start c_end) new_end)
                 */
                LASSERT(f_curr->fce_range.lsr_end <= new_end);

                f_curr->fce_range.lsr_end = new_start;
                fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
        } else {
                CERROR("NEW range = "DRANGE" curr = "DRANGE"\n",
                       PRANGE(range), PRANGE(&f_curr->fce_range));
        }
}
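
/*
 * Illustrative example for case 3 (not in the original file): with an
 * existing entry [50, 100) -> MDT0 and a new entry [20, 70) -> MDT1,
 * new_end (70) <= c_end (100) and new_start (20) <= c_start (50), so the
 * existing entry is trimmed to [70, 100) -> MDT0 and [20, 70) -> MDT1 is
 * inserted in front of it; fld_cache_entry_add() then runs the fixup pass.
 */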
struct fld_cache_entry
*fld_cache_entry_create(const struct lu_seq_range *range)
{
        struct fld_cache_entry *f_new;

        LASSERT(lu_seq_range_is_sane(range));

        OBD_ALLOC_PTR(f_new);
        if (f_new == NULL)
                RETURN(ERR_PTR(-ENOMEM));

        f_new->fce_range = *range;
        RETURN(f_new);
}
/**
 * Insert FLD entry in FLD cache.
 *
 * This function handles all cases of merging and breaking up of
 * ranges.
 */
int fld_cache_insert_nolock(struct fld_cache *cache,
                            struct fld_cache_entry *f_new)
{
        struct fld_cache_entry *f_curr;
        struct fld_cache_entry *n;
        struct list_head *head;
        struct list_head *prev = NULL;
        const u64 new_start = f_new->fce_range.lsr_start;
        const u64 new_end = f_new->fce_range.lsr_end;
        __u32 new_flags = f_new->fce_range.lsr_flags;

        /*
         * Duplicate entries are eliminated in insert op.
         * So we don't need to search for a new entry before starting
         * the insertion loop.
         */

        if (!cache->fci_no_shrink)
                fld_cache_shrink(cache);

        head = &cache->fci_entries_head;

        list_for_each_entry_safe(f_curr, n, head, fce_list) {
                /* stop if the new range sorts entirely before this entry */
                if (new_end < f_curr->fce_range.lsr_start ||
                    (new_end == f_curr->fce_range.lsr_start &&
                     new_flags != f_curr->fce_range.lsr_flags))
                        break;

                prev = &f_curr->fce_list;
                /* check if the new range overlaps with this entry. */
                if (new_start < f_curr->fce_range.lsr_end &&
                    new_flags == f_curr->fce_range.lsr_flags) {
                        fld_cache_overlap_handle(cache, f_curr, f_new);
                        goto out;
                }
        }

        if (prev == NULL)
                prev = head;

        CDEBUG(D_INFO, "insert range "DRANGE"\n", PRANGE(&f_new->fce_range));
        /* Add new entry to cache and lru list. */
        fld_cache_entry_add(cache, f_new, prev);
out:
        RETURN(0);
}
int fld_cache_insert(struct fld_cache *cache,
                     const struct lu_seq_range *range)
{
        struct fld_cache_entry *flde;
        int rc;

        flde = fld_cache_entry_create(range);
        if (IS_ERR(flde))
                RETURN(PTR_ERR(flde));
        write_lock(&cache->fci_lock);
        rc = fld_cache_insert_nolock(cache, flde);
        write_unlock(&cache->fci_lock);
        if (rc != 0)
                OBD_FREE_PTR(flde);
        RETURN(rc);
}
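
/*
 * Usage sketch (added for illustration, not part of the original file):
 * inserting a range returned by an FLD server lookup. LU_SEQ_RANGE_MDT is
 * assumed from the range flags in lustre_idl.h; the start/end values below
 * are placeholders and error handling is elided.
 *
 *      struct lu_seq_range range = {
 *              .lsr_start = 0x200000400ULL,
 *              .lsr_end   = 0x200000401ULL,
 *              .lsr_index = 0,
 *              .lsr_flags = LU_SEQ_RANGE_MDT,
 *      };
 *      int rc = fld_cache_insert(cache, &range);
 */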
void fld_cache_delete_nolock(struct fld_cache *cache,
                             const struct lu_seq_range *range)
{
        struct fld_cache_entry *flde;
        struct fld_cache_entry *tmp;
        struct list_head *head;

        head = &cache->fci_entries_head;
        list_for_each_entry_safe(flde, tmp, head, fce_list) {
                /* delete the first entry that matches the given range */
                if (range->lsr_start == flde->fce_range.lsr_start ||
                    (range->lsr_end == flde->fce_range.lsr_end &&
                     range->lsr_flags == flde->fce_range.lsr_flags)) {
                        fld_cache_entry_delete(cache, flde);
                        break;
                }
        }
}
/**
 * Delete FLD entry in FLD cache.
 */
void fld_cache_delete(struct fld_cache *cache,
                      const struct lu_seq_range *range)
{
        write_lock(&cache->fci_lock);
        fld_cache_delete_nolock(cache, range);
        write_unlock(&cache->fci_lock);
}
struct fld_cache_entry *
fld_cache_entry_lookup_nolock(struct fld_cache *cache,
                              const struct lu_seq_range *range)
{
        struct fld_cache_entry *flde;
        struct fld_cache_entry *got = NULL;
        struct list_head *head;

        head = &cache->fci_entries_head;
        list_for_each_entry(flde, head, fce_list) {
                /* return the first entry that matches the given range */
                if (range->lsr_start == flde->fce_range.lsr_start ||
                    (range->lsr_end == flde->fce_range.lsr_end &&
                     range->lsr_flags == flde->fce_range.lsr_flags)) {
                        got = flde;
                        break;
                }
        }

        RETURN(got);
}
/**
 * Look up the cache entry that matches the given \a range.
 */
struct fld_cache_entry *
fld_cache_entry_lookup(struct fld_cache *cache,
                       const struct lu_seq_range *range)
{
        struct fld_cache_entry *got = NULL;

        read_lock(&cache->fci_lock);
        got = fld_cache_entry_lookup_nolock(cache, range);
        read_unlock(&cache->fci_lock);

        RETURN(got);
}
/**
 * Look up the range containing sequence \a seq in the fld cache.
 */
int fld_cache_lookup(struct fld_cache *cache,
                     const u64 seq, struct lu_seq_range *range)
{
        struct fld_cache_entry *flde;
        struct fld_cache_entry *prev = NULL;
        struct list_head *head;

        read_lock(&cache->fci_lock);
        head = &cache->fci_entries_head;

        cache->fci_stat.fst_count++;
        list_for_each_entry(flde, head, fce_list) {
                if (flde->fce_range.lsr_start > seq) {
                        if (prev != NULL)
                                *range = prev->fce_range;
                        break;
                }

                prev = flde;
                if (lu_seq_range_within(&flde->fce_range, seq)) {
                        *range = flde->fce_range;

                        cache->fci_stat.fst_cache++;
                        read_unlock(&cache->fci_lock);
                        RETURN(0);
                }
        }
        read_unlock(&cache->fci_lock);
        RETURN(-ENOENT);
}
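
/*
 * Usage sketch (added for illustration, not part of the original file):
 * resolving a sequence through the cache before falling back to an RPC.
 * fld_cache_lookup() fills *range and returns 0 on a hit, -ENOENT on a miss;
 * fld_remote_lookup() below is an assumed placeholder for that fallback.
 *
 *      struct lu_seq_range range;
 *      int rc;
 *
 *      rc = fld_cache_lookup(cache, seq, &range);
 *      if (rc == -ENOENT)
 *              rc = fld_remote_lookup(seq, &range); // assumed placeholder
 *      if (rc == 0)
 *              mdt_index = range.lsr_index;
 */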