1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2012, 2013, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/fld/fld_cache.c
37  *
38  * FLD (Fids Location Database)
39  *
40  * Author: Pravin Shelar <pravin.shelar@sun.com>
41  * Author: Yury Umanets <umka@clusterfs.com>
42  */
43
44 #define DEBUG_SUBSYSTEM S_FLD
45
46 #ifdef __KERNEL__
47 # include <libcfs/libcfs.h>
48 # include <linux/module.h>
49 # include <linux/math64.h>
50 #else /* __KERNEL__ */
51 # include <liblustre.h>
52 # include <libcfs/list.h>
53 #endif
54
55 #include <obd_support.h>
56 #include <lustre_fld.h>
57 #include "fld_internal.h"
58
59 /**
60  * create fld cache.
61  */
62 struct fld_cache *fld_cache_init(const char *name,
63                                  int cache_size, int cache_threshold)
64 {
65         struct fld_cache *cache;
66         ENTRY;
67
68         LASSERT(name != NULL);
69         LASSERT(cache_threshold < cache_size);
70
71         OBD_ALLOC_PTR(cache);
72         if (cache == NULL)
73                 RETURN(ERR_PTR(-ENOMEM));
74
75         INIT_LIST_HEAD(&cache->fci_entries_head);
76         INIT_LIST_HEAD(&cache->fci_lru);
77
78         cache->fci_cache_count = 0;
79         rwlock_init(&cache->fci_lock);
80
81         strlcpy(cache->fci_name, name,
82                 sizeof(cache->fci_name));
83
84         cache->fci_cache_size = cache_size;
85         cache->fci_threshold = cache_threshold;
86
87         /* Init fld cache info. */
88         memset(&cache->fci_stat, 0, sizeof(cache->fci_stat));
89
90         CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n",
91                cache->fci_name, cache_size, cache_threshold);
92
93         RETURN(cache);
94 }
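
/*
 * A minimal usage sketch (illustrative only): how a caller might create
 * and later destroy this cache.  The FLD_CLIENT_CACHE_SIZE and
 * FLD_CLIENT_CACHE_THRESHOLD constants are assumed to be the ones defined
 * in fld_internal.h; the "cli-fld" name and the calling context are
 * hypothetical.
 *
 *	struct fld_cache *cache;
 *
 *	cache = fld_cache_init("cli-fld", FLD_CLIENT_CACHE_SIZE,
 *			       FLD_CLIENT_CACHE_THRESHOLD);
 *	if (IS_ERR(cache))
 *		return PTR_ERR(cache);
 *	...
 *	fld_cache_fini(cache);
 */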
95
96 /**
97  * destroy fld cache.
98  */
99 void fld_cache_fini(struct fld_cache *cache)
100 {
101         __u64 pct;
102         ENTRY;
103
104         LASSERT(cache != NULL);
105         fld_cache_flush(cache);
106
107         if (cache->fci_stat.fst_count > 0) {
108                 pct = cache->fci_stat.fst_cache * 100;
109                 do_div(pct, cache->fci_stat.fst_count);
110         } else {
111                 pct = 0;
112         }
113
114         CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name);
115         CDEBUG(D_INFO, "  Total reqs: "LPU64"\n", cache->fci_stat.fst_count);
116         CDEBUG(D_INFO, "  Cache reqs: "LPU64"\n", cache->fci_stat.fst_cache);
117         CDEBUG(D_INFO, "  Cache hits: "LPU64"%%\n", pct);
118
119         OBD_FREE_PTR(cache);
120
121         EXIT;
122 }
123
124 /**
125  * delete given node from list.
126  */
127 void fld_cache_entry_delete(struct fld_cache *cache,
128                             struct fld_cache_entry *node)
129 {
130         list_del(&node->fce_list);
131         list_del(&node->fce_lru);
132         cache->fci_cache_count--;
133         OBD_FREE_PTR(node);
134 }
135
136 /**
137  * Fix the list by checking each new entry against the NEXT entry in order.
138  */
139 static void fld_fix_new_list(struct fld_cache *cache)
140 {
141         struct fld_cache_entry *f_curr;
142         struct fld_cache_entry *f_next;
143         struct lu_seq_range *c_range;
144         struct lu_seq_range *n_range;
145         struct list_head *head = &cache->fci_entries_head;
146         ENTRY;
147
148 restart_fixup:
149
150         list_for_each_entry_safe(f_curr, f_next, head, fce_list) {
151                 c_range = &f_curr->fce_range;
152                 n_range = &f_next->fce_range;
153
154                 LASSERT(range_is_sane(c_range));
155                 if (&f_next->fce_list == head)
156                         break;
157
158                 if (c_range->lsr_flags != n_range->lsr_flags)
159                         continue;
160
161                 LASSERTF(c_range->lsr_start <= n_range->lsr_start,
162                          "cur lsr_start "DRANGE" next lsr_start "DRANGE"\n",
163                          PRANGE(c_range), PRANGE(n_range));
164
165                 /* check merge possibility with next range */
166                 if (c_range->lsr_end == n_range->lsr_start) {
167                         if (c_range->lsr_index != n_range->lsr_index)
168                                 continue;
169                         n_range->lsr_start = c_range->lsr_start;
170                         fld_cache_entry_delete(cache, f_curr);
171                         continue;
172                 }
173
174                 /* check if current range overlaps with next range. */
175                 if (n_range->lsr_start < c_range->lsr_end) {
176                         if (c_range->lsr_index == n_range->lsr_index) {
177                                 n_range->lsr_start = c_range->lsr_start;
178                                 n_range->lsr_end = max(c_range->lsr_end,
179                                                        n_range->lsr_end);
180                                 fld_cache_entry_delete(cache, f_curr);
181                         } else {
182                                 if (n_range->lsr_end <= c_range->lsr_end) {
183                                         *n_range = *c_range;
184                                         fld_cache_entry_delete(cache, f_curr);
185                                 } else
186                                         n_range->lsr_start = c_range->lsr_end;
187                         }
188
189                         /* we could have an overlap with the next
190                          * range too, so better restart. */
191                         goto restart_fixup;
192                 }
193
194                 /* kill duplicates */
195                 if (c_range->lsr_start == n_range->lsr_start &&
196                     c_range->lsr_end == n_range->lsr_end)
197                         fld_cache_entry_delete(cache, f_curr);
198         }
199
200         EXIT;
201 }
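
/*
 * Worked example of the fixup above (values are illustrative): given two
 * adjacent entries on the same index, [0x100, 0x200) on MDT0 followed by
 * [0x200, 0x300) on MDT0, the merge branch extends the next entry to
 * [0x100, 0x300) and deletes the current one.  Entries whose lsr_flags
 * differ (e.g. an MDT range next to an OST range) are never merged.
 */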
202
203 /**
204  * add node to fld cache
205  */
206 static inline void fld_cache_entry_add(struct fld_cache *cache,
207                                        struct fld_cache_entry *f_new,
208                                        struct list_head *pos)
209 {
210         list_add(&f_new->fce_list, pos);
211         list_add(&f_new->fce_lru, &cache->fci_lru);
212
213         cache->fci_cache_count++;
214         fld_fix_new_list(cache);
215 }
216
217 /**
218  * Check if the cache needs to be shrunk. If so, shrink it:
219  * remove entries from the LRU list until the cache is small enough.
220  */
221 static int fld_cache_shrink(struct fld_cache *cache)
222 {
223         struct fld_cache_entry *flde;
224         struct list_head *curr;
225         int num = 0;
226         ENTRY;
227
228         LASSERT(cache != NULL);
229
230         if (cache->fci_cache_count < cache->fci_cache_size)
231                 RETURN(0);
232
233         curr = cache->fci_lru.prev;
234
235         while (cache->fci_cache_count + cache->fci_threshold >
236                cache->fci_cache_size && curr != &cache->fci_lru) {
237
238                 flde = list_entry(curr, struct fld_cache_entry, fce_lru);
239                 curr = curr->prev;
240                 fld_cache_entry_delete(cache, flde);
241                 num++;
242         }
243
244         CDEBUG(D_INFO, "%s: FLD cache - Shrunk by "
245                "%d entries\n", cache->fci_name, num);
246
247         RETURN(0);
248 }
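
/*
 * Numeric sketch of the shrink policy (values are illustrative): with
 * fci_cache_size = 1024 and fci_threshold = 10, nothing happens until
 * fci_cache_count reaches 1024; LRU entries are then dropped until
 * fci_cache_count + fci_threshold <= fci_cache_size, i.e. down to 1014
 * entries, so the next few inserts do not immediately trigger another
 * shrink pass.
 */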
249
250 /**
251  * kill all fld cache entries.
252  */
253 void fld_cache_flush(struct fld_cache *cache)
254 {
255         ENTRY;
256
257         write_lock(&cache->fci_lock);
258         cache->fci_cache_size = 0;
259         fld_cache_shrink(cache);
260         write_unlock(&cache->fci_lock);
261
262         EXIT;
263 }
264
265 /**
266  * Punch a hole in an existing range: split that range and add the new
267  * entry in the middle of it.
268  */
269
270 static void fld_cache_punch_hole(struct fld_cache *cache,
271                                  struct fld_cache_entry *f_curr,
272                                  struct fld_cache_entry *f_new)
273 {
274         const struct lu_seq_range *range = &f_new->fce_range;
275         const u64 new_start  = range->lsr_start;
276         const u64 new_end  = range->lsr_end;
277         struct fld_cache_entry *fldt;
278
279         ENTRY;
280         OBD_ALLOC_GFP(fldt, sizeof *fldt, GFP_ATOMIC);
281         if (!fldt) {
282                 OBD_FREE_PTR(f_new);
283                 EXIT;
284                 /* overlap is not allowed, so don't mess up the list. */
285                 return;
286         }
287         /* break f_curr RANGE into three RANGES:
288          *        f_curr, f_new, fldt
289          */
290
291         /* f_new = *range */
292
293         /* fldt */
294         fldt->fce_range.lsr_start = new_end;
295         fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end;
296         fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index;
297
298         /* f_curr */
299         f_curr->fce_range.lsr_end = new_start;
300
301         /* add these two entries to list */
302         fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
303         fld_cache_entry_add(cache, fldt, &f_new->fce_list);
304
305         /* no need to fixup */
306         EXIT;
307 }
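
/*
 * Worked example of the split above (values are illustrative): if f_curr
 * covers [0x000, 0x1000) on MDT0 and f_new covers [0x200, 0x300) on MDT1,
 * the result is three entries: f_curr trimmed to [0x000, 0x200) on MDT0,
 * f_new [0x200, 0x300) on MDT1, and the new fldt entry [0x300, 0x1000)
 * on MDT0.
 */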
308
309 /**
310  * handle range overlap in fld cache.
311  */
312 static void fld_cache_overlap_handle(struct fld_cache *cache,
313                                 struct fld_cache_entry *f_curr,
314                                 struct fld_cache_entry *f_new)
315 {
316         const struct lu_seq_range *range = &f_new->fce_range;
317         const u64 new_start  = range->lsr_start;
318         const u64 new_end  = range->lsr_end;
319         const u32 mdt = range->lsr_index;
320
321         /* This is the overlap case. These checks handle overlap with the
322          * prev range only; the fixup pass handles overlap with the next range. */
323
324         if (f_curr->fce_range.lsr_index == mdt) {
325                 f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start,
326                                                   new_start);
327
328                 f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end,
329                                                 new_end);
330
331                 OBD_FREE_PTR(f_new);
332                 fld_fix_new_list(cache);
333
334         } else if (new_start <= f_curr->fce_range.lsr_start &&
335                         f_curr->fce_range.lsr_end <= new_end) {
336                 /* case 1: new range completely overshadows the existing range,
337                  *         e.g. the whole range was migrated. Update the cache entry. */
338
339                 f_curr->fce_range = *range;
340                 OBD_FREE_PTR(f_new);
341                 fld_fix_new_list(cache);
342
343         } else if (f_curr->fce_range.lsr_start < new_start &&
344                         new_end < f_curr->fce_range.lsr_end) {
345                 /* case 2: new range fits within the existing range. */
346
347                 fld_cache_punch_hole(cache, f_curr, f_new);
348
349         } else  if (new_end <= f_curr->fce_range.lsr_end) {
350                 /* case 3: overlap:
351                  *         [new_start [c_start  new_end)  c_end)
352                  */
353
354                 LASSERT(new_start <= f_curr->fce_range.lsr_start);
355
356                 f_curr->fce_range.lsr_start = new_end;
357                 fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev);
358
359         } else if (f_curr->fce_range.lsr_start <= new_start) {
360                 /* case 4: overlap:
361                  *         [c_start [new_start c_end) new_end)
362                  */
363
364                 LASSERT(f_curr->fce_range.lsr_end <= new_end);
365
366                 f_curr->fce_range.lsr_end = new_start;
367                 fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
368         } else
369                 CERROR("NEW range ="DRANGE" curr = "DRANGE"\n",
370                        PRANGE(range), PRANGE(&f_curr->fce_range));
371 }
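
/*
 * Partial-overlap example for case 3 above (values are illustrative, and
 * the new range is on a different index): if f_curr is [0x100, 0x300) and
 * the new range is [0x080, 0x200), f_curr is trimmed to [0x200, 0x300)
 * and the new entry is inserted in front of it.  Case 4 is the mirror
 * image and trims f_curr from its upper end instead.
 */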
372
373 struct fld_cache_entry
374 *fld_cache_entry_create(const struct lu_seq_range *range)
375 {
376         struct fld_cache_entry *f_new;
377
378         LASSERT(range_is_sane(range));
379
380         OBD_ALLOC_PTR(f_new);
381         if (!f_new)
382                 RETURN(ERR_PTR(-ENOMEM));
383
384         f_new->fce_range = *range;
385         RETURN(f_new);
386 }
387
388 /**
389  * Insert FLD entry in FLD cache.
390  *
391  * This function handles all cases of merging and breaking up of
392  * ranges.
393  */
394 int fld_cache_insert_nolock(struct fld_cache *cache,
395                             struct fld_cache_entry *f_new)
396 {
397         struct fld_cache_entry *f_curr;
398         struct fld_cache_entry *n;
399         struct list_head *head;
400         struct list_head *prev = NULL;
401         const u64 new_start  = f_new->fce_range.lsr_start;
402         const u64 new_end  = f_new->fce_range.lsr_end;
403         __u32 new_flags  = f_new->fce_range.lsr_flags;
404         ENTRY;
405
406         /*
407          * Duplicate entries are eliminated in the insert op,
408          * so we don't need to search for the new entry before starting
409          * the insertion loop.
410          */
411
412         if (!cache->fci_no_shrink)
413                 fld_cache_shrink(cache);
414
415         head = &cache->fci_entries_head;
416
417         list_for_each_entry_safe(f_curr, n, head, fce_list) {
418                 /* insert before this entry if the new range ends before it */
419                 if (new_end < f_curr->fce_range.lsr_start ||
420                    (new_end == f_curr->fce_range.lsr_start &&
421                     new_flags != f_curr->fce_range.lsr_flags))
422                         break;
423
424                 prev = &f_curr->fce_list;
425                 /* check if this range overlaps with the new range. */
426                 if (new_start < f_curr->fce_range.lsr_end &&
427                     new_flags == f_curr->fce_range.lsr_flags) {
428                         fld_cache_overlap_handle(cache, f_curr, f_new);
429                         goto out;
430                 }
431         }
432
433         if (prev == NULL)
434                 prev = head;
435
436         CDEBUG(D_INFO, "insert range "DRANGE"\n", PRANGE(&f_new->fce_range));
437         /* Add new entry to cache and lru list. */
438         fld_cache_entry_add(cache, f_new, prev);
439 out:
440         RETURN(0);
441 }
442
443 int fld_cache_insert(struct fld_cache *cache,
444                      const struct lu_seq_range *range)
445 {
446         struct fld_cache_entry  *flde;
447         int rc;
448
449         flde = fld_cache_entry_create(range);
450         if (IS_ERR(flde))
451                 RETURN(PTR_ERR(flde));
452
453         write_lock(&cache->fci_lock);
454         rc = fld_cache_insert_nolock(cache, flde);
455         write_unlock(&cache->fci_lock);
456         if (rc)
457                 OBD_FREE_PTR(flde);
458
459         RETURN(rc);
460 }
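
/*
 * A minimal insert sketch (illustrative values; LU_SEQ_RANGE_MDT and the
 * DRANGE/PRANGE macros come from the shared lu_seq_range definitions, and
 * "cache" is assumed to have been obtained from fld_cache_init()):
 *
 *	struct lu_seq_range range = {
 *		.lsr_start = 0x400,
 *		.lsr_end   = 0x500,
 *		.lsr_index = 0,
 *		.lsr_flags = LU_SEQ_RANGE_MDT,
 *	};
 *	int rc;
 *
 *	rc = fld_cache_insert(cache, &range);
 *	if (rc != 0)
 *		CERROR("insert of "DRANGE" failed: rc = %d\n",
 *		       PRANGE(&range), rc);
 */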
461
462 void fld_cache_delete_nolock(struct fld_cache *cache,
463                       const struct lu_seq_range *range)
464 {
465         struct fld_cache_entry *flde;
466         struct fld_cache_entry *tmp;
467         struct list_head *head;
468
469         head = &cache->fci_entries_head;
470         list_for_each_entry_safe(flde, tmp, head, fce_list) {
471                 /* delete the entry that matches the given range */
472                 if (range->lsr_start == flde->fce_range.lsr_start ||
473                    (range->lsr_end == flde->fce_range.lsr_end &&
474                     range->lsr_flags == flde->fce_range.lsr_flags)) {
475                         fld_cache_entry_delete(cache, flde);
476                         break;
477                 }
478         }
479 }
480
481 /**
482  * Delete FLD entry in FLD cache.
483  *
484  */
485 void fld_cache_delete(struct fld_cache *cache,
486                       const struct lu_seq_range *range)
487 {
488         write_lock(&cache->fci_lock);
489         fld_cache_delete_nolock(cache, range);
490         write_unlock(&cache->fci_lock);
491 }
492
493 struct fld_cache_entry *
494 fld_cache_entry_lookup_nolock(struct fld_cache *cache,
495                               const struct lu_seq_range *range)
496 {
497         struct fld_cache_entry *flde;
498         struct fld_cache_entry *got = NULL;
499         struct list_head *head;
500
501         head = &cache->fci_entries_head;
502         list_for_each_entry(flde, head, fce_list) {
503                 if (range->lsr_start == flde->fce_range.lsr_start ||
504                    (range->lsr_end == flde->fce_range.lsr_end &&
505                     range->lsr_flags == flde->fce_range.lsr_flags)) {
506                         got = flde;
507                         break;
508                 }
509         }
510
511         RETURN(got);
512 }
513
514 /**
515  * Lookup the cache entry matching \a range in the fld cache.
516  */
517 struct fld_cache_entry *
518 fld_cache_entry_lookup(struct fld_cache *cache,
519                        const struct lu_seq_range *range)
520 {
521         struct fld_cache_entry *got = NULL;
522         ENTRY;
523
524         read_lock(&cache->fci_lock);
525         got = fld_cache_entry_lookup_nolock(cache, range);
526         read_unlock(&cache->fci_lock);
527
528         RETURN(got);
529 }
530
531 /**
532  * Lookup the range containing \a seq in the fld cache and store it in \a range.
533  */
534 int fld_cache_lookup(struct fld_cache *cache,
535                      const u64 seq, struct lu_seq_range *range)
536 {
537         struct fld_cache_entry *flde;
538         struct fld_cache_entry *prev = NULL;
539         struct list_head *head;
540         ENTRY;
541
542         read_lock(&cache->fci_lock);
543         head = &cache->fci_entries_head;
544
545         cache->fci_stat.fst_count++;
546         list_for_each_entry(flde, head, fce_list) {
547                 if (flde->fce_range.lsr_start > seq) {
548                         if (prev != NULL)
549                                 *range = prev->fce_range;
550                         break;
551                 }
552
553                 prev = flde;
554                 if (range_within(&flde->fce_range, seq)) {
555                         *range = flde->fce_range;
556
557                         cache->fci_stat.fst_cache++;
558                         read_unlock(&cache->fci_lock);
559                         RETURN(0);
560                 }
561         }
562         read_unlock(&cache->fci_lock);
563         RETURN(-ENOENT);
564 }
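
/*
 * A minimal lookup sketch (the sequence number is illustrative, and
 * "cache" is assumed to come from fld_cache_init()): find which index a
 * sequence lives on; a -ENOENT return means the range is not cached and
 * the caller would have to ask the FLD server instead.
 *
 *	struct lu_seq_range range;
 *	int rc;
 *
 *	rc = fld_cache_lookup(cache, 0x400ULL, &range);
 *	if (rc == 0)
 *		CDEBUG(D_INFO, "seq "LPX64" is on index %u\n",
 *		       0x400ULL, range.lsr_index);
 */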