/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2012, 2014, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/fld/fld_cache.c
 *
 * FLD (Fids Location Database)
 *
 * Author: Pravin Shelar <pravin.shelar@sun.com>
 * Author: Yury Umanets <umka@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_FLD

#include <libcfs/libcfs.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <obd_support.h>
#include <lustre_fld.h>
#include "fld_internal.h"

/**
 * Create a new FLD cache.
 */
struct fld_cache *fld_cache_init(const char *name, int cache_size,
				 int cache_threshold)
{
	struct fld_cache *cache;

	ENTRY;

	LASSERT(name != NULL);
	LASSERT(cache_threshold < cache_size);
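	/*
	 * The threshold must stay strictly below the cache size: once the
	 * cache fills up, fld_cache_shrink() evicts entries until at most
	 * (cache_size - cache_threshold) of them remain.
	 */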

	OBD_ALLOC_PTR(cache);
	if (cache == NULL)
		RETURN(ERR_PTR(-ENOMEM));

	INIT_LIST_HEAD(&cache->fci_entries_head);
	INIT_LIST_HEAD(&cache->fci_lru);

	cache->fci_cache_count = 0;
	rwlock_init(&cache->fci_lock);

	strlcpy(cache->fci_name, name, sizeof(cache->fci_name));

	cache->fci_cache_size = cache_size;
	cache->fci_threshold = cache_threshold;

	/* Init fld cache info. */
	memset(&cache->fci_stat, 0, sizeof(cache->fci_stat));

	CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n",
	       cache->fci_name, cache_size, cache_threshold);

	RETURN(cache);
}

/**
 * Destroy FLD cache and release all cached entries.
 */
void fld_cache_fini(struct fld_cache *cache)
{
	LASSERT(cache != NULL);
	fld_cache_flush(cache);

	CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name);
	CDEBUG(D_INFO, "  Cache reqs: %llu\n", cache->fci_stat.fst_cache);
	CDEBUG(D_INFO, "  Total reqs: %llu\n", cache->fci_stat.fst_count);

	OBD_FREE_PTR(cache);
}

/**
 * Delete the given node from the cache lists and free it.
 */
static void fld_cache_entry_delete(struct fld_cache *cache,
				   struct fld_cache_entry *node)
{
	list_del(&node->fce_list);
	list_del(&node->fce_lru);
	cache->fci_cache_count--;
	OBD_FREE_PTR(node);
}

/**
 * Fix up the list by checking each entry against the NEXT entry in order:
 * merge adjacent ranges, resolve overlaps and drop duplicates.
 */
static void fld_fix_new_list(struct fld_cache *cache)
{
	struct fld_cache_entry *f_curr;
	struct fld_cache_entry *f_next;
	struct lu_seq_range *c_range;
	struct lu_seq_range *n_range;
	struct list_head *head = &cache->fci_entries_head;

	ENTRY;

restart_fixup:

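	/*
	 * Entries are kept sorted by lsr_start, so only neighbouring
	 * entries can merge or overlap.  Whenever a pair is modified,
	 * restart from the top, since the fixed-up range may now touch
	 * the following entry as well.
	 */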
	list_for_each_entry_safe(f_curr, f_next, head, fce_list) {
		c_range = &f_curr->fce_range;
		n_range = &f_next->fce_range;

		LASSERT(lu_seq_range_is_sane(c_range));
		if (&f_next->fce_list == head)
			break;

		if (c_range->lsr_flags != n_range->lsr_flags)
			continue;

		LASSERTF(c_range->lsr_start <= n_range->lsr_start,
			 "cur lsr_start "DRANGE" next lsr_start "DRANGE"\n",
			 PRANGE(c_range), PRANGE(n_range));

		/* check merge possibility with next range */
		if (c_range->lsr_end == n_range->lsr_start) {
			if (c_range->lsr_index != n_range->lsr_index)
				continue;
			n_range->lsr_start = c_range->lsr_start;
			fld_cache_entry_delete(cache, f_curr);
			continue;
		}

		/* check if current range overlaps with next range. */
		if (n_range->lsr_start < c_range->lsr_end) {
			if (c_range->lsr_index == n_range->lsr_index) {
				n_range->lsr_start = c_range->lsr_start;
				n_range->lsr_end = max(c_range->lsr_end,
						       n_range->lsr_end);
				fld_cache_entry_delete(cache, f_curr);
			} else {
				if (n_range->lsr_end <= c_range->lsr_end) {
					*n_range = *c_range;
					fld_cache_entry_delete(cache, f_curr);
				} else
					n_range->lsr_start = c_range->lsr_end;
			}

			/*
			 * We could overlap the following range too;
			 * better to restart the scan.
			 */
			goto restart_fixup;
		}

		/* kill duplicates */
		if (c_range->lsr_start == n_range->lsr_start &&
		    c_range->lsr_end == n_range->lsr_end)
			fld_cache_entry_delete(cache, f_curr);
	}

	EXIT;
}

/**
 * Add a node to the FLD cache and fix up the resulting list.
 */
static inline void fld_cache_entry_add(struct fld_cache *cache,
				       struct fld_cache_entry *f_new,
				       struct list_head *pos)
{
	list_add(&f_new->fce_list, pos);
	list_add(&f_new->fce_lru, &cache->fci_lru);

	cache->fci_cache_count++;
	fld_fix_new_list(cache);
}

/**
 * Check if the cache needs to be shrunk. If so, drop entries from the
 * LRU list until the cache is small enough.
 */
static int fld_cache_shrink(struct fld_cache *cache)
{
	int num = 0;

	ENTRY;

	LASSERT(cache != NULL);

	if (cache->fci_cache_count < cache->fci_cache_size)
		RETURN(0);

	while (cache->fci_cache_count + cache->fci_threshold >
	       cache->fci_cache_size &&
	       !list_empty(&cache->fci_lru)) {
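		/*
		 * Evict from the tail of the LRU list; entries are added
		 * at the head in fld_cache_entry_add(), so the tail holds
		 * the oldest entries.
		 */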
		struct fld_cache_entry *flde =
			list_last_entry(&cache->fci_lru, struct fld_cache_entry,
					fce_lru);

		fld_cache_entry_delete(cache, flde);
		num++;
	}

	CDEBUG(D_INFO, "%s: FLD cache - Shrunk by %d entries\n",
	       cache->fci_name, num);

	RETURN(0);
}

/**
 * Kill all FLD cache entries.
 */
void fld_cache_flush(struct fld_cache *cache)
{
	ENTRY;

	write_lock(&cache->fci_lock);
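	/*
	 * Force the cache size to zero so that fld_cache_shrink() below
	 * evicts every remaining entry.
	 */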
	cache->fci_cache_size = 0;
	fld_cache_shrink(cache);
	write_unlock(&cache->fci_lock);

	EXIT;
}

/**
 * Punch a hole in an existing range: split the existing range around the
 * new entry and insert the new entry in between.
 */
static void fld_cache_punch_hole(struct fld_cache *cache,
				 struct fld_cache_entry *f_curr,
				 struct fld_cache_entry *f_new)
{
	const struct lu_seq_range *range = &f_new->fce_range;
	const u64 new_start = range->lsr_start;
	const u64 new_end = range->lsr_end;
	struct fld_cache_entry *fldt;

	ENTRY;
	OBD_ALLOC_GFP(fldt, sizeof(*fldt), GFP_ATOMIC);
	if (!fldt) {
		OBD_FREE_PTR(f_new);
		EXIT;
		/* Overlap is not allowed, so don't mess up the list. */
		return;
	}
	/*
	 * Break the f_curr range into three ranges:
	 *	f_curr, f_new, fldt
	 */
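	/*
	 * Before:  f_curr = [c_start, c_end)
	 * After:   f_curr = [c_start, new_start)
	 *          f_new  = [new_start, new_end)
	 *          fldt   = [new_end, c_end)
	 */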

	/* fldt */
	fldt->fce_range.lsr_start = new_end;
	fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end;
	fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index;

	/* f_curr */
	f_curr->fce_range.lsr_end = new_start;

	/* add these two entries to list */
	fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
	fld_cache_entry_add(cache, fldt, &f_new->fce_list);

	/* no need to fixup */
	EXIT;
}

/**
 * handle range overlap in fld cache.
 */
static void fld_cache_overlap_handle(struct fld_cache *cache,
				     struct fld_cache_entry *f_curr,
				     struct fld_cache_entry *f_new)
{
	const struct lu_seq_range *range = &f_new->fce_range;
	const u64 new_start = range->lsr_start;
	const u64 new_end = range->lsr_end;
	const u32 mdt = range->lsr_index;

	/*
	 * This is the overlap case.  These cases only check for overlap
	 * with the previous range; fld_fix_new_list() handles overlap
	 * with the next range.
	 */

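	/* Same target index: just extend the existing range to cover both. */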
	if (f_curr->fce_range.lsr_index == mdt) {
		f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start,
						  new_start);

		f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end,
						new_end);

		OBD_FREE_PTR(f_new);
		fld_fix_new_list(cache);

	} else if (new_start <= f_curr->fce_range.lsr_start &&
		   f_curr->fce_range.lsr_end <= new_end) {
		/* case 1: the new range completely covers the existing
		 * range, e.g. the whole range was migrated. Update the
		 * cache entry.
		 */

		f_curr->fce_range = *range;
		OBD_FREE_PTR(f_new);
		fld_fix_new_list(cache);

	} else if (f_curr->fce_range.lsr_start < new_start &&
		   new_end < f_curr->fce_range.lsr_end) {
		/* case 2: the new range fits within the existing range. */

		fld_cache_punch_hole(cache, f_curr, f_new);

	} else if (new_end <= f_curr->fce_range.lsr_end) {
		/* case 3: overlap:
		 *         [new_start [c_start  new_end)  c_end)
		 */

		LASSERT(new_start <= f_curr->fce_range.lsr_start);

		f_curr->fce_range.lsr_start = new_end;
		fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev);

	} else if (f_curr->fce_range.lsr_start <= new_start) {
		/* case 4: overlap:
		 *         [c_start [new_start c_end) new_end)
		 */

		LASSERT(f_curr->fce_range.lsr_end <= new_end);

		f_curr->fce_range.lsr_end = new_start;
		fld_cache_entry_add(cache, f_new, &f_curr->fce_list);
	} else
		CERROR("NEW range ="DRANGE" curr = "DRANGE"\n",
		       PRANGE(range), PRANGE(&f_curr->fce_range));
}

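/**
 * Allocate a new cache entry covering \a range.
 *
 * \retval pointer to the new entry on success
 * \retval ERR_PTR(-ENOMEM) if allocation fails
 */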
struct fld_cache_entry *
fld_cache_entry_create(const struct lu_seq_range *range)
{
	struct fld_cache_entry *f_new;

	LASSERT(lu_seq_range_is_sane(range));

	OBD_ALLOC_PTR(f_new);
	if (!f_new)
		RETURN(ERR_PTR(-ENOMEM));

	f_new->fce_range = *range;
	RETURN(f_new);
}

/**
 * Insert FLD entry in FLD cache.
 *
 * This function handles all cases of merging and breaking up of
 * ranges.
 */
int fld_cache_insert_nolock(struct fld_cache *cache,
			    struct fld_cache_entry *f_new)
{
	struct fld_cache_entry *f_curr;
	struct fld_cache_entry *n;
	struct list_head *head;
	struct list_head *prev = NULL;
	const u64 new_start = f_new->fce_range.lsr_start;
	const u64 new_end = f_new->fce_range.lsr_end;
	__u32 new_flags = f_new->fce_range.lsr_flags;

	ENTRY;

	/*
	 * Duplicate entries are eliminated during insertion, so there is
	 * no need to search for the new entry before starting the
	 * insertion loop.
	 */
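	/*
	 * fci_entries_head is kept sorted by lsr_start.  Walk it until the
	 * first entry lying entirely past the new range; any overlapping
	 * entry with matching flags is merged via fld_cache_overlap_handle().
	 */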

	fld_cache_shrink(cache);

	head = &cache->fci_entries_head;

	list_for_each_entry_safe(f_curr, n, head, fce_list) {
		/* The new range ends before this entry starts (or ends
		 * exactly at its start but with different flags), so the
		 * insertion point has been found.
		 */
		if (new_end < f_curr->fce_range.lsr_start ||
		   (new_end == f_curr->fce_range.lsr_start &&
		    new_flags != f_curr->fce_range.lsr_flags))
			break;

		prev = &f_curr->fce_list;
		/* Check whether the new range overlaps this entry. */
		if (new_start < f_curr->fce_range.lsr_end &&
		    new_flags == f_curr->fce_range.lsr_flags) {
			fld_cache_overlap_handle(cache, f_curr, f_new);
			goto out;
		}
	}

	if (prev == NULL)
		prev = head;

	CDEBUG(D_INFO, "insert range "DRANGE"\n", PRANGE(&f_new->fce_range));
	/* Add new entry to cache and lru list. */
	fld_cache_entry_add(cache, f_new, prev);
out:
	RETURN(0);
}

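/**
 * Allocate a cache entry for \a range and insert it under fci_lock.
 *
 * \retval 0 on success
 * \retval negative errno if the entry could not be created
 */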
int fld_cache_insert(struct fld_cache *cache,
		     const struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	int rc;

	flde = fld_cache_entry_create(range);
	if (IS_ERR(flde))
		RETURN(PTR_ERR(flde));

	write_lock(&cache->fci_lock);
	rc = fld_cache_insert_nolock(cache, flde);
	write_unlock(&cache->fci_lock);
	if (rc)
		OBD_FREE_PTR(flde);

	RETURN(rc);
}

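/**
 * Delete the cache entry matching \a range.
 *
 * The _nolock variant does not take fci_lock itself; the caller is
 * expected to serialize access to the cache.
 */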
void fld_cache_delete_nolock(struct fld_cache *cache,
			     const struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	struct fld_cache_entry *tmp;
	struct list_head *head;

	head = &cache->fci_entries_head;
	list_for_each_entry_safe(flde, tmp, head, fce_list) {
		/* Delete the first entry whose range matches. */
		if (range->lsr_start == flde->fce_range.lsr_start ||
		   (range->lsr_end == flde->fce_range.lsr_end &&
		    range->lsr_flags == flde->fce_range.lsr_flags)) {
			fld_cache_entry_delete(cache, flde);
			break;
		}
	}
}

/**
 * Look up the range containing sequence \a seq in the FLD cache.
 */
int fld_cache_lookup(struct fld_cache *cache,
		     const u64 seq, struct lu_seq_range *range)
{
	struct fld_cache_entry *flde;
	struct fld_cache_entry *prev = NULL;
	struct list_head *head;

	ENTRY;

	read_lock(&cache->fci_lock);
	head = &cache->fci_entries_head;

	cache->fci_stat.fst_count++;
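	/*
	 * Entries are sorted by lsr_start, so the scan can stop at the
	 * first entry starting beyond \a seq.
	 */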
	list_for_each_entry(flde, head, fce_list) {
		if (flde->fce_range.lsr_start > seq) {
			if (prev != NULL)
				*range = prev->fce_range;
			break;
		}

		prev = flde;
		if (lu_seq_range_within(&flde->fce_range, seq)) {
			*range = flde->fce_range;

			cache->fci_stat.fst_cache++;
			read_unlock(&cache->fci_lock);
			RETURN(0);
		}
	}
	read_unlock(&cache->fci_lock);
	RETURN(-ENOENT);
}