/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * lustre/fld/fld_cache.c
 * FLD (Fids Location Database)
 *
 * Copyright (C) 2006 Cluster File Systems, Inc.
 * Author: Yury Umanets <umka@clusterfs.com>
 *
 * This file is part of the Lustre file system, http://www.lustre.org
 * Lustre is a trademark of Cluster File Systems, Inc.
 *
 * You may have signed or agreed to another license before downloading
 * this software. If so, you are bound by the terms and conditions
 * of that agreement, and the following does not apply to you. See the
 * LICENSE file included with this distribution for more information.
 *
 * If you did not agree to a different license, then this copy of Lustre
 * is open source software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * In either case, Lustre is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * license text for more details.
 */
#ifndef EXPORT_SYMTAB
# define EXPORT_SYMTAB
#endif
#define DEBUG_SUBSYSTEM S_FLD

#ifdef __KERNEL__
# include <libcfs/libcfs.h>
# include <linux/module.h>
# include <linux/jbd.h>
# include <asm/div64.h>
#else /* __KERNEL__ */
# include <liblustre.h>
# include <libcfs/list.h>
#endif

#include <obd.h>
#include <obd_class.h>
#include <lustre_ver.h>
#include <obd_support.h>
#include <lprocfs_status.h>

#include <dt_object.h>
#include <md_object.h>
#include <lustre_req_layout.h>
#include <lustre_fld.h>
#include "fld_internal.h"
#ifdef __KERNEL__
static inline __u32 fld_cache_hash(seqno_t seq)
{
        return (__u32)seq;
}
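/*
 * Drop every entry from the cache: walk all hash collision lists, unlink
 * each entry from its bucket and from the LRU list and free it.
 */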
void fld_cache_flush(struct fld_cache *cache)
{
        struct fld_cache_entry *flde;
        struct hlist_head *bucket;
        struct hlist_node *scan;
        struct hlist_node *next;
        int i;
        ENTRY;

        /* Free all cache entries. */
        spin_lock(&cache->fci_lock);
        for (i = 0; i < cache->fci_hash_size; i++) {
                bucket = cache->fci_hash_table + i;
                hlist_for_each_entry_safe(flde, scan, next, bucket, fce_list) {
                        hlist_del_init(&flde->fce_list);
                        list_del_init(&flde->fce_lru);
                        cache->fci_cache_count--;
                        OBD_FREE_PTR(flde);
                }
        }
        spin_unlock(&cache->fci_lock);
        EXIT;
}
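/*
 * Allocate and set up a new FLD cache: LRU list, lock, statistics and a
 * hash table with @hash_size (a power of two) collision lists. Returns the
 * new cache or ERR_PTR(-ENOMEM) on allocation failure.
 */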
struct fld_cache *fld_cache_init(const char *name, int hash_size,
                                 int cache_size, int cache_threshold)
{
        struct fld_cache *cache;
        int i;
        ENTRY;

        LASSERT(name != NULL);
        LASSERT(IS_PO2(hash_size));
        LASSERT(cache_threshold < cache_size);

        OBD_ALLOC_PTR(cache);
        if (cache == NULL)
                RETURN(ERR_PTR(-ENOMEM));

        INIT_LIST_HEAD(&cache->fci_lru);

        cache->fci_cache_count = 0;
        spin_lock_init(&cache->fci_lock);

        strncpy(cache->fci_name, name,
                sizeof(cache->fci_name));

        cache->fci_hash_size = hash_size;
        cache->fci_cache_size = cache_size;
        cache->fci_threshold = cache_threshold;

        /* Init fld cache info. */
        cache->fci_hash_mask = hash_size - 1;
        OBD_ALLOC(cache->fci_hash_table,
                  hash_size * sizeof(*cache->fci_hash_table));
        if (cache->fci_hash_table == NULL) {
                OBD_FREE_PTR(cache);
                RETURN(ERR_PTR(-ENOMEM));
        }

        for (i = 0; i < hash_size; i++)
                INIT_HLIST_HEAD(&cache->fci_hash_table[i]);
        memset(&cache->fci_stat, 0, sizeof(cache->fci_stat));

        CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n",
               cache->fci_name, cache_size, cache_threshold);

        RETURN(cache);
}
EXPORT_SYMBOL(fld_cache_init);
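/*
 * Tear the cache down: flush all entries, print hit statistics and free the
 * hash table together with the cache structure itself.
 */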
void fld_cache_fini(struct fld_cache *cache)
{
        __u64 pct;
        ENTRY;

        LASSERT(cache != NULL);
        fld_cache_flush(cache);

        if (cache->fci_stat.fst_count > 0) {
                pct = cache->fci_stat.fst_cache * 100;
                do_div(pct, cache->fci_stat.fst_count);
        } else {
                pct = 0;
        }

        printk("FLD cache statistics (%s):\n", cache->fci_name);
        printk("  Total reqs: "LPU64"\n", cache->fci_stat.fst_count);
        printk("  Cache reqs: "LPU64"\n", cache->fci_stat.fst_cache);
        printk("  Saved RPCs: "LPU64"\n", cache->fci_stat.fst_inflight);
        printk("  Cache hits: "LPU64"%%\n", pct);

        OBD_FREE(cache->fci_hash_table, cache->fci_hash_size *
                 sizeof(*cache->fci_hash_table));
        OBD_FREE_PTR(cache);
        EXIT;
}
EXPORT_SYMBOL(fld_cache_fini);
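/* Return the hash collision list (bucket) that @seq maps to. */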
static inline struct hlist_head *
fld_cache_bucket(struct fld_cache *cache, seqno_t seq)
{
        return cache->fci_hash_table + (fld_cache_hash(seq) &
                                        cache->fci_hash_mask);
}
/*
 * Check if cache needs to be shrunk. If so - do it. Tries to keep all
 * collision lists well balanced. That is, check all of them and remove one
 * entry in list and so on until cache is shrunk enough.
 */
static int fld_cache_shrink(struct fld_cache *cache)
{
        struct fld_cache_entry *flde;
        struct list_head *curr;
        int num = 0;
        ENTRY;

        LASSERT(cache != NULL);

        if (cache->fci_cache_count < cache->fci_cache_size)
                RETURN(0);

        curr = cache->fci_lru.prev;
        while (cache->fci_cache_count + cache->fci_threshold >
               cache->fci_cache_size && curr != &cache->fci_lru) {
                flde = list_entry(curr, struct fld_cache_entry, fce_lru);
                curr = curr->prev;

                /* Keep inflight entries, they are still waited on. */
                if (flde->fce_inflight)
                        continue;

                hlist_del_init(&flde->fce_list);
                list_del_init(&flde->fce_lru);
                cache->fci_cache_count--;
                OBD_FREE_PTR(flde);
                num++;
        }

        CDEBUG(D_INFO, "%s: FLD cache - Shrunk by "
               "%d entries\n", cache->fci_name, num);
        RETURN(0);
}
int fld_cache_insert_inflight(struct fld_cache *cache, seqno_t seq)
{
        struct fld_cache_entry *flde, *fldt;
        struct hlist_head *bucket;
        struct hlist_node *scan;
        ENTRY;

        spin_lock(&cache->fci_lock);

        /* Check if cache already has the entry with such a seq. */
        bucket = fld_cache_bucket(cache, seq);
        hlist_for_each_entry(fldt, scan, bucket, fce_list) {
                if (fldt->fce_seq == seq) {
                        /* An entry for this seq already exists. */
                        spin_unlock(&cache->fci_lock);
                        RETURN(-EEXIST);
                }
        }
        spin_unlock(&cache->fci_lock);

        /* Allocate new entry. */
        OBD_ALLOC_PTR(flde);
        if (flde == NULL)
                RETURN(-ENOMEM);

        /*
         * Check if cache has the entry with such a seq again. It could be
         * added while we were allocating new entry.
         */
        spin_lock(&cache->fci_lock);
        hlist_for_each_entry(fldt, scan, bucket, fce_list) {
                if (fldt->fce_seq == seq) {
                        spin_unlock(&cache->fci_lock);
                        OBD_FREE_PTR(flde);
                        RETURN(0);
                }
        }

        /* Add new entry to cache and lru list. */
        INIT_HLIST_NODE(&flde->fce_list);
        flde->fce_inflight = 1;
        flde->fce_invalid = 1;
        cfs_waitq_init(&flde->fce_waitq);
        flde->fce_seq = seq;

        hlist_add_head(&flde->fce_list, bucket);
        list_add(&flde->fce_lru, &cache->fci_lru);
        cache->fci_cache_count++;

        spin_unlock(&cache->fci_lock);
        RETURN(0);
}
EXPORT_SYMBOL(fld_cache_insert_inflight);
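/*
 * Insert a @seq -> @mds mapping. If an inflight entry for @seq already
 * exists it is filled in and its waiters are woken up; otherwise a new
 * entry is allocated, shrinking the cache first if it has grown too big.
 */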
int fld_cache_insert(struct fld_cache *cache,
                     seqno_t seq, mdsno_t mds)
{
        struct fld_cache_entry *flde, *fldt;
        struct hlist_head *bucket;
        struct hlist_node *scan;
        int rc;
        ENTRY;

        spin_lock(&cache->fci_lock);

        /* Check if need to shrink cache. */
        rc = fld_cache_shrink(cache);
        if (rc) {
                spin_unlock(&cache->fci_lock);
                RETURN(rc);
        }

        /* Check if cache already has the entry with such a seq. */
        bucket = fld_cache_bucket(cache, seq);
        hlist_for_each_entry(fldt, scan, bucket, fce_list) {
                if (fldt->fce_seq == seq) {
                        if (fldt->fce_inflight) {
                                /* set mds for inflight entry */
                                fldt->fce_mds = mds;
                                fldt->fce_inflight = 0;
                                fldt->fce_invalid = 0;
                                cfs_waitq_signal(&fldt->fce_waitq);
                                rc = 0;
                        } else
                                rc = -EEXIST;
                        spin_unlock(&cache->fci_lock);
                        RETURN(rc);
                }
        }
        spin_unlock(&cache->fci_lock);

        /* Allocate new entry. */
        OBD_ALLOC_PTR(flde);
        if (flde == NULL)
                RETURN(-ENOMEM);

        /*
         * Check if cache has the entry with such a seq again. It could be
         * added while we were allocating new entry.
         */
        spin_lock(&cache->fci_lock);
        hlist_for_each_entry(fldt, scan, bucket, fce_list) {
                if (fldt->fce_seq == seq) {
                        spin_unlock(&cache->fci_lock);
                        OBD_FREE_PTR(flde);
                        RETURN(0);
                }
        }

        /* Add new entry to cache and lru list. */
        INIT_HLIST_NODE(&flde->fce_list);
        flde->fce_mds = mds;
        flde->fce_seq = seq;
        flde->fce_inflight = 0;
        flde->fce_invalid = 0;
        cfs_waitq_init(&flde->fce_waitq);

        hlist_add_head(&flde->fce_list, bucket);
        list_add(&flde->fce_lru, &cache->fci_lru);
        cache->fci_cache_count++;

        spin_unlock(&cache->fci_lock);
        RETURN(0);
}
EXPORT_SYMBOL(fld_cache_insert);
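/*
 * Remove the entry for @seq from the cache. If the entry is still inflight,
 * it is marked invalid and its waiters are woken up before it is freed.
 */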
void fld_cache_delete(struct fld_cache *cache, seqno_t seq)
{
        struct fld_cache_entry *flde;
        struct hlist_node *scan, *n;
        struct hlist_head *bucket;
        ENTRY;

        bucket = fld_cache_bucket(cache, seq);

        spin_lock(&cache->fci_lock);
        hlist_for_each_entry_safe(flde, scan, n, bucket, fce_list) {
                if (flde->fce_seq == seq) {
                        hlist_del_init(&flde->fce_list);
                        list_del_init(&flde->fce_lru);
                        if (flde->fce_inflight) {
                                flde->fce_inflight = 0;
                                flde->fce_invalid = 1;
                                cfs_waitq_signal(&flde->fce_waitq);
                        }
                        cache->fci_cache_count--;
                        OBD_FREE_PTR(flde);
                        break;
                }
        }
        spin_unlock(&cache->fci_lock);
        EXIT;
}
EXPORT_SYMBOL(fld_cache_delete);
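/* Wait condition used by fld_cache_lookup() while an entry is inflight. */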
static int fld_check_inflight(struct fld_cache_entry *flde)
{
        return (flde->fce_inflight);
}

/*
 * Look @seq up in the cache. A hit on a regular entry returns its mds
 * number and moves the entry to the head of the LRU list; a hit on an
 * inflight entry waits for the pending lookup RPC to finish first.
 */
int fld_cache_lookup(struct fld_cache *cache,
                     seqno_t seq, mdsno_t *mds)
{
        struct fld_cache_entry *flde;
        struct hlist_node *scan, *n;
        struct hlist_head *bucket;
        ENTRY;

        bucket = fld_cache_bucket(cache, seq);

        spin_lock(&cache->fci_lock);
        cache->fci_stat.fst_count++;
        hlist_for_each_entry_safe(flde, scan, n, bucket, fce_list) {
                if (flde->fce_seq == seq) {
                        if (flde->fce_inflight) {
                                /* lookup RPC is inflight, need to wait */
                                struct l_wait_info lwi;
                                spin_unlock(&cache->fci_lock);
                                lwi = LWI_TIMEOUT(0, NULL, NULL);
                                l_wait_event(flde->fce_waitq,
                                             !fld_check_inflight(flde), &lwi);
                                LASSERT(!flde->fce_inflight);
                                if (flde->fce_invalid)
                                        RETURN(-ENOENT);

                                *mds = flde->fce_mds;
                                cache->fci_stat.fst_inflight++;
                        } else {
                                LASSERT(!flde->fce_invalid);
                                *mds = flde->fce_mds;
                                list_del(&flde->fce_lru);
                                list_add(&flde->fce_lru, &cache->fci_lru);
                                cache->fci_stat.fst_cache++;
                                spin_unlock(&cache->fci_lock);
                        }
                        RETURN(0);
                }
        }
        spin_unlock(&cache->fci_lock);
        RETURN(-ENOENT);
}
EXPORT_SYMBOL(fld_cache_lookup);
#else /* !__KERNEL__ */
int fld_cache_insert_inflight(struct fld_cache *cache, seqno_t seq)
{
        return -ENOTSUPP;
}
EXPORT_SYMBOL(fld_cache_insert_inflight);

int fld_cache_insert(struct fld_cache *cache,
                     seqno_t seq, mdsno_t mds)
{
        return -ENOTSUPP;
}
EXPORT_SYMBOL(fld_cache_insert);

void fld_cache_delete(struct fld_cache *cache,
                      seqno_t seq)
{
        return;
}
EXPORT_SYMBOL(fld_cache_delete);

int fld_cache_lookup(struct fld_cache *cache,
                     seqno_t seq, mdsno_t *mds)
{
        return -ENOTSUPP;
}
EXPORT_SYMBOL(fld_cache_lookup);
#endif /* __KERNEL__ */