1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Cache of triples - object, lock, extent
42 # define EXPORT_SYMTAB
44 #define DEBUG_SUBSYSTEM S_OSC
47 # include <linux/version.h>
48 # include <linux/module.h>
49 # include <linux/list.h>
50 #else /* __KERNEL__ */
51 # include <liblustre.h>
54 #include <lustre_dlm.h>
55 #include <lustre_cache.h>
57 #include <lustre_debug.h>
59 #include "osc_internal.h"
61 /* Adding @lock to the @cache */
/* Resolves @lockh to a referenced ldlm_lock and links it onto the cache's
 * global lock list under lc_locks_list_lock.
 * NOTE(review): the early-return path for a vanished lock and the final
 * return value are not visible in this chunk -- confirm against full file. */
62 int cache_add_lock(struct lustre_cache *cache, struct lustre_handle *lockh)
64 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
66 if (!lock) // Lock disappeared under us.
/* Serialize list manipulation against concurrent add/remove of locks. */
69 spin_lock(&cache->lc_locks_list_lock);
70 list_add_tail(&lock->l_cache_locks_list, &cache->lc_locks_list);
71 spin_unlock(&cache->lc_locks_list_lock);
78 /* Tries to add @extent to lock represented by @lockh if non-NULL, otherwise
79 just tries to match some suitable lock by resource and data contained in
81 /* Should be called with oap->lock held (except on initial addition, see
82 comment in osc_request.c*/
/* Attaches @extent (one page) to a covering extent lock: either the caller-
 * supplied @lockh, or a lock found via ldlm_lock_match() on @res.  On success
 * the extent is queued on the lock's l_extents_list and oap_ldlm_lock is set.
 * Returns -ENOLCK (or a negative match error) if no covering lock exists. */
83 int cache_add_extent(struct lustre_cache *cache, struct ldlm_res_id *res,
84 struct osc_async_page *extent, struct lustre_handle *lockh)
86 struct lustre_handle tmplockh;
87 ldlm_policy_data_t tmpex;
88 struct ldlm_lock *lock = NULL;
91 /* Don't add anything second time */
92 if (!list_empty(&extent->oap_page_list)) {
/* Fast path: caller already knows which lock covers this page. */
97 if (lockh && lustre_handle_is_used(lockh)) {
98 lock = ldlm_handle2lock(lockh);
/* Sanity: the caller's lock extent must fully cover this page. */
102 LASSERTF(lock->l_policy_data.l_extent.start <=
103 extent->oap_obj_off &&
104 extent->oap_obj_off + CFS_PAGE_SIZE - 1 <=
105 lock->l_policy_data.l_extent.end,
106 "Got wrong lock [" LPU64 "," LPU64 "] for page with "
107 "offset " LPU64 "\n",
108 lock->l_policy_data.l_extent.start,
109 lock->l_policy_data.l_extent.end, extent->oap_obj_off);
112 /* Real extent width calculation here once we have real
/* Slow path: build a one-page extent descriptor and search for a
 * granted lock covering it. */
115 tmpex.l_extent.start = extent->oap_obj_off;
116 tmpex.l_extent.end = tmpex.l_extent.start + CFS_PAGE_SIZE - 1;
118 /* XXX find lock from extent or something like that */
119 /* The lock mode does not matter. If this is dirty page - then
120 * there could be only one PW lock. If the page is clean,
121 * any PR lock is good
123 mode = ldlm_lock_match(cache->lc_obd->obd_namespace,
124 LDLM_FL_BLOCK_GRANTED |
125 LDLM_FL_CBPENDING, res, LDLM_EXTENT,
126 &tmpex, LCK_PW | LCK_PR, &tmplockh);
129 CDEBUG(D_CACHE, "No lock to attach " LPU64 "->" LPU64
130 " extent to!\n", tmpex.l_extent.start,
132 RETURN((mode < 0) ? mode : -ENOLCK);
135 lock = ldlm_handle2lock(&tmplockh);
136 if (!lock) { // Race - lock disappeared under us (eviction?)
137 CDEBUG(D_CACHE, "Newly matched lock just disappeared "
/* Drop the reference ldlm_lock_match() took on our behalf; the
 * list linkage below is what keeps the association alive. */
141 ldlm_lock_decref(&tmplockh, mode);
/* Publish the page on the lock's extent list under its list lock. */
144 spin_lock(&lock->l_extents_list_lock);
145 list_add_tail(&extent->oap_page_list, &lock->l_extents_list);
146 spin_unlock(&lock->l_extents_list_lock);
147 extent->oap_ldlm_lock = lock;
/* Take a reference on a page-removal callback element so it cannot be
 * freed while its callback is running outside lc_page_removal_cb_lock. */
153 static void cache_extent_removal_get(struct page_removal_cb_element *element)
155 atomic_inc(&element->prce_refcnt);
/* Drop a reference on a callback element; frees it when the count hits 0. */
158 static void cache_extent_removal_put(struct page_removal_cb_element *element)
160 if(atomic_dec_and_test(&element->prce_refcnt))
161 OBD_FREE_PTR(element);
/* Invoke every registered page-removal callback for @data (a struct page).
 * Each element is pinned with a refcount and the read lock is dropped around
 * the callback invocation, so callbacks may sleep; the lock is re-taken to
 * continue walking the list.  @discard is forwarded to the callbacks. */
164 static int cache_extent_removal_event(struct lustre_cache *cache,
165 void *data, int discard)
167 struct page *page = data;
168 struct list_head *iter;
169 struct page_removal_cb_element *element;
171 read_lock(&cache->lc_page_removal_cb_lock);
172 iter = cache->lc_page_removal_callback_list.next;
173 while(iter != &cache->lc_page_removal_callback_list) {
174 element = list_entry(iter, struct page_removal_cb_element, prce_list);
/* Pin the element, then drop the lock so the callback can run
 * without holding lc_page_removal_cb_lock. */
175 cache_extent_removal_get(element);
176 read_unlock(&cache->lc_page_removal_cb_lock);
178 element->prce_callback(page, discard);
180 read_lock(&cache->lc_page_removal_cb_lock);
/* May free the element if it was concurrently unregistered. */
182 cache_extent_removal_put(element);
184 read_unlock(&cache->lc_page_removal_cb_lock);
189 /* Registers set of pin/remove callbacks for extents. Current limitation is
190 there could be only one pin_cb per cache.
191 @pin_cb is called when we have the page locked to pin it in memory so that
192 it does not disappear after we release page lock (which we need to do
194 @func_cb is removal callback that is called after page and all spinlocks are
195 released, and is supposed to clean the page and remove it from all
196 (vfs) caches it might be in */
197 int cache_add_extent_removal_cb(struct lustre_cache *cache,
198 obd_page_removal_cb_t func_cb,
199 obd_pin_extent_cb pin_cb)
201 struct page_removal_cb_element *element;
206 OBD_ALLOC_PTR(element);
209 element->prce_callback = func_cb;
/* Initial reference is owned by the list; dropped on unregister. */
210 atomic_set(&element->prce_refcnt, 1);
212 write_lock(&cache->lc_page_removal_cb_lock);
213 list_add_tail(&element->prce_list,
214 &cache->lc_page_removal_callback_list);
215 write_unlock(&cache->lc_page_removal_cb_lock);
/* Single pin callback per cache: the last registration wins. */
217 cache->lc_pin_extent_cb = pin_cb;
220 EXPORT_SYMBOL(cache_add_extent_removal_cb);
222 /* Unregister extent removal callback registered earlier. If the list of
223 registered removal callbacks becomes empty, we also clear pin callback
224 since it could only be one */
225 int cache_del_extent_removal_cb(struct lustre_cache *cache,
226 obd_page_removal_cb_t func_cb)
229 struct page_removal_cb_element *element, *t;
231 write_lock(&cache->lc_page_removal_cb_lock);
232 list_for_each_entry_safe(element, t,
233 &cache->lc_page_removal_callback_list,
235 if (element->prce_callback == func_cb) {
236 list_del(&element->prce_list);
/* Drop the write lock around the put: freeing must not
 * happen under lc_page_removal_cb_lock. */
237 write_unlock(&cache->lc_page_removal_cb_lock);
239 cache_extent_removal_put(element);
240 write_lock(&cache->lc_page_removal_cb_lock);
241 /* We continue iterating the list in case this function
242 was registered more than once */
245 write_unlock(&cache->lc_page_removal_cb_lock);
/* No removal callbacks left => the single pin callback is stale too. */
247 if (list_empty(&cache->lc_page_removal_callback_list))
248 cache->lc_pin_extent_cb = NULL;
252 EXPORT_SYMBOL(cache_del_extent_removal_cb);
/* Unlink @extent from its lock's extent list; caller must already hold the
 * relevant l_extents_list_lock.  Returns whether a lock reference was still
 * attached (have_lock), which callers use to decide further cleanup. */
254 static int cache_remove_extent_nolock(struct lustre_cache *cache,
255 struct osc_async_page *extent)
257 int have_lock = !!extent->oap_ldlm_lock;
258 /* We used to check oap_ldlm_lock for non NULL here, but it might be
259 NULL, in fact, due to parallel page eviction clearing it and waiting
260 on a lock's page list lock */
261 extent->oap_ldlm_lock = NULL;
263 if (!list_empty(&extent->oap_page_list))
264 list_del_init(&extent->oap_page_list);
269 /* Request the @extent to be removed from cache and locks it belongs to. */
/* Public removal path: snapshot-and-clear oap_ldlm_lock under oap_lock,
 * then unlink the page from that lock's extent list under its list lock. */
270 void cache_remove_extent(struct lustre_cache *cache,
271 struct osc_async_page *extent)
273 struct ldlm_lock *lock;
275 spin_lock(&extent->oap_lock);
276 lock = extent->oap_ldlm_lock;
278 extent->oap_ldlm_lock = NULL;
279 spin_unlock(&extent->oap_lock);
281 /* No lock - means this extent is not in any list */
285 spin_lock(&lock->l_extents_list_lock);
286 if (!list_empty(&extent->oap_page_list))
287 list_del_init(&extent->oap_page_list);
288 spin_unlock(&lock->l_extents_list_lock);
291 /* Iterate through list of extents in given lock identified by @lockh,
292 calling @cb_func for every such extent. Also passed @data to every call.
293 Stops iterating prematurely if @cb_func returns nonzero. */
/* NOTE(review): @cb_func runs with l_extents_list_lock held, so it must not
 * sleep or re-take that lock. */
294 int cache_iterate_extents(struct lustre_cache *cache,
295 struct lustre_handle *lockh,
296 cache_iterate_extents_cb_t cb_func, void *data)
298 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
299 struct osc_async_page *extent, *t;
301 if (!lock) // Lock disappeared
303 /* Parallel page removal from mem pressure can race with us */
304 spin_lock(&lock->l_extents_list_lock);
305 list_for_each_entry_safe(extent, t, &lock->l_extents_list,
307 if (cb_func(cache, lockh, extent, data))
310 spin_unlock(&lock->l_extents_list_lock);
/* Detach every extent from @lock (called when the lock is being removed).
 * For each extent: try to re-attach it to another covering lock; if that is
 * impossible (or the lock is PW, which cannot overlap), pin the page and fire
 * the page-removal callbacks with all spinlocks dropped. */
316 static int cache_remove_extents_from_lock(struct lustre_cache *cache,
317 struct ldlm_lock *lock, void *data)
319 struct osc_async_page *extent;
324 spin_lock(&lock->l_extents_list_lock);
325 while (!list_empty(&lock->l_extents_list)) {
326 extent = list_entry(lock->l_extents_list.next,
327 struct osc_async_page, oap_page_list);
329 spin_lock(&extent->oap_lock);
330 /* If there is no lock referenced from this oap, it means
331 there is parallel page-removal process waiting to free that
332 page on l_extents_list_lock and it holds page lock.
333 We need this page to completely go away and for that to
334 happen we will just try to truncate it here too.
335 Serialisation on page lock will achieve that goal for us. */
336 /* Try to add extent back to the cache first, but only if we
337 * cancel read lock, write locks cannot have other overlapping
338 * locks. If adding is not possible (or canceling pw lock),
339 * then remove extent from cache */
340 if (!cache_remove_extent_nolock(cache, extent) ||
341 (lock->l_granted_mode == LCK_PW) ||
342 cache_add_extent(cache, &lock->l_resource->lr_name, extent,
344 /* We need to remember this oap_page value now,
345 once we release spinlocks, extent struct
346 might be freed and we endup requesting
347 page with address 0x5a5a5a5a in
348 cache_extent_removal_event */
349 ext_data = extent->oap_page;
/* Keep the page in memory across the unlocked callback. */
350 cache->lc_pin_extent_cb(extent->oap_page);
352 if (lock->l_flags & LDLM_FL_BL_AST)
353 extent->oap_async_flags |= ASYNC_HP;
/* Drop both spinlocks: the removal callbacks may sleep. */
354 spin_unlock(&extent->oap_lock);
355 spin_unlock(&lock->l_extents_list_lock);
356 cache_extent_removal_event(cache, ext_data,
359 LDLM_FL_DISCARD_DATA);
/* Re-take the list lock and re-check: the list may have
 * changed while it was dropped. */
360 spin_lock(&lock->l_extents_list_lock);
362 spin_unlock(&extent->oap_lock);
365 spin_unlock(&lock->l_extents_list_lock);
370 /* Removes @lock from cache after necessary checks. */
/* Detaches all extents from the lock, then unlinks it from the cache's
 * lock list under lc_locks_list_lock. */
371 int cache_remove_lock(struct lustre_cache *cache, struct lustre_handle *lockh)
373 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
375 if (!lock) // The lock was removed by somebody just now, nothing to do
378 cache_remove_extents_from_lock(cache, lock, NULL /*data */ );
380 spin_lock(&cache->lc_locks_list_lock);
381 list_del_init(&lock->l_cache_locks_list);
382 spin_unlock(&cache->lc_locks_list_lock);
389 /* Supposed to iterate through all locks in the cache for given resource.
390 Not implemented at the moment. */
391 int cache_iterate_locks(struct lustre_cache *cache, struct ldlm_res_id *res,
392 cache_iterate_locks_cb_t cb_fun, void *data)
397 /* Create lustre cache and attach it to @obd */
/* Allocates a lustre_cache and initializes its lock list, page-removal
 * callback list, and their protecting spinlock/rwlock. */
398 struct lustre_cache *cache_create(struct obd_device *obd)
400 struct lustre_cache *cache;
402 OBD_ALLOC(cache, sizeof(*cache));
405 spin_lock_init(&cache->lc_locks_list_lock);
406 CFS_INIT_LIST_HEAD(&cache->lc_locks_list);
407 CFS_INIT_LIST_HEAD(&cache->lc_page_removal_callback_list);
408 rwlock_init(&cache->lc_page_removal_cb_lock);
415 /* Destroy @cache and free its memory */
/* On a clean shutdown both lists should be empty; any leftover locks are
 * reported and force-unlinked before the cache structure is freed. */
416 int cache_destroy(struct lustre_cache *cache)
419 spin_lock(&cache->lc_locks_list_lock);
420 if (!list_empty(&cache->lc_locks_list)) {
421 struct ldlm_lock *lock, *tmp;
422 CERROR("still have locks in the list on cleanup:\n");
424 list_for_each_entry_safe(lock, tmp,
425 &cache->lc_locks_list,
426 l_cache_locks_list) {
427 list_del_init(&lock->l_cache_locks_list);
428 /* XXX: Of course natural idea would be to print
429 offending locks here, but if we use
430 e.g. LDLM_ERROR, we will likely crash here,
431 as LDLM error tries to access e.g.
432 nonexisting namespace. Normally this kind of
433 case could only happen when somebody did not
434 release lock reference and we have other ways
436 /* Make sure there are no pages left under the
438 LASSERT(list_empty(&lock->l_extents_list));
441 spin_unlock(&cache->lc_locks_list_lock);
/* All removal callbacks must have been unregistered by now. */
442 LASSERT(list_empty(&cache->lc_page_removal_callback_list));
443 OBD_FREE(cache, sizeof(*cache));