/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
 * Use is subject to license terms.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Cache of triples - object, lock, extent
 */
#ifndef EXPORT_SYMTAB
# define EXPORT_SYMTAB
#endif
#define DEBUG_SUBSYSTEM S_OSC

#ifdef __KERNEL__
# include <linux/version.h>
# include <linux/module.h>
# include <linux/list.h>
#else                           /* __KERNEL__ */
# include <liblustre.h>
#endif

#include <lustre_dlm.h>
#include <lustre_cache.h>
#include <obd.h>
#include <lustre_debug.h>

#include "osc_internal.h"

/* Adding @lock to the @cache */
int cache_add_lock(struct lustre_cache *cache, struct lustre_handle *lockh)
{
        struct ldlm_lock *lock = ldlm_handle2lock(lockh);

        if (!lock)      // Lock disappeared under us.
                RETURN(0);
        spin_lock(&cache->lc_locks_list_lock);
        list_add_tail(&lock->l_cache_locks_list, &cache->lc_locks_list);
        spin_unlock(&cache->lc_locks_list_lock);
        LDLM_LOCK_PUT(lock);    /* drop the ldlm_handle2lock() reference */
        RETURN(0);
}

/* Tries to add @extent to the lock represented by @lockh if non-NULL,
   otherwise just tries to match some suitable lock by resource and data
   contained in @extent */
/* Should be called with oap->lock held (except on initial addition, see
   the comment in osc_request.c). */
int cache_add_extent(struct lustre_cache *cache, struct ldlm_res_id *res,
                     struct osc_async_page *extent, struct lustre_handle *lockh)
{
        struct lustre_handle tmplockh;
        ldlm_policy_data_t tmpex;
        struct ldlm_lock *lock = NULL;
        int mode = 0;

        /* Don't add anything a second time */
        if (!list_empty(&extent->oap_page_list)) {
                LBUG();
                RETURN(0);
        }

        if (lockh && lustre_handle_is_used(lockh)) {
                lock = ldlm_handle2lock(lockh);
                if (!lock)
                        RETURN(-ENOLCK);

                LASSERTF(lock->l_policy_data.l_extent.start <=
                         extent->oap_obj_off &&
                         extent->oap_obj_off + CFS_PAGE_SIZE - 1 <=
                         lock->l_policy_data.l_extent.end,
                         "Got wrong lock [" LPU64 "," LPU64 "] for page with "
                         "offset " LPU64 "\n",
                         lock->l_policy_data.l_extent.start,
                         lock->l_policy_data.l_extent.end, extent->oap_obj_off);
        } else {
                /* Real extent width calculation here once we have real
                 * extents */
                tmpex.l_extent.start = extent->oap_obj_off;
                tmpex.l_extent.end = tmpex.l_extent.start + CFS_PAGE_SIZE - 1;

                /* XXX find lock from extent or something like that */
                /* The lock mode does not matter. If this is a dirty page,
                 * there could be only one PW lock. If the page is clean,
                 * any PR lock is good. */
                mode = ldlm_lock_match(cache->lc_obd->obd_namespace,
                                       LDLM_FL_BLOCK_GRANTED |
                                       LDLM_FL_CBPENDING, res, LDLM_EXTENT,
                                       &tmpex, LCK_PW | LCK_PR, &tmplockh);
                if (mode <= 0) {
                        CDEBUG(D_CACHE, "No lock to attach " LPU64 "->" LPU64
                               " extent to!\n", tmpex.l_extent.start,
                               tmpex.l_extent.end);
                        RETURN((mode < 0) ? mode : -ENOLCK);
                }

                lock = ldlm_handle2lock(&tmplockh);
                if (!lock) { // Race - lock disappeared under us (eviction?)
                        CDEBUG(D_CACHE, "Newly matched lock just disappeared "
                               "under us\n");
                        RETURN(-ENOLCK);
                }
        }

        spin_lock(&lock->l_extents_list_lock);
        list_add_tail(&extent->oap_page_list, &lock->l_extents_list);
        spin_unlock(&lock->l_extents_list_lock);
        extent->oap_ldlm_lock = lock;
        LASSERTF(!(lock->l_flags & LDLM_FL_CANCEL), "Adding a page to already "
                 "cancelled lock %p", lock);
        /* If we matched the lock ourselves, drop the match reference;
         * either way drop the reference taken by ldlm_handle2lock(). */
        if (mode)
                ldlm_lock_decref(&tmplockh, mode);
        LDLM_LOCK_PUT(lock);

        RETURN(0);
}

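/*
 * Illustrative usage sketch, assuming only the cache_add_extent() contract
 * above: pass the lock handle when the caller already holds one, or NULL to
 * let the cache match a covering PR/PW lock.  The caller name is
 * hypothetical; the block is compiled out.
 */
#if 0
static int example_attach_page(struct lustre_cache *cache,
                               struct ldlm_res_id *res,
                               struct osc_async_page *oap,
                               struct lustre_handle *known_lockh)
{
        int rc = cache_add_extent(cache, res, oap, known_lockh);

        if (rc == -ENOLCK)      /* no covering lock; page stays uncached */
                CDEBUG(D_CACHE, "no lock covers this page\n");
        return rc;
}
#endif
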
static void cache_extent_removal_get(struct page_removal_cb_element *element)
{
        atomic_inc(&element->prce_refcnt);
}

static void cache_extent_removal_put(struct page_removal_cb_element *element)
{
        if (atomic_dec_and_test(&element->prce_refcnt))
                OBD_FREE_PTR(element);
}

static int cache_extent_removal_event(struct lustre_cache *cache,
                                      void *data, int discard)
{
        struct page *page = data;
        struct list_head *iter;
        struct page_removal_cb_element *element;

        read_lock(&cache->lc_page_removal_cb_lock);
        iter = cache->lc_page_removal_callback_list.next;
        while (iter != &cache->lc_page_removal_callback_list) {
                element = list_entry(iter, struct page_removal_cb_element,
                                     prce_list);
                /* Pin @element so the callback can run outside the rwlock
                 * without the element being freed under us. */
                cache_extent_removal_get(element);
                read_unlock(&cache->lc_page_removal_cb_lock);

                element->prce_callback(page, discard);

                read_lock(&cache->lc_page_removal_cb_lock);
                iter = iter->next;
                cache_extent_removal_put(element);
        }
        read_unlock(&cache->lc_page_removal_cb_lock);

        return 0;
}

/* Registers a set of pin/remove callbacks for extents. Current limitation is
   there could be only one pin_cb per cache.
   @pin_cb is called when we have the page locked, to pin it in memory so that
   it does not disappear after we release the page lock (which we need to do
   before calling the removal callback).
   @func_cb is the removal callback that is called after the page and all
   spinlocks are released, and is supposed to clean the page and remove it
   from all (vfs) caches it might be in */
int cache_add_extent_removal_cb(struct lustre_cache *cache,
                                obd_page_removal_cb_t func_cb,
                                obd_pin_extent_cb pin_cb)
{
        struct page_removal_cb_element *element;

        if (!func_cb)
                return 0;

        OBD_ALLOC_PTR(element);
        if (!element)
                return -ENOMEM;
        element->prce_callback = func_cb;
        atomic_set(&element->prce_refcnt, 1);

        write_lock(&cache->lc_page_removal_cb_lock);
        list_add_tail(&element->prce_list,
                      &cache->lc_page_removal_callback_list);
        write_unlock(&cache->lc_page_removal_cb_lock);

        cache->lc_pin_extent_cb = pin_cb;
        return 0;
}
EXPORT_SYMBOL(cache_add_extent_removal_cb);

/* Unregister extent removal callback registered earlier. If the list of
   registered removal callbacks becomes empty, we also clear the pin callback
   since there could only be one */
int cache_del_extent_removal_cb(struct lustre_cache *cache,
                                obd_page_removal_cb_t func_cb)
{
        int found = 0;
        struct page_removal_cb_element *element, *t;

        write_lock(&cache->lc_page_removal_cb_lock);
        list_for_each_entry_safe(element, t,
                                 &cache->lc_page_removal_callback_list,
                                 prce_list) {
                if (element->prce_callback == func_cb) {
                        list_del(&element->prce_list);
                        write_unlock(&cache->lc_page_removal_cb_lock);
                        found = 1;
                        cache_extent_removal_put(element);
                        write_lock(&cache->lc_page_removal_cb_lock);
                        /* We continue iterating the list in case this
                           function was registered more than once */
                }
        }
        write_unlock(&cache->lc_page_removal_cb_lock);

        if (list_empty(&cache->lc_page_removal_callback_list))
                cache->lc_pin_extent_cb = NULL;

        return found ? 0 : -ENOENT;
}
EXPORT_SYMBOL(cache_del_extent_removal_cb);

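/*
 * Illustrative sketch of registering and unregistering a removal/pin pair.
 * The callback bodies and names are hypothetical; the signatures are
 * inferred from the prce_callback(page, discard) and
 * lc_pin_extent_cb(extent->oap_page) call sites above (the
 * obd_page_removal_cb_t / obd_pin_extent_cb typedefs live in the obd
 * headers).  Compiled out.
 */
#if 0
static void example_pin_cb(void *page)
{
        /* pin @page (e.g. take a page reference) while the page lock is
         * still held, so it cannot vanish before the removal callback */
}

static int example_removal_cb(void *page, int discard)
{
        /* drop @page from any (vfs) caches it might be in; @discard is
         * set for LDLM_FL_DISCARD_DATA cancels */
        return 0;
}

static int example_toggle_callbacks(struct lustre_cache *cache, int on)
{
        if (on)
                return cache_add_extent_removal_cb(cache, example_removal_cb,
                                                   example_pin_cb);
        return cache_del_extent_removal_cb(cache, example_removal_cb);
}
#endif
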
static int cache_remove_extent_nolock(struct lustre_cache *cache,
                                      struct osc_async_page *extent)
{
        int have_lock = !!extent->oap_ldlm_lock;
        /* We used to check oap_ldlm_lock for non NULL here, but it might be
           NULL, in fact, due to parallel page eviction clearing it and
           waiting on a lock's page list lock */
        extent->oap_ldlm_lock = NULL;

        if (!list_empty(&extent->oap_page_list))
                list_del_init(&extent->oap_page_list);

        return have_lock;
}

/* Request the @extent to be removed from the cache and the locks it
   belongs to. */
void cache_remove_extent(struct lustre_cache *cache,
                         struct osc_async_page *extent)
{
        struct ldlm_lock *lock;

        spin_lock(&extent->oap_lock);
        lock = extent->oap_ldlm_lock;
        extent->oap_ldlm_lock = NULL;
        spin_unlock(&extent->oap_lock);

        /* No lock - means this extent is not in any list */
        if (!lock)
                return;

        spin_lock(&lock->l_extents_list_lock);
        if (!list_empty(&extent->oap_page_list))
                list_del_init(&extent->oap_page_list);
        spin_unlock(&lock->l_extents_list_lock);
}

/* Iterate through the list of extents in the lock identified by @lockh,
   calling @cb_func for every such extent. Also passes @data to every call.
   Stops iterating prematurely if @cb_func returns nonzero. */
int cache_iterate_extents(struct lustre_cache *cache,
                          struct lustre_handle *lockh,
                          cache_iterate_extents_cb_t cb_func, void *data)
{
        struct ldlm_lock *lock = ldlm_handle2lock(lockh);
        struct osc_async_page *extent, *t;

        if (!lock)      // Lock disappeared
                return 1;
        /* Parallel page removal from mem pressure can race with us */
        spin_lock(&lock->l_extents_list_lock);
        list_for_each_entry_safe(extent, t, &lock->l_extents_list,
                                 oap_page_list) {
                if (cb_func(cache, lockh, extent, data))
                        break;
        }
        spin_unlock(&lock->l_extents_list_lock);
        LDLM_LOCK_PUT(lock);

        return 0;
}

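/*
 * Illustrative iterator callback sketch.  The names are hypothetical; the
 * signature is inferred from the cb_func(cache, lockh, extent, data) call
 * above.  The callback runs under l_extents_list_lock, so it must not
 * sleep.  Compiled out.
 */
#if 0
static int example_count_cb(struct lustre_cache *cache,
                            struct lustre_handle *lockh,
                            struct osc_async_page *extent, void *data)
{
        (*(int *)data)++;       /* count extents under this lock */
        return 0;               /* nonzero would stop the iteration */
}

static int example_count_extents(struct lustre_cache *cache,
                                 struct lustre_handle *lockh)
{
        int count = 0;

        /* nonzero return means the lock disappeared before iteration */
        if (cache_iterate_extents(cache, lockh, example_count_cb, &count))
                return -ENOLCK;
        return count;
}
#endif
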
static int cache_remove_extents_from_lock(struct lustre_cache *cache,
                                          struct ldlm_lock *lock, void *data)
{
        struct osc_async_page *extent;
        void *ext_data;

        LASSERT(lock);

        spin_lock(&lock->l_extents_list_lock);
        while (!list_empty(&lock->l_extents_list)) {
                extent = list_entry(lock->l_extents_list.next,
                                    struct osc_async_page, oap_page_list);

                spin_lock(&extent->oap_lock);
                /* If there is no lock referenced from this oap, it means
                   there is a parallel page-removal process waiting to free
                   that page on l_extents_list_lock, and it holds the page
                   lock.  We need this page to go away completely, and for
                   that to happen we will just try to truncate it here too.
                   Serialisation on the page lock will achieve that goal
                   for us. */
                /* Try to add the extent back to the cache first, but only
                 * if we cancel a read lock; write locks cannot have other
                 * overlapping locks. If adding is not possible (or we are
                 * canceling a PW lock), then remove the extent from the
                 * cache. */
                if (!cache_remove_extent_nolock(cache, extent) ||
                    (lock->l_granted_mode == LCK_PW) ||
                    cache_add_extent(cache, &lock->l_resource->lr_name,
                                     extent, NULL)) {
                        /* We need to remember this oap_page value now;
                           once we release the spinlocks, the extent struct
                           might be freed and we would end up requesting
                           the page with address 0x5a5a5a5a in
                           cache_extent_removal_event */
                        ext_data = extent->oap_page;
                        LASSERT(cache->lc_pin_extent_cb != NULL);
                        cache->lc_pin_extent_cb(extent->oap_page);

                        if (lock->l_flags & LDLM_FL_BL_AST)
                                extent->oap_async_flags |= ASYNC_HP;
                        spin_unlock(&extent->oap_lock);
                        spin_unlock(&lock->l_extents_list_lock);
                        cache_extent_removal_event(cache, ext_data,
                                                   lock->l_flags &
                                                   LDLM_FL_DISCARD_DATA);
                        spin_lock(&lock->l_extents_list_lock);
                } else {
                        spin_unlock(&extent->oap_lock);
                }
        }
        spin_unlock(&lock->l_extents_list_lock);

        return 0;
}

/* Removes @lock from the cache after necessary checks. */
int cache_remove_lock(struct lustre_cache *cache, struct lustre_handle *lockh)
{
        struct ldlm_lock *lock = ldlm_handle2lock(lockh);

        if (!lock) // The lock was removed by somebody just now, nothing to do
                return 0;

        cache_remove_extents_from_lock(cache, lock, NULL /* data */);
        spin_lock(&cache->lc_locks_list_lock);
        list_del_init(&lock->l_cache_locks_list);
        spin_unlock(&cache->lc_locks_list_lock);
        LDLM_LOCK_PUT(lock);
        return 0;
}

/* Supposed to iterate through all locks in the cache for a given resource.
   Not implemented at the moment. */
int cache_iterate_locks(struct lustre_cache *cache, struct ldlm_res_id *res,
                        cache_iterate_locks_cb_t cb_fun, void *data)
{
        return -ENOTSUPP;
}

/* Create lustre cache and attach it to @obd */
struct lustre_cache *cache_create(struct obd_device *obd)
{
        struct lustre_cache *cache;

        OBD_ALLOC(cache, sizeof(*cache));
        if (!cache)
                return NULL;
        spin_lock_init(&cache->lc_locks_list_lock);
        CFS_INIT_LIST_HEAD(&cache->lc_locks_list);
        CFS_INIT_LIST_HEAD(&cache->lc_page_removal_callback_list);
        rwlock_init(&cache->lc_page_removal_cb_lock);
        cache->lc_obd = obd;
        return cache;
}

/* Destroy @cache and free its memory */
int cache_destroy(struct lustre_cache *cache)
{
        if (!cache)
                return 0;

        spin_lock(&cache->lc_locks_list_lock);
        if (!list_empty(&cache->lc_locks_list)) {
                struct ldlm_lock *lock, *tmp;
                CERROR("still have locks in the list on cleanup:\n");

                list_for_each_entry_safe(lock, tmp,
                                         &cache->lc_locks_list,
                                         l_cache_locks_list) {
                        list_del_init(&lock->l_cache_locks_list);
                        /* XXX: Of course the natural idea would be to print
                         * the offending locks here, but if we use
                         * e.g. LDLM_ERROR, we will likely crash here,
                         * as LDLM error tries to access e.g. a
                         * nonexisting namespace. Normally this kind of
                         * case could only happen when somebody did not
                         * release a lock reference and we have other ways
                         * to detect that. */
                        /* Make sure there are no pages left under the
                         * lock */
                        LASSERT(list_empty(&lock->l_extents_list));
                }
        }
        spin_unlock(&cache->lc_locks_list_lock);
        LASSERT(list_empty(&cache->lc_page_removal_callback_list));

        OBD_FREE(cache, sizeof(*cache));
        return 0;
}
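
/*
 * Illustrative lifecycle sketch, assuming only the API above; @obd and
 * @granted_lockh stand in for a real obd_device and a granted DLM lock
 * handle obtained elsewhere.  Compiled out.
 */
#if 0
static int example_lifecycle(struct obd_device *obd,
                             struct lustre_handle *granted_lockh)
{
        struct lustre_cache *cache = cache_create(obd);

        if (!cache)
                return -ENOMEM;
        cache_add_lock(cache, granted_lockh);    /* start tracking the lock */
        cache_remove_lock(cache, granted_lockh); /* on cancel: detach extents */
        return cache_destroy(cache);
}
#endif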