Whamcloud - gitweb
land b_gns onto HEAD. If you are working on CMD, you MUST UPDATE YOUR
[fs/lustre-release.git] / lustre / ldlm / ldlm_lock.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
5  *   Author: Peter Braam <braam@clusterfs.com>
6  *   Author: Phil Schwan <phil@clusterfs.com>
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #define DEBUG_SUBSYSTEM S_LDLM
25
26 #ifdef __KERNEL__
27 # include <linux/slab.h>
28 # include <linux/module.h>
29 # include <linux/lustre_dlm.h>
30 #else
31 # include <liblustre.h>
32 # include <linux/kp30.h>
33 #endif
34
35 #include <linux/obd_class.h>
36 #include "ldlm_internal.h"
37
38 //struct lustre_lock ldlm_everything_lock;
39
40 /* lock types */
41 char *ldlm_lockname[] = {
42         [0] "--",
43         [LCK_EX] "EX",
44         [LCK_PW] "PW",
45         [LCK_PR] "PR",
46         [LCK_CW] "CW",
47         [LCK_CR] "CR",
48         [LCK_NL] "NL",
49         [LCK_GROUP] "GROUP"
50 };
51 char *ldlm_typename[] = {
52         [LDLM_PLAIN] "PLN",
53         [LDLM_EXTENT] "EXT",
54         [LDLM_FLOCK] "FLK",
55         [LDLM_IBITS] "IBT",
56 };
57
58 char *ldlm_it2str(int it)
59 {
60         switch (it) {
61         case IT_OPEN:
62                 return "open";
63         case IT_CREAT:
64                 return "creat";
65         case (IT_OPEN | IT_CREAT):
66                 return "open|creat";
67         case IT_READDIR:
68                 return "readdir";
69         case IT_GETATTR:
70                 return "getattr";
71         case IT_LOOKUP:
72                 return "lookup";
73         case IT_UNLINK:
74                 return "unlink";
75         case IT_GETXATTR:
76                 return "getxattr";
77         case IT_CHDIR:
78                 return "chdir";
79         default:
80                 CERROR("Unknown intent %d\n", it);
81                 return "UNKNOWN";
82         }
83 }
84
85 extern kmem_cache_t *ldlm_lock_slab;
86 struct lustre_lock ldlm_handle_lock;
87
88 static ldlm_processing_policy ldlm_processing_policy_table[] = {
89         [LDLM_PLAIN] ldlm_process_plain_lock,
90         [LDLM_EXTENT] ldlm_process_extent_lock,
91 #ifdef __KERNEL__
92         [LDLM_FLOCK] ldlm_process_flock_lock,
93 #endif
94         [LDLM_IBITS] ldlm_process_inodebits_lock,
95 };
96
97 ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res)
98 {
99         return ldlm_processing_policy_table[res->lr_type];
100 }
101
102 void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg)
103 {
104         ns->ns_policy = arg;
105 }
106
107 /*
108  * REFCOUNTED LOCK OBJECTS
109  */
110
111
112 /*
113  * Lock refcounts, during creation:
114  *   - one special one for allocation, dec'd only once in destroy
115  *   - one for being a lock that's in-use
116  *   - one for the addref associated with a new lock
117  */
118 struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
119 {
120         atomic_inc(&lock->l_refc);
121         return lock;
122 }
123
124 void ldlm_lock_put(struct ldlm_lock *lock)
125 {
126         ENTRY;
127
128         if (atomic_dec_and_test(&lock->l_refc)) {
129                 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
130
131                 l_lock(&ns->ns_lock);
132                 LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing");
133                 LASSERT(lock->l_destroyed);
134                 LASSERT(list_empty(&lock->l_res_link));
135
136                 spin_lock(&ns->ns_counter_lock);
137                 ns->ns_locks--;
138                 spin_unlock(&ns->ns_counter_lock);
139
140                 ldlm_resource_putref(lock->l_resource);
141                 lock->l_resource = NULL;
142                 if (lock->l_export)
143                         class_export_put(lock->l_export);
144
145                 if (lock->l_parent)
146                         LDLM_LOCK_PUT(lock->l_parent);
147
148                 if (lock->l_lvb_data != NULL)
149                         OBD_FREE(lock->l_lvb_data, lock->l_lvb_len);
150
151                 OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
152                 l_unlock(&ns->ns_lock);
153         }
154
155         EXIT;
156 }
157
158 void ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
159 {
160         ENTRY;
161         l_lock(&lock->l_resource->lr_namespace->ns_lock);
162         if (!list_empty(&lock->l_lru)) {
163                 list_del_init(&lock->l_lru);
164                 lock->l_resource->lr_namespace->ns_nr_unused--;
165                 LASSERT(lock->l_resource->lr_namespace->ns_nr_unused >= 0);
166         }
167         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
168         EXIT;
169 }
170
171 /* This used to have a 'strict' flact, which recovery would use to mark an
172  * in-use lock as needing-to-die.  Lest I am ever tempted to put it back, I
173  * shall explain why it's gone: with the new hash table scheme, once you call
174  * ldlm_lock_destroy, you can never drop your final references on this lock.
175  * Because it's not in the hash table anymore.  -phil */
176 void ldlm_lock_destroy(struct ldlm_lock *lock)
177 {
178         ENTRY;
179         l_lock(&lock->l_resource->lr_namespace->ns_lock);
180
181         if (!list_empty(&lock->l_children)) {
182                 LDLM_ERROR(lock, "still has children (%p)!",
183                            lock->l_children.next);
184                 ldlm_lock_dump(D_ERROR, lock, 0);
185                 LBUG();
186         }
187         if (lock->l_readers || lock->l_writers) {
188                 LDLM_ERROR(lock, "lock still has references");
189                 ldlm_lock_dump(D_ERROR, lock, 0);
190                 LBUG();
191         }
192
193         if (!list_empty(&lock->l_res_link)) {
194                 LDLM_ERROR(lock, "lock still on resource");
195                 ldlm_lock_dump(D_ERROR, lock, 0);
196                 LBUG();
197         }
198
199         if (lock->l_destroyed) {
200                 LASSERT(list_empty(&lock->l_lru));
201                 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
202                 EXIT;
203                 return;
204         }
205         lock->l_destroyed = 1;
206
207         list_del_init(&lock->l_export_chain);
208         ldlm_lock_remove_from_lru(lock);
209         class_handle_unhash(&lock->l_handle);
210
211 #if 0
212         /* Wake anyone waiting for this lock */
213         /* FIXME: I should probably add yet another flag, instead of using
214          * l_export to only call this on clients */
215         if (lock->l_export)
216                 class_export_put(lock->l_export);
217         lock->l_export = NULL;
218         if (lock->l_export && lock->l_completion_ast)
219                 lock->l_completion_ast(lock, 0);
220 #endif
221
222         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
223         LDLM_LOCK_PUT(lock);
224         EXIT;
225 }
226
/* Addref callback registered with the handle hash for every lock handle.
 * this is called by portals_handle2object with the handle lock taken */
static void lock_handle_addref(void *lock)
{
        struct ldlm_lock *l = lock;

        LDLM_LOCK_GET(l);
}
232
233 /*
234  * usage: pass in a resource on which you have done ldlm_resource_get
235  *        pass in a parent lock on which you have done a ldlm_lock_get
236  *        after return, ldlm_*_put the resource and parent
237  * returns: lock with refcount 1
238  */
239 static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
240                                        struct ldlm_resource *resource)
241 {
242         struct ldlm_lock *lock;
243         ENTRY;
244
245         if (resource == NULL)
246                 LBUG();
247
248         OBD_SLAB_ALLOC(lock, ldlm_lock_slab, SLAB_NOFS, sizeof(*lock));
249         if (lock == NULL)
250                 RETURN(NULL);
251
252         lock->l_resource = ldlm_resource_getref(resource);
253
254         atomic_set(&lock->l_refc, 2);
255         INIT_LIST_HEAD(&lock->l_children);
256         INIT_LIST_HEAD(&lock->l_res_link);
257         INIT_LIST_HEAD(&lock->l_lru);
258         INIT_LIST_HEAD(&lock->l_export_chain);
259         INIT_LIST_HEAD(&lock->l_pending_chain);
260         init_waitqueue_head(&lock->l_waitq);
261
262         spin_lock(&resource->lr_namespace->ns_counter_lock);
263         resource->lr_namespace->ns_locks++;
264         spin_unlock(&resource->lr_namespace->ns_counter_lock);
265
266         if (parent != NULL) {
267                 l_lock(&parent->l_resource->lr_namespace->ns_lock);
268                 lock->l_parent = LDLM_LOCK_GET(parent);
269                 list_add(&lock->l_childof, &parent->l_children);
270                 l_unlock(&parent->l_resource->lr_namespace->ns_lock);
271         }
272
273         INIT_LIST_HEAD(&lock->l_handle.h_link);
274         class_handle_hash(&lock->l_handle, lock_handle_addref);
275
276         RETURN(lock);
277 }
278
279 int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
280                               struct ldlm_res_id new_resid)
281 {
282         struct ldlm_resource *oldres = lock->l_resource;
283         ENTRY;
284
285         l_lock(&ns->ns_lock);
286         if (memcmp(&new_resid, &lock->l_resource->lr_name,
287                    sizeof(lock->l_resource->lr_name)) == 0) {
288                 /* Nothing to do */
289                 l_unlock(&ns->ns_lock);
290                 RETURN(0);
291         }
292
293         LASSERT(new_resid.name[0] != 0);
294
295         /* This function assumes that the lock isn't on any lists */
296         LASSERT(list_empty(&lock->l_res_link));
297
298         lock->l_resource = ldlm_resource_get(ns, NULL, new_resid,
299                                              lock->l_resource->lr_type, 1);
300         if (lock->l_resource == NULL) {
301                 LBUG();
302                 RETURN(-ENOMEM);
303         }
304
305         /* ...and the flowers are still standing! */
306         ldlm_resource_putref(oldres);
307
308         l_unlock(&ns->ns_lock);
309         RETURN(0);
310 }
311
312 /*
313  *  HANDLES
314  */
315
316 void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
317 {
318         lockh->cookie = lock->l_handle.h_cookie;
319 }
320
321 /* if flags: atomically get the lock and set the flags.
322  *           Return NULL if flag already set
323  */
324
325 struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *handle, int flags)
326 {
327         struct ldlm_namespace *ns;
328         struct ldlm_lock *lock = NULL, *retval = NULL;
329         ENTRY;
330
331         LASSERT(handle);
332
333         lock = class_handle2object(handle->cookie);
334         if (lock == NULL)
335                 RETURN(NULL);
336
337         LASSERT(lock->l_resource != NULL);
338         ns = lock->l_resource->lr_namespace;
339         LASSERT(ns != NULL);
340
341         l_lock(&ns->ns_lock);
342
343         /* It's unlikely but possible that someone marked the lock as
344          * destroyed after we did handle2object on it */
345         if (lock->l_destroyed) {
346                 CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
347                 LDLM_LOCK_PUT(lock);
348                 GOTO(out, retval);
349         }
350
351         if (flags && (lock->l_flags & flags)) {
352                 LDLM_LOCK_PUT(lock);
353                 GOTO(out, retval);
354         }
355
356         if (flags)
357                 lock->l_flags |= flags;
358
359         retval = lock;
360         EXIT;
361  out:
362         l_unlock(&ns->ns_lock);
363         return retval;
364 }
365
366 struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *ns,
367                                       struct lustre_handle *handle)
368 {
369         struct ldlm_lock *retval = NULL;
370
371         l_lock(&ns->ns_lock);
372         retval = __ldlm_handle2lock(handle, 0);
373         l_unlock(&ns->ns_lock);
374
375         return retval;
376 }
377
378 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
379 {
380         ldlm_res2desc(lock->l_resource, &desc->l_resource);
381         desc->l_req_mode = lock->l_req_mode;
382         desc->l_granted_mode = lock->l_granted_mode;
383         memcpy(&desc->l_policy_data, &lock->l_policy_data,
384                sizeof(desc->l_policy_data));
385 }
386
387 void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
388                             void *data, int datalen)
389 {
390         struct ldlm_ast_work *w;
391         ENTRY;
392
393         l_lock(&lock->l_resource->lr_namespace->ns_lock);
394         if (new && (lock->l_flags & LDLM_FL_AST_SENT))
395                 GOTO(out, 0);
396
397         CDEBUG(D_OTHER, "lock %p incompatible; sending blocking AST.\n", lock);
398
399         OBD_ALLOC(w, sizeof(*w));
400         if (!w) {
401                 LBUG();
402                 GOTO(out, 0);
403         }
404
405         w->w_data = data;
406         w->w_datalen = datalen;
407         if (new) {
408                 LDLM_DEBUG(lock, "lock incompatible; sending blocking AST.");
409                 lock->l_flags |= LDLM_FL_AST_SENT;
410                 /* If the enqueuing client said so, tell the AST recipient to
411                  * discard dirty data, rather than writing back. */
412                 if (new->l_flags & LDLM_AST_DISCARD_DATA)
413                         lock->l_flags |= LDLM_FL_DISCARD_DATA;
414                 w->w_blocking = 1;
415                 ldlm_lock2desc(new, &w->w_desc);
416         }
417
418         w->w_lock = LDLM_LOCK_GET(lock);
419         list_add(&w->w_list, lock->l_resource->lr_tmp);
420         EXIT;
421  out:
422         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
423 }
424
425 void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
426 {
427         struct ldlm_lock *lock;
428
429         lock = ldlm_handle2lock(lockh);
430         ldlm_lock_addref_internal(lock, mode);
431         LDLM_LOCK_PUT(lock);
432 }
433
434 /* only called for local locks */
435 void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
436 {
437         l_lock(&lock->l_resource->lr_namespace->ns_lock);
438         ldlm_lock_remove_from_lru(lock);
439         if (mode & (LCK_NL | LCK_CR | LCK_PR))
440                 lock->l_readers++;
441         if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP))
442                 lock->l_writers++;
443         lock->l_last_used = jiffies;
444         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
445         LDLM_LOCK_GET(lock);
446         LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
447 }
448
449 void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
450 {
451         struct ldlm_namespace *ns;
452         ENTRY;
453
454         LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
455         ns = lock->l_resource->lr_namespace;
456         l_lock(&ns->ns_lock);
457         if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
458                 LASSERT(lock->l_readers > 0);
459                 lock->l_readers--;
460         }
461         if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP)) {
462                 LASSERT(lock->l_writers > 0);
463                 lock->l_writers--;
464         }
465
466         if (lock->l_flags & LDLM_FL_LOCAL &&
467             !lock->l_readers && !lock->l_writers) {
468                 /* If this is a local lock on a server namespace and this was
469                  * the last reference, cancel the lock. */
470                 CDEBUG(D_INFO, "forcing cancel of local lock\n");
471                 lock->l_flags |= LDLM_FL_CBPENDING;
472         }
473
474         if (!lock->l_readers && !lock->l_writers &&
475             (lock->l_flags & LDLM_FL_CBPENDING)) {
476                 /* If we received a blocked AST and this was the last reference,
477                  * run the callback. */
478                 if (ns->ns_client == LDLM_NAMESPACE_SERVER && lock->l_export)
479                         CERROR("FL_CBPENDING set on non-local lock--just a "
480                                "warning\n");
481
482                 LDLM_DEBUG(lock, "final decref done on cbpending lock");
483
484                 LDLM_LOCK_GET(lock); /* dropped by bl thread */
485                 ldlm_lock_remove_from_lru(lock);
486 #ifdef __KERNEL__
487                 ldlm_bl_to_thread(ns, NULL, lock);
488                 l_unlock(&ns->ns_lock);
489 #else
490                 l_unlock(&ns->ns_lock);
491                 ldlm_handle_bl_callback(ns, NULL, lock);
492 #endif
493         } else if (ns->ns_client == LDLM_NAMESPACE_CLIENT &&
494                    !lock->l_readers && !lock->l_writers) {
495                 /* If this is a client-side namespace and this was the last
496                  * reference, put it on the LRU. */
497                 LASSERT(list_empty(&lock->l_lru));
498                 LASSERT(ns->ns_nr_unused >= 0);
499                 list_add_tail(&lock->l_lru, &ns->ns_unused_list);
500                 ns->ns_nr_unused++;
501                 l_unlock(&ns->ns_lock);
502                 ldlm_cancel_lru(ns, LDLM_ASYNC);
503         } else {
504                 l_unlock(&ns->ns_lock);
505         }
506
507         LDLM_LOCK_PUT(lock);    /* matches the ldlm_lock_get in addref */
508
509         EXIT;
510 }
511
512 void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
513 {
514         struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
515         LASSERT(lock != NULL);
516         ldlm_lock_decref_internal(lock, mode);
517         LDLM_LOCK_PUT(lock);
518 }
519
520 /* This will drop a lock reference and mark it for destruction, but will not
521  * necessarily cancel the lock before returning. */
522 void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
523 {
524         struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
525         ENTRY;
526
527         LASSERT(lock != NULL);
528
529         LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
530         l_lock(&lock->l_resource->lr_namespace->ns_lock);
531         lock->l_flags |= LDLM_FL_CBPENDING;
532         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
533         ldlm_lock_decref_internal(lock, mode);
534         LDLM_LOCK_PUT(lock);
535 }
536
537 /* NOTE: called by
538  *  - ldlm_lock_enqueue
539  *  - ldlm_reprocess_queue
540  *  - ldlm_lock_convert
541  */
542 void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen,
543                      int run_ast)
544 {
545         struct ldlm_resource *res = lock->l_resource;
546         ENTRY;
547
548         l_lock(&lock->l_resource->lr_namespace->ns_lock);
549         lock->l_granted_mode = lock->l_req_mode;
550         ldlm_resource_add_lock(res, &res->lr_granted, lock);
551
552         if (lock->l_granted_mode < res->lr_most_restr)
553                 res->lr_most_restr = lock->l_granted_mode;
554
555         if (run_ast && lock->l_completion_ast != NULL)
556                 ldlm_add_ast_work_item(lock, NULL, data, datalen);
557
558         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
559         EXIT;
560 }
561
562 /* returns a referenced lock or NULL.  See the flag descriptions below, in the
563  * comment above ldlm_lock_match */
564 static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
565                                       ldlm_policy_data_t *policy,
566                                       struct ldlm_lock *old_lock, int flags)
567 {
568         struct ldlm_lock *lock;
569         struct list_head *tmp;
570
571         list_for_each(tmp, queue) {
572                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
573
574                 if (lock == old_lock)
575                         break;
576
577                 /* llite sometimes wants to match locks that will be
578                  * canceled when their users drop, but we allow it to match
579                  * if it passes in CBPENDING and the lock still has users.
580                  * this is generally only going to be used by children 
581                  * whose parents already hold a lock so forward progress
582                  * can still happen. */
583                 if (lock->l_flags & LDLM_FL_CBPENDING &&
584                     !(flags & LDLM_FL_CBPENDING))
585                         continue;
586                 if (lock->l_flags & LDLM_FL_CBPENDING &&
587                     lock->l_readers == 0 && lock->l_writers == 0)
588                         continue;
589
590                 if (!(lock->l_req_mode & mode))
591                         continue;
592
593                 if (lock->l_resource->lr_type == LDLM_EXTENT &&
594                     (lock->l_policy_data.l_extent.start >
595                      policy->l_extent.start ||
596                      lock->l_policy_data.l_extent.end < policy->l_extent.end))
597                         continue;
598
599                 if (lock->l_resource->lr_type == LDLM_EXTENT &&
600                     mode == LCK_GROUP &&
601                     lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
602                         continue;
603
604                 /* We match if we have existing lock with same or wider set
605                    of bits. */
606                 if (lock->l_resource->lr_type == LDLM_IBITS &&
607                      ((lock->l_policy_data.l_inodebits.bits &
608                       policy->l_inodebits.bits) !=
609                       policy->l_inodebits.bits))
610                         continue;
611
612                 if (lock->l_destroyed)
613                         continue;
614
615                 if ((flags & LDLM_FL_LOCAL_ONLY) &&
616                     !(lock->l_flags & LDLM_FL_LOCAL))
617                         continue;
618
619                 if (flags & LDLM_FL_TEST_LOCK)
620                         LDLM_LOCK_GET(lock);
621                 else
622                         ldlm_lock_addref_internal(lock, mode);
623                 return lock;
624         }
625
626         return NULL;
627 }
628
629 void ldlm_lock_allow_match(struct ldlm_lock *lock)
630 {
631         l_lock(&lock->l_resource->lr_namespace->ns_lock);
632         lock->l_flags |= LDLM_FL_CAN_MATCH;
633         wake_up(&lock->l_waitq);
634         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
635 }
636
637 /* Can be called in two ways:
638  *
639  * If 'ns' is NULL, then lockh describes an existing lock that we want to look
640  * for a duplicate of.
641  *
642  * Otherwise, all of the fields must be filled in, to match against.
643  *
644  * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the
645  *     server (ie, connh is NULL)
646  * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted
647  *     list will be considered
648  * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked
649  *     to be canceled can still be matched as long as they still have reader
650  *     or writer refernces
651  * If 'flags' contains LDLM_FL_TEST_LOCK, then don't actually reference a lock,
652  *     just tell us if we would have matched.
653  *
654  * Returns 1 if it finds an already-existing lock that is compatible; in this
655  * case, lockh is filled in with a addref()ed lock
656  */
657 int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
658                     struct ldlm_res_id *res_id, __u32 type,
659                     ldlm_policy_data_t *policy, ldlm_mode_t mode,
660                     struct lustre_handle *lockh)
661 {
662         struct ldlm_resource *res;
663         struct ldlm_lock *lock, *old_lock = NULL;
664         int rc = 0;
665         ENTRY;
666
667         if (ns == NULL) {
668                 old_lock = ldlm_handle2lock(lockh);
669                 LASSERT(old_lock);
670
671                 ns = old_lock->l_resource->lr_namespace;
672                 res_id = &old_lock->l_resource->lr_name;
673                 type = old_lock->l_resource->lr_type;
674                 mode = old_lock->l_req_mode;
675         }
676
677         res = ldlm_resource_get(ns, NULL, *res_id, type, 0);
678         if (res == NULL) {
679                 LASSERT(old_lock == NULL);
680                 RETURN(0);
681         }
682
683         l_lock(&ns->ns_lock);
684
685         lock = search_queue(&res->lr_granted, mode, policy, old_lock, flags);
686         if (lock != NULL)
687                 GOTO(out, rc = 1);
688         if (flags & LDLM_FL_BLOCK_GRANTED)
689                 GOTO(out, rc = 0);
690         lock = search_queue(&res->lr_converting, mode, policy, old_lock, flags);
691         if (lock != NULL)
692                 GOTO(out, rc = 1);
693         lock = search_queue(&res->lr_waiting, mode, policy, old_lock, flags);
694         if (lock != NULL)
695                 GOTO(out, rc = 1);
696
697         EXIT;
698  out:
699         ldlm_resource_putref(res);
700         l_unlock(&ns->ns_lock);
701
702         if (lock) {
703                 ldlm_lock2handle(lock, lockh);
704                 if (!(lock->l_flags & LDLM_FL_CAN_MATCH)) {
705                         struct l_wait_info lwi;
706                         if (lock->l_completion_ast)
707                                 lock->l_completion_ast(lock,
708                                                        LDLM_FL_WAIT_NOREPROC,
709                                                        NULL);
710
711                         lwi = LWI_TIMEOUT_INTR(obd_timeout*HZ, NULL,NULL,NULL);
712
713                         /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */
714                         l_wait_event(lock->l_waitq,
715                                      (lock->l_flags & LDLM_FL_CAN_MATCH), &lwi);
716                 }
717         }
718         if (rc)
719                 LDLM_DEBUG(lock, "matched ("LPU64" "LPU64")",
720                            type == LDLM_PLAIN ? res_id->name[2] :
721                                 policy->l_extent.start,
722                            type == LDLM_PLAIN ? res_id->name[3] :
723                                 policy->l_extent.end);
724         else if (!(flags & LDLM_FL_TEST_LOCK)) /* less verbose for test-only */
725                 LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res "
726                                   LPU64"/"LPU64" ("LPU64" "LPU64")", ns,
727                                   type, mode, res_id->name[0], res_id->name[1],
728                                   type == LDLM_PLAIN ? res_id->name[2] :
729                                         policy->l_extent.start,
730                                   type == LDLM_PLAIN ? res_id->name[3] :
731                                         policy->l_extent.end);
732
733         if (old_lock)
734                 LDLM_LOCK_PUT(old_lock);
735         if (flags & LDLM_FL_TEST_LOCK && rc)
736                 LDLM_LOCK_PUT(lock);
737
738         return rc;
739 }
740
741 /* Returns a referenced lock */
742 struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
743                                    struct lustre_handle *parent_lock_handle,
744                                    struct ldlm_res_id res_id, __u32 type,
745                                    ldlm_mode_t mode,
746                                    ldlm_blocking_callback blocking,
747                                    ldlm_completion_callback completion,
748                                    ldlm_glimpse_callback glimpse,
749                                    void *data, __u32 lvb_len)
750 {
751         struct ldlm_resource *res, *parent_res = NULL;
752         struct ldlm_lock *lock, *parent_lock = NULL;
753         ENTRY;
754
755         if (parent_lock_handle) {
756                 parent_lock = ldlm_handle2lock(parent_lock_handle);
757                 if (parent_lock)
758                         parent_res = parent_lock->l_resource;
759         }
760
761         res = ldlm_resource_get(ns, parent_res, res_id, type, 1);
762         if (res == NULL)
763                 RETURN(NULL);
764
765         lock = ldlm_lock_new(parent_lock, res);
766         ldlm_resource_putref(res);
767         if (parent_lock != NULL)
768                 LDLM_LOCK_PUT(parent_lock);
769
770         if (lock == NULL)
771                 RETURN(NULL);
772
773         lock->l_req_mode = mode;
774         lock->l_ast_data = data;
775         lock->l_blocking_ast = blocking;
776         lock->l_completion_ast = completion;
777         lock->l_glimpse_ast = glimpse;
778
779         if (lvb_len) {
780                 lock->l_lvb_len = lvb_len;
781                 OBD_ALLOC(lock->l_lvb_data, lvb_len);
782                 if (lock->l_lvb_data == NULL) {
783                         OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
784                         RETURN(NULL);
785                 }
786         }
787
788         RETURN(lock);
789 }
790
791 ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
792                                struct ldlm_lock **lockp,
793                                void *cookie, int *flags)
794 {
795         struct ldlm_lock *lock = *lockp;
796         struct ldlm_resource *res = lock->l_resource;
797         int local = res->lr_namespace->ns_client;
798         ldlm_processing_policy policy;
799         ldlm_error_t rc = ELDLM_OK;
800         ENTRY;
801
802         /* policies are not executed on the client or during replay */
803         if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
804             && !local && ns->ns_policy) {
805                 rc = ns->ns_policy(ns, lockp, cookie, lock->l_req_mode, *flags,
806                                    NULL);
807                 if (rc == ELDLM_LOCK_REPLACED) {
808                         /* The lock that was returned has already been granted,
809                          * and placed into lockp.  If it's not the same as the
810                          * one we passed in, then destroy the old one and our
811                          * work here is done. */
812                         if (lock != *lockp) {
813                                 ldlm_lock_destroy(lock);
814                                 LDLM_LOCK_PUT(lock);
815                         }
816                         *flags |= LDLM_FL_LOCK_CHANGED;
817                         RETURN(0);
818                 } else if (rc == ELDLM_LOCK_ABORTED ||
819                            (rc == 0 && (*flags & LDLM_FL_INTENT_ONLY))) {
820                         ldlm_lock_destroy(lock);
821                         RETURN(rc);
822                 }
823                 LASSERT(rc == ELDLM_OK);
824         }
825
826         l_lock(&ns->ns_lock);
827         if (local && lock->l_req_mode == lock->l_granted_mode) {
828                 /* The server returned a blocked lock, but it was granted before
829                  * we got a chance to actually enqueue it.  We don't need to do
830                  * anything else. */
831                 *flags &= ~(LDLM_FL_BLOCK_GRANTED |
832                             LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
833                 GOTO(out, ELDLM_OK);
834         }
835
836         /* Some flags from the enqueue want to make it into the AST, via the
837          * lock's l_flags. */
838         lock->l_flags |= (*flags & LDLM_AST_DISCARD_DATA);
839
840         /* This distinction between local lock trees is very important; a client
841          * namespace only has information about locks taken by that client, and
842          * thus doesn't have enough information to decide for itself if it can
843          * be granted (below).  In this case, we do exactly what the server
844          * tells us to do, as dictated by the 'flags'.
845          *
846          * We do exactly the same thing during recovery, when the server is
847          * more or less trusting the clients not to lie.
848          *
849          * FIXME (bug 268): Detect obvious lies by checking compatibility in
850          * granted/converting queues. */
851         ldlm_resource_unlink_lock(lock);
852         if (local) {
853                 if (*flags & LDLM_FL_BLOCK_CONV)
854                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
855                 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
856                         ldlm_resource_add_lock(res, &res->lr_waiting, lock);
857                 else
858                         ldlm_grant_lock(lock, NULL, 0, 0);
859                 GOTO(out, ELDLM_OK);
860         } else if (*flags & LDLM_FL_REPLAY) {
861                 if (*flags & LDLM_FL_BLOCK_CONV) {
862                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
863                         GOTO(out, ELDLM_OK);
864                 } else if (*flags & LDLM_FL_BLOCK_WAIT) {
865                         ldlm_resource_add_lock(res, &res->lr_waiting, lock);
866                         GOTO(out, ELDLM_OK);
867                 } else if (*flags & LDLM_FL_BLOCK_GRANTED) {
868                         ldlm_grant_lock(lock, NULL, 0, 0);
869                         GOTO(out, ELDLM_OK);
870                 }
871                 /* If no flags, fall through to normal enqueue path. */
872         }
873
874         policy = ldlm_processing_policy_table[res->lr_type];
875         policy(lock, flags, 1, &rc);
876         EXIT;
877 out:
878         l_unlock(&ns->ns_lock);
879         return rc;
880 }
881
882 /* Must be called with namespace taken: queue is waiting or converting. */
883 int ldlm_reprocess_queue(struct ldlm_resource *res, struct list_head *queue)
884 {
885         struct list_head *tmp, *pos;
886         ldlm_processing_policy policy;
887         int flags;
888         int rc = LDLM_ITER_CONTINUE;
889         ldlm_error_t err;
890         ENTRY;
891
892         policy = ldlm_processing_policy_table[res->lr_type];
893         LASSERT(policy);
894
895         list_for_each_safe(tmp, pos, queue) {
896                 struct ldlm_lock *pending;
897                 pending = list_entry(tmp, struct ldlm_lock, l_res_link);
898
899                 CDEBUG(D_INFO, "Reprocessing lock %p\n", pending);
900
901                 flags = 0;
902                 rc = policy(pending, &flags, 0, &err);
903                 if (rc != LDLM_ITER_CONTINUE)
904                         break;
905         }
906
907         RETURN(rc);
908 }
909
910 int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list)
911 {
912         struct list_head *tmp, *pos;
913         int rc, retval = 0;
914         ENTRY;
915
916         l_check_no_ns_lock(ns);
917
918         list_for_each_safe(tmp, pos, rpc_list) {
919                 struct ldlm_ast_work *w =
920                         list_entry(tmp, struct ldlm_ast_work, w_list);
921
922                 /* It's possible to receive a completion AST before we've set
923                  * the l_completion_ast pointer: either because the AST arrived
924                  * before the reply, or simply because there's a small race
925                  * window between receiving the reply and finishing the local
926                  * enqueue. (bug 842)
927                  *
928                  * This can't happen with the blocking_ast, however, because we
929                  * will never call the local blocking_ast until we drop our
930                  * reader/writer reference, which we won't do until we get the
931                  * reply and finish enqueueing. */
932                 LASSERT(w->w_lock != NULL);
933                 if (w->w_blocking) {
934                         LASSERT(w->w_lock->l_blocking_ast != NULL);
935                         rc = w->w_lock->l_blocking_ast
936                                 (w->w_lock, &w->w_desc, w->w_data,
937                                  LDLM_CB_BLOCKING);
938                 } else if (w->w_lock->l_completion_ast != NULL) {
939                         LASSERT(w->w_lock->l_completion_ast != NULL);
940                         rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags,
941                                                          w->w_data);
942                 } else {
943                         rc = 0;
944                 }
945                 if (rc == -ERESTART)
946                         retval = rc;
947                 else if (rc)
948                         CDEBUG(D_DLMTRACE, "Failed AST - should clean & "
949                                "disconnect client\n");
950                 LDLM_LOCK_PUT(w->w_lock);
951                 list_del(&w->w_list);
952                 OBD_FREE(w, sizeof(*w));
953         }
954         RETURN(retval);
955 }
956
957 static int reprocess_one_queue(struct ldlm_resource *res, void *closure)
958 {
959         ldlm_reprocess_all(res);
960         return LDLM_ITER_CONTINUE;
961 }
962
963 void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
964 {
965         int i, rc;
966
967         l_lock(&ns->ns_lock);
968         for (i = 0; i < RES_HASH_SIZE; i++) {
969                 struct list_head *tmp, *next;
970                 list_for_each_safe(tmp, next, &(ns->ns_hash[i])) {
971                         struct ldlm_resource *res =
972                                 list_entry(tmp, struct ldlm_resource, lr_hash);
973
974                         ldlm_resource_getref(res);
975                         l_unlock(&ns->ns_lock);
976                         rc = reprocess_one_queue(res, NULL);
977                         l_lock(&ns->ns_lock);
978                         next = tmp->next;
979                         ldlm_resource_putref(res);
980                         if (rc == LDLM_ITER_STOP)
981                                 GOTO(out, rc);
982                 }
983         }
984  out:
985         l_unlock(&ns->ns_lock);
986         EXIT;
987 }
988
989 void ldlm_reprocess_all(struct ldlm_resource *res)
990 {
991         struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
992         int rc;
993         ENTRY;
994
995         /* Local lock trees don't get reprocessed. */
996         if (res->lr_namespace->ns_client) {
997                 EXIT;
998                 return;
999         }
1000
1001  restart:
1002         l_lock(&res->lr_namespace->ns_lock);
1003         res->lr_tmp = &rpc_list;
1004
1005         rc = ldlm_reprocess_queue(res, &res->lr_converting);
1006         if (rc == LDLM_ITER_CONTINUE)
1007                 ldlm_reprocess_queue(res, &res->lr_waiting);
1008
1009         res->lr_tmp = NULL;
1010         l_unlock(&res->lr_namespace->ns_lock);
1011
1012         rc = ldlm_run_ast_work(res->lr_namespace, &rpc_list);
1013         if (rc == -ERESTART) {
1014                 LASSERT(list_empty(&rpc_list));
1015                 goto restart;
1016         }
1017         EXIT;
1018 }
1019
1020 void ldlm_cancel_callback(struct ldlm_lock *lock)
1021 {
1022         l_lock(&lock->l_resource->lr_namespace->ns_lock);
1023         if (!(lock->l_flags & LDLM_FL_CANCEL)) {
1024                 lock->l_flags |= LDLM_FL_CANCEL;
1025                 if (lock->l_blocking_ast) {
1026                         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
1027                         // l_check_no_ns_lock(lock->l_resource->lr_namespace);
1028                         lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
1029                                              LDLM_CB_CANCELING);
1030                         return;
1031                 } else {
1032                         LDLM_DEBUG(lock, "no blocking ast");
1033                 }
1034         }
1035         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
1036 }
1037
1038 void ldlm_lock_cancel(struct ldlm_lock *lock)
1039 {
1040         struct ldlm_resource *res;
1041         struct ldlm_namespace *ns;
1042         ENTRY;
1043
1044         /* There's no race between calling this and taking the ns lock below;
1045          * a lock can only be put on the waiting list once, because it can only
1046          * issue a blocking AST once. */
1047         ldlm_del_waiting_lock(lock);
1048
1049         res = lock->l_resource;
1050         ns = res->lr_namespace;
1051
1052         l_lock(&ns->ns_lock);
1053         /* Please do not, no matter how tempting, remove this LBUG without
1054          * talking to me first. -phik */
1055         if (lock->l_readers || lock->l_writers) {
1056                 LDLM_ERROR(lock, "lock still has references");
1057                 LBUG();
1058         }
1059
1060         ldlm_cancel_callback(lock);
1061
1062         ldlm_resource_unlink_lock(lock);
1063         ldlm_lock_destroy(lock);
1064         l_unlock(&ns->ns_lock);
1065         EXIT;
1066 }
1067
1068 int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
1069 {
1070         struct ldlm_lock *lock = ldlm_handle2lock(lockh);
1071         ENTRY;
1072
1073         if (lock == NULL)
1074                 RETURN(-EINVAL);
1075
1076         lock->l_ast_data = data;
1077         LDLM_LOCK_PUT(lock);
1078         RETURN(0);
1079 }
1080
1081 void ldlm_cancel_locks_for_export(struct obd_export *exp)
1082 {
1083         struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
1084         struct ldlm_lock *lock;
1085         struct ldlm_resource *res;
1086
1087         l_lock(&ns->ns_lock);
1088         while(!list_empty(&exp->exp_ldlm_data.led_held_locks)) { 
1089                 lock = list_entry(exp->exp_ldlm_data.led_held_locks.next,
1090                                   struct ldlm_lock, l_export_chain);
1091                 res = ldlm_resource_getref(lock->l_resource);
1092                 LDLM_DEBUG(lock, "export %p", exp);
1093                 ldlm_lock_cancel(lock);
1094                 l_unlock(&ns->ns_lock);
1095                 ldlm_reprocess_all(res);
1096                 ldlm_resource_putref(res);
1097                 l_lock(&ns->ns_lock);
1098         }
1099         l_unlock(&ns->ns_lock);
1100 }
1101
1102 struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
1103                                         int *flags)
1104 {
1105         struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
1106         struct ldlm_resource *res;
1107         struct ldlm_namespace *ns;
1108         int granted = 0;
1109         ENTRY;
1110
1111         LBUG();
1112
1113         res = lock->l_resource;
1114         ns = res->lr_namespace;
1115
1116         l_lock(&ns->ns_lock);
1117
1118         lock->l_req_mode = new_mode;
1119         ldlm_resource_unlink_lock(lock);
1120
1121         /* If this is a local resource, put it on the appropriate list. */
1122         if (res->lr_namespace->ns_client) {
1123                 if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED)) {
1124                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
1125                 } else {
1126                         /* This should never happen, because of the way the
1127                          * server handles conversions. */
1128                         LBUG();
1129
1130                         res->lr_tmp = &rpc_list;
1131                         ldlm_grant_lock(lock, NULL, 0, 0);
1132                         res->lr_tmp = NULL;
1133                         granted = 1;
1134                         /* FIXME: completion handling not with ns_lock held ! */
1135                         if (lock->l_completion_ast)
1136                                 lock->l_completion_ast(lock, 0, NULL);
1137                 }
1138         } else {
1139                 /* FIXME: We should try the conversion right away and possibly
1140                  * return success without the need for an extra AST */
1141                 ldlm_resource_add_lock(res, &res->lr_converting, lock);
1142                 *flags |= LDLM_FL_BLOCK_CONV;
1143         }
1144
1145         l_unlock(&ns->ns_lock);
1146
1147         if (granted)
1148                 ldlm_run_ast_work(ns, &rpc_list);
1149         RETURN(res);
1150 }
1151
1152 void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos)
1153 {
1154         char str[PTL_NALFMT_SIZE];
1155         struct obd_device *obd = NULL;
1156
1157         if (!((portal_debug | D_ERROR) & level))
1158                 return;
1159
1160         if (!lock) {
1161                 CDEBUG(level, "  NULL LDLM lock\n");
1162                 return;
1163         }
1164
1165         CDEBUG(level, "  -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d)\n",
1166                lock, lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
1167                pos);
1168         if (lock->l_conn_export != NULL)
1169                 obd = lock->l_conn_export->exp_obd;
1170         if (lock->l_export && lock->l_export->exp_connection) {
1171                 CDEBUG(level, "  Node: NID %s on %s (rhandle: "LPX64")\n",
1172                        ptlrpc_peernid2str(&lock->l_export->exp_connection->c_peer, str),
1173                        lock->l_export->exp_connection->c_peer.peer_ni->pni_name,
1174                        lock->l_remote_handle.cookie);
1175         } else if (obd == NULL) {
1176                 CDEBUG(level, "  Node: local\n");
1177         } else {
1178                 struct obd_import *imp = obd->u.cli.cl_import;
1179                 CDEBUG(level, "  Node: NID %s on %s (rhandle: "LPX64")\n",
1180                        ptlrpc_peernid2str(&imp->imp_connection->c_peer, str),
1181                        imp->imp_connection->c_peer.peer_ni->pni_name,
1182                        lock->l_remote_handle.cookie);
1183         }
1184         CDEBUG(level, "  Resource: %p ("LPU64"/"LPU64")\n", lock->l_resource,
1185                lock->l_resource->lr_name.name[0],
1186                lock->l_resource->lr_name.name[1]);
1187         CDEBUG(level, "  Req mode: %s, grant mode: %s, rc: %u, read: %d, "
1188                "write: %d\n", ldlm_lockname[lock->l_req_mode],
1189                ldlm_lockname[lock->l_granted_mode],
1190                atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers);
1191         if (lock->l_resource->lr_type == LDLM_EXTENT)
1192                 CDEBUG(level, "  Extent: "LPU64" -> "LPU64
1193                        " (req "LPU64"-"LPU64")\n",
1194                        lock->l_policy_data.l_extent.start,
1195                        lock->l_policy_data.l_extent.end,
1196                        lock->l_req_extent.start, lock->l_req_extent.end);
1197         else if (lock->l_resource->lr_type == LDLM_FLOCK)
1198                 CDEBUG(level, "  Pid: "LPU64" Extent: "LPU64" -> "LPU64"\n",
1199                        lock->l_policy_data.l_flock.pid,
1200                        lock->l_policy_data.l_flock.start,
1201                        lock->l_policy_data.l_flock.end);
1202         else if (lock->l_resource->lr_type == LDLM_IBITS)
1203                 CDEBUG(level, " Bits: "LPX64"\n",
1204                        lock->l_policy_data.l_inodebits.bits);
1205 }
1206
1207 void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
1208 {
1209         struct ldlm_lock *lock;
1210
1211         lock = ldlm_handle2lock(lockh);
1212         if (lock == NULL)
1213                 return;
1214
1215         ldlm_lock_dump(D_OTHER, lock, 0);
1216
1217         LDLM_LOCK_PUT(lock);
1218 }