Whamcloud - gitweb
land b_inodebits
[fs/lustre-release.git] / lustre / ldlm / ldlm_lock.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
5  *   Author: Peter Braam <braam@clusterfs.com>
6  *   Author: Phil Schwan <phil@clusterfs.com>
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #define DEBUG_SUBSYSTEM S_LDLM
25
26 #ifdef __KERNEL__
27 # include <linux/slab.h>
28 # include <linux/module.h>
29 # include <linux/lustre_dlm.h>
30 #else
31 # include <liblustre.h>
32 # include <linux/kp30.h>
33 #endif
34
35 #include <linux/obd_class.h>
36 #include "ldlm_internal.h"
37
38 //struct lustre_lock ldlm_everything_lock;
39
40 /* lock types */
41 char *ldlm_lockname[] = {
42         [0] "--",
43         [LCK_EX] "EX",
44         [LCK_PW] "PW",
45         [LCK_PR] "PR",
46         [LCK_CW] "CW",
47         [LCK_CR] "CR",
48         [LCK_NL] "NL",
49         [LCK_GROUP] "GROUP"
50 };
51 char *ldlm_typename[] = {
52         [LDLM_PLAIN] "PLN",
53         [LDLM_EXTENT] "EXT",
54         [LDLM_FLOCK] "FLK",
55         [LDLM_IBITS] "IBT",
56 };
57
58 char *ldlm_it2str(int it)
59 {
60         switch (it) {
61         case IT_OPEN:
62                 return "open";
63         case IT_CREAT:
64                 return "creat";
65         case (IT_OPEN | IT_CREAT):
66                 return "open|creat";
67         case IT_READDIR:
68                 return "readdir";
69         case IT_GETATTR:
70                 return "getattr";
71         case IT_LOOKUP:
72                 return "lookup";
73         case IT_UNLINK:
74                 return "unlink";
75         case IT_GETXATTR:
76                 return "getxattr";
77         default:
78                 CERROR("Unknown intent %d\n", it);
79                 return "UNKNOWN";
80         }
81 }
82
83 extern kmem_cache_t *ldlm_lock_slab;
84 struct lustre_lock ldlm_handle_lock;
85
86 static ldlm_processing_policy ldlm_processing_policy_table[] = {
87         [LDLM_PLAIN] ldlm_process_plain_lock,
88         [LDLM_EXTENT] ldlm_process_extent_lock,
89 #ifdef __KERNEL__
90         [LDLM_FLOCK] ldlm_process_flock_lock,
91 #endif
92         [LDLM_IBITS] ldlm_process_inodebits_lock,
93 };
94
95 ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res)
96 {
97         return ldlm_processing_policy_table[res->lr_type];
98 }
99
100 void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg)
101 {
102         ns->ns_policy = arg;
103 }
104
105 /*
106  * REFCOUNTED LOCK OBJECTS
107  */
108
109
110 /*
111  * Lock refcounts, during creation:
112  *   - one special one for allocation, dec'd only once in destroy
113  *   - one for being a lock that's in-use
114  *   - one for the addref associated with a new lock
115  */
116 struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
117 {
118         atomic_inc(&lock->l_refc);
119         return lock;
120 }
121
122 void ldlm_lock_put(struct ldlm_lock *lock)
123 {
124         ENTRY;
125
126         if (atomic_dec_and_test(&lock->l_refc)) {
127                 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
128
129                 l_lock(&ns->ns_lock);
130                 LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing");
131                 LASSERT(lock->l_destroyed);
132                 LASSERT(list_empty(&lock->l_res_link));
133
134                 spin_lock(&ns->ns_counter_lock);
135                 ns->ns_locks--;
136                 spin_unlock(&ns->ns_counter_lock);
137
138                 ldlm_resource_putref(lock->l_resource);
139                 lock->l_resource = NULL;
140                 if (lock->l_export)
141                         class_export_put(lock->l_export);
142
143                 if (lock->l_parent)
144                         LDLM_LOCK_PUT(lock->l_parent);
145
146                 if (lock->l_lvb_data != NULL)
147                         OBD_FREE(lock->l_lvb_data, lock->l_lvb_len);
148
149                 OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
150                 l_unlock(&ns->ns_lock);
151         }
152
153         EXIT;
154 }
155
156 void ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
157 {
158         ENTRY;
159         l_lock(&lock->l_resource->lr_namespace->ns_lock);
160         if (!list_empty(&lock->l_lru)) {
161                 list_del_init(&lock->l_lru);
162                 lock->l_resource->lr_namespace->ns_nr_unused--;
163                 LASSERT(lock->l_resource->lr_namespace->ns_nr_unused >= 0);
164         }
165         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
166         EXIT;
167 }
168
169 /* This used to have a 'strict' flag, which recovery would use to mark an
170  * in-use lock as needing-to-die.  Lest I am ever tempted to put it back, I
171  * shall explain why it's gone: with the new hash table scheme, once you call
172  * ldlm_lock_destroy, you can never drop your final references on this lock.
173  * Because it's not in the hash table anymore.  -phil */
174 void ldlm_lock_destroy(struct ldlm_lock *lock)
175 {
176         ENTRY;
177         l_lock(&lock->l_resource->lr_namespace->ns_lock);
178
179         if (!list_empty(&lock->l_children)) {
180                 LDLM_ERROR(lock, "still has children (%p)!",
181                            lock->l_children.next);
182                 ldlm_lock_dump(D_ERROR, lock, 0);
183                 LBUG();
184         }
185         if (lock->l_readers || lock->l_writers) {
186                 LDLM_ERROR(lock, "lock still has references");
187                 ldlm_lock_dump(D_ERROR, lock, 0);
188                 LBUG();
189         }
190
191         if (!list_empty(&lock->l_res_link)) {
192                 LDLM_ERROR(lock, "lock still on resource");
193                 ldlm_lock_dump(D_ERROR, lock, 0);
194                 LBUG();
195         }
196
197         if (lock->l_destroyed) {
198                 LASSERT(list_empty(&lock->l_lru));
199                 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
200                 EXIT;
201                 return;
202         }
203         lock->l_destroyed = 1;
204
205         list_del_init(&lock->l_export_chain);
206         ldlm_lock_remove_from_lru(lock);
207         class_handle_unhash(&lock->l_handle);
208
209 #if 0
210         /* Wake anyone waiting for this lock */
211         /* FIXME: I should probably add yet another flag, instead of using
212          * l_export to only call this on clients */
213         if (lock->l_export)
214                 class_export_put(lock->l_export);
215         lock->l_export = NULL;
216         if (lock->l_export && lock->l_completion_ast)
217                 lock->l_completion_ast(lock, 0);
218 #endif
219
220         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
221         LDLM_LOCK_PUT(lock);
222         EXIT;
223 }
224
225 /* this is called by portals_handle2object with the handle lock taken */
/* Handle-table callback (registered via class_handle_hash): takes a
 * reference on the lock that an opaque handle object refers to. */
static void lock_handle_addref(void *lock)
{
        struct ldlm_lock *target = lock;

        LDLM_LOCK_GET(target);
}
230
231 /*
232  * usage: pass in a resource on which you have done ldlm_resource_get
233  *        pass in a parent lock on which you have done a ldlm_lock_get
234  *        after return, ldlm_*_put the resource and parent
235  * returns: lock with refcount 1
236  */
237 static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
238                                        struct ldlm_resource *resource)
239 {
240         struct ldlm_lock *lock;
241         ENTRY;
242
243         if (resource == NULL)
244                 LBUG();
245
246         OBD_SLAB_ALLOC(lock, ldlm_lock_slab, SLAB_NOFS, sizeof(*lock));
247         if (lock == NULL)
248                 RETURN(NULL);
249
250         lock->l_resource = ldlm_resource_getref(resource);
251
252         atomic_set(&lock->l_refc, 2);
253         INIT_LIST_HEAD(&lock->l_children);
254         INIT_LIST_HEAD(&lock->l_res_link);
255         INIT_LIST_HEAD(&lock->l_lru);
256         INIT_LIST_HEAD(&lock->l_export_chain);
257         INIT_LIST_HEAD(&lock->l_pending_chain);
258         init_waitqueue_head(&lock->l_waitq);
259
260         spin_lock(&resource->lr_namespace->ns_counter_lock);
261         resource->lr_namespace->ns_locks++;
262         spin_unlock(&resource->lr_namespace->ns_counter_lock);
263
264         if (parent != NULL) {
265                 l_lock(&parent->l_resource->lr_namespace->ns_lock);
266                 lock->l_parent = LDLM_LOCK_GET(parent);
267                 list_add(&lock->l_childof, &parent->l_children);
268                 l_unlock(&parent->l_resource->lr_namespace->ns_lock);
269         }
270
271         INIT_LIST_HEAD(&lock->l_handle.h_link);
272         class_handle_hash(&lock->l_handle, lock_handle_addref);
273
274         RETURN(lock);
275 }
276
277 int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
278                               struct ldlm_res_id new_resid)
279 {
280         struct ldlm_resource *oldres = lock->l_resource;
281         ENTRY;
282
283         l_lock(&ns->ns_lock);
284         if (memcmp(&new_resid, &lock->l_resource->lr_name,
285                    sizeof(lock->l_resource->lr_name)) == 0) {
286                 /* Nothing to do */
287                 l_unlock(&ns->ns_lock);
288                 RETURN(0);
289         }
290
291         LASSERT(new_resid.name[0] != 0);
292
293         /* This function assumes that the lock isn't on any lists */
294         LASSERT(list_empty(&lock->l_res_link));
295
296         lock->l_resource = ldlm_resource_get(ns, NULL, new_resid,
297                                              lock->l_resource->lr_type, 1);
298         if (lock->l_resource == NULL) {
299                 LBUG();
300                 RETURN(-ENOMEM);
301         }
302
303         /* ...and the flowers are still standing! */
304         ldlm_resource_putref(oldres);
305
306         l_unlock(&ns->ns_lock);
307         RETURN(0);
308 }
309
310 /*
311  *  HANDLES
312  */
313
314 void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
315 {
316         lockh->cookie = lock->l_handle.h_cookie;
317 }
318
319 /* if flags: atomically get the lock and set the flags.
320  *           Return NULL if flag already set
321  */
322
323 struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *handle, int flags)
324 {
325         struct ldlm_namespace *ns;
326         struct ldlm_lock *lock = NULL, *retval = NULL;
327         ENTRY;
328
329         LASSERT(handle);
330
331         lock = class_handle2object(handle->cookie);
332         if (lock == NULL)
333                 RETURN(NULL);
334
335         LASSERT(lock->l_resource != NULL);
336         ns = lock->l_resource->lr_namespace;
337         LASSERT(ns != NULL);
338
339         l_lock(&ns->ns_lock);
340
341         /* It's unlikely but possible that someone marked the lock as
342          * destroyed after we did handle2object on it */
343         if (lock->l_destroyed) {
344                 CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
345                 LDLM_LOCK_PUT(lock);
346                 GOTO(out, retval);
347         }
348
349         if (flags && (lock->l_flags & flags)) {
350                 LDLM_LOCK_PUT(lock);
351                 GOTO(out, retval);
352         }
353
354         if (flags)
355                 lock->l_flags |= flags;
356
357         retval = lock;
358         EXIT;
359  out:
360         l_unlock(&ns->ns_lock);
361         return retval;
362 }
363
364 struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *ns,
365                                       struct lustre_handle *handle)
366 {
367         struct ldlm_lock *retval = NULL;
368
369         l_lock(&ns->ns_lock);
370         retval = __ldlm_handle2lock(handle, 0);
371         l_unlock(&ns->ns_lock);
372
373         return retval;
374 }
375
376 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
377 {
378         ldlm_res2desc(lock->l_resource, &desc->l_resource);
379         desc->l_req_mode = lock->l_req_mode;
380         desc->l_granted_mode = lock->l_granted_mode;
381         memcpy(&desc->l_policy_data, &lock->l_policy_data,
382                sizeof(desc->l_policy_data));
383 }
384
385 void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
386                             void *data, int datalen)
387 {
388         struct ldlm_ast_work *w;
389         ENTRY;
390
391         l_lock(&lock->l_resource->lr_namespace->ns_lock);
392         if (new && (lock->l_flags & LDLM_FL_AST_SENT))
393                 GOTO(out, 0);
394
395         CDEBUG(D_OTHER, "lock %p incompatible; sending blocking AST.\n", lock);
396
397         OBD_ALLOC(w, sizeof(*w));
398         if (!w) {
399                 LBUG();
400                 GOTO(out, 0);
401         }
402
403         w->w_data = data;
404         w->w_datalen = datalen;
405         if (new) {
406                 LDLM_DEBUG(lock, "lock incompatible; sending blocking AST.");
407                 lock->l_flags |= LDLM_FL_AST_SENT;
408                 /* If the enqueuing client said so, tell the AST recipient to
409                  * discard dirty data, rather than writing back. */
410                 if (new->l_flags & LDLM_AST_DISCARD_DATA)
411                         lock->l_flags |= LDLM_FL_DISCARD_DATA;
412                 w->w_blocking = 1;
413                 ldlm_lock2desc(new, &w->w_desc);
414         }
415
416         w->w_lock = LDLM_LOCK_GET(lock);
417         list_add(&w->w_list, lock->l_resource->lr_tmp);
418         EXIT;
419  out:
420         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
421 }
422
423 void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
424 {
425         struct ldlm_lock *lock;
426
427         lock = ldlm_handle2lock(lockh);
428         ldlm_lock_addref_internal(lock, mode);
429         LDLM_LOCK_PUT(lock);
430 }
431
432 /* only called for local locks */
433 void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
434 {
435         l_lock(&lock->l_resource->lr_namespace->ns_lock);
436         ldlm_lock_remove_from_lru(lock);
437         if (mode & (LCK_NL | LCK_CR | LCK_PR))
438                 lock->l_readers++;
439         if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP))
440                 lock->l_writers++;
441         lock->l_last_used = jiffies;
442         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
443         LDLM_LOCK_GET(lock);
444         LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
445 }
446
447 void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
448 {
449         struct ldlm_namespace *ns;
450         ENTRY;
451
452         LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
453         ns = lock->l_resource->lr_namespace;
454         l_lock(&ns->ns_lock);
455         if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
456                 LASSERT(lock->l_readers > 0);
457                 lock->l_readers--;
458         }
459         if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP)) {
460                 LASSERT(lock->l_writers > 0);
461                 lock->l_writers--;
462         }
463
464         if (lock->l_flags & LDLM_FL_LOCAL &&
465             !lock->l_readers && !lock->l_writers) {
466                 /* If this is a local lock on a server namespace and this was
467                  * the last reference, cancel the lock. */
468                 CDEBUG(D_INFO, "forcing cancel of local lock\n");
469                 lock->l_flags |= LDLM_FL_CBPENDING;
470         }
471
472         if (!lock->l_readers && !lock->l_writers &&
473             (lock->l_flags & LDLM_FL_CBPENDING)) {
474                 /* If we received a blocked AST and this was the last reference,
475                  * run the callback. */
476                 if (ns->ns_client == LDLM_NAMESPACE_SERVER && lock->l_export)
477                         CERROR("FL_CBPENDING set on non-local lock--just a "
478                                "warning\n");
479
480                 LDLM_DEBUG(lock, "final decref done on cbpending lock");
481
482                 LDLM_LOCK_GET(lock); /* dropped by bl thread */
483                 ldlm_lock_remove_from_lru(lock);
484 #ifdef __KERNEL__
485                 ldlm_bl_to_thread(ns, NULL, lock);
486                 l_unlock(&ns->ns_lock);
487 #else
488                 l_unlock(&ns->ns_lock);
489                 ldlm_handle_bl_callback(ns, NULL, lock);
490 #endif
491         } else if (ns->ns_client == LDLM_NAMESPACE_CLIENT &&
492                    !lock->l_readers && !lock->l_writers) {
493                 /* If this is a client-side namespace and this was the last
494                  * reference, put it on the LRU. */
495                 LASSERT(list_empty(&lock->l_lru));
496                 LASSERT(ns->ns_nr_unused >= 0);
497                 list_add_tail(&lock->l_lru, &ns->ns_unused_list);
498                 ns->ns_nr_unused++;
499                 l_unlock(&ns->ns_lock);
500                 ldlm_cancel_lru(ns, LDLM_ASYNC);
501         } else {
502                 l_unlock(&ns->ns_lock);
503         }
504
505         LDLM_LOCK_PUT(lock);    /* matches the ldlm_lock_get in addref */
506
507         EXIT;
508 }
509
510 void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
511 {
512         struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
513         LASSERT(lock != NULL);
514         ldlm_lock_decref_internal(lock, mode);
515         LDLM_LOCK_PUT(lock);
516 }
517
518 /* This will drop a lock reference and mark it for destruction, but will not
519  * necessarily cancel the lock before returning. */
520 void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
521 {
522         struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
523         ENTRY;
524
525         LASSERT(lock != NULL);
526
527         LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
528         l_lock(&lock->l_resource->lr_namespace->ns_lock);
529         lock->l_flags |= LDLM_FL_CBPENDING;
530         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
531         ldlm_lock_decref_internal(lock, mode);
532         LDLM_LOCK_PUT(lock);
533 }
534
535 /* NOTE: called by
536  *  - ldlm_lock_enqueue
537  *  - ldlm_reprocess_queue
538  *  - ldlm_lock_convert
539  */
540 void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen,
541                      int run_ast)
542 {
543         struct ldlm_resource *res = lock->l_resource;
544         ENTRY;
545
546         l_lock(&lock->l_resource->lr_namespace->ns_lock);
547         lock->l_granted_mode = lock->l_req_mode;
548         ldlm_resource_add_lock(res, &res->lr_granted, lock);
549
550         if (lock->l_granted_mode < res->lr_most_restr)
551                 res->lr_most_restr = lock->l_granted_mode;
552
553         if (run_ast && lock->l_completion_ast != NULL)
554                 ldlm_add_ast_work_item(lock, NULL, data, datalen);
555
556         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
557         EXIT;
558 }
559
560 /* returns a referenced lock or NULL.  See the flag descriptions below, in the
561  * comment above ldlm_lock_match */
562 static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
563                                       ldlm_policy_data_t *policy,
564                                       struct ldlm_lock *old_lock, int flags)
565 {
566         struct ldlm_lock *lock;
567         struct list_head *tmp;
568
569         list_for_each(tmp, queue) {
570                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
571
572                 if (lock == old_lock)
573                         break;
574
575                 /* llite sometimes wants to match locks that will be
576                  * canceled when their users drop, but we allow it to match
577                  * if it passes in CBPENDING and the lock still has users.
578                  * this is generally only going to be used by children 
579                  * whose parents already hold a lock so forward progress
580                  * can still happen. */
581                 if (lock->l_flags & LDLM_FL_CBPENDING &&
582                     !(flags & LDLM_FL_CBPENDING))
583                         continue;
584                 if (lock->l_flags & LDLM_FL_CBPENDING &&
585                     lock->l_readers == 0 && lock->l_writers == 0)
586                         continue;
587
588                 if (!(lock->l_req_mode & mode))
589                         continue;
590
591                 if (lock->l_resource->lr_type == LDLM_EXTENT &&
592                     (lock->l_policy_data.l_extent.start >
593                      policy->l_extent.start ||
594                      lock->l_policy_data.l_extent.end < policy->l_extent.end))
595                         continue;
596
597                 if (lock->l_resource->lr_type == LDLM_EXTENT &&
598                     mode == LCK_GROUP &&
599                     lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
600                         continue;
601
602                 /* We match if we have existing lock with same or wider set
603                    of bits. */
604                 if (lock->l_resource->lr_type == LDLM_IBITS &&
605                      ((lock->l_policy_data.l_inodebits.bits &
606                       policy->l_inodebits.bits) !=
607                       policy->l_inodebits.bits))
608                         continue;
609
610                 if (lock->l_destroyed)
611                         continue;
612
613                 if ((flags & LDLM_FL_LOCAL_ONLY) &&
614                     !(lock->l_flags & LDLM_FL_LOCAL))
615                         continue;
616
617                 if (flags & LDLM_FL_TEST_LOCK)
618                         LDLM_LOCK_GET(lock);
619                 else
620                         ldlm_lock_addref_internal(lock, mode);
621                 return lock;
622         }
623
624         return NULL;
625 }
626
627 void ldlm_lock_allow_match(struct ldlm_lock *lock)
628 {
629         l_lock(&lock->l_resource->lr_namespace->ns_lock);
630         lock->l_flags |= LDLM_FL_CAN_MATCH;
631         wake_up(&lock->l_waitq);
632         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
633 }
634
635 /* Can be called in two ways:
636  *
637  * If 'ns' is NULL, then lockh describes an existing lock that we want to look
638  * for a duplicate of.
639  *
640  * Otherwise, all of the fields must be filled in, to match against.
641  *
642  * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the
643  *     server (ie, connh is NULL)
644  * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted
645  *     list will be considered
646  * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked
647  *     to be canceled can still be matched as long as they still have reader
648  *     or writer references
649  * If 'flags' contains LDLM_FL_TEST_LOCK, then don't actually reference a lock,
650  *     just tell us if we would have matched.
651  *
652  * Returns 1 if it finds an already-existing lock that is compatible; in this
653  * case, lockh is filled in with a addref()ed lock
654  */
655 int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
656                     struct ldlm_res_id *res_id, __u32 type,
657                     ldlm_policy_data_t *policy, ldlm_mode_t mode,
658                     struct lustre_handle *lockh)
659 {
660         struct ldlm_resource *res;
661         struct ldlm_lock *lock, *old_lock = NULL;
662         int rc = 0;
663         ENTRY;
664
665         if (ns == NULL) {
666                 old_lock = ldlm_handle2lock(lockh);
667                 LASSERT(old_lock);
668
669                 ns = old_lock->l_resource->lr_namespace;
670                 res_id = &old_lock->l_resource->lr_name;
671                 type = old_lock->l_resource->lr_type;
672                 mode = old_lock->l_req_mode;
673         }
674
675         res = ldlm_resource_get(ns, NULL, *res_id, type, 0);
676         if (res == NULL) {
677                 LASSERT(old_lock == NULL);
678                 RETURN(0);
679         }
680
681         l_lock(&ns->ns_lock);
682
683         lock = search_queue(&res->lr_granted, mode, policy, old_lock, flags);
684         if (lock != NULL)
685                 GOTO(out, rc = 1);
686         if (flags & LDLM_FL_BLOCK_GRANTED)
687                 GOTO(out, rc = 0);
688         lock = search_queue(&res->lr_converting, mode, policy, old_lock, flags);
689         if (lock != NULL)
690                 GOTO(out, rc = 1);
691         lock = search_queue(&res->lr_waiting, mode, policy, old_lock, flags);
692         if (lock != NULL)
693                 GOTO(out, rc = 1);
694
695         EXIT;
696  out:
697         ldlm_resource_putref(res);
698         l_unlock(&ns->ns_lock);
699
700         if (lock) {
701                 ldlm_lock2handle(lock, lockh);
702                 if (!(lock->l_flags & LDLM_FL_CAN_MATCH)) {
703                         struct l_wait_info lwi;
704                         if (lock->l_completion_ast)
705                                 lock->l_completion_ast(lock,
706                                                        LDLM_FL_WAIT_NOREPROC,
707                                                        NULL);
708
709                         lwi = LWI_TIMEOUT_INTR(obd_timeout*HZ, NULL,NULL,NULL);
710
711                         /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */
712                         l_wait_event(lock->l_waitq,
713                                      (lock->l_flags & LDLM_FL_CAN_MATCH), &lwi);
714                 }
715         }
716         if (rc)
717                 LDLM_DEBUG(lock, "matched ("LPU64" "LPU64")",
718                            type == LDLM_PLAIN ? res_id->name[2] :
719                                 policy->l_extent.start,
720                            type == LDLM_PLAIN ? res_id->name[3] :
721                                 policy->l_extent.end);
722         else if (!(flags & LDLM_FL_TEST_LOCK)) /* less verbose for test-only */
723                 LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res "
724                                   LPU64"/"LPU64" ("LPU64" "LPU64")", ns,
725                                   type, mode, res_id->name[0], res_id->name[1],
726                                   type == LDLM_PLAIN ? res_id->name[2] :
727                                         policy->l_extent.start,
728                                   type == LDLM_PLAIN ? res_id->name[3] :
729                                         policy->l_extent.end);
730
731         if (old_lock)
732                 LDLM_LOCK_PUT(old_lock);
733         if (flags & LDLM_FL_TEST_LOCK && rc)
734                 LDLM_LOCK_PUT(lock);
735
736         return rc;
737 }
738
739 /* Returns a referenced lock */
740 struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
741                                    struct lustre_handle *parent_lock_handle,
742                                    struct ldlm_res_id res_id, __u32 type,
743                                    ldlm_mode_t mode,
744                                    ldlm_blocking_callback blocking,
745                                    ldlm_completion_callback completion,
746                                    ldlm_glimpse_callback glimpse,
747                                    void *data, __u32 lvb_len)
748 {
749         struct ldlm_resource *res, *parent_res = NULL;
750         struct ldlm_lock *lock, *parent_lock = NULL;
751         ENTRY;
752
753         if (parent_lock_handle) {
754                 parent_lock = ldlm_handle2lock(parent_lock_handle);
755                 if (parent_lock)
756                         parent_res = parent_lock->l_resource;
757         }
758
759         res = ldlm_resource_get(ns, parent_res, res_id, type, 1);
760         if (res == NULL)
761                 RETURN(NULL);
762
763         lock = ldlm_lock_new(parent_lock, res);
764         ldlm_resource_putref(res);
765         if (parent_lock != NULL)
766                 LDLM_LOCK_PUT(parent_lock);
767
768         if (lock == NULL)
769                 RETURN(NULL);
770
771         lock->l_req_mode = mode;
772         lock->l_ast_data = data;
773         lock->l_blocking_ast = blocking;
774         lock->l_completion_ast = completion;
775         lock->l_glimpse_ast = glimpse;
776
777         if (lvb_len) {
778                 lock->l_lvb_len = lvb_len;
779                 OBD_ALLOC(lock->l_lvb_data, lvb_len);
780                 if (lock->l_lvb_data == NULL) {
781                         OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
782                         RETURN(NULL);
783                 }
784         }
785
786         RETURN(lock);
787 }
788
789 ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
790                                struct ldlm_lock **lockp,
791                                void *cookie, int *flags)
792 {
793         struct ldlm_lock *lock = *lockp;
794         struct ldlm_resource *res = lock->l_resource;
795         int local = res->lr_namespace->ns_client;
796         ldlm_processing_policy policy;
797         ldlm_error_t rc = ELDLM_OK;
798         ENTRY;
799
800         /* policies are not executed on the client or during replay */
801         if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
802             && !local && ns->ns_policy) {
803                 rc = ns->ns_policy(ns, lockp, cookie, lock->l_req_mode, *flags,
804                                    NULL);
805                 if (rc == ELDLM_LOCK_REPLACED) {
806                         /* The lock that was returned has already been granted,
807                          * and placed into lockp.  If it's not the same as the
808                          * one we passed in, then destroy the old one and our
809                          * work here is done. */
810                         if (lock != *lockp) {
811                                 ldlm_lock_destroy(lock);
812                                 LDLM_LOCK_PUT(lock);
813                         }
814                         *flags |= LDLM_FL_LOCK_CHANGED;
815                         RETURN(0);
816                 } else if (rc == ELDLM_LOCK_ABORTED ||
817                            (rc == 0 && (*flags & LDLM_FL_INTENT_ONLY))) {
818                         ldlm_lock_destroy(lock);
819                         RETURN(rc);
820                 }
821                 LASSERT(rc == ELDLM_OK);
822         }
823
824         l_lock(&ns->ns_lock);
825         if (local && lock->l_req_mode == lock->l_granted_mode) {
826                 /* The server returned a blocked lock, but it was granted before
827                  * we got a chance to actually enqueue it.  We don't need to do
828                  * anything else. */
829                 *flags &= ~(LDLM_FL_BLOCK_GRANTED |
830                             LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
831                 GOTO(out, ELDLM_OK);
832         }
833
834         /* Some flags from the enqueue want to make it into the AST, via the
835          * lock's l_flags. */
836         lock->l_flags |= (*flags & LDLM_AST_DISCARD_DATA);
837
838         /* This distinction between local lock trees is very important; a client
839          * namespace only has information about locks taken by that client, and
840          * thus doesn't have enough information to decide for itself if it can
841          * be granted (below).  In this case, we do exactly what the server
842          * tells us to do, as dictated by the 'flags'.
843          *
844          * We do exactly the same thing during recovery, when the server is
845          * more or less trusting the clients not to lie.
846          *
847          * FIXME (bug 268): Detect obvious lies by checking compatibility in
848          * granted/converting queues. */
849         ldlm_resource_unlink_lock(lock);
850         if (local) {
851                 if (*flags & LDLM_FL_BLOCK_CONV)
852                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
853                 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
854                         ldlm_resource_add_lock(res, &res->lr_waiting, lock);
855                 else
856                         ldlm_grant_lock(lock, NULL, 0, 0);
857                 GOTO(out, ELDLM_OK);
858         } else if (*flags & LDLM_FL_REPLAY) {
859                 if (*flags & LDLM_FL_BLOCK_CONV) {
860                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
861                         GOTO(out, ELDLM_OK);
862                 } else if (*flags & LDLM_FL_BLOCK_WAIT) {
863                         ldlm_resource_add_lock(res, &res->lr_waiting, lock);
864                         GOTO(out, ELDLM_OK);
865                 } else if (*flags & LDLM_FL_BLOCK_GRANTED) {
866                         ldlm_grant_lock(lock, NULL, 0, 0);
867                         GOTO(out, ELDLM_OK);
868                 }
869                 /* If no flags, fall through to normal enqueue path. */
870         }
871
872         policy = ldlm_processing_policy_table[res->lr_type];
873         policy(lock, flags, 1, &rc);
874         EXIT;
875 out:
876         l_unlock(&ns->ns_lock);
877         return rc;
878 }
879
880 /* Must be called with namespace taken: queue is waiting or converting. */
881 int ldlm_reprocess_queue(struct ldlm_resource *res, struct list_head *queue)
882 {
883         struct list_head *tmp, *pos;
884         ldlm_processing_policy policy;
885         int flags;
886         int rc = LDLM_ITER_CONTINUE;
887         ldlm_error_t err;
888         ENTRY;
889
890         policy = ldlm_processing_policy_table[res->lr_type];
891         LASSERT(policy);
892
893         list_for_each_safe(tmp, pos, queue) {
894                 struct ldlm_lock *pending;
895                 pending = list_entry(tmp, struct ldlm_lock, l_res_link);
896
897                 CDEBUG(D_INFO, "Reprocessing lock %p\n", pending);
898
899                 flags = 0;
900                 rc = policy(pending, &flags, 0, &err);
901                 if (rc != LDLM_ITER_CONTINUE)
902                         break;
903         }
904
905         RETURN(rc);
906 }
907
908 int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list)
909 {
910         struct list_head *tmp, *pos;
911         int rc, retval = 0;
912         ENTRY;
913
914         l_check_no_ns_lock(ns);
915
916         list_for_each_safe(tmp, pos, rpc_list) {
917                 struct ldlm_ast_work *w =
918                         list_entry(tmp, struct ldlm_ast_work, w_list);
919
920                 /* It's possible to receive a completion AST before we've set
921                  * the l_completion_ast pointer: either because the AST arrived
922                  * before the reply, or simply because there's a small race
923                  * window between receiving the reply and finishing the local
924                  * enqueue. (bug 842)
925                  *
926                  * This can't happen with the blocking_ast, however, because we
927                  * will never call the local blocking_ast until we drop our
928                  * reader/writer reference, which we won't do until we get the
929                  * reply and finish enqueueing. */
930                 LASSERT(w->w_lock != NULL);
931                 if (w->w_blocking) {
932                         LASSERT(w->w_lock->l_blocking_ast != NULL);
933                         rc = w->w_lock->l_blocking_ast
934                                 (w->w_lock, &w->w_desc, w->w_data,
935                                  LDLM_CB_BLOCKING);
936                 } else if (w->w_lock->l_completion_ast != NULL) {
937                         LASSERT(w->w_lock->l_completion_ast != NULL);
938                         rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags,
939                                                          w->w_data);
940                 } else {
941                         rc = 0;
942                 }
943                 if (rc == -ERESTART)
944                         retval = rc;
945                 else if (rc)
946                         CDEBUG(D_DLMTRACE, "Failed AST - should clean & "
947                                "disconnect client\n");
948                 LDLM_LOCK_PUT(w->w_lock);
949                 list_del(&w->w_list);
950                 OBD_FREE(w, sizeof(*w));
951         }
952         RETURN(retval);
953 }
954
955 static int reprocess_one_queue(struct ldlm_resource *res, void *closure)
956 {
957         ldlm_reprocess_all(res);
958         return LDLM_ITER_CONTINUE;
959 }
960
961 void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
962 {
963         int i, rc;
964
965         l_lock(&ns->ns_lock);
966         for (i = 0; i < RES_HASH_SIZE; i++) {
967                 struct list_head *tmp, *next;
968                 list_for_each_safe(tmp, next, &(ns->ns_hash[i])) {
969                         struct ldlm_resource *res =
970                                 list_entry(tmp, struct ldlm_resource, lr_hash);
971
972                         ldlm_resource_getref(res);
973                         l_unlock(&ns->ns_lock);
974                         rc = reprocess_one_queue(res, NULL);
975                         l_lock(&ns->ns_lock);
976                         next = tmp->next;
977                         ldlm_resource_putref(res);
978                         if (rc == LDLM_ITER_STOP)
979                                 GOTO(out, rc);
980                 }
981         }
982  out:
983         l_unlock(&ns->ns_lock);
984         EXIT;
985 }
986
987 void ldlm_reprocess_all(struct ldlm_resource *res)
988 {
989         struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
990         int rc;
991         ENTRY;
992
993         /* Local lock trees don't get reprocessed. */
994         if (res->lr_namespace->ns_client) {
995                 EXIT;
996                 return;
997         }
998
999  restart:
1000         l_lock(&res->lr_namespace->ns_lock);
1001         res->lr_tmp = &rpc_list;
1002
1003         rc = ldlm_reprocess_queue(res, &res->lr_converting);
1004         if (rc == LDLM_ITER_CONTINUE)
1005                 ldlm_reprocess_queue(res, &res->lr_waiting);
1006
1007         res->lr_tmp = NULL;
1008         l_unlock(&res->lr_namespace->ns_lock);
1009
1010         rc = ldlm_run_ast_work(res->lr_namespace, &rpc_list);
1011         if (rc == -ERESTART) {
1012                 LASSERT(list_empty(&rpc_list));
1013                 goto restart;
1014         }
1015         EXIT;
1016 }
1017
1018 void ldlm_cancel_callback(struct ldlm_lock *lock)
1019 {
1020         l_lock(&lock->l_resource->lr_namespace->ns_lock);
1021         if (!(lock->l_flags & LDLM_FL_CANCEL)) {
1022                 lock->l_flags |= LDLM_FL_CANCEL;
1023                 if (lock->l_blocking_ast) {
1024                         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
1025                         // l_check_no_ns_lock(lock->l_resource->lr_namespace);
1026                         lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
1027                                              LDLM_CB_CANCELING);
1028                         return;
1029                 } else {
1030                         LDLM_DEBUG(lock, "no blocking ast");
1031                 }
1032         }
1033         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
1034 }
1035
1036 void ldlm_lock_cancel(struct ldlm_lock *lock)
1037 {
1038         struct ldlm_resource *res;
1039         struct ldlm_namespace *ns;
1040         ENTRY;
1041
1042         /* There's no race between calling this and taking the ns lock below;
1043          * a lock can only be put on the waiting list once, because it can only
1044          * issue a blocking AST once. */
1045         ldlm_del_waiting_lock(lock);
1046
1047         res = lock->l_resource;
1048         ns = res->lr_namespace;
1049
1050         l_lock(&ns->ns_lock);
1051         /* Please do not, no matter how tempting, remove this LBUG without
1052          * talking to me first. -phik */
1053         if (lock->l_readers || lock->l_writers) {
1054                 LDLM_ERROR(lock, "lock still has references");
1055                 LBUG();
1056         }
1057
1058         ldlm_cancel_callback(lock);
1059
1060         ldlm_resource_unlink_lock(lock);
1061         ldlm_lock_destroy(lock);
1062         l_unlock(&ns->ns_lock);
1063         EXIT;
1064 }
1065
1066 int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
1067 {
1068         struct ldlm_lock *lock = ldlm_handle2lock(lockh);
1069         ENTRY;
1070
1071         if (lock == NULL)
1072                 RETURN(-EINVAL);
1073
1074         lock->l_ast_data = data;
1075         LDLM_LOCK_PUT(lock);
1076         RETURN(0);
1077 }
1078
1079 void ldlm_cancel_locks_for_export(struct obd_export *exp)
1080 {
1081         struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
1082         struct ldlm_lock *lock;
1083         struct ldlm_resource *res;
1084
1085         l_lock(&ns->ns_lock);
1086         while(!list_empty(&exp->exp_ldlm_data.led_held_locks)) { 
1087                 lock = list_entry(exp->exp_ldlm_data.led_held_locks.next,
1088                                   struct ldlm_lock, l_export_chain);
1089                 res = ldlm_resource_getref(lock->l_resource);
1090                 LDLM_DEBUG(lock, "export %p", exp);
1091                 ldlm_lock_cancel(lock);
1092                 l_unlock(&ns->ns_lock);
1093                 ldlm_reprocess_all(res);
1094                 ldlm_resource_putref(res);
1095                 l_lock(&ns->ns_lock);
1096         }
1097         l_unlock(&ns->ns_lock);
1098 }
1099
1100 struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
1101                                         int *flags)
1102 {
1103         struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
1104         struct ldlm_resource *res;
1105         struct ldlm_namespace *ns;
1106         int granted = 0;
1107         ENTRY;
1108
1109         LBUG();
1110
1111         res = lock->l_resource;
1112         ns = res->lr_namespace;
1113
1114         l_lock(&ns->ns_lock);
1115
1116         lock->l_req_mode = new_mode;
1117         ldlm_resource_unlink_lock(lock);
1118
1119         /* If this is a local resource, put it on the appropriate list. */
1120         if (res->lr_namespace->ns_client) {
1121                 if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED)) {
1122                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
1123                 } else {
1124                         /* This should never happen, because of the way the
1125                          * server handles conversions. */
1126                         LBUG();
1127
1128                         res->lr_tmp = &rpc_list;
1129                         ldlm_grant_lock(lock, NULL, 0, 0);
1130                         res->lr_tmp = NULL;
1131                         granted = 1;
1132                         /* FIXME: completion handling not with ns_lock held ! */
1133                         if (lock->l_completion_ast)
1134                                 lock->l_completion_ast(lock, 0, NULL);
1135                 }
1136         } else {
1137                 /* FIXME: We should try the conversion right away and possibly
1138                  * return success without the need for an extra AST */
1139                 ldlm_resource_add_lock(res, &res->lr_converting, lock);
1140                 *flags |= LDLM_FL_BLOCK_CONV;
1141         }
1142
1143         l_unlock(&ns->ns_lock);
1144
1145         if (granted)
1146                 ldlm_run_ast_work(ns, &rpc_list);
1147         RETURN(res);
1148 }
1149
1150 void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos)
1151 {
1152         char str[PTL_NALFMT_SIZE];
1153         struct obd_device *obd = NULL;
1154
1155         if (!((portal_debug | D_ERROR) & level))
1156                 return;
1157
1158         if (!lock) {
1159                 CDEBUG(level, "  NULL LDLM lock\n");
1160                 return;
1161         }
1162
1163         CDEBUG(level, "  -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d)\n",
1164                lock, lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
1165                pos);
1166         if (lock->l_conn_export != NULL)
1167                 obd = lock->l_conn_export->exp_obd;
1168         if (lock->l_export && lock->l_export->exp_connection) {
1169                 CDEBUG(level, "  Node: NID "LPX64" (%s) on %s (rhandle: "LPX64")\n",
1170                        lock->l_export->exp_connection->c_peer.peer_nid,
1171                        portals_nid2str(lock->l_export->exp_connection->c_peer.peer_ni->pni_number,
1172                                        lock->l_export->exp_connection->c_peer.peer_nid, str),
1173                        lock->l_export->exp_connection->c_peer.peer_ni->pni_name,
1174                        lock->l_remote_handle.cookie);
1175         } else if (obd == NULL) {
1176                 CDEBUG(level, "  Node: local\n");
1177         } else {
1178                 struct obd_import *imp = obd->u.cli.cl_import;
1179                 CDEBUG(level, "  Node: NID "LPX64" (%s) on %s (rhandle: "LPX64")\n",
1180                        imp->imp_connection->c_peer.peer_nid,
1181                        portals_nid2str(imp->imp_connection->c_peer.peer_ni->pni_number,
1182                                        imp->imp_connection->c_peer.peer_nid, str),
1183                        imp->imp_connection->c_peer.peer_ni->pni_name,
1184                        lock->l_remote_handle.cookie);
1185         }
1186         CDEBUG(level, "  Resource: %p ("LPU64"/"LPU64")\n", lock->l_resource,
1187                lock->l_resource->lr_name.name[0],
1188                lock->l_resource->lr_name.name[1]);
1189         CDEBUG(level, "  Req mode: %s, grant mode: %s, rc: %u, read: %d, "
1190                "write: %d\n", ldlm_lockname[lock->l_req_mode],
1191                ldlm_lockname[lock->l_granted_mode],
1192                atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers);
1193         if (lock->l_resource->lr_type == LDLM_EXTENT)
1194                 CDEBUG(level, "  Extent: "LPU64" -> "LPU64
1195                        " (req "LPU64"-"LPU64")\n",
1196                        lock->l_policy_data.l_extent.start,
1197                        lock->l_policy_data.l_extent.end,
1198                        lock->l_req_extent.start, lock->l_req_extent.end);
1199         else if (lock->l_resource->lr_type == LDLM_FLOCK)
1200                 CDEBUG(level, "  Pid: "LPU64" Extent: "LPU64" -> "LPU64"\n",
1201                        lock->l_policy_data.l_flock.pid,
1202                        lock->l_policy_data.l_flock.start,
1203                        lock->l_policy_data.l_flock.end);
1204         else if (lock->l_resource->lr_type == LDLM_IBITS)
1205                 CDEBUG(level, " Bits: "LPX64"\n",
1206                        lock->l_policy_data.l_inodebits.bits);
1207 }
1208
1209 void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
1210 {
1211         struct ldlm_lock *lock;
1212
1213         lock = ldlm_handle2lock(lockh);
1214         if (lock == NULL)
1215                 return;
1216
1217         ldlm_lock_dump(D_OTHER, lock, 0);
1218
1219         LDLM_LOCK_PUT(lock);
1220 }