Whamcloud - gitweb
land b_cray_delivery on HEAD
[fs/lustre-release.git] / lustre / ldlm / ldlm_lock.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
5  *   Author: Peter Braam <braam@clusterfs.com>
6  *   Author: Phil Schwan <phil@clusterfs.com>
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #define DEBUG_SUBSYSTEM S_LDLM
25
26 #ifdef __KERNEL__
27 # include <linux/slab.h>
28 # include <linux/module.h>
29 # include <linux/lustre_dlm.h>
30 #else
31 # include <liblustre.h>
32 # include <linux/kp30.h>
33 #endif
34
35 #include <linux/obd_class.h>
36 #include "ldlm_internal.h"
37
38 //struct lustre_lock ldlm_everything_lock;
39
40 /* lock types */
41 char *ldlm_lockname[] = {
42         [0] "--",
43         [LCK_EX] "EX",
44         [LCK_PW] "PW",
45         [LCK_PR] "PR",
46         [LCK_CW] "CW",
47         [LCK_CR] "CR",
48         [LCK_NL] "NL"
49 };
50 char *ldlm_typename[] = {
51         [LDLM_PLAIN] "PLN",
52         [LDLM_EXTENT] "EXT",
53         [LDLM_FLOCK] "FLK",
54 };
55
56 char *ldlm_it2str(int it)
57 {
58         switch (it) {
59         case IT_OPEN:
60                 return "open";
61         case IT_CREAT:
62                 return "creat";
63         case (IT_OPEN | IT_CREAT):
64                 return "open|creat";
65         case IT_READDIR:
66                 return "readdir";
67         case IT_GETATTR:
68                 return "getattr";
69         case IT_LOOKUP:
70                 return "lookup";
71         case IT_UNLINK:
72                 return "unlink";
73         case IT_GETXATTR:
74                 return "getxattr";
75         default:
76                 CERROR("Unknown intent %d\n", it);
77                 return "UNKNOWN";
78         }
79 }
80
/* Slab cache that the lock objects in this file are allocated from and
 * released to; defined elsewhere. */
extern kmem_cache_t *ldlm_lock_slab;
/* NOTE(review): not referenced in the visible part of this file; presumably
 * protects the lock handle table -- confirm against its users. */
struct lustre_lock ldlm_handle_lock;
83
84 static ldlm_processing_policy ldlm_processing_policy_table[] = {
85         [LDLM_PLAIN] ldlm_process_plain_lock,
86         [LDLM_EXTENT] ldlm_process_extent_lock,
87 #ifdef __KERNEL__
88         [LDLM_FLOCK] ldlm_process_flock_lock,
89 #endif
90 };
91
92 ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res)
93 {
94         return ldlm_processing_policy_table[res->lr_type];
95 }
96
97 void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg)
98 {
99         ns->ns_policy = arg;
100 }
101
102 /*
103  * REFCOUNTED LOCK OBJECTS
104  */
105
106
107 /*
108  * Lock refcounts, during creation:
109  *   - one special one for allocation, dec'd only once in destroy
110  *   - one for being a lock that's in-use
111  *   - one for the addref associated with a new lock
112  */
113 struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
114 {
115         atomic_inc(&lock->l_refc);
116         return lock;
117 }
118
119 void ldlm_lock_put(struct ldlm_lock *lock)
120 {
121         ENTRY;
122
123         if (atomic_dec_and_test(&lock->l_refc)) {
124                 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
125
126                 l_lock(&ns->ns_lock);
127                 LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing");
128                 LASSERT(lock->l_destroyed);
129                 LASSERT(list_empty(&lock->l_res_link));
130
131                 spin_lock(&ns->ns_counter_lock);
132                 ns->ns_locks--;
133                 spin_unlock(&ns->ns_counter_lock);
134
135                 ldlm_resource_putref(lock->l_resource);
136                 lock->l_resource = NULL;
137                 if (lock->l_export)
138                         class_export_put(lock->l_export);
139
140                 if (lock->l_parent)
141                         LDLM_LOCK_PUT(lock->l_parent);
142
143                 if (lock->l_lvb_data != NULL)
144                         OBD_FREE(lock->l_lvb_data, lock->l_lvb_len);
145
146                 OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
147                 l_unlock(&ns->ns_lock);
148         }
149
150         EXIT;
151 }
152
153 void ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
154 {
155         ENTRY;
156         l_lock(&lock->l_resource->lr_namespace->ns_lock);
157         if (!list_empty(&lock->l_lru)) {
158                 list_del_init(&lock->l_lru);
159                 lock->l_resource->lr_namespace->ns_nr_unused--;
160                 LASSERT(lock->l_resource->lr_namespace->ns_nr_unused >= 0);
161         }
162         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
163         EXIT;
164 }
165
/* This used to have a 'strict' flag, which recovery would use to mark an
 * in-use lock as needing-to-die.  Lest I am ever tempted to put it back, I
 * shall explain why it's gone: with the new hash table scheme, once you call
 * ldlm_lock_destroy, you can never drop your final references on this lock.
 * Because it's not in the hash table anymore.  -phil
 *
 * Mark 'lock' as destroyed: unlink it from the export chain and the LRU,
 * remove its handle from the handle hash, and drop one reference.  The lock
 * must have no children, no reader/writer references and must already be
 * off its resource list, otherwise this LBUGs.  Calling it again on an
 * already-destroyed lock is a no-op. */
void ldlm_lock_destroy(struct ldlm_lock *lock)
{
        ENTRY;
        l_lock(&lock->l_resource->lr_namespace->ns_lock);

        /* Sanity checks: a lock being destroyed must be completely idle. */
        if (!list_empty(&lock->l_children)) {
                LDLM_ERROR(lock, "still has children (%p)!",
                           lock->l_children.next);
                ldlm_lock_dump(D_ERROR, lock, 0);
                LBUG();
        }
        if (lock->l_readers || lock->l_writers) {
                LDLM_ERROR(lock, "lock still has references");
                ldlm_lock_dump(D_ERROR, lock, 0);
                LBUG();
        }

        if (!list_empty(&lock->l_res_link)) {
                ldlm_lock_dump(D_ERROR, lock, 0);
                LBUG();
        }

        /* Already destroyed: do not drop a second reference. */
        if (lock->l_destroyed) {
                LASSERT(list_empty(&lock->l_lru));
                l_unlock(&lock->l_resource->lr_namespace->ns_lock);
                EXIT;
                return;
        }
        lock->l_destroyed = 1;

        list_del_init(&lock->l_export_chain);
        ldlm_lock_remove_from_lru(lock);
        /* After this the lock can no longer be found through its handle. */
        class_handle_unhash(&lock->l_handle);

#if 0
        /* Wake anyone waiting for this lock */
        /* FIXME: I should probably add yet another flag, instead of using
         * l_export to only call this on clients */
        if (lock->l_export)
                class_export_put(lock->l_export);
        lock->l_export = NULL;
        if (lock->l_export && lock->l_completion_ast)
                lock->l_completion_ast(lock, 0);
#endif

        l_unlock(&lock->l_resource->lr_namespace->ns_lock);
        /* Drop one of the references the lock was created with; 'lock' may
         * be freed by this call. */
        LDLM_LOCK_PUT(lock);
        EXIT;
}
220
221 /* this is called by portals_handle2object with the handle lock taken */
/* Addref callback registered with the handle hash: takes a reference on the
 * ldlm_lock passed in as an untyped pointer. */
static void lock_handle_addref(void *lock)
{
        struct ldlm_lock *ldlm_lock = lock;

        LDLM_LOCK_GET(ldlm_lock);
}
226
/*
 * Allocate and initialise a new lock on 'resource', optionally as a child
 * of 'parent', and publish it in the handle hash.
 *
 * usage: pass in a resource on which you have done ldlm_resource_get
 *        pass in a parent lock on which you have done a ldlm_lock_get
 *        after return, ldlm_*_put the resource and parent
 * returns: lock with refcount 2 (the reference count is set to 2 below),
 *          or NULL if the slab allocation fails
 */
static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
                                       struct ldlm_resource *resource)
{
        struct ldlm_lock *lock;
        ENTRY;

        if (resource == NULL)
                LBUG();

        OBD_SLAB_ALLOC(lock, ldlm_lock_slab, SLAB_NOFS, sizeof(*lock));
        if (lock == NULL)
                RETURN(NULL);

        /* The lock holds its own reference on the resource. */
        lock->l_resource = ldlm_resource_getref(resource);

        atomic_set(&lock->l_refc, 2);
        INIT_LIST_HEAD(&lock->l_children);
        INIT_LIST_HEAD(&lock->l_res_link);
        INIT_LIST_HEAD(&lock->l_lru);
        INIT_LIST_HEAD(&lock->l_export_chain);
        INIT_LIST_HEAD(&lock->l_pending_chain);
        init_waitqueue_head(&lock->l_waitq);

        /* Account for the new lock in the namespace. */
        spin_lock(&resource->lr_namespace->ns_counter_lock);
        resource->lr_namespace->ns_locks++;
        spin_unlock(&resource->lr_namespace->ns_counter_lock);

        if (parent != NULL) {
                /* The child keeps a reference on its parent and is linked
                 * on the parent's list of children. */
                l_lock(&parent->l_resource->lr_namespace->ns_lock);
                lock->l_parent = LDLM_LOCK_GET(parent);
                list_add(&lock->l_childof, &parent->l_children);
                l_unlock(&parent->l_resource->lr_namespace->ns_lock);
        }

        /* Done last: once hashed, the lock can be looked up by handle. */
        INIT_LIST_HEAD(&lock->l_handle.h_link);
        class_handle_hash(&lock->l_handle, lock_handle_addref);

        RETURN(lock);
}
272
273 int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
274                               struct ldlm_res_id new_resid)
275 {
276         struct ldlm_resource *oldres = lock->l_resource;
277         ENTRY;
278
279         l_lock(&ns->ns_lock);
280         if (memcmp(&new_resid, &lock->l_resource->lr_name,
281                    sizeof(lock->l_resource->lr_name)) == 0) {
282                 /* Nothing to do */
283                 l_unlock(&ns->ns_lock);
284                 RETURN(0);
285         }
286
287         LASSERT(new_resid.name[0] != 0);
288
289         /* This function assumes that the lock isn't on any lists */
290         LASSERT(list_empty(&lock->l_res_link));
291
292         lock->l_resource = ldlm_resource_get(ns, NULL, new_resid,
293                                              lock->l_resource->lr_type, 1);
294         if (lock->l_resource == NULL) {
295                 LBUG();
296                 RETURN(-ENOMEM);
297         }
298
299         /* ...and the flowers are still standing! */
300         ldlm_resource_putref(oldres);
301
302         l_unlock(&ns->ns_lock);
303         RETURN(0);
304 }
305
306 /*
307  *  HANDLES
308  */
309
310 void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
311 {
312         lockh->cookie = lock->l_handle.h_cookie;
313 }
314
/* Look up the lock that 'handle' refers to.
 *
 * if flags: atomically get the lock and set the flags.
 *           Return NULL if flag already set
 *
 * Also returns NULL when the handle does not resolve to a lock or when the
 * lock has already been destroyed.  On success the reference obtained by the
 * handle lookup is kept for the caller, who must LDLM_LOCK_PUT it; on the
 * NULL paths that reference is dropped here.
 */

struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *handle, int flags)
{
        struct ldlm_namespace *ns;
        struct ldlm_lock *lock = NULL, *retval = NULL;
        ENTRY;

        LASSERT(handle);

        lock = class_handle2object(handle->cookie);
        if (lock == NULL)
                RETURN(NULL);

        LASSERT(lock->l_resource != NULL);
        ns = lock->l_resource->lr_namespace;
        LASSERT(ns != NULL);

        l_lock(&ns->ns_lock);

        /* It's unlikely but possible that someone marked the lock as
         * destroyed after we did handle2object on it */
        if (lock->l_destroyed) {
                CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
                LDLM_LOCK_PUT(lock);
                GOTO(out, retval);
        }

        /* Caller asked for test-and-set and a requested flag is already
         * set: refuse. */
        if (flags && (lock->l_flags & flags)) {
                LDLM_LOCK_PUT(lock);
                GOTO(out, retval);
        }

        if (flags)
                lock->l_flags |= flags;

        retval = lock;
        EXIT;
 out:
        l_unlock(&ns->ns_lock);
        return retval;
}
359
360 struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *ns,
361                                       struct lustre_handle *handle)
362 {
363         struct ldlm_lock *retval = NULL;
364
365         l_lock(&ns->ns_lock);
366         retval = __ldlm_handle2lock(handle, 0);
367         l_unlock(&ns->ns_lock);
368
369         return retval;
370 }
371
372 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
373 {
374         ldlm_res2desc(lock->l_resource, &desc->l_resource);
375         desc->l_req_mode = lock->l_req_mode;
376         desc->l_granted_mode = lock->l_granted_mode;
377         memcpy(&desc->l_policy_data, &lock->l_policy_data,
378                sizeof(desc->l_policy_data));
379 }
380
/*
 * Queue an AST work item for 'lock' on its resource's temporary work list
 * (lr_tmp).
 *
 * If 'new' is non-NULL the item is a blocking AST caused by the conflicting
 * lock 'new'; it is queued at most once per lock (guarded by
 * LDLM_FL_AST_SENT).  If 'new' is NULL the item is queued without the
 * blocking marker (ldlm_grant_lock uses this when a completion AST is set).
 * 'data'/'datalen' are stored in the work item unchanged.  The work item
 * holds a reference on 'lock'.
 */
void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
                            void *data, int datalen)
{
        struct ldlm_ast_work *w;
        ENTRY;

        l_lock(&lock->l_resource->lr_namespace->ns_lock);
        /* A blocking AST has already been queued for this lock. */
        if (new && (lock->l_flags & LDLM_FL_AST_SENT))
                GOTO(out, 0);

        CDEBUG(D_OTHER, "lock %p incompatible; sending blocking AST.\n", lock);

        OBD_ALLOC(w, sizeof(*w));
        if (!w) {
                LBUG();
                GOTO(out, 0);
        }

        w->w_data = data;
        w->w_datalen = datalen;
        if (new) {
                LDLM_DEBUG(lock, "lock incompatible; sending blocking AST.");
                lock->l_flags |= LDLM_FL_AST_SENT;
                /* If the enqueuing client said so, tell the AST recipient to
                 * discard dirty data, rather than writing back. */
                if (new->l_flags & LDLM_AST_DISCARD_DATA)
                        lock->l_flags |= LDLM_FL_DISCARD_DATA;
                w->w_blocking = 1;
                /* Describe the conflicting lock in the work item. */
                ldlm_lock2desc(new, &w->w_desc);
        }

        /* Reference held by the work item. */
        w->w_lock = LDLM_LOCK_GET(lock);
        list_add(&w->w_list, lock->l_resource->lr_tmp);
        EXIT;
 out:
        l_unlock(&lock->l_resource->lr_namespace->ns_lock);
}
418
419 void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
420 {
421         struct ldlm_lock *lock;
422
423         lock = ldlm_handle2lock(lockh);
424         ldlm_lock_addref_internal(lock, mode);
425         LDLM_LOCK_PUT(lock);
426 }
427
428 /* only called for local locks */
429 void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
430 {
431         l_lock(&lock->l_resource->lr_namespace->ns_lock);
432         ldlm_lock_remove_from_lru(lock);
433         if (mode & (LCK_NL | LCK_CR | LCK_PR))
434                 lock->l_readers++;
435         if (mode & (LCK_EX | LCK_CW | LCK_PW))
436                 lock->l_writers++;
437         lock->l_last_used = jiffies;
438         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
439         LDLM_LOCK_GET(lock);
440         LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
441 }
442
/*
 * Drop a reader or writer reference (according to 'mode') on 'lock', then
 * drop the object reference taken by the matching addref.
 *
 * When the last reader/writer goes away:
 *  - a lock flagged LDLM_FL_LOCAL is marked LDLM_FL_CBPENDING;
 *  - a lock with LDLM_FL_CBPENDING set is handed to the blocking-callback
 *    path (bl thread in kernel builds, a direct liblustre call otherwise);
 *  - otherwise, in a client namespace, the lock is appended to the
 *    namespace LRU and ldlm_cancel_lru() is called.
 */
void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
{
        struct ldlm_namespace *ns;
        ENTRY;

        LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
        ns = lock->l_resource->lr_namespace;
        l_lock(&ns->ns_lock);
        if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
                LASSERT(lock->l_readers > 0);
                lock->l_readers--;
        }
        if (mode & (LCK_EX | LCK_CW | LCK_PW)) {
                LASSERT(lock->l_writers > 0);
                lock->l_writers--;
        }

        if (lock->l_flags & LDLM_FL_LOCAL &&
            !lock->l_readers && !lock->l_writers) {
                /* If this is a local lock on a server namespace and this was
                 * the last reference, cancel the lock. */
                CDEBUG(D_INFO, "forcing cancel of local lock\n");
                lock->l_flags |= LDLM_FL_CBPENDING;
        }

        if (!lock->l_readers && !lock->l_writers &&
            (lock->l_flags & LDLM_FL_CBPENDING)) {
                /* If we received a blocked AST and this was the last reference,
                 * run the callback. */
                if (ns->ns_client == LDLM_NAMESPACE_SERVER && lock->l_export)
                        CERROR("FL_CBPENDING set on non-local lock--just a "
                               "warning\n");

                LDLM_DEBUG(lock, "final decref done on cbpending lock");

                LDLM_LOCK_GET(lock); /* dropped by bl thread */
                ldlm_lock_remove_from_lru(lock);
                /* Note the differing order: the kernel build queues the work
                 * while still holding the namespace lock, the liblustre
                 * build releases it before running the callback. */
#ifdef __KERNEL__
                ldlm_bl_to_thread(ns, NULL, lock);
                l_unlock(&ns->ns_lock);
#else
                l_unlock(&ns->ns_lock);
                liblustre_ldlm_handle_bl_callback(ns, NULL, lock);
#endif
        } else if (ns->ns_client == LDLM_NAMESPACE_CLIENT &&
                   !lock->l_readers && !lock->l_writers) {
                /* If this is a client-side namespace and this was the last
                 * reference, put it on the LRU. */
                LASSERT(list_empty(&lock->l_lru));
                LASSERT(ns->ns_nr_unused >= 0);
                list_add_tail(&lock->l_lru, &ns->ns_unused_list);
                ns->ns_nr_unused++;
                l_unlock(&ns->ns_lock);
                ldlm_cancel_lru(ns);
        } else {
                l_unlock(&ns->ns_lock);
        }

        LDLM_LOCK_PUT(lock);    /* matches the ldlm_lock_get in addref */

        EXIT;
}
505
506 void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
507 {
508         struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
509         LASSERT(lock != NULL);
510         ldlm_lock_decref_internal(lock, mode);
511         LDLM_LOCK_PUT(lock);
512 }
513
514 /* This will drop a lock reference and mark it for destruction, but will not
515  * necessarily cancel the lock before returning. */
516 void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
517 {
518         struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
519         ENTRY;
520
521         LASSERT(lock != NULL);
522
523         LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
524         l_lock(&lock->l_resource->lr_namespace->ns_lock);
525         lock->l_flags |= LDLM_FL_CBPENDING;
526         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
527         ldlm_lock_decref_internal(lock, mode);
528         LDLM_LOCK_PUT(lock);
529 }
530
531 /* NOTE: called by
532  *  - ldlm_lock_enqueue
533  *  - ldlm_reprocess_queue
534  *  - ldlm_lock_convert
535  */
536 void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen,
537                      int run_ast)
538 {
539         struct ldlm_resource *res = lock->l_resource;
540         ENTRY;
541
542         l_lock(&lock->l_resource->lr_namespace->ns_lock);
543         lock->l_granted_mode = lock->l_req_mode;
544         ldlm_resource_add_lock(res, &res->lr_granted, lock);
545
546         if (lock->l_granted_mode < res->lr_most_restr)
547                 res->lr_most_restr = lock->l_granted_mode;
548
549         if (run_ast && lock->l_completion_ast != NULL)
550                 ldlm_add_ast_work_item(lock, NULL, data, datalen);
551
552         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
553         EXIT;
554 }
555
/* Scan 'queue' (a resource lock list linked through l_res_link) for a lock
 * that matches 'mode', 'policy' and 'flags'.
 *
 * returns a referenced lock or NULL.  See the flag descriptions below, in the
 * comment above ldlm_lock_match.
 *
 * The scan stops as soon as 'old_lock' is reached.  With LDLM_FL_TEST_LOCK
 * only an object reference is taken on the match; otherwise a full
 * reader/writer reference is taken via ldlm_lock_addref_internal. */
static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
                                      ldlm_policy_data_t *policy,
                                      struct ldlm_lock *old_lock, int flags)
{
        struct ldlm_lock *lock;
        struct list_head *tmp;

        list_for_each(tmp, queue) {
                lock = list_entry(tmp, struct ldlm_lock, l_res_link);

                if (lock == old_lock)
                        break;

                /* llite sometimes wants to match locks that will be
                 * canceled when their users drop, but we allow it to match
                 * if it passes in CBPENDING and the lock still has users.
                 * this is generally only going to be used by children
                 * whose parents already hold a lock so forward progress
                 * can still happen. */
                if (lock->l_flags & LDLM_FL_CBPENDING &&
                    !(flags & LDLM_FL_CBPENDING))
                        continue;
                if (lock->l_flags & LDLM_FL_CBPENDING &&
                    lock->l_readers == 0 && lock->l_writers == 0)
                        continue;

                /* The lock's requested mode must be one of the modes asked
                 * for. */
                if (!(lock->l_req_mode & mode))
                        continue;

                /* Extent locks must fully cover the requested extent. */
                if (lock->l_resource->lr_type == LDLM_EXTENT &&
                    (lock->l_policy_data.l_extent.start >
                     policy->l_extent.start ||
                     lock->l_policy_data.l_extent.end < policy->l_extent.end))
                        continue;

                /* For CW extent requests the extent gid must match too. */
                if (lock->l_resource->lr_type == LDLM_EXTENT &&
                    mode == LCK_CW &&
                    lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
                        continue;

                if (lock->l_destroyed)
                        continue;

                if ((flags & LDLM_FL_LOCAL_ONLY) &&
                    !(lock->l_flags & LDLM_FL_LOCAL))
                        continue;

                if (flags & LDLM_FL_TEST_LOCK)
                        LDLM_LOCK_GET(lock);
                else
                        ldlm_lock_addref_internal(lock, mode);
                return lock;
        }

        return NULL;
}
614
615 void ldlm_lock_allow_match(struct ldlm_lock *lock)
616 {
617         l_lock(&lock->l_resource->lr_namespace->ns_lock);
618         lock->l_flags |= LDLM_FL_CAN_MATCH;
619         wake_up(&lock->l_waitq);
620         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
621 }
622
/* Can be called in two ways:
 *
 * If 'ns' is NULL, then lockh describes an existing lock that we want to look
 * for a duplicate of.
 *
 * Otherwise, all of the fields must be filled in, to match against.
 *
 * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the
 *     server (ie, connh is NULL)
 * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted
 *     list will be considered
 * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked
 *     to be canceled can still be matched as long as they still have reader
 *     or writer references
 * If 'flags' contains LDLM_FL_TEST_LOCK, then don't actually reference a lock,
 *     just tell us if we would have matched.
 *
 * Returns 1 if it finds an already-existing lock that is compatible; in this
 * case, lockh is filled in with a addref()ed lock
 * Returns 0 otherwise.
 */
int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
                    struct ldlm_res_id *res_id, __u32 type,
                    ldlm_policy_data_t *policy, ldlm_mode_t mode,
                    struct lustre_handle *lockh)
{
        struct ldlm_resource *res;
        struct ldlm_lock *lock, *old_lock = NULL;
        int rc = 0;
        ENTRY;

        if (ns == NULL) {
                /* Derive the search parameters from the existing lock.
                 * Note that 'policy' is not derived and is used as passed
                 * in. */
                old_lock = ldlm_handle2lock(lockh);
                LASSERT(old_lock);

                ns = old_lock->l_resource->lr_namespace;
                res_id = &old_lock->l_resource->lr_name;
                type = old_lock->l_resource->lr_type;
                mode = old_lock->l_req_mode;
        }

        /* Last argument 0: look up only, do not create the resource. */
        res = ldlm_resource_get(ns, NULL, *res_id, type, 0);
        if (res == NULL) {
                LASSERT(old_lock == NULL);
                RETURN(0);
        }

        l_lock(&ns->ns_lock);

        /* Search order: granted, then converting, then waiting. */
        lock = search_queue(&res->lr_granted, mode, policy, old_lock, flags);
        if (lock != NULL)
                GOTO(out, rc = 1);
        if (flags & LDLM_FL_BLOCK_GRANTED)
                GOTO(out, rc = 0);
        lock = search_queue(&res->lr_converting, mode, policy, old_lock, flags);
        if (lock != NULL)
                GOTO(out, rc = 1);
        lock = search_queue(&res->lr_waiting, mode, policy, old_lock, flags);
        if (lock != NULL)
                GOTO(out, rc = 1);

        EXIT;
 out:
        ldlm_resource_putref(res);
        l_unlock(&ns->ns_lock);

        if (lock) {
                ldlm_lock2handle(lock, lockh);
                if (!(lock->l_flags & LDLM_FL_CAN_MATCH)) {
                        /* The matched lock is not yet marked matchable: run
                         * its completion AST (if any) and wait, with a
                         * timeout, for LDLM_FL_CAN_MATCH to be set.  The
                         * outcome of the wait is not checked. */
                        struct l_wait_info lwi;
                        if (lock->l_completion_ast)
                                lock->l_completion_ast(lock,
                                                       LDLM_FL_WAIT_NOREPROC,
                                                       NULL);

                        lwi = LWI_TIMEOUT_INTR(obd_timeout*HZ, NULL,NULL,NULL);

                        /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */
                        l_wait_event(lock->l_waitq,
                                     (lock->l_flags & LDLM_FL_CAN_MATCH), &lwi);
                }
        }
        if (rc)
                LDLM_DEBUG(lock, "matched ("LPU64" "LPU64")",
                           type == LDLM_PLAIN ? res_id->name[2] :
                                policy->l_extent.start,
                           type == LDLM_PLAIN ? res_id->name[3] :
                                policy->l_extent.end);
        else if (!(flags & LDLM_FL_TEST_LOCK)) /* less verbose for test-only */
                LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res "
                                  LPU64"/"LPU64" ("LPU64" "LPU64")", ns,
                                  type, mode, res_id->name[0], res_id->name[1],
                                  type == LDLM_PLAIN ? res_id->name[2] :
                                        policy->l_extent.start,
                                  type == LDLM_PLAIN ? res_id->name[3] :
                                        policy->l_extent.end);

        /* Drop the reference from the handle lookup at the top. */
        if (old_lock)
                LDLM_LOCK_PUT(old_lock);
        /* Test-only match: release the reference search_queue took. */
        if (flags & LDLM_FL_TEST_LOCK && rc)
                LDLM_LOCK_PUT(lock);

        return rc;
}
726
727 /* Returns a referenced lock */
728 struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
729                                    struct lustre_handle *parent_lock_handle,
730                                    struct ldlm_res_id res_id, __u32 type,
731                                    ldlm_mode_t mode,
732                                    ldlm_blocking_callback blocking,
733                                    ldlm_completion_callback completion,
734                                    ldlm_glimpse_callback glimpse,
735                                    void *data, __u32 lvb_len)
736 {
737         struct ldlm_resource *res, *parent_res = NULL;
738         struct ldlm_lock *lock, *parent_lock = NULL;
739         ENTRY;
740
741         if (parent_lock_handle) {
742                 parent_lock = ldlm_handle2lock(parent_lock_handle);
743                 if (parent_lock)
744                         parent_res = parent_lock->l_resource;
745         }
746
747         res = ldlm_resource_get(ns, parent_res, res_id, type, 1);
748         if (res == NULL)
749                 RETURN(NULL);
750
751         lock = ldlm_lock_new(parent_lock, res);
752         ldlm_resource_putref(res);
753         if (parent_lock != NULL)
754                 LDLM_LOCK_PUT(parent_lock);
755
756         if (lock == NULL)
757                 RETURN(NULL);
758
759         lock->l_req_mode = mode;
760         lock->l_ast_data = data;
761         lock->l_blocking_ast = blocking;
762         lock->l_completion_ast = completion;
763         lock->l_glimpse_ast = glimpse;
764
765         if (lvb_len) {
766                 lock->l_lvb_len = lvb_len;
767                 OBD_ALLOC(lock->l_lvb_data, lvb_len);
768                 if (lock->l_lvb_data == NULL) {
769                         OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
770                         RETURN(NULL);
771                 }
772         }
773
774         RETURN(lock);
775 }
776
/*
 * Enqueue the lock *lockp in namespace 'ns'.
 *
 * ns:     namespace whose lock is held while queueing and whose intent
 *         policy (ns_policy), if any, is consulted
 * lockp:  in/out; the intent policy may replace the lock it points to
 * cookie: passed through to the intent policy
 * flags:  in/out LDLM_FL_* flags; they select the queueing behaviour and
 *         are updated (e.g. LDLM_FL_LOCK_CHANGED is set when the policy
 *         replaced the lock, BLOCK_* bits are cleared for a lock that was
 *         already granted)
 *
 * Returns ELDLM_OK (0) or the error code produced by the intent policy or
 * by the per-type processing policy.
 */
ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
                               struct ldlm_lock **lockp,
                               void *cookie, int *flags)
{
        struct ldlm_lock *lock = *lockp;
        struct ldlm_resource *res = lock->l_resource;
        /* Non-zero for a client-side namespace (see the uses below). */
        int local = res->lr_namespace->ns_client;
        ldlm_processing_policy policy;
        ldlm_error_t rc = ELDLM_OK;
        ENTRY;

        /* policies are not executed on the client or during replay */
        if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
            && !local && ns->ns_policy) {
                rc = ns->ns_policy(ns, lockp, cookie, lock->l_req_mode, *flags,
                                   NULL);
                if (rc == ELDLM_LOCK_REPLACED) {
                        /* The lock that was returned has already been granted,
                         * and placed into lockp.  If it's not the same as the
                         * one we passed in, then destroy the old one and our
                         * work here is done. */
                        if (lock != *lockp) {
                                ldlm_lock_destroy(lock);
                                LDLM_LOCK_PUT(lock);
                        }
                        *flags |= LDLM_FL_LOCK_CHANGED;
                        RETURN(0);
                } else if (rc == ELDLM_LOCK_ABORTED ||
                           (rc == 0 && (*flags & LDLM_FL_INTENT_ONLY))) {
                        /* Here the lock is only destroyed, without the extra
                         * put done in the REPLACED case above. */
                        ldlm_lock_destroy(lock);
                        RETURN(rc);
                }
                LASSERT(rc == ELDLM_OK);
        }

        l_lock(&ns->ns_lock);
        if (local && lock->l_req_mode == lock->l_granted_mode) {
                /* The server returned a blocked lock, but it was granted before
                 * we got a chance to actually enqueue it.  We don't need to do
                 * anything else. */
                *flags &= ~(LDLM_FL_BLOCK_GRANTED |
                            LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
                GOTO(out, ELDLM_OK);
        }

        /* Some flags from the enqueue want to make it into the AST, via the
         * lock's l_flags. */
        lock->l_flags |= (*flags & LDLM_AST_DISCARD_DATA);

        /* This distinction between local lock trees is very important; a client
         * namespace only has information about locks taken by that client, and
         * thus doesn't have enough information to decide for itself if it can
         * be granted (below).  In this case, we do exactly what the server
         * tells us to do, as dictated by the 'flags'.
         *
         * We do exactly the same thing during recovery, when the server is
         * more or less trusting the clients not to lie.
         *
         * FIXME (bug 268): Detect obvious lies by checking compatibility in
         * granted/converting queues. */
        ldlm_resource_unlink_lock(lock);
        if (local) {
                if (*flags & LDLM_FL_BLOCK_CONV)
                        ldlm_resource_add_lock(res, &res->lr_converting, lock);
                else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
                        ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                else
                        ldlm_grant_lock(lock, NULL, 0, 0);
                GOTO(out, ELDLM_OK);
        } else if (*flags & LDLM_FL_REPLAY) {
                if (*flags & LDLM_FL_BLOCK_CONV) {
                        ldlm_resource_add_lock(res, &res->lr_converting, lock);
                        GOTO(out, ELDLM_OK);
                } else if (*flags & LDLM_FL_BLOCK_WAIT) {
                        ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                        GOTO(out, ELDLM_OK);
                } else if (*flags & LDLM_FL_BLOCK_GRANTED) {
                        ldlm_grant_lock(lock, NULL, 0, 0);
                        GOTO(out, ELDLM_OK);
                }
                /* If no flags, fall through to normal enqueue path. */
        }

        /* Normal server-side path: let the per-type policy decide.  The
         * policy reports its result through 'rc' and may update 'flags'. */
        policy = ldlm_processing_policy_table[res->lr_type];
        policy(lock, flags, 1, &rc);
        EXIT;
out:
        l_unlock(&ns->ns_lock);
        return rc;
}
867
868 /* Must be called with namespace taken: queue is waiting or converting. */
869 int ldlm_reprocess_queue(struct ldlm_resource *res, struct list_head *queue)
870 {
871         struct list_head *tmp, *pos;
872         ldlm_processing_policy policy;
873         int flags;
874         int rc = LDLM_ITER_CONTINUE;
875         ldlm_error_t err;
876         ENTRY;
877
878         policy = ldlm_processing_policy_table[res->lr_type];
879         LASSERT(policy);
880
881         list_for_each_safe(tmp, pos, queue) {
882                 struct ldlm_lock *pending;
883                 pending = list_entry(tmp, struct ldlm_lock, l_res_link);
884
885                 CDEBUG(D_INFO, "Reprocessing lock %p\n", pending);
886
887                 flags = 0;
888                 rc = policy(pending, &flags, 0, &err);
889                 if (rc != LDLM_ITER_CONTINUE)
890                         break;
891         }
892
893         RETURN(rc);
894 }
895
896 int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list)
897 {
898         struct list_head *tmp, *pos;
899         int rc, retval = 0;
900         ENTRY;
901
902         l_check_no_ns_lock(ns);
903
904         list_for_each_safe(tmp, pos, rpc_list) {
905                 struct ldlm_ast_work *w =
906                         list_entry(tmp, struct ldlm_ast_work, w_list);
907
908                 /* It's possible to receive a completion AST before we've set
909                  * the l_completion_ast pointer: either because the AST arrived
910                  * before the reply, or simply because there's a small race
911                  * window between receiving the reply and finishing the local
912                  * enqueue. (bug 842)
913                  *
914                  * This can't happen with the blocking_ast, however, because we
915                  * will never call the local blocking_ast until we drop our
916                  * reader/writer reference, which we won't do until we get the
917                  * reply and finish enqueueing. */
918                 LASSERT(w->w_lock != NULL);
919                 if (w->w_blocking) {
920                         LASSERT(w->w_lock->l_blocking_ast != NULL);
921                         rc = w->w_lock->l_blocking_ast
922                                 (w->w_lock, &w->w_desc, w->w_data,
923                                  LDLM_CB_BLOCKING);
924                 } else if (w->w_lock->l_completion_ast != NULL) {
925                         LASSERT(w->w_lock->l_completion_ast != NULL);
926                         rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags,
927                                                          w->w_data);
928                 } else {
929                         rc = 0;
930                 }
931                 if (rc == -ERESTART)
932                         retval = rc;
933                 else if (rc)
934                         CDEBUG(D_DLMTRACE, "Failed AST - should clean & "
935                                "disconnect client\n");
936                 LDLM_LOCK_PUT(w->w_lock);
937                 list_del(&w->w_list);
938                 OBD_FREE(w, sizeof(*w));
939         }
940         RETURN(retval);
941 }
942
943 static int reprocess_one_queue(struct ldlm_resource *res, void *closure)
944 {
945         ldlm_reprocess_all(res);
946         return LDLM_ITER_CONTINUE;
947 }
948
949 void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
950 {
951         int i, rc;
952
953         l_lock(&ns->ns_lock);
954         for (i = 0; i < RES_HASH_SIZE; i++) {
955                 struct list_head *tmp, *next;
956                 list_for_each_safe(tmp, next, &(ns->ns_hash[i])) {
957                         struct ldlm_resource *res =
958                                 list_entry(tmp, struct ldlm_resource, lr_hash);
959
960                         ldlm_resource_getref(res);
961                         l_unlock(&ns->ns_lock);
962                         rc = reprocess_one_queue(res, NULL);
963                         l_lock(&ns->ns_lock);
964                         next = tmp->next;
965                         ldlm_resource_putref(res);
966                         if (rc == LDLM_ITER_STOP)
967                                 GOTO(out, rc);
968                 }
969         }
970  out:
971         l_unlock(&ns->ns_lock);
972         EXIT;
973 }
974
975 void ldlm_reprocess_all(struct ldlm_resource *res)
976 {
977         struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
978         int rc;
979         ENTRY;
980
981         /* Local lock trees don't get reprocessed. */
982         if (res->lr_namespace->ns_client) {
983                 EXIT;
984                 return;
985         }
986
987  restart:
988         l_lock(&res->lr_namespace->ns_lock);
989         res->lr_tmp = &rpc_list;
990
991         rc = ldlm_reprocess_queue(res, &res->lr_converting);
992         if (rc == LDLM_ITER_CONTINUE)
993                 ldlm_reprocess_queue(res, &res->lr_waiting);
994
995         res->lr_tmp = NULL;
996         l_unlock(&res->lr_namespace->ns_lock);
997
998         rc = ldlm_run_ast_work(res->lr_namespace, &rpc_list);
999         if (rc == -ERESTART) {
1000                 LASSERT(list_empty(&rpc_list));
1001                 goto restart;
1002         }
1003         EXIT;
1004 }
1005
1006 void ldlm_cancel_callback(struct ldlm_lock *lock)
1007 {
1008         l_lock(&lock->l_resource->lr_namespace->ns_lock);
1009         if (!(lock->l_flags & LDLM_FL_CANCEL)) {
1010                 lock->l_flags |= LDLM_FL_CANCEL;
1011                 if (lock->l_blocking_ast) {
1012                         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
1013                         // l_check_no_ns_lock(lock->l_resource->lr_namespace);
1014                         lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
1015                                              LDLM_CB_CANCELING);
1016                         return;
1017                 } else {
1018                         LDLM_DEBUG(lock, "no blocking ast");
1019                 }
1020         }
1021         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
1022 }
1023
1024 void ldlm_lock_cancel(struct ldlm_lock *lock)
1025 {
1026         struct ldlm_resource *res;
1027         struct ldlm_namespace *ns;
1028         ENTRY;
1029
1030         /* There's no race between calling this and taking the ns lock below;
1031          * a lock can only be put on the waiting list once, because it can only
1032          * issue a blocking AST once. */
1033         ldlm_del_waiting_lock(lock);
1034
1035         res = lock->l_resource;
1036         ns = res->lr_namespace;
1037
1038         l_lock(&ns->ns_lock);
1039         /* Please do not, no matter how tempting, remove this LBUG without
1040          * talking to me first. -phik */
1041         if (lock->l_readers || lock->l_writers) {
1042                 LDLM_DEBUG(lock, "lock still has references");
1043                 ldlm_lock_dump(D_OTHER, lock, 0);
1044                 LBUG();
1045         }
1046
1047         ldlm_cancel_callback(lock);
1048
1049         ldlm_resource_unlink_lock(lock);
1050         ldlm_lock_destroy(lock);
1051         l_unlock(&ns->ns_lock);
1052         EXIT;
1053 }
1054
1055 int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
1056 {
1057         struct ldlm_lock *lock = ldlm_handle2lock(lockh);
1058         ENTRY;
1059
1060         if (lock == NULL)
1061                 RETURN(-EINVAL);
1062
1063         lock->l_ast_data = data;
1064         LDLM_LOCK_PUT(lock);
1065         RETURN(0);
1066 }
1067
1068 void ldlm_cancel_locks_for_export(struct obd_export *exp)
1069 {
1070         struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
1071         struct ldlm_lock *lock;
1072         struct ldlm_resource *res;
1073
1074         l_lock(&ns->ns_lock);
1075         while(!list_empty(&exp->exp_ldlm_data.led_held_locks)) { 
1076                 lock = list_entry(exp->exp_ldlm_data.led_held_locks.next,
1077                                   struct ldlm_lock, l_export_chain);
1078                 res = ldlm_resource_getref(lock->l_resource);
1079                 LDLM_DEBUG(lock, "export %p", exp);
1080                 ldlm_lock_cancel(lock);
1081                 l_unlock(&ns->ns_lock);
1082                 ldlm_reprocess_all(res);
1083                 ldlm_resource_putref(res);
1084                 l_lock(&ns->ns_lock);
1085         }
1086         l_unlock(&ns->ns_lock);
1087 }
1088
1089 struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
1090                                         int *flags)
1091 {
1092         struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
1093         struct ldlm_resource *res;
1094         struct ldlm_namespace *ns;
1095         int granted = 0;
1096         ENTRY;
1097
1098         LBUG();
1099
1100         res = lock->l_resource;
1101         ns = res->lr_namespace;
1102
1103         l_lock(&ns->ns_lock);
1104
1105         lock->l_req_mode = new_mode;
1106         ldlm_resource_unlink_lock(lock);
1107
1108         /* If this is a local resource, put it on the appropriate list. */
1109         if (res->lr_namespace->ns_client) {
1110                 if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED)) {
1111                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
1112                 } else {
1113                         /* This should never happen, because of the way the
1114                          * server handles conversions. */
1115                         LBUG();
1116
1117                         res->lr_tmp = &rpc_list;
1118                         ldlm_grant_lock(lock, NULL, 0, 0);
1119                         res->lr_tmp = NULL;
1120                         granted = 1;
1121                         /* FIXME: completion handling not with ns_lock held ! */
1122                         if (lock->l_completion_ast)
1123                                 lock->l_completion_ast(lock, 0, NULL);
1124                 }
1125         } else {
1126                 /* FIXME: We should try the conversion right away and possibly
1127                  * return success without the need for an extra AST */
1128                 ldlm_resource_add_lock(res, &res->lr_converting, lock);
1129                 *flags |= LDLM_FL_BLOCK_CONV;
1130         }
1131
1132         l_unlock(&ns->ns_lock);
1133
1134         if (granted)
1135                 ldlm_run_ast_work(ns, &rpc_list);
1136         RETURN(res);
1137 }
1138
1139 void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos)
1140 {
1141         char str[PTL_NALFMT_SIZE];
1142         struct obd_device *obd = NULL;
1143
1144         if (!((portal_debug | D_ERROR) & level))
1145                 return;
1146
1147         if (!lock) {
1148                 CDEBUG(level, "  NULL LDLM lock\n");
1149                 return;
1150         }
1151
1152         CDEBUG(level, "  -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d)\n",
1153                lock, lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
1154                pos);
1155         if (lock->l_conn_export != NULL)
1156                 obd = lock->l_conn_export->exp_obd;
1157         if (lock->l_export && lock->l_export->exp_connection) {
1158                 CDEBUG(level, "  Node: NID "LPX64" (%s) on %s (rhandle: "LPX64")\n",
1159                        lock->l_export->exp_connection->c_peer.peer_nid,
1160                        portals_nid2str(lock->l_export->exp_connection->c_peer.peer_ni->pni_number,
1161                                        lock->l_export->exp_connection->c_peer.peer_nid, str),
1162                        lock->l_export->exp_connection->c_peer.peer_ni->pni_name,
1163                        lock->l_remote_handle.cookie);
1164         } else if (obd == NULL) {
1165                 CDEBUG(level, "  Node: local\n");
1166         } else {
1167                 struct obd_import *imp = obd->u.cli.cl_import;
1168                 CDEBUG(level, "  Node: NID "LPX64" (%s) on %s (rhandle: "LPX64")\n",
1169                        imp->imp_connection->c_peer.peer_nid,
1170                        portals_nid2str(imp->imp_connection->c_peer.peer_ni->pni_number,
1171                                        imp->imp_connection->c_peer.peer_nid, str),
1172                        imp->imp_connection->c_peer.peer_ni->pni_name,
1173                        lock->l_remote_handle.cookie);
1174         }
1175         CDEBUG(level, "  Resource: %p ("LPU64"/"LPU64")\n", lock->l_resource,
1176                lock->l_resource->lr_name.name[0],
1177                lock->l_resource->lr_name.name[1]);
1178         CDEBUG(level, "  Req mode: %d, grant mode: %d, rc: %u, read: %d, "
1179                "write: %d\n", (int)lock->l_req_mode, (int)lock->l_granted_mode,
1180                atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers);
1181         if (lock->l_resource->lr_type == LDLM_EXTENT)
1182                 CDEBUG(level, "  Extent: "LPU64" -> "LPU64
1183                        " (req "LPU64"-"LPU64")\n",
1184                        lock->l_policy_data.l_extent.start,
1185                        lock->l_policy_data.l_extent.end,
1186                        lock->l_req_extent.start, lock->l_req_extent.end);
1187         else if (lock->l_resource->lr_type == LDLM_FLOCK)
1188                 CDEBUG(level, "  Pid: "LPU64" Extent: "LPU64" -> "LPU64"\n",
1189                        lock->l_policy_data.l_flock.pid,
1190                        lock->l_policy_data.l_flock.start,
1191                        lock->l_policy_data.l_flock.end);
1192 }
1193
1194 void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
1195 {
1196         struct ldlm_lock *lock;
1197
1198         lock = ldlm_handle2lock(lockh);
1199         if (lock == NULL)
1200                 return;
1201
1202         ldlm_lock_dump(D_OTHER, lock, 0);
1203
1204         LDLM_LOCK_PUT(lock);
1205 }