Whamcloud - gitweb
smash the HEAD with the contents of b_cmd. HEAD_PRE_CMD_SMASH and
[fs/lustre-release.git] / lustre / ldlm / ldlm_lock.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
5  *   Author: Peter Braam <braam@clusterfs.com>
6  *   Author: Phil Schwan <phil@clusterfs.com>
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #define DEBUG_SUBSYSTEM S_LDLM
25
26 #ifdef __KERNEL__
27 # include <linux/slab.h>
28 # include <linux/module.h>
29 # include <linux/lustre_dlm.h>
30 #else
31 # include <liblustre.h>
32 # include <linux/kp30.h>
33 #endif
34
35 #include <linux/obd_class.h>
36 #include "ldlm_internal.h"
37
38 //struct lustre_lock ldlm_everything_lock;
39
40 /* lock types */
41 char *ldlm_lockname[] = {
42         [0] "--",
43         [LCK_EX] "EX",
44         [LCK_PW] "PW",
45         [LCK_PR] "PR",
46         [LCK_CW] "CW",
47         [LCK_CR] "CR",
48         [LCK_NL] "NL"
49 };
50 char *ldlm_typename[] = {
51         [LDLM_PLAIN] "PLN",
52         [LDLM_EXTENT] "EXT",
53         [LDLM_FLOCK] "FLK",
54         [LDLM_IBITS] "IBT",
55 };
56
57 char *ldlm_it2str(int it)
58 {
59         switch (it) {
60         case IT_OPEN:
61                 return "open";
62         case IT_CREAT:
63                 return "creat";
64         case (IT_OPEN | IT_CREAT):
65                 return "open|creat";
66         case IT_READDIR:
67                 return "readdir";
68         case IT_GETATTR:
69                 return "getattr";
70         case IT_LOOKUP:
71                 return "lookup";
72         case IT_UNLINK:
73                 return "unlink";
74         case IT_GETXATTR:
75                 return "getxattr";
76         default:
77                 CERROR("Unknown intent %d\n", it);
78                 return "UNKNOWN";
79         }
80 }
81
82 extern kmem_cache_t *ldlm_lock_slab;
83 struct lustre_lock ldlm_handle_lock;
84
85 static ldlm_processing_policy ldlm_processing_policy_table[] = {
86         [LDLM_PLAIN] ldlm_process_plain_lock,
87         [LDLM_EXTENT] ldlm_process_extent_lock,
88 #ifdef __KERNEL__
89         [LDLM_FLOCK] ldlm_process_flock_lock,
90 #endif
91         [LDLM_IBITS] ldlm_process_inodebits_lock,
92 };
93
94 ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res)
95 {
96         return ldlm_processing_policy_table[res->lr_type];
97 }
98
99 void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg)
100 {
101         ns->ns_policy = arg;
102 }
103
104 /*
105  * REFCOUNTED LOCK OBJECTS
106  */
107
108
109 /*
110  * Lock refcounts, during creation:
111  *   - one special one for allocation, dec'd only once in destroy
112  *   - one for being a lock that's in-use
113  *   - one for the addref associated with a new lock
114  */
115 struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
116 {
117         atomic_inc(&lock->l_refc);
118         return lock;
119 }
120
121 void ldlm_lock_put(struct ldlm_lock *lock)
122 {
123         ENTRY;
124
125         if (atomic_dec_and_test(&lock->l_refc)) {
126                 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
127
128                 l_lock(&ns->ns_lock);
129                 LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing");
130                 LASSERT(lock->l_destroyed);
131                 LASSERT(list_empty(&lock->l_res_link));
132
133                 spin_lock(&ns->ns_counter_lock);
134                 ns->ns_locks--;
135                 spin_unlock(&ns->ns_counter_lock);
136
137                 ldlm_resource_putref(lock->l_resource);
138                 lock->l_resource = NULL;
139                 if (lock->l_export)
140                         class_export_put(lock->l_export);
141
142                 if (lock->l_parent)
143                         LDLM_LOCK_PUT(lock->l_parent);
144
145                 if (lock->l_lvb_data != NULL)
146                         OBD_FREE(lock->l_lvb_data, lock->l_lvb_len);
147
148                 OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
149                 l_unlock(&ns->ns_lock);
150         }
151
152         EXIT;
153 }
154
155 void ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
156 {
157         ENTRY;
158         l_lock(&lock->l_resource->lr_namespace->ns_lock);
159         if (!list_empty(&lock->l_lru)) {
160                 list_del_init(&lock->l_lru);
161                 lock->l_resource->lr_namespace->ns_nr_unused--;
162                 LASSERT(lock->l_resource->lr_namespace->ns_nr_unused >= 0);
163         }
164         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
165         EXIT;
166 }
167
168 /* This used to have a 'strict' flact, which recovery would use to mark an
169  * in-use lock as needing-to-die.  Lest I am ever tempted to put it back, I
170  * shall explain why it's gone: with the new hash table scheme, once you call
171  * ldlm_lock_destroy, you can never drop your final references on this lock.
172  * Because it's not in the hash table anymore.  -phil */
173 void ldlm_lock_destroy(struct ldlm_lock *lock)
174 {
175         ENTRY;
176         l_lock(&lock->l_resource->lr_namespace->ns_lock);
177
178         if (!list_empty(&lock->l_children)) {
179                 LDLM_ERROR(lock, "still has children (%p)!",
180                            lock->l_children.next);
181                 ldlm_lock_dump(D_ERROR, lock, 0);
182                 LBUG();
183         }
184         if (lock->l_readers || lock->l_writers) {
185                 LDLM_ERROR(lock, "lock still has references");
186                 ldlm_lock_dump(D_ERROR, lock, 0);
187                 LBUG();
188         }
189
190         if (!list_empty(&lock->l_res_link)) {
191                 ldlm_lock_dump(D_ERROR, lock, 0);
192                 LBUG();
193         }
194
195         if (lock->l_destroyed) {
196                 LASSERT(list_empty(&lock->l_lru));
197                 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
198                 EXIT;
199                 return;
200         }
201         lock->l_destroyed = 1;
202
203         list_del_init(&lock->l_export_chain);
204         ldlm_lock_remove_from_lru(lock);
205         class_handle_unhash(&lock->l_handle);
206
207 #if 0
208         /* Wake anyone waiting for this lock */
209         /* FIXME: I should probably add yet another flag, instead of using
210          * l_export to only call this on clients */
211         if (lock->l_export)
212                 class_export_put(lock->l_export);
213         lock->l_export = NULL;
214         if (lock->l_export && lock->l_completion_ast)
215                 lock->l_completion_ast(lock, 0);
216 #endif
217
218         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
219         LDLM_LOCK_PUT(lock);
220         EXIT;
221 }
222
/* Handle-table addref callback registered through class_handle_hash(); per
 * the original note it is invoked by portals_handle2object with the handle
 * lock taken.  @lock is really a struct ldlm_lock. */
static void lock_handle_addref(void *lock)
{
        struct ldlm_lock *target = lock;

        LDLM_LOCK_GET(target);
}
228
229 /*
230  * usage: pass in a resource on which you have done ldlm_resource_get
231  *        pass in a parent lock on which you have done a ldlm_lock_get
232  *        after return, ldlm_*_put the resource and parent
233  * returns: lock with refcount 1
234  */
235 static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
236                                        struct ldlm_resource *resource)
237 {
238         struct ldlm_lock *lock;
239         ENTRY;
240
241         if (resource == NULL)
242                 LBUG();
243
244         OBD_SLAB_ALLOC(lock, ldlm_lock_slab, SLAB_NOFS, sizeof(*lock));
245         if (lock == NULL)
246                 RETURN(NULL);
247
248         lock->l_resource = ldlm_resource_getref(resource);
249
250         atomic_set(&lock->l_refc, 2);
251         INIT_LIST_HEAD(&lock->l_children);
252         INIT_LIST_HEAD(&lock->l_res_link);
253         INIT_LIST_HEAD(&lock->l_lru);
254         INIT_LIST_HEAD(&lock->l_export_chain);
255         INIT_LIST_HEAD(&lock->l_pending_chain);
256         init_waitqueue_head(&lock->l_waitq);
257
258         spin_lock(&resource->lr_namespace->ns_counter_lock);
259         resource->lr_namespace->ns_locks++;
260         spin_unlock(&resource->lr_namespace->ns_counter_lock);
261
262         if (parent != NULL) {
263                 l_lock(&parent->l_resource->lr_namespace->ns_lock);
264                 lock->l_parent = LDLM_LOCK_GET(parent);
265                 list_add(&lock->l_childof, &parent->l_children);
266                 l_unlock(&parent->l_resource->lr_namespace->ns_lock);
267         }
268
269         INIT_LIST_HEAD(&lock->l_handle.h_link);
270         class_handle_hash(&lock->l_handle, lock_handle_addref);
271
272         RETURN(lock);
273 }
274
275 int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
276                               struct ldlm_res_id new_resid)
277 {
278         struct ldlm_resource *oldres = lock->l_resource;
279         ENTRY;
280
281         l_lock(&ns->ns_lock);
282         if (memcmp(&new_resid, &lock->l_resource->lr_name,
283                    sizeof(lock->l_resource->lr_name)) == 0) {
284                 /* Nothing to do */
285                 l_unlock(&ns->ns_lock);
286                 RETURN(0);
287         }
288
289         LASSERT(new_resid.name[0] != 0);
290
291         /* This function assumes that the lock isn't on any lists */
292         LASSERT(list_empty(&lock->l_res_link));
293
294         lock->l_resource = ldlm_resource_get(ns, NULL, new_resid,
295                                              lock->l_resource->lr_type, 1);
296         if (lock->l_resource == NULL) {
297                 LBUG();
298                 RETURN(-ENOMEM);
299         }
300
301         /* ...and the flowers are still standing! */
302         ldlm_resource_putref(oldres);
303
304         l_unlock(&ns->ns_lock);
305         RETURN(0);
306 }
307
308 /*
309  *  HANDLES
310  */
311
312 void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
313 {
314         lockh->cookie = lock->l_handle.h_cookie;
315 }
316
317 /* if flags: atomically get the lock and set the flags.
318  *           Return NULL if flag already set
319  */
320
321 struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *handle, int flags)
322 {
323         struct ldlm_namespace *ns;
324         struct ldlm_lock *lock = NULL, *retval = NULL;
325         ENTRY;
326
327         LASSERT(handle);
328
329         lock = class_handle2object(handle->cookie);
330         if (lock == NULL)
331                 RETURN(NULL);
332
333         LASSERT(lock->l_resource != NULL);
334         ns = lock->l_resource->lr_namespace;
335         LASSERT(ns != NULL);
336
337         l_lock(&ns->ns_lock);
338
339         /* It's unlikely but possible that someone marked the lock as
340          * destroyed after we did handle2object on it */
341         if (lock->l_destroyed) {
342                 CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
343                 LDLM_LOCK_PUT(lock);
344                 GOTO(out, retval);
345         }
346
347         if (flags && (lock->l_flags & flags)) {
348                 LDLM_LOCK_PUT(lock);
349                 GOTO(out, retval);
350         }
351
352         if (flags)
353                 lock->l_flags |= flags;
354
355         retval = lock;
356         EXIT;
357  out:
358         l_unlock(&ns->ns_lock);
359         return retval;
360 }
361
362 struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *ns,
363                                       struct lustre_handle *handle)
364 {
365         struct ldlm_lock *retval = NULL;
366
367         l_lock(&ns->ns_lock);
368         retval = __ldlm_handle2lock(handle, 0);
369         l_unlock(&ns->ns_lock);
370
371         return retval;
372 }
373
374 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
375 {
376         ldlm_res2desc(lock->l_resource, &desc->l_resource);
377         desc->l_req_mode = lock->l_req_mode;
378         desc->l_granted_mode = lock->l_granted_mode;
379         memcpy(&desc->l_policy_data, &lock->l_policy_data,
380                sizeof(desc->l_policy_data));
381 }
382
383 void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
384                             void *data, int datalen)
385 {
386         struct ldlm_ast_work *w;
387         ENTRY;
388
389         l_lock(&lock->l_resource->lr_namespace->ns_lock);
390         if (new && (lock->l_flags & LDLM_FL_AST_SENT))
391                 GOTO(out, 0);
392
393         CDEBUG(D_OTHER, "lock %p incompatible; sending blocking AST.\n", lock);
394
395         OBD_ALLOC(w, sizeof(*w));
396         if (!w) {
397                 LBUG();
398                 GOTO(out, 0);
399         }
400
401         w->w_data = data;
402         w->w_datalen = datalen;
403         if (new) {
404                 LDLM_DEBUG(lock, "lock incompatible; sending blocking AST.");
405                 lock->l_flags |= LDLM_FL_AST_SENT;
406                 /* If the enqueuing client said so, tell the AST recipient to
407                  * discard dirty data, rather than writing back. */
408                 if (new->l_flags & LDLM_AST_DISCARD_DATA)
409                         lock->l_flags |= LDLM_FL_DISCARD_DATA;
410                 w->w_blocking = 1;
411                 ldlm_lock2desc(new, &w->w_desc);
412         }
413
414         w->w_lock = LDLM_LOCK_GET(lock);
415         list_add(&w->w_list, lock->l_resource->lr_tmp);
416         EXIT;
417  out:
418         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
419 }
420
421 void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
422 {
423         struct ldlm_lock *lock;
424
425         lock = ldlm_handle2lock(lockh);
426         ldlm_lock_addref_internal(lock, mode);
427         LDLM_LOCK_PUT(lock);
428 }
429
430 /* only called for local locks */
431 void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
432 {
433         l_lock(&lock->l_resource->lr_namespace->ns_lock);
434         ldlm_lock_remove_from_lru(lock);
435         if (mode & (LCK_NL | LCK_CR | LCK_PR))
436                 lock->l_readers++;
437         if (mode & (LCK_EX | LCK_CW | LCK_PW))
438                 lock->l_writers++;
439         lock->l_last_used = jiffies;
440         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
441         LDLM_LOCK_GET(lock);
442         LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
443 }
444
445 void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
446 {
447         struct ldlm_namespace *ns;
448         ENTRY;
449
450         LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
451         ns = lock->l_resource->lr_namespace;
452         l_lock(&ns->ns_lock);
453         if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
454                 LASSERT(lock->l_readers > 0);
455                 lock->l_readers--;
456         }
457         if (mode & (LCK_EX | LCK_CW | LCK_PW)) {
458                 LASSERT(lock->l_writers > 0);
459                 lock->l_writers--;
460         }
461
462         if (lock->l_flags & LDLM_FL_LOCAL &&
463             !lock->l_readers && !lock->l_writers) {
464                 /* If this is a local lock on a server namespace and this was
465                  * the last reference, cancel the lock. */
466                 CDEBUG(D_INFO, "forcing cancel of local lock\n");
467                 lock->l_flags |= LDLM_FL_CBPENDING;
468         }
469
470         if (!lock->l_readers && !lock->l_writers &&
471             (lock->l_flags & LDLM_FL_CBPENDING)) {
472                 /* If we received a blocked AST and this was the last reference,
473                  * run the callback. */
474                 if (ns->ns_client == LDLM_NAMESPACE_SERVER && lock->l_export)
475                         CERROR("FL_CBPENDING set on non-local lock--just a "
476                                "warning\n");
477
478                 LDLM_DEBUG(lock, "final decref done on cbpending lock");
479
480                 LDLM_LOCK_GET(lock); /* dropped by bl thread */
481                 ldlm_lock_remove_from_lru(lock);
482 #ifdef __KERNEL__
483                 ldlm_bl_to_thread(ns, NULL, lock);
484                 l_unlock(&ns->ns_lock);
485 #else
486                 l_unlock(&ns->ns_lock);
487                 liblustre_ldlm_handle_bl_callback(ns, NULL, lock);
488 #endif
489         } else if (ns->ns_client == LDLM_NAMESPACE_CLIENT &&
490                    !lock->l_readers && !lock->l_writers) {
491                 /* If this is a client-side namespace and this was the last
492                  * reference, put it on the LRU. */
493                 LASSERT(list_empty(&lock->l_lru));
494                 LASSERT(ns->ns_nr_unused >= 0);
495                 list_add_tail(&lock->l_lru, &ns->ns_unused_list);
496                 ns->ns_nr_unused++;
497                 l_unlock(&ns->ns_lock);
498                 ldlm_cancel_lru(ns);
499         } else {
500                 l_unlock(&ns->ns_lock);
501         }
502
503         LDLM_LOCK_PUT(lock);    /* matches the ldlm_lock_get in addref */
504
505         EXIT;
506 }
507
508 void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
509 {
510         struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
511         LASSERT(lock != NULL);
512         ldlm_lock_decref_internal(lock, mode);
513         LDLM_LOCK_PUT(lock);
514 }
515
516 /* This will drop a lock reference and mark it for destruction, but will not
517  * necessarily cancel the lock before returning. */
518 void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
519 {
520         struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
521         ENTRY;
522
523         LASSERT(lock != NULL);
524
525         LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
526         l_lock(&lock->l_resource->lr_namespace->ns_lock);
527         lock->l_flags |= LDLM_FL_CBPENDING;
528         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
529         ldlm_lock_decref_internal(lock, mode);
530         LDLM_LOCK_PUT(lock);
531 }
532
533 /* NOTE: called by
534  *  - ldlm_lock_enqueue
535  *  - ldlm_reprocess_queue
536  *  - ldlm_lock_convert
537  */
538 void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen,
539                      int run_ast)
540 {
541         struct ldlm_resource *res = lock->l_resource;
542         ENTRY;
543
544         l_lock(&lock->l_resource->lr_namespace->ns_lock);
545         lock->l_granted_mode = lock->l_req_mode;
546         ldlm_resource_add_lock(res, &res->lr_granted, lock);
547
548         if (lock->l_granted_mode < res->lr_most_restr)
549                 res->lr_most_restr = lock->l_granted_mode;
550
551         if (run_ast && lock->l_completion_ast != NULL)
552                 ldlm_add_ast_work_item(lock, NULL, data, datalen);
553
554         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
555         EXIT;
556 }
557
558 /* returns a referenced lock or NULL.  See the flag descriptions below, in the
559  * comment above ldlm_lock_match */
560 static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
561                                       ldlm_policy_data_t *policy,
562                                       struct ldlm_lock *old_lock, int flags)
563 {
564         struct ldlm_lock *lock;
565         struct list_head *tmp;
566
567         list_for_each(tmp, queue) {
568                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
569
570                 if (lock == old_lock)
571                         break;
572
573                 /* llite sometimes wants to match locks that will be
574                  * canceled when their users drop, but we allow it to match
575                  * if it passes in CBPENDING and the lock still has users.
576                  * this is generally only going to be used by children 
577                  * whose parents already hold a lock so forward progress
578                  * can still happen. */
579                 if (lock->l_flags & LDLM_FL_CBPENDING &&
580                     !(flags & LDLM_FL_CBPENDING))
581                         continue;
582                 if (lock->l_flags & LDLM_FL_CBPENDING &&
583                     lock->l_readers == 0 && lock->l_writers == 0)
584                         continue;
585
586                 if (!(lock->l_req_mode & mode))
587                         continue;
588
589                 if (lock->l_resource->lr_type == LDLM_EXTENT &&
590                     (lock->l_policy_data.l_extent.start >
591                      policy->l_extent.start ||
592                      lock->l_policy_data.l_extent.end < policy->l_extent.end))
593                         continue;
594
595                 if (lock->l_resource->lr_type == LDLM_EXTENT &&
596                     mode == LCK_CW &&
597                     lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
598                         continue;
599
600                 /* We match if we have existing lock with same or wider set
601                    of bits. */
602                 if (lock->l_resource->lr_type == LDLM_IBITS &&
603                      ((lock->l_policy_data.l_inodebits.bits &
604                       policy->l_inodebits.bits) !=
605                       policy->l_inodebits.bits))
606                         continue;
607
608                 if (lock->l_destroyed)
609                         continue;
610
611                 if ((flags & LDLM_FL_LOCAL_ONLY) &&
612                     !(lock->l_flags & LDLM_FL_LOCAL))
613                         continue;
614
615                 if (flags & LDLM_FL_TEST_LOCK)
616                         LDLM_LOCK_GET(lock);
617                 else
618                         ldlm_lock_addref_internal(lock, mode);
619                 return lock;
620         }
621
622         return NULL;
623 }
624
625 void ldlm_lock_allow_match(struct ldlm_lock *lock)
626 {
627         l_lock(&lock->l_resource->lr_namespace->ns_lock);
628         lock->l_flags |= LDLM_FL_CAN_MATCH;
629         wake_up(&lock->l_waitq);
630         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
631 }
632
/* Look for an existing lock that is compatible with the described request.
 *
 * Can be called in two ways:
 *
 * If 'ns' is NULL, then lockh describes an existing lock that we want to look
 * for a duplicate of.  In that case the namespace, resource name, type and
 * mode are taken from that lock; 'policy' is still used as passed in.
 *
 * Otherwise, all of the fields must be filled in, to match against.
 *
 * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the
 *     server (ie, connh is NULL)
 * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted
 *     list will be considered
 * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked
 *     to be canceled can still be matched as long as they still have reader
 *     or writer references
 * If 'flags' contains LDLM_FL_TEST_LOCK, then don't actually reference a lock,
 *     just tell us if we would have matched.
 *
 * Returns 1 if it finds an already-existing lock that is compatible; in this
 * case, lockh is filled in with a addref()ed lock.  Returns 0 otherwise.
 */
int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
                    struct ldlm_res_id *res_id, __u32 type,
                    ldlm_policy_data_t *policy, ldlm_mode_t mode,
                    struct lustre_handle *lockh)
{
        struct ldlm_resource *res;
        struct ldlm_lock *lock, *old_lock = NULL;
        int rc = 0;
        ENTRY;

        if (ns == NULL) {
                /* Duplicate search: derive the request from the lock behind
                 * lockh.  The reference taken here is dropped at the end. */
                old_lock = ldlm_handle2lock(lockh);
                LASSERT(old_lock);

                ns = old_lock->l_resource->lr_namespace;
                res_id = &old_lock->l_resource->lr_name;
                type = old_lock->l_resource->lr_type;
                mode = old_lock->l_req_mode;
        }

        /* Last argument 0: presumably "do not create" -- a missing resource
         * then simply means there is nothing to match (TODO confirm against
         * ldlm_resource_get). */
        res = ldlm_resource_get(ns, NULL, *res_id, type, 0);
        if (res == NULL) {
                LASSERT(old_lock == NULL);
                RETURN(0);
        }

        l_lock(&ns->ns_lock);

        /* Search order: granted, then converting, then waiting.  With
         * LDLM_FL_BLOCK_GRANTED only the granted queue is searched. */
        lock = search_queue(&res->lr_granted, mode, policy, old_lock, flags);
        if (lock != NULL)
                GOTO(out, rc = 1);
        if (flags & LDLM_FL_BLOCK_GRANTED)
                GOTO(out, rc = 0);
        lock = search_queue(&res->lr_converting, mode, policy, old_lock, flags);
        if (lock != NULL)
                GOTO(out, rc = 1);
        lock = search_queue(&res->lr_waiting, mode, policy, old_lock, flags);
        if (lock != NULL)
                GOTO(out, rc = 1);

        EXIT;
 out:
        ldlm_resource_putref(res);
        l_unlock(&ns->ns_lock);

        if (lock) {
                ldlm_lock2handle(lock, lockh);
                /* The matched lock has not been released for matching yet:
                 * run its completion AST (if any) and sleep until
                 * ldlm_lock_allow_match() sets CAN_MATCH or the wait ends.
                 * NOTE(review): the results of the AST and of l_wait_event
                 * are ignored, so rc stays 1 even if CAN_MATCH never got
                 * set -- confirm this is intended.
                 * NOTE(review): this wait also happens for LDLM_FL_TEST_LOCK
                 * callers. */
                if (!(lock->l_flags & LDLM_FL_CAN_MATCH)) {
                        struct l_wait_info lwi;
                        if (lock->l_completion_ast)
                                lock->l_completion_ast(lock,
                                                       LDLM_FL_WAIT_NOREPROC,
                                                       NULL);

                        lwi = LWI_TIMEOUT_INTR(obd_timeout*HZ, NULL,NULL,NULL);

                        /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */
                        l_wait_event(lock->l_waitq,
                                     (lock->l_flags & LDLM_FL_CAN_MATCH), &lwi);
                }
        }
        /* Debug output only.  For every type other than LDLM_PLAIN the
         * extent member of 'policy' is printed, so 'policy' must be
         * non-NULL for those types. */
        if (rc)
                LDLM_DEBUG(lock, "matched ("LPU64" "LPU64")",
                           type == LDLM_PLAIN ? res_id->name[2] :
                                policy->l_extent.start,
                           type == LDLM_PLAIN ? res_id->name[3] :
                                policy->l_extent.end);
        else if (!(flags & LDLM_FL_TEST_LOCK)) /* less verbose for test-only */
                LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res "
                                  LPU64"/"LPU64" ("LPU64" "LPU64")", ns,
                                  type, mode, res_id->name[0], res_id->name[1],
                                  type == LDLM_PLAIN ? res_id->name[2] :
                                        policy->l_extent.start,
                                  type == LDLM_PLAIN ? res_id->name[3] :
                                        policy->l_extent.end);

        /* Drop the reference from the handle lookup at the top. */
        if (old_lock)
                LDLM_LOCK_PUT(old_lock);
        /* Test-only callers keep no reference: undo the one search_queue()
         * took on the match. */
        if (flags & LDLM_FL_TEST_LOCK && rc)
                LDLM_LOCK_PUT(lock);

        return rc;
}
736
737 /* Returns a referenced lock */
738 struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
739                                    struct lustre_handle *parent_lock_handle,
740                                    struct ldlm_res_id res_id, __u32 type,
741                                    ldlm_mode_t mode,
742                                    ldlm_blocking_callback blocking,
743                                    ldlm_completion_callback completion,
744                                    ldlm_glimpse_callback glimpse,
745                                    void *data, __u32 lvb_len)
746 {
747         struct ldlm_resource *res, *parent_res = NULL;
748         struct ldlm_lock *lock, *parent_lock = NULL;
749         ENTRY;
750
751         if (parent_lock_handle) {
752                 parent_lock = ldlm_handle2lock(parent_lock_handle);
753                 if (parent_lock)
754                         parent_res = parent_lock->l_resource;
755         }
756
757         res = ldlm_resource_get(ns, parent_res, res_id, type, 1);
758         if (res == NULL)
759                 RETURN(NULL);
760
761         lock = ldlm_lock_new(parent_lock, res);
762         ldlm_resource_putref(res);
763         if (parent_lock != NULL)
764                 LDLM_LOCK_PUT(parent_lock);
765
766         if (lock == NULL)
767                 RETURN(NULL);
768
769         lock->l_req_mode = mode;
770         lock->l_ast_data = data;
771         lock->l_blocking_ast = blocking;
772         lock->l_completion_ast = completion;
773         lock->l_glimpse_ast = glimpse;
774
775         if (lvb_len) {
776                 lock->l_lvb_len = lvb_len;
777                 OBD_ALLOC(lock->l_lvb_data, lvb_len);
778                 if (lock->l_lvb_data == NULL) {
779                         OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
780                         RETURN(NULL);
781                 }
782         }
783
784         RETURN(lock);
785 }
786
787 ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
788                                struct ldlm_lock **lockp,
789                                void *cookie, int *flags)
790 {
791         struct ldlm_lock *lock = *lockp;
792         struct ldlm_resource *res = lock->l_resource;
793         int local = res->lr_namespace->ns_client;
794         ldlm_processing_policy policy;
795         ldlm_error_t rc = ELDLM_OK;
796         ENTRY;
797
798         /* policies are not executed on the client or during replay */
799         if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
800             && !local && ns->ns_policy) {
801                 rc = ns->ns_policy(ns, lockp, cookie, lock->l_req_mode, *flags,
802                                    NULL);
803                 if (rc == ELDLM_LOCK_REPLACED) {
804                         /* The lock that was returned has already been granted,
805                          * and placed into lockp.  If it's not the same as the
806                          * one we passed in, then destroy the old one and our
807                          * work here is done. */
808                         if (lock != *lockp) {
809                                 ldlm_lock_destroy(lock);
810                                 LDLM_LOCK_PUT(lock);
811                         }
812                         *flags |= LDLM_FL_LOCK_CHANGED;
813                         RETURN(0);
814                 } else if (rc == ELDLM_LOCK_ABORTED ||
815                            (rc == 0 && (*flags & LDLM_FL_INTENT_ONLY))) {
816                         ldlm_lock_destroy(lock);
817                         RETURN(rc);
818                 }
819                 LASSERT(rc == ELDLM_OK);
820         }
821
822         l_lock(&ns->ns_lock);
823         if (local && lock->l_req_mode == lock->l_granted_mode) {
824                 /* The server returned a blocked lock, but it was granted before
825                  * we got a chance to actually enqueue it.  We don't need to do
826                  * anything else. */
827                 *flags &= ~(LDLM_FL_BLOCK_GRANTED |
828                             LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
829                 GOTO(out, ELDLM_OK);
830         }
831
832         /* Some flags from the enqueue want to make it into the AST, via the
833          * lock's l_flags. */
834         lock->l_flags |= (*flags & LDLM_AST_DISCARD_DATA);
835
836         /* This distinction between local lock trees is very important; a client
837          * namespace only has information about locks taken by that client, and
838          * thus doesn't have enough information to decide for itself if it can
839          * be granted (below).  In this case, we do exactly what the server
840          * tells us to do, as dictated by the 'flags'.
841          *
842          * We do exactly the same thing during recovery, when the server is
843          * more or less trusting the clients not to lie.
844          *
845          * FIXME (bug 268): Detect obvious lies by checking compatibility in
846          * granted/converting queues. */
847         ldlm_resource_unlink_lock(lock);
848         if (local) {
849                 if (*flags & LDLM_FL_BLOCK_CONV)
850                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
851                 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
852                         ldlm_resource_add_lock(res, &res->lr_waiting, lock);
853                 else
854                         ldlm_grant_lock(lock, NULL, 0, 0);
855                 GOTO(out, ELDLM_OK);
856         } else if (*flags & LDLM_FL_REPLAY) {
857                 if (*flags & LDLM_FL_BLOCK_CONV) {
858                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
859                         GOTO(out, ELDLM_OK);
860                 } else if (*flags & LDLM_FL_BLOCK_WAIT) {
861                         ldlm_resource_add_lock(res, &res->lr_waiting, lock);
862                         GOTO(out, ELDLM_OK);
863                 } else if (*flags & LDLM_FL_BLOCK_GRANTED) {
864                         ldlm_grant_lock(lock, NULL, 0, 0);
865                         GOTO(out, ELDLM_OK);
866                 }
867                 /* If no flags, fall through to normal enqueue path. */
868         }
869
870         policy = ldlm_processing_policy_table[res->lr_type];
871         policy(lock, flags, 1, &rc);
872         EXIT;
873 out:
874         l_unlock(&ns->ns_lock);
875         return rc;
876 }
877
878 /* Must be called with namespace taken: queue is waiting or converting. */
879 int ldlm_reprocess_queue(struct ldlm_resource *res, struct list_head *queue)
880 {
881         struct list_head *tmp, *pos;
882         ldlm_processing_policy policy;
883         int flags;
884         int rc = LDLM_ITER_CONTINUE;
885         ldlm_error_t err;
886         ENTRY;
887
888         policy = ldlm_processing_policy_table[res->lr_type];
889         LASSERT(policy);
890
891         list_for_each_safe(tmp, pos, queue) {
892                 struct ldlm_lock *pending;
893                 pending = list_entry(tmp, struct ldlm_lock, l_res_link);
894
895                 CDEBUG(D_INFO, "Reprocessing lock %p\n", pending);
896
897                 flags = 0;
898                 rc = policy(pending, &flags, 0, &err);
899                 if (rc != LDLM_ITER_CONTINUE)
900                         break;
901         }
902
903         RETURN(rc);
904 }
905
906 int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list)
907 {
908         struct list_head *tmp, *pos;
909         int rc, retval = 0;
910         ENTRY;
911
912         l_check_no_ns_lock(ns);
913
914         list_for_each_safe(tmp, pos, rpc_list) {
915                 struct ldlm_ast_work *w =
916                         list_entry(tmp, struct ldlm_ast_work, w_list);
917
918                 /* It's possible to receive a completion AST before we've set
919                  * the l_completion_ast pointer: either because the AST arrived
920                  * before the reply, or simply because there's a small race
921                  * window between receiving the reply and finishing the local
922                  * enqueue. (bug 842)
923                  *
924                  * This can't happen with the blocking_ast, however, because we
925                  * will never call the local blocking_ast until we drop our
926                  * reader/writer reference, which we won't do until we get the
927                  * reply and finish enqueueing. */
928                 LASSERT(w->w_lock != NULL);
929                 if (w->w_blocking) {
930                         LASSERT(w->w_lock->l_blocking_ast != NULL);
931                         rc = w->w_lock->l_blocking_ast
932                                 (w->w_lock, &w->w_desc, w->w_data,
933                                  LDLM_CB_BLOCKING);
934                 } else if (w->w_lock->l_completion_ast != NULL) {
935                         LASSERT(w->w_lock->l_completion_ast != NULL);
936                         rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags,
937                                                          w->w_data);
938                 } else {
939                         rc = 0;
940                 }
941                 if (rc == -ERESTART)
942                         retval = rc;
943                 else if (rc)
944                         CDEBUG(D_DLMTRACE, "Failed AST - should clean & "
945                                "disconnect client\n");
946                 LDLM_LOCK_PUT(w->w_lock);
947                 list_del(&w->w_list);
948                 OBD_FREE(w, sizeof(*w));
949         }
950         RETURN(retval);
951 }
952
/*
 * Wrapper with a (resource, closure) signature around ldlm_reprocess_all().
 * @closure is not used.  Always returns LDLM_ITER_CONTINUE.
 */
953 static int reprocess_one_queue(struct ldlm_resource *res, void *closure)
954 {
955         ldlm_reprocess_all(res);
956         return LDLM_ITER_CONTINUE;
957 }
958
959 void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
960 {
961         int i, rc;
962
963         l_lock(&ns->ns_lock);
964         for (i = 0; i < RES_HASH_SIZE; i++) {
965                 struct list_head *tmp, *next;
966                 list_for_each_safe(tmp, next, &(ns->ns_hash[i])) {
967                         struct ldlm_resource *res =
968                                 list_entry(tmp, struct ldlm_resource, lr_hash);
969
970                         ldlm_resource_getref(res);
971                         l_unlock(&ns->ns_lock);
972                         rc = reprocess_one_queue(res, NULL);
973                         l_lock(&ns->ns_lock);
974                         next = tmp->next;
975                         ldlm_resource_putref(res);
976                         if (rc == LDLM_ITER_STOP)
977                                 GOTO(out, rc);
978                 }
979         }
980  out:
981         l_unlock(&ns->ns_lock);
982         EXIT;
983 }
984
985 void ldlm_reprocess_all(struct ldlm_resource *res)
986 {
987         struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
988         int rc;
989         ENTRY;
990
991         /* Local lock trees don't get reprocessed. */
992         if (res->lr_namespace->ns_client) {
993                 EXIT;
994                 return;
995         }
996
997  restart:
998         l_lock(&res->lr_namespace->ns_lock);
999         res->lr_tmp = &rpc_list;
1000
1001         rc = ldlm_reprocess_queue(res, &res->lr_converting);
1002         if (rc == LDLM_ITER_CONTINUE)
1003                 ldlm_reprocess_queue(res, &res->lr_waiting);
1004
1005         res->lr_tmp = NULL;
1006         l_unlock(&res->lr_namespace->ns_lock);
1007
1008         rc = ldlm_run_ast_work(res->lr_namespace, &rpc_list);
1009         if (rc == -ERESTART) {
1010                 LASSERT(list_empty(&rpc_list));
1011                 goto restart;
1012         }
1013         EXIT;
1014 }
1015
1016 void ldlm_cancel_callback(struct ldlm_lock *lock)
1017 {
1018         l_lock(&lock->l_resource->lr_namespace->ns_lock);
1019         if (!(lock->l_flags & LDLM_FL_CANCEL)) {
1020                 lock->l_flags |= LDLM_FL_CANCEL;
1021                 if (lock->l_blocking_ast) {
1022                         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
1023                         // l_check_no_ns_lock(lock->l_resource->lr_namespace);
1024                         lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
1025                                              LDLM_CB_CANCELING);
1026                         return;
1027                 } else {
1028                         LDLM_DEBUG(lock, "no blocking ast");
1029                 }
1030         }
1031         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
1032 }
1033
/*
 * Cancel @lock: take it off the waiting-lock list, run the cancel callback,
 * unlink it from its resource and destroy it.
 *
 * The lock must have no readers or writers left; if it does, the lock is
 * dumped at D_OTHER and LBUG() is hit.  Everything after
 * ldlm_del_waiting_lock() runs with the namespace lock held.
 */
1034 void ldlm_lock_cancel(struct ldlm_lock *lock)
1035 {
1036         struct ldlm_resource *res;
1037         struct ldlm_namespace *ns;
1038         ENTRY;
1039
1040         /* There's no race between calling this and taking the ns lock below;
1041          * a lock can only be put on the waiting list once, because it can only
1042          * issue a blocking AST once. */
1043         ldlm_del_waiting_lock(lock);
1044
1045         res = lock->l_resource;
1046         ns = res->lr_namespace;
1047
1048         l_lock(&ns->ns_lock);
1049         /* Please do not, no matter how tempting, remove this LBUG without
1050          * talking to me first. -phik */
1051         if (lock->l_readers || lock->l_writers) {
1052                 LDLM_DEBUG(lock, "lock still has references");
1053                 ldlm_lock_dump(D_OTHER, lock, 0);
1054                 LBUG();
1055         }
1056
        /* Called with ns_lock held; ldlm_cancel_callback() takes and
         * releases the same lock itself. */
1057         ldlm_cancel_callback(lock);
1058
1059         ldlm_resource_unlink_lock(lock);
1060         ldlm_lock_destroy(lock);
1061         l_unlock(&ns->ns_lock);
1062         EXIT;
1063 }
1064
1065 int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
1066 {
1067         struct ldlm_lock *lock = ldlm_handle2lock(lockh);
1068         ENTRY;
1069
1070         if (lock == NULL)
1071                 RETURN(-EINVAL);
1072
1073         lock->l_ast_data = data;
1074         LDLM_LOCK_PUT(lock);
1075         RETURN(0);
1076 }
1077
1078 void ldlm_cancel_locks_for_export(struct obd_export *exp)
1079 {
1080         struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
1081         struct ldlm_lock *lock;
1082         struct ldlm_resource *res;
1083
1084         l_lock(&ns->ns_lock);
1085         while(!list_empty(&exp->exp_ldlm_data.led_held_locks)) { 
1086                 lock = list_entry(exp->exp_ldlm_data.led_held_locks.next,
1087                                   struct ldlm_lock, l_export_chain);
1088                 res = ldlm_resource_getref(lock->l_resource);
1089                 LDLM_DEBUG(lock, "export %p", exp);
1090                 ldlm_lock_cancel(lock);
1091                 l_unlock(&ns->ns_lock);
1092                 ldlm_reprocess_all(res);
1093                 ldlm_resource_putref(res);
1094                 l_lock(&ns->ns_lock);
1095         }
1096         l_unlock(&ns->ns_lock);
1097 }
1098
/*
 * Request conversion of @lock to @new_mode.
 *
 * NOTE: the body starts with an unconditional LBUG(), so this function is
 * effectively disabled; presumably nothing after it is expected to run --
 * confirm LBUG() semantics before relying on the code below.
 *
 * What the code below does: with the namespace lock held, l_req_mode is set
 * to @new_mode and the lock is unlinked from its current resource list.
 *  - Client namespace: if *flags has LDLM_FL_BLOCK_CONV or
 *    LDLM_FL_BLOCK_GRANTED the lock goes on the converting list; otherwise
 *    (flagged as "should never happen" by a second LBUG()) it is granted and
 *    its completion AST, if any, is called.
 *  - Otherwise: the lock goes on the converting list and LDLM_FL_BLOCK_CONV
 *    is set in *flags.
 * If the lock was granted, queued AST work is run after the namespace lock
 * has been released.
 *
 * Returns the lock's resource.
 */
1099 struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
1100                                         int *flags)
1101 {
1102         struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
1103         struct ldlm_resource *res;
1104         struct ldlm_namespace *ns;
1105         int granted = 0;
1106         ENTRY;
1107
1108         LBUG();
1109
1110         res = lock->l_resource;
1111         ns = res->lr_namespace;
1112
1113         l_lock(&ns->ns_lock);
1114
1115         lock->l_req_mode = new_mode;
1116         ldlm_resource_unlink_lock(lock);
1117
1118         /* If this is a local resource, put it on the appropriate list. */
1119         if (res->lr_namespace->ns_client) {
1120                 if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED)) {
1121                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
1122                 } else {
1123                         /* This should never happen, because of the way the
1124                          * server handles conversions. */
1125                         LBUG();
1126
                        /* lr_tmp points at the local rpc_list only for the
                         * duration of the grant. */
1127                         res->lr_tmp = &rpc_list;
1128                         ldlm_grant_lock(lock, NULL, 0, 0);
1129                         res->lr_tmp = NULL;
1130                         granted = 1;
1131                         /* FIXME: completion handling not with ns_lock held ! */
1132                         if (lock->l_completion_ast)
1133                                 lock->l_completion_ast(lock, 0, NULL);
1134                 }
1135         } else {
1136                 /* FIXME: We should try the conversion right away and possibly
1137                  * return success without the need for an extra AST */
1138                 ldlm_resource_add_lock(res, &res->lr_converting, lock);
1139                 *flags |= LDLM_FL_BLOCK_CONV;
1140         }
1141
1142         l_unlock(&ns->ns_lock);
1143
        /* AST work is run only after ns_lock has been released. */
1144         if (granted)
1145                 ldlm_run_ast_work(ns, &rpc_list);
1146         RETURN(res);
1147 }
1148
/*
 * Log a multi-line description of @lock at debug level @level.
 *
 * @level: debug mask passed to every CDEBUG() call.
 * @lock:  lock to describe; NULL is accepted and reported as such.
 * @pos:   caller-supplied number, printed as-is in the first line.
 *
 * Printed: handle cookie and refcount, the peer node (or "local"), the
 * resource name, requested/granted modes with reader/writer counts, and the
 * type-specific policy data for extent, flock and inodebits resources.
 */
1149 void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos)
1150 {
1151         char str[PTL_NALFMT_SIZE];
1152         struct obd_device *obd = NULL;
1153
        /* Nothing to do when @level is not enabled; D_ERROR always counts as
         * enabled. */
1154         if (!((portal_debug | D_ERROR) & level))
1155                 return;
1156
1157         if (!lock) {
1158                 CDEBUG(level, "  NULL LDLM lock\n");
1159                 return;
1160         }
1161
1162         CDEBUG(level, "  -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d)\n",
1163                lock, lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
1164                pos);
        /* Peer identification: prefer the connection of l_export; failing
         * that, use the import reached through l_conn_export; with neither,
         * report the lock as local. */
1165         if (lock->l_conn_export != NULL)
1166                 obd = lock->l_conn_export->exp_obd;
1167         if (lock->l_export && lock->l_export->exp_connection) {
1168                 CDEBUG(level, "  Node: NID "LPX64" (%s) on %s (rhandle: "LPX64")\n",
1169                        lock->l_export->exp_connection->c_peer.peer_nid,
1170                        portals_nid2str(lock->l_export->exp_connection->c_peer.peer_ni->pni_number,
1171                                        lock->l_export->exp_connection->c_peer.peer_nid, str),
1172                        lock->l_export->exp_connection->c_peer.peer_ni->pni_name,
1173                        lock->l_remote_handle.cookie);
1174         } else if (obd == NULL) {
1175                 CDEBUG(level, "  Node: local\n");
1176         } else {
                /* NOTE(review): cl_import and imp_connection are dereferenced
                 * without a NULL check -- confirm they are always set here. */
1177                 struct obd_import *imp = obd->u.cli.cl_import;
1178                 CDEBUG(level, "  Node: NID "LPX64" (%s) on %s (rhandle: "LPX64")\n",
1179                        imp->imp_connection->c_peer.peer_nid,
1180                        portals_nid2str(imp->imp_connection->c_peer.peer_ni->pni_number,
1181                                        imp->imp_connection->c_peer.peer_nid, str),
1182                        imp->imp_connection->c_peer.peer_ni->pni_name,
1183                        lock->l_remote_handle.cookie);
1184         }
1185         CDEBUG(level, "  Resource: %p ("LPU64"/"LPU64")\n", lock->l_resource,
1186                lock->l_resource->lr_name.name[0],
1187                lock->l_resource->lr_name.name[1]);
1188         CDEBUG(level, "  Req mode: %d, grant mode: %d, rc: %u, read: %d, "
1189                "write: %d\n", (int)lock->l_req_mode, (int)lock->l_granted_mode,
1190                atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers);
        /* Policy data is printed according to the resource type; plain
         * resources print nothing extra. */
1191         if (lock->l_resource->lr_type == LDLM_EXTENT)
1192                 CDEBUG(level, "  Extent: "LPU64" -> "LPU64
1193                        " (req "LPU64"-"LPU64")\n",
1194                        lock->l_policy_data.l_extent.start,
1195                        lock->l_policy_data.l_extent.end,
1196                        lock->l_req_extent.start, lock->l_req_extent.end);
1197         else if (lock->l_resource->lr_type == LDLM_FLOCK)
1198                 CDEBUG(level, "  Pid: "LPU64" Extent: "LPU64" -> "LPU64"\n",
1199                        lock->l_policy_data.l_flock.pid,
1200                        lock->l_policy_data.l_flock.start,
1201                        lock->l_policy_data.l_flock.end);
1202         else if (lock->l_resource->lr_type == LDLM_IBITS)
1203                 CDEBUG(level, " Bits: "LPX64"\n",
1204                        lock->l_policy_data.l_inodebits.bits);
1205 }
1206
1207 void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
1208 {
1209         struct ldlm_lock *lock;
1210
1211         lock = ldlm_handle2lock(lockh);
1212         if (lock == NULL)
1213                 return;
1214
1215         ldlm_lock_dump(D_OTHER, lock, 0);
1216
1217         LDLM_LOCK_PUT(lock);
1218 }