Whamcloud - gitweb
b=2518
[fs/lustre-release.git] / lustre / ldlm / ldlm_lock.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
5  *   Author: Peter Braam <braam@clusterfs.com>
6  *   Author: Phil Schwan <phil@clusterfs.com>
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #define DEBUG_SUBSYSTEM S_LDLM
25
26 #ifdef __KERNEL__
27 # include <linux/slab.h>
28 # include <linux/module.h>
29 # include <linux/lustre_dlm.h>
30 #else
31 # include <liblustre.h>
32 # include <linux/kp30.h>
33 #endif
34
35 #include <linux/obd_class.h>
36 #include "ldlm_internal.h"
37
38 //struct lustre_lock ldlm_everything_lock;
39
40 /* lock types */
41 char *ldlm_lockname[] = {
42         [0] "--",
43         [LCK_EX] "EX",
44         [LCK_PW] "PW",
45         [LCK_PR] "PR",
46         [LCK_CW] "CW",
47         [LCK_CR] "CR",
48         [LCK_NL] "NL",
49         [LCK_GROUP] "GROUP"
50 };
51 char *ldlm_typename[] = {
52         [LDLM_PLAIN] "PLN",
53         [LDLM_EXTENT] "EXT",
54         [LDLM_FLOCK] "FLK",
55 };
56
57 char *ldlm_it2str(int it)
58 {
59         switch (it) {
60         case IT_OPEN:
61                 return "open";
62         case IT_CREAT:
63                 return "creat";
64         case (IT_OPEN | IT_CREAT):
65                 return "open|creat";
66         case IT_READDIR:
67                 return "readdir";
68         case IT_GETATTR:
69                 return "getattr";
70         case IT_LOOKUP:
71                 return "lookup";
72         case IT_UNLINK:
73                 return "unlink";
74         case IT_GETXATTR:
75                 return "getxattr";
76         default:
77                 CERROR("Unknown intent %d\n", it);
78                 return "UNKNOWN";
79         }
80 }
81
82 extern kmem_cache_t *ldlm_lock_slab;
83 struct lustre_lock ldlm_handle_lock;
84
85 static ldlm_processing_policy ldlm_processing_policy_table[] = {
86         [LDLM_PLAIN] ldlm_process_plain_lock,
87         [LDLM_EXTENT] ldlm_process_extent_lock,
88 #ifdef __KERNEL__
89         [LDLM_FLOCK] ldlm_process_flock_lock,
90 #endif
91 };
92
93 ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res)
94 {
95         return ldlm_processing_policy_table[res->lr_type];
96 }
97
98 void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg)
99 {
100         ns->ns_policy = arg;
101 }
102
103 /*
104  * REFCOUNTED LOCK OBJECTS
105  */
106
107
108 /*
109  * Lock refcounts, during creation:
110  *   - one special one for allocation, dec'd only once in destroy
111  *   - one for being a lock that's in-use
112  *   - one for the addref associated with a new lock
113  */
114 struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock)
115 {
116         atomic_inc(&lock->l_refc);
117         return lock;
118 }
119
120 void ldlm_lock_put(struct ldlm_lock *lock)
121 {
122         ENTRY;
123
124         if (atomic_dec_and_test(&lock->l_refc)) {
125                 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
126
127                 l_lock(&ns->ns_lock);
128                 LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing");
129                 LASSERT(lock->l_destroyed);
130                 LASSERT(list_empty(&lock->l_res_link));
131
132                 spin_lock(&ns->ns_counter_lock);
133                 ns->ns_locks--;
134                 spin_unlock(&ns->ns_counter_lock);
135
136                 ldlm_resource_putref(lock->l_resource);
137                 lock->l_resource = NULL;
138                 if (lock->l_export)
139                         class_export_put(lock->l_export);
140
141                 if (lock->l_parent)
142                         LDLM_LOCK_PUT(lock->l_parent);
143
144                 if (lock->l_lvb_data != NULL)
145                         OBD_FREE(lock->l_lvb_data, lock->l_lvb_len);
146
147                 OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
148                 l_unlock(&ns->ns_lock);
149         }
150
151         EXIT;
152 }
153
154 void ldlm_lock_remove_from_lru(struct ldlm_lock *lock)
155 {
156         ENTRY;
157         l_lock(&lock->l_resource->lr_namespace->ns_lock);
158         if (!list_empty(&lock->l_lru)) {
159                 list_del_init(&lock->l_lru);
160                 lock->l_resource->lr_namespace->ns_nr_unused--;
161                 LASSERT(lock->l_resource->lr_namespace->ns_nr_unused >= 0);
162         }
163         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
164         EXIT;
165 }
166
167 /* This used to have a 'strict' flact, which recovery would use to mark an
168  * in-use lock as needing-to-die.  Lest I am ever tempted to put it back, I
169  * shall explain why it's gone: with the new hash table scheme, once you call
170  * ldlm_lock_destroy, you can never drop your final references on this lock.
171  * Because it's not in the hash table anymore.  -phil */
172 void ldlm_lock_destroy(struct ldlm_lock *lock)
173 {
174         ENTRY;
175         l_lock(&lock->l_resource->lr_namespace->ns_lock);
176
177         if (!list_empty(&lock->l_children)) {
178                 LDLM_ERROR(lock, "still has children (%p)!",
179                            lock->l_children.next);
180                 ldlm_lock_dump(D_ERROR, lock, 0);
181                 LBUG();
182         }
183         if (lock->l_readers || lock->l_writers) {
184                 LDLM_ERROR(lock, "lock still has references");
185                 ldlm_lock_dump(D_ERROR, lock, 0);
186                 LBUG();
187         }
188
189         if (!list_empty(&lock->l_res_link)) {
190                 ldlm_lock_dump(D_ERROR, lock, 0);
191                 LBUG();
192         }
193
194         if (lock->l_destroyed) {
195                 LASSERT(list_empty(&lock->l_lru));
196                 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
197                 EXIT;
198                 return;
199         }
200         lock->l_destroyed = 1;
201
202         list_del_init(&lock->l_export_chain);
203         ldlm_lock_remove_from_lru(lock);
204         class_handle_unhash(&lock->l_handle);
205
206 #if 0
207         /* Wake anyone waiting for this lock */
208         /* FIXME: I should probably add yet another flag, instead of using
209          * l_export to only call this on clients */
210         if (lock->l_export)
211                 class_export_put(lock->l_export);
212         lock->l_export = NULL;
213         if (lock->l_export && lock->l_completion_ast)
214                 lock->l_completion_ast(lock, 0);
215 #endif
216
217         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
218         LDLM_LOCK_PUT(lock);
219         EXIT;
220 }
221
/* Addref callback registered with class_handle_hash() in ldlm_lock_new().
 * It is called by portals_handle2object with the handle lock taken, and
 * takes one reference on the lock behind the opaque pointer. */
static void lock_handle_addref(void *lock)
{
        struct ldlm_lock *target = lock;

        LDLM_LOCK_GET(target);
}
227
228 /*
229  * usage: pass in a resource on which you have done ldlm_resource_get
230  *        pass in a parent lock on which you have done a ldlm_lock_get
231  *        after return, ldlm_*_put the resource and parent
232  * returns: lock with refcount 1
233  */
234 static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
235                                        struct ldlm_resource *resource)
236 {
237         struct ldlm_lock *lock;
238         ENTRY;
239
240         if (resource == NULL)
241                 LBUG();
242
243         OBD_SLAB_ALLOC(lock, ldlm_lock_slab, SLAB_NOFS, sizeof(*lock));
244         if (lock == NULL)
245                 RETURN(NULL);
246
247         lock->l_resource = ldlm_resource_getref(resource);
248
249         atomic_set(&lock->l_refc, 2);
250         INIT_LIST_HEAD(&lock->l_children);
251         INIT_LIST_HEAD(&lock->l_res_link);
252         INIT_LIST_HEAD(&lock->l_lru);
253         INIT_LIST_HEAD(&lock->l_export_chain);
254         INIT_LIST_HEAD(&lock->l_pending_chain);
255         init_waitqueue_head(&lock->l_waitq);
256
257         spin_lock(&resource->lr_namespace->ns_counter_lock);
258         resource->lr_namespace->ns_locks++;
259         spin_unlock(&resource->lr_namespace->ns_counter_lock);
260
261         if (parent != NULL) {
262                 l_lock(&parent->l_resource->lr_namespace->ns_lock);
263                 lock->l_parent = LDLM_LOCK_GET(parent);
264                 list_add(&lock->l_childof, &parent->l_children);
265                 l_unlock(&parent->l_resource->lr_namespace->ns_lock);
266         }
267
268         INIT_LIST_HEAD(&lock->l_handle.h_link);
269         class_handle_hash(&lock->l_handle, lock_handle_addref);
270
271         RETURN(lock);
272 }
273
274 int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
275                               struct ldlm_res_id new_resid)
276 {
277         struct ldlm_resource *oldres = lock->l_resource;
278         ENTRY;
279
280         l_lock(&ns->ns_lock);
281         if (memcmp(&new_resid, &lock->l_resource->lr_name,
282                    sizeof(lock->l_resource->lr_name)) == 0) {
283                 /* Nothing to do */
284                 l_unlock(&ns->ns_lock);
285                 RETURN(0);
286         }
287
288         LASSERT(new_resid.name[0] != 0);
289
290         /* This function assumes that the lock isn't on any lists */
291         LASSERT(list_empty(&lock->l_res_link));
292
293         lock->l_resource = ldlm_resource_get(ns, NULL, new_resid,
294                                              lock->l_resource->lr_type, 1);
295         if (lock->l_resource == NULL) {
296                 LBUG();
297                 RETURN(-ENOMEM);
298         }
299
300         /* ...and the flowers are still standing! */
301         ldlm_resource_putref(oldres);
302
303         l_unlock(&ns->ns_lock);
304         RETURN(0);
305 }
306
307 /*
308  *  HANDLES
309  */
310
311 void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
312 {
313         lockh->cookie = lock->l_handle.h_cookie;
314 }
315
316 /* if flags: atomically get the lock and set the flags.
317  *           Return NULL if flag already set
318  */
319
320 struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *handle, int flags)
321 {
322         struct ldlm_namespace *ns;
323         struct ldlm_lock *lock = NULL, *retval = NULL;
324         ENTRY;
325
326         LASSERT(handle);
327
328         lock = class_handle2object(handle->cookie);
329         if (lock == NULL)
330                 RETURN(NULL);
331
332         LASSERT(lock->l_resource != NULL);
333         ns = lock->l_resource->lr_namespace;
334         LASSERT(ns != NULL);
335
336         l_lock(&ns->ns_lock);
337
338         /* It's unlikely but possible that someone marked the lock as
339          * destroyed after we did handle2object on it */
340         if (lock->l_destroyed) {
341                 CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
342                 LDLM_LOCK_PUT(lock);
343                 GOTO(out, retval);
344         }
345
346         if (flags && (lock->l_flags & flags)) {
347                 LDLM_LOCK_PUT(lock);
348                 GOTO(out, retval);
349         }
350
351         if (flags)
352                 lock->l_flags |= flags;
353
354         retval = lock;
355         EXIT;
356  out:
357         l_unlock(&ns->ns_lock);
358         return retval;
359 }
360
361 struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *ns,
362                                       struct lustre_handle *handle)
363 {
364         struct ldlm_lock *retval = NULL;
365
366         l_lock(&ns->ns_lock);
367         retval = __ldlm_handle2lock(handle, 0);
368         l_unlock(&ns->ns_lock);
369
370         return retval;
371 }
372
373 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
374 {
375         ldlm_res2desc(lock->l_resource, &desc->l_resource);
376         desc->l_req_mode = lock->l_req_mode;
377         desc->l_granted_mode = lock->l_granted_mode;
378         memcpy(&desc->l_policy_data, &lock->l_policy_data,
379                sizeof(desc->l_policy_data));
380 }
381
382 void ldlm_add_ast_work_item(struct ldlm_lock *lock, struct ldlm_lock *new,
383                             void *data, int datalen)
384 {
385         struct ldlm_ast_work *w;
386         ENTRY;
387
388         l_lock(&lock->l_resource->lr_namespace->ns_lock);
389         if (new && (lock->l_flags & LDLM_FL_AST_SENT))
390                 GOTO(out, 0);
391
392         CDEBUG(D_OTHER, "lock %p incompatible; sending blocking AST.\n", lock);
393
394         OBD_ALLOC(w, sizeof(*w));
395         if (!w) {
396                 LBUG();
397                 GOTO(out, 0);
398         }
399
400         w->w_data = data;
401         w->w_datalen = datalen;
402         if (new) {
403                 LDLM_DEBUG(lock, "lock incompatible; sending blocking AST.");
404                 lock->l_flags |= LDLM_FL_AST_SENT;
405                 /* If the enqueuing client said so, tell the AST recipient to
406                  * discard dirty data, rather than writing back. */
407                 if (new->l_flags & LDLM_AST_DISCARD_DATA)
408                         lock->l_flags |= LDLM_FL_DISCARD_DATA;
409                 w->w_blocking = 1;
410                 ldlm_lock2desc(new, &w->w_desc);
411         }
412
413         w->w_lock = LDLM_LOCK_GET(lock);
414         list_add(&w->w_list, lock->l_resource->lr_tmp);
415         EXIT;
416  out:
417         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
418 }
419
420 void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode)
421 {
422         struct ldlm_lock *lock;
423
424         lock = ldlm_handle2lock(lockh);
425         ldlm_lock_addref_internal(lock, mode);
426         LDLM_LOCK_PUT(lock);
427 }
428
429 /* only called for local locks */
430 void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
431 {
432         l_lock(&lock->l_resource->lr_namespace->ns_lock);
433         ldlm_lock_remove_from_lru(lock);
434         if (mode & (LCK_NL | LCK_CR | LCK_PR))
435                 lock->l_readers++;
436         if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP))
437                 lock->l_writers++;
438         lock->l_last_used = jiffies;
439         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
440         LDLM_LOCK_GET(lock);
441         LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
442 }
443
444 void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
445 {
446         struct ldlm_namespace *ns;
447         ENTRY;
448
449         LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
450         ns = lock->l_resource->lr_namespace;
451         l_lock(&ns->ns_lock);
452         if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
453                 LASSERT(lock->l_readers > 0);
454                 lock->l_readers--;
455         }
456         if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP)) {
457                 LASSERT(lock->l_writers > 0);
458                 lock->l_writers--;
459         }
460
461         if (lock->l_flags & LDLM_FL_LOCAL &&
462             !lock->l_readers && !lock->l_writers) {
463                 /* If this is a local lock on a server namespace and this was
464                  * the last reference, cancel the lock. */
465                 CDEBUG(D_INFO, "forcing cancel of local lock\n");
466                 lock->l_flags |= LDLM_FL_CBPENDING;
467         }
468
469         if (!lock->l_readers && !lock->l_writers &&
470             (lock->l_flags & LDLM_FL_CBPENDING)) {
471                 /* If we received a blocked AST and this was the last reference,
472                  * run the callback. */
473                 if (ns->ns_client == LDLM_NAMESPACE_SERVER && lock->l_export)
474                         CERROR("FL_CBPENDING set on non-local lock--just a "
475                                "warning\n");
476
477                 LDLM_DEBUG(lock, "final decref done on cbpending lock");
478
479                 LDLM_LOCK_GET(lock); /* dropped by bl thread */
480                 ldlm_lock_remove_from_lru(lock);
481 #ifdef __KERNEL__
482                 ldlm_bl_to_thread(ns, NULL, lock);
483                 l_unlock(&ns->ns_lock);
484 #else
485                 l_unlock(&ns->ns_lock);
486                 ldlm_handle_bl_callback(ns, NULL, lock);
487 #endif
488         } else if (ns->ns_client == LDLM_NAMESPACE_CLIENT &&
489                    !lock->l_readers && !lock->l_writers) {
490                 /* If this is a client-side namespace and this was the last
491                  * reference, put it on the LRU. */
492                 LASSERT(list_empty(&lock->l_lru));
493                 LASSERT(ns->ns_nr_unused >= 0);
494                 list_add_tail(&lock->l_lru, &ns->ns_unused_list);
495                 ns->ns_nr_unused++;
496                 l_unlock(&ns->ns_lock);
497                 ldlm_cancel_lru(ns, LDLM_ASYNC);
498         } else {
499                 l_unlock(&ns->ns_lock);
500         }
501
502         LDLM_LOCK_PUT(lock);    /* matches the ldlm_lock_get in addref */
503
504         EXIT;
505 }
506
507 void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
508 {
509         struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
510         LASSERT(lock != NULL);
511         ldlm_lock_decref_internal(lock, mode);
512         LDLM_LOCK_PUT(lock);
513 }
514
515 /* This will drop a lock reference and mark it for destruction, but will not
516  * necessarily cancel the lock before returning. */
517 void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
518 {
519         struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
520         ENTRY;
521
522         LASSERT(lock != NULL);
523
524         LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
525         l_lock(&lock->l_resource->lr_namespace->ns_lock);
526         lock->l_flags |= LDLM_FL_CBPENDING;
527         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
528         ldlm_lock_decref_internal(lock, mode);
529         LDLM_LOCK_PUT(lock);
530 }
531
532 /* NOTE: called by
533  *  - ldlm_lock_enqueue
534  *  - ldlm_reprocess_queue
535  *  - ldlm_lock_convert
536  */
537 void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen,
538                      int run_ast)
539 {
540         struct ldlm_resource *res = lock->l_resource;
541         ENTRY;
542
543         l_lock(&lock->l_resource->lr_namespace->ns_lock);
544         lock->l_granted_mode = lock->l_req_mode;
545         ldlm_resource_add_lock(res, &res->lr_granted, lock);
546
547         if (lock->l_granted_mode < res->lr_most_restr)
548                 res->lr_most_restr = lock->l_granted_mode;
549
550         if (run_ast && lock->l_completion_ast != NULL)
551                 ldlm_add_ast_work_item(lock, NULL, data, datalen);
552
553         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
554         EXIT;
555 }
556
557 /* returns a referenced lock or NULL.  See the flag descriptions below, in the
558  * comment above ldlm_lock_match */
559 static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode,
560                                       ldlm_policy_data_t *policy,
561                                       struct ldlm_lock *old_lock, int flags)
562 {
563         struct ldlm_lock *lock;
564         struct list_head *tmp;
565
566         list_for_each(tmp, queue) {
567                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
568
569                 if (lock == old_lock)
570                         break;
571
572                 /* llite sometimes wants to match locks that will be
573                  * canceled when their users drop, but we allow it to match
574                  * if it passes in CBPENDING and the lock still has users.
575                  * this is generally only going to be used by children 
576                  * whose parents already hold a lock so forward progress
577                  * can still happen. */
578                 if (lock->l_flags & LDLM_FL_CBPENDING &&
579                     !(flags & LDLM_FL_CBPENDING))
580                         continue;
581                 if (lock->l_flags & LDLM_FL_CBPENDING &&
582                     lock->l_readers == 0 && lock->l_writers == 0)
583                         continue;
584
585                 if (!(lock->l_req_mode & mode))
586                         continue;
587
588                 if (lock->l_resource->lr_type == LDLM_EXTENT &&
589                     (lock->l_policy_data.l_extent.start >
590                      policy->l_extent.start ||
591                      lock->l_policy_data.l_extent.end < policy->l_extent.end))
592                         continue;
593
594                 if (lock->l_resource->lr_type == LDLM_EXTENT &&
595                     mode == LCK_GROUP &&
596                     lock->l_policy_data.l_extent.gid != policy->l_extent.gid)
597                         continue;
598
599                 if (lock->l_destroyed)
600                         continue;
601
602                 if ((flags & LDLM_FL_LOCAL_ONLY) &&
603                     !(lock->l_flags & LDLM_FL_LOCAL))
604                         continue;
605
606                 if (flags & LDLM_FL_TEST_LOCK)
607                         LDLM_LOCK_GET(lock);
608                 else
609                         ldlm_lock_addref_internal(lock, mode);
610                 return lock;
611         }
612
613         return NULL;
614 }
615
616 void ldlm_lock_allow_match(struct ldlm_lock *lock)
617 {
618         l_lock(&lock->l_resource->lr_namespace->ns_lock);
619         lock->l_flags |= LDLM_FL_CAN_MATCH;
620         wake_up(&lock->l_waitq);
621         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
622 }
623
624 /* Can be called in two ways:
625  *
626  * If 'ns' is NULL, then lockh describes an existing lock that we want to look
627  * for a duplicate of.
628  *
629  * Otherwise, all of the fields must be filled in, to match against.
630  *
631  * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the
632  *     server (ie, connh is NULL)
633  * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted
634  *     list will be considered
635  * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked
636  *     to be canceled can still be matched as long as they still have reader
637  *     or writer refernces
638  * If 'flags' contains LDLM_FL_TEST_LOCK, then don't actually reference a lock,
639  *     just tell us if we would have matched.
640  *
641  * Returns 1 if it finds an already-existing lock that is compatible; in this
642  * case, lockh is filled in with a addref()ed lock
643  */
644 int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
645                     struct ldlm_res_id *res_id, __u32 type,
646                     ldlm_policy_data_t *policy, ldlm_mode_t mode,
647                     struct lustre_handle *lockh)
648 {
649         struct ldlm_resource *res;
650         struct ldlm_lock *lock, *old_lock = NULL;
651         int rc = 0;
652         ENTRY;
653
654         if (ns == NULL) {
655                 old_lock = ldlm_handle2lock(lockh);
656                 LASSERT(old_lock);
657
658                 ns = old_lock->l_resource->lr_namespace;
659                 res_id = &old_lock->l_resource->lr_name;
660                 type = old_lock->l_resource->lr_type;
661                 mode = old_lock->l_req_mode;
662         }
663
664         res = ldlm_resource_get(ns, NULL, *res_id, type, 0);
665         if (res == NULL) {
666                 LASSERT(old_lock == NULL);
667                 RETURN(0);
668         }
669
670         l_lock(&ns->ns_lock);
671
672         lock = search_queue(&res->lr_granted, mode, policy, old_lock, flags);
673         if (lock != NULL)
674                 GOTO(out, rc = 1);
675         if (flags & LDLM_FL_BLOCK_GRANTED)
676                 GOTO(out, rc = 0);
677         lock = search_queue(&res->lr_converting, mode, policy, old_lock, flags);
678         if (lock != NULL)
679                 GOTO(out, rc = 1);
680         lock = search_queue(&res->lr_waiting, mode, policy, old_lock, flags);
681         if (lock != NULL)
682                 GOTO(out, rc = 1);
683
684         EXIT;
685  out:
686         ldlm_resource_putref(res);
687         l_unlock(&ns->ns_lock);
688
689         if (lock) {
690                 ldlm_lock2handle(lock, lockh);
691                 if (!(lock->l_flags & LDLM_FL_CAN_MATCH)) {
692                         struct l_wait_info lwi;
693                         if (lock->l_completion_ast)
694                                 lock->l_completion_ast(lock,
695                                                        LDLM_FL_WAIT_NOREPROC,
696                                                        NULL);
697
698                         lwi = LWI_TIMEOUT_INTR(obd_timeout*HZ, NULL,NULL,NULL);
699
700                         /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */
701                         l_wait_event(lock->l_waitq,
702                                      (lock->l_flags & LDLM_FL_CAN_MATCH), &lwi);
703                 }
704         }
705         if (rc)
706                 LDLM_DEBUG(lock, "matched ("LPU64" "LPU64")",
707                            type == LDLM_PLAIN ? res_id->name[2] :
708                                 policy->l_extent.start,
709                            type == LDLM_PLAIN ? res_id->name[3] :
710                                 policy->l_extent.end);
711         else if (!(flags & LDLM_FL_TEST_LOCK)) /* less verbose for test-only */
712                 LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res "
713                                   LPU64"/"LPU64" ("LPU64" "LPU64")", ns,
714                                   type, mode, res_id->name[0], res_id->name[1],
715                                   type == LDLM_PLAIN ? res_id->name[2] :
716                                         policy->l_extent.start,
717                                   type == LDLM_PLAIN ? res_id->name[3] :
718                                         policy->l_extent.end);
719
720         if (old_lock)
721                 LDLM_LOCK_PUT(old_lock);
722         if (flags & LDLM_FL_TEST_LOCK && rc)
723                 LDLM_LOCK_PUT(lock);
724
725         return rc;
726 }
727
728 /* Returns a referenced lock */
729 struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
730                                    struct lustre_handle *parent_lock_handle,
731                                    struct ldlm_res_id res_id, __u32 type,
732                                    ldlm_mode_t mode,
733                                    ldlm_blocking_callback blocking,
734                                    ldlm_completion_callback completion,
735                                    ldlm_glimpse_callback glimpse,
736                                    void *data, __u32 lvb_len)
737 {
738         struct ldlm_resource *res, *parent_res = NULL;
739         struct ldlm_lock *lock, *parent_lock = NULL;
740         ENTRY;
741
742         if (parent_lock_handle) {
743                 parent_lock = ldlm_handle2lock(parent_lock_handle);
744                 if (parent_lock)
745                         parent_res = parent_lock->l_resource;
746         }
747
748         res = ldlm_resource_get(ns, parent_res, res_id, type, 1);
749         if (res == NULL)
750                 RETURN(NULL);
751
752         lock = ldlm_lock_new(parent_lock, res);
753         ldlm_resource_putref(res);
754         if (parent_lock != NULL)
755                 LDLM_LOCK_PUT(parent_lock);
756
757         if (lock == NULL)
758                 RETURN(NULL);
759
760         lock->l_req_mode = mode;
761         lock->l_ast_data = data;
762         lock->l_blocking_ast = blocking;
763         lock->l_completion_ast = completion;
764         lock->l_glimpse_ast = glimpse;
765
766         if (lvb_len) {
767                 lock->l_lvb_len = lvb_len;
768                 OBD_ALLOC(lock->l_lvb_data, lvb_len);
769                 if (lock->l_lvb_data == NULL) {
770                         OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock));
771                         RETURN(NULL);
772                 }
773         }
774
775         RETURN(lock);
776 }
777
778 ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
779                                struct ldlm_lock **lockp,
780                                void *cookie, int *flags)
781 {
782         struct ldlm_lock *lock = *lockp;
783         struct ldlm_resource *res = lock->l_resource;
784         int local = res->lr_namespace->ns_client;
785         ldlm_processing_policy policy;
786         ldlm_error_t rc = ELDLM_OK;
787         ENTRY;
788
789         /* policies are not executed on the client or during replay */
790         if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
791             && !local && ns->ns_policy) {
792                 rc = ns->ns_policy(ns, lockp, cookie, lock->l_req_mode, *flags,
793                                    NULL);
794                 if (rc == ELDLM_LOCK_REPLACED) {
795                         /* The lock that was returned has already been granted,
796                          * and placed into lockp.  If it's not the same as the
797                          * one we passed in, then destroy the old one and our
798                          * work here is done. */
799                         if (lock != *lockp) {
800                                 ldlm_lock_destroy(lock);
801                                 LDLM_LOCK_PUT(lock);
802                         }
803                         *flags |= LDLM_FL_LOCK_CHANGED;
804                         RETURN(0);
805                 } else if (rc == ELDLM_LOCK_ABORTED ||
806                            (rc == 0 && (*flags & LDLM_FL_INTENT_ONLY))) {
807                         ldlm_lock_destroy(lock);
808                         RETURN(rc);
809                 }
810                 LASSERT(rc == ELDLM_OK);
811         }
812
813         l_lock(&ns->ns_lock);
814         if (local && lock->l_req_mode == lock->l_granted_mode) {
815                 /* The server returned a blocked lock, but it was granted before
816                  * we got a chance to actually enqueue it.  We don't need to do
817                  * anything else. */
818                 *flags &= ~(LDLM_FL_BLOCK_GRANTED |
819                             LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
820                 GOTO(out, ELDLM_OK);
821         }
822
823         /* Some flags from the enqueue want to make it into the AST, via the
824          * lock's l_flags. */
825         lock->l_flags |= (*flags & LDLM_AST_DISCARD_DATA);
826
827         /* This distinction between local lock trees is very important; a client
828          * namespace only has information about locks taken by that client, and
829          * thus doesn't have enough information to decide for itself if it can
830          * be granted (below).  In this case, we do exactly what the server
831          * tells us to do, as dictated by the 'flags'.
832          *
833          * We do exactly the same thing during recovery, when the server is
834          * more or less trusting the clients not to lie.
835          *
836          * FIXME (bug 268): Detect obvious lies by checking compatibility in
837          * granted/converting queues. */
838         ldlm_resource_unlink_lock(lock);
839         if (local) {
840                 if (*flags & LDLM_FL_BLOCK_CONV)
841                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
842                 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
843                         ldlm_resource_add_lock(res, &res->lr_waiting, lock);
844                 else
845                         ldlm_grant_lock(lock, NULL, 0, 0);
846                 GOTO(out, ELDLM_OK);
847         } else if (*flags & LDLM_FL_REPLAY) {
848                 if (*flags & LDLM_FL_BLOCK_CONV) {
849                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
850                         GOTO(out, ELDLM_OK);
851                 } else if (*flags & LDLM_FL_BLOCK_WAIT) {
852                         ldlm_resource_add_lock(res, &res->lr_waiting, lock);
853                         GOTO(out, ELDLM_OK);
854                 } else if (*flags & LDLM_FL_BLOCK_GRANTED) {
855                         ldlm_grant_lock(lock, NULL, 0, 0);
856                         GOTO(out, ELDLM_OK);
857                 }
858                 /* If no flags, fall through to normal enqueue path. */
859         }
860
861         policy = ldlm_processing_policy_table[res->lr_type];
862         policy(lock, flags, 1, &rc);
863         EXIT;
864 out:
865         l_unlock(&ns->ns_lock);
866         return rc;
867 }
868
869 /* Must be called with namespace taken: queue is waiting or converting. */
870 int ldlm_reprocess_queue(struct ldlm_resource *res, struct list_head *queue)
871 {
872         struct list_head *tmp, *pos;
873         ldlm_processing_policy policy;
874         int flags;
875         int rc = LDLM_ITER_CONTINUE;
876         ldlm_error_t err;
877         ENTRY;
878
879         policy = ldlm_processing_policy_table[res->lr_type];
880         LASSERT(policy);
881
882         list_for_each_safe(tmp, pos, queue) {
883                 struct ldlm_lock *pending;
884                 pending = list_entry(tmp, struct ldlm_lock, l_res_link);
885
886                 CDEBUG(D_INFO, "Reprocessing lock %p\n", pending);
887
888                 flags = 0;
889                 rc = policy(pending, &flags, 0, &err);
890                 if (rc != LDLM_ITER_CONTINUE)
891                         break;
892         }
893
894         RETURN(rc);
895 }
896
897 int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list)
898 {
899         struct list_head *tmp, *pos;
900         int rc, retval = 0;
901         ENTRY;
902
903         l_check_no_ns_lock(ns);
904
905         list_for_each_safe(tmp, pos, rpc_list) {
906                 struct ldlm_ast_work *w =
907                         list_entry(tmp, struct ldlm_ast_work, w_list);
908
909                 /* It's possible to receive a completion AST before we've set
910                  * the l_completion_ast pointer: either because the AST arrived
911                  * before the reply, or simply because there's a small race
912                  * window between receiving the reply and finishing the local
913                  * enqueue. (bug 842)
914                  *
915                  * This can't happen with the blocking_ast, however, because we
916                  * will never call the local blocking_ast until we drop our
917                  * reader/writer reference, which we won't do until we get the
918                  * reply and finish enqueueing. */
919                 LASSERT(w->w_lock != NULL);
920                 if (w->w_blocking) {
921                         LASSERT(w->w_lock->l_blocking_ast != NULL);
922                         rc = w->w_lock->l_blocking_ast
923                                 (w->w_lock, &w->w_desc, w->w_data,
924                                  LDLM_CB_BLOCKING);
925                 } else if (w->w_lock->l_completion_ast != NULL) {
926                         LASSERT(w->w_lock->l_completion_ast != NULL);
927                         rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags,
928                                                          w->w_data);
929                 } else {
930                         rc = 0;
931                 }
932                 if (rc == -ERESTART)
933                         retval = rc;
934                 else if (rc)
935                         CDEBUG(D_DLMTRACE, "Failed AST - should clean & "
936                                "disconnect client\n");
937                 LDLM_LOCK_PUT(w->w_lock);
938                 list_del(&w->w_list);
939                 OBD_FREE(w, sizeof(*w));
940         }
941         RETURN(retval);
942 }
943
944 static int reprocess_one_queue(struct ldlm_resource *res, void *closure)
945 {
946         ldlm_reprocess_all(res);
947         return LDLM_ITER_CONTINUE;
948 }
949
950 void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
951 {
952         int i, rc;
953
954         l_lock(&ns->ns_lock);
955         for (i = 0; i < RES_HASH_SIZE; i++) {
956                 struct list_head *tmp, *next;
957                 list_for_each_safe(tmp, next, &(ns->ns_hash[i])) {
958                         struct ldlm_resource *res =
959                                 list_entry(tmp, struct ldlm_resource, lr_hash);
960
961                         ldlm_resource_getref(res);
962                         l_unlock(&ns->ns_lock);
963                         rc = reprocess_one_queue(res, NULL);
964                         l_lock(&ns->ns_lock);
965                         next = tmp->next;
966                         ldlm_resource_putref(res);
967                         if (rc == LDLM_ITER_STOP)
968                                 GOTO(out, rc);
969                 }
970         }
971  out:
972         l_unlock(&ns->ns_lock);
973         EXIT;
974 }
975
976 void ldlm_reprocess_all(struct ldlm_resource *res)
977 {
978         struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
979         int rc;
980         ENTRY;
981
982         /* Local lock trees don't get reprocessed. */
983         if (res->lr_namespace->ns_client) {
984                 EXIT;
985                 return;
986         }
987
988  restart:
989         l_lock(&res->lr_namespace->ns_lock);
990         res->lr_tmp = &rpc_list;
991
992         rc = ldlm_reprocess_queue(res, &res->lr_converting);
993         if (rc == LDLM_ITER_CONTINUE)
994                 ldlm_reprocess_queue(res, &res->lr_waiting);
995
996         res->lr_tmp = NULL;
997         l_unlock(&res->lr_namespace->ns_lock);
998
999         rc = ldlm_run_ast_work(res->lr_namespace, &rpc_list);
1000         if (rc == -ERESTART) {
1001                 LASSERT(list_empty(&rpc_list));
1002                 goto restart;
1003         }
1004         EXIT;
1005 }
1006
1007 void ldlm_cancel_callback(struct ldlm_lock *lock)
1008 {
1009         l_lock(&lock->l_resource->lr_namespace->ns_lock);
1010         if (!(lock->l_flags & LDLM_FL_CANCEL)) {
1011                 lock->l_flags |= LDLM_FL_CANCEL;
1012                 if (lock->l_blocking_ast) {
1013                         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
1014                         // l_check_no_ns_lock(lock->l_resource->lr_namespace);
1015                         lock->l_blocking_ast(lock, NULL, lock->l_ast_data,
1016                                              LDLM_CB_CANCELING);
1017                         return;
1018                 } else {
1019                         LDLM_DEBUG(lock, "no blocking ast");
1020                 }
1021         }
1022         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
1023 }
1024
1025 void ldlm_lock_cancel(struct ldlm_lock *lock)
1026 {
1027         struct ldlm_resource *res;
1028         struct ldlm_namespace *ns;
1029         ENTRY;
1030
1031         /* There's no race between calling this and taking the ns lock below;
1032          * a lock can only be put on the waiting list once, because it can only
1033          * issue a blocking AST once. */
1034         ldlm_del_waiting_lock(lock);
1035
1036         res = lock->l_resource;
1037         ns = res->lr_namespace;
1038
1039         l_lock(&ns->ns_lock);
1040         /* Please do not, no matter how tempting, remove this LBUG without
1041          * talking to me first. -phik */
1042         if (lock->l_readers || lock->l_writers) {
1043                 LDLM_DEBUG(lock, "lock still has references");
1044                 ldlm_lock_dump(D_OTHER, lock, 0);
1045                 LBUG();
1046         }
1047
1048         ldlm_cancel_callback(lock);
1049
1050         ldlm_resource_unlink_lock(lock);
1051         ldlm_lock_destroy(lock);
1052         l_unlock(&ns->ns_lock);
1053         EXIT;
1054 }
1055
1056 int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
1057 {
1058         struct ldlm_lock *lock = ldlm_handle2lock(lockh);
1059         ENTRY;
1060
1061         if (lock == NULL)
1062                 RETURN(-EINVAL);
1063
1064         lock->l_ast_data = data;
1065         LDLM_LOCK_PUT(lock);
1066         RETURN(0);
1067 }
1068
1069 void ldlm_cancel_locks_for_export(struct obd_export *exp)
1070 {
1071         struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
1072         struct ldlm_lock *lock;
1073         struct ldlm_resource *res;
1074
1075         l_lock(&ns->ns_lock);
1076         while(!list_empty(&exp->exp_ldlm_data.led_held_locks)) { 
1077                 lock = list_entry(exp->exp_ldlm_data.led_held_locks.next,
1078                                   struct ldlm_lock, l_export_chain);
1079                 res = ldlm_resource_getref(lock->l_resource);
1080                 LDLM_DEBUG(lock, "export %p", exp);
1081                 ldlm_lock_cancel(lock);
1082                 l_unlock(&ns->ns_lock);
1083                 ldlm_reprocess_all(res);
1084                 ldlm_resource_putref(res);
1085                 l_lock(&ns->ns_lock);
1086         }
1087         l_unlock(&ns->ns_lock);
1088 }
1089
1090 struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
1091                                         int *flags)
1092 {
1093         struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
1094         struct ldlm_resource *res;
1095         struct ldlm_namespace *ns;
1096         int granted = 0;
1097         ENTRY;
1098
1099         LBUG();
1100
1101         res = lock->l_resource;
1102         ns = res->lr_namespace;
1103
1104         l_lock(&ns->ns_lock);
1105
1106         lock->l_req_mode = new_mode;
1107         ldlm_resource_unlink_lock(lock);
1108
1109         /* If this is a local resource, put it on the appropriate list. */
1110         if (res->lr_namespace->ns_client) {
1111                 if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED)) {
1112                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
1113                 } else {
1114                         /* This should never happen, because of the way the
1115                          * server handles conversions. */
1116                         LBUG();
1117
1118                         res->lr_tmp = &rpc_list;
1119                         ldlm_grant_lock(lock, NULL, 0, 0);
1120                         res->lr_tmp = NULL;
1121                         granted = 1;
1122                         /* FIXME: completion handling not with ns_lock held ! */
1123                         if (lock->l_completion_ast)
1124                                 lock->l_completion_ast(lock, 0, NULL);
1125                 }
1126         } else {
1127                 /* FIXME: We should try the conversion right away and possibly
1128                  * return success without the need for an extra AST */
1129                 ldlm_resource_add_lock(res, &res->lr_converting, lock);
1130                 *flags |= LDLM_FL_BLOCK_CONV;
1131         }
1132
1133         l_unlock(&ns->ns_lock);
1134
1135         if (granted)
1136                 ldlm_run_ast_work(ns, &rpc_list);
1137         RETURN(res);
1138 }
1139
1140 void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos)
1141 {
1142         char str[PTL_NALFMT_SIZE];
1143         struct obd_device *obd = NULL;
1144
1145         if (!((portal_debug | D_ERROR) & level))
1146                 return;
1147
1148         if (!lock) {
1149                 CDEBUG(level, "  NULL LDLM lock\n");
1150                 return;
1151         }
1152
1153         CDEBUG(level, "  -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d)\n",
1154                lock, lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
1155                pos);
1156         if (lock->l_conn_export != NULL)
1157                 obd = lock->l_conn_export->exp_obd;
1158         if (lock->l_export && lock->l_export->exp_connection) {
1159                 CDEBUG(level, "  Node: NID "LPX64" (%s) on %s (rhandle: "LPX64")\n",
1160                        lock->l_export->exp_connection->c_peer.peer_nid,
1161                        portals_nid2str(lock->l_export->exp_connection->c_peer.peer_ni->pni_number,
1162                                        lock->l_export->exp_connection->c_peer.peer_nid, str),
1163                        lock->l_export->exp_connection->c_peer.peer_ni->pni_name,
1164                        lock->l_remote_handle.cookie);
1165         } else if (obd == NULL) {
1166                 CDEBUG(level, "  Node: local\n");
1167         } else {
1168                 struct obd_import *imp = obd->u.cli.cl_import;
1169                 CDEBUG(level, "  Node: NID "LPX64" (%s) on %s (rhandle: "LPX64")\n",
1170                        imp->imp_connection->c_peer.peer_nid,
1171                        portals_nid2str(imp->imp_connection->c_peer.peer_ni->pni_number,
1172                                        imp->imp_connection->c_peer.peer_nid, str),
1173                        imp->imp_connection->c_peer.peer_ni->pni_name,
1174                        lock->l_remote_handle.cookie);
1175         }
1176         CDEBUG(level, "  Resource: %p ("LPU64"/"LPU64")\n", lock->l_resource,
1177                lock->l_resource->lr_name.name[0],
1178                lock->l_resource->lr_name.name[1]);
1179         CDEBUG(level, "  Req mode: %d, grant mode: %d, rc: %u, read: %d, "
1180                "write: %d\n", (int)lock->l_req_mode, (int)lock->l_granted_mode,
1181                atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers);
1182         if (lock->l_resource->lr_type == LDLM_EXTENT)
1183                 CDEBUG(level, "  Extent: "LPU64" -> "LPU64
1184                        " (req "LPU64"-"LPU64")\n",
1185                        lock->l_policy_data.l_extent.start,
1186                        lock->l_policy_data.l_extent.end,
1187                        lock->l_req_extent.start, lock->l_req_extent.end);
1188         else if (lock->l_resource->lr_type == LDLM_FLOCK)
1189                 CDEBUG(level, "  Pid: "LPU64" Extent: "LPU64" -> "LPU64"\n",
1190                        lock->l_policy_data.l_flock.pid,
1191                        lock->l_policy_data.l_flock.start,
1192                        lock->l_policy_data.l_flock.end);
1193 }
1194
1195 void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
1196 {
1197         struct ldlm_lock *lock;
1198
1199         lock = ldlm_handle2lock(lockh);
1200         if (lock == NULL)
1201                 return;
1202
1203         ldlm_lock_dump(D_OTHER, lock, 0);
1204
1205         LDLM_LOCK_PUT(lock);
1206 }