/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2010, 2013, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/ldlm/ldlm_lockd.c
 *
 * Author: Peter Braam <braam@clusterfs.com>
 * Author: Phil Schwan <phil@clusterfs.com>
 */
#define DEBUG_SUBSYSTEM S_LDLM

#ifdef __KERNEL__
# include <libcfs/libcfs.h>
#else
# include <liblustre.h>
#endif

#include <lustre_dlm.h>
#include <obd_class.h>
#include <libcfs/list.h>
#include "ldlm_internal.h"
static int ldlm_num_threads;
CFS_MODULE_PARM(ldlm_num_threads, "i", int, 0444,
                "number of DLM service threads to start");

static char *ldlm_cpts;
CFS_MODULE_PARM(ldlm_cpts, "s", charp, 0444,
                "CPU partitions ldlm threads should run on");
extern cfs_mem_cache_t *ldlm_resource_slab;
extern cfs_mem_cache_t *ldlm_lock_slab;
static struct mutex ldlm_ref_mutex;
static int ldlm_refcount;
struct ldlm_cb_async_args {
        struct ldlm_cb_set_arg *ca_set_arg;
        struct ldlm_lock       *ca_lock;
};
static struct ldlm_state *ldlm_state;
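/* Round a callback deadline up to the next whole second; this keeps the
 * waiting-locks timer from being re-armed more than once per second during
 * periods of high lock contention and traffic (see __ldlm_add_waiting_lock). */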
inline cfs_time_t round_timeout(cfs_time_t timeout)
{
        return cfs_time_seconds((int)cfs_duration_sec(cfs_time_sub(timeout, 0)) + 1);
}
/* timeout for initial callback (AST) reply (bz10399) */
static inline unsigned int ldlm_get_rq_timeout(void)
{
        unsigned int timeout = min(ldlm_timeout, obd_timeout / 3);

        return timeout < 1 ? 1 : timeout;
}
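/* States of the expired-lock thread (expired_lock_thread.elt_state). */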
#define ELT_STOPPED   0
#define ELT_READY     1
#define ELT_TERMINATE 2
struct ldlm_bl_pool {
        spinlock_t              blp_lock;

        /*
         * blp_prio_list is used for callbacks that should be handled
         * as a priority. It is used for LDLM_FL_DISCARD_DATA requests.
         */
        cfs_list_t              blp_prio_list;

        /*
         * blp_list is used for all other callbacks which are likely
         * to take longer to process.
         */
        cfs_list_t              blp_list;

        cfs_waitq_t             blp_waitq;
        struct completion       blp_comp;
        cfs_atomic_t            blp_num_threads;
        cfs_atomic_t            blp_busy_threads;
        int                     blp_min_threads;
        int                     blp_max_threads;
};
struct ldlm_bl_work_item {
        cfs_list_t              blwi_entry;
        struct ldlm_namespace  *blwi_ns;
        struct ldlm_lock_desc   blwi_ld;
        struct ldlm_lock       *blwi_lock;
        cfs_list_t              blwi_head;
        int                     blwi_count;
        struct completion       blwi_comp;
        ldlm_cancel_flags_t     blwi_flags;
        int                     blwi_mem_pressure;
};
#if defined(HAVE_SERVER_SUPPORT) && defined(__KERNEL__)

/**
 * Protects both waiting_locks_list and expired_lock_thread.
 */
static spinlock_t waiting_locks_spinlock;   /* BH lock (timer) */
/**
 * List for contended locks.
 *
 * As soon as a lock is contended, it gets placed on this list and
 * expected time to get a response is filled in the lock. A special
 * thread walks the list looking for locks that should be released and
 * schedules client evictions for those that have not been released in
 * time.
 *
 * All access to it should be under waiting_locks_spinlock.
 */
static cfs_list_t waiting_locks_list;
static cfs_timer_t waiting_locks_timer;
static struct expired_lock_thread {
        cfs_waitq_t             elt_waitq;
        int                     elt_state;
        int                     elt_dump;
        cfs_list_t              elt_expired_locks;
} expired_lock_thread;
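/* Peek, under the BH lock, whether the expired-lock list has entries. */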
static inline int have_expired_locks(void)
{
        int need_to_run;

        ENTRY;
        spin_lock_bh(&waiting_locks_spinlock);
        need_to_run = !cfs_list_empty(&expired_lock_thread.elt_expired_locks);
        spin_unlock_bh(&waiting_locks_spinlock);

        RETURN(need_to_run);
}
/**
 * Check expired lock list for expired locks and time them out.
 */
static int expired_lock_main(void *arg)
{
        cfs_list_t *expired = &expired_lock_thread.elt_expired_locks;
        struct l_wait_info lwi = { 0 };
        int do_dump;

        ENTRY;

        expired_lock_thread.elt_state = ELT_READY;
        cfs_waitq_signal(&expired_lock_thread.elt_waitq);

        while (1) {
                l_wait_event(expired_lock_thread.elt_waitq,
                             have_expired_locks() ||
                             expired_lock_thread.elt_state == ELT_TERMINATE,
                             &lwi);

                spin_lock_bh(&waiting_locks_spinlock);
                if (expired_lock_thread.elt_dump) {
                        struct libcfs_debug_msg_data msgdata = {
                                .msg_file = __FILE__,
                                .msg_fn = "waiting_locks_callback",
                                .msg_line = expired_lock_thread.elt_dump };
                        spin_unlock_bh(&waiting_locks_spinlock);

                        /* from waiting_locks_callback, but not in timer */
                        libcfs_debug_dumplog();
                        libcfs_run_lbug_upcall(&msgdata);

                        spin_lock_bh(&waiting_locks_spinlock);
                        expired_lock_thread.elt_dump = 0;
                }

                do_dump = 0;

                while (!cfs_list_empty(expired)) {
                        struct obd_export *export;
                        struct ldlm_lock *lock;

                        lock = cfs_list_entry(expired->next, struct ldlm_lock,
                                              l_pending_chain);
                        if ((void *)lock < LP_POISON + CFS_PAGE_SIZE &&
                            (void *)lock >= LP_POISON) {
                                spin_unlock_bh(&waiting_locks_spinlock);
                                CERROR("free lock on elt list %p\n", lock);
                                LBUG();
                        }
                        cfs_list_del_init(&lock->l_pending_chain);
                        if ((void *)lock->l_export < LP_POISON + CFS_PAGE_SIZE &&
                            (void *)lock->l_export >= LP_POISON) {
                                CERROR("lock with free export on elt list %p\n",
                                       lock->l_export);
                                lock->l_export = NULL;
                                LDLM_ERROR(lock, "free export");
                                /* release extra ref grabbed by
                                 * ldlm_add_waiting_lock() or
                                 * ldlm_failed_ast() */
                                LDLM_LOCK_RELEASE(lock);
                                continue;
                        }

                        if (lock->l_flags & LDLM_FL_DESTROYED) {
                                /* release the lock refcount that
                                 * waiting_locks_callback() found */
                                LDLM_LOCK_RELEASE(lock);
                                continue;
                        }
                        export = class_export_lock_get(lock->l_export, lock);
                        spin_unlock_bh(&waiting_locks_spinlock);

                        do_dump++;
                        class_fail_export(export);
                        class_export_lock_put(export, lock);

                        /* release extra ref grabbed by ldlm_add_waiting_lock()
                         * or ldlm_failed_ast() */
                        LDLM_LOCK_RELEASE(lock);

                        spin_lock_bh(&waiting_locks_spinlock);
                }
                spin_unlock_bh(&waiting_locks_spinlock);

                if (do_dump && obd_dump_on_eviction) {
                        CERROR("dump the log upon eviction\n");
                        libcfs_debug_dumplog();
                }

                if (expired_lock_thread.elt_state == ELT_TERMINATE)
                        break;
        }

        expired_lock_thread.elt_state = ELT_STOPPED;
        cfs_waitq_signal(&expired_lock_thread.elt_waitq);
        RETURN(0);
}
static int ldlm_add_waiting_lock(struct ldlm_lock *lock);
static int __ldlm_add_waiting_lock(struct ldlm_lock *lock, int seconds);
/**
 * Check if there is a request in the export request list
 * which prevents the lock canceling.
 */
static int ldlm_lock_busy(struct ldlm_lock *lock)
{
        struct ptlrpc_request *req;
        int match = 0;
        ENTRY;

        if (lock->l_export == NULL)
                return 0;

        spin_lock_bh(&lock->l_export->exp_rpc_lock);
        cfs_list_for_each_entry(req, &lock->l_export->exp_hp_rpcs,
                                rq_exp_list) {
                if (req->rq_ops->hpreq_lock_match) {
                        match = req->rq_ops->hpreq_lock_match(req, lock);
                        if (match)
                                break;
                }
        }
        spin_unlock_bh(&lock->l_export->exp_rpc_lock);
        RETURN(match);
}
/* This is called from within a timer interrupt and cannot schedule */
static void waiting_locks_callback(unsigned long unused)
{
        struct ldlm_lock *lock;
        int               need_dump = 0;

        spin_lock_bh(&waiting_locks_spinlock);
        while (!cfs_list_empty(&waiting_locks_list)) {
                lock = cfs_list_entry(waiting_locks_list.next, struct ldlm_lock,
                                      l_pending_chain);
                if (cfs_time_after(lock->l_callback_timeout,
                                   cfs_time_current()) ||
                    (lock->l_req_mode == LCK_GROUP))
                        break;

                if (ptlrpc_check_suspend()) {
                        /* there is a case when we talk to one mds, holding
                         * lock from another mds. this way we easily can get
                         * here, if second mds is being recovered. so, we
                         * suspend timeouts. bug 6019 */

                        LDLM_ERROR(lock, "recharge timeout: %s@%s nid %s ",
                                   lock->l_export->exp_client_uuid.uuid,
                                   lock->l_export->exp_connection->c_remote_uuid.uuid,
                                   libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid));

                        cfs_list_del_init(&lock->l_pending_chain);
                        if (lock->l_flags & LDLM_FL_DESTROYED) {
                                /* relay the lock refcount decrease to
                                 * expired lock thread */
                                cfs_list_add(&lock->l_pending_chain,
                                        &expired_lock_thread.elt_expired_locks);
                        } else {
                                __ldlm_add_waiting_lock(lock,
                                                ldlm_get_enq_timeout(lock));
                        }
                        continue;
                }

                /* if timeout overlaps the activation time of suspended timeouts
                 * then extend it to give a chance for client to reconnect */
                if (cfs_time_before(cfs_time_sub(lock->l_callback_timeout,
                                                 cfs_time_seconds(obd_timeout)/2),
                                    ptlrpc_suspend_wakeup_time())) {
                        LDLM_ERROR(lock, "extend timeout due to recovery: %s@%s nid %s ",
                                   lock->l_export->exp_client_uuid.uuid,
                                   lock->l_export->exp_connection->c_remote_uuid.uuid,
                                   libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid));

                        cfs_list_del_init(&lock->l_pending_chain);
                        if (lock->l_flags & LDLM_FL_DESTROYED) {
                                /* relay the lock refcount decrease to
                                 * expired lock thread */
                                cfs_list_add(&lock->l_pending_chain,
                                        &expired_lock_thread.elt_expired_locks);
                        } else {
                                __ldlm_add_waiting_lock(lock,
                                                ldlm_get_enq_timeout(lock));
                        }
                        continue;
                }

                /* Check if we need to prolong timeout */
                if (!OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT) &&
                    ldlm_lock_busy(lock)) {
                        int cont = 1;

                        if (lock->l_pending_chain.next == &waiting_locks_list)
                                cont = 0;

                        LDLM_LOCK_GET(lock);

                        spin_unlock_bh(&waiting_locks_spinlock);
                        LDLM_DEBUG(lock, "prolong the busy lock");
                        ldlm_refresh_waiting_lock(lock,
                                                  ldlm_get_enq_timeout(lock));
                        spin_lock_bh(&waiting_locks_spinlock);

                        if (!cont) {
                                LDLM_LOCK_RELEASE(lock);
                                break;
                        }

                        LDLM_LOCK_RELEASE(lock);
                        continue;
                }
                ldlm_lock_to_ns(lock)->ns_timeouts++;
                LDLM_ERROR(lock, "lock callback timer expired after %lds: "
                           "evicting client at %s ",
                           cfs_time_current_sec() - lock->l_last_activity,
                           libcfs_nid2str(
                                   lock->l_export->exp_connection->c_peer.nid));

                /* no need to take an extra ref on the lock since it was in
                 * the waiting_locks_list and ldlm_add_waiting_lock()
                 * already grabbed a ref */
                cfs_list_del(&lock->l_pending_chain);
                cfs_list_add(&lock->l_pending_chain,
                             &expired_lock_thread.elt_expired_locks);
                need_dump = 1;
        }

        if (!cfs_list_empty(&expired_lock_thread.elt_expired_locks)) {
                if (obd_dump_on_timeout && need_dump)
                        expired_lock_thread.elt_dump = __LINE__;

                cfs_waitq_signal(&expired_lock_thread.elt_waitq);
        }

        /*
         * Make sure the timer will fire again if we have any locks
         * left.
         */
        if (!cfs_list_empty(&waiting_locks_list)) {
                cfs_time_t timeout_rounded;
                lock = cfs_list_entry(waiting_locks_list.next, struct ldlm_lock,
                                      l_pending_chain);
                timeout_rounded = (cfs_time_t)round_timeout(lock->l_callback_timeout);
                cfs_timer_arm(&waiting_locks_timer, timeout_rounded);
        }
        spin_unlock_bh(&waiting_locks_spinlock);
}
/**
 * Add lock to the list of contended locks.
 *
 * Indicate that we're waiting for a client to call us back cancelling a given
 * lock. We add it to the pending-callback chain, and schedule the lock-timeout
 * timer to fire appropriately. (We round up to the next second, to avoid
 * floods of timer firings during periods of high lock contention and traffic).
 * As done by ldlm_add_waiting_lock(), the caller must grab a lock reference
 * if it has been added to the waiting list (1 is returned).
 *
 * Called with the namespace lock held.
 */
static int __ldlm_add_waiting_lock(struct ldlm_lock *lock, int seconds)
{
        cfs_time_t timeout;
        cfs_time_t timeout_rounded;

        if (!cfs_list_empty(&lock->l_pending_chain))
                return 0;

        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT) ||
            OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
                seconds = 1;

        timeout = cfs_time_shift(seconds);
        if (likely(cfs_time_after(timeout, lock->l_callback_timeout)))
                lock->l_callback_timeout = timeout;

        timeout_rounded = round_timeout(lock->l_callback_timeout);

        if (cfs_time_before(timeout_rounded,
                            cfs_timer_deadline(&waiting_locks_timer)) ||
            !cfs_timer_is_armed(&waiting_locks_timer)) {
                cfs_timer_arm(&waiting_locks_timer, timeout_rounded);
        }
        /* if the new lock has a shorter timeout than something earlier on
           the list, we'll wait the longer amount of time; no big deal. */
        /* FIFO */
        cfs_list_add_tail(&lock->l_pending_chain, &waiting_locks_list);
        return 1;
}
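/* Wrapper around __ldlm_add_waiting_lock(): computes the AT-based enqueue
 * timeout, links the lock onto its export's blocking list, and takes an
 * extra lock reference when the lock was actually added. */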
static int ldlm_add_waiting_lock(struct ldlm_lock *lock)
{
        int ret;
        int timeout = ldlm_get_enq_timeout(lock);

        /* NB: must be called with hold of lock_res_and_lock() */
        LASSERT(lock->l_flags & LDLM_FL_RES_LOCKED);
        lock->l_flags |= LDLM_FL_WAITED;

        LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK));

        spin_lock_bh(&waiting_locks_spinlock);
        if (lock->l_flags & LDLM_FL_DESTROYED) {
                static cfs_time_t next;
                spin_unlock_bh(&waiting_locks_spinlock);
                LDLM_ERROR(lock, "not waiting on destroyed lock (bug 5653)");
                if (cfs_time_after(cfs_time_current(), next)) {
                        next = cfs_time_shift(14400);
                        libcfs_debug_dumpstack(NULL);
                }
                return 0;
        }

        ret = __ldlm_add_waiting_lock(lock, timeout);
        if (ret) {
                /* grab ref on the lock if it has been added to the
                 * waiting list */
                LDLM_LOCK_GET(lock);
        }
        spin_unlock_bh(&waiting_locks_spinlock);

        if (ret) {
                spin_lock_bh(&lock->l_export->exp_bl_list_lock);
                if (cfs_list_empty(&lock->l_exp_list))
                        cfs_list_add(&lock->l_exp_list,
                                     &lock->l_export->exp_bl_list);
                spin_unlock_bh(&lock->l_export->exp_bl_list_lock);
        }

        LDLM_DEBUG(lock, "%sadding to wait list(timeout: %d, AT: %s)",
                   ret == 0 ? "not re-" : "", timeout,
                   AT_OFF ? "off" : "on");
        return ret;
}
/**
 * Remove a lock from the pending list, likely because it had its cancellation
 * callback arrive without incident. This adjusts the lock-timeout timer if
 * needed. Returns 0 if the lock wasn't pending after all, 1 if it was.
 * As done by ldlm_del_waiting_lock(), the caller must release the lock
 * reference when the lock is removed from any list (1 is returned).
 *
 * Called with namespace lock held.
 */
static int __ldlm_del_waiting_lock(struct ldlm_lock *lock)
{
        cfs_list_t *list_next;

        if (cfs_list_empty(&lock->l_pending_chain))
                return 0;

        list_next = lock->l_pending_chain.next;
        if (lock->l_pending_chain.prev == &waiting_locks_list) {
                /* Removing the head of the list, adjust timer. */
                if (list_next == &waiting_locks_list) {
                        /* No more, just cancel. */
                        cfs_timer_disarm(&waiting_locks_timer);
                } else {
                        struct ldlm_lock *next;
                        next = cfs_list_entry(list_next, struct ldlm_lock,
                                              l_pending_chain);
                        cfs_timer_arm(&waiting_locks_timer,
                                      round_timeout(next->l_callback_timeout));
                }
        }
        cfs_list_del_init(&lock->l_pending_chain);

        return 1;
}
int ldlm_del_waiting_lock(struct ldlm_lock *lock)
{
        int ret;

        if (lock->l_export == NULL) {
                /* We don't have a "waiting locks list" on clients. */
                CDEBUG(D_DLMTRACE, "Client lock %p : no-op\n", lock);
                return 0;
        }

        spin_lock_bh(&waiting_locks_spinlock);
        ret = __ldlm_del_waiting_lock(lock);
        spin_unlock_bh(&waiting_locks_spinlock);

        /* remove the lock out of export blocking list */
        spin_lock_bh(&lock->l_export->exp_bl_list_lock);
        cfs_list_del_init(&lock->l_exp_list);
        spin_unlock_bh(&lock->l_export->exp_bl_list_lock);

        if (ret) {
                /* release lock ref if it has indeed been removed
                 * from a list */
                LDLM_LOCK_RELEASE(lock);
        }

        LDLM_DEBUG(lock, "%s", ret == 0 ? "wasn't waiting" : "removed");
        return ret;
}
EXPORT_SYMBOL(ldlm_del_waiting_lock);
/**
 * Prolong the contended lock waiting time.
 *
 * Called with namespace lock held.
 */
int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, int timeout)
{
        if (lock->l_export == NULL) {
                /* We don't have a "waiting locks list" on clients. */
                LDLM_DEBUG(lock, "client lock: no-op");
                return 0;
        }

        spin_lock_bh(&waiting_locks_spinlock);

        if (cfs_list_empty(&lock->l_pending_chain)) {
                spin_unlock_bh(&waiting_locks_spinlock);
                LDLM_DEBUG(lock, "wasn't waiting");
                return 0;
        }

        /* we remove/add the lock to the waiting list, so no need to
         * release/take a lock reference */
        __ldlm_del_waiting_lock(lock);
        __ldlm_add_waiting_lock(lock, timeout);
        spin_unlock_bh(&waiting_locks_spinlock);

        LDLM_DEBUG(lock, "refreshed");
        return 1;
}
EXPORT_SYMBOL(ldlm_refresh_waiting_lock);
#else /* !HAVE_SERVER_SUPPORT || !__KERNEL__ */

int ldlm_del_waiting_lock(struct ldlm_lock *lock)
{
        RETURN(0);
}

int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, int timeout)
{
        RETURN(0);
}

# ifdef HAVE_SERVER_SUPPORT
static int ldlm_add_waiting_lock(struct ldlm_lock *lock)
{
        LASSERT((lock->l_flags & (LDLM_FL_RES_LOCKED|LDLM_FL_CANCEL_ON_BLOCK))
                == LDLM_FL_RES_LOCKED);
        RETURN(1);
}

# endif
#endif /* HAVE_SERVER_SUPPORT && __KERNEL__ */
#ifdef HAVE_SERVER_SUPPORT

/**
 * Perform lock cleanup if AST sending failed.
 */
static void ldlm_failed_ast(struct ldlm_lock *lock, int rc,
                            const char *ast_type)
{
        LCONSOLE_ERROR_MSG(0x138, "%s: A client on nid %s was evicted due "
                           "to a lock %s callback time out: rc %d\n",
                           lock->l_export->exp_obd->obd_name,
                           obd_export_nid2str(lock->l_export), ast_type, rc);

        if (obd_dump_on_timeout)
                libcfs_debug_dumplog();
#ifdef __KERNEL__
        spin_lock_bh(&waiting_locks_spinlock);
        if (__ldlm_del_waiting_lock(lock) == 0)
                /* the lock was not in any list, grab an extra ref before adding
                 * the lock to the expired list */
                LDLM_LOCK_GET(lock);
        cfs_list_add(&lock->l_pending_chain,
                     &expired_lock_thread.elt_expired_locks);
        cfs_waitq_signal(&expired_lock_thread.elt_waitq);
        spin_unlock_bh(&waiting_locks_spinlock);
#else
        class_fail_export(lock->l_export);
#endif
}
/**
 * Perform lock cleanup if AST reply came with error.
 */
static int ldlm_handle_ast_error(struct ldlm_lock *lock,
                                 struct ptlrpc_request *req, int rc,
                                 const char *ast_type)
{
        lnet_process_id_t peer = req->rq_import->imp_connection->c_peer;

        if (rc == -ETIMEDOUT || rc == -EINTR || rc == -ENOTCONN) {
                LASSERT(lock->l_export);
                if (lock->l_export->exp_libclient) {
                        LDLM_DEBUG(lock, "%s AST to liblustre client (nid %s)"
                                   " timeout, just cancelling lock", ast_type,
                                   libcfs_nid2str(peer.nid));
                        ldlm_lock_cancel(lock);
                        rc = -ERESTART;
                } else if (lock->l_flags & LDLM_FL_CANCEL) {
                        LDLM_DEBUG(lock, "%s AST timeout from nid %s, but "
                                   "cancel was received (AST reply lost?)",
                                   ast_type, libcfs_nid2str(peer.nid));
                        ldlm_lock_cancel(lock);
                        rc = -ERESTART;
                } else {
                        ldlm_del_waiting_lock(lock);
                        ldlm_failed_ast(lock, rc, ast_type);
                }
        } else if (rc) {
                if (rc == -EINVAL) {
                        struct ldlm_resource *res = lock->l_resource;
                        LDLM_DEBUG(lock, "client (nid %s) returned %d"
                                   " from %s AST - normal race",
                                   libcfs_nid2str(peer.nid),
                                   req->rq_repmsg ?
                                   lustre_msg_get_status(req->rq_repmsg) : -1,
                                   ast_type);
                        if (res) {
                                /* update lvbo to return proper attributes.
                                 * see bug 23174 */
                                ldlm_resource_getref(res);
                                ldlm_res_lvbo_update(res, NULL, 1);
                                ldlm_resource_putref(res);
                        }
                } else {
                        LDLM_ERROR(lock, "client (nid %s) returned %d "
                                   "from %s AST", libcfs_nid2str(peer.nid),
                                   (req->rq_repmsg != NULL) ?
                                   lustre_msg_get_status(req->rq_repmsg) : 0,
                                   ast_type);
                }
                ldlm_lock_cancel(lock);
                /* Server-side AST functions are called from ldlm_reprocess_all,
                 * which needs to be told to please restart its reprocessing. */
                rc = -ERESTART;
        }

        RETURN(rc);
}
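/* Interpret callback, invoked on completion of the BL/CP/GL AST RPCs sent
 * by the server-side AST functions below; dispatches per-opcode error
 * handling and LVB updates, then drops the reference ldlm_ast_fini() took. */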
static int ldlm_cb_interpret(const struct lu_env *env,
                             struct ptlrpc_request *req, void *data, int rc)
{
        struct ldlm_cb_async_args *ca   = data;
        struct ldlm_lock          *lock = ca->ca_lock;
        struct ldlm_cb_set_arg    *arg  = ca->ca_set_arg;
        ENTRY;

        LASSERT(lock != NULL);

        switch (arg->type) {
        case LDLM_GL_CALLBACK:
                /* Update the LVB from disk if the AST failed
                 * (this is a legal race)
                 *
                 * - Glimpse callback of local lock just returns
                 *   -ELDLM_NO_LOCK_DATA.
                 * - Glimpse callback of remote lock might return
                 *   -ELDLM_NO_LOCK_DATA when inode is cleared. LU-274
                 */
                if (rc == -ELDLM_NO_LOCK_DATA) {
                        LDLM_DEBUG(lock, "lost race - client has a lock but no "
                                   "inode");
                        ldlm_res_lvbo_update(lock->l_resource, NULL, 1);
                } else if (rc != 0) {
                        rc = ldlm_handle_ast_error(lock, req, rc, "glimpse");
                } else {
                        rc = ldlm_res_lvbo_update(lock->l_resource, req, 1);
                }
                break;
        case LDLM_BL_CALLBACK:
                if (rc != 0)
                        rc = ldlm_handle_ast_error(lock, req, rc, "blocking");
                break;
        case LDLM_CP_CALLBACK:
                if (rc != 0)
                        rc = ldlm_handle_ast_error(lock, req, rc, "completion");
                break;
        default:
                LDLM_ERROR(lock, "invalid opcode for lock callback %d",
                           arg->type);
                LBUG();
        }

        /* release extra reference taken in ldlm_ast_fini() */
        LDLM_LOCK_RELEASE(lock);

        if (rc == -ERESTART)
                cfs_atomic_inc(&arg->restart);

        RETURN(0);
}
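/* Finish sending an AST: for instant cancel the RPC is sent immediately
 * without waiting for a reply, otherwise it is queued on the shared request
 * set with an extra lock reference for ldlm_cb_interpret() to release. */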
static inline int ldlm_ast_fini(struct ptlrpc_request *req,
                                struct ldlm_cb_set_arg *arg,
                                struct ldlm_lock *lock,
                                int instant_cancel)
{
        int rc = 0;
        ENTRY;

        if (unlikely(instant_cancel)) {
                rc = ptl_send_rpc(req, 1);
                ptlrpc_req_finished(req);
                if (rc == 0)
                        cfs_atomic_inc(&arg->restart);
        } else {
                LDLM_LOCK_GET(lock);
                ptlrpc_set_add_req(arg->set, req);
        }

        RETURN(rc);
}
/**
 * Check if there are requests in the export request list which prevent
 * the lock canceling and make these requests high priority ones.
 */
static void ldlm_lock_reorder_req(struct ldlm_lock *lock)
{
        struct ptlrpc_request *req;
        ENTRY;

        if (lock->l_export == NULL) {
                LDLM_DEBUG(lock, "client lock: no-op");
                RETURN_EXIT;
        }

        spin_lock_bh(&lock->l_export->exp_rpc_lock);
        cfs_list_for_each_entry(req, &lock->l_export->exp_hp_rpcs,
                                rq_exp_list) {
                /* Do not process requests that were not yet added to their
                 * incoming queue or were already removed from there for
                 * processing. We evaluate ptlrpc_nrs_req_can_move() without
                 * holding svcpt->scp_req_lock, and then redo the check with
                 * the lock held once we need to obtain a reliable result.
                 */
                if (ptlrpc_nrs_req_can_move(req) &&
                    req->rq_ops->hpreq_lock_match &&
                    req->rq_ops->hpreq_lock_match(req, lock))
                        ptlrpc_nrs_req_hp_move(req);
        }
        spin_unlock_bh(&lock->l_export->exp_rpc_lock);
        EXIT;
}
/**
 * ->l_blocking_ast() method for server-side locks. This is invoked when a
 * newly enqueued server lock conflicts with the given one.
 *
 * Sends blocking AST RPC to the client owning that lock; arms timeout timer
 * to wait for client response.
 */
int ldlm_server_blocking_ast(struct ldlm_lock *lock,
                             struct ldlm_lock_desc *desc,
                             void *data, int flag)
{
        struct ldlm_cb_async_args *ca;
        struct ldlm_cb_set_arg *arg = data;
        struct ldlm_request    *body;
        struct ptlrpc_request  *req;
        int                     instant_cancel = 0;
        int                     rc = 0;
        ENTRY;

        if (flag == LDLM_CB_CANCELING)
                /* Don't need to do anything here. */
                RETURN(0);

        LASSERT(lock);
        LASSERT(data != NULL);
        if (lock->l_export->exp_obd->obd_recovering != 0)
                LDLM_ERROR(lock, "BUG 6063: lock collide during recovery");

        ldlm_lock_reorder_req(lock);

        req = ptlrpc_request_alloc_pack(lock->l_export->exp_imp_reverse,
                                        &RQF_LDLM_BL_CALLBACK,
                                        LUSTRE_DLM_VERSION, LDLM_BL_CALLBACK);
        if (req == NULL)
                RETURN(-ENOMEM);

        CLASSERT(sizeof(*ca) <= sizeof(req->rq_async_args));
        ca = ptlrpc_req_async_args(req);
        ca->ca_set_arg = arg;
        ca->ca_lock = lock;

        req->rq_interpret_reply = ldlm_cb_interpret;
        req->rq_no_resend = 1;

        lock_res_and_lock(lock);
        if (lock->l_granted_mode != lock->l_req_mode) {
                /* this blocking AST will be communicated as part of the
                 * completion AST instead */
                unlock_res_and_lock(lock);

                ptlrpc_req_finished(req);
                LDLM_DEBUG(lock, "lock not granted, not sending blocking AST");
                RETURN(0);
        }

        if (lock->l_flags & LDLM_FL_DESTROYED) {
                /* What's the point? */
                unlock_res_and_lock(lock);
                ptlrpc_req_finished(req);
                RETURN(0);
        }

        if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)
                instant_cancel = 1;

        body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
        body->lock_handle[0] = lock->l_remote_handle;
        body->lock_desc = *desc;
        body->lock_flags |= ldlm_flags_to_wire(lock->l_flags & LDLM_AST_FLAGS);

        LDLM_DEBUG(lock, "server preparing blocking AST");

        ptlrpc_request_set_replen(req);
        if (instant_cancel) {
                unlock_res_and_lock(lock);
                ldlm_lock_cancel(lock);
        } else {
                LASSERT(lock->l_granted_mode == lock->l_req_mode);
                ldlm_add_waiting_lock(lock);
                unlock_res_and_lock(lock);
        }

        req->rq_send_state = LUSTRE_IMP_FULL;
        /* ptlrpc_request_alloc_pack already set timeout */
        if (AT_OFF)
                req->rq_timeout = ldlm_get_rq_timeout();

        if (lock->l_export && lock->l_export->exp_nid_stats &&
            lock->l_export->exp_nid_stats->nid_ldlm_stats)
                lprocfs_counter_incr(lock->l_export->exp_nid_stats->nid_ldlm_stats,
                                     LDLM_BL_CALLBACK - LDLM_FIRST_OPC);

        rc = ldlm_ast_fini(req, arg, lock, instant_cancel);

        RETURN(rc);
}
EXPORT_SYMBOL(ldlm_server_blocking_ast);
/**
 * ->l_completion_ast callback for a remote lock in server namespace.
 *
 * Sends AST to the client notifying it of lock granting. If initial
 * lock response was not sent yet, instead of sending another RPC, just
 * mark the lock as granted and client will understand.
 */
int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
{
        struct ldlm_cb_set_arg *arg = data;
        struct ldlm_request    *body;
        struct ptlrpc_request  *req;
        struct ldlm_cb_async_args *ca;
        long                    total_enqueue_wait;
        int                     instant_cancel = 0;
        int                     rc = 0;
        int                     lvb_len;
        ENTRY;

        LASSERT(lock != NULL);
        LASSERT(data != NULL);

        total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(),
                                          lock->l_last_activity);

        req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
                                   &RQF_LDLM_CP_CALLBACK);
        if (req == NULL)
                RETURN(-ENOMEM);

        /* server namespace, doesn't need lock */
        lvb_len = ldlm_lvbo_size(lock);
        /* LU-3124 & LU-2187: do not return the layout in the completion AST
         * because it may deadlock (LU-2187), or the client may not have
         * enough space for a large layout. The layout will be returned to
         * the client with an extra RPC to fetch xattr.lov */
        if (ldlm_has_layout(lock))
                lvb_len = 0;

        req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT, lvb_len);
        rc = ptlrpc_request_pack(req, LUSTRE_DLM_VERSION, LDLM_CP_CALLBACK);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }

        CLASSERT(sizeof(*ca) <= sizeof(req->rq_async_args));
        ca = ptlrpc_req_async_args(req);
        ca->ca_set_arg = arg;
        ca->ca_lock = lock;

        req->rq_interpret_reply = ldlm_cb_interpret;
        req->rq_no_resend = 1;
        body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);

        body->lock_handle[0] = lock->l_remote_handle;
        body->lock_flags = ldlm_flags_to_wire(flags);
        ldlm_lock2desc(lock, &body->lock_desc);
        if (lvb_len > 0) {
                void *lvb = req_capsule_client_get(&req->rq_pill, &RMF_DLM_LVB);

                lvb_len = ldlm_lvbo_fill(lock, lvb, lvb_len);
                if (lvb_len < 0) {
                        /* We still need to send the RPC to wake up the blocked
                         * enqueue thread on the client.
                         *
                         * Consider old client, there is no better way to notify
                         * the failure, just zero-sized the LVB, then the client
                         * will fail out as "-EPROTO". */
                        req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB, 0,
                                           RCL_CLIENT);
                        instant_cancel = 1;
                } else {
                        req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB, lvb_len,
                                           RCL_CLIENT);
                }
        }

        LDLM_DEBUG(lock, "server preparing completion AST (after %lds wait)",
                   total_enqueue_wait);

        /* Server-side enqueue wait time estimate, used in
           __ldlm_add_waiting_lock to set future enqueue timers */
        if (total_enqueue_wait < ldlm_get_enq_timeout(lock))
                at_measured(ldlm_lock_to_ns_at(lock),
                            total_enqueue_wait);
        else
                /* bz18618. Don't add lock enqueue time we spend waiting for a
                   previous callback to fail. Locks waiting legitimately will
                   get extended by ldlm_refresh_waiting_lock regardless of the
                   estimate, so it's okay to underestimate here. */
                LDLM_DEBUG(lock, "lock completed after %lus; estimate was %ds. "
                           "It is likely that a previous callback timed out.",
                           total_enqueue_wait,
                           at_get(ldlm_lock_to_ns_at(lock)));

        ptlrpc_request_set_replen(req);

        req->rq_send_state = LUSTRE_IMP_FULL;
        /* ptlrpc_request_pack already set timeout */
        if (AT_OFF)
                req->rq_timeout = ldlm_get_rq_timeout();

        /* We only send real blocking ASTs after the lock is granted */
        lock_res_and_lock(lock);
        if (lock->l_flags & LDLM_FL_AST_SENT) {
                body->lock_flags |= ldlm_flags_to_wire(LDLM_FL_AST_SENT);
                /* Copy AST flags like LDLM_FL_DISCARD_DATA. */
                body->lock_flags |= ldlm_flags_to_wire(lock->l_flags &
                                                       LDLM_AST_FLAGS);

                /* We might get here prior to ldlm_handle_enqueue setting
                 * LDLM_FL_CANCEL_ON_BLOCK flag. Then we will put this lock
                 * into waiting list, but this is safe and similar code in
                 * ldlm_handle_enqueue will call ldlm_lock_cancel() still,
                 * that would not only cancel the lock, but will also remove
                 * it from waiting list */
                if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) {
                        unlock_res_and_lock(lock);
                        ldlm_lock_cancel(lock);
                        instant_cancel = 1;
                        lock_res_and_lock(lock);
                } else {
                        /* start the lock-timeout clock */
                        ldlm_add_waiting_lock(lock);
                }
        }
        unlock_res_and_lock(lock);

        if (lock->l_export && lock->l_export->exp_nid_stats &&
            lock->l_export->exp_nid_stats->nid_ldlm_stats)
                lprocfs_counter_incr(lock->l_export->exp_nid_stats->nid_ldlm_stats,
                                     LDLM_CP_CALLBACK - LDLM_FIRST_OPC);

        rc = ldlm_ast_fini(req, arg, lock, instant_cancel);

        RETURN(lvb_len < 0 ? lvb_len : rc);
}
EXPORT_SYMBOL(ldlm_server_completion_ast);
/**
 * Server side ->l_glimpse_ast handler for client locks.
 *
 * Sends glimpse AST to the client and waits for reply. Then updates
 * lvbo with the result.
 */
int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
{
        struct ldlm_cb_set_arg          *arg = data;
        struct ldlm_request             *body;
        struct ptlrpc_request           *req;
        struct ldlm_cb_async_args       *ca;
        int                              rc;
        struct req_format               *req_fmt;
        ENTRY;

        LASSERT(lock != NULL);

        if (arg->gl_desc != NULL)
                /* There is a glimpse descriptor to pack */
                req_fmt = &RQF_LDLM_GL_DESC_CALLBACK;
        else
                req_fmt = &RQF_LDLM_GL_CALLBACK;

        req = ptlrpc_request_alloc_pack(lock->l_export->exp_imp_reverse,
                                        req_fmt, LUSTRE_DLM_VERSION,
                                        LDLM_GL_CALLBACK);
        if (req == NULL)
                RETURN(-ENOMEM);

        if (arg->gl_desc != NULL) {
                /* copy the GL descriptor */
                union ldlm_gl_desc      *desc;
                desc = req_capsule_client_get(&req->rq_pill, &RMF_DLM_GL_DESC);
                *desc = *arg->gl_desc;
        }

        body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
        body->lock_handle[0] = lock->l_remote_handle;
        ldlm_lock2desc(lock, &body->lock_desc);

        CLASSERT(sizeof(*ca) <= sizeof(req->rq_async_args));
        ca = ptlrpc_req_async_args(req);
        ca->ca_set_arg = arg;
        ca->ca_lock = lock;

        /* server namespace, doesn't need lock */
        req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
                             ldlm_lvbo_size(lock));
        ptlrpc_request_set_replen(req);

        req->rq_send_state = LUSTRE_IMP_FULL;
        /* ptlrpc_request_alloc_pack already set timeout */
        if (AT_OFF)
                req->rq_timeout = ldlm_get_rq_timeout();

        req->rq_interpret_reply = ldlm_cb_interpret;

        if (lock->l_export && lock->l_export->exp_nid_stats &&
            lock->l_export->exp_nid_stats->nid_ldlm_stats)
                lprocfs_counter_incr(lock->l_export->exp_nid_stats->nid_ldlm_stats,
                                     LDLM_GL_CALLBACK - LDLM_FIRST_OPC);

        rc = ldlm_ast_fini(req, arg, lock, 0);

        RETURN(rc);
}
EXPORT_SYMBOL(ldlm_server_glimpse_ast);
int ldlm_glimpse_locks(struct ldlm_resource *res, cfs_list_t *gl_work_list)
{
        int rc;
        ENTRY;

        rc = ldlm_run_ast_work(ldlm_res_to_ns(res), gl_work_list,
                               LDLM_WORK_GL_AST);
        if (rc == -ERESTART)
                ldlm_reprocess_all(res);

        RETURN(rc);
}
EXPORT_SYMBOL(ldlm_glimpse_locks);
/* return LDLM lock associated with a lock callback request */
struct ldlm_lock *ldlm_request_lock(struct ptlrpc_request *req)
{
        struct ldlm_cb_async_args       *ca;
        struct ldlm_lock                *lock;
        ENTRY;

        ca = ptlrpc_req_async_args(req);
        lock = ca->ca_lock;
        if (lock == NULL)
                RETURN(ERR_PTR(-EFAULT));

        RETURN(lock);
}
EXPORT_SYMBOL(ldlm_request_lock);
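/* Bump the per-type enqueue counter (plain/extent/flock/ibits, and
 * glimpse-intent enqueues on extents) in the service stats. */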
static void ldlm_svc_get_eopc(const struct ldlm_request *dlm_req,
                              struct lprocfs_stats *srv_stats)
{
        int lock_type = 0, op = 0;

        lock_type = dlm_req->lock_desc.l_resource.lr_type;

        switch (lock_type) {
        case LDLM_PLAIN:
                op = PTLRPC_LAST_CNTR + LDLM_PLAIN_ENQUEUE;
                break;
        case LDLM_EXTENT:
                if (dlm_req->lock_flags & LDLM_FL_HAS_INTENT)
                        op = PTLRPC_LAST_CNTR + LDLM_GLIMPSE_ENQUEUE;
                else
                        op = PTLRPC_LAST_CNTR + LDLM_EXTENT_ENQUEUE;
                break;
        case LDLM_FLOCK:
                op = PTLRPC_LAST_CNTR + LDLM_FLOCK_ENQUEUE;
                break;
        case LDLM_IBITS:
                op = PTLRPC_LAST_CNTR + LDLM_IBITS_ENQUEUE;
                break;
        default:
                op = 0;
                break;
        }

        if (op)
                lprocfs_counter_incr(srv_stats, op);

        return;
}
/**
 * Main server-side entry point into LDLM for enqueue. This is called by ptlrpc
 * service threads to carry out client lock enqueueing requests.
 */
int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
                         struct ptlrpc_request *req,
                         const struct ldlm_request *dlm_req,
                         const struct ldlm_callback_suite *cbs)
{
        struct ldlm_reply *dlm_rep;
        __u64 flags;
        ldlm_error_t err = ELDLM_OK;
        struct ldlm_lock *lock = NULL;
        void *cookie = NULL;
        int rc = 0;
        ENTRY;

        LDLM_DEBUG_NOLOCK("server-side enqueue handler START");

        ldlm_request_cancel(req, dlm_req, LDLM_ENQUEUE_CANCEL_OFF);
        flags = ldlm_flags_from_wire(dlm_req->lock_flags);

        LASSERT(req->rq_export);

        if (ptlrpc_req2svc(req)->srv_stats != NULL)
                ldlm_svc_get_eopc(dlm_req, ptlrpc_req2svc(req)->srv_stats);

        if (req->rq_export && req->rq_export->exp_nid_stats &&
            req->rq_export->exp_nid_stats->nid_ldlm_stats)
                lprocfs_counter_incr(req->rq_export->exp_nid_stats->nid_ldlm_stats,
                                     LDLM_ENQUEUE - LDLM_FIRST_OPC);

        if (unlikely(dlm_req->lock_desc.l_resource.lr_type < LDLM_MIN_TYPE ||
                     dlm_req->lock_desc.l_resource.lr_type >= LDLM_MAX_TYPE)) {
                DEBUG_REQ(D_ERROR, req, "invalid lock request type %d",
                          dlm_req->lock_desc.l_resource.lr_type);
                GOTO(out, rc = -EFAULT);
        }

        if (unlikely(dlm_req->lock_desc.l_req_mode <= LCK_MINMODE ||
                     dlm_req->lock_desc.l_req_mode >= LCK_MAXMODE ||
                     dlm_req->lock_desc.l_req_mode &
                     (dlm_req->lock_desc.l_req_mode-1))) {
                DEBUG_REQ(D_ERROR, req, "invalid lock request mode %d",
                          dlm_req->lock_desc.l_req_mode);
                GOTO(out, rc = -EFAULT);
        }

        if (exp_connect_flags(req->rq_export) & OBD_CONNECT_IBITS) {
                if (unlikely(dlm_req->lock_desc.l_resource.lr_type ==
                             LDLM_PLAIN)) {
                        DEBUG_REQ(D_ERROR, req,
                                  "PLAIN lock request from IBITS client?");
                        GOTO(out, rc = -EPROTO);
                }
        } else if (unlikely(dlm_req->lock_desc.l_resource.lr_type ==
                            LDLM_IBITS)) {
                DEBUG_REQ(D_ERROR, req,
                          "IBITS lock request from unaware client?");
                GOTO(out, rc = -EPROTO);
        }

#if 0
        /* FIXME this makes it impossible to use LDLM_PLAIN locks -- check
           against server's _CONNECT_SUPPORTED flags? (I don't want to use
           ibits for mgc/mgs) */

        /* INODEBITS_INTEROP: Perform conversion from plain lock to
         * inodebits lock if client does not support them. */
        if (!(exp_connect_flags(req->rq_export) & OBD_CONNECT_IBITS) &&
            (dlm_req->lock_desc.l_resource.lr_type == LDLM_PLAIN)) {
                dlm_req->lock_desc.l_resource.lr_type = LDLM_IBITS;
                dlm_req->lock_desc.l_policy_data.l_inodebits.bits =
                        MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE;
                if (dlm_req->lock_desc.l_req_mode == LCK_PR)
                        dlm_req->lock_desc.l_req_mode = LCK_CR;
        }
#endif

        if (unlikely(flags & LDLM_FL_REPLAY)) {
                /* Find an existing lock in the per-export lock hash */
                /* In the function below, .hs_keycmp resolves to
                 * ldlm_export_lock_keycmp() */
                /* coverity[overrun-buffer-val] */
                lock = cfs_hash_lookup(req->rq_export->exp_lock_hash,
                                       (void *)&dlm_req->lock_handle[0]);
                if (lock != NULL) {
                        DEBUG_REQ(D_DLMTRACE, req, "found existing lock cookie "
                                  LPX64, lock->l_handle.h_cookie);
                        GOTO(existing_lock, rc = 0);
                }
        }

        /* The lock's callback data might be set in the policy function */
        lock = ldlm_lock_create(ns, &dlm_req->lock_desc.l_resource.lr_name,
                                dlm_req->lock_desc.l_resource.lr_type,
                                dlm_req->lock_desc.l_req_mode,
                                cbs, NULL, 0, LVB_T_NONE);
        if (lock == NULL)
                GOTO(out, rc = -ENOMEM);

        lock->l_last_activity = cfs_time_current_sec();
        lock->l_remote_handle = dlm_req->lock_handle[0];
        LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");

        OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_BLOCKED, obd_timeout * 2);
        /* Don't enqueue a lock onto the export if it has been disconnected
         * due to eviction (bug 3822) or server umount (bug 24324).
         * Cancel it now instead. */
        if (req->rq_export->exp_disconnected) {
                LDLM_ERROR(lock, "lock on disconnected export %p",
                           req->rq_export);
                GOTO(out, rc = -ENOTCONN);
        }

        lock->l_export = class_export_lock_get(req->rq_export, lock);
        if (lock->l_export->exp_lock_hash)
                cfs_hash_add(lock->l_export->exp_lock_hash,
                             &lock->l_remote_handle,
                             &lock->l_exp_hash);

existing_lock:

        if (flags & LDLM_FL_HAS_INTENT) {
                /* In this case, the reply buffer is allocated deep in
                 * local_lock_enqueue by the policy function. */
                cookie = req;
        } else {
                /* based on the assumption that lvb size never changes during
                 * resource life time otherwise it need resource->lr_lock's
                 * protection */
                req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB,
                                     RCL_SERVER, ldlm_lvbo_size(lock));

                if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR))
                        GOTO(out, rc = -ENOMEM);

                rc = req_capsule_server_pack(&req->rq_pill);
                if (rc)
                        GOTO(out, rc);
        }

        if (dlm_req->lock_desc.l_resource.lr_type != LDLM_PLAIN)
                ldlm_convert_policy_to_local(req->rq_export,
                                          dlm_req->lock_desc.l_resource.lr_type,
                                          &dlm_req->lock_desc.l_policy_data,
                                          &lock->l_policy_data);
        if (dlm_req->lock_desc.l_resource.lr_type == LDLM_EXTENT)
                lock->l_req_extent = lock->l_policy_data.l_extent;

        err = ldlm_lock_enqueue(ns, &lock, cookie, &flags);
        if (err) {
                if ((int)err < 0)
                        rc = (int)err;
                GOTO(out, err);
        }

        dlm_rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
        dlm_rep->lock_flags = ldlm_flags_to_wire(flags);

        ldlm_lock2desc(lock, &dlm_rep->lock_desc);
        ldlm_lock2handle(lock, &dlm_rep->lock_handle);

        /* We never send a blocking AST until the lock is granted, but
         * we can tell it right now */
        lock_res_and_lock(lock);

        /* Now take into account flags to be inherited from original lock
           request both in reply to client and in our own lock flags. */
        dlm_rep->lock_flags |= dlm_req->lock_flags & LDLM_INHERIT_FLAGS;
        lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags &
                                              LDLM_INHERIT_FLAGS);

        /* Don't move a pending lock onto the export if it has already been
         * disconnected due to eviction (bug 5683) or server umount (bug 24324).
         * Cancel it now instead. */
        if (unlikely(req->rq_export->exp_disconnected ||
                     OBD_FAIL_CHECK(OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT))) {
                LDLM_ERROR(lock, "lock on destroyed export %p", req->rq_export);
                rc = -ENOTCONN;
        } else if (lock->l_flags & LDLM_FL_AST_SENT) {
                dlm_rep->lock_flags |= ldlm_flags_to_wire(LDLM_FL_AST_SENT);
                if (lock->l_granted_mode == lock->l_req_mode) {
                        /*
                         * Only cancel lock if it was granted, because it would
                         * be destroyed immediately and would never be granted
                         * in the future, causing timeouts on client. Not
                         * granted lock will be cancelled immediately after
                         * sending completion AST.
                         */
                        if (dlm_rep->lock_flags & LDLM_FL_CANCEL_ON_BLOCK) {
                                unlock_res_and_lock(lock);
                                ldlm_lock_cancel(lock);
                                lock_res_and_lock(lock);
                        } else
                                ldlm_add_waiting_lock(lock);
                }
        }
        /* Make sure we never ever grant usual metadata locks to liblustre
           clients */
        if ((dlm_req->lock_desc.l_resource.lr_type == LDLM_PLAIN ||
            dlm_req->lock_desc.l_resource.lr_type == LDLM_IBITS) &&
             req->rq_export->exp_libclient) {
                if (unlikely(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) ||
                             !(dlm_rep->lock_flags & LDLM_FL_CANCEL_ON_BLOCK))){
                        CERROR("Granting sync lock to libclient. "
                               "req fl %d, rep fl %d, lock fl "LPX64"\n",
                               dlm_req->lock_flags, dlm_rep->lock_flags,
                               lock->l_flags);
                        LDLM_ERROR(lock, "sync lock");
                        if (dlm_req->lock_flags & LDLM_FL_HAS_INTENT) {
                                struct ldlm_intent *it;

                                it = req_capsule_client_get(&req->rq_pill,
                                                            &RMF_LDLM_INTENT);
                                if (it != NULL) {
                                        CERROR("This is intent %s ("LPU64")\n",
                                               ldlm_it2str(it->opc), it->opc);
                                }
                        }
                }
        }

        unlock_res_and_lock(lock);

        EXIT;
 out:
        req->rq_status = rc ?: err; /* return either error - bug 11190 */
        if (!req->rq_packed_final) {
                err = lustre_pack_reply(req, 1, NULL, NULL);
                if (rc == 0)
                        rc = err;
        }

        /* The LOCK_CHANGED code in ldlm_lock_enqueue depends on this
         * ldlm_reprocess_all. If this moves, revisit that code. -phil */
        if (lock != NULL) {
                LDLM_DEBUG(lock, "server-side enqueue handler, sending reply "
                           "(err=%d, rc=%d)", err, rc);

                if (rc == 0) {
                        if (req_capsule_has_field(&req->rq_pill, &RMF_DLM_LVB,
                                                  RCL_SERVER) &&
                            ldlm_lvbo_size(lock) > 0) {
                                void *buf;
                                int buflen;

                                buf = req_capsule_server_get(&req->rq_pill,
                                                             &RMF_DLM_LVB);
                                LASSERTF(buf != NULL, "req %p, lock %p\n",
                                         req, lock);
                                buflen = req_capsule_get_size(&req->rq_pill,
                                                &RMF_DLM_LVB, RCL_SERVER);
                                buflen = ldlm_lvbo_fill(lock, buf, buflen);
                                if (buflen >= 0)
                                        req_capsule_shrink(&req->rq_pill,
                                                           &RMF_DLM_LVB,
                                                           buflen, RCL_SERVER);
                                else
                                        rc = buflen;
                        }
                } else {
                        lock_res_and_lock(lock);
                        ldlm_resource_unlink_lock(lock);
                        ldlm_lock_destroy_nolock(lock);
                        unlock_res_and_lock(lock);
                }

                if (!err && dlm_req->lock_desc.l_resource.lr_type != LDLM_FLOCK)
                        ldlm_reprocess_all(lock->l_resource);

                LDLM_LOCK_RELEASE(lock);
        }

        LDLM_DEBUG_NOLOCK("server-side enqueue handler END (lock %p, rc %d)",
                          lock, rc);

        return rc;
}
EXPORT_SYMBOL(ldlm_handle_enqueue0);
/**
 * Old-style LDLM main entry point for server code enqueue.
 */
int ldlm_handle_enqueue(struct ptlrpc_request *req,
                        ldlm_completion_callback completion_callback,
                        ldlm_blocking_callback blocking_callback,
                        ldlm_glimpse_callback glimpse_callback)
{
        struct ldlm_request *dlm_req;
        struct ldlm_callback_suite cbs = {
                .lcs_completion = completion_callback,
                .lcs_blocking   = blocking_callback,
                .lcs_glimpse    = glimpse_callback
        };
        int rc;

        dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
        if (dlm_req != NULL) {
                rc = ldlm_handle_enqueue0(req->rq_export->exp_obd->obd_namespace,
                                          req, dlm_req, &cbs);
        } else {
                rc = -EFAULT;
        }
        return rc;
}
EXPORT_SYMBOL(ldlm_handle_enqueue);
/**
 * Main LDLM entry point for server code to process lock conversion requests.
 */
int ldlm_handle_convert0(struct ptlrpc_request *req,
                         const struct ldlm_request *dlm_req)
{
        struct ldlm_reply *dlm_rep;
        struct ldlm_lock *lock;
        int rc;
        ENTRY;

        if (req->rq_export && req->rq_export->exp_nid_stats &&
            req->rq_export->exp_nid_stats->nid_ldlm_stats)
                lprocfs_counter_incr(req->rq_export->exp_nid_stats->nid_ldlm_stats,
                                     LDLM_CONVERT - LDLM_FIRST_OPC);

        rc = req_capsule_server_pack(&req->rq_pill);
        if (rc)
                RETURN(rc);

        dlm_rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
        dlm_rep->lock_flags = dlm_req->lock_flags;

        lock = ldlm_handle2lock(&dlm_req->lock_handle[0]);
        if (!lock) {
                req->rq_status = LUSTRE_EINVAL;
        } else {
                void *res = NULL;

                LDLM_DEBUG(lock, "server-side convert handler START");

                lock->l_last_activity = cfs_time_current_sec();
                res = ldlm_lock_convert(lock, dlm_req->lock_desc.l_req_mode,
                                        &dlm_rep->lock_flags);
                if (res) {
                        if (ldlm_del_waiting_lock(lock))
                                LDLM_DEBUG(lock, "converted waiting lock");
                        req->rq_status = 0;
                } else {
                        req->rq_status = LUSTRE_EDEADLK;
                }
        }

        if (lock) {
                if (!req->rq_status)
                        ldlm_reprocess_all(lock->l_resource);
                LDLM_DEBUG(lock, "server-side convert handler END");
                LDLM_LOCK_PUT(lock);
        } else
                LDLM_DEBUG_NOLOCK("server-side convert handler END");

        RETURN(0);
}
EXPORT_SYMBOL(ldlm_handle_convert0);
/**
 * Old-style main LDLM entry point for server code to process lock conversion
 * requests.
 */
int ldlm_handle_convert(struct ptlrpc_request *req)
{
        int rc;
        struct ldlm_request *dlm_req;

        dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
        if (dlm_req != NULL) {
                rc = ldlm_handle_convert0(req, dlm_req);
        } else {
                CERROR("Can't unpack dlm_req\n");
                rc = -EFAULT;
        }
        return rc;
}
EXPORT_SYMBOL(ldlm_handle_convert);
/**
 * Cancel all the locks whose handles are packed into ldlm_request.
 *
 * Called by server code expecting such combined cancel activity
 * requests.
 */
int ldlm_request_cancel(struct ptlrpc_request *req,
                        const struct ldlm_request *dlm_req, int first)
{
        struct ldlm_resource *res, *pres = NULL;
        struct ldlm_lock *lock;
        int i, count, done = 0;
        ENTRY;

        count = dlm_req->lock_count ? dlm_req->lock_count : 1;
        if (first >= count)
                RETURN(0);

        if (count == 1 && dlm_req->lock_handle[0].cookie == 0)
                RETURN(0);

        /* There is no lock on the server at the replay time,
         * skip lock cancelling to make replay tests pass. */
        if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)
                RETURN(0);

        LDLM_DEBUG_NOLOCK("server-side cancel handler START: %d locks, "
                          "starting at %d", count, first);

        for (i = first; i < count; i++) {
                lock = ldlm_handle2lock(&dlm_req->lock_handle[i]);
                if (!lock) {
                        LDLM_DEBUG_NOLOCK("server-side cancel handler stale "
                                          "lock (cookie "LPU64")",
                                          dlm_req->lock_handle[i].cookie);
                        continue;
                }

                res = lock->l_resource;
                done++;

                /* This code is an optimization to only attempt lock
                 * granting on the resource (that could be CPU-expensive)
                 * after we are done cancelling lock in that resource. */
                if (res != pres) {
                        if (pres != NULL) {
                                ldlm_reprocess_all(pres);
                                LDLM_RESOURCE_DELREF(pres);
                                ldlm_resource_putref(pres);
                        }
                        if (res != NULL) {
                                ldlm_resource_getref(res);
                                LDLM_RESOURCE_ADDREF(res);
                                ldlm_res_lvbo_update(res, NULL, 1);
                        }
                        pres = res;
                }
                ldlm_lock_cancel(lock);
                LDLM_LOCK_PUT(lock);
        }
        if (pres != NULL) {
                ldlm_reprocess_all(pres);
                LDLM_RESOURCE_DELREF(pres);
                ldlm_resource_putref(pres);
        }
        LDLM_DEBUG_NOLOCK("server-side cancel handler END");
        RETURN(done);
}
EXPORT_SYMBOL(ldlm_request_cancel);
/**
 * Main LDLM entry point for server code to cancel locks.
 *
 * Typically gets called from service handler on LDLM_CANCEL opc.
 */
int ldlm_handle_cancel(struct ptlrpc_request *req)
{
        struct ldlm_request *dlm_req;
        int rc;
        ENTRY;

        dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
        if (dlm_req == NULL) {
                CDEBUG(D_INFO, "bad request buffer for cancel\n");
                RETURN(-EFAULT);
        }

        if (req->rq_export && req->rq_export->exp_nid_stats &&
            req->rq_export->exp_nid_stats->nid_ldlm_stats)
                lprocfs_counter_incr(req->rq_export->exp_nid_stats->nid_ldlm_stats,
                                     LDLM_CANCEL - LDLM_FIRST_OPC);

        rc = req_capsule_server_pack(&req->rq_pill);
        if (rc)
                RETURN(rc);

        if (!ldlm_request_cancel(req, dlm_req, 0))
                req->rq_status = LUSTRE_ESTALE;

        RETURN(ptlrpc_reply(req));
}
EXPORT_SYMBOL(ldlm_handle_cancel);
#endif /* HAVE_SERVER_SUPPORT */
/**
 * Callback handler for receiving incoming blocking ASTs.
 *
 * This can only happen on the client side.
 */
void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
                             struct ldlm_lock_desc *ld, struct ldlm_lock *lock)
{
        int do_ast;
        ENTRY;

        LDLM_DEBUG(lock, "client blocking AST callback handler");

        lock_res_and_lock(lock);
        lock->l_flags |= LDLM_FL_CBPENDING;

        if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)
                lock->l_flags |= LDLM_FL_CANCEL;

        do_ast = (!lock->l_readers && !lock->l_writers);
        unlock_res_and_lock(lock);

        if (do_ast) {
                CDEBUG(D_DLMTRACE, "Lock %p already unused, calling callback (%p)\n",
                       lock, lock->l_blocking_ast);
                if (lock->l_blocking_ast != NULL)
                        lock->l_blocking_ast(lock, ld, lock->l_ast_data,
                                             LDLM_CB_BLOCKING);
        } else {
                CDEBUG(D_DLMTRACE, "Lock %p is referenced, will be cancelled later\n",
                       lock);
        }

        LDLM_DEBUG(lock, "client blocking callback handler END");
        LDLM_LOCK_RELEASE(lock);
        EXIT;
}
/**
 * Callback handler for receiving incoming completion ASTs.
 *
 * This can only happen on the client side.
 */
static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
                                    struct ldlm_namespace *ns,
                                    struct ldlm_request *dlm_req,
                                    struct ldlm_lock *lock)
{
        int lvb_len;
        CFS_LIST_HEAD(ast_list);
        int rc = 0;
        ENTRY;

        LDLM_DEBUG(lock, "client completion callback handler START");

        if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) {
                int to = cfs_time_seconds(1);
                while (to > 0) {
                        cfs_schedule_timeout_and_set_state(
                                CFS_TASK_INTERRUPTIBLE, to);
                        if (lock->l_granted_mode == lock->l_req_mode ||
                            lock->l_flags & LDLM_FL_DESTROYED)
                                break;
                }
        }

        lvb_len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT);
        if (lvb_len < 0) {
                LDLM_ERROR(lock, "Failed to get lvb_len, rc = %d", lvb_len);
                GOTO(out, rc = lvb_len);
        } else if (lvb_len > 0) {
                if (lock->l_lvb_len > 0) {
                        /* for extent lock, lvb contains ost_lvb{}. */
                        LASSERT(lock->l_lvb_data != NULL);

                        if (unlikely(lock->l_lvb_len < lvb_len)) {
                                LDLM_ERROR(lock, "Replied LVB is larger than "
                                           "expectation, expected = %d, "
                                           "replied = %d",
                                           lock->l_lvb_len, lvb_len);
                                GOTO(out, rc = -EINVAL);
                        }
                } else if (ldlm_has_layout(lock)) { /* for layout lock, lvb has
                                                     * variable length */
                        void *lvb_data;

                        OBD_ALLOC(lvb_data, lvb_len);
                        if (lvb_data == NULL) {
                                LDLM_ERROR(lock, "No memory: %d.\n", lvb_len);
                                GOTO(out, rc = -ENOMEM);
                        }

                        lock_res_and_lock(lock);
                        LASSERT(lock->l_lvb_data == NULL);
                        lock->l_lvb_data = lvb_data;
                        lock->l_lvb_len = lvb_len;
                        unlock_res_and_lock(lock);
                }
        }

        lock_res_and_lock(lock);
        if ((lock->l_flags & LDLM_FL_DESTROYED) ||
            lock->l_granted_mode == lock->l_req_mode) {
                /* bug 11300: the lock has already been granted */
                unlock_res_and_lock(lock);
                LDLM_DEBUG(lock, "Double grant race happened");
                GOTO(out, rc = 0);
        }

        /* If we receive the completion AST before the actual enqueue returned,
         * then we might need to switch lock modes, resources, or extents. */
        if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) {
                lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
                LDLM_DEBUG(lock, "completion AST, new lock mode");
        }

        if (lock->l_resource->lr_type != LDLM_PLAIN) {
                ldlm_convert_policy_to_local(req->rq_export,
                                          dlm_req->lock_desc.l_resource.lr_type,
                                          &dlm_req->lock_desc.l_policy_data,
                                          &lock->l_policy_data);
                LDLM_DEBUG(lock, "completion AST, new policy data");
        }

        ldlm_resource_unlink_lock(lock);
        if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
                   &lock->l_resource->lr_name,
                   sizeof(lock->l_resource->lr_name)) != 0) {
                unlock_res_and_lock(lock);
                rc = ldlm_lock_change_resource(ns, lock,
                                &dlm_req->lock_desc.l_resource.lr_name);
                if (rc < 0) {
                        LDLM_ERROR(lock, "Failed to allocate resource");
                        GOTO(out, rc);
                }
                LDLM_DEBUG(lock, "completion AST, new resource");
                CERROR("change resource!\n");
                lock_res_and_lock(lock);
        }

        if (dlm_req->lock_flags & LDLM_FL_AST_SENT) {
                /* BL_AST locks are not needed in LRU.
                 * Let ldlm_cancel_lru() be fast. */
                ldlm_lock_remove_from_lru(lock);
                lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
                LDLM_DEBUG(lock, "completion AST includes blocking AST");
        }

        if (lock->l_lvb_len > 0) {
                rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_CLIENT,
                                   lock->l_lvb_data, lvb_len);
                if (rc < 0) {
                        unlock_res_and_lock(lock);
                        GOTO(out, rc);
                }
        }

        ldlm_grant_lock(lock, &ast_list);
        unlock_res_and_lock(lock);

        LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work");

        /* Let Enqueue call osc_lock_upcall() and initialize
         * l_ast_data */
        OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 2);

        ldlm_run_ast_work(ns, &ast_list, LDLM_WORK_CP_AST);

        LDLM_DEBUG_NOLOCK("client completion callback handler END (lock %p)",
                          lock);

        EXIT;

out:
        if (rc < 0) {
                lock_res_and_lock(lock);
                lock->l_flags |= LDLM_FL_FAILED;
                unlock_res_and_lock(lock);
                cfs_waitq_signal(&lock->l_waitq);
        }
        LDLM_LOCK_RELEASE(lock);
}
/**
 * Callback handler for receiving incoming glimpse ASTs.
 *
 * This can only happen on the client side. After handling the glimpse AST
 * we also consider dropping the lock here if it is unused locally for a
 * long time.
 */
static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
                                    struct ldlm_namespace *ns,
                                    struct ldlm_request *dlm_req,
                                    struct ldlm_lock *lock)
{
        int rc = -ENOSYS;
        ENTRY;

        LDLM_DEBUG(lock, "client glimpse AST callback handler");

        if (lock->l_glimpse_ast != NULL)
                rc = lock->l_glimpse_ast(lock, req);

        if (req->rq_repmsg != NULL) {
                ptlrpc_reply(req);
        } else {
                req->rq_status = rc;
                ptlrpc_error(req);
        }

        lock_res_and_lock(lock);
        if (lock->l_granted_mode == LCK_PW &&
            !lock->l_readers && !lock->l_writers &&
            cfs_time_after(cfs_time_current(),
                           cfs_time_add(lock->l_last_used,
                                        cfs_time_seconds(10)))) {
                unlock_res_and_lock(lock);
                if (ldlm_bl_to_thread_lock(ns, NULL, lock))
                        ldlm_handle_bl_callback(ns, NULL, lock);

                EXIT;
                return;
        }
        unlock_res_and_lock(lock);
        LDLM_LOCK_RELEASE(lock);
        EXIT;
}
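/* Reply to a callback request unless replies are suppressed (rq_no_reply),
 * packing the reply buffer first if it was not packed as final. */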
static int ldlm_callback_reply(struct ptlrpc_request *req, int rc)
{
        if (req->rq_no_reply)
                return 0;

        req->rq_status = rc;
        if (!req->rq_packed_final) {
                rc = lustre_pack_reply(req, 1, NULL, NULL);
                if (rc)
                        return rc;
        }
        return ptlrpc_reply(req);
}
#ifdef __KERNEL__
static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi,
                               ldlm_cancel_flags_t cancel_flags)
{
        struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
        ENTRY;

        spin_lock(&blp->blp_lock);
        if (blwi->blwi_lock &&
            blwi->blwi_lock->l_flags & LDLM_FL_DISCARD_DATA) {
                /* add LDLM_FL_DISCARD_DATA requests to the priority list */
                cfs_list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list);
        } else {
                /* other blocking callbacks are added to the regular list */
                cfs_list_add_tail(&blwi->blwi_entry, &blp->blp_list);
        }
        spin_unlock(&blp->blp_lock);

        cfs_waitq_signal(&blp->blp_waitq);

        /* can not check blwi->blwi_flags as blwi could be already freed in
           LCF_ASYNC mode */
        if (!(cancel_flags & LCF_ASYNC))
                wait_for_completion(&blwi->blwi_comp);

        RETURN(0);
}
static inline void init_blwi(struct ldlm_bl_work_item *blwi,
                             struct ldlm_namespace *ns,
                             struct ldlm_lock_desc *ld,
                             cfs_list_t *cancels, int count,
                             struct ldlm_lock *lock,
                             ldlm_cancel_flags_t cancel_flags)
{
        init_completion(&blwi->blwi_comp);
        CFS_INIT_LIST_HEAD(&blwi->blwi_head);

        if (cfs_memory_pressure_get())
                blwi->blwi_mem_pressure = 1;

        blwi->blwi_ns = ns;
        blwi->blwi_flags = cancel_flags;
        if (ld != NULL)
                blwi->blwi_ld = *ld;
        if (count) {
                cfs_list_add(&blwi->blwi_head, cancels);
                cfs_list_del_init(cancels);
                blwi->blwi_count = count;
        } else {
                blwi->blwi_lock = lock;
        }
}
/**
 * Queues a list of locks \a cancels containing \a count locks
 * for later processing by a blocking thread. If \a count is zero,
 * then the lock referenced as \a lock is queued instead.
 *
 * The blocking thread would then call ->l_blocking_ast callback in the lock.
 * If list addition fails an error is returned and caller is supposed to
 * call ->l_blocking_ast itself.
 */
static int ldlm_bl_to_thread(struct ldlm_namespace *ns,
                             struct ldlm_lock_desc *ld,
                             struct ldlm_lock *lock,
                             cfs_list_t *cancels, int count,
                             ldlm_cancel_flags_t cancel_flags)
{
        ENTRY;

        if (cancels && count == 0)
                RETURN(0);

        if (cancel_flags & LCF_ASYNC) {
                struct ldlm_bl_work_item *blwi;

                OBD_ALLOC(blwi, sizeof(*blwi));
                if (blwi == NULL)
                        RETURN(-ENOMEM);
                init_blwi(blwi, ns, ld, cancels, count, lock, cancel_flags);

                RETURN(__ldlm_bl_to_thread(blwi, cancel_flags));
        } else {
                /* if it is synchronous call do minimum mem alloc, as it could
                 * be triggered from kernel shrinker
                 */
                struct ldlm_bl_work_item blwi;

                memset(&blwi, 0, sizeof(blwi));
                init_blwi(&blwi, ns, ld, cancels, count, lock, cancel_flags);
                RETURN(__ldlm_bl_to_thread(&blwi, cancel_flags));
        }
}
#endif /* __KERNEL__ */
int ldlm_bl_to_thread_lock(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
                           struct ldlm_lock *lock)
{
#ifdef __KERNEL__
        return ldlm_bl_to_thread(ns, ld, lock, NULL, 0, LCF_ASYNC);
#else
        return -ENOSYS;
#endif
}

int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
                           cfs_list_t *cancels, int count,
                           ldlm_cancel_flags_t cancel_flags)
{
#ifdef __KERNEL__
        return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags);
#else
        return -ENOSYS;
#endif
}
/* Setinfo coming from Server (eg MDT) to Client (eg MDC)! */
static int ldlm_handle_setinfo(struct ptlrpc_request *req)
{
        struct obd_device *obd = req->rq_export->exp_obd;
        char *key;
        void *val;
        int keylen, vallen;
        int rc = -ENOSYS;
        ENTRY;

        DEBUG_REQ(D_HSM, req, "%s: handle setinfo\n", obd->obd_name);

        req_capsule_set(&req->rq_pill, &RQF_OBD_SET_INFO);

        key = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
        if (key == NULL) {
                DEBUG_REQ(D_IOCTL, req, "no set_info key");
                RETURN(-EFAULT);
        }
        keylen = req_capsule_get_size(&req->rq_pill, &RMF_SETINFO_KEY,
                                      RCL_CLIENT);
        val = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL);
        if (val == NULL) {
                DEBUG_REQ(D_IOCTL, req, "no set_info val");
                RETURN(-EFAULT);
        }
        vallen = req_capsule_get_size(&req->rq_pill, &RMF_SETINFO_VAL,
                                      RCL_CLIENT);

        /* We are responsible for swabbing contents of val */

        if (KEY_IS(KEY_HSM_COPYTOOL_SEND))
                /* Pass it on to mdc (the "export" in this case) */
                rc = obd_set_info_async(req->rq_svc_thread->t_env,
                                        req->rq_export,
                                        sizeof(KEY_HSM_COPYTOOL_SEND),
                                        KEY_HSM_COPYTOOL_SEND,
                                        vallen, val, NULL);
        else
                DEBUG_REQ(D_WARNING, req, "ignoring unknown key %s", key);

        return rc;
}
static inline void ldlm_callback_errmsg(struct ptlrpc_request *req,
                                        const char *msg, int rc,
                                        struct lustre_handle *handle)
{
        DEBUG_REQ((req->rq_no_reply || rc) ? D_WARNING : D_DLMTRACE, req,
                  "%s: [nid %s] [rc %d] [lock "LPX64"]",
                  msg, libcfs_id2str(req->rq_peer), rc,
                  handle ? handle->cookie : 0);
        if (req->rq_no_reply)
                CWARN("No reply was sent, possibly because of bug 21636.\n");
        else if (rc)
                CWARN("Send reply failed, possibly because of bug 21636.\n");
}
static int ldlm_handle_qc_callback(struct ptlrpc_request *req)
{
        struct obd_quotactl *oqctl;
        struct client_obd *cli = &req->rq_export->exp_obd->u.cli;

        oqctl = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
        if (oqctl == NULL) {
                CERROR("Can't unpack obd_quotactl\n");
                RETURN(-EPROTO);
        }

        oqctl->qc_stat = ptlrpc_status_ntoh(oqctl->qc_stat);

        cli->cl_qchk_stat = oqctl->qc_stat;
        return 0;
}
/* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */
static int ldlm_callback_handler(struct ptlrpc_request *req)
{
        struct ldlm_namespace *ns;
        struct ldlm_request *dlm_req;
        struct ldlm_lock *lock;
        int rc;
        ENTRY;

        /* Requests arrive in sender's byte order. The ptlrpc service
         * handler has already checked and, if necessary, byte-swapped the
         * incoming request message body, but I am responsible for the
         * message buffers. */

        /* do nothing for sec context finalize */
        if (lustre_msg_get_opc(req->rq_reqmsg) == SEC_CTX_FINI)
                RETURN(0);

        req_capsule_init(&req->rq_pill, req, RCL_SERVER);

        if (req->rq_export == NULL) {
                rc = ldlm_callback_reply(req, -ENOTCONN);
                ldlm_callback_errmsg(req, "Operate on unconnected server",
                                     rc, NULL);
                RETURN(0);
        }

        LASSERT(req->rq_export != NULL);
        LASSERT(req->rq_export->exp_obd != NULL);

        switch (lustre_msg_get_opc(req->rq_reqmsg)) {
        case LDLM_BL_CALLBACK:
                if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
                        RETURN(0);
                break;
        case LDLM_CP_CALLBACK:
                if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CP_CALLBACK_NET))
                        RETURN(0);
                break;
        case LDLM_GL_CALLBACK:
                if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GL_CALLBACK_NET))
                        RETURN(0);
                break;
        case LDLM_SET_INFO:
                rc = ldlm_handle_setinfo(req);
                ldlm_callback_reply(req, rc);
                RETURN(0);
        case OBD_LOG_CANCEL: /* remove this eventually - for 1.4.0 compat */
                CERROR("shouldn't be handling OBD_LOG_CANCEL on DLM thread\n");
                req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL);
                if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET))
                        RETURN(0);
                rc = llog_origin_handle_cancel(req);
                if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP))
                        RETURN(0);
                ldlm_callback_reply(req, rc);
                RETURN(0);
        case LLOG_ORIGIN_HANDLE_CREATE:
                req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE);
                if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
                        RETURN(0);
                rc = llog_origin_handle_open(req);
                ldlm_callback_reply(req, rc);
                RETURN(0);
        case LLOG_ORIGIN_HANDLE_NEXT_BLOCK:
                req_capsule_set(&req->rq_pill,
                                &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
                if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
                        RETURN(0);
                rc = llog_origin_handle_next_block(req);
                ldlm_callback_reply(req, rc);
                RETURN(0);
        case LLOG_ORIGIN_HANDLE_READ_HEADER:
                req_capsule_set(&req->rq_pill,
                                &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER);
                if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
                        RETURN(0);
                rc = llog_origin_handle_read_header(req);
                ldlm_callback_reply(req, rc);
                RETURN(0);
        case LLOG_ORIGIN_HANDLE_CLOSE:
                if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
                        RETURN(0);
                rc = llog_origin_handle_close(req);
                ldlm_callback_reply(req, rc);
                RETURN(0);
        case OBD_QC_CALLBACK:
                req_capsule_set(&req->rq_pill, &RQF_QC_CALLBACK);
                if (OBD_FAIL_CHECK(OBD_FAIL_OBD_QC_CALLBACK_NET))
                        RETURN(0);
                rc = ldlm_handle_qc_callback(req);
                ldlm_callback_reply(req, rc);
                RETURN(0);
        default:
                CERROR("unknown opcode %u\n",
                       lustre_msg_get_opc(req->rq_reqmsg));
                ldlm_callback_reply(req, -EPROTO);
                RETURN(0);
        }

        ns = req->rq_export->exp_obd->obd_namespace;
        LASSERT(ns != NULL);

        req_capsule_set(&req->rq_pill, &RQF_LDLM_CALLBACK);

        dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
        if (dlm_req == NULL) {
                rc = ldlm_callback_reply(req, -EPROTO);
                ldlm_callback_errmsg(req, "Operate without parameter", rc,
                                     NULL);
                RETURN(0);
        }

        /* Force a known safe race: send a cancel to the server for a lock
         * on which the server has already started a blocking callback. */
        if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE) &&
            lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
                rc = ldlm_cli_cancel(&dlm_req->lock_handle[0], 0);
                if (rc < 0)
                        CERROR("ldlm_cli_cancel: %d\n", rc);
        }

        lock = ldlm_handle2lock_long(&dlm_req->lock_handle[0], 0);
        if (!lock) {
                CDEBUG(D_DLMTRACE, "callback on lock "LPX64" - lock "
                       "disappeared\n", dlm_req->lock_handle[0].cookie);
                rc = ldlm_callback_reply(req, -EINVAL);
                ldlm_callback_errmsg(req, "Operate with invalid parameter", rc,
                                     &dlm_req->lock_handle[0]);
                RETURN(0);
        }

        if ((lock->l_flags & LDLM_FL_FAIL_LOC) &&
            lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK)
                OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);

        /* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from the AST. */
        lock_res_and_lock(lock);
        lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags &
                                              LDLM_FL_AST_MASK);
        if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
                /* If somebody cancels the lock and the cache is already
                 * dropped, or the lock failed before the CP AST reached
                 * the client, we can tell the server we have no lock.
                 * Otherwise, we should send the cancel after dropping
                 * the cache. */
                if (((lock->l_flags & LDLM_FL_CANCELING) &&
                     (lock->l_flags & LDLM_FL_BL_DONE)) ||
                    (lock->l_flags & LDLM_FL_FAILED)) {
                        LDLM_DEBUG(lock, "callback on lock "
                                   LPX64" - lock disappeared\n",
                                   dlm_req->lock_handle[0].cookie);
                        unlock_res_and_lock(lock);
                        LDLM_LOCK_RELEASE(lock);
                        rc = ldlm_callback_reply(req, -EINVAL);
                        ldlm_callback_errmsg(req, "Operate on stale lock", rc,
                                             &dlm_req->lock_handle[0]);
                        RETURN(0);
                }
                /* BL_AST locks are not needed in the LRU.
                 * Let ldlm_cancel_lru() be fast. */
                ldlm_lock_remove_from_lru(lock);
                lock->l_flags |= LDLM_FL_BL_AST;
        }
        unlock_res_and_lock(lock);

        /* We want the OST thread to get this reply so that it can respond
         * to OST requests (write cache writeback) that might be triggered
         * in the callback.
         *
         * But we'd also like to be able to indicate in the reply that we're
         * cancelling right now, because it's unused, or have an intent result
         * in the reply, so we might have to push the responsibility for
         * sending the reply down into the AST handlers, alas. */

        switch (lustre_msg_get_opc(req->rq_reqmsg)) {
        case LDLM_BL_CALLBACK:
                CDEBUG(D_INODE, "blocking ast\n");
                req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK);
                if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)) {
                        rc = ldlm_callback_reply(req, 0);
                        if (req->rq_no_reply || rc)
                                ldlm_callback_errmsg(req, "Normal process", rc,
                                                     &dlm_req->lock_handle[0]);
                }
                if (ldlm_bl_to_thread_lock(ns, &dlm_req->lock_desc, lock))
                        ldlm_handle_bl_callback(ns, &dlm_req->lock_desc, lock);
                break;
        case LDLM_CP_CALLBACK:
                CDEBUG(D_INODE, "completion ast\n");
                req_capsule_extend(&req->rq_pill, &RQF_LDLM_CP_CALLBACK);
                ldlm_callback_reply(req, 0);
                ldlm_handle_cp_callback(req, ns, dlm_req, lock);
                break;
        case LDLM_GL_CALLBACK:
                CDEBUG(D_INODE, "glimpse ast\n");
                req_capsule_extend(&req->rq_pill, &RQF_LDLM_GL_CALLBACK);
                ldlm_handle_gl_callback(req, ns, dlm_req, lock);
                break;
        default:
                LBUG(); /* checked above */
        }

        RETURN(0);
}
#ifdef HAVE_SERVER_SUPPORT
/**
 * Main handler for canceld thread.
 *
 * Separated into its own thread to avoid deadlocks.
 */
static int ldlm_cancel_handler(struct ptlrpc_request *req)
{
        int rc;
        ENTRY;

        /* Requests arrive in sender's byte order. The ptlrpc service
         * handler has already checked and, if necessary, byte-swapped the
         * incoming request message body, but I am responsible for the
         * message buffers. */

        req_capsule_init(&req->rq_pill, req, RCL_SERVER);

        if (req->rq_export == NULL) {
                struct ldlm_request *dlm_req;

                CERROR("%s from %s arrived at %lu with bad export cookie "
                       LPU64"\n",
                       ll_opcode2str(lustre_msg_get_opc(req->rq_reqmsg)),
                       libcfs_nid2str(req->rq_peer.nid),
                       req->rq_arrival_time.tv_sec,
                       lustre_msg_get_handle(req->rq_reqmsg)->cookie);

                if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_CANCEL) {
                        req_capsule_set(&req->rq_pill, &RQF_LDLM_CALLBACK);
                        dlm_req = req_capsule_client_get(&req->rq_pill,
                                                         &RMF_DLM_REQ);
                        if (dlm_req != NULL)
                                ldlm_lock_dump_handle(D_ERROR,
                                                      &dlm_req->lock_handle[0]);
                }
                ldlm_callback_reply(req, -ENOTCONN);
                RETURN(0);
        }

        switch (lustre_msg_get_opc(req->rq_reqmsg)) {
        /* XXX FIXME move this back to mds/handler.c, bug 249 */
        case LDLM_CANCEL:
                req_capsule_set(&req->rq_pill, &RQF_LDLM_CANCEL);
                CDEBUG(D_INODE, "cancel\n");
                if (CFS_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_NET) ||
                    CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))
                        RETURN(0);
                rc = ldlm_handle_cancel(req);
                if (rc)
                        break;
                RETURN(0);
        case OBD_LOG_CANCEL:
                req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL);
                if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET))
                        RETURN(0);
                rc = llog_origin_handle_cancel(req);
                if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP))
                        RETURN(0);
                ldlm_callback_reply(req, rc);
                RETURN(0);
        default:
                CERROR("invalid opcode %d\n",
                       lustre_msg_get_opc(req->rq_reqmsg));
                req_capsule_set(&req->rq_pill, &RQF_LDLM_CALLBACK);
                ldlm_callback_reply(req, -EINVAL);
        }

        RETURN(0);
}
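
/*
 * High-priority request hook: returns 1 if the cancel RPC \a req carries
 * a handle matching \a lock, in which case the request's priority is
 * raised so the cancel is serviced ahead of ordinary requests.
 */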
static int ldlm_cancel_hpreq_lock_match(struct ptlrpc_request *req,
                                        struct ldlm_lock *lock)
{
        struct ldlm_request *dlm_req;
        struct lustre_handle lockh;
        int rc = 0;
        int i;

        dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
        if (dlm_req == NULL)
                RETURN(0);

        ldlm_lock2handle(lock, &lockh);
        for (i = 0; i < dlm_req->lock_count; i++) {
                if (lustre_handle_equal(&dlm_req->lock_handle[i], &lockh)) {
                        DEBUG_REQ(D_RPCTRACE, req,
                                  "Prio raised by lock "LPX64".", lockh.cookie);
                        rc = 1;
                        break;
                }
        }

        RETURN(rc);
}
static int ldlm_cancel_hpreq_check(struct ptlrpc_request *req)
{
        struct ldlm_request *dlm_req;
        int rc = 0;
        int i;

        /* no prolong in recovery */
        if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)
                RETURN(0);

        dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
        if (dlm_req == NULL)
                RETURN(-EFAULT);

        for (i = 0; i < dlm_req->lock_count; i++) {
                struct ldlm_lock *lock;

                lock = ldlm_handle2lock(&dlm_req->lock_handle[i]);
                if (lock == NULL)
                        continue;
                rc = !!(lock->l_flags & LDLM_FL_AST_SENT);
                if (rc)
                        LDLM_DEBUG(lock, "hpreq cancel lock");
                LDLM_LOCK_PUT(lock);
                if (rc)
                        break;
        }

        RETURN(rc);
}
static struct ptlrpc_hpreq_ops ldlm_cancel_hpreq_ops = {
        .hpreq_lock_match = ldlm_cancel_hpreq_lock_match,
        .hpreq_check      = ldlm_cancel_hpreq_check,
};

static int ldlm_hpreq_handler(struct ptlrpc_request *req)
{
        ENTRY;

        req_capsule_init(&req->rq_pill, req, RCL_SERVER);

        if (req->rq_export == NULL)
                RETURN(0);

        if (LDLM_CANCEL == lustre_msg_get_opc(req->rq_reqmsg)) {
                req_capsule_set(&req->rq_pill, &RQF_LDLM_CANCEL);
                req->rq_ops = &ldlm_cancel_hpreq_ops;
        }
        RETURN(0);
}
int ldlm_revoke_lock_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                        cfs_hlist_node_t *hnode, void *data)
{
        cfs_list_t *rpc_list = data;
        struct ldlm_lock *lock = cfs_hash_object(hs, hnode);

        lock_res_and_lock(lock);

        if (lock->l_req_mode != lock->l_granted_mode) {
                unlock_res_and_lock(lock);
                return 0;
        }

        LASSERT(lock->l_resource);
        if (lock->l_resource->lr_type != LDLM_IBITS &&
            lock->l_resource->lr_type != LDLM_PLAIN) {
                unlock_res_and_lock(lock);
                return 0;
        }

        if (lock->l_flags & LDLM_FL_AST_SENT) {
                unlock_res_and_lock(lock);
                return 0;
        }

        LASSERT(lock->l_blocking_ast);
        LASSERT(!lock->l_blocking_lock);

        lock->l_flags |= LDLM_FL_AST_SENT;
        if (lock->l_export && lock->l_export->exp_lock_hash) {
                /* NB: it's safe to call cfs_hash_del() even if the lock
                 * isn't in exp_lock_hash. */
                /* In the function below, .hs_keycmp resolves to
                 * ldlm_export_lock_keycmp() */
                /* coverity[overrun-buffer-val] */
                cfs_hash_del(lock->l_export->exp_lock_hash,
                             &lock->l_remote_handle, &lock->l_exp_hash);
        }

        cfs_list_add_tail(&lock->l_rk_ast, rpc_list);
        LDLM_LOCK_GET(lock);

        unlock_res_and_lock(lock);
        return 0;
}
void ldlm_revoke_export_locks(struct obd_export *exp)
{
        cfs_list_t rpc_list;
        ENTRY;

        CFS_INIT_LIST_HEAD(&rpc_list);
        cfs_hash_for_each_empty(exp->exp_lock_hash,
                                ldlm_revoke_lock_cb, &rpc_list);
        ldlm_run_ast_work(exp->exp_obd->obd_namespace, &rpc_list,
                          LDLM_WORK_REVOKE_AST);

        EXIT;
}
EXPORT_SYMBOL(ldlm_revoke_export_locks);
#endif /* HAVE_SERVER_SUPPORT */
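
/*
 * Fetch the next blocking-callback work item: prefer blp_prio_list, but
 * take from blp_list at least once every blp_num_threads picks so the
 * regular list cannot starve.
 */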
static struct ldlm_bl_work_item *ldlm_bl_get_work(struct ldlm_bl_pool *blp)
{
        struct ldlm_bl_work_item *blwi = NULL;
        static unsigned int num_bl = 0;

        spin_lock(&blp->blp_lock);
        /* process a request from the blp_list at least every
         * blp_num_threads picks */
        if (!cfs_list_empty(&blp->blp_list) &&
            (cfs_list_empty(&blp->blp_prio_list) || num_bl == 0))
                blwi = cfs_list_entry(blp->blp_list.next,
                                      struct ldlm_bl_work_item, blwi_entry);
        else if (!cfs_list_empty(&blp->blp_prio_list))
                blwi = cfs_list_entry(blp->blp_prio_list.next,
                                      struct ldlm_bl_work_item,
                                      blwi_entry);

        if (blwi) {
                if (++num_bl >= cfs_atomic_read(&blp->blp_num_threads))
                        num_bl = 0;
                cfs_list_del(&blwi->blwi_entry);
        }
        spin_unlock(&blp->blp_lock);

        return blwi;
}
/* This only contains temporary data until the thread starts */
struct ldlm_bl_thread_data {
        char                    bltd_name[CFS_CURPROC_COMM_MAX];
        struct ldlm_bl_pool     *bltd_blp;
        struct completion       bltd_comp;
        int                     bltd_num;
};

static int ldlm_bl_thread_main(void *arg);
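
/*
 * Spawn one "ldlm_bl_%02d" thread and wait until it has copied its
 * startup data off our stack before returning.
 */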
static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp)
{
        struct ldlm_bl_thread_data bltd = { .bltd_blp = blp };
        struct task_struct *task;

        init_completion(&bltd.bltd_comp);
        bltd.bltd_num = cfs_atomic_read(&blp->blp_num_threads);
        snprintf(bltd.bltd_name, sizeof(bltd.bltd_name) - 1,
                 "ldlm_bl_%02d", bltd.bltd_num);
        task = kthread_run(ldlm_bl_thread_main, &bltd, bltd.bltd_name);
        if (IS_ERR(task)) {
                CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n",
                       cfs_atomic_read(&blp->blp_num_threads), PTR_ERR(task));
                return PTR_ERR(task);
        }
        wait_for_completion(&bltd.bltd_comp);

        return 0;
}
/**
 * Main blocking requests processing thread.
 *
 * Callers put locks into its queue by calling ldlm_bl_to_thread().
 * This thread eventually makes the actual ->l_blocking_ast() calls.
 */
static int ldlm_bl_thread_main(void *arg)
{
        struct ldlm_bl_pool *blp;
        ENTRY;

        {
                struct ldlm_bl_thread_data *bltd = arg;

                blp = bltd->bltd_blp;

                cfs_atomic_inc(&blp->blp_num_threads);
                cfs_atomic_inc(&blp->blp_busy_threads);

                complete(&bltd->bltd_comp);
                /* cannot use bltd after this, it is only on caller's stack */
        }

        while (1) {
                struct l_wait_info lwi = { 0 };
                struct ldlm_bl_work_item *blwi = NULL;
                int busy;

                blwi = ldlm_bl_get_work(blp);

                if (blwi == NULL) {
                        cfs_atomic_dec(&blp->blp_busy_threads);
                        l_wait_event_exclusive(blp->blp_waitq,
                                        (blwi = ldlm_bl_get_work(blp)) != NULL,
                                        &lwi);
                        busy = cfs_atomic_inc_return(&blp->blp_busy_threads);
                } else {
                        busy = cfs_atomic_read(&blp->blp_busy_threads);
                }

                if (blwi->blwi_ns == NULL)
                        /* added by ldlm_cleanup() */
                        break;

                /* Not fatal if racy and we end up with a few too many
                 * threads. */
                if (unlikely(busy < blp->blp_max_threads &&
                             busy >= cfs_atomic_read(&blp->blp_num_threads) &&
                             !blwi->blwi_mem_pressure))
                        /* discard the return value, we tried */
                        ldlm_bl_thread_start(blp);

                if (blwi->blwi_mem_pressure)
                        cfs_memory_pressure_set();

                if (blwi->blwi_count) {
                        int count;

                        /* The special case when we cancel locks in the LRU
                         * asynchronously, we pass the list of locks here.
                         * Thus the locks are marked LDLM_FL_CANCELING, but
                         * NOT cancelled locally yet. */
                        count = ldlm_cli_cancel_list_local(&blwi->blwi_head,
                                                           blwi->blwi_count,
                                                           LCF_BL_AST);
                        ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL,
                                             blwi->blwi_flags);
                } else {
                        ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld,
                                                blwi->blwi_lock);
                }
                if (blwi->blwi_mem_pressure)
                        cfs_memory_pressure_clr();

                if (blwi->blwi_flags & LCF_ASYNC)
                        OBD_FREE(blwi, sizeof(*blwi));
                else
                        complete(&blwi->blwi_comp);
        }

        cfs_atomic_dec(&blp->blp_busy_threads);
        cfs_atomic_dec(&blp->blp_num_threads);
        complete(&blp->blp_comp);
        RETURN(0);
}
static int ldlm_setup(void);
static int ldlm_cleanup(void);
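
/*
 * Module-wide reference counting for the LDLM: the first ldlm_get_ref()
 * brings the services up via ldlm_setup(), the last ldlm_put_ref()
 * tears them down again via ldlm_cleanup().
 */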
int ldlm_get_ref(void)
{
        int rc = 0;
        ENTRY;

        mutex_lock(&ldlm_ref_mutex);
        if (++ldlm_refcount == 1) {
                rc = ldlm_setup();
                if (rc)
                        ldlm_refcount--;
        }
        mutex_unlock(&ldlm_ref_mutex);

        RETURN(rc);
}
EXPORT_SYMBOL(ldlm_get_ref);

void ldlm_put_ref(void)
{
        ENTRY;

        mutex_lock(&ldlm_ref_mutex);
        if (ldlm_refcount == 1) {
                int rc = ldlm_cleanup();

                if (rc)
                        CERROR("ldlm_cleanup failed: %d\n", rc);
                else
                        ldlm_refcount--;
        } else {
                ldlm_refcount--;
        }
        mutex_unlock(&ldlm_ref_mutex);

        EXIT;
}
EXPORT_SYMBOL(ldlm_put_ref);
/*
 * Export handle<->lock hash operations.
 */
static unsigned
ldlm_export_lock_hash(cfs_hash_t *hs, const void *key, unsigned mask)
{
        return cfs_hash_u64_hash(((struct lustre_handle *)key)->cookie, mask);
}

static void *
ldlm_export_lock_key(cfs_hlist_node_t *hnode)
{
        struct ldlm_lock *lock;

        lock = cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_hash);
        return &lock->l_remote_handle;
}

static void
ldlm_export_lock_keycpy(cfs_hlist_node_t *hnode, void *key)
{
        struct ldlm_lock *lock;

        lock = cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_hash);
        lock->l_remote_handle = *(struct lustre_handle *)key;
}

static int
ldlm_export_lock_keycmp(const void *key, cfs_hlist_node_t *hnode)
{
        return lustre_handle_equal(ldlm_export_lock_key(hnode), key);
}

static void *
ldlm_export_lock_object(cfs_hlist_node_t *hnode)
{
        return cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_hash);
}

static void
ldlm_export_lock_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
{
        struct ldlm_lock *lock;

        lock = cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_hash);
        LDLM_LOCK_GET(lock);
}

static void
ldlm_export_lock_put(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
{
        struct ldlm_lock *lock;

        lock = cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_hash);
        LDLM_LOCK_RELEASE(lock);
}

static cfs_hash_ops_t ldlm_export_lock_ops = {
        .hs_hash        = ldlm_export_lock_hash,
        .hs_key         = ldlm_export_lock_key,
        .hs_keycmp      = ldlm_export_lock_keycmp,
        .hs_keycpy      = ldlm_export_lock_keycpy,
        .hs_object      = ldlm_export_lock_object,
        .hs_get         = ldlm_export_lock_get,
        .hs_put         = ldlm_export_lock_put,
        .hs_put_locked  = ldlm_export_lock_put,
};
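
/*
 * Create the per-export hash table that maps remote lock handles to the
 * locks held by that export.
 */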
int ldlm_init_export(struct obd_export *exp)
{
        ENTRY;

        exp->exp_lock_hash =
                cfs_hash_create(obd_uuid2str(&exp->exp_client_uuid),
                                HASH_EXP_LOCK_CUR_BITS,
                                HASH_EXP_LOCK_MAX_BITS,
                                HASH_EXP_LOCK_BKT_BITS, 0,
                                CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
                                &ldlm_export_lock_ops,
                                CFS_HASH_DEFAULT | CFS_HASH_REHASH_KEY |
                                CFS_HASH_NBLK_CHANGE);
        if (!exp->exp_lock_hash)
                RETURN(-ENOMEM);

        RETURN(0);
}
EXPORT_SYMBOL(ldlm_init_export);

void ldlm_destroy_export(struct obd_export *exp)
{
        ENTRY;
        cfs_hash_putref(exp->exp_lock_hash);
        exp->exp_lock_hash = NULL;

        ldlm_destroy_flock_export(exp);
        EXIT;
}
EXPORT_SYMBOL(ldlm_destroy_export);
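
/*
 * Bring up the LDLM: register the callback (and, with server support,
 * the cancel) ptlrpc services, start the blocking-callback thread pool
 * and, on servers, the expired-lock thread, then set up the lock pools.
 */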
static int ldlm_setup(void)
{
        static struct ptlrpc_service_conf conf;
        struct ldlm_bl_pool *blp = NULL;
        int rc = 0;
#ifdef __KERNEL__
        int i;
#endif
        ENTRY;

        if (ldlm_state != NULL)
                RETURN(-EALREADY);

        OBD_ALLOC(ldlm_state, sizeof(*ldlm_state));
        if (ldlm_state == NULL)
                RETURN(-ENOMEM);

        rc = ldlm_proc_setup();
        if (rc != 0)
                GOTO(out, rc);

        memset(&conf, 0, sizeof(conf));
        conf = (typeof(conf)) {
                .psc_name               = "ldlm_cbd",
                .psc_watchdog_factor    = 2,
                .psc_buf                = {
                        .bc_nbufs               = LDLM_CLIENT_NBUFS,
                        .bc_buf_size            = LDLM_BUFSIZE,
                        .bc_req_max_size        = LDLM_MAXREQSIZE,
                        .bc_rep_max_size        = LDLM_MAXREPSIZE,
                        .bc_req_portal          = LDLM_CB_REQUEST_PORTAL,
                        .bc_rep_portal          = LDLM_CB_REPLY_PORTAL,
                },
                .psc_thr                = {
                        .tc_thr_name            = "ldlm_cb",
                        .tc_thr_factor          = LDLM_THR_FACTOR,
                        .tc_nthrs_init          = LDLM_NTHRS_INIT,
                        .tc_nthrs_base          = LDLM_NTHRS_BASE,
                        .tc_nthrs_max           = LDLM_NTHRS_MAX,
                        .tc_nthrs_user          = ldlm_num_threads,
                        .tc_cpu_affinity        = 1,
                        .tc_ctx_tags            = LCT_MD_THREAD |
                                                  LCT_DT_THREAD,
                },
                .psc_cpt                = {
                        .cc_pattern             = ldlm_cpts,
                },
                .psc_ops                = {
                        .so_req_handler         = ldlm_callback_handler,
                },
        };
        ldlm_state->ldlm_cb_service =
                        ptlrpc_register_service(&conf, ldlm_svc_proc_dir);
        if (IS_ERR(ldlm_state->ldlm_cb_service)) {
                CERROR("failed to start service\n");
                rc = PTR_ERR(ldlm_state->ldlm_cb_service);
                ldlm_state->ldlm_cb_service = NULL;
                GOTO(out, rc);
        }

#ifdef HAVE_SERVER_SUPPORT
        memset(&conf, 0, sizeof(conf));
        conf = (typeof(conf)) {
                .psc_name               = "ldlm_canceld",
                .psc_watchdog_factor    = 6,
                .psc_buf                = {
                        .bc_nbufs               = LDLM_SERVER_NBUFS,
                        .bc_buf_size            = LDLM_BUFSIZE,
                        .bc_req_max_size        = LDLM_MAXREQSIZE,
                        .bc_rep_max_size        = LDLM_MAXREPSIZE,
                        .bc_req_portal          = LDLM_CANCEL_REQUEST_PORTAL,
                        .bc_rep_portal          = LDLM_CANCEL_REPLY_PORTAL,
                },
                .psc_thr                = {
                        .tc_thr_name            = "ldlm_cn",
                        .tc_thr_factor          = LDLM_THR_FACTOR,
                        .tc_nthrs_init          = LDLM_NTHRS_INIT,
                        .tc_nthrs_base          = LDLM_NTHRS_BASE,
                        .tc_nthrs_max           = LDLM_NTHRS_MAX,
                        .tc_nthrs_user          = ldlm_num_threads,
                        .tc_cpu_affinity        = 1,
                        .tc_ctx_tags            = LCT_MD_THREAD |
                                                  LCT_DT_THREAD |
                                                  LCT_CL_THREAD,
                },
                .psc_cpt                = {
                        .cc_pattern             = ldlm_cpts,
                },
                .psc_ops                = {
                        .so_req_handler         = ldlm_cancel_handler,
                        .so_hpreq_handler       = ldlm_hpreq_handler,
                },
        };
        ldlm_state->ldlm_cancel_service =
                        ptlrpc_register_service(&conf, ldlm_svc_proc_dir);
        if (IS_ERR(ldlm_state->ldlm_cancel_service)) {
                CERROR("failed to start service\n");
                rc = PTR_ERR(ldlm_state->ldlm_cancel_service);
                ldlm_state->ldlm_cancel_service = NULL;
                GOTO(out, rc);
        }
#endif /* HAVE_SERVER_SUPPORT */

        OBD_ALLOC(blp, sizeof(*blp));
        if (blp == NULL)
                GOTO(out, rc = -ENOMEM);
        ldlm_state->ldlm_bl_pool = blp;

        spin_lock_init(&blp->blp_lock);
        CFS_INIT_LIST_HEAD(&blp->blp_list);
        CFS_INIT_LIST_HEAD(&blp->blp_prio_list);
        cfs_waitq_init(&blp->blp_waitq);
        cfs_atomic_set(&blp->blp_num_threads, 0);
        cfs_atomic_set(&blp->blp_busy_threads, 0);

#ifdef __KERNEL__
        if (ldlm_num_threads == 0) {
                blp->blp_min_threads = LDLM_NTHRS_INIT;
                blp->blp_max_threads = LDLM_NTHRS_MAX;
        } else {
                blp->blp_min_threads = blp->blp_max_threads =
                        min_t(int, LDLM_NTHRS_MAX,
                              max_t(int, LDLM_NTHRS_INIT, ldlm_num_threads));
        }

        for (i = 0; i < blp->blp_min_threads; i++) {
                rc = ldlm_bl_thread_start(blp);
                if (rc < 0)
                        GOTO(out, rc);
        }

# ifdef HAVE_SERVER_SUPPORT
        CFS_INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks);
        expired_lock_thread.elt_state = ELT_STOPPED;
        cfs_waitq_init(&expired_lock_thread.elt_waitq);

        CFS_INIT_LIST_HEAD(&waiting_locks_list);
        spin_lock_init(&waiting_locks_spinlock);
        cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0);

        rc = PTR_ERR(kthread_run(expired_lock_main, NULL, "ldlm_elt"));
        if (IS_ERR_VALUE(rc)) {
                CERROR("Cannot start ldlm expired-lock thread: %d\n", rc);
                GOTO(out, rc);
        }

        cfs_wait_event(expired_lock_thread.elt_waitq,
                       expired_lock_thread.elt_state == ELT_READY);
# endif /* HAVE_SERVER_SUPPORT */

        rc = ldlm_pools_init();
        if (rc) {
                CERROR("Failed to initialize LDLM pools: %d\n", rc);
                GOTO(out, rc);
        }
#endif /* __KERNEL__ */

        RETURN(0);

out:
        ldlm_cleanup();
        RETURN(rc);
}
static int ldlm_cleanup(void)
{
        ENTRY;

        if (!cfs_list_empty(ldlm_namespace_list(LDLM_NAMESPACE_SERVER)) ||
            !cfs_list_empty(ldlm_namespace_list(LDLM_NAMESPACE_CLIENT))) {
                CERROR("ldlm still has namespaces; clean these up first.\n");
                ldlm_dump_all_namespaces(LDLM_NAMESPACE_SERVER, D_DLMTRACE);
                ldlm_dump_all_namespaces(LDLM_NAMESPACE_CLIENT, D_DLMTRACE);
                RETURN(-EBUSY);
        }

#ifdef __KERNEL__
        ldlm_pools_fini();

        if (ldlm_state->ldlm_bl_pool != NULL) {
                struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;

                while (cfs_atomic_read(&blp->blp_num_threads) > 0) {
                        struct ldlm_bl_work_item blwi = { .blwi_ns = NULL };

                        init_completion(&blp->blp_comp);

                        spin_lock(&blp->blp_lock);
                        cfs_list_add_tail(&blwi.blwi_entry, &blp->blp_list);
                        cfs_waitq_signal(&blp->blp_waitq);
                        spin_unlock(&blp->blp_lock);

                        wait_for_completion(&blp->blp_comp);
                }

                OBD_FREE(blp, sizeof(*blp));
        }
#endif /* __KERNEL__ */

        if (ldlm_state->ldlm_cb_service != NULL)
                ptlrpc_unregister_service(ldlm_state->ldlm_cb_service);
# ifdef HAVE_SERVER_SUPPORT
        if (ldlm_state->ldlm_cancel_service != NULL)
                ptlrpc_unregister_service(ldlm_state->ldlm_cancel_service);
# endif

#ifdef __KERNEL__
        ldlm_proc_cleanup();

# ifdef HAVE_SERVER_SUPPORT
        if (expired_lock_thread.elt_state != ELT_STOPPED) {
                expired_lock_thread.elt_state = ELT_TERMINATE;
                cfs_waitq_signal(&expired_lock_thread.elt_waitq);
                cfs_wait_event(expired_lock_thread.elt_waitq,
                               expired_lock_thread.elt_state == ELT_STOPPED);
        }
# endif /* HAVE_SERVER_SUPPORT */
#endif /* __KERNEL__ */

        OBD_FREE(ldlm_state, sizeof(*ldlm_state));
        ldlm_state = NULL;

        RETURN(0);
}
int ldlm_init(void)
{
        mutex_init(&ldlm_ref_mutex);
        mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER));
        mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT));
        ldlm_resource_slab = cfs_mem_cache_create("ldlm_resources",
                                                  sizeof(struct ldlm_resource),
                                                  0, CFS_SLAB_HWCACHE_ALIGN);
        if (ldlm_resource_slab == NULL)
                return -ENOMEM;

        ldlm_lock_slab = cfs_mem_cache_create("ldlm_locks",
                              sizeof(struct ldlm_lock), 0,
                              CFS_SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU);
        if (ldlm_lock_slab == NULL) {
                cfs_mem_cache_destroy(ldlm_resource_slab);
                return -ENOMEM;
        }

        ldlm_interval_slab = cfs_mem_cache_create("interval_node",
                                                  sizeof(struct ldlm_interval),
                                                  0, CFS_SLAB_HWCACHE_ALIGN);
        if (ldlm_interval_slab == NULL) {
                cfs_mem_cache_destroy(ldlm_resource_slab);
                cfs_mem_cache_destroy(ldlm_lock_slab);
                return -ENOMEM;
        }
#if LUSTRE_TRACKS_LOCK_EXP_REFS
        class_export_dump_hook = ldlm_dump_export_locks;
#endif
        return 0;
}
void ldlm_exit(void)
{
        int rc;

        if (ldlm_refcount)
                CERROR("ldlm_refcount is %d in ldlm_exit!\n", ldlm_refcount);
        rc = cfs_mem_cache_destroy(ldlm_resource_slab);
        LASSERTF(rc == 0, "couldn't free ldlm resource slab\n");
#ifdef __KERNEL__
        /* ldlm_lock_put() uses RCU to call ldlm_lock_free(), so we need to
         * call synchronize_rcu() and wait for a grace period to elapse so
         * that ldlm_lock_free() gets a chance to be called. */
        synchronize_rcu();
#endif
        rc = cfs_mem_cache_destroy(ldlm_lock_slab);
        LASSERTF(rc == 0, "couldn't free ldlm lock slab\n");
        rc = cfs_mem_cache_destroy(ldlm_interval_slab);
        LASSERTF(rc == 0, "couldn't free interval node slab\n");
}