X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Fldlm%2Fldlm_lockd.c;h=4d9e72f8b9510cbab4bc664033473594b8fa1674;hb=63851b5816bb30687fbf3750380d6b448e9400f1;hp=46949f4c76c50e4015037171faadc7e614ccffb6;hpb=24e202a76d477c39b7e714f043f5c1f53457e400;p=fs%2Flustre-release.git diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 46949f4..4d9e72f 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -27,7 +27,7 @@ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Whamcloud, Inc. + * Copyright (c) 2010, 2013, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -60,8 +60,8 @@ static char *ldlm_cpts; CFS_MODULE_PARM(ldlm_cpts, "s", charp, 0444, "CPU partitions ldlm threads should run on"); -extern cfs_mem_cache_t *ldlm_resource_slab; -extern cfs_mem_cache_t *ldlm_lock_slab; +extern struct kmem_cache *ldlm_resource_slab; +extern struct kmem_cache *ldlm_lock_slab; static struct mutex ldlm_ref_mutex; static int ldlm_refcount; @@ -95,25 +95,25 @@ static inline unsigned int ldlm_get_rq_timeout(void) struct ldlm_bl_pool { spinlock_t blp_lock; - /* - * blp_prio_list is used for callbacks that should be handled - * as a priority. It is used for LDLM_FL_DISCARD_DATA requests. - * see bug 13843 - */ - cfs_list_t blp_prio_list; - - /* - * blp_list is used for all other callbacks which are likely - * to take longer to process. - */ - cfs_list_t blp_list; - - cfs_waitq_t blp_waitq; - struct completion blp_comp; - cfs_atomic_t blp_num_threads; - cfs_atomic_t blp_busy_threads; - int blp_min_threads; - int blp_max_threads; + /* + * blp_prio_list is used for callbacks that should be handled + * as a priority. It is used for LDLM_FL_DISCARD_DATA requests. + * see bug 13843 + */ + cfs_list_t blp_prio_list; + + /* + * blp_list is used for all other callbacks which are likely + * to take longer to process. + */ + cfs_list_t blp_list; + + wait_queue_head_t blp_waitq; + struct completion blp_comp; + atomic_t blp_num_threads; + atomic_t blp_busy_threads; + int blp_min_threads; + int blp_max_threads; }; struct ldlm_bl_work_item { @@ -124,7 +124,7 @@ struct ldlm_bl_work_item { cfs_list_t blwi_head; int blwi_count; struct completion blwi_comp; - int blwi_mode; + ldlm_cancel_flags_t blwi_flags; int blwi_mem_pressure; }; @@ -147,10 +147,10 @@ static spinlock_t waiting_locks_spinlock; /* BH lock (timer) */ * All access to it should be under waiting_locks_spinlock. */ static cfs_list_t waiting_locks_list; -static cfs_timer_t waiting_locks_timer; +static struct timer_list waiting_locks_timer; static struct expired_lock_thread { - cfs_waitq_t elt_waitq; + wait_queue_head_t elt_waitq; int elt_state; int elt_dump; cfs_list_t elt_expired_locks; @@ -173,21 +173,20 @@ static inline int have_expired_locks(void) */ static int expired_lock_main(void *arg) { - cfs_list_t *expired = &expired_lock_thread.elt_expired_locks; - struct l_wait_info lwi = { 0 }; - int do_dump; + cfs_list_t *expired = &expired_lock_thread.elt_expired_locks; + struct l_wait_info lwi = { 0 }; + int do_dump; - ENTRY; - cfs_daemonize("ldlm_elt"); + ENTRY; - expired_lock_thread.elt_state = ELT_READY; - cfs_waitq_signal(&expired_lock_thread.elt_waitq); + expired_lock_thread.elt_state = ELT_READY; + wake_up(&expired_lock_thread.elt_waitq); - while (1) { - l_wait_event(expired_lock_thread.elt_waitq, - have_expired_locks() || - expired_lock_thread.elt_state == ELT_TERMINATE, - &lwi); + while (1) { + l_wait_event(expired_lock_thread.elt_waitq, + have_expired_locks() || + expired_lock_thread.elt_state == ELT_TERMINATE, + &lwi); spin_lock_bh(&waiting_locks_spinlock); if (expired_lock_thread.elt_dump) { @@ -202,38 +201,39 @@ static int expired_lock_main(void *arg) libcfs_run_lbug_upcall(&msgdata); spin_lock_bh(&waiting_locks_spinlock); - expired_lock_thread.elt_dump = 0; - } + expired_lock_thread.elt_dump = 0; + } - do_dump = 0; + do_dump = 0; - while (!cfs_list_empty(expired)) { - struct obd_export *export; - struct ldlm_lock *lock; + while (!cfs_list_empty(expired)) { + struct obd_export *export; + struct ldlm_lock *lock; - lock = cfs_list_entry(expired->next, struct ldlm_lock, - l_pending_chain); - if ((void *)lock < LP_POISON + CFS_PAGE_SIZE && - (void *)lock >= LP_POISON) { + lock = cfs_list_entry(expired->next, struct ldlm_lock, + l_pending_chain); + if ((void *)lock < LP_POISON + PAGE_CACHE_SIZE && + (void *)lock >= LP_POISON) { spin_unlock_bh(&waiting_locks_spinlock); - CERROR("free lock on elt list %p\n", lock); - LBUG(); - } - cfs_list_del_init(&lock->l_pending_chain); - if ((void *)lock->l_export < LP_POISON + CFS_PAGE_SIZE && - (void *)lock->l_export >= LP_POISON) { - CERROR("lock with free export on elt list %p\n", - lock->l_export); - lock->l_export = NULL; - LDLM_ERROR(lock, "free export"); - /* release extra ref grabbed by - * ldlm_add_waiting_lock() or - * ldlm_failed_ast() */ - LDLM_LOCK_RELEASE(lock); - continue; - } + CERROR("free lock on elt list %p\n", lock); + LBUG(); + } + cfs_list_del_init(&lock->l_pending_chain); + if ((void *)lock->l_export < + LP_POISON + PAGE_CACHE_SIZE && + (void *)lock->l_export >= LP_POISON) { + CERROR("lock with free export on elt list %p\n", + lock->l_export); + lock->l_export = NULL; + LDLM_ERROR(lock, "free export"); + /* release extra ref grabbed by + * ldlm_add_waiting_lock() or + * ldlm_failed_ast() */ + LDLM_LOCK_RELEASE(lock); + continue; + } - if (lock->l_destroyed) { + if (ldlm_is_destroyed(lock)) { /* release the lock refcount where * waiting_locks_callback() founds */ LDLM_LOCK_RELEASE(lock); @@ -254,18 +254,18 @@ static int expired_lock_main(void *arg) } spin_unlock_bh(&waiting_locks_spinlock); - if (do_dump && obd_dump_on_eviction) { - CERROR("dump the log upon eviction\n"); - libcfs_debug_dumplog(); - } + if (do_dump && obd_dump_on_eviction) { + CERROR("dump the log upon eviction\n"); + libcfs_debug_dumplog(); + } - if (expired_lock_thread.elt_state == ELT_TERMINATE) - break; - } + if (expired_lock_thread.elt_state == ELT_TERMINATE) + break; + } - expired_lock_thread.elt_state = ELT_STOPPED; - cfs_waitq_signal(&expired_lock_thread.elt_waitq); - RETURN(0); + expired_lock_thread.elt_state = ELT_STOPPED; + wake_up(&expired_lock_thread.elt_waitq); + RETURN(0); } static int ldlm_add_waiting_lock(struct ldlm_lock *lock); @@ -312,53 +312,6 @@ static void waiting_locks_callback(unsigned long unused) (lock->l_req_mode == LCK_GROUP)) break; - if (ptlrpc_check_suspend()) { - /* there is a case when we talk to one mds, holding - * lock from another mds. this way we easily can get - * here, if second mds is being recovered. so, we - * suspend timeouts. bug 6019 */ - - LDLM_ERROR(lock, "recharge timeout: %s@%s nid %s ", - lock->l_export->exp_client_uuid.uuid, - lock->l_export->exp_connection->c_remote_uuid.uuid, - libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid)); - - cfs_list_del_init(&lock->l_pending_chain); - if (lock->l_destroyed) { - /* relay the lock refcount decrease to - * expired lock thread */ - cfs_list_add(&lock->l_pending_chain, - &expired_lock_thread.elt_expired_locks); - } else { - __ldlm_add_waiting_lock(lock, - ldlm_get_enq_timeout(lock)); - } - continue; - } - - /* if timeout overlaps the activation time of suspended timeouts - * then extend it to give a chance for client to reconnect */ - if (cfs_time_before(cfs_time_sub(lock->l_callback_timeout, - cfs_time_seconds(obd_timeout)/2), - ptlrpc_suspend_wakeup_time())) { - LDLM_ERROR(lock, "extend timeout due to recovery: %s@%s nid %s ", - lock->l_export->exp_client_uuid.uuid, - lock->l_export->exp_connection->c_remote_uuid.uuid, - libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid)); - - cfs_list_del_init(&lock->l_pending_chain); - if (lock->l_destroyed) { - /* relay the lock refcount decrease to - * expired lock thread */ - cfs_list_add(&lock->l_pending_chain, - &expired_lock_thread.elt_expired_locks); - } else { - __ldlm_add_waiting_lock(lock, - ldlm_get_enq_timeout(lock)); - } - continue; - } - /* Check if we need to prolong timeout */ if (!OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT) && ldlm_lock_busy(lock)) { @@ -386,7 +339,7 @@ static void waiting_locks_callback(unsigned long unused) ldlm_lock_to_ns(lock)->ns_timeouts++; LDLM_ERROR(lock, "lock callback timer expired after %lds: " "evicting client at %s ", - cfs_time_current_sec()- lock->l_last_activity, + cfs_time_current_sec() - lock->l_last_activity, libcfs_nid2str( lock->l_export->exp_connection->c_peer.nid)); @@ -403,7 +356,7 @@ static void waiting_locks_callback(unsigned long unused) if (obd_dump_on_timeout && need_dump) expired_lock_thread.elt_dump = __LINE__; - cfs_waitq_signal(&expired_lock_thread.elt_waitq); + wake_up(&expired_lock_thread.elt_waitq); } /* @@ -468,13 +421,13 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock) int timeout = ldlm_get_enq_timeout(lock); /* NB: must be called with hold of lock_res_and_lock() */ - LASSERT(lock->l_res_locked); - lock->l_waited = 1; + LASSERT(ldlm_is_res_locked(lock)); + ldlm_set_waited(lock); - LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)); + LASSERT(!ldlm_is_cancel_on_block(lock)); spin_lock_bh(&waiting_locks_spinlock); - if (lock->l_destroyed) { + if (ldlm_is_destroyed(lock)) { static cfs_time_t next; spin_unlock_bh(&waiting_locks_spinlock); LDLM_ERROR(lock, "not waiting on destroyed lock (bug 5653)"); @@ -619,8 +572,7 @@ int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, int timeout) # ifdef HAVE_SERVER_SUPPORT static int ldlm_add_waiting_lock(struct ldlm_lock *lock) { - LASSERT(lock->l_res_locked); - LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)); + LASSERT(ldlm_is_res_locked(lock) && !ldlm_is_cancel_on_block(lock)); RETURN(1); } @@ -650,7 +602,7 @@ static void ldlm_failed_ast(struct ldlm_lock *lock, int rc, LDLM_LOCK_GET(lock); cfs_list_add(&lock->l_pending_chain, &expired_lock_thread.elt_expired_locks); - cfs_waitq_signal(&expired_lock_thread.elt_waitq); + wake_up(&expired_lock_thread.elt_waitq); spin_unlock_bh(&waiting_locks_spinlock); #else class_fail_export(lock->l_export); @@ -674,7 +626,7 @@ static int ldlm_handle_ast_error(struct ldlm_lock *lock, libcfs_nid2str(peer.nid)); ldlm_lock_cancel(lock); rc = -ERESTART; - } else if (lock->l_flags & LDLM_FL_CANCEL) { + } else if (ldlm_is_cancel(lock)) { LDLM_DEBUG(lock, "%s AST timeout from nid %s, but " "cancel was received (AST reply lost?)", ast_type, libcfs_nid2str(peer.nid)); @@ -765,9 +717,9 @@ static int ldlm_cb_interpret(const struct lu_env *env, LDLM_LOCK_RELEASE(lock); if (rc == -ERESTART) - cfs_atomic_inc(&arg->restart); + atomic_inc(&arg->restart); - RETURN(0); + RETURN(0); } static inline int ldlm_ast_fini(struct ptlrpc_request *req, @@ -782,7 +734,7 @@ static inline int ldlm_ast_fini(struct ptlrpc_request *req, rc = ptl_send_rpc(req, 1); ptlrpc_req_finished(req); if (rc == 0) - cfs_atomic_inc(&arg->restart); + atomic_inc(&arg->restart); } else { LDLM_LOCK_GET(lock); ptlrpc_set_add_req(arg->set, req); @@ -797,25 +749,28 @@ static inline int ldlm_ast_fini(struct ptlrpc_request *req, */ static void ldlm_lock_reorder_req(struct ldlm_lock *lock) { - struct ptlrpc_request *req; - ENTRY; + struct ptlrpc_request *req; + ENTRY; - if (lock->l_export == NULL) { - LDLM_DEBUG(lock, "client lock: no-op"); - RETURN_EXIT; - } + if (lock->l_export == NULL) { + LDLM_DEBUG(lock, "client lock: no-op"); + RETURN_EXIT; + } spin_lock_bh(&lock->l_export->exp_rpc_lock); - cfs_list_for_each_entry(req, &lock->l_export->exp_hp_rpcs, - rq_exp_list) { - /* Do not process requests that were not yet added to there - * incoming queue or were already removed from there for - * processing */ - if (!req->rq_hp && !cfs_list_empty(&req->rq_list) && - req->rq_ops->hpreq_lock_match && - req->rq_ops->hpreq_lock_match(req, lock)) - ptlrpc_hpreq_reorder(req); - } + cfs_list_for_each_entry(req, &lock->l_export->exp_hp_rpcs, + rq_exp_list) { + /* Do not process requests that were not yet added to there + * incoming queue or were already removed from there for + * processing. We evaluate ptlrpc_nrs_req_can_move() without + * holding svcpt->scp_req_lock, and then redo the check with + * the lock held once we need to obtain a reliable result. + */ + if (ptlrpc_nrs_req_can_move(req) && + req->rq_ops->hpreq_lock_match && + req->rq_ops->hpreq_lock_match(req, lock)) + ptlrpc_nrs_req_hp_move(req); + } spin_unlock_bh(&lock->l_export->exp_rpc_lock); EXIT; } @@ -875,20 +830,20 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, RETURN(0); } - if (lock->l_destroyed) { + if (ldlm_is_destroyed(lock)) { /* What's the point? */ unlock_res_and_lock(lock); ptlrpc_req_finished(req); RETURN(0); } - if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) + if (ldlm_is_cancel_on_block(lock)) instant_cancel = 1; body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ); body->lock_handle[0] = lock->l_remote_handle; body->lock_desc = *desc; - body->lock_flags |= ldlm_flags_to_wire(lock->l_flags & LDLM_AST_FLAGS); + body->lock_flags |= ldlm_flags_to_wire(lock->l_flags & LDLM_FL_AST_MASK); LDLM_DEBUG(lock, "server preparing blocking AST"); @@ -907,6 +862,8 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, if (AT_OFF) req->rq_timeout = ldlm_get_rq_timeout(); + lock->l_last_activity = cfs_time_current_sec(); + if (lock->l_export && lock->l_export->exp_nid_stats && lock->l_export->exp_nid_stats->nid_ldlm_stats) lprocfs_counter_incr(lock->l_export->exp_nid_stats->nid_ldlm_stats, @@ -943,6 +900,11 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(), lock->l_last_activity); + if (OBD_FAIL_PRECHECK(OBD_FAIL_OST_LDLM_REPLY_NET)) { + LDLM_DEBUG(lock, "dropping CP AST"); + RETURN(0); + } + req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse, &RQF_LDLM_CP_CALLBACK); if (req == NULL) @@ -950,6 +912,13 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) /* server namespace, doesn't need lock */ lvb_len = ldlm_lvbo_size(lock); + /* LU-3124 & LU-2187: to not return layout in completion AST because + * it may deadlock for LU-2187, or client may not have enough space + * for large layout. The layout will be returned to client with an + * extra RPC to fetch xattr.lov */ + if (ldlm_has_layout(lock)) + lvb_len = 0; + req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT, lvb_len); rc = ptlrpc_request_pack(req, LUSTRE_DLM_VERSION, LDLM_CP_CALLBACK); if (rc) { @@ -973,13 +942,27 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) void *lvb = req_capsule_client_get(&req->rq_pill, &RMF_DLM_LVB); lvb_len = ldlm_lvbo_fill(lock, lvb, lvb_len); - req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB, - lvb_len, RCL_CLIENT); + if (lvb_len < 0) { + /* We still need to send the RPC to wake up the blocked + * enqueue thread on the client. + * + * Consider old client, there is no better way to notify + * the failure, just zero-sized the LVB, then the client + * will fail out as "-EPROTO". */ + req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB, 0, + RCL_CLIENT); + instant_cancel = 1; + } else { + req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB, lvb_len, + RCL_CLIENT); + } } LDLM_DEBUG(lock, "server preparing completion AST (after %lds wait)", total_enqueue_wait); + lock->l_last_activity = cfs_time_current_sec(); + /* Server-side enqueue wait time estimate, used in __ldlm_add_waiting_lock to set future enqueue timers */ if (total_enqueue_wait < ldlm_get_enq_timeout(lock)) @@ -1004,11 +987,11 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) /* We only send real blocking ASTs after the lock is granted */ lock_res_and_lock(lock); - if (lock->l_flags & LDLM_FL_AST_SENT) { + if (ldlm_is_ast_sent(lock)) { body->lock_flags |= ldlm_flags_to_wire(LDLM_FL_AST_SENT); /* Copy AST flags like LDLM_FL_DISCARD_DATA. */ body->lock_flags |= ldlm_flags_to_wire(lock->l_flags & - LDLM_AST_FLAGS); + LDLM_FL_AST_MASK); /* We might get here prior to ldlm_handle_enqueue setting * LDLM_FL_CANCEL_ON_BLOCK flag. Then we will put this lock @@ -1016,7 +999,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) * ldlm_handle_enqueue will call ldlm_lock_cancel() still, * that would not only cancel the lock, but will also remove * it from waiting list */ - if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) { + if (ldlm_is_cancel_on_block(lock)) { unlock_res_and_lock(lock); ldlm_lock_cancel(lock); instant_cancel = 1; @@ -1035,7 +1018,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) rc = ldlm_ast_fini(req, arg, lock, instant_cancel); - RETURN(rc); + RETURN(lvb_len < 0 ? lvb_len : rc); } EXPORT_SYMBOL(ldlm_server_completion_ast); @@ -1096,6 +1079,8 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data) if (AT_OFF) req->rq_timeout = ldlm_get_rq_timeout(); + lock->l_last_activity = cfs_time_current_sec(); + req->rq_interpret_reply = ldlm_cb_interpret; if (lock->l_export && lock->l_export->exp_nid_stats && @@ -1221,7 +1206,7 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns, GOTO(out, rc = -EFAULT); } - if (req->rq_export->exp_connect_flags & OBD_CONNECT_IBITS) { + if (exp_connect_flags(req->rq_export) & OBD_CONNECT_IBITS) { if (unlikely(dlm_req->lock_desc.l_resource.lr_type == LDLM_PLAIN)) { DEBUG_REQ(D_ERROR, req, @@ -1242,7 +1227,7 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns, /* INODEBITS_INTEROP: Perform conversion from plain lock to * inodebits lock if client does not support them. */ - if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_IBITS) && + if (!(exp_connect_flags(req->rq_export) & OBD_CONNECT_IBITS) && (dlm_req->lock_desc.l_resource.lr_type == LDLM_PLAIN)) { dlm_req->lock_desc.l_resource.lr_type = LDLM_IBITS; dlm_req->lock_desc.l_policy_data.l_inodebits.bits = @@ -1252,7 +1237,8 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns, } #endif - if (unlikely(flags & LDLM_FL_REPLAY)) { + if (unlikely((flags & LDLM_FL_REPLAY) || + (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT))) { /* Find an existing lock in the per-export lock hash */ /* In the function below, .hs_keycmp resolves to * ldlm_export_lock_keycmp() */ @@ -1262,17 +1248,18 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns, if (lock != NULL) { DEBUG_REQ(D_DLMTRACE, req, "found existing lock cookie " LPX64, lock->l_handle.h_cookie); + flags |= LDLM_FL_RESENT; GOTO(existing_lock, rc = 0); - } + } } - /* The lock's callback data might be set in the policy function */ - lock = ldlm_lock_create(ns, &dlm_req->lock_desc.l_resource.lr_name, - dlm_req->lock_desc.l_resource.lr_type, - dlm_req->lock_desc.l_req_mode, + /* The lock's callback data might be set in the policy function */ + lock = ldlm_lock_create(ns, &dlm_req->lock_desc.l_resource.lr_name, + dlm_req->lock_desc.l_resource.lr_type, + dlm_req->lock_desc.l_req_mode, cbs, NULL, 0, LVB_T_NONE); - if (!lock) - GOTO(out, rc = -ENOMEM); + if (IS_ERR(lock)) + GOTO(out, rc = PTR_ERR(lock)); lock->l_last_activity = cfs_time_current_sec(); lock->l_remote_handle = dlm_req->lock_handle[0]; @@ -1324,8 +1311,11 @@ existing_lock: lock->l_req_extent = lock->l_policy_data.l_extent; err = ldlm_lock_enqueue(ns, &lock, cookie, &flags); - if (err) - GOTO(out, err); + if (err) { + if ((int)err < 0) + rc = (int)err; + GOTO(out, err); + } dlm_rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); dlm_rep->lock_flags = ldlm_flags_to_wire(flags); @@ -1339,9 +1329,9 @@ existing_lock: /* Now take into account flags to be inherited from original lock request both in reply to client and in our own lock flags. */ - dlm_rep->lock_flags |= dlm_req->lock_flags & LDLM_INHERIT_FLAGS; + dlm_rep->lock_flags |= dlm_req->lock_flags & LDLM_FL_INHERIT_MASK; lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags & - LDLM_INHERIT_FLAGS); + LDLM_FL_INHERIT_MASK); /* Don't move a pending lock onto the export if it has already been * disconnected due to eviction (bug 5683) or server umount (bug 24324). @@ -1350,7 +1340,7 @@ existing_lock: OBD_FAIL_CHECK(OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT))) { LDLM_ERROR(lock, "lock on destroyed export %p", req->rq_export); rc = -ENOTCONN; - } else if (lock->l_flags & LDLM_FL_AST_SENT) { + } else if (ldlm_is_ast_sent(lock)) { dlm_rep->lock_flags |= ldlm_flags_to_wire(LDLM_FL_AST_SENT); if (lock->l_granted_mode == lock->l_req_mode) { /* @@ -1373,7 +1363,7 @@ existing_lock: if ((dlm_req->lock_desc.l_resource.lr_type == LDLM_PLAIN || dlm_req->lock_desc.l_resource.lr_type == LDLM_IBITS) && req->rq_export->exp_libclient) { - if (unlikely(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) || + if (unlikely(!ldlm_is_cancel_on_block(lock) || !(dlm_rep->lock_flags & LDLM_FL_CANCEL_ON_BLOCK))){ CERROR("Granting sync lock to libclient. " "req fl %d, rep fl %d, lock fl "LPX64"\n", @@ -1406,14 +1396,14 @@ existing_lock: /* The LOCK_CHANGED code in ldlm_lock_enqueue depends on this * ldlm_reprocess_all. If this moves, revisit that code. -phil */ - if (lock) { - LDLM_DEBUG(lock, "server-side enqueue handler, sending reply" - "(err=%d, rc=%d)", err, rc); - - if (rc == 0) { - int lvb_len = ldlm_lvbo_size(lock); - - if (lvb_len > 0) { + if (!IS_ERR(lock)) { + LDLM_DEBUG(lock, "server-side enqueue handler, sending reply" + "(err=%d, rc=%d)", err, rc); + + if (rc == 0) { + if (req_capsule_has_field(&req->rq_pill, &RMF_DLM_LVB, + RCL_SERVER) && + ldlm_lvbo_size(lock) > 0) { void *buf; int buflen; @@ -1423,9 +1413,19 @@ existing_lock: req, lock); buflen = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER); - buflen = ldlm_lvbo_fill(lock, buf, buflen); - req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB, - buflen, RCL_SERVER); + if (buflen > 0) { + buflen = ldlm_lvbo_fill(lock, buf, + buflen); + if (buflen >= 0) + req_capsule_shrink( + &req->rq_pill, + &RMF_DLM_LVB, + buflen, RCL_SERVER); + else + rc = buflen; + } else { + rc = buflen; + } } } else { lock_res_and_lock(lock); @@ -1499,7 +1499,7 @@ int ldlm_handle_convert0(struct ptlrpc_request *req, lock = ldlm_handle2lock(&dlm_req->lock_handle[0]); if (!lock) { - req->rq_status = EINVAL; + req->rq_status = LUSTRE_EINVAL; } else { void *res = NULL; @@ -1513,7 +1513,7 @@ int ldlm_handle_convert0(struct ptlrpc_request *req, LDLM_DEBUG(lock, "converted waiting lock"); req->rq_status = 0; } else { - req->rq_status = EDEADLOCK; + req->rq_status = LUSTRE_EDEADLK; } } @@ -1646,7 +1646,7 @@ int ldlm_handle_cancel(struct ptlrpc_request *req) RETURN(rc); if (!ldlm_request_cancel(req, dlm_req, 0)) - req->rq_status = ESTALE; + req->rq_status = LUSTRE_ESTALE; RETURN(ptlrpc_reply(req)); } @@ -1667,10 +1667,10 @@ void ldlm_handle_bl_callback(struct ldlm_namespace *ns, LDLM_DEBUG(lock, "client blocking AST callback handler"); lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_set_cbpending(lock); - if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) - lock->l_flags |= LDLM_FL_CANCEL; + if (ldlm_is_cancel_on_block(lock)) + ldlm_set_cancel(lock); do_ast = (!lock->l_readers && !lock->l_writers); unlock_res_and_lock(lock); @@ -1702,22 +1702,22 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, struct ldlm_lock *lock) { int lvb_len; - CFS_LIST_HEAD(ast_list); + CFS_LIST_HEAD(ast_list); int rc = 0; - ENTRY; + ENTRY; - LDLM_DEBUG(lock, "client completion callback handler START"); + LDLM_DEBUG(lock, "client completion callback handler START"); - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) { - int to = cfs_time_seconds(1); - while (to > 0) { - cfs_schedule_timeout_and_set_state( - CFS_TASK_INTERRUPTIBLE, to); - if (lock->l_granted_mode == lock->l_req_mode || - lock->l_destroyed) - break; - } - } + if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) { + int to = cfs_time_seconds(1); + while (to > 0) { + schedule_timeout_and_set_state( + TASK_INTERRUPTIBLE, to); + if (lock->l_granted_mode == lock->l_req_mode || + ldlm_is_destroyed(lock)) + break; + } + } lvb_len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT); if (lvb_len < 0) { @@ -1735,46 +1735,48 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, lock->l_lvb_len, lvb_len); GOTO(out, rc = -EINVAL); } - } else { /* for layout lock, lvb has variable length */ + } else if (ldlm_has_layout(lock)) { /* for layout lock, lvb has + * variable length */ void *lvb_data; - OBD_ALLOC(lvb_data, lvb_len); + OBD_ALLOC_LARGE(lvb_data, lvb_len); if (lvb_data == NULL) { - LDLM_ERROR(lock, "No memory.\n"); + LDLM_ERROR(lock, "No memory: %d.\n", lvb_len); GOTO(out, rc = -ENOMEM); } lock_res_and_lock(lock); LASSERT(lock->l_lvb_data == NULL); + lock->l_lvb_type = LVB_T_LAYOUT; lock->l_lvb_data = lvb_data; lock->l_lvb_len = lvb_len; unlock_res_and_lock(lock); } } - lock_res_and_lock(lock); - if (lock->l_destroyed || - lock->l_granted_mode == lock->l_req_mode) { - /* bug 11300: the lock has already been granted */ - unlock_res_and_lock(lock); - LDLM_DEBUG(lock, "Double grant race happened"); + lock_res_and_lock(lock); + if (ldlm_is_destroyed(lock) || + lock->l_granted_mode == lock->l_req_mode) { + /* bug 11300: the lock has already been granted */ + unlock_res_and_lock(lock); + LDLM_DEBUG(lock, "Double grant race happened"); GOTO(out, rc = 0); - } + } - /* If we receive the completion AST before the actual enqueue returned, - * then we might need to switch lock modes, resources, or extents. */ - if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) { - lock->l_req_mode = dlm_req->lock_desc.l_granted_mode; - LDLM_DEBUG(lock, "completion AST, new lock mode"); - } + /* If we receive the completion AST before the actual enqueue returned, + * then we might need to switch lock modes, resources, or extents. */ + if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) { + lock->l_req_mode = dlm_req->lock_desc.l_granted_mode; + LDLM_DEBUG(lock, "completion AST, new lock mode"); + } - if (lock->l_resource->lr_type != LDLM_PLAIN) { - ldlm_convert_policy_to_local(req->rq_export, - dlm_req->lock_desc.l_resource.lr_type, - &dlm_req->lock_desc.l_policy_data, - &lock->l_policy_data); - LDLM_DEBUG(lock, "completion AST, new policy data"); - } + if (lock->l_resource->lr_type != LDLM_PLAIN) { + ldlm_convert_policy_to_local(req->rq_export, + dlm_req->lock_desc.l_resource.lr_type, + &dlm_req->lock_desc.l_policy_data, + &lock->l_policy_data); + LDLM_DEBUG(lock, "completion AST, new policy data"); + } ldlm_resource_unlink_lock(lock); if (memcmp(&dlm_req->lock_desc.l_resource.lr_name, @@ -1796,7 +1798,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, /* BL_AST locks are not needed in LRU. * Let ldlm_cancel_lru() be fast. */ ldlm_lock_remove_from_lru(lock); - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST; + lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST; LDLM_DEBUG(lock, "completion AST includes blocking AST"); } @@ -1827,8 +1829,9 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, out: if (rc < 0) { lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_FAILED; + ldlm_set_failed(lock); unlock_res_and_lock(lock); + wake_up(&lock->l_waitq); } LDLM_LOCK_RELEASE(lock); } @@ -1893,14 +1896,15 @@ static int ldlm_callback_reply(struct ptlrpc_request *req, int rc) } #ifdef __KERNEL__ -static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi, int mode) +static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi, + ldlm_cancel_flags_t cancel_flags) { struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool; ENTRY; spin_lock(&blp->blp_lock); if (blwi->blwi_lock && - blwi->blwi_lock->l_flags & LDLM_FL_DISCARD_DATA) { + ldlm_is_discard_data(blwi->blwi_lock)) { /* add LDLM_FL_DISCARD_DATA requests to the priority list */ cfs_list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list); } else { @@ -1909,31 +1913,31 @@ static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi, int mode) } spin_unlock(&blp->blp_lock); - cfs_waitq_signal(&blp->blp_waitq); + wake_up(&blp->blp_waitq); - /* can not use blwi->blwi_mode as blwi could be already freed in - LDLM_ASYNC mode */ - if (mode == LDLM_SYNC) + /* can not check blwi->blwi_flags as blwi could be already freed in + LCF_ASYNC mode */ + if (!(cancel_flags & LCF_ASYNC)) wait_for_completion(&blwi->blwi_comp); RETURN(0); } static inline void init_blwi(struct ldlm_bl_work_item *blwi, - struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, - cfs_list_t *cancels, int count, - struct ldlm_lock *lock, - int mode) + struct ldlm_namespace *ns, + struct ldlm_lock_desc *ld, + cfs_list_t *cancels, int count, + struct ldlm_lock *lock, + ldlm_cancel_flags_t cancel_flags) { init_completion(&blwi->blwi_comp); CFS_INIT_LIST_HEAD(&blwi->blwi_head); - if (cfs_memory_pressure_get()) + if (memory_pressure_get()) blwi->blwi_mem_pressure = 1; blwi->blwi_ns = ns; - blwi->blwi_mode = mode; + blwi->blwi_flags = cancel_flags; if (ld != NULL) blwi->blwi_ld = *ld; if (count) { @@ -1955,52 +1959,57 @@ static inline void init_blwi(struct ldlm_bl_work_item *blwi, * call ->l_blocking_ast itself. */ static int ldlm_bl_to_thread(struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, struct ldlm_lock *lock, - cfs_list_t *cancels, int count, int mode) + struct ldlm_lock_desc *ld, + struct ldlm_lock *lock, + cfs_list_t *cancels, int count, + ldlm_cancel_flags_t cancel_flags) { - ENTRY; + ENTRY; - if (cancels && count == 0) - RETURN(0); + if (cancels && count == 0) + RETURN(0); - if (mode == LDLM_SYNC) { - /* if it is synchronous call do minimum mem alloc, as it could - * be triggered from kernel shrinker - */ - struct ldlm_bl_work_item blwi; - memset(&blwi, 0, sizeof(blwi)); - init_blwi(&blwi, ns, ld, cancels, count, lock, LDLM_SYNC); - RETURN(__ldlm_bl_to_thread(&blwi, LDLM_SYNC)); - } else { - struct ldlm_bl_work_item *blwi; - OBD_ALLOC(blwi, sizeof(*blwi)); - if (blwi == NULL) - RETURN(-ENOMEM); - init_blwi(blwi, ns, ld, cancels, count, lock, LDLM_ASYNC); + if (cancel_flags & LCF_ASYNC) { + struct ldlm_bl_work_item *blwi; - RETURN(__ldlm_bl_to_thread(blwi, LDLM_ASYNC)); - } + OBD_ALLOC(blwi, sizeof(*blwi)); + if (blwi == NULL) + RETURN(-ENOMEM); + init_blwi(blwi, ns, ld, cancels, count, lock, cancel_flags); + + RETURN(__ldlm_bl_to_thread(blwi, cancel_flags)); + } else { + /* if it is synchronous call do minimum mem alloc, as it could + * be triggered from kernel shrinker + */ + struct ldlm_bl_work_item blwi; + + memset(&blwi, 0, sizeof(blwi)); + init_blwi(&blwi, ns, ld, cancels, count, lock, cancel_flags); + RETURN(__ldlm_bl_to_thread(&blwi, cancel_flags)); + } } #endif int ldlm_bl_to_thread_lock(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld, - struct ldlm_lock *lock) + struct ldlm_lock *lock) { #ifdef __KERNEL__ - RETURN(ldlm_bl_to_thread(ns, ld, lock, NULL, 0, LDLM_ASYNC)); + return ldlm_bl_to_thread(ns, ld, lock, NULL, 0, LCF_ASYNC); #else - RETURN(-ENOSYS); + return -ENOSYS; #endif } int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld, - cfs_list_t *cancels, int count, int mode) + cfs_list_t *cancels, int count, + ldlm_cancel_flags_t cancel_flags) { #ifdef __KERNEL__ - RETURN(ldlm_bl_to_thread(ns, ld, NULL, cancels, count, mode)); + return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags); #else - RETURN(-ENOSYS); + return -ENOSYS; #endif } @@ -2073,6 +2082,8 @@ static int ldlm_handle_qc_callback(struct ptlrpc_request *req) RETURN(-EPROTO); } + oqctl->qc_stat = ptlrpc_status_ntoh(oqctl->qc_stat); + cli->cl_qchk_stat = oqctl->qc_stat; return 0; } @@ -2124,16 +2135,6 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) rc = ldlm_handle_setinfo(req); ldlm_callback_reply(req, rc); RETURN(0); - case OBD_LOG_CANCEL: /* remove this eventually - for 1.4.0 compat */ - CERROR("shouldn't be handling OBD_LOG_CANCEL on DLM thread\n"); - req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL); - if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET)) - RETURN(0); - rc = llog_origin_handle_cancel(req); - if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP)) - RETURN(0); - ldlm_callback_reply(req, rc); - RETURN(0); case LLOG_ORIGIN_HANDLE_CREATE: req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE); if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET)) @@ -2194,7 +2195,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) * which the server has already started a blocking callback on. */ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE) && lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) { - rc = ldlm_cli_cancel(&dlm_req->lock_handle[0]); + rc = ldlm_cli_cancel(&dlm_req->lock_handle[0], 0); if (rc < 0) CERROR("ldlm_cli_cancel: %d\n", rc); } @@ -2209,22 +2210,21 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) RETURN(0); } - if ((lock->l_flags & LDLM_FL_FAIL_LOC) && + if (ldlm_is_fail_loc(lock) && lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); /* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */ lock_res_and_lock(lock); lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags & - LDLM_AST_FLAGS); + LDLM_FL_AST_MASK); if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) { /* If somebody cancels lock and cache is already dropped, * or lock is failed before cp_ast received on client, * we can tell the server we have no lock. Otherwise, we * should send cancel after dropping the cache. */ - if (((lock->l_flags & LDLM_FL_CANCELING) && - (lock->l_flags & LDLM_FL_BL_DONE)) || - (lock->l_flags & LDLM_FL_FAILED)) { + if ((ldlm_is_canceling(lock) && ldlm_is_bl_done(lock)) || + ldlm_is_failed(lock)) { LDLM_DEBUG(lock, "callback on lock " LPX64" - lock disappeared\n", dlm_req->lock_handle[0].cookie); @@ -2238,7 +2238,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) /* BL_AST locks are not needed in LRU. * Let ldlm_cancel_lru() be fast. */ ldlm_lock_remove_from_lru(lock); - lock->l_flags |= LDLM_FL_BL_AST; + ldlm_set_bl_ast(lock); } unlock_res_and_lock(lock); @@ -2255,7 +2255,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) case LDLM_BL_CALLBACK: CDEBUG(D_INODE, "blocking ast\n"); req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK); - if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)) { + if (!ldlm_is_cancel_on_block(lock)) { rc = ldlm_callback_reply(req, 0); if (req->rq_no_reply || rc) ldlm_callback_errmsg(req, "Normal process", rc, @@ -2335,15 +2335,6 @@ static int ldlm_cancel_handler(struct ptlrpc_request *req) if (rc) break; RETURN(0); - case OBD_LOG_CANCEL: - req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL); - if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET)) - RETURN(0); - rc = llog_origin_handle_cancel(req); - if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP)) - RETURN(0); - ldlm_callback_reply(req, rc); - RETURN(0); default: CERROR("invalid opcode %d\n", lustre_msg_get_opc(req->rq_reqmsg)); @@ -2405,7 +2396,7 @@ static int ldlm_cancel_hpreq_check(struct ptlrpc_request *req) if (lock == NULL) continue; - rc = !!(lock->l_flags & LDLM_FL_AST_SENT); + rc = ldlm_is_ast_sent(lock) ? 1 : 0; if (rc) LDLM_DEBUG(lock, "hpreq cancel lock"); LDLM_LOCK_PUT(lock); @@ -2460,7 +2451,7 @@ int ldlm_revoke_lock_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd, return 0; } - if (lock->l_flags & LDLM_FL_AST_SENT) { + if (ldlm_is_ast_sent(lock)) { unlock_res_and_lock(lock); return 0; } @@ -2468,7 +2459,7 @@ int ldlm_revoke_lock_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd, LASSERT(lock->l_blocking_ast); LASSERT(!lock->l_blocking_lock); - lock->l_flags |= LDLM_FL_AST_SENT; + ldlm_set_ast_sent(lock); if (lock->l_export && lock->l_export->exp_lock_hash) { /* NB: it's safe to call cfs_hash_del() even lock isn't * in exp_lock_hash. */ @@ -2509,22 +2500,22 @@ static struct ldlm_bl_work_item *ldlm_bl_get_work(struct ldlm_bl_pool *blp) static unsigned int num_bl = 0; spin_lock(&blp->blp_lock); - /* process a request from the blp_list at least every blp_num_threads */ - if (!cfs_list_empty(&blp->blp_list) && - (cfs_list_empty(&blp->blp_prio_list) || num_bl == 0)) - blwi = cfs_list_entry(blp->blp_list.next, - struct ldlm_bl_work_item, blwi_entry); - else - if (!cfs_list_empty(&blp->blp_prio_list)) - blwi = cfs_list_entry(blp->blp_prio_list.next, - struct ldlm_bl_work_item, - blwi_entry); - - if (blwi) { - if (++num_bl >= cfs_atomic_read(&blp->blp_num_threads)) - num_bl = 0; - cfs_list_del(&blwi->blwi_entry); - } + /* process a request from the blp_list at least every blp_num_threads */ + if (!cfs_list_empty(&blp->blp_list) && + (cfs_list_empty(&blp->blp_prio_list) || num_bl == 0)) + blwi = cfs_list_entry(blp->blp_list.next, + struct ldlm_bl_work_item, blwi_entry); + else + if (!cfs_list_empty(&blp->blp_prio_list)) + blwi = cfs_list_entry(blp->blp_prio_list.next, + struct ldlm_bl_work_item, + blwi_entry); + + if (blwi) { + if (++num_bl >= atomic_read(&blp->blp_num_threads)) + num_bl = 0; + cfs_list_del(&blwi->blwi_entry); + } spin_unlock(&blp->blp_lock); return blwi; @@ -2543,14 +2534,17 @@ static int ldlm_bl_thread_main(void *arg); static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp) { struct ldlm_bl_thread_data bltd = { .bltd_blp = blp }; - int rc; + struct task_struct *task; init_completion(&bltd.bltd_comp); - rc = cfs_create_thread(ldlm_bl_thread_main, &bltd, 0); - if (rc < 0) { - CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %d\n", - cfs_atomic_read(&blp->blp_num_threads), rc); - return rc; + bltd.bltd_num = atomic_read(&blp->blp_num_threads); + snprintf(bltd.bltd_name, sizeof(bltd.bltd_name) - 1, + "ldlm_bl_%02d", bltd.bltd_num); + task = kthread_run(ldlm_bl_thread_main, &bltd, bltd.bltd_name); + if (IS_ERR(task)) { + CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n", + atomic_read(&blp->blp_num_threads), PTR_ERR(task)); + return PTR_ERR(task); } wait_for_completion(&bltd.bltd_comp); @@ -2567,55 +2561,49 @@ static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp) static int ldlm_bl_thread_main(void *arg) { struct ldlm_bl_pool *blp; + struct ldlm_bl_thread_data *bltd = arg; ENTRY; - { - struct ldlm_bl_thread_data *bltd = arg; + blp = bltd->bltd_blp; - blp = bltd->bltd_blp; + atomic_inc(&blp->blp_num_threads); + atomic_inc(&blp->blp_busy_threads); - bltd->bltd_num = - cfs_atomic_inc_return(&blp->blp_num_threads) - 1; - cfs_atomic_inc(&blp->blp_busy_threads); + complete(&bltd->bltd_comp); + /* cannot use bltd after this, it is only on caller's stack */ - snprintf(bltd->bltd_name, sizeof(bltd->bltd_name) - 1, - "ldlm_bl_%02d", bltd->bltd_num); - cfs_daemonize(bltd->bltd_name); - - complete(&bltd->bltd_comp); - /* cannot use bltd after this, it is only on caller's stack */ - } + while (1) { + struct l_wait_info lwi = { 0 }; + struct ldlm_bl_work_item *blwi = NULL; + int busy; - while (1) { - struct l_wait_info lwi = { 0 }; - struct ldlm_bl_work_item *blwi = NULL; - int busy; + blwi = ldlm_bl_get_work(blp); - blwi = ldlm_bl_get_work(blp); - - if (blwi == NULL) { - cfs_atomic_dec(&blp->blp_busy_threads); - l_wait_event_exclusive(blp->blp_waitq, - (blwi = ldlm_bl_get_work(blp)) != NULL, - &lwi); - busy = cfs_atomic_inc_return(&blp->blp_busy_threads); - } else { - busy = cfs_atomic_read(&blp->blp_busy_threads); - } + if (blwi == NULL) { + atomic_dec(&blp->blp_busy_threads); + l_wait_event_exclusive(blp->blp_waitq, + (blwi = ldlm_bl_get_work(blp)) != NULL, + &lwi); + busy = atomic_inc_return(&blp->blp_busy_threads); + } else { + busy = atomic_read(&blp->blp_busy_threads); + } - if (blwi->blwi_ns == NULL) - /* added by ldlm_cleanup() */ - break; + if (blwi->blwi_ns == NULL) + /* added by ldlm_cleanup() */ + break; - /* Not fatal if racy and have a few too many threads */ - if (unlikely(busy < blp->blp_max_threads && - busy >= cfs_atomic_read(&blp->blp_num_threads) && - !blwi->blwi_mem_pressure)) - /* discard the return value, we tried */ - ldlm_bl_thread_start(blp); + /* Not fatal if racy and have a few too many threads */ + if (unlikely(busy < blp->blp_max_threads && + busy >= atomic_read(&blp->blp_num_threads) && + !blwi->blwi_mem_pressure)) + /* discard the return value, we tried */ + ldlm_bl_thread_start(blp); if (blwi->blwi_mem_pressure) - cfs_memory_pressure_set(); + memory_pressure_set(); + + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL2, 4); if (blwi->blwi_count) { int count; @@ -2626,24 +2614,25 @@ static int ldlm_bl_thread_main(void *arg) count = ldlm_cli_cancel_list_local(&blwi->blwi_head, blwi->blwi_count, LCF_BL_AST); - ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL, 0); + ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL, + blwi->blwi_flags); } else { ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld, blwi->blwi_lock); } if (blwi->blwi_mem_pressure) - cfs_memory_pressure_clr(); + memory_pressure_clr(); - if (blwi->blwi_mode == LDLM_ASYNC) - OBD_FREE(blwi, sizeof(*blwi)); - else + if (blwi->blwi_flags & LCF_ASYNC) + OBD_FREE(blwi, sizeof(*blwi)); + else complete(&blwi->blwi_comp); - } + } - cfs_atomic_dec(&blp->blp_busy_threads); - cfs_atomic_dec(&blp->blp_num_threads); + atomic_dec(&blp->blp_busy_threads); + atomic_dec(&blp->blp_num_threads); complete(&blp->blp_comp); - RETURN(0); + RETURN(0); } #endif @@ -2756,6 +2745,7 @@ static cfs_hash_ops_t ldlm_export_lock_ops = { int ldlm_init_export(struct obd_export *exp) { + int rc; ENTRY; exp->exp_lock_hash = @@ -2771,7 +2761,14 @@ int ldlm_init_export(struct obd_export *exp) if (!exp->exp_lock_hash) RETURN(-ENOMEM); + rc = ldlm_init_flock_export(exp); + if (rc) + GOTO(err, rc); + RETURN(0); +err: + ldlm_destroy_export(exp); + RETURN(rc); } EXPORT_SYMBOL(ldlm_init_export); @@ -2789,11 +2786,15 @@ EXPORT_SYMBOL(ldlm_destroy_export); static int ldlm_setup(void) { static struct ptlrpc_service_conf conf; - struct ldlm_bl_pool *blp = NULL; - int rc = 0; + struct ldlm_bl_pool *blp = NULL; #ifdef __KERNEL__ - int i; +# ifdef HAVE_SERVER_SUPPORT + struct task_struct *task; +# endif + int i; #endif + int rc = 0; + ENTRY; if (ldlm_state != NULL) @@ -2814,7 +2815,7 @@ static int ldlm_setup(void) .psc_name = "ldlm_cbd", .psc_watchdog_factor = 2, .psc_buf = { - .bc_nbufs = LDLM_NBUFS, + .bc_nbufs = LDLM_CLIENT_NBUFS, .bc_buf_size = LDLM_BUFSIZE, .bc_req_max_size = LDLM_MAXREQSIZE, .bc_rep_max_size = LDLM_MAXREPSIZE, @@ -2853,7 +2854,7 @@ static int ldlm_setup(void) .psc_name = "ldlm_canceld", .psc_watchdog_factor = 6, .psc_buf = { - .bc_nbufs = LDLM_NBUFS, + .bc_nbufs = LDLM_SERVER_NBUFS, .bc_buf_size = LDLM_BUFSIZE, .bc_req_max_size = LDLM_MAXREQSIZE, .bc_rep_max_size = LDLM_MAXREPSIZE, @@ -2897,11 +2898,11 @@ static int ldlm_setup(void) ldlm_state->ldlm_bl_pool = blp; spin_lock_init(&blp->blp_lock); - CFS_INIT_LIST_HEAD(&blp->blp_list); - CFS_INIT_LIST_HEAD(&blp->blp_prio_list); - cfs_waitq_init(&blp->blp_waitq); - cfs_atomic_set(&blp->blp_num_threads, 0); - cfs_atomic_set(&blp->blp_busy_threads, 0); + CFS_INIT_LIST_HEAD(&blp->blp_list); + CFS_INIT_LIST_HEAD(&blp->blp_prio_list); + init_waitqueue_head(&blp->blp_waitq); + atomic_set(&blp->blp_num_threads, 0); + atomic_set(&blp->blp_busy_threads, 0); #ifdef __KERNEL__ if (ldlm_num_threads == 0) { @@ -2920,22 +2921,23 @@ static int ldlm_setup(void) } # ifdef HAVE_SERVER_SUPPORT - CFS_INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks); - expired_lock_thread.elt_state = ELT_STOPPED; - cfs_waitq_init(&expired_lock_thread.elt_waitq); + CFS_INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks); + expired_lock_thread.elt_state = ELT_STOPPED; + init_waitqueue_head(&expired_lock_thread.elt_waitq); - CFS_INIT_LIST_HEAD(&waiting_locks_list); + CFS_INIT_LIST_HEAD(&waiting_locks_list); spin_lock_init(&waiting_locks_spinlock); - cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0); + cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0); - rc = cfs_create_thread(expired_lock_main, NULL, CFS_DAEMON_FLAGS); - if (rc < 0) { + task = kthread_run(expired_lock_main, NULL, "ldlm_elt"); + if (IS_ERR(task)) { + rc = PTR_ERR(task); CERROR("Cannot start ldlm expired-lock thread: %d\n", rc); GOTO(out, rc); } - cfs_wait_event(expired_lock_thread.elt_waitq, - expired_lock_thread.elt_state == ELT_READY); + wait_event(expired_lock_thread.elt_waitq, + expired_lock_thread.elt_state == ELT_READY); # endif /* HAVE_SERVER_SUPPORT */ rc = ldlm_pools_init(); @@ -2969,14 +2971,14 @@ static int ldlm_cleanup(void) if (ldlm_state->ldlm_bl_pool != NULL) { struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool; - while (cfs_atomic_read(&blp->blp_num_threads) > 0) { + while (atomic_read(&blp->blp_num_threads) > 0) { struct ldlm_bl_work_item blwi = { .blwi_ns = NULL }; init_completion(&blp->blp_comp); spin_lock(&blp->blp_lock); cfs_list_add_tail(&blwi.blwi_entry, &blp->blp_list); - cfs_waitq_signal(&blp->blp_waitq); + wake_up(&blp->blp_waitq); spin_unlock(&blp->blp_lock); wait_for_completion(&blp->blp_comp); @@ -2999,8 +3001,8 @@ static int ldlm_cleanup(void) # ifdef HAVE_SERVER_SUPPORT if (expired_lock_thread.elt_state != ELT_STOPPED) { expired_lock_thread.elt_state = ELT_TERMINATE; - cfs_waitq_signal(&expired_lock_thread.elt_waitq); - cfs_wait_event(expired_lock_thread.elt_waitq, + wake_up(&expired_lock_thread.elt_waitq); + wait_event(expired_lock_thread.elt_waitq, expired_lock_thread.elt_state == ELT_STOPPED); } # endif @@ -3017,26 +3019,26 @@ int ldlm_init(void) mutex_init(&ldlm_ref_mutex); mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER)); mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT)); - ldlm_resource_slab = cfs_mem_cache_create("ldlm_resources", - sizeof(struct ldlm_resource), 0, - CFS_SLAB_HWCACHE_ALIGN); - if (ldlm_resource_slab == NULL) - return -ENOMEM; + ldlm_resource_slab = kmem_cache_create("ldlm_resources", + sizeof(struct ldlm_resource), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (ldlm_resource_slab == NULL) + return -ENOMEM; - ldlm_lock_slab = cfs_mem_cache_create("ldlm_locks", + ldlm_lock_slab = kmem_cache_create("ldlm_locks", sizeof(struct ldlm_lock), 0, - CFS_SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU); + SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU, NULL); if (ldlm_lock_slab == NULL) { - cfs_mem_cache_destroy(ldlm_resource_slab); + kmem_cache_destroy(ldlm_resource_slab); return -ENOMEM; } - ldlm_interval_slab = cfs_mem_cache_create("interval_node", + ldlm_interval_slab = kmem_cache_create("interval_node", sizeof(struct ldlm_interval), - 0, CFS_SLAB_HWCACHE_ALIGN); + 0, SLAB_HWCACHE_ALIGN, NULL); if (ldlm_interval_slab == NULL) { - cfs_mem_cache_destroy(ldlm_resource_slab); - cfs_mem_cache_destroy(ldlm_lock_slab); + kmem_cache_destroy(ldlm_resource_slab); + kmem_cache_destroy(ldlm_lock_slab); return -ENOMEM; } #if LUSTRE_TRACKS_LOCK_EXP_REFS @@ -3047,19 +3049,15 @@ int ldlm_init(void) void ldlm_exit(void) { - int rc; - if (ldlm_refcount) - CERROR("ldlm_refcount is %d in ldlm_exit!\n", ldlm_refcount); - rc = cfs_mem_cache_destroy(ldlm_resource_slab); - LASSERTF(rc == 0, "couldn't free ldlm resource slab\n"); + if (ldlm_refcount) + CERROR("ldlm_refcount is %d in ldlm_exit!\n", ldlm_refcount); + kmem_cache_destroy(ldlm_resource_slab); #ifdef __KERNEL__ - /* ldlm_lock_put() use RCU to call ldlm_lock_free, so need call - * synchronize_rcu() to wait a grace period elapsed, so that - * ldlm_lock_free() get a chance to be called. */ - synchronize_rcu(); + /* ldlm_lock_put() use RCU to call ldlm_lock_free, so need call + * synchronize_rcu() to wait a grace period elapsed, so that + * ldlm_lock_free() get a chance to be called. */ + synchronize_rcu(); #endif - rc = cfs_mem_cache_destroy(ldlm_lock_slab); - LASSERTF(rc == 0, "couldn't free ldlm lock slab\n"); - rc = cfs_mem_cache_destroy(ldlm_interval_slab); - LASSERTF(rc == 0, "couldn't free interval node slab\n"); + kmem_cache_destroy(ldlm_lock_slab); + kmem_cache_destroy(ldlm_interval_slab); }