From f3327caf5e33bd36509b66aab923ec9c649987f3 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Sun, 11 Mar 2012 16:19:56 +0800 Subject: [PATCH] LU-1088 ldlm: dump certain amount of locks for ldlm resource * dump last 256 granted locks for ldlm resource to avoid DDOS. * replace ldlm_lock_dump with ldlm_lock_debug, and print nid. Signed-off-by: Lai Siyao Change-Id: I9b369978496397be44578e8aa07e2bc8c8bde4af Reviewed-on: http://review.whamcloud.com/2250 Tested-by: Hudson Reviewed-by: Jinshan Xiong Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/lustre_dlm.h | 5 +- lustre/ldlm/ldlm_lib.c | 2 +- lustre/ldlm/ldlm_lock.c | 149 +++++++++++++++----------------------------- lustre/ldlm/ldlm_lockd.c | 5 +- lustre/ldlm/ldlm_request.c | 2 - lustre/ldlm/ldlm_resource.c | 52 ++++++++-------- 6 files changed, 77 insertions(+), 138 deletions(-) diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 275c47a..f7c8010 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -920,10 +920,9 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, ldlm_lock_debug(&msgdata, D_DLMTRACE, NULL, lock, "### " fmt , ##a);\ } while (0) #else /* !LIBCFS_DEBUG */ +# define LDLM_DEBUG_LIMIT(mask, lock, fmt, a...) ((void)0) # define LDLM_DEBUG(lock, fmt, a...) ((void)0) # define LDLM_ERROR(lock, fmt, a...) ((void)0) -# define ldlm_lock_debuf(cdls, level, lock, file, func, line, fmt, a...) \ - ((void)0) #endif #define LDLM_DEBUG_NOLOCK(format, a...) 
\ @@ -1105,7 +1104,6 @@ void ldlm_lock_downgrade(struct ldlm_lock *lock, int new_mode); void ldlm_lock_cancel(struct ldlm_lock *lock); void ldlm_reprocess_all(struct ldlm_resource *res); void ldlm_reprocess_all_ns(struct ldlm_namespace *ns); -void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos); void ldlm_lock_dump_handle(int level, struct lustre_handle *); void ldlm_unlink_lock_skiplist(struct ldlm_lock *req); @@ -1253,7 +1251,6 @@ static inline void lock_res_nested(struct ldlm_resource *res, cfs_spin_lock_nested(&res->lr_lock, mode); } - static inline void unlock_res(struct ldlm_resource *res) { cfs_spin_unlock(&res->lr_lock); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 5f39bb2..2933b19 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -2618,7 +2618,7 @@ void ldlm_dump_export_locks(struct obd_export *exp) CERROR("dumping locks for export %p," "ignore if the unmount doesn't hang\n", exp); cfs_list_for_each_entry(lock, &exp->exp_locks_list, l_exp_refs_link) - ldlm_lock_dump(D_ERROR, lock, 0); + LDLM_ERROR(lock, "lock:"); } cfs_spin_unlock(&exp->exp_locks_list_guard); } diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index cf0b9ea..3c9b052 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -327,13 +327,11 @@ int ldlm_lock_destroy_internal(struct ldlm_lock *lock) if (lock->l_readers || lock->l_writers) { LDLM_ERROR(lock, "lock still has references"); - ldlm_lock_dump(D_ERROR, lock, 0); LBUG(); } if (!cfs_list_empty(&lock->l_res_link)) { LDLM_ERROR(lock, "lock still on resource"); - ldlm_lock_dump(D_ERROR, lock, 0); LBUG(); } @@ -948,8 +946,7 @@ static void ldlm_granted_list_add_lock(struct ldlm_lock *lock, check_res_locked(res); ldlm_resource_dump(D_INFO, res); - CDEBUG(D_OTHER, "About to add this lock:\n"); - ldlm_lock_dump(D_OTHER, lock, 0); + LDLM_DEBUG(lock, "About to add lock:"); if (lock->l_destroyed) { CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); @@ -1982,61 
+1979,6 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, RETURN(res); } -void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos) -{ - struct obd_device *obd = NULL; - - if (!((libcfs_debug | D_ERROR) & level)) - return; - - if (!lock) { - CDEBUG(level, " NULL LDLM lock\n"); - return; - } - - CDEBUG(level," -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d) (pid: %d)\n", - lock, lock->l_handle.h_cookie, cfs_atomic_read(&lock->l_refc), - pos, lock->l_pid); - if (lock->l_conn_export != NULL) - obd = lock->l_conn_export->exp_obd; - if (lock->l_export && lock->l_export->exp_connection) { - CDEBUG(level, " Node: NID %s (rhandle: "LPX64")\n", - libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid), - lock->l_remote_handle.cookie); - } else if (obd == NULL) { - CDEBUG(level, " Node: local\n"); - } else { - struct obd_import *imp = obd->u.cli.cl_import; - CDEBUG(level, " Node: NID %s (rhandle: "LPX64")\n", - libcfs_nid2str(imp->imp_connection->c_peer.nid), - lock->l_remote_handle.cookie); - } - CDEBUG(level, " Resource: %p ("LPU64"/"LPU64"/"LPU64")\n", - lock->l_resource, - lock->l_resource->lr_name.name[0], - lock->l_resource->lr_name.name[1], - lock->l_resource->lr_name.name[2]); - CDEBUG(level, " Req mode: %s, grant mode: %s, rc: %u, read: %d, " - "write: %d flags: "LPX64"\n", ldlm_lockname[lock->l_req_mode], - ldlm_lockname[lock->l_granted_mode], - cfs_atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers, - lock->l_flags); - if (lock->l_resource->lr_type == LDLM_EXTENT) - CDEBUG(level, " Extent: "LPU64" -> "LPU64 - " (req "LPU64"-"LPU64")\n", - lock->l_policy_data.l_extent.start, - lock->l_policy_data.l_extent.end, - lock->l_req_extent.start, lock->l_req_extent.end); - else if (lock->l_resource->lr_type == LDLM_FLOCK) - CDEBUG(level, " Pid: %d Extent: "LPU64" -> "LPU64"\n", - lock->l_policy_data.l_flock.pid, - lock->l_policy_data.l_flock.start, - lock->l_policy_data.l_flock.end); - else if (lock->l_resource->lr_type == 
LDLM_IBITS) - CDEBUG(level, " Bits: "LPX64"\n", - lock->l_policy_data.l_inodebits.bits); -} - void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh) { struct ldlm_lock *lock; @@ -2048,7 +1990,7 @@ void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh) if (lock == NULL) return; - ldlm_lock_dump(D_OTHER, lock, 0); + LDLM_DEBUG_LIMIT(level, lock, "###"); LDLM_LOCK_PUT(lock); } @@ -2058,48 +2000,57 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, const char *fmt, ...) { va_list args; + struct obd_export *exp = lock->l_export; + struct ldlm_resource *resource = lock->l_resource; + char *nid = "local"; va_start(args, fmt); - if (lock->l_resource == NULL) { + if (exp && exp->exp_connection) { + nid = libcfs_nid2str(exp->exp_connection->c_peer.nid); + } else if (exp && exp->exp_obd != NULL) { + struct obd_import *imp = exp->exp_obd->u.cli.cl_import; + nid = libcfs_nid2str(imp->imp_connection->c_peer.nid); + } + + if (resource == NULL) { libcfs_debug_vmsg2(msgdata, fmt, args, " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: \?\? rrc=\?\? type: \?\?\? flags: "LPX64" remote: " - LPX64" expref: %d pid: %u timeout: %lu\n", lock, + "res: \?\? rrc=\?\? type: \?\?\? flags: "LPX64" nid: %s " + "remote: "LPX64" expref: %d pid: %u timeout: %lu\n", + lock, lock->l_handle.h_cookie, cfs_atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers, ldlm_lockname[lock->l_granted_mode], ldlm_lockname[lock->l_req_mode], - lock->l_flags, lock->l_remote_handle.cookie, - lock->l_export ? - cfs_atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_flags, nid, lock->l_remote_handle.cookie, + exp ? 
cfs_atomic_read(&exp->exp_refcount) : -99, lock->l_pid, lock->l_callback_timeout); va_end(args); return; } - switch (lock->l_resource->lr_type) { + switch (resource->lr_type) { case LDLM_EXTENT: libcfs_debug_vmsg2(msgdata, fmt, args, " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64 - "] (req "LPU64"->"LPU64") flags: "LPX64" remote: "LPX64 - " expref: %d pid: %u timeout %lu\n", + "] (req "LPU64"->"LPU64") flags: "LPX64" nid: %s remote:" + " "LPX64" expref: %d pid: %u timeout %lu\n", ldlm_lock_to_ns_name(lock), lock, lock->l_handle.h_cookie, cfs_atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers, ldlm_lockname[lock->l_granted_mode], ldlm_lockname[lock->l_req_mode], - lock->l_resource->lr_name.name[0], - lock->l_resource->lr_name.name[1], - cfs_atomic_read(&lock->l_resource->lr_refcount), - ldlm_typename[lock->l_resource->lr_type], + resource->lr_name.name[0], + resource->lr_name.name[1], + cfs_atomic_read(&resource->lr_refcount), + ldlm_typename[resource->lr_type], lock->l_policy_data.l_extent.start, lock->l_policy_data.l_extent.end, lock->l_req_extent.start, lock->l_req_extent.end, - lock->l_flags, lock->l_remote_handle.cookie, - lock->l_export ? - cfs_atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_flags, nid, lock->l_remote_handle.cookie, + exp ? 
cfs_atomic_read(&exp->exp_refcount) : -99, lock->l_pid, lock->l_callback_timeout); break; @@ -2107,23 +2058,22 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, libcfs_debug_vmsg2(msgdata, fmt, args, " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d " - "["LPU64"->"LPU64"] flags: "LPX64" remote: "LPX64 + "["LPU64"->"LPU64"] flags: "LPX64" nid: %s remote: "LPX64 " expref: %d pid: %u timeout: %lu\n", ldlm_lock_to_ns_name(lock), lock, lock->l_handle.h_cookie, cfs_atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers, ldlm_lockname[lock->l_granted_mode], ldlm_lockname[lock->l_req_mode], - lock->l_resource->lr_name.name[0], - lock->l_resource->lr_name.name[1], - cfs_atomic_read(&lock->l_resource->lr_refcount), - ldlm_typename[lock->l_resource->lr_type], + resource->lr_name.name[0], + resource->lr_name.name[1], + cfs_atomic_read(&resource->lr_refcount), + ldlm_typename[resource->lr_type], lock->l_policy_data.l_flock.pid, lock->l_policy_data.l_flock.start, lock->l_policy_data.l_flock.end, - lock->l_flags, lock->l_remote_handle.cookie, - lock->l_export ? - cfs_atomic_read(&lock->l_export->exp_refcount) : -99, + lock->l_flags, nid, lock->l_remote_handle.cookie, + exp ? 
cfs_atomic_read(&exp->exp_refcount) : -99, lock->l_pid, lock->l_callback_timeout); break; @@ -2131,7 +2081,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, libcfs_debug_vmsg2(msgdata, fmt, args, " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s " - "flags: "LPX64" remote: "LPX64" expref: %d " + "flags: "LPX64" nid: %s remote: "LPX64" expref: %d " "pid: %u timeout: %lu\n", ldlm_lock_to_ns_name(lock), lock, lock->l_handle.h_cookie, @@ -2139,14 +2089,13 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, lock->l_readers, lock->l_writers, ldlm_lockname[lock->l_granted_mode], ldlm_lockname[lock->l_req_mode], - lock->l_resource->lr_name.name[0], - lock->l_resource->lr_name.name[1], + resource->lr_name.name[0], + resource->lr_name.name[1], lock->l_policy_data.l_inodebits.bits, - cfs_atomic_read(&lock->l_resource->lr_refcount), - ldlm_typename[lock->l_resource->lr_type], - lock->l_flags, lock->l_remote_handle.cookie, - lock->l_export ? - cfs_atomic_read(&lock->l_export->exp_refcount) : -99, + cfs_atomic_read(&resource->lr_refcount), + ldlm_typename[resource->lr_type], + lock->l_flags, nid, lock->l_remote_handle.cookie, + exp ? 
cfs_atomic_read(&exp->exp_refcount) : -99, lock->l_pid, lock->l_callback_timeout); break; @@ -2154,20 +2103,20 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, libcfs_debug_vmsg2(msgdata, fmt, args, " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " "res: "LPU64"/"LPU64" rrc: %d type: %s flags: "LPX64" " - "remote: "LPX64" expref: %d pid: %u timeout %lu\n", + "nid: %s remote: "LPX64" expref: %d pid: %u timeout %lu" + "\n", ldlm_lock_to_ns_name(lock), lock, lock->l_handle.h_cookie, cfs_atomic_read (&lock->l_refc), lock->l_readers, lock->l_writers, ldlm_lockname[lock->l_granted_mode], ldlm_lockname[lock->l_req_mode], - lock->l_resource->lr_name.name[0], - lock->l_resource->lr_name.name[1], - cfs_atomic_read(&lock->l_resource->lr_refcount), - ldlm_typename[lock->l_resource->lr_type], - lock->l_flags, lock->l_remote_handle.cookie, - lock->l_export ? - cfs_atomic_read(&lock->l_export->exp_refcount) : -99, + resource->lr_name.name[0], + resource->lr_name.name[1], + cfs_atomic_read(&resource->lr_refcount), + ldlm_typename[resource->lr_type], + lock->l_flags, nid, lock->l_remote_handle.cookie, + exp ? 
cfs_atomic_read(&exp->exp_refcount) : -99, lock->l_pid, lock->l_callback_timeout); break; } diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index f33dde1..3cffc74 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -764,10 +764,8 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, LASSERT(lock); LASSERT(data != NULL); - if (lock->l_export->exp_obd->obd_recovering != 0) { + if (lock->l_export->exp_obd->obd_recovering != 0) LDLM_ERROR(lock, "BUG 6063: lock collide during recovery"); - ldlm_lock_dump(D_ERROR, lock, 0); - } ldlm_lock_reorder_req(lock); @@ -2804,7 +2802,6 @@ EXPORT_SYMBOL(ldlm_lock_decref); EXPORT_SYMBOL(ldlm_lock_decref_and_cancel); EXPORT_SYMBOL(ldlm_lock_change_resource); EXPORT_SYMBOL(ldlm_it2str); -EXPORT_SYMBOL(ldlm_lock_dump); EXPORT_SYMBOL(ldlm_lock_dump_handle); EXPORT_SYMBOL(ldlm_reprocess_all_ns); EXPORT_SYMBOL(ldlm_lock_allow_match_locked); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 412e184..600c3bc 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -172,7 +172,6 @@ int ldlm_completion_ast_async(struct ldlm_lock *lock, int flags, void *data) LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, " "going forward"); - ldlm_lock_dump(D_OTHER, lock, 0); ldlm_reprocess_all(lock->l_resource); RETURN(0); } @@ -222,7 +221,6 @@ int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data) LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, " "sleeping"); - ldlm_lock_dump(D_OTHER, lock, 0); noreproc: diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index dcb75c2..3fc1345 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -67,6 +67,10 @@ cfs_proc_dir_entry_t *ldlm_svc_proc_dir = NULL; extern unsigned int ldlm_cancel_unused_locks_before_replay; +/* during debug dump certain amount of granted locks for one resource to avoid + * DDOS. 
*/ +unsigned int ldlm_dump_granted_max = 256; + #ifdef LPROCFS static int ldlm_proc_dump_ns(struct file *file, const char *buffer, unsigned long count, void *data) @@ -81,6 +85,9 @@ int ldlm_proc_setup(void) int rc; struct lprocfs_vars list[] = { { "dump_namespaces", NULL, ldlm_proc_dump_ns, NULL }, + { "dump_granted_max", + lprocfs_rd_uint, lprocfs_wr_uint, + &ldlm_dump_granted_max, NULL }, { "cancel_unused_locks_before_replay", lprocfs_rd_uint, lprocfs_wr_uint, &ldlm_cancel_unused_locks_before_replay, NULL }, @@ -1197,8 +1204,7 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, cfs_list_t *head, { check_res_locked(res); - CDEBUG(D_OTHER, "About to add this lock:\n"); - ldlm_lock_dump(D_OTHER, lock, 0); + LDLM_DEBUG(lock, "About to add this lock:\n"); if (lock->l_destroyed) { CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); @@ -1218,8 +1224,7 @@ void ldlm_resource_insert_lock_after(struct ldlm_lock *original, check_res_locked(res); ldlm_resource_dump(D_INFO, res); - CDEBUG(D_OTHER, "About to insert this lock after %p:\n", original); - ldlm_lock_dump(D_OTHER, new, 0); + LDLM_DEBUG(new, "About to insert this lock after %p:\n", original); if (new->l_destroyed) { CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); @@ -1303,8 +1308,8 @@ void ldlm_namespace_dump(int level, struct ldlm_namespace *ns) void ldlm_resource_dump(int level, struct ldlm_resource *res) { - cfs_list_t *tmp; - int pos; + struct ldlm_lock *lock; + unsigned int granted = 0; CLASSERT(RES_NAME_SIZE == 4); @@ -1317,33 +1322,26 @@ void ldlm_resource_dump(int level, struct ldlm_resource *res) cfs_atomic_read(&res->lr_refcount)); if (!cfs_list_empty(&res->lr_granted)) { - pos = 0; - CDEBUG(level, "Granted locks:\n"); - cfs_list_for_each(tmp, &res->lr_granted) { - struct ldlm_lock *lock; - lock = cfs_list_entry(tmp, struct ldlm_lock, - l_res_link); - ldlm_lock_dump(level, lock, ++pos); + CDEBUG(level, "Granted locks (in reverse order):\n"); + 
cfs_list_for_each_entry_reverse(lock, &res->lr_granted, + l_res_link) { + LDLM_DEBUG_LIMIT(level, lock, "###"); + if (!(level & D_CANTMASK) && + ++granted > ldlm_dump_granted_max) { + CDEBUG(level, "only dump %d granted locks to " + "avoid DDOS.\n", granted); + break; + } } } if (!cfs_list_empty(&res->lr_converting)) { - pos = 0; CDEBUG(level, "Converting locks:\n"); - cfs_list_for_each(tmp, &res->lr_converting) { - struct ldlm_lock *lock; - lock = cfs_list_entry(tmp, struct ldlm_lock, - l_res_link); - ldlm_lock_dump(level, lock, ++pos); - } + cfs_list_for_each_entry(lock, &res->lr_converting, l_res_link) + LDLM_DEBUG_LIMIT(level, lock, "###"); } if (!cfs_list_empty(&res->lr_waiting)) { - pos = 0; CDEBUG(level, "Waiting locks:\n"); - cfs_list_for_each(tmp, &res->lr_waiting) { - struct ldlm_lock *lock; - lock = cfs_list_entry(tmp, struct ldlm_lock, - l_res_link); - ldlm_lock_dump(level, lock, ++pos); - } + cfs_list_for_each_entry(lock, &res->lr_waiting, l_res_link) + LDLM_DEBUG_LIMIT(level, lock, "###"); } } -- 1.8.3.1