X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=libcfs%2Flibcfs%2Fwatchdog.c;h=95a0177b4d580e2eabfea1b94e8e28a326aa285c;hb=ac5044566b97c7f6881bed817c2ed9752a0c6d63;hp=6ce41c3d3d2750220035bc43ac1c7bf7a281e179;hpb=e3a7c58aebafce40323db54bf6056029e5af4a70;p=fs%2Flustre-release.git diff --git a/libcfs/libcfs/watchdog.c b/libcfs/libcfs/watchdog.c index 6ce41c3..95a0177 100644 --- a/libcfs/libcfs/watchdog.c +++ b/libcfs/libcfs/watchdog.c @@ -26,6 +26,8 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2012, 2014, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -38,26 +40,27 @@ #define DEBUG_SUBSYSTEM S_LNET +#include <linux/kthread.h> #include <libcfs/libcfs.h> #include "tracefile.h" struct lc_watchdog { - cfs_spinlock_t lcw_lock; /* check or change lcw_list */ - int lcw_refcount; /* must hold lcw_pending_timers_lock */ - cfs_timer_t lcw_timer; /* kernel timer */ - cfs_list_t lcw_list; /* chain on pending list */ - cfs_time_t lcw_last_touched; /* last touched stamp */ - cfs_task_t *lcw_task; /* owner task */ - void (*lcw_callback)(pid_t, void *); - void *lcw_data; - - pid_t lcw_pid; - - enum { - LC_WATCHDOG_DISABLED, - LC_WATCHDOG_ENABLED, - LC_WATCHDOG_EXPIRED - } lcw_state; + spinlock_t lcw_lock; /* check or change lcw_list */ + int lcw_refcount; /* must hold lcw_pending_timers_lock */ + struct timer_list lcw_timer; /* kernel timer */ + struct list_head lcw_list; /* chain on pending list */ + cfs_time_t lcw_last_touched;/* last touched stamp */ + struct task_struct *lcw_task; /* owner task */ + void (*lcw_callback)(pid_t, void *); + void *lcw_data; + + pid_t lcw_pid; + + enum { + LC_WATCHDOG_DISABLED, + LC_WATCHDOG_ENABLED, + LC_WATCHDOG_EXPIRED + } lcw_state; }; #ifdef WITH_WATCHDOG @@ -66,9 +69,9 @@ struct lc_watchdog { * and lcw_stop_completion when it exits. * Wake lcw_event_waitq to signal timer callback dispatches.
*/ -static cfs_completion_t lcw_start_completion; -static cfs_completion_t lcw_stop_completion; -static cfs_waitq_t lcw_event_waitq; +static struct completion lcw_start_completion; +static struct completion lcw_stop_completion; +static wait_queue_head_t lcw_event_waitq; /* * Set this and wake lcw_event_waitq to stop the dispatcher. @@ -84,16 +87,15 @@ static unsigned long lcw_flags = 0; * When it hits 0, we stop the dispatcher. */ static __u32 lcw_refcount = 0; -static CFS_DEFINE_MUTEX(lcw_refcount_mutex); +static DEFINE_MUTEX(lcw_refcount_mutex); /* * List of timers that have fired that need their callbacks run by the * dispatcher. */ /* BH lock! */ -static cfs_spinlock_t lcw_pending_timers_lock = CFS_SPIN_LOCK_UNLOCKED; -static cfs_list_t lcw_pending_timers = \ - CFS_LIST_HEAD_INIT(lcw_pending_timers); +static DEFINE_SPINLOCK(lcw_pending_timers_lock); +static struct list_head lcw_pending_timers = LIST_HEAD_INIT(lcw_pending_timers); /* Last time a watchdog expired */ static cfs_time_t lcw_last_watchdog_time; @@ -103,28 +105,20 @@ static void lcw_dump(struct lc_watchdog *lcw) { ENTRY; -#if defined(HAVE_TASKLIST_LOCK) - cfs_read_lock(&tasklist_lock); -#else rcu_read_lock(); -#endif if (lcw->lcw_task == NULL) { - LCONSOLE_WARN("Process " LPPID " was not found in the task " + LCONSOLE_WARN("Process %d was not found in the task " "list; watchdog callback may be incomplete\n", (int)lcw->lcw_pid); } else { libcfs_debug_dumpstack(lcw->lcw_task); } -#if defined(HAVE_TASKLIST_LOCK) - cfs_read_unlock(&tasklist_lock); -#else rcu_read_unlock(); -#endif EXIT; } -static void lcw_cb(ulong_ptr_t data) +static void lcw_cb(uintptr_t data) { struct lc_watchdog *lcw = (struct lc_watchdog *)data; ENTRY; @@ -136,30 +130,30 @@ static void lcw_cb(ulong_ptr_t data) lcw->lcw_state = LC_WATCHDOG_EXPIRED; - cfs_spin_lock_bh(&lcw->lcw_lock); - LASSERT(cfs_list_empty(&lcw->lcw_list)); + spin_lock_bh(&lcw->lcw_lock); + LASSERT(list_empty(&lcw->lcw_list)); - 
cfs_spin_lock_bh(&lcw_pending_timers_lock); - lcw->lcw_refcount++; /* +1 for pending list */ - cfs_list_add(&lcw->lcw_list, &lcw_pending_timers); - cfs_waitq_signal(&lcw_event_waitq); + spin_lock_bh(&lcw_pending_timers_lock); + lcw->lcw_refcount++; /* +1 for pending list */ + list_add(&lcw->lcw_list, &lcw_pending_timers); + wake_up(&lcw_event_waitq); - cfs_spin_unlock_bh(&lcw_pending_timers_lock); - cfs_spin_unlock_bh(&lcw->lcw_lock); - EXIT; + spin_unlock_bh(&lcw_pending_timers_lock); + spin_unlock_bh(&lcw->lcw_lock); + EXIT; } static int is_watchdog_fired(void) { - int rc; + int rc; - if (cfs_test_bit(LCW_FLAG_STOP, &lcw_flags)) - return 1; + if (test_bit(LCW_FLAG_STOP, &lcw_flags)) + return 1; - cfs_spin_lock_bh(&lcw_pending_timers_lock); - rc = !cfs_list_empty(&lcw_pending_timers); - cfs_spin_unlock_bh(&lcw_pending_timers_lock); - return rc; + spin_lock_bh(&lcw_pending_timers_lock); + rc = !list_empty(&lcw_pending_timers); + spin_unlock_bh(&lcw_pending_timers_lock); + return rc; } static void lcw_dump_stack(struct lc_watchdog *lcw) @@ -209,75 +203,76 @@ static void lcw_dump_stack(struct lc_watchdog *lcw) } } +/* + * Provided watchdog handlers + */ + +static void lc_watchdog_dumplog(pid_t pid, void *data) +{ + libcfs_debug_dumplog_internal((void *)((uintptr_t)pid)); +} + static int lcw_dispatch_main(void *data) { int rc = 0; - unsigned long flags; struct lc_watchdog *lcw; - CFS_LIST_HEAD (zombies); + struct list_head zombies = LIST_HEAD_INIT(zombies); ENTRY; - cfs_daemonize("lc_watchdogd"); - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(&current->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - - cfs_complete(&lcw_start_completion); + complete(&lcw_start_completion); while (1) { int dumplog = 1; - cfs_wait_event_interruptible(lcw_event_waitq, - is_watchdog_fired(), rc); + rc = wait_event_interruptible(lcw_event_waitq, + is_watchdog_fired()); CDEBUG(D_INFO, "Watchdog got woken up...\n"); - if (cfs_test_bit(LCW_FLAG_STOP, &lcw_flags)) { -
CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n"); - - cfs_spin_lock_bh(&lcw_pending_timers_lock); - rc = !cfs_list_empty(&lcw_pending_timers); - cfs_spin_unlock_bh(&lcw_pending_timers_lock); - if (rc) { - CERROR("pending timers list was not empty at " - "time of watchdog dispatch shutdown\n"); - } - break; - } - - cfs_spin_lock_bh(&lcw_pending_timers_lock); - while (!cfs_list_empty(&lcw_pending_timers)) { - int is_dumplog; - - lcw = cfs_list_entry(lcw_pending_timers.next, - struct lc_watchdog, lcw_list); - /* +1 ref for callback to make sure lwc wouldn't be - * deleted after releasing lcw_pending_timers_lock */ - lcw->lcw_refcount++; - cfs_spin_unlock_bh(&lcw_pending_timers_lock); - - /* lock ordering */ - cfs_spin_lock_bh(&lcw->lcw_lock); - cfs_spin_lock_bh(&lcw_pending_timers_lock); - - if (cfs_list_empty(&lcw->lcw_list)) { - /* already removed from pending list */ - lcw->lcw_refcount--; /* -1 ref for callback */ - if (lcw->lcw_refcount == 0) - cfs_list_add(&lcw->lcw_list, &zombies); - cfs_spin_unlock_bh(&lcw->lcw_lock); - /* still hold lcw_pending_timers_lock */ - continue; - } - - cfs_list_del_init(&lcw->lcw_list); - lcw->lcw_refcount--; /* -1 ref for pending list */ - - cfs_spin_unlock_bh(&lcw_pending_timers_lock); - cfs_spin_unlock_bh(&lcw->lcw_lock); - - CDEBUG(D_INFO, "found lcw for pid " LPPID "\n", + if (test_bit(LCW_FLAG_STOP, &lcw_flags)) { + CDEBUG(D_INFO, "LCW_FLAG_STOP set, shutting down...\n"); + + spin_lock_bh(&lcw_pending_timers_lock); + rc = !list_empty(&lcw_pending_timers); + spin_unlock_bh(&lcw_pending_timers_lock); + if (rc) { + CERROR("pending timers list was not empty at " + "time of watchdog dispatch shutdown\n"); + } + break; + } + + spin_lock_bh(&lcw_pending_timers_lock); + while (!list_empty(&lcw_pending_timers)) { + int is_dumplog; + + lcw = list_entry(lcw_pending_timers.next, + struct lc_watchdog, lcw_list); + /* +1 ref for callback to make sure lwc wouldn't be + * deleted after releasing lcw_pending_timers_lock */ + 
lcw->lcw_refcount++; + spin_unlock_bh(&lcw_pending_timers_lock); + + /* lock ordering */ + spin_lock_bh(&lcw->lcw_lock); + spin_lock_bh(&lcw_pending_timers_lock); + + if (list_empty(&lcw->lcw_list)) { + /* already removed from pending list */ + lcw->lcw_refcount--; /* -1 ref for callback */ + if (lcw->lcw_refcount == 0) + list_add(&lcw->lcw_list, &zombies); + spin_unlock_bh(&lcw->lcw_lock); + /* still hold lcw_pending_timers_lock */ + continue; + } + + list_del_init(&lcw->lcw_list); + lcw->lcw_refcount--; /* -1 ref for pending list */ + + spin_unlock_bh(&lcw_pending_timers_lock); + spin_unlock_bh(&lcw->lcw_lock); + + CDEBUG(D_INFO, "found lcw for pid %d\n", lcw->lcw_pid); lcw_dump_stack(lcw); @@ -289,65 +284,66 @@ static int lcw_dispatch_main(void *data) dumplog = 0; } - cfs_spin_lock_bh(&lcw_pending_timers_lock); - lcw->lcw_refcount--; /* -1 ref for callback */ - if (lcw->lcw_refcount == 0) - cfs_list_add(&lcw->lcw_list, &zombies); - } - cfs_spin_unlock_bh(&lcw_pending_timers_lock); + spin_lock_bh(&lcw_pending_timers_lock); + lcw->lcw_refcount--; /* -1 ref for callback */ + if (lcw->lcw_refcount == 0) + list_add(&lcw->lcw_list, &zombies); + } + spin_unlock_bh(&lcw_pending_timers_lock); - while (!cfs_list_empty(&zombies)) { - lcw = cfs_list_entry(lcw_pending_timers.next, - struct lc_watchdog, lcw_list); - cfs_list_del(&lcw->lcw_list); - LIBCFS_FREE(lcw, sizeof(*lcw)); - } - } + while (!list_empty(&zombies)) { + lcw = list_entry(zombies.next, + struct lc_watchdog, lcw_list); + list_del_init(&lcw->lcw_list); + LIBCFS_FREE(lcw, sizeof(*lcw)); + } + } - cfs_complete(&lcw_stop_completion); + complete(&lcw_stop_completion); - RETURN(rc); + RETURN(rc); } static void lcw_dispatch_start(void) { - int rc; - - ENTRY; - LASSERT(lcw_refcount == 1); - - cfs_init_completion(&lcw_stop_completion); - cfs_init_completion(&lcw_start_completion); - cfs_waitq_init(&lcw_event_waitq); - - CDEBUG(D_INFO, "starting dispatch thread\n"); - rc = cfs_create_thread(lcw_dispatch_main, NULL, 0); 
- if (rc < 0) { - CERROR("error spawning watchdog dispatch thread: %d\n", rc); - EXIT; - return; - } - cfs_wait_for_completion(&lcw_start_completion); - CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n"); - - EXIT; + struct task_struct *task; + + ENTRY; + LASSERT(lcw_refcount == 1); + + init_completion(&lcw_stop_completion); + init_completion(&lcw_start_completion); + init_waitqueue_head(&lcw_event_waitq); + + CDEBUG(D_INFO, "starting dispatch thread\n"); + task = kthread_run(lcw_dispatch_main, NULL, "lc_watchdogd"); + if (IS_ERR(task)) { + CERROR("error spawning watchdog dispatch thread: %ld\n", + PTR_ERR(task)); + EXIT; + return; + } + wait_for_completion(&lcw_start_completion); + CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n"); + + EXIT; } static void lcw_dispatch_stop(void) { - ENTRY; - LASSERT(lcw_refcount == 0); + ENTRY; + LASSERT(lcw_refcount == 0); - CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n"); + CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n"); - cfs_set_bit(LCW_FLAG_STOP, &lcw_flags); - cfs_waitq_signal(&lcw_event_waitq); + set_bit(LCW_FLAG_STOP, &lcw_flags); + wake_up(&lcw_event_waitq); - cfs_wait_for_completion(&lcw_stop_completion); + wait_for_completion(&lcw_stop_completion); - CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n"); + CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n"); - EXIT; + EXIT; } struct lc_watchdog *lc_watchdog_add(int timeout, @@ -363,21 +359,21 @@ struct lc_watchdog *lc_watchdog_add(int timeout, RETURN(ERR_PTR(-ENOMEM)); } - cfs_spin_lock_init(&lcw->lcw_lock); - lcw->lcw_refcount = 1; /* refcount for owner */ - lcw->lcw_task = cfs_current(); - lcw->lcw_pid = cfs_curproc_pid(); - lcw->lcw_callback = (callback != NULL) ? 
callback : lc_watchdog_dumplog; - lcw->lcw_data = data; - lcw->lcw_state = LC_WATCHDOG_DISABLED; + spin_lock_init(&lcw->lcw_lock); + lcw->lcw_refcount = 1; /* refcount for owner */ + lcw->lcw_task = current; + lcw->lcw_pid = current_pid(); + lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog; + lcw->lcw_data = data; + lcw->lcw_state = LC_WATCHDOG_DISABLED; - CFS_INIT_LIST_HEAD(&lcw->lcw_list); - cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw); + INIT_LIST_HEAD(&lcw->lcw_list); + cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw); - cfs_mutex_lock(&lcw_refcount_mutex); - if (++lcw_refcount == 1) - lcw_dispatch_start(); - cfs_mutex_unlock(&lcw_refcount_mutex); + mutex_lock(&lcw_refcount_mutex); + if (++lcw_refcount == 1) + lcw_dispatch_start(); + mutex_unlock(&lcw_refcount_mutex); /* Keep this working in case we enable them by default */ if (lcw->lcw_state == LC_WATCHDOG_ENABLED) { @@ -392,7 +388,7 @@ EXPORT_SYMBOL(lc_watchdog_add); static void lcw_update_time(struct lc_watchdog *lcw, const char *message) { - cfs_time_t newtime = cfs_time_current();; + cfs_time_t newtime = cfs_time_current(); if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) { struct timeval timediff; @@ -414,15 +410,15 @@ static void lcw_update_time(struct lc_watchdog *lcw, const char *message) static void lc_watchdog_del_pending(struct lc_watchdog *lcw) { - cfs_spin_lock_bh(&lcw->lcw_lock); - if (unlikely(!cfs_list_empty(&lcw->lcw_list))) { - cfs_spin_lock_bh(&lcw_pending_timers_lock); - cfs_list_del_init(&lcw->lcw_list); - lcw->lcw_refcount--; /* -1 ref for pending list */ - cfs_spin_unlock_bh(&lcw_pending_timers_lock); - } - - cfs_spin_unlock_bh(&lcw->lcw_lock); + spin_lock_bh(&lcw->lcw_lock); + if (unlikely(!list_empty(&lcw->lcw_list))) { + spin_lock_bh(&lcw_pending_timers_lock); + list_del_init(&lcw->lcw_list); + lcw->lcw_refcount--; /* -1 ref for pending list */ + spin_unlock_bh(&lcw_pending_timers_lock); + } + + spin_unlock_bh(&lcw->lcw_lock); } void lc_watchdog_touch(struct lc_watchdog 
*lcw, int timeout) @@ -433,10 +429,10 @@ void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout) lc_watchdog_del_pending(lcw); lcw_update_time(lcw, "resumed"); - lcw->lcw_state = LC_WATCHDOG_ENABLED; cfs_timer_arm(&lcw->lcw_timer, cfs_time_current() + cfs_time_seconds(timeout)); + lcw->lcw_state = LC_WATCHDOG_ENABLED; EXIT; } @@ -467,40 +463,30 @@ void lc_watchdog_delete(struct lc_watchdog *lcw) lcw_update_time(lcw, "stopped"); - cfs_spin_lock_bh(&lcw->lcw_lock); - cfs_spin_lock_bh(&lcw_pending_timers_lock); - if (unlikely(!cfs_list_empty(&lcw->lcw_list))) { - cfs_list_del_init(&lcw->lcw_list); - lcw->lcw_refcount--; /* -1 ref for pending list */ - } + spin_lock_bh(&lcw->lcw_lock); + spin_lock_bh(&lcw_pending_timers_lock); + if (unlikely(!list_empty(&lcw->lcw_list))) { + list_del_init(&lcw->lcw_list); + lcw->lcw_refcount--; /* -1 ref for pending list */ + } - lcw->lcw_refcount--; /* -1 ref for owner */ - dead = lcw->lcw_refcount == 0; - cfs_spin_unlock_bh(&lcw_pending_timers_lock); - cfs_spin_unlock_bh(&lcw->lcw_lock); + lcw->lcw_refcount--; /* -1 ref for owner */ + dead = lcw->lcw_refcount == 0; + spin_unlock_bh(&lcw_pending_timers_lock); + spin_unlock_bh(&lcw->lcw_lock); - if (dead) - LIBCFS_FREE(lcw, sizeof(*lcw)); + if (dead) + LIBCFS_FREE(lcw, sizeof(*lcw)); - cfs_mutex_lock(&lcw_refcount_mutex); - if (--lcw_refcount == 0) - lcw_dispatch_stop(); - cfs_mutex_unlock(&lcw_refcount_mutex); + mutex_lock(&lcw_refcount_mutex); + if (--lcw_refcount == 0) + lcw_dispatch_stop(); + mutex_unlock(&lcw_refcount_mutex); - EXIT; + EXIT; } EXPORT_SYMBOL(lc_watchdog_delete); -/* - * Provided watchdog handlers - */ - -void lc_watchdog_dumplog(pid_t pid, void *data) -{ - libcfs_debug_dumplog_internal((void *)((long_ptr_t)pid)); -} -EXPORT_SYMBOL(lc_watchdog_dumplog); - #else /* !defined(WITH_WATCHDOG) */ struct lc_watchdog *lc_watchdog_add(int timeout,