Whamcloud - gitweb
LU-1346 libcfs: cleanup waitq related primitives
[fs/lustre-release.git] / libcfs / libcfs / watchdog.c
index 131daa3..bbad0b1 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -28,6 +26,8 @@
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 #include "tracefile.h"
 
 struct lc_watchdog {
-        cfs_timer_t           lcw_timer; /* kernel timer */
-        cfs_list_t            lcw_list;
-        cfs_time_t            lcw_last_touched;
-        cfs_task_t           *lcw_task;
-        cfs_atomic_t          lcw_refcount;
+       spinlock_t  lcw_lock;     /* check or change lcw_list */
+        int             lcw_refcount; /* must hold lcw_pending_timers_lock */
+        cfs_timer_t     lcw_timer;    /* kernel timer */
+        cfs_list_t      lcw_list;     /* chain on pending list */
+        cfs_time_t      lcw_last_touched; /* last touched stamp */
+        cfs_task_t     *lcw_task;     /* owner task */
+        void          (*lcw_callback)(pid_t, void *);
+        void           *lcw_data;
 
-        void                (*lcw_callback)(pid_t, void *);
-        void                 *lcw_data;
-
-        pid_t                 lcw_pid;
+        pid_t           lcw_pid;
 
         enum {
                 LC_WATCHDOG_DISABLED,
@@ -68,9 +68,9 @@ struct lc_watchdog {
  * and lcw_stop_completion when it exits.
  * Wake lcw_event_waitq to signal timer callback dispatches.
  */
-static cfs_completion_t lcw_start_completion;
-static cfs_completion_t  lcw_stop_completion;
-static cfs_waitq_t lcw_event_waitq;
+static struct completion lcw_start_completion;
+static struct completion  lcw_stop_completion;
+static wait_queue_head_t lcw_event_waitq;
 
 /*
  * Set this and wake lcw_event_waitq to stop the dispatcher.
@@ -83,19 +83,18 @@ static unsigned long lcw_flags = 0;
 /*
  * Number of outstanding watchdogs.
  * When it hits 1, we start the dispatcher.
- * When it hits 0, we stop the distpatcher.
+ * When it hits 0, we stop the dispatcher.
  */
 static __u32         lcw_refcount = 0;
-static CFS_DECLARE_MUTEX(lcw_refcount_sem);
+static DEFINE_MUTEX(lcw_refcount_mutex);
 
 /*
  * List of timers that have fired that need their callbacks run by the
  * dispatcher.
  */
 /* BH lock! */
-static cfs_spinlock_t lcw_pending_timers_lock = CFS_SPIN_LOCK_UNLOCKED;
-static cfs_list_t lcw_pending_timers = \
-        CFS_LIST_HEAD_INIT(lcw_pending_timers);
+static DEFINE_SPINLOCK(lcw_pending_timers_lock);
+static cfs_list_t lcw_pending_timers = CFS_LIST_HEAD_INIT(lcw_pending_timers);
 
 /* Last time a watchdog expired */
 static cfs_time_t lcw_last_watchdog_time;
@@ -105,15 +104,8 @@ static void
 lcw_dump(struct lc_watchdog *lcw)
 {
         ENTRY;
-#if defined(HAVE_TASKLIST_LOCK)
-        cfs_read_lock(&tasklist_lock);
-#elif defined(HAVE_TASK_RCU)
         rcu_read_lock();
-#else
-        CERROR("unable to dump stack because of missing export\n"); 
-        RETURN_EXIT;
-#endif
-       if (lcw->lcw_task == NULL) { 
+       if (lcw->lcw_task == NULL) {
                 LCONSOLE_WARN("Process " LPPID " was not found in the task "
                               "list; watchdog callback may be incomplete\n",
                               (int)lcw->lcw_pid);
@@ -121,11 +113,7 @@ lcw_dump(struct lc_watchdog *lcw)
                 libcfs_debug_dumpstack(lcw->lcw_task);
         }
 
-#if defined(HAVE_TASKLIST_LOCK)
-        cfs_read_unlock(&tasklist_lock);
-#elif defined(HAVE_TASK_RCU)
         rcu_read_unlock();
-#endif
         EXIT;
 }
 
@@ -141,38 +129,30 @@ static void lcw_cb(ulong_ptr_t data)
 
         lcw->lcw_state = LC_WATCHDOG_EXPIRED;
 
-        cfs_spin_lock_bh(&lcw_pending_timers_lock);
-        cfs_list_add(&lcw->lcw_list, &lcw_pending_timers);
-        cfs_waitq_signal(&lcw_event_waitq);
-        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
+       spin_lock_bh(&lcw->lcw_lock);
+       LASSERT(cfs_list_empty(&lcw->lcw_list));
 
-        EXIT;
-}
-
-static inline void lcw_get(struct lc_watchdog *lcw)
-{
-        cfs_atomic_inc(&lcw->lcw_refcount);
-}
+       spin_lock_bh(&lcw_pending_timers_lock);
+       lcw->lcw_refcount++; /* +1 for pending list */
+       cfs_list_add(&lcw->lcw_list, &lcw_pending_timers);
+       wake_up(&lcw_event_waitq);
 
-static inline void lcw_put(struct lc_watchdog *lcw)
-{
-        if (cfs_atomic_dec_and_test(&lcw->lcw_refcount)) {
-                LASSERT(cfs_list_empty(&lcw->lcw_list));
-                LIBCFS_FREE(lcw, sizeof(*lcw));
-        }
+       spin_unlock_bh(&lcw_pending_timers_lock);
+       spin_unlock_bh(&lcw->lcw_lock);
+       EXIT;
 }
 
 static int is_watchdog_fired(void)
 {
-        int rc;
+       int rc; /* non-zero when the dispatcher has something to do */
 
-        if (cfs_test_bit(LCW_FLAG_STOP, &lcw_flags))
-                return 1;
+       if (test_bit(LCW_FLAG_STOP, &lcw_flags))
+               return 1; /* a stop request must wake the dispatcher too */
 
-        cfs_spin_lock_bh(&lcw_pending_timers_lock);
-        rc = !cfs_list_empty(&lcw_pending_timers);
-        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
-        return rc;
+       spin_lock_bh(&lcw_pending_timers_lock);
+       rc = !cfs_list_empty(&lcw_pending_timers); /* any expired lcw queued? */
+       spin_unlock_bh(&lcw_pending_timers_lock);
+       return rc;
 }
 
 static void lcw_dump_stack(struct lc_watchdog *lcw)
@@ -225,112 +205,135 @@ static void lcw_dump_stack(struct lc_watchdog *lcw)
 static int lcw_dispatch_main(void *data)
 {
         int                 rc = 0;
-        unsigned long       flags;
-        struct lc_watchdog *lcw, *lcwcb;
+        struct lc_watchdog *lcw;
+        CFS_LIST_HEAD      (zombies);
 
         ENTRY;
 
-        cfs_daemonize("lc_watchdogd");
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-
-        cfs_complete(&lcw_start_completion);
+       complete(&lcw_start_completion);
 
         while (1) {
-                cfs_wait_event_interruptible(lcw_event_waitq,
-                                             is_watchdog_fired(), rc);
-                CDEBUG(D_INFO, "Watchdog got woken up...\n");
-                if (cfs_test_bit(LCW_FLAG_STOP, &lcw_flags)) {
-                        CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n");
-
-                        cfs_spin_lock_bh(&lcw_pending_timers_lock);
-                        rc = !cfs_list_empty(&lcw_pending_timers);
-                        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
-                        if (rc) {
-                                CERROR("pending timers list was not empty at "
-                                       "time of watchdog dispatch shutdown\n");
-                        }
-                        break;
-                }
+                int dumplog = 1;
 
-                lcwcb = NULL;
-                cfs_spin_lock_bh(&lcw_pending_timers_lock);
+               rc = wait_event_interruptible(lcw_event_waitq,
+                                             is_watchdog_fired());
+                CDEBUG(D_INFO, "Watchdog got woken up...\n");
+               if (test_bit(LCW_FLAG_STOP, &lcw_flags)) {
+                       CDEBUG(D_INFO, "LCW_FLAG_STOP set, shutting down...\n");
+
+                       spin_lock_bh(&lcw_pending_timers_lock);
+                       rc = !cfs_list_empty(&lcw_pending_timers);
+                       spin_unlock_bh(&lcw_pending_timers_lock);
+                       if (rc) {
+                               CERROR("pending timers list was not empty at "
+                                      "time of watchdog dispatch shutdown\n");
+                       }
+                       break;
+               }
+
+               spin_lock_bh(&lcw_pending_timers_lock);
                 while (!cfs_list_empty(&lcw_pending_timers)) {
+                        int is_dumplog;
 
                         lcw = cfs_list_entry(lcw_pending_timers.next,
-                                         struct lc_watchdog,
-                                         lcw_list);
-                        lcw_get(lcw);
+                                             struct lc_watchdog, lcw_list);
+                        /* +1 ref for callback to make sure lwc wouldn't be
+                         * deleted after releasing lcw_pending_timers_lock */
+                        lcw->lcw_refcount++;
+                       spin_unlock_bh(&lcw_pending_timers_lock);
+
+                       /* lock ordering */
+                       spin_lock_bh(&lcw->lcw_lock);
+                       spin_lock_bh(&lcw_pending_timers_lock);
+
+                       if (cfs_list_empty(&lcw->lcw_list)) {
+                               /* already removed from pending list */
+                               lcw->lcw_refcount--; /* -1 ref for callback */
+                               if (lcw->lcw_refcount == 0)
+                                       cfs_list_add(&lcw->lcw_list, &zombies);
+                               spin_unlock_bh(&lcw->lcw_lock);
+                                /* still hold lcw_pending_timers_lock */
+                                continue;
+                        }
+
                         cfs_list_del_init(&lcw->lcw_list);
-                        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
+                        lcw->lcw_refcount--; /* -1 ref for pending list */
+
+                       spin_unlock_bh(&lcw_pending_timers_lock);
+                       spin_unlock_bh(&lcw->lcw_lock);
 
                         CDEBUG(D_INFO, "found lcw for pid " LPPID "\n",
                                lcw->lcw_pid);
                         lcw_dump_stack(lcw);
 
-                        if (lcwcb == NULL &&
-                            lcw->lcw_state != LC_WATCHDOG_DISABLED)
-                                lcwcb = lcw;
-                        else
-                                lcw_put(lcw);
-                        cfs_spin_lock_bh(&lcw_pending_timers_lock);
-                }
-                cfs_spin_unlock_bh(&lcw_pending_timers_lock);
+                        is_dumplog = lcw->lcw_callback == lc_watchdog_dumplog;
+                        if (lcw->lcw_state != LC_WATCHDOG_DISABLED &&
+                            (dumplog || !is_dumplog)) {
+                                lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
+                                if (dumplog && is_dumplog)
+                                        dumplog = 0;
+                        }
+
+                       spin_lock_bh(&lcw_pending_timers_lock);
+                       lcw->lcw_refcount--; /* -1 ref for callback */
+                       if (lcw->lcw_refcount == 0)
+                               cfs_list_add(&lcw->lcw_list, &zombies);
+               }
+               spin_unlock_bh(&lcw_pending_timers_lock);
 
-                /* only do callback once for this batch of lcws */
-                if (lcwcb != NULL) {
-                        lcwcb->lcw_callback(lcwcb->lcw_pid, lcwcb->lcw_data);
-                        lcw_put(lcwcb);
+                while (!cfs_list_empty(&zombies)) {
+                        lcw = cfs_list_entry(lcw_pending_timers.next,
+                                         struct lc_watchdog, lcw_list);
+                        cfs_list_del(&lcw->lcw_list);
+                        LIBCFS_FREE(lcw, sizeof(*lcw));
                 }
         }
 
-        cfs_complete(&lcw_stop_completion);
+       complete(&lcw_stop_completion);
 
-        RETURN(rc);
+       RETURN(rc);
 }
 
 static void lcw_dispatch_start(void)
 {
-        int rc;
-
-        ENTRY;
-        LASSERT(lcw_refcount == 1);
-
-        cfs_init_completion(&lcw_stop_completion);
-        cfs_init_completion(&lcw_start_completion);
-        cfs_waitq_init(&lcw_event_waitq);
-
-        CDEBUG(D_INFO, "starting dispatch thread\n");
-        rc = cfs_kernel_thread(lcw_dispatch_main, NULL, 0);
-        if (rc < 0) {
-                CERROR("error spawning watchdog dispatch thread: %d\n", rc);
-                EXIT;
-                return;
-        }
-        cfs_wait_for_completion(&lcw_start_completion);
-        CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n");
-
-        EXIT;
+       cfs_task_t *task; /* dispatcher thread, or ERR_PTR on failure */
+
+       ENTRY;
+       LASSERT(lcw_refcount == 1); /* called only for the first watchdog */
+
+       init_completion(&lcw_stop_completion);
+       init_completion(&lcw_start_completion);
+       init_waitqueue_head(&lcw_event_waitq);
+
+       CDEBUG(D_INFO, "starting dispatch thread\n");
+       task = kthread_run(lcw_dispatch_main, NULL, "lc_watchdogd");
+       if (IS_ERR(task)) {
+               CERROR("error spawning watchdog dispatch thread: %ld\n",
+                       PTR_ERR(task));
+               EXIT;
+               return; /* NOTE(review): no dispatcher runs from here on; confirm lcw_dispatch_stop() cannot block */
+       }
+       wait_for_completion(&lcw_start_completion); /* signalled by lcw_dispatch_main() on entry */
+       CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n");
+
+       EXIT;
 }
 
 static void lcw_dispatch_stop(void)
 {
-        ENTRY;
-        LASSERT(lcw_refcount == 0);
+       ENTRY;
+       LASSERT(lcw_refcount == 0); /* called only once the last watchdog is gone */
 
-        CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");
+       CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");
 
-        cfs_set_bit(LCW_FLAG_STOP, &lcw_flags);
-        cfs_waitq_signal(&lcw_event_waitq);
+       set_bit(LCW_FLAG_STOP, &lcw_flags); /* tested by is_watchdog_fired() and the dispatcher loop */
+       wake_up(&lcw_event_waitq); /* dispatcher sleeps on this queue */
 
-        cfs_wait_for_completion(&lcw_stop_completion);
+       wait_for_completion(&lcw_stop_completion); /* completed when lcw_dispatch_main() leaves its loop */
 
-        CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n");
+       CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n");
 
-        EXIT;
+       EXIT;
 }
 
 struct lc_watchdog *lc_watchdog_add(int timeout,
@@ -346,20 +349,21 @@ struct lc_watchdog *lc_watchdog_add(int timeout,
                 RETURN(ERR_PTR(-ENOMEM));
         }
 
-        lcw->lcw_task     = cfs_current();
-        lcw->lcw_pid      = cfs_curproc_pid();
-        lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
-        lcw->lcw_data     = data;
-        lcw->lcw_state    = LC_WATCHDOG_DISABLED;
+       spin_lock_init(&lcw->lcw_lock);
+       lcw->lcw_refcount = 1; /* refcount for owner */
+       lcw->lcw_task     = cfs_current();
+       lcw->lcw_pid      = current_pid();
+       lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
+       lcw->lcw_data     = data;
+       lcw->lcw_state    = LC_WATCHDOG_DISABLED;
 
         CFS_INIT_LIST_HEAD(&lcw->lcw_list);
         cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw);
-        cfs_atomic_set(&lcw->lcw_refcount, 1);
 
-        cfs_down(&lcw_refcount_sem);
-        if (++lcw_refcount == 1)
-                lcw_dispatch_start();
-        cfs_up(&lcw_refcount_sem);
+       mutex_lock(&lcw_refcount_mutex);
+       if (++lcw_refcount == 1)
+               lcw_dispatch_start();
+       mutex_unlock(&lcw_refcount_mutex);
 
         /* Keep this working in case we enable them by default */
         if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
@@ -394,15 +398,25 @@ static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
         lcw->lcw_last_touched = newtime;
 }
 
+static void lc_watchdog_del_pending(struct lc_watchdog *lcw) /* unlink lcw from the pending list if it is queued */
+{
+       spin_lock_bh(&lcw->lcw_lock); /* taken before lcw_pending_timers_lock, as in lcw_cb() */
+       if (unlikely(!cfs_list_empty(&lcw->lcw_list))) {
+               spin_lock_bh(&lcw_pending_timers_lock);
+               cfs_list_del_init(&lcw->lcw_list); /* leaves lcw_list empty again */
+               lcw->lcw_refcount--; /* -1 ref for pending list */
+               spin_unlock_bh(&lcw_pending_timers_lock);
+       }
+
+       spin_unlock_bh(&lcw->lcw_lock);
+}
+
 void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
 {
         ENTRY;
         LASSERT(lcw != NULL);
-        LASSERT(cfs_atomic_read(&lcw->lcw_refcount) > 0);
 
-        cfs_spin_lock_bh(&lcw_pending_timers_lock);
-        cfs_list_del_init(&lcw->lcw_list);
-        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
+        lc_watchdog_del_pending(lcw);
 
         lcw_update_time(lcw, "resumed");
         lcw->lcw_state = LC_WATCHDOG_ENABLED;
@@ -418,12 +432,8 @@ void lc_watchdog_disable(struct lc_watchdog *lcw)
 {
         ENTRY;
         LASSERT(lcw != NULL);
-        LASSERT(cfs_atomic_read(&lcw->lcw_refcount) > 0);
 
-        cfs_spin_lock_bh(&lcw_pending_timers_lock);
-        if (!cfs_list_empty(&lcw->lcw_list))
-                cfs_list_del_init(&lcw->lcw_list);
-        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
+        lc_watchdog_del_pending(lcw);
 
         lcw_update_time(lcw, "completed");
         lcw->lcw_state = LC_WATCHDOG_DISABLED;
@@ -434,26 +444,36 @@ EXPORT_SYMBOL(lc_watchdog_disable);
 
 void lc_watchdog_delete(struct lc_watchdog *lcw)
 {
+        int dead; /* set when the owner ref was the last reference */
+
         ENTRY;
         LASSERT(lcw != NULL);
-        LASSERT(cfs_atomic_read(&lcw->lcw_refcount) > 0);
 
         cfs_timer_disarm(&lcw->lcw_timer);
 
         lcw_update_time(lcw, "stopped");
 
-        cfs_spin_lock_bh(&lcw_pending_timers_lock);
-        if (!cfs_list_empty(&lcw->lcw_list))
-                cfs_list_del_init(&lcw->lcw_list);
-        cfs_spin_unlock_bh(&lcw_pending_timers_lock);
-        lcw_put(lcw);
+       spin_lock_bh(&lcw->lcw_lock); /* same order as lcw_cb(): lcw_lock first */
+       spin_lock_bh(&lcw_pending_timers_lock);
+       if (unlikely(!cfs_list_empty(&lcw->lcw_list))) {
+               cfs_list_del_init(&lcw->lcw_list);
+               lcw->lcw_refcount--; /* -1 ref for pending list */
+       }
 
-        cfs_down(&lcw_refcount_sem);
-        if (--lcw_refcount == 0)
-                lcw_dispatch_stop();
-        cfs_up(&lcw_refcount_sem);
+       lcw->lcw_refcount--; /* -1 ref for owner */
+       dead = lcw->lcw_refcount == 0; /* else the dispatcher still holds a ref and frees lcw itself */
+       spin_unlock_bh(&lcw_pending_timers_lock);
+       spin_unlock_bh(&lcw->lcw_lock);
 
-        EXIT;
+       if (dead)
+               LIBCFS_FREE(lcw, sizeof(*lcw));
+
+       mutex_lock(&lcw_refcount_mutex);
+       if (--lcw_refcount == 0)
+               lcw_dispatch_stop(); /* last watchdog gone: shut the dispatcher down */
+       mutex_unlock(&lcw_refcount_mutex);
+
+       EXIT;
 }
 EXPORT_SYMBOL(lc_watchdog_delete);