Whamcloud - gitweb
- More Peter's additions for the ha manager. This doesn't seem to break much -
[fs/lustre-release.git] / lustre / llite / llite_ha.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  linux/mds/handler.c
5  *
6  *  Lustre High Availability Daemon
7  *
8  *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
9  *
10  *  This code is issued under the GNU General Public License.
11  *  See the file COPYING in this distribution
12  *
13  *  by Peter Braam <braam@clusterfs.com>
14  *
15  */
16
17 #define EXPORT_SYMTAB
18
19 #include <linux/version.h>
20 #include <linux/module.h>
21 #include <linux/fs.h>
22 #include <linux/stat.h>
23 #include <linux/locks.h>
24 #include <linux/kmod.h>
25 #include <linux/quotaops.h>
26 #include <asm/unistd.h>
27 #include <asm/uaccess.h>
28
29 #define DEBUG_SUBSYSTEM S_LLITE
30
31 #include <linux/lustre_lite.h>
32 #include <linux/lustre_ha.h>
33 #include <linux/lustre_lib.h>
34 #include <linux/lustre_net.h>
35
36 static int lustre_ha_check_event(struct lustre_ha_mgr *mgr)
37 {
38         int rc = 0; 
39         ENTRY;
40
41         spin_lock(&mgr->mgr_lock); 
42         if (!(mgr->mgr_flags & MGR_WORKING) && 
43             !list_empty(&mgr->mgr_troubled_lh) ) {
44                 mgr->mgr_flags |= MGR_WORKING;
45                 mgr->mgr_waketime = CURRENT_TIME; 
46                 schedule_timeout(4*HZ); 
47                 CERROR("connection in trouble\n"); 
48                 rc = 1;
49         }
50
51         if (!mgr->mgr_flags & MGR_WORKING &&
52             CURRENT_TIME >= mgr->mgr_waketime ) { 
53                 CERROR("woken up once more\n");
54                 mgr->mgr_waketime = CURRENT_TIME; 
55                 schedule_timeout(4*HZ); 
56                 rc = 1;
57         }
58
59         if (mgr->mgr_flags & MGR_STOPPING) { 
60                 CERROR("ha mgr stopping\n");
61                 rc = 1;
62         }
63
64         spin_unlock(&mgr->mgr_lock); 
65         RETURN(rc);
66 }
67
68
69 static int llite_ha_upcall(void)
70 {
71         char *argv[2];
72         char *envp[3];
73
74         argv[0] = "/usr/src/obd/utils/ha_assist.sh";
75         argv[1] = NULL;
76
77         envp [0] = "HOME=/";
78         envp [1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
79         envp [2] = NULL;
80
81         return call_usermodehelper(argv[0], argv, envp);
82 }
83
84 static int llite_ha_main(void *arg)
85 {
86         struct lustre_ha_thread *data = (struct lustre_ha_thread *)arg;
87         struct lustre_ha_mgr *mgr = data->mgr;
88
89         ENTRY;
90
91         lock_kernel();
92         daemonize();
93         spin_lock_irq(&current->sigmask_lock);
94         sigfillset(&current->blocked);
95         recalc_sigpending(current);
96         spin_unlock_irq(&current->sigmask_lock);
97
98         sprintf(current->comm, data->name);
99
100         /* Record that the  thread is running */
101         mgr->mgr_thread = current;
102         mgr->mgr_flags = MGR_RUNNING;
103         wake_up(&mgr->mgr_ctl_waitq);
104
105         /* And now, loop forever on requests */
106         while (1) {
107                 wait_event_interruptible(mgr->mgr_waitq, 
108                                          lustre_ha_check_event(mgr));
109
110                 if (mgr->mgr_flags & MGR_STOPPING) {
111                         spin_unlock(&mgr->mgr_lock);
112                         CERROR("lustre_hamgr quitting\n"); 
113                         EXIT;
114                         break;
115                 }
116
117                 spin_lock(&mgr->mgr_lock);
118                 CERROR("lustre_hamgr woken up\n"); 
119                 llite_ha_upcall();
120                 schedule_timeout(5 * HZ);
121                 spin_unlock(&mgr->mgr_lock);
122         }
123
124         mgr->mgr_thread = NULL;
125         mgr->mgr_flags = MGR_STOPPED;
126         wake_up(&mgr->mgr_ctl_waitq);
127         CDEBUG(D_NET, "mgr exiting process %d\n", current->pid);
128         RETURN(0);
129 }
130
131 struct lustre_ha_mgr *llite_ha_setup(void)
132 {
133         struct lustre_ha_thread d;
134         struct lustre_ha_mgr *mgr;
135         int rc;
136         ENTRY;
137
138         PORTAL_ALLOC(mgr, sizeof(*mgr));
139         if (!mgr) { 
140                 CERROR("out of memory\n");
141                 LBUG();
142                 RETURN(NULL); 
143         }
144         INIT_LIST_HEAD(&mgr->mgr_connections_lh);
145         INIT_LIST_HEAD(&mgr->mgr_troubled_lh);
146         spin_lock_init(&mgr->mgr_lock); 
147
148         d.mgr = mgr;
149         d.name = "lustre_hamgr";
150
151         init_waitqueue_head(&mgr->mgr_waitq);
152         init_waitqueue_head(&mgr->mgr_ctl_waitq);
153
154         rc = kernel_thread(llite_ha_main, (void *) &d,
155                            CLONE_VM | CLONE_FS | CLONE_FILES);
156         if (rc < 0) {
157                 CERROR("cannot start thread\n");
158                 RETURN(NULL);
159         }
160         wait_event(mgr->mgr_ctl_waitq, mgr->mgr_flags & MGR_RUNNING);
161
162         RETURN(mgr);
163 }
164
165
166 int llite_ha_cleanup(struct lustre_ha_mgr *mgr)
167 {
168         mgr->mgr_flags = MGR_STOPPING;
169
170         wake_up(&mgr->mgr_waitq);
171         wait_event_interruptible(mgr->mgr_ctl_waitq,
172                                  (mgr->mgr_flags & MGR_STOPPED));
173         PORTAL_FREE(mgr, sizeof(*mgr));
174         RETURN(0);
175 }