1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * Lustre High Availability Daemon
8 * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
10 * This code is issued under the GNU General Public License.
11 * See the file COPYING in this distribution
13 * by Peter Braam <braam@clusterfs.com>
18 #define DEBUG_SUBSYSTEM S_RPC
20 #include <linux/kmod.h>
21 #include <linux/lustre_lite.h>
22 #include <linux/lustre_ha.h>
/* File-scope pointer to a recovery daemon descriptor.  Nothing on the
 * visible lines of this file assigns or reads it.
 * NOTE(review): presumably set by whoever calls recovd_setup() - confirm. */
24 struct recovd_obd *ptlrpc_connmgr;
/* Put a client under the management of a recovery daemon.
 *
 * recovd: daemon descriptor that will own the client.
 * cli:    client to register; its cli_recovd back-pointer is set to recovd.
 *
 * The client's cli_ha_item is linked onto recovd->recovd_clients_lh while
 * recovd->recovd_lock is held.
 * NOTE(review): this listing omits some original lines (braces and possibly
 * tracing macros), so only the statements shown here are documented. */
26 void recovd_cli_manage(struct recovd_obd *recovd, struct ptlrpc_client *cli)
/* The back-pointer is stored before the lock is taken. */
29 cli->cli_recovd = recovd;
30 spin_lock(&recovd->recovd_lock);
31 list_add(&cli->cli_ha_item, &recovd->recovd_clients_lh);
32 spin_unlock(&recovd->recovd_lock);
/* Report that a client has failed and wake the recovery daemon.
 *
 * cli: the failing client.  cli->cli_recovd is dereferenced without a
 *      check, so the client must already have a daemon attached
 *      (recovd_cli_manage() is what assigns that pointer in this file).
 *
 * Under the daemon's recovd_lock this sets RECOVD_FAIL in recovd_flags,
 * raises recovd_wakeup_flag, and moves the client's cli_ha_item onto
 * recovd_troubled_lh.  The wake_up() on recovd_waitq is issued after the
 * lock has been released. */
36 void recovd_cli_fail(struct ptlrpc_client *cli)
39 spin_lock(&cli->cli_recovd->recovd_lock);
40 cli->cli_recovd->recovd_flags |= RECOVD_FAIL;
41 cli->cli_recovd->recovd_wakeup_flag = 1;
/* Unlink from whichever list the client is on, then queue it as troubled. */
42 list_del(&cli->cli_ha_item);
43 list_add(&cli->cli_ha_item, &cli->cli_recovd->recovd_troubled_lh);
44 spin_unlock(&cli->cli_recovd->recovd_lock);
45 wake_up(&cli->cli_recovd->recovd_waitq);
/* Move a client back onto its daemon's recovd_clients_lh list.
 *
 * cli: the client; cli->cli_recovd must be valid (it is dereferenced
 *      without a check).
 *
 * NOTE(review): the visible statements do not take recovd_lock, whereas
 * recovd_cli_manage() and recovd_cli_fail() modify the same lists under
 * recovd_lock.  The comment below names cli->cli_lock instead - confirm
 * which lock is meant to protect these lists. */
49 /* this function must be called with cli->cli_lock held */
50 void recovd_cli_fixed(struct ptlrpc_client *cli)
53 list_del(&cli->cli_ha_item);
54 list_add(&cli->cli_ha_item, &cli->cli_recovd->recovd_clients_lh);
/* Launch the user-space recovery helper script.
 *
 * Returns whatever call_usermodehelper() returns.
 *
 * The helper path and the PATH environment entry are hard-coded.
 * NOTE(review): the declarations of argv/envp and the assignments to
 * argv[1], envp[0] and envp[2] are not visible in this listing -
 * confirm both arrays are NULL-terminated in the full file. */
59 static int recovd_upcall(void)
64 argv[0] = "/usr/src/obd/utils/ha_assist.sh";
68 envp [1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
71 return call_usermodehelper(argv[0], argv, envp);
/* Wake-up predicate for the daemon thread: recovd_main() passes this call
 * as the condition of wait_event_interruptible().
 *
 * recovd: daemon descriptor to inspect.
 *
 * With recovd_lock held it stamps recovd_waketime, then tests three
 * conditions in order: an explicit wake-up request (recovd_wakeup_flag),
 * an expired timeout (which also sets RECOVD_TIMEOUT in recovd_flags), and
 * a pending RECOVD_STOPPING request.  recovd_wakeup_flag is cleared before
 * the lock is released.
 *
 * NOTE(review): the lines that set and return the result are not visible
 * in this listing, so the exact return values are not documented here. */
74 static int recovd_check_event(struct recovd_obd *recovd)
79 spin_lock(&recovd->recovd_lock);
81 recovd->recovd_waketime = CURRENT_TIME;
/* NOTE(review): schedule_timeout() is called while the spinlock taken
 * above is still held, and no task state is set on the visible lines
 * before the call - confirm this cannot sleep under the lock. */
82 if (recovd->recovd_timeout)
83 schedule_timeout(recovd->recovd_timeout);
85 if (recovd->recovd_wakeup_flag) {
86 CERROR("service woken\n");
/* NOTE(review): recovd_timeout is assigned 10 * HZ in
 * recovd_handle_event(), yet here it is added to a CURRENT_TIME stamp -
 * confirm both sides of the comparison use the same time unit. */
90 if (recovd->recovd_timeout &&
91 CURRENT_TIME > recovd->recovd_waketime + recovd->recovd_timeout) {
92 recovd->recovd_flags |= RECOVD_TIMEOUT;
97 if (recovd->recovd_flags & RECOVD_STOPPING) {
98 CERROR("recovd stopping\n");
/* Consume the wake-up request before dropping the lock. */
103 recovd->recovd_wakeup_flag = 0;
104 spin_unlock(&recovd->recovd_lock);
/* Act on the daemon's current flags; called from the recovd_main() loop.
 *
 * recovd: daemon descriptor.
 *
 * Visible behaviour, in order:
 *  - RECOVD_FAIL set and RECOVD_UPCALL_WAIT clear: set RECOVD_UPCALL_WAIT,
 *    stamp recovd_waketime, arm a 10 * HZ timeout and call
 *    schedule_timeout() with it.
 *  - RECOVD_TIMEOUT set: log it and clear the flag.
 *  - RECOVD_UPCALL_ANSWER set: unlink every client on recovd_troubled_lh
 *    and invoke its cli_recover callback, if it has one.
 * recovd_timeout is then zeroed and recovd_flags overwritten with
 * RECOVD_IDLE; the missing braces in this listing hide which branch those
 * two statements belong to.
 *
 * NOTE(review): this function takes recovd_lock itself, but the visible
 * lines of recovd_main() already hold recovd_lock around the call -
 * confirm this does not self-deadlock on a non-recursive spinlock.
 * NOTE(review): the return statement is not visible in this listing. */
108 static int recovd_handle_event(struct recovd_obd *recovd)
111 spin_lock(&recovd->recovd_lock);
113 if (!(recovd->recovd_flags & RECOVD_UPCALL_WAIT) &&
114 recovd->recovd_flags & RECOVD_FAIL) {
116 CERROR("client in trouble: flags -> UPCALL_WAITING\n");
117 recovd->recovd_flags |= RECOVD_UPCALL_WAIT;
/* NOTE(review): schedule_timeout() below runs with recovd_lock held. */
120 recovd->recovd_waketime = CURRENT_TIME;
121 recovd->recovd_timeout = 10 * HZ;
122 schedule_timeout(recovd->recovd_timeout);
125 if (recovd->recovd_flags & RECOVD_TIMEOUT) {
126 CERROR("timeout - no news from upcall?\n");
127 recovd->recovd_flags &= ~RECOVD_TIMEOUT;
130 if (recovd->recovd_flags & RECOVD_UPCALL_ANSWER) {
131 struct list_head *tmp, *pos;
132 CERROR("UPCALL_WAITING: upcall answer\n");
133 CERROR("** fill me in with recovery\n");
/* Despite the names, 'tmp' is the loop cursor and 'pos' is the saved
 * next entry (list_for_each_safe takes cursor, lookahead, head). */
135 list_for_each_safe(tmp, pos, &recovd->recovd_troubled_lh) {
136 struct ptlrpc_client *cli = list_entry
137 (tmp, struct ptlrpc_client, cli_ha_item);
/* The lock is released around the callback.
 * NOTE(review): the saved next entry may be moved by recovd_cli_fail()
 * or recovd_cli_fixed() while the lock is dropped - confirm. */
139 list_del(&cli->cli_ha_item);
140 spin_unlock(&recovd->recovd_lock);
141 if (cli->cli_recover)
142 cli->cli_recover(cli);
143 spin_lock(&recovd->recovd_lock);
/* Plain assignment: every other bit in recovd_flags is discarded here. */
146 recovd->recovd_timeout = 0;
147 recovd->recovd_flags = RECOVD_IDLE;
150 spin_unlock(&recovd->recovd_lock);
/* Body of the recovery daemon kernel thread (started by recovd_setup()).
 *
 * arg: the struct recovd_obd this thread serves, passed as void *.
 *
 * The thread blocks all signals, names itself "lustre_recovd", publishes
 * itself in recovd_thread, sets recovd_flags to RECOVD_IDLE and wakes
 * recovd_ctl_waitq so the creator can proceed.  It then repeatedly sleeps
 * on recovd_waitq until recovd_check_event() reports work, and calls
 * recovd_handle_event().  On the way out it clears recovd_thread, sets
 * recovd_flags to RECOVD_STOPPED and wakes recovd_ctl_waitq again.
 *
 * NOTE(review): the loop construct, the exit from it and the return
 * statement are not visible in this listing.
 *
 * Fix applied here: "&current" had been corrupted to a currency sign
 * followed by "t" on the three signal-mask lines; it is restored below. */
154 static int recovd_main(void *arg)
156 struct recovd_obd *recovd = (struct recovd_obd *)arg;
/* Block every signal for this thread; the mask is changed under
 * current->sigmask_lock. */
162 spin_lock_irq(&current->sigmask_lock);
163 sigfillset(&current->blocked);
164 recalc_sigpending(current);
165 spin_unlock_irq(&current->sigmask_lock);
167 sprintf(current->comm, "lustre_recovd");
169 /* Record that the thread is running */
170 recovd->recovd_thread = current;
171 recovd->recovd_flags = RECOVD_IDLE;
172 wake_up(&recovd->recovd_ctl_waitq);
174 /* And now, loop forever on requests */
176 wait_event_interruptible(recovd->recovd_waitq,
177 recovd_check_event(recovd));
179 spin_lock(&recovd->recovd_lock);
180 if (recovd->recovd_flags & RECOVD_STOPPING) {
181 spin_unlock(&recovd->recovd_lock);
182 CERROR("lustre_recovd stopping\n");
/* NOTE(review): recovd_lock is held across this call, and
 * recovd_handle_event() acquires recovd_lock again - confirm. */
187 recovd_handle_event(recovd);
188 spin_unlock(&recovd->recovd_lock);
/* Tell recovd_cleanup() that the thread has finished. */
191 recovd->recovd_thread = NULL;
192 recovd->recovd_flags = RECOVD_STOPPED;
193 wake_up(&recovd->recovd_ctl_waitq);
194 CDEBUG(D_NET, "mgr exiting process %d\n", current->pid);
/* Initialise a recovery daemon descriptor and start its thread.
 *
 * recovd: descriptor to initialise.
 *
 * Both client lists, the spinlock and the three wait queues are
 * initialised before kernel_thread() starts recovd_main() with recovd as
 * its argument.  The caller then sleeps on recovd_ctl_waitq until
 * recovd_flags has RECOVD_IDLE set, which recovd_main() does at start-up.
 *
 * NOTE(review): the test of kernel_thread()'s result and the return
 * statements are not visible in this listing, so return values are not
 * documented here. */
198 int recovd_setup(struct recovd_obd *recovd)
203 INIT_LIST_HEAD(&recovd->recovd_clients_lh);
204 INIT_LIST_HEAD(&recovd->recovd_troubled_lh);
205 spin_lock_init(&recovd->recovd_lock);
207 init_waitqueue_head(&recovd->recovd_waitq);
208 init_waitqueue_head(&recovd->recovd_recovery_waitq);
209 init_waitqueue_head(&recovd->recovd_ctl_waitq);
/* The new thread shares address space, filesystem info and open files. */
211 rc = kernel_thread(recovd_main, (void *)recovd,
212 CLONE_VM | CLONE_FS | CLONE_FILES);
214 CERROR("cannot start thread\n");
/* Non-interruptible wait for the thread's start-up handshake. */
217 wait_event(recovd->recovd_ctl_waitq, recovd->recovd_flags & RECOVD_IDLE);
/* Ask the recovery daemon thread to stop and wait for it to do so.
 *
 * recovd: descriptor of the daemon to stop.
 *
 * recovd_flags is overwritten with RECOVD_STOPPING (any other bits are
 * dropped) and recovd_waitq is woken, both under recovd_lock.  The caller
 * then sleeps on recovd_ctl_waitq until RECOVD_STOPPED is set, which
 * recovd_main() does as it exits.
 *
 * NOTE(review): the wait is interruptible and its result is not used on
 * the visible lines - confirm a signal cannot let cleanup continue while
 * the thread is still running.  The end of this function is not visible
 * in this listing. */
222 int recovd_cleanup(struct recovd_obd *recovd)
224 spin_lock(&recovd->recovd_lock);
225 recovd->recovd_flags = RECOVD_STOPPING;
226 wake_up(&recovd->recovd_waitq);
227 spin_unlock(&recovd->recovd_lock);
229 wait_event_interruptible(recovd->recovd_ctl_waitq,
230 (recovd->recovd_flags & RECOVD_STOPPED));