Whamcloud - gitweb
Land b_smallfix onto HEAD (20040423_1603)
[fs/lustre-release.git] / lustre / ptlrpc / pinger.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Portal-RPC reconnection and replay operations, for use in recovery.
5  *
6  *  Copyright (c) 2003 Cluster File Systems, Inc.
7  *   Authors: Phil Schwan <phil@clusterfs.com>
8  *            Mike Shaver <shaver@clusterfs.com>
9  *
10  *   This file is part of Lustre, http://www.lustre.org.
11  *
12  *   Lustre is free software; you can redistribute it and/or
13  *   modify it under the terms of version 2 of the GNU General Public
14  *   License as published by the Free Software Foundation.
15  *
16  *   Lustre is distributed in the hope that it will be useful,
17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *   GNU General Public License for more details.
20  *
21  *   You should have received a copy of the GNU General Public License
22  *   along with Lustre; if not, write to the Free Software
23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25
26 #ifndef __KERNEL__
27 #include <liblustre.h>
28 #else
29 #include <linux/version.h>
30 #include <asm/semaphore.h>
31 #define DEBUG_SUBSYSTEM S_RPC
32 #endif
33
34 #include <linux/obd_support.h>
35 #include <linux/obd_class.h>
36 #include "ptlrpc_internal.h"
37
38 static DECLARE_MUTEX(pinger_sem);
39 static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
40
41 int ptlrpc_ping(struct obd_import *imp) 
42 {
43         struct ptlrpc_request *req;
44         int rc = 0;
45         ENTRY;
46
47         req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
48                               NULL);
49         if (req) {
50                 DEBUG_REQ(D_HA, req, "pinging %s->%s",
51                           imp->imp_obd->obd_uuid.uuid,
52                           imp->imp_target_uuid.uuid);
53                 req->rq_no_resend = req->rq_no_delay = 1;
54                 req->rq_replen = lustre_msg_size(0, 
55                                                  NULL);
56                 ptlrpcd_add_req(req);
57         } else {
58                 CERROR("OOM trying to ping %s->%s\n",
59                           imp->imp_obd->obd_uuid.uuid,
60                           imp->imp_target_uuid.uuid);
61                 rc = -ENOMEM;
62         }
63
64         RETURN(rc);
65 }
66
67 #ifdef __KERNEL__
68 static int ptlrpc_pinger_main(void *arg)
69 {
70         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
71         struct ptlrpc_thread *thread = data->thread;
72         unsigned long flags;
73         ENTRY;
74
75         lock_kernel();
76         ptlrpc_daemonize();
77
78         SIGNAL_MASK_LOCK(current, flags);
79         sigfillset(&current->blocked);
80         RECALC_SIGPENDING;
81         SIGNAL_MASK_UNLOCK(current, flags);
82
83         LASSERTF(strlen(data->name) < sizeof(current->comm),
84                  "name %d > len %d\n",strlen(data->name),sizeof(current->comm));
85         THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name);
86         unlock_kernel();
87
88         /* Record that the thread is running */
89         thread->t_flags = SVC_RUNNING;
90         wake_up(&thread->t_ctl_waitq);
91
92         /* And now, loop forever, pinging as needed. */
93         while (1) {
94                 unsigned long this_ping = jiffies;
95                 long time_to_next_ping;
96                 struct l_wait_info lwi = LWI_TIMEOUT(obd_timeout * HZ,
97                                                      NULL, NULL);
98                 struct list_head *iter;
99
100                 down(&pinger_sem);
101                 list_for_each(iter, &pinger_imports) {
102                         struct obd_import *imp =
103                                 list_entry(iter, struct obd_import,
104                                            imp_pinger_chain);
105                         int force, level;
106                         unsigned long flags;
107
108
109                         spin_lock_irqsave(&imp->imp_lock, flags);
110                         level = imp->imp_state;
111                         force = imp->imp_force_verify;
112                         if (force)
113                                 imp->imp_force_verify = 0;
114                         spin_unlock_irqrestore(&imp->imp_lock, flags);
115
116                         if (imp->imp_next_ping <= this_ping || force) {
117                                 if (level == LUSTRE_IMP_DISCON) {
118                                         /* wait at least a timeout before 
119                                            trying recovery again. */
120                                         imp->imp_next_ping = jiffies + 
121                                                 (obd_timeout * HZ);
122                                         ptlrpc_initiate_recovery(imp);
123                                 } 
124                                 else if (level != LUSTRE_IMP_FULL ||
125                                          imp->imp_obd->obd_no_recov) {
126                                         CDEBUG(D_HA, 
127                                                "not pinging %s (in recovery "
128                                                " or recovery disabled: %s)\n",
129                                                imp->imp_target_uuid.uuid,
130                                                ptlrpc_import_state_name(level));
131                                 } 
132                                 else if (imp->imp_pingable || force) {
133                                         ptlrpc_ping(imp);
134                                 }
135
136                         } else {
137                                 if (imp->imp_pingable)
138                                         CDEBUG(D_HA, "don't need to ping %s "
139                                                "(%lu > %lu)\n", 
140                                                imp->imp_target_uuid.uuid,
141                                                imp->imp_next_ping, this_ping);
142                         }
143                 }
144                 up(&pinger_sem);
145
146                 /* Wait until the next ping time, or until we're stopped. */
147                 time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies;
148                 CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
149                        this_ping + (obd_timeout * HZ));
150                 if (time_to_next_ping > 0) {
151                         lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
152                         l_wait_event(thread->t_ctl_waitq,
153                                      thread->t_flags & (SVC_STOPPING|SVC_EVENT),
154                                      &lwi);
155                         if (thread->t_flags & SVC_STOPPING) {
156                                 thread->t_flags &= ~SVC_STOPPING;
157                                 EXIT;
158                                 break;
159                         } else if (thread->t_flags & SVC_EVENT) {
160                                 /* woken after adding import to reset timer */
161                                 thread->t_flags &= ~SVC_EVENT;
162                         }
163                 }
164         }
165
166         thread->t_flags = SVC_STOPPED;
167         wake_up(&thread->t_ctl_waitq);
168
169         CDEBUG(D_NET, "pinger thread exiting, process %d\n", current->pid);
170         return 0;
171 }
172
173 static struct ptlrpc_thread *pinger_thread = NULL;
174
175 int ptlrpc_start_pinger(void)
176 {
177         struct l_wait_info lwi = { 0 };
178         struct ptlrpc_svc_data d;
179         int rc;
180 #ifndef ENABLE_PINGER
181         return 0;
182 #endif
183         ENTRY;
184
185         if (pinger_thread != NULL)
186                 RETURN(-EALREADY);
187
188         OBD_ALLOC(pinger_thread, sizeof(*pinger_thread));
189         if (pinger_thread == NULL)
190                 RETURN(-ENOMEM);
191         init_waitqueue_head(&pinger_thread->t_ctl_waitq);
192
193         d.name = "ll_ping";
194         d.thread = pinger_thread;
195
196         /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
197          * just drop the VM and FILES in ptlrpc_daemonize() right away. */
198         rc = kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES);
199         if (rc < 0) {
200                 CERROR("cannot start thread: %d\n", rc);
201                 OBD_FREE(pinger_thread, sizeof(*pinger_thread));
202                 RETURN(rc);
203         }
204         l_wait_event(pinger_thread->t_ctl_waitq,
205                      pinger_thread->t_flags & SVC_RUNNING, &lwi);
206
207         RETURN(rc);
208 }
209
210 int ptlrpc_stop_pinger(void)
211 {
212         struct l_wait_info lwi = { 0 };
213         int rc = 0;
214 #ifndef ENABLE_PINGER
215         return 0;
216 #endif
217         ENTRY;
218
219         if (pinger_thread == NULL)
220                 RETURN(-EALREADY);
221         down(&pinger_sem);
222         pinger_thread->t_flags = SVC_STOPPING;
223         wake_up(&pinger_thread->t_ctl_waitq);
224         up(&pinger_sem);
225
226         l_wait_event(pinger_thread->t_ctl_waitq,
227                      (pinger_thread->t_flags & SVC_STOPPED), &lwi);
228
229         OBD_FREE(pinger_thread, sizeof(*pinger_thread));
230         pinger_thread = NULL;
231         RETURN(rc);
232 }
233
234 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
235 {
236         down(&pinger_sem);
237         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
238         up(&pinger_sem);
239 }
240
241 int ptlrpc_pinger_add_import(struct obd_import *imp)
242 {
243         ENTRY;
244         if (!list_empty(&imp->imp_pinger_chain))
245                 RETURN(-EALREADY);
246
247         down(&pinger_sem);
248         CDEBUG(D_HA, "adding pingable import %s->%s\n",
249                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
250         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
251         /* XXX sort, blah blah */
252         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
253         class_import_get(imp);
254
255         ptlrpc_pinger_wake_up();
256         up(&pinger_sem);
257
258         RETURN(0);
259 }
260
261 int ptlrpc_pinger_del_import(struct obd_import *imp)
262 {
263         ENTRY;
264         if (list_empty(&imp->imp_pinger_chain))
265                 RETURN(-ENOENT);
266
267         down(&pinger_sem);
268         list_del_init(&imp->imp_pinger_chain);
269         CDEBUG(D_HA, "removing pingable import %s->%s\n",
270                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
271         class_import_put(imp);
272         up(&pinger_sem);
273         RETURN(0);
274 }
275
/*
 * Set SVC_EVENT on the pinger thread and wake it, so that it starts a new
 * pass instead of sleeping out its interval.  Does nothing when
 * ENABLE_PINGER is not defined.
 *
 * NOTE(review): pinger_thread is dereferenced without a NULL check, so this
 * presumably relies on the pinger having been started - confirm with callers.
 */
void ptlrpc_pinger_wake_up()
{
#ifdef ENABLE_PINGER
        struct ptlrpc_thread *thread = pinger_thread;

        thread->t_flags = thread->t_flags | SVC_EVENT;
        wake_up(&thread->t_ctl_waitq);
#endif
}
283
284 #else /* !__KERNEL__ */
285
286 /* XXX
287  * the current implementation of pinger in liblustre is not optimized
288  */
289
/* Bookkeeping for the callback-driven liblustre pinger. */
struct pinger_data {
        /* non-zero while pinger_check_rpcs() is executing */
        int pd_recursion;
        /* time(NULL) value taken when the current round began */
        unsigned long pd_this_ping;
        /* no round is run before this time unless one is forced */
        unsigned long pd_next_ping;
        /* set by ptlrpc_pinger_wake_up() to force a round */
        int pd_force_check;
};

static struct pinger_data pinger_args;
296
297 static int pinger_check_rpcs(void *arg)
298 {
299         unsigned long curtime = time(NULL);
300         struct list_head *iter;
301         struct pinger_data *pd = &pinger_args;
302
303         /* prevent recursion */
304         if (pd->pd_recursion++) {
305                 CDEBUG(D_HA, "pinger: recursion! quit\n");
306                 pd->pd_recursion--;
307                 return 0;
308         }
309
310         /* have we reached ping point? */
311         if (pd->pd_next_ping > curtime && !pd->pd_force_check) {
312                 pd->pd_recursion--;
313                 return 0;
314         }
315
316         if (pd->pd_force_check)
317                 pd->pd_force_check = 0;
318
319         pd->pd_this_ping = curtime;
320
321         /* add rpcs into set */
322         down(&pinger_sem);
323         list_for_each(iter, &pinger_imports) {
324                 struct obd_import *imp =
325                         list_entry(iter, struct obd_import,
326                                    imp_pinger_chain);
327                 int level, force;
328                 unsigned long flags;
329
330
331                 spin_lock_irqsave(&imp->imp_lock, flags);
332                 level = imp->imp_state;
333                 force = imp->imp_force_verify;
334                 if (force)
335                         imp->imp_force_verify = 0;
336                 spin_unlock_irqrestore(&imp->imp_lock, flags);
337
338                 if (imp->imp_next_ping <= pd->pd_this_ping || force) {
339                         if (level == LUSTRE_IMP_DISCON) {
340                                 /* wait at least a timeout before 
341                                    trying recovery again. */
342                                 imp->imp_next_ping = time(NULL) + 
343                                         (obd_timeout * HZ);
344                                 ptlrpc_initiate_recovery(imp);
345                         } 
346                         else if (level != LUSTRE_IMP_FULL ||
347                                  imp->imp_obd->obd_no_recov) {
348                                 CDEBUG(D_HA, 
349                                        "not pinging %s (in recovery "
350                                        " or recovery disabled: %s)\n",
351                                        imp->imp_target_uuid.uuid,
352                                        ptlrpc_import_state_name(level));
353                         } 
354                         else if (imp->imp_pingable || force) {
355                                 ptlrpc_ping(imp);
356                         }
357
358                 } else {
359                         if (imp->imp_pingable) {
360                                 CDEBUG(D_HA, "don't need to ping %s "
361                                        "(%lu > %lu)\n", 
362                                        imp->imp_target_uuid.uuid,
363                                        imp->imp_next_ping, pd->pd_this_ping);
364                         }
365                 }
366         }
367
368         up(&pinger_sem);
369
370         pd->pd_next_ping = pd->pd_this_ping + (obd_timeout * HZ);
371
372         CDEBUG(D_HA, "finished a round ping\n");
373         pd->pd_recursion--;
374         return 0;
375 }
376
377 static void *pinger_callback = NULL;
378
379 int ptlrpc_start_pinger(void)
380 {
381         memset(&pinger_args, 0, sizeof(pinger_args));
382 #ifdef ENABLE_PINGER
383         pinger_callback =
384                 liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args);
385 #endif
386         return 0;
387 }
388
/*
 * Deregister the pinger wait callback, if one is registered.  Does nothing
 * when ENABLE_PINGER is not defined.  Always returns 0.
 */
int ptlrpc_stop_pinger(void)
{
#ifdef ENABLE_PINGER
        if (pinger_callback) {
                liblustre_deregister_wait_callback(pinger_callback);
                /* Forget the handle so that a repeated stop does not hand
                 * a stale value back to the deregister call. */
                pinger_callback = NULL;
        }
#endif
        return 0;
}
397
398 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
399 {
400         down(&pinger_sem);
401         imp->imp_next_ping = time(NULL) + obd_timeout;
402         if (pinger_args.pd_next_ping > imp->imp_next_ping) {
403                 CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n",
404                         imp->imp_next_ping, time(NULL));
405                 pinger_args.pd_next_ping = imp->imp_next_ping;
406         }
407         up(&pinger_sem);
408 }
409
410 int ptlrpc_pinger_add_import(struct obd_import *imp)
411 {
412         ENTRY;
413         if (!list_empty(&imp->imp_pinger_chain))
414                 RETURN(-EALREADY);
415
416         CDEBUG(D_HA, "adding pingable import %s->%s\n",
417                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
418         ptlrpc_pinger_sending_on_import(imp);
419
420         down(&pinger_sem);
421         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
422         class_import_get(imp);
423         up(&pinger_sem);
424
425         RETURN(0);
426 }
427
428 int ptlrpc_pinger_del_import(struct obd_import *imp)
429 {
430         ENTRY;
431         if (list_empty(&imp->imp_pinger_chain))
432                 RETURN(-ENOENT);
433
434         down(&pinger_sem);
435         list_del_init(&imp->imp_pinger_chain);
436         CDEBUG(D_HA, "removing pingable import %s->%s\n",
437                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
438         class_import_put(imp);
439         up(&pinger_sem);
440         RETURN(0);
441 }
442
443 void ptlrpc_pinger_wake_up()
444 {
445         pinger_args.pd_force_check = 1;
446 }
447 #endif /* !__KERNEL__ */