Whamcloud - gitweb
land b_groups onto HEAD:
[fs/lustre-release.git] / lustre / ptlrpc / pinger.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Portal-RPC reconnection and replay operations, for use in recovery.
5  *
6  *  Copyright (c) 2003 Cluster File Systems, Inc.
7  *   Authors: Phil Schwan <phil@clusterfs.com>
8  *            Mike Shaver <shaver@clusterfs.com>
9  *
10  *   This file is part of Lustre, http://www.lustre.org.
11  *
12  *   Lustre is free software; you can redistribute it and/or
13  *   modify it under the terms of version 2 of the GNU General Public
14  *   License as published by the Free Software Foundation.
15  *
16  *   Lustre is distributed in the hope that it will be useful,
17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *   GNU General Public License for more details.
20  *
21  *   You should have received a copy of the GNU General Public License
22  *   along with Lustre; if not, write to the Free Software
23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25
26 #ifndef __KERNEL__
27 #include <liblustre.h>
28 #else
29 #include <linux/version.h>
30 #include <asm/semaphore.h>
31 #define DEBUG_SUBSYSTEM S_RPC
32 #endif
33
34 #include <linux/obd_support.h>
35 #include <linux/obd_class.h>
36 #include "ptlrpc_internal.h"
37
38 static DECLARE_MUTEX(pinger_sem);
39 static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
40
41 int ptlrpc_ping(struct obd_import *imp) 
42 {
43         struct ptlrpc_request *req;
44         int rc = 0;
45         ENTRY;
46
47         req = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, OBD_PING, 0, NULL, NULL);
48         if (req) {
49                 DEBUG_REQ(D_HA, req, "pinging %s->%s",
50                           imp->imp_obd->obd_uuid.uuid,
51                           imp->imp_target_uuid.uuid);
52                 req->rq_no_resend = req->rq_no_delay = 1;
53                 req->rq_replen = lustre_msg_size(0, NULL);
54                 ptlrpcd_add_req(req);
55         } else {
56                 CERROR("OOM trying to ping %s->%s\n",
57                        imp->imp_obd->obd_uuid.uuid,
58                        imp->imp_target_uuid.uuid);
59                 rc = -ENOMEM;
60         }
61
62         RETURN(rc);
63 }
64
65 #ifdef __KERNEL__
66 int ptlrpc_next_ping(struct obd_import *imp)
67 {
68         if (imp->imp_server_timeout)
69                 return jiffies + (obd_timeout / 4 * HZ);
70         else
71                 return jiffies + (obd_timeout / 2 * HZ);
72 }
73
74 static int ptlrpc_pinger_main(void *arg)
75 {
76         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
77         struct ptlrpc_thread *thread = data->thread;
78         unsigned long flags;
79         ENTRY;
80
81         lock_kernel();
82         ptlrpc_daemonize();
83
84         SIGNAL_MASK_LOCK(current, flags);
85         sigfillset(&current->blocked);
86         RECALC_SIGPENDING;
87         SIGNAL_MASK_UNLOCK(current, flags);
88
89         LASSERTF(strlen(data->name) < sizeof(current->comm),
90                  "name %d > len %d\n",
91                  (int)strlen(data->name), (int)sizeof(current->comm));
92         THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name);
93         unlock_kernel();
94
95         /* Record that the thread is running */
96         thread->t_flags = SVC_RUNNING;
97         wake_up(&thread->t_ctl_waitq);
98
99         /* And now, loop forever, pinging as needed. */
100         while (1) {
101                 unsigned long this_ping = jiffies;
102                 long time_to_next_ping;
103                 struct l_wait_info lwi = LWI_TIMEOUT(obd_timeout * HZ,
104                                                      NULL, NULL);
105                 struct list_head *iter;
106
107                 time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies;
108                 down(&pinger_sem);
109                 list_for_each(iter, &pinger_imports) {
110                         struct obd_import *imp =
111                                 list_entry(iter, struct obd_import,
112                                            imp_pinger_chain);
113                         int force, level;
114                         unsigned long flags;
115
116
117                         spin_lock_irqsave(&imp->imp_lock, flags);
118                         level = imp->imp_state;
119                         force = imp->imp_force_verify;
120                         if (force)
121                                 imp->imp_force_verify = 0;
122                         spin_unlock_irqrestore(&imp->imp_lock, flags);
123
124                         if (imp->imp_next_ping <= this_ping || force) {
125                                 if (level == LUSTRE_IMP_DISCON) {
126                                         /* wait at least a timeout before 
127                                            trying recovery again. */
128                                         imp->imp_next_ping =
129                                                 ptlrpc_next_ping(imp);
130                                         ptlrpc_initiate_recovery(imp);
131                                 } else if (level != LUSTRE_IMP_FULL ||
132                                            imp->imp_obd->obd_no_recov) {
133                                         CDEBUG(D_HA, 
134                                                "not pinging %s (in recovery "
135                                                " or recovery disabled: %s)\n",
136                                                imp->imp_target_uuid.uuid,
137                                                ptlrpc_import_state_name(level));
138                                 } else if (imp->imp_pingable || force) {
139                                         ptlrpc_ping(imp);
140                                 }
141
142                         } else if (imp->imp_pingable) {
143                                 CDEBUG(D_HA, "don't need to ping %s "
144                                        "(%lu > %lu)\n",
145                                        imp->imp_target_uuid.uuid,
146                                        imp->imp_next_ping, this_ping);
147                         }
148                         CDEBUG(D_OTHER, "%s: pingable %d, next_ping %lu(%lu)\n",
149                                 imp->imp_target_uuid.uuid,
150                                 imp->imp_pingable, imp->imp_next_ping, jiffies);
151                         if (imp->imp_pingable && imp->imp_next_ping &&
152                             imp->imp_next_ping - jiffies < time_to_next_ping &&
153                             imp->imp_next_ping > jiffies)
154                                 time_to_next_ping = imp->imp_next_ping - jiffies;
155                 }
156                 up(&pinger_sem);
157
158                 /* Wait until the next ping time, or until we're stopped. */
159                 CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
160                        this_ping + (obd_timeout * HZ));
161                 if (time_to_next_ping > 0) {
162                         lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
163                         l_wait_event(thread->t_ctl_waitq,
164                                      thread->t_flags & (SVC_STOPPING|SVC_EVENT),
165                                      &lwi);
166                         if (thread->t_flags & SVC_STOPPING) {
167                                 thread->t_flags &= ~SVC_STOPPING;
168                                 EXIT;
169                                 break;
170                         } else if (thread->t_flags & SVC_EVENT) {
171                                 /* woken after adding import to reset timer */
172                                 thread->t_flags &= ~SVC_EVENT;
173                         }
174                 }
175         }
176
177         thread->t_flags = SVC_STOPPED;
178         wake_up(&thread->t_ctl_waitq);
179
180         CDEBUG(D_NET, "pinger thread exiting, process %d\n", current->pid);
181         return 0;
182 }
183
184 static struct ptlrpc_thread *pinger_thread = NULL;
185
186 int ptlrpc_start_pinger(void)
187 {
188         struct l_wait_info lwi = { 0 };
189         struct ptlrpc_svc_data d;
190         int rc;
191 #ifndef ENABLE_PINGER
192         return 0;
193 #endif
194         ENTRY;
195
196         if (pinger_thread != NULL)
197                 RETURN(-EALREADY);
198
199         OBD_ALLOC(pinger_thread, sizeof(*pinger_thread));
200         if (pinger_thread == NULL)
201                 RETURN(-ENOMEM);
202         init_waitqueue_head(&pinger_thread->t_ctl_waitq);
203
204         d.name = "ll_ping";
205         d.thread = pinger_thread;
206
207         /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
208          * just drop the VM and FILES in ptlrpc_daemonize() right away. */
209         rc = kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES);
210         if (rc < 0) {
211                 CERROR("cannot start thread: %d\n", rc);
212                 OBD_FREE(pinger_thread, sizeof(*pinger_thread));
213                 RETURN(rc);
214         }
215         l_wait_event(pinger_thread->t_ctl_waitq,
216                      pinger_thread->t_flags & SVC_RUNNING, &lwi);
217
218         RETURN(rc);
219 }
220
221 int ptlrpc_stop_pinger(void)
222 {
223         struct l_wait_info lwi = { 0 };
224         int rc = 0;
225 #ifndef ENABLE_PINGER
226         return 0;
227 #endif
228         ENTRY;
229
230         if (pinger_thread == NULL)
231                 RETURN(-EALREADY);
232         down(&pinger_sem);
233         pinger_thread->t_flags = SVC_STOPPING;
234         wake_up(&pinger_thread->t_ctl_waitq);
235         up(&pinger_sem);
236
237         l_wait_event(pinger_thread->t_ctl_waitq,
238                      (pinger_thread->t_flags & SVC_STOPPED), &lwi);
239
240         OBD_FREE(pinger_thread, sizeof(*pinger_thread));
241         pinger_thread = NULL;
242         RETURN(rc);
243 }
244
245 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
246 {
247         down(&pinger_sem);
248         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
249         up(&pinger_sem);
250 }
251
252 int ptlrpc_pinger_add_import(struct obd_import *imp)
253 {
254         ENTRY;
255         if (!list_empty(&imp->imp_pinger_chain))
256                 RETURN(-EALREADY);
257
258         down(&pinger_sem);
259         CDEBUG(D_HA, "adding pingable import %s->%s\n",
260                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
261         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
262         /* XXX sort, blah blah */
263         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
264         class_import_get(imp);
265
266         ptlrpc_pinger_wake_up();
267         up(&pinger_sem);
268
269         RETURN(0);
270 }
271
272 int ptlrpc_pinger_del_import(struct obd_import *imp)
273 {
274         ENTRY;
275         if (list_empty(&imp->imp_pinger_chain))
276                 RETURN(-ENOENT);
277
278         down(&pinger_sem);
279         list_del_init(&imp->imp_pinger_chain);
280         CDEBUG(D_HA, "removing pingable import %s->%s\n",
281                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
282         class_import_put(imp);
283         up(&pinger_sem);
284         RETURN(0);
285 }
286
/*
 * Interrupt the pinger thread's sleep by setting SVC_EVENT and waking its
 * control wait queue.  Does nothing when ENABLE_PINGER is not defined or
 * when no pinger thread exists (pinger_thread is NULL).
 */
void ptlrpc_pinger_wake_up(void)
{
#ifdef ENABLE_PINGER
        /* The pinger may not have been started, or may already be gone. */
        if (pinger_thread == NULL)
                return;

        pinger_thread->t_flags |= SVC_EVENT;
        wake_up(&pinger_thread->t_ctl_waitq);
#endif
}
294
295 #else /* !__KERNEL__ */
296
297 /* XXX
298  * the current implementation of pinger in liblustre is not optimized
299  */
300
/* Bookkeeping for the liblustre pinger callback, pinger_check_rpcs(). */
struct pinger_data {
        /* non-zero while pinger_check_rpcs() is running; blocks re-entry */
        int             pd_recursion;
        /* time(NULL) value sampled at the start of the current round */
        unsigned long   pd_this_ping;
        /* a round is skipped while this is still in the future */
        unsigned long   pd_next_ping;
        /* set by ptlrpc_pinger_wake_up() to run a round regardless of time */
        int             pd_force_check;
};

static struct pinger_data pinger_args;
307
308 static int pinger_check_rpcs(void *arg)
309 {
310         unsigned long curtime = time(NULL);
311         struct list_head *iter;
312         struct pinger_data *pd = &pinger_args;
313
314         /* prevent recursion */
315         if (pd->pd_recursion++) {
316                 CDEBUG(D_HA, "pinger: recursion! quit\n");
317                 pd->pd_recursion--;
318                 return 0;
319         }
320
321         /* have we reached ping point? */
322         if (pd->pd_next_ping > curtime && !pd->pd_force_check) {
323                 pd->pd_recursion--;
324                 return 0;
325         }
326
327         if (pd->pd_force_check)
328                 pd->pd_force_check = 0;
329
330         pd->pd_this_ping = curtime;
331
332         /* add rpcs into set */
333         down(&pinger_sem);
334         list_for_each(iter, &pinger_imports) {
335                 struct obd_import *imp =
336                         list_entry(iter, struct obd_import,
337                                    imp_pinger_chain);
338                 int level, force;
339                 unsigned long flags;
340
341
342                 spin_lock_irqsave(&imp->imp_lock, flags);
343                 level = imp->imp_state;
344                 force = imp->imp_force_verify;
345                 if (force)
346                         imp->imp_force_verify = 0;
347                 spin_unlock_irqrestore(&imp->imp_lock, flags);
348
349                 if (imp->imp_next_ping <= pd->pd_this_ping || force) {
350                         if (level == LUSTRE_IMP_DISCON) {
351                                 /* wait at least a timeout before 
352                                    trying recovery again. */
353                                 unsigned long timeout = obd_timeout;
354                                 if (imp->imp_server_timeout)
355                                         timeout = obd_timeout / 2;
356                                 imp->imp_next_ping = time(NULL) + 
357                                         (timeout * HZ);
358                                 ptlrpc_initiate_recovery(imp);
359                         } 
360                         else if (level != LUSTRE_IMP_FULL ||
361                                  imp->imp_obd->obd_no_recov) {
362                                 CDEBUG(D_HA, 
363                                        "not pinging %s (in recovery "
364                                        " or recovery disabled: %s)\n",
365                                        imp->imp_target_uuid.uuid,
366                                        ptlrpc_import_state_name(level));
367                         } 
368                         else if (imp->imp_pingable || force) {
369                                 ptlrpc_ping(imp);
370                         }
371
372                 } else {
373                         if (imp->imp_pingable) {
374                                 CDEBUG(D_HA, "don't need to ping %s "
375                                        "(%lu > %lu)\n", 
376                                        imp->imp_target_uuid.uuid,
377                                        imp->imp_next_ping, pd->pd_this_ping);
378                         }
379                 }
380         }
381
382         up(&pinger_sem);
383
384         pd->pd_next_ping = pd->pd_this_ping + (obd_timeout * HZ);
385
386         CDEBUG(D_HA, "finished a round ping\n");
387         pd->pd_recursion--;
388         return 0;
389 }
390
391 static void *pinger_callback = NULL;
392
393 int ptlrpc_start_pinger(void)
394 {
395         memset(&pinger_args, 0, sizeof(pinger_args));
396 #ifdef ENABLE_PINGER
397         pinger_callback =
398                 liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args);
399 #endif
400         return 0;
401 }
402
/*
 * liblustre variant: when ENABLE_PINGER is defined and a wait callback was
 * registered, deregister it and forget the handle so that a repeated stop
 * does not hand a stale handle to the deregistration routine.
 * Always returns 0.
 */
int ptlrpc_stop_pinger(void)
{
#ifdef ENABLE_PINGER
        if (pinger_callback) {
                liblustre_deregister_wait_callback(pinger_callback);
                pinger_callback = NULL;
        }
#endif
        return 0;
}
411
412 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
413 {
414         down(&pinger_sem);
415         imp->imp_next_ping = time(NULL) + obd_timeout;
416         if (pinger_args.pd_next_ping > imp->imp_next_ping) {
417                 CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n",
418                         imp->imp_next_ping, time(NULL));
419                 pinger_args.pd_next_ping = imp->imp_next_ping;
420         }
421         up(&pinger_sem);
422 }
423
424 int ptlrpc_pinger_add_import(struct obd_import *imp)
425 {
426         ENTRY;
427         if (!list_empty(&imp->imp_pinger_chain))
428                 RETURN(-EALREADY);
429
430         CDEBUG(D_HA, "adding pingable import %s->%s\n",
431                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
432         ptlrpc_pinger_sending_on_import(imp);
433
434         down(&pinger_sem);
435         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
436         class_import_get(imp);
437         up(&pinger_sem);
438
439         RETURN(0);
440 }
441
442 int ptlrpc_pinger_del_import(struct obd_import *imp)
443 {
444         ENTRY;
445         if (list_empty(&imp->imp_pinger_chain))
446                 RETURN(-ENOENT);
447
448         down(&pinger_sem);
449         list_del_init(&imp->imp_pinger_chain);
450         CDEBUG(D_HA, "removing pingable import %s->%s\n",
451                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
452         class_import_put(imp);
453         up(&pinger_sem);
454         RETURN(0);
455 }
456
457 void ptlrpc_pinger_wake_up()
458 {
459         pinger_args.pd_force_check = 1;
460 }
461 #endif /* !__KERNEL__ */