Whamcloud - gitweb
89b11911113a7a135466fac1af09cc8bb7e89cbb
[fs/lustre-release.git] / lustre / ptlrpc / pinger.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Portal-RPC reconnection and replay operations, for use in recovery.
5  *
6  *  Copyright (c) 2003 Cluster File Systems, Inc.
7  *   Authors: Phil Schwan <phil@clusterfs.com>
8  *            Mike Shaver <shaver@clusterfs.com>
9  *
10  *   This file is part of Lustre, http://www.lustre.org.
11  *
12  *   Lustre is free software; you can redistribute it and/or
13  *   modify it under the terms of version 2 of the GNU General Public
14  *   License as published by the Free Software Foundation.
15  *
16  *   Lustre is distributed in the hope that it will be useful,
17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *   GNU General Public License for more details.
20  *
21  *   You should have received a copy of the GNU General Public License
22  *   along with Lustre; if not, write to the Free Software
23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25
26 #ifndef __KERNEL__
27 #include <liblustre.h>
28 #else
29 #include <linux/version.h>
30 #include <asm/semaphore.h>
31 #define DEBUG_SUBSYSTEM S_RPC
32 #endif
33
34 #include <linux/obd_support.h>
35 #include <linux/obd_class.h>
36 #include "ptlrpc_internal.h"
37
/* Held around every walk or modification of pinger_imports in this file. */
38 static DECLARE_MUTEX(pinger_sem);
/* Imports registered with the pinger, linked through imp_pinger_chain. */
39 static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
40
41 int ptlrpc_ping(struct obd_import *imp) 
42 {
43         struct ptlrpc_request *req;
44         int rc = 0;
45         ENTRY;
46
47         req = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, OBD_PING, 0, NULL, NULL);
48         if (req) {
49                 DEBUG_REQ(D_HA, req, "pinging %s->%s",
50                           imp->imp_obd->obd_uuid.uuid,
51                           imp->imp_target_uuid.uuid);
52                 req->rq_no_resend = req->rq_no_delay = 1;
53                 req->rq_replen = lustre_msg_size(0, NULL);
54                 ptlrpcd_add_req(req);
55         } else {
56                 CERROR("OOM trying to ping %s->%s\n",
57                        imp->imp_obd->obd_uuid.uuid,
58                        imp->imp_target_uuid.uuid);
59                 rc = -ENOMEM;
60         }
61
62         RETURN(rc);
63 }
64
65 #ifdef __KERNEL__
66 int ptlrpc_next_ping(struct obd_import *imp)
67 {
68         if (imp->imp_server_timeout)
69                 return jiffies + (obd_timeout / 4 * HZ);
70         else
71                 return jiffies + (obd_timeout / 2 * HZ);
72 }
73
74 static int ptlrpc_pinger_main(void *arg)
75 {
76         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
77         struct ptlrpc_thread *thread = data->thread;
78         unsigned long flags;
79         ENTRY;
80
81         lock_kernel();
82         ptlrpc_daemonize();
83
84         SIGNAL_MASK_LOCK(current, flags);
85         sigfillset(&current->blocked);
86         RECALC_SIGPENDING;
87         SIGNAL_MASK_UNLOCK(current, flags);
88
89         LASSERTF(strlen(data->name) < sizeof(current->comm),
90                  "name %d > len %d\n",
91                  (int)strlen(data->name), (int)sizeof(current->comm));
92         THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name);
93         unlock_kernel();
94
95         /* Record that the thread is running */
96         thread->t_flags = SVC_RUNNING;
97         wake_up(&thread->t_ctl_waitq);
98
99         /* And now, loop forever, pinging as needed. */
100         while (1) {
101                 unsigned long this_ping = jiffies;
102                 long time_to_next_ping;
103                 struct l_wait_info lwi = LWI_TIMEOUT(obd_timeout * HZ,
104                                                      NULL, NULL);
105                 struct list_head *iter;
106
107                 time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies;
108                 down(&pinger_sem);
109                 list_for_each(iter, &pinger_imports) {
110                         struct obd_import *imp =
111                                 list_entry(iter, struct obd_import,
112                                            imp_pinger_chain);
113                         int force, level;
114                         unsigned long flags;
115
116
117                         spin_lock_irqsave(&imp->imp_lock, flags);
118                         level = imp->imp_state;
119                         force = imp->imp_force_verify;
120                         if (force)
121                                 imp->imp_force_verify = 0;
122                         spin_unlock_irqrestore(&imp->imp_lock, flags);
123
124                         if (imp->imp_next_ping <= this_ping || force) {
125                                 if (level == LUSTRE_IMP_DISCON &&
126                                     !imp->imp_deactive) {
127                                         /* wait at least a timeout before
128                                            trying recovery again. */
129                                         imp->imp_next_ping =
130                                                 ptlrpc_next_ping(imp);
131                                         ptlrpc_initiate_recovery(imp);
132                                 } else if (level != LUSTRE_IMP_FULL ||
133                                            imp->imp_obd->obd_no_recov) {
134                                         CDEBUG(D_HA, 
135                                                "not pinging %s (in recovery "
136                                                "or recovery disabled: %s)\n",
137                                                imp->imp_target_uuid.uuid,
138                                                ptlrpc_import_state_name(level));
139                                 } else if (imp->imp_pingable || force) {
140                                         ptlrpc_ping(imp);
141                                 }
142
143                         } else if (imp->imp_pingable) {
144                                 CDEBUG(D_HA, "don't need to ping %s "
145                                        "(%lu > %lu)\n",
146                                        imp->imp_target_uuid.uuid,
147                                        imp->imp_next_ping, this_ping);
148                         }
149                         CDEBUG(D_OTHER, "%s: pingable %d, next_ping %lu(%lu)\n",
150                                 imp->imp_target_uuid.uuid,
151                                 imp->imp_pingable, imp->imp_next_ping, jiffies);
152                         if (imp->imp_pingable && imp->imp_next_ping &&
153                             imp->imp_next_ping - jiffies < time_to_next_ping &&
154                             imp->imp_next_ping > jiffies)
155                                 time_to_next_ping = imp->imp_next_ping - jiffies;
156                 }
157                 up(&pinger_sem);
158
159                 /* Wait until the next ping time, or until we're stopped. */
160                 CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
161                        this_ping + (obd_timeout * HZ));
162                 if (time_to_next_ping > 0) {
163                         lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
164                         l_wait_event(thread->t_ctl_waitq,
165                                      thread->t_flags & (SVC_STOPPING|SVC_EVENT),
166                                      &lwi);
167                         if (thread->t_flags & SVC_STOPPING) {
168                                 thread->t_flags &= ~SVC_STOPPING;
169                                 EXIT;
170                                 break;
171                         } else if (thread->t_flags & SVC_EVENT) {
172                                 /* woken after adding import to reset timer */
173                                 thread->t_flags &= ~SVC_EVENT;
174                         }
175                 }
176         }
177
178         thread->t_flags = SVC_STOPPED;
179         wake_up(&thread->t_ctl_waitq);
180
181         CDEBUG(D_NET, "pinger thread exiting, process %d\n", current->pid);
182         return 0;
183 }
184
/* Control block of the pinger thread: allocated by ptlrpc_start_pinger(),
 * freed and reset to NULL by ptlrpc_stop_pinger(). */
185 static struct ptlrpc_thread *pinger_thread = NULL;
186
/*
 * Start the kernel pinger thread.
 *
 * When the file is built without ENABLE_PINGER this is a no-op that
 * returns 0.
 *
 * Returns 0 once the thread has reported SVC_RUNNING, -EALREADY if a
 * pinger thread already exists, -ENOMEM if its control block cannot be
 * allocated, or the negative error returned by kernel_thread().
 */
int ptlrpc_start_pinger(void)
{
#ifdef ENABLE_PINGER
        struct l_wait_info lwi = { 0 };
        struct ptlrpc_svc_data d;
        int rc;
        ENTRY;

        if (pinger_thread != NULL)
                RETURN(-EALREADY);

        OBD_ALLOC(pinger_thread, sizeof(*pinger_thread));
        if (pinger_thread == NULL)
                RETURN(-ENOMEM);
        init_waitqueue_head(&pinger_thread->t_ctl_waitq);

        d.name = "ll_ping";
        d.thread = pinger_thread;

        /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
         * just drop the VM and FILES in ptlrpc_daemonize() right away. */
        rc = kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES);
        if (rc < 0) {
                CERROR("cannot start thread: %d\n", rc);
                OBD_FREE(pinger_thread, sizeof(*pinger_thread));
                /* Do not leave a dangling pointer behind: later start and
                 * stop calls test pinger_thread against NULL. */
                pinger_thread = NULL;
                RETURN(rc);
        }

        /* 'd' lives on this stack, so wait until the thread has finished
         * reading it and announced that it is running. */
        l_wait_event(pinger_thread->t_ctl_waitq,
                     pinger_thread->t_flags & SVC_RUNNING, &lwi);

        /* kernel_thread() returned the child's pid; report plain success. */
        RETURN(0);
#else
        return 0;
#endif
}
221
/*
 * Ask the kernel pinger thread to stop, wait until it reports SVC_STOPPED
 * and release its control block.
 *
 * Without ENABLE_PINGER this is a no-op returning 0.  Otherwise returns
 * -EALREADY when no pinger thread exists and 0 after a successful stop.
 */
int ptlrpc_stop_pinger(void)
{
#ifdef ENABLE_PINGER
        struct l_wait_info lwi = { 0 };
        ENTRY;

        if (pinger_thread == NULL)
                RETURN(-EALREADY);

        /* Raise the stop flag and kick the thread out of its sleep. */
        down(&pinger_sem);
        pinger_thread->t_flags = SVC_STOPPING;
        wake_up(&pinger_thread->t_ctl_waitq);
        up(&pinger_sem);

        l_wait_event(pinger_thread->t_ctl_waitq,
                     (pinger_thread->t_flags & SVC_STOPPED), &lwi);

        OBD_FREE(pinger_thread, sizeof(*pinger_thread));
        pinger_thread = NULL;
        RETURN(0);
#else
        return 0;
#endif
}
245
246 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
247 {
248         down(&pinger_sem);
249         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
250         up(&pinger_sem);
251 }
252
253 int ptlrpc_pinger_add_import(struct obd_import *imp)
254 {
255         ENTRY;
256         if (!list_empty(&imp->imp_pinger_chain))
257                 RETURN(-EALREADY);
258
259         down(&pinger_sem);
260         CDEBUG(D_HA, "adding pingable import %s->%s\n",
261                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
262         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
263         /* XXX sort, blah blah */
264         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
265         class_import_get(imp);
266
267         ptlrpc_pinger_wake_up();
268         up(&pinger_sem);
269
270         RETURN(0);
271 }
272
273 int ptlrpc_pinger_del_import(struct obd_import *imp)
274 {
275         ENTRY;
276         if (list_empty(&imp->imp_pinger_chain))
277                 RETURN(-ENOENT);
278
279         down(&pinger_sem);
280         list_del_init(&imp->imp_pinger_chain);
281         CDEBUG(D_HA, "removing pingable import %s->%s\n",
282                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
283         class_import_put(imp);
284         up(&pinger_sem);
285         RETURN(0);
286 }
287
/*
 * Wake the pinger thread so that it re-evaluates its deadlines: raises
 * SVC_EVENT in the thread's flags and signals its control wait queue.
 *
 * Does nothing when built without ENABLE_PINGER, or when no pinger thread
 * exists (ptlrpc_pinger_add_import() calls this unconditionally, so it can
 * run before ptlrpc_start_pinger() or after ptlrpc_stop_pinger()).
 *
 * NOTE(review): the flag update is not atomic and the NULL test does not
 * protect against a concurrent ptlrpc_stop_pinger() -- confirm callers
 * serialize start/stop against import registration.
 */
void ptlrpc_pinger_wake_up(void)
{
#ifdef ENABLE_PINGER
        if (pinger_thread == NULL)
                return;

        pinger_thread->t_flags |= SVC_EVENT;
        wake_up(&pinger_thread->t_ctl_waitq);
#endif
}
295
296 #else /* !__KERNEL__ */
297
298 /* XXX
299  * the current implementation of pinger in liblustre is not optimized
300  */
301
/* State of the liblustre pinger; the single instance is pinger_args. */
302 static struct pinger_data {
303         int             pd_recursion;   /* non-zero while pinger_check_rpcs() is running */
304         unsigned long   pd_this_ping;   /* time(NULL) sampled at the start of the current round */
305         unsigned long   pd_next_ping;   /* no new round starts before this time unless forced */
306         int             pd_force_check; /* set by ptlrpc_pinger_wake_up() to force a round */
307 } pinger_args;
308
309 static int pinger_check_rpcs(void *arg)
310 {
311         unsigned long curtime = time(NULL);
312         struct list_head *iter;
313         struct pinger_data *pd = &pinger_args;
314
315         /* prevent recursion */
316         if (pd->pd_recursion++) {
317                 CDEBUG(D_HA, "pinger: recursion! quit\n");
318                 pd->pd_recursion--;
319                 return 0;
320         }
321
322         /* have we reached ping point? */
323         if (pd->pd_next_ping > curtime && !pd->pd_force_check) {
324                 pd->pd_recursion--;
325                 return 0;
326         }
327
328         if (pd->pd_force_check)
329                 pd->pd_force_check = 0;
330
331         pd->pd_this_ping = curtime;
332
333         /* add rpcs into set */
334         down(&pinger_sem);
335         list_for_each(iter, &pinger_imports) {
336                 struct obd_import *imp =
337                         list_entry(iter, struct obd_import,
338                                    imp_pinger_chain);
339                 int level, force;
340                 unsigned long flags;
341
342
343                 spin_lock_irqsave(&imp->imp_lock, flags);
344                 level = imp->imp_state;
345                 force = imp->imp_force_verify;
346                 if (force)
347                         imp->imp_force_verify = 0;
348                 spin_unlock_irqrestore(&imp->imp_lock, flags);
349
350                 if (imp->imp_next_ping <= pd->pd_this_ping || force) {
351                         if (level == LUSTRE_IMP_DISCON) {
352                                 /* wait at least a timeout before 
353                                    trying recovery again. */
354                                 unsigned long timeout = obd_timeout;
355                                 if (imp->imp_server_timeout)
356                                         timeout = obd_timeout / 2;
357                                 imp->imp_next_ping = time(NULL) + 
358                                         (timeout * HZ);
359                                 ptlrpc_initiate_recovery(imp);
360                         } 
361                         else if (level != LUSTRE_IMP_FULL ||
362                                  imp->imp_obd->obd_no_recov) {
363                                 CDEBUG(D_HA, 
364                                        "not pinging %s (in recovery "
365                                        " or recovery disabled: %s)\n",
366                                        imp->imp_target_uuid.uuid,
367                                        ptlrpc_import_state_name(level));
368                         } 
369                         else if (imp->imp_pingable || force) {
370                                 ptlrpc_ping(imp);
371                         }
372
373                 } else {
374                         if (imp->imp_pingable) {
375                                 CDEBUG(D_HA, "don't need to ping %s "
376                                        "(%lu > %lu)\n", 
377                                        imp->imp_target_uuid.uuid,
378                                        imp->imp_next_ping, pd->pd_this_ping);
379                         }
380                 }
381         }
382
383         up(&pinger_sem);
384
385         pd->pd_next_ping = pd->pd_this_ping + (obd_timeout * HZ);
386
387         CDEBUG(D_HA, "finished a round ping\n");
388         pd->pd_recursion--;
389         return 0;
390 }
391
/* Value returned by liblustre_register_wait_callback() in
 * ptlrpc_start_pinger(); handed back to
 * liblustre_deregister_wait_callback() by ptlrpc_stop_pinger(). */
392 static void *pinger_callback = NULL;
393
394 int ptlrpc_start_pinger(void)
395 {
396         memset(&pinger_args, 0, sizeof(pinger_args));
397 #ifdef ENABLE_PINGER
398         pinger_callback =
399                 liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args);
400 #endif
401         return 0;
402 }
403
/*
 * Stop the liblustre pinger: when built with ENABLE_PINGER and a callback
 * is registered, deregister it and forget the handle so that a repeated
 * stop does not deregister the same callback twice.
 *
 * Always returns 0.
 */
int ptlrpc_stop_pinger(void)
{
#ifdef ENABLE_PINGER
        if (pinger_callback != NULL) {
                liblustre_deregister_wait_callback(pinger_callback);
                pinger_callback = NULL;
        }
#endif
        return 0;
}
412
413 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
414 {
415         down(&pinger_sem);
416         imp->imp_next_ping = time(NULL) + obd_timeout;
417         if (pinger_args.pd_next_ping > imp->imp_next_ping) {
418                 CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n",
419                         imp->imp_next_ping, time(NULL));
420                 pinger_args.pd_next_ping = imp->imp_next_ping;
421         }
422         up(&pinger_sem);
423 }
424
425 int ptlrpc_pinger_add_import(struct obd_import *imp)
426 {
427         ENTRY;
428         if (!list_empty(&imp->imp_pinger_chain))
429                 RETURN(-EALREADY);
430
431         CDEBUG(D_HA, "adding pingable import %s->%s\n",
432                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
433         ptlrpc_pinger_sending_on_import(imp);
434
435         down(&pinger_sem);
436         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
437         class_import_get(imp);
438         up(&pinger_sem);
439
440         RETURN(0);
441 }
442
443 int ptlrpc_pinger_del_import(struct obd_import *imp)
444 {
445         ENTRY;
446         if (list_empty(&imp->imp_pinger_chain))
447                 RETURN(-ENOENT);
448
449         down(&pinger_sem);
450         list_del_init(&imp->imp_pinger_chain);
451         CDEBUG(D_HA, "removing pingable import %s->%s\n",
452                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
453         class_import_put(imp);
454         up(&pinger_sem);
455         RETURN(0);
456 }
457
458 void ptlrpc_pinger_wake_up()
459 {
460         pinger_args.pd_force_check = 1;
461 }
462 #endif /* !__KERNEL__ */