Whamcloud - gitweb
- mds->lmv->mdc propagate lower timeout down to import
[fs/lustre-release.git] / lustre / ptlrpc / pinger.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Portal-RPC reconnection and replay operations, for use in recovery.
5  *
6  *  Copyright (c) 2003 Cluster File Systems, Inc.
7  *   Authors: Phil Schwan <phil@clusterfs.com>
8  *            Mike Shaver <shaver@clusterfs.com>
9  *
10  *   This file is part of Lustre, http://www.lustre.org.
11  *
12  *   Lustre is free software; you can redistribute it and/or
13  *   modify it under the terms of version 2 of the GNU General Public
14  *   License as published by the Free Software Foundation.
15  *
16  *   Lustre is distributed in the hope that it will be useful,
17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *   GNU General Public License for more details.
20  *
21  *   You should have received a copy of the GNU General Public License
22  *   along with Lustre; if not, write to the Free Software
23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25
26 #ifndef __KERNEL__
27 #include <liblustre.h>
28 #else
29 #include <linux/version.h>
30 #include <asm/semaphore.h>
31 #define DEBUG_SUBSYSTEM S_RPC
32 #endif
33
34 #include <linux/obd_support.h>
35 #include <linux/obd_class.h>
36 #include "ptlrpc_internal.h"
37
/* Semaphore taken (down/up) around every traversal of pinger_imports and
 * every insertion into or removal from it in this file. */
static DECLARE_MUTEX(pinger_sem);
/* Imports registered with the pinger, linked through imp_pinger_chain. */
static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);

/* Control block of the kernel pinger thread.  Initially NULL; allocated by
 * the kernel ptlrpc_start_pinger() and reset to NULL by the kernel
 * ptlrpc_stop_pinger(). */
static struct ptlrpc_thread *pinger_thread = NULL;
42
43 int ptlrpc_ping(struct obd_import *imp) 
44 {
45         struct ptlrpc_request *req;
46         int rc = 0;
47         ENTRY;
48
49         req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
50                               NULL);
51         if (req) {
52                 DEBUG_REQ(D_HA, req, "pinging %s->%s",
53                           imp->imp_obd->obd_uuid.uuid,
54                           imp->imp_target_uuid.uuid);
55                 req->rq_no_resend = req->rq_no_delay = 1;
56                 req->rq_replen = lustre_msg_size(0, 
57                                                  NULL);
58                 ptlrpcd_add_req(req);
59         } else {
60                 CERROR("OOM trying to ping %s->%s\n",
61                           imp->imp_obd->obd_uuid.uuid,
62                           imp->imp_target_uuid.uuid);
63                 rc = -ENOMEM;
64         }
65
66         RETURN(rc);
67 }
68
69 #ifdef __KERNEL__
70 static int ptlrpc_pinger_main(void *arg)
71 {
72         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
73         struct ptlrpc_thread *thread = data->thread;
74         unsigned long flags;
75         ENTRY;
76
77         lock_kernel();
78         ptlrpc_daemonize();
79
80         SIGNAL_MASK_LOCK(current, flags);
81         sigfillset(&current->blocked);
82         RECALC_SIGPENDING;
83         SIGNAL_MASK_UNLOCK(current, flags);
84
85         THREAD_NAME(current->comm, "%s", data->name);
86         unlock_kernel();
87
88         /* Record that the thread is running */
89         thread->t_flags = SVC_RUNNING;
90         wake_up(&thread->t_ctl_waitq);
91
92         /* And now, loop forever, pinging as needed. */
93         while (1) {
94                 unsigned long this_ping = jiffies;
95                 long time_to_next_ping;
96                 struct l_wait_info lwi = LWI_TIMEOUT(obd_timeout * HZ,
97                                                      NULL, NULL);
98                 struct list_head *iter;
99
100                 down(&pinger_sem);
101                 list_for_each(iter, &pinger_imports) {
102                         struct obd_import *imp =
103                                 list_entry(iter, struct obd_import,
104                                            imp_pinger_chain);
105                         int force, level;
106                         unsigned long flags;
107
108
109                         spin_lock_irqsave(&imp->imp_lock, flags);
110                         level = imp->imp_state;
111                         force = imp->imp_force_verify;
112                         if (force)
113                                 imp->imp_force_verify = 0;
114                         spin_unlock_irqrestore(&imp->imp_lock, flags);
115
116                         if (imp->imp_next_ping <= this_ping || force) {
117                                 if (level == LUSTRE_IMP_DISCON) {
118                                         /* wait at least a timeout before 
119                                            trying recovery again. */
120                                         imp->imp_next_ping = jiffies + 
121                                                 (obd_timeout * HZ);
122                                         ptlrpc_initiate_recovery(imp);
123                                 } 
124                                 else if (level != LUSTRE_IMP_FULL ||
125                                          imp->imp_obd->obd_no_recov) {
126                                         CDEBUG(D_HA, 
127                                                "not pinging %s (in recovery "
128                                                " or recovery disabled: %s)\n",
129                                                imp->imp_target_uuid.uuid,
130                                                ptlrpc_import_state_name(level));
131                                 } 
132                                 else if (imp->imp_pingable || force) {
133                                         ptlrpc_ping(imp);
134                                 }
135
136                         } else {
137                                 if (imp->imp_pingable)
138                                         CDEBUG(D_HA, "don't need to ping %s "
139                                                "(%lu > %lu)\n", 
140                                                imp->imp_target_uuid.uuid,
141                                                imp->imp_next_ping, this_ping);
142                         }
143                 }
144                 up(&pinger_sem);
145
146                 /* Wait until the next ping time, or until we're stopped. */
147                 time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies;
148                 CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
149                        this_ping + (obd_timeout * HZ));
150                 if (time_to_next_ping > 0) {
151                         lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
152                         l_wait_event(thread->t_ctl_waitq,
153                                      thread->t_flags & (SVC_STOPPING|SVC_EVENT),
154                                      &lwi);
155                         if (thread->t_flags & SVC_STOPPING) {
156                                 thread->t_flags &= ~SVC_STOPPING;
157                                 EXIT;
158                                 break;
159                         } else if (thread->t_flags & SVC_EVENT) {
160                                 /* woken after adding import to reset timer */
161                                 thread->t_flags &= ~SVC_EVENT;
162                         }
163                 }
164         }
165
166         thread->t_flags = SVC_STOPPED;
167         wake_up(&thread->t_ctl_waitq);
168
169         CDEBUG(D_NET, "pinger thread exiting, process %d\n", current->pid);
170         return 0;
171 }
172
173 int ptlrpc_start_pinger(void)
174 {
175         struct l_wait_info lwi = { 0 };
176         struct ptlrpc_svc_data d;
177         int rc;
178 #ifndef ENABLE_PINGER
179         return 0;
180 #endif
181         ENTRY;
182
183         if (pinger_thread != NULL)
184                 RETURN(-EALREADY);
185
186         OBD_ALLOC(pinger_thread, sizeof(*pinger_thread));
187         if (pinger_thread == NULL)
188                 RETURN(-ENOMEM);
189         init_waitqueue_head(&pinger_thread->t_ctl_waitq);
190
191         d.name = "ll_ping";
192         d.thread = pinger_thread;
193
194         /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
195          * just drop the VM and FILES in ptlrpc_daemonize() right away. */
196         rc = kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES);
197         if (rc < 0) {
198                 CERROR("cannot start thread: %d\n", rc);
199                 OBD_FREE(pinger_thread, sizeof(*pinger_thread));
200                 RETURN(rc);
201         }
202         l_wait_event(pinger_thread->t_ctl_waitq,
203                      pinger_thread->t_flags & SVC_RUNNING, &lwi);
204
205         RETURN(rc);
206 }
207
208 int ptlrpc_stop_pinger(void)
209 {
210         struct l_wait_info lwi = { 0 };
211         int rc = 0;
212 #ifndef ENABLE_PINGER
213         return 0;
214 #endif
215         ENTRY;
216
217         if (pinger_thread == NULL)
218                 RETURN(-EALREADY);
219         down(&pinger_sem);
220         pinger_thread->t_flags = SVC_STOPPING;
221         wake_up(&pinger_thread->t_ctl_waitq);
222         up(&pinger_sem);
223
224         l_wait_event(pinger_thread->t_ctl_waitq,
225                      (pinger_thread->t_flags & SVC_STOPPED), &lwi);
226
227         OBD_FREE(pinger_thread, sizeof(*pinger_thread));
228         pinger_thread = NULL;
229         RETURN(rc);
230 }
231
232 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
233 {
234         down(&pinger_sem);
235         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
236         up(&pinger_sem);
237 }
238
239 int ptlrpc_pinger_add_import(struct obd_import *imp)
240 {
241         ENTRY;
242         if (!list_empty(&imp->imp_pinger_chain))
243                 RETURN(-EALREADY);
244
245         down(&pinger_sem);
246         CDEBUG(D_HA, "adding pingable import %s->%s\n",
247                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
248         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
249         /* XXX sort, blah blah */
250         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
251         class_import_get(imp);
252
253         ptlrpc_pinger_wake_up();
254         up(&pinger_sem);
255
256         RETURN(0);
257 }
258
259 int ptlrpc_pinger_del_import(struct obd_import *imp)
260 {
261         ENTRY;
262         if (list_empty(&imp->imp_pinger_chain))
263                 RETURN(-ENOENT);
264
265         down(&pinger_sem);
266         list_del_init(&imp->imp_pinger_chain);
267         CDEBUG(D_HA, "removing pingable import %s->%s\n",
268                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
269         class_import_put(imp);
270         up(&pinger_sem);
271         RETURN(0);
272 }
273
/*
 * Raise SVC_EVENT on the kernel pinger thread and wake it, so that it
 * leaves its timed wait and starts a new round.  Does nothing when
 * ENABLE_PINGER is not defined or when no pinger thread control block
 * exists (pinger never started, or already stopped).
 */
void ptlrpc_pinger_wake_up()
{
#ifdef ENABLE_PINGER
        /* Guard against being called while no pinger is running. */
        if (pinger_thread == NULL)
                return;

        pinger_thread->t_flags |= SVC_EVENT;
        wake_up(&pinger_thread->t_ctl_waitq);
#endif
}
281
282 #else /* !__KERNEL__ */
283
284 /* XXX
285  * the current implementation of pinger in liblustre is not optimized
286  */
287
/* State of the liblustre (userspace) pinger.  The single instance,
 * pinger_args, is used by pinger_check_rpcs() and the functions below. */
static struct pinger_data {
        /* Nesting counter: incremented on entry to pinger_check_rpcs() and
         * decremented on every exit; a nested call sees it nonzero. */
        int             pd_recursion;
        /* time(NULL) value sampled at the start of the current round. */
        unsigned long   pd_this_ping;
        /* Earliest time at which the next round is allowed to run. */
        unsigned long   pd_next_ping;
        /* Set by ptlrpc_pinger_wake_up() to run a round regardless of
         * pd_next_ping; cleared when that round starts. */
        int             pd_force_check;
} pinger_args;
294
295 static int pinger_check_rpcs(void *arg)
296 {
297         unsigned long curtime = time(NULL);
298         struct list_head *iter;
299         struct pinger_data *pd = &pinger_args;
300
301         /* prevent recursion */
302         if (pd->pd_recursion++) {
303                 CDEBUG(D_HA, "pinger: recursion! quit\n");
304                 pd->pd_recursion--;
305                 return 0;
306         }
307
308         /* have we reached ping point? */
309         if (pd->pd_next_ping > curtime && !pd->pd_force_check) {
310                 pd->pd_recursion--;
311                 return 0;
312         }
313
314         if (pd->pd_force_check)
315                 pd->pd_force_check = 0;
316
317         pd->pd_this_ping = curtime;
318
319         /* add rpcs into set */
320         down(&pinger_sem);
321         list_for_each(iter, &pinger_imports) {
322                 struct obd_import *imp =
323                         list_entry(iter, struct obd_import,
324                                    imp_pinger_chain);
325                 int level, force;
326                 unsigned long flags;
327
328
329                 spin_lock_irqsave(&imp->imp_lock, flags);
330                 level = imp->imp_state;
331                 force = imp->imp_force_verify;
332                 if (force)
333                         imp->imp_force_verify = 0;
334                 spin_unlock_irqrestore(&imp->imp_lock, flags);
335
336                 if (imp->imp_next_ping <= pd->pd_this_ping || force) {
337                         if (level == LUSTRE_IMP_DISCON) {
338                                 /* wait at least a timeout before 
339                                    trying recovery again. */
340                                 unsigned long timeout = obd_timeout;
341                                 if (imp->imp_server_timeout)
342                                         timeout = obd_timeout / 2;
343                                 imp->imp_next_ping = time(NULL) + 
344                                         (timeout * HZ);
345                                 ptlrpc_initiate_recovery(imp);
346                         } 
347                         else if (level != LUSTRE_IMP_FULL ||
348                                  imp->imp_obd->obd_no_recov) {
349                                 CDEBUG(D_HA, 
350                                        "not pinging %s (in recovery "
351                                        " or recovery disabled: %s)\n",
352                                        imp->imp_target_uuid.uuid,
353                                        ptlrpc_import_state_name(level));
354                         } 
355                         else if (imp->imp_pingable || force) {
356                                 ptlrpc_ping(imp);
357                         }
358
359                 } else {
360                         if (imp->imp_pingable) {
361                                 CDEBUG(D_HA, "don't need to ping %s "
362                                        "(%lu > %lu)\n", 
363                                        imp->imp_target_uuid.uuid,
364                                        imp->imp_next_ping, pd->pd_this_ping);
365                         }
366                 }
367         }
368
369         up(&pinger_sem);
370
371         pd->pd_next_ping = pd->pd_this_ping + (obd_timeout * HZ);
372
373         CDEBUG(D_HA, "finished a round ping\n");
374         pd->pd_recursion--;
375         return 0;
376 }
377
/* Handle returned by liblustre_register_wait_callback() for
 * pinger_check_rpcs().  Initially NULL; assigned by ptlrpc_start_pinger()
 * when ENABLE_PINGER is defined. */
static void *pinger_callback = NULL;
379
380 int ptlrpc_start_pinger(void)
381 {
382         memset(&pinger_args, 0, sizeof(pinger_args));
383 #ifdef ENABLE_PINGER
384         pinger_callback =
385                 liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args);
386 #endif
387         return 0;
388 }
389
/*
 * Stop the liblustre pinger: when ENABLE_PINGER is defined and a wait
 * callback is registered, deregister it and forget the handle so that a
 * repeated call does not deregister the same handle twice.
 * Always returns 0.
 */
int ptlrpc_stop_pinger(void)
{
#ifdef ENABLE_PINGER
        if (pinger_callback) {
                liblustre_deregister_wait_callback(pinger_callback);
                pinger_callback = NULL;
        }
#endif
        return 0;
}
398
399 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
400 {
401         down(&pinger_sem);
402         imp->imp_next_ping = time(NULL) + obd_timeout;
403         if (pinger_args.pd_next_ping > imp->imp_next_ping) {
404                 CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n",
405                         imp->imp_next_ping, time(NULL));
406                 pinger_args.pd_next_ping = imp->imp_next_ping;
407         }
408         up(&pinger_sem);
409 }
410
411 int ptlrpc_pinger_add_import(struct obd_import *imp)
412 {
413         ENTRY;
414         if (!list_empty(&imp->imp_pinger_chain))
415                 RETURN(-EALREADY);
416
417         CDEBUG(D_HA, "adding pingable import %s->%s\n",
418                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
419         ptlrpc_pinger_sending_on_import(imp);
420
421         down(&pinger_sem);
422         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
423         class_import_get(imp);
424         up(&pinger_sem);
425
426         RETURN(0);
427 }
428
429 int ptlrpc_pinger_del_import(struct obd_import *imp)
430 {
431         ENTRY;
432         if (list_empty(&imp->imp_pinger_chain))
433                 RETURN(-ENOENT);
434
435         down(&pinger_sem);
436         list_del_init(&imp->imp_pinger_chain);
437         CDEBUG(D_HA, "removing pingable import %s->%s\n",
438                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
439         class_import_put(imp);
440         up(&pinger_sem);
441         RETURN(0);
442 }
443
444 void ptlrpc_pinger_wake_up()
445 {
446         pinger_args.pd_force_check = 1;
447 }
448 #endif /* !__KERNEL__ */