Whamcloud - gitweb
- import tracks when connection procedure started. if connection error happens
[fs/lustre-release.git] / lustre / ptlrpc / pinger.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Portal-RPC reconnection and replay operations, for use in recovery.
5  *
6  *  Copyright (c) 2003 Cluster File Systems, Inc.
7  *   Authors: Phil Schwan <phil@clusterfs.com>
8  *            Mike Shaver <shaver@clusterfs.com>
9  *
10  *   This file is part of Lustre, http://www.lustre.org.
11  *
12  *   Lustre is free software; you can redistribute it and/or
13  *   modify it under the terms of version 2 of the GNU General Public
14  *   License as published by the Free Software Foundation.
15  *
16  *   Lustre is distributed in the hope that it will be useful,
17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *   GNU General Public License for more details.
20  *
21  *   You should have received a copy of the GNU General Public License
22  *   along with Lustre; if not, write to the Free Software
23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25
26 #ifndef __KERNEL__
27 #include <liblustre.h>
28 #else
29 #include <linux/version.h>
30 #include <asm/semaphore.h>
31 #define DEBUG_SUBSYSTEM S_RPC
32 #endif
33
34 #include <linux/obd_support.h>
35 #include <linux/obd_class.h>
36 #include "ptlrpc_internal.h"
37
/* Held by the functions in this file while they walk or modify
 * pinger_imports, and while they update an import's imp_next_ping. */
static DECLARE_MUTEX(pinger_sem);
/* List of imports registered with the pinger, linked through each import's
 * imp_pinger_chain member. */
static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
40
41 int ptlrpc_ping(struct obd_import *imp) 
42 {
43         struct ptlrpc_request *req;
44         int rc = 0;
45         ENTRY;
46
47         req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
48                               NULL);
49         if (req) {
50                 DEBUG_REQ(D_HA, req, "pinging %s->%s",
51                           imp->imp_obd->obd_uuid.uuid,
52                           imp->imp_target_uuid.uuid);
53                 req->rq_no_resend = req->rq_no_delay = 1;
54                 req->rq_replen = lustre_msg_size(0, 
55                                                  NULL);
56                 ptlrpcd_add_req(req);
57         } else {
58                 CERROR("OOM trying to ping %s->%s\n",
59                           imp->imp_obd->obd_uuid.uuid,
60                           imp->imp_target_uuid.uuid);
61                 rc = -ENOMEM;
62         }
63
64         RETURN(rc);
65 }
66
67 #ifdef __KERNEL__
68 int ptlrpc_next_ping(struct obd_import *imp)
69 {
70         if (imp->imp_server_timeout)
71                 return jiffies + (obd_timeout / 4 * HZ);
72         else
73                 return jiffies + (obd_timeout / 2 * HZ);
74 }
75
76 static int ptlrpc_pinger_main(void *arg)
77 {
78         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
79         struct ptlrpc_thread *thread = data->thread;
80         unsigned long flags;
81         ENTRY;
82
83         lock_kernel();
84         ptlrpc_daemonize();
85
86         SIGNAL_MASK_LOCK(current, flags);
87         sigfillset(&current->blocked);
88         RECALC_SIGPENDING;
89         SIGNAL_MASK_UNLOCK(current, flags);
90
91         LASSERTF(strlen(data->name) < sizeof(current->comm),
92                  "name %d > len %d\n",
93                  (int)strlen(data->name), (int)sizeof(current->comm));
94         THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name);
95         unlock_kernel();
96
97         /* Record that the thread is running */
98         thread->t_flags = SVC_RUNNING;
99         wake_up(&thread->t_ctl_waitq);
100
101         /* And now, loop forever, pinging as needed. */
102         while (1) {
103                 unsigned long this_ping = jiffies;
104                 long time_to_next_ping;
105                 struct l_wait_info lwi = LWI_TIMEOUT(obd_timeout * HZ,
106                                                      NULL, NULL);
107                 struct list_head *iter;
108
109                 time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies;
110                 down(&pinger_sem);
111                 list_for_each(iter, &pinger_imports) {
112                         struct obd_import *imp =
113                                 list_entry(iter, struct obd_import,
114                                            imp_pinger_chain);
115                         int force, level;
116                         unsigned long flags;
117
118
119                         spin_lock_irqsave(&imp->imp_lock, flags);
120                         level = imp->imp_state;
121                         force = imp->imp_force_verify;
122                         if (force)
123                                 imp->imp_force_verify = 0;
124                         spin_unlock_irqrestore(&imp->imp_lock, flags);
125
126                         if (imp->imp_next_ping <= this_ping || force) {
127                                 if (level == LUSTRE_IMP_DISCON) {
128                                         /* wait at least a timeout before 
129                                            trying recovery again. */
130                                         imp->imp_next_ping =
131                                                 ptlrpc_next_ping(imp);
132                                         ptlrpc_initiate_recovery(imp);
133                                 } else if (level != LUSTRE_IMP_FULL ||
134                                            imp->imp_obd->obd_no_recov) {
135                                         CDEBUG(D_HA, 
136                                                "not pinging %s (in recovery "
137                                                " or recovery disabled: %s)\n",
138                                                imp->imp_target_uuid.uuid,
139                                                ptlrpc_import_state_name(level));
140                                 } else if (imp->imp_pingable || force) {
141                                         ptlrpc_ping(imp);
142                                 }
143
144                         } else if (imp->imp_pingable) {
145                                 CDEBUG(D_HA, "don't need to ping %s "
146                                        "(%lu > %lu)\n",
147                                        imp->imp_target_uuid.uuid,
148                                        imp->imp_next_ping, this_ping);
149                         }
150                         CDEBUG(D_OTHER, "%s: pingable %d, next_ping %lu(%lu)\n",
151                                 imp->imp_target_uuid.uuid,
152                                 imp->imp_pingable, imp->imp_next_ping, jiffies);
153                         if (imp->imp_pingable && imp->imp_next_ping &&
154                             imp->imp_next_ping - jiffies < time_to_next_ping &&
155                             imp->imp_next_ping > jiffies)
156                                 time_to_next_ping = imp->imp_next_ping - jiffies;
157                 }
158                 up(&pinger_sem);
159
160                 /* Wait until the next ping time, or until we're stopped. */
161                 CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
162                        this_ping + (obd_timeout * HZ));
163                 if (time_to_next_ping > 0) {
164                         lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
165                         l_wait_event(thread->t_ctl_waitq,
166                                      thread->t_flags & (SVC_STOPPING|SVC_EVENT),
167                                      &lwi);
168                         if (thread->t_flags & SVC_STOPPING) {
169                                 thread->t_flags &= ~SVC_STOPPING;
170                                 EXIT;
171                                 break;
172                         } else if (thread->t_flags & SVC_EVENT) {
173                                 /* woken after adding import to reset timer */
174                                 thread->t_flags &= ~SVC_EVENT;
175                         }
176                 }
177         }
178
179         thread->t_flags = SVC_STOPPED;
180         wake_up(&thread->t_ctl_waitq);
181
182         CDEBUG(D_NET, "pinger thread exiting, process %d\n", current->pid);
183         return 0;
184 }
185
/* Control block of the pinger thread.  Allocated by ptlrpc_start_pinger();
 * freed and reset to NULL by ptlrpc_stop_pinger(). */
static struct ptlrpc_thread *pinger_thread = NULL;
187
188 int ptlrpc_start_pinger(void)
189 {
190         struct l_wait_info lwi = { 0 };
191         struct ptlrpc_svc_data d;
192         int rc;
193 #ifndef ENABLE_PINGER
194         return 0;
195 #endif
196         ENTRY;
197
198         if (pinger_thread != NULL)
199                 RETURN(-EALREADY);
200
201         OBD_ALLOC(pinger_thread, sizeof(*pinger_thread));
202         if (pinger_thread == NULL)
203                 RETURN(-ENOMEM);
204         init_waitqueue_head(&pinger_thread->t_ctl_waitq);
205
206         d.name = "ll_ping";
207         d.thread = pinger_thread;
208
209         /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
210          * just drop the VM and FILES in ptlrpc_daemonize() right away. */
211         rc = kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES);
212         if (rc < 0) {
213                 CERROR("cannot start thread: %d\n", rc);
214                 OBD_FREE(pinger_thread, sizeof(*pinger_thread));
215                 RETURN(rc);
216         }
217         l_wait_event(pinger_thread->t_ctl_waitq,
218                      pinger_thread->t_flags & SVC_RUNNING, &lwi);
219
220         RETURN(rc);
221 }
222
223 int ptlrpc_stop_pinger(void)
224 {
225         struct l_wait_info lwi = { 0 };
226         int rc = 0;
227 #ifndef ENABLE_PINGER
228         return 0;
229 #endif
230         ENTRY;
231
232         if (pinger_thread == NULL)
233                 RETURN(-EALREADY);
234         down(&pinger_sem);
235         pinger_thread->t_flags = SVC_STOPPING;
236         wake_up(&pinger_thread->t_ctl_waitq);
237         up(&pinger_sem);
238
239         l_wait_event(pinger_thread->t_ctl_waitq,
240                      (pinger_thread->t_flags & SVC_STOPPED), &lwi);
241
242         OBD_FREE(pinger_thread, sizeof(*pinger_thread));
243         pinger_thread = NULL;
244         RETURN(rc);
245 }
246
247 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
248 {
249         down(&pinger_sem);
250         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
251         up(&pinger_sem);
252 }
253
254 int ptlrpc_pinger_add_import(struct obd_import *imp)
255 {
256         ENTRY;
257         if (!list_empty(&imp->imp_pinger_chain))
258                 RETURN(-EALREADY);
259
260         down(&pinger_sem);
261         CDEBUG(D_HA, "adding pingable import %s->%s\n",
262                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
263         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
264         /* XXX sort, blah blah */
265         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
266         class_import_get(imp);
267
268         ptlrpc_pinger_wake_up();
269         up(&pinger_sem);
270
271         RETURN(0);
272 }
273
274 int ptlrpc_pinger_del_import(struct obd_import *imp)
275 {
276         ENTRY;
277         if (list_empty(&imp->imp_pinger_chain))
278                 RETURN(-ENOENT);
279
280         down(&pinger_sem);
281         list_del_init(&imp->imp_pinger_chain);
282         CDEBUG(D_HA, "removing pingable import %s->%s\n",
283                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
284         class_import_put(imp);
285         up(&pinger_sem);
286         RETURN(0);
287 }
288
/*
 * Ask the pinger thread to rescan its import list now: sets SVC_EVENT in
 * the thread's t_flags and wakes its control waitqueue.
 *
 * Does nothing when ENABLE_PINGER is not defined, or when no pinger control
 * block exists (pinger_thread is NULL), so it is safe to call before the
 * pinger has been started or after it has been stopped.
 */
void ptlrpc_pinger_wake_up(void)
{
#ifdef ENABLE_PINGER
        if (pinger_thread == NULL)
                return;

        pinger_thread->t_flags |= SVC_EVENT;
        wake_up(&pinger_thread->t_ctl_waitq);
#endif
}
296
297 #else /* !__KERNEL__ */
298
299 /* XXX
300  * the current implementation of pinger in liblustre is not optimized
301  */
302
/* State of the liblustre (non-kernel) pinger; the single instance is
 * pinger_args. */
static struct pinger_data {
        /* nesting counter; pinger_check_rpcs() bails out when it is
         * already non-zero on entry */
        int             pd_recursion;
        /* time() value recorded at the start of the current ping round */
        unsigned long   pd_this_ping;
        /* a round is skipped while this is still greater than the current
         * time() (unless pd_force_check is set) */
        unsigned long   pd_next_ping;
        /* set by ptlrpc_pinger_wake_up() to force a round regardless of
         * pd_next_ping; cleared when that round starts */
        int             pd_force_check;
} pinger_args;
309
310 static int pinger_check_rpcs(void *arg)
311 {
312         unsigned long curtime = time(NULL);
313         struct list_head *iter;
314         struct pinger_data *pd = &pinger_args;
315
316         /* prevent recursion */
317         if (pd->pd_recursion++) {
318                 CDEBUG(D_HA, "pinger: recursion! quit\n");
319                 pd->pd_recursion--;
320                 return 0;
321         }
322
323         /* have we reached ping point? */
324         if (pd->pd_next_ping > curtime && !pd->pd_force_check) {
325                 pd->pd_recursion--;
326                 return 0;
327         }
328
329         if (pd->pd_force_check)
330                 pd->pd_force_check = 0;
331
332         pd->pd_this_ping = curtime;
333
334         /* add rpcs into set */
335         down(&pinger_sem);
336         list_for_each(iter, &pinger_imports) {
337                 struct obd_import *imp =
338                         list_entry(iter, struct obd_import,
339                                    imp_pinger_chain);
340                 int level, force;
341                 unsigned long flags;
342
343
344                 spin_lock_irqsave(&imp->imp_lock, flags);
345                 level = imp->imp_state;
346                 force = imp->imp_force_verify;
347                 if (force)
348                         imp->imp_force_verify = 0;
349                 spin_unlock_irqrestore(&imp->imp_lock, flags);
350
351                 if (imp->imp_next_ping <= pd->pd_this_ping || force) {
352                         if (level == LUSTRE_IMP_DISCON) {
353                                 /* wait at least a timeout before 
354                                    trying recovery again. */
355                                 unsigned long timeout = obd_timeout;
356                                 if (imp->imp_server_timeout)
357                                         timeout = obd_timeout / 2;
358                                 imp->imp_next_ping = time(NULL) + 
359                                         (timeout * HZ);
360                                 ptlrpc_initiate_recovery(imp);
361                         } 
362                         else if (level != LUSTRE_IMP_FULL ||
363                                  imp->imp_obd->obd_no_recov) {
364                                 CDEBUG(D_HA, 
365                                        "not pinging %s (in recovery "
366                                        " or recovery disabled: %s)\n",
367                                        imp->imp_target_uuid.uuid,
368                                        ptlrpc_import_state_name(level));
369                         } 
370                         else if (imp->imp_pingable || force) {
371                                 ptlrpc_ping(imp);
372                         }
373
374                 } else {
375                         if (imp->imp_pingable) {
376                                 CDEBUG(D_HA, "don't need to ping %s "
377                                        "(%lu > %lu)\n", 
378                                        imp->imp_target_uuid.uuid,
379                                        imp->imp_next_ping, pd->pd_this_ping);
380                         }
381                 }
382         }
383
384         up(&pinger_sem);
385
386         pd->pd_next_ping = pd->pd_this_ping + (obd_timeout * HZ);
387
388         CDEBUG(D_HA, "finished a round ping\n");
389         pd->pd_recursion--;
390         return 0;
391 }
392
/* Handle returned by liblustre_register_wait_callback() for
 * pinger_check_rpcs; NULL until ptlrpc_start_pinger() registers it. */
static void *pinger_callback = NULL;
394
395 int ptlrpc_start_pinger(void)
396 {
397         memset(&pinger_args, 0, sizeof(pinger_args));
398 #ifdef ENABLE_PINGER
399         pinger_callback =
400                 liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args);
401 #endif
402         return 0;
403 }
404
/*
 * Stop the liblustre pinger: when ENABLE_PINGER is defined and a wait
 * callback is registered, deregister it and forget the handle so that a
 * repeated stop does not deregister the same handle twice.
 *
 * Always returns 0.
 */
int ptlrpc_stop_pinger(void)
{
#ifdef ENABLE_PINGER
        if (pinger_callback != NULL) {
                liblustre_deregister_wait_callback(pinger_callback);
                pinger_callback = NULL;
        }
#endif
        return 0;
}
413
414 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
415 {
416         down(&pinger_sem);
417         imp->imp_next_ping = time(NULL) + obd_timeout;
418         if (pinger_args.pd_next_ping > imp->imp_next_ping) {
419                 CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n",
420                         imp->imp_next_ping, time(NULL));
421                 pinger_args.pd_next_ping = imp->imp_next_ping;
422         }
423         up(&pinger_sem);
424 }
425
426 int ptlrpc_pinger_add_import(struct obd_import *imp)
427 {
428         ENTRY;
429         if (!list_empty(&imp->imp_pinger_chain))
430                 RETURN(-EALREADY);
431
432         CDEBUG(D_HA, "adding pingable import %s->%s\n",
433                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
434         ptlrpc_pinger_sending_on_import(imp);
435
436         down(&pinger_sem);
437         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
438         class_import_get(imp);
439         up(&pinger_sem);
440
441         RETURN(0);
442 }
443
444 int ptlrpc_pinger_del_import(struct obd_import *imp)
445 {
446         ENTRY;
447         if (list_empty(&imp->imp_pinger_chain))
448                 RETURN(-ENOENT);
449
450         down(&pinger_sem);
451         list_del_init(&imp->imp_pinger_chain);
452         CDEBUG(D_HA, "removing pingable import %s->%s\n",
453                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
454         class_import_put(imp);
455         up(&pinger_sem);
456         RETURN(0);
457 }
458
459 void ptlrpc_pinger_wake_up()
460 {
461         pinger_args.pd_force_check = 1;
462 }
463 #endif /* !__KERNEL__ */