Whamcloud - gitweb
landing b_cmobd_merge on HEAD
[fs/lustre-release.git] / lustre / ptlrpc / pinger.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Portal-RPC reconnection and replay operations, for use in recovery.
5  *
6  *  Copyright (c) 2003 Cluster File Systems, Inc.
7  *   Authors: Phil Schwan <phil@clusterfs.com>
8  *            Mike Shaver <shaver@clusterfs.com>
9  *
10  *   This file is part of Lustre, http://www.lustre.org.
11  *
12  *   Lustre is free software; you can redistribute it and/or
13  *   modify it under the terms of version 2 of the GNU General Public
14  *   License as published by the Free Software Foundation.
15  *
16  *   Lustre is distributed in the hope that it will be useful,
17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *   GNU General Public License for more details.
20  *
21  *   You should have received a copy of the GNU General Public License
22  *   along with Lustre; if not, write to the Free Software
23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25
26 #ifndef __KERNEL__
27 #include <liblustre.h>
28 #else
29 #include <linux/version.h>
30 #include <asm/semaphore.h>
31 #define DEBUG_SUBSYSTEM S_RPC
32 #endif
33
34 #include <linux/obd_support.h>
35 #include <linux/obd_class.h>
36 #include "ptlrpc_internal.h"
37
38 static DECLARE_MUTEX(pinger_sem);
39 static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
40
41 int ptlrpc_ping(struct obd_import *imp) 
42 {
43         struct ptlrpc_request *req;
44         int rc = 0;
45         ENTRY;
46
47         req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
48                               NULL);
49         if (req) {
50                 DEBUG_REQ(D_HA, req, "pinging %s->%s",
51                           imp->imp_obd->obd_uuid.uuid,
52                           imp->imp_target_uuid.uuid);
53                 req->rq_no_resend = req->rq_no_delay = 1;
54                 req->rq_replen = lustre_msg_size(0, 
55                                                  NULL);
56                 ptlrpcd_add_req(req);
57         } else {
58                 CERROR("OOM trying to ping %s->%s\n",
59                           imp->imp_obd->obd_uuid.uuid,
60                           imp->imp_target_uuid.uuid);
61                 rc = -ENOMEM;
62         }
63
64         RETURN(rc);
65 }
66
67 #ifdef __KERNEL__
68 static int ptlrpc_pinger_main(void *arg)
69 {
70         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
71         struct ptlrpc_thread *thread = data->thread;
72         unsigned long flags;
73         ENTRY;
74
75         lock_kernel();
76         ptlrpc_daemonize();
77
78         SIGNAL_MASK_LOCK(current, flags);
79         sigfillset(&current->blocked);
80         RECALC_SIGPENDING;
81         SIGNAL_MASK_UNLOCK(current, flags);
82
83         LASSERTF(strlen(data->name) < sizeof(current->comm),
84                  "name %d > len %d\n",
85                  (int)strlen(data->name), (int)sizeof(current->comm));
86         THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name);
87         unlock_kernel();
88
89         /* Record that the thread is running */
90         thread->t_flags = SVC_RUNNING;
91         wake_up(&thread->t_ctl_waitq);
92
93         /* And now, loop forever, pinging as needed. */
94         while (1) {
95                 unsigned long this_ping = jiffies;
96                 long time_to_next_ping;
97                 struct l_wait_info lwi = LWI_TIMEOUT(obd_timeout * HZ,
98                                                      NULL, NULL);
99                 struct list_head *iter;
100
101                 down(&pinger_sem);
102                 list_for_each(iter, &pinger_imports) {
103                         struct obd_import *imp =
104                                 list_entry(iter, struct obd_import,
105                                            imp_pinger_chain);
106                         int force, level;
107                         unsigned long flags;
108
109
110                         spin_lock_irqsave(&imp->imp_lock, flags);
111                         level = imp->imp_state;
112                         force = imp->imp_force_verify;
113                         if (force)
114                                 imp->imp_force_verify = 0;
115                         spin_unlock_irqrestore(&imp->imp_lock, flags);
116
117                         if (imp->imp_next_ping <= this_ping || force) {
118                                 if (level == LUSTRE_IMP_DISCON) {
119                                         /* wait at least a timeout before 
120                                            trying recovery again. */
121                                         imp->imp_next_ping = jiffies + 
122                                                 (obd_timeout * HZ);
123                                         ptlrpc_initiate_recovery(imp);
124                                 } 
125                                 else if (level != LUSTRE_IMP_FULL ||
126                                          imp->imp_obd->obd_no_recov) {
127                                         CDEBUG(D_HA, 
128                                                "not pinging %s (in recovery "
129                                                " or recovery disabled: %s)\n",
130                                                imp->imp_target_uuid.uuid,
131                                                ptlrpc_import_state_name(level));
132                                 } 
133                                 else if (imp->imp_pingable || force) {
134                                         ptlrpc_ping(imp);
135                                 }
136
137                         } else {
138                                 if (imp->imp_pingable)
139                                         CDEBUG(D_HA, "don't need to ping %s "
140                                                "(%lu > %lu)\n", 
141                                                imp->imp_target_uuid.uuid,
142                                                imp->imp_next_ping, this_ping);
143                         }
144                 }
145                 up(&pinger_sem);
146
147                 /* Wait until the next ping time, or until we're stopped. */
148                 time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies;
149                 CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
150                        this_ping + (obd_timeout * HZ));
151                 if (time_to_next_ping > 0) {
152                         lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
153                         l_wait_event(thread->t_ctl_waitq,
154                                      thread->t_flags & (SVC_STOPPING|SVC_EVENT),
155                                      &lwi);
156                         if (thread->t_flags & SVC_STOPPING) {
157                                 thread->t_flags &= ~SVC_STOPPING;
158                                 EXIT;
159                                 break;
160                         } else if (thread->t_flags & SVC_EVENT) {
161                                 /* woken after adding import to reset timer */
162                                 thread->t_flags &= ~SVC_EVENT;
163                         }
164                 }
165         }
166
167         thread->t_flags = SVC_STOPPED;
168         wake_up(&thread->t_ctl_waitq);
169
170         CDEBUG(D_NET, "pinger thread exiting, process %d\n", current->pid);
171         return 0;
172 }
173
174 static struct ptlrpc_thread *pinger_thread = NULL;
175
176 int ptlrpc_start_pinger(void)
177 {
178         struct l_wait_info lwi = { 0 };
179         struct ptlrpc_svc_data d;
180         int rc;
181 #ifndef ENABLE_PINGER
182         return 0;
183 #endif
184         ENTRY;
185
186         if (pinger_thread != NULL)
187                 RETURN(-EALREADY);
188
189         OBD_ALLOC(pinger_thread, sizeof(*pinger_thread));
190         if (pinger_thread == NULL)
191                 RETURN(-ENOMEM);
192         init_waitqueue_head(&pinger_thread->t_ctl_waitq);
193
194         d.name = "ll_ping";
195         d.thread = pinger_thread;
196
197         /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
198          * just drop the VM and FILES in ptlrpc_daemonize() right away. */
199         rc = kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES);
200         if (rc < 0) {
201                 CERROR("cannot start thread: %d\n", rc);
202                 OBD_FREE(pinger_thread, sizeof(*pinger_thread));
203                 RETURN(rc);
204         }
205         l_wait_event(pinger_thread->t_ctl_waitq,
206                      pinger_thread->t_flags & SVC_RUNNING, &lwi);
207
208         RETURN(rc);
209 }
210
211 int ptlrpc_stop_pinger(void)
212 {
213         struct l_wait_info lwi = { 0 };
214         int rc = 0;
215 #ifndef ENABLE_PINGER
216         return 0;
217 #endif
218         ENTRY;
219
220         if (pinger_thread == NULL)
221                 RETURN(-EALREADY);
222         down(&pinger_sem);
223         pinger_thread->t_flags = SVC_STOPPING;
224         wake_up(&pinger_thread->t_ctl_waitq);
225         up(&pinger_sem);
226
227         l_wait_event(pinger_thread->t_ctl_waitq,
228                      (pinger_thread->t_flags & SVC_STOPPED), &lwi);
229
230         OBD_FREE(pinger_thread, sizeof(*pinger_thread));
231         pinger_thread = NULL;
232         RETURN(rc);
233 }
234
235 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
236 {
237         down(&pinger_sem);
238         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
239         up(&pinger_sem);
240 }
241
242 int ptlrpc_pinger_add_import(struct obd_import *imp)
243 {
244         ENTRY;
245         if (!list_empty(&imp->imp_pinger_chain))
246                 RETURN(-EALREADY);
247
248         down(&pinger_sem);
249         CDEBUG(D_HA, "adding pingable import %s->%s\n",
250                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
251         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
252         /* XXX sort, blah blah */
253         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
254         class_import_get(imp);
255
256         ptlrpc_pinger_wake_up();
257         up(&pinger_sem);
258
259         RETURN(0);
260 }
261
262 int ptlrpc_pinger_del_import(struct obd_import *imp)
263 {
264         ENTRY;
265         if (list_empty(&imp->imp_pinger_chain))
266                 RETURN(-ENOENT);
267
268         down(&pinger_sem);
269         list_del_init(&imp->imp_pinger_chain);
270         CDEBUG(D_HA, "removing pingable import %s->%s\n",
271                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
272         class_import_put(imp);
273         up(&pinger_sem);
274         RETURN(0);
275 }
276
/*
 * Wake the pinger thread ahead of its timeout by setting SVC_EVENT and
 * signalling its control wait queue.
 *
 * Does nothing when ENABLE_PINGER is not defined or when no pinger thread
 * exists.
 */
void ptlrpc_pinger_wake_up(void)
{
#ifdef ENABLE_PINGER
        /* pinger_thread is NULL before the thread is started and after it
         * has been stopped; there is nobody to wake in that case. */
        if (pinger_thread == NULL)
                return;

        pinger_thread->t_flags |= SVC_EVENT;
        wake_up(&pinger_thread->t_ctl_waitq);
#endif
}
284
285 #else /* !__KERNEL__ */
286
287 /* XXX
288  * the current implementation of pinger in liblustre is not optimized
289  */
290
/* Bookkeeping for the callback-driven liblustre pinger. */
struct pinger_data {
        /* non-zero while pinger_check_rpcs() is already running */
        int             pd_recursion;
        /* time at which the current round was started */
        unsigned long   pd_this_ping;
        /* earliest time at which the next round may run */
        unsigned long   pd_next_ping;
        /* set by ptlrpc_pinger_wake_up() to run a round regardless of
         * pd_next_ping */
        int             pd_force_check;
};

static struct pinger_data pinger_args;
297
298 static int pinger_check_rpcs(void *arg)
299 {
300         unsigned long curtime = time(NULL);
301         struct list_head *iter;
302         struct pinger_data *pd = &pinger_args;
303
304         /* prevent recursion */
305         if (pd->pd_recursion++) {
306                 CDEBUG(D_HA, "pinger: recursion! quit\n");
307                 pd->pd_recursion--;
308                 return 0;
309         }
310
311         /* have we reached ping point? */
312         if (pd->pd_next_ping > curtime && !pd->pd_force_check) {
313                 pd->pd_recursion--;
314                 return 0;
315         }
316
317         if (pd->pd_force_check)
318                 pd->pd_force_check = 0;
319
320         pd->pd_this_ping = curtime;
321
322         /* add rpcs into set */
323         down(&pinger_sem);
324         list_for_each(iter, &pinger_imports) {
325                 struct obd_import *imp =
326                         list_entry(iter, struct obd_import,
327                                    imp_pinger_chain);
328                 int level, force;
329                 unsigned long flags;
330
331
332                 spin_lock_irqsave(&imp->imp_lock, flags);
333                 level = imp->imp_state;
334                 force = imp->imp_force_verify;
335                 if (force)
336                         imp->imp_force_verify = 0;
337                 spin_unlock_irqrestore(&imp->imp_lock, flags);
338
339                 if (imp->imp_next_ping <= pd->pd_this_ping || force) {
340                         if (level == LUSTRE_IMP_DISCON) {
341                                 /* wait at least a timeout before 
342                                    trying recovery again. */
343                                 unsigned long timeout = obd_timeout;
344                                 if (imp->imp_server_timeout)
345                                         timeout = obd_timeout / 2;
346                                 imp->imp_next_ping = time(NULL) + 
347                                         (timeout * HZ);
348                                 ptlrpc_initiate_recovery(imp);
349                         } 
350                         else if (level != LUSTRE_IMP_FULL ||
351                                  imp->imp_obd->obd_no_recov) {
352                                 CDEBUG(D_HA, 
353                                        "not pinging %s (in recovery "
354                                        " or recovery disabled: %s)\n",
355                                        imp->imp_target_uuid.uuid,
356                                        ptlrpc_import_state_name(level));
357                         } 
358                         else if (imp->imp_pingable || force) {
359                                 ptlrpc_ping(imp);
360                         }
361
362                 } else {
363                         if (imp->imp_pingable) {
364                                 CDEBUG(D_HA, "don't need to ping %s "
365                                        "(%lu > %lu)\n", 
366                                        imp->imp_target_uuid.uuid,
367                                        imp->imp_next_ping, pd->pd_this_ping);
368                         }
369                 }
370         }
371
372         up(&pinger_sem);
373
374         pd->pd_next_ping = pd->pd_this_ping + (obd_timeout * HZ);
375
376         CDEBUG(D_HA, "finished a round ping\n");
377         pd->pd_recursion--;
378         return 0;
379 }
380
381 static void *pinger_callback = NULL;
382
383 int ptlrpc_start_pinger(void)
384 {
385         memset(&pinger_args, 0, sizeof(pinger_args));
386 #ifdef ENABLE_PINGER
387         pinger_callback =
388                 liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args);
389 #endif
390         return 0;
391 }
392
/*
 * Deregister the liblustre pinger callback, if one is registered.
 *
 * The stored handle is cleared afterwards, so calling this function again
 * does not hand an already deregistered handle back to liblustre.
 *
 * Always returns 0.
 */
int ptlrpc_stop_pinger(void)
{
#ifdef ENABLE_PINGER
        if (pinger_callback) {
                liblustre_deregister_wait_callback(pinger_callback);
                pinger_callback = NULL;
        }
#endif
        return 0;
}
401
402 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
403 {
404         down(&pinger_sem);
405         imp->imp_next_ping = time(NULL) + obd_timeout;
406         if (pinger_args.pd_next_ping > imp->imp_next_ping) {
407                 CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n",
408                         imp->imp_next_ping, time(NULL));
409                 pinger_args.pd_next_ping = imp->imp_next_ping;
410         }
411         up(&pinger_sem);
412 }
413
414 int ptlrpc_pinger_add_import(struct obd_import *imp)
415 {
416         ENTRY;
417         if (!list_empty(&imp->imp_pinger_chain))
418                 RETURN(-EALREADY);
419
420         CDEBUG(D_HA, "adding pingable import %s->%s\n",
421                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
422         ptlrpc_pinger_sending_on_import(imp);
423
424         down(&pinger_sem);
425         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
426         class_import_get(imp);
427         up(&pinger_sem);
428
429         RETURN(0);
430 }
431
432 int ptlrpc_pinger_del_import(struct obd_import *imp)
433 {
434         ENTRY;
435         if (list_empty(&imp->imp_pinger_chain))
436                 RETURN(-ENOENT);
437
438         down(&pinger_sem);
439         list_del_init(&imp->imp_pinger_chain);
440         CDEBUG(D_HA, "removing pingable import %s->%s\n",
441                imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
442         class_import_put(imp);
443         up(&pinger_sem);
444         RETURN(0);
445 }
446
447 void ptlrpc_pinger_wake_up()
448 {
449         pinger_args.pd_force_check = 1;
450 }
451 #endif /* !__KERNEL__ */