Whamcloud - gitweb
merge b_devel into HEAD, which will become 0.7.3
[fs/lustre-release.git] / lustre / ptlrpc / pinger.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Portal-RPC reconnection and replay operations, for use in recovery.
5  *
6  *  Copyright (c) 2003 Cluster File Systems, Inc.
7  *   Authors: Phil Schwan <phil@clusterfs.com>
8  *            Mike Shaver <shaver@clusterfs.com>
9  *
10  *   This file is part of Lustre, http://www.lustre.org.
11  *
12  *   Lustre is free software; you can redistribute it and/or
13  *   modify it under the terms of version 2 of the GNU General Public
14  *   License as published by the Free Software Foundation.
15  *
16  *   Lustre is distributed in the hope that it will be useful,
17  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *   GNU General Public License for more details.
20  *
21  *   You should have received a copy of the GNU General Public License
22  *   along with Lustre; if not, write to the Free Software
23  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25
26 #include <linux/version.h>
27 #include <asm/semaphore.h>
28
29 #define DEBUG_SUBSYSTEM S_RPC
30 #include <linux/obd_support.h>
31 #include <linux/obd_class.h>
32 #include "ptlrpc_internal.h"
33
/* Control structure of the pinger thread; NULL until ptlrpc_start_pinger()
 * allocates it. */
static struct ptlrpc_thread *pinger_thread;
/* Semaphore taken around updates to the pinger import list and by the
 * pinger thread start/stop paths. */
static DECLARE_MUTEX(pinger_sem);
/* Imports registered for pinging, linked through imp_pinger_chain. */
static LIST_HEAD(pinger_imports);

/* Declared up front because the add/del import paths call them before
 * their definitions appear. */
int ptlrpc_stop_pinger(void);
int ptlrpc_start_pinger(void);
40
41 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
42 {
43         down(&pinger_sem);
44         imp->imp_next_ping = jiffies + (obd_timeout * HZ);
45         up(&pinger_sem);
46 }
47
/*
 * Register @imp with the pinger, starting the pinger thread first if the
 * list of pinged imports is currently empty.  A reference is taken on @imp
 * (class_import_get) for as long as it stays on the list.
 *
 * Returns 0 on success, -EALREADY if @imp is already on the pinger list, or
 * the negative error from ptlrpc_start_pinger() if the pinger thread could
 * not be started.  When ENABLE_PINGER is not defined this is a no-op that
 * returns 0.
 */
int ptlrpc_pinger_add_import(struct obd_import *imp)
{
#ifndef ENABLE_PINGER
        return 0;
#else
        int rc;
        ENTRY;

        down(&pinger_sem);

        /* Test membership under pinger_sem so that two concurrent callers
         * cannot both decide to insert the same import. */
        if (!list_empty(&imp->imp_pinger_chain))
                GOTO(out, rc = -EALREADY);

        if (list_empty(&pinger_imports)) {
                /* ptlrpc_start_pinger() takes pinger_sem itself, so it has
                 * to be dropped around the call. */
                up(&pinger_sem);
                rc = ptlrpc_start_pinger();
                /* -EALREADY only means the thread is already there, which
                 * is all that is needed here; it must not fail the add. */
                if (rc < 0 && rc != -EALREADY)
                        RETURN(rc);
                down(&pinger_sem);

                /* The semaphore was released above, so @imp may have been
                 * added by another caller in the meantime. */
                if (!list_empty(&imp->imp_pinger_chain))
                        GOTO(out, rc = -EALREADY);
        }

        CDEBUG(D_HA, "adding pingable import %s->%s\n",
               imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
        imp->imp_next_ping = jiffies + (obd_timeout * HZ);
        list_add_tail(&imp->imp_pinger_chain, &pinger_imports); /* XXX sort, blah blah */
        class_import_get(imp);
        rc = 0;

 out:
        up(&pinger_sem);
        RETURN(rc);
#endif
}
77
/*
 * Remove @imp from the pinger list and drop the reference that was taken
 * when it was added.  If that leaves the list empty, the pinger thread is
 * stopped.
 *
 * Returns 0 on success, -ENOENT if @imp is not on the pinger list, or the
 * non-zero result of ptlrpc_stop_pinger() if stopping the thread failed.
 * When ENABLE_PINGER is not defined this is a no-op that returns 0.
 */
int ptlrpc_pinger_del_import(struct obd_import *imp)
{
#ifndef ENABLE_PINGER
        return 0;
#else
        int rc = 0;
        int list_now_empty;
        ENTRY;

        down(&pinger_sem);

        /* Test membership under pinger_sem so that two concurrent callers
         * cannot both unlink (and both drop a reference on) the import. */
        if (list_empty(&imp->imp_pinger_chain)) {
                up(&pinger_sem);
                RETURN(-ENOENT);
        }

        list_del_init(&imp->imp_pinger_chain);
        CDEBUG(D_HA, "removing pingable import %s->%s\n",
               imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
        class_import_put(imp);
        list_now_empty = list_empty(&pinger_imports);
        up(&pinger_sem);

        /* ptlrpc_stop_pinger() takes pinger_sem itself, so it is called
         * only after the semaphore has been released. */
        if (list_now_empty)
                rc = ptlrpc_stop_pinger();

        RETURN(rc);
#endif
}
105
106 static int ptlrpc_pinger_main(void *arg)
107 {
108         struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
109         struct ptlrpc_thread *thread = data->thread;
110         unsigned long flags;
111         ENTRY;
112
113         lock_kernel();
114         ptlrpc_daemonize();
115
116         SIGNAL_MASK_LOCK(current, flags);
117         sigfillset(&current->blocked);
118         RECALC_SIGPENDING;
119         SIGNAL_MASK_UNLOCK(current, flags);
120
121         THREAD_NAME(current->comm, "%s", data->name);
122         unlock_kernel();
123
124         /* Record that the thread is running */
125         thread->t_flags = SVC_RUNNING;
126         wake_up(&thread->t_ctl_waitq);
127
128         /* And now, loop forever, pinging as needed. */
129         while (1) {
130                 unsigned long this_ping = jiffies;
131                 long time_to_next_ping;
132                 struct l_wait_info lwi = LWI_TIMEOUT(10 * HZ, NULL, NULL);
133                 struct ptlrpc_request_set *set;
134                 struct ptlrpc_request *req;
135                 struct list_head *iter;
136                 wait_queue_t set_wait;
137                 int rc;
138
139                 set = ptlrpc_prep_set();
140                 down(&pinger_sem);
141                 list_for_each(iter, &pinger_imports) {
142                         struct obd_import *imp =
143                                 list_entry(iter, struct obd_import,
144                                            imp_pinger_chain);
145                         int generation, level;
146                         unsigned long flags;
147
148                         if (imp->imp_next_ping <= this_ping) {
149                                 /* Add a ping. */
150                                 spin_lock_irqsave(&imp->imp_lock, flags);
151                                 generation = imp->imp_generation;
152                                 level = imp->imp_level;
153                                 spin_unlock_irqrestore(&imp->imp_lock, flags);
154
155                                 if (level != LUSTRE_CONN_FULL) {
156                                         CDEBUG(D_HA,
157                                                "not pinging %s (in recovery)\n",
158                                                imp->imp_target_uuid.uuid);
159                                         continue;
160                                 }
161
162                                 req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
163                                                       NULL);
164                                 if (!req) {
165                                         CERROR("OOM trying to ping\n");
166                                         break;
167                                 }
168                                 req->rq_no_resend = 1;
169                                 req->rq_replen = lustre_msg_size(0, NULL);
170                                 req->rq_level = LUSTRE_CONN_FULL;
171                                 req->rq_phase = RQ_PHASE_RPC;
172                                 req->rq_import_generation = generation;
173                                 ptlrpc_set_add_req(set, req);
174                         } else {
175                                 CDEBUG(D_HA, "don't need to ping %s (%lu > %lu)\n",
176                                        imp->imp_target_uuid.uuid, imp->imp_next_ping,
177                                        this_ping);
178                         }
179                 }
180                 up(&pinger_sem);
181
182                 /* Might be empty, that's OK. */
183                 if (set->set_remaining == 0)
184                         CDEBUG(D_HA, "nothing to ping\n");
185                 list_for_each(iter, &set->set_requests) {
186                         struct ptlrpc_request *req =
187                                 list_entry(iter, struct ptlrpc_request, rq_set_chain);
188                         DEBUG_REQ(D_HA, req, "pinging %s->%s",
189                                   req->rq_import->imp_obd->obd_uuid.uuid,
190                                   req->rq_import->imp_target_uuid.uuid);
191                         (void)ptl_send_rpc(req);
192                 }
193
194                 /* Have to wait on both the thread's queue and the set's. */
195                 init_waitqueue_entry(&set_wait, current);
196                 add_wait_queue(&set->set_waitq, &set_wait);
197                 rc = l_wait_event(thread->t_ctl_waitq,
198                                   thread->t_flags & SVC_STOPPING || ptlrpc_check_set(set),
199                                   &lwi);
200                 remove_wait_queue(&set->set_waitq, &set_wait);
201                 CDEBUG(D_HA, "ping complete (%lu)\n", jiffies);
202
203                 if (thread->t_flags & SVC_STOPPING) {
204                         thread->t_flags &= ~SVC_STOPPING;
205                         list_for_each(iter, &set->set_requests) {
206                                 req = list_entry(iter, struct ptlrpc_request,
207                                                  rq_set_chain);
208                                 if (!req->rq_replied)
209                                         ptlrpc_unregister_reply(req);
210                         }
211                         ptlrpc_set_destroy(set);
212                         EXIT;
213                         break;
214                 }
215
216                 /* Expire all the requests that didn't come back. */
217                 down(&pinger_sem);
218                 list_for_each(iter, &set->set_requests) {
219                         req = list_entry(iter, struct ptlrpc_request, rq_set_chain);
220
221                         if (req->rq_replied)
222                                 continue;
223
224                         req->rq_phase = RQ_PHASE_COMPLETE;
225                         set->set_remaining--;
226                         /* If it was disconnected, don't sweat it. */
227                         if (list_empty(&req->rq_import->imp_pinger_chain))
228                                 continue;
229
230                         ptlrpc_expire_one_request(req);
231                 }
232                 up(&pinger_sem);
233                 ptlrpc_set_destroy(set);
234
235                 /* Wait until the next ping time, or until we're stopped. */
236                 time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies;
237                 CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
238                        this_ping + (obd_timeout * HZ));
239                 if (time_to_next_ping > 0) {
240                         lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
241                         l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPING,
242                                      &lwi);
243                         if (thread->t_flags & SVC_STOPPING) {
244                                 thread->t_flags &= ~SVC_STOPPING;
245                                 EXIT;
246                                 break;
247                         }
248                 }
249         }
250
251         thread->t_flags = SVC_STOPPED;
252         wake_up(&thread->t_ctl_waitq);
253
254         CDEBUG(D_NET, "pinger thread exiting, process %d\n", current->pid);
255         return 0;
256 }
257
258 int ptlrpc_start_pinger(void)
259 {
260         struct l_wait_info lwi = { 0 };
261         struct ptlrpc_svc_data d;
262         int rc;
263         ENTRY;
264
265         down(&pinger_sem);
266         if (pinger_thread != NULL)
267                 GOTO(out, rc = -EALREADY);
268
269         OBD_ALLOC(pinger_thread, sizeof(*pinger_thread));
270         if (pinger_thread == NULL)
271                 GOTO(out, rc = -ENOMEM);
272         init_waitqueue_head(&pinger_thread->t_ctl_waitq);
273
274         d.name = "ll_ping";
275         d.thread = pinger_thread;
276
277         /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
278          * just drop the VM and FILES in ptlrpc_daemonize() right away. */
279         rc = kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES);
280         if (rc < 0) {
281                 CERROR("cannot start thread: %d\n", rc);
282                 OBD_FREE(pinger_thread, sizeof(*pinger_thread));
283                 GOTO(out, rc);
284         }
285         l_wait_event(pinger_thread->t_ctl_waitq,
286                      pinger_thread->t_flags & SVC_RUNNING, &lwi);
287
288  out:
289         up(&pinger_sem);
290         RETURN(rc);
291 }
292
293 int ptlrpc_stop_pinger(void)
294 {
295         struct l_wait_info lwi = { 0 };
296         int rc = 0;
297         ENTRY;
298
299         down(&pinger_sem);
300         if (pinger_thread == NULL)
301                 GOTO(out, rc = -EALREADY);
302
303         pinger_thread->t_flags = SVC_STOPPING;
304         wake_up(&pinger_thread->t_ctl_waitq);
305         l_wait_event(pinger_thread->t_ctl_waitq,
306                      (pinger_thread->t_flags & SVC_STOPPED), &lwi);
307
308         OBD_FREE(pinger_thread, sizeof(*pinger_thread));
309
310  out:
311         up(&pinger_sem);
312         RETURN(rc);
313 }