Whamcloud - gitweb
Branch b1_6
[fs/lustre-release.git] / lustre / ptlrpc / ptlrpcd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
5  *   Author Peter Braam <braam@clusterfs.com>
6  *
7  *   This file is part of the Lustre file system, http://www.lustre.org
8  *   Lustre is a trademark of Cluster File Systems, Inc.
9  *
10  *   You may have signed or agreed to another license before downloading
11  *   this software.  If so, you are bound by the terms and conditions
12  *   of that agreement, and the following does not apply to you.  See the
13  *   LICENSE file included with this distribution for more information.
14  *
15  *   If you did not agree to a different license, then this copy of Lustre
16  *   is open source software; you can redistribute it and/or modify it
17  *   under the terms of version 2 of the GNU General Public License as
18  *   published by the Free Software Foundation.
19  *
20  *   In either case, Lustre is distributed in the hope that it will be
21  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
22  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  *   license text for more details.
24  *
25  */
26
27 #define DEBUG_SUBSYSTEM S_RPC
28
29 #ifdef __KERNEL__
30 # include <libcfs/libcfs.h>
31 #else /* __KERNEL__ */
32 # include <liblustre.h>
33 # include <ctype.h>
34 #endif
35
36 #include <libcfs/kp30.h>
37 #include <lustre_net.h>
38 # include <lustre_lib.h>
39
40 #include <lustre_ha.h>
41 #include <obd_class.h>   /* for obd_zombie */
42 #include <obd_support.h> /* for OBD_FAIL_CHECK */
43 #include <lprocfs_status.h>
44
/* Bit number in ptlrpcd_ctl::pc_flags; set by ptlrpcd_stop() to ask the
 * daemon to exit, and tested by ptlrpcd_check() and the ptlrpcd() main loop. */
#define LIOD_STOP 0
/* Per-daemon control block.  One instance exists for the regular ptlrpcd and
 * one for the recovery ptlrpcd. */
struct ptlrpcd_ctl {
        /* bit flags; only LIOD_STOP is used in this file */
        unsigned long             pc_flags;
        /* initialized in ptlrpcd_start(); no other use is visible here */
        spinlock_t                pc_lock;
        /* completed by the thread once daemonizing has been attempted;
         * ptlrpcd_start() waits on it */
        struct completion         pc_starting;
        /* completed by the thread when it leaves its main loop;
         * ptlrpcd_stop() waits on it */
        struct completion         pc_finishing;
        /* request set serviced by this daemon; allocated in ptlrpcd_start()
         * and destroyed in ptlrpcd_stop() */
        struct ptlrpc_request_set *pc_set;
        /* name handed to cfs_daemonize_ctxt() by the thread */
        char                      pc_name[16];
#ifndef __KERNEL__
        /* nesting depth of ptlrpcd_check_async_rpcs(); work is only done
         * at depth 1 */
        int                       pc_recurred;
        /* handle returned by liblustre_register_wait_callback() */
        void                     *pc_wait_callback;
        /* handle returned by liblustre_register_idle_callback() */
        void                     *pc_idle_callback;
#endif
};
59
/* Control block of the regular daemon: ptlrpcd_add_req() sends requests whose
 * send state is LUSTRE_IMP_FULL here. */
static struct ptlrpcd_ctl ptlrpcd_pc;
/* Control block of the recovery daemon: receives every other request. */
static struct ptlrpcd_ctl ptlrpcd_recovery_pc;

/* Taken around the user count in ptlrpcd_addref()/ptlrpcd_decref().
 * NOTE(review): not initialized in this file - presumably set up by the
 * module init code; confirm. */
struct semaphore ptlrpcd_sem;
/* Number of ptlrpcd_addref() holders; the daemons are started on the 0 -> 1
 * transition and stopped on the 1 -> 0 transition. */
static int ptlrpcd_users = 0;
65
66 void ptlrpcd_wake(struct ptlrpc_request *req)
67 {
68         struct ptlrpc_request_set *rq_set = req->rq_set;
69
70         LASSERT(rq_set != NULL);
71
72         cfs_waitq_signal(&rq_set->set_waitq);
73 }
74
75 /* requests that are added to the ptlrpcd queue are sent via
76  * ptlrpcd_check->ptlrpc_check_set() */
77 void ptlrpcd_add_req(struct ptlrpc_request *req)
78 {
79         struct ptlrpcd_ctl *pc;
80
81         if (req->rq_send_state == LUSTRE_IMP_FULL)
82                 pc = &ptlrpcd_pc;
83         else
84                 pc = &ptlrpcd_recovery_pc;
85
86         ptlrpc_set_add_new_req(pc->pc_set, req);
87         cfs_waitq_signal(&pc->pc_set->set_waitq);
88 }
89
90 static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
91 {
92         struct list_head *tmp, *pos;
93         struct ptlrpc_request *req;
94         int rc = 0;
95         ENTRY;
96
97         if (test_bit(LIOD_STOP, &pc->pc_flags))
98                 RETURN(1);
99
100         obd_zombie_impexp_cull();
101
102         spin_lock(&pc->pc_set->set_new_req_lock);
103         list_for_each_safe(pos, tmp, &pc->pc_set->set_new_requests) {
104                 req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
105                 list_del_init(&req->rq_set_chain);
106                 ptlrpc_set_add_req(pc->pc_set, req);
107                 rc = 1; /* need to calculate its timeout */
108         }
109         spin_unlock(&pc->pc_set->set_new_req_lock);
110
111         if (pc->pc_set->set_remaining) {
112                 rc = rc | ptlrpc_check_set(pc->pc_set);
113
114                 /* XXX our set never completes, so we prune the completed
115                  * reqs after each iteration. boy could this be smarter. */
116                 list_for_each_safe(pos, tmp, &pc->pc_set->set_requests) {
117                         req = list_entry(pos, struct ptlrpc_request,
118                                          rq_set_chain);
119                         if (req->rq_phase != RQ_PHASE_COMPLETE)
120                                 continue;
121
122                         list_del_init(&req->rq_set_chain);
123                         req->rq_set = NULL;
124                         ptlrpc_req_finished (req);
125                 }
126         }
127
128         if (rc == 0) {
129                 /* If new requests have been added, make sure to wake up */
130                 spin_lock(&pc->pc_set->set_new_req_lock);
131                 rc = !list_empty(&pc->pc_set->set_new_requests);
132                 spin_unlock(&pc->pc_set->set_new_req_lock);
133         }
134
135         RETURN(rc);
136 }
137
138 #ifdef __KERNEL__
139 /* ptlrpc's code paths like to execute in process context, so we have this
140  * thread which spins on a set which contains the io rpcs.  llite specifies
141  * ptlrpcd's set when it pushes pages down into the oscs */
142 static int ptlrpcd(void *arg)
143 {
144         struct ptlrpcd_ctl *pc = arg;
145         int rc;
146         ENTRY;
147
148         if ((rc = cfs_daemonize_ctxt(pc->pc_name))) {
149                 complete(&pc->pc_starting);
150                 return rc;
151         }
152
153         complete(&pc->pc_starting);
154
155         /* this mainloop strongly resembles ptlrpc_set_wait except
156          * that our set never completes.  ptlrpcd_check calls ptlrpc_check_set
157          * when there are requests in the set.  new requests come in
158          * on the set's new_req_list and ptlrpcd_check moves them into
159          * the set. */
160         while (1) {
161                 struct l_wait_info lwi;
162                 cfs_duration_t timeout;
163
164                 timeout = cfs_time_seconds(ptlrpc_set_next_timeout(pc->pc_set));
165                 lwi = LWI_TIMEOUT(timeout, ptlrpc_expired_set, pc->pc_set);
166
167                 l_wait_event(pc->pc_set->set_waitq, ptlrpcd_check(pc), &lwi);
168
169                 if (test_bit(LIOD_STOP, &pc->pc_flags))
170                         break;
171         }
172         /* wait for inflight requests to drain */
173         if (!list_empty(&pc->pc_set->set_requests))
174                 ptlrpc_set_wait(pc->pc_set);
175         complete(&pc->pc_finishing);
176         return 0;
177 }
178
179 static void ptlrpcd_zombie_impexp_notify(void)
180 {
181         LASSERT(ptlrpcd_pc.pc_set != NULL); // call before ptlrpcd inited ?
182
183         cfs_waitq_signal(&ptlrpcd_pc.pc_set->set_waitq);
184 }
185
186 #else
187
188 int ptlrpcd_check_async_rpcs(void *arg)
189 {
190         struct ptlrpcd_ctl *pc = arg;
191         int                  rc = 0;
192
193         /* single threaded!! */
194         pc->pc_recurred++;
195
196         if (pc->pc_recurred == 1) {
197                 rc = ptlrpcd_check(pc);
198                 if (!rc)
199                         ptlrpc_expired_set(pc->pc_set);
200                 /*XXX send replay requests */
201                 if (pc == &ptlrpcd_recovery_pc)
202                         rc = ptlrpcd_check(pc);
203         }
204
205         pc->pc_recurred--;
206         return rc;
207 }
208
209 int ptlrpcd_idle(void *arg)
210 {
211         struct ptlrpcd_ctl *pc = arg;
212
213         return (list_empty(&pc->pc_set->set_new_requests) &&
214                 pc->pc_set->set_remaining == 0);
215 }
216
217 #endif
218
219 static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
220 {
221         int rc;
222
223         ENTRY;
224         memset(pc, 0, sizeof(*pc));
225         init_completion(&pc->pc_starting);
226         init_completion(&pc->pc_finishing);
227         pc->pc_flags = 0;
228         spin_lock_init(&pc->pc_lock);
229         snprintf (pc->pc_name, sizeof (pc->pc_name), name);
230
231         pc->pc_set = ptlrpc_prep_set();
232         if (pc->pc_set == NULL)
233                 RETURN(-ENOMEM);
234
235 #ifdef __KERNEL__
236         /* wake ptlrpcd when zombie imports or exports exist */
237         obd_zombie_impexp_notify = ptlrpcd_zombie_impexp_notify;
238         
239         rc = cfs_kernel_thread(ptlrpcd, pc, 0);
240         if (rc < 0)  {
241                 ptlrpc_set_destroy(pc->pc_set);
242                 RETURN(rc);
243         }
244
245         wait_for_completion(&pc->pc_starting);
246 #else
247         pc->pc_wait_callback =
248                 liblustre_register_wait_callback("ptlrpcd_check_async_rpcs",
249                                                  &ptlrpcd_check_async_rpcs, pc);
250         pc->pc_idle_callback =
251                 liblustre_register_idle_callback("ptlrpcd_check_idle_rpcs",
252                                                  &ptlrpcd_idle, pc);
253         (void)rc;
254 #endif
255         RETURN(0);
256 }
257
258 static void ptlrpcd_stop(struct ptlrpcd_ctl *pc)
259 {
260         set_bit(LIOD_STOP, &pc->pc_flags);
261         cfs_waitq_signal(&pc->pc_set->set_waitq);
262 #ifdef __KERNEL__
263         obd_zombie_impexp_notify = NULL;
264         wait_for_completion(&pc->pc_finishing);
265 #else
266         liblustre_deregister_wait_callback(pc->pc_wait_callback);
267         liblustre_deregister_idle_callback(pc->pc_idle_callback);
268 #endif
269         ptlrpc_set_destroy(pc->pc_set);
270 }
271
272 int ptlrpcd_addref(void)
273 {
274         int rc = 0;
275         ENTRY;
276
277         mutex_down(&ptlrpcd_sem);
278         if (++ptlrpcd_users != 1)
279                 GOTO(out, rc);
280
281         rc = ptlrpcd_start("ptlrpcd", &ptlrpcd_pc);
282         if (rc) {
283                 --ptlrpcd_users;
284                 GOTO(out, rc);
285         }
286
287         rc = ptlrpcd_start("ptlrpcd-recov", &ptlrpcd_recovery_pc);
288         if (rc) {
289                 ptlrpcd_stop(&ptlrpcd_pc);
290                 --ptlrpcd_users;
291                 GOTO(out, rc);
292         }
293 out:
294         mutex_up(&ptlrpcd_sem);
295         RETURN(rc);
296 }
297
298 void ptlrpcd_decref(void)
299 {
300         mutex_down(&ptlrpcd_sem);
301         if (--ptlrpcd_users == 0) {
302                 ptlrpcd_stop(&ptlrpcd_pc);
303                 ptlrpcd_stop(&ptlrpcd_recovery_pc);
304         }
305         mutex_up(&ptlrpcd_sem);
306 }