Whamcloud - gitweb
- make HEAD from b_post_cmd3
[fs/lustre-release.git] / lustre / ptlrpc / ptlrpcd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
5  *   Author Peter Braam <braam@clusterfs.com>
6  *
7  *   This file is part of the Lustre file system, http://www.lustre.org
8  *   Lustre is a trademark of Cluster File Systems, Inc.
9  *
10  *   You may have signed or agreed to another license before downloading
11  *   this software.  If so, you are bound by the terms and conditions
12  *   of that agreement, and the following does not apply to you.  See the
13  *   LICENSE file included with this distribution for more information.
14  *
15  *   If you did not agree to a different license, then this copy of Lustre
16  *   is open source software; you can redistribute it and/or modify it
17  *   under the terms of version 2 of the GNU General Public License as
18  *   published by the Free Software Foundation.
19  *
20  *   In either case, Lustre is distributed in the hope that it will be
21  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
22  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  *   license text for more details.
24  *
25  */
26
27 #define DEBUG_SUBSYSTEM S_RPC
28
29 #ifdef __KERNEL__
30 # include <libcfs/libcfs.h>
31 #else /* __KERNEL__ */
32 # include <liblustre.h>
33 # include <ctype.h>
34 #endif
35
36 #include <libcfs/kp30.h>
37 #include <lustre_net.h>
38 # include <lustre_lib.h>
39
40 #include <lustre_ha.h>
41 #include <obd_class.h>   /* for obd_zombie */
42 #include <obd_support.h> /* for OBD_FAIL_CHECK */
43 #include <lprocfs_status.h>
44
45 #define LIOD_STOP 0
46 struct ptlrpcd_ctl {
47         unsigned long             pc_flags;
48         spinlock_t                pc_lock;
49         struct completion         pc_starting;
50         struct completion         pc_finishing;
51         struct list_head          pc_req_list;
52         cfs_waitq_t               pc_waitq;
53         struct ptlrpc_request_set *pc_set;
54         char                      pc_name[16];
55 #ifndef __KERNEL__
56         int                       pc_recurred;
57         void                     *pc_callback;
58         void                     *pc_wait_callback;
59         void                     *pc_idle_callback;
60 #endif
61 };
62
63 static struct ptlrpcd_ctl ptlrpcd_pc;
64 static struct ptlrpcd_ctl ptlrpcd_recovery_pc;
65
66 struct semaphore ptlrpcd_sem;
67 static int ptlrpcd_users = 0;
68
69 void ptlrpcd_wake(struct ptlrpc_request *req)
70 {
71         struct ptlrpcd_ctl *pc = req->rq_ptlrpcd_data;
72
73         LASSERT(pc != NULL);
74
75         cfs_waitq_signal(&pc->pc_waitq);
76 }
77
78 /* requests that are added to the ptlrpcd queue are sent via
79  * ptlrpcd_check->ptlrpc_check_set() */
80 void ptlrpcd_add_req(struct ptlrpc_request *req)
81 {
82         struct ptlrpcd_ctl *pc;
83
84         if (req->rq_send_state == LUSTRE_IMP_FULL)
85                 pc = &ptlrpcd_pc;
86         else
87                 pc = &ptlrpcd_recovery_pc;
88
89         req->rq_ptlrpcd_data = pc;
90         ptlrpc_set_add_new_req(pc->pc_set, req);
91         wake_up(&pc->pc_waitq);
92 }
93
94 static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
95 {
96         struct list_head *tmp, *pos;
97         struct ptlrpc_request *req;
98         int rc = 0;
99         ENTRY;
100
101         if (test_bit(LIOD_STOP, &pc->pc_flags))
102                 RETURN(1);
103
104         obd_zombie_impexp_cull();
105
106         spin_lock(&pc->pc_set->set_new_req_lock);
107         list_for_each_safe(pos, tmp, &pc->pc_set->set_new_requests) {
108                 req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
109                 list_del_init(&req->rq_set_chain);
110                 ptlrpc_set_add_req(pc->pc_set, req);
111                 rc = 1; /* need to calculate its timeout */
112         }
113         spin_unlock(&pc->pc_set->set_new_req_lock);
114
115         if (pc->pc_set->set_remaining) {
116                 rc = rc | ptlrpc_check_set(pc->pc_set);
117
118                 /* XXX our set never completes, so we prune the completed
119                  * reqs after each iteration. boy could this be smarter. */
120                 list_for_each_safe(pos, tmp, &pc->pc_set->set_requests) {
121                         req = list_entry(pos, struct ptlrpc_request,
122                                          rq_set_chain);
123                         if (req->rq_phase != RQ_PHASE_COMPLETE)
124                                 continue;
125
126                         list_del_init(&req->rq_set_chain);
127                         req->rq_set = NULL;
128                         ptlrpc_req_finished (req);
129                 }
130         }
131
132         if (rc == 0) {
133                 /* If new requests have been added, make sure to wake up */
134                 spin_lock(&pc->pc_set->set_new_req_lock);
135                 rc = !list_empty(&pc->pc_set->set_new_requests);
136                 spin_unlock(&pc->pc_set->set_new_req_lock);
137         }
138
139         RETURN(rc);
140 }
141
142 #ifdef __KERNEL__
143 /* ptlrpc's code paths like to execute in process context, so we have this
144  * thread which spins on a set which contains the io rpcs.  llite specifies
145  * ptlrpcd's set when it pushes pages down into the oscs */
146 static int ptlrpcd(void *arg)
147 {
148         struct ptlrpcd_ctl *pc = arg;
149         int rc;
150         ENTRY;
151
152         if ((rc = cfs_daemonize_ctxt(pc->pc_name))) {
153                 complete(&pc->pc_starting);
154                 return rc;
155         }
156
157         complete(&pc->pc_starting);
158
159         /* this mainloop strongly resembles ptlrpc_set_wait except
160          * that our set never completes.  ptlrpcd_check calls ptlrpc_check_set
161          * when there are requests in the set.  new requests come in
162          * on the set's new_req_list and ptlrpcd_check moves them into
163          * the set. */
164         while (1) {
165                 cfs_waitlink_t set_wait;
166                 struct l_wait_info lwi;
167                 cfs_duration_t timeout;
168
169                 timeout = cfs_time_seconds(ptlrpc_set_next_timeout(pc->pc_set));
170                 lwi = LWI_TIMEOUT(timeout, ptlrpc_expired_set, pc->pc_set);
171
172                 /* ala the pinger, wait on pc's waitqueue and the set's */
173                 cfs_waitlink_init(&set_wait);
174                 cfs_waitq_add(&pc->pc_set->set_waitq, &set_wait);
175                 cfs_waitq_forward(&set_wait, &pc->pc_waitq);
176                 l_wait_event(pc->pc_waitq, ptlrpcd_check(pc), &lwi);
177                 cfs_waitq_del(&pc->pc_set->set_waitq, &set_wait);
178
179                 if (test_bit(LIOD_STOP, &pc->pc_flags))
180                         break;
181         }
182         /* wait for inflight requests to drain */
183         if (!list_empty(&pc->pc_set->set_requests))
184                 ptlrpc_set_wait(pc->pc_set);
185         complete(&pc->pc_finishing);
186         return 0;
187 }
188
189 static void ptlrpcd_zombie_impexp_notify(void)
190 {
191         cfs_waitq_signal(&ptlrpcd_pc.pc_waitq);
192 }
193 #else
194
195 int ptlrpcd_check_async_rpcs(void *arg)
196 {
197         struct ptlrpcd_ctl *pc = arg;
198         int                  rc = 0;
199
200         /* single threaded!! */
201         pc->pc_recurred++;
202
203         if (pc->pc_recurred == 1) {
204                 rc = ptlrpcd_check(pc);
205                 if (!rc)
206                         ptlrpc_expired_set(pc->pc_set);
207                 /*XXX send replay requests */
208                 if (pc == &ptlrpcd_recovery_pc)
209                         rc = ptlrpcd_check(pc);
210         }
211
212         pc->pc_recurred--;
213         return rc;
214 }
215
216 int ptlrpcd_idle(void *arg)
217 {
218         struct ptlrpcd_ctl *pc = arg;
219
220         return (list_empty(&pc->pc_set->set_new_requests) &&
221                 pc->pc_set->set_remaining == 0);
222 }
223
224 #endif
225
226 static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
227 {
228         int rc;
229
230         ENTRY;
231         memset(pc, 0, sizeof(*pc));
232         init_completion(&pc->pc_starting);
233         init_completion(&pc->pc_finishing);
234         cfs_waitq_init(&pc->pc_waitq);
235         pc->pc_flags = 0;
236         spin_lock_init(&pc->pc_lock);
237         CFS_INIT_LIST_HEAD(&pc->pc_req_list);
238         snprintf (pc->pc_name, sizeof (pc->pc_name), name);
239
240         pc->pc_set = ptlrpc_prep_set();
241         if (pc->pc_set == NULL)
242                 RETURN(-ENOMEM);
243
244 #ifdef __KERNEL__
245         /* wake ptlrpcd when zombie imports or exports exist */
246         obd_zombie_impexp_notify = ptlrpcd_zombie_impexp_notify;
247         
248         rc = cfs_kernel_thread(ptlrpcd, pc, 0);
249         if (rc < 0)  {
250                 ptlrpc_set_destroy(pc->pc_set);
251                 RETURN(rc);
252         }
253
254         wait_for_completion(&pc->pc_starting);
255 #else
256         pc->pc_wait_callback =
257                 liblustre_register_wait_callback("ptlrpcd_check_async_rpcs",
258                                                  &ptlrpcd_check_async_rpcs, pc);
259         pc->pc_idle_callback =
260                 liblustre_register_idle_callback("ptlrpcd_check_idle_rpcs",
261                                                  &ptlrpcd_idle, pc);
262         (void)rc;
263 #endif
264         RETURN(0);
265 }
266
267 static void ptlrpcd_stop(struct ptlrpcd_ctl *pc)
268 {
269         set_bit(LIOD_STOP, &pc->pc_flags);
270         cfs_waitq_signal(&pc->pc_waitq);
271 #ifdef __KERNEL__
272         obd_zombie_impexp_notify = NULL;
273         wait_for_completion(&pc->pc_finishing);
274 #else
275         liblustre_deregister_wait_callback(pc->pc_wait_callback);
276         liblustre_deregister_idle_callback(pc->pc_idle_callback);
277 #endif
278         ptlrpc_set_destroy(pc->pc_set);
279 }
280
281 int ptlrpcd_addref(void)
282 {
283         int rc = 0;
284         ENTRY;
285
286         mutex_down(&ptlrpcd_sem);
287         if (++ptlrpcd_users != 1)
288                 GOTO(out, rc);
289
290         rc = ptlrpcd_start("ptlrpcd", &ptlrpcd_pc);
291         if (rc) {
292                 --ptlrpcd_users;
293                 GOTO(out, rc);
294         }
295
296         rc = ptlrpcd_start("ptlrpcd-recov", &ptlrpcd_recovery_pc);
297         if (rc) {
298                 ptlrpcd_stop(&ptlrpcd_pc);
299                 --ptlrpcd_users;
300                 GOTO(out, rc);
301         }
302 out:
303         mutex_up(&ptlrpcd_sem);
304         RETURN(rc);
305 }
306
307 void ptlrpcd_decref(void)
308 {
309         mutex_down(&ptlrpcd_sem);
310         if (--ptlrpcd_users == 0) {
311                 ptlrpcd_stop(&ptlrpcd_pc);
312                 ptlrpcd_stop(&ptlrpcd_recovery_pc);
313         }
314         mutex_up(&ptlrpcd_sem);
315 }