Whamcloud - gitweb
Land b1_4_smallfix onto b1_4 (20041216_1438)
[fs/lustre-release.git] / lustre / ptlrpc / ptlrpcd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
5  *   Author Peter Braam <braam@clusterfs.com>
6  *
7  *   This file is part of Lustre, http://www.lustre.org.
8  *
9  *   Lustre is free software; you can redistribute it and/or
10  *   modify it under the terms of version 2 of the GNU General Public
11  *   License as published by the Free Software Foundation.
12  *
13  *   Lustre is distributed in the hope that it will be useful,
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *   GNU General Public License for more details.
17  *
18  *   You should have received a copy of the GNU General Public License
19  *   along with Lustre; if not, write to the Free Software
20  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  *
22  */
23
24 #define DEBUG_SUBSYSTEM S_RPC
25
26 #ifdef __KERNEL__
27 # include <linux/version.h>
28 # include <linux/module.h>
29 # include <linux/mm.h>
30 # include <linux/highmem.h>
31 # include <linux/lustre_dlm.h>
32 # if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
33 #  include <linux/workqueue.h>
34 #  include <linux/smp_lock.h>
35 # else
36 #  include <linux/locks.h>
37 # endif
38 # include <linux/ctype.h>
39 # include <linux/init.h>
40 #else /* __KERNEL__ */
41 # include <liblustre.h>
42 # include <ctype.h>
43 #endif
44
45 #include <linux/kp30.h>
46 #include <linux/lustre_net.h>
47
48 #include <linux/lustre_ha.h>
49 #include <linux/obd_support.h> /* for OBD_FAIL_CHECK */
50 #include <linux/lprocfs_status.h>
51
/* bit number in pc_flags: when set, asks the daemon to shut down */
#define LIOD_STOP 0
/* Per-daemon control block: one instance drives one ptlrpc request set. */
struct ptlrpcd_ctl {
        unsigned long             pc_flags;     /* LIOD_* bits */
        spinlock_t                pc_lock;      /* guards pc_req_list */
        struct completion         pc_starting;  /* fired once the thread is up */
        struct completion         pc_finishing; /* fired when the thread exits */
        struct list_head          pc_req_list;  /* NOTE(review): initialized in
                                                 * ptlrpcd_start() but otherwise
                                                 * unused in this file */
        wait_queue_head_t         pc_waitq;     /* wakes the daemon on new work */
        struct ptlrpc_request_set *pc_set;      /* the rpc set this daemon drives */
#ifndef __KERNEL__
        int                       pc_recurred;  /* re-entrancy guard for callback */
        void                     *pc_callback;  /* liblustre wait-callback handle */
#endif
};
66
/* Two daemons: ptlrpcd_pc handles requests whose import is FULL; everything
 * else (recovery-state traffic) goes to ptlrpcd_recovery_pc — see
 * ptlrpcd_add_req(). */
static struct ptlrpcd_ctl ptlrpcd_pc;
static struct ptlrpcd_ctl ptlrpcd_recovery_pc;

/* serializes start/stop; ptlrpcd_users counts addref/decref callers */
static DECLARE_MUTEX(ptlrpcd_sem);
static int ptlrpcd_users = 0;
72
/* Wake the daemon that owns @req so it can make progress on it.
 * Requires req->rq_ptlrpcd_data to have been set by ptlrpcd_add_req(). */
void ptlrpcd_wake(struct ptlrpc_request *req)
{
        struct ptlrpcd_ctl *pc = req->rq_ptlrpcd_data;

        LASSERT(pc != NULL);

        wake_up(&pc->pc_waitq);
}
81
82 void ptlrpcd_add_req(struct ptlrpc_request *req)
83 {
84         struct ptlrpcd_ctl *pc;
85
86         if (req->rq_send_state == LUSTRE_IMP_FULL)
87                 pc = &ptlrpcd_pc;
88         else 
89                 pc = &ptlrpcd_recovery_pc;
90
91         ptlrpc_set_add_new_req(pc->pc_set, req);
92         req->rq_ptlrpcd_data = pc;
93                 
94         ptlrpcd_wake(req);
95 }
96
/* Absorb newly-queued requests into the daemon's set and drive the set.
 *
 * Returns non-zero when the daemon's wait loop should stop sleeping:
 * the stop bit is set, new requests were moved in (the wait timeout must
 * be recalculated), ptlrpc_check_set() reported progress, or yet more new
 * requests arrived while we were busy here. */
static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
{
        struct list_head *tmp, *pos;
        struct ptlrpc_request *req;
        unsigned long flags;
        int rc = 0;
        ENTRY;

        if (test_bit(LIOD_STOP, &pc->pc_flags))
                RETURN(1);

        /* drain the producers' new_requests list into the set proper;
         * set_new_req_lock is the only lock producers and we contend on */
        spin_lock_irqsave(&pc->pc_set->set_new_req_lock, flags);
        list_for_each_safe(pos, tmp, &pc->pc_set->set_new_requests) {
                req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
                list_del_init(&req->rq_set_chain);
                ptlrpc_set_add_req(pc->pc_set, req);
                rc = 1; /* need to calculate its timeout */
        }
        spin_unlock_irqrestore(&pc->pc_set->set_new_req_lock, flags);

        if (pc->pc_set->set_remaining) {
                rc = rc | ptlrpc_check_set(pc->pc_set);

                /* XXX our set never completes, so we prune the completed
                 * reqs after each iteration. boy could this be smarter. */
                list_for_each_safe(pos, tmp, &pc->pc_set->set_requests) {
                        req = list_entry(pos, struct ptlrpc_request,
                                         rq_set_chain);
                        if (req->rq_phase != RQ_PHASE_COMPLETE)
                                continue;

                        /* unlink and drop the set's reference; this is the
                         * final cleanup for daemon-driven rpcs */
                        list_del_init(&req->rq_set_chain);
                        req->rq_set = NULL;
                        ptlrpc_req_finished (req);
                }
        }

        if (rc == 0) {
                /* If new requests have been added, make sure to wake up */
                spin_lock_irqsave(&pc->pc_set->set_new_req_lock, flags);
                rc = !list_empty(&pc->pc_set->set_new_requests);
                spin_unlock_irqrestore(&pc->pc_set->set_new_req_lock, flags);
        }

        RETURN(rc);
}
143
144 #ifdef __KERNEL__
/* ptlrpc's code paths like to execute in process context, so we have this
 * thread which spins on a set which contains the io rpcs.  llite specifies
 * ptlrpcd's set when it pushes pages down into the oscs */
static int ptlrpcd(void *arg)
{
        struct ptlrpcd_ctl *pc = arg;
        unsigned long flags;
        ENTRY;

        kportal_daemonize("ptlrpcd");

        /* block all signals; shutdown is requested via LIOD_STOP instead */
        SIGNAL_MASK_LOCK(current, flags);
        sigfillset(&current->blocked);
        RECALC_SIGPENDING;
        SIGNAL_MASK_UNLOCK(current, flags);

        /* let ptlrpcd_start() return now that we are serving */
        complete(&pc->pc_starting);

        /* this mainloop strongly resembles ptlrpc_set_wait except
         * that our set never completes.  ptlrpcd_check calls ptlrpc_check_set
         * when there are requests in the set.  new requests come in
         * on the set's new_req_list and ptlrpcd_check moves them into
         * the set. */
        while (1) {
                wait_queue_t set_wait;
                struct l_wait_info lwi;
                int timeout;

                timeout = ptlrpc_set_next_timeout(pc->pc_set) * HZ;
                lwi = LWI_TIMEOUT(timeout, ptlrpc_expired_set, pc->pc_set);

                /* ala the pinger, wait on pc's waitqueue and the set's */
                init_waitqueue_entry(&set_wait, current);
                add_wait_queue(&pc->pc_set->set_waitq, &set_wait);
                l_wait_event(pc->pc_waitq, ptlrpcd_check(pc), &lwi);
                remove_wait_queue(&pc->pc_set->set_waitq, &set_wait);

                /* ptlrpcd_check() returns 1 when the stop bit is set, so we
                 * always fall through here promptly on shutdown */
                if (test_bit(LIOD_STOP, &pc->pc_flags))
                        break;
        }
        /* wait for inflight requests to drain */
        if (!list_empty(&pc->pc_set->set_requests))
                ptlrpc_set_wait(pc->pc_set);
        complete(&pc->pc_finishing);
        return 0;
}
191 #else
192
193 int ptlrpcd_check_async_rpcs(void *arg)
194 {
195         struct ptlrpcd_ctl *pc = arg;
196         int                  rc = 0;
197
198         /* single threaded!! */
199         pc->pc_recurred++;
200
201         if (pc->pc_recurred == 1)
202                 rc = ptlrpcd_check(pc);
203
204         pc->pc_recurred--;
205         return rc;
206 }
207 #endif
208
209 static int ptlrpcd_start(struct ptlrpcd_ctl *pc)
210 {
211         int rc = 0;
212
213         memset(pc, 0, sizeof(*pc));
214         init_completion(&pc->pc_starting);
215         init_completion(&pc->pc_finishing);
216         init_waitqueue_head(&pc->pc_waitq);
217         pc->pc_flags = 0;
218         spin_lock_init(&pc->pc_lock);
219         INIT_LIST_HEAD(&pc->pc_req_list);
220
221         pc->pc_set = ptlrpc_prep_set();
222         if (pc->pc_set == NULL)
223                 GOTO(out, rc = -ENOMEM);
224
225 #ifdef __KERNEL__
226         if (kernel_thread(ptlrpcd, pc, 0) < 0)  {
227                 ptlrpc_set_destroy(pc->pc_set);
228                 GOTO(out, rc = -ECHILD);
229         }
230
231         wait_for_completion(&pc->pc_starting);
232 #else
233         pc->pc_callback =
234                 liblustre_register_wait_callback(&ptlrpcd_check_async_rpcs, pc);
235 #endif
236 out:
237         RETURN(rc);
238 }
239
/* Ask a daemon to shut down and release its resources.  Ordering matters:
 * set the stop bit, wake the thread so it notices, wait for it to drain
 * and exit, and only then destroy the (now idle) request set. */
static void ptlrpcd_stop(struct ptlrpcd_ctl *pc)
{
        set_bit(LIOD_STOP, &pc->pc_flags);
        wake_up(&pc->pc_waitq);
#ifdef __KERNEL__
        /* the thread signals pc_finishing after draining in-flight rpcs */
        wait_for_completion(&pc->pc_finishing);
#else
        liblustre_deregister_wait_callback(pc->pc_callback);
#endif
        ptlrpc_set_destroy(pc->pc_set);
}
251
/* Take a reference on the ptlrpcd service; the first reference starts both
 * daemons.  Returns 0 on success or a negative errno, in which case the
 * user count is rolled back and no daemon is left running. */
int ptlrpcd_addref(void)
{
        int rc = 0;
        ENTRY;

        down(&ptlrpcd_sem);
        if (++ptlrpcd_users != 1)
                GOTO(out, rc); /* daemons already running; just count us */

        rc = ptlrpcd_start(&ptlrpcd_pc);
        if (rc) {
                --ptlrpcd_users;
                GOTO(out, rc);
        }

        rc = ptlrpcd_start(&ptlrpcd_recovery_pc);
        if (rc) {
                /* undo the first daemon too so failure leaves no state */
                ptlrpcd_stop(&ptlrpcd_pc);
                --ptlrpcd_users;
                GOTO(out, rc);
        }
out:
        up(&ptlrpcd_sem);
        RETURN(rc);
}
277
/* Drop a reference on the ptlrpcd service; the last caller shuts both
 * daemons down. */
void ptlrpcd_decref(void)
{
        down(&ptlrpcd_sem);
        if (--ptlrpcd_users == 0) {
                ptlrpcd_stop(&ptlrpcd_pc);
                ptlrpcd_stop(&ptlrpcd_recovery_pc);
        }
        up(&ptlrpcd_sem);
}