lustre/ptlrpc/ptlrpcd.c
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
 *   Author Peter Braam <braam@clusterfs.com>
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#define DEBUG_SUBSYSTEM S_RPC

#ifdef __KERNEL__
# include <linux/version.h>
# include <linux/module.h>
# include <linux/mm.h>
# include <linux/highmem.h>
# include <linux/lustre_dlm.h>
# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
#  include <linux/workqueue.h>
#  include <linux/smp_lock.h>
# else
#  include <linux/locks.h>
# endif
#else /* __KERNEL__ */
# include <liblustre.h>
#endif

#include <linux/kp30.h>
#include <linux/lustre_net.h>

#ifndef __CYGWIN__
# include <linux/init.h>
#else
# include <ctype.h>
#endif

#include <linux/lustre_ha.h>
#include <linux/obd_support.h> /* for OBD_FAIL_CHECK */
#include <linux/lprocfs_status.h>

#define LIOD_STOP 0
struct ptlrpcd_ctl {
        unsigned long             pc_flags;      /* LIOD_* bits (LIOD_STOP) */
        spinlock_t                pc_lock;
        struct completion         pc_starting;   /* daemon thread has started */
        struct completion         pc_finishing;  /* daemon thread has exited */
        struct list_head          pc_req_list;
        wait_queue_head_t         pc_waitq;      /* woken when requests are queued */
        struct ptlrpc_request_set *pc_set;       /* the set this daemon drives */
#ifndef __KERNEL__
        int                       pc_recurred;   /* re-entry guard for the check callback */
        void                     *pc_callback;   /* liblustre wait callback handle */
#endif
};
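
/*
 * Two daemon contexts: ptlrpcd_pc services requests whose import is FULL,
 * while ptlrpcd_recovery_pc services requests that must be sent in other
 * import states (see ptlrpcd_add_req()).  Both are started by the first
 * ptlrpcd_addref() and stopped by the last ptlrpcd_decref().
 */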
static struct ptlrpcd_ctl ptlrpcd_pc;
static struct ptlrpcd_ctl ptlrpcd_recovery_pc;

static DECLARE_MUTEX(ptlrpcd_sem);
static int ptlrpcd_users = 0;

void ptlrpcd_wake(struct ptlrpc_request *req)
{
        struct ptlrpcd_ctl *pc = req->rq_ptlrpcd_data;

        LASSERT(pc != NULL);

        wake_up(&pc->pc_waitq);
}

void ptlrpcd_add_req(struct ptlrpc_request *req)
{
        struct ptlrpcd_ctl *pc;

        /* requests on a FULL import go to the main daemon; requests that
         * must be sent in other import states (e.g. during recovery) go
         * to the recovery daemon */
        if (req->rq_send_state == LUSTRE_IMP_FULL)
                pc = &ptlrpcd_pc;
        else
                pc = &ptlrpcd_recovery_pc;

        ptlrpc_set_add_new_req(pc->pc_set, req);
        req->rq_ptlrpcd_data = pc;

        ptlrpcd_wake(req);
}
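
/*
 * Usage sketch (illustrative only): a caller builds an asynchronous request,
 * points rq_interpret_reply at its completion handler, and hands the request
 * to ptlrpcd instead of waiting on it itself.  my_build_async_req() and
 * my_interpret() are hypothetical names, not part of this code:
 *
 *      struct ptlrpc_request *req = my_build_async_req(imp);
 *      if (req != NULL) {
 *              req->rq_interpret_reply = my_interpret;
 *              ptlrpcd_add_req(req);
 *      }
 */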

/* Move newly queued requests into the set, make progress on the set and
 * prune requests that have completed.  Returns non-zero when the daemon
 * should stop waiting: a stop was requested, new requests were queued, or
 * ptlrpc_check_set() reports the current set as complete.  This is the
 * condition used by l_wait_event() in ptlrpcd() below. */
static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
{
        struct list_head *tmp, *pos;
        struct ptlrpc_request *req;
        unsigned long flags;
        int rc = 0;
        ENTRY;

        if (test_bit(LIOD_STOP, &pc->pc_flags))
                RETURN(1);

        spin_lock_irqsave(&pc->pc_set->set_new_req_lock, flags);
        list_for_each_safe(pos, tmp, &pc->pc_set->set_new_requests) {
                req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
                list_del_init(&req->rq_set_chain);
                ptlrpc_set_add_req(pc->pc_set, req);
                rc = 1; /* need to calculate its timeout */
        }
        spin_unlock_irqrestore(&pc->pc_set->set_new_req_lock, flags);

        if (pc->pc_set->set_remaining) {
                rc = rc | ptlrpc_check_set(pc->pc_set);

                /* XXX our set never completes, so we prune the completed
                 * reqs after each iteration. boy could this be smarter. */
                list_for_each_safe(pos, tmp, &pc->pc_set->set_requests) {
                        req = list_entry(pos, struct ptlrpc_request,
                                         rq_set_chain);
                        if (req->rq_phase != RQ_PHASE_COMPLETE)
                                continue;

                        list_del_init(&req->rq_set_chain);
                        req->rq_set = NULL;
                        ptlrpc_req_finished(req);
                }
        }

        RETURN(rc);
}

#ifdef __KERNEL__
/* ptlrpc's code paths like to execute in process context, so we have this
 * thread, which spins on a set containing the I/O RPCs.  llite specifies
 * ptlrpcd's set when it pushes pages down into the OSCs. */
static int ptlrpcd(void *arg)
{
        struct ptlrpcd_ctl *pc = arg;
        unsigned long flags;
        ENTRY;

        kportal_daemonize("ptlrpcd");

        SIGNAL_MASK_LOCK(current, flags);
        sigfillset(&current->blocked);
        RECALC_SIGPENDING;
        SIGNAL_MASK_UNLOCK(current, flags);

        complete(&pc->pc_starting);

        /* This mainloop strongly resembles ptlrpc_set_wait(), except that
         * our set never completes.  ptlrpcd_check() calls ptlrpc_check_set()
         * when there are requests in the set.  New requests come in on the
         * set's new_req_list and ptlrpcd_check() moves them into the set. */
        while (1) {
                wait_queue_t set_wait;
                struct l_wait_info lwi;
                int timeout;

                timeout = ptlrpc_set_next_timeout(pc->pc_set) * HZ;
                lwi = LWI_TIMEOUT(timeout, ptlrpc_expired_set, pc->pc_set);

                /* as with the pinger, wait on pc's waitqueue and the set's */
                init_waitqueue_entry(&set_wait, current);
                add_wait_queue(&pc->pc_set->set_waitq, &set_wait);
                l_wait_event(pc->pc_waitq, ptlrpcd_check(pc), &lwi);
                remove_wait_queue(&pc->pc_set->set_waitq, &set_wait);

                if (test_bit(LIOD_STOP, &pc->pc_flags))
                        break;
        }
        /* wait for inflight requests to drain */
        if (!list_empty(&pc->pc_set->set_requests))
                ptlrpc_set_wait(pc->pc_set);
        complete(&pc->pc_finishing);
        return 0;
}
#else

int ptlrpcd_check_async_rpcs(void *arg)
{
        struct ptlrpcd_ctl *pc = arg;
        int                  rc = 0;

        /* single threaded!!  Don't run the check re-entrantly. */
        pc->pc_recurred++;

        if (pc->pc_recurred == 1)
                rc = ptlrpcd_check(pc);

        pc->pc_recurred--;
        return rc;
}
#endif
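
/*
 * Note: in liblustre (userspace) builds there is no daemon thread at all.
 * ptlrpcd_start() below registers ptlrpcd_check_async_rpcs() as a wait
 * callback, so async RPCs make progress whenever liblustre runs its
 * registered wait callbacks rather than from a dedicated thread.
 */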

static int ptlrpcd_start(struct ptlrpcd_ctl *pc)
{
        int rc = 0;

        memset(pc, 0, sizeof(*pc));
        init_completion(&pc->pc_starting);
        init_completion(&pc->pc_finishing);
        init_waitqueue_head(&pc->pc_waitq);
        pc->pc_flags = 0;
        spin_lock_init(&pc->pc_lock);
        INIT_LIST_HEAD(&pc->pc_req_list);

        pc->pc_set = ptlrpc_prep_set();
        if (pc->pc_set == NULL)
                GOTO(out, rc = -ENOMEM);

#ifdef __KERNEL__
        if (kernel_thread(ptlrpcd, pc, 0) < 0) {
                ptlrpc_set_destroy(pc->pc_set);
                GOTO(out, rc = -ECHILD);
        }

        wait_for_completion(&pc->pc_starting);
#else
        pc->pc_callback =
                liblustre_register_wait_callback(&ptlrpcd_check_async_rpcs, pc);
#endif
out:
        RETURN(rc);
}

static void ptlrpcd_stop(struct ptlrpcd_ctl *pc)
{
        set_bit(LIOD_STOP, &pc->pc_flags);
        wake_up(&pc->pc_waitq);
#ifdef __KERNEL__
        wait_for_completion(&pc->pc_finishing);
#else
        liblustre_deregister_wait_callback(pc->pc_callback);
#endif
        ptlrpc_set_destroy(pc->pc_set);
}

/* Take a reference on ptlrpcd; the first caller starts both contexts. */
int ptlrpcd_addref(void)
{
        int rc = 0;
        ENTRY;

        down(&ptlrpcd_sem);
        if (++ptlrpcd_users != 1)
                GOTO(out, rc);

        rc = ptlrpcd_start(&ptlrpcd_pc);
        if (rc) {
                --ptlrpcd_users;
                GOTO(out, rc);
        }

        rc = ptlrpcd_start(&ptlrpcd_recovery_pc);
        if (rc) {
                ptlrpcd_stop(&ptlrpcd_pc);
                --ptlrpcd_users;
                GOTO(out, rc);
        }
out:
        up(&ptlrpcd_sem);
        RETURN(rc);
}

/* Drop a reference; the last caller shuts both contexts down. */
void ptlrpcd_decref(void)
{
        down(&ptlrpcd_sem);
        if (--ptlrpcd_users == 0) {
                ptlrpcd_stop(&ptlrpcd_pc);
                ptlrpcd_stop(&ptlrpcd_recovery_pc);
        }
        up(&ptlrpcd_sem);
}
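
/*
 * Usage sketch (illustrative only): users of ptlrpcd take a reference for as
 * long as they may queue requests with ptlrpcd_add_req(), and drop it when
 * they are done.  my_client_setup()/my_client_cleanup() are hypothetical:
 *
 *      static int my_client_setup(void)
 *      {
 *              int rc = ptlrpcd_addref();
 *              if (rc)
 *                      return rc;
 *              ...
 *              return 0;
 *      }
 *
 *      static void my_client_cleanup(void)
 *      {
 *              ...
 *              ptlrpcd_decref();
 *      }
 */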