Whamcloud - gitweb
- merge with 1_5,some fixes.
[fs/lustre-release.git] / lustre / ptlrpc / ptlrpcd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
5  *   Author Peter Braam <braam@clusterfs.com>
6  *
7  *   This file is part of the Lustre file system, http://www.lustre.org
8  *   Lustre is a trademark of Cluster File Systems, Inc.
9  *
10  *   You may have signed or agreed to another license before downloading
11  *   this software.  If so, you are bound by the terms and conditions
12  *   of that agreement, and the following does not apply to you.  See the
13  *   LICENSE file included with this distribution for more information.
14  *
15  *   If you did not agree to a different license, then this copy of Lustre
16  *   is open source software; you can redistribute it and/or modify it
17  *   under the terms of version 2 of the GNU General Public License as
18  *   published by the Free Software Foundation.
19  *
20  *   In either case, Lustre is distributed in the hope that it will be
21  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
22  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  *   license text for more details.
24  *
25  */
26
27 #define DEBUG_SUBSYSTEM S_RPC
28
29 #ifdef __KERNEL__
30 # include <libcfs/libcfs.h>
31 #else /* __KERNEL__ */
32 # include <liblustre.h>
33 # include <ctype.h>
34 #endif
35
36 #include <libcfs/kp30.h>
37 #include <lustre_net.h>
38 # include <lustre_lib.h>
39
40 #include <lustre_ha.h>
41 #include <obd_support.h> /* for OBD_FAIL_CHECK */
42 #include <lprocfs_status.h>
43
44 #define LIOD_STOP 0
45 struct ptlrpcd_ctl {
46         unsigned long             pc_flags;
47         spinlock_t                pc_lock;
48         struct completion         pc_starting;
49         struct completion         pc_finishing;
50         struct list_head          pc_req_list;
51         cfs_waitq_t               pc_waitq;
52         struct ptlrpc_request_set *pc_set;
53         char                      pc_name[16];
54 #ifndef __KERNEL__
55         int                       pc_recurred;
56         void                     *pc_callback;
57 #endif
58 };
59
60 static struct ptlrpcd_ctl ptlrpcd_pc;
61 static struct ptlrpcd_ctl ptlrpcd_recovery_pc;
62
63 struct semaphore ptlrpcd_sem;
64 static int ptlrpcd_users = 0;
65
66 void ptlrpcd_wake(struct ptlrpc_request *req)
67 {
68         struct ptlrpcd_ctl *pc = req->rq_ptlrpcd_data;
69
70         LASSERT(pc != NULL);
71
72         cfs_waitq_signal(&pc->pc_waitq);
73 }
74
75 /* requests that are added to the ptlrpcd queue are sent via
76  * ptlrpcd_check->ptlrpc_check_set() */
77 void ptlrpcd_add_req(struct ptlrpc_request *req)
78 {
79         struct ptlrpcd_ctl *pc;
80
81         if (req->rq_send_state == LUSTRE_IMP_FULL)
82                 pc = &ptlrpcd_pc;
83         else
84                 pc = &ptlrpcd_recovery_pc;
85
86         req->rq_ptlrpcd_data = pc;
87         ptlrpc_set_add_new_req(pc->pc_set, req);
88         wake_up(&pc->pc_waitq);
89 }
90
91 static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
92 {
93         struct list_head *tmp, *pos;
94         struct ptlrpc_request *req;
95         int rc = 0;
96         ENTRY;
97
98         if (test_bit(LIOD_STOP, &pc->pc_flags))
99                 RETURN(1);
100
101         spin_lock(&pc->pc_set->set_new_req_lock);
102         list_for_each_safe(pos, tmp, &pc->pc_set->set_new_requests) {
103                 req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
104                 list_del_init(&req->rq_set_chain);
105                 ptlrpc_set_add_req(pc->pc_set, req);
106                 rc = 1; /* need to calculate its timeout */
107         }
108         spin_unlock(&pc->pc_set->set_new_req_lock);
109
110         if (pc->pc_set->set_remaining) {
111                 rc = rc | ptlrpc_check_set(pc->pc_set);
112
113                 /* XXX our set never completes, so we prune the completed
114                  * reqs after each iteration. boy could this be smarter. */
115                 list_for_each_safe(pos, tmp, &pc->pc_set->set_requests) {
116                         req = list_entry(pos, struct ptlrpc_request,
117                                          rq_set_chain);
118                         if (req->rq_phase != RQ_PHASE_COMPLETE)
119                                 continue;
120
121                         list_del_init(&req->rq_set_chain);
122                         req->rq_set = NULL;
123                         ptlrpc_req_finished (req);
124                 }
125         }
126
127         if (rc == 0) {
128                 /* If new requests have been added, make sure to wake up */
129                 spin_lock(&pc->pc_set->set_new_req_lock);
130                 rc = !list_empty(&pc->pc_set->set_new_requests);
131                 spin_unlock(&pc->pc_set->set_new_req_lock);
132         }
133
134         RETURN(rc);
135 }
136
137 #ifdef __KERNEL__
138 /* ptlrpc's code paths like to execute in process context, so we have this
139  * thread which spins on a set which contains the io rpcs.  llite specifies
140  * ptlrpcd's set when it pushes pages down into the oscs */
141 static int ptlrpcd(void *arg)
142 {
143         struct ptlrpcd_ctl *pc = arg;
144         ENTRY;
145
146         cfs_daemonize(pc->pc_name);
147
148         complete(&pc->pc_starting);
149
150         /* this mainloop strongly resembles ptlrpc_set_wait except
151          * that our set never completes.  ptlrpcd_check calls ptlrpc_check_set
152          * when there are requests in the set.  new requests come in
153          * on the set's new_req_list and ptlrpcd_check moves them into
154          * the set. */
155         while (1) {
156                 cfs_waitlink_t set_wait;
157                 struct l_wait_info lwi;
158                 cfs_duration_t timeout;
159
160                 timeout = cfs_time_seconds(ptlrpc_set_next_timeout(pc->pc_set));
161                 lwi = LWI_TIMEOUT(timeout, ptlrpc_expired_set, pc->pc_set);
162
163                 /* ala the pinger, wait on pc's waitqueue and the set's */
164                 cfs_waitlink_init(&set_wait);
165                 cfs_waitq_add(&pc->pc_set->set_waitq, &set_wait);
166                 cfs_waitq_forward(&set_wait, &pc->pc_waitq);
167                 l_wait_event(pc->pc_waitq, ptlrpcd_check(pc), &lwi);
168                 cfs_waitq_del(&pc->pc_set->set_waitq, &set_wait);
169
170                 if (test_bit(LIOD_STOP, &pc->pc_flags))
171                         break;
172         }
173         /* wait for inflight requests to drain */
174         if (!list_empty(&pc->pc_set->set_requests))
175                 ptlrpc_set_wait(pc->pc_set);
176         complete(&pc->pc_finishing);
177         return 0;
178 }
179 #else
180
181 int ptlrpcd_check_async_rpcs(void *arg)
182 {
183         struct ptlrpcd_ctl *pc = arg;
184         int                  rc = 0;
185
186         /* single threaded!! */
187         pc->pc_recurred++;
188
189         if (pc->pc_recurred == 1) {
190                 rc = ptlrpcd_check(pc);
191                 if (!rc)
192                         ptlrpc_expired_set(pc->pc_set);
193         }
194
195         pc->pc_recurred--;
196         return rc;
197 }
198 #endif
199
200 static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
201 {
202         int rc;
203
204         ENTRY;
205         memset(pc, 0, sizeof(*pc));
206         init_completion(&pc->pc_starting);
207         init_completion(&pc->pc_finishing);
208         cfs_waitq_init(&pc->pc_waitq);
209         pc->pc_flags = 0;
210         spin_lock_init(&pc->pc_lock);
211         CFS_INIT_LIST_HEAD(&pc->pc_req_list);
212         snprintf (pc->pc_name, sizeof (pc->pc_name), name);
213
214         pc->pc_set = ptlrpc_prep_set();
215         if (pc->pc_set == NULL)
216                 RETURN(-ENOMEM);
217
218 #ifdef __KERNEL__
219         rc = cfs_kernel_thread(ptlrpcd, pc, 0);
220         if (rc < 0)  {
221                 ptlrpc_set_destroy(pc->pc_set);
222                 RETURN(rc);
223         }
224
225         wait_for_completion(&pc->pc_starting);
226 #else
227         pc->pc_callback =
228                 liblustre_register_wait_callback(&ptlrpcd_check_async_rpcs, pc);
229         (void)rc;
230 #endif
231         RETURN(0);
232 }
233
234 static void ptlrpcd_stop(struct ptlrpcd_ctl *pc)
235 {
236         set_bit(LIOD_STOP, &pc->pc_flags);
237         cfs_waitq_signal(&pc->pc_waitq);
238 #ifdef __KERNEL__
239         wait_for_completion(&pc->pc_finishing);
240 #else
241         liblustre_deregister_wait_callback(pc->pc_callback);
242 #endif
243         ptlrpc_set_destroy(pc->pc_set);
244 }
245
246 int ptlrpcd_addref(void)
247 {
248         int rc = 0;
249         ENTRY;
250
251         mutex_down(&ptlrpcd_sem);
252         if (++ptlrpcd_users != 1)
253                 GOTO(out, rc);
254
255         rc = ptlrpcd_start("ptlrpcd", &ptlrpcd_pc);
256         if (rc) {
257                 --ptlrpcd_users;
258                 GOTO(out, rc);
259         }
260
261         rc = ptlrpcd_start("ptlrpcd-recov", &ptlrpcd_recovery_pc);
262         if (rc) {
263                 ptlrpcd_stop(&ptlrpcd_pc);
264                 --ptlrpcd_users;
265                 GOTO(out, rc);
266         }
267 out:
268         mutex_up(&ptlrpcd_sem);
269         RETURN(rc);
270 }
271
272 void ptlrpcd_decref(void)
273 {
274         mutex_down(&ptlrpcd_sem);
275         if (--ptlrpcd_users == 0) {
276                 ptlrpcd_stop(&ptlrpcd_pc);
277                 ptlrpcd_stop(&ptlrpcd_recovery_pc);
278         }
279         mutex_up(&ptlrpcd_sem);
280 }