1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001-2003 Cluster File Systems, Inc.
5 * Author Peter Braam <braam@clusterfs.com>
7 * This file is part of the Lustre file system, http://www.lustre.org
8 * Lustre is a trademark of Cluster File Systems, Inc.
10 * You may have signed or agreed to another license before downloading
11 * this software. If so, you are bound by the terms and conditions
12 * of that agreement, and the following does not apply to you. See the
13 * LICENSE file included with this distribution for more information.
15 * If you did not agree to a different license, then this copy of Lustre
16 * is open source software; you can redistribute it and/or modify it
17 * under the terms of version 2 of the GNU General Public License as
18 * published by the Free Software Foundation.
20 * In either case, Lustre is distributed in the hope that it will be
21 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
22 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * license text for more details.
27 #define DEBUG_SUBSYSTEM S_RPC
30 # include <libcfs/libcfs.h>
31 #else /* __KERNEL__ */
32 # include <liblustre.h>
36 #include <libcfs/kp30.h>
37 #include <lustre_net.h>
38 # include <lustre_lib.h>
40 #include <lustre_ha.h>
41 #include <obd_class.h> /* for obd_zombie */
42 #include <obd_support.h> /* for OBD_FAIL_CHECK */
43 #include <lprocfs_status.h>
/*
 * Per-daemon control state (members of struct ptlrpcd_ctl).
 * NOTE(review): the struct header and some members used further down
 * (pc_lock, pc_waitq, pc_name, pc_recurred) are not visible in this listing.
 */
/* bit flags; LIOD_STOP is set by ptlrpcd_stop() and tested by the daemon */
47 unsigned long pc_flags;
/* completed by ptlrpcd() during start-up; ptlrpcd_start() waits on it */
49 struct completion pc_starting;
/* completed at the end of ptlrpcd(); ptlrpcd_stop() waits on it */
50 struct completion pc_finishing;
/* initialised empty in ptlrpcd_start(); no other user is visible here */
51 struct list_head pc_req_list;
/* request set driven by this daemon, allocated with ptlrpc_prep_set() */
53 struct ptlrpc_request_set *pc_set;
/* handles returned by liblustre_register_{wait,idle}_callback(), kept so
 * that ptlrpcd_stop() can pass them to the matching deregister calls */
58 void *pc_wait_callback;
59 void *pc_idle_callback;
/* control block started under the name "ptlrpcd"; it is also the one whose
 * wait queue ptlrpcd_zombie_impexp_notify() signals */
63 static struct ptlrpcd_ctl ptlrpcd_pc;
/* control block started under the name "ptlrpcd-recov" */
64 static struct ptlrpcd_ctl ptlrpcd_recovery_pc;
/* taken with mutex_down()/mutex_up() around ptlrpcd_addref() and
 * ptlrpcd_decref() */
66 struct semaphore ptlrpcd_sem;
/* user count: ptlrpcd_decref() stops both daemons when it drops to 0;
 * presumably ptlrpcd_addref() starts them only on the 0 -> 1 transition */
67 static int ptlrpcd_users = 0;
/*
 * Signal the wait queue of the ptlrpcd control block recorded in
 * req->rq_ptlrpcd_data (stored there by ptlrpcd_add_req()).
 */
69 void ptlrpcd_wake(struct ptlrpc_request *req)
71 struct ptlrpcd_ctl *pc = req->rq_ptlrpcd_data;
75 cfs_waitq_signal(&pc->pc_waitq);
78 /* requests that are added to the ptlrpcd queue are sent via
79 * ptlrpcd_check->ptlrpc_check_set() */
80 void ptlrpcd_add_req(struct ptlrpc_request *req)
82 struct ptlrpcd_ctl *pc;
84 if (req->rq_send_state == LUSTRE_IMP_FULL)
87 pc = &ptlrpcd_recovery_pc;
89 req->rq_ptlrpcd_data = pc;
90 ptlrpc_set_add_new_req(pc->pc_set, req);
91 wake_up(&pc->pc_waitq);
/*
 * One polling pass over pc's request set.  Used as the l_wait_event()
 * condition in ptlrpcd() and called from ptlrpcd_check_async_rpcs().
 *
 * Visible steps: test LIOD_STOP, cull zombie imports/exports, move newly
 * queued requests into the set, run ptlrpc_check_set() while requests
 * remain, unlink completed requests, and finally check whether further new
 * requests have been queued in the meantime.
 *
 * NOTE(review): the declaration of rc, the return statements and several
 * control-flow lines are missing from this listing, so the exact return
 * contract cannot be confirmed from here.
 */
94 static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
96 struct list_head *tmp, *pos;
97 struct ptlrpc_request *req;
/* NOTE(review): the statement guarded by this test is not visible */
101 if (test_bit(LIOD_STOP, &pc->pc_flags))
104 obd_zombie_impexp_cull();
/* drain set_new_requests into the set proper while holding its lock */
106 spin_lock(&pc->pc_set->set_new_req_lock);
107 list_for_each_safe(pos, tmp, &pc->pc_set->set_new_requests) {
108 req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
109 list_del_init(&req->rq_set_chain);
110 ptlrpc_set_add_req(pc->pc_set, req);
111 rc = 1; /* need to calculate its timeout */
113 spin_unlock(&pc->pc_set->set_new_req_lock);
/* only poll the set when it still has outstanding requests */
115 if (pc->pc_set->set_remaining) {
116 rc = rc | ptlrpc_check_set(pc->pc_set);
118 /* XXX our set never completes, so we prune the completed
119 * reqs after each iteration. boy could this be smarter. */
120 list_for_each_safe(pos, tmp, &pc->pc_set->set_requests) {
121 req = list_entry(pos, struct ptlrpc_request,
/* NOTE(review): the statement guarded by this test is not visible;
 * presumably requests that are not yet complete are skipped */
123 if (req->rq_phase != RQ_PHASE_COMPLETE)
126 list_del_init(&req->rq_set_chain);
128 ptlrpc_req_finished (req);
133 /* If new requests have been added, make sure to wake up */
134 spin_lock(&pc->pc_set->set_new_req_lock);
135 rc = !list_empty(&pc->pc_set->set_new_requests);
136 spin_unlock(&pc->pc_set->set_new_req_lock);
143 /* ptlrpc's code paths like to execute in process context, so we have this
144 * thread which spins on a set which contains the io rpcs. llite specifies
145 * ptlrpcd's set when it pushes pages down into the oscs */
/*
 * Thread body; arg is the struct ptlrpcd_ctl to serve.
 *
 * pc_starting is completed once cfs_daemonize_ctxt() has been attempted, on
 * both the failure and the success path visible here.  Each wait then sleeps
 * on pc_waitq, to which wake-ups of the set's own wait queue are forwarded,
 * with ptlrpcd_check() as the wake condition and a timeout taken from
 * ptlrpc_set_next_timeout(); ptlrpc_expired_set is the timeout callback.
 * After the LIOD_STOP test, requests still in the set are waited for with
 * ptlrpc_set_wait() and pc_finishing is completed.
 *
 * NOTE(review): the loop construct, the statement guarded by the LIOD_STOP
 * test and the return statements are missing from this listing.
 */
146 static int ptlrpcd(void *arg)
148 struct ptlrpcd_ctl *pc = arg;
152 if ((rc = cfs_daemonize_ctxt(pc->pc_name))) {
153 complete(&pc->pc_starting);
157 complete(&pc->pc_starting);
159 /* this mainloop strongly resembles ptlrpc_set_wait except
160 * that our set never completes. ptlrpcd_check calls ptlrpc_check_set
161 * when there are requests in the set. new requests come in
162 * on the set's new_req_list and ptlrpcd_check moves them into
165 cfs_waitlink_t set_wait;
166 struct l_wait_info lwi;
167 cfs_duration_t timeout;
169 timeout = cfs_time_seconds(ptlrpc_set_next_timeout(pc->pc_set));
170 lwi = LWI_TIMEOUT(timeout, ptlrpc_expired_set, pc->pc_set);
172 /* ala the pinger, wait on pc's waitqueue and the set's */
173 cfs_waitlink_init(&set_wait);
174 cfs_waitq_add(&pc->pc_set->set_waitq, &set_wait);
175 cfs_waitq_forward(&set_wait, &pc->pc_waitq);
176 l_wait_event(pc->pc_waitq, ptlrpcd_check(pc), &lwi);
/* the wait link is removed from the set's queue again after every wait */
177 cfs_waitq_del(&pc->pc_set->set_waitq, &set_wait);
179 if (test_bit(LIOD_STOP, &pc->pc_flags))
182 /* wait for inflight requests to drain */
183 if (!list_empty(&pc->pc_set->set_requests))
184 ptlrpc_set_wait(pc->pc_set);
/* lets ptlrpcd_stop() return from wait_for_completion() */
185 complete(&pc->pc_finishing);
189 static void ptlrpcd_zombie_impexp_notify(void)
191 cfs_waitq_signal(&ptlrpcd_pc.pc_waitq);
/*
 * Callback registered through liblustre_register_wait_callback() in
 * ptlrpcd_start(); arg is the struct ptlrpcd_ctl to poll.
 *
 * ptlrpcd_check() is run only when pc_recurred equals 1, which together with
 * the "single threaded" remark suggests pc_recurred is a re-entrancy
 * counter.  For the recovery control block ptlrpcd_check() is run a second
 * time.
 *
 * NOTE(review): the lines that modify pc_recurred, the condition guarding
 * ptlrpc_expired_set() and the return statement are missing from this
 * listing.
 */
195 int ptlrpcd_check_async_rpcs(void *arg)
197 struct ptlrpcd_ctl *pc = arg;
200 /* single threaded!! */
203 if (pc->pc_recurred == 1) {
204 rc = ptlrpcd_check(pc);
206 ptlrpc_expired_set(pc->pc_set);
207 /*XXX send replay requests */
208 if (pc == &ptlrpcd_recovery_pc)
209 rc = ptlrpcd_check(pc);
216 int ptlrpcd_idle(void *arg)
218 struct ptlrpcd_ctl *pc = arg;
220 return (list_empty(&pc->pc_set->set_new_requests) &&
221 pc->pc_set->set_remaining == 0);
226 static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
231 memset(pc, 0, sizeof(*pc));
232 init_completion(&pc->pc_starting);
233 init_completion(&pc->pc_finishing);
234 cfs_waitq_init(&pc->pc_waitq);
236 spin_lock_init(&pc->pc_lock);
237 CFS_INIT_LIST_HEAD(&pc->pc_req_list);
238 snprintf (pc->pc_name, sizeof (pc->pc_name), name);
240 pc->pc_set = ptlrpc_prep_set();
241 if (pc->pc_set == NULL)
245 /* wake ptlrpcd when zombie imports or exports exist */
246 obd_zombie_impexp_notify = ptlrpcd_zombie_impexp_notify;
248 rc = cfs_kernel_thread(ptlrpcd, pc, 0);
250 ptlrpc_set_destroy(pc->pc_set);
254 wait_for_completion(&pc->pc_starting);
256 pc->pc_wait_callback =
257 liblustre_register_wait_callback("ptlrpcd_check_async_rpcs",
258 &ptlrpcd_check_async_rpcs, pc);
259 pc->pc_idle_callback =
260 liblustre_register_idle_callback("ptlrpcd_check_idle_rpcs",
/*
 * Shut down the ptlrpcd instance behind @pc and destroy its request set.
 *
 * Visible steps: set LIOD_STOP, signal pc_waitq, clear the
 * obd_zombie_impexp_notify hook, wait for pc_finishing, deregister the
 * liblustre wait/idle callbacks, then destroy the set.
 *
 * NOTE(review): the thread-related and liblustre-callback steps are
 * presumably alternatives selected by a preprocessor conditional that is
 * not visible in this listing.
 */
267 static void ptlrpcd_stop(struct ptlrpcd_ctl *pc)
269 set_bit(LIOD_STOP, &pc->pc_flags);
270 cfs_waitq_signal(&pc->pc_waitq);
272 obd_zombie_impexp_notify = NULL;
/* ptlrpcd() completes pc_finishing as its last visible action */
273 wait_for_completion(&pc->pc_finishing);
275 liblustre_deregister_wait_callback(pc->pc_wait_callback);
276 liblustre_deregister_idle_callback(pc->pc_idle_callback);
278 ptlrpc_set_destroy(pc->pc_set);
/*
 * Take a reference on the ptlrpcd service, under ptlrpcd_sem.
 *
 * Increments ptlrpcd_users and starts the "ptlrpcd" and "ptlrpcd-recov"
 * instances; ptlrpcd_stop(&ptlrpcd_pc) appears after the second start,
 * presumably on its failure path.
 *
 * NOTE(review): the declaration of rc, the statements guarded by each test
 * and the return are missing from this listing, so the error handling and
 * return contract cannot be confirmed from here.
 */
281 int ptlrpcd_addref(void)
286 mutex_down(&ptlrpcd_sem);
/* presumably only the first user goes on to start the daemons */
287 if (++ptlrpcd_users != 1)
290 rc = ptlrpcd_start("ptlrpcd", &ptlrpcd_pc);
296 rc = ptlrpcd_start("ptlrpcd-recov", &ptlrpcd_recovery_pc);
298 ptlrpcd_stop(&ptlrpcd_pc);
303 mutex_up(&ptlrpcd_sem);
307 void ptlrpcd_decref(void)
309 mutex_down(&ptlrpcd_sem);
310 if (--ptlrpcd_users == 0) {
311 ptlrpcd_stop(&ptlrpcd_pc);
312 ptlrpcd_stop(&ptlrpcd_recovery_pc);
314 mutex_up(&ptlrpcd_sem);