Whamcloud - gitweb
LU-3544 nfs: writing to new files will return ENOENT
[fs/lustre-release.git] / libcfs / libcfs / workitem.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * libcfs/libcfs/workitem.c
37  *
38  * Author: Isaac Huang <isaac@clusterfs.com>
39  *         Liang Zhen  <zhen.liang@sun.com>
40  */
41
42 #define DEBUG_SUBSYSTEM S_LNET
43
44 #include <libcfs/libcfs.h>
45
46 #define CFS_WS_NAME_LEN         16
47
48 typedef struct cfs_wi_sched {
49         cfs_list_t              ws_list;        /* chain on global list */
50 #ifdef __KERNEL__
51         /** serialised workitems */
52         spinlock_t              ws_lock;
53         /** where schedulers sleep */
54         cfs_waitq_t             ws_waitq;
55 #endif
56         /** concurrent workitems */
57         cfs_list_t              ws_runq;
58         /** rescheduled running-workitems, a workitem can be rescheduled
59          * while running in wi_action(), but we don't to execute it again
60          * unless it returns from wi_action(), so we put it on ws_rerunq
61          * while rescheduling, and move it to runq after it returns
62          * from wi_action() */
63         cfs_list_t              ws_rerunq;
64         /** CPT-table for this scheduler */
65         struct cfs_cpt_table    *ws_cptab;
66         /** CPT id for affinity */
67         int                     ws_cpt;
68         /** number of scheduled workitems */
69         int                     ws_nscheduled;
70         /** started scheduler thread, protected by cfs_wi_data::wi_glock */
71         unsigned int            ws_nthreads:30;
72         /** shutting down, protected by cfs_wi_data::wi_glock */
73         unsigned int            ws_stopping:1;
74         /** serialize starting thread, protected by cfs_wi_data::wi_glock */
75         unsigned int            ws_starting:1;
76         /** scheduler name */
77         char                    ws_name[CFS_WS_NAME_LEN];
78 } cfs_wi_sched_t;
79
80 struct cfs_workitem_data {
81         /** serialize */
82         spinlock_t              wi_glock;
83         /** list of all schedulers */
84         cfs_list_t              wi_scheds;
85         /** WI module is initialized */
86         int                     wi_init;
87         /** shutting down the whole WI module */
88         int                     wi_stopping;
89 } cfs_wi_data;
90
91 #ifdef __KERNEL__
92 static inline void
93 cfs_wi_sched_lock(cfs_wi_sched_t *sched)
94 {
95         spin_lock(&sched->ws_lock);
96 }
97
98 static inline void
99 cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
100 {
101         spin_unlock(&sched->ws_lock);
102 }
103
104 static inline int
105 cfs_wi_sched_cansleep(cfs_wi_sched_t *sched)
106 {
107         cfs_wi_sched_lock(sched);
108         if (sched->ws_stopping) {
109                 cfs_wi_sched_unlock(sched);
110                 return 0;
111         }
112
113         if (!cfs_list_empty(&sched->ws_runq)) {
114                 cfs_wi_sched_unlock(sched);
115                 return 0;
116         }
117         cfs_wi_sched_unlock(sched);
118         return 1;
119 }
120
121 #else /* !__KERNEL__ */
122
123 static inline void
124 cfs_wi_sched_lock(cfs_wi_sched_t *sched)
125 {
126         spin_lock(&cfs_wi_data.wi_glock);
127 }
128
129 static inline void
130 cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
131 {
132         spin_unlock(&cfs_wi_data.wi_glock);
133 }
134
135 #endif /* __KERNEL__ */
136
137 /* XXX:
138  * 0. it only works when called from wi->wi_action.
139  * 1. when it returns no one shall try to schedule the workitem.
140  */
141 void
142 cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
143 {
144         LASSERT(!cfs_in_interrupt()); /* because we use plain spinlock */
145         LASSERT(!sched->ws_stopping);
146
147         cfs_wi_sched_lock(sched);
148
149 #ifdef __KERNEL__
150         LASSERT(wi->wi_running);
151 #endif
152         if (wi->wi_scheduled) { /* cancel pending schedules */
153                 LASSERT(!cfs_list_empty(&wi->wi_list));
154                 cfs_list_del_init(&wi->wi_list);
155
156                 LASSERT(sched->ws_nscheduled > 0);
157                 sched->ws_nscheduled--;
158         }
159
160         LASSERT(cfs_list_empty(&wi->wi_list));
161
162         wi->wi_scheduled = 1; /* LBUG future schedule attempts */
163         cfs_wi_sched_unlock(sched);
164
165         return;
166 }
167 EXPORT_SYMBOL(cfs_wi_exit);
168
169 /**
170  * cancel schedule request of workitem \a wi
171  */
172 int
173 cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
174 {
175         int     rc;
176
177         LASSERT(!cfs_in_interrupt()); /* because we use plain spinlock */
178         LASSERT(!sched->ws_stopping);
179
180         /*
181          * return 0 if it's running already, otherwise return 1, which
182          * means the workitem will not be scheduled and will not have
183          * any race with wi_action.
184          */
185         cfs_wi_sched_lock(sched);
186
187         rc = !(wi->wi_running);
188
189         if (wi->wi_scheduled) { /* cancel pending schedules */
190                 LASSERT(!cfs_list_empty(&wi->wi_list));
191                 cfs_list_del_init(&wi->wi_list);
192
193                 LASSERT(sched->ws_nscheduled > 0);
194                 sched->ws_nscheduled--;
195
196                 wi->wi_scheduled = 0;
197         }
198
199         LASSERT (cfs_list_empty(&wi->wi_list));
200
201         cfs_wi_sched_unlock(sched);
202         return rc;
203 }
204 EXPORT_SYMBOL(cfs_wi_deschedule);
205
206 /*
207  * Workitem scheduled with (serial == 1) is strictly serialised not only with
208  * itself, but also with others scheduled this way.
209  *
210  * Now there's only one static serialised queue, but in the future more might
211  * be added, and even dynamic creation of serialised queues might be supported.
212  */
213 void
214 cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
215 {
216         LASSERT(!cfs_in_interrupt()); /* because we use plain spinlock */
217         LASSERT(!sched->ws_stopping);
218
219         cfs_wi_sched_lock(sched);
220
221         if (!wi->wi_scheduled) {
222                 LASSERT (cfs_list_empty(&wi->wi_list));
223
224                 wi->wi_scheduled = 1;
225                 sched->ws_nscheduled++;
226                 if (!wi->wi_running) {
227                         cfs_list_add_tail(&wi->wi_list, &sched->ws_runq);
228 #ifdef __KERNEL__
229                         cfs_waitq_signal(&sched->ws_waitq);
230 #endif
231                 } else {
232                         cfs_list_add(&wi->wi_list, &sched->ws_rerunq);
233                 }
234         }
235
236         LASSERT (!cfs_list_empty(&wi->wi_list));
237         cfs_wi_sched_unlock(sched);
238         return;
239 }
240 EXPORT_SYMBOL(cfs_wi_schedule);
241
242 #ifdef __KERNEL__
243
244 static int
245 cfs_wi_scheduler (void *arg)
246 {
247         struct cfs_wi_sched     *sched = (cfs_wi_sched_t *)arg;
248
249         cfs_block_allsigs();
250
251         /* CPT affinity scheduler? */
252         if (sched->ws_cptab != NULL)
253                 cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt);
254
255         spin_lock(&cfs_wi_data.wi_glock);
256
257         LASSERT(sched->ws_starting == 1);
258         sched->ws_starting--;
259         sched->ws_nthreads++;
260
261         spin_unlock(&cfs_wi_data.wi_glock);
262
263         cfs_wi_sched_lock(sched);
264
265         while (!sched->ws_stopping) {
266                 int             nloops = 0;
267                 int             rc;
268                 cfs_workitem_t *wi;
269
270                 while (!cfs_list_empty(&sched->ws_runq) &&
271                        nloops < CFS_WI_RESCHED) {
272                         wi = cfs_list_entry(sched->ws_runq.next,
273                                             cfs_workitem_t, wi_list);
274                         LASSERT(wi->wi_scheduled && !wi->wi_running);
275
276                         cfs_list_del_init(&wi->wi_list);
277
278                         LASSERT(sched->ws_nscheduled > 0);
279                         sched->ws_nscheduled--;
280
281                         wi->wi_running   = 1;
282                         wi->wi_scheduled = 0;
283
284
285                         cfs_wi_sched_unlock(sched);
286                         nloops++;
287
288                         rc = (*wi->wi_action) (wi);
289
290                         cfs_wi_sched_lock(sched);
291                         if (rc != 0) /* WI should be dead, even be freed! */
292                                 continue;
293
294                         wi->wi_running = 0;
295                         if (cfs_list_empty(&wi->wi_list))
296                                 continue;
297
298                         LASSERT(wi->wi_scheduled);
299                         /* wi is rescheduled, should be on rerunq now, we
300                          * move it to runq so it can run action now */
301                         cfs_list_move_tail(&wi->wi_list, &sched->ws_runq);
302                 }
303
304                 if (!cfs_list_empty(&sched->ws_runq)) {
305                         cfs_wi_sched_unlock(sched);
306                         /* don't sleep because some workitems still
307                          * expect me to come back soon */
308                         cfs_cond_resched();
309                         cfs_wi_sched_lock(sched);
310                         continue;
311                 }
312
313                 cfs_wi_sched_unlock(sched);
314                 cfs_wait_event_interruptible_exclusive(sched->ws_waitq,
315                                 !cfs_wi_sched_cansleep(sched), rc);
316                 cfs_wi_sched_lock(sched);
317         }
318
319         cfs_wi_sched_unlock(sched);
320
321         spin_lock(&cfs_wi_data.wi_glock);
322         sched->ws_nthreads--;
323         spin_unlock(&cfs_wi_data.wi_glock);
324
325         return 0;
326 }
327
328 #else /* __KERNEL__ */
329
330 int
331 cfs_wi_check_events (void)
332 {
333         int               n = 0;
334         cfs_workitem_t   *wi;
335
336         spin_lock(&cfs_wi_data.wi_glock);
337
338         for (;;) {
339                 struct cfs_wi_sched     *sched = NULL;
340                 struct cfs_wi_sched     *tmp;
341
342                 /** rerunq is always empty for userspace */
343                 cfs_list_for_each_entry(tmp,
344                                         &cfs_wi_data.wi_scheds, ws_list) {
345                         if (!cfs_list_empty(&tmp->ws_runq)) {
346                                 sched = tmp;
347                                 break;
348                         }
349                 }
350
351                 if (sched == NULL)
352                         break;
353
354                 wi = cfs_list_entry(sched->ws_runq.next,
355                                     cfs_workitem_t, wi_list);
356                 cfs_list_del_init(&wi->wi_list);
357
358                 LASSERT(sched->ws_nscheduled > 0);
359                 sched->ws_nscheduled--;
360
361                 LASSERT(wi->wi_scheduled);
362                 wi->wi_scheduled = 0;
363                 spin_unlock(&cfs_wi_data.wi_glock);
364
365                 n++;
366                 (*wi->wi_action) (wi);
367
368                 spin_lock(&cfs_wi_data.wi_glock);
369         }
370
371         spin_unlock(&cfs_wi_data.wi_glock);
372         return n;
373 }
374
375 #endif
376
377 void
378 cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
379 {
380         int     i;
381
382         LASSERT(cfs_wi_data.wi_init);
383         LASSERT(!cfs_wi_data.wi_stopping);
384
385         spin_lock(&cfs_wi_data.wi_glock);
386         if (sched->ws_stopping) {
387                 CDEBUG(D_INFO, "%s is in progress of stopping\n",
388                        sched->ws_name);
389                 spin_unlock(&cfs_wi_data.wi_glock);
390                 return;
391         }
392
393         LASSERT(!cfs_list_empty(&sched->ws_list));
394         sched->ws_stopping = 1;
395
396         spin_unlock(&cfs_wi_data.wi_glock);
397
398         i = 2;
399 #ifdef __KERNEL__
400         cfs_waitq_broadcast(&sched->ws_waitq);
401
402         spin_lock(&cfs_wi_data.wi_glock);
403         while (sched->ws_nthreads > 0) {
404                 CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
405                        "waiting for %d threads of WI sched[%s] to terminate\n",
406                        sched->ws_nthreads, sched->ws_name);
407
408                 spin_unlock(&cfs_wi_data.wi_glock);
409                 cfs_pause(cfs_time_seconds(1) / 20);
410                 spin_lock(&cfs_wi_data.wi_glock);
411         }
412
413         cfs_list_del(&sched->ws_list);
414
415         spin_unlock(&cfs_wi_data.wi_glock);
416 #else
417         SET_BUT_UNUSED(i);
418 #endif
419         LASSERT(sched->ws_nscheduled == 0);
420
421         LIBCFS_FREE(sched, sizeof(*sched));
422 }
423 EXPORT_SYMBOL(cfs_wi_sched_destroy);
424
425 int
426 cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
427                     int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
428 {
429         struct cfs_wi_sched     *sched;
430         int                     rc;
431
432         LASSERT(cfs_wi_data.wi_init);
433         LASSERT(!cfs_wi_data.wi_stopping);
434         LASSERT(cptab == NULL || cpt == CFS_CPT_ANY ||
435                 (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
436
437         LIBCFS_ALLOC(sched, sizeof(*sched));
438         if (sched == NULL)
439                 return -ENOMEM;
440
441         strncpy(sched->ws_name, name, CFS_WS_NAME_LEN);
442         sched->ws_cptab = cptab;
443         sched->ws_cpt = cpt;
444
445 #ifdef __KERNEL__
446         spin_lock_init(&sched->ws_lock);
447         cfs_waitq_init(&sched->ws_waitq);
448 #endif
449         CFS_INIT_LIST_HEAD(&sched->ws_runq);
450         CFS_INIT_LIST_HEAD(&sched->ws_rerunq);
451         CFS_INIT_LIST_HEAD(&sched->ws_list);
452
453         rc = 0;
454 #ifdef __KERNEL__
455         while (nthrs > 0)  {
456                 char    name[16];
457                 cfs_task_t      *task;
458                 spin_lock(&cfs_wi_data.wi_glock);
459                 while (sched->ws_starting > 0) {
460                         spin_unlock(&cfs_wi_data.wi_glock);
461                         cfs_schedule();
462                         spin_lock(&cfs_wi_data.wi_glock);
463                 }
464
465                 sched->ws_starting++;
466                 spin_unlock(&cfs_wi_data.wi_glock);
467
468                 if (sched->ws_cptab != NULL && sched->ws_cpt >= 0) {
469                         snprintf(name, sizeof(name), "%s_%02d_%02d",
470                                  sched->ws_name, sched->ws_cpt,
471                                  sched->ws_nthreads);
472                 } else {
473                         snprintf(name, sizeof(name), "%s_%02d",
474                                  sched->ws_name, sched->ws_nthreads);
475                 }
476
477                 task = kthread_run(cfs_wi_scheduler, sched, name);
478                 if (!IS_ERR(task)) {
479                         nthrs--;
480                         continue;
481                 }
482                 rc = PTR_ERR(task);
483
484                 CERROR("Failed to create thread for WI scheduler %s: %d\n",
485                        name, rc);
486
487                 spin_lock(&cfs_wi_data.wi_glock);
488
489                 /* make up for cfs_wi_sched_destroy */
490                 cfs_list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
491                 sched->ws_starting--;
492
493                 spin_unlock(&cfs_wi_data.wi_glock);
494
495                 cfs_wi_sched_destroy(sched);
496                 return rc;
497         }
498 #else
499         SET_BUT_UNUSED(rc);
500 #endif
501         spin_lock(&cfs_wi_data.wi_glock);
502         cfs_list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
503         spin_unlock(&cfs_wi_data.wi_glock);
504
505         *sched_pp = sched;
506         return 0;
507 }
508 EXPORT_SYMBOL(cfs_wi_sched_create);
509
510 int
511 cfs_wi_startup(void)
512 {
513         memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));
514
515         spin_lock_init(&cfs_wi_data.wi_glock);
516         CFS_INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
517         cfs_wi_data.wi_init = 1;
518
519         return 0;
520 }
521
522 void
523 cfs_wi_shutdown (void)
524 {
525         struct cfs_wi_sched     *sched;
526
527         spin_lock(&cfs_wi_data.wi_glock);
528         cfs_wi_data.wi_stopping = 1;
529         spin_unlock(&cfs_wi_data.wi_glock);
530
531 #ifdef __KERNEL__
532         /* nobody should contend on this list */
533         cfs_list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
534                 sched->ws_stopping = 1;
535                 cfs_waitq_broadcast(&sched->ws_waitq);
536         }
537
538         cfs_list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
539                 spin_lock(&cfs_wi_data.wi_glock);
540
541                 while (sched->ws_nthreads != 0) {
542                         spin_unlock(&cfs_wi_data.wi_glock);
543                         cfs_pause(cfs_time_seconds(1) / 20);
544                         spin_lock(&cfs_wi_data.wi_glock);
545                 }
546                 spin_unlock(&cfs_wi_data.wi_glock);
547         }
548 #endif
549         while (!cfs_list_empty(&cfs_wi_data.wi_scheds)) {
550                 sched = cfs_list_entry(cfs_wi_data.wi_scheds.next,
551                                        struct cfs_wi_sched, ws_list);
552                 cfs_list_del(&sched->ws_list);
553                 LIBCFS_FREE(sched, sizeof(*sched));
554         }
555
556         cfs_wi_data.wi_stopping = 0;
557         cfs_wi_data.wi_init = 0;
558 }