/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * libcfs/libcfs/workitem.c
 *
 * Author: Isaac Huang <isaac@clusterfs.com>
 *         Liang Zhen  <zhen.liang@sun.com>
 */
#define DEBUG_SUBSYSTEM S_LNET

#include <libcfs/libcfs.h>
typedef struct cfs_wi_sched {
#ifdef __KERNEL__
        /** serialises access to the workitem queues below */
        cfs_spinlock_t  ws_lock;
        /** where schedulers sleep */
        cfs_waitq_t     ws_waitq;
#endif
        /** concurrent workitems */
        cfs_list_t      ws_runq;
        /** rescheduled running-workitems */
        cfs_list_t      ws_rerunq;
        /** the scheduler is shutting down */
        int             ws_shuttingdown;
} cfs_wi_sched_t;

#ifdef __KERNEL__
/**
 * we have 2 cfs_wi_sched_t so far:
 * one for CFS_WI_SCHED_ANY, another for CFS_WI_SCHED_SERIAL;
 * a per-CPU implementation will be added for SMP scalability
 */
#define CFS_WI_NSCHED   2
#else
/** always 2 for userspace */
#define CFS_WI_NSCHED   2
#endif /* __KERNEL__ */
struct cfs_workitem_data {
        /** serialises the data below */
        cfs_spinlock_t  wi_glock;
        /** number of cfs_wi_sched_t */
        int             wi_nsched;
        /** number of threads (all schedulers) */
        int             wi_nthreads;
        /** the schedulers (array of wi_nsched entries) */
        cfs_wi_sched_t *wi_scheds;
} cfs_wi_data;
static inline cfs_wi_sched_t *
cfs_wi_to_sched(cfs_workitem_t *wi)
{
        LASSERT(wi->wi_sched_id == CFS_WI_SCHED_ANY ||
                wi->wi_sched_id == CFS_WI_SCHED_SERIAL ||
                (wi->wi_sched_id >= 0 &&
                 wi->wi_sched_id < cfs_wi_data.wi_nsched));

        if (wi->wi_sched_id == CFS_WI_SCHED_ANY)
                return &cfs_wi_data.wi_scheds[0];
        if (wi->wi_sched_id == CFS_WI_SCHED_SERIAL)
                return &cfs_wi_data.wi_scheds[cfs_wi_data.wi_nsched - 1];

        return &cfs_wi_data.wi_scheds[wi->wi_sched_id];
}
#ifdef __KERNEL__

static inline void
cfs_wi_sched_lock(cfs_wi_sched_t *sched)
{
        cfs_spin_lock(&sched->ws_lock);
}

static inline void
cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
{
        cfs_spin_unlock(&sched->ws_lock);
}

static inline int
cfs_wi_sched_cansleep(cfs_wi_sched_t *sched)
{
        cfs_wi_sched_lock(sched);
        if (sched->ws_shuttingdown) {
                cfs_wi_sched_unlock(sched);
                return 0;
        }

        if (!cfs_list_empty(&sched->ws_runq)) {
                cfs_wi_sched_unlock(sched);
                return 0;
        }

        cfs_wi_sched_unlock(sched);
        return 1;
}
#else /* !__KERNEL__ */

static inline void
cfs_wi_sched_lock(cfs_wi_sched_t *sched)
{
        cfs_spin_lock(&cfs_wi_data.wi_glock);
}

static inline void
cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
{
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
}

#endif /* __KERNEL__ */
/**
 * 0. it only works when called from wi->wi_action.
 * 1. when it returns, no one shall try to schedule the workitem again.
 */
void
cfs_wi_exit(cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);

#ifdef __KERNEL__
        LASSERT (wi->wi_running);
#endif
        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT (!cfs_list_empty(&wi->wi_list));
                cfs_list_del_init(&wi->wi_list);
        }

        LASSERT (cfs_list_empty(&wi->wi_list));
        wi->wi_scheduled = 1; /* LBUG any future schedule attempt */

        cfs_wi_sched_unlock(sched);
}
CFS_EXPORT_SYMBOL(cfs_wi_exit);
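
/*
 * Usage sketch (illustrative only, not part of the original module): a
 * wi_action callback that re-queues itself while it has work left and then
 * retires itself with cfs_wi_exit(), following the two rules above.  The
 * static step counter is a hypothetical stand-in for real per-workitem state.
 */
static int example_wi_steps_left = 3;   /* hypothetical remaining work */

static int
cfs_wi_example_action(cfs_workitem_t *wi)
{
        if (--example_wi_steps_left > 0) {
                cfs_wi_schedule(wi);    /* run again later */
                return 0;               /* workitem stays alive */
        }

        cfs_wi_exit(wi);        /* only legal from inside wi_action */
        return 1;               /* non-zero: the scheduler forgets about wi */
}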
/**
 * cancel the pending schedule request of a workitem
 */
int
cfs_wi_cancel (cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);
        int             rc;

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);
        /*
         * return 0 if it's running already; otherwise return 1, which
         * means the workitem will not be scheduled and will not have
         * any race with wi_action.
         */
        rc = !(wi->wi_running);

        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT (!cfs_list_empty(&wi->wi_list));
                cfs_list_del_init(&wi->wi_list);
                wi->wi_scheduled = 0;
        }

        LASSERT (cfs_list_empty(&wi->wi_list));

        cfs_wi_sched_unlock(sched);
        return rc;
}
CFS_EXPORT_SYMBOL(cfs_wi_cancel);
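
/*
 * Usage sketch (illustrative only, not part of the original module): how a
 * caller can interpret cfs_wi_cancel()'s return value.  The -EBUSY mapping
 * is an assumption made for the example; callers pick their own convention.
 */
static int
cfs_wi_example_cancel(cfs_workitem_t *wi)
{
        if (cfs_wi_cancel(wi))
                return 0;       /* descheduled: wi_action will not run */

        /* wi_action is running right now; the caller must synchronise with
         * it by its own means (e.g. a completion flag) before freeing wi */
        return -EBUSY;
}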
/*
 * A workitem scheduled with (serial == 1) is strictly serialised not only
 * with itself, but also with other workitems scheduled this way.
 *
 * For now there is only one static serialised queue, but in the future more
 * might be added, and even dynamic creation of serialised queues might be
 * supported.
 */
void
cfs_wi_schedule(cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);

        if (!wi->wi_scheduled) {
                LASSERT (cfs_list_empty(&wi->wi_list));

                wi->wi_scheduled = 1;
                if (!wi->wi_running) {
                        cfs_list_add_tail(&wi->wi_list, &sched->ws_runq);
#ifdef __KERNEL__
                        cfs_waitq_signal(&sched->ws_waitq);
#endif
                } else {
                        cfs_list_add(&wi->wi_list, &sched->ws_rerunq);
                }
        }

        LASSERT (!cfs_list_empty(&wi->wi_list));
        cfs_wi_sched_unlock(sched);
}
CFS_EXPORT_SYMBOL(cfs_wi_schedule);
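
/*
 * Usage sketch (illustrative only, not part of the original module):
 * preparing a workitem for the serial queue by hand and scheduling it.
 * Only the fields this file itself touches (wi_list, wi_action, wi_running,
 * wi_scheduled, wi_sched_id) are initialised here; normally the initialiser
 * provided by the workitem header should be used instead.
 */
static void
cfs_wi_example_schedule_serial(cfs_workitem_t *wi,
                               int (*action)(cfs_workitem_t *))
{
        CFS_INIT_LIST_HEAD(&wi->wi_list);
        wi->wi_action    = action;
        wi->wi_running   = 0;
        wi->wi_scheduled = 0;
        wi->wi_sched_id  = CFS_WI_SCHED_SERIAL; /* strictly serialised queue */

        cfs_wi_schedule(wi);
}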

#ifdef __KERNEL__

static int
cfs_wi_scheduler (void *arg)
{
        int             id     = (int)(long_ptr_t) arg;
        int             serial = (id == -1);
        char            name[16];
        cfs_wi_sched_t *sched;

        if (serial) {
                sched = &cfs_wi_data.wi_scheds[cfs_wi_data.wi_nsched - 1];
                cfs_daemonize("wi_serial_sd");
        } else {
                /* will be sched = &cfs_wi_data.wi_scheds[id] in the future */
                sched = &cfs_wi_data.wi_scheds[0];
                snprintf(name, sizeof(name), "cfs_wi_sd%03d", id);
                cfs_daemonize(name);
        }

        cfs_wi_sched_lock(sched);

        while (!sched->ws_shuttingdown) {
                int             nloops = 0;
                int             rc;
                cfs_workitem_t *wi;

                while (!cfs_list_empty(&sched->ws_runq) &&
                       nloops < CFS_WI_RESCHED) {
                        wi = cfs_list_entry(sched->ws_runq.next,
                                            cfs_workitem_t, wi_list);
                        LASSERT (wi->wi_scheduled && !wi->wi_running);

                        cfs_list_del_init(&wi->wi_list);

                        wi->wi_running   = 1;
                        wi->wi_scheduled = 0;
                        cfs_wi_sched_unlock(sched);
                        nloops++;

                        rc = (*wi->wi_action) (wi);

                        cfs_wi_sched_lock(sched);
                        if (rc != 0) /* WI may be dead, or even freed, now */
                                continue;

                        wi->wi_running = 0;
                        if (cfs_list_empty(&wi->wi_list)) /* not rescheduled */
                                continue;

                        LASSERT (wi->wi_scheduled);
                        /* wi is rescheduled, should be on rerunq now, we
                         * move it to runq so it can run action now */
                        cfs_list_move_tail(&wi->wi_list, &sched->ws_runq);
                }

                if (!cfs_list_empty(&sched->ws_runq)) {
                        cfs_wi_sched_unlock(sched);
                        /* don't sleep because some workitems still
                         * expect me to come back soon */
                        cfs_cond_resched();
                        cfs_wi_sched_lock(sched);
                        continue;
                }

                cfs_wi_sched_unlock(sched);
                cfs_wait_event_interruptible_exclusive(sched->ws_waitq,
                                !cfs_wi_sched_cansleep(sched), rc);
                cfs_wi_sched_lock(sched);
        }

        cfs_wi_sched_unlock(sched);

        cfs_spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_nthreads--;
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return 0;
}
int
cfs_wi_start_thread (int (*func) (void*), void *arg)
{
        long pid;

        pid = cfs_create_thread(func, arg, 0);
        if (pid < 0)
                return (int)pid;

        cfs_spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_nthreads++;
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return 0;
}

#else /* __KERNEL__ */
int
cfs_wi_check_events (void)
{
        int               n = 0;
        cfs_workitem_t   *wi;
        cfs_list_t       *q;

        cfs_spin_lock(&cfs_wi_data.wi_glock);

        for (;;) {
                /** rerunq is always empty for userspace */
                if (!cfs_list_empty(&cfs_wi_data.wi_scheds[1].ws_runq))
                        q = &cfs_wi_data.wi_scheds[1].ws_runq;
                else if (!cfs_list_empty(&cfs_wi_data.wi_scheds[0].ws_runq))
                        q = &cfs_wi_data.wi_scheds[0].ws_runq;
                else
                        break;

                wi = cfs_list_entry(q->next, cfs_workitem_t, wi_list);
                cfs_list_del_init(&wi->wi_list);

                LASSERT (wi->wi_scheduled);
                wi->wi_scheduled = 0;
                cfs_spin_unlock(&cfs_wi_data.wi_glock);

                n++;
                (*wi->wi_action) (wi);

                cfs_spin_lock(&cfs_wi_data.wi_glock);
        }

        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return n;
}
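
/*
 * Usage sketch (illustrative only, not part of the original module): a
 * userspace caller typically polls cfs_wi_check_events() from its event
 * loop.  This assumes the function returns the number of workitems it ran,
 * so looping until it returns 0 drains both run queues.
 */
static void
cfs_wi_example_drain(void)
{
        while (cfs_wi_check_events() != 0)
                ;       /* keep running actions until both runqs are empty */
}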

#endif /* __KERNEL__ */

static void
cfs_wi_sched_init(cfs_wi_sched_t *sched)
{
        sched->ws_shuttingdown = 0;
#ifdef __KERNEL__
        cfs_spin_lock_init(&sched->ws_lock);
        cfs_waitq_init(&sched->ws_waitq);
#endif
        CFS_INIT_LIST_HEAD(&sched->ws_runq);
        CFS_INIT_LIST_HEAD(&sched->ws_rerunq);
}
static void
cfs_wi_sched_shutdown(cfs_wi_sched_t *sched)
{
        cfs_wi_sched_lock(sched);

        LASSERT(cfs_list_empty(&sched->ws_runq));
        LASSERT(cfs_list_empty(&sched->ws_rerunq));

        sched->ws_shuttingdown = 1;

#ifdef __KERNEL__
        cfs_waitq_broadcast(&sched->ws_waitq);
#endif
        cfs_wi_sched_unlock(sched);
}
int
cfs_wi_startup (void)
{
        int i;
        int n;
        int rc;

        cfs_wi_data.wi_nthreads = 0;
        cfs_wi_data.wi_nsched   = CFS_WI_NSCHED;
        LIBCFS_ALLOC(cfs_wi_data.wi_scheds,
                     cfs_wi_data.wi_nsched * sizeof(cfs_wi_sched_t));
        if (cfs_wi_data.wi_scheds == NULL)
                return -ENOMEM;

        cfs_spin_lock_init(&cfs_wi_data.wi_glock);
        for (i = 0; i < cfs_wi_data.wi_nsched; i++)
                cfs_wi_sched_init(&cfs_wi_data.wi_scheds[i]);

#ifdef __KERNEL__
        n = cfs_num_online_cpus();
        for (i = 0; i <= n; i++) {
                /* the last thread (i == n) runs the serial scheduler */
                rc = cfs_wi_start_thread(cfs_wi_scheduler,
                                         (void *)(long_ptr_t)(i == n ? -1 : i));
                if (rc != 0) {
                        CERROR ("Can't spawn workitem scheduler: %d\n", rc);
                        cfs_wi_shutdown();
                        return rc;
                }
        }
#endif
        return 0;
}
void
cfs_wi_shutdown (void)
{
        int i;

        if (cfs_wi_data.wi_scheds == NULL)
                return;

        for (i = 0; i < cfs_wi_data.wi_nsched; i++)
                cfs_wi_sched_shutdown(&cfs_wi_data.wi_scheds[i]);

        cfs_spin_lock(&cfs_wi_data.wi_glock);
        while (cfs_wi_data.wi_nthreads != 0) {
                CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
                       "waiting for %d threads to terminate\n",
                       cfs_wi_data.wi_nthreads);
                cfs_spin_unlock(&cfs_wi_data.wi_glock);

                cfs_pause(cfs_time_seconds(1));

                cfs_spin_lock(&cfs_wi_data.wi_glock);
        }
        cfs_spin_unlock(&cfs_wi_data.wi_glock);

        LIBCFS_FREE(cfs_wi_data.wi_scheds,
                    cfs_wi_data.wi_nsched * sizeof(cfs_wi_sched_t));
}