/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * libcfs/libcfs/workitem.c
 *
 * Author: Isaac Huang <isaac@clusterfs.com>
 *         Liang Zhen <zhen.liang@sun.com>
 */
#define DEBUG_SUBSYSTEM S_LNET

#include <libcfs/libcfs.h>

#define CFS_WS_NAME_LEN         16
typedef struct cfs_wi_sched {
        cfs_list_t              ws_list;        /* chain on global list */
#ifdef __KERNEL__
        /** serialised workitems */
        spinlock_t              ws_lock;
        /** where schedulers sleep */
        cfs_waitq_t             ws_waitq;
#endif
        /** concurrent workitems */
        cfs_list_t              ws_runq;
        /** rescheduled running-workitems: a workitem can be rescheduled
         * while running in wi_action(), but we don't want to execute it
         * again until it returns from wi_action(), so we put it on ws_rerunq
         * while rescheduling, and move it to runq after it returns from
         * wi_action() */
        cfs_list_t              ws_rerunq;
        /** CPT-table for this scheduler */
        struct cfs_cpt_table    *ws_cptab;
        /** CPT id for affinity */
        int                     ws_cpt;
        /** number of scheduled workitems */
        int                     ws_nscheduled;
        /** started scheduler threads, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_nthreads:30;
        /** shutting down, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_stopping:1;
        /** serialize starting thread, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_starting:1;
        /** scheduler name */
        char                    ws_name[CFS_WS_NAME_LEN];
} cfs_wi_sched_t;

struct cfs_workitem_data {
        /** serialize */
        spinlock_t              wi_glock;
        /** list of all schedulers */
        cfs_list_t              wi_scheds;
        /** WI module is initialized */
        int                     wi_init;
        /** shutting down the whole WI module */
        int                     wi_stopping;
} cfs_wi_data;
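/*
 * Illustrative usage sketch (not part of this module): a caller typically
 * embeds a cfs_workitem_t in its own object, initializes it with the
 * cfs_wi_init() helper from the libcfs workitem header, and queues it on a
 * scheduler obtained from cfs_wi_sched_create(). The names "sample_obj",
 * "sample_wi_action" and "sample_sched" below are hypothetical and exist
 * only for this sketch.
 *
 *      struct sample_obj {
 *              cfs_workitem_t  so_wi;
 *              int             so_count;
 *      };
 *
 *      static int
 *      sample_wi_action(cfs_workitem_t *wi)
 *      {
 *              struct sample_obj *obj = wi->wi_data;
 *
 *              obj->so_count++;
 *              return 0;       // 0: workitem stays alive, may be rescheduled
 *      }
 *
 *      cfs_wi_init(&obj->so_wi, obj, sample_wi_action);
 *      cfs_wi_schedule(sample_sched, &obj->so_wi);
 */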
#ifdef __KERNEL__

static inline void
cfs_wi_sched_lock(cfs_wi_sched_t *sched)
{
        spin_lock(&sched->ws_lock);
}

static inline void
cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
{
        spin_unlock(&sched->ws_lock);
}

static inline int
cfs_wi_sched_cansleep(cfs_wi_sched_t *sched)
{
        cfs_wi_sched_lock(sched);
        if (sched->ws_stopping) {
                cfs_wi_sched_unlock(sched);
                return 0;
        }

        if (!cfs_list_empty(&sched->ws_runq)) {
                cfs_wi_sched_unlock(sched);
                return 0;
        }
        cfs_wi_sched_unlock(sched);
        return 1;
}

#else /* !__KERNEL__ */

static inline void
cfs_wi_sched_lock(cfs_wi_sched_t *sched)
{
        spin_lock(&cfs_wi_data.wi_glock);
}

static inline void
cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
{
        spin_unlock(&cfs_wi_data.wi_glock);
}

#endif /* __KERNEL__ */
/*
 * Notes:
 * 0. it only works when called from wi->wi_action.
 * 1. when it returns no one shall try to schedule the workitem.
 */
void
cfs_wi_exit(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
{
        LASSERT(!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT(!sched->ws_stopping);

        cfs_wi_sched_lock(sched);

        LASSERT(wi->wi_running);

        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT(!cfs_list_empty(&wi->wi_list));
                cfs_list_del_init(&wi->wi_list);

                LASSERT(sched->ws_nscheduled > 0);
                sched->ws_nscheduled--;
        }

        LASSERT(cfs_list_empty(&wi->wi_list));

        wi->wi_scheduled = 1; /* LBUG future schedule attempts */
        cfs_wi_sched_unlock(sched);
}
EXPORT_SYMBOL(cfs_wi_exit);
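/*
 * Illustrative teardown sketch (hypothetical names, not part of this module):
 * cfs_wi_exit() is called from inside the workitem's own wi_action when the
 * item is going away for good; returning non-zero then tells the scheduler
 * the workitem is dead and must not be touched again, so it may be freed.
 *
 *      static int
 *      sample_final_action(cfs_workitem_t *wi)
 *      {
 *              struct sample_obj *obj = wi->wi_data;
 *
 *              cfs_wi_exit(sample_sched, wi);  // no further scheduling allowed
 *              LIBCFS_FREE(obj, sizeof(*obj));
 *              return 1;                       // non-zero: workitem is dead
 *      }
 */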
/**
 * cancel schedule request of workitem \a wi
 */
int
cfs_wi_deschedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
{
        int     rc;

        LASSERT(!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT(!sched->ws_stopping);

        /*
         * return 0 if it's running already, otherwise return 1, which
         * means the workitem will not be scheduled and will not have
         * any race with wi_action.
         */
        cfs_wi_sched_lock(sched);

        rc = !(wi->wi_running);

        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT(!cfs_list_empty(&wi->wi_list));
                cfs_list_del_init(&wi->wi_list);

                LASSERT(sched->ws_nscheduled > 0);
                sched->ws_nscheduled--;

                wi->wi_scheduled = 0;
        }

        LASSERT(cfs_list_empty(&wi->wi_list));

        cfs_wi_sched_unlock(sched);
        return rc;
}
EXPORT_SYMBOL(cfs_wi_deschedule);
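/*
 * Illustrative sketch (hypothetical names): when an object that embeds a
 * workitem is released from outside wi_action, the owner first cancels any
 * pending schedule; only a non-zero return from cfs_wi_deschedule() means
 * wi_action is not running and cannot run again, so the memory is safe to
 * free immediately.
 *
 *      if (cfs_wi_deschedule(sample_sched, &obj->so_wi)) {
 *              // neither queued nor running: free right away
 *              LIBCFS_FREE(obj, sizeof(*obj));
 *      } else {
 *              // wi_action is still running; wait for it to finish
 *              // (e.g. have the action signal completion) before freeing
 *      }
 */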
/*
 * Workitem scheduled with (serial == 1) is strictly serialised not only with
 * itself, but also with others scheduled this way.
 *
 * Now there's only one static serialised queue, but in the future more might
 * be added, and even dynamic creation of serialised queues might be supported.
 */
void
cfs_wi_schedule(struct cfs_wi_sched *sched, cfs_workitem_t *wi)
{
        LASSERT(!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT(!sched->ws_stopping);

        cfs_wi_sched_lock(sched);

        if (!wi->wi_scheduled) {
                LASSERT(cfs_list_empty(&wi->wi_list));

                wi->wi_scheduled = 1;
                sched->ws_nscheduled++;
                if (!wi->wi_running) {
                        cfs_list_add_tail(&wi->wi_list, &sched->ws_runq);
#ifdef __KERNEL__
                        cfs_waitq_signal(&sched->ws_waitq);
#endif
                } else {
                        cfs_list_add(&wi->wi_list, &sched->ws_rerunq);
                }
        }

        LASSERT(!cfs_list_empty(&wi->wi_list));
        cfs_wi_sched_unlock(sched);
}
EXPORT_SYMBOL(cfs_wi_schedule);
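/*
 * Illustrative sketch (hypothetical names): cfs_wi_schedule() may also be
 * called from inside wi_action itself. Because the workitem is marked
 * wi_running at that point, it is parked on ws_rerunq and only runs again
 * after the current invocation returns, so a workitem never races with
 * itself.
 *
 *      static int
 *      sample_poll_action(cfs_workitem_t *wi)
 *      {
 *              struct sample_obj *obj = wi->wi_data;
 *
 *              if (sample_more_work(obj))
 *                      cfs_wi_schedule(sample_sched, wi);      // run again later
 *              return 0;
 *      }
 */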
#ifdef __KERNEL__

static int
cfs_wi_scheduler (void *arg)
{
        struct cfs_wi_sched     *sched = (cfs_wi_sched_t *)arg;
        char                    name[16];

        if (sched->ws_cptab != NULL && sched->ws_cpt >= 0) {
                snprintf(name, sizeof(name), "%s_%02d_%02d",
                         sched->ws_name, sched->ws_cpt, sched->ws_nthreads);
        } else {
                snprintf(name, sizeof(name), "%s_%02d",
                         sched->ws_name, sched->ws_nthreads);
        }

        cfs_daemonize(name);
        cfs_block_allsigs();

        /* CPT affinity scheduler? */
        if (sched->ws_cptab != NULL)
                cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt);

        spin_lock(&cfs_wi_data.wi_glock);

        LASSERT(sched->ws_starting == 1);
        sched->ws_starting--;
        sched->ws_nthreads++;

        spin_unlock(&cfs_wi_data.wi_glock);

        cfs_wi_sched_lock(sched);

        while (!sched->ws_stopping) {
                int             nloops = 0;
                int             rc;
                cfs_workitem_t  *wi;

                while (!cfs_list_empty(&sched->ws_runq) &&
                       nloops < CFS_WI_RESCHED) {
                        wi = cfs_list_entry(sched->ws_runq.next,
                                            cfs_workitem_t, wi_list);
                        LASSERT(wi->wi_scheduled && !wi->wi_running);

                        cfs_list_del_init(&wi->wi_list);

                        LASSERT(sched->ws_nscheduled > 0);
                        sched->ws_nscheduled--;

                        wi->wi_running   = 1;
                        wi->wi_scheduled = 0;

                        cfs_wi_sched_unlock(sched);
                        nloops++;

                        rc = (*wi->wi_action) (wi);

                        cfs_wi_sched_lock(sched);
                        if (rc != 0) /* WI should be dead, even be freed! */
                                continue;

                        wi->wi_running = 0;
                        if (cfs_list_empty(&wi->wi_list))
                                continue;

                        LASSERT(wi->wi_scheduled);
                        /* wi is rescheduled, should be on rerunq now, we
                         * move it to runq so it can run action now */
                        cfs_list_move_tail(&wi->wi_list, &sched->ws_runq);
                }

                if (!cfs_list_empty(&sched->ws_runq)) {
                        cfs_wi_sched_unlock(sched);
                        /* don't sleep because some workitems still
                         * expect me to come back soon */
                        cfs_cond_resched();
                        cfs_wi_sched_lock(sched);
                        continue;
                }

                cfs_wi_sched_unlock(sched);
                cfs_wait_event_interruptible_exclusive(sched->ws_waitq,
                                !cfs_wi_sched_cansleep(sched), rc);
                cfs_wi_sched_lock(sched);
        }

        cfs_wi_sched_unlock(sched);

        spin_lock(&cfs_wi_data.wi_glock);
        sched->ws_nthreads--;
        spin_unlock(&cfs_wi_data.wi_glock);

        return 0;
}
#else /* __KERNEL__ */

int
cfs_wi_check_events (void)
{
        int             n = 0;
        cfs_workitem_t  *wi;

        spin_lock(&cfs_wi_data.wi_glock);

        for (;;) {
                struct cfs_wi_sched     *sched = NULL;
                struct cfs_wi_sched     *tmp;

                /** rerunq is always empty for userspace */
                cfs_list_for_each_entry(tmp,
                                        &cfs_wi_data.wi_scheds, ws_list) {
                        if (!cfs_list_empty(&tmp->ws_runq)) {
                                sched = tmp;
                                break;
                        }
                }

                if (sched == NULL)
                        break;

                wi = cfs_list_entry(sched->ws_runq.next,
                                    cfs_workitem_t, wi_list);
                cfs_list_del_init(&wi->wi_list);

                LASSERT(sched->ws_nscheduled > 0);
                sched->ws_nscheduled--;

                LASSERT(wi->wi_scheduled);
                wi->wi_scheduled = 0;
                spin_unlock(&cfs_wi_data.wi_glock);

                n++;
                (*wi->wi_action) (wi);

                spin_lock(&cfs_wi_data.wi_glock);
        }

        spin_unlock(&cfs_wi_data.wi_glock);
        return n;
}

#endif
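/*
 * Illustrative sketch: in userspace there are no scheduler threads, so the
 * owner of the event loop drains queued workitems itself by polling
 * cfs_wi_check_events(), which returns the number of actions it ran.
 * "sample_event_loop_running" and "sample_wait_for_events" are hypothetical.
 *
 *      while (sample_event_loop_running) {
 *              while (cfs_wi_check_events() != 0)
 *                      ;       // run every queued wi_action
 *              sample_wait_for_events();
 *      }
 */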
void
cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
{
        int     i = 2;

        LASSERT(cfs_wi_data.wi_init);
        LASSERT(!cfs_wi_data.wi_stopping);

        spin_lock(&cfs_wi_data.wi_glock);
        if (sched->ws_stopping) {
                CDEBUG(D_INFO, "%s is in progress of stopping\n",
                       sched->ws_name);
                spin_unlock(&cfs_wi_data.wi_glock);
                return;
        }

        LASSERT(!cfs_list_empty(&sched->ws_list));
        sched->ws_stopping = 1;

        spin_unlock(&cfs_wi_data.wi_glock);

#ifdef __KERNEL__
        cfs_waitq_broadcast(&sched->ws_waitq);

        spin_lock(&cfs_wi_data.wi_glock);
        while (sched->ws_nthreads > 0) {
                CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
                       "waiting for %d threads of WI sched[%s] to terminate\n",
                       sched->ws_nthreads, sched->ws_name);

                spin_unlock(&cfs_wi_data.wi_glock);
                cfs_pause(cfs_time_seconds(1) / 20);
                spin_lock(&cfs_wi_data.wi_glock);
        }

        cfs_list_del(&sched->ws_list);

        spin_unlock(&cfs_wi_data.wi_glock);
#endif
        LASSERT(sched->ws_nscheduled == 0);

        LIBCFS_FREE(sched, sizeof(*sched));
}
EXPORT_SYMBOL(cfs_wi_sched_destroy);
int
cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
                    int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
{
        struct cfs_wi_sched     *sched;
        int                     rc;

        LASSERT(cfs_wi_data.wi_init);
        LASSERT(!cfs_wi_data.wi_stopping);
        LASSERT(cptab == NULL || cpt == CFS_CPT_ANY ||
                (cpt >= 0 && cpt < cfs_cpt_number(cptab)));

        LIBCFS_ALLOC(sched, sizeof(*sched));
        if (sched == NULL)
                return -ENOMEM;

        strncpy(sched->ws_name, name, CFS_WS_NAME_LEN);
        sched->ws_cptab = cptab;
        sched->ws_cpt = cpt;

#ifdef __KERNEL__
        spin_lock_init(&sched->ws_lock);
        cfs_waitq_init(&sched->ws_waitq);
#endif
        CFS_INIT_LIST_HEAD(&sched->ws_runq);
        CFS_INIT_LIST_HEAD(&sched->ws_rerunq);
        CFS_INIT_LIST_HEAD(&sched->ws_list);

        rc = 0;
#ifdef __KERNEL__
        while (nthrs > 0) {
                spin_lock(&cfs_wi_data.wi_glock);
                while (sched->ws_starting > 0) {
                        spin_unlock(&cfs_wi_data.wi_glock);
                        cfs_schedule();
                        spin_lock(&cfs_wi_data.wi_glock);
                }

                sched->ws_starting++;
                spin_unlock(&cfs_wi_data.wi_glock);

                rc = cfs_create_thread(cfs_wi_scheduler, sched, 0);
                if (rc >= 0) {
                        nthrs--;
                        continue;
                }

                CERROR("Failed to create thread for WI scheduler %s: %d\n",
                       name, rc);

                spin_lock(&cfs_wi_data.wi_glock);

                /* make up for cfs_wi_sched_destroy */
                cfs_list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
                sched->ws_starting--;

                spin_unlock(&cfs_wi_data.wi_glock);

                cfs_wi_sched_destroy(sched);
                return rc;
        }
#endif
        spin_lock(&cfs_wi_data.wi_glock);
        cfs_list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
        spin_unlock(&cfs_wi_data.wi_glock);

        *sched_pp = sched;
        return 0;
}
EXPORT_SYMBOL(cfs_wi_sched_create);
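/*
 * Illustrative sketch (hypothetical caller): a subsystem usually creates one
 * scheduler at startup, queues workitems on it, and destroys it on shutdown.
 * "cfs_cpt_table" is assumed to be the global libcfs CPT table; passing a
 * NULL cptab instead creates a scheduler without CPT affinity.
 *
 *      struct cfs_wi_sched *sample_sched;
 *      int rc;
 *
 *      rc = cfs_wi_sched_create("sample", cfs_cpt_table, CFS_CPT_ANY,
 *                               1, &sample_sched);
 *      if (rc != 0)
 *              return rc;
 *
 *      cfs_wi_schedule(sample_sched, &obj->so_wi);
 *      // ... workitems run on the scheduler thread(s) ...
 *      cfs_wi_sched_destroy(sample_sched);
 */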
int
cfs_wi_startup(void)
{
        memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));

        spin_lock_init(&cfs_wi_data.wi_glock);
        CFS_INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
        cfs_wi_data.wi_init = 1;

        return 0;
}
void
cfs_wi_shutdown (void)
{
        struct cfs_wi_sched     *sched;

        spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_stopping = 1;
        spin_unlock(&cfs_wi_data.wi_glock);

#ifdef __KERNEL__
        /* nobody should contend on this list */
        cfs_list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
                sched->ws_stopping = 1;
                cfs_waitq_broadcast(&sched->ws_waitq);
        }

        cfs_list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
                spin_lock(&cfs_wi_data.wi_glock);

                while (sched->ws_nthreads != 0) {
                        spin_unlock(&cfs_wi_data.wi_glock);
                        cfs_pause(cfs_time_seconds(1) / 20);
                        spin_lock(&cfs_wi_data.wi_glock);
                }

                spin_unlock(&cfs_wi_data.wi_glock);
        }
#endif
        while (!cfs_list_empty(&cfs_wi_data.wi_scheds)) {
                sched = cfs_list_entry(cfs_wi_data.wi_scheds.next,
                                       struct cfs_wi_sched, ws_list);
                cfs_list_del(&sched->ws_list);
                LIBCFS_FREE(sched, sizeof(*sched));
        }

        cfs_wi_data.wi_stopping = 0;
        cfs_wi_data.wi_init = 0;
}