Whamcloud - gitweb
LU-15636 test: add iabf 44/46744/7
authorJohn L. Hammond <jhammond@whamcloud.com>
Wed, 16 Mar 2022 17:00:36 +0000 (01:00 +0800)
committerOleg Drokin <green@whamcloud.com>
Sat, 11 Jun 2022 05:48:15 +0000 (05:48 +0000)
Usage: [IABF_OPTIONS...] iabf [INIT] --- A --- B --- [FINI] ---

Initialize, run tasks A and B with various overlaps, and Finalize.

Command lines for INIT, A, B, and FINI are terminated by ---.
If INIT or FINI is empty then it will be skipped.
If INIT or FINI fail then we exit immediately with status 1.

For delay = $IABF_DELAY_BEGIN_NS; delay < $IABF_DELAY_END_NS;
delay += $IABF_DELAY_STEP_NS
  Run initializer (INIT).
  In parallel: Fork, delay *, and exec processes A and B.
    If delay is negative then delay A by abs(delay) ns.
    Otherwise delay B by delay ns.
  Wait for A and B to terminate.
  Run finalizer (FINI).

See lustre/tests/iabf/README for more information.

Test-Parameters: trivial
Change-Id: I97920e082a7a5bec458c805c507b4fefb448427b
Signed-off-by: John L. Hammond <jhammond@whamcloud.com>
Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/46744
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/autoconf/lustre-core.m4
lustre/tests/Makefile.am
lustre/tests/iabf/.gitignore [new file with mode: 0644]
lustre/tests/iabf/Makefile.am [new file with mode: 0644]
lustre/tests/iabf/README [new file with mode: 0644]
lustre/tests/iabf/callvpe.c [new symlink]
lustre/tests/iabf/callvpe.h [new symlink]
lustre/tests/iabf/iabf.c [new file with mode: 0644]

index 4a93109..68ea905 100644 (file)
@@ -3271,6 +3271,7 @@ lustre/scripts/Makefile
 lustre/scripts/systemd/Makefile
 lustre/tests/Makefile
 lustre/tests/mpi/Makefile
 lustre/scripts/systemd/Makefile
 lustre/tests/Makefile
 lustre/tests/mpi/Makefile
+lustre/tests/iabf/Makefile
 lustre/tests/lutf/Makefile
 lustre/tests/lutf/src/Makefile
 lustre/tests/kernel/Makefile
 lustre/tests/lutf/Makefile
 lustre/tests/lutf/src/Makefile
 lustre/tests/kernel/Makefile
index 567aa36..069080b 100644 (file)
@@ -3,7 +3,7 @@ AM_CFLAGS := -fPIC -D_GNU_SOURCE \
             -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64
 
 DIST_SUBDIRS = mpi
             -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64
 
 DIST_SUBDIRS = mpi
-DIST_SUBDIRS += lutf
+DIST_SUBDIRS += lutf iabf
 
 noinst_DATA = disk2_4-ldiskfs.tar.bz2 disk2_4-zfs.tar.bz2
 noinst_DATA += disk2_7-ldiskfs.tar.bz2 disk2_7-zfs.tar.bz2
 
 noinst_DATA = disk2_4-ldiskfs.tar.bz2 disk2_4-zfs.tar.bz2
 noinst_DATA += disk2_7-ldiskfs.tar.bz2 disk2_7-zfs.tar.bz2
@@ -89,18 +89,18 @@ if LIBAIO
 THETESTS += aiocp
 endif
 
 THETESTS += aiocp
 endif
 
+
 if TESTS
 if TESTS
+
+SUBDIRS = iabf
+
 if MPITESTS
 if MPITESTS
-SUBDIRS = mpi
+SUBDIRS += mpi
 endif
 
 # Build LUTF only if the packages are available
 if BUILD_LUTF
 endif
 
 # Build LUTF only if the packages are available
 if BUILD_LUTF
-if MPITESTS
 SUBDIRS += lutf
 SUBDIRS += lutf
-else
-SUBDIRS = lutf
-endif
 endif # BUILD_LUTF
 
 bin_PROGRAMS = mcreate munlink statx
 endif # BUILD_LUTF
 
 bin_PROGRAMS = mcreate munlink statx
diff --git a/lustre/tests/iabf/.gitignore b/lustre/tests/iabf/.gitignore
new file mode 100644 (file)
index 0000000..6905d3c
--- /dev/null
@@ -0,0 +1,2 @@
+Makefile.in
+iabf
diff --git a/lustre/tests/iabf/Makefile.am b/lustre/tests/iabf/Makefile.am
new file mode 100644 (file)
index 0000000..992ed28
--- /dev/null
@@ -0,0 +1,9 @@
+if TESTS
+testdir = $(libdir)/lustre/tests
+test_PROGRAMS = iabf
+endif
+
+iabf_CPPFLAGS = -D_GNU_SOURCE
+iabf_CFLAGS = -g -Wall -Werror
+iabf_LDFLAGS = -pthread
+iabf_SOURCES = iabf.c callvpe.c callvpe.h
diff --git a/lustre/tests/iabf/README b/lustre/tests/iabf/README
new file mode 100644 (file)
index 0000000..396d665
--- /dev/null
@@ -0,0 +1,66 @@
+Usage: [IABF_OPTIONS...] iabf [INIT] --- A --- B --- [FINI] ---
+
+Initialize, run tasks A and B with various overlaps, and Finalize.
+
+Command lines for INIT, A, B, and FINI are terminated by ---.
+If INIT or FINI is empty then it will be skipped.
+If INIT or FINI fail then we exit immediately with status 1.
+
+For delay = $IABF_DELAY_BEGIN_NS; delay < $IABF_DELAY_END_NS; delay += $IABF_DELAY_STEP_NS
+  Run initializer (INIT).
+  In parallel: Fork, delay *, and exec processes A and B.
+    If delay is negative then delay A by abs(delay) ns.
+    Otherwise delay B by delay ns.
+  Wait for A and B to terminate.
+  Run finalizer (FINI).
+
+To autotune IABF_DELAY_*, omit any or all of these variables and set
+IABF_STEP_COUNT to the desired number of iterations and iabf will run
+tasks A and B $IABF_AUTOTUNE_COUNT (16) times to determine their
+expected elapsed runtimes. It will then choose IABF_DELAY_BEGIN_NS and
+IABF_DELAY_END_NS to try to arrange as much overlap coverage as
+possible:
+
+     AAAAAAAAAA         delay(A) is approx elapsed(B)
+BBBBB                   delay(B) == 0
+
+   AAAAAAAAAA           delay(A) < elapsed(B)
+BBBBB                   delay(B) == 0
+
+AAAAAAAAAA              delay(A) == 0
+BBBBB                   delay(B) == 0
+
+AAAAAAAAAA              delay(A) == 0
+    BBBBB               delay(B) < elapsed(A)
+
+AAAAAAAAAA              delay(A) == 0
+         BBBBB          delay(B) is approx elapsed(A)
+
+ENVIRONMENT VARIABLES:
+  IABF_DELAY_BEGIN_NS=N
+  IABF_DELAY_END_NS=N
+  IABF_DELAY_STEP_NS=N
+  IABF_AFFINITY='0 1 7'         run task A on CPU 0, task B on CPU 1, main task on CPU 7.
+  IABF_AUTOTUNE_COUNT=COUNT     set autotune count
+  IABF_DEBUG=[01]
+  IABF_STEP_COUNT=COUNT         set number of steps when autotuning
+
+EXAMPLES:
+
+IABF_DELAY_BEGIN_NS=000000000 IABF_DELAY_END_NS=200000000 IABF_DELAY_STEP_NS=100000 \
+iabf rm -f /mnt/lustre/f0 ---  \
+     dd if=/dev/zero of=/mnt/lustre/f0 bs=1M count=16 conv=notrunc --- \
+     truncate /mnt/lustre2/f0 $(( 5 *  1048576)) --- \
+     ---
+
+IABF_STEP_COUNT=4096 \
+IABF_AFFINITY='0 1 2' \
+iabf dd if=/dev/zero of=/mnt/lustre/f0 bs=1M count=128 --- \
+     dd if=/mnt/lustre2/f0 of=/dev/null bs=1M --- \
+     truncate /mnt/lustre/f0 $((5 << 20)) --- \
+     rm /mnt/lustre/f0 ---
+
+TODO
+* Start with a coarse step value (10ms or something) and refine.
+* Add options to stop on failure of A and/or B.
+
diff --git a/lustre/tests/iabf/callvpe.c b/lustre/tests/iabf/callvpe.c
new file mode 120000 (symlink)
index 0000000..4f69a64
--- /dev/null
@@ -0,0 +1 @@
+../../utils/callvpe.c
\ No newline at end of file
diff --git a/lustre/tests/iabf/callvpe.h b/lustre/tests/iabf/callvpe.h
new file mode 120000 (symlink)
index 0000000..7df1e6b
--- /dev/null
@@ -0,0 +1 @@
+../../utils/callvpe.h
\ No newline at end of file
diff --git a/lustre/tests/iabf/iabf.c b/lustre/tests/iabf/iabf.c
new file mode 100644 (file)
index 0000000..39eb242
--- /dev/null
@@ -0,0 +1,508 @@
+#include <stdbool.h>
+#include <stddef.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+#include <time.h>
+#include <assert.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include "callvpe.h"
+
+/* Number of nanoseconds in one second. */
+enum {
+       NSEC_PER_SEC = 1000000000L,
+};
+
+/* Nonzero enables DEBUG() output; main() sets it from $IABF_DEBUG. */
+static int iabf_debug = false;
+/* argv token that terminates each of the INIT, A, B and FINI command lines. */
+static const char iabf_delim[] = "---";
+/* Per-thread id (from SYS_gettid) printed in the DEBUG() prefix. */
+static __thread pid_t iabf_tid;
+
+/* Print a printf-style message to stderr, prefixed with the thread id,
+ * function name and line number, but only when iabf_debug is set. */
+#define DEBUG(fmt, args...)                                            \
+       do {                                                            \
+               if (iabf_debug)                                         \
+                       fprintf(stderr, "DEBUG [%d] %s:%d: "fmt, iabf_tid, __func__, __LINE__, ##args); \
+       } while (0)
+
+/* DEBUG_<T>(x): print the source text of expression x and its value as
+ * a bool (B), signed (D), pointer (P), string (S), unsigned (U) or
+ * hexadecimal (X). */
+#define DEBUG_B(x) DEBUG("%s = %s\n", #x, (x) ? "true" : "false")
+#define DEBUG_D(x) DEBUG("%s = %"PRIdMAX"\n", #x, (intmax_t)(x))
+#define DEBUG_P(x) DEBUG("%s = %p\n", #x, (x))
+#define DEBUG_S(x) DEBUG("%s = '%s'\n", #x, (x))
+#define DEBUG_U(x) DEBUG("%s = %"PRIuMAX"\n", #x, (uintmax_t)(x))
+#define DEBUG_X(x) DEBUG("%s = %"PRIxMAX"\n", #x, (uintmax_t)(x))
+
+/* Print a printf-style message to stderr prefixed with the program name. */
+#define ERROR(fmt, args...)                                            \
+       fprintf(stderr, "%s: "fmt, program_invocation_short_name, ##args)
+
+/* Report an error via ERROR() and terminate with EXIT_FAILURE. */
+#define FATAL(fmt, args...)                        \
+       do {                                    \
+               ERROR("fatal: "fmt, ##args);        \
+               exit(EXIT_FAILURE);                 \
+       } while (0)
+
+/* strerror() that accepts an error number of either sign. */
+#define xstrerror(e) strerror(abs(e))
+
+/* Return a - b with tv_nsec normalized into [0, NSEC_PER_SEC). */
+static struct timespec
+timespec_sub(struct timespec a, struct timespec b)
+{
+       struct timespec diff = { 0 };
+
+       diff.tv_sec = a.tv_sec - b.tv_sec;
+       diff.tv_nsec = a.tv_nsec - b.tv_nsec;
+
+       /* Carry whole seconds out of the nanosecond field. */
+       for (; diff.tv_nsec >= NSEC_PER_SEC; diff.tv_nsec -= NSEC_PER_SEC)
+               diff.tv_sec++;
+
+       /* Borrow from the seconds field while nanoseconds are negative. */
+       for (; diff.tv_nsec < 0; diff.tv_nsec += NSEC_PER_SEC)
+               diff.tv_sec--;
+
+       return diff;
+}
+
+/* Split a nanosecond count into whole seconds and leftover nanoseconds. */
+static struct timespec
+timespec_from_ns(long ns)
+{
+       struct timespec ts = { 0 };
+
+       ts.tv_sec = ns / NSEC_PER_SEC;
+       ts.tv_nsec = ns % NSEC_PER_SEC;
+
+       return ts;
+}
+
+/* Flatten a timespec into a single nanosecond count. */
+static long timespec_to_ns(struct timespec tv)
+{
+       long total_ns = tv.tv_sec * NSEC_PER_SEC + tv.tv_nsec;
+
+       return total_ns;
+}
+
+/* State shared between the main thread and the two task threads. */
+struct iabf_control {
+       /* INIT command line; not run when NULL or empty. */
+       char **ic_init;
+       /* FINI command line; not run when NULL or empty. */
+       char **ic_fini;
+       /* NULL, or three CPU sets: [0] task A, [1] task B, [2] main thread. */
+       cpu_set_t *ic_affinity;
+       /* First delay of the sweep; LONG_MIN requests autotuning. */
+       long ic_delay_begin_ns;
+       /* Exclusive end of the sweep; LONG_MAX requests autotuning. */
+       long ic_delay_end_ns;
+       /* Increment between steps; 0 requests autotuning. */
+       long ic_delay_step_ns;
+       /* Number of steps used to derive ic_delay_step_ns when autotuning. */
+       long ic_step_count;
+       /* Number of measurement runs performed by iabf_autotune(). */
+       long ic_autotune_count;
+
+       /* [0] is awaited before a step's tasks run, [1] after they finish;
+        * both are initialized for three participants. */
+       pthread_barrier_t ic_barrier[2];
+       /* Set by iabf() before the last wait on barrier 0 so that the task
+        * threads leave their loop. */
+       int ic_should_stop;
+};
+
+/* Per-task (A or B) state handed to iabf_task_thread(). */
+struct iabf_task {
+       /* Shared control block. */
+       struct iabf_control *it_control;
+       /* Task label used in messages ("A" or "B"). */
+       const char *it_name;
+       /* Thread running iabf_task_thread() for this task. */
+       pthread_t it_thread;
+       /* How long to sleep before fork/exec in the current step. */
+       struct timespec it_delay;
+       /* Time from just before fork() until waitpid() returned, last step. */
+       struct timespec it_elapsed;
+       /* NULL-terminated command line to exec. */
+       char **it_argv;
+};
+
+/* Return the value of environment variable @name parsed as a decimal
+ * long, or @def when the variable is unset.
+ *
+ * Unlike atol(), a value that is empty, has trailing junk, or does not
+ * fit in a long is reported and terminates the program instead of being
+ * silently misread.
+ */
+long iabf_getenvl(const char *name, long def)
+{
+       const char *s = getenv(name);
+       char *end;
+       long val;
+
+       if (s == NULL)
+               return def;
+
+       /* strtol() only reports overflow through errno, so clear it first. */
+       errno = 0;
+       val = strtol(s, &end, 10);
+       if (end == s || *end != '\0' || errno == ERANGE)
+               FATAL("invalid value '%s' for %s\n", s, name);
+
+       return val;
+}
+
+static void iabf_barrier_wait(struct iabf_control *ic, int which)
+{
+       int rc;
+
+       assert(PTHREAD_BARRIER_SERIAL_THREAD == -1);
+
+       rc = pthread_barrier_wait(&ic->ic_barrier[which]);
+       if (rc > 0)
+               FATAL("cannot wait on barrier: %s\n", xstrerror(rc));
+}
+
+/* Thread body for one task (A or B); @data is its struct iabf_task.
+ *
+ * Each loop iteration is one step: wait on barrier 0, sleep for
+ * it_delay, fork and exec it_argv, wait for the child, record the
+ * elapsed time in it_elapsed, then wait on barrier 1. The loop ends
+ * when ic_should_stop is seen set after barrier 0. Any failure is
+ * fatal for the whole process. Always returns NULL.
+ */
+static void *iabf_task_thread(void *data)
+{
+       struct iabf_task *it = data;
+       struct iabf_control *ic = it->it_control;
+       int rc;
+
+       /* iabf_tid is per-thread, so each thread records its own id. */
+       iabf_tid = syscall(SYS_gettid);
+
+       assert(PTHREAD_BARRIER_SERIAL_THREAD == -1);
+
+       while (1) {
+               struct timespec ts[2];
+               pid_t pid, pid2;
+               int status;
+
+               /* Start of step: released together with the main thread. */
+               iabf_barrier_wait(ic, 0);
+
+               DEBUG_D(ic->ic_should_stop);
+               if (ic->ic_should_stop)
+                       break;
+
+               /* flags == 0: it_delay is a relative interval. The error
+                * number is the return value, hence xstrerror(rc). */
+               rc = clock_nanosleep(CLOCK_MONOTONIC, 0, &it->it_delay, NULL);
+               if (rc != 0)
+                       FATAL("%s: cannot sleep: %s\n", it->it_name, xstrerror(rc));
+
+               /* ts[0] is taken after the delay, so it_elapsed excludes it. */
+               rc = clock_gettime(CLOCK_MONOTONIC, &ts[0]);
+               if (rc != 0)
+                       FATAL("%s: cannot get time: %s\n", it->it_name, xstrerror(errno));
+
+               pid = fork();
+               if (pid < 0)
+                       FATAL("%s: cannot fork: %s\n", it->it_name, strerror(errno));
+
+               if (pid == 0) {
+                       /* Child: only reached past execvpe() if the exec failed. */
+                       execvpe(it->it_argv[0], it->it_argv, environ);
+                       _exit(127);
+               }
+
+               pid2 = waitpid(pid, &status, 0);
+               if (pid2 < 0)
+                       FATAL("%s: cannot wait for pid %d: %s\n", it->it_name, (int)pid, strerror(errno));
+
+               rc = clock_gettime(CLOCK_MONOTONIC, &ts[1]);
+               if (rc != 0)
+                       FATAL("%s: cannot get time: %s\n", it->it_name, xstrerror(errno));
+
+               it->it_elapsed = timespec_sub(ts[1], ts[0]);
+
+               assert(pid == pid2);
+
+               DEBUG("%s: cmd = '%s', pid = %d, status = %d, elapsed_ns = %ld\n",
+                     it->it_name, it->it_argv[0], pid, status, timespec_to_ns(it->it_elapsed));
+
+               /* 127 is what the child returns when execvpe() fails, so it
+                * is treated as fatal. NOTE(review): a command that itself
+                * exits with 127 is indistinguishable here. */
+               if (WIFEXITED(status) && WEXITSTATUS(status) == 127)
+                       FATAL("%s: command '%s' (pid %d) exited with status 127\n", it->it_name, it->it_argv[0], pid);
+
+               /* End of step: the main thread waits on this barrier too. */
+               iabf_barrier_wait(ic, 1);
+       }
+
+       return NULL;
+}
+
+/* Perform one I, A+B, F round. Task threads must already be started.
+ *
+ * A negative delay_ns holds back the exec of A by labs(delay_ns) nsec;
+ * otherwise the exec of B is held back by delay_ns nsec.
+ *
+ * Always returns 0; a failing initializer or finalizer is fatal.
+ */
+static int iabf_step(struct iabf_control *ic,
+                    struct iabf_task it[2],
+                    long delay_ns)
+{
+       int rc;
+
+       if (ic->ic_init != NULL && ic->ic_init[0] != NULL) {
+               char **init = ic->ic_init;
+
+               rc = callvpe(init[0], init, environ);
+               DEBUG_D(rc); /* waitpid status */
+               if (rc != 0)
+                       FATAL("initializer '%s' terminated with status %d\n", init[0], rc);
+       }
+
+       DEBUG_D(delay_ns);
+
+       /* Exactly one task is held back: A (index 0) for a negative
+        * delay, B (index 1) otherwise. The other starts immediately. */
+       int late = delay_ns < 0 ? 0 : 1;
+
+       it[late].it_delay = timespec_from_ns(labs(delay_ns));
+       it[!late].it_delay = timespec_from_ns(0);
+
+       /* Release both tasks ... */
+       iabf_barrier_wait(ic, 0);
+
+       /* ... A+B run here ... */
+
+       /* ... and wait until both have finished. */
+       iabf_barrier_wait(ic, 1);
+
+       if (ic->ic_fini != NULL && ic->ic_fini[0] != NULL) {
+               char **fini = ic->ic_fini;
+
+               rc = callvpe(fini[0], fini, environ);
+               DEBUG_D(rc); /* waitpid status */
+               if (rc != 0)
+                       FATAL("finalizer '%s' terminated with status %d\n", fini[0], rc);
+       }
+
+       return 0;
+}
+
+/* Measure A and B, then fill in whichever delay parameters are still
+ * unset. Task threads must already be started.
+ *
+ * The (I, A+B, F) step is run $IABF_AUTOTUNE_COUNT times with no delay
+ * and the elapsed times of A and B are averaged. delay_begin and
+ * delay_end are then chosen to try to cover every overlap those
+ * averages allow:
+ *
+ *     AAAAAAAAAA      delay(A) is approx elapsed(B)
+ * BBBBB               delay(B) == 0
+ *
+ * AAAAAAAAAA          delay(A) == 0
+ * BBBBB               delay(B) == 0
+ *
+ * AAAAAAAAAA          delay(A) == 0
+ *          BBBBB      delay(B) is approx elapsed(A)
+ *
+ * Delaying task A is expressed as a negative delay_ns, hence the
+ * negated lower bound.
+ *
+ * Returns 0. Nothing is measured or changed when the count is 0.
+ */
+static int iabf_autotune(struct iabf_control *ic,
+                        struct iabf_task it[2])
+{
+       long elapsed_ns[2] = { 0, 0 };
+       long run;
+
+       DEBUG("begin autotune\n");
+
+       assert(ic->ic_autotune_count >= 0);
+
+       if (ic->ic_autotune_count == 0)
+               return 0;
+
+       /* Accumulate the elapsed time of both tasks over every run. */
+       for (run = 0; run < ic->ic_autotune_count; run++) {
+               iabf_step(ic, it, 0);
+
+               elapsed_ns[0] += timespec_to_ns(it[0].it_elapsed);
+               elapsed_ns[1] += timespec_to_ns(it[1].it_elapsed);
+       }
+
+       /* Turn the sums into averages. */
+       elapsed_ns[0] /= ic->ic_autotune_count;
+       elapsed_ns[1] /= ic->ic_autotune_count;
+
+       DEBUG_D(elapsed_ns[0]);
+       DEBUG_D(elapsed_ns[1]);
+
+       assert(0 <= elapsed_ns[0]);
+       assert(0 <= elapsed_ns[1]);
+
+       /* TODO Apply a multiplier to endpoints. */
+
+       /* Only endpoints still holding their "unset" sentinel are replaced. */
+       if (ic->ic_delay_begin_ns == LONG_MIN)
+               ic->ic_delay_begin_ns = -elapsed_ns[1];
+
+       if (ic->ic_delay_end_ns == LONG_MAX)
+               ic->ic_delay_end_ns = +elapsed_ns[0];
+
+       assert(ic->ic_delay_begin_ns <= ic->ic_delay_end_ns);
+       assert(0 <= ic->ic_step_count);
+
+       if (ic->ic_step_count != 0) {
+               long span_ns = ic->ic_delay_end_ns - ic->ic_delay_begin_ns;
+
+               ic->ic_delay_step_ns = span_ns / ic->ic_step_count;
+       }
+
+       if (ic->ic_delay_step_ns == 0)
+               ic->ic_delay_step_ns = 1; /* Or just leave it 0? */
+
+       DEBUG("end autotune\n");
+
+       return 0;
+}
+
+/* Start the A and B task threads, autotune the delay parameters if any
+ * of them were left unset, run iabf_step() once for each delay in
+ * [ic_delay_begin_ns, ic_delay_end_ns) and then stop and join the
+ * threads.
+ *
+ * @a and @b are the NULL-terminated command lines of tasks A and B.
+ * Always returns 0; every failure is fatal.
+ */
+static int iabf(struct iabf_control *ic, char **a, char **b)
+{
+       struct iabf_task it[2] = {
+               [0] = {
+                       .it_control = ic,
+                       .it_name = "A",
+                       .it_argv = a,
+               },
+               [1] = {
+                       .it_control = ic,
+                       .it_name = "B",
+                       .it_argv = b,
+               },
+       };
+       pthread_attr_t attr;
+       long delay_ns;
+       long i;
+       int rc;
+
+       rc = pthread_attr_init(&attr);
+       if (rc != 0)
+               FATAL("cannot initialize thread attributes: %s\n", xstrerror(rc));
+
+       /* Three participants per barrier: task A, task B and this thread. */
+       for (i = 0; i < 2; i++) {
+               rc = pthread_barrier_init(&ic->ic_barrier[i], NULL, 3);
+               if (rc != 0)
+                       FATAL("cannot initialize barrier: %s\n", xstrerror(rc));
+       }
+
+       /* Affinity slot 2 belongs to the calling (main) thread. */
+       if (ic->ic_affinity != NULL) {
+               rc = pthread_setaffinity_np(pthread_self(), sizeof(ic->ic_affinity[2]), &ic->ic_affinity[2]);
+               if (rc != 0)
+                       FATAL("cannot set CPU affinity : %s\n", xstrerror(rc));
+       }
+
+       /* Affinity slots 0 and 1 are applied to tasks A and B through the
+        * thread attributes, so each thread starts on its CPU set. */
+       for (i = 0; i < 2; i++) {
+               if (ic->ic_affinity != NULL) {
+                       rc = pthread_attr_setaffinity_np(&attr, sizeof(ic->ic_affinity[i]), &ic->ic_affinity[i]);
+                       if (rc != 0)
+                               FATAL("cannot set thread attr CPU affinity : %s\n", xstrerror(rc));
+               }
+
+               rc = pthread_create(&it[i].it_thread,
+                                   &attr,
+                                   iabf_task_thread,
+                                   &it[i]);
+               if (rc != 0)
+                       FATAL("cannot create thread: %s\n", xstrerror(rc));
+       }
+
+       if (ic->ic_delay_begin_ns == LONG_MIN ||
+           ic->ic_delay_end_ns == LONG_MAX ||
+           ic->ic_delay_step_ns == 0)
+               iabf_autotune(ic, it);
+
+       DEBUG_D(ic->ic_delay_begin_ns);
+       DEBUG_D(ic->ic_delay_end_ns);
+       DEBUG_D(ic->ic_delay_step_ns);
+
+       /* iabf_autotune() changes nothing when the autotune count is 0, so
+        * the "unset" sentinels can still be in place here. Sweeping from
+        * LONG_MIN, towards LONG_MAX, or with a step that never advances
+        * would not terminate, so refuse to start. */
+       if (ic->ic_delay_begin_ns == LONG_MIN ||
+           ic->ic_delay_end_ns == LONG_MAX ||
+           ic->ic_delay_step_ns <= 0)
+               FATAL("delay range is incomplete (begin %ld, end %ld, step %ld): "
+                     "set IABF_DELAY_BEGIN_NS, IABF_DELAY_END_NS and IABF_DELAY_STEP_NS "
+                     "or use a nonzero IABF_AUTOTUNE_COUNT\n",
+                     ic->ic_delay_begin_ns, ic->ic_delay_end_ns,
+                     ic->ic_delay_step_ns);
+
+       for (delay_ns = ic->ic_delay_begin_ns;
+            delay_ns < ic->ic_delay_end_ns;
+            delay_ns += ic->ic_delay_step_ns)
+               iabf_step(ic, it, delay_ns);
+
+       /* The task threads test this flag right after barrier 0, so set it
+        * first and then release them one last time. */
+       ic->ic_should_stop = 1;
+       DEBUG_D(ic->ic_should_stop);
+
+       iabf_barrier_wait(ic, 0);
+
+       for (i = 0; i < 2; i++) {
+               rc = pthread_join(it[i].it_thread, NULL);
+               if (rc != 0)
+                       FATAL("cannot join thread %s: %s\n", it[i].it_name, xstrerror(rc));
+       }
+
+       for (i = 0; i < 2; i++) {
+               rc = pthread_barrier_destroy(&ic->ic_barrier[i]);
+               if (rc != 0)
+                       FATAL("cannot destroy barrier: %s\n", xstrerror(rc));
+       }
+
+       pthread_attr_destroy(&attr);
+
+       return 0;
+}
+
+/* strsep() for argvs.
+ *
+ * Return the run of arguments starting at *pargs and ending before the
+ * first argument equal to delim. That delimiter slot is overwritten
+ * with NULL, so the returned run is itself a NULL-terminated argv, and
+ * *pargs is moved to the argument after it. When no delimiter is left,
+ * the remaining arguments are returned and *pargs is set to NULL.
+ * Returns NULL once *pargs is NULL.
+ */
+char **arg_sep(char ***pargs, const char *delim)
+{
+       char **token = *pargs;
+       char **cur;
+
+       if (token == NULL)
+               return NULL;
+
+       /* Scan forward to the delimiter or the end of the array. */
+       cur = token;
+       while (*cur != NULL && strcmp(*cur, delim) != 0)
+               cur++;
+
+       if (*cur == NULL) {
+               /* No more delimiters; this is the last token. */
+               *pargs = NULL;
+               return token;
+       }
+
+       /* Terminate the token and continue after the delimiter next time. */
+       *cur = NULL;
+       *pargs = cur + 1;
+
+       return token;
+}
+
+/* Parse an IABF_AFFINITY string into three CPU sets.
+ *
+ * @str must hold exactly three space-separated fields, each a
+ * comma-separated list of CPU numbers, e.g. "0 1 7" or "0,1 2,3 7".
+ * iabf() applies set [0] to task A, [1] to task B and [2] to its
+ * calling thread.
+ *
+ * Returns NULL when @str is NULL, otherwise a calloc()ed array of three
+ * cpu_set_t that the caller owns. A malformed string, a CPU number
+ * outside [0, CPU_SETSIZE) or an allocation failure is fatal.
+ */
+static cpu_set_t *iabf_affinity(const char *str)
+{
+       cpu_set_t *cpu_sets;
+       char *str1;
+       char *p;
+       int i;
+
+       if (str == NULL)
+               return NULL;
+
+       cpu_sets = calloc(3, sizeof(cpu_sets[0]));
+       /* strsep() writes into its input, so work on a private copy. */
+       str1 = strdup(str);
+       if (cpu_sets == NULL || str1 == NULL)
+               FATAL("cannot allocate memory for affinity '%s'\n", str);
+
+       p = str1;
+       for (i = 0; i < 3; i++) {
+               char *q;
+               char *r;
+
+               CPU_ZERO(&cpu_sets[i]);
+
+               q = strsep(&p, " ");
+               if (q == NULL)
+                       FATAL("invalid affinity '%s'\n", str);
+
+               while ((r = strsep(&q, ",")) != NULL) {
+                       char *end;
+                       long cpu;
+
+                       /* Reject empty or non-numeric entries instead of
+                        * letting them silently select CPU 0. */
+                       errno = 0;
+                       cpu = strtol(r, &end, 10);
+                       if (end == r || *end != '\0' || errno == ERANGE ||
+                           cpu < 0 || cpu >= CPU_SETSIZE)
+                               FATAL("invalid CPU '%s' in affinity '%s'\n", r, str);
+
+                       CPU_SET(cpu, &cpu_sets[i]);
+               }
+       }
+
+       /* Anything left over means more than three fields were given. */
+       if (p != NULL)
+               FATAL("invalid affinity '%s'\n", str);
+
+       free(str1);
+
+       return cpu_sets;
+}
+
+/* Entry point: split argv into the INIT, A, B and FINI command lines,
+ * read the IABF_* environment variables and run iabf().
+ *
+ * Each of the four command lines must be terminated by "---", so four
+ * delimiters are required. Invalid settings are reported with FATAL()
+ * rather than assert(), so the user gets a message and the checks are
+ * kept in NDEBUG builds. Returns 0 on completion.
+ */
+int main(int argc, char **argv)
+{
+       struct iabf_control ic = {
+               .ic_should_stop = 0,
+       };
+       char **args = argv + 1;
+       const char *debug_env;
+       char **a;
+       char **b;
+       int i;
+
+       iabf_tid = syscall(SYS_gettid);
+
+       debug_env = getenv("IABF_DEBUG");
+       iabf_debug = debug_env != NULL ? atoi(debug_env) : 0;
+
+       ic.ic_init = arg_sep(&args, iabf_delim);
+       a = arg_sep(&args, iabf_delim);
+       b = arg_sep(&args, iabf_delim);
+       ic.ic_fini = arg_sep(&args, iabf_delim);
+
+       /* arg_sep() returns NULL once the arguments are exhausted, so any
+        * NULL here means fewer than four delimiters were found. */
+       if (ic.ic_init == NULL ||
+           a == NULL ||
+           b == NULL ||
+           ic.ic_fini == NULL)
+               FATAL("missing '%s' in argv\n", iabf_delim);
+
+       for (i = 0; ic.ic_init[i] != NULL; i++)
+               DEBUG_S(ic.ic_init[i]);
+
+       for (i = 0; a[i] != NULL; i++)
+               DEBUG_S(a[i]);
+
+       for (i = 0; b[i] != NULL; i++)
+               DEBUG_S(b[i]);
+
+       for (i = 0; ic.ic_fini[i] != NULL; i++)
+               DEBUG_S(ic.ic_fini[i]);
+
+       /* LONG_MIN, LONG_MAX and 0 mark "unset" and trigger autotuning. */
+       ic.ic_affinity = iabf_affinity(getenv("IABF_AFFINITY"));
+       ic.ic_delay_begin_ns = iabf_getenvl("IABF_DELAY_BEGIN_NS", LONG_MIN);
+       ic.ic_delay_end_ns = iabf_getenvl("IABF_DELAY_END_NS", LONG_MAX);
+       ic.ic_delay_step_ns = iabf_getenvl("IABF_DELAY_STEP_NS", 0);
+       ic.ic_step_count = iabf_getenvl("IABF_STEP_COUNT", 0);
+       ic.ic_autotune_count = iabf_getenvl("IABF_AUTOTUNE_COUNT", 16);
+
+       DEBUG_D(ic.ic_delay_begin_ns);
+       DEBUG_D(ic.ic_delay_end_ns);
+       DEBUG_D(ic.ic_delay_step_ns);
+       DEBUG_D(ic.ic_step_count);
+       DEBUG_D(ic.ic_autotune_count);
+
+       if (ic.ic_delay_begin_ns > ic.ic_delay_end_ns)
+               FATAL("IABF_DELAY_BEGIN_NS (%ld) is greater than IABF_DELAY_END_NS (%ld)\n",
+                     ic.ic_delay_begin_ns, ic.ic_delay_end_ns);
+
+       if (ic.ic_delay_step_ns < 0)
+               FATAL("IABF_DELAY_STEP_NS (%ld) must not be negative\n",
+                     ic.ic_delay_step_ns);
+
+       if (ic.ic_step_count < 0)
+               FATAL("IABF_STEP_COUNT (%ld) must not be negative\n",
+                     ic.ic_step_count);
+
+       iabf(&ic, a, b);
+
+       return 0;
+}