1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/utils/loadgen.c
38 * See how many local OSCs we can start whaling on an OST
39 * We're doing direct ioctls instead of going through a system() call to lctl
40 * to avoid the bash overhead.
41 * Adds an osc / echo client pair in each thread and starts echo transactions.
43 * Author: Nathan Rutman <nathan@clusterfs.com>
50 #include <sys/types.h>
60 #include <lnet/lnetctl.h>
61 #include <libcfs/libcfsutil.h>
// Process-wide state shared by the command handlers and the worker threads.
// cmdname: program name copied from argv[0] in loadgen_init; prefixes messages.
// target: OST name stored by the device command.
// live_threads: worker count reported by the wait, write and exit paths.
// sig_received: polled by the worker and thread-start loops to stop early.
// o_verbose: verbosity level, assigned by the verbose command.
// my_ecs: tested in loadgen_stop_echosrv before the echo server is cleaned up.
// Where live_threads, sig_received, my_oss and my_ecs are updated is not
// visible in this excerpt.
64 static char cmdname[512];
65 static char target[64] = "";
67 static int live_threads = 0;
68 static int sig_received = 0;
69 static int o_verbose = 4; /* 0-5 */
70 static int my_oss = 0;
71 static int my_ecs = 0;
/* Handler registered in cmdlist for both the exit and the quit command.
 * It forwards its argument vector unchanged to Parser_quit. */
73 static int jt_quit(int argc, char **argv) {
74 Parser_quit(argc, argv);
/* Handler registered in cmdlist for the help command.
 * The visible string fragments form an introductory text (purpose of the
 * tool, the obdecho requirement and a typical session); the statement that
 * emits them is not visible here.  The return value is whatever Parser_help
 * returns for the same argc and argv. */
78 static int loadgen_usage(int argc, char **argv)
82 "This is a test program used to simulate large numbers of\n"
83 "clients. The echo obds are used, so the obdecho module must\n"
85 "Typical usage would be:\n"
86 " loadgen> dev lustre-OST0000 set the target device\n"
87 " loadgen> start 20 start 20 echo clients\n"
88 " loadgen> wr 10 5 have 10 clients do the brw_write\n"
89 " test 5 times each\n"
92 return (Parser_help(argc, argv));
/* Forward declarations of the command handlers that cmdlist refers to
 * before their definitions appear. */
95 static int loadgen_verbose(int argc, char **argv);
96 static int loadgen_target(int argc, char **argv);
97 static int loadgen_start_echosrv(int argc, char **argv);
98 static int loadgen_start_clients(int argc, char **argv);
99 static int loadgen_wait(int argc, char **argv);
100 static int loadgen_write(int argc, char **argv);
/* Command table for the parser; loadgen_main passes it to Parser_init and
 * Parser_execarg.  Each visible entry holds the command name, its handler,
 * a 0 field and the help text shown to the user. */
102 command_t cmdlist[] = {
103 {"device", loadgen_target, 0,
104 "set target ost name (e.g. lustre-OST0000)\n"
105 "usage: device <name> [<nid>]"},
106 {"dl", jt_obd_list, 0, "show all devices\n"
108 {"echosrv", loadgen_start_echosrv, 0, "start an echo server\n"},
/* NOTE(review): this command is registered as start, but its usage text
 * spells it start_clients -- confirm which spelling users should see. */
109 {"start", loadgen_start_clients, 0, "set up echo clients\n"
110 "usage: start_clients <num>"},
111 {"verbose", loadgen_verbose, 0, "set verbosity level 0-5\n"
112 "usage: verbose <level>"},
113 {"wait", loadgen_wait, 0,
114 "wait for all threads to finish\n"},
115 {"write", loadgen_write, 0,
116 "start a test_brw write test on X clients for Y iterations\n"
117 "usage: write <num_clients> <num_iter> [<delay>]"},
119 /* User interface commands */
120 {"help", loadgen_usage, 0, "help"},
121 {"exit", jt_quit, 0, "quit"},
122 {"quit", jt_quit, 0, "quit"},
/* Bit flags carried in the c_flags field of a command (trigger stores its
 * command argument there).  C_STOP is issued by kill_kids, C_WRITE by
 * loadgen_write, and C_CREATE_EVERY is tested in do_work.  No use of
 * C_READ is visible in this excerpt. */
128 #define C_STOP 0x0001
129 #define C_CREATE_EVERY 0x0002 /* destroy and recreate every time */
130 #define C_READ 0x0004
131 #define C_WRITE 0x0008
140 struct command_t k_cmd;
141 struct kid_t *k_next;
/* Head of the singly linked list of per-thread kid_t records (linked
 * through k_next).  wait_for_threads advances it as threads are joined. */
148 static struct kid_t *kid_list = NULL;
/* Allocate a zero-filled kid_t for the calling thread.
 * The record stores pthread_self() in k_pthread and points its k_next at the
 * current list head.  An allocation failure is reported on stderr.
 * tnum is the thread number supplied by the caller; the statement that
 * stores it, the update of kid_list and the return are not visible here. */
150 static struct kid_t *push_kid(int tnum)
153 kid = (struct kid_t *)calloc(1, sizeof(struct kid_t));
155 fprintf(stderr, "malloc failure\n");
158 kid->k_pthread = pthread_self();
159 kid->k_next = kid_list;
/* Shared state used to hand a command to the worker threads.
 * trigger fills trigger_cmd, trigger_count and trigger_start while holding
 * m_trigger and then broadcasts cv_trigger; run_one_child waits on
 * cv_trigger under the same mutex and copies trigger_cmd.
 * write_bytes is the running total of bytes written; obj_write adds to it
 * and report_perf reads it, both under m_count. */
166 int trigger_count = 0;
167 int waiting_count = 0;
170 struct timeval trigger_start;
171 struct command_t trigger_cmd;
172 pthread_mutex_t m_trigger = PTHREAD_MUTEX_INITIALIZER;
173 pthread_cond_t cv_trigger = PTHREAD_COND_INITIALIZER;
175 unsigned long long write_bytes;
176 pthread_mutex_t m_count = PTHREAD_MUTEX_INITIALIZER;
/* Publish a command to the worker threads.
 *   command  flag bits stored in trigger_cmd.c_flags
 *   threads  stored in trigger_count
 *   repeat   stored in trigger_cmd.c_rpt
 *   delay    stored in trigger_cmd.c_delay
 * Everything happens with m_trigger held: the command is recorded, the
 * start time is captured in trigger_start, m_count is taken briefly (the
 * statement it protects is not visible here), and finally every waiter on
 * cv_trigger is woken with a broadcast before the mutex is released. */
178 static void trigger(int command, int threads, int repeat, int delay)
181 pthread_mutex_lock(&m_trigger);
182 trigger_cmd.c_flags = command;
183 trigger_cmd.c_rpt = repeat;
184 trigger_cmd.c_delay = delay;
185 trigger_count = threads;
187 printf("trigger %d cmd c=%d f=%x\n", trigger_count,
188 trigger_cmd.c_rpt, trigger_cmd.c_flags);
189 gettimeofday(&trigger_start, NULL);
191 pthread_mutex_lock(&m_count);
193 pthread_mutex_unlock(&m_count);
195 pthread_cond_broadcast(&cv_trigger);
196 pthread_mutex_unlock(&m_trigger);
199 static __inline__ void stop_all(int unused)
/* Ask the worker threads to stop.
 * A C_STOP command is triggered with zero threads, repeats and delay, and
 * SIGTERM is delivered with pthread_kill to the k_pthread of a kid_t record
 * reached from kid_list.  The loop that walks the list is not visible in
 * this excerpt. */
204 static void kill_kids(void)
206 struct kid_t *tmp = kid_list;
209 trigger(C_STOP, 0, 0, 0);
211 pthread_kill(tmp->k_pthread, SIGTERM);
216 static void sig_master(int unused)
/* Join worker threads recorded in kid_list.
 * For a record, pthread_join collects the exit value, which is converted
 * from the returned pointer to an integer status and printed together with
 * the join result; kid_list is then advanced to the next record.  A final
 * message prints cmdname and rc.
 * NOTE(review): status is printed with a %d conversion; its declaration is
 * not visible here -- confirm it is an int rather than a long. */
222 static int wait_for_threads()
224 struct kid_t *tmp = kid_list;
228 printf("waiting for %d children\n", live_threads);
231 rc = pthread_join(tmp->k_pthread, &statusp);
232 status = (long)statusp;
234 printf("%d: joined, rc = %d, status = %d\n",
235 tmp->k_id, rc, status);
236 kid_list = tmp->k_next;
243 printf("%s done, rc = %d\n", cmdname, rc);
/* Write a text value to a proc file.
 *   proc_path  file to open write-only
 *   value      NUL-terminated string; strlen(value) bytes are written
 * Failure messages for open and for write are printed to stderr with the
 * strerror text for errno.  The return statements are not visible here. */
247 static int write_proc(char *proc_path, char *value)
251 fd = open(proc_path, O_WRONLY);
253 fprintf(stderr, "open('%s') failed: %s\n",
254 proc_path, strerror(errno));
257 rc = write(fd, value, strlen(value));
259 fprintf(stderr, "write('%s') failed: %s\n",
260 proc_path, strerror(errno));
/* Read an unsigned decimal number from a proc file.
 *   proc_path  file to open read-only
 *   value      receives the number parsed from the file contents (base 10)
 * An errno of EOPNOTSUPP after the read is singled out (the existing note
 * says this is probably an echo server); other failures are reported on
 * stderr with the strerror text and the errno value.
 * NOTE(review): read is asked for sizeof(buf) bytes and the buffer is then
 * handed to strtoull; no terminator is written in the visible lines --
 * confirm the buffer is NUL-terminated before parsing. */
267 static int read_proc(char *proc_path, unsigned long long *value)
272 fd = open(proc_path, O_RDONLY);
274 fprintf(stderr, "open('%s') failed: %s\n",
275 proc_path, strerror(errno));
279 rc = read(fd, buf, sizeof(buf));
281 if (errno == EOPNOTSUPP) {
282 /* probably an echo server */
286 fprintf(stderr, "read('%s') failed: %s (%d)\n",
287 proc_path, strerror(errno), errno);
290 *value = strtoull(buf, NULL, 10);
/* Print an estimate of how many clients fit before grant space runs out.
 *   thread  thread number; selects the osc proc directory named o followed
 *           by the number zero-padded to at least five digits
 * kbytesavail and cur_grant_bytes are read with read_proc and the estimate
 * printed is (avail << 10) / grant, i.e. available bytes per grant.
 * NOTE(review): no check that grant is non-zero is visible before the
 * division -- confirm one exists in the lines not shown. */
294 static int grant_estimate(int thread)
296 unsigned long long avail, grant;
301 /* I don't really care about protecting this with a mutex */
308 /* Divide /proc/fs/lustre/osc/o_0001/kbytesavail
309 by /proc/fs/lustre/osc/o_0001/cur_grant_bytes to find max clients */
310 sprintf(proc_path, "/proc/fs/lustre/osc/o%.5d/kbytesavail", thread);
311 rc = read_proc(proc_path, &avail);
314 sprintf(proc_path, "/proc/fs/lustre/osc/o%.5d/cur_grant_bytes", thread);
315 rc = read_proc(proc_path, &grant);
321 printf("Estimate %llu clients before we run out of grant space "
322 "(%lluK / %llu)\n", (avail << 10) / grant, avail, grant);
327 /* We hold a thread mutex around create/cleanup because cur_dev is not
328 shared-memory safe */
329 pthread_mutex_t m_config = PTHREAD_MUTEX_INITIALIZER;
/* Tear down one obd device by name.
 *   obdname  device to select, clean up and detach
 *   quiet    its use is not visible in this excerpt; presumably it
 *            suppresses the error messages -- TODO confirm
 * With m_config held the device is selected through jt_lcfg_device, then
 * jt_obd_cleanup and jt_obd_detach are called; each step has its own
 * stderr message naming cmdname, the device and the result code. */
331 static int cleanup(char *obdname, int quiet)
336 pthread_mutex_lock(&m_config);
340 rc = jt_lcfg_device(2, args);
342 fprintf(stderr, "%s: can't configure '%s' (%d)\n",
343 cmdname, obdname, rc);
345 rc = jt_obd_cleanup(2, args);
347 fprintf(stderr, "%s: can't cleanup '%s' (%d)\n",
348 cmdname, obdname, rc);
349 rc = jt_obd_detach(1, args);
351 fprintf(stderr, "%s: can't detach '%s' (%d)\n",
352 cmdname, obdname, rc);
354 pthread_mutex_unlock(&m_config);
/* Create the osc and echo_client device pair used by one worker thread.
 *   oname  name, also used as uuid, of the osc device to attach
 *   ename  name, also used as uuid, of the echo_client device to attach
 *   dev    receives the value of jt_obd_get_device() once the echo client
 *          has been selected with jt_obd_device
 * The whole sequence runs with m_config held, and the mutex is released on
 * both visible exit paths.  Steps in order: attach the osc, set it up with
 * OSS_UUID as one argument, write 1 to its max_dirty_mb and
 * max_rpcs_in_flight proc files, attach the echo_client, set it up, select
 * it.  Each failing step prints its own message to stderr.
 * NOTE(review): the result of the first write_proc call is overwritten by
 * the second before any visible check -- confirm that ignoring it is
 * intended. */
358 static int echocli_setup(char *oname, char *ename, int *dev)
364 pthread_mutex_lock(&m_config);
369 /* attach "osc" oscname oscuuid */
371 args[2] = args[3] = oname;
372 rc = jt_lcfg_attach(4, args);
374 fprintf(stderr, "%s: can't attach osc '%s' (%d)\n",
376 /* Assume we want e.g. an old one cleaned anyhow. */
379 /* setup ostname "OSS_UUID" */
381 args[2] = "OSS_UUID";
382 rc = jt_lcfg_setup(3, args);
384 fprintf(stderr, "%s: can't setup osc '%s' (%d)\n",
389 /* Large grants cause ENOSPC to be reported, even though
390 there's space left. We can reduce the grant size by
392 sprintf(proc_path, "/proc/fs/lustre/osc/%s/max_dirty_mb", oname);
393 rc = write_proc(proc_path, "1");
394 sprintf(proc_path, "/proc/fs/lustre/osc/%s/max_rpcs_in_flight", oname);
395 rc = write_proc(proc_path, "1");
398 /* attach "echo_client" echoname echouuid */
399 args[1] = "echo_client";
400 args[2] = args[3] = ename;
401 rc = jt_lcfg_attach(4, args);
403 fprintf(stderr, "%s: can't attach '%s' (%d)\n",
406 fprintf(stderr, "%s: is the obdecho module loaded?\n",
412 rc = jt_lcfg_setup(2, args);
414 fprintf(stderr, "%s: can't setup '%s' (%d)\n",
420 rc = jt_obd_device(2, args);
422 fprintf(stderr, "%s: can't set device '%s' (%d)\n",
428 *dev = jt_obd_get_device();
429 pthread_mutex_unlock(&m_config);
433 pthread_mutex_unlock(&m_config);
439 /* We can't use the libptlctl library fns because they are not shared-memory
440 safe with respect to the ioctl device (cur_dev) */
/* Issue one obd ioctl using a caller-owned request.
 *   cmd     ioctl command passed to l_ioctl on OBD_DEV_ID
 *   data    request; packed into buf with obd_ioctl_pack before the call
 *   unpack  presumably selects whether the reply is copied back into data
 *           with obd_ioctl_unpack -- the test itself is not visible here
 * A pack or unpack failure prints the device number from data->ioc_dev. */
441 static int obj_ioctl(int cmd, struct obd_ioctl_data *data, int unpack)
446 //IOC_PACK(cmdname, data);
447 if (obd_ioctl_pack(data, &buf, sizeof(*data))) {
448 fprintf(stderr, "dev %d invalid ioctl\n", data->ioc_dev);
453 rc = l_ioctl(OBD_DEV_ID, cmd, buf);
456 //IOC_UNPACK(argv[0], data);
457 if (obd_ioctl_unpack(data, buf, sizeof(*data))) {
458 fprintf(stderr, "dev %d invalid reply\n", data->ioc_dev);
470 /* See jt_obd_create */
/* Create one object on the device of this kid and remember its id.
 * The request is zeroed, addressed to kid->k_dev, and describes an object
 * with mode 0100644, id 0, group 2, uid 0 and gid 0; o_valid marks type,
 * mode, id, uid and gid as present.  It is sent with OBD_IOC_CREATE and the
 * reply is unpacked.  A failure prints the result code and the errno text.
 * The reply must have OBD_MD_FLID set in o_valid; otherwise the o_valid
 * mask is printed as an error.  On success the returned o_id is stored in
 * kid->k_objid and printed. */
471 static int obj_create(struct kid_t *kid)
473 struct obd_ioctl_data data;
476 memset(&data, 0, sizeof(data));
477 data.ioc_dev = kid->k_dev;
478 data.ioc_obdo1.o_mode = 0100644;
479 data.ioc_obdo1.o_id = 0;
480 data.ioc_obdo1.o_gr = 2;
481 data.ioc_obdo1.o_uid = 0;
482 data.ioc_obdo1.o_gid = 0;
483 data.ioc_obdo1.o_valid = OBD_MD_FLTYPE | OBD_MD_FLMODE |
484 OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID;
486 rc = obj_ioctl(OBD_IOC_CREATE, &data, 1);
488 fprintf(stderr, "%d: create (%d) %s\n",
489 kid->k_id, rc, strerror(errno));
493 if (!(data.ioc_obdo1.o_valid & OBD_MD_FLID)) {
494 fprintf(stderr, "%d: create oid not valid "LPX64"\n",
495 kid->k_id, data.ioc_obdo1.o_valid);
499 kid->k_objid = data.ioc_obdo1.o_id;
502 printf("%d: cr "LPX64"\n", kid->k_id, kid->k_objid);
507 /* See jt_obd_destroy */
/* Destroy the object whose id is stored in kid->k_objid.
 * The id is printed, then a zeroed request addressed to kid->k_dev is
 * filled with that id and a regular-file mode of 0644 (o_valid marks id and
 * mode as present) and sent with OBD_IOC_DESTROY.  A failure is reported on
 * stderr with cmdname, the kid id, the object id and the result code. */
508 static int obj_delete(struct kid_t *kid)
510 struct obd_ioctl_data data;
514 printf("%d: del "LPX64"\n", kid->k_id, kid->k_objid);
516 memset(&data, 0, sizeof(data));
517 data.ioc_dev = kid->k_dev;
518 data.ioc_obdo1.o_id = kid->k_objid;
519 data.ioc_obdo1.o_mode = S_IFREG | 0644;
520 data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE;
522 rc = obj_ioctl(OBD_IOC_DESTROY, &data, 1);
524 fprintf(stderr, "%s-%d: can't destroy obj "LPX64" (%d)\n",
525 cmdname, kid->k_id, kid->k_objid, rc);
/* Elapsed time in seconds, as a floating-point value, from *b to *a.
 * Both arguments are pointers to objects with tv_sec and tv_usec members;
 * the callers in this file pass addresses of struct timeval values.
 * Each argument is evaluated twice, so pass expressions without side
 * effects.
 * NOTE(review): difftime is also the name of a standard library function
 * declared in time.h; confirm that shadowing it with this macro is
 * intended. */
531 #define difftime(a, b) \
532 ((a)->tv_sec - (b)->tv_sec + \
533 ((a)->tv_usec - (b)->tv_usec) / 1000000.0)
535 /* See jt_obd_test_brw */
/* Run a series of bulk writes against the object of this kid.
 * Each request covers len bytes, where len is pages times the system page
 * size, is addressed to kid->k_dev and kid->k_objid, and carries the
 * OBD_FL_DEBUG_CHECK flag.  The loop sends OBD_IOC_BRW_WRITE up to count
 * times without unpacking the reply, clears the blocks and grant bits of
 * o_valid before every call, and advances ioc_offset by len afterwards.  A
 * failing write prints the errno text and keeps errno as rc.
 * After the loop the elapsed time since start is measured, i * len is added
 * to write_bytes under m_count, and a throughput line is printed.
 * Where count and the final value of pages are assigned is not visible in
 * this excerpt. */
536 static int obj_write(struct kid_t *kid)
538 struct obd_ioctl_data data;
539 struct timeval start;
541 int rc = 0, i, pages = 0;
544 printf("%d: wr "LPX64"\n", kid->k_id, kid->k_objid);
548 len = pages * getpagesize();
550 memset(&data, 0, sizeof(data));
551 data.ioc_dev = kid->k_dev;
552 /* communicate the 'type' of brw test and batching to echo_client.
553 * don't start. we'd love to refactor this lctl->echo_client
555 data.ioc_pbuf1 = (void *)1;
558 data.ioc_obdo1.o_id = kid->k_objid;
559 data.ioc_obdo1.o_mode = S_IFREG;
560 data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
562 data.ioc_obdo1.o_flags = OBD_FL_DEBUG_CHECK;
563 data.ioc_count = len;
566 gettimeofday(&start, NULL);
568 for (i = 1; i <= count; i++) {
569 data.ioc_obdo1.o_valid &= ~(OBD_MD_FLBLOCKS|OBD_MD_FLGRANT);
570 rc = obj_ioctl(OBD_IOC_BRW_WRITE, &data, 0);
572 fprintf(stderr, "%d: write %s\n", kid->k_id,
573 strerror(rc = errno));
577 data.ioc_offset += len;
584 gettimeofday(&end, NULL);
585 diff = difftime(&end, &start);
589 pthread_mutex_lock(&m_count);
590 write_bytes += i * len;
591 pthread_mutex_unlock(&m_count);
594 printf("%d: wrote %dx%d pages in %.3fs (%.3f MB/s): %s",
595 kid->k_id, i, pages, diff,
596 ((double)i * len) / (diff * 1048576.0),
601 fprintf(stderr, "%s-%d: err test_brw obj "LPX64" (%d)\n",
602 cmdname, kid->k_id, kid->k_objid, rc);
// Execute the command stored in kid->k_cmd for one worker.
// Without C_CREATE_EVERY a single object is created before the loop and
// deleted after it; with C_CREATE_EVERY an object is created and deleted
// inside every iteration.  The loop runs at most c_rpt times and tests rc
// and sig_received at the top of each pass.  When C_WRITE is set,
// grant_estimate is called for this kid (any other statement in that branch
// is not visible here).  With verbosity above 3 a progress line is printed
// on every tenth iteration, and each pass sleeps c_delay seconds.
// A completion line with the kid id and rc is printed at the end.
606 static int do_work(struct kid_t *kid)
608 int rc = 0, err, iter = 0;
610 if (!(kid->k_cmd.c_flags & C_CREATE_EVERY))
611 rc = obj_create(kid);
613 for (iter = 0; iter < kid->k_cmd.c_rpt; iter++) {
614 if (rc || sig_received)
617 if (kid->k_cmd.c_flags & C_CREATE_EVERY) {
618 rc = obj_create(kid);
623 if (kid->k_cmd.c_flags & C_WRITE) {
625 grant_estimate(kid->k_id);
628 if (kid->k_cmd.c_flags & C_CREATE_EVERY) {
629 err = obj_delete(kid);
633 if ((o_verbose > 3) && (iter % 10 == 0))
634 printf("%d: i%d\n", kid->k_id, iter);
636 sleep(kid->k_cmd.c_delay);
639 if (!(kid->k_cmd.c_flags & C_CREATE_EVERY)) {
640 err = obj_delete(kid);
645 printf("%d: done (%d)\n", kid->k_id, rc);
// Print the aggregate write throughput since the last trigger.
// The elapsed time is measured from trigger_start to now.  With m_count
// held, write_bytes is reported in whole megabytes (shifted right by 20)
// together with the elapsed seconds and the resulting MB per second.
650 static void report_perf()
655 gettimeofday(&end, NULL);
656 diff = difftime(&end, &trigger_start);
658 pthread_mutex_lock(&m_count);
659 printf("wrote %lluMB in %.3fs (%.3f MB/s)\n",
660 write_bytes >> 20, diff,
661 (write_bytes >> 20) / diff);
662 pthread_mutex_unlock(&m_count);
// Thread entry point for one worker.
//   threadvp  the thread number, passed as an integer converted to a pointer
// The worker builds its device names from the thread number (o or e followed
// by at least five digits), creates the device pair with echocli_setup and
// ends the thread with the error code if that fails.  It then registers
// itself with push_kid and loops until rc or sig_received becomes non-zero:
// each pass takes m_trigger, waits on cv_trigger, and copies trigger_cmd
// into kid->k_cmd before releasing the mutex.  On the way out it cleans up
// the echo client and then the osc, and ends the thread with rc as the exit
// value.
// NOTE(review): oname and ename hold 10 bytes, so a thread number with more
// than eight digits would overflow them in sprintf -- confirm the caller
// bounds the number.
666 static void *run_one_child(void *threadvp)
669 char oname[10], ename[10];
670 int thread = (long)threadvp, dev = 0;
674 printf("%s: running thread #%d\n", cmdname, thread);
676 sprintf(oname, "o%.5d", thread);
677 sprintf(ename, "e%.5d", thread);
678 rc = echocli_setup(oname, ename, &dev);
680 fprintf(stderr, "%s: can't setup '%s/%s' (%d)\n",
681 cmdname, oname, ename, rc);
682 pthread_exit((void *)(long)rc);
685 kid = push_kid(thread);
692 while(!(rc || sig_received)) {
693 pthread_mutex_lock(&m_trigger);
695 if ((waiting_count == live_threads) && timer_on) {
700 pthread_cond_wait(&cv_trigger, &m_trigger);
704 /* First trigger_count threads will do the work, the rest
708 printf("%d: trigger %d cmd %x\n",
709 kid->k_id, trigger_count,
710 trigger_cmd.c_flags);
712 memcpy(&kid->k_cmd, &trigger_cmd, sizeof(trigger_cmd));
713 pthread_mutex_unlock(&m_trigger);
716 pthread_mutex_unlock(&m_trigger);
721 printf("%s: thread #%d done (%d)\n", cmdname, thread, rc);
727 err = cleanup(ename, 0);
729 err = cleanup(oname, 0);
732 pthread_exit((void *)(long)rc);
// Handler for the start command: launch additional worker threads.
// The requested count is parsed from argv[1] with strtoul (base 0) and added
// to live_threads to form the upper thread number.  An unset target is
// reported with a hint to use the device command.  Threads are created
// joinable with a stack of PTHREAD_STACK_MIN bytes and run run_one_child;
// creation stops on the first error or once sig_received is set, and the
// handler sleeps a tenth of a second after each creation.
// NOTE(review): pthread_attr_init reports failure through its return value,
// yet its error message prints strerror(errno); the pthread_create message
// uses strerror(rc).  The first one probably wants strerror(rc) too.
735 static int loadgen_start_clients(int argc, char **argv)
738 struct timespec ts = {0, 1000*1000*100}; /* .1 sec */
744 numt = strtoul(argv[1], NULL, 0);
749 fprintf(stderr, "%s: target OST is not defined, use 'device' "
750 "command\n", cmdname);
754 rc = pthread_attr_init(&attr);
756 fprintf(stderr, "%s: pthread_attr_init:(%d) %s\n",
757 cmdname, rc, strerror(errno));
760 pthread_attr_setstacksize (&attr, PTHREAD_STACK_MIN);
761 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
763 numt += live_threads;
765 printf("start %d to %d\n", i, numt);
766 while(!rc && !sig_received && (i < numt)) {
770 rc = pthread_create(&thread, &attr, run_one_child,
773 fprintf(stderr, "%s: pthread: #%d - (%d) %s\n",
774 cmdname, i, rc, strerror(rc));
778 /* give them slightly different start times */
779 nanosleep(&ts, NULL);
782 pthread_attr_destroy(&attr);
/* Handler for the device command: choose the target OST.
 *   argv[1]  target name, copied (truncated to fit) into target
 *   argv[2]  optional nid string
 * argc must be 2 or 3.  A nid argument is parsed with libcfs_str2nid, and
 * LNET_NID_ANY is rejected as invalid.  On the other visible path the named
 * device must already exist locally (jt_obd_device) and the first local nid
 * is fetched through jt_ptl_list_nids, which is handed the address of nidx
 * with its first byte set to 1.  The condition choosing between the two
 * paths is not visible here.
 * When the resulting nid string differs from the stored nid, it is saved
 * and a uuid named OSS_UUID is added with jt_lcfg_add_uuid.  Finally the
 * target name is stored and printed. */
787 static int loadgen_target(int argc, char **argv)
793 if (argc < 2 || argc > 3)
799 nidx = libcfs_str2nid(argv[2]);
800 if (nidx == LNET_NID_ANY) {
801 fprintf(stderr, "%s: invalid nid '%s'\n",
806 /* Local device should be in list */
808 rc = jt_obd_device(2, args);
810 fprintf(stderr, "%s: local device '%s' doesn't "
811 "seem to exist. You must use obdfilter device "
812 "names like 'lustre-OST0000'. Use 'dl' to "
813 "list all devices.\n",
818 /* Use the first local nid */
819 args[1] = (char *)(&nidx);
820 args[1][0] = 1; /* hack to get back first nid */
821 rc = jt_ptl_list_nids(2, args);
823 fprintf(stderr, "%s: can't get local nid (%d)\n",
828 if (strcmp(nid, libcfs_nid2str(nidx)) != 0) {
829 /* if new, do an add_uuid */
830 sprintf(nid, "%s", libcfs_nid2str(nidx));
832 /* Fixme change the uuid for every new one */
833 args[1] = "OSS_UUID";
835 rc = jt_lcfg_add_uuid(3, args);
837 fprintf(stderr, "%s: can't add uuid '%s' (%d)\n",
838 cmdname, args[2], rc);
843 snprintf(target, sizeof(target), "%s", argv[1]);
844 printf("Target OST name is '%s'\n", target);
/* Handler for the verbose command: store atoi(argv[1]) in o_verbose and
 * print the new level.  atoi cannot report a malformed number, and no range
 * check against the documented 0-5 interval is visible in this excerpt. */
849 static int loadgen_verbose(int argc, char **argv)
853 o_verbose = atoi(argv[1]);
854 printf("verbosity set to %d\n", o_verbose);
/* Handler for the write command: start a write test on running workers.
 *   argv[1]  number of worker threads to use
 *   argv[2]  number of iterations, passed to trigger as the repeat count
 *   argv[3]  optional delay, defaulting to 0 when absent
 * argc must be 3 or 4.  Asking for more threads than live_threads is
 * reported on stderr.  Otherwise a C_WRITE command is published through
 * trigger. */
858 static int loadgen_write(int argc, char **argv)
862 if (argc < 3 || argc > 4)
864 threads = atoi(argv[1]);
865 if (threads > live_threads) {
866 fprintf(stderr, "requested %d threads but only %d are running. "
867 "Use 'start' to start some more.\n",
868 threads, live_threads);
871 trigger(C_WRITE, threads, atoi(argv[2]),
872 (argc == 4) ? atoi(argv[3]) : 0);
/* Name of the echo server device; loadgen_start_echosrv also uses it as the
 * device uuid. */
876 char ecsname[] = "echosrv";
/* Tear down the echo server devices.
 * An argc of 9 is an internal marker: loadgen_start_echosrv passes it on its
 * failure path.  In that case verbose is 0 and the echo server device is
 * cleaned up regardless of my_ecs; otherwise it is cleaned up only when
 * my_ecs is set.  Which device the first cleanup call names is not visible
 * here.
 * NOTE(review): the value called verbose is passed as the parameter that
 * cleanup names quiet -- confirm the sense is not inverted. */
877 static int loadgen_stop_echosrv(int argc, char **argv)
879 int verbose = (argc != 9);
882 cleanup(name, verbose);
885 if (my_ecs || (argc == 9)) {
886 cleanup(ecsname, verbose);
/* Handler for the echosrv command: bring up an echo server and an OSS.
 * With m_config held, a device whose name and uuid are both ecsname is
 * attached and set up, then a device whose name and uuid are both OSS is
 * attached and set up.  One outcome of the OSS attach is treated as success
 * (already set up for somebody else): a message is printed and the mutex is
 * released.  Other failures print a message naming cmdname and the result
 * code.  The mutex is released on every visible exit path; the last one
 * then calls loadgen_stop_echosrv with the internal argc marker 9 to undo
 * the partial setup. */
892 static int loadgen_start_echosrv(int argc, char **argv)
897 pthread_mutex_lock(&m_config);
901 /* attach obdecho echosrv echosrv_UUID */
903 args[2] = args[3] = ecsname;
904 rc = jt_lcfg_attach(4, args);
906 fprintf(stderr, "%s: can't attach echo server (%d)\n",
908 /* Assume we want e.g. an old one cleaned anyhow. */
914 rc = jt_lcfg_setup(1, args);
916 fprintf(stderr, "%s: can't setup echo server (%d)\n",
921 /* Create an OSS to handle the communications */
922 /* attach ost OSS OSS_UUID */
924 args[2] = args[3] = "OSS";
926 rc = jt_lcfg_attach(4, args);
928 /* Already set up for somebody else, that's fine. */
929 printf("OSS already set up, no problem.\n");
930 pthread_mutex_unlock(&m_config);
934 fprintf(stderr, "%s: can't attach OSS (%d)\n",
941 rc = jt_lcfg_setup(1, args);
943 fprintf(stderr, "%s: can't setup OSS (%d)\n",
948 pthread_mutex_unlock(&m_config);
952 pthread_mutex_unlock(&m_config);
953 loadgen_stop_echosrv(9, argv);
957 static int loadgen_wait(int argc, char **argv)
959 /* Give scripts a chance to start some threads */
/* One-time start-up work.
 * argv[0] is copied into cmdname with an unbounded sprintf (cmdname holds
 * 512 bytes), and sig_master is installed for SIGTERM and SIGINT.  The
 * obdecho module is then probed by attaching an echo_client device named
 * ecc_test: a failed attach prints a hint asking whether the module is
 * loaded, and the probe device is detached again afterwards. */
967 static int loadgen_init(int argc, char **argv)
972 sprintf(cmdname, "%s", argv[0]);
974 signal(SIGTERM, sig_master);
975 signal(SIGINT, sig_master);
977 /* Test to make sure obdecho module is loaded */
979 args[1] = "echo_client";
980 args[2] = args[3] = "ecc_test";
981 rc = jt_lcfg_attach(4, args);
983 fprintf(stderr, "%s: can't attach echo client (%d)\n",
986 fprintf(stderr, "%s: is the obdecho module loaded?\n",
990 jt_obd_detach(1, args);
/* Shutdown path: print how many children are being stopped, collect them
 * with wait_for_threads, then tear down the echo server through
 * loadgen_stop_echosrv.  The statement that asks the threads to stop is not
 * visible in this excerpt. */
996 static int loadgen_exit()
1000 printf("stopping %d children\n", live_threads);
1002 rc = wait_for_threads();
1004 loadgen_stop_echosrv(0, NULL);
1009 /* libptlctl interface */
/* Program driver.
 * The ptl, obd and dbg layers are initialised in that order, each checked
 * for a negative result.  The parser is given the prompt and cmdlist, and
 * loadgen_init runs.  Commands then come either from the remaining command
 * line arguments (Parser_execarg) or from the interactive parser loop
 * (Parser_commands); the condition choosing between them is not visible
 * here.  Afterwards loadgen_exit runs and obd_finalize is called. */
1010 static int loadgen_main(int argc, char **argv)
1015 /* without this threaded errors cause segfault */
1018 if ((rc = ptl_initialize(argc, argv)) < 0)
1020 if ((rc = obd_initialize(argc, argv)) < 0)
1022 if ((rc = dbg_initialize(argc, argv)) < 0)
1025 Parser_init("loadgen> ", cmdlist);
1027 rc = loadgen_init(argc, argv);
1032 rc = Parser_execarg(argc - 1, argv + 1, cmdlist);
1034 rc = Parser_commands();
1037 rc = loadgen_exit();
1040 obd_finalize(argc, argv);
/* Program entry point, compiled only when LIBLUSTRE_TEST is not defined.
 * It runs loadgen_main and ends the main thread with pthread_exit, passing
 * the result code as the thread exit value. */
1044 #ifndef LIBLUSTRE_TEST
1045 int main (int argc, char **argv)
1048 rc = loadgen_main(argc, argv);
1049 pthread_exit((void *)(long)rc);