1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/tests/write_append_truncate.c
38 * Each loop does 3 things:
39 * - truncate file to zero (not via ftruncate though, to test O_APPEND)
40 * - write a "chunk" of data (should be at file offset 0 after truncate)
41 * - on two different MPI ranks, one appends to the file and the other truncates-up the file
43 * If the truncate happened first, we should have a hole in the file.
44 * If the append happened first, we should have truncated the file down.
46 * WRITE_SIZE_MAX and APPEND_SIZE_MAX are large enough to cross a stripe.
48 * compile: mpicc -g -Wall -o write_append_truncate write_append_truncate.c
49 * run: mpirun -np 2 -machlist <hostlist file> write_append_truncate <file>
50 * or: pdsh -w <two hosts> write_append_truncate <file>
51 * or: prun -n 2 [-N 2] write_append_truncate <file>
57 #include <sys/types.h>
/* Default number of test loops (initial value of nloops in main). */
67 #define DEFAULT_ITER 10000
/* Default upper bounds, in bytes, for write_max and append_max in main. */
69 #define WRITE_SIZE_MAX 1234567
70 #define APPEND_SIZE_MAX 1234567
/* NOTE(review): main initializes trunc_max to 0 rather than to this value;
 * confirm where TRUNC_SIZE_MAX is actually used. */
71 #define TRUNC_SIZE_MAX 1234567
/* Per-loop status line: write char and size, append char and size, and the
 * truncate offset; each size is printed in decimal and in hex. */
73 #define STATUS_FMT "WR %c %7d/%#08x, AP %c %7d/%#08x, TR@ %7d/%#08x"
/* Host name buffer: filled by gethostname() in main, printed by rprintf(). */
75 #define HOSTNAME_SIZE 50
76 char hostname[HOSTNAME_SIZE];
/* Capacity of the fnames[] array of file name arguments in main. */
77 #define FNAMES_MAX 256
/*
 * Print the command-line synopsis and a description of each option to
 * stdout.
 *
 * prog: program name substituted into the synopsis and the final message.
 *
 * NOTE(review): the synopsis does not list -W although it is described
 * below, and the getopt string in main also accepts -c, which is not
 * described here at all -- confirm whether both are intentional.
 */
79 void usage(char *prog)
81 printf("usage: %s [-a append_max] [-C] [-n nloops] [-s seed]\n"
82 "\t\t[-t trunc_max] [-T] [-v] [-w write_max] <filename> ...\n", prog);
83 printf("\t-a append_max: maximum size of append, default %u bytes\n",
85 printf("\t-C: 'classic' checks (on file 0)\n");
86 printf("\t-n nloops: count of loops to run, default %u\n",DEFAULT_ITER);
87 printf("\t-s seed: random seed to use, default {current time}\n");
88 printf("\t-t trunc_max: maximum size of truncate, default %u bytes\n",
90 printf("\t-T: 'classic' truncates (on file 0)\n");
91 printf("\t-w write_max: maximum size of write, default %u bytes\n",
93 printf("\t-W: 'classic' writes (on rank 0, file 0)\n");
94 printf("\t-v: run in verbose mode (repeat for more verbosity)\n");
95 printf("\tfilename for each mountpoint of same filesystem on a node\n");
/* NOTE(review): "\b" is the backspace escape; the other lines start with
 * "\t", so this may be a typo -- confirm before changing the output. */
96 printf("\b%s must be run with at least 2 processes\n", prog);
102 /* Print a message tagged with the process rank ("r="), the loop count
 * ("l=") and the host name, then the caller's formatted text.
 *
 * rank:  MPI rank of the calling process.
 * loop:  current loop number; callers pass -1 outside the test loop.
 * error: value handed to MPI_Abort().  Callers in this file pass 0 for
 *        purely informational messages and keep running afterwards, so the
 *        abort is presumably taken only when error is non-zero -- confirm.
 * fmt:   printf-style format; the format attribute below lets the compiler
 *        check it against the variadic arguments (parameters 4 and 5...).
 */
103 void rprintf(int rank, int loop, int error, const char *fmt, ...)
104 __attribute__ ((format (printf, 4, 5)));
106 void rprintf(int rank, int loop, int error, const char *fmt, ...)
110 printf("r=%2u", rank);
112 printf(" l=%04u", loop);
114 printf(" %s", hostname);
122 MPI_Abort(MPI_COMM_WORLD, error);
/*
 * Test driver.  Initializes MPI, parses options and file names, then runs
 * nloops iterations.  In each iteration the ranks agree (via MPI_Bcast from
 * root 0) on random sizes and on which ranks write, append and truncate.
 * The write rank truncates the file to zero and writes write_size bytes;
 * after a barrier the append rank appends append_size bytes while the
 * truncate rank truncates the file to trunc_offset.  The resulting size and
 * contents are then compared against the two legal outcomes
 * (append-then-truncate or truncate-then-append).
 *
 * argc, argv: command line; options are followed by one or more file names.
 */
125 int main(int argc, char *argv[])
127 int n, nloops = DEFAULT_ITER;
128 int nfnames = 0, ifnames, fd;
129 int rank = -1, nproc, ret;
130 unsigned write_max = WRITE_SIZE_MAX;
131 unsigned append_max = APPEND_SIZE_MAX;
132 unsigned write_size = 0, append_size = 0, trunc_size = 0;
133 unsigned trunc_max = 0, trunc_offset = 0;
136 char *read_buf = NULL;
137 char *trunc_buf = NULL;
142 int classic_check = 0, classic_trunc = 0, classic_write = 0;
143 char write_char = 'A', append_char = 'a';
144 char *fnames[FNAMES_MAX], *end;
145 char *prog = "write_append_truncate";
/* MPI_Init is handed argc/argv before getopt() looks at them. */
148 error = MPI_Init(&argc, &argv);
149 if (error != MPI_SUCCESS)
150 printf("%s: MPI_Init failed: %d\n", prog, error);
151 else if (verbose > 2)
152 printf("%s: MPI_Init succeeded\n", prog);
/* Locate the last '/' in argv[0] to derive the program name for messages. */
154 prog = strrchr(argv[0], '/');
/* Option parsing.  Numeric arguments go through strtoul() with base 0 and
 * are rejected when trailing characters remain (*end != 0).
 * NOTE(review): the option string accepts 'c', which usage() does not
 * describe -- confirm it is handled. */
160 while ((c = getopt(argc, argv, "a:cCn:s:t:Tvw:W")) != -1) {
163 append_max = strtoul(optarg, &end, 0);
164 if (append_max == 0 || *end) {
165 fprintf(stderr, "%s: bad append option '%s'\n",
174 nloops = strtoul(optarg, &end, 0);
175 if (nloops == 0 || *end) {
176 fprintf(stderr, "%s: bad nloops option '%s'\n",
182 seed = strtoul(optarg, &end, 0);
184 fprintf(stderr, "%s: bad seed option '%s'\n",
190 trunc_max = strtoul(optarg, &end, 0);
192 fprintf(stderr,"%s: bad truncate option '%s'\n",
204 write_max = strtoul(optarg, &end, 0);
205 if (write_max == 0 || *end) {
206 fprintf(stderr, "%s: bad write option '%s'\n",
215 fprintf(stderr, "%s: unknown option '%c'\n", prog, c);
/* At least one file name must follow the options, and no more than
 * FNAMES_MAX of them fit in fnames[]. */
222 if (argc == optind) {
223 fprintf(stderr, "%s: missing filename argument\n", prog);
227 if (argc > optind + FNAMES_MAX) {
228 fprintf(stderr, "%s: too many extra options\n", prog);
/* Every remaining argument is a file name. */
232 while (optind < argc)
233 fnames[nfnames++] = argv[optind++];
/* Query this process's rank and the number of processes. */
235 error = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
236 if (verbose > 2 || error != MPI_SUCCESS)
237 rprintf(rank, -1, error != MPI_SUCCESS, "MPI_Comm_rank: %d\n",
240 error = MPI_Comm_size(MPI_COMM_WORLD, &nproc);
241 if (verbose > 2 || error != MPI_SUCCESS)
242 rprintf(rank, -1, error != MPI_SUCCESS, "MPI_Comm_size: %d\n",
246 rprintf(rank, -1, 1, "%s: must run with at least 2 processes\n",
/* Data buffers sized by the configured maxima. */
249 append_buf = malloc(append_max);
250 if (append_buf == NULL)
251 rprintf(rank, -1, 1,"%s: error allocating append_buf %u\n",
254 write_buf = malloc(write_max);
255 if (write_buf == NULL)
256 rprintf(rank, -1, 1, "%s: error allocating write_buf %u\n",
259 if (gethostname(hostname, HOSTNAME_SIZE) < 0)
260 rprintf(rank, -1, 1, "%s: gethostname failed: %s\n",
261 prog, strerror(errno));
/* Largest possible file: initial write, plus truncate-up (trunc_max, or
 * append_max when trunc_max is 0), plus one append. */
264 int max_size = write_max + (trunc_max ?: append_max)+append_max;
/* Create (or empty) the first file; the message is always printed and
 * fd < 0 is passed as the error flag. */
266 fd = open(fnames[0], O_WRONLY|O_CREAT|O_TRUNC, 0666);
267 rprintf(rank,-1, fd<0, "create %s, max size: %u, seed %u: %s\n",
268 fnames[0], max_size, seed, strerror(errno));
/* Zero-filled reference buffer, compared below against the hole left by
 * the truncate-up. */
271 trunc_buf = calloc(1, trunc_max ?: append_max);
272 if (trunc_buf == NULL)
273 rprintf(rank,-1,1,"%s: error allocating trunc_buf %u\n",
274 prog, trunc_max ?: append_max);
276 /* initial write + truncate up + append */
277 read_buf = malloc(max_size);
278 if (read_buf == NULL)
279 rprintf(rank,-1,1,"%s: error allocating read_buf %u\n",
/* Barrier between file creation above and the open (without O_CREAT)
 * below. */
283 error = MPI_Barrier(MPI_COMM_WORLD);
284 if (verbose > 2 || error != MPI_SUCCESS)
285 rprintf(rank, -1, error != MPI_SUCCESS,
286 "prep MPI_Barrier: %d\n", error);
/* Each rank picks a file name by rank modulo nfnames and opens it in
 * append mode. */
288 ifnames = rank % nfnames;
289 fd = open(fnames[ifnames], O_RDWR | O_APPEND);
/* NOTE(review): errno is passed as the error flag even when open()
 * succeeded and only verbose is set; errno may then hold a stale non-zero
 * value -- confirm whether fd < 0 was intended. */
290 if (verbose || fd < 0)
291 rprintf(rank, -1, errno, "open '%s' (%u): %s\n",
292 fnames[ifnames], ifnames, strerror(errno));
294 for (n = 0; n < nloops; n++) {
295 /* Initialized only to quiet stupid GCC warnings */
296 unsigned write_rank = 0, append_rank = n, trunc_rank = n + 1;
297 unsigned mpi_shared_vars[6];
299 /* reset the environment */
/* Fill characters cycle through the alphabet with the loop number. */
300 write_char = 'A' + (n % 26);
301 append_char = 'a' + (n % 26);
/* Random sizes of at least 1 byte.
 * NOTE(review): option parsing only rejects 0, so write_max == 1 or
 * append_max == 1 makes the modulus zero here; likewise append_size == 1
 * with trunc_max == 0 on the trunc_size line -- confirm. */
304 write_size = (rand() % (write_max - 1)) + 1;
305 append_size = (rand() % (append_max - 1)) + 1;
306 trunc_size = (rand() % ((trunc_max?: append_size)-1))+1;
307 trunc_offset = write_size + trunc_size;
309 if (verbose || n % 1000 == 0)
310 rprintf(rank, n, 0, STATUS_FMT"\n",
311 write_char, write_size, write_size,
312 append_char, append_size, append_size,
313 trunc_offset, trunc_offset);
/* Pick the ranks for this loop; the "classic" flags replace rand() with
 * fixed or loop-derived values. */
315 write_rank = (classic_write ? 0 : rand()) % nproc;
317 append_rank = (classic_write ? n : rand()) %
319 /* We can't allow the append rank to be the same
320 * as the classic_trunc trunc_rank, or we will
321 * spin here forever. */
322 } while (append_rank == (n + 1) % nproc);
324 trunc_rank = (classic_trunc? (n + 1) : rand()) %
/* The truncating rank must differ from the appending rank. */
326 } while (trunc_rank == append_rank);
/* Pack the choices so a single broadcast distributes them. */
328 mpi_shared_vars[0] = write_size;
329 mpi_shared_vars[1] = append_size;
330 mpi_shared_vars[2] = trunc_size;
331 mpi_shared_vars[3] = write_rank;
332 mpi_shared_vars[4] = append_rank;
333 mpi_shared_vars[5] = trunc_rank;
/* Broadcast from root 0 so every rank uses the same values.
 * NOTE(review): the array is unsigned but the datatype is MPI_INT;
 * MPI_UNSIGNED would match the declared type -- confirm. */
336 error = MPI_Bcast(&mpi_shared_vars, 6,
337 MPI_INT, 0, MPI_COMM_WORLD);
338 if (verbose > 2 || error != MPI_SUCCESS)
339 rprintf(rank, n, error != MPI_SUCCESS,
340 "MPI_Bcast mpi_shared_vars"
341 "[%u, %u, %u, %u, %u, %u]: %d\n",
342 mpi_shared_vars[0], mpi_shared_vars[1],
343 mpi_shared_vars[2], mpi_shared_vars[3],
344 mpi_shared_vars[4], mpi_shared_vars[5], error);
/* Unpack the broadcast values. */
347 write_size = mpi_shared_vars[0];
348 append_size = mpi_shared_vars[1];
349 trunc_size = mpi_shared_vars[2];
350 write_rank = mpi_shared_vars[3];
351 append_rank = mpi_shared_vars[4];
352 trunc_rank = mpi_shared_vars[5];
354 trunc_offset = write_size + trunc_size;
/* The writer needs the pattern to write it; rank 0 also fills it,
 * presumably for the content comparison further down. */
357 if (rank == write_rank || rank == 0)
358 memset(write_buf, write_char, write_max);
360 if (rank == write_rank) {
/* Reset the file to zero length by path, not through fd. */
361 ifnames = (classic_write ? 0 : rand()) % nfnames;
362 ret = truncate(fnames[ifnames], 0);
363 if (verbose > 1 || ret != 0)
364 rprintf(rank, n, ret,
365 "initial truncate %s (%u) @ 0: %s\n",
366 fnames[ifnames], ifnames,
/* Write the chunk through the O_APPEND descriptor, continuing after
 * short writes until write_size bytes are out; EINTR is not treated as
 * an error. */
371 ret = write(fd, write_buf+done,write_size-done);
372 if (verbose > 1 || ret < 0) {
374 ret < 0 && errno != EINTR,
375 "write %d/%d @ %d: %s\n",
376 ret + done, write_size, done,
378 if (ret < 0 && errno != EINTR)
383 } while (done != write_size);
386 if (rank == append_rank || rank == 0)
387 memset(append_buf, append_char, append_size);
/* Barrier after the initial write and before the competing append and
 * truncate below. */
389 error = MPI_Barrier(MPI_COMM_WORLD);
390 if (verbose > 2 || error != MPI_SUCCESS)
391 rprintf(rank, n, error != MPI_SUCCESS,
392 "start MPI_Barrier: %d\n", error);
395 if (rank == append_rank) {
/* Append through the O_APPEND descriptor; a write that returns fewer than
 * append_size bytes is reported with a non-zero error flag. */
398 ret = write(fd, append_buf + done,
401 rprintf(rank, n, errno != EINTR,
402 "append %u/%u: %s\n",
403 ret + done, append_size,
407 } else if (verbose > 1 || ret != append_size) {
408 rprintf(rank, n, ret != append_size,
410 ret + done, append_size);
414 } while (done != append_size);
415 } else if (rank == trunc_rank) {
416 /* XXX: truncating the same file descriptor as the
417 * append on a single node causes this test
418 * to fail currently (2009-02-01). */
/* Truncate by path to write_size + trunc_size. */
419 ifnames = (classic_trunc ? rank : rand()) % nfnames;
420 ret = truncate(fnames[ifnames], trunc_offset);
421 if (verbose > 1 || ret != 0)
422 rprintf(rank, n, ret,
423 "truncate %s (%u) @ %u: %s\n",
424 fnames[ifnames], ifnames,
425 trunc_offset, strerror(errno));
/* Barrier after the append and truncate, before verification. */
428 error = MPI_Barrier(MPI_COMM_WORLD);
429 if (verbose > 2 || error != MPI_SUCCESS)
430 rprintf(rank, n, error != MPI_SUCCESS,
431 "end MPI_Barrier: %d\n", error);
435 /* Check the result */
438 struct stat st = { 0 };
/* Size is taken from stat() on one of the file names. */
440 ifnames = classic_check ? 0 : (rand() % nfnames);
441 ret = stat(fnames[ifnames], &st);
442 if (verbose > 1 || ret != 0)
443 rprintf(rank, n, ret,
444 "stat %s (%u) size %llu: %s\n",
445 fnames[ifnames], ifnames,
446 (long long)st.st_size, strerror(errno));
448 ret = lseek(fd, 0, SEEK_SET);
450 rprintf(rank, n, ret, "lseek 0: %s\n",
/* Read the whole file back, continuing until st.st_size bytes are in.
 * NOTE(review): no check is visible here that st.st_size fits in read_buf
 * (max_size bytes) before reading; the size is only validated later --
 * confirm. */
455 ret = read(fd, read_buf+done, st.st_size-done);
456 if (verbose > 1 || ret <= 0) {
457 rprintf(rank, n, ret <= 0,
458 "read %d/%llu @ %u: %s\n",
459 ret, (long long)st.st_size-done,
461 strerror(errno) : "short read");
464 } while (done != st.st_size);
/* The first write_size bytes must still hold the write pattern. */
466 if (memcmp(read_buf, write_buf, write_size)) {
467 rprintf(rank, n, 0, "WRITE bad "
468 "[0-%d]/[0-%#x] != %c\n",
469 write_size - 1, write_size - 1,
/* tmp_buf points at the data that follows the initial write. */
474 tmp_buf = read_buf + write_size;
476 if (st.st_size == trunc_offset) {
477 /* Check case 1: first append then truncate */
478 int tmp_size, tmp_offset;
/* Only min(trunc_size, append_size) bytes of the append are expected
 * after the write; anything beyond append_size up to trunc_offset must
 * be zero. */
480 tmp_size = trunc_size < append_size ?
481 trunc_size : append_size;
482 tmp_offset = write_size + tmp_size;
484 if (memcmp(tmp_buf, append_buf, tmp_size)) {
485 rprintf(rank, n, 0,"trunc-after-APPEND "
486 "bad [%d-%d]/[%#x-%#x] != %c\n",
487 write_size, tmp_offset - 1,
488 write_size, tmp_offset - 1,
491 } else if (trunc_size > append_size &&
492 memcmp(tmp_buf+append_size,trunc_buf,
493 trunc_size - append_size)) {
494 rprintf(rank, n, 0,"TRUNC-after-append "
495 "bad [%d-%d]/[%#x-%#x] != 0\n",
496 tmp_offset, trunc_offset - 1,
497 tmp_offset, trunc_offset - 1);
501 int expected_size = trunc_offset + append_size;
502 /* Check case 2: first truncate then append */
503 if (st.st_size != expected_size) {
504 rprintf(rank, n, 0,"APPEND-after-trunc "
505 "bad file size %llu != %u\n",
506 (long long)st.st_size,
/* Zeros from write_size up to trunc_offset, then the appended data. */
511 if (memcmp(tmp_buf, trunc_buf, trunc_size)) {
512 rprintf(rank, n, 0,"append-after-TRUNC "
513 "bad [%d-%d]/[%#x-%#x] != 0\n",
514 write_size, trunc_offset - 1,
515 write_size, trunc_offset - 1);
517 } else if (memcmp(read_buf + trunc_offset,
518 append_buf, append_size)) {
519 rprintf(rank, n, 0,"APPEND-after-trunc "
520 "bad [%d-%d]/[%#x-%#x] != %c\n",
521 trunc_offset, expected_size - 1,
522 trunc_offset, expected_size - 1,
/* Failure report: repeat the status line, dump the file with od, and
 * abort all ranks. */
532 rprintf(rank, n, 0, STATUS_FMT"\n",
533 write_char, write_size, write_size,
534 append_char, append_size, append_size,
535 trunc_offset, trunc_offset);
/* NOTE(review): sprintf() copies an arbitrary-length file name into
 * command, whose size is not visible here; snprintf() with the buffer size
 * would bound the write -- confirm. */
537 sprintf(command, "od -Ax -a %s", fnames[0]);
538 rc = system(command);
539 MPI_Abort(MPI_COMM_WORLD, 1);
/* All loops finished: print the final status and PASS. */
544 if (rank == 0 || verbose)
545 printf("r=%2u n=%4u: "STATUS_FMT"\nPASS\n", rank, n - 1,
546 write_char, write_size, write_size,
547 append_char, append_size, append_size,
548 trunc_offset, trunc_offset);
/* Remove the test file through a randomly chosen file name. */
553 ifnames = rand() % nfnames;
554 ret = unlink(fnames[ifnames]);
556 printf("%s: unlink %s failed: %s\n",
557 prog, fnames[ifnames], strerror(errno));