4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
31 * lustre/tests/write_append_truncate.c
33 * Each loop does 3 things:
34 * - truncate file to zero (not via ftruncate though, to test O_APPEND)
35 * - write a "chunk" of data (should be at file offset 0 after truncate)
36 * - on each of two threads either append or truncate-up the file
38 * If the truncate happened first, we should have a hole in the file.
39 * If the append happened first, we should have truncated the file down.
41 * WRITE_SIZE_MAX and APPEND_SIZE_MAX are large enough to cross a stripe.
43 * compile: mpicc -g -Wall -o write_append_truncate write_append_truncate.c
44 * run: mpirun -np 2 -machlist <hostlist file> write_append_truncate <file>
45 * or: pdsh -w <two hosts> write_append_truncate <file>
46 * or: prun -n 2 [-N 2] write_append_truncate <file>
52 #include <sys/types.h>
/* Default loop count; main() uses it as the initial value of nloops. */
62 #define DEFAULT_ITER 10000
/*
 * Default upper bounds for the random sizes picked on each loop.
 * WRITE_SIZE_MAX and APPEND_SIZE_MAX are the initial values of write_max
 * and append_max in main().
 * NOTE(review): no use of TRUNC_SIZE_MAX is visible in this listing
 * (trunc_max starts at 0 in main) - confirm where it is referenced.
 */
64 #define WRITE_SIZE_MAX 1234567
65 #define APPEND_SIZE_MAX 1234567
66 #define TRUNC_SIZE_MAX 1234567
/*
 * printf format for the per-loop status line. The callers pass, in order:
 * write char, write size (twice, decimal then hex), append char,
 * append size (twice), truncate offset (twice).
 */
68 #define STATUS_FMT "WR %c %7d/%#08x, AP %c %7d/%#08x, TR@ %7d/%#08x"
/* Size of the buffer handed to gethostname() in main(). */
70 #define HOSTNAME_SIZE 50
/* Host name of this node; rprintf() prints it as part of its line prefix. */
71 char hostname[HOSTNAME_SIZE];
/* Capacity of the fnames[] array of file name arguments in main(). */
72 #define FNAMES_MAX 256
/*
 * Print the command line help text to stdout with printf.
 *
 * prog: program name, interpolated into the final message (and presumably
 *       into the synopsis line; that argument is on a line not shown here).
 *
 * NOTE(review): this listing omits several lines of the function (the
 * braces, the trailing arguments of the multi-line printf calls and
 * whatever follows the last printf), so only the visible printf calls are
 * described here.
 * NOTE(review): the synopsis line does not mention -W although -W is
 * described further down, and the getopt string in main() also accepts a
 * lower case c option that is not described here - confirm intent.
 */
74 void usage(char *prog)
76 printf("usage: %s [-a append_max] [-C] [-n nloops] [-s seed]\n\t\t[-t trunc_max] [-T] [-v] [-w write_max] <filename> ...\n",
78 printf("\t-a append_max: maximum size of append, default %u bytes\n",
80 printf("\t-C: 'classic' checks (on file 0)\n");
81 printf("\t-n nloops: count of loops to run, default %u\n",
83 printf("\t-s seed: random seed to use, default {current time}\n");
84 printf("\t-t trunc_max: maximum size of truncate, default %u bytes\n",
86 printf("\t-T: 'classic' truncates (on file 0)\n");
87 printf("\t-w write_max: maximum size of write, default %u bytes\n",
89 printf("\t-W: 'classic' writes (on rank 0, file 0)\n");
90 printf("\t-v: run in verbose mode (repeat for more verbosity)\n");
91 printf("\tfilename for each mountpoint of same filesystem on a node\n");
/*
 * NOTE(review): the message below starts with a backspace escape, while
 * every other help line starts with a tab - looks like a typo, confirm.
 */
92 printf("\b%s must be run with at least 2 processes\n", prog);
/*
 * Logging helper used throughout main().
 *
 * rank:  MPI rank of the caller, printed as r=NN.
 * loop:  loop counter, printed as l=NNNN (callers pass -1 outside the loop).
 * error: value handed to MPI_Abort.
 * fmt:   printf style format; the format attribute on the prototype lets
 *        the compiler check the variadic arguments against it.
 *
 * NOTE(review): the lines between the visible statements are not part of
 * this listing. Callers pass error == 0 for plain status output, so the
 * MPI_Abort call is presumably guarded by a test on error, and the loop
 * prefix presumably by a test on loop - confirm against the full source.
 */
98 /* Print process rank, loop count, message, and exit (i.e. a fatal error) */
99 void rprintf(int rank, int loop, int error, const char *fmt, ...)
100 __attribute__ ((format (printf, 4, 5)));
102 void rprintf(int rank, int loop, int error, const char *fmt, ...)
/* Line prefix: rank, loop number, then the global hostname. */
106 printf("r=%2u", rank);
108 printf(" l=%04u", loop);
110 printf(" %s", hostname);
/* Tear down the whole MPI job, using error as the abort code. */
118 MPI_Abort(MPI_COMM_WORLD, error);
/*
 * Test driver. From the visible code: initializes MPI, parses options with
 * getopt, collects the trailing file name arguments, allocates the data
 * buffers, then runs nloops iterations of
 *   initial truncate + write / concurrent append and truncate-up / verify,
 * and finally unlinks one of the file names.
 *
 * NOTE(review): this listing omits a number of source lines (the embedded
 * line numbers have gaps), so some conditions, braces and declarations
 * between the statements below are not visible. Comments that depend on
 * such lines are marked as assumptions.
 */
121 int main(int argc, char *argv[])
123 int n, nloops = DEFAULT_ITER;
124 int nfnames = 0, ifnames, fd;
125 int rank = -1, nproc, ret;
126 unsigned int write_max = WRITE_SIZE_MAX;
127 unsigned int append_max = APPEND_SIZE_MAX;
128 unsigned int write_size = 0, append_size = 0, trunc_size = 0;
129 unsigned int trunc_max = 0, trunc_offset = 0;
132 char *read_buf = NULL;
133 char *trunc_buf = NULL;
138 int classic_check = 0, classic_trunc = 0, classic_write = 0;
139 char write_char = 'A', append_char = 'a';
140 char *fnames[FNAMES_MAX], *end;
141 char *prog = "write_append_truncate";
/*
 * MPI is initialized before the options are parsed.
 * NOTE(review): getopt only runs further down, so the verbose test here
 * can only see the initial value of verbose (declaration not shown).
 */
144 error = MPI_Init(&argc, &argv);
145 if (error != MPI_SUCCESS)
146 printf("%s: MPI_Init failed: %d\n", prog, error);
147 else if (verbose > 2)
148 printf("%s: MPI_Init succeeded\n", prog);
/*
 * Find the last slash in argv[0] to shorten the program name; the handling
 * of the result is on lines not shown here.
 */
150 prog = strrchr(argv[0], '/');
/*
 * Option parsing. Numeric arguments are converted with strtoul and an end
 * pointer; trailing characters after the number (*end) reject the option.
 */
156 while ((c = getopt(argc, argv, "a:cCn:s:t:Tvw:W")) != -1) {
/*
 * append_max below 2 is rejected: the loop later computes
 * rand() % (append_max - 1), which needs a nonzero divisor.
 */
159 append_max = strtoul(optarg, &end, 0);
160 if (append_max < 2 || *end) {
161 fprintf(stderr, "%s: bad append option '%s'\n",
/*
 * NOTE(review): nloops is an int, so the unsigned long from strtoul is
 * narrowed on assignment.
 */
170 nloops = strtoul(optarg, &end, 0);
171 if (nloops == 0 || *end) {
172 fprintf(stderr, "%s: bad nloops option '%s'\n",
178 seed = strtoul(optarg, &end, 0);
180 fprintf(stderr, "%s: bad seed option '%s'\n",
186 trunc_max = strtoul(optarg, &end, 0);
189 "%s: bad truncate option '%s'\n", prog,
/* Same lower bound as append_max: rand() % (write_max - 1) is used later. */
201 write_max = strtoul(optarg, &end, 0);
202 if (write_max < 2 || *end) {
203 fprintf(stderr, "%s: bad write option '%s'\n",
212 fprintf(stderr, "%s: unknown option '%c'\n", prog, c);
/* At least one file name must follow the options. */
219 if (argc == optind) {
220 fprintf(stderr, "%s: missing filename argument\n", prog);
/* No more than FNAMES_MAX names fit into fnames[]. */
224 if (argc > optind + FNAMES_MAX) {
225 fprintf(stderr, "%s: too many extra options\n", prog);
/* Every remaining argument is taken as a file name. */
229 while (optind < argc)
230 fnames[nfnames++] = argv[optind++];
/*
 * Query the rank of this process and the number of processes. rprintf is
 * called for tracing at high verbosity or, with a nonzero error argument,
 * when the MPI call failed.
 */
232 error = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
233 if (verbose > 2 || error != MPI_SUCCESS)
234 rprintf(rank, -1, error != MPI_SUCCESS, "MPI_Comm_rank: %d\n",
237 error = MPI_Comm_size(MPI_COMM_WORLD, &nproc);
238 if (verbose > 2 || error != MPI_SUCCESS)
239 rprintf(rank, -1, error != MPI_SUCCESS, "MPI_Comm_size: %d\n",
/* Presumably guarded by a test on nproc on a line not shown here. */
243 rprintf(rank, -1, 1, "%s: must run with at least 2 processes\n",
/* Pattern buffers, sized by the configured maxima. */
246 append_buf = malloc(append_max);
248 rprintf(rank, -1, 1, "%s: error allocating append_buf %u\n",
251 write_buf = malloc(write_max);
253 rprintf(rank, -1, 1, "%s: error allocating write_buf %u\n",
/* hostname is printed by rprintf as part of every line prefix. */
256 if (gethostname(hostname, HOSTNAME_SIZE) < 0)
257 rprintf(rank, -1, 1, "%s: gethostname failed: %s\n",
258 prog, strerror(errno));
/*
 * Upper bound of the file size; x ?: y is the GNU C shorthand for
 * x ? x : y, so append_max stands in when trunc_max is 0. The rest of the
 * expression is on a line not shown here.
 * NOTE(review): the operands are unsigned int but max_size is an int.
 */
261 int max_size = write_max + (trunc_max ?: append_max) +
/*
 * Create (or empty) the first file name.
 * NOTE(review): the rprintf below runs unconditionally and always formats
 * strerror(errno), so after a successful open it prints whatever errno
 * held before.
 */
264 fd = open(fnames[0], O_WRONLY | O_CREAT | O_TRUNC, 0666);
265 rprintf(rank, -1, fd < 0,
266 "create %s, max size: %u, seed %u: %s\n", fnames[0],
267 max_size, seed, strerror(errno));
/*
 * Zero-filled reference buffer (calloc); the check code compares the hole
 * left by the truncate-up against it.
 */
270 trunc_buf = calloc(1, trunc_max ?: append_max);
273 "%s: error allocating trunc_buf %u\n",
274 prog, trunc_max ?: append_max);
276 /* initial write + truncate up + append */
277 read_buf = malloc(max_size);
280 "%s: error allocating read_buf %u\n",
/* All ranks synchronize here before opening their file. */
284 error = MPI_Barrier(MPI_COMM_WORLD);
285 if (verbose > 2 || error != MPI_SUCCESS)
286 rprintf(rank, -1, error != MPI_SUCCESS,
287 "prep MPI_Barrier: %d\n", error);
/*
 * Each rank picks a file name by rank modulo the number of names and opens
 * it with O_APPEND; this fd is used for the write, the append and the
 * read-back below.
 * NOTE(review): the error argument passed to rprintf is errno itself, so
 * in verbose mode a successful open may still be reported with a stale
 * nonzero errno - confirm.
 */
289 ifnames = rank % nfnames;
290 fd = open(fnames[ifnames], O_RDWR | O_APPEND);
291 if (verbose || fd < 0)
292 rprintf(rank, -1, errno, "open '%s' (%u): %s\n",
293 fnames[ifnames], ifnames, strerror(errno));
/* Main test loop, one write/append/truncate/verify cycle per iteration. */
295 for (n = 0; n < nloops; n++) {
296 /* Initialized only to quiet stupid GCC warnings */
297 unsigned int append_rank = n, trunc_rank = n + 1;
298 unsigned int write_rank = 0;
299 unsigned int mpi_shared_vars[6];
301 /* reset the environment */
/* The fill characters cycle through the 26 letters with the loop count. */
302 write_char = 'A' + (n % 26);
303 append_char = 'a' + (n % 26);
/*
 * Random sizes in the range 1 .. max - 1. The truncate offset is placed
 * trunc_size bytes past the end of the initial write. Part of the
 * trunc_size expression is on a line not shown here.
 */
306 write_size = (rand() % (write_max - 1)) + 1;
307 append_size = (rand() % (append_max - 1)) + 1;
308 trunc_size = (append_size == 1) ? 1 :
310 ((trunc_max ?: append_size) - 1)) + 1;
311 trunc_offset = write_size + trunc_size;
/* Status line on every loop in verbose mode, otherwise every 1000 loops. */
313 if (verbose || n % 1000 == 0)
314 rprintf(rank, n, 0, STATUS_FMT"\n",
315 write_char, write_size, write_size,
316 append_char, append_size, append_size,
317 trunc_offset, trunc_offset);
/*
 * Pick the ranks that write, append and truncate. The two retry loops
 * below repeat until append_rank differs from (n + 1) % nproc and
 * trunc_rank differs from append_rank.
 * NOTE(review): the append_rank selection tests classic_write - confirm
 * that this is the intended flag.
 */
319 write_rank = (classic_write ? 0 : rand()) % nproc;
321 append_rank = (classic_write ? n : rand()) %
324 * We can't allow the append rank be the same
325 * as the classic_trunc trunc_rank, or we will
328 } while (append_rank == (n + 1) % nproc);
330 trunc_rank = (classic_trunc ? (n + 1) :
332 } while (trunc_rank == append_rank);
/* Pack the per-loop parameters for the broadcast. */
334 mpi_shared_vars[0] = write_size;
335 mpi_shared_vars[1] = append_size;
336 mpi_shared_vars[2] = trunc_size;
337 mpi_shared_vars[3] = write_rank;
338 mpi_shared_vars[4] = append_rank;
339 mpi_shared_vars[5] = trunc_rank;
/*
 * Broadcast the six values from root rank 0 so every rank works with the
 * same sizes and rank assignments.
 * NOTE(review): the array is unsigned int but the datatype passed is
 * MPI_INT - confirm whether MPI_UNSIGNED was intended.
 */
342 error = MPI_Bcast(&mpi_shared_vars, 6,
343 MPI_INT, 0, MPI_COMM_WORLD);
344 if (verbose > 2 || error != MPI_SUCCESS)
345 rprintf(rank, n, error != MPI_SUCCESS,
346 "MPI_Bcast mpi_shared_vars [%u, %u, %u, %u, %u, %u]: %d\n",
347 mpi_shared_vars[0], mpi_shared_vars[1],
348 mpi_shared_vars[2], mpi_shared_vars[3],
349 mpi_shared_vars[4], mpi_shared_vars[5], error);
/* Unpack the broadcast values and recompute the truncate offset. */
352 write_size = mpi_shared_vars[0];
353 append_size = mpi_shared_vars[1];
354 trunc_size = mpi_shared_vars[2];
355 write_rank = mpi_shared_vars[3];
356 append_rank = mpi_shared_vars[4];
357 trunc_rank = mpi_shared_vars[5];
359 trunc_offset = write_size + trunc_size;
/*
 * The write pattern is needed by the writer; rank 0 fills it as well,
 * presumably because the result check compares against write_buf.
 */
362 if (rank == write_rank || rank == 0)
363 memset(write_buf, write_char, write_max);
/*
 * Writer: empty the file by path with truncate() rather than through the
 * fd, then write write_size bytes through the O_APPEND fd, looping until
 * everything is written. EINTR is not treated as a fatal error.
 */
365 if (rank == write_rank) {
366 ifnames = (classic_write ? 0 : rand()) % nfnames;
367 ret = truncate(fnames[ifnames], 0);
368 if (verbose > 1 || ret != 0)
369 rprintf(rank, n, ret,
370 "initial truncate %s (%u) @ 0: %s\n",
371 fnames[ifnames], ifnames,
376 ret = write(fd, write_buf + done,
378 if (verbose > 1 || ret < 0) {
380 ret < 0 && errno != EINTR,
381 "write %d/%d @ %d: %s\n",
382 ret + done, write_size, done,
384 if (ret < 0 && errno != EINTR)
389 } while (done != write_size);
/* Same idea for the append pattern: appender and rank 0 fill it. */
392 if (rank == append_rank || rank == 0)
393 memset(append_buf, append_char, append_size);
/* Barrier before the racing append and truncate start. */
395 error = MPI_Barrier(MPI_COMM_WORLD);
396 if (verbose > 2 || error != MPI_SUCCESS)
397 rprintf(rank, n, error != MPI_SUCCESS,
398 "start MPI_Barrier: %d\n", error);
/* Appender: write append_size bytes through the O_APPEND fd. */
401 if (rank == append_rank) {
404 ret = write(fd, append_buf + done,
407 rprintf(rank, n, errno != EINTR,
408 "append %u/%u: %s\n",
409 ret + done, append_size,
413 } else if (verbose > 1 || ret != append_size) {
414 rprintf(rank, n, ret != append_size,
416 ret + done, append_size);
420 } while (done != append_size);
/* Truncater: set the file size to trunc_offset by path. */
421 } else if (rank == trunc_rank) {
423 * XXX: truncating the same file descriptor as the
424 * append on a single node causes this test
425 * to fail currently (2009-02-01).
427 ifnames = (classic_trunc ? rank : rand()) % nfnames;
428 ret = truncate(fnames[ifnames], trunc_offset);
429 if (verbose > 1 || ret != 0)
430 rprintf(rank, n, ret,
431 "truncate %s (%u) @ %u: %s\n",
432 fnames[ifnames], ifnames,
433 trunc_offset, strerror(errno));
/* Barrier after the append and truncate, before the result is checked. */
436 error = MPI_Barrier(MPI_COMM_WORLD);
437 if (verbose > 2 || error != MPI_SUCCESS)
438 rprintf(rank, n, error != MPI_SUCCESS,
439 "end MPI_Barrier: %d\n", error);
443 /* Check the result */
446 struct stat st = { 0 };
/*
 * Get the file size by path, then rewind the fd and read st.st_size bytes
 * into read_buf.
 * NOTE(review): st.st_size is cast to long long but printed with %llu.
 */
448 ifnames = classic_check ? 0 : (rand() % nfnames);
449 ret = stat(fnames[ifnames], &st);
450 if (verbose > 1 || ret != 0)
451 rprintf(rank, n, ret,
452 "stat %s (%u) size %llu: %s\n",
453 fnames[ifnames], ifnames,
454 (long long)st.st_size, strerror(errno));
456 ret = lseek(fd, 0, SEEK_SET);
458 rprintf(rank, n, ret, "lseek 0: %s\n",
463 ret = read(fd, read_buf + done,
465 if (verbose > 1 || ret <= 0) {
466 rprintf(rank, n, ret <= 0,
467 "read %d/%llu @ %u: %s\n",
469 (long long)st.st_size - done,
471 strerror(errno) : "short read");
474 } while (done != st.st_size);
/* The first write_size bytes must always be the write pattern. */
476 if (memcmp(read_buf, write_buf, write_size)) {
478 "WRITE bad [0-%d]/[0-%#x] != %c\n",
479 write_size - 1, write_size - 1,
/* tmp_buf points at the data that follows the initial write. */
484 tmp_buf = read_buf + write_size;
/*
 * A final size equal to trunc_offset means the truncate ran last. The
 * first min(trunc_size, append_size) bytes after the write must then be
 * append data; if the truncate offset lies beyond the append, the
 * remainder is compared against a buffer named on a line not shown here
 * (the failure message expects zeroes).
 */
486 if (st.st_size == trunc_offset) {
487 /* Check case 1: first append then truncate */
488 int tmp_size, tmp_offset;
490 tmp_size = trunc_size < append_size ?
491 trunc_size : append_size;
492 tmp_offset = write_size + tmp_size;
494 if (memcmp(tmp_buf, append_buf, tmp_size)) {
496 "trunc-after-APPEND bad [%d-%d]/[%#x-%#x] != %c\n",
497 write_size, tmp_offset - 1,
498 write_size, tmp_offset - 1,
501 } else if (trunc_size > append_size &&
502 memcmp(tmp_buf + append_size,
504 trunc_size - append_size)) {
506 "TRUNC-after-append bad [%d-%d]/[%#x-%#x] != 0\n",
507 tmp_offset, trunc_offset - 1,
508 tmp_offset, trunc_offset - 1);
/*
 * Otherwise the append ran last: the size must be trunc_offset plus
 * append_size, the range from write_size to trunc_offset must match the
 * zero-filled trunc_buf, and the append data must follow at trunc_offset.
 */
512 int expected_size = trunc_offset + append_size;
513 /* Check case 2: first truncate then append */
514 if (st.st_size != expected_size) {
516 "APPEND-after-trunc bad file size %llu != %u\n",
517 (long long)st.st_size,
522 if (memcmp(tmp_buf, trunc_buf, trunc_size)) {
524 "append-after-TRUNC bad [%d-%d]/[%#x-%#x] != 0\n",
525 write_size, trunc_offset - 1,
526 write_size, trunc_offset - 1);
528 } else if (memcmp(read_buf + trunc_offset,
529 append_buf, append_size)) {
531 "APPEND-after-trunc bad [%d-%d]/[%#x-%#x] != %c\n",
532 trunc_offset, expected_size - 1,
533 trunc_offset, expected_size - 1,
/*
 * Failure path (its guarding condition is not shown here): print the
 * status line, dump the first file with od through system(), then abort
 * the MPI job.
 * NOTE(review): sprintf writes into command, whose declaration and size
 * are not visible here; the length of fnames[0] is not bounded by this
 * code - confirm the buffer is large enough or switch to snprintf.
 */
542 rprintf(rank, n, 0, STATUS_FMT"\n",
543 write_char, write_size, write_size,
544 append_char, append_size, append_size,
545 trunc_offset, trunc_offset);
547 sprintf(command, "od -Ax -a %s", fnames[0]);
548 ret = system(command);
549 MPI_Abort(MPI_COMM_WORLD, 1);
/* Final status with the index of the last loop, followed by PASS. */
554 if (rank == 0 || verbose)
555 printf("r=%2u n=%4u: "STATUS_FMT"\nPASS\n", rank, n - 1,
556 write_char, write_size, write_size, append_char,
557 append_size, append_size, trunc_offset, trunc_offset);
/*
 * Remove the test file through a randomly chosen name; the conditions
 * around the unlink and its error message are on lines not shown here.
 */
562 ifnames = rand() % nfnames;
563 ret = unlink(fnames[ifnames]);
565 printf("%s: unlink %s failed: %s\n",
566 prog, fnames[ifnames], strerror(errno));