Whamcloud - gitweb
LU-14264 tests: make PARALLEL available to all suites
[fs/lustre-release.git] / lustre / tests / mpi / write_append_truncate.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/tests/write_append_truncate.c
32  *
33  * Each loop does 3 things:
34  *   - truncate file to zero (not via ftruncate though, to test O_APPEND)
35  *   - write a "chunk" of data (should be at file offset 0 after truncate)
36  *   - on each of two threads either append or truncate-up the file
37  *
38  * If the truncate happened first, we should have a hole in the file.
39  * If the append happened first, we should have truncated the file down.
40  *
41  * WRITE_SIZE_MAX and APPEND_SIZE_MAX are large enough to cross a stripe.
42  *
43  * compile: mpicc -g -Wall -o write_append_truncate write_append_truncate.c
44  * run:     mpirun -np 2 -machlist <hostlist file> write_append_truncate <file>
45  *  or:     pdsh -w <two hosts> write_append_truncate <file>
46  *  or:     prun -n 2 [-N 2] write_append_truncate <file>
47  */
48
49 #include <stdlib.h>
50 #include <stdio.h>
51 #include <stdarg.h>
52 #include <sys/types.h>
53 #include <sys/stat.h>
54 #include <fcntl.h>
55 #include <errno.h>
56 #include <time.h>
57 #include <string.h>
58 #include <unistd.h>
59 #include <getopt.h>
60 #include "mpi.h"
61
/* Number of test loops run when -n is not given on the command line. */
#define DEFAULT_ITER    10000

/*
 * Default upper bounds, in bytes, reported by usage().  main() seeds
 * write_max and append_max from WRITE_SIZE_MAX and APPEND_SIZE_MAX;
 * TRUNC_SIZE_MAX is only referenced by the help text in this file.
 */
#define WRITE_SIZE_MAX  1234567
#define APPEND_SIZE_MAX 1234567
#define TRUNC_SIZE_MAX  1234567

/*
 * printf format of the per-loop status line.  Takes, in order: write char,
 * write size (twice: decimal and hex), append char, append size (twice),
 * and the truncate offset (twice).
 */
#define STATUS_FMT "WR %c %7d/%#08x, AP %c %7d/%#08x, TR@ %7d/%#08x"

/* Size of the buffer handed to gethostname() in main(). */
#define HOSTNAME_SIZE 50
/* Local host name; filled in by main() and printed by rprintf() on errors. */
char hostname[HOSTNAME_SIZE];
/* Maximum number of filename arguments main() accepts. */
#define FNAMES_MAX 256
73
74 void usage(char *prog)
75 {
76         printf("usage: %s [-a append_max] [-C] [-n nloops] [-s seed]\n\t\t[-t trunc_max] [-T] [-v] [-w write_max] <filename> ...\n",
77                prog);
78         printf("\t-a append_max: maximum size of append, default %u bytes\n",
79                APPEND_SIZE_MAX);
80         printf("\t-C: 'classic' checks (on file 0)\n");
81         printf("\t-n nloops: count of loops to run, default %u\n",
82                DEFAULT_ITER);
83         printf("\t-s seed: random seed to use, default {current time}\n");
84         printf("\t-t trunc_max: maximum size of truncate, default %u bytes\n",
85                TRUNC_SIZE_MAX);
86         printf("\t-T: 'classic' truncates (on file 0)\n");
87         printf("\t-w write_max: maximum size of write, default %u bytes\n",
88                WRITE_SIZE_MAX);
89         printf("\t-W: 'classic' writes (on rank 0, file 0)\n");
90         printf("\t-v: run in verbose mode (repeat for more verbosity)\n");
91         printf("\tfilename for each mountpoint of same filesystem on a node\n");
92         printf("\b%s must be run with at least 2 processes\n", prog);
93
94         MPI_Finalize();
95         exit(1);
96 }
97
98 /* Print process rank, loop count, message, and exit (i.e. a fatal error) */
99 void rprintf(int rank, int loop, int error, const char *fmt, ...)
100 __attribute__ ((format (printf, 4, 5)));
101
102 void rprintf(int rank, int loop, int error, const char *fmt, ...)
103 {
104         va_list ap;
105
106         printf("r=%2u", rank);
107         if (loop >= 0)
108                 printf(" l=%04u", loop);
109         if (error != 0)
110                 printf(" %s", hostname);
111         printf(": ");
112
113         va_start(ap, fmt);
114
115         vprintf(fmt, ap);
116
117         if (error != 0)
118                 MPI_Abort(MPI_COMM_WORLD, error);
119 }
120
121 int main(int argc, char *argv[])
122 {
123         int n, nloops = DEFAULT_ITER;
124         int nfnames = 0, ifnames, fd;
125         int rank = -1, nproc, ret;
126         unsigned int write_max = WRITE_SIZE_MAX;
127         unsigned int append_max = APPEND_SIZE_MAX;
128         unsigned int write_size = 0, append_size = 0, trunc_size = 0;
129         unsigned int trunc_max = 0, trunc_offset = 0;
130         char *append_buf;
131         char *write_buf;
132         char *read_buf = NULL;
133         char *trunc_buf = NULL;
134         int seed = time(0);
135         int done;
136         int error;
137         int verbose = 0;
138         int classic_check = 0, classic_trunc = 0, classic_write = 0;
139         char write_char = 'A', append_char = 'a';
140         char *fnames[FNAMES_MAX], *end;
141         char *prog = "write_append_truncate";
142         int c;
143
144         error = MPI_Init(&argc, &argv);
145         if (error != MPI_SUCCESS)
146                 printf("%s: MPI_Init failed: %d\n", prog, error);
147         else if (verbose > 2)
148                 printf("%s: MPI_Init succeeded\n", prog);
149
150         prog = strrchr(argv[0], '/');
151         if (!prog)
152                 prog = argv[0];
153         else
154                 prog++;
155
156         while ((c = getopt(argc, argv, "a:cCn:s:t:Tvw:W")) != -1) {
157                 switch (c) {
158                 case 'a':
159                         append_max = strtoul(optarg, &end, 0);
160                         if (append_max < 2 || *end) {
161                                 fprintf(stderr, "%s: bad append option '%s'\n",
162                                         prog, optarg);
163                                 usage(prog);
164                         }
165                         break;
166                 case 'C':
167                         classic_check++;
168                         break;
169                 case 'n':
170                         nloops = strtoul(optarg, &end, 0);
171                         if (nloops == 0 || *end) {
172                                 fprintf(stderr, "%s: bad nloops option '%s'\n",
173                                         prog, optarg);
174                                 usage(prog);
175                         }
176                         break;
177                 case 's':
178                         seed = strtoul(optarg, &end, 0);
179                         if (*end) {
180                                 fprintf(stderr, "%s: bad seed option '%s'\n",
181                                         prog, optarg);
182                                 usage(prog);
183                         }
184                         break;
185                 case 't':
186                         trunc_max = strtoul(optarg, &end, 0);
187                         if (*end) {
188                                 fprintf(stderr,
189                                         "%s: bad truncate option '%s'\n", prog,
190                                         optarg);
191                                 usage(prog);
192                         }
193                         break;
194                 case 'T':
195                         classic_trunc++;
196                         break;
197                 case 'v':
198                         verbose++;
199                         break;
200                 case 'w':
201                         write_max = strtoul(optarg, &end, 0);
202                         if (write_max < 2 || *end) {
203                                 fprintf(stderr, "%s: bad write option '%s'\n",
204                                         prog, optarg);
205                                 usage(prog);
206                         }
207                         break;
208                 case 'W':
209                         classic_write++;
210                         break;
211                 default:
212                         fprintf(stderr, "%s: unknown option '%c'\n", prog, c);
213                         usage(prog);
214                 }
215         }
216
217         srand(seed);
218
219         if (argc == optind) {
220                 fprintf(stderr, "%s: missing filename argument\n", prog);
221                 usage(prog);
222         }
223
224         if (argc > optind + FNAMES_MAX) {
225                 fprintf(stderr, "%s: too many extra options\n", prog);
226                 usage(prog);
227         }
228
229         while (optind < argc)
230                 fnames[nfnames++] = argv[optind++];
231
232         error = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
233         if (verbose > 2 || error != MPI_SUCCESS)
234                 rprintf(rank, -1, error != MPI_SUCCESS, "MPI_Comm_rank: %d\n",
235                         error);
236
237         error = MPI_Comm_size(MPI_COMM_WORLD, &nproc);
238         if (verbose > 2 || error != MPI_SUCCESS)
239                 rprintf(rank, -1, error != MPI_SUCCESS, "MPI_Comm_size: %d\n",
240                         error);
241
242         if (nproc < 2)
243                 rprintf(rank, -1, 1, "%s: must run with at least 2 processes\n",
244                         prog);
245
246         append_buf = malloc(append_max);
247         if (!append_buf)
248                 rprintf(rank, -1, 1, "%s: error allocating append_buf %u\n",
249                         prog, append_max);
250
251         write_buf = malloc(write_max);
252         if (!write_buf)
253                 rprintf(rank, -1, 1, "%s: error allocating write_buf %u\n",
254                         prog, write_max);
255
256         if (gethostname(hostname, HOSTNAME_SIZE) < 0)
257                 rprintf(rank, -1, 1, "%s: gethostname failed: %s\n",
258                         prog, strerror(errno));
259
260         if (rank == 0) {
261                 int max_size = write_max + (trunc_max ?: append_max) +
262                                append_max;
263
264                 fd = open(fnames[0], O_WRONLY | O_CREAT | O_TRUNC, 0666);
265                 rprintf(rank, -1, fd < 0,
266                         "create %s, max size: %u, seed %u: %s\n", fnames[0],
267                         max_size, seed, strerror(errno));
268                 close(fd);
269
270                 trunc_buf = calloc(1, trunc_max ?: append_max);
271                 if (!trunc_buf)
272                         rprintf(rank, -1, 1,
273                                 "%s: error allocating trunc_buf %u\n",
274                                 prog, trunc_max ?: append_max);
275
276                 /* initial write + truncate up + append */
277                 read_buf = malloc(max_size);
278                 if (!read_buf)
279                         rprintf(rank, -1, 1,
280                                 "%s: error allocating read_buf %u\n",
281                                 prog, max_size);
282         }
283
284         error = MPI_Barrier(MPI_COMM_WORLD);
285         if (verbose > 2 || error != MPI_SUCCESS)
286                 rprintf(rank, -1, error != MPI_SUCCESS,
287                         "prep MPI_Barrier: %d\n", error);
288
289         ifnames = rank % nfnames;
290         fd = open(fnames[ifnames], O_RDWR | O_APPEND);
291         if (verbose || fd < 0)
292                 rprintf(rank, -1, errno, "open '%s' (%u): %s\n",
293                         fnames[ifnames], ifnames, strerror(errno));
294
295         for (n = 0; n < nloops; n++) {
296                 /* Initialized only to quiet stupid GCC warnings */
297                 unsigned int append_rank = n, trunc_rank = n + 1;
298                 unsigned int write_rank = 0;
299                 unsigned int mpi_shared_vars[6];
300
301                 /* reset the environment */
302                 write_char = 'A' + (n % 26);
303                 append_char = 'a' + (n % 26);
304
305                 if (rank == 0) {
306                         write_size = (rand() % (write_max - 1)) + 1;
307                         append_size = (rand() % (append_max - 1)) + 1;
308                         trunc_size = (append_size == 1) ? 1 :
309                                       (rand() %
310                                       ((trunc_max ?: append_size) - 1)) + 1;
311                         trunc_offset = write_size + trunc_size;
312
313                         if (verbose || n % 1000 == 0)
314                                 rprintf(rank, n, 0, STATUS_FMT"\n",
315                                         write_char, write_size, write_size,
316                                         append_char, append_size, append_size,
317                                         trunc_offset, trunc_offset);
318
319                         write_rank = (classic_write ? 0 : rand()) % nproc;
320                         do {
321                                 append_rank = (classic_write ? n : rand()) %
322                                                nproc;
323                                 /*
324                                  * We can't allow the append rank be the same
325                                  * as the classic_trunc trunc_rank, or we will
326                                  * spin here forever.
327                                  */
328                         } while (append_rank == (n + 1) % nproc);
329                         do {
330                                 trunc_rank = (classic_trunc ? (n + 1) :
331                                               rand()) % nproc;
332                         } while (trunc_rank == append_rank);
333
334                         mpi_shared_vars[0] = write_size;
335                         mpi_shared_vars[1] = append_size;
336                         mpi_shared_vars[2] = trunc_size;
337                         mpi_shared_vars[3] = write_rank;
338                         mpi_shared_vars[4] = append_rank;
339                         mpi_shared_vars[5] = trunc_rank;
340                 }
341
342                 error = MPI_Bcast(&mpi_shared_vars, 6,
343                                   MPI_INT, 0, MPI_COMM_WORLD);
344                 if (verbose > 2 || error != MPI_SUCCESS)
345                         rprintf(rank, n, error != MPI_SUCCESS,
346                                 "MPI_Bcast mpi_shared_vars [%u, %u, %u, %u, %u, %u]: %d\n",
347                                 mpi_shared_vars[0], mpi_shared_vars[1],
348                                 mpi_shared_vars[2], mpi_shared_vars[3],
349                                 mpi_shared_vars[4], mpi_shared_vars[5], error);
350
351                 if (rank != 0) {
352                         write_size  = mpi_shared_vars[0];
353                         append_size = mpi_shared_vars[1];
354                         trunc_size  = mpi_shared_vars[2];
355                         write_rank  = mpi_shared_vars[3];
356                         append_rank = mpi_shared_vars[4];
357                         trunc_rank  = mpi_shared_vars[5];
358
359                         trunc_offset = write_size + trunc_size;
360                 }
361
362                 if (rank == write_rank || rank == 0)
363                         memset(write_buf, write_char, write_max);
364
365                 if (rank == write_rank) {
366                         ifnames = (classic_write ? 0 : rand()) % nfnames;
367                         ret = truncate(fnames[ifnames], 0);
368                         if (verbose > 1 || ret != 0)
369                                 rprintf(rank, n, ret,
370                                         "initial truncate %s (%u) @ 0: %s\n",
371                                         fnames[ifnames], ifnames,
372                                         strerror(errno));
373
374                         done = 0;
375                         do {
376                                 ret = write(fd, write_buf + done,
377                                             write_size - done);
378                                 if (verbose > 1 || ret < 0) {
379                                         rprintf(rank, n,
380                                                 ret < 0 && errno != EINTR,
381                                                 "write %d/%d @ %d: %s\n",
382                                                 ret + done, write_size, done,
383                                                 strerror(errno));
384                                         if (ret < 0 && errno != EINTR)
385                                                 break;
386                                 }
387                                 if (ret > 0)
388                                         done += ret;
389                         } while (done != write_size);
390                 }
391
392                 if (rank == append_rank || rank == 0)
393                         memset(append_buf, append_char, append_size);
394
395                 error = MPI_Barrier(MPI_COMM_WORLD);
396                 if (verbose > 2 || error != MPI_SUCCESS)
397                         rprintf(rank, n, error != MPI_SUCCESS,
398                                 "start MPI_Barrier: %d\n", error);
399
400                 /* Do the race */
401                 if (rank == append_rank) {
402                         done = 0;
403                         do {
404                                 ret = write(fd, append_buf + done,
405                                             append_size - done);
406                                 if (ret < 0) {
407                                         rprintf(rank, n, errno != EINTR,
408                                                 "append %u/%u: %s\n",
409                                                 ret + done, append_size,
410                                                 strerror(errno));
411                                         if (errno != EINTR)
412                                                 break;
413                                 } else if (verbose > 1 || ret != append_size) {
414                                         rprintf(rank, n, ret != append_size,
415                                                 "append %u/%u\n",
416                                                 ret + done, append_size);
417                                 }
418                                 if (ret > 0)
419                                         done += ret;
420                         } while (done != append_size);
421                 } else if (rank == trunc_rank) {
422                         /*
423                          * XXX: truncating the same file descriptor as the
424                          *      append on a single node causes this test
425                          *      to fail currently (2009-02-01).
426                          */
427                         ifnames = (classic_trunc ? rank : rand()) % nfnames;
428                         ret = truncate(fnames[ifnames], trunc_offset);
429                         if (verbose > 1 || ret != 0)
430                                 rprintf(rank, n, ret,
431                                         "truncate %s (%u) @ %u: %s\n",
432                                         fnames[ifnames], ifnames,
433                                         trunc_offset, strerror(errno));
434                 }
435
436                 error = MPI_Barrier(MPI_COMM_WORLD);
437                 if (verbose > 2 || error != MPI_SUCCESS)
438                         rprintf(rank, n, error != MPI_SUCCESS,
439                                 "end MPI_Barrier: %d\n", error);
440
441                 error = 0;
442
443                 /* Check the result */
444                 if (rank == 0) {
445                         char *tmp_buf;
446                         struct stat st = { 0 };
447
448                         ifnames = classic_check ? 0 : (rand() % nfnames);
449                         ret = stat(fnames[ifnames], &st);
450                         if (verbose > 1 || ret != 0)
451                                 rprintf(rank, n, ret,
452                                         "stat %s (%u) size %llu: %s\n",
453                                         fnames[ifnames], ifnames,
454                                         (long long)st.st_size, strerror(errno));
455
456                         ret = lseek(fd, 0, SEEK_SET);
457                         if (ret != 0)
458                                 rprintf(rank, n, ret, "lseek 0: %s\n",
459                                         strerror(errno));
460
461                         done = 0;
462                         do {
463                                 ret = read(fd, read_buf + done,
464                                            st.st_size - done);
465                                 if (verbose > 1 || ret <= 0) {
466                                         rprintf(rank, n, ret <= 0,
467                                                 "read %d/%llu @ %u: %s\n",
468                                                 ret,
469                                                 (long long)st.st_size - done,
470                                                 done, ret != 0 ?
471                                                 strerror(errno) : "short read");
472                                 }
473                                 done += ret;
474                         } while (done != st.st_size);
475
476                         if (memcmp(read_buf, write_buf, write_size)) {
477                                 rprintf(rank, n, 0,
478                                         "WRITE bad [0-%d]/[0-%#x] != %c\n",
479                                         write_size - 1, write_size - 1,
480                                         write_char);
481                                 error = 1;
482                         }
483
484                         tmp_buf = read_buf + write_size;
485
486                         if (st.st_size == trunc_offset) {
487                                 /* Check case 1: first append then truncate */
488                                 int tmp_size, tmp_offset;
489
490                                 tmp_size = trunc_size < append_size ?
491                                                 trunc_size : append_size;
492                                 tmp_offset = write_size + tmp_size;
493
494                                 if (memcmp(tmp_buf, append_buf, tmp_size)) {
495                                         rprintf(rank, n, 0,
496                                                 "trunc-after-APPEND bad [%d-%d]/[%#x-%#x] != %c\n",
497                                                 write_size, tmp_offset - 1,
498                                                 write_size, tmp_offset - 1,
499                                                 append_char);
500                                         error = 1;
501                                 } else if (trunc_size > append_size &&
502                                            memcmp(tmp_buf + append_size,
503                                                   trunc_buf,
504                                                   trunc_size - append_size)) {
505                                         rprintf(rank, n, 0,
506                                                 "TRUNC-after-append bad [%d-%d]/[%#x-%#x] != 0\n",
507                                                 tmp_offset, trunc_offset - 1,
508                                                 tmp_offset, trunc_offset - 1);
509                                         error = 1;
510                                 }
511                         } else {
512                                 int expected_size = trunc_offset + append_size;
513                                 /* Check case 2: first truncate then append */
514                                 if (st.st_size != expected_size) {
515                                         rprintf(rank, n, 0,
516                                                 "APPEND-after-trunc bad file size %llu != %u\n",
517                                                 (long long)st.st_size,
518                                                 expected_size);
519                                         error = 1;
520                                 }
521
522                                 if (memcmp(tmp_buf, trunc_buf, trunc_size)) {
523                                         rprintf(rank, n, 0,
524                                                 "append-after-TRUNC bad [%d-%d]/[%#x-%#x] != 0\n",
525                                                 write_size, trunc_offset - 1,
526                                                 write_size, trunc_offset - 1);
527                                         error = 1;
528                                 } else if (memcmp(read_buf + trunc_offset,
529                                                   append_buf, append_size)) {
530                                         rprintf(rank, n, 0,
531                                                 "APPEND-after-trunc bad [%d-%d]/[%#x-%#x] != %c\n",
532                                                 trunc_offset, expected_size - 1,
533                                                 trunc_offset, expected_size - 1,
534                                                 append_char);
535                                         error = 1;
536                                 }
537                         }
538
539                         if (error == 1) {
540                                 char command[4096];
541
542                                 rprintf(rank, n, 0, STATUS_FMT"\n",
543                                         write_char, write_size, write_size,
544                                         append_char, append_size, append_size,
545                                         trunc_offset, trunc_offset);
546
547                                 sprintf(command, "od -Ax -a %s", fnames[0]);
548                                 ret = system(command);
549                                 MPI_Abort(MPI_COMM_WORLD, 1);
550                         }
551                 }
552         }
553
554         if (rank == 0 || verbose)
555                 printf("r=%2u n=%4u: "STATUS_FMT"\nPASS\n", rank, n - 1,
556                        write_char, write_size, write_size, append_char,
557                        append_size, append_size, trunc_offset, trunc_offset);
558
559         close(fd);
560
561         if (rank == 0) {
562                 ifnames = rand() % nfnames;
563                 ret = unlink(fnames[ifnames]);
564                 if (ret != 0)
565                         printf("%s: unlink %s failed: %s\n",
566                                prog, fnames[ifnames], strerror(errno));
567         }
568
569         MPI_Finalize();
570         return 0;
571 }