Whamcloud - gitweb
0fdf0fcb0602ffed0ec1442c763fe41b4a20ab14
[fs/lustre-release.git] / lustre / tests / mpi / write_append_truncate.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/tests/mpi/write_append_truncate.c
37  *
38  * Each loop does 3 things:
39  *   - truncate file to zero (not via ftruncate though, to test O_APPEND)
40  *   - write a "chunk" of data (should be at file offset 0 after truncate)
41  *   - on each of two threads either append or truncate-up the file
42  *
43  * If the truncate happened first, we should have a hole in the file.
44  * If the append happened first, we should have truncated the file down.
45  *
46  * WRITE_SIZE_MAX and APPEND_SIZE_MAX are large enough to cross a stripe.
47  *
48  * compile: mpicc -g -Wall -o write_append_truncate write_append_truncate.c
49  * run:     mpirun -np 2 -machlist <hostlist file> write_append_truncate <file>
50  *  or:     pdsh -w <two hosts> write_append_truncate <file>
51  *  or:     prun -n 2 [-N 2] write_append_truncate <file>
52  */
53
54 #include <stdlib.h>
55 #include <stdio.h>
56 #include <stdarg.h>
57 #include <sys/types.h>
58 #include <sys/stat.h>
59 #include <fcntl.h>
60 #include <errno.h>
61 #include <time.h>
62 #include <string.h>
63 #include <unistd.h>
64 #include <getopt.h>
65 #include "mpi.h"
66
/* Default number of test loops when -n is not given. */
#define DEFAULT_ITER    10000

/* Default upper bounds (in bytes) for the write, append and truncate sizes. */
#define WRITE_SIZE_MAX  1234567
#define APPEND_SIZE_MAX 1234567
#define TRUNC_SIZE_MAX  1234567

/*
 * Per-loop status line.  Arguments, in order: write char, write size (twice,
 * decimal and hex), append char, append size (twice), truncate offset (twice).
 * The sizes and offset are unsigned, so they are printed with %u / %x.
 */
#define STATUS_FMT "WR %c %7u/%#08x, AP %c %7u/%#08x, TR@ %7u/%#08x"

/* Size of the buffer that holds the local hostname (filled in by main()). */
#define HOSTNAME_SIZE 50
char hostname[HOSTNAME_SIZE];
/* Maximum number of filename arguments accepted on the command line. */
#define FNAMES_MAX 256
78
79 void usage(char *prog)
80 {
81         printf("usage: %s [-a append_max] [-C] [-n nloops] [-s seed]\n"
82                "\t\t[-t trunc_max] [-T] [-v] [-w write_max] <filename> ...\n", prog);
83         printf("\t-a append_max: maximum size of append, default %u bytes\n",
84                APPEND_SIZE_MAX);
85         printf("\t-C: 'classic' checks (on file 0)\n");
86         printf("\t-n nloops: count of loops to run, default %u\n",DEFAULT_ITER);
87         printf("\t-s seed: random seed to use, default {current time}\n");
88         printf("\t-t trunc_max: maximum size of truncate, default %u bytes\n",
89                TRUNC_SIZE_MAX);
90         printf("\t-T: 'classic' truncates (on file 0)\n");
91         printf("\t-w write_max: maximum size of write, default %u bytes\n",
92                WRITE_SIZE_MAX);
93         printf("\t-W: 'classic' writes (on rank 0, file 0)\n");
94         printf("\t-v: run in verbose mode (repeat for more verbosity)\n");
95         printf("\tfilename for each mountpoint of same filesystem on a node\n");
96         printf("\b%s must be run with at least 2 processes\n", prog);
97
98         MPI_Finalize();
99         exit(1);
100 }
101
102 /* Print process rank, loop count, message, and exit (i.e. a fatal error) */
103 void rprintf(int rank, int loop, int error, const char *fmt, ...)
104 __attribute__ ((format (printf, 4, 5)));
105
106 void rprintf(int rank, int loop, int error, const char *fmt, ...)
107 {
108         va_list       ap;
109
110         printf("r=%2u", rank);
111         if (loop >= 0)
112                 printf(" l=%04u", loop);
113         if (error != 0)
114                 printf(" %s", hostname);
115         printf(": ");
116
117         va_start(ap, fmt);
118
119         vprintf(fmt, ap);
120
121         if (error != 0)
122                 MPI_Abort(MPI_COMM_WORLD, error);
123 }
124
125 int main(int argc, char *argv[])
126 {
127         int n, nloops = DEFAULT_ITER;
128         int nfnames = 0, ifnames, fd;
129         int rank = -1, nproc, ret;
130         unsigned write_max = WRITE_SIZE_MAX;
131         unsigned append_max = APPEND_SIZE_MAX;
132         unsigned write_size = 0, append_size = 0, trunc_size = 0;
133         unsigned trunc_max = 0, trunc_offset = 0;
134         char *append_buf;
135         char *write_buf;
136         char *read_buf = NULL;
137         char *trunc_buf = NULL;
138         int seed = time(0);
139         int done;
140         int error;
141         int verbose = 0;
142         int classic_check = 0, classic_trunc = 0, classic_write = 0;
143         char write_char = 'A', append_char = 'a';
144         char *fnames[FNAMES_MAX], *end;
145         char *prog = "write_append_truncate";
146         int c;
147
148         error = MPI_Init(&argc, &argv);
149         if (error != MPI_SUCCESS)
150                 printf("%s: MPI_Init failed: %d\n", prog, error);
151         else if (verbose > 2)
152                 printf("%s: MPI_Init succeeded\n", prog);
153
154         prog = strrchr(argv[0], '/');
155         if (prog == NULL)
156                 prog = argv[0];
157         else
158                 prog++;
159
160         while ((c = getopt(argc, argv, "a:cCn:s:t:Tvw:W")) != -1) {
161                 switch(c) {
162                 case 'a':
163                         append_max = strtoul(optarg, &end, 0);
164                         if (append_max == 0 || *end) {
165                                 fprintf(stderr, "%s: bad append option '%s'\n",
166                                         prog, optarg);
167                                 usage(prog);
168                         }
169                         break;
170                 case 'C':
171                         classic_check++;
172                         break;
173                 case 'n':
174                         nloops = strtoul(optarg, &end, 0);
175                         if (nloops == 0 || *end) {
176                                 fprintf(stderr, "%s: bad nloops option '%s'\n",
177                                         prog, optarg);
178                                 usage(prog);
179                         }
180                         break;
181                 case 's':
182                         seed = strtoul(optarg, &end, 0);
183                         if (*end) {
184                                 fprintf(stderr, "%s: bad seed option '%s'\n",
185                                         prog, optarg);
186                                 usage(prog);
187                         }
188                         break;
189                 case 't':
190                         trunc_max = strtoul(optarg, &end, 0);
191                         if (*end) {
192                                 fprintf(stderr,"%s: bad truncate option '%s'\n",
193                                         prog, optarg);
194                                 usage(prog);
195                         }
196                         break;
197                 case 'T':
198                         classic_trunc++;
199                         break;
200                 case 'v':
201                         verbose++;
202                         break;
203                 case 'w':
204                         write_max = strtoul(optarg, &end, 0);
205                         if (write_max == 0 || *end) {
206                                 fprintf(stderr, "%s: bad write option '%s'\n",
207                                         prog, optarg);
208                                 usage(prog);
209                         }
210                         break;
211                 case 'W':
212                         classic_write++;
213                         break;
214                 default:
215                         fprintf(stderr, "%s: unknown option '%c'\n", prog, c);
216                         usage(prog);
217                 }
218         }
219
220         srand(seed);
221
222         if (argc == optind) {
223                 fprintf(stderr, "%s: missing filename argument\n", prog);
224                 usage(prog);
225         }
226
227         if (argc > optind + FNAMES_MAX) {
228                 fprintf(stderr, "%s: too many extra options\n", prog);
229                 usage(prog);
230         }
231
232         while (optind < argc)
233                 fnames[nfnames++] = argv[optind++];
234
235         error = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
236         if (verbose > 2 || error != MPI_SUCCESS)
237                 rprintf(rank, -1, error != MPI_SUCCESS, "MPI_Comm_rank: %d\n",
238                         error);
239
240         error = MPI_Comm_size(MPI_COMM_WORLD, &nproc);
241         if (verbose > 2 || error != MPI_SUCCESS)
242                 rprintf(rank, -1, error != MPI_SUCCESS, "MPI_Comm_size: %d\n",
243                         error);
244
245         if (nproc < 2)
246                 rprintf(rank, -1, 1, "%s: must run with at least 2 processes\n",
247                         prog);
248
249         append_buf = malloc(append_max);
250         if (append_buf == NULL)
251                 rprintf(rank, -1, 1,"%s: error allocating append_buf %u\n",
252                         prog, append_max);
253
254         write_buf = malloc(write_max);
255         if (write_buf == NULL)
256                 rprintf(rank, -1, 1, "%s: error allocating write_buf %u\n",
257                         prog, write_max);
258
259         if (gethostname(hostname, HOSTNAME_SIZE) < 0)
260                 rprintf(rank, -1, 1, "%s: gethostname failed: %s\n",
261                         prog, strerror(errno));
262
263         if (rank == 0) {
264                 int max_size = write_max + (trunc_max ?: append_max)+append_max;
265
266                 fd = open(fnames[0], O_WRONLY|O_CREAT|O_TRUNC, 0666);
267                 rprintf(rank,-1, fd<0, "create %s, max size: %u, seed %u: %s\n",
268                         fnames[0], max_size, seed, strerror(errno));
269                 close(fd);
270
271                 trunc_buf = calloc(1, trunc_max ?: append_max);
272                 if (trunc_buf == NULL)
273                         rprintf(rank,-1,1,"%s: error allocating trunc_buf %u\n",
274                                 prog, trunc_max ?: append_max);
275
276                 /* initial write + truncate up + append */
277                 read_buf = malloc(max_size);
278                 if (read_buf == NULL)
279                         rprintf(rank,-1,1,"%s: error allocating read_buf %u\n",
280                                 prog, max_size);
281         }
282
283         error = MPI_Barrier(MPI_COMM_WORLD);
284         if (verbose > 2 || error != MPI_SUCCESS)
285                 rprintf(rank, -1, error != MPI_SUCCESS,
286                         "prep MPI_Barrier: %d\n", error);
287
288         ifnames = rank % nfnames;
289         fd = open(fnames[ifnames], O_RDWR | O_APPEND);
290         if (verbose || fd < 0)
291                 rprintf(rank, -1, errno, "open '%s' (%u): %s\n",
292                         fnames[ifnames], ifnames, strerror(errno));
293
294         for (n = 0; n < nloops; n++) {
295                 /* Initialized only to quiet stupid GCC warnings */
296                 unsigned write_rank = 0, append_rank = n, trunc_rank = n + 1;
297                 unsigned mpi_shared_vars[6];
298
299                 /* reset the environment */
300                 write_char = 'A' + (n % 26);
301                 append_char = 'a' + (n % 26);
302
303                 if (rank == 0) {
304                         write_size = (rand() % (write_max - 1)) + 1;
305                         append_size = (rand() % (append_max - 1)) + 1;
306                         trunc_size = (rand() % ((trunc_max?: append_size)-1))+1;
307                         trunc_offset = write_size + trunc_size;
308
309                         if (verbose || n % 1000 == 0)
310                                 rprintf(rank, n, 0, STATUS_FMT"\n",
311                                         write_char, write_size, write_size,
312                                         append_char, append_size, append_size,
313                                         trunc_offset, trunc_offset);
314
315                         write_rank = (classic_write ? 0 : rand()) % nproc;
316                         do {
317                                 append_rank = (classic_write ? n : rand()) %
318                                                                 nproc;
319                                 /* We can't allow the append rank be the same
320                                  * as the classic_trunc trunc_rank, or we will
321                                  * spin here forever. */
322                         } while (append_rank == (n + 1) % nproc);
323                         do {
324                                 trunc_rank = (classic_trunc? (n + 1) : rand()) %
325                                                                 nproc;
326                         } while (trunc_rank == append_rank);
327
328                         mpi_shared_vars[0] = write_size;
329                         mpi_shared_vars[1] = append_size;
330                         mpi_shared_vars[2] = trunc_size;
331                         mpi_shared_vars[3] = write_rank;
332                         mpi_shared_vars[4] = append_rank;
333                         mpi_shared_vars[5] = trunc_rank;
334                 }
335
336                 error = MPI_Bcast(&mpi_shared_vars, 6,
337                                   MPI_INT, 0, MPI_COMM_WORLD);
338                 if (verbose > 2 || error != MPI_SUCCESS)
339                         rprintf(rank, n, error != MPI_SUCCESS,
340                                 "MPI_Bcast mpi_shared_vars"
341                                 "[%u, %u, %u, %u, %u, %u]: %d\n",
342                                 mpi_shared_vars[0], mpi_shared_vars[1],
343                                 mpi_shared_vars[2], mpi_shared_vars[3],
344                                 mpi_shared_vars[4], mpi_shared_vars[5], error);
345
346                 if (rank != 0) {
347                         write_size  = mpi_shared_vars[0];
348                         append_size = mpi_shared_vars[1];
349                         trunc_size  = mpi_shared_vars[2];
350                         write_rank  = mpi_shared_vars[3];
351                         append_rank = mpi_shared_vars[4];
352                         trunc_rank  = mpi_shared_vars[5];
353
354                         trunc_offset = write_size + trunc_size;
355                 }
356
357                 if (rank == write_rank || rank == 0)
358                         memset(write_buf, write_char, write_max);
359
360                 if (rank == write_rank) {
361                         ifnames = (classic_write ? 0 : rand()) % nfnames;
362                         ret = truncate(fnames[ifnames], 0);
363                         if (verbose > 1 || ret != 0)
364                                 rprintf(rank, n, ret,
365                                         "initial truncate %s (%u) @ 0: %s\n",
366                                         fnames[ifnames], ifnames,
367                                         strerror(errno));
368
369                         done = 0;
370                         do {
371                                 ret = write(fd, write_buf+done,write_size-done);
372                                 if (verbose > 1 || ret < 0) {
373                                         rprintf(rank, n,
374                                                 ret < 0 && errno != EINTR,
375                                                 "write %d/%d @ %d: %s\n",
376                                                 ret + done, write_size, done,
377                                                 strerror(errno));
378                                         if (ret < 0 && errno != EINTR)
379                                                 break;
380                                 }
381                                 if (ret > 0)
382                                         done += ret;
383                         } while (done != write_size);
384                 }
385
386                 if (rank == append_rank || rank == 0)
387                         memset(append_buf, append_char, append_size);
388
389                 error = MPI_Barrier(MPI_COMM_WORLD);
390                 if (verbose > 2 || error != MPI_SUCCESS)
391                         rprintf(rank, n, error != MPI_SUCCESS,
392                                 "start MPI_Barrier: %d\n", error);
393
394                 /* Do the race */
395                 if (rank == append_rank) {
396                         done = 0;
397                         do {
398                                 ret = write(fd, append_buf + done,
399                                             append_size - done);
400                                 if (ret < 0) {
401                                         rprintf(rank, n, errno != EINTR,
402                                                 "append %u/%u: %s\n",
403                                                 ret + done, append_size,
404                                                 strerror(errno));
405                                         if (errno != EINTR)
406                                                 break;
407                                 } else if (verbose > 1 || ret != append_size) {
408                                         rprintf(rank, n, ret != append_size,
409                                                 "append %u/%u\n",
410                                                 ret + done, append_size);
411                                 }
412                                 if (ret > 0)
413                                         done += ret;
414                         } while (done != append_size);
415                 } else if (rank == trunc_rank) {
416                         /* XXX: truncating the same file descriptor as the
417                          *      append on a single node causes this test
418                          *      to fail currently (2009-02-01). */
419                         ifnames = (classic_trunc ? rank : rand()) % nfnames;
420                         ret = truncate(fnames[ifnames], trunc_offset);
421                         if (verbose > 1 || ret != 0)
422                                 rprintf(rank, n, ret,
423                                         "truncate %s (%u) @ %u: %s\n",
424                                         fnames[ifnames], ifnames,
425                                         trunc_offset, strerror(errno));
426                 }
427
428                 error = MPI_Barrier(MPI_COMM_WORLD);
429                 if (verbose > 2 || error != MPI_SUCCESS)
430                         rprintf(rank, n, error != MPI_SUCCESS,
431                                 "end MPI_Barrier: %d\n", error);
432
433                 error = 0;
434
435                 /* Check the result */
436                 if (rank == 0) {
437                         char *tmp_buf;
438                         struct stat st = { 0 };
439
440                         ifnames = classic_check ? 0 : (rand() % nfnames);
441                         ret = stat(fnames[ifnames], &st);
442                         if (verbose > 1 || ret != 0)
443                                 rprintf(rank, n, ret,
444                                         "stat %s (%u) size %llu: %s\n",
445                                         fnames[ifnames], ifnames,
446                                         (long long)st.st_size, strerror(errno));
447
448                         ret = lseek(fd, 0, SEEK_SET);
449                         if (ret != 0)
450                                 rprintf(rank, n, ret, "lseek 0: %s\n",
451                                         strerror(errno));
452
453                         done = 0;
454                         do {
455                                 ret = read(fd, read_buf+done, st.st_size-done);
456                                 if (verbose > 1 || ret <= 0) {
457                                         rprintf(rank, n, ret <= 0,
458                                                 "read %d/%llu @ %u: %s\n",
459                                                 ret, (long long)st.st_size-done,
460                                                 done, ret != 0 ?
461                                                 strerror(errno) : "short read");
462                                 }
463                                 done += ret;
464                         } while (done != st.st_size);
465
466                         if (memcmp(read_buf, write_buf, write_size)) {
467                                 rprintf(rank, n, 0, "WRITE bad "
468                                         "[0-%d]/[0-%#x] != %c\n",
469                                         write_size - 1, write_size - 1,
470                                         write_char);
471                                 error = 1;
472                         }
473
474                         tmp_buf = read_buf + write_size;
475
476                         if (st.st_size == trunc_offset) {
477                                 /* Check case 1: first append then truncate */
478                                 int tmp_size, tmp_offset;
479
480                                 tmp_size = trunc_size < append_size ?
481                                                 trunc_size : append_size;
482                                 tmp_offset = write_size + tmp_size;
483
484                                 if (memcmp(tmp_buf, append_buf, tmp_size)) {
485                                         rprintf(rank, n, 0,"trunc-after-APPEND "
486                                                 "bad [%d-%d]/[%#x-%#x] != %c\n",
487                                                 write_size, tmp_offset - 1,
488                                                 write_size, tmp_offset - 1,
489                                                 append_char);
490                                         error = 1;
491                                 } else if (trunc_size > append_size &&
492                                            memcmp(tmp_buf+append_size,trunc_buf,
493                                                   trunc_size - append_size)) {
494                                         rprintf(rank, n, 0,"TRUNC-after-append "
495                                                 "bad [%d-%d]/[%#x-%#x] != 0\n",
496                                                 tmp_offset, trunc_offset - 1,
497                                                 tmp_offset, trunc_offset - 1);
498                                         error = 1;
499                                 }
500                         } else {
501                                 int expected_size = trunc_offset + append_size;
502                                 /* Check case 2: first truncate then append */
503                                 if (st.st_size != expected_size) {
504                                         rprintf(rank, n, 0,"APPEND-after-trunc "
505                                                 "bad file size %llu != %u\n",
506                                                 (long long)st.st_size,
507                                                 expected_size);
508                                         error = 1;
509                                 }
510
511                                 if (memcmp(tmp_buf, trunc_buf, trunc_size)) {
512                                         rprintf(rank, n, 0,"append-after-TRUNC "
513                                                 "bad [%d-%d]/[%#x-%#x] != 0\n",
514                                                 write_size, trunc_offset - 1,
515                                                 write_size, trunc_offset - 1);
516                                         error = 1;
517                                 } else if (memcmp(read_buf + trunc_offset,
518                                                   append_buf, append_size)) {
519                                         rprintf(rank, n, 0,"APPEND-after-trunc "
520                                                 "bad [%d-%d]/[%#x-%#x] != %c\n",
521                                                 trunc_offset, expected_size - 1,
522                                                 trunc_offset, expected_size - 1,
523                                                 append_char);
524                                         error = 1;
525                                 }
526                         }
527
528                         if (error == 1) {
529                                 char command[4096];
530
531                                 rprintf(rank, n, 0, STATUS_FMT"\n",
532                                         write_char, write_size, write_size,
533                                         append_char, append_size, append_size,
534                                         trunc_offset, trunc_offset);
535
536                                 sprintf(command, "od -Ax -a %s", fnames[0]);
537                                 ret = system(command);
538                                 MPI_Abort(MPI_COMM_WORLD, 1);
539                         }
540                 }
541         }
542
543         if (rank == 0 || verbose)
544                 printf("r=%2u n=%4u: "STATUS_FMT"\nPASS\n", rank, n - 1,
545                        write_char, write_size, write_size,
546                        append_char, append_size, append_size,
547                        trunc_offset, trunc_offset);
548
549         close(fd);
550
551         if (rank == 0) {
552                 ifnames = rand() % nfnames;
553                 ret = unlink(fnames[ifnames]);
554                 if (ret != 0)
555                         printf("%s: unlink %s failed: %s\n",
556                                prog, fnames[ifnames], strerror(errno));
557         }
558
559         MPI_Finalize();
560         return 0;
561 }