+{
+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
-+ (EXT_GENERATION(neh) + 1);
++ (EXT_HDR_GEN(neh) + 1);
+}
+
+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
-+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24)
+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */
+
+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data)
+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root)
+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth)
-+
++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__))
+
+#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
+
+{
+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
-+ (EXT_GENERATION(neh) + 1);
++ (EXT_HDR_GEN(neh) + 1);
+}
+
+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
-+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24)
+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */
+
+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data)
+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root)
+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth)
-+
++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__))
+
+#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
+
+{
+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
-+ (EXT_GENERATION(neh) + 1);
++ (EXT_HDR_GEN(neh) + 1);
+}
+
+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
-+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24)
+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */
+
+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data)
+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root)
+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth)
-+
++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__))
+
+#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
+
* version 1.4.7
* bug fixes
+Severity : enhancement
+Bugzilla : 9292
+Description: Getattr by fid
+Details : Getting a file attributes by its fid, obtaining UPDATE|LOOKUP
+ locks, avoids extra getattr rpc requests to MDS, allows '/' to
+ have locks and avoids getattr rpc requests for it on every stat.
+
Severity : major
Frequency : rare
Bugzilla : 5719, 9635, 9792, 9684,
Bugzilla : 9340
Description: allow number of MDS service threads to be changed at module load
Details : It is now possible to change the number of MDS service threads
- running. Adding "options mds mds_num_threads=N" will set the
- number of threads for the next time Lustre is restarted (assuming
- the "mds" module is also reloaded at that time). The default
- number of threads will stay the same, 32 for most systems.
+ running. Adding "options mds mds_num_threads={N}" to the MDS's
+ /etc/modprobe.conf will set the number of threads for the next
+ time Lustre is restarted (assuming the "mds" module is also
+ reloaded at that time). The default number of threads will
+ stay the same, 32 for most systems.
Severity : major
Frequency : rare
service startup instead of the OBD startup.
Severity : enhancement
-Bugzilla : 10393 (patchless)
+Bugzilla : 10193 (patchless)
Description: Remove dependency on various unexported kernel interfaces.
Details : No longer need reparent_to_init, exit_mm, exit_files,
sock_getsockopt, filemap_populate, FMODE_EXEC, put_filp.
and also offers similar improvements for other set_info RPCs.
Severity : minor
+Frequency : common
Bugzilla : 10265
Description: excessive CPU usage during initial read phase on client
Details : During the initial read phase on a client, it would agressively
/proc/fs/lustre/llite/*/max_read_ahead_whole_mb, 2MB by default).
Severity : minor
+Frequency : rare
Bugzilla : 10450
Description: MDS crash when receiving packet with unknown intent.
Details : Do not LBUG in unknown intent case, just return -EFAULT
+Severity : enhancement
+Bugzilla : 9293
+Description: MDS RPCs are serialised on client. This is unnecessary for some.
+Details : Do not serialize getattr (non-intent version) and statfs.
+
+Severity : minor
+Frequency : occasional, when OST network is overloaded/intermittent
+Bugzilla : 10416
+Description: client evicted by OST after bulk IO timeout
+Details : If a client sends a bulk IO request (read or write) the OST
+ may evict the client if it is unresposive to its data GET/PUT
+ request. This is incorrect if the network is overloaded (takes
+ too long to transfer the RPC data) or dropped the OST GET/PUT
+ request. There is no need to evict the client at all, since
+ the pinger and/or lock callbacks will handle this, and the
+ client can restart the bulk request.
+
+Severity : minor
+Frequency : Always when mmapping file with no objects
+Bugzilla : 10438
+Description: client crashes when mmapping file with no objects
+Details : Check that we actually have objects in a file before doing any
+ operations on objects in ll_vm_open, ll_vm_close and
+ ll_glimpse_size.
+
+Severity : minor
+Frequency : Rare
+Bugzilla : 10484
+Description: Request leak when working with deleted CWD
+Details : Introduce advanced request refcount tracking for requests
+ referenced from lustre intent.
+
------------------------------------------------------------------------------
Bugzilla : 4928, 7341, 9758
Description: allow number of OST service threads to be specified
Details : a module parameter allows the number of OST service threads
- to be specified via "options ost ost_num_threads=X" in
- /etc/modules.conf or /etc/modutils.conf.
+ to be specified via "options ost ost_num_threads={N}" in the
+ OSS's /etc/modules.conf or /etc/modprobe.conf.
Severity : major
Frequency : rare
--- /dev/null
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.c 2005-12-06 11:54:37.883130927 -0500
+@@ -0,0 +1,37 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 2001 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++/* adioi.h has the ADIOI_Fns_struct define */
++#include "adioi.h"
++
++struct ADIOI_Fns_struct ADIO_LUSTRE_operations = {
++ ADIOI_LUSTRE_Open, /* Open */
++ ADIOI_LUSTRE_ReadContig, /* ReadContig */
++ ADIOI_LUSTRE_WriteContig, /* WriteContig */
++ ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
++ ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
++ ADIOI_GEN_SeekIndividual, /* SeekIndividual */
++ ADIOI_LUSTRE_Fcntl, /* Fcntl */
++ ADIOI_LUSTRE_SetInfo, /* SetInfo */
++ ADIOI_GEN_ReadStrided, /* ReadStrided */
++ ADIOI_GEN_WriteStrided, /* WriteStrided */
++ ADIOI_LUSTRE_Close, /* Close */
++ ADIOI_LUSTRE_IreadContig, /* IreadContig */
++ ADIOI_LUSTRE_IwriteContig, /* IwriteContig */
++ ADIOI_LUSTRE_ReadDone, /* ReadDone */
++ ADIOI_LUSTRE_WriteDone, /* WriteDone */
++ ADIOI_LUSTRE_ReadComplete, /* ReadComplete */
++ ADIOI_LUSTRE_WriteComplete, /* WriteComplete */
++ ADIOI_LUSTRE_IreadStrided, /* IreadStrided */
++ ADIOI_LUSTRE_IwriteStrided, /* IwriteStrided */
++ ADIOI_GEN_Flush, /* Flush */
++ ADIOI_LUSTRE_Resize, /* Resize */
++ ADIOI_GEN_Delete, /* Delete */
++};
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_close.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_close.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_close.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_close.c 2005-12-06 11:54:37.895129327 -0500
+@@ -0,0 +1,32 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_close.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code)
++{
++ int err;
++#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
++ static char myname[] = "ADIOI_LUSTRE_CLOSE";
++#endif
++
++ err = close(fd->fd_sys);
++ if (err == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error(fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_done.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_done.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_done.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_done.c 2005-12-06 11:54:37.898128927 -0500
+@@ -0,0 +1,188 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_done.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++int ADIOI_LUSTRE_ReadDone(ADIO_Request *request, ADIO_Status *status, int *error_code)
++{
++#ifndef NO_AIO
++ int done=0;
++#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
++ static char myname[] = "ADIOI_LUSTRE_READDONE";
++#endif
++#ifdef AIO_SUN
++ aio_result_t *result=0, *tmp;
++#else
++ int err;
++#endif
++#ifdef AIO_HANDLE_IN_AIOCB
++ struct aiocb *tmp1;
++#endif
++#endif
++
++ if (*request == ADIO_REQUEST_NULL) {
++ *error_code = MPI_SUCCESS;
++ return 1;
++ }
++
++#ifdef NO_AIO
++/* HP, FreeBSD, Linux */
++#ifdef HAVE_STATUS_SET_BYTES
++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
++#endif
++ (*request)->fd->async_count--;
++ ADIOI_Free_request((ADIOI_Req_node *) (*request));
++ *request = ADIO_REQUEST_NULL;
++ *error_code = MPI_SUCCESS;
++ return 1;
++#endif
++
++#ifdef AIO_SUN
++ if ((*request)->queued) {
++ tmp = (aio_result_t *) (*request)->handle;
++ if (tmp->aio_return == AIO_INPROGRESS) {
++ done = 0;
++ *error_code = MPI_SUCCESS;
++ }
++ else if (tmp->aio_return != -1) {
++ result = (aio_result_t *) aiowait(0); /* dequeue any one request */
++ done = 1;
++ (*request)->nbytes = tmp->aio_return;
++ *error_code = MPI_SUCCESS;
++ }
++ else {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(tmp->aio_errno));
++ return;
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(tmp->aio_errno));
++ ADIOI_Error((*request)->fd, *error_code, myname);
++#endif
++ }
++ } /* if ((*request)->queued) ... */
++ else {
++ /* ADIOI_Complete_Async completed this request, but request object
++ was not freed. */
++ done = 1;
++ *error_code = MPI_SUCCESS;
++ }
++#ifdef HAVE_STATUS_SET_BYTES
++ if (done && ((*request)->nbytes != -1))
++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
++#endif
++
++#endif
++
++#ifdef AIO_HANDLE_IN_AIOCB
++/* IBM */
++ if ((*request)->queued) {
++ tmp1 = (struct aiocb *) (*request)->handle;
++ errno = aio_error(tmp1->aio_handle);
++ if (errno == EINPROG) {
++ done = 0;
++ *error_code = MPI_SUCCESS;
++ }
++ else {
++ err = aio_return(tmp1->aio_handle);
++ (*request)->nbytes = err;
++ errno = aio_error(tmp1->aio_handle);
++
++ done = 1;
++
++ if (err == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++ return;
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error((*request)->fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++ }
++ } /* if ((*request)->queued) */
++ else {
++ done = 1;
++ *error_code = MPI_SUCCESS;
++ }
++#ifdef HAVE_STATUS_SET_BYTES
++ if (done && ((*request)->nbytes != -1))
++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
++#endif
++
++#elif (!defined(NO_AIO) && !defined(AIO_SUN))
++/* DEC, SGI IRIX 5 and 6 */
++ if ((*request)->queued) {
++ errno = aio_error((const struct aiocb *) (*request)->handle);
++ if (errno == EINPROGRESS) {
++ done = 0;
++ *error_code = MPI_SUCCESS;
++ }
++ else {
++ err = aio_return((struct aiocb *) (*request)->handle);
++ (*request)->nbytes = err;
++ errno = aio_error((struct aiocb *) (*request)->handle);
++
++ done = 1;
++
++ if (err == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++ return;
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else /* MPICH-1 */
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error((*request)->fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++ }
++ } /* if ((*request)->queued) */
++ else {
++ done = 1;
++ *error_code = MPI_SUCCESS;
++ }
++#ifdef HAVE_STATUS_SET_BYTES
++ if (done && ((*request)->nbytes != -1))
++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
++#endif
++
++#endif
++
++#ifndef NO_AIO
++ if (done) {
++ /* if request is still queued in the system, it is also there
++ on ADIOI_Async_list. Delete it from there. */
++ if ((*request)->queued) ADIOI_Del_req_from_list(request);
++
++ (*request)->fd->async_count--;
++ if ((*request)->handle) ADIOI_Free((*request)->handle);
++ ADIOI_Free_request((ADIOI_Req_node *) (*request));
++ *request = ADIO_REQUEST_NULL;
++ }
++ return done;
++#endif
++
++}
++
++
++int ADIOI_LUSTRE_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code)
++{
++ return ADIOI_LUSTRE_ReadDone(request, status, error_code);
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_fcntl.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_fcntl.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_fcntl.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_fcntl.c 2005-12-06 11:54:37.901128527 -0500
+@@ -0,0 +1,126 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_fcntl.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++#include "adio_extern.h"
++/* #ifdef MPISGI
++#include "mpisgi2.h"
++#endif */
++
++void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
++{
++ int i, ntimes;
++ ADIO_Offset curr_fsize, alloc_size, size, len, done;
++ ADIO_Status status;
++ char *buf;
++#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
++ static char myname[] = "ADIOI_LUSTRE_FCNTL";
++#endif
++
++ switch(flag) {
++ case ADIO_FCNTL_GET_FSIZE:
++ fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END);
++ if (fd->fp_sys_posn != -1)
++ lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
++ if (fcntl_struct->fsize == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else /* MPICH-1 */
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error(fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++ break;
++
++ case ADIO_FCNTL_SET_DISKSPACE:
++ /* will be called by one process only */
++ /* On file systems with no preallocation function, I have to
++ explicitly write
++ to allocate space. Since there could be holes in the file,
++ I need to read up to the current file size, write it back,
++ and then write beyond that depending on how much
++ preallocation is needed.
++ read/write in sizes of no more than ADIOI_PREALLOC_BUFSZ */
++
++ curr_fsize = lseek(fd->fd_sys, 0, SEEK_END);
++ alloc_size = fcntl_struct->diskspace;
++
++ size = ADIOI_MIN(curr_fsize, alloc_size);
++
++ ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ;
++ buf = (char *) ADIOI_Malloc(ADIOI_PREALLOC_BUFSZ);
++ done = 0;
++
++ for (i=0; i<ntimes; i++) {
++ len = ADIOI_MIN(size-done, ADIOI_PREALLOC_BUFSZ);
++ ADIO_ReadContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done,
++ &status, error_code);
++ if (*error_code != MPI_SUCCESS) {
++ ADIOI_Free(buf);
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++#elif defined(PRINT_ERR_MSG)
++ FPRINTF(stderr, "ADIOI_LUSTRE_Fcntl: To preallocate disk space, ROMIO needs to read the file and write it back, but is unable to read the file. Please give the file read permission and open it with MPI_MODE_RDWR.\n");
++ MPI_Abort(MPI_COMM_WORLD, 1);
++#else /* MPICH-1 */
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_PREALLOC_PERM,
++ myname, (char *) 0, (char *) 0);
++ ADIOI_Error(fd, *error_code, myname);
++#endif
++ return;
++ }
++ ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
++ done, &status, error_code);
++ if (*error_code != MPI_SUCCESS) return;
++ done += len;
++ }
++
++ if (alloc_size > curr_fsize) {
++ memset(buf, 0, ADIOI_PREALLOC_BUFSZ);
++ size = alloc_size - curr_fsize;
++ ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ;
++ for (i=0; i<ntimes; i++) {
++ len = ADIOI_MIN(alloc_size-done, ADIOI_PREALLOC_BUFSZ);
++ ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
++ done, &status, error_code);
++ if (*error_code != MPI_SUCCESS) return;
++ done += len;
++ }
++ }
++ ADIOI_Free(buf);
++ if (fd->fp_sys_posn != -1)
++ lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
++ *error_code = MPI_SUCCESS;
++ break;
++
++ case ADIO_FCNTL_SET_IOMODE:
++ /* for implementing PFS I/O modes. will not occur in MPI-IO
++ implementation.*/
++ if (fd->iomode != fcntl_struct->iomode) {
++ fd->iomode = fcntl_struct->iomode;
++ MPI_Barrier(MPI_COMM_WORLD);
++ }
++ *error_code = MPI_SUCCESS;
++ break;
++
++ case ADIO_FCNTL_SET_ATOMICITY:
++ fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1;
++ *error_code = MPI_SUCCESS;
++ break;
++
++ default:
++ FPRINTF(stderr, "Unknown flag passed to ADIOI_LUSTRE_Fcntl\n");
++ MPI_Abort(MPI_COMM_WORLD, 1);
++ }
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_flush.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_flush.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_flush.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_flush.c 2005-12-06 11:54:37.903128261 -0500
+@@ -0,0 +1,14 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_flush.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_Flush(ADIO_File fd, int *error_code)
++{
++ ADIOI_GEN_Flush(fd, error_code);
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.h mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.h
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.h 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.h 2005-12-06 11:54:37.891129861 -0500
+@@ -0,0 +1,36 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre.h,v 1.2 2005/07/07 14:38:17 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#ifndef AD_UNIX_INCLUDE
++#define AD_UNIX_INCLUDE
++
++/* temp*/
++#define HAVE_ASM_TYPES_H 1
++
++#include <unistd.h>
++#include <linux/types.h>
++#include <fcntl.h>
++#include <sys/ioctl.h>
++#include "lustre/lustre_user.h"
++#include "adio.h"
++
++#ifndef NO_AIO
++#ifdef AIO_SUN
++#include <sys/asynch.h>
++#else
++#include <aio.h>
++#ifdef NEEDS_ADIOCB_T
++typedef struct adiocb adiocb_t;
++#endif
++#endif
++#endif
++
++int ADIOI_LUSTRE_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
++ int wr, void *handle);
++
++#endif
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_hints.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_hints.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_hints.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_hints.c 2005-12-06 11:54:37.904128127 -0500
+@@ -0,0 +1,130 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_hints.c,v 1.2 2005/07/07 14:38:17 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
++{
++ char *value, *value_in_fd;
++ int flag, tmp_val, str_factor=-1, str_unit=0, start_iodev=-1;
++ struct lov_user_md lum = { 0 };
++ int err, myrank, fd_sys, perm, amode, old_mask;
++
++ if ( (fd->info) == MPI_INFO_NULL) {
++ /* This must be part of the open call. can set striping parameters
++ if necessary. */
++ MPI_Info_create(&(fd->info));
++
++ /* has user specified striping or server buffering parameters
++ and do they have the same value on all processes? */
++ if (users_info != MPI_INFO_NULL) {
++ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
++
++ MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
++ value, &flag);
++ if (flag) {
++ str_factor=atoi(value);
++ tmp_val = str_factor;
++ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
++ if (tmp_val != str_factor) {
++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: the value for key \"striping_factor\" must be the same on all processes\n");
++ MPI_Abort(MPI_COMM_WORLD, 1);
++ }
++ }
++
++ MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
++ value, &flag);
++ if (flag) {
++ str_unit=atoi(value);
++ tmp_val = str_unit;
++ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
++ if (tmp_val != str_unit) {
++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: the value for key \"striping_unit\" must be the same on all processes\n");
++ MPI_Abort(MPI_COMM_WORLD, 1);
++ }
++ }
++
++ MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
++ value, &flag);
++ if (flag) {
++ start_iodev=atoi(value);
++ tmp_val = start_iodev;
++ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
++ if (tmp_val != start_iodev) {
++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: the value for key \"start_iodevice\" must be the same on all processes\n");
++ MPI_Abort(MPI_COMM_WORLD, 1);
++ }
++ }
++
++ /* if user has specified striping info, process 0 tries to set it */
++ if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
++ MPI_Comm_rank(fd->comm, &myrank);
++ if (!myrank) {
++ if (fd->perm == ADIO_PERM_NULL) {
++ old_mask = umask(022);
++ umask(old_mask);
++ perm = old_mask ^ 0666;
++ }
++ else perm = fd->perm;
++
++ amode = 0;
++ if (fd->access_mode & ADIO_CREATE)
++ amode = amode | O_CREAT;
++ if (fd->access_mode & ADIO_RDWR ||
++ (fd->access_mode & ADIO_RDONLY &&
++ fd->access_mode & ADIO_WRONLY))
++ amode = amode | O_RDWR;
++ else if (fd->access_mode & ADIO_WRONLY)
++ amode = amode | O_WRONLY;
++ else if (fd->access_mode & ADIO_RDONLY)
++ amode = amode | O_RDONLY;
++ if (fd->access_mode & ADIO_EXCL)
++ amode = amode | O_EXCL;
++
++ /* we need to create file so ensure this is set */
++ amode = amode | O_LOV_DELAY_CREATE | O_CREAT;
++
++ fd_sys = open(fd->filename, amode, perm);
++ if (fd_sys == -1) {
++ if (errno != EEXIST)
++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: Failure to open file %s %d %d\n",strerror(errno), amode, perm);
++ } else {
++ lum.lmm_magic = LOV_USER_MAGIC;
++ lum.lmm_pattern = 0;
++ lum.lmm_stripe_size = str_unit;
++ lum.lmm_stripe_count = str_factor;
++ lum.lmm_stripe_offset = start_iodev;
++
++ err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
++ if (err == -1 && errno != EEXIST) {
++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: Failure to set stripe info %s \n",strerror(errno));
++ }
++
++ close(fd_sys);
++ }
++
++ }
++ MPI_Barrier(fd->comm);
++ }
++
++ ADIOI_Free(value);
++ }
++
++ /* set the values for collective I/O and data sieving parameters */
++ ADIOI_GEN_SetInfo(fd, users_info, error_code);
++ }
++
++ else {
++ /* The file has been opened previously and fd->fd_sys is a valid
++ file descriptor. cannot set striping parameters now. */
++
++ /* set the values for collective I/O and data sieving parameters */
++ ADIOI_GEN_SetInfo(fd, users_info, error_code);
++
++ }
++
++ *error_code = MPI_SUCCESS;
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iread.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iread.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iread.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iread.c 2005-12-06 11:54:37.904128127 -0500
+@@ -0,0 +1,106 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_iread.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_IreadContig(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Request *request, int *error_code)
++{
++ int len, typesize;
++#ifdef NO_AIO
++ ADIO_Status status;
++#else
++ int err=-1;
++#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
++ static char myname[] = "ADIOI_LUSTRE_IREADCONTIG";
++#endif
++#endif
++
++ (*request) = ADIOI_Malloc_request();
++ (*request)->optype = ADIOI_READ;
++ (*request)->fd = fd;
++ (*request)->datatype = datatype;
++
++ MPI_Type_size(datatype, &typesize);
++ len = count * typesize;
++
++#ifdef NO_AIO
++ /* HP, FreeBSD, Linux */
++ /* no support for nonblocking I/O. Use blocking I/O. */
++
++ ADIOI_LUSTRE_ReadContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset,
++ &status, error_code);
++ (*request)->queued = 0;
++#ifdef HAVE_STATUS_SET_BYTES
++ if (*error_code == MPI_SUCCESS) {
++ MPI_Get_elements(&status, MPI_BYTE, &len);
++ (*request)->nbytes = len;
++ }
++#endif
++
++#else
++ if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
++ err = ADIOI_LUSTRE_aio(fd, buf, len, offset, 0, &((*request)->handle));
++ if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
++
++ (*request)->queued = 1;
++ ADIOI_Add_req_to_list(request);
++
++ if (err == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++ return;
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else /* MPICH-1 */
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error(fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++#endif /* NO_AIO */
++
++ fd->fp_sys_posn = -1; /* set it to null. */
++ fd->async_count++;
++}
++
++
++
++void ADIOI_LUSTRE_IreadStrided(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Request *request, int
++ *error_code)
++{
++ ADIO_Status status;
++#ifdef HAVE_STATUS_SET_BYTES
++ int typesize;
++#endif
++
++ *request = ADIOI_Malloc_request();
++ (*request)->optype = ADIOI_READ;
++ (*request)->fd = fd;
++ (*request)->datatype = datatype;
++ (*request)->queued = 0;
++ (*request)->handle = 0;
++
++/* call the blocking version. It is faster because it does data sieving. */
++ ADIOI_LUSTRE_ReadStrided(fd, buf, count, datatype, file_ptr_type,
++ offset, &status, error_code);
++
++ fd->async_count++;
++
++#ifdef HAVE_STATUS_SET_BYTES
++ if (*error_code == MPI_SUCCESS) {
++ MPI_Type_size(datatype, &typesize);
++ (*request)->nbytes = count * typesize;
++ }
++#endif
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iwrite.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iwrite.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iwrite.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iwrite.c 2005-12-06 11:54:37.906127861 -0500
+@@ -0,0 +1,268 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_iwrite.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_IwriteContig(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Request *request, int *error_code)
++{
++ int len, typesize;
++#ifdef NO_AIO
++ ADIO_Status status;
++#else
++ int err=-1;
++#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
++ static char myname[] = "ADIOI_LUSTRE_IWRITECONTIG";
++#endif
++#endif
++
++ *request = ADIOI_Malloc_request();
++ (*request)->optype = ADIOI_WRITE;
++ (*request)->fd = fd;
++ (*request)->datatype = datatype;
++
++ MPI_Type_size(datatype, &typesize);
++ len = count * typesize;
++
++#ifdef NO_AIO
++ /* HP, FreeBSD, Linux */
++ /* no support for nonblocking I/O. Use blocking I/O. */
++
++ ADIOI_LUSTRE_WriteContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset,
++ &status, error_code);
++ (*request)->queued = 0;
++#ifdef HAVE_STATUS_SET_BYTES
++ if (*error_code == MPI_SUCCESS) {
++ MPI_Get_elements(&status, MPI_BYTE, &len);
++ (*request)->nbytes = len;
++ }
++#endif
++
++#else
++ if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
++ err = ADIOI_LUSTRE_aio(fd, buf, len, offset, 1, &((*request)->handle));
++ if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
++
++ (*request)->queued = 1;
++ ADIOI_Add_req_to_list(request);
++
++ if (err == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++ return;
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else /* MPICH-1 */
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error(fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++#endif /* NO_AIO */
++
++ fd->fp_sys_posn = -1; /* set it to null. */
++ fd->async_count++;
++}
++
++
++
++
++void ADIOI_LUSTRE_IwriteStrided(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Request *request, int
++ *error_code)
++{
++ ADIO_Status status;
++#ifdef HAVE_STATUS_SET_BYTES
++ int typesize;
++#endif
++
++ *request = ADIOI_Malloc_request();
++ (*request)->optype = ADIOI_WRITE;
++ (*request)->fd = fd;
++ (*request)->datatype = datatype;
++ (*request)->queued = 0;
++ (*request)->handle = 0;
++
++/* call the blocking version. It is faster because it does data sieving. */
++ ADIOI_LUSTRE_WriteStrided(fd, buf, count, datatype, file_ptr_type,
++ offset, &status, error_code);
++
++ fd->async_count++;
++
++#ifdef HAVE_STATUS_SET_BYTES
++ if (*error_code == MPI_SUCCESS) {
++ MPI_Type_size(datatype, &typesize);
++ (*request)->nbytes = count * typesize;
++ }
++#endif
++}
++
++
++/* This function is for implementation convenience. It is not user-visible.
++ It takes care of the differences in the interface for nonblocking I/O
++ on various Unix machines! If wr==1 write, wr==0 read. */
++
++int ADIOI_LUSTRE_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
++ int wr, void *handle)
++{
++ int err=-1, fd_sys;
++
++#ifndef NO_AIO
++ int error_code;
++#ifdef AIO_SUN
++ aio_result_t *result;
++#else
++ struct aiocb *aiocbp;
++#endif
++#endif
++
++ fd_sys = fd->fd_sys;
++
++#ifdef AIO_SUN
++ result = (aio_result_t *) ADIOI_Malloc(sizeof(aio_result_t));
++ result->aio_return = AIO_INPROGRESS;
++ if (wr) err = aiowrite(fd_sys, buf, len, offset, SEEK_SET, result);
++ else err = aioread(fd_sys, buf, len, offset, SEEK_SET, result);
++
++ if (err == -1) {
++ if (errno == EAGAIN) {
++ /* the man pages say EPROCLIM, but in reality errno is set to EAGAIN! */
++
++ /* exceeded the max. no. of outstanding requests.
++ complete all previous async. requests and try again.*/
++
++ ADIOI_Complete_async(&error_code);
++ if (wr) err = aiowrite(fd_sys, buf, len, offset, SEEK_SET, result);
++ else err = aioread(fd_sys, buf, len, offset, SEEK_SET, result);
++
++ while (err == -1) {
++ if (errno == EAGAIN) {
++ /* sleep and try again */
++ sleep(1);
++ if (wr) err = aiowrite(fd_sys, buf, len, offset, SEEK_SET, result);
++ else err = aioread(fd_sys, buf, len, offset, SEEK_SET, result);
++ }
++ else {
++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno);
++ MPI_Abort(MPI_COMM_WORLD, 1);
++ }
++ }
++ }
++ else {
++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno);
++ MPI_Abort(MPI_COMM_WORLD, 1);
++ }
++ }
++
++ *((aio_result_t **) handle) = result;
++#endif
++
++#ifdef NO_FD_IN_AIOCB
++/* IBM */
++ aiocbp = (struct aiocb *) ADIOI_Malloc(sizeof(struct aiocb));
++ aiocbp->aio_whence = SEEK_SET;
++ aiocbp->aio_offset = offset;
++ aiocbp->aio_buf = buf;
++ aiocbp->aio_nbytes = len;
++ if (wr) err = aio_write(fd_sys, aiocbp);
++ else err = aio_read(fd_sys, aiocbp);
++
++ if (err == -1) {
++ if (errno == EAGAIN) {
++ /* exceeded the max. no. of outstanding requests.
++ complete all previous async. requests and try again. */
++
++ ADIOI_Complete_async(&error_code);
++ if (wr) err = aio_write(fd_sys, aiocbp);
++ else err = aio_read(fd_sys, aiocbp);
++
++ while (err == -1) {
++ if (errno == EAGAIN) {
++ /* sleep and try again */
++ sleep(1);
++ if (wr) err = aio_write(fd_sys, aiocbp);
++ else err = aio_read(fd_sys, aiocbp);
++ }
++ else {
++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno);
++ MPI_Abort(MPI_COMM_WORLD, 1);
++ }
++ }
++ }
++ else {
++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno);
++ MPI_Abort(MPI_COMM_WORLD, 1);
++ }
++ }
++
++ *((struct aiocb **) handle) = aiocbp;
++
++#elif (!defined(NO_AIO) && !defined(AIO_SUN))
++/* DEC, SGI IRIX 5 and 6 */
++
++ aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1);
++ aiocbp->aio_fildes = fd_sys;
++ aiocbp->aio_offset = offset;
++ aiocbp->aio_buf = buf;
++ aiocbp->aio_nbytes = len;
++
++#ifdef AIO_PRIORITY_DEFAULT
++/* DEC */
++ aiocbp->aio_reqprio = AIO_PRIO_DFL; /* not needed in DEC Unix 4.0 */
++ aiocbp->aio_sigevent.sigev_signo = 0;
++#else
++ aiocbp->aio_reqprio = 0;
++#endif
++
++#ifdef AIO_SIGNOTIFY_NONE
++/* SGI IRIX 6 */
++ aiocbp->aio_sigevent.sigev_notify = SIGEV_NONE;
++#else
++ aiocbp->aio_sigevent.sigev_signo = 0;
++#endif
++
++ if (wr) err = aio_write(aiocbp);
++ else err = aio_read(aiocbp);
++
++ if (err == -1) {
++ if (errno == EAGAIN) {
++ /* exceeded the max. no. of outstanding requests.
++ complete all previous async. requests and try again. */
++
++ ADIOI_Complete_async(&error_code);
++ if (wr) err = aio_write(aiocbp);
++ else err = aio_read(aiocbp);
++
++ while (err == -1) {
++ if (errno == EAGAIN) {
++ /* sleep and try again */
++ sleep(1);
++ if (wr) err = aio_write(aiocbp);
++ else err = aio_read(aiocbp);
++ }
++ else {
++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno);
++ MPI_Abort(MPI_COMM_WORLD, 1);
++ }
++ }
++ }
++ else {
++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno);
++ MPI_Abort(MPI_COMM_WORLD, 1);
++ }
++ }
++
++ *((struct aiocb **) handle) = aiocbp;
++#endif
++
++ return err;
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_open.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_open.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_open.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_open.c 2005-12-06 11:54:37.906127861 -0500
+@@ -0,0 +1,100 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_open.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
++{
++ int perm, old_mask, amode;
++ struct lov_user_md lum = { 0 };
++ char *value;
++
++#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
++ static char myname[] = "ADIOI_LUSTRE_OPEN";
++#endif
++
++ if (fd->perm == ADIO_PERM_NULL) {
++ old_mask = umask(022);
++ umask(old_mask);
++ perm = old_mask ^ 0666;
++ }
++ else perm = fd->perm;
++
++ amode = 0;
++ if (fd->access_mode & ADIO_CREATE)
++ amode = amode | O_CREAT;
++ if (fd->access_mode & ADIO_RDONLY)
++ amode = amode | O_RDONLY;
++ if (fd->access_mode & ADIO_WRONLY)
++ amode = amode | O_WRONLY;
++ if (fd->access_mode & ADIO_RDWR)
++ amode = amode | O_RDWR;
++ if (fd->access_mode & ADIO_EXCL)
++ amode = amode | O_EXCL;
++
++ fd->fd_sys = open(fd->filename, amode, perm);
++
++ if (fd->fd_sys != -1) {
++ int err;
++
++ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
++
++ /* get file striping information and set it in info */
++ lum.lmm_magic = LOV_USER_MAGIC;
++ err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *) &lum);
++
++ if (!err) {
++ sprintf(value, "%d", lum.lmm_stripe_size);
++ MPI_Info_set(fd->info, "striping_unit", value);
++
++ sprintf(value, "%d", lum.lmm_stripe_count);
++ MPI_Info_set(fd->info, "striping_factor", value);
++
++ sprintf(value, "%d", lum.lmm_stripe_offset);
++ MPI_Info_set(fd->info, "start_iodevice", value);
++ }
++ ADIOI_Free(value);
++
++ if (fd->access_mode & ADIO_APPEND)
++ fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
++ }
++
++
++ if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
++ fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
++
++ if (fd->fd_sys == -1) {
++#ifdef MPICH2
++ if (errno == ENAMETOOLONG)
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_BAD_FILE, "**filenamelong", "**filenamelong %s %d", fd->filename, strlen(fd->filename) );
++ else if (errno == ENOENT)
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_NO_SUCH_FILE, "**filenoexist", "**filenoexist %s", fd->filename );
++ else if (errno == ENOTDIR || errno == ELOOP)
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_BAD_FILE, "**filenamedir", "**filenamedir %s", fd->filename );
++ else if (errno == EACCES) {
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_ACCESS, "**fileaccess", "**fileaccess %s",
++ fd->filename );
++ }
++ else if (errno == EROFS) {
++ /* Read only file or file system and write access requested */
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_READ_ONLY, "**ioneedrd", 0 );
++ }
++ else {
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++ }
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else /* MPICH-1 */
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error(ADIO_FILE_NULL, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_rdcoll.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_rdcoll.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_rdcoll.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_rdcoll.c 2005-12-06 11:54:37.907127727 -0500
+@@ -0,0 +1,18 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_rdcoll.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int
++ *error_code)
++{
++ ADIOI_GEN_ReadStridedColl(fd, buf, count, datatype, file_ptr_type,
++ offset, status, error_code);
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_read.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_read.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_read.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_read.c 2005-12-06 11:54:37.907127727 -0500
+@@ -0,0 +1,67 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_read.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int *error_code)
++{
++ int err=-1, datatype_size, len;
++#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
++ static char myname[] = "ADIOI_LUSTRE_READCONTIG";
++#endif
++
++ MPI_Type_size(datatype, &datatype_size);
++ len = datatype_size * count;
++
++ if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
++ if (fd->fp_sys_posn != offset)
++ lseek(fd->fd_sys, offset, SEEK_SET);
++ err = read(fd->fd_sys, buf, len);
++ fd->fp_sys_posn = offset + len;
++ /* individual file pointer not updated */
++ }
++ else { /* read from curr. location of ind. file pointer */
++ if (fd->fp_sys_posn != fd->fp_ind)
++ lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
++ err = read(fd->fd_sys, buf, len);
++ fd->fp_ind += err;
++ fd->fp_sys_posn = fd->fp_ind;
++ }
++
++#ifdef HAVE_STATUS_SET_BYTES
++ if (err != -1) MPIR_Status_set_bytes(status, datatype, err);
++#endif
++
++ if (err == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else /* MPICH-1 */
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error(fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++}
++
++
++
++
++void ADIOI_LUSTRE_ReadStrided(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int
++ *error_code)
++{
++ ADIOI_GEN_ReadStrided(fd, buf, count, datatype, file_ptr_type,
++ offset, status, error_code);
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_resize.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_resize.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_resize.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_resize.c 2005-12-06 11:54:37.909127460 -0500
+@@ -0,0 +1,32 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_resize.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
++{
++ int err;
++#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
++ static char myname[] = "ADIOI_LUSTRE_RESIZE";
++#endif
++
++ err = ftruncate(fd->fd_sys, size);
++ if (err == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else /* MPICH-1 */
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error(fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_seek.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_seek.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_seek.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_seek.c 2005-12-06 11:54:37.911127194 -0500
+@@ -0,0 +1,15 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_seek.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++ADIO_Offset ADIOI_LUSTRE_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
++ int whence, int *error_code)
++{
++ return ADIOI_GEN_SeekIndividual(fd, offset, whence, error_code);
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wait.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wait.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wait.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wait.c 2005-12-06 11:54:37.914126794 -0500
+@@ -0,0 +1,188 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_wait.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_ReadComplete(ADIO_Request *request, ADIO_Status *status, int *error_code)
++{
++#ifndef NO_AIO
++#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
++ static char myname[] = "ADIOI_LUSTRE_READCOMPLETE";
++#endif
++#ifdef AIO_SUN
++ aio_result_t *result=0, *tmp;
++#else
++ int err;
++#endif
++#ifdef AIO_HANDLE_IN_AIOCB
++ struct aiocb *tmp1;
++#endif
++#endif
++
++ if (*request == ADIO_REQUEST_NULL) {
++ *error_code = MPI_SUCCESS;
++ return;
++ }
++
++#ifdef AIO_SUN
++ if ((*request)->queued) { /* dequeue it */
++ tmp = (aio_result_t *) (*request)->handle;
++ while (tmp->aio_return == AIO_INPROGRESS) usleep(1000);
++ /* sleep for 1 ms., until done. Is 1 ms. a good number? */
++ /* when done, dequeue any one request */
++ result = (aio_result_t *) aiowait(0);
++
++ (*request)->nbytes = tmp->aio_return;
++
++ if (tmp->aio_return == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(tmp->aio_errno));
++ return;
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else /* MPICH-1 */
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(tmp->aio_errno));
++ ADIOI_Error((*request)->fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++
++/* aiowait only dequeues a request. The completion of a request can be
++ checked by just checking the aio_return flag in the handle passed
++ to the original aioread()/aiowrite(). Therefore, I need to ensure
++ that aiowait() is called exactly once for each previous
++ aioread()/aiowrite(). This is also taken care of in ADIOI_xxxDone */
++ }
++ else *error_code = MPI_SUCCESS;
++
++#ifdef HAVE_STATUS_SET_BYTES
++ if ((*request)->nbytes != -1)
++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
++#endif
++
++#endif
++
++#ifdef AIO_HANDLE_IN_AIOCB
++/* IBM */
++ if ((*request)->queued) {
++ do {
++ err = aio_suspend(1, (struct aiocb **) &((*request)->handle));
++ } while ((err == -1) && (errno == EINTR));
++
++ tmp1 = (struct aiocb *) (*request)->handle;
++ if (err != -1) {
++ err = aio_return(tmp1->aio_handle);
++ (*request)->nbytes = err;
++ errno = aio_error(tmp1->aio_handle);
++ }
++ else (*request)->nbytes = -1;
++
++/* on DEC, it is required to call aio_return to dequeue the request.
++ IBM man pages don't indicate what function to use for dequeue.
++ I'm assuming it is aio_return! POSIX says aio_return may be called
++ only once on a given handle. */
++
++ if (err == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++ return;
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else /* MPICH-1 */
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error((*request)->fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++ } /* if ((*request)->queued) */
++ else *error_code = MPI_SUCCESS;
++
++#ifdef HAVE_STATUS_SET_BYTES
++ if ((*request)->nbytes != -1)
++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
++#endif
++
++#elif (!defined(NO_AIO) && !defined(AIO_SUN))
++/* DEC, SGI IRIX 5 and 6 */
++ if ((*request)->queued) {
++ do {
++ err = aio_suspend((const aiocb_t **) &((*request)->handle), 1, 0);
++ } while ((err == -1) && (errno == EINTR));
++
++ if (err != -1) {
++ err = aio_return((struct aiocb *) (*request)->handle);
++ (*request)->nbytes = err;
++ errno = aio_error((struct aiocb *) (*request)->handle);
++ }
++ else (*request)->nbytes = -1;
++
++ if (err == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++ return;
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else /* MPICH-1 */
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error((*request)->fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++ } /* if ((*request)->queued) */
++ else *error_code = MPI_SUCCESS;
++#ifdef HAVE_STATUS_SET_BYTES
++ if ((*request)->nbytes != -1)
++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
++#endif
++#endif
++
++#ifndef NO_AIO
++ if ((*request)->queued != -1) {
++
++ /* queued = -1 is an internal hack used when the request must
++ be completed, but the request object should not be
++ freed. This is used in ADIOI_Complete_async, because the user
++ will call MPI_Wait later, which would require status to
++ be filled. Ugly but works. queued = -1 should be used only
++ in ADIOI_Complete_async.
++ This should not affect the user in any way. */
++
++ /* if request is still queued in the system, it is also there
++ on ADIOI_Async_list. Delete it from there. */
++ if ((*request)->queued) ADIOI_Del_req_from_list(request);
++
++ (*request)->fd->async_count--;
++ if ((*request)->handle) ADIOI_Free((*request)->handle);
++ ADIOI_Free_request((ADIOI_Req_node *) (*request));
++ *request = ADIO_REQUEST_NULL;
++ }
++
++#else
++/* HP, FreeBSD, Linux */
++
++#ifdef HAVE_STATUS_SET_BYTES
++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
++#endif
++ (*request)->fd->async_count--;
++ ADIOI_Free_request((ADIOI_Req_node *) (*request));
++ *request = ADIO_REQUEST_NULL;
++ *error_code = MPI_SUCCESS;
++#endif
++}
++
++
++void ADIOI_LUSTRE_WriteComplete(ADIO_Request *request, ADIO_Status *status, int *error_code)
++{
++ ADIOI_LUSTRE_ReadComplete(request, status, error_code);
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wrcoll.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wrcoll.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wrcoll.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wrcoll.c 2005-12-06 11:54:37.914126794 -0500
+@@ -0,0 +1,18 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_wrcoll.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int
++ *error_code)
++{
++ ADIOI_GEN_WriteStridedColl(fd, buf, count, datatype, file_ptr_type,
++ offset, status, error_code);
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_write.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_write.c
+--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_write.c 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_write.c 2005-12-06 11:54:37.914126794 -0500
+@@ -0,0 +1,66 @@
++/* -*- Mode: C; c-basic-offset:4 ; -*- */
++/*
++ * $Id: ad_lustre_write.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $
++ *
++ * Copyright (C) 1997 University of Chicago.
++ * See COPYRIGHT notice in top-level directory.
++ */
++
++#include "ad_lustre.h"
++
++void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int *error_code)
++{
++ int err=-1, datatype_size, len;
++#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
++ static char myname[] = "ADIOI_LUSTRE_WRITECONTIG";
++#endif
++
++ MPI_Type_size(datatype, &datatype_size);
++ len = datatype_size * count;
++
++ if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
++ if (fd->fp_sys_posn != offset)
++ lseek(fd->fd_sys, offset, SEEK_SET);
++ err = write(fd->fd_sys, buf, len);
++ fd->fp_sys_posn = offset + err;
++ /* individual file pointer not updated */
++ }
++ else { /* write from curr. location of ind. file pointer */
++ if (fd->fp_sys_posn != fd->fp_ind)
++ lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
++ err = write(fd->fd_sys, buf, len);
++ fd->fp_ind += err;
++ fd->fp_sys_posn = fd->fp_ind;
++ }
++
++#ifdef HAVE_STATUS_SET_BYTES
++ if (err != -1 && status) MPIR_Status_set_bytes(status, datatype, err);
++#endif
++
++ if (err == -1) {
++#ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
++ "**io %s", strerror(errno));
++#elif defined(PRINT_ERR_MSG)
++ *error_code = MPI_ERR_UNKNOWN;
++#else
++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
++ myname, "I/O Error", "%s", strerror(errno));
++ ADIOI_Error(fd, *error_code, myname);
++#endif
++ }
++ else *error_code = MPI_SUCCESS;
++}
++
++
++
++void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int
++ *error_code)
++{
++ ADIOI_GEN_WriteStrided(fd, buf, count, datatype, file_ptr_type,
++ offset, status, error_code);
++}
+diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/Makefile.in mpich-1.2.6/romio/adio/ad_lustre/Makefile.in
+--- mpich-1.2.6/romio/adio/ad_lustre/Makefile.in 1969-12-31 19:00:00.000000000 -0500
++++ mpich-1.2.6/romio/adio/ad_lustre/Makefile.in 2005-12-06 11:54:37.883130927 -0500
+@@ -0,0 +1,47 @@
++CC = @CC@
++AR = @AR@
++LIBNAME = @LIBNAME@
++srcdir = @srcdir@
++CC_SHL = @CC_SHL@
++SHLIBNAME = @SHLIBNAME@
++
++INCLUDE_DIR = -I@MPI_INCLUDE_DIR@ -I${srcdir}/../include -I../include
++CFLAGS = @CFLAGS@ $(INCLUDE_DIR)
++
++C_COMPILE_SHL = $(CC_SHL) @CFLAGS@ $(INCLUDE_DIR)
++
++@VPATH@
++
++AD_LUSTRE_OBJECTS = ad_lustre_close.o ad_lustre_read.o \
++ ad_lustre_open.o ad_lustre_write.o ad_lustre_done.o \
++ ad_lustre_fcntl.o ad_lustre_iread.o ad_lustre_iwrite.o ad_lustre_wait.o \
++ ad_lustre_resize.o ad_lustre_hints.o \
++ ad_lustre.o
++
++
++default: $(LIBNAME)
++ @if [ "@ENABLE_SHLIB@" != "none" ] ; then \
++ $(MAKE) $(SHLIBNAME).la ;\
++ fi
++
++.SUFFIXES: $(SUFFIXES) .p .lo
++
++.c.o:
++ $(CC) $(CFLAGS) -c $<
++.c.lo:
++ $(C_COMPILE_SHL) -c $<
++ @mv -f $*.o $*.lo
++
++$(LIBNAME): $(AD_LUSTRE_OBJECTS)
++ $(AR) $(LIBNAME) $(AD_LUSTRE_OBJECTS)
++
++AD_LUSTRE_LOOBJECTS=$(AD_LUSTRE_OBJECTS:.o=.lo)
++$(SHLIBNAME).la: $(AD_LUSTRE_LOOBJECTS)
++ $(AR) $(SHLIBNAME).la $(AD_LUSTRE_LOOBJECTS)
++
++coverage:
++ -@for file in ${AD_LUSTRE_OBJECTS:.o=.c} ; do \
++ gcov -b -f $$file ; done
++
++clean:
++ @rm -f *.o *.lo
+--- mpich-1.2.6/romio/Makefile.in 2004-01-27 18:27:35.000000000 -0500
++++ mpich-1.2.6/romio/Makefile.in 2005-12-06 11:54:38.000000000 -0500
+@@ -14,7 +14,7 @@ DIRS = mpi-io adio/common
+ MPIO_DIRS = mpi-io
+ EXTRA_SRC_DIRS = @EXTRA_SRC_DIRS@
+ FILE_SYS_DIRS = @FILE_SYS_DIRS@
+-ALL_DIRS = mpi-io mpi-io/fortran mpi2-other/info mpi2-other/info/fortran mpi2-other/array mpi2-other/array/fortran adio/common adio/ad_pfs adio/ad_piofs adio/ad_nfs adio/ad_ufs adio/ad_xfs adio/ad_hfs adio/ad_sfs adio/ad_testfs adio/ad_pvfs adio/ad_pvfs2 test
++ALL_DIRS = mpi-io mpi-io/fortran mpi2-other/info mpi2-other/info/fortran mpi2-other/array mpi2-other/array/fortran adio/common adio/ad_pfs adio/ad_piofs adio/ad_nfs adio/ad_ufs adio/ad_xfs adio/ad_hfs adio/ad_sfs adio/ad_testfs adio/ad_pvfs adio/ad_pvfs2 adio/ad_lustre test
+ SHELL = /bin/sh
+
+ @VPATH@
+--- mpich-1.2.6/romio/configure.in 2004-08-02 09:37:31.000000000 -0400
++++ mpich-1.2.6/romio/configure.in 2005-12-06 11:54:38.000000000 -0500
+@@ -90,7 +90,7 @@ MPIO_REQ_REAL_POBJECTS="_iotest.o _iowai
+ #
+ have_aio=no
+ #
+-known_filesystems="nfs ufs pfs piofs pvfs pvfs2 testfs xfs hfs sfs"
++known_filesystems="nfs ufs pfs piofs pvfs pvfs2 testfs xfs hfs sfs lustre"
+ known_mpi_impls="mpich_mpi sgi_mpi hp_mpi cray_mpi lam_mpi"
+ #
+ # Defaults
+@@ -1270,6 +1270,9 @@ fi
+ if test -n "$file_system_testfs"; then
+ AC_DEFINE(ROMIO_TESTFS,1,[Define for TESTFS])
+ fi
++if test -n "$file_system_lustre"; then
++ AC_DEFINE(ROMIO_LUSTRE,1,[Define for LUSTRE])
++fi
+ if test -n "$file_system_piofs"; then
+ AC_DEFINE(PIOFS,1,[Define for PIOFS])
+ USER_CFLAGS="$USER_CFLAGS -bI:/usr/include/piofs/piofs.exp"
+@@ -1634,7 +1637,7 @@ AC_OUTPUT(Makefile localdefs mpi-io/Make
+ adio/ad_nfs/Makefile adio/ad_ufs/Makefile \
+ adio/ad_xfs/Makefile adio/ad_hfs/Makefile \
+ adio/ad_sfs/Makefile adio/ad_pfs/Makefile \
+- adio/ad_testfs/Makefile adio/ad_pvfs/Makefile \
++ adio/ad_testfs/Makefile adio/ad_lustre/Makefile adio/ad_pvfs/Makefile \
+ adio/ad_pvfs2/Makefile adio/ad_piofs/Makefile \
+ mpi-io/fortran/Makefile mpi2-other/info/fortran/Makefile \
+ mpi2-other/array/fortran/Makefile test/fmisc.f \
+--- mpich-1.2.6/romio/configure 2004-08-04 12:08:28.000000000 -0400
++++ mpich-1.2.6/romio/configure 2005-12-06 11:54:38.000000000 -0500
+@@ -623,7 +623,7 @@ MPIO_REQ_REAL_POBJECTS="_iotest.o _iowai
+ #
+ have_aio=no
+ #
+-known_filesystems="nfs ufs pfs piofs pvfs pvfs2 testfs xfs hfs sfs"
++known_filesystems="nfs ufs pfs piofs pvfs pvfs2 testfs lustre xfs hfs sfs"
+ known_mpi_impls="mpich_mpi sgi_mpi hp_mpi cray_mpi lam_mpi"
+ #
+ # Defaults
+@@ -4022,6 +4022,13 @@ if test -n "$file_system_testfs"; then
+ EOF
+
+ fi
++if test -n "$file_system_lustre"; then
++ cat >> confdefs.h <<\EOF
++#define LUSTRE 1
++EOF
++
++fi
++
+ if test -n "$file_system_piofs"; then
+ cat >> confdefs.h <<\EOF
+ #define PIOFS 1
+@@ -4746,7 +4753,7 @@ trap 'rm -fr `echo "Makefile localdefs m
+ adio/ad_xfs/Makefile adio/ad_hfs/Makefile \
+ adio/ad_sfs/Makefile adio/ad_pfs/Makefile \
+ adio/ad_testfs/Makefile adio/ad_pvfs/Makefile \
+- adio/ad_pvfs2/Makefile adio/ad_piofs/Makefile \
++ adio/ad_pvfs2/Makefile adio/ad_piofs/Makefile adio/ad_lustre/Makefile\
+ mpi-io/fortran/Makefile mpi2-other/info/fortran/Makefile \
+ mpi2-other/array/fortran/Makefile test/fmisc.f \
+ test/fcoll_test.f test/pfcoll_test.f test/fperf.f adio/include/romioconf.h" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15
+@@ -4912,7 +4919,7 @@ CONFIG_FILES=\${CONFIG_FILES-"Makefile l
+ adio/ad_nfs/Makefile adio/ad_ufs/Makefile \
+ adio/ad_xfs/Makefile adio/ad_hfs/Makefile \
+ adio/ad_sfs/Makefile adio/ad_pfs/Makefile \
+- adio/ad_testfs/Makefile adio/ad_pvfs/Makefile \
++ adio/ad_testfs/Makefile adio/ad_lustre/Makefile adio/ad_pvfs/Makefile \
+ adio/ad_pvfs2/Makefile adio/ad_piofs/Makefile \
+ mpi-io/fortran/Makefile mpi2-other/info/fortran/Makefile \
+ mpi2-other/array/fortran/Makefile test/fmisc.f \
+--- mpich-1.2.6/romio/adio/include/romioconf.h.in 2004-08-04 12:08:28.000000000 -0400
++++ mpich-1.2.6/romio/adio/include/romioconf.h.in 2005-12-06 11:54:38.000000000 -0500
+@@ -192,6 +192,9 @@
+ /* Define for TESTFS */
+ #undef ROMIO_TESTFS
+
++/* Define for LUSTRE */
++#undef LUSTRE
++
+ /* Define for PIOFS */
+ #undef PIOFS
+
+--- mpich-1.2.6/romio/adio/include/mpio_error.h 2002-11-15 11:26:23.000000000 -0500
++++ mpich-1.2.6/romio/adio/include/mpio_error.h 2005-12-06 11:54:38.000000000 -0500
+@@ -62,6 +62,7 @@
+ #define MPIR_ERR_FILETYPE 33
+ #define MPIR_ERR_NO_NTFS 35
+ #define MPIR_ERR_NO_TESTFS 36
++#define MPIR_ERR_NO_LUSTRE 37
+
+ /* MPI_ERR_COMM */
+ #ifndef MPIR_ERR_COMM_NULL
+--- mpich-1.2.6/romio/adio/include/adioi_fs_proto.h 2003-06-24 18:48:23.000000000 -0400
++++ mpich-1.2.6/romio/adio/include/adioi_fs_proto.h 2005-12-06 11:54:38.000000000 -0500
+@@ -261,6 +261,68 @@ ADIO_Offset ADIOI_UFS_SeekIndividual(ADI
+ void ADIOI_UFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
+ #endif
+
++#ifdef LUSTRE
++extern struct ADIOI_Fns_struct ADIO_LUSTRE_operations;
++
++void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code);
++void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code);
++void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int
++ *error_code);
++void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int
++ *error_code);
++void ADIOI_LUSTRE_IwriteContig(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Request *request, int
++ *error_code);
++void ADIOI_LUSTRE_IreadContig(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Request *request, int
++ *error_code);
++int ADIOI_LUSTRE_ReadDone(ADIO_Request *request, ADIO_Status *status, int
++ *error_code);
++int ADIOI_LUSTRE_WriteDone(ADIO_Request *request, ADIO_Status *status, int
++ *error_code);
++void ADIOI_LUSTRE_ReadComplete(ADIO_Request *request, ADIO_Status *status, int
++ *error_code);
++void ADIOI_LUSTRE_WriteComplete(ADIO_Request *request, ADIO_Status *status,
++ int *error_code);
++void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
++ *error_code);
++void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int
++ *error_code);
++void ADIOI_LUSTRE_ReadStrided(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int
++ *error_code);
++void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int
++ *error_code);
++void ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Status *status, int
++ *error_code);
++void ADIOI_LUSTRE_IreadStrided(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Request *request, int
++ *error_code);
++void ADIOI_LUSTRE_IwriteStrided(ADIO_File fd, void *buf, int count,
++ MPI_Datatype datatype, int file_ptr_type,
++ ADIO_Offset offset, ADIO_Request *request, int
++ *error_code);
++void ADIOI_LUSTRE_Flush(ADIO_File fd, int *error_code);
++void ADIOI_LUSTRE_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
++ADIO_Offset ADIOI_LUSTRE_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
++ int whence, int *error_code);
++void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
++#endif
++
+ #ifdef ROMIO_NTFS
+ extern struct ADIOI_Fns_struct ADIO_NTFS_operations;
+
+--- mpich-1.2.6/romio/adio/include/adio.h 2004-06-07 13:59:57.000000000 -0400
++++ mpich-1.2.6/romio/adio/include/adio.h 2005-12-06 11:54:38.000000000 -0500
+@@ -276,6 +276,7 @@ typedef struct {
+ #define ADIO_NTFS 158 /* NTFS for Windows NT */
+ #define ADIO_TESTFS 159 /* fake file system for testing */
+ #define ADIO_PVFS2 160 /* PVFS2: 2nd generation PVFS */
++#define ADIO_LUSTRE 161 /* Lustre */
+
+ #define ADIO_SEEK_SET SEEK_SET
+ #define ADIO_SEEK_CUR SEEK_CUR
+--- mpich-1.2.6/romio/adio/common/setfn.c 2003-06-24 18:48:18.000000000 -0400
++++ mpich-1.2.6/romio/adio/common/setfn.c 2005-12-06 11:54:38.000000000 -0500
+@@ -114,6 +114,16 @@ void ADIOI_SetFunctions(ADIO_File fd)
+ #endif
+ break;
+
++ case ADIO_LUSTRE:
++#ifdef LUSTRE
++ *(fd->fns) = ADIO_LUSTRE_operations;
++#else
++ FPRINTF(stderr, "ADIOI_SetFunctions: ROMIO has not been configured to use the LUSTRE file system\n");
++ MPI_Abort(MPI_COMM_WORLD, 1);
++#endif
++ break;
++
++
+ default:
+ FPRINTF(stderr, "ADIOI_SetFunctions: Unsupported file system type\n");
+ MPI_Abort(MPI_COMM_WORLD, 1);
+--- mpich-1.2.6/romio/adio/common/ad_fstype.c 2003-09-04 16:24:44.000000000 -0400
++++ mpich-1.2.6/romio/adio/common/ad_fstype.c 2005-12-06 11:54:38.000000000 -0500
+@@ -204,6 +204,11 @@ static void ADIO_FileSysType_fncall(char
+ }
+ }
+ #elif defined(LINUX)
++#warning use correct include
++# if defined (LUSTRE)
++#define LL_SUPER_MAGIC 0x0BD00BD0
++# endif
++
+ do {
+ err = statfs(filename, &fsbuf);
+ } while (err && (errno == ESTALE));
+@@ -218,6 +223,9 @@ static void ADIO_FileSysType_fncall(char
+ else {
+ /* FPRINTF(stderr, "%d\n", fsbuf.f_type);*/
+ if (fsbuf.f_type == NFS_SUPER_MAGIC) *fstype = ADIO_NFS;
++# if defined (LUSTRE)
++ else if (fsbuf.f_type == LL_SUPER_MAGIC) *fstype = ADIO_LUSTRE;
++#endif
+ # if defined(ROMIO_PVFS)
+ else if (fsbuf.f_type == PVFS_SUPER_MAGIC) *fstype = ADIO_PVFS;
+ # endif
+@@ -359,6 +367,11 @@ static void ADIO_FileSysType_prefix(char
+ {
+ *fstype = ADIO_TESTFS;
+ }
++ else if (!strncmp(filename, "lustre:", 7)
++ || !strncmp(filename, "LUSTRE:", 7))
++ {
++ *fstype = ADIO_LUSTRE;
++ }
+ else {
+ #ifdef ROMIO_NTFS
+ *fstype = ADIO_NTFS;
+@@ -644,6 +657,24 @@ void ADIO_ResolveFileType(MPI_Comm comm,
+ *ops = &ADIO_TESTFS_operations;
+ #endif
+ }
++ if (file_system == ADIO_LUSTRE) {
++#ifndef LUSTRE
++# ifdef MPICH2
++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**iofstypeunsupported", 0);
++ return;
++# elif defined(PRINT_ERR_MSG)
++ FPRINTF(stderr, "ADIO_ResolveFileType: ROMIO has not been configured to use the LUSTRE file system\n");
++ MPI_Abort(MPI_COMM_WORLD, 1);
++# else /* MPICH-1 */
++ myerrcode = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_LUSTRE,
++ myname, (char *) 0, (char *) 0);
++ *error_code = ADIOI_Error(MPI_FILE_NULL, myerrcode, myname);
++# endif
++ return;
++#else
++ *ops = &ADIO_LUSTRE_operations;
++#endif
++ }
+ *error_code = MPI_SUCCESS;
+ *fstype = file_system;
+ return;
#define OBD_CONNECT_TRANSNO 0x800ULL /* replay is sending initial transno */
#define OBD_CONNECT_IBITS 0x1000ULL /* support for inodebits locks */
#define OBD_CONNECT_JOIN 0x2000ULL /* files can be concatenated */
+#define OBD_CONNECT_ATTRFID 0x4000ULL /* Server supports GetAttr By Fid */
#define OBD_CONNECT_NODEVOH 0x8000ULL /* No open handle for special nodes */
#define OBD_CONNECT_EMPTY 0x80000000ULL /* fake: these are empty connect flags*/
#define MDS_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \
OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \
- OBD_CONNECT_NODEVOH)
+ OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID)
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX)
} mds_reint_t;
/* the disposition of the intent outlines what was executed */
-#define DISP_IT_EXECD 0x01
-#define DISP_LOOKUP_EXECD 0x02
-#define DISP_LOOKUP_NEG 0x04
-#define DISP_LOOKUP_POS 0x08
-#define DISP_OPEN_CREATE 0x10
-#define DISP_OPEN_OPEN 0x20
-#define DISP_ENQ_COMPLETE 0x40
+#define DISP_IT_EXECD 0x00000001
+#define DISP_LOOKUP_EXECD 0x00000002
+#define DISP_LOOKUP_NEG 0x00000004
+#define DISP_LOOKUP_POS 0x00000008
+#define DISP_OPEN_CREATE 0x00000010
+#define DISP_OPEN_OPEN 0x00000020
+#define DISP_ENQ_COMPLETE 0x00400000
+#define DISP_ENQ_OPEN_REF 0x00800000
+#define DISP_ENQ_CREATE_REF 0x01000000
/* INODE LOCK PARTS */
#define MDS_INODELOCK_LOOKUP 0x000001 /* dentry, mode, owner, group */
ldlm_res_iterator_t iter, void *closure);
int ldlm_replay_locks(struct obd_import *imp);
-void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *,
- ldlm_iterator_t iter, void *data);
+void ldlm_resource_iterate(struct ldlm_namespace *, struct ldlm_res_id *,
+ ldlm_iterator_t iter, void *data);
+
/* ldlm_flock.c */
int ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data);
int ldlm_handle_convert(struct ptlrpc_request *req);
int ldlm_handle_cancel(struct ptlrpc_request *req);
int ldlm_del_waiting_lock(struct ldlm_lock *lock);
+int ldlm_refresh_waiting_lock(struct ldlm_lock *lock);
int ldlm_get_ref(void);
void ldlm_put_ref(int force);
/* mdc/mdc_locks.c */
int it_disposition(struct lookup_intent *it, int flag);
void it_set_disposition(struct lookup_intent *it, int flag);
+void it_clear_disposition(struct lookup_intent *it, int flag);
int it_open_error(int phase, struct lookup_intent *it);
void mdc_set_lock_data(__u64 *lockh, void *data);
int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid,
int (*ap_make_ready)(void *data, int cmd);
int (*ap_refresh_count)(void *data, int cmd);
void (*ap_fill_obdo)(void *data, int cmd, struct obdo *oa);
- void (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc);
+ int (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc);
};
/* the `oig' is passed down from a caller of obd rw methods. the callee
int aa_requested_nob;
int aa_nio_count;
obd_count aa_page_count;
- struct brw_page *aa_pga;
+ struct brw_page **aa_ppga;
struct client_obd *aa_cli;
struct list_head aa_oaps;
};
+{
+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
-+ (EXT_GENERATION(neh) + 1);
++ (EXT_HDR_GEN(neh) + 1);
+}
+
+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
-+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24)
+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */
+
+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data)
+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root)
+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth)
-+
++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__))
+
+#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
+
+{
+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
-+ (EXT_GENERATION(neh) + 1);
++ (EXT_HDR_GEN(neh) + 1);
+}
+
+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
-+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24)
+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */
+
+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data)
+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root)
+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth)
-+
++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__))
+
+#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
+
+{
+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
-+ (EXT_GENERATION(neh) + 1);
++ (EXT_HDR_GEN(neh) + 1);
+}
+
+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
-+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24)
+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */
+
+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data)
+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root)
+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth)
-+
++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__))
+
+#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
+
+{
+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
-+ (EXT_GENERATION(neh) + 1);
++ (EXT_HDR_GEN(neh) + 1);
+}
+
+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
-+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24)
+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */
+
+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data)
+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root)
+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth)
-+
++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__))
+
+#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
+
+{
+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
-+ (EXT_GENERATION(neh) + 1);
++ (EXT_HDR_GEN(neh) + 1);
+}
+
+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
-+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24)
+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */
+
+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data)
+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root)
+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth)
-+
++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__))
+
+#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
+
+{
+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
-+ (EXT_GENERATION(neh) + 1);
++ (EXT_HDR_GEN(neh) + 1);
+}
+
+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
-+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24)
+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */
+
+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data)
+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root)
+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth)
-+
++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__))
+
+#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
+
+{
+ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
+ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
-+ (EXT_GENERATION(neh) + 1);
++ (EXT_HDR_GEN(neh) + 1);
+}
+
+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
-+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
+#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24)
+#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */
+
+#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data)
+#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root)
+#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth)
-+
++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__))
+
+#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
+
if (rc == -ERESTART)
GOTO(restart, -ERESTART);
*flags |= LDLM_FL_BLOCK_GRANTED;
+ /* this way we force client to wait for the lock
+ * endlessly once the lock is enqueued -bzzz */
+ *flags |= LDLM_FL_NO_TIMEOUT;
+
}
rc = 0;
out:
*
* Called with the namespace lock held.
*/
-static int ldlm_add_waiting_lock(struct ldlm_lock *lock)
+static int __ldlm_add_waiting_lock(struct ldlm_lock *lock)
{
cfs_time_t timeout_rounded;
+ if (!list_empty(&lock->l_pending_chain))
+ return 0;
+
+ lock->l_callback_timeout =cfs_time_add(cfs_time_current(),
+ cfs_time_seconds(obd_timeout)/2);
+
+ timeout_rounded = round_timeout(lock->l_callback_timeout);
+
+ if (cfs_time_before(timeout_rounded, cfs_timer_deadline(&waiting_locks_timer)) ||
+ !cfs_timer_is_armed(&waiting_locks_timer)) {
+ cfs_timer_arm(&waiting_locks_timer, timeout_rounded);
+
+ }
+ list_add_tail(&lock->l_pending_chain, &waiting_locks_list); /* FIFO */
+ return 1;
+}
+
+static int ldlm_add_waiting_lock(struct ldlm_lock *lock)
+{
+ int ret;
+
l_check_ns_lock(lock->l_resource->lr_namespace);
LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK));
return 0;
}
- if (!list_empty(&lock->l_pending_chain)) {
- spin_unlock_bh(&waiting_locks_spinlock);
- LDLM_DEBUG(lock, "not re-adding to wait list");
- return 0;
- }
-
- lock->l_callback_timeout =cfs_time_add(cfs_time_current(),
- cfs_time_seconds(obd_timeout)/2);
-
- timeout_rounded = round_timeout(lock->l_callback_timeout);
-
- if (cfs_time_before(timeout_rounded, cfs_timer_deadline(&waiting_locks_timer)) ||
- !cfs_timer_is_armed(&waiting_locks_timer)) {
- cfs_timer_arm(&waiting_locks_timer, timeout_rounded);
- }
- list_add_tail(&lock->l_pending_chain, &waiting_locks_list); /* FIFO */
+ ret = __ldlm_add_waiting_lock(lock);
spin_unlock_bh(&waiting_locks_spinlock);
- LDLM_DEBUG(lock, "adding to wait list");
- return 1;
+
+ LDLM_DEBUG(lock, "%sadding to wait list",
+ ret == 0 ? "not re-" : "");
+ return ret;
}
/*
*
* Called with namespace lock held.
*/
-int ldlm_del_waiting_lock(struct ldlm_lock *lock)
+int __ldlm_del_waiting_lock(struct ldlm_lock *lock)
{
struct list_head *list_next;
- l_check_ns_lock(lock->l_resource->lr_namespace);
-
- if (lock->l_export == NULL) {
- /* We don't have a "waiting locks list" on clients. */
- LDLM_DEBUG(lock, "client lock: no-op");
- return 0;
- }
-
- spin_lock_bh(&waiting_locks_spinlock);
-
- if (list_empty(&lock->l_pending_chain)) {
- spin_unlock_bh(&waiting_locks_spinlock);
- LDLM_DEBUG(lock, "wasn't waiting");
+ if (list_empty(&lock->l_pending_chain))
return 0;
- }
list_next = lock->l_pending_chain.next;
if (lock->l_pending_chain.prev == &waiting_locks_list) {
}
list_del_init(&lock->l_pending_chain);
+ return 1;
+}
+
+int ldlm_del_waiting_lock(struct ldlm_lock *lock)
+{
+ int ret;
+
+ l_check_ns_lock(lock->l_resource->lr_namespace);
+
+ if (lock->l_export == NULL) {
+ /* We don't have a "waiting locks list" on clients. */
+ LDLM_DEBUG(lock, "client lock: no-op");
+ return 0;
+ }
+
+ spin_lock_bh(&waiting_locks_spinlock);
+ ret = __ldlm_del_waiting_lock(lock);
spin_unlock_bh(&waiting_locks_spinlock);
- LDLM_DEBUG(lock, "removed");
+
+ LDLM_DEBUG(lock, "%s", ret == 0 ? "wasn't waiting" : "removed");
+ return ret;
+}
+
+/*
+ * Prolong the lock
+ *
+ * Called with namespace lock held.
+ */
+int ldlm_refresh_waiting_lock(struct ldlm_lock *lock)
+{
+ l_check_ns_lock(lock->l_resource->lr_namespace);
+
+ if (lock->l_export == NULL) {
+ /* We don't have a "waiting locks list" on clients. */
+ LDLM_DEBUG(lock, "client lock: no-op");
+ return 0;
+ }
+
+ spin_lock_bh(&waiting_locks_spinlock);
+
+ if (list_empty(&lock->l_pending_chain)) {
+ spin_unlock_bh(&waiting_locks_spinlock);
+ LDLM_DEBUG(lock, "wasn't waiting");
+ return 0;
+ }
+
+ __ldlm_del_waiting_lock(lock);
+ __ldlm_add_waiting_lock(lock);
+ spin_unlock_bh(&waiting_locks_spinlock);
+
+ LDLM_DEBUG(lock, "refreshed");
return 1;
}
RETURN(0);
}
+int ldlm_refresh_waiting_lock(struct ldlm_lock *lock)
+{
+ RETURN(0);
+}
#endif /* __KERNEL__ */
static void ldlm_failed_ast(struct ldlm_lock *lock, int rc,
EXPORT_SYMBOL(ldlm_resource_foreach);
EXPORT_SYMBOL(ldlm_namespace_foreach);
EXPORT_SYMBOL(ldlm_namespace_foreach_res);
-EXPORT_SYMBOL(ldlm_change_cbdata);
+EXPORT_SYMBOL(ldlm_resource_iterate);
/* ldlm_lockd.c */
EXPORT_SYMBOL(ldlm_server_blocking_ast);
EXPORT_SYMBOL(ldlm_del_waiting_lock);
EXPORT_SYMBOL(ldlm_get_ref);
EXPORT_SYMBOL(ldlm_put_ref);
+EXPORT_SYMBOL(ldlm_refresh_waiting_lock);
/* ldlm_resource.c */
EXPORT_SYMBOL(ldlm_namespace_new);
lwd.lwd_lock = lock;
- if (unlikely(flags & LDLM_FL_NO_TIMEOUT)) {
+ if (lock->l_flags & LDLM_FL_NO_TIMEOUT) {
LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
lwi = LWI_INTR(interrupted_completion_wait, &lwd);
} else {
lock->l_remote_handle = reply->lock_handle;
*flags = reply->lock_flags;
lock->l_flags |= reply->lock_flags & LDLM_INHERIT_FLAGS;
+ /* move NO_TIMEOUT flag to the lock to force ldlm_lock_match()
+ * to wait with no timeout as well */
+ lock->l_flags |= reply->lock_flags & LDLM_FL_NO_TIMEOUT;
l_unlock(&ns->ns_lock);
CDEBUG(D_INFO, "local: %p, remote cookie: "LPX64", flags: 0x%x\n",
}
/* non-blocking function to manipulate a lock whose cb_data is being put away.*/
-void ldlm_change_cbdata(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
- ldlm_iterator_t iter, void *data)
+void ldlm_resource_iterate(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
+ ldlm_iterator_t iter, void *data)
{
struct ldlm_resource *res;
ENTRY;
it->it_magic = 0;
it->it_op_release = 0;
/* We are still holding extra reference on a request, need to free it */
- if (it_disposition(it, DISP_ENQ_COMPLETE))
+ if (it_disposition(it, DISP_ENQ_OPEN_REF)) /* open req for llfile_open*/
+ ptlrpc_req_finished(it->d.lustre.it_data);
+ if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
+ ptlrpc_req_finished(it->d.lustre.it_data);
+ if (it_disposition(it, DISP_ENQ_COMPLETE)) /* saved req from revalidate
+ * to lookup */
ptlrpc_req_finished(it->d.lustre.it_data);
it->d.lustre.it_disposition = 0;
EXIT;
}
-static int revalidate_it_finish(struct ptlrpc_request *request, int offset,
- struct lookup_intent *it,
- struct dentry *de)
+int revalidate_it_finish(struct ptlrpc_request *request, int offset,
+ struct lookup_intent *it, struct dentry *de)
{
int rc = 0;
ENTRY;
out:
req = it->d.lustre.it_data;
ptlrpc_req_finished(req);
+ if (req)
+ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
if (rc == 0)
ll_open_complete(inode);
return rc;
CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", inode->i_ino);
+ if (!lli->lli_smd) {
+ CDEBUG(D_DLMTRACE, "No objects for inode %lu\n", inode->i_ino);
+ RETURN(0);
+ }
+
ast_flags |= LDLM_FL_HAS_INTENT;
/* NOTE: this looks like DLM lock request, but it may not be one. Due
out:
/* this one is in place of ll_file_open */
ptlrpc_req_finished(it->d.lustre.it_data);
+ it_clear_disposition(it, DISP_ENQ_OPEN_REF);
RETURN(rc);
}
struct lustre_handle lockh;
struct ldlm_res_id res_id = { .name = {0} };
struct obd_device *obddev;
- ldlm_policy_data_t policy = { .l_inodebits = {MDS_INODELOCK_UPDATE}};
+ ldlm_policy_data_t policy = { .l_inodebits = {
+ MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP}};
int flags;
ENTRY;
RETURN(0);
}
+static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
+ if (rc == -ENOENT) { /* Already unlinked. Just update nlink
+ * and return success */
+ inode->i_nlink = 0;
+ /* This path cannot be hit for regular files unless in
+ * case of obscure races, so no need to to validate
+ * size. */
+ if (!S_ISREG(inode->i_mode) &&
+ !S_ISDIR(inode->i_mode))
+ return 0;
+ }
+
+ if (rc) {
+ CERROR("failure %d inode %lu\n", rc, inode->i_ino);
+ return -abs(rc);
+
+ }
+
+ return 0;
+}
+
int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
{
struct inode *inode = dentry->d_inode;
- struct ll_inode_info *lli;
- struct lov_stripe_md *lsm;
+ struct ptlrpc_request *req = NULL;
+ struct obd_export *exp;
int rc;
ENTRY;
CERROR("REPORT THIS LINE TO PETER\n");
RETURN(0);
}
- lli = ll_i2info(inode);
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0))
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_REVALIDATE);
#endif
- if (!ll_have_md_lock(dentry)) {
- struct ptlrpc_request *req = NULL;
+ exp = ll_i2mdcexp(inode);
+
+ if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
+ struct lookup_intent oit = { .it_op = IT_GETATTR };
+ struct mdc_op_data op_data;
+
+ /* Call getattr by fid, so do not provide name at all. */
+ ll_prepare_mdc_op_data(&op_data, dentry->d_parent->d_inode,
+ dentry->d_inode, NULL, 0, 0);
+ rc = mdc_intent_lock(exp, &op_data, NULL, 0,
+ /* we are not interested in name
+ based lookup */
+ &oit, 0, &req,
+ ll_mdc_blocking_ast, 0);
+ if (rc < 0) {
+ rc = ll_inode_revalidate_fini(inode, rc);
+ GOTO (out, rc);
+ }
+
+ rc = revalidate_it_finish(req, 1, &oit, dentry);
+ if (rc != 0) {
+ ll_intent_release(&oit);
+ GOTO(out, rc);
+ }
+
+ ll_lookup_finish_locks(&oit, dentry);
+ } else if (!ll_have_md_lock(dentry)) {
struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
struct ll_fid fid;
obd_valid valid = OBD_MD_FLGETATTR;
}
ll_inode2fid(&fid, inode);
rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req);
- if (rc == -ENOENT) { /* Already unlinked. Just update nlink
- * and return success */
- inode->i_nlink = 0;
- /* This path cannot be hit for regular files unless in
- * case of obscure races, so * no need to to validate
- * size. */
- if (!S_ISREG(inode->i_mode) &&
- !S_ISDIR(inode->i_mode) &&
- !S_ISDIR(inode->i_mode))
- RETURN(0);
- }
-
if (rc) {
- CERROR("failure %d inode %lu\n", rc, inode->i_ino);
- RETURN(-abs(rc));
- }
- rc = ll_prep_inode(sbi->ll_osc_exp, &inode, req, 0, NULL);
- if (rc) {
- ptlrpc_req_finished(req);
+ rc = ll_inode_revalidate_fini(inode, rc);
RETURN(rc);
}
- ptlrpc_req_finished(req);
+
+ rc = ll_prep_inode(sbi->ll_osc_exp, &inode, req, 0, NULL);
+ if (rc)
+ GOTO(out, rc);
}
- lsm = lli->lli_smd;
- if (lsm == NULL) /* object not yet allocated, don't validate size */
- RETURN(0);
+ /* if object not yet allocated, don't validate size */
+ if (ll_i2info(inode)->lli_smd == NULL)
+ GOTO(out, rc = 0);
/* ll_glimpse_size will prefer locally cached writes if they extend
* the file */
rc = ll_glimpse_size(inode, 0);
+
+out:
+ ptlrpc_req_finished(req);
RETURN(rc);
}
int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
int ll_writepage(struct page *page);
void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa);
-void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc);
+int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc);
int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction);
extern struct cache_definition ll_cache_definition;
void ll_removepage(struct page *page);
void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft);
void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name);
-
+int revalidate_it_finish(struct ptlrpc_request *request, int offset,
+ struct lookup_intent *it, struct dentry *de);
/* llite/llite_lib.c */
extern struct super_operations lustre_super_operations;
data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
if (sbi->ll_flags & LL_SBI_USER_XATTR)
data->ocd_connect_flags |= OBD_CONNECT_XATTR;
- data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_JOIN;
+ data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_JOIN |
+ OBD_CONNECT_ATTRFID;
if (sbi->ll_flags & LL_SBI_FLOCK) {
sbi->ll_fop = &ll_file_operations_flock;
#define lustre_throw_orphan_dentries(sb)
#endif
-static void prune_deathrow(struct ll_sb_info *sbi, int try)
+static void prune_dir_dentries(struct inode *inode)
{
- LIST_HEAD(throw_away);
- int locked = 0;
- ENTRY;
-
- if (try) {
- locked = spin_trylock(&sbi->ll_deathrow_lock);
- } else {
- spin_lock(&sbi->ll_deathrow_lock);
- locked = 1;
+ struct dentry *dentry, *prev = NULL;
+
+ /* due to lustre specific logic, a directory
+ * can have few dentries - a bug from VFS POV */
+restart:
+ spin_lock(&dcache_lock);
+ if (!list_empty(&inode->i_dentry)) {
+ dentry = list_entry(inode->i_dentry.prev,
+ struct dentry, d_alias);
+ /* in order to prevent infinite loops we
+ * break if previous dentry is busy */
+ if (dentry != prev) {
+ prev = dentry;
+ dget_locked(dentry);
+ spin_unlock(&dcache_lock);
+
+ /* try to kill all child dentries */
+ lock_dentry(dentry);
+ shrink_dcache_parent(dentry);
+ unlock_dentry(dentry);
+ dput(dentry);
+
+ /* now try to get rid of current dentry */
+ d_prune_aliases(inode);
+ goto restart;
+ }
}
+ spin_unlock(&dcache_lock);
+}
- if (!locked) {
- EXIT;
- return;
- }
+static void prune_deathrow_one(struct ll_inode_info *lli)
+{
+ struct inode *inode = ll_info2i(lli);
- list_splice_init(&sbi->ll_deathrow, &throw_away);
- spin_unlock(&sbi->ll_deathrow_lock);
+ /* first, try to drop any dentries - they hold a ref on the inode */
+ if (S_ISDIR(inode->i_mode))
+ prune_dir_dentries(inode);
+ else
+ d_prune_aliases(inode);
- while (!list_empty(&throw_away)) {
- struct ll_inode_info *lli;
- struct inode *inode;
- lli = list_entry(throw_away.next, struct ll_inode_info,
- lli_dead_list);
- list_del_init(&lli->lli_dead_list);
+ /* if somebody still uses it, leave it */
+ LASSERT(atomic_read(&inode->i_count) > 0);
+ if (atomic_read(&inode->i_count) > 1)
+ goto out;
- inode = ll_info2i(lli);
- d_prune_aliases(inode);
+ CDEBUG(D_INODE, "inode %lu/%u(%d) looks a good candidate for prune\n",
+ inode->i_ino,inode->i_generation, atomic_read(&inode->i_count));
- CDEBUG(D_INODE, "prune duplicate inode %p inum %lu count %u\n",
- inode, inode->i_ino, atomic_read(&inode->i_count));
- iput(inode);
- }
- EXIT;
+ /* seems nobody uses it anymore */
+ inode->i_nlink = 0;
+
+out:
+ iput(inode);
+ return;
+}
+
+static void prune_deathrow(struct ll_sb_info *sbi, int try)
+{
+ struct ll_inode_info *lli;
+ int empty;
+
+ do {
+ if (need_resched())
+ break;
+
+ if (try) {
+ if (!spin_trylock(&sbi->ll_deathrow_lock))
+ break;
+ } else {
+ spin_lock(&sbi->ll_deathrow_lock);
+ }
+
+ empty = 1;
+ lli = NULL;
+ if (!list_empty(&sbi->ll_deathrow)) {
+ lli = list_entry(sbi->ll_deathrow.next,
+ struct ll_inode_info,
+ lli_dead_list);
+ list_del_init(&lli->lli_dead_list);
+ if (!list_empty(&sbi->ll_deathrow))
+ empty = 0;
+ }
+ spin_unlock(&sbi->ll_deathrow_lock);
+
+ if (lli)
+ prune_deathrow_one(lli);
+
+ } while (empty == 0);
}
void client_common_put_super(struct super_block *sb)
int count;
spin_unlock(&lli->lli_lock);
+
+ if (!lsm)
+ return;
count = obd_join_lru(sbi->ll_osc_exp, lsm, 0);
VMA_DEBUG(vma, "split %d unused locks from lru\n", count);
} else {
int count;
spin_unlock(&lli->lli_lock);
+
+ if (!lsm)
+ return;
count = obd_join_lru(sbi->ll_osc_exp, lsm, 1);
VMA_DEBUG(vma, "join %d unused locks to lru\n", count);
} else {
if (inode->i_state & (I_FREEING | I_CLEAR))
return 0;
+ if (inode->i_nlink == 0)
+ return 0;
- atomic_inc(&inode->i_count);
- inode->i_nlink = 0;
- inode->i_state |= I_FREEING;
- LASSERT(list_empty(&lli->lli_dead_list));
/* add "duplicate" inode into deathrow for destroy */
spin_lock(&sbi->ll_deathrow_lock);
- list_add(&lli->lli_dead_list, &sbi->ll_deathrow);
+ if (list_empty(&lli->lli_dead_list)) {
+ atomic_inc(&inode->i_count);
+ list_add(&lli->lli_dead_list, &sbi->ll_deathrow);
+ }
spin_unlock(&sbi->ll_deathrow_lock);
- /* remove inode from dirty/io lists */
- list_del_init(&inode->i_list);
-
return 0;
}
LASSERT(it && it->d.lustre.it_disposition);
+ LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF));
request = it->d.lustre.it_data;
+ it_clear_disposition(it, DISP_ENQ_CREATE_REF);
rc = ll_prep_inode(sbi->ll_osc_exp, &inode, request, 1, dir->i_sb);
if (rc)
GOTO(out, inode = ERR_PTR(rc));
}
/* called for each page in a completed rpc.*/
-void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
+int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
{
struct ll_async_page *llap;
struct page *page;
+ int ret = 0;
ENTRY;
llap = LLAP_FROM_COOKIE(data);
llap->llap_defer_uptodate = 0;
} else {
ll_redirty_page(page);
+ ret = 1;
}
SetPageError(page);
}
end_page_writeback(page);
}
page_cache_release(page);
- EXIT;
+
+ RETURN(ret);
}
/* the kernel calls us here when a page is unhashed from the page cache.
* we just have path resolution to the target inode, so we have great
* chance that cached ACL is uptodate.
*/
+#ifdef CONFIG_FS_POSIX_ACL
if (xattr_type == XATTR_ACL_ACCESS_T) {
struct ll_inode_info *lli = ll_i2info(inode);
struct posix_acl *acl;
posix_acl_release(acl);
RETURN(rc);
}
+#endif
do_getxattr:
ll_inode2fid(&fid, inode);
oa->o_stripe_idx = lap->lap_stripe;
}
-static void lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
+static int lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
{
struct lov_async_page *lap = LAP_FROM_COOKIE(data);
/* in a raid1 regime this would down a count of many ios
* in flight, onl calling the caller_ops completion when all
* the raid1 ios are complete */
- lap->lap_caller_ops->ap_completion(lap->lap_caller_data, cmd, oa, rc);
+ rc = lap->lap_caller_ops->ap_completion(lap->lap_caller_data,cmd,oa,rc);
+ return rc;
}
static struct obd_async_page_ops lov_async_page_ops = {
int rc = 0, i;
ENTRY;
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
lov = &exp->exp_obd->u.lov;
if (lsm == NULL) {
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
ASSERT_LSM_MAGIC(lsm);
- if (!exp || !exp->exp_obd)
- RETURN(-ENODEV);
-
for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
struct lov_stripe_md submd;
int err;
if (lov->tgts[loi->loi_ost_idx].ltd_exp ==
data->lock->l_conn_export &&
loi->loi_id == res_id->name[0] &&
- loi->loi_gr == res_id->name[2]) {
+ loi->loi_gr == res_id->name[1]) {
*stripe = i;
GOTO(out, rc = 0);
}
req->rq_buflen = sizeof(*req->rq_md) +
sizeof(struct lov_oinfo);
OBD_ALLOC(req->rq_md, req->rq_buflen);
- if (req->rq_md == NULL)
+ if (req->rq_md == NULL) {
+ OBD_FREE(req, sizeof(*req));
GOTO(out_set, rc = -ENOMEM);
+ }
req->rq_extent.start = start;
req->rq_extent.end = end;
req->rq_buflen = sizeof(*req->rq_md);
OBD_ALLOC(req->rq_md, req->rq_buflen);
- if (req->rq_md == NULL)
+ if (req->rq_md == NULL) {
+ OBD_FREE(req, sizeof(*req));
GOTO(out_set, rc = -ENOMEM);
+ }
req->rq_extent.start = start;
req->rq_extent.end = end;
req->rq_buflen = sizeof(*req->rq_md);
OBD_ALLOC(req->rq_md, req->rq_buflen);
- if (req->rq_md == NULL)
+ if (req->rq_md == NULL) {
+ OBD_FREE(req, sizeof(*req));
GOTO(out_set, rc = -ENOMEM);
+ }
req->rq_idx = loi->loi_ost_idx;
req->rq_stripe = i;
GOTO(out, rc = -ENOMEM);
req->rq_oa = obdo_alloc();
- if (req->rq_oa == NULL)
+ if (req->rq_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
GOTO(out, rc = -ENOMEM);
+ }
if (src_oa)
memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
req->rq_buflen = sizeof(*req->rq_md);
OBD_ALLOC(req->rq_md, req->rq_buflen);
- if (req->rq_md == NULL)
+ if (req->rq_md == NULL) {
+ obdo_free(req->rq_oa);
+ OBD_FREE(req, sizeof(*req));
GOTO(out, rc = -ENOMEM);
+ }
req->rq_idx = loi->loi_ost_idx;
req->rq_stripe = i;
req->rq_idx = loi->loi_ost_idx;
req->rq_oa = obdo_alloc();
- if (req->rq_oa == NULL)
+ if (req->rq_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
GOTO(out_set, rc = -ENOMEM);
+ }
memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
req->rq_oa->o_id = loi->loi_id;
req->rq_idx = loi->loi_ost_idx;
req->rq_oa = obdo_alloc();
- if (req->rq_oa == NULL)
+ if (req->rq_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
GOTO(out_set, rc = -ENOMEM);
+ }
memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
req->rq_oa->o_id = loi->loi_id;
req->rq_idx = loi->loi_ost_idx;
req->rq_oa = obdo_alloc();
- if (req->rq_oa == NULL)
+ if (req->rq_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
GOTO(out_set, rc = -ENOMEM);
+ }
memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
req->rq_oa->o_id = loi->loi_id;
req->rq_oa->o_stripe_idx = i;
req->rq_idx = loi->loi_ost_idx;
req->rq_oa = obdo_alloc();
- if (req->rq_oa == NULL)
+ if (req->rq_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
GOTO(out_set, rc = -ENOMEM);
+ }
memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
req->rq_oa->o_id = loi->loi_id;
req->rq_oa->o_stripe_idx = i;
req->rq_idx = loi->loi_ost_idx;
req->rq_oa = obdo_alloc();
- if (req->rq_oa == NULL)
+ if (req->rq_oa == NULL) {
+ OBD_FREE(req, sizeof(*req));
GOTO(out_set, rc = -ENOMEM);
+ }
memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
req->rq_oa->o_id = loi->loi_id;
req->rq_oa->o_stripe_idx = i;
return EXT_CONTINUE;
}
- tgen = EXT_GENERATION(EXT_ROOT_HDR(tree));
+ tgen = EXT_GENERATION(tree);
count = ext3_ext_calc_credits_for_insert(tree, path);
ext3_up_truncate_sem(inode);
}
ext3_down_truncate_sem(inode);
- if (tgen != EXT_GENERATION(EXT_ROOT_HDR(tree))) {
+ if (tgen != EXT_GENERATION(tree)) {
/* the tree has changed. so path can be invalid at moment */
lock_24kernel();
journal_stop(handle);
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * This file is part of Lustre, http://www.lustre.org
+ *
+ * MDC internal definitions.
+ */
+
#include <lustre_mds.h>
void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
__u64 valid, struct ll_fid *fid, int ea_size);
void mdc_pack_rep_body(struct ptlrpc_request *);
void mdc_readdir_pack(struct ptlrpc_request *req, int pos, __u64 offset,
- __u32 size, struct ll_fid *mdc_fid);
+ __u32 size, struct ll_fid *mdc_fid);
void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
int flags, struct mdc_op_data *data);
void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *data,
struct iattr *iattr, void *ea, int ealen,
- void *ea2, int ea2len);
+ void *ea2, int ea2len);
void mdc_create_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *op_data, const void *data, int datalen,
- __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective,
- __u64 rdev);
+ __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective,
+ __u64 rdev);
void mdc_open_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *op_data, __u32 mode, __u64 rdev,
__u32 flags, const void *data, int datalen);
-void mdc_join_pack(struct ptlrpc_request *req, int offset,
+void mdc_join_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *op_data, __u64 head_size);
void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *data);
struct mdc_op_data *data,
const char *old, int oldlen, const char *new, int newlen);
void mdc_close_pack(struct ptlrpc_request *req, int offset, struct obdo *oa,
- int valid, struct obd_client_handle *och);
+ int valid, struct obd_client_handle *och);
struct mdc_open_data {
struct obd_client_handle *mod_och;
lck->rpcl_it = NULL;
}
-static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
+static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
struct lookup_intent *it)
{
ENTRY;
- down(&lck->rpcl_sem);
- if (it) {
+ if (1 || !it || (it->it_op != IT_GETATTR && it->it_op != IT_LOOKUP)) {
+ down(&lck->rpcl_sem);
+ LASSERT(lck->rpcl_it == NULL);
lck->rpcl_it = it;
}
}
-static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
+static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
struct lookup_intent *it)
{
- EXIT;
- if (it == NULL) {
- LASSERT(it == lck->rpcl_it);
- up(&lck->rpcl_sem);
- return;
- }
- if (it) {
+ if (1 || !it || (it->it_op != IT_GETATTR && it->it_op != IT_LOOKUP)) {
LASSERT(it == lck->rpcl_it);
lck->rpcl_it = NULL;
up(&lck->rpcl_sem);
}
+ EXIT;
}
rec->cr_fsuid = current->fsuid;
rec->cr_fsgid = current->fsgid;
rec->cr_cap = current->cap_effective;
- if (op_data != NULL)
- rec->cr_fid = op_data->fid1;
+ rec->cr_fid = op_data->fid1;
memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
rec->cr_mode = mode;
rec->cr_flags = mds_pack_open_flags(flags);
b->suppgid = data->suppgids[0];
b->fid1 = data->fid1;
+ b->fid2 = data->fid2;
if (data->name) {
char *tmp;
tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
it->d.lustre.it_disposition &= ~flag;
}
+EXPORT_SYMBOL(it_clear_disposition);
+
static int it_to_lock_mode(struct lookup_intent *it)
{
/* CREAT needs to be tested before open (both could be set) */
res_id.name[0] = fid->id;
res_id.name[1] = fid->generation;
- ldlm_change_cbdata(class_exp2obd(exp)->obd_namespace, &res_id, it,
- data);
+ ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, &res_id,
+ it, data);
EXIT;
return 0;
ldlm_policy_data_t policy;
int mode = LCK_CR;
+ /* As not all attributes are kept under update lock, e.g.
+ owner/group/acls are under lookup lock, we need both
+ ibits for GETATTR. */
policy.l_inodebits.bits = (it->it_op == IT_GETATTR) ?
- MDS_INODELOCK_UPDATE : MDS_INODELOCK_LOOKUP;
+ MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP :
+ MDS_INODELOCK_LOOKUP;
+
rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
LDLM_FL_BLOCK_GRANTED, &res_id,
LDLM_IBITS, &policy, LCK_CR, &lockh);
sizeof(lockh));
it->d.lustre.it_lock_mode = mode;
}
- RETURN(rc);
+
+ /* Only return failure if it was not GETATTR by cfid
+ (from inode_revalidate) */
+ if (rc || op_data->namelen != 0)
+ RETURN(rc);
}
/* lookup_it may be called only after revalidate_it has run, because
/* If we were revalidating a fid/name pair, mark the intent in
* case we fail and get called again from lookup */
- if (op_data->fid2.id) {
+ if (op_data->fid2.id && (it->it_op != IT_GETATTR)) {
it_set_disposition(it, DISP_ENQ_COMPLETE);
/* Also: did we find the same inode? */
if (memcmp(&op_data->fid2, &mds_body->fid1, sizeof(op_data->fid2)))
/* keep requests around for the multiple phases of the call
* this shows the DISP_XX must guarantee we make it into the call
*/
- if (it_disposition(it, DISP_OPEN_CREATE) &&
- !it_open_error(DISP_OPEN_CREATE, it))
+ if (!it_disposition(it, DISP_ENQ_CREATE_REF) &&
+ it_disposition(it, DISP_OPEN_CREATE) &&
+ !it_open_error(DISP_OPEN_CREATE, it)) {
+ it_set_disposition(it, DISP_ENQ_CREATE_REF);
ptlrpc_request_addref(request); /* balanced in ll_create_node */
- if (it_disposition(it, DISP_OPEN_OPEN) &&
- !it_open_error(DISP_OPEN_OPEN, it))
+ }
+ if (!it_disposition(it, DISP_ENQ_OPEN_REF) &&
+ it_disposition(it, DISP_OPEN_OPEN) &&
+ !it_open_error(DISP_OPEN_OPEN, it)) {
+ it_set_disposition(it, DISP_ENQ_OPEN_REF);
ptlrpc_request_addref(request); /* balanced in ll_file_open */
+ }
if (it->it_op & IT_CREAT) {
/* XXX this belongs in ll_create_it */
{
struct obd_device *obd = class_exp2obd(exp);
struct ptlrpc_request *req = *request;
- int rc, size[] = { sizeof(struct mds_rec_unlink), data->namelen + 1};
+ int rc, size[] = { sizeof(struct mds_rec_unlink), data->namelen + 1, 0};
ENTRY;
LASSERT(req == NULL);
req->rq_replen = lustre_msg_size(bufcount, size);
- mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
rc = ptlrpc_queue_wait(req);
- mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL);
if (rc != 0)
RETURN (rc);
req->rq_replen = lustre_msg_size(1, &size);
- mdc_get_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
rc = ptlrpc_queue_wait(req);
- mdc_put_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
if (rc)
GOTO(out, rc);
RETURN(0);
}
-static int mds_getattr_name(int offset, struct ptlrpc_request *req,
+static int mds_getattr_lock(int offset, struct ptlrpc_request *req,
int child_part, struct lustre_handle *child_lockh)
{
struct obd_device *obd = req->rq_export->exp_obd;
RETURN(-EFAULT);
}
namesize = lustre_msg_buflen(req->rq_reqmsg, offset + 1);
+ /* namesize less than 2 means we have empty name, probably came from
+ revalidate by cfid, so no point in having name to be set */
+ if (namesize <= 1)
+ name = NULL;
rc = mds_init_ucred(&uc, req, offset);
if (rc)
}
if (resent_req == 0) {
- if (name) {
- rc = mds_get_parent_child_locked(obd, &obd->u.mds, &body->fid1,
- &parent_lockh, &dparent,
- LCK_CR,
- MDS_INODELOCK_UPDATE,
- name, namesize,
- child_lockh, &dchild, LCK_CR,
- child_part);
- } else {
+ if (name) {
+ rc = mds_get_parent_child_locked(obd, &obd->u.mds,
+ &body->fid1,
+ &parent_lockh,
+ &dparent, LCK_CR,
+ MDS_INODELOCK_UPDATE,
+ name, namesize,
+ child_lockh, &dchild,
+ LCK_CR, child_part);
+ } else {
/* For revalidate by fid we always take UPDATE lock */
dchild = mds_fid2locked_dentry(obd, &body->fid2, NULL,
LCK_CR, child_lockh,
- NULL, 0,
- MDS_INODELOCK_UPDATE);
+ NULL, 0, child_part);
LASSERT(dchild);
if (IS_ERR(dchild))
rc = PTR_ERR(dchild);
- }
- if (rc)
- GOTO(cleanup, rc);
+ }
+ if (rc)
+ GOTO(cleanup, rc);
} else {
struct ldlm_lock *granted_lock;
struct ll_fid child_fid;
if (resent_req == 0) {
if (rc && dchild->d_inode)
ldlm_lock_decref(child_lockh, LCK_CR);
- ldlm_lock_decref(&parent_lockh, LCK_CR);
- l_dput(dparent);
+ if (name) {
+ ldlm_lock_decref(&parent_lockh, LCK_CR);
+ l_dput(dparent);
+ }
}
l_dput(dchild);
case 1:
OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NAME_NET, 0);
/* If this request gets a reconstructed reply, we won't be
- * acquiring any new locks in mds_getattr_name, so we don't
+ * acquiring any new locks in mds_getattr_lock, so we don't
* want to cancel.
*/
- rc = mds_getattr_name(MDS_REQ_REC_OFF, req,
+ rc = mds_getattr_lock(MDS_REQ_REC_OFF, req,
MDS_INODELOCK_UPDATE, &lockh);
/* this non-intent call (from an ioctl) is special */
req->rq_status = rc;
if (len == sizeof("user_xattr") - 1 &&
memcmp(options, "user_xattr", len) == 0) {
mds->mds_fl_user_xattr = 1;
+ } else if (len == sizeof("nouser_xattr") - 1 &&
+ memcmp(options, "nouser_xattr", len) == 0) {
+ mds->mds_fl_user_xattr = 0;
} else if (len == sizeof("acl") - 1 &&
memcmp(options, "acl", len) == 0) {
#ifdef CONFIG_FS_POSIX_ACL
CWARN("ignoring unsupported acl mount option\n");
memmove(options, p, strlen(p) + 1);
#endif
+ } else if (len == sizeof("noacl") - 1 &&
+ memcmp(options, "noacl", len) == 0) {
+#ifdef CONFIG_FS_POSIX_ACL
+ mds->mds_fl_acl = 0;
+#else
+ memmove(options, p, strlen(p) + 1);
+#endif
}
options = ++p;
getattr_part = MDS_INODELOCK_LOOKUP |
MDS_INODELOCK_UPDATE;
- rep->lock_policy_res2 = mds_getattr_name(offset, req,
+ rep->lock_policy_res2 = mds_getattr_lock(offset, req,
getattr_part, &lockh);
/* FIXME: LDLM can set req->rq_status. MDS sets
policy_res{1,2} with disposition and status.
static int mds_create_objects(struct ptlrpc_request *req, int offset,
struct mds_update_record *rec,
struct mds_obd *mds, struct obd_device *obd,
- struct dentry *dchild, void **handle,
+ struct dentry *dchild, void **handle,
obd_id **ids)
{
struct inode *inode = dchild->d_inode;
UNLOCK_INODE_MUTEX(dchild->d_inode);
RETURN(-EEXIST);
}
- if (rec->ur_flags & MDS_OPEN_JOIN_FILE) {
+ if (rec->ur_flags & MDS_OPEN_JOIN_FILE) {
UNLOCK_INODE_MUTEX(dchild->d_inode);
- rc = mds_join_file(rec, req, dchild, lockh);
+ rc = mds_join_file(rec, req, dchild, lockh);
if (rc)
RETURN(rc);
LOCK_INODE_MUTEX(dchild->d_inode);
- }
- if (!(body->valid & OBD_MD_FLEASIZE) &&
+ }
+ if (!(body->valid & OBD_MD_FLEASIZE) &&
!(body->valid & OBD_MD_FLMODEASIZE)) {
/* no EA: create objects */
rc = mds_create_objects(req, 2, rec, mds, obd,
if (iattr.ia_valid != 0) {
handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
- if (IS_ERR(handle))
- GOTO(cleanup, rc = PTR_ERR(handle));
+ if (IS_ERR(handle)) {
+ rc = PTR_ERR(handle);
+ handle = NULL;
+ GOTO(cleanup, rc);
+ }
rc = fsfilt_setattr(obd, mfd->mfd_dentry, handle, &iattr, 0);
if (rc)
CERROR("error in setattr(%s): rc %d\n", fidname, rc);
#include <lustre_fsfilt.h>
#include <lustre_ucache.h>
-#include "mds_internal.h"
+#include "mds_internal.h"
+
+#ifndef XATTR_NAME_ACL_ACCESS
+#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access"
+#endif
static int mds_getxattr_pack_msg(struct ptlrpc_request *req,
struct dentry *de,
"initial_transno",
"inode_bit_locks",
"join_file",
- "",
+ "getattr_by_fid",
"no_oh_for_devices",
NULL
};
memcpy(oa, &eap->eap_eas->eas_oa, sizeof(*oa));
}
-static void ec_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
+static int ec_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
{
struct echo_async_page *eap = EAP_FROM_COOKIE(data);
struct echo_async_state *eas;
list_add(&eap->eap_item, &eas->eas_avail);
cfs_waitq_signal(&eas->eas_waitq);
spin_unlock_irqrestore(&eas->eas_lock, flags);
+ return 0;
}
static struct obd_async_page_ops ec_async_page_ops = {
int rc;
ENTRY;
- dentry = __filter_oa2dentry(exp->exp_obd, oa,
- __FUNCTION__, 1);
+ dentry = __filter_oa2dentry(exp->exp_obd, oa, __FUNCTION__, 1);
if (IS_ERR(dentry))
RETURN(PTR_ERR(dentry));
llog_cancel(llog_get_context(obd, fcc->lgc_subsys + 1),
NULL, 1, fcc, 0);
}
+ fcc = NULL;
GOTO(cleanup, rc = -ENOENT);
}
fsfilt_add_journal_cb(obd, 0,
oti ? oti->oti_handle : handle,
filter_cancel_cookies_cb, fcc);
+ fcc = NULL;
}
rc = filter_finish_transno(exp, oti, rc);
rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
filter_parent_unlock(dparent);
case 2:
f_dput(dchild);
+ if (fcc != NULL)
+ OBD_FREE(fcc, sizeof(*fcc));
case 1:
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
break;
struct osc_async_page {
int oap_magic;
- int oap_cmd;
+ unsigned short oap_cmd;
+ unsigned short oap_interrupted:1;
+
struct list_head oap_pending_item;
struct list_head oap_urgent_item;
struct list_head oap_rpc_item;
obd_off oap_obj_off;
- obd_off oap_page_off;
- int oap_count;
- obd_flag oap_brw_flags;
+ unsigned oap_page_off;
enum async_flags oap_async_flags;
- unsigned long oap_interrupted:1;
+ struct brw_page oap_brw_page;
+
struct oig_callback_context oap_occ;
- cfs_page_t *oap_page;
struct obd_io_group *oap_oig;
struct ptlrpc_request *oap_request;
struct client_obd *oap_cli;
void *oap_caller_data;
};
+#define oap_page oap_brw_page.pg
+#define oap_count oap_brw_page.count
+#define oap_brw_flags oap_brw_page.flag
+
#define OAP_FROM_COOKIE(c) \
(LASSERT(((struct osc_async_page *)(c))->oap_magic == OAP_MAGIC), \
(struct osc_async_page *)(c))
static inline int lproc_osc_attach_seqstat(struct obd_device *dev) {return 0;}
#endif
+#ifndef min_t
+#define min_t(type,x,y) \
+ ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
+#endif
+
#endif /* OSC_INTERNAL_H */
* via the LOV, and it _knows_ it's reading inside the file, it's just that
* this stripe never got written at or beyond this stripe offset yet. */
static void handle_short_read(int nob_read, obd_count page_count,
- struct brw_page *pga)
+ struct brw_page **pga)
{
char *ptr;
+ int i = 0;
/* skip bytes read OK */
while (nob_read > 0) {
LASSERT (page_count > 0);
- if (pga->count > nob_read) {
+ if (pga[i]->count > nob_read) {
/* EOF inside this page */
- ptr = cfs_kmap(pga->pg) + (pga->off & ~CFS_PAGE_MASK);
- memset(ptr + nob_read, 0, pga->count - nob_read);
- cfs_kunmap(pga->pg);
+ ptr = cfs_kmap(pga[i]->pg) +
+ (pga[i]->off & ~CFS_PAGE_MASK);
+ memset(ptr + nob_read, 0, pga[i]->count - nob_read);
+ cfs_kunmap(pga[i]->pg);
page_count--;
- pga++;
+ i++;
break;
}
- nob_read -= pga->count;
+ nob_read -= pga[i]->count;
page_count--;
- pga++;
+ i++;
}
/* zero remaining pages */
while (page_count-- > 0) {
- ptr = cfs_kmap(pga->pg) + (pga->off & ~CFS_PAGE_MASK);
- memset(ptr, 0, pga->count);
- cfs_kunmap(pga->pg);
- pga++;
+ ptr = cfs_kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK);
+ memset(ptr, 0, pga[i]->count);
+ cfs_kunmap(pga[i]->pg);
+ i++;
}
}
static int check_write_rcs(struct ptlrpc_request *request,
int requested_nob, int niocount,
- obd_count page_count, struct brw_page *pga)
+ obd_count page_count, struct brw_page **pga)
{
int *remote_rcs, i;
}
static obd_count osc_checksum_bulk(int nob, obd_count pg_count,
- struct brw_page *pga)
+ struct brw_page **pga)
{
__u32 cksum = ~0;
+ int i = 0;
LASSERT (pg_count > 0);
while (nob > 0 && pg_count > 0) {
- char *ptr = cfs_kmap(pga->pg);
- int off = pga->off & ~CFS_PAGE_MASK;
- int count = pga->count > nob ? nob : pga->count;
+ char *ptr = cfs_kmap(pga[i]->pg);
+ int off = pga[i]->off & ~CFS_PAGE_MASK;
+ int count = pga[i]->count > nob ? nob : pga[i]->count;
cksum = crc32_le(cksum, ptr + off, count);
- cfs_kunmap(pga->pg);
- LL_CDEBUG_PAGE(D_PAGE, pga->pg, "off %d checksum %x\n",
+ cfs_kunmap(pga[i]->pg);
+ LL_CDEBUG_PAGE(D_PAGE, pga[i]->pg, "off %d checksum %x\n",
off, cksum);
- nob -= pga->count;
+ nob -= pga[i]->count;
pg_count--;
- pga++;
+ i++;
}
return cksum;
static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
struct lov_stripe_md *lsm, obd_count page_count,
- struct brw_page *pga, int *requested_nobp,
+ struct brw_page **pga, int *requested_nobp,
int *niocountp, struct ptlrpc_request **reqp)
{
struct ptlrpc_request *req;
pool = ((cmd & OBD_BRW_WRITE) != 0) ? imp->imp_rq_pool : NULL;
for (niocount = i = 1; i < page_count; i++)
- if (!can_merge_pages(&pga[i - 1], &pga[i]))
+ if (!can_merge_pages(pga[i - 1], pga[i]))
niocount++;
size[0] = sizeof(*body);
LASSERT (page_count > 0);
for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
- struct brw_page *pg = &pga[i];
- struct brw_page *pg_prev = pg - 1;
+ struct brw_page *pg = pga[i];
+ struct brw_page *pg_prev = pga[i - 1];
LASSERT(pg->count > 0);
LASSERTF((pg->off & ~CFS_PAGE_MASK) + pg->count <= CFS_PAGE_SIZE,
LASSERTF(i == 0 || pg->off > pg_prev->off,
"i %d p_c %u\n", i, page_count);
#endif
- LASSERT((pga[0].flag & OBD_BRW_SRVLOCK) ==
+ LASSERT((pga[0]->flag & OBD_BRW_SRVLOCK) ==
(pg->flag & OBD_BRW_SRVLOCK));
ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~CFS_PAGE_MASK,
}
static void check_write_csum(__u32 cli, __u32 srv, int requested_nob,
- obd_count page_count, struct brw_page *pga)
+ obd_count page_count, struct brw_page **pga)
{
__u32 new_csum;
static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa,
int requested_nob, int niocount,
- obd_count page_count, struct brw_page *pga,
+ obd_count page_count, struct brw_page **pga,
int rc)
{
const lnet_process_id_t *peer =
static int osc_brw_internal(int cmd, struct obd_export *exp,struct obdo *oa,
struct lov_stripe_md *lsm,
- obd_count page_count, struct brw_page *pga)
+ obd_count page_count, struct brw_page **pga)
{
int requested_nob;
int niocount;
int requested_nob = aa->aa_requested_nob;
int niocount = aa->aa_nio_count;
obd_count page_count = aa->aa_page_count;
- struct brw_page *pga = aa->aa_pga;
+ struct brw_page **pga = aa->aa_ppga;
ENTRY;
rc = osc_brw_fini_request(request, oa, requested_nob, niocount,
static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *lsm, obd_count page_count,
- struct brw_page *pga, struct ptlrpc_request_set *set)
+ struct brw_page **pga, struct ptlrpc_request_set *set)
{
struct ptlrpc_request *request;
int requested_nob;
aa->aa_requested_nob = requested_nob;
aa->aa_nio_count = nio_count;
aa->aa_page_count = page_count;
- aa->aa_pga = pga;
+ aa->aa_ppga = pga;
request->rq_interpret_reply = brw_interpret;
ptlrpc_set_add_req(set, request);
RETURN (rc);
}
-#ifndef min_t
-#define min_t(type,x,y) \
- ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; })
-#endif
-
/*
* ugh, we want disk allocation on the target to happen in offset order. we'll
* follow sedgewicks advice and stick to the dead simple shellsort -- it'll do
* insertion sort that swaps elements that are strides apart, shrinking the
* stride down until its '1' and the array is sorted.
*/
-static void sort_brw_pages(struct brw_page *array, int num)
+static void sort_brw_pages(struct brw_page **array, int num)
{
int stride, i, j;
- struct brw_page tmp;
+ struct brw_page *tmp;
if (num == 1)
return;
for (i = stride ; i < num ; i++) {
tmp = array[i];
j = i;
- while (j >= stride && array[j - stride].off > tmp.off) {
+ while (j >= stride && array[j - stride]->off > tmp->off) {
array[j] = array[j - stride];
j -= stride;
}
} while (stride > 1);
}
-static obd_count max_unfragmented_pages(struct brw_page *pg, obd_count pages)
+static obd_count max_unfragmented_pages(struct brw_page **pg, obd_count pages)
{
int count = 1;
int offset;
+ int i = 0;
- LASSERT (pages > 0);
- offset = pg->off & (CFS_PAGE_SIZE - 1);
+ LASSERT (pages > 0);
+ offset = pg[i]->off & (CFS_PAGE_SIZE - 1);
for (;;) {
pages--;
if (pages == 0) /* that's all */
return count;
- if (offset + pg->count < CFS_PAGE_SIZE)
- return count; /* doesn't end on page boundary */
+ if (offset + pg[i]->count < CFS_PAGE_SIZE)
+ return count; /* doesn't end on page boundary */
- pg++;
- offset = pg->off & (CFS_PAGE_SIZE - 1);
- if (offset != 0) /* doesn't start on page boundary */
- return count;
+ i++;
+ offset = pg[i]->off & (CFS_PAGE_SIZE - 1);
+ if (offset != 0) /* doesn't start on page boundary */
+ return count;
count++;
}
}
+static struct brw_page **osc_build_ppga(struct brw_page *pga, obd_count count)
+{
+ struct brw_page **ppga;
+ int i;
+
+ OBD_ALLOC(ppga, sizeof(*ppga) * count);
+ if (ppga == NULL)
+ return NULL;
+
+ for (i = 0; i < count; i++)
+ ppga[i] = pga + i;
+ return ppga;
+}
+
+static void osc_release_ppga(struct brw_page **ppga, obd_count count)
+{
+ LASSERT(ppga != NULL);
+ OBD_FREE(ppga, sizeof(*ppga) * count);
+}
+
static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *md, obd_count page_count,
struct brw_page *pga, struct obd_trans_info *oti)
{
struct obdo *saved_oa = NULL;
- int rc;
+ struct brw_page **ppga, **orig;
+ int rc, page_count_orig;
ENTRY;
if (cmd & OBD_BRW_CHECK) {
rc = 0;
+ orig = ppga = osc_build_ppga(pga, page_count);
+ if (ppga == NULL)
+ RETURN(-ENOMEM);
+ page_count_orig = page_count;
+
while (page_count) {
obd_count pages_per_brw;
else
pages_per_brw = page_count;
- sort_brw_pages(pga, pages_per_brw);
- pages_per_brw = max_unfragmented_pages(pga, pages_per_brw);
+ sort_brw_pages(ppga, pages_per_brw);
+ pages_per_brw = max_unfragmented_pages(ppga, pages_per_brw);
if (saved_oa != NULL) {
/* restore previously saved oa */
/* save a copy of oa (brw will clobber it) */
saved_oa = obdo_alloc();
if (saved_oa == NULL)
- RETURN(-ENOMEM);
+ GOTO(out, rc = -ENOMEM);
*saved_oa = *oa;
}
- rc = osc_brw_internal(cmd, exp, oa, md, pages_per_brw, pga);
+ rc = osc_brw_internal(cmd, exp, oa, md, pages_per_brw, ppga);
if (rc != 0)
break;
page_count -= pages_per_brw;
- pga += pages_per_brw;
+ ppga += pages_per_brw;
}
+out:
+ osc_release_ppga(orig, page_count_orig);
+
if (saved_oa != NULL)
obdo_free(saved_oa);
struct brw_page *pga, struct ptlrpc_request_set *set,
struct obd_trans_info *oti)
{
+ struct brw_page **ppga, **orig;
+ int page_count_orig;
+ int rc = 0;
ENTRY;
if (cmd & OBD_BRW_CHECK) {
RETURN(0);
}
+ orig = ppga = osc_build_ppga(pga, page_count);
+ if (ppga == NULL)
+ RETURN(-ENOMEM);
+ page_count_orig = page_count;
+
while (page_count) {
obd_count pages_per_brw;
- int rc;
if (page_count > PTLRPC_MAX_BRW_PAGES)
pages_per_brw = PTLRPC_MAX_BRW_PAGES;
else
pages_per_brw = page_count;
- sort_brw_pages(pga, pages_per_brw);
- pages_per_brw = max_unfragmented_pages(pga, pages_per_brw);
+ sort_brw_pages(ppga, pages_per_brw);
+ pages_per_brw = max_unfragmented_pages(ppga, pages_per_brw);
- rc = async_internal(cmd, exp, oa, md, pages_per_brw, pga, set);
+ rc = async_internal(cmd, exp, oa, md, pages_per_brw, ppga, set);
if (rc != 0)
- RETURN(rc);
+ break;
page_count -= pages_per_brw;
- pga += pages_per_brw;
+ ppga += pages_per_brw;
}
- RETURN(0);
+ osc_release_ppga(orig, page_count_orig);
+ RETURN(rc);
}
static void osc_check_rpcs(struct client_obd *cli);
ar->ar_force_sync = 0;
}
+static void osc_oap_to_pending(struct osc_async_page *oap)
+{
+ struct loi_oap_pages *lop;
+
+ if (oap->oap_cmd & OBD_BRW_WRITE)
+ lop = &oap->oap_loi->loi_write_lop;
+ else
+ lop = &oap->oap_loi->loi_read_lop;
+
+ if (oap->oap_async_flags & ASYNC_URGENT)
+ list_add(&oap->oap_urgent_item, &lop->lop_urgent);
+ list_add_tail(&oap->oap_pending_item, &lop->lop_pending);
+ lop_update_pending(oap->oap_cli, lop, oap->oap_cmd, 1);
+}
+
/* this must be called holding the loi list lock to give coverage to exit_cache,
* async_flag maintenance, and oap_request */
static void osc_ap_completion(struct client_obd *cli, struct obdo *oa,
struct osc_async_page *oap, int sent, int rc)
{
ENTRY;
- osc_exit_cache(cli, oap, sent);
oap->oap_async_flags = 0;
oap->oap_interrupted = 0;
}
if (oap->oap_oig) {
+ osc_exit_cache(cli, oap, sent);
oig_complete_one(oap->oap_oig, &oap->oap_occ, rc);
oap->oap_oig = NULL;
EXIT;
return;
}
- oap->oap_caller_ops->ap_completion(oap->oap_caller_data, oap->oap_cmd,
- oa, rc);
+ rc = oap->oap_caller_ops->ap_completion(oap->oap_caller_data,
+ oap->oap_cmd, oa, rc);
+
+ /* ll_ap_completion (from llite) drops PG_locked. so, a new
+ * I/O on the page could start, but OSC calls it under lock
+ * and thus we can add oap back to pending safely */
+ if (rc)
+ /* upper layer wants to leave the page on pending queue */
+ osc_oap_to_pending(oap);
+ else
+ osc_exit_cache(cli, oap, sent);
EXIT;
}
rc = osc_brw_fini_request(request, aa->aa_oa, aa->aa_requested_nob,
aa->aa_nio_count, aa->aa_page_count,
- aa->aa_pga, rc);
+ aa->aa_ppga, rc);
CDEBUG(D_INODE, "request %p aa %p rc %d\n", request, aa, rc);
client_obd_list_unlock(&cli->cl_loi_list_lock);
obdo_free(aa->aa_oa);
- OBD_FREE(aa->aa_pga, aa->aa_page_count * sizeof(struct brw_page));
+ OBD_FREE(aa->aa_ppga, aa->aa_page_count * sizeof(struct brw_page *));
RETURN(0);
}
int page_count, int cmd)
{
struct ptlrpc_request *req;
- struct brw_page *pga = NULL;
+ struct brw_page **pga = NULL;
int requested_nob, nio_count;
struct osc_brw_async_args *aa;
struct obdo *oa = NULL;
ops = oap->oap_caller_ops;
caller_data = oap->oap_caller_data;
}
- pga[i].off = oap->oap_obj_off + oap->oap_page_off;
- pga[i].pg = oap->oap_page;
- pga[i].count = oap->oap_count;
- pga[i].flag = oap->oap_brw_flags;
+ pga[i] = &oap->oap_brw_page;
+ pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
+ /*pga[i]->pg = oap->oap_page;
+ pga[i]->count = oap->oap_count;
+ pga[i]->flag = oap->oap_brw_flags;*/
CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
- pga[i].pg, cfs_page_index(oap->oap_page), oap, pga[i].flag);
+ pga[i]->pg, cfs_page_index(oap->oap_page), oap, pga[i]->flag);
i++;
}
aa->aa_requested_nob = requested_nob;
aa->aa_nio_count = nio_count;
aa->aa_page_count = page_count;
- aa->aa_pga = pga;
+ aa->aa_ppga = pga;
aa->aa_cli = cli;
out:
continue;
}
osc_ap_completion(cli, NULL, oap, 0, PTR_ERR(request));
-
- /* put the page back in the loi/lop lists */
- list_add_tail(&oap->oap_pending_item,
- &lop->lop_pending);
- lop_update_pending(cli, lop, cmd, 1);
- if (oap->oap_async_flags & ASYNC_URGENT)
- list_add(&oap->oap_urgent_item,
- &lop->lop_urgent);
}
loi_list_maint(cli, loi);
RETURN(PTR_ERR(request));
{
struct client_obd *cli = &exp->exp_obd->u.cli;
struct osc_async_page *oap;
- struct loi_oap_pages *lop;
int rc = 0;
ENTRY;
client_obd_list_unlock(&cli->cl_loi_list_lock);
RETURN(rc);
}
- lop = &loi->loi_write_lop;
- } else {
- lop = &loi->loi_read_lop;
}
- if (oap->oap_async_flags & ASYNC_URGENT)
- list_add(&oap->oap_urgent_item, &lop->lop_urgent);
- list_add_tail(&oap->oap_pending_item, &lop->lop_pending);
- lop_update_pending(cli, lop, cmd, 1);
-
+ osc_oap_to_pending(oap);
loi_list_maint(cli, loi);
LOI_DEBUG(loi, "oap %p page %p added for cmd %d\n", oap, oap->oap_page,
list_for_each_safe(pos, tmp, &lop->lop_pending_group) {
oap = list_entry(pos, struct osc_async_page, oap_pending_item);
list_del(&oap->oap_pending_item);
- list_add_tail(&oap->oap_pending_item, &lop->lop_pending);
- if (oap->oap_async_flags & ASYNC_URGENT)
- list_add(&oap->oap_urgent_item, &lop->lop_urgent);
- lop_update_pending(cli, lop, cmd, 1);
+ osc_oap_to_pending(oap);
}
loi_list_maint(cli, loi);
}
struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
struct obd_device *obd = class_exp2obd(exp);
- ldlm_change_cbdata(obd->obd_namespace, &res_id, replace, data);
+ ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data);
return 0;
}
EXIT;
}
+struct ost_prolong_data {
+ struct obd_export *opd_exp;
+ ldlm_policy_data_t opd_policy;
+ ldlm_mode_t opd_mode;
+};
+
+static int ost_prolong_locks_iter(struct ldlm_lock *lock, void *data)
+{
+ struct ost_prolong_data *opd = data;
+
+ LASSERT(lock->l_resource->lr_type == LDLM_EXTENT);
+
+ if (lock->l_req_mode != lock->l_granted_mode) {
+ /* scan granted locks only */
+ return LDLM_ITER_STOP;
+ }
+
+ if (lock->l_export != opd->opd_exp) {
+ /* prolong locks only for given client */
+ return LDLM_ITER_CONTINUE;
+ }
+
+ if (!(lock->l_granted_mode & opd->opd_mode)) {
+ /* we aren't interesting in all type of locks */
+ return LDLM_ITER_CONTINUE;
+ }
+
+ if (lock->l_policy_data.l_extent.end < opd->opd_policy.l_extent.start ||
+ lock->l_policy_data.l_extent.start > opd->opd_policy.l_extent.end) {
+ /* the request doesn't cross the lock, skip it */
+ return LDLM_ITER_CONTINUE;
+ }
+
+ if (!(lock->l_flags & LDLM_FL_AST_SENT)) {
+ /* ignore locks not being cancelled */
+ return LDLM_ITER_CONTINUE;
+ }
+
+ /* OK. this is a possible lock the user holds doing I/O
+ * let's refresh eviction timer for it */
+ ldlm_refresh_waiting_lock(lock);
+
+ return LDLM_ITER_CONTINUE;
+}
+
+static void ost_prolong_locks(struct obd_export *exp, struct obd_ioobj *obj,
+ struct niobuf_remote *nb, ldlm_mode_t mode)
+{
+ struct ldlm_res_id res_id = { .name = { obj->ioo_id } };
+ int nrbufs = obj->ioo_bufcnt;
+ struct ost_prolong_data opd;
+
+ ENTRY;
+
+ opd.opd_mode = mode;
+ opd.opd_exp = exp;
+ opd.opd_policy.l_extent.start = nb[0].offset & CFS_PAGE_MASK;
+ opd.opd_policy.l_extent.end = (nb[nrbufs - 1].offset +
+ nb[nrbufs - 1].len - 1) | ~CFS_PAGE_MASK;
+
+ CDEBUG(D_DLMTRACE, "refresh locks: "LPU64"/"LPU64" ("LPU64"->"LPU64")\n",
+ res_id.name[0], res_id.name[1], opd.opd_policy.l_extent.start,
+ opd.opd_policy.l_extent.end);
+ ldlm_resource_iterate(exp->exp_obd->obd_namespace, &res_id,
+ ost_prolong_locks_iter, &opd);
+}
+
static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
{
struct ptlrpc_bulk_desc *desc;
if (rc != 0)
GOTO(out_lock, rc);
+ ost_prolong_locks(req->rq_export, ioo, pp_rnb, LCK_PW | LCK_PR);
+
/* We're finishing using body->oa as an input variable */
do_checksum = (body->oa.o_valid & OBD_MD_FLCKSUM);
body->oa.o_valid = 0;
ptlrpc_rs_decref(req->rq_reply_state);
req->rq_reply_state = NULL;
}
- if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) {
- CERROR("%s: bulk IO comm error evicting %s@%s id %s\n",
- req->rq_export->exp_obd->obd_name,
- req->rq_export->exp_client_uuid.uuid,
- req->rq_export->exp_connection->c_remote_uuid.uuid,
- libcfs_id2str(req->rq_peer));
- class_fail_export(req->rq_export);
- } else {
- CERROR("ignoring bulk IO comms error: "
- "client reconnected %s@%s id %s\n",
- req->rq_export->exp_client_uuid.uuid,
- req->rq_export->exp_connection->c_remote_uuid.uuid,
- libcfs_id2str(req->rq_peer));
- }
+ CWARN("%s: ignoring bulk IO comm error with %s@%s id %s\n",
+ req->rq_export->exp_obd->obd_name,
+ req->rq_export->exp_client_uuid.uuid,
+ req->rq_export->exp_connection->c_remote_uuid.uuid,
+ libcfs_id2str(req->rq_peer));
}
RETURN(rc);
if (rc != 0)
GOTO(out_bulk, rc);
+ ost_prolong_locks(req->rq_export, ioo, pp_rnb, LCK_PW);
+
/* obd_preprw clobbers oa->valid, so save what we need */
do_checksum = (body->oa.o_valid & OBD_MD_FLCKSUM);
ptlrpc_rs_decref(req->rq_reply_state);
req->rq_reply_state = NULL;
}
- if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) {
- CERROR("%s: bulk IO comm error evicting %s@%s id %s\n",
- req->rq_export->exp_obd->obd_name,
- req->rq_export->exp_client_uuid.uuid,
- req->rq_export->exp_connection->c_remote_uuid.uuid,
- libcfs_id2str(req->rq_peer));
- class_fail_export(req->rq_export);
- } else {
- CERROR("ignoring bulk IO comms error: "
- "client reconnected %s@%s id %s\n",
- req->rq_export->exp_client_uuid.uuid,
- req->rq_export->exp_connection->c_remote_uuid.uuid,
- libcfs_id2str(req->rq_peer));
- }
+ CWARN("%s: ignoring bulk IO comm error with %s@%s id %s\n",
+ req->rq_export->exp_obd->obd_name,
+ req->rq_export->exp_client_uuid.uuid,
+ req->rq_export->exp_connection->c_remote_uuid.uuid,
+ libcfs_id2str(req->rq_peer));
}
RETURN(rc);
}
ctxt = llog_get_context(obd, body->lgd_ctxt_idx);
if (ctxt == NULL)
- GOTO(out, rc = -EINVAL);
+ GOTO(out_free, rc = -EINVAL);
disk_obd = ctxt->loc_exp->exp_obd;
push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
out_pop:
pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL);
+out_free:
OBD_FREE(buf, LLOG_CHUNK_SIZE);
out:
RETURN(rc);
{ OST_QUOTACHECK, "ost_quotacheck" },
{ OST_QUOTACTL, "ost_quotactl" },
{ MDS_GETATTR, "mds_getattr" },
- { MDS_GETATTR_NAME, "mds_getattr_name" },
+ { MDS_GETATTR_NAME, "mds_getattr_lock" },
{ MDS_CLOSE, "mds_close" },
{ MDS_REINT, "mds_reint" },
{ MDS_READPAGE, "mds_readpage" },
CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL);
CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL);
CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
+ CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
+ CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
/* Sizes and Offsets */
IOZONE_OPTS="-i 0 -i 1 -i 2 -e -+d -r $RSIZE -s $SIZE"
IOZFILE="-f $MOUNT/iozone"
+ export O_DIRECT
if [ "$IOZONE" != "no" ]; then
mount_client $MOUNT
$DEBUG_OFF
$CLEANUP
$SETUP
+ # check if O_DIRECT support is implemented in kernel
+ if [ -z "$O_DIRECT" ]; then
+ touch $MOUNT/f.iozone
+ if ! ./directio write $MOUNT/f.iozone 0 1; then
+ O_DIRECT=no
+ fi
+ rm -f $MOUNT/f.iozone
+ fi
if [ "$O_DIRECT" != "no" -a "$IOZONE_DIR" != "no" ]; then
$DEBUG_OFF
iozone -I $IOZONE_OPTS $IOZFILE.odir
#}
#if [ "$MOUNT2" ]; then
-# $LLMOUNT -v -o user_xattr,acl `hostname`:/mds1/client $MOUNT2 || exit 3
+# $LLMOUNT -v -o user_xattr `hostname`:/mds1/client $MOUNT2 || exit 3
#fi
OSTDEV2=${OSTDEV2:-$TMP/ost1-${FSNAME}}
MDS_MOUNT_OPTS="user_xattr,acl,${MDS_MOUNT_OPTS:-""}"
-CLIENTOPT="user_xattr,acl,${CLIENTOPT:-""}"
+CLIENTOPT="user_xattr,${CLIENTOPT:-""}"
# specific journal size for the ost, in MB
JSIZE=${JSIZE:-0}
STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-$((OSTCOUNT -1))}
MDS_MOUNT_OPTS="user_xattr,acl,${MDS_MOUNT_OPTS:-""}"
-CLIENTOPT="user_xattr,acl,${CLIENTOPT:-""}"
+CLIENTOPT="user_xattr,${CLIENTOPT:-""}"
# specific journal size for the ost, in MB
JSIZE=${JSIZE:-0}
#define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE
do_facet ost sysctl -w lustre.fail_loc=0x80000504
cancel_lru_locks osc
- # will get evicted here
- do_facet client "cmp /etc/termcap $MOUNT/termcap" && return 1
+ # OST bulk will time out here, client resends
+ do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 1
sysctl -w lustre.fail_loc=0
# give recovery a chance to finish (shouldn't take long)
sleep $TIMEOUT
- do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 2
+ do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 2
start_read_ahead
}
-run_test 16 "timeout bulk put, evict client (2732)"
+run_test 16 "timeout bulk put, don't evict client (2732)"
test_17() {
# OBD_FAIL_PTLRPC_BULK_GET_NET 0x0503 | OBD_FAIL_ONCE
- # client will get evicted here
+ # OST bulk will time out here, client retries
sysctl -w lustre.fail_loc=0x80000503
# need to ensure we send an RPC
do_facet client cp /etc/termcap $DIR/$tfile
sleep $TIMEOUT
sysctl -w lustre.fail_loc=0
do_facet client "df $DIR"
- # expect cmp to fail
- do_facet client "cmp /etc/termcap $DIR/$tfile" && return 3
+ # expect cmp to succeed, client resent bulk
+ do_facet client "cmp /etc/termcap $DIR/$tfile" || return 3
do_facet client "rm $DIR/$tfile" || return 4
return 0
}
do_facet client cp /etc/termcap $f
sync
- # just use this write to trigger the client's eviction from the ost
-# OBD_FAIL_PTLRPC_BULK_GET_NET|OBD_FAIL_ONCE
- sysctl -w lustre.fail_loc=0x80000503
- do_facet client dd if=/dev/zero of=$f2 bs=4k count=1
- sync
- sysctl -w lustre.fail_loc=0
+ ost_evict_client
# allow recovery to complete
sleep $((TIMEOUT + 2))
# my understanding is that there should be nothing in the page
multiop $DIR/$tdir/$tfile Owy_wyc &
MULTI_PID=$!
usleep 500
-# OBD_FAIL_PTLRPC_BULK_GET_NET|OBD_FAIL_ONCE
- sysctl -w lustre.fail_loc=0x80000503
+ ost_evict_client
usleep 500
kill -USR1 $MULTI_PID
wait $MULTI_PID
case `uname -r` in
2.4*) FSTYPE=${FSTYPE:-ext3}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" ;;
-2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 60 69" ;;
+2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 48b" ;;
*) error "unsupported kernel" ;;
esac
[ -z "`lsmod|grep obdfilter`" ] &&
echo "skipping $TESTNAME (remote OST)" && return
- f="$DIR/f69"
+ f="$DIR/$tfile"
touch $f
+ if ! $DIRECTIO write ${f}.2 0 1; then
+ echo "skipping $TESTNAME - O_DIRECT not implemented"
+ return 0
+ fi
+
sysctl -w lustre.fail_loc=0x217
truncate $f 1 # vmtruncate() will ignore truncate() error.
$DIRECTIO write $f 0 2 && error "write succeeded, expect -ENOENT"
ls -l $F*
}
-run_test 75 "TEST join file"
+run_test 75 "TEST join file ===================================="
num_inodes() {
awk '/lustre_inode_cache|^inode_cache/ {print $2; exit}' /proc/slabinfo
cd $DIR
[ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return
- [ -z "`mount | grep " $DIR .*\<acl\>"`" ] && echo "skipping $TESTNAME (must have acl)" && return
[ -z "`grep acl $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have acl)" && return
$(which setfacl 2>/dev/null) || echo "skipping $TESTNAME (could not find setfacl)" && return
}
TRACE=${TRACE:-""}
+LPROC=/proc/fs/lustre
+
run_one() {
if ! grep -q $DIR /proc/mounts; then
$SETUP
run_test 24 "lfs df [-ih] [path] test ========================="
test_25() {
- [ -z "`mount | grep " $DIR1 .*\<acl\>"`" ] && echo "skipping $TESTNAME ($DIR1 must have acl)" && return
- [ -z "`mount | grep " $DIR2 .*\<acl\>"`" ] && echo "skipping $TESTNAME ($DIR2 must have acl)" && return
+ [ `cat $LPROC/mdc/MDC*MNT*/connect_flags | grep -c acl` -lt 2 ] && echo "skipping $TESTNAME (must have acl)" && return
mkdir $DIR1/d25 || error
touch $DIR1/d25/f1 || error
break;
case S_MAGIC_MINIX:
type = strdup("minix");
+ break;
case S_MAGIC_MINIX_30:
type = strdup("minix (30 char.)");
break;
do_facet mds "echo $UUID > /proc/fs/lustre/mds/${mds_svc}/evict_client"
}
+ost_evict_client() {
+ UUID=`cat /proc/fs/lustre/osc/*_MNT_*/uuid | head -n 1`
+ do_facet ost "echo $UUID > /proc/fs/lustre/obdfilter/ost_svc/evict_client"
+}
+
fail() {
facet_failover $*
df $MOUNT || error "post-failover df: $?"
FSTYPE=${FSTYPE:-ext3}
MDS_MOUNT_OPTS="user_xattr,acl,${MDS_MOUNT_OPTS:-""}"
-CLIENTOPT="user_xattr,acl,${CLIENTOPT:-""}"
+CLIENTOPT="user_xattr,${CLIENTOPT:-""}"
NETTYPE=${NETTYPE:-tcp}
NIDTYPE=${NIDTYPE:-$NETTYPE}
char *uuid, int ishow, int cooked,
char *type, int index, int rc)
{
- __u64 avail, used, total;
+ long long avail, used, total;
double ratio = 0;
char *suffix = "KMGTPEZY";
char tbuf[10], ubuf[10], abuf[10], rbuf[10];
size = lseek(fd, 0, SEEK_END);
if (size % JOIN_FILE_ALIGN) {
fprintf(stderr,"head file %s size %llu must be mutiple of %d\n",
- name_head, size, JOIN_FILE_ALIGN);
+ name_head, (long long)size, JOIN_FILE_ALIGN);
rc = -EINVAL;
goto out;
}
if (bover)
diff2str(dqb->dqb_btime, timebuf, now);
- sprintf(numbuf[0], "%llu", toqb(dqb->dqb_curspace));
- sprintf(numbuf[1], "%llu", dqb->dqb_bsoftlimit);
- sprintf(numbuf[2], "%llu", dqb->dqb_bhardlimit);
+ sprintf(numbuf[0], LPU64, toqb(dqb->dqb_curspace));
+ sprintf(numbuf[1], LPU64, dqb->dqb_bsoftlimit);
+ sprintf(numbuf[2], LPU64, dqb->dqb_bhardlimit);
printf(" %7s%c %6s %7s %7s",
numbuf[0], bover ? '*' : ' ', numbuf[1],
numbuf[2], bover > 1 ? timebuf : "");
if (iover)
diff2str(dqb->dqb_itime, timebuf, now);
- sprintf(numbuf[0], "%llu", dqb->dqb_curinodes);
- sprintf(numbuf[1], "%llu", dqb->dqb_isoftlimit);
- sprintf(numbuf[2], "%llu", dqb->dqb_ihardlimit);
+ sprintf(numbuf[0], LPU64, dqb->dqb_curinodes);
+ sprintf(numbuf[1], LPU64, dqb->dqb_isoftlimit);
+ sprintf(numbuf[2], LPU64, dqb->dqb_ihardlimit);
if (!ost_only)
printf(" %7s%c %6s %7s %7s",
numbuf[0], iover ? '*' : ' ', numbuf[1],
"\t-v|--verbose: print verbose config settings\n"
"\t-o: filesystem mount options:\n"
"\t\tflock/noflock: enable/disable flock support\n"
- "\t\troute=<gw>[-<gw>]:<low>[-<high>]: portal route to MDS\n"
+ "\t\tretry=<num>: number of times mount is retried by client\n"
"\t\tuser_xattr/nouser_xattr: enable/disable user extended "
"attributes\n"
);
val = atoi(opteq + 1);
*opteq = '\0';
if (!strcmp(opt, "retry")) {
- if (val >= 0 || val < MAX_RETRIES)
+ if (val > MAX_RETRIES)
+ retry = MAX_RETRIES;
+ else if (val >= 0)
retry = val;
- else
- retry = 0;
} else {
fprintf(stderr, "%s: unknown option '%s'. "
"Ignoring.\n", progname, opt);
if (modpipe != NULL)
pclose(modpipe);
/* use <= to include the initial mount before we retry */
- for (i = 0, rc = -EAGAIN; i <= retry && rc != 0; i++)
+ for (i = 0, rc = -EAGAIN; i <= retry && rc != 0; i++) {
rc = mount(source, target, "lustre", flags, &lmd);
+ if (rc && retry)
+ sleep(1 << max((i/2), 5));
+ }
}
if (rc) {
fprintf(stderr, "%s: mount(%s, %s) failed: %s\n", progname,
int rc;
#if HAVE_PROC_FS
char buf[MAX_STRING_SIZE];
- FILE *fp = fopen(DEVICES_LIST, "r");
+ FILE *fp = NULL;
+ if (argc != 1)
+ return CMD_HELP;
+
+ fp = fopen(DEVICES_LIST,"r");
if (fp == NULL) {
fprintf(stderr, "error: %s: %s opening "DEVICES_LIST"\n",
jt_cmdname(argv[0]), strerror(rc = errno));
return rc;
}
- if (argc != 1)
- return CMD_HELP;
-
while (fgets(buf, sizeof(buf), fp) != NULL)
printf("%s", buf);
CHECK_CDEFINE(OBD_CONNECT_TRANSNO);
CHECK_CDEFINE(OBD_CONNECT_IBITS);
CHECK_CDEFINE(OBD_CONNECT_JOIN);
+ CHECK_CDEFINE(OBD_CONNECT_ATTRFID);
+ CHECK_CDEFINE(OBD_CONNECT_NODEVOH);
COMMENT("Sizes and Offsets");
BLANK_LINE();