From: vitaly Date: Wed, 3 May 2006 11:05:57 +0000 (+0000) Subject: Merge b1_5 from b1_4 (20060502_1827) X-Git-Tag: v1_7_100~1^90~8^2~256 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=36b26311a6826f79537dedd9d11e85f910e7adf8;p=fs%2Flustre-release.git Merge b1_5 from b1_4 (20060502_1827) --- diff --git a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.12.patch b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.12.patch index b6439e6..520c031 100644 --- a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.12.patch +++ b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.12.patch @@ -178,7 +178,7 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c +{ + struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); + neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_GENERATION(neh) + 1); ++ (EXT_HDR_GEN(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2849,14 +2849,14 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) +#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) +#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + +#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) +#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) +#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+ ++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); + diff --git a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch index 9e78214..f829621 100644 --- a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch +++ b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch @@ -179,7 +179,7 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c +{ + struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); + neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_GENERATION(neh) + 1); ++ (EXT_HDR_GEN(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2839,14 +2839,14 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) +#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) +#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + +#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) +#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) +#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+ ++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); + diff --git a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch index bd95c54..993b237 100644 --- a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch +++ b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch @@ -178,7 +178,7 @@ Index: linux-stage/fs/ext3/extents.c +{ + struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); + neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_GENERATION(neh) + 1); ++ (EXT_HDR_GEN(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2834,14 +2834,14 @@ Index: linux-stage/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) +#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) +#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + +#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) +#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) +#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+ ++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); + diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 727f180..7fdebda 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -2,6 +2,13 @@ tbd Cluster File Systems, Inc. * version 1.4.7 * bug fixes +Severity : enhancement +Bugzilla : 9292 +Description: Getattr by fid +Details : Getting a file attributes by its fid, obtaining UPDATE|LOOKUP + locks, avoids extra getattr rpc requests to MDS, allows '/' to + have locks and avoids getattr rpc requests for it on every stat. + Severity : major Frequency : rare Bugzilla : 5719, 9635, 9792, 9684, @@ -45,10 +52,11 @@ Severity : enhancement Bugzilla : 9340 Description: allow number of MDS service threads to be changed at module load Details : It is now possible to change the number of MDS service threads - running. Adding "options mds mds_num_threads=N" will set the - number of threads for the next time Lustre is restarted (assuming - the "mds" module is also reloaded at that time). The default - number of threads will stay the same, 32 for most systems. + running. Adding "options mds mds_num_threads={N}" to the MDS's + /etc/modprobe.conf will set the number of threads for the next + time Lustre is restarted (assuming the "mds" module is also + reloaded at that time). The default number of threads will + stay the same, 32 for most systems. Severity : major Frequency : rare @@ -109,7 +117,7 @@ Details : When running an obd_echo server it did not start the ping_evictor service startup instead of the OBD startup. Severity : enhancement -Bugzilla : 10393 (patchless) +Bugzilla : 10193 (patchless) Description: Remove dependency on various unexported kernel interfaces. Details : No longer need reparent_to_init, exit_mm, exit_files, sock_getsockopt, filemap_populate, FMODE_EXEC, put_filp. @@ -157,6 +165,7 @@ Details : Use asynchronous set_info RPCs to send the "evict_by_nid" to and also offers similar improvements for other set_info RPCs. Severity : minor +Frequency : common Bugzilla : 10265 Description: excessive CPU usage during initial read phase on client Details : During the initial read phase on a client, it would agressively @@ -167,10 +176,43 @@ Details : During the initial read phase on a client, it would agressively /proc/fs/lustre/llite/*/max_read_ahead_whole_mb, 2MB by default). Severity : minor +Frequency : rare Bugzilla : 10450 Description: MDS crash when receiving packet with unknown intent. Details : Do not LBUG in unknown intent case, just return -EFAULT +Severity : enhancement +Bugzilla : 9293 +Description: MDS RPCs are serialised on client. This is unnecessary for some. +Details : Do not serialize getattr (non-intent version) and statfs. + +Severity : minor +Frequency : occasional, when OST network is overloaded/intermittent +Bugzilla : 10416 +Description: client evicted by OST after bulk IO timeout +Details : If a client sends a bulk IO request (read or write) the OST + may evict the client if it is unresposive to its data GET/PUT + request. This is incorrect if the network is overloaded (takes + too long to transfer the RPC data) or dropped the OST GET/PUT + request. There is no need to evict the client at all, since + the pinger and/or lock callbacks will handle this, and the + client can restart the bulk request. + +Severity : minor +Frequency : Always when mmapping file with no objects +Bugzilla : 10438 +Description: client crashes when mmapping file with no objects +Details : Check that we actually have objects in a file before doing any + operations on objects in ll_vm_open, ll_vm_close and + ll_glimpse_size. + +Severity : minor +Frequency : Rare +Bugzilla : 10484 +Description: Request leak when working with deleted CWD +Details : Introduce advanced request refcount tracking for requests + referenced from lustre intent. + ------------------------------------------------------------------------------ @@ -574,8 +616,8 @@ Severity : enhancement Bugzilla : 4928, 7341, 9758 Description: allow number of OST service threads to be specified Details : a module parameter allows the number of OST service threads - to be specified via "options ost ost_num_threads=X" in - /etc/modules.conf or /etc/modutils.conf. + to be specified via "options ost ost_num_threads={N}" in the + OSS's /etc/modules.conf or /etc/modprobe.conf. Severity : major Frequency : rare diff --git a/lustre/contrib/mpich-1.2.6-lustre.patch b/lustre/contrib/mpich-1.2.6-lustre.patch new file mode 100644 index 0000000..d32fab9 --- /dev/null +++ b/lustre/contrib/mpich-1.2.6-lustre.patch @@ -0,0 +1,1829 @@ +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.c 2005-12-06 11:54:37.883130927 -0500 +@@ -0,0 +1,37 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 2001 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++/* adioi.h has the ADIOI_Fns_struct define */ ++#include "adioi.h" ++ ++struct ADIOI_Fns_struct ADIO_LUSTRE_operations = { ++ ADIOI_LUSTRE_Open, /* Open */ ++ ADIOI_LUSTRE_ReadContig, /* ReadContig */ ++ ADIOI_LUSTRE_WriteContig, /* WriteContig */ ++ ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */ ++ ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */ ++ ADIOI_GEN_SeekIndividual, /* SeekIndividual */ ++ ADIOI_LUSTRE_Fcntl, /* Fcntl */ ++ ADIOI_LUSTRE_SetInfo, /* SetInfo */ ++ ADIOI_GEN_ReadStrided, /* ReadStrided */ ++ ADIOI_GEN_WriteStrided, /* WriteStrided */ ++ ADIOI_LUSTRE_Close, /* Close */ ++ ADIOI_LUSTRE_IreadContig, /* IreadContig */ ++ ADIOI_LUSTRE_IwriteContig, /* IwriteContig */ ++ ADIOI_LUSTRE_ReadDone, /* ReadDone */ ++ ADIOI_LUSTRE_WriteDone, /* WriteDone */ ++ ADIOI_LUSTRE_ReadComplete, /* ReadComplete */ ++ ADIOI_LUSTRE_WriteComplete, /* WriteComplete */ ++ ADIOI_LUSTRE_IreadStrided, /* IreadStrided */ ++ ADIOI_LUSTRE_IwriteStrided, /* IwriteStrided */ ++ ADIOI_GEN_Flush, /* Flush */ ++ ADIOI_LUSTRE_Resize, /* Resize */ ++ ADIOI_GEN_Delete, /* Delete */ ++}; +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_close.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_close.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_close.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_close.c 2005-12-06 11:54:37.895129327 -0500 +@@ -0,0 +1,32 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_close.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code) ++{ ++ int err; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_CLOSE"; ++#endif ++ ++ err = close(fd->fd_sys); ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_done.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_done.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_done.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_done.c 2005-12-06 11:54:37.898128927 -0500 +@@ -0,0 +1,188 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_done.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++int ADIOI_LUSTRE_ReadDone(ADIO_Request *request, ADIO_Status *status, int *error_code) ++{ ++#ifndef NO_AIO ++ int done=0; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_READDONE"; ++#endif ++#ifdef AIO_SUN ++ aio_result_t *result=0, *tmp; ++#else ++ int err; ++#endif ++#ifdef AIO_HANDLE_IN_AIOCB ++ struct aiocb *tmp1; ++#endif ++#endif ++ ++ if (*request == ADIO_REQUEST_NULL) { ++ *error_code = MPI_SUCCESS; ++ return 1; ++ } ++ ++#ifdef NO_AIO ++/* HP, FreeBSD, Linux */ ++#ifdef HAVE_STATUS_SET_BYTES ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ (*request)->fd->async_count--; ++ ADIOI_Free_request((ADIOI_Req_node *) (*request)); ++ *request = ADIO_REQUEST_NULL; ++ *error_code = MPI_SUCCESS; ++ return 1; ++#endif ++ ++#ifdef AIO_SUN ++ if ((*request)->queued) { ++ tmp = (aio_result_t *) (*request)->handle; ++ if (tmp->aio_return == AIO_INPROGRESS) { ++ done = 0; ++ *error_code = MPI_SUCCESS; ++ } ++ else if (tmp->aio_return != -1) { ++ result = (aio_result_t *) aiowait(0); /* dequeue any one request */ ++ done = 1; ++ (*request)->nbytes = tmp->aio_return; ++ *error_code = MPI_SUCCESS; ++ } ++ else { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(tmp->aio_errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(tmp->aio_errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ } /* if ((*request)->queued) ... */ ++ else { ++ /* ADIOI_Complete_Async completed this request, but request object ++ was not freed. */ ++ done = 1; ++ *error_code = MPI_SUCCESS; ++ } ++#ifdef HAVE_STATUS_SET_BYTES ++ if (done && ((*request)->nbytes != -1)) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ ++#endif ++ ++#ifdef AIO_HANDLE_IN_AIOCB ++/* IBM */ ++ if ((*request)->queued) { ++ tmp1 = (struct aiocb *) (*request)->handle; ++ errno = aio_error(tmp1->aio_handle); ++ if (errno == EINPROG) { ++ done = 0; ++ *error_code = MPI_SUCCESS; ++ } ++ else { ++ err = aio_return(tmp1->aio_handle); ++ (*request)->nbytes = err; ++ errno = aio_error(tmp1->aio_handle); ++ ++ done = 1; ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ } ++ } /* if ((*request)->queued) */ ++ else { ++ done = 1; ++ *error_code = MPI_SUCCESS; ++ } ++#ifdef HAVE_STATUS_SET_BYTES ++ if (done && ((*request)->nbytes != -1)) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ ++#elif (!defined(NO_AIO) && !defined(AIO_SUN)) ++/* DEC, SGI IRIX 5 and 6 */ ++ if ((*request)->queued) { ++ errno = aio_error((const struct aiocb *) (*request)->handle); ++ if (errno == EINPROGRESS) { ++ done = 0; ++ *error_code = MPI_SUCCESS; ++ } ++ else { ++ err = aio_return((struct aiocb *) (*request)->handle); ++ (*request)->nbytes = err; ++ errno = aio_error((struct aiocb *) (*request)->handle); ++ ++ done = 1; ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ } ++ } /* if ((*request)->queued) */ ++ else { ++ done = 1; ++ *error_code = MPI_SUCCESS; ++ } ++#ifdef HAVE_STATUS_SET_BYTES ++ if (done && ((*request)->nbytes != -1)) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ ++#endif ++ ++#ifndef NO_AIO ++ if (done) { ++ /* if request is still queued in the system, it is also there ++ on ADIOI_Async_list. Delete it from there. */ ++ if ((*request)->queued) ADIOI_Del_req_from_list(request); ++ ++ (*request)->fd->async_count--; ++ if ((*request)->handle) ADIOI_Free((*request)->handle); ++ ADIOI_Free_request((ADIOI_Req_node *) (*request)); ++ *request = ADIO_REQUEST_NULL; ++ } ++ return done; ++#endif ++ ++} ++ ++ ++int ADIOI_LUSTRE_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code) ++{ ++ return ADIOI_LUSTRE_ReadDone(request, status, error_code); ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_fcntl.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_fcntl.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_fcntl.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_fcntl.c 2005-12-06 11:54:37.901128527 -0500 +@@ -0,0 +1,126 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_fcntl.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++#include "adio_extern.h" ++/* #ifdef MPISGI ++#include "mpisgi2.h" ++#endif */ ++ ++void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code) ++{ ++ int i, ntimes; ++ ADIO_Offset curr_fsize, alloc_size, size, len, done; ++ ADIO_Status status; ++ char *buf; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_FCNTL"; ++#endif ++ ++ switch(flag) { ++ case ADIO_FCNTL_GET_FSIZE: ++ fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END); ++ if (fd->fp_sys_posn != -1) ++ lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); ++ if (fcntl_struct->fsize == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ break; ++ ++ case ADIO_FCNTL_SET_DISKSPACE: ++ /* will be called by one process only */ ++ /* On file systems with no preallocation function, I have to ++ explicitly write ++ to allocate space. Since there could be holes in the file, ++ I need to read up to the current file size, write it back, ++ and then write beyond that depending on how much ++ preallocation is needed. ++ read/write in sizes of no more than ADIOI_PREALLOC_BUFSZ */ ++ ++ curr_fsize = lseek(fd->fd_sys, 0, SEEK_END); ++ alloc_size = fcntl_struct->diskspace; ++ ++ size = ADIOI_MIN(curr_fsize, alloc_size); ++ ++ ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ; ++ buf = (char *) ADIOI_Malloc(ADIOI_PREALLOC_BUFSZ); ++ done = 0; ++ ++ for (i=0; i curr_fsize) { ++ memset(buf, 0, ADIOI_PREALLOC_BUFSZ); ++ size = alloc_size - curr_fsize; ++ ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ; ++ for (i=0; ifp_sys_posn != -1) ++ lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET); ++ *error_code = MPI_SUCCESS; ++ break; ++ ++ case ADIO_FCNTL_SET_IOMODE: ++ /* for implementing PFS I/O modes. will not occur in MPI-IO ++ implementation.*/ ++ if (fd->iomode != fcntl_struct->iomode) { ++ fd->iomode = fcntl_struct->iomode; ++ MPI_Barrier(MPI_COMM_WORLD); ++ } ++ *error_code = MPI_SUCCESS; ++ break; ++ ++ case ADIO_FCNTL_SET_ATOMICITY: ++ fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1; ++ *error_code = MPI_SUCCESS; ++ break; ++ ++ default: ++ FPRINTF(stderr, "Unknown flag passed to ADIOI_LUSTRE_Fcntl\n"); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_flush.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_flush.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_flush.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_flush.c 2005-12-06 11:54:37.903128261 -0500 +@@ -0,0 +1,14 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_flush.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_Flush(ADIO_File fd, int *error_code) ++{ ++ ADIOI_GEN_Flush(fd, error_code); ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.h mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.h +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.h 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre.h 2005-12-06 11:54:37.891129861 -0500 +@@ -0,0 +1,36 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre.h,v 1.2 2005/07/07 14:38:17 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#ifndef AD_UNIX_INCLUDE ++#define AD_UNIX_INCLUDE ++ ++/* temp*/ ++#define HAVE_ASM_TYPES_H 1 ++ ++#include ++#include ++#include ++#include ++#include "lustre/lustre_user.h" ++#include "adio.h" ++ ++#ifndef NO_AIO ++#ifdef AIO_SUN ++#include ++#else ++#include ++#ifdef NEEDS_ADIOCB_T ++typedef struct adiocb adiocb_t; ++#endif ++#endif ++#endif ++ ++int ADIOI_LUSTRE_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, ++ int wr, void *handle); ++ ++#endif +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_hints.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_hints.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_hints.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_hints.c 2005-12-06 11:54:37.904128127 -0500 +@@ -0,0 +1,130 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_hints.c,v 1.2 2005/07/07 14:38:17 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code) ++{ ++ char *value, *value_in_fd; ++ int flag, tmp_val, str_factor=-1, str_unit=0, start_iodev=-1; ++ struct lov_user_md lum = { 0 }; ++ int err, myrank, fd_sys, perm, amode, old_mask; ++ ++ if ( (fd->info) == MPI_INFO_NULL) { ++ /* This must be part of the open call. can set striping parameters ++ if necessary. */ ++ MPI_Info_create(&(fd->info)); ++ ++ /* has user specified striping or server buffering parameters ++ and do they have the same value on all processes? */ ++ if (users_info != MPI_INFO_NULL) { ++ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); ++ ++ MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL, ++ value, &flag); ++ if (flag) { ++ str_factor=atoi(value); ++ tmp_val = str_factor; ++ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); ++ if (tmp_val != str_factor) { ++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: the value for key \"striping_factor\" must be the same on all processes\n"); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL, ++ value, &flag); ++ if (flag) { ++ str_unit=atoi(value); ++ tmp_val = str_unit; ++ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); ++ if (tmp_val != str_unit) { ++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: the value for key \"striping_unit\" must be the same on all processes\n"); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL, ++ value, &flag); ++ if (flag) { ++ start_iodev=atoi(value); ++ tmp_val = start_iodev; ++ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm); ++ if (tmp_val != start_iodev) { ++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: the value for key \"start_iodevice\" must be the same on all processes\n"); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ /* if user has specified striping info, process 0 tries to set it */ ++ if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) { ++ MPI_Comm_rank(fd->comm, &myrank); ++ if (!myrank) { ++ if (fd->perm == ADIO_PERM_NULL) { ++ old_mask = umask(022); ++ umask(old_mask); ++ perm = old_mask ^ 0666; ++ } ++ else perm = fd->perm; ++ ++ amode = 0; ++ if (fd->access_mode & ADIO_CREATE) ++ amode = amode | O_CREAT; ++ if (fd->access_mode & ADIO_RDWR || ++ (fd->access_mode & ADIO_RDONLY && ++ fd->access_mode & ADIO_WRONLY)) ++ amode = amode | O_RDWR; ++ else if (fd->access_mode & ADIO_WRONLY) ++ amode = amode | O_WRONLY; ++ else if (fd->access_mode & ADIO_RDONLY) ++ amode = amode | O_RDONLY; ++ if (fd->access_mode & ADIO_EXCL) ++ amode = amode | O_EXCL; ++ ++ /* we need to create file so ensure this is set */ ++ amode = amode | O_LOV_DELAY_CREATE | O_CREAT; ++ ++ fd_sys = open(fd->filename, amode, perm); ++ if (fd_sys == -1) { ++ if (errno != EEXIST) ++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: Failure to open file %s %d %d\n",strerror(errno), amode, perm); ++ } else { ++ lum.lmm_magic = LOV_USER_MAGIC; ++ lum.lmm_pattern = 0; ++ lum.lmm_stripe_size = str_unit; ++ lum.lmm_stripe_count = str_factor; ++ lum.lmm_stripe_offset = start_iodev; ++ ++ err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum); ++ if (err == -1 && errno != EEXIST) { ++ FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: Failure to set stripe info %s \n",strerror(errno)); ++ } ++ ++ close(fd_sys); ++ } ++ ++ } ++ MPI_Barrier(fd->comm); ++ } ++ ++ ADIOI_Free(value); ++ } ++ ++ /* set the values for collective I/O and data sieving parameters */ ++ ADIOI_GEN_SetInfo(fd, users_info, error_code); ++ } ++ ++ else { ++ /* The file has been opened previously and fd->fd_sys is a valid ++ file descriptor. cannot set striping parameters now. */ ++ ++ /* set the values for collective I/O and data sieving parameters */ ++ ADIOI_GEN_SetInfo(fd, users_info, error_code); ++ ++ } ++ ++ *error_code = MPI_SUCCESS; ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iread.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iread.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iread.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iread.c 2005-12-06 11:54:37.904128127 -0500 +@@ -0,0 +1,106 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_iread.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_IreadContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int *error_code) ++{ ++ int len, typesize; ++#ifdef NO_AIO ++ ADIO_Status status; ++#else ++ int err=-1; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_IREADCONTIG"; ++#endif ++#endif ++ ++ (*request) = ADIOI_Malloc_request(); ++ (*request)->optype = ADIOI_READ; ++ (*request)->fd = fd; ++ (*request)->datatype = datatype; ++ ++ MPI_Type_size(datatype, &typesize); ++ len = count * typesize; ++ ++#ifdef NO_AIO ++ /* HP, FreeBSD, Linux */ ++ /* no support for nonblocking I/O. Use blocking I/O. */ ++ ++ ADIOI_LUSTRE_ReadContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset, ++ &status, error_code); ++ (*request)->queued = 0; ++#ifdef HAVE_STATUS_SET_BYTES ++ if (*error_code == MPI_SUCCESS) { ++ MPI_Get_elements(&status, MPI_BYTE, &len); ++ (*request)->nbytes = len; ++ } ++#endif ++ ++#else ++ if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind; ++ err = ADIOI_LUSTRE_aio(fd, buf, len, offset, 0, &((*request)->handle)); ++ if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len; ++ ++ (*request)->queued = 1; ++ ADIOI_Add_req_to_list(request); ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++#endif /* NO_AIO */ ++ ++ fd->fp_sys_posn = -1; /* set it to null. */ ++ fd->async_count++; ++} ++ ++ ++ ++void ADIOI_LUSTRE_IreadStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code) ++{ ++ ADIO_Status status; ++#ifdef HAVE_STATUS_SET_BYTES ++ int typesize; ++#endif ++ ++ *request = ADIOI_Malloc_request(); ++ (*request)->optype = ADIOI_READ; ++ (*request)->fd = fd; ++ (*request)->datatype = datatype; ++ (*request)->queued = 0; ++ (*request)->handle = 0; ++ ++/* call the blocking version. It is faster because it does data sieving. */ ++ ADIOI_LUSTRE_ReadStrided(fd, buf, count, datatype, file_ptr_type, ++ offset, &status, error_code); ++ ++ fd->async_count++; ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if (*error_code == MPI_SUCCESS) { ++ MPI_Type_size(datatype, &typesize); ++ (*request)->nbytes = count * typesize; ++ } ++#endif ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iwrite.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iwrite.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iwrite.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_iwrite.c 2005-12-06 11:54:37.906127861 -0500 +@@ -0,0 +1,268 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_iwrite.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_IwriteContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int *error_code) ++{ ++ int len, typesize; ++#ifdef NO_AIO ++ ADIO_Status status; ++#else ++ int err=-1; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_IWRITECONTIG"; ++#endif ++#endif ++ ++ *request = ADIOI_Malloc_request(); ++ (*request)->optype = ADIOI_WRITE; ++ (*request)->fd = fd; ++ (*request)->datatype = datatype; ++ ++ MPI_Type_size(datatype, &typesize); ++ len = count * typesize; ++ ++#ifdef NO_AIO ++ /* HP, FreeBSD, Linux */ ++ /* no support for nonblocking I/O. Use blocking I/O. */ ++ ++ ADIOI_LUSTRE_WriteContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset, ++ &status, error_code); ++ (*request)->queued = 0; ++#ifdef HAVE_STATUS_SET_BYTES ++ if (*error_code == MPI_SUCCESS) { ++ MPI_Get_elements(&status, MPI_BYTE, &len); ++ (*request)->nbytes = len; ++ } ++#endif ++ ++#else ++ if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind; ++ err = ADIOI_LUSTRE_aio(fd, buf, len, offset, 1, &((*request)->handle)); ++ if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len; ++ ++ (*request)->queued = 1; ++ ADIOI_Add_req_to_list(request); ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++#endif /* NO_AIO */ ++ ++ fd->fp_sys_posn = -1; /* set it to null. */ ++ fd->async_count++; ++} ++ ++ ++ ++ ++void ADIOI_LUSTRE_IwriteStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code) ++{ ++ ADIO_Status status; ++#ifdef HAVE_STATUS_SET_BYTES ++ int typesize; ++#endif ++ ++ *request = ADIOI_Malloc_request(); ++ (*request)->optype = ADIOI_WRITE; ++ (*request)->fd = fd; ++ (*request)->datatype = datatype; ++ (*request)->queued = 0; ++ (*request)->handle = 0; ++ ++/* call the blocking version. It is faster because it does data sieving. */ ++ ADIOI_LUSTRE_WriteStrided(fd, buf, count, datatype, file_ptr_type, ++ offset, &status, error_code); ++ ++ fd->async_count++; ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if (*error_code == MPI_SUCCESS) { ++ MPI_Type_size(datatype, &typesize); ++ (*request)->nbytes = count * typesize; ++ } ++#endif ++} ++ ++ ++/* This function is for implementation convenience. It is not user-visible. ++ It takes care of the differences in the interface for nonblocking I/O ++ on various Unix machines! If wr==1 write, wr==0 read. */ ++ ++int ADIOI_LUSTRE_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset, ++ int wr, void *handle) ++{ ++ int err=-1, fd_sys; ++ ++#ifndef NO_AIO ++ int error_code; ++#ifdef AIO_SUN ++ aio_result_t *result; ++#else ++ struct aiocb *aiocbp; ++#endif ++#endif ++ ++ fd_sys = fd->fd_sys; ++ ++#ifdef AIO_SUN ++ result = (aio_result_t *) ADIOI_Malloc(sizeof(aio_result_t)); ++ result->aio_return = AIO_INPROGRESS; ++ if (wr) err = aiowrite(fd_sys, buf, len, offset, SEEK_SET, result); ++ else err = aioread(fd_sys, buf, len, offset, SEEK_SET, result); ++ ++ if (err == -1) { ++ if (errno == EAGAIN) { ++ /* the man pages say EPROCLIM, but in reality errno is set to EAGAIN! */ ++ ++ /* exceeded the max. no. of outstanding requests. ++ complete all previous async. requests and try again.*/ ++ ++ ADIOI_Complete_async(&error_code); ++ if (wr) err = aiowrite(fd_sys, buf, len, offset, SEEK_SET, result); ++ else err = aioread(fd_sys, buf, len, offset, SEEK_SET, result); ++ ++ while (err == -1) { ++ if (errno == EAGAIN) { ++ /* sleep and try again */ ++ sleep(1); ++ if (wr) err = aiowrite(fd_sys, buf, len, offset, SEEK_SET, result); ++ else err = aioread(fd_sys, buf, len, offset, SEEK_SET, result); ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ *((aio_result_t **) handle) = result; ++#endif ++ ++#ifdef NO_FD_IN_AIOCB ++/* IBM */ ++ aiocbp = (struct aiocb *) ADIOI_Malloc(sizeof(struct aiocb)); ++ aiocbp->aio_whence = SEEK_SET; ++ aiocbp->aio_offset = offset; ++ aiocbp->aio_buf = buf; ++ aiocbp->aio_nbytes = len; ++ if (wr) err = aio_write(fd_sys, aiocbp); ++ else err = aio_read(fd_sys, aiocbp); ++ ++ if (err == -1) { ++ if (errno == EAGAIN) { ++ /* exceeded the max. no. of outstanding requests. ++ complete all previous async. requests and try again. */ ++ ++ ADIOI_Complete_async(&error_code); ++ if (wr) err = aio_write(fd_sys, aiocbp); ++ else err = aio_read(fd_sys, aiocbp); ++ ++ while (err == -1) { ++ if (errno == EAGAIN) { ++ /* sleep and try again */ ++ sleep(1); ++ if (wr) err = aio_write(fd_sys, aiocbp); ++ else err = aio_read(fd_sys, aiocbp); ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ *((struct aiocb **) handle) = aiocbp; ++ ++#elif (!defined(NO_AIO) && !defined(AIO_SUN)) ++/* DEC, SGI IRIX 5 and 6 */ ++ ++ aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1); ++ aiocbp->aio_fildes = fd_sys; ++ aiocbp->aio_offset = offset; ++ aiocbp->aio_buf = buf; ++ aiocbp->aio_nbytes = len; ++ ++#ifdef AIO_PRIORITY_DEFAULT ++/* DEC */ ++ aiocbp->aio_reqprio = AIO_PRIO_DFL; /* not needed in DEC Unix 4.0 */ ++ aiocbp->aio_sigevent.sigev_signo = 0; ++#else ++ aiocbp->aio_reqprio = 0; ++#endif ++ ++#ifdef AIO_SIGNOTIFY_NONE ++/* SGI IRIX 6 */ ++ aiocbp->aio_sigevent.sigev_notify = SIGEV_NONE; ++#else ++ aiocbp->aio_sigevent.sigev_signo = 0; ++#endif ++ ++ if (wr) err = aio_write(aiocbp); ++ else err = aio_read(aiocbp); ++ ++ if (err == -1) { ++ if (errno == EAGAIN) { ++ /* exceeded the max. no. of outstanding requests. ++ complete all previous async. requests and try again. */ ++ ++ ADIOI_Complete_async(&error_code); ++ if (wr) err = aio_write(aiocbp); ++ else err = aio_read(aiocbp); ++ ++ while (err == -1) { ++ if (errno == EAGAIN) { ++ /* sleep and try again */ ++ sleep(1); ++ if (wr) err = aio_write(aiocbp); ++ else err = aio_read(aiocbp); ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ } ++ else { ++ FPRINTF(stderr, "Unknown errno %d in ADIOI_LUSTRE_aio\n", errno); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++ } ++ } ++ ++ *((struct aiocb **) handle) = aiocbp; ++#endif ++ ++ return err; ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_open.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_open.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_open.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_open.c 2005-12-06 11:54:37.906127861 -0500 +@@ -0,0 +1,100 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_open.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code) ++{ ++ int perm, old_mask, amode; ++ struct lov_user_md lum = { 0 }; ++ char *value; ++ ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_OPEN"; ++#endif ++ ++ if (fd->perm == ADIO_PERM_NULL) { ++ old_mask = umask(022); ++ umask(old_mask); ++ perm = old_mask ^ 0666; ++ } ++ else perm = fd->perm; ++ ++ amode = 0; ++ if (fd->access_mode & ADIO_CREATE) ++ amode = amode | O_CREAT; ++ if (fd->access_mode & ADIO_RDONLY) ++ amode = amode | O_RDONLY; ++ if (fd->access_mode & ADIO_WRONLY) ++ amode = amode | O_WRONLY; ++ if (fd->access_mode & ADIO_RDWR) ++ amode = amode | O_RDWR; ++ if (fd->access_mode & ADIO_EXCL) ++ amode = amode | O_EXCL; ++ ++ fd->fd_sys = open(fd->filename, amode, perm); ++ ++ if (fd->fd_sys != -1) { ++ int err; ++ ++ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); ++ ++ /* get file striping information and set it in info */ ++ lum.lmm_magic = LOV_USER_MAGIC; ++ err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *) &lum); ++ ++ if (!err) { ++ sprintf(value, "%d", lum.lmm_stripe_size); ++ MPI_Info_set(fd->info, "striping_unit", value); ++ ++ sprintf(value, "%d", lum.lmm_stripe_count); ++ MPI_Info_set(fd->info, "striping_factor", value); ++ ++ sprintf(value, "%d", lum.lmm_stripe_offset); ++ MPI_Info_set(fd->info, "start_iodevice", value); ++ } ++ ADIOI_Free(value); ++ ++ if (fd->access_mode & ADIO_APPEND) ++ fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); ++ } ++ ++ ++ if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND)) ++ fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END); ++ ++ if (fd->fd_sys == -1) { ++#ifdef MPICH2 ++ if (errno == ENAMETOOLONG) ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_BAD_FILE, "**filenamelong", "**filenamelong %s %d", fd->filename, strlen(fd->filename) ); ++ else if (errno == ENOENT) ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_NO_SUCH_FILE, "**filenoexist", "**filenoexist %s", fd->filename ); ++ else if (errno == ENOTDIR || errno == ELOOP) ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_BAD_FILE, "**filenamedir", "**filenamedir %s", fd->filename ); ++ else if (errno == EACCES) { ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_ACCESS, "**fileaccess", "**fileaccess %s", ++ fd->filename ); ++ } ++ else if (errno == EROFS) { ++ /* Read only file or file system and write access requested */ ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_READ_ONLY, "**ioneedrd", 0 ); ++ } ++ else { ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ } ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(ADIO_FILE_NULL, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_rdcoll.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_rdcoll.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_rdcoll.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_rdcoll.c 2005-12-06 11:54:37.907127727 -0500 +@@ -0,0 +1,18 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_rdcoll.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code) ++{ ++ ADIOI_GEN_ReadStridedColl(fd, buf, count, datatype, file_ptr_type, ++ offset, status, error_code); ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_read.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_read.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_read.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_read.c 2005-12-06 11:54:37.907127727 -0500 +@@ -0,0 +1,67 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_read.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int *error_code) ++{ ++ int err=-1, datatype_size, len; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_READCONTIG"; ++#endif ++ ++ MPI_Type_size(datatype, &datatype_size); ++ len = datatype_size * count; ++ ++ if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { ++ if (fd->fp_sys_posn != offset) ++ lseek(fd->fd_sys, offset, SEEK_SET); ++ err = read(fd->fd_sys, buf, len); ++ fd->fp_sys_posn = offset + len; ++ /* individual file pointer not updated */ ++ } ++ else { /* read from curr. location of ind. file pointer */ ++ if (fd->fp_sys_posn != fd->fp_ind) ++ lseek(fd->fd_sys, fd->fp_ind, SEEK_SET); ++ err = read(fd->fd_sys, buf, len); ++ fd->fp_ind += err; ++ fd->fp_sys_posn = fd->fp_ind; ++ } ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if (err != -1) MPIR_Status_set_bytes(status, datatype, err); ++#endif ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++} ++ ++ ++ ++ ++void ADIOI_LUSTRE_ReadStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code) ++{ ++ ADIOI_GEN_ReadStrided(fd, buf, count, datatype, file_ptr_type, ++ offset, status, error_code); ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_resize.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_resize.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_resize.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_resize.c 2005-12-06 11:54:37.909127460 -0500 +@@ -0,0 +1,32 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_resize.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) ++{ ++ int err; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_RESIZE"; ++#endif ++ ++ err = ftruncate(fd->fd_sys, size); ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_seek.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_seek.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_seek.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_seek.c 2005-12-06 11:54:37.911127194 -0500 +@@ -0,0 +1,15 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_seek.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++ADIO_Offset ADIOI_LUSTRE_SeekIndividual(ADIO_File fd, ADIO_Offset offset, ++ int whence, int *error_code) ++{ ++ return ADIOI_GEN_SeekIndividual(fd, offset, whence, error_code); ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wait.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wait.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wait.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wait.c 2005-12-06 11:54:37.914126794 -0500 +@@ -0,0 +1,188 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_wait.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_ReadComplete(ADIO_Request *request, ADIO_Status *status, int *error_code) ++{ ++#ifndef NO_AIO ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_READCOMPLETE"; ++#endif ++#ifdef AIO_SUN ++ aio_result_t *result=0, *tmp; ++#else ++ int err; ++#endif ++#ifdef AIO_HANDLE_IN_AIOCB ++ struct aiocb *tmp1; ++#endif ++#endif ++ ++ if (*request == ADIO_REQUEST_NULL) { ++ *error_code = MPI_SUCCESS; ++ return; ++ } ++ ++#ifdef AIO_SUN ++ if ((*request)->queued) { /* dequeue it */ ++ tmp = (aio_result_t *) (*request)->handle; ++ while (tmp->aio_return == AIO_INPROGRESS) usleep(1000); ++ /* sleep for 1 ms., until done. Is 1 ms. a good number? */ ++ /* when done, dequeue any one request */ ++ result = (aio_result_t *) aiowait(0); ++ ++ (*request)->nbytes = tmp->aio_return; ++ ++ if (tmp->aio_return == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(tmp->aio_errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(tmp->aio_errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ ++/* aiowait only dequeues a request. The completion of a request can be ++ checked by just checking the aio_return flag in the handle passed ++ to the original aioread()/aiowrite(). Therefore, I need to ensure ++ that aiowait() is called exactly once for each previous ++ aioread()/aiowrite(). This is also taken care of in ADIOI_xxxDone */ ++ } ++ else *error_code = MPI_SUCCESS; ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if ((*request)->nbytes != -1) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ ++#endif ++ ++#ifdef AIO_HANDLE_IN_AIOCB ++/* IBM */ ++ if ((*request)->queued) { ++ do { ++ err = aio_suspend(1, (struct aiocb **) &((*request)->handle)); ++ } while ((err == -1) && (errno == EINTR)); ++ ++ tmp1 = (struct aiocb *) (*request)->handle; ++ if (err != -1) { ++ err = aio_return(tmp1->aio_handle); ++ (*request)->nbytes = err; ++ errno = aio_error(tmp1->aio_handle); ++ } ++ else (*request)->nbytes = -1; ++ ++/* on DEC, it is required to call aio_return to dequeue the request. ++ IBM man pages don't indicate what function to use for dequeue. ++ I'm assuming it is aio_return! POSIX says aio_return may be called ++ only once on a given handle. */ ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ } /* if ((*request)->queued) */ ++ else *error_code = MPI_SUCCESS; ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if ((*request)->nbytes != -1) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ ++#elif (!defined(NO_AIO) && !defined(AIO_SUN)) ++/* DEC, SGI IRIX 5 and 6 */ ++ if ((*request)->queued) { ++ do { ++ err = aio_suspend((const aiocb_t **) &((*request)->handle), 1, 0); ++ } while ((err == -1) && (errno == EINTR)); ++ ++ if (err != -1) { ++ err = aio_return((struct aiocb *) (*request)->handle); ++ (*request)->nbytes = err; ++ errno = aio_error((struct aiocb *) (*request)->handle); ++ } ++ else (*request)->nbytes = -1; ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++ return; ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else /* MPICH-1 */ ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error((*request)->fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++ } /* if ((*request)->queued) */ ++ else *error_code = MPI_SUCCESS; ++#ifdef HAVE_STATUS_SET_BYTES ++ if ((*request)->nbytes != -1) ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++#endif ++ ++#ifndef NO_AIO ++ if ((*request)->queued != -1) { ++ ++ /* queued = -1 is an internal hack used when the request must ++ be completed, but the request object should not be ++ freed. This is used in ADIOI_Complete_async, because the user ++ will call MPI_Wait later, which would require status to ++ be filled. Ugly but works. queued = -1 should be used only ++ in ADIOI_Complete_async. ++ This should not affect the user in any way. */ ++ ++ /* if request is still queued in the system, it is also there ++ on ADIOI_Async_list. Delete it from there. */ ++ if ((*request)->queued) ADIOI_Del_req_from_list(request); ++ ++ (*request)->fd->async_count--; ++ if ((*request)->handle) ADIOI_Free((*request)->handle); ++ ADIOI_Free_request((ADIOI_Req_node *) (*request)); ++ *request = ADIO_REQUEST_NULL; ++ } ++ ++#else ++/* HP, FreeBSD, Linux */ ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes); ++#endif ++ (*request)->fd->async_count--; ++ ADIOI_Free_request((ADIOI_Req_node *) (*request)); ++ *request = ADIO_REQUEST_NULL; ++ *error_code = MPI_SUCCESS; ++#endif ++} ++ ++ ++void ADIOI_LUSTRE_WriteComplete(ADIO_Request *request, ADIO_Status *status, int *error_code) ++{ ++ ADIOI_LUSTRE_ReadComplete(request, status, error_code); ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wrcoll.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wrcoll.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wrcoll.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_wrcoll.c 2005-12-06 11:54:37.914126794 -0500 +@@ -0,0 +1,18 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_wrcoll.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code) ++{ ++ ADIOI_GEN_WriteStridedColl(fd, buf, count, datatype, file_ptr_type, ++ offset, status, error_code); ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_write.c mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_write.c +--- mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_write.c 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/ad_lustre_write.c 2005-12-06 11:54:37.914126794 -0500 +@@ -0,0 +1,66 @@ ++/* -*- Mode: C; c-basic-offset:4 ; -*- */ ++/* ++ * $Id: ad_lustre_write.c,v 1.1.1.1 2004/11/04 11:03:38 liam Exp $ ++ * ++ * Copyright (C) 1997 University of Chicago. ++ * See COPYRIGHT notice in top-level directory. ++ */ ++ ++#include "ad_lustre.h" ++ ++void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int *error_code) ++{ ++ int err=-1, datatype_size, len; ++#if defined(MPICH2) || !defined(PRINT_ERR_MSG) ++ static char myname[] = "ADIOI_LUSTRE_WRITECONTIG"; ++#endif ++ ++ MPI_Type_size(datatype, &datatype_size); ++ len = datatype_size * count; ++ ++ if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { ++ if (fd->fp_sys_posn != offset) ++ lseek(fd->fd_sys, offset, SEEK_SET); ++ err = write(fd->fd_sys, buf, len); ++ fd->fp_sys_posn = offset + err; ++ /* individual file pointer not updated */ ++ } ++ else { /* write from curr. location of ind. file pointer */ ++ if (fd->fp_sys_posn != fd->fp_ind) ++ lseek(fd->fd_sys, fd->fp_ind, SEEK_SET); ++ err = write(fd->fd_sys, buf, len); ++ fd->fp_ind += err; ++ fd->fp_sys_posn = fd->fp_ind; ++ } ++ ++#ifdef HAVE_STATUS_SET_BYTES ++ if (err != -1 && status) MPIR_Status_set_bytes(status, datatype, err); ++#endif ++ ++ if (err == -1) { ++#ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", ++ "**io %s", strerror(errno)); ++#elif defined(PRINT_ERR_MSG) ++ *error_code = MPI_ERR_UNKNOWN; ++#else ++ *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, ++ myname, "I/O Error", "%s", strerror(errno)); ++ ADIOI_Error(fd, *error_code, myname); ++#endif ++ } ++ else *error_code = MPI_SUCCESS; ++} ++ ++ ++ ++void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code) ++{ ++ ADIOI_GEN_WriteStrided(fd, buf, count, datatype, file_ptr_type, ++ offset, status, error_code); ++} +diff -r -u --new-file mpich-1.2.6/romio/adio/ad_lustre/Makefile.in mpich-1.2.6/romio/adio/ad_lustre/Makefile.in +--- mpich-1.2.6/romio/adio/ad_lustre/Makefile.in 1969-12-31 19:00:00.000000000 -0500 ++++ mpich-1.2.6/romio/adio/ad_lustre/Makefile.in 2005-12-06 11:54:37.883130927 -0500 +@@ -0,0 +1,47 @@ ++CC = @CC@ ++AR = @AR@ ++LIBNAME = @LIBNAME@ ++srcdir = @srcdir@ ++CC_SHL = @CC_SHL@ ++SHLIBNAME = @SHLIBNAME@ ++ ++INCLUDE_DIR = -I@MPI_INCLUDE_DIR@ -I${srcdir}/../include -I../include ++CFLAGS = @CFLAGS@ $(INCLUDE_DIR) ++ ++C_COMPILE_SHL = $(CC_SHL) @CFLAGS@ $(INCLUDE_DIR) ++ ++@VPATH@ ++ ++AD_LUSTRE_OBJECTS = ad_lustre_close.o ad_lustre_read.o \ ++ ad_lustre_open.o ad_lustre_write.o ad_lustre_done.o \ ++ ad_lustre_fcntl.o ad_lustre_iread.o ad_lustre_iwrite.o ad_lustre_wait.o \ ++ ad_lustre_resize.o ad_lustre_hints.o \ ++ ad_lustre.o ++ ++ ++default: $(LIBNAME) ++ @if [ "@ENABLE_SHLIB@" != "none" ] ; then \ ++ $(MAKE) $(SHLIBNAME).la ;\ ++ fi ++ ++.SUFFIXES: $(SUFFIXES) .p .lo ++ ++.c.o: ++ $(CC) $(CFLAGS) -c $< ++.c.lo: ++ $(C_COMPILE_SHL) -c $< ++ @mv -f $*.o $*.lo ++ ++$(LIBNAME): $(AD_LUSTRE_OBJECTS) ++ $(AR) $(LIBNAME) $(AD_LUSTRE_OBJECTS) ++ ++AD_LUSTRE_LOOBJECTS=$(AD_LUSTRE_OBJECTS:.o=.lo) ++$(SHLIBNAME).la: $(AD_LUSTRE_LOOBJECTS) ++ $(AR) $(SHLIBNAME).la $(AD_LUSTRE_LOOBJECTS) ++ ++coverage: ++ -@for file in ${AD_LUSTRE_OBJECTS:.o=.c} ; do \ ++ gcov -b -f $$file ; done ++ ++clean: ++ @rm -f *.o *.lo +--- mpich-1.2.6/romio/Makefile.in 2004-01-27 18:27:35.000000000 -0500 ++++ mpich-1.2.6/romio/Makefile.in 2005-12-06 11:54:38.000000000 -0500 +@@ -14,7 +14,7 @@ DIRS = mpi-io adio/common + MPIO_DIRS = mpi-io + EXTRA_SRC_DIRS = @EXTRA_SRC_DIRS@ + FILE_SYS_DIRS = @FILE_SYS_DIRS@ +-ALL_DIRS = mpi-io mpi-io/fortran mpi2-other/info mpi2-other/info/fortran mpi2-other/array mpi2-other/array/fortran adio/common adio/ad_pfs adio/ad_piofs adio/ad_nfs adio/ad_ufs adio/ad_xfs adio/ad_hfs adio/ad_sfs adio/ad_testfs adio/ad_pvfs adio/ad_pvfs2 test ++ALL_DIRS = mpi-io mpi-io/fortran mpi2-other/info mpi2-other/info/fortran mpi2-other/array mpi2-other/array/fortran adio/common adio/ad_pfs adio/ad_piofs adio/ad_nfs adio/ad_ufs adio/ad_xfs adio/ad_hfs adio/ad_sfs adio/ad_testfs adio/ad_pvfs adio/ad_pvfs2 adio/ad_lustre test + SHELL = /bin/sh + + @VPATH@ +--- mpich-1.2.6/romio/configure.in 2004-08-02 09:37:31.000000000 -0400 ++++ mpich-1.2.6/romio/configure.in 2005-12-06 11:54:38.000000000 -0500 +@@ -90,7 +90,7 @@ MPIO_REQ_REAL_POBJECTS="_iotest.o _iowai + # + have_aio=no + # +-known_filesystems="nfs ufs pfs piofs pvfs pvfs2 testfs xfs hfs sfs" ++known_filesystems="nfs ufs pfs piofs pvfs pvfs2 testfs xfs hfs sfs lustre" + known_mpi_impls="mpich_mpi sgi_mpi hp_mpi cray_mpi lam_mpi" + # + # Defaults +@@ -1270,6 +1270,9 @@ fi + if test -n "$file_system_testfs"; then + AC_DEFINE(ROMIO_TESTFS,1,[Define for TESTFS]) + fi ++if test -n "$file_system_lustre"; then ++ AC_DEFINE(ROMIO_LUSTRE,1,[Define for LUSTRE]) ++fi + if test -n "$file_system_piofs"; then + AC_DEFINE(PIOFS,1,[Define for PIOFS]) + USER_CFLAGS="$USER_CFLAGS -bI:/usr/include/piofs/piofs.exp" +@@ -1634,7 +1637,7 @@ AC_OUTPUT(Makefile localdefs mpi-io/Make + adio/ad_nfs/Makefile adio/ad_ufs/Makefile \ + adio/ad_xfs/Makefile adio/ad_hfs/Makefile \ + adio/ad_sfs/Makefile adio/ad_pfs/Makefile \ +- adio/ad_testfs/Makefile adio/ad_pvfs/Makefile \ ++ adio/ad_testfs/Makefile adio/ad_lustre/Makefile adio/ad_pvfs/Makefile \ + adio/ad_pvfs2/Makefile adio/ad_piofs/Makefile \ + mpi-io/fortran/Makefile mpi2-other/info/fortran/Makefile \ + mpi2-other/array/fortran/Makefile test/fmisc.f \ +--- mpich-1.2.6/romio/configure 2004-08-04 12:08:28.000000000 -0400 ++++ mpich-1.2.6/romio/configure 2005-12-06 11:54:38.000000000 -0500 +@@ -623,7 +623,7 @@ MPIO_REQ_REAL_POBJECTS="_iotest.o _iowai + # + have_aio=no + # +-known_filesystems="nfs ufs pfs piofs pvfs pvfs2 testfs xfs hfs sfs" ++known_filesystems="nfs ufs pfs piofs pvfs pvfs2 testfs lustre xfs hfs sfs" + known_mpi_impls="mpich_mpi sgi_mpi hp_mpi cray_mpi lam_mpi" + # + # Defaults +@@ -4022,6 +4022,13 @@ if test -n "$file_system_testfs"; then + EOF + + fi ++if test -n "$file_system_lustre"; then ++ cat >> confdefs.h <<\EOF ++#define LUSTRE 1 ++EOF ++ ++fi ++ + if test -n "$file_system_piofs"; then + cat >> confdefs.h <<\EOF + #define PIOFS 1 +@@ -4746,7 +4753,7 @@ trap 'rm -fr `echo "Makefile localdefs m + adio/ad_xfs/Makefile adio/ad_hfs/Makefile \ + adio/ad_sfs/Makefile adio/ad_pfs/Makefile \ + adio/ad_testfs/Makefile adio/ad_pvfs/Makefile \ +- adio/ad_pvfs2/Makefile adio/ad_piofs/Makefile \ ++ adio/ad_pvfs2/Makefile adio/ad_piofs/Makefile adio/ad_lustre/Makefile\ + mpi-io/fortran/Makefile mpi2-other/info/fortran/Makefile \ + mpi2-other/array/fortran/Makefile test/fmisc.f \ + test/fcoll_test.f test/pfcoll_test.f test/fperf.f adio/include/romioconf.h" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 +@@ -4912,7 +4919,7 @@ CONFIG_FILES=\${CONFIG_FILES-"Makefile l + adio/ad_nfs/Makefile adio/ad_ufs/Makefile \ + adio/ad_xfs/Makefile adio/ad_hfs/Makefile \ + adio/ad_sfs/Makefile adio/ad_pfs/Makefile \ +- adio/ad_testfs/Makefile adio/ad_pvfs/Makefile \ ++ adio/ad_testfs/Makefile adio/ad_lustre/Makefile adio/ad_pvfs/Makefile \ + adio/ad_pvfs2/Makefile adio/ad_piofs/Makefile \ + mpi-io/fortran/Makefile mpi2-other/info/fortran/Makefile \ + mpi2-other/array/fortran/Makefile test/fmisc.f \ +--- mpich-1.2.6/romio/adio/include/romioconf.h.in 2004-08-04 12:08:28.000000000 -0400 ++++ mpich-1.2.6/romio/adio/include/romioconf.h.in 2005-12-06 11:54:38.000000000 -0500 +@@ -192,6 +192,9 @@ + /* Define for TESTFS */ + #undef ROMIO_TESTFS + ++/* Define for LUSTRE */ ++#undef LUSTRE ++ + /* Define for PIOFS */ + #undef PIOFS + +--- mpich-1.2.6/romio/adio/include/mpio_error.h 2002-11-15 11:26:23.000000000 -0500 ++++ mpich-1.2.6/romio/adio/include/mpio_error.h 2005-12-06 11:54:38.000000000 -0500 +@@ -62,6 +62,7 @@ + #define MPIR_ERR_FILETYPE 33 + #define MPIR_ERR_NO_NTFS 35 + #define MPIR_ERR_NO_TESTFS 36 ++#define MPIR_ERR_NO_LUSTRE 37 + + /* MPI_ERR_COMM */ + #ifndef MPIR_ERR_COMM_NULL +--- mpich-1.2.6/romio/adio/include/adioi_fs_proto.h 2003-06-24 18:48:23.000000000 -0400 ++++ mpich-1.2.6/romio/adio/include/adioi_fs_proto.h 2005-12-06 11:54:38.000000000 -0500 +@@ -261,6 +261,68 @@ ADIO_Offset ADIOI_UFS_SeekIndividual(ADI + void ADIOI_UFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); + #endif + ++#ifdef LUSTRE ++extern struct ADIOI_Fns_struct ADIO_LUSTRE_operations; ++ ++void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code); ++void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code); ++void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_IwriteContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code); ++void ADIOI_LUSTRE_IreadContig(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code); ++int ADIOI_LUSTRE_ReadDone(ADIO_Request *request, ADIO_Status *status, int ++ *error_code); ++int ADIOI_LUSTRE_WriteDone(ADIO_Request *request, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_ReadComplete(ADIO_Request *request, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_WriteComplete(ADIO_Request *request, ADIO_Status *status, ++ int *error_code); ++void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int ++ *error_code); ++void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_ReadStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Status *status, int ++ *error_code); ++void ADIOI_LUSTRE_IreadStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code); ++void ADIOI_LUSTRE_IwriteStrided(ADIO_File fd, void *buf, int count, ++ MPI_Datatype datatype, int file_ptr_type, ++ ADIO_Offset offset, ADIO_Request *request, int ++ *error_code); ++void ADIOI_LUSTRE_Flush(ADIO_File fd, int *error_code); ++void ADIOI_LUSTRE_Resize(ADIO_File fd, ADIO_Offset size, int *error_code); ++ADIO_Offset ADIOI_LUSTRE_SeekIndividual(ADIO_File fd, ADIO_Offset offset, ++ int whence, int *error_code); ++void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code); ++#endif ++ + #ifdef ROMIO_NTFS + extern struct ADIOI_Fns_struct ADIO_NTFS_operations; + +--- mpich-1.2.6/romio/adio/include/adio.h 2004-06-07 13:59:57.000000000 -0400 ++++ mpich-1.2.6/romio/adio/include/adio.h 2005-12-06 11:54:38.000000000 -0500 +@@ -276,6 +276,7 @@ typedef struct { + #define ADIO_NTFS 158 /* NTFS for Windows NT */ + #define ADIO_TESTFS 159 /* fake file system for testing */ + #define ADIO_PVFS2 160 /* PVFS2: 2nd generation PVFS */ ++#define ADIO_LUSTRE 161 /* Lustre */ + + #define ADIO_SEEK_SET SEEK_SET + #define ADIO_SEEK_CUR SEEK_CUR +--- mpich-1.2.6/romio/adio/common/setfn.c 2003-06-24 18:48:18.000000000 -0400 ++++ mpich-1.2.6/romio/adio/common/setfn.c 2005-12-06 11:54:38.000000000 -0500 +@@ -114,6 +114,16 @@ void ADIOI_SetFunctions(ADIO_File fd) + #endif + break; + ++ case ADIO_LUSTRE: ++#ifdef LUSTRE ++ *(fd->fns) = ADIO_LUSTRE_operations; ++#else ++ FPRINTF(stderr, "ADIOI_SetFunctions: ROMIO has not been configured to use the LUSTRE file system\n"); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++#endif ++ break; ++ ++ + default: + FPRINTF(stderr, "ADIOI_SetFunctions: Unsupported file system type\n"); + MPI_Abort(MPI_COMM_WORLD, 1); +--- mpich-1.2.6/romio/adio/common/ad_fstype.c 2003-09-04 16:24:44.000000000 -0400 ++++ mpich-1.2.6/romio/adio/common/ad_fstype.c 2005-12-06 11:54:38.000000000 -0500 +@@ -204,6 +204,11 @@ static void ADIO_FileSysType_fncall(char + } + } + #elif defined(LINUX) ++#warning use correct include ++# if defined (LUSTRE) ++#define LL_SUPER_MAGIC 0x0BD00BD0 ++# endif ++ + do { + err = statfs(filename, &fsbuf); + } while (err && (errno == ESTALE)); +@@ -218,6 +223,9 @@ static void ADIO_FileSysType_fncall(char + else { + /* FPRINTF(stderr, "%d\n", fsbuf.f_type);*/ + if (fsbuf.f_type == NFS_SUPER_MAGIC) *fstype = ADIO_NFS; ++# if defined (LUSTRE) ++ else if (fsbuf.f_type == LL_SUPER_MAGIC) *fstype = ADIO_LUSTRE; ++#endif + # if defined(ROMIO_PVFS) + else if (fsbuf.f_type == PVFS_SUPER_MAGIC) *fstype = ADIO_PVFS; + # endif +@@ -359,6 +367,11 @@ static void ADIO_FileSysType_prefix(char + { + *fstype = ADIO_TESTFS; + } ++ else if (!strncmp(filename, "lustre:", 7) ++ || !strncmp(filename, "LUSTRE:", 7)) ++ { ++ *fstype = ADIO_LUSTRE; ++ } + else { + #ifdef ROMIO_NTFS + *fstype = ADIO_NTFS; +@@ -644,6 +657,24 @@ void ADIO_ResolveFileType(MPI_Comm comm, + *ops = &ADIO_TESTFS_operations; + #endif + } ++ if (file_system == ADIO_LUSTRE) { ++#ifndef LUSTRE ++# ifdef MPICH2 ++ *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**iofstypeunsupported", 0); ++ return; ++# elif defined(PRINT_ERR_MSG) ++ FPRINTF(stderr, "ADIO_ResolveFileType: ROMIO has not been configured to use the LUSTRE file system\n"); ++ MPI_Abort(MPI_COMM_WORLD, 1); ++# else /* MPICH-1 */ ++ myerrcode = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ERR_NO_LUSTRE, ++ myname, (char *) 0, (char *) 0); ++ *error_code = ADIOI_Error(MPI_FILE_NULL, myerrcode, myname); ++# endif ++ return; ++#else ++ *ops = &ADIO_LUSTRE_operations; ++#endif ++ } + *error_code = MPI_SUCCESS; + *fstype = file_system; + return; diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index d7322cf..f99f3ff 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -231,6 +231,7 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags) #define OBD_CONNECT_TRANSNO 0x800ULL /* replay is sending initial transno */ #define OBD_CONNECT_IBITS 0x1000ULL /* support for inodebits locks */ #define OBD_CONNECT_JOIN 0x2000ULL /* files can be concatenated */ +#define OBD_CONNECT_ATTRFID 0x4000ULL /* Server supports GetAttr By Fid */ #define OBD_CONNECT_NODEVOH 0x8000ULL /* No open handle for special nodes */ #define OBD_CONNECT_EMPTY 0x80000000ULL /* fake: these are empty connect flags*/ @@ -239,7 +240,7 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags) #define MDS_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \ OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \ OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \ - OBD_CONNECT_NODEVOH) + OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX) @@ -601,13 +602,15 @@ typedef enum { } mds_reint_t; /* the disposition of the intent outlines what was executed */ -#define DISP_IT_EXECD 0x01 -#define DISP_LOOKUP_EXECD 0x02 -#define DISP_LOOKUP_NEG 0x04 -#define DISP_LOOKUP_POS 0x08 -#define DISP_OPEN_CREATE 0x10 -#define DISP_OPEN_OPEN 0x20 -#define DISP_ENQ_COMPLETE 0x40 +#define DISP_IT_EXECD 0x00000001 +#define DISP_LOOKUP_EXECD 0x00000002 +#define DISP_LOOKUP_NEG 0x00000004 +#define DISP_LOOKUP_POS 0x00000008 +#define DISP_OPEN_CREATE 0x00000010 +#define DISP_OPEN_OPEN 0x00000020 +#define DISP_ENQ_COMPLETE 0x00400000 +#define DISP_ENQ_OPEN_REF 0x00800000 +#define DISP_ENQ_CREATE_REF 0x01000000 /* INODE LOCK PARTS */ #define MDS_INODELOCK_LOOKUP 0x000001 /* dentry, mode, owner, group */ diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 7a656db..9298f31 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -441,8 +441,9 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, ldlm_res_iterator_t iter, void *closure); int ldlm_replay_locks(struct obd_import *imp); -void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *, - ldlm_iterator_t iter, void *data); +void ldlm_resource_iterate(struct ldlm_namespace *, struct ldlm_res_id *, + ldlm_iterator_t iter, void *data); + /* ldlm_flock.c */ int ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data); @@ -461,6 +462,7 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback, int ldlm_handle_convert(struct ptlrpc_request *req); int ldlm_handle_cancel(struct ptlrpc_request *req); int ldlm_del_waiting_lock(struct ldlm_lock *lock); +int ldlm_refresh_waiting_lock(struct ldlm_lock *lock); int ldlm_get_ref(void); void ldlm_put_ref(int force); diff --git a/lustre/include/lustre_mds.h b/lustre/include/lustre_mds.h index 6ea9a8d..3f7d238 100644 --- a/lustre/include/lustre_mds.h +++ b/lustre/include/lustre_mds.h @@ -104,6 +104,7 @@ int mds_reint_rec(struct mds_update_record *r, int offset, /* mdc/mdc_locks.c */ int it_disposition(struct lookup_intent *it, int flag); void it_set_disposition(struct lookup_intent *it, int flag); +void it_clear_disposition(struct lookup_intent *it, int flag); int it_open_error(int phase, struct lookup_intent *it); void mdc_set_lock_data(__u64 *lockh, void *data); int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid, diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 8c22e02..751542c 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -171,7 +171,7 @@ struct obd_async_page_ops { int (*ap_make_ready)(void *data, int cmd); int (*ap_refresh_count)(void *data, int cmd); void (*ap_fill_obdo)(void *data, int cmd, struct obdo *oa); - void (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc); + int (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc); }; /* the `oig' is passed down from a caller of obd rw methods. the callee diff --git a/lustre/include/obd_ost.h b/lustre/include/obd_ost.h index 50aace7..deb963b 100644 --- a/lustre/include/obd_ost.h +++ b/lustre/include/obd_ost.h @@ -18,7 +18,7 @@ struct osc_brw_async_args { int aa_requested_nob; int aa_nio_count; obd_count aa_page_count; - struct brw_page *aa_pga; + struct brw_page **aa_ppga; struct client_obd *aa_cli; struct list_head aa_oaps; }; diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch index 72f5dd5..0d9a5b8 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch @@ -181,7 +181,7 @@ Index: linux-2.4.21-rhel/fs/ext3/extents.c +{ + struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); + neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_GENERATION(neh) + 1); ++ (EXT_HDR_GEN(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2790,14 +2790,14 @@ Index: linux-2.4.21-rhel/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) +#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) +#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + +#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) +#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) +#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+ ++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); + diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch index 940b916..374bae9 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch @@ -181,7 +181,7 @@ Index: linux-2.4.21-suse2/fs/ext3/extents.c +{ + struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); + neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_GENERATION(neh) + 1); ++ (EXT_HDR_GEN(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2788,14 +2788,14 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) +#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) +#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + +#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) +#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) +#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+ ++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); + diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch index 571fb0f..8d4de9c 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch @@ -181,7 +181,7 @@ Index: linux-2.4.24/fs/ext3/extents.c +{ + struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); + neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_GENERATION(neh) + 1); ++ (EXT_HDR_GEN(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2776,14 +2776,14 @@ Index: linux-2.4.24/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) +#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) +#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + +#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) +#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) +#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+ ++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); + diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.29.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.29.patch index 125f747..84b9a12 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.29.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.29.patch @@ -181,7 +181,7 @@ Index: linux-2.4.29/fs/ext3/extents.c +{ + struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); + neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_GENERATION(neh) + 1); ++ (EXT_HDR_GEN(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2777,14 +2777,14 @@ Index: linux-2.4.29/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) +#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) +#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + +#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) +#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) +#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+ ++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); + diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.12.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.12.patch index b6439e6..520c031 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.12.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.6.12.patch @@ -178,7 +178,7 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c +{ + struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); + neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_GENERATION(neh) + 1); ++ (EXT_HDR_GEN(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2849,14 +2849,14 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) +#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) +#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + +#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) +#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) +#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+ ++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); + diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch index 9e78214..f829621 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch @@ -179,7 +179,7 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c +{ + struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); + neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_GENERATION(neh) + 1); ++ (EXT_HDR_GEN(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2839,14 +2839,14 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) +#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) +#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + +#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) +#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) +#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+ ++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); + diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch index bd95c54..993b237 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch @@ -178,7 +178,7 @@ Index: linux-stage/fs/ext3/extents.c +{ + struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); + neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | -+ (EXT_GENERATION(neh) + 1); ++ (EXT_HDR_GEN(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2834,14 +2834,14 @@ Index: linux-stage/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_HDR_GEN(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) +#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) +#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + +#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) +#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) +#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) -+ ++#define EXT_GENERATION(__tree__) EXT_HDR_GEN(EXT_ROOT_HDR(__tree__)) + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); + diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c index 205ff14..0f279e8 100644 --- a/lustre/ldlm/ldlm_extent.c +++ b/lustre/ldlm/ldlm_extent.c @@ -394,6 +394,10 @@ int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq, if (rc == -ERESTART) GOTO(restart, -ERESTART); *flags |= LDLM_FL_BLOCK_GRANTED; + /* this way we force client to wait for the lock + * endlessly once the lock is enqueued -bzzz */ + *flags |= LDLM_FL_NO_TIMEOUT; + } rc = 0; out: diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 3e150d5..410e438 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -249,10 +249,31 @@ static void waiting_locks_callback(unsigned long unused) * * Called with the namespace lock held. */ -static int ldlm_add_waiting_lock(struct ldlm_lock *lock) +static int __ldlm_add_waiting_lock(struct ldlm_lock *lock) { cfs_time_t timeout_rounded; + if (!list_empty(&lock->l_pending_chain)) + return 0; + + lock->l_callback_timeout =cfs_time_add(cfs_time_current(), + cfs_time_seconds(obd_timeout)/2); + + timeout_rounded = round_timeout(lock->l_callback_timeout); + + if (cfs_time_before(timeout_rounded, cfs_timer_deadline(&waiting_locks_timer)) || + !cfs_timer_is_armed(&waiting_locks_timer)) { + cfs_timer_arm(&waiting_locks_timer, timeout_rounded); + + } + list_add_tail(&lock->l_pending_chain, &waiting_locks_list); /* FIFO */ + return 1; +} + +static int ldlm_add_waiting_lock(struct ldlm_lock *lock) +{ + int ret; + l_check_ns_lock(lock->l_resource->lr_namespace); LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)); @@ -268,25 +289,12 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock) return 0; } - if (!list_empty(&lock->l_pending_chain)) { - spin_unlock_bh(&waiting_locks_spinlock); - LDLM_DEBUG(lock, "not re-adding to wait list"); - return 0; - } - - lock->l_callback_timeout =cfs_time_add(cfs_time_current(), - cfs_time_seconds(obd_timeout)/2); - - timeout_rounded = round_timeout(lock->l_callback_timeout); - - if (cfs_time_before(timeout_rounded, cfs_timer_deadline(&waiting_locks_timer)) || - !cfs_timer_is_armed(&waiting_locks_timer)) { - cfs_timer_arm(&waiting_locks_timer, timeout_rounded); - } - list_add_tail(&lock->l_pending_chain, &waiting_locks_list); /* FIFO */ + ret = __ldlm_add_waiting_lock(lock); spin_unlock_bh(&waiting_locks_spinlock); - LDLM_DEBUG(lock, "adding to wait list"); - return 1; + + LDLM_DEBUG(lock, "%sadding to wait list", + ret == 0 ? "not re-" : ""); + return ret; } /* @@ -296,25 +304,12 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock) * * Called with namespace lock held. */ -int ldlm_del_waiting_lock(struct ldlm_lock *lock) +int __ldlm_del_waiting_lock(struct ldlm_lock *lock) { struct list_head *list_next; - l_check_ns_lock(lock->l_resource->lr_namespace); - - if (lock->l_export == NULL) { - /* We don't have a "waiting locks list" on clients. */ - LDLM_DEBUG(lock, "client lock: no-op"); - return 0; - } - - spin_lock_bh(&waiting_locks_spinlock); - - if (list_empty(&lock->l_pending_chain)) { - spin_unlock_bh(&waiting_locks_spinlock); - LDLM_DEBUG(lock, "wasn't waiting"); + if (list_empty(&lock->l_pending_chain)) return 0; - } list_next = lock->l_pending_chain.next; if (lock->l_pending_chain.prev == &waiting_locks_list) { @@ -332,8 +327,57 @@ int ldlm_del_waiting_lock(struct ldlm_lock *lock) } list_del_init(&lock->l_pending_chain); + return 1; +} + +int ldlm_del_waiting_lock(struct ldlm_lock *lock) +{ + int ret; + + l_check_ns_lock(lock->l_resource->lr_namespace); + + if (lock->l_export == NULL) { + /* We don't have a "waiting locks list" on clients. */ + LDLM_DEBUG(lock, "client lock: no-op"); + return 0; + } + + spin_lock_bh(&waiting_locks_spinlock); + ret = __ldlm_del_waiting_lock(lock); spin_unlock_bh(&waiting_locks_spinlock); - LDLM_DEBUG(lock, "removed"); + + LDLM_DEBUG(lock, "%s", ret == 0 ? "wasn't waiting" : "removed"); + return ret; +} + +/* + * Prolong the lock + * + * Called with namespace lock held. + */ +int ldlm_refresh_waiting_lock(struct ldlm_lock *lock) +{ + l_check_ns_lock(lock->l_resource->lr_namespace); + + if (lock->l_export == NULL) { + /* We don't have a "waiting locks list" on clients. */ + LDLM_DEBUG(lock, "client lock: no-op"); + return 0; + } + + spin_lock_bh(&waiting_locks_spinlock); + + if (list_empty(&lock->l_pending_chain)) { + spin_unlock_bh(&waiting_locks_spinlock); + LDLM_DEBUG(lock, "wasn't waiting"); + return 0; + } + + __ldlm_del_waiting_lock(lock); + __ldlm_add_waiting_lock(lock); + spin_unlock_bh(&waiting_locks_spinlock); + + LDLM_DEBUG(lock, "refreshed"); return 1; } @@ -350,6 +394,10 @@ int ldlm_del_waiting_lock(struct ldlm_lock *lock) RETURN(0); } +int ldlm_refresh_waiting_lock(struct ldlm_lock *lock) +{ + RETURN(0); +} #endif /* __KERNEL__ */ static void ldlm_failed_ast(struct ldlm_lock *lock, int rc, @@ -1736,7 +1784,7 @@ EXPORT_SYMBOL(ldlm_replay_locks); EXPORT_SYMBOL(ldlm_resource_foreach); EXPORT_SYMBOL(ldlm_namespace_foreach); EXPORT_SYMBOL(ldlm_namespace_foreach_res); -EXPORT_SYMBOL(ldlm_change_cbdata); +EXPORT_SYMBOL(ldlm_resource_iterate); /* ldlm_lockd.c */ EXPORT_SYMBOL(ldlm_server_blocking_ast); @@ -1748,6 +1796,7 @@ EXPORT_SYMBOL(ldlm_handle_convert); EXPORT_SYMBOL(ldlm_del_waiting_lock); EXPORT_SYMBOL(ldlm_get_ref); EXPORT_SYMBOL(ldlm_put_ref); +EXPORT_SYMBOL(ldlm_refresh_waiting_lock); /* ldlm_resource.c */ EXPORT_SYMBOL(ldlm_namespace_new); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index bf2f655..a849a31 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -115,7 +115,7 @@ noreproc: lwd.lwd_lock = lock; - if (unlikely(flags & LDLM_FL_NO_TIMEOUT)) { + if (lock->l_flags & LDLM_FL_NO_TIMEOUT) { LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT"); lwi = LWI_INTR(interrupted_completion_wait, &lwd); } else { @@ -454,6 +454,9 @@ int ldlm_cli_enqueue(struct obd_export *exp, lock->l_remote_handle = reply->lock_handle; *flags = reply->lock_flags; lock->l_flags |= reply->lock_flags & LDLM_INHERIT_FLAGS; + /* move NO_TIMEOUT flag to the lock to force ldlm_lock_match() + * to wait with no timeout as well */ + lock->l_flags |= reply->lock_flags & LDLM_FL_NO_TIMEOUT; l_unlock(&ns->ns_lock); CDEBUG(D_INFO, "local: %p, remote cookie: "LPX64", flags: 0x%x\n", @@ -1068,8 +1071,8 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, } /* non-blocking function to manipulate a lock whose cb_data is being put away.*/ -void ldlm_change_cbdata(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, - ldlm_iterator_t iter, void *data) +void ldlm_resource_iterate(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, + ldlm_iterator_t iter, void *data) { struct ldlm_resource *res; ENTRY; diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 7b5fa8c..71336cc 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -135,7 +135,12 @@ void ll_intent_release(struct lookup_intent *it) it->it_magic = 0; it->it_op_release = 0; /* We are still holding extra reference on a request, need to free it */ - if (it_disposition(it, DISP_ENQ_COMPLETE)) + if (it_disposition(it, DISP_ENQ_OPEN_REF)) /* open req for llfile_open*/ + ptlrpc_req_finished(it->d.lustre.it_data); + if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */ + ptlrpc_req_finished(it->d.lustre.it_data); + if (it_disposition(it, DISP_ENQ_COMPLETE)) /* saved req from revalidate + * to lookup */ ptlrpc_req_finished(it->d.lustre.it_data); it->d.lustre.it_disposition = 0; @@ -214,9 +219,8 @@ restart: EXIT; } -static int revalidate_it_finish(struct ptlrpc_request *request, int offset, - struct lookup_intent *it, - struct dentry *de) +int revalidate_it_finish(struct ptlrpc_request *request, int offset, + struct lookup_intent *it, struct dentry *de) { int rc = 0; ENTRY; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index b8e10e8..29dcb3b 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -311,6 +311,8 @@ int ll_file_open(struct inode *inode, struct file *file) out: req = it->d.lustre.it_data; ptlrpc_req_finished(req); + if (req) + it_clear_disposition(it, DISP_ENQ_OPEN_REF); if (rc == 0) ll_open_complete(inode); return rc; @@ -732,6 +734,11 @@ int ll_glimpse_size(struct inode *inode, int ast_flags) CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", inode->i_ino); + if (!lli->lli_smd) { + CDEBUG(D_DLMTRACE, "No objects for inode %lu\n", inode->i_ino); + RETURN(0); + } + ast_flags |= LDLM_FL_HAS_INTENT; /* NOTE: this looks like DLM lock request, but it may not be one. Due @@ -1598,6 +1605,7 @@ int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it) out: /* this one is in place of ll_file_open */ ptlrpc_req_finished(it->d.lustre.it_data); + it_clear_disposition(it, DISP_ENQ_OPEN_REF); RETURN(rc); } @@ -1881,7 +1889,8 @@ static int ll_have_md_lock(struct dentry *de) struct lustre_handle lockh; struct ldlm_res_id res_id = { .name = {0} }; struct obd_device *obddev; - ldlm_policy_data_t policy = { .l_inodebits = {MDS_INODELOCK_UPDATE}}; + ldlm_policy_data_t policy = { .l_inodebits = { + MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP}}; int flags; ENTRY; @@ -1903,11 +1912,32 @@ static int ll_have_md_lock(struct dentry *de) RETURN(0); } +static int ll_inode_revalidate_fini(struct inode *inode, int rc) { + if (rc == -ENOENT) { /* Already unlinked. Just update nlink + * and return success */ + inode->i_nlink = 0; + /* This path cannot be hit for regular files unless in + * case of obscure races, so no need to to validate + * size. */ + if (!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode)) + return 0; + } + + if (rc) { + CERROR("failure %d inode %lu\n", rc, inode->i_ino); + return -abs(rc); + + } + + return 0; +} + int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) { struct inode *inode = dentry->d_inode; - struct ll_inode_info *lli; - struct lov_stripe_md *lsm; + struct ptlrpc_request *req = NULL; + struct obd_export *exp; int rc; ENTRY; @@ -1915,15 +1945,39 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) CERROR("REPORT THIS LINE TO PETER\n"); RETURN(0); } - lli = ll_i2info(inode); CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n", inode->i_ino, inode->i_generation, inode, dentry->d_name.name); #if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)) lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_REVALIDATE); #endif - if (!ll_have_md_lock(dentry)) { - struct ptlrpc_request *req = NULL; + exp = ll_i2mdcexp(inode); + + if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) { + struct lookup_intent oit = { .it_op = IT_GETATTR }; + struct mdc_op_data op_data; + + /* Call getattr by fid, so do not provide name at all. */ + ll_prepare_mdc_op_data(&op_data, dentry->d_parent->d_inode, + dentry->d_inode, NULL, 0, 0); + rc = mdc_intent_lock(exp, &op_data, NULL, 0, + /* we are not interested in name + based lookup */ + &oit, 0, &req, + ll_mdc_blocking_ast, 0); + if (rc < 0) { + rc = ll_inode_revalidate_fini(inode, rc); + GOTO (out, rc); + } + + rc = revalidate_it_finish(req, 1, &oit, dentry); + if (rc != 0) { + ll_intent_release(&oit); + GOTO(out, rc); + } + + ll_lookup_finish_locks(&oit, dentry); + } else if (!ll_have_md_lock(dentry)) { struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); struct ll_fid fid; obd_valid valid = OBD_MD_FLGETATTR; @@ -1937,37 +1991,26 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) } ll_inode2fid(&fid, inode); rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req); - if (rc == -ENOENT) { /* Already unlinked. Just update nlink - * and return success */ - inode->i_nlink = 0; - /* This path cannot be hit for regular files unless in - * case of obscure races, so * no need to to validate - * size. */ - if (!S_ISREG(inode->i_mode) && - !S_ISDIR(inode->i_mode) && - !S_ISDIR(inode->i_mode)) - RETURN(0); - } - if (rc) { - CERROR("failure %d inode %lu\n", rc, inode->i_ino); - RETURN(-abs(rc)); - } - rc = ll_prep_inode(sbi->ll_osc_exp, &inode, req, 0, NULL); - if (rc) { - ptlrpc_req_finished(req); + rc = ll_inode_revalidate_fini(inode, rc); RETURN(rc); } - ptlrpc_req_finished(req); + + rc = ll_prep_inode(sbi->ll_osc_exp, &inode, req, 0, NULL); + if (rc) + GOTO(out, rc); } - lsm = lli->lli_smd; - if (lsm == NULL) /* object not yet allocated, don't validate size */ - RETURN(0); + /* if object not yet allocated, don't validate size */ + if (ll_i2info(inode)->lli_smd == NULL) + GOTO(out, rc = 0); /* ll_glimpse_size will prefer locally cached writes if they extend * the file */ rc = ll_glimpse_size(inode, 0); + +out: + ptlrpc_req_finished(req); RETURN(rc); } diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 4b2132e..ea519ba 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -367,7 +367,7 @@ int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to); int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to); int ll_writepage(struct page *page); void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa); -void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc); +int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc); int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction); extern struct cache_definition ll_cache_definition; void ll_removepage(struct page *page); @@ -417,7 +417,8 @@ void ll_unhash_aliases(struct inode *); void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft); void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry); int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name); - +int revalidate_it_finish(struct ptlrpc_request *request, int offset, + struct lookup_intent *it, struct dentry *de); /* llite/llite_lib.c */ extern struct super_operations lustre_super_operations; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index cf0fc28..73c1631 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -145,7 +145,8 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) data->ocd_connect_flags |= OBD_CONNECT_RDONLY; if (sbi->ll_flags & LL_SBI_USER_XATTR) data->ocd_connect_flags |= OBD_CONNECT_XATTR; - data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_JOIN; + data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_JOIN | + OBD_CONNECT_ATTRFID; if (sbi->ll_flags & LL_SBI_FLOCK) { sbi->ll_fop = &ll_file_operations_flock; @@ -417,43 +418,97 @@ void lustre_throw_orphan_dentries(struct super_block *sb) #define lustre_throw_orphan_dentries(sb) #endif -static void prune_deathrow(struct ll_sb_info *sbi, int try) +static void prune_dir_dentries(struct inode *inode) { - LIST_HEAD(throw_away); - int locked = 0; - ENTRY; - - if (try) { - locked = spin_trylock(&sbi->ll_deathrow_lock); - } else { - spin_lock(&sbi->ll_deathrow_lock); - locked = 1; + struct dentry *dentry, *prev = NULL; + + /* due to lustre specific logic, a directory + * can have few dentries - a bug from VFS POV */ +restart: + spin_lock(&dcache_lock); + if (!list_empty(&inode->i_dentry)) { + dentry = list_entry(inode->i_dentry.prev, + struct dentry, d_alias); + /* in order to prevent infinite loops we + * break if previous dentry is busy */ + if (dentry != prev) { + prev = dentry; + dget_locked(dentry); + spin_unlock(&dcache_lock); + + /* try to kill all child dentries */ + lock_dentry(dentry); + shrink_dcache_parent(dentry); + unlock_dentry(dentry); + dput(dentry); + + /* now try to get rid of current dentry */ + d_prune_aliases(inode); + goto restart; + } } + spin_unlock(&dcache_lock); +} - if (!locked) { - EXIT; - return; - } +static void prune_deathrow_one(struct ll_inode_info *lli) +{ + struct inode *inode = ll_info2i(lli); - list_splice_init(&sbi->ll_deathrow, &throw_away); - spin_unlock(&sbi->ll_deathrow_lock); + /* first, try to drop any dentries - they hold a ref on the inode */ + if (S_ISDIR(inode->i_mode)) + prune_dir_dentries(inode); + else + d_prune_aliases(inode); - while (!list_empty(&throw_away)) { - struct ll_inode_info *lli; - struct inode *inode; - lli = list_entry(throw_away.next, struct ll_inode_info, - lli_dead_list); - list_del_init(&lli->lli_dead_list); + /* if somebody still uses it, leave it */ + LASSERT(atomic_read(&inode->i_count) > 0); + if (atomic_read(&inode->i_count) > 1) + goto out; - inode = ll_info2i(lli); - d_prune_aliases(inode); + CDEBUG(D_INODE, "inode %lu/%u(%d) looks a good candidate for prune\n", + inode->i_ino,inode->i_generation, atomic_read(&inode->i_count)); - CDEBUG(D_INODE, "prune duplicate inode %p inum %lu count %u\n", - inode, inode->i_ino, atomic_read(&inode->i_count)); - iput(inode); - } - EXIT; + /* seems nobody uses it anymore */ + inode->i_nlink = 0; + +out: + iput(inode); + return; +} + +static void prune_deathrow(struct ll_sb_info *sbi, int try) +{ + struct ll_inode_info *lli; + int empty; + + do { + if (need_resched()) + break; + + if (try) { + if (!spin_trylock(&sbi->ll_deathrow_lock)) + break; + } else { + spin_lock(&sbi->ll_deathrow_lock); + } + + empty = 1; + lli = NULL; + if (!list_empty(&sbi->ll_deathrow)) { + lli = list_entry(sbi->ll_deathrow.next, + struct ll_inode_info, + lli_dead_list); + list_del_init(&lli->lli_dead_list); + if (!list_empty(&sbi->ll_deathrow)) + empty = 0; + } + spin_unlock(&sbi->ll_deathrow_lock); + + if (lli) + prune_deathrow_one(lli); + + } while (empty == 0); } void client_common_put_super(struct super_block *sb) diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c index 06f23a1..5561ed4 100644 --- a/lustre/llite/llite_mmap.c +++ b/lustre/llite/llite_mmap.c @@ -459,6 +459,9 @@ static void ll_vm_open(struct vm_area_struct * vma) int count; spin_unlock(&lli->lli_lock); + + if (!lsm) + return; count = obd_join_lru(sbi->ll_osc_exp, lsm, 0); VMA_DEBUG(vma, "split %d unused locks from lru\n", count); } else { @@ -485,6 +488,9 @@ static void ll_vm_close(struct vm_area_struct *vma) int count; spin_unlock(&lli->lli_lock); + + if (!lsm) + return; count = obd_join_lru(sbi->ll_osc_exp, lsm, 1); VMA_DEBUG(vma, "join %d unused locks to lru\n", count); } else { diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index eb57f8f..29dc6c8 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -78,19 +78,17 @@ static int ll_test_inode(struct inode *inode, void *opaque) if (inode->i_state & (I_FREEING | I_CLEAR)) return 0; + if (inode->i_nlink == 0) + return 0; - atomic_inc(&inode->i_count); - inode->i_nlink = 0; - inode->i_state |= I_FREEING; - LASSERT(list_empty(&lli->lli_dead_list)); /* add "duplicate" inode into deathrow for destroy */ spin_lock(&sbi->ll_deathrow_lock); - list_add(&lli->lli_dead_list, &sbi->ll_deathrow); + if (list_empty(&lli->lli_dead_list)) { + atomic_inc(&inode->i_count); + list_add(&lli->lli_dead_list, &sbi->ll_deathrow); + } spin_unlock(&sbi->ll_deathrow_lock); - /* remove inode from dirty/io lists */ - list_del_init(&inode->i_list); - return 0; } @@ -510,7 +508,9 @@ static struct inode *ll_create_node(struct inode *dir, const char *name, LASSERT(it && it->d.lustre.it_disposition); + LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF)); request = it->d.lustre.it_data; + it_clear_disposition(it, DISP_ENQ_CREATE_REF); rc = ll_prep_inode(sbi->ll_osc_exp, &inode, request, 1, dir->i_sb); if (rc) GOTO(out, inode = ERR_PTR(rc)); diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index d655cf4..e340c1d 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -795,10 +795,11 @@ static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len) } /* called for each page in a completed rpc.*/ -void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc) +int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc) { struct ll_async_page *llap; struct page *page; + int ret = 0; ENTRY; llap = LLAP_FROM_COOKIE(data); @@ -823,6 +824,7 @@ void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc) llap->llap_defer_uptodate = 0; } else { ll_redirty_page(page); + ret = 1; } SetPageError(page); } @@ -838,7 +840,8 @@ void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc) end_page_writeback(page); } page_cache_release(page); - EXIT; + + RETURN(ret); } /* the kernel calls us here when a page is unhashed from the page cache. diff --git a/lustre/llite/xattr.c b/lustre/llite/xattr.c index 44664a7..3252af8 100644 --- a/lustre/llite/xattr.c +++ b/lustre/llite/xattr.c @@ -190,6 +190,7 @@ int ll_getxattr_common(struct inode *inode, const char *name, * we just have path resolution to the target inode, so we have great * chance that cached ACL is uptodate. */ +#ifdef CONFIG_FS_POSIX_ACL if (xattr_type == XATTR_ACL_ACCESS_T) { struct ll_inode_info *lli = ll_i2info(inode); struct posix_acl *acl; @@ -205,6 +206,7 @@ int ll_getxattr_common(struct inode *inode, const char *name, posix_acl_release(acl); RETURN(rc); } +#endif do_getxattr: ll_inode2fid(&fid, inode); diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 40f6b46..cf86943 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -1512,14 +1512,15 @@ static void lov_ap_fill_obdo(void *data, int cmd, struct obdo *oa) oa->o_stripe_idx = lap->lap_stripe; } -static void lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc) +static int lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc) { struct lov_async_page *lap = LAP_FROM_COOKIE(data); /* in a raid1 regime this would down a count of many ios * in flight, onl calling the caller_ops completion when all * the raid1 ios are complete */ - lap->lap_caller_ops->ap_completion(lap->lap_caller_data, cmd, oa, rc); + rc = lap->lap_caller_ops->ap_completion(lap->lap_caller_data,cmd,oa,rc); + return rc; } static struct obd_async_page_ops lov_async_page_ops = { @@ -1878,6 +1879,9 @@ static int lov_cancel_unused(struct obd_export *exp, int rc = 0, i; ENTRY; + if (!exp || !exp->exp_obd) + RETURN(-ENODEV); + lov = &exp->exp_obd->u.lov; if (lsm == NULL) { for (i = 0; i < lov->desc.ld_tgt_count; i++) { @@ -1895,9 +1899,6 @@ static int lov_cancel_unused(struct obd_export *exp, ASSERT_LSM_MAGIC(lsm); - if (!exp || !exp->exp_obd) - RETURN(-ENODEV); - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { struct lov_stripe_md submd; int err; @@ -2186,7 +2187,7 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, if (lov->tgts[loi->loi_ost_idx].ltd_exp == data->lock->l_conn_export && loi->loi_id == res_id->name[0] && - loi->loi_gr == res_id->name[2]) { + loi->loi_gr == res_id->name[1]) { *stripe = i; GOTO(out, rc = 0); } diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index a3a4372..b39bb1a 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ -275,8 +275,10 @@ int lov_prep_enqueue_set(struct obd_export *exp, struct lov_stripe_md *lsm, req->rq_buflen = sizeof(*req->rq_md) + sizeof(struct lov_oinfo); OBD_ALLOC(req->rq_md, req->rq_buflen); - if (req->rq_md == NULL) + if (req->rq_md == NULL) { + OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); + } req->rq_extent.start = start; req->rq_extent.end = end; @@ -381,8 +383,10 @@ int lov_prep_match_set(struct obd_export *exp, struct lov_stripe_md *lsm, req->rq_buflen = sizeof(*req->rq_md); OBD_ALLOC(req->rq_md, req->rq_buflen); - if (req->rq_md == NULL) + if (req->rq_md == NULL) { + OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); + } req->rq_extent.start = start; req->rq_extent.end = end; @@ -464,8 +468,10 @@ int lov_prep_cancel_set(struct obd_export *exp, struct lov_stripe_md *lsm, req->rq_buflen = sizeof(*req->rq_md); OBD_ALLOC(req->rq_md, req->rq_buflen); - if (req->rq_md == NULL) + if (req->rq_md == NULL) { + OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); + } req->rq_idx = loi->loi_ost_idx; req->rq_stripe = i; @@ -809,8 +815,10 @@ int lov_prep_brw_set(struct obd_export *exp, struct obdo *src_oa, GOTO(out, rc = -ENOMEM); req->rq_oa = obdo_alloc(); - if (req->rq_oa == NULL) + if (req->rq_oa == NULL) { + OBD_FREE(req, sizeof(*req)); GOTO(out, rc = -ENOMEM); + } if (src_oa) memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); @@ -819,8 +827,11 @@ int lov_prep_brw_set(struct obd_export *exp, struct obdo *src_oa, req->rq_buflen = sizeof(*req->rq_md); OBD_ALLOC(req->rq_md, req->rq_buflen); - if (req->rq_md == NULL) + if (req->rq_md == NULL) { + obdo_free(req->rq_oa); + OBD_FREE(req, sizeof(*req)); GOTO(out, rc = -ENOMEM); + } req->rq_idx = loi->loi_ost_idx; req->rq_stripe = i; @@ -916,8 +927,10 @@ int lov_prep_getattr_set(struct obd_export *exp, struct obdo *src_oa, req->rq_idx = loi->loi_ost_idx; req->rq_oa = obdo_alloc(); - if (req->rq_oa == NULL) + if (req->rq_oa == NULL) { + OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); + } memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); req->rq_oa->o_id = loi->loi_id; @@ -989,8 +1002,10 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obdo *src_oa, req->rq_idx = loi->loi_ost_idx; req->rq_oa = obdo_alloc(); - if (req->rq_oa == NULL) + if (req->rq_oa == NULL) { + OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); + } memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); req->rq_oa->o_id = loi->loi_id; @@ -1063,8 +1078,10 @@ int lov_prep_setattr_set(struct obd_export *exp, struct obdo *src_oa, req->rq_idx = loi->loi_ost_idx; req->rq_oa = obdo_alloc(); - if (req->rq_oa == NULL) + if (req->rq_oa == NULL) { + OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); + } memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); req->rq_oa->o_id = loi->loi_id; req->rq_oa->o_stripe_idx = i; @@ -1191,8 +1208,10 @@ int lov_prep_punch_set(struct obd_export *exp, struct obdo *src_oa, req->rq_idx = loi->loi_ost_idx; req->rq_oa = obdo_alloc(); - if (req->rq_oa == NULL) + if (req->rq_oa == NULL) { + OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); + } memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); req->rq_oa->o_id = loi->loi_id; req->rq_oa->o_stripe_idx = i; @@ -1271,8 +1290,10 @@ int lov_prep_sync_set(struct obd_export *exp, struct obdo *src_oa, req->rq_idx = loi->loi_ost_idx; req->rq_oa = obdo_alloc(); - if (req->rq_oa == NULL) + if (req->rq_oa == NULL) { + OBD_FREE(req, sizeof(*req)); GOTO(out_set, rc = -ENOMEM); + } memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); req->rq_oa->o_id = loi->loi_id; req->rq_oa->o_stripe_idx = i; diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 533f0d3..0a418a0 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -860,7 +860,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, return EXT_CONTINUE; } - tgen = EXT_GENERATION(EXT_ROOT_HDR(tree)); + tgen = EXT_GENERATION(tree); count = ext3_ext_calc_credits_for_insert(tree, path); ext3_up_truncate_sem(inode); @@ -873,7 +873,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, } ext3_down_truncate_sem(inode); - if (tgen != EXT_GENERATION(EXT_ROOT_HDR(tree))) { + if (tgen != EXT_GENERATION(tree)) { /* the tree has changed. so path can be invalid at moment */ lock_24kernel(); journal_stop(handle); diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 6158722..2ae5a1b 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -1,23 +1,31 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * This file is part of Lustre, http://www.lustre.org + * + * MDC internal definitions. + */ + #include void mdc_pack_req_body(struct ptlrpc_request *req, int offset, __u64 valid, struct ll_fid *fid, int ea_size); void mdc_pack_rep_body(struct ptlrpc_request *); void mdc_readdir_pack(struct ptlrpc_request *req, int pos, __u64 offset, - __u32 size, struct ll_fid *mdc_fid); + __u32 size, struct ll_fid *mdc_fid); void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset, int flags, struct mdc_op_data *data); void mdc_setattr_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *data, struct iattr *iattr, void *ea, int ealen, - void *ea2, int ea2len); + void *ea2, int ea2len); void mdc_create_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *op_data, const void *data, int datalen, - __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective, - __u64 rdev); + __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective, + __u64 rdev); void mdc_open_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *op_data, __u32 mode, __u64 rdev, __u32 flags, const void *data, int datalen); -void mdc_join_pack(struct ptlrpc_request *req, int offset, +void mdc_join_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *op_data, __u64 head_size); void mdc_unlink_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *data); @@ -27,7 +35,7 @@ void mdc_rename_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *data, const char *old, int oldlen, const char *new, int newlen); void mdc_close_pack(struct ptlrpc_request *req, int offset, struct obdo *oa, - int valid, struct obd_client_handle *och); + int valid, struct obd_client_handle *och); struct mdc_open_data { struct obd_client_handle *mod_och; @@ -46,28 +54,24 @@ static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck) lck->rpcl_it = NULL; } -static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck, +static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck, struct lookup_intent *it) { ENTRY; - down(&lck->rpcl_sem); - if (it) { + if (1 || !it || (it->it_op != IT_GETATTR && it->it_op != IT_LOOKUP)) { + down(&lck->rpcl_sem); + LASSERT(lck->rpcl_it == NULL); lck->rpcl_it = it; } } -static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, +static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, struct lookup_intent *it) { - EXIT; - if (it == NULL) { - LASSERT(it == lck->rpcl_it); - up(&lck->rpcl_sem); - return; - } - if (it) { + if (1 || !it || (it->it_op != IT_GETATTR && it->it_op != IT_LOOKUP)) { LASSERT(it == lck->rpcl_it); lck->rpcl_it = NULL; up(&lck->rpcl_sem); } + EXIT; } diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index 23c79f0..9355db2 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -148,8 +148,7 @@ void mdc_open_pack(struct ptlrpc_request *req, int offset, rec->cr_fsuid = current->fsuid; rec->cr_fsgid = current->fsgid; rec->cr_cap = current->cap_effective; - if (op_data != NULL) - rec->cr_fid = op_data->fid1; + rec->cr_fid = op_data->fid1; memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid)); rec->cr_mode = mode; rec->cr_flags = mds_pack_open_flags(flags); @@ -301,6 +300,7 @@ void mdc_getattr_pack(struct ptlrpc_request *req, int offset, int valid, b->suppgid = data->suppgids[0]; b->fid1 = data->fid1; + b->fid2 = data->fid2; if (data->name) { char *tmp; tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index 84de47c..c67cff3 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -58,6 +58,8 @@ void it_clear_disposition(struct lookup_intent *it, int flag) it->d.lustre.it_disposition &= ~flag; } +EXPORT_SYMBOL(it_clear_disposition); + static int it_to_lock_mode(struct lookup_intent *it) { /* CREAT needs to be tested before open (both could be set) */ @@ -151,8 +153,8 @@ int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid, res_id.name[0] = fid->id; res_id.name[1] = fid->generation; - ldlm_change_cbdata(class_exp2obd(exp)->obd_namespace, &res_id, it, - data); + ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, &res_id, + it, data); EXIT; return 0; @@ -556,8 +558,13 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data, ldlm_policy_data_t policy; int mode = LCK_CR; + /* As not all attributes are kept under update lock, e.g. + owner/group/acls are under lookup lock, we need both + ibits for GETATTR. */ policy.l_inodebits.bits = (it->it_op == IT_GETATTR) ? - MDS_INODELOCK_UPDATE : MDS_INODELOCK_LOOKUP; + MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP : + MDS_INODELOCK_LOOKUP; + rc = ldlm_lock_match(exp->exp_obd->obd_namespace, LDLM_FL_BLOCK_GRANTED, &res_id, LDLM_IBITS, &policy, LCK_CR, &lockh); @@ -578,7 +585,11 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data, sizeof(lockh)); it->d.lustre.it_lock_mode = mode; } - RETURN(rc); + + /* Only return failure if it was not GETATTR by cfid + (from inode_revalidate) */ + if (rc || op_data->namelen != 0) + RETURN(rc); } /* lookup_it may be called only after revalidate_it has run, because @@ -637,7 +648,7 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data, /* If we were revalidating a fid/name pair, mark the intent in * case we fail and get called again from lookup */ - if (op_data->fid2.id) { + if (op_data->fid2.id && (it->it_op != IT_GETATTR)) { it_set_disposition(it, DISP_ENQ_COMPLETE); /* Also: did we find the same inode? */ if (memcmp(&op_data->fid2, &mds_body->fid1, sizeof(op_data->fid2))) @@ -651,12 +662,18 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data, /* keep requests around for the multiple phases of the call * this shows the DISP_XX must guarantee we make it into the call */ - if (it_disposition(it, DISP_OPEN_CREATE) && - !it_open_error(DISP_OPEN_CREATE, it)) + if (!it_disposition(it, DISP_ENQ_CREATE_REF) && + it_disposition(it, DISP_OPEN_CREATE) && + !it_open_error(DISP_OPEN_CREATE, it)) { + it_set_disposition(it, DISP_ENQ_CREATE_REF); ptlrpc_request_addref(request); /* balanced in ll_create_node */ - if (it_disposition(it, DISP_OPEN_OPEN) && - !it_open_error(DISP_OPEN_OPEN, it)) + } + if (!it_disposition(it, DISP_ENQ_OPEN_REF) && + it_disposition(it, DISP_OPEN_OPEN) && + !it_open_error(DISP_OPEN_OPEN, it)) { + it_set_disposition(it, DISP_ENQ_OPEN_REF); ptlrpc_request_addref(request); /* balanced in ll_file_open */ + } if (it->it_op & IT_CREAT) { /* XXX this belongs in ll_create_it */ diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index 93bce95..ff11d94 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -162,7 +162,7 @@ int mdc_unlink(struct obd_export *exp, struct mdc_op_data *data, { struct obd_device *obd = class_exp2obd(exp); struct ptlrpc_request *req = *request; - int rc, size[] = { sizeof(struct mds_rec_unlink), data->namelen + 1}; + int rc, size[] = { sizeof(struct mds_rec_unlink), data->namelen + 1, 0}; ENTRY; LASSERT(req == NULL); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index dc9fc62..593ac4b 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -125,9 +125,7 @@ int mdc_getattr_common(struct obd_export *exp, unsigned int ea_size, req->rq_replen = lustre_msg_size(bufcount, size); - mdc_get_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); rc = ptlrpc_queue_wait(req); - mdc_put_rpc_lock(exp->exp_obd->u.cli.cl_rpc_lock, NULL); if (rc != 0) RETURN (rc); @@ -935,9 +933,7 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs, req->rq_replen = lustre_msg_size(1, &size); - mdc_get_rpc_lock(obd->u.cli.cl_rpc_lock, NULL); rc = ptlrpc_queue_wait(req); - mdc_put_rpc_lock(obd->u.cli.cl_rpc_lock, NULL); if (rc) GOTO(out, rc); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 9a1f1fe..252c1fc 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -786,7 +786,7 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, RETURN(0); } -static int mds_getattr_name(int offset, struct ptlrpc_request *req, +static int mds_getattr_lock(int offset, struct ptlrpc_request *req, int child_part, struct lustre_handle *child_lockh) { struct obd_device *obd = req->rq_export->exp_obd; @@ -820,6 +820,10 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, RETURN(-EFAULT); } namesize = lustre_msg_buflen(req->rq_reqmsg, offset + 1); + /* namesize less than 2 means we have empty name, probably came from + revalidate by cfid, so no point in having name to be set */ + if (namesize <= 1) + name = NULL; rc = mds_init_ucred(&uc, req, offset); if (rc) @@ -865,26 +869,26 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, } if (resent_req == 0) { - if (name) { - rc = mds_get_parent_child_locked(obd, &obd->u.mds, &body->fid1, - &parent_lockh, &dparent, - LCK_CR, - MDS_INODELOCK_UPDATE, - name, namesize, - child_lockh, &dchild, LCK_CR, - child_part); - } else { + if (name) { + rc = mds_get_parent_child_locked(obd, &obd->u.mds, + &body->fid1, + &parent_lockh, + &dparent, LCK_CR, + MDS_INODELOCK_UPDATE, + name, namesize, + child_lockh, &dchild, + LCK_CR, child_part); + } else { /* For revalidate by fid we always take UPDATE lock */ dchild = mds_fid2locked_dentry(obd, &body->fid2, NULL, LCK_CR, child_lockh, - NULL, 0, - MDS_INODELOCK_UPDATE); + NULL, 0, child_part); LASSERT(dchild); if (IS_ERR(dchild)) rc = PTR_ERR(dchild); - } - if (rc) - GOTO(cleanup, rc); + } + if (rc) + GOTO(cleanup, rc); } else { struct ldlm_lock *granted_lock; struct ll_fid child_fid; @@ -932,8 +936,10 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, if (resent_req == 0) { if (rc && dchild->d_inode) ldlm_lock_decref(child_lockh, LCK_CR); - ldlm_lock_decref(&parent_lockh, LCK_CR); - l_dput(dparent); + if (name) { + ldlm_lock_decref(&parent_lockh, LCK_CR); + l_dput(dparent); + } } l_dput(dchild); case 1: @@ -1490,10 +1496,10 @@ int mds_handle(struct ptlrpc_request *req) OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NAME_NET, 0); /* If this request gets a reconstructed reply, we won't be - * acquiring any new locks in mds_getattr_name, so we don't + * acquiring any new locks in mds_getattr_lock, so we don't * want to cancel. */ - rc = mds_getattr_name(MDS_REQ_REC_OFF, req, + rc = mds_getattr_lock(MDS_REQ_REC_OFF, req, MDS_INODELOCK_UPDATE, &lockh); /* this non-intent call (from an ioctl) is special */ req->rq_status = rc; @@ -1765,6 +1771,9 @@ void fsoptions_to_mds_flags(struct mds_obd *mds, char *options) if (len == sizeof("user_xattr") - 1 && memcmp(options, "user_xattr", len) == 0) { mds->mds_fl_user_xattr = 1; + } else if (len == sizeof("nouser_xattr") - 1 && + memcmp(options, "nouser_xattr", len) == 0) { + mds->mds_fl_user_xattr = 0; } else if (len == sizeof("acl") - 1 && memcmp(options, "acl", len) == 0) { #ifdef CONFIG_FS_POSIX_ACL @@ -1773,6 +1782,13 @@ void fsoptions_to_mds_flags(struct mds_obd *mds, char *options) CWARN("ignoring unsupported acl mount option\n"); memmove(options, p, strlen(p) + 1); #endif + } else if (len == sizeof("noacl") - 1 && + memcmp(options, "noacl", len) == 0) { +#ifdef CONFIG_FS_POSIX_ACL + mds->mds_fl_acl = 0; +#else + memmove(options, p, strlen(p) + 1); +#endif } options = ++p; @@ -2356,7 +2372,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns, getattr_part = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE; - rep->lock_policy_res2 = mds_getattr_name(offset, req, + rep->lock_policy_res2 = mds_getattr_lock(offset, req, getattr_part, &lockh); /* FIXME: LDLM can set req->rq_status. MDS sets policy_res{1,2} with disposition and status. diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index 585cbb0..f87d7cd 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -307,7 +307,7 @@ cleanup_dentry: static int mds_create_objects(struct ptlrpc_request *req, int offset, struct mds_update_record *rec, struct mds_obd *mds, struct obd_device *obd, - struct dentry *dchild, void **handle, + struct dentry *dchild, void **handle, obd_id **ids) { struct inode *inode = dchild->d_inode; @@ -691,14 +691,14 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, UNLOCK_INODE_MUTEX(dchild->d_inode); RETURN(-EEXIST); } - if (rec->ur_flags & MDS_OPEN_JOIN_FILE) { + if (rec->ur_flags & MDS_OPEN_JOIN_FILE) { UNLOCK_INODE_MUTEX(dchild->d_inode); - rc = mds_join_file(rec, req, dchild, lockh); + rc = mds_join_file(rec, req, dchild, lockh); if (rc) RETURN(rc); LOCK_INODE_MUTEX(dchild->d_inode); - } - if (!(body->valid & OBD_MD_FLEASIZE) && + } + if (!(body->valid & OBD_MD_FLEASIZE) && !(body->valid & OBD_MD_FLMODEASIZE)) { /* no EA: create objects */ rc = mds_create_objects(req, 2, rec, mds, obd, @@ -1305,8 +1305,11 @@ int mds_mfd_close(struct ptlrpc_request *req, int offset,struct obd_device *obd, if (iattr.ia_valid != 0) { handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); + if (IS_ERR(handle)) { + rc = PTR_ERR(handle); + handle = NULL; + GOTO(cleanup, rc); + } rc = fsfilt_setattr(obd, mfd->mfd_dentry, handle, &iattr, 0); if (rc) CERROR("error in setattr(%s): rc %d\n", fidname, rc); diff --git a/lustre/mds/mds_xattr.c b/lustre/mds/mds_xattr.c index 45884c5..e9a4825 100644 --- a/lustre/mds/mds_xattr.c +++ b/lustre/mds/mds_xattr.c @@ -41,7 +41,11 @@ #include #include -#include "mds_internal.h" +#include "mds_internal.h" + +#ifndef XATTR_NAME_ACL_ACCESS +#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access" +#endif static int mds_getxattr_pack_msg(struct ptlrpc_request *req, struct dentry *de, diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 59979690..b3f0e1f 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -361,7 +361,7 @@ static const char *obd_connect_names[] = { "initial_transno", "inode_bit_locks", "join_file", - "", + "getattr_by_fid", "no_oh_for_devices", NULL }; diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 22d43f7..6e9073e 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -705,7 +705,7 @@ static void ec_ap_fill_obdo(void *data, int cmd, struct obdo *oa) memcpy(oa, &eap->eap_eas->eas_oa, sizeof(*oa)); } -static void ec_ap_completion(void *data, int cmd, struct obdo *oa, int rc) +static int ec_ap_completion(void *data, int cmd, struct obdo *oa, int rc) { struct echo_async_page *eap = EAP_FROM_COOKIE(data); struct echo_async_state *eas; @@ -728,6 +728,7 @@ static void ec_ap_completion(void *data, int cmd, struct obdo *oa, int rc) list_add(&eap->eap_item, &eas->eas_avail); cfs_waitq_signal(&eas->eas_waitq); spin_unlock_irqrestore(&eas->eas_lock, flags); + return 0; } static struct obd_async_page_ops ec_async_page_ops = { diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index a7c115b..7f6ebb5 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -2263,8 +2263,7 @@ int filter_setattr(struct obd_export *exp, struct obdo *oa, int rc; ENTRY; - dentry = __filter_oa2dentry(exp->exp_obd, oa, - __FUNCTION__, 1); + dentry = __filter_oa2dentry(exp->exp_obd, oa, __FUNCTION__, 1); if (IS_ERR(dentry)) RETURN(PTR_ERR(dentry)); @@ -2739,6 +2738,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, llog_cancel(llog_get_context(obd, fcc->lgc_subsys + 1), NULL, 1, fcc, 0); } + fcc = NULL; GOTO(cleanup, rc = -ENOENT); } @@ -2807,6 +2807,7 @@ cleanup: fsfilt_add_journal_cb(obd, 0, oti ? oti->oti_handle : handle, filter_cancel_cookies_cb, fcc); + fcc = NULL; } rc = filter_finish_transno(exp, oti, rc); rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0); @@ -2819,6 +2820,8 @@ cleanup: filter_parent_unlock(dparent); case 2: f_dput(dchild); + if (fcc != NULL) + OBD_FREE(fcc, sizeof(*fcc)); case 1: pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); break; diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 667da17..18148c8 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -9,20 +9,20 @@ struct osc_async_page { int oap_magic; - int oap_cmd; + unsigned short oap_cmd; + unsigned short oap_interrupted:1; + struct list_head oap_pending_item; struct list_head oap_urgent_item; struct list_head oap_rpc_item; obd_off oap_obj_off; - obd_off oap_page_off; - int oap_count; - obd_flag oap_brw_flags; + unsigned oap_page_off; enum async_flags oap_async_flags; - unsigned long oap_interrupted:1; + struct brw_page oap_brw_page; + struct oig_callback_context oap_occ; - cfs_page_t *oap_page; struct obd_io_group *oap_oig; struct ptlrpc_request *oap_request; struct client_obd *oap_cli; @@ -32,6 +32,10 @@ struct osc_async_page { void *oap_caller_data; }; +#define oap_page oap_brw_page.pg +#define oap_count oap_brw_page.count +#define oap_brw_flags oap_brw_page.flag + #define OAP_FROM_COOKIE(c) \ (LASSERT(((struct osc_async_page *)(c))->oap_magic == OAP_MAGIC), \ (struct osc_async_page *)(c)) @@ -63,4 +67,9 @@ int lproc_osc_attach_seqstat(struct obd_device *dev); static inline int lproc_osc_attach_seqstat(struct obd_device *dev) {return 0;} #endif +#ifndef min_t +#define min_t(type,x,y) \ + ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; }) +#endif + #endif /* OSC_INTERNAL_H */ diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 6392f30..c508bd8 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -643,41 +643,43 @@ static void osc_update_grant(struct client_obd *cli, struct ost_body *body) * via the LOV, and it _knows_ it's reading inside the file, it's just that * this stripe never got written at or beyond this stripe offset yet. */ static void handle_short_read(int nob_read, obd_count page_count, - struct brw_page *pga) + struct brw_page **pga) { char *ptr; + int i = 0; /* skip bytes read OK */ while (nob_read > 0) { LASSERT (page_count > 0); - if (pga->count > nob_read) { + if (pga[i]->count > nob_read) { /* EOF inside this page */ - ptr = cfs_kmap(pga->pg) + (pga->off & ~CFS_PAGE_MASK); - memset(ptr + nob_read, 0, pga->count - nob_read); - cfs_kunmap(pga->pg); + ptr = cfs_kmap(pga[i]->pg) + + (pga[i]->off & ~CFS_PAGE_MASK); + memset(ptr + nob_read, 0, pga[i]->count - nob_read); + cfs_kunmap(pga[i]->pg); page_count--; - pga++; + i++; break; } - nob_read -= pga->count; + nob_read -= pga[i]->count; page_count--; - pga++; + i++; } /* zero remaining pages */ while (page_count-- > 0) { - ptr = cfs_kmap(pga->pg) + (pga->off & ~CFS_PAGE_MASK); - memset(ptr, 0, pga->count); - cfs_kunmap(pga->pg); - pga++; + ptr = cfs_kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK); + memset(ptr, 0, pga[i]->count); + cfs_kunmap(pga[i]->pg); + i++; } } static int check_write_rcs(struct ptlrpc_request *request, int requested_nob, int niocount, - obd_count page_count, struct brw_page *pga) + obd_count page_count, struct brw_page **pga) { int *remote_rcs, i; @@ -729,24 +731,25 @@ static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2) } static obd_count osc_checksum_bulk(int nob, obd_count pg_count, - struct brw_page *pga) + struct brw_page **pga) { __u32 cksum = ~0; + int i = 0; LASSERT (pg_count > 0); while (nob > 0 && pg_count > 0) { - char *ptr = cfs_kmap(pga->pg); - int off = pga->off & ~CFS_PAGE_MASK; - int count = pga->count > nob ? nob : pga->count; + char *ptr = cfs_kmap(pga[i]->pg); + int off = pga[i]->off & ~CFS_PAGE_MASK; + int count = pga[i]->count > nob ? nob : pga[i]->count; cksum = crc32_le(cksum, ptr + off, count); - cfs_kunmap(pga->pg); - LL_CDEBUG_PAGE(D_PAGE, pga->pg, "off %d checksum %x\n", + cfs_kunmap(pga[i]->pg); + LL_CDEBUG_PAGE(D_PAGE, pga[i]->pg, "off %d checksum %x\n", off, cksum); - nob -= pga->count; + nob -= pga[i]->count; pg_count--; - pga++; + i++; } return cksum; @@ -754,7 +757,7 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count, static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, struct lov_stripe_md *lsm, obd_count page_count, - struct brw_page *pga, int *requested_nobp, + struct brw_page **pga, int *requested_nobp, int *niocountp, struct ptlrpc_request **reqp) { struct ptlrpc_request *req; @@ -776,7 +779,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, pool = ((cmd & OBD_BRW_WRITE) != 0) ? imp->imp_rq_pool : NULL; for (niocount = i = 1; i < page_count; i++) - if (!can_merge_pages(&pga[i - 1], &pga[i])) + if (!can_merge_pages(pga[i - 1], pga[i])) niocount++; size[0] = sizeof(*body); @@ -814,8 +817,8 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, LASSERT (page_count > 0); for (requested_nob = i = 0; i < page_count; i++, niobuf++) { - struct brw_page *pg = &pga[i]; - struct brw_page *pg_prev = pg - 1; + struct brw_page *pg = pga[i]; + struct brw_page *pg_prev = pga[i - 1]; LASSERT(pg->count > 0); LASSERTF((pg->off & ~CFS_PAGE_MASK) + pg->count <= CFS_PAGE_SIZE, @@ -833,7 +836,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, LASSERTF(i == 0 || pg->off > pg_prev->off, "i %d p_c %u\n", i, page_count); #endif - LASSERT((pga[0].flag & OBD_BRW_SRVLOCK) == + LASSERT((pga[0]->flag & OBD_BRW_SRVLOCK) == (pg->flag & OBD_BRW_SRVLOCK)); ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~CFS_PAGE_MASK, @@ -887,7 +890,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, } static void check_write_csum(__u32 cli, __u32 srv, int requested_nob, - obd_count page_count, struct brw_page *pga) + obd_count page_count, struct brw_page **pga) { __u32 new_csum; @@ -921,7 +924,7 @@ static void check_write_csum(__u32 cli, __u32 srv, int requested_nob, static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, int requested_nob, int niocount, - obd_count page_count, struct brw_page *pga, + obd_count page_count, struct brw_page **pga, int rc) { const lnet_process_id_t *peer = @@ -1028,7 +1031,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, static int osc_brw_internal(int cmd, struct obd_export *exp,struct obdo *oa, struct lov_stripe_md *lsm, - obd_count page_count, struct brw_page *pga) + obd_count page_count, struct brw_page **pga) { int requested_nob; int niocount; @@ -1065,7 +1068,7 @@ static int brw_interpret(struct ptlrpc_request *request, int requested_nob = aa->aa_requested_nob; int niocount = aa->aa_nio_count; obd_count page_count = aa->aa_page_count; - struct brw_page *pga = aa->aa_pga; + struct brw_page **pga = aa->aa_ppga; ENTRY; rc = osc_brw_fini_request(request, oa, requested_nob, niocount, @@ -1075,7 +1078,7 @@ static int brw_interpret(struct ptlrpc_request *request, static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, obd_count page_count, - struct brw_page *pga, struct ptlrpc_request_set *set) + struct brw_page **pga, struct ptlrpc_request_set *set) { struct ptlrpc_request *request; int requested_nob; @@ -1095,7 +1098,7 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, aa->aa_requested_nob = requested_nob; aa->aa_nio_count = nio_count; aa->aa_page_count = page_count; - aa->aa_pga = pga; + aa->aa_ppga = pga; request->rq_interpret_reply = brw_interpret; ptlrpc_set_add_req(set, request); @@ -1103,11 +1106,6 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, RETURN (rc); } -#ifndef min_t -#define min_t(type,x,y) \ - ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; }) -#endif - /* * ugh, we want disk allocation on the target to happen in offset order. we'll * follow sedgewicks advice and stick to the dead simple shellsort -- it'll do @@ -1115,10 +1113,10 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, * insertion sort that swaps elements that are strides apart, shrinking the * stride down until its '1' and the array is sorted. */ -static void sort_brw_pages(struct brw_page *array, int num) +static void sort_brw_pages(struct brw_page **array, int num) { int stride, i, j; - struct brw_page tmp; + struct brw_page *tmp; if (num == 1) return; @@ -1130,7 +1128,7 @@ static void sort_brw_pages(struct brw_page *array, int num) for (i = stride ; i < num ; i++) { tmp = array[i]; j = i; - while (j >= stride && array[j - stride].off > tmp.off) { + while (j >= stride && array[j - stride]->off > tmp->off) { array[j] = array[j - stride]; j -= stride; } @@ -1139,37 +1137,59 @@ static void sort_brw_pages(struct brw_page *array, int num) } while (stride > 1); } -static obd_count max_unfragmented_pages(struct brw_page *pg, obd_count pages) +static obd_count max_unfragmented_pages(struct brw_page **pg, obd_count pages) { int count = 1; int offset; + int i = 0; - LASSERT (pages > 0); - offset = pg->off & (CFS_PAGE_SIZE - 1); + LASSERT (pages > 0); + offset = pg[i]->off & (CFS_PAGE_SIZE - 1); for (;;) { pages--; if (pages == 0) /* that's all */ return count; - if (offset + pg->count < CFS_PAGE_SIZE) - return count; /* doesn't end on page boundary */ + if (offset + pg[i]->count < CFS_PAGE_SIZE) + return count; /* doesn't end on page boundary */ - pg++; - offset = pg->off & (CFS_PAGE_SIZE - 1); - if (offset != 0) /* doesn't start on page boundary */ - return count; + i++; + offset = pg[i]->off & (CFS_PAGE_SIZE - 1); + if (offset != 0) /* doesn't start on page boundary */ + return count; count++; } } +static struct brw_page **osc_build_ppga(struct brw_page *pga, obd_count count) +{ + struct brw_page **ppga; + int i; + + OBD_ALLOC(ppga, sizeof(*ppga) * count); + if (ppga == NULL) + return NULL; + + for (i = 0; i < count; i++) + ppga[i] = pga + i; + return ppga; +} + +static void osc_release_ppga(struct brw_page **ppga, obd_count count) +{ + LASSERT(ppga != NULL); + OBD_FREE(ppga, sizeof(*ppga) * count); +} + static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *md, obd_count page_count, struct brw_page *pga, struct obd_trans_info *oti) { struct obdo *saved_oa = NULL; - int rc; + struct brw_page **ppga, **orig; + int rc, page_count_orig; ENTRY; if (cmd & OBD_BRW_CHECK) { @@ -1184,6 +1204,11 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, rc = 0; + orig = ppga = osc_build_ppga(pga, page_count); + if (ppga == NULL) + RETURN(-ENOMEM); + page_count_orig = page_count; + while (page_count) { obd_count pages_per_brw; @@ -1192,8 +1217,8 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, else pages_per_brw = page_count; - sort_brw_pages(pga, pages_per_brw); - pages_per_brw = max_unfragmented_pages(pga, pages_per_brw); + sort_brw_pages(ppga, pages_per_brw); + pages_per_brw = max_unfragmented_pages(ppga, pages_per_brw); if (saved_oa != NULL) { /* restore previously saved oa */ @@ -1202,19 +1227,22 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, /* save a copy of oa (brw will clobber it) */ saved_oa = obdo_alloc(); if (saved_oa == NULL) - RETURN(-ENOMEM); + GOTO(out, rc = -ENOMEM); *saved_oa = *oa; } - rc = osc_brw_internal(cmd, exp, oa, md, pages_per_brw, pga); + rc = osc_brw_internal(cmd, exp, oa, md, pages_per_brw, ppga); if (rc != 0) break; page_count -= pages_per_brw; - pga += pages_per_brw; + ppga += pages_per_brw; } +out: + osc_release_ppga(orig, page_count_orig); + if (saved_oa != NULL) obdo_free(saved_oa); @@ -1226,6 +1254,9 @@ static int osc_brw_async(int cmd, struct obd_export *exp, struct obdo *oa, struct brw_page *pga, struct ptlrpc_request_set *set, struct obd_trans_info *oti) { + struct brw_page **ppga, **orig; + int page_count_orig; + int rc = 0; ENTRY; if (cmd & OBD_BRW_CHECK) { @@ -1238,27 +1269,32 @@ static int osc_brw_async(int cmd, struct obd_export *exp, struct obdo *oa, RETURN(0); } + orig = ppga = osc_build_ppga(pga, page_count); + if (ppga == NULL) + RETURN(-ENOMEM); + page_count_orig = page_count; + while (page_count) { obd_count pages_per_brw; - int rc; if (page_count > PTLRPC_MAX_BRW_PAGES) pages_per_brw = PTLRPC_MAX_BRW_PAGES; else pages_per_brw = page_count; - sort_brw_pages(pga, pages_per_brw); - pages_per_brw = max_unfragmented_pages(pga, pages_per_brw); + sort_brw_pages(ppga, pages_per_brw); + pages_per_brw = max_unfragmented_pages(ppga, pages_per_brw); - rc = async_internal(cmd, exp, oa, md, pages_per_brw, pga, set); + rc = async_internal(cmd, exp, oa, md, pages_per_brw, ppga, set); if (rc != 0) - RETURN(rc); + break; page_count -= pages_per_brw; - pga += pages_per_brw; + ppga += pages_per_brw; } - RETURN(0); + osc_release_ppga(orig, page_count_orig); + RETURN(rc); } static void osc_check_rpcs(struct client_obd *cli); @@ -1414,13 +1450,27 @@ static void osc_process_ar(struct osc_async_rc *ar, struct ptlrpc_request *req, ar->ar_force_sync = 0; } +static void osc_oap_to_pending(struct osc_async_page *oap) +{ + struct loi_oap_pages *lop; + + if (oap->oap_cmd & OBD_BRW_WRITE) + lop = &oap->oap_loi->loi_write_lop; + else + lop = &oap->oap_loi->loi_read_lop; + + if (oap->oap_async_flags & ASYNC_URGENT) + list_add(&oap->oap_urgent_item, &lop->lop_urgent); + list_add_tail(&oap->oap_pending_item, &lop->lop_pending); + lop_update_pending(oap->oap_cli, lop, oap->oap_cmd, 1); +} + /* this must be called holding the loi list lock to give coverage to exit_cache, * async_flag maintenance, and oap_request */ static void osc_ap_completion(struct client_obd *cli, struct obdo *oa, struct osc_async_page *oap, int sent, int rc) { ENTRY; - osc_exit_cache(cli, oap, sent); oap->oap_async_flags = 0; oap->oap_interrupted = 0; @@ -1446,14 +1496,24 @@ static void osc_ap_completion(struct client_obd *cli, struct obdo *oa, } if (oap->oap_oig) { + osc_exit_cache(cli, oap, sent); oig_complete_one(oap->oap_oig, &oap->oap_occ, rc); oap->oap_oig = NULL; EXIT; return; } - oap->oap_caller_ops->ap_completion(oap->oap_caller_data, oap->oap_cmd, - oa, rc); + rc = oap->oap_caller_ops->ap_completion(oap->oap_caller_data, + oap->oap_cmd, oa, rc); + + /* ll_ap_completion (from llite) drops PG_locked. so, a new + * I/O on the page could start, but OSC calls it under lock + * and thus we can add oap back to pending safely */ + if (rc) + /* upper layer wants to leave the page on pending queue */ + osc_oap_to_pending(oap); + else + osc_exit_cache(cli, oap, sent); EXIT; } @@ -1467,7 +1527,7 @@ static int brw_interpret_oap(struct ptlrpc_request *request, rc = osc_brw_fini_request(request, aa->aa_oa, aa->aa_requested_nob, aa->aa_nio_count, aa->aa_page_count, - aa->aa_pga, rc); + aa->aa_ppga, rc); CDEBUG(D_INODE, "request %p aa %p rc %d\n", request, aa, rc); @@ -1501,7 +1561,7 @@ static int brw_interpret_oap(struct ptlrpc_request *request, client_obd_list_unlock(&cli->cl_loi_list_lock); obdo_free(aa->aa_oa); - OBD_FREE(aa->aa_pga, aa->aa_page_count * sizeof(struct brw_page)); + OBD_FREE(aa->aa_ppga, aa->aa_page_count * sizeof(struct brw_page *)); RETURN(0); } @@ -1511,7 +1571,7 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, int page_count, int cmd) { struct ptlrpc_request *req; - struct brw_page *pga = NULL; + struct brw_page **pga = NULL; int requested_nob, nio_count; struct osc_brw_async_args *aa; struct obdo *oa = NULL; @@ -1540,12 +1600,13 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, ops = oap->oap_caller_ops; caller_data = oap->oap_caller_data; } - pga[i].off = oap->oap_obj_off + oap->oap_page_off; - pga[i].pg = oap->oap_page; - pga[i].count = oap->oap_count; - pga[i].flag = oap->oap_brw_flags; + pga[i] = &oap->oap_brw_page; + pga[i]->off = oap->oap_obj_off + oap->oap_page_off; + /*pga[i]->pg = oap->oap_page; + pga[i]->count = oap->oap_count; + pga[i]->flag = oap->oap_brw_flags;*/ CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n", - pga[i].pg, cfs_page_index(oap->oap_page), oap, pga[i].flag); + pga[i]->pg, cfs_page_index(oap->oap_page), oap, pga[i]->flag); i++; } @@ -1567,7 +1628,7 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, aa->aa_requested_nob = requested_nob; aa->aa_nio_count = nio_count; aa->aa_page_count = page_count; - aa->aa_pga = pga; + aa->aa_ppga = pga; aa->aa_cli = cli; out: @@ -1731,14 +1792,6 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, continue; } osc_ap_completion(cli, NULL, oap, 0, PTR_ERR(request)); - - /* put the page back in the loi/lop lists */ - list_add_tail(&oap->oap_pending_item, - &lop->lop_pending); - lop_update_pending(cli, lop, cmd, 1); - if (oap->oap_async_flags & ASYNC_URGENT) - list_add(&oap->oap_urgent_item, - &lop->lop_urgent); } loi_list_maint(cli, loi); RETURN(PTR_ERR(request)); @@ -2054,7 +2107,6 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm, { struct client_obd *cli = &exp->exp_obd->u.cli; struct osc_async_page *oap; - struct loi_oap_pages *lop; int rc = 0; ENTRY; @@ -2109,16 +2161,9 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm, client_obd_list_unlock(&cli->cl_loi_list_lock); RETURN(rc); } - lop = &loi->loi_write_lop; - } else { - lop = &loi->loi_read_lop; } - if (oap->oap_async_flags & ASYNC_URGENT) - list_add(&oap->oap_urgent_item, &lop->lop_urgent); - list_add_tail(&oap->oap_pending_item, &lop->lop_pending); - lop_update_pending(cli, lop, cmd, 1); - + osc_oap_to_pending(oap); loi_list_maint(cli, loi); LOI_DEBUG(loi, "oap %p page %p added for cmd %d\n", oap, oap->oap_page, @@ -2258,10 +2303,7 @@ static void osc_group_to_pending(struct client_obd *cli, struct lov_oinfo *loi, list_for_each_safe(pos, tmp, &lop->lop_pending_group) { oap = list_entry(pos, struct osc_async_page, oap_pending_item); list_del(&oap->oap_pending_item); - list_add_tail(&oap->oap_pending_item, &lop->lop_pending); - if (oap->oap_async_flags & ASYNC_URGENT) - list_add(&oap->oap_urgent_item, &lop->lop_urgent); - lop_update_pending(cli, lop, cmd, 1); + osc_oap_to_pending(oap); } loi_list_maint(cli, loi); } @@ -2662,7 +2704,7 @@ static int osc_change_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm, struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; struct obd_device *obd = class_exp2obd(exp); - ldlm_change_cbdata(obd->obd_namespace, &res_id, replace, data); + ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data); return 0; } diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 11da088..3d56caf 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -580,6 +580,73 @@ static void ost_brw_lock_put(int mode, EXIT; } +struct ost_prolong_data { + struct obd_export *opd_exp; + ldlm_policy_data_t opd_policy; + ldlm_mode_t opd_mode; +}; + +static int ost_prolong_locks_iter(struct ldlm_lock *lock, void *data) +{ + struct ost_prolong_data *opd = data; + + LASSERT(lock->l_resource->lr_type == LDLM_EXTENT); + + if (lock->l_req_mode != lock->l_granted_mode) { + /* scan granted locks only */ + return LDLM_ITER_STOP; + } + + if (lock->l_export != opd->opd_exp) { + /* prolong locks only for given client */ + return LDLM_ITER_CONTINUE; + } + + if (!(lock->l_granted_mode & opd->opd_mode)) { + /* we aren't interesting in all type of locks */ + return LDLM_ITER_CONTINUE; + } + + if (lock->l_policy_data.l_extent.end < opd->opd_policy.l_extent.start || + lock->l_policy_data.l_extent.start > opd->opd_policy.l_extent.end) { + /* the request doesn't cross the lock, skip it */ + return LDLM_ITER_CONTINUE; + } + + if (!(lock->l_flags & LDLM_FL_AST_SENT)) { + /* ignore locks not being cancelled */ + return LDLM_ITER_CONTINUE; + } + + /* OK. this is a possible lock the user holds doing I/O + * let's refresh eviction timer for it */ + ldlm_refresh_waiting_lock(lock); + + return LDLM_ITER_CONTINUE; +} + +static void ost_prolong_locks(struct obd_export *exp, struct obd_ioobj *obj, + struct niobuf_remote *nb, ldlm_mode_t mode) +{ + struct ldlm_res_id res_id = { .name = { obj->ioo_id } }; + int nrbufs = obj->ioo_bufcnt; + struct ost_prolong_data opd; + + ENTRY; + + opd.opd_mode = mode; + opd.opd_exp = exp; + opd.opd_policy.l_extent.start = nb[0].offset & CFS_PAGE_MASK; + opd.opd_policy.l_extent.end = (nb[nrbufs - 1].offset + + nb[nrbufs - 1].len - 1) | ~CFS_PAGE_MASK; + + CDEBUG(D_DLMTRACE, "refresh locks: "LPU64"/"LPU64" ("LPU64"->"LPU64")\n", + res_id.name[0], res_id.name[1], opd.opd_policy.l_extent.start, + opd.opd_policy.l_extent.end); + ldlm_resource_iterate(exp->exp_obd->obd_namespace, &res_id, + ost_prolong_locks_iter, &opd); +} + static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) { struct ptlrpc_bulk_desc *desc; @@ -670,6 +737,8 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) if (rc != 0) GOTO(out_lock, rc); + ost_prolong_locks(req->rq_export, ioo, pp_rnb, LCK_PW | LCK_PR); + /* We're finishing using body->oa as an input variable */ do_checksum = (body->oa.o_valid & OBD_MD_FLCKSUM); body->oa.o_valid = 0; @@ -777,20 +846,11 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) ptlrpc_rs_decref(req->rq_reply_state); req->rq_reply_state = NULL; } - if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) { - CERROR("%s: bulk IO comm error evicting %s@%s id %s\n", - req->rq_export->exp_obd->obd_name, - req->rq_export->exp_client_uuid.uuid, - req->rq_export->exp_connection->c_remote_uuid.uuid, - libcfs_id2str(req->rq_peer)); - class_fail_export(req->rq_export); - } else { - CERROR("ignoring bulk IO comms error: " - "client reconnected %s@%s id %s\n", - req->rq_export->exp_client_uuid.uuid, - req->rq_export->exp_connection->c_remote_uuid.uuid, - libcfs_id2str(req->rq_peer)); - } + CWARN("%s: ignoring bulk IO comm error with %s@%s id %s\n", + req->rq_export->exp_obd->obd_name, + req->rq_export->exp_client_uuid.uuid, + req->rq_export->exp_connection->c_remote_uuid.uuid, + libcfs_id2str(req->rq_peer)); } RETURN(rc); @@ -899,6 +959,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) if (rc != 0) GOTO(out_bulk, rc); + ost_prolong_locks(req->rq_export, ioo, pp_rnb, LCK_PW); + /* obd_preprw clobbers oa->valid, so save what we need */ do_checksum = (body->oa.o_valid & OBD_MD_FLCKSUM); @@ -1020,20 +1082,11 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) ptlrpc_rs_decref(req->rq_reply_state); req->rq_reply_state = NULL; } - if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) { - CERROR("%s: bulk IO comm error evicting %s@%s id %s\n", - req->rq_export->exp_obd->obd_name, - req->rq_export->exp_client_uuid.uuid, - req->rq_export->exp_connection->c_remote_uuid.uuid, - libcfs_id2str(req->rq_peer)); - class_fail_export(req->rq_export); - } else { - CERROR("ignoring bulk IO comms error: " - "client reconnected %s@%s id %s\n", - req->rq_export->exp_client_uuid.uuid, - req->rq_export->exp_connection->c_remote_uuid.uuid, - libcfs_id2str(req->rq_peer)); - } + CWARN("%s: ignoring bulk IO comm error with %s@%s id %s\n", + req->rq_export->exp_obd->obd_name, + req->rq_export->exp_client_uuid.uuid, + req->rq_export->exp_connection->c_remote_uuid.uuid, + libcfs_id2str(req->rq_peer)); } RETURN(rc); } diff --git a/lustre/ptlrpc/llog_server.c b/lustre/ptlrpc/llog_server.c index cf588d3..e536c82 100644 --- a/lustre/ptlrpc/llog_server.c +++ b/lustre/ptlrpc/llog_server.c @@ -194,7 +194,7 @@ int llog_origin_handle_next_block(struct ptlrpc_request *req) ctxt = llog_get_context(obd, body->lgd_ctxt_idx); if (ctxt == NULL) - GOTO(out, rc = -EINVAL); + GOTO(out_free, rc = -EINVAL); disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); @@ -232,6 +232,7 @@ out_close: out_pop: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); +out_free: OBD_FREE(buf, LLOG_CHUNK_SIZE); out: RETURN(rc); diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 78712f2..fde851b 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -58,7 +58,7 @@ struct ll_rpc_opcode { { OST_QUOTACHECK, "ost_quotacheck" }, { OST_QUOTACTL, "ost_quotactl" }, { MDS_GETATTR, "mds_getattr" }, - { MDS_GETATTR_NAME, "mds_getattr_name" }, + { MDS_GETATTR_NAME, "mds_getattr_lock" }, { MDS_CLOSE, "mds_close" }, { MDS_REINT, "mds_reint" }, { MDS_READPAGE, "mds_readpage" }, diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 7926a3f..c84159d 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -1157,6 +1157,8 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL); CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL); CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL); + CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL); + CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL); /* Sizes and Offsets */ diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index a4bacc2..bca9acc 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -84,6 +84,7 @@ for NAME in $CONFIGS; do IOZONE_OPTS="-i 0 -i 1 -i 2 -e -+d -r $RSIZE -s $SIZE" IOZFILE="-f $MOUNT/iozone" + export O_DIRECT if [ "$IOZONE" != "no" ]; then mount_client $MOUNT $DEBUG_OFF @@ -92,6 +93,14 @@ for NAME in $CONFIGS; do $CLEANUP $SETUP + # check if O_DIRECT support is implemented in kernel + if [ -z "$O_DIRECT" ]; then + touch $MOUNT/f.iozone + if ! ./directio write $MOUNT/f.iozone 0 1; then + O_DIRECT=no + fi + rm -f $MOUNT/f.iozone + fi if [ "$O_DIRECT" != "no" -a "$IOZONE_DIR" != "no" ]; then $DEBUG_OFF iozone -I $IOZONE_OPTS $IOZFILE.odir diff --git a/lustre/tests/llmount.sh b/lustre/tests/llmount.sh index 80fa2b3..e2dd99c 100755 --- a/lustre/tests/llmount.sh +++ b/lustre/tests/llmount.sh @@ -40,5 +40,5 @@ exit 1 #} #if [ "$MOUNT2" ]; then -# $LLMOUNT -v -o user_xattr,acl `hostname`:/mds1/client $MOUNT2 || exit 3 +# $LLMOUNT -v -o user_xattr `hostname`:/mds1/client $MOUNT2 || exit 3 #fi diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh index fb19ac6..436fd69 100755 --- a/lustre/tests/local.sh +++ b/lustre/tests/local.sh @@ -21,7 +21,7 @@ OSTSIZE=${OSTSIZE:-400000} OSTDEV2=${OSTDEV2:-$TMP/ost1-${FSNAME}} MDS_MOUNT_OPTS="user_xattr,acl,${MDS_MOUNT_OPTS:-""}" -CLIENTOPT="user_xattr,acl,${CLIENTOPT:-""}" +CLIENTOPT="user_xattr,${CLIENTOPT:-""}" # specific journal size for the ost, in MB JSIZE=${JSIZE:-0} diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh index 62c3b14..0dbce33 100755 --- a/lustre/tests/lov.sh +++ b/lustre/tests/lov.sh @@ -29,7 +29,7 @@ STRIPE_BYTES=${STRIPE_BYTES:-1048576} STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-$((OSTCOUNT -1))} MDS_MOUNT_OPTS="user_xattr,acl,${MDS_MOUNT_OPTS:-""}" -CLIENTOPT="user_xattr,acl,${CLIENTOPT:-""}" +CLIENTOPT="user_xattr,${CLIENTOPT:-""}" # specific journal size for the ost, in MB JSIZE=${JSIZE:-0} diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 36e90f3..2f1d764 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -203,19 +203,19 @@ test_16() { #define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 | OBD_FAIL_ONCE do_facet ost sysctl -w lustre.fail_loc=0x80000504 cancel_lru_locks osc - # will get evicted here - do_facet client "cmp /etc/termcap $MOUNT/termcap" && return 1 + # OST bulk will time out here, client resends + do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 1 sysctl -w lustre.fail_loc=0 # give recovery a chance to finish (shouldn't take long) sleep $TIMEOUT - do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 2 + do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 2 start_read_ahead } -run_test 16 "timeout bulk put, evict client (2732)" +run_test 16 "timeout bulk put, don't evict client (2732)" test_17() { # OBD_FAIL_PTLRPC_BULK_GET_NET 0x0503 | OBD_FAIL_ONCE - # client will get evicted here + # OST bulk will time out here, client retries sysctl -w lustre.fail_loc=0x80000503 # need to ensure we send an RPC do_facet client cp /etc/termcap $DIR/$tfile @@ -224,8 +224,8 @@ test_17() { sleep $TIMEOUT sysctl -w lustre.fail_loc=0 do_facet client "df $DIR" - # expect cmp to fail - do_facet client "cmp /etc/termcap $DIR/$tfile" && return 3 + # expect cmp to succeed, client resent bulk + do_facet client "cmp /etc/termcap $DIR/$tfile" || return 3 do_facet client "rm $DIR/$tfile" || return 4 return 0 } @@ -269,12 +269,7 @@ test_18b() { do_facet client cp /etc/termcap $f sync - # just use this write to trigger the client's eviction from the ost -# OBD_FAIL_PTLRPC_BULK_GET_NET|OBD_FAIL_ONCE - sysctl -w lustre.fail_loc=0x80000503 - do_facet client dd if=/dev/zero of=$f2 bs=4k count=1 - sync - sysctl -w lustre.fail_loc=0 + ost_evict_client # allow recovery to complete sleep $((TIMEOUT + 2)) # my understanding is that there should be nothing in the page @@ -355,8 +350,7 @@ test_24() { # bug 2248 - eviction fails writeback but app doesn't see it multiop $DIR/$tdir/$tfile Owy_wyc & MULTI_PID=$! usleep 500 -# OBD_FAIL_PTLRPC_BULK_GET_NET|OBD_FAIL_ONCE - sysctl -w lustre.fail_loc=0x80000503 + ost_evict_client usleep 500 kill -USR1 $MULTI_PID wait $MULTI_PID diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index b68cb58..77a75ec8f 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -21,7 +21,7 @@ EXCEPT="$EXCEPT 48a" case `uname -r` in 2.4*) FSTYPE=${FSTYPE:-ext3}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" ;; -2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 60 69" ;; +2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 48b" ;; *) error "unsupported kernel" ;; esac @@ -2559,9 +2559,14 @@ test_69() { [ -z "`lsmod|grep obdfilter`" ] && echo "skipping $TESTNAME (remote OST)" && return - f="$DIR/f69" + f="$DIR/$tfile" touch $f + if ! $DIRECTIO write ${f}.2 0 1; then + echo "skipping $TESTNAME - O_DIRECT not implemented" + return 0 + fi + sysctl -w lustre.fail_loc=0x217 truncate $f 1 # vmtruncate() will ignore truncate() error. $DIRECTIO write $f 0 2 && error "write succeeded, expect -ENOENT" @@ -2702,7 +2707,7 @@ test_75() { ls -l $F* } -run_test 75 "TEST join file" +run_test 75 "TEST join file ====================================" num_inodes() { awk '/lustre_inode_cache|^inode_cache/ {print $2; exit}' /proc/slabinfo @@ -2922,7 +2927,6 @@ test_103 () { cd $DIR [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return - [ -z "`mount | grep " $DIR .*\"`" ] && echo "skipping $TESTNAME (must have acl)" && return [ -z "`grep acl $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have acl)" && return $(which setfacl 2>/dev/null) || echo "skipping $TESTNAME (could not find setfacl)" && return diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index a32f2b6..beec261 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -71,6 +71,8 @@ trace() { } TRACE=${TRACE:-""} +LPROC=/proc/fs/lustre + run_one() { if ! grep -q $DIR /proc/mounts; then $SETUP @@ -580,8 +582,7 @@ test_24() { run_test 24 "lfs df [-ih] [path] test =========================" test_25() { - [ -z "`mount | grep " $DIR1 .*\"`" ] && echo "skipping $TESTNAME ($DIR1 must have acl)" && return - [ -z "`mount | grep " $DIR2 .*\"`" ] && echo "skipping $TESTNAME ($DIR2 must have acl)" && return + [ `cat $LPROC/mdc/MDC*MNT*/connect_flags | grep -c acl` -lt 2 ] && echo "skipping $TESTNAME (must have acl)" && return mkdir $DIR1/d25 || error touch $DIR1/d25/f1 || error diff --git a/lustre/tests/stat.c b/lustre/tests/stat.c index f27957d..9963650 100644 --- a/lustre/tests/stat.c +++ b/lustre/tests/stat.c @@ -85,6 +85,7 @@ void print_human_fstype(struct statfs *statfsbuf) break; case S_MAGIC_MINIX: type = strdup("minix"); + break; case S_MAGIC_MINIX_30: type = strdup("minix (30 char.)"); break; diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 8ca56d9..25c8ff9 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -263,6 +263,11 @@ mds_evict_client() { do_facet mds "echo $UUID > /proc/fs/lustre/mds/${mds_svc}/evict_client" } +ost_evict_client() { + UUID=`cat /proc/fs/lustre/osc/*_MNT_*/uuid | head -n 1` + do_facet ost "echo $UUID > /proc/fs/lustre/obdfilter/ost_svc/evict_client" +} + fail() { facet_failover $* df $MOUNT || error "post-failover df: $?" diff --git a/lustre/tests/uml.sh b/lustre/tests/uml.sh index e887a9f..7d7301b 100644 --- a/lustre/tests/uml.sh +++ b/lustre/tests/uml.sh @@ -23,7 +23,7 @@ MOUNT=${MOUNT:-/mnt/lustre} FSTYPE=${FSTYPE:-ext3} MDS_MOUNT_OPTS="user_xattr,acl,${MDS_MOUNT_OPTS:-""}" -CLIENTOPT="user_xattr,acl,${CLIENTOPT:-""}" +CLIENTOPT="user_xattr,${CLIENTOPT:-""}" NETTYPE=${NETTYPE:-tcp} NIDTYPE=${NIDTYPE:-$NETTYPE} diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 349c703..94a78d4 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -393,7 +393,7 @@ static int showdf(char *mntdir, struct obd_statfs *stat, char *uuid, int ishow, int cooked, char *type, int index, int rc) { - __u64 avail, used, total; + long long avail, used, total; double ratio = 0; char *suffix = "KMGTPEZY"; char tbuf[10], ubuf[10], abuf[10], rbuf[10]; @@ -715,7 +715,7 @@ int lfs_join(int argc, char **argv) size = lseek(fd, 0, SEEK_END); if (size % JOIN_FILE_ALIGN) { fprintf(stderr,"head file %s size %llu must be mutiple of %d\n", - name_head, size, JOIN_FILE_ALIGN); + name_head, (long long)size, JOIN_FILE_ALIGN); rc = -EINVAL; goto out; } @@ -1180,9 +1180,9 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only) if (bover) diff2str(dqb->dqb_btime, timebuf, now); - sprintf(numbuf[0], "%llu", toqb(dqb->dqb_curspace)); - sprintf(numbuf[1], "%llu", dqb->dqb_bsoftlimit); - sprintf(numbuf[2], "%llu", dqb->dqb_bhardlimit); + sprintf(numbuf[0], LPU64, toqb(dqb->dqb_curspace)); + sprintf(numbuf[1], LPU64, dqb->dqb_bsoftlimit); + sprintf(numbuf[2], LPU64, dqb->dqb_bhardlimit); printf(" %7s%c %6s %7s %7s", numbuf[0], bover ? '*' : ' ', numbuf[1], numbuf[2], bover > 1 ? timebuf : ""); @@ -1190,9 +1190,9 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only) if (iover) diff2str(dqb->dqb_itime, timebuf, now); - sprintf(numbuf[0], "%llu", dqb->dqb_curinodes); - sprintf(numbuf[1], "%llu", dqb->dqb_isoftlimit); - sprintf(numbuf[2], "%llu", dqb->dqb_ihardlimit); + sprintf(numbuf[0], LPU64, dqb->dqb_curinodes); + sprintf(numbuf[1], LPU64, dqb->dqb_isoftlimit); + sprintf(numbuf[2], LPU64, dqb->dqb_ihardlimit); if (!ost_only) printf(" %7s%c %6s %7s %7s", numbuf[0], iover ? '*' : ' ', numbuf[1], diff --git a/lustre/utils/llmount.c b/lustre/utils/llmount.c index 1c10faa..385fb44 100644 --- a/lustre/utils/llmount.c +++ b/lustre/utils/llmount.c @@ -61,7 +61,7 @@ void usage(FILE *out) "\t-v|--verbose: print verbose config settings\n" "\t-o: filesystem mount options:\n" "\t\tflock/noflock: enable/disable flock support\n" - "\t\troute=[-]:[-]: portal route to MDS\n" + "\t\tretry=: number of times mount is retried by client\n" "\t\tuser_xattr/nouser_xattr: enable/disable user extended " "attributes\n" ); @@ -251,10 +251,10 @@ int parse_options(char *options, struct lustre_mount_data *lmd, int *flagp) val = atoi(opteq + 1); *opteq = '\0'; if (!strcmp(opt, "retry")) { - if (val >= 0 || val < MAX_RETRIES) + if (val > MAX_RETRIES) + retry = MAX_RETRIES; + else if (val >= 0) retry = val; - else - retry = 0; } else { fprintf(stderr, "%s: unknown option '%s'. " "Ignoring.\n", progname, opt); @@ -446,8 +446,11 @@ int main(int argc, char *const argv[]) if (modpipe != NULL) pclose(modpipe); /* use <= to include the initial mount before we retry */ - for (i = 0, rc = -EAGAIN; i <= retry && rc != 0; i++) + for (i = 0, rc = -EAGAIN; i <= retry && rc != 0; i++) { rc = mount(source, target, "lustre", flags, &lmd); + if (rc && retry) + sleep(1 << max((i/2), 5)); + } } if (rc) { fprintf(stderr, "%s: mount(%s, %s) failed: %s\n", progname, diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index 85ee351..a856c63 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -896,17 +896,18 @@ int jt_obd_list(int argc, char **argv) int rc; #if HAVE_PROC_FS char buf[MAX_STRING_SIZE]; - FILE *fp = fopen(DEVICES_LIST, "r"); + FILE *fp = NULL; + if (argc != 1) + return CMD_HELP; + + fp = fopen(DEVICES_LIST,"r"); if (fp == NULL) { fprintf(stderr, "error: %s: %s opening "DEVICES_LIST"\n", jt_cmdname(argv[0]), strerror(rc = errno)); return rc; } - if (argc != 1) - return CMD_HELP; - while (fgets(buf, sizeof(buf), fp) != NULL) printf("%s", buf); diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 5a1f55a..fbaa08a 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -1040,6 +1040,8 @@ main(int argc, char **argv) CHECK_CDEFINE(OBD_CONNECT_TRANSNO); CHECK_CDEFINE(OBD_CONNECT_IBITS); CHECK_CDEFINE(OBD_CONNECT_JOIN); + CHECK_CDEFINE(OBD_CONNECT_ATTRFID); + CHECK_CDEFINE(OBD_CONNECT_NODEVOH); COMMENT("Sizes and Offsets"); BLANK_LINE();