-+
-+void ADIOI_LUSTRE_Calc_my_off_len(ADIO_File fd, int bufcount,
-+ MPI_Datatype datatype, int file_ptr_type,
-+ ADIO_Offset offset,
-+ ADIO_Offset **offset_list_ptr,
-+ int **len_list_ptr,
-+ ADIO_Offset *start_offset_ptr,
-+ ADIO_Offset *end_offset_ptr,
-+ int *contig_access_count_ptr)
-+{
-+ int filetype_size, buftype_size, etype_size;
-+ int i, j, k, frd_size = 0, old_frd_size = 0, st_index = 0;
-+ int n_filetypes, etype_in_filetype;
-+ ADIO_Offset abs_off_in_filetype = 0;
-+ int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
-+ int contig_access_count, *len_list, flag, filetype_is_contig;
-+ MPI_Aint filetype_extent, filetype_lb;
-+ ADIOI_Flatlist_node *flat_file;
-+ ADIO_Offset *offset_list, off, end_offset = 0, disp;
-+
-+ /* For this process's request, calculate the list of offsets and
-+ lengths in the file and determine the start and end offsets. */
-+
-+ ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
-+
-+ MPI_Type_size(fd->filetype, &filetype_size);
-+ MPI_Type_extent(fd->filetype, &filetype_extent);
-+ MPI_Type_lb(fd->filetype, &filetype_lb);
-+ MPI_Type_size(datatype, &buftype_size);
-+ etype_size = fd->etype_size;
-+
-+ if (!filetype_size) {
-+ *contig_access_count_ptr = 0;
-+ *offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
-+ *len_list_ptr = (int *) ADIOI_Malloc(2 * sizeof(int));
-+ /* 2 is for consistency. everywhere I malloc one more than needed */
-+
-+ offset_list = *offset_list_ptr;
-+ len_list = *len_list_ptr;
-+ offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
-+ fd->disp + etype_size * offset;
-+ len_list[0] = 0;
-+ *start_offset_ptr = offset_list[0];
-+ *end_offset_ptr = offset_list[0] + len_list[0] - 1;
-+ return;
-+ }
-+
-+ if (filetype_is_contig) {
-+ *contig_access_count_ptr = 1;
-+ *offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
-+ *len_list_ptr = (int *) ADIOI_Malloc(2 * sizeof(int));
-+ /* 2 is for consistency. everywhere I malloc one more than needed */
-+
-+ offset_list = *offset_list_ptr;
-+ len_list = *len_list_ptr;
-+ offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
-+ fd->disp + etype_size * offset;
-+ len_list[0] = bufcount * buftype_size;
-+ *start_offset_ptr = offset_list[0];
-+ *end_offset_ptr = offset_list[0] + len_list[0] - 1;
-+
-+ /* update file pointer */
-+ if (file_ptr_type == ADIO_INDIVIDUAL)
-+ fd->fp_ind = *end_offset_ptr + 1;
-+ } else {
-+ /* First calculate what size of offset_list and len_list to allocate */
-+ /* filetype already flattened in ADIO_Open or ADIO_Fcntl */
-+ flat_file = ADIOI_Flatlist;
-+ while (flat_file->type != fd->filetype)
-+ flat_file = flat_file->next;
-+ disp = fd->disp;
-+
-+ if (file_ptr_type == ADIO_INDIVIDUAL) {
-+ offset = fd->fp_ind; /* in bytes */
-+ n_filetypes = -1;
-+ flag = 0;
-+ while (!flag) {
-+ n_filetypes++;
-+ for (i = 0; i < flat_file->count; i++) {
-+ if (disp + flat_file->indices[i] +
-+ (ADIO_Offset) n_filetypes * filetype_extent +
-+ flat_file->blocklens[i] >= offset) {
-+ st_index = i;
-+ frd_size = (int) (disp + flat_file->indices[i] +
-+ (ADIO_Offset) n_filetypes *
-+ filetype_extent +
-+ flat_file->blocklens[i] -
-+ offset);
-+ flag = 1;
-+ break;
-+ }
-+ }
-+ }
-+ } else {
-+ n_etypes_in_filetype = filetype_size / etype_size;
-+ n_filetypes = (int) (offset / n_etypes_in_filetype);
-+ etype_in_filetype = (int) (offset % n_etypes_in_filetype);
-+ size_in_filetype = etype_in_filetype * etype_size;
-+
-+ sum = 0;
-+ for (i = 0; i < flat_file->count; i++) {
-+ sum += flat_file->blocklens[i];
-+ if (sum > size_in_filetype) {
-+ st_index = i;
-+ frd_size = sum - size_in_filetype;
-+ abs_off_in_filetype = flat_file->indices[i] +
-+ size_in_filetype -
-+ (sum - flat_file->blocklens[i]);
-+ break;
-+ }
-+ }
-+
-+ /* abs. offset in bytes in the file */
-+ offset = disp + (ADIO_Offset) n_filetypes *filetype_extent +
-+ abs_off_in_filetype;
-+ }
-+
-+ /* calculate how much space to allocate for offset_list, len_list */
-+
-+ old_frd_size = frd_size;
-+ contig_access_count = i = 0;
-+ j = st_index;
-+ bufsize = buftype_size * bufcount;
-+ frd_size = ADIOI_MIN(frd_size, bufsize);
-+ while (i < bufsize) {
-+ if (frd_size)
-+ contig_access_count++;
-+ i += frd_size;
-+ j = (j + 1) % flat_file->count;
-+ frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize - i);
-+ }
-+
-+ /* allocate space for offset_list and len_list */
-+
-+ *offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc((contig_access_count+1) *
-+ sizeof(ADIO_Offset));
-+ *len_list_ptr = (int *) ADIOI_Malloc((contig_access_count + 1) *
-+ sizeof(int));
-+ /* +1 to avoid a 0-size malloc */
-+
-+ offset_list = *offset_list_ptr;
-+ len_list = *len_list_ptr;
-+
-+ /* find start offset, end offset, and fill in offset_list and len_list */
-+
-+ *start_offset_ptr = offset; /* calculated above */
-+
-+ i = k = 0;
-+ j = st_index;
-+ off = offset;
-+ frd_size = ADIOI_MIN(old_frd_size, bufsize);
-+ while (i < bufsize) {
-+ if (frd_size) {
-+ offset_list[k] = off;
-+ len_list[k] = frd_size;
-+ k++;
-+ }
-+ i += frd_size;
-+ end_offset = off + frd_size - 1;
-+
-+ /* Note: end_offset points to the last byte-offset that will be accessed.
-+ e.g., if start_offset=0 and 100 bytes to be read, end_offset=99 */
-+
-+ if (off + frd_size < disp + flat_file->indices[j] +
-+ flat_file->blocklens[j] +
-+ (ADIO_Offset) n_filetypes * filetype_extent) {
-+ off += frd_size;
-+ /* did not reach end of contiguous block in filetype.
-+ * no more I/O needed. off is incremented by frd_size.
-+ */
-+ } else {
-+ if (j < (flat_file->count - 1))
-+ j++;
-+ else {
-+ /* hit end of flattened filetype;
-+ * start at beginning again
-+ */
-+ j = 0;
-+ n_filetypes++;
-+ }
-+ off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes *
-+ filetype_extent;
-+ frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize - i);
-+ }
-+ }
-+
-+ /* update file pointer */
-+ if (file_ptr_type == ADIO_INDIVIDUAL)
-+ fd->fp_ind = off;
-+
-+ *contig_access_count_ptr = contig_access_count;
-+ *end_offset_ptr = end_offset;
-+ }
-+}
-+
-+void ADIOI_LUSTRE_Calc_others_req(ADIO_File fd, int count_my_req_procs,
-+ int *count_my_req_per_proc,
-+ ADIOI_Access * my_req,
-+ int nprocs, int myrank,
-+ ADIO_Offset start_offset,
-+ ADIO_Offset end_offset,
-+ int *striping_info,
-+ int *count_others_req_procs_ptr,
-+ ADIOI_Access ** others_req_ptr)
-+{
-+ /* what requests of other processes will be written by this process */
-+
-+ int *count_others_req_per_proc, count_others_req_procs;
-+ int i, j, lflag, samesize = 0, contiguous = 0;
-+ MPI_Request *send_requests, *recv_requests;
-+ MPI_Status *statuses;
-+ ADIOI_Access *others_req;
-+ char *value = NULL;
-+ int proc, avail_nprocs, stripe_count, CO;
-+ ADIO_Offset min_st_offset, off, req_len, avail_len, rem_len, *all_lens;
-+
-+ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));
-+ /* same io size */
-+ MPI_Info_get(fd->info, "same_io_size", MPI_MAX_INFO_VAL, value, &lflag);
-+ if (lflag && !strcmp(value, "yes"))
-+ samesize = 1;
-+ /* contiguous data */
-+ MPI_Info_get(fd->info, "contiguous_data", MPI_MAX_INFO_VAL, value, &lflag);
-+ if (lflag && !strcmp(value, "yes"))
-+ contiguous = 1;
-+
-+ *others_req_ptr = (ADIOI_Access *) ADIOI_Malloc(nprocs *
-+ sizeof(ADIOI_Access));
-+ others_req = *others_req_ptr;
-+
-+ /* if the data are contiguous, we don't need to do MPI_Alltoall */
-+ if (contiguous) {
-+ stripe_count = striping_info[1];
-+ CO = striping_info[2];
-+
-+ for (i = 0; i < nprocs; i++) {
-+ others_req[i].count = 0;
-+ }
-+ req_len = end_offset - start_offset + 1;
-+ all_lens = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
-+
-+ if (samesize == 0) {/* different request size */
-+ /* calculate the min_st_offset */
-+ MPI_Allreduce(&start_offset, &min_st_offset, 1, MPI_LONG_LONG,
-+ MPI_MIN, fd->comm);
-+ /* exchange request length */
-+ MPI_Allgather(&req_len, 1, ADIO_OFFSET, all_lens, 1, ADIO_OFFSET,
-+ fd->comm);
-+ } else { /* same request size */
-+ /* calculate the min_st_offset */
-+ min_st_offset = start_offset - myrank * req_len;
-+ /* assign request length to all_lens[] */
-+ for (i = 0; i < nprocs; i ++)
-+ all_lens[i] = req_len;
-+ }
-+ avail_nprocs = ADIOI_MIN(nprocs, stripe_count * CO);
-+ if (myrank < avail_nprocs) {
-+ off = min_st_offset;
-+ /* calcaulte other_req[i].count */
-+ for (i = 0; i < nprocs; i++) {
-+ avail_len = all_lens[i];
-+ rem_len = avail_len;
-+ while (rem_len > 0) {
-+ proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len,
-+ nprocs, striping_info);
-+ if (proc == myrank) {
-+ others_req[i].count ++;
-+ }
-+ off += avail_len;
-+ rem_len -= avail_len;
-+ avail_len = rem_len;
-+ }
-+ }
-+ /* calculate offset and len for each request */
-+ off = min_st_offset;
-+ for (i = 0; i < nprocs; i++) {
-+ if (others_req[i].count) {
-+ others_req[i].offsets = (ADIO_Offset *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(ADIO_Offset));
-+ others_req[i].lens = (int *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(int));
-+ others_req[i].mem_ptrs = (MPI_Aint *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(MPI_Aint));
-+ }
-+ j = 0;
-+ avail_len = all_lens[i];
-+ rem_len = avail_len;
-+ while (rem_len > 0) {
-+ proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len,
-+ nprocs, striping_info);
-+ if (proc == myrank) {
-+ others_req[i].offsets[j] = off;
-+ others_req[i].lens[j] = (int)avail_len;
-+ j ++;
-+ }
-+ off += avail_len;
-+ rem_len -= avail_len;
-+ avail_len = rem_len;
-+ }
-+ }
-+ }
-+ ADIOI_Free(value);
-+ ADIOI_Free(all_lens);
-+ } else {
-+ /* multiple non-contiguous requests */
-+ /* first find out how much to send/recv and from/to whom */
-+
-+ /*
-+ * count_others_req_procs:
-+ * number of processes whose requests will be written by
-+ * this process (including this process itself)
-+ * count_others_req_per_proc[i]:
-+ * how many separate contiguous requests of proc[i] will be
-+ * written by this process.
-+ */
-+
-+ count_others_req_per_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
-+
-+ MPI_Alltoall(count_my_req_per_proc, 1, MPI_INT,
-+ count_others_req_per_proc, 1, MPI_INT, fd->comm);
-+
-+ count_others_req_procs = 0;
-+ for (i = 0; i < nprocs; i++) {
-+ if (count_others_req_per_proc[i]) {
-+ others_req[i].count = count_others_req_per_proc[i];
-+ others_req[i].offsets = (ADIO_Offset *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(ADIO_Offset));
-+ others_req[i].lens = (int *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(int));
-+ others_req[i].mem_ptrs = (MPI_Aint *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(MPI_Aint));
-+ count_others_req_procs++;
-+ } else
-+ others_req[i].count = 0;
-+ }
-+
-+ /* now send the calculated offsets and lengths to respective processes */
-+
-+ send_requests = (MPI_Request *) ADIOI_Malloc(2 * (count_my_req_procs + 1) *
-+ sizeof(MPI_Request));
-+ recv_requests = (MPI_Request *) ADIOI_Malloc(2 * (count_others_req_procs+1)*
-+ sizeof(MPI_Request));
-+ /* +1 to avoid a 0-size malloc */
-+
-+ j = 0;
-+ for (i = 0; i < nprocs; i++) {
-+ if (others_req[i].count) {
-+ MPI_Irecv(others_req[i].offsets, others_req[i].count,
-+ ADIO_OFFSET, i, i + myrank, fd->comm,
-+ &recv_requests[j]);
-+ j++;
-+ MPI_Irecv(others_req[i].lens, others_req[i].count,
-+ MPI_INT, i, i + myrank + 1, fd->comm,
-+ &recv_requests[j]);
-+ j++;
-+ }
-+ }
-+
-+ j = 0;
-+ for (i = 0; i < nprocs; i++) {
-+ if (my_req[i].count) {
-+ MPI_Isend(my_req[i].offsets, my_req[i].count,
-+ ADIO_OFFSET, i, i + myrank, fd->comm,
-+ &send_requests[j]);
-+ j++;
-+ MPI_Isend(my_req[i].lens, my_req[i].count,
-+ MPI_INT, i, i + myrank + 1, fd->comm,
-+ &send_requests[j]);
-+ j++;
-+ }
-+ }
-+
-+ statuses = (MPI_Status *)
-+ ADIOI_Malloc((1 + 2 * ADIOI_MAX(count_my_req_procs,
-+ count_others_req_procs)) *
-+ sizeof(MPI_Status));
-+ /* +1 to avoid a 0-size malloc */
-+
-+ MPI_Waitall(2 * count_my_req_procs, send_requests, statuses);
-+ MPI_Waitall(2 * count_others_req_procs, recv_requests, statuses);
-+
-+ ADIOI_Free(send_requests);
-+ ADIOI_Free(recv_requests);
-+ ADIOI_Free(statuses);
-+ ADIOI_Free(count_others_req_per_proc);
-+
-+ *count_others_req_procs_ptr = count_others_req_procs;
-+ }
-+}
-diff -ruN ad_lustre_orig/ad_lustre.c ad_lustre/ad_lustre.c
---- ad_lustre_orig/ad_lustre.c 2008-09-17 14:36:57.000000000 +0800
-+++ ad_lustre/ad_lustre.c 2008-09-17 18:20:35.000000000 +0800