-+
-+void ADIOI_LUSTRE_Calc_others_req(ADIO_File fd, int count_my_req_procs,
-+ int *count_my_req_per_proc,
-+ ADIOI_Access * my_req,
-+ int nprocs, int myrank,
-+ ADIO_Offset req_len,
-+ ADIO_Offset min_st_offset,
-+ int *striping_info,
-+ int *count_others_req_procs_ptr,
-+ ADIOI_Access ** others_req_ptr)
-+{
-+ /* Work out which requests of other processes will be written by this process; the result is a newly allocated array of nprocs ADIOI_Access entries returned through others_req_ptr. */
-+ /* NOTE(review): the count_others_req_procs_ptr output is assigned only at the end of the non-contiguous branch; on the contiguous path it is left untouched -- confirm callers do not read it there. */
-+ int *count_others_req_per_proc, count_others_req_procs, proc;
-+ int i, j, lflag, samesize = 0, contiguous = 0;
-+ int avail_cb_nodes = striping_info[2];
-+ MPI_Request *send_requests, *recv_requests;
-+ MPI_Status *statuses;
-+ ADIOI_Access *others_req;
-+ char *value = NULL;
-+ ADIO_Offset off, avail_len, rem_len, *all_lens;
-+
-+ /* Two hints can reduce MPI communication overhead when the user
-+ * knows the I/O pattern and sets them correctly. */
-+ /* They are:
-+ * contiguous_data: if "yes", the data are treated as contiguous and
-+ * the MPI_Alltoall() exchange below is skipped.
-+ * same_io_size: if "yes" (only consulted when contiguous_data is set),
-+ * every request length is taken to be req_len, so no MPI_Allgather().
-+ */
-+ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));
-+ /* contiguous_data hint: enabled only when present with the exact value "yes" */
-+ MPI_Info_get(fd->info, "contiguous_data", MPI_MAX_INFO_VAL, value, &lflag);
-+ if (lflag && !strcmp(value, "yes"))
-+ contiguous = 1;
-+ /* same_io_size hint: enabled only when present with the exact value "yes" */
-+ MPI_Info_get(fd->info, "same_io_size", MPI_MAX_INFO_VAL, value, &lflag);
-+ if (lflag && !strcmp(value, "yes"))
-+ samesize = 1;
-+ ADIOI_Free(value);
-+
-+ *others_req_ptr = (ADIOI_Access *) ADIOI_Malloc(nprocs *
-+ sizeof(ADIOI_Access));
-+ others_req = *others_req_ptr;
-+
-+ /* if the data are contiguous, we can calculate the offsets and lengths
-+ * of the other requests locally, instead of using MPI_Alltoall() */
-+ if (contiguous) {
-+ for (i = 0; i < nprocs; i++) {
-+ others_req[i].count = 0;
-+ }
-+ all_lens = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
-+
-+ /* do all processes have the same request size? */
-+ if (samesize == 0) {
-+ /* no: gather every process's req_len into all_lens[] */
-+ MPI_Allgather(&req_len, 1, ADIO_OFFSET, all_lens, 1, ADIO_OFFSET,
-+ fd->comm);
-+ } else { /* yes: same request size everywhere */
-+ /* fill every entry of all_lens[] with this process's own req_len */
-+ for (i = 0; i < nprocs; i ++)
-+ all_lens[i] = req_len;
-+ }
-+ if (myrank < avail_cb_nodes) {
-+ /* This rank is below avail_cb_nodes (striping_info[2]): it is an I/O client and will receive data from others */
-+ off = min_st_offset;
-+ /* first pass: calculate others_req[i].count; off advances from min_st_offset through all_lens[] in rank order */
-+ for (i = 0; i < nprocs; i++) {
-+ avail_len = all_lens[i];
-+ rem_len = avail_len;
-+ while (rem_len > 0) {
-+ proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len,
-+ striping_info); /* presumably shrinks avail_len to the piece handled by proc -- TODO confirm against its definition */
-+ if (proc == myrank) {
-+ others_req[i].count ++;
-+ }
-+ off += avail_len;
-+ rem_len -= avail_len;
-+ avail_len = rem_len;
-+ }
-+ }
-+ /* second pass: repeat the same walk, now recording the offset and len of each piece assigned to this rank */
-+ off = min_st_offset;
-+ for (i = 0; i < nprocs; i++) {
-+ if (others_req[i].count) {
-+ others_req[i].offsets = (ADIO_Offset *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(ADIO_Offset));
-+ others_req[i].lens = (int *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(int));
-+ others_req[i].mem_ptrs = (MPI_Aint *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(MPI_Aint)); /* mem_ptrs is only allocated in this function, never written */
-+ }
-+ j = 0; /* next free slot in others_req[i].offsets / .lens */
-+ avail_len = all_lens[i];
-+ rem_len = avail_len;
-+ while (rem_len > 0) {
-+ proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len,
-+ striping_info);
-+ if (proc == myrank) {
-+ others_req[i].offsets[j] = off;
-+ others_req[i].lens[j] = (int)avail_len; /* NOTE(review): narrows ADIO_Offset to int */
-+ j ++;
-+ }
-+ off += avail_len;
-+ rem_len -= avail_len;
-+ avail_len = rem_len;
-+ }
-+ }
-+ }
-+ ADIOI_Free(all_lens);
-+ } else {
-+ /* general case (contiguous_data hint not set): multiple non-contiguous requests */
-+ /* first find out how much to send/recv and from/to whom */
-+
-+ /*
-+ * count_others_req_procs:
-+ * number of processes whose requests will be written by
-+ * this process (including this process itself)
-+ * count_others_req_per_proc[i]:
-+ * how many separate contiguous requests of proc[i] will be
-+ * written by this process.
-+ */
-+
-+ count_others_req_per_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
-+ /* exchange one int per rank pair: count_my_req_per_proc[i] goes to rank i, and rank i's value for us lands in count_others_req_per_proc[i] */
-+ MPI_Alltoall(count_my_req_per_proc, 1, MPI_INT,
-+ count_others_req_per_proc, 1, MPI_INT, fd->comm);
-+
-+ count_others_req_procs = 0;
-+ for (i = 0; i < nprocs; i++) {
-+ if (count_others_req_per_proc[i]) {
-+ others_req[i].count = count_others_req_per_proc[i];
-+ others_req[i].offsets = (ADIO_Offset *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(ADIO_Offset));
-+ others_req[i].lens = (int *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(int));
-+ others_req[i].mem_ptrs = (MPI_Aint *)
-+ ADIOI_Malloc(others_req[i].count *
-+ sizeof(MPI_Aint));
-+ count_others_req_procs++;
-+ } else
-+ others_req[i].count = 0;
-+ }
-+
-+ /* now send the calculated offsets and lengths to the respective processes, and post the matching receives */
-+
-+ send_requests = (MPI_Request *) ADIOI_Malloc(2 * (count_my_req_procs + 1) *
-+ sizeof(MPI_Request));
-+ recv_requests = (MPI_Request *) ADIOI_Malloc(2 * (count_others_req_procs+1)*
-+ sizeof(MPI_Request));
-+ /* +1 to avoid a 0-size malloc */
-+ /* two messages per peer: offsets use tag (i + myrank), lens use tag (i + myrank + 1); the same expressions are used on the send side */
-+ j = 0;
-+ for (i = 0; i < nprocs; i++) {
-+ if (others_req[i].count) {
-+ MPI_Irecv(others_req[i].offsets, others_req[i].count,
-+ ADIO_OFFSET, i, i + myrank, fd->comm,
-+ &recv_requests[j]);
-+ j++;
-+ MPI_Irecv(others_req[i].lens, others_req[i].count,
-+ MPI_INT, i, i + myrank + 1, fd->comm,
-+ &recv_requests[j]);
-+ j++;
-+ }
-+ }
-+
-+ j = 0;
-+ for (i = 0; i < nprocs; i++) {
-+ if (my_req[i].count) {
-+ MPI_Isend(my_req[i].offsets, my_req[i].count,
-+ ADIO_OFFSET, i, i + myrank, fd->comm,
-+ &send_requests[j]);
-+ j++;
-+ MPI_Isend(my_req[i].lens, my_req[i].count,
-+ MPI_INT, i, i + myrank + 1, fd->comm,
-+ &send_requests[j]);
-+ j++;
-+ }
-+ }
-+
-+ statuses = (MPI_Status *)
-+ ADIOI_Malloc((1 + 2 * ADIOI_MAX(count_my_req_procs,
-+ count_others_req_procs)) *
-+ sizeof(MPI_Status));
-+ /* +1 to avoid a 0-size malloc; sized for the larger of the two waits because the array is reused for both */
-+
-+ MPI_Waitall(2 * count_my_req_procs, send_requests, statuses);
-+ MPI_Waitall(2 * count_others_req_procs, recv_requests, statuses);
-+
-+ ADIOI_Free(send_requests);
-+ ADIOI_Free(recv_requests);
-+ ADIOI_Free(statuses);
-+ ADIOI_Free(count_others_req_per_proc);
-+
-+ *count_others_req_procs_ptr = count_others_req_procs;
-+ }
-+}
-diff -ruN ad_lustre_orig/ad_lustre.c ad_lustre/ad_lustre.c
---- ad_lustre_orig/ad_lustre.c 2008-09-17 14:36:57.000000000 +0800
-+++ ad_lustre/ad_lustre.c 2008-09-17 18:20:35.000000000 +0800