- oa->o_mode = inode->i_mode;
- oa->o_id = lsm->lsm_object_id;
- oa->o_gr = lsm->lsm_object_gr;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLMODE |
- OBD_MD_FLTYPE |OBD_MD_FLGROUP;
- obdo_from_inode(oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);
-
- cmd = OBD_BRW_READ;
- if (bio_rw(bio) == WRITE)
- cmd = OBD_BRW_WRITE;
-
- if (cmd == OBD_BRW_WRITE)
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE, bio->bi_size);
- else
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ, bio->bi_size);
- oinfo.oi_oa = oa;
- oinfo.oi_md = lsm;
- opc = cmd & OBD_BRW_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_RW;
- oinfo.oi_capa = ll_osscapa_get(inode, opc);
- ret = obd_brw(cmd, ll_i2dtexp(inode), &oinfo,
- (obd_count)(i - bio->bi_idx),
- lo->lo_requests[0].lrd_pages, NULL);
- capa_put(oinfo.oi_capa);
- if (ret == 0)
- obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
- return ret;
+ ll_stats_ops_tally(ll_i2sbi(inode),
+ (rw == WRITE) ? LPROC_LL_BRW_WRITE : LPROC_LL_BRW_READ,
+ page_count << PAGE_CACHE_SHIFT);
+
+ pvec->ldp_size = page_count << PAGE_CACHE_SHIFT;
+ pvec->ldp_nr = page_count;
+
+ /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to
+ * write those pages into OST. Even worse case is that more pages
+ * would be asked to write out to swap space, and then finally get here
+ * again.
+ * Unfortunately this is NOT easy to fix.
+ * Thoughts on solution:
+ * 0. Define a reserved pool for cl_pages, which could be a list of
+ * pre-allocated cl_pages from cl_page_kmem;
+ * 1. Define a new operation in cl_object_operations{}, say, clo_depth,
+ * which measures how many layers for this lustre object. Generally
+ * speaking, the depth would be 2, one for llite, and one for lovsub.
+ * However, for SNS, there will be more since we need additional page
+ * to store parity;
+ * 2. Reserve the # of (page_count * depth) cl_pages from the reserved
+ * pool. Afterwards, the clio would allocate the pages from reserved
+ * pool, this guarantees we needn't allocate the cl_pages from
+ * generic cl_page slab cache.
+ * Of course, if there are NOT enough pages in the pool, we might
+ * be asked to write fewer pages at once; this purely depends on
+ * implementation. Anyway, we should be careful to avoid deadlocking.
+ */
+ LOCK_INODE_MUTEX(inode);
+ bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
+ UNLOCK_INODE_MUTEX(inode);
+ cl_io_fini(env, io);
+ return (bytes == pvec->ldp_size) ? 0 : (int)bytes;