struct obd_info *oinfo,
struct ptlrpc_request_set *set,
size_t size, loff_t file_offset,
- struct page **pages, int page_count)
+ struct page **pages, int page_count,
+ unsigned long user_addr)
{
struct brw_page *pga;
- int i, rc = 0;
+ int i, rc = 0, pshift;
size_t length;
ENTRY;
RETURN(-ENOMEM);
}
- for (i = 0, length = size; length > 0;
- length -=pga[i].count, file_offset +=pga[i].count,i++) {/*i last!*/
+ /*
+ * pshift is the user buffer's in-page offset minus the file offset's;
+ * add it to ->off to get the in-memory offset, see OSC_FILE2MEM_OFF
+ */
+ pshift = (user_addr & ~CFS_PAGE_MASK) - (file_offset & ~CFS_PAGE_MASK);
+
+ for (i = 0, length = size; length > 0; i++) {/*i last!*/
+ LASSERT(i < page_count);
+
pga[i].pg = pages[i];
pga[i].off = file_offset;
/* To the end of the page, or the length, whatever is less */
- pga[i].count = min_t(int, CFS_PAGE_SIZE -(file_offset & ~CFS_PAGE_MASK),
+ pga[i].count = min_t(int, CFS_PAGE_SIZE -(user_addr & ~CFS_PAGE_MASK),
length);
pga[i].flag = OBD_BRW_SYNC;
if (rw == READ)
POISON_PAGE(pages[i], 0x0d);
+
+ length -= pga[i].count;
+ file_offset += pga[i].count;
+ user_addr += pga[i].count;
}
rc = obd_brw_async(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
ll_i2obdexp(inode), oinfo, page_count,
- pga, NULL, set);
+ pga, NULL, set, pshift);
if (rc == 0)
rc = size;
if (!lli->lli_smd || !lli->lli_smd->lsm_object_id)
RETURN(-EBADF);
- /* FIXME: io smaller than CFS_PAGE_SIZE is broken on ia64 ??? */
- if ((file_offset & (~CFS_PAGE_MASK)) || (count & ~CFS_PAGE_MASK))
- RETURN(-EINVAL);
-
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), size="LPSZ" (max %lu), "
"offset=%lld=%llx, pages "LPSZ" (max %lu)\n",
inode->i_ino, inode->i_generation, inode, count, MAX_DIO_SIZE,
else
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_DIRECT_READ, count);
- /* Check that all user buffers are aligned as well */
- for (seg = 0; seg < nr_segs; seg++) {
- if (((unsigned long)iov[seg].iov_base & ~CFS_PAGE_MASK) ||
- (iov[seg].iov_len & ~CFS_PAGE_MASK))
- RETURN(-EINVAL);
- }
-
set = ptlrpc_prep_set();
if (set == NULL)
RETURN(-ENOMEM);
*size changing by concurrent truncates and writes. */
if (rw == READ)
LOCK_INODE_MUTEX(inode);
-
for (seg = 0; seg < nr_segs; seg++) {
size_t iov_left = iov[seg].iov_len;
unsigned long user_addr = (unsigned long)iov[seg].iov_base;
&oinfo, set,
min(size,iov_left),
file_offset, pages,
- page_count);
+ page_count,
+ user_addr);
ll_free_user_pages(pages, page_count, rw==READ);
} else {
result = 0;
static obd_count osc_checksum_bulk(int nob, obd_count pg_count,
struct brw_page **pga, int opc,
- cksum_type_t cksum_type)
+ cksum_type_t cksum_type, int pshift)
{
__u32 cksum;
int i = 0;
cksum = init_checksum(cksum_type);
while (nob > 0 && pg_count > 0) {
unsigned char *ptr = cfs_kmap(pga[i]->pg);
- int off = pga[i]->off & ~CFS_PAGE_MASK;
+ int off = OSC_FILE2MEM_OFF(pga[i]->off, pshift) & ~CFS_PAGE_MASK;
int count = pga[i]->count > nob ? nob : pga[i]->count;
/* corrupt the data before we compute the checksum, to
static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa,
struct lov_stripe_md *lsm, obd_count page_count,
struct brw_page **pga,
- struct ptlrpc_request **reqp)
+ struct ptlrpc_request **reqp, int pshift)
{
struct ptlrpc_request *req;
struct ptlrpc_bulk_desc *desc;
struct brw_page *pg = pga[i];
LASSERT(pg->count > 0);
- LASSERTF((pg->off & ~CFS_PAGE_MASK) + pg->count <= CFS_PAGE_SIZE,
- "i: %d pg: %p off: "LPU64", count: %u\n", i, pg,
- pg->off, pg->count);
+ LASSERTF((OSC_FILE2MEM_OFF(pg->off, pshift) & ~CFS_PAGE_MASK) +
+ pg->count <= CFS_PAGE_SIZE,
+ "i: %d pg: %p off: "LPU64", count: %u, shift: %d\n",
+ i, pg, pg->off, pg->count, pshift);
#ifdef __linux__
LASSERTF(i == 0 || pg->off > pg_prev->off,
"i %d p_c %u pg %p [pri %lu ind %lu] off "LPU64
LASSERT((pga[0]->flag & OBD_BRW_SRVLOCK) ==
(pg->flag & OBD_BRW_SRVLOCK));
- ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~CFS_PAGE_MASK,
+ ptlrpc_prep_bulk_page(desc, pg->pg,
+ OSC_FILE2MEM_OFF(pg->off,pshift)&~CFS_PAGE_MASK,
pg->count);
requested_nob += pg->count;
body->oa.o_cksum = osc_checksum_bulk(requested_nob,
page_count, pga,
OST_WRITE,
- cksum_type);
+ cksum_type, pshift);
CDEBUG(D_PAGE, "checksum at write origin: %x\n",
body->oa.o_cksum);
/* save this in 'oa', too, for later checking */
aa->aa_resends = 0;
aa->aa_ppga = pga;
aa->aa_cli = cli;
+ aa->aa_pshift = pshift;
CFS_INIT_LIST_HEAD(&aa->aa_oaps);
*reqp = req;
static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
__u32 client_cksum, __u32 server_cksum, int nob,
obd_count page_count, struct brw_page **pga,
- cksum_type_t client_cksum_type)
+ cksum_type_t client_cksum_type, int pshift)
{
__u32 new_cksum;
char *msg;
cksum_type = OBD_CKSUM_CRC32;
new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE,
- cksum_type);
+ cksum_type, pshift);
if (cksum_type != client_cksum_type)
msg = "the server did not use the checksum type specified in "
check_write_checksum(&body->oa, peer, client_cksum,
body->oa.o_cksum, aa->aa_requested_nob,
aa->aa_page_count, aa->aa_ppga,
- cksum_type_unpack(aa->aa_oa->o_flags)))
+ cksum_type_unpack(aa->aa_oa->o_flags),
+ aa->aa_pshift))
RETURN(-EAGAIN);
rc = check_write_rcs(req, aa->aa_requested_nob,aa->aa_nio_count,
cksum_type = OBD_CKSUM_CRC32;
client_cksum = osc_checksum_bulk(rc, aa->aa_page_count,
aa->aa_ppga, OST_READ,
- cksum_type);
+ cksum_type, aa->aa_pshift);
if (peer->nid == req->rq_bulk->bd_sender) {
via = router = "";
restart_bulk:
rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm,
- page_count, pga, &request);
+ page_count, pga, &request, 0);
if (rc != 0)
return (rc);
OST_WRITE ? OBD_BRW_WRITE :OBD_BRW_READ,
aa->aa_cli, aa->aa_oa,
NULL /* lsm unused by osc currently */,
- aa->aa_page_count, aa->aa_ppga, &new_req);
+ aa->aa_page_count, aa->aa_ppga, &new_req,
+ aa->aa_pshift);
if (rc)
RETURN(rc);
static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *lsm, obd_count page_count,
- struct brw_page **pga, struct ptlrpc_request_set *set)
+ struct brw_page **pga, struct ptlrpc_request_set *set,
+ int pshift)
{
struct ptlrpc_request *request;
struct client_obd *cli = &exp->exp_obd->u.cli;
/* Consume write credits even if doing a sync write -
* otherwise we may run out of space on OST due to grant. */
- if (cmd == OBD_BRW_WRITE) {
+ /* Misaligned writes (pshift != 0) are not subject to write granting */
+ if (cmd == OBD_BRW_WRITE && pshift == 0) {
client_obd_list_lock(&cli->cl_loi_list_lock);
for (i = 0; i < page_count; i++) {
if (cli->cl_avail_grant >= CFS_PAGE_SIZE)
}
rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm,
- page_count, pga, &request);
+ page_count, pga, &request, pshift);
CLASSERT(sizeof(*aa) <= sizeof(request->rq_async_args));
aa = ptlrpc_req_async_args(request);
} while (stride > 1);
}
-static obd_count max_unfragmented_pages(struct brw_page **pg, obd_count pages)
+static obd_count max_unfragmented_pages(struct brw_page **pg, obd_count pages,
+ int pshift)
{
int count = 1;
int offset;
int i = 0;
LASSERT (pages > 0);
- offset = pg[i]->off & (~CFS_PAGE_MASK);
+ offset = OSC_FILE2MEM_OFF(pg[i]->off, pshift) & ~CFS_PAGE_MASK;
for (;;) {
pages--;
return count; /* doesn't end on page boundary */
i++;
- offset = pg[i]->off & (~CFS_PAGE_MASK);
+ offset = OSC_FILE2MEM_OFF(pg[i]->off, pshift) & ~CFS_PAGE_MASK;
if (offset != 0) /* doesn't start on page boundary */
return count;
else
pages_per_brw = page_count;
- pages_per_brw = max_unfragmented_pages(ppga, pages_per_brw);
+ pages_per_brw = max_unfragmented_pages(ppga, pages_per_brw, 0);
if (saved_oa != NULL) {
/* restore previously saved oa */
static int osc_brw_async(int cmd, struct obd_export *exp,
struct obd_info *oinfo, obd_count page_count,
struct brw_page *pga, struct obd_trans_info *oti,
- struct ptlrpc_request_set *set)
+ struct ptlrpc_request_set *set, int pshift)
{
struct brw_page **ppga, **orig;
int page_count_orig;
pages_per_brw = min_t(obd_count, page_count,
class_exp2cliimp(exp)->imp_obd->u.cli.cl_max_pages_per_rpc);
- pages_per_brw = max_unfragmented_pages(ppga, pages_per_brw);
+ pages_per_brw = max_unfragmented_pages(ppga, pages_per_brw,
+ pshift);
/* use ppga only if single RPC is going to fly */
if (pages_per_brw != page_count_orig || ppga != orig) {
}
rc = async_internal(cmd, exp, oa, oinfo->oi_md, pages_per_brw,
- copy, set);
+ copy, set, pshift);
if (rc != 0) {
if (copy != ppga)
obd_count i;
for (i = 0; i < aa->aa_page_count; i++)
osc_release_write_grant(aa->aa_cli, aa->aa_ppga[i], 1);
-
+
if (aa->aa_oa->o_flags & OBD_FL_TEMPORARY)
OBDO_FREE(aa->aa_oa);
}
}
sort_brw_pages(pga, page_count);
- rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count, pga, &req);
+ rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count, pga, &req, 0);
if (rc != 0) {
CERROR("prep_req failed: %d\n", rc);
GOTO(out, req = ERR_PTR(rc));