1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/liblustre/rw.c
38 * Lustre Light block IO
41 #define DEBUG_SUBSYSTEM S_LLITE
47 #include <sys/types.h>
49 #include <sys/queue.h>
66 #include "llite_lib.h"
/* Members of the I/O group descriptor (referred to as struct llu_io_group by
 * LLU_IO_GROUP_SIZE; the struct's opening line is outside this excerpt). */
70 struct obd_io_group *lig_oig; /* set up by oig_init(), waited on with oig_wait(), dropped by oig_release() */
71 struct inode *lig_inode; /* inode the group operates on; released with I_RELE() in put_io_group() */
72 struct lustre_rw_params *lig_params; /* caller-supplied parameters stored by get_io_group() */
76 struct ll_async_page *lig_llaps; /* points at the memory directly after the group header */
77 struct page *lig_pages; /* points at the memory after lig_llaps[maxpages] */
78 void *lig_llap_cookies; /* points at the memory after lig_pages[maxpages]; strided by llap_cookie_size */
/* Allocation size for a group able to hold x pages: the group header plus, for
 * each page, one ll_async_page, one struct page and llap_cookie_size bytes. */
81 #define LLU_IO_GROUP_SIZE(x) \
82 (sizeof(struct llu_io_group) + \
83 (sizeof(struct ll_async_page) + \
84 sizeof(struct page) + \
85 llap_cookie_size) * (x))
/* Members of the I/O session descriptor (sized by LLU_IO_SESSION_SIZE; the
 * struct's opening line is outside this excerpt). */
89 struct inode *lis_inode; /* stored by get_io_session(); released with I_RELE() in put_io_session() */
93 struct llu_io_group *lis_groups[0]; /* trailing array; llu_file_prwv() appends each group it has queued */
/* Allocation size for a session: the header plus 2 * x pointer-sized slots.
 * NOTE(review): llu_file_rwx() already passes ioctx_xtvlen * 2 as x, so the
 * doubling is applied twice -- confirm whether that head-room is intended. */
95 #define LLU_IO_SESSION_SIZE(x) \
96 (sizeof(struct llu_io_session) + (x) * 2 * sizeof(void *))
/* Function type with the same leading parameters as llu_file_prwv(); the rest
 * of the parameter list is outside this excerpt. */
99 typedef ssize_t llu_file_piov_t(const struct iovec *iovec, int iovlen,
100 _SYSIO_OFF_T pos, ssize_t len,
/* Per-page cookie size.  get_io_group() fills it in from obd_prep_async_page()
 * while it is still zero; it sizes and strides the cookie area of each group. */
103 size_t llap_cookie_size;
/* Find which stripe of the inode's layout (lli_smd) the given lock covers.
 *
 * The lookup is an obd_get_info() call keyed by KEY_LOCK_TO_STRIPE plus the
 * lock pointer; the answer is written to 'stripe' and asserted to be below
 * lsm_stripe_count.  Files with exactly one stripe take a shortcut whose body
 * is outside this excerpt (presumably returning 0 -- confirm).  A CERROR
 * message is emitted for an obd_get_info() failure; the return statements are
 * not visible here. */
105 static int llu_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock)
107 struct llu_inode_info *lli = llu_i2info(inode);
108 struct lov_stripe_md *lsm = lli->lli_smd;
109 struct obd_export *exp = llu_i2obdexp(inode);
112 struct ldlm_lock *lock;
113 } key = { .name = KEY_LOCK_TO_STRIPE, .lock = lock };
114 __u32 stripe, vallen = sizeof(stripe);
118 if (lsm->lsm_stripe_count == 1)
121 /* query the lov for the stripe offset of this lock */
122 rc = obd_get_info(exp, sizeof(key), &key, &vallen, &stripe, lsm);
124 CERROR("obd_get_info: rc = %d\n", rc);
127 LASSERT(stripe < lsm->lsm_stripe_count);
/* Lock callback for extent locks.
 *
 * Behaviour visible in this excerpt:
 *  - a non-zero 'data' value below 0x1000 is reported through LDLM_ERROR as
 *    bad callback data;
 *  - LDLM_CB_BLOCKING: the lock is cancelled through its handle with
 *    ldlm_cli_cancel(); a CERROR message exists for a failed cancel;
 *  - LDLM_CB_CANCELING: when the lock was granted (requested mode equals
 *    granted mode), the kms of the stripe the lock covers is recomputed with
 *    ldlm_extent_shift_kms() under lock_res_and_lock() and stored in loi_kms.
 *
 * The last parameter, the switch header, the NULL checks and the return paths
 * are on lines outside this excerpt. */
131 int llu_extent_lock_cancel_cb(struct ldlm_lock *lock,
132 struct ldlm_lock_desc *new, void *data,
135 struct lustre_handle lockh = { 0 };
139 if ((unsigned long)data > 0 && (unsigned long)data < 0x1000) {
140 LDLM_ERROR(lock, "cancelling lock with bad data %p", data);
145 case LDLM_CB_BLOCKING:
146 ldlm_lock2handle(lock, &lockh);
147 rc = ldlm_cli_cancel(&lockh);
149 CERROR("ldlm_cli_cancel failed: %d\n", rc);
151 case LDLM_CB_CANCELING: {
153 struct llu_inode_info *lli;
154 struct lov_stripe_md *lsm;
158 /* This lock wasn't granted, don't try to evict pages */
159 if (lock->l_req_mode != lock->l_granted_mode)
162 inode = llu_inode_from_lock(lock);
165 lli= llu_i2info(inode);
172 stripe = llu_lock_to_stripe_offset(inode, lock);
173 lock_res_and_lock(lock);
174 kms = ldlm_extent_shift_kms(lock,
175 lsm->lsm_oinfo[stripe]->loi_kms);
176 unlock_res_and_lock(lock);
177 if (lsm->lsm_oinfo[stripe]->loi_kms != kms)
178 LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
179 lsm->lsm_oinfo[stripe]->loi_kms, kms);
180 lsm->lsm_oinfo[stripe]->loi_kms = kms;
/* Glimpse callback: reply to a glimpse request on 'lock' with the kms of the
 * stripe that the lock belongs to.
 *
 * lock  lock being glimpsed; the inode is looked up from it
 * reqp  used as a struct ptlrpc_request; the reply is packed into it
 *
 * On the normal path a two-buffer reply (ptlrpc_body + lvb) is packed and
 * lvb_size is set from loi_kms of the selected stripe.  Paths that find no
 * usable inode/stripe data leave with -ELDLM_NO_LOCK_DATA, in which case a
 * one-buffer reply is packed instead.  Some guard conditions, the labels and
 * the final return are on lines outside this excerpt. */
192 static int llu_glimpse_callback(struct ldlm_lock *lock, void *reqp)
194 struct ptlrpc_request *req = reqp;
195 struct inode *inode = llu_inode_from_lock(lock);
196 struct llu_inode_info *lli;
198 __u32 size[2] = { sizeof(struct ptlrpc_body), sizeof(*lvb) };
203 GOTO(out, rc = -ELDLM_NO_LOCK_DATA);
204 lli = llu_i2info(inode);
206 GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
207 if (lli->lli_smd == NULL)
208 GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
210 /* First, find out which stripe index this lock corresponds to.
   * Only multi-stripe files need the lookup; the initial value of
   * 'stripe' is declared on a line outside this excerpt. */
211 if (lli->lli_smd->lsm_stripe_count > 1)
212 stripe = llu_lock_to_stripe_offset(inode, lock);
214 rc = lustre_pack_reply(req, 2, size, NULL);
218 lvb = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*lvb));
219 lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe]->loi_kms;
221 LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64,
222 (long long)llu_i2stat(inode)->st_size, stripe,lvb->lvb_size);
226 /* These errors are normal races, so we don't want to fill the console
227 * with messages by calling ptlrpc_error() */
228 if (rc == -ELDLM_NO_LOCK_DATA)
229 lustre_pack_reply(req, 1, NULL, NULL);
/* Refresh the inode's size, block count and timestamps from the OSTs.
 *
 * An LCK_PR extent request with LDLM_FL_HAS_INTENT, ending at OBD_OBJECT_EOF,
 * is issued through obd_enqueue_rqset().  On failure the error is logged and
 * a positive rc is converted to -EIO.  On success the merged lvb (see
 * obd_merge_lvb) is copied into the inode's stat data while the stripe lock
 * is held.  The early-out for inodes without objects and the final return are
 * on lines outside this excerpt. */
235 /* NB: lov_merge_size will prefer locally cached writes if they extend the
236 * file (because it prefers KMS over RSS when larger) */
237 int llu_glimpse_size(struct inode *inode)
239 struct llu_inode_info *lli = llu_i2info(inode);
240 struct intnl_stat *st = llu_i2stat(inode);
241 struct llu_sb_info *sbi = llu_i2sbi(inode);
242 struct lustre_handle lockh = { 0 };
243 struct ldlm_enqueue_info einfo = { 0 };
244 struct obd_info oinfo = { { { 0 } } };
249 CDEBUG(D_DLMTRACE, "Glimpsing inode %llu\n", (long long)st->st_ino);
252 CDEBUG(D_DLMTRACE, "No objects for inode %llu\n",
253 (long long)st->st_ino);
257 einfo.ei_type = LDLM_EXTENT;
258 einfo.ei_mode = LCK_PR;
259 einfo.ei_cb_bl = osc_extent_blocking_cb;
260 einfo.ei_cb_cp = ldlm_completion_ast;
261 einfo.ei_cb_gl = llu_glimpse_callback;
262 einfo.ei_cbdata = inode;
264 oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
265 oinfo.oi_lockh = &lockh;
266 oinfo.oi_md = lli->lli_smd;
267 oinfo.oi_flags = LDLM_FL_HAS_INTENT;
269 rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo);
271 CERROR("obd_enqueue returned rc %d, returning -EIO\n", rc);
272 RETURN(rc > 0 ? -EIO : rc);
275 lov_stripe_lock(lli->lli_smd);
276 inode_init_lvb(inode, &lvb);
277 /* merge the timestamps most recently obtained from the MDS with the
278 timestamps obtained from the OSTs */
280 rc = obd_merge_lvb(sbi->ll_osc_exp, lli->lli_smd, &lvb, 0);
281 st->st_size = lvb.lvb_size;
282 st->st_blocks = lvb.lvb_blocks;
283 /* handle st_blocks overflow gracefully: if the stored value came out
   * smaller than the source value, saturate it */
284 if (st->st_blocks < lvb.lvb_blocks)
285 st->st_blocks = ~0UL;
286 st->st_mtime = lvb.lvb_mtime;
287 st->st_atime = lvb.lvb_atime;
288 st->st_ctime = lvb.lvb_ctime;
289 lov_stripe_unlock(lli->lli_smd);
291 CDEBUG(D_DLMTRACE, "glimpse: size: "LPU64", blocks: "LPU64"\n",
292 (__u64)st->st_size, (__u64)st->st_blocks);
/* Take an extent lock of the given mode on the inode's objects.
 *
 * fd      open-file data; may be NULL (it is tested before use)
 * inode   inode being locked; also passed as the callback data
 * lsm     stripe metadata used when merging the lvb afterwards
 * mode    lock mode; LCK_NL skips locking
 * policy  in: requested extent; out: overwritten with the policy that
 *         obd_enqueue() leaves in oinfo
 * lockh   must be unused on entry (asserted); receives the lock handle
 *
 * Locking is also skipped for LL_FILE_IGNORE_LOCK files and LL_SBI_NOLCK
 * mounts.  After the enqueue the merged lvb refreshes the inode timestamps,
 * and st_size as well when the extent is the whole file [0, OBD_OBJECT_EOF].
 * The last parameter (used below as ast_flags), the skip branch body and the
 * return paths are on lines outside this excerpt. */
297 int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
298 struct lov_stripe_md *lsm, int mode,
299 ldlm_policy_data_t *policy, struct lustre_handle *lockh,
302 struct llu_sb_info *sbi = llu_i2sbi(inode);
303 struct intnl_stat *st = llu_i2stat(inode);
304 struct ldlm_enqueue_info einfo = { 0 };
305 struct obd_info oinfo = { { { 0 } } };
310 LASSERT(!lustre_handle_is_used(lockh));
311 CLASSERT(ELDLM_OK == 0);
313 /* XXX phil: can we do this? won't it screw the file size up? */
314 if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
315 (sbi->ll_flags & LL_SBI_NOLCK) || mode == LCK_NL)
318 CDEBUG(D_DLMTRACE, "Locking inode %llu, start "LPU64" end "LPU64"\n",
319 (long long)st->st_ino, policy->l_extent.start,
320 policy->l_extent.end);
322 einfo.ei_type = LDLM_EXTENT;
323 einfo.ei_mode = mode;
324 einfo.ei_cb_bl = osc_extent_blocking_cb;
325 einfo.ei_cb_cp = ldlm_completion_ast;
326 einfo.ei_cb_gl = llu_glimpse_callback;
327 einfo.ei_cbdata = inode;
329 oinfo.oi_policy = *policy;
330 oinfo.oi_lockh = lockh;
332 oinfo.oi_flags = ast_flags;
334 rc = obd_enqueue(sbi->ll_osc_exp, &oinfo, &einfo, NULL);
335 *policy = oinfo.oi_policy;
339 inode_init_lvb(inode, &lvb);
340 obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 1);
341 if (policy->l_extent.start == 0 &&
342 policy->l_extent.end == OBD_OBJECT_EOF)
343 st->st_size = lvb.lvb_size;
346 st->st_mtime = lvb.lvb_mtime;
347 st->st_atime = lvb.lvb_atime;
348 st->st_ctime = lvb.lvb_ctime;
/* Release an extent lock taken by llu_extent_lock().
 *
 * The same three conditions that bypass locking there (LL_FILE_IGNORE_LOCK,
 * LL_SBI_NOLCK, mode == LCK_NL) are tested here; otherwise the handle is
 * cancelled with obd_cancel().  The bypass branch body and the return are on
 * lines outside this excerpt. */
354 int llu_extent_unlock(struct ll_file_data *fd, struct inode *inode,
355 struct lov_stripe_md *lsm, int mode,
356 struct lustre_handle *lockh)
358 struct llu_sb_info *sbi = llu_i2sbi(inode);
362 CLASSERT(ELDLM_OK == 0);
364 /* XXX phil: can we do this? won't it screw the file size up? */
365 if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
366 (sbi->ll_flags & LL_SBI_NOLCK) || mode == LCK_NL)
369 rc = obd_cancel(sbi->ll_osc_exp, lsm, mode, lockh, 0, 0);
/* Marker value that llu_queue_pio() stores in llap_magic of every
 * ll_async_page it prepares. */
374 #define LLAP_MAGIC 12346789
/* Per-page bookkeeping for one queued page.  Only two members are visible
 * in this excerpt; llap_magic, llap_cookie and llap_queued are used elsewhere
 * in the file and are presumably declared on the missing lines. */
376 struct ll_async_page {
380 struct page *llap_page; /* page this entry describes; set in llu_queue_pio() */
381 struct inode *llap_inode; /* inode the page belongs to; set from the group's lig_inode */
/* ap_fill_obdo hook: populate 'oa' for the page identified by 'data'.
 *
 * data  cookie converted to an ll_async_page via LLAP_FROM_COOKIE
 * cmd   when OBD_BRW_WRITE is set, additional attributes are copied
 * oa    obdo to fill: o_id comes from the stripe metadata, the remaining
 *       attributes are copied from the inode by obdo_from_inode()
 *
 * Type and atime are always requested; writes add mtime, ctime, uid, gid,
 * fid and generation. */
384 static void llu_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
386 struct ll_async_page *llap;
388 struct lov_stripe_md *lsm;
389 obd_flag valid_flags;
392 llap = LLAP_FROM_COOKIE(data);
393 inode = llap->llap_inode;
394 lsm = llu_i2info(inode)->lli_smd;
396 oa->o_id = lsm->lsm_object_id;
397 oa->o_valid = OBD_MD_FLID;
398 valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME;
399 if (cmd & OBD_BRW_WRITE)
400 valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
401 OBD_MD_FLUID | OBD_MD_FLGID |
402 OBD_MD_FLFID | OBD_MD_FLGENER;
404 obdo_from_inode(oa, inode, valid_flags);
/* ap_update_obdo hook: copy the attributes selected by 'valid' from the
 * page's inode into 'oa'.  The declaration of 'valid' is on a parameter line
 * outside this excerpt. */
408 static void llu_ap_update_obdo(void *data, int cmd, struct obdo *oa,
411 struct ll_async_page *llap;
414 llap = LLAP_FROM_COOKIE(data);
415 obdo_from_inode(oa, llap->llap_inode, valid);
420 /* Called for each page in a completed rpc.
   * Clears the page's llap_queued flag; for write commands a "writeback
   * error" message carrying rc is logged (the condition guarding it and the
   * return are on lines outside this excerpt). */
421 static int llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
423 struct ll_async_page *llap;
427 llap = LLAP_FROM_COOKIE(data);
428 llap->llap_queued = 0;
429 page = llap->llap_page;
432 if (cmd & OBD_BRW_WRITE)
433 CERROR("writeback error on page %p index %ld: %d\n",
434 page, page->index, rc);
/* Async-page operations handed to the OBD layer.  Only the obdo fill/update
 * and completion hooks are provided; make_ready and refresh_count are left
 * NULL. */
439 static struct obd_async_page_ops llu_async_page_ops = {
440 .ap_make_ready = NULL,
441 .ap_refresh_count = NULL,
442 .ap_fill_obdo = llu_ap_fill_obdo,
443 .ap_update_obdo = llu_ap_update_obdo,
444 .ap_completion = llu_ap_completion,
/* Cut one user buffer into page-sized pieces, record them in the group's
 * page array and queue every piece on the group's obd_io_group.
 *
 * cmd    OBD_BRW_READ or another BRW command (only READ is tested here)
 * group  I/O group receiving the pages; lig_npages and lig_rwcount grow
 * buf    user buffer; each page's addr is set to buf minus the in-page offset
 * count  number of bytes in buf
 * pos    file offset at which the transfer starts
 *
 * Reads are clipped at st_size.  The return value is presumably the number of
 * bytes queued (ret_bytes) or a negative error; the loop header, the error
 * checks and the return statements are on lines outside this excerpt. */
447 static int llu_queue_pio(int cmd, struct llu_io_group *group,
448 char *buf, size_t count, loff_t pos)
450 struct llu_inode_info *lli = llu_i2info(group->lig_inode);
451 struct intnl_stat *st = llu_i2stat(group->lig_inode);
452 struct lov_stripe_md *lsm = lli->lli_smd;
453 struct obd_export *exp = llu_i2obdexp(group->lig_inode);
454 struct page *pages = &group->lig_pages[group->lig_npages],*page = pages;
455 struct ll_async_page *llap = &group->lig_llaps[group->lig_npages];
456 void *llap_cookie = group->lig_llap_cookies +
457 llap_cookie_size * group->lig_npages;
458 int i, rc, npages = 0, ret_bytes = 0;
465 local_lock = group->lig_params->lrp_lock_mode != LCK_NL;
466 /* prepare the pages array */
468 unsigned long index, offset, bytes;
470 offset = (pos & ~CFS_PAGE_MASK);
471 index = pos >> CFS_PAGE_SHIFT;
472 bytes = CFS_PAGE_SIZE - offset;
476 /* prevent read beyond file range */
477 if (/* local_lock && */
478 cmd == OBD_BRW_READ && pos + bytes >= st->st_size) {
479 if (pos >= st->st_size)
481 bytes = st->st_size - pos;
484 /* prepare page for this index */
486 page->addr = buf - offset;
488 page->_offset = offset;
489 page->_count = bytes;
497 group->lig_rwcount += bytes;
501 group->lig_npages += npages;
503 for (i = 0, page = pages; i < npages;
504 i++, page++, llap++, llap_cookie += llap_cookie_size){
505 llap->llap_magic = LLAP_MAGIC;
506 llap->llap_cookie = llap_cookie;
507 rc = obd_prep_async_page(exp, lsm, NULL, page,
508 (obd_off)page->index << CFS_PAGE_SHIFT,
510 llap, &llap->llap_cookie,
511 /* no cache in liblustre at all */
516 llap->llap_cookie = NULL;
/* NOTE(review): the format string labels the third argument "group" but
 * llap->llap_cookie is passed, and the offset is computed from 'pages' (the
 * first page of this call) rather than 'page' -- confirm intent. */
519 CDEBUG(D_CACHE, "llap %p page %p group %p obj off "LPU64"\n",
520 llap, page, llap->llap_cookie,
521 (obd_off)pages->index << CFS_PAGE_SHIFT);
522 page->private = (unsigned long)llap;
523 llap->llap_page = page;
524 llap->llap_inode = group->lig_inode;
526 rc = obd_queue_group_io(exp, lsm, NULL, group->lig_oig,
527 llap->llap_cookie, cmd,
528 page->_offset, page->_count,
529 group->lig_params->lrp_brw_flags,
530 ASYNC_READY | ASYNC_URGENT |
531 ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
532 if (!local_lock && cmd == OBD_BRW_READ) {
534 * In OST-side locking case short reads cannot be
537 * The root of the problem is that
539 * kms = lov_merge_size(lsm, 1);
541 * glimpse_size(inode);
545 * logic in the read code (both llite and liblustre)
546 * only works correctly when client holds DLM lock on
547 * [start, end]. Without DLM lock KMS can be
548 * completely out of date, and client can either make
549 * spurious short-read (missing concurrent write), or
550 * return stale data (missing concurrent
551 * truncate). For llite client this is fatal, because
552 * incorrect data are cached and can be later sent
553 * back to the server (vide bug 5047). This is hard to
554 * fix by handling short-reads on the server, as there
555 * is no easy way to communicate file size (or amount
556 * of bytes read/written) back to the client,
557 * _especially_ because OSC pages can be sliced and
558 * dices into multiple RPCs arbitrary. Fortunately,
559 * liblustre doesn't cache data and the worst case is
560 * that we get race with concurrent write or truncate.
568 llap->llap_queued = 1;
/* Allocate and initialise an I/O group with room for maxpages pages.
 *
 * inode     inode the group will operate on
 * maxpages  capacity of the llap, page and cookie arrays
 * params    read/write parameters stored in the group
 *
 * llap_cookie_size is fetched from obd_prep_async_page() on first use.  The
 * group is one allocation of LLU_IO_GROUP_SIZE(maxpages) bytes, carved into
 * header, llap array, page array and cookie area in that order.
 * ERR_PTR(-ENOMEM) is returned when the allocation fails; the allocation is
 * freed again on the oig_init() error path.  Remaining field setup and the
 * success return are on lines outside this excerpt. */
575 struct llu_io_group * get_io_group(struct inode *inode, int maxpages,
576 struct lustre_rw_params *params)
578 struct llu_io_group *group;
581 if (!llap_cookie_size)
582 llap_cookie_size = obd_prep_async_page(llu_i2obdexp(inode),
587 OBD_ALLOC(group, LLU_IO_GROUP_SIZE(maxpages));
589 return ERR_PTR(-ENOMEM);
592 group->lig_inode = inode;
593 group->lig_maxpages = maxpages;
594 group->lig_params = params;
595 group->lig_llaps = (struct ll_async_page *)(group + 1);
596 group->lig_pages = (struct page *)(&group->lig_llaps[maxpages]);
597 group->lig_llap_cookies = (void *)(&group->lig_pages[maxpages]);
599 rc = oig_init(&group->lig_oig);
601 OBD_FREE(group, LLU_IO_GROUP_SIZE(maxpages));
/* Upper bound on the pages needed for a transfer of len bytes spread over
 * iovlen iovec entries: len rounded up to whole pages, plus 2, plus
 * (iovlen - 1).  Presumably the extras cover an unaligned head/tail and one
 * additional partial page per iovec boundary -- confirm. */
608 static int max_io_pages(ssize_t len, int iovlen)
610 return (((len + CFS_PAGE_SIZE -1) >> CFS_PAGE_SHIFT) + 2 + iovlen - 1);
/* Destroy an I/O group: tear down every prepared async page that still has
 * a cookie, drop the inode reference with I_RELE(), release the
 * obd_io_group and free the single allocation made by get_io_group(). */
614 void put_io_group(struct llu_io_group *group)
616 struct lov_stripe_md *lsm = llu_i2info(group->lig_inode)->lli_smd;
617 struct obd_export *exp = llu_i2obdexp(group->lig_inode);
618 struct ll_async_page *llap = group->lig_llaps;
621 for (i = 0; i < group->lig_npages; i++, llap++) {
622 if (llap->llap_cookie)
623 obd_teardown_async_page(exp, lsm, NULL,
627 I_RELE(group->lig_inode);
629 oig_release(group->lig_oig);
630 OBD_FREE(group, LLU_IO_GROUP_SIZE(group->lig_maxpages));
/* Perform one extent of a read or write on behalf of an I/O session.
 *
 * iovec/iovlen  buffers for this extent
 * pos           file offset of the extent
 * len           number of bytes in the extent
 * (last param)  session pointer, used below as 'private'; its declaration is
 *               on a line outside this excerpt
 *
 * Visible flow: build lock parameters, allocate an I/O group sized by
 * max_io_pages(), take the extent lock, glimpse the size for reads reaching
 * the kms, update timestamps under the stripe lock, queue every iovec
 * through llu_queue_pio(), trigger the group I/O and wait for it, unlock,
 * and append the group to the session.  Error paths unlock (err_unlock),
 * release the group and return the error.  Several conditions, labels and
 * the success return are on lines outside this excerpt. */
634 ssize_t llu_file_prwv(const struct iovec *iovec, int iovlen,
635 _SYSIO_OFF_T pos, ssize_t len,
638 struct llu_io_session *session = (struct llu_io_session *) private;
639 struct inode *inode = session->lis_inode;
640 struct llu_inode_info *lli = llu_i2info(inode);
641 struct intnl_stat *st = llu_i2stat(inode);
642 struct ll_file_data *fd = lli->lli_file_data;
643 struct lustre_handle lockh = {0};
644 struct lov_stripe_md *lsm = lli->lli_smd;
645 struct obd_export *exp = NULL;
646 struct llu_io_group *iogroup;
647 struct lustre_rw_params p;
650 int err, is_read, iovidx, ret;
652 ssize_t ret_len = len;
655 /* in a large iov read/write we'll be repeatedly called.
656 * so give a chance to answer cancel ast here
658 liblustre_wait_event(0);
660 exp = llu_i2obdexp(inode);
664 if (len == 0 || iovlen == 0)
667 if (pos + len > lli->lli_maxbytes)
670 lustre_build_lock_params(session->lis_cmd, lli->lli_open_flags,
671 lli->lli_sbi->ll_lco.lco_flags,
674 iogroup = get_io_group(inode, max_io_pages(len, iovlen), &p);
676 RETURN(PTR_ERR(iogroup));
678 local_lock = p.lrp_lock_mode != LCK_NL;
680 err = llu_extent_lock(fd, inode, lsm, p.lrp_lock_mode, &p.lrp_policy,
681 &lockh, p.lrp_ast_flags);
685 is_read = (session->lis_cmd == OBD_BRW_READ);
688 * If OST-side locking is used, KMS can be completely out of
689 * date, and, hence, cannot be used for short-read
690 * detection. Rely on the OST to handle short reads in that case.
692 inode_init_lvb(inode, &lvb);
693 obd_merge_lvb(exp, lsm, &lvb, 1);
695 /* extent.end is last byte of the range */
696 if (p.lrp_policy.l_extent.end >= kms) {
697 /* A glimpse is necessary to determine whether
698 * we return a short read or some zeroes at
699 * the end of the buffer
701 * In the case of OST-side locking KMS can be
702 * completely out of date and short-reads may be
703 * mishandled. See llu_queue_pio() for more detailed
706 if ((err = llu_glimpse_size(inode))) {
707 GOTO(err_unlock, err);
709 /* If the target page index exceeds the end-of-file
710 * page index, return directly. --bug 17336 */
711 loff_t size = st->st_size;
712 unsigned long cur_index = pos >> CFS_PAGE_SHIFT;
714 if ((size == 0 && cur_index != 0) ||
715 (((size - 1) >> CFS_PAGE_SHIFT) < cur_index))
716 GOTO(err_unlock, err);
721 } else if (lli->lli_open_flags & O_APPEND) {
726 struct ost_lvb xtimes;
728 lov_stripe_lock(lsm);
729 /* inode might mtime and ctime set earlier in race with stat
730 * which merged into inode timestamps obtained from mds and
732 st->st_atime = st->st_mtime = st->st_ctime = CURRENT_TIME;
733 xtimes.lvb_atime = st->st_atime;
734 xtimes.lvb_mtime = st->st_mtime;
735 xtimes.lvb_ctime = st->st_ctime;
736 obd_update_lvb(exp, lsm, &xtimes,
737 is_read ? OBD_MD_FLATIME :
738 (OBD_MD_FLMTIME | OBD_MD_FLCTIME));
739 lov_stripe_unlock(lsm);
742 for (iovidx = 0; iovidx < iovlen; iovidx++) {
743 char *buf = (char *) iovec[iovidx].iov_base;
744 size_t count = iovec[iovidx].iov_len;
750 if (IS_BAD_PTR(buf) || IS_BAD_PTR(buf + count)) {
751 GOTO(err_unlock, err = -EFAULT);
755 if (/* local_lock && */ pos >= st->st_size)
758 if (pos >= lli->lli_maxbytes) {
759 GOTO(err_unlock, err = -EFBIG);
761 if (pos + count >= lli->lli_maxbytes)
762 count = lli->lli_maxbytes - pos;
765 ret = llu_queue_pio(session->lis_cmd, iogroup, buf, count, pos);
767 GOTO(err_unlock, err = ret);
771 LASSERT(ret == count);
772 obd_adjust_kms(exp, lsm, pos, 0);
773 /* file size grow immediately */
774 if (pos > st->st_size)
782 LASSERT(len == 0 || is_read); /* libsysio should guarantee this */
784 err = obd_trigger_group_io(exp, lsm, NULL, iogroup->lig_oig);
786 GOTO(err_unlock, err);
788 err = oig_wait(iogroup->lig_oig);
790 CERROR("%s error: %s\n", is_read ? "read" : "write", strerror(-err));
791 GOTO(err_unlock, err);
794 ret = llu_extent_unlock(fd, inode, lsm, p.lrp_lock_mode, &lockh);
796 CERROR("extent unlock error %d\n", ret);
798 session->lis_groups[session->lis_ngroups++] = iogroup;
802 llu_extent_unlock(fd, inode, lsm, p.lrp_lock_mode, &lockh);
804 put_io_group(iogroup);
805 RETURN((ssize_t)err);
/* Allocate an I/O session with room for 'ngroups' groups and record the
 * inode, the group capacity and the BRW command in it.  The allocation check,
 * any inode reference taking and the return are on lines outside this
 * excerpt. */
809 struct llu_io_session *get_io_session(struct inode *ino, int ngroups, int cmd)
811 struct llu_io_session *session;
813 OBD_ALLOC(session, LLU_IO_SESSION_SIZE(ngroups));
818 session->lis_inode = ino;
819 session->lis_max_groups = ngroups;
820 session->lis_cmd = cmd;
/* Destroy an I/O session: release every group still attached to it (clearing
 * the slot afterwards), drop the inode reference with I_RELE() and free the
 * session allocation. */
824 static void put_io_session(struct llu_io_session *session)
828 for (i = 0; i < session->lis_ngroups; i++) {
829 if (session->lis_groups[i]) {
830 put_io_group(session->lis_groups[i]);
831 session->lis_groups[i] = NULL;
835 I_RELE(session->lis_inode);
836 OBD_FREE(session, LLU_IO_SESSION_SIZE(session->lis_max_groups));
/* Common read/write entry: run every extent of the ioctx through
 * llu_file_prwv() inside a freshly created I/O session.
 *
 * ino    inode to operate on; directories and non-regular files are tested
 *        for and rejected on lines outside this excerpt
 * ioctx  libsysio I/O context supplying the extent and iovec vectors
 * read   (declared on a missing line) non-zero selects OBD_BRW_READ,
 *        zero selects OBD_BRW_WRITE
 *
 * The session is sized for ioctx_xtvlen * 2 groups.  The session is stored
 * in ioctx_private on one path and released with put_io_session() on
 * another; the conditions selecting between them and the return values are
 * not visible here. */
839 static int llu_file_rwx(struct inode *ino,
843 struct llu_io_session *session;
845 int cmd = read ? OBD_BRW_READ : OBD_BRW_WRITE;
848 LASSERT(ioctx->ioctx_xtvlen >= 0);
849 LASSERT(ioctx->ioctx_iovlen >= 0);
851 liblustre_wait_event(0);
853 if (!ioctx->ioctx_xtvlen)
856 /* XXX consider other types later */
857 if (S_ISDIR(llu_i2stat(ino)->st_mode))
859 if (!S_ISREG(llu_i2stat(ino)->st_mode))
862 session = get_io_session(ino, ioctx->ioctx_xtvlen * 2, cmd);
866 cc = _sysio_enumerate_extents(ioctx->ioctx_xtv, ioctx->ioctx_xtvlen,
867 ioctx->ioctx_iov, ioctx->ioctx_iovlen,
868 llu_file_prwv, session);
871 LASSERT(!ioctx->ioctx_cc);
872 ioctx->ioctx_private = session;
875 put_io_session(session);
878 liblustre_wait_event(0);
/* Read entry point: stamp the inode's atime with CURRENT_TIME, then hand the
 * request to llu_file_rwx() in read mode (last argument 1). */
882 int llu_iop_read(struct inode *ino,
886 struct intnl_stat *st = llu_i2stat(ino);
887 st->st_atime = CURRENT_TIME;
889 return llu_file_rwx(ino, ioctx, 1);
/* Write entry point: stamp the inode's mtime and ctime with CURRENT_TIME,
 * then hand the request to llu_file_rwx() in write mode (last argument 0). */
892 int llu_iop_write(struct inode *ino,
895 struct intnl_stat *st = llu_i2stat(ino);
896 st->st_mtime = st->st_ctime = CURRENT_TIME;
898 return llu_file_rwx(ino, ioctx, 0);
/* Completion entry point for an ioctx whose session was stored in
 * ioctx_private.
 *
 * Each group of the session is waited on with oig_wait(); its lig_rwcount is
 * added to ioctx_cc and the slot is cleared.  An error path sets ioctx_cc to
 * -1 and ioctx_errno to -rc.  Finally the session is released and
 * ioctx_private reset to NULL.  The conditions around these steps and the
 * return value are on lines outside this excerpt. */
901 int llu_iop_iodone(struct ioctx *ioctx)
903 struct llu_io_session *session;
904 struct llu_io_group *group;
905 int i, err = 0, rc = 0;
908 liblustre_wait_event(0);
910 session = (struct llu_io_session *) ioctx->ioctx_private;
912 LASSERT(!IS_ERR(session));
914 for (i = 0; i < session->lis_ngroups; i++) {
915 group = session->lis_groups[i];
918 err = oig_wait(group->lig_oig);
923 ioctx->ioctx_cc += group->lig_rwcount;
925 session->lis_groups[i] = NULL;
931 ioctx->ioctx_cc = -1;
932 ioctx->ioctx_errno = -rc;
935 put_io_session(session);
936 ioctx->ioctx_private = NULL;
937 liblustre_wait_event(0);