X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Ffile.c;h=f381fb82f3af1bd162875bf12ddee273b5b03c5e;hp=8db92ee530487c8736dff0d1e5c532b40d615c4a;hb=a25adb47c7b7eeb68a922e2647d74eeff3401c6a;hpb=8c82ab5cd92ee70a4cda2fe6437861e56f3fe3d5 diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 8db92ee..f381fb8 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -28,9 +28,6 @@ #include #include #include -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include -#endif #include "llite_internal.h" /* also used by llite/special.c:ll_special_open() */ @@ -38,14 +35,14 @@ struct ll_file_data *ll_file_data_get(void) { struct ll_file_data *fd; - OBD_SLAB_ALLOC(fd, ll_file_data_slab, GFP_KERNEL, sizeof *fd); + OBD_SLAB_ALLOC_PTR(fd, ll_file_data_slab); return fd; } static void ll_file_data_put(struct ll_file_data *fd) { if (fd != NULL) - OBD_SLAB_FREE(fd, ll_file_data_slab, sizeof *fd); + OBD_SLAB_FREE_PTR(fd, ll_file_data_slab); } void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, @@ -56,7 +53,7 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, op_data->op_attr.ia_atime = inode->i_atime; op_data->op_attr.ia_mtime = inode->i_mtime; op_data->op_attr.ia_ctime = inode->i_ctime; - op_data->op_attr.ia_size = inode->i_size; + op_data->op_attr.ia_size = i_size_read(inode); op_data->op_attr_blocks = inode->i_blocks; ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags; op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch; @@ -95,7 +92,7 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, struct ptlrpc_request *req = NULL; struct obd_device *obd = class_exp2obd(exp); int epoch_close = 1; - int rc; + int seq_end = 0, rc; ENTRY; if (obd == NULL) { @@ -122,7 +119,9 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, ll_prepare_close(inode, op_data, och); epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE); - rc = md_close(md_exp, op_data, och, &req); + rc = md_close(md_exp, op_data, och->och_mod, &req); + if (rc != -EAGAIN) + seq_end = 1; if (rc == -EAGAIN) { /* This close must have the epoch closed. */ @@ -130,8 +129,8 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, LASSERT(epoch_close); /* MDS has instructed us to obtain Size-on-MDS attribute from * OSTs and send setattr to back to MDS. */ - rc = ll_sizeonmds_update(inode, &och->och_fh, - op_data->op_ioepoch); + rc = ll_sizeonmds_update(inode, och->och_mod, + &och->och_fh, op_data->op_ioepoch); if (rc) { CERROR("inode %lu mdc Size-on-MDS update failed: " "rc = %d\n", inode->i_ino, rc); @@ -150,7 +149,6 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, inode->i_ino, rc); } - ptlrpc_req_finished(req); /* This is close request */ EXIT; out: @@ -158,12 +156,15 @@ out: S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) { ll_queue_done_writing(inode, LLIF_DONE_WRITING); } else { + if (seq_end) + ptlrpc_close_replay_seq(req); md_clear_open_replay_data(md_exp, och); /* Free @och if it is not waiting for DONE_WRITING. */ och->och_fh.cookie = DEAD_HANDLE_MAGIC; OBD_FREE_PTR(och); } - + if (req) /* This is close request */ + ptlrpc_req_finished(req); return rc; } @@ -356,7 +357,7 @@ static int ll_intent_file_open(struct file *file, void *lmm, if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) { rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp); - CERROR("lock enqueue: err: %d\n", rc); + CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc); GOTO(out, rc); } @@ -386,8 +387,10 @@ static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli, LASSERT(och); body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body)); - LASSERT(body != NULL); /* reply already checked out */ - LASSERT_REPSWABBED(req, DLM_REPLY_REC_OFF); /* and swabbed in md_enqueue */ + /* reply already checked out */ + LASSERT(body != NULL); + /* and swabbed in md_enqueue */ + LASSERT(lustre_rep_swabbed(req, DLM_REPLY_REC_OFF)); memcpy(&och->och_fh, &body->handle, sizeof(body->handle)); och->och_magic = OBD_CLIENT_HANDLE_MAGIC; @@ -470,7 +473,7 @@ int ll_file_open(struct inode *inode, struct file *file) if (inode->i_sb->s_root == file->f_dentry) RETURN(0); -#ifdef LUSTRE_KERNEL_VERSION +#ifdef HAVE_VFS_INTENT_PATCHES it = file->f_it; #else it = file->private_data; /* XXX: compat macro */ @@ -673,8 +676,8 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo) obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid); CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %lu, blksize %lu\n", - lli->lli_smd->lsm_object_id, inode->i_size, inode->i_blocks, - inode->i_blksize); + lli->lli_smd->lsm_object_id, i_size_read(inode), + inode->i_blocks, inode->i_blksize); RETURN(0); } @@ -747,12 +750,13 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, struct page *page; int rc, rc2, discard = lock->l_flags & LDLM_FL_DISCARD_DATA; struct lustre_handle lockh; - ENTRY; + struct address_space *mapping = inode->i_mapping; - memcpy(&tmpex, &lock->l_policy_data, sizeof(tmpex)); + ENTRY; + tmpex = lock->l_policy_data; CDEBUG(D_INODE|D_PAGE, "inode %lu(%p) ["LPU64"->"LPU64"] size: %llu\n", inode->i_ino, inode, tmpex.l_extent.start, tmpex.l_extent.end, - inode->i_size); + i_size_read(inode)); /* our locks are page granular thanks to osc_enqueue, we invalidate the * whole page. */ @@ -777,7 +781,8 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, if (end < tmpex.l_extent.end >> CFS_PAGE_SHIFT) end = ~0; - i = inode->i_size ? (__u64)(inode->i_size - 1) >> CFS_PAGE_SHIFT : 0; + i = i_size_read(inode) ? (__u64)(i_size_read(inode) - 1) >> + CFS_PAGE_SHIFT : 0; if (i < end) end = i; @@ -793,8 +798,8 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, for (i = start; i <= end; i += (j + skip)) { j = min(count - (i % count), end - i + 1); LASSERT(j > 0); - LASSERT(inode->i_mapping); - if (ll_teardown_mmaps(inode->i_mapping, + LASSERT(mapping); + if (ll_teardown_mmaps(mapping, (__u64)i << CFS_PAGE_SHIFT, ((__u64)(i+j) << CFS_PAGE_SHIFT) - 1) ) break; @@ -819,14 +824,14 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, tmpex.l_extent.start, lock->l_policy_data.l_extent.end, start, i, end); - if (!mapping_has_pages(inode->i_mapping)) { + if (!mapping_has_pages(mapping)) { CDEBUG(D_INODE|D_PAGE, "nothing left\n"); break; } cond_resched(); - page = find_get_page(inode->i_mapping, i); + page = find_get_page(mapping, i); if (page == NULL) continue; LL_CDEBUG_PAGE(D_PAGE, page, "lock page idx %lu ext "LPU64"\n", @@ -836,12 +841,19 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, /* page->mapping to check with racing against teardown */ if (!discard && clear_page_dirty_for_io(page)) { rc = ll_call_writepage(inode, page); - if (rc != 0) - CERROR("writepage of page %p failed: %d\n", - page, rc); /* either waiting for io to complete or reacquiring * the lock that the failed writepage released */ lock_page(page); + wait_on_page_writeback(page); + if (rc != 0) { + CERROR("writepage inode %lu(%p) of page %p " + "failed: %d\n", inode->i_ino, inode, + page, rc); + if (rc == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else + set_bit(AS_EIO, &mapping->flags); + } } tmpex.l_extent.end = tmpex.l_extent.start + CFS_PAGE_SIZE - 1; @@ -858,7 +870,7 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, * lock_page() */ LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n"); if (llap) - ll_ra_accounting(llap, inode->i_mapping); + ll_ra_accounting(llap, mapping); ll_truncate_complete_page(page); } unlock_page(page); @@ -1036,7 +1048,7 @@ static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp) LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64 " atime "LPU64", mtime "LPU64", ctime "LPU64, - inode->i_size, stripe, lvb->lvb_size, lvb->lvb_mtime, + i_size_read(inode), stripe, lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_atime, lvb->lvb_ctime); iput: iput(inode); @@ -1061,7 +1073,7 @@ static void ll_merge_lvb(struct inode *inode) ll_inode_size_lock(inode, 1); inode_init_lvb(inode, &lvb); obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0); - inode->i_size = lvb.lvb_size; + i_size_write(inode, lvb.lvb_size); inode->i_blocks = lvb.lvb_blocks; LTIME_S(inode->i_mtime) = lvb.lvb_mtime; LTIME_S(inode->i_atime) = lvb.lvb_atime; @@ -1084,14 +1096,14 @@ int ll_local_size(struct inode *inode) RETURN(0); rc = obd_match(sbi->ll_dt_exp, lli->lli_smd, LDLM_EXTENT, - &policy, LCK_PR | LCK_PW, &flags, inode, &lockh); + &policy, LCK_PR, &flags, inode, &lockh); if (rc < 0) RETURN(rc); else if (rc == 0) RETURN(-ENODATA); ll_merge_lvb(inode); - obd_cancel(sbi->ll_dt_exp, lli->lli_smd, LCK_PR | LCK_PW, &lockh); + obd_cancel(sbi->ll_dt_exp, lli->lli_smd, LCK_PR, &lockh); RETURN(0); } @@ -1099,7 +1111,7 @@ int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm, lstat_t *st) { struct lustre_handle lockh = { 0 }; - struct obd_enqueue_info einfo = { 0 }; + struct ldlm_enqueue_info einfo = { 0 }; struct obd_info oinfo = { { { 0 } } }; struct ost_lvb lvb; int rc; @@ -1108,7 +1120,6 @@ int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm, einfo.ei_type = LDLM_EXTENT; einfo.ei_mode = LCK_PR; - einfo.ei_flags = LDLM_FL_HAS_INTENT; einfo.ei_cb_bl = ll_extent_lock_callback; einfo.ei_cb_cp = ldlm_completion_ast; einfo.ei_cb_gl = ll_glimpse_callback; @@ -1117,6 +1128,7 @@ int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm, oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF; oinfo.oi_lockh = &lockh; oinfo.oi_md = lsm; + oinfo.oi_flags = LDLM_FL_HAS_INTENT; rc = obd_enqueue_rqset(sbi->ll_dt_exp, &oinfo, &einfo); if (rc == -ENOENT) @@ -1147,7 +1159,7 @@ int ll_glimpse_size(struct inode *inode, int ast_flags) struct ll_inode_info *lli = ll_i2info(inode); struct ll_sb_info *sbi = ll_i2sbi(inode); struct lustre_handle lockh = { 0 }; - struct obd_enqueue_info einfo = { 0 }; + struct ldlm_enqueue_info einfo = { 0 }; struct obd_info oinfo = { { { 0 } } }; int rc; ENTRY; @@ -1171,7 +1183,6 @@ int ll_glimpse_size(struct inode *inode, int ast_flags) * acquired only if there were no conflicting locks. */ einfo.ei_type = LDLM_EXTENT; einfo.ei_mode = LCK_PR; - einfo.ei_flags = ast_flags | LDLM_FL_HAS_INTENT; einfo.ei_cb_bl = ll_extent_lock_callback; einfo.ei_cb_cp = ldlm_completion_ast; einfo.ei_cb_gl = ll_glimpse_callback; @@ -1180,6 +1191,7 @@ int ll_glimpse_size(struct inode *inode, int ast_flags) oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF; oinfo.oi_lockh = &lockh; oinfo.oi_md = lli->lli_smd; + oinfo.oi_flags = ast_flags | LDLM_FL_HAS_INTENT; rc = obd_enqueue_rqset(sbi->ll_dt_exp, &oinfo, &einfo); if (rc == -ENOENT) @@ -1192,7 +1204,7 @@ int ll_glimpse_size(struct inode *inode, int ast_flags) ll_merge_lvb(inode); CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %lu\n", - inode->i_size, inode->i_blocks); + i_size_read(inode), inode->i_blocks); RETURN(rc); } @@ -1204,7 +1216,7 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, { struct ll_sb_info *sbi = ll_i2sbi(inode); struct ost_lvb lvb; - struct obd_enqueue_info einfo = { 0 }; + struct ldlm_enqueue_info einfo = { 0 }; struct obd_info oinfo = { { { 0 } } }; int rc; ENTRY; @@ -1226,7 +1238,6 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, einfo.ei_type = LDLM_EXTENT; einfo.ei_mode = mode; - einfo.ei_flags = ast_flags; einfo.ei_cb_bl = ll_extent_lock_callback; einfo.ei_cb_cp = ldlm_completion_ast; einfo.ei_cb_gl = ll_glimpse_callback; @@ -1235,8 +1246,9 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, oinfo.oi_policy = *policy; oinfo.oi_lockh = lockh; oinfo.oi_md = lsm; + oinfo.oi_flags = ast_flags; - rc = obd_enqueue(sbi->ll_dt_exp, &oinfo, &einfo); + rc = obd_enqueue(sbi->ll_dt_exp, &oinfo, &einfo, NULL); *policy = oinfo.oi_policy; if (rc > 0) rc = -EIO; @@ -1257,9 +1269,9 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, * cancel the result of the truncate. Getting the * ll_inode_size_lock() after the enqueue maintains the DLM * -> ll_inode_size_lock() acquiring order. */ - inode->i_size = lvb.lvb_size; - CDEBUG(D_INODE, "inode=%lu, updating i_size %llu\n", - inode->i_ino, inode->i_size); + i_size_write(inode, lvb.lvb_size); + CDEBUG(D_INODE, "inode=%lu, updating i_size %llu\n", + inode->i_ino, i_size_read(inode)); } if (rc == 0) { @@ -1327,11 +1339,11 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count, * unguarded */ /* Read beyond end of file */ - if (*ppos >= inode->i_size) + if (*ppos >= i_size_read(inode)) RETURN(0); - if (count > inode->i_size - *ppos) - count = inode->i_size - *ppos; + if (count > i_size_read(inode) - *ppos) + count = i_size_read(inode) - *ppos; /* Make sure to correctly adjust the file pos pointer for * EFAULT case */ notzeroed = clear_user(buf, count); @@ -1361,6 +1373,10 @@ repeat: } node = ll_node_from_inode(inode, *ppos, end, LCK_PR); + if (IS_ERR(node)){ + GOTO(out, retval = PTR_ERR(node)); + } + tree.lt_fd = LUSTRE_FPRIVATE(file); rc = ll_tree_lock(&tree, node, buf, count, file->f_flags & O_NONBLOCK ? LDLM_FL_BLOCK_NOWAIT :0); @@ -1406,21 +1422,18 @@ repeat: * the kms size is _correct_, it is only the _minimum_ size. * If someone does a stat they will get the correct size which * will always be >= the kms value here. b=11081 */ - if (inode->i_size < kms) - inode->i_size = kms; + if (i_size_read(inode) < kms) + i_size_write(inode, kms); ll_inode_size_unlock(inode, 1); } chunk = end - *ppos + 1; CDEBUG(D_INODE, "Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n", - inode->i_ino, chunk, *ppos, inode->i_size); + inode->i_ino, chunk, *ppos, i_size_read(inode)); /* turn off the kernel's read-ahead */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - file->f_ramax = 0; -#else file->f_ra.ra_pages = 0; -#endif + /* initialize read-ahead window once per syscall */ if (ra == 0) { ra = 1; @@ -1529,13 +1542,13 @@ repeat: * The i_size value gets updated in ll_extent_lock() as a consequence * of the [0,EOF] extent lock we requested above. */ if (file->f_flags & O_APPEND) { - *ppos = inode->i_size; + *ppos = i_size_read(inode); end = *ppos + count - 1; } if (*ppos >= maxbytes) { send_sig(SIGXFSZ, current, 0); - GOTO(out, retval = -EFBIG); + GOTO(out_unlock, retval = -EFBIG); } if (*ppos + count > maxbytes) count = maxbytes - *ppos; @@ -1547,9 +1560,10 @@ repeat: retval = generic_file_write(file, buf, chunk, ppos); ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, count, 1); -out: +out_unlock: ll_tree_unlock(&tree); +out: if (retval > 0) { buf += retval; count -= retval; @@ -1569,7 +1583,6 @@ out: /* * Send file content (through pagecache) somewhere with helper */ -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, read_actor_t actor, void *target) { @@ -1601,6 +1614,9 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, RETURN(generic_file_sendfile(in_file, ppos, count, actor, target)); node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR); + if (IS_ERR(node)) + RETURN(PTR_ERR(node)); + tree.lt_fd = LUSTRE_FPRIVATE(in_file); rc = ll_tree_lock(&tree, node, NULL, count, in_file->f_flags & O_NONBLOCK?LDLM_FL_BLOCK_NOWAIT:0); @@ -1639,12 +1655,12 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, goto out; } else { /* region is within kms and, hence, within real file size (A) */ - inode->i_size = kms; + i_size_write(inode, kms); ll_inode_size_unlock(inode, 1); } CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n", - inode->i_ino, count, *ppos, inode->i_size); + inode->i_ino, count, *ppos, i_size_read(inode)); bead.lrr_start = *ppos >> CFS_PAGE_SHIFT; bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; @@ -1658,7 +1674,6 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, ll_tree_unlock(&tree); RETURN(retval); } -#endif static int ll_lov_recreate_obj(struct inode *inode, struct file *file, unsigned long arg) @@ -1783,7 +1798,7 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); LASSERT(body != NULL); /* checked by mdc_getattr_name */ /* swabbed by mdc_getattr_name */ - LASSERT_REPSWABBED(req, REPLY_REC_OFF); + LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF)); lmmsize = body->eadatasize; @@ -1794,7 +1809,7 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, lmmsize); LASSERT(lmm != NULL); - LASSERT_REPSWABBED(req, REPLY_REC_OFF + 1); + LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF + 1)); /* * This is coming from the MDS, so is probably in @@ -2006,8 +2021,8 @@ static int join_sanity_check(struct inode *head, struct inode *tail) CERROR("file %lu can not be joined to itself \n", head->i_ino); RETURN(-EINVAL); } - if (head->i_size % JOIN_FILE_ALIGN) { - CERROR("hsize %llu must be times of 64K\n", head->i_size); + if (i_size_read(head) % JOIN_FILE_ALIGN) { + CERROR("hsize %llu must be times of 64K\n", i_size_read(head)); RETURN(-EINVAL); } RETURN(0); @@ -2016,29 +2031,31 @@ static int join_sanity_check(struct inode *head, struct inode *tail) static int join_file(struct inode *head_inode, struct file *head_filp, struct file *tail_filp) { - struct inode *tail_inode, *tail_parent; struct dentry *tail_dentry = tail_filp->f_dentry; struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = head_filp->f_flags|O_JOIN_FILE}; + struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CW, + ll_md_blocking_ast, ldlm_completion_ast, NULL, NULL }; + struct lustre_handle lockh; struct md_op_data *op_data; int rc; + loff_t data; ENTRY; tail_dentry = tail_filp->f_dentry; - tail_inode = tail_dentry->d_inode; - tail_parent = tail_dentry->d_parent->d_inode; - op_data = ll_prep_md_op_data(NULL, head_inode, tail_parent, + data = i_size_read(head_inode); + op_data = ll_prep_md_op_data(NULL, head_inode, + tail_dentry->d_parent->d_inode, tail_dentry->d_name.name, tail_dentry->d_name.len, 0, - LUSTRE_OPC_ANY, &head_inode->i_size); + LUSTRE_OPC_ANY, &data); if (IS_ERR(op_data)) RETURN(PTR_ERR(op_data)); - rc = md_enqueue(ll_i2mdexp(head_inode), LDLM_IBITS, &oit, LCK_CW, - op_data, &lockh, NULL, 0, ldlm_completion_ast, - ll_md_blocking_ast, NULL, 0); + rc = md_enqueue(ll_i2mdexp(head_inode), &einfo, &oit, + op_data, &lockh, NULL, 0, 0); ll_finish_md_op_data(op_data); if (rc < 0) @@ -2289,10 +2306,17 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, RETURN(ll_ioctl_setfacl(inode, &ioc)); } - default: + default: { + int err; + + if (LLIOC_STOP == + ll_iocontrol_call(inode, file, cmd, arg, &err)) + RETURN(err); + RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL, (void *)arg)); } + } } loff_t ll_file_seek(struct file *file, loff_t offset, int origin) @@ -2302,13 +2326,13 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin) struct lov_stripe_md *lsm = lli->lli_smd; loff_t retval; ENTRY; - retval = offset + ((origin == 2) ? inode->i_size : + retval = offset + ((origin == 2) ? i_size_read(inode) : (origin == 1) ? file->f_pos : 0); CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n", inode->i_ino, inode->i_generation, inode, retval, retval, origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET"); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1); - + if (origin == 2) { /* SEEK_END */ int nonblock = 0, rc; @@ -2322,7 +2346,7 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin) } ll_inode_size_lock(inode, 0); - offset += inode->i_size; + offset += i_size_read(inode); ll_inode_size_unlock(inode, 0); } else if (origin == 1) { /* SEEK_CUR */ offset += file->f_pos; @@ -2395,7 +2419,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data) OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLGROUP); - oc = ll_osscapa_get(inode, 0, CAPA_OPC_OSS_WRITE); + oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE); err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm, 0, OBD_OBJECT_EOF, oc); capa_put(oc); @@ -2416,9 +2440,10 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) fid_oid(ll_inode2fid(inode)), fid_ver(ll_inode2fid(inode)), LDLM_FLOCK} }; + struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL, + ldlm_flock_completion_ast, NULL, file_lock }; struct lustre_handle lockh = {0}; ldlm_policy_data_t flock; - ldlm_mode_t mode = 0; int flags = 0; int rc; ENTRY; @@ -2440,7 +2465,7 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) switch (file_lock->fl_type) { case F_RDLCK: - mode = LCK_PR; + einfo.ei_mode = LCK_PR; break; case F_UNLCK: /* An unlock request may or may not have any relation to @@ -2451,10 +2476,10 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) * information that is given with a normal read or write record * lock request. To avoid creating another ldlm unlock (cancel) * message we'll treat a LCK_NL flock request as an unlock. */ - mode = LCK_NL; + einfo.ei_mode = LCK_NL; break; case F_WRLCK: - mode = LCK_PW; + einfo.ei_mode = LCK_PW; break; default: CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type); @@ -2481,7 +2506,7 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) flags = LDLM_FL_TEST_LOCK; /* Save the old mode so that if the mode in the lock changes we * can decrement the appropriate reader or writer refcount. */ - file_lock->fl_type = mode; + file_lock->fl_type = einfo.ei_mode; break; default: CERROR("unknown fcntl lock command: %d\n", cmd); @@ -2490,12 +2515,10 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, " "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid, - flags, mode, flock.l_flock.start, flock.l_flock.end); + flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end); - rc = ldlm_cli_enqueue(sbi->ll_md_exp, NULL, &res_id, - LDLM_FLOCK, &flock, mode, &flags, NULL, - ldlm_flock_completion_ast, NULL, file_lock, - NULL, 0, NULL, &lockh, 0); + rc = ldlm_cli_enqueue(sbi->ll_md_exp, NULL, &einfo, &res_id, + &flock, &flags, NULL, 0, NULL, &lockh, 0); if ((file_lock->fl_flags & FL_FLOCK) && (rc == 0)) ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW)); #ifdef HAVE_F_OP_FLOCK @@ -2530,13 +2553,30 @@ int ll_have_md_lock(struct inode *inode, __u64 bits) flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK; if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy, - LCK_CR|LCK_CW|LCK_PR, &lockh)) { + LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) { RETURN(1); } - RETURN(0); } +ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits, + struct lustre_handle *lockh) +{ + ldlm_policy_data_t policy = { .l_inodebits = {bits}}; + struct lu_fid *fid; + ldlm_mode_t rc; + int flags; + ENTRY; + + fid = &ll_i2info(inode)->lli_fid; + CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid)); + + flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING; + rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy, + LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh); + RETURN(rc); +} + static int ll_inode_revalidate_fini(struct inode *inode, int rc) { if (rc == -ENOENT) { /* Already unlinked. Just update nlink * and return success */ @@ -2575,9 +2615,6 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n", inode->i_ino, inode->i_generation, inode, dentry->d_name.name); -#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)) - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REVALIDATE, 1); -#endif exp = ll_i2mdexp(inode); @@ -2622,8 +2659,7 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) } ll_lookup_finish_locks(&oit, dentry); - } else if (!ll_have_md_lock(dentry->d_inode, - MDS_INODELOCK_UPDATE)) { + } else if (!ll_have_md_lock(dentry->d_inode, MDS_INODELOCK_UPDATE)) { struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); obd_valid valid = OBD_MD_FLGETATTR; struct obd_capa *oc; @@ -2666,7 +2702,6 @@ out: return rc; } -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, struct lookup_intent *it, struct kstat *stat) { @@ -2696,7 +2731,7 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, #endif ll_inode_size_lock(inode, 0); - stat->size = inode->i_size; + stat->size = i_size_read(inode); stat->blocks = inode->i_blocks; ll_inode_size_unlock(inode, 0); @@ -2708,7 +2743,6 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) return ll_getattr_it(mnt, de, &it, stat); } -#endif static int lustre_check_acl(struct inode *inode, int mask) @@ -2747,11 +2781,7 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd) return generic_permission(inode, mask, lustre_check_acl); } #else -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd) -#else -int ll_inode_permission(struct inode *inode, int mask) -#endif { int mode = inode->i_mode; int rc; @@ -2811,9 +2841,7 @@ struct file_operations ll_file_operations = { .release = ll_file_release, .mmap = ll_file_mmap, .llseek = ll_file_seek, -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) .sendfile = ll_file_sendfile, -#endif .fsync = ll_fsync, }; @@ -2825,9 +2853,7 @@ struct file_operations ll_file_operations_flock = { .release = ll_file_release, .mmap = ll_file_mmap, .llseek = ll_file_seek, -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) .sendfile = ll_file_sendfile, -#endif .fsync = ll_fsync, #ifdef HAVE_F_OP_FLOCK .flock = ll_file_flock, @@ -2844,9 +2870,7 @@ struct file_operations ll_file_operations_noflock = { .release = ll_file_release, .mmap = ll_file_mmap, .llseek = ll_file_seek, -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) .sendfile = ll_file_sendfile, -#endif .fsync = ll_fsync, #ifdef HAVE_F_OP_FLOCK .flock = ll_file_noflock, @@ -2855,16 +2879,12 @@ struct file_operations ll_file_operations_noflock = { }; struct inode_operations ll_file_inode_operations = { -#ifdef LUSTRE_KERNEL_VERSION +#ifdef HAVE_VFS_INTENT_PATCHES .setattr_raw = ll_setattr_raw, #endif .setattr = ll_setattr, .truncate = ll_truncate, -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) .getattr = ll_getattr, -#else - .revalidate_it = ll_inode_revalidate_it, -#endif .permission = ll_inode_permission, .setxattr = ll_setxattr, .getxattr = ll_getxattr, @@ -2872,3 +2892,102 @@ struct inode_operations ll_file_inode_operations = { .removexattr = ll_removexattr, }; +/* dynamic ioctl number support routins */ +static struct llioc_ctl_data { + struct rw_semaphore ioc_sem; + struct list_head ioc_head; +} llioc = { + __RWSEM_INITIALIZER(llioc.ioc_sem), + CFS_LIST_HEAD_INIT(llioc.ioc_head) +}; + + +struct llioc_data { + struct list_head iocd_list; + unsigned int iocd_size; + llioc_callback_t iocd_cb; + unsigned int iocd_count; + unsigned int iocd_cmd[0]; +}; + +void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd) +{ + unsigned int size; + struct llioc_data *in_data = NULL; + ENTRY; + + if (cb == NULL || cmd == NULL || + count > LLIOC_MAX_CMD || count < 0) + RETURN(NULL); + + size = sizeof(*in_data) + count * sizeof(unsigned int); + OBD_ALLOC(in_data, size); + if (in_data == NULL) + RETURN(NULL); + + memset(in_data, 0, sizeof(*in_data)); + in_data->iocd_size = size; + in_data->iocd_cb = cb; + in_data->iocd_count = count; + memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count); + + down_write(&llioc.ioc_sem); + list_add_tail(&in_data->iocd_list, &llioc.ioc_head); + up_write(&llioc.ioc_sem); + + RETURN(in_data); +} + +void ll_iocontrol_unregister(void *magic) +{ + struct llioc_data *tmp; + + if (magic == NULL) + return; + + down_write(&llioc.ioc_sem); + list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) { + if (tmp == magic) { + unsigned int size = tmp->iocd_size; + + list_del(&tmp->iocd_list); + up_write(&llioc.ioc_sem); + + OBD_FREE(tmp, size); + return; + } + } + up_write(&llioc.ioc_sem); + + CWARN("didn't find iocontrol register block with magic: %p\n", magic); +} + +EXPORT_SYMBOL(ll_iocontrol_register); +EXPORT_SYMBOL(ll_iocontrol_unregister); + +enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg, int *rcp) +{ + enum llioc_iter ret = LLIOC_CONT; + struct llioc_data *data; + int rc = -EINVAL, i; + + down_read(&llioc.ioc_sem); + list_for_each_entry(data, &llioc.ioc_head, iocd_list) { + for (i = 0; i < data->iocd_count; i++) { + if (cmd != data->iocd_cmd[i]) + continue; + + ret = data->iocd_cb(inode, file, cmd, arg, data, &rc); + break; + } + + if (ret == LLIOC_STOP) + break; + } + up_read(&llioc.ioc_sem); + + if (rcp) + *rcp = rc; + return ret; +}