- struct inode *inode = file->f_dentry->d_inode;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lov_stripe_md *lsm = lli->lli_smd;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_lock_tree tree;
- struct ll_lock_tree_node *node;
- struct ost_lvb lvb;
- struct ll_ra_read bead;
- int rc, ra = 0;
- loff_t end;
- ssize_t retval, chunk, sum = 0;
-
- __u64 kms;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
- inode->i_ino, inode->i_generation, inode, count, *ppos);
- /* "If nbyte is 0, read() will return 0 and have no other results."
- * -- Single Unix Spec */
- if (count == 0)
- RETURN(0);
-
- ll_stats_ops_tally(sbi, LPROC_LL_READ_BYTES, count);
-
- if (!lsm) {
- /* Read on file with no objects should return zero-filled
- * buffers up to file size (we can get non-zero sizes with
- * mknod + truncate, then opening file for read. This is a
- * common pattern in NFS case, it seems). Bug 6243 */
- int notzeroed;
- /* Since there are no objects on OSTs, we have nothing to get
- * lock on and so we are forced to access inode->i_size
- * unguarded */
-
- /* Read beyond end of file */
- if (*ppos >= i_size_read(inode))
- RETURN(0);
-
- if (count > i_size_read(inode) - *ppos)
- count = i_size_read(inode) - *ppos;
- /* Make sure to correctly adjust the file pos pointer for
- * EFAULT case */
- notzeroed = clear_user(buf, count);
- count -= notzeroed;
- *ppos += count;
- if (!count)
- RETURN(-EFAULT);
- RETURN(count);
- }
-
-repeat:
- if (sbi->ll_max_rw_chunk != 0) {
- /* first, let's know the end of the current stripe */
- end = *ppos;
- obd_extent_calc(sbi->ll_dt_exp, lsm, OBD_CALC_STRIPE_END,
- (obd_off *)&end);
-
- /* correct, the end is beyond the request */
- if (end > *ppos + count - 1)
- end = *ppos + count - 1;
-
- /* and chunk shouldn't be too large even if striping is wide */
- if (end - *ppos > sbi->ll_max_rw_chunk)
- end = *ppos + sbi->ll_max_rw_chunk - 1;
- } else {
- end = *ppos + count - 1;
- }
-
- node = ll_node_from_inode(inode, *ppos, end, LCK_PR);
- if (IS_ERR(node)){
- GOTO(out, retval = PTR_ERR(node));
- }
-
- tree.lt_fd = LUSTRE_FPRIVATE(file);
- rc = ll_tree_lock(&tree, node, buf, count,
- file->f_flags & O_NONBLOCK ? LDLM_FL_BLOCK_NOWAIT :0);
- if (rc != 0)
- GOTO(out, retval = rc);
-
- ll_inode_size_lock(inode, 1);
- /*
- * Consistency guarantees: following possibilities exist for the
- * relation between region being read and real file size at this
- * moment:
- *
- * (A): the region is completely inside of the file;
- *
- * (B-x): x bytes of region are inside of the file, the rest is
- * outside;
- *
- * (C): the region is completely outside of the file.
- *
- * This classification is stable under DLM lock acquired by
- * ll_tree_lock() above, because to change class, other client has to
- * take DLM lock conflicting with our lock. Also, any updates to
- * ->i_size by other threads on this client are serialized by
- * ll_inode_size_lock(). This guarantees that short reads are handled
- * correctly in the face of concurrent writes and truncates.
- */
- inode_init_lvb(inode, &lvb);
- obd_merge_lvb(sbi->ll_dt_exp, lsm, &lvb, 1);
- kms = lvb.lvb_size;
- if (*ppos + count - 1 > kms) {
- /* A glimpse is necessary to determine whether we return a
- * short read (B) or some zeroes at the end of the buffer (C) */
- ll_inode_size_unlock(inode, 1);
- retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
- if (retval) {
- ll_tree_unlock(&tree);
- goto out;
- }
- } else {
- /* region is within kms and, hence, within real file size (A).
- * We need to increase i_size to cover the read region so that
- * generic_file_read() will do its job, but that doesn't mean
- * the kms size is _correct_, it is only the _minimum_ size.
- * If someone does a stat they will get the correct size which
- * will always be >= the kms value here. b=11081 */
- if (i_size_read(inode) < kms)
- i_size_write(inode, kms);
- ll_inode_size_unlock(inode, 1);
- }
-
- chunk = end - *ppos + 1;
- CDEBUG(D_INODE, "Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
- inode->i_ino, chunk, *ppos, i_size_read(inode));
-
- /* turn off the kernel's read-ahead */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- file->f_ramax = 0;
-#else
- file->f_ra.ra_pages = 0;
-#endif
- /* initialize read-ahead window once per syscall */
- if (ra == 0) {
- ra = 1;
- bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
- bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
- ll_ra_read_in(file, &bead);
- }
-
- /* BUG: 5972 */
- file_accessed(file);
- retval = generic_file_read(file, buf, chunk, ppos);
- ll_rw_stats_tally(sbi, current->pid, file, count, 0);
-
- ll_tree_unlock(&tree);
-
- if (retval > 0) {
- buf += retval;
- count -= retval;
- sum += retval;
- if (retval == chunk && count > 0)
- goto repeat;
- }
-
- out:
- if (ra != 0)
- ll_ra_read_ex(file, &bead);
- retval = (sum > 0) ? sum : retval;
- RETURN(retval);
-}
-
/*
 * Write to a file (through the page cache).
 *
 * Takes a PW DLM extent lock over the region being written ([0, EOF]
 * for O_APPEND, since the final offset is unknown until the lock is
 * held), then delegates the actual copy to generic_file_write().
 * Large writes are split into stripe-aligned chunks of at most
 * sbi->ll_max_rw_chunk bytes, looping via "repeat" until done.
 *
 * Returns the total number of bytes written across all chunks, or a
 * negative errno if nothing was written.
 */
static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
                             loff_t *ppos)
{
        struct inode *inode = file->f_dentry->d_inode;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
        struct ll_lock_tree tree;
        struct ll_lock_tree_node *node;
        loff_t maxbytes = ll_file_maxbytes(inode);
        loff_t lock_start, lock_end, end;
        ssize_t retval, chunk, sum = 0;
        int rc;
        ENTRY;

        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
               inode->i_ino, inode->i_generation, inode, count, *ppos);

        SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */

        /* POSIX, but surprised the VFS doesn't check this already */
        if (count == 0)
                RETURN(0);

        /* If file was opened for LL_IOC_LOV_SETSTRIPE but the ioctl wasn't
         * called on the file, don't fail the below assertion (bug 2388). */
        if (file->f_flags & O_LOV_DELAY_CREATE &&
            ll_i2info(inode)->lli_smd == NULL)
                RETURN(-EBADF);

        LASSERT(ll_i2info(inode)->lli_smd != NULL);

        /* Serialize writers on this client so the chunks of one large
         * write are not interleaved with another local writer's data. */
        down(&ll_i2info(inode)->lli_write_sem);

repeat:
        chunk = 0; /* just to fix gcc's warning */
        end = *ppos + count - 1;

        if (file->f_flags & O_APPEND) {
                /* Append: lock the whole object; the real write offset is
                 * only known (from i_size) once the lock is granted. */
                lock_start = 0;
                lock_end = OBD_OBJECT_EOF;
        } else if (sbi->ll_max_rw_chunk != 0) {
                /* first, let's know the end of the current stripe */
                end = *ppos;
                obd_extent_calc(sbi->ll_dt_exp, lsm, OBD_CALC_STRIPE_END,
                                (obd_off *)&end);

                /* correct, the end is beyond the request */
                if (end > *ppos + count - 1)
                        end = *ppos + count - 1;

                /* and chunk shouldn't be too large even if striping is wide */
                if (end - *ppos > sbi->ll_max_rw_chunk)
                        end = *ppos + sbi->ll_max_rw_chunk - 1;
                lock_start = *ppos;
                lock_end = end;
        } else {
                lock_start = *ppos;
                lock_end = *ppos + count - 1;
        }
        node = ll_node_from_inode(inode, lock_start, lock_end, LCK_PW);

        if (IS_ERR(node))
                GOTO(out, retval = PTR_ERR(node));

        tree.lt_fd = LUSTRE_FPRIVATE(file);
        rc = ll_tree_lock(&tree, node, buf, count,
                          file->f_flags & O_NONBLOCK ? LDLM_FL_BLOCK_NOWAIT :0);
        if (rc != 0)
                GOTO(out, retval = rc);

        /* This is ok, g_f_w will overwrite this under i_sem if it races
         * with a local truncate, it just makes our maxbyte checking easier.
         * The i_size value gets updated in ll_extent_lock() as a consequence
         * of the [0,EOF] extent lock we requested above. */
        if (file->f_flags & O_APPEND) {
                *ppos = i_size_read(inode);
                end = *ppos + count - 1;
        }

        if (*ppos >= maxbytes) {
                /* Writing entirely beyond the file-size limit: POSIX wants
                 * SIGXFSZ delivered along with EFBIG. */
                send_sig(SIGXFSZ, current, 0);
                GOTO(out_unlock, retval = -EFBIG);
        }
        /* Partially beyond the limit: clamp to a short write. */
        if (*ppos + count > maxbytes)
                count = maxbytes - *ppos;

        /* generic_file_write handles O_APPEND after getting i_mutex */
        chunk = end - *ppos + 1;
        CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
               inode->i_ino, chunk, *ppos);
        retval = generic_file_write(file, buf, chunk, ppos);
        ll_rw_stats_tally(ll_i2sbi(inode), current->pid, file, count, 1);

out_unlock:
        ll_tree_unlock(&tree);

out:
        if (retval > 0) {
                buf += retval;
                count -= retval;
                sum += retval;
                /* Wrote a full chunk and more remains: take the next lock
                 * and continue with the following chunk. */
                if (retval == chunk && count > 0)
                        goto repeat;
        }

        up(&ll_i2info(inode)->lli_write_sem);

        /* Report the total across all chunks; only if nothing at all was
         * written does the last error code get returned. */
        retval = (sum > 0) ? sum : retval;
        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES,
                           retval > 0 ? retval : 0);
        RETURN(retval);
}
-
-/*
- * Send file content (through pagecache) somewhere with helper
- */
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
- read_actor_t actor, void *target)
-{
- struct inode *inode = in_file->f_dentry->d_inode;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lov_stripe_md *lsm = lli->lli_smd;
- struct ll_lock_tree tree;
- struct ll_lock_tree_node *node;
- struct ost_lvb lvb;
- struct ll_ra_read bead;
- int rc;
- ssize_t retval;
- __u64 kms;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
- inode->i_ino, inode->i_generation, inode, count, *ppos);
-
- /* "If nbyte is 0, read() will return 0 and have no other results."
- * -- Single Unix Spec */
- if (count == 0)
- RETURN(0);
-
- ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_READ_BYTES, count);
- /* turn off the kernel's read-ahead */
- in_file->f_ra.ra_pages = 0;
-
- /* File with no objects, nothing to lock */
- if (!lsm)
- RETURN(generic_file_sendfile(in_file, ppos, count, actor, target));
-
- node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
- if (IS_ERR(node))
- RETURN(PTR_ERR(node));
-
- tree.lt_fd = LUSTRE_FPRIVATE(in_file);
- rc = ll_tree_lock(&tree, node, NULL, count,
- in_file->f_flags & O_NONBLOCK?LDLM_FL_BLOCK_NOWAIT:0);
- if (rc != 0)
- RETURN(rc);
-
- ll_inode_size_lock(inode, 1);
- /*
- * Consistency guarantees: following possibilities exist for the
- * relation between region being read and real file size at this
- * moment:
- *
- * (A): the region is completely inside of the file;
- *
- * (B-x): x bytes of region are inside of the file, the rest is
- * outside;
- *
- * (C): the region is completely outside of the file.
- *
- * This classification is stable under DLM lock acquired by
- * ll_tree_lock() above, because to change class, other client has to
- * take DLM lock conflicting with our lock. Also, any updates to
- * ->i_size by other threads on this client are serialized by
- * ll_inode_size_lock(). This guarantees that short reads are handled
- * correctly in the face of concurrent writes and truncates.
- */
- inode_init_lvb(inode, &lvb);
- obd_merge_lvb(ll_i2sbi(inode)->ll_dt_exp, lsm, &lvb, 1);
- kms = lvb.lvb_size;
- if (*ppos + count - 1 > kms) {
- /* A glimpse is necessary to determine whether we return a
- * short read (B) or some zeroes at the end of the buffer (C) */
- ll_inode_size_unlock(inode, 1);
- retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
- if (retval)
- goto out;
- } else {
- /* region is within kms and, hence, within real file size (A) */
- i_size_write(inode, kms);
- ll_inode_size_unlock(inode, 1);
- }
-
- CDEBUG(D_INFO, "Send ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
- inode->i_ino, count, *ppos, i_size_read(inode));
-
- bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
- bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
- ll_ra_read_in(in_file, &bead);
- /* BUG: 5972 */
- file_accessed(in_file);
- retval = generic_file_sendfile(in_file, ppos, count, actor, target);
- ll_ra_read_ex(in_file, &bead);
-
- out:
- ll_tree_unlock(&tree);
- RETURN(retval);
-}
-#endif
-
-static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
- unsigned long arg)
-{
- struct ll_inode_info *lli = ll_i2info(inode);