X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Ffile.c;h=0ac8d654281edb210744943c8cfc1cf8738fb52b;hp=3f52d6bc09f0fff75c7e2c0e1c2715140855deed;hb=926c6309185a25a8ac1541cfa67910325ed8626f;hpb=fbfb15d1d8d6b16b7426d205ce3e07eed2f75e08 diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 3f52d6b..0ac8d65 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1,356 +1,289 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * linux/fs/ext2/file.c + * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * Author: Peter Braam + * Author: Phil Schwan + * Author: Andreas Dilger * - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution + * This file is part of Lustre, http://www.lustre.org. * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. * - * from + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * linux/fs/minix/file.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * ext2 fs regular file handling primitives - * - * 64-bit file support on 64-bit platforms by Jakub Jelinek - * (jj@sunsite.ms.mff.cuni.cz) + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define DEBUG_SUBSYSTEM S_LLITE - #include #include -#include - -int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc); -extern int ll_setattr(struct dentry *de, struct iattr *attr); +#include +#include +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#include +#endif +#include "llite_internal.h" +#include -int ll_create_objects(struct super_block *sb, obd_id id, uid_t uid, gid_t gid, - struct lov_stripe_md **lsmp) +int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode, + struct file *file) { - struct obdo *oa; + struct ll_file_data *fd = file->private_data; + struct ptlrpc_request *req = NULL; + struct obd_client_handle *och = &fd->fd_mds_och; + struct obdo obdo; int rc; ENTRY; - oa = obdo_alloc(); - if (!oa) - RETURN(-ENOMEM); + /* clear group lock, if present */ + if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { + struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; + fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK); + rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, + &fd->fd_cwlockh); + } - oa->o_mode = S_IFREG | 0600; - oa->o_easize = ll_mds_easize(sb); - oa->o_id = id; - oa->o_uid = uid; - oa->o_gid = gid; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE | - OBD_MD_FLEASIZE | OBD_MD_FLUID | OBD_MD_FLGID; - rc = obd_create(ll_s2obdconn(sb), oa, lsmp); - obdo_free(oa); + obdo.o_id = inode->i_ino; + obdo.o_valid = OBD_MD_FLID; + obdo_from_inode(&obdo, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE | + OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLATIME | OBD_MD_FLMTIME | + OBD_MD_FLCTIME); + if (0 /* ll_is_inode_dirty(inode) */) { + obdo.o_flags = MDS_BFLAG_UNCOMMITTED_WRITES; + obdo.o_valid |= OBD_MD_FLFLAGS; + } + obdo.o_mds = ll_i2info(inode)->lli_mds; + rc = md_close(mdc_exp, &obdo, och, &req); + + if (rc == EAGAIN) { + /* We are the last writer, so the MDS has instructed us to get + * the file size and any write cookies, then close again. */ + //ll_queue_done_writing(inode); + rc = 0; + } else if (rc) { + CERROR("inode %lu mdc close failed: rc = %d\n", + inode->i_ino, rc); + } + if (rc == 0) { + rc = ll_objects_destroy(req, file->f_dentry->d_inode, 1); + if (rc) + CERROR("inode %lu ll_objects destroy: rc = %d\n", + inode->i_ino, rc); + } + + mdc_clear_open_replay_data(mdc_exp, och); + ptlrpc_req_finished(req); + och->och_fh.cookie = DEAD_HANDLE_MAGIC; + file->private_data = NULL; + OBD_SLAB_FREE(fd, ll_file_data_slab, sizeof *fd); - if (!rc) - LASSERT(*lsmp && (*lsmp)->lsm_object_id); RETURN(rc); } -static int ll_file_open(struct inode *inode, struct file *file) +/* While this returns an error code, fput() the caller does not, so we need + * to make every effort to clean up all of our state here. Also, applications + * rarely check close errors and even if an error is returned they will not + * re-try the close call. + */ +int ll_file_release(struct inode *inode, struct file *file) { - struct ptlrpc_request *req = NULL; struct ll_file_data *fd; - struct obdo *oa; - struct lov_stripe_md *lsm = NULL; struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - int rc = 0; + int rc; + ENTRY; + CDEBUG(D_VFSTRACE, "VFS Op:inode=%u/%lu/%u(%p)\n", + ll_i2info(inode)->lli_mds, inode->i_ino, + inode->i_generation, inode); - LASSERT(!file->private_data); + /* don't do anything for / */ + if (inode->i_sb->s_root == file->f_dentry) + RETURN(0); - lsm = lli->lli_smd; + lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_RELEASE); + fd = (struct ll_file_data *)file->private_data; + LASSERT(fd != NULL); - /* delayed create of object (intent created inode) */ - /* XXX object needs to be cleaned up if mdc_open fails */ - /* XXX error handling appropriate here? */ - if (lsm == NULL) { - if (file->f_flags & O_LOV_DELAY_CREATE) { - CDEBUG(D_INODE, "delaying object creation\n"); - RETURN(0); - } - down(&lli->lli_open_sem); - /* Check to see if we lost the race */ - if (!lli->lli_smd) - rc = ll_create_objects(inode->i_sb, inode->i_ino, 0, 0, - &lli->lli_smd); - up(&lli->lli_open_sem); - if (rc) - RETURN(rc); + rc = ll_mdc_close(sbi->ll_mdc_exp, inode, file); + RETURN(rc); +} - lsm = lli->lli_smd; - } +static int ll_intent_file_open(struct file *file, void *lmm, + int lmmsize, struct lookup_intent *itp) +{ + struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode); + struct lustre_handle lockh; + struct mdc_op_data data; + struct dentry *parent = file->f_dentry->d_parent; + const char *name = file->f_dentry->d_name.name; + const int len = file->f_dentry->d_name.len; + int rc; - fd = kmem_cache_alloc(ll_file_data_slab, SLAB_KERNEL); - if (!fd) - GOTO(out, rc = -ENOMEM); - memset(fd, 0, sizeof(*fd)); + if (!parent) + RETURN(-ENOENT); - fd->fd_mdshandle.addr = (__u64)(unsigned long)file; - get_random_bytes(&fd->fd_mdshandle.cookie, - sizeof(fd->fd_mdshandle.cookie)); - rc = mdc_open(&sbi->ll_mdc_conn, inode->i_ino, S_IFREG | inode->i_mode, - file->f_flags, lsm, &fd->fd_mdshandle, &req); - fd->fd_req = req; + ll_prepare_mdc_op_data(&data, parent->d_inode, NULL, name, len, O_RDWR); - /* This is the "reply" refcount. */ - ptlrpc_req_finished(req); - if (rc) - GOTO(out_req, -abs(rc)); - if (!fd->fd_mdshandle.addr || - fd->fd_mdshandle.addr == (__u64)(unsigned long)file) { - CERROR("hmm, mdc_open didn't assign fd_mdshandle?\n"); - /* XXX handle this how, abort or is it non-fatal? */ + rc = md_enqueue(sbi->ll_mdc_exp, LDLM_IBITS, itp, LCK_PR, &data, + &lockh, lmm, lmmsize, ldlm_completion_ast, + ll_mdc_blocking_ast, NULL); + if (rc == 0) { + if (itp->d.lustre.it_lock_mode) + memcpy(&itp->d.lustre.it_lock_handle, + &lockh, sizeof(lockh)); + } else if (rc < 0) { + CERROR("lock enqueue: err: %d\n", rc); } + + RETURN(rc); +} - oa = obdo_alloc(); - if (!oa) - GOTO(out_mdc, rc = -EINVAL); - - oa->o_id = lsm->lsm_object_id; - oa->o_mode = S_IFREG; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE | - OBD_MD_FLBLOCKS; - rc = obd_open(ll_i2obdconn(inode), oa, lsm); - obdo_to_inode(inode, oa, oa->o_valid & (OBD_MD_FLSIZE|OBD_MD_FLBLOCKS)); +int ll_local_open(struct file *file, struct lookup_intent *it) +{ + struct ptlrpc_request *req = it->d.lustre.it_data; + struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode); + struct obd_export *mdc_exp = ll_i2mdcexp(file->f_dentry->d_inode); + struct ll_file_data *fd; + struct mds_body *body; + ENTRY; - obd_oa2handle(&fd->fd_osthandle, oa); - obdo_free(oa); + body = lustre_msg_buf (req->rq_repmsg, 1, sizeof (*body)); + LASSERT (body != NULL); /* reply already checked out */ + LASSERT_REPSWABBED (req, 1); /* and swabbed down */ - if (rc) - GOTO(out_mdc, rc = -abs(rc)); + LASSERT(!file->private_data); - atomic_inc(&lli->lli_open_count); + OBD_SLAB_ALLOC(fd, ll_file_data_slab, SLAB_KERNEL, sizeof *fd); + /* We can't handle this well without reorganizing ll_file_open and + * ll_mdc_close, so don't even try right now. */ + LASSERT(fd != NULL); + memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle)); + fd->fd_mds_och.och_magic = OBD_CLIENT_HANDLE_MAGIC; file->private_data = fd; + ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras); - RETURN(0); -out_mdc: - mdc_close(&sbi->ll_mdc_conn, inode->i_ino, - S_IFREG, &fd->fd_mdshandle, &req); -out_req: - ptlrpc_free_req(req); -//out_fd: - fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC; - kmem_cache_free(ll_file_data_slab, fd); -out: - return rc; -} + lli->lli_io_epoch = body->io_epoch; -int ll_size_lock(struct inode *inode, struct lov_stripe_md *lsm, obd_off start, - int mode, struct lustre_handle **lockhs_p) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ldlm_extent extent; - struct lustre_handle *lockhs = NULL; - int rc, flags = 0, stripe_count; - ENTRY; + mdc_set_open_replay_data(mdc_exp, &fd->fd_mds_och, it->d.lustre.it_data); - if (sbi->ll_flags & LL_SBI_NOLCK) { - *lockhs_p = NULL; - RETURN(0); - } - - stripe_count = lsm->lsm_stripe_count; - if (!stripe_count) - stripe_count = 1; - - OBD_ALLOC(lockhs, stripe_count * sizeof(*lockhs)); - if (lockhs == NULL) - RETURN(-ENOMEM); - - extent.start = start; - extent.end = OBD_OBJECT_EOF; - - rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, &extent, - sizeof(extent), mode, &flags, ll_lock_callback, - inode, sizeof(*inode), lockhs); - if (rc != ELDLM_OK) { - CERROR("lock enqueue: %d\n", rc); - OBD_FREE(lockhs, stripe_count * sizeof(*lockhs)); - } else - *lockhs_p = lockhs; - RETURN(rc); + RETURN(0); } -int ll_size_unlock(struct inode *inode, struct lov_stripe_md *lsm, int mode, - struct lustre_handle *lockhs) +/* Open a file, and (for the very first open) create objects on the OSTs at + * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object + * creation or open until ll_lov_setstripe() ioctl is called. We grab + * lli_open_sem to ensure no other process will create objects, send the + * stripe MD to the MDS, or try to destroy the objects if that fails. + * + * If we already have the stripe MD locally then we don't request it in + * mdc_open(), by passing a lmm_size = 0. + * + * It is up to the application to ensure no other processes open this file + * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be + * used. We might be able to avoid races of that sort by getting lli_open_sem + * before returning in the O_LOV_DELAY_CREATE case and dropping it here + * or in ll_file_release(), but I'm not sure that is desirable/necessary. + */ +int ll_file_open(struct inode *inode, struct file *file) { - struct ll_sb_info *sbi = ll_i2sbi(inode); - int rc, stripe_count; + struct ll_inode_info *lli = ll_i2info(inode); + struct lookup_intent *it, oit = { .it_op = IT_OPEN, + .it_flags = file->f_flags }; + struct lov_stripe_md *lsm; + struct ptlrpc_request *req; + int rc = 0; ENTRY; - if (sbi->ll_flags & LL_SBI_NOLCK) + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, + inode->i_generation, inode); + + /* don't do anything for / */ + if (inode->i_sb->s_root == file->f_dentry) RETURN(0); - if (lockhs == NULL) { - LBUG(); - RETURN(-EINVAL); - } + it = file->f_it; - rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockhs); - if (rc != ELDLM_OK) { - CERROR("lock cancel: %d\n", rc); - LBUG(); + if (!it || !it->d.lustre.it_disposition) { + it = &oit; + rc = ll_intent_file_open(file, NULL, 0, it); + if (rc) + GOTO(out, rc); } - stripe_count = lsm->lsm_stripe_count; - if (!stripe_count) - stripe_count = 1; - - OBD_FREE(lockhs, stripe_count * sizeof(*lockhs)); - RETURN(rc); -} - -int ll_file_size(struct inode *inode, struct lov_stripe_md *lsm) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct lustre_handle *lockhs; - struct obdo oa; - int err, rc; - ENTRY; - - LASSERT(lsm); - LASSERT(sbi); + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN); + rc = it_open_error(DISP_OPEN_OPEN, it); + if (rc) + GOTO(out, rc); - rc = ll_size_lock(inode, lsm, 0, LCK_PR, &lockhs); - if (rc != ELDLM_OK) { - CERROR("lock enqueue: %d\n", rc); - RETURN(rc); - } + rc = ll_local_open(file, it); + if (rc) + LBUG(); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID|OBD_MD_FLTYPE|OBD_MD_FLSIZE|OBD_MD_FLBLOCKS; - rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm); - if (!rc) - obdo_to_inode(inode, &oa, - oa.o_valid & ~(OBD_MD_FLTYPE | OBD_MD_FLMODE)); + if (!S_ISREG(inode->i_mode)) + GOTO(out, rc); - err = ll_size_unlock(inode, lsm, LCK_PR, lockhs); - if (err != ELDLM_OK) { - CERROR("lock cancel: %d\n", err); - LBUG(); + lsm = lli->lli_smd; + if (lsm == NULL) { + if (file->f_flags & O_LOV_DELAY_CREATE || + !(file->f_mode & FMODE_WRITE)) { + CDEBUG(D_INODE, "object creation was delayed\n"); + GOTO(out, rc); + } } - RETURN(rc); + file->f_flags &= ~O_LOV_DELAY_CREATE; + GOTO(out, rc); + out: + req = it->d.lustre.it_data; + ptlrpc_req_finished(req); + if (rc == 0) + ll_open_complete(inode); + return rc; } -static int ll_file_release(struct inode *inode, struct file *file) +/* Fills the obdo with the attributes for the inode defined by lsm */ +int ll_lsm_getattr(struct obd_export *exp, struct lov_stripe_md *lsm, + struct obdo *oa) { - struct ptlrpc_request *req = NULL; - struct ll_file_data *fd; - struct obdo oa; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - int rc, rc2; - + struct ptlrpc_request_set *set; + int rc; ENTRY; - fd = (struct ll_file_data *)file->private_data; - if (!fd) { - LBUG(); - GOTO(out, rc = -EINVAL); - } - - memset(&oa, 0, sizeof(oa)); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID; - obd_handle2oa(&oa, &fd->fd_osthandle); - rc = obd_close(ll_i2obdconn(inode), &oa, lsm); - if (rc) - GOTO(out_mdc, rc = -abs(rc)); + LASSERT(lsm != NULL); -#if 0 -#error "This should only be done on the node that already has the EOF lock" -#error "and only in the case where the file size actually changed. For now" -#error "we don't care about the size on the MDS, since we never use it (the" -#error "OST always has the authoritative size and we don't even use the MDS." - /* If this fails and we goto out_fd, the file size on the MDS is out of - * date. Is that a big deal? */ - if (file->f_mode & FMODE_WRITE) { - struct lustre_handle *lockhs; - - rc = ll_size_lock(inode, lsm, 0, LCK_PR, &lockhs); - if (rc) - GOTO(out_mdc, -abs(rc)); - - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE | - OBD_MD_FLBLOCKS; - rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm); - if (!rc) { - struct iattr attr; - attr.ia_valid = (ATTR_MTIME | ATTR_CTIME | ATTR_ATIME | - ATTR_SIZE); - attr.ia_mtime = inode->i_mtime; - attr.ia_ctime = inode->i_ctime; - attr.ia_atime = inode->i_atime; - attr.ia_size = oa.o_size; - - inode->i_blocks = oa.o_blocks; - - /* XXX: this introduces a small race that we should - * evaluate */ - rc = ll_inode_setattr(inode, &attr, 0); - } - rc2 = ll_size_unlock(inode, lli->lli_smd, LCK_PR, lockhs); - if (rc2) { - CERROR("lock cancel: %d\n", rc); - LBUG(); - if (!rc) - rc = rc2; - } - } -#endif + memset(oa, 0, sizeof *oa); + oa->o_id = lsm->lsm_object_id; + oa->o_gr = lsm->lsm_object_gr; + oa->o_mode = S_IFREG; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE | + OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | + OBD_MD_FLCTIME | OBD_MD_FLGROUP; -out_mdc: - rc2 = mdc_close(&sbi->ll_mdc_conn, inode->i_ino, - S_IFREG, &fd->fd_mdshandle, &req); - ptlrpc_req_finished(req); - if (rc2) { - if (!rc) - rc = -abs(rc2); - GOTO(out_fd, rc); - } - CDEBUG(D_HA, "matched req %p xid "LPD64" transno "LPD64" op " - "%d->%s:%d\n", fd->fd_req, fd->fd_req->rq_xid, - fd->fd_req->rq_repmsg->transno, fd->fd_req->rq_reqmsg->opc, - fd->fd_req->rq_import->imp_connection->c_remote_uuid, - fd->fd_req->rq_import->imp_client->cli_request_portal); - ptlrpc_req_finished(fd->fd_req); - - if (atomic_dec_and_test(&lli->lli_open_count)) { - CDEBUG(D_INFO, "last close, cancelling unused locks\n"); - rc = obd_cancel_unused(ll_i2obdconn(inode), lsm, 0); - if (rc) - CERROR("obd_cancel_unused: %d\n", rc); + set = ptlrpc_prep_set(); + if (set == NULL) { + CERROR ("ENOMEM allocing request set\n"); + rc = -ENOMEM; } else { - CDEBUG(D_INFO, "not last close, not cancelling unused locks\n"); + rc = obd_getattr_async(exp, oa, lsm, set); + if (rc == 0) + rc = ptlrpc_set_wait(set); + ptlrpc_set_destroy(set); } + if (rc) + RETURN(rc); - EXIT; - -out_fd: - fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC; - file->private_data = NULL; - kmem_cache_free(ll_file_data_slab, fd); -out: - return rc; + oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | + OBD_MD_FLCTIME | OBD_MD_FLSIZE); + RETURN(0); } static inline void ll_remove_suid(struct inode *inode) @@ -368,34 +301,176 @@ static inline void ll_remove_suid(struct inode *inode) } } -static void ll_update_atime(struct inode *inode) +static int ll_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock) { - struct iattr attr; + struct ll_inode_info *lli = ll_i2info(inode); + struct lov_stripe_md *lsm = lli->lli_smd; + struct obd_export *exp = ll_i2obdexp(inode); + struct { + char name[16]; + struct ldlm_lock *lock; + struct lov_stripe_md *lsm; + } key = { .name = "lock_to_stripe", .lock = lock, .lsm = lsm }; + __u32 stripe, vallen = sizeof(stripe); + int rc; + ENTRY; - attr.ia_atime = CURRENT_TIME; - attr.ia_valid = ATTR_ATIME; + if (lsm->lsm_stripe_count == 1) + GOTO(check, stripe = 0); - if (inode->i_atime == attr.ia_atime) return; - if (IS_RDONLY(inode)) return; - if (IS_NOATIME(inode)) return; + /* get our offset in the lov */ + rc = obd_get_info(exp, sizeof(key), &key, &vallen, &stripe); + if (rc != 0) { + CERROR("obd_get_info: rc = %d\n", rc); + RETURN(rc); + } + LASSERT(stripe < lsm->lsm_stripe_count); + +check: + if (lsm->lsm_oinfo[stripe].loi_id != lock->l_resource->lr_name.name[0]|| + lsm->lsm_oinfo[stripe].loi_gr != lock->l_resource->lr_name.name[2]){ + LDLM_ERROR(lock, "resource doesn't match object "LPU64"/"LPU64 + " inode=%lu/%u (%p)\n", + lsm->lsm_oinfo[stripe].loi_id, + lsm->lsm_oinfo[stripe].loi_gr, + inode->i_ino, inode->i_generation, inode); + RETURN(-ELDLM_NO_LOCK_DATA); + } - /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */ - ll_inode_setattr(inode, &attr, 0); + RETURN(stripe); } -int ll_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new, - void *data, __u32 data_len, int flag) +/* Flush the page cache for an extent as its canceled. When we're on an LOV, + * we get a lock cancellation for each stripe, so we have to map the obd's + * region back onto the stripes in the file that it held. + * + * No one can dirty the extent until we've finished our work and they can + * enqueue another lock. The DLM protects us from ll_file_read/write here, + * but other kernel actors could have pages locked. + * + * Called with the DLM lock held. */ +void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, + struct ldlm_lock *lock, __u32 stripe) { - struct inode *inode = data; + ldlm_policy_data_t tmpex; + unsigned long start, end, count, skip, i, j; + struct page *page; + int rc, rc2, discard = lock->l_flags & LDLM_FL_DISCARD_DATA; struct lustre_handle lockh; - int rc; ENTRY; - if (data_len != sizeof(struct inode)) - LBUG(); + memcpy(&tmpex, &lock->l_policy_data, sizeof(tmpex)); + CDEBUG(D_INODE|D_PAGE, "inode %lu(%p) ["LPU64"->"LPU64"] size: %llu\n", + inode->i_ino, inode, tmpex.l_extent.start, tmpex.l_extent.end, + inode->i_size); + + /* our locks are page granular thanks to osc_enqueue, we invalidate the + * whole page. */ + LASSERT((tmpex.l_extent.start & ~PAGE_CACHE_MASK) == 0); + LASSERT(((tmpex.l_extent.end + 1) & ~PAGE_CACHE_MASK) == 0); + + count = ~0; + skip = 0; + start = tmpex.l_extent.start >> PAGE_CACHE_SHIFT; + end = tmpex.l_extent.end >> PAGE_CACHE_SHIFT; + if (lsm->lsm_stripe_count > 1) { + count = lsm->lsm_stripe_size >> PAGE_CACHE_SHIFT; + skip = (lsm->lsm_stripe_count - 1) * count; + start += start/count * skip + stripe * count; + if (end != ~0) + end += end/count * skip + stripe * count; + } + if (end < tmpex.l_extent.end >> PAGE_CACHE_SHIFT) + end = ~0; + + i = (inode->i_size + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; + if (i < end) + end = i; + + CDEBUG(D_INODE|D_PAGE, "walking page indices start: %lu j: %lu " + "count: %lu skip: %lu end: %lu%s\n", start, start % count, + count, skip, end, discard ? " (DISCARDING)" : ""); + + /* this is the simplistic implementation of page eviction at + * cancelation. It is careful to get races with other page + * lockers handled correctly. fixes from bug 20 will make it + * more efficient by associating locks with pages and with + * batching writeback under the lock explicitly. */ + for (i = start, j = start % count; i <= end; + j++, i++, tmpex.l_extent.start += PAGE_CACHE_SIZE) { + if (j == count) { + CDEBUG(D_PAGE, "skip index %lu to %lu\n", i, i + skip); + i += skip; + j = 0; + if (i > end) + break; + } + LASSERTF(tmpex.l_extent.start< lock->l_policy_data.l_extent.end, + LPU64" >= "LPU64" start %lu i %lu end %lu\n", + tmpex.l_extent.start, lock->l_policy_data.l_extent.end, + start, i, end); + + if (!mapping_has_pages(inode->i_mapping)) { + CDEBUG(D_INODE|D_PAGE, "nothing left\n"); + break; + } - if (inode == NULL) + cond_resched(); + + page = find_get_page(inode->i_mapping, i); + if (page == NULL) + continue; + LL_CDEBUG_PAGE(D_PAGE, page, "lock page idx %lu ext "LPU64"\n", + i, tmpex.l_extent.start); + lock_page(page); + + /* page->mapping to check with racing against teardown */ + if (!discard && clear_page_dirty_for_io(page)) { + rc = ll_call_writepage(inode, page); + if (rc != 0) + CERROR("writepage of page %p failed: %d\n", + page, rc); + /* either waiting for io to complete or reacquiring + * the lock that the failed writepage released */ + lock_page(page); + } + + tmpex.l_extent.end = tmpex.l_extent.start + PAGE_CACHE_SIZE - 1; + /* check to see if another DLM lock covers this page */ + rc2 = ldlm_lock_match(lock->l_resource->lr_namespace, + LDLM_FL_BLOCK_GRANTED|LDLM_FL_CBPENDING | + LDLM_FL_TEST_LOCK, + &lock->l_resource->lr_name, LDLM_EXTENT, + &tmpex, LCK_PR | LCK_PW, &lockh); + if (rc2 == 0 && page->mapping != NULL) { + // checking again to account for writeback's lock_page() + LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n"); + ll_truncate_complete_page(page); + } + unlock_page(page); + page_cache_release(page); + } + LASSERTF(tmpex.l_extent.start <= + (lock->l_policy_data.l_extent.end == ~0ULL ? ~0ULL : + lock->l_policy_data.l_extent.end + 1), + "loop too long "LPU64" > "LPU64" start %lu i %lu end %lu\n", + tmpex.l_extent.start, lock->l_policy_data.l_extent.end, + start, i, end); + EXIT; +} + +static int ll_extent_lock_callback(struct ldlm_lock *lock, + struct ldlm_lock_desc *new, void *data, + int flag) +{ + struct lustre_handle lockh = { 0 }; + int rc; + ENTRY; + + if ((unsigned long)data > 0 && (unsigned long)data < 0x1000) { + LDLM_ERROR(lock, "cancelling lock with bad data %p", data); LBUG(); + } switch (flag) { case LDLM_CB_BLOCKING: @@ -404,13 +479,46 @@ int ll_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new, if (rc != ELDLM_OK) CERROR("ldlm_cli_cancel failed: %d\n", rc); break; - case LDLM_CB_CANCELING: - CDEBUG(D_INODE, "invalidating obdo/inode %ld\n", inode->i_ino); - /* FIXME: do something better than throwing away everything */ - //down(&inode->i_sem); - ll_invalidate_inode_pages(inode); - //up(&inode->i_sem); + case LDLM_CB_CANCELING: { + struct inode *inode; + struct ll_inode_info *lli; + struct lov_stripe_md *lsm; + __u32 stripe; + __u64 kms; + + /* This lock wasn't granted, don't try to evict pages */ + if (lock->l_req_mode != lock->l_granted_mode) + RETURN(0); + + inode = ll_inode_from_lock(lock); + if (inode == NULL) + RETURN(0); + lli = ll_i2info(inode); + if (lli == NULL) + goto iput; + if (lli->lli_smd == NULL) + goto iput; + lsm = lli->lli_smd; + + stripe = ll_lock_to_stripe_offset(inode, lock); + if (stripe < 0) + goto iput; + ll_pgcache_remove_extent(inode, lsm, lock, stripe); + + down(&inode->i_sem); + kms = ldlm_extent_shift_kms(lock, + lsm->lsm_oinfo[stripe].loi_kms); + + if (lsm->lsm_oinfo[stripe].loi_kms != kms) + LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64, + lsm->lsm_oinfo[stripe].loi_kms, kms); + lsm->lsm_oinfo[stripe].loi_kms = kms; + up(&inode->i_sem); + //ll_try_done_writing(inode); + iput: + iput(inode); break; + } default: LBUG(); } @@ -418,254 +526,572 @@ int ll_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new, RETURN(0); } +#if 0 +int ll_async_completion_ast(struct ldlm_lock *lock, int flags, void *data) +{ + /* XXX ALLOCATE - 160 bytes */ + struct inode *inode = ll_inode_from_lock(lock); + struct ll_inode_info *lli = ll_i2info(inode); + struct lustre_handle lockh = { 0 }; + struct ost_lvb *lvb; + __u32 stripe; + ENTRY; + + if (flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | + LDLM_FL_BLOCK_CONV)) { + LBUG(); /* not expecting any blocked async locks yet */ + LDLM_DEBUG(lock, "client-side async enqueue returned a blocked " + "lock, returning"); + ldlm_lock_dump(D_OTHER, lock, 0); + ldlm_reprocess_all(lock->l_resource); + RETURN(0); + } + + LDLM_DEBUG(lock, "client-side async enqueue: granted/glimpsed"); + + stripe = ll_lock_to_stripe_offset(inode, lock); + if (stripe < 0) + goto iput; + + if (lock->l_lvb_len) { + struct lov_stripe_md *lsm = lli->lli_smd; + __u64 kms; + lvb = lock->l_lvb_data; + lsm->lsm_oinfo[stripe].loi_rss = lvb->lvb_size; + + down(&inode->i_sem); + kms = MAX(lsm->lsm_oinfo[stripe].loi_kms, lvb->lvb_size); + kms = ldlm_extent_shift_kms(NULL, kms); + if (lsm->lsm_oinfo[stripe].loi_kms != kms) + LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64, + lsm->lsm_oinfo[stripe].loi_kms, kms); + lsm->lsm_oinfo[stripe].loi_kms = kms; + up(&inode->i_sem); + } + +iput: + iput(inode); + wake_up(&lock->l_waitq); + + ldlm_lock2handle(lock, &lockh); + ldlm_lock_decref(&lockh, LCK_PR); + RETURN(0); +} +#endif + +static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp) +{ + struct ptlrpc_request *req = reqp; + struct inode *inode = ll_inode_from_lock(lock); + struct ll_inode_info *lli; + struct ost_lvb *lvb; + int rc, size = sizeof(*lvb), stripe; + ENTRY; + + if (inode == NULL) + GOTO(out, rc = -ELDLM_NO_LOCK_DATA); + lli = ll_i2info(inode); + if (lli == NULL) + GOTO(iput, rc = -ELDLM_NO_LOCK_DATA); + if (lli->lli_smd == NULL) + GOTO(iput, rc = -ELDLM_NO_LOCK_DATA); + + /* First, find out which stripe index this lock corresponds to. */ + stripe = ll_lock_to_stripe_offset(inode, lock); + if (stripe < 0) + GOTO(iput, rc = -ELDLM_NO_LOCK_DATA); + + rc = lustre_pack_reply(req, 1, &size, NULL); + if (rc) { + CERROR("lustre_pack_reply: %d\n", rc); + GOTO(iput, rc); + } + + lvb = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*lvb)); + lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe].loi_kms; + + LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64, + inode->i_size, stripe, lvb->lvb_size); + GOTO(iput, 0); + iput: + iput(inode); + + out: + /* These errors are normal races, so we don't want to fill the console + * with messages by calling ptlrpc_error() */ + if (rc == -ELDLM_NO_LOCK_DATA) + lustre_pack_reply(req, 0, NULL, NULL); + + req->rq_status = rc; + return rc; +} + +__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms); +__u64 lov_merge_blocks(struct lov_stripe_md *lsm); +__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time); + +/* NB: lov_merge_size will prefer locally cached writes if they extend the + * file (because it prefers KMS over RSS when larger) */ +int ll_glimpse_size(struct inode *inode, struct ost_lvb *lvb) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_sb_info *sbi = ll_i2sbi(inode); + ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } }; + struct lustre_handle lockh = { 0 }; + int rc, flags = LDLM_FL_HAS_INTENT; + ENTRY; + + CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", inode->i_ino); + + rc = obd_enqueue(sbi->ll_osc_exp, lli->lli_smd, LDLM_EXTENT, &policy, + LCK_PR, &flags, ll_extent_lock_callback, + ldlm_completion_ast, ll_glimpse_callback, inode, + sizeof(*lvb), lustre_swab_ost_lvb, &lockh); + if (rc != 0) { + CERROR("obd_enqueue returned rc %d, returning -EIO\n", rc); + RETURN(rc > 0 ? -EIO : rc); + } + + lvb->lvb_size = lov_merge_size(lli->lli_smd, 0); + inode->i_blocks = lov_merge_blocks(lli->lli_smd); + //inode->i_mtime = lov_merge_mtime(lli->lli_smd, inode->i_mtime); + + CDEBUG(D_DLMTRACE, "glimpse: size: "LPU64", blocks: "LPU64"\n", + lvb->lvb_size, lvb->lvb_blocks); + + obd_cancel(sbi->ll_osc_exp, lli->lli_smd, LCK_PR, &lockh); + + RETURN(rc); +} + +int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, + struct lov_stripe_md *lsm, int mode, + ldlm_policy_data_t *policy, struct lustre_handle *lockh, + int ast_flags) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + int rc; + ENTRY; + + LASSERT(lockh->cookie == 0); + + /* XXX phil: can we do this? won't it screw the file size up? */ + if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || + (sbi->ll_flags & LL_SBI_NOLCK)) + RETURN(0); + + CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n", + inode->i_ino, policy->l_extent.start, policy->l_extent.end); + + rc = obd_enqueue(sbi->ll_osc_exp, lsm, LDLM_EXTENT, policy, mode, + &ast_flags, ll_extent_lock_callback, + ldlm_completion_ast, ll_glimpse_callback, inode, + sizeof(struct ost_lvb), lustre_swab_ost_lvb, lockh); + if (rc > 0) + rc = -EIO; + + if (policy->l_extent.start == 0 && + policy->l_extent.end == OBD_OBJECT_EOF) + inode->i_size = lov_merge_size(lsm, 1); + + //inode->i_mtime = lov_merge_mtime(lsm, inode->i_mtime); + + RETURN(rc); +} + +int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode, + struct lov_stripe_md *lsm, int mode, + struct lustre_handle *lockh) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + int rc; + ENTRY; + + /* XXX phil: can we do this? won't it screw the file size up? */ + if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || + (sbi->ll_flags & LL_SBI_NOLCK)) + RETURN(0); + + rc = obd_cancel(sbi->ll_osc_exp, lsm, mode, lockh); + + RETURN(rc); +} + static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) { - struct ll_file_data *fd = (struct ll_file_data *)filp->private_data; + struct ll_file_data *fd = filp->private_data; struct inode *inode = filp->f_dentry->d_inode; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct lustre_handle *lockhs = NULL; - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - int flags = 0; - ldlm_error_t err; + struct ll_inode_info *lli = ll_i2info(inode); + struct lov_stripe_md *lsm = lli->lli_smd; + struct lustre_handle lockh = { 0 }; + ldlm_policy_data_t policy; + int rc; ssize_t retval; + __u64 kms; ENTRY; + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n", + inode->i_ino, inode->i_generation, inode, count, *ppos); - if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) && - !(sbi->ll_flags & LL_SBI_NOLCK)) { - struct ldlm_extent extent; - OBD_ALLOC(lockhs, lsm->lsm_stripe_count * sizeof(*lockhs)); - if (!lockhs) - RETURN(-ENOMEM); - - extent.start = *ppos; - extent.end = *ppos + count; - CDEBUG(D_INFO, "Locking inode %ld, start "LPU64" end "LPU64"\n", - inode->i_ino, extent.start, extent.end); - - err = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, - &extent, sizeof(extent), LCK_PR, &flags, - ll_lock_callback, inode, sizeof(*inode), - lockhs); - if (err != ELDLM_OK) { - OBD_FREE(lockhs, lsm->lsm_stripe_count*sizeof(*lockhs)); - CERROR("lock enqueue: err: %d\n", err); - RETURN(err); - } - } + /* "If nbyte is 0, read() will return 0 and have no other results." + * -- Single Unix Spec */ + if (count == 0) + RETURN(0); - /* If we don't refresh the file size, generic_file_read may not even - * call us */ - retval = ll_file_size(inode, lsm); - if (retval < 0) { - CERROR("ll_file_size: %d\n", retval); - RETURN(retval); - } + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES, + count); - CDEBUG(D_INFO, "Reading inode %ld, %d bytes, offset %Ld\n", - inode->i_ino, count, *ppos); - retval = generic_file_read(filp, buf, count, ppos); + if (!lsm) + RETURN(0); - if (retval > 0) - ll_update_atime(inode); + policy.l_extent.start = *ppos; + policy.l_extent.end = *ppos + count - 1; - if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) && - !(sbi->ll_flags & LL_SBI_NOLCK)) { - err = obd_cancel(&sbi->ll_osc_conn, lsm, LCK_PR, lockhs); - if (err != ELDLM_OK) { - CERROR("lock cancel: err: %d\n", err); - retval = err; - } + rc = ll_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh, + (filp->f_flags & O_NONBLOCK) ? + LDLM_FL_BLOCK_NOWAIT: 0); + if (rc != 0) + RETURN(rc); + + kms = lov_merge_size(lsm, 1); + if (*ppos + count - 1 > kms) { + /* A glimpse is necessary to determine whether we return a short + * read or some zeroes at the end of the buffer */ + struct ost_lvb lvb; + retval = ll_glimpse_size(inode, &lvb); + if (retval) + goto out; + inode->i_size = lvb.lvb_size; + } else { + inode->i_size = kms; } - if (lockhs) - OBD_FREE(lockhs, lsm->lsm_stripe_count * sizeof(*lockhs)); + CDEBUG(D_INFO, "Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n", + inode->i_ino, count, *ppos, inode->i_size); + + /* turn off the kernel's read-ahead */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + filp->f_ramax = 0; +#else + filp->f_ra.ra_pages = 0; +#endif + retval = generic_file_read(filp, buf, count, ppos); + + out: + ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); RETURN(retval); } /* * Write to a file (through the page cache). */ -static ssize_t -ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) +static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, + loff_t *ppos) { - struct ll_file_data *fd = (struct ll_file_data *)file->private_data; + struct ll_file_data *fd = file->private_data; struct inode *inode = file->f_dentry->d_inode; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct lustre_handle *lockhs = NULL, *eof_lockhs = NULL; struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - int flags = 0; - ldlm_error_t err; + struct lustre_handle lockh = { 0 }; + ldlm_policy_data_t policy; + loff_t maxbytes = ll_file_maxbytes(inode); ssize_t retval; + int nonblock = 0, rc; ENTRY; + if (file->f_flags & O_NONBLOCK) + nonblock = LDLM_FL_BLOCK_NOWAIT; + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n", + inode->i_ino, inode->i_generation, inode, count, *ppos); - if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) { - struct obdo *oa; + SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */ - oa = obdo_alloc(); - if (!oa) - RETURN(-ENOMEM); + /* POSIX, but surprised the VFS doesn't check this already */ + if (count == 0) + RETURN(0); - err = ll_size_lock(inode, lsm, 0, LCK_PW, &eof_lockhs); - if (err) { - obdo_free(oa); - RETURN(err); - } + /* If file was opened for LL_IOC_LOV_SETSTRIPE but the ioctl wasn't + * called on the file, don't fail the below assertion (bug 2388). */ + if (file->f_flags & O_LOV_DELAY_CREATE && lsm == NULL) + RETURN(-EBADF); - oa->o_id = lsm->lsm_object_id; - oa->o_mode = inode->i_mode; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE | - OBD_MD_FLBLOCKS; - obd_handle2oa(oa, &fd->fd_osthandle); - retval = obd_getattr(&sbi->ll_osc_conn, oa, lsm); - if (retval) { - obdo_free(oa); - GOTO(out_eof, retval); - } + LASSERT(lsm); - *ppos = oa->o_size; - obdo_to_inode(inode, oa, oa->o_valid); - obdo_free(oa); + if (file->f_flags & O_APPEND) { + policy.l_extent.start = 0; + policy.l_extent.end = OBD_OBJECT_EOF; + } else { + policy.l_extent.start = *ppos; + policy.l_extent.end = *ppos + count - 1; } - if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) && - !(sbi->ll_flags & LL_SBI_NOLCK)) { - struct ldlm_extent extent; - OBD_ALLOC(lockhs, lsm->lsm_stripe_count * sizeof(*lockhs)); - if (!lockhs) - GOTO(out_eof, retval = -ENOMEM); - extent.start = *ppos; - extent.end = *ppos + count; - CDEBUG(D_INFO, "Locking inode %ld, start "LPU64" end "LPU64"\n", - inode->i_ino, extent.start, extent.end); - - err = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, - &extent, sizeof(extent), LCK_PW, &flags, - ll_lock_callback, inode, sizeof(*inode), - lockhs); - if (err != ELDLM_OK) { - CERROR("lock enqueue: err: %d\n", err); - GOTO(out_free, retval = err); + rc = ll_extent_lock(fd, inode, lsm, LCK_PW, &policy, &lockh, nonblock); + if (rc != 0) + RETURN(rc); + + /* this is ok, g_f_w will overwrite this under i_sem if it races + * with a local truncate, it just makes our maxbyte checking easier */ + if (file->f_flags & O_APPEND) + *ppos = inode->i_size; + + if (*ppos >= maxbytes) { + if (count || *ppos > maxbytes) { + send_sig(SIGXFSZ, current, 0); + GOTO(out, retval = -EFBIG); } } + if (*ppos + count > maxbytes) + count = maxbytes - *ppos; - CDEBUG(D_INFO, "Writing inode %ld, %ld bytes, offset "LPD64"\n", - inode->i_ino, (long)count, *ppos); + CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n", + inode->i_ino, count, *ppos); + /* generic_file_write handles O_APPEND after getting i_sem */ retval = generic_file_write(file, buf, count, ppos); - if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) || - sbi->ll_flags & LL_SBI_NOLCK) { - err = obd_cancel(&sbi->ll_osc_conn, lsm, LCK_PW, lockhs); - if (err != ELDLM_OK) { - CERROR("lock cancel: err: %d\n", err); - GOTO(out_free, retval = err); - } +out: + ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh); + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WRITE_BYTES, + retval > 0 ? retval : 0); + RETURN(retval); +} + +static int ll_lov_recreate_obj(struct inode *inode, struct file *file, + unsigned long arg) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_export *exp = ll_i2obdexp(inode); + struct ll_recreate_obj ucreatp; + struct obd_trans_info oti = { 0 }; + struct obdo *oa = NULL; + int lsm_size; + int rc = 0; + struct lov_stripe_md *lsm, *lsm2; + ENTRY; + + if (!capable (CAP_SYS_ADMIN)) + RETURN(-EPERM); + + rc = copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg, + sizeof(struct ll_recreate_obj)); + if (rc) { + RETURN(-EFAULT); + } + oa = obdo_alloc(); + if (oa == NULL) { + RETURN(-ENOMEM); } - EXIT; - out_free: - if (lockhs) - OBD_FREE(lockhs, lsm->lsm_stripe_count * sizeof(*lockhs)); + down(&lli->lli_open_sem); + lsm = lli->lli_smd; + if (lsm == NULL) { + up(&lli->lli_open_sem); + obdo_free(oa); + RETURN (-ENOENT); + } + lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) * + (lsm->lsm_stripe_count)); - out_eof: - if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) { - err = ll_size_unlock(inode, lsm, LCK_PW, eof_lockhs); - if (err && !retval) - retval = err; + OBD_ALLOC(lsm2, lsm_size); + if (lsm2 == NULL) { + up(&lli->lli_open_sem); + obdo_free(oa); + RETURN(-ENOMEM); } - return retval; + oa->o_id = ucreatp.lrc_id; + oa->o_nlink = ucreatp.lrc_ost_idx; + oa->o_gr = ucreatp.lrc_group; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLFLAGS; + oa->o_flags |= OBD_FL_RECREATE_OBJS; + obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | + OBD_MD_FLMTIME | OBD_MD_FLCTIME); + + oti.oti_objid = NULL; + memcpy(lsm2, lsm, lsm_size); + rc = obd_create(exp, oa, &lsm2, &oti); + + up(&lli->lli_open_sem); + OBD_FREE(lsm2, lsm_size); + obdo_free(oa); + RETURN (rc); } -static int ll_lov_setstripe(struct inode *inode, struct file *file, - struct lov_user_md *lum) +static int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, + int flags, struct lov_user_md *lum, + int lum_size) { struct ll_inode_info *lli = ll_i2info(inode); + struct file *f; + struct obd_export *exp = ll_i2obdexp(inode); struct lov_stripe_md *lsm; - int size = ll_mds_easize(inode->i_sb); - int rc; - - rc = verify_area(VERIFY_READ, lum, sizeof(*lum)); - if (rc) - RETURN(rc); + struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags}; + struct ptlrpc_request *req = NULL; + int rc = 0; + struct lustre_md md; + ENTRY; down(&lli->lli_open_sem); - if (lli->lli_smd) { - CERROR("striping data already set for %d\n", inode->i_ino); - GOTO(out_lov_up, rc = -EPERM); + lsm = lli->lli_smd; + if (lsm) { + up(&lli->lli_open_sem); + CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n", + inode->i_ino); + RETURN(-EEXIST); } - OBD_ALLOC(lli->lli_smd, size); - if (!lli->lli_smd) - GOTO(out_lov_up, rc = -ENOMEM); + f = get_empty_filp(); + if (!f) + GOTO(out, -ENOMEM); - lsm = lli->lli_smd; - lsm->lsm_magic = LOV_MAGIC; - lsm->lsm_stripe_size = lum->lum_stripe_size; - lsm->lsm_stripe_pattern = lum->lum_stripe_pattern; - lsm->lsm_stripe_offset = lum->lum_stripe_offset; - lsm->lsm_stripe_count = lum->lum_stripe_count; - lsm->lsm_mds_easize = size; + f->f_dentry = file->f_dentry; + f->f_vfsmnt = file->f_vfsmnt; - file->f_flags &= ~O_LOV_DELAY_CREATE; - rc = ll_create_objects(inode->i_sb, inode->i_ino, 0, 0, &lsm); + rc = ll_intent_file_open(f, lum, lum_size, &oit); + if (rc) + GOTO(out, rc); + if (it_disposition(&oit, DISP_LOOKUP_NEG)) + GOTO(out, -ENOENT); + req = oit.d.lustre.it_data; + rc = oit.d.lustre.it_status; + + if (rc < 0) + GOTO(out, rc); + + rc = mdc_req2lustre_md(ll_i2mdcexp(inode), req, 1, exp, &md); if (rc) - OBD_FREE(lli->lli_smd, size); - else - rc = ll_file_open(inode, file); -out_lov_up: + GOTO(out, rc); + ll_update_inode(f->f_dentry->d_inode, &md); + + rc = ll_local_open(f, &oit); + if (rc) + GOTO(out, rc); + ll_intent_release(&oit); + + rc = ll_file_release(f->f_dentry->d_inode, f); + + out: + if (f) + put_filp(f); up(&lli->lli_open_sem); - return rc; + if (req != NULL) + ptlrpc_req_finished(req); + RETURN(rc); +} + +static int ll_lov_setea(struct inode *inode, struct file *file, + unsigned long arg) +{ + int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; + struct lov_user_md *lump; + int lum_size = sizeof(struct lov_user_md) + + sizeof(struct lov_user_ost_data); + int rc; + ENTRY; + + if (!capable (CAP_SYS_ADMIN)) + RETURN(-EPERM); + + OBD_ALLOC(lump, lum_size); + if (lump == NULL) { + RETURN(-ENOMEM); + } + rc = copy_from_user(lump, (struct lov_user_md *)arg, lum_size); + if (rc) { + OBD_FREE(lump, lum_size); + RETURN(-EFAULT); + } + + rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size); + + OBD_FREE(lump, lum_size); + RETURN(rc); +} + +static int ll_lov_setstripe(struct inode *inode, struct file *file, + unsigned long arg) +{ + struct lov_user_md lum, *lump = (struct lov_user_md *)arg; + int rc; + int flags = FMODE_WRITE; + ENTRY; + + /* Bug 1152: copy properly when this is no longer true */ + LASSERT(sizeof(lum) == sizeof(*lump)); + LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0])); + rc = copy_from_user(&lum, lump, sizeof(lum)); + if (rc) + RETURN(-EFAULT); + + rc = ll_lov_setstripe_ea_info(inode, file, flags, &lum, sizeof(lum)); + RETURN(rc); } static int ll_lov_getstripe(struct inode *inode, unsigned long arg) { - struct lov_user_md lum; - struct lov_user_md *lump; + struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; + + if (!lsm) + RETURN(-ENODATA); + + return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2obdexp(inode), 0, lsm, + (void *)arg); +} + +static int ll_get_grouplock(struct inode *inode, struct file *file, + unsigned long arg) +{ + struct ll_file_data *fd = file->private_data; + ldlm_policy_data_t policy = { .l_extent = { .start = 0, + .end = OBD_OBJECT_EOF}}; + struct lustre_handle lockh = { 0 }; struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; - struct lov_user_oinfo *luoip; - struct lov_oinfo *loip; - int count, len, i, rc; + int flags = 0, rc; + ENTRY; - rc = copy_from_user(&lum, (void *)arg, sizeof(lum)); - if (rc) + if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { + RETURN(-EINVAL); + } + + policy.l_extent.gid = arg; + if (file->f_flags & O_NONBLOCK) + flags = LDLM_FL_BLOCK_NOWAIT; + + rc = ll_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh, flags); + if (rc != 0) RETURN(rc); - if ((count = lsm->lsm_stripe_count) == 0) - count = 1; + fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK; + fd->fd_gid = arg; + memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh)); + + RETURN(0); +} + +static int ll_put_grouplock(struct inode *inode, struct file *file, + unsigned long arg) +{ + struct ll_file_data *fd = file->private_data; + struct ll_inode_info *lli = ll_i2info(inode); + struct lov_stripe_md *lsm = lli->lli_smd; + int rc; + ENTRY; - if (lum.lum_stripe_count < count) + if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) { + /* Ugh, it's already unlocked. */ + RETURN(-EINVAL); + } + + if (fd->fd_gid != arg) /* Ugh? Unlocking with different gid? */ RETURN(-EINVAL); - len = sizeof(*lump) + count * sizeof(*luoip); + fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK); - rc = verify_area(VERIFY_WRITE, (void *)arg, len); + rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh); if (rc) RETURN(rc); - lump = (struct lov_user_md *)arg; - lump->lum_stripe_count = count; - luoip = lump->lum_luoinfo; - - if (lsm->lsm_stripe_count == 0) { - lump->lum_stripe_size = 0; - lump->lum_stripe_pattern = 0; - lump->lum_stripe_offset = 0; - luoip->luo_idx = 0; - luoip->luo_id = lsm->lsm_object_id; - } else { - lump->lum_stripe_size = lsm->lsm_stripe_size; - lump->lum_stripe_pattern = lsm->lsm_stripe_pattern; - lump->lum_stripe_offset = lsm->lsm_stripe_offset; - - loip = lsm->lsm_oinfo; - for (i = 0; i < count; i++, luoip++, loip++) { - luoip->luo_idx = loip->loi_ost_idx; - luoip->luo_id = loip->loi_id; - } - } + fd->fd_gid = 0; + memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh)); RETURN(0); } @@ -673,10 +1099,17 @@ static int ll_lov_getstripe(struct inode *inode, unsigned long arg) int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - struct ll_file_data *fd = (struct ll_file_data *)file->private_data; - struct lustre_handle *conn; + struct ll_file_data *fd = file->private_data; int flags; + ENTRY; + + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino, + inode->i_generation, inode, cmd); + if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */ + RETURN(-ENOTTY); + + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL); switch(cmd) { case LL_IOC_GETFLAGS: /* Get the current value of the file flags */ @@ -688,118 +1121,336 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, * not abused, and to handle any flag side effects. */ if (get_user(flags, (int *) arg)) - return -EFAULT; + RETURN(-EFAULT); if (cmd == LL_IOC_SETFLAGS) fd->fd_flags |= flags; else fd->fd_flags &= ~flags; - return 0; + RETURN(0); case LL_IOC_LOV_SETSTRIPE: - return ll_lov_setstripe(inode, file, (struct lov_user_md *)arg); + RETURN(ll_lov_setstripe(inode, file, arg)); + case LL_IOC_LOV_SETEA: + RETURN(ll_lov_setea(inode, file, arg)); case LL_IOC_LOV_GETSTRIPE: - return ll_lov_getstripe(inode, arg); - + RETURN(ll_lov_getstripe(inode, arg)); + case LL_IOC_RECREATE_OBJ: + RETURN(ll_lov_recreate_obj(inode, file, arg)); + case EXT3_IOC_GETFLAGS: + case EXT3_IOC_SETFLAGS: + RETURN( ll_iocontrol(inode, file, cmd, arg) ); + case LL_IOC_GROUP_LOCK: + RETURN(ll_get_grouplock(inode, file, arg)); + case LL_IOC_GROUP_UNLOCK: + RETURN(ll_put_grouplock(inode, file, arg)); /* We need to special case any other ioctls we want to handle, * to send them to the MDS/OST as appropriate and to properly * network encode the arg field. - case EXT2_IOC_GETFLAGS: - case EXT2_IOC_SETFLAGS: case EXT2_IOC_GETVERSION_OLD: case EXT2_IOC_GETVERSION_NEW: case EXT2_IOC_SETVERSION_OLD: case EXT2_IOC_SETVERSION_NEW: */ default: - conn = ll_i2obdconn(inode); - return obd_iocontrol(cmd, conn, 0, NULL, (void *)arg); + RETURN( obd_iocontrol(cmd, ll_i2obdexp(inode), 0, NULL, + (void *)arg) ); } } loff_t ll_file_seek(struct file *file, loff_t offset, int origin) { struct inode *inode = file->f_dentry->d_inode; - long long retval; + struct ll_file_data *fd = file->private_data; + struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; + struct lustre_handle lockh = {0}; + loff_t retval; ENTRY; + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),to=%llu\n", inode->i_ino, + inode->i_generation, inode, + offset + ((origin==2) ? inode->i_size : file->f_pos)); - switch (origin) { - case 2: { - struct ll_inode_info *lli = ll_i2info(inode); + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_LLSEEK); + if (origin == 2) { /* SEEK_END */ + int nonblock = 0, rc; + ldlm_policy_data_t policy = { .l_extent = {0, OBD_OBJECT_EOF }}; - retval = ll_file_size(inode, lli->lli_smd); - if (retval) - RETURN(retval); + if (file->f_flags & O_NONBLOCK) + nonblock = LDLM_FL_BLOCK_NOWAIT; + + rc = ll_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh, + nonblock); + if (rc != 0) + RETURN(rc); offset += inode->i_size; - break; - } - case 1: + } else if (origin == 1) { /* SEEK_CUR */ offset += file->f_pos; } + retval = -EINVAL; - if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { + if (offset >= 0 && offset <= ll_file_maxbytes(inode)) { if (offset != file->f_pos) { file->f_pos = offset; #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) file->f_reada = 0; -#endif file->f_version = ++event; +#endif } retval = offset; } + + if (origin == 2) + ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); RETURN(retval); } -/* XXX this does not need to do anything for data, it _does_ need to - call setattr */ int ll_fsync(struct file *file, struct dentry *dentry, int data) { - return 0; + struct inode *inode = dentry->d_inode; + struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; + struct ll_fid fid; + struct ptlrpc_request *req; + int rc, err; + ENTRY; + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, + inode->i_generation, inode); + + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_FSYNC); + + /* fsync's caller has already called _fdata{sync,write}, we want + * that IO to finish before calling the osc and mdc sync methods */ + rc = filemap_fdatawait(inode->i_mapping); + + ll_inode2fid(&fid, inode); + err = md_sync(ll_i2sbi(inode)->ll_mdc_exp, &fid, &req); + if (!rc) + rc = err; + if (!err) + ptlrpc_req_finished(req); + + if (data && lsm) { + struct obdo *oa = obdo_alloc(); + + if (!oa) + RETURN(rc ? rc : -ENOMEM); + + oa->o_id = lsm->lsm_object_id; + oa->o_gr = lsm->lsm_object_gr; + oa->o_valid = OBD_MD_FLID; + obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | + OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLGROUP); + + err = obd_sync(ll_i2sbi(inode)->ll_osc_exp, oa, lsm, + 0, OBD_OBJECT_EOF); + if (!rc) + rc = err; + obdo_free(oa); + } + + RETURN(rc); } +int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) +{ + struct inode *inode = file->f_dentry->d_inode; + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct obd_device *obddev; + struct ldlm_res_id res_id = + { .name = {inode->i_ino, inode->i_generation, LDLM_FLOCK} }; + struct lustre_handle lockh = {0}; + ldlm_policy_data_t flock; + ldlm_mode_t mode = 0; + int flags = 0; + int rc; + ENTRY; + + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n", + inode->i_ino, file_lock); + flock.l_flock.pid = file_lock->fl_pid; + flock.l_flock.start = file_lock->fl_start; + flock.l_flock.end = file_lock->fl_end; + + switch (file_lock->fl_type) { + case F_RDLCK: + mode = LCK_PR; + break; + case F_UNLCK: + /* An unlock request may or may not have any relation to + * existing locks so we may not be able to pass a lock handle + * via a normal ldlm_lock_cancel() request. The request may even + * unlock a byte range in the middle of an existing lock. In + * order to process an unlock request we need all of the same + * information that is given with a normal read or write record + * lock request. To avoid creating another ldlm unlock (cancel) + * message we'll treat a LCK_NL flock request as an unlock. */ + mode = LCK_NL; + break; + case F_WRLCK: + mode = LCK_PW; + break; + default: + CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type); + LBUG(); + } -static int ll_inode_revalidate(struct dentry *dentry) + switch (cmd) { + case F_SETLKW: +#ifdef F_SETLKW64 + case F_SETLKW64: +#endif + flags = 0; + break; + case F_SETLK: +#ifdef F_SETLK64 + case F_SETLK64: +#endif + flags = LDLM_FL_BLOCK_NOWAIT; + break; + case F_GETLK: +#ifdef F_GETLK64 + case F_GETLK64: +#endif + flags = LDLM_FL_TEST_LOCK; + /* Save the old mode so that if the mode in the lock changes we + * can decrement the appropriate reader or writer refcount. */ + file_lock->fl_type = mode; + break; + default: + CERROR("unknown fcntl lock command: %d\n", cmd); + LBUG(); + } + + CDEBUG(D_DLMTRACE, "inode=%lu, pid="LPU64", flags=%#x, mode=%u, " + "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid, + flags, mode, flock.l_flock.start, flock.l_flock.end); + + obddev = md_get_real_obd(sbi->ll_mdc_exp, NULL, 0); + rc = ldlm_cli_enqueue(obddev->obd_self_export, NULL, + obddev->obd_namespace, + res_id, LDLM_FLOCK, &flock, mode, &flags, + NULL, ldlm_flock_completion_ast, NULL, file_lock, + NULL, 0, NULL, &lockh); + RETURN(rc); +} + +int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) { struct inode *inode = dentry->d_inode; + struct ll_inode_info *lli; struct lov_stripe_md *lsm; + struct ll_fid fid; + int rc; ENTRY; - if (!inode) + if (!inode) { + CERROR("REPORT THIS LINE TO PETER\n"); RETURN(0); + } + ll_inode2fid(&fid, inode); + lli = ll_i2info(inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s,intent=%s\n", + inode->i_ino, inode->i_generation, inode, dentry->d_name.name, + LL_IT2STR(it)); +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)) + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_REVALIDATE); +#endif - lsm = ll_i2info(inode)->lli_smd; - if (!lsm) /* object not yet allocated, don't validate size */ + if (!md_valid_attrs(ll_i2mdcexp(inode), &fid)) { + struct ptlrpc_request *req = NULL; + struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); + struct ll_fid fid; + unsigned long valid = 0; + int ealen = 0; + + if (S_ISREG(inode->i_mode)) { + ealen = obd_size_diskmd(sbi->ll_osc_exp, NULL); + valid |= OBD_MD_FLEASIZE; + } + ll_inode2fid(&fid, inode); + rc = md_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req); + if (rc) { + CERROR("failure %d inode %lu\n", rc, inode->i_ino); + RETURN(-abs(rc)); + } + rc = ll_prep_inode(sbi->ll_osc_exp, sbi->ll_mdc_exp, + &inode, req, 0, NULL); + if (rc) { + ptlrpc_req_finished(req); + RETURN(rc); + } + ptlrpc_req_finished(req); + } + + lsm = lli->lli_smd; + if (lsm == NULL) /* object not yet allocated, don't validate size */ RETURN(0); - RETURN(ll_file_size(inode, lsm)); + /* ll_glimpse_size will prefer locally cached writes if they extend + * the file */ + { + struct ost_lvb lvb; + + rc = ll_glimpse_size(inode, &lvb); + inode->i_size = lvb.lvb_size; + } + RETURN(rc); } #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -static int ll_getattr(struct vfsmount *mnt, struct dentry *de, - struct kstat *stat) +int ll_getattr(struct vfsmount *mnt, struct dentry *de, + struct lookup_intent *it, struct kstat *stat) { - return ll_inode_revalidate(de); + int res = 0; + struct inode *inode = de->d_inode; + + res = ll_inode_revalidate_it(de, it); + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR); + + if (res) + return res; + + stat->dev = inode->i_sb->s_dev; + stat->ino = inode->i_ino; + stat->mode = inode->i_mode; + stat->nlink = inode->i_nlink; + stat->uid = inode->i_uid; + stat->gid = inode->i_gid; + stat->rdev = kdev_t_to_nr(inode->i_rdev); + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + stat->ctime = inode->i_ctime; + stat->size = inode->i_size; + stat->blksize = inode->i_blksize; + stat->blocks = inode->i_blocks; + return 0; } #endif struct file_operations ll_file_operations = { - read: ll_file_read, - write: ll_file_write, - ioctl: ll_file_ioctl, - open: ll_file_open, - release: ll_file_release, - mmap: generic_file_mmap, - llseek: ll_file_seek, - fsync: NULL + .read = ll_file_read, + .write = ll_file_write, + .ioctl = ll_file_ioctl, + .open = ll_file_open, + .release = ll_file_release, + .mmap = generic_file_mmap, + .llseek = ll_file_seek, +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) + .sendfile = generic_file_sendfile, +#endif + .fsync = ll_fsync, + .lock = ll_file_flock }; struct inode_operations ll_file_inode_operations = { - setattr: ll_setattr, - truncate: ll_truncate, + .setattr_raw = ll_setattr_raw, + .setattr = ll_setattr, + .truncate = ll_truncate, #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - getattr: ll_getattr, + .getattr_it = ll_getattr, #else - revalidate: ll_inode_revalidate, + .revalidate_it = ll_inode_revalidate_it, #endif }; +