Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / llite / file.c
index 62b0bd0..f381fb8 100644 (file)
@@ -28,9 +28,6 @@
 #include <lustre_mdc.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#include <linux/lustre_compat25.h>
-#endif
 #include "llite_internal.h"
 
 /* also used by llite/special.c:ll_special_open() */
@@ -95,7 +92,7 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
         struct ptlrpc_request *req = NULL;
         struct obd_device *obd = class_exp2obd(exp);
         int epoch_close = 1;
-        int rc;
+        int seq_end = 0, rc;
         ENTRY;
 
         if (obd == NULL) {
@@ -122,7 +119,9 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
 
         ll_prepare_close(inode, op_data, och);
         epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
-        rc = md_close(md_exp, op_data, och, &req);
+        rc = md_close(md_exp, op_data, och->och_mod, &req);
+        if (rc != -EAGAIN)
+                seq_end = 1;
 
         if (rc == -EAGAIN) {
                 /* This close must have the epoch closed. */
@@ -130,8 +129,8 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
                 LASSERT(epoch_close);
                 /* MDS has instructed us to obtain Size-on-MDS attribute from
                  * OSTs and send setattr to back to MDS. */
-                rc = ll_sizeonmds_update(inode, &och->och_fh,
-                                         op_data->op_ioepoch);
+                rc = ll_sizeonmds_update(inode, och->och_mod,
+                                         &och->och_fh, op_data->op_ioepoch);
                 if (rc) {
                         CERROR("inode %lu mdc Size-on-MDS update failed: "
                                "rc = %d\n", inode->i_ino, rc);
@@ -150,7 +149,6 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
                                inode->i_ino, rc);
         }
 
-        ptlrpc_req_finished(req); /* This is close request */
         EXIT;
 out:
       
@@ -158,12 +156,15 @@ out:
             S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
                 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
         } else {
+                if (seq_end)
+                        ptlrpc_close_replay_seq(req);
                 md_clear_open_replay_data(md_exp, och);
                 /* Free @och if it is not waiting for DONE_WRITING. */
                 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
                 OBD_FREE_PTR(och);
         }
-        
+        if (req) /* This is close request */
+                ptlrpc_req_finished(req);
         return rc;
 }
 
@@ -386,8 +387,10 @@ static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
         LASSERT(och);
 
         body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body));
-        LASSERT(body != NULL);                      /* reply already checked out */
-        LASSERT_REPSWABBED(req, DLM_REPLY_REC_OFF); /* and swabbed in md_enqueue */
+        /* reply already checked out */
+        LASSERT(body != NULL);
+        /* and swabbed in md_enqueue */
+        LASSERT(lustre_rep_swabbed(req, DLM_REPLY_REC_OFF));
 
         memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
         och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
@@ -470,7 +473,7 @@ int ll_file_open(struct inode *inode, struct file *file)
         if (inode->i_sb->s_root == file->f_dentry)
                 RETURN(0);
 
-#ifdef LUSTRE_KERNEL_VERSION
+#ifdef HAVE_VFS_INTENT_PATCHES
         it = file->f_it;
 #else
         it = file->private_data; /* XXX: compat macro */
@@ -747,9 +750,10 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
         struct page *page;
         int rc, rc2, discard = lock->l_flags & LDLM_FL_DISCARD_DATA;
         struct lustre_handle lockh;
-        ENTRY;
+        struct address_space *mapping = inode->i_mapping;
 
-        memcpy(&tmpex, &lock->l_policy_data, sizeof(tmpex));
+        ENTRY;
+        tmpex = lock->l_policy_data;
         CDEBUG(D_INODE|D_PAGE, "inode %lu(%p) ["LPU64"->"LPU64"] size: %llu\n",
                inode->i_ino, inode, tmpex.l_extent.start, tmpex.l_extent.end,
                i_size_read(inode));
@@ -794,8 +798,8 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
         for (i = start; i <= end; i += (j + skip)) {
                 j = min(count - (i % count), end - i + 1);
                 LASSERT(j > 0);
-                LASSERT(inode->i_mapping);
-                if (ll_teardown_mmaps(inode->i_mapping,
+                LASSERT(mapping);
+                if (ll_teardown_mmaps(mapping,
                                       (__u64)i << CFS_PAGE_SHIFT,
                                       ((__u64)(i+j) << CFS_PAGE_SHIFT) - 1) )
                         break;
@@ -820,14 +824,14 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                          tmpex.l_extent.start, lock->l_policy_data.l_extent.end,
                          start, i, end);
 
-                if (!mapping_has_pages(inode->i_mapping)) {
+                if (!mapping_has_pages(mapping)) {
                         CDEBUG(D_INODE|D_PAGE, "nothing left\n");
                         break;
                 }
 
                 cond_resched();
 
-                page = find_get_page(inode->i_mapping, i);
+                page = find_get_page(mapping, i);
                 if (page == NULL)
                         continue;
                 LL_CDEBUG_PAGE(D_PAGE, page, "lock page idx %lu ext "LPU64"\n",
@@ -837,13 +841,19 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                 /* page->mapping to check with racing against teardown */
                 if (!discard && clear_page_dirty_for_io(page)) {
                         rc = ll_call_writepage(inode, page);
-                        if (rc != 0)
-                                CERROR("writepage inode %lu(%p) of page %p "
-                                       "failed: %d\n", inode->i_ino, inode,
-                                       page, rc);
                         /* either waiting for io to complete or reacquiring
                          * the lock that the failed writepage released */
                         lock_page(page);
+                        wait_on_page_writeback(page);
+                        if (rc != 0) {
+                                CERROR("writepage inode %lu(%p) of page %p "
+                                       "failed: %d\n", inode->i_ino, inode,
+                                       page, rc);
+                                if (rc == -ENOSPC)
+                                        set_bit(AS_ENOSPC, &mapping->flags);
+                                else
+                                        set_bit(AS_EIO, &mapping->flags);
+                        }
                 }
 
                 tmpex.l_extent.end = tmpex.l_extent.start + CFS_PAGE_SIZE - 1;
@@ -860,7 +870,7 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                          * lock_page() */
                         LL_CDEBUG_PAGE(D_PAGE, page, "truncating\n");
                         if (llap)
-                                ll_ra_accounting(llap, inode->i_mapping);
+                                ll_ra_accounting(llap, mapping);
                         ll_truncate_complete_page(page);
                 }
                 unlock_page(page);
@@ -1086,14 +1096,14 @@ int ll_local_size(struct inode *inode)
                 RETURN(0);
 
         rc = obd_match(sbi->ll_dt_exp, lli->lli_smd, LDLM_EXTENT,
-                       &policy, LCK_PR | LCK_PW, &flags, inode, &lockh);
+                       &policy, LCK_PR, &flags, inode, &lockh);
         if (rc < 0)
                 RETURN(rc);
         else if (rc == 0)
                 RETURN(-ENODATA);
 
         ll_merge_lvb(inode);
-        obd_cancel(sbi->ll_dt_exp, lli->lli_smd, LCK_PR | LCK_PW, &lockh);
+        obd_cancel(sbi->ll_dt_exp, lli->lli_smd, LCK_PR, &lockh);
         RETURN(0);
 }
 
@@ -1422,11 +1432,8 @@ repeat:
                inode->i_ino, chunk, *ppos, i_size_read(inode));
 
         /* turn off the kernel's read-ahead */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        file->f_ramax = 0;
-#else
         file->f_ra.ra_pages = 0;
-#endif
+
         /* initialize read-ahead window once per syscall */
         if (ra == 0) {
                 ra = 1;
@@ -1576,7 +1583,6 @@ out:
 /*
  * Send file content (through pagecache) somewhere with helper
  */
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
                                 read_actor_t actor, void *target)
 {
@@ -1668,7 +1674,6 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
         ll_tree_unlock(&tree);
         RETURN(retval);
 }
-#endif
 
 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
                                unsigned long arg)
@@ -1793,7 +1798,7 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
         body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*body));
         LASSERT(body != NULL); /* checked by mdc_getattr_name */
         /* swabbed by mdc_getattr_name */
-        LASSERT_REPSWABBED(req, REPLY_REC_OFF);
+        LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF));
 
         lmmsize = body->eadatasize;
 
@@ -1804,7 +1809,7 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
 
         lmm = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF + 1, lmmsize);
         LASSERT(lmm != NULL);
-        LASSERT_REPSWABBED(req, REPLY_REC_OFF + 1);
+        LASSERT(lustre_rep_swabbed(req, REPLY_REC_OFF + 1));
 
         /*
          * This is coming from the MDS, so is probably in
@@ -2301,10 +2306,17 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 
                 RETURN(ll_ioctl_setfacl(inode, &ioc));
         }
-        default:
+        default: {
+                int err;
+
+                if (LLIOC_STOP == 
+                    ll_iocontrol_call(inode, file, cmd, arg, &err))
+                        RETURN(err);
+
                 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
                                      (void *)arg));
         }
+        }
 }
 
 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
@@ -2407,7 +2419,7 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
                                            OBD_MD_FLMTIME | OBD_MD_FLCTIME |
                                            OBD_MD_FLGROUP);
 
-                oc = ll_osscapa_get(inode, 0, CAPA_OPC_OSS_WRITE);
+                oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
                 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
                                0, OBD_OBJECT_EOF, oc);
                 capa_put(oc);
@@ -2541,13 +2553,30 @@ int ll_have_md_lock(struct inode *inode, __u64 bits)
 
         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
         if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
-                          LCK_CR|LCK_CW|LCK_PR, &lockh)) {
+                          LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
                 RETURN(1);
         }
-
         RETURN(0);
 }
 
+ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits, /* try to match an already-held MD inodebits lock on @inode's FID covering @bits */
+                            struct lustre_handle *lockh) /* @lockh is handed to md_lock_match() to receive the matched handle */
+{
+        ldlm_policy_data_t policy = { .l_inodebits = {bits}}; /* match policy built from the requested bits */
+        struct lu_fid *fid;
+        ldlm_mode_t rc; /* whatever md_lock_match() returns; passed back unchanged */
+        int flags;
+        ENTRY;
+
+        fid = &ll_i2info(inode)->lli_fid;
+        CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
+
+        flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING; /* no LDLM_FL_TEST_LOCK here, unlike ll_have_md_lock() - NOTE(review): presumably the match keeps a reference the caller must drop via @lockh; confirm */
+        rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
+                           LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh); /* any of CR/CW/PR/PW modes is acceptable */
+        RETURN(rc);
+}
+
 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
         if (rc == -ENOENT) { /* Already unlinked. Just update nlink
                               * and return success */
@@ -2586,9 +2615,6 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
                inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
-#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0))
-        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REVALIDATE, 1);
-#endif
 
         exp = ll_i2mdexp(inode);
 
@@ -2633,8 +2659,7 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
                 }
 
                 ll_lookup_finish_locks(&oit, dentry);
-        } else if (!ll_have_md_lock(dentry->d_inode,
-                                    MDS_INODELOCK_UPDATE)) {
+        } else if (!ll_have_md_lock(dentry->d_inode, MDS_INODELOCK_UPDATE)) {
                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
                 obd_valid valid = OBD_MD_FLGETATTR;
                 struct obd_capa *oc;
@@ -2677,7 +2702,6 @@ out:
         return rc;
 }
 
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
                   struct lookup_intent *it, struct kstat *stat)
 {
@@ -2719,7 +2743,6 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
 
         return ll_getattr_it(mnt, de, &it, stat);
 }
-#endif
 
 static
 int lustre_check_acl(struct inode *inode, int mask)
@@ -2758,11 +2781,7 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
         return generic_permission(inode, mask, lustre_check_acl);
 }
 #else
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
-#else
-int ll_inode_permission(struct inode *inode, int mask)
-#endif
 {
         int mode = inode->i_mode;
         int rc;
@@ -2822,9 +2841,7 @@ struct file_operations ll_file_operations = {
         .release        = ll_file_release,
         .mmap           = ll_file_mmap,
         .llseek         = ll_file_seek,
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
         .sendfile       = ll_file_sendfile,
-#endif
         .fsync          = ll_fsync,
 };
 
@@ -2836,9 +2853,7 @@ struct file_operations ll_file_operations_flock = {
         .release        = ll_file_release,
         .mmap           = ll_file_mmap,
         .llseek         = ll_file_seek,
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
         .sendfile       = ll_file_sendfile,
-#endif
         .fsync          = ll_fsync,
 #ifdef HAVE_F_OP_FLOCK
         .flock          = ll_file_flock,
@@ -2855,9 +2870,7 @@ struct file_operations ll_file_operations_noflock = {
         .release        = ll_file_release,
         .mmap           = ll_file_mmap,
         .llseek         = ll_file_seek,
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
         .sendfile       = ll_file_sendfile,
-#endif
         .fsync          = ll_fsync,
 #ifdef HAVE_F_OP_FLOCK
         .flock          = ll_file_noflock,
@@ -2866,16 +2879,12 @@ struct file_operations ll_file_operations_noflock = {
 };
 
 struct inode_operations ll_file_inode_operations = {
-#ifdef LUSTRE_KERNEL_VERSION
+#ifdef HAVE_VFS_INTENT_PATCHES
         .setattr_raw    = ll_setattr_raw,
 #endif
         .setattr        = ll_setattr,
         .truncate       = ll_truncate,
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
         .getattr        = ll_getattr,
-#else
-        .revalidate_it  = ll_inode_revalidate_it,
-#endif
         .permission     = ll_inode_permission,
         .setxattr       = ll_setxattr,
         .getxattr       = ll_getxattr,
@@ -2883,3 +2892,102 @@ struct inode_operations ll_file_inode_operations = {
         .removexattr    = ll_removexattr,
 };
 
+/* dynamic ioctl number support routines */
+static struct llioc_ctl_data { /* file-local registry of dynamically registered ioctl handlers */
+        struct rw_semaphore ioc_sem; /* taken for write by register/unregister, for read by ll_iocontrol_call() */
+        struct list_head    ioc_head; /* list of struct llioc_data, linked through iocd_list */
+} llioc = { /* single static instance; both members are initialized at compile time */
+        __RWSEM_INITIALIZER(llioc.ioc_sem), 
+        CFS_LIST_HEAD_INIT(llioc.ioc_head)
+};
+
+
+struct llioc_data { /* one registration: a callback plus the ioctl numbers it handles */
+        struct list_head        iocd_list; /* linkage on llioc.ioc_head */
+        unsigned int            iocd_size; /* total bytes allocated for this block; reused as the OBD_FREE size */
+        llioc_callback_t        iocd_cb; /* handler invoked when one of iocd_cmd[] matches */
+        unsigned int            iocd_count; /* number of valid entries in iocd_cmd[] */
+        unsigned int            iocd_cmd[0]; /* trailing array of ioctl numbers, allocated together with the header */
+};
+
+void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd) /* register @cb for the @count ioctl numbers in @cmd; returns an opaque handle or NULL */
+{
+        unsigned int size;
+        struct llioc_data *in_data = NULL;
+        ENTRY;
+
+        if (cb == NULL || cmd == NULL || /* both the callback and the command array are mandatory */
+            count > LLIOC_MAX_CMD || count < 0) /* @count must lie in [0, LLIOC_MAX_CMD] */
+                RETURN(NULL);
+
+        size = sizeof(*in_data) + count * sizeof(unsigned int); /* header plus the trailing iocd_cmd[] entries */
+        OBD_ALLOC(in_data, size);
+        if (in_data == NULL) /* allocation failure is reported as NULL, same as bad arguments */
+                RETURN(NULL);
+
+        memset(in_data, 0, sizeof(*in_data)); /* clears the header only; iocd_cmd[] is filled by the memcpy below */
+        in_data->iocd_size = size; /* remembered so ll_iocontrol_unregister() can free the exact size */
+        in_data->iocd_cb = cb;
+        in_data->iocd_count = count;
+        memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count); /* private copy; @cmd is not referenced afterwards */
+
+        down_write(&llioc.ioc_sem); /* list modification requires the write side of the semaphore */
+        list_add_tail(&in_data->iocd_list, &llioc.ioc_head); /* appended, so dispatch visits blocks in registration order */
+        up_write(&llioc.ioc_sem);
+
+        RETURN(in_data); /* this pointer is the "magic" that ll_iocontrol_unregister() compares against */
+}
+
+void ll_iocontrol_unregister(void *magic) /* remove and free the registration block whose address equals @magic */
+{
+        struct llioc_data *tmp;
+
+        if (magic == NULL) /* a NULL handle is silently ignored */
+                return;
+
+        down_write(&llioc.ioc_sem);
+        list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) { /* search the list instead of trusting @magic, so an unknown pointer is never freed */
+                if (tmp == magic) { /* match is by pointer identity */
+                        unsigned int size = tmp->iocd_size;
+
+                        list_del(&tmp->iocd_list);
+                        up_write(&llioc.ioc_sem); /* semaphore is released before freeing */
+
+                        OBD_FREE(tmp, size);
+                        return; /* leaves the loop immediately, so the iterator never touches the freed entry */
+                }
+        }
+        up_write(&llioc.ioc_sem);
+
+        CWARN("didn't find iocontrol register block with magic: %p\n", magic); /* reached only when no list entry matched */
+}
+
+EXPORT_SYMBOL(ll_iocontrol_register);
+EXPORT_SYMBOL(ll_iocontrol_unregister);
+
+enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file, /* offer ioctl @cmd to the registered callbacks */
+                        unsigned int cmd, unsigned long arg, int *rcp) /* @rcp, if non-NULL, receives the result code */
+{
+        enum llioc_iter ret = LLIOC_CONT; /* returned unchanged when no registered command matches */
+        struct llioc_data *data;
+        int rc = -EINVAL, i; /* rc stays -EINVAL unless a callback stores something else */
+
+        down_read(&llioc.ioc_sem); /* read side is held while callbacks run */
+        list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
+                for (i = 0; i < data->iocd_count; i++) {
+                        if (cmd != data->iocd_cmd[i]) 
+                                continue;
+
+                        ret = data->iocd_cb(inode, file, cmd, arg, data, &rc); /* callback gets its own registration block and a pointer to rc */
+                        break; /* at most one callback invocation per registration block */
+                }
+
+                if (ret == LLIOC_STOP) /* a callback claimed the ioctl; later registrations are not consulted */
+                        break;
+        }
+        up_read(&llioc.ioc_sem);
+
+        if (rcp) /* result pointer is optional */
+                *rcp = rc;
+        return ret; /* verdict of the last callback invoked, or LLIOC_CONT if none ran */
+}