Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / llite / llite_lib.c
index 10687e3..77f615f 100644 (file)
@@ -81,17 +81,25 @@ static struct ll_sb_info *ll_init_sbi(void)
 
         ll_generate_random_uuid(uuid);
         class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-        CDEBUG(D_HA, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
+        CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
 
         spin_lock(&ll_sb_lock);
         list_add_tail(&sbi->ll_list, &ll_super_blocks);
         spin_unlock(&ll_sb_lock);
 
+#ifdef ENABLE_LLITE_CHECKSUM
+        sbi->ll_flags |= LL_SBI_CHECKSUM;
+#endif
+
+#ifdef HAVE_LRU_RESIZE_SUPPORT
+        sbi->ll_flags |= LL_SBI_LRU_RESIZE;
+#endif
+
 #ifdef HAVE_EXPORT___IGET
         INIT_LIST_HEAD(&sbi->ll_deathrow);
         spin_lock_init(&sbi->ll_deathrow_lock);
 #endif
-        for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) { 
+        for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
                 spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock);
                 spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock);
         }
@@ -114,7 +122,7 @@ void ll_free_sbi(struct super_block *sb)
 }
 
 static struct dentry_operations ll_d_root_ops = {
-#ifdef LUSTRE_KERNEL_VERSION
+#ifdef DCACHE_LUSTRE_INVALID
         .d_compare = ll_dcompare,
 #endif
 };
@@ -168,7 +176,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         struct obd_connect_data *data = NULL;
         struct lustre_md lmd;
         obd_valid valid;
-        int size, err;
+        int size, err, checksum;
         ENTRY;
 
         obd = class_name2obd(md);
@@ -194,6 +202,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                                   OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION |
                                   OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA |
                                   OBD_CONNECT_CANCELSET;
+#ifdef HAVE_LRU_RESIZE_SUPPORT
+        if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
+                data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
+#endif
 #ifdef CONFIG_FS_POSIX_ACL
         data->ocd_connect_flags |= OBD_CONNECT_ACL;
 #endif
@@ -333,13 +345,14 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                 sbi->ll_flags |= LL_SBI_OSS_CAPA;
         }
 
+        sbi->ll_sdev_orig = sb->s_dev;
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
         /* We set sb->s_dev equal on all lustre clients in order to support
          * NFS export clustering.  NFSD requires that the FSID be the same
          * on all clients. */
         /* s_dev is also used in lt_compare() to compare two fs, but that is
          * only a node-local comparison. */
-        
+
         /* XXX: this will not work with LMV */
         sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid,
                                  strlen(sbi2mdc(sbi)->cl_target_uuid.uuid));
@@ -357,6 +370,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         if (sbi->ll_flags & LL_SBI_OSS_CAPA)
                 data->ocd_connect_flags |= OBD_CONNECT_OSS_CAPA;
 
+#ifdef HAVE_LRU_RESIZE_SUPPORT
+        data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
+#endif
         CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
                "ocd_grant: %d\n", data->ocd_connect_flags,
                data->ocd_version, data->ocd_grant);
@@ -405,8 +421,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                 ll_async_page_slab_size =
                         size_round(sizeof(struct ll_async_page)) + err;
                 ll_async_page_slab = cfs_mem_cache_create("ll_async_page",
-                                                        ll_async_page_slab_size,
-                                                           0, 0);
+                                                          ll_async_page_slab_size,
+                                                          0, 0);
                 if (!ll_async_page_slab)
                         GOTO(out_dt_fid, err = -ENOMEM);
         }
@@ -420,9 +436,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         sbi->ll_root_fid = rootfid;
 
         sb->s_op = &lustre_super_operations;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
         sb->s_export_op = &lustre_export_operations;
-#endif
 
         /* make root inode
          * XXX: move this to after cbd setup? */
@@ -473,14 +487,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                 GOTO(out_root, err);
         }
 
-        /* making vm readahead 0 for 2.4.x. In the case of 2.6.x,
-           backing dev info assigned to inode mapping is used for
-           determining maximal readahead. */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
-    !defined(KERNEL_HAS_AS_MAX_READAHEAD)
-        /* bug 2805 - set VM readahead to zero */
-        vm_max_readahead = vm_min_readahead = 0;
-#endif
+        checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
+        err = obd_set_info_async(sbi->ll_dt_exp, strlen("checksum"),"checksum",
+                                 sizeof(checksum), &checksum, NULL);
 
         sb->s_root = d_alloc_root(root);
         if (data != NULL)
@@ -564,27 +573,6 @@ void lustre_dump_dentry(struct dentry *dentry, int recur)
         }
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-void lustre_throw_orphan_dentries(struct super_block *sb)
-{
-        struct dentry *dentry, *next;
-        struct ll_sb_info *sbi = ll_s2sbi(sb);
-
-        /* Do this to get rid of orphaned dentries. That is not really trw. */
-        list_for_each_entry_safe(dentry, next, &sbi->ll_orphan_dentry_list,
-                                 d_hash) {
-                CWARN("found orphan dentry %.*s (%p->%p) at unmount, dumping "
-                      "before and after shrink_dcache_parent\n",
-                      dentry->d_name.len, dentry->d_name.name, dentry, next);
-                lustre_dump_dentry(dentry, 1);
-                shrink_dcache_parent(dentry);
-                lustre_dump_dentry(dentry, 1);
-        }
-}
-#else
-#define lustre_throw_orphan_dentries(sb)
-#endif
-
 #ifdef HAVE_EXPORT___IGET
 static void prune_dir_dentries(struct inode *inode)
 {
@@ -695,7 +683,7 @@ void client_common_put_super(struct super_block *sb)
         prune_deathrow(sbi, 0);
 
         list_del(&sbi->ll_conn_chain);
-        
+
         obd_fid_fini(sbi->ll_dt_exp);
         obd_disconnect(sbi->ll_dt_exp);
         sbi->ll_dt_exp = NULL;
@@ -706,7 +694,25 @@ void client_common_put_super(struct super_block *sb)
         obd_disconnect(sbi->ll_md_exp);
         sbi->ll_md_exp = NULL;
 
-        lustre_throw_orphan_dentries(sb);
+        EXIT;
+}
+
+void ll_kill_super(struct super_block *sb)
+{
+        struct ll_sb_info *sbi;
+
+        ENTRY;
+
+        /* MS_ACTIVE clear: presumably sb was never fully set up -- skip */
+        if (!(sb->s_flags & MS_ACTIVE))
+                return;
+
+        sbi = ll_s2sbi(sb);
+        /* s_dev may have been changed for clustered NFS; restore the original
+         * before put_super.  Newer kernels cache s_dev, so changing sb->s_dev
+         * in put_super itself would not affect the actual device removal. */
+        if (sbi)
+                sb->s_dev = sbi->ll_sdev_orig;
         EXIT;
 }
 
@@ -803,6 +809,27 @@ static int ll_options(char *options, int *flags)
                         goto next;
                 }
 
+                tmp = ll_set_opt("checksum", s1, LL_SBI_CHECKSUM);
+                if (tmp) {
+                        *flags |= tmp;
+                        goto next;
+                }
+                tmp = ll_set_opt("nochecksum", s1, LL_SBI_CHECKSUM);
+                if (tmp) {
+                        *flags &= ~tmp;
+                        goto next;
+                }
+                tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
+                if (tmp) {
+                        *flags |= tmp;
+                        goto next;
+                }
+                tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
+                if (tmp) {
+                        *flags &= ~tmp;
+                        goto next;
+                }
+
                 LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
                                    s1);
                 RETURN(-EINVAL);
@@ -1108,7 +1135,8 @@ void ll_clear_inode(struct inode *inode)
         EXIT;
 }
 
-int ll_md_setattr(struct inode *inode, struct md_op_data *op_data)
+int ll_md_setattr(struct inode *inode, struct md_op_data *op_data,
+                  struct md_open_data **mod)
 {
         struct lustre_md md;
         struct ll_sb_info *sbi = ll_i2sbi(inode);
@@ -1122,7 +1150,7 @@ int ll_md_setattr(struct inode *inode, struct md_op_data *op_data)
                 RETURN(PTR_ERR(op_data));
 
         rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, 
-                        &request);
+                        &request, mod);
         if (rc) {
                 ptlrpc_req_finished(request);
                 if (rc == -ENOENT) {
@@ -1153,9 +1181,9 @@ int ll_md_setattr(struct inode *inode, struct md_op_data *op_data)
         rc = inode_setattr(inode, &op_data->op_attr);
 
         /* Extract epoch data if obtained. */
-        memcpy(&op_data->op_handle, &md.body->handle, sizeof(op_data->op_handle));
+        op_data->op_handle = md.body->handle;
         op_data->op_ioepoch = md.body->ioepoch;
-        
+
         ll_update_inode(inode, &md);
         ptlrpc_req_finished(request);
 
@@ -1164,7 +1192,8 @@ int ll_md_setattr(struct inode *inode, struct md_op_data *op_data)
 
 /* Close IO epoch and send Size-on-MDS attribute update. */
 static int ll_setattr_done_writing(struct inode *inode,
-                                   struct md_op_data *op_data)
+                                   struct md_op_data *op_data,
+                                   struct md_open_data *mod)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
         int rc = 0;
@@ -1178,12 +1207,11 @@ static int ll_setattr_done_writing(struct inode *inode,
                op_data->op_ioepoch, PFID(&lli->lli_fid));
 
         op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE;
-        /* XXX: pass och here for the recovery purpose. */
-        rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL);
+        rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, mod);
         if (rc == -EAGAIN) {
                 /* MDS has instructed us to obtain Size-on-MDS attribute
                  * from OSTs and send setattr to back to MDS. */
-                rc = ll_sizeonmds_update(inode, &op_data->op_handle,
+                rc = ll_sizeonmds_update(inode, mod, &op_data->op_handle,
                                          op_data->op_ioepoch);
         } else if (rc) {
                 CERROR("inode %lu mdc truncate failed: rc = %d\n",
@@ -1211,6 +1239,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
         struct lov_stripe_md *lsm = lli->lli_smd;
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct md_op_data *op_data = NULL;
+        struct md_open_data *mod = NULL;
         int ia_valid = attr->ia_valid;
         int rc = 0, rc1 = 0;
         ENTRY;
@@ -1277,22 +1306,22 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                 RETURN(-ENOMEM);
         
         memcpy(&op_data->op_attr, attr, sizeof(*attr));
-        
+
         /* Open epoch for truncate. */
         if (ia_valid & ATTR_SIZE)
                 op_data->op_flags = MF_EPOCH_OPEN;
-
-        rc = ll_md_setattr(inode, op_data);
+        
+        rc = ll_md_setattr(inode, op_data, &mod);
         if (rc)
                 GOTO(out, rc);
-        
+
         if (op_data->op_ioepoch)
                 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for "
                        "truncate\n", op_data->op_ioepoch, PFID(&lli->lli_fid));
 
         if (!lsm || !S_ISREG(inode->i_mode)) {
-                        CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
-                        GOTO(out, rc = 0);
+                CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
+                GOTO(out, rc = 0);
         }
 
         /* We really need to get our PW lock before we change inode->i_size.
@@ -1316,13 +1345,9 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                 UP_WRITE_I_ALLOC_SEM(inode);
                 rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy, &lockh,
                                     ast_flags);
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-                DOWN_WRITE_I_ALLOC_SEM(inode);
-                LOCK_INODE_MUTEX(inode);
-#else
                 LOCK_INODE_MUTEX(inode);
                 DOWN_WRITE_I_ALLOC_SEM(inode);
-#endif
+
                 if (rc != 0)
                         GOTO(out, rc);
 
@@ -1381,9 +1406,8 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
         EXIT;
 out:
         if (op_data) {
-                if (op_data->op_ioepoch) {
-                        rc1 = ll_setattr_done_writing(inode, op_data);
-                }
+                if (op_data->op_ioepoch)
+                        rc1 = ll_setattr_done_writing(inode, op_data, mod);
                 ll_finish_md_op_data(op_data);
         }
         return rc ? rc : rc1;
@@ -1428,6 +1452,7 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
                obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
                obd_osfs.os_files);
 
+        osfs->os_bsize = obd_osfs.os_bsize;
         osfs->os_blocks = obd_osfs.os_blocks;
         osfs->os_bfree = obd_osfs.os_bfree;
         osfs->os_bavail = obd_osfs.os_bavail;
@@ -1634,30 +1659,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
         if (body->valid & OBD_MD_FLNLINK)
                 inode->i_nlink = body->nlink;
         if (body->valid & OBD_MD_FLRDEV)
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-                inode->i_rdev = body->rdev;
-#else
                 inode->i_rdev = old_decode_dev(body->rdev);
-#endif
-        if (body->valid & OBD_MD_FLSIZE) {
-                if (ll_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM) {
-                        if (lli->lli_flags & (LLIF_DONE_WRITING |
-                                              LLIF_EPOCH_PENDING |
-                                              LLIF_SOM_DIRTY))
-                          CWARN("ino %lu flags %lu still has size authority!"
-                                "do not trust the size got from MDS\n", 
-                                inode->i_ino, lli->lli_flags);
-                        else {
-                                inode->i_size = body->size;
-                                lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
-                        }
-                } else {
-                        inode->i_size = body->size;
-                }
-
-                if (body->valid & OBD_MD_FLBLOCKS)
-                        inode->i_blocks = body->blocks;
-        }
 
         if (body->valid & OBD_MD_FLID) {
                 /* FID shouldn't be changed! */
@@ -1673,6 +1675,40 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 
         LASSERT(fid_seq(&lli->lli_fid) != 0);
 
+        if (body->valid & OBD_MD_FLSIZE) {
+                if ((ll_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM) &&
+                    S_ISREG(inode->i_mode) && lli->lli_smd) {
+                        struct lustre_handle lockh;
+                        ldlm_mode_t mode;
+                        
+                        /* As it is possible a blocking ast has been processed
+                         * by this time, we need to check there is an UPDATE 
+                         * lock on the client and set LLIF_MDS_SIZE_LOCK holding
+                         * it. */
+                        mode = ll_take_md_lock(inode, MDS_INODELOCK_UPDATE,
+                                               &lockh);
+                        if (mode) {
+                                if (lli->lli_flags & (LLIF_DONE_WRITING |
+                                                      LLIF_EPOCH_PENDING |
+                                                      LLIF_SOM_DIRTY)) {
+                                        CERROR("ino %lu flags %lu still has "
+                                               "size authority! do not trust "
+                                               "the size got from MDS\n",
+                                               inode->i_ino, lli->lli_flags);
+                                } else {
+                                        i_size_write(inode, body->size);
+                                        lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
+                                }
+                                ldlm_lock_decref(&lockh, mode);
+                        }
+                } else {
+                        i_size_write(inode, body->size);
+                }
+
+                if (body->valid & OBD_MD_FLBLOCKS)
+                        inode->i_blocks = body->blocks;
+        }
+
         if (body->valid & OBD_MD_FLMDSCAPA) {
                 LASSERT(md->mds_capa);
                 ll_add_capa(inode, md->mds_capa);
@@ -1683,7 +1719,6 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
         }
 }
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 static struct backing_dev_info ll_backing_dev_info = {
         .ra_pages       = 0,    /* No readahead */
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12))
@@ -1692,7 +1727,6 @@ static struct backing_dev_info ll_backing_dev_info = {
         .memory_backed  = 0,    /* Does contribute to dirty memory */
 #endif
 };
-#endif
 
 void ll_read_inode2(struct inode *inode, void *opaque)
 {
@@ -1736,15 +1770,12 @@ void ll_read_inode2(struct inode *inode, void *opaque)
         } else {
                 inode->i_op = &ll_special_inode_operations;
 
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
                 init_special_inode(inode, inode->i_mode,
                                    kdev_t_to_nr(inode->i_rdev));
 
                 /* initializing backing dev info. */
                 inode->i_mapping->backing_dev_info = &ll_backing_dev_info;
-#else
-                init_special_inode(inode, inode->i_mode, inode->i_rdev);
-#endif
+
                 EXIT;
         }
 }
@@ -1788,9 +1819,6 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 
                 body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
                                       sizeof(*body));
-
-                /*Now the ext3 will be packed directly back to client,
-                 *no need convert here*/
                 flags = body->flags;
 
                 ptlrpc_req_finished (req);
@@ -1818,7 +1846,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
                 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags;
                 op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
                 rc = md_setattr(sbi->ll_md_exp, op_data,
-                                NULL, 0, NULL, 0, &req);
+                                NULL, 0, NULL, 0, &req, NULL);
                 ll_finish_md_op_data(op_data);
                 ptlrpc_req_finished(req);
                 if (rc || lsm == NULL) {