Whamcloud - gitweb
Revert "LU-1756 kernel: cleanup lustre_compat25.h"
[fs/lustre-release.git] / lustre / llite / llite_lib.c
index bdb94d9..c8bba12 100644 (file)
 cfs_mem_cache_t *ll_file_data_slab;
 
 CFS_LIST_HEAD(ll_super_blocks);
-cfs_spinlock_t ll_sb_lock = CFS_SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(ll_sb_lock);
 
 #ifndef MS_HAS_NEW_AOPS
 extern struct address_space_operations ll_aops;
-extern struct address_space_operations ll_dir_aops;
 #else
 extern struct address_space_operations_ext ll_aops;
-extern struct address_space_operations_ext ll_dir_aops;
 #endif
 
 #ifndef log2
@@ -75,8 +73,9 @@ extern struct address_space_operations_ext ll_dir_aops;
 
 static struct ll_sb_info *ll_init_sbi(void)
 {
-        struct ll_sb_info *sbi = NULL;
-        unsigned long pages;
+       struct ll_sb_info *sbi = NULL;
+       unsigned long pages;
+       unsigned long lru_page_max;
         struct sysinfo si;
         class_uuid_t uuid;
         int i;
@@ -96,13 +95,20 @@ static struct ll_sb_info *ll_init_sbi(void)
         pages = si.totalram - si.totalhigh;
         if (pages >> (20 - CFS_PAGE_SHIFT) < 512) {
 #ifdef HAVE_BGL_SUPPORT
-                sbi->ll_async_page_max = pages / 4;
+               lru_page_max = pages / 4;
 #else
-                sbi->ll_async_page_max = pages / 2;
+               lru_page_max = pages / 2;
 #endif
-        } else {
-                sbi->ll_async_page_max = (pages / 4) * 3;
-        }
+       } else {
+               lru_page_max = (pages / 4) * 3;
+       }
+
+       /* initialize lru data */
+       cfs_atomic_set(&sbi->ll_cache.ccc_users, 0);
+       sbi->ll_cache.ccc_lru_max = lru_page_max;
+       cfs_atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max);
+       cfs_spin_lock_init(&sbi->ll_cache.ccc_lru_lock);
+       CFS_INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru);
 
         sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
                                            SBI_DEFAULT_READAHEAD_MAX);
@@ -227,6 +233,12 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 #ifdef CONFIG_FS_POSIX_ACL
         data->ocd_connect_flags |= OBD_CONNECT_ACL;
 #endif
+
+       if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT))
+               /* flag mdc connection as lightweight; only used for testing
+                * purposes, use with care */
+                data->ocd_connect_flags |= OBD_CONNECT_LIGHTWEIGHT;
+
         data->ocd_ibits_known = MDS_INODELOCK_FULL;
         data->ocd_version = LUSTRE_VERSION_CODE;
 
@@ -463,7 +475,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid));
 
         sb->s_op = &lustre_super_operations;
-#if THREAD_SIZE >= 8192 /*b=17630*/ && !defined(HAVE_FSTYPE_MOUNT) /*LU-812*/
+#if THREAD_SIZE >= 8192 /*b=17630*/
         sb->s_export_op = &lustre_export_operations;
 #endif
 
@@ -539,7 +551,11 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
                                  NULL);
         cl_sb_init(sb);
 
-        sb->s_root = d_alloc_root(root);
+       err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
+                                KEY_CACHE_SET, sizeof(sbi->ll_cache),
+                                &sbi->ll_cache, NULL);
+
+       sb->s_root = d_alloc_root(root);
 #ifdef HAVE_DCACHE_LOCK
        sb->s_root->d_op = &ll_d_root_ops;
 #else
@@ -558,7 +574,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
         uuid = obd_get_uuid(sbi->ll_md_exp);
         if (uuid != NULL)
                 sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid));
-        sbi->ll_mnt = mnt;
 
         if (data != NULL)
                 OBD_FREE_PTR(data);
@@ -773,7 +788,7 @@ static int ll_options(char *options, int *flags)
                         *flags &= ~tmp;
                         goto next;
                 }
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2,5,50,0)
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 5, 50, 0)
                 tmp = ll_set_opt("acl", s1, LL_SBI_ACL);
                 if (tmp) {
                         /* Ignore deprecated mount option.  The client will
@@ -797,6 +812,16 @@ static int ll_options(char *options, int *flags)
                         *flags |= tmp;
                         goto next;
                 }
+               tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
+               if (tmp) {
+                       *flags |= tmp;
+                       goto next;
+               }
+               tmp = ll_set_opt("nouser_fid2path", s1, LL_SBI_USER_FID2PATH);
+               if (tmp) {
+                       *flags &= ~tmp;
+                       goto next;
+               }
 
                 tmp = ll_set_opt("checksum", s1, LL_SBI_CHECKSUM);
                 if (tmp) {
@@ -896,7 +921,6 @@ void ll_lli_init(struct ll_inode_info *lli)
                 cfs_mutex_init(&lli->lli_readdir_mutex);
                 lli->lli_opendir_key = NULL;
                 lli->lli_sai = NULL;
-                lli->lli_sa_pos = 0;
                 lli->lli_def_acl = NULL;
                 cfs_spin_lock_init(&lli->lli_sa_lock);
                 lli->lli_opendir_pid = 0;
@@ -906,13 +930,12 @@ void ll_lli_init(struct ll_inode_info *lli)
                 lli->lli_symlink_name = NULL;
                 cfs_init_rwsem(&lli->lli_trunc_sem);
                 cfs_mutex_init(&lli->lli_write_mutex);
-                lli->lli_async_rc = 0;
-                lli->lli_write_rc = 0;
-                cfs_init_rwsem(&lli->lli_glimpse_sem);
-                lli->lli_glimpse_time = 0;
-                CFS_INIT_LIST_HEAD(&lli->lli_agl_list);
-                lli->lli_agl_index = 0;
-        }
+               cfs_init_rwsem(&lli->lli_glimpse_sem);
+               lli->lli_glimpse_time = 0;
+               CFS_INIT_LIST_HEAD(&lli->lli_agl_list);
+               lli->lli_agl_index = 0;
+               lli->lli_async_rc = 0;
+       }
        cfs_mutex_init(&lli->lli_layout_mutex);
 }
 
@@ -982,7 +1005,7 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt)
            Use the address of the super itself.*/
         cfg->cfg_instance = sb;
         cfg->cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
-
+       cfg->cfg_callback = class_config_llog_handler;
         /* set up client obds */
         err = lustre_process_log(sb, profilenm, cfg);
         if (err < 0) {
@@ -1216,24 +1239,24 @@ int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
 
         rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0,
                         &request, mod);
-        if (rc) {
-                ptlrpc_req_finished(request);
-                if (rc == -ENOENT) {
-                        inode->i_nlink = 0;
-                        /* Unlinked special device node? Or just a race?
-                         * Pretend we done everything. */
-                        if (!S_ISREG(inode->i_mode) &&
-                            !S_ISDIR(inode->i_mode)) {
-                                ia_valid = op_data->op_attr.ia_valid;
-                                op_data->op_attr.ia_valid &= ~TIMES_SET_FLAGS;
-                                rc = simple_setattr(dentry, &op_data->op_attr);
-                                op_data->op_attr.ia_valid = ia_valid;
-                        }
-                } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) {
-                        CERROR("md_setattr fails: rc = %d\n", rc);
-                }
-                RETURN(rc);
-        }
+       if (rc) {
+               ptlrpc_req_finished(request);
+               if (rc == -ENOENT) {
+                       clear_nlink(inode);
+                       /* Unlinked special device node? Or just a race?
+                        * Pretend we done everything. */
+                       if (!S_ISREG(inode->i_mode) &&
+                           !S_ISDIR(inode->i_mode)) {
+                               ia_valid = op_data->op_attr.ia_valid;
+                               op_data->op_attr.ia_valid &= ~TIMES_SET_FLAGS;
+                               rc = simple_setattr(dentry, &op_data->op_attr);
+                               op_data->op_attr.ia_valid = ia_valid;
+                       }
+               } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) {
+                       CERROR("md_setattr fails: rc = %d\n", rc);
+               }
+               RETURN(rc);
+       }
 
         rc = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
                               sbi->ll_md_exp, &md);
@@ -1383,8 +1406,10 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr)
         int rc = 0, rc1 = 0;
         ENTRY;
 
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu valid %x\n", inode->i_ino,
-               attr->ia_valid);
+       CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu (%p) to %llu, valid %x\n",
+               inode->i_ino,
+               inode,i_size_read(inode),
+               attr->ia_valid);
 
         if (ia_valid & ATTR_SIZE) {
                 /* Check new size against VFS/VM file size limit and rlimit */
@@ -1440,12 +1465,12 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr)
 
        if (!S_ISDIR(inode->i_mode)) {
                if (ia_valid & ATTR_SIZE)
-                       UP_WRITE_I_ALLOC_SEM(inode);
+                       inode_dio_write_done(inode);
                mutex_unlock(&inode->i_mutex);
                cfs_down_write(&lli->lli_trunc_sem);
                mutex_lock(&inode->i_mutex);
                if (ia_valid & ATTR_SIZE)
-                       DOWN_WRITE_I_ALLOC_SEM(inode);
+                       inode_dio_wait(inode);
        }
 
        /* We need a steady stripe configuration for setattr to avoid
@@ -1718,16 +1743,16 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
         } else {
                 inode->i_blkbits = inode->i_sb->s_blocksize_bits;
         }
-        if (body->valid & OBD_MD_FLUID)
-                inode->i_uid = body->uid;
-        if (body->valid & OBD_MD_FLGID)
-                inode->i_gid = body->gid;
-        if (body->valid & OBD_MD_FLFLAGS)
-                inode->i_flags = ll_ext_to_inode_flags(body->flags);
-        if (body->valid & OBD_MD_FLNLINK)
-                inode->i_nlink = body->nlink;
-        if (body->valid & OBD_MD_FLRDEV)
-                inode->i_rdev = old_decode_dev(body->rdev);
+       if (body->valid & OBD_MD_FLUID)
+               inode->i_uid = body->uid;
+       if (body->valid & OBD_MD_FLGID)
+               inode->i_gid = body->gid;
+       if (body->valid & OBD_MD_FLFLAGS)
+               inode->i_flags = ll_ext_to_inode_flags(body->flags);
+       if (body->valid & OBD_MD_FLNLINK)
+               set_nlink(inode, body->nlink);
+       if (body->valid & OBD_MD_FLRDEV)
+               inode->i_rdev = old_decode_dev(body->rdev);
 
         if (body->valid & OBD_MD_FLID) {
                 /* FID shouldn't be changed! */
@@ -1830,7 +1855,6 @@ void ll_read_inode2(struct inode *inode, void *opaque)
         } else if (S_ISDIR(inode->i_mode)) {
                 inode->i_op = &ll_dir_inode_operations;
                 inode->i_fop = &ll_dir_operations;
-                inode->i_mapping->a_ops = (struct address_space_operations *)&ll_dir_aops;
                 EXIT;
         } else if (S_ISLNK(inode->i_mode)) {
                 inode->i_op = &ll_fast_symlink_inode_operations;
@@ -1838,8 +1862,8 @@ void ll_read_inode2(struct inode *inode, void *opaque)
         } else {
                 inode->i_op = &ll_special_inode_operations;
 
-                init_special_inode(inode, inode->i_mode,
-                                   kdev_t_to_nr(inode->i_rdev));
+               init_special_inode(inode, inode->i_mode,
+                                  inode->i_rdev);
 
                 EXIT;
         }
@@ -1942,17 +1966,17 @@ int ll_iocontrol(struct inode *inode, struct file *file,
                if (rc)
                        RETURN(rc);
 
-               OBDO_ALLOC(oinfo.oi_oa);
-               if (!oinfo.oi_oa)
-                       RETURN(-ENOMEM);
+               inode->i_flags = ll_ext_to_inode_flags(flags);
 
                lsm = ccc_inode_lsm_get(inode);
-               if (lsm == NULL) {
-                       inode->i_flags = ll_ext_to_inode_flags(flags);
-                       OBDO_FREE(oinfo.oi_oa);
+               if (lsm == NULL)
                        RETURN(0);
-               }
 
+               OBDO_ALLOC(oinfo.oi_oa);
+               if (!oinfo.oi_oa) {
+                       ccc_inode_lsm_put(inode, lsm);
+                       RETURN(-ENOMEM);
+               }
                oinfo.oi_md = lsm;
                 oinfo.oi_oa->o_id = lsm->lsm_object_id;
                 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
@@ -2002,7 +2026,6 @@ void ll_umount_begin(struct vfsmount *vfsmnt, int flags)
 void ll_umount_begin(struct super_block *sb)
 {
 #endif
-        struct lustre_sb_info *lsi = s2lsi(sb);
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         struct obd_device *obd;
         struct obd_ioctl_data *ioc_data;
@@ -2015,9 +2038,6 @@ void ll_umount_begin(struct super_block *sb)
         }
 #endif
 
-        /* Tell the MGC we got umount -f */
-        lsi->lsi_flags |= LSI_UMOUNT_FORCE;
-
         CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
                sb->s_count, atomic_read(&sb->s_active));
 
@@ -2152,8 +2172,8 @@ int ll_prep_inode(struct inode **inode,
        if (S_ISREG(md.body->mode) && sbi->ll_flags & LL_SBI_LAYOUT_LOCK &&
            md.lsm != NULL && !ll_have_md_lock(*inode, &ibits, LCK_MINMODE)) {
                CERROR("%s: inode "DFID" (%p) layout lock not granted.\n",
-                       ll_get_fsname(*inode), PFID(ll_inode2fid(*inode)),
-                       *inode);
+                       ll_get_fsname(sb, NULL, 0),
+                       PFID(ll_inode2fid(*inode)), *inode);
        }
 
 out:
@@ -2168,6 +2188,7 @@ int ll_obd_statfs(struct inode *inode, void *arg)
         char *buf = NULL;
         struct obd_ioctl_data *data = NULL;
         __u32 type;
+       __u32 flags;
         int len = 0, rc;
 
         if (!inode || !(sbi = ll_i2sbi(inode)))
@@ -2189,14 +2210,15 @@ int ll_obd_statfs(struct inode *inode, void *arg)
                 GOTO(out_statfs, rc = -EINVAL);
 
         memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
-        if (type == LL_STATFS_LMV)
+       if (type & LL_STATFS_LMV)
                 exp = sbi->ll_md_exp;
-        else if (type == LL_STATFS_LOV)
+       else if (type & LL_STATFS_LOV)
                 exp = sbi->ll_dt_exp;
         else
                 GOTO(out_statfs, rc = -ENODEV);
 
-        rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, NULL);
+       flags = (type & LL_STATFS_NODELAY) ? OBD_STATFS_NODELAY : 0;
+       rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, &flags);
         if (rc)
                 GOTO(out_statfs, rc);
 out_statfs:
@@ -2321,6 +2343,9 @@ int ll_show_options(struct seq_file *seq, struct vfsmount *vfs)
         if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
                 seq_puts(seq, ",lazystatfs");
 
+       if (sbi->ll_flags & LL_SBI_USER_FID2PATH)
+               seq_puts(seq, ",user_fid2path");
+
         RETURN(0);
 }
 
@@ -2349,3 +2374,81 @@ int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg)
 
         RETURN(0);
 }
+
+/**
+ * Get the lustre file system name of \a sb (its lmd_profile minus a
+ * trailing "-client") into \a buf, truncated to fit \a buflen bytes. With a
+ * NULL \a buf or non-positive \a buflen a shared static buffer is used
+ * instead (racy, for error reporting paths). Returns the buffer used.
+ */
+char *ll_get_fsname(struct super_block *sb, char *buf, int buflen)
+{
+       static char fsname_static[MTI_NAME_MAXLEN];
+       struct lustre_sb_info *lsi = s2lsi(sb);
+       const char *profile = lsi->lsi_lmd->lmd_profile;
+       const char *ptr;
+       int len;
+
+       /* buflen <= 0 would reach the copy below with a negative length */
+       if (buf == NULL || buflen <= 0) {
+               buf = fsname_static;
+               buflen = sizeof(fsname_static);
+       }
+
+       len = strlen(profile);
+       ptr = strrchr(profile, '-');
+       if (ptr != NULL && strcmp(ptr, "-client") == 0)
+               len -= 7;       /* strlen("-client") */
+
+       if (unlikely(len >= buflen))
+               len = buflen - 1;
+       memcpy(buf, profile, len);
+       buf[len] = '\0';
+
+       return buf;
+}
+
+/* Resolve \a dentry into a path string in \a buf (\a bufsize bytes) via
+ * d_path(), pairing it with the mount of the current task's fs root. */
+static char *ll_d_path(struct dentry *dentry, char *buf, int bufsize)
+{
+#ifdef HAVE_FS_STRUCT_USE_PATH
+       struct path loc = {
+               .dentry = dentry,
+               .mnt    = current->fs->root.mnt,
+       };
+       char *result;
+
+       path_get(&loc);
+       result = d_path(&loc, buf, bufsize);
+       path_put(&loc);
+       return result;
+#else
+       return d_path(dentry, current->fs->rootmnt, buf, bufsize);
+#endif
+}
+
+/* Warn that dirty \a page is being discarded with I/O result \a ioret,
+ * logging the fs name, device, FID and, when resolvable, the file path. */
+void ll_dirty_page_discard_warn(cfs_page_t *page, int ioret)
+{
+       struct inode *inode = page->mapping->host;
+       struct ccc_object *obj = cl_inode2ccc(inode);
+       struct dentry *alias = NULL;
+       char *name = NULL;
+       char *pagebuf = (char *)__get_free_page(GFP_KERNEL);
+
+       if (pagebuf != NULL)
+               alias = d_find_alias(inode);
+       if (alias != NULL)
+               name = ll_d_path(alias, pagebuf, PAGE_SIZE);
+
+       CWARN("%s: dirty page discard: %s/fid: "DFID"/%s may get corrupted "
+             "(rc %d)\n", ll_get_fsname(inode->i_sb, NULL, 0),
+             s2lsi(inode->i_sb)->lsi_lmd->lmd_dev,
+             PFID(&obj->cob_header.coh_lu.loh_fid),
+             (name && !IS_ERR(name)) ? name : "", ioret);
+       if (alias != NULL)
+               dput(alias);
+       if (pagebuf != NULL)
+               free_page((unsigned long)pagebuf);
+}