X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Fllite_lib.c;h=76e0f3b2b396123c8522973aecb7af3b9d301328;hp=39db99eee2b26ad5ff217a657bd5e1cca53283e0;hb=cb85c0364fd8323f4bb03c481660805da66aaf85;hpb=98060d83459ba10409f295898f0ec917f938b4d3 diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 39db99e..76e0f3b 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -45,6 +45,7 @@ #include #include +#include #include #include #include @@ -56,16 +57,10 @@ #include #include "llite_internal.h" -cfs_mem_cache_t *ll_file_data_slab; +struct kmem_cache *ll_file_data_slab; -CFS_LIST_HEAD(ll_super_blocks); -DEFINE_SPINLOCK(ll_sb_lock); - -#ifndef MS_HAS_NEW_AOPS -extern struct address_space_operations ll_aops; -#else -extern struct address_space_operations_ext ll_aops; -#endif +static struct list_head ll_super_blocks = LIST_HEAD_INIT(ll_super_blocks); +static DEFINE_SPINLOCK(ll_sb_lock); #ifndef log2 #define log2(n) ffz(~(n)) @@ -76,14 +71,14 @@ static struct ll_sb_info *ll_init_sbi(void) struct ll_sb_info *sbi = NULL; unsigned long pages; unsigned long lru_page_max; - struct sysinfo si; - class_uuid_t uuid; - int i; - ENTRY; + struct sysinfo si; + class_uuid_t uuid; + int i; + ENTRY; - OBD_ALLOC(sbi, sizeof(*sbi)); - if (!sbi) - RETURN(NULL); + OBD_ALLOC_PTR(sbi); + if (sbi == NULL) + RETURN(NULL); spin_lock_init(&sbi->ll_lock); mutex_init(&sbi->ll_lco.lco_lock); @@ -93,36 +88,34 @@ static struct ll_sb_info *ll_init_sbi(void) si_meminfo(&si); pages = si.totalram - si.totalhigh; - if (pages >> (20 - CFS_PAGE_SHIFT) < 512) { - lru_page_max = pages / 2; - } else { - lru_page_max = (pages / 4) * 3; - } + lru_page_max = pages / 2; /* initialize ll_cache data */ - cfs_atomic_set(&sbi->ll_cache.ccc_users, 0); + atomic_set(&sbi->ll_cache.ccc_users, 0); sbi->ll_cache.ccc_lru_max = lru_page_max; - cfs_atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max); + atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max); spin_lock_init(&sbi->ll_cache.ccc_lru_lock); - CFS_INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru); + INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru); - cfs_atomic_set(&sbi->ll_cache.ccc_unstable_nr, 0); - cfs_waitq_init(&sbi->ll_cache.ccc_unstable_waitq); + /* turn unstable check off by default as it impacts performance */ + sbi->ll_cache.ccc_unstable_check = 0; + atomic_set(&sbi->ll_cache.ccc_unstable_nr, 0); + init_waitqueue_head(&sbi->ll_cache.ccc_unstable_waitq); - sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32, - SBI_DEFAULT_READAHEAD_MAX); - sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file; - sbi->ll_ra_info.ra_max_read_ahead_whole_pages = - SBI_DEFAULT_READAHEAD_WHOLE_MAX; - CFS_INIT_LIST_HEAD(&sbi->ll_conn_chain); - CFS_INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); + sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32, + SBI_DEFAULT_READAHEAD_MAX); + sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file; + sbi->ll_ra_info.ra_max_read_ahead_whole_pages = + SBI_DEFAULT_READAHEAD_WHOLE_MAX; + INIT_LIST_HEAD(&sbi->ll_conn_chain); + INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); ll_generate_random_uuid(uuid); class_uuid_unparse(uuid, &sbi->ll_sb_uuid); CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid); spin_lock(&ll_sb_lock); - cfs_list_add_tail(&sbi->ll_list, &ll_super_blocks); + list_add_tail(&sbi->ll_list, &ll_super_blocks); spin_unlock(&ll_sb_lock); sbi->ll_flags |= LL_SBI_VERBOSE; @@ -141,35 +134,38 @@ static struct ll_sb_info *ll_init_sbi(void) pp_w_hist.oh_lock); } - /* metadata statahead is enabled by default */ - sbi->ll_sa_max = LL_SA_RPC_DEF; - cfs_atomic_set(&sbi->ll_sa_total, 0); - cfs_atomic_set(&sbi->ll_sa_wrong, 0); - cfs_atomic_set(&sbi->ll_agl_total, 0); - sbi->ll_flags |= LL_SBI_AGL_ENABLED; + /* metadata statahead is enabled by default */ + sbi->ll_sa_max = LL_SA_RPC_DEF; + atomic_set(&sbi->ll_sa_total, 0); + atomic_set(&sbi->ll_sa_wrong, 0); + atomic_set(&sbi->ll_agl_total, 0); + sbi->ll_flags |= LL_SBI_AGL_ENABLED; + + /* root squash */ + sbi->ll_squash.rsi_uid = 0; + sbi->ll_squash.rsi_gid = 0; + INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids); + init_rwsem(&sbi->ll_squash.rsi_sem); - RETURN(sbi); + RETURN(sbi); } -void ll_free_sbi(struct super_block *sb) +static void ll_free_sbi(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); ENTRY; if (sbi != NULL) { spin_lock(&ll_sb_lock); - cfs_list_del(&sbi->ll_list); + list_del(&sbi->ll_list); spin_unlock(&ll_sb_lock); + if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids)) + cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids); OBD_FREE(sbi, sizeof(*sbi)); } EXIT; } -static struct dentry_operations ll_d_root_ops = { - .d_compare = ll_dcompare, - .d_revalidate = ll_revalidate_nd, -}; - static int client_common_fill_super(struct super_block *sb, char *md, char *dt, struct vfsmount *mnt) { @@ -203,13 +199,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, RETURN(-ENOMEM); } - if (proc_lustre_fs_root) { - err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb, - dt, md); - if (err < 0) - CERROR("could not register mount in /proc/fs/lustre\n"); - } - /* indicate the features supported by this client */ data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID | @@ -221,7 +210,11 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH| OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | - OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS; + OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS | + OBD_CONNECT_MAX_EASIZE | + OBD_CONNECT_FLOCK_DEAD | + OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK | + OBD_CONNECT_OPEN_BY_FID; if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) data->ocd_connect_flags |= OBD_CONNECT_SOM; @@ -312,15 +305,15 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, valid != CLIENT_CONNECT_MDT_REQD) { char *buf; - OBD_ALLOC_WAIT(buf, CFS_PAGE_SIZE); - obd_connect_flags2str(buf, CFS_PAGE_SIZE, + OBD_ALLOC_WAIT(buf, PAGE_CACHE_SIZE); + obd_connect_flags2str(buf, PAGE_CACHE_SIZE, valid ^ CLIENT_CONNECT_MDT_REQD, ","); LCONSOLE_ERROR_MSG(0x170, "Server %s does not support " "feature(s) needed for correct operation " "of this client (%s). Please upgrade " "server or downgrade client.\n", sbi->ll_md_exp->exp_obd->obd_name, buf); - OBD_FREE(buf, CFS_PAGE_SIZE); + OBD_FREE(buf, PAGE_CACHE_SIZE); GOTO(out_md_fid, err = -EPROTO); } @@ -388,13 +381,21 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, sbi->ll_flags |= LL_SBI_64BIT_HASH; if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) - sbi->ll_md_brw_size = data->ocd_brw_size; + sbi->ll_md_brw_pages = data->ocd_brw_size >> PAGE_CACHE_SHIFT; else - sbi->ll_md_brw_size = CFS_PAGE_SIZE; + sbi->ll_md_brw_pages = 1; - if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) { - LCONSOLE_INFO("Layout lock feature supported.\n"); + if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) sbi->ll_flags |= LL_SBI_LAYOUT_LOCK; + + if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) { + if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) { + LCONSOLE_INFO("%s: disabling xattr cache due to " + "unknown maximum xattr size.\n", dt); + } else { + sbi->ll_flags |= LL_SBI_XATTR_CACHE; + sbi->ll_xattr_cache_enabled = 1; + } } obd = class_name2obd(dt); @@ -413,7 +414,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_MAXBYTES | OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | - OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS; + OBD_CONNECT_LAYOUTLOCK | + OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK; if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) data->ocd_connect_flags |= OBD_CONNECT_SOM; @@ -497,7 +499,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, /* make root inode * XXX: move this to after cbd setup? */ - valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMDSCAPA; + valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMDSCAPA | + OBD_MD_FLMODEASIZE; if (sbi->ll_flags & LL_SBI_RMT_CLIENT) valid |= OBD_MD_FLRMTPERM; else if (sbi->ll_flags & LL_SBI_ACL) @@ -532,14 +535,14 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, LASSERT(fid_is_sane(&sbi->ll_root_fid)); root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid, - ll_need_32bit_api(sbi)), + sbi->ll_flags & LL_SBI_32BIT_API), &lmd); md_free_lustre_md(sbi->ll_md_exp, &lmd); ptlrpc_req_finished(request); - if (root == NULL || IS_ERR(root)) { - if (lmd.lsm) - obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm); + if (IS_ERR(root)) { + if (lmd.lsm) + obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm); #ifdef CONFIG_FS_POSIX_ACL if (lmd.posix_acl) { posix_acl_release(lmd.posix_acl); @@ -581,13 +584,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, ll_get_fsname(sb, NULL, 0)); GOTO(out_root, err = -ENOMEM); } - #ifdef HAVE_DCACHE_LOCK - sb->s_root->d_op = &ll_d_root_ops; -#else - /* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */ - d_set_d_op(sb->s_root, &ll_d_root_ops); - sb->s_d_op = &ll_d_ops; + sb->s_root->d_op = &ll_d_ops; #endif sbi->ll_sdev_orig = sb->s_dev; @@ -598,13 +596,22 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, /* s_dev is also used in lt_compare() to compare two fs, but that is * only a node-local comparison. */ uuid = obd_get_uuid(sbi->ll_md_exp); - if (uuid != NULL) - sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid)); + if (uuid != NULL) + sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid)); if (data != NULL) OBD_FREE_PTR(data); if (osfs != NULL) OBD_FREE_PTR(osfs); + if (proc_lustre_fs_root != NULL) { + err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb, + dt, md); + if (err < 0) { + CERROR("%s: could not register mount in lprocfs: " + "rc = %d\n", ll_get_fsname(sb, NULL, 0), err); + err = 0; + } + } RETURN(err); out_root: @@ -627,25 +634,63 @@ out: OBD_FREE_PTR(data); if (osfs != NULL) OBD_FREE_PTR(osfs); - lprocfs_unregister_mountpoint(sbi); return err; } int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize) { - int size, rc; + int size, rc; - *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL); - size = sizeof(int); - rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE), - KEY_MAX_EASIZE, &size, lmmsize, NULL); - if (rc) - CERROR("Get max mdsize error rc %d \n", rc); + *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL); + size = sizeof(int); + rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE), + KEY_MAX_EASIZE, &size, lmmsize, NULL); + if (rc) + CERROR("Get max mdsize error rc %d\n", rc); - RETURN(rc); + RETURN(rc); +} + +int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize) +{ + int size, rc; + + size = sizeof(int); + rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE), + KEY_DEFAULT_EASIZE, &size, lmmsize, NULL); + if (rc) + CERROR("Get default mdsize error rc %d\n", rc); + + RETURN(rc); +} + +int ll_get_max_cookiesize(struct ll_sb_info *sbi, int *lmmsize) +{ + int size, rc; + + size = sizeof(int); + rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_COOKIESIZE), + KEY_MAX_COOKIESIZE, &size, lmmsize, NULL); + if (rc) + CERROR("Get max cookiesize error rc %d\n", rc); + + RETURN(rc); +} + +int ll_get_default_cookiesize(struct ll_sb_info *sbi, int *lmmsize) +{ + int size, rc; + + size = sizeof(int); + rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_COOKIESIZE), + KEY_DEFAULT_COOKIESIZE, &size, lmmsize, NULL); + if (rc) + CERROR("Get default cookiesize error rc %d\n", rc); + + RETURN(rc); } -void ll_dump_inode(struct inode *inode) +static void ll_dump_inode(struct inode *inode) { struct ll_d_hlist_node *tmp; int dentry_count = 0; @@ -655,8 +700,10 @@ void ll_dump_inode(struct inode *inode) ll_d_hlist_for_each(tmp, &inode->i_dentry) dentry_count++; - CERROR("inode %p dump: dev=%s ino=%lu mode=%o count=%u, %d dentries\n", - inode, ll_i2mdexp(inode)->exp_obd->obd_name, inode->i_ino, + CERROR("%s: inode %p dump: dev=%s fid="DFID + " mode=%o count=%u, %d dentries\n", + ll_get_fsname(inode->i_sb, NULL, 0), inode, + ll_i2mdexp(inode)->exp_obd->obd_name, PFID(ll_inode2fid(inode)), inode->i_mode, atomic_read(&inode->i_count), dentry_count); } @@ -674,7 +721,7 @@ void lustre_dump_dentry(struct dentry *dentry, int recur) " flags=0x%x, fsdata=%p, %d subdirs\n", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_parent->d_name.len, dentry->d_parent->d_name.name, - dentry->d_parent, dentry->d_inode, d_refcount(dentry), + dentry->d_parent, dentry->d_inode, d_count(dentry), dentry->d_flags, dentry->d_fsdata, subdirs); if (dentry->d_inode != NULL) ll_dump_inode(dentry->d_inode); @@ -688,7 +735,7 @@ void lustre_dump_dentry(struct dentry *dentry, int recur) } } -void client_common_put_super(struct super_block *sb) +static void client_common_put_super(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); ENTRY; @@ -704,7 +751,7 @@ void client_common_put_super(struct super_block *sb) cl_sb_fini(sb); - cfs_list_del(&sbi->ll_conn_chain); + list_del(&sbi->ll_conn_chain); obd_fid_fini(sbi->ll_dt_exp->exp_obd); obd_disconnect(sbi->ll_dt_exp); @@ -743,30 +790,6 @@ void ll_kill_super(struct super_block *sb) EXIT; } -char *ll_read_opt(const char *opt, char *data) -{ - char *value; - char *retval; - ENTRY; - - CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); - if (strncmp(opt, data, strlen(opt))) - RETURN(NULL); - if ((value = strchr(data, '=')) == NULL) - RETURN(NULL); - - value++; - OBD_ALLOC(retval, strlen(value) + 1); - if (!retval) { - CERROR("out of memory!\n"); - RETURN(NULL); - } - - memcpy(retval, value, strlen(value)+1); - CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval); - RETURN(retval); -} - static inline int ll_set_opt(const char *opt, char *data, int fl) { if (strncmp(opt, data, strlen(opt)) != 0) @@ -819,22 +842,22 @@ static int ll_options(char *options, int *flags) *flags &= ~tmp; goto next; } -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 5, 50, 0) - tmp = ll_set_opt("acl", s1, LL_SBI_ACL); - if (tmp) { - /* Ignore deprecated mount option. The client will - * always try to mount with ACL support, whether this - * is used depends on whether server supports it. */ - LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated " - "mount option 'acl'.\n"); - goto next; - } - tmp = ll_set_opt("noacl", s1, LL_SBI_ACL); - if (tmp) { - LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated " - "mount option 'noacl'.\n"); - goto next; - } +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 51, 0) + tmp = ll_set_opt("acl", s1, LL_SBI_ACL); + if (tmp) { + /* Ignore deprecated mount option. The client will + * always try to mount with ACL support, whether this + * is used depends on whether server supports it. */ + LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated " + "mount option 'acl'.\n"); + goto next; + } + tmp = ll_set_opt("noacl", s1, LL_SBI_ACL); + if (tmp) { + LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated " + "mount option 'noacl'.\n"); + goto next; + } #else #warning "{no}acl options have been deprecated since 1.8, please remove them" #endif @@ -928,15 +951,15 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_posix_acl = NULL; lli->lli_remote_perms = NULL; mutex_init(&lli->lli_rmtperm_mutex); - /* Do not set lli_fid, it has been initialized already. */ - fid_zero(&lli->lli_pfid); - CFS_INIT_LIST_HEAD(&lli->lli_close_list); - CFS_INIT_LIST_HEAD(&lli->lli_oss_capas); - cfs_atomic_set(&lli->lli_open_count, 0); - lli->lli_mds_capa = NULL; - lli->lli_rmtperm_time = 0; - lli->lli_pending_och = NULL; - lli->lli_mds_read_och = NULL; + /* Do not set lli_fid, it has been initialized already. */ + fid_zero(&lli->lli_pfid); + INIT_LIST_HEAD(&lli->lli_close_list); + INIT_LIST_HEAD(&lli->lli_oss_capas); + atomic_set(&lli->lli_open_count, 0); + lli->lli_mds_capa = NULL; + lli->lli_rmtperm_time = 0; + lli->lli_pending_och = NULL; + lli->lli_mds_read_och = NULL; lli->lli_mds_write_och = NULL; lli->lli_mds_exec_och = NULL; lli->lli_open_fd_read_count = 0; @@ -945,46 +968,41 @@ void ll_lli_init(struct ll_inode_info *lli) mutex_init(&lli->lli_och_mutex); spin_lock_init(&lli->lli_agl_lock); lli->lli_has_smd = false; - lli->lli_layout_gen = LL_LAYOUT_GEN_NONE; + spin_lock_init(&lli->lli_layout_lock); + ll_layout_version_set(lli, LL_LAYOUT_GEN_NONE); lli->lli_clob = NULL; + init_rwsem(&lli->lli_xattrs_list_rwsem); + mutex_init(&lli->lli_xattrs_enq_lock); + LASSERT(lli->lli_vfs_inode.i_mode != 0); if (S_ISDIR(lli->lli_vfs_inode.i_mode)) { mutex_init(&lli->lli_readdir_mutex); lli->lli_opendir_key = NULL; lli->lli_sai = NULL; - lli->lli_def_acl = NULL; spin_lock_init(&lli->lli_sa_lock); lli->lli_opendir_pid = 0; } else { - sema_init(&lli->lli_size_sem, 1); - lli->lli_size_sem_owner = NULL; + mutex_init(&lli->lli_size_mutex); lli->lli_symlink_name = NULL; init_rwsem(&lli->lli_trunc_sem); mutex_init(&lli->lli_write_mutex); init_rwsem(&lli->lli_glimpse_sem); lli->lli_glimpse_time = 0; - CFS_INIT_LIST_HEAD(&lli->lli_agl_list); + INIT_LIST_HEAD(&lli->lli_agl_list); lli->lli_agl_index = 0; lli->lli_async_rc = 0; - lli->lli_volatile = false; } mutex_init(&lli->lli_layout_mutex); } static inline int ll_bdi_register(struct backing_dev_info *bdi) { -#ifdef HAVE_BDI_REGISTER - static atomic_t ll_bdi_num = ATOMIC_INIT(0); + static atomic_t ll_bdi_num = ATOMIC_INIT(0); -#ifdef HAVE_BDI_NAME - bdi->name = "lustre"; -#endif - return bdi_register(bdi, NULL, "lustre-%d", - atomic_inc_return(&ll_bdi_num)); -#else - return 0; -#endif + bdi->name = "lustre"; + return bdi_register(bdi, NULL, "lustre-%d", + atomic_inc_return(&ll_bdi_num)); } int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) @@ -1006,15 +1024,15 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) if (cfg == NULL) RETURN(-ENOMEM); - cfs_module_get(); + try_module_get(THIS_MODULE); - /* client additional sb info */ - lsi->lsi_llsbi = sbi = ll_init_sbi(); - if (!sbi) { - cfs_module_put(THIS_MODULE); - OBD_FREE_PTR(cfg); - RETURN(-ENOMEM); - } + /* client additional sb info */ + lsi->lsi_llsbi = sbi = ll_init_sbi(); + if (!sbi) { + module_put(THIS_MODULE); + OBD_FREE_PTR(cfg); + RETURN(-ENOMEM); + } err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags); if (err) @@ -1029,8 +1047,10 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) if (err) GOTO(out_free, err); -#ifdef HAVE_SB_BDI sb->s_bdi = &lsi->lsi_bdi; +#ifndef HAVE_DCACHE_LOCK + /* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */ + sb->s_d_op = &ll_d_ops; #endif /* Generate a string unique to this super, in case some joker tries @@ -1041,10 +1061,8 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) cfg->cfg_callback = class_config_llog_handler; /* set up client obds */ err = lustre_process_log(sb, profilenm, cfg); - if (err < 0) { - CERROR("Unable to process log: %d\n", err); - GOTO(out_free, err); - } + if (err < 0) + GOTO(out_free, err); /* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */ lprof = class_get_profile(profilenm); @@ -1084,12 +1102,9 @@ out_free: RETURN(err); } /* ll_fill_super */ - -void lu_context_keys_dump(void); - void ll_put_super(struct super_block *sb) { - struct config_llog_instance cfg; + struct config_llog_instance cfg, params_cfg; struct obd_device *obd; struct lustre_sb_info *lsi = s2lsi(sb); struct ll_sb_info *sbi = ll_s2sbi(sb); @@ -1104,6 +1119,9 @@ void ll_put_super(struct super_block *sb) cfg.cfg_instance = sb; lustre_end_log(sb, profilenm, &cfg); + params_cfg.cfg_instance = sb; + lustre_end_log(sb, PARAMS_FILENAME, ¶ms_cfg); + if (sbi->ll_md_exp) { obd = class_exp2obd(sbi->ll_md_exp); if (obd) @@ -1114,11 +1132,11 @@ void ll_put_super(struct super_block *sb) if (force == 0) { struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); rc = l_wait_event(sbi->ll_cache.ccc_unstable_waitq, - cfs_atomic_read(&sbi->ll_cache.ccc_unstable_nr) == 0, + atomic_read(&sbi->ll_cache.ccc_unstable_nr) == 0, &lwi); } - ccc_count = cfs_atomic_read(&sbi->ll_cache.ccc_unstable_nr); + ccc_count = atomic_read(&sbi->ll_cache.ccc_unstable_nr); if (force == 0 && rc != -EINTR) LASSERTF(ccc_count == 0, "count: %i\n", ccc_count); @@ -1157,11 +1175,13 @@ void ll_put_super(struct super_block *sb) ll_free_sbi(sb); lsi->lsi_llsbi = NULL; - lustre_common_put_super(sb); + lustre_common_put_super(sb); - cfs_module_put(THIS_MODULE); + cl_env_cache_purge(~0); - EXIT; + module_put(THIS_MODULE); + + EXIT; } /* client_put_super */ struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock) @@ -1189,26 +1209,210 @@ struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock) return inode; } -struct inode *ll_inode_from_lock(struct ldlm_lock *lock) +static void ll_dir_clear_lsm_md(struct inode *inode) { - struct inode *inode = NULL; - /* NOTE: we depend on atomic igrab() -bzzz */ - lock_res_and_lock(lock); - if (lock->l_ast_data) { - struct ll_inode_info *lli = ll_i2info(lock->l_ast_data); - if (lli->lli_inode_magic == LLI_INODE_MAGIC) { - inode = igrab(lock->l_ast_data); - } else { - inode = lock->l_ast_data; - LDLM_DEBUG_LIMIT(inode->i_state & I_FREEING ? D_INFO : - D_WARNING, lock, "l_ast_data %p is " - "bogus: magic %08x", lock->l_ast_data, - lli->lli_inode_magic); - inode = NULL; - } - } - unlock_res_and_lock(lock); - return inode; + struct ll_inode_info *lli = ll_i2info(inode); + + LASSERT(S_ISDIR(inode->i_mode)); + + if (lli->lli_lsm_md != NULL) { + lmv_free_memmd(lli->lli_lsm_md); + lli->lli_lsm_md = NULL; + } +} + +static struct inode *ll_iget_anon_dir(struct super_block *sb, + const struct lu_fid *fid, + struct lustre_md *md) +{ + struct ll_sb_info *sbi = ll_s2sbi(sb); + struct mdt_body *body = md->body; + struct inode *inode; + ino_t ino; + ENTRY; + + ino = cl_fid_build_ino(fid, sbi->ll_flags & LL_SBI_32BIT_API); + inode = iget_locked(sb, ino); + if (inode == NULL) { + CERROR("%s: failed get simple inode "DFID": rc = -ENOENT\n", + ll_get_fsname(sb, NULL, 0), PFID(fid)); + RETURN(ERR_PTR(-ENOENT)); + } + + if (inode->i_state & I_NEW) { + struct ll_inode_info *lli = ll_i2info(inode); + struct lmv_stripe_md *lsm = md->lmv; + + inode->i_mode = (inode->i_mode & ~S_IFMT) | + (body->mbo_mode & S_IFMT); + LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode "DFID"\n", + PFID(fid)); + + LTIME_S(inode->i_mtime) = 0; + LTIME_S(inode->i_atime) = 0; + LTIME_S(inode->i_ctime) = 0; + inode->i_rdev = 0; + + /* initializing backing dev info. */ + inode->i_mapping->backing_dev_info = + &s2lsi(inode->i_sb)->lsi_bdi; + inode->i_op = &ll_dir_inode_operations; + inode->i_fop = &ll_dir_operations; + lli->lli_fid = *fid; + ll_lli_init(lli); + + LASSERT(lsm != NULL); + /* master object FID */ + lli->lli_pfid = body->mbo_fid1; + CDEBUG(D_INODE, "lli %p slave "DFID" master "DFID"\n", + lli, PFID(fid), PFID(&lli->lli_pfid)); + unlock_new_inode(inode); + } + + RETURN(inode); +} + +static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md) +{ + struct lu_fid *fid; + struct lmv_stripe_md *lsm = md->lmv; + int i; + + LASSERT(lsm != NULL); + /* XXX sigh, this lsm_root initialization should be in + * LMV layer, but it needs ll_iget right now, so we + * put this here right now. */ + for (i = 0; i < lsm->lsm_md_stripe_count; i++) { + fid = &lsm->lsm_md_oinfo[i].lmo_fid; + LASSERT(lsm->lsm_md_oinfo[i].lmo_root == NULL); + /* Unfortunately ll_iget will call ll_update_inode, + * where the initialization of slave inode is slightly + * different, so it reset lsm_md to NULL to avoid + * initializing lsm for slave inode. */ + /* For migrating inode, master stripe and master object will + * be same, so we only need assign this inode */ + if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION && i == 0) + lsm->lsm_md_oinfo[i].lmo_root = inode; + else + lsm->lsm_md_oinfo[i].lmo_root = + ll_iget_anon_dir(inode->i_sb, fid, md); + + if (IS_ERR(lsm->lsm_md_oinfo[i].lmo_root)) { + int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root); + + lsm->lsm_md_oinfo[i].lmo_root = NULL; + return rc; + } + } + + /* Here is where the lsm is being initialized(fill lmo_info) after + * client retrieve MD stripe information from MDT. */ + return md_update_lsm_md(ll_i2mdexp(inode), lsm, md->body, + ll_md_blocking_ast); +} + +static inline int lli_lsm_md_eq(const struct lmv_stripe_md *lsm_md1, + const struct lmv_stripe_md *lsm_md2) +{ + return lsm_md1->lsm_md_magic == lsm_md2->lsm_md_magic && + lsm_md1->lsm_md_stripe_count == lsm_md2->lsm_md_stripe_count && + lsm_md1->lsm_md_master_mdt_index == + lsm_md2->lsm_md_master_mdt_index && + lsm_md1->lsm_md_hash_type == lsm_md2->lsm_md_hash_type && + lsm_md1->lsm_md_layout_version == + lsm_md2->lsm_md_layout_version && + strcmp(lsm_md1->lsm_md_pool_name, + lsm_md2->lsm_md_pool_name) == 0; +} + +static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct lmv_stripe_md *lsm = md->lmv; + int rc; + ENTRY; + + LASSERT(S_ISDIR(inode->i_mode)); + CDEBUG(D_INODE, "update lsm %p of "DFID"\n", lli->lli_lsm_md, + PFID(ll_inode2fid(inode))); + + /* no striped information from request. */ + if (lsm == NULL) { + if (lli->lli_lsm_md == NULL) { + RETURN(0); + } else if (lli->lli_lsm_md->lsm_md_hash_type & + LMV_HASH_FLAG_MIGRATION) { + /* migration is done, the temporay MIGRATE layout has + * been removed */ + CDEBUG(D_INODE, DFID" finish migration.\n", + PFID(ll_inode2fid(inode))); + lmv_free_memmd(lli->lli_lsm_md); + lli->lli_lsm_md = NULL; + RETURN(0); + } else { + /* The lustre_md from req does not include stripeEA, + * see ll_md_setattr */ + RETURN(0); + } + } + + /* set the directory layout */ + if (lli->lli_lsm_md == NULL) { + + rc = ll_init_lsm_md(inode, md); + if (rc != 0) + RETURN(rc); + + lli->lli_lsm_md = lsm; + /* set lsm_md to NULL, so the following free lustre_md + * will not free this lsm */ + md->lmv = NULL; + CDEBUG(D_INODE, "Set lsm %p magic %x to "DFID"\n", lsm, + lsm->lsm_md_magic, PFID(ll_inode2fid(inode))); + RETURN(0); + } + + /* Compare the old and new stripe information */ + if (!lsm_md_eq(lli->lli_lsm_md, lsm)) { + struct lmv_stripe_md *old_lsm = lli->lli_lsm_md; + int idx; + + CERROR("%s: lmv layout mismatch "DFID"(%p)/"DFID"(%p)" + "magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d" + "hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n", + ll_get_fsname(inode->i_sb, NULL, 0), + PFID(&lsm->lsm_md_master_fid), lsm, + PFID(&old_lsm->lsm_md_master_fid), old_lsm, + lsm->lsm_md_magic, old_lsm->lsm_md_magic, + lsm->lsm_md_stripe_count, + old_lsm->lsm_md_stripe_count, + lsm->lsm_md_master_mdt_index, + old_lsm->lsm_md_master_mdt_index, + lsm->lsm_md_hash_type, old_lsm->lsm_md_hash_type, + lsm->lsm_md_layout_version, + old_lsm->lsm_md_layout_version, + lsm->lsm_md_pool_name, + old_lsm->lsm_md_pool_name); + + for (idx = 0; idx < old_lsm->lsm_md_stripe_count; idx++) { + CERROR("%s: sub FIDs in old lsm idx %d, old: "DFID"\n", + ll_get_fsname(inode->i_sb, NULL, 0), idx, + PFID(&old_lsm->lsm_md_oinfo[idx].lmo_fid)); + } + + for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++) { + CERROR("%s: sub FIDs in new lsm idx %d, new: "DFID"\n", + ll_get_fsname(inode->i_sb, NULL, 0), idx, + PFID(&lsm->lsm_md_oinfo[idx].lmo_fid)); + } + + RETURN(-EIO); + } + + rc = md_update_lsm_md(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md, + md->body, ll_md_blocking_ast); + + RETURN(rc); } void ll_clear_inode(struct inode *inode) @@ -1217,8 +1421,8 @@ void ll_clear_inode(struct inode *inode) struct ll_sb_info *sbi = ll_i2sbi(inode); ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", + PFID(ll_inode2fid(inode)), inode); if (S_ISDIR(inode->i_mode)) { /* these should have been cleared in ll_file_release */ @@ -1227,7 +1431,9 @@ void ll_clear_inode(struct inode *inode) LASSERT(lli->lli_opendir_pid == 0); } + spin_lock(&lli->lli_lock); ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK; + spin_unlock(&lli->lli_lock); md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode)); LASSERT(!lli->lli_open_fd_write_count); @@ -1247,32 +1453,36 @@ void ll_clear_inode(struct inode *inode) lli->lli_symlink_name = NULL; } - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - LASSERT(lli->lli_posix_acl == NULL); - if (lli->lli_remote_perms) { - free_rmtperm_hash(lli->lli_remote_perms); - lli->lli_remote_perms = NULL; - } - } + ll_xattr_cache_destroy(inode); + + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { + LASSERT(lli->lli_posix_acl == NULL); + if (lli->lli_remote_perms) { + free_rmtperm_hash(lli->lli_remote_perms); + lli->lli_remote_perms = NULL; + } + } #ifdef CONFIG_FS_POSIX_ACL - else if (lli->lli_posix_acl) { - LASSERT(cfs_atomic_read(&lli->lli_posix_acl->a_refcount) == 1); - LASSERT(lli->lli_remote_perms == NULL); - posix_acl_release(lli->lli_posix_acl); - lli->lli_posix_acl = NULL; - } + else if (lli->lli_posix_acl) { + LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1); + LASSERT(lli->lli_remote_perms == NULL); + posix_acl_release(lli->lli_posix_acl); + lli->lli_posix_acl = NULL; + } #endif - lli->lli_inode_magic = LLI_INODE_DEAD; - - ll_clear_inode_capas(inode); - if (!S_ISDIR(inode->i_mode)) - LASSERT(cfs_list_empty(&lli->lli_agl_list)); - - /* - * XXX This has to be done before lsm is freed below, because - * cl_object still uses inode lsm. - */ - cl_inode_fini(inode); + lli->lli_inode_magic = LLI_INODE_DEAD; + + ll_clear_inode_capas(inode); + if (S_ISDIR(inode->i_mode)) + ll_dir_clear_lsm_md(inode); + else if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) + LASSERT(list_empty(&lli->lli_agl_list)); + + /* + * XXX This has to be done before lsm is freed below, because + * cl_object still uses inode lsm. + */ + cl_inode_fini(inode); lli->lli_has_smd = false; EXIT; @@ -1329,13 +1539,13 @@ int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data, op_data->op_attr.ia_valid = ia_valid; /* Extract epoch data if obtained. */ - op_data->op_handle = md.body->handle; - op_data->op_ioepoch = md.body->ioepoch; + op_data->op_handle = md.body->mbo_handle; + op_data->op_ioepoch = md.body->mbo_ioepoch; - ll_update_inode(inode, &md); - ptlrpc_req_finished(request); + rc = ll_update_inode(inode, &md); + ptlrpc_req_finished(request); - RETURN(rc); + RETURN(rc); } /* Close IO epoch and send Size-on-MDS attribute update. */ @@ -1364,8 +1574,9 @@ static int ll_setattr_done_writing(struct inode *inode, * from OSTs and send setattr to back to MDS. */ rc = ll_som_update(inode, op_data); } else if (rc) { - CERROR("inode %lu mdc truncate failed: rc = %d\n", - inode->i_ino, rc); + CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n", + ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); } RETURN(rc); } @@ -1390,50 +1601,6 @@ static int ll_setattr_ost(struct inode *inode, struct iattr *attr) return rc; } -#ifndef HAVE_VFS_INODE_NEWSIZE_OK -/** - * inode_newsize_ok - may this inode be truncated to a given size - * @inode: the inode to be truncated - * @offset: the new size to assign to the inode - * @Returns: 0 on success, -ve errno on failure - * - * inode_newsize_ok will check filesystem limits and ulimits to check that the - * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ - * when necessary. Caller must not proceed with inode size change if failure is - * returned. @inode must be a file (not directory), with appropriate - * permissions to allow truncate (inode_newsize_ok does NOT check these - * conditions). - * - * inode_newsize_ok must be called with i_mutex held. - */ -int inode_newsize_ok(const struct inode *inode, loff_t offset) -{ - if (inode->i_size < offset) { - unsigned long limit; - - limit = rlimit(RLIMIT_FSIZE); - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out_big; - } else { - /* - * truncation of in-use swapfiles is disallowed - it would - * cause subsequent swapout to scribble on the now-freed - * blocks. - */ - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - } - - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out_big: - return -EFBIG; -} -#endif - /* If this inode has objects allocated to it (lsm != NULL), then the OST * object(s) determine the file size and mtime. Otherwise, the MDS will * keep these values until such a time that objects are allocated for it. @@ -1446,20 +1613,24 @@ out_big: * to the OST with the punch RPC, otherwise we do an explicit setattr RPC. * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE * at the same time. + * + * In case of HSMimport, we only set attr on MDS. */ -int ll_setattr_raw(struct dentry *dentry, struct iattr *attr) +int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) { struct inode *inode = dentry->d_inode; struct ll_inode_info *lli = ll_i2info(inode); struct md_op_data *op_data = NULL; struct md_open_data *mod = NULL; - int rc = 0, rc1 = 0; - ENTRY; + bool file_is_released = false; + int rc = 0, rc1 = 0; + ENTRY; - CDEBUG(D_VFSTRACE, "%s: setattr inode %p/fid:"DFID" from %llu to %llu, " - "valid %x\n", ll_get_fsname(inode->i_sb, NULL, 0), inode, - PFID(&lli->lli_fid), i_size_read(inode), attr->ia_size, - attr->ia_valid); + CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, " + "valid %x, hsm_import %d\n", + ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), + inode, i_size_read(inode), attr->ia_size, attr->ia_valid, + hsm_import); if (attr->ia_valid & ATTR_SIZE) { /* Check new size against VFS/VM file size limit and rlimit */ @@ -1480,12 +1651,12 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr) attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; } - /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */ + /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */ if (attr->ia_valid & TIMES_SET_FLAGS) { - if (cfs_curproc_fsuid() != inode->i_uid && - !cfs_capable(CFS_CAP_FOWNER)) - RETURN(-EPERM); - } + if ((!uid_eq(current_fsuid(), inode->i_uid)) && + !cfs_capable(CFS_CAP_FOWNER)) + RETURN(-EPERM); + } /* We mark all of the fields "set" so MDS/OST does not re-set them */ if (attr->ia_valid & ATTR_CTIME) { @@ -1508,14 +1679,6 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr) LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), cfs_time_current_sec()); - /* If we are changing file size, file content is modified, flag it. */ - if (attr->ia_valid & ATTR_SIZE) { - attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } - /* We always do an MDS RPC, even if we're only changing the size; * only the MDS knows whether truncate() should fail with -ETXTBUSY */ @@ -1527,19 +1690,52 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) inode_dio_write_done(inode); mutex_unlock(&inode->i_mutex); - down_write(&lli->lli_trunc_sem); } - memcpy(&op_data->op_attr, attr, sizeof(*attr)); + /* truncate on a released file must failed with -ENODATA, + * so size must not be set on MDS for released file + * but other attributes must be set + */ + if (S_ISREG(inode->i_mode)) { + struct lov_stripe_md *lsm; + __u32 gen; - /* Open epoch for truncate. */ - if (exp_connect_som(ll_i2mdexp(inode)) && - (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) - op_data->op_flags = MF_EPOCH_OPEN; + ll_layout_refresh(inode, &gen); + lsm = ccc_inode_lsm_get(inode); + if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED) + file_is_released = true; + ccc_inode_lsm_put(inode, lsm); - rc = ll_md_setattr(dentry, op_data, &mod); - if (rc) - GOTO(out, rc); + if (!hsm_import && attr->ia_valid & ATTR_SIZE) { + if (file_is_released) { + rc = ll_layout_restore(inode, 0, attr->ia_size); + if (rc < 0) + GOTO(out, rc); + + file_is_released = false; + ll_layout_refresh(inode, &gen); + } + + /* If we are changing file size, file content is + * modified, flag it. */ + attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; + spin_lock(&lli->lli_lock); + lli->lli_flags |= LLIF_DATA_MODIFIED; + spin_unlock(&lli->lli_lock); + op_data->op_bias |= MDS_DATA_MODIFIED; + } + } + + memcpy(&op_data->op_attr, attr, sizeof(*attr)); + + /* Open epoch for truncate. */ + if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import && + (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) + op_data->op_flags = MF_EPOCH_OPEN; + + rc = ll_md_setattr(dentry, op_data, &mod); + if (rc) + GOTO(out, rc); /* RPC to MDT is sent, cancel data modification flag */ if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) { @@ -1548,34 +1744,38 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr) spin_unlock(&lli->lli_lock); } - ll_ioepoch_open(lli, op_data->op_ioepoch); - if (!S_ISREG(inode->i_mode)) - GOTO(out, rc = 0); + ll_ioepoch_open(lli, op_data->op_ioepoch); + if (!S_ISREG(inode->i_mode) || file_is_released) + GOTO(out, rc = 0); if (attr->ia_valid & (ATTR_SIZE | ATTR_ATIME | ATTR_ATIME_SET | - ATTR_MTIME | ATTR_MTIME_SET)) - /* For truncate and utimes sending attributes to OSTs, setting - * mtime/atime to the past will be performed under PW [0:EOF] - * extent lock (new_size:EOF for truncate). It may seem - * excessive to send mtime/atime updates to OSTs when not - * setting times to past, but it is necessary due to possible - * time de-synchronization between MDT inode and OST objects */ - rc = ll_setattr_ost(inode, attr); - EXIT; + ATTR_MTIME | ATTR_MTIME_SET)) { + /* For truncate and utimes sending attributes to OSTs, setting + * mtime/atime to the past will be performed under PW [0:EOF] + * extent lock (new_size:EOF for truncate). It may seem + * excessive to send mtime/atime updates to OSTs when not + * setting times to past, but it is necessary due to possible + * time de-synchronization between MDT inode and OST objects */ + if (attr->ia_valid & ATTR_SIZE) + down_write(&lli->lli_trunc_sem); + rc = ll_setattr_ost(inode, attr); + if (attr->ia_valid & ATTR_SIZE) + up_write(&lli->lli_trunc_sem); + } + EXIT; out: - if (op_data) { - if (op_data->op_ioepoch) { - rc1 = ll_setattr_done_writing(inode, op_data, mod); - if (!rc) - rc = rc1; - } - ll_finish_md_op_data(op_data); - } + if (op_data) { + if (op_data->op_ioepoch) { + rc1 = ll_setattr_done_writing(inode, op_data, mod); + if (!rc) + rc = rc1; + } + ll_finish_md_op_data(op_data); + } if (!S_ISDIR(inode->i_mode)) { - up_write(&lli->lli_trunc_sem); mutex_lock(&inode->i_mutex); - if (attr->ia_valid & ATTR_SIZE) + if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) inode_dio_wait(inode); } @@ -1587,30 +1787,32 @@ out: int ll_setattr(struct dentry *de, struct iattr *attr) { - int mode = de->d_inode->i_mode; - - if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) == - (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) - attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; - - if (((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) == - (ATTR_SIZE|ATTR_MODE)) && - (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) || - (((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) && - !(attr->ia_mode & S_ISGID)))) - attr->ia_valid |= ATTR_FORCE; - - if ((mode & S_ISUID) && - !(attr->ia_mode & S_ISUID) && - !(attr->ia_valid & ATTR_KILL_SUID)) - attr->ia_valid |= ATTR_KILL_SUID; - - if (((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) && - !(attr->ia_mode & S_ISGID) && - !(attr->ia_valid & ATTR_KILL_SGID)) - attr->ia_valid |= ATTR_KILL_SGID; - - return ll_setattr_raw(de, attr); + int mode = de->d_inode->i_mode; + + if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) == + (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) + attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; + + if (((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) == + (ATTR_SIZE|ATTR_MODE)) && + (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) || + (((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) && + !(attr->ia_mode & S_ISGID)))) + attr->ia_valid |= ATTR_FORCE; + + if ((attr->ia_valid & ATTR_MODE) && + (mode & S_ISUID) && + !(attr->ia_mode & S_ISUID) && + !(attr->ia_valid & ATTR_KILL_SUID)) + attr->ia_valid |= ATTR_KILL_SUID; + + if ((attr->ia_valid & ATTR_MODE) && + ((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) && + !(attr->ia_mode & S_ISGID) && + !(attr->ia_valid & ATTR_KILL_SGID)) + attr->ia_valid |= ATTR_KILL_SGID; + + return ll_setattr_raw(de, attr, false); } int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, @@ -1666,6 +1868,7 @@ int ll_statfs(struct dentry *de, struct kstatfs *sfs) { struct super_block *sb = de->d_sb; struct obd_statfs osfs; + __u64 fsid = huge_encode_dev(sb->s_dev); int rc; CDEBUG(D_VFSTRACE, "VFS Op: at "LPU64" jiffies\n", get_jiffies_64()); @@ -1697,41 +1900,37 @@ int ll_statfs(struct dentry *de, struct kstatfs *sfs) sfs->f_blocks = osfs.os_blocks; sfs->f_bfree = osfs.os_bfree; sfs->f_bavail = osfs.os_bavail; - - return 0; + sfs->f_fsid.val[0] = (__u32)fsid; + sfs->f_fsid.val[1] = (__u32)(fsid >> 32); + return 0; } void ll_inode_size_lock(struct inode *inode) { - struct ll_inode_info *lli; + struct ll_inode_info *lli; - LASSERT(!S_ISDIR(inode->i_mode)); + LASSERT(!S_ISDIR(inode->i_mode)); - lli = ll_i2info(inode); - LASSERT(lli->lli_size_sem_owner != current); - down(&lli->lli_size_sem); - LASSERT(lli->lli_size_sem_owner == NULL); - lli->lli_size_sem_owner = current; + lli = ll_i2info(inode); + mutex_lock(&lli->lli_size_mutex); } void ll_inode_size_unlock(struct inode *inode) { - struct ll_inode_info *lli; + struct ll_inode_info *lli; - lli = ll_i2info(inode); - LASSERT(lli->lli_size_sem_owner == current); - lli->lli_size_sem_owner = NULL; - up(&lli->lli_size_sem); + lli = ll_i2info(inode); + mutex_unlock(&lli->lli_size_mutex); } -void ll_update_inode(struct inode *inode, struct lustre_md *md) +int ll_update_inode(struct inode *inode, struct lustre_md *md) { struct ll_inode_info *lli = ll_i2info(inode); struct mdt_body *body = md->body; struct lov_stripe_md *lsm = md->lsm; struct ll_sb_info *sbi = ll_i2sbi(inode); - LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); + LASSERT((lsm != NULL) == ((body->mbo_valid & OBD_MD_FLEASIZE) != 0)); if (lsm != NULL) { if (!lli->lli_has_smd && !(sbi->ll_flags & LL_SBI_LAYOUT_LOCK)) @@ -1742,12 +1941,20 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) lli->lli_maxbytes = MAX_LFS_FILESIZE; } - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - if (body->valid & OBD_MD_FLRMTPERM) - ll_update_remote_perm(inode, md->remote_perm); - } + if (S_ISDIR(inode->i_mode)) { + int rc; + + rc = ll_update_lsm_md(inode, md); + if (rc != 0) + return rc; + } + + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { + if (body->mbo_valid & OBD_MD_FLRMTPERM) + ll_update_remote_perm(inode, md->remote_perm); + } #ifdef CONFIG_FS_POSIX_ACL - else if (body->valid & OBD_MD_FLACL) { + else if (body->mbo_valid & OBD_MD_FLACL) { spin_lock(&lli->lli_lock); if (lli->lli_posix_acl) posix_acl_release(lli->lli_posix_acl); @@ -1755,64 +1962,74 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) spin_unlock(&lli->lli_lock); } #endif - inode->i_ino = cl_fid_build_ino(&body->fid1, ll_need_32bit_api(sbi)); - inode->i_generation = cl_fid_build_gen(&body->fid1); + inode->i_ino = cl_fid_build_ino(&body->mbo_fid1, + sbi->ll_flags & LL_SBI_32BIT_API); + inode->i_generation = cl_fid_build_gen(&body->mbo_fid1); + + if (body->mbo_valid & OBD_MD_FLATIME) { + if (body->mbo_atime > LTIME_S(inode->i_atime)) + LTIME_S(inode->i_atime) = body->mbo_atime; + lli->lli_lvb.lvb_atime = body->mbo_atime; + } - if (body->valid & OBD_MD_FLATIME) { - if (body->atime > LTIME_S(inode->i_atime)) - LTIME_S(inode->i_atime) = body->atime; - lli->lli_lvb.lvb_atime = body->atime; - } - if (body->valid & OBD_MD_FLMTIME) { - if (body->mtime > LTIME_S(inode->i_mtime)) { - CDEBUG(D_INODE, "setting ino %lu mtime from %lu " - "to "LPU64"\n", inode->i_ino, - LTIME_S(inode->i_mtime), body->mtime); - LTIME_S(inode->i_mtime) = body->mtime; - } - lli->lli_lvb.lvb_mtime = body->mtime; - } - if (body->valid & OBD_MD_FLCTIME) { - if (body->ctime > LTIME_S(inode->i_ctime)) - LTIME_S(inode->i_ctime) = body->ctime; - lli->lli_lvb.lvb_ctime = body->ctime; - } - if (body->valid & OBD_MD_FLMODE) - inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT); - if (body->valid & OBD_MD_FLTYPE) - inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT); - LASSERT(inode->i_mode != 0); - if (S_ISREG(inode->i_mode)) { - inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1, LL_MAX_BLKSIZE_BITS); - } else { - inode->i_blkbits = inode->i_sb->s_blocksize_bits; - } - if (body->valid & OBD_MD_FLUID) - inode->i_uid = body->uid; - if (body->valid & OBD_MD_FLGID) - inode->i_gid = body->gid; - if (body->valid & OBD_MD_FLFLAGS) - inode->i_flags = ll_ext_to_inode_flags(body->flags); - if (body->valid & OBD_MD_FLNLINK) - set_nlink(inode, body->nlink); - if (body->valid & OBD_MD_FLRDEV) - inode->i_rdev = old_decode_dev(body->rdev); - - if (body->valid & OBD_MD_FLID) { - /* FID shouldn't be changed! */ - if (fid_is_sane(&lli->lli_fid)) { - LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1), - "Trying to change FID "DFID - " to the "DFID", inode %lu/%u(%p)\n", - PFID(&lli->lli_fid), PFID(&body->fid1), - inode->i_ino, inode->i_generation, inode); - } else - lli->lli_fid = body->fid1; - } + if (body->mbo_valid & OBD_MD_FLMTIME) { + if (body->mbo_mtime > LTIME_S(inode->i_mtime)) { + CDEBUG(D_INODE, "setting ino %lu mtime from %lu " + "to "LPU64"\n", inode->i_ino, + LTIME_S(inode->i_mtime), body->mbo_mtime); + LTIME_S(inode->i_mtime) = body->mbo_mtime; + } + lli->lli_lvb.lvb_mtime = body->mbo_mtime; + } - LASSERT(fid_seq(&lli->lli_fid) != 0); + if (body->mbo_valid & OBD_MD_FLCTIME) { + if (body->mbo_ctime > LTIME_S(inode->i_ctime)) + LTIME_S(inode->i_ctime) = body->mbo_ctime; + lli->lli_lvb.lvb_ctime = body->mbo_ctime; + } + + if (body->mbo_valid & OBD_MD_FLMODE) + inode->i_mode = (inode->i_mode & S_IFMT) | + (body->mbo_mode & ~S_IFMT); + + if (body->mbo_valid & OBD_MD_FLTYPE) + inode->i_mode = (inode->i_mode & ~S_IFMT) | + (body->mbo_mode & S_IFMT); - if (body->valid & OBD_MD_FLSIZE) { + LASSERT(inode->i_mode != 0); + if (S_ISREG(inode->i_mode)) + inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1, + LL_MAX_BLKSIZE_BITS); + else + inode->i_blkbits = inode->i_sb->s_blocksize_bits; + + if (body->mbo_valid & OBD_MD_FLUID) + inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid); + if (body->mbo_valid & OBD_MD_FLGID) + inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid); + if (body->mbo_valid & OBD_MD_FLFLAGS) + inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags); + if (body->mbo_valid & OBD_MD_FLNLINK) + set_nlink(inode, body->mbo_nlink); + if (body->mbo_valid & OBD_MD_FLRDEV) + inode->i_rdev = old_decode_dev(body->mbo_rdev); + + if (body->mbo_valid & OBD_MD_FLID) { + /* FID shouldn't be changed! */ + if (fid_is_sane(&lli->lli_fid)) { + LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1), + "Trying to change FID "DFID + " to the "DFID", inode "DFID"(%p)\n", + PFID(&lli->lli_fid), PFID(&body->mbo_fid1), + PFID(ll_inode2fid(inode)), inode); + } else { + lli->lli_fid = body->mbo_fid1; + } + } + + LASSERT(fid_seq(&lli->lli_fid) != 0); + + if (body->mbo_valid & OBD_MD_FLSIZE) { if (exp_connect_som(ll_i2mdexp(inode)) && S_ISREG(inode->i_mode)) { struct lustre_handle lockh; @@ -1823,50 +2040,67 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) * lock on the client and set LLIF_MDS_SIZE_LOCK holding * it. */ mode = ll_take_md_lock(inode, MDS_INODELOCK_UPDATE, - &lockh, LDLM_FL_CBPENDING); + &lockh, LDLM_FL_CBPENDING, + LCK_CR | LCK_CW | + LCK_PR | LCK_PW); if (mode) { if (lli->lli_flags & (LLIF_DONE_WRITING | LLIF_EPOCH_PENDING | LLIF_SOM_DIRTY)) { - CERROR("ino %lu flags %u still has " - "size authority! do not trust " - "the size got from MDS\n", - inode->i_ino, lli->lli_flags); + CERROR("%s: inode "DFID" flags %u still" + " has size authority! do not " + "trust the size from MDS\n", + sbi->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), + lli->lli_flags); } else { /* Use old size assignment to avoid * deadlock bz14138 & bz14326 */ - i_size_write(inode, body->size); + i_size_write(inode, body->mbo_size); + spin_lock(&lli->lli_lock); lli->lli_flags |= LLIF_MDS_SIZE_LOCK; + spin_unlock(&lli->lli_lock); } ldlm_lock_decref(&lockh, mode); } } else { /* Use old size assignment to avoid * deadlock bz14138 & bz14326 */ - i_size_write(inode, body->size); + i_size_write(inode, body->mbo_size); - CDEBUG(D_VFSTRACE, "inode=%lu, updating i_size %llu\n", - inode->i_ino, (unsigned long long)body->size); - } + CDEBUG(D_VFSTRACE, + "inode="DFID", updating i_size %llu\n", + PFID(ll_inode2fid(inode)), + (unsigned long long)body->mbo_size); + } - if (body->valid & OBD_MD_FLBLOCKS) - inode->i_blocks = body->blocks; - } + if (body->mbo_valid & OBD_MD_FLBLOCKS) + inode->i_blocks = body->mbo_blocks; + } - if (body->valid & OBD_MD_FLMDSCAPA) { - LASSERT(md->mds_capa); - ll_add_capa(inode, md->mds_capa); - } - if (body->valid & OBD_MD_FLOSSCAPA) { - LASSERT(md->oss_capa); - ll_add_capa(inode, md->oss_capa); - } + if (body->mbo_valid & OBD_MD_FLMDSCAPA) { + LASSERT(md->mds_capa); + ll_add_capa(inode, md->mds_capa); + } + + if (body->mbo_valid & OBD_MD_FLOSSCAPA) { + LASSERT(md->oss_capa); + ll_add_capa(inode, md->oss_capa); + } + + if (body->mbo_valid & OBD_MD_TSTATE) { + if (body->mbo_t_state & MS_RESTORE) + lli->lli_flags |= LLIF_FILE_RESTORING; + } + + return 0; } -void ll_read_inode2(struct inode *inode, void *opaque) +int ll_read_inode2(struct inode *inode, void *opaque) { struct lustre_md *md = opaque; struct ll_inode_info *lli = ll_i2info(inode); + int rc; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", @@ -1882,7 +2116,9 @@ void ll_read_inode2(struct inode *inode, void *opaque) LTIME_S(inode->i_atime) = 0; LTIME_S(inode->i_ctime) = 0; inode->i_rdev = 0; - ll_update_inode(inode, md); + rc = ll_update_inode(inode, md); + if (rc != 0) + RETURN(rc); /* OIDEBUG(inode); */ @@ -1911,6 +2147,8 @@ void ll_read_inode2(struct inode *inode, void *opaque) EXIT; } + + return 0; } void ll_delete_inode(struct inode *inode) @@ -1928,12 +2166,12 @@ void ll_delete_inode(struct inode *inode) /* Workaround for LU-118 */ if (inode->i_data.nrpages) { - TREE_READ_LOCK_IRQ(&inode->i_data); - TREE_READ_UNLOCK_IRQ(&inode->i_data); + spin_lock_irq(&inode->i_data.tree_lock); + spin_unlock_irq(&inode->i_data.tree_lock); LASSERTF(inode->i_data.nrpages == 0, - "inode=%lu/%u(%p) nrpages=%lu, see " - "http://jira.whamcloud.com/browse/LU-118\n", - inode->i_ino, inode->i_generation, inode, + "inode="DFID"(%p) nrpages=%lu, see " + "http://jira.whamcloud.com/browse/LU-118\n", + PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages); } /* Workaround end */ @@ -1969,13 +2207,15 @@ int ll_iocontrol(struct inode *inode, struct file *file, rc = md_getattr(sbi->ll_md_exp, op_data, &req); ll_finish_md_op_data(op_data); if (rc) { - CERROR("failure %d inode %lu\n", rc, inode->i_ino); + CERROR("%s: failure inode "DFID": rc = %d\n", + sbi->ll_md_exp->exp_obd->obd_name, + PFID(ll_inode2fid(inode)), rc); RETURN(-abs(rc)); } body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - flags = body->flags; + flags = body->mbo_flags; ptlrpc_req_finished(req); @@ -2006,8 +2246,10 @@ int ll_iocontrol(struct inode *inode, struct file *file, inode->i_flags = ll_ext_to_inode_flags(flags); lsm = ccc_inode_lsm_get(inode); - if (lsm == NULL) + if (!lsm_has_objects(lsm)) { + ccc_inode_lsm_put(inode, lsm); RETURN(0); + } OBDO_ALLOC(oinfo.oi_oa); if (!oinfo.oi_oa) { @@ -2040,51 +2282,39 @@ int ll_iocontrol(struct inode *inode, struct file *file, int ll_flush_ctx(struct inode *inode) { - struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ll_sb_info *sbi = ll_i2sbi(inode); - CDEBUG(D_SEC, "flush context for user %d\n", cfs_curproc_uid()); + CDEBUG(D_SEC, "flush context for user %d\n", + from_kuid(&init_user_ns, current_uid())); - obd_set_info_async(NULL, sbi->ll_md_exp, - sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX, - 0, NULL, NULL); - obd_set_info_async(NULL, sbi->ll_dt_exp, - sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX, - 0, NULL, NULL); - return 0; + obd_set_info_async(NULL, sbi->ll_md_exp, + sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX, + 0, NULL, NULL); + obd_set_info_async(NULL, sbi->ll_dt_exp, + sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX, + 0, NULL, NULL); + return 0; } /* umount -f client means force down, don't save state */ -#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT -void ll_umount_begin(struct vfsmount *vfsmnt, int flags) -{ - struct super_block *sb = vfsmnt->mnt_sb; -#else void ll_umount_begin(struct super_block *sb) { -#endif - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_device *obd; - struct obd_ioctl_data *ioc_data; - ENTRY; - -#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT - if (!(flags & MNT_FORCE)) { - EXIT; - return; - } -#endif + struct ll_sb_info *sbi = ll_s2sbi(sb); + struct obd_device *obd; + struct obd_ioctl_data *ioc_data; + ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb, - sb->s_count, atomic_read(&sb->s_active)); + CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb, + sb->s_count, atomic_read(&sb->s_active)); - obd = class_exp2obd(sbi->ll_md_exp); - if (obd == NULL) { - CERROR("Invalid MDC connection handle "LPX64"\n", - sbi->ll_md_exp->exp_handle.h_cookie); - EXIT; - return; - } - obd->obd_force = 1; + obd = class_exp2obd(sbi->ll_md_exp); + if (obd == NULL) { + CERROR("Invalid MDC connection handle "LPX64"\n", + sbi->ll_md_exp->exp_handle.h_cookie); + EXIT; + return; + } + obd->obd_force = 1; obd = class_exp2obd(sbi->ll_dt_exp); if (obd == NULL) { @@ -2106,23 +2336,12 @@ void ll_umount_begin(struct super_block *sb) OBD_FREE_PTR(ioc_data); } - /* Really, we'd like to wait until there are no requests outstanding, - * and then continue. For now, we just invalidate the requests, - * schedule() and sleep one second if needed, and hope. - */ - cfs_schedule(); -#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT - if (atomic_read(&vfsmnt->mnt_count) > 2) { - cfs_schedule_timeout_and_set_state(CFS_TASK_INTERRUPTIBLE, - cfs_time_seconds(1)); - if (atomic_read(&vfsmnt->mnt_count) > 2) - LCONSOLE_WARN("Mount still busy with %d refs! You " - "may try to umount it a bit later\n", - atomic_read(&vfsmnt->mnt_count)); - } -#endif - - EXIT; + /* Really, we'd like to wait until there are no requests outstanding, + * and then continue. For now, we just invalidate the requests, + * schedule() and sleep one second if needed, and hope. + */ + schedule(); + EXIT; } int ll_remount_fs(struct super_block *sb, int *flags, char *data) @@ -2161,9 +2380,9 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, struct super_block *sb, struct lookup_intent *it) { struct ll_sb_info *sbi = NULL; - struct lustre_md md; - int rc; - ENTRY; + struct lustre_md md = { 0 }; + int rc; + ENTRY; LASSERT(*inode || sb); sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode); @@ -2172,21 +2391,23 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, if (rc) RETURN(rc); - if (*inode) { - ll_update_inode(*inode, &md); - } else { - LASSERT(sb != NULL); + if (*inode) { + rc = ll_update_inode(*inode, &md); + if (rc != 0) + GOTO(out, rc); + } else { + LASSERT(sb != NULL); /* * At this point server returns to client's same fid as client * generated for creating. So using ->fid1 is okay here. */ - LASSERT(fid_is_sane(&md.body->fid1)); + LASSERT(fid_is_sane(&md.body->mbo_fid1)); - *inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1, - ll_need_32bit_api(sbi)), + *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1, + sbi->ll_flags & LL_SBI_32BIT_API), &md); - if (*inode == NULL || IS_ERR(*inode)) { + if (IS_ERR(*inode)) { #ifdef CONFIG_FS_POSIX_ACL if (md.posix_acl) { posix_acl_release(md.posix_acl); @@ -2283,31 +2504,28 @@ out_statfs: int ll_process_config(struct lustre_cfg *lcfg) { - char *ptr; - void *sb; - struct lprocfs_static_vars lvars; - unsigned long x; - int rc = 0; + struct super_block *sb; + unsigned long x; + int rc = 0; + char *ptr; - lprocfs_llite_init_vars(&lvars); - - /* The instance name contains the sb: lustre-client-aacfe000 */ - ptr = strrchr(lustre_cfg_string(lcfg, 0), '-'); - if (!ptr || !*(++ptr)) - return -EINVAL; - if (sscanf(ptr, "%lx", &x) != 1) - return -EINVAL; - sb = (void *)x; - /* This better be a real Lustre superblock! */ - LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic == LMD_MAGIC); - - /* Note we have not called client_common_fill_super yet, so - proc fns must be able to handle that! */ - rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars, - lcfg, sb); - if (rc > 0) - rc = 0; - return(rc); + /* The instance name contains the sb: lustre-client-aacfe000 */ + ptr = strrchr(lustre_cfg_string(lcfg, 0), '-'); + if (!ptr || !*(++ptr)) + return -EINVAL; + if (sscanf(ptr, "%lx", &x) != 1) + return -EINVAL; + sb = (struct super_block *)x; + /* This better be a real Lustre superblock! */ + LASSERT(s2lsi(sb)->lsi_lmd->lmd_magic == LMD_MAGIC); + + /* Note we have not called client_common_fill_super yet, so + proc fns must be able to handle that! */ + rc = class_process_proc_seq_param(PARAM_LLITE, lprocfs_llite_obd_vars, + lcfg, sb); + if (rc > 0) + rc = 0; + return rc; } /* this function prepares md_op_data hint for passing ot down to MD stack. */ @@ -2318,8 +2536,17 @@ struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data, { LASSERT(i1 != NULL); - if (namelen > ll_i2sbi(i1)->ll_namelen) - return ERR_PTR(-ENAMETOOLONG); + if (name == NULL) { + /* Do not reuse namelen for something else. */ + if (namelen != 0) + return ERR_PTR(-EINVAL); + } else { + if (namelen > ll_i2sbi(i1)->ll_namelen) + return ERR_PTR(-ENAMETOOLONG); + + if (!lu_name_is_valid_2(name, namelen)) + return ERR_PTR(-EINVAL); + } if (op_data == NULL) OBD_ALLOC_PTR(op_data); @@ -2327,48 +2554,43 @@ struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data, if (op_data == NULL) return ERR_PTR(-ENOMEM); - ll_i2gids(op_data->op_suppgids, i1, i2); - op_data->op_fid1 = *ll_inode2fid(i1); - op_data->op_capa1 = ll_mdscapa_get(i1); + ll_i2gids(op_data->op_suppgids, i1, i2); + op_data->op_fid1 = *ll_inode2fid(i1); + op_data->op_capa1 = ll_mdscapa_get(i1); + if (S_ISDIR(i1->i_mode)) + op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md; + + if (i2) { + op_data->op_fid2 = *ll_inode2fid(i2); + op_data->op_capa2 = ll_mdscapa_get(i2); + if (S_ISDIR(i2->i_mode)) + op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md; + } else { + fid_zero(&op_data->op_fid2); + op_data->op_capa2 = NULL; + } - if (i2) { - op_data->op_fid2 = *ll_inode2fid(i2); - op_data->op_capa2 = ll_mdscapa_get(i2); - } else { - fid_zero(&op_data->op_fid2); - op_data->op_capa2 = NULL; - } + if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH) + op_data->op_cli_flags |= CLI_HASH64; + + if (ll_need_32bit_api(ll_i2sbi(i1))) + op_data->op_cli_flags |= CLI_API32; op_data->op_name = name; op_data->op_namelen = namelen; op_data->op_mode = mode; op_data->op_mod_time = cfs_time_current_sec(); - op_data->op_fsuid = cfs_curproc_fsuid(); - op_data->op_fsgid = cfs_curproc_fsgid(); + op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); + op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); op_data->op_cap = cfs_curproc_cap_pack(); op_data->op_bias = 0; op_data->op_cli_flags = 0; if ((opc == LUSTRE_OPC_CREATE) && (name != NULL) && filename_is_volatile(name, namelen, NULL)) op_data->op_bias |= MDS_CREATE_VOLATILE; - op_data->op_opc = opc; op_data->op_mds = 0; op_data->op_data = data; - /* If the file is being opened after mknod() (normally due to NFS) - * try to use the default stripe data from parent directory for - * allocating OST objects. Try to pass the parent FID to MDS. */ - if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) && - !ll_i2info(i2)->lli_has_smd) { - struct ll_inode_info *lli = ll_i2info(i2); - - spin_lock(&lli->lli_lock); - if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid))) - op_data->op_fid1 = lli->lli_pfid; - spin_unlock(&lli->lli_lock); - /** We ignore parent's capability temporary. */ - } - /* When called by ll_setattr_raw, file is i1. */ if (LLIF_DATA_MODIFIED & ll_i2info(i1)->lli_flags) op_data->op_bias |= MDS_DATA_MODIFIED; @@ -2439,11 +2661,11 @@ int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg) if (!obd) RETURN(-ENOENT); - if (cfs_copy_to_user((void *)arg, obd->obd_name, - strlen(obd->obd_name) + 1)) - RETURN(-EFAULT); + if (copy_to_user((void *)arg, obd->obd_name, + strlen(obd->obd_name) + 1)) + RETURN(-EFAULT); - RETURN(0); + RETURN(0); } /** @@ -2483,7 +2705,6 @@ static char* ll_d_path(struct dentry *dentry, char *buf, int bufsize) { char *path = NULL; -#ifdef HAVE_FS_STRUCT_USE_PATH struct path p; p.dentry = dentry; @@ -2491,14 +2712,10 @@ static char* ll_d_path(struct dentry *dentry, char *buf, int bufsize) path_get(&p); path = d_path(&p, buf, bufsize); path_put(&p); -#else - path = d_path(dentry, current->fs->rootmnt, buf, bufsize); -#endif - return path; } -void ll_dirty_page_discard_warn(cfs_page_t *page, int ioret) +void ll_dirty_page_discard_warn(struct page *page, int ioret) { char *buf, *path = NULL; struct dentry *dentry = NULL; @@ -2512,11 +2729,12 @@ void ll_dirty_page_discard_warn(cfs_page_t *page, int ioret) path = ll_d_path(dentry, buf, PAGE_SIZE); } - CWARN("%s: dirty page discard: %s/fid: "DFID"/%s may get corrupted " - "(rc %d)\n", ll_get_fsname(page->mapping->host->i_sb, NULL, 0), - s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev, - PFID(&obj->cob_header.coh_lu.loh_fid), - (path && !IS_ERR(path)) ? path : "", ioret); + CDEBUG(D_WARNING, + "%s: dirty page discard: %s/fid: "DFID"/%s may get corrupted " + "(rc %d)\n", ll_get_fsname(page->mapping->host->i_sb, NULL, 0), + s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev, + PFID(&obj->cob_header.coh_lu.loh_fid), + (path && !IS_ERR(path)) ? path : "", ioret); if (dentry != NULL) dput(dentry); @@ -2524,3 +2742,41 @@ void ll_dirty_page_discard_warn(cfs_page_t *page, int ioret) if (buf != NULL) free_page((unsigned long)buf); } + +/* + * Compute llite root squash state after a change of root squash + * configuration setting or add/remove of a lnet nid + */ +void ll_compute_rootsquash_state(struct ll_sb_info *sbi) +{ + struct root_squash_info *squash = &sbi->ll_squash; + int i; + bool matched; + lnet_process_id_t id; + + /* Update norootsquash flag */ + down_write(&squash->rsi_sem); + if (list_empty(&squash->rsi_nosquash_nids)) + sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH; + else { + /* Do not apply root squash as soon as one of our NIDs is + * in the nosquash_nids list */ + matched = false; + i = 0; + while (LNetGetId(i++, &id) != -ENOENT) { + if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND) + continue; + if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) { + matched = true; + break; + } + } + if (matched) + sbi->ll_flags |= LL_SBI_NOROOTSQUASH; + else + sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH; + } + up_write(&squash->rsi_sem); +} + +