X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Fllite_lib.c;h=f750a12f332c25614cb08d68c835bec7f275e556;hp=4c5dcec48b582f09164e08377b28b431140ef2df;hb=8eca92b365fd3efd1541a48b1bb239926838d947;hpb=754bf71c650c427acfb0fe35017e8f9c1eb9fa7d diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 4c5dcec..f750a12 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2013, Intel Corporation. + * Copyright (c) 2011, 2016, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -41,12 +37,18 @@ #define DEBUG_SUBSYSTEM S_LLITE #include +#include +#include #include #include #include +#include +#ifdef HAVE_UIDGID_HEADER +# include +#endif +#include #include -#include #include #include #include @@ -59,9 +61,6 @@ struct kmem_cache *ll_file_data_slab; -static struct list_head ll_super_blocks = LIST_HEAD_INIT(ll_super_blocks); -static DEFINE_SPINLOCK(ll_sb_lock); - #ifndef log2 #define log2(n) ffz(~(n)) #endif @@ -91,31 +90,22 @@ static struct ll_sb_info *ll_init_sbi(void) lru_page_max = pages / 2; /* initialize ll_cache data */ - atomic_set(&sbi->ll_cache.ccc_users, 0); - sbi->ll_cache.ccc_lru_max = lru_page_max; - atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max); - spin_lock_init(&sbi->ll_cache.ccc_lru_lock); - INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru); - - atomic_set(&sbi->ll_cache.ccc_unstable_nr, 0); - init_waitqueue_head(&sbi->ll_cache.ccc_unstable_waitq); + sbi->ll_cache = cl_cache_init(lru_page_max); + if (sbi->ll_cache == NULL) { + OBD_FREE(sbi, sizeof(*sbi)); + RETURN(NULL); + } sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32, SBI_DEFAULT_READAHEAD_MAX); sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file; sbi->ll_ra_info.ra_max_read_ahead_whole_pages = SBI_DEFAULT_READAHEAD_WHOLE_MAX; - INIT_LIST_HEAD(&sbi->ll_conn_chain); - INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); ll_generate_random_uuid(uuid); class_uuid_unparse(uuid, &sbi->ll_sb_uuid); CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid); - spin_lock(&ll_sb_lock); - list_add_tail(&sbi->ll_list, &ll_super_blocks); - spin_unlock(&ll_sb_lock); - sbi->ll_flags |= LL_SBI_VERBOSE; #ifdef ENABLE_CHECKSUM sbi->ll_flags |= LL_SBI_CHECKSUM; @@ -124,6 +114,7 @@ static struct ll_sb_info *ll_init_sbi(void) #ifdef HAVE_LRU_RESIZE_SUPPORT sbi->ll_flags |= LL_SBI_LRU_RESIZE; #endif + sbi->ll_flags |= LL_SBI_LAZYSTATFS; for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) { spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i]. @@ -136,8 +127,10 @@ static struct ll_sb_info *ll_init_sbi(void) sbi->ll_sa_max = LL_SA_RPC_DEF; atomic_set(&sbi->ll_sa_total, 0); atomic_set(&sbi->ll_sa_wrong, 0); + atomic_set(&sbi->ll_sa_running, 0); atomic_set(&sbi->ll_agl_total, 0); sbi->ll_flags |= LL_SBI_AGL_ENABLED; + sbi->ll_flags |= LL_SBI_FAST_READ; /* root squash */ sbi->ll_squash.rsi_uid = 0; @@ -154,30 +147,36 @@ static void ll_free_sbi(struct super_block *sb) ENTRY; if (sbi != NULL) { - spin_lock(&ll_sb_lock); - list_del(&sbi->ll_list); - spin_unlock(&ll_sb_lock); if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids)) cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids); + if (sbi->ll_cache != NULL) { + cl_cache_decref(sbi->ll_cache); + sbi->ll_cache = NULL; + } OBD_FREE(sbi, sizeof(*sbi)); } EXIT; } +static inline int obd_connect_has_secctx(struct obd_connect_data *data) +{ + return data->ocd_connect_flags & OBD_CONNECT_FLAGS2 && + data->ocd_connect_flags2 & OBD_CONNECT2_FILE_SECCTX; +} + static int client_common_fill_super(struct super_block *sb, char *md, char *dt, struct vfsmount *mnt) { - struct inode *root = 0; + struct inode *root = NULL; struct ll_sb_info *sbi = ll_s2sbi(sb); struct obd_device *obd; - struct obd_capa *oc = NULL; struct obd_statfs *osfs = NULL; struct ptlrpc_request *request = NULL; struct obd_connect_data *data = NULL; struct obd_uuid *uuid; struct md_op_data *op_data; struct lustre_md lmd; - obd_valid valid; + u64 valid; int size, err, checksum; ENTRY; @@ -204,17 +203,21 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET | OBD_CONNECT_FID | OBD_CONNECT_AT | OBD_CONNECT_LOV_V3 | - OBD_CONNECT_RMT_CLIENT | OBD_CONNECT_VBR | - OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH| + OBD_CONNECT_VBR | OBD_CONNECT_FULL20 | + OBD_CONNECT_64BITHASH | OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | - OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS | + OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS| OBD_CONNECT_MAX_EASIZE | OBD_CONNECT_FLOCK_DEAD | - OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK; + OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK | + OBD_CONNECT_OPEN_BY_FID | + OBD_CONNECT_DIR_STRIPE | + OBD_CONNECT_BULK_MBITS | + OBD_CONNECT_SUBTREE | + OBD_CONNECT_FLAGS2; - if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) - data->ocd_connect_flags |= OBD_CONNECT_SOM; + data->ocd_connect_flags2 = 0; #ifdef HAVE_LRU_RESIZE_SUPPORT if (sbi->ll_flags & LL_SBI_LRU_RESIZE) @@ -237,14 +240,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, if (sbi->ll_flags & LL_SBI_USER_XATTR) data->ocd_connect_flags |= OBD_CONNECT_XATTR; -#ifdef HAVE_MS_FLOCK_LOCK - /* force vfs to use lustre handler for flock() calls - bug 10743 */ - sb->s_flags |= MS_FLOCK_LOCK; -#endif -#ifdef MS_HAS_NEW_AOPS - sb->s_flags |= MS_HAS_NEW_AOPS; -#endif - if (sbi->ll_flags & LL_SBI_FLOCK) sbi->ll_fop = &ll_file_operations_flock; else if (sbi->ll_flags & LL_SBI_LOCALFLOCK) @@ -252,10 +247,13 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, else sbi->ll_fop = &ll_file_operations_noflock; - /* real client */ - data->ocd_connect_flags |= OBD_CONNECT_REAL; - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) - data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE; + /* always ping even if server suppress_pings */ + if (sbi->ll_flags & LL_SBI_ALWAYS_PING) + data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS; + +#ifdef HAVE_SECURITY_DENTRY_INIT_SECURITY + data->ocd_connect_flags2 |= OBD_CONNECT2_FILE_SECCTX; +#endif /* HAVE_SECURITY_DENTRY_INIT_SECURITY */ data->ocd_brw_size = MD_MAX_BRW_SIZE; @@ -302,21 +300,21 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, valid != CLIENT_CONNECT_MDT_REQD) { char *buf; - OBD_ALLOC_WAIT(buf, PAGE_CACHE_SIZE); - obd_connect_flags2str(buf, PAGE_CACHE_SIZE, - valid ^ CLIENT_CONNECT_MDT_REQD, ","); + OBD_ALLOC_WAIT(buf, PAGE_SIZE); + obd_connect_flags2str(buf, PAGE_SIZE, + valid ^ CLIENT_CONNECT_MDT_REQD, 0, ","); LCONSOLE_ERROR_MSG(0x170, "Server %s does not support " "feature(s) needed for correct operation " "of this client (%s). Please upgrade " "server or downgrade client.\n", sbi->ll_md_exp->exp_obd->obd_name, buf); - OBD_FREE(buf, PAGE_CACHE_SIZE); + OBD_FREE(buf, PAGE_SIZE); GOTO(out_md_fid, err = -EPROTO); } size = sizeof(*data); err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA), - KEY_CONN_DATA, &size, data, NULL); + KEY_CONN_DATA, &size, data); if (err) { CERROR("%s: Get connect data failed: rc = %d\n", sbi->ll_md_exp->exp_obd->obd_name, err); @@ -324,12 +322,12 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, } LASSERT(osfs->os_bsize); - sb->s_blocksize = osfs->os_bsize; - sb->s_blocksize_bits = log2(osfs->os_bsize); - sb->s_magic = LL_SUPER_MAGIC; - sb->s_maxbytes = MAX_LFS_FILESIZE; - sbi->ll_namelen = osfs->os_namelen; - sbi->ll_max_rw_chunk = LL_DEFAULT_MAX_RW_CHUNK; + sb->s_blocksize = osfs->os_bsize; + sb->s_blocksize_bits = log2(osfs->os_bsize); + sb->s_magic = LL_SUPER_MAGIC; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sbi->ll_namelen = osfs->os_namelen; + sbi->ll_mnt.mnt = current->fs->root.mnt; if ((sbi->ll_flags & LL_SBI_USER_XATTR) && !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) { @@ -351,40 +349,20 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, sbi->ll_flags &= ~LL_SBI_ACL; } - if (data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) { - if (!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) { - sbi->ll_flags |= LL_SBI_RMT_CLIENT; - LCONSOLE_INFO("client is set as remote by default.\n"); - } - } else { - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - sbi->ll_flags &= ~LL_SBI_RMT_CLIENT; - LCONSOLE_INFO("client claims to be remote, but server " - "rejected, forced to be local.\n"); - } - } - - if (data->ocd_connect_flags & OBD_CONNECT_MDS_CAPA) { - LCONSOLE_INFO("client enabled MDS capability!\n"); - sbi->ll_flags |= LL_SBI_MDS_CAPA; - } - - if (data->ocd_connect_flags & OBD_CONNECT_OSS_CAPA) { - LCONSOLE_INFO("client enabled OSS capability!\n"); - sbi->ll_flags |= LL_SBI_OSS_CAPA; - } - if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH) sbi->ll_flags |= LL_SBI_64BIT_HASH; if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) - sbi->ll_md_brw_pages = data->ocd_brw_size >> PAGE_CACHE_SHIFT; + sbi->ll_md_brw_pages = data->ocd_brw_size >> PAGE_SHIFT; else sbi->ll_md_brw_pages = 1; if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) sbi->ll_flags |= LL_SBI_LAYOUT_LOCK; + if (obd_connect_has_secctx(data)) + sbi->ll_flags |= LL_SBI_FILE_SECCTX; + if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) { if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) { LCONSOLE_INFO("%s: disabling xattr cache due to " @@ -401,47 +379,54 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, GOTO(out_md_fid, err = -ENODEV); } - data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION | + /* pass client page size via ocd_grant_blkbits, the server should report + * back its backend blocksize for grant calculation purpose */ + data->ocd_grant_blkbits = PAGE_SHIFT; + + data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE | - OBD_CONNECT_CANCELSET | OBD_CONNECT_FID | - OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK| - OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT | - OBD_CONNECT_OSS_CAPA | OBD_CONNECT_VBR| - OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH | - OBD_CONNECT_MAXBYTES | + OBD_CONNECT_CANCELSET | OBD_CONNECT_FID | + OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK| + OBD_CONNECT_AT | OBD_CONNECT_OSS_CAPA | + OBD_CONNECT_VBR | OBD_CONNECT_FULL20 | + OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LAYOUTLOCK | - OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK; + OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK | + OBD_CONNECT_BULK_MBITS; + + data->ocd_connect_flags2 = 0; - if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) - data->ocd_connect_flags |= OBD_CONNECT_SOM; + if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_GRANT_PARAM)) + data->ocd_connect_flags |= OBD_CONNECT_GRANT_PARAM; - if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) { - /* OBD_CONNECT_CKSUM should always be set, even if checksums are - * disabled by default, because it can still be enabled on the - * fly via /proc. As a consequence, we still need to come to an - * agreement on the supported algorithms at connect time */ - data->ocd_connect_flags |= OBD_CONNECT_CKSUM; + if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) { + /* OBD_CONNECT_CKSUM should always be set, even if checksums are + * disabled by default, because it can still be enabled on the + * fly via /proc. As a consequence, we still need to come to an + * agreement on the supported algorithms at connect time */ + data->ocd_connect_flags |= OBD_CONNECT_CKSUM; if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY)) data->ocd_cksum_types = OBD_CKSUM_ADLER; else data->ocd_cksum_types = cksum_types_supported_client(); - } + } #ifdef HAVE_LRU_RESIZE_SUPPORT - data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE; + data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE; #endif - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) - data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE; + /* always ping even if server suppress_pings */ + if (sbi->ll_flags & LL_SBI_ALWAYS_PING) + data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS; - CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d " - "ocd_grant: %d\n", data->ocd_connect_flags, - data->ocd_version, data->ocd_grant); + CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d " + "ocd_grant: %d\n", data->ocd_connect_flags, + data->ocd_version, data->ocd_grant); - obd->obd_upcall.onu_owner = &sbi->ll_lco; - obd->obd_upcall.onu_upcall = cl_ocd_update; + obd->obd_upcall.onu_owner = &sbi->ll_lco; + obd->obd_upcall.onu_upcall = cl_ocd_update; data->ocd_brw_size = DT_MAX_BRW_SIZE; @@ -476,7 +461,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, mutex_unlock(&sbi->ll_lco.lco_lock); fid_zero(&sbi->ll_root_fid); - err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid, &oc); + err = md_get_root(sbi->ll_md_exp, get_mount_fileset(sb), + &sbi->ll_root_fid); if (err) { CERROR("cannot mds_connect: rc = %d\n", err); GOTO(out_lock_cn_cb, err); @@ -491,16 +477,13 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, sb->s_op = &lustre_super_operations; #if THREAD_SIZE >= 8192 /*b=17630*/ - sb->s_export_op = &lustre_export_operations; + sb->s_export_op = &lustre_export_operations; #endif /* make root inode * XXX: move this to after cbd setup? */ - valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMDSCAPA | - OBD_MD_FLMODEASIZE; - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) - valid |= OBD_MD_FLRMTPERM; - else if (sbi->ll_flags & LL_SBI_ACL) + valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE; + if (sbi->ll_flags & LL_SBI_ACL) valid |= OBD_MD_FLACL; OBD_ALLOC_PTR(op_data); @@ -509,12 +492,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, op_data->op_fid1 = sbi->ll_root_fid; op_data->op_mode = 0; - op_data->op_capa1 = oc; op_data->op_valid = valid; err = md_getattr(sbi->ll_md_exp, op_data, &request); - if (oc) - capa_put(oc); + OBD_FREE_PTR(op_data); if (err) { CERROR("%s: md_getattr failed for root: rc = %d\n", @@ -530,50 +511,45 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, GOTO(out_lock_cn_cb, err); } - LASSERT(fid_is_sane(&sbi->ll_root_fid)); + LASSERT(fid_is_sane(&sbi->ll_root_fid)); root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid, sbi->ll_flags & LL_SBI_32BIT_API), &lmd); - md_free_lustre_md(sbi->ll_md_exp, &lmd); - ptlrpc_req_finished(request); + md_free_lustre_md(sbi->ll_md_exp, &lmd); + ptlrpc_req_finished(request); if (IS_ERR(root)) { - if (lmd.lsm) - obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm); -#ifdef CONFIG_FS_POSIX_ACL - if (lmd.posix_acl) { - posix_acl_release(lmd.posix_acl); - lmd.posix_acl = NULL; - } -#endif - err = IS_ERR(root) ? PTR_ERR(root) : -EBADF; - root = NULL; - CERROR("lustre_lite: bad iget4 for root\n"); - GOTO(out_root, err); - } - - err = ll_close_thread_start(&sbi->ll_lcq); - if (err) { - CERROR("cannot start close thread: rc %d\n", err); - GOTO(out_root, err); - } - #ifdef CONFIG_FS_POSIX_ACL - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - rct_init(&sbi->ll_rct); - et_init(&sbi->ll_et); - } + if (lmd.posix_acl) { + posix_acl_release(lmd.posix_acl); + lmd.posix_acl = NULL; + } #endif + err = IS_ERR(root) ? PTR_ERR(root) : -EBADF; + root = NULL; + CERROR("lustre_lite: bad iget4 for root\n"); + GOTO(out_root, err); + } - checksum = sbi->ll_flags & LL_SBI_CHECKSUM; - err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM), - KEY_CHECKSUM, sizeof(checksum), &checksum, - NULL); - cl_sb_init(sb); + checksum = sbi->ll_flags & LL_SBI_CHECKSUM; + err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM), + KEY_CHECKSUM, sizeof(checksum), &checksum, + NULL); + if (err) { + CERROR("%s: Set checksum failed: rc = %d\n", + sbi->ll_dt_exp->exp_obd->obd_name, err); + GOTO(out_root, err); + } + cl_sb_init(sb); err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET), - KEY_CACHE_SET, sizeof(sbi->ll_cache), - &sbi->ll_cache, NULL); + KEY_CACHE_SET, sizeof(*sbi->ll_cache), + sbi->ll_cache, NULL); + if (err) { + CERROR("%s: Set cache_set failed: rc = %d\n", + sbi->ll_dt_exp->exp_obd->obd_name, err); + GOTO(out_root, err); + } sb->s_root = d_make_root(root); if (sb->s_root == NULL) { @@ -585,21 +561,21 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, sb->s_root->d_op = &ll_d_ops; #endif - sbi->ll_sdev_orig = sb->s_dev; + sbi->ll_sdev_orig = sb->s_dev; - /* We set sb->s_dev equal on all lustre clients in order to support - * NFS export clustering. NFSD requires that the FSID be the same - * on all clients. */ - /* s_dev is also used in lt_compare() to compare two fs, but that is - * only a node-local comparison. */ - uuid = obd_get_uuid(sbi->ll_md_exp); + /* We set sb->s_dev equal on all lustre clients in order to support + * NFS export clustering. NFSD requires that the FSID be the same + * on all clients. */ + /* s_dev is also used in lt_compare() to compare two fs, but that is + * only a node-local comparison. */ + uuid = obd_get_uuid(sbi->ll_md_exp); if (uuid != NULL) sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid)); - if (data != NULL) - OBD_FREE_PTR(data); - if (osfs != NULL) - OBD_FREE_PTR(osfs); + if (data != NULL) + OBD_FREE_PTR(data); + if (osfs != NULL) + OBD_FREE_PTR(osfs); if (proc_lustre_fs_root != NULL) { err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb, dt, md); @@ -610,79 +586,96 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, } } - RETURN(err); + RETURN(err); out_root: - if (root) - iput(root); + if (root) + iput(root); out_lock_cn_cb: obd_fid_fini(sbi->ll_dt_exp->exp_obd); out_dt: - obd_disconnect(sbi->ll_dt_exp); - sbi->ll_dt_exp = NULL; - /* Make sure all OScs are gone, since cl_cache is accessing sbi. */ - obd_zombie_barrier(); + obd_disconnect(sbi->ll_dt_exp); + sbi->ll_dt_exp = NULL; out_md_fid: obd_fid_fini(sbi->ll_md_exp->exp_obd); out_md: - obd_disconnect(sbi->ll_md_exp); - sbi->ll_md_exp = NULL; + obd_disconnect(sbi->ll_md_exp); + sbi->ll_md_exp = NULL; out: - if (data != NULL) - OBD_FREE_PTR(data); - if (osfs != NULL) - OBD_FREE_PTR(osfs); - return err; + if (data != NULL) + OBD_FREE_PTR(data); + if (osfs != NULL) + OBD_FREE_PTR(osfs); + return err; } int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize) { int size, rc; - *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL); + size = sizeof(*lmmsize); + rc = obd_get_info(NULL, sbi->ll_dt_exp, sizeof(KEY_MAX_EASIZE), + KEY_MAX_EASIZE, &size, lmmsize); + if (rc != 0) { + CERROR("%s: cannot get max LOV EA size: rc = %d\n", + sbi->ll_dt_exp->exp_obd->obd_name, rc); + RETURN(rc); + } + size = sizeof(int); rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE), - KEY_MAX_EASIZE, &size, lmmsize, NULL); + KEY_MAX_EASIZE, &size, lmmsize); if (rc) CERROR("Get max mdsize error rc %d\n", rc); RETURN(rc); } +/** + * Get the value of the default_easize parameter. + * + * \see client_obd::cl_default_mds_easize + * + * \param[in] sbi superblock info for this filesystem + * \param[out] lmmsize pointer to storage location for value + * + * \retval 0 on success + * \retval negative negated errno on failure + */ int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize) { int size, rc; size = sizeof(int); rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE), - KEY_DEFAULT_EASIZE, &size, lmmsize, NULL); + KEY_DEFAULT_EASIZE, &size, lmmsize); if (rc) CERROR("Get default mdsize error rc %d\n", rc); RETURN(rc); } -int ll_get_max_cookiesize(struct ll_sb_info *sbi, int *lmmsize) +/** + * Set the default_easize parameter to the given value. + * + * \see client_obd::cl_default_mds_easize + * + * \param[in] sbi superblock info for this filesystem + * \param[in] lmmsize the size to set + * + * \retval 0 on success + * \retval negative negated errno on failure + */ +int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize) { - int size, rc; - - size = sizeof(int); - rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_COOKIESIZE), - KEY_MAX_COOKIESIZE, &size, lmmsize, NULL); - if (rc) - CERROR("Get max cookiesize error rc %d\n", rc); - - RETURN(rc); -} + int rc; -int ll_get_default_cookiesize(struct ll_sb_info *sbi, int *lmmsize) -{ - int size, rc; + if (lmmsize < sizeof(struct lov_mds_md) || + lmmsize > OBD_MAX_DEFAULT_EA_SIZE) + return -EINVAL; - size = sizeof(int); - rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_COOKIESIZE), - KEY_DEFAULT_COOKIESIZE, &size, lmmsize, NULL); - if (rc) - CERROR("Get default cookiesize error rc %d\n", rc); + rc = obd_set_info_async(NULL, sbi->ll_md_exp, + sizeof(KEY_DEFAULT_EASIZE), KEY_DEFAULT_EASIZE, + sizeof(int), &lmmsize, NULL); RETURN(rc); } @@ -718,7 +711,7 @@ void lustre_dump_dentry(struct dentry *dentry, int recur) " flags=0x%x, fsdata=%p, %d subdirs\n", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_parent->d_name.len, dentry->d_parent->d_name.name, - dentry->d_parent, dentry->d_inode, d_count(dentry), + dentry->d_parent, dentry->d_inode, ll_d_count(dentry), dentry->d_flags, dentry->d_fsdata, subdirs); if (dentry->d_inode != NULL) ll_dump_inode(dentry->d_inode); @@ -727,7 +720,7 @@ void lustre_dump_dentry(struct dentry *dentry, int recur) return; list_for_each(tmp, &dentry->d_subdirs) { - struct dentry *d = list_entry(tmp, struct dentry, d_u.d_child); + struct dentry *d = list_entry(tmp, struct dentry, d_child); lustre_dump_dentry(d, recur - 1); } } @@ -737,25 +730,11 @@ static void client_common_put_super(struct super_block *sb) struct ll_sb_info *sbi = ll_s2sbi(sb); ENTRY; -#ifdef CONFIG_FS_POSIX_ACL - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - et_fini(&sbi->ll_et); - rct_fini(&sbi->ll_rct); - } -#endif - - ll_close_thread_shutdown(sbi->ll_lcq); - cl_sb_fini(sb); - list_del(&sbi->ll_conn_chain); - obd_fid_fini(sbi->ll_dt_exp->exp_obd); obd_disconnect(sbi->ll_dt_exp); sbi->ll_dt_exp = NULL; - /* wait till all OSCs are gone, since cl_cache is accessing sbi. - * see LU-2543. */ - obd_zombie_barrier(); lprocfs_unregister_mountpoint(sbi); @@ -768,22 +747,28 @@ static void client_common_put_super(struct super_block *sb) void ll_kill_super(struct super_block *sb) { - struct ll_sb_info *sbi; - - ENTRY; + struct ll_sb_info *sbi; + ENTRY; /* not init sb ?*/ - if (!(sb->s_flags & MS_ACTIVE)) - return; + if (!(sb->s_flags & MS_ACTIVE)) + return; - sbi = ll_s2sbi(sb); - /* we need restore s_dev from changed for clustred NFS before put_super - * because new kernels have cached s_dev and change sb->s_dev in - * put_super not affected real removing devices */ + sbi = ll_s2sbi(sb); + /* we need restore s_dev from changed for clustred NFS before put_super + * because new kernels have cached s_dev and change sb->s_dev in + * put_super not affected real removing devices */ if (sbi) { sb->s_dev = sbi->ll_sdev_orig; sbi->ll_umounting = 1; + + /* wait running statahead threads to quit */ + while (atomic_read(&sbi->ll_sa_running) > 0) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC >> 3)); + } } + EXIT; } @@ -839,30 +824,18 @@ static int ll_options(char *options, int *flags) *flags &= ~tmp; goto next; } -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 51, 0) - tmp = ll_set_opt("acl", s1, LL_SBI_ACL); - if (tmp) { - /* Ignore deprecated mount option. The client will - * always try to mount with ACL support, whether this - * is used depends on whether server supports it. */ - LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated " - "mount option 'acl'.\n"); + tmp = ll_set_opt("context", s1, 1); + if (tmp) goto next; - } - tmp = ll_set_opt("noacl", s1, LL_SBI_ACL); - if (tmp) { - LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated " - "mount option 'noacl'.\n"); + tmp = ll_set_opt("fscontext", s1, 1); + if (tmp) + goto next; + tmp = ll_set_opt("defcontext", s1, 1); + if (tmp) + goto next; + tmp = ll_set_opt("rootcontext", s1, 1); + if (tmp) goto next; - } -#else -#warning "{no}acl options have been deprecated since 1.8, please remove them" -#endif - tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT); - if (tmp) { - *flags |= tmp; - goto next; - } tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH); if (tmp) { *flags |= tmp; @@ -904,11 +877,6 @@ static int ll_options(char *options, int *flags) *flags &= ~tmp; goto next; } - tmp = ll_set_opt("som_preview", s1, LL_SBI_SOM_PREVIEW); - if (tmp) { - *flags |= tmp; - goto next; - } tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API); if (tmp) { *flags |= tmp; @@ -924,6 +892,11 @@ static int ll_options(char *options, int *flags) *flags &= ~tmp; goto next; } + tmp = ll_set_opt("always_ping", s1, LL_SBI_ALWAYS_PING); + if (tmp) { + *flags |= tmp; + goto next; + } LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n", s1); RETURN(-EINVAL); @@ -942,20 +915,10 @@ void ll_lli_init(struct ll_inode_info *lli) { lli->lli_inode_magic = LLI_INODE_MAGIC; lli->lli_flags = 0; - lli->lli_ioepoch = 0; - lli->lli_maxbytes = MAX_LFS_FILESIZE; spin_lock_init(&lli->lli_lock); lli->lli_posix_acl = NULL; - lli->lli_remote_perms = NULL; - mutex_init(&lli->lli_rmtperm_mutex); /* Do not set lli_fid, it has been initialized already. */ fid_zero(&lli->lli_pfid); - INIT_LIST_HEAD(&lli->lli_close_list); - INIT_LIST_HEAD(&lli->lli_oss_capas); - atomic_set(&lli->lli_open_count, 0); - lli->lli_mds_capa = NULL; - lli->lli_rmtperm_time = 0; - lli->lli_pending_och = NULL; lli->lli_mds_read_och = NULL; lli->lli_mds_write_och = NULL; lli->lli_mds_exec_och = NULL; @@ -964,9 +927,8 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_open_fd_exec_count = 0; mutex_init(&lli->lli_och_mutex); spin_lock_init(&lli->lli_agl_lock); - lli->lli_has_smd = false; spin_lock_init(&lli->lli_layout_lock); - ll_layout_version_set(lli, LL_LAYOUT_GEN_NONE); + ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE); lli->lli_clob = NULL; init_rwsem(&lli->lli_xattrs_list_rwsem); @@ -979,11 +941,13 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_sai = NULL; spin_lock_init(&lli->lli_sa_lock); lli->lli_opendir_pid = 0; + lli->lli_sa_enabled = 0; + lli->lli_def_stripe_offset = -1; } else { mutex_init(&lli->lli_size_mutex); lli->lli_symlink_name = NULL; init_rwsem(&lli->lli_trunc_sem); - mutex_init(&lli->lli_write_mutex); + range_lock_tree_init(&lli->lli_write_tree); init_rwsem(&lli->lli_glimpse_sem); lli->lli_glimpse_time = 0; INIT_LIST_HEAD(&lli->lli_agl_list); @@ -991,6 +955,7 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_async_rc = 0; } mutex_init(&lli->lli_layout_mutex); + memset(lli->lli_jobid, 0, LUSTRE_JOBID_SIZE); } static inline int ll_bdi_register(struct backing_dev_info *bdi) @@ -1039,7 +1004,11 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) if (err) GOTO(out_free, err); lsi->lsi_flags |= LSI_BDI_INITIALIZED; +#ifdef HAVE_BDI_CAP_MAP_COPY lsi->lsi_bdi.capabilities = BDI_CAP_MAP_COPY; +#else + lsi->lsi_bdi.capabilities = 0; +#endif err = ll_bdi_register(&lsi->lsi_bdi); if (err) GOTO(out_free, err); @@ -1084,19 +1053,25 @@ int ll_fill_super(struct super_block *sb, struct vfsmount *mnt) /* connections, registrations, sb setup */ err = client_common_fill_super(sb, md, dt, mnt); + if (err < 0) + GOTO(out_free, err); + + sbi->ll_client_common_fill_super_succeeded = 1; out_free: - if (md) - OBD_FREE(md, strlen(lprof->lp_md) + instlen + 2); - if (dt) - OBD_FREE(dt, strlen(lprof->lp_dt) + instlen + 2); - if (err) - ll_put_super(sb); - else if (sbi->ll_flags & LL_SBI_VERBOSE) - LCONSOLE_WARN("Mounted %s\n", profilenm); + if (md) + OBD_FREE(md, strlen(lprof->lp_md) + instlen + 2); + if (dt) + OBD_FREE(dt, strlen(lprof->lp_dt) + instlen + 2); + if (lprof != NULL) + class_put_profile(lprof); + if (err) + ll_put_super(sb); + else if (sbi->ll_flags & LL_SBI_VERBOSE) + LCONSOLE_WARN("Mounted %s\n", profilenm); - OBD_FREE_PTR(cfg); - RETURN(err); + OBD_FREE_PTR(cfg); + RETURN(err); } /* ll_fill_super */ void ll_put_super(struct super_block *sb) @@ -1106,13 +1081,12 @@ void ll_put_super(struct super_block *sb) struct lustre_sb_info *lsi = s2lsi(sb); struct ll_sb_info *sbi = ll_s2sbi(sb); char *profilenm = get_profile_name(sb); - int ccc_count, next, force = 1, rc = 0; + long ccc_count; + int next, force = 1, rc = 0; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm); - ll_print_capa_stat(sbi); - cfg.cfg_instance = sb; lustre_end_log(sb, profilenm, &cfg); @@ -1128,14 +1102,14 @@ void ll_put_super(struct super_block *sb) /* Wait for unstable pages to be committed to stable storage */ if (force == 0) { struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); - rc = l_wait_event(sbi->ll_cache.ccc_unstable_waitq, - atomic_read(&sbi->ll_cache.ccc_unstable_nr) == 0, + rc = l_wait_event(sbi->ll_cache->ccc_unstable_waitq, + atomic_long_read(&sbi->ll_cache->ccc_unstable_nr) == 0, &lwi); } - ccc_count = atomic_read(&sbi->ll_cache.ccc_unstable_nr); + ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr); if (force == 0 && rc != -EINTR) - LASSERTF(ccc_count == 0, "count: %i\n", ccc_count); + LASSERTF(ccc_count == 0, "count: %li\n", ccc_count); /* We need to set force before the lov_disconnect in @@ -1148,10 +1122,10 @@ void ll_put_super(struct super_block *sb) } } - if (sbi->ll_lcq) { - /* Only if client_common_fill_super succeeded */ - client_common_put_super(sb); - } + if (sbi->ll_client_common_fill_super_succeeded) { + /* Only if client_common_fill_super succeeded */ + client_common_put_super(sb); + } next = 0; while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) { @@ -1206,7 +1180,7 @@ struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock) return inode; } -static void ll_dir_clear_lsm_md(struct inode *inode) +void ll_dir_clear_lsm_md(struct inode *inode) { struct ll_inode_info *lli = ll_i2info(inode); @@ -1241,7 +1215,7 @@ static struct inode *ll_iget_anon_dir(struct super_block *sb, struct lmv_stripe_md *lsm = md->lmv; inode->i_mode = (inode->i_mode & ~S_IFMT) | - (body->mode & S_IFMT); + (body->mbo_mode & S_IFMT); LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode "DFID"\n", PFID(fid)); @@ -1250,9 +1224,11 @@ static struct inode *ll_iget_anon_dir(struct super_block *sb, LTIME_S(inode->i_ctime) = 0; inode->i_rdev = 0; +#ifdef HAVE_BACKING_DEV_INFO /* initializing backing dev info. */ inode->i_mapping->backing_dev_info = &s2lsi(inode->i_sb)->lsi_bdi; +#endif inode->i_op = &ll_dir_inode_operations; inode->i_fop = &ll_dir_operations; lli->lli_fid = *fid; @@ -1260,7 +1236,7 @@ static struct inode *ll_iget_anon_dir(struct super_block *sb, LASSERT(lsm != NULL); /* master object FID */ - lli->lli_pfid = body->fid1; + lli->lli_pfid = body->mbo_fid1; CDEBUG(D_INODE, "lli %p slave "DFID" master "DFID"\n", lli, PFID(fid), PFID(&lli->lli_pfid)); unlock_new_inode(inode); @@ -1302,10 +1278,7 @@ static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md) } } - /* Here is where the lsm is being initialized(fill lmo_info) after - * client retrieve MD stripe information from MDT. */ - return md_update_lsm_md(ll_i2mdexp(inode), lsm, md->body, - ll_md_blocking_ast); + return 0; } static inline int lli_lsm_md_eq(const struct lmv_stripe_md *lsm_md1, @@ -1355,15 +1328,42 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) /* set the directory layout */ if (lli->lli_lsm_md == NULL) { + struct cl_attr *attr; rc = ll_init_lsm_md(inode, md); if (rc != 0) RETURN(rc); - lli->lli_lsm_md = lsm; - /* set lsm_md to NULL, so the following free lustre_md + /* set md->lmv to NULL, so the following free lustre_md * will not free this lsm */ md->lmv = NULL; + lli->lli_lsm_md = lsm; + + OBD_ALLOC_PTR(attr); + if (attr == NULL) + RETURN(-ENOMEM); + + /* validate the lsm */ + rc = md_merge_attr(ll_i2mdexp(inode), lsm, attr, + ll_md_blocking_ast); + if (rc != 0) { + OBD_FREE_PTR(attr); + RETURN(rc); + } + + if (md->body->mbo_valid & OBD_MD_FLNLINK) + md->body->mbo_nlink = attr->cat_nlink; + if (md->body->mbo_valid & OBD_MD_FLSIZE) + md->body->mbo_size = attr->cat_size; + if (md->body->mbo_valid & OBD_MD_FLATIME) + md->body->mbo_atime = attr->cat_atime; + if (md->body->mbo_valid & OBD_MD_FLCTIME) + md->body->mbo_ctime = attr->cat_ctime; + if (md->body->mbo_valid & OBD_MD_FLMTIME) + md->body->mbo_mtime = attr->cat_mtime; + + OBD_FREE_PTR(attr); + CDEBUG(D_INODE, "Set lsm %p magic %x to "DFID"\n", lsm, lsm->lsm_md_magic, PFID(ll_inode2fid(inode))); RETURN(0); @@ -1374,12 +1374,11 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) struct lmv_stripe_md *old_lsm = lli->lli_lsm_md; int idx; - CERROR("%s: lmv layout mismatch "DFID"(%p)/"DFID"(%p)" + CERROR("%s: inode "DFID"(%p)'s lmv layout mismatch (%p)/(%p)" "magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d" "hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(&lsm->lsm_md_master_fid), lsm, - PFID(&old_lsm->lsm_md_master_fid), old_lsm, + ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), + inode, lsm, old_lsm, lsm->lsm_md_magic, old_lsm->lsm_md_magic, lsm->lsm_md_stripe_count, old_lsm->lsm_md_stripe_count, @@ -1406,10 +1405,7 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) RETURN(-EIO); } - rc = md_update_lsm_md(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md, - md->body, ll_md_blocking_ast); - - RETURN(rc); + RETURN(0); } void ll_clear_inode(struct inode *inode) @@ -1428,9 +1424,6 @@ void ll_clear_inode(struct inode *inode) LASSERT(lli->lli_opendir_pid == 0); } - spin_lock(&lli->lli_lock); - ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK; - spin_unlock(&lli->lli_lock); md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode)); LASSERT(!lli->lli_open_fd_write_count); @@ -1452,24 +1445,15 @@ void ll_clear_inode(struct inode *inode) ll_xattr_cache_destroy(inode); - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - LASSERT(lli->lli_posix_acl == NULL); - if (lli->lli_remote_perms) { - free_rmtperm_hash(lli->lli_remote_perms); - lli->lli_remote_perms = NULL; - } - } #ifdef CONFIG_FS_POSIX_ACL - else if (lli->lli_posix_acl) { + if (lli->lli_posix_acl) { LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1); - LASSERT(lli->lli_remote_perms == NULL); posix_acl_release(lli->lli_posix_acl); lli->lli_posix_acl = NULL; } #endif lli->lli_inode_magic = LLI_INODE_DEAD; - ll_clear_inode_capas(inode); if (S_ISDIR(inode->i_mode)) ll_dir_clear_lsm_md(inode); else if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) @@ -1480,13 +1464,11 @@ void ll_clear_inode(struct inode *inode) * cl_object still uses inode lsm. */ cl_inode_fini(inode); - lli->lli_has_smd = false; EXIT; } -int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data, - struct md_open_data **mod) +static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data) { struct lustre_md md; struct inode *inode = dentry->d_inode; @@ -1500,8 +1482,7 @@ int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data, if (IS_ERR(op_data)) RETURN(PTR_ERR(op_data)); - rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, - &request, mod); + rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &request); if (rc) { ptlrpc_req_finished(request); if (rc == -ENOENT) { @@ -1532,72 +1513,19 @@ int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data, /* inode size will be in ll_setattr_ost, can't do it now since dirty * cache is not cleared yet. */ op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE); + if (S_ISREG(inode->i_mode)) + inode_lock(inode); rc = simple_setattr(dentry, &op_data->op_attr); + if (S_ISREG(inode->i_mode)) + inode_unlock(inode); op_data->op_attr.ia_valid = ia_valid; - /* Extract epoch data if obtained. */ - op_data->op_handle = md.body->handle; - op_data->op_ioepoch = md.body->ioepoch; - rc = ll_update_inode(inode, &md); ptlrpc_req_finished(request); RETURN(rc); } -/* Close IO epoch and send Size-on-MDS attribute update. */ -static int ll_setattr_done_writing(struct inode *inode, - struct md_op_data *op_data, - struct md_open_data *mod) -{ - struct ll_inode_info *lli = ll_i2info(inode); - int rc = 0; - ENTRY; - - LASSERT(op_data != NULL); - if (!S_ISREG(inode->i_mode)) - RETURN(0); - - CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n", - op_data->op_ioepoch, PFID(&lli->lli_fid)); - - op_data->op_flags = MF_EPOCH_CLOSE; - ll_done_writing_attr(inode, op_data); - ll_pack_inode2opdata(inode, op_data, NULL); - - rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, mod); - if (rc == -EAGAIN) { - /* MDS has instructed us to obtain Size-on-MDS attribute - * from OSTs and send setattr to back to MDS. */ - rc = ll_som_update(inode, op_data); - } else if (rc) { - CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n", - ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name, - PFID(ll_inode2fid(inode)), rc); - } - RETURN(rc); -} - -static int ll_setattr_ost(struct inode *inode, struct iattr *attr) -{ - struct obd_capa *capa; - int rc; - - if (attr->ia_valid & ATTR_SIZE) - capa = ll_osscapa_get(inode, CAPA_OPC_OSS_TRUNC); - else - capa = ll_mdscapa_get(inode); - - rc = cl_setattr_ost(inode, attr, capa); - - if (attr->ia_valid & ATTR_SIZE) - ll_truncate_free_capa(capa); - else - capa_put(capa); - - return rc; -} - /* If this inode has objects allocated to it (lsm != NULL), then the OST * object(s) determine the file size and mtime. Otherwise, the MDS will * keep these values until such a time that objects are allocated for it. @@ -1618,9 +1546,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) struct inode *inode = dentry->d_inode; struct ll_inode_info *lli = ll_i2info(inode); struct md_op_data *op_data = NULL; - struct md_open_data *mod = NULL; - bool file_is_released = false; - int rc = 0, rc1 = 0; + int rc = 0; ENTRY; CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, " @@ -1639,7 +1565,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) * OST maximum object size and number of stripes. This * needs another check in addition to the VFS check above. */ if (attr->ia_size > ll_file_maxbytes(inode)) { - CDEBUG(D_INODE,"file "DFID" too large %llu > "LPU64"\n", + CDEBUG(D_INODE,"file "DFID" too large %llu > %llu\n", PFID(&lli->lli_fid), attr->ia_size, ll_file_maxbytes(inode)); RETURN(-EFBIG); @@ -1656,18 +1582,19 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) } /* We mark all of the fields "set" so MDS/OST does not re-set them */ - if (attr->ia_valid & ATTR_CTIME) { - attr->ia_ctime = CFS_CURRENT_TIME; + if (!(attr->ia_valid & ATTR_CTIME_SET) && + (attr->ia_valid & ATTR_CTIME)) { + attr->ia_ctime = CURRENT_TIME; attr->ia_valid |= ATTR_CTIME_SET; } if (!(attr->ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) { - attr->ia_atime = CFS_CURRENT_TIME; + attr->ia_atime = CURRENT_TIME; attr->ia_valid |= ATTR_ATIME_SET; } if (!(attr->ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) { - attr->ia_mtime = CFS_CURRENT_TIME; + attr->ia_mtime = CURRENT_TIME; attr->ia_valid |= ATTR_MTIME_SET; } @@ -1676,102 +1603,87 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), cfs_time_current_sec()); - /* We always do an MDS RPC, even if we're only changing the size; - * only the MDS knows whether truncate() should fail with -ETXTBUSY */ - - OBD_ALLOC_PTR(op_data); - if (op_data == NULL) - RETURN(-ENOMEM); - - if (!S_ISDIR(inode->i_mode)) { + if (S_ISREG(inode->i_mode)) { if (attr->ia_valid & ATTR_SIZE) inode_dio_write_done(inode); - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); } - /* truncate on a released file must failed with -ENODATA, - * so size must not be set on MDS for released file - * but other attributes must be set - */ - if (S_ISREG(inode->i_mode)) { - struct lov_stripe_md *lsm; - __u32 gen; - - ll_layout_refresh(inode, &gen); - lsm = ccc_inode_lsm_get(inode); - if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED) - file_is_released = true; - ccc_inode_lsm_put(inode, lsm); - - if (!hsm_import && attr->ia_valid & ATTR_SIZE) { - if (file_is_released) { - rc = ll_layout_restore(inode, 0, attr->ia_size); - if (rc < 0) - GOTO(out, rc); - - file_is_released = false; - ll_layout_refresh(inode, &gen); - } + /* We always do an MDS RPC, even if we're only changing the size; + * only the MDS knows whether truncate() should fail with -ETXTBUSY */ - /* If we are changing file size, file content is - * modified, flag it. */ - attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - op_data->op_bias |= MDS_DATA_MODIFIED; - } - } + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + GOTO(out, rc = -ENOMEM); - memcpy(&op_data->op_attr, attr, sizeof(*attr)); + if (!hsm_import && attr->ia_valid & ATTR_SIZE) { + /* If we are changing file size, file content is + * modified, flag it. */ + attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; + op_data->op_bias |= MDS_DATA_MODIFIED; + ll_file_clear_flag(lli, LLIF_DATA_MODIFIED); + } - /* Open epoch for truncate. */ - if (exp_connect_som(ll_i2mdexp(inode)) && !hsm_import && - (attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) - op_data->op_flags = MF_EPOCH_OPEN; + op_data->op_attr = *attr; - rc = ll_md_setattr(dentry, op_data, &mod); + rc = ll_md_setattr(dentry, op_data); if (rc) GOTO(out, rc); - /* RPC to MDT is sent, cancel data modification flag */ - if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) { - spin_lock(&lli->lli_lock); - lli->lli_flags &= ~LLIF_DATA_MODIFIED; - spin_unlock(&lli->lli_lock); - } - - ll_ioepoch_open(lli, op_data->op_ioepoch); - if (!S_ISREG(inode->i_mode) || file_is_released) + if (!S_ISREG(inode->i_mode) || hsm_import) GOTO(out, rc = 0); if (attr->ia_valid & (ATTR_SIZE | ATTR_ATIME | ATTR_ATIME_SET | - ATTR_MTIME | ATTR_MTIME_SET)) { + ATTR_MTIME | ATTR_MTIME_SET | + ATTR_CTIME | ATTR_CTIME_SET)) { /* For truncate and utimes sending attributes to OSTs, setting * mtime/atime to the past will be performed under PW [0:EOF] * extent lock (new_size:EOF for truncate). It may seem * excessive to send mtime/atime updates to OSTs when not * setting times to past, but it is necessary due to possible * time de-synchronization between MDT inode and OST objects */ - if (attr->ia_valid & ATTR_SIZE) - down_write(&lli->lli_trunc_sem); - rc = ll_setattr_ost(inode, attr); - if (attr->ia_valid & ATTR_SIZE) - up_write(&lli->lli_trunc_sem); + rc = cl_setattr_ost(lli->lli_clob, attr, 0); + } + + /* If the file was restored, it needs to set dirty flag. + * + * We've already sent MDS_DATA_MODIFIED flag in + * ll_md_setattr() for truncate. However, the MDT refuses to + * set the HS_DIRTY flag on released files, so we have to set + * it again if the file has been restored. Please check how + * LLIF_DATA_MODIFIED is set in vvp_io_setattr_fini(). + * + * Please notice that if the file is not released, the previous + * MDS_DATA_MODIFIED has taken effect and usually + * LLIF_DATA_MODIFIED is not set(see vvp_io_setattr_fini()). + * This way we can save an RPC for common open + trunc + * operation. */ + if (ll_file_test_and_clear_flag(lli, LLIF_DATA_MODIFIED)) { + struct hsm_state_set hss = { + .hss_valid = HSS_SETMASK, + .hss_setmask = HS_DIRTY, + }; + int rc2; + + rc2 = ll_hsm_state_set(inode, &hss); + /* truncate and write can happen at the same time, so that + * the file can be set modified even though the file is not + * restored from released state, and ll_hsm_state_set() is + * not applicable for the file, and rc2 < 0 is normal in this + * case. */ + if (rc2 < 0) + CDEBUG(D_INFO, DFID "HSM set dirty failed: rc2 = %d\n", + PFID(ll_inode2fid(inode)), rc2); } + EXIT; out: - if (op_data) { - if (op_data->op_ioepoch) { - rc1 = ll_setattr_done_writing(inode, op_data, mod); - if (!rc) - rc = rc1; - } + if (op_data != NULL) ll_finish_md_op_data(op_data); - } - if (!S_ISDIR(inode->i_mode)) { - mutex_lock(&inode->i_mutex); + + if (S_ISREG(inode->i_mode)) { + inode_lock(inode); if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) inode_dio_wait(inode); } @@ -1828,7 +1740,7 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, osfs->os_type = sb->s_magic; - CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n", + CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n", osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files); if (sbi->ll_flags & LL_SBI_LAZYSTATFS) @@ -1840,7 +1752,7 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, RETURN(rc); } - CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n", + CDEBUG(D_SUPER, "OSC blocks %llu/%llu objects %llu/%llu\n", obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree, obd_osfs.os_files); @@ -1868,7 +1780,7 @@ int ll_statfs(struct dentry *de, struct kstatfs *sfs) __u64 fsid = huge_encode_dev(sb->s_dev); int rc; - CDEBUG(D_VFSTRACE, "VFS Op: at "LPU64" jiffies\n", get_jiffies_64()); + CDEBUG(D_VFSTRACE, "VFS Op: at %llu jiffies\n", get_jiffies_64()); ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1); /* Some amount of caching on the client is allowed */ @@ -1924,19 +1836,10 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) { struct ll_inode_info *lli = ll_i2info(inode); struct mdt_body *body = md->body; - struct lov_stripe_md *lsm = md->lsm; struct ll_sb_info *sbi = ll_i2sbi(inode); - LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); - if (lsm != NULL) { - if (!lli->lli_has_smd && - !(sbi->ll_flags & LL_SBI_LAYOUT_LOCK)) - cl_file_inode_init(inode, md); - - lli->lli_maxbytes = lsm->lsm_maxbytes; - if (lli->lli_maxbytes > MAX_LFS_FILESIZE) - lli->lli_maxbytes = MAX_LFS_FILESIZE; - } + if (body->mbo_valid & OBD_MD_FLEASIZE) + cl_file_inode_init(inode, md); if (S_ISDIR(inode->i_mode)) { int rc; @@ -1946,12 +1849,8 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) return rc; } - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - if (body->valid & OBD_MD_FLRMTPERM) - ll_update_remote_perm(inode, md->remote_perm); - } #ifdef CONFIG_FS_POSIX_ACL - else if (body->valid & OBD_MD_FLACL) { + if (body->mbo_valid & OBD_MD_FLACL) { spin_lock(&lli->lli_lock); if (lli->lli_posix_acl) posix_acl_release(lli->lli_posix_acl); @@ -1959,126 +1858,93 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) spin_unlock(&lli->lli_lock); } #endif - inode->i_ino = cl_fid_build_ino(&body->fid1, + inode->i_ino = cl_fid_build_ino(&body->mbo_fid1, sbi->ll_flags & LL_SBI_32BIT_API); - inode->i_generation = cl_fid_build_gen(&body->fid1); + inode->i_generation = cl_fid_build_gen(&body->mbo_fid1); - if (body->valid & OBD_MD_FLATIME) { - if (body->atime > LTIME_S(inode->i_atime)) - LTIME_S(inode->i_atime) = body->atime; - lli->lli_lvb.lvb_atime = body->atime; - } - if (body->valid & OBD_MD_FLMTIME) { - if (body->mtime > LTIME_S(inode->i_mtime)) { - CDEBUG(D_INODE, "setting ino %lu mtime from %lu " - "to "LPU64"\n", inode->i_ino, - LTIME_S(inode->i_mtime), body->mtime); - LTIME_S(inode->i_mtime) = body->mtime; - } - lli->lli_lvb.lvb_mtime = body->mtime; - } - if (body->valid & OBD_MD_FLCTIME) { - if (body->ctime > LTIME_S(inode->i_ctime)) - LTIME_S(inode->i_ctime) = body->ctime; - lli->lli_lvb.lvb_ctime = body->ctime; - } - if (body->valid & OBD_MD_FLMODE) - inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT); - if (body->valid & OBD_MD_FLTYPE) - inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT); - LASSERT(inode->i_mode != 0); - if (S_ISREG(inode->i_mode)) { - inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1, LL_MAX_BLKSIZE_BITS); - } else { - inode->i_blkbits = inode->i_sb->s_blocksize_bits; - } - if (body->valid & OBD_MD_FLUID) - inode->i_uid = make_kuid(&init_user_ns, body->uid); - if (body->valid & OBD_MD_FLGID) - inode->i_gid = make_kgid(&init_user_ns, body->gid); - if (body->valid & OBD_MD_FLFLAGS) - inode->i_flags = ll_ext_to_inode_flags(body->flags); - if (body->valid & OBD_MD_FLNLINK) - set_nlink(inode, body->nlink); - if (body->valid & OBD_MD_FLRDEV) - inode->i_rdev = old_decode_dev(body->rdev); - - if (body->valid & OBD_MD_FLID) { + if (body->mbo_valid & OBD_MD_FLATIME) { + if (body->mbo_atime > LTIME_S(inode->i_atime)) + LTIME_S(inode->i_atime) = body->mbo_atime; + lli->lli_atime = body->mbo_atime; + } + + if (body->mbo_valid & OBD_MD_FLMTIME) { + if (body->mbo_mtime > LTIME_S(inode->i_mtime)) { + CDEBUG(D_INODE, "setting ino %lu mtime from %lu " + "to %llu\n", inode->i_ino, + LTIME_S(inode->i_mtime), body->mbo_mtime); + LTIME_S(inode->i_mtime) = body->mbo_mtime; + } + lli->lli_mtime = body->mbo_mtime; + } + + if (body->mbo_valid & OBD_MD_FLCTIME) { + if (body->mbo_ctime > LTIME_S(inode->i_ctime)) + LTIME_S(inode->i_ctime) = body->mbo_ctime; + lli->lli_ctime = body->mbo_ctime; + } + + if (body->mbo_valid & OBD_MD_FLMODE) + inode->i_mode = (inode->i_mode & S_IFMT) | + (body->mbo_mode & ~S_IFMT); + + if (body->mbo_valid & OBD_MD_FLTYPE) + inode->i_mode = (inode->i_mode & ~S_IFMT) | + (body->mbo_mode & S_IFMT); + + LASSERT(inode->i_mode != 0); + if (S_ISREG(inode->i_mode)) + inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1, + LL_MAX_BLKSIZE_BITS); + else + inode->i_blkbits = inode->i_sb->s_blocksize_bits; + + if (body->mbo_valid & OBD_MD_FLUID) + inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid); + if (body->mbo_valid & OBD_MD_FLGID) + inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid); + if (body->mbo_valid & OBD_MD_FLFLAGS) + inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags); + if (body->mbo_valid & OBD_MD_FLNLINK) + set_nlink(inode, body->mbo_nlink); + if (body->mbo_valid & OBD_MD_FLRDEV) + inode->i_rdev = old_decode_dev(body->mbo_rdev); + + if (body->mbo_valid & OBD_MD_FLID) { /* FID shouldn't be changed! */ if (fid_is_sane(&lli->lli_fid)) { - LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1), + LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1), "Trying to change FID "DFID " to the "DFID", inode "DFID"(%p)\n", - PFID(&lli->lli_fid), PFID(&body->fid1), + PFID(&lli->lli_fid), PFID(&body->mbo_fid1), PFID(ll_inode2fid(inode)), inode); } else { - lli->lli_fid = body->fid1; + lli->lli_fid = body->mbo_fid1; } } - LASSERT(fid_seq(&lli->lli_fid) != 0); - - if (body->valid & OBD_MD_FLSIZE) { - if (exp_connect_som(ll_i2mdexp(inode)) && - S_ISREG(inode->i_mode)) { - struct lustre_handle lockh; - ldlm_mode_t mode; - - /* As it is possible a blocking ast has been processed - * by this time, we need to check there is an UPDATE - * lock on the client and set LLIF_MDS_SIZE_LOCK holding - * it. */ - mode = ll_take_md_lock(inode, MDS_INODELOCK_UPDATE, - &lockh, LDLM_FL_CBPENDING, - LCK_CR | LCK_CW | - LCK_PR | LCK_PW); - if (mode) { - if (lli->lli_flags & (LLIF_DONE_WRITING | - LLIF_EPOCH_PENDING | - LLIF_SOM_DIRTY)) { - CERROR("%s: inode "DFID" flags %u still" - " has size authority! do not " - "trust the size from MDS\n", - sbi->ll_md_exp->exp_obd->obd_name, - PFID(ll_inode2fid(inode)), - lli->lli_flags); - } else { - /* Use old size assignment to avoid - * deadlock bz14138 & bz14326 */ - i_size_write(inode, body->size); - spin_lock(&lli->lli_lock); - lli->lli_flags |= LLIF_MDS_SIZE_LOCK; - spin_unlock(&lli->lli_lock); - } - ldlm_lock_decref(&lockh, mode); - } - } else { - /* Use old size assignment to avoid - * deadlock bz14138 & bz14326 */ - i_size_write(inode, body->size); - - CDEBUG(D_VFSTRACE, - "inode="DFID", updating i_size %llu\n", - PFID(ll_inode2fid(inode)), - (unsigned long long)body->size); - } + LASSERT(fid_seq(&lli->lli_fid) != 0); - if (body->valid & OBD_MD_FLBLOCKS) - inode->i_blocks = body->blocks; - } + if (body->mbo_valid & OBD_MD_FLSIZE) { + i_size_write(inode, body->mbo_size); - if (body->valid & OBD_MD_FLMDSCAPA) { - LASSERT(md->mds_capa); - ll_add_capa(inode, md->mds_capa); - } - if (body->valid & OBD_MD_FLOSSCAPA) { - LASSERT(md->oss_capa); - ll_add_capa(inode, md->oss_capa); - } + CDEBUG(D_VFSTRACE, "inode="DFID", updating i_size %llu\n", + PFID(ll_inode2fid(inode)), + (unsigned long long)body->mbo_size); - if (body->valid & OBD_MD_TSTATE) { - if (body->t_state & MS_RESTORE) - lli->lli_flags |= LLIF_FILE_RESTORING; + if (body->mbo_valid & OBD_MD_FLBLOCKS) + inode->i_blocks = body->mbo_blocks; + } + + if (body->mbo_valid & OBD_MD_TSTATE) { + /* Set LLIF_FILE_RESTORING if restore ongoing and + * clear it when done to ensure to start again + * glimpsing updated attrs + */ + if (body->mbo_t_state & MS_RESTORE) + ll_file_set_flag(lli, LLIF_FILE_RESTORING); + else + ll_file_clear_flag(lli, LLIF_FILE_RESTORING); } return 0; @@ -2094,8 +1960,6 @@ int ll_read_inode2(struct inode *inode, void *opaque) CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n", PFID(&lli->lli_fid), inode); - LASSERT(!lli->lli_has_smd); - /* Core attributes from the MDS first. This is a new inode, and * the VFS doesn't zero times in the core inode so we have to do * it ourselves. They will be overwritten by either MDS or OST @@ -2110,10 +1974,10 @@ int ll_read_inode2(struct inode *inode, void *opaque) /* OIDEBUG(inode); */ - /* initializing backing dev info. */ - inode->i_mapping->backing_dev_info = &s2lsi(inode->i_sb)->lsi_bdi; - - +#ifdef HAVE_BACKING_DEV_INFO + /* initializing backing dev info. */ + inode->i_mapping->backing_dev_info = &s2lsi(inode->i_sb)->lsi_bdi; +#endif if (S_ISREG(inode->i_mode)) { struct ll_sb_info *sbi = ll_i2sbi(inode); inode->i_op = &ll_file_inode_operations; @@ -2141,28 +2005,19 @@ int ll_read_inode2(struct inode *inode, void *opaque) void ll_delete_inode(struct inode *inode) { - struct cl_inode_info *lli = cl_i2info(inode); + struct ll_inode_info *lli = ll_i2info(inode); ENTRY; if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL) - /* discard all dirty pages before truncating them, required by - * osc_extent implementation at LU-1030. */ - cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, - CL_FSYNC_DISCARD, 1); - - truncate_inode_pages(&inode->i_data, 0); - - /* Workaround for LU-118 */ - if (inode->i_data.nrpages) { - spin_lock_irq(&inode->i_data.tree_lock); - spin_unlock_irq(&inode->i_data.tree_lock); - LASSERTF(inode->i_data.nrpages == 0, - "inode="DFID"(%p) nrpages=%lu, see " - "http://jira.whamcloud.com/browse/LU-118\n", - PFID(ll_inode2fid(inode)), inode, - inode->i_data.nrpages); - } - /* Workaround end */ + /* It is last chance to write out dirty pages, + * otherwise we may lose data while umount */ + cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1); + + truncate_inode_pages_final(&inode->i_data); + + LASSERTF(inode->i_data.nrpages == 0, "inode="DFID"(%p) nrpages=%lu, " + "see https://jira.hpdd.intel.com/browse/LU-118\n", + PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages); #ifdef HAVE_SBOPS_EVICT_INODE ll_clear_inode(inode); @@ -2203,29 +2058,28 @@ int ll_iocontrol(struct inode *inode, struct file *file, body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - flags = body->flags; + flags = body->mbo_flags; ptlrpc_req_finished(req); - RETURN(put_user(flags, (int *)arg)); + RETURN(put_user(flags, (int __user *)arg)); } case FSFILT_IOC_SETFLAGS: { - struct lov_stripe_md *lsm; - struct obd_info oinfo = { { { 0 } } }; - struct md_op_data *op_data; + struct iattr *attr; + struct md_op_data *op_data; + struct cl_object *obj; - if (get_user(flags, (int *)arg)) - RETURN(-EFAULT); + if (get_user(flags, (int __user *)arg)) + RETURN(-EFAULT); op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) RETURN(PTR_ERR(op_data)); - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags; + op_data->op_attr_flags = flags; op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; - rc = md_setattr(sbi->ll_md_exp, op_data, - NULL, 0, NULL, 0, &req, NULL); + rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &req); ll_finish_md_op_data(op_data); ptlrpc_req_finished(req); if (rc) @@ -2233,32 +2087,18 @@ int ll_iocontrol(struct inode *inode, struct file *file, inode->i_flags = ll_ext_to_inode_flags(flags); - lsm = ccc_inode_lsm_get(inode); - if (!lsm_has_objects(lsm)) { - ccc_inode_lsm_put(inode, lsm); + obj = ll_i2info(inode)->lli_clob; + if (obj == NULL) RETURN(0); - } - OBDO_ALLOC(oinfo.oi_oa); - if (!oinfo.oi_oa) { - ccc_inode_lsm_put(inode, lsm); + OBD_ALLOC_PTR(attr); + if (attr == NULL) RETURN(-ENOMEM); - } - oinfo.oi_md = lsm; - oinfo.oi_oa->o_oi = lsm->lsm_oi; - oinfo.oi_oa->o_flags = flags; - oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | - OBD_MD_FLGROUP; - oinfo.oi_capa = ll_mdscapa_get(inode); - obdo_set_parent_fid(oinfo.oi_oa, &ll_i2info(inode)->lli_fid); - rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL); - capa_put(oinfo.oi_capa); - OBDO_FREE(oinfo.oi_oa); - ccc_inode_lsm_put(inode, lsm); - - if (rc && rc != -EPERM && rc != -EACCES) - CERROR("osc_setattr_async fails: rc = %d\n", rc); + attr->ia_valid = ATTR_ATTR_FLAG; + rc = cl_setattr_ost(obj, attr, flags); + + OBD_FREE_PTR(attr); RETURN(rc); } default: @@ -2290,6 +2130,8 @@ void ll_umount_begin(struct super_block *sb) struct ll_sb_info *sbi = ll_s2sbi(sb); struct obd_device *obd; struct obd_ioctl_data *ioc_data; + struct l_wait_info lwi; + wait_queue_head_t waitq; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb, @@ -2297,7 +2139,7 @@ void ll_umount_begin(struct super_block *sb) obd = class_exp2obd(sbi->ll_md_exp); if (obd == NULL) { - CERROR("Invalid MDC connection handle "LPX64"\n", + CERROR("Invalid MDC connection handle %#llx\n", sbi->ll_md_exp->exp_handle.h_cookie); EXIT; return; @@ -2306,7 +2148,7 @@ void ll_umount_begin(struct super_block *sb) obd = class_exp2obd(sbi->ll_dt_exp); if (obd == NULL) { - CERROR("Invalid LOV connection handle "LPX64"\n", + CERROR("Invalid LOV connection handle %#llx\n", sbi->ll_dt_exp->exp_handle.h_cookie); EXIT; return; @@ -2325,10 +2167,14 @@ void ll_umount_begin(struct super_block *sb) } /* Really, we'd like to wait until there are no requests outstanding, - * and then continue. For now, we just invalidate the requests, - * schedule() and sleep one second if needed, and hope. + * and then continue. For now, we just periodically checking for vfs + * to decrement mnt_cnt and hope to finish it within 10sec. */ - schedule(); + init_waitqueue_head(&waitq); + lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(10), + cfs_time_seconds(1), NULL, NULL); + l_wait_event(waitq, may_umount(sbi->ll_mnt.mnt), &lwi); + EXIT; } @@ -2364,20 +2210,63 @@ int ll_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } +/** + * Cleanup the open handle that is cached on MDT-side. + * + * For open case, the client side open handling thread may hit error + * after the MDT grant the open. Under such case, the client should + * send close RPC to the MDT as cleanup; otherwise, the open handle + * on the MDT will be leaked there until the client umount or evicted. + * + * In further, if someone unlinked the file, because the open handle + * holds the reference on such file/object, then it will block the + * subsequent threads that want to locate such object via FID. + * + * \param[in] sb super block for this file-system + * \param[in] open_req pointer to the original open request + */ +void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req) +{ + struct mdt_body *body; + struct md_op_data *op_data; + struct ptlrpc_request *close_req = NULL; + struct obd_export *exp = ll_s2sbi(sb)->ll_md_exp; + ENTRY; + + body = req_capsule_server_get(&open_req->rq_pill, &RMF_MDT_BODY); + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) { + CWARN("%s: cannot allocate op_data to release open handle for " + DFID"\n", + ll_get_fsname(sb, NULL, 0), PFID(&body->mbo_fid1)); + + RETURN_EXIT; + } + + op_data->op_fid1 = body->mbo_fid1; + op_data->op_handle = body->mbo_handle; + op_data->op_mod_time = cfs_time_current_sec(); + md_close(exp, op_data, NULL, &close_req); + ptlrpc_req_finished(close_req); + ll_finish_md_op_data(op_data); + + EXIT; +} + int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, struct super_block *sb, struct lookup_intent *it) { struct ll_sb_info *sbi = NULL; - struct lustre_md md = { 0 }; + struct lustre_md md = { NULL }; int rc; ENTRY; - LASSERT(*inode || sb); - sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode); - rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp, - sbi->ll_md_exp, &md); - if (rc) - RETURN(rc); + LASSERT(*inode || sb); + sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode); + rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp, + sbi->ll_md_exp, &md); + if (rc != 0) + GOTO(cleanup, rc); if (*inode) { rc = ll_update_inode(*inode, &md); @@ -2386,13 +2275,18 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, } else { LASSERT(sb != NULL); - /* - * At this point server returns to client's same fid as client - * generated for creating. So using ->fid1 is okay here. - */ - LASSERT(fid_is_sane(&md.body->fid1)); + /* + * At this point server returns to client's same fid as client + * generated for creating. So using ->fid1 is okay here. + */ + if (!fid_is_sane(&md.body->mbo_fid1)) { + CERROR("%s: Fid is insane "DFID"\n", + ll_get_fsname(sb, NULL, 0), + PFID(&md.body->mbo_fid1)); + GOTO(out, rc = -EINVAL); + } - *inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1, + *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1, sbi->ll_flags & LL_SBI_32BIT_API), &md); if (IS_ERR(*inode)) { @@ -2417,11 +2311,11 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, * 2. layout was changed by another client * 3. proc2: refresh layout and layout lock granted * 4. proc1: to apply a stale layout */ - if (it != NULL && it->d.lustre.it_lock_mode != 0) { + if (it != NULL && it->it_lock_mode != 0) { struct lustre_handle lockh; struct ldlm_lock *lock; - lockh.cookie = it->d.lustre.it_lock_handle; + lockh.cookie = it->it_lock_handle; lock = ldlm_handle2lock(&lockh); LASSERT(lock != NULL); if (ldlm_has_layout(lock)) { @@ -2431,27 +2325,31 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, conf.coc_opc = OBJECT_CONF_SET; conf.coc_inode = *inode; conf.coc_lock = lock; - conf.u.coc_md = &md; + conf.u.coc_layout = md.layout; (void)ll_layout_conf(*inode, &conf); } LDLM_LOCK_PUT(lock); } + GOTO(out, rc = 0); + out: - if (md.lsm != NULL) - obd_free_memmd(sbi->ll_dt_exp, &md.lsm); md_free_lustre_md(sbi->ll_md_exp, &md); - RETURN(rc); + +cleanup: + if (rc != 0 && it != NULL && it->it_op & IT_OPEN) + ll_open_cleanup(sb != NULL ? sb : (*inode)->i_sb, req); + + return rc; } -int ll_obd_statfs(struct inode *inode, void *arg) +int ll_obd_statfs(struct inode *inode, void __user *arg) { struct ll_sb_info *sbi = NULL; struct obd_export *exp; char *buf = NULL; struct obd_ioctl_data *data = NULL; __u32 type; - __u32 flags; int len = 0, rc; if (!inode || !(sbi = ll_i2sbi(inode))) @@ -2480,8 +2378,7 @@ int ll_obd_statfs(struct inode *inode, void *arg) else GOTO(out_statfs, rc = -ENODEV); - flags = (type & LL_STATFS_NODELAY) ? OBD_STATFS_NODELAY : 0; - rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, &flags); + rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, NULL); if (rc) GOTO(out_statfs, rc); out_statfs: @@ -2509,20 +2406,20 @@ int ll_process_config(struct lustre_cfg *lcfg) /* Note we have not called client_common_fill_super yet, so proc fns must be able to handle that! */ - rc = class_process_proc_seq_param(PARAM_LLITE, lprocfs_llite_obd_vars, - lcfg, sb); + rc = class_process_proc_param(PARAM_LLITE, lprocfs_llite_obd_vars, + lcfg, sb); if (rc > 0) rc = 0; return rc; } -/* this function prepares md_op_data hint for passing ot down to MD stack. */ -struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data, - struct inode *i1, struct inode *i2, - const char *name, int namelen, - int mode, __u32 opc, void *data) +/* this function prepares md_op_data hint for passing it down to MD stack. */ +struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, + struct inode *i1, struct inode *i2, + const char *name, size_t namelen, + __u32 mode, __u32 opc, void *data) { - LASSERT(i1 != NULL); + LASSERT(i1 != NULL); if (name == NULL) { /* Do not reuse namelen for something else. */ @@ -2536,26 +2433,28 @@ struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data, return ERR_PTR(-EINVAL); } - if (op_data == NULL) - OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + OBD_ALLOC_PTR(op_data); - if (op_data == NULL) - return ERR_PTR(-ENOMEM); + if (op_data == NULL) + return ERR_PTR(-ENOMEM); ll_i2gids(op_data->op_suppgids, i1, i2); op_data->op_fid1 = *ll_inode2fid(i1); - op_data->op_capa1 = ll_mdscapa_get(i1); - if (S_ISDIR(i1->i_mode)) + op_data->op_default_stripe_offset = -1; + if (S_ISDIR(i1->i_mode)) { op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md; + if (opc == LUSTRE_OPC_MKDIR) + op_data->op_default_stripe_offset = + ll_i2info(i1)->lli_def_stripe_offset; + } if (i2) { op_data->op_fid2 = *ll_inode2fid(i2); - op_data->op_capa2 = ll_mdscapa_get(i2); if (S_ISDIR(i2->i_mode)) op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md; } else { fid_zero(&op_data->op_fid2); - op_data->op_capa2 = NULL; } if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH) @@ -2571,39 +2470,21 @@ struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data, op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); op_data->op_cap = cfs_curproc_cap_pack(); - op_data->op_bias = 0; - op_data->op_cli_flags = 0; if ((opc == LUSTRE_OPC_CREATE) && (name != NULL) && - filename_is_volatile(name, namelen, NULL)) + filename_is_volatile(name, namelen, &op_data->op_mds)) { op_data->op_bias |= MDS_CREATE_VOLATILE; - op_data->op_mds = 0; - op_data->op_data = data; - - /* If the file is being opened after mknod() (normally due to NFS) - * try to use the default stripe data from parent directory for - * allocating OST objects. Try to pass the parent FID to MDS. */ - if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) && - !ll_i2info(i2)->lli_has_smd) { - struct ll_inode_info *lli = ll_i2info(i2); - - spin_lock(&lli->lli_lock); - if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid))) - op_data->op_fid1 = lli->lli_pfid; - spin_unlock(&lli->lli_lock); - /** We ignore parent's capability temporary. */ + } else { + op_data->op_mds = 0; } - - /* When called by ll_setattr_raw, file is i1. */ - if (LLIF_DATA_MODIFIED & ll_i2info(i1)->lli_flags) - op_data->op_bias |= MDS_DATA_MODIFIED; + op_data->op_data = data; return op_data; } void ll_finish_md_op_data(struct md_op_data *op_data) { - capa_put(op_data->op_capa1); - capa_put(op_data->op_capa2); + security_release_secctx(op_data->op_file_secctx, + op_data->op_file_secctx_size); OBD_FREE_PTR(op_data); } @@ -2641,6 +2522,9 @@ int ll_show_options(struct seq_file *seq, struct vfsmount *vfs) if (sbi->ll_flags & LL_SBI_USER_FID2PATH) seq_puts(seq, ",user_fid2path"); + if (sbi->ll_flags & LL_SBI_ALWAYS_PING) + seq_puts(seq, ",always_ping"); + RETURN(0); } @@ -2663,7 +2547,7 @@ int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg) if (!obd) RETURN(-ENOENT); - if (copy_to_user((void *)arg, obd->obd_name, + if (copy_to_user((void __user *)arg, obd->obd_name, strlen(obd->obd_name) + 1)) RETURN(-EFAULT); @@ -2721,7 +2605,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret) { char *buf, *path = NULL; struct dentry *dentry = NULL; - struct ccc_object *obj = cl_inode2ccc(page->mapping->host); + struct inode *inode = page->mapping->host; /* this can be called inside spin lock so use GFP_ATOMIC. */ buf = (char *)__get_free_page(GFP_ATOMIC); @@ -2735,7 +2619,7 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret) "%s: dirty page discard: %s/fid: "DFID"/%s may get corrupted " "(rc %d)\n", ll_get_fsname(page->mapping->host->i_sb, NULL, 0), s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev, - PFID(&obj->cob_header.coh_lu.loh_fid), + PFID(ll_inode2fid(inode)), (path && !IS_ERR(path)) ? path : "", ioret); if (dentry != NULL) @@ -2745,6 +2629,32 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret) free_page((unsigned long)buf); } +ssize_t ll_copy_user_md(const struct lov_user_md __user *md, + struct lov_user_md **kbuf) +{ + struct lov_user_md lum; + ssize_t lum_size; + ENTRY; + + if (copy_from_user(&lum, md, sizeof(lum))) + RETURN(-EFAULT); + + lum_size = ll_lov_user_md_size(&lum); + if (lum_size < 0) + RETURN(lum_size); + + OBD_ALLOC(*kbuf, lum_size); + if (*kbuf == NULL) + RETURN(-ENOMEM); + + if (copy_from_user(*kbuf, md, lum_size) != 0) { + OBD_FREE(*kbuf, lum_size); + RETURN(-EFAULT); + } + + RETURN(lum_size); +} + /* * Compute llite root squash state after a change of root squash * configuration setting or add/remove of a lnet nid @@ -2781,4 +2691,120 @@ void ll_compute_rootsquash_state(struct ll_sb_info *sbi) up_write(&squash->rsi_sem); } +/** + * Parse linkea content to extract information about a given hardlink + * + * \param[in] ldata - Initialized linkea data + * \param[in] linkno - Link identifier + * \param[out] parent_fid - The entry's parent FID + * \param[out] ln - Entry name destination buffer + * + * \retval 0 on success + * \retval Appropriate negative error code on failure + */ +static int ll_linkea_decode(struct linkea_data *ldata, unsigned int linkno, + struct lu_fid *parent_fid, struct lu_name *ln) +{ + unsigned int idx; + int rc; + ENTRY; + + rc = linkea_init_with_rec(ldata); + if (rc < 0) + RETURN(rc); + + if (linkno >= ldata->ld_leh->leh_reccount) + /* beyond last link */ + RETURN(-ENODATA); + + linkea_first_entry(ldata); + for (idx = 0; ldata->ld_lee != NULL; idx++) { + linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, ln, + parent_fid); + if (idx == linkno) + break; + + linkea_next_entry(ldata); + } + + if (idx < linkno) + RETURN(-ENODATA); + + RETURN(0); +} + +/** + * Get parent FID and name of an identified link. Operation is performed for + * a given link number, letting the caller iterate over linkno to list one or + * all links of an entry. + * + * \param[in] file - File descriptor against which to perform the operation + * \param[in,out] arg - User-filled structure containing the linkno to operate + * on and the available size. It is eventually filled with + * the requested information or left untouched on error + * + * \retval - 0 on success + * \retval - Appropriate negative error code on failure + */ +int ll_getparent(struct file *file, struct getparent __user *arg) +{ + struct dentry *dentry = file_dentry(file); + struct inode *inode = file_inode(file); + struct linkea_data *ldata; + struct lu_buf buf = LU_BUF_NULL; + struct lu_name ln; + struct lu_fid parent_fid; + __u32 linkno; + __u32 name_size; + int rc; + ENTRY; + + if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) && + !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH)) + RETURN(-EPERM); + + if (get_user(name_size, &arg->gp_name_size)) + RETURN(-EFAULT); + + if (get_user(linkno, &arg->gp_linkno)) + RETURN(-EFAULT); + + if (name_size > PATH_MAX) + RETURN(-EINVAL); + + OBD_ALLOC(ldata, sizeof(*ldata)); + if (ldata == NULL) + RETURN(-ENOMEM); + + rc = linkea_data_new(ldata, &buf); + if (rc < 0) + GOTO(ldata_free, rc); + + rc = ll_getxattr(dentry, XATTR_NAME_LINK, buf.lb_buf, buf.lb_len); + if (rc < 0) + GOTO(lb_free, rc); + + rc = ll_linkea_decode(ldata, linkno, &parent_fid, &ln); + if (rc < 0) + GOTO(lb_free, rc); + + if (ln.ln_namelen >= name_size) + GOTO(lb_free, rc = -EOVERFLOW); + + if (copy_to_user(&arg->gp_fid, &parent_fid, sizeof(arg->gp_fid))) + GOTO(lb_free, rc = -EFAULT); + + if (copy_to_user(&arg->gp_name, ln.ln_name, ln.ln_namelen)) + GOTO(lb_free, rc = -EFAULT); + + if (put_user('\0', arg->gp_name + ln.ln_namelen)) + GOTO(lb_free, rc = -EFAULT); + +lb_free: + lu_buf_free(&buf); +ldata_free: + OBD_FREE(ldata, sizeof(*ldata)); + + RETURN(rc); +}