From: Qian Yingjin Date: Fri, 1 Nov 2019 08:58:26 +0000 (+0800) Subject: LU-10934 llite: integrate statx() API with Lustre X-Git-Tag: 2.13.54~22 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=3f7853b31ef6f5a6c2837403910083057728a842 LU-10934 llite: integrate statx() API with Lustre The statx() system call interface can specify a bitmask to fetch specific attributes from a file (e.g. st_uid, st_gid, st_mode, and st_btime = file creation time), rather than fetching all of the normal stat() attributes (such as st_size and st_blocks). It also has an AT_STATX_DONT_SYNC mode which allows the kernel to return cached attributes without flushing all of the client data and fetching an accurate result from the server. The conditions for adding statx() API for Lustre are mature: 1. statx() was added in Linux 4.11+; 2. glibc supports statx() (glibc 2.28+ -> RHEL 8, Ubuntu 18.10+); 3. The support for stat(1) and ls(1) to use statx(3) to fetch only the required attributes has landed in the upstream GNU coreutils package. This patch integrates statx() API with Lustre so that we can take advantage of the efficiencies available: - Only fetch MDS attributes if STATX_SIZE, STATX_BLOCKS and STATX_MTIME are not requested, and avoid OSS glimpse RPCs completely; - Hook this into statahead to avoid async glimpse locks (AGL) if OST information is not needed; - Enhance the MDS RPC interface to return the file creation time stored in both ldiskfs and ZFS already, and enable STATX_BTIME; - Better support with AT_STATX_DONT_SYNC mode. Return the "lazy" attributes or cached attributes (even stale) on a client if available without any RPCs to servers (MDS and OSS). - statx (lustre/tests/statx): port coreutils ls/stat by using statx(3) system call if the OS supports it. - Test scripts: use statx() to verify the btime attribute and the advantages described above. 
Test-Parameters: clientdistro=el8 Test-Parameters: clientdistro=ubuntu1804 Signed-off-by: Qian Yingjin Change-Id: I8432c9029bad9dea3e1ebc13a0d6978131d9b929 Reviewed-on: https://review.whamcloud.com/36674 Reviewed-by: Andreas Dilger Reviewed-by: James Simmons Tested-by: jenkins Tested-by: Maloo --- diff --git a/debian/control b/debian/control index 8e115c7..2e5d2fb 100644 --- a/debian/control +++ b/debian/control @@ -4,7 +4,7 @@ Priority: optional Maintainer: Brian J. Murrell Uploaders: Brian J. Murrell Standards-Version: 3.8.3 -Build-Depends: module-assistant, libreadline-dev, debhelper (>=9.0.0), dpatch, automake (>=1.7) | automake1.7 | automake1.8 | automake1.9, pkg-config, libtool, libyaml-dev, libselinux-dev, libsnmp-dev, mpi-default-dev, bzip2, quilt, linux-headers-generic | linux-headers | linux-headers-amd64, rsync, libssl-dev +Build-Depends: module-assistant, libreadline-dev, debhelper (>=9.0.0), dpatch, automake (>=1.7) | automake1.7 | automake1.8 | automake1.9, pkg-config, libtool, libyaml-dev, libselinux-dev, libsnmp-dev, mpi-default-dev, bzip2, quilt, linux-headers-generic | linux-headers | linux-headers-amd64, rsync, libssl-dev, libiberty-dev Homepage: https://wiki.whamcloud.com/ Vcs-Git: git://git.whamcloud.com/fs/lustre-release.git diff --git a/debian/control.main b/debian/control.main index 8e115c7..2e5d2fb 100644 --- a/debian/control.main +++ b/debian/control.main @@ -4,7 +4,7 @@ Priority: optional Maintainer: Brian J. Murrell Uploaders: Brian J. 
Murrell Standards-Version: 3.8.3 -Build-Depends: module-assistant, libreadline-dev, debhelper (>=9.0.0), dpatch, automake (>=1.7) | automake1.7 | automake1.8 | automake1.9, pkg-config, libtool, libyaml-dev, libselinux-dev, libsnmp-dev, mpi-default-dev, bzip2, quilt, linux-headers-generic | linux-headers | linux-headers-amd64, rsync, libssl-dev +Build-Depends: module-assistant, libreadline-dev, debhelper (>=9.0.0), dpatch, automake (>=1.7) | automake1.7 | automake1.8 | automake1.9, pkg-config, libtool, libyaml-dev, libselinux-dev, libsnmp-dev, mpi-default-dev, bzip2, quilt, linux-headers-generic | linux-headers | linux-headers-amd64, rsync, libssl-dev, libiberty-dev Homepage: https://wiki.whamcloud.com/ Vcs-Git: git://git.whamcloud.com/fs/lustre-release.git diff --git a/debian/lustre-tests.install b/debian/lustre-tests.install index 882ca28..5972ff0 100644 --- a/debian/lustre-tests.install +++ b/debian/lustre-tests.install @@ -1,4 +1,5 @@ debian/tmp/usr/lib/lustre/tests/* usr/lib/lustre/tests debian/tmp/usr/bin/mcreate usr/bin debian/tmp/usr/bin/munlink usr/bin +debian/tmp/usr/bin/statx usr/bin debian/tmp/usr/sbin/wire* usr/sbin diff --git a/debian/rules b/debian/rules index cbb4a5b..262ea47 100755 --- a/debian/rules +++ b/debian/rules @@ -252,6 +252,7 @@ binary-$(UTILS_PKG): build-stamp debian/$(UTILS_PKG)/usr/bin/*-survey \ debian/$(UTILS_PKG)/usr/bin/mcreate \ debian/$(UTILS_PKG)/usr/bin/munlink \ + debian/$(UTILS_PKG)/usr/bin/statx \ debian/$(UTILS_PKG)/usr/sbin/wirecheck \ debian/$(UTILS_PKG)/usr/sbin/wiretest dh_makeshlibs -p $(UTILS_PKG) diff --git a/lustre.spec.in b/lustre.spec.in index cdd834e..20be3db 100644 --- a/lustre.spec.in +++ b/lustre.spec.in @@ -175,7 +175,7 @@ URL: https://wiki.whamcloud.com/ BuildRoot: %{_tmppath}/lustre-%{version}-root Requires: %{requires_kmod_name} = %{requires_kmod_version} zlib Requires: %{requires_yaml_name} -BuildRequires: libtool libyaml-devel zlib-devel +BuildRequires: libtool libyaml-devel zlib-devel binutils-devel %if 
%{_vendor}=="redhat" BuildRequires: redhat-rpm-config BuildRequires: pkgconfig @@ -556,6 +556,7 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/liblustreapi.la echo '%{_libdir}/lustre/tests/*' >>lustre-tests.files echo '%{_bindir}/mcreate' >>lustre-tests.files echo '%{_bindir}/munlink' >>lustre-tests.files +echo '%{_bindir}/statx' >>lustre-tests.files echo '%{_sbindir}/wirecheck' >>lustre-tests.files echo '%{_sbindir}/wiretest' >>lustre-tests.files if [ -n "$MPI_BIN" ]; then diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 6b5915d..2977275 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -2596,6 +2596,9 @@ ext2fs.h not found. Please install e2fsprogs development package. ]) ]) +# lustre/tests/statx_test.c +AC_CHECK_FUNCS([statx]) + # lustre/utils/lfs.c AS_IF([test "$enable_dist" = "no"], [ AC_CHECK_LIB([z], [crc32], [ diff --git a/lustre/include/lustre_compat.h b/lustre/include/lustre_compat.h index bf73cc2..fc040f3 100644 --- a/lustre/include/lustre_compat.h +++ b/lustre/include/lustre_compat.h @@ -116,12 +116,13 @@ static inline int d_in_lookup(struct dentry *dentry) #define ll_vfs_unlink(a, b) vfs_unlink(a, b) #endif -static inline int ll_vfs_getattr(struct path *path, struct kstat *st) +static inline int ll_vfs_getattr(struct path *path, struct kstat *st, + u32 request_mask, unsigned int flags) { int rc; #ifdef HAVE_INODEOPS_ENHANCED_GETATTR - rc = vfs_getattr(path, st, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); + rc = vfs_getattr(path, st, request_mask, flags); #else rc = vfs_getattr(path, st); #endif diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 3ca4e56..6e52437 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -235,7 +235,12 @@ struct statx { #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ -#endif +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of 
synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + +#endif /* STATX_BASIC_STATS */ typedef struct statx lstatx_t; diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index b60f5d7..a02055f 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -317,7 +317,7 @@ static int ll_revalidate_dentry(struct dentry *dentry, return -ECHILD; if (dentry_may_statahead(dir, dentry)) - ll_statahead(dir, &dentry, dentry->d_inode == NULL); + ll_revalidate_statahead(dir, &dentry, dentry->d_inode == NULL); return 1; } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 46377df..4085d54 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -4688,14 +4688,29 @@ static int ll_merge_md_attr(struct inode *inode) RETURN(0); } -int ll_getattr_dentry(struct dentry *de, struct kstat *stat) +int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask, + unsigned int flags) { struct inode *inode = de->d_inode; struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_inode_info *lli = ll_i2info(inode); + struct inode *dir = de->d_parent->d_inode; + bool need_glimpse = true; ktime_t kstart = ktime_get(); int rc; + /* The OST object(s) determine the file size, blocks and mtime. 
*/ + if (!(request_mask & STATX_SIZE || request_mask & STATX_BLOCKS || + request_mask & STATX_MTIME)) + need_glimpse = false; + + if (dentry_may_statahead(dir, de)) + ll_start_statahead(dir, de, need_glimpse && + !(flags & AT_STATX_DONT_SYNC)); + + if (flags & AT_STATX_DONT_SYNC) + GOTO(fill_attr, rc = 0); + rc = ll_inode_revalidate(de, IT_GETATTR); if (rc < 0) RETURN(rc); @@ -4703,10 +4718,36 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat) if (S_ISREG(inode->i_mode)) { bool cached; - rc = pcc_inode_getattr(inode, &cached); + if (!need_glimpse) + GOTO(fill_attr, rc); + + rc = pcc_inode_getattr(inode, request_mask, flags, &cached); if (cached && rc < 0) RETURN(rc); + if (cached) + GOTO(fill_attr, rc); + + /* + * If the returned attr is masked with OBD_MD_FLSIZE & + * OBD_MD_FLBLOCKS & OBD_MD_FLMTIME, it means that the file size + * or blocks obtained from MDT is strictly correct, and the file + * is usually not being modified by clients, and the [a|m|c]time + * got from MDT is also strictly correct. + * Under this circumstance, it does not need to send glimpse + * RPCs to OSTs for file attributes such as the size and blocks. + */ + if (lli->lli_attr_valid & OBD_MD_FLSIZE && + lli->lli_attr_valid & OBD_MD_FLBLOCKS && + lli->lli_attr_valid & OBD_MD_FLMTIME) { + inode->i_mtime.tv_sec = lli->lli_mtime; + if (lli->lli_attr_valid & OBD_MD_FLATIME) + inode->i_atime.tv_sec = lli->lli_atime; + if (lli->lli_attr_valid & OBD_MD_FLCTIME) + inode->i_ctime.tv_sec = lli->lli_ctime; + GOTO(fill_attr, rc); + } + /* In case of restore, the MDT has the right size and has * already send it back without granting the layout lock, * inode is up-to-date so glimpse is useless. 
@@ -4714,7 +4755,7 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat) * restore the MDT holds the layout lock so the glimpse will * block up to the end of restore (getattr will block) */ - if (!cached && !ll_file_test_flag(lli, LLIF_FILE_RESTORING)) { + if (!ll_file_test_flag(lli, LLIF_FILE_RESTORING)) { rc = ll_glimpse_size(inode); if (rc < 0) RETURN(rc); @@ -4727,11 +4768,15 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat) RETURN(rc); } - inode->i_atime.tv_sec = lli->lli_atime; - inode->i_mtime.tv_sec = lli->lli_mtime; - inode->i_ctime.tv_sec = lli->lli_ctime; + if (lli->lli_attr_valid & OBD_MD_FLATIME) + inode->i_atime.tv_sec = lli->lli_atime; + if (lli->lli_attr_valid & OBD_MD_FLMTIME) + inode->i_mtime.tv_sec = lli->lli_mtime; + if (lli->lli_attr_valid & OBD_MD_FLCTIME) + inode->i_ctime.tv_sec = lli->lli_ctime; } +fill_attr: OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30); if (ll_need_32bit_api(sbi)) { @@ -4763,6 +4808,26 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat) stat->size = i_size_read(inode); stat->blocks = inode->i_blocks; +#ifdef HAVE_INODEOPS_ENHANCED_GETATTR + if (flags & AT_STATX_DONT_SYNC) { + if (stat->size == 0 && + lli->lli_attr_valid & OBD_MD_FLLAZYSIZE) + stat->size = lli->lli_lazysize; + if (stat->blocks == 0 && + lli->lli_attr_valid & OBD_MD_FLLAZYBLOCKS) + stat->blocks = lli->lli_lazyblocks; + } + + if (lli->lli_attr_valid & OBD_MD_FLBTIME) { + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = lli->lli_btime; + } + + stat->attributes_mask = STATX_ATTR_IMMUTABLE | STATX_ATTR_APPEND; + stat->attributes |= ll_inode_to_ext_flags(inode->i_flags); + stat->result_mask &= request_mask; +#endif + ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, ktime_us_delta(ktime_get(), kstart)); @@ -4773,13 +4838,15 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat) int ll_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { - struct dentry *de = path->dentry; + return 
ll_getattr_dentry(path->dentry, stat, request_mask, flags); +} #else int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) { -#endif - return ll_getattr_dentry(de, stat); + return ll_getattr_dentry(de, stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT); } +#endif int cl_falloc(struct inode *inode, int mode, loff_t offset, loff_t len) { diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 43a46eb..76db0b0 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -142,6 +142,7 @@ struct ll_inode_info { s64 lli_atime; s64 lli_mtime; s64 lli_ctime; + s64 lli_btime; spinlock_t lli_agl_lock; /* Try to make the d::member and f::member are aligned. Before using @@ -229,6 +230,10 @@ struct ll_inode_info { struct mutex lli_group_mutex; __u64 lli_group_users; unsigned long lli_group_gid; + + __u64 lli_attr_valid; + __u64 lli_lazysize; + __u64 lli_lazyblocks; }; }; @@ -1017,7 +1022,8 @@ int ll_getattr(const struct path *path, struct kstat *stat, #else int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); #endif -int ll_getattr_dentry(struct dentry *de, struct kstat *stat); +int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask, + unsigned int flags); struct posix_acl *ll_get_acl(struct inode *inode, int type); #ifdef HAVE_IOP_SET_ACL #ifdef CONFIG_LUSTRE_FS_POSIX_ACL @@ -1397,7 +1403,9 @@ struct ll_statahead_info { atomic_t sai_cache_count; /* entry count in cache */ }; -int ll_statahead(struct inode *dir, struct dentry **dentry, bool unplug); +int ll_revalidate_statahead(struct inode *dir, struct dentry **dentry, + bool unplug); +int ll_start_statahead(struct inode *dir, struct dentry *dentry, bool agl); void ll_authorize_statahead(struct inode *dir, void *key); void ll_deauthorize_statahead(struct inode *dir, void *key); @@ -1476,7 +1484,8 @@ dentry_may_statahead(struct inode *dir, struct dentry *dentry) * 'lld_sa_generation == lli->lli_sa_generation'. 
*/ ldd = ll_d2d(dentry); - if (ldd != NULL && ldd->lld_sa_generation == lli->lli_sa_generation) + if (ldd != NULL && lli->lli_sa_generation && + ldd->lld_sa_generation == lli->lli_sa_generation) return false; return true; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 41cf4aa..88b0837 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -2115,6 +2115,9 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) lli->lli_ctime = body->mbo_ctime; } + if (body->mbo_valid & OBD_MD_FLBTIME) + lli->lli_btime = body->mbo_btime; + /* Clear i_flags to remove S_NOSEC before permissions are updated */ if (body->mbo_valid & OBD_MD_FLFLAGS) ll_update_inode_flags(inode, body->mbo_flags); @@ -2153,6 +2156,7 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) LASSERT(fid_seq(&lli->lli_fid) != 0); + lli->lli_attr_valid = body->mbo_valid; if (body->mbo_valid & OBD_MD_FLSIZE) { i_size_write(inode, body->mbo_size); @@ -2162,6 +2166,11 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) if (body->mbo_valid & OBD_MD_FLBLOCKS) inode->i_blocks = body->mbo_blocks; + } else { + if (body->mbo_valid & OBD_MD_FLLAZYSIZE) + lli->lli_lazysize = body->mbo_size; + if (body->mbo_valid & OBD_MD_FLLAZYBLOCKS) + lli->lli_lazyblocks = body->mbo_blocks; } if (body->mbo_valid & OBD_MD_TSTATE) { diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 47fb612..a7d7779 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -762,7 +762,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, it = &lookup_it; if (it->it_op == IT_GETATTR && dentry_may_statahead(parent, dentry)) { - rc = ll_statahead(parent, &dentry, 0); + rc = ll_revalidate_statahead(parent, &dentry, 0); if (rc == 1) RETURN(dentry == save ? 
NULL : dentry); } diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index 70e6bfc..b80173e 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -1730,7 +1730,8 @@ int pcc_inode_setattr(struct inode *inode, struct iattr *attr, RETURN(rc); } -int pcc_inode_getattr(struct inode *inode, bool *cached) +int pcc_inode_getattr(struct inode *inode, u32 request_mask, + unsigned int flags, bool *cached) { struct ll_inode_info *lli = ll_i2info(inode); const struct cred *old_cred; @@ -1752,7 +1753,8 @@ int pcc_inode_getattr(struct inode *inode, bool *cached) RETURN(0); old_cred = override_creds(pcc_super_cred(inode->i_sb)); - rc = ll_vfs_getattr(&ll_i2pcci(inode)->pcci_path, &stat); + rc = ll_vfs_getattr(&ll_i2pcci(inode)->pcci_path, &stat, request_mask, + flags); revert_creds(old_cred); if (rc) GOTO(out, rc); diff --git a/lustre/llite/pcc.h b/lustre/llite/pcc.h index e704da4..4b05c40 100644 --- a/lustre/llite/pcc.h +++ b/lustre/llite/pcc.h @@ -237,7 +237,8 @@ ssize_t pcc_file_read_iter(struct kiocb *iocb, struct iov_iter *iter, bool *cached); ssize_t pcc_file_write_iter(struct kiocb *iocb, struct iov_iter *iter, bool *cached); -int pcc_inode_getattr(struct inode *inode, bool *cached); +int pcc_inode_getattr(struct inode *inode, u32 request_mask, + unsigned int flags, bool *cached); int pcc_inode_setattr(struct inode *inode, struct iattr *attr, bool *cached); ssize_t pcc_file_splice_read(struct file *in_file, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, diff --git a/lustre/llite/statahead.c b/lustre/llite/statahead.c index e20dd4d..9a25ae7 100644 --- a/lustre/llite/statahead.c +++ b/lustre/llite/statahead.c @@ -992,10 +992,11 @@ static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai) "ll_agl_%u", plli->lli_opendir_pid); if (IS_ERR(task)) { CERROR("can't start ll_agl thread, rc: %ld\n", PTR_ERR(task)); + sai->sai_agl_valid = 0; RETURN_EXIT; } sai->sai_agl_task = task; - sai->sai_agl_valid = 1; + LASSERT(sai->sai_agl_valid == 1); 
atomic_inc(&ll_i2sbi(d_inode(parent))->ll_agl_total); /* Get an extra reference that the thread holds */ ll_sai_get(d_inode(parent)); @@ -1551,13 +1552,15 @@ out: * \param[in] dir parent directory * \param[in] dentry dentry that triggers statahead, normally the first * dirent under @dir + * \param[in] agl indicate whether AGL is needed * \retval -EAGAIN on success, because when this function is * called, it's already in lookup call, so client should * do it itself instead of waiting for statahead thread * to do it asynchronously. * \retval negative number upon error */ -static int start_statahead_thread(struct inode *dir, struct dentry *dentry) +static int start_statahead_thread(struct inode *dir, struct dentry *dentry, + bool agl) { struct ll_inode_info *lli = ll_i2info(dir); struct ll_statahead_info *sai = NULL; @@ -1587,6 +1590,7 @@ static int start_statahead_thread(struct inode *dir, struct dentry *dentry) GOTO(out, rc = -ENOMEM); sai->sai_ls_all = (first == LS_FIRST_DOT_DE); + sai->sai_agl_valid = agl; /* * if current lli_opendir_key was deauthorized, or dir re-opened by @@ -1616,7 +1620,7 @@ static int start_statahead_thread(struct inode *dir, struct dentry *dentry) GOTO(out, rc); } - if (ll_i2sbi(parent->d_inode)->ll_flags & LL_SBI_AGL_ENABLED) + if (ll_i2sbi(parent->d_inode)->ll_flags & LL_SBI_AGL_ENABLED && agl) ll_start_agl(parent, sai); atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_total); @@ -1647,6 +1651,25 @@ out: RETURN(rc); } +/* + * Check whether statahead for @dir was started. 
+ */ +static inline bool ll_statahead_started(struct inode *dir, bool agl) +{ + struct ll_inode_info *lli = ll_i2info(dir); + struct ll_statahead_info *sai; + + spin_lock(&lli->lli_sa_lock); + sai = lli->lli_sai; + if (sai && sai->sai_agl_valid != agl) + CDEBUG(D_READA, + "%s: Statahead AGL hint changed from %d to %d\n", + ll_i2sbi(dir)->ll_fsname, sai->sai_agl_valid, agl); + spin_unlock(&lli->lli_sa_lock); + + return !!sai; +} + /** * statahead entry function, this is called when client getattr on a file, it * will start statahead thread if this is the first dir entry, else revalidate @@ -1654,6 +1677,26 @@ out: * * \param[in] dir parent directory * \param[out] dentryp dentry to getattr + * \param[in] agl whether start the agl thread + * + * \retval 1 on success + * \retval 0 revalidation from statahead cache failed, caller needs + * to getattr from server directly + * \retval negative number on error, caller often ignores this and + * then getattr from server + */ +int ll_start_statahead(struct inode *dir, struct dentry *dentry, bool agl) +{ + if (!ll_statahead_started(dir, agl)) + return start_statahead_thread(dir, dentry, agl); + return 0; +} + +/** + * revalidate dentry from statahead cache. 
+ * + * \param[in] dir parent directory + * \param[out] dentryp dentry to getattr * \param[in] unplug unplug statahead window only (normally for negative * dentry) * \retval 1 on success @@ -1662,19 +1705,18 @@ out: * \retval negative number on error, caller often ignores this and * then getattr from server */ -int ll_statahead(struct inode *dir, struct dentry **dentryp, bool unplug) +int ll_revalidate_statahead(struct inode *dir, struct dentry **dentryp, + bool unplug) { struct ll_statahead_info *sai; + int rc = 0; sai = ll_sai_get(dir); if (sai) { - int rc; - rc = revalidate_statahead_dentry(dir, sai, dentryp, unplug); CDEBUG(D_READA, "revalidate statahead %pd: rc = %d.\n", *dentryp, rc); ll_sai_put(sai); - return rc; } - return start_statahead_thread(dir, *dentryp); + return rc; } diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 7af3342..9e1f61d 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -84,7 +84,7 @@ if MPITESTS SUBDIRS = mpi endif -bin_PROGRAMS = mcreate munlink +bin_PROGRAMS = mcreate munlink statx testdir = $(libdir)/lustre/tests test_SCRIPTS = $(noinst_SCRIPTS) test_PROGRAMS = $(THETESTS) @@ -111,4 +111,8 @@ ll_dirstripe_verify_LDADD = $(LIBLUSTREAPI) flocks_test_LDADD = $(LIBLUSTREAPI) $(PTHREAD_LIBS) create_foreign_dir_LDADD = $(LIBLUSTREAPI) check_fallocate_LDADD = $(LIBLUSTREAPI) +if UBUNTU +statx_CPPFLAGS := -I/usr/include/libiberty +endif +statx_LDADD = $(SELINUX) -liberty endif # TESTS diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 2106360..513ecae 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -6179,154 +6179,67 @@ test_56r() { } run_test 56r "check lfs find -size works" -test_56ra() { - [[ $MDS1_VERSION -gt $(version_code 2.12.58) ]] || - skip "MDS < 2.12.58 doesn't return LSOM data" - local dir=$DIR/$tdir - - [[ $OSC == "mdc" ]] && skip "DoM files" && return - - setup_56 $dir $NUMFILES $NUMDIRS "-c 1" +test_56ra_sub() { + local expected=$1 + local 
glimpses=$2 + local cmd="$3" cancel_lru_locks $OSC local rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - local expected=12 - local cmd="$LFS find -size 0 -type f -lazy $dir" local nums=$($cmd | wc -l) [ $nums -eq $expected ] || error "'$cmd' wrong: found $nums, expected $expected" local rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find -size 0 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - $LCTL get_param osc.*.stats - [ $rpcs_after -eq $((rpcs_before + 12)) ] || - error "'$cmd' should send 12 glimpse RPCs to OST" - cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=0 - cmd="$LFS find ! -size 0 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - $LCTL get_param mdc.*.stats - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find ! 
-size 0 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 12)) ] || - error "'$cmd' should send 12 glimpse RPCs to OST" + if (( rpcs_before + glimpses != rpcs_after )); then + echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" + $LCTL get_param osc.*.stats | grep ldlm_glimpse_enqueue - echo "test" > $dir/$tfile - echo "test2" > $dir/$tfile.2 && sync - cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=1 - cmd="$LFS find -size 5 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find -size 5 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 14)) ] || - error "'$cmd' should send 14 glimpse RPCs to OST" + if [[ $glimpses == 0 ]]; then + error "'$cmd' should not send glimpse RPCs to OST" + else + error "'$cmd' should send $glimpses glimpse RPCs to OST" + fi + fi +} - cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=1 - cmd="$LFS find -size +5 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find -size +5 -type f $dir" - 
nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 14)) ] || - error "'$cmd' should send 14 glimpse RPCs to OST" +test_56ra() { + [[ $MDS1_VERSION -gt $(version_code 2.12.58) ]] || + skip "MDS < 2.12.58 doesn't return LSOM data" + local dir=$DIR/$tdir - cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=2 - cmd="$LFS find -size +0 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find -size +0 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 14)) ] || - error "'$cmd' should send 14 glimpse RPCs to OST" + [[ $OSC == "mdc" ]] && skip "DoM files" && return + setup_56 $dir $NUMFILES $NUMDIRS "-c 1" + # open and close all files to ensure LSOM is updated cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=2 - cmd="$LFS find ! -size -5 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find ! 
-size -5 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 14)) ] || - error "'$cmd' should send 14 glimpse RPCs to OST" + find $dir -type f | xargs cat > /dev/null + + # expect_found glimpse_rpcs command_to_run + test_56ra_sub 12 0 "$LFS find -size 0 -type f -lazy $dir" + test_56ra_sub 12 12 "$LFS find -size 0 -type f $dir" + test_56ra_sub 0 0 "$LFS find ! -size 0 -type f -lazy $dir" + test_56ra_sub 0 12 "$LFS find ! -size 0 -type f $dir" + echo "test" > $dir/$tfile + echo "test2" > $dir/$tfile.2 && sync cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=12 - cmd="$LFS find -size -5 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find -size -5 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 14)) ] || - error "'$cmd' should send 14 glimpse RPCs to OST" + cat $dir/$tfile $dir/$tfile.2 > /dev/null + + test_56ra_sub 1 0 "$LFS find -size 5 -type f -lazy $dir" + test_56ra_sub 1 14 "$LFS find -size 5 -type f $dir" + test_56ra_sub 1 0 "$LFS find -size +5 -type f -lazy $dir" + test_56ra_sub 1 14 "$LFS find -size +5 -type f $dir" + + test_56ra_sub 2 0 "$LFS find -size +0 -type f -lazy $dir" + test_56ra_sub 2 14 "$LFS find -size +0 -type f $dir" + test_56ra_sub 2 0 "$LFS find ! 
-size -5 -type f -lazy $dir" + test_56ra_sub 2 14 "$LFS find ! -size -5 -type f $dir" + test_56ra_sub 12 0 "$LFS find -size -5 -type f -lazy $dir" + test_56ra_sub 12 14 "$LFS find -size -5 -type f $dir" } run_test 56ra "check lfs find -size -lazy works for data on OSTs" @@ -11512,8 +11425,8 @@ test_121() { #bug #10589 } run_test 121 "read cancel race =========" -test_123a() { # was test 123, statahead(bug 11401) - [ $PARALLEL == "yes" ] && skip "skip parallel run" +test_123a_base() { # was test 123, statahead(bug 11401) + local lsx="$1" SLOWOK=0 if ! grep -q "processor.*: 1" /proc/cpuinfo; then @@ -11525,76 +11438,120 @@ test_123a() { # was test 123, statahead(bug 11401) test_mkdir $DIR/$tdir NUMFREE=$(df -i -P $DIR | tail -n 1 | awk '{ print $4 }') [[ $NUMFREE -gt 100000 ]] && NUMFREE=100000 || NUMFREE=$((NUMFREE-1000)) - MULT=10 - for ((i=100, j=0; i<=$NUMFREE; j=$i, i=$((i * MULT)) )); do - createmany -o $DIR/$tdir/$tfile $j $((i - j)) - - max=`lctl get_param -n llite.*.statahead_max | head -n 1` - lctl set_param -n llite.*.statahead_max 0 - lctl get_param llite.*.statahead_max - cancel_lru_locks mdc - cancel_lru_locks osc - stime=`date +%s` - time ls -l $DIR/$tdir | wc -l - etime=`date +%s` - delta=$((etime - stime)) - log "ls $i files without statahead: $delta sec" - lctl set_param llite.*.statahead_max=$max - - swrong=`lctl get_param -n llite.*.statahead_stats | grep "statahead wrong:" | awk '{print $3}'` - lctl get_param -n llite.*.statahead_max | grep '[0-9]' - cancel_lru_locks mdc - cancel_lru_locks osc - stime=`date +%s` - time ls -l $DIR/$tdir | wc -l - etime=`date +%s` - delta_sa=$((etime - stime)) - log "ls $i files with statahead: $delta_sa sec" - lctl get_param -n llite.*.statahead_stats - ewrong=`lctl get_param -n llite.*.statahead_stats | grep "statahead wrong:" | awk '{print $3}'` + MULT=10 + for ((i=100, j=0; i<=$NUMFREE; j=$i, i=$((i * MULT)) )); do + createmany -o $DIR/$tdir/$tfile $j $((i - j)) + + max=$(lctl get_param -n llite.*.statahead_max 
| head -n 1) + lctl set_param -n llite.*.statahead_max 0 + lctl get_param llite.*.statahead_max + cancel_lru_locks mdc + cancel_lru_locks osc + stime=$(date +%s) + time $lsx $DIR/$tdir | wc -l + etime=$(date +%s) + delta=$((etime - stime)) + log "$lsx $i files without statahead: $delta sec" + lctl set_param llite.*.statahead_max=$max + + swrong=$(lctl get_param -n llite.*.statahead_stats | + grep "statahead wrong:" | awk '{print $3}') + lctl get_param -n llite.*.statahead_max | grep '[0-9]' + cancel_lru_locks mdc + cancel_lru_locks osc + stime=$(date +%s) + time $lsx $DIR/$tdir | wc -l + etime=$(date +%s) + delta_sa=$((etime - stime)) + log "$lsx $i files with statahead: $delta_sa sec" + lctl get_param -n llite.*.statahead_stats + ewrong=$(lctl get_param -n llite.*.statahead_stats | + grep "statahead wrong:" | awk '{print $3}') [[ $swrong -lt $ewrong ]] && log "statahead was stopped, maybe too many locks held!" [[ $delta -eq 0 || $delta_sa -eq 0 ]] && continue - if [ $((delta_sa * 100)) -gt $((delta * 105)) -a $delta_sa -gt $((delta + 2)) ]; then - max=`lctl get_param -n llite.*.statahead_max | head -n 1` - lctl set_param -n llite.*.statahead_max 0 - lctl get_param llite.*.statahead_max - cancel_lru_locks mdc - cancel_lru_locks osc - stime=`date +%s` - time ls -l $DIR/$tdir | wc -l - etime=`date +%s` - delta=$((etime - stime)) - log "ls $i files again without statahead: $delta sec" - lctl set_param llite.*.statahead_max=$max - if [ $((delta_sa * 100)) -gt $((delta * 105)) -a $delta_sa -gt $((delta + 2)) ]; then - if [ $SLOWOK -eq 0 ]; then - error "ls $i files is slower with statahead!" - else - log "ls $i files is slower with statahead!" 
- fi - break - fi - fi + if [ $((delta_sa * 100)) -gt $((delta * 105)) -a $delta_sa -gt $((delta + 2)) ]; then + max=$(lctl get_param -n llite.*.statahead_max | + head -n 1) + lctl set_param -n llite.*.statahead_max 0 + lctl get_param llite.*.statahead_max + cancel_lru_locks mdc + cancel_lru_locks osc + stime=$(date +%s) + time $lsx $DIR/$tdir | wc -l + etime=$(date +%s) + delta=$((etime - stime)) + log "$lsx $i files again without statahead: $delta sec" + lctl set_param llite.*.statahead_max=$max + if [ $((delta_sa * 100 > delta * 105 && delta_sa > delta + 2)) ]; then + if [ $SLOWOK -eq 0 ]; then + error "$lsx $i files is slower with statahead!" + else + log "$lsx $i files is slower with statahead!" + fi + break + fi + fi - [ $delta -gt 20 ] && break - [ $delta -gt 8 ] && MULT=$((50 / delta)) - [ "$SLOW" = "no" -a $delta -gt 5 ] && break - done - log "ls done" + [ $delta -gt 20 ] && break + [ $delta -gt 8 ] && MULT=$((50 / delta)) + [ "$SLOW" = "no" -a $delta -gt 5 ] && break + done + log "$lsx done" - stime=`date +%s` - rm -r $DIR/$tdir - sync - etime=`date +%s` - delta=$((etime - stime)) - log "rm -r $DIR/$tdir/: $delta seconds" - log "rm done" - lctl get_param -n llite.*.statahead_stats + stime=$(date +%s) + rm -r $DIR/$tdir + sync + etime=$(date +%s) + delta=$((etime - stime)) + log "rm -r $DIR/$tdir/: $delta seconds" + log "rm done" + lctl get_param -n llite.*.statahead_stats +} + +test_123aa() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + test_123a_base "ls -l" +} +run_test 123aa "verify statahead work" + +test_123ab() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + statx_supported || skip_env "Test must be statx() syscall supported" + + test_123a_base "$STATX -l" +} +run_test 123ab "verify statahead work by using statx" + +test_123ac() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + statx_supported || skip_env "Test must be statx() syscall supported" + + local rpcs_before + local rpcs_after + local agl_before + local 
agl_after + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + agl_before=$($LCTL get_param -n llite.*.statahead_stats | + awk '/agl.total:/ {print $3}') + test_123a_base "$STATX -c \"%n %i %A %h %u %g %W %X %Z\" -D" + test_123a_base "$STATX --cached=always -D" + agl_after=$($LCTL get_param -n llite.*.statahead_stats | + awk '/agl.total:/ {print $3}') + [ $agl_before -eq $agl_after ] || + error "Should not trigger AGL thread - $agl_before:$agl_after" + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $rpcs_before ] || + error "$STATX should not send glimpse RPCs to $OSC" } -run_test 123a "verify statahead work" +run_test 123ac "verify statahead work by using statx without glimpse RPCs" test_123b () { # statahead(bug 15027) [ $PARALLEL == "yes" ] && skip "skip parallel run" diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 8d2bdef..07932e5 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -2922,8 +2922,11 @@ test_51b() { local tmpfile=`mktemp` - # create an empty file - $MCREATE $DIR1/$tfile || error "mcreate $DIR1/$tfile failed" + $LFS setstripe -E 1M -c 1 -E -1 --extension-size 64M $DIR1/$tfile || + error "Create $DIR1/$tfile failed" + + dd if=/dev/zero of=$DIR1/$tfile bs=1k count=1 conv=notrunc || + error "dd $DIR1/$tfile failed" # delay glimpse so that layout has changed when glimpse finish #define OBD_FAIL_GLIMPSE_DELAY 0x1404 @@ -2932,14 +2935,14 @@ test_51b() { local pid=$! 
sleep 1 - # create layout of testing file - dd if=/dev/zero of=$DIR1/$tfile bs=1k count=1 conv=notrunc >/dev/null || + # extend layout of testing file + dd if=/dev/zero of=$DIR1/$tfile bs=1M count=1 seek=2 conv=notrunc || error "dd $DIR1/$tfile failed" wait $pid local fsize=$(cat $tmpfile) - [ x$fsize = x1024 ] || error "file size is $fsize, should be 1024" + [ x$fsize = x3145728 ] || error "file size is $fsize, should be 3145728" rm -f $DIR1/$tfile $tmpfile } @@ -5115,6 +5118,113 @@ test_105() { } run_test 105 "Glimpse and lock cancel race" +test_106a() { + [ "$mds1_FSTYPE" == "ldiskfs" ] && statx_supported || + skip_env "Test only for ldiskfs and statx() supported" + + local btime + local mdt_btime + local output + local mdtdev=$(mdsdevname ${SINGLEMDS//mds/}) + + dd if=/dev/zero of=$DIR/$tfile bs=1k count=1 conv=notrunc + btime=$($STATX -c %W $DIR/$tfile) + output=$(do_facet mds1 "$DEBUGFS -c -R 'stat ROOT/$tfile' $mdtdev") + ((mdt_btime=$(awk -F ':' /btime/'{ print $2 }' <<< "$output"))) + [[ $btime == $mdt_btime ]] || + error "$DIR/$tfile btime ($btime:$mdt_btime) diff" + +} +run_test 106a "Verify the btime via statx()" + +test_106b() { + statx_supported || skip_env "statx() only test" + + local rpcs_before + local rpcs_after + + $LFS setstripe -c 1 $DIR/$tfile || error "$DIR/$tfile setstripe failed" + dd if=/dev/zero of=$DIR/$tfile bs=1k count=1 conv=notrunc + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX should send 1 glimpse RPC to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + # %n: FILENAME; %i: STATX_INO; %A STATX_MODE; %h STATX_NLINK; + # %u: STATX_UID; %g: STATX_GID; %W STATX_BTIME; %X STATX_ATIME; + # %Z: STATX_CTIME + $STATX -c "%n %i %A %h %u %g %W %X %Z" $DIR/$tfile + rpcs_after=$(calc_stats 
$OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $rpcs_before ] || + error "$STATX should not send glimpse RPCs to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX --cached=always $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $rpcs_before ] || + error "$STATX should not send glimpse RPCs to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX -c %Y $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX -c %Y should send 1 glimpse RPC to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX -c %s $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX -c %s should send 1 glimpse RPC to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX -c %b $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX -c %b should send 1 glimpse RPC to $OSC" +} +run_test 106b "Glimpse RPCs test for statx" + +test_106c() { + statx_supported || skip_env "statx() only test" + + local mask + + touch $DIR/$tfile + # Mask supported in stx_attributes by Lustre is + # STATX_ATTR_IMMUTABLE(0x10) | STATX_ATTR_APPEND(0x20) : (0x30). 
+ mask=$($STATX -c %q $DIR/$tfile) + [[ $mask == "30" ]] || + error "supported stx_attributes: got '$mask', expected '30'" + chattr +i $DIR/$tfile || error "chattr +i $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "10" ]] || + error "got immutable flags '$mask', expected '10'" + chattr -i $DIR/$tfile || error "chattr -i $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "0" ]] || error "got flags '$mask', expected '0'" + chattr +a $DIR/$tfile || error "chattr +a $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "20" ]] || error "got flags '$mask', expected '20'" + chattr -a $DIR/$tfile || error "chattr -a $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "0" ]] || error "got flags '$mask', expected '0'" + chattr +ia $DIR/$tfile || error "chattr +ia $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "30" ]] || error "got flags '$mask', expected '30'" + chattr -ia $DIR/$tfile || error "chattr -ia $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "0" ]] || error "got flags '$mask', expected '0'" +} +run_test 106c "Verify statx attributes mask" + log "cleanup: ======================================================" # kill and wait in each test only guarentee script finish, but command in script diff --git a/lustre/tests/statx.c b/lustre/tests/statx.c new file mode 100644 index 0000000..80bc295 --- /dev/null +++ b/lustre/tests/statx.c @@ -0,0 +1,1561 @@ +/* + * LGPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the GNU Lesser General Public License + * (LGPL) version 2.1 or (at your discretion) any later version. 
+ * (LGPL) version 2.1 accompanies this distribution, and is available at + * http://www.gnu.org/licenses/lgpl-2.1.html + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * LGPL HEADER END + */ +/* + * Copyright (c) 2019, DDN Storage Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + */ +/* + * + * Test for Lustre statx(). + * It uses some code in coreutils ('ls.c' and 'stat.c') for reference. + * + * Author: Qian Yingjin + */ +#define _ATFILE_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_SELINUX +#include +#endif + +/* Factor out some of the common --help and --version processing code. */ + +/* These enum values cannot possibly conflict with the option values + * ordinarily used by commands, including CHAR_MAX + 1, etc. Avoid + * CHAR_MIN - 1, as it may equal -1, the getopt end-of-options value. + */ +enum { + PRINTF_OPTION = (CHAR_MAX + 1), + GETOPT_HELP_CHAR = (CHAR_MIN - 2), + GETOPT_VERSION_CHAR = (CHAR_MIN - 3) +}; + +static bool o_quiet; + +#ifdef __NR_statx +#ifndef HAVE_STATX +static __attribute__((unused)) +ssize_t statx(int dfd, const char *filename, int flags, + unsigned int mask, struct statx *buffer) +{ + return syscall(__NR_statx, dfd, filename, flags, mask, buffer); +} +#endif /* HAVE_STATX */ + +/* coreutils/lib/intprops.h */ +#define _GL_SIGNED_TYPE_OR_EXPR(t) TYPE_SIGNED(__typeof__(t)) + +/* Bound on length of the string representing an unsigned integer + * value representable in B bits. log10 (2.0) < 146/485. The + * smallest value of B where this bound is not tight is 2621. 
+ */ +#define INT_BITS_STRLEN_BOUND(b) (((b) * 146 + 484) / 485) + +/* The width in bits of the integer type or expression T. + * Do not evaluate T. + * Padding bits are not supported; this is checked at compile-time below. + */ +#define TYPE_WIDTH(t) (sizeof(t) * CHAR_BIT) + +/* Bound on length of the string representing an integer type or expression T. + * Subtract 1 for the sign bit if T is signed, and then add 1 more for + * a minus sign if needed. + * + * Because _GL_SIGNED_TYPE_OR_EXPR sometimes returns 1 when its argument is + * unsigned, this macro may overestimate the true bound by one byte when + * applied to unsigned types of size 2, 4, 16, ... bytes. + */ +#define INT_STRLEN_BOUND(t) \ + (INT_BITS_STRLEN_BOUND(TYPE_WIDTH(t) - _GL_SIGNED_TYPE_OR_EXPR(t)) \ + + _GL_SIGNED_TYPE_OR_EXPR(t)) + +/* Bound on buffer size needed to represent an integer type or expression T, + * including the terminating null. + */ +#define INT_BUFSIZE_BOUND(t) (INT_STRLEN_BOUND(t) + 1) + +/* The maximum and minimum values for the integer type T. */ +#define TYPE_MINIMUM(t) ((t)~TYPE_MAXIMUM(t)) +#define TYPE_MAXIMUM(t) \ + ((t) (!TYPE_SIGNED(t) \ + ? (t)-1 \ + : ((((t)1 << (TYPE_WIDTH(t) - 2)) - 1) * 2 + 1))) + +static bool o_dir_list; +static bool long_format; /* use a long listing format */ + +/* Current time in seconds and nanoseconds since 1970, updated as + * needed when deciding whether a file is recent. + */ +static struct timespec current_time; + +/* FIXME: these are used by printf.c, too */ +#define isodigit(c) ('0' <= (c) && (c) <= '7') +#define octtobin(c) ((c) - '0') +#define hextobin(c) ((c) >= 'a' && (c) <= 'f' ? (c) - 'a' + 10 : \ + (c) >= 'A' && (c) <= 'F' ? (c) - 'A' + 10 : (c) - '0') + +#define ISDIGIT(c) ((unsigned int)(c) - '0' <= 9) + +/* True if the real type T is signed. 
*/ +#define TYPE_SIGNED(t) (!((t)0 < (t)-1)) + +static char const digits[] = "0123456789"; + +/* Flags that are portable for use in printf, for at least one + * conversion specifier; make_format removes unportable flags as + * needed for particular specifiers. The glibc 2.2 extension "I" is + * listed here; it is removed by make_format because it has undefined + * behavior elsewhere and because it is incompatible with + * out_epoch_sec. + */ +static char const printf_flags[] = "'-+ #0I"; + +/* Formats for the --terse option. */ +static char const fmt_terse_fs[] = "%n %i %l %t %s %S %b %f %a %c %d\n"; +static char const fmt_terse_regular[] = "%n %s %b %f %u %g %D %i %h %t %T" + " %X %Y %Z %W %o\n"; +static char const fmt_terse_selinux[] = "%n %s %b %f %u %g %D %i %h %t %T" + " %X %Y %Z %W %o %C\n"; + +static char *format; + +/* Whether to follow symbolic links; True for --dereference (-L). */ +static bool follow_links; + +/* Whether to interpret backslash-escape sequences. + * True for --printf=FMT, not for --format=FMT (-c). + */ +static bool interpret_backslash_escapes; + +/* The trailing delimiter string: + * "" for --printf=FMT, "\n" for --format=FMT (-c). + */ +static char const *trailing_delim = ""; + +/* The representation of the decimal point in the current locale. */ +static char const *decimal_point; +static size_t decimal_point_len; + +/* Convert a possibly-signed character to an unsigned character. This is + * a bit safer than casting to unsigned char, since it catches some type + * errors that the cast doesn't. 
+ */ +static inline unsigned char to_uchar(char ch) +{ + return ch; +} + +void usage(char *prog) +{ + printf("Usage: %s [options] ...\n", prog); + printf("Display file status via statx() syscall.\n" + "List information about the FILE " + "(the current diretory by default) via statx() syscall.\n" + "options:\n" + "\t-L --dereference follow links\n" + "\t--cached=MODE specify how to use cached attributes;\n" + "\t See MODE below\n" + "\t-c --format=FORMAT use the specified FORMAT instead of the " + "default;\n" + "\t output a newline after each use of " + "FORMAT\n" + "\t-t, --terse print the information in terse form\n" + "\t-D --dir list information about the FILE (ls)\n" + "\t-l use a long listing format\n" + "\t-q --quiet do not display results, test only\n\n" + "The --cached MODE argument can be; always, never, or default.\n" + "`always` will use cached attributes if available, while\n" + "`never` will try to synchronize with the latest attributes,\n" + "and `default` will leave it up to the underlying file system.\n" + "\n" + "The valid format sequences for files (without --file-system):\n" + "\n" + "\t%%a access rights in octal (note '#' and '0' printf flags)\n" + "\t%%A access rights in human readable form\n" + "\t%%b number of blocks allocated (see %%B)\n" + "\t%%B the size in bytes of each block reported by %%b\n" + "\t%%C SELinux security context string\n" + "\t%%d device number in decimal\n" + "\t%%D device number in hex\n" + "\t%%f raw mode in hex\n" + "\t%%F file type\n" + "\t%%g group ID of owner\n" + "\t%%G group name of owner\n" + "\t%%h number of hard links\n" + "\t%%i inode number\n" + "\t%%m mount point\n" + "\t%%n file name\n" + "\t%%N quoted file name with dereference if symbolic link\n" + "\t%%o optimal I/O transfer size hint\n" + "\t%%q Mask to show what's supported in stx_attributes\n" + "\t%%r Flags conveying information about the file: " + "stx_attributes\n" + "\t%%s total size, in bytes\n" + "\t%%t major device type in hex, for 
character/block device " + "special files\n" + "\t%%T minor device type in hex, for character/block device " + "special files\n" + "\t%%u user ID of owner\n" + "\t%%U user name of owner\n" + "\t%%w time of file birth, human-readable; - if unknown\n" + "\t%%W time of file birth, seconds since Epoch; 0 if unknown\n" + "\t%%x time of last access, human-readable\n" + "\t%%X time of last access, seconds since Epoch\n" + "\t%%y time of last data modification, human-readable\n" + "\t%%Y time of last data modification, seconds since Epoch\n" + "\t%%z time of last status change, human-readable\n" + "\t%%Z time of last status change, seconds since Epoch\n"); + exit(0); +} + +/* gnulib/lib/filemode.c */ +/* Return a character indicating the type of file described by + * file mode BITS: + * '-' regular file + * 'b' block special file + * 'c' character special file + * 'C' high performance ("contiguous data") file + * 'd' directory + * 'D' door + * 'l' symbolic link + * 'm' multiplexed file (7th edition Unix; obsolete) + * 'n' network special file (HP-UX) + * 'p' fifo (named pipe) + * 'P' port + * 's' socket + * 'w' whiteout (4.4BSD) + * '?' some other file type + */ +static char ftypelet(mode_t bits) +{ + /* These are the most common, so test for them first.*/ + if (S_ISREG(bits)) + return '-'; + if (S_ISDIR(bits)) + return 'd'; + + /* Other letters standardized by POSIX 1003.1-2004.*/ + if (S_ISBLK(bits)) + return 'b'; + if (S_ISCHR(bits)) + return 'c'; + if (S_ISLNK(bits)) + return 'l'; + if (S_ISFIFO(bits)) + return 'p'; + + /* Other file types (though not letters) standardized by POSIX.*/ + if (S_ISSOCK(bits)) + return 's'; + + return '?'; +} + +/* Like filemodestring, but rely only on MODE.*/ +static void strmode(mode_t mode, char *str) +{ + str[0] = ftypelet(mode); + str[1] = mode & S_IRUSR ? 'r' : '-'; + str[2] = mode & S_IWUSR ? 'w' : '-'; + str[3] = (mode & S_ISUID + ? (mode & S_IXUSR ? 's' : 'S') + : (mode & S_IXUSR ? 'x' : '-')); + str[4] = mode & S_IRGRP ? 
'r' : '-'; + str[5] = mode & S_IWGRP ? 'w' : '-'; + str[6] = (mode & S_ISGID + ? (mode & S_IXGRP ? 's' : 'S') + : (mode & S_IXGRP ? 'x' : '-')); + str[7] = mode & S_IROTH ? 'r' : '-'; + str[8] = mode & S_IWOTH ? 'w' : '-'; + str[9] = (mode & S_ISVTX + ? (mode & S_IXOTH ? 't' : 'T') + : (mode & S_IXOTH ? 'x' : '-')); + str[10] = ' '; + str[11] = '\0'; +} + +/* filemodestring - fill in string STR with an ls-style ASCII + * representation of the st_mode field of file stats block STATP. + * 12 characters are stored in STR. + * The characters stored in STR are: + * + * 0 File type, as in ftypelet above, except that other letters are used + * for files whose type cannot be determined solely from st_mode: + * + * 'F' semaphore + * 'M' migrated file (Cray DMF) + * 'Q' message queue + * 'S' shared memory object + * 'T' typed memory object + * + * 1 'r' if the owner may read, '-' otherwise. + * + * 2 'w' if the owner may write, '-' otherwise. + * + * 3 'x' if the owner may execute, 's' if the file is + * set-user-id, '-' otherwise. + * 'S' if the file is set-user-id, but the execute + * bit isn't set. + * + * 4 'r' if group members may read, '-' otherwise. + * + * 5 'w' if group members may write, '-' otherwise. + * + * 6 'x' if group members may execute, 's' if the file is + * set-group-id, '-' otherwise. + * 'S' if it is set-group-id but not executable. + * + * 7 'r' if any user may read, '-' otherwise. + * + * 8 'w' if any user may write, '-' otherwise. + * + * 9 'x' if any user may execute, 't' if the file is "sticky" + * (will be retained in swap space after execution), '-' + * otherwise. + * 'T' if the file is sticky but not executable. + * + * 10 ' ' for compatibility with 4.4BSD strmode, + * since this interface does not support ACLs. + * + * 11 '\0'. 
+ */ +static void filemodestring(struct statx const *stxp, char *str) +{ + strmode(stxp->stx_mode, str); + +/* + if (S_TYPEISSEM(statp)) + str[0] = 'F'; + else if (IS_MIGRATED_FILE (statp)) + str[0] = 'M'; + else if (S_TYPEISMQ (statp)) + str[0] = 'Q'; + else if (S_TYPEISSHM (statp)) + str[0] = 'S'; + else if (S_TYPEISTMO (statp)) + str[0] = 'T'; + */ +} + +/* gnulib/lib/file-type.c */ +static char const *file_type(struct statx const *stx) +{ + /* See POSIX 1003.1-2001 XCU Table 4-8 lines 17093-17107 for some of + * these formats. + * + * To keep diagnostics grammatical in English, the returned string + * must start with a consonant. + */ + /* Do these three first, as they're the most common. */ + if (S_ISREG(stx->stx_mode)) + return stx->stx_size == 0 ? "regular empty file" : + "regular file"; + + if (S_ISDIR(stx->stx_mode)) + return "directory"; + + if (S_ISLNK(stx->stx_mode)) + return "symbolic link"; + + /* The remaining are in alphabetical order. */ + if (S_ISBLK(stx->stx_mode)) + return "block special file"; + + if (S_ISCHR(stx->stx_mode)) + return "character special file"; + + if (S_ISFIFO(stx->stx_mode)) + return "fifo"; + + if (S_ISSOCK(stx->stx_mode)) + return "socket"; + + return "weird file"; +} + +/* gnulib/lib/areadlink-with-size.c */ +/* SYMLINK_MAX is used only for an initial memory-allocation sanity + * check, so it's OK to guess too small on hosts where there is no + * arbitrary limit to symbolic link length. + */ +#ifndef SYMLINK_MAX +#define SYMLINK_MAX 1024 +#endif + +#define MAXSIZE (SIZE_MAX < SSIZE_MAX ? SIZE_MAX : SSIZE_MAX) + +/* Call readlink to get the symbolic link value of FILE. + * SIZE is a hint as to how long the link is expected to be; + * typically it is taken from st_size. It need not be correct. + * Return a pointer to that NUL-terminated string in malloc'd storage. + * If readlink fails, malloc fails, or if the link value is longer + * than SSIZE_MAX, return NULL (caller may use errno to diagnose). 
+ */ +static char *areadlink_with_size(char const *file, size_t size) +{ + /* Some buggy file systems report garbage in st_size. Defend + * against them by ignoring outlandish st_size values in the initial + * memory allocation. + */ + size_t symlink_max = SYMLINK_MAX; + size_t INITIAL_LIMIT_BOUND = 8 * 1024; + size_t initial_limit = (symlink_max < INITIAL_LIMIT_BOUND ? + symlink_max + 1 : INITIAL_LIMIT_BOUND); + enum { stackbuf_size = 128 }; + /* The initial buffer size for the link value. */ + size_t buf_size = (size == 0 ? stackbuf_size : size < initial_limit ? + size + 1 : initial_limit); + + while (1) { + ssize_t r; + size_t link_length; + char stackbuf[stackbuf_size]; + char *buf = stackbuf; + char *buffer = NULL; + + if (!(size == 0 && buf_size == stackbuf_size)) { + buf = buffer = malloc(buf_size); + if (!buffer) + return NULL; + } + + r = readlink(file, buf, buf_size); + link_length = r; + + /* On AIX 5L v5.3 and HP-UX 11i v2 04/09, readlink returns -1 + * with errno == ERANGE if the buffer is too small. + */ + if (r < 0 && errno != ERANGE) { + int saved_errno = errno; + + free(buffer); + errno = saved_errno; + return NULL; + } + + if (link_length < buf_size) { + buf[link_length] = 0; + if (!buffer) { + buffer = malloc(link_length + 1); + if (buffer) + return memcpy(buffer, buf, + link_length + 1); + } else if (link_length + 1 < buf_size) { + /* Shrink BUFFER before returning it. */ + char *shrinked_buffer; + + shrinked_buffer = realloc(buffer, + link_length + 1); + if (shrinked_buffer != NULL) + buffer = shrinked_buffer; + } + return buffer; + } + + free(buffer); + if (buf_size <= MAXSIZE / 2) { + buf_size *= 2; + } else if (buf_size < MAXSIZE) { + buf_size = MAXSIZE; + } else { + errno = ENOMEM; + return NULL; + } + } +} + +/* coreutils/src/stat.c */ +/* Output a single-character \ escape. */ +static void print_esc_char(char c) +{ + switch (c) { + case 'a': /* Alert. */ + c = '\a'; + break; + case 'b': /* Backspace. 
*/ + c = '\b'; + break; + case 'e': /* Escape. */ + c = '\x1B'; + break; + case 'f': /* Form feed. */ + c = '\f'; + break; + case 'n': /* New line. */ + c = '\n'; + break; + case 'r': /* Carriage return. */ + c = '\r'; + break; + case 't': /* Horizontal tab. */ + c = '\t'; + break; + case 'v': /* Vertical tab. */ + c = '\v'; + break; + case '"': + case '\\': + break; + default: + printf("warning: unrecognized escape '\\%c'", c); + break; + } + putchar (c); +} + +static size_t format_code_offset(char const *directive) +{ + size_t len = strspn(directive + 1, printf_flags); + char const *fmt_char = directive + len + 1; + + fmt_char += strspn(fmt_char, digits); + if (*fmt_char == '.') + fmt_char += 1 + strspn(fmt_char + 1, digits); + + return fmt_char - directive; +} + +static unsigned int fmt_to_mask(char fmt) +{ + switch (fmt) { + case 'N': + return STATX_MODE; + case 'd': + case 'D': + return STATX_MODE; + case 'i': + return STATX_INO; + case 'a': + case 'A': + return STATX_MODE; + case 'f': + return STATX_MODE|STATX_TYPE; + case 'F': + return STATX_TYPE; + case 'h': + return STATX_NLINK; + case 'u': + case 'U': + return STATX_UID; + case 'g': + case 'G': + return STATX_GID; + case 'm': + return STATX_MODE|STATX_INO; + case 's': + return STATX_SIZE; + case 't': + case 'T': + return STATX_MODE; + case 'b': + return STATX_BLOCKS; + case 'w': + case 'W': + return STATX_BTIME; + case 'x': + case 'X': + return STATX_ATIME; + case 'y': + case 'Y': + return STATX_MTIME; + case 'z': + case 'Z': + return STATX_CTIME; + } + return 0; +} + +static unsigned int format_to_mask(char const *format) +{ + unsigned int mask = 0; + char const *b; + + for (b = format; *b; b++) { + if (*b != '%') + continue; + + b += format_code_offset(b); + if (*b == '\0') + break; + mask |= fmt_to_mask(*b); + } + + return mask; +} + +static char *human_access(struct statx const *stxbuf) +{ + static char modebuf[12]; + + filemodestring(stxbuf, modebuf); + modebuf[10] = 0; + return modebuf; +} + +static 
inline struct timespec +statx_timestamp_to_timespec(struct statx_timestamp tsx) +{ + struct timespec ts; + + ts.tv_sec = tsx.tv_sec; + ts.tv_nsec = tsx.tv_nsec; + + return ts; +} + +static int timespec_cmp(struct timespec a, struct timespec b) +{ + if (a.tv_sec < b.tv_sec) + return -1; + if (a.tv_sec > b.tv_sec) + return 1; + + return a.tv_nsec - b.tv_nsec; +} + +static char *human_time(const struct statx_timestamp *ts) +{ + /* STR must be at least INT_BUFSIZE_BOUND (intmax_t) big, either + * because localtime_rz fails, or because the time zone is truly + * outlandish so that %z expands to a long string. + */ + static char str[INT_BUFSIZE_BOUND(intmax_t) + + INT_STRLEN_BOUND(int) /* YYYY */ + + 1 /* because YYYY might equal INT_MAX + 1900 */ + + sizeof "-MM-DD HH:MM:SS.NNNNNNNNN +"]; + struct tm tm; + time_t tim; + int len; + int len2; + + tim = ts->tv_sec; + if (!localtime_r(&tim, &tm)) { + perror("localtime_r"); + exit(EXIT_FAILURE); + } + + if (o_dir_list && long_format) { + struct timespec when_timespec; + struct timespec six_months_ago; + bool recent; + + when_timespec = statx_timestamp_to_timespec(*ts); + /* If the file appears to be in the future, update the current + * time, in case the file happens to have been modified since + * the last time we checked the clock. + */ + if (timespec_cmp(current_time, when_timespec) < 0) { + struct timeval tv; + + gettimeofday(&tv, NULL); + current_time.tv_sec = tv.tv_sec; + current_time.tv_nsec = tv.tv_usec * 1000; + } + + /* Consider a time to be recent if it is within the past six + * months. + * A Gregorian year has 365.2425 * 24 * 60 * 60 == 31556952 + * seconds on the average. Write this value as an integer + * constant to avoid floating point hassles. 
+ */ + six_months_ago.tv_sec = current_time.tv_sec - 31556952 / 2; + six_months_ago.tv_nsec = current_time.tv_nsec; + + recent = (timespec_cmp(six_months_ago, when_timespec) < 0 && + (timespec_cmp(when_timespec, current_time) < 0)); + + /* We assume here that all time zones are offset from UTC by a + * whole number of seconds. + */ + len = strftime(str, sizeof(str), + recent ? "%b %e %H:%M" : "%b %e %Y", &tm); + if (len == 0) { + perror("strftime"); + exit(EXIT_FAILURE); + } + + return str; + } + + len = strftime(str, sizeof(str), "%Y-%m-%d %H:%M:%S", &tm); + if (len == 0) { + perror("strftime"); + exit(EXIT_FAILURE); + } + + len2 = snprintf(str + len, sizeof(str) - len, ".%09u ", ts->tv_nsec); + len = strftime(str + len + len2, sizeof(str) - len - len2, "%z", &tm); + if (len == 0) { + perror("strftime2"); + exit(1); + } + + return str; +} + +/* PFORMAT points to a '%' followed by a prefix of a format, all of + * size PREFIX_LEN. The flags allowed for this format are + * ALLOWED_FLAGS; remove other printf flags from the prefix, then + * append SUFFIX. 
+ */ +static void make_format(char *pformat, size_t prefix_len, + char const *allowed_flags, char const *suffix) +{ + char *dst = pformat + 1; + char const *src; + char const *srclim = pformat + prefix_len; + + for (src = dst; src < srclim && strchr(printf_flags, *src); src++) + if (strchr(allowed_flags, *src)) + *dst++ = *src; + while (src < srclim) + *dst++ = *src++; + strcpy(dst, suffix); +} + +static void out_string(char *pformat, size_t prefix_len, char const *arg) +{ + make_format(pformat, prefix_len, "-", "s"); + printf(pformat, arg); +} + +static int out_int(char *pformat, size_t prefix_len, intmax_t arg) +{ + make_format(pformat, prefix_len, "'-+ 0", PRIdMAX); + return printf(pformat, arg); +} + +static int out_uint(char *pformat, size_t prefix_len, uintmax_t arg) +{ + make_format(pformat, prefix_len, "'-0", PRIuMAX); + return printf(pformat, arg); +} + +static void out_uint_o(char *pformat, size_t prefix_len, uintmax_t arg) +{ + make_format(pformat, prefix_len, "-#0", PRIoMAX); + printf(pformat, arg); +} + +static void out_uint_x(char *pformat, size_t prefix_len, uintmax_t arg) +{ + make_format(pformat, prefix_len, "-#0", PRIxMAX); + printf(pformat, arg); +} + +static int out_minus_zero(char *pformat, size_t prefix_len) +{ + make_format(pformat, prefix_len, "'-+ 0", ".0f"); + return printf(pformat, -0.25); +} + +/* Output the number of seconds since the Epoch, using a format that + * acts like printf's %f format. + */ +static void out_epoch_sec(char *pformat, size_t prefix_len, + struct timespec arg) +{ + char *dot = memchr(pformat, '.', prefix_len); + size_t sec_prefix_len = prefix_len; + int width = 0; + int precision = 0; + bool frac_left_adjust = false; + + if (dot) { + sec_prefix_len = dot - pformat; + pformat[prefix_len] = '\0'; + + if (ISDIGIT(dot[1])) { + long int lprec = strtol(dot + 1, NULL, 10); + + precision = (lprec <= INT_MAX ? 
lprec : INT_MAX); + } else { + precision = 9; + } + + if (precision && ISDIGIT(dot[-1])) { + /* If a nontrivial width is given, subtract the width + * of the decimal point and PRECISION digits that will + * be output later. + */ + char *p = dot; + + *dot = '\0'; + + do + --p; + while (ISDIGIT(p[-1])); + + long int lwidth = strtol(p, NULL, 10); + + width = (lwidth <= INT_MAX ? lwidth : INT_MAX); + if (width > 1) { + p += (*p == '0'); + sec_prefix_len = p - pformat; + + int w_d = (decimal_point_len < width ? + width - decimal_point_len : 0); + + if (w_d > 1) { + int w = w_d - precision; + + if (w > 1) { + char *dst = pformat; + char const *src = dst; + for (; src < p; src++) { + if (*src == '-') + frac_left_adjust = true; + else + *dst++ = *src; + } + sec_prefix_len = + (dst - pformat + + (frac_left_adjust ? 0 : sprintf(dst, "%d", w))); + } + } + } + } + } + + int divisor = 1; + int i; + + for (i = precision; i < 9; i++) + divisor *= 10; + + int frac_sec = arg.tv_nsec / divisor; + int int_len; + + + if (TYPE_SIGNED(time_t)) { + bool minus_zero = false; + + if (arg.tv_sec < 0 && arg.tv_nsec != 0) { + int frac_sec_modulus = 1000000000 / divisor; + + frac_sec = (frac_sec_modulus - frac_sec + - (arg.tv_nsec % divisor != 0)); + arg.tv_sec += (frac_sec != 0); + minus_zero = (arg.tv_sec == 0); + } + int_len = (minus_zero ? + out_minus_zero(pformat, sec_prefix_len) : + out_int(pformat, sec_prefix_len, arg.tv_sec)); + } else { + int_len = out_uint(pformat, sec_prefix_len, arg.tv_sec); + } + + if (precision) { + int prec = (precision < 9 ? precision : 9); + int trailing_prec = precision - prec; + int ilen = (int_len < 0 ? 0 : int_len); + int trailing_width = (ilen < width && + decimal_point_len < width - ilen ? + width - ilen - decimal_point_len - prec : + 0); + + printf("%s%.*d%-*.*d", decimal_point, prec, frac_sec, + trailing_width, trailing_prec, 0); + } +} + +/* Print the context information of FILENAME, and return true iff the + * context could not be obtained. 
+ */ +static int out_file_context(char *pformat, size_t prefix_len, + char const *filename) +{ + char *scontext = NULL; + int rc = 0; + +#ifdef HAVE_SELINUX + if ((follow_links ? getfilecon(filename, &scontext) : + lgetfilecon(filename, &scontext)) < 0) { + printf("failed to get security context of %s: %s\n", + filename, strerror(errno)); + scontext = NULL; + rc = -errno; + } +#endif + + strcpy(pformat + prefix_len, "s"); + printf(pformat, (scontext ? scontext : "?")); + if (scontext) + freecon(scontext); + return rc; +} + +/* Map a TS with negative TS.tv_nsec to {0,0}. */ +static inline struct timespec neg_to_zero(struct timespec ts) +{ + if (ts.tv_nsec >= 0) { + return ts; + } else { + struct timespec z = {0, 0}; + + return z; + } +} + +/* All the mode bits that can be affected by chmod. */ +#define CHMOD_MODE_BITS \ + (S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | S_IRWXO) + +/* Print statx info. Return zero upon success, nonzero upon failure. */ +static int print_statx(char *pformat, size_t prefix_len, unsigned int m, + int fd, char const *filename, struct statx const *stx) +{ + struct passwd *pw_ent; + struct group *gw_ent; + int rc = 0; + int ret; + + switch (m) { + case 'n': + out_string(pformat, prefix_len, + o_dir_list ? strrchr(filename, '/') + 1 : filename); + break; + case 'N': + out_string(pformat, prefix_len, + o_dir_list ? 
strrchr(filename, '/') + 1 : filename); + if (S_ISLNK(stx->stx_mode)) { + char *linkname; + + linkname = areadlink_with_size(filename, stx->stx_size); + if (linkname == NULL) { + printf("cannot read symbolic link %s: %s", + filename, strerror(errno)); + return -errno; + } + printf(" -> "); + out_string(pformat, prefix_len, linkname); + free(linkname); + } + break; + case 'd': + out_uint(pformat, prefix_len, makedev(stx->stx_dev_major, + stx->stx_dev_minor)); + break; + case 'D': + out_uint_x(pformat, prefix_len, makedev(stx->stx_dev_major, + stx->stx_dev_minor)); + break; + case 'i': + out_uint(pformat, prefix_len, stx->stx_ino); + break; + case 'a': + out_uint_o(pformat, prefix_len, + stx->stx_mode & CHMOD_MODE_BITS); + break; + case 'A': + out_string(pformat, prefix_len, human_access(stx)); + break; + case 'f': + out_uint_x(pformat, prefix_len, stx->stx_mode); + break; + case 'F': + out_string(pformat, prefix_len, file_type(stx)); + break; + case 'h': + out_uint(pformat, prefix_len, stx->stx_nlink); + break; + case 'u': + out_uint(pformat, prefix_len, stx->stx_uid); + break; + case 'U': + pw_ent = getpwuid(stx->stx_uid); + out_string(pformat, prefix_len, + pw_ent ? pw_ent->pw_name : "UNKNOWN"); + break; + case 'g': + out_uint(pformat, prefix_len, stx->stx_gid); + break; + case 'G': + gw_ent = getgrgid(stx->stx_gid); + out_string(pformat, prefix_len, + gw_ent ? 
gw_ent->gr_name : "UNKNOWN"); + break; + case 'm': + /* + * fail |= out_mount_point(filename, pformat, prefix_len, + * statbuf); + */ + if (!rc) + rc = -ENOTSUP; + break; + case 's': + out_int(pformat, prefix_len, stx->stx_size); + break; + case 't': + out_uint_x(pformat, prefix_len, + major(makedev(stx->stx_rdev_major, + stx->stx_rdev_minor))); + break; + case 'T': + out_uint_x(pformat, prefix_len, + minor(makedev(stx->stx_rdev_major, + stx->stx_rdev_minor))); + break; + case 'B': + out_uint(pformat, prefix_len, S_BLKSIZE); + break; + case 'b': + out_uint(pformat, prefix_len, stx->stx_blocks); + break; + case 'o': + out_uint(pformat, prefix_len, stx->stx_blksize); + break; + case 'p': + out_uint_x(pformat, prefix_len, stx->stx_attributes_mask); + break; + case 'r': + out_uint_x(pformat, prefix_len, stx->stx_attributes); + break; + case 'w': + if (stx->stx_btime.tv_nsec < 0) + out_string(pformat, prefix_len, "-"); + else + out_string(pformat, prefix_len, + human_time(&stx->stx_btime)); + break; + case 'W': + out_epoch_sec(pformat, prefix_len, + neg_to_zero(statx_timestamp_to_timespec( + stx->stx_btime))); + break; + case 'x': + out_string(pformat, prefix_len, + human_time(&stx->stx_atime)); + break; + case 'X': + out_epoch_sec(pformat, prefix_len, + neg_to_zero(statx_timestamp_to_timespec( + stx->stx_atime))); + break; + case 'y': + out_string(pformat, prefix_len, + human_time(&stx->stx_mtime)); + break; + case 'Y': + out_epoch_sec(pformat, prefix_len, + neg_to_zero(statx_timestamp_to_timespec( + stx->stx_mtime))); + break; + case 'z': + out_string(pformat, prefix_len, + human_time(&stx->stx_ctime)); + break; + case 'Z': + out_epoch_sec(pformat, prefix_len, + neg_to_zero(statx_timestamp_to_timespec( + stx->stx_ctime))); + break; + case 'C': + ret = out_file_context(pformat, prefix_len, filename); + if (!rc && ret) + rc = ret; + break; + default: + fputc('?', stdout); + break; + } + + return rc; +} + +static int print_it(int fd, char const *filename, + int 
(*print_func)(char *, size_t, unsigned int, + int, char const *, struct statx const *), + void const *data) +{ + /* Add 2 to accommodate our conversion of the stat '%s' format string + * to the longer printf '%llu' one. + */ + enum { + MAX_ADDITIONAL_BYTES = (MAX(sizeof(PRIdMAX), + MAX(sizeof(PRIoMAX), + MAX(sizeof(PRIuMAX), + sizeof(PRIxMAX)))) - 1) + }; + size_t n_alloc; + char *dest; + char const *b; + int rc = 0; + + if (o_quiet) + return 0; + + n_alloc = strlen(format) + MAX_ADDITIONAL_BYTES + 1; + dest = malloc(n_alloc); + if (dest == NULL) + return -ENOMEM; + + for (b = format; *b; b++) { + switch (*b) { + case '%': { + size_t len = format_code_offset(b); + char const *fmt_char = b + len; + int ret; + + memcpy(dest, b, len); + b += len; + + switch (*fmt_char) { + case '\0': + --b; + case '%': + if (len > 1) { + dest[len] = *fmt_char; + dest[len + 1] = '\0'; + printf("%s: invalid directive", dest); + return -EINVAL; + } + putchar('%'); + break; + default: + ret = print_func(dest, len, to_uchar(*fmt_char), + fd, filename, data); + if (rc == 0 && ret) + rc = ret; + break; + } + break; + } + case '\\': + if (!interpret_backslash_escapes) { + putchar ('\\'); + break; + } + ++b; + if (isodigit(*b)) { + int esc_value = octtobin(*b); + int esc_length = 1; /* number of octal digits */ + + for (++b; esc_length < 3 && isodigit(*b); + ++esc_length, ++b) { + esc_value = esc_value * 8 + + octtobin(*b); + } + putchar(esc_value); + --b; + } else if (*b == 'x' && isxdigit(to_uchar(b[1]))) { + /* Value of \xhh escape. */ + int esc_value = hextobin(b[1]); + /* A hexadecimal \xhh escape sequence must have + * 1 or 2 hex. digits. + */ + + ++b; + if (isxdigit(to_uchar(b[1]))) { + ++b; + esc_value = esc_value * 16 + + hextobin(*b); + } + putchar(esc_value); + } else if (*b == '\0') { + printf("warning: backslash at end of format"); + putchar('\\'); + /* Arrange to exit the loop. 
*/ + --b; + } else { + print_esc_char(*b); + } + break; + + default: + putchar(*b); + break; + } + } + free(dest); + + fputs(trailing_delim, stdout); + + return rc; +} + +/* Return an allocated format string in static storage that + * corresponds to whether FS and TERSE options were declared. + */ +static char *default_format(bool fs, bool terse, bool device) +{ + char *format; + + if (fs) { + if (terse) { + format = xstrdup(fmt_terse_fs); + } else { + /* TRANSLATORS: This string uses format specifiers from + * 'stat --help' with --file-system, and NOT from + * printf. + */ + format = xstrdup( + " File: \"%n\"\n" + " ID: %-8i Namelen: %-7l Type: %T\n" + "Block size: %-10s Fundamental block size: %S\n" + "Blocks: Total: %-10b Free: %-10f Available: %a\n" + "Inodes: Total: %-10c Free: %d\n"); + } + } else /* ! fs */ { + if (terse) { +#ifdef HAVE_SELINUX + if (is_selinux_enabled() > 0) + format = xstrdup(fmt_terse_selinux); + else +#endif + format = xstrdup(fmt_terse_regular); + } else { + char *temp; + + /* TRANSLATORS: This string uses format specifiers from + * 'stat --help' without --file-system, and NOT from + * printf. + */ + format = xstrdup("\ + File: %N\n\ + Size: %-10s\tBlocks: %-10b IO Block: %-6o %F\n\ +"); + + temp = format; + if (device) { + /* TRANSLATORS: This string uses format + * specifiers from 'stat --help' without + * --file-system, and NOT from printf. + */ + format = xasprintf("%s%s", format, "\ +" "Device: %Dh/%dd\tInode: %-10i Links: %-5h Device type: %t,%T\n\ +"); + } else { + /* TRANSLATORS: This string uses format + * specifiers from 'stat --help' without + * --file-system, and NOT from printf. + */ + format = xasprintf("%s%s", format, "\ +" "Device: %Dh/%dd\tInode: %-10i Links: %h\n\ +"); + } + free(temp); + + temp = format; + /* TRANSLATORS: This string uses format specifiers from + * 'stat --help' without --file-system, and NOT from + * printf. 
+ */ + format = xasprintf("%s%s", format, "\ +" "Access: (%04a/%10.10A) Uid: (%5u/%8U) Gid: (%5g/%8G)\n\ +"); + free(temp); + + if (is_selinux_enabled() > 0) { + temp = format; + /* TRANSLATORS: This string uses format + * specifiers from 'stat --help' without + * --file-system, and NOT from printf. + */ + format = xasprintf("%s%s", format, + "Context: %C\n"); + free(temp); + } + + temp = format; + /* TRANSLATORS: This string uses format specifiers from + * 'stat --help' without --file-system, and NOT from + * printf. + */ + format = xasprintf("%s%s", format, + "Access: %x\n" + "Modify: %y\n" + "Change: %z\n" + " Birth: %w\n"); + free(temp); + } + } + return format; +} + +static char *list_long_format(void) +{ + char *format; + + format = xstrdup("\ +" "%10.10A %h %8U %8G %-10s %y %N\ +"); + + return format; +} + +static int do_statx(char const *filename, unsigned int request_mask, int flags) +{ + const char *pathname = filename; + struct statx stx = { 0, }; + int fd; + + if (strcmp(filename, "-") == 0) + fd = 0; + else + fd = AT_FDCWD; + + if (fd != AT_FDCWD) { + pathname = ""; + flags |= AT_EMPTY_PATH; + } + + fd = statx(fd, pathname, flags, request_mask, &stx); + if (fd < 0) { + if (flags & AT_EMPTY_PATH) + printf("cannot stat standard input\n"); + else + printf("cannot statx %s: %s\n", + filename, strerror(errno)); + + return -errno; + } + + return print_it(fd, filename, print_statx, &stx); +} + +/* Return true if FILE should be ignored. */ +static bool file_ignored(char const *name) +{ + return name[0] == '.'; +} + +static int do_dir_list(char const *dirname, unsigned int request_mask, + int flags) +{ + DIR *dir; + struct dirent *ent; + char fullname[PATH_MAX]; + int rc = 0; + + dir = opendir(dirname); + if (!dir) { + rc = -errno; + printf("lsx: cannot open directory '%s': %s\n", + dirname, strerror(errno)); + return rc; + } + + while ((ent = readdir(dir)) != NULL) { + int ret; + + /* skip "." and ".." 
*/ + if (file_ignored(ent->d_name)) + continue; + + /* ls -1 */ + if (!format) { + if (o_quiet) + continue; + + printf("%s", ent->d_name); + putchar('\n'); + } else { + if (strlen(ent->d_name) + strlen(dirname) + 1 >= + sizeof(fullname)) { + errno = ENAMETOOLONG; + fprintf(stderr, + "lsx: ignored too long path: %s/%s\n", + dirname, ent->d_name); + if (!rc) + rc = -ENAMETOOLONG; + continue; + } + snprintf(fullname, PATH_MAX, "%s/%s", + dirname, ent->d_name); + ret = do_statx(fullname, request_mask, flags); + if (!ret) + putchar('\n'); + else if (rc == 0) + rc = ret; + } + } + + closedir(dir); + return rc; +} + +int main(int argc, char **argv) +{ + static struct option options[] = { + {"dereference", no_argument, NULL, 'L'}, + {"format", required_argument, NULL, 'c'}, + {"printf", required_argument, NULL, PRINTF_OPTION}, + {"terse", no_argument, NULL, 't'}, + {"cached", required_argument, NULL, 0}, + {"dir", no_argument, NULL, 'D'}, + {"long-format", no_argument, NULL, 'l'}, + {"quiet", no_argument, NULL, 'q'}, + {"help", no_argument, NULL, GETOPT_HELP_CHAR}, + {"version", no_argument, NULL, GETOPT_VERSION_CHAR}, + {NULL, 0, NULL, 0} + }; + bool terse = false; + unsigned int request_mask; + int flags = AT_SYMLINK_NOFOLLOW; + struct lconv const *locale = localeconv(); + int c; + int rc = 0; + int i = 0; + + decimal_point = locale->decimal_point[0] ? 
locale->decimal_point : "."; + decimal_point_len = strlen(decimal_point); + current_time.tv_sec = TYPE_MINIMUM(time_t); + current_time.tv_nsec = -1; + + while ((c = getopt_long(argc, argv, "c:DqLlt", options, NULL)) != EOF) { + switch (c) { + case 'L': + flags &= ~AT_SYMLINK_NOFOLLOW; + follow_links = true; + break; + case PRINTF_OPTION: + format = optarg; + interpret_backslash_escapes = true; + trailing_delim = ""; + break; + case 'c': + format = optarg; + interpret_backslash_escapes = false; + trailing_delim = "\n"; + break; + case 'q': + o_quiet = true; + break; + case 'l': + o_dir_list = true; + long_format = true; + break; + case 'D': + o_dir_list = true; + break; + case 't': + terse = true; + break; + case 0: + if (strcmp(optarg, "never") == 0) { + flags &= ~AT_STATX_SYNC_TYPE; + flags |= AT_STATX_FORCE_SYNC; + } else if (strcmp(optarg, "always") == 0) { + flags &= ~AT_STATX_SYNC_TYPE; + flags |= AT_STATX_DONT_SYNC; + } else if (strcmp(optarg, "default") == 0) { + flags &= ~AT_STATX_SYNC_TYPE; + flags |= AT_SYMLINK_NOFOLLOW; + } else { + printf("%s: invalid cached mode: %s\n", + argv[0], optarg); + return -EINVAL; + } + break; + case GETOPT_HELP_CHAR: + usage(argv[0]); + case GETOPT_VERSION_CHAR: + if (!o_quiet) + printf("Lustre statx: version 0.1\n"); + return 0; + default: + printf("%s: unknown option '-%c'\n", + argv[0], optopt); + return -EINVAL; + } + } + + if (format) { + request_mask = format_to_mask(format); + } else { + request_mask = STATX_ALL; + if (o_dir_list) + format = long_format ? 
list_long_format() : NULL; + else + format = default_format(false, terse, false); + } + + if (optind == argc) { + if (o_dir_list) + return do_dir_list(".", request_mask, flags); + + printf("statx: missing operand\n" + "Try 'stat --help' for more information.\n"); + return 0; + } + + for (i = optind; i < argc; i++) { + int ret; + + if (o_dir_list) + ret = do_dir_list(argv[i], request_mask, flags); + else + ret = do_statx(argv[i], request_mask, flags); + + if (rc == 0 && ret) + rc = ret; + } + + return rc; +} +#else +int main(int argc, char **argv) +{ + static struct option options[] = { + {"version", no_argument, NULL, GETOPT_VERSION_CHAR}, + {"quiet", no_argument, NULL, 'q'}, + {NULL, 0, NULL, 0} + }; + int c; + + while ((c = getopt_long(argc, argv, "q", options, NULL)) != EOF) { + switch (c) { + case 'q': + o_quiet = true; + break; + case GETOPT_VERSION_CHAR: + if (!o_quiet) + printf("statx: Not support statx() syscall\n"); + return -ENOTSUP; + } + } + printf("Skip: system does not support statx syscall.\n"); + return 0; +} +#endif /* __NR_statx */ diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 5fe9a9d..6e007fa 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -277,6 +277,7 @@ init_test_env() { [ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey) export MCREATE=${MCREATE:-mcreate} export MULTIOP=${MULTIOP:-multiop} + export STATX=${STATX:-statx} # Ubuntu, at least, has a truncate command in /usr/bin # so fully path our truncate command. export TRUNCATE=${TRUNCATE:-$LUSTRE/tests/truncate} @@ -10431,3 +10432,7 @@ sel_layout_sanity() { check_component_count $file $comp_cnt } +statx_supported() { + $STATX --quiet --version + return $? +}