From 15d44e787e17ff57fc1fb5a6c9ae568cdaab6e34 Mon Sep 17 00:00:00 2001 From: Bruno Faccini Date: Thu, 22 Aug 2019 10:22:53 +0200 Subject: [PATCH] LU-12682 llite: fake symlink type of foreign file/dir This patch implements a "fake symlink" specific usage of the "foreign" LOV/LMV format. It basically allows this particular type of foreign file/dir to behave as a symlink from the VFS point of view, by constructing a relative path from the LOV/LMV foreign content, complementing it with a prefix, and then exposing it to the VFS as a symlink destination. The default/internal mechanism simply takes the full foreign free-format string as the relative path; for more complex internal formats, an upcall has been implemented to provide the format's details (presently just in terms of constant strings and substring positions in the EA, but this can be enhanced) to the llite layer. Using this feature, instead of real symlinks or user EAs, makes it possible to benefit from the special features (lock, prefetch, caches) already implemented to handle both LOV/LMV EAs. 
Signed-off-by: Bruno Faccini Change-Id: Id3c262e3b042447aa09aad25f682ff02787b350d Reviewed-on: https://review.whamcloud.com/35856 Reviewed-by: Andreas Dilger Tested-by: jenkins Reviewed-by: Ben Evans Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/doc/Makefile.am | 3 +- lustre/doc/lfs-find.1 | 6 +- lustre/doc/lfs-setdirstripe.1 | 13 +- lustre/doc/lfs-setstripe.1 | 8 +- lustre/doc/lfs.1 | 6 + lustre/doc/llapi_file_create_foreign.3 | 6 +- lustre/doc/llapi_unlink_foreign.3 | 80 +++ lustre/include/lustre/lustreapi.h | 1 + lustre/include/uapi/linux/lustre/lustre_user.h | 49 +- lustre/llite/Makefile.in | 2 + lustre/llite/dcache.c | 16 +- lustre/llite/dir.c | 10 + lustre/llite/file.c | 42 +- lustre/llite/foreign_symlink.h | 55 ++ lustre/llite/llite_foreign.c | 281 ++++++++ lustre/llite/llite_foreign_symlink.c | 857 +++++++++++++++++++++++++ lustre/llite/llite_internal.h | 32 +- lustre/llite/llite_lib.c | 115 ++++ lustre/llite/lproc_llite.c | 12 + lustre/llite/namei.c | 33 +- lustre/llite/pcc.c | 4 +- lustre/llite/symlink.c | 1 + lustre/lov/lov_object.c | 3 +- lustre/lov/lov_pack.c | 8 +- lustre/tests/Makefile.am | 1 + lustre/tests/checkstat.c | 41 +- lustre/tests/create_foreign_dir.c | 2 +- lustre/tests/create_foreign_file.c | 2 +- lustre/tests/foreign_symlink_striping.c | 105 +++ lustre/tests/sanity-lfsck.sh | 12 +- lustre/tests/sanity.sh | 209 +++++- lustre/utils/Makefile.am | 6 +- lustre/utils/l_foreign_symlink.c | 169 +++++ lustre/utils/lfs.c | 37 +- lustre/utils/liblustreapi.c | 393 +++++++----- lustre/utils/liblustreapi_util.c | 87 +++ 36 files changed, 2501 insertions(+), 206 deletions(-) create mode 100644 lustre/doc/llapi_unlink_foreign.3 create mode 100644 lustre/llite/foreign_symlink.h create mode 100644 lustre/llite/llite_foreign.c create mode 100644 lustre/llite/llite_foreign_symlink.c create mode 100644 lustre/tests/foreign_symlink_striping.c create mode 100644 lustre/utils/l_foreign_symlink.c diff --git a/lustre/doc/Makefile.am b/lustre/doc/Makefile.am 
index 24bf4ee..4d409ae 100644 --- a/lustre/doc/Makefile.am +++ b/lustre/doc/Makefile.am @@ -178,7 +178,8 @@ LIBMAN = \ llapi_rmfid.3 \ llapi_search_mdt.3 \ llapi_search_ost.3 \ - llapi_search_tgt.3 + llapi_search_tgt.3 \ + llapi_unlink_foreign.3 SERVER_MANFILES = \ lctl-barrier.8 \ diff --git a/lustre/doc/lfs-find.1 b/lustre/doc/lfs-find.1 index 063f6d9..0cb2b8a 100644 --- a/lustre/doc/lfs-find.1 +++ b/lustre/doc/lfs-find.1 @@ -119,7 +119,7 @@ File has a foreign (non-Lustre/free format) layout and is of the given if specified. Presently only .B none or -.B daos +.B symlink are defined types, though 32-bit numeric types can also be used. .TP .BR --gid | -g @@ -365,9 +365,9 @@ Recursively list all mirrored files that have more than 2 mirrors. .B $ lfs find ! --mirror-state=ro /mnt/lustre Recursively list all out-of-sync mirrored files. .TP -.B $ lfs find ! --foreign=daos /mnt/lustre +.B $ lfs find ! --foreign=symlink /mnt/lustre Recursively list all but foreign files/dirs of -.B daos +.B symlink type. .SH BUGS The diff --git a/lustre/doc/lfs-setdirstripe.1 b/lustre/doc/lfs-setdirstripe.1 index 6f0906a..d61240e 100644 --- a/lustre/doc/lfs-setdirstripe.1 +++ b/lustre/doc/lfs-setdirstripe.1 @@ -94,9 +94,10 @@ Create a directory with a foreign (non-Lustre/free format, see option) striping. Where .BR type specifies a known foreign type (like -.BR none , -.BR daos ) -or a 32-bit numeric type. +.BR none +, +.BR symlink +, ...) or a 32-bit numeric type. .TP .BR \-\-flags =\fI\fR Specify a numeric bitmask of type-specific layout flags for the foreign layout. @@ -149,12 +150,12 @@ This creates a directory striped on two MDTs, whose first stripe is on (MDT index 1), and whose hash type is .BR all_char . .TP -.B $ lfs mkdir --foreign=daos --xattr PUUID:CUUID /mnt/lustre/dir1 +.B $ lfs mkdir --foreign=symlink --xattr PUUID/CUUID /mnt/lustre/dir1 This creates .B dir1 with foreign (non-lustre/free format) -.B PUUID:CUUID -striping/LMV EA. 
+.B PUUID/CUUID +striping/LMV EA value (symlink type). .SH AVAILABILITY The .B lfs setdirstripe diff --git a/lustre/doc/lfs-setstripe.1 b/lustre/doc/lfs-setstripe.1 index 97f74e5..3bbac99 100644 --- a/lustre/doc/lfs-setstripe.1 +++ b/lustre/doc/lfs-setstripe.1 @@ -133,7 +133,7 @@ file. Create a new .I file with a foreign/non-lustre layout of type -.I foreign_type \fR(\fBnone\fR, \fBdaos\fR, ...) +.I foreign_type \fR(\fBnone\fR, \fBsymlink\fR, ...) with flags .I hex and a free-format layout value of @@ -264,7 +264,7 @@ options). Current known types are .BR none and -.BR daos +.BR symlink , default is .BR none . @@ -533,12 +533,12 @@ which can be created with the .B lfs getstripe --yaml command. .TP -.B lfs setstripe --foreign=daos --flags=0xda08 \ +.B lfs setstripe --foreign=symlink --flags=0xda08 \ --xattr=PUUID:CUUID /mnt/lustre/file1 This creates foreign .BR file1 of type -.BR daos +.BR symlink with non-lustre/free-format .BR PUUID:CUUID layout/LOV EA and flags diff --git a/lustre/doc/lfs.1 b/lustre/doc/lfs.1 index 2ca2c42..1e3b9c1 100644 --- a/lustre/doc/lfs.1 +++ b/lustre/doc/lfs.1 @@ -127,6 +127,9 @@ lfs \- client utility for Lustre-specific file layout and other attributes .br .B lfs setstripe --yaml=\fR<\fIyaml_template_file\fR> <\fIfilename\fR> .br +.B lfs unlink_foreign +.IR path \fR[\fIpath\fR ...] +.br .B lfs --version .br .B lfs --list-commands @@ -220,6 +223,9 @@ See lfs-migrate(1). .B setstripe See lfs-setstripe(1). .TP +.B unlink_foreign +Remove the foreign files/dirs that are prevented to be using regular unlink/rmdir commands/syscalls. Works also for regular files/dirs. +.TP .B --version Output the build version of the lfs utility. 
Use "lctl lustre_build_version" to get the version of the Lustre kernel modules .TP diff --git a/lustre/doc/llapi_file_create_foreign.3 b/lustre/doc/llapi_file_create_foreign.3 index be03522..2f647ed 100644 --- a/lustre/doc/llapi_file_create_foreign.3 +++ b/lustre/doc/llapi_file_create_foreign.3 @@ -73,13 +73,13 @@ int main(int argc, char *argv[]) if (argc != 2) return -1; - rc = llapi_file_create_foreign(argv[1], 0600, LOV_FOREIGN_TYPE_DAOS, - 0xda08, "PUUID@CUUID"); + rc = llapi_file_create_foreign(argv[1], 0600, LOV_FOREIGN_TYPE_SYMLINK, + 0xda05, "PUUID/CUUID"); if (rc < 0) { fprintf(stderr, "file creation has failed, %s\\n", strerror(-rc)); return -1; } - printf("foreign file %s has been created with 'PUUID@CUUID' layout!\\n", + printf("foreign file %s has been created with 'PUUID/CUUID' layout!\\n", argv[1]); return 0; } diff --git a/lustre/doc/llapi_unlink_foreign.3 b/lustre/doc/llapi_unlink_foreign.3 new file mode 100644 index 0000000..d49fb12 --- /dev/null +++ b/lustre/doc/llapi_unlink_foreign.3 @@ -0,0 +1,80 @@ +.TH lustreapi 3 "2009 Jul 10" The Lustre user application interface library +.SH NAME +llapi_unlink_foreign \- unlink file/dir with foreign layout on a Lustre filesystem +.SH SYNOPSIS +.nf +.B #include <lustre/lustreapi.h> +.sp +.BI "int llapi_unlink_foreign(char *" name ");" +.sp +.fi +.SH DESCRIPTION +.LP +.B llapi_unlink_foreign(\|) +unlinks the file/dir +.I name +that has a foreign LOV/LMV and whose removal by the regular unlink/rmdir +commands/syscalls is prevented. +It also works for regular files/dirs. +.SH RETURN VALUES +.LP +.B llapi_unlink_foreign(\|) +returns: +.TP +=0 +on success. +.TP +<0 +on failure, the absolute value is an error code. +.SH ERRORS +.TP 15 +.SM ENOENT +.I name +does not exist. +.TP +.SM ENOTTY +.I name +may not point to a Lustre filesystem. 
+.SH "EXAMPLE" +.nf +#include +#include +#include + +#include + +int main(int argc, char **argv) +{ + char *foreign = NULL; + int c, rc; + + while ((c = getopt(argc, argv, "hf:")) != -1) { + switch (c) { + case 'f': + foreign = optarg; + break; + case 'h': + default: + fprintf(stderr, + "Usage: %s [-f ]\n", + argv[0]); + exit(0); + break; + } + } + + if (foreign == NULL) { + fprintf(stderr, "a foreign file/dir pathname must be provided\n"); + exit(0); + } + + rc = llapi_unlink_foreign(foreign); + if (rc < 0) + fprintf(stderr, "llapi_unlink_foreign() error: %d\n", rc); + + return rc; +} +.fi +.SH "SEE ALSO" +.BR lustre (7), +.BR lustreapi (7) diff --git a/lustre/include/lustre/lustreapi.h b/lustre/include/lustre/lustreapi.h index b959bf1..8ff6c8c 100644 --- a/lustre/include/lustre/lustreapi.h +++ b/lustre/include/lustre/lustreapi.h @@ -394,6 +394,7 @@ int llapi_dir_create_pool(const char *name, int flags, int stripe_offset, int stripe_count, int stripe_pattern, const char *poolname); int llapi_direntry_remove(char *dname); +int llapi_unlink_foreign(char *dname); int llapi_obd_fstatfs(int fd, __u32 type, __u32 index, struct obd_statfs *stat_buf, struct obd_uuid *uuid_buf); diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index a13306e..ac8a3d6 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -618,6 +618,7 @@ struct ll_ioc_lease_id { #define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md) #define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64) #define LL_IOC_RMFID _IOR('f', 242, struct fid_array) +#define LL_IOC_UNLOCK_FOREIGN _IO('f', 242) #define LL_IOC_SET_LEASE _IOWR('f', 243, struct ll_ioc_lease) #define LL_IOC_SET_LEASE_OLD _IOWR('f', 243, long) #define LL_IOC_GET_LEASE _IO('f', 244) @@ -1072,7 +1073,7 @@ struct lustre_foreign_type { **/ enum lustre_foreign_types { LU_FOREIGN_TYPE_NONE = 0, - LU_FOREIGN_TYPE_DAOS = 0xda05, + 
LU_FOREIGN_TYPE_SYMLINK = 0xda05, /* must be the max/last one */ LU_FOREIGN_TYPE_UNKNOWN = 0xffffffff, }; @@ -2639,6 +2640,52 @@ struct fid_array { }; #define OBD_MAX_FIDS_IN_ARRAY 4096 +/* more types could be defined upon need for more complex + * format to be used in foreign symlink LOV/LMV EAs, like + * one to describe a delimiter string and occurence number + * of delimited sub-string, ... + */ +enum ll_foreign_symlink_upcall_item_type { + EOB_TYPE = 1, + STRING_TYPE = 2, + POSLEN_TYPE = 3, +}; + +/* may need to be modified to allow for more format items to be defined, and + * like for ll_foreign_symlink_upcall_item_type enum + */ +struct ll_foreign_symlink_upcall_item { + __u32 type; + union { + struct { + __u32 pos; + __u32 len; + }; + struct { + size_t size; + union { + /* internal storage of constant string */ + char *string; + /* upcall stores constant string in a raw */ + char bytestring[0]; + }; + }; + }; +}; + +#define POSLEN_ITEM_SZ (offsetof(struct ll_foreign_symlink_upcall_item, len) + \ + sizeof(((struct ll_foreign_symlink_upcall_item *)0)->len)) +#define STRING_ITEM_SZ(sz) ( \ + offsetof(struct ll_foreign_symlink_upcall_item, bytestring) + \ + (sz + sizeof(__u32) - 1) / sizeof(__u32) * sizeof(__u32)) + +/* presently limited to not cause max stack frame size to be reached + * because of temporary automatic array of + * "struct ll_foreign_symlink_upcall_item" presently used in + * foreign_symlink_upcall_info_store() + */ +#define MAX_NB_UPCALL_ITEMS 32 + #if defined(__cplusplus) } #endif diff --git a/lustre/llite/Makefile.in b/lustre/llite/Makefile.in index dd82b4d..edc2dd7 100644 --- a/lustre/llite/Makefile.in +++ b/lustre/llite/Makefile.in @@ -8,8 +8,10 @@ lustre-objs += lcommon_cl.o lustre-objs += lcommon_misc.o lustre-objs += vvp_dev.o vvp_page.o vvp_io.o vvp_object.o lustre-objs += pcc.o crypto.o +lustre-objs += llite_foreign.o llite_foreign_symlink.o EXTRA_DIST := $(lustre-objs:.o=.c) xattr.c rw26.c super25.c EXTRA_DIST += llite_internal.h 
vvp_internal.h pcc.h +EXTRA_DIST += foreign_symlink.h @INCLUDE_RULES@ diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index c299068..c76382f 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -294,12 +294,22 @@ static int ll_revalidate_dentry(struct dentry *dentry, return 1; /* Symlink - always valid as long as the dentry was found */ + /* only special case is to prevent ELOOP error from VFS during open + * of a foreign symlink file/dir with O_NOFOLLOW, like it happens for + * real symlinks. This will allow to open foreign symlink file/dir + * for get[dir]stripe/unlock ioctl()s. + */ #ifdef HAVE_IOP_GET_LINK - if (dentry->d_inode && dentry->d_inode->i_op->get_link) + if (dentry->d_inode && dentry->d_inode->i_op->get_link) { #else - if (dentry->d_inode && dentry->d_inode->i_op->follow_link) + if (dentry->d_inode && dentry->d_inode->i_op->follow_link) { #endif - return 1; + if (!S_ISLNK(dentry->d_inode->i_mode) && + !(lookup_flags & LOOKUP_FOLLOW)) + return 0; + else + return 1; + } /* * VFS warns us that this is the second go around and previous diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index dbacb2a..a57a11b 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -1662,6 +1662,16 @@ finish_req: return rc; } + case LL_IOC_UNLOCK_FOREIGN: + /* if not a foreign symlink do nothing */ + if (ll_foreign_is_removable(dentry, true)) { + CDEBUG(D_INFO, + "prevent rmdir of non-foreign dir ("DFID")\n", + PFID(ll_inode2fid(inode))); + RETURN(-EOPNOTSUPP); + } + RETURN(0); + case LL_IOC_REMOVE_ENTRY: { char *filename = NULL; int namelen = 0; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 2e0ca9e..092c42d 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2381,12 +2381,12 @@ retry: GOTO(out, rc = PTR_ERR(env)); rc = cl_object_layout_get(env, obj, &cl); - if (!rc && cl.cl_is_composite) + if (rc >= 0 && cl.cl_is_composite) rc = ll_layout_write_intent(inode, LAYOUT_INTENT_WRITE, &ext); cl_env_put(env, &refcheck); - 
if (rc) + if (rc < 0) GOTO(out, rc); } @@ -4073,6 +4073,20 @@ out_state: return -EOPNOTSUPP; return llcrypt_ioctl_get_key_status(file, (void __user *)arg); #endif + + case LL_IOC_UNLOCK_FOREIGN: { + struct dentry *dentry = file_dentry(file); + + /* if not a foreign symlink do nothing */ + if (ll_foreign_is_removable(dentry, true)) { + CDEBUG(D_INFO, + "prevent unlink of non-foreign file ("DFID")\n", + PFID(ll_inode2fid(inode))); + RETURN(-EOPNOTSUPP); + } + RETURN(0); + } + default: RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL, (void __user *)arg)); @@ -4841,7 +4855,7 @@ static int ll_merge_md_attr(struct inode *inode) } int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask, - unsigned int flags) + unsigned int flags, bool foreign) { struct inode *inode = de->d_inode; struct ll_sb_info *sbi = ll_i2sbi(inode); @@ -4867,7 +4881,10 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask, if (rc < 0) RETURN(rc); - if (S_ISREG(inode->i_mode)) { + /* foreign file/dir are always of zero length, so don't + * need to validate size. + */ + if (S_ISREG(inode->i_mode) && !foreign) { bool cached; if (!need_glimpse) @@ -4914,7 +4931,8 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask, } } else { /* If object isn't regular a file then don't validate size. 
*/ - if (ll_dir_striped(inode)) { + /* foreign dir is not striped dir */ + if (ll_dir_striped(inode) && !foreign) { rc = ll_merge_md_attr(inode); if (rc < 0) RETURN(rc); @@ -4941,7 +4959,12 @@ fill_attr: stat->rdev = inode->i_rdev; } - stat->mode = inode->i_mode; + /* foreign symlink to be exposed as a real symlink */ + if (!foreign) + stat->mode = inode->i_mode; + else + stat->mode = (inode->i_mode & ~S_IFMT) | S_IFLNK; + stat->uid = inode->i_uid; stat->gid = inode->i_gid; stat->atime = inode->i_atime; @@ -4990,13 +5013,14 @@ fill_attr: int ll_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { - return ll_getattr_dentry(path->dentry, stat, request_mask, flags); + return ll_getattr_dentry(path->dentry, stat, request_mask, flags, + false); } #else int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) { return ll_getattr_dentry(de, stat, STATX_BASIC_STATS, - AT_STATX_SYNC_AS_STAT); + AT_STATX_SYNC_AS_STAT, false); } #endif @@ -5449,7 +5473,7 @@ int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf) out: cl_env_put(env, &refcheck); - RETURN(rc); + RETURN(rc < 0 ? rc : 0); } /* Fetch layout from MDT with getxattr request, if it's not ready yet */ diff --git a/lustre/llite/foreign_symlink.h b/lustre/llite/foreign_symlink.h new file mode 100644 index 0000000..a44fa5e --- /dev/null +++ b/lustre/llite/foreign_symlink.h @@ -0,0 +1,55 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ + +#ifndef LLITE_FOREIGN_SYMLINK_H +#define LLITE_FOREIGN_SYMLINK_H + +/* llite/llite_foreign_symlink.c */ +#ifdef HAVE_INODEOPS_ENHANCED_GETATTR +int ll_foreign_symlink_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); +#else +int ll_foreign_symlink_getattr(struct vfsmount *mnt, struct dentry *de, + struct kstat *stat); +#endif +ssize_t foreign_symlink_enable_show(struct kobject *kobj, + struct attribute *attr, char *buf); +ssize_t foreign_symlink_enable_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count); +ssize_t foreign_symlink_prefix_show(struct kobject *kobj, + struct attribute *attr, char *buf); +ssize_t foreign_symlink_prefix_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count); +ssize_t foreign_symlink_upcall_show(struct kobject *kobj, + struct attribute *attr, char *buf); +ssize_t foreign_symlink_upcall_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count); +ssize_t foreign_symlink_upcall_info_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count); +extern struct inode_operations ll_foreign_file_symlink_inode_operations; +extern struct inode_operations ll_foreign_dir_symlink_inode_operations; + +#endif /* LLITE_FOREIGN_SYMLINK_H */ diff --git a/lustre/llite/llite_foreign.c b/lustre/llite/llite_foreign.c new file mode 100644 index 0000000..d231284 --- /dev/null +++ b/lustre/llite/llite_foreign.c @@ -0,0 +1,281 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2020 Intel Corporation. + */ +#define DEBUG_SUBSYSTEM S_LLITE + +#include "llite_internal.h" + +static void ll_manage_foreign_file(struct inode *inode, + struct lov_foreign_md *lfm) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + + if (le32_to_cpu(lfm->lfm_type) == LU_FOREIGN_TYPE_SYMLINK) { + CDEBUG(D_INFO, + "%s: inode %p of fid "DFID": Foreign file of type symlink, faking a symlink\n", + sbi->ll_fsname, inode, PFID(ll_inode2fid(inode))); + /* change inode_operations to add symlink methods, and clear + * IOP_NOFOLLOW to ensure file will be treated as a symlink + * by Kernel (see in * d_flags_for_inode()). 
+ */ + inode->i_op = &ll_foreign_file_symlink_inode_operations; + inode->i_opflags &= ~IOP_NOFOLLOW; + } else { + CDEBUG(D_INFO, + "%s: inode %p of fid "DFID": Foreign file of type %ux, nothing special to do\n", + sbi->ll_fsname, inode, PFID(ll_inode2fid(inode)), + le32_to_cpu(lfm->lfm_type)); + } +} + +static void ll_manage_foreign_dir(struct inode *inode, + struct lmv_foreign_md *lfm) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + + if (lfm->lfm_type == LU_FOREIGN_TYPE_SYMLINK) { + CDEBUG(D_INFO, + "%s: inode %p of fid "DFID": Foreign dir of type symlink, faking a symlink\n", + sbi->ll_fsname, inode, PFID(ll_inode2fid(inode))); + /* change inode_operations to add symlink methods + * IOP_NOFOLLOW should not be set for dirs + */ + inode->i_op = &ll_foreign_dir_symlink_inode_operations; + } else { + CDEBUG(D_INFO, + "%s: inode %p of fid "DFID": Foreign dir of type %ux, nothing special to do\n", + sbi->ll_fsname, inode, PFID(ll_inode2fid(inode)), + le32_to_cpu(lfm->lfm_type)); + } +} + +int ll_manage_foreign(struct inode *inode, struct lustre_md *lmd) +{ + int rc = 0; + + ENTRY; + /* apply any foreign file/dir policy */ + if (S_ISREG((inode)->i_mode)) { + struct ll_inode_info *lli = ll_i2info(inode); + struct cl_object *obj = lli->lli_clob; + + if (lmd->layout.lb_buf != NULL && lmd->layout.lb_len != 0) { + struct lov_foreign_md *lfm = lmd->layout.lb_buf; + + if (lfm->lfm_magic == LOV_MAGIC_FOREIGN) + ll_manage_foreign_file(inode, lfm); + GOTO(out, rc); + } + + if (obj) { + struct lov_foreign_md lfm = { + .lfm_magic = LOV_MAGIC, + }; + struct cl_layout cl = { + .cl_buf.lb_buf = &lfm, + .cl_buf.lb_len = sizeof(lfm), + }; + struct lu_env *env; + u16 refcheck; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + GOTO(out, rc = PTR_ERR(env)); + rc = cl_object_layout_get(env, obj, &cl); + /* error is likely to be -ERANGE because of the small + * buffer we use, only the content is significant here + */ + if (rc < 0 && rc != -ERANGE) { + cl_env_put(env, &refcheck); + 
GOTO(out, rc); + } + if (lfm.lfm_magic == LOV_MAGIC_FOREIGN) + ll_manage_foreign_file(inode, &lfm); + cl_env_put(env, &refcheck); + } + } else if (S_ISDIR((inode)->i_mode)) { + if (lmd->lfm != NULL && + lmd->lfm->lfm_magic == LMV_MAGIC_FOREIGN) { + ll_manage_foreign_dir(inode, lmd->lfm); + } else { + struct ll_inode_info *lli = ll_i2info(inode); + struct lmv_foreign_md *lfm; + + down_read(&lli->lli_lsm_sem); + lfm = (struct lmv_foreign_md *)(lli->lli_lsm_md); + if (lfm && lfm->lfm_magic == LMV_MAGIC_FOREIGN) + ll_manage_foreign_dir(inode, lfm); + up_read(&lli->lli_lsm_sem); + } + } +out: + RETURN(rc); +} + +/* dentry must be spliced to inode (dentry->d_inode != NULL) !!! */ +bool ll_foreign_is_openable(struct dentry *dentry, unsigned int flags) +{ + /* check for faked symlink here as they should not be opened (unless + * O_NOFOLLOW!) and thus wants ll_atomic_open() to return 1 from + * finish_no_open() in order to get follow_link() to be called in both + * path_lookupat() and path_openupat(). + * This will not break regular symlink handling as they have + * been treated/filtered upstream. 
+ */ + if (d_is_symlink(dentry) && !S_ISLNK(dentry->d_inode->i_mode) && + !(flags & O_NOFOLLOW)) + return false; + + return true; +} + +static bool should_preserve_foreign_file(struct lov_foreign_md *lfm, + struct ll_inode_info *lli, bool unset) +{ + /* for now, only avoid foreign fake symlink file removal */ + + if (unset) + if (lfm->lfm_type == LU_FOREIGN_TYPE_SYMLINK) { + ll_file_set_flag(lli, LLIF_FOREIGN_REMOVABLE); + return true; + } else { + return false; + } + else + return lfm->lfm_type == LU_FOREIGN_TYPE_SYMLINK && + !ll_file_test_flag(lli, LLIF_FOREIGN_REMOVABLE); +} + +static bool should_preserve_foreign_dir(struct lmv_foreign_md *lfm, + struct ll_inode_info *lli, bool unset) +{ + /* for now, only avoid foreign fake symlink dir removal */ + + if (unset) + if (lfm->lfm_type == LU_FOREIGN_TYPE_SYMLINK) { + ll_file_set_flag(lli, LLIF_FOREIGN_REMOVABLE); + return true; + } else { + return false; + } + else + return lfm->lfm_type == LU_FOREIGN_TYPE_SYMLINK && + !ll_file_test_flag(lli, LLIF_FOREIGN_REMOVABLE); +} + +/* XXX + * instead of fetching type from foreign LOV/LMV, we may simply + * check (d_is_symlink(dentry) && !S_ISLNK(dentry->d_inode->i_mode)) + * to identify a fake symlink + */ +bool ll_foreign_is_removable(struct dentry *dentry, bool unset) +{ + struct inode *inode = dentry->d_inode; + struct qstr *name = &dentry->d_name; + bool preserve_foreign = false; + int rc = 0; + + ENTRY; + if (inode == NULL) + return 0; + + /* some foreign types may not be allowed to be unlinked in order to + * keep references with external objects + */ + if (S_ISREG(inode->i_mode)) { + struct ll_inode_info *lli = ll_i2info(inode); + struct cl_object *obj = lli->lli_clob; + + if (obj) { + struct lov_foreign_md lfm = { + .lfm_magic = LOV_MAGIC, + }; + struct cl_layout cl = { + .cl_buf.lb_buf = &lfm, + .cl_buf.lb_len = sizeof(lfm), + }; + struct lu_env *env; + u16 refcheck; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + GOTO(out, rc = PTR_ERR(env)); + rc = 
cl_object_layout_get(env, obj, &cl); + /* error is likely to be -ERANGE because of the small + * buffer we use, only the content is significant here + */ + if (rc < 0 && rc != -ERANGE) { + cl_env_put(env, &refcheck); + goto out; + } else { + rc = 0; + } + if (lfm.lfm_magic == LOV_MAGIC_FOREIGN) + preserve_foreign = + should_preserve_foreign_file(&lfm, lli, + unset); + cl_env_put(env, &refcheck); + if (preserve_foreign) { + CDEBUG(D_INFO, + "%s unlink of foreign file (%.*s, "DFID")\n", + unset ? "allow" : "prevent", + name->len, name->name, + PFID(ll_inode2fid(inode))); + RETURN(false); + } + } else { + CDEBUG(D_INFO, + "unable to check if file (%.*s, "DFID") is foreign...\n", + name->len, name->name, + PFID(ll_inode2fid(inode))); + /* XXX should we prevent removal ?? */ + } + } else if (S_ISDIR(inode->i_mode)) { + struct ll_inode_info *lli = ll_i2info(inode); + struct lmv_foreign_md *lfm; + + down_read(&lli->lli_lsm_sem); + lfm = (struct lmv_foreign_md *)(lli->lli_lsm_md); + if (!lfm) + CDEBUG(D_INFO, + "unable to check if dir (%.*s, "DFID") is foreign...\n", + name->len, name->name, + PFID(ll_inode2fid(inode))); + else if (lfm->lfm_magic == LMV_MAGIC_FOREIGN) + preserve_foreign = should_preserve_foreign_dir(lfm, lli, + unset); + up_read(&lli->lli_lsm_sem); + if (preserve_foreign) { + CDEBUG(D_INFO, + "%s unlink of foreign dir (%.*s, "DFID")\n", + unset ? "allow" : "prevent", + name->len, name->name, + PFID(ll_inode2fid(inode))); + RETURN(false); + } + } + +out: + RETURN(true); +} diff --git a/lustre/llite/llite_foreign_symlink.c b/lustre/llite/llite_foreign_symlink.c new file mode 100644 index 0000000..c36f363 --- /dev/null +++ b/lustre/llite/llite_foreign_symlink.c @@ -0,0 +1,857 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2020 Intel Corporation. + */ +/* + * Foreign symlink implementation. + * + * Methods in this source file allow to construct a relative path from the + * LOV/LMV foreign content, to complement it with a prefix, and then to + * expose it to the VFS as a symlink destination. + * The default/internal mechanism simply takes the full foreign free string + * as the relative path, and for more complex internal formats an upcall has + * been implemented to provide format's details (presently just in terms of + * constant strings and substrings positions in EA, but this can be enhanced) + * to llite layer. 
+ */ + +#include +#include +#include +#include +#include +#define DEBUG_SUBSYSTEM S_LLITE + +#include "llite_internal.h" + +/* allocate space for "//'\0'" and copy prefix in, + * returns start position for suffix in *destname + * must be called with ll_foreign_symlink_sem locked for read, to + * protect against sbi->ll_foreign_symlink_prefix change + * on output, provides position where to start prefix complement + */ +static int foreign_symlink_alloc_and_copy_prefix(struct ll_sb_info *sbi, + struct inode *inode, + char **destname, + size_t suffix_size) +{ + size_t prefix_size, full_size; + + ENTRY; + + /* allocate enough for "//'\0'" */ + prefix_size = sbi->ll_foreign_symlink_prefix_size - 1; + full_size = suffix_size + prefix_size + 3; + if (full_size > PATH_MAX) { + CERROR("%s: inode "DFID": resolved destination path too long\n", + sbi->ll_fsname, PFID(ll_inode2fid(inode))); + RETURN(-EINVAL); + } + OBD_ALLOC(*destname, full_size); + if (*destname == NULL) + RETURN(-ENOMEM); + + memcpy(*destname + 1, sbi->ll_foreign_symlink_prefix, + prefix_size); + (*destname)[0] = '/'; + (*destname)[prefix_size + 1] = '/'; + + RETURN(prefix_size + 2); +} + +/* if no upcall registered, default foreign symlink parsing method + * is to use the full lfm_value as a relative path to complement + * foreign_prefix + */ +static int ll_foreign_symlink_default_parse(struct ll_sb_info *sbi, + struct inode *inode, + struct lov_foreign_md *lfm, + char **destname) +{ + int suffix_pos; + + down_read(&sbi->ll_foreign_symlink_sem); + suffix_pos = foreign_symlink_alloc_and_copy_prefix(sbi, inode, + destname, + lfm->lfm_length); + up_read(&sbi->ll_foreign_symlink_sem); + + if (suffix_pos < 0) + RETURN(suffix_pos); + + memcpy(*destname + suffix_pos, lfm->lfm_value, + lfm->lfm_length); + (*destname)[suffix_pos + lfm->lfm_length] = '\0'; + + RETURN(0); +} + +/* if an upcall has been registered, foreign symlink will be + * constructed as per upcall provided format + * presently we only support a 
+ * series of constant strings and sub-strings
+ * to be taken from lfm_value content
+ *
+ * on success *destname is an allocated string that the caller has to
+ * release, on error nothing is left allocated and *destname is NULL
+ */
+static int ll_foreign_symlink_upcall_parse(struct ll_sb_info *sbi,
+					   struct inode *inode,
+					   struct lov_foreign_md *lfm,
+					   char **destname)
+{
+	struct ll_foreign_symlink_upcall_item *items;
+	size_t items_size = 0, pos = 0;
+	unsigned int nb_items, i;
+	int suffix_pos = -1;
+	int rc = 0;
+
+	ENTRY;
+
+	down_read(&sbi->ll_foreign_symlink_sem);
+
+	/* format descriptors can be replaced and freed by
+	 * foreign_symlink_upcall_info_store(), so only sample them once
+	 * the semaphore is held
+	 */
+	items = sbi->ll_foreign_symlink_upcall_items;
+	nb_items = sbi->ll_foreign_symlink_upcall_nb_items;
+
+	/* compute size of relative path of destination path
+	 * could be done once during upcall items/infos reading
+	 * and stored as new ll_sb_info field
+	 */
+	for (i = 0; i < nb_items; i++) {
+		switch (items[i].type) {
+		case STRING_TYPE:
+			items_size += items[i].size;
+			break;
+		case POSLEN_TYPE:
+			items_size += items[i].len;
+			break;
+		case EOB_TYPE:
+			/* should be the last item */
+			break;
+		default:
+			CERROR("%s: unexpected type '%u' found in items\n",
+			       sbi->ll_fsname, items[i].type);
+			GOTO(failed, rc = -EINVAL);
+		}
+	}
+
+	suffix_pos = foreign_symlink_alloc_and_copy_prefix(sbi, inode, destname,
+							   items_size);
+	if (suffix_pos < 0)
+		GOTO(failed, rc = suffix_pos);
+
+	/* rescan items[] to create faked symlink dest path, and never walk
+	 * past the last descriptor even if the EOB_TYPE marker is missing
+	 */
+	for (i = 0; i < nb_items && items[i].type != EOB_TYPE; i++) {
+		if (items[i].type == STRING_TYPE) {
+			memcpy(*destname + suffix_pos + pos,
+			       items[i].string, items[i].size);
+			pos += items[i].size;
+		} else if (items[i].type == POSLEN_TYPE) {
+			/* written this way so that pos + len can not wrap */
+			if (items[i].pos > lfm->lfm_length ||
+			    items[i].len > lfm->lfm_length - items[i].pos) {
+				CERROR("%s: "DFID" foreign EA too short to find (%u,%u) item\n",
+				       sbi->ll_fsname,
+				       PFID(ll_inode2fid(inode)),
+				       items[i].pos, items[i].len);
+				GOTO(failed, rc = -EINVAL);
+			}
+			memcpy(*destname + suffix_pos + pos,
+			       lfm->lfm_value + items[i].pos, items[i].len);
+			pos += items[i].len;
+		} else {
+			CERROR("%s: unexpected type '%u' found in items\n",
+			       sbi->ll_fsname, items[i].type);
+			GOTO(failed, rc = -EINVAL);
+		}
+	}
+failed:
+	up_read(&sbi->ll_foreign_symlink_sem);
+
+	if (rc != 0 && suffix_pos >= 0) {
+		/* size that has been allocated: "/<prefix>/" (suffix_pos
+		 * bytes), the items and the final '\0'
+		 */
+		OBD_FREE(*destname, suffix_pos + items_size + 1);
+		*destname = NULL;
+	}
+
+	RETURN(rc);
+}
+
+/* build the faked symlink destination of a foreign file/dir, using the
+ * format provided by the upcall when there is one, the default one if not
+ */
+static int ll_foreign_symlink_parse(struct ll_sb_info *sbi,
+				    struct inode *inode,
+				    struct lov_foreign_md *lfm,
+				    char **destname)
+{
+	int rc;
+
+	/* if no user-land upcall registered, assuming whole free field
+	 * of foreign LOV is relative path of faked symlink destination,
+	 * to be completed by prefix
+	 */
+	if (!(sbi->ll_flags & LL_SBI_FOREIGN_SYMLINK_UPCALL))
+		rc = ll_foreign_symlink_default_parse(sbi, inode, lfm,
+						      destname);
+	else /* upcall is available */
+		rc = ll_foreign_symlink_upcall_parse(sbi, inode, lfm,
+						     destname);
+	return rc;
+}
+
+/* Don't need lli_size_mutex locked as LOV/LMV are EAs
+ * and should not be stored in data blocks
+ *
+ * on success, returns 0 and *symname is the allocated destination path,
+ * to be released by ll_foreign_put_link(); *symname is not modified on error
+ */
+static int ll_foreign_readlink_internal(struct inode *inode, char **symname)
+{
+	struct ll_inode_info *lli = ll_i2info(inode);
+	struct ll_sb_info *sbi = ll_i2sbi(inode);
+	struct lov_foreign_md *lfm = NULL;
+	char *destname = NULL;
+	size_t lfm_size = 0;
+	int rc;
+
+	ENTRY;
+
+	if (S_ISREG(inode->i_mode)) {
+		struct cl_object *obj = lli->lli_clob;
+		struct cl_layout cl = {
+			.cl_buf.lb_len = 0, /* to get real size */
+		};
+		struct lu_env *env;
+		u16 refcheck;
+
+		if (!obj) {
+			CERROR("%s: inode "DFID": can not get layout, no cl_object\n",
+			       sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+			GOTO(failed, rc = -EINVAL);
+		}
+
+		env = cl_env_get(&refcheck);
+		if (IS_ERR(env))
+			RETURN(PTR_ERR(env));
+		/* get layout size */
+		rc = cl_object_layout_get(env, obj, &cl);
+		if (rc <= 0) {
+			/* an empty layout must not be returned as a success,
+			 * callers would then use an unset symname
+			 */
+			if (rc == 0)
+				rc = -ENODATA;
+			CERROR("%s: inode "DFID": error trying to get layout size : %d\n",
+			       sbi->ll_fsname, PFID(ll_inode2fid(inode)), rc);
+			cl_env_put(env, &refcheck);
+			RETURN(rc);
+		}
+		OBD_ALLOC(lfm, rc);
+		if (!lfm) {
+			CERROR("%s: inode "DFID": can not allocate enough mem to get layout\n",
+			       sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+			cl_env_put(env, &refcheck);
+			RETURN(-ENOMEM);
+		}
+		cl.cl_buf.lb_len = rc;
+		cl.cl_buf.lb_buf = lfm;
+		/* get layout */
+		rc = cl_object_layout_get(env, obj, &cl);
+		if (rc <= 0) {
+			if (rc == 0)
+				rc = -ENODATA;
+			CERROR("%s: inode "DFID": error trying to get layout : %d\n",
+			       sbi->ll_fsname, PFID(ll_inode2fid(inode)), rc);
+			OBD_FREE(lfm, cl.cl_buf.lb_len);
+			cl_env_put(env, &refcheck);
+			RETURN(rc);
+		}
+		lfm_size = cl.cl_buf.lb_len;
+		cl_env_put(env, &refcheck);
+
+		/* parsing copies up to lfm_length bytes out of lfm_value,
+		 * they must all be part of what has just been fetched
+		 */
+		if (lfm_size < offsetof(struct lov_foreign_md, lfm_value) ||
+		    lfm->lfm_length >
+		    lfm_size - offsetof(struct lov_foreign_md, lfm_value)) {
+			CERROR("%s: inode "DFID": inconsistent foreign layout length\n",
+			       sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+			GOTO(failed, rc = -EINVAL);
+		}
+	} else if (S_ISDIR(inode->i_mode)) {
+		down_read(&lli->lli_lsm_sem);
+
+		/* should be casted lmv_foreign_md, but it is ok as both foreign LOV
+		 * and LMV formats are identical, and then we also only need
+		 * one set of parsing routines for both foreign files and dirs!
+		 */
+		lfm = (struct lov_foreign_md *)(lli->lli_lsm_md);
+		if (lfm != NULL) {
+			CDEBUG(D_INFO, "%s: inode "DFID": LMV cached found\n",
+			       sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+		} else {
+			CERROR("%s: inode "DFID": cannot get layout, no LMV cached\n",
+			       sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+			GOTO(failed, rc = -EINVAL);
+		}
+	} else {
+		CERROR("%s: inode "DFID": not a regular file nor directory\n",
+		       sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+		GOTO(failed, rc = -EINVAL);
+	}
+
+	/* XXX no assert nor double check of magic and type ? */
+
+	rc = ll_foreign_symlink_parse(sbi, inode, lfm, &destname);
+
+failed:
+	/* lli_lsm_sem has only been taken in the directory case */
+	if (S_ISDIR(inode->i_mode))
+		up_read(&lli->lli_lsm_sem);
+
+	/* and the layout copy only exists in the regular file case */
+	if (S_ISREG(inode->i_mode) && lfm)
+		OBD_FREE(lfm, lfm_size);
+
+	if (!rc) {
+		*symname = destname;
+		CDEBUG(D_INFO,
+		       "%s: inode "DFID": faking symlink to dest '%s'\n",
+		       sbi->ll_fsname, PFID(ll_inode2fid(inode)), destname);
+	}
+
+	RETURN(rc);
+}
+
+#ifdef HAVE_SYMLINK_OPS_USE_NAMEIDATA
+static void ll_foreign_put_link(struct dentry *dentry,
+				struct nameidata *nd, void *cookie)
+#else
+# ifdef HAVE_IOP_GET_LINK
+static void ll_foreign_put_link(void *cookie)
+# else
+static void ll_foreign_put_link(struct inode *unused, void *cookie)
+# endif
+#endif
+{
+	/* nothing to release if the destination path could not be built */
+	if (IS_ERR_OR_NULL(cookie))
+		return;
+
+	/* to avoid allocating an unnecessary big buffer, and since ways to
+	 * build the symlink path from foreign LOV/LMV can be multiple and
+	 * not constant. So its size is not known and we need to use
+	 * strlen(cookie)+1 to determine its size and to avoid false positive
+	 * to be reported by memory leak check code
+	 */
+	OBD_FREE_LARGE(cookie, strlen(cookie) + 1);
+}
+
+#ifdef HAVE_SYMLINK_OPS_USE_NAMEIDATA
+static void *ll_foreign_follow_link(struct dentry *dentry,
+				    struct nameidata *nd)
+{
+	struct inode *inode = dentry->d_inode;
+	int rc;
+	char *symname = NULL;
+
+	ENTRY;
+
+	CDEBUG(D_VFSTRACE, "VFS Op\n");
+	/*
+	 * Limit the recursive symlink depth to 5 instead of default
+	 * 8 links when kernel has 4k stack to prevent stack overflow.
+	 * For 8k stacks we need to limit it to 7 for local servers.
+	 */
+	if (THREAD_SIZE < 8192 && current->link_count >= 6)
+		rc = -ELOOP;
+	else if (THREAD_SIZE == 8192 && current->link_count >= 8)
+		rc = -ELOOP;
+	else
+		rc = ll_foreign_readlink_internal(inode, &symname);
+
+	if (rc)
+		symname = ERR_PTR(rc);
+
+	nd_set_link(nd, symname);
+	RETURN(symname);
+}
+
+#elif defined(HAVE_IOP_GET_LINK)
+static const char *ll_foreign_get_link(struct dentry *dentry,
+				       struct inode *inode,
+				       struct delayed_call *done)
+{
+	char *symname = NULL;
+	int rc;
+
+	ENTRY;
+	CDEBUG(D_VFSTRACE, "VFS Op\n");
+	if (!dentry)
+		RETURN(ERR_PTR(-ECHILD));
+	rc = ll_foreign_readlink_internal(inode, &symname);
+	/* no buffer to release on error, so do not register the delayed
+	 * call that would run ll_foreign_put_link() on an unset symname
+	 */
+	if (rc)
+		RETURN(ERR_PTR(rc));
+
+	/*
+	 * symname must be freed when we are done
+	 *
+	 * XXX we may avoid the need to do so if we use
+	 * lli_symlink_name cache to retain symname and
+	 * let ll_clear_inode free it...
+	 */
+	set_delayed_call(done, ll_foreign_put_link, symname);
+	RETURN(symname);
+}
+
+# else /* !HAVE_IOP_GET_LINK */
+static const char *ll_foreign_follow_link(struct dentry *dentry,
+					  void **cookie)
+{
+	struct inode *inode = d_inode(dentry);
+	char *symname = NULL;
+	int rc;
+
+	ENTRY;
+
+	CDEBUG(D_VFSTRACE, "VFS Op\n");
+	rc = ll_foreign_readlink_internal(inode, &symname);
+	if (rc < 0)
+		RETURN(ERR_PTR(rc));
+
+	/* hand symname over as the cookie, this is what
+	 * ll_foreign_put_link() gets to release it once the VFS is done
+	 */
+	*cookie = symname;
+
+	RETURN(symname);
+}
+
+#endif /* HAVE_SYMLINK_OPS_USE_NAMEIDATA, HAVE_IOP_GET_LINK */
+
+/*
+ * Should only be called for already in-use/cache foreign dir inode
+ * when foreign fake-symlink behaviour has been enabled afterward
+ */
+static struct dentry *ll_foreign_dir_lookup(struct inode *parent,
+					    struct dentry *dentry,
+					    unsigned int flags)
+{
+	CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s, dir="DFID"(%p)\n",
+	       dentry->d_name.len, dentry->d_name.name,
+	       PFID(ll_inode2fid(parent)), parent);
+
+	return ERR_PTR(-ENODATA);
+}
+
+/* check that the root of current process is on the mount of this client,
+ * and warn on console if it is not
+ */
+static bool has_same_mount_namespace(struct ll_sb_info *sbi)
+{
+	int rc;
+
+	rc = (sbi->ll_mnt.mnt == current->fs->root.mnt);
+	if (!rc)
+		LCONSOLE_WARN("%s: client mount %s and '%s.%d' not in same mnt-namespace\n",
+			      sbi->ll_fsname, sbi->ll_kset.kobj.name,
+			      current->comm, current->pid);
+
+	return rc;
+}
+
+/* sysfs: report if foreign fake-symlink support is enabled (1) or not (0) */
+ssize_t foreign_symlink_enable_show(struct kobject *kobj,
+				    struct attribute *attr, char *buf)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kset.kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			!!(sbi->ll_flags & LL_SBI_FOREIGN_SYMLINK));
+}
+
+/*
+ * XXX
+ * There should be already in-use/cached inodes of foreign files/dirs who
+ * will not-be/continue-to-be handled as fake-symlink, depending if
+ * feature is being enabled/disabled, until being revalidated.
+ * Also, does it require sbi->ll_lock protection ?
+ */
+ssize_t foreign_symlink_enable_store(struct kobject *kobj,
+				     struct attribute *attr,
+				     const char *buffer, size_t count)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kset.kobj);
+	unsigned int val;
+	int rc;
+
+	if (!has_same_mount_namespace(sbi))
+		return -EINVAL;
+
+	rc = kstrtouint(buffer, 10, &val);
+	if (rc)
+		return rc;
+
+	/* any non-zero value enables the feature */
+	if (val)
+		sbi->ll_flags |= LL_SBI_FOREIGN_SYMLINK;
+	else
+		sbi->ll_flags &= ~LL_SBI_FOREIGN_SYMLINK;
+
+	return count;
+}
+
+/* sysfs: show the prefix used to build foreign fake-symlink destination */
+ssize_t foreign_symlink_prefix_show(struct kobject *kobj,
+				    struct attribute *attr, char *buf)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kset.kobj);
+	ssize_t size;
+
+	down_read(&sbi->ll_foreign_symlink_sem);
+	size = snprintf(buf, PAGE_SIZE, "%s\n", sbi->ll_foreign_symlink_prefix);
+	up_read(&sbi->ll_foreign_symlink_sem);
+
+	return size;
+}
+
+/* sysfs: set the prefix used to build foreign fake-symlink destination,
+ * it must be an absolute path, end-of-line characters at its end are
+ * ignored
+ *
+ * returns count once the new prefix is in place, negative errno if not
+ */
+ssize_t foreign_symlink_prefix_store(struct kobject *kobj,
+				     struct attribute *attr,
+				     const char *buffer, size_t count)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kset.kobj);
+	char *new, *old;
+	size_t new_len, old_len;
+
+	if (!has_same_mount_namespace(sbi))
+		return -EINVAL;
+
+	new_len = strnlen(buffer, count);
+	if (new_len < count)
+		CDEBUG(D_INFO, "NUL byte found in %zu bytes\n", count);
+	/* do not make the end-of-line added by tools like echo part of
+	 * the prefix
+	 */
+	while (new_len > 0 && (buffer[new_len - 1] == '\n' ||
+			       buffer[new_len - 1] == '\r'))
+		new_len--;
+
+	/* check buffer looks like a valid absolute path */
+	if (new_len == 0 || buffer[0] != '/') {
+		CERROR("foreign symlink prefix must be an absolute path\n");
+		return -EINVAL;
+	}
+	if (new_len > PATH_MAX) {
+		CERROR("%s: foreign symlink prefix length %zu > PATH_MAX\n",
+		       sbi->ll_fsname, new_len);
+		return -EINVAL;
+	}
+	OBD_ALLOC(new, new_len + 1);
+	if (new == NULL) {
+		CERROR("%s: can not allocate space for foreign path prefix\n",
+		       sbi->ll_fsname);
+		return -ENOMEM;
+	}
+	memcpy(new, buffer, new_len);
+	new[new_len] = '\0';
+
+	down_write(&sbi->ll_foreign_symlink_sem);
+	old_len = sbi->ll_foreign_symlink_prefix_size;
+	old = sbi->ll_foreign_symlink_prefix;
+
+	sbi->ll_foreign_symlink_prefix = new;
+	sbi->ll_foreign_symlink_prefix_size = new_len + 1;
+	up_write(&sbi->ll_foreign_symlink_sem);
+
+	if (old)
+		OBD_FREE(old, old_len);
+
+	/* whole input has been consumed, anything less than count would
+	 * make the writer send the remaining bytes as a new value
+	 */
+	return count;
+}
+
+/* sysfs: show the path of the foreign symlink upcall, "none" if unset */
+ssize_t foreign_symlink_upcall_show(struct kobject *kobj,
+				    struct attribute *attr, char *buf)
+{
+	ssize_t size;
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kset.kobj);
+
+	down_read(&sbi->ll_foreign_symlink_sem);
+	size = snprintf(buf, PAGE_SIZE, "%s\n", sbi->ll_foreign_symlink_upcall);
+	up_read(&sbi->ll_foreign_symlink_sem);
+
+	return size;
+}
+
+/* sysfs: set the path of the foreign symlink upcall, either an absolute
+ * path or "none", end-of-line characters at its end are ignored
+ * unless "none", the upcall is run with the sbi sysfs object name as its
+ * only argument
+ *
+ * returns count once the new path is in place, negative errno if not
+ */
+ssize_t foreign_symlink_upcall_store(struct kobject *kobj,
+				     struct attribute *attr,
+				     const char *buffer, size_t count)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kset.kobj);
+	char *old = NULL, *new = NULL;
+	size_t new_len;
+
+	if (!has_same_mount_namespace(sbi))
+		return -EINVAL;
+
+	new_len = strnlen(buffer, count);
+	if (new_len < count)
+		CDEBUG(D_INFO, "NULL byte found in %zu bytes\n", count);
+	/* do not make the end-of-line added by tools like echo part of
+	 * the path, "none\n" would not be recognized either
+	 */
+	while (new_len > 0 && (buffer[new_len - 1] == '\n' ||
+			       buffer[new_len - 1] == '\r'))
+		new_len--;
+
+	/* check buffer looks like a valid absolute path */
+	if (!(new_len > 0 && buffer[0] == '/') &&
+	    !(new_len == 4 && strncmp(buffer, "none", 4) == 0)) {
+		CERROR("foreign symlink upcall must be an absolute path\n");
+		return -EINVAL;
+	}
+	if (new_len > PATH_MAX) {
+		CERROR("%s: foreign symlink upcall path length %zu > PATH_MAX\n",
+		       sbi->ll_fsname, new_len);
+		return -EINVAL;
+	}
+
+	OBD_ALLOC(new, new_len + 1);
+	if (new == NULL) {
+		CERROR("%s: can not allocate space for foreign symlink upcall path\n",
+		       sbi->ll_fsname);
+		return -ENOMEM;
+	}
+	memcpy(new, buffer, new_len);
+	new[new_len] = '\0';
+
+	down_write(&sbi->ll_foreign_symlink_sem);
+	old = sbi->ll_foreign_symlink_upcall;
+
+	sbi->ll_foreign_symlink_upcall = new;
+	/* LL_SBI_FOREIGN_SYMLINK_UPCALL will be set by
+	 * foreign_symlink_upcall_info_store() upon valid being provided
+	 * by upcall
+	 * XXX there is a potential race if there are multiple concurrent
+	 * attempts to set upcall path and execution occur in different
+	 * order, we may end up using the format provided by a different
+	 * upcall than the one set in ll_foreign_symlink_upcall
+	 */
+	sbi->ll_flags &= ~LL_SBI_FOREIGN_SYMLINK_UPCALL;
+	up_write(&sbi->ll_foreign_symlink_sem);
+
+	/* NOTE(review): "new" is used below without the semaphore, while a
+	 * concurrent store could already have replaced and freed it
+	 */
+	if (strcmp(new, "none")) {
+		char *argv[] = {
+			  [0] = new,
+			  /* sbi sysfs object name */
+			  [1] = (char *)sbi->ll_kset.kobj.name,
+			  [2] = NULL
+		};
+		char *envp[] = {
+			  [0] = "HOME=/",
+			  [1] = "PATH=/sbin:/usr/sbin",
+			  [2] = NULL
+		};
+		int rc;
+
+		rc = call_usermodehelper(new, argv, envp, UMH_WAIT_EXEC);
+		if (rc < 0)
+			CERROR("%s: error invoking foreign symlink upcall %s: rc %d\n",
+			       sbi->ll_fsname, new, rc);
+		else
+			CDEBUG(D_INFO, "%s: invoked upcall %s\n",
+			       sbi->ll_fsname, new);
+	}
+
+	/* the upcall path is always allocated for its string length + 1 */
+	if (old)
+		OBD_FREE(old, strlen(old) + 1);
+
+	/* whole input has been consumed, anything less than count would
+	 * make the writer send the remaining bytes as a new value
+	 */
+	return count;
+}
+
+/* foreign_symlink_upcall_info_store() stores format items in
+ * foreign_symlink_items[], and foreign_symlink_upcall_parse() +
* uses it to parse each foreign symlink LOV/LMV EAs
+ *
+ * buffer is a sequence of items, each one starting with its type:
+ * a STRING_TYPE item carries a constant string and its size, a
+ * POSLEN_TYPE item carries the (pos,len) of a sub-string of lfm_value,
+ * and an EOB_TYPE item must end the sequence
+ *
+ * returns count once the new format is in place, negative errno if not
+ */
+ssize_t foreign_symlink_upcall_info_store(struct kobject *kobj,
+					  struct attribute *attr,
+					  const char *buffer, size_t count)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kset.kobj);
+	struct ll_foreign_symlink_upcall_item items[MAX_NB_UPCALL_ITEMS], *item;
+	struct ll_foreign_symlink_upcall_item *new_items, *old_items;
+	size_t remaining = count;
+	int nb_items = 0, old_nb_items, i, rc = 0;
+
+	ENTRY;
+
+	if (!has_same_mount_namespace(sbi))
+		RETURN(-EINVAL);
+
+	/* parse buffer to check validity of infos and fill symlink format
+	 * descriptors
+	 */
+
+	if (count % sizeof(__u32) != 0) {
+		CERROR("%s: invalid size '%zu' of infos buffer returned by foreign symlink upcall\n",
+		       sbi->ll_fsname, count);
+		RETURN(-EINVAL);
+	}
+
+	/* evaluate number of items provided */
+	while (remaining > 0) {
+		item = (struct ll_foreign_symlink_upcall_item *)
+				&buffer[count - remaining];
+		switch (item->type) {
+		case STRING_TYPE: {
+			const size_t hdr_size = offsetof(
+					struct ll_foreign_symlink_upcall_item,
+					bytestring);
+
+			/* a constant string following
+			 * the item header, the string and the final EOB_TYPE
+			 * item must all fit in the remaining bytes; tests are
+			 * ordered so that nothing is read past them and no
+			 * subtraction can wrap around
+			 */
+			if (remaining < hdr_size + sizeof(item->type) ||
+			    item->size >=
+			    remaining - hdr_size - sizeof(item->type) ||
+			    STRING_ITEM_SZ(item->size) > remaining) {
+				CERROR("%s: constant string too long in infos buffer returned by foreign symlink upcall\n",
+				       sbi->ll_fsname);
+				GOTO(failed, rc = -EINVAL);
+			}
+			OBD_ALLOC(items[nb_items].string,
+				  item->size);
+			if (items[nb_items].string == NULL) {
+				CERROR("%s: constant string allocation has failed for constant string of size %zu\n",
+				       sbi->ll_fsname, item->size);
+				GOTO(failed, rc = -ENOMEM);
+			}
+			memcpy(items[nb_items].string,
+			       item->bytestring, item->size);
+			items[nb_items].size = item->size;
+			/* string items to fit on __u32 boundary */
+			remaining -= STRING_ITEM_SZ(item->size);
+			break;
+		}
+		case POSLEN_TYPE: {
+			/* a tuple (pos,len) following to delimit a sub-string
+			 * in lfm_value
+			 */
+			if (remaining < POSLEN_ITEM_SZ) {
+				CERROR("%s: truncated (pos,len) item in infos buffer returned by foreign symlink upcall\n",
+				       sbi->ll_fsname);
+				GOTO(failed, rc = -EINVAL);
+			}
+			items[nb_items].pos = item->pos;
+			items[nb_items].len = item->len;
+			remaining -= POSLEN_ITEM_SZ;
+			break;
+		}
+		case EOB_TYPE:
+			if (remaining != sizeof(item->type)) {
+				CERROR("%s: early end of infos buffer returned by foreign symlink upcall\n",
+				       sbi->ll_fsname);
+				GOTO(failed, rc = -EINVAL);
+			}
+			remaining -= sizeof(item->type);
+			break;
+		default:
+			CERROR("%s: wrong type '%u' encountered at pos %zu , with %zu remaining bytes, in infos buffer returned by foreign symlink upcall\n",
+			       sbi->ll_fsname, item->type,
+			       count - remaining, remaining);
+			GOTO(failed, rc = -EINVAL);
+		}
+
+		items[nb_items].type = item->type;
+		nb_items++;
+		if (nb_items >= MAX_NB_UPCALL_ITEMS) {
+			CERROR("%s: too many items in infos buffer returned by foreign symlink upcall\n",
+			       sbi->ll_fsname);
+			GOTO(failed, rc = -EINVAL);
+		}
+	}
+
+	/* parsing of foreign symlinks relies on the EOB_TYPE item to find
+	 * the end of the format
+	 */
+	if (nb_items == 0 || items[nb_items - 1].type != EOB_TYPE) {
+		CERROR("%s: missing end marker in infos buffer returned by foreign symlink upcall\n",
+		       sbi->ll_fsname);
+		GOTO(failed, rc = -EINVAL);
+	}
+
+	/* valid format has been provided by foreign symlink user upcall */
+	OBD_ALLOC_LARGE(new_items, nb_items *
+			sizeof(struct ll_foreign_symlink_upcall_item));
+	if (new_items == NULL) {
+		CERROR("%s: format items allocation has failed for size %zu\n",
+		       sbi->ll_fsname, nb_items *
+		       sizeof(struct ll_foreign_symlink_upcall_item));
+		GOTO(failed, rc = -ENOMEM);
+	}
+	/* constant strings now belong to new_items[] */
+	for (i = 0; i < nb_items; i++)
+		new_items[i] = items[i];
+
+	down_write(&sbi->ll_foreign_symlink_sem);
+	old_items = sbi->ll_foreign_symlink_upcall_items;
+	old_nb_items = sbi->ll_foreign_symlink_upcall_nb_items;
+	sbi->ll_foreign_symlink_upcall_items = new_items;
+	sbi->ll_foreign_symlink_upcall_nb_items = nb_items;
+	sbi->ll_flags |= LL_SBI_FOREIGN_SYMLINK_UPCALL;
+	up_write(&sbi->ll_foreign_symlink_sem);
+
+	/* free old_items */
+	if (old_items != NULL) {
+		for (i = 0 ; i < old_nb_items; i++)
+			if (old_items[i].type == STRING_TYPE)
+				OBD_FREE(old_items[i].string,
+					 old_items[i].size);
+
+		OBD_FREE_LARGE(old_items, old_nb_items *
+			       sizeof(struct ll_foreign_symlink_upcall_item));
+	}
+
+failed:
+	/* on error, free the constant strings gathered so far; only fully
+	 * parsed items are counted in nb_items
+	 */
+	if (rc != 0) {
+		for (i = 0; i < nb_items; i++)
+			if (items[i].type == STRING_TYPE)
+				OBD_FREE(items[i].string, items[i].size);
+	}
+
+	RETURN(rc == 0 ? count : rc);
+}
+
+/* inode operations to have a foreign file handled as a symlink */
+struct inode_operations ll_foreign_file_symlink_inode_operations = {
+#ifdef HAVE_IOP_GENERIC_READLINK
+	.readlink	= generic_readlink,
+#endif
+	.setattr	= ll_setattr,
+#ifdef HAVE_IOP_GET_LINK
+	.get_link	= ll_foreign_get_link,
+#else
+	.follow_link	= ll_foreign_follow_link,
+	/* .put_link method required since need to release symlink copy buf */
+	.put_link	= ll_foreign_put_link,
+#endif
+	.getattr	= ll_foreign_symlink_getattr,
+	.permission	= ll_inode_permission,
+#ifdef HAVE_IOP_XATTR
+	.setxattr	= ll_setxattr,
+	.getxattr	= ll_getxattr,
+	.removexattr	= ll_removexattr,
+#endif
+	.listxattr	= ll_listxattr,
+};
+
+/* inode operations to have a foreign dir handled as a symlink */
+struct inode_operations ll_foreign_dir_symlink_inode_operations = {
+	.lookup		= ll_foreign_dir_lookup,
+#ifdef HAVE_IOP_GENERIC_READLINK
+	.readlink	= generic_readlink,
+#endif
+	.setattr	= ll_setattr,
+#ifdef HAVE_IOP_GET_LINK
+	.get_link	= ll_foreign_get_link,
+#else
+	.follow_link	= ll_foreign_follow_link,
+	.put_link	= ll_foreign_put_link,
+#endif
+	.getattr	= ll_foreign_symlink_getattr,
+	.permission	= ll_inode_permission,
+#ifdef HAVE_IOP_XATTR
+	.setxattr	= ll_setxattr,
+	.getxattr	= ll_getxattr,
+	.removexattr	= ll_removexattr,
+#endif
+	.listxattr	= ll_listxattr,
+};
+
+/* foreign fake-symlink version of ll_getattr() */
+#ifdef HAVE_INODEOPS_ENHANCED_GETATTR
+int ll_foreign_symlink_getattr(const struct path *path, struct kstat *stat,
+			       u32 request_mask, unsigned int flags)
+{
+	return ll_getattr_dentry(path->dentry, stat,
request_mask, flags, + true); +} +#else +int ll_foreign_symlink_getattr(struct vfsmount *mnt, struct dentry *de, + struct kstat *stat) +{ + return ll_getattr_dentry(de, stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT, true); +} +#endif diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 45e6864..16e4fdd 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -50,6 +50,7 @@ #include "vvp_internal.h" #include "pcc.h" +#include "foreign_symlink.h" #ifndef FMODE_EXEC #define FMODE_EXEC 0 @@ -403,6 +404,8 @@ enum ll_file_flags { LLIF_PROJECT_INHERIT = 3, /* update atime from MDS even if it's older than local inode atime. */ LLIF_UPDATE_ATIME = 4, + /* foreign file/dir can be unlinked unconditionnaly */ + LLIF_FOREIGN_REMOVABLE = 5, }; @@ -651,6 +654,9 @@ enum stats_track_type { #define LL_SBI_FILE_HEAT 0x4000000 /* file heat support */ #define LL_SBI_TEST_DUMMY_ENCRYPTION 0x8000000 /* test dummy encryption */ #define LL_SBI_ENCRYPT 0x10000000 /* client side encryption */ +#define LL_SBI_FOREIGN_SYMLINK 0x20000000 /* foreign fake-symlink support */ +/* foreign fake-symlink upcall registered */ +#define LL_SBI_FOREIGN_SYMLINK_UPCALL 0x40000000 #define LL_SBI_FLAGS { \ "nolck", \ "checksum", \ @@ -681,6 +687,8 @@ enum stats_track_type { "file_heat", \ "test_dummy_encryption", \ "noencrypt", \ + "foreign_symlink", \ + "foreign_symlink_upcall", \ } /* This is embedded into llite super-blocks to keep track of connect @@ -781,6 +789,19 @@ struct ll_sb_info { /* Persistent Client Cache */ struct pcc_super ll_pcc_super; + + /* to protect vs updates in all following foreign symlink fields */ + struct rw_semaphore ll_foreign_symlink_sem; + /* foreign symlink path prefix */ + char *ll_foreign_symlink_prefix; + /* full prefix size including leading '\0' */ + size_t ll_foreign_symlink_prefix_size; + /* foreign symlink path upcall */ + char *ll_foreign_symlink_upcall; + /* foreign symlink path upcall infos */ + struct 
ll_foreign_symlink_upcall_item *ll_foreign_symlink_upcall_items; + /* foreign symlink path upcall nb infos */ + unsigned int ll_foreign_symlink_upcall_nb_items; }; #define SBI_DEFAULT_HEAT_DECAY_WEIGHT ((80 * 256 + 50) / 100) @@ -965,6 +986,11 @@ static inline bool ll_sbi_has_file_heat(struct ll_sb_info *sbi) return !!(sbi->ll_flags & LL_SBI_FILE_HEAT); } +static inline bool ll_sbi_has_foreign_symlink(struct ll_sb_info *sbi) +{ + return !!(sbi->ll_flags & LL_SBI_FOREIGN_SYMLINK); +} + void ll_ras_enter(struct file *f, loff_t pos, size_t count); /* llite/lcommon_misc.c */ @@ -1102,7 +1128,7 @@ int ll_getattr(const struct path *path, struct kstat *stat, int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); #endif int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask, - unsigned int flags); + unsigned int flags, bool foreign); struct posix_acl *ll_get_acl(struct inode *inode, int type); #ifdef HAVE_IOP_SET_ACL #ifdef CONFIG_LUSTRE_FS_POSIX_ACL @@ -1699,5 +1725,9 @@ static inline struct pcc_super *ll_info2pccs(struct ll_inode_info *lli) /* crypto.c */ extern const struct llcrypt_operations lustre_cryptops; #endif +/* llite/llite_foreign.c */ +int ll_manage_foreign(struct inode *inode, struct lustre_md *lmd); +bool ll_foreign_is_openable(struct dentry *dentry, unsigned int flags); +bool ll_foreign_is_removable(struct dentry *dentry, bool unset); #endif /* LLITE_INTERNAL_H */ diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index bf5a09a..1f094ce 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -129,6 +129,25 @@ static struct ll_sb_info *ll_init_sbi(void) if (sbi->ll_cache == NULL) GOTO(out_destroy_ra, rc = -ENOMEM); + /* initialize foreign symlink prefix path */ + OBD_ALLOC(sbi->ll_foreign_symlink_prefix, sizeof("/mnt/")); + if (sbi->ll_foreign_symlink_prefix == NULL) + GOTO(out_destroy_ra, rc = -ENOMEM); + memcpy(sbi->ll_foreign_symlink_prefix, "/mnt/", sizeof("/mnt/")); + 
sbi->ll_foreign_symlink_prefix_size = sizeof("/mnt/"); + + /* initialize foreign symlink upcall path, none by default */ + OBD_ALLOC(sbi->ll_foreign_symlink_upcall, sizeof("none")); + if (sbi->ll_foreign_symlink_upcall == NULL) + GOTO(out_destroy_ra, rc = -ENOMEM); + memcpy(sbi->ll_foreign_symlink_upcall, "none", sizeof("none")); + sbi->ll_foreign_symlink_upcall_items = NULL; + sbi->ll_foreign_symlink_upcall_nb_items = 0; + init_rwsem(&sbi->ll_foreign_symlink_sem); + /* foreign symlink support (LL_SBI_FOREIGN_SYMLINK in ll_flags) + * not enabled by default + */ + sbi->ll_ra_info.ra_max_pages = min(pages / 32, SBI_DEFAULT_READ_AHEAD_MAX); sbi->ll_ra_info.ra_max_pages_per_file = @@ -183,6 +202,12 @@ static struct ll_sb_info *ll_init_sbi(void) sbi->ll_heat_period_second = SBI_DEFAULT_HEAT_PERIOD_SECOND; RETURN(sbi); out_destroy_ra: + if (sbi->ll_foreign_symlink_prefix) + OBD_FREE(sbi->ll_foreign_symlink_prefix, sizeof("/mnt/")); + if (sbi->ll_cache) { + cl_cache_decref(sbi->ll_cache); + sbi->ll_cache = NULL; + } destroy_workqueue(sbi->ll_ra_info.ll_readahead_wq); out_pcc: pcc_super_fini(&sbi->ll_pcc_super); @@ -205,6 +230,32 @@ static void ll_free_sbi(struct super_block *sb) cl_cache_decref(sbi->ll_cache); sbi->ll_cache = NULL; } + if (sbi->ll_foreign_symlink_prefix) { + OBD_FREE(sbi->ll_foreign_symlink_prefix, + sbi->ll_foreign_symlink_prefix_size); + sbi->ll_foreign_symlink_prefix = NULL; + } + if (sbi->ll_foreign_symlink_upcall) { + OBD_FREE(sbi->ll_foreign_symlink_upcall, + strlen(sbi->ll_foreign_symlink_upcall) + + 1); + sbi->ll_foreign_symlink_upcall = NULL; + } + if (sbi->ll_foreign_symlink_upcall_items) { + int i; + int nb_items = sbi->ll_foreign_symlink_upcall_nb_items; + struct ll_foreign_symlink_upcall_item *items = + sbi->ll_foreign_symlink_upcall_items; + + for (i = 0 ; i < nb_items; i++) + if (items[i].type == STRING_TYPE) + OBD_FREE(items[i].string, + items[i].size); + + OBD_FREE_LARGE(items, nb_items * + sizeof(struct ll_foreign_symlink_upcall_item)); 
+ sbi->ll_foreign_symlink_upcall_items = NULL; + } pcc_super_fini(&sbi->ll_pcc_super); OBD_FREE(sbi, sizeof(*sbi)); } @@ -988,6 +1039,58 @@ static int ll_options(char *options, struct ll_sb_info *sbi) #endif goto next; } + tmp = ll_set_opt("foreign_symlink", s1, LL_SBI_FOREIGN_SYMLINK); + if (tmp) { + int prefix_pos = sizeof("foreign_symlink=") - 1; + int equal_pos = sizeof("foreign_symlink=") - 2; + + /* non-default prefix provided ? */ + if (strlen(s1) >= sizeof("foreign_symlink=") && + *(s1 + equal_pos) == '=') { + char *old = sbi->ll_foreign_symlink_prefix; + size_t old_len = + sbi->ll_foreign_symlink_prefix_size; + + /* path must be absolute */ + if (*(s1 + sizeof("foreign_symlink=") + - 1) != '/') { + LCONSOLE_ERROR_MSG(0x152, + "foreign prefix '%s' must be an absolute path\n", + s1 + prefix_pos); + RETURN(-EINVAL); + } + /* last option ? */ + s2 = strchrnul(s1 + prefix_pos, ','); + + if (sbi->ll_foreign_symlink_prefix) { + sbi->ll_foreign_symlink_prefix = NULL; + sbi->ll_foreign_symlink_prefix_size = 0; + } + /* alloc for path length and '\0' */ + OBD_ALLOC(sbi->ll_foreign_symlink_prefix, + s2 - (s1 + prefix_pos) + 1); + if (!sbi->ll_foreign_symlink_prefix) { + /* restore previous */ + sbi->ll_foreign_symlink_prefix = old; + sbi->ll_foreign_symlink_prefix_size = + old_len; + RETURN(-ENOMEM); + } + if (old) + OBD_FREE(old, old_len); + strncpy(sbi->ll_foreign_symlink_prefix, + s1 + prefix_pos, + s2 - (s1 + prefix_pos)); + sbi->ll_foreign_symlink_prefix_size = + s2 - (s1 + prefix_pos) + 1; + } else { + LCONSOLE_ERROR_MSG(0x152, + "invalid %s option\n", s1); + } + /* enable foreign symlink support */ + *flags |= tmp; + goto next; + } LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n", s1); RETURN(-EINVAL); @@ -2870,6 +2973,13 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, if (default_lmv_deleted) ll_update_default_lsm_md(*inode, &md); + /* we may want to apply some policy for foreign file/dir */ + if 
(ll_sbi_has_foreign_symlink(sbi)) { + rc = ll_manage_foreign(*inode, &md); + if (rc < 0) + GOTO(out, rc); + } + GOTO(out, rc = 0); out: @@ -3074,6 +3184,11 @@ int ll_show_options(struct seq_file *seq, struct dentry *dentry) else seq_puts(seq, ",noencrypt"); + if (sbi->ll_flags & LL_SBI_FOREIGN_SYMLINK) { + seq_puts(seq, ",foreign_symlink="); + seq_puts(seq, sbi->ll_foreign_symlink_prefix); + } + RETURN(0); } diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index cf753a5..bd13eed 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -289,6 +289,14 @@ static ssize_t client_type_show(struct kobject *kobj, struct attribute *attr, } LUSTRE_RO_ATTR(client_type); +LUSTRE_RW_ATTR(foreign_symlink_enable); + +LUSTRE_RW_ATTR(foreign_symlink_prefix); + +LUSTRE_RW_ATTR(foreign_symlink_upcall); + +LUSTRE_WO_ATTR(foreign_symlink_upcall_info); + static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -1529,6 +1537,10 @@ static struct attribute *llite_attrs[] = { &lustre_attr_filestotal.attr, &lustre_attr_filesfree.attr, &lustre_attr_client_type.attr, + &lustre_attr_foreign_symlink_enable.attr, + &lustre_attr_foreign_symlink_prefix.attr, + &lustre_attr_foreign_symlink_upcall.attr, + &lustre_attr_foreign_symlink_upcall_info.attr, &lustre_attr_fstype.attr, &lustre_attr_uuid.attr, &lustre_attr_checksums.attr, diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 7311a2f..ac23de7 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -614,6 +614,27 @@ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de) if (rc < 0) return ERR_PTR(rc); d_add(de, inode); + + /* this needs only to be done for foreign symlink dirs as + * DCACHE_SYMLINK_TYPE is already set by d_flags_for_inode() + * kernel routine for files with symlink ops (ie, real symlink) + */ + if (inode && S_ISDIR(inode->i_mode) && + ll_sbi_has_foreign_symlink(ll_i2sbi(inode)) && +#ifdef HAVE_IOP_GET_LINK + 
inode->i_op->get_link) { +#else + inode->i_op->follow_link) { +#endif + CDEBUG(D_INFO, "%s: inode "DFID": faking foreign dir as a symlink\n", + ll_i2sbi(inode)->ll_fsname, PFID(ll_inode2fid(inode))); + spin_lock(&de->d_lock); + /* like d_flags_for_inode() already does for files */ + de->d_flags = (de->d_flags & ~DCACHE_ENTRY_TYPE) | + DCACHE_SYMLINK_TYPE; + spin_unlock(&de->d_lock); + } + CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n", de, de->d_inode, ll_d_count(de), de->d_flags); return de; @@ -1195,7 +1216,9 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, } } - if (dentry->d_inode && it_disposition(it, DISP_OPEN_OPEN)) { + /* check also if a foreign file is openable */ + if (dentry->d_inode && it_disposition(it, DISP_OPEN_OPEN) && + ll_foreign_is_openable(dentry, open_flags)) { /* Open dentry. */ if (S_ISFIFO(dentry->d_inode->i_mode)) { /* We cannot call open here as it might @@ -1681,6 +1704,10 @@ static int ll_rmdir(struct inode *dir, struct dentry *dchild) if (unlikely(d_mountpoint(dchild))) RETURN(-EBUSY); + /* some foreign dir may not be allowed to be removed */ + if (!ll_foreign_is_removable(dchild, false)) + RETURN(-EPERM); + op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len, S_IFDIR, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) @@ -1766,6 +1793,10 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild) if (unlikely(d_mountpoint(dchild))) RETURN(-EBUSY); + /* some foreign file/dir may not be allowed to be unlinked */ + if (!ll_foreign_is_removable(dchild, false)) + RETURN(-EPERM); + op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index c51a735..2606407 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -1129,12 +1129,12 @@ static int pcc_get_layout_info(struct inode *inode, struct cl_layout *clt) RETURN(PTR_ERR(env)); rc = cl_object_layout_get(env, lli->lli_clob, 
clt); - if (rc) + if (rc < 0) CDEBUG(D_INODE, "Cannot get layout for "DFID"\n", PFID(ll_inode2fid(inode))); cl_env_put(env, &refcheck); - RETURN(rc); + RETURN(rc < 0 ? rc : 0); } static int pcc_fid2dataset_fullpath(char *buf, int sz, struct lu_fid *fid, diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c index 17ded38..85e8193 100644 --- a/lustre/llite/symlink.c +++ b/lustre/llite/symlink.c @@ -38,6 +38,7 @@ #include "llite_internal.h" +/* Must be called with lli_size_mutex locked */ static int ll_readlink_internal(struct inode *inode, struct ptlrpc_request **request, char **symname) { diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index da7bfe6..2561ae1 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -2165,7 +2165,8 @@ static int lov_object_layout_get(const struct lu_env *env, rc = lov_lsm_pack(lsm, buf->lb_buf, buf->lb_len); lov_lsm_put(lsm); - RETURN(rc < 0 ? rc : 0); + /* return error or number of bytes */ + RETURN(rc); } static loff_t lov_object_maxbytes(struct cl_object *obj) diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index c0d5745..b715ec9 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -166,8 +166,14 @@ static ssize_t lov_lsm_pack_foreign(const struct lov_stripe_md *lsm, void *buf, if (buf_size == 0) RETURN(lfm_size); - if (buf_size < lfm_size) + /* if buffer too small return ERANGE but copy the size the + * caller has requested anyway. 
This may be useful to get + * only the header without the need to alloc the full size + */ + if (buf_size < lfm_size) { + memcpy(lfm, lsm_foreign(lsm), buf_size); RETURN(-ERANGE); + } /* full foreign LOV is already avail in its cache * no need to translate format fields to little-endian diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 9831e7a..598435f 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -77,6 +77,7 @@ THETESTS += swap_lock_test lockahead_test mirror_io mmap_mknod_test THETESTS += create_foreign_file parse_foreign_file THETESTS += create_foreign_dir parse_foreign_dir THETESTS += check_fallocate splice-test lseek_test expand_truncate_test +THETESTS += foreign_symlink_striping if LIBAIO THETESTS += aiocp diff --git a/lustre/tests/checkstat.c b/lustre/tests/checkstat.c index b1e869e..37466e0 100644 --- a/lustre/tests/checkstat.c +++ b/lustre/tests/checkstat.c @@ -76,6 +76,40 @@ usage(char *argv0, int help) printf(" Exit status is 0 on success, 1 on failure\n"); } +/* using realpath() implies the paths must be resolved/exist + * so this will fail for dangling links + */ +int check_canonical(char *lname, char *checklink, int verbose) +{ + char *lname_canon; + char *checklink_canon; + + lname_canon = realpath(lname, NULL); + if (lname_canon == NULL) { + if (verbose) + printf("%s: can't canonicalize: %s\n", + lname, strerror(errno)); + return 1; + } + + checklink_canon = realpath(checklink, NULL); + if (checklink_canon == NULL) { + if (verbose) + printf("%s: can't canonicalize: %s\n", + checklink, strerror(errno)); + return 1; + } + + if (strcmp(checklink_canon, lname_canon)) { + free(lname_canon); + free(checklink_canon); + return 1; + } + free(lname_canon); + free(checklink_canon); + return 0; +} + int main(int argc, char **argv) { @@ -287,7 +321,12 @@ main(int argc, char **argv) } lname[rc] = 0; - if (strcmp(checklink, lname)) { + + /* just in case, try to also match the canonicalized + * paths + */ + if 
(strcmp(checklink, lname) && + check_canonical(lname, checklink, verbose)) { if (verbose) printf("%s is a link to %s and not %s\n", fname, lname, checklink); diff --git a/lustre/tests/create_foreign_dir.c b/lustre/tests/create_foreign_dir.c index 9fa508c..7cd9b77 100644 --- a/lustre/tests/create_foreign_dir.c +++ b/lustre/tests/create_foreign_dir.c @@ -15,7 +15,7 @@ int main(int argc, char **argv) char *dir = "foreign_dir", *end; char *xval = "UUID@UUID"; mode_t mode = 0700; - __u32 type = LU_FOREIGN_TYPE_DAOS, flags = 0; + __u32 type = LU_FOREIGN_TYPE_SYMLINK, flags = 0xda05; int c, rc; while ((c = getopt(argc, argv, "hd:f:m:t:x:")) != -1) { diff --git a/lustre/tests/create_foreign_file.c b/lustre/tests/create_foreign_file.c index ea29ceb..acd5540 100644 --- a/lustre/tests/create_foreign_file.c +++ b/lustre/tests/create_foreign_file.c @@ -17,7 +17,7 @@ int main(int argc, char **argv) size_t len; struct lov_foreign_md *lfm; char *end; - __u32 type = LU_FOREIGN_TYPE_DAOS, flags = 0xda08; + __u32 type = LU_FOREIGN_TYPE_SYMLINK, flags = 0xda05; while ((c = getopt(argc, argv, "f:x:t:F:")) != -1) { switch (c) { diff --git a/lustre/tests/foreign_symlink_striping.c b/lustre/tests/foreign_symlink_striping.c new file mode 100644 index 0000000..d16f5db --- /dev/null +++ b/lustre/tests/foreign_symlink_striping.c @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +int main(int argc, char **argv) +{ + char *foreign = NULL; + int c, rc, fd; + bool f_opt = false, d_opt = false, h_opt = false; + /* buf must be large enough to receive biggest possible + * foreign LOV/LMV + */ + char buf[XATTR_SIZE_MAX]; + struct lmv_foreign_md *lfm = (void *)buf; + + while ((c = getopt(argc, argv, "hf:d:")) != -1) { + switch (c) { + case 'd': + foreign = optarg; + if (f_opt || d_opt) { + fprintf(stderr, + "only one foreign symlink file or dir can be specified at a time\n"); + exit(1); + } + d_opt = true; + break; + 
case 'f': + foreign = optarg; + if (f_opt || d_opt) { + fprintf(stderr, + "only one foreign symlink file or dir can be specified at a time\n"); + exit(1); + } + f_opt = true; + break; + case 'h': + h_opt = true; + default: + fprintf(stderr, + "Usage: %s [-[f,d] ]\n", + argv[0]); + exit(h_opt ? 0 : 1); + break; + } + } + + if (foreign == NULL) { + fprintf(stderr, + "a foreign file/dir pathname must be provided\n"); + exit(0); + } + + /* in case foreign fake symlink feature is active, file/dir must be + * opened with O_NOFOLLOW to avoid symlink resolution + */ + fd = open(foreign, O_RDONLY|O_NONBLOCK|O_NOFOLLOW); + if (fd < 0) { + fprintf(stderr, "open() of '%s' error, rc : %d\n", foreign, fd); + perror("open()"); + exit(1); + } + + rc = snprintf(buf, PATH_MAX, "%s", foreign); + if (rc >= PATH_MAX || rc < 0) { + fprintf(stderr, + "unexpected return code or size from snprintf() : %d\n", + rc); + exit(1); + } + + if (f_opt) { + rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE, &buf); + } else if (d_opt) { + lfm->lfm_magic = LMV_MAGIC_V1; + rc = ioctl(fd, LL_IOC_LMV_GETSTRIPE, &buf); + } + + if (rc) { + fprintf(stderr, "%s: %s error: %s\n", foreign, + f_opt ? 
"getstripe" : "getdirstripe", strerror(errno)); + exit(1); + } + + if (lfm->lfm_magic != LOV_USER_MAGIC_FOREIGN && + lfm->lfm_magic != LMV_MAGIC_FOREIGN) + fprintf(stderr, "unexpected magic : 0x%08X, expected 0x%08X\n", + lfm->lfm_magic, LOV_USER_MAGIC_FOREIGN); + if (lfm->lfm_type != LU_FOREIGN_TYPE_SYMLINK) + fprintf(stderr, "unexpected type : 0x%08X, expected 0x%08X\n", + lfm->lfm_type, LU_FOREIGN_TYPE_SYMLINK); + printf("lfm_magic: 0x%08X, lfm_length: %u, lfm_type: 0x%08X, lfm_flags: 0x%08X, lfm_value: '%.*s'\n", + lfm->lfm_magic, lfm->lfm_length, lfm->lfm_type, lfm->lfm_flags, + lfm->lfm_length, lfm->lfm_value); + + return rc; +} diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index c2bf325..4360b27 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -5712,7 +5712,7 @@ test_38() local uuid2=$(cat /proc/sys/kernel/random/uuid) # create foreign file - $LFS setstripe --foreign=daos --flags 0xda05 \ + $LFS setstripe --foreign=none --flags 0xda05 \ -x "${uuid1}@${uuid2}" $DIR/$tdir/$tfile || error "$DIR/$tdir/$tfile: create failed" @@ -5722,7 +5722,7 @@ test_38() # lfm_length is LOV EA size - sizeof(lfm_magic) - sizeof(lfm_length) $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_length:.*73" || error "$DIR/$tdir/$tfile: invalid LOV EA foreign size" - $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*daos" || + $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*none" || error "$DIR/$tdir/$tfile: invalid LOV EA foreign type" $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_flags:.*0x0000DA05" || @@ -5765,7 +5765,7 @@ test_38() # lfm_length is LOV EA size - sizeof(lfm_magic) - sizeof(lfm_length) $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_length:.*73" || error "$DIR/$tdir/$tfile: invalid LOV EA foreign size" - $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*daos" || + $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*none" || error "$DIR/$tdir/$tfile: invalid LOV EA foreign type" $LFS 
getstripe -v $DIR/$tdir/$tfile | grep "lfm_flags:.*0x0000DA05" || @@ -5799,7 +5799,7 @@ test_39() local uuid2=$(cat /proc/sys/kernel/random/uuid) # create foreign dir - $LFS mkdir --foreign=daos --xattr="${uuid1}@${uuid2}" --flags=0xda05 \ + $LFS mkdir --foreign=none --xattr="${uuid1}@${uuid2}" --flags=0xda05 \ $DIR/$tdir/${tdir}2 || error "$DIR/$tdir/${tdir}2: create failed" @@ -5810,7 +5810,7 @@ test_39() # - sizeof(lfm_type) - sizeof(lfm_flags) $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_length:.*73" || error "$DIR/$tdir/${tdir}2: invalid LMV EA size" - $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*daos" || + $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*none" || error "$DIR/$tdir/${tdir}2: invalid LMV EA type" $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_flags:.*0x0000DA05" || @@ -5862,7 +5862,7 @@ test_39() # - sizeof(lfm_type) - sizeof(lfm_flags) $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_length:.*73" || error "$DIR/$tdir/${tdir}2: invalid LMV EA size" - $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*daos" || + $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*none" || error "$DIR/$tdir/${tdir}2: invalid LMV EA type" $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_flags:.*0x0000DA05" || diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index b9e61b3..b25855a 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -2747,7 +2747,7 @@ test_27J() { error "$DIR/$tdir/$tfile: invalid LOV EA foreign value" # create foreign file (lfs + API) - $LFS setstripe --foreign=daos --flags 0xda08 \ + $LFS setstripe --foreign=none --flags 0xda08 \ -x "${uuid1}@${uuid2}" $DIR/$tdir/${tfile}2 || error "$DIR/$tdir/${tfile}2: create failed" @@ -2757,7 +2757,7 @@ test_27J() { # lfm_length is LOV EA size - sizeof(lfm_magic) - sizeof(lfm_length) $LFS getstripe -v $DIR/$tdir/${tfile}2 | grep "lfm_length:.*73" || error "$DIR/$tdir/${tfile}2: invalid LOV EA foreign size" - $LFS 
getstripe -v $DIR/$tdir/${tfile}2 | grep "lfm_type:.*daos" || + $LFS getstripe -v $DIR/$tdir/${tfile}2 | grep "lfm_type:.*none" || error "$DIR/$tdir/${tfile}2: invalid LOV EA foreign type" $LFS getstripe -v $DIR/$tdir/${tfile}2 | grep "lfm_flags:.*0x0000DA08" || @@ -2827,7 +2827,8 @@ test_27K() { error "$DIR/$tdir/$tdir: invalid LMV EA size" parse_foreign_dir -d $DIR/$tdir/$tdir | grep "lmv_foreign_type: 1$" || error "$DIR/$tdir/$tdir: invalid LMV EA type" - parse_foreign_dir -d $DIR/$tdir/$tdir | grep "lmv_foreign_flags: 0$" || + parse_foreign_dir -d $DIR/$tdir/$tdir | + grep "lmv_foreign_flags: 55813$" || error "$DIR/$tdir/$tdir: invalid LMV EA flags" local lmv=$(parse_foreign_dir -d $DIR/$tdir/$tdir | grep "lmv_foreign_value: 0x" | @@ -2837,7 +2838,7 @@ test_27K() { [[ $lmv == $lmv2 ]] || error "$DIR/$tdir/$tdir: invalid LMV EA value" # create foreign dir (lfs + API) - $LFS mkdir --foreign=daos --xattr="${uuid1}@${uuid2}" --flags=0xda05 \ + $LFS mkdir --foreign=none --xattr="${uuid1}@${uuid2}" --flags=0xda05 \ $DIR/$tdir/${tdir}2 || error "$DIR/$tdir/${tdir}2: create failed" @@ -2848,7 +2849,7 @@ test_27K() { # - sizeof(lfm_type) - sizeof(lfm_flags) $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_length:.*73" || error "$DIR/$tdir/${tdir}2: invalid LMV EA size" - $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*daos" || + $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*none" || error "$DIR/$tdir/${tdir}2: invalid LMV EA type" $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_flags:.*0x0000DA05" || @@ -3047,6 +3048,204 @@ test_27N() { } run_test 27N "lctl pool_list on separate MGS gives correct pool name" +clean_foreign_symlink() { + trap 0 + lctl set_param llite/$FSNAME-*/foreign_symlink_enable=0 + for i in $DIR/$tdir/* ; do + $LFS unlink_foreign $i || true + done +} + +test_27O() { + [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.12.51) ]] && + skip "Need MDS version newer than 2.12.51" + + test_mkdir $DIR/$tdir + local 
uuid1=$(cat /proc/sys/kernel/random/uuid) + local uuid2=$(cat /proc/sys/kernel/random/uuid) + + trap clean_foreign_symlink EXIT + + # enable foreign_symlink behaviour + $LCTL set_param llite/$FSNAME-*/foreign_symlink_enable=1 + + # foreign symlink LOV format is a partial path by default + + # create foreign file (lfs + API) + $LFS setstripe --foreign=symlink --flags 0xda05 \ + -x "${uuid1}/${uuid2}" --mode 0600 $DIR/$tdir/${tfile} || + error "$DIR/$tdir/${tfile}: create failed" + + $LFS getstripe -v $DIR/$tdir/${tfile} | + grep "lfm_magic:.*0x0BD70BD0" || + error "$DIR/$tdir/${tfile}: invalid LOV EA foreign magic" + $LFS getstripe -v $DIR/$tdir/${tfile} | grep "lfm_type:.*symlink" || + error "$DIR/$tdir/${tfile}: invalid LOV EA foreign type" + $LFS getstripe -v $DIR/$tdir/${tfile} | + grep "lfm_flags:.*0x0000DA05" || + error "$DIR/$tdir/${tfile}: invalid LOV EA foreign flags" + $LFS getstripe $DIR/$tdir/${tfile} | + grep "lfm_value:.*${uuid1}/${uuid2}" || + error "$DIR/$tdir/${tfile}: invalid LOV EA foreign value" + + # modify striping should fail + $LFS setstripe -c 2 $DIR/$tdir/$tfile && + error "$DIR/$tdir/$tfile: setstripe should fail" + + # R/W should fail ("/{foreign_symlink_prefix}/${uuid1}/" missing) + cat $DIR/$tdir/$tfile && error "$DIR/$tdir/$tfile: read should fail" + cat /etc/passwd > $DIR/$tdir/$tfile && + error "$DIR/$tdir/$tfile: write should fail" + + # rename should succeed + mv $DIR/$tdir/$tfile $DIR/$tdir/${tfile}.new || + error "$DIR/$tdir/$tfile: rename has failed" + + #remove foreign_symlink file should fail + rm $DIR/$tdir/${tfile}.new && + error "$DIR/$tdir/${tfile}.new: remove of foreign_symlink file should fail" + + #test fake symlink + mkdir /tmp/${uuid1} || + error "/tmp/${uuid1}: mkdir has failed" + echo FOOFOO > /tmp/${uuid1}/${uuid2} || + error "/tmp/${uuid1}/${uuid2}: echo has failed" + $LCTL set_param llite/$FSNAME-*/foreign_symlink_prefix=/tmp/ + $CHECKSTAT -t link -l /tmp/${uuid1}/${uuid2} $DIR/$tdir/${tfile}.new || + error 
"$DIR/$tdir/${tfile}.new: not seen as a symlink" + #read should succeed now + cat $DIR/$tdir/${tfile}.new | grep FOOFOO || + error "$DIR/$tdir/${tfile}.new: symlink resolution has failed" + #write should succeed now + cat /etc/passwd > $DIR/$tdir/${tfile}.new || + error "$DIR/$tdir/${tfile}.new: write should succeed" + diff /etc/passwd $DIR/$tdir/${tfile}.new || + error "$DIR/$tdir/${tfile}.new: diff has failed" + diff /etc/passwd /tmp/${uuid1}/${uuid2} || + error "/tmp/${uuid1}/${uuid2}: diff has failed" + + #check that getstripe still works + $LFS getstripe $DIR/$tdir/${tfile}.new || + error "$DIR/$tdir/${tfile}.new: getstripe should still work with foreign_symlink enabled" + + # chmod should still succeed + chmod 644 $DIR/$tdir/${tfile}.new || + error "$DIR/$tdir/${tfile}.new: chmod has failed" + + # chown should still succeed + chown $RUNAS_ID:$RUNAS_GID $DIR/$tdir/${tfile}.new || + error "$DIR/$tdir/${tfile}.new: chown has failed" + + # rename should still succeed + mv $DIR/$tdir/${tfile}.new $DIR/$tdir/${tfile} || + error "$DIR/$tdir/${tfile}.new: rename has failed" + + #remove foreign_symlink file should still fail + rm $DIR/$tdir/${tfile} && + error "$DIR/$tdir/${tfile}: remove of foreign_symlink file should fail" + + #use special ioctl() to unlink foreign_symlink file + $LFS unlink_foreign $DIR/$tdir/${tfile} || + error "$DIR/$tdir/$tfile: unlink/ioctl failed" + +} +run_test 27O "basic ops on foreign file of symlink type" + +test_27P() { + [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.12.49) ]] && + skip "Need MDS version newer than 2.12.49" + + test_mkdir $DIR/$tdir + local uuid1=$(cat /proc/sys/kernel/random/uuid) + local uuid2=$(cat /proc/sys/kernel/random/uuid) + + trap clean_foreign_symlink EXIT + + # enable foreign_symlink behaviour + $LCTL set_param llite/$FSNAME-*/foreign_symlink_enable=1 + + # foreign symlink LMV format is a partial path by default + + # create foreign dir (lfs + API) + $LFS mkdir --foreign=symlink 
--xattr="${uuid1}/${uuid2}" \ + --flags=0xda05 --mode 0750 $DIR/$tdir/${tdir} || + error "$DIR/$tdir/${tdir}: create failed" + + $LFS getdirstripe -v $DIR/$tdir/${tdir} | + grep "lfm_magic:.*0x0CD50CD0" || + error "$DIR/$tdir/${tdir}: invalid LMV EA magic" + $LFS getdirstripe -v $DIR/$tdir/${tdir} | grep "lfm_type:.*symlink" || + error "$DIR/$tdir/${tdir}: invalid LMV EA type" + $LFS getdirstripe -v $DIR/$tdir/${tdir} | + grep "lfm_flags:.*0x0000DA05" || + error "$DIR/$tdir/${tdir}: invalid LMV EA flags" + $LFS getdirstripe $DIR/$tdir/${tdir} | + grep "lfm_value.*${uuid1}/${uuid2}" || + error "$DIR/$tdir/${tdir}: invalid LMV EA value" + + # file create in dir should fail + # ("/{foreign_symlink_prefix}/${uuid1}/${uuid2}/" missing) + touch $DIR/$tdir/$tdir/$tfile && "$DIR/$tdir: file create should fail" + + # rename should succeed + mv $DIR/$tdir/$tdir $DIR/$tdir/${tdir}.new || + error "$DIR/$tdir/$tdir: rename of foreign_symlink dir has failed" + + #remove foreign_symlink dir should fail + rmdir $DIR/$tdir/${tdir}.new && + error "$DIR/$tdir/${tdir}.new: remove of foreign_symlink dir should fail" + + #test fake symlink + mkdir -p /tmp/${uuid1}/${uuid2} || + error "/tmp/${uuid1}/${uuid2}: mkdir has failed" + echo FOOFOO > /tmp/${uuid1}/${uuid2}/foo || + error "/tmp/${uuid1}/${uuid2}/foo: echo has failed" + $LCTL set_param llite/$FSNAME-*/foreign_symlink_prefix=/tmp/ + $CHECKSTAT -t link -l /tmp/${uuid1}/${uuid2} $DIR/$tdir/${tdir}.new || + error "$DIR/$tdir/${tdir}.new: not seen as a symlink" + cat $DIR/$tdir/${tdir}.new/foo | grep FOOFOO || + error "$DIR/$tdir/${tdir}.new: symlink resolution has failed" + + #check that getdirstripe still works now that foreign_symlink is enabled + $LFS getdirstripe $DIR/$tdir/${tdir}.new || + error "$DIR/$tdir/${tdir}.new: getdirstripe should still work with foreign_symlink enabled" + + # file create in dir should work now + cp /etc/passwd $DIR/$tdir/${tdir}.new/$tfile || + error "$DIR/$tdir/${tdir}.new/$tfile: file create should fail" + diff 
/etc/passwd $DIR/$tdir/${tdir}.new/$tfile || + error "$DIR/$tdir/${tdir}.new/$tfile: diff has failed" + diff /etc/passwd /tmp/${uuid1}/${uuid2}/$tfile || + error "/tmp/${uuid1}/${uuid2}/$tfile: diff has failed" + + # chmod should still succeed + chmod 755 $DIR/$tdir/${tdir}.new || + error "$DIR/$tdir/${tdir}.new: chmod has failed" + + # chown should still succeed + chown $RUNAS_ID:$RUNAS_GID $DIR/$tdir/${tdir}.new || + error "$DIR/$tdir/${tdir}.new: chown has failed" + + # rename should still succeed + mv $DIR/$tdir/${tdir}.new $DIR/$tdir/${tdir} || + error "$DIR/$tdir/${tdir}.new: rename of foreign_symlink dir has failed" + + #remove foreign_symlink dir should still fail + rmdir $DIR/$tdir/${tdir} && + error "$DIR/$tdir/${tdir}: remove of foreign_symlink dir should fail" + + #use special ioctl() to unlink foreign_symlink file + $LFS unlink_foreign $DIR/$tdir/${tdir} || + error "$DIR/$tdir/$tdir: unlink/ioctl failed" + + #created file should still exist + [[ -f /tmp/${uuid1}/${uuid2}/$tfile ]] || + error "/tmp/${uuid1}/${uuid2}/$tfile has been removed" + diff /etc/passwd /tmp/${uuid1}/${uuid2}/$tfile || + error "/tmp/${uuid1}/${uuid2}/$tfile: diff has failed" +} +run_test 27P "basic ops on foreign dir of foreign_symlink type" + # createtest also checks that device nodes are created and # then visible correctly (#2091) test_28() { # bug 2091 diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index 28a33d2..1d5a3ea 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -28,7 +28,7 @@ bin_SCRIPTS = llstat llobdstat plot-llstat bin_PROGRAMS = lfs sbin_SCRIPTS = ldlm_debug_upcall sbin_PROGRAMS = lctl l_getidentity llverfs lustre_rsync ll_decode_linkea \ - llsom_sync + llsom_sync l_foreign_symlink if TESTS sbin_PROGRAMS += wiretest @@ -220,6 +220,10 @@ l_tunedisk_CPPFLAGS := ${MNTMODCFLAGS} l_tunedisk_LDFLAGS := ${MNTMODLDFLAGS} l_tunedisk_LDADD := $(mount_lustre_LDADD) +l_foreign_symlink_SOURCES = l_foreign_symlink.c 
+l_foreign_symlink_LDADD := $(top_builddir)/libcfs/libcfs/libcfs.la +l_foreign_symlink_DEPENDENCIES := $(top_builddir)/libcfs/libcfs/libcfs.la + l_getidentity_SOURCES = l_getidentity.c l_getidentity_LDADD := $(top_builddir)/libcfs/libcfs/libcfs.la l_getidentity_DEPENDENCIES := $(top_builddir)/libcfs/libcfs/libcfs.la diff --git a/lustre/utils/l_foreign_symlink.c b/lustre/utils/l_foreign_symlink.c new file mode 100644 index 0000000..7bf7ec9 --- /dev/null +++ b/lustre/utils/l_foreign_symlink.c @@ -0,0 +1,169 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * Copyright (c) 2020, Intel Corporation. + */ + +/* + * lustre/utils/l_foreign_symlink.c + * Userland helper to provide detailed format items in order to allow for + * a fast parsing of foreign symlink LOV/LMV EAs in llite. + * Presently, the foreign symlink LOV/LMV EAs format and its translation + * in format items is hard-coded, but in the future we may want to make it + * smarter and automatize this process by some mean. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define UUID_STRING_LENGTH 36 +#define MAX_BUF_SIZE 1024 + +static char *progname; + +static void errlog(const char *fmt, ...) +{ + va_list args; + + openlog(progname, LOG_PERROR | LOG_PID, LOG_KERN); + + va_start(args, fmt); + vsyslog(LOG_ERR, fmt, args); + va_end(args); + + closelog(); +} + +int main(int argc, char **argv) +{ + /* we want to request llite layer to parse each foreign symlink + * LOV/LMV EAs with lfm_value of format ":" and + * translate it as "/" relative path. + * To do so, will need to pass a serie of 4 items, one for + * position and length in lfm_value, one with constant + * string "/", one for position and length in lfm_value, + * a last one to indicate end of serie. + */ + struct ll_foreign_symlink_upcall_item *items; + char *buf; + glob_t path; + int fd, rc; + + progname = basename(argv[0]); + + if (argc != 2) { + errlog("usage: %s \n", argv[0]); + return -1; + } + + buf = malloc(MAX_BUF_SIZE); + if (buf == NULL) { + errlog("unable to allocate MAX_BUF_SIZE bytes\n"); + return -1; + } + + /* the number of items is presently limited to MAX_NB_UPCALL_ITEMS */ + + /* all items are expected to be on a __u32 boundary by llite */ + + /* 1st item to locate */ + items = (struct ll_foreign_symlink_upcall_item *)buf; + items->type = POSLEN_TYPE; + items->pos = 0; + items->len = UUID_STRING_LENGTH; + + /* 2nd item to store "/" string */ + items = (struct ll_foreign_symlink_upcall_item *)((char *)items + + POSLEN_ITEM_SZ); + items->type = STRING_TYPE; + /* NUL byte is not necessary */ + items->size = strlen("/"); + memcpy(items->bytestring, "/", strlen("/")); + /* space occupied by string will fit on __u32 boundary */ + + /* 3rd item to locate */ + items = (struct ll_foreign_symlink_upcall_item *)((char *)items + + STRING_ITEM_SZ(items->size)); 
+ items->type = POSLEN_TYPE; + items->pos = UUID_STRING_LENGTH + 1; + items->len = UUID_STRING_LENGTH; + + /* 4th item is end of buf */ + items = (struct ll_foreign_symlink_upcall_item *)((char *)items + + POSLEN_ITEM_SZ); + items->type = EOB_TYPE; + + /* Send foreign symlink parsing items info to kernelspace */ + rc = cfs_get_param_paths(&path, "llite/%s/foreign_symlink_upcall_info", + argv[1]); + if (rc != 0) { + errlog("can't get param 'llite/%s/foreign_symlink_upcall_info': %s\n", + argv[1], strerror(errno)); + rc = -errno; + goto out; + } + + fd = open(path.gl_pathv[0], O_WRONLY); + if (fd < 0) { + errlog("can't open file '%s':%s\n", path.gl_pathv[0], + strerror(errno)); + rc = -errno; + goto out_param; + } + + rc = write(fd, buf, (char *)items + sizeof(items->type) - buf); + close(fd); + if (rc != (char *)items + sizeof(items->type) - buf) { + errlog("partial write ret %d: %s\n", rc, strerror(errno)); + rc = -errno; + } else { + rc = 0; + } + +out_param: + cfs_free_param_data(&path); +out: + if (isatty(STDIN_FILENO)) + /* we are called from the command line */ + return rc < 0 ? -rc : rc; + else + return rc; +} diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index f7eacb3..c68a626 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -85,6 +85,7 @@ static int lfs_getstripe(int argc, char **argv); static int lfs_getdirstripe(int argc, char **argv); static int lfs_setdirstripe(int argc, char **argv); static int lfs_rmentry(int argc, char **argv); +static int lfs_unlink_foreign(int argc, char **argv); static int lfs_osts(int argc, char **argv); static int lfs_mdts(int argc, char **argv); static int lfs_df(int argc, char **argv); @@ -293,7 +294,7 @@ static inline int lfs_mirror_delete(int argc, char **argv) "setdirstripe|mkdir --foreign[=] -x|-xattr " \ "[--mode|-o mode] [--flags ] \n" \ "\tmode: the mode of the directory\n" \ - "\tforeign_type: none or daos\n" + "\tforeign_type: none or symlink\n" /** * command_t mirror_cmdlist - lfs mirror commands. 
@@ -460,6 +461,11 @@ command_t cmdlist[] = { "will become inaccessable after this command. This can only be done\n" "by the administrator\n" "usage: rm_entry \n"}, + {"unlink_foreign", lfs_unlink_foreign, 0, + "To remove the foreign file/dir.\n" + "Note: This is for files/dirs prevented to be removed using\n" + "unlink/rmdir, but works also for regular ones\n" + "usage: unlink_foreign [ ...]\n"}, {"pool_list", lfs_poollist, 0, "List pools or pool OSTs\n" "usage: pool_list [.] | \n"}, @@ -6038,7 +6044,7 @@ static int lfs_setdirstripe(int argc, char **argv) mode_t mode = S_IRWXU | S_IRWXG | S_IRWXO; mode_t previous_mode = 0; char *xattr = NULL; - __u32 type = LU_FOREIGN_TYPE_DAOS, flags = 0; + __u32 type = LU_FOREIGN_TYPE_SYMLINK, flags = 0; struct option long_opts[] = { { .val = 'c', .name = "count", .has_arg = required_argument }, { .val = 'c', .name = "mdt-count", .has_arg = required_argument }, @@ -6355,6 +6361,33 @@ static int lfs_rmentry(int argc, char **argv) return result; } +static int lfs_unlink_foreign(int argc, char **argv) +{ + char *name; + int index; + int result = 0; + + if (argc <= 1) { + fprintf(stderr, "error: %s: missing pathname\n", + argv[0]); + return CMD_HELP; + } + + index = 1; + name = argv[index]; + while (name != NULL) { + result = llapi_unlink_foreign(name); + if (result) { + fprintf(stderr, + "error: %s: unlink foreign entry '%s' failed\n", + argv[0], name); + break; + } + name = argv[++index]; + } + return result; +} + static int lfs_mv(int argc, char **argv) { struct lmv_user_md lmu = { LMV_USER_MAGIC }; diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 5686811..2508895 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -94,7 +94,7 @@ char *mdt_hash_name[] = { "none", struct lustre_foreign_type lu_foreign_types[] = { {.lft_type = LU_FOREIGN_TYPE_NONE, .lft_name = "none"}, - {.lft_type = LU_FOREIGN_TYPE_DAOS, .lft_name = "daos"}, + {.lft_type = LU_FOREIGN_TYPE_SYMLINK, .lft_name = 
"symlink"}, /* must be the last element */ {.lft_type = LU_FOREIGN_TYPE_UNKNOWN, .lft_name = NULL} /* array max dimension must be <= UINT32_MAX */ @@ -1241,44 +1241,6 @@ int llapi_dir_create_pool(const char *name, int mode, int stripe_offset, return llapi_dir_create(name, mode, ¶m); } -int llapi_direntry_remove(char *dname) -{ - char *dirpath = NULL; - char *namepath = NULL; - char *dir; - char *filename; - int fd = -1; - int rc = 0; - - dirpath = strdup(dname); - namepath = strdup(dname); - if (!dirpath || !namepath) - return -ENOMEM; - - filename = basename(namepath); - - dir = dirname(dirpath); - - fd = open(dir, O_DIRECTORY | O_RDONLY); - if (fd < 0) { - rc = -errno; - llapi_error(LLAPI_MSG_ERROR, rc, "unable to open '%s'", - filename); - goto out; - } - - if (ioctl(fd, LL_IOC_REMOVE_ENTRY, filename)) - llapi_error(LLAPI_MSG_ERROR, errno, - "error on ioctl %#lx for '%s' (%d)", - (long)LL_IOC_LMV_SETSTRIPE, filename, fd); -out: - free(dirpath); - free(namepath); - if (fd != -1) - close(fd); - return rc; -} - /* * Find the fsname, the full path, and/or an open fd. 
* Either the fsname or path must not be NULL @@ -1768,7 +1730,7 @@ err: return rc; } -typedef int (semantic_func_t)(char *path, DIR *parent, DIR **d, +typedef int (semantic_func_t)(char *path, int p, int *d, void *data, struct dirent64 *de); #define OBD_NOT_FOUND (-1) @@ -1838,7 +1800,7 @@ static int common_param_init(struct find_param *param, char *path) return 0; } -static int cb_common_fini(char *path, DIR *parent, DIR **dirp, void *data, +static int cb_common_fini(char *path, int p, int *dp, void *data, struct dirent64 *de) { struct find_param *param = data; @@ -1848,26 +1810,27 @@ static int cb_common_fini(char *path, DIR *parent, DIR **dirp, void *data, } /* set errno upon failure */ -static DIR *opendir_parent(const char *path) +static int open_parent(const char *path) { char *path_copy; char *parent_path; - DIR *parent; + int parent; path_copy = strdup(path); if (path_copy == NULL) - return NULL; + return -1; parent_path = dirname(path_copy); - parent = opendir(parent_path); + parent = open(parent_path, O_RDONLY|O_NDELAY|O_DIRECTORY); free(path_copy); return parent; } -static int cb_get_dirstripe(char *path, DIR *d, struct find_param *param) +static int cb_get_dirstripe(char *path, int *d, struct find_param *param) { int ret; + bool did_nofollow = false; again: param->fp_lmv_md->lum_stripe_count = param->fp_lmv_stripe_count; @@ -1876,7 +1839,36 @@ again: else param->fp_lmv_md->lum_magic = LMV_MAGIC_V1; - ret = ioctl(dirfd(d), LL_IOC_LMV_GETSTRIPE, param->fp_lmv_md); + ret = ioctl(*d, LL_IOC_LMV_GETSTRIPE, param->fp_lmv_md); + + /* if ENOTTY likely to be a fake symlink, so try again after + * new open() with O_NOFOLLOW, but only once to prevent any + * loop like for the path of a file/dir not on Lustre !! 
+ */ + if (ret < 0 && errno == ENOTTY && !did_nofollow) { + int fd, ret2; + + did_nofollow = true; + fd = open(path, O_RDONLY | O_NOFOLLOW); + if (fd < 0) { + /* restore original errno */ + errno = ENOTTY; + return ret; + } + + /* close original fd and set new */ + close(*d); + *d = fd; + ret2 = ioctl(fd, LL_IOC_LMV_GETSTRIPE, param->fp_lmv_md); + if (ret2 < 0 && errno != E2BIG) { + /* restore original errno */ + errno = ENOTTY; + return ret; + } + /* LMV is ok or need to handle E2BIG case now */ + ret = ret2; + } + if (errno == E2BIG && ret != 0) { int stripe_count; int lmv_size; @@ -2019,6 +2011,28 @@ retry_getinfo: if (cmd == LL_IOC_MDC_GETINFO_V1 && !ret) ret = convert_lmdbuf_v1v2(lmdbuf, lmdlen); + + if (ret < 0 && errno == ENOTTY && type == GET_LMD_STRIPE) { + int dir_fd2; + + /* retry ioctl() after new open() with O_NOFOLLOW + * just in case it could be a fake symlink + * need using a new open() as dir_fd is being closed + * by caller + */ + + dir_fd2 = open(path, O_RDONLY | O_NDELAY | O_NOFOLLOW); + if (dir_fd2 < 0) { + /* return original error */ + errno = ENOTTY; + } else { + ret = ioctl(dir_fd2, cmd, lmdbuf); + /* pass new errno or success back to caller */ + + close(dir_fd2); + } + } + } else if (parent_fd >= 0) { const char *fname = strrchr(path, '/'); @@ -2101,55 +2115,103 @@ retry_getfileinfo: return ret; } -static int get_lmd_info(char *path, DIR *parent, DIR *dir, void *lmdbuf, - int lmdlen, enum get_lmd_info_type type) -{ - int parent_fd = -1; - int dir_fd = -1; - - if (parent) - parent_fd = dirfd(parent); - if (dir) - dir_fd = dirfd(dir); - - return get_lmd_info_fd(path, parent_fd, dir_fd, lmdbuf, lmdlen, type); -} - -static int llapi_semantic_traverse(char *path, int size, DIR *parent, +static int llapi_semantic_traverse(char *path, int size, int parent, semantic_func_t sem_init, semantic_func_t sem_fini, void *data, struct dirent64 *de) { struct find_param *param = (struct find_param *)data; struct dirent64 *dent; - int len, ret; - DIR *d, *p = 
NULL; + int len, ret, d, p = -1; + DIR *dir = NULL; ret = 0; len = strlen(path); - d = opendir(path); - if (!d && errno != ENOTDIR) { + d = open(path, O_RDONLY|O_NDELAY|O_DIRECTORY); + /* if an invalid fake dir symlink, opendir() will return EINVAL + * instead of ENOTDIR. If a valid but dangling faked or real file/dir + * symlink ENOENT will be returned. For a valid/resolved fake or real + * file symlink ENOTDIR will be returned as for a regular file. + * opendir() will be successful for a valid and resolved fake or real + * dir simlink or a regular dir. + */ + if (d == -1 && errno != ENOTDIR && errno != EINVAL && errno != ENOENT) { ret = -errno; llapi_error(LLAPI_MSG_ERROR, ret, "%s: Failed to open '%s'", __func__, path); return ret; - } else if (!d && !parent) { - /* ENOTDIR. Open the parent dir. */ - p = opendir_parent(path); - if (!p) { - ret = -errno; - goto out; + } else if (d == -1) { + if (errno == ENOENT || errno == EINVAL) { + int old_errno = errno; + + /* try to open with O_NOFOLLOW this will help + * differentiate fake vs real symlinks + * it is ok to not use O_DIRECTORY with O_RDONLY + * and it will prevent the need to deal with ENOTDIR + * error, instead of ELOOP, being returned by recent + * kernels for real symlinks + */ + d = open(path, O_RDONLY|O_NDELAY|O_NOFOLLOW); + /* if a dangling real symlink should return ELOOP, or + * again ENOENT if really non-existing path, or E...?? + * So return original error. If success or ENOTDIR, path + * is likely to be a fake dir/file symlink, so continue + */ + if (d == -1) { + ret = -old_errno; + goto out; + } + + } + + /* ENOTDIR */ + if (parent == -1 && d == -1) { + /* Open the parent dir. 
*/ + p = open_parent(path); + if (p == -1) { + ret = -errno; + goto out; + } + } + } else { /* d != -1 */ + int d2; + + /* try to reopen dir with O_NOFOLLOW just in case of a foreign + * symlink dir + */ + d2 = open(path, O_RDONLY|O_NDELAY|O_NOFOLLOW); + if (d2 != -1) { + close(d); + d = d2; + } else { + /* continue with d */ + errno = 0; } } - if (sem_init && (ret = sem_init(path, parent ?: p, &d, data, de))) - goto err; + if (sem_init) { + ret = sem_init(path, (parent != -1) ? parent : p, &d, data, de); + if (ret) + goto err; + } - if (d == NULL) + if (d == -1) goto out; - while ((dent = readdir64(d)) != NULL) { + dir = fdopendir(d); + if (dir == NULL) { + /* ENOTDIR if fake symlink, do not consider it as an error */ + if (errno != ENOTDIR) + llapi_error(LLAPI_MSG_ERROR, errno, + "fdopendir() failed"); + else + errno = 0; + + goto out; + } + + while ((dent = readdir64(dir)) != NULL) { int rc; if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) @@ -2168,8 +2230,8 @@ static int llapi_semantic_traverse(char *path, int size, DIR *parent, if (dent->d_type == DT_UNKNOWN) { struct lov_user_mds_data *lmd = param->fp_lmd; - rc = get_lmd_info(path, d, NULL, lmd, - param->fp_lum_size, GET_LMD_INFO); + rc = get_lmd_info_fd(path, d, -1, param->fp_lmd, + param->fp_lum_size, GET_LMD_INFO); if (rc == 0) dent->d_type = IFTODT(lmd->lmd_stx.stx_mode); else if (ret == 0) @@ -2210,10 +2272,14 @@ out: if (sem_fini) sem_fini(path, parent, &d, data, de); err: - if (d) - closedir(d); - if (p) - closedir(p); + if (d != -1) { + if (dir) + closedir(dir); + else + close(d); + } + if (p != -1) + close(p); return ret; } @@ -2241,8 +2307,8 @@ static int param_callback(char *path, semantic_func_t sem_init, param->fp_depth = 0; - ret = llapi_semantic_traverse(buf, PATH_MAX + 1, NULL, sem_init, - sem_fini, param, NULL); + ret = llapi_semantic_traverse(buf, PATH_MAX + 1, -1, sem_init, + sem_fini, param, NULL); out: find_param_fini(param); free(buf); @@ -2275,11 +2341,19 @@ int 
llapi_file_get_lov_uuid(const char *path, struct obd_uuid *lov_uuid) { int fd, rc; - fd = open(path, O_RDONLY | O_NONBLOCK); + /* do not follow faked symlinks */ + fd = open(path, O_RDONLY | O_NONBLOCK | O_NOFOLLOW); if (fd < 0) { - rc = -errno; - llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path); - return rc; + /* real symlink should have failed with ELOOP so retry without + * O_NOFOLLOW just in case + */ + fd = open(path, O_RDONLY | O_NONBLOCK); + if (fd < 0) { + rc = -errno; + llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", + path); + return rc; + } } rc = llapi_file_fget_lov_uuid(fd, lov_uuid); @@ -2513,7 +2587,7 @@ free_param: * obd index for all these obduuids will be returned in * param->fp_obd_indexes */ -static int setup_indexes(DIR *dir, char *path, struct obd_uuid *obduuids, +static int setup_indexes(int d, char *path, struct obd_uuid *obduuids, int num_obds, int **obdindexes, int *obdindex, enum tgt_type type) { @@ -2536,7 +2610,7 @@ static int setup_indexes(DIR *dir, char *path, struct obd_uuid *obduuids, return -ENOMEM; retry_get_uuids: - ret = llapi_get_target_uuids(dirfd(dir), uuids, &obdcount, type); + ret = llapi_get_target_uuids(d, uuids, &obdcount, type); if (ret) { if (ret == -EOVERFLOW) { struct obd_uuid *uuids_temp; @@ -2600,12 +2674,12 @@ out_free: return ret; } -static int setup_target_indexes(DIR *dir, char *path, struct find_param *param) +static int setup_target_indexes(int d, char *path, struct find_param *param) { int ret = 0; if (param->fp_mdt_uuid) { - ret = setup_indexes(dir, path, param->fp_mdt_uuid, + ret = setup_indexes(d, path, param->fp_mdt_uuid, param->fp_num_mdts, ¶m->fp_mdt_indexes, ¶m->fp_mdt_index, LMV_TYPE); @@ -2614,7 +2688,7 @@ static int setup_target_indexes(DIR *dir, char *path, struct find_param *param) } if (param->fp_obd_uuid) { - ret = setup_indexes(dir, path, param->fp_obd_uuid, + ret = setup_indexes(d, path, param->fp_obd_uuid, param->fp_num_obds, ¶m->fp_obd_indexes, ¶m->fp_obd_index, LOV_TYPE); @@ 
-4624,12 +4698,12 @@ static int fget_projid(int fd, int *projid) return 0; } -static int cb_find_init(char *path, DIR *parent, DIR **dirp, +static int cb_find_init(char *path, int p, int *dp, void *data, struct dirent64 *de) { struct find_param *param = (struct find_param *)data; struct lov_user_mds_data *lmd = param->fp_lmd; - DIR *dir = dirp == NULL ? NULL : *dirp; + int d = dp == NULL ? -1 : *dp; int decision = 1; /* 1 is accepted; -1 is rejected. */ int lustre_fs = 1; int checked_type = 0; @@ -4638,7 +4712,7 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp, __u64 flags; int fd = -2; - if (parent == NULL && dir == NULL) + if (p == -1 && d == -1) return -EINVAL; /* If a regular expression is presented, make the initial decision */ @@ -4684,10 +4758,10 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp, decision = 0; if (decision == 0) { - if (dir && (param->fp_check_mdt_count || + if (d != -1 && (param->fp_check_mdt_count || param->fp_check_hash_type || param->fp_check_foreign)) { param->fp_get_lmv = 1; - ret = cb_get_dirstripe(path, dir, param); + ret = cb_get_dirstripe(path, &d, param); if (ret != 0) { /* * XXX this works to decide for foreign @@ -4704,8 +4778,8 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp, } param->fp_lmd->lmd_lmm.lmm_magic = 0; - ret = get_lmd_info(path, parent, dir, param->fp_lmd, - param->fp_lum_size, GET_LMD_INFO); + ret = get_lmd_info_fd(path, p, d, param->fp_lmd, + param->fp_lum_size, GET_LMD_INFO); if (ret == 0 && param->fp_lmd->lmd_lmm.lmm_magic == 0 && find_check_lmm_info(param)) { struct lov_user_md *lmm = ¶m->fp_lmd->lmd_lmm; @@ -4724,8 +4798,8 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp, lmm->lmm_stripe_offset = -1; } if (ret == 0 && param->fp_mdt_uuid != NULL) { - if (dir != NULL) { - ret = llapi_file_fget_mdtidx(dirfd(dir), + if (d != -1) { + ret = llapi_file_fget_mdtidx(d, ¶m->fp_file_mdt_index); } else if (S_ISREG(lmd->lmd_stx.stx_mode)) { /* @@ -4748,7 +4822,7 @@ 
static int cb_find_init(char *path, DIR *parent, DIR **dirp, * For a special file, we assume it resides on * the same MDT as the parent directory. */ - ret = llapi_file_fget_mdtidx(dirfd(parent), + ret = llapi_file_fget_mdtidx(p, ¶m->fp_file_mdt_index); } } @@ -4787,7 +4861,7 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp, } if (lustre_fs && !param->fp_got_uuids) { - ret = setup_target_indexes(dir ? dir : parent, path, + ret = setup_target_indexes((d != -1) ? d : p, path, param); if (ret) goto out; @@ -5031,10 +5105,10 @@ obd_matches: if (param->fp_mdt_index != OBD_NOT_FOUND) print_failed_tgt(param, path, LL_STATFS_LMV); - if (dir != NULL) - ret = fstat_f(dirfd(dir), &st); + if (d != -1) + ret = fstat_f(d, &st); else if (de != NULL) - ret = fstatat_f(dirfd(parent), de->d_name, &st, + ret = fstatat_f(p, de->d_name, &st, AT_SYMLINK_NOFOLLOW); else ret = lstat_f(path, &st); @@ -5112,34 +5186,33 @@ out: return ret; } -static int cb_migrate_mdt_init(char *path, DIR *parent, DIR **dirp, +static int cb_migrate_mdt_init(char *path, int p, int *dp, void *param_data, struct dirent64 *de) { struct find_param *param = (struct find_param *)param_data; struct lmv_user_md *lmu = param->fp_lmv_md; - DIR *tmp_parent = parent; + int tmp_p = p; char raw[MAX_IOC_BUFLEN] = {'\0'}; char *rawbuf = raw; struct obd_ioctl_data data = { 0 }; - int fd; int ret; char *path_copy; char *filename; bool retry = false; - if (parent == NULL && dirp == NULL) + if (p == -1 && dp == NULL) return -EINVAL; if (!lmu) return -EINVAL; - if (dirp != NULL) - closedir(*dirp); + if (dp != NULL && *dp != -1) + close(*dp); - if (parent == NULL) { - tmp_parent = opendir_parent(path); - if (tmp_parent == NULL) { - *dirp = NULL; + if (p == -1) { + tmp_p = open_parent(path); + if (tmp_p == -1) { + *dp = -1; ret = -errno; llapi_error(LLAPI_MSG_ERROR, ret, "can not open %s", path); @@ -5147,8 +5220,6 @@ static int cb_migrate_mdt_init(char *path, DIR *parent, DIR **dirp, } } - fd = dirfd(tmp_parent); - 
path_copy = strdup(path); filename = basename(path_copy); @@ -5165,7 +5236,7 @@ static int cb_migrate_mdt_init(char *path, DIR *parent, DIR **dirp, } migrate: - ret = ioctl(fd, LL_IOC_MIGRATE, rawbuf); + ret = ioctl(tmp_p, LL_IOC_MIGRATE, rawbuf); if (ret != 0) { if (errno == EBUSY && !retry) { /* @@ -5198,7 +5269,7 @@ migrate: } out: - if (dirp != NULL) { + if (dp != NULL) { /* * If the directory is being migration, we need * close the directory after migration, @@ -5206,16 +5277,16 @@ out: * on the client side, and re-open to get the * new directory handle */ - *dirp = opendir(path); - if (*dirp == NULL) { + *dp = open(path, O_RDONLY|O_NDELAY|O_DIRECTORY); + if (*dp == -1) { ret = -errno; llapi_error(LLAPI_MSG_ERROR, ret, "%s: Failed to open '%s'", __func__, path); } } - if (parent == NULL) - closedir(tmp_parent); + if (p == -1) + close(tmp_p); free(path_copy); @@ -5223,7 +5294,7 @@ out: } /* dir migration finished, shrink its stripes */ -static int cb_migrate_mdt_fini(char *path, DIR *parent, DIR **dirp, void *data, +static int cb_migrate_mdt_fini(char *path, int p, int *dp, void *data, struct dirent64 *de) { struct find_param *param = data; @@ -5234,13 +5305,13 @@ static int cb_migrate_mdt_fini(char *path, DIR *parent, DIR **dirp, void *data, if (de && de->d_type != DT_DIR) goto out; - if (*dirp) { + if (*dp != -1) { /* * close it before setxattr because the latter may destroy the * original object, and cause close fail. 
*/ - ret = closedir(*dirp); - *dirp = NULL; + ret = close(*dp); + *dp = -1; if (ret) goto out; } @@ -5249,7 +5320,7 @@ static int cb_migrate_mdt_fini(char *path, DIR *parent, DIR **dirp, void *data, if (ret == -EALREADY) ret = 0; out: - cb_common_fini(path, parent, dirp, data, de); + cb_common_fini(path, p, dp, data, de); return ret; } @@ -5291,20 +5362,20 @@ int llapi_file_fget_mdtidx(int fd, int *mdtidx) return 0; } -static int cb_get_mdt_index(char *path, DIR *parent, DIR **dirp, void *data, +static int cb_get_mdt_index(char *path, int p, int *dp, void *data, struct dirent64 *de) { struct find_param *param = (struct find_param *)data; - DIR *d = dirp == NULL ? NULL : *dirp; + int d = dp == NULL ? -1 : *dp; int ret; int mdtidx; - if (parent == NULL && d == NULL) + if (p == -1 && d == -1) return -EINVAL; - if (d != NULL) { - ret = llapi_file_fget_mdtidx(dirfd(d), &mdtidx); - } else /* if (parent) */ { + if (d != -1) { + ret = llapi_file_fget_mdtidx(d, &mdtidx); + } else /* if (p != -1) */ { int fd; fd = open(path, O_RDONLY | O_NOCTTY); @@ -5355,34 +5426,49 @@ out: return 0; } -static int cb_getstripe(char *path, DIR *parent, DIR **dirp, void *data, +static int cb_getstripe(char *path, int p, int *dp, void *data, struct dirent64 *de) { struct find_param *param = (struct find_param *)data; - DIR *d = dirp == NULL ? NULL : *dirp; + int d = dp == NULL ? -1 : *dp; int ret = 0; - if (parent == NULL && d == NULL) + if (p == -1 && d == -1) return -EINVAL; if (param->fp_obd_uuid) { param->fp_quiet = 1; - ret = setup_obd_uuid(d ? dirfd(d) : dirfd(parent), path, param); + ret = setup_obd_uuid(d != -1 ? 
d : p, path, param); if (ret) return ret; } - if (d && (param->fp_get_lmv || param->fp_get_default_lmv)) - ret = cb_get_dirstripe(path, d, param); - else if (d || - (parent && !param->fp_get_lmv && !param->fp_get_default_lmv)) - ret = get_lmd_info(path, parent, d, ¶m->fp_lmd->lmd_lmm, - param->fp_lum_size, GET_LMD_STRIPE); - else + if (d != -1 && (param->fp_get_lmv || param->fp_get_default_lmv)) + ret = cb_get_dirstripe(path, &d, param); + else if (d != -1 || + (p != -1 && !param->fp_get_lmv && !param->fp_get_default_lmv)) + ret = get_lmd_info_fd(path, p, d, ¶m->fp_lmd->lmd_lmm, + param->fp_lum_size, GET_LMD_STRIPE); + else if (d == -1 && (param->fp_get_lmv || param->fp_get_default_lmv)) { + /* in case of a dangling or valid faked symlink dir, opendir() + * should have return either EINVAL or ENOENT, so let's try + * to get LMV just in case, and by opening it as a file but + * with O_NOFOLLOW ... + */ + int fd = open(path, O_RDONLY | O_NOFOLLOW); + + if (fd == -1) + return 0; + ret = cb_get_dirstripe(path, &fd, param); + if (ret == 0) + llapi_lov_dump_user_lmm(param, path, LDF_IS_DIR); + close(fd); + return 0; + } else return 0; if (ret) { - if (errno == ENODATA && d != NULL) { + if (errno == ENODATA && d != -1) { /* * We need to "fake" the "use the default" values * since the lmm struct is zeroed out at this point. 
@@ -5402,7 +5488,7 @@ static int cb_getstripe(char *path, DIR *parent, DIR **dirp, void *data, struct lmv_user_md *lum = param->fp_lmv_md; int mdtidx; - ret = llapi_file_fget_mdtidx(dirfd(d), &mdtidx); + ret = llapi_file_fget_mdtidx(d, &mdtidx); if (ret != 0) goto err_out; lum->lum_magic = LMV_MAGIC_V1; @@ -5422,7 +5508,7 @@ static int cb_getstripe(char *path, DIR *parent, DIR **dirp, void *data, lmm->lmm_stripe_offset = -1; goto dump; } - } else if (errno == ENODATA && parent != NULL) { + } else if (errno == ENODATA && p != -1) { if (!param->fp_obd_uuid && !param->fp_mdt_uuid) llapi_printf(LLAPI_MSG_NORMAL, "%s has no stripe info\n", path); @@ -5442,8 +5528,9 @@ static int cb_getstripe(char *path, DIR *parent, DIR **dirp, void *data, err_out: llapi_error(LLAPI_MSG_ERROR, ret, "error: %s: %s failed for %s", - __func__, d ? "LL_IOC_LOV_GETSTRIPE" : - "IOC_MDC_GETFILESTRIPE", path); + __func__, d != -1 ? + "LL_IOC_LOV_GETSTRIPE" : + "IOC_MDC_GETFILESTRIPE", path); } return ret; @@ -5451,7 +5538,7 @@ err_out: dump: if (!(param->fp_verbose & VERBOSE_MDTINDEX)) - llapi_lov_dump_user_lmm(param, path, d ? LDF_IS_DIR : 0); + llapi_lov_dump_user_lmm(param, path, d != -1 ? LDF_IS_DIR : 0); out: /* Do not get down anymore? */ diff --git a/lustre/utils/liblustreapi_util.c b/lustre/utils/liblustreapi_util.c index e8af531..60ded02 100644 --- a/lustre/utils/liblustreapi_util.c +++ b/lustre/utils/liblustreapi_util.c @@ -41,6 +41,7 @@ #include #include #include +#include /* for dirname() */ #include #include /* only until LUSTRE_VERSION_CODE is gone */ #include "lustreapi_internal.h" @@ -312,6 +313,92 @@ retry_open: return rc ? 
-errno : 0;
 }
 
+/**
+ * Remove the directory entry named by \a dname.
+ *
+ * The parent directory of \a dname is opened and asked, through the
+ * LL_IOC_REMOVE_ENTRY ioctl, to remove the last path component.
+ *
+ * \param[in] dname	path of the entry to remove
+ *
+ * \retval 0		on success
+ * \retval -ENOMEM	if the path could not be duplicated
+ * \retval negative errno if opening the parent or the ioctl failed
+ */
+int llapi_direntry_remove(char *dname)
+{
+	char *dirpath = NULL;
+	char *namepath = NULL;
+	char *dir;
+	char *filename;
+	int fd = -1;
+	int rc = 0;
+
+	/* basename() and dirname() may modify the string they are given, so
+	 * each one works on its own private copy of the path
+	 */
+	dirpath = strdup(dname);
+	namepath = strdup(dname);
+	if (!dirpath || !namepath) {
+		/* one copy may have succeeded, let "out" free it */
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	filename = basename(namepath);
+	dir = dirname(dirpath);
+
+	fd = open(dir, O_DIRECTORY | O_RDONLY);
+	if (fd < 0) {
+		rc = -errno;
+		/* report the path that was actually opened */
+		llapi_error(LLAPI_MSG_ERROR, rc, "unable to open '%s'", dir);
+		goto out;
+	}
+
+	if (ioctl(fd, LL_IOC_REMOVE_ENTRY, filename)) {
+		/* record the failure so that it reaches the caller */
+		rc = -errno;
+		llapi_error(LLAPI_MSG_ERROR, rc,
+			    "error on ioctl %#lx for '%s' (%d)",
+			    (long)LL_IOC_REMOVE_ENTRY, filename, fd);
+	}
+out:
+	free(dirpath);
+	free(namepath);
+	if (fd != -1)
+		close(fd);
+	return rc;
+}
+
+/**
+ * Unlink the foreign symlink file or directory \a name.
+ *
+ * \a name is opened without following symlinks, first as a directory and,
+ * if that fails with ENOTDIR, as a plain file. LL_IOC_UNLOCK_FOREIGN is
+ * then issued on it before the entry is removed with unlinkat().
+ *
+ * \param[in] name	path of the foreign file/dir to unlink
+ *
+ * \retval 0		on success
+ * \retval negative errno if open() or unlinkat() failed
+ */
+int llapi_unlink_foreign(char *name)
+{
+	int fd;
+	int rc = 0;
+
+	fd = open(name, O_DIRECTORY | O_RDONLY | O_NOFOLLOW);
+	/* errno is looked at only when open() failed: after a success it
+	 * still holds whatever an earlier call left there
+	 */
+	if (fd < 0 && errno == ENOTDIR)
+		fd = open(name, O_RDONLY | O_NOFOLLOW);
+	if (fd < 0) {
+		rc = -errno;
+		llapi_error(LLAPI_MSG_ERROR, rc, "unable to open '%s'", name);
+		goto out;
+	}
+
+	/* allow foreign symlink file/dir to be unlinked; a failure here is
+	 * reported but the unlink is still attempted, and the result of the
+	 * unlink is what gets returned
+	 */
+	if (ioctl(fd, LL_IOC_UNLOCK_FOREIGN))
+		llapi_error(LLAPI_MSG_ERROR, errno,
+			    "error on ioctl %#lx for '%s' (%d)",
+			    (long)LL_IOC_UNLOCK_FOREIGN, name, fd);
+
+	/* XXX do not set AT_REMOVEDIR in flags even for a dir, as due to the
+	 * hack for foreign symlink it will fail the directory check in
+	 * Kernel's syscall code and return ENOTDIR, so treat all as files
+	 */
+	rc = unlinkat(AT_FDCWD, name, 0);
+	if (rc == -1 && errno == EISDIR)
+		rc = unlinkat(AT_FDCWD, name, AT_REMOVEDIR);
+
+	if (rc == -1) {
+		/* record errno before reporting, like the open() path */
+		rc = -errno;
+		llapi_error(LLAPI_MSG_ERROR, rc,
+			    "error on unlinkat for '%s' (%d)", name, fd);
+	}
+
+out:
+	if (fd != -1)
+		close(fd);
+	return rc;
+}
+
 int llapi_get_fsname_instance(const char *path,
char *fsname, size_t fsname_len, char *instance, size_t instance_len) { -- 1.8.3.1