Whamcloud - gitweb
Merge "LU-9121 lnet: User Defined Selection Policy (UDSP)"
authorGerrit Code Review <gerrit-review@whamcloud.com>
Fri, 26 Feb 2021 07:21:34 +0000 (07:21 +0000)
committerGerrit Code Review <gerrit-review@whamcloud.com>
Fri, 26 Feb 2021 07:21:34 +0000 (07:21 +0000)
43 files changed:
LUSTRE-VERSION-GEN
MAINTAINERS
lustre/ChangeLog
lustre/doc/Makefile.am
lustre/doc/lfs-find.1
lustre/doc/lfs-setdirstripe.1
lustre/doc/lfs-setstripe.1
lustre/doc/lfs.1
lustre/doc/llapi_file_create_foreign.3
lustre/doc/llapi_unlink_foreign.3 [new file with mode: 0644]
lustre/include/lustre/lustreapi.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/llite/Makefile.in
lustre/llite/dcache.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/foreign_symlink.h [new file with mode: 0644]
lustre/llite/llite_foreign.c [new file with mode: 0644]
lustre/llite/llite_foreign_symlink.c [new file with mode: 0644]
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/lproc_llite.c
lustre/llite/namei.c
lustre/llite/pcc.c
lustre/llite/symlink.c
lustre/lov/lov_object.c
lustre/lov/lov_pack.c
lustre/mdd/mdd_dir.c
lustre/mdt/mdt_io.c
lustre/ptlrpc/sec.c
lustre/target/tgt_handler.c
lustre/tests/Makefile.am
lustre/tests/checkstat.c
lustre/tests/create_foreign_dir.c
lustre/tests/create_foreign_file.c
lustre/tests/foreign_symlink_striping.c [new file with mode: 0644]
lustre/tests/sanity-lfsck.sh
lustre/tests/sanity.sh
lustre/utils/Makefile.am
lustre/utils/l_foreign_symlink.c [new file with mode: 0644]
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/liblustreapi_util.c

index 4b62a7c..f5af340 100755 (executable)
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-DEFAULT_VERSION=2.14.0
+DEFAULT_VERSION=2.14.50
 LVF=LUSTRE-VERSION-FILE
 
 LF='
index daf6a58..5dc30f0 100644 (file)
@@ -267,6 +267,7 @@ S:  Maintained
 F:     lustre/conf/lgssc.conf
 F:     lustre/conf/lsvcgss
 F:     lustre/doc/lgss_sk.8
+F:     lustre/doc/lfs-flushctx.1
 F:     lustre/llite/xattr_security.c
 F:     lustre/ptlrpc/gss
 F:     lustre/ptlrpc/sec*.c
index 2e4591a..3c55087 100644 (file)
@@ -1,3 +1,58 @@
+TBD Whamcloud
+       * version 2.15.0
+       * See https://wiki.whamcloud.com/display/PUB/Lustre+Support+Matrix
+         for currently supported client and server kernel versions.
+       * Server primary kernels built and tested during release cycle:
+         3.10.0-1062.18.1.el7 (RHEL7.7)
+         3.10.0-1127.19.1.el7 (RHEL7.8)
+         3.10.0-1160.6.1.el7  (RHEL7.9)
+       * Other server kernels known to build and work at some point (others may also work):
+         3.10.0-862.14.4.el7  (RHEL7.5)
+         3.10.0-957.27.2.el7  (RHEL7.6)
+         4.18.0-80.11.2.el8   (RHEL8.0)
+         4.18.0-147.8.1.el8   (RHEL8.1)
+         4.18.0-193.28.1.el8  (RHEL8.2)
+         4.18.0-240.1.1.el8   (RHEL8.3)
+         4.4.120-92.70        (SLES12 SP2)
+         4.4.180-94.100       (SLES12 SP3)
+         4.4.0-131            (Ubuntu 16.04)
+         4.15.0-32            (Ubuntu 18.04)
+         5.4.0-48             (Ubuntu 20.04)
+         vanilla linux 5.4.0  (ZFS + ldiskfs)
+       * ldiskfs needs an ldiskfs patch series for that kernel, ZFS does not
+       * Client primary kernels built and tested during release cycle:
+         3.10.0-1062.18.1.el7 (RHEL7.7)
+         3.10.0-1127.19.1.el7 (RHEL7.8)
+         3.10.0-1160.6.1.el7  (RHEL7.9)
+         4.12.14-95.48        (SLES12 SP4)
+         4.12.14-122.20       (SLES12 SP5)
+         4.12.14-197.75       (SLES15 SP1)
+         5.3.18-24.24         (SLES15 SP2)
+         4.15.0-48            (Ubuntu 18.04)
+       * Other clients known to build on these kernels at some point (others may also work):
+         3.10.0-862.14.4.el7  (RHEL7.5)
+         3.10.0-957.27.2.el7  (RHEL7.6)
+         4.14.0-49.13.1.el7a  (RHEL7.5)
+         4.14.0-115.2.2.el7a  (RHEL7.6)
+         4.18.0-80.11.2.el8   (RHEL8.0)
+         4.18.0-147.8.1.el8   (RHEL8.1)
+         4.18.0-193.28.1.el8  (RHEL8.2)
+         4.18.0-240.1.1.el8   (RHEL8.3)
+         4.4.120-92.70        (SLES12 SP2)
+         4.4.180-94.100       (SLES12 SP3)
+         4.4.0-131            (Ubuntu 16.04)
+         5.4.0-37             (Ubuntu 20.04)
+         5.4.0                (vanilla kernel.org)
+       * Recommended e2fsprogs version: 1.45.6.wc5 or newer
+       * Recommended ZFS version: 2.0.0
+       * NFS export disabled when stack size < 8192 (32-bit Lustre clients),
+         since the NFSv4 export of Lustre filesystem with 4K stack may cause a
+         stack overflow. For more information, please refer to bugzilla 17630.
+       * NFSv4 reexport to 32-bit NFS client nodes requires Lustre client on
+         the re-exporting nodes to be mounted with "32bitapi" mount option
+
+--------------------------------------------------------------------------------
+
 02-20-2021 Whamcloud
        * version 2.14.0
        * See https://wiki.whamcloud.com/display/PUB/Lustre+Support+Matrix
index b32e670..4d409ae 100644 (file)
@@ -47,6 +47,7 @@ MANFILES =                                    \
        lfs-df.1                                \
        lfs-fid2path.1                          \
        lfs-find.1                              \
+       lfs-flushctx.1                          \
        lfs-getdirstripe.1                      \
        lfs-getname.1                           \
        lfs-getsom.1                            \
@@ -177,7 +178,8 @@ LIBMAN =                                    \
        llapi_rmfid.3                           \
        llapi_search_mdt.3                      \
        llapi_search_ost.3                      \
-       llapi_search_tgt.3
+       llapi_search_tgt.3                      \
+       llapi_unlink_foreign.3
 
 SERVER_MANFILES =                              \
        lctl-barrier.8                          \
index 063f6d9..0cb2b8a 100644 (file)
@@ -119,7 +119,7 @@ File has a foreign (non-Lustre/free format) layout and is of the given
 if specified.  Presently only
 .B none
 or
-.B daos
+.B symlink
 are defined types, though 32-bit numeric types can also be used.
 .TP
 .BR --gid | -g
@@ -365,9 +365,9 @@ Recursively list all mirrored files that have more than 2 mirrors.
 .B $ lfs find ! --mirror-state=ro /mnt/lustre
 Recursively list all out-of-sync mirrored files.
 .TP
-.B $ lfs find ! --foreign=daos /mnt/lustre
+.B $ lfs find ! --foreign=symlink /mnt/lustre
 Recursively list all but foreign files/dirs of
-.B daos
+.B symlink
 type.
 .SH BUGS
 The
index 6f0906a..d61240e 100644 (file)
@@ -94,9 +94,10 @@ Create a directory with a foreign (non-Lustre/free format, see
 option) striping. Where
 .BR type
 specifies a known foreign type (like
-.BR none ,
-.BR daos )
-or a 32-bit numeric type.
+.BR none
+,
+.BR symlink
+, ...) or a 32-bit numeric type.
 .TP
 .BR \-\-flags =\fI<hex>\fR
 Specify a numeric bitmask of type-specific layout flags for the foreign layout.
@@ -149,12 +150,12 @@ This creates a directory striped on two MDTs, whose first stripe is on
 (MDT index 1), and whose hash type is
 .BR all_char .
 .TP
-.B $ lfs mkdir --foreign=daos --xattr PUUID:CUUID /mnt/lustre/dir1
+.B $ lfs mkdir --foreign=symlink --xattr PUUID/CUUID /mnt/lustre/dir1
 This creates
 .B dir1
 with foreign (non-lustre/free format)
-.B PUUID:CUUID
-striping/LMV EA.
+.B PUUID/CUUID
+striping/LMV EA value (symlink type).
 .SH AVAILABILITY
 The
 .B lfs setdirstripe
index 97f74e5..3bbac99 100644 (file)
@@ -133,7 +133,7 @@ file.
 Create a new
 .I file
 with a foreign/non-lustre layout of type
-.I foreign_type \fR(\fBnone\fR, \fBdaos\fR, ...)
+.I foreign_type \fR(\fBnone\fR, \fBsymlink\fR, ...)
 with flags
 .I hex
 and a free-format layout value of
@@ -264,7 +264,7 @@ options).
 Current known types are
 .BR none
 and
-.BR daos
+.BR symlink
 , default is
 .BR none
 .
@@ -533,12 +533,12 @@ which can be created with the
 .B lfs getstripe --yaml
 command.
 .TP
-.B lfs setstripe --foreign=daos --flags=0xda08 \
+.B lfs setstripe --foreign=symlink --flags=0xda08 \
        --xattr=PUUID:CUUID /mnt/lustre/file1
 This creates foreign
 .BR file1
 of type
-.BR daos
+.BR symlink
 with non-lustre/free-format
 .BR PUUID:CUUID
 layout/LOV EA and flags
index 2ca2c42..1e3b9c1 100644 (file)
@@ -127,6 +127,9 @@ lfs \- client utility for Lustre-specific file layout and other attributes
 .br
 .B lfs setstripe --yaml=\fR<\fIyaml_template_file\fR> <\fIfilename\fR>
 .br
+.B lfs unlink_foreign
+.IR path \fR[\fIpath\fR ...]
+.br
 .B lfs --version
 .br
 .B lfs --list-commands
@@ -220,6 +223,9 @@ See lfs-migrate(1).
 .B setstripe
 See lfs-setstripe(1).
 .TP
+.B unlink_foreign
+Remove foreign files/dirs whose removal is prevented when using the regular unlink/rmdir commands/syscalls. Also works for regular files/dirs.
+.TP
 .B --version
 Output the build version of the lfs utility. Use "lctl lustre_build_version" to get the version of the Lustre kernel modules
 .TP
index be03522..2f647ed 100644 (file)
@@ -73,13 +73,13 @@ int main(int argc, char *argv[])
         if (argc != 2)
                 return -1;
 
-        rc = llapi_file_create_foreign(argv[1], 0600, LOV_FOREIGN_TYPE_DAOS,
-                                      0xda08, "PUUID@CUUID");
+        rc = llapi_file_create_foreign(argv[1], 0600, LOV_FOREIGN_TYPE_SYMLINK,
+                                      0xda05, "PUUID/CUUID");
         if (rc < 0) {
                 fprintf(stderr, "file creation has failed, %s\\n", strerror(-rc));
                 return -1;
         }
-        printf("foreign file %s has been created with 'PUUID@CUUID' layout!\\n",
+        printf("foreign file %s has been created with 'PUUID/CUUID' layout!\\n",
                argv[1]);
         return 0;
 }
diff --git a/lustre/doc/llapi_unlink_foreign.3 b/lustre/doc/llapi_unlink_foreign.3
new file mode 100644 (file)
index 0000000..d49fb12
--- /dev/null
@@ -0,0 +1,80 @@
+.TH lustreapi 3 "2009 Jul 10" The Lustre user application interface library
+.SH NAME
+llapi_unlink_foreign \- unlink file/dir with foreign layout on a Lustre filesystem
+.SH SYNOPSIS
+.nf
+.B #include <lustre/lustreapi.h>
+.sp
+.BI "int llapi_unlink_foreign(char *" name ");"
+.sp
+.fi
+.SH DESCRIPTION
+.LP
+.B llapi_unlink_foreign(\|)
+allows unlinking a file/dir of
+.I name
+with foreign LOV/LMV, whose removal would otherwise be prevented when using
+the regular unlink/rmdir command/syscall.
+It also works for a regular file/dir.
+.SH RETURN VALUES
+.LP
+.B llapi_unlink_foreign(\|)
+returns:
+.TP
+=0
+on success.
+.TP
+<0
+on failure, the absolute value is an error code.
+.SH ERRORS
+.TP 15
+.SM ENOENT
+.I name
+does not exist.
+.TP
+.SM ENOTTY
+.I name
+may not point to a Lustre filesystem.
+.SH "EXAMPLE"
+.nf
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include <lustre/lustreapi.h>
+
+int main(int argc, char **argv)
+{
+       char *foreign = NULL;
+       int c, rc;
+
+       while ((c = getopt(argc, argv, "hf:")) != -1) {
+               switch (c) {
+               case 'f':
+                       foreign = optarg;
+                       break;
+               case 'h':
+               default:
+                       fprintf(stderr,
+                               "Usage: %s [-f <foreign file/dir pathname>]\\n",
+                               argv[0]);
+                       exit(0);
+                       break;
+               }
+       }
+
+       if (foreign == NULL) {
+               fprintf(stderr, "a foreign file/dir pathname must be provided\\n");
+               exit(0);
+       }
+
+       rc = llapi_unlink_foreign(foreign);
+       if (rc < 0)
+               fprintf(stderr, "llapi_unlink_foreign() error: %d\\n", rc);
+
+       return rc;
+}
+.fi
+.SH "SEE ALSO"
+.BR lustre (7),
+.BR lustreapi (7)
index b959bf1..8ff6c8c 100644 (file)
@@ -394,6 +394,7 @@ int llapi_dir_create_pool(const char *name, int flags, int stripe_offset,
                          int stripe_count, int stripe_pattern,
                          const char *poolname);
 int llapi_direntry_remove(char *dname);
+int llapi_unlink_foreign(char *dname);
 
 int llapi_obd_fstatfs(int fd, __u32 type, __u32 index,
                      struct obd_statfs *stat_buf, struct obd_uuid *uuid_buf);
index a13306e..ac8a3d6 100644 (file)
@@ -618,6 +618,7 @@ struct ll_ioc_lease_id {
 #define LL_IOC_LMV_GETSTRIPE           _IOWR('f', 241, struct lmv_user_md)
 #define LL_IOC_REMOVE_ENTRY            _IOWR('f', 242, __u64)
 #define LL_IOC_RMFID                   _IOR('f', 242, struct fid_array)
+#define LL_IOC_UNLOCK_FOREIGN          _IO('f', 242)
 #define LL_IOC_SET_LEASE               _IOWR('f', 243, struct ll_ioc_lease)
 #define LL_IOC_SET_LEASE_OLD           _IOWR('f', 243, long)
 #define LL_IOC_GET_LEASE               _IO('f', 244)
@@ -1072,7 +1073,7 @@ struct lustre_foreign_type {
  **/
 enum lustre_foreign_types {
        LU_FOREIGN_TYPE_NONE = 0,
-       LU_FOREIGN_TYPE_DAOS = 0xda05,
+       LU_FOREIGN_TYPE_SYMLINK = 0xda05,
        /* must be the max/last one */
        LU_FOREIGN_TYPE_UNKNOWN = 0xffffffff,
 };
@@ -2639,6 +2640,52 @@ struct fid_array {
 };
 #define OBD_MAX_FIDS_IN_ARRAY  4096
 
+/* more types could be defined upon need for more complex
+ * format to be used in foreign symlink LOV/LMV EAs, like
+ * one to describe a delimiter string and occurrence number
+ * of delimited sub-string, ...
+ *
+ * NOTE(review): these values are presumably what is stored in the "type"
+ * field of struct ll_foreign_symlink_upcall_item -- confirm against users.
+ */
+enum ll_foreign_symlink_upcall_item_type {
+       /* presumably an end-of-buffer marker -- TODO confirm */
+       EOB_TYPE = 1,
+       /* presumably selects the size/string members of the item */
+       STRING_TYPE = 2,
+       /* presumably selects the pos/len members of the item */
+       POSLEN_TYPE = 3,
+};
+
+/* may need to be modified to allow for more format items to be defined,
+ * as for the ll_foreign_symlink_upcall_item_type enum
+ *
+ * One format item: a type tag followed by a type-dependent payload, which
+ * is either a (pos, len) pair or a size followed by a constant string.
+ * NOTE(review): which payload goes with which type value is not visible
+ * here -- confirm against the code parsing/producing these items.
+ */
+struct ll_foreign_symlink_upcall_item {
+       __u32 type;
+       union {
+               /* position/length payload */
+               struct {
+                       __u32 pos;
+                       __u32 len;
+               };
+               /* constant string payload, size bytes long (presumably) */
+               struct {
+                       size_t size;
+                       union {
+                               /* internal storage of constant string */
+                               char *string;
+                               /* upcall stores constant string in a raw */
+                               char bytestring[0];
+                       };
+               };
+       };
+};
+
+/* size, in bytes, of an item from its start up to and including "len" */
+#define POSLEN_ITEM_SZ (offsetof(struct ll_foreign_symlink_upcall_item, len) + \
+               sizeof(((struct ll_foreign_symlink_upcall_item *)0)->len))
+/* size, in bytes, of an item from its start up to "bytestring", plus a
+ * string of sz bytes rounded up to a multiple of sizeof(__u32).
+ * sz is parenthesized so that any expression (shift, conditional, ...)
+ * passed as argument is evaluated as a whole before the rounding.
+ */
+#define STRING_ITEM_SZ(sz) ( \
+       offsetof(struct ll_foreign_symlink_upcall_item, bytestring) + \
+       ((sz) + sizeof(__u32) - 1) / sizeof(__u32) * sizeof(__u32))
+
+/* presently limited to not cause max stack frame size to be reached
+ * because of temporary automatic array of
+ * "struct ll_foreign_symlink_upcall_item" presently used in
+ * foreign_symlink_upcall_info_store()
+ */
+#define MAX_NB_UPCALL_ITEMS 32
+
 #if defined(__cplusplus)
 }
 #endif
index dd82b4d..edc2dd7 100644 (file)
@@ -8,8 +8,10 @@ lustre-objs += lcommon_cl.o
 lustre-objs += lcommon_misc.o
 lustre-objs += vvp_dev.o vvp_page.o vvp_io.o vvp_object.o
 lustre-objs += pcc.o crypto.o
+lustre-objs += llite_foreign.o llite_foreign_symlink.o
 
 EXTRA_DIST := $(lustre-objs:.o=.c) xattr.c rw26.c super25.c
 EXTRA_DIST += llite_internal.h vvp_internal.h pcc.h
+EXTRA_DIST += foreign_symlink.h
 
 @INCLUDE_RULES@
index c299068..c76382f 100644 (file)
@@ -294,12 +294,22 @@ static int ll_revalidate_dentry(struct dentry *dentry,
                return 1;
 
        /* Symlink - always valid as long as the dentry was found */
+       /* only special case is to prevent ELOOP error from VFS during open
+        * of a foreign symlink file/dir with O_NOFOLLOW, like it happens for
+        * real symlinks. This will allow to open foreign symlink file/dir
+        * for get[dir]stripe/unlock ioctl()s.
+        */
 #ifdef HAVE_IOP_GET_LINK
-       if (dentry->d_inode && dentry->d_inode->i_op->get_link)
+       if (dentry->d_inode && dentry->d_inode->i_op->get_link) {
 #else
-       if (dentry->d_inode && dentry->d_inode->i_op->follow_link)
+       if (dentry->d_inode && dentry->d_inode->i_op->follow_link) {
 #endif
-               return 1;
+               if (!S_ISLNK(dentry->d_inode->i_mode) &&
+                   !(lookup_flags & LOOKUP_FOLLOW))
+                       return 0;
+               else
+                       return 1;
+       }
 
        /*
         * VFS warns us that this is the second go around and previous
index dbacb2a..a57a11b 100644 (file)
@@ -1662,6 +1662,16 @@ finish_req:
                return rc;
        }
 
+       case LL_IOC_UNLOCK_FOREIGN:
+               /* if not a foreign symlink do nothing */
+               if (ll_foreign_is_removable(dentry, true)) {
+                       CDEBUG(D_INFO,
+                              "prevent rmdir of non-foreign dir ("DFID")\n",
+                              PFID(ll_inode2fid(inode)));
+                       RETURN(-EOPNOTSUPP);
+               }
+               RETURN(0);
+
        case LL_IOC_REMOVE_ENTRY: {
                char            *filename = NULL;
                int              namelen = 0;
index 2e0ca9e..092c42d 100644 (file)
@@ -2381,12 +2381,12 @@ retry:
                        GOTO(out, rc = PTR_ERR(env));
 
                rc = cl_object_layout_get(env, obj, &cl);
-               if (!rc && cl.cl_is_composite)
+               if (rc >= 0 && cl.cl_is_composite)
                        rc = ll_layout_write_intent(inode, LAYOUT_INTENT_WRITE,
                                                    &ext);
 
                cl_env_put(env, &refcheck);
-               if (rc)
+               if (rc < 0)
                        GOTO(out, rc);
        }
 
@@ -4073,6 +4073,20 @@ out_state:
                        return -EOPNOTSUPP;
                return llcrypt_ioctl_get_key_status(file, (void __user *)arg);
 #endif
+
+       case LL_IOC_UNLOCK_FOREIGN: {
+               struct dentry *dentry = file_dentry(file);
+
+               /* if not a foreign symlink do nothing */
+               if (ll_foreign_is_removable(dentry, true)) {
+                       CDEBUG(D_INFO,
+                              "prevent unlink of non-foreign file ("DFID")\n",
+                              PFID(ll_inode2fid(inode)));
+                       RETURN(-EOPNOTSUPP);
+               }
+               RETURN(0);
+       }
+
        default:
                RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
                                     (void __user *)arg));
@@ -4841,7 +4855,7 @@ static int ll_merge_md_attr(struct inode *inode)
 }
 
 int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask,
-                     unsigned int flags)
+                     unsigned int flags, bool foreign)
 {
        struct inode *inode = de->d_inode;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
@@ -4867,7 +4881,10 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask,
        if (rc < 0)
                RETURN(rc);
 
-       if (S_ISREG(inode->i_mode)) {
+       /* foreign file/dir are always of zero length, so don't
+        * need to validate size.
+        */
+       if (S_ISREG(inode->i_mode) && !foreign) {
                bool cached;
 
                if (!need_glimpse)
@@ -4914,7 +4931,8 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask,
                }
        } else {
                /* If object isn't regular a file then don't validate size. */
-               if (ll_dir_striped(inode)) {
+               /* foreign dir is not striped dir */
+               if (ll_dir_striped(inode) && !foreign) {
                        rc = ll_merge_md_attr(inode);
                        if (rc < 0)
                                RETURN(rc);
@@ -4941,7 +4959,12 @@ fill_attr:
                stat->rdev = inode->i_rdev;
        }
 
-       stat->mode = inode->i_mode;
+       /* foreign symlink to be exposed as a real symlink */
+       if (!foreign)
+               stat->mode = inode->i_mode;
+       else
+               stat->mode = (inode->i_mode & ~S_IFMT) | S_IFLNK;
+
        stat->uid = inode->i_uid;
        stat->gid = inode->i_gid;
        stat->atime = inode->i_atime;
@@ -4990,13 +5013,14 @@ fill_attr:
 int ll_getattr(const struct path *path, struct kstat *stat,
               u32 request_mask, unsigned int flags)
 {
-       return ll_getattr_dentry(path->dentry, stat, request_mask, flags);
+       return ll_getattr_dentry(path->dentry, stat, request_mask, flags,
+                                false);
 }
 #else
 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
 {
        return ll_getattr_dentry(de, stat, STATX_BASIC_STATS,
-                                AT_STATX_SYNC_AS_STAT);
+                                AT_STATX_SYNC_AS_STAT, false);
 }
 #endif
 
@@ -5449,7 +5473,7 @@ int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
 out:
        cl_env_put(env, &refcheck);
 
-       RETURN(rc);
+       RETURN(rc < 0 ? rc : 0);
 }
 
 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
diff --git a/lustre/llite/foreign_symlink.h b/lustre/llite/foreign_symlink.h
new file mode 100644 (file)
index 0000000..a44fa5e
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+
+#ifndef LLITE_FOREIGN_SYMLINK_H
+#define LLITE_FOREIGN_SYMLINK_H
+
+/* llite/llite_foreign_symlink.c */
+#ifdef HAVE_INODEOPS_ENHANCED_GETATTR
+int ll_foreign_symlink_getattr(const struct path *path, struct kstat *stat,
+                              u32 request_mask, unsigned int flags);
+#else
+int ll_foreign_symlink_getattr(struct vfsmount *mnt, struct dentry *de,
+                              struct kstat *stat);
+#endif
+ssize_t foreign_symlink_enable_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf);
+ssize_t foreign_symlink_enable_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count);
+ssize_t foreign_symlink_prefix_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf);
+ssize_t foreign_symlink_prefix_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count);
+ssize_t foreign_symlink_upcall_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf);
+ssize_t foreign_symlink_upcall_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count);
+ssize_t foreign_symlink_upcall_info_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count);
+extern struct inode_operations ll_foreign_file_symlink_inode_operations;
+extern struct inode_operations ll_foreign_dir_symlink_inode_operations;
+
+#endif /* LLITE_FOREIGN_SYMLINK_H */
diff --git a/lustre/llite/llite_foreign.c b/lustre/llite/llite_foreign.c
new file mode 100644 (file)
index 0000000..d231284
--- /dev/null
@@ -0,0 +1,281 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2020 Intel Corporation.
+ */
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include "llite_internal.h"
+
+/* Apply the policy matching the type of a foreign file.
+ *
+ * A file whose foreign LOV is of type LU_FOREIGN_TYPE_SYMLINK gets the
+ * foreign symlink inode operations installed and IOP_NOFOLLOW cleared;
+ * any other type is only reported in the debug log.
+ *
+ * \param[in] inode    inode of the foreign file
+ * \param[in] lfm      foreign LOV, lfm_type is converted with le32_to_cpu()
+ */
+static void ll_manage_foreign_file(struct inode *inode,
+                                  struct lov_foreign_md *lfm)
+{
+       struct ll_sb_info *sbi = ll_i2sbi(inode);
+
+       if (le32_to_cpu(lfm->lfm_type) == LU_FOREIGN_TYPE_SYMLINK) {
+               CDEBUG(D_INFO,
+                      "%s: inode %p of fid "DFID": Foreign file of type symlink, faking a symlink\n",
+                      sbi->ll_fsname, inode, PFID(ll_inode2fid(inode)));
+               /* change inode_operations to add symlink methods, and clear
+                * IOP_NOFOLLOW to ensure file will be treated as a symlink
+                * by Kernel (see d_flags_for_inode()).
+                */
+               inode->i_op = &ll_foreign_file_symlink_inode_operations;
+               inode->i_opflags &= ~IOP_NOFOLLOW;
+       } else {
+               /* "%#x" prints the type in hexadecimal with a 0x prefix;
+                * the former "%ux" printed it in decimal followed by an "x"
+                */
+               CDEBUG(D_INFO,
+                      "%s: inode %p of fid "DFID": Foreign file of type %#x, nothing special to do\n",
+                      sbi->ll_fsname, inode, PFID(ll_inode2fid(inode)),
+                      le32_to_cpu(lfm->lfm_type));
+       }
+}
+
+/* Apply the policy matching the type of a foreign directory.
+ *
+ * A directory whose foreign LMV is of type LU_FOREIGN_TYPE_SYMLINK gets the
+ * foreign symlink inode operations installed; any other type is only
+ * reported in the debug log.
+ *
+ * \param[in] inode    inode of the foreign directory
+ * \param[in] lfm      foreign LMV of the directory
+ */
+static void ll_manage_foreign_dir(struct inode *inode,
+                                 struct lmv_foreign_md *lfm)
+{
+       struct ll_sb_info *sbi = ll_i2sbi(inode);
+
+       /* NOTE(review): lfm_type is compared here without le32_to_cpu() but
+        * logged below with it, unlike ll_manage_foreign_file() which
+        * converts in both places -- confirm which byte order applies.
+        */
+       if (lfm->lfm_type == LU_FOREIGN_TYPE_SYMLINK) {
+               CDEBUG(D_INFO,
+                      "%s: inode %p of fid "DFID": Foreign dir of type symlink, faking a symlink\n",
+                      sbi->ll_fsname, inode, PFID(ll_inode2fid(inode)));
+               /* change inode_operations to add symlink methods
+                * IOP_NOFOLLOW should not be set for dirs
+                */
+               inode->i_op = &ll_foreign_dir_symlink_inode_operations;
+       } else {
+               /* "%#x" prints the type in hexadecimal with a 0x prefix;
+                * the former "%ux" printed it in decimal followed by an "x"
+                */
+               CDEBUG(D_INFO,
+                      "%s: inode %p of fid "DFID": Foreign dir of type %#x, nothing special to do\n",
+                      sbi->ll_fsname, inode, PFID(ll_inode2fid(inode)),
+                      le32_to_cpu(lfm->lfm_type));
+       }
+}
+
+/* Detect a foreign file/dir and apply the matching foreign policy.
+ *
+ * For a regular file the layout found in \a lmd is used when present,
+ * otherwise the head of the layout is fetched from the cl_object; for a
+ * directory the LMV found in \a lmd is used when it is a foreign one,
+ * otherwise the LMV attached to the inode is checked under lli_lsm_sem.
+ *
+ * \param[in] inode    inode to check
+ * \param[in] lmd      metadata to look the foreign LOV/LMV up in
+ *
+ * \retval 0           on success, whether inode is foreign or not
+ * \retval negative    error from cl_env_get() or cl_object_layout_get()
+ */
+int ll_manage_foreign(struct inode *inode, struct lustre_md *lmd)
+{
+       int rc = 0;
+
+       ENTRY;
+       /* apply any foreign file/dir policy */
+       if (S_ISREG((inode)->i_mode)) {
+               struct ll_inode_info *lli = ll_i2info(inode);
+               struct cl_object *obj = lli->lli_clob;
+
+               if (lmd->layout.lb_buf != NULL && lmd->layout.lb_len != 0) {
+                       struct lov_foreign_md *lfm = lmd->layout.lb_buf;
+
+                       if (lfm->lfm_magic == LOV_MAGIC_FOREIGN)
+                               ll_manage_foreign_file(inode, lfm);
+                       GOTO(out, rc);
+               }
+
+               if (obj) {
+                       /* only the layout header is needed to identify a
+                        * foreign layout and its type
+                        */
+                       struct lov_foreign_md lfm = {
+                               .lfm_magic = LOV_MAGIC,
+                       };
+                       struct cl_layout cl = {
+                               .cl_buf.lb_buf = &lfm,
+                               .cl_buf.lb_len = sizeof(lfm),
+                       };
+                       struct lu_env *env;
+                       u16 refcheck;
+
+                       env = cl_env_get(&refcheck);
+                       if (IS_ERR(env))
+                               GOTO(out, rc = PTR_ERR(env));
+                       rc = cl_object_layout_get(env, obj, &cl);
+                       /* error is likely to be -ERANGE because of the small
+                        * buffer we use, only the content is significant here
+                        */
+                       if (rc < 0 && rc != -ERANGE) {
+                               cl_env_put(env, &refcheck);
+                               GOTO(out, rc);
+                       }
+                       /* -ERANGE or any non-negative value is an expected
+                        * outcome here and must not be reported to the
+                        * caller, same as in ll_foreign_is_removable()
+                        */
+                       rc = 0;
+                       if (lfm.lfm_magic == LOV_MAGIC_FOREIGN)
+                               ll_manage_foreign_file(inode, &lfm);
+                       cl_env_put(env, &refcheck);
+               }
+       } else if (S_ISDIR((inode)->i_mode)) {
+               if (lmd->lfm != NULL &&
+                   lmd->lfm->lfm_magic == LMV_MAGIC_FOREIGN) {
+                       ll_manage_foreign_dir(inode, lmd->lfm);
+               } else {
+                       struct ll_inode_info *lli = ll_i2info(inode);
+                       struct lmv_foreign_md *lfm;
+
+                       /* lli_lsm_md is read under lli_lsm_sem */
+                       down_read(&lli->lli_lsm_sem);
+                       lfm = (struct lmv_foreign_md *)(lli->lli_lsm_md);
+                       if (lfm &&  lfm->lfm_magic == LMV_MAGIC_FOREIGN)
+                               ll_manage_foreign_dir(inode, lfm);
+                       up_read(&lli->lli_lsm_sem);
+               }
+       }
+out:
+       RETURN(rc);
+}
+
+/* Tell whether the file/dir behind dentry may be opened with these flags.
+ *
+ * dentry must be spliced to inode (dentry->d_inode != NULL) !!!
+ *
+ * \retval false       dentry is a faked symlink and O_NOFOLLOW is not set
+ * \retval true        in any other case
+ */
+bool ll_foreign_is_openable(struct dentry *dentry, unsigned int flags)
+{
+       bool fake_symlink;
+
+       /* a faked symlink is seen as a symlink through its dentry while its
+        * inode mode is not the one of a symlink. Regular symlinks are not
+        * concerned as they have been treated/filtered upstream.
+        */
+       fake_symlink = d_is_symlink(dentry) &&
+                      !S_ISLNK(dentry->d_inode->i_mode);
+
+       /* faked symlinks should not be opened (unless O_NOFOLLOW!), this
+        * wants ll_atomic_open() to return 1 from finish_no_open() in order
+        * to get follow_link() to be called in both path_lookupat() and
+        * path_openat().
+        */
+       if (!fake_symlink)
+               return true;
+
+       return (flags & O_NOFOLLOW) != 0;
+}
+
+/* Tell whether a foreign file must be preserved from removal.
+ *
+ * for now, only avoid foreign fake symlink file removal
+ *
+ * \param[in] lfm      foreign LOV of the file
+ * \param[in] lli      inode info carrying the LLIF_FOREIGN_REMOVABLE flag
+ * \param[in] unset    if true, set LLIF_FOREIGN_REMOVABLE on a foreign
+ *                     symlink instead of testing it
+ *
+ * \retval true        foreign symlink, with \a unset or flag not yet set
+ * \retval false       any other foreign type, or flag already set
+ */
+static bool should_preserve_foreign_file(struct lov_foreign_md *lfm,
+                                        struct ll_inode_info *lli, bool unset)
+{
+       /* other foreign types are never preserved */
+       if (lfm->lfm_type != LU_FOREIGN_TYPE_SYMLINK)
+               return false;
+
+       if (unset) {
+               ll_file_set_flag(lli, LLIF_FOREIGN_REMOVABLE);
+               return true;
+       }
+
+       return !ll_file_test_flag(lli, LLIF_FOREIGN_REMOVABLE);
+}
+
+/* Tell whether a foreign directory must be preserved from removal.
+ *
+ * for now, only avoid foreign fake symlink dir removal
+ *
+ * \param[in] lfm      foreign LMV of the directory
+ * \param[in] lli      inode info carrying the LLIF_FOREIGN_REMOVABLE flag
+ * \param[in] unset    if true, set LLIF_FOREIGN_REMOVABLE on a foreign
+ *                     symlink instead of testing it
+ *
+ * \retval true        foreign symlink, with \a unset or flag not yet set
+ * \retval false       any other foreign type, or flag already set
+ */
+static bool should_preserve_foreign_dir(struct lmv_foreign_md *lfm,
+                                       struct ll_inode_info *lli, bool unset)
+{
+       /* other foreign types are never preserved */
+       if (lfm->lfm_type != LU_FOREIGN_TYPE_SYMLINK)
+               return false;
+
+       if (unset) {
+               ll_file_set_flag(lli, LLIF_FOREIGN_REMOVABLE);
+               return true;
+       }
+
+       return !ll_file_test_flag(lli, LLIF_FOREIGN_REMOVABLE);
+}
+
+/* Tell whether the file/dir behind dentry can be removed right now.
+ *
+ * A foreign file/dir of symlink type is not removable until its
+ * LLIF_FOREIGN_REMOVABLE flag has been set, which is what a call with
+ * \a unset true does (that call itself still returns false).
+ *
+ * XXX
+ * instead of fetching type from foreign LOV/LMV, we may simply
+ * check (d_is_symlink(dentry) && !S_ISLNK(dentry->d_inode->i_mode))
+ * to identify a fake symlink
+ *
+ * \param[in] dentry   dentry of the file/dir to check
+ * \param[in] unset    if true, flag a foreign symlink as removable
+ *
+ * \retval false       no inode attached to dentry, or foreign symlink being
+ *                     preserved (or just flagged as removable)
+ * \retval true        in any other case, including when the layout could
+ *                     not be fetched
+ */
+bool ll_foreign_is_removable(struct dentry *dentry, bool unset)
+{
+       struct inode *inode = dentry->d_inode;
+       struct qstr *name = &dentry->d_name;
+       bool preserve_foreign = false;
+       int rc = 0;
+
+       ENTRY;
+       /* leave through RETURN() as ENTRY has been used, and with a bool */
+       if (inode == NULL)
+               RETURN(false);
+
+       /* some foreign types may not be allowed to be unlinked in order to
+        * keep references with external objects
+        */
+       if (S_ISREG(inode->i_mode)) {
+               struct ll_inode_info *lli = ll_i2info(inode);
+               struct cl_object *obj = lli->lli_clob;
+
+               if (obj) {
+                       /* only the layout header is needed to identify a
+                        * foreign layout and its type
+                        */
+                       struct lov_foreign_md lfm = {
+                               .lfm_magic = LOV_MAGIC,
+                       };
+                       struct cl_layout cl = {
+                               .cl_buf.lb_buf = &lfm,
+                               .cl_buf.lb_len = sizeof(lfm),
+                       };
+                       struct lu_env *env;
+                       u16 refcheck;
+
+                       env = cl_env_get(&refcheck);
+                       if (IS_ERR(env))
+                               GOTO(out, rc = PTR_ERR(env));
+                       rc = cl_object_layout_get(env, obj, &cl);
+                       /* error is likely to be -ERANGE because of the small
+                        * buffer we use, only the content is significant here
+                        */
+                       if (rc < 0 && rc != -ERANGE) {
+                               cl_env_put(env, &refcheck);
+                               goto out;
+                       } else {
+                               rc = 0;
+                       }
+                       if (lfm.lfm_magic == LOV_MAGIC_FOREIGN)
+                               preserve_foreign =
+                                       should_preserve_foreign_file(&lfm, lli,
+                                                                    unset);
+                       cl_env_put(env, &refcheck);
+                       if (preserve_foreign) {
+                               CDEBUG(D_INFO,
+                                      "%s unlink of foreign file (%.*s, "DFID")\n",
+                                      unset ? "allow" : "prevent",
+                                      name->len, name->name,
+                                      PFID(ll_inode2fid(inode)));
+                               RETURN(false);
+                       }
+               } else {
+                       CDEBUG(D_INFO,
+                              "unable to check if file (%.*s, "DFID") is foreign...\n",
+                              name->len, name->name,
+                              PFID(ll_inode2fid(inode)));
+                       /* XXX should we prevent removal ?? */
+               }
+       } else if (S_ISDIR(inode->i_mode)) {
+               struct ll_inode_info *lli = ll_i2info(inode);
+               struct lmv_foreign_md *lfm;
+
+               /* lli_lsm_md is read under lli_lsm_sem */
+               down_read(&lli->lli_lsm_sem);
+               lfm = (struct lmv_foreign_md *)(lli->lli_lsm_md);
+               if (!lfm)
+                       CDEBUG(D_INFO,
+                              "unable to check if dir (%.*s, "DFID") is foreign...\n",
+                              name->len, name->name,
+                              PFID(ll_inode2fid(inode)));
+               else if (lfm->lfm_magic == LMV_MAGIC_FOREIGN)
+                       preserve_foreign = should_preserve_foreign_dir(lfm, lli,
+                                                                      unset);
+               up_read(&lli->lli_lsm_sem);
+               if (preserve_foreign) {
+                       CDEBUG(D_INFO,
+                              "%s unlink of foreign dir (%.*s, "DFID")\n",
+                              unset ? "allow" : "prevent",
+                              name->len, name->name,
+                              PFID(ll_inode2fid(inode)));
+                       RETURN(false);
+               }
+       }
+
+out:
+       RETURN(true);
+}
diff --git a/lustre/llite/llite_foreign_symlink.c b/lustre/llite/llite_foreign_symlink.c
new file mode 100644 (file)
index 0000000..c36f363
--- /dev/null
@@ -0,0 +1,857 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2020 Intel Corporation.
+ */
+/*
+ * Foreign symlink implementation.
+ *
+ * Methods in this source file allow constructing a relative path from the
+ * LOV/LMV foreign content, complementing it with a prefix, and then
+ * exposing it to the VFS as a symlink destination.
+ * The default/internal mechanism simply takes the full foreign free string
+ * as the relative path. For more complex internal formats an upcall has
+ * been implemented to provide the format's details to the llite layer
+ * (presently just in terms of constant strings and substring positions in
+ * the EA, but this can be enhanced).
+ */
+
+#include <linux/fs.h>
+#include <linux/fs_struct.h>
+#include <linux/mm.h>
+#include <linux/stat.h>
+#include <linux/version.h>
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include "llite_internal.h"
+
+/* Allocate a buffer large enough for "/<prefix>/<suffix>'\0'" and copy
+ * the "/<prefix>/" part in.
+ *
+ * Must be called with sbi->ll_foreign_symlink_sem held for read, to
+ * protect against a concurrent sbi->ll_foreign_symlink_prefix change.
+ *
+ * \param[in]  sbi          superblock info providing the prefix
+ * \param[in]  inode        inode being resolved, only used in error message
+ * \param[out] destname     allocated buffer
+ * \param[in]  suffix_size  number of bytes caller intends to append
+ *
+ * \retval >= 0     offset in *destname where the suffix must start
+ * \retval -EINVAL  resulting path would not fit in PATH_MAX
+ * \retval -ENOMEM  buffer allocation failed
+ */
+static int foreign_symlink_alloc_and_copy_prefix(struct ll_sb_info *sbi,
+                                                struct inode *inode,
+                                                char **destname,
+                                                size_t suffix_size)
+{
+       size_t prefix_size, full_size;
+
+       ENTRY;
+
+       /* stored prefix size accounts for the terminating '\0' */
+       prefix_size = sbi->ll_foreign_symlink_prefix_size - 1;
+
+       /* check each term on its own first, so that the sum can not wrap
+        * around and defeat the PATH_MAX limit (a zero prefix size would
+        * also make prefix_size wrap and is rejected here)
+        */
+       if (suffix_size > PATH_MAX || prefix_size > PATH_MAX ||
+           suffix_size + prefix_size + 3 > PATH_MAX) {
+               CERROR("%s: inode "DFID": resolved destination path too long\n",
+                      sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+               RETURN(-EINVAL);
+       }
+
+       /* allocate enough for "/<prefix>/<suffix>'\0'" */
+       full_size = suffix_size + prefix_size + 3;
+       OBD_ALLOC(*destname, full_size);
+       if (*destname == NULL)
+               RETURN(-ENOMEM);
+
+       (*destname)[0] = '/';
+       memcpy(*destname + 1, sbi->ll_foreign_symlink_prefix,
+              prefix_size);
+       (*destname)[prefix_size + 1] = '/';
+
+       /* suffix starts right after "/<prefix>/" */
+       RETURN(prefix_size + 2);
+}
+
+/* Default foreign symlink parsing method, used when no upcall has been
+ * registered: the whole lfm_value is taken as the relative path that
+ * complements the foreign prefix.
+ *
+ * Returns 0 with *destname set, or the negative error reported by
+ * foreign_symlink_alloc_and_copy_prefix().
+ */
+static int ll_foreign_symlink_default_parse(struct ll_sb_info *sbi,
+                                           struct inode *inode,
+                                           struct lov_foreign_md *lfm,
+                                           char **destname)
+{
+       int offset;
+
+       /* prefix must not change while it is being copied */
+       down_read(&sbi->ll_foreign_symlink_sem);
+       offset = foreign_symlink_alloc_and_copy_prefix(sbi, inode, destname,
+                                                      lfm->lfm_length);
+       up_read(&sbi->ll_foreign_symlink_sem);
+
+       if (offset >= 0) {
+               char *suffix = *destname + offset;
+
+               /* append the foreign value and terminate the path */
+               memcpy(suffix, lfm->lfm_value, lfm->lfm_length);
+               suffix[lfm->lfm_length] = '\0';
+
+               RETURN(0);
+       }
+
+       RETURN(offset);
+}
+
+/* If an upcall has been registered, the foreign symlink is constructed
+ * as per the upcall provided format.
+ * Presently we only support a series of constant strings and sub-strings
+ * to be taken from lfm_value content.
+ *
+ * Returns 0 with *destname set to the allocated path, or a negative errno;
+ * a buffer allocated here is freed before returning an error.
+ */
+static int ll_foreign_symlink_upcall_parse(struct ll_sb_info *sbi,
+                                          struct inode *inode,
+                                          struct lov_foreign_md *lfm,
+                                          char **destname)
+{
+       struct ll_foreign_symlink_upcall_item *foreign_symlink_items;
+       int pos = 0, suffix_pos = -1, items_size = 0;
+       int i = 0, rc = 0;
+
+       ENTRY;
+
+       down_read(&sbi->ll_foreign_symlink_sem);
+
+       /* items array is replaced (and the old one freed) under the write
+        * lock by foreign_symlink_upcall_info_store(), so the pointer must
+        * only be sampled once the semaphore is held
+        */
+       foreign_symlink_items = sbi->ll_foreign_symlink_upcall_items;
+
+       /* compute size of relative path of destination path
+        * could be done once during upcall items/infos reading
+        * and stored as new ll_sb_info field
+        */
+       for (i = 0; i < sbi->ll_foreign_symlink_upcall_nb_items; i++) {
+               switch (foreign_symlink_items[i].type) {
+               case STRING_TYPE:
+                       items_size += foreign_symlink_items[i].size;
+                       break;
+               case POSLEN_TYPE:
+                       items_size += foreign_symlink_items[i].len;
+                       break;
+               case EOB_TYPE:
+                       /* should be the last item */
+                       break;
+               default:
+                       CERROR("%s: unexpected type '%u' found in items\n",
+                              sbi->ll_fsname, foreign_symlink_items[i].type);
+                       GOTO(failed, rc = -EINVAL);
+               }
+       }
+
+       suffix_pos = foreign_symlink_alloc_and_copy_prefix(sbi, inode, destname,
+                                                          items_size);
+       if (suffix_pos < 0)
+               GOTO(failed, rc = suffix_pos);
+
+       /* rescan foreign_symlink_items[] to create faked symlink dest path,
+        * never walking past the number of items actually stored even if
+        * no EOB_TYPE item terminates the array
+        */
+       for (i = 0; i < sbi->ll_foreign_symlink_upcall_nb_items &&
+                   foreign_symlink_items[i].type != EOB_TYPE; i++) {
+               if (foreign_symlink_items[i].type == STRING_TYPE) {
+                       memcpy(*destname + suffix_pos + pos,
+                              foreign_symlink_items[i].string,
+                              foreign_symlink_items[i].size);
+                       pos += foreign_symlink_items[i].size;
+               } else if (foreign_symlink_items[i].type == POSLEN_TYPE) {
+                       /* written as a subtraction so that a huge "pos"
+                        * can not make "pos + len" wrap below lfm_length
+                        */
+                       if (foreign_symlink_items[i].pos > lfm->lfm_length ||
+                           foreign_symlink_items[i].len >
+                           lfm->lfm_length - foreign_symlink_items[i].pos) {
+                               CERROR("%s:  "DFID" foreign EA too short to find (%u,%u) item\n",
+                                      sbi->ll_fsname,
+                                      PFID(ll_inode2fid(inode)),
+                                      foreign_symlink_items[i].pos,
+                                      foreign_symlink_items[i].len);
+                               GOTO(failed, rc = -EINVAL);
+                       }
+                       memcpy(*destname + suffix_pos + pos,
+                              lfm->lfm_value + foreign_symlink_items[i].pos,
+                              foreign_symlink_items[i].len);
+                       pos += foreign_symlink_items[i].len;
+               } else {
+                       CERROR("%s: unexpected type '%u' found in items\n",
+                              sbi->ll_fsname, foreign_symlink_items[i].type);
+                       GOTO(failed, rc = -EINVAL);
+               }
+       }
+
+       /* explicitly terminate the path, pos can not exceed items_size so
+        * this stays within the "suffix_pos + items_size + 1" bytes buffer
+        */
+       (*destname)[suffix_pos + pos] = '\0';
+
+failed:
+       up_read(&sbi->ll_foreign_symlink_sem);
+
+       if (rc != 0 && suffix_pos >= 0) {
+               /* buffer was allocated by OBD_ALLOC() with room for
+                * "/<prefix>/" (suffix_pos bytes), the items and '\0'
+                */
+               OBD_FREE(*destname, suffix_pos + items_size + 1);
+               *destname = NULL;
+       }
+
+       RETURN(rc);
+}
+
+/* Build the faked symlink destination for a foreign LOV/LMV, dispatching
+ * on whether an upcall provided format is registered for this mount.
+ */
+static int ll_foreign_symlink_parse(struct ll_sb_info *sbi,
+                                   struct inode *inode,
+                                   struct lov_foreign_md *lfm,
+                                   char **destname)
+{
+       /* upcall is available, use the format it provided */
+       if (sbi->ll_flags & LL_SBI_FOREIGN_SYMLINK_UPCALL)
+               return ll_foreign_symlink_upcall_parse(sbi, inode, lfm,
+                                                      destname);
+
+       /* no user-land upcall registered: assume the whole free field of
+        * foreign LOV is the relative path of the faked symlink
+        * destination, to be completed by prefix
+        */
+       return ll_foreign_symlink_default_parse(sbi, inode, lfm, destname);
+}
+
+/* Fetch the foreign LOV (regular file) or cached foreign LMV (directory)
+ * of \a inode and build the faked symlink destination out of it.
+ *
+ * Don't need lli_size_mutex locked as LOV/LMV are EAs
+ * and should not be stored in data blocks
+ *
+ * \param[in]  inode    foreign file or directory
+ * \param[out] symname  allocated destination path, only set on success
+ *
+ * \retval 0        success
+ * \retval negative errno on failure (-ENODATA if layout is empty)
+ */
+static int ll_foreign_readlink_internal(struct inode *inode, char **symname)
+{
+       struct ll_inode_info *lli = ll_i2info(inode);
+       struct ll_sb_info *sbi = ll_i2sbi(inode);
+       struct lov_foreign_md *lfm = NULL;
+       char *destname = NULL;
+       size_t lfm_size = 0;
+       int rc;
+
+       ENTRY;
+
+       if (S_ISREG(inode->i_mode)) {
+               struct cl_object *obj = lli->lli_clob;
+               struct cl_layout cl = {
+                       .cl_buf.lb_len = 0, /* to get real size */
+               };
+               struct lu_env *env;
+               u16 refcheck;
+
+               if (!obj) {
+                       CERROR("%s: inode "DFID": can not get layout, no cl_object\n",
+                              sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+                       GOTO(failed, rc = -EINVAL);
+               }
+
+               env = cl_env_get(&refcheck);
+               if (IS_ERR(env))
+                       RETURN(PTR_ERR(env));
+               /* get layout size */
+               rc = cl_object_layout_get(env, obj, &cl);
+               if (rc <= 0) {
+                       CERROR("%s: inode "DFID": error trying to get layout size : %d\n",
+                              sbi->ll_fsname, PFID(ll_inode2fid(inode)), rc);
+                       cl_env_put(env, &refcheck);
+                       /* a zero size must not be returned as is: callers
+                        * would take it as success while *symname is unset
+                        */
+                       RETURN(rc ? rc : -ENODATA);
+               }
+               OBD_ALLOC(lfm, rc);
+               if (!lfm) {
+                       CERROR("%s: inode "DFID": can not allocate enough mem to get layout\n",
+                              sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+                       cl_env_put(env, &refcheck);
+                       RETURN(-ENOMEM);
+               }
+               cl.cl_buf.lb_len = rc;
+               cl.cl_buf.lb_buf = lfm;
+               /* get layout */
+               rc = cl_object_layout_get(env, obj, &cl);
+               if (rc <= 0) {
+                       CERROR("%s: inode "DFID": error trying to get layout : %d\n",
+                              sbi->ll_fsname, PFID(ll_inode2fid(inode)), rc);
+                       OBD_FREE(lfm, cl.cl_buf.lb_len);
+                       cl_env_put(env, &refcheck);
+                       /* same as above, zero is not a success here */
+                       RETURN(rc ? rc : -ENODATA);
+               }
+               lfm_size = cl.cl_buf.lb_len;
+               cl_env_put(env, &refcheck);
+       } else if (S_ISDIR(inode->i_mode)) {
+               /* released at "failed:" label, for both success and error */
+               down_read(&lli->lli_lsm_sem);
+
+               /* should be cast to lmv_foreign_md, but it is ok as both
+                * foreign LOV and LMV formats are identical, and then we
+                * also only need one set of parsing routines for both
+                * foreign files and dirs!
+                */
+               lfm = (struct lov_foreign_md *)(lli->lli_lsm_md);
+               if (lfm != NULL) {
+                       CDEBUG(D_INFO, "%s: inode "DFID": LMV cached found\n",
+                              sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+               } else {
+                       CERROR("%s: inode "DFID": cannot get layout, no LMV cached\n",
+                              sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+                       GOTO(failed, rc = -EINVAL);
+               }
+       } else {
+               CERROR("%s: inode "DFID": not a regular file nor directory\n",
+                      sbi->ll_fsname, PFID(ll_inode2fid(inode)));
+               GOTO(failed, rc = -EINVAL);
+       }
+
+       /* XXX no assert nor double check of magic, length and type ? */
+
+       rc = ll_foreign_symlink_parse(sbi, inode, lfm, &destname);
+
+failed:
+       if (S_ISDIR(inode->i_mode))
+               up_read(&lli->lli_lsm_sem);
+
+       /* only the regular file case owns a private copy of the layout */
+       if (S_ISREG(inode->i_mode) && lfm)
+               OBD_FREE(lfm, lfm_size);
+
+       if (!rc) {
+               *symname = destname;
+               CDEBUG(D_INFO,
+                      "%s: inode "DFID": faking symlink to dest '%s'\n",
+                      sbi->ll_fsname, PFID(ll_inode2fid(inode)), destname);
+       }
+
+       RETURN(rc);
+}
+
+/* Release the symlink destination buffer built by
+ * ll_foreign_readlink_internal(); prototype depends on kernel version.
+ */
+#ifdef HAVE_SYMLINK_OPS_USE_NAMEIDATA
+static void ll_foreign_put_link(struct dentry *dentry,
+                       struct nameidata *nd, void *cookie)
+#else
+# ifdef HAVE_IOP_GET_LINK
+static void ll_foreign_put_link(void *cookie)
+# else
+static void ll_foreign_put_link(struct inode *unused, void *cookie)
+# endif
+#endif
+{
+       /* nothing to release if no destination path was ever built,
+        * strlen() must not be run on a NULL or error pointer
+        */
+       if (IS_ERR_OR_NULL(cookie))
+               return;
+
+       /* To avoid allocating an unnecessarily big buffer, and since the
+        * ways to build the symlink path from foreign LOV/LMV can be
+        * multiple and not constant, its size is not known here. So we
+        * need to use strlen(cookie)+1 to determine it and to avoid false
+        * positives being reported by memory leak check code.
+        */
+       OBD_FREE_LARGE(cookie, strlen(cookie) + 1);
+}
+
+#ifdef HAVE_SYMLINK_OPS_USE_NAMEIDATA
+/* ->follow_link() method for kernels passing a nameidata: resolves the
+ * faked symlink destination and records it (or an error pointer) in \a nd.
+ */
+static void *ll_foreign_follow_link(struct dentry *dentry,
+                                     struct nameidata *nd)
+{
+       struct inode *inode = dentry->d_inode;
+       char *symname = NULL;
+       bool too_deep;
+       int rc;
+
+       ENTRY;
+
+       CDEBUG(D_VFSTRACE, "VFS Op\n");
+       /*
+        * Limit the recursive symlink depth to prevent stack overflow:
+        * give up once link_count reaches 6 when THREAD_SIZE is below
+        * 8192, or reaches 8 when THREAD_SIZE is exactly 8192.
+        */
+       too_deep = (THREAD_SIZE < 8192 && current->link_count >= 6) ||
+                  (THREAD_SIZE == 8192 && current->link_count >= 8);
+
+       rc = too_deep ? -ELOOP :
+                       ll_foreign_readlink_internal(inode, &symname);
+       if (rc)
+               symname = ERR_PTR(rc);
+
+       nd_set_link(nd, symname);
+       RETURN(symname);
+}
+
+#elif defined(HAVE_IOP_GET_LINK)
+/* ->get_link() method: returns the faked symlink destination of a
+ * foreign file/dir and registers its release through \a done.
+ *
+ * Returns ERR_PTR(-ECHILD) when called without a dentry, ERR_PTR() of
+ * the error met when building the destination, or the destination path.
+ */
+static const char *ll_foreign_get_link(struct dentry *dentry,
+                                      struct inode *inode,
+                                      struct delayed_call *done)
+{
+       char *symname = NULL;
+       int rc;
+
+       ENTRY;
+       CDEBUG(D_VFSTRACE, "VFS Op\n");
+       if (!dentry)
+               RETURN(ERR_PTR(-ECHILD));
+       rc = ll_foreign_readlink_internal(inode, &symname);
+       /* never report success without a destination path */
+       if (rc == 0 && symname == NULL)
+               rc = -ENODATA;
+       /* on failure there is nothing to free: do not register
+        * ll_foreign_put_link(), it would run strlen() on a NULL pointer
+        * if the delayed call gets executed
+        */
+       if (rc)
+               RETURN(ERR_PTR(rc));
+
+       /*
+        * symname must be freed when we are done
+        *
+        * XXX we may avoid the need to do so if we use
+        * lli_symlink_name cache to retain symname and
+        * let ll_clear_inode free it...
+        */
+       set_delayed_call(done, ll_foreign_put_link, symname);
+       RETURN(symname);
+}
+
+# else /* !HAVE_IOP_GET_LINK */
+/* ->follow_link() method for kernels using a cookie: returns the faked
+ * symlink destination and hands the buffer back through \a cookie.
+ */
+static const char *ll_foreign_follow_link(struct dentry *dentry,
+                                           void **cookie)
+{
+       struct inode *inode = d_inode(dentry);
+       char *symname = NULL;
+       int rc;
+
+       ENTRY;
+
+       CDEBUG(D_VFSTRACE, "VFS Op\n");
+       rc = ll_foreign_readlink_internal(inode, &symname);
+       if (rc < 0)
+               RETURN(ERR_PTR(rc));
+
+       /* symname has been allocated for us and nothing else in this file
+        * releases it, so return it in cookie in order to delay its
+        * release until ll_foreign_put_link()
+        * NOTE(review): assumes the VFS passes this cookie to ->put_link,
+        * to be confirmed against the targeted kernel versions
+        */
+       *cookie = symname;
+
+       RETURN(symname);
+}
+
+#endif /* HAVE_SYMLINK_OPS_USE_NAMEIDATA, HAVE_IOP_GET_LINK */
+
+/*
+ * ->lookup() method of foreign dirs handled as fake symlinks: always
+ * fails with -ENODATA after tracing the request.
+ * Should only be called for an already in-use/cached foreign dir inode
+ * when foreign fake-symlink behaviour has been enabled afterward.
+ */
+static struct dentry *ll_foreign_dir_lookup(struct inode *parent,
+                                        struct dentry *dentry,
+                                        unsigned int flags)
+{
+       const struct qstr *qname = &dentry->d_name;
+
+       CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s, dir="DFID"(%p)\n",
+              qname->len, qname->name,
+              PFID(ll_inode2fid(parent)), parent);
+
+       return ERR_PTR(-ENODATA);
+}
+
+/* Tell whether the vfsmount recorded in \a sbi is the one at the root of
+ * the calling process, warning on the console when it is not.
+ */
+static bool has_same_mount_namespace(struct ll_sb_info *sbi)
+{
+       if (sbi->ll_mnt.mnt == current->fs->root.mnt)
+               return true;
+
+       LCONSOLE_WARN("%s: client mount %s and '%s.%d' not in same mnt-namespace\n",
+                     sbi->ll_fsname, sbi->ll_kset.kobj.name,
+                     current->comm, current->pid);
+
+       return false;
+}
+
+/* sysfs show method: prints "1" if foreign fake-symlink support is
+ * enabled for this mount, "0" otherwise.
+ */
+ssize_t foreign_symlink_enable_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       int enabled = (sbi->ll_flags & LL_SBI_FOREIGN_SYMLINK) ? 1 : 0;
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", enabled);
+}
+
+/*
+ * sysfs store method: enables (non-zero value) or disables (zero) the
+ * foreign fake-symlink support for this mount.
+ *
+ * XXX
+ * There may be already in-use/cached inodes of foreign files/dirs which
+ * will not-be/continue-to-be handled as fake-symlink, depending on whether
+ * the feature is being enabled/disabled, until being revalidated.
+ * Also, does it require sbi->ll_lock protection ?
+ */
+ssize_t foreign_symlink_enable_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       unsigned int enable;
+       int rc;
+
+       /* only accept the change from the mount's own namespace */
+       if (!has_same_mount_namespace(sbi))
+               return -EINVAL;
+
+       rc = kstrtouint(buffer, 10, &enable);
+       if (rc)
+               return rc;
+
+       if (enable == 0)
+               sbi->ll_flags &= ~LL_SBI_FOREIGN_SYMLINK;
+       else
+               sbi->ll_flags |= LL_SBI_FOREIGN_SYMLINK;
+
+       return count;
+}
+
+/* sysfs show method: prints the current foreign symlink path prefix */
+ssize_t foreign_symlink_prefix_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       ssize_t printed;
+
+       /* prefix may be replaced concurrently, read it under semaphore */
+       down_read(&sbi->ll_foreign_symlink_sem);
+       printed = snprintf(buf, PAGE_SIZE, "%s\n",
+                          sbi->ll_foreign_symlink_prefix);
+       up_read(&sbi->ll_foreign_symlink_sem);
+
+       return printed;
+}
+
+/* sysfs store method: replaces the foreign symlink path prefix with a
+ * private copy of \a buffer, which must start with '/'.
+ *
+ * Returns the number of bytes stored as the new prefix (up to the first
+ * NUL byte found in \a buffer, if any), -EINVAL if called from another
+ * mount namespace or if \a buffer is not an absolute path or is longer
+ * than PATH_MAX, -ENOMEM on allocation failure.
+ */
+ssize_t foreign_symlink_prefix_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       char *new, *old;
+       size_t new_len, old_len;
+
+       if (!has_same_mount_namespace(sbi))
+               return -EINVAL;
+
+       /* XXX strip buffer of any CR/LF,space,... ?? */
+
+       /* check buffer looks like a valid absolute path */
+       if (*buffer != '/') {
+               CERROR("foreign symlink prefix must be an absolute path\n");
+               return -EINVAL;
+       }
+       new_len = strnlen(buffer, count);
+       if (new_len < count)
+               CDEBUG(D_INFO, "NUL byte found in %zu bytes\n", count);
+       if (new_len > PATH_MAX) {
+               CERROR("%s: foreign symlink prefix length %zu > PATH_MAX\n",
+                      sbi->ll_fsname, new_len);
+               return -EINVAL;
+       }
+       OBD_ALLOC(new, new_len + 1);
+       if (new == NULL) {
+               CERROR("%s: can not allocate space for foreign path prefix\n",
+                      sbi->ll_fsname);
+               /* memory shortage, not a lack of storage space */
+               return -ENOMEM;
+       }
+
+       /* swap old and new prefix under the write lock, readers build
+        * symlink destinations from it under the read lock
+        */
+       down_write(&sbi->ll_foreign_symlink_sem);
+       old_len = sbi->ll_foreign_symlink_prefix_size;
+       old = sbi->ll_foreign_symlink_prefix;
+       memcpy(new, buffer, new_len);
+       *(new + new_len) = '\0';
+
+       sbi->ll_foreign_symlink_prefix = new;
+       /* stored size accounts for the terminating '\0' */
+       sbi->ll_foreign_symlink_prefix_size = new_len + 1;
+       up_write(&sbi->ll_foreign_symlink_sem);
+
+       if (old)
+               OBD_FREE(old, old_len);
+
+       return new_len;
+}
+
+/* sysfs show method: prints the current foreign symlink upcall path */
+ssize_t foreign_symlink_upcall_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       ssize_t printed;
+
+       /* upcall path may be replaced concurrently, read it under sem */
+       down_read(&sbi->ll_foreign_symlink_sem);
+       printed = snprintf(buf, PAGE_SIZE, "%s\n",
+                          sbi->ll_foreign_symlink_upcall);
+       up_read(&sbi->ll_foreign_symlink_sem);
+
+       return printed;
+}
+
+/* sysfs store method: registers a new foreign symlink upcall path, which
+ * must be an absolute path or "none", then invokes the upcall (unless
+ * "none") with the sbi sysfs object name as its only argument.
+ *
+ * Returns the number of bytes stored as the new upcall path, -EINVAL if
+ * called from another mount namespace or if \a buffer is invalid or
+ * longer than PATH_MAX, -ENOMEM on allocation failure. A failure to
+ * invoke the upcall is only logged.
+ */
+ssize_t foreign_symlink_upcall_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       char *old = NULL, *new = NULL;
+       size_t new_len;
+
+       if (!has_same_mount_namespace(sbi))
+               return -EINVAL;
+
+       /* XXX strip buffer of any CR/LF,space,... ?? */
+
+       /* check buffer looks like a valid absolute path */
+       if (*buffer != '/' && strcmp(buffer, "none")) {
+               CERROR("foreign symlink upcall must be an absolute path\n");
+               return -EINVAL;
+       }
+       new_len = strnlen(buffer, count);
+       if (new_len < count)
+               CDEBUG(D_INFO, "NULL byte found in %zu bytes\n", count);
+       if (new_len > PATH_MAX) {
+               CERROR("%s: foreign symlink upcall path length %zu > PATH_MAX\n",
+                      sbi->ll_fsname, new_len);
+               return -EINVAL;
+       }
+
+       OBD_ALLOC(new, new_len + 1);
+       if (new == NULL) {
+               CERROR("%s: can not allocate space for foreign symlink upcall path\n",
+                      sbi->ll_fsname);
+               /* memory shortage, not a lack of storage space */
+               return -ENOMEM;
+       }
+       memcpy(new, buffer, new_len);
+       *(new + new_len) = '\0';
+
+       down_write(&sbi->ll_foreign_symlink_sem);
+       old = sbi->ll_foreign_symlink_upcall;
+
+       sbi->ll_foreign_symlink_upcall = new;
+       /* LL_SBI_FOREIGN_SYMLINK_UPCALL will be set by
+        * foreign_symlink_upcall_info_store() upon valid infos being
+        * provided by upcall
+        * XXX there is a potential race if there are multiple concurrent
+        * attempts to set upcall path and execution occur in different
+        * order, we may end up using the format provided by a different
+        * upcall than the one set in ll_foreign_symlink_upcall
+        */
+       sbi->ll_flags &= ~LL_SBI_FOREIGN_SYMLINK_UPCALL;
+       up_write(&sbi->ll_foreign_symlink_sem);
+
+       /* NOTE(review): "new" is still used below after being published
+        * and the semaphore released; a concurrent store looks able to
+        * free it as its "old" in the meantime - to be confirmed
+        */
+       if (strcmp(new, "none")) {
+               char *argv[] = {
+                         [0] = new,
+                         /* sbi sysfs object name */
+                         [1] = (char *)sbi->ll_kset.kobj.name,
+                         [2] = NULL
+               };
+               char *envp[] = {
+                         [0] = "HOME=/",
+                         [1] = "PATH=/sbin:/usr/sbin",
+                         [2] = NULL
+               };
+               int rc;
+
+               rc = call_usermodehelper(new, argv, envp, UMH_WAIT_EXEC);
+               if (rc < 0)
+                       CERROR("%s: error invoking foreign symlink upcall %s: rc %d\n",
+                              sbi->ll_fsname, new, rc);
+               else
+                       CDEBUG(D_INFO, "%s: invoked upcall %s\n",
+                              sbi->ll_fsname, new);
+       }
+
+       /* size of previous path was not kept, recompute it from content */
+       if (old)
+               OBD_FREE_LARGE(old, strlen(old) + 1);
+
+       return new_len;
+}
+
+/* foreign_symlink_upcall_info_store() stores format items in
+ * sbi->ll_foreign_symlink_upcall_items[], and
+ * ll_foreign_symlink_upcall_parse() uses them to parse each foreign
+ * symlink LOV/LMV EAs
+ *
+ * \a buffer is a serialized sequence of items (constant strings and
+ * (pos,len) tuples) which must be terminated by one EOB_TYPE item.
+ *
+ * Returns \a count on success, -EINVAL if called from another mount
+ * namespace or if \a buffer is malformed, -ENOMEM on allocation failure.
+ */
+ssize_t foreign_symlink_upcall_info_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count)
+{
+       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+                                             ll_kset.kobj);
+       struct ll_foreign_symlink_upcall_item items[MAX_NB_UPCALL_ITEMS], *item;
+       struct ll_foreign_symlink_upcall_item *new_items, *old_items;
+       size_t remaining = count;
+       int nb_items = 0, old_nb_items, i, rc = 0;
+
+       ENTRY;
+
+       if (!has_same_mount_namespace(sbi))
+               RETURN(-EINVAL);
+
+       /* parse buffer to check validity of infos and fill symlink format
+        * descriptors
+        */
+
+       if (count % sizeof(__u32) != 0) {
+               CERROR("%s: invalid size '%zu' of infos buffer returned by foreign symlink upcall\n",
+                      sbi->ll_fsname, count);
+               RETURN(-EINVAL);
+       }
+
+       /* evaluate number of items provided */
+       while (remaining > 0) {
+               item = (struct ll_foreign_symlink_upcall_item *)
+                               &buffer[count - remaining];
+               /* type field itself must lie within the buffer */
+               if (remaining < sizeof(item->type)) {
+                       CERROR("%s: early end of infos buffer returned by foreign symlink upcall\n",
+                              sbi->ll_fsname);
+                       GOTO(failed, rc = -EINVAL);
+               }
+               switch (item->type) {
+               case STRING_TYPE: {
+                       /* fixed part of a string item plus the EOB_TYPE
+                        * item expected to end the buffer
+                        */
+                       size_t min_size =
+                               offsetof(struct ll_foreign_symlink_upcall_item,
+                                        bytestring) + sizeof(item->type);
+
+                       /* a constant string following; "remaining" is
+                        * checked first so that none of the unsigned
+                        * subtractions can wrap around
+                        */
+                       if (remaining < min_size ||
+                           item->size >= remaining - min_size ||
+                           STRING_ITEM_SZ(item->size) > remaining) {
+                               /* size of string must not overflow remaining
+                                * bytes minus EOB_TYPE item
+                                */
+                               CERROR("%s: constant string too long in infos buffer returned by foreign symlink upcall\n",
+                                      sbi->ll_fsname);
+                               GOTO(failed, rc = -EINVAL);
+                       }
+                       OBD_ALLOC(items[nb_items].string,
+                                 item->size);
+                       if (items[nb_items].string == NULL) {
+                               CERROR("%s: constant string allocation has failed for constant string of size %zu\n",
+                                      sbi->ll_fsname, item->size);
+                               GOTO(failed, rc = -ENOMEM);
+                       }
+                       memcpy(items[nb_items].string,
+                              item->bytestring, item->size);
+                       items[nb_items].size = item->size;
+                       /* string items to fit on __u32 boundary */
+                       remaining = remaining - STRING_ITEM_SZ(item->size);
+                       break;
+               }
+               case POSLEN_TYPE: {
+                       /* a tuple (pos,len) following to delimit a sub-string
+                        * in lfm_value
+                        */
+                       if (remaining < POSLEN_ITEM_SZ) {
+                               CERROR("%s: early end of infos buffer returned by foreign symlink upcall\n",
+                                      sbi->ll_fsname);
+                               GOTO(failed, rc = -EINVAL);
+                       }
+                       items[nb_items].pos = item->pos;
+                       items[nb_items].len = item->len;
+                       remaining -= POSLEN_ITEM_SZ;
+                       break;
+               }
+               case EOB_TYPE:
+                       /* must be the very last thing in buffer */
+                       if (remaining != sizeof(item->type)) {
+                               CERROR("%s: early end of infos buffer returned by foreign symlink upcall\n",
+                                      sbi->ll_fsname);
+                               GOTO(failed, rc = -EINVAL);
+                       }
+                       remaining -= sizeof(item->type);
+                       break;
+               default:
+                       CERROR("%s: wrong type '%u' encountered at pos %zu , with %zu remaining bytes, in infos buffer returned by foreign symlink upcall\n",
+                              sbi->ll_fsname, item->type,
+                              count - remaining, remaining);
+                       GOTO(failed, rc = -EINVAL);
+               }
+
+               items[nb_items].type = item->type;
+               nb_items++;
+               if (nb_items >= MAX_NB_UPCALL_ITEMS) {
+                       CERROR("%s: too many items in infos buffer returned by foreign symlink upcall\n",
+                              sbi->ll_fsname);
+                       GOTO(failed, rc = -EINVAL);
+               }
+       }
+
+       /* ll_foreign_symlink_upcall_parse() scans items until it finds an
+        * EOB_TYPE one, so refuse an empty or unterminated list
+        */
+       if (nb_items == 0 || items[nb_items - 1].type != EOB_TYPE) {
+               CERROR("%s: missing end item in infos buffer returned by foreign symlink upcall\n",
+                      sbi->ll_fsname);
+               GOTO(failed, rc = -EINVAL);
+       }
+
+       /* valid format has been provided by foreign symlink user upcall */
+       OBD_ALLOC_LARGE(new_items, nb_items *
+                       sizeof(struct ll_foreign_symlink_upcall_item));
+       if (new_items == NULL) {
+               CERROR("%s: items array allocation has failed for size %zu\n",
+                      sbi->ll_fsname, nb_items *
+                       sizeof(struct ll_foreign_symlink_upcall_item));
+               GOTO(failed, rc = -ENOMEM);
+       }
+       /* strings allocated above are now owned by new_items[] */
+       for (i = 0; i < nb_items; i++)
+               new_items[i] = items[i];
+
+       down_write(&sbi->ll_foreign_symlink_sem);
+       old_items = sbi->ll_foreign_symlink_upcall_items;
+       old_nb_items = sbi->ll_foreign_symlink_upcall_nb_items;
+       sbi->ll_foreign_symlink_upcall_items = new_items;
+       sbi->ll_foreign_symlink_upcall_nb_items = nb_items;
+       sbi->ll_flags |= LL_SBI_FOREIGN_SYMLINK_UPCALL;
+       up_write(&sbi->ll_foreign_symlink_sem);
+
+       /* free old_items */
+       if (old_items != NULL) {
+               for (i = 0 ; i < old_nb_items; i++)
+                       if (old_items[i].type == STRING_TYPE)
+                               OBD_FREE(old_items[i].string,
+                                        old_items[i].size);
+
+               OBD_FREE_LARGE(old_items, old_nb_items *
+                              sizeof(struct ll_foreign_symlink_upcall_item));
+       }
+
+failed:
+       /* clean items[] and free any strings */
+       if (rc != 0) {
+               for (i = 0; i < nb_items; i++) {
+                       switch (items[i].type) {
+                       case STRING_TYPE:
+                               OBD_FREE(items[i].string, items[i].size);
+                               items[i].string = NULL;
+                               items[i].size = 0;
+                               break;
+                       case POSLEN_TYPE:
+                               items[i].pos = 0;
+                               items[i].len = 0;
+                               break;
+                       case EOB_TYPE:
+                               break;
+                       default:
+                               /* only report it: jumping back to the
+                                * "failed" label from here would restart
+                                * this cleanup loop from its first item
+                                */
+                               CERROR("%s: wrong '%u'type encountered in foreign symlink upcall items\n",
+                                      sbi->ll_fsname, items[i].type);
+                               break;
+                       }
+                       items[i].type = 0;
+               }
+       }
+
+       RETURN(rc == 0 ? count : rc);
+}
+
+/* inode operations of foreign files exposed as fake symlinks */
+struct inode_operations ll_foreign_file_symlink_inode_operations = {
+       /* attributes and permission */
+       .setattr        = ll_setattr,
+       .getattr        = ll_foreign_symlink_getattr,
+       .permission     = ll_inode_permission,
+       /* extended attributes */
+       .listxattr      = ll_listxattr,
+#ifdef HAVE_IOP_XATTR
+       .setxattr       = ll_setxattr,
+       .getxattr       = ll_getxattr,
+       .removexattr    = ll_removexattr,
+#endif
+       /* symlink resolution */
+#ifdef HAVE_IOP_GENERIC_READLINK
+       .readlink       = generic_readlink,
+#endif
+#ifdef HAVE_IOP_GET_LINK
+       .get_link       = ll_foreign_get_link,
+#else
+       .follow_link    = ll_foreign_follow_link,
+       /* .put_link method required since need to release symlink copy buf */
+       .put_link       = ll_foreign_put_link,
+#endif
+};
+
+/* inode operations of foreign dirs exposed as fake symlinks */
+struct inode_operations ll_foreign_dir_symlink_inode_operations = {
+       .lookup         = ll_foreign_dir_lookup,
+       /* attributes and permission */
+       .setattr        = ll_setattr,
+       .getattr        = ll_foreign_symlink_getattr,
+       .permission     = ll_inode_permission,
+       /* extended attributes */
+       .listxattr      = ll_listxattr,
+#ifdef HAVE_IOP_XATTR
+       .setxattr       = ll_setxattr,
+       .getxattr       = ll_getxattr,
+       .removexattr    = ll_removexattr,
+#endif
+       /* symlink resolution */
+#ifdef HAVE_IOP_GENERIC_READLINK
+       .readlink       = generic_readlink,
+#endif
+#ifdef HAVE_IOP_GET_LINK
+       .get_link       = ll_foreign_get_link,
+#else
+       .follow_link    = ll_foreign_follow_link,
+       .put_link       = ll_foreign_put_link,
+#endif
+};
+
+/* foreign fake-symlink version of ll_getattr(): same ll_getattr_dentry()
+ * helper, called with its last argument set to true
+ */
+#ifdef HAVE_INODEOPS_ENHANCED_GETATTR
+int ll_foreign_symlink_getattr(const struct path *path, struct kstat *stat,
+                              u32 request_mask, unsigned int flags)
+{
+       struct dentry *de = path->dentry;
+
+       return ll_getattr_dentry(de, stat, request_mask, flags, true);
+}
+#else
+int ll_foreign_symlink_getattr(struct vfsmount *mnt, struct dentry *de,
+                              struct kstat *stat)
+{
+       /* no mask/flags given by this prototype, use stat() like ones */
+       return ll_getattr_dentry(de, stat,
+                                STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT,
+                                true);
+}
+#endif
index 45e6864..16e4fdd 100644 (file)
@@ -50,6 +50,7 @@
 
 #include "vvp_internal.h"
 #include "pcc.h"
+#include "foreign_symlink.h"
 
 #ifndef FMODE_EXEC
 #define FMODE_EXEC 0
@@ -403,6 +404,8 @@ enum ll_file_flags {
        LLIF_PROJECT_INHERIT    = 3,
        /* update atime from MDS even if it's older than local inode atime. */
        LLIF_UPDATE_ATIME       = 4,
+       /* foreign file/dir can be unlinked unconditionally */
+       LLIF_FOREIGN_REMOVABLE  = 5,
 
 };
 
@@ -651,6 +654,9 @@ enum stats_track_type {
 #define LL_SBI_FILE_HEAT    0x4000000 /* file heat support */
 #define LL_SBI_TEST_DUMMY_ENCRYPTION    0x8000000 /* test dummy encryption */
 #define LL_SBI_ENCRYPT    0x10000000 /* client side encryption */
+#define LL_SBI_FOREIGN_SYMLINK     0x20000000 /* foreign fake-symlink support */
+/* foreign fake-symlink upcall registered */
+#define LL_SBI_FOREIGN_SYMLINK_UPCALL      0x40000000
 #define LL_SBI_FLAGS {         \
        "nolck",        \
        "checksum",     \
@@ -681,6 +687,8 @@ enum stats_track_type {
        "file_heat",    \
        "test_dummy_encryption", \
        "noencrypt",    \
+       "foreign_symlink",      \
+       "foreign_symlink_upcall",       \
 }
 
 /* This is embedded into llite super-blocks to keep track of connect
@@ -781,6 +789,19 @@ struct ll_sb_info {
 
        /* Persistent Client Cache */
        struct pcc_super          ll_pcc_super;
+
+       /* to protect vs updates in all following foreign symlink fields */
+       struct rw_semaphore       ll_foreign_symlink_sem;
+       /* foreign symlink path prefix */
+       char                     *ll_foreign_symlink_prefix;
+       /* full prefix size including trailing '\0' */
+       size_t                    ll_foreign_symlink_prefix_size;
+       /* foreign symlink path upcall */
+       char                     *ll_foreign_symlink_upcall;
+       /* foreign symlink path upcall infos */
+       struct ll_foreign_symlink_upcall_item *ll_foreign_symlink_upcall_items;
+       /* foreign symlink path upcall nb infos */
+       unsigned int              ll_foreign_symlink_upcall_nb_items;
 };
 
 #define SBI_DEFAULT_HEAT_DECAY_WEIGHT  ((80 * 256 + 50) / 100)
@@ -965,6 +986,11 @@ static inline bool ll_sbi_has_file_heat(struct ll_sb_info *sbi)
        return !!(sbi->ll_flags & LL_SBI_FILE_HEAT);
 }
 
+/* true if LL_SBI_FOREIGN_SYMLINK is set in sbi->ll_flags */
+static inline bool ll_sbi_has_foreign_symlink(struct ll_sb_info *sbi)
+{
+       return (sbi->ll_flags & LL_SBI_FOREIGN_SYMLINK) != 0;
+}
+
 void ll_ras_enter(struct file *f, loff_t pos, size_t count);
 
 /* llite/lcommon_misc.c */
@@ -1102,7 +1128,7 @@ int ll_getattr(const struct path *path, struct kstat *stat,
 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
 #endif
 int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask,
-                     unsigned int flags);
+                     unsigned int flags, bool foreign);
 struct posix_acl *ll_get_acl(struct inode *inode, int type);
 #ifdef HAVE_IOP_SET_ACL
 #ifdef CONFIG_LUSTRE_FS_POSIX_ACL
@@ -1699,5 +1725,9 @@ static inline struct pcc_super *ll_info2pccs(struct ll_inode_info *lli)
 /* crypto.c */
 extern const struct llcrypt_operations lustre_cryptops;
 #endif
+/* llite/llite_foreign.c */
+int ll_manage_foreign(struct inode *inode, struct lustre_md *lmd);
+bool ll_foreign_is_openable(struct dentry *dentry, unsigned int flags);
+bool ll_foreign_is_removable(struct dentry *dentry, bool unset);
 
 #endif /* LLITE_INTERNAL_H */
index bf5a09a..1f094ce 100644 (file)
@@ -129,6 +129,25 @@ static struct ll_sb_info *ll_init_sbi(void)
        if (sbi->ll_cache == NULL)
                GOTO(out_destroy_ra, rc = -ENOMEM);
 
+       /* initialize foreign symlink prefix path */
+       OBD_ALLOC(sbi->ll_foreign_symlink_prefix, sizeof("/mnt/"));
+       if (sbi->ll_foreign_symlink_prefix == NULL)
+               GOTO(out_destroy_ra, rc = -ENOMEM);
+       memcpy(sbi->ll_foreign_symlink_prefix, "/mnt/", sizeof("/mnt/"));
+       sbi->ll_foreign_symlink_prefix_size = sizeof("/mnt/");
+
+       /* initialize foreign symlink upcall path, none by default */
+       OBD_ALLOC(sbi->ll_foreign_symlink_upcall, sizeof("none"));
+       if (sbi->ll_foreign_symlink_upcall == NULL)
+               GOTO(out_destroy_ra, rc = -ENOMEM);
+       memcpy(sbi->ll_foreign_symlink_upcall, "none", sizeof("none"));
+       sbi->ll_foreign_symlink_upcall_items = NULL;
+       sbi->ll_foreign_symlink_upcall_nb_items = 0;
+       init_rwsem(&sbi->ll_foreign_symlink_sem);
+       /* foreign symlink support (LL_SBI_FOREIGN_SYMLINK in ll_flags)
+        * not enabled by default
+        */
+
        sbi->ll_ra_info.ra_max_pages =
                min(pages / 32, SBI_DEFAULT_READ_AHEAD_MAX);
        sbi->ll_ra_info.ra_max_pages_per_file =
@@ -183,6 +202,12 @@ static struct ll_sb_info *ll_init_sbi(void)
        sbi->ll_heat_period_second = SBI_DEFAULT_HEAT_PERIOD_SECOND;
        RETURN(sbi);
 out_destroy_ra:
+       if (sbi->ll_foreign_symlink_prefix)
+               OBD_FREE(sbi->ll_foreign_symlink_prefix, sizeof("/mnt/"));
+       if (sbi->ll_cache) {
+               cl_cache_decref(sbi->ll_cache);
+               sbi->ll_cache = NULL;
+       }
        destroy_workqueue(sbi->ll_ra_info.ll_readahead_wq);
 out_pcc:
        pcc_super_fini(&sbi->ll_pcc_super);
@@ -205,6 +230,32 @@ static void ll_free_sbi(struct super_block *sb)
                        cl_cache_decref(sbi->ll_cache);
                        sbi->ll_cache = NULL;
                }
+               if (sbi->ll_foreign_symlink_prefix) {
+                       OBD_FREE(sbi->ll_foreign_symlink_prefix,
+                                sbi->ll_foreign_symlink_prefix_size);
+                       sbi->ll_foreign_symlink_prefix = NULL;
+               }
+               if (sbi->ll_foreign_symlink_upcall) {
+                       OBD_FREE(sbi->ll_foreign_symlink_upcall,
+                                strlen(sbi->ll_foreign_symlink_upcall) +
+                                      1);
+                       sbi->ll_foreign_symlink_upcall = NULL;
+               }
+               if (sbi->ll_foreign_symlink_upcall_items) {
+                       int i;
+                       int nb_items = sbi->ll_foreign_symlink_upcall_nb_items;
+                       struct ll_foreign_symlink_upcall_item *items =
+                               sbi->ll_foreign_symlink_upcall_items;
+
+                       for (i = 0 ; i < nb_items; i++)
+                               if (items[i].type == STRING_TYPE)
+                                       OBD_FREE(items[i].string,
+                                                      items[i].size);
+
+                       OBD_FREE_LARGE(items, nb_items *
+                               sizeof(struct ll_foreign_symlink_upcall_item));
+                       sbi->ll_foreign_symlink_upcall_items = NULL;
+               }
                pcc_super_fini(&sbi->ll_pcc_super);
                OBD_FREE(sbi, sizeof(*sbi));
        }
@@ -988,6 +1039,58 @@ static int ll_options(char *options, struct ll_sb_info *sbi)
 #endif
                        goto next;
                }
+               tmp = ll_set_opt("foreign_symlink", s1, LL_SBI_FOREIGN_SYMLINK);
+               if (tmp) {
+                       int prefix_pos = sizeof("foreign_symlink=") - 1;
+                       int equal_pos = sizeof("foreign_symlink=") - 2;
+
+                       /* non-default prefix provided ? */
+                       if (strlen(s1) >= sizeof("foreign_symlink=") &&
+                           *(s1 + equal_pos) == '=') {
+                               char *old = sbi->ll_foreign_symlink_prefix;
+                               size_t old_len =
+                                       sbi->ll_foreign_symlink_prefix_size;
+
+                               /* path must be absolute */
+                               if (*(s1 + sizeof("foreign_symlink=")
+                                     - 1) != '/') {
+                                       LCONSOLE_ERROR_MSG(0x152,
+                                               "foreign prefix '%s' must be an absolute path\n",
+                                               s1 + prefix_pos);
+                                       RETURN(-EINVAL);
+                               }
+                               /* last option ? */
+                               s2 = strchrnul(s1 + prefix_pos, ',');
+
+                               if (sbi->ll_foreign_symlink_prefix) {
+                                       sbi->ll_foreign_symlink_prefix = NULL;
+                                       sbi->ll_foreign_symlink_prefix_size = 0;
+                               }
+                               /* alloc for path length and '\0' */
+                               OBD_ALLOC(sbi->ll_foreign_symlink_prefix,
+                                               s2 - (s1 + prefix_pos) + 1);
+                               if (!sbi->ll_foreign_symlink_prefix) {
+                                       /* restore previous */
+                                       sbi->ll_foreign_symlink_prefix = old;
+                                       sbi->ll_foreign_symlink_prefix_size =
+                                               old_len;
+                                       RETURN(-ENOMEM);
+                               }
+                               if (old)
+                                       OBD_FREE(old, old_len);
+                               strncpy(sbi->ll_foreign_symlink_prefix,
+                                       s1 + prefix_pos,
+                                       s2 - (s1 + prefix_pos));
+                               sbi->ll_foreign_symlink_prefix_size =
+                                       s2 - (s1 + prefix_pos) + 1;
+                       } else {
+                               LCONSOLE_ERROR_MSG(0x152,
+                                                  "invalid %s option\n", s1);
+                       }
+                       /* enable foreign symlink support */
+                       *flags |= tmp;
+                       goto next;
+               }
                 LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
                                    s1);
                 RETURN(-EINVAL);
@@ -2870,6 +2973,13 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
        if (default_lmv_deleted)
                ll_update_default_lsm_md(*inode, &md);
 
+       /* we may want to apply some policy for foreign file/dir */
+       if (ll_sbi_has_foreign_symlink(sbi)) {
+               rc = ll_manage_foreign(*inode, &md);
+               if (rc < 0)
+                       GOTO(out, rc);
+       }
+
        GOTO(out, rc = 0);
 
 out:
@@ -3074,6 +3184,11 @@ int ll_show_options(struct seq_file *seq, struct dentry *dentry)
        else
                seq_puts(seq, ",noencrypt");
 
+       if (sbi->ll_flags & LL_SBI_FOREIGN_SYMLINK) {
+               seq_puts(seq, ",foreign_symlink=");
+               seq_puts(seq, sbi->ll_foreign_symlink_prefix);
+       }
+
        RETURN(0);
 }
 
index cf753a5..bd13eed 100644 (file)
@@ -289,6 +289,14 @@ static ssize_t client_type_show(struct kobject *kobj, struct attribute *attr,
 }
 LUSTRE_RO_ATTR(client_type);
 
+LUSTRE_RW_ATTR(foreign_symlink_enable);
+
+LUSTRE_RW_ATTR(foreign_symlink_prefix);
+
+LUSTRE_RW_ATTR(foreign_symlink_upcall);
+
+LUSTRE_WO_ATTR(foreign_symlink_upcall_info);
+
 static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr,
                           char *buf)
 {
@@ -1529,6 +1537,10 @@ static struct attribute *llite_attrs[] = {
        &lustre_attr_filestotal.attr,
        &lustre_attr_filesfree.attr,
        &lustre_attr_client_type.attr,
+       &lustre_attr_foreign_symlink_enable.attr,
+       &lustre_attr_foreign_symlink_prefix.attr,
+       &lustre_attr_foreign_symlink_upcall.attr,
+       &lustre_attr_foreign_symlink_upcall_info.attr,
        &lustre_attr_fstype.attr,
        &lustre_attr_uuid.attr,
        &lustre_attr_checksums.attr,
index 7311a2f..ac23de7 100644 (file)
@@ -614,6 +614,27 @@ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
        if (rc < 0)
                return ERR_PTR(rc);
        d_add(de, inode);
+
+       /* this needs only to be done for foreign symlink dirs as
+        * DCACHE_SYMLINK_TYPE is already set by d_flags_for_inode()
+        * kernel routine for files with symlink ops (ie, real symlink)
+        */
+       if (inode && S_ISDIR(inode->i_mode) &&
+           ll_sbi_has_foreign_symlink(ll_i2sbi(inode)) &&
+#ifdef HAVE_IOP_GET_LINK
+           inode->i_op->get_link) {
+#else
+           inode->i_op->follow_link) {
+#endif
+               CDEBUG(D_INFO, "%s: inode "DFID": faking foreign dir as a symlink\n",
+                      ll_i2sbi(inode)->ll_fsname, PFID(ll_inode2fid(inode)));
+               spin_lock(&de->d_lock);
+               /* like d_flags_for_inode() already does for files */
+               de->d_flags = (de->d_flags & ~DCACHE_ENTRY_TYPE) |
+                             DCACHE_SYMLINK_TYPE;
+               spin_unlock(&de->d_lock);
+       }
+
        CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
               de, de->d_inode, ll_d_count(de), de->d_flags);
         return de;
@@ -1195,7 +1216,9 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
                        }
                }
 
-               if (dentry->d_inode && it_disposition(it, DISP_OPEN_OPEN)) {
+               /* check also if a foreign file is openable */
+               if (dentry->d_inode && it_disposition(it, DISP_OPEN_OPEN) &&
+                   ll_foreign_is_openable(dentry, open_flags)) {
                        /* Open dentry. */
                        if (S_ISFIFO(dentry->d_inode->i_mode)) {
                                /* We cannot call open here as it might
@@ -1681,6 +1704,10 @@ static int ll_rmdir(struct inode *dir, struct dentry *dchild)
        if (unlikely(d_mountpoint(dchild)))
                 RETURN(-EBUSY);
 
+       /* some foreign dir may not be allowed to be removed */
+       if (!ll_foreign_is_removable(dchild, false))
+               RETURN(-EPERM);
+
        op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len,
                                     S_IFDIR, LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
@@ -1766,6 +1793,10 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild)
        if (unlikely(d_mountpoint(dchild)))
                RETURN(-EBUSY);
 
+       /* some foreign file/dir may not be allowed to be unlinked */
+       if (!ll_foreign_is_removable(dchild, false))
+               RETURN(-EPERM);
+
        op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len, 0,
                                     LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
index c51a735..2606407 100644 (file)
@@ -1129,12 +1129,12 @@ static int pcc_get_layout_info(struct inode *inode, struct cl_layout *clt)
                RETURN(PTR_ERR(env));
 
        rc = cl_object_layout_get(env, lli->lli_clob, clt);
-       if (rc)
+       if (rc < 0)
                CDEBUG(D_INODE, "Cannot get layout for "DFID"\n",
                       PFID(ll_inode2fid(inode)));
 
        cl_env_put(env, &refcheck);
-       RETURN(rc);
+       RETURN(rc < 0 ? rc : 0);
 }
 
 static int pcc_fid2dataset_fullpath(char *buf, int sz, struct lu_fid *fid,
index 17ded38..85e8193 100644 (file)
@@ -38,6 +38,7 @@
 
 #include "llite_internal.h"
 
+/* Must be called with lli_size_mutex locked */
 static int ll_readlink_internal(struct inode *inode,
                                struct ptlrpc_request **request, char **symname)
 {
index da7bfe6..2561ae1 100644 (file)
@@ -2165,7 +2165,8 @@ static int lov_object_layout_get(const struct lu_env *env,
        rc = lov_lsm_pack(lsm, buf->lb_buf, buf->lb_len);
        lov_lsm_put(lsm);
 
-       RETURN(rc < 0 ? rc : 0);
+       /* return error or number of bytes */
+       RETURN(rc);
 }
 
 static loff_t lov_object_maxbytes(struct cl_object *obj)
index c0d5745..b715ec9 100644 (file)
@@ -166,8 +166,14 @@ static ssize_t lov_lsm_pack_foreign(const struct lov_stripe_md *lsm, void *buf,
        if (buf_size == 0)
                RETURN(lfm_size);
 
-       if (buf_size < lfm_size)
+       /* if buffer too small return ERANGE but copy the size the
+        * caller has requested anyway. This may be useful to get
+        * only the header without the need to alloc the full size
+        */
+       if (buf_size < lfm_size) {
+               memcpy(lfm, lsm_foreign(lsm), buf_size);
                RETURN(-ERANGE);
+       }
 
        /* full foreign LOV is already avail in its cache
         * no need to translate format fields to little-endian
index 3f13460..c0e27bf 100644 (file)
@@ -124,7 +124,7 @@ int mdd_lookup(const struct lu_env *env,
 }
 
 /** Read the link EA into a temp buffer.
- * Uses the mdd_thread_info::mti_big_buf since it is generally large.
+ * Uses the mdd_thread_info::mti_link_buf since it is generally large.
  * A pointer to the buffer is stored in \a ldata::ld_buf.
  *
  * \retval 0 or error
@@ -1254,7 +1254,7 @@ static inline int mdd_links_del(const struct lu_env *env,
 /** Read the link EA into a temp buffer.
  * Uses the name_buf since it is generally large.
  * \retval IS_ERR err
- * \retval ptr to \a lu_buf (always \a mti_big_buf)
+ * \retval ptr to \a lu_buf (always \a mti_link_buf)
  */
 struct lu_buf *mdd_links_get(const struct lu_env *env,
                             struct mdd_object *mdd_obj)
@@ -2303,6 +2303,7 @@ static int mdd_acl_init(const struct lu_env *env, struct mdd_object *pobj,
                        struct lu_buf *acl_buf)
 {
        int     rc;
+
        ENTRY;
 
        if (S_ISLNK(la->la_mode)) {
@@ -2572,22 +2573,23 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj,
                      const struct lu_name *lname, struct md_object *child,
                      struct md_op_spec *spec, struct md_attr *ma)
 {
-       struct mdd_thread_info  *info = mdd_env_info(env);
-       struct lu_attr          *la = &info->mti_la_for_fix;
-       struct mdd_object       *mdd_pobj = md2mdd_obj(pobj);
-       struct mdd_object       *son = md2mdd_obj(child);
-       struct mdd_device       *mdd = mdo2mdd(pobj);
-       struct lu_attr          *attr = &ma->ma_attr;
-       struct thandle          *handle;
-       struct lu_attr          *pattr = &info->mti_pattr;
-       struct lu_buf           acl_buf;
-       struct lu_buf           def_acl_buf;
-       struct lu_buf           hsm_buf;
-       struct linkea_data      *ldata = &info->mti_link_data;
-       const char              *name = lname->ln_name;
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct lu_attr *la = &info->mti_la_for_fix;
+       struct mdd_object *mdd_pobj = md2mdd_obj(pobj);
+       struct mdd_object *son = md2mdd_obj(child);
+       struct mdd_device *mdd = mdo2mdd(pobj);
+       struct lu_attr *attr = &ma->ma_attr;
+       struct thandle *handle;
+       struct lu_attr *pattr = &info->mti_pattr;
+       struct lu_buf acl_buf;
+       struct lu_buf def_acl_buf;
+       struct lu_buf hsm_buf;
+       struct linkea_data *ldata = &info->mti_link_data;
+       const char *name = lname->ln_name;
        struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint;
-       int                      rc;
-       int                      rc2;
+       int acl_size = LUSTRE_POSIX_ACL_MAX_SIZE_OLD;
+       int rc, rc2;
+
        ENTRY;
 
        rc = mdd_la_get(env, mdd_pobj, pattr);
@@ -2606,13 +2608,25 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj,
        if (IS_ERR(handle))
                GOTO(out_free, rc = PTR_ERR(handle));
 
-       lu_buf_check_and_alloc(&info->mti_xattr_buf,
-                       min_t(unsigned int, mdd->mdd_dt_conf.ddp_max_ea_size,
-                             XATTR_SIZE_MAX));
-       acl_buf = info->mti_xattr_buf;
-       def_acl_buf.lb_buf = info->mti_key;
-       def_acl_buf.lb_len = sizeof(info->mti_key);
+use_bigger_buffer:
+       acl_buf = *lu_buf_check_and_alloc(&info->mti_xattr_buf, acl_size);
+       if (!acl_buf.lb_buf)
+               GOTO(out_stop, rc = -ENOMEM);
+       /* mti_big_buf is also used down below in mdd_changelog_ns_store(),
+        * but def_acl_buf is finished with it before then
+        */
+       def_acl_buf = *lu_buf_check_and_alloc(&info->mti_big_buf, acl_size);
+       if (!def_acl_buf.lb_buf)
+               GOTO(out_stop, rc = -ENOMEM);
+
        rc = mdd_acl_init(env, mdd_pobj, attr, &def_acl_buf, &acl_buf);
+       if (unlikely(rc == -ERANGE &&
+                    acl_size == LUSTRE_POSIX_ACL_MAX_SIZE_OLD)) {
+               /* use maximum-sized xattr buffer for too-big default ACL */
+               acl_size = min_t(unsigned int, mdd->mdd_dt_conf.ddp_max_ea_size,
+                                XATTR_SIZE_MAX);
+               goto use_bigger_buffer;
+       }
        if (rc < 0)
                GOTO(out_stop, rc);
 
index c80d602..5924aa1 100644 (file)
@@ -63,7 +63,7 @@ static void mdt_dom_resource_prolong(struct ldlm_prolong_args *arg)
        ENTRY;
 
        res = ldlm_resource_get(arg->lpa_export->exp_obd->obd_namespace, NULL,
-                               &arg->lpa_resid, LDLM_EXTENT, 0);
+                               &arg->lpa_resid, LDLM_IBITS, 0);
        if (IS_ERR(res)) {
                CDEBUG(D_DLMTRACE,
                       "Failed to get resource for resid %llu/%llu\n",
@@ -76,7 +76,11 @@ static void mdt_dom_resource_prolong(struct ldlm_prolong_args *arg)
                if (ldlm_has_dom(lock)) {
                        LDLM_DEBUG(lock, "DOM lock to prolong ");
                        ldlm_lock_prolong_one(lock, arg);
-                       break;
+                       /* only one PW or EX lock can be granted,
+                        * no need to continue search
+                        */
+                       if (lock->l_granted_mode & (LCK_PW | LCK_EX))
+                               break;
                }
        }
        unlock_res(res);
@@ -137,7 +141,7 @@ static int mdt_rw_hpreq_lock_match(struct ptlrpc_request *req,
                RETURN(0);
 
        /* a bulk write can only hold a reference on a PW extent lock. */
-       mode = LCK_PW;
+       mode = LCK_PW | LCK_GROUP;
        if (opc == OST_READ)
                /* whereas a bulk read can be protected by either a PR or PW
                 * extent lock */
@@ -177,7 +181,7 @@ static int mdt_rw_hpreq_check(struct ptlrpc_request *req)
        LASSERT(rnb != NULL);
        LASSERT(!(rnb->rnb_flags & OBD_BRW_SRVLOCK));
 
-       pa.lpa_mode = LCK_PW;
+       pa.lpa_mode = LCK_PW | LCK_GROUP;
        if (opc == OST_READ)
                pa.lpa_mode |= LCK_PR;
 
@@ -265,7 +269,7 @@ static int mdt_punch_hpreq_lock_match(struct ptlrpc_request *req,
        if (!fid_res_name_eq(&oa->o_oi.oi_fid, &lock->l_resource->lr_name))
                RETURN(0);
 
-       if (!(lock->l_granted_mode & LCK_PW))
+       if (!(lock->l_granted_mode & (LCK_PW | LCK_GROUP)))
                RETURN(0);
 
        RETURN(1);
@@ -303,14 +307,13 @@ static int mdt_punch_hpreq_check(struct ptlrpc_request *req)
        LASSERT(!(oa->o_valid & OBD_MD_FLFLAGS &&
                  oa->o_flags & OBD_FL_SRVLOCK));
 
-       pa.lpa_mode = LCK_PW;
+       pa.lpa_mode = LCK_PW | LCK_GROUP;
 
        CDEBUG(D_DLMTRACE, "%s: refresh DOM lock for "DFID"\n",
               tgt_name(tsi->tsi_tgt), PFID(&tsi->tsi_fid));
 
        mdt_prolong_dom_lock(tsi, &pa);
 
-
        if (pa.lpa_blocks_cnt > 0) {
                CDEBUG(D_DLMTRACE,
                       "%s: refreshed %u locks timeout for req %p.\n",
index 27da5c8..dc2f7c8 100644 (file)
@@ -492,15 +492,11 @@ int sptlrpc_req_ctx_switch(struct ptlrpc_request *req,
                           struct ptlrpc_cli_ctx *oldctx,
                           struct ptlrpc_cli_ctx *newctx)
 {
-       struct sptlrpc_flavor   old_flvr;
+       struct sptlrpc_flavor old_flvr;
        char *reqmsg = NULL; /* to workaround old gcc */
        int reqmsg_size;
        int rc = 0;
 
-       LASSERT(req->rq_reqmsg);
-       LASSERT(req->rq_reqlen);
-       LASSERT(req->rq_replen);
-
        CDEBUG(D_SEC,
               "req %p: switch ctx %p(%u->%s) -> %p(%u->%s), switch sec %p(%s) -> %p(%s)\n",
               req, oldctx, oldctx->cc_vcred.vc_uid,
@@ -515,6 +511,7 @@ int sptlrpc_req_ctx_switch(struct ptlrpc_request *req,
        /* save request message */
        reqmsg_size = req->rq_reqlen;
        if (reqmsg_size != 0) {
+               LASSERT(req->rq_reqmsg);
                OBD_ALLOC_LARGE(reqmsg, reqmsg_size);
                if (reqmsg == NULL)
                        return -ENOMEM;
index 340d491..4558c37 100644 (file)
@@ -2074,6 +2074,7 @@ static int tgt_checksum_niobuf_t10pi(struct lu_target *tgt,
                 * whole page
                 */
                if (t10_cksum_type && opc == OST_READ &&
+                   local_nb[i].lnb_len == PAGE_SIZE &&
                    local_nb[i].lnb_guard_disk) {
                        used = DIV_ROUND_UP(local_nb[i].lnb_len, sector_size);
                        if (used > (guard_number - used_number)) {
index 9831e7a..598435f 100644 (file)
@@ -77,6 +77,7 @@ THETESTS += swap_lock_test lockahead_test mirror_io mmap_mknod_test
 THETESTS += create_foreign_file parse_foreign_file
 THETESTS += create_foreign_dir parse_foreign_dir
 THETESTS += check_fallocate splice-test lseek_test expand_truncate_test
+THETESTS += foreign_symlink_striping
 
 if LIBAIO
 THETESTS += aiocp
index b1e869e..37466e0 100644 (file)
@@ -76,6 +76,40 @@ usage(char *argv0, int help)
        printf(" Exit status is 0 on success, 1 on failure\n");
 }
 
+/* Compare two paths after canonicalizing both with realpath().
+ *
+ * using realpath() implies the paths must be resolved/exist
+ * so this will fail for dangling links
+ *
+ * \retval 0 both paths canonicalize to the same string
+ * \retval 1 paths differ, or one of them can't be canonicalized
+ */
+int check_canonical(char *lname, char *checklink, int verbose)
+{
+       char *lname_canon;
+       char *checklink_canon = NULL;
+       int rc = 1;
+
+       lname_canon = realpath(lname, NULL);
+       if (lname_canon == NULL) {
+               if (verbose)
+                       printf("%s: can't canonicalize: %s\n",
+                              lname, strerror(errno));
+               goto out;
+       }
+
+       checklink_canon = realpath(checklink, NULL);
+       if (checklink_canon == NULL) {
+               if (verbose)
+                       printf("%s: can't canonicalize: %s\n",
+                              checklink, strerror(errno));
+               goto out;
+       }
+
+       if (strcmp(checklink_canon, lname_canon) == 0)
+               rc = 0;
+out:
+       /* single exit so that lname_canon is also released when only
+        * the second realpath() failed; free(NULL) is a no-op
+        */
+       free(checklink_canon);
+       free(lname_canon);
+       return rc;
+}
+
 int
 main(int argc, char **argv)
 {
@@ -287,7 +321,12 @@ main(int argc, char **argv)
                        }
 
                        lname[rc] = 0;
-                       if (strcmp(checklink, lname)) {
+
+                       /* just in case, try to also match the canonicalized
+                        * paths
+                        */
+                       if (strcmp(checklink, lname) &&
+                           check_canonical(lname, checklink, verbose)) {
                                if (verbose)
                                        printf("%s is a link to %s and not %s\n",
                                               fname, lname, checklink);
index 9fa508c..7cd9b77 100644 (file)
@@ -15,7 +15,7 @@ int main(int argc, char **argv)
        char *dir = "foreign_dir", *end;
        char *xval = "UUID@UUID";
        mode_t mode = 0700;
-       __u32 type = LU_FOREIGN_TYPE_DAOS, flags = 0;
+       __u32 type = LU_FOREIGN_TYPE_SYMLINK, flags = 0xda05;
        int c, rc;
 
        while ((c = getopt(argc, argv, "hd:f:m:t:x:")) != -1) {
index ea29ceb..acd5540 100644 (file)
@@ -17,7 +17,7 @@ int main(int argc, char **argv)
        size_t len;
        struct lov_foreign_md *lfm;
        char *end;
-       __u32 type = LU_FOREIGN_TYPE_DAOS, flags = 0xda08;
+       __u32 type = LU_FOREIGN_TYPE_SYMLINK, flags = 0xda05;
 
        while ((c = getopt(argc, argv, "f:x:t:F:")) != -1) {
                switch (c) {
diff --git a/lustre/tests/foreign_symlink_striping.c b/lustre/tests/foreign_symlink_striping.c
new file mode 100644 (file)
index 0000000..d16f5db
--- /dev/null
@@ -0,0 +1,105 @@
+#include <stdlib.h>
+#include <unistd.h>
+#include <limits.h>
+#include <stdio.h>
+#include <errno.h>
+#include <libgen.h>
+#include <sys/ioctl.h>
+#include <sys/xattr.h>
+#include <sys/file.h>
+
+#include <lustre/lustreapi.h>
+#include <linux/lustre/lustre_idl.h>
+
+/* Print the foreign LOV (-f <file>) or LMV (-d <dir>) striping of a
+ * foreign symlink file/dir, as returned by the GETSTRIPE ioctl()s.
+ * An unexpected magic or type is only reported on stderr.
+ * Exit status is 0 on success or -h, 1 on usage or runtime error.
+ */
+int main(int argc, char **argv)
+{
+       char *foreign = NULL;
+       int c, rc, fd;
+       bool f_opt = false, d_opt = false, h_opt = false;
+       /* buf must be large enough to receive biggest possible
+        * foreign LOV/LMV
+        */
+       char buf[XATTR_SIZE_MAX];
+       struct lmv_foreign_md *lfm = (void *)buf;
+       size_t max_len;
+       int value_len;
+
+       while ((c = getopt(argc, argv, "hf:d:")) != -1) {
+               switch (c) {
+               case 'd':
+               case 'f':
+                       if (f_opt || d_opt) {
+                               fprintf(stderr,
+                                       "only one foreign symlink file or dir can be specified at a time\n");
+                               exit(1);
+                       }
+                       foreign = optarg;
+                       if (c == 'd')
+                               d_opt = true;
+                       else
+                               f_opt = true;
+                       break;
+               case 'h':
+                       h_opt = true;
+                       /* fallthrough: -h prints the usage and exits 0 */
+               default:
+                       fprintf(stderr,
+                               "Usage: %s [-[f,d] <foreign file/dir pathname>]\n",
+                               argv[0]);
+                       exit(h_opt ? 0 : 1);
+               }
+       }
+
+       if (foreign == NULL) {
+               fprintf(stderr,
+                       "a foreign file/dir pathname must be provided\n");
+               /* a missing mandatory argument is a usage error */
+               exit(1);
+       }
+
+       /* in case foreign fake symlink feature is active, file/dir must be
+        * opened with O_NOFOLLOW to avoid symlink resolution
+        */
+       fd = open(foreign, O_RDONLY|O_NONBLOCK|O_NOFOLLOW);
+       if (fd < 0) {
+               /* fprintf() may change errno before perror() reads it */
+               int saved_errno = errno;
+
+               fprintf(stderr, "open() of '%s' error, rc : %d\n", foreign, fd);
+               errno = saved_errno;
+               perror("open()");
+               exit(1);
+       }
+
+       rc = snprintf(buf, PATH_MAX, "%s", foreign);
+       if (rc >= PATH_MAX || rc < 0) {
+               fprintf(stderr,
+                       "unexpected return code or size from snprintf() : %d\n",
+                       rc);
+               exit(1);
+       }
+
+       /* foreign != NULL implies that exactly one of f_opt/d_opt is set */
+       if (f_opt) {
+               rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE, buf);
+       } else if (d_opt) {
+               lfm->lfm_magic = LMV_MAGIC_V1;
+               rc = ioctl(fd, LL_IOC_LMV_GETSTRIPE, buf);
+       }
+
+       if (rc) {
+               fprintf(stderr, "%s: %s error: %s\n", foreign,
+                       f_opt ? "getstripe" : "getdirstripe", strerror(errno));
+               exit(1);
+       }
+
+       if (lfm->lfm_magic != LOV_USER_MAGIC_FOREIGN &&
+           lfm->lfm_magic != LMV_MAGIC_FOREIGN)
+               fprintf(stderr, "unexpected magic : 0x%08X, expected 0x%08X\n",
+                       lfm->lfm_magic, LOV_USER_MAGIC_FOREIGN);
+       if (lfm->lfm_type != LU_FOREIGN_TYPE_SYMLINK)
+               fprintf(stderr, "unexpected type : 0x%08X, expected 0x%08X\n",
+                       lfm->lfm_type, LU_FOREIGN_TYPE_SYMLINK);
+
+       /* "%.*s" requires an int precision, and lfm_length can't be trusted
+        * (e.g. unexpected magic): never print past the end of buf
+        */
+       max_len = (buf + sizeof(buf)) - lfm->lfm_value;
+       value_len = lfm->lfm_length < max_len ? lfm->lfm_length : max_len;
+       printf("lfm_magic: 0x%08X, lfm_length: %u, lfm_type: 0x%08X, lfm_flags: 0x%08X, lfm_value: '%.*s'\n",
+              lfm->lfm_magic, lfm->lfm_length, lfm->lfm_type, lfm->lfm_flags,
+              value_len, lfm->lfm_value);
+
+       close(fd);
+       return rc;
+}
index c2bf325..4360b27 100644 (file)
@@ -5712,7 +5712,7 @@ test_38()
        local uuid2=$(cat /proc/sys/kernel/random/uuid)
 
        # create foreign file
-       $LFS setstripe --foreign=daos --flags 0xda05 \
+       $LFS setstripe --foreign=none --flags 0xda05 \
                -x "${uuid1}@${uuid2}" $DIR/$tdir/$tfile ||
                error "$DIR/$tdir/$tfile: create failed"
 
@@ -5722,7 +5722,7 @@ test_38()
        # lfm_length is LOV EA size - sizeof(lfm_magic) - sizeof(lfm_length)
        $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_length:.*73" ||
                error "$DIR/$tdir/$tfile: invalid LOV EA foreign size"
-       $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*daos" ||
+       $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*none" ||
                error "$DIR/$tdir/$tfile: invalid LOV EA foreign type"
        $LFS getstripe -v $DIR/$tdir/$tfile |
                grep "lfm_flags:.*0x0000DA05" ||
@@ -5765,7 +5765,7 @@ test_38()
        # lfm_length is LOV EA size - sizeof(lfm_magic) - sizeof(lfm_length)
        $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_length:.*73" ||
                error "$DIR/$tdir/$tfile: invalid LOV EA foreign size"
-       $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*daos" ||
+       $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*none" ||
                error "$DIR/$tdir/$tfile: invalid LOV EA foreign type"
        $LFS getstripe -v $DIR/$tdir/$tfile |
                grep "lfm_flags:.*0x0000DA05" ||
@@ -5799,7 +5799,7 @@ test_39()
        local uuid2=$(cat /proc/sys/kernel/random/uuid)
 
        # create foreign dir
-       $LFS mkdir --foreign=daos --xattr="${uuid1}@${uuid2}" --flags=0xda05 \
+       $LFS mkdir --foreign=none --xattr="${uuid1}@${uuid2}" --flags=0xda05 \
                $DIR/$tdir/${tdir}2 ||
                error "$DIR/$tdir/${tdir}2: create failed"
 
@@ -5810,7 +5810,7 @@ test_39()
        # - sizeof(lfm_type) - sizeof(lfm_flags)
        $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_length:.*73" ||
                error "$DIR/$tdir/${tdir}2: invalid LMV EA size"
-       $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*daos" ||
+       $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*none" ||
                error "$DIR/$tdir/${tdir}2: invalid LMV EA type"
        $LFS getdirstripe -v $DIR/$tdir/${tdir}2 |
                grep "lfm_flags:.*0x0000DA05" ||
@@ -5862,7 +5862,7 @@ test_39()
        # - sizeof(lfm_type) - sizeof(lfm_flags)
        $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_length:.*73" ||
                error "$DIR/$tdir/${tdir}2: invalid LMV EA size"
-       $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*daos" ||
+       $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*none" ||
                error "$DIR/$tdir/${tdir}2: invalid LMV EA type"
        $LFS getdirstripe -v $DIR/$tdir/${tdir}2 |
                grep "lfm_flags:.*0x0000DA05" ||
index d9e438e..b25855a 100755 (executable)
@@ -2747,7 +2747,7 @@ test_27J() {
                error "$DIR/$tdir/$tfile: invalid LOV EA foreign value"
 
        # create foreign file (lfs + API)
-       $LFS setstripe --foreign=daos --flags 0xda08 \
+       $LFS setstripe --foreign=none --flags 0xda08 \
                -x "${uuid1}@${uuid2}" $DIR/$tdir/${tfile}2 ||
                error "$DIR/$tdir/${tfile}2: create failed"
 
@@ -2757,7 +2757,7 @@ test_27J() {
        # lfm_length is LOV EA size - sizeof(lfm_magic) - sizeof(lfm_length)
        $LFS getstripe -v $DIR/$tdir/${tfile}2 | grep "lfm_length:.*73" ||
                error "$DIR/$tdir/${tfile}2: invalid LOV EA foreign size"
-       $LFS getstripe -v $DIR/$tdir/${tfile}2 | grep "lfm_type:.*daos" ||
+       $LFS getstripe -v $DIR/$tdir/${tfile}2 | grep "lfm_type:.*none" ||
                error "$DIR/$tdir/${tfile}2: invalid LOV EA foreign type"
        $LFS getstripe -v $DIR/$tdir/${tfile}2 |
                grep "lfm_flags:.*0x0000DA08" ||
@@ -2827,7 +2827,8 @@ test_27K() {
                error "$DIR/$tdir/$tdir: invalid LMV EA size"
        parse_foreign_dir -d $DIR/$tdir/$tdir | grep "lmv_foreign_type: 1$" ||
                error "$DIR/$tdir/$tdir: invalid LMV EA type"
-       parse_foreign_dir -d $DIR/$tdir/$tdir | grep "lmv_foreign_flags: 0$" ||
+       parse_foreign_dir -d $DIR/$tdir/$tdir |
+               grep "lmv_foreign_flags: 55813$" ||
                error "$DIR/$tdir/$tdir: invalid LMV EA flags"
        local lmv=$(parse_foreign_dir -d $DIR/$tdir/$tdir |
                grep "lmv_foreign_value: 0x" |
@@ -2837,7 +2838,7 @@ test_27K() {
        [[ $lmv == $lmv2 ]] || error "$DIR/$tdir/$tdir: invalid LMV EA value"
 
        # create foreign dir (lfs + API)
-       $LFS mkdir --foreign=daos --xattr="${uuid1}@${uuid2}" --flags=0xda05 \
+       $LFS mkdir --foreign=none --xattr="${uuid1}@${uuid2}" --flags=0xda05 \
                $DIR/$tdir/${tdir}2 ||
                error "$DIR/$tdir/${tdir}2: create failed"
 
@@ -2848,7 +2849,7 @@ test_27K() {
        # - sizeof(lfm_type) - sizeof(lfm_flags)
        $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_length:.*73" ||
                error "$DIR/$tdir/${tdir}2: invalid LMV EA size"
-       $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*daos" ||
+       $LFS getdirstripe -v $DIR/$tdir/${tdir}2 | grep "lfm_type:.*none" ||
                error "$DIR/$tdir/${tdir}2: invalid LMV EA type"
        $LFS getdirstripe -v $DIR/$tdir/${tdir}2 |
                grep "lfm_flags:.*0x0000DA05" ||
@@ -3047,6 +3048,204 @@ test_27N() {
 }
 run_test 27N "lctl pool_list on separate MGS gives correct pool name"
 
+clean_foreign_symlink() {
+       trap 0
+       lctl set_param llite/$FSNAME-*/foreign_symlink_enable=0
+       for i in $DIR/$tdir/* ; do
+               $LFS unlink_foreign $i || true
+       done
+}
+
+# Basic operations on a foreign file of type "symlink".
+# The file is created with "lfs setstripe --foreign=symlink" and its LOV EA
+# is checked through "lfs getstripe". While the path made of
+# foreign_symlink_prefix and the lfm_value does not exist, restriping,
+# read, write and rm are expected to fail and rename to succeed. Once the
+# target exists under /tmp and foreign_symlink_prefix is set to /tmp/, the
+# entry must be seen as a symlink and read/write must go through it.
+# The entry is finally removed with "lfs unlink_foreign".
+test_27O() {
+       [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.12.51) ]] &&
+               skip "Need MDS version newer than 2.12.51"
+
+       test_mkdir $DIR/$tdir
+       local uuid1=$(cat /proc/sys/kernel/random/uuid)
+       local uuid2=$(cat /proc/sys/kernel/random/uuid)
+
+       # make sure the feature is disabled and entries removed on exit
+       trap clean_foreign_symlink EXIT
+
+       # enable foreign_symlink behaviour
+       $LCTL set_param llite/$FSNAME-*/foreign_symlink_enable=1
+
+       # foreign symlink LOV format is a partial path by default
+
+       # create foreign file (lfs + API)
+       $LFS setstripe --foreign=symlink --flags 0xda05 \
+               -x "${uuid1}/${uuid2}" --mode 0600 $DIR/$tdir/${tfile} ||
+               error "$DIR/$tdir/${tfile}: create failed"
+
+       $LFS getstripe -v $DIR/$tdir/${tfile} |
+               grep "lfm_magic:.*0x0BD70BD0" ||
+               error "$DIR/$tdir/${tfile}: invalid LOV EA foreign magic"
+       $LFS getstripe -v $DIR/$tdir/${tfile} | grep "lfm_type:.*symlink" ||
+               error "$DIR/$tdir/${tfile}: invalid LOV EA foreign type"
+       $LFS getstripe -v $DIR/$tdir/${tfile} |
+               grep "lfm_flags:.*0x0000DA05" ||
+               error "$DIR/$tdir/${tfile}: invalid LOV EA foreign flags"
+       $LFS getstripe $DIR/$tdir/${tfile} |
+               grep "lfm_value:.*${uuid1}/${uuid2}" ||
+               error "$DIR/$tdir/${tfile}: invalid LOV EA foreign value"
+
+       # modify striping should fail
+       $LFS setstripe -c 2 $DIR/$tdir/$tfile &&
+               error "$DIR/$tdir/$tfile: setstripe should fail"
+
+       # R/W should fail ("/{foreign_symlink_prefix}/${uuid1}/" missing)
+       cat $DIR/$tdir/$tfile && error "$DIR/$tdir/$tfile: read should fail"
+       cat /etc/passwd > $DIR/$tdir/$tfile &&
+               error "$DIR/$tdir/$tfile: write should fail"
+
+       # rename should succeed
+       mv $DIR/$tdir/$tfile $DIR/$tdir/${tfile}.new ||
+               error "$DIR/$tdir/$tfile: rename has failed"
+
+       #remove foreign_symlink file should fail
+       rm $DIR/$tdir/${tfile}.new &&
+               error "$DIR/$tdir/${tfile}.new: remove of foreign_symlink file should fail"
+
+       #test fake symlink
+       mkdir /tmp/${uuid1} ||
+               error "/tmp/${uuid1}: mkdir has failed"
+       echo FOOFOO > /tmp/${uuid1}/${uuid2} ||
+               error "/tmp/${uuid1}/${uuid2}: echo has failed"
+       $LCTL set_param llite/$FSNAME-*/foreign_symlink_prefix=/tmp/
+       $CHECKSTAT -t link -l /tmp/${uuid1}/${uuid2} $DIR/$tdir/${tfile}.new ||
+               error "$DIR/$tdir/${tfile}.new: not seen as a symlink"
+       #read should succeed now
+       cat $DIR/$tdir/${tfile}.new | grep FOOFOO ||
+               error "$DIR/$tdir/${tfile}.new: symlink resolution has failed"
+       #write should succeed now
+       cat /etc/passwd > $DIR/$tdir/${tfile}.new ||
+               error "$DIR/$tdir/${tfile}.new: write should succeed"
+       diff /etc/passwd $DIR/$tdir/${tfile}.new ||
+               error "$DIR/$tdir/${tfile}.new: diff has failed"
+       diff /etc/passwd /tmp/${uuid1}/${uuid2} ||
+               error "/tmp/${uuid1}/${uuid2}: diff has failed"
+
+       #check that getstripe still works
+       $LFS getstripe $DIR/$tdir/${tfile}.new ||
+               error "$DIR/$tdir/${tfile}.new: getstripe should still work with foreign_symlink enabled"
+
+       # chmod should still succeed
+       chmod 644 $DIR/$tdir/${tfile}.new ||
+               error "$DIR/$tdir/${tfile}.new: chmod has failed"
+
+       # chown should still succeed
+       chown $RUNAS_ID:$RUNAS_GID $DIR/$tdir/${tfile}.new ||
+               error "$DIR/$tdir/${tfile}.new: chown has failed"
+
+       # rename should still succeed
+       mv $DIR/$tdir/${tfile}.new $DIR/$tdir/${tfile} ||
+               error "$DIR/$tdir/${tfile}.new: rename has failed"
+
+       #remove foreign_symlink file should still fail
+       rm $DIR/$tdir/${tfile} &&
+               error "$DIR/$tdir/${tfile}: remove of foreign_symlink file should fail"
+
+       #use special ioctl() to unlink foreign_symlink file
+       $LFS unlink_foreign $DIR/$tdir/${tfile} ||
+               error "$DIR/$tdir/$tfile: unlink/ioctl failed"
+
+}
+run_test 27O "basic ops on foreign file of symlink type"
+
+test_27P() {
+       [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.12.49) ]] &&
+               skip "Need MDS version newer than 2.12.49"
+
+       test_mkdir $DIR/$tdir
+       local uuid1=$(cat /proc/sys/kernel/random/uuid)
+       local uuid2=$(cat /proc/sys/kernel/random/uuid)
+
+       trap clean_foreign_symlink EXIT
+
+       # enable foreign_symlink behaviour
+       $LCTL set_param llite/$FSNAME-*/foreign_symlink_enable=1
+
+       # foreign symlink LMV format is a partial path by default
+
+       # create foreign dir (lfs + API)
+       $LFS mkdir --foreign=symlink --xattr="${uuid1}/${uuid2}" \
+               --flags=0xda05 --mode 0750 $DIR/$tdir/${tdir} ||
+               error "$DIR/$tdir/${tdir}: create failed"
+
+       $LFS getdirstripe -v $DIR/$tdir/${tdir} |
+               grep "lfm_magic:.*0x0CD50CD0" ||
+               error "$DIR/$tdir/${tdir}: invalid LMV EA magic"
+       $LFS getdirstripe -v $DIR/$tdir/${tdir} | grep "lfm_type:.*symlink" ||
+               error "$DIR/$tdir/${tdir}: invalid LMV EA type"
+       $LFS getdirstripe -v $DIR/$tdir/${tdir} |
+               grep "lfm_flags:.*0x0000DA05" ||
+               error "$DIR/$tdir/${tdir}: invalid LMV EA flags"
+       $LFS getdirstripe $DIR/$tdir/${tdir} |
+               grep "lfm_value.*${uuid1}/${uuid2}" ||
+               error "$DIR/$tdir/${tdir}: invalid LMV EA value"
+
+       # file create in dir should fail
+       # ("/{foreign_symlink_prefix}/${uuid1}/${uuid2}/" missing)
+       touch $DIR/$tdir/$tdir/$tfile && "$DIR/$tdir: file create should fail"
+
+       # rename should succeed
+       mv $DIR/$tdir/$tdir $DIR/$tdir/${tdir}.new ||
+               error "$DIR/$tdir/$tdir: rename of foreign_symlink dir has failed"
+
+       #remove foreign_symlink dir should fail
+       rmdir $DIR/$tdir/${tdir}.new &&
+               error "$DIR/$tdir/${tdir}.new: remove of foreign_symlink dir should fail"
+
+       #test fake symlink
+       mkdir -p /tmp/${uuid1}/${uuid2} ||
+               error "/tmp/${uuid1}/${uuid2}: mkdir has failed"
+       echo FOOFOO > /tmp/${uuid1}/${uuid2}/foo ||
+               error "/tmp/${uuid1}/${uuid2}/foo: echo has failed"
+       $LCTL set_param llite/$FSNAME-*/foreign_symlink_prefix=/tmp/
+       $CHECKSTAT -t link -l /tmp/${uuid1}/${uuid2} $DIR/$tdir/${tdir}.new ||
+               error "$DIR/$tdir/${tdir}.new: not seen as a symlink"
+       cat $DIR/$tdir/${tdir}.new/foo | grep FOOFOO ||
+               error "$DIR/$tdir/${tdir}.new: symlink resolution has failed"
+
+       #check that getstripe fails now that foreign_symlink enabled
+       $LFS getdirstripe $DIR/$tdir/${tdir}.new ||
+               error "$DIR/$tdir/${tdir}.new: getdirstripe should still work with foreign_symlink enabled"
+
+       # file create in dir should work now
+       cp /etc/passwd $DIR/$tdir/${tdir}.new/$tfile ||
+               error "$DIR/$tdir/${tdir}.new/$tfile: file create should fail"
+       diff /etc/passwd $DIR/$tdir/${tdir}.new/$tfile ||
+               error "$DIR/$tdir/${tdir}.new/$tfile: diff has failed"
+       diff /etc/passwd /tmp/${uuid1}/${uuid2}/$tfile ||
+               error "/tmp/${uuid1}/${uuid2}/$tfile: diff has failed"
+
+       # chmod should still succeed
+       chmod 755 $DIR/$tdir/${tdir}.new ||
+               error "$DIR/$tdir/${tdir}.new: chmod has failed"
+
+       # chown should still succeed
+       chown $RUNAS_ID:$RUNAS_GID $DIR/$tdir/${tdir}.new ||
+               error "$DIR/$tdir/${tdir}.new: chown has failed"
+
+       # rename should still succeed
+       mv $DIR/$tdir/${tdir}.new $DIR/$tdir/${tdir} ||
+               error "$DIR/$tdir/${tdir}.new: rename of foreign_symlink dir has failed"
+
+       #remove foreign_symlink dir should still fail
+       rmdir $DIR/$tdir/${tdir} &&
+               error "$DIR/$tdir/${tdir}: remove of foreign_symlink dir should fail"
+
+       #use special ioctl() to unlink foreign_symlink file
+       $LFS unlink_foreign $DIR/$tdir/${tdir} ||
+               error "$DIR/$tdir/$tdir: unlink/ioctl failed"
+
+       #created file should still exist
+       [[ -f /tmp/${uuid1}/${uuid2}/$tfile ]] ||
+               error "/tmp/${uuid1}/${uuid2}/$tfile has been removed"
+       diff /etc/passwd /tmp/${uuid1}/${uuid2}/$tfile ||
+               error "/tmp/${uuid1}/${uuid2}/$tfile: diff has failed"
+}
+run_test 27P "basic ops on foreign dir of foreign_symlink type"
+
 # createtest also checks that device nodes are created and
 # then visible correctly (#2091)
 test_28() { # bug 2091
@@ -10501,6 +10700,21 @@ test_103c() {
 }
 run_test 103c "'cp -rp' won't set empty acl"
 
+test_103e() {
+       (( $MDS1_VERSION >= $(version_code 2.13.59) )) ||
+               skip "MDS needs to be at least 2.13.59"
+
+       mkdir -p $DIR/$tdir
+       # one default ACL will be created for the file owner
+       for U in {2..256}; do
+               setfacl -m default:user:$U:rwx $DIR/$tdir
+               numacl=$(getfacl $DIR/$tdir |& grep -c "default:user")
+               touch $DIR/$tdir/$tfile.$U ||
+                       error "failed to create $tfile.$U with $numacl ACLs"
+       done
+}
+run_test 103e "inheritance of big amount of default ACLs"
+
 test_104a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"
 
index 28a33d2..1d5a3ea 100644 (file)
@@ -28,7 +28,7 @@ bin_SCRIPTS   = llstat llobdstat plot-llstat
 bin_PROGRAMS  = lfs
 sbin_SCRIPTS  = ldlm_debug_upcall
 sbin_PROGRAMS = lctl l_getidentity llverfs lustre_rsync ll_decode_linkea \
-               llsom_sync
+               llsom_sync l_foreign_symlink
 
 if TESTS
 sbin_PROGRAMS += wiretest
@@ -220,6 +220,10 @@ l_tunedisk_CPPFLAGS := ${MNTMODCFLAGS}
 l_tunedisk_LDFLAGS  := ${MNTMODLDFLAGS}
 l_tunedisk_LDADD    := $(mount_lustre_LDADD)
 
+l_foreign_symlink_SOURCES = l_foreign_symlink.c
+l_foreign_symlink_LDADD := $(top_builddir)/libcfs/libcfs/libcfs.la
+l_foreign_symlink_DEPENDENCIES := $(top_builddir)/libcfs/libcfs/libcfs.la
+
 l_getidentity_SOURCES = l_getidentity.c
 l_getidentity_LDADD := $(top_builddir)/libcfs/libcfs/libcfs.la
 l_getidentity_DEPENDENCIES := $(top_builddir)/libcfs/libcfs/libcfs.la
diff --git a/lustre/utils/l_foreign_symlink.c b/lustre/utils/l_foreign_symlink.c
new file mode 100644 (file)
index 0000000..7bf7ec9
--- /dev/null
@@ -0,0 +1,169 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2020, Intel Corporation.
+ */
+
+/*
+ * lustre/utils/l_foreign_symlink.c
+ * Userland helper to provide detailed format items in order to allow for
+ * a fast parsing of foreign symlink LOV/LMV EAs in llite.
+ * Presently, the foreign symlink LOV/LMV EA format and its translation
+ * into format items are hard-coded, but in the future we may want to make
+ * it smarter and automate this process by some means.
+ */
+
+#include <sys/types.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <libgen.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <syslog.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <getopt.h>
+
+#include <libcfs/util/param.h>
+#include <linux/lustre/lustre_user.h>
+#include <linux/lustre/lustre_idl.h>
+
+#define UUID_STRING_LENGTH 36
+#define MAX_BUF_SIZE 1024
+
+static char *progname;
+
+/*
+ * Log a printf-style error message through syslog at LOG_ERR priority,
+ * tagged with the program name. The log is opened with LOG_PERROR and
+ * LOG_PID and closed again on every call.
+ */
+static void errlog(const char *fmt, ...)
+{
+       va_list ap;
+
+       va_start(ap, fmt);
+
+       openlog(progname, LOG_PERROR | LOG_PID, LOG_KERN);
+       vsyslog(LOG_ERR, fmt, ap);
+       closelog();
+
+       va_end(ap);
+}
+
+/*
+ * Build the list of format items telling llite how to turn the lfm_value
+ * of a foreign symlink LOV/LMV EA into a relative path, and write it to
+ * the llite/<argv[1]>/foreign_symlink_upcall_info parameter.
+ *
+ * argv[1] is the name of the llite sysfs object to configure.
+ *
+ * Returns 0 on success and -1 on a usage or allocation error. For any
+ * later failure the errno of the failing step is returned, as a positive
+ * value when stdin is a tty (command line use) and negated otherwise.
+ */
+int main(int argc, char **argv)
+{
+       /* we want to request llite layer to parse each foreign symlink
+        * LOV/LMV EAs with lfm_value of format "<PUUID><sep><CUUID>", where
+        * <sep> is a single separator character, and translate it as
+        * "<PUUID>/<CUUID>" relative path.
+        * To do so, will need to pass a series of 4 items, one for
+        * <PUUID> position and length in lfm_value, one with constant
+        * string "/", one for <CUUID> position and length in lfm_value,
+        * a last one to indicate end of series.
+        */
+       struct ll_foreign_symlink_upcall_item *items;
+       ssize_t len, written;
+       char *buf;
+       glob_t path;
+       int fd, rc;
+
+       progname = basename(argv[0]);
+
+       if (argc != 2) {
+               errlog("usage: %s <sbi_sysfs_object_name>\n", argv[0]);
+               return -1;
+       }
+
+       /* zeroed, so that bytes of the buffer which are not explicitly
+        * set below are never sent uninitialized to the kernel
+        */
+       buf = calloc(1, MAX_BUF_SIZE);
+       if (buf == NULL) {
+               errlog("unable to allocate MAX_BUF_SIZE bytes\n");
+               return -1;
+       }
+
+       /* the number of items is presently limited to MAX_NB_UPCALL_ITEMS */
+
+       /* all items are expected to be on a __u32 boundary by llite */
+
+       /* 1st item to locate <PUUID> */
+       items = (struct ll_foreign_symlink_upcall_item *)buf;
+       items->type = POSLEN_TYPE;
+       items->pos = 0;
+       items->len = UUID_STRING_LENGTH;
+
+       /* 2nd item to store "/" string */
+       items = (struct ll_foreign_symlink_upcall_item *)((char *)items +
+                       POSLEN_ITEM_SZ);
+       items->type = STRING_TYPE;
+       /* NUL byte is not necessary */
+       items->size = strlen("/");
+       memcpy(items->bytestring, "/", strlen("/"));
+       /* space occupied by string will fit on __u32 boundary */
+
+       /* 3rd item to locate <CUUID>, skipping the separator character */
+       items = (struct ll_foreign_symlink_upcall_item *)((char *)items +
+               STRING_ITEM_SZ(items->size));
+       items->type = POSLEN_TYPE;
+       items->pos = UUID_STRING_LENGTH + 1;
+       items->len = UUID_STRING_LENGTH;
+
+       /* 4th item is end of buf */
+       items = (struct ll_foreign_symlink_upcall_item *)((char *)items +
+                       POSLEN_ITEM_SZ);
+       items->type = EOB_TYPE;
+
+       /* only the type field of the last item is sent */
+       len = (char *)items + sizeof(items->type) - buf;
+
+       /* Send foreign symlink parsing items info to kernelspace */
+       rc = cfs_get_param_paths(&path, "llite/%s/foreign_symlink_upcall_info",
+                                argv[1]);
+       if (rc != 0) {
+               /* save errno first, errlog() may overwrite it */
+               rc = -errno;
+               errlog("can't get param 'llite/%s/foreign_symlink_upcall_info': %s\n",
+                      argv[1], strerror(-rc));
+               goto out;
+       }
+
+       fd = open(path.gl_pathv[0], O_WRONLY);
+       if (fd < 0) {
+               /* save errno first, errlog() may overwrite it */
+               rc = -errno;
+               errlog("can't open file '%s':%s\n", path.gl_pathv[0],
+                      strerror(-rc));
+               goto out_param;
+       }
+
+       written = write(fd, buf, len);
+       if (written == len)
+               rc = 0;
+       else
+               /* a short write does not set errno: report it as -EIO
+                * instead of using a stale (possibly 0) errno value
+                */
+               rc = written < 0 ? -errno : -EIO;
+       close(fd);
+       if (rc != 0)
+               errlog("partial write ret %zd: %s\n", written, strerror(-rc));
+
+out_param:
+       cfs_free_param_data(&path);
+out:
+       free(buf);
+       if (isatty(STDIN_FILENO))
+               /* we are called from the command line */
+               return rc < 0 ? -rc : rc;
+       else
+               return rc;
+}
index f7eacb3..c68a626 100644 (file)
@@ -85,6 +85,7 @@ static int lfs_getstripe(int argc, char **argv);
 static int lfs_getdirstripe(int argc, char **argv);
 static int lfs_setdirstripe(int argc, char **argv);
 static int lfs_rmentry(int argc, char **argv);
+static int lfs_unlink_foreign(int argc, char **argv);
 static int lfs_osts(int argc, char **argv);
 static int lfs_mdts(int argc, char **argv);
 static int lfs_df(int argc, char **argv);
@@ -293,7 +294,7 @@ static inline int lfs_mirror_delete(int argc, char **argv)
        "setdirstripe|mkdir --foreign[=<foreign_type>] -x|-xattr <string> " \
                "[--mode|-o mode] [--flags <hex>] <dir>\n" \
        "\tmode: the mode of the directory\n" \
-       "\tforeign_type: none or daos\n"
+       "\tforeign_type: none or symlink\n"
 
 /**
  * command_t mirror_cmdlist - lfs mirror commands.
@@ -460,6 +461,11 @@ command_t cmdlist[] = {
         "will become inaccessable after this command. This can only be done\n"
         "by the administrator\n"
         "usage: rm_entry <dir>\n"},
+       {"unlink_foreign", lfs_unlink_foreign, 0,
+        "To remove the foreign file/dir.\n"
+        "Note: This is for files/dirs prevented to be removed using\n"
+        "unlink/rmdir, but works also for regular ones\n"
+        "usage: unlink_foreign <foreign_dir/file> [<foreign_dir/file> ...]\n"},
        {"pool_list", lfs_poollist, 0,
         "List pools or pool OSTs\n"
         "usage: pool_list <fsname>[.<pool>] | <pathname>\n"},
@@ -6038,7 +6044,7 @@ static int lfs_setdirstripe(int argc, char **argv)
        mode_t mode = S_IRWXU | S_IRWXG | S_IRWXO;
        mode_t previous_mode = 0;
        char *xattr = NULL;
-       __u32 type = LU_FOREIGN_TYPE_DAOS, flags = 0;
+       __u32 type = LU_FOREIGN_TYPE_SYMLINK, flags = 0;
        struct option long_opts[] = {
        { .val = 'c',   .name = "count",        .has_arg = required_argument },
        { .val = 'c',   .name = "mdt-count",    .has_arg = required_argument },
@@ -6355,6 +6361,33 @@ static int lfs_rmentry(int argc, char **argv)
        return result;
 }
 
+/*
+ * "lfs unlink_foreign" handler: call llapi_unlink_foreign() on each
+ * pathname of the NULL-terminated argument vector, in order, and stop at
+ * the first failure.
+ *
+ * Returns CMD_HELP when no pathname is given, otherwise the result of the
+ * last llapi_unlink_foreign() call (0 when all of them succeeded).
+ */
+static int lfs_unlink_foreign(int argc, char **argv)
+{
+       int rc = 0;
+       int i;
+
+       if (argc <= 1) {
+               fprintf(stderr, "error: %s: missing pathname\n",
+                       argv[0]);
+               return CMD_HELP;
+       }
+
+       for (i = 1; argv[i] != NULL; i++) {
+               rc = llapi_unlink_foreign(argv[i]);
+               if (rc != 0) {
+                       fprintf(stderr,
+                               "error: %s: unlink foreign entry '%s' failed\n",
+                               argv[0], argv[i]);
+                       return rc;
+               }
+       }
+
+       return rc;
+}
+
 static int lfs_mv(int argc, char **argv)
 {
        struct lmv_user_md lmu = { LMV_USER_MAGIC };
index 5686811..2508895 100644 (file)
@@ -94,7 +94,7 @@ char *mdt_hash_name[] = { "none",
 
 struct lustre_foreign_type lu_foreign_types[] = {
        {.lft_type = LU_FOREIGN_TYPE_NONE, .lft_name = "none"},
-       {.lft_type = LU_FOREIGN_TYPE_DAOS, .lft_name = "daos"},
+       {.lft_type = LU_FOREIGN_TYPE_SYMLINK, .lft_name = "symlink"},
        /* must be the last element */
        {.lft_type = LU_FOREIGN_TYPE_UNKNOWN, .lft_name = NULL}
        /* array max dimension must be <= UINT32_MAX */
@@ -1241,44 +1241,6 @@ int llapi_dir_create_pool(const char *name, int mode, int stripe_offset,
        return llapi_dir_create(name, mode, &param);
 }
 
-int llapi_direntry_remove(char *dname)
-{
-       char *dirpath = NULL;
-       char *namepath = NULL;
-       char *dir;
-       char *filename;
-       int fd = -1;
-       int rc = 0;
-
-       dirpath = strdup(dname);
-       namepath = strdup(dname);
-       if (!dirpath || !namepath)
-               return -ENOMEM;
-
-       filename = basename(namepath);
-
-       dir = dirname(dirpath);
-
-       fd = open(dir, O_DIRECTORY | O_RDONLY);
-       if (fd < 0) {
-               rc = -errno;
-               llapi_error(LLAPI_MSG_ERROR, rc, "unable to open '%s'",
-                           filename);
-               goto out;
-       }
-
-       if (ioctl(fd, LL_IOC_REMOVE_ENTRY, filename))
-               llapi_error(LLAPI_MSG_ERROR, errno,
-                           "error on ioctl %#lx for '%s' (%d)",
-                           (long)LL_IOC_LMV_SETSTRIPE, filename, fd);
-out:
-       free(dirpath);
-       free(namepath);
-       if (fd != -1)
-               close(fd);
-       return rc;
-}
-
 /*
  * Find the fsname, the full path, and/or an open fd.
  * Either the fsname or path must not be NULL
@@ -1768,7 +1730,7 @@ err:
        return rc;
 }
 
-typedef int (semantic_func_t)(char *path, DIR *parent, DIR **d,
+typedef int (semantic_func_t)(char *path, int p, int *d,
                              void *data, struct dirent64 *de);
 
 #define OBD_NOT_FOUND           (-1)
@@ -1838,7 +1800,7 @@ static int common_param_init(struct find_param *param, char *path)
        return 0;
 }
 
-static int cb_common_fini(char *path, DIR *parent, DIR **dirp, void *data,
+static int cb_common_fini(char *path, int p, int *dp, void *data,
                          struct dirent64 *de)
 {
        struct find_param *param = data;
@@ -1848,26 +1810,27 @@ static int cb_common_fini(char *path, DIR *parent, DIR **dirp, void *data,
 }
 
 /* set errno upon failure */
-static DIR *opendir_parent(const char *path)
+static int open_parent(const char *path)
 {
        char *path_copy;
        char *parent_path;
-       DIR *parent;
+       int parent;
 
        path_copy = strdup(path);
        if (path_copy == NULL)
-               return NULL;
+               return -1;
 
        parent_path = dirname(path_copy);
-       parent = opendir(parent_path);
+       parent = open(parent_path, O_RDONLY|O_NDELAY|O_DIRECTORY);
        free(path_copy);
 
        return parent;
 }
 
-static int cb_get_dirstripe(char *path, DIR *d, struct find_param *param)
+static int cb_get_dirstripe(char *path, int *d, struct find_param *param)
 {
        int ret;
+       bool did_nofollow = false;
 
 again:
        param->fp_lmv_md->lum_stripe_count = param->fp_lmv_stripe_count;
@@ -1876,7 +1839,36 @@ again:
        else
                param->fp_lmv_md->lum_magic = LMV_MAGIC_V1;
 
-       ret = ioctl(dirfd(d), LL_IOC_LMV_GETSTRIPE, param->fp_lmv_md);
+       ret = ioctl(*d, LL_IOC_LMV_GETSTRIPE, param->fp_lmv_md);
+
+       /* if ENOTTY likely to be a fake symlink, so try again after
+        * new open() with O_NOFOLLOW, but only once to prevent any
+        * loop like for the path of a file/dir not on Lustre !!
+        */
+       if (ret < 0 && errno == ENOTTY && !did_nofollow) {
+               int fd, ret2;
+
+               did_nofollow = true;
+               fd = open(path, O_RDONLY | O_NOFOLLOW);
+               if (fd < 0) {
+                       /* restore original errno */
+                       errno = ENOTTY;
+                       return ret;
+               }
+
+               /* close original fd and set new */
+               close(*d);
+               *d = fd;
+               ret2 = ioctl(fd, LL_IOC_LMV_GETSTRIPE, param->fp_lmv_md);
+               if (ret2 < 0 && errno != E2BIG) {
+                       /* restore original errno */
+                       errno = ENOTTY;
+                       return ret;
+               }
+               /* LMV is ok or need to handle E2BIG case now */
+               ret = ret2;
+       }
+
        if (errno == E2BIG && ret != 0) {
                int stripe_count;
                int lmv_size;
@@ -2019,6 +2011,28 @@ retry_getinfo:
 
                if (cmd == LL_IOC_MDC_GETINFO_V1 && !ret)
                        ret = convert_lmdbuf_v1v2(lmdbuf, lmdlen);
+
+               if (ret < 0 && errno == ENOTTY && type == GET_LMD_STRIPE) {
+                       int dir_fd2;
+
+                       /* retry ioctl() after new open() with O_NOFOLLOW
+                        * just in case it could be a fake symlink
+                        * need using a new open() as dir_fd is being closed
+                        * by caller
+                        */
+
+                       dir_fd2 = open(path, O_RDONLY | O_NDELAY | O_NOFOLLOW);
+                       if (dir_fd2 < 0) {
+                               /* return original error */
+                               errno = ENOTTY;
+                       } else {
+                               ret = ioctl(dir_fd2, cmd, lmdbuf);
+                               /* pass new errno or success back to caller */
+
+                               close(dir_fd2);
+                       }
+               }
+
        } else if (parent_fd >= 0) {
                const char *fname = strrchr(path, '/');
 
@@ -2101,55 +2115,103 @@ retry_getfileinfo:
        return ret;
 }
 
-static int get_lmd_info(char *path, DIR *parent, DIR *dir, void *lmdbuf,
-                       int lmdlen, enum get_lmd_info_type type)
-{
-       int parent_fd = -1;
-       int dir_fd = -1;
-
-       if (parent)
-               parent_fd = dirfd(parent);
-       if (dir)
-               dir_fd = dirfd(dir);
-
-       return get_lmd_info_fd(path, parent_fd, dir_fd, lmdbuf, lmdlen, type);
-}
-
-static int llapi_semantic_traverse(char *path, int size, DIR *parent,
+static int llapi_semantic_traverse(char *path, int size, int parent,
                                   semantic_func_t sem_init,
                                   semantic_func_t sem_fini, void *data,
                                   struct dirent64 *de)
 {
        struct find_param *param = (struct find_param *)data;
        struct dirent64 *dent;
-       int len, ret;
-       DIR *d, *p = NULL;
+       int len, ret, d, p = -1;
+       DIR *dir = NULL;
 
        ret = 0;
        len = strlen(path);
 
-       d = opendir(path);
-       if (!d && errno != ENOTDIR) {
+       d = open(path, O_RDONLY|O_NDELAY|O_DIRECTORY);
+       /* if an invalid fake dir symlink, opendir() will return EINVAL
+        * instead of ENOTDIR. If a valid but dangling faked or real file/dir
+        * symlink, ENOENT will be returned. For a valid/resolved fake or real
+        * file symlink, ENOTDIR will be returned as for a regular file.
+        * opendir() will be successful for a valid and resolved fake or real
+        * dir symlink or a regular dir.
+        */
+       if (d == -1 && errno != ENOTDIR && errno != EINVAL && errno != ENOENT) {
                ret = -errno;
                llapi_error(LLAPI_MSG_ERROR, ret, "%s: Failed to open '%s'",
                            __func__, path);
                return ret;
-       } else if (!d && !parent) {
-               /* ENOTDIR. Open the parent dir. */
-               p = opendir_parent(path);
-               if (!p) {
-                       ret = -errno;
-                       goto out;
+       } else if (d == -1) {
+               if (errno == ENOENT || errno == EINVAL) {
+                       int old_errno = errno;
+
+                       /* try to open with O_NOFOLLOW this will help
+                        * differentiate fake vs real symlinks
+                        * it is ok to not use O_DIRECTORY with O_RDONLY
+                        * and it will prevent the need to deal with ENOTDIR
+                        * error, instead of ELOOP, being returned by recent
+                        * kernels for real symlinks
+                        */
+                       d = open(path, O_RDONLY|O_NDELAY|O_NOFOLLOW);
+                       /* if a dangling real symlink should return ELOOP, or
+                        * again ENOENT if really non-existing path, or E...??
+                        * So return original error. If success or ENOTDIR, path
+                        * is likely to be a fake dir/file symlink, so continue
+                        */
+                       if (d == -1) {
+                               ret =  -old_errno;
+                               goto out;
+                       }
+
+               }
+
+               /* ENOTDIR */
+               if (parent == -1 && d == -1) {
+                       /* Open the parent dir. */
+                       p = open_parent(path);
+                       if (p == -1) {
+                               ret = -errno;
+                               goto out;
+                       }
+               }
+       } else { /* d != -1 */
+               int d2;
+
+               /* try to reopen dir with O_NOFOLLOW just in case of a foreign
+                * symlink dir
+                */
+               d2 = open(path, O_RDONLY|O_NDELAY|O_NOFOLLOW);
+               if (d2 != -1) {
+                       close(d);
+                       d = d2;
+               } else {
+                       /* continue with d */
+                       errno = 0;
                }
        }
 
-       if (sem_init && (ret = sem_init(path, parent ?: p, &d, data, de)))
-               goto err;
+       if (sem_init) {
+               ret = sem_init(path, (parent != -1) ? parent : p, &d, data, de);
+               if (ret)
+                       goto err;
+       }
 
-       if (d == NULL)
+       if (d == -1)
                goto out;
 
-       while ((dent = readdir64(d)) != NULL) {
+       dir = fdopendir(d);
+       if (dir == NULL) {
+               /* ENOTDIR if fake symlink, do not consider it as an error */
+               if (errno != ENOTDIR)
+                       llapi_error(LLAPI_MSG_ERROR, errno,
+                                   "fdopendir() failed");
+               else
+                       errno = 0;
+
+               goto out;
+       }
+
+       while ((dent = readdir64(dir)) != NULL) {
                int rc;
 
                if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
@@ -2168,8 +2230,8 @@ static int llapi_semantic_traverse(char *path, int size, DIR *parent,
                if (dent->d_type == DT_UNKNOWN) {
                        struct lov_user_mds_data *lmd = param->fp_lmd;
 
-                       rc = get_lmd_info(path, d, NULL, lmd,
-                                         param->fp_lum_size, GET_LMD_INFO);
+                       rc = get_lmd_info_fd(path, d, -1, param->fp_lmd,
+                                            param->fp_lum_size, GET_LMD_INFO);
                        if (rc == 0)
                                dent->d_type = IFTODT(lmd->lmd_stx.stx_mode);
                        else if (ret == 0)
@@ -2210,10 +2272,14 @@ out:
        if (sem_fini)
                sem_fini(path, parent, &d, data, de);
 err:
-       if (d)
-               closedir(d);
-       if (p)
-               closedir(p);
+       if (d != -1) {
+               if (dir)
+                       closedir(dir);
+               else
+                       close(d);
+       }
+       if (p != -1)
+               close(p);
        return ret;
 }
 
@@ -2241,8 +2307,8 @@ static int param_callback(char *path, semantic_func_t sem_init,
 
        param->fp_depth = 0;
 
-       ret = llapi_semantic_traverse(buf, PATH_MAX + 1, NULL, sem_init,
-                                     sem_fini, param, NULL);
+       ret = llapi_semantic_traverse(buf, PATH_MAX + 1, -1, sem_init,
+                                      sem_fini, param, NULL);
 out:
        find_param_fini(param);
        free(buf);
@@ -2275,11 +2341,19 @@ int llapi_file_get_lov_uuid(const char *path, struct obd_uuid *lov_uuid)
 {
        int fd, rc;
 
-       fd = open(path, O_RDONLY | O_NONBLOCK);
+       /* do not follow faked symlinks */
+       fd = open(path, O_RDONLY | O_NONBLOCK | O_NOFOLLOW);
        if (fd < 0) {
-               rc = -errno;
-               llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'", path);
-               return rc;
+               /* real symlink should have failed with ELOOP so retry without
+                * O_NOFOLLOW just in case
+                */
+               fd = open(path, O_RDONLY | O_NONBLOCK);
+               if (fd < 0) {
+                       rc = -errno;
+                       llapi_error(LLAPI_MSG_ERROR, rc, "cannot open '%s'",
+                                   path);
+                       return rc;
+               }
        }
 
        rc = llapi_file_fget_lov_uuid(fd, lov_uuid);
@@ -2513,7 +2587,7 @@ free_param:
  * obd index for all these obduuids will be returned in
  * param->fp_obd_indexes
  */
-static int setup_indexes(DIR *dir, char *path, struct obd_uuid *obduuids,
+static int setup_indexes(int d, char *path, struct obd_uuid *obduuids,
                         int num_obds, int **obdindexes, int *obdindex,
                         enum tgt_type type)
 {
@@ -2536,7 +2610,7 @@ static int setup_indexes(DIR *dir, char *path, struct obd_uuid *obduuids,
                return -ENOMEM;
 
 retry_get_uuids:
-       ret = llapi_get_target_uuids(dirfd(dir), uuids, &obdcount, type);
+       ret = llapi_get_target_uuids(d, uuids, &obdcount, type);
        if (ret) {
                if (ret == -EOVERFLOW) {
                        struct obd_uuid *uuids_temp;
@@ -2600,12 +2674,12 @@ out_free:
        return ret;
 }
 
-static int setup_target_indexes(DIR *dir, char *path, struct find_param *param)
+static int setup_target_indexes(int d, char *path, struct find_param *param)
 {
        int ret = 0;
 
        if (param->fp_mdt_uuid) {
-               ret = setup_indexes(dir, path, param->fp_mdt_uuid,
+               ret = setup_indexes(d, path, param->fp_mdt_uuid,
                                    param->fp_num_mdts,
                                    &param->fp_mdt_indexes,
                                    &param->fp_mdt_index, LMV_TYPE);
@@ -2614,7 +2688,7 @@ static int setup_target_indexes(DIR *dir, char *path, struct find_param *param)
        }
 
        if (param->fp_obd_uuid) {
-               ret = setup_indexes(dir, path, param->fp_obd_uuid,
+               ret = setup_indexes(d, path, param->fp_obd_uuid,
                                    param->fp_num_obds,
                                    &param->fp_obd_indexes,
                                    &param->fp_obd_index, LOV_TYPE);
@@ -4624,12 +4698,12 @@ static int fget_projid(int fd, int *projid)
        return 0;
 }
 
-static int cb_find_init(char *path, DIR *parent, DIR **dirp,
+static int cb_find_init(char *path, int p, int *dp,
                        void *data, struct dirent64 *de)
 {
        struct find_param *param = (struct find_param *)data;
        struct lov_user_mds_data *lmd = param->fp_lmd;
-       DIR *dir = dirp == NULL ? NULL : *dirp;
+       int d = dp == NULL ? -1 : *dp;
        int decision = 1; /* 1 is accepted; -1 is rejected. */
        int lustre_fs = 1;
        int checked_type = 0;
@@ -4638,7 +4712,7 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp,
        __u64 flags;
        int fd = -2;
 
-       if (parent == NULL && dir == NULL)
+       if (p == -1 && d == -1)
                return -EINVAL;
 
        /* If a regular expression is presented, make the initial decision */
@@ -4684,10 +4758,10 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp,
                decision = 0;
 
        if (decision == 0) {
-               if (dir && (param->fp_check_mdt_count ||
+               if (d != -1 && (param->fp_check_mdt_count ||
                    param->fp_check_hash_type || param->fp_check_foreign)) {
                        param->fp_get_lmv = 1;
-                       ret = cb_get_dirstripe(path, dir, param);
+                       ret = cb_get_dirstripe(path, &d, param);
                        if (ret != 0) {
                                /*
                                 * XXX this works to decide for foreign
@@ -4704,8 +4778,8 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp,
                }
 
                param->fp_lmd->lmd_lmm.lmm_magic = 0;
-               ret = get_lmd_info(path, parent, dir, param->fp_lmd,
-                                  param->fp_lum_size, GET_LMD_INFO);
+               ret = get_lmd_info_fd(path, p, d, param->fp_lmd,
+                                     param->fp_lum_size, GET_LMD_INFO);
                if (ret == 0 && param->fp_lmd->lmd_lmm.lmm_magic == 0 &&
                    find_check_lmm_info(param)) {
                        struct lov_user_md *lmm = &param->fp_lmd->lmd_lmm;
@@ -4724,8 +4798,8 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp,
                        lmm->lmm_stripe_offset = -1;
                }
                if (ret == 0 && param->fp_mdt_uuid != NULL) {
-                       if (dir != NULL) {
-                               ret = llapi_file_fget_mdtidx(dirfd(dir),
+                       if (d != -1) {
+                               ret = llapi_file_fget_mdtidx(d,
                                                     &param->fp_file_mdt_index);
                        } else if (S_ISREG(lmd->lmd_stx.stx_mode)) {
                                /*
@@ -4748,7 +4822,7 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp,
                                 * For a special file, we assume it resides on
                                 * the same MDT as the parent directory.
                                 */
-                               ret = llapi_file_fget_mdtidx(dirfd(parent),
+                               ret = llapi_file_fget_mdtidx(p,
                                                     &param->fp_file_mdt_index);
                        }
                }
@@ -4787,7 +4861,7 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp,
                }
 
                if (lustre_fs && !param->fp_got_uuids) {
-                       ret = setup_target_indexes(dir ? dir : parent, path,
+                       ret = setup_target_indexes((d != -1) ? d : p, path,
                                                   param);
                        if (ret)
                                goto out;
@@ -5031,10 +5105,10 @@ obd_matches:
                if (param->fp_mdt_index != OBD_NOT_FOUND)
                        print_failed_tgt(param, path, LL_STATFS_LMV);
 
-               if (dir != NULL)
-                       ret = fstat_f(dirfd(dir), &st);
+               if (d != -1)
+                       ret = fstat_f(d, &st);
                else if (de != NULL)
-                       ret = fstatat_f(dirfd(parent), de->d_name, &st,
+                       ret = fstatat_f(p, de->d_name, &st,
                                        AT_SYMLINK_NOFOLLOW);
                else
                        ret = lstat_f(path, &st);
@@ -5112,34 +5186,33 @@ out:
        return ret;
 }
 
-static int cb_migrate_mdt_init(char *path, DIR *parent, DIR **dirp,
+static int cb_migrate_mdt_init(char *path, int p, int *dp,
                               void *param_data, struct dirent64 *de)
 {
        struct find_param *param = (struct find_param *)param_data;
        struct lmv_user_md *lmu = param->fp_lmv_md;
-       DIR *tmp_parent = parent;
+       int tmp_p = p;
        char raw[MAX_IOC_BUFLEN] = {'\0'};
        char *rawbuf = raw;
        struct obd_ioctl_data data = { 0 };
-       int fd;
        int ret;
        char *path_copy;
        char *filename;
        bool retry = false;
 
-       if (parent == NULL && dirp == NULL)
+       if (p == -1 && dp == NULL)
                return -EINVAL;
 
        if (!lmu)
                return -EINVAL;
 
-       if (dirp != NULL)
-               closedir(*dirp);
+       if (dp != NULL && *dp != -1)
+               close(*dp);
 
-       if (parent == NULL) {
-               tmp_parent = opendir_parent(path);
-               if (tmp_parent == NULL) {
-                       *dirp = NULL;
+       if (p == -1) {
+               tmp_p = open_parent(path);
+               if (tmp_p == -1) {
+                       *dp = -1;
                        ret = -errno;
                        llapi_error(LLAPI_MSG_ERROR, ret,
                                    "can not open %s", path);
@@ -5147,8 +5220,6 @@ static int cb_migrate_mdt_init(char *path, DIR *parent, DIR **dirp,
                }
        }
 
-       fd = dirfd(tmp_parent);
-
        path_copy = strdup(path);
        filename = basename(path_copy);
 
@@ -5165,7 +5236,7 @@ static int cb_migrate_mdt_init(char *path, DIR *parent, DIR **dirp,
        }
 
 migrate:
-       ret = ioctl(fd, LL_IOC_MIGRATE, rawbuf);
+       ret = ioctl(tmp_p, LL_IOC_MIGRATE, rawbuf);
        if (ret != 0) {
                if (errno == EBUSY && !retry) {
                        /*
@@ -5198,7 +5269,7 @@ migrate:
        }
 
 out:
-       if (dirp != NULL) {
+       if (dp != NULL) {
                /*
                 * If the directory is being migration, we need
                 * close the directory after migration,
@@ -5206,16 +5277,16 @@ out:
                 * on the client side, and re-open to get the
                 * new directory handle
                 */
-               *dirp = opendir(path);
-               if (*dirp == NULL) {
+               *dp = open(path, O_RDONLY|O_NDELAY|O_DIRECTORY);
+               if (*dp == -1) {
                        ret = -errno;
                        llapi_error(LLAPI_MSG_ERROR, ret,
                                    "%s: Failed to open '%s'", __func__, path);
                }
        }
 
-       if (parent == NULL)
-               closedir(tmp_parent);
+       if (p == -1)
+               close(tmp_p);
 
        free(path_copy);
 
@@ -5223,7 +5294,7 @@ out:
 }
 
 /* dir migration finished, shrink its stripes */
-static int cb_migrate_mdt_fini(char *path, DIR *parent, DIR **dirp, void *data,
+static int cb_migrate_mdt_fini(char *path, int p, int *dp, void *data,
                               struct dirent64 *de)
 {
        struct find_param *param = data;
@@ -5234,13 +5305,13 @@ static int cb_migrate_mdt_fini(char *path, DIR *parent, DIR **dirp, void *data,
        if (de && de->d_type != DT_DIR)
                goto out;
 
-       if (*dirp) {
+       if (*dp != -1) {
                /*
                 * close it before setxattr because the latter may destroy the
                 * original object, and cause close fail.
                 */
-               ret = closedir(*dirp);
-               *dirp = NULL;
+               ret = close(*dp);
+               *dp = -1;
                if (ret)
                        goto out;
        }
@@ -5249,7 +5320,7 @@ static int cb_migrate_mdt_fini(char *path, DIR *parent, DIR **dirp, void *data,
        if (ret == -EALREADY)
                ret = 0;
 out:
-       cb_common_fini(path, parent, dirp, data, de);
+       cb_common_fini(path, p, dp, data, de);
        return ret;
 }
 
@@ -5291,20 +5362,20 @@ int llapi_file_fget_mdtidx(int fd, int *mdtidx)
        return 0;
 }
 
-static int cb_get_mdt_index(char *path, DIR *parent, DIR **dirp, void *data,
+static int cb_get_mdt_index(char *path, int p, int *dp, void *data,
                            struct dirent64 *de)
 {
        struct find_param *param = (struct find_param *)data;
-       DIR *d = dirp == NULL ? NULL : *dirp;
+       int d = dp == NULL ? -1 : *dp;
        int ret;
        int mdtidx;
 
-       if (parent == NULL && d == NULL)
+       if (p == -1 && d == -1)
                return -EINVAL;
 
-       if (d != NULL) {
-               ret = llapi_file_fget_mdtidx(dirfd(d), &mdtidx);
-       } else /* if (parent) */ {
+       if (d != -1) {
+               ret = llapi_file_fget_mdtidx(d, &mdtidx);
+       } else /* if (p != -1) */ {
                int fd;
 
                fd = open(path, O_RDONLY | O_NOCTTY);
@@ -5355,34 +5426,49 @@ out:
        return 0;
 }
 
-static int cb_getstripe(char *path, DIR *parent, DIR **dirp, void *data,
+static int cb_getstripe(char *path, int p, int *dp, void *data,
                        struct dirent64 *de)
 {
        struct find_param *param = (struct find_param *)data;
-       DIR *d = dirp == NULL ? NULL : *dirp;
+       int d = dp == NULL ? -1 : *dp;
        int ret = 0;
 
-       if (parent == NULL && d == NULL)
+       if (p == -1 && d == -1)
                return -EINVAL;
 
        if (param->fp_obd_uuid) {
                param->fp_quiet = 1;
-               ret = setup_obd_uuid(d ? dirfd(d) : dirfd(parent), path, param);
+               ret = setup_obd_uuid(d != -1 ? d : p, path, param);
                if (ret)
                        return ret;
        }
 
-       if (d && (param->fp_get_lmv || param->fp_get_default_lmv))
-               ret = cb_get_dirstripe(path, d, param);
-       else if (d ||
-                (parent && !param->fp_get_lmv && !param->fp_get_default_lmv))
-               ret = get_lmd_info(path, parent, d, &param->fp_lmd->lmd_lmm,
-                                  param->fp_lum_size, GET_LMD_STRIPE);
-       else
+       if (d != -1 && (param->fp_get_lmv || param->fp_get_default_lmv))
+               ret = cb_get_dirstripe(path, &d, param);
+       else if (d != -1 ||
+                (p != -1 && !param->fp_get_lmv && !param->fp_get_default_lmv))
+               ret = get_lmd_info_fd(path, p, d, &param->fp_lmd->lmd_lmm,
+                                     param->fp_lum_size, GET_LMD_STRIPE);
+       else if (d == -1 && (param->fp_get_lmv || param->fp_get_default_lmv)) {
+               /* in case of a dangling or valid faked symlink dir, opendir()
+                * should have returned either EINVAL or ENOENT, so let's try
+                * to get LMV just in case, and by opening it as a file but
+                * with O_NOFOLLOW ...
+                */
+               int fd = open(path, O_RDONLY | O_NOFOLLOW);
+
+               if (fd == -1)
+                       return 0;
+               ret = cb_get_dirstripe(path, &fd, param);
+               if (ret == 0)
+                       llapi_lov_dump_user_lmm(param, path, LDF_IS_DIR);
+               close(fd);
+               return 0;
+       } else
                return 0;
 
        if (ret) {
-               if (errno == ENODATA && d != NULL) {
+               if (errno == ENODATA && d != -1) {
                        /*
                         * We need to "fake" the "use the default" values
                         * since the lmm struct is zeroed out at this point.
@@ -5402,7 +5488,7 @@ static int cb_getstripe(char *path, DIR *parent, DIR **dirp, void *data,
                                struct lmv_user_md *lum = param->fp_lmv_md;
                                int mdtidx;
 
-                               ret = llapi_file_fget_mdtidx(dirfd(d), &mdtidx);
+                               ret = llapi_file_fget_mdtidx(d, &mdtidx);
                                if (ret != 0)
                                        goto err_out;
                                lum->lum_magic = LMV_MAGIC_V1;
@@ -5422,7 +5508,7 @@ static int cb_getstripe(char *path, DIR *parent, DIR **dirp, void *data,
                                lmm->lmm_stripe_offset = -1;
                                goto dump;
                        }
-               } else if (errno == ENODATA && parent != NULL) {
+               } else if (errno == ENODATA && p != -1) {
                        if (!param->fp_obd_uuid && !param->fp_mdt_uuid)
                                llapi_printf(LLAPI_MSG_NORMAL,
                                             "%s has no stripe info\n", path);
@@ -5442,8 +5528,9 @@ static int cb_getstripe(char *path, DIR *parent, DIR **dirp, void *data,
 err_out:
                        llapi_error(LLAPI_MSG_ERROR, ret,
                                    "error: %s: %s failed for %s",
-                                    __func__, d ? "LL_IOC_LOV_GETSTRIPE" :
-                                   "IOC_MDC_GETFILESTRIPE", path);
+                                    __func__, d != -1 ?
+                                              "LL_IOC_LOV_GETSTRIPE" :
+                                              "IOC_MDC_GETFILESTRIPE", path);
                }
 
                return ret;
@@ -5451,7 +5538,7 @@ err_out:
 
 dump:
        if (!(param->fp_verbose & VERBOSE_MDTINDEX))
-               llapi_lov_dump_user_lmm(param, path, d ? LDF_IS_DIR : 0);
+               llapi_lov_dump_user_lmm(param, path, d != -1 ? LDF_IS_DIR : 0);
 
 out:
        /* Do not get down anymore? */
index e8af531..60ded02 100644 (file)
@@ -41,6 +41,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
+#include <libgen.h> /* for dirname() */
 #include <lustre/lustreapi.h>
 #include <linux/lustre/lustre_ver.h>   /* only until LUSTRE_VERSION_CODE is gone */
 #include "lustreapi_internal.h"
@@ -312,6 +313,92 @@ retry_open:
        return rc ? -errno : 0;
 }
 
+/*
+ * Remove the directory entry named by the last component of \a dname.
+ *
+ * The parent directory of \a dname is opened and LL_IOC_REMOVE_ENTRY is
+ * issued on it with the last path component as argument.
+ *
+ * \param[in] dname    path of the entry to remove
+ *
+ * \retval 0           on success
+ * \retval -ENOMEM     if the path could not be duplicated
+ * \retval -errno      if opening the parent directory or the ioctl failed
+ */
+int llapi_direntry_remove(char *dname)
+{
+       char *dirpath = NULL;
+       char *namepath = NULL;
+       char *dir;
+       char *filename;
+       int fd = -1;
+       int rc = 0;
+
+       /* basename()/dirname() may modify their argument: use two copies */
+       dirpath = strdup(dname);
+       namepath = strdup(dname);
+       if (!dirpath || !namepath) {
+               /* go through the common exit so the other copy is freed */
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       filename = basename(namepath);
+
+       dir = dirname(dirpath);
+
+       fd = open(dir, O_DIRECTORY | O_RDONLY);
+       if (fd < 0) {
+               rc = -errno;
+               /* report the path that actually failed to open */
+               llapi_error(LLAPI_MSG_ERROR, rc, "unable to open '%s'", dir);
+               goto out;
+       }
+
+       if (ioctl(fd, LL_IOC_REMOVE_ENTRY, filename)) {
+               /* save errno before logging and hand it back to the caller */
+               rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc,
+                           "error on ioctl %#lx for '%s' (%d)",
+                           (long)LL_IOC_REMOVE_ENTRY, filename, fd);
+       }
+out:
+       free(dirpath);
+       free(namepath);
+       if (fd != -1)
+               close(fd);
+       return rc;
+}
+
+/*
+ * Unlink a foreign symlink file or directory.
+ *
+ * \a name is opened without following symlinks, first as a directory and,
+ * if that fails with ENOTDIR, as a plain file. LL_IOC_UNLOCK_FOREIGN is
+ * then issued on the descriptor and the entry is removed with unlinkat().
+ *
+ * \param[in] name     path of the foreign file/dir to unlink
+ *
+ * \retval 0           on success
+ * \retval -errno      if \a name could not be opened or unlinked
+ */
+int llapi_unlink_foreign(char *name)
+{
+       int fd = -1;
+       int rc = 0;
+
+       fd = open(name, O_DIRECTORY | O_RDONLY | O_NOFOLLOW);
+       /* only look at errno when open() failed: a stale ENOTDIR after a
+        * successful open() must not trigger a second open() and leak fd
+        */
+       if (fd < 0 && errno == ENOTDIR)
+               fd = open(name, O_RDONLY | O_NOFOLLOW);
+       if (fd < 0) {
+               rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc, "unable to open '%s'", name);
+               goto out;
+       }
+
+       /* allow foreign symlink file/dir to be unlinked; a failure here is
+        * reported but the result is decided by unlinkat() below
+        */
+       if (ioctl(fd, LL_IOC_UNLOCK_FOREIGN))
+               llapi_error(LLAPI_MSG_ERROR, errno,
+                           "error on ioctl %#lx for '%s' (%d)",
+                           (long)LL_IOC_UNLOCK_FOREIGN, name, fd);
+
+       /* XXX do not set AT_REMOVEDIR in flags even for a dir, as due to the
+        * hack for foreign symlink it will fail the directory check in
+        * Kernel's syscall code and return ENOTDIR, so treat all as files
+        */
+       rc = unlinkat(AT_FDCWD, name, 0);
+       if (rc == -1 && errno == EISDIR)
+               rc = unlinkat(AT_FDCWD, name, AT_REMOVEDIR);
+
+       if (rc == -1) {
+               /* save errno before llapi_error() can overwrite it */
+               rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc,
+                           "error on unlinkat for '%s' (%d)", name, fd);
+       }
+
+out:
+       if (fd != -1)
+               close(fd);
+       return rc;
+}
+
 int llapi_get_fsname_instance(const char *path, char *fsname, size_t fsname_len,
                              char *instance, size_t instance_len)
 {