From 7681cdd22debca79c149e21d74b4e1ef508fead0 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Mon, 18 Nov 2013 02:47:26 -0700 Subject: [PATCH] LU-4209 utils: fix O_TMPFILE/O_LOV_DELAY_CREATE conflict In kernel 3.11 O_TMPFILE was introduced, but the open flag value conflicts with the O_LOV_DELAY_CREATE flag 020000000 added to fix LU-812 in Lustre 2.4. O_LOV_DELAY_CREATE allows applications to defer file layout and object creation from open time (the default) until it can instead be specified by the application using an ioctl. Instead of trying to find a non-conflicting O_LOV_DELAY_CREATE flag or define a Lustre-specific flag that isn't of use to most/any other filesystems, use (O_NOCTTY|FASYNC) as the new value. These flags are not meaningful for newly-created regular files and should be ok since O_LOV_DELAY_CREATE is only meaningful for new files. I looked into using O_ACCMODE/FMODE_WRITE_IOCTL, which allows calling ioctl() on the minimally-opened fd and is close to what is needed, but that doesn't allow specifying the actual read or write mode for the file, and fcntl(F_SETFL) doesn't allow O_RDONLY/O_WRONLY/O_RDWR to be set after the file is opened. We will keep the 0100000000 flag for backward compatibility until 3.13 is the oldest client kernel that is supported, but drop the conflicting __O_TMPFILE value of 02000000 since that will cause an error when running on newer kernels. The 020000000 has only been used since Lustre 2.4.0 and always in conjunction with 0100000000, so any apps that used O_LOV_DELAY_CREATE directly instead of calling llapi_file_create*() will still work until Linux 3.13 is used. Signed-off-by: Andreas Dilger Change-Id: I565f3454616edc60c6acee01034aa5d773500c1e Reviewed-on: http://review.whamcloud.com/8312 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Bob Glossman Reviewed-by: Peng Tao Reviewed-by: Oleg Drokin --- libcfs/include/libcfs/darwin/darwin-fs.h | 6 ++-- lustre/include/lustre/lustre_user.h | 17 +++++----- lustre/include/lustre_mdc.h | 14 +++++++++ lustre/liblustre/file.c | 13 +++++--- lustre/llite/file.c | 37 +++++++++++----------- lustre/mdc/mdc_lib.c | 38 +++++++++++------------ lustre/utils/liblustreapi.c | 53 +++++++++++++++++--------------- 7 files changed, 102 insertions(+), 76 deletions(-) diff --git a/libcfs/include/libcfs/darwin/darwin-fs.h b/libcfs/include/libcfs/darwin/darwin-fs.h index 9f71d6a..177f7d0 100644 --- a/libcfs/include/libcfs/darwin/darwin-fs.h +++ b/libcfs/include/libcfs/darwin/darwin-fs.h @@ -81,9 +81,9 @@ * Kernel file descriptor */ struct file { - int f_flags; - vnode_t f_vp; - vfs_context_t f_ctxt; + unsigned f_flags; + vnode_t f_vp; + vfs_context_t f_ctxt; }; #endif diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index cca3216..e9b205b 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -284,13 +284,16 @@ struct ost_id { #define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */ -/* Hopefully O_LOV_DELAY_CREATE does not conflict with standard O_xxx flags. - * Previously it was defined as 0100000000 and conflicts with FMODE_NONOTIFY - * which was added since kernel 2.6.36, so we redefine it as 020000000. - * To be compatible with old version's statically linked binary, finally we - * define it as (020000000 | 0100000000). - * */ -#define O_LOV_DELAY_CREATE 0120000000 +/* Define O_LOV_DELAY_CREATE to be a mask that is not useful for regular + * files, but are unlikely to be used in practice and are not harmful if + * used incorrectly. O_NOCTTY and FASYNC are only meaningful for character + * devices and are safe for use on new files. See LU-4209. */ +/* To be compatible with old statically linked binary we keep the check for + * the older 0100000000 flag. This is already removed upstream. LU-812. */ +#define O_LOV_DELAY_CREATE_1_8 0100000000 /* FMODE_NONOTIFY masked in 2.6.36 */ +#define O_LOV_DELAY_CREATE_MASK (O_NOCTTY | FASYNC) +#define O_LOV_DELAY_CREATE (O_LOV_DELAY_CREATE_1_8 | \ + O_LOV_DELAY_CREATE_MASK) #define LL_FILE_IGNORE_LOCK 0x00000001 #define LL_FILE_GROUP_LOCKED 0x00000002 diff --git a/lustre/include/lustre_mdc.h b/lustre/include/lustre_mdc.h index e7152d8..3c7b6f0 100644 --- a/lustre/include/lustre_mdc.h +++ b/lustre/include/lustre_mdc.h @@ -197,6 +197,20 @@ int mdc_sendpage(struct obd_export *exp, const struct lu_fid *fid, const struct page *page, int offset); #endif +static inline bool cl_is_lov_delay_create(unsigned int flags) +{ + return (flags & O_LOV_DELAY_CREATE_1_8) != 0 || + (flags & O_LOV_DELAY_CREATE_MASK) == O_LOV_DELAY_CREATE_MASK; +} + +static inline void cl_lov_delay_create_clear(unsigned int *flags) +{ + if ((*flags & O_LOV_DELAY_CREATE_1_8) != 0) + *flags &= ~O_LOV_DELAY_CREATE_1_8; + if ((*flags & O_LOV_DELAY_CREATE_MASK) == O_LOV_DELAY_CREATE_MASK) + *flags &= ~O_LOV_DELAY_CREATE_MASK; +} + /** @} mdc */ #endif diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index be3bdd5..13350c2 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -212,10 +212,15 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) if (!S_ISREG(st->st_mode)) GOTO(out_release, rc = 0); - if (lli->lli_has_smd) - flags &= ~O_LOV_DELAY_CREATE; - /*XXX: open_flags are overwritten and the previous ones are lost */ - lli->lli_open_flags = flags & ~(O_CREAT | O_EXCL | O_TRUNC); + if (lli->lli_has_smd && cl_is_lov_delay_create(flags)) { + /* a bit ugly, but better than changing the open() API */ + unsigned int tmp_flags = flags; + + cl_lov_delay_create_clear(&tmp_flags); + flags = tmp_flags; + } + /*XXX: open_flags are overwritten and the previous ones are lost */ + lli->lli_open_flags = flags & ~(O_CREAT | O_EXCL | O_TRUNC); out_release: request = it->d.lustre.it_data; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index f57b204..a9f1314 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -695,15 +695,14 @@ restart: ll_capa_open(inode); - if (!lli->lli_has_smd) { - if (file->f_flags & O_LOV_DELAY_CREATE || - !(file->f_mode & FMODE_WRITE)) { - CDEBUG(D_INODE, "object creation was delayed\n"); - GOTO(out_och_free, rc); - } - } - file->f_flags &= ~O_LOV_DELAY_CREATE; - GOTO(out_och_free, rc); + if (!lli->lli_has_smd && + (cl_is_lov_delay_create(file->f_flags) || + (file->f_mode & FMODE_WRITE) == 0)) { + CDEBUG(D_INODE, "object creation was delayed\n"); + GOTO(out_och_free, rc); + } + cl_lov_delay_create_clear(&file->f_flags); + GOTO(out_och_free, rc); out_och_free: if (rc) { @@ -1503,23 +1502,25 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, ccc_inode_lsm_put(inode, lsm); CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n", PFID(ll_inode2fid(inode))); - RETURN(-EEXIST); + GOTO(out, rc = -EEXIST); } ll_inode_size_lock(inode); - rc = ll_intent_file_open(file, lum, lum_size, &oit); - if (rc) - GOTO(out, rc); - rc = oit.d.lustre.it_status; - if (rc < 0) - GOTO(out_req_free, rc); + rc = ll_intent_file_open(file, lum, lum_size, &oit); + if (rc) + GOTO(out_unlock, rc); + rc = oit.d.lustre.it_status; + if (rc < 0) + GOTO(out_req_free, rc); - ll_release_openhandle(file->f_dentry, &oit); + ll_release_openhandle(file->f_dentry, &oit); - out: +out_unlock: ll_inode_size_unlock(inode); ll_intent_release(&oit); ccc_inode_lsm_put(inode, lsm); +out: + cl_lov_delay_create_clear(&file->f_flags); RETURN(rc); out_req_free: ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data); diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index 544e816..3de8ca5 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -194,29 +194,29 @@ static __u64 mds_pack_open_flags(__u64 flags, __u32 mode) MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE | MDS_OPEN_RELEASE)); - if (flags & O_CREAT) - cr_flags |= MDS_OPEN_CREAT; - if (flags & O_EXCL) - cr_flags |= MDS_OPEN_EXCL; - if (flags & O_TRUNC) - cr_flags |= MDS_OPEN_TRUNC; - if (flags & O_APPEND) - cr_flags |= MDS_OPEN_APPEND; - if (flags & O_SYNC) - cr_flags |= MDS_OPEN_SYNC; - if (flags & O_DIRECTORY) - cr_flags |= MDS_OPEN_DIRECTORY; + if (flags & O_CREAT) + cr_flags |= MDS_OPEN_CREAT; + if (flags & O_EXCL) + cr_flags |= MDS_OPEN_EXCL; + if (flags & O_TRUNC) + cr_flags |= MDS_OPEN_TRUNC; + if (flags & O_APPEND) + cr_flags |= MDS_OPEN_APPEND; + if (flags & O_SYNC) + cr_flags |= MDS_OPEN_SYNC; + if (flags & O_DIRECTORY) + cr_flags |= MDS_OPEN_DIRECTORY; #ifdef FMODE_EXEC - if (flags & FMODE_EXEC) - cr_flags |= MDS_FMODE_EXEC; + if (flags & FMODE_EXEC) + cr_flags |= MDS_FMODE_EXEC; #endif - if (flags & O_LOV_DELAY_CREATE) - cr_flags |= MDS_OPEN_DELAY_CREATE; + if (cl_is_lov_delay_create(flags)) + cr_flags |= MDS_OPEN_DELAY_CREATE; - if ((flags & O_NOACCESS) || (flags & O_NONBLOCK)) - cr_flags |= MDS_OPEN_NORESTORE; + if ((flags & O_NOACCESS) || (flags & O_NONBLOCK)) + cr_flags |= MDS_OPEN_NORESTORE; - return cr_flags; + return cr_flags; } /* packing of MDS records */ diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 0a9f684..04827bb 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -657,12 +657,11 @@ int llapi_search_ost(char *fsname, char *poolname, char *ostname) } int llapi_file_open_pool(const char *name, int flags, int mode, - unsigned long long stripe_size, int stripe_offset, - int stripe_count, int stripe_pattern, char *pool_name) + unsigned long long stripe_size, int stripe_offset, + int stripe_count, int stripe_pattern, char *pool_name) { - struct lov_user_md_v3 lum = { 0 }; - int fd, rc = 0; - int isdir = 0; + struct lov_user_md_v3 lum = { 0 }; + int fd, rc = 0; /* Make sure we have a good pool */ if (pool_name != NULL) { @@ -701,11 +700,14 @@ int llapi_file_open_pool(const char *name, int flags, int mode, } } - fd = open(name, flags | O_LOV_DELAY_CREATE, mode); - if (fd < 0 && errno == EISDIR) { - fd = open(name, O_DIRECTORY | O_RDONLY); - isdir++; - } +retry_open: + fd = open(name, flags | O_LOV_DELAY_CREATE, mode); + if (fd < 0) { + if (errno == EISDIR && !(flags & O_DIRECTORY)) { + flags = O_DIRECTORY | O_RDONLY; + goto retry_open; + } + } if (fd < 0) { rc = -errno; @@ -4290,12 +4292,12 @@ int llapi_get_data_version(int fd, __u64 *data_version, __u64 flags) * - file modes are rw-------, if user wants another one it must use fchmod() * \param directory Directory where the file is created * \param idx MDT index on which the file is created - * \param flags Std open flags + * \param open_flags Standard open flags * * \retval 0 on success. * \retval -errno on error. */ -int llapi_create_volatile_idx(char *directory, int idx, int mode) +int llapi_create_volatile_idx(char *directory, int idx, int open_flags) { char file_path[PATH_MAX]; char filename[PATH_MAX]; @@ -4329,7 +4331,7 @@ int llapi_create_volatile_idx(char *directory, int idx, int mode) if (rc >= sizeof(file_path)) return -E2BIG; - fd = open(file_path, (O_RDWR | O_CREAT | mode), (S_IRUSR | S_IWUSR)); + fd = open(file_path, O_RDWR | O_CREAT | open_flags, S_IRUSR | S_IWUSR); if (fd < 0) { llapi_error(LLAPI_MSG_ERROR, errno, "Cannot create volatile file %s in %s\n", @@ -4374,28 +4376,29 @@ int llapi_swap_layouts(const char *path1, const char *path2, fd1 = open(path1, O_WRONLY | O_LOV_DELAY_CREATE); if (fd1 < 0) { - llapi_error(LLAPI_MSG_ERROR, -errno, - "error: cannot open for write %s", - path1); - return -errno; + rc = -errno; + llapi_error(LLAPI_MSG_ERROR, rc, + "error: cannot open '%s' for write", path1); + goto out; } fd2 = open(path2, O_WRONLY | O_LOV_DELAY_CREATE); if (fd2 < 0) { - llapi_error(LLAPI_MSG_ERROR, -errno, - "error: cannot open for write %s", - path2); - close(fd1); - return -errno; + rc = -errno; + llapi_error(LLAPI_MSG_ERROR, rc, + "error: cannot open '%s' for write", path2); + goto out_close; } rc = llapi_fswap_layouts(fd1, fd2, dv1, dv2, flags); if (rc < 0) llapi_error(LLAPI_MSG_ERROR, rc, - "error: cannot swap layouts between %s and %s\n", - path1, path2); + "error: cannot swap layout between '%s' and '%s'\n", + path1, path2); - close(fd1); close(fd2); +out_close: + close(fd1); +out: return rc; } -- 1.8.3.1