In kernel 3.11 O_TMPFILE was introduced, but the open flag value
conflicts with the O_LOV_DELAY_CREATE flag
020000000 added to fix
LU-812 in Lustre 2.4. O_LOV_DELAY_CREATE allows applications
to defer file layout and object creation from open time (the default)
until it can instead be specified by the application using an ioctl.
Instead of trying to find a non-conflicting O_LOV_DELAY_CREATE flag
or define a Lustre-specific flag that isn't of use to most/any other
filesystems, use (O_NOCTTY|FASYNC) as the new value. These flags
are not meaningful for newly-created regular files and should be
ok since O_LOV_DELAY_CREATE is only meaningful for new files.
I looked into using O_ACCMODE/FMODE_WRITE_IOCTL, which allows calling
ioctl() on the minimally-opened fd and is close to what is needed,
but that doesn't allow specifying the actual read or write mode for
the file, and fcntl(F_SETFL) doesn't allow O_RDONLY/O_WRONLY/O_RDWR
to be set after the file is opened.
We will keep the
0100000000 flag for backward compatibility until
3.13 is the oldest client kernel that is supported, but drop the
conflicting __O_TMPFILE value of
02000000 since that will cause an
error when running on newer kernels. The
020000000 has only been
used since Lustre 2.4.0 and always in conjunction with
0100000000,
so any apps that used O_LOV_DELAY_CREATE directly instead of calling
llapi_file_create*() will still work until Linux 3.13 is used.
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Change-Id: I565f3454616edc60c6acee01034aa5d773500c1e
Reviewed-on: http://review.whamcloud.com/8312
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Bob Glossman <bob.glossman@intel.com>
Reviewed-by: Peng Tao <bergwolf@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
* Kernel file descriptor
*/
struct file {
- int f_flags;
- vnode_t f_vp;
- vfs_context_t f_ctxt;
+ unsigned f_flags;
+ vnode_t f_vp;
+ vfs_context_t f_ctxt;
};
#endif
#define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
-/* Hopefully O_LOV_DELAY_CREATE does not conflict with standard O_xxx flags.
- * Previously it was defined as 0100000000 and conflicts with FMODE_NONOTIFY
- * which was added since kernel 2.6.36, so we redefine it as 020000000.
- * To be compatible with old version's statically linked binary, finally we
- * define it as (020000000 | 0100000000).
- * */
-#define O_LOV_DELAY_CREATE 0120000000
+/* Define O_LOV_DELAY_CREATE to be a mask that is not useful for regular
+ * files, but are unlikely to be used in practice and are not harmful if
+ * used incorrectly. O_NOCTTY and FASYNC are only meaningful for character
+ * devices and are safe for use on new files. See LU-4209. */
+/* To be compatible with old statically linked binary we keep the check for
+ * the older 0100000000 flag. This is already removed upstream. LU-812. */
+#define O_LOV_DELAY_CREATE_1_8 0100000000 /* FMODE_NONOTIFY masked in 2.6.36 */
+#define O_LOV_DELAY_CREATE_MASK (O_NOCTTY | FASYNC)
+#define O_LOV_DELAY_CREATE (O_LOV_DELAY_CREATE_1_8 | \
+ O_LOV_DELAY_CREATE_MASK)
#define LL_FILE_IGNORE_LOCK 0x00000001
#define LL_FILE_GROUP_LOCKED 0x00000002
const struct page *page, int offset);
#endif
+static inline bool cl_is_lov_delay_create(unsigned int flags)
+{
+ return (flags & O_LOV_DELAY_CREATE_1_8) != 0 ||
+ (flags & O_LOV_DELAY_CREATE_MASK) == O_LOV_DELAY_CREATE_MASK;
+}
+
+static inline void cl_lov_delay_create_clear(unsigned int *flags)
+{
+ if ((*flags & O_LOV_DELAY_CREATE_1_8) != 0)
+ *flags &= ~O_LOV_DELAY_CREATE_1_8;
+ if ((*flags & O_LOV_DELAY_CREATE_MASK) == O_LOV_DELAY_CREATE_MASK)
+ *flags &= ~O_LOV_DELAY_CREATE_MASK;
+}
+
/** @} mdc */
#endif
if (!S_ISREG(st->st_mode))
GOTO(out_release, rc = 0);
- if (lli->lli_has_smd)
- flags &= ~O_LOV_DELAY_CREATE;
- /*XXX: open_flags are overwritten and the previous ones are lost */
- lli->lli_open_flags = flags & ~(O_CREAT | O_EXCL | O_TRUNC);
+ if (lli->lli_has_smd && cl_is_lov_delay_create(flags)) {
+ /* a bit ugly, but better than changing the open() API */
+ unsigned int tmp_flags = flags;
+
+ cl_lov_delay_create_clear(&tmp_flags);
+ flags = tmp_flags;
+ }
+ /*XXX: open_flags are overwritten and the previous ones are lost */
+ lli->lli_open_flags = flags & ~(O_CREAT | O_EXCL | O_TRUNC);
out_release:
request = it->d.lustre.it_data;
ll_capa_open(inode);
- if (!lli->lli_has_smd) {
- if (file->f_flags & O_LOV_DELAY_CREATE ||
- !(file->f_mode & FMODE_WRITE)) {
- CDEBUG(D_INODE, "object creation was delayed\n");
- GOTO(out_och_free, rc);
- }
- }
- file->f_flags &= ~O_LOV_DELAY_CREATE;
- GOTO(out_och_free, rc);
+ if (!lli->lli_has_smd &&
+ (cl_is_lov_delay_create(file->f_flags) ||
+ (file->f_mode & FMODE_WRITE) == 0)) {
+ CDEBUG(D_INODE, "object creation was delayed\n");
+ GOTO(out_och_free, rc);
+ }
+ cl_lov_delay_create_clear(&file->f_flags);
+ GOTO(out_och_free, rc);
out_och_free:
if (rc) {
ccc_inode_lsm_put(inode, lsm);
CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
PFID(ll_inode2fid(inode)));
- RETURN(-EEXIST);
+ GOTO(out, rc = -EEXIST);
}
ll_inode_size_lock(inode);
- rc = ll_intent_file_open(file, lum, lum_size, &oit);
- if (rc)
- GOTO(out, rc);
- rc = oit.d.lustre.it_status;
- if (rc < 0)
- GOTO(out_req_free, rc);
+ rc = ll_intent_file_open(file, lum, lum_size, &oit);
+ if (rc)
+ GOTO(out_unlock, rc);
+ rc = oit.d.lustre.it_status;
+ if (rc < 0)
+ GOTO(out_req_free, rc);
- ll_release_openhandle(file->f_dentry, &oit);
+ ll_release_openhandle(file->f_dentry, &oit);
- out:
+out_unlock:
ll_inode_size_unlock(inode);
ll_intent_release(&oit);
ccc_inode_lsm_put(inode, lsm);
+out:
+ cl_lov_delay_create_clear(&file->f_flags);
RETURN(rc);
out_req_free:
ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK |
MDS_OPEN_BY_FID | MDS_OPEN_LEASE |
MDS_OPEN_RELEASE));
- if (flags & O_CREAT)
- cr_flags |= MDS_OPEN_CREAT;
- if (flags & O_EXCL)
- cr_flags |= MDS_OPEN_EXCL;
- if (flags & O_TRUNC)
- cr_flags |= MDS_OPEN_TRUNC;
- if (flags & O_APPEND)
- cr_flags |= MDS_OPEN_APPEND;
- if (flags & O_SYNC)
- cr_flags |= MDS_OPEN_SYNC;
- if (flags & O_DIRECTORY)
- cr_flags |= MDS_OPEN_DIRECTORY;
+ if (flags & O_CREAT)
+ cr_flags |= MDS_OPEN_CREAT;
+ if (flags & O_EXCL)
+ cr_flags |= MDS_OPEN_EXCL;
+ if (flags & O_TRUNC)
+ cr_flags |= MDS_OPEN_TRUNC;
+ if (flags & O_APPEND)
+ cr_flags |= MDS_OPEN_APPEND;
+ if (flags & O_SYNC)
+ cr_flags |= MDS_OPEN_SYNC;
+ if (flags & O_DIRECTORY)
+ cr_flags |= MDS_OPEN_DIRECTORY;
#ifdef FMODE_EXEC
- if (flags & FMODE_EXEC)
- cr_flags |= MDS_FMODE_EXEC;
+ if (flags & FMODE_EXEC)
+ cr_flags |= MDS_FMODE_EXEC;
#endif
- if (flags & O_LOV_DELAY_CREATE)
- cr_flags |= MDS_OPEN_DELAY_CREATE;
+ if (cl_is_lov_delay_create(flags))
+ cr_flags |= MDS_OPEN_DELAY_CREATE;
- if ((flags & O_NOACCESS) || (flags & O_NONBLOCK))
- cr_flags |= MDS_OPEN_NORESTORE;
+ if ((flags & O_NOACCESS) || (flags & O_NONBLOCK))
+ cr_flags |= MDS_OPEN_NORESTORE;
- return cr_flags;
+ return cr_flags;
}
/* packing of MDS records */
}
int llapi_file_open_pool(const char *name, int flags, int mode,
- unsigned long long stripe_size, int stripe_offset,
- int stripe_count, int stripe_pattern, char *pool_name)
+ unsigned long long stripe_size, int stripe_offset,
+ int stripe_count, int stripe_pattern, char *pool_name)
{
- struct lov_user_md_v3 lum = { 0 };
- int fd, rc = 0;
- int isdir = 0;
+ struct lov_user_md_v3 lum = { 0 };
+ int fd, rc = 0;
/* Make sure we have a good pool */
if (pool_name != NULL) {
}
}
- fd = open(name, flags | O_LOV_DELAY_CREATE, mode);
- if (fd < 0 && errno == EISDIR) {
- fd = open(name, O_DIRECTORY | O_RDONLY);
- isdir++;
- }
+retry_open:
+ fd = open(name, flags | O_LOV_DELAY_CREATE, mode);
+ if (fd < 0) {
+ if (errno == EISDIR && !(flags & O_DIRECTORY)) {
+ flags = O_DIRECTORY | O_RDONLY;
+ goto retry_open;
+ }
+ }
if (fd < 0) {
rc = -errno;
* - file modes are rw-------, if user wants another one it must use fchmod()
* \param directory Directory where the file is created
* \param idx MDT index on which the file is created
- * \param flags Std open flags
+ * \param open_flags Standard open flags
*
* \retval 0 on success.
* \retval -errno on error.
*/
-int llapi_create_volatile_idx(char *directory, int idx, int mode)
+int llapi_create_volatile_idx(char *directory, int idx, int open_flags)
{
char file_path[PATH_MAX];
char filename[PATH_MAX];
if (rc >= sizeof(file_path))
return -E2BIG;
- fd = open(file_path, (O_RDWR | O_CREAT | mode), (S_IRUSR | S_IWUSR));
+ fd = open(file_path, O_RDWR | O_CREAT | open_flags, S_IRUSR | S_IWUSR);
if (fd < 0) {
llapi_error(LLAPI_MSG_ERROR, errno,
"Cannot create volatile file %s in %s\n",
fd1 = open(path1, O_WRONLY | O_LOV_DELAY_CREATE);
if (fd1 < 0) {
- llapi_error(LLAPI_MSG_ERROR, -errno,
- "error: cannot open for write %s",
- path1);
- return -errno;
+ rc = -errno;
+ llapi_error(LLAPI_MSG_ERROR, rc,
+ "error: cannot open '%s' for write", path1);
+ goto out;
}
fd2 = open(path2, O_WRONLY | O_LOV_DELAY_CREATE);
if (fd2 < 0) {
- llapi_error(LLAPI_MSG_ERROR, -errno,
- "error: cannot open for write %s",
- path2);
- close(fd1);
- return -errno;
+ rc = -errno;
+ llapi_error(LLAPI_MSG_ERROR, rc,
+ "error: cannot open '%s' for write", path2);
+ goto out_close;
}
rc = llapi_fswap_layouts(fd1, fd2, dv1, dv2, flags);
if (rc < 0)
llapi_error(LLAPI_MSG_ERROR, rc,
- "error: cannot swap layouts between %s and %s\n",
- path1, path2);
+ "error: cannot swap layout between '%s' and '%s'\n",
+ path1, path2);
- close(fd1);
close(fd2);
+out_close:
+ close(fd1);
+out:
return rc;
}