* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2016, Intel Corporation.
+ * Copyright (c) 2010, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* @{
*/
+#include <linux/kernel.h>
#include <linux/types.h>
#ifdef __KERNEL__
# include <linux/lustre/lustre_fiemap.h>
#endif /* __KERNEL__ */
+/* Handle older distros: provide __ALIGN_KERNEL() only when the headers
+ * included above did not already define it.
+ * __ALIGN_KERNEL(x, a) rounds x up to a multiple of a by adding the mask
+ * (a - 1) and then clearing those low bits, so a must be a power of two.
+ * __ALIGN_KERNEL_MASK is referenced before its definition; that is fine
+ * because macros are only expanded at the point of use. */
+#ifndef __ALIGN_KERNEL
+# define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+# define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
+#endif
+
#if defined(__cplusplus)
extern "C" {
#endif
};
#define PRJQUOTA 2
-#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \
- defined(__craynv) || defined(__mips64__) || defined(__powerpc64__) || \
- defined(__aarch64__)
-typedef struct stat lstat_t;
-# define lstat_f lstat
-# define fstat_f fstat
-# define fstatat_f fstatat
-# define HAVE_LOV_USER_MDS_DATA
-#elif defined(__USE_LARGEFILE64) || defined(__KERNEL__)
-typedef struct stat64 lstat_t;
-# define lstat_f lstat64
-# define fstat_f fstat64
-# define fstatat_f fstatat64
-# define HAVE_LOV_USER_MDS_DATA
+/*
+ * Always use the 64-bit stat variants, because the structure is shared
+ * across the entire cluster, where 32-bit and 64-bit machines coexist.
+ * When long is not 64 bits wide, or __ARCH_WANT_STAT64 is defined, the
+ * explicit *64 type and calls are selected; otherwise the plain stat
+ * names are used.
+ */
+#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
+typedef struct stat64 lstat_t;
+#define lstat_f lstat64
+#define fstat_f fstat64
+#define fstatat_f fstatat64
+#else
+typedef struct stat lstat_t;
+#define lstat_f lstat
+#define fstat_f fstat
+#define fstatat_f fstatat
#endif
+#define HAVE_LOV_USER_MDS_DATA
+
#define LUSTRE_EOF 0xffffffffffffffffULL
/* for statfs() */
* under /O/<seq>/d<x>. */
LMAC_STRIPE_INFO = 0x00000010, /* stripe info in the LMA EA. */
LMAC_COMP_INFO = 0x00000020, /* Component info in the LMA EA. */
+ LMAC_IDX_BACKUP = 0x00000040, /* Has index backup. */
};
/**
enum ll_lease_flags {
LL_LEASE_RESYNC = 0x1,
LL_LEASE_RESYNC_DONE = 0x2,
+ LL_LEASE_LAYOUT_MERGE = 0x4,
+ LL_LEASE_LAYOUT_SPLIT = 0x8,
};
#define IOC_IDS_MAX 4096
static inline bool lov_pattern_supported(__u32 pattern)
{
- return pattern == LOV_PATTERN_RAID0 ||
- pattern == LOV_PATTERN_MDT ||
- pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_F_RELEASED);
+ return (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_RAID0 ||
+ (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_MDT; /* F_RELEASED is masked off, so RAID0 and MDT match with or without it */
}
#define LOV_MAXPOOLNAME 15
}
enum lov_comp_md_entry_flags {
- LCME_FL_PRIMARY = 0x00000001, /* Not used */
- LCME_FL_STALE = 0x00000002, /* Not used */
- LCME_FL_OFFLINE = 0x00000004, /* Not used */
- LCME_FL_PREFERRED = 0x00000008, /* Not used */
+ LCME_FL_STALE = 0x00000001, /* FLR: stale data */
+ LCME_FL_PREF_RD = 0x00000002, /* FLR: preferred for reading */
+ LCME_FL_PREF_WR = 0x00000004, /* FLR: preferred for writing */
+ LCME_FL_PREF_RW = LCME_FL_PREF_RD | LCME_FL_PREF_WR, /* FLR: both preference bits (read and write) */
+ LCME_FL_OFFLINE = 0x00000008, /* Not used */
LCME_FL_INIT = 0x00000010, /* instantiated */
LCME_FL_NEG = 0x80000000 /* used to indicate a negative flag,
won't be stored on disk */
};
-#define LCME_KNOWN_FLAGS (LCME_FL_NEG | LCME_FL_INIT)
+#define LCME_KNOWN_FLAGS (LCME_FL_NEG | LCME_FL_INIT | LCME_FL_STALE | \
+ LCME_FL_PREF_RW)
+/* The flags can be set by users at mirror creation time. */
+#define LCME_USER_FLAGS (LCME_FL_PREF_RW)
/* the highest bit in obdo::o_layout_version is used to mark if the file is
* being resynced. */
*/
enum lov_comp_md_flags {
/* the least 2 bits are used by FLR to record file state */
- LCM_FL_NOT_FLR = 0,
+ LCM_FL_NONE = 0,
LCM_FL_RDONLY = 1,
LCM_FL_WRITE_PENDING = 2,
LCM_FL_SYNC_PENDING = 3,
struct lov_comp_md_entry_v1 lcm_entries[0];
} __attribute__((packed));
-/*
- * Maximum number of mirrors Lustre can support.
- */
-#define LUSTRE_MIRROR_COUNT_MAX 16
-
static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
{
if (stripes == (__u16)-1)
#endif /* !__KERNEL__ */
/* lustre volatile file support
- * file name header: .^L^S^T^R:volatile"
+ * file name header: ".^L^S^T^R:VOLATILE"
*/
#define LUSTRE_VOLATILE_HDR ".\x0c\x13\x14\x12:VOLATILE"
#define LUSTRE_VOLATILE_HDR_LEN 14
-typedef enum lustre_quota_version {
+enum lustre_quota_version {
LUSTRE_QUOTA_V2 = 1
-} lustre_quota_version_t;
+};
/* XXX: same as if_dqinfo struct in kernel */
struct obd_dqinfo {
#define SWAP_LAYOUTS_KEEP_MTIME (1 << 2)
#define SWAP_LAYOUTS_KEEP_ATIME (1 << 3)
#define SWAP_LAYOUTS_CLOSE (1 << 4)
-#define MERGE_LAYOUTS_CLOSE (1 << 5)
-#define INTENT_LAYOUTS_CLOSE (SWAP_LAYOUTS_CLOSE | MERGE_LAYOUTS_CLOSE)
/* Swap XATTR_NAME_HSM as well, only on the MDT so far */
#define SWAP_LAYOUTS_MDS_HSM (1 << 31)
__u64 sl_dv2;
};
+/** Bit-mask of valid attributes */
+/* The LA_* flags are written to disk as part of the ChangeLog records
+ * so they are part of the on-disk and network protocol, and cannot be changed.
+ * Only the first 12 bits (LA_ATIME through LA_RDEV) are currently saved.
+ */
+enum la_valid {
+ LA_ATIME = 1 << 0,
+ LA_MTIME = 1 << 1,
+ LA_CTIME = 1 << 2,
+ LA_SIZE = 1 << 3,
+ LA_MODE = 1 << 4,
+ LA_UID = 1 << 5,
+ LA_GID = 1 << 6,
+ LA_BLOCKS = 1 << 7,
+ LA_TYPE = 1 << 8,
+ LA_FLAGS = 1 << 9,
+ LA_NLINK = 1 << 10,
+ LA_RDEV = 1 << 11,
+ LA_BLKSIZE = 1 << 12,
+ LA_KILL_SUID = 1 << 13,
+ LA_KILL_SGID = 1 << 14,
+ LA_PROJID = 1 << 15,
+ LA_LAYOUT_VERSION = 1 << 16,
+ /**
+ * Attributes that must be transmitted to OST objects
+ */
+ LA_REMOTE_ATTR_SET = (LA_UID | LA_GID | LA_PROJID | LA_LAYOUT_VERSION)
+};
+
+#ifndef FMODE_READ
+#define FMODE_READ 00000001
+#define FMODE_WRITE 00000002
+#endif
+
+#define MDS_FMODE_CLOSED 00000000
+#define MDS_FMODE_EXEC 00000004
+/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */
+/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */
+/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */
+
+#define MDS_OPEN_CREATED 00000010
+#define MDS_OPEN_CROSS 00000020
+
+#define MDS_OPEN_CREAT 00000100
+#define MDS_OPEN_EXCL 00000200
+#define MDS_OPEN_TRUNC 00001000
+#define MDS_OPEN_APPEND 00002000
+#define MDS_OPEN_SYNC 00010000
+#define MDS_OPEN_DIRECTORY 00200000
+
+#define MDS_OPEN_BY_FID 040000000 /* open_by_fid for known object */
+#define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */
+#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */
+#define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file.
+ * JOIN FILE is no longer
+ * supported; this flag is kept
+ * reserved only so that the bit
+ * is not reused. */
+
+#define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */
+#define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */
+#define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */
+#define MDS_OPEN_NORESTORE 0100000000000ULL /* Do not restore file at open */
+#define MDS_OPEN_NEWSTRIPE 0200000000000ULL /* New stripe needed (restripe or
+ * hsm restore) */
+#define MDS_OPEN_VOLATILE 0400000000000ULL /* File is volatile, i.e. it is
+ * created unlinked */
+#define MDS_OPEN_LEASE 01000000000000ULL /* Open the file and grant lease
+ * delegation, succeed if it's not
+ * being opened with conflict mode.
+ */
+#define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */
+
+#define MDS_OPEN_RESYNC 04000000000000ULL /* FLR: file resync */
+
+/* lustre internal open flags, which should not be set from user space */
+#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \
+ MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \
+ MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \
+ MDS_OPEN_RELEASE | MDS_OPEN_RESYNC)
+
/********* Changelogs **********/
/** Changelog record types */
CL_LAYOUT = 12, /* file layout/striping modified */
CL_TRUNC = 13,
CL_SETATTR = 14,
- CL_XATTR = 15,
+ CL_SETXATTR = 15,
+ CL_XATTR = CL_SETXATTR, /* Deprecated name */
CL_HSM = 16, /* HSM specific events, see flags */
CL_MTIME = 17, /* Precedence: setattr > mtime > ctime > atime */
CL_CTIME = 18,
CL_MIGRATE = 20,
CL_FLRW = 21, /* FLR: file was firstly written */
CL_RESYNC = 22, /* FLR: file was resync-ed */
+ CL_GETXATTR = 23,
+ CL_DN_OPEN = 24, /* denied open */
CL_LAST
};
"MARK", "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK",
"RMDIR", "RENME", "RNMTO", "OPEN", "CLOSE", "LYOUT", "TRUNC",
"SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME", "MIGRT",
- "FLRW", "RESYNC",
+ "FLRW", "RESYNC","GXATR", "NOPEN",
};
if (type >= 0 && type < CL_LAST)
enum changelog_rec_extra_flags {
CLFE_INVALID = 0,
CLFE_UIDGID = 0x0001,
- CLFE_SUPPORTED = CLFE_UIDGID
+ CLFE_NID = 0x0002, /* record accounts for struct changelog_ext_nid */
+ CLFE_OPEN = 0x0004, /* record accounts for struct changelog_ext_openmode */
+ CLFE_XATTR = 0x0008, /* record accounts for struct changelog_ext_xattr */
+ CLFE_SUPPORTED = CLFE_UIDGID | CLFE_NID | CLFE_OPEN | CLFE_XATTR
};
enum changelog_send_flag {
enum changelog_send_extra_flag {
/* Pack uid/gid into the changelog record */
CHANGELOG_EXTRA_FLAG_UIDGID = 0x01,
+ /* Pack nid into the changelog record */
+ CHANGELOG_EXTRA_FLAG_NID = 0x02,
+ /* Pack open mode into the changelog record */
+ CHANGELOG_EXTRA_FLAG_OMODE = 0x04,
+ /* Pack xattr name into the changelog record */
+ CHANGELOG_EXTRA_FLAG_XATTR = 0x08,
};
-#define CR_MAXSIZE cfs_size_round(2 * NAME_MAX + 2 + \
+#define CR_MAXSIZE __ALIGN_KERNEL(2 * NAME_MAX + 2 + \
changelog_rec_offset(CLF_SUPPORTED, \
- CLFE_SUPPORTED))
+ CLFE_SUPPORTED), 8)
/* 31 usable bytes string + null terminator. */
#define LUSTRE_JOBID_SIZE 32
__u64 cr_gid;
};
+/* Changelog extra extension to include NID.
+ * Its size is added to the record offset when CLFE_NID is set in the
+ * extra flags; changelog_rec_nid() returns a pointer to it. */
+struct changelog_ext_nid {
+ /* have __u64 instead of lnet_nid_t type for use by client api */
+ __u64 cr_nid;
+ /* for use when IPv6 support is added */
+ __u64 extra;
+ __u32 padding;
+};
+
+/* Changelog extra extension to include OPEN mode.
+ * Its size is added to the record offset when CLFE_OPEN is set in the
+ * extra flags; changelog_rec_openmode() returns a pointer to it. */
+struct changelog_ext_openmode {
+ __u32 cr_openflags;
+};
+
+/* Changelog extra extension to include xattr.
+ * Its size is added to the record offset when CLFE_XATTR is set in the
+ * extra flags; changelog_rec_xattr() returns a pointer to it. The name
+ * buffer holds XATTR_NAME_MAX characters plus the terminator. */
+struct changelog_ext_xattr {
+ char cr_xattr[XATTR_NAME_MAX + 1]; /**< zero-terminated string. */
+};
+
static inline struct changelog_ext_extra_flags *changelog_rec_extra_flags(
const struct changelog_rec *rec);
size += sizeof(struct changelog_ext_extra_flags);
if (cref & CLFE_UIDGID)
size += sizeof(struct changelog_ext_uidgid);
+ if (cref & CLFE_NID)
+ size += sizeof(struct changelog_ext_nid);
+ if (cref & CLFE_OPEN)
+ size += sizeof(struct changelog_ext_openmode);
+ if (cref & CLFE_XATTR)
+ size += sizeof(struct changelog_ext_xattr);
}
return size;
CLFE_INVALID));
}
+/* The nid is the second extra extension.
+ * Returns a pointer into rec at the offset changelog_rec_offset() gives
+ * for the record's flags (limited to CLF_VERSION, CLF_RENAME, CLF_JOBID
+ * and CLF_EXTRA_FLAGS) plus, when CLF_EXTRA_FLAGS is set, the CLFE_UIDGID
+ * bit of its extra flags, i.e. only the extension stored before the nid.
+ * NOTE(review): CLFE_NID itself is not tested here; presumably callers
+ * check it before using the result - confirm. */
+static inline
+struct changelog_ext_nid *changelog_rec_nid(const struct changelog_rec *rec)
+{
+ enum changelog_rec_flags crf = rec->cr_flags &
+ (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS);
+ enum changelog_rec_extra_flags cref = CLFE_INVALID;
+
+ if (rec->cr_flags & CLF_EXTRA_FLAGS)
+ cref = changelog_rec_extra_flags(rec)->cr_extra_flags &
+ CLFE_UIDGID;
+
+ return (struct changelog_ext_nid *)((char *)rec +
+ changelog_rec_offset(crf, cref));
+}
+
+/* The OPEN mode is the third extra extension.
+ * Returns a pointer into rec at the offset changelog_rec_offset() gives
+ * for the record's flags (limited to CLF_VERSION, CLF_RENAME, CLF_JOBID
+ * and CLF_EXTRA_FLAGS) plus, when CLF_EXTRA_FLAGS is set, the CLFE_UIDGID
+ * and CLFE_NID bits of its extra flags, i.e. only the extensions stored
+ * before the open mode.
+ * NOTE(review): CLFE_OPEN itself is not tested here; presumably callers
+ * check it before using the result - confirm. */
+static inline
+struct changelog_ext_openmode *changelog_rec_openmode(
+ const struct changelog_rec *rec)
+{
+ enum changelog_rec_flags crf = rec->cr_flags &
+ (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS);
+ enum changelog_rec_extra_flags cref = CLFE_INVALID;
+
+ if (rec->cr_flags & CLF_EXTRA_FLAGS)
+ cref = changelog_rec_extra_flags(rec)->cr_extra_flags &
+ (CLFE_UIDGID | CLFE_NID);
+
+ return (struct changelog_ext_openmode *)((char *)rec +
+ changelog_rec_offset(crf, cref));
+}
+
+/* The xattr name is the fourth extra extension.
+ * Returns a pointer into rec at the offset changelog_rec_offset() gives
+ * for the record's flags (limited to CLF_VERSION, CLF_RENAME, CLF_JOBID
+ * and CLF_EXTRA_FLAGS) plus, when CLF_EXTRA_FLAGS is set, the CLFE_UIDGID,
+ * CLFE_NID and CLFE_OPEN bits of its extra flags, i.e. only the
+ * extensions stored before the xattr name.
+ * NOTE(review): CLFE_XATTR itself is not tested here; presumably callers
+ * check it before using the result - confirm. */
+static inline
+struct changelog_ext_xattr *changelog_rec_xattr(
+ const struct changelog_rec *rec)
+{
+ enum changelog_rec_flags crf = rec->cr_flags &
+ (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS);
+ enum changelog_rec_extra_flags cref = CLFE_INVALID;
+
+ if (rec->cr_flags & CLF_EXTRA_FLAGS)
+ cref = changelog_rec_extra_flags(rec)->cr_extra_flags &
+ (CLFE_UIDGID | CLFE_NID | CLFE_OPEN);
+
+ return (struct changelog_ext_xattr *)((char *)rec +
+ changelog_rec_offset(crf, cref));
+}
+
/* The name follows the rename, jobid and extra flags extns, if present */
static inline char *changelog_rec_name(const struct changelog_rec *rec)
{
enum changelog_rec_flags crf_wanted,
enum changelog_rec_extra_flags cref_want)
{
+ char *xattr_mov = NULL;
+ char *omd_mov = NULL;
+ char *nid_mov = NULL;
char *uidgid_mov = NULL;
char *ef_mov;
char *jid_mov;
/* Locations of extensions in the remapped record */
if (rec->cr_flags & CLF_EXTRA_FLAGS) {
+ xattr_mov = (char *)rec +
+ changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
+ cref_want & ~CLFE_XATTR);
+ omd_mov = (char *)rec +
+ changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
+ cref_want & ~(CLFE_OPEN |
+ CLFE_XATTR));
+ nid_mov = (char *)rec +
+ changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
+ cref_want & ~(CLFE_NID |
+ CLFE_OPEN |
+ CLFE_XATTR));
uidgid_mov = (char *)rec +
changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
- CLFE_INVALID);
+ cref_want & ~(CLFE_UIDGID |
+ CLFE_NID |
+ CLFE_OPEN |
+ CLFE_XATTR));
cref = changelog_rec_extra_flags(rec)->cr_extra_flags;
}
/* Move the extension fields to the desired positions */
if ((crf_wanted & CLF_EXTRA_FLAGS) &&
(rec->cr_flags & CLF_EXTRA_FLAGS)) {
+ if ((cref_want & CLFE_XATTR) && (cref & CLFE_XATTR))
+ memmove(xattr_mov, changelog_rec_xattr(rec),
+ sizeof(struct changelog_ext_xattr));
+
+ if ((cref_want & CLFE_OPEN) && (cref & CLFE_OPEN))
+ memmove(omd_mov, changelog_rec_openmode(rec),
+ sizeof(struct changelog_ext_openmode));
+
+ if ((cref_want & CLFE_NID) && (cref & CLFE_NID))
+ memmove(nid_mov, changelog_rec_nid(rec),
+ sizeof(struct changelog_ext_nid));
+
if ((cref_want & CLFE_UIDGID) && (cref & CLFE_UIDGID))
memmove(uidgid_mov, changelog_rec_uidgid(rec),
sizeof(struct changelog_ext_uidgid));
sizeof(struct changelog_ext_rename));
/* Clear newly added fields */
+ if (xattr_mov && (cref_want & CLFE_XATTR) &&
+ !(cref & CLFE_XATTR))
+ memset(xattr_mov, 0, sizeof(struct changelog_ext_xattr));
+
+ if (omd_mov && (cref_want & CLFE_OPEN) &&
+ !(cref & CLFE_OPEN))
+ memset(omd_mov, 0, sizeof(struct changelog_ext_openmode));
+
+ if (nid_mov && (cref_want & CLFE_NID) &&
+ !(cref & CLFE_NID))
+ memset(nid_mov, 0, sizeof(struct changelog_ext_nid));
+
if (uidgid_mov && (cref_want & CLFE_UIDGID) &&
!(cref & CLFE_UIDGID))
memset(uidgid_mov, 0, sizeof(struct changelog_ext_uidgid));
boundaries. See hai_zero */
} __attribute__((packed));
-#ifndef HAVE_CFS_SIZE_ROUND
-static inline int cfs_size_round (int val)
-{
- return (val + 7) & (~0x7);
-}
-#define HAVE_CFS_SIZE_ROUND
-#endif
-
/* Return pointer to first hai in action list */
static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
{
- return (struct hsm_action_item *)(hal->hal_fsname +
- cfs_size_round(strlen(hal-> \
- hal_fsname)
- + 1));
+ size_t offset = __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8); /* fsname plus its NUL, rounded up to 8 bytes */
+
+ return (struct hsm_action_item *)(hal->hal_fsname + offset);
}
+
/* Return pointer to next hai */
static inline struct hsm_action_item * hai_next(struct hsm_action_item *hai)
{
- return (struct hsm_action_item *)((char *)hai +
- cfs_size_round(hai->hai_len));
+ size_t offset = __ALIGN_KERNEL(hai->hai_len, 8); /* this item's length rounded up to 8 bytes */
+
+ return (struct hsm_action_item *)((char *)hai + offset);
}
/* Return size of an hsm_action_list */
size_t sz;
struct hsm_action_item *hai;
- sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1);
+ sz = sizeof(*hal) + __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8);
hai = hai_first(hal);
for (i = 0; i < hal->hal_count ; i++, hai = hai_next(hai))
- sz += cfs_size_round(hai->hai_len);
+ sz += __ALIGN_KERNEL(hai->hai_len, 8);
return sz;
}