Merge remote-tracking branch 'origin/flr'.
Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Change-Id: Idee9297fbcab2bea3bd5987c94e4b4e79c49b3b6
struct filter_fid {
struct lu_fid ff_parent;
struct ost_layout ff_layout;
+ /* NOTE(review): presumably the FLR layout version recorded for this
+ * object -- confirm against the code that stores it */
+ __u32 ff_layout_version;
+ __u32 ff_range; /* range of layout versions for which
+ * writes are allowed */
} __attribute__((packed));
/* Userspace should treat lu_fid as opaque, and only use the following methods
*/
#define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
+ /* NOTE(review): presumably a flag bit for lustre_som_attrs::lsa_valid --
+ * confirm against the users of this constant */
+ enum {
+ LSOM_FL_VALID = 1 << 0,
+ };
+
+ /* Size and block-count attributes together with a validity word.
+ * lsa_valid plus the three reserved __u16 slots occupy 8 bytes, so
+ * lsa_size starts on an 8-byte boundary without implicit padding. */
+ struct lustre_som_attrs {
+ __u16 lsa_valid;
+ __u16 lsa_reserved[3];
+ __u64 lsa_size;
+ __u64 lsa_blocks;
+ };
+
/**
* OST object IDentifier.
*/
};
/*
+ * Maximum number of mirrors currently implemented.
+ */
+ #define LUSTRE_MIRROR_COUNT_MAX 16
+
+ /* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl.
+ * With the structured ioctl argument the value travels in
+ * ll_ioc_lease::lil_mode. */
+ enum ll_lease_mode {
+ /* read lease; the file must be open for reading */
+ LL_LEASE_RDLCK = 0x01,
+ /* write lease; the file must be open for writing */
+ LL_LEASE_WRLCK = 0x02,
+ /* release the lease currently held on the file descriptor */
+ LL_LEASE_UNLCK = 0x04,
+ };
+
+ /* Modifier bits carried in ll_ioc_lease::lil_flags. */
+ enum ll_lease_flags {
+ /* when taking a lease: open with MDS_OPEN_RESYNC and start a file
+ * resync once the lease is granted */
+ LL_LEASE_RESYNC = 0x1,
+ /* when releasing a lease: close with MDS_CLOSE_RESYNC_DONE and pass
+ * the ids listed in ll_ioc_lease::lil_ids along with the close */
+ LL_LEASE_RESYNC_DONE = 0x2,
+ };
+
+ /* upper bound accepted for ll_ioc_lease::lil_count */
+ #define IOC_IDS_MAX 4096
+ /* Argument of LL_IOC_SET_LEASE: a fixed header followed by lil_count
+ * 32-bit ids. */
+ struct ll_ioc_lease {
+ __u32 lil_mode; /* enum ll_lease_mode */
+ __u32 lil_flags; /* enum ll_lease_flags */
+ __u32 lil_count; /* number of entries in lil_ids[] */
+ __u32 lil_ids[0]; /* trailing variable-length id array */
+ };
+
+ /*
* The ioctl naming rules:
* LL_* - works on the currently opened filehandle instead of parent dir
* *_OBD_* - gets data for both OSC or MDC (LOV, LMV indirectly)
#define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *)
#define LL_IOC_GET_MDTIDX _IOR ('f', 175, int)
#define LL_IOC_FUTIMES_3 _IOWR('f', 176, struct ll_futimes_3)
+ #define LL_IOC_FLR_SET_MIRROR _IOW ('f', 177, long)
/* lustre_ioctl.h 177-210 */
#define LL_IOC_HSM_STATE_GET _IOR('f', 211, struct hsm_user_state)
#define LL_IOC_HSM_STATE_SET _IOW('f', 212, struct hsm_state_set)
#define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md)
#define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md)
#define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64)
- #define LL_IOC_SET_LEASE _IOWR('f', 243, long)
+ #define LL_IOC_SET_LEASE _IOWR('f', 243, struct ll_ioc_lease)
+ #define LL_IOC_SET_LEASE_OLD _IOWR('f', 243, long)
#define LL_IOC_GET_LEASE _IO('f', 244)
#define LL_IOC_HSM_IMPORT _IOWR('f', 245, struct hsm_user_import)
#define LL_IOC_LMV_SET_DEFAULT_STRIPE _IOWR('f', 246, struct lmv_user_md)
#define LL_IOC_FSSETXATTR FS_IOC_FSSETXATTR
- /* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. */
- enum ll_lease_type {
- LL_LEASE_RDLCK = 0x1,
- LL_LEASE_WRLCK = 0x2,
- LL_LEASE_UNLCK = 0x4,
- };
-
#define LL_STATFS_LMV 1
#define LL_STATFS_LOV 2
#define LL_STATFS_NODELAY 4
#define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */
#define LOV_USER_MAGIC_COMP_V1 0x0BD60BD0
-#define LMV_USER_MAGIC 0x0CD30CD0 /*default lmv magic*/
+#define LMV_USER_MAGIC 0x0CD30CD0 /* default lmv magic */
+#define LMV_USER_MAGIC_V0 0x0CD20CD0 /* old default lmv magic*/
#define LOV_PATTERN_NONE 0x000
#define LOV_PATTERN_RAID0 0x001
__u64 e_end;
};
- #define DEXT "[ %#llx , %#llx )"
+ #define DEXT "[%#llx, %#llx)"
#define PEXT(ext) (ext)->e_start, (ext)->e_end
static inline bool lu_extent_is_overlapped(struct lu_extent *e1,
return e1->e_start < e2->e_end && e2->e_start < e1->e_end;
}
+ /* Tell whether extent \a e covers everything from offset 0 up to
+ * LUSTRE_EOF. */
+ static inline bool lu_extent_is_whole(struct lu_extent *e)
+ {
+ 	if (e->e_start != 0)
+ 		return false;
+
+ 	return e->e_end == LUSTRE_EOF;
+ }
+
enum lov_comp_md_entry_flags {
LCME_FL_PRIMARY = 0x00000001, /* Not used */
LCME_FL_STALE = 0x00000002, /* Not used */
#define LCME_KNOWN_FLAGS (LCME_FL_NEG | LCME_FL_INIT)
+ /* the highest bit in obdo::o_layout_version is used to mark if the file is
+ * being resynced. */
+ #define LU_LAYOUT_RESYNC LCME_FL_NEG
+
/* lcme_id can be specified as certain flags, and the the first
* bit of lcme_id is used to indicate that the ID is representing
* certain LCME_FL_* but not a real ID. Which implies we can have
__u64 lcme_padding[2];
} __attribute__((packed));
- enum lov_comp_md_flags;
+ #define SEQ_ID_MAX 0x0000FFFF
+ #define SEQ_ID_MASK SEQ_ID_MAX
+ /* bit 30:16 of lcme_id is used to store mirror id */
+ #define MIRROR_ID_MASK 0x7FFF0000
+ #define MIRROR_ID_SHIFT 16
+
+ /* Build a component id from a mirror id and a sequence id: the mirror id
+ * is stored in the bits selected by MIRROR_ID_MASK and \a seqid fills the
+ * low bits.
+ *
+ * \param[in] mirror_id	mirror identifier; bits that fall outside
+ *				MIRROR_ID_MASK after shifting are dropped
+ * \param[in] seqid		sequence id within the mirror
+ *
+ * \retval			combined 32-bit component id
+ */
+ static inline __u32 pflr_id(__u16 mirror_id, __u16 seqid)
+ {
+ 	/* Widen before shifting: a __u16 operand is promoted to signed int,
+ 	 * and shifting a value >= 0x8000 left by 16 would overflow it, which
+ 	 * is undefined behaviour. */
+ 	__u32 mirror_bits = (__u32)mirror_id << MIRROR_ID_SHIFT;
+
+ 	return (mirror_bits & MIRROR_ID_MASK) | seqid;
+ }
+
+ /* Extract the mirror id stored in the MIRROR_ID_MASK bits of component
+ * id \a id. */
+ static inline __u16 mirror_id_of(__u32 id)
+ {
+ 	__u32 mirror_bits = id & MIRROR_ID_MASK;
+
+ 	return mirror_bits >> MIRROR_ID_SHIFT;
+ }
+
+ /**
+ * on-disk data for lcm_flags. Valid if lcm_magic is LOV_MAGIC_COMP_V1.
+ *
+ * NOTE(review): the per-state descriptions below are inferred from the
+ * constant names -- confirm against the FLR state machine.
+ */
+ enum lov_comp_md_flags {
+ /* the lowest 2 bits are used by FLR to record file state */
+ LCM_FL_NOT_FLR = 0, /* presumably: file is not mirrored */
+ LCM_FL_RDONLY = 1, /* presumably: no write in progress */
+ LCM_FL_WRITE_PENDING = 2, /* presumably: file has been written */
+ LCM_FL_SYNC_PENDING = 3, /* presumably: resync in progress */
+ LCM_FL_FLR_MASK = 0x3, /* selects the two state bits above */
+ };
struct lov_comp_md_v1 {
__u32 lcm_magic; /* LOV_USER_MAGIC_COMP_V1 */
__u32 lcm_layout_gen;
__u16 lcm_flags;
__u16 lcm_entry_count;
- __u64 lcm_padding1;
+ /* lcm_mirror_count stores the number of actual mirrors minus 1,
+ * so that non-flr files will have value 0 meaning 1 mirror. */
+ __u16 lcm_mirror_count;
+ __u16 lcm_padding1[3];
__u64 lcm_padding2;
struct lov_comp_md_entry_v1 lcm_entries[0];
} __attribute__((packed));
+ /*
+ * Maximum number of mirrors Lustre can support.
+ */
+ #define LUSTRE_MIRROR_COUNT_MAX 16
+
static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
{
if (stripes == (__u16)-1)
#define SWAP_LAYOUTS_KEEP_MTIME (1 << 2)
#define SWAP_LAYOUTS_KEEP_ATIME (1 << 3)
#define SWAP_LAYOUTS_CLOSE (1 << 4)
+ #define MERGE_LAYOUTS_CLOSE (1 << 5)
+ #define INTENT_LAYOUTS_CLOSE (SWAP_LAYOUTS_CLOSE | MERGE_LAYOUTS_CLOSE)
/* Swap XATTR_NAME_HSM as well, only on the MDT so far */
#define SWAP_LAYOUTS_MDS_HSM (1 << 31)
CL_CTIME = 18,
CL_ATIME = 19,
CL_MIGRATE = 20,
+ CL_FLRW = 21, /* FLR: file was firstly written */
+ CL_RESYNC = 22, /* FLR: file was resync-ed */
CL_LAST
};
static const char *changelog_str[] = {
"MARK", "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK",
"RMDIR", "RENME", "RNMTO", "OPEN", "CLOSE", "LYOUT", "TRUNC",
- "SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME", "MIGRT"
+ "SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME", "MIGRT",
+ "FLRW", "RESYNC",
};
if (type >= 0 && type < CL_LAST)
/********* Misc **********/
struct ioc_data_version {
- __u64 idv_version;
- __u64 idv_flags; /* See LL_DV_xxx */
+ __u64 idv_version;
+ __u32 idv_layout_version; /* FLR: layout version for OST objects */
+ __u32 idv_flags; /* enum ioc_data_version_flags */
+ };
+
+ enum ioc_data_version_flags {
+ LL_DV_RD_FLUSH = (1 << 0), /* Flush dirty pages from clients */
+ LL_DV_WR_FLUSH = (1 << 1), /* Flush all caching pages from clients */
};
- #define LL_DV_RD_FLUSH (1 << 0) /* Flush dirty pages from clients */
- #define LL_DV_WR_FLUSH (1 << 1) /* Flush all caching pages from clients */
#ifndef offsetof
#define offsetof(typ, memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
ll_prepare_close(inode, op_data, och);
switch (bias) {
+ case MDS_CLOSE_LAYOUT_MERGE:
+ /* merge blocks from the victim inode */
+ op_data->op_attr_blocks += ((struct inode *)data)->i_blocks;
+ op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
case MDS_CLOSE_LAYOUT_SWAP:
LASSERT(data != NULL);
- op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
+ op_data->op_bias |= bias;
op_data->op_data_version = 0;
op_data->op_lease_handle = och->och_lease_handle;
op_data->op_fid2 = *ll_inode2fid(data);
break;
+ case MDS_CLOSE_RESYNC_DONE: {
+ struct ll_ioc_lease *ioc = data;
+
+ LASSERT(data != NULL);
+ op_data->op_attr_blocks +=
+ ioc->lil_count * op_data->op_attr_blocks;
+ op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+ op_data->op_bias |= MDS_CLOSE_RESYNC_DONE;
+
+ op_data->op_lease_handle = och->och_lease_handle;
+ op_data->op_data = &ioc->lil_ids[0];
+ op_data->op_data_size =
+ ioc->lil_count * sizeof(ioc->lil_ids[0]);
+ break;
+ }
+
case MDS_HSM_RELEASE:
LASSERT(data != NULL);
op_data->op_bias |= MDS_HSM_RELEASE;
CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
- if (rc == 0 &&
- op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) {
+ if (rc == 0 && op_data->op_bias & bias) {
struct mdt_body *body;
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
}
static int ll_swap_layouts_close(struct obd_client_handle *och,
- struct inode *inode, struct inode *inode2)
+ struct inode *inode, struct inode *inode2,
+ int intent)
{
const struct lu_fid *fid1 = ll_inode2fid(inode);
const struct lu_fid *fid2;
+ enum mds_op_bias bias;
int rc;
ENTRY;
if (rc == 0)
GOTO(out_free_och, rc = -EINVAL);
- /* Close the file and swap layouts between inode & inode2.
+ switch (intent) {
+ case SWAP_LAYOUTS_CLOSE:
+ bias = MDS_CLOSE_LAYOUT_SWAP;
+ break;
+ case MERGE_LAYOUTS_CLOSE:
+ bias = MDS_CLOSE_LAYOUT_MERGE;
+ break;
+ default:
+ GOTO(out_free_och, rc = -EOPNOTSUPP);
+ }
+
+ /* Close the file and {swap,merge} layouts between inode & inode2.
* NB: lease lock handle is released in mdc_close_layout_swap_pack()
* because we still need it to pack l_remote_handle to MDT. */
- rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP,
- inode2);
+ rc = ll_close_inode_openhandle(inode, och, bias, inode2);
och = NULL; /* freed in ll_close_inode_openhandle() */
* Release lease and close the file.
* It will check if the lease has ever broken.
*/
- static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
- bool *lease_broken)
+ static int ll_lease_close_intent(struct obd_client_handle *och,
+ struct inode *inode,
+ bool *lease_broken, enum mds_op_bias bias,
+ void *data)
{
struct ldlm_lock *lock;
bool cancelled = true;
LDLM_LOCK_PUT(lock);
}
- CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
- PFID(&ll_i2info(inode)->lli_fid), cancelled);
-
- if (!cancelled)
- ldlm_cli_cancel(&och->och_lease_handle, 0);
+ CDEBUG(D_INODE, "lease for "DFID" broken? %d, bias: %x\n",
+ PFID(&ll_i2info(inode)->lli_fid), cancelled, bias);
if (lease_broken != NULL)
*lease_broken = cancelled;
- rc = ll_close_inode_openhandle(inode, och, 0, NULL);
+ if (!cancelled && !bias)
+ ldlm_cli_cancel(&och->och_lease_handle, 0);
+
+ if (cancelled) { /* no need to excute intent */
+ bias = 0;
+ data = NULL;
+ }
+
+ rc = ll_close_inode_openhandle(inode, och, bias, data);
RETURN(rc);
}
+ /* Release the lease and close the file without attaching any close
+ * intent; simply forwards to ll_lease_close_intent() with a zero bias
+ * and no intent data. */
+ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
+ 			  bool *lease_broken)
+ {
+ 	const enum mds_op_bias no_intent = 0;
+
+ 	return ll_lease_close_intent(och, inode, lease_broken, no_intent,
+ 				     NULL);
+ }
+
+ /**
+ * Once the lease has been granted, ask the MDT to start a file resync
+ * (RPC MDS_REINT_RESYNC).
+ *
+ * \param[in] och	client handle holding the lease
+ * \param[in] inode	inode of the file being resynced
+ *
+ * \retval 0		on success
+ * \retval < 0		error code
+ */
+ static int ll_lease_file_resync(struct obd_client_handle *och,
+ 				 struct inode *inode)
+ {
+ 	struct ll_sb_info *sbi = ll_i2sbi(inode);
+ 	struct md_op_data *resync_op;
+ 	__u64 dv_ignored;
+ 	int rc;
+ 	ENTRY;
+
+ 	resync_op = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
+ 				       LUSTRE_OPC_ANY, NULL);
+ 	if (IS_ERR(resync_op))
+ 		RETURN(PTR_ERR(resync_op));
+
+ 	/* The client page cache must be cleaned up before the resync
+ 	 * starts: once the layout version has been increased, writing
+ 	 * back cached data would be denied by the OSTs.  Only the flush
+ 	 * side effect matters here, the data version itself is unused. */
+ 	rc = ll_data_version(inode, &dv_ignored, LL_DV_WR_FLUSH);
+ 	if (rc != 0)
+ 		GOTO(out, rc);
+
+ 	resync_op->op_handle = och->och_lease_handle;
+ 	rc = md_file_resync(sbi->ll_md_exp, resync_op);
+ 	if (rc != 0)
+ 		GOTO(out, rc);
+
+ 	EXIT;
+ out:
+ 	ll_finish_md_op_data(resync_op);
+ 	return rc;
+ }
+
int ll_merge_attr(const struct lu_env *env, struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
RETURN(rc);
}
+ /**
+ * Apply the mirror designated on the file descriptor, if any, to \a io.
+ *
+ * At present only read, write and truncate are able to issue I/O to a
+ * designated mirror.
+ *
+ * \param[in,out] io	I/O descriptor being initialized
+ * \param[in] file	file the I/O is issued against
+ */
+ void ll_io_set_mirror(struct cl_io *io, const struct file *file)
+ {
+ 	struct ll_file_data *fdata = LUSTRE_FPRIVATE(file);
+
+ 	if (fdata->fd_designated_mirror > 0) {
+ 		/* FLR: exactly one mirror can serve this I/O, so non-delay
+ 		 * mode is pointless; there is also no mechanism to hand
+ 		 * mirror I/O over to ptasks, hence no parallel I/O. */
+ 		io->ci_designated_mirror = fdata->fd_designated_mirror;
+ 		io->ci_layout_version = fdata->fd_layout_version;
+ 		io->ci_ndelay = 0;
+ 		io->ci_pio = 0;
+ 	} else {
+ 		/* generic (non-resync) I/O: drop any layout version that
+ 		 * may be left over from an I/O restart */
+ 		io->ci_layout_version = 0;
+ 	}
+
+ 	CDEBUG(D_VFSTRACE, "%s: desiginated mirror: %d\n",
+ 	       file->f_path.dentry->d_name.name, io->ci_designated_mirror);
+ }
+
static bool file_is_noatime(const struct file *file)
{
const struct vfsmount *mnt = file->f_path.mnt;
io->ci_pio = !io->u.ci_rw.rw_append;
else
io->ci_pio = 0;
+
+ /* FLR: only use non-delay I/O for read as there is only one
+ * available mirror for write. */
+ io->ci_ndelay = !(iot == CIT_WRITE);
+
+ ll_io_set_mirror(io, file);
}
static int ll_file_io_ptask(struct cfs_ptask *ptask)
__u16 refcheck;
ENTRY;
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- RETURN(PTR_ERR(env));
-
CDEBUG(D_VFSTRACE, "%s: %s range: [%llu, %llu)\n",
file_dentry(file)->d_name.name,
pt->cip_iot == CIT_READ ? "read" : "write",
pos, pos + pt->cip_count);
- restart:
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
io = vvp_env_thread_io(env);
ll_io_init(io, file, pt->cip_iot);
io->u.ci_rw.rw_iter = pt->cip_iter;
}
cl_io_fini(env, io);
+ cl_env_put(env, &refcheck);
- if ((rc == 0 || rc == -ENODATA) &&
- pt->cip_result < pt->cip_count &&
- io->ci_need_restart) {
- CDEBUG(D_VFSTRACE,
- "%s: restart %s range: [%llu, %llu) ret: %zd, rc: %d\n",
- file_dentry(file)->d_name.name,
- pt->cip_iot == CIT_READ ? "read" : "write",
- pos, pos + pt->cip_count - pt->cip_result,
- pt->cip_result, rc);
- goto restart;
- }
+ pt->cip_need_restart = io->ci_need_restart;
CDEBUG(D_VFSTRACE, "%s: %s ret: %zd, rc: %d\n",
file_dentry(file)->d_name.name,
pt->cip_iot == CIT_READ ? "read" : "write",
pt->cip_result, rc);
- cl_env_put(env, &refcheck);
RETURN(pt->cip_result > 0 ? 0 : rc);
}
loff_t pos = *ppos;
ssize_t result = 0;
int rc = 0;
+ unsigned retried = 0;
+ bool restarted = false;
ENTRY;
if (args->via_io_subtype == IO_NORMAL) {
io->u.ci_rw.rw_iter = *args->u.normal.via_iter;
io->u.ci_rw.rw_iocb = *args->u.normal.via_iocb;
- } else {
- io->ci_pio = 0;
}
+ if (args->via_io_subtype != IO_NORMAL || restarted)
+ io->ci_pio = 0;
+ io->ci_ndelay_tried = retried;
if (cl_io_rw_init(env, io, iot, pos, count) == 0) {
bool range_locked = false;
out:
cl_io_fini(env, io);
+ CDEBUG(D_VFSTRACE,
+ "%s: %d io complete with rc: %d, result: %zd, restart: %d\n",
+ file->f_path.dentry->d_name.name,
+ iot, rc, result, io->ci_need_restart);
+
if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
CDEBUG(D_VFSTRACE,
"%s: restart %s range: [%llu, %llu) ret: %zd, rc: %d\n",
file_dentry(file)->d_name.name,
iot == CIT_READ ? "read" : "write",
pos, pos + count, result, rc);
+ /* preserve the tried count for FLR */
+ retried = io->ci_ndelay_tried;
+ restarted = true;
goto restart;
}
struct cl_layout cl = {
.cl_is_composite = false,
};
+ struct lu_extent ext = {
+ .e_start = 0,
+ .e_end = OBD_OBJECT_EOF,
+ };
env = cl_env_get(&refcheck);
if (IS_ERR(env))
rc = cl_object_layout_get(env, obj, &cl);
if (!rc && cl.cl_is_composite)
- rc = ll_layout_write_intent(inode, 0, OBD_OBJECT_EOF);
+ rc = ll_layout_write_intent(inode, LAYOUT_INTENT_WRITE,
+ &ext);
cl_env_put(env, &refcheck);
if (rc)
RETURN(rc);
}
- /*
- * Read the data_version for inode.
- *
- * This value is computed using stripe object version on OST.
- * Version is computed using server side locking.
- *
- * @param flags if do sync on the OST side;
- * 0: no sync
- * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
- * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
- */
- int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
+ static int
+ ll_ioc_data_version(struct inode *inode, struct ioc_data_version *ioc)
{
struct cl_object *obj = ll_i2info(inode)->lli_clob;
struct lu_env *env;
ENTRY;
+ ioc->idv_version = 0;
+ ioc->idv_layout_version = UINT_MAX;
+
/* If no file object initialized, we consider its version is 0. */
- if (obj == NULL) {
- *data_version = 0;
+ if (obj == NULL)
RETURN(0);
- }
env = cl_env_get(&refcheck);
if (IS_ERR(env))
io = vvp_env_thread_io(env);
io->ci_obj = obj;
io->u.ci_data_version.dv_data_version = 0;
- io->u.ci_data_version.dv_flags = flags;
+ io->u.ci_data_version.dv_layout_version = UINT_MAX;
+ io->u.ci_data_version.dv_flags = ioc->idv_flags;
restart:
if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
else
result = io->ci_result;
- *data_version = io->u.ci_data_version.dv_data_version;
+ ioc->idv_version = io->u.ci_data_version.dv_data_version;
+ ioc->idv_layout_version = io->u.ci_data_version.dv_layout_version;
cl_io_fini(env, io);
}
/*
+ * Read the data_version for inode.
+ *
+ * This value is computed using stripe object version on OST.
+ * Version is computed using server side locking.
+ *
+ * @param flags if do sync on the OST side;
+ * 0: no sync
+ * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
+ * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
+ */
+ int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
+ {
+ 	struct ioc_data_version dv_req = { .idv_flags = flags };
+ 	int rc = ll_ioc_data_version(inode, &dv_req);
+
+ 	/* *data_version is left untouched when the query fails */
+ 	if (rc != 0)
+ 		return rc;
+
+ 	*data_version = dv_req.idv_version;
+ 	return 0;
+ }
+
+ /*
* Trigger a HSM release request for the provided inode.
*/
int ll_hsm_release(struct inode *inode)
sizeof(fsxattr)))
RETURN(-EFAULT);
+ fsxattr.fsx_xflags = ll_inode_to_ext_flags(inode->i_flags);
fsxattr.fsx_projid = ll_i2info(inode)->lli_projid;
if (copy_to_user((struct fsxattr __user *)arg,
&fsxattr, sizeof(fsxattr)))
struct ptlrpc_request *req = NULL;
int rc = 0;
struct fsxattr fsxattr;
+ struct cl_object *obj;
/* only root could change project ID */
if (!cfs_capable(CFS_CAP_SYS_ADMIN))
sizeof(fsxattr)))
GOTO(out_fsxattr1, rc = -EFAULT);
+ op_data->op_attr_flags = fsxattr.fsx_xflags;
op_data->op_projid = fsxattr.fsx_projid;
- op_data->op_attr.ia_valid |= MDS_ATTR_PROJID;
+ op_data->op_attr.ia_valid |= (MDS_ATTR_PROJID | ATTR_ATTR_FLAG);
rc = md_setattr(ll_i2sbi(inode)->ll_md_exp, op_data, NULL,
0, &req);
ptlrpc_req_finished(req);
+ obj = ll_i2info(inode)->lli_clob;
+ if (obj) {
+ struct iattr *attr;
+
+ inode->i_flags = ll_ext_to_inode_flags(fsxattr.fsx_xflags);
+ OBD_ALLOC_PTR(attr);
+ if (attr == NULL)
+ GOTO(out_fsxattr1, rc = -ENOMEM);
+ attr->ia_valid = ATTR_ATTR_FLAG;
+ rc = cl_setattr_ost(obj, attr, fsxattr.fsx_xflags);
+
+ OBD_FREE_PTR(attr);
+ }
out_fsxattr1:
ll_finish_md_op_data(op_data);
RETURN(rc);
+ }
+
+ /**
+ * Release the lease held through \a file, optionally closing with a
+ * "resync done" intent.
+ *
+ * The lease handle is detached from the file descriptor first.  From then
+ * on this function owns it and must dispose of it on every path, including
+ * the error paths taken before the close is sent.
+ *
+ * \param[in] file	file holding the lease
+ * \param[in] ioc	kernel copy of the ioctl header (already validated
+ *			by the caller)
+ * \param[in] arg	user-space address of the full ioctl argument; only
+ *			read when LL_LEASE_RESYNC_DONE is set in lil_flags
+ *
+ * \retval >= 0	lease type derived from the mode the lease was held
+ *			in (mode 0 if the lease was found broken)
+ * \retval -ENOLCK	no lease is held on this file descriptor
+ * \retval -EINVAL	lil_count exceeds IOC_IDS_MAX
+ * \retval -ENOMEM	the id buffer could not be allocated
+ * \retval -EFAULT	the id array could not be copied from user space
+ * \retval < 0		other error returned by the close path
+ */
+ static long ll_file_unlock_lease(struct file *file, struct ll_ioc_lease *ioc,
+ 				  unsigned long arg)
+ {
+ 	struct inode *inode = file_inode(file);
+ 	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ 	struct ll_inode_info *lli = ll_i2info(inode);
+ 	struct obd_client_handle *och = NULL;
+ 	bool lease_broken;
+ 	fmode_t fmode = 0;
+ 	enum mds_op_bias bias = 0;
+ 	void *data = NULL;
+ 	size_t data_size = 0;
+ 	long rc;
+ 	ENTRY;
+
+ 	/* take the lease handle away from the file descriptor */
+ 	mutex_lock(&lli->lli_och_mutex);
+ 	if (fd->fd_lease_och != NULL) {
+ 		och = fd->fd_lease_och;
+ 		fd->fd_lease_och = NULL;
+ 	}
+ 	mutex_unlock(&lli->lli_och_mutex);
+
+ 	if (och == NULL)
+ 		GOTO(out, rc = -ENOLCK);
+
+ 	fmode = och->och_flags;
+
+ 	if (ioc->lil_flags & LL_LEASE_RESYNC_DONE) {
+ 		if (ioc->lil_count > IOC_IDS_MAX)
+ 			GOTO(out, rc = -EINVAL);
+
+ 		data_size = offsetof(typeof(*ioc), lil_ids[ioc->lil_count]);
+ 		OBD_ALLOC(data, data_size);
+ 		if (!data)
+ 			GOTO(out, rc = -ENOMEM);
+
+ 		if (copy_from_user(data, (void __user *)arg, data_size))
+ 			GOTO(out, rc = -EFAULT);
+
+ 		/* The header has just been fetched from user space a second
+ 		 * time and may have changed since the caller validated it.
+ 		 * The close path sizes the id array from the lil_count
+ 		 * stored in this buffer, so pin it to the validated value
+ 		 * the buffer was allocated for. */
+ 		((struct ll_ioc_lease *)data)->lil_count = ioc->lil_count;
+
+ 		bias = MDS_CLOSE_RESYNC_DONE;
+ 	}
+
+ 	rc = ll_lease_close_intent(och, inode, &lease_broken, bias, data);
+ 	/* NOTE(review): assumes ll_lease_close_intent() always consumes och
+ 	 * (it is freed in ll_close_inode_openhandle()) -- confirm there is
+ 	 * no early return before that call */
+ 	och = NULL;
+ 	if (rc < 0)
+ 		GOTO(out, rc);
+
+ 	rc = ll_lease_och_release(inode, file);
+ 	if (rc < 0)
+ 		GOTO(out, rc);
+
+ 	if (lease_broken)
+ 		fmode = 0;
+ 	EXIT;
+
+ out:
+ 	/* An error hit before the close was sent: the handle is no longer
+ 	 * reachable from the file descriptor, so release the lease here
+ 	 * instead of leaking it.  rc keeps the original error. */
+ 	if (och != NULL)
+ 		ll_lease_close(och, inode, NULL);
+ 	if (data)
+ 		OBD_FREE(data, data_size);
+ 	if (!rc)
+ 		rc = ll_lease_type_from_fmode(fmode);
+ 	RETURN(rc);
+ }
+
+ /**
+ * Take or release a lease on \a file according to ioc->lil_mode.
+ *
+ * LL_LEASE_UNLCK is delegated to ll_file_unlock_lease().  For the read and
+ * write modes a lease is opened and, when LL_LEASE_RESYNC is requested, a
+ * file resync is started and the layout refreshed before the lease is
+ * attached to the file descriptor.
+ *
+ * \param[in] file	file to operate on
+ * \param[in] ioc	kernel copy of the ioctl header
+ * \param[in] arg	user-space address of the ioctl argument, passed on
+ *			to the unlock path
+ *
+ * \retval 0		lease taken (for unlock: see ll_file_unlock_lease())
+ * \retval -EPERM	file is not open in the mode the lease requires
+ * \retval -EINVAL	unknown lease mode
+ * \retval -EBUSY	the file descriptor already holds a lease
+ * \retval < 0		other error code
+ */
+ static long ll_file_set_lease(struct file *file, struct ll_ioc_lease *ioc,
+ 			       unsigned long arg)
+ {
+ 	struct inode *inode = file_inode(file);
+ 	struct ll_inode_info *lli = ll_i2info(inode);
+ 	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+ 	struct obd_client_handle *lease_och = NULL;
+ 	__u64 open_flags = 0;
+ 	bool lease_broken;
+ 	fmode_t fmode;
+ 	long rc;
+ 	ENTRY;
+
+ 	switch (ioc->lil_mode) {
+ 	case LL_LEASE_WRLCK:
+ 		if (!(file->f_mode & FMODE_WRITE))
+ 			RETURN(-EPERM);
+ 		fmode = FMODE_WRITE;
+ 		break;
+ 	case LL_LEASE_RDLCK:
+ 		if (!(file->f_mode & FMODE_READ))
+ 			RETURN(-EPERM);
+ 		fmode = FMODE_READ;
+ 		break;
+ 	case LL_LEASE_UNLCK:
+ 		RETURN(ll_file_unlock_lease(file, ioc, arg));
+ 	default:
+ 		RETURN(-EINVAL);
+ 	}
+ 	CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
+
+ 	/* request the lease, flagged for resync when asked to */
+ 	if (ioc->lil_flags & LL_LEASE_RESYNC)
+ 		open_flags = MDS_OPEN_RESYNC;
+ 	lease_och = ll_lease_open(inode, file, fmode, open_flags);
+ 	if (IS_ERR(lease_och))
+ 		RETURN(PTR_ERR(lease_och));
+
+ 	if (ioc->lil_flags & LL_LEASE_RESYNC) {
+ 		/* start the resync, then pick up the resulting layout
+ 		 * version; either failure gives the lease back */
+ 		rc = ll_lease_file_resync(lease_och, inode);
+ 		if (rc == 0)
+ 			rc = ll_layout_refresh(inode, &fd->fd_layout_version);
+ 		if (rc != 0) {
+ 			ll_lease_close(lease_och, inode, NULL);
+ 			RETURN(rc);
+ 		}
+ 	}
+
+ 	/* attach the lease unless the descriptor already carries one */
+ 	mutex_lock(&lli->lli_och_mutex);
+ 	if (fd->fd_lease_och == NULL) {
+ 		fd->fd_lease_och = lease_och;
+ 		lease_och = NULL;
+ 	}
+ 	mutex_unlock(&lli->lli_och_mutex);
+
+ 	rc = 0;
+ 	if (lease_och != NULL) {
+ 		/* not expected while only exclusive leases are supported */
+ 		ll_lease_close(lease_och, inode, &lease_broken);
+ 		rc = -EBUSY;
+ 	}
+ 	RETURN(rc);
}
static long
case LL_IOC_LOV_SWAP_LAYOUTS: {
struct file *file2;
struct lustre_swap_layouts lsl;
+ __u64 intent;
if (copy_from_user(&lsl, (char __user *)arg,
sizeof(struct lustre_swap_layouts)))
if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
GOTO(out, rc = -EPERM);
- if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
+ intent = lsl.sl_flags & INTENT_LAYOUTS_CLOSE;
+ if (intent) {
struct inode *inode2;
struct ll_inode_info *lli;
struct obd_client_handle *och = NULL;
- if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE)
- GOTO(out, rc = -EINVAL);
-
lli = ll_i2info(inode);
mutex_lock(&lli->lli_och_mutex);
if (fd->fd_lease_och != NULL) {
if (och == NULL)
GOTO(out, rc = -ENOLCK);
inode2 = file_inode(file2);
- rc = ll_swap_layouts_close(och, inode, inode2);
+ rc = ll_swap_layouts_close(och, inode, inode2, intent);
} else {
rc = ll_swap_layouts(file, file2, &lsl);
}
RETURN(-EFAULT);
idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
- rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
+ rc = ll_ioc_data_version(inode, &idv);
if (rc == 0 &&
copy_to_user((char __user *)arg, &idv, sizeof(idv)))
OBD_FREE_PTR(hca);
RETURN(rc);
}
- case LL_IOC_SET_LEASE: {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct obd_client_handle *och = NULL;
- bool lease_broken;
- fmode_t fmode;
-
- switch (arg) {
- case LL_LEASE_WRLCK:
- if (!(file->f_mode & FMODE_WRITE))
- RETURN(-EPERM);
- fmode = FMODE_WRITE;
- break;
- case LL_LEASE_RDLCK:
- if (!(file->f_mode & FMODE_READ))
- RETURN(-EPERM);
- fmode = FMODE_READ;
- break;
- case LL_LEASE_UNLCK:
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och != NULL) {
- och = fd->fd_lease_och;
- fd->fd_lease_och = NULL;
- }
- mutex_unlock(&lli->lli_och_mutex);
+ case LL_IOC_SET_LEASE_OLD: {
+ struct ll_ioc_lease ioc = { .lil_mode = (__u32)arg };
- if (och == NULL)
- RETURN(-ENOLCK);
-
- fmode = och->och_flags;
- rc = ll_lease_close(och, inode, &lease_broken);
- if (rc < 0)
- RETURN(rc);
-
- rc = ll_lease_och_release(inode, file);
- if (rc < 0)
- RETURN(rc);
-
- if (lease_broken)
- fmode = 0;
-
- RETURN(ll_lease_type_from_fmode(fmode));
- default:
- RETURN(-EINVAL);
- }
-
- CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
+ RETURN(ll_file_set_lease(file, &ioc, 0));
+ }
+ case LL_IOC_SET_LEASE: {
+ struct ll_ioc_lease ioc;
- /* apply for lease */
- och = ll_lease_open(inode, file, fmode, 0);
- if (IS_ERR(och))
- RETURN(PTR_ERR(och));
+ if (copy_from_user(&ioc, (void __user *)arg, sizeof(ioc)))
+ RETURN(-EFAULT);
- rc = 0;
- mutex_lock(&lli->lli_och_mutex);
- if (fd->fd_lease_och == NULL) {
- fd->fd_lease_och = och;
- och = NULL;
- }
- mutex_unlock(&lli->lli_och_mutex);
- if (och != NULL) {
- /* impossible now that only excl is supported for now */
- ll_lease_close(och, inode, &lease_broken);
- rc = -EBUSY;
- }
- RETURN(rc);
+ RETURN(ll_file_set_lease(file, &ioc, arg));
}
case LL_IOC_GET_LEASE: {
struct ll_inode_info *lli = ll_i2info(inode);
OBD_FREE(k_ladvise_hdr, alloc_size);
RETURN(rc);
}
+ case LL_IOC_FLR_SET_MIRROR: {
+ /* mirror I/O must be direct to avoid polluting page cache
+ * by stale data. */
+ if (!(file->f_flags & O_DIRECT))
+ RETURN(-EINVAL);
+
+ fd->fd_designated_mirror = (__u32)arg;
+ RETURN(0);
+ }
case LL_IOC_FSGETXATTR:
RETURN(ll_ioctl_fsgetxattr(inode, cmd, arg));
case LL_IOC_FSSETXATTR:
* Issue layout intent RPC indicating where in a file an IO is about to write.
*
* \param[in] inode file inode.
- * \param[in] start start offset of fille in bytes where an IO is about to
- * write.
- * \param[in] end exclusive end offset in bytes of the write range.
+ * \param[in] opc layout intent opcode to send
+ * \param[in] ext write range: start offset in the file, in bytes, where
+ * an IO is about to write, and exclusive end offset in
+ * bytes.
*
* \retval 0 on success
* \retval < 0 error code
*/
- int ll_layout_write_intent(struct inode *inode, __u64 start, __u64 end)
+ int ll_layout_write_intent(struct inode *inode, enum layout_intent_opc opc,
+ struct lu_extent *ext)
{
struct layout_intent intent = {
- .li_opc = LAYOUT_INTENT_WRITE,
- .li_start = start,
- .li_end = end,
+ .li_opc = opc,
+ .li_extent.e_start = ext->e_start,
+ .li_extent.e_end = ext->e_end,
};
int rc;
ENTRY;
}
int lod_obj_for_each_stripe(const struct lu_env *env, struct lod_object *lo,
- struct thandle *th, lod_obj_stripe_cb_t cb,
+ struct thandle *th,
struct lod_obj_stripe_cb_data *data)
{
struct lod_layout_component *lod_comp;
if (lod_comp->llc_stripe == NULL)
continue;
+ /* has stripe but not inited yet, this component has been
+ * declared to be created, but hasn't created yet.
+ */
+ if (!lod_comp_inited(lod_comp))
+ continue;
+
+ if (data->locd_comp_skip_cb &&
+ data->locd_comp_skip_cb(env, lo, i, data))
+ continue;
+
LASSERT(lod_comp->llc_stripe_count > 0);
for (j = 0; j < lod_comp->llc_stripe_count; j++) {
struct dt_object *dt = lod_comp->llc_stripe[j];
if (dt == NULL)
continue;
- rc = cb(env, lo, dt, th, j, data);
+ rc = data->locd_stripe_cb(env, lo, dt, th, i, j, data);
if (rc != 0)
RETURN(rc);
}
RETURN(0);
}
+ /**
+ * Component filter for attr_set: decide whether the stripes of component
+ * \a comp_idx should be skipped.
+ *
+ * Nothing is skipped unless the attributes carry LA_LAYOUT_VERSION.  When
+ * they do and the file is in LCM_FL_WRITE_PENDING state, a component is
+ * skipped if it is stale, or if its extent overlaps the extent of another
+ * non-stale component.  In LCM_FL_SYNC_PENDING state every component is
+ * chosen; any other state trips an assertion.
+ *
+ * \param[in] env	execution environment (unused here)
+ * \param[in] lo	LOD object whose components are examined
+ * \param[in] comp_idx	index of the component in lo->ldo_comp_entries
+ * \param[in] data	callback data carrying the attributes being set
+ *
+ * \retval true	skip this component
+ * \retval false	process this component
+ */
+ static bool lod_obj_attr_set_comp_skip_cb(const struct lu_env *env,
+ 					struct lod_object *lo, int comp_idx,
+ 					struct lod_obj_stripe_cb_data *data)
+ {
+ 	struct lod_layout_component *comp = &lo->ldo_comp_entries[comp_idx];
+ 	bool skip = false;
+ 	int idx;
+
+ 	if (!(data->locd_attr->la_valid & LA_LAYOUT_VERSION))
+ 		return false;
+
+ 	if (lo->ldo_flr_state == LCM_FL_WRITE_PENDING) {
+ 		if (comp->llc_flags & LCME_FL_STALE) {
+ 			/* stale components are never updated */
+ 			skip = true;
+ 		} else {
+ 			/* A valid component that overlaps another valid one
+ 			 * is skipped too, so any attempt to write overlapped
+ 			 * components can never succeed: the client gets
+ 			 * EINPROGRESS. */
+ 			for (idx = 0; idx < lo->ldo_comp_cnt && !skip; idx++) {
+ 				struct lod_layout_component *other;
+
+ 				other = &lo->ldo_comp_entries[idx];
+ 				if (idx == comp_idx ||
+ 				    (other->llc_flags & LCME_FL_STALE))
+ 					continue;
+
+ 				skip = lu_extent_is_overlapped(
+ 						&comp->llc_extent,
+ 						&other->llc_extent);
+ 			}
+ 		}
+ 	} else if (lo->ldo_flr_state != LCM_FL_SYNC_PENDING) {
+ 		LASSERTF(0, "impossible: %d\n", lo->ldo_flr_state);
+ 	}
+
+ 	CDEBUG(D_LAYOUT, DFID": %s to set component %x to version: %u\n",
+ 	       PFID(lu_object_fid(&lo->ldo_obj.do_lu)),
+ 	       skip ? "skipped" : "chose", comp->llc_id,
+ 	       data->locd_attr->la_layout_version);
+
+ 	return skip;
+ }
+
static inline int
lod_obj_stripe_attr_set_cb(const struct lu_env *env, struct lod_object *lo,
struct dt_object *dt, struct thandle *th,
- int stripe_idx, struct lod_obj_stripe_cb_data *data)
+ int comp_idx, int stripe_idx,
+ struct lod_obj_stripe_cb_data *data)
{
if (data->locd_declare)
return lod_sub_declare_attr_set(env, dt, data->locd_attr, th);
+ if (data->locd_attr->la_valid & LA_LAYOUT_VERSION) {
+ CDEBUG(D_LAYOUT, DFID": set layout version: %u, comp_idx: %d\n",
+ PFID(lu_object_fid(&dt->do_lu)),
+ data->locd_attr->la_layout_version, comp_idx);
+ }
+
return lod_sub_attr_set(env, dt, data->locd_attr, th);
}
* speed up rename().
*/
if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
- if (!(attr->la_valid & (LA_UID | LA_GID | LA_PROJID)))
+ if (!(attr->la_valid & LA_REMOTE_ATTR_SET))
RETURN(rc);
if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER))
RETURN(rc);
}
} else {
- struct lod_obj_stripe_cb_data data;
+ struct lod_obj_stripe_cb_data data = { { 0 } };
data.locd_attr = attr;
data.locd_declare = true;
- rc = lod_obj_for_each_stripe(env, lo, th,
- lod_obj_stripe_attr_set_cb, &data);
+ data.locd_stripe_cb = lod_obj_stripe_attr_set_cb;
+ rc = lod_obj_for_each_stripe(env, lo, th, &data);
}
if (rc)
RETURN(rc);
if (!S_ISDIR(dt->do_lu.lo_header->loh_attr)) {
- if (!(attr->la_valid & (LA_UID | LA_GID | LA_PROJID)))
+ if (!(attr->la_valid & LA_REMOTE_ATTR_SET))
RETURN(rc);
if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_OWNER))
RETURN(rc);
}
+ /* FIXME: a tricky case in the code path of mdd_layout_change():
+ * the in-memory striping information has been freed in lod_xattr_set()
+ * due to layout change. It has to load stripe here again. It only
+ * changes flags of layout so declare_attr_set() is still accurate */
+ rc = lod_load_striping_locked(env, lo);
+ if (rc)
+ RETURN(rc);
+
if (!lod_obj_is_striped(dt))
RETURN(0);
break;
}
} else {
- struct lod_obj_stripe_cb_data data;
+ struct lod_obj_stripe_cb_data data = { { 0 } };
data.locd_attr = attr;
data.locd_declare = false;
- rc = lod_obj_for_each_stripe(env, lo, th,
- lod_obj_stripe_attr_set_cb, &data);
+ data.locd_comp_skip_cb = lod_obj_attr_set_comp_skip_cb;
+ data.locd_stripe_cb = lod_obj_stripe_attr_set_cb;
+ rc = lod_obj_for_each_stripe(env, lo, th, &data);
}
if (rc)
struct thandle *th)
{
struct lod_object *lo = lod_dt_obj(dt);
- struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
- struct lmv_user_md_v1 *lum;
+ struct lmv_user_md_v1 *lum = lum_buf->lb_buf;
int rc;
ENTRY;
- lum = lum_buf->lb_buf;
LASSERT(lum != NULL);
CDEBUG(D_INFO, "lum magic = %x count = %u offset = %d\n",
if (le32_to_cpu(lum->lum_stripe_count) == 0)
GOTO(out, rc = 0);
- rc = lod_verify_md_striping(lod, lum);
- if (rc != 0)
- GOTO(out, rc);
-
/* prepare dir striped objects */
rc = lod_prep_md_striped_create(env, dt, attr, lum, dof, th);
if (rc != 0) {
if (rc != 0)
RETURN(rc);
} else if (strcmp(name, XATTR_NAME_LOV) == 0) {
- rc = lod_verify_striping(d, buf, false, 0);
+ rc = lod_verify_striping(d, lo, buf, false);
if (rc != 0)
RETURN(rc);
}
lod_obj_stripe_replace_parent_fid_cb(const struct lu_env *env,
struct lod_object *lo,
struct dt_object *dt, struct thandle *th,
- int stripe_idx,
+ int comp_idx, int stripe_idx,
struct lod_obj_stripe_cb_data *data)
{
struct lod_thread_info *info = lod_env_info(env);
struct lod_thread_info *info = lod_env_info(env);
struct lu_buf *buf = &info->lti_buf;
struct filter_fid *ff;
- struct lod_obj_stripe_cb_data data;
+ struct lod_obj_stripe_cb_data data = { { 0 } };
int rc;
ENTRY;
buf->lb_len = info->lti_ea_store_size;
data.locd_declare = declare;
- rc = lod_obj_for_each_stripe(env, lo, th,
- lod_obj_stripe_replace_parent_fid_cb,
- &data);
+ data.locd_stripe_cb = lod_obj_stripe_replace_parent_fid_cb;
+ rc = lod_obj_for_each_stripe(env, lo, th, &data);
RETURN(rc);
}
struct thandle *th)
{
struct lod_thread_info *info = lod_env_info(env);
- struct lod_layout_component *comp_array, *lod_comp;
+ struct lod_layout_component *comp_array, *lod_comp, *old_array;
struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev);
struct dt_object *next = dt_object_child(dt);
struct lov_desc *desc = &d->lod_desc;
struct lov_user_md_v3 *v3;
struct lov_comp_md_v1 *comp_v1 = buf->lb_buf;
__u32 magic;
- __u64 prev_end;
- int i, rc, array_cnt;
+ int i, rc, array_cnt, old_array_cnt;
ENTRY;
LASSERT(lo->ldo_is_composite);
- prev_end = lo->ldo_comp_entries[lo->ldo_comp_cnt - 1].llc_extent.e_end;
- rc = lod_verify_striping(d, buf, false, prev_end);
+ if (lo->ldo_flr_state != LCM_FL_NOT_FLR)
+ RETURN(-EBUSY);
+
+ rc = lod_verify_striping(d, lo, buf, false);
if (rc != 0)
RETURN(rc);
lod_comp->llc_extent.e_start = ext->e_start;
lod_comp->llc_extent.e_end = ext->e_end;
lod_comp->llc_stripe_offset = v1->lmm_stripe_offset;
+ lod_comp->llc_flags = comp_v1->lcm_entries[i].lcme_flags;
lod_comp->llc_stripe_count = v1->lmm_stripe_count;
if (!lod_comp->llc_stripe_count ||
}
}
- OBD_FREE(lo->ldo_comp_entries, sizeof(*lod_comp) * lo->ldo_comp_cnt);
+ old_array = lo->ldo_comp_entries;
+ old_array_cnt = lo->ldo_comp_cnt;
+
lo->ldo_comp_entries = comp_array;
lo->ldo_comp_cnt = array_cnt;
+
/* No need to increase layout generation here, it will be increased
* later when generating component ID for the new components */
info->lti_buf.lb_len = lod_comp_md_size(lo, false);
rc = lod_sub_declare_xattr_set(env, next, &info->lti_buf,
XATTR_NAME_LOV, 0, th);
- if (rc)
+ if (rc) {
+ lo->ldo_comp_entries = old_array;
+ lo->ldo_comp_cnt = old_array_cnt;
GOTO(error, rc);
+ }
+
+ OBD_FREE(old_array, sizeof(*lod_comp) * old_array_cnt);
+
+ LASSERT(lo->ldo_mirror_count == 1);
+ lo->ldo_mirrors[0].lme_end = array_cnt - 1;
RETURN(0);
LASSERT(lo->ldo_is_composite);
- rc = lod_verify_striping(d, buf, false, 0);
- if (rc != 0)
- RETURN(rc);
+ if (lo->ldo_flr_state != LCM_FL_NOT_FLR)
+ RETURN(-EBUSY);
magic = comp_v1->lcm_magic;
if (magic == __swab32(LOV_USER_MAGIC_COMP_V1)) {
}
/**
+ * Convert a plain file lov_mds_md to a composite layout.
+ *
+ * \param[in,out] info the thread info::lti_ea_store buffer contains little
+ * endian plain file layout
+ *
+ * \retval 0 on success, <0 on failure
+ */
+ static int lod_layout_convert(struct lod_thread_info *info)
+ {
+ struct lov_mds_md *lmm = info->lti_ea_store;
+ struct lov_mds_md *lmm_save;
+ struct lov_comp_md_v1 *lcm;
+ struct lov_comp_md_entry_v1 *lcme;
+ size_t size;
+ __u32 blob_size;
+ __u32 blob_offset;
+ int rc = 0;
+ ENTRY;
+
+ /* realloc buffer to a composite layout which contains one component */
+ blob_size = lov_mds_md_size(le16_to_cpu(lmm->lmm_stripe_count),
+ le32_to_cpu(lmm->lmm_magic));
+ /* the plain layout blob is placed right after the header and the
+ * single component entry; keep this offset in host byte order */
+ blob_offset = sizeof(*lcm) + sizeof(*lcme);
+ size = blob_offset + blob_size;
+
+ /* the plain layout lives in lti_ea_store, which is about to be
+ * resized and overwritten, so keep a private copy of it */
+ OBD_ALLOC_LARGE(lmm_save, blob_size);
+ if (!lmm_save)
+ GOTO(out, rc = -ENOMEM);
+
+ memcpy(lmm_save, lmm, blob_size);
+
+ if (info->lti_ea_store_size < size) {
+ rc = lod_ea_store_resize(info, size);
+ if (rc)
+ GOTO(out, rc);
+ }
+
+ /* lti_ea_store may have been reallocated, fetch it again */
+ lcm = info->lti_ea_store;
+ lcm->lcm_magic = cpu_to_le32(LOV_MAGIC_COMP_V1);
+ lcm->lcm_size = cpu_to_le32(size);
+ lcm->lcm_layout_gen = cpu_to_le32(le16_to_cpu(
+ lmm_save->lmm_layout_gen));
+ lcm->lcm_flags = cpu_to_le16(LCM_FL_NOT_FLR);
+ lcm->lcm_entry_count = cpu_to_le16(1);
+ lcm->lcm_mirror_count = 0;
+
+ lcme = &lcm->lcm_entries[0];
+ lcme->lcme_flags = cpu_to_le32(LCME_FL_INIT);
+ lcme->lcme_extent.e_start = 0;
+ lcme->lcme_extent.e_end = cpu_to_le64(OBD_OBJECT_EOF);
+ lcme->lcme_offset = cpu_to_le32(blob_offset);
+ lcme->lcme_size = cpu_to_le32(blob_size);
+
+ /* lcme_offset is stored little endian, so it must not be used as a
+ * pointer offset directly; use the host-order value instead */
+ memcpy((char *)lcm + blob_offset, (char *)lmm_save, blob_size);
+
+ EXIT;
+ out:
+ if (lmm_save)
+ OBD_FREE_LARGE(lmm_save, blob_size);
+ return rc;
+ }
+
+ /**
+ * Merge layouts to form a mirrored file.
+ */
+ static int lod_declare_layout_merge(const struct lu_env *env,
+ struct dt_object *dt, const struct lu_buf *mbuf,
+ struct thandle *th)
+ {
+ struct lod_thread_info *info = lod_env_info(env);
+ struct lu_buf *buf = &info->lti_buf;
+ struct lod_object *lo = lod_dt_obj(dt);
+ struct lov_comp_md_v1 *lcm;
+ struct lov_comp_md_v1 *cur_lcm;
+ struct lov_comp_md_v1 *merge_lcm;
+ struct lov_comp_md_entry_v1 *lcme;
+ size_t size = 0;
+ size_t offset;
+ __u16 cur_entry_count;
+ __u16 merge_entry_count;
+ __u32 id = 0;
+ __u16 mirror_id = 0;
+ __u32 mirror_count;
+ int rc, i;
+ ENTRY;
+
+ /* \a mbuf carries the layout to be merged in as a new mirror */
+ merge_lcm = mbuf->lb_buf;
+ if (mbuf->lb_len < sizeof(*merge_lcm))
+ RETURN(-EINVAL);
+
+ /* must be an existing layout from disk */
+ if (le32_to_cpu(merge_lcm->lcm_magic) != LOV_MAGIC_COMP_V1)
+ RETURN(-EINVAL);
+
+ merge_entry_count = le16_to_cpu(merge_lcm->lcm_entry_count);
+
+ /* do not allow to merge two mirrored files */
+ if (le16_to_cpu(merge_lcm->lcm_mirror_count))
+ RETURN(-EBUSY);
+
+ /* verify the target buffer */
+ rc = lod_get_lov_ea(env, lo);
+ if (rc <= 0)
+ RETURN(rc ? : -ENODATA);
+
+ /* a plain (V1/V3) layout is first converted in place to a composite
+ * layout with a single component */
+ cur_lcm = info->lti_ea_store;
+ switch (le32_to_cpu(cur_lcm->lcm_magic)) {
+ case LOV_MAGIC_V1:
+ case LOV_MAGIC_V3:
+ rc = lod_layout_convert(info);
+ break;
+ case LOV_MAGIC_COMP_V1:
+ rc = 0;
+ break;
+ default:
+ rc = -EINVAL;
+ }
+ if (rc)
+ RETURN(rc);
+
+ /* info->lti_ea_store could be reallocated in lod_layout_convert() */
+ cur_lcm = info->lti_ea_store;
+ cur_entry_count = le16_to_cpu(cur_lcm->lcm_entry_count);
+
+ /* 'lcm_mirror_count + 1' is the current # of mirrors the file has */
+ mirror_count = le16_to_cpu(cur_lcm->lcm_mirror_count) + 1;
+ if (mirror_count + 1 > LUSTRE_MIRROR_COUNT_MAX)
+ RETURN(-ERANGE);
+
+ /* size of new layout: both layouts share one header */
+ size = le32_to_cpu(cur_lcm->lcm_size) +
+ le32_to_cpu(merge_lcm->lcm_size) - sizeof(*cur_lcm);
+
+ memset(buf, 0, sizeof(*buf));
+ lu_buf_alloc(buf, size);
+ if (buf->lb_buf == NULL)
+ RETURN(-ENOMEM);
+
+ /* start from the current header and its component entries */
+ lcm = buf->lb_buf;
+ memcpy(lcm, cur_lcm, sizeof(*lcm) + cur_entry_count * sizeof(*lcme));
+
+ /* component blobs follow the combined entry table */
+ offset = sizeof(*lcm) +
+ sizeof(*lcme) * (cur_entry_count + merge_entry_count);
+ for (i = 0; i < cur_entry_count; i++) {
+ struct lov_comp_md_entry_v1 *cur_lcme;
+
+ lcme = &lcm->lcm_entries[i];
+ cur_lcme = &cur_lcm->lcm_entries[i];
+
+ lcme->lcme_offset = cpu_to_le32(offset);
+ memcpy((char *)lcm + offset,
+ (char *)cur_lcm + le32_to_cpu(cur_lcme->lcme_offset),
+ le32_to_cpu(lcme->lcme_size));
+
+ offset += le32_to_cpu(lcme->lcme_size);
+
+ if (mirror_count == 1) {
+ /* new mirrored file, create new mirror ID */
+ id = pflr_id(1, i + 1);
+ lcme->lcme_id = cpu_to_le32(id);
+ }
+
+ /* track the largest component ID seen so far */
+ id = MAX(le32_to_cpu(lcme->lcme_id), id);
+ }
+
+ /* the merged components form the next mirror */
+ mirror_id = mirror_id_of(id) + 1;
+ for (i = 0; i < merge_entry_count; i++) {
+ struct lov_comp_md_entry_v1 *merge_lcme;
+
+ merge_lcme = &merge_lcm->lcm_entries[i];
+ lcme = &lcm->lcm_entries[cur_entry_count + i];
+
+ *lcme = *merge_lcme;
+ lcme->lcme_offset = cpu_to_le32(offset);
+
+ id = pflr_id(mirror_id, i + 1);
+ lcme->lcme_id = cpu_to_le32(id);
+
+ memcpy((char *)lcm + offset,
+ (char *)merge_lcm + le32_to_cpu(merge_lcme->lcme_offset),
+ le32_to_cpu(lcme->lcme_size));
+
+ offset += le32_to_cpu(lcme->lcme_size);
+ }
+
+ /* fixup layout information */
+ lod_obj_inc_layout_gen(lo);
+ lcm->lcm_layout_gen = cpu_to_le32(lo->ldo_layout_gen);
+ lcm->lcm_size = cpu_to_le32(size);
+ lcm->lcm_entry_count = cpu_to_le16(cur_entry_count + merge_entry_count);
+ lcm->lcm_mirror_count = cpu_to_le16(mirror_count);
+ /* lcm_flags is a 16-bit field (it is read with le16_to_cpu() right
+ * here), so it has to be written with the 16-bit conversion too */
+ if ((le16_to_cpu(lcm->lcm_flags) & LCM_FL_FLR_MASK) == LCM_FL_NOT_FLR)
+ lcm->lcm_flags = cpu_to_le16(LCM_FL_RDONLY);
+
+ /* replace the in-memory striping with the merged layout */
+ LASSERT(dt_write_locked(env, dt_object_child(dt)));
+ lod_object_free_striping(env, lo);
+ rc = lod_parse_striping(env, lo, buf);
+ if (rc)
+ GOTO(out, rc);
+
+ rc = lod_sub_declare_xattr_set(env, dt_object_child(dt), buf,
+ XATTR_NAME_LOV, LU_XATTR_REPLACE, th);
+
+ out:
+ lu_buf_free(buf);
+ RETURN(rc);
+ }
+
+ /**
* Implementation of dt_object_operations::do_declare_xattr_set.
*
* \see dt_object_operations::do_declare_xattr_set() in the API description
ENTRY;
mode = dt->do_lu.lo_header->loh_attr & S_IFMT;
- if ((S_ISREG(mode) || mode == 0) && !(fl & LU_XATTR_REPLACE) &&
+ if ((S_ISREG(mode) || mode == 0) &&
+ !(fl & (LU_XATTR_REPLACE | LU_XATTR_MERGE)) &&
(strcmp(name, XATTR_NAME_LOV) == 0 ||
strcmp(name, XATTR_LUSTRE_LOV) == 0)) {
/*
attr->la_mode = S_IFREG;
}
rc = lod_declare_striped_create(env, dt, attr, buf, th);
+ } else if (fl & LU_XATTR_MERGE) {
+ LASSERT(strcmp(name, XATTR_NAME_LOV) == 0 ||
+ strcmp(name, XATTR_LUSTRE_LOV) == 0);
+ rc = lod_declare_layout_merge(env, dt, buf, th);
} else if (S_ISREG(mode) &&
strlen(name) > strlen(XATTR_LUSTRE_LOV) + 1 &&
strncmp(name, XATTR_LUSTRE_LOV,
sizeof(*comp_array) * lo->ldo_comp_cnt);
lo->ldo_comp_entries = comp_array;
lo->ldo_comp_cnt = left;
+
+ LASSERT(lo->ldo_mirror_count == 1);
+ lo->ldo_mirrors[0].lme_end = left - 1;
lod_obj_inc_layout_gen(lo);
} else {
lod_free_comp_entries(lo);
struct lov_user_md_v3 *v3 = NULL;
struct lov_comp_md_v1 *comp_v1 = NULL;
__u16 comp_cnt;
+ __u16 mirror_cnt;
bool composite;
int rc, i;
ENTRY;
comp_cnt = comp_v1->lcm_entry_count;
if (comp_cnt == 0)
RETURN(-EINVAL);
+ mirror_cnt = comp_v1->lcm_mirror_count + 1;
composite = true;
} else {
comp_cnt = 1;
+ mirror_cnt = 0;
composite = false;
}
RETURN(rc);
lds->lds_def_comp_cnt = comp_cnt;
- lds->lds_def_striping_is_composite = composite ? 1 : 0;
+ lds->lds_def_striping_is_composite = composite;
+ lds->lds_def_mirror_cnt = mirror_cnt;
for (i = 0; i < comp_cnt; i++) {
struct lod_layout_component *lod_comp;
int i, rc;
if (lds->lds_def_striping_set && S_ISREG(mode)) {
- rc = lod_alloc_comp_entries(lo, lds->lds_def_comp_cnt);
+ rc = lod_alloc_comp_entries(lo, lds->lds_def_mirror_cnt,
+ lds->lds_def_comp_cnt);
if (rc != 0)
return;
lo->ldo_is_composite = lds->lds_def_striping_is_composite;
+ if (lds->lds_def_mirror_cnt > 1)
+ lo->ldo_flr_state = LCM_FL_RDONLY;
for (i = 0; i < lo->ldo_comp_cnt; i++) {
struct lod_layout_component *obj_comp =
nextc->do_ops->do_ah_init(env, ah, nextp, nextc, child_mode);
if (S_ISDIR(child_mode)) {
+ const struct lmv_user_md_v1 *lum1 = ah->dah_eadata;
+
/* other default values are 0 */
lc->ldo_dir_stripe_offset = -1;
lc->ldo_def_striping = lds;
/* It should always honour the specified stripes */
+ /* Note: old client (< 2.7)might also do lfs mkdir, whose EA
+ * will have old magic. In this case, we should ignore the
+ * stripe count and try to create dir by default stripe.
+ */
if (ah->dah_eadata != NULL && ah->dah_eadata_len != 0 &&
- lod_verify_md_striping(d, ah->dah_eadata) == 0) {
- const struct lmv_user_md_v1 *lum1 = ah->dah_eadata;
-
+ le32_to_cpu(lum1->lum_magic) == LMV_USER_MAGIC) {
lc->ldo_dir_stripe_count =
le32_to_cpu(lum1->lum_stripe_count);
lc->ldo_dir_stripe_offset =
* in config log, use them.
*/
if (lod_need_inherit_more(lc, false)) {
-
if (lc->ldo_comp_cnt == 0) {
- rc = lod_alloc_comp_entries(lc, 1);
+ rc = lod_alloc_comp_entries(lc, 0, 1);
if (rc)
/* fail to allocate memory, will create a
* non-striped file. */
struct lu_attr *attr = &lod_env_info(env)->lti_attr;
uint64_t size, offs;
int i, rc, stripe, stripe_count = 0, stripe_size = 0;
+ struct lu_extent size_ext;
ENTRY;
if (!lod_obj_is_striped(dt))
if (size == 0)
RETURN(0);
+ size_ext = (typeof(size_ext)){ .e_start = size - 1, .e_end = size };
for (i = 0; i < lo->ldo_comp_cnt; i++) {
struct lod_layout_component *lod_comp;
struct lu_extent *extent;
continue;
extent = &lod_comp->llc_extent;
- CDEBUG(D_INFO, "%lld [%lld, %lld)\n",
- size, extent->e_start, extent->e_end);
+ CDEBUG(D_INFO, "%lld "DEXT"\n", size, PEXT(extent));
if (!lo->ldo_is_composite ||
- (size >= extent->e_start && size < extent->e_end)) {
+ lu_extent_is_overlapped(extent, &size_ext)) {
objects = lod_comp->llc_stripe;
stripe_count = lod_comp->llc_stripe_count;
stripe_size = lod_comp->llc_stripe_size;
- break;
- }
- }
- if (stripe_count == 0)
- RETURN(0);
-
- LASSERT(objects != NULL && stripe_size != 0);
+ /* next mirror */
+ if (stripe_count == 0)
+ continue;
- /* ll_do_div64(a, b) returns a % b, and a = a / b */
- ll_do_div64(size, (__u64)stripe_size);
- stripe = ll_do_div64(size, (__u64)stripe_count);
- LASSERT(objects[stripe] != NULL);
+ LASSERT(objects != NULL && stripe_size != 0);
+ /* ll_do_div64(a, b) returns a % b, and a = a / b */
+ ll_do_div64(size, (__u64)stripe_size);
+ stripe = ll_do_div64(size, (__u64)stripe_count);
+ LASSERT(objects[stripe] != NULL);
- size = size * stripe_size;
- offs = attr->la_size;
- size += ll_do_div64(offs, stripe_size);
+ size = size * stripe_size;
+ offs = attr->la_size;
+ size += ll_do_div64(offs, stripe_size);
- attr->la_valid = LA_SIZE;
- attr->la_size = size;
+ attr->la_valid = LA_SIZE;
+ attr->la_size = size;
- rc = lod_sub_declare_attr_set(env, objects[stripe], attr, th);
+ rc = lod_sub_declare_attr_set(env, objects[stripe],
+ attr, th);
+ }
+ }
RETURN(rc);
}
}
/**
+ * Generate component ID for new created component.
+ *
+ * \param[in] lo LOD object
+ * \param[in] comp_idx index of ldo_comp_entries
+ *
+ * \retval component ID on success
+ * \retval LCME_ID_INVAL on failure
+ */
+ static __u32 lod_gen_component_id(struct lod_object *lo,
+ int mirror_id, int comp_idx)
+ {
+ struct lod_layout_component *lod_comp;
+ __u32 id, start, end;
+ int i;
+ ENTRY;
+
+ /* \a mirror_id is combined with the generated sequence number via
+ * pflr_id() to form the returned component ID */
+ LASSERT(lo->ldo_comp_entries[comp_idx].llc_id == LCME_ID_INVAL);
+
+ /* fast path: the bumped layout generation is used as the sequence */
+ lod_obj_inc_layout_gen(lo);
+ id = lo->ldo_layout_gen;
+ if (likely(id <= SEQ_ID_MAX))
+ RETURN(pflr_id(mirror_id, id & SEQ_ID_MASK));
+
+ /* Layout generation wraps, need to check collisions. */
+ start = id & SEQ_ID_MASK;
+ end = SEQ_ID_MAX;
+ again:
+ for (id = start; id <= end; id++) {
+ for (i = 0; i < lo->ldo_comp_cnt; i++) {
+ lod_comp = &lo->ldo_comp_entries[i];
+ if (pflr_id(mirror_id, id) == lod_comp->llc_id)
+ break;
+ }
+ /* Found the unused ID */
+ if (i == lo->ldo_comp_cnt)
+ RETURN(pflr_id(mirror_id, id));
+ }
+ /* The first pass ends at SEQ_ID_MAX, so test against that same bound
+ * to start the wrap-around pass over the lower part of the range. */
+ if (end == SEQ_ID_MAX) {
+ start = 1;
+ end = min(lo->ldo_layout_gen & SEQ_ID_MASK,
+ (__u32)(SEQ_ID_MAX - 1));
+ goto again;
+ }
+
+ RETURN(LCME_ID_INVAL);
+ }
+
+ /**
* Creation of a striped regular object.
*
* The function is called to create the stripe objects for a regular
{
struct lod_layout_component *lod_comp;
struct lod_object *lo = lod_dt_obj(dt);
+ __u16 mirror_id;
int rc = 0, i, j;
ENTRY;
LASSERT(lo->ldo_comp_cnt != 0 && lo->ldo_comp_entries != NULL);
+ mirror_id = lo->ldo_mirror_count > 1 ? 1 : 0;
+
/* create all underlying objects */
for (i = 0; i < lo->ldo_comp_cnt; i++) {
lod_comp = &lo->ldo_comp_entries[i];
+ if (lod_comp->llc_extent.e_start == 0 && i > 0) /* new mirror */
+ ++mirror_id;
+
+ if (lod_comp->llc_id == LCME_ID_INVAL) {
+ lod_comp->llc_id = lod_gen_component_id(lo,
+ mirror_id, i);
+ if (lod_comp->llc_id == LCME_ID_INVAL)
+ GOTO(out, rc = -ERANGE);
+ }
+
if (lod_comp_inited(lod_comp))
continue;
LASSERT(object != NULL);
rc = lod_sub_create(env, object, attr, NULL, dof, th);
if (rc)
- break;
+ GOTO(out, rc);
}
lod_comp_set_init(lod_comp);
}
- if (rc == 0)
- rc = lod_generate_and_set_lovea(env, lo, th);
+ rc = lod_fill_mirrors(lo);
+ if (rc)
+ GOTO(out, rc);
- if (rc == 0)
- lo->ldo_comp_cached = 1;
- else
- lod_object_free_striping(env, lo);
+ rc = lod_generate_and_set_lovea(env, lo, th);
+ if (rc)
+ GOTO(out, rc);
+
+ lo->ldo_comp_cached = 1;
+ RETURN(0);
+ out:
+ lod_object_free_striping(env, lo);
RETURN(rc);
}
static inline int
lod_obj_stripe_destroy_cb(const struct lu_env *env, struct lod_object *lo,
struct dt_object *dt, struct thandle *th,
- int stripe_idx, struct lod_obj_stripe_cb_data *data)
+ int comp_idx, int stripe_idx,
+ struct lod_obj_stripe_cb_data *data)
{
if (data->locd_declare)
return lod_sub_declare_destroy(env, dt, th);
break;
}
} else {
- struct lod_obj_stripe_cb_data data;
+ struct lod_obj_stripe_cb_data data = { { 0 } };
data.locd_declare = true;
- rc = lod_obj_for_each_stripe(env, lo, th,
- lod_obj_stripe_destroy_cb, &data);
+ data.locd_stripe_cb = lod_obj_stripe_destroy_cb;
+ rc = lod_obj_for_each_stripe(env, lo, th, &data);
}
RETURN(rc);
}
}
} else {
- struct lod_obj_stripe_cb_data data;
+ struct lod_obj_stripe_cb_data data = { { 0 } };
data.locd_declare = false;
- rc = lod_obj_for_each_stripe(env, lo, th,
- lod_obj_stripe_destroy_cb, &data);
+ data.locd_stripe_cb = lod_obj_stripe_destroy_cb;
+ rc = lod_obj_for_each_stripe(env, lo, th, &data);
}
RETURN(rc);
return dt_invalidate(env, dt_object_child(dt));
}
- static int lod_declare_layout_change(const struct lu_env *env,
- struct dt_object *dt,
- struct layout_intent *layout,
- const struct lu_buf *buf,
- struct thandle *th)
+ /*
+ * Reset the per-thread scratch state used for a layout change and make
+ * sure info->lti_comp_idx has room for at least \a comp_cnt indices.
+ *
+ * \retval 0 on success
+ * \retval -ENOMEM if the index array cannot be allocated
+ */
+ static int lod_layout_data_init(struct lod_thread_info *info, __u32 comp_cnt)
{
- struct lod_thread_info *info = lod_env_info(env);
- struct lod_object *lo = lod_dt_obj(dt);
- struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev);
- struct dt_object *next = dt_object_child(dt);
+ ENTRY;
+
+ /* clear memory region that will be used for layout change */
+ memset(&info->lti_layout_attr, 0, sizeof(struct lu_attr));
+ info->lti_count = 0;
+
+ /* the existing index array is already large enough, reuse it */
+ if (info->lti_comp_size >= comp_cnt)
+ RETURN(0);
+
+ /* too small: release the old array before allocating a bigger one */
+ if (info->lti_comp_size > 0) {
+ OBD_FREE(info->lti_comp_idx,
+ info->lti_comp_size * sizeof(__u32));
+ info->lti_comp_size = 0;
+ }
+
+ OBD_ALLOC(info->lti_comp_idx, comp_cnt * sizeof(__u32));
+ if (!info->lti_comp_idx)
+ RETURN(-ENOMEM);
+
+ /* record the new capacity only once the allocation succeeded */
+ info->lti_comp_size = comp_cnt;
+ RETURN(0);
+ }
+
+ /*
+ * Declare instantiation of the components whose indices were collected in
+ * info->lti_comp_idx[0 .. lti_count), then declare the update of the
+ * XATTR_NAME_LOV xattr on the child object.
+ *
+ * \retval 0 on success, otherwise the first error returned by a callee
+ */
+ static int lod_declare_instantiate_components(const struct lu_env *env,
+ struct lod_object *lo, struct thandle *th)
+ {
+ struct lod_thread_info *info = lod_env_info(env);
struct ost_pool *inuse = &info->lti_inuse_osts;
+ int i;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(info->lti_count < lo->ldo_comp_cnt);
+ /* the inuse array is only needed when something gets instantiated */
+ if (info->lti_count > 0) {
+ /* Prepare inuse array for composite file */
+ rc = lod_prepare_inuse(env, lo);
+ if (rc)
+ RETURN(rc);
+ }
+
+ /* stop at the first component that fails */
+ for (i = 0; i < info->lti_count; i++) {
+ rc = lod_qos_prep_create(env, lo, NULL, th,
+ info->lti_comp_idx[i], inuse);
+ if (rc)
+ break;
+ }
+
+ /* the layout xattr is declared even when lti_count is zero */
+ if (!rc) {
+ info->lti_buf.lb_len = lod_comp_md_size(lo, false);
+ rc = lod_sub_declare_xattr_set(env, lod_object_child(lo),
+ &info->lti_buf, XATTR_NAME_LOV, 0, th);
+ }
+
+ RETURN(rc);
+ }
+
+ static int lod_declare_update_plain(const struct lu_env *env,
+ struct lod_object *lo, struct layout_intent *layout,
+ const struct lu_buf *buf, struct thandle *th)
+ {
+ struct lod_thread_info *info = lod_env_info(env);
+ struct lod_device *d = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
struct lod_layout_component *lod_comp;
struct lov_comp_md_v1 *comp_v1 = NULL;
bool replay = false;
int i, rc;
ENTRY;
- if (!S_ISREG(dt->do_lu.lo_header->loh_attr) || !dt_object_exists(dt) ||
- dt_object_remote(next))
- RETURN(-EINVAL);
+ LASSERT(lo->ldo_flr_state == LCM_FL_NOT_FLR);
- dt_write_lock(env, next, 0);
/*
* In case the client is passing lovea, which only happens during
* the replay of layout intent write RPC for now, we may need to
if (rc <= 0)
GOTO(out, rc);
/* old on-disk EA is stored in info->lti_buf */
- comp_v1 = (struct lov_comp_md_v1 *)&info->lti_buf.lb_buf;
+ comp_v1 = (struct lov_comp_md_v1 *)info->lti_buf.lb_buf;
replay = true;
} else {
/* non replay path */
rc = lod_load_striping_locked(env, lo);
if (rc)
GOTO(out, rc);
+ }
- /* Prepare inuse array for composite file */
- rc = lod_prepare_inuse(env, lo);
- if (rc)
- GOTO(out, rc);
+ if (layout->li_opc == LAYOUT_INTENT_TRUNC) {
+ /**
+ * trunc transfers [size, eof) in the intent extent, while
+ * we'd instantiated components covers [0, size).
+ */
+ layout->li_extent.e_end = layout->li_extent.e_start;
+ layout->li_extent.e_start = 0;
}
/* Make sure defined layout covers the requested write range. */
lod_comp = &lo->ldo_comp_entries[lo->ldo_comp_cnt - 1];
if (lo->ldo_comp_cnt > 1 &&
lod_comp->llc_extent.e_end != OBD_OBJECT_EOF &&
- lod_comp->llc_extent.e_end < layout->li_end) {
+ lod_comp->llc_extent.e_end < layout->li_extent.e_end) {
CDEBUG(replay ? D_ERROR : D_LAYOUT,
"%s: the defined layout [0, %#llx) does not covers "
- "the write range [%#llx, %#llx).\n",
+ "the write range "DEXT"\n",
lod2obd(d)->obd_name, lod_comp->llc_extent.e_end,
- layout->li_start, layout->li_end);
+ PEXT(&layout->li_extent));
GOTO(out, rc = -EINVAL);
}
+ CDEBUG(D_LAYOUT, "%s: "DFID": instantiate components "DEXT"\n",
+ lod2obd(d)->obd_name, PFID(lod_object_fid(lo)),
+ PEXT(&layout->li_extent));
+
/*
* Iterate ld->ldo_comp_entries, find the component whose extent under
* the write range and not instantianted.
for (i = 0; i < lo->ldo_comp_cnt; i++) {
lod_comp = &lo->ldo_comp_entries[i];
- if (lod_comp->llc_extent.e_start >= layout->li_end)
+ if (lod_comp->llc_extent.e_start >= layout->li_extent.e_end)
break;
if (!replay) {
if (lod_comp->llc_pattern & LOV_PATTERN_F_RELEASED)
GOTO(out, rc = -EINVAL);
- need_create = true;
+ LASSERT(info->lti_comp_idx != NULL);
+ info->lti_comp_idx[info->lti_count++] = i;
+ }
- rc = lod_qos_prep_create(env, lo, NULL, th, i, inuse);
- if (rc)
+ if (info->lti_count == 0)
+ RETURN(-EALREADY);
+
+ lod_obj_inc_layout_gen(lo);
+ rc = lod_declare_instantiate_components(env, lo, th);
+ out:
+ if (rc)
+ lod_object_free_striping(env, lo);
+ RETURN(rc);
+ }
+
+ /* Iterate \a comp over the components of mirror \a mirror_idx of \a lo,
+ * from ldo_mirrors[mirror_idx].lme_start to .lme_end inclusive.
+ * Arguments are parenthesized so that expressions can be passed safely. */
+ #define lod_foreach_mirror_comp(comp, lo, mirror_idx) \
+ for ((comp) = &(lo)->ldo_comp_entries[(lo)->ldo_mirrors[(mirror_idx)].lme_start]; \
+ (comp) <= &(lo)->ldo_comp_entries[(lo)->ldo_mirrors[(mirror_idx)].lme_end]; \
+ (comp)++)
+
+ /* Return the position of \a lod_comp inside lo->ldo_comp_entries. */
+ static inline int lod_comp_index(struct lod_object *lo,
+ struct lod_layout_component *lod_comp)
+ {
+ int idx;
+
+ /* the component must belong to this object's component array */
+ LASSERT(lod_comp >= lo->ldo_comp_entries &&
+ lod_comp <= &lo->ldo_comp_entries[lo->ldo_comp_cnt - 1]);
+
+ idx = lod_comp - lo->ldo_comp_entries;
+ return idx;
+ }
+
+ /**
+ * Stale other mirrors by writing extent.
+ */
+ static void lod_stale_components(struct lod_object *lo, int primary,
+ struct lu_extent *extent)
+ {
+ struct lod_layout_component *pri_comp, *lod_comp;
+ int mirror;
+
+ /* Walk the primary mirror's components touched by the writing
+ * extent; for each of them, flag every overlapping component of
+ * all the other mirrors as stale. */
+ CDEBUG(D_LAYOUT, "primary mirror %d, "DEXT"\n", primary, PEXT(extent));
+ lod_foreach_mirror_comp(pri_comp, lo, primary) {
+ if (lu_extent_is_overlapped(extent, &pri_comp->llc_extent)) {
+ CDEBUG(D_LAYOUT, "primary comp %u "DEXT"\n",
+ lod_comp_index(lo, pri_comp),
+ PEXT(&pri_comp->llc_extent));
+
+ for (mirror = 0; mirror < lo->ldo_mirror_count; mirror++) {
+ /* the primary itself is never staled */
+ if (mirror == primary)
+ continue;
+
+ lod_foreach_mirror_comp(lod_comp, lo, mirror) {
+ if (lu_extent_is_overlapped(
+ &pri_comp->llc_extent,
+ &lod_comp->llc_extent)) {
+ CDEBUG(D_LAYOUT, "stale: %u / %u\n",
+ mirror,
+ lod_comp_index(lo, lod_comp));
+
+ /* mark both the component and
+ * its mirror */
+ lod_comp->llc_flags |= LCME_FL_STALE;
+ lo->ldo_mirrors[mirror].lme_stale = 1;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * Handle a write intent on a mirrored file that is in LCM_FL_RDONLY state.
+ *
+ * Picks a non-stale mirror as primary, marks overlapping components of the
+ * other mirrors stale, collects the uninstantiated primary components
+ * covering the range from offset 0, moves the file to LCM_FL_WRITE_PENDING
+ * and declares the layout and layout version updates.
+ *
+ * \retval 0 on success
+ * \retval -ENODATA if every mirror is stale
+ * \retval negative errno from the declare helpers otherwise
+ */
+ static int lod_declare_update_rdonly(const struct lu_env *env,
+ struct lod_object *lo, struct md_layout_change *mlc,
+ struct thandle *th)
+ {
+ struct lod_thread_info *info = lod_env_info(env);
+ struct lu_attr *layout_attr = &info->lti_layout_attr;
+ struct lod_layout_component *lod_comp;
+ struct layout_intent *layout = mlc->mlc_intent;
+ struct lu_extent extent = layout->li_extent;
+ unsigned int seq = 0;
+ int picked;
+ int i;
+ int rc;
+ ENTRY;
+
+ LASSERT(mlc->mlc_opc == MD_LAYOUT_WRITE);
+ LASSERT(lo->ldo_flr_state == LCM_FL_RDONLY);
+ LASSERT(lo->ldo_mirror_count > 0);
+
+ CDEBUG(D_LAYOUT, DFID": trying to write :"DEXT"\n",
+ PFID(lod_object_fid(lo)), PEXT(&extent));
+
+ /* fault injection: start the search from a random mirror */
+ if (OBD_FAIL_CHECK(OBD_FAIL_FLR_RANDOM_PICK_MIRROR)) {
+ get_random_bytes(&seq, sizeof(seq));
+ seq %= lo->ldo_mirror_count;
+ }
+
+ /**
+ * Pick a mirror as the primary.
+ * Now it only picks the first mirror, this algo can be
+ * revised later after knowing the topology of cluster or
+ * the availability of OSTs.
+ */
+ for (picked = -1, i = 0; i < lo->ldo_mirror_count; i++) {
+ int index = (i + seq) % lo->ldo_mirror_count;
+
+ if (!lo->ldo_mirrors[index].lme_stale) {
+ picked = index;
break;
+ }
}
+ if (picked < 0) /* failed to pick a primary */
+ RETURN(-ENODATA);
- if (need_create)
- lod_obj_inc_layout_gen(lo);
- else
- GOTO(unlock, rc = -EALREADY);
+ CDEBUG(D_LAYOUT, DFID": picked mirror %u as primary\n",
+ PFID(lod_object_fid(lo)), lo->ldo_mirrors[picked].lme_id);
- if (!rc) {
- info->lti_buf.lb_len = lod_comp_md_size(lo, false);
- rc = lod_sub_declare_xattr_set(env, next, &info->lti_buf,
- XATTR_NAME_LOV, 0, th);
+ /* stale overlapping components from other mirrors */
+ lod_stale_components(lo, picked, &extent);
+
+ /* instantiate components for the picked mirror, start from 0 */
+ if (layout->li_opc == LAYOUT_INTENT_TRUNC) {
+ /**
+ * trunc transfers [size, eof) in the intent extent, we'd
+ * stale components overlapping [size, eof), while we'd
+ * instantiated components covers [0, size).
+ */
+ extent.e_end = extent.e_start;
+ }
+ extent.e_start = 0;
+
+ lod_foreach_mirror_comp(lod_comp, lo, picked) {
+ if (!lu_extent_is_overlapped(&extent,
+ &lod_comp->llc_extent))
+ break;
+
+ if (lod_comp_inited(lod_comp))
+ continue;
+
+ /* report the picked mirror, not the leftover loop counter */
+ CDEBUG(D_LAYOUT, "instantiate: %u / %u\n",
+ picked, lod_comp_index(lo, lod_comp));
+
+ info->lti_comp_idx[info->lti_count++] =
+ lod_comp_index(lo, lod_comp);
+ }
+
+ lo->ldo_flr_state = LCM_FL_WRITE_PENDING;
+
+ /* Reset the layout version once it's becoming too large.
+ * This way it can make sure that the layout version is
+ * monotonously increased in this writing era. */
+ lod_obj_inc_layout_gen(lo);
+ if (lo->ldo_layout_gen > (LCME_ID_MAX >> 1)) {
+ __u32 layout_version;
+
+ cfs_get_random_bytes(&layout_version, sizeof(layout_version));
+ lo->ldo_layout_gen = layout_version & 0xffff;
}
+
+ rc = lod_declare_instantiate_components(env, lo, th);
+ if (rc)
+ GOTO(out, rc);
+
+ layout_attr->la_valid = LA_LAYOUT_VERSION;
+ layout_attr->la_layout_version = 0; /* set current version */
+ rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th);
+ if (rc)
+ GOTO(out, rc);
+
out:
if (rc)
lod_object_free_striping(env, lo);
+ RETURN(rc);
+ }
- unlock:
- dt_write_unlock(env, next);
+ /*
+ * Handle MD_LAYOUT_WRITE or MD_LAYOUT_RESYNC on a mirrored file that is in
+ * LCM_FL_WRITE_PENDING state.
+ *
+ * \retval 0 on success
+ * \retval -ENODATA if no non-stale (primary) mirror exists
+ * \retval negative errno from the declare helpers otherwise
+ */
+ static int lod_declare_update_write_pending(const struct lu_env *env,
+ struct lod_object *lo, struct md_layout_change *mlc,
+ struct thandle *th)
+ {
+ struct lod_thread_info *info = lod_env_info(env);
+ struct lu_attr *layout_attr = &info->lti_layout_attr;
+ struct lod_layout_component *lod_comp;
+ struct lu_extent extent = { 0 };
+ int primary = -1;
+ int i;
+ int rc;
+ ENTRY;
+
+ LASSERT(lo->ldo_flr_state == LCM_FL_WRITE_PENDING);
+ LASSERT(mlc->mlc_opc == MD_LAYOUT_WRITE ||
+ mlc->mlc_opc == MD_LAYOUT_RESYNC);
+
+ /* look for the primary mirror */
+ for (i = 0; i < lo->ldo_mirror_count; i++) {
+ if (lo->ldo_mirrors[i].lme_stale)
+ continue;
+
+ /* exactly one non-stale mirror is expected in this state */
+ LASSERTF(primary < 0, DFID " has multiple primary: %u / %u\n",
+ PFID(lod_object_fid(lo)),
+ lo->ldo_mirrors[i].lme_id,
+ lo->ldo_mirrors[primary].lme_id);
+
+ primary = i;
+ }
+ if (primary < 0) {
+ CERROR(DFID ": doesn't have a primary mirror\n",
+ PFID(lod_object_fid(lo)));
+ GOTO(out, rc = -ENODATA);
+ }
+
+ CDEBUG(D_LAYOUT, DFID": found primary %u\n",
+ PFID(lod_object_fid(lo)), lo->ldo_mirrors[primary].lme_id);
+
+ LASSERT(!lo->ldo_mirrors[primary].lme_stale);
+
+ /* for LAYOUT_WRITE opc, it has to do the following operations:
+ * 1. stale overlapping components from stale mirrors;
+ * 2. instantiate components of the primary mirror;
+ * 3. transfer layout version to all objects of the primary;
+ *
+ * for LAYOUT_RESYNC opc, it will do:
+ * 1. instantiate components of all stale mirrors;
+ * 2. transfer layout version to all objects to close write era. */
+
+ if (mlc->mlc_opc == MD_LAYOUT_WRITE) {
+ LASSERT(mlc->mlc_intent != NULL);
+
+ extent = mlc->mlc_intent->li_extent;
+
+ CDEBUG(D_LAYOUT, DFID": intent to write: "DEXT"\n",
+ PFID(lod_object_fid(lo)), PEXT(&extent));
+
+ /* 1. stale overlapping components */
+ lod_stale_components(lo, primary, &extent);
+
+ /* 2. find out the components need instantiating.
+ * instantiate [0, mlc->mlc_intent->e_end) */
+ if (mlc->mlc_intent->li_opc == LAYOUT_INTENT_TRUNC) {
+ /**
+ * trunc transfers [size, eof) in the intent extent,
+ * we'd stale components overlapping [size, eof),
+ * while we'd instantiated components covers [0, size).
+ */
+ extent.e_end = extent.e_start;
+ }
+ extent.e_start = 0;
+
+ lod_foreach_mirror_comp(lod_comp, lo, primary) {
+ if (!lu_extent_is_overlapped(&extent,
+ &lod_comp->llc_extent))
+ break;
+
+ if (lod_comp_inited(lod_comp))
+ continue;
+
+ CDEBUG(D_LAYOUT, "write instantiate %d / %d\n",
+ primary, lod_comp_index(lo, lod_comp));
+ info->lti_comp_idx[info->lti_count++] =
+ lod_comp_index(lo, lod_comp);
+ }
+ } else { /* MD_LAYOUT_RESYNC */
+ /* figure out the components that have been instantiated
+ * in primary to decide what components should be instantiated
+ * in stale mirrors */
+ lod_foreach_mirror_comp(lod_comp, lo, primary) {
+ if (!lod_comp_inited(lod_comp))
+ break;
+
+ extent.e_end = lod_comp->llc_extent.e_end;
+ }
+
+ CDEBUG(D_LAYOUT,
+ DFID": instantiate all stale components in "DEXT"\n",
+ PFID(lod_object_fid(lo)), PEXT(&extent));
+
+ /* 1. instantiate all components within this extent, even
+ * non-stale components so that it won't need to instantiate
+ * those components for mirror truncate later. */
+ for (i = 0; i < lo->ldo_mirror_count; i++) {
+ if (primary == i)
+ continue;
+
+ LASSERTF(lo->ldo_mirrors[i].lme_stale,
+ "both %d and %d are primary\n", i, primary);
+
+ lod_foreach_mirror_comp(lod_comp, lo, i) {
+ if (!lu_extent_is_overlapped(&extent,
+ &lod_comp->llc_extent))
+ break;
+
+ if (lod_comp_inited(lod_comp))
+ continue;
+
+ CDEBUG(D_LAYOUT, "resync instantiate %d / %d\n",
+ i, lod_comp_index(lo, lod_comp));
+
+ info->lti_comp_idx[info->lti_count++] =
+ lod_comp_index(lo, lod_comp);
+ }
+ }
+
+ /* change the file state to SYNC_PENDING */
+ lo->ldo_flr_state = LCM_FL_SYNC_PENDING;
+ }
+ rc = lod_declare_instantiate_components(env, lo, th);
+ if (rc)
+ GOTO(out, rc);
+
+ /* 3. transfer layout version to OST objects.
+ * transfer new layout version to OST objects so that stale writes
+ * can be denied. It also ends an era of writing by setting
+ * LU_LAYOUT_RESYNC. Normal client can never use this bit to
+ * send write RPC; only resync RPCs could do it. */
+ layout_attr->la_valid = LA_LAYOUT_VERSION;
+ layout_attr->la_layout_version = 0; /* set current version */
+ if (mlc->mlc_opc == MD_LAYOUT_RESYNC)
+ layout_attr->la_layout_version = LU_LAYOUT_RESYNC;
+ rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th);
+ if (rc)
+ GOTO(out, rc);
+
+ lod_obj_inc_layout_gen(lo);
+ out:
+ if (rc)
+ lod_object_free_striping(env, lo);
+ RETURN(rc);
+ }
+
+ /*
+ * Handle MD_LAYOUT_RESYNC_DONE or MD_LAYOUT_WRITE on a mirrored file that
+ * is in LCM_FL_SYNC_PENDING state.
+ *
+ * A write moves the file back to LCM_FL_WRITE_PENDING and is handed over to
+ * lod_declare_update_write_pending(). For RESYNC_DONE, the stale flag is
+ * cleared on every stale component listed in mlc->mlc_resync_ids, the file
+ * goes back to LCM_FL_RDONLY and the layout xattr update is declared.
+ *
+ * \retval 0 on success
+ * \retval -EINVAL if a listed ID matches no stale component, or if there
+ * is no in-sync or no resynced component
+ */
+ static int lod_declare_update_sync_pending(const struct lu_env *env,
+ struct lod_object *lo, struct md_layout_change *mlc,
+ struct thandle *th)
+ {
+ struct lod_thread_info *info = lod_env_info(env);
+ unsigned sync_components = 0;
+ unsigned resync_components = 0;
+ int i;
+ int rc;
+ ENTRY;
+
+ LASSERT(lo->ldo_flr_state == LCM_FL_SYNC_PENDING);
+ LASSERT(mlc->mlc_opc == MD_LAYOUT_RESYNC_DONE ||
+ mlc->mlc_opc == MD_LAYOUT_WRITE);
+
+ CDEBUG(D_LAYOUT, DFID ": received op %d in sync pending\n",
+ PFID(lod_object_fid(lo)), mlc->mlc_opc);
+
+ if (mlc->mlc_opc == MD_LAYOUT_WRITE) {
+ CDEBUG(D_LAYOUT, DFID": cocurrent write to sync pending\n",
+ PFID(lod_object_fid(lo)));
+
+ lo->ldo_flr_state = LCM_FL_WRITE_PENDING;
+ /* pair ENTRY with RETURN so the exit is traced as well */
+ RETURN(lod_declare_update_write_pending(env, lo, mlc, th));
+ }
+
+ /* MD_LAYOUT_RESYNC_DONE */
+
+ for (i = 0; i < lo->ldo_comp_cnt; i++) {
+ struct lod_layout_component *lod_comp;
+ int j;
+
+ lod_comp = &lo->ldo_comp_entries[i];
+
+ if (!(lod_comp->llc_flags & LCME_FL_STALE)) {
+ sync_components++;
+ continue;
+ }
+
+ for (j = 0; j < mlc->mlc_resync_count; j++) {
+ if (lod_comp->llc_id != mlc->mlc_resync_ids[j])
+ continue;
+
+ /* consume the ID so that leftovers can be detected */
+ mlc->mlc_resync_ids[j] = LCME_ID_INVAL;
+ lod_comp->llc_flags &= ~LCME_FL_STALE;
+ resync_components++;
+ break;
+ }
+ }
+
+ /* valid check */
+ for (i = 0; i < mlc->mlc_resync_count; i++) {
+ if (mlc->mlc_resync_ids[i] == LCME_ID_INVAL)
+ continue;
+
+ CDEBUG(D_LAYOUT, DFID": lcme id %u (%d / %zu) not exist "
+ "or already synced\n", PFID(lod_object_fid(lo)),
+ mlc->mlc_resync_ids[i], i, mlc->mlc_resync_count);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ if (!sync_components || !resync_components) {
+ CDEBUG(D_LAYOUT, DFID": no mirror in sync or resync\n",
+ PFID(lod_object_fid(lo)));
+
+ /* tend to return an error code here to prevent
+ * the MDT from setting SoM attribute */
+ GOTO(out, rc = -EINVAL);
+ }
+
+ CDEBUG(D_LAYOUT, DFID": resynced %u/%zu components\n",
+ PFID(lod_object_fid(lo)),
+ resync_components, mlc->mlc_resync_count);
+
+ lo->ldo_flr_state = LCM_FL_RDONLY;
+ lod_obj_inc_layout_gen(lo);
+
+ info->lti_buf.lb_len = lod_comp_md_size(lo, false);
+ rc = lod_sub_declare_xattr_set(env, lod_object_child(lo),
+ &info->lti_buf, XATTR_NAME_LOV, 0, th);
+
+ out:
+ if (rc)
+ lod_object_free_striping(env, lo);
+ RETURN(rc);
+ }
+
+ /*
+ * Declare a layout change described by \a mlc on regular file \a dt.
+ *
+ * Loads the striping, prepares the per-thread layout-change scratch data
+ * and dispatches to the handler matching the object's current FLR state.
+ *
+ * \retval -EINVAL if \a dt is not an existing, local regular file
+ * \retval -ENOTSUPP for an unknown FLR state
+ * \retval result of the state handler otherwise
+ */
+ static int lod_declare_layout_change(const struct lu_env *env,
+ struct dt_object *dt, struct md_layout_change *mlc,
+ struct thandle *th)
+ {
+ struct lod_thread_info *info = lod_env_info(env);
+ struct lod_object *lo = lod_dt_obj(dt);
+ int rc;
+ ENTRY;
+
+ if (!S_ISREG(dt->do_lu.lo_header->loh_attr) || !dt_object_exists(dt) ||
+ dt_object_remote(dt_object_child(dt)))
+ RETURN(-EINVAL);
+
+ /* NOTE(review): locked with lod_write_lock() but released with
+ * dt_write_unlock(env, dt) below; presumably these pair up through
+ * the dt object operations - confirm */
+ lod_write_lock(env, dt, 0);
+ rc = lod_load_striping_locked(env, lo);
+ if (rc)
+ GOTO(out, rc);
+
+ LASSERT(lo->ldo_comp_cnt > 0);
+
+ /* reset lti_layout_attr/lti_count and size lti_comp_idx */
+ rc = lod_layout_data_init(info, lo->ldo_comp_cnt);
+ if (rc)
+ GOTO(out, rc);
+
+ /* one handler per FLR state */
+ switch (lo->ldo_flr_state) {
+ case LCM_FL_NOT_FLR:
+ rc = lod_declare_update_plain(env, lo, mlc->mlc_intent,
+ &mlc->mlc_buf, th);
+ break;
+ case LCM_FL_RDONLY:
+ rc = lod_declare_update_rdonly(env, lo, mlc, th);
+ break;
+ case LCM_FL_WRITE_PENDING:
+ rc = lod_declare_update_write_pending(env, lo, mlc, th);
+ break;
+ case LCM_FL_SYNC_PENDING:
+ rc = lod_declare_update_sync_pending(env, lo, mlc, th);
+ break;
+ default:
+ rc = -ENOTSUPP;
+ break;
+ }
+ out:
+ dt_write_unlock(env, dt);
RETURN(rc);
}
* Instantiate layout component objects which covers the intent write offset.
*/
static int lod_layout_change(const struct lu_env *env, struct dt_object *dt,
- struct layout_intent *layout,
- const struct lu_buf *buf, struct thandle *th)
+ struct md_layout_change *mlc, struct thandle *th)
{
struct lu_attr *attr = &lod_env_info(env)->lti_attr;
+ struct lu_attr *layout_attr = &lod_env_info(env)->lti_layout_attr;
+ struct lod_object *lo = lod_dt_obj(dt);
+ int rc;
- RETURN(lod_striped_create(env, dt, attr, NULL, th));
+ rc = lod_striped_create(env, dt, attr, NULL, th);
+ if (!rc && layout_attr->la_valid & LA_LAYOUT_VERSION) {
+ layout_attr->la_layout_version |= lo->ldo_layout_gen;
+ rc = lod_attr_set(env, dt, layout_attr, th);
+ }
+
+ return rc;
}
struct dt_object_operations lod_obj_ops = {
cpy_len = strlcpy(buf, name, buf_size);
- LASSERT(cpy_len == name_len && lu_name_is_valid_2(buf, cpy_len));
+ LASSERT(lu_name_is_valid_2(buf, cpy_len));
+ if (cpy_len != name_len)
+ CDEBUG(D_DENTRY, "%s: %s len %zd != %zd, concurrent rename?\n",
+ req->rq_export->exp_obd->obd_name, buf, name_len,
+ cpy_len);
}
void mdc_file_secctx_pack(struct ptlrpc_request *req, const char *secctx_name,
struct ldlm_lock *lock;
enum mds_op_bias bias = op_data->op_bias;
- if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP |
- MDS_RENAME_MIGRATE)))
+ if (!(bias & (MDS_CLOSE_INTENT | MDS_RENAME_MIGRATE)))
return;
data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
data->cd_data_version = op_data->op_data_version;
data->cd_fid = op_data->op_fid2;
+
+ if (bias & MDS_CLOSE_RESYNC_DONE) {
+ struct close_data_resync_done *sync = &data->cd_resync;
+
+ CLASSERT(sizeof(data->cd_resync) <= sizeof(data->cd_reserved));
+ sync->resync_count = op_data->op_data_size / sizeof(__u32);
+ if (sync->resync_count <= INLINE_RESYNC_ARRAY_SIZE) {
+ memcpy(sync->resync_ids_inline, op_data->op_data,
+ op_data->op_data_size);
+ } else {
+ size_t count = sync->resync_count;
+
+ memcpy(req_capsule_client_get(&req->rq_pill, &RMF_U32),
+ op_data->op_data, count * sizeof(__u32));
+ }
+ }
}
void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
else
b->mbo_blocks = 1;
b->mbo_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+ } else if (info->mti_som_valid) { /* som is valid */
+ b->mbo_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
}
}
rc = mo_attr_get(env, next, ma);
if (rc)
GOTO(out, rc);
+
+ if (S_ISREG(mode))
+ (void) mdt_get_som(info, o, &ma->ma_attr);
ma->ma_valid |= MA_INODE;
}
*
* \param[in] info thread environment
* \param[in] obj object
- * \param[in] layout layout intent
- * \param[in] buf buffer containing client's lovea, could be empty
+ * \param[in] layout layout change descriptor
*
* \retval 0 on success
* \retval < 0 error code
*/
- static int mdt_layout_change(struct mdt_thread_info *info,
- struct mdt_object *obj,
- struct layout_intent *layout,
- const struct lu_buf *buf)
+ int mdt_layout_change(struct mdt_thread_info *info, struct mdt_object *obj,
+ struct md_layout_change *layout)
{
struct mdt_lock_handle *lh = &info->mti_lh[MDT_LH_LOCAL];
int rc;
ENTRY;
- CDEBUG(D_INFO, "got layout change request from client: "
- "opc:%u flags:%#x extent[%#llx,%#llx)\n",
- layout->li_opc, layout->li_flags,
- layout->li_start, layout->li_end);
- if (layout->li_start >= layout->li_end) {
- CERROR("Recieved an invalid layout change range [%llu, %llu) "
- "for "DFID"\n", layout->li_start, layout->li_end,
- PFID(mdt_object_fid(obj)));
- RETURN(-EINVAL);
- }
+ if (!mdt_object_exists(obj))
+ GOTO(out, rc = -ENOENT);
if (!S_ISREG(lu_object_attr(&obj->mot_obj)))
GOTO(out, rc = -EINVAL);
/* take layout lock to prepare layout change */
mdt_lock_reg_init(lh, LCK_EX);
- rc = mdt_object_lock(info, obj, lh,
- MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR);
+ rc = mdt_object_lock(info, obj, lh, MDS_INODELOCK_LAYOUT);
if (rc)
GOTO(out, rc);
- rc = mo_layout_change(info->mti_env, mdt_object_child(obj), layout,
- buf);
+ rc = mo_layout_change(info->mti_env, mdt_object_child(obj), layout);
mdt_object_unlock(info, obj, lh, 1);
out:
if (ma->ma_valid & MA_INODE &&
ma->ma_attr.la_valid & LA_CTIME &&
info->mti_mdt->mdt_namespace->ns_ctime_age_limit +
- ma->ma_attr.la_ctime < cfs_time_current_sec())
+ ma->ma_attr.la_ctime < ktime_get_real_seconds())
child_bits |= MDS_INODELOCK_UPDATE;
}
[REINT_OPEN] = &RQF_MDS_REINT_OPEN,
[REINT_SETXATTR] = &RQF_MDS_REINT_SETXATTR,
[REINT_RMENTRY] = &RQF_MDS_REINT_UNLINK,
- [REINT_MIGRATE] = &RQF_MDS_REINT_RENAME
+ [REINT_MIGRATE] = &RQF_MDS_REINT_RENAME,
+ [REINT_RESYNC] = &RQF_MDS_REINT_RESYNC,
};
ENTRY;
info->mti_opdata = 0;
info->mti_big_lmm_used = 0;
info->mti_big_acl_used = 0;
+ info->mti_som_valid = 0;
info->mti_spec.no_create = 0;
info->mti_spec.sp_rm_entry = 0;
__u64 flags)
{
struct mdt_lock_handle *lhc = &info->mti_lh[MDT_LH_LAYOUT];
- struct layout_intent *layout;
- struct lu_fid *fid;
+ struct md_layout_change layout = { .mlc_opc = MD_LAYOUT_NOP };
+ struct layout_intent *intent;
+ struct lu_fid *fid = &info->mti_tmp_fid2;
struct mdt_object *obj = NULL;
- bool layout_change = false;
int layout_size = 0;
int rc = 0;
ENTRY;
RETURN(-EINVAL);
}
- layout = req_capsule_client_get(info->mti_pill, &RMF_LAYOUT_INTENT);
- if (layout == NULL)
+ fid_extract_from_res_name(fid, &(*lockp)->l_resource->lr_name);
+
+ intent = req_capsule_client_get(info->mti_pill, &RMF_LAYOUT_INTENT);
+ if (intent == NULL)
RETURN(-EPROTO);
- switch (layout->li_opc) {
+ CDEBUG(D_INFO, DFID "got layout change request from client: "
+ "opc:%u flags:%#x extent "DEXT"\n",
+ PFID(fid), intent->li_opc, intent->li_flags,
+ PEXT(&intent->li_extent));
+
+ switch (intent->li_opc) {
case LAYOUT_INTENT_TRUNC:
case LAYOUT_INTENT_WRITE:
- layout_change = true;
+ layout.mlc_opc = MD_LAYOUT_WRITE;
+ layout.mlc_intent = intent;
break;
case LAYOUT_INTENT_ACCESS:
break;
case LAYOUT_INTENT_RELEASE:
case LAYOUT_INTENT_RESTORE:
CERROR("%s: Unsupported layout intent opc %d\n",
- mdt_obd_name(info->mti_mdt), layout->li_opc);
+ mdt_obd_name(info->mti_mdt), intent->li_opc);
rc = -ENOTSUPP;
break;
default:
CERROR("%s: Unknown layout intent opc %d\n",
- mdt_obd_name(info->mti_mdt), layout->li_opc);
+ mdt_obd_name(info->mti_mdt), intent->li_opc);
rc = -EINVAL;
break;
}
if (rc < 0)
RETURN(rc);
- fid = &info->mti_tmp_fid2;
- fid_extract_from_res_name(fid, &(*lockp)->l_resource->lr_name);
-
/* Get lock from request for possible resent case. */
mdt_intent_fixup_resent(info, *lockp, lhc, flags);
GOTO(out_obj, rc);
- if (layout_change) {
- struct lu_buf *buf = &info->mti_buf;
+ if (layout.mlc_opc != MD_LAYOUT_NOP) {
+ struct lu_buf *buf = &layout.mlc_buf;
/**
* mdt_layout_change is a reint operation, when the request
* lovea, then it's a replay of the layout intent write
* RPC.
*/
- rc = mdt_layout_change(info, obj, layout, buf);
+ rc = mdt_layout_change(info, obj, &layout);
if (rc)
GOTO(out_obj, rc);
}
* list */
struct mutex cdt_restore_lock; /**< protect restore
* list */
- cfs_time_t cdt_loop_period; /**< llog scan period */
- cfs_time_t cdt_grace_delay; /**< request grace
+ time64_t cdt_loop_period; /**< llog scan period */
+ time64_t cdt_grace_delay; /**< request grace
* delay */
- cfs_time_t cdt_active_req_timeout; /**< request timeout */
+ time64_t cdt_active_req_timeout; /**< request timeout */
__u32 cdt_default_archive_id; /**< archive id used
* when none are
* specified */
#define MDT_SERVICE_WATCHDOG_FACTOR (2)
#define MDT_COS_DEFAULT (0)
+#define ENOENT_VERSION 1 /** 'virtual' version of non-existent object */
+
struct mdt_object {
struct lu_object_header mot_header;
struct lu_object mot_obj;
mti_cross_ref:1,
/* big_lmm buffer was used and must be used in reply */
mti_big_lmm_used:1,
- mti_big_acl_used:1;
+ mti_big_acl_used:1,
+ mti_som_valid:1;
/* opdata for mdt_reint_open(), has the same as
* ldlm_reply:lock_policy_res1. mdt_update_last_rcvd() stores this
char mti_xattr_buf[128];
struct ldlm_enqueue_info mti_einfo;
struct tg_reply_data *mti_reply_data;
+
+ struct lustre_som_attrs mti_som;
+
+ /* FLR: layout change API */
+ struct md_layout_change mti_layout;
};
extern struct lu_context_key mdt_thread_key;
struct obd_uuid car_uuid; /**< agent doing the req. */
__u32 car_archive_id; /**< archive id */
int car_canceled; /**< request was canceled */
- cfs_time_t car_req_start; /**< start time */
- cfs_time_t car_req_update; /**< last update time */
+ time64_t car_req_start; /**< start time */
+ time64_t car_req_update; /**< last update time */
struct hsm_action_item *car_hai; /**< req. to the agent */
struct cdt_req_progress car_progress; /**< track data mvt
* progress */
int mdt_handle_last_unlink(struct mdt_thread_info *, struct mdt_object *,
struct md_attr *);
void mdt_reconstruct_open(struct mdt_thread_info *, struct mdt_lock_handle *);
+ int mdt_layout_change(struct mdt_thread_info *info, struct mdt_object *obj,
+ struct md_layout_change *spec);
struct lu_buf *mdt_buf(const struct lu_env *env, void *area, ssize_t len);
const struct lu_buf *mdt_buf_const(const struct lu_env *env,
return mdt_dlm_lock_modes[mode];
}
+ /* mdt_som.c */
+ int mdt_set_som(struct mdt_thread_info *info, struct mdt_object *obj,
+ struct lu_attr *attr);
+ int mdt_get_som(struct mdt_thread_info *info, struct mdt_object *obj,
+ struct lu_attr *attr);
+
/* mdt_lvb.c */
extern struct ldlm_valblock_ops mdt_lvbo;
int mdt_dom_lvb_is_valid(struct ldlm_resource *res);
}
static int
-check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
+check_write_checksum(struct obdo *oa, const struct lnet_process_id *peer,
__u32 client_cksum, __u32 server_cksum,
struct osc_brw_async_args *aa)
{
CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc);
/* When server return -EINPROGRESS, client should always retry
* regardless of the number of times the bulk was resent already. */
- if (osc_recoverable_error(rc)) {
+ if (osc_recoverable_error(rc) && !req->rq_no_delay) {
if (req->rq_import_generation !=
req->rq_import->imp_generation) {
CDEBUG(D_HA, "%s: resend cross eviction for object: "
list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
list_del_init(&ext->oe_link);
- osc_extent_finish(env, ext, 1, rc);
+ osc_extent_finish(env, ext, 1,
+ rc && req->rq_no_delay ? -EWOULDBLOCK : rc);
}
LASSERT(list_empty(&aa->aa_exts));
LASSERT(list_empty(&aa->aa_oaps));
int page_count = 0;
bool soft_sync = false;
bool interrupted = false;
+ bool ndelay = false;
int i;
int grant = 0;
int rc;
+ __u32 layout_version = 0;
struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
struct ost_body *body;
ENTRY;
mem_tight |= ext->oe_memalloc;
grant += ext->oe_grants;
page_count += ext->oe_nr_pages;
+ layout_version = MAX(layout_version, ext->oe_layout_version);
if (obj == NULL)
obj = ext->oe_obj;
}
if (oap->oap_interrupted)
interrupted = true;
}
+ if (ext->oe_ndelay)
+ ndelay = true;
}
/* first page in the list */
crattr->cra_oa = oa;
cl_req_attr_set(env, osc2cl(obj), crattr);
- if (cmd == OBD_BRW_WRITE)
+ if (cmd == OBD_BRW_WRITE) {
oa->o_grant_used = grant;
+ if (layout_version > 0) {
+ CDEBUG(D_LAYOUT, DFID": write with layout version %u\n",
+ PFID(&oa->o_oi.oi_fid), layout_version);
+
+ oa->o_layout_version = layout_version;
+ oa->o_valid |= OBD_MD_LAYOUT_VERSION;
+ }
+ }
sort_brw_pages(pga, page_count);
rc = osc_brw_prep_request(cmd, cli, oa, page_count, pga, &req, 0);
oap->oap_request = ptlrpc_request_addref(req);
if (interrupted && !req->rq_intr)
ptlrpc_mark_interrupted(req);
+ if (ndelay) {
+ req->rq_no_resend = req->rq_no_delay = 1;
+ /* probably set a shorter timeout value.
+ * to handle ETIMEDOUT in brw_interpret() correctly. */
+ /* lustre_msg_set_timeout(req, req->rq_timeout / 2); */
+ }
/* Need to update the timestamps after the request is built in case
* we race with setattr (locally or in queue at OST). If OST gets
ALWAYS_EXCEPT="$ALWAYS_EXCEPT 43b 53b 54b"
# bug number for skipped test: LU-9875 LU-9879 LU-9879 LU-9879 LU-9879
ALWAYS_EXCEPT="$ALWAYS_EXCEPT 70e 80 84 87 100"
- # bug number for skipped test: LU-8110 LU-9400 LU-9879 LU-9879 LU-9879
- ALWAYS_EXCEPT="$ALWAYS_EXCEPT 102 103 104 105 107"
+ # bug number for skipped test: LU-8110 LU-9879 LU-9879 LU-9879
+ ALWAYS_EXCEPT="$ALWAYS_EXCEPT 102 104 105 107"
fi
# pass "-E lazy_itable_init" to mke2fs to speed up the formatting time
error_noexit "Verify DoM creation"
return 1
}
- [ $($LFS getstripe -L $tmp/mnt/lustre/dom) == 100 ] || {
+ [ $($LFS getstripe -L $tmp/mnt/lustre/dom) == "mdt" ] || {
error_noexit "Verify a DoM file"
return 1
}
# Remove OSTs from a pool and destroy the pool.
destroy_pool $ost_pool || true
+ if ! combined_mgs_mds ; then
+ umount_mgs_client
+ fi
restore_ostindex
}
done
mount_client $MOUNT || error "mount client $MOUNT failed"
+ if ! combined_mgs_mds ; then
+ mount_mgs_client
+ fi
+
wait_osts_up
$LFS df $MOUNT || error "$LFS df $MOUNT failed"
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
echo "rename $FSNAME to $newname"
- if [ ! combined_mgs_mds ]; then
+ if ! combined_mgs_mds ; then
local facet=$(mgsdevname)
do_facet mgs \
cp $LUSTRE/tests/test-framework.sh $DIR/$tdir ||
error "(2) Fail to copy test-framework.sh"
+ if ! combined_mgs_mds ; then
+ mount_mgs_client
+ fi
do_facet mgs $LCTL pool_new $FSNAME.pool1 ||
error "(3) Fail to create $FSNAME.pool1"
# name the pool name as the fsname
$SETSTRIPE -p $FSNAME $DIR/$tdir/d0 ||
error "(6) Fail to setstripe on $DIR/$tdir/d0"
+ if ! combined_mgs_mds ; then
+ umount_mgs_client
+ fi
KEEP_ZPOOL=true
stopall
FSNAME="mylustre"
setupall
+ if ! combined_mgs_mds ; then
+ mount_mgs_client
+ fi
test_103_check_pool $save_fsname 7
if [ $OSTCOUNT -ge 2 ]; then
$SETSTRIPE -p $save_fsname $DIR/$tdir/f0 ||
error "(16) Fail to setstripe on $DIR/$tdir/f0"
+ if ! combined_mgs_mds ; then
+ umount_mgs_client
+ fi
stopall
FSNAME="tfs"
setupall
+ if ! combined_mgs_mds ; then
+ mount_mgs_client
+ fi
test_103_check_pool $save_fsname 17
+ if ! combined_mgs_mds ; then
+ umount_mgs_client
+ fi
stopall
test_renamefs $save_fsname
[[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME
local copytool_log=$prefix.copytool${arc_id}_log.$agent.log
+ stack_trap cleanup EXIT
do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1"
if [[ $? != 0 ]]; then
[[ $HSMTOOL_NOERROR == true ]] ||
error "start copytool $facet on $agent failed"
echo "start copytool $facet on $agent failed"
fi
-
- trap cleanup EXIT
}
get_copytool_event_log() {
echo -n "Verifying released pattern: "
local PTRN=$($GETSTRIPE -L $f)
echo $PTRN
- [[ $PTRN == 80000001 ]] || error "Is not released"
+ [[ $PTRN == released ]] || error "Is not released"
local fid=$(path2fid $f)
echo "Verifying new fid $fid in archive"
run_test 24c "check that user,group,other request masks work"
cleanup_test_24d() {
- trap 0
mount -o remount,rw $MOUNT2
zconf_umount $(facet_host $SINGLEAGT) "$MOUNT3"
}
mount -o remount,ro $MOUNT2
do_nodes $(comma_list $(nodes_list)) $LCTL clear
- start_full_debug_logging
fid2=$(path2fid $file2)
[ "$fid1" == "$fid2" ] ||
$LFS hsm_archive $file1 || error "Fail to archive $file1"
wait_request_state $fid1 ARCHIVE SUCCEED
- stop_full_debug_logging
-
$LFS hsm_release $file1
$LFS hsm_restore $file2
wait_request_state $fid1 RESTORE SUCCEED
$LFS hsm_release $file2 &&
error "release should fail on read-only mount"
- copytool_cleanup
- cleanup_test_24d
+ return 0
}
run_test 24d "check that read-only mounts are respected"
local there=$(ps -o pid,comm hp $pid >/dev/null)
[[ -z $there ]] || error "Restore initiator does not exit"
- local rc=$(wait $pid)
- [[ $rc -eq 0 ]] || error "Restore initiator failed with $rc"
+ wait $pid || error "Restore initiator failed with $?"
copytool_cleanup
}
local there=$(ps -o pid,comm hp $pid >/dev/null)
[[ -z $there ]] || error "Restore initiator does not exit"
- local rc=$(wait $pid)
- [[ $rc -eq 0 ]] || error "Restore initiator failed with $rc"
+ wait $pid || error "Restore initiator failed with $?"
fid2=$(path2fid $f)
[[ $fid2 == $fid1 ]] || error "Wrong fid after mv $fid2 != $fid1"
[[ -z $there ]] ||
error "Restore initiator does not exit"
- local rc=$(wait $pid)
- [[ $rc -eq 0 ]] ||
- error "Restore initiator failed with $rc"
+ wait $pid || error "Restore initiator failed with $?"
copytool_cleanup
}
local ost_list=$(seq $first_ost $ost_step $last_ost)
local ost_range="$first_ost $last_ost $ost_step"
+ if ! combined_mgs_mds ; then
+ mount_mgs_client
+ fi
+
test_mkdir $DIR/$tdir
pool_add $POOL || error "pool_add failed"
pool_add_targets $POOL $ost_range || error "pool_add_targets failed"
error "llapi_layout_test failed"
destroy_test_pools || error "destroy test pools failed"
+
+ if ! combined_mgs_mds ; then
+ umount_mgs_client
+ fi
}
run_test 27D "validate llapi_layout API"
}
run_test 56aa "lfs find --size under striped dir"
+test_56ba() {
+ # Create composite files with one component
+ TDIR=$DIR/$tdir/1Mfiles
+ setup_56 5 1 "--component-end 1M"
+ # Create composite files with three components
+ TDIR=$DIR/$tdir/2Mfiles
+ setup_56 5 2 "-E 2M -E 4M -E 6M"
+ TDIR=$DIR/$tdir
+ # Create non-composite files
+ createmany -o $TDIR/${tfile}- 10
+
+ local nfiles=$($LFIND --component-end 1M --type f $TDIR | wc -l)
+ [[ $nfiles == 10 ]] ||
+ error "lfs find -E 1M found $nfiles != 10 files"
+
+ nfiles=$($LFIND ! -E 1M --type f $TDIR | wc -l)
+ [[ $nfiles == 25 ]] ||
+ error "lfs find ! -E 1M found $nfiles != 25 files"
+
+ # All files have a component that starts at 0
+ local nfiles=$($LFIND --component-start 0 --type f $TDIR | wc -l)
+ [[ $nfiles == 35 ]] ||
+ error "lfs find --component-start 0 found $nfiles != 35 files"
+
+ nfiles=$($LFIND --component-start 2M --type f $TDIR | wc -l)
+ [[ $nfiles == 15 ]] ||
+ error "$LFIND --component-start 2M found $nfiles != 15 files"
+
+ # All files created here have a componenet that does not starts at 2M
+ nfiles=$($LFIND ! --component-start 2M --type f $TDIR | wc -l)
+ [[ $nfiles == 35 ]] ||
+ error "$LFIND ! --component-start 2M found $nfiles != 35 files"
+
+ # Find files with a specified number of components
+ local nfiles=$($LFIND --component-count 3 --type f $TDIR | wc -l)
+ [[ $nfiles == 15 ]] ||
+ error "lfs find --component-count 3 found $nfiles != 15 files"
+
+ # Remember non-composite files have a component count of zero
+ local nfiles=$($LFIND --component-count 0 --type f $TDIR | wc -l)
+ [[ $nfiles == 10 ]] ||
+ error "lfs find --component-count 0 found $nfiles != 10 files"
+
+ nfiles=$($LFIND ! --component-count 3 --type f $TDIR | wc -l)
+ [[ $nfiles == 20 ]] ||
+ error "$LFIND ! --component-count 3 found $nfiles != 20 files"
+
+ # All files have a flag called "init"
+ local nfiles=$($LFIND --component-flags init --type f $TDIR | wc -l)
+ [[ $nfiles == 35 ]] ||
+ error "$LFIND --component-flags init found $nfiles != 35 files"
+
+ # Multi-component files will have a component not initialized
+ local nfiles=$($LFIND ! --component-flags init --type f $TDIR | wc -l)
+ [[ $nfiles == 15 ]] ||
+ error "$LFIND !--component-flags init found $nfiles != 15 files"
+
+ rm -rf $TDIR
+
+}
+run_test 56ba "test lfs find --component-end, -start, -count, and -flags"
+
test_57a() {
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
# note test will not do anything if MDS is not local
-type f \
-not -name force_lbug \
-not -name changelog_mask \
- -exec badarea_io '{}' \; &> /dev/null ||
+ -exec badarea_io '{}' \; ||
error "find $proc_dirs failed"
local facet
-type f \
-not -name force_lbug \
-not -name changelog_mask \
- -exec badarea_io '{}' \\\; &> /dev/null ||
+ -exec badarea_io '{}' \\\; ||
error "$facet find $facet_proc_dirs failed"
done
local test_path=$POOL_ROOT/$POOL_DIR_NAME
local file_dir=$POOL_ROOT/file_tst
local subdir=$test_path/subdir
-
local rc=0
+
+ if ! combined_mgs_mds ; then
+ mount_mgs_client
+ fi
+
while : ; do
# former test_200a test_200b
pool_add $POOL || { rc=$? ; break; }
pool_create_files $POOL $file_dir $files "$ost_list" \
|| { rc=$? ; break; }
# former test_200g test_200h
- pool_lfs_df $POOL || { rc=$? ; break; }
+ pool_lfs_df $POOL || { rc=$? ; break; }
pool_file_rel_path $POOL $test_path || { rc=$? ; break; }
# former test_201a test_201b test_201c
local f=$test_path/$tfile
pool_remove_all_targets $POOL $f || { rc=$? ; break; }
- pool_remove $POOL $f || { rc=$? ; break; }
+ pool_remove $POOL $f || { rc=$? ; break; }
break
done
destroy_test_pools
+
+ if ! combined_mgs_mds ; then
+ umount_mgs_client
+ fi
return $rc
}
run_test 200 "OST pools"
$LFS df -i
+ if ! combined_mgs_mds ; then
+ mount_mgs_client
+ fi
+
do_facet ost$((OSTIDX + 1)) lctl set_param fail_val=-1
#define OBD_FAIL_OST_ENOINO 0x229
do_facet ost$((OSTIDX + 1)) lctl set_param fail_loc=0x229
do_facet ost$((OSTIDX + 1)) lctl set_param fail_val=0
do_facet ost$((OSTIDX + 1)) lctl set_param fail_loc=0
- do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $OST || return 4
- do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME || return 5
+ do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $OST ||
+ error "$LCTL pool_remove $FSNAME.$TESTNAME $OST failed"
+ do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME ||
+ error "$LCTL pool_destroy $FSNAME.$TESTNAME failed"
echo "unlink $MDSOBJS files @$next_id..."
- unlinkmany $DIR/$tdir/f $MDSOBJS || return 6
+ unlinkmany $DIR/$tdir/f $MDSOBJS || error "unlinkmany failed"
+
+ if ! combined_mgs_mds ; then
+ umount_mgs_client
+ fi
}
run_test 220 "preallocated MDS objects still used if ENOSPC from OST"
skip_env "Need to mount OST to test" && return
fi
- [ $MDSCOUNT -ge 2 ] &&
- skip "skipping now for more than one MDT" && return
local mds=$(facet_host $SINGLEMDS)
local target=$(do_nodes $mds 'lctl dl' | \
awk "{if (\$2 == \"UP\" && \$3 == \"mdt\") {print \$4}}")
$GETSTRIPE -v $DIR/$tfile
local pattern=$($GETSTRIPE -L $DIR/$tfile)
- [ X"$pattern" = X"80000001" ] || error "pattern error ($pattern)"
+ [ X"$pattern" = X"released" ] || error "pattern error ($pattern)"
local stripe_count=$($GETSTRIPE -c $DIR/$tfile) || error "getstripe"
[ $stripe_count -eq 2 ] || error "stripe count not 2 ($stripe_count)"
osp.$mdtosc_proc1.reserved_mb_low)
echo "prev high watermark $last_wm_h, prev low watermark $last_wm_l"
+ if ! combined_mgs_mds ; then
+ mount_mgs_client
+ fi
create_pool $FSNAME.$TESTNAME || error "Pool creation failed"
do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $ost_name ||
error "Adding $ost_name to pool failed"
error "Remove $ost_name from pool failed"
do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME ||
error "Pool destroy fialed"
+
+ if ! combined_mgs_mds ; then
+ umount_mgs_client
+ fi
}
run_test 253 "Check object allocation limit"
local difference
local i
local rc
+
+ # skip message must quote the same version the check actually tests
+ [ $(lustre_version_code ost1) -lt $(version_code 2.10.50) ] &&
+ skip "lustre < 2.10.50 does not support lockahead" && return
+
test_mkdir -p $DIR/$tdir
$SETSTRIPE -i 0 $DIR/$tdir
ldlm.namespaces.$FSNAME-OST0000*osc-f*.lock_unused_count)
difference="$((new_count - count))"
- # Test 15 output is divided by 1000 to map down to valid return
+ # Test 15 output is divided by 100 to map down to valid return
if [ $i -eq 15 ]; then
- rc="$((rc * 1000))"
+ rc="$((rc * 100))"
fi
if [ $difference -ne $rc ]; then
$LFS setstripe -E 1M -L mdt $dom ||
error "Can't create DoM layout"
- [ $($LFS getstripe -L $dom) == 100 ] || error "bad pattern"
+ [ $($LFS getstripe -L $dom) == "mdt" ] || error "bad pattern"
[ $($LFS getstripe -c $dom) == 0 ] || error "bad stripe count"
[ $($LFS getstripe -S $dom) == 1048576 ] || error "bad stripe size"
# check files inherit DoM EA
touch $DIR/$tdir/first
- [ $($GETSTRIPE -L $DIR/$tdir/first) == 100 ] ||
+ [ $($GETSTRIPE -L $DIR/$tdir/first) == "mdt" ] ||
error "bad pattern"
[ $($LFS getstripe -c $DIR/$tdir/first) == 0 ] ||
error "bad stripe count"
# check directory inherits DoM EA and uses it as default
mkdir $DIR/$tdir/subdir
touch $DIR/$tdir/subdir/second
- [ $($LFS getstripe -L $DIR/$tdir/subdir/second) == 100 ] ||
+ [ $($LFS getstripe -L $DIR/$tdir/subdir/second) == "mdt" ] ||
error "bad pattern in sub-directory"
[ $($LFS getstripe -c $DIR/$tdir/subdir/second) == 0 ] ||
error "bad stripe count in sub-directory"
touch $DIR/$tdir/subdir/f2
[ $($LFS getstripe -c $DIR/$tdir/subdir/f2) == 1 ] ||
error "wrong default striping in file 2"
- [ $($LFS getstripe -L $DIR/$tdir/subdir/f2) == 1 ] ||
+ [ $($LFS getstripe -L $DIR/$tdir/subdir/f2) == "raid0" ] ||
error "bad pattern in file 2"
return 0
}
local def_stripe_size=$($GETSTRIPE -S $MOUNT)
local def_stripe_offset=$($GETSTRIPE -i $MOUNT)
local def_pool=$($GETSTRIPE -p $MOUNT)
-
local test_pool=$TESTNAME
+
+ if ! combined_mgs_mds ; then
+ mount_mgs_client
+ fi
pool_add $test_pool || error "pool_add failed"
pool_add_targets $test_pool 0 $(($OSTCOUNT - 1)) 1 ||
error "pool_add_targets failed"
local f=$DIR/$tdir/$tfile
pool_remove_all_targets $test_pool $f
pool_remove $test_pool $f
+
+ if ! combined_mgs_mds ; then
+ umount_mgs_client
+ fi
}
run_test 406 "DNE support fs default striping"
# Default to the in-tree sgpdd-survey script. (A stray ')' inside the
# expansion used to be appended to this default path, so it never existed.)
export SGPDDSURVEY=${SGPDDSURVEY:-"$LUSTRE/../lustre-iokit/sgpdd-survey/sgpdd-survey"}
[ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey)
export MCREATE=${MCREATE:-mcreate}
+ export MULTIOP=${MULTIOP:-multiop}
# Ubuntu, at least, has a truncate command in /usr/bin
# so fully path our truncate command.
export TRUNCATE=${TRUNCATE:-$LUSTRE/tests/truncate}
fi
}
+# Mount the file system on the MGS
+mount_mgs_client() {
+ do_facet mgs "mkdir -p $MOUNT"
+ zconf_mount $mgs_HOST $MOUNT $MOUNT_OPTS ||
+ error "unable to mount $MOUNT on MGS"
+}
+
+# Unmount the file system on the MGS node and remove its mount point.
+#
+# The mount point is removed with rmdir rather than "rm -rf": the result of
+# zconf_umount is not checked, so a recursive delete after a failed unmount
+# would wipe the contents of the still-mounted file system. rmdir only ever
+# removes an empty directory and simply fails otherwise.
+umount_mgs_client() {
+	zconf_umount $mgs_HOST $MOUNT
+	do_facet mgs "rmdir $MOUNT"
+}
+
# nodes is comma list
sanity_mount_check_nodes () {
local nodes=$1
}
check_network() {
- local host=$1
- local max=$2
- local sleep=${3:-5}
+ local host=$1
+ local max=$2
+ local sleep=${3:-5}
- echo `date +"%H:%M:%S (%s)"` waiting for $host network $max secs ...
- if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then
- echo "Network not available!"
- exit 1
- fi
+ [ "$host" = "$HOSTNAME" ] && return 0
+
+ echo "$(date +'%H:%M:%S (%s)') waiting for $host network $max secs ..."
+ if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then
+ echo "Network not available!"
+ exit 1
+ fi
- echo `date +"%H:%M:%S (%s)"` network interface is UP
+ echo "$(date +'%H:%M:%S (%s)') network interface is UP"
}
no_dsh() {
# Test interface
##################################
+# usage: stack_trap arg sigspec
+#
+# stack_trap() behaves like bash's built-in trap, except that it "stacks" the
+# command ``arg`` on top of previously defined commands for ``sigspec`` instead
+# of overwriting them.
+# stacked traps are executed in reverse order of their registration
+#
+# arg and sigspec have the same meaning as in man (1) trap
+#
+# The previously registered command is recovered from "trap -p", which prints
+# it as: trap -- '<command>' <sigspec>
+# with every single quote inside <command> written as '\''. Those escapes are
+# undone before the command is registered again, otherwise a stacked handler
+# containing a single quote would be re-registered in a corrupted form.
+stack_trap()
+{
+	local arg="$1"
+	local sigspec="$2"
+	local quote="'"
+	local escaped_quote="'\\''"
+
+	local cmd="$(trap -p $sigspec)"
+
+	# strip the leading "trap -- '" and the trailing "' <sigspec>"
+	cmd="${cmd#trap -- \'}"
+	cmd="${cmd%\'*}"
+	# turn the shell-quoted form back into the raw command text
+	cmd="${cmd//"$escaped_quote"/$quote}"
+	[ -n "$cmd" ] && cmd="; $cmd"
+	cmd="${arg}$cmd"
+
+	trap "$cmd" $sigspec
+}
+
error_noexit() {
report_error "$@"
}
}
get_clientosc_proc_path() {
- echo "${1}-osc-*"
+ echo "${1}-osc-ffff*"
}
# If the 2.0 MDS was mounted on 1.8 device, then the OSC and LOV names
local mdt_label=$(convert_facet2label $mds_facet)
local mdt_index=$(echo $mdt_label | sed -e 's/^.*-//')
- if [ $(lustre_version_code $mds_facet) -le $(version_code 1.8.0) ] ||
- mds_on_old_device $mds_facet; then
- echo "${ost_label}-osc"
- elif [[ $ost_label = *OST* ]]; then
+ if [[ $ost_label = *OST* ]]; then
echo "${ost_label}-osc-${mdt_index}"
else
echo "${ost_label}-osp-${mdt_index}"
noinst_LIBRARIES = liblustreapitmp.a
endif # UTILS
-lctl_SOURCES = lustre_lfsck.c portals.c debug.c obd.c lustre_cfg.c lctl.c obdctl.h lsnapshot.c
+lctl_SOURCES = portals.c debug.c obd.c lustre_cfg.c lctl.c obdctl.h
+if SERVER
+lctl_SOURCES += lustre_lfsck.c lsnapshot.c
+endif
lctl_LDADD := liblustreapi.a $(LIBCFS) $(LIBREADLINE) $(PTHREAD_LIBS)
lctl_DEPENDENCIES := $(LIBCFS) liblustreapi.a
liblustreapi_json.c liblustreapi_layout.c \
liblustreapi_lease.c liblustreapi_util.c \
liblustreapi_kernelconn.c liblustreapi_param.c \
+ liblustreapi_mirror.c \
$(top_builddir)/libcfs/libcfs/util/string.c \
$(top_builddir)/libcfs/libcfs/util/param.c \
liblustreapi_ladvise.c liblustreapi_chlg.c
#endif /* !ARRAY_SIZE */
/* all functions */
- static int lfs_setstripe(int argc, char **argv);
static int lfs_find(int argc, char **argv);
static int lfs_getstripe(int argc, char **argv);
static int lfs_getdirstripe(int argc, char **argv);
static int lfs_swap_layouts(int argc, char **argv);
static int lfs_mv(int argc, char **argv);
static int lfs_ladvise(int argc, char **argv);
+ static int lfs_mirror(int argc, char **argv);
+ static int lfs_mirror_list_commands(int argc, char **argv);
static int lfs_list_commands(int argc, char **argv);
+ static inline int lfs_mirror_resync(int argc, char **argv);
+
+ /*
+  * Identifies which lfs command entered the shared lfs_setstripe0()
+  * implementation; each value is passed by exactly one wrapper below.
+  */
+ enum setstripe_origin {
+ SO_SETSTRIPE,
+ SO_MIGRATE,
+ SO_MIRROR_CREATE,
+ SO_MIRROR_EXTEND
+ };
+ /* common implementation behind the four wrappers below */
+ static int lfs_setstripe0(int argc, char **argv, enum setstripe_origin opc);
+
+ /* forwards to lfs_setstripe0() with SO_SETSTRIPE */
+ static inline int lfs_setstripe(int argc, char **argv)
+ {
+ return lfs_setstripe0(argc, argv, SO_SETSTRIPE);
+ }
+ /* forwards to lfs_setstripe0() with SO_MIGRATE */
+ static inline int lfs_setstripe_migrate(int argc, char **argv)
+ {
+ return lfs_setstripe0(argc, argv, SO_MIGRATE);
+ }
+ /* forwards to lfs_setstripe0() with SO_MIRROR_CREATE */
+ static inline int lfs_mirror_create(int argc, char **argv)
+ {
+ return lfs_setstripe0(argc, argv, SO_MIRROR_CREATE);
+ }
+ /* forwards to lfs_setstripe0() with SO_MIRROR_EXTEND */
+ static inline int lfs_mirror_extend(int argc, char **argv)
+ {
+ return lfs_setstripe0(argc, argv, SO_MIRROR_EXTEND);
+ }
/* Setstripe and migrate share mostly the same parameters */
#define SSM_CMD_COMMON(cmd) \
"\t respectively, -1 for EOF). Must be a multiple of\n"\
"\t stripe_size.\n"
+ #define MIRROR_CREATE_HELP \
+ "\tmirror_count: Number of mirrors to be created with the upcoming\n" \
+ "\t setstripe layout options\n" \
+ "\t It defaults to 1 if not specified; if specified,\n" \
+ "\t it must follow the option without a space.\n" \
+ "\t The option can also be repeated multiple times to\n" \
+ "\t separate mirrors that have different layouts.\n" \
+ "\tsetstripe options: Mirror layout\n" \
+ "\t It can be a plain layout or a composite layout.\n" \
+ "\t If not specified, the stripe options inherited\n" \
+ "\t from the previous component will be used.\n" \
+ "\tparent: Use default stripe options from parent directory\n"
+
+ #define MIRROR_EXTEND_HELP \
+ MIRROR_CREATE_HELP \
+ "\tvictim_file: The layout of victim_file will be split and used\n" \
+ "\t as a mirror added to the mirrored file.\n" \
+ "\tno-verify: This option indicates not to verify the mirror(s)\n" \
+ "\t from victim file(s) in case the victim file(s)\n" \
+ "\t contains the same data as the original mirrored\n" \
+ "\t file.\n"
+
+ #define MIRROR_EXTEND_USAGE \
+ " <--mirror-count|-N[mirror_count]>\n" \
+ " [setstripe options|--parent|-f <victim_file>]\n" \
+ " [--no-verify]\n"
+
+ #define SETSTRIPE_USAGE \
+ SSM_CMD_COMMON("setstripe") \
+ MIRROR_EXTEND_USAGE \
+ " <directory|filename>\n" \
+ SSM_HELP_COMMON \
+ MIRROR_EXTEND_HELP
#define MIGRATE_USAGE \
SSM_CMD_COMMON("migrate ") \
"\tmode: the mode of the directory\n"
static const char *progname;
- static bool file_lease_supported = true;
+
+ /**
+ * command_t mirror_cmdlist - lfs mirror commands.
+ *
+ * Sub-command table for "lfs mirror"; it is the table referenced by the
+ * "mirror" entry of cmdlist. Each entry gives the sub-command name, its
+ * handler and its help/usage text. The last entry names no command and has
+ * a NULL pc_help (presumably the end-of-table marker for the parser).
+ */
+ command_t mirror_cmdlist[] = {
+ { .pc_name = "create", .pc_func = lfs_mirror_create,
+ .pc_help = "Create a mirrored file.\n"
+ "usage: lfs mirror create "
+ "<--mirror-count|-N[mirror_count]> "
+ "[setstripe options|--parent] ... <filename|directory>\n"
+ MIRROR_CREATE_HELP },
+ { .pc_name = "extend", .pc_func = lfs_mirror_extend,
+ .pc_help = "Extend a mirrored file.\n"
+ "usage: lfs mirror extend "
+ "<--mirror-count|-N[mirror_count]> [--no-verify] "
+ "[setstripe options|--parent|-f <victim_file>] ... <filename>\n"
+ MIRROR_EXTEND_HELP },
+ { .pc_name = "resync", .pc_func = lfs_mirror_resync,
+ .pc_help = "Resynchronizes out-of-sync mirrored file(s).\n"
+ "usage: lfs mirror resync [--only <mirror_id[,...]>] "
+ "<mirrored file> [<mirrored file2>...]\n"},
+ { .pc_name = "--list-commands", .pc_func = lfs_mirror_list_commands,
+ .pc_help = "list commands supported by lfs mirror"},
+ { .pc_name = "help", .pc_func = Parser_help, .pc_help = "help" },
+ { .pc_name = "exit", .pc_func = Parser_quit, .pc_help = "quit" },
+ { .pc_name = "quit", .pc_func = Parser_quit, .pc_help = "quit" },
+ { .pc_help = NULL }
+ };
/* all available commands */
command_t cmdlist[] = {
"usage: hsm_release [--filelist FILELIST] [--data DATA] <file> ..."},
{"hsm_remove", lfs_hsm_remove, 0,
"Remove file copy from external storage.\n"
- "usage: hsm_remove [--filelist FILELIST] [--data DATA]\n"
- " [--mntpath MOUNTPATH] [--archive NUM] <file|FID> ...\n"
+ "usage: hsm_remove [--filelist FILELIST] [--data DATA] "
+ "[--archive NUM]\n"
+ " (FILE [FILE ...] | "
+ "--mntpath MOUNTPATH FID [FID ...])\n"
"\n"
- "Note: To remove files from the archive that have been deleted on\n"
- "Lustre, set mntpath and optionally archive. In that case, all the\n"
- "positional arguments and entries in the file list must be FIDs."
+ "Note: To remove an archived copy of a file already deleted from a "
+ "Lustre FS, the\n"
+ "--mntpath option and a list of FIDs must be specified"
},
{"hsm_cancel", lfs_hsm_cancel, 0,
"Cancel requests related to specified files.\n"
"usage: hsm_cancel [--filelist FILELIST] [--data DATA] <file> ..."},
{"swap_layouts", lfs_swap_layouts, 0, "Swap layouts between 2 files.\n"
"usage: swap_layouts <path1> <path2>"},
- {"migrate", lfs_setstripe, 0,
+ {"migrate", lfs_setstripe_migrate, 0,
"migrate a directory between MDTs.\n"
"usage: migrate --mdt-index <mdt_idx> [--verbose|-v] "
"<directory>\n"
" {[--end|-e END[kMGT]] | [--length|-l LENGTH[kMGT]]}\n"
" {[--mode|-m [READ,WRITE]}\n"
" <file> ...\n"},
+ {"mirror", lfs_mirror, mirror_cmdlist,
+ "lfs commands used to manage files with mirrored components:\n"
+ "lfs mirror create - create a mirrored file or directory\n"
+ "lfs mirror extend - add mirror(s) to an existing file\n"
+ "lfs mirror split - split a mirror from an existing mirrored file\n"
+ "lfs mirror resync - resynchronize an out-of-sync mirrored file\n"
+ "lfs mirror verify - verify a mirrored file\n"},
{"help", Parser_help, 0, "help"},
{"exit", Parser_quit, 0, "quit"},
{"quit", Parser_quit, 0, "quit"},
};
- #define MIGRATION_NONBLOCK 1
-
static int check_hashtype(const char *hashtype)
{
int i;
return 0;
}
- /**
- * Internal helper for migrate_copy_data(). Check lease and report error if
- * need be.
- *
- * \param[in] fd File descriptor on which to check the lease.
- * \param[out] lease_broken Set to true if the lease was broken.
- * \param[in] group_locked Whether a group lock was taken or not.
- * \param[in] path Name of the file being processed, for error
- * reporting
- *
- * \retval 0 Migration can keep on going.
- * \retval -errno Error occurred, abort migration.
- */
- static int check_lease(int fd, bool *lease_broken, bool group_locked,
- const char *path)
+
+ static const char *error_loc = "syserror";
+
+ /* Bit flags tested against lfs_migrate()'s migration_flags. */
+ enum {
+ /* when clear, lfs_migrate() uses migrate_block(); when set, it takes a
+ * read lease and uses migrate_nonblock() */
+ MIGRATION_NONBLOCK = 1 << 0,
+ /* on the non-blocking path, finish with MERGE_LAYOUTS_CLOSE instead of
+ * SWAP_LAYOUTS_CLOSE */
+ MIGRATION_MIRROR = 1 << 1,
+ };
+
+ static int lfs_component_create(char *fname, int open_flags, mode_t open_mode,
+ struct llapi_layout *layout);
+
+ /**
+ * Open the file to be migrated and create the volatile file that will
+ * receive its data.
+ *
+ * The volatile file is created in the directory of \a name (or in the
+ * current working directory when \a name has no '/'), under the name
+ * "<LUSTRE_VOLATILE_HDR>:<MDT index>:<random>", retrying with a new random
+ * value while creation fails with -EEXIST. Its owner and group are then
+ * made equal to those of the source file.
+ *
+ * \param[in] name path of the source file
+ * \param[in] param stripe parameters for the volatile file, or NULL
+ * \param[in] layout layout for the volatile file, used when \a param
+ * is NULL
+ * \param[out] fd_src descriptor of the source file (O_RDWR | O_DIRECT)
+ * \param[out] fd_tgt descriptor of the volatile file
+ *
+ * \retval 0 success; both descriptors are stored and error_loc
+ * is reset to NULL
+ * \retval -errno failure; no descriptor is left open and error_loc
+ * names the step that failed
+ */
+ static int
+ migrate_open_files(const char *name, const struct llapi_stripe_param *param,
+ struct llapi_layout *layout, int *fd_src, int *fd_tgt)
{
- int rc;
+ int fd = -1;
+ int fdv = -1;
+ int mdt_index;
+ int random_value;
+ char parent[PATH_MAX];
+ char volatile_file[PATH_MAX];
+ char *ptr;
+ int rc;
+ struct stat st;
+ struct stat stv;
- if (!file_lease_supported)
- return 0;
+ if (param == NULL && layout == NULL) {
+ error_loc = "layout information";
+ return -EINVAL;
+ }
- rc = llapi_lease_check(fd);
- if (rc > 0)
- return 0; /* llapi_check_lease returns > 0 on success. */
+ /* search for file directory pathname */
+ if (strlen(name) > sizeof(parent) - 1) {
+ error_loc = "source file name";
+ return -ERANGE;
+ }
- if (!group_locked) {
- fprintf(stderr, "%s: cannot migrate '%s': file busy\n",
- progname, path);
- rc = rc ? rc : -EAGAIN;
+ /* the length check above guarantees the copy is NUL-terminated */
+ strncpy(parent, name, sizeof(parent));
+ ptr = strrchr(parent, '/');
+ if (ptr == NULL) {
+ if (getcwd(parent, sizeof(parent)) == NULL) {
+ error_loc = "getcwd";
+ return -errno;
+ }
} else {
- fprintf(stderr, "%s: external attempt to access file '%s' "
- "blocked until migration ends.\n", progname, path);
- rc = 0;
+ if (ptr == parent) /* leading '/' */
+ ptr = parent + 1;
+ *ptr = '\0';
+ }
+
+ /* open file, direct io */
+ /* even if the file is only read, WR mode is needed to allow
+ * layout swap on fd */
+ fd = open(name, O_RDWR | O_DIRECT);
+ if (fd < 0) {
+ rc = -errno;
+ error_loc = "cannot open source file";
+ return rc;
+ }
+
+ rc = llapi_file_fget_mdtidx(fd, &mdt_index);
+ if (rc < 0) {
+ error_loc = "cannot get MDT index";
+ goto out;
+ }
+
+ do {
+ int open_flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW;
+ mode_t open_mode = S_IRUSR | S_IWUSR;
+
+ random_value = random();
+ rc = snprintf(volatile_file, sizeof(volatile_file),
+ "%s/%s:%.4X:%.4X", parent, LUSTRE_VOLATILE_HDR,
+ mdt_index, random_value);
+ if (rc >= sizeof(volatile_file)) {
+ rc = -ENAMETOOLONG;
+ break;
+ }
+
+ /* create, open a volatile file, use caching (ie no directio) */
+ if (param != NULL)
+ fdv = llapi_file_open_param(volatile_file, open_flags,
+ open_mode, param);
+ else
+ fdv = lfs_component_create(volatile_file, open_flags,
+ open_mode, layout);
+ } while (fdv < 0 && (rc = fdv) == -EEXIST);
+
+ if (rc < 0) {
+ error_loc = "cannot create volatile file";
+ goto out;
+ }
+
+ /* In case the MDT does not support creation of volatile files
+ * we should try to unlink it. */
+ (void)unlink(volatile_file);
+
+ /* Not-owner (root?) special case.
+ * Need to set owner/group of volatile file like original.
+ * This will allow to pass related check during layout_swap.
+ */
+ rc = fstat(fd, &st);
+ if (rc != 0) {
+ rc = -errno;
+ error_loc = "cannot stat source file";
+ goto out;
+ }
+
+ rc = fstat(fdv, &stv);
+ if (rc != 0) {
+ rc = -errno;
+ error_loc = "cannot stat volatile";
+ goto out;
+ }
+
+ if (st.st_uid != stv.st_uid || st.st_gid != stv.st_gid) {
+ rc = fchown(fdv, st.st_uid, st.st_gid);
+ if (rc != 0) {
+ rc = -errno;
+ error_loc = "cannot change ownership of volatile";
+ goto out;
+ }
+ }
+
+ out:
+ if (rc < 0) {
+ /* 0 is a valid descriptor, so test against the -1 sentinel */
+ if (fd >= 0)
+ close(fd);
+ if (fdv >= 0)
+ close(fdv);
+ } else {
+ *fd_src = fd;
+ *fd_tgt = fdv;
+ error_loc = NULL;
}
return rc;
}
- static int migrate_copy_data(int fd_src, int fd_dst, size_t buf_size,
- bool group_locked, const char *fname)
+ static int migrate_copy_data(int fd_src, int fd_dst, int (*check_file)(int))
{
+ struct llapi_layout *layout;
+ size_t buf_size = 4 * 1024 * 1024;
void *buf = NULL;
ssize_t rsize = -1;
ssize_t wsize = 0;
size_t wpos = 0;
off_t bufoff = 0;
int rc;
- bool lease_broken = false;
+
+ layout = llapi_layout_get_by_fd(fd_src, 0);
+ if (layout != NULL) {
+ uint64_t stripe_size;
+
+ rc = llapi_layout_stripe_size_get(layout, &stripe_size);
+ if (rc == 0)
+ buf_size = stripe_size;
+
+ llapi_layout_free(layout);
+ }
/* Use a page-aligned buffer for direct I/O */
rc = posix_memalign(&buf, getpagesize(), buf_size);
/* read new data only if we have written all
* previously read data */
if (wpos == rpos) {
- if (!lease_broken) {
- rc = check_lease(fd_src, &lease_broken,
- group_locked, fname);
+ if (check_file) {
+ rc = check_file(fd_src);
if (rc < 0)
- goto out;
+ break;
}
+
rsize = read(fd_src, buf, buf_size);
if (rsize < 0) {
rc = -errno;
- fprintf(stderr, "%s: %s: read failed: %s\n",
- progname, fname, strerror(-rc));
- goto out;
+ break;
}
rpos += rsize;
bufoff = 0;
wsize = write(fd_dst, buf + bufoff, rpos - wpos);
if (wsize < 0) {
rc = -errno;
- fprintf(stderr,
- "%s: %s: write failed on volatile: %s\n",
- progname, fname, strerror(-rc));
- goto out;
+ break;
}
wpos += wsize;
bufoff += wsize;
}
- rc = fsync(fd_dst);
- if (rc < 0) {
- rc = -errno;
- fprintf(stderr, "%s: %s: fsync failed: %s\n",
- progname, fname, strerror(-rc));
+ if (rc == 0) {
+ rc = fsync(fd_dst);
+ if (rc < 0)
+ rc = -errno;
}
- out:
free(buf);
return rc;
}
+ /**
+ * Copy the access and modification times of one open file to another.
+ *
+ * \param[in] fd descriptor whose st_atime/st_mtime are read with fstat()
+ * \param[in] fdv descriptor that receives them through futimes()
+ *
+ * \retval 0 timestamps copied
+ * \retval -errno fstat() or futimes() failed
+ */
- static int migrate_copy_timestamps(int fdv, const struct stat *st)
+ static int migrate_copy_timestamps(int fd, int fdv)
{
- struct timeval tv[2] = {
- {.tv_sec = st->st_atime},
- {.tv_sec = st->st_mtime}
- };
+ struct stat st;
- return futimes(fdv, tv);
+ if (fstat(fd, &st) == 0) {
+ struct timeval tv[2] = {
+ {.tv_sec = st.st_atime},
+ {.tv_sec = st.st_mtime}
+ };
+
+ /* futimes() signals failure with -1 and errno; callers pass
+ * the result to strerror(-rc), so convert it to -errno */
+ if (futimes(fdv, tv) == 0)
+ return 0;
+ }
+
+ return -errno;
}
- static int migrate_block(int fd, int fdv, const struct stat *st,
- size_t buf_size, const char *name)
+ static int migrate_block(int fd, int fdv)
{
__u64 dv1;
int gid;
rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
if (rc < 0) {
- fprintf(stderr, "%s: %s: cannot get dataversion: %s\n",
- progname, name, strerror(-rc));
+ error_loc = "cannot get dataversion";
return rc;
}
* block it too. */
rc = llapi_group_lock(fd, gid);
if (rc < 0) {
- fprintf(stderr, "%s: %s: cannot get group lock: %s\n",
- progname, name, strerror(-rc));
+ error_loc = "cannot get group lock";
return rc;
}
- rc = migrate_copy_data(fd, fdv, buf_size, true, name);
+ rc = migrate_copy_data(fd, fdv, NULL);
if (rc < 0) {
- fprintf(stderr, "%s: %s: data copy failed\n", progname, name);
+ error_loc = "data copy failed";
goto out_unlock;
}
/* Make sure we keep original atime/mtime values */
- rc = migrate_copy_timestamps(fdv, st);
+ rc = migrate_copy_timestamps(fd, fdv);
if (rc < 0) {
- fprintf(stderr, "%s: %s: timestamp copy failed\n",
- progname, name);
+ error_loc = "timestamp copy failed";
goto out_unlock;
}
rc = llapi_fswap_layouts_grouplock(fd, fdv, dv1, 0, 0,
SWAP_LAYOUTS_CHECK_DV1);
if (rc == -EAGAIN) {
- fprintf(stderr, "%s: %s: dataversion changed during copy, "
- "migration aborted\n", progname, name);
+ error_loc = "file changed";
goto out_unlock;
} else if (rc < 0) {
- fprintf(stderr, "%s: %s: cannot swap layouts: %s\n", progname,
- name, strerror(-rc));
+ error_loc = "cannot swap layout";
goto out_unlock;
}
out_unlock:
rc2 = llapi_group_unlock(fd, gid);
if (rc2 < 0 && rc == 0) {
- fprintf(stderr, "%s: %s: putting group lock failed: %s\n",
- progname, name, strerror(-rc2));
+ error_loc = "unlock group lock";
rc = rc2;
}
return rc;
}
- static int migrate_nonblock(int fd, int fdv, const struct stat *st,
- size_t buf_size, const char *name)
+ /**
+ * Lease probe passed to migrate_copy_data() as its check_file callback.
+ *
+ * \param[in] fd File descriptor on which to check the lease.
+ *
+ * \retval 0 llapi_lease_check() returned a value > 0, which is
+ * its success indication; migration can keep on going.
+ * \retval -EBUSY llapi_lease_check() returned zero or a negative
+ * value; abort migration.
+ */
+ static int check_lease(int fd)
+ {
+ return llapi_lease_check(fd) > 0 ? 0 : -EBUSY;
+ }
+
+ static int migrate_nonblock(int fd, int fdv)
{
__u64 dv1;
__u64 dv2;
rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH);
if (rc < 0) {
- fprintf(stderr, "%s: %s: cannot get data version: %s\n",
- progname, name, strerror(-rc));
+ error_loc = "cannot get data version";
return rc;
}
- rc = migrate_copy_data(fd, fdv, buf_size, false, name);
+ rc = migrate_copy_data(fd, fdv, check_lease);
if (rc < 0) {
- fprintf(stderr, "%s: %s: data copy failed\n", progname, name);
+ error_loc = "data copy failed";
return rc;
}
rc = llapi_get_data_version(fd, &dv2, LL_DV_RD_FLUSH);
if (rc != 0) {
- fprintf(stderr, "%s: %s: cannot get data version: %s\n",
- progname, name, strerror(-rc));
+ error_loc = "cannot get data version";
return rc;
}
if (dv1 != dv2) {
rc = -EAGAIN;
- fprintf(stderr, "%s: %s: data version changed during "
- "migration\n",
- progname, name);
+ error_loc = "source file changed";
return rc;
}
/* Make sure we keep original atime/mtime values */
- rc = migrate_copy_timestamps(fdv, st);
- if (rc < 0) {
- fprintf(stderr, "%s: %s: timestamp copy failed\n",
- progname, name);
- return rc;
- }
-
- /* Atomically put lease, swap layouts and close.
- * for a migration we need to check data version on file did
- * not change. */
- rc = llapi_fswap_layouts(fd, fdv, 0, 0, SWAP_LAYOUTS_CLOSE);
+ rc = migrate_copy_timestamps(fd, fdv);
if (rc < 0) {
- fprintf(stderr, "%s: %s: cannot swap layouts: %s\n",
- progname, name, strerror(-rc));
+ error_loc = "timestamp copy failed";
return rc;
}
/* LCME_FL_INIT is the only supported flag in PFL */
if (flags != 0) {
if (flags & ~LCME_KNOWN_FLAGS) {
- fprintf(stderr, "Invalid component flags %#x\n", flags);
+ fprintf(stderr,
+ "%s setstripe: bad component flags %#x\n",
+ progname, flags);
return -EINVAL;
}
} else if (comp_id > LCME_ID_MAX) {
- fprintf(stderr, "Invalid component id %u\n", comp_id);
+ fprintf(stderr, "%s setstripe: bad component id %u\n",
+ progname, comp_id);
return -EINVAL;
}
rc = llapi_layout_file_comp_del(fname, comp_id, flags);
if (rc)
- fprintf(stderr, "Delete component %#x from %s failed. %s\n",
- comp_id, fname, strerror(errno));
+ fprintf(stderr,
+ "%s setstripe: cannot delete component %#x from '%s': %s\n",
+ progname, comp_id, fname, strerror(errno));
return rc;
}
struct llapi_stripe_param *param,
struct llapi_layout *layout)
{
- int fd = -1;
- int fdv = -1;
- char parent[PATH_MAX];
- int mdt_index;
- int random_value;
- char volatile_file[sizeof(parent) +
- LUSTRE_VOLATILE_HDR_LEN +
- 2 * sizeof(mdt_index) +
- 2 * sizeof(random_value) + 4];
- char *ptr;
- int rc;
- struct lov_user_md *lum = NULL;
- int lum_size;
- int buf_size = 1024 * 1024 * 4;
- bool have_lease_rdlck = false;
- struct stat st;
- struct stat stv;
+ int fd = -1;
+ int fdv = -1;
+ int rc;
+
+ rc = migrate_open_files(name, param, layout, &fd, &fdv);
+ if (rc < 0)
+ goto out;
- /* find the right size for the IO and allocate the buffer */
- lum_size = lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3);
- lum = malloc(lum_size);
- if (lum == NULL) {
- rc = -ENOMEM;
- goto free;
+ if (!(migration_flags & MIGRATION_NONBLOCK)) {
+ /* Blocking mode (forced if servers do not support file lease).
+ * It is also the default mode, since we cannot distinguish
+ * between a broken lease and a server that does not support
+ * atomic swap/close (LU-6785) */
+ rc = migrate_block(fd, fdv);
+ goto out;
}
- rc = llapi_file_get_stripe(name, lum);
- /* failure can happen for many reasons and some may be not real errors
- * (eg: no stripe)
- * in case of a real error, a later call will fail with better
- * error management */
- if (rc == 0) {
- if ((lum->lmm_magic == LOV_USER_MAGIC_V1 ||
- lum->lmm_magic == LOV_USER_MAGIC_V3) &&
- lum->lmm_stripe_size != 0)
- buf_size = lum->lmm_stripe_size;
+ rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
+ if (rc < 0) {
+ error_loc = "cannot get lease";
+ goto out;
}
- /* open file, direct io */
- /* even if the file is only read, WR mode is nedeed to allow
- * layout swap on fd */
- fd = open(name, O_RDWR | O_DIRECT);
- if (fd == -1) {
- rc = -errno;
- fprintf(stderr, "%s: cannot open '%s': %s\n", progname, name,
- strerror(-rc));
- goto free;
- }
-
- if (file_lease_supported) {
- rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
- if (rc == -EOPNOTSUPP) {
- /* Older servers do not support file lease.
- * Disable related checks. This opens race conditions
- * as explained in LU-4840 */
- file_lease_supported = false;
- } else if (rc < 0) {
- fprintf(stderr, "%s: %s: cannot get open lease: %s\n",
- progname, name, strerror(-rc));
- goto error;
+ rc = migrate_nonblock(fd, fdv);
+ if (rc < 0) {
+ llapi_lease_put(fd);
+ goto out;
+ }
+
+ /* Atomically put lease, swap layouts and close.
+ * for a migration we need to check data version on file did
+ * not change. */
+ rc = llapi_fswap_layouts(fd, fdv, 0, 0,
+ migration_flags & MIGRATION_MIRROR ?
+ MERGE_LAYOUTS_CLOSE : SWAP_LAYOUTS_CLOSE);
+ if (rc < 0) {
+ error_loc = "cannot swap layout";
+ goto out;
+ }
+
+ out:
+ if (fd >= 0)
+ close(fd);
+
+ if (fdv >= 0)
+ close(fdv);
+
+ if (rc < 0)
+ fprintf(stderr, "error: %s: %s: %s: %s\n",
+ progname, name, error_loc, strerror(-rc));
+ return rc;
+ }
+
+ /**
+ * struct mirror_args - Command-line arguments for mirror(s).
+ * @m_count: Number of mirrors to be created with this layout.
+ * @m_layout: Mirror layout.
+ * @m_file: A victim file. Its layout will be split and used as a mirror.
+ * @m_next: Point to the next node of the list.
+ *
+ * Command-line arguments for mirror(s) will be parsed and stored in
+ * a linked list that consists of this structure.
+ *
+ * When @m_file is set, mirror_create_sanity_check() frees @m_layout and
+ * replaces it with the layout read from @m_file.
+ */
+ struct mirror_args {
+ __u32 m_count;
+ struct llapi_layout *m_layout;
+ const char *m_file;
+ struct mirror_args *m_next;
+ };
+
+ /**
+ * mirror_sanity_check_one() - Check that one layout is usable as a mirror.
+ * @layout: Layout to examine; llapi_layout_comp_use() is called on it to
+ * select first its first and then its last component.
+ *
+ * The first component must not have a Data-on-MDT pattern and the extent of
+ * the last component must end at LUSTRE_EOF.
+ *
+ * Return: 0 if both checks pass, -ENOTSUP for a Data-on-MDT first
+ * component, -EINVAL if the last extent does not reach EOF, or -errno when
+ * one of the llapi_layout_*() calls fails.
+ */
+ static inline int mirror_sanity_check_one(struct llapi_layout *layout)
+ {
+ uint64_t ext_start, ext_end;
+ uint64_t first_pattern;
+
+ /* LU-10112: do not support dom+flr in phase 1 */
+ if (llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST) != 0 ||
+ llapi_layout_pattern_get(layout, &first_pattern) != 0)
+ return -errno;
+
+ if (first_pattern == LOV_PATTERN_MDT ||
+ first_pattern == LLAPI_LAYOUT_MDT) {
+ fprintf(stderr, "error: %s: doesn't support dom+flr for now\n",
+ progname);
+ return -ENOTSUP;
+ }
+
+ if (llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_LAST) != 0 ||
+ llapi_layout_comp_extent_get(layout, &ext_start, &ext_end) != 0)
+ return -errno;
+
+ if (ext_end == LUSTRE_EOF)
+ return 0;
+
+ fprintf(stderr, "error: %s: mirror layout doesn't reach eof\n",
+ progname);
+ return -EINVAL;
+ }
+
+ /**
+ * enum mirror_flags - Flags for extending a mirrored file.
+ * @NO_VERIFY: Indicates not to verify the mirror(s) from victim file(s)
+ * in case the victim file(s) contains the same data as the
+ * original mirrored file.
+ *
+ * Flags for extending a mirrored file. mirror_extend_file() calls
+ * mirror_file_compare() only when NO_VERIFY is not set.
+ */
+ enum mirror_flags {
+ NO_VERIFY = 0x1,
+ };
+
+ /**
+ * mirror_create_sanity_check() - Check mirror list.
+ * @fname: Optional path of an existing file. When non-NULL, its layout is
+ * read with llapi_layout_get_by_path() and must itself pass
+ * mirror_sanity_check_one(). mirror_create() passes NULL,
+ * mirror_extend() passes the file being extended.
+ * @list: A linked list that stores the mirror arguments.
+ *
+ * This function does a sanity check on @list for creating
+ * a mirrored file.
+ *
+ * Side effect: for every entry that names a victim file (m_file), the
+ * entry's m_layout is freed and replaced by the layout read from that file.
+ *
+ * Return: 0 on success or a negative error code on failure: -EINVAL for a
+ * NULL @list, an entry with neither victim file nor layout, or a list that
+ * mixes victim files with explicit layouts; -ENODATA when a layout cannot be
+ * read from a file; otherwise the error from mirror_sanity_check_one().
+ */
+ static int mirror_create_sanity_check(const char *fname,
+ struct mirror_args *list)
+ {
+ int rc = 0;
+ bool has_m_file = false;
+ bool has_m_layout = false;
+
+ if (list == NULL)
+ return -EINVAL;
+
+ /* check the layout of the file to be extended, if one was given */
+ if (fname) {
+ struct llapi_layout *layout;
+
+ layout = llapi_layout_get_by_path(fname, 0);
+ if (!layout) {
+ fprintf(stderr,
+ "error: %s: file '%s' couldn't get layout\n",
+ progname, fname);
+ return -ENODATA;
+ }
+
+ rc = mirror_sanity_check_one(layout);
+ llapi_layout_free(layout);
+
+ if (rc)
+ return rc;
+ }
+
+ while (list != NULL) {
+ if (list->m_file != NULL) {
+ has_m_file = true;
+ /* the victim file's own layout replaces whatever
+ * layout was stored in this entry */
+ llapi_layout_free(list->m_layout);
+
+ list->m_layout =
+ llapi_layout_get_by_path(list->m_file, 0);
+ if (list->m_layout == NULL) {
+ fprintf(stderr,
+ "error: %s: file '%s' has no layout\n",
+ progname, list->m_file);
+ return -ENODATA;
+ }
} else {
- have_lease_rdlck = true;
+ if (list->m_layout != NULL)
+ has_m_layout = true;
+ else {
+ fprintf(stderr, "error: %s: no mirror layout\n",
+ progname);
+ return -EINVAL;
+ }
}
+
+ rc = mirror_sanity_check_one(list->m_layout);
+ if (rc)
+ return rc;
+
+ list = list->m_next;
}
- /* search for file directory pathname */
- if (strlen(name) > sizeof(parent)-1) {
- rc = -E2BIG;
- goto error;
+ /* victim files and explicit layouts are mutually exclusive */
+ if (has_m_file && has_m_layout) {
+ fprintf(stderr, "error: %s: -f <victim_file> option should not "
+ "be specified with setstripe options or "
+ "--parent option\n", progname);
+ return -EINVAL;
}
- strncpy(parent, name, sizeof(parent));
- ptr = strrchr(parent, '/');
- if (ptr == NULL) {
- if (getcwd(parent, sizeof(parent)) == NULL) {
- rc = -errno;
- goto error;
+
+ return 0;
+ }
+
+ /**
+ * mirror_create() - Create a mirrored file.
+ * @fname: The file to be created.
+ * @mirror_list: A linked list that stores the mirror arguments.
+ *
+ * This function creates a mirrored file @fname with the mirror(s)
+ * from @mirror_list. The list is first validated with
+ * mirror_create_sanity_check(); each entry's layout is then merged into one
+ * combined layout m_count times, and the file is created from that layout.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+ static int mirror_create(char *fname, struct mirror_args *mirror_list)
+ {
+ struct llapi_layout *layout = NULL;
+ struct mirror_args *cur_mirror = NULL;
+ uint16_t mirror_count = 0;
+ int i = 0;
+ int rc = 0;
+
+ rc = mirror_create_sanity_check(NULL, mirror_list);
+ if (rc)
+ return rc;
+
+ cur_mirror = mirror_list;
+ while (cur_mirror != NULL) {
+ /* one merge per requested copy of this entry's layout */
+ for (i = 0; i < cur_mirror->m_count; i++) {
+ rc = llapi_layout_merge(&layout, cur_mirror->m_layout);
+ if (rc) {
+ rc = -errno;
+ fprintf(stderr, "error: %s: "
+ "merge layout failed: %s\n",
+ progname, strerror(errno));
+ goto error;
+ }
}
- } else {
- if (ptr == parent)
- strcpy(parent, "/");
- else
- *ptr = '\0';
+ mirror_count += cur_mirror->m_count;
+ cur_mirror = cur_mirror->m_next;
}
- rc = llapi_file_fget_mdtidx(fd, &mdt_index);
- if (rc < 0) {
- fprintf(stderr, "%s: %s: cannot get MDT index: %s\n",
- progname, name, strerror(-rc));
+ rc = llapi_layout_mirror_count_set(layout, mirror_count);
+ if (rc) {
+ rc = -errno;
+ fprintf(stderr, "error: %s: set mirror count failed: %s\n",
+ progname, strerror(errno));
goto error;
}
- do {
- int open_flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW;
- mode_t open_mode = S_IRUSR | S_IWUSR;
+ /* a non-negative result is treated as an open descriptor: only the
+ * creation matters here, so close it and report plain success */
+ rc = lfs_component_create(fname, O_CREAT | O_WRONLY, 0644,
+ layout);
+ if (rc >= 0) {
+ close(rc);
+ rc = 0;
+ }
- random_value = random();
- rc = snprintf(volatile_file, sizeof(volatile_file),
- "%s/%s:%.4X:%.4X", parent, LUSTRE_VOLATILE_HDR,
- mdt_index, random_value);
- if (rc >= sizeof(volatile_file)) {
- rc = -E2BIG;
- goto error;
+ error:
+ llapi_layout_free(layout);
+ return rc;
+ }
+
+ /**
+ * Compare the contents of @fd and @fdv chunk by chunk, checking the lease
+ * on @fd before every chunk.
+ *
+ * Comparison stops at end of file on @fd, at the first chunk that differs
+ * in length or content, or at the first error.
+ *
+ * \param[in] fd file whose lease is checked and whose data is the
+ * reference
+ * \param[in] fdv file compared against @fd
+ *
+ * \retval >= 0 number of leading bytes found identical; the caller
+ * compares it with the file size to detect a mismatch
+ * \retval -ENOMEM the comparison buffer could not be allocated
+ * \retval -EBUSY llapi_lease_check() reported no lease on @fd
+ * \retval -errno llapi_lease_check() or read() failed
+ */
+ static ssize_t mirror_file_compare(int fd, int fdv)
+ {
+ const size_t buflen = 4 * 1024 * 1024; /* 4M */
+ char *buf;
+ ssize_t bytes_done = 0;
+ ssize_t bytes_read = 0;
+ ssize_t bytes_cmp;
+ int lease;
+
+ /* one allocation: first half holds data of fd, second half of fdv */
+ buf = malloc(buflen * 2);
+ if (!buf)
+ return -ENOMEM;
+
+ while (1) {
+ /* only a positive result means the lease is still held; a
+ * negative one is an error and must not be taken for success */
+ lease = llapi_lease_check(fd);
+ if (lease <= 0) {
+ bytes_done = lease < 0 ? lease : -EBUSY;
+ break;
}
- /* create, open a volatile file, use caching (ie no directio) */
- if (param != NULL)
- fdv = llapi_file_open_param(volatile_file, open_flags,
- open_mode, param);
- else if (layout != NULL)
- fdv = lfs_component_create(volatile_file, open_flags,
- open_mode, layout);
- else
- fdv = -EINVAL;
- } while (fdv == -EEXIST);
+ bytes_read = read(fd, buf, buflen);
+ if (bytes_read < 0)
+ bytes_done = -errno; /* report the I/O error itself */
+ if (bytes_read <= 0)
+ break;
- if (fdv < 0) {
- rc = fdv;
- fprintf(stderr, "%s: %s: cannot create volatile file in"
- " directory: %s\n",
- progname, parent, strerror(-rc));
- goto error;
+ bytes_cmp = read(fdv, buf + buflen, buflen);
+ if (bytes_cmp < 0) {
+ bytes_done = -errno;
+ break;
+ }
+ if (bytes_cmp != bytes_read)
+ break;
+
+ /* XXX: should compute the checksum on each buffer and then
+ * compare checksum to avoid cache collision */
+ if (memcmp(buf, buf + buflen, bytes_read))
+ break;
+
+ bytes_done += bytes_read;
}
- /* In case the MDT does not support creation of volatile files
- * we should try to unlink it. */
- (void)unlink(volatile_file);
+ free(buf);
- /* Not-owner (root?) special case.
- * Need to set owner/group of volatile file like original.
- * This will allow to pass related check during layout_swap.
- */
- rc = fstat(fd, &st);
- if (rc != 0) {
+ return bytes_done;
+ }
+
+ /**
+ * Add the layout of an existing file to a mirrored file as a new mirror.
+ *
+ * Both files are opened read-write; they must have the same st_dev and
+ * the same size. A read lease is taken on \a fname and, unless NO_VERIFY
+ * is set, the contents of the two files are compared. The layouts are then
+ * merged with MERGE_LAYOUTS_CLOSE and, on success, \a victim_file is
+ * unlinked.
+ *
+ * \param[in] fname path of the mirrored file to extend
+ * \param[in] victim_file path of the file whose layout becomes a mirror
+ * \param[in] mirror_flags NO_VERIFY to skip the content comparison
+ *
+ * \retval 0 success
+ * \retval -errno failure; a message built from error_loc is printed
+ * to stderr and both descriptors are closed
+ */
+ static int mirror_extend_file(const char *fname, const char *victim_file,
+ enum mirror_flags mirror_flags)
+ {
+ int fd = -1;
+ int fdv = -1;
+ struct stat stbuf;
+ struct stat stbuf_v;
+ __u64 dv;
+ int rc;
+
+ fd = open(fname, O_RDWR);
+ if (fd < 0) {
+ error_loc = "open source file";
rc = -errno;
- fprintf(stderr, "%s: %s: cannot stat: %s\n", progname, name,
- strerror(errno));
- goto error;
+ goto out;
}
- rc = fstat(fdv, &stv);
- if (rc != 0) {
+
+ fdv = open(victim_file, O_RDWR);
+ if (fdv < 0) {
+ error_loc = "open target file";
rc = -errno;
- fprintf(stderr, "%s: %s: cannot stat: %s\n", progname,
- volatile_file, strerror(errno));
- goto error;
+ goto out;
}
- if (st.st_uid != stv.st_uid || st.st_gid != stv.st_gid) {
- rc = fchown(fdv, st.st_uid, st.st_gid);
- if (rc != 0) {
- rc = -errno;
- fprintf(stderr, "%s: %s: cannot chown: %s\n", progname,
- name, strerror(errno));
- goto error;
- }
+
+ if (fstat(fd, &stbuf) || fstat(fdv, &stbuf_v)) {
+ error_loc = "stat source or target file";
+ rc = -errno;
+ goto out;
+ }
+
+ if (stbuf.st_dev != stbuf_v.st_dev) {
+ error_loc = "stat source and target file";
+ rc = -EXDEV;
+ goto out;
}
- if (migration_flags & MIGRATION_NONBLOCK && file_lease_supported) {
- rc = migrate_nonblock(fd, fdv, &st, buf_size, name);
- if (rc == 0) {
- have_lease_rdlck = false;
- fdv = -1; /* The volatile file is closed as we put the
- * lease in non-blocking mode. */
+ /* mirrors should be of the same size */
+ if (stbuf.st_size != stbuf_v.st_size) {
+ error_loc = "file sizes don't match";
+ rc = -EINVAL;
+ goto out;
+ }
+
+ rc = llapi_lease_get(fd, LL_LEASE_RDLCK);
+ if (rc < 0) {
+ error_loc = "cannot get lease";
+ goto out;
+ }
+
+ if (!(mirror_flags & NO_VERIFY)) {
+ ssize_t ret;
+ /* mirrors should have the same contents */
+ ret = mirror_file_compare(fd, fdv);
+ if (ret != stbuf.st_size) {
+ error_loc = "file busy or contents don't match";
+ rc = ret < 0 ? ret : -EINVAL;
+ goto out;
}
- } else {
- /* Blocking mode (forced if servers do not support file lease).
- * It is also the default mode, since we cannot distinguish
- * between a broken lease and a server that does not support
- * atomic swap/close (LU-6785) */
- rc = migrate_block(fd, fdv, &st, buf_size, name);
}
- error:
- if (have_lease_rdlck)
- llapi_lease_put(fd);
+ /* Get rid of caching pages from clients */
+ rc = llapi_get_data_version(fd, &dv, LL_DV_WR_FLUSH);
+ if (rc < 0) {
+ error_loc = "cannot get data version";
+ /* leave through the common exit so that both descriptors
+ * are closed and the failure is reported */
+ goto out;
+ }
+
+ rc = llapi_get_data_version(fdv, &dv, LL_DV_WR_FLUSH);
+ if (rc < 0) {
+ error_loc = "cannot get data version";
+ goto out;
+ }
+
+ /* Make sure we keep original atime/mtime values */
+ rc = migrate_copy_timestamps(fd, fdv);
+ if (rc < 0) {
+ error_loc = "timestamp copy failed";
+ goto out;
+ }
+
+ /* Atomically put lease, swap layouts and close.
+ * for a migration we need to check data version on file did
+ * not change. */
+ rc = llapi_fswap_layouts(fd, fdv, 0, 0, MERGE_LAYOUTS_CLOSE);
+ if (rc < 0) {
+ error_loc = "cannot swap layout";
+ goto out;
+ }
+
+ out:
if (fd >= 0)
close(fd);
if (fdv >= 0)
close(fdv);
- free:
- if (lum)
- free(lum);
+ /* the victim's layout now belongs to fname; drop its name */
+ if (!rc)
+ (void) unlink(victim_file);
+
+ if (rc < 0)
+ fprintf(stderr, "error: %s: %s: %s: %s\n",
+ progname, fname, error_loc, strerror(-rc));
+ return rc;
+ }
+
+ /**
+ * mirror_extend() - Add the mirrors described by a list to a file.
+ * @fname: The mirrored file to be extended.
+ * @mirror_list: A linked list that stores the mirror arguments.
+ * @mirror_flags: Flags handed to mirror_extend_file().
+ *
+ * After mirror_create_sanity_check() accepts the list, every entry with a
+ * victim file is handled by mirror_extend_file(), and every other entry
+ * triggers m_count calls of lfs_migrate() with
+ * MIGRATION_NONBLOCK | MIGRATION_MIRROR. Processing stops at the first
+ * non-zero result.
+ *
+ * Return: 0 on success, otherwise the first non-zero result.
+ */
+ static int mirror_extend(char *fname, struct mirror_args *mirror_list,
+ enum mirror_flags mirror_flags)
+ {
+ struct mirror_args *cur;
+ int rc = mirror_create_sanity_check(fname, mirror_list);
+
+ for (cur = mirror_list; rc == 0 && cur != NULL; cur = cur->m_next) {
+ __u32 left;
+
+ if (cur->m_file != NULL) {
+ rc = mirror_extend_file(fname, cur->m_file,
+ mirror_flags);
+ continue;
+ }
+
+ for (left = cur->m_count; left > 0 && rc == 0; left--)
+ rc = lfs_migrate(fname,
+ MIGRATION_NONBLOCK | MIGRATION_MIRROR,
+ NULL, cur->m_layout);
+ }
return rc;
}
struct lfs_setstripe_args {
unsigned long long lsa_comp_end;
unsigned long long lsa_stripe_size;
- int lsa_stripe_count;
- int lsa_stripe_off;
+ long long lsa_stripe_count;
+ long long lsa_stripe_off;
__u32 lsa_comp_flags;
int lsa_nr_osts;
- int lsa_pattern;
+ unsigned long long lsa_pattern;
__u32 *lsa_osts;
char *lsa_pool_name;
};
+ /* Reset @lsa: every field is zeroed, then stripe size, stripe count and
+ * stripe offset are set to LLAPI_LAYOUT_DEFAULT, the pattern to
+ * LLAPI_LAYOUT_RAID0 and the pool name to NULL. These are the values
+ * setstripe_args_specified() compares against. */
static inline void setstripe_args_init(struct lfs_setstripe_args *lsa)
{
memset(lsa, 0, sizeof(*lsa));
- lsa->lsa_stripe_off = -1;
+
+ lsa->lsa_stripe_size = LLAPI_LAYOUT_DEFAULT;
+ lsa->lsa_stripe_count = LLAPI_LAYOUT_DEFAULT;
+ lsa->lsa_stripe_off = LLAPI_LAYOUT_DEFAULT;
+ lsa->lsa_pattern = LLAPI_LAYOUT_RAID0;
+ lsa->lsa_pool_name = NULL;
+ }
+
+ /**
+ * setstripe_args_init_inherit() - Reset stripe options, keeping a few.
+ * @lsa: Stripe options to be reset.
+ *
+ * Re-initializes @lsa with setstripe_args_init() while carrying over the
+ * stripe_size, stripe_count and OST pool_name it held on entry; every
+ * other option returns to its initial value.
+ *
+ * Return: void.
+ */
+ static inline void setstripe_args_init_inherit(struct lfs_setstripe_args *lsa)
+ {
+ /* snapshot taken before the reset wipes the inherited fields */
+ const struct lfs_setstripe_args saved = *lsa;
+
+ setstripe_args_init(lsa);
+
+ lsa->lsa_stripe_size = saved.lsa_stripe_size;
+ lsa->lsa_stripe_count = saved.lsa_stripe_count;
+ lsa->lsa_pool_name = saved.lsa_pool_name;
}
+ /* Return true when at least one option in @lsa differs from the value
+ * setstripe_args_init() gives it: stripe size, count or offset other than
+ * LLAPI_LAYOUT_DEFAULT, a pattern other than LLAPI_LAYOUT_RAID0, a pool
+ * name, or a non-zero component end. */
static inline bool setstripe_args_specified(struct lfs_setstripe_args *lsa)
{
- return (lsa->lsa_stripe_size != 0 || lsa->lsa_stripe_count != 0 ||
- lsa->lsa_stripe_off != -1 || lsa->lsa_pool_name != NULL ||
- lsa->lsa_comp_end != 0 || lsa->lsa_pattern != 0);
+ return (lsa->lsa_stripe_size != LLAPI_LAYOUT_DEFAULT ||
+ lsa->lsa_stripe_count != LLAPI_LAYOUT_DEFAULT ||
+ lsa->lsa_stripe_off != LLAPI_LAYOUT_DEFAULT ||
+ lsa->lsa_pattern != LLAPI_LAYOUT_RAID0 ||
+ lsa->lsa_pool_name != NULL ||
+ lsa->lsa_comp_end != 0);
}
+ /**
+ * comp_args_to_layout() - Create or extend a composite layout.
+ * @composite: Pointer to the composite layout.
+ * @lsa: Stripe options for the new component.
+ *
+ * This function creates or extends a composite layout by adding a new
+ * component with stripe options from @lsa.
+ *
+ * Return: 0 on success or an error code on failure.
+ */
static int comp_args_to_layout(struct llapi_layout **composite,
struct lfs_setstripe_args *lsa)
{
if (lsa->lsa_pattern == LLAPI_LAYOUT_MDT) {
/* In case of Data-on-MDT patterns the only extra option
* applicable is stripe size option. */
- if (lsa->lsa_stripe_count) {
+ if (lsa->lsa_stripe_count != LLAPI_LAYOUT_DEFAULT) {
fprintf(stderr, "Option 'stripe-count' can't be "
- "specified with Data-on-MDT component: %i\n",
+ "specified with Data-on-MDT component: %lld\n",
lsa->lsa_stripe_count);
return -EINVAL;
}
- if (lsa->lsa_stripe_size) {
+ if (lsa->lsa_stripe_size != LLAPI_LAYOUT_DEFAULT) {
fprintf(stderr, "Option 'stripe-size' can't be "
"specified with Data-on-MDT component: %llu\n",
lsa->lsa_stripe_size);
lsa->lsa_nr_osts);
return -EINVAL;
}
- if (lsa->lsa_stripe_off != -1) {
+ if (lsa->lsa_stripe_off != LLAPI_LAYOUT_DEFAULT) {
fprintf(stderr, "Option 'stripe-offset' can't be "
- "specified with Data-on-MDT component: %i\n",
+ "specified with Data-on-MDT component: %lld\n",
lsa->lsa_stripe_off);
return -EINVAL;
}
rc = llapi_layout_pattern_set(layout, lsa->lsa_pattern);
if (rc) {
- fprintf(stderr, "Set stripe pattern %#x failed. %s\n",
+ fprintf(stderr, "Set stripe pattern %#llx failed. %s\n",
lsa->lsa_pattern, strerror(errno));
return rc;
}
lsa->lsa_stripe_size = lsa->lsa_comp_end;
}
- if (lsa->lsa_stripe_size != 0) {
- rc = llapi_layout_stripe_size_set(layout,
- lsa->lsa_stripe_size);
- if (rc) {
- fprintf(stderr, "Set stripe size %llu failed. %s\n",
- lsa->lsa_stripe_size, strerror(errno));
- return rc;
- }
+ rc = llapi_layout_stripe_size_set(layout, lsa->lsa_stripe_size);
+ if (rc) {
+ fprintf(stderr, "Set stripe size %llu failed: %s\n",
+ lsa->lsa_stripe_size, strerror(errno));
+ return rc;
}
- if (lsa->lsa_stripe_count != 0) {
- rc = llapi_layout_stripe_count_set(layout,
- lsa->lsa_stripe_count == -1 ?
- LLAPI_LAYOUT_WIDE :
- lsa->lsa_stripe_count);
- if (rc) {
- fprintf(stderr, "Set stripe count %d failed. %s\n",
- lsa->lsa_stripe_count, strerror(errno));
- return rc;
- }
+ rc = llapi_layout_stripe_count_set(layout, lsa->lsa_stripe_count);
+ if (rc) {
+ fprintf(stderr, "Set stripe count %lld failed: %s\n",
+ lsa->lsa_stripe_count, strerror(errno));
+ return rc;
}
if (lsa->lsa_pool_name != NULL) {
lsa->lsa_pool_name, strerror(errno));
return rc;
}
+ } else {
+ rc = llapi_layout_pool_name_set(layout, "");
+ if (rc) {
+ fprintf(stderr, "Clear pool name failed: %s\n",
+ strerror(errno));
+ return rc;
+ }
}
if (lsa->lsa_nr_osts > 0) {
if (lsa->lsa_stripe_count > 0 &&
+ lsa->lsa_stripe_count != LLAPI_LAYOUT_DEFAULT &&
+ lsa->lsa_stripe_count != LLAPI_LAYOUT_WIDE &&
lsa->lsa_nr_osts != lsa->lsa_stripe_count) {
- fprintf(stderr, "stripe_count(%d) != nr_osts(%d)\n",
+ fprintf(stderr, "stripe_count(%lld) != nr_osts(%d)\n",
lsa->lsa_stripe_count, lsa->lsa_nr_osts);
return -EINVAL;
}
if (rc)
break;
}
- } else if (lsa->lsa_stripe_off != -1) {
+ } else if (lsa->lsa_stripe_off != LLAPI_LAYOUT_DEFAULT) {
rc = llapi_layout_ost_index_set(layout, 0, lsa->lsa_stripe_off);
}
if (rc) {
uint64_t start, end, stripe_size, prev_end = 0;
int rc;
- if (layout == NULL)
+ if (layout == NULL) {
+ fprintf(stderr,
+ "%s setstripe: layout must be specified\n",
+ progname);
return -EINVAL;
+ }
errno = 0;
head = llapi_layout_get_by_path(fname, 0);
if (head == NULL) {
- fprintf(stderr, "Read layout from %s failed. %s\n",
- fname, strerror(errno));
+ fprintf(stderr,
+ "%s setstripe: cannot read layout from '%s': %s\n",
+ progname, fname, strerror(errno));
return -EINVAL;
} else if (errno == ENODATA) {
/* file without LOVEA, this component-add will be turned
llapi_layout_free(head);
return -ENODATA;
} else if (!llapi_layout_is_composite(head)) {
- fprintf(stderr, "'%s' isn't a composite file.\n",
- fname);
+ fprintf(stderr, "%s setstripe: '%s' not a composite file\n",
+ progname, fname);
llapi_layout_free(head);
return -EINVAL;
}
rc = llapi_layout_comp_extent_get(head, &start, &prev_end);
if (rc) {
- fprintf(stderr, "Get prev extent failed. %s\n",
- strerror(errno));
+ fprintf(stderr, "%s setstripe: cannot get prev extent: %s\n",
+ progname, strerror(errno));
llapi_layout_free(head);
return rc;
}
/* Make sure we use the first component of the layout to be added. */
rc = llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST);
if (rc < 0) {
- fprintf(stderr, "Move component cursor failed. %s\n",
- strerror(errno));
+ fprintf(stderr,
+ "%s setstripe: cannot move component cursor: %s\n",
+ progname, strerror(errno));
return rc;
}
rc = llapi_layout_comp_extent_get(layout, &start, &end);
if (rc) {
- fprintf(stderr, "Get extent failed. %s\n", strerror(errno));
+ fprintf(stderr, "%s setstripe: cannot get extent: %s\n",
+ progname, strerror(errno));
return rc;
}
if (start > prev_end || end <= prev_end) {
- fprintf(stderr, "First extent to be set [%lu, %lu) isn't "
- "adjacent with the existing file extent end: %lu\n",
- start, end, prev_end);
+ fprintf(stderr,
+ "%s setstripe: first extent [%lu, %lu) not adjacent with extent end %lu\n",
+ progname, start, end, prev_end);
return -EINVAL;
}
rc = llapi_layout_stripe_size_get(layout, &stripe_size);
if (rc) {
- fprintf(stderr, "Get stripe size failed. %s\n",
- strerror(errno));
+ fprintf(stderr, "%s setstripe: cannot get stripe size: %s\n",
+ progname, strerror(errno));
return rc;
}
if (stripe_size != LLAPI_LAYOUT_DEFAULT &&
(prev_end & (stripe_size - 1))) {
- fprintf(stderr, "Stripe size %lu not aligned with %lu\n",
- stripe_size, prev_end);
+ fprintf(stderr,
+ "%s setstripe: stripe size %lu not aligned with %lu\n",
+ progname, stripe_size, prev_end);
return -EINVAL;
}
rc = llapi_layout_comp_extent_set(layout, prev_end, end);
if (rc) {
- fprintf(stderr, "Set component extent [%lu, %lu) failed. %s\n",
- prev_end, end, strerror(errno));
+ fprintf(stderr,
+ "%s setstripe: cannot set component extent [%lu, %lu): %s\n",
+ progname, prev_end, end, strerror(errno));
return rc;
}
!strncmp(arg, "eof", strlen("eof"));
}
+ /**
+ * lfs_mirror_alloc() - Allocate a mirror argument structure.
+ *
+ * The structure is obtained with calloc(), so it is returned zero-filled.
+ * If calloc() fails the function sleeps for one second and tries again,
+ * looping until an allocation succeeds.
+ *
+ * Return: Valid mirror_args pointer. Because the allocation is retried
+ * until it succeeds, this function never returns NULL.
+ */
+ static struct mirror_args *lfs_mirror_alloc(void)
+ {
+ struct mirror_args *mirror = NULL;
+
+ while (1) {
+ mirror = calloc(1, sizeof(*mirror));
+ if (mirror != NULL)
+ break;
+
+ /* allocation failed: wait a second before retrying */
+ sleep(1);
+ }
+
+ return mirror;
+ }
+
+ /**
+  * lfs_mirror_free() - Free memory allocated for a mirror argument
+  *		       structure.
+  * @mirror: Mirror argument structure previously allocated by
+  *	    lfs_mirror_alloc(); may be NULL.
+  *
+  * Release the layout attached to @mirror (if any) and then @mirror itself.
+  * Like free(), a NULL @mirror is accepted and ignored.
+  *
+  * Return: void.
+  */
+ static void lfs_mirror_free(struct mirror_args *mirror)
+ {
+ 	if (mirror == NULL)
+ 		return;
+
+ 	if (mirror->m_layout != NULL)
+ 		llapi_layout_free(mirror->m_layout);
+ 	free(mirror);
+ }
+
+ /**
+  * lfs_mirror_list_free() - Release every element of a mirror list.
+  * @mirror_list: Head of a list chained through m_next; may be NULL.
+  *
+  * Walk the list and hand each element to lfs_mirror_free(). The link to
+  * the following element is read before the current one is freed.
+  *
+  * Return: void.
+  */
+ static void lfs_mirror_list_free(struct mirror_args *mirror_list)
+ {
+ 	struct mirror_args *node = mirror_list;
+
+ 	while (node != NULL) {
+ 		struct mirror_args *following = node->m_next;
+
+ 		lfs_mirror_free(node);
+ 		node = following;
+ 	}
+ }
+
enum {
LFS_POOL_OPT = 3,
LFS_COMP_COUNT_OPT,
LFS_COMP_DEL_OPT,
LFS_COMP_SET_OPT,
LFS_COMP_ADD_OPT,
+ LFS_COMP_USE_PARENT_OPT,
+ LFS_COMP_NO_VERIFY_OPT,
LFS_PROJID_OPT,
};
/* functions */
- static int lfs_setstripe(int argc, char **argv)
+ static int lfs_setstripe0(int argc, char **argv, enum setstripe_origin opc)
{
struct lfs_setstripe_args lsa;
struct llapi_stripe_param *param = NULL;
int comp_add = 0;
__u32 comp_id = 0;
struct llapi_layout *layout = NULL;
+ struct llapi_layout **lpp = &layout;
+ bool mirror_mode = false;
+ bool has_m_file = false;
+ __u32 mirror_count = 0;
+ enum mirror_flags mirror_flags = 0;
+ struct mirror_args *mirror_list = NULL;
+ struct mirror_args *new_mirror = NULL;
+ struct mirror_args *last_mirror = NULL;
+ char cmd[PATH_MAX];
struct option long_opts[] = {
/* --block is only valid in migrate mode */
{ .val = LFS_COMP_SET_OPT,
.name = "component-set",
.has_arg = no_argument},
+ { .val = LFS_COMP_USE_PARENT_OPT,
+ .name = "parent", .has_arg = no_argument},
+ { .val = LFS_COMP_NO_VERIFY_OPT,
+ .name = "no-verify", .has_arg = no_argument},
{ .val = 'c', .name = "stripe-count", .has_arg = required_argument},
{ .val = 'c', .name = "stripe_count", .has_arg = required_argument},
{ .val = 'd', .name = "delete", .has_arg = no_argument},
{ .val = 'E', .name = "comp-end", .has_arg = required_argument},
{ .val = 'E', .name = "component-end",
.has_arg = required_argument},
+ { .val = 'f', .name = "file", .has_arg = required_argument },
/* dirstripe {"mdt-hash", required_argument, 0, 'H'}, */
{ .val = 'i', .name = "stripe-index", .has_arg = required_argument},
{ .val = 'i', .name = "stripe_index", .has_arg = required_argument},
{ .val = 'm', .name = "mdt", .has_arg = required_argument},
{ .val = 'm', .name = "mdt-index", .has_arg = required_argument},
{ .val = 'm', .name = "mdt_index", .has_arg = required_argument},
+ { .val = 'N', .name = "mirror-count", .has_arg = optional_argument},
/* --non-block is only valid in migrate mode */
{ .val = 'n', .name = "non-block", .has_arg = no_argument},
{ .val = 'o', .name = "ost", .has_arg = required_argument},
/* dirstripe {"mdt-count", required_argument, 0, 'T'}, */
/* --verbose is only valid in migrate mode */
{ .val = 'v', .name = "verbose", .has_arg = no_argument },
- { .val = LFS_COMP_ADD_OPT,
- .name = "component-add",
- .has_arg = no_argument },
- { .val = LFS_COMP_DEL_OPT,
- .name = "component-del",
- .has_arg = no_argument },
- { .val = LFS_COMP_FLAGS_OPT,
- .name = "component-flags",
- .has_arg = required_argument },
- { .val = LFS_COMP_SET_OPT,
- .name = "component-set",
- .has_arg = no_argument },
{ .name = NULL } };
setstripe_args_init(&lsa);
- if (strcmp(argv[0], "migrate") == 0)
- migrate_mode = true;
+ migrate_mode = (opc == SO_MIGRATE);
+ mirror_mode = (opc == SO_MIRROR_CREATE || opc == SO_MIRROR_EXTEND);
- while ((c = getopt_long(argc, argv, "bc:dE:i:I:m:no:p:L:s:S:v",
+ snprintf(cmd, sizeof(cmd), "%s %s", progname, argv[0]);
+ progname = cmd;
+ while ((c = getopt_long(argc, argv, "bc:dE:f:i:I:m:N::no:p:L:s:S:v",
long_opts, NULL)) >= 0) {
switch (c) {
case 0:
case LFS_COMP_SET_OPT:
comp_set = 1;
break;
+ case LFS_COMP_USE_PARENT_OPT:
+ if (!mirror_mode) {
+ fprintf(stderr, "error: %s: --parent must be "
+ "specified with --mirror-count|-N "
+ "option\n", progname);
+ goto usage_error;
+ }
+ setstripe_args_init(&lsa);
+ break;
+ case LFS_COMP_NO_VERIFY_OPT:
+ mirror_flags |= NO_VERIFY;
+ break;
case 'b':
if (!migrate_mode) {
fprintf(stderr,
progname, argv[0], optarg);
goto usage_error;
}
+
+ if (lsa.lsa_stripe_count == -1)
+ lsa.lsa_stripe_count = LLAPI_LAYOUT_WIDE;
break;
case 'd':
/* delete the default striping pattern */
break;
case 'E':
if (lsa.lsa_comp_end != 0) {
- result = comp_args_to_layout(&layout, &lsa);
+ result = comp_args_to_layout(lpp, &lsa);
if (result) {
fprintf(stderr,
"%s %s: invalid layout\n",
goto usage_error;
}
- setstripe_args_init(&lsa);
+ setstripe_args_init_inherit(&lsa);
}
if (arg_is_eof(optarg)) {
progname, argv[0], optarg);
goto usage_error;
}
+ if (lsa.lsa_stripe_off == -1)
+ lsa.lsa_stripe_off = LLAPI_LAYOUT_DEFAULT;
break;
case 'I':
comp_id = strtoul(optarg, &end, 0);
goto usage_error;
}
break;
+ case 'f':
+ if (opc != SO_MIRROR_EXTEND) {
+ fprintf(stderr,
+ "error: %s: invalid option: %s\n",
+ progname, argv[optopt + 1]);
+ goto usage_error;
+ }
+ if (last_mirror == NULL) {
+ fprintf(stderr, "error: %s: '-N' must exist "
+ "in front of '%s'\n",
+ progname, argv[optopt + 1]);
+ goto usage_error;
+ }
+
+ last_mirror->m_file = optarg;
+ last_mirror->m_count = 1;
+ has_m_file = true;
+ break;
case 'L':
if (strcmp(argv[optind - 1], "mdt") == 0) {
/* Can be only the first component */
}
migration_flags |= MIGRATION_NONBLOCK;
break;
+ case 'N':
+ if (opc == SO_SETSTRIPE) {
+ opc = SO_MIRROR_CREATE;
+ mirror_mode = true;
+ }
+ mirror_count = 1;
+ if (optarg != NULL) {
+ mirror_count = strtoul(optarg, &end, 0);
+ if (*end != '\0' || mirror_count == 0) {
+ fprintf(stderr,
+ "error: %s: bad mirror count: %s\n",
+ progname, optarg);
+ result = -EINVAL;
+ goto error;
+ }
+ }
+
+ new_mirror = lfs_mirror_alloc();
+ new_mirror->m_count = mirror_count;
+
+ if (mirror_list == NULL)
+ mirror_list = new_mirror;
+
+ if (last_mirror != NULL) {
+ /* wrap up last mirror */
+ if (lsa.lsa_comp_end == 0)
+ lsa.lsa_comp_end = LUSTRE_EOF;
+
+ result = comp_args_to_layout(lpp, &lsa);
+ if (result) {
+ lfs_mirror_free(new_mirror);
+ goto error;
+ }
+
+ setstripe_args_init_inherit(&lsa);
+
+ last_mirror->m_next = new_mirror;
+ }
+
+ last_mirror = new_mirror;
+ lpp = &last_mirror->m_layout;
+ break;
case 'o':
lsa.lsa_nr_osts = parse_targets(osts,
sizeof(osts) / sizeof(__u32),
}
lsa.lsa_osts = osts;
- if (lsa.lsa_stripe_off == -1)
+ if (lsa.lsa_stripe_off == LLAPI_LAYOUT_DEFAULT)
lsa.lsa_stripe_off = osts[0];
break;
case 'p':
fname = argv[optind];
- if (lsa.lsa_comp_end != 0) {
- result = comp_args_to_layout(&layout, &lsa);
- if (result) {
- fprintf(stderr, "%s %s: invalid component layout\n",
- progname, argv[0]);
- goto usage_error;
- }
- }
-
if (optind == argc) {
fprintf(stderr, "%s %s: FILE must be specified\n",
progname, argv[0]);
goto usage_error;
}
+ if (mirror_mode && mirror_count == 0) {
+ fprintf(stderr,
+ "error: %s: --mirror-count|-N option is required\n",
+ progname);
+ result = -EINVAL;
+ goto error;
+ }
+
+ if (mirror_mode) {
+ if (lsa.lsa_comp_end == 0)
+ lsa.lsa_comp_end = LUSTRE_EOF;
+ }
+
+ if (lsa.lsa_comp_end != 0) {
+ result = comp_args_to_layout(lpp, &lsa);
+ if (result)
+ goto error;
+ }
+
+ if (mirror_flags & NO_VERIFY) {
+ if (opc != SO_MIRROR_EXTEND) {
+ fprintf(stderr,
+ "error: %s: --no-verify is valid only for lfs mirror extend command\n",
+ progname);
+ result = -EINVAL;
+ goto error;
+ } else if (!has_m_file) {
+ fprintf(stderr,
+ "error: %s: --no-verify must be specified with -f <victim_file> option\n",
+ progname);
+ result = -EINVAL;
+ goto error;
+ }
+ }
+
/* Only LCME_FL_INIT flags is used in PFL, and it shouldn't be
* altered by user space tool, so we don't need to support the
* --component-set for this moment. */
progname);
goto usage_error;
}
+
+ if (mirror_mode) {
+ fprintf(stderr, "error: %s: can't use --component-add "
+ "or --component-del for mirror operation\n",
+ progname);
+ goto usage_error;
+ }
}
if (comp_add) {
progname, argv[0]);
goto usage_error;
}
+
result = adjust_first_extent(fname, layout);
if (result == -ENODATA)
comp_add = 0;
goto error;
}
- param->lsp_stripe_size = lsa.lsa_stripe_size;
- param->lsp_stripe_offset = lsa.lsa_stripe_off;
- param->lsp_stripe_count = lsa.lsa_stripe_count;
+ if (lsa.lsa_stripe_size != LLAPI_LAYOUT_DEFAULT)
+ param->lsp_stripe_size = lsa.lsa_stripe_size;
+ if (lsa.lsa_stripe_count != LLAPI_LAYOUT_DEFAULT) {
+ if (lsa.lsa_stripe_count == LLAPI_LAYOUT_WIDE)
+ param->lsp_stripe_count = -1;
+ else
+ param->lsp_stripe_count = lsa.lsa_stripe_count;
+ }
+ if (lsa.lsa_stripe_off == LLAPI_LAYOUT_DEFAULT)
+ param->lsp_stripe_offset = -1;
+ else
+ param->lsp_stripe_offset = lsa.lsa_stripe_off;
param->lsp_pool = lsa.lsa_pool_name;
param->lsp_is_specific = false;
if (lsa.lsa_nr_osts > 0) {
if (lsa.lsa_stripe_count > 0 &&
+ lsa.lsa_stripe_count != LLAPI_LAYOUT_DEFAULT &&
+ lsa.lsa_stripe_count != LLAPI_LAYOUT_WIDE &&
lsa.lsa_nr_osts != lsa.lsa_stripe_count) {
- fprintf(stderr,
- "%s %s: stripe count '%d' does not match number of OSTs: %d\n",
- progname, argv[0], lsa.lsa_stripe_count,
+ fprintf(stderr, "error: %s: stripe count %lld "
+ "doesn't match the number of OSTs: %d\n"
+ , argv[0], lsa.lsa_stripe_count,
lsa.lsa_nr_osts);
free(param);
goto usage_error;
lsa.lsa_comp_flags);
} else if (comp_add != 0) {
result = lfs_component_add(fname, layout);
+ } else if (opc == SO_MIRROR_CREATE) {
+ result = mirror_create(fname, mirror_list);
+ } else if (opc == SO_MIRROR_EXTEND) {
+ result = mirror_extend(fname, mirror_list,
+ mirror_flags);
} else if (layout != NULL) {
result = lfs_component_create(fname, O_CREAT | O_WRONLY,
0644, layout);
free(param);
llapi_layout_free(layout);
+ lfs_mirror_list_free(mirror_list);
return result2;
usage_error:
result = CMD_HELP;
error:
llapi_layout_free(layout);
+ lfs_mirror_list_free(mirror_list);
return result;
}
return rc ? : rc1;
}
+/*
+ * Query quota information with llapi_quotactl() and print it.
+ *
+ * mnt:            path handed to llapi_quotactl(); replaced by "" for the
+ *                 print helpers when qctl->qc_valid is not QC_GENERAL
+ * name:           name passed to print_quota_title()
+ * qctl:           quota request; filled in by llapi_quotactl()
+ * verbose:        when set (and the request is QC_GENERAL and not
+ *                 LUSTRE_Q_GETINFO) also print per-OBD quota and totals
+ * quiet:          when set, the title line is not printed
+ * human_readable: forwarded to the print helpers
+ *
+ * Returns the result of llapi_quotactl(). Failures of print_obd_quota()
+ * only trigger the "inaccurate data" warning; they are not returned.
+ */
+static int get_print_quota(char *mnt, char *name, struct if_quotactl *qctl,
+ int verbose, int quiet, bool human_readable)
+{
+ int rc1 = 0, rc2 = 0, rc3 = 0;
+ char *obd_type = (char *)qctl->obd_type;
+ char *obd_uuid = (char *)qctl->obd_uuid.uuid;
+ __u64 total_ialloc = 0, total_balloc = 0;
+ int inacc;
+
+ rc1 = llapi_quotactl(mnt, qctl);
+ if (rc1 < 0) {
+ switch (rc1) {
+ case -ESRCH:
+ fprintf(stderr, "%s quotas are not enabled.\n",
+ qtype_name(qctl->qc_type));
+ goto out;
+ case -EPERM:
+ fprintf(stderr, "Permission denied.\n");
+ /* fallthrough: -EPERM also skips the quota output below */
+ case -ENODEV:
+ case -ENOENT:
+ /* We already got error message. */
+ goto out;
+ default:
+ /* any other error is reported, then printing continues */
+ fprintf(stderr, "Unexpected quotactl error: %s\n",
+ strerror(-rc1));
+ }
+ }
+
+ if (qctl->qc_cmd == LUSTRE_Q_GETQUOTA && !quiet)
+ print_quota_title(name, qctl, human_readable);
+
+ if (rc1 && *obd_type)
+ fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+
+ if (qctl->qc_valid != QC_GENERAL)
+ mnt = "";
+
+ /* data is flagged inaccurate unless both limits and usage are valid */
+ inacc = (qctl->qc_cmd == LUSTRE_Q_GETQUOTA) &&
+ ((qctl->qc_dqblk.dqb_valid & (QIF_LIMITS|QIF_USAGE)) !=
+ (QIF_LIMITS|QIF_USAGE));
+
+ print_quota(mnt, qctl, QC_GENERAL, rc1, human_readable);
+
+ if (qctl->qc_valid == QC_GENERAL && qctl->qc_cmd != LUSTRE_Q_GETINFO &&
+ verbose) {
+ char strbuf[STRBUF_LEN];
+
+ rc2 = print_obd_quota(mnt, qctl, 1, human_readable,
+ &total_ialloc);
+ rc3 = print_obd_quota(mnt, qctl, 0, human_readable,
+ &total_balloc);
+ kbytes2str(total_balloc, strbuf, sizeof(strbuf),
+ human_readable);
+ printf("Total allocated inode limit: %ju, total "
+ "allocated block limit: %s\n", (uintmax_t)total_ialloc,
+ strbuf);
+ }
+
+ if (rc1 || rc2 || rc3 || inacc)
+ printf("Some errors happened when getting quota info. "
+ "Some devices may be not working or deactivated. "
+ "The data in \"[]\" is inaccurate.\n");
+out:
+ return rc1;
+
+}
+
static int lfs_quota(int argc, char **argv)
{
int c;
char *mnt, *name = NULL;
struct if_quotactl qctl = { .qc_cmd = LUSTRE_Q_GETQUOTA,
.qc_type = ALLQUOTA };
- char *obd_type = (char *)qctl.obd_type;
char *obd_uuid = (char *)qctl.obd_uuid.uuid;
- int rc = 0, rc1 = 0, rc2 = 0, rc3 = 0,
- verbose = 0, pass = 0, quiet = 0, inacc;
+ int rc = 0, rc1 = 0, verbose = 0, quiet = 0;
char *endptr;
__u32 valid = QC_GENERAL, idx = 0;
- __u64 total_ialloc = 0, total_balloc = 0;
bool human_readable = false;
int qtype;
/* current uid/gid info for "lfs quota /path/to/lustre/mount" */
if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA && qctl.qc_type == ALLQUOTA &&
optind == argc - 1) {
-all_output:
- memset(&qctl, 0, sizeof(qctl)); /* spoiled by print_*_quota */
+
qctl.qc_cmd = LUSTRE_Q_GETQUOTA;
qctl.qc_valid = valid;
qctl.qc_idx = idx;
- qctl.qc_type = pass;
- switch (qctl.qc_type) {
- case USRQUOTA:
- qctl.qc_id = geteuid();
- rc = uid2name(&name, qctl.qc_id);
- break;
- case GRPQUOTA:
- qctl.qc_id = getegid();
- rc = gid2name(&name, qctl.qc_id);
- break;
- default:
- rc = -ENOTSUP;
- pass++;
- goto out;
+
+ for (qtype = USRQUOTA; qtype <= GRPQUOTA; qtype++) {
+ qctl.qc_type = qtype;
+ if (qtype == USRQUOTA) {
+ qctl.qc_id = geteuid();
+ rc = uid2name(&name, qctl.qc_id);
+ } else {
+ qctl.qc_id = getegid();
+ rc = gid2name(&name, qctl.qc_id);
+ }
+ if (rc)
+ name = "<unknown>";
+ mnt = argv[optind];
+ rc1 = get_print_quota(mnt, name, &qctl, verbose, quiet,
+ human_readable);
+ if (rc1 && !rc)
+ rc = rc1;
}
- if (rc)
- name = "<unknown>";
- pass++;
/* lfs quota -u username /path/to/lustre/mount */
} else if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA) {
/* options should be followed by u/g-name and mntpoint */
return CMD_HELP;
}
}
+ mnt = argv[optind];
+ rc = get_print_quota(mnt, name, &qctl, verbose, quiet,
+ human_readable);
} else if (optind + 1 != argc || qctl.qc_type == ALLQUOTA) {
fprintf(stderr, "error: missing quota info argument(s)\n");
return CMD_HELP;
}
- mnt = argv[optind];
- rc1 = llapi_quotactl(mnt, &qctl);
- if (rc1 < 0) {
- switch (rc1) {
- case -ESRCH:
- fprintf(stderr, "%s quotas are not enabled.\n",
- qtype_name(qctl.qc_type));
- goto out;
- case -EPERM:
- fprintf(stderr, "Permission denied.\n");
- case -ENODEV:
- case -ENOENT:
- /* We already got error message. */
- goto out;
- default:
- fprintf(stderr, "Unexpected quotactl error: %s\n",
- strerror(-rc1));
- }
- }
-
- if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA && !quiet)
- print_quota_title(name, &qctl, human_readable);
-
- if (rc1 && *obd_type)
- fprintf(stderr, "%s %s ", obd_type, obd_uuid);
-
- if (qctl.qc_valid != QC_GENERAL)
- mnt = "";
-
- inacc = (qctl.qc_cmd == LUSTRE_Q_GETQUOTA) &&
- ((qctl.qc_dqblk.dqb_valid & (QIF_LIMITS|QIF_USAGE)) !=
- (QIF_LIMITS|QIF_USAGE));
-
- print_quota(mnt, &qctl, QC_GENERAL, rc1, human_readable);
-
- if (qctl.qc_valid == QC_GENERAL && qctl.qc_cmd != LUSTRE_Q_GETINFO &&
- verbose) {
- char strbuf[STRBUF_LEN];
-
- rc2 = print_obd_quota(mnt, &qctl, 1, human_readable,
- &total_ialloc);
- rc3 = print_obd_quota(mnt, &qctl, 0, human_readable,
- &total_balloc);
- kbytes2str(total_balloc, strbuf, sizeof(strbuf),
- human_readable);
- printf("Total allocated inode limit: %ju, total "
- "allocated block limit: %s\n", (uintmax_t)total_ialloc,
- strbuf);
- }
-
- if (rc1 || rc2 || rc3 || inacc)
- printf("Some errors happened when getting quota info. "
- "Some devices may be not working or deactivated. "
- "The data in \"[]\" is inaccurate.\n");
-
-out:
- if (pass > 0 && pass < LL_MAXQUOTAS)
- goto all_output;
-
- return rc1;
+ return rc;
}
#endif /* HAVE_SYS_QUOTA_H! */
return rc;
}
+ /**
+  * parse_mirror_ids() - Parse a comma delimited list of ids and id ranges.
+  * @ids:  Output array receiving the unique ids in order of appearance.
+  * @size: Number of entries available in @ids.
+  * @arg:  Input string, for example "1,2-4,7". The string is modified
+  *	  temporarily while it is parsed and is restored before returning.
+  *
+  * Each comma separated element is either a single non-negative number or
+  * an ascending range "start-end". Ids already present in @ids are skipped.
+  *
+  * Return: number of ids stored in @ids on success. -EINVAL if @arg is
+  * NULL, an element is malformed, an id does not fit in a __u16, or @ids is
+  * too small to hold every distinct id.
+  */
+ static int parse_mirror_ids(__u16 *ids, int size, char *arg)
+ {
+ 	bool end_of_loop = false;
+ 	char *ptr = NULL;
+ 	int nr = 0;
+ 	int rc;
+
+ 	if (arg == NULL)
+ 		return -EINVAL;
+
+ 	while (!end_of_loop) {
+ 		long start_index;
+ 		long end_index;
+ 		long i;
+ 		char *endptr = NULL;
+ 		bool overflow = false;
+
+ 		rc = -EINVAL;
+ 		/* isolate the current element by cutting at the next comma */
+ 		ptr = strchrnul(arg, ',');
+ 		end_of_loop = *ptr == '\0';
+ 		*ptr = '\0';
+
+ 		start_index = strtol(arg, &endptr, 0);
+ 		if (endptr == arg) /* no data at all */
+ 			break;
+ 		if (*endptr != '-' && *endptr != '\0') /* has invalid data */
+ 			break;
+ 		if (start_index < 0)
+ 			break;
+
+ 		end_index = start_index;
+ 		if (*endptr == '-') {
+ 			end_index = strtol(endptr + 1, &endptr, 0);
+ 			if (*endptr != '\0')
+ 				break;
+ 			if (end_index < start_index)
+ 				break;
+ 		}
+
+ 		/* ids are stored as __u16; refuse values that would wrap */
+ 		if (end_index > 0xffff)
+ 			break;
+
+ 		for (i = start_index; i <= end_index; i++) {
+ 			int j;
+
+ 			/* remove duplicate */
+ 			for (j = 0; j < nr; j++) {
+ 				if (ids[j] == i)
+ 					break;
+ 			}
+ 			if (j < nr)
+ 				continue;
+
+ 			/* a new id that does not fit is an error, not a
+ 			 * silent truncation of the list
+ 			 */
+ 			if (nr >= size) {
+ 				overflow = true;
+ 				break;
+ 			}
+ 			ids[nr++] = (__u16)i;
+ 		}
+
+ 		if (overflow)
+ 			break;
+
+ 		rc = 0;
+ 		/* Put the comma back only if one was removed; on the last
+ 		 * element @ptr is the string terminator and must stay NUL.
+ 		 */
+ 		if (!end_of_loop) {
+ 			*ptr = ',';
+ 			arg = ptr + 1;
+ 		}
+ 	}
+ 	/* error exit in the middle of the list: restore the comma */
+ 	if (!end_of_loop && ptr != NULL)
+ 		*ptr = ',';
+
+ 	return rc < 0 ? rc : nr;
+ }
+
+ /**
+  * lfs_mirror_resync_file() - Resynchronize the stale mirrors of one file.
+  * @fname:	Path of the regular file to resync.
+  * @ioc:	Lease ioctl buffer supplied by the caller. It is used both to
+  *		take the resync lease and to report the synced component
+  *		ids; lil_ids[] must be large enough for those ids (the
+  *		caller in this file allocates 4096 entries).
+  * @mirror_ids: Mirror ids handed to llapi_mirror_find_stale().
+  * @ids_nr:	Number of entries in @mirror_ids.
+  *
+  * Takes a write lease with the RESYNC flag, looks up the stale components,
+  * resyncs them (merging adjacent components of the same mirror into one
+  * call) and finally releases the lease with the list of synced ids.
+  *
+  * Return: 0 on success, a negative value on failure. A file whose FLR
+  * state is read-only prints a state message and returns 0.
+  */
+ static inline
+ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
+ 			   __u16 *mirror_ids, int ids_nr)
+ {
+ 	const char *progname = "lfs mirror resync";
+ 	struct llapi_resync_comp comp_array[1024] = { { 0 } };
+ 	struct llapi_layout *layout = NULL;
+ 	struct stat stbuf;
+ 	uint32_t flr_state;
+ 	int comp_size = 0;
+ 	int idx;
+ 	int fd;
+ 	int rc;
+
+ 	if (stat(fname, &stbuf) < 0) {
+ 		/* save errno first: fprintf() may overwrite it */
+ 		rc = -errno;
+ 		fprintf(stderr, "%s: cannot stat file '%s': %s.\n",
+ 			progname, fname, strerror(-rc));
+ 		goto error;
+ 	}
+ 	if (!S_ISREG(stbuf.st_mode)) {
+ 		fprintf(stderr, "%s: '%s' is not a regular file.\n",
+ 			progname, fname);
+ 		rc = -EINVAL;
+ 		goto error;
+ 	}
+
+ 	fd = open(fname, O_DIRECT | O_RDWR);
+ 	if (fd < 0) {
+ 		rc = -errno;
+ 		fprintf(stderr, "%s: cannot open '%s': %s.\n",
+ 			progname, fname, strerror(-rc));
+ 		goto error;
+ 	}
+
+ 	ioc->lil_mode = LL_LEASE_WRLCK;
+ 	ioc->lil_flags = LL_LEASE_RESYNC;
+ 	rc = llapi_lease_get_ext(fd, ioc);
+ 	if (rc < 0) {
+ 		fprintf(stderr, "%s: '%s' llapi_lease_get_ext resync failed: "
+ 			"%s.\n", progname, fname, strerror(errno));
+ 		goto close_fd;
+ 	}
+
+ 	layout = llapi_layout_get_by_fd(fd, 0);
+ 	if (layout == NULL) {
+ 		rc = -errno;
+ 		fprintf(stderr, "%s: '%s' llapi_layout_get_by_fd failed: %s.\n",
+ 			progname, fname, strerror(-rc));
+ 		goto close_fd;
+ 	}
+
+ 	rc = llapi_layout_flags_get(layout, &flr_state);
+ 	if (rc) {
+ 		rc = -errno;
+ 		fprintf(stderr, "%s: '%s' llapi_layout_flags_get failed: %s.\n",
+ 			progname, fname, strerror(-rc));
+ 		goto free_layout;
+ 	}
+
+ 	flr_state &= LCM_FL_FLR_MASK;
+ 	switch (flr_state) {
+ 	case LCM_FL_NOT_FLR:
+ 		rc = -EINVAL;
+ 		/* fallthrough */
+ 	case LCM_FL_RDONLY:
+ 		/* read-only keeps rc == 0 from the call above */
+ 		fprintf(stderr, "%s: '%s' file state error: %s.\n",
+ 			progname, fname, lcm_flags_string(flr_state));
+ 		goto free_layout;
+ 	default:
+ 		break;
+ 	}
+
+ 	/* get stale component info */
+ 	comp_size = llapi_mirror_find_stale(layout, comp_array,
+ 					    ARRAY_SIZE(comp_array),
+ 					    mirror_ids, ids_nr);
+ 	if (comp_size < 0) {
+ 		rc = comp_size;
+ 		goto free_layout;
+ 	}
+
+ 	idx = 0;
+ 	while (idx < comp_size) {
+ 		ssize_t result;
+ 		uint64_t end;
+ 		__u16 mirror_id;
+ 		int i;
+
+ 		rc = llapi_lease_check(fd);
+ 		if (rc != LL_LEASE_WRLCK) {
+ 			fprintf(stderr, "%s: '%s' lost lease lock.\n",
+ 				progname, fname);
+ 			/* never report a lost lease as success */
+ 			if (rc >= 0)
+ 				rc = -EBUSY;
+ 			goto free_layout;
+ 		}
+
+ 		mirror_id = comp_array[idx].lrc_mirror_id;
+ 		end = comp_array[idx].lrc_end;
+
+ 		/* try to combine adjacent component */
+ 		for (i = idx + 1; i < comp_size; i++) {
+ 			if (mirror_id != comp_array[i].lrc_mirror_id ||
+ 			    end != comp_array[i].lrc_start)
+ 				break;
+ 			end = comp_array[i].lrc_end;
+ 		}
+
+ 		result = llapi_mirror_resync_one(fd, layout, mirror_id,
+ 						 comp_array[idx].lrc_start,
+ 						 end);
+ 		if (result < 0) {
+ 			fprintf(stderr, "%s: '%s' llapi_mirror_resync_one: "
+ 				"%ld.\n", progname, fname, (long)result);
+ 			rc = result;
+ 			goto free_layout;
+ 		} else if (result > 0) {
+ 			int j;
+
+ 			/* mark synced components */
+ 			for (j = idx; j < i; j++)
+ 				comp_array[j].lrc_synced = true;
+ 		}
+
+ 		idx = i;
+ 	}
+
+ 	/* prepare ioc for lease put */
+ 	ioc->lil_mode = LL_LEASE_UNLCK;
+ 	ioc->lil_flags = LL_LEASE_RESYNC_DONE;
+ 	ioc->lil_count = 0;
+ 	for (idx = 0; idx < comp_size; idx++) {
+ 		if (comp_array[idx].lrc_synced) {
+ 			ioc->lil_ids[ioc->lil_count] = comp_array[idx].lrc_id;
+ 			ioc->lil_count++;
+ 		}
+ 	}
+
+ 	/* the layout is no longer needed once the id list is built */
+ 	llapi_layout_free(layout);
+ 	layout = NULL;
+
+ 	rc = llapi_lease_get_ext(fd, ioc);
+ 	if (rc <= 0) {
+ 		if (rc == 0) /* lost lease lock */
+ 			rc = -EBUSY;
+ 		fprintf(stderr, "%s: resync file '%s' failed: %s.\n",
+ 			progname, fname, strerror(errno));
+ 		goto close_fd;
+ 	}
+ 	/**
+ 	 * llapi_lease_get_ext returns lease mode when it request to unlock
+ 	 * the lease lock
+ 	 */
+ 	rc = 0;
+
+ free_layout:
+ 	if (layout != NULL)
+ 		llapi_layout_free(layout);
+ close_fd:
+ 	close(fd);
+ error:
+ 	return rc;
+ }
+
+ /**
+  * lfs_mirror_resync() - Handle the "lfs mirror resync" command.
+  * @argc: The count of command line arguments.
+  * @argv: Array of strings for command line arguments.
+  *
+  * Accepts "--only|-o <ids>" followed by one or more file names; --only is
+  * rejected when more than one file is given. Each file is resynced with
+  * lfs_mirror_resync_file(); a failure on one file is reported and the
+  * remaining files are still processed.
+  *
+  * Return: the result of the last file processed, CMD_HELP for a usage
+  * error, or a negative value on option parsing/allocation failure.
+  */
+ static inline int lfs_mirror_resync(int argc, char **argv)
+ {
+ 	struct ll_ioc_lease *ioc = NULL;
+ 	__u16 mirror_ids[128] = { 0 };
+ 	/* header plus room for 4096 component ids in lil_ids[] */
+ 	const size_t ioc_size = sizeof(*ioc) + sizeof(__u32) * 4096;
+ 	int ids_nr = 0;
+ 	int c;
+ 	int rc = 0;
+
+ 	struct option long_opts[] = {
+ 	{ .val = 'o',	.name = "only",		.has_arg = required_argument },
+ 	{ .name = NULL } };
+
+ 	while ((c = getopt_long(argc, argv, "o:", long_opts, NULL)) >= 0) {
+ 		switch (c) {
+ 		case 'o':
+ 			rc = parse_mirror_ids(mirror_ids,
+ 					sizeof(mirror_ids) / sizeof(__u16),
+ 					optarg);
+ 			if (rc < 0) {
+ 				fprintf(stderr,
+ 					"%s: bad mirror ids '%s'.\n",
+ 					argv[0], optarg);
+ 				goto error;
+ 			}
+ 			ids_nr = rc;
+ 			break;
+ 		default:
+ 			fprintf(stderr, "%s: options '%s' unrecognized.\n",
+ 				argv[0], argv[optind - 1]);
+ 			rc = -EINVAL;
+ 			goto error;
+ 		}
+ 	}
+
+ 	if (argc == optind) {
+ 		fprintf(stderr, "%s: no file name given.\n", argv[0]);
+ 		rc = CMD_HELP;
+ 		goto error;
+ 	}
+
+ 	if (ids_nr > 0 && argc > optind + 1) {
+ 		fprintf(stderr, "%s: option '--only' cannot be used upon "
+ 			"multiple files.\n", argv[0]);
+ 		rc = CMD_HELP;
+ 		goto error;
+
+ 	}
+
+ 	/* set the lease on the file */
+ 	ioc = calloc(1, ioc_size);
+ 	if (ioc == NULL) {
+ 		fprintf(stderr, "%s: cannot alloc id array for ioc: %s.\n",
+ 			argv[0], strerror(errno));
+ 		rc = -errno;
+ 		goto error;
+ 	}
+
+ 	for (; optind < argc; optind++) {
+ 		rc = lfs_mirror_resync_file(argv[optind], ioc,
+ 					    mirror_ids, ids_nr);
+ 		if (rc)
+ 			fprintf(stderr, "%s: resync file '%s' failed: %d\n",
+ 				argv[0], argv[optind], rc);
+ 		/* ignore previous file's error, continue with next file */
+
+ 		/* reset ioc: clear the whole buffer, header included */
+ 		memset(ioc, 0, ioc_size);
+ 	}
+
+ 	free(ioc);
+ error:
+ 	return rc;
+ }
+
+ /**
+  * lfs_mirror() - Parse and execute lfs mirror commands.
+  * @argc: The count of lfs mirror command line arguments.
+  * @argv: Array of strings for lfs mirror command line arguments.
+  *
+  * This function parses lfs mirror commands and performs the
+  * corresponding functions specified in mirror_cmdlist[].
+  *
+  * progname and program_invocation_short_name are redirected to the
+  * combined "<progname> <argv[0]>" string. That string lives in static
+  * storage because both pointers remain in use after this function
+  * returns.
+  *
+  * Return: 0 on success or an error code on failure.
+  */
+ static int lfs_mirror(int argc, char **argv)
+ {
+ 	static char cmd[PATH_MAX];
+ 	int rc = 0;
+
+ 	setlinebuf(stdout);
+
+ 	Parser_init("lfs-mirror > ", mirror_cmdlist);
+
+ 	/* On re-entry progname may already point at cmd; formatting a
+ 	 * buffer into itself would be an overlapping copy, so skip it.
+ 	 */
+ 	if (progname != cmd) {
+ 		snprintf(cmd, sizeof(cmd), "%s %s", progname, argv[0]);
+ 		progname = cmd;
+ 	}
+ 	program_invocation_short_name = cmd;
+ 	if (argc > 1)
+ 		rc = Parser_execarg(argc - 1, argv + 1, mirror_cmdlist);
+ 	else
+ 		rc = Parser_commands();
+
+ 	return rc < 0 ? -rc : rc;
+ }
+
+ /**
+  * lfs_mirror_list_commands() - Print the lfs mirror sub-commands.
+  * @argc: Number of command line arguments (not used).
+  * @argv: Command line arguments (not used).
+  *
+  * Hands mirror_cmdlist[] to Parser_list_commands() together with an
+  * 81 byte scratch line.
+  *
+  * Return: always 0.
+  */
+ static int lfs_mirror_list_commands(int argc, char **argv)
+ {
+ 	char line[81] = "";
+
+ 	Parser_list_commands(mirror_cmdlist, line, sizeof(line), NULL, 0, 4);
+ 	return 0;
+ }
+
static int lfs_list_commands(int argc, char **argv)
{
char buffer[81] = ""; /* 80 printable chars + terminating NUL */
return 0;
}
+ /*
+  * Translate a LOV layout pattern into a printable name.
+  *
+  * Recognizes the MDT pattern, plain RAID0 and RAID0 with the RELEASED
+  * flag; every other value is reported as "unknown". The returned string
+  * is a literal and must not be modified or freed.
+  */
+ static char *layout2name(__u32 layout_pattern)
+ {
+ 	if (layout_pattern == LOV_PATTERN_MDT)
+ 		return "mdt";
+
+ 	if (layout_pattern == LOV_PATTERN_RAID0)
+ 		return "raid0";
+
+ 	if (layout_pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_F_RELEASED))
+ 		return "released";
+
+ 	return "unknown";
+ }
+
enum lov_dump_flags {
LDF_IS_DIR = 0x0001,
LDF_IS_RAW = 0x0002,
if (verbose & ~VERBOSE_LAYOUT)
llapi_printf(LLAPI_MSG_NORMAL, "%s%spattern: ",
space, prefix);
- llapi_printf(LLAPI_MSG_NORMAL, "%.x", lum->lmm_pattern);
+ if (lov_pattern_supported(lum->lmm_pattern))
+ llapi_printf(LLAPI_MSG_NORMAL, "%s",
+ layout2name(lum->lmm_pattern));
+ else
+ llapi_printf(LLAPI_MSG_NORMAL, "%.x", lum->lmm_pattern);
separator = is_dir ? " " : "\n";
}
obdindex == idx ? " *" : "");
}
}
- llapi_printf(LLAPI_MSG_NORMAL, "\n");
}
+ llapi_printf(LLAPI_MSG_NORMAL, "\n");
}
void lmv_dump_user_lmm(struct lmv_user_md *lum, char *pool_name,
if (verbose & VERBOSE_DETAIL) {
llapi_printf(LLAPI_MSG_NORMAL, "composite_header:\n");
- llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_magic: 0x%08X\n",
+ llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_magic: 0x%08X\n",
" ", comp_v1->lcm_magic);
- llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_size: %u\n",
+ llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_size: %u\n",
" ", comp_v1->lcm_size);
- llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_flags: %u\n",
- " ", comp_v1->lcm_flags);
+ if (flags & LDF_IS_DIR)
+ llapi_printf(LLAPI_MSG_NORMAL,
+ "%2slcm_flags: %s\n", " ",
+ comp_v1->lcm_mirror_count > 0 ?
+ "mirrored" : "");
+ else
+ llapi_printf(LLAPI_MSG_NORMAL,
+ "%2slcm_flags: %s\n",
+ " ", lcm_flags_string(comp_v1->lcm_flags));
}
if (verbose & VERBOSE_GENERATION) {
if (verbose & ~VERBOSE_GENERATION)
- llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_layout_gen: ",
+ llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_layout_gen: ",
" ");
llapi_printf(LLAPI_MSG_NORMAL, "%u\n", comp_v1->lcm_layout_gen);
}
+ if (verbose & VERBOSE_MIRROR_COUNT) {
+ if (verbose & ~VERBOSE_MIRROR_COUNT)
+ llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_mirror_count: ",
+ " ");
+ llapi_printf(LLAPI_MSG_NORMAL, "%u\n",
+ comp_v1->lcm_magic == LOV_USER_MAGIC_COMP_V1 ?
+ comp_v1->lcm_mirror_count + 1 : 1);
+ }
+
if (verbose & VERBOSE_COMP_COUNT) {
if (verbose & ~VERBOSE_COMP_COUNT)
- llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_entry_count: ",
+ llapi_printf(LLAPI_MSG_NORMAL, "%2slcm_entry_count: ",
" ");
llapi_printf(LLAPI_MSG_NORMAL, "%u\n",
comp_v1->lcm_magic == LOV_USER_MAGIC_COMP_V1 ?
llapi_printf(LLAPI_MSG_NORMAL, "components:\n");
}
- static void comp_flags2str(__u32 comp_flags)
+ static void lcme_flags2str(__u32 comp_flags)
{
bool found = false;
int i = 0;
if (verbose & ~VERBOSE_COMP_FLAGS)
llapi_printf(LLAPI_MSG_NORMAL,
"%4slcme_flags: ", " ");
- comp_flags2str(entry->lcme_flags);
+ lcme_flags2str(entry->lcme_flags);
separator = "\n";
}
* lmm_fid: [0x200000401:0x1:0x0]
* lmm_stripe_count: 1
* lmm_stripe_size: 1048576
- * lmm_pattern: 1
+ * lmm_pattern: raid0
* lmm_layout_gen: 0
* lmm_stripe_offset: 0
* lmm_objects:
* lmm_fid: [0x200000401:0x1:0x0]
* lmm_stripe_count: 2
* lmm_stripe_size: 1048576
- * lmm_pattern: 1
+ * lmm_pattern: raid0
* lmm_layout_gen: 0
* lmm_stripe_offset: 1
* lmm_objects:
} else {
/* Not enough room to add suffix */
llapi_err_noerrno(LLAPI_MSG_ERROR,
- "MDT name too long |%s|", name);
+ "Invalid MDT name |%s|", name);
return -EINVAL;
}
}
const char *fidstr_orig = fidstr;
struct lu_fid fid;
struct getinfo_fid2path *gf;
+ char *a;
+ char *b;
int rc;
while (*fidstr == '[')
if (rc)
goto out_free;
- memcpy(buf, gf->gf_u.gf_path, gf->gf_pathlen);
+ b = buf;
+ /* strip out instances of // */
+ for (a = gf->gf_u.gf_path; *a != '\0'; a++) {
+ if ((*a == '/') && (*(a + 1) == '/'))
+ continue;
+ *b = *a;
+ b++;
+ }
+ *b = '\0';
+
if (buf[0] == '\0') { /* ROOT path */
buf[0] = '/';
buf[1] = '\0';
}
+
*recno = gf->gf_recno;
*linkno = gf->gf_linkno;
*/
int llapi_get_data_version(int fd, __u64 *data_version, __u64 flags)
{
- int rc;
- struct ioc_data_version idv;
+ int rc;
+ struct ioc_data_version idv;
- idv.idv_flags = flags;
+ idv.idv_flags = (__u32)flags;
- rc = ioctl(fd, LL_IOC_DATA_VERSION, &idv);
- if (rc)
- rc = -errno;
- else
- *data_version = idv.idv_version;
+ rc = ioctl(fd, LL_IOC_DATA_VERSION, &idv);
+ if (rc)
+ rc = -errno;
+ else
+ *data_version = idv.idv_version;
- return rc;
+ return rc;
+ }
+
+ /*
+ * Fetch the layout version from OST objects. The layout version on OST
+ * objects is only set when the file is a mirrored file AND after the file
+ * has been written at least once.
+ *
+ * It actually fetches the least layout version from the objects.
+ *
+ * fd:             open file descriptor the ioctl is issued on
+ * layout_version: receives idv_layout_version; written only on success
+ *
+ * Returns 0 on success or -errno if the LL_IOC_DATA_VERSION ioctl fails.
+ */
+ int llapi_get_ost_layout_version(int fd, __u32 *layout_version)
+ {
+ int rc;
+ /* zero-initialized: the request is sent with no flags set */
+ struct ioc_data_version idv = { 0 };
+
+ rc = ioctl(fd, LL_IOC_DATA_VERSION, &idv);
+ if (rc)
+ rc = -errno;
+ else
+ *layout_version = idv.idv_layout_version;
+
+ return rc;
}
/*
#include <string.h>
#include <linux/lustre/lustre_idl.h>
+#ifdef HAVE_SERVER_SUPPORT
#include <linux/lustre/lustre_lfsck_user.h>
#include <linux/lustre/lustre_disk.h>
+#endif
#define LASSERT(cond) if (!(cond)) { printf("failed " #cond "\n"); ret = 1; }
#define LASSERTF(cond, fmt, ...) if (!(cond)) { printf("failed '" #cond "'" fmt, ## __VA_ARGS__);ret = 1;}
(long long)REINT_RMENTRY);
LASSERTF(REINT_MIGRATE == 9, "found %lld\n",
(long long)REINT_MIGRATE);
- LASSERTF(REINT_MAX == 10, "found %lld\n",
+ LASSERTF(REINT_MAX == 11, "found %lld\n",
(long long)REINT_MAX);
LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
(unsigned)DISP_IT_EXECD);
(long long)(int)offsetof(struct obdo, o_layout));
LASSERTF((int)sizeof(((struct obdo *)0)->o_layout) == 28, "found %lld\n",
(long long)(int)sizeof(((struct obdo *)0)->o_layout));
- LASSERTF((int)offsetof(struct obdo, o_padding_3) == 164, "found %lld\n",
- (long long)(int)offsetof(struct obdo, o_padding_3));
- LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_3) == 4, "found %lld\n",
- (long long)(int)sizeof(((struct obdo *)0)->o_padding_3));
+ LASSERTF((int)offsetof(struct obdo, o_layout_version) == 164, "found %lld\n",
+ (long long)(int)offsetof(struct obdo, o_layout_version));
+ LASSERTF((int)sizeof(((struct obdo *)0)->o_layout_version) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct obdo *)0)->o_layout_version));
LASSERTF((int)offsetof(struct obdo, o_uid_h) == 168, "found %lld\n",
(long long)(int)offsetof(struct obdo, o_uid_h));
LASSERTF((int)sizeof(((struct obdo *)0)->o_uid_h) == 4, "found %lld\n",
(long long)(int)sizeof(((struct lov_comp_md_entry_v1 *)0)->lcme_padding));
LASSERTF(LCME_FL_INIT == 0x00000010UL, "found 0x%.8xUL\n",
(unsigned)LCME_FL_INIT);
+ LASSERTF(LCME_FL_NEG == 0x80000000UL, "found 0x%.8xUL\n",
+ (unsigned)LCME_FL_NEG);
/* Checks for struct lov_comp_md_v1 */
LASSERTF((int)sizeof(struct lov_comp_md_v1) == 32, "found %lld\n",
(long long)(int)offsetof(struct lov_comp_md_v1, lcm_entry_count));
LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count) == 2, "found %lld\n",
(long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entry_count));
- LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 16, "found %lld\n",
+ LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_mirror_count) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lov_comp_md_v1, lcm_mirror_count));
+ LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_mirror_count));
+ LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding1) == 18, "found %lld\n",
(long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding1));
- LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 8, "found %lld\n",
+ LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1) == 6, "found %lld\n",
(long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_padding1));
LASSERTF((int)offsetof(struct lov_comp_md_v1, lcm_padding2) == 24, "found %lld\n",
(long long)(int)offsetof(struct lov_comp_md_v1, lcm_padding2));
LASSERTF((int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]) == 48, "found %lld\n",
(long long)(int)sizeof(((struct lov_comp_md_v1 *)0)->lcm_entries[0]));
CLASSERT(LOV_MAGIC_COMP_V1 == (0x0BD60000 | 0x0BD0));
+ LASSERTF(LCM_FL_NOT_FLR == 0, "found %lld\n",
+ (long long)LCM_FL_NOT_FLR);
+ LASSERTF(LCM_FL_RDONLY == 1, "found %lld\n",
+ (long long)LCM_FL_RDONLY);
+ LASSERTF(LCM_FL_WRITE_PENDING == 2, "found %lld\n",
+ (long long)LCM_FL_WRITE_PENDING);
+ LASSERTF(LCM_FL_SYNC_PENDING == 3, "found %lld\n",
+ (long long)LCM_FL_SYNC_PENDING);
/* Checks for struct lmv_mds_md_v1 */
LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",
LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11) == 4, "found %lld\n",
(long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11));
+ /* Checks for struct mdt_rec_resync */
+ LASSERTF((int)sizeof(struct mdt_rec_resync) == 136, "found %lld\n",
+ (long long)(int)sizeof(struct mdt_rec_resync));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_opcode) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_opcode));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_opcode) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_opcode));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_cap) == 4, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_cap));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_cap) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_cap));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_fsuid) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_fsuid));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_fsuid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_fsuid));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_fsuid_h) == 12, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_fsuid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_fsuid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_fsuid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_fsgid) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_fsgid));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_fsgid) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_fsgid));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_fsgid_h) == 20, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_fsgid_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_fsgid_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_fsgid_h));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_suppgid1) == 24, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_suppgid1));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid1) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid1));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_suppgid1_h) == 28, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_suppgid1_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid1_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid1_h));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_suppgid2) == 32, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_suppgid2));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid2) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid2));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_suppgid2_h) == 36, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_suppgid2_h));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid2_h) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_suppgid2_h));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_fid) == 40, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_fid));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_fid) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_fid));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding0) == 56, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_padding0));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding0) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding0));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding1) == 80, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_padding1));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding1) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding1));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding2) == 88, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_padding2));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding2) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding2));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding3) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_padding3));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding3) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding3));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding4) == 104, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_padding4));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding4) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding4));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_bias) == 112, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_bias));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_bias) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_bias));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding5) == 116, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_padding5));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding5) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding5));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding6) == 120, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_padding6));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding6) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding6));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding7) == 124, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_padding7));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding7) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding7));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding8) == 128, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_padding8));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding8) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding8));
+ LASSERTF((int)offsetof(struct mdt_rec_resync, rs_padding9) == 132, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_rec_resync, rs_padding9));
+ LASSERTF((int)sizeof(((struct mdt_rec_resync *)0)->rs_padding9) == 4, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_rec_resync *)0)->rs_padding9));
+
/* Checks for struct mdt_rec_reint */
LASSERTF((int)sizeof(struct mdt_rec_reint) == 136, "found %lld\n",
(long long)(int)sizeof(struct mdt_rec_reint));
(long long)(int)offsetof(struct layout_intent, li_flags));
LASSERTF((int)sizeof(((struct layout_intent *)0)->li_flags) == 4, "found %lld\n",
(long long)(int)sizeof(((struct layout_intent *)0)->li_flags));
- LASSERTF((int)offsetof(struct layout_intent, li_start) == 8, "found %lld\n",
- (long long)(int)offsetof(struct layout_intent, li_start));
- LASSERTF((int)sizeof(((struct layout_intent *)0)->li_start) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct layout_intent *)0)->li_start));
- LASSERTF((int)offsetof(struct layout_intent, li_end) == 16, "found %lld\n",
- (long long)(int)offsetof(struct layout_intent, li_end));
- LASSERTF((int)sizeof(((struct layout_intent *)0)->li_end) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct layout_intent *)0)->li_end));
+ LASSERTF((int)offsetof(struct layout_intent, li_extent) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct layout_intent, li_extent));
+ LASSERTF((int)sizeof(((struct layout_intent *)0)->li_extent) == 16, "found %lld\n",
+ (long long)(int)sizeof(((struct layout_intent *)0)->li_extent));
LASSERTF(LAYOUT_INTENT_ACCESS == 0, "found %lld\n",
(long long)LAYOUT_INTENT_ACCESS);
LASSERTF(LAYOUT_INTENT_READ == 1, "found %lld\n",
(long long)(int)offsetof(struct lfsck_request, lr_padding_3));
LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_3) == 8, "found %lld\n",
(long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_3));
+#ifdef HAVE_SERVER_SUPPORT
LASSERTF(LFSCK_TYPE_SCRUB == 0x00000000UL, "found 0x%.8xUL\n",
(unsigned)LFSCK_TYPE_SCRUB);
LASSERTF(LFSCK_TYPE_LAYOUT == 0x00000001UL, "found 0x%.8xUL\n",
(unsigned)LFSCK_TYPE_LAYOUT);
LASSERTF(LFSCK_TYPE_NAMESPACE == 0x00000004UL, "found 0x%.8xUL\n",
(unsigned)LFSCK_TYPE_NAMESPACE);
+#endif
LASSERTF(LE_LASTID_REBUILDING == 1, "found %lld\n",
(long long)LE_LASTID_REBUILDING);
LASSERTF(LE_LASTID_REBUILT == 2, "found %lld\n",