MA_HSM = 1 << 6,
MA_PFID = 1 << 7,
MA_LMV_DEF = 1 << 8,
+ MA_SOM = 1 << 9,
};
typedef enum {
__u64 mh_arch_ver;
};
+
+/* In-memory structure for SOM (Size-on-MDS) attributes.
+ * For field descriptions see the on-disk structure lustre_som_attrs
+ * which is defined in lustre_idl.h; lustre_buf2som() copies
+ * lsa_valid/lsa_size/lsa_blocks into the fields below.
+ */
+struct md_som {
+ /* SOM state flags (enum lustre_som_flags values) */
+ __u16 ms_valid;
+ /* file size taken from the SOM xattr */
+ __u64 ms_size;
+ /* block count taken from the SOM xattr */
+ __u64 ms_blocks;
+};
+
struct md_attr {
- __u64 ma_valid;
- __u64 ma_need;
- __u64 ma_attr_flags;
- struct lu_attr ma_attr;
- struct lu_fid ma_pfid;
- struct md_hsm ma_hsm;
- struct lov_mds_md *ma_lmm;
- union lmv_mds_md *ma_lmv;
- void *ma_acl;
- int ma_lmm_size;
- int ma_lmv_size;
- int ma_acl_size;
+ __u64 ma_valid;
+ __u64 ma_need;
+ __u64 ma_attr_flags;
+ struct lu_attr ma_attr;
+ struct lu_fid ma_pfid;
+ struct md_hsm ma_hsm;
+ /* SOM attributes; filled by mdt_get_som(), which then sets MA_SOM
+ * in ma_valid
+ */
+ struct md_som ma_som;
+ struct lov_mds_md *ma_lmm;
+ union lmv_mds_md *ma_lmv;
+ void *ma_acl;
+ int ma_lmm_size;
+ int ma_lmv_size;
+ int ma_acl_size;
};
/** Additional parameters for create */
* Parameters for layout change API.
*/
struct md_layout_change {
- enum md_layout_opc mlc_opc;
- struct layout_intent *mlc_intent;
- struct lu_buf mlc_buf;
- struct lustre_som_attrs mlc_som;
- size_t mlc_resync_count;
- __u32 *mlc_resync_ids;
+ enum md_layout_opc mlc_opc;
+ struct layout_intent *mlc_intent;
+ struct lu_buf mlc_buf;
+ /* SOM record that the layout change code reads from and writes to
+ * the XATTR_NAME_SOM xattr
+ */
+ struct lustre_som_attrs mlc_som;
+ size_t mlc_resync_count;
+ __u32 *mlc_resync_ids;
};
union ldlm_policy_data;
struct dt_device;
+void lustre_som_swab(struct lustre_som_attrs *attrs);
int lustre_buf2hsm(void *buf, int rc, struct md_hsm *mh);
void lustre_hsm2buf(void *buf, const struct md_hsm *mh);
#define MDS_ATTR_FROM_OPEN 0x4000ULL /* = 16384, called from open path, ie O_TRUNC */
#define MDS_ATTR_BLOCKS 0x8000ULL /* = 32768 */
#define MDS_ATTR_PROJID 0x10000ULL /* = 65536 */
+#define MDS_ATTR_LSIZE 0x20000ULL /* = 131072 */
+#define MDS_ATTR_LBLOCKS 0x40000ULL /* = 262144 */
enum mds_op_bias {
/* MDS_CHECK_SPLIT = 1 << 0, obsolete before 2.3.58 */
*/
#define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
-enum {
- LSOM_FL_VALID = 1 << 0,
+/* SOM state flags, stored in lustre_som_attrs::lsa_valid. */
+enum lustre_som_flags {
+ /* Unknown or no SoM data, must get size from OSTs. */
+ SOM_FL_UNKNOWN = 0x0000,
+ /* Known strictly correct, FLR or DoM file (SoM guaranteed). */
+ SOM_FL_STRICT = 0x0001,
+ /* Known stale - was right at some point in the past, but it is
+ * known (or likely) to be incorrect now (e.g. opened for write). */
+ SOM_FL_STALE = 0x0002,
+ /* Approximate, may never have been strictly correct,
+ * need to sync SOM data to achieve eventual consistency. */
+ SOM_FL_LAZY = 0x0004,
};
struct lustre_som_attrs {
LA_KILL_SGID = 1 << 14,
LA_PROJID = 1 << 15,
LA_LAYOUT_VERSION = 1 << 16,
+ LA_LSIZE = 1 << 17,
+ LA_LBLOCKS = 1 << 18,
/**
* Attributes must be transmitted to OST objects
*/
break;
}
+ if (!(op_data->op_attr.ia_valid & ATTR_SIZE))
+ op_data->op_attr.ia_valid |= MDS_ATTR_LSIZE;
+ if (!(op_data->op_attr.ia_valid & ATTR_BLOCKS))
+ op_data->op_attr.ia_valid |= MDS_ATTR_LBLOCKS;
+
rc = md_close(md_exp, op_data, och->och_mod, &req);
if (rc != 0 && rc != -EINTR)
CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
sa_valid |= MDS_OPEN_OWNEROVERRIDE;
if (ia_valid & MDS_ATTR_PROJID)
sa_valid |= MDS_ATTR_PROJID;
+ if (ia_valid & MDS_ATTR_LSIZE)
+ sa_valid |= MDS_ATTR_LSIZE;
+ if (ia_valid & MDS_ATTR_LBLOCKS)
+ sa_valid |= MDS_ATTR_LBLOCKS;
return sa_valid;
}
(uc->uc_fsuid != attr->la_uid) &&
!md_capable(uc, CFS_CAP_FOWNER))
RETURN(-EPERM);
- } else {
- if ((uc->uc_fsuid != attr->la_uid) &&
- !md_capable(uc, CFS_CAP_FOWNER))
- RETURN(-EPERM);
+ } else if (strcmp(name, XATTR_NAME_SOM) != 0 &&
+ (uc->uc_fsuid != attr->la_uid) &&
+ !md_capable(uc, CFS_CAP_FOWNER)) {
+ RETURN(-EPERM);
}
RETURN(0);
struct md_layout_change *mlc, struct thandle *handle)
{
struct mdd_device *mdd = mdd_obj2mdd_dev(obj);
+ struct lu_buf *som_buf = &mdd_env_info(env)->mti_buf[1];
+ struct lustre_som_attrs *som = &mlc->mlc_som;
+ int fl = 0;
int rc;
ENTRY;
RETURN(0);
}
+ som_buf->lb_buf = som;
+ som_buf->lb_len = sizeof(*som);
+ rc = mdo_xattr_get(env, obj, som_buf, XATTR_NAME_SOM);
+ if (rc < 0 && rc != -ENODATA)
+ RETURN(rc);
+
+ if (rc > 0) {
+ lustre_som_swab(som);
+ if (som->lsa_valid & SOM_FL_STRICT)
+ fl = LU_XATTR_REPLACE;
+ }
+
rc = mdd_declare_layout_change(env, mdd, obj, mlc, handle);
if (rc)
GOTO(out, rc);
- rc = mdd_declare_xattr_del(env, mdd, obj, XATTR_NAME_SOM, handle);
- if (rc)
- GOTO(out, rc);
+ if (fl) {
+ rc = mdd_declare_xattr_set(env, mdd, obj, som_buf,
+ XATTR_NAME_SOM, fl, handle);
+ if (rc)
+ GOTO(out, rc);
+ }
/* record a changelog for data mover to consume */
rc = mdd_declare_changelog_store(env, mdd, CL_FLRW, NULL, NULL, handle);
mdd_write_lock(env, obj, MOR_TGT_CHILD);
rc = mdo_layout_change(env, obj, mlc, handle);
- if (!rc) {
- rc = mdo_xattr_del(env, obj, XATTR_NAME_SOM, handle);
- if (rc == -ENODATA)
- rc = 0;
+ if (!rc && fl) {
+ /* SOM state transition from STRICT to STALE */
+ som->lsa_valid = SOM_FL_STALE;
+ lustre_som_swab(som);
+ rc = mdo_xattr_set(env, obj, som_buf, XATTR_NAME_SOM,
+ fl, handle);
}
mdd_write_unlock(env, obj);
if (rc)
RETURN(-EBUSY);
}
- if (mlc->mlc_som.lsa_valid & LSOM_FL_VALID) {
+ if (mlc->mlc_som.lsa_valid & SOM_FL_STRICT) {
rc = mdo_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_SOM);
- if (rc && rc != -ENODATA)
+ if (rc < 0 && rc != -ENODATA)
RETURN(rc);
fl = rc == -ENODATA ? LU_XATTR_CREATE : LU_XATTR_REPLACE;
+ lustre_som_swab(&mlc->mlc_som);
som_buf->lb_buf = &mlc->mlc_som;
som_buf->lb_len = sizeof(mlc->mlc_som);
}
mdt_object_child(obj),
mdt_object_child(dobj),
SWAP_LAYOUTS_MDS_HSM);
-
+ if (rc == 0) {
+ rc = mdt_lsom_downgrade(mti, obj);
+ if (rc)
+ CDEBUG(D_INODE,
+ "%s: File fid="DFID" SOM "
+ "downgrade failed, rc = %d\n",
+ mdt_obd_name(mti->mti_mdt),
+ PFID(mdt_object_fid(obj)), rc);
+ }
out_dobj:
mdt_object_unlock_put(mti, dobj, dlh, 1);
out:
GOTO(out, rc);
if (S_ISREG(mode))
- (void) mdt_get_som(info, o, &ma->ma_attr);
+ (void) mdt_get_som(info, o, ma);
ma->ma_valid |= MA_INODE;
}
GOTO(out, rc);
}
+ /*
+ * The MA_INODE handling above may already have fetched the SOM
+ * attributes; only read them again if MA_SOM is not yet valid.
+ */
+ if (need & MA_SOM && S_ISREG(mode) && !(ma->ma_valid & MA_SOM)) {
+ rc = mdt_get_som(info, o, ma);
+ if (rc != 0)
+ GOTO(out, rc);
+ }
+
if (need & MA_HSM && S_ISREG(mode)) {
buf->lb_buf = info->mti_xattr_buf;
buf->lb_len = sizeof(info->mti_xattr_buf);
if (rc)
GOTO(out, rc);
+ mutex_lock(&obj->mot_som_mutex);
rc = mo_layout_change(info->mti_env, mdt_object_child(obj), layout);
-
+ mutex_unlock(&obj->mot_som_mutex);
mdt_object_unlock(info, obj, lh, 1);
out:
RETURN(rc);
lu_object_add_top(h, o);
o->lo_ops = &mdt_obj_ops;
spin_lock_init(&mo->mot_write_lock);
+ mutex_init(&mo->mot_som_mutex);
mutex_init(&mo->mot_lov_mutex);
init_rwsem(&mo->mot_dom_sem);
init_rwsem(&mo->mot_open_sem);
* attribute cache */
int mot_write_count;
spinlock_t mot_write_lock;
+ /* Lock to protect object's SOM update. */
+ struct mutex mot_som_mutex;
/* Lock to protect create_data */
struct mutex mot_lov_mutex;
/* lock to protect read/write stages for Data-on-MDT files */
struct {
struct md_attr attr;
} hsm;
- } mti_u;
+ struct {
+ struct md_attr attr;
+ } som;
+ } mti_u;
struct lustre_handle mti_close_handle;
loff_t mti_off;
struct ldlm_enqueue_info mti_remote_einfo;
struct tg_reply_data *mti_reply_data;
- struct lustre_som_attrs mti_som;
-
/* FLR: layout change API */
struct md_layout_change mti_layout;
};
/* mdt_som.c */
int mdt_set_som(struct mdt_thread_info *info, struct mdt_object *obj,
- struct lu_attr *attr);
+ enum lustre_som_flags flag, __u64 size, __u64 blocks);
int mdt_get_som(struct mdt_thread_info *info, struct mdt_object *obj,
- struct lu_attr *attr);
+ struct md_attr *ma);
+int mdt_lsom_downgrade(struct mdt_thread_info *info, struct mdt_object *obj);
+int mdt_lsom_update(struct mdt_thread_info *info, struct mdt_object *obj,
+ bool truncate);
/* mdt_lvb.c */
extern struct ldlm_valblock_ops mdt_lvbo;
out |= LA_KILL_SGID;
if (in & MDS_ATTR_PROJID)
out |= LA_PROJID;
+ if (in & MDS_ATTR_LSIZE)
+ out |= LA_LSIZE;
+ if (in & MDS_ATTR_LBLOCKS)
+ out |= LA_LBLOCKS;
if (in & MDS_ATTR_FROM_OPEN)
rr->rr_flags |= MRF_OPEN_TRUNC;
MDS_ATTR_ATIME_SET | MDS_ATTR_CTIME_SET | MDS_ATTR_MTIME_SET |
MDS_ATTR_SIZE | MDS_ATTR_BLOCKS | MDS_ATTR_ATTR_FLAG |
MDS_ATTR_FORCE | MDS_ATTR_KILL_SUID | MDS_ATTR_KILL_SGID |
- MDS_ATTR_FROM_OPEN | MDS_OPEN_OWNEROVERRIDE);
+ MDS_ATTR_FROM_OPEN | MDS_ATTR_LSIZE | MDS_ATTR_LBLOCKS |
+ MDS_OPEN_OWNEROVERRIDE);
if (in != 0)
CERROR("Unknown attr bits: %#llx\n", in);
return out;
if (rc < 0)
GOTO(out_unlock, rc);
+ mutex_lock(&o->mot_som_mutex);
+ rc2 = mdt_set_som(info, o, SOM_FL_STRICT, ma->ma_attr.la_size,
+ ma->ma_attr.la_blocks);
+ mutex_unlock(&o->mot_som_mutex);
+ if (rc2 < 0)
+ CDEBUG(D_INODE,
+ "%s: File "DFID" SOM update failed: rc = %d\n",
+ mdt_obd_name(info->mti_mdt),
+ PFID(mdt_object_fid(o)), rc2);
+
+
ma->ma_need = MA_INODE | MA_LOV;
rc = mdt_attr_get_complex(info, o, ma);
if (rc < 0)
if (rc == 0 && ma->ma_attr.la_valid & (LA_SIZE | LA_BLOCKS)) {
int rc2;
- rc2 = mdt_set_som(info, o, &ma->ma_attr);
+ mutex_lock(&o->mot_som_mutex);
+ rc2 = mdt_set_som(info, o, SOM_FL_STRICT,
+ ma->ma_attr.la_size,
+ ma->ma_attr.la_blocks);
+ mutex_unlock(&o->mot_som_mutex);
if (rc2 < 0)
CERROR(DFID": Setting i_blocks error: %d, "
"i_blocks will be reported wrongly and "
layout.mlc_opc = MD_LAYOUT_RESYNC_DONE;
layout.mlc_resync_count = resync_count;
if (ma->ma_attr.la_valid & (LA_SIZE | LA_BLOCKS)) {
- layout.mlc_som.lsa_valid = LSOM_FL_VALID;
+ layout.mlc_som.lsa_valid = SOM_FL_STRICT;
layout.mlc_som.lsa_size = ma->ma_attr.la_size;
layout.mlc_som.lsa_blocks = ma->ma_attr.la_blocks;
}
int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd)
{
- struct mdt_object *o = mfd->mfd_object;
- struct md_object *next = mdt_object_child(o);
- struct md_attr *ma = &info->mti_attr;
- int rc = 0;
+ struct mdt_object *o = mfd->mfd_object;
+ struct md_object *next = mdt_object_child(o);
+ struct md_attr *ma = &info->mti_attr;
+ int rc = 0;
__u64 mode;
__u64 intent;
- ENTRY;
- mode = mfd->mfd_mode;
+ ENTRY;
+ mode = mfd->mfd_mode;
intent = ma->ma_attr_flags & MDS_CLOSE_INTENT;
CDEBUG(D_INODE, "%s: close file "DFID" with intent: %llx\n",
rc = mdt_close_handle_layouts(info, o, ma);
if (rc < 0) {
CDEBUG(D_INODE,
- "%s: cannot swap layout of "DFID": rc=%d\n",
+ "%s: cannot swap layout of "DFID": rc = %d\n",
mdt_obd_name(info->mti_mdt),
PFID(mdt_object_fid(o)), rc);
/* continue to close even if error occurred. */
break;
}
+ if (S_ISREG(lu_object_attr(&o->mot_obj)) &&
+ ma->ma_attr.la_valid & (LA_LSIZE | LA_LBLOCKS)) {
+ int rc2;
+
+ rc2 = mdt_lsom_update(info, o, false);
+ if (rc2 < 0)
+ CDEBUG(D_INODE,
+ "%s: File " DFID " LSOM failed: rc = %d\n",
+ mdt_obd_name(info->mti_mdt),
+ PFID(mdt_object_fid(o)), rc2);
+ /* continue to close even if error occurred. */
+ }
+
if (mode & MDS_FMODE_WRITE)
mdt_write_put(o);
else if (mode & MDS_FMODE_EXEC)
/* Update atime on close only. */
if ((mode & MDS_FMODE_EXEC || mode & MDS_FMODE_READ ||
- mode & MDS_FMODE_WRITE)
- && (ma->ma_valid & MA_INODE) && (ma->ma_attr.la_valid & LA_ATIME)) {
+ mode & MDS_FMODE_WRITE) && (ma->ma_valid & MA_INODE) &&
+ (ma->ma_attr.la_valid & LA_ATIME)) {
/* Set the atime only. */
ma->ma_valid = MA_INODE;
ma->ma_attr.la_valid = LA_ATIME;
if (rc > 0 && mdt_lmm_is_flr(info->mti_big_lmm))
GOTO(out_put, rc = -EOPNOTSUPP);
}
+
+ /* For truncate, the file size sent from the client
+ * can be trusted, but the blocks value cannot, which
+ * would make the block count in the LSOM attribute
+ * inconsistent with the real block count.
+ */
+ rc = mdt_lsom_update(info, mo, true);
+ if (rc)
+ GOTO(out_put, rc);
}
if ((ma->ma_valid & MA_INODE) && ma->ma_attr.la_valid) {
* Size on MDS revival
*
* Author: Jinshan Xiong <jinshan.xiong@intel.com>
+ * Author: Yingjin Qian <qian@ddn.com>
*/
#define DEBUG_SUBSYSTEM S_MDS
#include "mdt_internal.h"
+/*
+ * Convert an on-disk SOM xattr into its in-memory md_som form.
+ *
+ * \param buf - buffer holding the on-disk LSOM extended attribute; it is
+ *		swabbed in place by lustre_som_swab().
+ * \param rc  - result of fetching the SOM xattr into \a buf: a negative
+ *		errno on failure, 0 when nothing was stored, positive when
+ *		the xattr is present.
+ * \param ms  - md_som structure that receives the SOM attributes.
+ *
+ * \retval 0		\a ms has been filled in
+ * \retval -ENODATA	\a rc was 0 or -ENODATA (no LSOM attributes)
+ * \retval negative	any other negative \a rc is passed through
+ */
+int lustre_buf2som(void *buf, int rc, struct md_som *ms)
+{
+	struct lustre_som_attrs *disk_som = buf;
+	ENTRY;
+
+	/* a fetch error other than "no data" is handed back untouched */
+	if (rc < 0 && rc != -ENODATA)
+		RETURN(rc);
+
+	/* nothing stored: empty xattr or -ENODATA */
+	if (rc <= 0)
+		RETURN(-ENODATA);
+
+	/* bring the on-disk record into CPU byte order ... */
+	lustre_som_swab(disk_som);
+
+	/* ... and copy it into the in-memory structure */
+	ms->ms_blocks = disk_som->lsa_blocks;
+	ms->ms_size = disk_som->lsa_size;
+	ms->ms_valid = disk_som->lsa_valid;
+
+	RETURN(0);
+}
+
+/**
+ * Read the SOM xattr of \a obj into \a ma.
+ *
+ * When the xattr exists, ma->ma_som is filled and MA_SOM is set in
+ * ma->ma_valid.  Only if the SOM state has SOM_FL_STRICT set are size and
+ * blocks also copied into ma->ma_attr (LA_SIZE | LA_BLOCKS) and
+ * info->mti_som_valid set.
+ *
+ * \retval 0 on success or when the object has no SOM xattr
+ * \retval negative errno on any other failure
+ */
int mdt_get_som(struct mdt_thread_info *info, struct mdt_object *obj,
- struct lu_attr *attr)
+ struct md_attr *ma)
{
struct lu_buf *buf = &info->mti_buf;
- struct lustre_som_attrs *som;
+ struct lu_attr *attr = &ma->ma_attr;
int rc;
- som = buf->lb_buf = info->mti_xattr_buf;
+ buf->lb_buf = info->mti_xattr_buf;
buf->lb_len = sizeof(info->mti_xattr_buf);
+ /* the per-thread xattr buffer must be able to hold a SOM record */
+ CLASSERT(sizeof(struct lustre_som_attrs) <=
+ sizeof(info->mti_xattr_buf));
rc = mo_xattr_get(info->mti_env, mdt_object_child(obj), buf,
XATTR_NAME_SOM);
- if (rc >= (int)sizeof(*som) && (som->lsa_valid & LSOM_FL_VALID)) {
- attr->la_valid |= LA_SIZE | LA_BLOCKS;
- attr->la_size = som->lsa_size;
- attr->la_blocks = som->lsa_blocks;
-
- /* Size on MDS is valid and could be returned to client */
- info->mti_som_valid = 1;
-
- CDEBUG(D_INODE, DFID": Reading som attrs: "
- "valid: %x, size: %lld, blocks: %lld, rc: %d.\n",
- PFID(mdt_object_fid(obj)), som->lsa_valid,
- som->lsa_size, som->lsa_blocks, rc);
+ /* swab the fetched record and unpack it into ma->ma_som */
+ rc = lustre_buf2som(info->mti_xattr_buf, rc, &ma->ma_som);
+ if (rc == 0) {
+ struct md_som *som = &ma->ma_som;
+
+ ma->ma_valid |= MA_SOM;
+
+ if ((som->ms_valid & SOM_FL_STRICT)) {
+ attr->la_valid |= LA_SIZE | LA_BLOCKS;
+ attr->la_size = som->ms_size;
+ attr->la_blocks = som->ms_blocks;
+
+ /*
+ * Size on MDS is valid and could be returned
+ * to client.
+ */
+ info->mti_som_valid = 1;
+
+ CDEBUG(D_INODE, DFID": Reading som attrs: "
+ "valid: %x, size: %lld, blocks: %lld\n",
+ PFID(mdt_object_fid(obj)), som->ms_valid,
+ som->ms_size, som->ms_blocks);
+ }
+ } else if (rc == -ENODATA) {
+ /* a missing SOM xattr is not an error for the caller */
+ rc = 0;
}
- return (rc > 0 || rc == -ENODATA) ? 0 : rc;
+ return rc;
}
+/**
+ * Update SOM on-disk attributes.
+ *
+ * Builds a lustre_som_attrs record from \a flag, \a size and \a blocks in
+ * info->mti_xattr_buf (reserved bytes zeroed), swabs it with
+ * lustre_som_swab() and stores it as the XATTR_NAME_SOM xattr of \a obj.
+ * The callers visible in this patch hold obj->mot_som_mutex around it.
+ *
+ * \retval the result of mo_xattr_set()
+ */
int mdt_set_som(struct mdt_thread_info *info, struct mdt_object *obj,
- struct lu_attr *attr)
+ enum lustre_som_flags flag, __u64 size, __u64 blocks)
{
struct md_object *next = mdt_object_child(obj);
struct lu_buf *buf = &info->mti_buf;
struct lustre_som_attrs *som;
int rc;
+
ENTRY;
- buf->lb_buf = info->mti_xattr_buf;
- buf->lb_len = sizeof(info->mti_xattr_buf);
- rc = mo_xattr_get(info->mti_env, next, buf, XATTR_NAME_SOM);
- if (rc < 0 && rc != -ENODATA)
- RETURN(rc);
+ CDEBUG(D_INODE,
+ DFID": Set SOM attrs S/B/F: %lld/%lld/%x.\n",
+ PFID(mdt_object_fid(obj)), size, blocks, flag);
- som = buf->lb_buf;
+ /* build the record directly in the per-thread xattr buffer */
+ som = (struct lustre_som_attrs *)info->mti_xattr_buf;
+ CLASSERT(sizeof(info->mti_xattr_buf) >= sizeof(*som));
- CDEBUG(D_INODE,
- DFID": Set som attrs: S/B: %lld/%lld to %lld/%lld, rc: %d\n",
- PFID(mdt_object_fid(obj)), som->lsa_size, som->lsa_blocks,
- attr->la_size, attr->la_blocks, rc);
-
- if (rc == -ENODATA)
- memset(som, 0, sizeof(*som));
- if (attr->la_valid & (LA_SIZE | LA_BLOCKS)) {
- som->lsa_valid |= LSOM_FL_VALID;
- som->lsa_size = attr->la_size;
- som->lsa_blocks = attr->la_blocks;
- }
+ som->lsa_valid = flag;
+ som->lsa_size = size;
+ som->lsa_blocks = blocks;
+ memset(&som->lsa_reserved, 0, sizeof(som->lsa_reserved));
+ /* convert to the on-disk byte order before writing */
+ lustre_som_swab(som);
+
+ /* update SOM attributes */
+ buf->lb_buf = som;
buf->lb_len = sizeof(*som);
rc = mo_xattr_set(info->mti_env, next, buf, XATTR_NAME_SOM, 0);
+
+ RETURN(rc);
+}
+
+/**
+ * Downgrade the SOM state of \a o from STRICT to STALE.
+ *
+ * The SOM xattr is read under o->mot_som_mutex.  If it exists,
+ * info->mti_som_valid is cleared, and if it is marked SOM_FL_STRICT it is
+ * rewritten as SOM_FL_STALE with the recorded size and blocks unchanged.
+ *
+ * \retval 0 on success, including when there is nothing to downgrade
+ * \retval negative errno on failure
+ */
+int mdt_lsom_downgrade(struct mdt_thread_info *info, struct mdt_object *o)
+{
+	struct md_attr *som_ma = &info->mti_u.som.attr;
+	struct md_som *cur;
+	int rc;
+
+	ENTRY;
+
+	mutex_lock(&o->mot_som_mutex);
+	som_ma->ma_valid = 0;
+	som_ma->ma_need = MA_SOM;
+
+	rc = mdt_get_som(info, o, som_ma);
+	if (rc < 0)
+		GOTO(out_lock, rc);
+
+	if (som_ma->ma_valid & MA_SOM) {
+		cur = &som_ma->ma_som;
+
+		info->mti_som_valid = 0;
+		/* The size and blocks info should be still correct. */
+		if (cur->ms_valid & SOM_FL_STRICT)
+			rc = mdt_set_som(info, o, SOM_FL_STALE,
+					 cur->ms_size, cur->ms_blocks);
+	}
+out_lock:
+	mutex_unlock(&o->mot_som_mutex);
+	RETURN(rc);
+}
+
+/**
+ * Lazy SOM update: refresh the SOM xattr of \a o from the attributes the
+ * client sent in info->mti_attr.
+ *
+ * Runs under o->mot_som_mutex.  Objects without a LOV EA, and objects
+ * whose nlink is 0, are skipped.  Any update is written with SOM_FL_LAZY.
+ *
+ * \param truncate - true for a truncate: the new size is always stored and
+ *		     blocks are derived as described below; false
+ *		     otherwise (mdt_mfd_close() passes false), in which
+ *		     case size/blocks are only ever increased.
+ *
+ * \retval 0 on success or when no update was needed
+ * \retval negative errno on failure
+ */
+int mdt_lsom_update(struct mdt_thread_info *info,
+ struct mdt_object *o, bool truncate)
+{
+ struct md_attr *ma, *tmp_ma;
+ struct lu_attr *la;
+ int rc = 0;
+
+ ENTRY;
+
+ ma = &info->mti_attr;
+ la = &ma->ma_attr;
+
+ mutex_lock(&o->mot_som_mutex);
+ tmp_ma = &info->mti_u.som.attr;
+ tmp_ma->ma_need = MA_INODE | MA_SOM;
+ tmp_ma->ma_valid = 0;
+
+ rc = mdt_attr_get_complex(info, o, tmp_ma);
+ if (rc)
+ GOTO(out_lock, rc);
+
+ /* probe with a NULL buffer: only the existence of the LOV EA matters */
+ rc = mo_xattr_get(info->mti_env, mdt_object_child(o), &LU_BUF_NULL,
+ XATTR_NAME_LOV);
+ if (rc < 0 && rc != -ENODATA)
+ GOTO(out_lock, rc);
+ else if (rc > 0) /* has LOV EA*/
+ tmp_ma->ma_valid |= MA_LOV;
+
+ rc = 0;
+ /**
+ * Check if a Lazy Size-on-MDS update is needed. Skip files
+ * with no LOV EA and unlinked files.
+ * MDS only updates LSOM of the file if the size or block
+ * size is being increased or the file is being truncated.
+ */
+ if ((tmp_ma->ma_valid & MA_LOV) &&
+ !(tmp_ma->ma_valid & MA_INODE &&
+ tmp_ma->ma_attr.la_nlink == 0)) {
+ __u64 size;
+ __u64 blocks;
+ bool changed = false;
+ struct md_som *som = &tmp_ma->ma_som;
+
+ if (truncate) {
+ size = la->la_size;
+ if (size == 0) {
+ blocks = 0;
+ } else if (!(tmp_ma->ma_valid & MA_SOM) ||
+ size < som->ms_size) {
+ /* We cannot rely on blocks after
+ * truncate, especially for a sparse file,
+ * and the truncate operation is usually
+ * followed by a close, so just set blocks
+ * to 1 here, and the following close will
+ * update it accordingly.
+ */
+ blocks = 1;
+ } else {
+ blocks = som->ms_blocks;
+ }
+ } else {
+ if (!(tmp_ma->ma_valid & MA_SOM)) {
+ /* Only set initial SOM Xattr data when both
+ * size and blocks are valid.
+ */
+ if (la->la_valid & (LA_SIZE | LA_LSIZE) &&
+ la->la_valid & (LA_BLOCKS | LA_LBLOCKS)) {
+ changed = true;
+ size = la->la_size;
+ blocks = la->la_blocks;
+ }
+ } else {
+ /* Double check whether it is already set
+ * to SOM_FL_STRICT in mdt_mfd_close.
+ * If the file is in SOM_FL_STALE state and
+ * the close indicates that no data was
+ * modified, skip the transition to LAZY
+ * state.
+ */
+ if (som->ms_valid & SOM_FL_STRICT ||
+ (som->ms_valid & SOM_FL_STALE &&
+ !(ma->ma_attr_flags & MDS_DATA_MODIFIED)))
+ GOTO(out_lock, rc);
+
+ /* start from the stored values; only ever grow them */
+ size = som->ms_size;
+ blocks = som->ms_blocks;
+ if (la->la_valid & (LA_SIZE | LA_LSIZE) &&
+ la->la_size > som->ms_size) {
+ changed = true;
+ size = la->la_size;
+ }
+ if (la->la_valid & (LA_BLOCKS | LA_LBLOCKS) &&
+ la->la_blocks > som->ms_blocks) {
+ changed = true;
+ blocks = la->la_blocks;
+ }
+ }
+ }
+ /* size and blocks are assigned on every path that gets here
+ * with truncate or changed set
+ */
+ if (truncate || changed)
+ rc = mdt_set_som(info, o, SOM_FL_LAZY, size, blocks);
+ }
+
+out_lock:
+ mutex_unlock(&o->mot_som_mutex);
RETURN(rc);
}
EXPORT_SYMBOL(lustre_loa_swab);
/**
+ * Swab, if needed, SOM structure which is stored on-disk in little-endian
+ * order.
+ *
+ * The conversion is done in place and only when __BIG_ENDIAN is defined;
+ * lsa_reserved is left untouched.  Callers in this patch use it both after
+ * reading the xattr and before writing it.
+ *
+ * \param attrs - is a pointer to the SOM structure to be swabbed.
+ */
+void lustre_som_swab(struct lustre_som_attrs *attrs)
+{
+#ifdef __BIG_ENDIAN
+ __swab16s(&attrs->lsa_valid);
+ __swab64s(&attrs->lsa_size);
+ __swab64s(&attrs->lsa_blocks);
+#endif
+}
+EXPORT_SYMBOL(lustre_som_swab);
+
+/**
* Swab, if needed, HSM structure which is stored on-disk in little-endian
* order.
*
(long long)MDS_ATTR_BLOCKS);
LASSERTF(MDS_ATTR_PROJID == 0x0000000000010000ULL, "found 0x%.16llxULL\n",
(long long)MDS_ATTR_PROJID);
+ LASSERTF(MDS_ATTR_LSIZE == 0x0000000000020000ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_LSIZE);
+ LASSERTF(MDS_ATTR_LBLOCKS == 0x0000000000040000ULL, "found 0x%.16llxULL\n",
+ (long long)MDS_ATTR_LBLOCKS);
LASSERTF(FLD_QUERY == 900, "found %lld\n",
(long long)FLD_QUERY);
LASSERTF(FLD_READ == 901, "found %lld\n",
LASSERTF(OUT_XATTR_LIST == 17, "found %lld\n",
(long long)OUT_XATTR_LIST);
+ /* Checks for struct lustre_som_attrs */
+ LASSERTF((int)sizeof(struct lustre_som_attrs) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_som_attrs));
+ LASSERTF((int)offsetof(struct lustre_som_attrs, lsa_valid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_som_attrs, lsa_valid));
+ LASSERTF((int)sizeof(((struct lustre_som_attrs *)0)->lsa_valid) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_som_attrs *)0)->lsa_valid));
+ LASSERTF((int)offsetof(struct lustre_som_attrs, lsa_reserved) == 2, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_som_attrs, lsa_reserved));
+ LASSERTF((int)sizeof(((struct lustre_som_attrs *)0)->lsa_reserved) == 6, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_som_attrs *)0)->lsa_reserved));
+ LASSERTF((int)offsetof(struct lustre_som_attrs, lsa_size) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_som_attrs, lsa_size));
+ LASSERTF((int)sizeof(((struct lustre_som_attrs *)0)->lsa_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_som_attrs *)0)->lsa_size));
+ LASSERTF((int)offsetof(struct lustre_som_attrs, lsa_blocks) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_som_attrs, lsa_blocks));
+ LASSERTF((int)sizeof(((struct lustre_som_attrs *)0)->lsa_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_som_attrs *)0)->lsa_blocks));
+
/* Checks for struct hsm_attrs */
LASSERTF((int)sizeof(struct hsm_attrs) == 24, "found %lld\n",
(long long)(int)sizeof(struct hsm_attrs));
done
echo
+ # sync all the data and make sure no pending data on the client,
+ # thus the SOM xattr would not be changed any more.
+ cancel_lru_locks osc
+
# backup files
echo backup files to $TMP/$tdir
local files=$(find $DIR/$tdir -type f -newer $TMP/modified_first)
}
run_test 805 "ZFS can remove from full fs"
+# Size-on-MDS test
+# Check that the size and block count reported by "lfs getsom" for
+# file $1 both match what stat(1) reports for it.
+check_lsom_data()
+{
+	local file=$1
+	local size=$($LFS getsom -s $file)
+	local expect=$(stat -c %s $file)
+
+	if [[ $size != $expect ]]; then
+		error "$file expected size: $expect, got: $size"
+	fi
+
+	local blocks=$($LFS getsom -b $file)
+	expect=$(stat -c %b $file)
+	if [[ $blocks != $expect ]]; then
+		error "$file expected blocks: $expect, got: $blocks"
+	fi
+}
+
+# Check that the SOM size reported by "lfs getsom -s" for file $1
+# equals the expected size $2.
+check_lsom_size()
+{
+	# name the file locally so the error message can report it
+	local file=$1
+	local expect=$2
+	local size=$($LFS getsom -s $file)
+
+	[[ $size == $expect ]] ||
+		error "$file expected size: $expect, got: $size"
+}
+
+# Verify that the SOM size kept on the MDS follows the file size for
+# single-threaded, multi-threaded and multi-client writes and for
+# truncate, and that the SOM block count matches stat after a write.
+test_806() {
+ [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.11.52) ] &&
+ skip "Need MDS version at least 2.11.52" && return
+
+ local bs=1048576
+
+ touch $DIR/$tfile || error "touch $tfile failed"
+
+ # turn the client xattr cache off for the test and restore the
+ # saved setting on exit
+ local save="$TMP/$TESTSUITE-$TESTNAME.parameters"
+ save_lustre_params client "llite.*.xattr_cache" > $save
+ lctl set_param llite.*.xattr_cache=0
+ stack_trap "restore_lustre_params < $save" EXIT
+
+ # single-threaded write
+ echo "Test SOM for single-threaded write"
+ dd if=/dev/zero of=$DIR/$tfile bs=$bs count=1 ||
+ error "write $tfile failed"
+ check_lsom_size $DIR/$tfile $bs
+
+ local num=32
+ local size=$(($num * $bs))
+ local offset=0
+ local i
+
+ # $num background writers, one $bs chunk each, ascending offsets
+ echo "Test SOM for single client muti-threaded($num) write"
+ $TRUNCATE $DIR/$tfile 0
+ for ((i = 0; i < $num; i++)); do
+ $MULTIOP $DIR/$tfile Oz${offset}w${bs}c &
+ local pids[$i]=$!
+ offset=$((offset + $bs))
+ done
+ for (( i=0; i < $num; i++ )); do
+ wait ${pids[$i]}
+ done
+ check_lsom_size $DIR/$tfile $size
+
+ # same writes again, started in descending offset order
+ $TRUNCATE $DIR/$tfile 0
+ for ((i = 0; i < $num; i++)); do
+ offset=$((offset - $bs))
+ $MULTIOP $DIR/$tfile Oz${offset}w${bs}c &
+ local pids[$i]=$!
+ done
+ for (( i=0; i < $num; i++ )); do
+ wait ${pids[$i]}
+ done
+ check_lsom_size $DIR/$tfile $size
+
+ # multi-client writes
+ num=$(get_node_count ${CLIENTS//,/ })
+ size=$(($num * $bs))
+ offset=0
+ i=0
+
+ echo "Test SOM for muti-client ($num) writes"
+ $TRUNCATE $DIR/$tfile 0
+ for client in ${CLIENTS//,/ }; do
+ do_node $client $MULTIOP $DIR/$tfile Oz${offset}w${bs}c &
+ local pids[$i]=$!
+ i=$((i + 1))
+ offset=$((offset + $bs))
+ done
+ for (( i=0; i < $num; i++ )); do
+ wait ${pids[$i]}
+ done
+ check_lsom_size $DIR/$tfile $offset
+
+ # one writer per client again, descending offset order
+ i=0
+ $TRUNCATE $DIR/$tfile 0
+ for client in ${CLIENTS//,/ }; do
+ offset=$((offset - $bs))
+ do_node $client $MULTIOP $DIR/$tfile Oz${offset}w${bs}c &
+ local pids[$i]=$!
+ i=$((i + 1))
+ done
+ for (( i=0; i < $num; i++ )); do
+ wait ${pids[$i]}
+ done
+ check_lsom_size $DIR/$tfile $size
+
+ # verify truncate
+ echo "Test SOM for truncate"
+ $TRUNCATE $DIR/$tfile 1048576
+ check_lsom_size $DIR/$tfile 1048576
+ $TRUNCATE $DIR/$tfile 1234
+ check_lsom_size $DIR/$tfile 1234
+
+ # verify SOM blocks count
+ echo "Verify SOM block count"
+ $TRUNCATE $DIR/$tfile 0
+ $MULTIOP $DIR/$tfile oO_TRUNC:O_RDWR:w1048576YSc ||
+ error "failed to write file $tfile"
+ check_lsom_data $DIR/$tfile
+}
+run_test 806 "Verify Lazy Size on MDS"
+
#
# tests that do cleanup/setup should be run at the end
#
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
+#include <attr/xattr.h>
+#include <errno.h>
#include <fcntl.h>
#include <dirent.h>
#include <time.h>
#include <ctype.h>
#include <zlib.h>
#include <libgen.h>
+#include <asm/byteorder.h>
#include "lfs_project.h"
#include <libcfs/util/string.h>
static int lfs_swap_layouts(int argc, char **argv);
static int lfs_mv(int argc, char **argv);
static int lfs_ladvise(int argc, char **argv);
+static int lfs_getsom(int argc, char **argv);
static int lfs_mirror(int argc, char **argv);
static int lfs_mirror_list_commands(int argc, char **argv);
static int lfs_list_commands(int argc, char **argv);
"lfs mirror split - split a mirror from an existing mirrored file\n"
"lfs mirror resync - resynchronize out-of-sync mirrored file(s)\n"
"lfs mirror verify - verify mirrored file(s)\n"},
+ {"getsom", lfs_getsom, 0, "To list the SOM info for a given file.\n"
+ "usage: getsom [-s] [-b] [-f] <path>\n"
+ "\t-s: Only show the size value of the SOM data for a given file\n"
+ "\t-b: Only show the blocks value of the SOM data for a given file\n"
+ "\t-f: Only show the flags value of the SOM data for a given file\n"},
{"help", Parser_help, 0, "help"},
{"exit", Parser_quit, 0, "quit"},
{"quit", Parser_quit, 0, "quit"},
return rc < 0 ? -rc : rc;
}
+/*
+ * Convert a SOM xattr record between on-disk and CPU byte order, in place.
+ * This is a no-op unless __BYTE_ORDER == __BIG_ENDIAN; the reserved bytes
+ * are never touched.
+ */
+static void lustre_som_swab(struct lustre_som_attrs *attrs)
+{
+#if __BYTE_ORDER == __BIG_ENDIAN
+	__swab64s(&attrs->lsa_blocks);
+	__swab64s(&attrs->lsa_size);
+	__swab16s(&attrs->lsa_valid);
+#endif
+}
+
+/* Selects which SOM field(s) "lfs getsom" prints. */
+enum lfs_som_type {
+	LFS_SOM_SIZE	 = 1 << 0,
+	LFS_SOM_BLOCKS	 = 1 << 1,
+	LFS_SOM_FLAGS	 = 1 << 2,
+	/* default: print size, blocks and flags together */
+	LFS_SOM_ATTR_ALL = LFS_SOM_SIZE | LFS_SOM_BLOCKS | LFS_SOM_FLAGS,
+};
+
+/**
+ * lfs_getsom() - Print the Size-on-MDS (SOM) xattr of a file.
+ * @argc: The count of command line arguments.
+ * @argv: Array of strings for command line arguments.
+ *
+ * Reads the "trusted.som" xattr of the single path argument with
+ * lgetxattr(), converts it to CPU byte order and prints size, blocks and
+ * flags, or only the field selected by -s, -b or -f (the last option
+ * given wins).
+ *
+ * Return: 0 on success, CMD_HELP on a usage error, or a negative errno if
+ * the xattr cannot be read or is shorter than struct lustre_som_attrs.
+ */
+static int lfs_getsom(int argc, char **argv)
+{
+	const char *path;
+	struct lustre_som_attrs *attrs;
+	char buf[sizeof(*attrs) + 64];
+	enum lfs_som_type type = LFS_SOM_ATTR_ALL;
+	int rc = 0, c;
+
+	while ((c = getopt(argc, argv, "sbf")) != -1) {
+		switch (c) {
+		case 's':
+			type = LFS_SOM_SIZE;
+			break;
+		case 'b':
+			type = LFS_SOM_BLOCKS;
+			break;
+		case 'f':
+			type = LFS_SOM_FLAGS;
+			break;
+		default:
+			fprintf(stderr, "%s: invalid option '%c'\n",
+				progname, optopt);
+			return CMD_HELP;
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 1) {
+		fprintf(stderr, "%s: %s\n",
+			progname, argc == 0 ? "miss file target" :
+			"input more than 2 files");
+		return CMD_HELP;
+	}
+
+	path = argv[0];
+	attrs = (void *)buf;
+	rc = lgetxattr(path, "trusted.som", attrs, sizeof(buf));
+	if (rc < 0) {
+		/* lgetxattr() returns -1 and reports the cause in errno */
+		rc = -errno;
+		fprintf(stderr, "%s failed to get som xattr: %s\n", argv[0],
+			strerror(-rc));
+		return rc;
+	}
+	if (rc < (int)sizeof(*attrs)) {
+		/* a short xattr would leave part of buf uninitialized */
+		fprintf(stderr, "%s: som xattr too short: %d bytes\n",
+			path, rc);
+		return -EINVAL;
+	}
+
+	lustre_som_swab(attrs);
+
+	switch (type) {
+	case LFS_SOM_ATTR_ALL:
+		printf("file: %s size: %llu blocks: %llu flags: %x\n",
+		       path, attrs->lsa_size, attrs->lsa_blocks,
+		       attrs->lsa_valid);
+		break;
+	case LFS_SOM_SIZE:
+		printf("%llu\n", attrs->lsa_size);
+		break;
+	case LFS_SOM_BLOCKS:
+		printf("%llu\n", attrs->lsa_blocks);
+		break;
+	case LFS_SOM_FLAGS:
+		printf("%x\n", attrs->lsa_valid);
+		break;
+	default:
+		fprintf(stderr, "%s: unknown option\n", progname);
+		return CMD_HELP;
+	}
+
+	/* rc still holds the xattr length here; success is reported as 0 */
+	return 0;
+}
+
/**
* lfs_mirror_list_commands() - List lfs mirror commands.
* @argc: The count of command line arguments.
CHECK_MEMBER(lustre_ost_attrs, loa_comp_end);
}
+
+/* Emit the wire-format checks for the on-disk struct lustre_som_attrs. */
+static void
+check_som_attrs(void)
+{
+	BLANK_LINE();
+	CHECK_STRUCT(lustre_som_attrs);
+	CHECK_MEMBER(lustre_som_attrs, lsa_valid);
+	CHECK_MEMBER(lustre_som_attrs, lsa_reserved);
+	CHECK_MEMBER(lustre_som_attrs, lsa_size);
+	CHECK_MEMBER(lustre_som_attrs, lsa_blocks);
+}
+
static void
check_hsm_attrs(void)
{
CHECK_VALUE_64X(MDS_ATTR_FROM_OPEN);
CHECK_VALUE_64X(MDS_ATTR_BLOCKS);
CHECK_VALUE_64X(MDS_ATTR_PROJID);
+ CHECK_VALUE_64X(MDS_ATTR_LSIZE);
+ CHECK_VALUE_64X(MDS_ATTR_LBLOCKS);
CHECK_VALUE(FLD_QUERY);
CHECK_VALUE(FLD_READ);
CHECK_VALUE(OUT_NOOP);
CHECK_VALUE(OUT_XATTR_LIST);
+ check_som_attrs();
check_hsm_attrs();
check_ost_id();
check_lu_dirent();
(long long)MDS_ATTR_BLOCKS);
LASSERTF(MDS_ATTR_PROJID == 0x0000000000010000ULL, "found 0x%.16llxULL\n",
(long long)MDS_ATTR_PROJID);
+	LASSERTF(MDS_ATTR_LSIZE == 0x0000000000020000ULL, "found 0x%.16llxULL\n",
+		 (long long)MDS_ATTR_LSIZE);
+	LASSERTF(MDS_ATTR_LBLOCKS == 0x0000000000040000ULL, "found 0x%.16llxULL\n",
+		 (long long)MDS_ATTR_LBLOCKS);
LASSERTF(FLD_QUERY == 900, "found %lld\n",
(long long)FLD_QUERY);
LASSERTF(FLD_READ == 901, "found %lld\n",
LASSERTF(OUT_XATTR_LIST == 17, "found %lld\n",
(long long)OUT_XATTR_LIST);
+ /* Checks for struct lustre_som_attrs */
+ LASSERTF((int)sizeof(struct lustre_som_attrs) == 24, "found %lld\n",
+ (long long)(int)sizeof(struct lustre_som_attrs));
+ LASSERTF((int)offsetof(struct lustre_som_attrs, lsa_valid) == 0, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_som_attrs, lsa_valid));
+ LASSERTF((int)sizeof(((struct lustre_som_attrs *)0)->lsa_valid) == 2, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_som_attrs *)0)->lsa_valid));
+ LASSERTF((int)offsetof(struct lustre_som_attrs, lsa_reserved) == 2, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_som_attrs, lsa_reserved));
+ LASSERTF((int)sizeof(((struct lustre_som_attrs *)0)->lsa_reserved) == 6, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_som_attrs *)0)->lsa_reserved));
+ LASSERTF((int)offsetof(struct lustre_som_attrs, lsa_size) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_som_attrs, lsa_size));
+ LASSERTF((int)sizeof(((struct lustre_som_attrs *)0)->lsa_size) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_som_attrs *)0)->lsa_size));
+ LASSERTF((int)offsetof(struct lustre_som_attrs, lsa_blocks) == 16, "found %lld\n",
+ (long long)(int)offsetof(struct lustre_som_attrs, lsa_blocks));
+ LASSERTF((int)sizeof(((struct lustre_som_attrs *)0)->lsa_blocks) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lustre_som_attrs *)0)->lsa_blocks));
+
/* Checks for struct hsm_attrs */
LASSERTF((int)sizeof(struct hsm_attrs) == 24, "found %lld\n",
(long long)(int)sizeof(struct hsm_attrs));