-/**
- * Report how many holders currently keep the IOEpoch of \a mo open.
- * A zero result means that the epoch is not opened.
- */
-static int mdt_ioepoch_opened(struct mdt_object *mo)
-{
-	int holders = mo->mot_ioepoch_count;
-
-	return holders;
-}
-
-/**
- * Tell whether Size-on-MDS is enabled for \a mo, which is the case when the
- * object has no IOEpoch assigned (->mot_ioepoch is zero).
- *
- * \retval 1 if ->mot_ioepoch is zero, 0 otherwise
- */
-int mdt_object_is_som_enabled(struct mdt_object *mo)
-{
-	return mo->mot_ioepoch == 0;
-}
-
-/**
- * Switch Size-on-MDS back on for \a mo.
- *
- * Nothing happens unless \a ioepoch is the epoch currently recorded in the
- * object. When it matches, the epoch must have no holders left (asserted),
- * and both the recorded epoch and the object flags are reset to zero.
- * The caller must hold ->mot_ioepoch_mutex.
- */
-static void mdt_object_som_enable(struct mdt_object *mo, __u64 ioepoch)
-{
-	/* A different epoch is recorded: leave the object state alone. */
-	if (mo->mot_ioepoch != ioepoch)
-		return;
-
-	LASSERT(!mdt_ioepoch_opened(mo));
-	mo->mot_ioepoch = 0;
-	mo->mot_flags = 0;
-}
-
-/**
- * Open the IOEpoch of object \a o for the request described by \a info.
- *
- * Returns 0 without touching the object when the connection lacks
- * OBD_CONNECT_SOM or the object is not a regular file. Otherwise, under
- * ->mot_ioepoch_mutex: if the epoch already has holders it is continued;
- * if not, a new epoch number is assigned under the device-wide
- * ->mdt_ioepoch_lock (the replayed epoch when info->mti_replayepoch is set,
- * else the next value of mdt->mdt_ioepoch, skipping IOEPOCH_INVAL) and
- * MOF_SOM_CREATED is set when \a created is non-zero. In both cases the
- * holder count ->mot_ioepoch_count is incremented.
- *
- * When a new epoch was started and info->mti_rr.rr_fid1 is set, an LCK_EX
- * MDS_INODELOCK_UPDATE lock is taken on the object and released right away.
- *
- * NOTE(review): the previous text of this comment referred to a
- * "writecount"; no such value is read or written by this function.
- *
- * \param info     thread info of the request being handled
- * \param o        object whose epoch is opened
- * \param created  non-zero to mark a newly started epoch with MOF_SOM_CREATED
- *
- * \retval 0 on success or when SOM does not apply
- * \retval the result of mdt_object_lock() if taking the lock failed; the
- *         holder count has already been incremented in that case
- */
-int mdt_ioepoch_open(struct mdt_thread_info *info, struct mdt_object *o,
- int created)
-{
- struct mdt_device *mdt = info->mti_mdt;
- int cancel = 0;
- int rc = 0;
- ENTRY;
-
- if (!(mdt_conn_flags(info) & OBD_CONNECT_SOM) ||
- !S_ISREG(lu_object_attr(&o->mot_obj)))
- RETURN(0);
-
- mutex_lock(&o->mot_ioepoch_mutex);
- if (mdt_ioepoch_opened(o)) {
- /* Epoch continues even if there are no writers yet. */
- CDEBUG(D_INODE, "continue epoch "LPU64" for "DFID"\n",
- o->mot_ioepoch, PFID(mdt_object_fid(o)));
- } else {
- /* XXX: ->mdt_ioepoch is not initialized at the mount */
- spin_lock(&mdt->mdt_ioepoch_lock);
- if (mdt->mdt_ioepoch < info->mti_replayepoch)
- mdt->mdt_ioepoch = info->mti_replayepoch;
-
- if (info->mti_replayepoch)
- o->mot_ioepoch = info->mti_replayepoch;
- else if (++mdt->mdt_ioepoch == IOEPOCH_INVAL)
- o->mot_ioepoch = ++mdt->mdt_ioepoch; /* step over IOEPOCH_INVAL */
- else
- o->mot_ioepoch = mdt->mdt_ioepoch;
-
- spin_unlock(&mdt->mdt_ioepoch_lock);
-
- CDEBUG(D_INODE, "starting epoch "LPU64" for "DFID"\n",
- o->mot_ioepoch, PFID(mdt_object_fid(o)));
- if (created)
- o->mot_flags |= MOF_SOM_CREATED;
- cancel = 1; /* only a newly started epoch triggers the lock below */
- }
- o->mot_ioepoch_count++; /* one more holder of the epoch */
- mutex_unlock(&o->mot_ioepoch_mutex);
-
- /* Cancel Size-on-MDS attributes cached on clients for the open case.
- * In the truncate case, see mdt_reint_setattr(). */
- if (cancel && (info->mti_rr.rr_fid1 != NULL)) {
- struct mdt_lock_handle *lh = &info->mti_lh[MDT_LH_CHILD];
- mdt_lock_reg_init(lh, LCK_EX);
- rc = mdt_object_lock(info, o, lh, MDS_INODELOCK_UPDATE,
- MDT_LOCAL_LOCK);
- if (rc == 0)
- mdt_object_unlock(info, o, lh, 1); /* dropped immediately */
- }
- RETURN(rc);
-}
-
-/**
- * Write or remove the on-disk Size-on-MDS extended attribute of \a obj.
- *
- * With \a enable set, a som_attrs record is built in info->mti_xattr_buf
- * from \a ioepoch, the mount count of the target and whichever of size and
- * blocks are marked valid in info->mti_attr; the record is passed through
- * lustre_som_swab() and stored as XATTR_NAME_SOM. With \a enable clear, the
- * XATTR_NAME_SOM attribute is deleted instead.
- * The caller must hold ->mot_ioepoch_mutex.
- *
- * \retval the result of mo_xattr_set() or mo_xattr_del()
- */
-static int mdt_som_attr_set(struct mdt_thread_info *info,
-			    struct mdt_object *obj, __u64 ioepoch, bool enable)
-{
-	struct md_object *next = mdt_object_child(obj);
-	struct som_attrs *som;
-	struct lu_attr *attr;
-	struct lu_buf *xbuf;
-	ENTRY;
-
-	CDEBUG(D_INODE, "Size-on-MDS attribute %s for epoch "LPU64
-	       " on "DFID".\n", enable ? "update" : "disabling",
-	       ioepoch, PFID(mdt_object_fid(obj)));
-
-	/* Disabling only needs the SOM xattr to be removed. */
-	if (!enable)
-		RETURN(mo_xattr_del(info->mti_env, next, XATTR_NAME_SOM));
-
-	CLASSERT(sizeof(info->mti_xattr_buf) >= sizeof(*som));
-	som = (struct som_attrs *)info->mti_xattr_buf;
-	attr = &info->mti_attr.ma_attr;
-	xbuf = &info->mti_buf;
-
-	/* Pack the SOM record; fields without valid data stay zero. */
-	memset(som, 0, sizeof(*som));
-	som->som_ioepoch = ioepoch;
-	som->som_mountid =
-		info->mti_mdt->mdt_lut.lut_obd->u.obt.obt_mount_count;
-	if ((attr->la_valid & LA_SIZE) != 0)
-		som->som_size = attr->la_size;
-	if ((attr->la_valid & LA_BLOCKS) != 0)
-		som->som_blocks = attr->la_blocks;
-	lustre_som_swab(som);
-
-	/* Store the record as the SOM extended attribute. */
-	xbuf->lb_buf = som;
-	xbuf->lb_len = sizeof(*som);
-	RETURN(mo_xattr_set(info->mti_env, next, xbuf, XATTR_NAME_SOM, 0));
-}
-
-/**
- * Perform the eviction specific actions on ioepoch close.
- *
- * Under ->mot_ioepoch_mutex, drops one holder from the epoch of \a o and
- * marks the object with MOF_SOM_CHANGE | MOF_SOM_RECOV. If no holder is
- * left, the on-disk SOM attribute is removed and the in-memory epoch state
- * of the object is reset.
- *
- * \retval 0 if other holders remain
- * \retval the result of mdt_som_attr_set() when the last holder is gone
- */
-static inline int mdt_ioepoch_close_on_eviction(struct mdt_thread_info *info,
-						struct mdt_object *o)
-{
-	int rc = 0;
-	/* Pair the RETURN() below with an entry trace, as the other close
-	 * helpers in this file do. */
-	ENTRY;
-
-	mutex_lock(&o->mot_ioepoch_mutex);
-	CDEBUG(D_INODE, "Eviction. Closing IOepoch "LPU64" on "DFID". "
-	       "Count %d\n", o->mot_ioepoch, PFID(mdt_object_fid(o)),
-	       o->mot_ioepoch_count);
-	o->mot_ioepoch_count--;
-
-	/* An eviction occurred: set MOF_SOM_RECOV;
-	 * if there are no other epoch holders, disable SOM on disk. */
-	o->mot_flags |= MOF_SOM_CHANGE | MOF_SOM_RECOV;
-	if (!mdt_ioepoch_opened(o)) {
-		rc = mdt_som_attr_set(info, o, o->mot_ioepoch, MDT_SOM_DISABLE);
-		mdt_object_som_enable(o, o->mot_ioepoch);
-	}
-	mutex_unlock(&o->mot_ioepoch_mutex);
-	RETURN(rc);
-}
-
-/**
- * Perform the replay specific actions on ioepoch close.
- *
- * No SOM attribute update is applied here even if one was obtained; for the
- * last ioepoch holder the epoch state of the object is simply forgotten.
- * The SOM cache is invalidated on MDS failure.
- *
- * \retval MDT_IOEPOCH_GETATTR if the replayed request carries MF_SOM_AU
- * \retval MDT_IOEPOCH_CLOSED otherwise
- */
-static inline int mdt_ioepoch_close_on_replay(struct mdt_thread_info *info,
-					      struct mdt_object *o)
-{
-	int rc;
-	ENTRY;
-
-	mutex_lock(&o->mot_ioepoch_mutex);
-	CDEBUG(D_INODE, "Replay. Closing epoch "LPU64" on "DFID". Count %d\n",
-	       o->mot_ioepoch, PFID(mdt_object_fid(o)), o->mot_ioepoch_count);
-	o->mot_ioepoch_count--;
-
-	/* The replayed request tells whether the client is supposed to send
-	 * an Attribute Update; reconstruct the return code from it. */
-	rc = (info->mti_ioepoch->flags & MF_SOM_AU) ? MDT_IOEPOCH_GETATTR :
-						      MDT_IOEPOCH_CLOSED;
-
-	/* Last holder gone: reset the epoch recorded in the object. */
-	if (o->mot_ioepoch_count == 0)
-		mdt_object_som_enable(o, info->mti_ioepoch->ioepoch);
-	mutex_unlock(&o->mot_ioepoch_mutex);
-
-	RETURN(rc);
-}
-
-/**
- * Regular file IOepoch close.
- * Closes the ioepoch, checks the object state, applies the obtained
- * attributes and re-enables SOM on the object, if possible. Also checks if
- * recovery is needed and packs the OBD_MD_FLGETATTRLOCK flag into the reply
- * to force the client to obtain SOM attributes under the server-side OST
- * locks.
- *
- * Steps, all but the last under ->mot_ioepoch_mutex:
- *  1. Drop this closer from ->mot_ioepoch_count.
- *  2. Fetch inode, LOV and SOM attributes with mdt_attr_get_complex() into
- *     info->mti_u.som; on failure unlock and return that error (the count
- *     stays decremented).
- *  3. Recovery is needed if MOF_SOM_RECOV is set, or if the object is not
- *     MOF_SOM_CREATED and no valid SOM attribute was fetched.
- *  4. For the last holder of a file that has a LOV EA and is not known to
- *     have nlink == 0, ask for another update (MDT_IOEPOCH_GETATTR) if
- *     recovery is needed, if MOF_SOM_CHANGE is already set, or if the
- *     request carries MF_SOM_CHANGE but no LA_SIZE.
- *  5. Set MOF_SOM_CHANGE if the request carries MF_SOM_CHANGE or another
- *     update was requested.
- *  6. If the epoch really ends (last holder, MDT_IOEPOCH_CLOSED), write the
- *     SOM attribute when attributes changed or the file was created in this
- *     epoch, clear info->mti_attr.ma_valid, then reset the epoch state.
- *  7. After unlocking, set OBD_MD_FLGETATTRLOCK in the reply body when
- *     another update is requested because of recovery.
- *
- * Return value:
- * MDT_IOEPOCH_CLOSED if ioepoch is closed.
- * MDT_IOEPOCH_GETATTR if ioepoch is closed but another SOM update is needed.
- * The error from mdt_attr_get_complex() or mdt_som_attr_set() if one failed.
- */
-static inline int mdt_ioepoch_close_reg(struct mdt_thread_info *info,
- struct mdt_object *o)
-{
- struct md_attr *tmp_ma;
- struct lu_attr *la;
- int achange, opened;
- int recovery = 0;
- int rc = 0, ret = MDT_IOEPOCH_CLOSED;
- ENTRY;
-
- la = &info->mti_attr.ma_attr;
- achange = (info->mti_ioepoch->flags & MF_SOM_CHANGE);
-
- mutex_lock(&o->mot_ioepoch_mutex);
- o->mot_ioepoch_count--; /* this closer no longer holds the epoch */
-
- tmp_ma = &info->mti_u.som.attr;
- tmp_ma->ma_lmm = info->mti_attr.ma_lmm;
- tmp_ma->ma_lmm_size = info->mti_attr.ma_lmm_size;
- tmp_ma->ma_som = &info->mti_u.som.data;
- tmp_ma->ma_need = MA_INODE | MA_LOV | MA_SOM;
- tmp_ma->ma_valid = 0;
- rc = mdt_attr_get_complex(info, o, tmp_ma);
- if (rc)
- GOTO(error_up, rc);
-
- /* Check the on-disk SOM state. */
- if (o->mot_flags & MOF_SOM_RECOV)
- recovery = 1;
- else if (!(o->mot_flags & MOF_SOM_CREATED) &&
- !(tmp_ma->ma_valid & MA_SOM))
- recovery = 1;
-
- CDEBUG(D_INODE, "Closing epoch "LPU64" on "DFID". Count %d\n",
- o->mot_ioepoch, PFID(mdt_object_fid(o)), o->mot_ioepoch_count);
-
- opened = mdt_ioepoch_opened(o);
- /**
- * If IOEpoch is not opened, check if a Size-on-MDS update is needed.
- * Skip the check for files with no LOV or for unlinked files.
- */
- if (!opened && tmp_ma->ma_valid & MA_LOV &&
- !(tmp_ma->ma_valid & MA_INODE && tmp_ma->ma_attr.la_nlink == 0)) {
- if (recovery)
- /* If some previous writer was evicted, re-ask the
- * client for attributes. Even if attributes are
- * provided, we cannot believe in them.
- * Another use case is that there is no SOM cache on
- * disk -- first access with SOM or there was an MDS
- * failure. */
- ret = MDT_IOEPOCH_GETATTR;
- else if (o->mot_flags & MOF_SOM_CHANGE)
- /* Some previous writer changed the attribute.
- * Do not trust the current Size-on-MDS
- * update, re-ask client. */
- ret = MDT_IOEPOCH_GETATTR;
- else if (!(la->la_valid & LA_SIZE) && achange)
- /* Attributes were changed by the last writer
- * only but no Size-on-MDS update is received.*/
- ret = MDT_IOEPOCH_GETATTR;
- }
-
- if (achange || ret == MDT_IOEPOCH_GETATTR)
- o->mot_flags |= MOF_SOM_CHANGE;
-
- /* If epoch ends and reliable SOM attributes are obtained, update them.
- * Create SOM ea for new files even if there are no attributes obtained
- * (0-length file). */
- if (ret == MDT_IOEPOCH_CLOSED && !opened) {
- if (achange || o->mot_flags & MOF_SOM_CREATED) {
- LASSERT(achange || !(la->la_valid & LA_SIZE));
- rc = mdt_som_attr_set(info, o, o->mot_ioepoch,
- MDT_SOM_ENABLE);
- /* Avoid the following setattrs of these attributes,
- * e.g. for atime update. */
- info->mti_attr.ma_valid = 0;
- }
- mdt_object_som_enable(o, o->mot_ioepoch);
- }
-
- mutex_unlock(&o->mot_ioepoch_mutex);
- /* If recovery is needed, tell the client to perform GETATTR under
- * the lock. */
- if (ret == MDT_IOEPOCH_GETATTR && recovery) {
- struct mdt_body *rep;
- rep = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
- rep->mbo_valid |= OBD_MD_FLGETATTRLOCK;
- }
-
- RETURN(rc ? : ret); /* an error from the SOM update wins over ret */
-
-error_up:
- mutex_unlock(&o->mot_ioepoch_mutex);
- return rc;
-}
-
-/**
- * Close IOEpoch (opened file or MDS_FMODE_EPOCH state). It happens if:
- * - a client closes the IOEpoch;
- * - a client eviction occurred.
- * Return values:
- * MDT_IOEPOCH_OPENED if the client does not close IOEpoch.
- * MDT_IOEPOCH_CLOSED if the client closes IOEpoch.
- * MDT_IOEPOCH_GETATTR if the client closes IOEpoch but another SOM attribute
- * update is needed.
- */
-static int mdt_ioepoch_close(struct mdt_thread_info *info, struct mdt_object *o)