#define DEBUG_SUBSYSTEM S_OSD
#include <linux/module.h>
+#include <linux/user_namespace.h>
+#ifdef HAVE_UIDGID_HEADER
+# include <linux/uidgid.h>
+#endif
/* LUSTRE_VERSION_CODE */
#include <lustre_ver.h>
return inode;
}
-static struct inode *osd_iget_check(struct osd_thread_info *info,
- struct osd_device *dev,
- const struct lu_fid *fid,
- struct osd_inode_id *id,
- bool in_oi)
-{
- struct inode *inode;
- int rc = 0;
- ENTRY;
-
- inode = ldiskfs_iget(osd_sb(dev), id->oii_ino);
- if (IS_ERR(inode)) {
- rc = PTR_ERR(inode);
- if (!in_oi || (rc != -ENOENT && rc != -ESTALE)) {
- CDEBUG(D_INODE, "no inode: ino = %u, rc = %d\n",
- id->oii_ino, rc);
-
- GOTO(put, rc);
- }
-
- goto check_oi;
- }
-
- if (is_bad_inode(inode)) {
- rc = -ENOENT;
- if (!in_oi) {
- CDEBUG(D_INODE, "bad inode: ino = %u\n", id->oii_ino);
-
- GOTO(put, rc);
- }
-
- goto check_oi;
- }
-
- if (id->oii_gen != OSD_OII_NOGEN &&
- inode->i_generation != id->oii_gen) {
- rc = -ESTALE;
- if (!in_oi) {
- CDEBUG(D_INODE, "unmatched inode: ino = %u, "
- "oii_gen = %u, i_generation = %u\n",
- id->oii_ino, id->oii_gen, inode->i_generation);
-
- GOTO(put, rc);
- }
-
- goto check_oi;
- }
-
- if (inode->i_nlink == 0) {
- rc = -ENOENT;
- if (!in_oi) {
- CDEBUG(D_INODE, "stale inode: ino = %u\n", id->oii_ino);
-
- GOTO(put, rc);
- }
-
- goto check_oi;
- }
-
-check_oi:
- if (rc != 0) {
- struct osd_inode_id saved_id = *id;
-
- LASSERTF(rc == -ESTALE || rc == -ENOENT, "rc = %d\n", rc);
-
- rc = osd_oi_lookup(info, dev, fid, id, OI_CHECK_FLD);
- /* XXX: There are some possible cases:
- * 1. rc = 0.
- * Backup/restore caused the OI invalid.
- * 2. rc = 0.
- * Someone unlinked the object but NOT removed
- * the OI mapping, such as mount target device
- * as ldiskfs, and modify something directly.
- * 3. rc = -ENOENT.
- * Someone just removed the object between the
- * former oi_lookup and the iget. It is normal.
- * 4. Other failure cases.
- *
- * Generally, when the device is mounted, it will
- * auto check whether the system is restored from
- * file-level backup or not. We trust such detect
- * to distinguish the 1st case from the 2nd case. */
- if (rc == 0) {
- if (!IS_ERR(inode) && inode->i_generation != 0 &&
- inode->i_generation == id->oii_gen) {
- rc = -ENOENT;
- } else {
- __u32 level = D_LFSCK;
-
- rc = -EREMCHG;
- if (!thread_is_running(&dev->od_scrub.os_thread))
- level |= D_CONSOLE;
-
- CDEBUG(level, "%s: the OI mapping for the FID "
- DFID" become inconsistent, the given ID "
- "%u/%u, the ID in OI mapping %u/%u\n",
- osd_name(dev), PFID(fid),
- saved_id.oii_ino, saved_id.oii_gen,
- id->oii_ino, id->oii_ino);
- }
- }
- } else {
- if (id->oii_gen == OSD_OII_NOGEN)
- osd_id_gen(id, inode->i_ino, inode->i_generation);
-
- /* Do not update file c/mtime in ldiskfs.
- * NB: we don't have any lock to protect this because we don't
- * have reference on osd_object now, but contention with
- * another lookup + attr_set can't happen in the tiny window
- * between if (...) and set S_NOCMTIME. */
- if (!(inode->i_flags & S_NOCMTIME))
- inode->i_flags |= S_NOCMTIME;
- }
-
- GOTO(put, rc);
-
-put:
- if (rc != 0) {
- if (!IS_ERR(inode))
- iput(inode);
-
- inode = ERR_PTR(rc);
- }
-
- return inode;
-}
-
/**
* \retval +v: new filter_fid, does not contain self-fid
* \retval 0: filter_fid_old, contains self-fid
}
if (fid != NULL && unlikely(!lu_fid_eq(rfid, fid))) {
- __u32 level = D_LFSCK;
-
if (fid_is_idif(rfid) && fid_is_idif(fid)) {
struct ost_id *oi = &info->oti_ostid;
struct lu_fid *fid1 = &info->oti_fid3;
}
}
-
rc = -EREMCHG;
- if (!thread_is_running(&osd->od_scrub.os_thread))
- level |= D_CONSOLE;
-
- CDEBUG(level, "%s: FID "DFID" != self_fid "DFID"\n",
- osd_name(osd), PFID(rfid), PFID(fid));
}
RETURN(rc);
int result;
int saved = 0;
bool in_oi = false;
+ bool in_cache = false;
bool triggered = false;
ENTRY;
if (lu_fid_eq(fid, &oic->oic_fid) &&
likely(oic->oic_dev == dev)) {
id = &oic->oic_lid;
+ in_cache = true;
goto iget;
}
/* Search order: 3. OI files. */
result = osd_oi_lookup(info, dev, fid, id, OI_CHECK_FLD);
if (result == -ENOENT) {
- if (!fid_is_norm(fid) ||
- fid_is_on_ost(info, dev, fid, OI_CHECK_FLD) ||
+ if (!(fid_is_norm(fid) || fid_is_igif(fid)) ||
!ldiskfs_test_bit(osd_oi_fid2idx(dev,fid),
sf->sf_oi_bitmap))
GOTO(out, result = 0);
in_oi = true;
iget:
- inode = osd_iget_check(info, dev, fid, id, in_oi);
+ inode = osd_iget(info, dev, id);
if (IS_ERR(inode)) {
result = PTR_ERR(inode);
- if (result == -ENOENT || result == -ESTALE) {
- if (!in_oi)
- fid_zero(&oic->oic_fid);
+ if (result != -ENOENT && result != -ESTALE)
+ GOTO(out, result);
- GOTO(out, result = -ENOENT);
- } else if (result == -EREMCHG) {
+ if (in_cache)
+ fid_zero(&oic->oic_fid);
-trigger:
- if (!in_oi)
- fid_zero(&oic->oic_fid);
+ result = osd_oi_lookup(info, dev, fid, id, OI_CHECK_FLD);
+ if (result != 0)
+ GOTO(out, result = (result == -ENOENT ? 0 : result));
- if (unlikely(triggered))
- GOTO(out, result = saved);
+ /* The OI mapping is there, but the inode is NOT there.
+ * There are two possible causes:
+ *
+ * 1) Backup/restore made the OI mapping invalid.
+ * 2) Someone unlinked the object without removing
+ * the OI mapping, e.g. by mounting the target device
+ * as ldiskfs and modifying it directly.
+ *
+ * Generally, when the device is mounted, it will
+ * automatically check whether the system was restored
+ * from a file-level backup. We trust that detection
+ * to distinguish the 1st case from the 2nd case. */
+ if (!(scrub->os_file.sf_flags & SF_INCONSISTENT))
+ GOTO(out, result = 0);
- triggered = true;
- if (thread_is_running(&scrub->os_thread)) {
+trigger:
+ if (unlikely(triggered))
+ GOTO(out, result = saved);
+
+ triggered = true;
+ if (thread_is_running(&scrub->os_thread)) {
+ result = -EINPROGRESS;
+ } else if (!dev->od_noscrub) {
+ /* Since we do not know the right OI mapping, we have
+ * to trigger OI scrub to scan the whole device. */
+ result = osd_scrub_start(dev, SS_AUTO_FULL |
+ SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT);
+ CDEBUG(D_LFSCK | D_CONSOLE, "%.16s: trigger OI "
+ "scrub by RPC for "DFID", rc = %d [1]\n",
+ osd_name(dev), PFID(fid), result);
+ if (result == 0 || result == -EALREADY)
result = -EINPROGRESS;
- } else if (!dev->od_noscrub) {
- /* Since we do not know the right OI mapping,
- * we have to trigger OI scrub to scan the
- * whole device. */
- result = osd_scrub_start(dev, SS_AUTO_FULL |
- SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT);
- CDEBUG(D_LFSCK | D_CONSOLE, "%.16s: trigger OI "
- "scrub by RPC for "DFID", rc = %d [1]\n",
- osd_name(dev), PFID(fid),result);
- if (result == 0 || result == -EALREADY)
- result = -EINPROGRESS;
- else
- result = -EREMCHG;
- }
-
- /* We still have chance to get the valid inode: for the
- * object which is referenced by remote name entry, the
- * object on the local MDT will be linked under the dir
- * of "/REMOTE_PARENT_DIR" with its FID string as name.
- *
- * We do not know whether the object for the given FID
- * is referenced by some remote name entry or not, and
- * especially for DNE II, a multiple-linked object may
- * have many name entries reside on many MDTs.
- *
- * To simplify the operation, OSD will not distinguish
- * more, just lookup "/REMOTE_PARENT_DIR". Usually, it
- * only happened for the RPC from other MDT during the
- * OI scrub, or for the client side RPC with FID only,
- * such as FID to path, or from old connected client. */
- saved = result;
- result = osd_lookup_in_remote_parent(info, dev,
- fid, id);
- if (result == 0) {
- in_oi = false;
- goto iget;
- }
+ else
+ result = -EREMCHG;
+ }
- result = saved;
+ /* We still have chance to get the valid inode: for the
+ * object which is referenced by remote name entry, the
+ * object on the local MDT will be linked under the dir
+ * of "/REMOTE_PARENT_DIR" with its FID string as name.
+ *
+ * We do not know whether the object for the given FID
+ * is referenced by some remote name entry or not, and
+ * especially for DNE II, a multiple-linked object may
+ * have many name entries reside on many MDTs.
+ *
+ * To simplify the operation, OSD will not distinguish
+ * more, just lookup "/REMOTE_PARENT_DIR". Usually, it
+ * only happened for the RPC from other MDT during the
+ * OI scrub, or for the client side RPC with FID only,
+ * such as FID to path, or from old connected client. */
+ saved = result;
+ result = osd_lookup_in_remote_parent(info, dev, fid, id);
+ if (result == 0) {
+ in_oi = false;
+ goto iget;
}
- GOTO(out, result);
- }
+ GOTO(out, result = saved);
+ }
- obj->oo_inode = inode;
- LASSERT(obj->oo_inode->i_sb == osd_sb(dev));
+ obj->oo_inode = inode;
+ LASSERT(obj->oo_inode->i_sb == osd_sb(dev));
result = osd_check_lma(env, obj);
if (result != 0) {
iput(inode);
obj->oo_inode = NULL;
- if (result == -EREMCHG) {
- if (!in_oi) {
- result = osd_oi_lookup(info, dev, fid, id,
- OI_CHECK_FLD);
- if (result != 0) {
- fid_zero(&oic->oic_fid);
- GOTO(out, result);
- }
- }
+ if (result != -EREMCHG)
+ GOTO(out, result);
+
+ if (in_cache)
+ fid_zero(&oic->oic_fid);
+
+ result = osd_oi_lookup(info, dev, fid, id, OI_CHECK_FLD);
+ if (result == 0)
+ goto trigger;
+
+ if (result != -ENOENT)
+ GOTO(out, result);
+
+ if (!in_oi && (fid_is_norm(fid) || fid_is_igif(fid)) &&
+ ldiskfs_test_bit(osd_oi_fid2idx(dev, fid),
+ sf->sf_oi_bitmap))
goto trigger;
- }
- GOTO(out, result);
+ GOTO(out, result = 0);
}
obj->oo_compat_dot_created = 1;
dt_txn_hook_commit(th);
/* call per-transaction callbacks if any */
- list_for_each_entry_safe(dcb, tmp, &oh->ot_dcb_list, dcb_linkage) {
+ list_for_each_entry_safe(dcb, tmp, &oh->ot_commit_dcb_list,
+ dcb_linkage) {
LASSERTF(dcb->dcb_magic == TRANS_COMMIT_CB_MAGIC,
"commit callback entry: magic=%x name='%s'\n",
dcb->dcb_magic, dcb->dcb_name);
th->th_result = 0;
th->th_tags = LCT_TX_HANDLE;
oh->ot_credits = 0;
- INIT_LIST_HEAD(&oh->ot_dcb_list);
+ INIT_LIST_HEAD(&oh->ot_commit_dcb_list);
+ INIT_LIST_HEAD(&oh->ot_stop_dcb_list);
osd_th_alloced(oh);
memset(oti->oti_declare_ops, 0,
- sizeof(oti->oti_declare_ops));
- memset(oti->oti_declare_ops_rb, 0,
- sizeof(oti->oti_declare_ops_rb));
+ sizeof(oti->oti_declare_ops));
memset(oti->oti_declare_ops_cred, 0,
- sizeof(oti->oti_declare_ops_cred));
- oti->oti_rollback = false;
+ sizeof(oti->oti_declare_ops_cred));
+ memset(oti->oti_declare_ops_used, 0,
+ sizeof(oti->oti_declare_ops_used));
}
RETURN(th);
}
+void osd_trans_dump_creds(const struct lu_env *env, struct thandle *th)
+{
+ struct osd_thread_info *oti = osd_oti_get(env);
+ struct osd_thandle *oh;
+
+ oh = container_of0(th, struct osd_thandle, ot_super);
+ LASSERT(oh != NULL);
+
+ CWARN(" create: %u/%u/%u, destroy: %u/%u/%u\n",
+ oti->oti_declare_ops[OSD_OT_CREATE],
+ oti->oti_declare_ops_cred[OSD_OT_CREATE],
+ oti->oti_declare_ops_used[OSD_OT_CREATE],
+ oti->oti_declare_ops[OSD_OT_DESTROY],
+ oti->oti_declare_ops_cred[OSD_OT_DESTROY],
+ oti->oti_declare_ops_used[OSD_OT_DESTROY]);
+ CWARN(" attr_set: %u/%u/%u, xattr_set: %u/%u/%u\n",
+ oti->oti_declare_ops[OSD_OT_ATTR_SET],
+ oti->oti_declare_ops_cred[OSD_OT_ATTR_SET],
+ oti->oti_declare_ops_used[OSD_OT_ATTR_SET],
+ oti->oti_declare_ops[OSD_OT_XATTR_SET],
+ oti->oti_declare_ops_cred[OSD_OT_XATTR_SET],
+ oti->oti_declare_ops_used[OSD_OT_XATTR_SET]);
+ CWARN(" write: %u/%u/%u, punch: %u/%u/%u, quota %u/%u/%u\n",
+ oti->oti_declare_ops[OSD_OT_WRITE],
+ oti->oti_declare_ops_cred[OSD_OT_WRITE],
+ oti->oti_declare_ops_used[OSD_OT_WRITE],
+ oti->oti_declare_ops[OSD_OT_PUNCH],
+ oti->oti_declare_ops_cred[OSD_OT_PUNCH],
+ oti->oti_declare_ops_used[OSD_OT_PUNCH],
+ oti->oti_declare_ops[OSD_OT_QUOTA],
+ oti->oti_declare_ops_cred[OSD_OT_QUOTA],
+ oti->oti_declare_ops_used[OSD_OT_QUOTA]);
+ CWARN(" insert: %u/%u/%u, delete: %u/%u/%u\n",
+ oti->oti_declare_ops[OSD_OT_INSERT],
+ oti->oti_declare_ops_cred[OSD_OT_INSERT],
+ oti->oti_declare_ops_used[OSD_OT_INSERT],
+ oti->oti_declare_ops[OSD_OT_DELETE],
+ oti->oti_declare_ops_cred[OSD_OT_DELETE],
+ oti->oti_declare_ops_used[OSD_OT_DELETE]);
+ CWARN(" ref_add: %u/%u/%u, ref_del: %u/%u/%u\n",
+ oti->oti_declare_ops[OSD_OT_REF_ADD],
+ oti->oti_declare_ops_cred[OSD_OT_REF_ADD],
+ oti->oti_declare_ops_used[OSD_OT_REF_ADD],
+ oti->oti_declare_ops[OSD_OT_REF_DEL],
+ oti->oti_declare_ops_cred[OSD_OT_REF_DEL],
+ oti->oti_declare_ops_used[OSD_OT_REF_DEL]);
+}
+
/*
* Concurrency: shouldn't matter.
*/
LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name,
oh->ot_credits,
osd_journal(dev)->j_max_transaction_buffers);
- CWARN(" create: %u/%u, destroy: %u/%u\n",
- oti->oti_declare_ops[OSD_OT_CREATE],
- oti->oti_declare_ops_cred[OSD_OT_CREATE],
- oti->oti_declare_ops[OSD_OT_DESTROY],
- oti->oti_declare_ops_cred[OSD_OT_DESTROY]);
- CWARN(" attr_set: %u/%u, xattr_set: %u/%u\n",
- oti->oti_declare_ops[OSD_OT_ATTR_SET],
- oti->oti_declare_ops_cred[OSD_OT_ATTR_SET],
- oti->oti_declare_ops[OSD_OT_XATTR_SET],
- oti->oti_declare_ops_cred[OSD_OT_XATTR_SET]);
- CWARN(" write: %u/%u, punch: %u/%u, quota %u/%u\n",
- oti->oti_declare_ops[OSD_OT_WRITE],
- oti->oti_declare_ops_cred[OSD_OT_WRITE],
- oti->oti_declare_ops[OSD_OT_PUNCH],
- oti->oti_declare_ops_cred[OSD_OT_PUNCH],
- oti->oti_declare_ops[OSD_OT_QUOTA],
- oti->oti_declare_ops_cred[OSD_OT_QUOTA]);
- CWARN(" insert: %u/%u, delete: %u/%u\n",
- oti->oti_declare_ops[OSD_OT_INSERT],
- oti->oti_declare_ops_cred[OSD_OT_INSERT],
- oti->oti_declare_ops[OSD_OT_DELETE],
- oti->oti_declare_ops_cred[OSD_OT_DELETE]);
- CWARN(" ref_add: %u/%u, ref_del: %u/%u\n",
- oti->oti_declare_ops[OSD_OT_REF_ADD],
- oti->oti_declare_ops_cred[OSD_OT_REF_ADD],
- oti->oti_declare_ops[OSD_OT_REF_DEL],
- oti->oti_declare_ops_cred[OSD_OT_REF_DEL]);
+
+ osd_trans_dump_creds(env, th);
if (last_credits != oh->ot_credits &&
time_after(jiffies, last_printed +
RETURN(ss->ss_node_id == range->lsr_index);
}
+/**
+ * Invoke every callback queued on the handle's stop list.
+ *
+ * Each entry is unlinked from ot_stop_dcb_list before its dcb_func is
+ * called, so the list is empty once this function returns.
+ *
+ * \param[in] oth	OSD transaction handle whose stop list is walked
+ * \param[in] result	value handed unchanged to every callback
+ */
+static void osd_trans_stop_cb(struct osd_thandle *oth, int result)
+{
+	struct list_head *stop_list = &oth->ot_stop_dcb_list;
+	struct thandle *th = &oth->ot_super;
+	struct dt_txn_commit_cb *dcb;
+	struct dt_txn_commit_cb *next;
+
+	/* entries are removed while iterating, hence the _safe variant */
+	list_for_each_entry_safe(dcb, next, stop_list, dcb_linkage) {
+		LASSERTF(dcb->dcb_magic == TRANS_COMMIT_CB_MAGIC,
+			 "commit callback entry: magic=%x name='%s'\n",
+			 dcb->dcb_magic, dcb->dcb_name);
+		list_del_init(&dcb->dcb_linkage);
+		dcb->dcb_func(NULL, th, dcb, result);
+	}
+}
+
/*
* Concurrency: shouldn't matter.
*/
qtrans = oh->ot_quota_trans;
oh->ot_quota_trans = NULL;
- if (oh->ot_handle != NULL) {
+ if (oh->ot_handle != NULL) {
handle_t *hdl = oh->ot_handle;
/*
LASSERT(oti->oti_txns == 1);
oti->oti_txns--;
+
rc = dt_txn_hook_stop(env, th);
if (rc != 0)
CERROR("%s: failed in transaction hook: rc = %d\n",
osd_name(osd), rc);
+ osd_trans_stop_cb(oh, rc);
/* hook functions might modify th_sync */
hdl->h_sync = th->th_sync;
CERROR("%s: failed to stop transaction: rc = %d\n",
osd_name(osd), rc);
} else {
+ osd_trans_stop_cb(oh, th->th_result);
OBD_FREE_PTR(oh);
}
LASSERT(dcb->dcb_magic == TRANS_COMMIT_CB_MAGIC);
LASSERT(&dcb->dcb_func != NULL);
- list_add(&dcb->dcb_linkage, &oh->ot_dcb_list);
+ if (dcb->dcb_flags & DCB_TRANS_STOP)
+ list_add(&dcb->dcb_linkage, &oh->ot_stop_dcb_list);
+ else
+ list_add(&dcb->dcb_linkage, &oh->ot_commit_dcb_list);
return 0;
}
result = sb->s_op->statfs(sb->s_root, ksfs);
if (likely(result == 0)) { /* N.B. statfs can't really fail */
statfs_pack(sfs, ksfs);
- if (sb->s_flags & MS_RDONLY)
+ if (unlikely(sb->s_flags & MS_RDONLY))
sfs->os_state = OS_STATE_READONLY;
+ if (LDISKFS_HAS_INCOMPAT_FEATURE(sb,
+ LDISKFS_FEATURE_INCOMPAT_EXTENTS))
+ sfs->os_maxbytes = sb->s_maxbytes;
+ else
+ sfs->os_maxbytes = LDISKFS_SB(sb)->s_bitmap_maxbytes;
}
-
spin_unlock(&osd->od_osfs_lock);
if (unlikely(env == NULL))
param->ddp_max_nlink = LDISKFS_LINK_MAX;
param->ddp_block_shift = sb->s_blocksize_bits;
param->ddp_mount_type = LDD_MT_LDISKFS;
- param->ddp_maxbytes = sb->s_maxbytes;
+ if (LDISKFS_HAS_INCOMPAT_FEATURE(sb, LDISKFS_FEATURE_INCOMPAT_EXTENTS))
+ param->ddp_maxbytes = sb->s_maxbytes;
+ else
+ param->ddp_maxbytes = LDISKFS_SB(sb)->s_bitmap_maxbytes;
/* Overhead estimate should be fairly accurate, so we really take a tiny
* error margin which also avoids fragmenting the filesystem too much */
param->ddp_grant_reserved = 2; /* end up to be 1.9% after conversion */
if (!rc)
ll_dirty_inode(inode, I_DIRTY_DATASYNC);
+
+ osd_trans_exec_check(env, handle, OSD_OT_ATTR_SET);
+
return rc;
}
oth = container_of(th, struct osd_thandle, ot_super);
LASSERT(oth->ot_handle->h_transaction != NULL);
- if (hint && hint->dah_parent)
- parent = hint->dah_parent;
+ if (hint != NULL && hint->dah_parent != NULL &&
+ !dt_object_remote(hint->dah_parent))
+ parent = hint->dah_parent;
inode = ldiskfs_create_inode(oth->ot_handle,
parent ? osd_dt_obj(parent)->oo_inode :
int result;
__u32 umask;
+ osd_trans_exec_op(info->oti_env, th, OSD_OT_CREATE);
+
/* we drop umask so that permissions we pass are not affected */
umask = current->fs->umask;
current->fs->umask = 0;
result = osd_create_type_f(dof->dof_type)(info, obj, attr, hint, dof,
th);
- if (result == 0) {
- osd_attr_init(info, obj, attr, dof);
- osd_object_init0(obj);
- }
-
- if (obj->oo_inode != NULL) {
+ if (likely(obj->oo_inode != NULL)) {
LASSERT(obj->oo_inode->i_state & I_NEW);
+ /* Unlock the inode before attr initialization to avoid
+ * unnecessary dqget operations. LU-6378 */
unlock_new_inode(obj->oo_inode);
}
+ if (likely(result == 0)) {
+ osd_attr_init(info, obj, attr, dof);
+ osd_object_init0(obj);
+ }
+
/* restore previous umask value */
current->fs->umask = umask;
+ osd_trans_exec_check(info->oti_env, th, OSD_OT_CREATE);
+
return result;
}
struct osd_inode_id *id = &info->oti_id;
struct osd_device *osd = osd_obj2dev(obj);
struct osd_thandle *oh;
+ int rc;
LASSERT(obj->oo_inode != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle);
+ osd_trans_exec_op(env, th, OSD_OT_INSERT);
osd_id_gen(id, obj->oo_inode->i_ino, obj->oo_inode->i_generation);
- return osd_oi_insert(info, osd, fid, id, oh->ot_handle, OI_CHECK_FLD);
+ rc = osd_oi_insert(info, osd, fid, id, oh->ot_handle, OI_CHECK_FLD);
+ osd_trans_exec_check(env, th, OSD_OT_INSERT);
+
+ return rc;
}
int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
oh = container_of0(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
+ /* EA object consumes more credits than regular object: osd_mk_index
+ * vs. osd_mkreg: osd_mk_index will create 2 blocks for root_node and
+ * leaf_node, could involves the block, block bitmap, groups, GDT
+ * change for each block, so add 4 * 2 credits in that case.
+ *
+ * The conditional must be parenthesized: "+" binds tighter than "?:",
+ * so without the parentheses the whole sum becomes the condition and
+ * the base DTO_OBJECT_CREATE credits are dropped from the result. */
osd_trans_declare_op(env, oh, OSD_OT_CREATE,
- osd_dto_credits_noquota[DTO_OBJECT_CREATE]);
+ osd_dto_credits_noquota[DTO_OBJECT_CREATE] +
+ ((dof->dof_type == DFT_INDEX) ? 4 * 2 : 0));
/* Reuse idle OI block may cause additional one OI block
* to be changed. */
osd_trans_declare_op(env, oh, OSD_OT_INSERT,
osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
- /* If this is directory, then we expect . and .. to be inserted as
- * well. The one directory block always needs to be created for the
- * directory, so we could use DTO_WRITE_BASE here (GDT, block bitmap,
- * block), there is no danger of needing a tree for the first block.
- */
- if (attr && S_ISDIR(attr->la_mode)) {
- osd_trans_declare_op(env, oh, OSD_OT_INSERT,
- osd_dto_credits_noquota[DTO_WRITE_BASE]);
- osd_trans_declare_op(env, oh, OSD_OT_INSERT, 0);
- }
-
if (!attr)
RETURN(0);
* 'tune2fs -O quota' will take care of creating them */
RETURN(-EPERM);
- osd_trans_exec_op(env, th, OSD_OT_CREATE);
- osd_trans_declare_rb(env, th, OSD_OT_REF_ADD);
-
- result = __osd_object_create(info, obj, attr, hint, dof, th);
- if (result == 0)
- result = __osd_oi_insert(env, obj, fid, th);
-
+ result = __osd_object_create(info, obj, attr, hint, dof, th);
+ if (result == 0) {
+ result = __osd_oi_insert(env, obj, fid, th);
+ if (obj->oo_dt.do_body_ops == &osd_body_ops_new)
+ obj->oo_dt.do_body_ops = &osd_body_ops;
+ }
LASSERT(ergo(result == 0,
- dt_object_exists(dt) && !dt_object_remote(dt)));
+ dt_object_exists(dt) && !dt_object_remote(dt)));
- LASSERT(osd_invariant(obj));
- RETURN(result);
+ LASSERT(osd_invariant(obj));
+ RETURN(result);
}
/**
result = osd_oi_delete(osd_oti_get(env), osd, fid, oh->ot_handle,
OI_CHECK_FLD);
- /* XXX: add to ext3 orphan list */
- /* rc = ext3_orphan_add(handle_t *handle, struct inode *inode) */
+ osd_trans_exec_check(env, th, OSD_OT_DESTROY);
+ /* XXX: add to ext3 orphan list */
+ /* rc = ext3_orphan_add(handle_t *handle, struct inode *inode) */
- /* not needed in the cache anymore */
- set_bit(LU_OBJECT_HEARD_BANSHEE, &dt->do_lu.lo_header->loh_flags);
+ /* not needed in the cache anymore */
+ set_bit(LU_OBJECT_HEARD_BANSHEE, &dt->do_lu.lo_header->loh_flags);
+ obj->oo_destroyed = 1;
- RETURN(0);
+ RETURN(0);
}
/**
if (OBD_FAIL_CHECK(OBD_FAIL_FID_INLMA))
RETURN(0);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_OST_EA_FID_SET))
+ rc = -ENOMEM;
+
lustre_lma_init(lma, fid, compat, incompat);
lustre_lma_swab(lma);
dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
dot_ldp->edp_magic = 0;
+
return ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir,
- dir, dot_ldp, dot_dot_ldp);
+ dir, dot_ldp, dot_dot_ldp);
}
/**
* 'tune2fs -O quota' will take care of creating them */
RETURN(-EPERM);
- osd_trans_exec_op(env, th, OSD_OT_CREATE);
- osd_trans_declare_rb(env, th, OSD_OT_REF_ADD);
-
result = __osd_object_create(info, obj, attr, hint, dof, th);
if (result == 0) {
if (fid_is_idif(fid) &&
fid, OI_CHECK_FLD) ?
LMAC_FID_ON_OST : 0, 0);
}
+ if (obj->oo_dt.do_body_ops == &osd_body_ops_new)
+ obj->oo_dt.do_body_ops = &osd_body_ops;
}
if (result == 0)
ll_dirty_inode(inode, I_DIRTY_DATASYNC);
LINVRNT(osd_invariant(obj));
+ osd_trans_exec_check(env, th, OSD_OT_REF_ADD);
+
return rc;
}
ll_dirty_inode(inode, I_DIRTY_DATASYNC);
LINVRNT(osd_invariant(obj));
+ osd_trans_exec_check(env, th, OSD_OT_REF_DEL);
+
return 0;
}
oh = container_of0(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
- /* optimistic optimization: LMA is set first and usually fit inode */
if (strcmp(name, XATTR_NAME_LMA) == 0) {
- if (dt_object_exists(dt))
+ /* For non-upgrading case, the LMA is set first and
+ * usually fit inode. But for upgrade case, the LMA
+ * may be in another separated EA block. */
+ if (!dt_object_exists(dt))
credits = 0;
- else
+ else if (fl == LU_XATTR_REPLACE)
credits = 1;
+ else
+ goto upgrade;
} else if (strcmp(name, XATTR_NAME_VERSION) == 0) {
credits = 1;
} else {
+upgrade:
credits = osd_dto_credits_noquota[DTO_XATTR_SET];
if (buf && buf->lb_len > sb->s_blocksize) {
credits *= (buf->lb_len + sb->s_blocksize - 1) >>
* xattr set may involve inode quota change, reserve credits for
* dquot_initialize()
*/
- oh->ot_credits += LDISKFS_MAXQUOTAS_INIT_BLOCKS(sb);
+ credits += LDISKFS_MAXQUOTAS_INIT_BLOCKS(sb);
}
osd_trans_declare_op(env, oh, OSD_OT_XATTR_SET, credits);
struct inode *inode = obj->oo_inode;
struct osd_thread_info *info = osd_oti_get(env);
int fs_flags = 0;
+ int rc;
ENTRY;
LASSERT(handle != NULL);
strcmp(name, XATTR_NAME_LINK) == 0)
return -ENOSPC;
- return __osd_xattr_set(info, inode, name, buf->lb_buf, buf->lb_len,
+ rc = __osd_xattr_set(info, inode, name, buf->lb_buf, buf->lb_len,
fs_flags);
+ osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
+
+ return rc;
}
/*
dentry->d_inode = inode;
dentry->d_sb = inode->i_sb;
rc = inode->i_op->removexattr(dentry, name);
+ osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
return rc;
}
dentry->d_inode = inode;
dentry->d_sb = inode->i_sb;
- file->f_dentry = dentry;
+ file->f_path.dentry = dentry;
file->f_mapping = inode->i_mapping;
file->f_op = inode->i_fop;
set_file_inode(file, inode);
}
LINVRNT(osd_invariant(obj));
- if (result == 0 && is_quota_glb_feat(feat) &&
+ if (result == 0 && feat == &dt_quota_glb_features &&
fid_seq(lu_object_fid(&dt->do_lu)) == FID_SEQ_QUOTA_GLB)
- result = osd_quota_migration(env, dt, feat);
+ result = osd_quota_migration(env, dt);
return result;
}
rc = iam_delete(oh->ot_handle, bag, (const struct iam_key *)key, ipd);
osd_ipd_put(env, bag, ipd);
LINVRNT(osd_invariant(obj));
+ osd_trans_exec_check(env, handle, OSD_OT_DELETE);
RETURN(rc);
}
oh = container_of0(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
+ /* due to DNE we may need to remove an agent inode */
osd_trans_declare_op(env, oh, OSD_OT_DELETE,
- osd_dto_credits_noquota[DTO_INDEX_DELETE]);
+ osd_dto_credits_noquota[DTO_INDEX_DELETE] +
+ osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
inode = osd_dt_obj(dt)->oo_inode;
LASSERT(inode);
if (de->file_type & LDISKFS_DIRENT_LUFID) {
rec = (struct osd_fid_pack *) (de->name + de->name_len + 1);
rc = osd_fid_unpack((struct lu_fid *)fid, rec);
+ if (rc == 0 && unlikely(!fid_is_sane((struct lu_fid *)fid)))
+ rc = -EINVAL;
}
return rc;
}
out:
LASSERT(osd_invariant(obj));
+ osd_trans_exec_check(env, handle, OSD_OT_DELETE);
RETURN(rc);
}
iam_rec, ipd);
osd_ipd_put(env, bag, ipd);
LINVRNT(osd_invariant(obj));
+ osd_trans_exec_check(env, th, OSD_OT_INSERT);
RETURN(rc);
}
struct osd_thandle *oh;
struct osd_device *osd = osd_dev(dt->do_lu.lo_dev);
struct lu_fid *fid = (struct lu_fid *)rec;
- int rc;
+ int credits, rc = 0;
ENTRY;
LASSERT(!dt_object_remote(dt));
oh = container_of0(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
- osd_trans_declare_op(env, oh, OSD_OT_INSERT,
- osd_dto_credits_noquota[DTO_INDEX_INSERT]);
+ credits = osd_dto_credits_noquota[DTO_INDEX_INSERT];
+ if (fid != NULL) {
+ rc = osd_remote_fid(env, osd, fid);
+ if (unlikely(rc < 0))
+ RETURN(rc);
+ if (rc > 0) {
+ /* a reference to remote inode is represented by an
+ * agent inode which we have to create */
+ credits += osd_dto_credits_noquota[DTO_OBJECT_CREATE];
+ credits += osd_dto_credits_noquota[DTO_INDEX_INSERT];
+ }
+ rc = 0;
+ }
+
+ osd_trans_declare_op(env, oh, OSD_OT_INSERT, credits);
if (osd_dt_obj(dt)->oo_inode != NULL) {
struct inode *inode = osd_dt_obj(dt)->oo_inode;
osd_dt_obj(dt), true, NULL, false);
}
- if (fid == NULL)
- RETURN(0);
-
- rc = osd_remote_fid(env, osd, fid);
- if (rc <= 0)
- RETURN(rc);
-
- rc = 0;
-
- osd_trans_declare_op(env, oh, OSD_OT_CREATE,
- osd_dto_credits_noquota[DTO_OBJECT_CREATE]);
- osd_trans_declare_op(env, oh, OSD_OT_INSERT,
- osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
- osd_trans_declare_op(env, oh, OSD_OT_INSERT,
- osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1);
-
RETURN(rc);
}
if (child != NULL)
osd_object_put(env, child);
LASSERT(osd_invariant(obj));
+ osd_trans_exec_check(env, th, OSD_OT_INSERT);
RETURN(rc);
}
struct dentry *obj_dentry;
ENTRY;
- if (!dt_object_exists(dt))
+ if (!dt_object_exists(dt) || obj->oo_destroyed)
RETURN(ERR_PTR(-ENOENT));
OBD_SLAB_ALLOC_PTR_GFP(oie, osd_itea_cachep, GFP_NOFS);
file->f_mode = FMODE_64BITHASH;
else
file->f_mode = FMODE_32BITHASH;
- file->f_dentry = obj_dentry;
- file->f_mapping = obj->oo_inode->i_mapping;
+ file->f_path.dentry = obj_dentry;
+ file->f_mapping = obj->oo_inode->i_mapping;
file->f_op = obj->oo_inode->i_fop;
set_file_inode(file, obj->oo_inode);
} else {
attr &= ~LU_DIRENT_ATTRS_MASK;
if (!fid_is_sane(fid)) {
+ bool is_dotdot = false;
if (it->oie_dirent->oied_namelen == 2 &&
it->oie_dirent->oied_name[0] == '.' &&
- it->oie_dirent->oied_name[1] == '.') {
- /* If the parent is on remote MDT, and there
- * is no FID-in-dirent, then we have to get
- * the parent FID from the linkEA. */
- if (ino == osd_remote_parent_ino(dev))
- rc = osd_get_pfid_from_linkea(env, obj,
- fid);
+ it->oie_dirent->oied_name[1] == '.')
+ is_dotdot = true;
+ /* If the parent is on remote MDT, and there
+ * is no FID-in-dirent, then we have to get
+ * the parent FID from the linkEA. */
+ if (ino == osd_remote_parent_ino(dev) && is_dotdot) {
+ rc = osd_get_pfid_from_linkea(env, obj, fid);
} else {
- if (OBD_FAIL_CHECK(OBD_FAIL_FID_LOOKUP))
+ if (is_dotdot == false &&
+ OBD_FAIL_CHECK(OBD_FAIL_FID_LOOKUP))
RETURN(-ENOENT);
rc = osd_ea_fid_get(env, obj, ino, fid, id);
rc = seq_client_init(osd->od_cl_seq, NULL, LUSTRE_SEQ_METADATA,
osd->od_svname, ss->ss_server_seq);
-
if (rc != 0) {
OBD_FREE_PTR(osd->od_cl_seq);
osd->od_cl_seq = NULL;
+ RETURN(rc);
+ }
+
+ if (ss->ss_node_id == 0) {
+ /* If the OSD is on the sequence controller (MDT0), allocate the
+ * sequence here; otherwise allocate it after connecting
+ * to MDT0 (see mdt_register_lwp_callback()). */
+ rc = seq_server_alloc_meta(osd->od_cl_seq->lcs_srv,
+ &osd->od_cl_seq->lcs_space, env);
}
RETURN(rc);
if (opts != NULL && strstr(opts, "force_over_128tb") != NULL)
force_over_128tb = 1;
- OBD_PAGE_ALLOC(__page, GFP_IOFS);
+ __page = alloc_page(GFP_IOFS);
if (__page == NULL)
GOTO(out, rc = -ENOMEM);
page = (unsigned long)page_address(__page);
out:
if (__page)
- OBD_PAGE_FREE(__page);
+ __free_page(__page);
return rc;
}
GOTO(out, rc);
}
- if (server_name_is_ost(o->od_svname))
+ o->od_index = -1; /* -1 means index is invalid */
+ rc = server_name2index(o->od_svname, &o->od_index, NULL);
+ if (rc == LDD_F_SV_TYPE_OST)
o->od_is_ost = 1;
o->od_full_scrub_ratio = OFSR_DEFAULT;
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre Object Storage Device ("LUSTRE_OSD_LDISKFS_NAME")");
+MODULE_VERSION(LUSTRE_VERSION_STRING);
MODULE_LICENSE("GPL");
-cfs_module(osd, "0.1.0", osd_mod_init, osd_mod_exit);
+module_init(osd_mod_init);
+module_exit(osd_mod_exit);