*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* lustre/osd/osd_handler.c
*
#define DEBUG_SUBSYSTEM S_OSD
-#include <linux/kallsyms.h>
+#include <linux/fs_struct.h>
#include <linux/module.h>
#include <linux/user_namespace.h>
-#ifdef HAVE_UIDGID_HEADER
-# include <linux/uidgid.h>
-#endif
+#include <linux/uidgid.h>
/* prerequisite for linux/xattr.h */
#include <linux/types.h>
#include <lustre_linkea.h>
+/* encoding routines */
+#include <lustre_crypto.h>
+
/* Maximum EA size is limited by LNET_MTU for remote objects */
#define OSD_MAX_EA_SIZE 1048364
i = oti->oti_ins_cache_size * 2;
if (i == 0)
i = OSD_INS_CACHE_SIZE;
- OBD_ALLOC(idc, sizeof(*idc) * i);
+ OBD_ALLOC_PTR_ARRAY_LARGE(idc, i);
if (idc == NULL)
return ERR_PTR(-ENOMEM);
if (oti->oti_ins_cache != NULL) {
memcpy(idc, oti->oti_ins_cache,
oti->oti_ins_cache_used * sizeof(*idc));
- OBD_FREE(oti->oti_ins_cache,
- oti->oti_ins_cache_used * sizeof(*idc));
+ OBD_FREE_PTR_ARRAY_LARGE(oti->oti_ins_cache,
+ oti->oti_ins_cache_used);
}
oti->oti_ins_cache = idc;
oti->oti_ins_cache_size = i;
return idc;
}
+/*
+ * Debugging aid: log the self FID recorded in the LMA of inode @ino and,
+ * when @check_in_oi is true, the inode/generation that osd_oi_lookup()
+ * returns for that FID.
+ *
+ * The inode is obtained with osd_ldiskfs_iget() and released with iput()
+ * before returning.  Every failure is only reported through CERROR();
+ * nothing is returned to the caller.
+ */
+static void osd_idc_dump_lma(const struct lu_env *env,
+			     struct osd_device *osd,
+			     unsigned long ino,
+			     bool check_in_oi)
+{
+	struct osd_thread_info *oti = osd_oti_get(env);
+	struct lustre_ost_attrs *attrs = &oti->oti_ost_attrs;
+	/* address only; the buffer is filled by osd_get_lma() below */
+	const struct lu_fid *self_fid = &attrs->loa_lma.lma_self_fid;
+	struct osd_inode_id mapped;
+	struct inode *target;
+	int err;
+
+	target = osd_ldiskfs_iget(osd_sb(osd), ino);
+	if (IS_ERR(target)) {
+		/* no inode reference was taken, so there is nothing to put */
+		CERROR("%s: can't get inode %lu: rc = %d\n",
+		       osd->od_svname, ino, (int)PTR_ERR(target));
+		return;
+	}
+
+	if (is_bad_inode(target)) {
+		CERROR("%s: bad inode %lu\n", osd->od_svname, ino);
+		goto out_iput;
+	}
+
+	err = osd_get_lma(oti, target, &oti->oti_obj_dentry, attrs);
+	if (err != 0) {
+		CERROR("%s: can't get LMA for %lu: rc = %d\n",
+		       osd->od_svname, ino, err);
+		goto out_iput;
+	}
+
+	LCONSOLE(D_INFO, "%s: "DFID" in inode %lu/%u\n", osd->od_svname,
+		 PFID(self_fid), ino, (unsigned)target->i_generation);
+
+	/* the caller did not ask for the OI cross-check */
+	if (!check_in_oi)
+		goto out_iput;
+
+	err = osd_oi_lookup(osd_oti_get(env), osd, self_fid, &mapped, 0);
+	if (err != 0) {
+		CERROR("%s: can't lookup "DFID": rc = %d\n",
+		       osd->od_svname, PFID(self_fid), err);
+		goto out_iput;
+	}
+
+	LCONSOLE(D_INFO, "%s: "DFID" maps to %u/%u\n", osd->od_svname,
+		 PFID(self_fid), mapped.oii_ino, mapped.oii_gen);
+
+out_iput:
+	iput(target);
+}
+
+/*
+ * Debugging aid: log the inode/generation that osd_oi_lookup() returns for
+ * @fid and dump the LMA of that inode, then dump the LMA and OI mapping of
+ * @ino1 and @ino2.  An inode number of zero is skipped.
+ *
+ * Lookup failures are only reported through CERROR().
+ */
+static void osd_idc_dump_debug(const struct lu_env *env,
+			       struct osd_device *osd,
+			       const struct lu_fid *fid,
+			       unsigned long ino1,
+			       unsigned long ino2)
+{
+	struct osd_inode_id mapped;
+	int err = osd_oi_lookup(osd_oti_get(env), osd, fid, &mapped, 0);
+
+	if (err != 0) {
+		CERROR("%s: can't lookup "DFID": rc = %d\n",
+		       osd->od_svname, PFID(fid), err);
+	} else {
+		LCONSOLE(D_INFO, "%s: "DFID" maps to %u/%u\n",
+			 osd->od_svname, PFID(fid), mapped.oii_ino,
+			 mapped.oii_gen);
+		/* the FID was just resolved through the OI, skip the re-check */
+		osd_idc_dump_lma(env, osd, mapped.oii_ino, false);
+	}
+
+	if (ino1 != 0)
+		osd_idc_dump_lma(env, osd, ino1, true);
+
+	if (ino2 != 0)
+		osd_idc_dump_lma(env, osd, ino2, true);
+}
+
/*
* lookup mapping for given FID and fill it from the given object.
* the object is lolcal by definition.
if (obj->oo_inode == NULL)
return 0;
if (idc->oic_lid.oii_ino != obj->oo_inode->i_ino) {
- LASSERT(idc->oic_lid.oii_ino == 0);
+ if (idc->oic_lid.oii_ino) {
+ osd_idc_dump_debug(env, osd, fid,
+ idc->oic_lid.oii_ino,
+ obj->oo_inode->i_ino);
+ return -EINVAL;
+ }
idc->oic_lid.oii_ino = obj->oo_inode->i_ino;
idc->oic_lid.oii_gen = obj->oo_inode->i_generation;
}
lustre_loa_swab(loa, true);
/* Check LMA compatibility */
if (lma->lma_incompat & ~LMA_INCOMPAT_SUPP) {
- CWARN("%s: unsupported incompat LMA feature(s) %#x "
- "for fid = "DFID", ino = %lu\n",
+ rc = -EOPNOTSUPP;
+ CWARN("%s: unsupported incompat LMA feature(s) %#x for fid = "DFID", ino = %lu: rc = %d\n",
osd_ino2name(inode),
lma->lma_incompat & ~LMA_INCOMPAT_SUPP,
- PFID(&lma->lma_self_fid), inode->i_ino);
- rc = -EOPNOTSUPP;
+ PFID(&lma->lma_self_fid), inode->i_ino, rc);
}
} else if (rc == 0) {
rc = -ENODATA;
iput(inode);
inode = ERR_PTR(-ESTALE);
} else if (is_bad_inode(inode)) {
- CWARN("%s: bad inode: ino = %u\n",
- osd_dev2name(dev), id->oii_ino);
+ CWARN("%s: bad inode: ino = %u: rc = %d\n",
+ osd_dev2name(dev), id->oii_ino, -ENOENT);
+ iput(inode);
+ inode = ERR_PTR(-ENOENT);
+ } else if (osd_is_ea_inode(inode)) {
+ /*
+ * An EA inode is an internal ldiskfs object and should not be
+ * visible to osd.
+ */
+ CDEBUG(D_INODE, "EA inode: ino = %u\n", id->oii_ino);
iput(inode);
inode = ERR_PTR(-ENOENT);
} else if ((rc = osd_attach_jinode(inode))) {
struct lustre_ost_attrs *loa = &info->oti_ost_attrs;
struct inode *parent = child->d_parent->d_inode;
struct lu_fid *fid = NULL;
+ char fidstr[FID_LEN + 1] = "unknown";
rc2 = osd_get_lma(info, parent, child->d_parent, loa);
if (!rc2) {
fid = &loa->loa_lma.lma_self_fid;
} else if (rc2 == -ENODATA) {
- if (unlikely(parent == inode->i_sb->s_root->d_inode)) {
+ if (unlikely(is_root_inode(parent))) {
fid = &info->oti_fid3;
lu_local_obj_fid(fid, OSD_FS_ROOT_OID);
} else if (!osd->od_is_ost && osd->od_index == 0) {
}
if (fid != NULL)
- /* below message is checked in sanity.sh test_129 */
- CWARN("%s: directory (inode: %lu, FID: "DFID") %s maximum entry limit\n",
- osd_name(osd), parent->i_ino, PFID(fid),
- rc == -ENOSPC ? "has reached" : "is approaching");
- else
- /* below message is checked in sanity.sh test_129 */
- CWARN("%s: directory (inode: %lu, FID: unknown) %s maximum entry limit\n",
- osd_name(osd), parent->i_ino,
- rc == -ENOSPC ? "has reached" : "is approaching");
+ snprintf(fidstr, sizeof(fidstr), DFID, PFID(fid));
+
+ /* below message is checked in sanity.sh test_129 */
+ if (rc == -ENOSPC) {
+ CWARN("%s: directory (inode: %lu, FID: %s) has reached max size limit\n",
+ osd_name(osd), parent->i_ino, fidstr);
+ } else {
+ rc = 0; /* ignore such error now */
+ CWARN("%s: directory (inode: %lu, FID: %s) is approaching max size limit\n",
+ osd_name(osd), parent->i_ino, fidstr);
+ }
- /* ignore such error now */
- if (rc == -ENOBUFS)
- rc = 0;
}
return rc;
if (!rc) {
*fid = loa->loa_lma.lma_self_fid;
} else if (rc == -ENODATA) {
- if (unlikely(inode == osd_sb(dev)->s_root->d_inode))
+ if (unlikely(is_root_inode(inode)))
lu_local_obj_fid(fid, OSD_FS_ROOT_OID);
else
lu_igif_build(fid, inode->i_ino, inode->i_generation);
*/
again:
- inode = osd_ldiskfs_iget(osd_sb(dev), id->oii_ino);
+ inode = osd_iget(info, dev, id);
if (IS_ERR(inode)) {
rc = PTR_ERR(inode);
if (!trusted && (rc == -ENOENT || rc == -ESTALE))
GOTO(put, rc);
}
- if (is_bad_inode(inode)) {
- rc = -ENOENT;
- if (!trusted)
- goto check_oi;
-
- CDEBUG(D_INODE, "bad inode for FID: "DFID", ino = %u\n",
- PFID(fid), id->oii_ino);
- GOTO(put, rc);
- }
-
- if (id->oii_gen != OSD_OII_NOGEN &&
- inode->i_generation != id->oii_gen) {
- rc = -ESTALE;
- if (!trusted)
- goto check_oi;
-
- CDEBUG(D_INODE, "unmatched inode for FID: "DFID", ino = %u, "
- "oii_gen = %u, i_generation = %u\n", PFID(fid),
- id->oii_ino, id->oii_gen, inode->i_generation);
- GOTO(put, rc);
- }
-
- if (inode->i_nlink == 0) {
- rc = -ENOENT;
- if (!trusted)
- goto check_oi;
-
- CDEBUG(D_INODE, "stale inode for FID: "DFID", ino = %u\n",
- PFID(fid), id->oii_ino);
- GOTO(put, rc);
- }
-
- ldiskfs_clear_inode_state(inode, LDISKFS_STATE_LUSTRE_DESTROY);
-
check_oi:
if (rc != 0) {
__u32 saved_ino = id->oii_ino;
rc = -ENOENT;
else
rc = -EREMCHG;
- } else {
- if (id->oii_gen == OSD_OII_NOGEN)
- osd_id_gen(id, inode->i_ino, inode->i_generation);
-
- /*
- * Do not update file c/mtime in ldiskfs.
- * NB: we don't have any lock to protect this because we don't
- * have reference on osd_object now, but contention with
- * another lookup + attr_set can't happen in the tiny window
- * between if (...) and set S_NOCMTIME.
- */
- if (!(inode->i_flags & S_NOCMTIME))
- inode->i_flags |= S_NOCMTIME;
}
GOTO(put, rc);
struct dir_context ctx;
struct osd_thread_info *oclb_info;
struct osd_device *oclb_dev;
- struct osd_idmap_cache *oclb_oic;
int oclb_items;
bool oclb_found;
};
struct lu_fid *fid = &oti->oti_fid3;
struct osd_inode_id *id = &oti->oti_id3;
struct osd_device *dev = oclb->oclb_dev;
- struct osd_idmap_cache *oic = oclb->oclb_oic;
struct inode *inode;
oclb->oclb_items++;
iput(inode);
osd_add_oi_cache(oti, dev, id, fid);
- oic->oic_fid = *fid;
- oic->oic_lid = *id;
- oic->oic_dev = dev;
- osd_oii_insert(dev, oic, true);
+ osd_oii_insert(dev, fid, id, true);
oclb->oclb_found = true;
return 1;
* the correct OI mapping for the slave MDT-object.
*/
static int osd_check_lmv(struct osd_thread_info *oti, struct osd_device *dev,
- struct inode *inode, struct osd_idmap_cache *oic)
+ struct inode *inode)
{
struct lu_buf *buf = &oti->oti_big_buf;
- struct dentry *dentry = &oti->oti_obj_dentry;
- struct file *filp = &oti->oti_file;
- const struct file_operations *fops;
+ struct file *filp;
struct lmv_mds_md_v1 *lmv1;
struct osd_check_lmv_buf oclb = {
.ctx.actor = osd_stripe_dir_filldir,
.oclb_info = oti,
.oclb_dev = dev,
- .oclb_oic = oic,
.oclb_found = false,
};
int rc = 0;
ENTRY;
+ /* We should use the VFS layer to create a real dentry. */
+ oti->oti_obj_dentry.d_inode = inode;
+ oti->oti_obj_dentry.d_sb = inode->i_sb;
+
+ filp = alloc_file_pseudo(inode, dev->od_mnt, "/", O_NOATIME,
+ inode->i_fop);
+ if (IS_ERR(filp))
+ RETURN(-ENOMEM);
+ filp->f_mode |= FMODE_64BITHASH;
+ filp->f_pos = 0;
+ ihold(inode);
again:
- rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMV, buf->lb_buf,
- buf->lb_len);
+ rc = __osd_xattr_get(inode, filp->f_path.dentry, XATTR_NAME_LMV,
+ buf->lb_buf, buf->lb_len);
if (rc == -ERANGE) {
- rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMV, NULL, 0);
+ rc = __osd_xattr_get(inode, filp->f_path.dentry,
+ XATTR_NAME_LMV, NULL, 0);
if (rc > 0) {
lu_buf_realloc(buf, rc);
if (buf->lb_buf == NULL)
if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1)
GOTO(out, rc = 0);
- fops = inode->i_fop;
- dentry->d_inode = inode;
- dentry->d_sb = inode->i_sb;
- filp->f_pos = 0;
- filp->f_path.dentry = dentry;
- filp->f_flags |= O_NOATIME;
- filp->f_mode = FMODE_64BITHASH | FMODE_NONOTIFY;
- filp->f_mapping = inode->i_mapping;
- filp->f_op = fops;
- filp->private_data = NULL;
- set_file_inode(filp, inode);
- rc = osd_security_file_alloc(filp);
- if (rc)
- goto out;
-
do {
oclb.oclb_items = 0;
rc = iterate_dir(filp, &oclb.ctx);
} while (rc >= 0 && oclb.oclb_items > 0 && !oclb.oclb_found &&
filp->f_pos != LDISKFS_HTREE_EOF_64BIT);
- fops->release(inode, filp);
-
out:
+ fput(filp);
if (rc < 0)
- CDEBUG(D_LFSCK, "%s: fail to check LMV EA, inode = %lu/%u,"
- DFID": rc = %d\n", osd_ino2name(inode),
- inode->i_ino, inode->i_generation,
- PFID(&oic->oic_fid), rc);
+ CDEBUG(D_LFSCK,
+ "%s: cannot check LMV, ino = %lu/%u: rc = %d\n",
+ osd_ino2name(inode), inode->i_ino, inode->i_generation,
+ rc);
else
rc = 0;
LINVRNT(osd_invariant(obj));
LASSERT(obj->oo_inode == NULL);
- LASSERTF(fid_is_sane(fid) || fid_is_idif(fid), DFID"\n", PFID(fid));
+
+ if (fid_is_sane(fid) == 0) {
+ CERROR("%s: invalid FID "DFID"\n", ldev->ld_obd->obd_name,
+ PFID(fid));
+ dump_stack();
+ RETURN(-EINVAL);
+ }
dev = osd_dev(ldev);
scrub = &dev->od_scrub.os_scrub;
}
id = &info->oti_id;
+ memset(id, 0, sizeof(struct osd_inode_id));
if (!list_empty(&scrub->os_inconsistent_items)) {
/* Search order: 2. OI scrub pending list. */
result = osd_oii_lookup(dev, fid, id);
/* Search order: 3. OI files. */
result = osd_oi_lookup(info, dev, fid, id, OI_CHECK_FLD);
if (result == -ENOENT) {
- if (!(fid_is_norm(fid) || fid_is_igif(fid)) ||
+ if (!fid_is_norm(fid) ||
fid_is_on_ost(info, dev, fid, OI_CHECK_FLD) ||
!ldiskfs_test_bit(osd_oi_fid2idx(dev, fid),
sf->sf_oi_bitmap))
}
}
- if (thread_is_running(&scrub->os_thread)) {
+ if (scrub->os_running) {
if (scrub->os_partial_scan && !scrub->os_in_join)
goto join;
- if (IS_ERR_OR_NULL(inode) || result)
+ if (IS_ERR_OR_NULL(inode) || result) {
+ osd_oii_insert(dev, fid, id, result == -ENOENT);
GOTO(out, result = -EINPROGRESS);
+ }
LASSERT(remote);
LASSERT(obj->oo_inode == inode);
- osd_add_oi_cache(info, dev, id, fid);
- osd_oii_insert(dev, oic, true);
+ osd_oii_insert(dev, fid, id, true);
goto found;
}
- if (dev->od_auto_scrub_interval == AS_NEVER) {
+ if (dev->od_scrub.os_scrub.os_auto_scrub_interval == AS_NEVER) {
if (!remote)
GOTO(out, result = -EREMCHG);
join:
rc1 = osd_scrub_start(env, dev, flags);
- LCONSOLE_WARN("%s: trigger OI scrub by RPC for the " DFID" with flags "
- "0x%x, rc = %d\n", osd_name(dev), PFID(fid), flags, rc1);
+ CDEBUG_LIMIT(D_LFSCK | D_CONSOLE | D_WARNING,
+ "%s: trigger OI scrub by RPC for "DFID"/%u with flags %#x: rc = %d\n",
+ osd_name(dev), PFID(fid), id->oii_ino, flags, rc1);
if (rc1 && rc1 != -EALREADY)
GOTO(out, result = -EREMCHG);
- if (IS_ERR_OR_NULL(inode) || result)
+ if (IS_ERR_OR_NULL(inode) || result) {
+ osd_oii_insert(dev, fid, id, result == -ENOENT);
GOTO(out, result = -EINPROGRESS);
+ }
LASSERT(remote);
LASSERT(obj->oo_inode == inode);
- osd_add_oi_cache(info, dev, id, fid);
- osd_oii_insert(dev, oic, true);
+ osd_oii_insert(dev, fid, id, true);
goto found;
check_lma:
if (saved_ino == id->oii_ino && saved_gen == id->oii_gen) {
result = -EREMCHG;
+ osd_scrub_refresh_mapping(info, dev, fid, id, DTO_INDEX_DELETE,
+ true, 0, NULL);
goto trigger;
}
if (S_ISDIR(inode->i_mode) &&
(flags & SS_AUTO_PARTIAL || sf->sf_status == SS_SCANNING))
- osd_check_lmv(info, dev, inode, oic);
+ osd_check_lmv(info, dev, inode);
result = osd_attach_jinode(inode);
if (result)
LINVRNT(osd_invariant(obj));
+ if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_LLOG_UMOUNT_RACE) &&
+ cfs_fail_val == 2) {
+ struct osd_thread_info *info = osd_oti_get(env);
+ struct osd_idmap_cache *oic = &info->oti_cache;
+ /* invalidate thread cache */
+ memset(&oic->oic_fid, 0, sizeof(oic->oic_fid));
+ }
if (fid_is_otable_it(&l->lo_header->loh_fid)) {
obj->oo_dt.do_ops = &osd_obj_otable_it_ops;
l->lo_header->loh_attr |= LOHA_EXISTS;
result = 0;
}
}
+ obj->oo_dirent_count = LU_DIRENT_COUNT_UNSET;
LINVRNT(osd_invariant(obj));
return result;
size_t namelen = strlen(name);
int rc;
- ENTRY;
-
rcu_read_lock();
list_for_each_entry_rcu(tmp, &obj->oo_xattr_list, oxe_list) {
if (namelen == tmp->oxe_namelen &&
GOTO(out, rc = -ERANGE);
memcpy(buf->lb_buf, &oxe->oxe_buf[namelen + 1], rc);
- EXIT;
out:
rcu_read_unlock();
dt_object_fini(&obj->oo_dt);
if (obj->oo_hl_head != NULL)
ldiskfs_htree_lock_head_free(obj->oo_hl_head);
+ /* obj doesn't contain an lu_object_header, so we don't need call_rcu */
OBD_FREE_PTR(obj);
- if (unlikely(h)) {
- lu_object_header_fini(h);
- OBD_FREE_PTR(h);
- }
+ if (unlikely(h))
+ lu_object_header_free(h);
}
/*
}
}
-/*
- * Concurrency: no concurrent access is possible that late in object
- * life-cycle (for all existing callers, that is. New callers have to provide
- * their own locking.)
- */
-static int osd_inode_unlinked(const struct inode *inode)
-{
- return inode->i_nlink == 0;
-}
-
enum {
OSD_TXN_OI_DELETE_CREDITS = 20,
OSD_TXN_INODE_DELETE_CREDITS = 20
static void osd_trans_commit_cb(struct super_block *sb,
struct ldiskfs_journal_cb_entry *jcb, int error)
{
- struct osd_thandle *oh = container_of0(jcb, struct osd_thandle, ot_jcb);
+ struct osd_thandle *oh = container_of(jcb, struct osd_thandle, ot_jcb);
struct thandle *th = &oh->ot_super;
struct lu_device *lud = &th->th_dev->dd_lu_dev;
+ struct osd_device *osd = osd_dev(lud);
struct dt_txn_commit_cb *dcb, *tmp;
LASSERT(oh->ot_handle == NULL);
if (error)
CERROR("transaction @0x%p commit error: %d\n", th, error);
- OBD_FAIL_TIMEOUT(OBD_FAIL_OST_DELAY_TRANS, 40);
/* call per-transaction callbacks if any */
list_for_each_entry_safe(dcb, tmp, &oh->ot_commit_dcb_list,
dcb_linkage) {
}
lu_ref_del_at(&lud->ld_reference, &oh->ot_dev_link, "osd-tx", th);
- lu_device_put(lud);
+ if (atomic_dec_and_test(&osd->od_commit_cb_in_flight))
+ wake_up(&osd->od_commit_cb_done);
th->th_dev = NULL;
OBD_FREE_PTR(oh);
}
-#ifndef HAVE_SB_START_WRITE
-# define sb_start_write(sb) do {} while (0)
-# define sb_end_write(sb) do {} while (0)
-#endif
-
static struct thandle *osd_trans_create(const struct lu_env *env,
struct dt_device *d)
{
th->th_dev = d;
th->th_result = 0;
oh->ot_credits = 0;
+ oh->oh_declared_ext = 0;
INIT_LIST_HEAD(&oh->ot_commit_dcb_list);
INIT_LIST_HEAD(&oh->ot_stop_dcb_list);
INIT_LIST_HEAD(&oh->ot_trunc_locks);
struct osd_thread_info *oti = osd_oti_get(env);
struct osd_thandle *oh;
- oh = container_of0(th, struct osd_thandle, ot_super);
+ oh = container_of(th, struct osd_thandle, ot_super);
LASSERT(oh != NULL);
CWARN(" create: %u/%u/%u, destroy: %u/%u/%u\n",
LASSERT(current->journal_info == NULL);
- oh = container_of0(th, struct osd_thandle, ot_super);
+ oh = container_of(th, struct osd_thandle, ot_super);
LASSERT(oh != NULL);
LASSERT(oh->ot_handle == NULL);
oh->ot_handle = jh;
LASSERT(oti->oti_txns == 0);
- lu_device_get(&d->dd_lu_dev);
+ atomic_inc(&dev->od_commit_cb_in_flight);
lu_ref_add_at(&d->dd_lu_dev.ld_reference, &oh->ot_dev_link,
"osd-tx", th);
oti->oti_txns++;
ENTRY;
- oh = container_of0(th, struct osd_thandle, ot_super);
+ oh = container_of(th, struct osd_thandle, ot_super);
remove_agents = oh->ot_remove_agents;
if (!rc)
rc = rc2;
- osd_process_truncates(&truncates);
+ /* We preserve the original behavior of ignoring any
+ * failures with the underlying punch / truncate
+ * operation. We do record for debugging if an error
+ * does occur in the lctl dk logs.
+ */
+ rc2 = osd_process_truncates(env, &truncates);
+ if (rc2 != 0)
+ CERROR("%s: failed truncate process: rc = %d\n",
+ osd_name(osd), rc2);
} else {
osd_trans_stop_cb(oh, th->th_result);
OBD_FREE_PTR(oh);
}
- osd_trunc_unlock_all(&truncates);
+ osd_trunc_unlock_all(env, &truncates);
/* inform the quota slave device that the transaction is stopping */
qsd_op_end(env, qsd, qtrans);
if (unlikely(remove_agents != 0))
osd_process_scheduled_agent_removals(env, osd);
+ LASSERT(oti->oti_ins_cache_depth > 0);
oti->oti_ins_cache_depth--;
/* reset OI cache for safety */
if (oti->oti_ins_cache_depth == 0)
static int osd_trans_cb_add(struct thandle *th, struct dt_txn_commit_cb *dcb)
{
- struct osd_thandle *oh = container_of0(th, struct osd_thandle,
- ot_super);
+ struct osd_thandle *oh = container_of(th, struct osd_thandle,
+ ot_super);
LASSERT(dcb->dcb_magic == TRANS_COMMIT_CB_MAGIC);
LASSERT(&dcb->dcb_func != NULL);
if (!inode)
return;
+ if (osd_has_index(obj) && obj->oo_dt.do_index_ops == &osd_index_iam_ops)
+ ldiskfs_set_inode_flag(inode, LDISKFS_INODE_JOURNAL_DATA);
+
uid = i_uid_read(inode);
gid = i_gid_read(inode);
projid = i_projid_read(inode);
statfs_pack(sfs, ksfs);
if (unlikely(sb->s_flags & SB_RDONLY))
- sfs->os_state |= OS_STATE_READONLY;
+ sfs->os_state |= OS_STATFS_READONLY;
- sfs->os_state |= osd->od_nonrotational ? OS_STATE_NONROT : 0;
+ sfs->os_state |= osd->od_nonrotational ? OS_STATFS_NONROT : 0;
if (ldiskfs_has_feature_extents(sb))
sfs->os_maxbytes = sb->s_maxbytes;
*
* Reserve 0.78% of total space, at least 8MB for small filesystems.
*/
- CLASSERT(OSD_STATFS_RESERVED > LDISKFS_MAX_BLOCK_SIZE);
+ BUILD_BUG_ON(OSD_STATFS_RESERVED <= LDISKFS_MAX_BLOCK_SIZE);
reserved = OSD_STATFS_RESERVED >> sb->s_blocksize_bits;
if (likely(sfs->os_blocks >= reserved << OSD_STATFS_RESERVED_SHIFT))
reserved = sfs->os_blocks >> OSD_STATFS_RESERVED_SHIFT;
OBD_CKSUM_T10IP512 :
OBD_CKSUM_T10IP4K;
} else {
- CERROR("%s: unsupported checksum type of "
- "T10PI type '%s'",
+ CERROR("%s: unsupported checksum type of T10PI type '%s'\n",
d->od_svname, name);
}
} else {
- CERROR("%s: unsupported T10PI type '%s'",
+ CERROR("%s: unsupported T10PI type '%s'\n",
d->od_svname, name);
}
}
+
+ param->ddp_has_lseek_data_hole = true;
}
static struct super_block *osd_mnt_sb_get(const struct dt_device *d)
RETURN(rc);
}
-/* Our own copy of the set readonly functions if present, or NU if not. */
-static int (*priv_dev_set_rdonly)(struct block_device *bdev);
-static int (*priv_dev_check_rdonly)(struct block_device *bdev);
-/* static int (*priv_dev_clear_rdonly)(struct block_device *bdev); */
-static int (*priv_security_file_alloc)(struct file *file);
-
-int osd_security_file_alloc(struct file *file)
-{
- if (priv_security_file_alloc)
- return priv_security_file_alloc(file);
- return 0;
-}
-
/*
* Concurrency: shouldn't matter.
*/
ENTRY;
- if (priv_dev_set_rdonly) {
- struct block_device *jdev = LDISKFS_SB(sb)->journal_bdev;
-
- rc = 0;
- CERROR("*** setting %s read-only ***\n",
- osd_dt_dev(d)->od_svname);
-
- if (sb->s_op->freeze_fs) {
- rc = sb->s_op->freeze_fs(sb);
- if (rc)
- goto out;
- }
-
- if (jdev && (jdev != dev)) {
- CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n",
- (long)jdev);
- priv_dev_set_rdonly(jdev);
- }
- CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
- priv_dev_set_rdonly(dev);
-
- if (sb->s_op->unfreeze_fs)
- sb->s_op->unfreeze_fs(sb);
- }
-
-out:
- if (rc)
- CERROR("%s: %lx CANNOT BE SET READONLY: rc = %d\n",
- osd_dt_dev(d)->od_svname, (long)dev, rc);
+ CERROR("%s: %lx CANNOT BE SET READONLY: rc = %d\n",
+ osd_dt_dev(d)->od_svname, (long)dev, rc);
RETURN(rc);
}
[DTO_ATTR_SET_CHOWN] = 0
};
+/*
+ * Reserve or free quota for some operation.
+ *
+ * Picks the quota slave instance that matches the request, the data one
+ * (od_quota_slave_dt) when @qi->lqi_is_blk is set and the metadata one
+ * (od_quota_slave_md) otherwise, and forwards @qi to
+ * qsd_reserve_or_free_quota().  Returns whatever that call returns.
+ */
+static int osd_reserve_or_free_quota(const struct lu_env *env,
+				     struct dt_device *dev,
+				     struct lquota_id_info *qi)
+{
+	struct osd_device *osd = osd_dt_dev(dev);
+	struct qsd_instance *slave;
+	int rc;
+
+	ENTRY;
+
+	slave = qi->lqi_is_blk ? osd->od_quota_slave_dt :
+				 osd->od_quota_slave_md;
+
+	rc = qsd_reserve_or_free_quota(env, slave, qi);
+	RETURN(rc);
+}
+
static const struct dt_device_operations osd_dt_ops = {
- .dt_root_get = osd_root_get,
- .dt_statfs = osd_statfs,
- .dt_trans_create = osd_trans_create,
- .dt_trans_start = osd_trans_start,
- .dt_trans_stop = osd_trans_stop,
- .dt_trans_cb_add = osd_trans_cb_add,
- .dt_conf_get = osd_conf_get,
- .dt_mnt_sb_get = osd_mnt_sb_get,
- .dt_sync = osd_sync,
- .dt_ro = osd_ro,
- .dt_commit_async = osd_commit_async,
+ .dt_root_get = osd_root_get,
+ .dt_statfs = osd_statfs,
+ .dt_trans_create = osd_trans_create,
+ .dt_trans_start = osd_trans_start,
+ .dt_trans_stop = osd_trans_stop,
+ .dt_trans_cb_add = osd_trans_cb_add,
+ .dt_conf_get = osd_conf_get,
+ .dt_mnt_sb_get = osd_mnt_sb_get,
+ .dt_sync = osd_sync,
+ .dt_ro = osd_ro,
+ .dt_commit_async = osd_commit_async,
+ .dt_reserve_or_free_quota = osd_reserve_or_free_quota,
};
static void osd_read_lock(const struct lu_env *env, struct dt_object *dt,
attr->la_valid |= LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE |
LA_SIZE | LA_BLOCKS | LA_UID | LA_GID |
LA_PROJID | LA_FLAGS | LA_NLINK | LA_RDEV |
- LA_BLKSIZE | LA_TYPE;
+ LA_BLKSIZE | LA_TYPE | LA_BTIME;
attr->la_atime = inode->i_atime.tv_sec;
attr->la_mtime = inode->i_mtime.tv_sec;
attr->la_ctime = inode->i_ctime.tv_sec;
+ attr->la_btime = LDISKFS_I(inode)->i_crtime.tv_sec;
attr->la_mode = inode->i_mode;
attr->la_size = i_size_read(inode);
attr->la_blocks = inode->i_blocks;
attr->la_flags |= LUSTRE_PROJINHERIT_FL;
}
+/*
+ * Report the number of entries in directory @dt through @count.
+ *
+ * A count already recorded in oo_dirent_count is returned directly.
+ * Otherwise the directory is walked with its index iterator, counting every
+ * position whose key size is non-zero.  The result is stored back into
+ * oo_dirent_count only when the walk stops with the iterator returning 1
+ * (presumably "end of directory"; confirm against the dt_it_ops contract).
+ *
+ * Returns 0 on success or a negative errno from the iterator.  On such an
+ * error *count may hold a partial value.
+ */
+static int osd_dirent_count(const struct lu_env *env, struct dt_object *dt,
+			    u64 *count)
+{
+	struct osd_object *dir = osd_dt_obj(dt);
+	const struct dt_it_ops *it_ops;
+	struct dt_it *iter;
+	int rc;
+
+	ENTRY;
+
+	LASSERT(S_ISDIR(dir->oo_inode->i_mode));
+	LASSERT(fid_is_namespace_visible(lu_object_fid(&dir->oo_dt.do_lu)));
+
+	/* a count is already recorded on the object, no walk needed */
+	if (dir->oo_dirent_count != LU_DIRENT_COUNT_UNSET) {
+		*count = dir->oo_dirent_count;
+		RETURN(0);
+	}
+
+	/* directory not initialized yet */
+	if (dt->do_index_ops == NULL) {
+		*count = 0;
+		RETURN(0);
+	}
+
+	it_ops = &dt->do_index_ops->dio_it;
+	iter = it_ops->init(env, dt, LUDA_64BITHASH);
+	if (IS_ERR(iter))
+		RETURN(PTR_ERR(iter));
+
+	rc = it_ops->load(env, iter, 0);
+	if (rc < 0) {
+		/* -ENODATA is reported as an empty directory, not an error */
+		if (rc == -ENODATA) {
+			*count = 0;
+			rc = 0;
+		}
+		GOTO(out, rc);
+	}
+	if (rc > 0)
+		rc = it_ops->next(env, iter);
+
+	/*
+	 * Keep stepping while the iterator reports 0 or -ESTALE; a position
+	 * that returned -ESTALE or has an empty key is not counted.
+	 */
+	*count = 0;
+	while (rc == 0 || rc == -ESTALE) {
+		if (rc == 0 && it_ops->key_size(env, iter) != 0)
+			(*count)++;
+		rc = it_ops->next(env, iter);
+	}
+
+	if (rc == 1) {
+		dir->oo_dirent_count = *count;
+		rc = 0;
+	}
+out:
+	it_ops->put(env, iter);
+	it_ops->fini(env, iter);
+
+	RETURN(rc);
+}
+
static int osd_attr_get(const struct lu_env *env, struct dt_object *dt,
struct lu_attr *attr)
{
struct osd_object *obj = osd_dt_obj(dt);
+ int rc = 0;
if (unlikely(!dt_object_exists(dt)))
return -ENOENT;
attr->la_valid |= LA_FLAGS;
attr->la_flags |= LUSTRE_ORPHAN_FL;
}
+ if (obj->oo_lma_flags & LUSTRE_ENCRYPT_FL) {
+ attr->la_valid |= LA_FLAGS;
+ attr->la_flags |= LUSTRE_ENCRYPT_FL;
+ }
spin_unlock(&obj->oo_guard);
- return 0;
+ if (S_ISDIR(obj->oo_inode->i_mode) &&
+ fid_is_namespace_visible(lu_object_fid(&dt->do_lu)))
+ rc = osd_dirent_count(env, dt, &attr->la_dirent_count);
+
+ return rc;
}
static int osd_declare_attr_qid(const struct lu_env *env,
struct osd_object *obj,
struct osd_thandle *oh, long long bspace,
qid_t old_id, qid_t new_id, bool enforce,
- unsigned int type, bool ignore_edquot)
+ unsigned int type)
{
int rc;
struct osd_thread_info *info = osd_oti_get(env);
qi->lqi_space = 1;
/* Reserve credits for the new id */
rc = osd_declare_qid(env, oh, qi, NULL, enforce, NULL);
- if (ignore_edquot && (rc == -EDQUOT || rc == -EINPROGRESS))
+ if (rc == -EDQUOT || rc == -EINPROGRESS)
rc = 0;
if (rc)
RETURN(rc);
qi->lqi_id.qid_uid = old_id;
qi->lqi_space = -1;
rc = osd_declare_qid(env, oh, qi, obj, enforce, NULL);
- if (ignore_edquot && (rc == -EDQUOT || rc == -EINPROGRESS))
+ if (rc == -EDQUOT || rc == -EINPROGRESS)
rc = 0;
if (rc)
RETURN(rc);
* to save credit reservation.
*/
rc = osd_declare_qid(env, oh, qi, obj, enforce, NULL);
- if (ignore_edquot && (rc == -EDQUOT || rc == -EINPROGRESS))
+ if (rc == -EDQUOT || rc == -EINPROGRESS)
rc = 0;
if (rc)
RETURN(rc);
qi->lqi_id.qid_uid = old_id;
qi->lqi_space = -bspace;
rc = osd_declare_qid(env, oh, qi, obj, enforce, NULL);
- if (ignore_edquot && (rc == -EDQUOT || rc == -EINPROGRESS))
+ if (rc == -EDQUOT || rc == -EINPROGRESS)
rc = 0;
RETURN(rc);
obj = osd_dt_obj(dt);
LASSERT(osd_invariant(obj));
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
osd_trans_declare_op(env, oh, OSD_OT_ATTR_SET,
* space adjustment once the operation is completed.
*/
if (attr->la_valid & LA_UID || attr->la_valid & LA_GID) {
- bool ignore_edquot = !(attr->la_flags & LUSTRE_SET_SYNC_FL);
-
- if (!ignore_edquot)
- CDEBUG(D_QUOTA, "%s: enforce quota on UID %u, GID %u"
- "(the quota space is %lld)\n",
- obj->oo_inode->i_sb->s_id, attr->la_uid,
- attr->la_gid, bspace);
-
/* USERQUOTA */
uid = i_uid_read(obj->oo_inode);
enforce = (attr->la_valid & LA_UID) && (attr->la_uid != uid);
rc = osd_declare_attr_qid(env, obj, oh, bspace, uid,
- attr->la_uid, enforce, USRQUOTA,
- true);
+ attr->la_uid, enforce, USRQUOTA);
if (rc)
RETURN(rc);
gid = i_gid_read(obj->oo_inode);
+ CDEBUG(D_QUOTA, "declare uid %d -> %d gid %d -> %d\n", uid,
+ attr->la_uid, gid, attr->la_gid);
enforce = (attr->la_valid & LA_GID) && (attr->la_gid != gid);
- rc = osd_declare_attr_qid(env, obj, oh, bspace,
- i_gid_read(obj->oo_inode),
- attr->la_gid, enforce, GRPQUOTA,
- ignore_edquot);
+ rc = osd_declare_attr_qid(env, obj, oh, bspace, gid,
+ attr->la_gid, enforce, GRPQUOTA);
if (rc)
RETURN(rc);
(attr->la_projid != projid);
rc = osd_declare_attr_qid(env, obj, oh, bspace,
(qid_t)projid, (qid_t)attr->la_projid,
- enforce, PRJQUOTA, true);
+ enforce, PRJQUOTA);
if (rc)
RETURN(rc);
}
/* always keep S_NOCMTIME */
inode->i_flags = ll_ext_to_inode_flags(attr->la_flags) |
S_NOCMTIME;
+#if defined(S_ENCRYPTED)
+ /* Always remove S_ENCRYPTED, because ldiskfs must not be
+ * aware of encryption status. It is just stored into LMA
+ * so that it can be forwarded to client side.
+ */
+ inode->i_flags &= ~S_ENCRYPTED;
+#endif
/*
* Ext4 did not transfer inherit flags from
* @inode->i_flags to raw inode i_flags when writing
}
#ifdef HAVE_PROJECT_QUOTA
-static int osd_transfer_project(struct inode *inode, __u32 projid)
+static int osd_transfer_project(struct inode *inode, __u32 projid,
+ struct thandle *handle)
{
struct super_block *sb = inode->i_sb;
struct ldiskfs_inode_info *ei = LDISKFS_I(inode);
raw_inode = ldiskfs_raw_inode(&iloc);
if (!LDISKFS_FITS_IN_INODE(raw_inode, ei, i_projid)) {
- err = -EOVERFLOW;
- brelse(iloc.bh);
- return err;
+ struct osd_thandle *oh = container_of(handle,
+ struct osd_thandle,
+ ot_super);
+ /**
+ * try to expand inode size automatically.
+ */
+ ldiskfs_mark_inode_dirty(oh->ot_handle, inode);
+ if (!LDISKFS_FITS_IN_INODE(raw_inode, ei, i_projid)) {
+ err = -EOVERFLOW;
+ brelse(iloc.bh);
+ return err;
+ }
}
brelse(iloc.bh);
dquot_initialize(inode);
transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
if (transfer_to[PRJQUOTA]) {
+ lock_dquot_transfer(inode);
err = __dquot_transfer(inode, transfer_to);
+ unlock_dquot_transfer(inode);
dqput(transfer_to[PRJQUOTA]);
if (err)
return err;
}
#endif
-static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr)
+static int osd_quota_transfer(struct inode *inode, const struct lu_attr *attr,
+ struct thandle *handle)
{
int rc;
(attr->la_valid & LA_GID && attr->la_gid != i_gid_read(inode))) {
struct iattr iattr;
+ CDEBUG(D_QUOTA,
+ "executing dquot_transfer inode %ld uid %d -> %d gid %d -> %d\n",
+ inode->i_ino, i_uid_read(inode), attr->la_uid,
+ i_gid_read(inode), attr->la_gid);
+
dquot_initialize(inode);
iattr.ia_valid = 0;
if (attr->la_valid & LA_UID)
iattr.ia_uid = make_kuid(&init_user_ns, attr->la_uid);
iattr.ia_gid = make_kgid(&init_user_ns, attr->la_gid);
+ lock_dquot_transfer(inode);
rc = dquot_transfer(inode, &iattr);
+ unlock_dquot_transfer(inode);
if (rc) {
- CERROR("%s: quota transfer failed: rc = %d. Is quota "
- "enforcement enabled on the ldiskfs "
- "filesystem?\n", inode->i_sb->s_id, rc);
+ CERROR("%s: quota transfer failed. Is quota enforcement enabled on the ldiskfs filesystem? rc = %d\n",
+ osd_ino2name(inode), rc);
return rc;
}
}
/* Handle project id transfer here properly */
if (attr->la_valid & LA_PROJID &&
attr->la_projid != i_projid_read(inode)) {
+ if (!projid_valid(make_kprojid(&init_user_ns, attr->la_projid)))
+ return -EINVAL;
#ifdef HAVE_PROJECT_QUOTA
- rc = osd_transfer_project(inode, attr->la_projid);
+ rc = osd_transfer_project(inode, attr->la_projid, handle);
#else
rc = -ENOTSUPP;
#endif
if (rc) {
- CERROR("%s: quota transfer failed: rc = %d. Is project "
- "enforcement enabled on the ldiskfs "
- "filesystem?\n", inode->i_sb->s_id, rc);
+ CERROR("%s: quota transfer failed. Is project enforcement enabled on the ldiskfs filesystem? rc = %d\n",
+ osd_ino2name(inode), rc);
return rc;
}
}
if (unlikely(ipd == NULL))
RETURN(-ENOMEM);
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
rc = iam_update(oh->ot_handle, bag,
(const struct iam_key *)fid1,
(const struct iam_rec *)id, ipd);
inode = obj->oo_inode;
- rc = osd_quota_transfer(inode, attr);
+ rc = osd_quota_transfer(inode, attr, handle);
if (rc)
return rc;
if (rc != 0)
GOTO(out, rc);
- ll_dirty_inode(inode, I_DIRTY_DATASYNC);
+ osd_dirty_inode(inode, I_DIRTY_DATASYNC);
+
+ osd_trans_exec_check(env, handle, OSD_OT_ATTR_SET);
if (!(attr->la_valid & LA_FLAGS))
GOTO(out, rc);
lma->lma_incompat |=
lustre_to_lma_flags(attr->la_flags);
lustre_lma_swab(lma);
+
+ osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET);
+
rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA,
lma, sizeof(*lma), XATTR_REPLACE);
if (rc != 0) {
osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
}
out:
- osd_trans_exec_check(env, handle, OSD_OT_ATTR_SET);
return rc;
}
struct osd_thandle *oth;
struct dt_object *parent = NULL;
struct inode *inode;
- uid_t owner[2] = {0, 0};
+ struct iattr iattr = {
+ .ia_valid = ATTR_UID | ATTR_GID |
+ ATTR_CTIME | ATTR_MTIME | ATTR_ATIME,
+ .ia_ctime.tv_sec = attr->la_ctime,
+ .ia_mtime.tv_sec = attr->la_mtime,
+ .ia_atime.tv_sec = attr->la_atime,
+ .ia_uid = GLOBAL_ROOT_UID,
+ .ia_gid = GLOBAL_ROOT_GID,
+ };
+ const struct osd_timespec omit = { .tv_nsec = UTIME_OMIT };
if (attr->la_valid & LA_UID)
- owner[0] = attr->la_uid;
+ iattr.ia_uid = make_kuid(&init_user_ns, attr->la_uid);
if (attr->la_valid & LA_GID)
- owner[1] = attr->la_gid;
+ iattr.ia_gid = make_kgid(&init_user_ns, attr->la_gid);
LINVRNT(osd_invariant(obj));
LASSERT(obj->oo_inode == NULL);
!dt_object_remote(hint->dah_parent))
parent = hint->dah_parent;
+ /* if a time component is not valid set it to UTIME_OMIT */
+ if (!(attr->la_valid & LA_CTIME))
+ iattr.ia_ctime = omit;
+ if (!(attr->la_valid & LA_MTIME))
+ iattr.ia_mtime = omit;
+ if (!(attr->la_valid & LA_ATIME))
+ iattr.ia_atime = omit;
+
inode = ldiskfs_create_inode(oth->ot_handle,
parent ? osd_dt_obj(parent)->oo_inode :
osd_sb(osd)->s_root->d_inode,
- mode, owner);
+ mode, &iattr);
if (!IS_ERR(inode)) {
/* Do not update file c/mtime in ldiskfs. */
inode->i_flags |= S_NOCMTIME;
oth = container_of(th, struct osd_thandle, ot_super);
LASSERT(oth->ot_handle->h_transaction != NULL);
+ if (fid_is_namespace_visible(lu_object_fid(&obj->oo_dt.do_lu)))
+ obj->oo_dirent_count = 0;
result = osd_mkfile(info, obj, mode, hint, th, attr);
return result;
return result;
}
-
static void osd_ah_init(const struct lu_env *env, struct dt_allocation_hint *ah,
struct dt_object *parent, struct dt_object *child,
umode_t child_mode)
LASSERT(ah);
ah->dah_parent = parent;
- ah->dah_mode = child_mode;
if (parent != NULL && !dt_object_remote(parent)) {
/* will help to find FID->ino at dt_insert("..") */
}
static void osd_attr_init(struct osd_thread_info *info, struct osd_object *obj,
- struct lu_attr *attr, struct dt_object_format *dof)
+ struct lu_attr *attr, struct dt_object_format *dof,
+ struct thandle *handle)
{
struct inode *inode = obj->oo_inode;
__u64 valid = attr->la_valid;
if ((valid & LA_MTIME) && (attr->la_mtime == inode->i_mtime.tv_sec))
attr->la_valid &= ~LA_MTIME;
- result = osd_quota_transfer(inode, attr);
+ result = osd_quota_transfer(inode, attr, handle);
if (result)
return;
* enabled on ldiskfs (lquota takes care of it).
*/
LASSERTF(result == 0, "%d\n", result);
- ll_dirty_inode(inode, I_DIRTY_DATASYNC);
+ osd_dirty_inode(inode, I_DIRTY_DATASYNC);
}
attr->la_valid = valid;
}
if (likely(result == 0)) {
- osd_attr_init(info, obj, attr, dof);
+ osd_attr_init(info, obj, attr, dof, th);
osd_object_init0(obj);
}
LASSERT(obj->oo_inode != NULL);
- oh = container_of0(th, struct osd_thandle, ot_super);
+ if (CFS_FAIL_CHECK(OBD_FAIL_OSD_OI_ENOSPC))
+ return -ENOSPC;
+
+ oh = container_of(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle);
osd_trans_exec_op(env, th, OSD_OT_INSERT);
struct thandle *handle)
{
struct osd_thandle *oh;
+ struct super_block *sb = osd_sb(osd_dev(dt->do_lu.lo_dev));
+ int credits;
int rc;
ENTRY;
LASSERT(handle != NULL);
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
/*
* vs. osd_mkreg: osd_mk_index will create 2 blocks for root_node and
* leaf_node, could involves the block, block bitmap, groups, GDT
* change for each block, so add 4 * 2 credits in that case.
+ *
+ * The default ACL initialization may consume an additional 16 blocks
+ */
+ credits = osd_dto_credits_noquota[DTO_OBJECT_CREATE] +
+ ((dof->dof_type == DFT_INDEX) ? 4 * 2 : 0);
+
+ /**
+ * While ldiskfs_new_inode() calls ldiskfs_init_acl() we have to add
+ * credits for possible default ACL creation in new inode
*/
- osd_trans_declare_op(env, oh, OSD_OT_CREATE,
- osd_dto_credits_noquota[DTO_OBJECT_CREATE] +
- (dof->dof_type == DFT_INDEX) ? 4 * 2 : 0);
+ if (hint && hint->dah_acl_len)
+ credits += osd_calc_bkmap_credits(sb, NULL, 0, -1,
+ (hint->dah_acl_len + sb->s_blocksize - 1) >>
+ sb->s_blocksize_bits);
+
+ osd_trans_declare_op(env, oh, OSD_OT_CREATE, credits);
+
/*
* Reuse idle OI block may cause additional one OI block
* to be changed.
if (inode == NULL)
RETURN(-ENOENT);
- oh = container_of0(th, struct osd_thandle, ot_super);
+ oh = container_of(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
osd_trans_declare_op(env, oh, OSD_OT_DESTROY,
ENTRY;
- oh = container_of0(th, struct osd_thandle, ot_super);
+ oh = container_of(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle);
LASSERT(inode);
LASSERT(!lu_object_is_dying(dt->do_lu.lo_header));
}
if (S_ISDIR(inode->i_mode)) {
- LASSERT(osd_inode_unlinked(inode) || inode->i_nlink == 1 ||
- inode->i_nlink == 2);
+ if (inode->i_nlink > 2)
+ CERROR("%s: directory "DFID" ino %lu link count is %u at unlink. run e2fsck to repair\n",
+ osd_name(osd), PFID(fid), inode->i_ino,
+ inode->i_nlink);
spin_lock(&obj->oo_guard);
clear_nlink(inode);
spin_unlock(&obj->oo_guard);
- ll_dirty_inode(inode, I_DIRTY_DATASYNC);
+ osd_dirty_inode(inode, I_DIRTY_DATASYNC);
}
osd_trans_exec_op(env, th, OSD_OT_DESTROY);
__u32 saved_nlink = dir->i_nlink;
int rc;
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_DOTDOT_ENOSPC))
+ return -ENOSPC;
+
dot_dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp2;
osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid);
struct osd_thread_info *info = osd_oti_get(env);
struct inode *local;
struct osd_thandle *oh;
+ struct iattr iattr = {
+ .ia_valid = ATTR_UID | ATTR_GID |
+ ATTR_CTIME | ATTR_MTIME | ATTR_ATIME,
+ .ia_ctime.tv_nsec = UTIME_OMIT,
+ .ia_mtime.tv_nsec = UTIME_OMIT,
+ .ia_atime.tv_nsec = UTIME_OMIT,
+ .ia_uid = GLOBAL_ROOT_UID,
+ .ia_gid = GLOBAL_ROOT_GID,
+ };
int rc;
ENTRY;
oh = container_of(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle->h_transaction != NULL);
- local = ldiskfs_create_inode(oh->ot_handle, pobj->oo_inode, type,
- NULL);
+ local = ldiskfs_create_inode(oh->ot_handle, pobj->oo_inode,
+ type, &iattr);
if (IS_ERR(local)) {
CERROR("%s: create local error %d\n", osd_name(osd),
(int)PTR_ERR(local));
* debugging if we need to determine where this symlink came from.
*/
if (S_ISLNK(type)) {
- CLASSERT(LDISKFS_N_BLOCKS * 4 >= FID_LEN + 1);
- rc = snprintf((char *)LDISKFS_I(local)->i_data,
- LDISKFS_N_BLOCKS * 4, DFID, PFID(fid));
+ BUILD_BUG_ON(LDISKFS_N_BLOCKS * 4 < FID_LEN + 1);
+ rc = scnprintf((char *)LDISKFS_I(local)->i_data,
+ LDISKFS_N_BLOCKS * 4, DFID, PFID(fid));
i_size_write(local, rc);
LDISKFS_I(local)->i_disksize = rc;
#ifdef HAVE_PROJECT_QUOTA
if (LDISKFS_I(pobj->oo_inode)->i_flags & LUSTRE_PROJINHERIT_FL &&
i_projid_read(pobj->oo_inode) != 0) {
- rc = osd_transfer_project(local, 0);
+ rc = osd_transfer_project(local, 0, th);
if (rc) {
- CERROR("%s: quota transfer failed: rc = %d. Is project "
- "quota enforcement enabled on the ldiskfs "
- "filesystem?\n", local->i_sb->s_id, rc);
+ CERROR("%s: quota transfer failed:. Is project quota enforcement enabled on the ldiskfs filesystem? rc = %d\n",
+ osd_ino2name(local), rc);
RETURN(ERR_PTR(rc));
}
}
struct osd_thread_info *info = osd_oti_get(env);
struct osd_obj_orphan *oor, *tmp;
struct osd_inode_id id;
- struct list_head list;
+ LIST_HEAD(list);
struct inode *inode;
struct lu_fid fid;
handle_t *jh;
__u32 ino;
- INIT_LIST_HEAD(&list);
-
spin_lock(&osd->od_osfs_lock);
list_for_each_entry_safe(oor, tmp, &osd->od_orphan_list, oor_list) {
- if (oor->oor_env == env) {
- list_del(&oor->oor_list);
- list_add(&oor->oor_list, &list);
- }
+ if (oor->oor_env == env)
+ list_move(&oor->oor_list, &list);
}
spin_unlock(&osd->od_osfs_lock);
obj->oo_dt.do_body_ops = &osd_body_ops;
}
- if (!result && !CFS_FAIL_CHECK(OBD_FAIL_OSD_NO_OI_ENTRY))
+ if (!result && !CFS_FAIL_CHECK(OBD_FAIL_OSD_NO_OI_ENTRY)) {
+ struct inode *inode = obj->oo_inode;
+
result = __osd_oi_insert(env, obj, fid, th);
+ if (result && inode) {
+ spin_lock(&obj->oo_guard);
+ clear_nlink(inode);
+ spin_unlock(&obj->oo_guard);
+ osd_dirty_inode(inode, I_DIRTY_DATASYNC);
+ ldiskfs_set_inode_state(inode,
+ LDISKFS_STATE_LUSTRE_DESTROY);
+ iput(inode);
+ obj->oo_inode = NULL;
+ }
+ }
/*
* a small optimization - dt_insert() isn't usually applied
struct thandle *handle)
{
struct osd_thandle *oh;
+ int rc;
/* it's possible that object doesn't exist yet */
LASSERT(handle != NULL);
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
osd_trans_declare_op(env, oh, OSD_OT_REF_ADD,
osd_dto_credits_noquota[DTO_ATTR_SET_BASE]);
- osd_idc_find_and_init(env, osd_dev(dt->do_lu.lo_dev), osd_dt_obj(dt));
+ rc = osd_idc_find_and_init(env, osd_dev(dt->do_lu.lo_dev),
+ osd_dt_obj(dt));
- return 0;
+ return rc;
}
/*
LASSERT(osd_is_write_locked(env, obj));
LASSERT(th != NULL);
- oh = container_of0(th, struct osd_thandle, ot_super);
+ oh = container_of(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle != NULL);
osd_trans_exec_op(env, th, OSD_OT_REF_ADD);
/* inc_nlink from 0 may cause WARN_ON */
set_nlink(inode, 1);
else {
- ldiskfs_inc_count(oh->ot_handle, inode);
+ osd_ldiskfs_inc_count(oh->ot_handle, inode);
if (!S_ISDIR(inode->i_mode))
LASSERT(inode->i_nlink <= LDISKFS_LINK_MAX);
}
spin_unlock(&obj->oo_guard);
- ll_dirty_inode(inode, I_DIRTY_DATASYNC);
+ osd_dirty_inode(inode, I_DIRTY_DATASYNC);
LINVRNT(osd_invariant(obj));
osd_trans_exec_check(env, th, OSD_OT_REF_ADD);
LASSERT(!dt_object_remote(dt));
LASSERT(handle != NULL);
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
osd_trans_declare_op(env, oh, OSD_OT_REF_DEL,
LASSERT(osd_is_write_locked(env, obj));
LASSERT(th != NULL);
- oh = container_of0(th, struct osd_thandle, ot_super);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_REF_DEL))
+ return -EIO;
+
+ oh = container_of(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle != NULL);
osd_trans_exec_op(env, th, OSD_OT_REF_DEL);
CDEBUG(D_INODE, DFID" decrease nlink %d\n",
PFID(lu_object_fid(&dt->do_lu)), inode->i_nlink);
- ldiskfs_dec_count(oh->ot_handle, inode);
+ osd_ldiskfs_dec_count(oh->ot_handle, inode);
spin_unlock(&obj->oo_guard);
- ll_dirty_inode(inode, I_DIRTY_DATASYNC);
+ osd_dirty_inode(inode, I_DIRTY_DATASYNC);
LINVRNT(osd_invariant(obj));
osd_trans_exec_check(env, th, OSD_OT_REF_DEL);
ol->ol_comp_id = 0;
}
} else {
- rc = __osd_xattr_get(inode, dentry, name,
- buf->lb_buf, buf->lb_len);
+ /* Get enc context xattr directly from ldiskfs instead of going
+ * through the VFS, as there is no xattr handler for
+ * "encryption.".
+ */
+ if (strcmp(name, LL_XATTR_NAME_ENCRYPTION_CONTEXT) == 0)
+ rc = ldiskfs_xattr_get(inode,
+ LDISKFS_XATTR_INDEX_ENCRYPTION,
+ LDISKFS_XATTR_NAME_ENCRYPTION_CONTEXT,
+ buf->lb_buf, buf->lb_len);
+ else
+ rc = __osd_xattr_get(inode, dentry, name,
+ buf->lb_buf, buf->lb_len);
}
if (cache_xattr) {
LASSERT(handle != NULL);
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
if (strcmp(name, XATTR_NAME_LMA) == 0) {
/* Remove old PFID EA entry firstly. */
dquot_initialize(inode);
- rc = osd_removexattr(dentry, inode, XATTR_NAME_FID);
+ rc = ll_vfs_removexattr(dentry, inode, XATTR_NAME_FID);
if (rc == -ENODATA) {
- if ((fl & LU_XATTR_REPLACE) && !(fl & LU_XATTR_CREATE))
- RETURN(rc);
+ /* XATTR_NAME_FID is already absent */
+ rc = 0;
} else if (rc) {
RETURN(rc);
}
ENTRY;
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle != NULL);
rc = linkea_init_with_rec(&ldata);
* Version is set after all inode operations are finished,
* so we should mark it dirty here
*/
- ll_dirty_inode(inode, I_DIRTY_DATASYNC);
+ osd_dirty_inode(inode, I_DIRTY_DATASYNC);
RETURN(0);
}
if (fl & LU_XATTR_CREATE)
fs_flags |= XATTR_CREATE;
- rc = __osd_xattr_set(info, inode, name, buf->lb_buf, len, fs_flags);
+ if (strcmp(name, LL_XATTR_NAME_ENCRYPTION_CONTEXT) == 0) {
+ /* Set enc context xattr directly in ldiskfs instead of going
+ * through the VFS, as there is no xattr handler for
+ * "encryption.".
+ */
+ struct osd_thandle *oth = container_of(handle,
+ struct osd_thandle,
+ ot_super);
+
+ if (!oth->ot_handle)
+ /* this should be already part of a transaction */
+ RETURN(-EPROTO);
+
+ rc = ldiskfs_xattr_set_handle(oth->ot_handle, inode,
+ LDISKFS_XATTR_INDEX_ENCRYPTION,
+ LDISKFS_XATTR_NAME_ENCRYPTION_CONTEXT,
+ buf->lb_buf, len, fs_flags);
+ } else {
+ rc = __osd_xattr_set(info, inode, name,
+ buf->lb_buf, len, fs_flags);
+ }
osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
if (rc == 0 &&
const struct lu_buf *buf)
{
struct osd_object *obj = osd_dt_obj(dt);
+ struct osd_device *dev = osd_obj2dev(obj);
struct inode *inode = obj->oo_inode;
struct osd_thread_info *info = osd_oti_get(env);
struct dentry *dentry = &info->oti_obj_dentry;
+ int rc;
if (!dt_object_exists(dt))
return -ENOENT;
dentry->d_inode = inode;
dentry->d_sb = inode->i_sb;
- return inode->i_op->listxattr(dentry, buf->lb_buf, buf->lb_len);
+ rc = inode->i_op->listxattr(dentry, buf->lb_buf, buf->lb_len);
+
+ if (rc < 0 || buf->lb_buf == NULL)
+ return rc;
+
+ /* Hide virtual project ID xattr from list if disabled */
+ if (!dev->od_enable_projid_xattr) {
+ char *end = (char *)buf->lb_buf + rc;
+ char *p = buf->lb_buf;
+
+ while (p < end) {
+ char *next = p + strlen(p) + 1;
+
+ if (strcmp(p, XATTR_NAME_PROJID) == 0) {
+ if (end - next > 0)
+ memmove(p, next, end - next);
+ rc -= next - p;
+ break;
+ }
+
+ p = next;
+ }
+ }
+
+ return rc;
}
static int osd_declare_xattr_del(const struct lu_env *env,
LASSERT(!dt_object_remote(dt));
LASSERT(handle != NULL);
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
osd_trans_declare_op(env, oh, OSD_OT_XATTR_SET,
dquot_initialize(inode);
dentry->d_inode = inode;
dentry->d_sb = inode->i_sb;
- rc = osd_removexattr(dentry, inode, name);
+ rc = ll_vfs_removexattr(dentry, inode, name);
}
osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET);
__u64 start, __u64 end)
{
struct osd_object *obj = osd_dt_obj(dt);
+ struct osd_device *dev = osd_obj2dev(obj);
struct inode *inode = obj->oo_inode;
- struct osd_thread_info *info = osd_oti_get(env);
- struct dentry *dentry = &info->oti_obj_dentry;
- struct file *file = &info->oti_file;
+ struct file *file;
int rc;
ENTRY;
+ file = alloc_file_pseudo(inode, dev->od_mnt, "/", O_NOATIME,
+ inode->i_fop);
+ if (IS_ERR(file))
+ RETURN(PTR_ERR(file));
- dentry->d_inode = inode;
- dentry->d_sb = inode->i_sb;
- file->f_path.dentry = dentry;
- file->f_mapping = inode->i_mapping;
- file->f_op = inode->i_fop;
- set_file_inode(file, inode);
-
+ file->f_mode |= FMODE_64BITHASH;
rc = vfs_fsync_range(file, start, end, 0);
+ ihold(inode);
+ fput(file);
RETURN(rc);
}
return 0;
}
+/*
+ * Staleness check installed as a .do_check_stale handler later in this file.
+ * This implementation unconditionally reports the object as not stale.
+ */
+static bool osd_check_stale(struct dt_object *dt)
+{
+ return false;
+}
/*
* Index operations.
*/
.do_xattr_list = osd_xattr_list,
.do_object_sync = osd_object_sync,
.do_invalidate = osd_invalidate,
+ .do_check_stale = osd_check_stale,
};
static const struct dt_object_operations osd_obj_otable_it_ops = {
{
struct osd_thandle *oh;
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
/* Recycle may cause additional three blocks to be changed. */
if (unlikely(ipd == NULL))
RETURN(-ENOMEM);
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle != NULL);
LASSERT(oh->ot_handle->h_transaction != NULL);
LASSERT(!dt_object_remote(dt));
LASSERT(handle != NULL);
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
credits = osd_dto_credits_noquota[DTO_INDEX_DELETE];
}
/**
+ * Utility function to get real name from object name
+ *
+ * \param[in] obj pointer to the object to be handled
+ * \param[in] name object name
+ * \param[in] len object name len
+ * \param[out] ln	pointer to the struct lu_name to hold the real name
+ *
+ * If \a obj is not flagged LUSTRE_ENCRYPT_FL, the real name is just the object
+ * name and ln->ln_name points at \a name itself (nothing is allocated).
+ * If it is flagged, the object name is decoded into a newly allocated buffer;
+ * in that case only, ln->ln_name needs to be freed by the caller with kfree().
+ *
+ * \retval 0, on success
+ * \retval -ve, on error
+ */
+static int obj_name2lu_name(struct osd_object *obj, const char *name,
+			    int len, struct lu_name *ln)
+{
+	char *decoded;
+
+	/* Not flagged as encrypted: hand back the caller's name untouched. */
+	if (!(obj->oo_lma_flags & LUSTRE_ENCRYPT_FL)) {
+		ln->ln_name = name;
+		ln->ln_namelen = len;
+		return 0;
+	}
+
+	/* Encrypted: decode into a private buffer of the encoded length. */
+	decoded = kmalloc(len, GFP_NOFS);
+	if (decoded == NULL)
+		return -ENOMEM;
+
+	/* critical_decode() reports the length of the decoded name */
+	ln->ln_namelen = critical_decode(name, len, decoded);
+	ln->ln_name = decoded;
+
+	return 0;
+}
+
+/**
* Index delete function for interoperability mode (b11826).
* It will remove the directory entry added by osd_index_ea_insert().
* This entry is needed to maintain name->fid mapping.
struct buffer_head *bh;
struct htree_lock *hlock = NULL;
struct osd_device *osd = osd_dev(dt->do_lu.lo_dev);
+ struct lu_name ln;
int rc;
ENTRY;
LASSERT(!dt_object_remote(dt));
LASSERT(handle != NULL);
+ rc = obj_name2lu_name(obj, (char *)key, strlen((char *)key), &ln);
+ if (rc)
+ RETURN(rc);
+
osd_trans_exec_op(env, handle, OSD_OT_DELETE);
oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle->h_transaction != NULL);
dquot_initialize(dir);
- dentry = osd_child_dentry_get(env, obj,
- (char *)key, strlen((char *)key));
+ dentry = osd_child_dentry_get(env, obj, ln.ln_name, ln.ln_namelen);
if (obj->oo_hl_head != NULL) {
hlock = osd_oti_get(env)->oti_hlock;
} else {
rc = PTR_ERR(bh);
}
+
+ if (!rc && fid_is_namespace_visible(lu_object_fid(&dt->do_lu)) &&
+ obj->oo_dirent_count != LU_DIRENT_COUNT_UNSET) {
+ /* NB, dirent count may not be accurate, because it's counted
+ * without lock.
+ */
+ if (obj->oo_dirent_count)
+ obj->oo_dirent_count--;
+ else
+ obj->oo_dirent_count = LU_DIRENT_COUNT_UNSET;
+ }
if (hlock != NULL)
ldiskfs_htree_unlock(hlock);
else
up_write(&obj->oo_ext_idx_sem);
-
GOTO(out, rc);
out:
LASSERT(osd_invariant(obj));
osd_trans_exec_check(env, handle, OSD_OT_DELETE);
+ if (ln.ln_name != (char *)key)
+ kfree(ln.ln_name);
RETURN(rc);
}
LASSERT(handle != NULL);
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
osd_trans_declare_op(env, oh, OSD_OT_INSERT,
if (unlikely(ipd == NULL))
RETURN(-ENOMEM);
- oh = container_of0(th, struct osd_thandle, ot_super);
+ oh = container_of(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle != NULL);
LASSERT(oh->ot_handle->h_transaction != NULL);
if (S_ISDIR(obj->oo_inode->i_mode)) {
struct ldiskfs_dentry_param *ldp;
struct dentry *child;
struct osd_thandle *oth;
+ struct lu_name ln;
int rc;
oth = container_of(th, struct osd_thandle, ot_super);
LASSERT(oth->ot_handle->h_transaction != NULL);
LASSERT(pobj->oo_inode);
+ rc = obj_name2lu_name(pobj, name, strlen(name), &ln);
+ if (rc)
+ RETURN(rc);
+
ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
- if (unlikely(pobj->oo_inode ==
- osd_sb(osd_obj2dev(pobj))->s_root->d_inode))
+ if (unlikely(osd_object_is_root(pobj)))
ldp->edp_magic = 0;
else
osd_get_ldiskfs_dirent_param(ldp, fid);
- child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
+ child = osd_child_dentry_get(info->oti_env, pobj,
+ ln.ln_name, ln.ln_namelen);
child->d_fsdata = (void *)ldp;
dquot_initialize(pobj->oo_inode);
rc = osd_ldiskfs_add_entry(info, osd_obj2dev(pobj), oth->ot_handle,
}
}
+ if (ln.ln_name != name)
+ kfree(ln.ln_name);
RETURN(rc);
}
hlock, th);
}
}
+ if (!rc && fid_is_namespace_visible(lu_object_fid(&pobj->oo_dt.do_lu))
+ && pobj->oo_dirent_count != LU_DIRENT_COUNT_UNSET)
+ pobj->oo_dirent_count++;
+
if (hlock != NULL)
ldiskfs_htree_unlock(hlock);
else
}
static int
-osd_consistency_check(struct osd_thread_info *oti, struct osd_device *dev,
- struct osd_idmap_cache *oic)
+osd_ldiskfs_consistency_check(struct osd_thread_info *oti,
+ struct osd_device *dev,
+ const struct lu_fid *fid,
+ struct osd_inode_id *id)
{
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
- struct lu_fid *fid = &oic->oic_fid;
- struct osd_inode_id *id = &oic->oic_lid;
struct inode *inode = NULL;
int once = 0;
bool insert;
int rc;
ENTRY;
-
- if (!fid_is_norm(fid) && !fid_is_igif(fid))
- RETURN(0);
-
- if (thread_is_running(&scrub->os_thread) &&
- scrub->os_pos_current > id->oii_ino)
- RETURN(0);
-
- if (dev->od_auto_scrub_interval == AS_NEVER ||
- ktime_get_real_seconds() <
- scrub->os_file.sf_time_last_complete + dev->od_auto_scrub_interval)
+ if (!scrub_needs_check(scrub, fid, id->oii_ino))
RETURN(0);
-
again:
rc = osd_oi_lookup(oti, dev, fid, &oti->oti_id, 0);
if (rc == -ENOENT) {
insert = false;
trigger:
- if (thread_is_running(&scrub->os_thread)) {
+ if (scrub->os_running) {
if (inode == NULL) {
inode = osd_iget(oti, dev, id);
/* The inode has been removed (by race maybe). */
}
}
- rc = osd_oii_insert(dev, oic, insert);
+ rc = osd_oii_insert(dev, fid, id, insert);
/*
* There is race condition between osd_oi_lookup and OI scrub.
* The OI scrub finished just after osd_oi_lookup() failure.
if (!S_ISDIR(inode->i_mode))
rc = 0;
else
- rc = osd_check_lmv(oti, dev, inode, oic);
+ rc = osd_check_lmv(oti, dev, inode);
GOTO(out, rc);
}
- if (dev->od_auto_scrub_interval != AS_NEVER && ++once == 1) {
+ if (dev->od_scrub.os_scrub.os_auto_scrub_interval != AS_NEVER &&
+ ++once == 1) {
rc = osd_scrub_start(oti->oti_env, dev, SS_AUTO_PARTIAL |
SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT);
- CDEBUG(D_LFSCK | D_CONSOLE | D_WARNING,
- "%s: trigger partial OI scrub for RPC inconsistency "
- "checking FID "DFID": rc = %d\n",
- osd_dev2name(dev), PFID(fid), rc);
+ CDEBUG_LIMIT(D_LFSCK | D_CONSOLE | D_WARNING,
+ "%s: trigger partial OI scrub for RPC inconsistency, checking FID "DFID"/%u: rc = %d\n",
+ osd_dev2name(dev), PFID(fid), id->oii_ino, rc);
if (rc == 0 || rc == -EALREADY)
goto again;
}
GOTO(out, rc);
out:
- if (inode)
- iput(inode);
+ iput(inode);
RETURN(rc);
}
static int osd_fail_fid_lookup(struct osd_thread_info *oti,
struct osd_device *dev,
- struct osd_idmap_cache *oic,
struct lu_fid *fid, __u32 ino)
{
struct lustre_ost_attrs *loa = &oti->oti_ost_attrs;
+ struct osd_idmap_cache *oic = &oti->oti_cache;
struct inode *inode;
int rc;
struct buffer_head *bh;
struct lu_fid *fid = (struct lu_fid *)rec;
struct htree_lock *hlock = NULL;
+ struct lu_name ln;
int ino;
int rc;
LASSERT(dir->i_op != NULL);
LASSERT(dir->i_op->lookup != NULL);
- dentry = osd_child_dentry_get(env, obj,
- (char *)key, strlen((char *)key));
+ rc = obj_name2lu_name(obj, (char *)key, strlen((char *)key), &ln);
+ if (rc)
+ RETURN(rc);
+
+ dentry = osd_child_dentry_get(env, obj, ln.ln_name, ln.ln_namelen);
if (obj->oo_hl_head != NULL) {
hlock = osd_oti_get(env)->oti_hlock;
if (!IS_ERR(bh)) {
struct osd_thread_info *oti = osd_oti_get(env);
struct osd_inode_id *id = &oti->oti_id;
- struct osd_idmap_cache *oic = &oti->oti_cache;
struct osd_device *dev = osd_obj2dev(obj);
ino = le32_to_cpu(de->inode);
if (OBD_FAIL_CHECK(OBD_FAIL_FID_LOOKUP)) {
brelse(bh);
- rc = osd_fail_fid_lookup(oti, dev, oic, fid, ino);
+ rc = osd_fail_fid_lookup(oti, dev, fid, ino);
GOTO(out, rc);
}
osd_id_gen(id, ino, OSD_OII_NOGEN);
}
- if (rc != 0 || osd_remote_fid(env, dev, fid)) {
- fid_zero(&oic->oic_fid);
-
+ if (rc != 0 || osd_remote_fid(env, dev, fid))
GOTO(out, rc);
- }
- osd_add_oi_cache(osd_oti_get(env), osd_obj2dev(obj), id, fid);
- rc = osd_consistency_check(oti, dev, oic);
- if (rc == -ENOENT)
- fid_zero(&oic->oic_fid);
- else
+ rc = osd_ldiskfs_consistency_check(oti, dev, fid, id);
+ if (rc != -ENOENT) {
/* Other error should not affect lookup result. */
rc = 0;
+
+ /* Normal file mapping should be added into OI cache
+ * after FID in LMA check, but for local files like
+ * hsm_actions, their FIDs are not stored in OI files,
+ * see osd_initial_OI_scrub(), and here is the only
+ * place to load mapping into OI cache.
+ */
+ if (!fid_is_namespace_visible(fid))
+ osd_add_oi_cache(osd_oti_get(env),
+ osd_obj2dev(obj), id, fid);
+ }
} else {
rc = PTR_ERR(bh);
}
ldiskfs_htree_unlock(hlock);
else
up_read(&obj->oo_ext_idx_sem);
- return rc;
+ if (ln.ln_name != (char *)key)
+ kfree(ln.ln_name);
+ RETURN(rc);
}
static int osd_index_declare_ea_insert(const struct lu_env *env,
LASSERT(fid != NULL);
LASSERT(rec1->rec_type != 0);
- oh = container_of0(handle, struct osd_thandle, ot_super);
+ oh = container_of(handle, struct osd_thandle, ot_super);
LASSERT(oh->ot_handle == NULL);
credits = osd_dto_credits_noquota[DTO_INDEX_INSERT];
i_projid_read(inode) != 0)
rc = osd_declare_attr_qid(env, osd_dt_obj(dt), oh,
0, i_projid_read(inode),
- 0, false, PRJQUOTA, true);
+ 0, false, PRJQUOTA);
#endif
}
iput(child_inode);
LASSERT(osd_invariant(obj));
osd_trans_exec_check(env, th, OSD_OT_INSERT);
+
RETURN(rc);
}
}
};
+/**
+ * Allocate and set up a directory iterator context for \a inode.
+ *
+ * \param[in] env	execution environment, used to reach the per-thread info
+ * \param[in] dev	OSD device whose od_mnt is passed to alloc_file_pseudo()
+ * \param[in] inode	inode the iterator's file is created for
+ * \param[in] attr	iterator flags; LUDA_64BITHASH selects FMODE_64BITHASH,
+ *			otherwise FMODE_32BITHASH is set
+ *
+ * The dirent buffer is the per-thread oti_it_ea_buf when that is not already
+ * in use, otherwise a private OSD_IT_EA_BUFSIZE allocation. oie_obj is left
+ * NULL; a caller that has an osd_object sets it afterwards.
+ *
+ * \retval iterator context on success
+ * \retval ERR_PTR(-ve) on error (the alloc_file_pseudo() error or -ENOMEM)
+ */
+struct osd_it_ea *osd_it_dir_init(const struct lu_env *env,
+				  struct osd_device *dev,
+				  struct inode *inode, u32 attr)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct osd_it_ea *oie;
+	struct file *file;
+
+	ENTRY;
+	file = alloc_file_pseudo(inode, dev->od_mnt, "/", O_NOATIME,
+				 inode->i_fop);
+	if (IS_ERR(file))
+		RETURN(ERR_CAST(file));
+
+	/* Only FMODE_64BITHASH or FMODE_32BITHASH should be set, NOT both. */
+	if (attr & LUDA_64BITHASH)
+		file->f_mode |= FMODE_64BITHASH;
+	else
+		file->f_mode |= FMODE_32BITHASH;
+	/*
+	 * NOTE(review): presumably balances the inode reference that the
+	 * eventual fput() of this file drops - confirm.
+	 */
+	ihold(inode);
+
+	OBD_SLAB_ALLOC_PTR(oie, osd_itea_cachep);
+	if (!oie)
+		goto out_fput;
+
+	oie->oie_rd_dirent = 0;
+	oie->oie_it_dirent = 0;
+	oie->oie_dirent = NULL;
+	if (unlikely(!info->oti_it_ea_buf_used)) {
+		/* claim the per-thread buffer */
+		oie->oie_buf = info->oti_it_ea_buf;
+		info->oti_it_ea_buf_used = 1;
+	} else {
+		/* per-thread buffer already in use: take a private one */
+		OBD_ALLOC(oie->oie_buf, OSD_IT_EA_BUFSIZE);
+		if (!oie->oie_buf)
+			goto out_free;
+	}
+	oie->oie_obj = NULL;
+	oie->oie_file = file;
+
+	RETURN(oie);
+
+out_free:
+	OBD_SLAB_FREE_PTR(oie, osd_itea_cachep);
+out_fput:
+	fput(file);
+
+	/* leave through RETURN() so the ENTRY trace above gets its exit record */
+	RETURN(ERR_PTR(-ENOMEM));
+}
/**
* Creates or initializes iterator context.
__u32 attr)
{
struct osd_object *obj = osd_dt_obj(dt);
- struct osd_thread_info *info = osd_oti_get(env);
- struct osd_it_ea *oie;
- struct file *file;
+ struct osd_device *dev = osd_obj2dev(obj);
struct lu_object *lo = &dt->do_lu;
- struct dentry *obj_dentry;
+ struct osd_it_ea *oie;
ENTRY;
if (!dt_object_exists(dt) || obj->oo_destroyed)
RETURN(ERR_PTR(-ENOENT));
- OBD_SLAB_ALLOC_PTR_GFP(oie, osd_itea_cachep, GFP_NOFS);
- if (oie == NULL)
- RETURN(ERR_PTR(-ENOMEM));
- obj_dentry = &oie->oie_dentry;
-
- obj_dentry->d_inode = obj->oo_inode;
- obj_dentry->d_sb = osd_sb(osd_obj2dev(obj));
- obj_dentry->d_name.hash = 0;
+ oie = osd_it_dir_init(env, dev, obj->oo_inode, attr);
+ if (IS_ERR(oie))
+ RETURN(ERR_CAST(oie));
- oie->oie_rd_dirent = 0;
- oie->oie_it_dirent = 0;
- oie->oie_dirent = NULL;
- if (unlikely(!info->oti_it_ea_buf_used)) {
- oie->oie_buf = info->oti_it_ea_buf;
- info->oti_it_ea_buf_used = 1;
- } else {
- OBD_ALLOC(oie->oie_buf, OSD_IT_EA_BUFSIZE);
- if (oie->oie_buf == NULL)
- RETURN(ERR_PTR(-ENOMEM));
- }
oie->oie_obj = obj;
+ lu_object_get(lo);
+ RETURN((struct dt_it *)oie);
+}
- file = &oie->oie_file;
+void osd_it_dir_fini(const struct lu_env *env, struct osd_it_ea *oie,
+ struct inode *inode)
+{
+ struct osd_thread_info *info = osd_oti_get(env);
- /* Only FMODE_64BITHASH or FMODE_32BITHASH should be set, NOT both. */
- if (attr & LUDA_64BITHASH)
- file->f_mode = FMODE_64BITHASH;
+ ENTRY;
+ fput(oie->oie_file);
+ if (unlikely(oie->oie_buf != info->oti_it_ea_buf))
+ OBD_FREE(oie->oie_buf, OSD_IT_EA_BUFSIZE);
else
- file->f_mode = FMODE_32BITHASH;
- file->f_path.dentry = obj_dentry;
- file->f_mapping = obj->oo_inode->i_mapping;
- file->f_op = obj->oo_inode->i_fop;
- set_file_inode(file, obj->oo_inode);
-
- lu_object_get(lo);
- RETURN((struct dt_it *)oie);
+ info->oti_it_ea_buf_used = 0;
+ OBD_SLAB_FREE_PTR(oie, osd_itea_cachep);
+ EXIT;
}
/**
*/
static void osd_it_ea_fini(const struct lu_env *env, struct dt_it *di)
{
- struct osd_thread_info *info = osd_oti_get(env);
struct osd_it_ea *oie = (struct osd_it_ea *)di;
struct osd_object *obj = oie->oie_obj;
struct inode *inode = obj->oo_inode;
ENTRY;
- oie->oie_file.f_op->release(inode, &oie->oie_file);
+ osd_it_dir_fini(env, (struct osd_it_ea *)di, inode);
osd_object_put(env, obj);
- if (unlikely(oie->oie_buf != info->oti_it_ea_buf))
- OBD_FREE(oie->oie_buf, OSD_IT_EA_BUFSIZE);
- else
- info->oti_it_ea_buf_used = 0;
- OBD_SLAB_FREE_PTR(oie, osd_itea_cachep);
EXIT;
}
ENTRY;
LASSERT(((const char *)key)[0] == '\0');
- it->oie_file.f_pos = 0;
+ it->oie_file->f_pos = 0;
it->oie_rd_dirent = 0;
it->oie_it_dirent = 0;
it->oie_dirent = NULL;
* \retval 1 on buffer full
*/
#ifdef HAVE_FILLDIR_USE_CTX
-static int osd_ldiskfs_filldir(struct dir_context *buf,
+static int osd_ldiskfs_filldir(struct dir_context *ctx,
#else
-static int osd_ldiskfs_filldir(void *buf,
+static int osd_ldiskfs_filldir(void *ctx,
#endif
const char *name, int namelen,
loff_t offset, __u64 ino, unsigned int d_type)
{
- struct osd_it_ea *it = ((struct osd_filldir_cbs *)buf)->it;
+ struct osd_it_ea *it = ((struct osd_filldir_cbs *)ctx)->it;
struct osd_object *obj = it->oie_obj;
struct osd_it_ea_dirent *ent = it->oie_dirent;
struct lu_fid *fid = &ent->oied_fid;
+ char *buf = it->oie_buf;
struct osd_fid_pack *rec;
-
ENTRY;
-/* this should never happen */
+ /* this should never happen */
if (unlikely(namelen == 0 || namelen > LDISKFS_NAME_LEN)) {
CERROR("ldiskfs return invalid namelen %d\n", namelen);
RETURN(-EIO);
}
- if ((void *)ent - it->oie_buf + sizeof(*ent) + namelen >
- OSD_IT_EA_BUFSIZE)
+ /* Check for enough space. Note oied_name is not NUL terminated. */
+ if (&ent->oied_name[namelen] > buf + OSD_IT_EA_BUFSIZE)
RETURN(1);
/* "." is just the object itself. */
if (namelen == 1 && name[0] == '.') {
- *fid = obj->oo_dt.do_lu.lo_header->loh_fid;
+ if (obj != NULL)
+ *fid = obj->oo_dt.do_lu.lo_header->loh_fid;
} else if (d_type & LDISKFS_DIRENT_LUFID) {
rec = (struct osd_fid_pack *)(name + namelen + 1);
if (osd_fid_unpack(fid, rec) != 0)
d_type &= ~LDISKFS_DIRENT_LUFID;
/* NOT export local root. */
- if (unlikely(osd_sb(osd_obj2dev(obj))->s_root->d_inode->i_ino == ino)) {
+ if (obj != NULL &&
+ unlikely(osd_sb(osd_obj2dev(obj))->s_root->d_inode->i_ino == ino)) {
ino = obj->oo_inode->i_ino;
*fid = obj->oo_dt.do_lu.lo_header->loh_fid;
}
+ if (obj == NULL || !(obj->oo_lma_flags & LUSTRE_ENCRYPT_FL)) {
+ ent->oied_namelen = namelen;
+ memcpy(ent->oied_name, name, namelen);
+ } else {
+ int encoded_namelen = critical_chars(name, namelen);
+
+ /* Check again for enough space. */
+ if (&ent->oied_name[encoded_namelen] > buf + OSD_IT_EA_BUFSIZE)
+ RETURN(1);
+
+ ent->oied_namelen = encoded_namelen;
+
+ if (encoded_namelen == namelen)
+ memcpy(ent->oied_name, name, namelen);
+ else
+ critical_encode(name, namelen, ent->oied_name);
+ }
+
ent->oied_ino = ino;
ent->oied_off = offset;
- ent->oied_namelen = namelen;
ent->oied_type = d_type;
- memcpy(ent->oied_name, name, namelen);
-
it->oie_rd_dirent++;
- it->oie_dirent = (void *)ent + cfs_size_round(sizeof(*ent) + namelen);
+ it->oie_dirent = (void *)ent + cfs_size_round(sizeof(*ent) + ent->oied_namelen);
RETURN(0);
}
* \retval -ve on error
* \retval +1 reach the end of entry
*/
-static int osd_ldiskfs_it_fill(const struct lu_env *env,
- const struct dt_it *di)
+int osd_ldiskfs_it_fill(const struct lu_env *env, const struct dt_it *di)
{
struct osd_it_ea *it = (struct osd_it_ea *)di;
struct osd_object *obj = it->oie_obj;
- struct inode *inode = obj->oo_inode;
struct htree_lock *hlock = NULL;
- struct file *filp = &it->oie_file;
+ struct file *filp = it->oie_file;
int rc = 0;
struct osd_filldir_cbs buf = {
.ctx.actor = osd_ldiskfs_filldir,
it->oie_dirent = it->oie_buf;
it->oie_rd_dirent = 0;
- if (obj->oo_hl_head != NULL) {
- hlock = osd_oti_get(env)->oti_hlock;
- ldiskfs_htree_lock(hlock, obj->oo_hl_head,
- inode, LDISKFS_HLOCK_READDIR);
- } else {
- down_read(&obj->oo_ext_idx_sem);
+ if (obj) {
+ if (obj->oo_hl_head != NULL) {
+ hlock = osd_oti_get(env)->oti_hlock;
+ ldiskfs_htree_lock(hlock, obj->oo_hl_head,
+ obj->oo_inode,
+ LDISKFS_HLOCK_READDIR);
+ } else {
+ down_read(&obj->oo_ext_idx_sem);
+ }
}
- rc = osd_security_file_alloc(filp);
- if (rc)
- RETURN(rc);
-
- filp->f_flags |= O_NOATIME;
- filp->f_mode |= FMODE_NONOTIFY;
rc = iterate_dir(filp, &buf.ctx);
if (rc)
- RETURN(rc);
-
- if (hlock != NULL)
- ldiskfs_htree_unlock(hlock);
- else
- up_read(&obj->oo_ext_idx_sem);
+ GOTO(unlock, rc);
if (it->oie_rd_dirent == 0) {
/*
* If no dirent was returned, the end of the directory has been
* reached.
*/
- it->oie_file.f_pos = ldiskfs_get_htree_eof(&it->oie_file);
+ it->oie_file->f_pos = ldiskfs_get_htree_eof(it->oie_file);
if (rc == 0)
rc = 1;
} else {
it->oie_dirent = it->oie_buf;
it->oie_it_dirent = 1;
}
+unlock:
+ if (obj) {
+ if (hlock != NULL)
+ ldiskfs_htree_unlock(hlock);
+ else
+ up_read(&obj->oo_ext_idx_sem);
+ }
RETURN(rc);
}
it->oie_it_dirent++;
rc = 0;
} else {
- if (it->oie_file.f_pos == ldiskfs_get_htree_eof(&it->oie_file))
+ if (it->oie_file->f_pos == ldiskfs_get_htree_eof(it->oie_file))
rc = 1;
else
rc = osd_ldiskfs_it_fill(env, di);
return it->oie_dirent->oied_namelen;
}
+#if defined LDISKFS_DIR_ENTRY_LEN && defined LDISKFS_DIR_ENTRY_LEN_
+#undef LDISKFS_DIR_REC_LEN
+#define LDISKFS_DIR_REC_LEN(de) LDISKFS_DIR_ENTRY_LEN_((de))
+#endif
+
static inline bool osd_dotdot_has_space(struct ldiskfs_dir_entry_2 *de)
{
if (LDISKFS_DIR_REC_LEN(de) >=
* That means we lose it!
*/
if (rc != 0)
- CDEBUG(D_LFSCK, "%s: fail to reinsert the dirent, "
- "dir = %lu/%u, name = %.*s, "DFID": rc = %d\n",
- osd_ino2name(inode),
- dir->i_ino, dir->i_generation, namelen,
- dentry->d_name.name, PFID(fid), rc);
+ CDEBUG(D_LFSCK,
+ "%s: fail to reinsert the dirent, dir = %lu/%u, name = %.*s, "DFID": rc = %d\n",
+ osd_ino2name(inode), dir->i_ino, dir->i_generation,
+ namelen, dentry->d_name.name, PFID(fid), rc);
RETURN(rc);
}
int rc;
bool dotdot = false;
bool dirty = false;
+ struct lu_name ln;
ENTRY;
RETURN(rc);
}
- dentry = osd_child_dentry_by_inode(env, dir, ent->oied_name,
- ent->oied_namelen);
+ rc = obj_name2lu_name(obj, ent->oied_name, ent->oied_namelen, &ln);
+ if (rc)
+ RETURN(rc);
+
+ dentry = osd_child_dentry_by_inode(env, dir, ln.ln_name, ln.ln_namelen);
rc = osd_get_lma(info, inode, dentry, &info->oti_ost_attrs);
if (rc == -ENODATA || !fid_is_sane(&lma->lma_self_fid))
lma = NULL;
iput(inode);
if (rc >= 0 && !dirty)
dev->od_dirent_journal = 0;
+ if (ln.ln_name != ent->oied_name)
+ kfree(ln.ln_name);
return rc;
}
rc = osd_ea_fid_get(env, obj, ino, fid, id);
}
- } else {
- osd_id_gen(id, ino, OSD_OII_NOGEN);
}
}
it->oie_dirent->oied_namelen,
it->oie_dirent->oied_type, attr);
- if (rc < 0)
- RETURN(rc);
-
- if (osd_remote_fid(env, dev, fid))
- RETURN(0);
-
- if (likely(!(attr & (LUDA_IGNORE | LUDA_UNKNOWN)) && rc == 0))
- osd_add_oi_cache(oti, dev, id, fid);
-
RETURN(rc > 0 ? 0 : rc);
}
int rc;
ENTRY;
- it->oie_file.f_pos = hash;
+ it->oie_file->f_pos = hash;
rc = osd_ldiskfs_it_fill(env, di);
if (rc > 0)
if (info->oti_dio_pages) {
int i;
for (i = 0; i < PTLRPC_MAX_BRW_PAGES; i++) {
- if (info->oti_dio_pages[i])
- __free_page(info->oti_dio_pages[i]);
+ struct page *page = info->oti_dio_pages[i];
+ if (page) {
+ LASSERT(PagePrivate2(page));
+ LASSERT(PageLocked(page));
+ ClearPagePrivate2(page);
+ unlock_page(page);
+ __free_page(page);
+ }
}
- OBD_FREE(info->oti_dio_pages,
- sizeof(struct page *) * PTLRPC_MAX_BRW_PAGES);
+ OBD_FREE_PTR_ARRAY_LARGE(info->oti_dio_pages,
+ PTLRPC_MAX_BRW_PAGES);
}
if (info->oti_inode != NULL)
lu_buf_free(&info->oti_big_buf);
if (idc != NULL) {
LASSERT(info->oti_ins_cache_size > 0);
- OBD_FREE(idc, sizeof(*idc) * info->oti_ins_cache_size);
+ OBD_FREE_PTR_ARRAY_LARGE(idc, info->oti_ins_cache_size);
info->oti_ins_cache = NULL;
info->oti_ins_cache_size = 0;
}
LASSERT(info->oti_r_locks == 0);
LASSERT(info->oti_w_locks == 0);
LASSERT(info->oti_txns == 0);
+ LASSERTF(info->oti_dio_pages_used == 0, "%d\n",
+ info->oti_dio_pages_used);
}
/* type constructor/destructor: osd_type_init, osd_type_fini */
static int osd_fid_init(const struct lu_env *env, struct osd_device *osd)
{
struct seq_server_site *ss = osd_seq_site(osd);
- int rc;
+ int rc = 0;
ENTRY;
if (osd->od_cl_seq == NULL)
RETURN(-ENOMEM);
- rc = seq_client_init(osd->od_cl_seq, NULL, LUSTRE_SEQ_METADATA,
- osd->od_svname, ss->ss_server_seq);
- if (rc != 0) {
- OBD_FREE_PTR(osd->od_cl_seq);
- osd->od_cl_seq = NULL;
- RETURN(rc);
- }
+ seq_client_init(osd->od_cl_seq, NULL, LUSTRE_SEQ_METADATA,
+ osd->od_svname, ss->ss_server_seq);
if (ss->ss_node_id == 0) {
/*
if (o->od_mnt != NULL) {
shrink_dcache_sb(osd_sb(o));
osd_sync(env, &o->od_dt_dev);
+ wait_event(o->od_commit_cb_done,
+ !atomic_read(&o->od_commit_cb_in_flight));
mntput(o->od_mnt);
o->od_mnt = NULL;
EXIT;
}
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 53, 0)
+# ifndef LDISKFS_HAS_INCOMPAT_FEATURE
+/* Newer kernels provide the ldiskfs_set_feature_largedir() wrapper already,
+ * which calls ldiskfs_update_dynamic_rev() to update ancient filesystems.
+ * All ldiskfs filesystems are already v2, so it is a no-op and unnecessary.
+ * This avoids maintaining patches to export this otherwise-useless function.
+ *
+ * This empty stub is only compiled when LDISKFS_HAS_INCOMPAT_FEATURE is not
+ * defined and LUSTRE_VERSION_CODE is below 3.2.53.0.  The only user visible
+ * in this file is the ldiskfs_set_feature_largedir() call made at mount time
+ * under the same LUSTRE_VERSION_CODE guard.
+ *
+ * \param sb	superblock argument; ignored by this stub
+ */
+void ldiskfs_update_dynamic_rev(struct super_block *sb)
+{
+ /* do nothing: intentionally empty, \a sb is not touched */
+}
+# endif
+#endif
+
static int osd_mount(const struct lu_env *env,
struct osd_device *o, struct lustre_cfg *cfg)
{
const char *name = lustre_cfg_string(cfg, 0);
const char *dev = lustre_cfg_string(cfg, 1);
const char *opts;
- unsigned long page, s_flags, lmd_flags = 0;
+ unsigned long page, s_flags = 0, lmd_flags = 0;
struct page *__page;
struct file_system_type *type;
char *options = NULL;
- char *str;
+ const char *str;
struct osd_thread_info *info = osd_oti_get(env);
struct lu_fid *fid = &info->oti_fid;
struct inode *inode;
- int rc = 0, force_over_512tb = 0;
+ int rc = 0, force_over_1024tb = 0;
ENTRY;
RETURN(-E2BIG);
strcpy(o->od_mntdev, dev);
- str = lustre_cfg_string(cfg, 2);
- s_flags = simple_strtoul(str, NULL, 0);
- str = strstr(str, ":");
- if (str)
- lmd_flags = simple_strtoul(str + 1, NULL, 0);
+ str = lustre_cfg_buf(cfg, 2);
+ sscanf(str, "%lu:%lu", &s_flags, &lmd_flags);
+
opts = lustre_cfg_string(cfg, 3);
#ifdef __BIG_ENDIAN
if (opts == NULL || strstr(opts, "bigendian_extents") == NULL) {
#endif
#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
if (opts != NULL && strstr(opts, "force_over_128tb") != NULL) {
- CWARN("force_over_128tb option is deprecated. "
- "Filesystems less than 512TB can be created without any "
- "force options. Use force_over_512tb option for "
- "filesystems greater than 512TB.\n");
+ CWARN("force_over_128tb option is deprecated. Filesystems smaller than 1024TB can be created without any force option. Use force_over_1024tb option for filesystems larger than 1024TB.\n");
}
#endif
#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 1, 53, 0)
if (opts != NULL && strstr(opts, "force_over_256tb") != NULL) {
- CWARN("force_over_256tb option is deprecated. "
- "Filesystems less than 512TB can be created without any "
- "force options. Use force_over_512tb option for "
- "filesystems greater than 512TB.\n");
+ CWARN("force_over_256tb option is deprecated. Filesystems smaller than 1024TB can be created without any force options. Use force_over_1024tb option for filesystems larger than 1024TB.\n");
+ }
+#endif
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 53, 0)
+ if (opts != NULL && strstr(opts, "force_over_512tb") != NULL) {
+ CWARN("force_over_512tb option is deprecated. Filesystems smaller than 1024TB can be created without any force options. Use force_over_1024tb option for filesystems larger than 1024TB.\n");
}
#endif
- if (opts != NULL && strstr(opts, "force_over_512tb") != NULL)
- force_over_512tb = 1;
+ if (opts != NULL && strstr(opts, "force_over_1024tb") != NULL)
+ force_over_1024tb = 1;
__page = alloc_page(GFP_KERNEL);
if (__page == NULL)
*options = '\0';
if (opts != NULL) {
/* strip out the options for back compatibility */
- static char *sout[] = {
+ static const char * const sout[] = {
"mballoc",
"iopen",
"noiopen",
"force_over_128tb",
"force_over_256tb",
"force_over_512tb",
+ "force_over_1024tb",
+ "resetoi",
NULL
};
strncat(options, opts, PAGE_SIZE);
}
if (ldiskfs_blocks_count(LDISKFS_SB(osd_sb(o))->s_es) <<
- osd_sb(o)->s_blocksize_bits > 512ULL << 40 &&
- force_over_512tb == 0) {
- CERROR("%s: device %s LDISKFS does not support filesystems "
- "greater than 512TB and can cause data corruption. "
- "Use \"force_over_512tb\" mount option to override.\n",
+ osd_sb(o)->s_blocksize_bits > 1024ULL << 40 &&
+ force_over_1024tb == 0) {
+ CERROR("%s: device %s LDISKFS has not been tested on filesystems larger than 1024TB and may cause data corruption. Use 'force_over_1024tb' mount option to override.\n",
name, dev);
GOTO(out_mnt, rc = -EINVAL);
}
if (lmd_flags & LMD_FLG_DEV_RDONLY) {
- if (priv_dev_set_rdonly) {
- priv_dev_set_rdonly(osd_sb(o)->s_bdev);
- o->od_dt_dev.dd_rdonly = 1;
- LCONSOLE_WARN("%s: set dev_rdonly on this device\n",
- name);
- } else {
- LCONSOLE_WARN("%s: not support dev_rdonly on this device",
- name);
-
- GOTO(out_mnt, rc = -EOPNOTSUPP);
- }
- } else if (priv_dev_check_rdonly &&
- priv_dev_check_rdonly(osd_sb(o)->s_bdev)) {
- CERROR("%s: underlying device %s is marked as "
- "read-only. Setup failed\n", name, dev);
+ LCONSOLE_WARN("%s: not support dev_rdonly on this device\n",
+ name);
- GOTO(out_mnt, rc = -EROFS);
+ GOTO(out_mnt, rc = -EOPNOTSUPP);
}
if (!ldiskfs_has_feature_journal(o->od_mnt->mnt_sb)) {
GOTO(out_mnt, rc = -EINVAL);
}
+ if (ldiskfs_has_feature_fast_commit(o->od_mnt->mnt_sb)) {
+ CERROR("%s: device %s is mounted with fast_commit that breaks recovery\n",
+ name, dev);
+ GOTO(out_mnt, rc = -EOPNOTSUPP);
+ }
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 53, 0)
#ifdef LDISKFS_MOUNT_DIRDATA
if (ldiskfs_has_feature_dirdata(o->od_mnt->mnt_sb))
LDISKFS_SB(osd_sb(o))->s_mount_opt |= LDISKFS_MOUNT_DIRDATA;
"downgrade to Lustre-1.x again, you can enable it via "
"'tune2fs -O dirdata device'\n", name, dev);
#endif
+ /* enable large_dir on MDTs to avoid REMOTE_PARENT_DIR overflow,
+ * and on very large OSTs to avoid object directory overflow */
+ if (unlikely(!ldiskfs_has_feature_largedir(o->od_mnt->mnt_sb) &&
+ !strstr(name, "MGS"))) {
+ ldiskfs_set_feature_largedir(o->od_mnt->mnt_sb);
+ LCONSOLE_INFO("%s: enabled 'large_dir' feature on device %s\n",
+ name, dev);
+ }
+#endif
inode = osd_sb(o)->s_root->d_inode;
lu_local_obj_fid(fid, OSD_FS_ROOT_OID);
rc = osd_ea_fid_set(info, inode, fid, LMAC_NOT_IN_OI, 0);
}
if (lmd_flags & LMD_FLG_NOSCRUB)
- o->od_auto_scrub_interval = AS_NEVER;
+ o->od_scrub.os_scrub.os_auto_scrub_interval = AS_NEVER;
if (blk_queue_nonrot(bdev_get_queue(osd_sb(o)->s_bdev))) {
/* do not use pagecache with flash-backed storage */
osd_index_backup(env, o, false);
osd_shutdown(env, o);
osd_procfs_fini(o);
+ if (o->od_oi_table != NULL)
+ osd_oi_fini(osd_oti_get(env), o);
+ if (o->od_extent_bytes_percpu)
+ free_percpu(o->od_extent_bytes_percpu);
osd_obj_map_fini(o);
osd_umount(env, o);
{
struct lu_device *l = osd2lu_dev(o);
struct osd_thread_info *info;
- int rc;
int cplen = 0;
+ char *opts = NULL;
+ bool restored = false;
+ int rc;
/* if the module was re-loaded, env can lose its keys */
rc = lu_env_refill((struct lu_env *)env);
spin_lock_init(&o->od_lock);
o->od_index_backup_policy = LIBP_NONE;
o->od_t10_type = 0;
+ init_waitqueue_head(&o->od_commit_cb_done);
o->od_read_cache = 1;
o->od_writethrough_cache = 1;
+ o->od_enable_projid_xattr = 0;
o->od_readcache_max_filesize = OSD_MAX_CACHE_SIZE;
o->od_readcache_max_iosize = OSD_READCACHE_MAX_IO_MB << 20;
o->od_writethrough_max_iosize = OSD_WRITECACHE_MAX_IO_MB << 20;
- o->od_auto_scrub_interval = AS_DEFAULT;
+ o->od_scrub.os_scrub.os_auto_scrub_interval = AS_DEFAULT;
+ /* default fallocate to unwritten extents: LU-14326/LU-14333 */
+ o->od_fallocate_zero_blocks = 0;
cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4),
sizeof(o->od_svname));
if (rc != 0)
GOTO(out_site, rc);
+ opts = lustre_cfg_string(cfg, 3);
+ if (opts && strstr(opts, "resetoi"))
+ restored = true;
+
INIT_LIST_HEAD(&o->od_ios_list);
+
+ rc = lprocfs_init_brw_stats(&o->od_brw_stats);
+ if (rc)
+ GOTO(out_brw_stats, rc);
+
/* setup scrub, including OI files initialization */
o->od_in_init = 1;
- rc = osd_scrub_setup(env, o);
+ rc = osd_scrub_setup(env, o, restored);
o->od_in_init = 0;
if (rc < 0)
- GOTO(out_site, rc);
+ GOTO(out_brw_stats, rc);
rc = osd_procfs_init(o, o->od_svname);
if (rc != 0) {
/* currently it's no need to prepare qsd_instance_md for OST */
if (!o->od_is_ost) {
o->od_quota_slave_md = qsd_init(env, o->od_svname,
- &o->od_dt_dev,
- o->od_proc_entry, true);
+ &o->od_dt_dev, o->od_proc_entry,
+ true, true);
if (IS_ERR(o->od_quota_slave_md)) {
rc = PTR_ERR(o->od_quota_slave_md);
o->od_quota_slave_md = NULL;
}
o->od_quota_slave_dt = qsd_init(env, o->od_svname, &o->od_dt_dev,
- o->od_proc_entry, false);
+ o->od_proc_entry, false, true);
if (IS_ERR(o->od_quota_slave_dt)) {
if (o->od_quota_slave_md != NULL) {
GOTO(out_procfs, rc);
}
+ o->od_extent_bytes_percpu = alloc_percpu(unsigned int);
+ if (!o->od_extent_bytes_percpu) {
+ rc = -ENOMEM;
+ GOTO(out_procfs, rc);
+ }
+
RETURN(0);
out_procfs:
osd_procfs_fini(o);
out_scrub:
osd_scrub_cleanup(env, o);
+out_brw_stats:
+ lprocfs_fini_brw_stats(&o->od_brw_stats);
out_site:
lu_site_fini(&o->od_site);
out_compat:
/* XXX: make osd top device in order to release reference */
d->ld_site->ls_top_dev = d;
lu_site_purge(env, d->ld_site, -1);
- if (!cfs_hash_is_empty(d->ld_site->ls_obj_hash)) {
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
- lu_site_print(env, d->ld_site, &msgdata, lu_cdebug_printer);
- }
+ lu_site_print(env, d->ld_site, &d->ld_site->ls_obj_hash.nelems,
+ D_ERROR, lu_cdebug_printer);
lu_site_fini(&o->od_site);
dt_device_fini(&o->od_dt_dev);
OBD_FREE_PTR(o);
RETURN(result);
}
-static int osd_fid_alloc(const struct lu_env *env, struct obd_export *exp,
- struct lu_fid *fid, struct md_op_data *op_data)
+/**
+ * Implementation of lu_device_operations::ldo_fid_alloc() for OSD
+ *
+ * Allocate FID.
+ *
+ * The FID is requested from the OSD's client sequence
+ * (osd_device::od_cl_seq) by calling seq_client_alloc_fid().
+ *
+ * \param env	execution environment, passed through unchanged
+ * \param d	lu_device, converted to the osd_device with osd_dev()
+ * \param fid	FID buffer handed to seq_client_alloc_fid()
+ * \param parent	not used by this implementation
+ * \param name	not used by this implementation
+ *
+ * \retval	the return value of seq_client_alloc_fid(), unmodified
+ *
+ * see include/lu_object.h for the details.
+ */
+static int osd_fid_alloc(const struct lu_env *env, struct lu_device *d,
+ struct lu_fid *fid, struct lu_object *parent,
+ const struct lu_name *name)
{
- struct osd_device *osd = osd_dev(exp->exp_obd->obd_lu_dev);
+ struct osd_device *osd = osd_dev(d);
return seq_client_alloc_fid(env, osd->od_cl_seq, fid);
}
.ldo_process_config = osd_process_config,
.ldo_recovery_complete = osd_recovery_complete,
.ldo_prepare = osd_prepare,
+ .ldo_fid_alloc = osd_fid_alloc,
};
static const struct lu_device_type_operations osd_device_type_ops = {
/*
* lprocfs legacy support.
+ *
+ * OBD method table for this OSD type.  It is passed to
+ * class_register_type() together with osd_device_type further down in
+ * this file.  FID allocation is not part of this table; it is provided
+ * through lu_device_operations::ldo_fid_alloc (osd_fid_alloc()) instead.
*/
-static struct obd_ops osd_obd_device_ops = {
+static const struct obd_ops osd_obd_device_ops = {
.o_owner = THIS_MODULE,
.o_connect = osd_obd_connect,
.o_disconnect = osd_obd_disconnect,
- .o_fid_alloc = osd_fid_alloc,
.o_health_check = osd_health_check,
};
struct kobject *kobj;
int rc;
- CLASSERT(BH_DXLock < sizeof(((struct buffer_head *)0)->b_state) * 8);
+ BUILD_BUG_ON(BH_DXLock >=
+ sizeof(((struct buffer_head *)0)->b_state) * 8);
#if !defined(CONFIG_DEBUG_MUTEXES) && !defined(CONFIG_DEBUG_SPINLOCK)
/* please, try to keep osd_thread_info smaller than a page */
- CLASSERT(sizeof(struct osd_thread_info) <= PAGE_SIZE);
+ BUILD_BUG_ON(sizeof(struct osd_thread_info) > PAGE_SIZE);
#endif
osd_oi_mod_init();
if (rc)
return rc;
-#ifdef CONFIG_KALLSYMS
- priv_security_file_alloc =
- (void *)kallsyms_lookup_name("security_file_alloc");
- priv_dev_set_rdonly = (void *)kallsyms_lookup_name("dev_set_rdonly");
- priv_dev_check_rdonly =
- (void *)kallsyms_lookup_name("dev_check_rdonly");
-#endif
-
- rc = class_register_type(&osd_obd_device_ops, NULL, true, NULL,
+ rc = class_register_type(&osd_obd_device_ops, NULL, true,
LUSTRE_OSD_LDISKFS_NAME, &osd_device_type);
if (rc) {
lu_kmem_fini(ldiskfs_caches);
rc = 0;
}
}
+
return rc;
}