AC_DEFINE(HAVE_DMU_PREFETCH_6ARG, 1,
[Have 6 argument dmu_pretch in ZFS])
])
+ dnl #
+ dnl # ZFS 0.7.0 feature: SPA_FEATURE_USEROBJ_ACCOUNTING
+ dnl #
+ LB_CHECK_COMPILE([if zfs has native dnode accounting supported],
+ dmu_objset_userobjspace_upgrade, [
+ #include <sys/dmu_objset.h>
+ ],[
+ dmu_objset_userobjspace_upgrade(NULL);
+ ],[
+ AC_DEFINE(HAVE_DMU_USEROBJ_ACCOUNTING, 1,
+ [Have native dnode accounting in ZFS])
+ ])
])
AM_CONDITIONAL(ZFS_ENABLED, [test "x$enable_zfs" = xyes])
RETURN(0);
}
- /* When doing our own inode accounting, the ZAPs storing per-uid/gid
- * usage are updated at operation execution time, so we should call
- * qsd_op_end() straight away. Otherwise (for blk accounting maintained
- * by ZFS and when #inode is estimated from #blks) accounting is updated
- * at commit time and the call to qsd_op_end() must be delayed */
- if (oh->ot_quota_trans.lqt_id_cnt > 0 &&
- !oh->ot_quota_trans.lqt_ids[0].lqi_is_blk &&
- !osd->od_quota_iused_est)
- qsd_op_end(env, osd->od_quota_slave, &oh->ot_quota_trans);
-
rc = dt_txn_hook_stop(env, th);
if (rc != 0)
CDEBUG(D_OTHER, "%s: transaction hook failed: rc = %d\n",
if (rc)
GOTO(err, rc);
- /* Use our own ZAP for inode accounting by default, this can be changed
- * via procfs to estimate the inode usage from the block usage */
- o->od_quota_iused_est = 0;
-
rc = osd_procfs_init(o, o->od_svname);
if (rc)
GOTO(err, rc);
#include <sys/zfs_znode.h>
#include <sys/zap.h>
#include <sys/dbuf.h>
+#include <sys/dmu_objset.h>
/**
* By design including kmem.h overrides the Linux slab interfaces to provide
unsigned int od_dev_set_rdonly:1, /**< osd_ro() called */
od_prop_rdonly:1, /**< ZFS property readonly */
od_xattr_in_sa:1,
- od_quota_iused_est:1,
od_is_ost:1,
od_posix_acl:1;
DB_DNODE_EXIT(db);
dmu_buf_rele(&db->db, osd_obj_tag);
}
+
+#ifndef HAVE_DMU_USEROBJ_ACCOUNTING
+
+/*
+ * Key prefix for per-id object (dnode) accounting entries.  Without native
+ * support in ZFS this is only a placeholder so that code formatting keys
+ * with the macro still compiles.
+ */
+#define OSD_DMU_USEROBJ_PREFIX "obj-"
+
+/**
+ * Tell whether ZFS-maintained object accounting can be used for \a osd.
+ *
+ * Built without HAVE_DMU_USEROBJ_ACCOUNTING, so it is never available.
+ *
+ * \param[in] osd	OSD device (unused in this variant)
+ *
+ * \retval false	always
+ */
+static inline bool osd_dmu_userobj_accounting_available(struct osd_device *osd)
+{
+	return false;
+}
+
+#else /* HAVE_DMU_USEROBJ_ACCOUNTING */
+
+/* Use the prefix ZFS defines (DMU_OBJACCT_PREFIX, expected from the ZFS
+ * headers) for object accounting keys. */
+#define OSD_DMU_USEROBJ_PREFIX DMU_OBJACCT_PREFIX
+
+/**
+ * Tell whether ZFS-maintained object accounting can be used for \a osd.
+ *
+ * If the objset reports that it can be upgraded to object accounting, the
+ * upgrade is requested first; the answer is then whatever ZFS reports as
+ * currently present.
+ *
+ * \param[in] osd	OSD device whose objset (od_os) is queried
+ *
+ * \retval true		object accounting is present on the objset
+ * \retval false	object accounting is not (yet) present
+ */
+static inline bool osd_dmu_userobj_accounting_available(struct osd_device *osd)
+{
+	bool need_upgrade = dmu_objset_userobjspace_upgradable(osd->od_os);
+
+	if (unlikely(need_upgrade))
+		dmu_objset_userobjspace_upgrade(osd->od_os);
+
+	return dmu_objset_userobjspace_present(osd->od_os);
+}
+
+#endif /* HAVE_DMU_USEROBJ_ACCOUNTING */
+
#endif /* _OSD_INTERNAL_H */
}
LPROC_SEQ_FOPS_WO_TYPE(zfs, osd_force_sync);
-static int zfs_osd_iused_est_seq_show(struct seq_file *m, void *data)
-{
- struct osd_device *osd = osd_dt_dev((struct dt_device *)m->private);
- LASSERT(osd != NULL);
-
- seq_printf(m, "%d\n", osd->od_quota_iused_est);
- return 0;
-}
-
-static ssize_t
-zfs_osd_iused_est_seq_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *off)
-{
- struct seq_file *m = file->private_data;
- struct dt_device *dt = m->private;
- struct osd_device *osd = osd_dt_dev(dt);
- int rc;
- __s64 val;
-
- LASSERT(osd != NULL);
-
- rc = lprocfs_str_to_s64(buffer, count, &val);
- if (rc)
- return rc;
-
- osd->od_quota_iused_est = !!val;
-
- return count;
-}
-LPROC_SEQ_FOPS(zfs_osd_iused_est);
-
LPROC_SEQ_FOPS_RO_TYPE(zfs, dt_blksize);
LPROC_SEQ_FOPS_RO_TYPE(zfs, dt_kbytestotal);
LPROC_SEQ_FOPS_RO_TYPE(zfs, dt_kbytesfree);
.fops = &zfs_osd_mntdev_fops },
{ .name = "force_sync",
.fops = &zfs_osd_force_sync_fops },
- { .name = "quota_iused_estimate",
- .fops = &zfs_osd_iused_est_fops },
{ 0 }
};
osd_declare_xattrs_destroy(env, obj, oh);
- /* declare that we'll remove object from inode accounting ZAPs */
- dmu_tx_hold_zap(oh->ot_tx, osd->od_iusr_oid, FALSE, NULL);
- dmu_tx_hold_zap(oh->ot_tx, osd->od_igrp_oid, FALSE, NULL);
-
/* one less inode */
rc = osd_declare_quota(env, osd, obj->oo_attr.la_uid,
obj->oo_attr.la_gid, -1, oh, false, NULL, false);
GOTO(out, rc);
}
- /* Remove object from inode accounting. It is not fatal for the destroy
- * operation if something goes wrong while updating accounting, but we
- * still log an error message to notify the administrator */
- rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid,
- obj->oo_attr.la_uid, -1, oh->ot_tx);
- if (rc)
- CERROR("%s: failed to remove "DFID" from accounting ZAP for usr"
- " %d: rc = %d\n", osd->od_svname, PFID(fid),
- obj->oo_attr.la_uid, rc);
- rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid,
- obj->oo_attr.la_gid, -1, oh->ot_tx);
- if (rc)
- CERROR("%s: failed to remove "DFID" from accounting ZAP for grp"
- " %d: rc = %d\n", osd->od_svname, PFID(fid),
- obj->oo_attr.la_gid, rc);
-
oid = obj->oo_dn->dn_object;
if (unlikely(obj->oo_destroy == OSD_DESTROY_NONE)) {
/* this may happen if the destroy wasn't declared
}
if (attr && attr->la_valid & LA_UID) {
- /* account for user inode tracking ZAP update */
- dmu_tx_hold_zap(oh->ot_tx, osd->od_iusr_oid, FALSE, NULL);
-
/* quota enforcement for user */
if (attr->la_uid != obj->oo_attr.la_uid) {
rc = qsd_transfer(env, osd->od_quota_slave,
}
}
if (attr && attr->la_valid & LA_GID) {
- /* account for user inode tracking ZAP update */
- dmu_tx_hold_zap(oh->ot_tx, osd->od_igrp_oid, FALSE, NULL);
-
/* quota enforcement for group */
if (attr->la_gid != obj->oo_attr.la_gid) {
rc = qsd_transfer(env, osd->od_quota_slave,
}
}
- /* do both accounting updates outside oo_attr_lock below */
- if ((valid & LA_UID) && (la->la_uid != obj->oo_attr.la_uid)) {
- /* Update user accounting. Failure isn't fatal, but we still
- * log an error message */
- rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid,
- la->la_uid, 1, oh->ot_tx);
- if (rc)
- CERROR("%s: failed to update accounting ZAP for user "
- "%d (%d)\n", osd->od_svname, la->la_uid, rc);
- rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid,
- obj->oo_attr.la_uid, -1, oh->ot_tx);
- if (rc)
- CERROR("%s: failed to update accounting ZAP for user "
- "%d (%d)\n", osd->od_svname,
- obj->oo_attr.la_uid, rc);
- }
- if ((valid & LA_GID) && (la->la_gid != obj->oo_attr.la_gid)) {
- /* Update group accounting. Failure isn't fatal, but we still
- * log an error message */
- rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid,
- la->la_gid, 1, oh->ot_tx);
- if (rc)
- CERROR("%s: failed to update accounting ZAP for user "
- "%d (%d)\n", osd->od_svname, la->la_gid, rc);
- rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid,
- obj->oo_attr.la_gid, -1, oh->ot_tx);
- if (rc)
- CERROR("%s: failed to update accounting ZAP for user "
- "%d (%d)\n", osd->od_svname,
- obj->oo_attr.la_gid, rc);
- }
-
write_lock(&obj->oo_attr_lock);
cnt = 0;
if (valid & LA_ATIME) {
zapid = osd_get_name_n_idx(env, osd, fid, NULL, 0);
dmu_tx_hold_zap(oh->ot_tx, zapid, TRUE, NULL);
- /* we will also update inode accounting ZAPs */
- dmu_tx_hold_zap(oh->ot_tx, osd->od_iusr_oid, FALSE, NULL);
- dmu_tx_hold_zap(oh->ot_tx, osd->od_igrp_oid, FALSE, NULL);
-
/* will help to find FID->ino mapping at dt_insert() */
osd_idc_find_and_init(env, osd, obj);
LASSERT(th != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
- /*
- * XXX missing: Quote handling.
- */
-
LASSERT(obj->oo_dn == NULL);
/* to follow ZFS on-disk format we need
GOTO(out, rc);
osd_idc_find_and_init(env, osd, obj);
- /* Add new object to inode accounting.
- * Errors are not considered as fatal */
- rc = -zap_increment_int(osd->od_os, osd->od_iusr_oid,
- (attr->la_valid & LA_UID) ? attr->la_uid : 0, 1,
- oh->ot_tx);
- if (rc)
- CERROR("%s: failed to add "DFID" to accounting ZAP for usr %d "
- "(%d)\n", osd->od_svname, PFID(fid), attr->la_uid, rc);
- rc = -zap_increment_int(osd->od_os, osd->od_igrp_oid,
- (attr->la_valid & LA_GID) ? attr->la_gid : 0, 1,
- oh->ot_tx);
- if (rc)
- CERROR("%s: failed to add "DFID" to accounting ZAP for grp %d "
- "(%d)\n", osd->od_svname, PFID(fid), attr->la_gid, rc);
-
out:
if (unlikely(rc && dn)) {
dmu_object_free(osd->od_os, dn->dn_object, oh->ot_tx);
{
struct osd_thread_info *info = osd_oti_get(env);
char *buf = info->oti_buf;
+ size_t buflen = sizeof(info->oti_buf);
struct lquota_acct_rec *rec = (struct lquota_acct_rec *)dtrec;
struct osd_object *obj = osd_dt_obj(dtobj);
struct osd_device *osd = osd_obj2dev(obj);
rec->bspace = rec->ispace = 0;
/* convert the 64-bit uid/gid into a string */
- sprintf(buf, "%llx", *((__u64 *)dtkey));
+ snprintf(buf, buflen, "%llx", *((__u64 *)dtkey));
/* fetch DMU object ID (DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT) to be
* used */
oid = osd_quota_fid2dmu(lu_object_fid(&dtobj->do_lu));
/* disk usage (in bytes) is maintained by DMU.
* DMU_USERUSED_OBJECT/DMU_GROUPUSED_OBJECT are special objects which
- * not associated with any dmu_but_t (see dnode_special_open()).
- * As a consequence, we cannot use udmu_zap_lookup() here since it
- * requires a valid oo_dn. */
- rc = -zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1,
+	 * not associated with any dmu_buf_t (see dnode_special_open()). */
+	/* zap_lookup() reports failures as positive errno values; negate the
+	 * result so that the -ENOENT test below can match and callers get a
+	 * negative error code */
+	rc = -zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1,
&rec->bspace);
- if (rc == -ENOENT)
+ if (rc == -ENOENT) {
/* user/group has not created anything yet */
CDEBUG(D_QUOTA, "%s: id %s not found in DMU accounting ZAP\n",
osd->od_svname, buf);
- else if (rc)
+ } else if (rc) {
RETURN(rc);
+ }
- if (osd->od_quota_iused_est) {
+ if (!osd_dmu_userobj_accounting_available(osd)) {
if (rec->bspace != 0)
/* estimate #inodes in use */
rec->ispace = osd_objset_user_iused(osd, rec->bspace);
- RETURN(+1);
+ rc = 1;
+ } else {
+		/* object usage is looked up in the same accounting object as
+		 * block usage, under the id prefixed by
+		 * OSD_DMU_USEROBJ_PREFIX */
+		snprintf(buf, buflen, OSD_DMU_USEROBJ_PREFIX "%llx",
+			 *((__u64 *)dtkey));
+		/* negate: zap_lookup() returns positive errno values */
+		rc = -zap_lookup(osd->od_os, oid, buf, sizeof(uint64_t), 1,
+				 &rec->ispace);
+		if (rc == -ENOENT) {
+			/* nothing accounted for this id yet: ispace keeps its
+			 * zero initial value and the lookup still succeeds */
+			CDEBUG(D_QUOTA,
+			       "%s: id %s not found dnode accounting\n",
+			       osd->od_svname, buf);
+			rc = 1;
+		} else if (rc == 0) {
+			rc = 1;
+		}
}
- /* as for inode accounting, it is not maintained by DMU, so we just
- * use our own ZAP to track inode usage */
- rc = -zap_lookup(osd->od_os, obj->oo_dn->dn_object,
- buf, sizeof(uint64_t), 1, &rec->ispace);
- if (rc == -ENOENT)
- /* user/group has not created any file yet */
- CDEBUG(D_QUOTA, "%s: id %s not found in accounting ZAP\n",
- osd->od_svname, buf);
- else if (rc)
- RETURN(rc);
-
- RETURN(+1);
+ RETURN(rc);
}
/**
if (rc)
RETURN(rc);
- if (osd->od_quota_iused_est) {
+ if (!osd_dmu_userobj_accounting_available(osd)) {
if (rec->bspace != 0)
/* estimate #inodes in use */
rec->ispace = osd_objset_user_iused(osd, rec->bspace);
local qval
local cmd
+ # LU-2435: if the underlying zfs doesn't support userobj_accounting,
+ # lustre will estimate the object count usage. This fails quota
+ # verification in 32b. The object quota usage should be accurate after
+ # zfs-0.7.0 is released.
+ [ $fstype == "zfs" ] && {
+ local zfs_version=$(do_node $node cat /sys/module/zfs/version)
+
+ [ $(version_code $zfs_version) -lt $(version_code 0.7.0) ] && {
+ echo "Skip quota verify for zfs: $zfs_version"
+ return 0
+ }
+ }
+
$LFS quota -u $T32_QID -v $mnt
qval=$($LFS quota -v -u $T32_QID $mnt |
$ZPOOL import -f -d $tmp $poolname"
done
+ # upgrade zpool to latest supported features, including
+ # dnode quota accounting in 0.7.0
+ $r "$ZPOOL upgrade -a"
+
mdt_dev=t32fs-mdt1/mdt1
ost_dev=t32fs-ost1/ost1
! $mdt2_is_available || mdt2_dev=t32fs-mdt2/mdt2
if [[ $fstype == zfs ]]; then
local poolname=t32fs-mdt1
$r "modprobe zfs;
- $ZPOOL list -H $poolname >/dev/null 2>&1 ||
+ $ZPOOL list -H $poolname >/dev/null 2>&1 ||
$ZPOOL import -f -d $tmp $poolname"
+
+ # upgrade zpool to latest supported features,
+ # including dnode quota accounting in 0.7.0
+ $r "$ZPOOL upgrade $poolname"
fi
# mount a second time to make sure we didnt leave upgrade flag on
test_7e() {
[ "$MDSCOUNT" -lt "2" ] && skip "Required more MDTs" && return
+	# LU-2435: skip this quota test if the underlying zfs version does not
+	# support native dnode accounting
+ [ "$(facet_fstype mds1)" == "zfs" ] && {
+ local zfs_version=$(do_facet mds1 cat /sys/module/zfs/version)
+
+ [ $(version_code $zfs_version) -lt $(version_code 0.7.0) ] &&
+ skip "requires zfs version at least 0.7.0" && return
+ }
+
local ilimit=$((1024 * 2)) # 2k inodes
local TESTFILE=$DIR/${tdir}-1/$tfile