#include <obd_class.h>
#include <lustre_disk.h>
#include <lustre_fid.h>
-#include <lustre_param.h>
+#include <uapi/linux/lustre_param.h>
#include <md_object.h>
#include "osd_internal.h"
struct lu_env env;
int rc;
+ LASSERT(site->ls_obj_hash);
+
rc = lu_env_init(&env, LCT_SHRINKER);
if (rc) {
CERROR("%s: can't initialize shrinker env: rc = %d\n",
dt_txn_hook_commit(th);
/* call per-transaction callbacks if any */
- list_for_each_entry_safe(dcb, tmp, &oh->ot_dcb_list, dcb_linkage)
+ list_for_each_entry_safe(dcb, tmp, &oh->ot_dcb_list, dcb_linkage) {
+ LASSERTF(dcb->dcb_magic == TRANS_COMMIT_CB_MAGIC,
+ "commit callback entry: magic=%x name='%s'\n",
+ dcb->dcb_magic, dcb->dcb_name);
+ list_del_init(&dcb->dcb_linkage);
dcb->dcb_func(NULL, th, dcb, error);
+ }
/* Unlike ldiskfs, zfs updates space accounting at commit time.
* As a consequence, op_end is called only now to inform the quota slave
RETURN(rc);
}
-static int osd_unlinked_object_free(struct osd_device *osd, uint64_t oid);
-
-static void osd_unlinked_list_emptify(struct osd_device *osd,
+static void osd_unlinked_list_emptify(const struct lu_env *env,
+ struct osd_device *osd,
struct list_head *list, bool free)
{
struct osd_object *obj;
list_del_init(&obj->oo_unlinked_linkage);
if (free)
- (void)osd_unlinked_object_free(osd, oid);
+ (void)osd_unlinked_object_free(env, osd, oid);
}
}
LASSERT(oh->ot_tx);
dmu_tx_abort(oh->ot_tx);
osd_object_sa_dirty_rele(oh);
- osd_unlinked_list_emptify(osd, &unlinked, false);
+ osd_unlinked_list_emptify(env, osd, &unlinked, false);
/* there won't be any commit, release reserved quota space now,
* if any */
qsd_op_end(env, osd->od_quota_slave, &oh->ot_quota_trans);
RETURN(0);
}
- /* When doing our own inode accounting, the ZAPs storing per-uid/gid
- * usage are updated at operation execution time, so we should call
- * qsd_op_end() straight away. Otherwise (for blk accounting maintained
- * by ZFS and when #inode is estimated from #blks) accounting is updated
- * at commit time and the call to qsd_op_end() must be delayed */
- if (oh->ot_quota_trans.lqt_id_cnt > 0 &&
- !oh->ot_quota_trans.lqt_ids[0].lqi_is_blk &&
- !osd->od_quota_iused_est)
- qsd_op_end(env, osd->od_quota_slave, &oh->ot_quota_trans);
-
rc = dt_txn_hook_stop(env, th);
if (rc != 0)
CDEBUG(D_OTHER, "%s: transaction hook failed: rc = %d\n",
* by osd_trans_commit_cb already. */
dmu_tx_commit(oh->ot_tx);
- osd_unlinked_list_emptify(osd, &unlinked, true);
+ osd_unlinked_list_emptify(env, osd, &unlinked, true);
if (sync)
txg_wait_synced(dmu_objset_pool(osd->od_os), txg);
dmu_tx_t *tx;
ENTRY;
+ if (dt->dd_rdonly) {
+ CERROR("%s: someone try to start transaction under "
+ "readonly mode, should be disabled.\n",
+ osd_name(osd_dt_dev(dt)));
+ dump_stack();
+ RETURN(ERR_PTR(-EROFS));
+ }
+
tx = dmu_tx_create(osd->od_os);
if (tx == NULL)
RETURN(ERR_PTR(-ENOMEM));
CLASSERT(OSD_DNODE_MIN_BLKSHIFT > 0);
CLASSERT(OSD_DNODE_EST_BLKSHIFT > 0);
- est_usedblocks = (usedbytes >> est_maxblockshift) +
- (OSD_DNODE_EST_COUNT >> OSD_DNODE_EST_BLKSHIFT);
- est_usedobjs = usedobjs + OSD_DNODE_EST_COUNT;
+ est_usedblocks = ((OSD_DNODE_EST_COUNT << OSD_DNODE_EST_BLKSHIFT) +
+ usedbytes) >> est_maxblockshift;
+ est_usedobjs = OSD_DNODE_EST_COUNT + usedobjs;
if (est_usedobjs <= est_usedblocks) {
/*
osfs->os_bavail = osfs->os_bfree; /* no extra root reservation */
/* Take replication (i.e. number of copies) into account */
- osfs->os_bavail /= os->os_copies;
+ if (os->os_copies != 0)
+ osfs->os_bavail /= os->os_copies;
/*
* Reserve some space so we don't run into ENOSPC due to grants not
*/
static int osd_sync(const struct lu_env *env, struct dt_device *d)
{
- struct osd_device *osd = osd_dt_dev(d);
- CDEBUG(D_CACHE, "syncing OSD %s\n", LUSTRE_OSD_ZFS_NAME);
- txg_wait_synced(dmu_objset_pool(osd->od_os), 0ULL);
- CDEBUG(D_CACHE, "synced OSD %s\n", LUSTRE_OSD_ZFS_NAME);
+ if (!d->dd_rdonly) {
+ struct osd_device *osd = osd_dt_dev(d);
+
+ CDEBUG(D_CACHE, "syncing OSD %s\n", LUSTRE_OSD_ZFS_NAME);
+ txg_wait_synced(dmu_objset_pool(osd->od_os), 0ULL);
+ CDEBUG(D_CACHE, "synced OSD %s\n", LUSTRE_OSD_ZFS_NAME);
+ }
+
return 0;
}
/* shutdown quota slave instance associated with the device */
if (o->od_quota_slave != NULL) {
+ /* complete all in-flight callbacks */
+ osd_sync(env, &o->od_dt_dev);
+ txg_wait_callbacks(spa_get_dsl(dmu_objset_spa(o->od_os)));
qsd_fini(env, o->od_quota_slave);
o->od_quota_slave = NULL;
}
static int osd_objset_open(struct osd_device *o)
{
uint64_t version = ZPL_VERSION;
- uint64_t sa_obj;
+ uint64_t sa_obj, unlink_obj;
int rc;
ENTRY;
- rc = -dmu_objset_own(o->od_mntdev, DMU_OST_ZFS, B_FALSE, o, &o->od_os);
+ rc = -dmu_objset_own(o->od_mntdev, DMU_OST_ZFS,
+ o->od_dt_dev.dd_rdonly ? B_TRUE : B_FALSE,
+ o, &o->od_os);
if (rc) {
CERROR("%s: can't open %s\n", o->od_svname, o->od_mntdev);
o->od_os = NULL;
- goto out;
+
+ GOTO(out, rc);
}
/* Check ZFS version */
}
rc = -zap_lookup(o->od_os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET,
- 8, 1, &o->od_unlinkedid);
+ 8, 1, &unlink_obj);
if (rc) {
CERROR("%s: lookup for %s failed: rc = %d\n",
o->od_svname, ZFS_UNLINKED_SET, rc);
GOTO(out, rc = -ENOTSUPP);
}
+ rc = __osd_obj2dnode(o->od_os, unlink_obj, &o->od_unlinked);
+ if (rc) {
+ CERROR("%s: can't get dnode for unlinked: rc = %d\n",
+ o->od_svname, rc);
+ GOTO(out, rc);
+ }
+
out:
if (rc != 0 && o->od_os != NULL) {
dmu_objset_disown(o->od_os, o);
RETURN(rc);
}
-static int
-osd_unlinked_object_free(struct osd_device *osd, uint64_t oid)
+int osd_unlinked_object_free(const struct lu_env *env, struct osd_device *osd,
+ uint64_t oid)
{
+ char *key = osd_oti_get(env)->oti_str;
int rc;
dmu_tx_t *tx;
+ if (osd->od_dt_dev.dd_rdonly) {
+ CERROR("%s: someone try to free objects under "
+ "readonly mode, should be disabled.\n", osd_name(osd));
+ dump_stack();
+
+ return -EROFS;
+ }
+
rc = -dmu_free_long_range(osd->od_os, oid, 0, DMU_OBJECT_END);
if (rc != 0) {
CWARN("%s: Cannot truncate %llu: rc = %d\n",
tx = dmu_tx_create(osd->od_os);
dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END);
- dmu_tx_hold_zap(tx, osd->od_unlinkedid, FALSE, NULL);
+ osd_tx_hold_zap(tx, osd->od_unlinked->dn_object, osd->od_unlinked,
+ FALSE, NULL);
rc = -dmu_tx_assign(tx, TXG_WAIT);
if (rc != 0) {
CWARN("%s: Cannot assign tx for %llu: rc = %d\n",
goto failed;
}
- rc = -zap_remove_int(osd->od_os, osd->od_unlinkedid, oid, tx);
+ snprintf(key, sizeof(osd_oti_get(env)->oti_str), "%llx", oid);
+ rc = osd_zap_remove(osd, osd->od_unlinked->dn_object,
+ osd->od_unlinked, key, tx);
if (rc != 0) {
CWARN("%s: Cannot remove %llu from unlinked set: rc = %d\n",
osd->od_svname, oid, rc);
zap_cursor_t zc;
zap_attribute_t *za = &osd_oti_get(env)->oti_za;
- zap_cursor_init(&zc, osd->od_os, osd->od_unlinkedid);
+ zap_cursor_init(&zc, osd->od_os, osd->od_unlinked->dn_object);
while (zap_cursor_retrieve(&zc, za) == 0) {
/* If cannot free the object, leave it in the unlinked set,
* until the OSD is mounted again when obd_unlinked_drain()
* will be called. */
- if (osd_unlinked_object_free(osd, za->za_first_integer) != 0)
+ if (osd_unlinked_object_free(env, osd, za->za_first_integer))
break;
zap_cursor_advance(&zc);
}
struct osd_device *o, struct lustre_cfg *cfg)
{
char *mntdev = lustre_cfg_string(cfg, 1);
+ char *str = lustre_cfg_string(cfg, 2);
char *svname = lustre_cfg_string(cfg, 4);
dnode_t *rootdn;
const char *opts;
if (rc >= sizeof(o->od_svname))
RETURN(-E2BIG);
+ str = strstr(str, ":");
+ if (str) {
+ unsigned long flags;
+
+ rc = kstrtoul(str + 1, 10, &flags);
+ if (rc)
+ RETURN(-EINVAL);
+
+ if (flags & LMD_FLG_DEV_RDONLY) {
+ o->od_dt_dev.dd_rdonly = 1;
+ LCONSOLE_WARN("%s: set dev_rdonly on this device\n",
+ svname);
+ }
+ }
+
if (server_name_is_ost(o->od_svname))
o->od_is_ost = 1;
o->od_xattr_in_sa = B_TRUE;
o->od_max_blksz = osd_spa_maxblocksize(o->od_os->os_spa);
- rc = osd_objset_register_callbacks(o);
+ rc = __osd_obj2dnode(o->od_os, o->od_rootid, &rootdn);
if (rc)
GOTO(err, rc);
+ o->od_root = rootdn->dn_object;
+ osd_dnode_rele(rootdn);
- rc = __osd_obj2dnode(env, o->od_os, o->od_rootid, &rootdn);
+ rc = __osd_obj2dnode(o->od_os, DMU_USERUSED_OBJECT,
+ &o->od_userused_dn);
if (rc)
GOTO(err, rc);
- o->od_root = rootdn->dn_object;
- osd_dnode_rele(rootdn);
+ rc = __osd_obj2dnode(o->od_os, DMU_GROUPUSED_OBJECT,
+ &o->od_groupused_dn);
+ if (rc)
+ GOTO(err, rc);
/* 1. initialize oi before any file create or file open */
rc = osd_oi_init(env, o);
if (rc)
GOTO(err, rc);
- /* Use our own ZAP for inode accounting by default, this can be changed
- * via procfs to estimate the inode usage from the block usage */
- o->od_quota_iused_est = 0;
+ rc = osd_objset_register_callbacks(o);
+ if (rc)
+ GOTO(err, rc);
rc = osd_procfs_init(o, o->od_svname);
if (rc)
osd_unlinked_drain(env, o);
err:
- if (rc) {
+ if (rc && o->od_os) {
dmu_objset_disown(o->od_os, o);
o->od_os = NULL;
}
CERROR("%s: lost %d pinned dbuf(s)\n", o->od_svname,
atomic_read(&o->od_zerocopy_pin));
+ if (o->od_unlinked) {
+ osd_dnode_rele(o->od_unlinked);
+ o->od_unlinked = NULL;
+ }
+ if (o->od_userused_dn) {
+ osd_dnode_rele(o->od_userused_dn);
+ o->od_userused_dn = NULL;
+ }
+ if (o->od_groupused_dn) {
+ osd_dnode_rele(o->od_groupused_dn);
+ o->od_groupused_dn = NULL;
+ }
+
if (o->od_os != NULL) {
- /* force a txg sync to get all commit callbacks */
- txg_wait_synced(dmu_objset_pool(o->od_os), 0ULL);
+ if (!o->od_dt_dev.dd_rdonly)
+ /* force a txg sync to get all commit callbacks */
+ txg_wait_synced(dmu_objset_pool(o->od_os), 0ULL);
/* close the object set */
dmu_objset_disown(o->od_os, o);
ENTRY;
- osd_shutdown(env, o);
- osd_oi_fini(env, o);
-
if (o->od_os) {
osd_objset_unregister_callbacks(o);
- osd_sync(env, lu2dt_dev(d));
- txg_wait_callbacks(spa_get_dsl(dmu_objset_spa(o->od_os)));
+ if (!o->od_dt_dev.dd_rdonly) {
+ osd_sync(env, lu2dt_dev(d));
+ txg_wait_callbacks(
+ spa_get_dsl(dmu_objset_spa(o->od_os)));
+ }
}
+ /* now that all callbacks have completed, we can clean up the rest */
+ osd_shutdown(env, o);
+ osd_oi_fini(env, o);
+
rc = osd_procfs_fini(o);
if (rc) {
CERROR("proc fini error %d\n", rc);
LASSERT(&o->od_dt_dev);
rc = class_process_proc_param(PARAM_OSD, lprocfs_osd_obd_vars,
cfg, &o->od_dt_dev);
- if (rc > 0 || rc == -ENOSYS)
+ if (rc > 0 || rc == -ENOSYS) {
rc = class_process_proc_param(PARAM_OST,
lprocfs_osd_obd_vars,
cfg, &o->od_dt_dev);
+ if (rc > 0)
+ rc = 0;
+ }
break;
}
default: