X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosd-zfs%2Fosd_handler.c;h=23d684c7dbad1be2a8a392b61c01a69f6ce1221f;hp=25ffbc6882ff6f144e438ed2eeb7312b47826656;hb=8d1639b5cf1edbc885876956dcd6189173c00955;hpb=bb6ed7dcde71b3878402f1ad4c2d88bc8edd8557 diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c index 25ffbc6..23d684c 100644 --- a/lustre/osd-zfs/osd_handler.c +++ b/lustre/osd-zfs/osd_handler.c @@ -23,7 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2012, 2016, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -66,6 +66,8 @@ struct lu_context_key osd_key; +static int osd_txg_sync_delay_us = -1; + /* Slab for OSD object allocation */ struct kmem_cache *osd_object_kmem; @@ -166,8 +168,6 @@ static void osd_trans_commit_cb(void *cb_data, int error) lu_device_put(lud); th->th_dev = NULL; - lu_context_exit(&th->th_ctx); - lu_context_fini(&th->th_ctx); OBD_FREE_PTR(oh); EXIT; @@ -225,8 +225,7 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d, /* add commit callback */ dmu_tx_callback_register(oh->ot_tx, osd_trans_commit_cb, oh); oh->ot_assigned = 1; - lu_context_init(&th->th_ctx, th->th_tags); - lu_context_enter(&th->th_ctx); + osd_oti_get(env)->oti_in_trans = 1; lu_device_get(&d->dd_lu_dev); } @@ -314,11 +313,16 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, /* XXX: Once dmu_tx_commit() called, oh/th could have been freed * by osd_trans_commit_cb already. 
*/ dmu_tx_commit(oh->ot_tx); + osd_oti_get(env)->oti_in_trans = 0; osd_unlinked_list_emptify(env, osd, &unlinked, true); - if (sync) - txg_wait_synced(dmu_objset_pool(osd->od_os), txg); + if (sync) { + if (osd_txg_sync_delay_us < 0) + txg_wait_synced(dmu_objset_pool(osd->od_os), txg); + else + udelay(osd_txg_sync_delay_us); + } RETURN(rc); } @@ -360,7 +364,6 @@ static struct thandle *osd_trans_create(const struct lu_env *env, th = &oh->ot_super; th->th_dev = dt; th->th_result = 0; - th->th_tags = LCT_TX_HANDLE; RETURN(th); } @@ -611,6 +614,14 @@ static void osd_conf_get(const struct lu_env *env, param->ddp_max_extent_blks = (1 << (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)); param->ddp_extent_tax = osd_blk_insert_cost(osd); + + /* Preferred RPC size for efficient disk IO. 1MB shows good + * all-around performance for ZFS, but use blocksize (recordsize) + * by default if larger to avoid read-modify-write. */ + if (osd->od_max_blksz > ONE_MB_BRW_SIZE) + param->ddp_brw_size = osd->od_max_blksz; + else + param->ddp_brw_size = ONE_MB_BRW_SIZE; } /* @@ -783,6 +794,14 @@ static void osd_readonly_changed_cb(void *arg, uint64_t newval) osd->od_prop_rdonly = !!newval; } +#ifdef HAVE_DMU_OBJECT_ALLOC_DNSIZE +static void osd_dnodesize_changed_cb(void *arg, uint64_t newval) +{ + struct osd_device *osd = arg; + + osd->od_dnsize = newval; +} +#endif /* * This function unregisters all registered callbacks. 
It's harmless to * unregister callbacks that were never registered so it is used to safely @@ -798,6 +817,10 @@ static void osd_objset_unregister_callbacks(struct osd_device *o) osd_recordsize_changed_cb, o); (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY), osd_readonly_changed_cb, o); +#ifdef HAVE_DMU_OBJECT_ALLOC_DNSIZE + (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DNODESIZE), + osd_dnodesize_changed_cb, o); +#endif if (o->arc_prune_cb != NULL) { arc_remove_prune_callback(o->arc_prune_cb); @@ -834,6 +857,13 @@ static int osd_objset_register_callbacks(struct osd_device *o) if (rc) GOTO(err, rc); +#ifdef HAVE_DMU_OBJECT_ALLOC_DNSIZE + rc = -dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_DNODESIZE), + osd_dnodesize_changed_cb, o); + if (rc) + GOTO(err, rc); +#endif + o->arc_prune_cb = arc_add_prune_callback(arc_prune_func, o); err: dsl_pool_config_exit(dp, FTAG); @@ -947,6 +977,7 @@ int osd_unlinked_object_free(const struct lu_env *env, struct osd_device *osd, } tx = dmu_tx_create(osd->od_os); + dmu_tx_mark_netfree(tx); dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END); osd_tx_hold_zap(tx, osd->od_unlinked->dn_object, osd->od_unlinked, FALSE, NULL); @@ -1028,6 +1059,9 @@ static int osd_mount(const struct lu_env *env, if (rc >= sizeof(o->od_svname)) RETURN(-E2BIG); + o->od_index_backup_stop = 0; + o->od_index = -1; /* -1 means index is invalid */ + rc = server_name2index(o->od_svname, &o->od_index, NULL); str = strstr(str, ":"); if (str) { unsigned long flags; @@ -1041,6 +1075,9 @@ static int osd_mount(const struct lu_env *env, LCONSOLE_WARN("%s: set dev_rdonly on this device\n", svname); } + + if (flags & LMD_FLG_NOSCRUB) + o->od_auto_scrub_interval = AS_NEVER; } if (server_name_is_ost(o->od_svname)) @@ -1069,10 +1106,14 @@ static int osd_mount(const struct lu_env *env, if (rc) GOTO(err, rc); - /* 1. 
initialize oi before any file create or file open */ - rc = osd_oi_init(env, o); - if (rc) - GOTO(err, rc); +#ifdef ZFS_PROJINHERIT + if (dmu_objset_projectquota_enabled(o->od_os)) { + rc = __osd_obj2dnode(o->od_os, DMU_PROJECTUSED_OBJECT, + &o->od_projectused_dn); + if (rc && rc != -ENOENT) + GOTO(err, rc); + } +#endif rc = lu_site_init(&o->od_site, osd2lu_dev(o)); if (rc) @@ -1087,6 +1128,12 @@ static int osd_mount(const struct lu_env *env, if (rc) GOTO(err, rc); + o->od_in_init = 1; + rc = osd_scrub_setup(env, o); + o->od_in_init = 0; + if (rc) + GOTO(err, rc); + rc = osd_procfs_init(o, o->od_svname); if (rc) GOTO(err, rc); @@ -1149,6 +1196,13 @@ static void osd_umount(const struct lu_env *env, struct osd_device *o) o->od_groupused_dn = NULL; } +#ifdef ZFS_PROJINHERIT + if (o->od_projectused_dn) { + osd_dnode_rele(o->od_projectused_dn); + o->od_projectused_dn = NULL; + } +#endif + if (o->od_os != NULL) { if (!o->od_dt_dev.dd_rdonly) /* force a txg sync to get all commit callbacks */ @@ -1176,6 +1230,9 @@ static int osd_device_init0(const struct lu_env *env, l->ld_ops = &osd_lu_ops; o->od_dt_dev.dd_ops = &osd_dt_ops; + sema_init(&o->od_otable_sem, 1); + INIT_LIST_HEAD(&o->od_ios_list); + o->od_auto_scrub_interval = AS_DEFAULT; out: RETURN(rc); @@ -1200,6 +1257,10 @@ static struct lu_device *osd_device_alloc(const struct lu_env *env, INIT_LIST_HEAD(&osl->osl_seq_list); rwlock_init(&osl->osl_seq_list_lock); sema_init(&osl->osl_seq_init_sem, 1); + INIT_LIST_HEAD(&dev->od_index_backup_list); + INIT_LIST_HEAD(&dev->od_index_restore_list); + spin_lock_init(&dev->od_lock); + dev->od_index_backup_policy = LIBP_NONE; rc = dt_device_init(&dev->od_dt_dev, type); if (rc == 0) { @@ -1258,7 +1319,7 @@ static struct lu_device *osd_device_fini(const struct lu_env *env, /* now with all the callbacks completed we can cleanup the remainings */ osd_shutdown(env, o); - osd_oi_fini(env, o); + osd_scrub_cleanup(env, o); rc = osd_procfs_fini(o); if (rc) { @@ -1294,6 +1355,9 @@ static 
int osd_process_config(const struct lu_env *env, rc = osd_mount(env, o, cfg); break; case LCFG_CLEANUP: + /* For the case LCFG_PRE_CLEANUP is not called in advance, + * that may happen if the mount process fails. */ + osd_index_backup(env, o, false); rc = osd_shutdown(env, o); break; case LCFG_PARAM: { @@ -1309,6 +1373,11 @@ static int osd_process_config(const struct lu_env *env, } break; } + case LCFG_PRE_CLEANUP: + osd_index_backup(env, o, + o->od_index_backup_policy != LIBP_NONE); + rc = 0; + break; default: rc = -ENOTTY; } @@ -1506,10 +1575,13 @@ static void __exit osd_exit(void) lu_kmem_fini(osd_caches); } -extern unsigned int osd_oi_count; module_param(osd_oi_count, int, 0444); MODULE_PARM_DESC(osd_oi_count, "Number of Object Index containers to be created, it's only valid for new filesystem."); +module_param(osd_txg_sync_delay_us, int, 0644); +MODULE_PARM_DESC(osd_txg_sync_delay_us, + "When zero or larger delay N usec instead of doing TXG sync"); + MODULE_AUTHOR("OpenSFS, Inc. "); MODULE_DESCRIPTION("Lustre Object Storage Device ("LUSTRE_OSD_ZFS_NAME")"); MODULE_VERSION(LUSTRE_VERSION_STRING);