Whamcloud - gitweb
LU-1303 osd: zfs-osd should initialize local objects on MDS
[fs/lustre-release.git] / lustre / osd-zfs / osd_handler.c
index fedf1e9..bba18f7 100644 (file)
@@ -131,6 +131,7 @@ static void osd_trans_commit_cb(void *cb_data, int error)
 {
        struct osd_thandle      *oh = cb_data;
        struct thandle          *th = &oh->ot_super;
+       struct osd_device       *osd = osd_dt_dev(th->th_dev);
        struct lu_device        *lud = &th->th_dev->dd_lu_dev;
        struct dt_txn_commit_cb *dcb, *tmp;
 
@@ -151,6 +152,14 @@ static void osd_trans_commit_cb(void *cb_data, int error)
        cfs_list_for_each_entry_safe(dcb, tmp, &oh->ot_dcb_list, dcb_linkage)
                dcb->dcb_func(NULL, th, dcb, error);
 
+       /* Unlike ldiskfs, zfs updates space accounting at commit time.
+        * As a consequence, op_end is called only now to inform the quota slave
+        * component that reserved quota space is now accounted in usage and
+        * should be released. Quota space won't be adjusted at this point since
+        * we can't provide a suitable environment. It will be performed
+        * asynchronously by a lquota thread. */
+       qsd_op_end(NULL, osd->od_quota_slave, &oh->ot_quota_trans);
+
        lu_device_put(lud);
        th->th_dev = NULL;
        lu_context_exit(&th->th_ctx);
@@ -234,10 +243,23 @@ static int osd_trans_stop(const struct lu_env *env, struct thandle *th)
                LASSERT(oh->ot_tx);
                dmu_tx_abort(oh->ot_tx);
                osd_object_sa_dirty_rele(oh);
+               /* There won't be any commit, so release the reserved quota
+                * space (if any) right away. */
+               qsd_op_end(env, osd->od_quota_slave, &oh->ot_quota_trans);
                OBD_FREE_PTR(oh);
                RETURN(0);
        }
 
+       /* When doing our own inode accounting, the ZAPs storing per-uid/gid
+        * usage are updated at operation execution time, so we should call
+        * qsd_op_end() straight away. Otherwise (for blk accounting maintained
+        * by ZFS and when #inode is estimated from #blks) accounting is updated
+        * at commit time and the call to qsd_op_end() must be delayed */
+       if (oh->ot_quota_trans.lqt_id_cnt > 0 &&
+                       !oh->ot_quota_trans.lqt_ids[0].lqi_is_blk &&
+                       !osd->od_quota_iused_est)
+               qsd_op_end(env, osd->od_quota_slave, &oh->ot_quota_trans);
+
        rc = dt_txn_hook_stop(env, th);
        if (rc != 0)
                CDEBUG(D_OTHER, "%s: transaction hook failed: rc = %d\n",
@@ -279,6 +301,7 @@ static struct thandle *osd_trans_create(const struct lu_env *env,
        CFS_INIT_LIST_HEAD(&oh->ot_dcb_list);
        CFS_INIT_LIST_HEAD(&oh->ot_sa_list);
        cfs_sema_init(&oh->ot_sa_lock, 1);
+       memset(&oh->ot_quota_trans, 0, sizeof(oh->ot_quota_trans));
        th = &oh->ot_super;
        th->th_dev = dt;
        th->th_result = 0;
@@ -522,6 +545,15 @@ static int osd_mount(const struct lu_env *env,
        if (rc)
                GOTO(err, rc);
 
+       rc = lu_site_init(&o->od_site, osd2lu_dev(o));
+       if (rc)
+               GOTO(err, rc);
+       o->od_site.ls_bottom_dev = osd2lu_dev(o);
+
+       rc = lu_site_init_finish(&o->od_site);
+       if (rc)
+               GOTO(err, rc);
+
        /* Use our own ZAP for inode accounting by default, this can be changed
         * via procfs to estimate the inode usage from the block usage */
        o->od_quota_iused_est = 0;
@@ -619,9 +651,13 @@ static struct lu_device *osd_device_free(const struct lu_env *env,
 
        cleanup_capa_hash(o->od_capa_hash);
        /* XXX: make osd top device in order to release reference */
-       /*d->ld_site->ls_top_dev = d;
+       d->ld_site->ls_top_dev = d;
        lu_site_purge(env, d->ld_site, -1);
-       lu_site_fini(&o->od_site);*/
+       if (!cfs_hash_is_empty(d->ld_site->ls_obj_hash)) {
+               LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
+               lu_site_print(env, d->ld_site, &msgdata, lu_cdebug_printer);
+       }
+       lu_site_fini(&o->od_site);
        dt_device_fini(&o->od_dt_dev);
        OBD_FREE_PTR(o);
 
@@ -753,6 +789,15 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
        int                      rc = 0;
        ENTRY;
 
+       if (dev->ld_site && lu_device_is_md(dev->ld_site->ls_top_dev)) {
+               /* MDT/MDD still use old infrastructure to create
+                * special files */
+               rc = llo_local_objects_setup(env, lu2md_dev(pdev),
+                                            lu2dt_dev(dev));
+               if (rc)
+                       RETURN(rc);
+       }
+
        /* initialize quota slave instance */
        osd->od_quota_slave = qsd_init(env, osd->od_svname, &osd->od_dt_dev,
                                       osd->od_proc_entry);