Whamcloud - gitweb
LU-1883 osd: Fix niobuf_local offset usage
[fs/lustre-release.git] / lustre / osd-zfs / osd_handler.c
index e4f97cc..6bd4bf4 100644 (file)
@@ -131,6 +131,7 @@ static void osd_trans_commit_cb(void *cb_data, int error)
 {
        struct osd_thandle      *oh = cb_data;
        struct thandle          *th = &oh->ot_super;
+       struct osd_device       *osd = osd_dt_dev(th->th_dev);
        struct lu_device        *lud = &th->th_dev->dd_lu_dev;
        struct dt_txn_commit_cb *dcb, *tmp;
 
@@ -151,6 +152,14 @@ static void osd_trans_commit_cb(void *cb_data, int error)
        cfs_list_for_each_entry_safe(dcb, tmp, &oh->ot_dcb_list, dcb_linkage)
                dcb->dcb_func(NULL, th, dcb, error);
 
+       /* Unlike ldiskfs, zfs updates space accounting at commit time.
+        * As a consequence, op_end is called only now to inform the quota slave
+        * component that reserved quota space is now accounted in usage and
+        * should be released. Quota space won't be adjusted at this point since
+        * we can't provide a suitable environment. It will be performed
+        * asynchronously by an lquota thread. */
+       qsd_op_end(NULL, osd->od_quota_slave, &oh->ot_quota_trans);
+
        lu_device_put(lud);
        th->th_dev = NULL;
        lu_context_exit(&th->th_ctx);
@@ -234,10 +243,23 @@ static int osd_trans_stop(const struct lu_env *env, struct thandle *th)
                LASSERT(oh->ot_tx);
                dmu_tx_abort(oh->ot_tx);
                osd_object_sa_dirty_rele(oh);
+               /* there won't be any commit, release reserved quota space now,
+                * if any */
+               qsd_op_end(env, osd->od_quota_slave, &oh->ot_quota_trans);
                OBD_FREE_PTR(oh);
                RETURN(0);
        }
 
+       /* When doing our own inode accounting, the ZAPs storing per-uid/gid
+        * usage are updated at operation execution time, so we should call
+        * qsd_op_end() straight away. Otherwise (for blk accounting maintained
+        * by ZFS and when #inode is estimated from #blks) accounting is updated
+        * at commit time and the call to qsd_op_end() must be delayed */
+       if (oh->ot_quota_trans.lqt_id_cnt > 0 &&
+                       !oh->ot_quota_trans.lqt_ids[0].lqi_is_blk &&
+                       !osd->od_quota_iused_est)
+               qsd_op_end(env, osd->od_quota_slave, &oh->ot_quota_trans);
+
        rc = dt_txn_hook_stop(env, th);
        if (rc != 0)
                CDEBUG(D_OTHER, "%s: transaction hook failed: rc = %d\n",
@@ -279,6 +301,7 @@ static struct thandle *osd_trans_create(const struct lu_env *env,
        CFS_INIT_LIST_HEAD(&oh->ot_dcb_list);
        CFS_INIT_LIST_HEAD(&oh->ot_sa_list);
        cfs_sema_init(&oh->ot_sa_lock, 1);
+       memset(&oh->ot_quota_trans, 0, sizeof(oh->ot_quota_trans));
        th = &oh->ot_super;
        th->th_dev = dt;
        th->th_result = 0;
@@ -340,6 +363,8 @@ static void osd_conf_get(const struct lu_env *env,
        param->ddp_inodespace = OSD_DNODE_EST_COUNT;
        /* per-fragment overhead to be used by the client code */
        param->ddp_grant_frag = udmu_blk_insert_cost();
+
+       param->ddp_mnt = NULL;
 }
 
 /*
@@ -483,30 +508,20 @@ static int osd_shutdown(const struct lu_env *env, struct osd_device *o)
 static int osd_mount(const struct lu_env *env,
                     struct osd_device *o, struct lustre_cfg *cfg)
 {
-       char                            *dev  = lustre_cfg_string(cfg, 0);
-       struct lustre_mount_info        *lmi;
-       struct lustre_sb_info           *lsi;
-       dmu_buf_t                       *rootdb;
-       int                              rc;
+       char      *dev  = lustre_cfg_string(cfg, 1);
+       dmu_buf_t *rootdb;
+       int        rc;
        ENTRY;
 
        if (o->od_objset.os != NULL)
                RETURN(0);
 
-       lmi = server_get_mount(dev);
-       if (lmi == NULL) {
-               CERROR("Unknown mount point: '%s'\n", dev);
-               RETURN(-ENODEV);
-       }
-
-       lsi = s2lsi(lmi->lmi_sb);
-       dev = lsi->lsi_lmd->lmd_dev;
-
        if (strlen(dev) >= sizeof(o->od_mntdev))
                RETURN(-E2BIG);
 
        strcpy(o->od_mntdev, dev);
-       strcpy(o->od_svname, lsi->lsi_svname);
+       strncpy(o->od_svname, lustre_cfg_string(cfg, 4),
+               sizeof(o->od_svname) - 1);
 
        rc = -udmu_objset_open(o->od_mntdev, &o->od_objset);
        if (rc) {
@@ -530,6 +545,14 @@ static int osd_mount(const struct lu_env *env,
        if (rc)
                GOTO(err, rc);
 
+       rc = lu_site_init(&o->od_site, osd2lu_dev(o));
+       if (rc)
+               GOTO(err, rc);
+
+       rc = lu_site_init_finish(&o->od_site);
+       if (rc)
+               GOTO(err, rc);
+
        /* Use our own ZAP for inode accounting by default, this can be changed
         * via procfs to estimate the inode usage from the block usage */
        o->od_quota_iused_est = 0;
@@ -587,28 +610,36 @@ out:
        RETURN(rc);
 }
 
+static struct lu_device *osd_device_fini(const struct lu_env *env,
+                                        struct lu_device *dev); /* forward declaration: called by osd_device_alloc() below, defined later in the file */
+
 static struct lu_device *osd_device_alloc(const struct lu_env *env,
-                                         struct lu_device_type *t,
+                                         struct lu_device_type *type,
                                          struct lustre_cfg *cfg)
 {
-       struct osd_device       *o;
-       int                      rc;
+       struct osd_device *dev;
+       int                rc;
 
-       OBD_ALLOC_PTR(o);
-       if (o == NULL)
+       OBD_ALLOC_PTR(dev);
+       if (dev == NULL)
                return ERR_PTR(-ENOMEM);
 
-       rc = dt_device_init(&o->od_dt_dev, t);
+       rc = dt_device_init(&dev->od_dt_dev, type);
        if (rc == 0) {
-               rc = osd_device_init0(env, o, cfg);
+               rc = osd_device_init0(env, dev, cfg);
+               if (rc == 0) {
+                       rc = osd_mount(env, dev, cfg); /* mounting is now done as part of device allocation */
+                       if (rc)
+                               osd_device_fini(env, osd2lu_dev(dev)); /* mount failed: run the fini path before the cleanup below */
+               }
                if (rc)
-                       dt_device_fini(&o->od_dt_dev);
+                       dt_device_fini(&dev->od_dt_dev); /* reached when either osd_device_init0() or osd_mount() failed */
        }
 
        if (unlikely(rc != 0))
-               OBD_FREE_PTR(o);
+               OBD_FREE_PTR(dev); /* any failure after allocation releases the device structure */
 
-       return rc == 0 ? osd2lu_dev(o) : ERR_PTR(rc);
+       return rc == 0 ? osd2lu_dev(dev) : ERR_PTR(rc); /* the lu_device on success, ERR_PTR(rc) otherwise */
 }
 
 static struct lu_device *osd_device_free(const struct lu_env *env,
@@ -619,9 +650,13 @@ static struct lu_device *osd_device_free(const struct lu_env *env,
 
        cleanup_capa_hash(o->od_capa_hash);
        /* XXX: make osd top device in order to release reference */
-       /*d->ld_site->ls_top_dev = d;
+       d->ld_site->ls_top_dev = d;
        lu_site_purge(env, d->ld_site, -1);
-       lu_site_fini(&o->od_site);*/
+       if (!cfs_hash_is_empty(d->ld_site->ls_obj_hash)) {
+               LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
+               lu_site_print(env, d->ld_site, &msgdata, lu_cdebug_printer);
+       }
+       lu_site_fini(&o->od_site);
        dt_device_fini(&o->od_dt_dev);
        OBD_FREE_PTR(o);
 
@@ -631,9 +666,8 @@ static struct lu_device *osd_device_free(const struct lu_env *env,
 static struct lu_device *osd_device_fini(const struct lu_env *env,
                                         struct lu_device *d)
 {
-       struct osd_device        *o = osd_dev(d);
-       struct lustre_mount_info *lmi;
-       int rc;
+       struct osd_device *o = osd_dev(d);
+       int                rc;
        ENTRY;
 
 
@@ -655,10 +689,6 @@ static struct lu_device *osd_device_fini(const struct lu_env *env,
        if (o->od_objset.os)
                osd_umount(env, o);
 
-       lmi = server_get_mount_2(o->od_svname);
-       LASSERT(lmi);
-       server_put_mount(lmi->lmi_name, lmi->lmi_mnt);
-
        RETURN(NULL);
 }
 
@@ -699,6 +729,58 @@ static int osd_recovery_complete(const struct lu_env *env, struct lu_device *d)
        RETURN(0);
 }
 
+/*
+ * Connect a new user to this osd; we use exports to track all osd users.
+ *
+ * A connection is created with class_connect(), the export returned by
+ * class_conn2export() for that connection is stored in *exp, and
+ * osd->od_connects is incremented while holding osd->od_objset.lock.
+ *
+ * \param env        not referenced in this function
+ * \param exp        output: receives the export for the new connection
+ * \param obd        obd device being connected to; its obd_lu_dev is
+ *                   converted to the osd device with osd_dev()
+ * \param cluuid     uuid passed through to class_connect()
+ * \param data       not referenced in this function
+ * \param localdata  not referenced in this function
+ *
+ * \retval 0         on success
+ * \retval non-zero  value returned by class_connect() on failure; in that
+ *                   case *exp and od_connects are left untouched
+ */
+static int osd_obd_connect(const struct lu_env *env, struct obd_export **exp,
+                          struct obd_device *obd, struct obd_uuid *cluuid,
+                          struct obd_connect_data *data, void *localdata)
+{
+       struct osd_device    *osd = osd_dev(obd->obd_lu_dev);
+       struct lustre_handle  conn;
+       int                   rc;
+       ENTRY;
+
+       CDEBUG(D_CONFIG, "connect #%d\n", osd->od_connects); /* counter is read here without taking the lock; value is only logged */
+
+       rc = class_connect(&conn, obd, cluuid);
+       if (rc)
+               RETURN(rc);
+
+       *exp = class_conn2export(&conn);
+
+       cfs_spin_lock(&osd->od_objset.lock); /* od_connects is updated under od_objset.lock */
+       osd->od_connects++;
+       cfs_spin_unlock(&osd->od_objset.lock);
+
+       RETURN(0);
+}
+
+/*
+ * Disconnect one user from this osd.
+ *
+ * Once the last export (we don't count the self-export) has disappeared,
+ * the osd can be released: od_connects is decremented under
+ * osd->od_objset.lock and, if it reached zero and class_disconnect()
+ * succeeded, class_manual_cleanup() is called on the obd device.
+ *
+ * \param exp        export being disconnected; exp->exp_obd identifies the
+ *                   obd device
+ *
+ * \retval           the value returned by class_disconnect()
+ *
+ * NOTE(review): od_connects is decremented before class_disconnect() is
+ * called and is not restored if that call fails -- confirm this is
+ * intended. The return value of class_manual_cleanup() is not checked.
+ */
+static int osd_obd_disconnect(struct obd_export *exp)
+{
+       struct obd_device *obd = exp->exp_obd;
+       struct osd_device *osd = osd_dev(obd->obd_lu_dev);
+       int                rc, release = 0;
+       ENTRY;
+
+       /* Only disconnect the underlying layers on the final disconnect. */
+       cfs_spin_lock(&osd->od_objset.lock);
+       osd->od_connects--;
+       if (osd->od_connects == 0)
+               release = 1; /* remember the decision so it can be acted on after the lock is dropped */
+       cfs_spin_unlock(&osd->od_objset.lock);
+
+       rc = class_disconnect(exp); /* bz 9811 */
+
+       if (rc == 0 && release)
+               class_manual_cleanup(obd); /* last user gone and disconnect succeeded */
+       RETURN(rc);
+}
+
 static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
                       struct lu_device *dev)
 {
@@ -756,6 +838,8 @@ static struct lu_device_type osd_device_type = {
 
 static struct obd_ops osd_obd_device_ops = {
        .o_owner       = THIS_MODULE,
+       .o_connect      = osd_obd_connect, /* creates an export and increments od_connects */
+       .o_disconnect   = osd_obd_disconnect /* decrements od_connects; cleans up the obd on the last disconnect */
 };
 
 int __init osd_init(void)