From fa82de21f1b05d988cebaf45820d6eaa7d482ace Mon Sep 17 00:00:00 2001 From: alex Date: Fri, 2 Oct 2009 17:14:02 +0000 Subject: [PATCH] - dmu osd: don't call commit cb if we couldn't start transaction - dmu osd: xattr listing support - dmu osd: delete all xattrs when hosting object is being deleted - dmu osd: minor debug helping to investigate space leakage --- lustre/dmu-osd/osd_handler.c | 49 +++++++++---- lustre/dmu-osd/udmu.c | 161 ++++++++++++++++++++++++++++++++++++++++--- lustre/dmu-osd/udmu.h | 3 + 3 files changed, 187 insertions(+), 26 deletions(-) diff --git a/lustre/dmu-osd/osd_handler.c b/lustre/dmu-osd/osd_handler.c index 8a684d2..d0de129 100644 --- a/lustre/dmu-osd/osd_handler.c +++ b/lustre/dmu-osd/osd_handler.c @@ -155,7 +155,8 @@ struct osd_device { struct osd_thandle { struct thandle ot_super; dmu_tx_t *ot_tx; - __u32 ot_sync:1; + __u32 ot_sync:1, + ot_assigned:1; }; static int osd_root_get (const struct lu_env *env, @@ -552,8 +553,7 @@ static void osd_declare_object_delete(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_tx != NULL); - oid = udmu_object_get_id(obj->oo_db); - udmu_tx_hold_free(oh->ot_tx, oid, 0, DMU_OBJECT_END); + udmu_declare_object_delete(&osd->od_objset, oh->ot_tx, obj->oo_db); /* declare that we'll remove object from fid-dnode mapping */ osd_fid2str(buf, lu_object_fid(&obj->oo_dt.do_lu)); @@ -593,12 +593,13 @@ static int osd_object_destroy(const struct lu_env *env, struct osd_object *obj) oid = udmu_object_get_id(obj->oo_db); osd_fid2str(buf, lu_object_fid(&obj->oo_dt.do_lu)); + udmu_object_getattr(obj->oo_db, &va); + /* create tx */ th = osd_trans_create(env, &osd->od_dt_dev); - - if (IS_ERR(th)) { + if (IS_ERR(th)) RETURN (PTR_ERR(th)); - } + oh = container_of0(th, struct osd_thandle, ot_super); LASSERT(oh != NULL); LASSERT(oh->ot_tx != NULL); @@ -607,24 +608,29 @@ static int osd_object_destroy(const struct lu_env *env, struct osd_object *obj) osd_declare_object_delete(env, obj, th); /* start change */ - osd_trans_start(env, &osd->od_dt_dev, th); + rc = osd_trans_start(env, &osd->od_dt_dev, th); + if (rc) { + CERROR("osd_trans_start() failed with error %d\n", rc); + GOTO(out, rc); + } /* remove obj ref from main obj. dir */ rc = udmu_zap_delete(&osd->od_objset, zapdb, oh->ot_tx, buf); if (rc) { - CERROR("udmu_zap_delete() failed with error %d", rc); - RETURN (rc); + CERROR("udmu_zap_delete() failed with error %d\n", rc); + GOTO(out, rc); } - udmu_object_getattr(obj->oo_db, &va); /* kill object */ rc = udmu_object_delete(&osd->od_objset, &obj->oo_db, oh->ot_tx, osd_object_tag); if (rc) { - CERROR("udmu_object_delete() failed with error %d", rc); - RETURN (rc); + CERROR("udmu_object_delete() failed with error %d\n", rc); + GOTO(out, rc); } obj->oo_db = NULL; + +out: /* COMMIT changes */ osd_trans_stop(env, th); @@ -797,8 +803,6 @@ static struct thandle *osd_trans_create(const struct lu_env *env, lu_device_get(&dt->dd_lu_dev); lu_context_init(&th->th_ctx, LCT_TX_HANDLE); lu_context_enter(&th->th_ctx); - /* add commit callback */ - udmu_tx_cb_register(tx, osd_trans_commit_cb, (void *)oh); hook_res = dt_txn_hook_start(env, dt, th); if (hook_res != 0) @@ -824,7 +828,12 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d, rc = udmu_tx_assign(oh->ot_tx, TXG_WAIT); if (rc != 0) { /* dmu will call commit callback with error code during abort */ + CERROR("can't assign tx: %d\n", rc); udmu_tx_abort(oh->ot_tx); + } else { + /* add commit callback */ + udmu_tx_cb_register(oh->ot_tx, osd_trans_commit_cb, (void *)oh); + oh->ot_assigned = 1; } RETURN(-rc); @@ -843,6 +852,16 @@ static int osd_trans_stop(const struct lu_env *env, struct thandle *th) oh = container_of0(th, struct osd_thandle, ot_super); + if (oh->ot_assigned == 0) { + lu_device_put(&th->th_dev->dd_lu_dev); + th->th_dev = NULL; + lu_context_exit(&th->th_ctx); + lu_context_fini(&th->th_ctx); + OBD_FREE_PTR(oh); + + RETURN(0); + } + result = dt_txn_hook_stop(env, th); if (result != 0) CERROR("Failure in transaction hook: %d\n", result); @@ -2050,7 +2069,7 @@ int osd_xattr_list(const struct lu_env *env, LASSERT(dt_object_exists(dt)); down(&obj->oo_guard); - rc = -udmu_xattr_list(&osd->od_objset, obj->oo_db, + rc = udmu_xattr_list(&osd->od_objset, obj->oo_db, buf->lb_buf, buf->lb_len); up(&obj->oo_guard); diff --git a/lustre/dmu-osd/udmu.c b/lustre/dmu-osd/udmu.c index 413fe0d..2b17f94 100644 --- a/lustre/dmu-osd/udmu.c +++ b/lustre/dmu-osd/udmu.c @@ -116,8 +116,8 @@ void udmu_debug(int level) int udmu_objset_open(char *osname, udmu_objset_t *uos) { - int error; uint64_t version = ZPL_VERSION; + int error, len; memset(uos, 0, sizeof(udmu_objset_t)); @@ -160,12 +160,25 @@ int udmu_objset_open(char *osname, udmu_objset_t *uos) goto out; } ASSERT(uos->root != 0); + + strncpy(uos->name, osname, sizeof(uos->name)); out: if (error && uos->os != NULL) dmu_objset_close(uos->os); - return (error); +#if 0 + if (error == 0) { + uint64_t refdbytes, availbytes, usedobjs, availobjs; + + dmu_objset_space(uos->os, &refdbytes, &availbytes, + &usedobjs, &availobjs); + printk("MOUNT/%s: bytes: %Lu used, %Lu avail, " + "objs: %Lu: used, %Lu avail\n", uos->name, + refdbytes, availbytes, usedobjs, availobjs); + } +#endif + return error; } uint64_t udmu_get_txg(udmu_objset_t *uos, dmu_tx_t *tx) @@ -196,6 +209,19 @@ void udmu_objset_close(udmu_objset_t *uos) Presumably, we are only doing this to force commit callbacks to be called sooner. */ udmu_wait_synced(uos, NULL); +#if 0 + { + uint64_t refdbytes, availbytes, usedobjs, availobjs; + + dmu_objset_space(uos->os, &refdbytes, &availbytes, + &usedobjs, &availobjs); + printk("UMOUNT/%s: bytes: %Lu used, %Lu avail, " + "objs: %Lu: used, %Lu avail\n", uos->name, + refdbytes, availbytes, usedobjs, availobjs); + printk("%u creates, %u deletes\n", uos->creates, uos->deletes); + } +#endif + /* close the object set */ dmu_objset_close(uos->os); @@ -470,6 +496,7 @@ static void udmu_object_create_impl(objset_t *os, dmu_buf_t **dbp, dmu_tx_t *tx, void udmu_object_create(udmu_objset_t *uos, dmu_buf_t **dbp, dmu_tx_t *tx, void *tag) { + uos->creates++; udmu_object_create_impl(uos->os, dbp, tx, tag); } @@ -867,6 +894,44 @@ int udmu_object_punch(udmu_objset_t *uos, dmu_buf_t *db, dmu_tx_t *tx, return udmu_object_punch_impl(uos->os, db, tx, off, len); } +void udmu_declare_object_delete(udmu_objset_t *uos, dmu_tx_t *tx, dmu_buf_t *db) +{ + znode_phys_t *zp = db->db_data; + uint64_t oid = db->db_object, xid; + zap_attribute_t za; + zap_cursor_t *zc; + int rc; + + dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END); + + /* zap holding xattrs */ + if ((oid = zp->zp_xattr)) { + dmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END); + + rc = udmu_zap_cursor_init(&zc, uos, oid, 0); + if (rc) { + if (tx->tx_err == 0) + tx->tx_err = rc; + return; + } + while ((rc = zap_cursor_retrieve(zc, &za)) == 0) { + BUG_ON(za.za_integer_length != sizeof(uint64_t)); + BUG_ON(za.za_num_integers != 1); + + rc = zap_lookup(uos->os, zp->zp_xattr, za.za_name, + sizeof(uint64_t), 1, &xid); + if (rc) { + printk("error during xattr lookup: %d\n", rc); + break; + } + dmu_tx_hold_free(tx, xid, 0, DMU_OBJECT_END); + + zap_cursor_advance(zc); + } + udmu_zap_cursor_fini(zc); + } +} + /* * Delete a DMU object * @@ -876,25 +941,58 @@ int udmu_object_punch(udmu_objset_t *uos, dmu_buf_t *db, dmu_tx_t *tx, * * This will release db and set it to NULL to prevent further dbuf releases. */ -static int udmu_object_delete_impl(objset_t *os, dmu_buf_t **db, dmu_tx_t *tx, +static int udmu_object_delete_impl(udmu_objset_t *uos, dmu_buf_t **db, dmu_tx_t *tx, void *tag) { - uint64_t oid = (*db)->db_object; + znode_phys_t *zp = (*db)->db_data; + uint64_t oid, xid; + zap_attribute_t za; + zap_cursor_t *zc; + int rc; /* Assert that the transaction has been assigned to a transaction group. */ ASSERT(tx->tx_txg != 0); + /* zap holding xattrs */ + if ((oid = zp->zp_xattr)) { + + rc = udmu_zap_cursor_init(&zc, uos, oid, 0); + if (rc) + return rc; + while ((rc = zap_cursor_retrieve(zc, &za)) == 0) { + BUG_ON(za.za_integer_length != sizeof(uint64_t)); + BUG_ON(za.za_num_integers != 1); + + rc = zap_lookup(uos->os, zp->zp_xattr, za.za_name, + sizeof(uint64_t), 1, &xid); + if (rc) { + printk("error during xattr lookup: %d\n", rc); + break; + } + uos->deletes++; + dmu_object_free(uos->os, xid, tx); + + zap_cursor_advance(zc); + } + udmu_zap_cursor_fini(zc); + + uos->deletes++; + dmu_object_free(uos->os, zp->zp_xattr, tx); + } + + oid = (*db)->db_object; udmu_object_put_dmu_buf(*db, tag); *db = NULL; - return dmu_object_free(os, oid, tx); + uos->deletes++; + return dmu_object_free(uos->os, oid, tx); } int udmu_object_delete(udmu_objset_t *uos, dmu_buf_t **db, dmu_tx_t *tx, void *tag) { - return udmu_object_delete_impl(uos->os, db, tx, tag); + return udmu_object_delete_impl(uos, db, tx, tag); } /* @@ -957,7 +1055,19 @@ void udmu_tx_abort(dmu_tx_t *tx) int udmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how) { - return (dmu_tx_assign(tx, txg_how)); + int rc; + rc = dmu_tx_assign(tx, txg_how); +#if 0 + if (rc == 28) { + uint64_t refdbytes, availbytes, usedobjs, availobjs; + + dmu_objset_space(tx->tx_objset, &refdbytes, &availbytes, + &usedobjs, &availobjs); + printk("ref %Lu, avail %Lu, used objs %Lu, avail objs %Lu\n", + refdbytes, availbytes, usedobjs, availobjs); + } +#endif + return rc; } void udmu_tx_wait(dmu_tx_t *tx) @@ -1172,6 +1282,7 @@ int udmu_xattr_set(udmu_objset_t *uos, dmu_buf_t *db, void *val, * Entry doesn't exist, we need to create a new one and a new * object to store the value. */ + uos->creates++; udmu_object_create_impl(uos->os, &xa_data_db, tx, FTAG); xa_data_obj = xa_data_db->db_object; error = zap_add(uos->os, zp->zp_xattr, name, sizeof(uint64_t), 1, @@ -1256,6 +1367,7 @@ int udmu_xattr_del(udmu_objset_t *uos, dmu_buf_t *db, * Entry exists. * We'll delete the existing object and ZAP entry. */ + uos->deletes++; error = dmu_object_free(uos->os, xa_data_obj, tx); if (error) goto out; @@ -1267,11 +1379,38 @@ out: return error; } -int udmu_xattr_list(udmu_objset_t *uos, dmu_buf_t *db, void *val, int vallen) +int udmu_xattr_list(udmu_objset_t *uos, dmu_buf_t *db, void *buf, int buflen) { - /* XXX: not implemented yet */ - BUG_ON(1); - return 0; + znode_phys_t *zp = db->db_data; + char key[MAXNAMELEN + 1]; + zap_cursor_t *zc; + int rc; + int remain = buflen; + int counted = 0; + + if (zp->zp_xattr == 0) + return 0; + + rc = udmu_zap_cursor_init(&zc, uos, zp->zp_xattr, 0); + if (rc) + return -rc; + + while ((rc = udmu_zap_cursor_retrieve_key(zc, key, MAXNAMELEN)) == 0) { + rc = strlen(key); + if (rc + 1 <= remain) { + memcpy(buf, key, rc); + buf += rc; + *((char *)buf) = '\0'; + buf++; + remain -= rc + 1; + } + counted += rc + 1; + udmu_zap_cursor_advance(zc); + } + + udmu_zap_cursor_fini(zc); + + return counted; } void udmu_freeze(udmu_objset_t *uos) diff --git a/lustre/dmu-osd/udmu.h b/lustre/dmu-osd/udmu.h index c20b7f1..b396131 100644 --- a/lustre/dmu-osd/udmu.h +++ b/lustre/dmu-osd/udmu.h @@ -133,6 +133,8 @@ typedef struct udmu_objset { struct zilog *zilog; uint64_t root; /* id of root znode */ uint64_t unlinkedobj; + int creates, deletes; + char name[128]; } udmu_objset_t; @@ -246,6 +248,7 @@ void udmu_object_setattr(dmu_buf_t *db, dmu_tx_t *tx, vnattr_t *vap); int udmu_object_punch(udmu_objset_t *uos, dmu_buf_t *db, dmu_tx_t *tx, uint64_t offset, uint64_t len); +void udmu_declare_object_delete(udmu_objset_t *uos, dmu_tx_t *tx, dmu_buf_t *db); int udmu_object_delete(udmu_objset_t *uos, dmu_buf_t **db, dmu_tx_t *tx, void *tag); /*udmu transaction API */ -- 1.8.3.1