X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd%2Fosd_handler.c;h=7235a3c39c10b232d13de4e5b917fc93994396dc;hb=95649d9bd63ede1eb1bfe3fa29c85be907436d60;hp=117547f0a010e6b0eec268d1ae4a7e2fded8fd72;hpb=9f463e30893138845eb571cc32426e8d1e04a9fa;p=fs%2Flustre-release.git diff --git a/lustre/osd/osd_handler.c b/lustre/osd/osd_handler.c index 117547f..7235a3c 100644 --- a/lustre/osd/osd_handler.c +++ b/lustre/osd/osd_handler.c @@ -126,155 +126,7 @@ struct osd_object { #endif }; -static int osd_root_get (const struct lu_env *env, - struct dt_device *dev, struct lu_fid *f); - -static int lu_device_is_osd (const struct lu_device *d); -static void osd_mod_exit (void) __exit; -static int osd_mod_init (void) __init; -static int osd_type_init (struct lu_device_type *t); -static void osd_type_fini (struct lu_device_type *t); -static int osd_object_init (const struct lu_env *env, - struct lu_object *l, - const struct lu_object_conf *_); -static void osd_object_release(const struct lu_env *env, - struct lu_object *l); -static int osd_object_print (const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *o); -static struct lu_device *osd_device_free (const struct lu_env *env, - struct lu_device *m); -static void *osd_key_init (const struct lu_context *ctx, - struct lu_context_key *key); -static void osd_key_fini (const struct lu_context *ctx, - struct lu_context_key *key, void *data); -static void osd_key_exit (const struct lu_context *ctx, - struct lu_context_key *key, void *data); -static int osd_has_index (const struct osd_object *obj); -static void osd_object_init0 (struct osd_object *obj); -static int osd_device_init (const struct lu_env *env, - struct lu_device *d, const char *, - struct lu_device *); -static int osd_fid_lookup (const struct lu_env *env, - struct osd_object *obj, - const struct lu_fid *fid); -static void osd_inode_getattr (const struct lu_env *env, - struct inode *inode, struct lu_attr *attr); -static int osd_inode_setattr (const struct lu_env *env, - struct inode *inode, const struct lu_attr *attr); -static int osd_param_is_sane (const struct osd_device *dev, - const struct txn_param *param); -static int osd_index_iam_lookup(const struct lu_env *env, - struct dt_object *dt, - struct dt_rec *rec, const struct dt_key *key, - struct lustre_capa *capa); -static int osd_index_ea_lookup(const struct lu_env *env, - struct dt_object *dt, - struct dt_rec *rec, const struct dt_key *key, - struct lustre_capa *capa); -static int osd_index_iam_insert(const struct lu_env *env, - struct dt_object *dt, - const struct dt_rec *rec, - const struct dt_key *key, - struct thandle *handle, - struct lustre_capa *capa, - int ingore_quota); -static int osd_index_ea_insert (const struct lu_env *env, - struct dt_object *dt, - const struct dt_rec *rec, - const struct dt_key *key, - struct thandle *handle, - struct lustre_capa *capa, - int ingore_quota); -static int osd_index_iam_delete(const struct lu_env *env, - struct dt_object *dt, const struct dt_key *key, - struct thandle *handle, - struct lustre_capa *capa); -static int osd_index_ea_delete (const struct lu_env *env, - struct dt_object *dt, const struct dt_key *key, - struct thandle *handle, - struct lustre_capa *capa); - -static int osd_iam_index_probe (const struct lu_env *env, - struct osd_object *o, - const struct dt_index_features *feat); -static int osd_index_try (const struct lu_env *env, - struct dt_object *dt, - const struct dt_index_features *feat); -static void osd_index_fini (struct osd_object *o); - -static void osd_it_iam_fini (const struct lu_env *env, struct dt_it *di); -static int osd_it_iam_get (const struct lu_env *env, - struct dt_it *di, const struct dt_key *key); -static void osd_it_iam_put (const struct lu_env *env, struct dt_it *di); -static int osd_it_iam_next (const struct lu_env *env, struct dt_it *di); -static int osd_it_iam_key_size (const struct lu_env *env, - const struct dt_it *di); -static void osd_it_ea_fini (const struct lu_env *env, struct dt_it *di); -static int osd_it_ea_get (const struct lu_env *env, - struct dt_it *di, const struct dt_key *key); -static void osd_it_ea_put (const struct lu_env *env, struct dt_it *di); -static int osd_it_ea_next (const struct lu_env *env, struct dt_it *di); -static int osd_it_ea_key_size(const struct lu_env *env, - const struct dt_it *di); - -static void osd_conf_get (const struct lu_env *env, - const struct dt_device *dev, - struct dt_device_param *param); -static void osd_trans_stop (const struct lu_env *env, - struct thandle *th); -static int osd_object_is_root(const struct osd_object *obj); - -static struct osd_object *osd_obj (const struct lu_object *o); -static struct osd_device *osd_dev (const struct lu_device *d); -static struct osd_device *osd_dt_dev (const struct dt_device *d); -static struct osd_object *osd_dt_obj (const struct dt_object *d); -static struct osd_device *osd_obj2dev (const struct osd_object *o); -static struct lu_device *osd2lu_dev (struct osd_device *osd); -static struct lu_device *osd_device_fini (const struct lu_env *env, - struct lu_device *d); -static struct lu_device *osd_device_alloc (const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg); -static struct lu_object *osd_object_alloc (const struct lu_env *env, - const struct lu_object_header *hdr, - struct lu_device *d); -static struct inode *osd_iget (struct osd_thread_info *info, - struct osd_device *dev, - const struct osd_inode_id *id); -static struct super_block *osd_sb (const struct osd_device *dev); -static struct dt_it *osd_it_iam_init (const struct lu_env *env, - struct dt_object *dt, - struct lustre_capa *capa); -static struct dt_key *osd_it_iam_key (const struct lu_env *env, - const struct dt_it *di); -static struct dt_rec *osd_it_iam_rec (const struct lu_env *env, - const struct dt_it *di); -static struct dt_it *osd_it_ea_init (const struct lu_env *env, - struct dt_object *dt, - struct lustre_capa *capa); -static struct dt_key *osd_it_ea_key (const struct lu_env *env, - const struct dt_it *di); -static struct dt_rec *osd_it_ea_rec (const struct lu_env *env, - const struct dt_it *di); - -static struct timespec *osd_inode_time (const struct lu_env *env, - struct inode *inode, - __u64 seconds); -static struct thandle *osd_trans_start (const struct lu_env *env, - struct dt_device *d, - struct txn_param *p); -static journal_t *osd_journal (const struct osd_device *dev); - -static int __osd_ea_add_rec(struct osd_thread_info *info, - struct osd_object *pobj, - struct osd_object *cobj, - const char *name, - struct thandle *th); - -static const struct lu_device_type_operations osd_device_type_ops; -static struct lu_device_type osd_device_type; static const struct lu_object_operations osd_lu_obj_ops; -static struct obd_ops osd_obd_device_ops; static const struct lu_device_operations osd_lu_ops; static struct lu_context_key osd_key; static const struct dt_object_operations osd_obj_ops; @@ -292,6 +144,72 @@ struct osd_thandle { }; +/* + * Helpers. + */ +static int lu_device_is_osd(const struct lu_device *d) +{ + return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &osd_lu_ops); +} + +static struct osd_device *osd_dt_dev(const struct dt_device *d) +{ + LASSERT(lu_device_is_osd(&d->dd_lu_dev)); + return container_of0(d, struct osd_device, od_dt_dev); +} + +static struct osd_device *osd_dev(const struct lu_device *d) +{ + LASSERT(lu_device_is_osd(d)); + return osd_dt_dev(container_of0(d, struct dt_device, dd_lu_dev)); +} + +static struct osd_device *osd_obj2dev(const struct osd_object *o) +{ + return osd_dev(o->oo_dt.do_lu.lo_dev); +} + +static struct super_block *osd_sb(const struct osd_device *dev) +{ + return dev->od_mount->lmi_mnt->mnt_sb; +} + +static int osd_object_is_root(const struct osd_object *obj) +{ + return osd_sb(osd_obj2dev(obj))->s_root->d_inode == obj->oo_inode; +} + +static struct osd_object *osd_obj(const struct lu_object *o) +{ + LASSERT(lu_device_is_osd(o->lo_dev)); + return container_of0(o, struct osd_object, oo_dt.do_lu); +} + +static struct osd_object *osd_dt_obj(const struct dt_object *d) +{ + return osd_obj(&d->do_lu); +} + +static struct lu_device *osd2lu_dev(struct osd_device *osd) +{ + return &osd->od_dt_dev.dd_lu_dev; +} + +static journal_t *osd_journal(const struct osd_device *dev) +{ + return LDISKFS_SB(osd_sb(dev))->s_journal; +} + +static int osd_has_index(const struct osd_object *obj) +{ + return obj->oo_dt.do_index_ops != NULL; +} + +static int osd_object_invariant(const struct lu_object *l) +{ + return osd_invariant(osd_obj(l)); +} + #ifdef HAVE_QUOTA_SUPPORT static inline void osd_push_ctxt(const struct lu_env *env, struct osd_ctxt *save) @@ -317,32 +235,6 @@ osd_pop_ctxt(struct osd_ctxt *save) } #endif -/* - * Invariants, assertions. - */ - -/* - * XXX: do not enable this, until invariant checking code is made thread safe - * in the face of pdirops locking. - */ -#define OSD_INVARIANT_CHECKS (0) - -#if OSD_INVARIANT_CHECKS -static int osd_invariant(const struct osd_object *obj) -{ - return - obj != NULL && - ergo(obj->oo_inode != NULL, - obj->oo_inode->i_sb == osd_sb(osd_obj2dev(obj)) && - atomic_read(&obj->oo_inode->i_count) > 0) && - ergo(obj->oo_dir != NULL && - obj->oo_dir->od_conationer.ic_object != NULL, - obj->oo_dir->od_conationer.ic_object == obj->oo_inode); -} -#else -#define osd_invariant(obj) (1) -#endif - static inline struct osd_thread_info *osd_oti_get(const struct lu_env *env) { return lu_context_key_get(&env->le_ctx, &osd_key); @@ -413,6 +305,96 @@ static struct lu_object *osd_object_alloc(const struct lu_env *env, } /* + * retrieve object from backend ext fs. + **/ +static struct inode *osd_iget(struct osd_thread_info *info, + struct osd_device *dev, + const struct osd_inode_id *id) +{ + struct inode *inode; + + inode = iget(osd_sb(dev), id->oii_ino); + if (inode == NULL) { + CERROR("no inode\n"); + inode = ERR_PTR(-EACCES); + } else if (id->oii_gen != OSD_OII_NOGEN && + inode->i_generation != id->oii_gen) { + iput(inode); + inode = ERR_PTR(-ESTALE); + } else if (inode->i_nlink == 0) { + /* due to parallel readdir and unlink, + * we can have dead inode here. */ + CWARN("stale inode\n"); + make_bad_inode(inode); + iput(inode); + inode = ERR_PTR(-ESTALE); + } else if (is_bad_inode(inode)) { + CERROR("bad inode %lx\n",inode->i_ino); + iput(inode); + inode = ERR_PTR(-ENOENT); + } + return inode; +} + +static int osd_fid_lookup(const struct lu_env *env, + struct osd_object *obj, const struct lu_fid *fid) +{ + struct osd_thread_info *info; + struct lu_device *ldev = obj->oo_dt.do_lu.lo_dev; + struct osd_device *dev; + struct osd_inode_id *id; + struct osd_oi *oi; + struct inode *inode; + int result; + + LINVRNT(osd_invariant(obj)); + LASSERT(obj->oo_inode == NULL); + LASSERT(fid_is_sane(fid)); + /* + * This assertion checks that osd layer sees only local + * fids. Unfortunately it is somewhat expensive (does a + * cache-lookup). Disabling it for production/acceptance-testing. + */ + LASSERT(1 || fid_is_local(env, ldev->ld_site, fid)); + + ENTRY; + + info = osd_oti_get(env); + dev = osd_dev(ldev); + id = &info->oti_id; + oi = &dev->od_oi; + + if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) + RETURN(-ENOENT); + + result = osd_oi_lookup(info, oi, fid, id); + if (result == 0) { + inode = osd_iget(info, dev, id); + if (!IS_ERR(inode)) { + obj->oo_inode = inode; + LASSERT(obj->oo_inode->i_sb == osd_sb(dev)); + if (dev->od_iop_mode) { + obj->oo_compat_dot_created = 1; + obj->oo_compat_dotdot_created = 1; + } + result = 0; + } else + /* + * If fid wasn't found in oi, inode-less object is + * created, for which lu_object_exists() returns + * false. This is used in a (frequent) case when + * objects are created as locking anchors or + * place holders for objects yet to be created. + */ + result = PTR_ERR(inode); + } else if (result == -ENOENT) + result = 0; + LINVRNT(osd_invariant(obj)); + + RETURN(result); +} + +/* * Concurrency: shouldn't matter. */ static void osd_object_init0(struct osd_object *obj) @@ -428,7 +410,7 @@ static void osd_object_init0(struct osd_object *obj) * life-cycle. */ static int osd_object_init(const struct lu_env *env, struct lu_object *l, - const struct lu_object_conf *_) + const struct lu_object_conf *unused) { struct osd_object *obj = osd_obj(l); int result; @@ -458,6 +440,9 @@ static void osd_object_free(const struct lu_env *env, struct lu_object *l) OBD_FREE_PTR(obj); } +/** + * IAM Iterator + */ static struct iam_path_descr *osd_it_ipd_get(const struct lu_env *env, const struct iam_container *bag) { @@ -514,6 +499,147 @@ enum { }; /* + * Journal + */ + +/* + * Concurrency: doesn't access mutable data. + */ +static int osd_param_is_sane(const struct osd_device *dev, + const struct txn_param *param) +{ + return param->tp_credits <= osd_journal(dev)->j_max_transaction_buffers; +} + +/* + * Concurrency: shouldn't matter. + */ +static void osd_trans_commit_cb(struct journal_callback *jcb, int error) +{ + struct osd_thandle *oh = container_of0(jcb, struct osd_thandle, ot_jcb); + struct thandle *th = &oh->ot_super; + struct dt_device *dev = th->th_dev; + struct lu_device *lud = &dev->dd_lu_dev; + + LASSERT(dev != NULL); + LASSERT(oh->ot_handle == NULL); + + if (error) { + CERROR("transaction @0x%p commit error: %d\n", th, error); + } else { + struct lu_env *env = &osd_dt_dev(dev)->od_env_for_commit; + /* + * This od_env_for_commit is only for commit usage. see + * "struct dt_device" + */ + lu_context_enter(&env->le_ctx); + dt_txn_hook_commit(env, th); + lu_context_exit(&env->le_ctx); + } + + lu_ref_del_at(&lud->ld_reference, oh->ot_dev_link, "osd-tx", th); + lu_device_put(lud); + th->th_dev = NULL; + + lu_context_exit(&th->th_ctx); + lu_context_fini(&th->th_ctx); + OBD_FREE_PTR(oh); +} + +/* + * Concurrency: shouldn't matter. + */ +static struct thandle *osd_trans_start(const struct lu_env *env, + struct dt_device *d, + struct txn_param *p) +{ + struct osd_device *dev = osd_dt_dev(d); + handle_t *jh; + struct osd_thandle *oh; + struct thandle *th; + int hook_res; + + ENTRY; + + hook_res = dt_txn_hook_start(env, d, p); + if (hook_res != 0) + RETURN(ERR_PTR(hook_res)); + + if (osd_param_is_sane(dev, p)) { + OBD_ALLOC_GFP(oh, sizeof *oh, CFS_ALLOC_IO); + if (oh != NULL) { + struct osd_thread_info *oti = osd_oti_get(env); + + /* + * XXX temporary stuff. Some abstraction layer should + * be used. + */ + + jh = journal_start(osd_journal(dev), p->tp_credits); + if (!IS_ERR(jh)) { + oh->ot_handle = jh; + th = &oh->ot_super; + th->th_dev = d; + th->th_result = 0; + jh->h_sync = p->tp_sync; + lu_device_get(&d->dd_lu_dev); + oh->ot_dev_link = lu_ref_add + (&d->dd_lu_dev.ld_reference, + "osd-tx", th); + /* add commit callback */ + lu_context_init(&th->th_ctx, LCT_TX_HANDLE); + lu_context_enter(&th->th_ctx); + journal_callback_set(jh, osd_trans_commit_cb, + (struct journal_callback *)&oh->ot_jcb); + LASSERT(oti->oti_txns == 0); + LASSERT(oti->oti_r_locks == 0); + LASSERT(oti->oti_w_locks == 0); + oti->oti_txns++; + } else { + OBD_FREE_PTR(oh); + th = (void *)jh; + } + } else + th = ERR_PTR(-ENOMEM); + } else { + CERROR("Invalid transaction parameters\n"); + th = ERR_PTR(-EINVAL); + } + + RETURN(th); +} + +/* + * Concurrency: shouldn't matter. + */ +static void osd_trans_stop(const struct lu_env *env, struct thandle *th) +{ + int result; + struct osd_thandle *oh; + struct osd_thread_info *oti = osd_oti_get(env); + + ENTRY; + + oh = container_of0(th, struct osd_thandle, ot_super); + if (oh->ot_handle != NULL) { + handle_t *hdl = oh->ot_handle; + + LASSERT(oti->oti_txns == 1); + oti->oti_txns--; + LASSERT(oti->oti_r_locks == 0); + LASSERT(oti->oti_w_locks == 0); + result = dt_txn_hook_stop(env, th); + if (result != 0) + CERROR("Failure in transaction hook: %d\n", result); + oh->ot_handle = NULL; + result = journal_stop(hdl); + if (result != 0) + CERROR("Failure to stop transaction: %d\n", result); + } + EXIT; +} + +/* * Concurrency: no concurrent access is possible that late in object * life-cycle. */ @@ -664,147 +790,6 @@ static struct lu_buf *osd_buf_get(const struct lu_env *env, void *area, ssize_t } /* - * Journal - */ - -/* - * Concurrency: doesn't access mutable data. - */ -static int osd_param_is_sane(const struct osd_device *dev, - const struct txn_param *param) -{ - return param->tp_credits <= osd_journal(dev)->j_max_transaction_buffers; -} - -/* - * Concurrency: shouldn't matter. - */ -static void osd_trans_commit_cb(struct journal_callback *jcb, int error) -{ - struct osd_thandle *oh = container_of0(jcb, struct osd_thandle, ot_jcb); - struct thandle *th = &oh->ot_super; - struct dt_device *dev = th->th_dev; - struct lu_device *lud = &dev->dd_lu_dev; - - LASSERT(dev != NULL); - LASSERT(oh->ot_handle == NULL); - - if (error) { - CERROR("transaction @0x%p commit error: %d\n", th, error); - } else { - struct lu_env *env = &osd_dt_dev(dev)->od_env_for_commit; - /* - * This od_env_for_commit is only for commit usage. see - * "struct dt_device" - */ - lu_context_enter(&env->le_ctx); - dt_txn_hook_commit(env, th); - lu_context_exit(&env->le_ctx); - } - - lu_ref_del_at(&lud->ld_reference, oh->ot_dev_link, "osd-tx", th); - lu_device_put(lud); - th->th_dev = NULL; - - lu_context_exit(&th->th_ctx); - lu_context_fini(&th->th_ctx); - OBD_FREE_PTR(oh); -} - -/* - * Concurrency: shouldn't matter. - */ -static struct thandle *osd_trans_start(const struct lu_env *env, - struct dt_device *d, - struct txn_param *p) -{ - struct osd_device *dev = osd_dt_dev(d); - handle_t *jh; - struct osd_thandle *oh; - struct thandle *th; - int hook_res; - - ENTRY; - - hook_res = dt_txn_hook_start(env, d, p); - if (hook_res != 0) - RETURN(ERR_PTR(hook_res)); - - if (osd_param_is_sane(dev, p)) { - OBD_ALLOC_GFP(oh, sizeof *oh, CFS_ALLOC_IO); - if (oh != NULL) { - struct osd_thread_info *oti = osd_oti_get(env); - - /* - * XXX temporary stuff. Some abstraction layer should - * be used. - */ - - jh = journal_start(osd_journal(dev), p->tp_credits); - if (!IS_ERR(jh)) { - oh->ot_handle = jh; - th = &oh->ot_super; - th->th_dev = d; - th->th_result = 0; - jh->h_sync = p->tp_sync; - lu_device_get(&d->dd_lu_dev); - oh->ot_dev_link = lu_ref_add - (&d->dd_lu_dev.ld_reference, - "osd-tx", th); - /* add commit callback */ - lu_context_init(&th->th_ctx, LCT_TX_HANDLE); - lu_context_enter(&th->th_ctx); - journal_callback_set(jh, osd_trans_commit_cb, - (struct journal_callback *)&oh->ot_jcb); - LASSERT(oti->oti_txns == 0); - LASSERT(oti->oti_r_locks == 0); - LASSERT(oti->oti_w_locks == 0); - oti->oti_txns++; - } else { - OBD_FREE_PTR(oh); - th = (void *)jh; - } - } else - th = ERR_PTR(-ENOMEM); - } else { - CERROR("Invalid transaction parameters\n"); - th = ERR_PTR(-EINVAL); - } - - RETURN(th); -} - -/* - * Concurrency: shouldn't matter. - */ -static void osd_trans_stop(const struct lu_env *env, struct thandle *th) -{ - int result; - struct osd_thandle *oh; - struct osd_thread_info *oti = osd_oti_get(env); - - ENTRY; - - oh = container_of0(th, struct osd_thandle, ot_super); - if (oh->ot_handle != NULL) { - handle_t *hdl = oh->ot_handle; - - LASSERT(oti->oti_txns == 1); - oti->oti_txns--; - LASSERT(oti->oti_r_locks == 0); - LASSERT(oti->oti_w_locks == 0); - result = dt_txn_hook_stop(env, th); - if (result != 0) - CERROR("Failure in transaction hook: %d\n", result); - oh->ot_handle = NULL; - result = journal_stop(hdl); - if (result != 0) - CERROR("Failure to stop transaction: %d\n", result); - } - EXIT; -} - -/* * Concurrency: shouldn't matter. */ static int osd_sync(const struct lu_env *env, struct dt_device *d) @@ -964,7 +949,7 @@ static const int osd_dto_credits_quota[DTO_NR] = { [DTO_INDEX_DELETE] = 20, /** * Unused now. - */ + */ [DTO_IDNEX_UPDATE] = 16, /* * Create a object. Same as create object in EXT3 filesystem. @@ -980,7 +965,7 @@ static const int osd_dto_credits_quota[DTO_NR] = { * INDEX_EXTRA_BLOCKS(8) + * 3(inode bits, groups, GDT) + * QUOTA(?) - */ + */ [DTO_OBJECT_DELETE] = 27, /** * Attr set credits. @@ -1099,6 +1084,16 @@ static void osd_object_write_unlock(const struct lu_env *env, up_write(&obj->oo_sem); } +static int osd_object_write_locked(const struct lu_env *env, + struct dt_object *dt) +{ + struct osd_object *obj = osd_dt_obj(dt); + + LINVRNT(osd_invariant(obj)); + + return obj->oo_owner == env; +} + static int capa_is_sane(const struct lu_env *env, struct osd_device *dev, struct lustre_capa *capa, @@ -1196,7 +1191,42 @@ static int osd_object_auth(const struct lu_env *env, struct dt_object *dt, return -EACCES; } - return 0; + return 0; +} + +static struct timespec *osd_inode_time(const struct lu_env *env, + struct inode *inode, __u64 seconds) +{ + struct osd_thread_info *oti = osd_oti_get(env); + struct timespec *t = &oti->oti_time; + + t->tv_sec = seconds; + t->tv_nsec = 0; + *t = timespec_trunc(*t, get_sb_time_gran(inode->i_sb)); + return t; +} + + +static void osd_inode_getattr(const struct lu_env *env, + struct inode *inode, struct lu_attr *attr) +{ + attr->la_valid |= LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE | + LA_SIZE | LA_BLOCKS | LA_UID | LA_GID | + LA_FLAGS | LA_NLINK | LA_RDEV | LA_BLKSIZE; + + attr->la_atime = LTIME_S(inode->i_atime); + attr->la_mtime = LTIME_S(inode->i_mtime); + attr->la_ctime = LTIME_S(inode->i_ctime); + attr->la_mode = inode->i_mode; + attr->la_size = i_size_read(inode); + attr->la_blocks = inode->i_blocks; + attr->la_uid = inode->i_uid; + attr->la_gid = inode->i_gid; + attr->la_flags = LDISKFS_I(inode)->i_flags; + attr->la_nlink = inode->i_nlink; + attr->la_rdev = inode->i_rdev; + attr->la_blksize = ll_inode_blksize(inode); + attr->la_blkbits = inode->i_blkbits; } static int osd_attr_get(const struct lu_env *env, @@ -1218,43 +1248,6 @@ static int osd_attr_get(const struct lu_env *env, return 0; } -static int osd_attr_set(const struct lu_env *env, - struct dt_object *dt, - const struct lu_attr *attr, - struct thandle *handle, - struct lustre_capa *capa) -{ - struct osd_object *obj = osd_dt_obj(dt); - int rc; - - LASSERT(handle != NULL); - LASSERT(dt_object_exists(dt)); - LASSERT(osd_invariant(obj)); - - if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE)) - return -EACCES; - - spin_lock(&obj->oo_guard); - rc = osd_inode_setattr(env, obj->oo_inode, attr); - spin_unlock(&obj->oo_guard); - - if (!rc) - mark_inode_dirty(obj->oo_inode); - return rc; -} - -static struct timespec *osd_inode_time(const struct lu_env *env, - struct inode *inode, __u64 seconds) -{ - struct osd_thread_info *oti = osd_oti_get(env); - struct timespec *t = &oti->oti_time; - - t->tv_sec = seconds; - t->tv_nsec = 0; - *t = timespec_trunc(*t, get_sb_time_gran(inode->i_sb)); - return t; -} - static int osd_inode_setattr(const struct lu_env *env, struct inode *inode, const struct lu_attr *attr) { @@ -1325,12 +1318,36 @@ static int osd_inode_setattr(const struct lu_env *env, return 0; } +static int osd_attr_set(const struct lu_env *env, + struct dt_object *dt, + const struct lu_attr *attr, + struct thandle *handle, + struct lustre_capa *capa) +{ + struct osd_object *obj = osd_dt_obj(dt); + int rc; + + LASSERT(handle != NULL); + LASSERT(dt_object_exists(dt)); + LASSERT(osd_invariant(obj)); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE)) + return -EACCES; + + spin_lock(&obj->oo_guard); + rc = osd_inode_setattr(env, obj->oo_inode, attr); + spin_unlock(&obj->oo_guard); + + if (!rc) + mark_inode_dirty(obj->oo_inode); + return rc; +} + /* * Object creation. * * XXX temporary solution. */ - static int osd_create_pre(struct osd_thread_info *info, struct osd_object *obj, struct lu_attr *attr, struct thandle *th) { @@ -1436,7 +1453,6 @@ extern int iam_lvar_create(struct inode *obj, int keysize, int ptrsize, extern int iam_lfix_create(struct inode *obj, int keysize, int ptrsize, int recsize, handle_t *handle); - enum { OSD_NAME_LEN = 255 }; @@ -1734,10 +1750,9 @@ static int osd_ea_fid_set(const struct lu_env *env, struct dt_object *dt, /** * Helper function to form igif */ -static inline void osd_igif_get(const struct lu_env *env, struct dentry *dentry, +static inline void osd_igif_get(const struct lu_env *env, struct inode *inode, struct lu_fid *fid) { - struct inode *inode = dentry->d_inode; lu_igif_build(fid, inode->i_ino, inode->i_generation); } @@ -1745,45 +1760,68 @@ static inline void osd_igif_get(const struct lu_env *env, struct dentry *dentry, * Helper function to pack the fid */ static inline void osd_fid_pack(const struct lu_env *env, const struct lu_fid *fid, - struct lu_fid_pack *pack) + struct dt_rec *pack) { - fid_pack(pack, fid, &osd_oti_get(env)->oti_fid); + fid_pack((struct lu_fid_pack *)pack, fid, &osd_oti_get(env)->oti_fid); } /** * Try to read the fid from inode ea into dt_rec, if return value * i.e. rc is +ve, then we got fid, otherwise we will have to form igif * - * \param rec, the data-structure into which fid/igif is read + * \param fid, object fid. * * \retval 0, on success */ -static int osd_ea_fid_get(const struct lu_env *env, struct dentry *dentry, - struct dt_rec *rec) +static int osd_ea_fid_get(const struct lu_env *env, struct osd_object *obj, + __u32 ino, struct lu_fid *fid) { - struct inode *inode = dentry->d_inode; struct osd_thread_info *info = osd_oti_get(env); struct lustre_mdt_attrs *mdt_attrs = &info->oti_mdt_attrs; - struct lu_fid *fid = &info->oti_fid; - int rc; + struct lu_device *ldev = obj->oo_dt.do_lu.lo_dev; + struct dentry *dentry = &info->oti_child_dentry; + struct osd_inode_id *id = &info->oti_id; + struct osd_device *dev; + struct inode *inode; + int rc; - LASSERT(inode->i_op != NULL && inode->i_op->getxattr != NULL); + ENTRY; + dev = osd_dev(ldev); + + id->oii_ino = ino; + id->oii_gen = OSD_OII_NOGEN; + + inode = osd_iget(info, dev, id); + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + GOTO(out,rc); + } + dentry->d_inode = inode; + LASSERT(inode->i_op != NULL && inode->i_op->getxattr != NULL); rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA, (void *)mdt_attrs, sizeof *mdt_attrs); + /* Check LMA compatibility */ + if (rc > 0 && + (mdt_attrs->lma_incompat & ~cpu_to_be32(LMA_INCOMPAT_SUPP))) { + CWARN("Inode %lx: Unsupported incompat LMA feature(s) %#x\n", + inode->i_ino, be32_to_cpu(mdt_attrs->lma_incompat) & + ~LMA_INCOMPAT_SUPP); + return -ENOSYS; + } + if (rc > 0) { fid_be_to_cpu(fid, &mdt_attrs->lma_self_fid); rc = 0; } else if (rc == -ENODATA) { - osd_igif_get(env, dentry, fid); + osd_igif_get(env, inode, fid); rc = 0; } + iput(inode); - if (rc == 0) - osd_fid_pack(env, fid, (struct lu_fid_pack*)rec); - - return rc; +out: + RETURN(rc); } /** @@ -1901,7 +1939,6 @@ static int osd_xattr_get(const struct lu_env *env, return inode->i_op->getxattr(dentry, name, buf->lb_buf, buf->lb_len); } - /* * Concurrency: @dt is write locked. */ @@ -2084,174 +2121,48 @@ static int osd_object_sync(const struct lu_env *env, struct dt_object *dt) } /* - * Get the 64-bit version for an inode. - */ -static dt_obj_version_t osd_object_version_get(const struct lu_env *env, - struct dt_object *dt) -{ - struct inode *inode = osd_dt_obj(dt)->oo_inode; - - CDEBUG(D_INFO, "Get version "LPX64" for inode %lu\n", - LDISKFS_I(inode)->i_fs_version, inode->i_ino); - return LDISKFS_I(inode)->i_fs_version; -} - -/* - * Set the 64-bit version and return the old version. - */ -static void osd_object_version_set(const struct lu_env *env, struct dt_object *dt, - dt_obj_version_t new_version) -{ - struct inode *inode = osd_dt_obj(dt)->oo_inode; - - CDEBUG(D_INFO, "Set version "LPX64" (old "LPX64") for inode %lu\n", - new_version, LDISKFS_I(inode)->i_fs_version, inode->i_ino); - LDISKFS_I(inode)->i_fs_version = new_version; - /** Version is set after all inode operations are finished, - * so we should mark it dirty here */ - inode->i_sb->s_op->dirty_inode(inode); -} - -static int osd_data_get(const struct lu_env *env, struct dt_object *dt, - void **data) -{ - struct osd_object *obj = osd_dt_obj(dt); - ENTRY; - - *data = (void *)obj->oo_inode; - RETURN(0); -} - -static const struct dt_object_operations osd_obj_ops = { - .do_read_lock = osd_object_read_lock, - .do_write_lock = osd_object_write_lock, - .do_read_unlock = osd_object_read_unlock, - .do_write_unlock = osd_object_write_unlock, - .do_attr_get = osd_attr_get, - .do_attr_set = osd_attr_set, - .do_ah_init = osd_ah_init, - .do_create = osd_object_create, - .do_index_try = osd_index_try, - .do_ref_add = osd_object_ref_add, - .do_ref_del = osd_object_ref_del, - .do_xattr_get = osd_xattr_get, - .do_xattr_set = osd_xattr_set, - .do_xattr_del = osd_xattr_del, - .do_xattr_list = osd_xattr_list, - .do_capa_get = osd_capa_get, - .do_object_sync = osd_object_sync, - .do_version_get = osd_object_version_get, - .do_version_set = osd_object_version_set, - .do_data_get = osd_data_get, -}; - -/** - * dt_object_operations for interoperability mode - * (i.e. to run 2.0 mds on 1.8 disk) (b11826) - */ -static const struct dt_object_operations osd_obj_ea_ops = { - .do_read_lock = osd_object_read_lock, - .do_write_lock = osd_object_write_lock, - .do_read_unlock = osd_object_read_unlock, - .do_write_unlock = osd_object_write_unlock, - .do_attr_get = osd_attr_get, - .do_attr_set = osd_attr_set, - .do_ah_init = osd_ah_init, - .do_create = osd_object_ea_create, - .do_index_try = osd_index_try, - .do_ref_add = osd_object_ref_add, - .do_ref_del = osd_object_ref_del, - .do_xattr_get = osd_xattr_get, - .do_xattr_set = osd_xattr_set, - .do_xattr_del = osd_xattr_del, - .do_xattr_list = osd_xattr_list, - .do_capa_get = osd_capa_get, - .do_object_sync = osd_object_sync, - .do_version_get = osd_object_version_get, - .do_version_set = osd_object_version_set, - .do_data_get = osd_data_get, -}; - -/* - * Body operations. - */ - -/* - * XXX: Another layering violation for now. - * - * We don't want to use ->f_op->read methods, because generic file write - * - * - serializes on ->i_sem, and - * - * - does a lot of extra work like balance_dirty_pages(), - * - * which doesn't work for globally shared files like /last-received. - */ -int fsfilt_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs); -int fsfilt_ldiskfs_write_handle(struct inode *inode, void *buf, int bufsize, - loff_t *offs, handle_t *handle); - -static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt, - struct lu_buf *buf, loff_t *pos, - struct lustre_capa *capa) -{ - struct inode *inode = osd_dt_obj(dt)->oo_inode; - - if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_READ)) - RETURN(-EACCES); - - return fsfilt_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos); -} - -static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, - const struct lu_buf *buf, loff_t *pos, - struct thandle *handle, struct lustre_capa *capa, - int ignore_quota) -{ - struct inode *inode = osd_dt_obj(dt)->oo_inode; - struct osd_thandle *oh; - ssize_t result; -#ifdef HAVE_QUOTA_SUPPORT - cfs_cap_t save = current->cap_effective; -#endif - - LASSERT(handle != NULL); - - if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_WRITE)) - RETURN(-EACCES); - - oh = container_of(handle, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle->h_transaction != NULL); -#ifdef HAVE_QUOTA_SUPPORT - if (ignore_quota) - current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK; - else - current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK; -#endif - result = fsfilt_ldiskfs_write_handle(inode, buf->lb_buf, buf->lb_len, - pos, oh->ot_handle); -#ifdef HAVE_QUOTA_SUPPORT - current->cap_effective = save; -#endif - if (result == 0) - result = buf->lb_len; - return result; -} + * Get the 64-bit version for an inode. + */ +static dt_obj_version_t osd_object_version_get(const struct lu_env *env, + struct dt_object *dt) +{ + struct inode *inode = osd_dt_obj(dt)->oo_inode; -static const struct dt_body_operations osd_body_ops = { - .dbo_read = osd_read, - .dbo_write = osd_write -}; + CDEBUG(D_INFO, "Get version "LPX64" for inode %lu\n", + LDISKFS_I(inode)->i_fs_version, inode->i_ino); + return LDISKFS_I(inode)->i_fs_version; +} /* - * Index operations. + * Set the 64-bit version and return the old version. */ +static void osd_object_version_set(const struct lu_env *env, struct dt_object *dt, + dt_obj_version_t new_version) +{ + struct inode *inode = osd_dt_obj(dt)->oo_inode; -static int osd_object_is_root(const struct osd_object *obj) + CDEBUG(D_INFO, "Set version "LPX64" (old "LPX64") for inode %lu\n", + new_version, LDISKFS_I(inode)->i_fs_version, inode->i_ino); + LDISKFS_I(inode)->i_fs_version = new_version; + /** Version is set after all inode operations are finished, + * so we should mark it dirty here */ + inode->i_sb->s_op->dirty_inode(inode); +} + +static int osd_data_get(const struct lu_env *env, struct dt_object *dt, + void **data) { - return osd_sb(osd_obj2dev(obj))->s_root->d_inode == obj->oo_inode; + struct osd_object *obj = osd_dt_obj(dt); + ENTRY; + + *data = (void *)obj->oo_inode; + RETURN(0); } +/* + * Index operations. + */ + static int osd_iam_index_probe(const struct lu_env *env, struct osd_object *o, const struct dt_index_features *feat) { @@ -2313,6 +2224,7 @@ static int osd_iam_container_init(const struct lu_env *env, return result; } + /* * Concurrency: no external locking is necessary. */ @@ -2379,6 +2291,130 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt, return result; } +static const struct dt_object_operations osd_obj_ops = { + .do_read_lock = osd_object_read_lock, + .do_write_lock = osd_object_write_lock, + .do_read_unlock = osd_object_read_unlock, + .do_write_unlock = osd_object_write_unlock, + .do_write_locked = osd_object_write_locked, + .do_attr_get = osd_attr_get, + .do_attr_set = osd_attr_set, + .do_ah_init = osd_ah_init, + .do_create = osd_object_create, + .do_index_try = osd_index_try, + .do_ref_add = osd_object_ref_add, + .do_ref_del = osd_object_ref_del, + .do_xattr_get = osd_xattr_get, + .do_xattr_set = osd_xattr_set, + .do_xattr_del = osd_xattr_del, + .do_xattr_list = osd_xattr_list, + .do_capa_get = osd_capa_get, + .do_object_sync = osd_object_sync, + .do_version_get = osd_object_version_get, + .do_version_set = osd_object_version_set, + .do_data_get = osd_data_get, +}; + +/** + * dt_object_operations for interoperability mode + * (i.e. to run 2.0 mds on 1.8 disk) (b11826) + */ +static const struct dt_object_operations osd_obj_ea_ops = { + .do_read_lock = osd_object_read_lock, + .do_write_lock = osd_object_write_lock, + .do_read_unlock = osd_object_read_unlock, + .do_write_unlock = osd_object_write_unlock, + .do_write_locked = osd_object_write_locked, + .do_attr_get = osd_attr_get, + .do_attr_set = osd_attr_set, + .do_ah_init = osd_ah_init, + .do_create = osd_object_ea_create, + .do_index_try = osd_index_try, + .do_ref_add = osd_object_ref_add, + .do_ref_del = osd_object_ref_del, + .do_xattr_get = osd_xattr_get, + .do_xattr_set = osd_xattr_set, + .do_xattr_del = osd_xattr_del, + .do_xattr_list = osd_xattr_list, + .do_capa_get = osd_capa_get, + .do_object_sync = osd_object_sync, + .do_version_get = osd_object_version_get, + .do_version_set = osd_object_version_set, + .do_data_get = osd_data_get, +}; + +/* + * Body operations. + */ + +/* + * XXX: Another layering violation for now. + * + * We don't want to use ->f_op->read methods, because generic file write + * + * - serializes on ->i_sem, and + * + * - does a lot of extra work like balance_dirty_pages(), + * + * which doesn't work for globally shared files like /last-received. + */ +int fsfilt_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs); +int fsfilt_ldiskfs_write_handle(struct inode *inode, void *buf, int bufsize, + loff_t *offs, handle_t *handle); + +static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt, + struct lu_buf *buf, loff_t *pos, + struct lustre_capa *capa) +{ + struct inode *inode = osd_dt_obj(dt)->oo_inode; + + if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_READ)) + RETURN(-EACCES); + + return fsfilt_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos); +} + +static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, + const struct lu_buf *buf, loff_t *pos, + struct thandle *handle, struct lustre_capa *capa, + int ignore_quota) +{ + struct inode *inode = osd_dt_obj(dt)->oo_inode; + struct osd_thandle *oh; + ssize_t result; +#ifdef HAVE_QUOTA_SUPPORT + cfs_cap_t save = current->cap_effective; +#endif + + LASSERT(handle != NULL); + + if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_WRITE)) + RETURN(-EACCES); + + oh = container_of(handle, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle->h_transaction != NULL); +#ifdef HAVE_QUOTA_SUPPORT + if (ignore_quota) + current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK; + else + current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK; +#endif + result = fsfilt_ldiskfs_write_handle(inode, buf->lb_buf, buf->lb_len, + pos, oh->ot_handle); +#ifdef HAVE_QUOTA_SUPPORT + current->cap_effective = save; +#endif + if (result == 0) + result = buf->lb_len; + return result; +} + +static const struct dt_body_operations osd_body_ops = { + .dbo_read = osd_read, + .dbo_write = osd_write +}; + + /** * delete a (key, value) pair from index \a dt specified by \a key * @@ -2599,6 +2635,35 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt, } /** + * Calls ldiskfs_add_entry() to add directory entry + * into the directory. This is required for + * interoperability mode (b11826) + * + * \retval 0, on success + * \retval -ve, on error + */ +static int __osd_ea_add_rec(struct osd_thread_info *info, + struct osd_object *pobj, + struct osd_object *cobj, + const char *name, + struct thandle *th) +{ + struct dentry *child; + struct osd_thandle *oth; + struct inode *cinode = cobj->oo_inode; + int rc; + + oth = container_of(th, struct osd_thandle, ot_super); + LASSERT(oth->ot_handle != NULL); + LASSERT(oth->ot_handle->h_transaction != NULL); + + child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name)); + rc = ldiskfs_add_entry(oth->ot_handle, child, cinode); + + RETURN(rc); +} + +/** * Calls ldiskfs_add_dot_dotdot() to add dot and dotdot entries * into the directory.Also sets flags into osd object to * indicate dot and dotdot are created. This is required for @@ -2646,34 +2711,6 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info, return result; } -/** - * Calls ldiskfs_add_entry() to add directory entry - * into the directory. This is required for - * interoperability mode (b11826) - * - * \retval 0, on success - * \retval -ve, on error - */ -static int __osd_ea_add_rec(struct osd_thread_info *info, - struct osd_object *pobj, - struct osd_object *cobj, - const char *name, - struct thandle *th) -{ - struct dentry *child; - struct osd_thandle *oth; - struct inode *cinode = cobj->oo_inode; - int rc; - - oth = container_of(th, struct osd_thandle, ot_super); - LASSERT(oth->ot_handle != NULL); - LASSERT(oth->ot_handle->h_transaction != NULL); - - child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name)); - rc = ldiskfs_add_entry(oth->ot_handle, child, cinode); - - RETURN(rc); -} /** * It will call the appropriate osd_add* function and return the @@ -2708,14 +2745,12 @@ static int osd_ea_add_rec(const struct lu_env *env, static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj, struct dt_rec *rec, const struct dt_key *key) { - struct inode *dir = obj->oo_inode; - struct osd_thread_info *info = osd_oti_get(env); - struct dentry *dentry; - struct osd_device *dev = osd_dev(obj->oo_dt.do_lu.lo_dev); - struct osd_inode_id *id = &info->oti_id; + struct inode *dir = obj->oo_inode; + struct osd_thread_info *info = osd_oti_get(env); + struct dentry *dentry; struct ldiskfs_dir_entry_2 *de; struct buffer_head *bh; - struct inode *inode; + struct lu_fid *fid = &info->oti_fid; int ino; int rc; @@ -2729,17 +2764,9 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj, if (bh) { ino = le32_to_cpu(de->inode); brelse(bh); - id->oii_ino = ino; - id->oii_gen = OSD_OII_NOGEN; - - inode = osd_iget(info, dev, id); - if (!IS_ERR(inode)) { - dentry->d_inode = inode; - - rc = osd_ea_fid_get(env, dentry, rec); - iput(inode); - } else - rc = PTR_ERR(inode); + rc = osd_ea_fid_get(env, obj, ino, fid); + if (rc == 0) + osd_fid_pack(env, fid, rec); } else rc = -ENOENT; @@ -2997,15 +3024,89 @@ static int osd_it_iam_key_size(const struct lu_env *env, const struct dt_it *di) return iam_it_key_size(&it->oi_it); } +static inline void osd_it_append_attrs(struct lu_dirent*ent, + __u32 attr, + int len, + __u16 type) +{ + struct luda_type *lt; + const unsigned align = sizeof(struct luda_type) - 1; + + /* check if file type is required */ + if (attr & LUDA_TYPE) { + len = (len + align) & ~align; + + lt = (void *) ent->lde_name + len; + lt->lt_type = cpu_to_le16(CFS_DTTOIF(type)); + ent->lde_attrs |= LUDA_TYPE; + } + + ent->lde_attrs = cpu_to_le32(ent->lde_attrs); +} + +/** + * build lu direct from backend fs dirent. + */ + +static inline void osd_it_pack_dirent(struct lu_dirent *ent, + struct lu_fid *fid, + __u64 offset, + char *name, + __u16 namelen, + __u16 type, + __u32 attr) +{ + fid_cpu_to_le(&ent->lde_fid, fid); + ent->lde_attrs = LUDA_FID; + + ent->lde_hash = cpu_to_le64(offset); + ent->lde_reclen = cpu_to_le16(lu_dirent_calc_size(namelen, attr)); + + strncpy(ent->lde_name, name, namelen); + ent->lde_namelen = cpu_to_le16(namelen); + + /* append lustre attributes */ + osd_it_append_attrs(ent, attr, namelen, type); +} + /** * Return pointer to the record under iterator. */ -static struct dt_rec *osd_it_iam_rec(const struct lu_env *env, - const struct dt_it *di) +static int osd_it_iam_rec(const struct lu_env *env, + const struct dt_it *di, + struct lu_dirent *lde, + __u32 attr) { - struct osd_it_iam *it = (struct osd_it_iam *)di; + struct osd_it_iam *it = (struct osd_it_iam *)di; + struct osd_thread_info *info = osd_oti_get(env); + struct lu_fid *fid = &info->oti_fid; + const struct lu_fid_pack *rec; + char *name; + int namelen; + __u64 hash; + int rc; + + name = (char *)iam_it_key_get(&it->oi_it); + if (IS_ERR(name)) + RETURN(PTR_ERR(name)); + + namelen = iam_it_key_size(&it->oi_it); + + rec = (const struct lu_fid_pack *) iam_it_rec_get(&it->oi_it); + if (IS_ERR(rec)) + RETURN(PTR_ERR(rec)); + + rc = fid_unpack(rec, fid); + if (rc) + RETURN(rc); - return (struct dt_rec *)iam_it_rec_get(&it->oi_it); + hash = iam_it_store(&it->oi_it); + + /* IAM does not store object type in IAM index (dir) */ + osd_it_pack_dirent(lde, fid, hash, name, namelen, + 0, LUDA_FID); + + return 0; } /** @@ -3079,11 +3180,10 @@ static struct dt_it *osd_it_ea_init(const struct lu_env *env, it->oie_rd_dirent = 0; it->oie_it_dirent = 0; - it->oie_curr_pos = 0; - it->oie_next_pos = 0; it->oie_dirent = NULL; it->oie_buf = info->oti_it_ea_buf; it->oie_obj = obj; + it->oie_file.f_pos = 0; it->oie_file.f_dentry = obj_dentry; it->oie_file.f_mapping = obj->oo_inode->i_mapping; it->oie_file.f_op = obj->oo_inode->i_fop; @@ -3126,8 +3226,7 @@ static int osd_it_ea_get(const struct lu_env *env, ENTRY; LASSERT(((const char *)key)[0] == '\0'); - it->oie_curr_pos = 0; - it->oie_next_pos = 0; + it->oie_file.f_pos = 0; it->oie_rd_dirent = 0; it->oie_it_dirent = 0; it->oie_dirent = NULL; @@ -3154,8 +3253,8 @@ static void osd_it_ea_put(const struct lu_env *env, struct dt_it *di) * \retval 1, on buffer full */ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen, - loff_t offset, ino_t ino, - unsigned int d_type) + loff_t offset, __u64 ino, + unsigned d_type) { struct osd_it_ea *it = (struct osd_it_ea *)buf; struct osd_it_ea_dirent *ent = it->oie_dirent; @@ -3174,6 +3273,8 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen, ent->oied_ino = ino; ent->oied_off = offset; ent->oied_namelen = namelen; + ent->oied_type = d_type; + memcpy(ent->oied_name, name, namelen); it->oie_rd_dirent++; @@ -3200,14 +3301,12 @@ static int osd_ldiskfs_it_fill(const struct dt_it *di) ENTRY; it->oie_dirent = it->oie_buf; it->oie_rd_dirent = 0; - it->oie_file.f_pos = it->oie_curr_pos; down_read(&obj->oo_ext_idx_sem); result = inode->i_fop->readdir(&it->oie_file, it, (filldir_t) osd_ldiskfs_filldir); up_read(&obj->oo_ext_idx_sem); - it->oie_next_pos = it->oie_file.f_pos; if (it->oie_rd_dirent == 0) { result = -EIO; @@ -3244,9 +3343,7 @@ static int osd_it_ea_next(const struct lu_env *env, struct dt_it *di) it->oie_it_dirent++; RETURN(0); } else { - it->oie_curr_pos = it->oie_next_pos; - - if (it->oie_curr_pos == LDISKFS_HTREE_EOF) + if (it->oie_file.f_pos == LDISKFS_HTREE_EOF) rc = +1; else rc = osd_ldiskfs_it_fill(di); @@ -3284,47 +3381,40 @@ static int osd_it_ea_key_size(const struct lu_env *env, const struct dt_it *di) RETURN(it->oie_dirent->oied_namelen); } + /** * Returns the value (i.e. fid/igif) at current position from iterator's * in memory structure. * * \param di, struct osd_it_ea, iterator's in memory structure + * \param attr, attr requested for dirent. + * \param lde, lustre dirent * - * \retval value i.e. struct dt_rec on success + * \retval 0, no error and \param lde has correct lustre dirent. + * \retval -ve, on error */ -static struct dt_rec *osd_it_ea_rec(const struct lu_env *env, - const struct dt_it *di) +static inline int osd_it_ea_rec(const struct lu_env *env, + const struct dt_it *di, + struct lu_dirent *lde, + __u32 attr) { - struct osd_it_ea *it = (struct osd_it_ea *)di; - struct osd_object *obj = it->oie_obj; - struct osd_thread_info *info = osd_oti_get(env); - struct osd_inode_id *id = &info->oti_id; - struct lu_fid_pack *rec = &info->oti_pack; - struct lu_device *ldev = obj->oo_dt.do_lu.lo_dev; - struct dentry *dentry = &info->oti_child_dentry; - struct osd_device *dev; - struct inode *inode; - int rc; + struct osd_it_ea *it = (struct osd_it_ea *)di; + struct osd_object *obj = it->oie_obj; + struct osd_thread_info *info = osd_oti_get(env); + struct lu_fid *fid = &info->oti_fid; + int rc; ENTRY; - dev = osd_dev(ldev); - id->oii_ino = it->oie_dirent->oied_ino; - id->oii_gen = OSD_OII_NOGEN; - inode = osd_iget(info, dev, id); - if (!IS_ERR(inode)) { - dentry->d_inode = inode; - LASSERT(dentry->d_inode->i_sb == osd_sb(dev)); - } else { - RETURN((struct dt_rec *) PTR_ERR(inode)); - } - - rc = osd_ea_fid_get(env, dentry, (struct dt_rec*) rec); - if (rc != 0) - rec = ERR_PTR(rc); - iput(inode); - RETURN((struct dt_rec *)rec); + rc = osd_ea_fid_get(env, obj, it->oie_dirent->oied_ino, fid); + if (rc == 0) + osd_it_pack_dirent(lde, fid, it->oie_dirent->oied_off, + it->oie_dirent->oied_name, + it->oie_dirent->oied_namelen, + it->oie_dirent->oied_type, + attr); + RETURN(rc); } /** @@ -3359,7 +3449,7 @@ static int osd_it_ea_load(const struct lu_env *env, int rc; ENTRY; - it->oie_curr_pos = hash; + it->oie_file.f_pos = hash; rc = osd_ldiskfs_it_fill(di); if (rc == 0) @@ -3367,27 +3457,6 @@ static int osd_it_ea_load(const struct lu_env *env, RETURN(rc); } -/** - * Index and Iterator operations for interoperability - * mode (i.e. to run 2.0 mds on 1.8 disk) (b11826) - */ -static const struct dt_index_operations osd_index_ea_ops = { - .dio_lookup = osd_index_ea_lookup, - .dio_insert = osd_index_ea_insert, - .dio_delete = osd_index_ea_delete, - .dio_it = { - .init = osd_it_ea_init, - .fini = osd_it_ea_fini, - .get = osd_it_ea_get, - .put = osd_it_ea_put, - .next = osd_it_ea_next, - .key = osd_it_ea_key, - .key_size = osd_it_ea_key_size, - .rec = osd_it_ea_rec, - .store = osd_it_ea_store, - .load = osd_it_ea_load - } -}; /** * Index lookup function for interoperability mode (b11826). @@ -3419,14 +3488,26 @@ static int osd_index_ea_lookup(const struct lu_env *env, struct dt_object *dt, RETURN(rc); } -/* type constructor/destructor: osd_type_init, osd_type_fini */ -LU_TYPE_INIT_FINI(osd, &osd_key); - -static struct lu_context_key osd_key = { - .lct_tags = LCT_DT_THREAD | LCT_MD_THREAD, - .lct_init = osd_key_init, - .lct_fini = osd_key_fini, - .lct_exit = osd_key_exit +/** + * Index and Iterator operations for interoperability + * mode (i.e. to run 2.0 mds on 1.8 disk) (b11826) + */ +static const struct dt_index_operations osd_index_ea_ops = { + .dio_lookup = osd_index_ea_lookup, + .dio_insert = osd_index_ea_insert, + .dio_delete = osd_index_ea_delete, + .dio_it = { + .init = osd_it_ea_init, + .fini = osd_it_ea_fini, + .get = osd_it_ea_get, + .put = osd_it_ea_put, + .next = osd_it_ea_next, + .key = osd_it_ea_key, + .key_size = osd_it_ea_key_size, + .rec = osd_it_ea_rec, + .store = osd_it_ea_store, + .load = osd_it_ea_load + } }; static void *osd_key_init(const struct lu_context *ctx, @@ -3469,6 +3550,17 @@ static void osd_key_exit(const struct lu_context *ctx, LASSERT(info->oti_txns == 0); } +/* type constructor/destructor: osd_type_init, osd_type_fini */ +LU_TYPE_INIT_FINI(osd, &osd_key); + +static struct lu_context_key osd_key = { + .lct_tags = LCT_DT_THREAD | LCT_MD_THREAD, + .lct_init = osd_key_init, + .lct_fini = osd_key_fini, + .lct_exit = osd_key_exit +}; + + static int osd_device_init(const struct lu_env *env, struct lu_device *d, const char *name, struct lu_device *next) { @@ -3683,178 +3775,6 @@ out: RETURN(result); } -static struct inode *osd_iget(struct osd_thread_info *info, - struct osd_device *dev, - const struct osd_inode_id *id) -{ - struct inode *inode; - - inode = iget(osd_sb(dev), id->oii_ino); - if (inode == NULL) { - CERROR("no inode\n"); - inode = ERR_PTR(-EACCES); - } else if (id->oii_gen != OSD_OII_NOGEN && - inode->i_generation != id->oii_gen) { - iput(inode); - inode = ERR_PTR(-ESTALE); - } else if (inode->i_nlink == 0) { - /* due to parallel readdir and unlink, - * we can have dead inode here. */ - CWARN("stale inode\n"); - make_bad_inode(inode); - iput(inode); - inode = ERR_PTR(-ESTALE); - } else if (is_bad_inode(inode)) { - CERROR("bad inode %lx\n",inode->i_ino); - iput(inode); - inode = ERR_PTR(-ENOENT); - } - return inode; - -} - -static int osd_fid_lookup(const struct lu_env *env, - struct osd_object *obj, const struct lu_fid *fid) -{ - struct osd_thread_info *info; - struct lu_device *ldev = obj->oo_dt.do_lu.lo_dev; - struct osd_device *dev; - struct osd_inode_id *id; - struct osd_oi *oi; - struct inode *inode; - int result; - - LINVRNT(osd_invariant(obj)); - LASSERT(obj->oo_inode == NULL); - LASSERT(fid_is_sane(fid)); - /* - * This assertion checks that osd layer sees only local - * fids. Unfortunately it is somewhat expensive (does a - * cache-lookup). Disabling it for production/acceptance-testing. - */ - LASSERT(1 || fid_is_local(env, ldev->ld_site, fid)); - - ENTRY; - - info = osd_oti_get(env); - dev = osd_dev(ldev); - id = &info->oti_id; - oi = &dev->od_oi; - - if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) - RETURN(-ENOENT); - - result = osd_oi_lookup(info, oi, fid, id); - if (result == 0) { - inode = osd_iget(info, dev, id); - if (!IS_ERR(inode)) { - obj->oo_inode = inode; - LASSERT(obj->oo_inode->i_sb == osd_sb(dev)); - if (dev->od_iop_mode) { - obj->oo_compat_dot_created = 1; - obj->oo_compat_dotdot_created = 1; - } - result = 0; - } else - /* - * If fid wasn't found in oi, inode-less object is - * created, for which lu_object_exists() returns - * false. This is used in a (frequent) case when - * objects are created as locking anchors or - * place holders for objects yet to be created. - */ - result = PTR_ERR(inode); - } else if (result == -ENOENT) - result = 0; - LINVRNT(osd_invariant(obj)); - - RETURN(result); -} - -static void osd_inode_getattr(const struct lu_env *env, - struct inode *inode, struct lu_attr *attr) -{ - attr->la_valid |= LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE | - LA_SIZE | LA_BLOCKS | LA_UID | LA_GID | - LA_FLAGS | LA_NLINK | LA_RDEV | LA_BLKSIZE; - - attr->la_atime = LTIME_S(inode->i_atime); - attr->la_mtime = LTIME_S(inode->i_mtime); - attr->la_ctime = LTIME_S(inode->i_ctime); - attr->la_mode = inode->i_mode; - attr->la_size = i_size_read(inode); - attr->la_blocks = inode->i_blocks; - attr->la_uid = inode->i_uid; - attr->la_gid = inode->i_gid; - attr->la_flags = LDISKFS_I(inode)->i_flags; - attr->la_nlink = inode->i_nlink; - attr->la_rdev = inode->i_rdev; - attr->la_blksize = ll_inode_blksize(inode); - attr->la_blkbits = inode->i_blkbits; -} - -/* - * Helpers. - */ - -static int lu_device_is_osd(const struct lu_device *d) -{ - return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &osd_lu_ops); -} - -static struct osd_object *osd_obj(const struct lu_object *o) -{ - LASSERT(lu_device_is_osd(o->lo_dev)); - return container_of0(o, struct osd_object, oo_dt.do_lu); -} - -static struct osd_device *osd_dt_dev(const struct dt_device *d) -{ - LASSERT(lu_device_is_osd(&d->dd_lu_dev)); - return container_of0(d, struct osd_device, od_dt_dev); -} - -static struct osd_device *osd_dev(const struct lu_device *d) -{ - LASSERT(lu_device_is_osd(d)); - return osd_dt_dev(container_of0(d, struct dt_device, dd_lu_dev)); -} - -static struct osd_object *osd_dt_obj(const struct dt_object *d) -{ - return osd_obj(&d->do_lu); -} - -static struct osd_device *osd_obj2dev(const struct osd_object *o) -{ - return osd_dev(o->oo_dt.do_lu.lo_dev); -} - -static struct lu_device *osd2lu_dev(struct osd_device *osd) -{ - return &osd->od_dt_dev.dd_lu_dev; -} - -static struct super_block *osd_sb(const struct osd_device *dev) -{ - return dev->od_mount->lmi_mnt->mnt_sb; -} - -static journal_t *osd_journal(const struct osd_device *dev) -{ - return LDISKFS_SB(osd_sb(dev))->s_journal; -} - -static int osd_has_index(const struct osd_object *obj) -{ - return obj->oo_dt.do_index_ops != NULL; -} - -static int osd_object_invariant(const struct lu_object *l) -{ - return osd_invariant(osd_obj(l)); -} - static const struct lu_object_operations osd_lu_obj_ops = { .loo_object_init = osd_object_init, .loo_object_delete = osd_object_delete,