From 4980567857699c7f902ebda336ea98fdc4b83100 Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Thu, 15 Mar 2012 23:14:26 +0400 Subject: [PATCH] LU-911 osd: support for legacy OST objects in ldiskfs osd ldiskfs osd maintains O/ directory: recognize IDIFs and use O/ to lookup/create such objects by ldiskfs/vfs means. Signed-off-by: Mikhail Pershin Change-Id: I76207e5e0540c51923ceaa32a37e4b7a998b624e Reviewed-on: http://review.whamcloud.com/1836 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/lustre/lustre_idl.h | 19 +- lustre/include/lustre_fid.h | 3 - lustre/osd-ldiskfs/Makefile.in | 2 +- lustre/osd-ldiskfs/osd_compat.c | 582 +++++++++++++++++++++++++++++++++++++ lustre/osd-ldiskfs/osd_handler.c | 96 +++--- lustre/osd-ldiskfs/osd_igif.c | 2 +- lustre/osd-ldiskfs/osd_internal.h | 62 ++-- lustre/osd-ldiskfs/osd_io.c | 3 +- lustre/osd-ldiskfs/osd_oi.c | 37 ++- lustre/ost/ost_handler.c | 4 +- 10 files changed, 700 insertions(+), 110 deletions(-) create mode 100644 lustre/osd-ldiskfs/osd_compat.c diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 01c5f41..29af57d 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -464,7 +464,7 @@ static inline int fid_seq_is_mdt(const __u64 seq) static inline int fid_seq_is_rsvd(const __u64 seq) { - return seq <= FID_SEQ_RSVD; + return (seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD); }; static inline int fid_is_mdt0(const struct lu_fid *fid) @@ -608,21 +608,24 @@ static inline int fid_ostid_unpack(struct lu_fid *fid, struct ost_id *ostid, } /* pack an IDIF FID into an ostid (id/seq) for the wire/disk */ -static inline void ostid_idif_pack(struct lu_fid *fid, struct ost_id *ostid) +static inline void ostid_idif_pack(const struct lu_fid *fid, + struct ost_id *ostid) { ostid->oi_seq = FID_SEQ_OST_MDT0; ostid->oi_id = fid_idif_id(fid->f_seq, fid->f_oid, fid->f_ver); } /* pack a non-IDIF FID into an ostid (id/seq) for the wire/disk */ -static inline void ostid_fid_pack(struct lu_fid *fid, struct ost_id *ostid) +static inline void ostid_fid_pack(const struct lu_fid *fid, + struct ost_id *ostid) { ostid->oi_seq = fid_seq(fid); ostid->oi_id = fid_ver_oid(fid); } /* pack any OST FID into an ostid (id/seq) for the wire/disk */ -static inline int fid_ostid_pack(struct lu_fid *fid, struct ost_id *ostid) +static inline int fid_ostid_pack(const struct lu_fid *fid, + struct ost_id *ostid) { if (unlikely(fid_seq_is_igif(fid->f_seq))) { CERROR("bad IGIF, "DFID"\n", PFID(fid)); @@ -758,7 +761,7 @@ static inline int fid_is_sane(const struct lu_fid *fid) fid != NULL && ((fid_seq(fid) >= FID_SEQ_START && fid_oid(fid) != 0 && fid_ver(fid) == 0) || - fid_is_igif(fid)); + fid_is_igif(fid) || fid_seq_is_rsvd(fid_seq(fid))); } static inline int fid_is_zero(const struct lu_fid *fid) @@ -775,8 +778,10 @@ static inline int lu_fid_eq(const struct lu_fid *f0, /* Check that there is no alignment padding. */ CLASSERT(sizeof *f0 == sizeof f0->f_seq + sizeof f0->f_oid + sizeof f0->f_ver); - LASSERTF(fid_is_igif(f0) || fid_ver(f0) == 0, DFID, PFID(f0)); - LASSERTF(fid_is_igif(f1) || fid_ver(f1) == 0, DFID, PFID(f1)); + LASSERTF((fid_is_igif(f0) || fid_is_idif(f0)) || + fid_ver(f0) == 0, DFID, PFID(f0)); + LASSERTF((fid_is_igif(f1) || fid_is_idif(f1)) || + fid_ver(f1) == 0, DFID, PFID(f1)); return memcmp(f0, f1, sizeof *f0) == 0; } diff --git a/lustre/include/lustre_fid.h b/lustre/include/lustre_fid.h index f36d257..48b8c90 100644 --- a/lustre/include/lustre_fid.h +++ b/lustre/include/lustre_fid.h @@ -109,11 +109,8 @@ enum local_oid { MDD_ORPHAN_OID = 7UL, MDD_LOV_OBJ_OID = 8UL, MDD_CAPA_KEYS_OID = 9UL, - MDD_OBJECTS_OID = 10UL, /** \see mdt_mod_init */ MDT_LAST_RECV_OID = 11UL, - /** \see osd_mod_init */ - OSD_REM_OBJ_DIR_OID = 12UL, OSD_FS_ROOT_OID = 13UL, ACCT_USER_OID = 15UL, ACCT_GROUP_OID = 16UL, diff --git a/lustre/osd-ldiskfs/Makefile.in b/lustre/osd-ldiskfs/Makefile.in index 63a2098..9a9e0f6 100644 --- a/lustre/osd-ldiskfs/Makefile.in +++ b/lustre/osd-ldiskfs/Makefile.in @@ -1,6 +1,6 @@ MODULES := osd_ldiskfs osd_ldiskfs-objs := osd_handler.o osd_oi.o osd_igif.o osd_lproc.o osd_iam.o \ - osd_iam_lfix.o osd_iam_lvar.o osd_io.o + osd_iam_lfix.o osd_iam_lvar.o osd_io.o osd_compat.o EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs diff --git a/lustre/osd-ldiskfs/osd_compat.c b/lustre/osd-ldiskfs/osd_compat.c new file mode 100644 index 0000000..21b7ccf --- /dev/null +++ b/lustre/osd-ldiskfs/osd_compat.c @@ -0,0 +1,582 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Whamcloud, Inc. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/osd/osd_compat.c + * + * on-disk compatibility stuff for OST + * + * Author: Alex Zhuravlev + */ + +/* LUSTRE_VERSION_CODE */ +#include +/* prerequisite for linux/xattr.h */ +#include +/* prerequisite for linux/xattr.h */ +#include + +/* + * struct OBD_{ALLOC,FREE}*() + * OBD_FAIL_CHECK + */ +#include +#include + +#include "osd_internal.h" + +struct osd_compat_objid_seq { + /* protects on-fly initialization */ + cfs_semaphore_t dir_init_sem; + /* file storing last created objid */ + struct osd_inode_id last_id; + struct dentry *groot; /* O/ */ + struct dentry **dirs; /* O//d0-dXX */ +}; + +#define MAX_OBJID_GROUP (FID_SEQ_ECHO + 1) + +struct osd_compat_objid { + int subdir_count; + struct dentry *root; + struct osd_inode_id last_rcvd_id; + struct osd_inode_id last_seq_id; + struct osd_compat_objid_seq groups[MAX_OBJID_GROUP]; +}; + +static void osd_push_ctxt(const struct osd_device *dev, + struct lvfs_run_ctxt *newctxt, + struct lvfs_run_ctxt *save) +{ + OBD_SET_CTXT_MAGIC(newctxt); + newctxt->pwdmnt = dev->od_mnt; + newctxt->pwd = dev->od_mnt->mnt_root; + newctxt->fs = get_ds(); + + push_ctxt(save, newctxt, NULL); +} + +void osd_compat_seq_fini(struct osd_device *osd, int seq) +{ + struct osd_compat_objid_seq *grp; + struct osd_compat_objid *map = osd->od_ost_map; + int i; + + ENTRY; + + grp = &map->groups[seq]; + if (grp->groot ==NULL) + RETURN_EXIT; + LASSERT(grp->dirs); + + for (i = 0; i < map->subdir_count; i++) { + if (grp->dirs[i] == NULL) + break; + dput(grp->dirs[i]); + } + + OBD_FREE(grp->dirs, sizeof(struct dentry *) * map->subdir_count); + dput(grp->groot); + EXIT; +} + +int osd_compat_seq_init(struct osd_device *osd, int seq) +{ + struct osd_compat_objid_seq *grp; + struct osd_compat_objid *map; + struct dentry *d; + int rc = 0; + char name[32]; + int i; + ENTRY; + + map = osd->od_ost_map; + LASSERT(map); + LASSERT(map->root); + grp = &map->groups[seq]; + + if (grp->groot != NULL) + RETURN(0); + + cfs_down(&grp->dir_init_sem); + + sprintf(name, "%d", seq); + d = simple_mkdir(map->root, osd->od_mnt, name, 0755, 1); + if (IS_ERR(d)) { + rc = PTR_ERR(d); + GOTO(out, rc); + } else if (d->d_inode == NULL) { + rc = -EFAULT; + dput(d); + GOTO(out, rc); + } + + LASSERT(grp->dirs == NULL); + OBD_ALLOC(grp->dirs, sizeof(d) * map->subdir_count); + if (grp->dirs == NULL) { + dput(d); + GOTO(out, rc = -ENOMEM); + } + + grp->groot = d; + for (i = 0; i < map->subdir_count; i++) { + sprintf(name, "d%d", i); + d = simple_mkdir(grp->groot, osd->od_mnt, name, 0755, 1); + if (IS_ERR(d)) { + rc = PTR_ERR(d); + break; + } else if (d->d_inode == NULL) { + rc = -EFAULT; + dput(d); + break; + } + + grp->dirs[i] = d; + } + + if (rc) + osd_compat_seq_fini(osd, seq); +out: + cfs_up(&grp->dir_init_sem); + RETURN(rc); +} + +int osd_last_rcvd_subdir_count(struct osd_device *osd) +{ + struct lr_server_data lsd; + struct dentry *dlast; + loff_t off; + int rc = 0; + int count = 0; + + ENTRY; + + dlast = ll_lookup_one_len(LAST_RCVD, osd_sb(osd)->s_root, + strlen(LAST_RCVD)); + if (IS_ERR(dlast)) + return PTR_ERR(dlast); + else if (dlast->d_inode == NULL) + goto out; + + off = 0; + rc = osd_ldiskfs_read(dlast->d_inode, &lsd, sizeof(lsd), &off); + if (rc == sizeof(lsd)) { + CDEBUG(D_INFO, "read last_rcvd header, uuid = %s, " + "subdir count = %d\n", lsd.lsd_uuid, + lsd.lsd_subdir_count); + count = le16_to_cpu(lsd.lsd_subdir_count); + } else if (rc != 0) { + CERROR("Can't read last_rcvd file, rc = %d\n", rc); + if (rc > 0) + rc = -EFAULT; + goto out; + } else { + count = FILTER_SUBDIR_COUNT; + } + + rc = count; +out: + dput(dlast); + return rc; +} + +void osd_compat_fini(struct osd_device *dev) +{ + int i; + + ENTRY; + + if (dev->od_ost_map == NULL) + RETURN_EXIT; + + for (i = 0; i < MAX_OBJID_GROUP; i++) + osd_compat_seq_fini(dev, i); + + dput(dev->od_ost_map->root); + OBD_FREE_PTR(dev->od_ost_map); + dev->od_ost_map = NULL; + + EXIT; +} + +/* + * directory structure on legacy OST: + * + * O//d0-31/ + * O//LAST_ID + * last_rcvd + * LAST_GROUP + * CONFIGS + * + */ +int osd_compat_init(struct osd_device *dev) +{ + struct lvfs_run_ctxt new; + struct lvfs_run_ctxt save; + struct dentry *rootd = osd_sb(dev)->s_root; + struct dentry *d; + int rc; + int i; + + ENTRY; + + /* to get subdir count from last_rcvd */ + rc = osd_last_rcvd_subdir_count(dev); + if (rc <= 0) + RETURN(rc); + + dev->od_ost_map->subdir_count = rc; + rc = 0; + + OBD_ALLOC_PTR(dev->od_ost_map); + if (dev->od_ost_map == NULL) + RETURN(-ENOMEM); + + LASSERT(dev->od_fsops); + osd_push_ctxt(dev, &new, &save); + + d = simple_mkdir(rootd, dev->od_mnt, "O", 0755, 1); + pop_ctxt(&save, &new, NULL); + if (IS_ERR(d)) { + OBD_FREE_PTR(dev->od_ost_map); + RETURN(PTR_ERR(d)); + } + + dev->od_ost_map->root = d; + + /* Initialize all groups */ + for (i = 0; i < MAX_OBJID_GROUP; i++) { + cfs_sema_init(&dev->od_ost_map->groups[i].dir_init_sem, 1); + rc = osd_compat_seq_init(dev, i); + if (rc) { + osd_compat_fini(dev); + break; + } + } + + RETURN(rc); +} + +int osd_compat_del_entry(struct osd_thread_info *info, struct osd_device *osd, + struct dentry *dird, char *name, struct thandle *th) +{ + struct ldiskfs_dir_entry_2 *de; + struct buffer_head *bh; + struct osd_thandle *oh; + struct dentry *child; + struct inode *dir = dird->d_inode; + int rc; + + ENTRY; + + oh = container_of(th, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle != NULL); + LASSERT(oh->ot_handle->h_transaction != NULL); + + + child = &info->oti_child_dentry; + child->d_name.hash = 0; + child->d_name.name = name; + child->d_name.len = strlen(name); + child->d_parent = dird; + child->d_inode = NULL; + + LOCK_INODE_MUTEX(dir); + rc = -ENOENT; + bh = osd_ldiskfs_find_entry(dir, child, &de, NULL); + if (bh) { + rc = ldiskfs_delete_entry(oh->ot_handle, dir, de, bh); + brelse(bh); + } + UNLOCK_INODE_MUTEX(dir); + + RETURN(rc); +} + +int osd_compat_add_entry(struct osd_thread_info *info, struct osd_device *osd, + struct dentry *dir, char *name, + const struct osd_inode_id *id, struct thandle *th) +{ + struct osd_thandle *oh; + struct dentry *child; + struct inode *inode; + int rc; + + ENTRY; + + oh = container_of(th, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle != NULL); + LASSERT(oh->ot_handle->h_transaction != NULL); + + inode = &info->oti_inode; + inode->i_sb = osd_sb(osd); + inode->i_ino = id->oii_ino; + inode->i_generation = id->oii_gen; + + child = &info->oti_child_dentry; + child->d_name.hash = 0; + child->d_name.name = name; + child->d_name.len = strlen(name); + child->d_parent = dir; + child->d_inode = inode; + + LOCK_INODE_MUTEX(dir->d_inode); + rc = osd_ldiskfs_add_entry(oh->ot_handle, child, inode, NULL); + UNLOCK_INODE_MUTEX(dir->d_inode); + + RETURN(rc); +} + +int osd_compat_objid_lookup(struct osd_thread_info *info, + struct osd_device *dev, const struct lu_fid *fid, + struct osd_inode_id *id) +{ + struct osd_compat_objid *map; + struct dentry *d; + struct dentry *d_seq; + struct ost_id *ostid = &info->oti_ostid; + int rc = 0; + int dirn; + char name[32]; + struct ldiskfs_dir_entry_2 *de; + struct buffer_head *bh; + struct inode *dir; + ENTRY; + + /* on the very first lookup we find and open directories */ + + map = dev->od_ost_map; + LASSERT(map); + LASSERT(map->root); + + fid_ostid_pack(fid, ostid); + LASSERT(ostid->oi_seq < MAX_OBJID_GROUP); + LASSERT(map->subdir_count > 0); + LASSERT(map->groups[ostid->oi_seq].groot); + + dirn = ostid->oi_id & (map->subdir_count - 1); + d = map->groups[ostid->oi_seq].dirs[dirn]; + LASSERT(d); + + sprintf(name, "%llu", ostid->oi_id); + d_seq = &info->oti_child_dentry; + d_seq->d_parent = d; + d_seq->d_name.hash = 0; + d_seq->d_name.name = name; + /* XXX: we can use rc from sprintf() instead of strlen() */ + d_seq->d_name.len = strlen(name); + + dir = d->d_inode; + LOCK_INODE_MUTEX(dir); + bh = osd_ldiskfs_find_entry(dir, d_seq, &de, NULL); + UNLOCK_INODE_MUTEX(dir); + + rc = -ENOENT; + if (bh) { + struct inode *inode; + + id->oii_ino = le32_to_cpu(de->inode); + brelse(bh); + + id->oii_gen = OSD_OII_NOGEN; + inode = osd_iget(info, dev, id); + + if (IS_ERR(inode)) + GOTO(cleanup, rc = PTR_ERR(inode)); + rc = 0; + id->oii_gen = inode->i_generation; + iput(inode); + } + +cleanup: + RETURN(rc); +} + +int osd_compat_objid_insert(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, + const struct osd_inode_id *id, + struct thandle *th) +{ + struct osd_compat_objid *map; + struct dentry *d; + struct ost_id *ostid = &info->oti_ostid; + int dirn, rc = 0; + char name[32]; + ENTRY; + + map = osd->od_ost_map; + LASSERT(map); + LASSERT(map->root); + LASSERT(map->subdir_count > 0); + LASSERT(map->groups[ostid->oi_seq].groot); + + /* map fid to group:objid */ + fid_ostid_pack(fid, ostid); + dirn = ostid->oi_id & (map->subdir_count - 1); + d = map->groups[ostid->oi_seq].dirs[dirn]; + LASSERT(d); + + sprintf(name, "%llu", ostid->oi_id); + rc = osd_compat_add_entry(info, osd, d, name, id, th); + + RETURN(rc); +} + +int osd_compat_objid_delete(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, struct thandle *th) +{ + struct osd_compat_objid *map; + struct dentry *d; + struct ost_id *ostid = &info->oti_ostid; + int dirn, rc = 0; + char name[32]; + ENTRY; + + map = osd->od_ost_map; + LASSERT(map); + LASSERT(map->root); + LASSERT(map->subdir_count > 0); + LASSERT(map->groups[ostid->oi_seq].groot); + + /* map fid to group:objid */ + fid_ostid_pack(fid, ostid); + dirn = ostid->oi_id & (map->subdir_count - 1); + d = map->groups[ostid->oi_seq].dirs[dirn]; + LASSERT(d); + + sprintf(name, "%llu", ostid->oi_id); + rc = osd_compat_del_entry(info, osd, d, name, th); + + RETURN(rc); +} + +struct named_oid { + unsigned long oid; + char *name; +}; + +static const struct named_oid oids[] = { + { FLD_INDEX_OID, "" /* "fld" */ }, + { FID_SEQ_CTL_OID, "" /* "seq_ctl" */ }, + { FID_SEQ_SRV_OID, "" /* "seq_srv" */ }, + { MDD_ROOT_INDEX_OID, "" /* "ROOT" */ }, + { MDD_ORPHAN_OID, "" /* "PENDING" */ }, + { MDD_LOV_OBJ_OID, "" /* LOV_OBJID */ }, + { MDD_CAPA_KEYS_OID, "" /* CAPA_KEYS */ }, + { MDT_LAST_RECV_OID, "" /* LAST_RCVD */ }, + { OFD_LAST_RECV_OID, "" /* LAST_RCVD */ }, + { OFD_LAST_GROUP_OID, "" /* "LAST_GROUP" */ }, + { LLOG_CATALOGS_OID, "" /* "CATALOGS" */ }, + { MGS_CONFIGS_OID, "" /* MOUNT_CONFIGS_DIR */ }, + { OFD_HEALTH_CHECK_OID, "" /* HEALTH_CHECK */ }, + { 0, NULL } +}; + +static char *oid2name(const unsigned long oid) +{ + int i = 0; + + while (oids[i].oid) { + if (oids[i].oid == oid) + return oids[i].name; + i++; + } + return NULL; +} + +int osd_compat_spec_insert(struct osd_thread_info *info, + struct osd_device *osd, const struct lu_fid *fid, + const struct osd_inode_id *id, struct thandle *th) +{ + struct osd_compat_objid *map = osd->od_ost_map; + struct dentry *root = osd_sb(osd)->s_root; + char *name; + int rc = 0; + int seq; + ENTRY; + + if (fid_oid(fid) >= OFD_GROUP0_LAST_OID && + fid_oid(fid) < OFD_GROUP4K_LAST_OID) { + /* on creation of LAST_ID we create O/ hierarchy */ + LASSERT(map); + seq = fid_oid(fid) - OFD_GROUP0_LAST_OID; + LASSERT(seq < MAX_OBJID_GROUP); + LASSERT(map->groups[seq].groot); + } else { + name = oid2name(fid_oid(fid)); + if (name == NULL) + CWARN("UNKNOWN COMPAT FID "DFID"\n", PFID(fid)); + else if (name[0]) + rc = osd_compat_add_entry(info, osd, root, name, id, + th); + } + + RETURN(rc); +} + +int osd_compat_spec_lookup(struct osd_thread_info *info, + struct osd_device *osd, const struct lu_fid *fid, + struct osd_inode_id *id) +{ + struct dentry *dentry; + char *name; + int rc = -ERESTART; + + ENTRY; + + name = oid2name(fid_oid(fid)); + if (name == NULL || strlen(name) == 0) + return -ERESTART; + + dentry = ll_lookup_one_len(name, osd_sb(osd)->s_root, strlen(name)); + if (!IS_ERR(dentry)) { + if (dentry->d_inode) { + if (is_bad_inode(dentry->d_inode)) { + rc = -EIO; + } else { + id->oii_ino = dentry->d_inode->i_ino; + id->oii_gen = dentry->d_inode->i_generation; + rc = 0; + } + } + dput(dentry); + } + + RETURN(rc); +} + diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index e1dfcec..fa77bc9 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -164,10 +164,7 @@ static int osd_write_locked(const struct lu_env *env, struct osd_object *o) static int osd_root_get(const struct lu_env *env, struct dt_device *dev, struct lu_fid *f) { - struct inode *inode; - - inode = osd_sb(osd_dt_dev(dev))->s_root->d_inode; - LU_IGIF_BUILD(f, inode->i_ino, inode->i_generation); + lu_local_obj_fid(f, OSD_FS_ROOT_OID); return 0; } @@ -302,7 +299,7 @@ static int osd_fid_lookup(const struct lu_env *env, LINVRNT(osd_invariant(obj)); LASSERT(obj->oo_inode == NULL); - LASSERTF(fid_is_sane(fid) || osd_fid_is_root(fid), DFID, PFID(fid)); + LASSERTF(fid_is_sane(fid) || fid_is_idif(fid), DFID, PFID(fid)); /* * This assertion checks that osd layer sees only local * fids. Unfortunately it is somewhat expensive (does a @@ -751,7 +748,8 @@ static int osd_trans_stop(const struct lu_env *env, struct thandle *th) * IMPORTANT: we have to wait till any IO submited by the thread is * completed otherwise iobuf may be corrupted by different request */ - cfs_wait_event(iobuf->dr_wait, cfs_atomic_read(&iobuf->dr_numreqs)==0); + cfs_wait_event(iobuf->dr_wait, + cfs_atomic_read(&iobuf->dr_numreqs) == 0); if (!rc) rc = iobuf->dr_error; @@ -1432,7 +1430,7 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj, int result; struct osd_device *osd = osd_obj2dev(obj); struct osd_thandle *oth; - struct dt_object *parent; + struct dt_object *parent = NULL; struct inode *inode; #ifdef HAVE_QUOTA_SUPPORT struct osd_ctxt *save = &info->oti_ctxt; @@ -1453,15 +1451,13 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj, if (hint && hint->dah_parent) parent = hint->dah_parent; - else - parent = osd->od_obj_area; #ifdef HAVE_QUOTA_SUPPORT osd_push_ctxt(info->oti_env, save); #endif inode = ldiskfs_create_inode(oth->ot_handle, - parent ? osd_dt_obj(parent)->oo_inode : - osd_sb(osd)->s_root->d_inode, + parent ? osd_dt_obj(parent)->oo_inode : + osd_sb(osd)->s_root->d_inode, mode); #ifdef HAVE_QUOTA_SUPPORT osd_pop_ctxt(save); @@ -1999,7 +1995,6 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt, OSD_EXEC_OP(th, create); result = __osd_object_create(info, obj, attr, hint, dof, th); - /* objects under osd root shld have igif fid, so dont add fid EA */ if (result == 0 && fid_seq(fid) >= FID_SEQ_NORMAL) result = osd_ea_fid_set(env, dt, fid); @@ -2957,13 +2952,19 @@ static int __osd_ea_add_rec(struct osd_thread_info *info, child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name)); + /* XXX: remove fid_is_igif() check here. + * IGIF check is just to handle insertion of .. when it is 'ROOT', + * it is IGIF now but needs FID in dir entry as well for readdir + * to work. + * LU-838 should fix that and remove fid_is_igif() check */ if (fid_is_igif((struct lu_fid *)fid) || fid_is_norm((struct lu_fid *)fid)) { ldp = (struct ldiskfs_dentry_param *)info->oti_ldp; osd_get_ldiskfs_dirent_param(ldp, fid); - child->d_fsdata = (void*) ldp; - } else + child->d_fsdata = (void *)ldp; + } else { child->d_fsdata = NULL; + } rc = osd_ldiskfs_add_entry(oth->ot_handle, child, cinode, hlock); RETURN(rc); @@ -2988,10 +2989,10 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info, const struct dt_rec *dot_dot_fid, struct thandle *th) { - struct inode *inode = dir->oo_inode; + struct inode *inode = dir->oo_inode; struct ldiskfs_dentry_param *dot_ldp; struct ldiskfs_dentry_param *dot_dot_ldp; - struct osd_thandle *oth; + struct osd_thandle *oth; int result = 0; oth = container_of(th, struct osd_thandle, ot_super); @@ -3012,7 +3013,7 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info, if (!dir->oo_compat_dot_created) return -EINVAL; - if (fid_seq((struct lu_fid *)dot_fid) >= FID_SEQ_NORMAL) { + if (!fid_is_igif((struct lu_fid *)dot_fid)) { osd_get_ldiskfs_dirent_param(dot_ldp, dot_fid); osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid); } else { @@ -3975,11 +3976,9 @@ static int osd_device_init(const struct lu_env *env, struct lu_device *d, static int osd_shutdown(const struct lu_env *env, struct osd_device *o) { struct osd_thread_info *info = osd_oti_get(env); + ENTRY; - if (o->od_obj_area != NULL) { - lu_object_put(env, &o->od_obj_area->do_lu); - o->od_obj_area = NULL; - } + if (o->od_oi_table != NULL) osd_oi_fini(info, o); @@ -3998,6 +3997,7 @@ static int osd_mount(const struct lu_env *env, const char *dev = lustre_cfg_string(cfg, 0); struct lustre_disk_data *ldd; struct lustre_sb_info *lsi; + int rc = 0; ENTRY; @@ -4022,18 +4022,24 @@ static int osd_mount(const struct lu_env *env, LASSERT(lmi != NULL); /* save lustre_mount_info in dt_device */ o->od_mount = lmi; + o->od_mnt = lmi->lmi_mnt; lsi = s2lsi(lmi->lmi_sb); ldd = lsi->lsi_ldd; if (ldd->ldd_flags & LDD_F_IAM_DIR) { o->od_iop_mode = 0; - LCONSOLE_WARN("OSD: IAM mode enabled\n"); + LCONSOLE_WARN("%s: OSD: IAM mode enabled\n", dev); } else o->od_iop_mode = 1; - o->od_obj_area = NULL; - RETURN(0); + if (ldd->ldd_flags & LDD_F_SV_TYPE_OST) { + rc = osd_compat_init(o); + if (rc) + CERROR("%s: can't initialize compats: %d\n", dev, rc); + } + + RETURN(rc); } static struct lu_device *osd_device_fini(const struct lu_env *env, @@ -4042,6 +4048,8 @@ static struct lu_device *osd_device_fini(const struct lu_env *env, int rc; ENTRY; + osd_compat_fini(osd_dev(d)); + shrink_dcache_sb(osd_sb(osd_dev(d))); osd_sync(env, lu2dt_dev(d)); @@ -4131,19 +4139,15 @@ static int osd_recovery_complete(const struct lu_env *env, RETURN(0); } -static int osd_prepare(const struct lu_env *env, - struct lu_device *pdev, +static int osd_prepare(const struct lu_env *env, struct lu_device *pdev, struct lu_device *dev) { - struct osd_device *osd = osd_dev(dev); - struct lustre_sb_info *lsi; - struct lustre_disk_data *ldd; - struct lustre_mount_info *lmi; + struct osd_device *osd = osd_dev(dev); struct osd_thread_info *oti = osd_oti_get(env); - struct dt_object *d; - int result; + int result; ENTRY; + /* 1. initialize oi before any file create or file open */ result = osd_oi_init(oti, osd); if (result < 0) @@ -4152,27 +4156,8 @@ static int osd_prepare(const struct lu_env *env, if (!lu_device_is_md(pdev)) RETURN(0); - lmi = osd->od_mount; - lsi = s2lsi(lmi->lmi_sb); - ldd = lsi->lsi_ldd; - /* 2. setup local objects */ result = llo_local_objects_setup(env, lu2md_dev(pdev), lu2dt_dev(dev)); - if (result) - goto out; - - /* 3. open remote object dir */ - d = dt_store_open(env, lu2dt_dev(dev), "", - remote_obj_dir, &oti->oti_fid); - if (!IS_ERR(d)) { - osd->od_obj_area = d; - result = 0; - } else { - result = PTR_ERR(d); - osd->od_obj_area = NULL; - } - -out: RETURN(result); } @@ -4220,19 +4205,11 @@ static struct obd_ops osd_obd_device_ops = { .o_owner = THIS_MODULE }; -static struct lu_local_obj_desc llod_osd_rem_obj_dir = { - .llod_name = remote_obj_dir, - .llod_oid = OSD_REM_OBJ_DIR_OID, - .llod_is_index = 1, - .llod_feat = &dt_directory_features, -}; - static int __init osd_mod_init(void) { struct lprocfs_static_vars lvars; osd_oi_mod_init(); - llo_local_obj_register(&llod_osd_rem_obj_dir); lprocfs_osd_init_vars(&lvars); return class_register_type(&osd_obd_device_ops, NULL, lvars.module_vars, LUSTRE_OSD_NAME, &osd_device_type); @@ -4240,7 +4217,6 @@ static int __init osd_mod_init(void) static void __exit osd_mod_exit(void) { - llo_local_obj_unregister(&llod_osd_rem_obj_dir); class_unregister_type(LUSTRE_OSD_NAME); } diff --git a/lustre/osd-ldiskfs/osd_igif.c b/lustre/osd-ldiskfs/osd_igif.c index 9bd8052..6a27c80 100644 --- a/lustre/osd-ldiskfs/osd_igif.c +++ b/lustre/osd-ldiskfs/osd_igif.c @@ -60,7 +60,7 @@ void lu_igif_to_id(const struct lu_fid *fid, struct osd_inode_id *id) { - LASSERT(osd_fid_is_igif(fid)); + LASSERT(fid_is_igif(fid)); id->oii_ino = lu_igif_ino(fid); id->oii_gen = lu_igif_gen(fid); } diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index b94c0f2..6af354c 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -107,7 +107,7 @@ struct osd_oi { * underlying index object, where fid->id mapping in stored. */ struct inode *oi_inode; - struct osd_directory oi_dir; + struct osd_directory oi_dir; }; extern const int osd_dto_credits_noquota[]; @@ -204,11 +204,7 @@ struct osd_device { struct dt_device od_dt_dev; /* information about underlying file system */ struct lustre_mount_info *od_mount; - /* - * XXX temporary stuff for object index: directory where every object - * is named by its fid. - */ - struct dt_object *od_obj_area; + struct vfsmount *od_mnt; /* object index */ struct osd_oi **od_oi_table; /* total number of OI containers */ @@ -239,6 +235,11 @@ struct osd_device { struct fsfilt_operations *od_fsops; + /* + * mapping for legacy OST objids + */ + struct osd_compat_objid *od_ost_map; + unsigned long long od_readcache_max_filesize; int od_read_cache; int od_writethrough_cache; @@ -441,6 +442,8 @@ struct osd_thread_info { struct lu_fid oti_fid; struct osd_inode_id oti_id; + struct ost_id oti_ostid; + /* * XXX temporary: for ->i_op calls. */ @@ -533,8 +536,26 @@ void osd_declare_qid(struct dt_object *dt, struct osd_thandle *oh, struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, const struct osd_inode_id *id); -int generic_error_remove_page(struct address_space *mapping, - struct page *page); + +int osd_compat_init(struct osd_device *dev); +void osd_compat_fini(struct osd_device *dev); +int osd_compat_objid_lookup(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id); +int osd_compat_objid_insert(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, + const struct osd_inode_id *id, struct thandle *th); +int osd_compat_objid_delete(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, struct thandle *th); +int osd_compat_spec_lookup(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id); +int osd_compat_spec_insert(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, + const struct osd_inode_id *id, struct thandle *th); /* * Invariants, assertions. @@ -562,28 +583,11 @@ static inline int osd_invariant(const struct osd_object *obj) #define osd_invariant(obj) (1) #endif -/* The on-disk extN format reserves inodes 0-11 for internal filesystem - * use, and these inodes will be invisible on client side, so the valid - * sequence for IGIF fid is 12-0xffffffff. But root inode (2#) will be seen - * on server side (osd), and it should be valid too here. - */ -#define OSD_ROOT_SEQ 2 -static inline int osd_fid_is_root(const struct lu_fid *fid) -{ - return fid_seq(fid) == OSD_ROOT_SEQ; -} - -static inline int osd_fid_is_igif(const struct lu_fid *fid) -{ - return fid_is_igif(fid) || osd_fid_is_root(fid); -} - static inline struct osd_oi *osd_fid2oi(struct osd_device *osd, const struct lu_fid *fid) { - if (!fid_is_norm(fid)) - return NULL; - + LASSERT(!fid_is_idif(fid)); + LASSERT(!fid_is_igif(fid)); LASSERT(osd->od_oi_table != NULL && osd->od_oi_count >= 1); /* It can work even od_oi_count equals to 1 although it's unexpected, * the only reason we set it to 1 is for performance measurement */ @@ -681,12 +685,14 @@ static inline void osd_ipd_put(const struct lu_env *env, bag->ic_descr->id_ops->id_ipd_free(ipd); } +int osd_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs); + static inline struct dentry *osd_child_dentry_by_inode(const struct lu_env *env, struct inode *inode, const char *name, const int namelen) { - struct osd_thread_info *info = osd_oti_get(env); + struct osd_thread_info *info = osd_oti_get(env); struct dentry *child_dentry = &info->oti_child_dentry; struct dentry *obj_dentry = &info->oti_obj_dentry; diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 08f26ab..d2b10e6 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -851,8 +851,7 @@ static int osd_ldiskfs_readlink(struct inode *inode, char *buffer, int buflen) return buflen; } -static int osd_ldiskfs_read(struct inode *inode, void *buf, int size, - loff_t *offs) +int osd_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs) { struct buffer_head *bh; unsigned long block; diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c index c1ce0a6..0918c75 100644 --- a/lustre/osd-ldiskfs/osd_oi.c +++ b/lustre/osd-ldiskfs/osd_oi.c @@ -444,17 +444,28 @@ int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, { struct lu_fid *oi_fid = &info->oti_fid; const struct dt_key *key; - int rc = 0; + int rc = 0; - if (osd_fid_is_igif(fid)) { + if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG) { + /* old OSD obj id */ + rc = osd_compat_objid_lookup(info, osd, fid, id); + } else if (fid_is_igif(fid)) { lu_igif_to_id(fid, id); rc = 0; + } else if (fid_is_fs_root(fid)) { + struct inode *inode = osd_sb(osd)->s_root->d_inode; + + id->oii_ino = inode->i_ino; + id->oii_gen = inode->i_generation; } else { - if (!fid_is_norm(fid)) - return -ENOENT; + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) { + rc = osd_compat_spec_lookup(info, osd, fid, id); + if (rc == 0 || rc != -ERESTART) + goto out; + } fid_cpu_to_be(oi_fid, fid); - key = (struct dt_key *) oi_fid; + key = (struct dt_key *)oi_fid; rc = osd_oi_iam_lookup(info, osd_fid2oi(osd, fid), (struct dt_rec *)id, key); @@ -467,6 +478,8 @@ int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, rc = -ENOENT; } } + +out: return rc; } @@ -524,9 +537,16 @@ int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd, struct osd_inode_id *id; const struct dt_key *key; - if (!fid_is_norm(fid)) + if (fid_is_igif(fid)) return 0; + if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG) + return osd_compat_objid_insert(info, osd, fid, id0, th); + + /* notice we don't return immediately, but continue to get into OI */ + if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) + osd_compat_spec_insert(info, osd, fid, id0, th); + fid_cpu_to_be(oi_fid, fid); key = (struct dt_key *)oi_fid; @@ -574,6 +594,11 @@ int osd_oi_delete(struct osd_thread_info *info, if (!fid_is_norm(fid)) return 0; + LASSERT(fid_seq(fid) != FID_SEQ_LOCAL_FILE); + + if (fid_is_idif(fid) || fid_seq(fid) == FID_SEQ_LLOG) + return osd_compat_objid_delete(info, osd, fid, th); + fid_cpu_to_be(oi_fid, fid); key = (struct dt_key *)oi_fid; diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 16d7201..25b8bf8 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -98,8 +98,8 @@ static int ost_validate_obdo(struct obd_export *exp, struct obdo *oa, if (ioobj) ioobj->ioo_seq = FID_SEQ_OST_MDT0; /* remove fid_seq_is_rsvd() after FID-on-OST allows SEQ > 9 */ - } else if (oa == NULL || - !(fid_seq_is_rsvd(oa->o_seq) || fid_seq_is_idif(oa->o_seq))) { + } else if (oa == NULL || !(fid_seq_is_rsvd(oa->o_seq) || + fid_seq_is_mdt0(oa->o_seq))) { CERROR("%s: client %s sent invalid object "POSTID"\n", exp->exp_obd->obd_name, obd_export_nid2str(exp), oa ? oa->o_id : -1, oa ? oa->o_seq : -1); -- 1.8.3.1