X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosd-ldiskfs%2Fosd_handler.c;h=2622a3503b245e1d8bc9ebbe140beb7ed44dcbee;hp=66600be7173fe7d5f98b145120c03df4326377dd;hb=91a4769006f1318bcaddf6ace4344b980f701e05;hpb=b2cb6fd1095f9c483b7bc1ebbbfdaef719aea87c diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 66600be..2622a35 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -27,7 +27,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Whamcloud, Inc. + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -72,16 +72,11 @@ /* llo_* api support */ #include -/* dt_acct_features */ -#include +#include -#ifdef HAVE_LDISKFS_PDO int ldiskfs_pdo = 1; CFS_MODULE_PARM(ldiskfs_pdo, "i", int, 0644, "ldiskfs with parallel directory operations"); -#else -int ldiskfs_pdo = 0; -#endif static const char dot[] = "."; static const char dotdot[] = ".."; @@ -151,29 +146,33 @@ static struct lu_object *osd_object_alloc(const struct lu_env *env, l = &mo->oo_dt.do_lu; dt_object_init(&mo->oo_dt, NULL, d); - if (osd_dev(d)->od_iop_mode) - mo->oo_dt.do_ops = &osd_obj_ea_ops; - else - mo->oo_dt.do_ops = &osd_obj_ops; - + mo->oo_dt.do_ops = &osd_obj_ea_ops; l->lo_ops = &osd_lu_obj_ops; - cfs_init_rwsem(&mo->oo_sem); - cfs_init_rwsem(&mo->oo_ext_idx_sem); - cfs_spin_lock_init(&mo->oo_guard); + init_rwsem(&mo->oo_sem); + init_rwsem(&mo->oo_ext_idx_sem); + spin_lock_init(&mo->oo_guard); return l; } else { return NULL; } } -static int osd_get_lma(struct inode *inode, struct dentry *dentry, - struct lustre_mdt_attrs *lma) +static int osd_get_lma(struct osd_thread_info *info, struct inode *inode, + struct dentry *dentry, struct lustre_mdt_attrs *lma) { int rc; dentry->d_inode = inode; rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA, (void *)lma, sizeof(*lma)); + if (rc == -ERANGE) { + /* try with old lma size */ + rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA, + info->oti_mdt_attrs_old, + LMA_OLD_SIZE); + if (rc > 0) + memcpy(lma, info->oti_mdt_attrs_old, sizeof(*lma)); + } if (rc > 0) { /* Check LMA compatibility */ if (lma->lma_incompat & ~cpu_to_le32(LMA_INCOMPAT_SUPP)) { @@ -251,7 +250,7 @@ struct inode *osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev, if (IS_ERR(inode)) return inode; - rc = osd_get_lma(inode, &info->oti_obj_dentry, lma); + rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma); if (rc == 0) { *fid = lma->lma_self_fid; } else if (rc == -ENODATA) { @@ -275,7 +274,7 @@ osd_iget_verify(struct osd_thread_info *info, struct osd_device *dev, if (IS_ERR(inode)) return inode; - rc = osd_get_lma(inode, &info->oti_obj_dentry, lma); + rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma); if (rc == -ENODATA) return inode; @@ -288,7 +287,7 @@ osd_iget_verify(struct osd_thread_info *info, struct osd_device *dev, CDEBUG(D_LFSCK, "inconsistent obj: "DFID", %lu, "DFID"\n", PFID(&lma->lma_self_fid), inode->i_ino, PFID(fid)); iput(inode); - return ERR_PTR(EREMCHG); + return ERR_PTR(-EREMCHG); } return inode; @@ -320,15 +319,18 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj, info = osd_oti_get(env); LASSERT(info); oic = &info->oti_cache; - id = &oic->oic_lid; if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) RETURN(-ENOENT); /* Search order: 1. per-thread cache. */ if (lu_fid_eq(fid, &oic->oic_fid)) { + id = &oic->oic_lid; goto iget; - } else if (!cfs_list_empty(&scrub->os_inconsistent_items)) { + } + + id = &info->oti_id; + if (!cfs_list_empty(&scrub->os_inconsistent_items)) { /* Search order: 2. OI scrub pending list. */ result = osd_oii_lookup(dev, fid, id); if (result == 0) @@ -395,10 +397,9 @@ trigger: obj->oo_inode = inode; LASSERT(obj->oo_inode->i_sb == osd_sb(dev)); - if (dev->od_iop_mode) { - obj->oo_compat_dot_created = 1; - obj->oo_compat_dotdot_created = 1; - } + + obj->oo_compat_dot_created = 1; + obj->oo_compat_dotdot_created = 1; if (!S_ISDIR(inode->i_mode) || !ldiskfs_pdo) /* done */ GOTO(out, result = 0); @@ -528,8 +529,7 @@ static void osd_th_started(struct osd_thandle *oth) /** * Helper function to convert time interval to microseconds packed in - * long int (default time units for the counter in "stats" initialized - * by lu_time_init() ) + * long int. */ static long interval_to_usec(cfs_time_t start, cfs_time_t end) { @@ -589,12 +589,12 @@ static void __osd_th_check_slow(void *oth, struct osd_device *dev, /* * Concurrency: doesn't access mutable data. */ -static int osd_param_is_sane(const struct osd_device *dev, - const struct thandle *th) +static int osd_param_is_not_sane(const struct osd_device *dev, + const struct thandle *th) { - struct osd_thandle *oh; - oh = container_of0(th, struct osd_thandle, ot_super); - return oh->ot_credits <= osd_journal(dev)->j_max_transaction_buffers; + struct osd_thandle *oh = container_of(th, typeof(*oh), ot_super); + + return oh->ot_credits > osd_journal(dev)->j_max_transaction_buffers; } /* @@ -690,30 +690,48 @@ int osd_trans_start(const struct lu_env *env, struct dt_device *d, if (rc != 0) GOTO(out, rc); - if (!osd_param_is_sane(dev, th)) { + if (unlikely(osd_param_is_not_sane(dev, th))) { + static unsigned long last_printed; + static int last_credits; + CWARN("%.16s: too many transaction credits (%d > %d)\n", LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, oh->ot_credits, osd_journal(dev)->j_max_transaction_buffers); - /* XXX Limit the credits to 'max_transaction_buffers', and - * let the underlying filesystem to catch the error if - * we really need so many credits. - * - * This should be removed when we can calculate the - * credits precisely. */ - oh->ot_credits = osd_journal(dev)->j_max_transaction_buffers; #ifdef OSD_TRACK_DECLARES - CERROR(" attr_set: %d, punch: %d, xattr_set: %d,\n", - oh->ot_declare_attr_set, oh->ot_declare_punch, - oh->ot_declare_xattr_set); - CERROR(" create: %d, ref_add: %d, ref_del: %d, write: %d\n", - oh->ot_declare_create, oh->ot_declare_ref_add, - oh->ot_declare_ref_del, oh->ot_declare_write); - CERROR(" insert: %d, delete: %d, destroy: %d\n", - oh->ot_declare_insert, oh->ot_declare_delete, - oh->ot_declare_destroy); + CWARN(" create: %u/%u, delete: %u/%u, destroy: %u/%u\n", + oh->ot_declare_create, oh->ot_declare_create_cred, + oh->ot_declare_delete, oh->ot_declare_delete_cred, + oh->ot_declare_destroy, oh->ot_declare_destroy_cred); + CWARN(" attr_set: %u/%u, xattr_set: %u/%u\n", + oh->ot_declare_attr_set, oh->ot_declare_attr_set_cred, + oh->ot_declare_xattr_set, oh->ot_declare_xattr_set_cred); + CWARN(" write: %u/%u, punch: %u/%u, quota %u/%u\n", + oh->ot_declare_write, oh->ot_declare_write_cred, + oh->ot_declare_punch, oh->ot_declare_punch_cred, + oh->ot_declare_quota, oh->ot_declare_quota_cred); + CWARN(" insert: %u/%u, delete: %u/%u\n", + oh->ot_declare_insert, oh->ot_declare_insert_cred, + oh->ot_declare_delete, oh->ot_declare_destroy_cred); + CWARN(" ref_add: %u/%u, ref_del: %u/%u\n", + oh->ot_declare_ref_add, oh->ot_declare_ref_add_cred, + oh->ot_declare_ref_del, oh->ot_declare_ref_del_cred); + + if (last_credits != oh->ot_credits && + time_after(jiffies, last_printed + 60 * HZ)) { + libcfs_debug_dumpstack(NULL); + last_credits = oh->ot_credits; + last_printed = jiffies; + } #endif - } + /* XXX Limit the credits to 'max_transaction_buffers', and + * let the underlying filesystem to catch the error if + * we really need so many credits. + * + * This should be removed when we can calculate the + * credits precisely. */ + oh->ot_credits = osd_journal(dev)->j_max_transaction_buffers; + } /* * XXX temporary stuff. Some abstraction layer should @@ -850,10 +868,10 @@ static void osd_object_delete(const struct lu_env *env, struct lu_object *l) /* Release granted quota to master if necessary */ qi->lqi_id.qid_uid = uid; - qsd_adjust_quota(env, qsd, &qi->lqi_id, USRQUOTA); + qsd_op_adjust(env, qsd, &qi->lqi_id, USRQUOTA); qi->lqi_id.qid_uid = gid; - qsd_adjust_quota(env, qsd, &qi->lqi_id, GRPQUOTA); + qsd_op_adjust(env, qsd, &qi->lqi_id, GRPQUOTA); } } } @@ -910,7 +928,7 @@ int osd_statfs(const struct lu_env *env, struct dt_device *d, ksfs = &osd_oti_get(env)->oti_ksfs; } - cfs_spin_lock(&osd->od_osfs_lock); + spin_lock(&osd->od_osfs_lock); /* cache 1 second */ if (cfs_time_before_64(osd->od_osfs_age, cfs_time_shift_64(-1))) { result = sb->s_op->statfs(sb->s_root, ksfs); @@ -922,9 +940,9 @@ int osd_statfs(const struct lu_env *env, struct dt_device *d, } } - if (likely(result == 0)) - *sfs = osd->od_statfs; - cfs_spin_unlock(&osd->od_osfs_lock); + if (likely(result == 0)) + *sfs = osd->od_statfs; + spin_unlock(&osd->od_osfs_lock); if (unlikely(env == NULL)) OBD_FREE_PTR(ksfs); @@ -1140,7 +1158,7 @@ static void osd_object_read_lock(const struct lu_env *env, LINVRNT(osd_invariant(obj)); LASSERT(obj->oo_owner != env); - cfs_down_read_nested(&obj->oo_sem, role); + down_read_nested(&obj->oo_sem, role); LASSERT(obj->oo_owner == NULL); oti->oti_r_locks++; @@ -1155,7 +1173,7 @@ static void osd_object_write_lock(const struct lu_env *env, LINVRNT(osd_invariant(obj)); LASSERT(obj->oo_owner != env); - cfs_down_write_nested(&obj->oo_sem, role); + down_write_nested(&obj->oo_sem, role); LASSERT(obj->oo_owner == NULL); obj->oo_owner = env; @@ -1172,7 +1190,7 @@ static void osd_object_read_unlock(const struct lu_env *env, LASSERT(oti->oti_r_locks > 0); oti->oti_r_locks--; - cfs_up_read(&obj->oo_sem); + up_read(&obj->oo_sem); } static void osd_object_write_unlock(const struct lu_env *env, @@ -1187,7 +1205,7 @@ static void osd_object_write_unlock(const struct lu_env *env, LASSERT(oti->oti_w_locks > 0); oti->oti_w_locks--; obj->oo_owner = NULL; - cfs_up_write(&obj->oo_sem); + up_write(&obj->oo_sem); } static int osd_object_write_locked(const struct lu_env *env, @@ -1226,14 +1244,14 @@ static int capa_is_sane(const struct lu_env *env, RETURN(-ESTALE); } - cfs_spin_lock(&capa_lock); - for (i = 0; i < 2; i++) { - if (keys[i].lk_keyid == capa->lc_keyid) { - oti->oti_capa_key = keys[i]; - break; - } - } - cfs_spin_unlock(&capa_lock); + spin_lock(&capa_lock); + for (i = 0; i < 2; i++) { + if (keys[i].lk_keyid == capa->lc_keyid) { + oti->oti_capa_key = keys[i]; + break; + } + } + spin_unlock(&capa_lock); if (i == 2) { DEBUG_CAPA(D_ERROR, capa, "no matched capa key"); @@ -1348,10 +1366,10 @@ static int osd_attr_get(const struct lu_env *env, if (osd_object_auth(env, dt, capa, CAPA_OPC_META_READ)) return -EACCES; - cfs_spin_lock(&obj->oo_guard); - osd_inode_getattr(env, obj->oo_inode, attr); - cfs_spin_unlock(&obj->oo_guard); - return 0; + spin_lock(&obj->oo_guard); + osd_inode_getattr(env, obj->oo_inode, attr); + spin_unlock(&obj->oo_guard); + return 0; } static int osd_declare_attr_set(const struct lu_env *env, @@ -1377,8 +1395,8 @@ static int osd_declare_attr_set(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, attr_set); - oh->ot_credits += osd_dto_credits_noquota[DTO_ATTR_SET_BASE]; + OSD_DECLARE_OP(oh, attr_set, + osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); if (attr == NULL || obj->oo_inode == NULL) RETURN(rc); @@ -1528,7 +1546,7 @@ static int osd_inode_setattr(const struct lu_env *env, if (bits & LA_GID) inode->i_gid = attr->la_gid; if (bits & LA_NLINK) - inode->i_nlink = attr->la_nlink; + set_nlink(inode, attr->la_nlink); if (bits & LA_RDEV) inode->i_rdev = attr->la_rdev; @@ -1586,14 +1604,15 @@ static int osd_attr_set(const struct lu_env *env, OSD_EXEC_OP(handle, attr_set); inode = obj->oo_inode; + ll_vfs_dq_init(inode); rc = osd_quota_transfer(inode, attr); if (rc) return rc; - cfs_spin_lock(&obj->oo_guard); - rc = osd_inode_setattr(env, inode, attr); - cfs_spin_unlock(&obj->oo_guard); + spin_lock(&obj->oo_guard); + rc = osd_inode_setattr(env, inode, attr); + spin_unlock(&obj->oo_guard); if (!rc) inode->i_sb->s_op->dirty_inode(inode); @@ -1669,7 +1688,6 @@ static int osd_mkdir(struct osd_thread_info *info, struct osd_object *obj, { int result; struct osd_thandle *oth; - struct osd_device *osd = osd_obj2dev(obj); __u32 mode = (attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX)); LASSERT(S_ISDIR(attr->la_mode)); @@ -1677,16 +1695,7 @@ static int osd_mkdir(struct osd_thread_info *info, struct osd_object *obj, oth = container_of(th, struct osd_thandle, ot_super); LASSERT(oth->ot_handle->h_transaction != NULL); result = osd_mkfile(info, obj, mode, hint, th); - if (result == 0 && osd->od_iop_mode == 0) { - LASSERT(obj->oo_inode != NULL); - /* - * XXX uh-oh... call low-level iam function directly. - */ - result = iam_lvar_create(obj->oo_inode, OSD_NAME_LEN, 4, - sizeof (struct osd_fid_pack), - oth->ot_handle); - } return result; } @@ -1770,7 +1779,8 @@ static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj, * This inode should be marked dirty for i_rdev. Currently * that is done in the osd_attr_init(). */ - init_special_inode(obj->oo_inode, mode, attr->la_rdev); + init_special_inode(obj->oo_inode, obj->oo_inode->i_mode, + attr->la_rdev); } LINVRNT(osd_invariant(obj)); return result; @@ -1872,11 +1882,6 @@ static int __osd_object_create(struct osd_thread_info *info, struct thandle *th) { int result; - __u32 umask; - - /* we drop umask so that permissions we pass are not affected */ - umask = current->fs->umask; - current->fs->umask = 0; result = osd_create_type_f(dof->dof_type)(info, obj, attr, hint, dof, th); @@ -1888,9 +1893,6 @@ static int __osd_object_create(struct osd_thread_info *info, unlock_new_inode(obj->oo_inode); } - /* restore previous umask value */ - current->fs->umask = umask; - return result; } @@ -1913,42 +1915,40 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj, } static int osd_declare_object_create(const struct lu_env *env, - struct dt_object *dt, - struct lu_attr *attr, - struct dt_allocation_hint *hint, - struct dt_object_format *dof, - struct thandle *handle) + struct dt_object *dt, + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct dt_object_format *dof, + struct thandle *handle) { struct osd_thandle *oh; int rc; ENTRY; - LASSERT(handle != NULL); + LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle == NULL); + oh = container_of0(handle, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, create); - oh->ot_credits += osd_dto_credits_noquota[DTO_OBJECT_CREATE]; - /* XXX: So far, only normal fid needs be inserted into the oi, - * things could be changed later. Revise following code then. */ - if (fid_is_norm(lu_object_fid(&dt->do_lu))) { - OSD_DECLARE_OP(oh, insert); - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_INSERT]; + OSD_DECLARE_OP(oh, create, osd_dto_credits_noquota[DTO_OBJECT_CREATE]); + /* XXX: So far, only normal fid needs be inserted into the oi, + * things could be changed later. Revise following code then. */ + if (fid_is_norm(lu_object_fid(&dt->do_lu))) { /* Reuse idle OI block may cause additional one OI block * to be changed. */ - oh->ot_credits += 1; - } - /* If this is directory, then we expect . and .. to be inserted as - * well. The one directory block always needs to be created for the - * directory, so we could use DTO_WRITE_BASE here (GDT, block bitmap, - * block), there is no danger of needing a tree for the first block. - */ - if (attr && S_ISDIR(attr->la_mode)) { - OSD_DECLARE_OP(oh, insert); - OSD_DECLARE_OP(oh, insert); - oh->ot_credits += osd_dto_credits_noquota[DTO_WRITE_BASE]; - } + OSD_DECLARE_OP(oh, insert, + osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1); + } + /* If this is directory, then we expect . and .. to be inserted as + * well. The one directory block always needs to be created for the + * directory, so we could use DTO_WRITE_BASE here (GDT, block bitmap, + * block), there is no danger of needing a tree for the first block. + */ + if (attr && S_ISDIR(attr->la_mode)) { + OSD_DECLARE_OP(oh, insert, + osd_dto_credits_noquota[DTO_WRITE_BASE]); + OSD_DECLARE_OP(oh, insert, 0); + } if (!attr) RETURN(0); @@ -1998,40 +1998,36 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt, * Concurrency: must be locked */ static int osd_declare_object_destroy(const struct lu_env *env, - struct dt_object *dt, - struct thandle *th) + struct dt_object *dt, + struct thandle *th) { - struct osd_object *obj = osd_dt_obj(dt); - struct inode *inode = obj->oo_inode; - struct osd_thandle *oh; - int rc; - ENTRY; + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + struct osd_thandle *oh; + int rc; + ENTRY; - oh = container_of0(th, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle == NULL); - LASSERT(inode); + oh = container_of0(th, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle == NULL); + LASSERT(inode); - OSD_DECLARE_OP(oh, destroy); - OSD_DECLARE_OP(oh, delete); - oh->ot_credits += osd_dto_credits_noquota[DTO_OBJECT_DELETE]; - /* XXX: So far, only normal fid needs to be inserted into the OI, - * so only normal fid needs to be removed from the OI also. */ - if (fid_is_norm(lu_object_fid(&dt->do_lu))) { - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_DELETE]; - /* Recycle idle OI leaf may cause additional three OI blocks - * to be changed. */ - oh->ot_credits += 3; - } + OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_OBJECT_DELETE]); + /* XXX: So far, only normal fid needs to be inserted into the OI, + * so only normal fid needs to be removed from the OI also. + * Recycle idle OI leaf may cause additional three OI blocks + * to be changed. */ + OSD_DECLARE_OP(oh, destroy, fid_is_norm(lu_object_fid(&dt->do_lu)) ? + osd_dto_credits_noquota[DTO_INDEX_DELETE] + 3 : 0); /* one less inode */ - rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, -1, oh, + rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, -1, oh, false, true, NULL, false); if (rc) RETURN(rc); /* data to be truncated */ - rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, 0, oh, true, - true, NULL, false); - RETURN(rc); + rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, 0, oh, + true, true, NULL, false); + RETURN(rc); } static int osd_object_destroy(const struct lu_env *env, @@ -2056,22 +2052,22 @@ static int osd_object_destroy(const struct lu_env *env, /* Parallel control for OI scrub. For most of cases, there is no * lock contention. So it will not affect unlink performance. */ - cfs_mutex_lock(&inode->i_mutex); - if (S_ISDIR(inode->i_mode)) { - LASSERT(osd_inode_unlinked(inode) || - inode->i_nlink == 1); - cfs_spin_lock(&obj->oo_guard); - inode->i_nlink = 0; - cfs_spin_unlock(&obj->oo_guard); - inode->i_sb->s_op->dirty_inode(inode); - } else { - LASSERT(osd_inode_unlinked(inode)); - } + mutex_lock(&inode->i_mutex); + if (S_ISDIR(inode->i_mode)) { + LASSERT(osd_inode_unlinked(inode) || + inode->i_nlink == 1); + spin_lock(&obj->oo_guard); + clear_nlink(inode); + spin_unlock(&obj->oo_guard); + inode->i_sb->s_op->dirty_inode(inode); + } else { + LASSERT(osd_inode_unlinked(inode)); + } OSD_EXEC_OP(th, destroy); result = osd_oi_delete(osd_oti_get(env), osd, fid, th); - cfs_mutex_unlock(&inode->i_mutex); + mutex_unlock(&inode->i_mutex); /* XXX: add to ext3 orphan list */ /* rc = ext3_orphan_add(handle_t *handle, struct inode *inode) */ @@ -2104,6 +2100,7 @@ static int __osd_xattr_set(const struct lu_env *env, struct dt_object *dt, if (fl & LU_XATTR_CREATE) fs_flags |= XATTR_CREATE; + ll_vfs_dq_init(inode); dentry->d_inode = inode; rc = inode->i_op->setxattr(dentry, name, buf->lb_buf, buf->lb_len, fs_flags); @@ -2235,10 +2232,9 @@ static int osd_declare_object_ref_add(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, ref_add); - oh->ot_credits += osd_dto_credits_noquota[DTO_ATTR_SET_BASE]; + OSD_DECLARE_OP(oh, ref_add, osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); - return 0; + return 0; } /* @@ -2257,29 +2253,33 @@ static int osd_object_ref_add(const struct lu_env *env, OSD_EXEC_OP(th, ref_add); - /* - * DIR_NLINK feature is set for compatibility reasons if: - * 1) nlinks > LDISKFS_LINK_MAX, or - * 2) nlinks == 2, since this indicates i_nlink was previously 1. - * - * It is easier to always set this flag (rather than check and set), - * since it has less overhead, and the superblock will be dirtied - * at some point. Both e2fsprogs and any Lustre-supported ldiskfs - * do not actually care whether this flag is set or not. - */ - cfs_spin_lock(&obj->oo_guard); - inode->i_nlink++; - if (S_ISDIR(inode->i_mode) && inode->i_nlink > 1) { - if (inode->i_nlink >= LDISKFS_LINK_MAX || - inode->i_nlink == 2) - inode->i_nlink = 1; - } - LASSERT(inode->i_nlink <= LDISKFS_LINK_MAX); - cfs_spin_unlock(&obj->oo_guard); - inode->i_sb->s_op->dirty_inode(inode); - LINVRNT(osd_invariant(obj)); + /* + * DIR_NLINK feature is set for compatibility reasons if: + * 1) nlinks > LDISKFS_LINK_MAX, or + * 2) nlinks == 2, since this indicates i_nlink was previously 1. + * + * It is easier to always set this flag (rather than check and set), + * since it has less overhead, and the superblock will be dirtied + * at some point. Both e2fsprogs and any Lustre-supported ldiskfs + * do not actually care whether this flag is set or not. + */ + spin_lock(&obj->oo_guard); + /* inc_nlink from 0 may cause WARN_ON */ + if(inode->i_nlink == 0) + set_nlink(inode, 1); + else + inc_nlink(inode); + if (S_ISDIR(inode->i_mode) && inode->i_nlink > 1) { + if (inode->i_nlink >= LDISKFS_LINK_MAX || + inode->i_nlink == 2) + set_nlink(inode, 1); + } + LASSERT(inode->i_nlink <= LDISKFS_LINK_MAX); + spin_unlock(&obj->oo_guard); + inode->i_sb->s_op->dirty_inode(inode); + LINVRNT(osd_invariant(obj)); - return 0; + return 0; } static int osd_declare_object_ref_del(const struct lu_env *env, @@ -2294,10 +2294,9 @@ static int osd_declare_object_ref_del(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, ref_del); - oh->ot_credits += osd_dto_credits_noquota[DTO_ATTR_SET_BASE]; + OSD_DECLARE_OP(oh, ref_del, osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); - return 0; + return 0; } /* @@ -2316,19 +2315,19 @@ static int osd_object_ref_del(const struct lu_env *env, struct dt_object *dt, OSD_EXEC_OP(th, ref_del); - cfs_spin_lock(&obj->oo_guard); - LASSERT(inode->i_nlink > 0); - inode->i_nlink--; - /* If this is/was a many-subdir directory (nlink > LDISKFS_LINK_MAX) - * then the nlink count is 1. Don't let it be set to 0 or the directory - * inode will be deleted incorrectly. */ - if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0) - inode->i_nlink++; - cfs_spin_unlock(&obj->oo_guard); - inode->i_sb->s_op->dirty_inode(inode); - LINVRNT(osd_invariant(obj)); + spin_lock(&obj->oo_guard); + LASSERT(inode->i_nlink > 0); + drop_nlink(inode); + /* If this is/was a many-subdir directory (nlink > LDISKFS_LINK_MAX) + * then the nlink count is 1. Don't let it be set to 0 or the directory + * inode will be deleted incorrectly. */ + if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0) + set_nlink(inode, 1); + spin_unlock(&obj->oo_guard); + inode->i_sb->s_op->dirty_inode(inode); + LINVRNT(osd_invariant(obj)); - return 0; + return 0; } /* @@ -2389,11 +2388,9 @@ static int osd_declare_xattr_set(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, xattr_set); - if (strcmp(name, XATTR_NAME_VERSION) == 0) - oh->ot_credits += osd_dto_credits_noquota[DTO_ATTR_SET_BASE]; - else - oh->ot_credits += osd_dto_credits_noquota[DTO_XATTR_SET]; + OSD_DECLARE_OP(oh, xattr_set, strcmp(name, XATTR_NAME_VERSION) == 0 ? + osd_dto_credits_noquota[DTO_ATTR_SET_BASE] : + osd_dto_credits_noquota[DTO_XATTR_SET]); return 0; } @@ -2475,10 +2472,9 @@ static int osd_declare_xattr_del(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, xattr_set); - oh->ot_credits += osd_dto_credits_noquota[DTO_XATTR_SET]; + OSD_DECLARE_OP(oh, xattr_set, osd_dto_credits_noquota[DTO_XATTR_SET]); - return 0; + return 0; } /* @@ -2504,6 +2500,7 @@ static int osd_xattr_del(const struct lu_env *env, struct dt_object *dt, OSD_EXEC_OP(handle, xattr_set); + ll_vfs_dq_init(inode); dentry->d_inode = inode; rc = inode->i_op->removexattr(dentry, name); return rc; @@ -2579,9 +2576,9 @@ static struct obd_capa *osd_capa_get(const struct lu_env *env, RETURN(oc); } - cfs_spin_lock(&capa_lock); - *key = dev->od_capa_keys[1]; - cfs_spin_unlock(&capa_lock); + spin_lock(&capa_lock); + *key = dev->od_capa_keys[1]; + spin_unlock(&capa_lock); capa->lc_keyid = key->lk_keyid; capa->lc_expiry = cfs_time_current_sec() + dev->od_capa_timeout; @@ -2691,7 +2688,6 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt, int result; int skip_iam = 0; struct osd_object *obj = osd_dt_obj(dt); - struct osd_device *osd = osd_obj2dev(obj); LINVRNT(osd_invariant(obj)); LASSERT(dt_object_exists(dt)); @@ -2699,7 +2695,7 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt, if (osd_object_is_root(obj)) { dt->do_index_ops = &osd_index_ea_ops; result = 0; - } else if (feat == &dt_directory_features && osd->od_iop_mode) { + } else if (feat == &dt_directory_features) { dt->do_index_ops = &osd_index_ea_ops; if (S_ISDIR(obj->oo_inode->i_mode)) result = 0; @@ -2719,28 +2715,28 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt, OBD_ALLOC_PTR(dir); if (dir != NULL) { - cfs_spin_lock(&obj->oo_guard); - if (obj->oo_dir == NULL) - obj->oo_dir = dir; - else - /* - * Concurrent thread allocated container data. - */ - OBD_FREE_PTR(dir); - cfs_spin_unlock(&obj->oo_guard); - /* - * Now, that we have container data, serialize its - * initialization. - */ - cfs_down_write(&obj->oo_ext_idx_sem); - /* - * recheck under lock. - */ - if (!osd_has_index(obj)) - result = osd_iam_container_init(env, obj, dir); - else - result = 0; - cfs_up_write(&obj->oo_ext_idx_sem); + spin_lock(&obj->oo_guard); + if (obj->oo_dir == NULL) + obj->oo_dir = dir; + else + /* + * Concurrent thread allocated container data. + */ + OBD_FREE_PTR(dir); + spin_unlock(&obj->oo_guard); + /* + * Now, that we have container data, serialize its + * initialization. + */ + down_write(&obj->oo_ext_idx_sem); + /* + * recheck under lock. + */ + if (!osd_has_index(obj)) + result = osd_iam_container_init(env, obj, dir); + else + result = 0; + up_write(&obj->oo_ext_idx_sem); } else { result = -ENOMEM; } @@ -2848,10 +2844,9 @@ static int osd_index_declare_iam_delete(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, delete); - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_DELETE]; + OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_INDEX_DELETE]); - return 0; + return 0; } /** @@ -2911,23 +2906,22 @@ static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt, } static int osd_index_declare_ea_delete(const struct lu_env *env, - struct dt_object *dt, - const struct dt_key *key, - struct thandle *handle) + struct dt_object *dt, + const struct dt_key *key, + struct thandle *handle) { - struct osd_thandle *oh; + struct osd_thandle *oh; struct inode *inode; int rc; ENTRY; - LASSERT(dt_object_exists(dt)); - LASSERT(handle != NULL); + LASSERT(dt_object_exists(dt)); + LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle == NULL); + oh = container_of0(handle, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, delete); - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_DELETE]; + OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_INDEX_DELETE]); inode = osd_dt_obj(dt)->oo_inode; LASSERT(inode); @@ -2989,6 +2983,7 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt, if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_DELETE)) RETURN(-EACCES); + ll_vfs_dq_init(dir); dentry = osd_child_dentry_get(env, obj, (char *)key, strlen((char *)key)); @@ -2997,7 +2992,7 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt, ldiskfs_htree_lock(hlock, obj->oo_hl_head, dir, LDISKFS_HLOCK_DEL); } else { - cfs_down_write(&obj->oo_ext_idx_sem); + down_write(&obj->oo_ext_idx_sem); } bh = osd_ldiskfs_find_entry(dir, dentry, &de, hlock); @@ -3011,7 +3006,7 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt, if (hlock != NULL) ldiskfs_htree_unlock(hlock); else - cfs_up_write(&obj->oo_ext_idx_sem); + up_write(&obj->oo_ext_idx_sem); LASSERT(osd_invariant(obj)); RETURN(rc); @@ -3101,10 +3096,9 @@ static int osd_index_declare_iam_insert(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, insert); - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_INSERT]; + OSD_DECLARE_OP(oh, insert, osd_dto_credits_noquota[DTO_INDEX_INSERT]); - return 0; + return 0; } /** @@ -3208,6 +3202,8 @@ static int __osd_ea_add_rec(struct osd_thread_info *info, } else { child->d_fsdata = NULL; } + LASSERT(pobj->oo_inode); + ll_vfs_dq_init(pobj->oo_inode); rc = osd_ldiskfs_add_entry(oth->ot_handle, child, cinode, hlock); RETURN(rc); @@ -3299,7 +3295,7 @@ static int osd_ea_add_rec(const struct lu_env *env, struct osd_object *pobj, ldiskfs_htree_lock(hlock, pobj->oo_hl_head, pobj->oo_inode, 0); } else { - cfs_down_write(&pobj->oo_ext_idx_sem); + down_write(&pobj->oo_ext_idx_sem); } rc = osd_add_dot_dotdot(info, pobj, cinode, name, (struct dt_rec *)lu_object_fid(&pobj->oo_dt.do_lu), @@ -3309,7 +3305,7 @@ static int osd_ea_add_rec(const struct lu_env *env, struct osd_object *pobj, ldiskfs_htree_lock(hlock, pobj->oo_hl_head, pobj->oo_inode, LDISKFS_HLOCK_ADD); } else { - cfs_down_write(&pobj->oo_ext_idx_sem); + down_write(&pobj->oo_ext_idx_sem); } rc = __osd_ea_add_rec(info, pobj, cinode, name, fid, @@ -3318,12 +3314,12 @@ static int osd_ea_add_rec(const struct lu_env *env, struct osd_object *pobj, if (hlock != NULL) ldiskfs_htree_unlock(hlock); else - cfs_up_write(&pobj->oo_ext_idx_sem); + up_write(&pobj->oo_ext_idx_sem); return rc; } -static int +static void osd_consistency_check(struct osd_thread_info *oti, struct osd_device *dev, struct osd_idmap_cache *oic) { @@ -3335,15 +3331,15 @@ osd_consistency_check(struct osd_thread_info *oti, struct osd_device *dev, ENTRY; if (!fid_is_norm(fid) && !fid_is_igif(fid)) - RETURN(0); + RETURN_EXIT; again: rc = osd_oi_lookup(oti, dev, fid, id); if (rc != 0 && rc != -ENOENT) - RETURN(rc); + RETURN_EXIT; if (rc == 0 && osd_id_eq(id, &oic->oic_lid)) - RETURN(0); + RETURN_EXIT; if (thread_is_running(&scrub->os_thread)) { rc = osd_oii_insert(dev, oic, rc == -ENOENT); @@ -3354,7 +3350,7 @@ again: if (unlikely(rc == -EAGAIN)) goto again; - RETURN(rc); + RETURN_EXIT; } if (!dev->od_noscrub && ++once == 1) { @@ -3369,7 +3365,7 @@ again: goto again; } - RETURN(0); + EXIT; } /** @@ -3402,7 +3398,7 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj, ldiskfs_htree_lock(hlock, obj->oo_hl_head, dir, LDISKFS_HLOCK_LOOKUP); } else { - cfs_down_read(&obj->oo_ext_idx_sem); + down_read(&obj->oo_ext_idx_sem); } bh = osd_ldiskfs_find_entry(dir, dentry, &de, hlock); @@ -3443,7 +3439,7 @@ out: if (hlock != NULL) ldiskfs_htree_unlock(hlock); else - cfs_up_read(&obj->oo_ext_idx_sem); + up_read(&obj->oo_ext_idx_sem); return rc; } @@ -3515,24 +3511,23 @@ static inline void osd_object_put(const struct lu_env *env, } static int osd_index_declare_ea_insert(const struct lu_env *env, - struct dt_object *dt, - const struct dt_rec *rec, - const struct dt_key *key, - struct thandle *handle) + struct dt_object *dt, + const struct dt_rec *rec, + const struct dt_key *key, + struct thandle *handle) { - struct osd_thandle *oh; + struct osd_thandle *oh; struct inode *inode; int rc; ENTRY; - LASSERT(dt_object_exists(dt)); - LASSERT(handle != NULL); + LASSERT(dt_object_exists(dt)); + LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle == NULL); + oh = container_of0(handle, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, insert); - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_INSERT]; + OSD_DECLARE_OP(oh, insert, osd_dto_credits_noquota[DTO_INDEX_INSERT]); inode = osd_dt_obj(dt)->oo_inode; LASSERT(inode); @@ -3867,6 +3862,7 @@ static const struct dt_index_operations osd_index_iam_ops = { } }; + /** * Creates or initializes iterator context. * @@ -4038,7 +4034,7 @@ static int osd_ldiskfs_it_fill(const struct lu_env *env, ldiskfs_htree_lock(hlock, obj->oo_hl_head, inode, LDISKFS_HLOCK_READDIR); } else { - cfs_down_read(&obj->oo_ext_idx_sem); + down_read(&obj->oo_ext_idx_sem); } result = inode->i_fop->readdir(&it->oie_file, it, @@ -4047,7 +4043,7 @@ static int osd_ldiskfs_it_fill(const struct lu_env *env, if (hlock != NULL) ldiskfs_htree_unlock(hlock); else - cfs_up_read(&obj->oo_ext_idx_sem); + up_read(&obj->oo_ext_idx_sem); if (it->oie_rd_dirent == 0) { result = -EIO; @@ -4154,8 +4150,10 @@ static inline int osd_it_ea_rec(const struct lu_env *env, if (!fid_is_sane(fid)) { rc = osd_ea_fid_get(env, obj, ino, fid, &oic->oic_lid); - if (rc != 0) + if (rc != 0) { + fid_zero(&oic->oic_fid); RETURN(rc); + } } else { osd_id_gen(&oic->oic_lid, ino, OSD_OII_NOGEN); } @@ -4373,6 +4371,10 @@ static int osd_mount(const struct lu_env *env, if (o->od_mnt != NULL) RETURN(0); + if (strlen(dev) >= sizeof(o->od_mntdev)) + RETURN(-E2BIG); + strcpy(o->od_mntdev, dev); + o->od_fsops = fsfilt_get_ops(mt_str(LDD_MT_LDISKFS)); if (o->od_fsops == NULL) { CERROR("Can't find fsfilt_ldiskfs\n"); @@ -4434,11 +4436,6 @@ static int osd_mount(const struct lu_env *env, GOTO(out, rc = -EINVAL); } - if (lmd_flags & LMD_FLG_IAM) { - o->od_iop_mode = 0; - LCONSOLE_WARN("%s: OSD: IAM mode enabled\n", name); - } else - o->od_iop_mode = 1; if (lmd_flags & LMD_FLG_NOSCRUB) o->od_noscrub = 1; @@ -4459,7 +4456,7 @@ static struct lu_device *osd_device_fini(const struct lu_env *env, rc = osd_shutdown(env, osd_dev(d)); - osd_compat_fini(osd_dev(d)); + osd_obj_map_fini(osd_dev(d)); shrink_dcache_sb(osd_sb(osd_dev(d))); osd_sync(env, lu2dt_dev(d)); @@ -4496,8 +4493,8 @@ static int osd_device_init0(const struct lu_env *env, l->ld_ops = &osd_lu_ops; o->od_dt_dev.dd_ops = &osd_dt_ops; - cfs_spin_lock_init(&o->od_osfs_lock); - cfs_mutex_init(&o->od_otable_mutex); + spin_lock_init(&o->od_osfs_lock); + mutex_init(&o->od_otable_mutex); o->od_osfs_age = cfs_time_shift_64(-1000); o->od_capa_hash = init_capa_hash(); @@ -4520,7 +4517,7 @@ static int osd_device_init0(const struct lu_env *env, strncpy(o->od_svname, lustre_cfg_string(cfg, 4), sizeof(o->od_svname) - 1); - rc = osd_compat_init(o); + rc = osd_obj_map_init(o); if (rc != 0) GOTO(out_scrub, rc); @@ -4557,7 +4554,7 @@ out_procfs: out_site: lu_site_fini(&o->od_site); out_compat: - osd_compat_fini(o); + osd_obj_map_fini(o); out_scrub: osd_scrub_cleanup(env, o); out_mnt: @@ -4584,6 +4581,9 @@ static struct lu_device *osd_device_alloc(const struct lu_env *env, rc = dt_device_init(&o->od_dt_dev, t); if (rc == 0) { + /* Because the ctx might be revived in dt_device_init, + * refill the env here */ + lu_env_refill((struct lu_env *)env); rc = osd_device_init0(env, o, cfg); if (rc) dt_device_fini(&o->od_dt_dev); @@ -4673,9 +4673,9 @@ static int osd_obd_connect(const struct lu_env *env, struct obd_export **exp, *exp = class_conn2export(&conn); - cfs_spin_lock(&osd->od_osfs_lock); + spin_lock(&osd->od_osfs_lock); osd->od_connects++; - cfs_spin_unlock(&osd->od_osfs_lock); + spin_unlock(&osd->od_osfs_lock); RETURN(0); } @@ -4692,11 +4692,11 @@ static int osd_obd_disconnect(struct obd_export *exp) ENTRY; /* Only disconnect the underlying layers on the final disconnect. */ - cfs_spin_lock(&osd->od_osfs_lock); + spin_lock(&osd->od_osfs_lock); osd->od_connects--; if (osd->od_connects == 0) release = 1; - cfs_spin_unlock(&osd->od_osfs_lock); + spin_unlock(&osd->od_osfs_lock); rc = class_disconnect(exp); /* bz 9811 */