X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmdd%2Fmdd_dir.c;h=02626460923b7786d2061e9a4a2f10a87cc9fa4c;hb=b69b7de30c3977cb69a741099218bc4a81752717;hp=1bc386a9783d5b81b97182054fa7524f99d6ceb9;hpb=b2fa3d79a26e6a161e6470386a90e9061482b930;p=fs%2Flustre-release.git diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 1bc386a..0262646 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -57,6 +57,17 @@ static struct lu_name lname_dotdot = { sizeof(dotdot) - 1 }; +static inline int +mdd_name_check(struct mdd_device *m, const struct lu_name *ln) +{ + if (!lu_name_is_valid(ln)) + return -EINVAL; + else if (ln->ln_namelen > m->mdd_dt_conf.ddp_max_name_len) + return -ENAMETOOLONG; + else + return 0; +} + /* Get FID from name and parent */ static int __mdd_lookup(const struct lu_env *env, struct md_object *pobj, @@ -81,10 +92,6 @@ __mdd_lookup(const struct lu_env *env, struct md_object *pobj, RETURN(-ESTALE); } - /* The common filename length check. */ - if (unlikely(lname->ln_namelen > m->mdd_dt_conf.ddp_max_name_len)) - RETURN(-ENAMETOOLONG); - rc = mdd_permission_internal_locked(env, mdd_obj, pattr, mask, MOR_TGT_PARENT); if (rc) @@ -253,41 +260,55 @@ int mdd_is_subdir(const struct lu_env *env, struct md_object *mo, static int mdd_dir_is_empty(const struct lu_env *env, struct mdd_object *dir) { - struct dt_it *it; - struct dt_object *obj; - const struct dt_it_ops *iops; - int result; - ENTRY; + struct dt_it *it; + struct dt_object *obj; + const struct dt_it_ops *iops; + int result; + ENTRY; - obj = mdd_object_child(dir); - if (!dt_try_as_dir(env, obj)) - RETURN(-ENOTDIR); - - iops = &obj->do_index_ops->dio_it; - it = iops->init(env, obj, LUDA_64BITHASH, BYPASS_CAPA); - if (!IS_ERR(it)) { - result = iops->get(env, it, (const void *)""); - if (result > 0) { - int i; - for (result = 0, i = 0; result == 0 && i < 3; ++i) - result = iops->next(env, it); - if (result == 0) - result = -ENOTEMPTY; - else if (result == +1) - result = 0; - } 
else if (result == 0) - /* - * Huh? Index contains no zero key? - */ - result = -EIO; - - iops->put(env, it); - iops->fini(env, it); - } else - result = PTR_ERR(it); - RETURN(result); + obj = mdd_object_child(dir); + if (!dt_try_as_dir(env, obj)) + RETURN(-ENOTDIR); + + iops = &obj->do_index_ops->dio_it; + it = iops->init(env, obj, LUDA_64BITHASH, BYPASS_CAPA); + if (!IS_ERR(it)) { + result = iops->get(env, it, (const struct dt_key *)""); + if (result > 0) { + int i; + for (result = 0, i = 0; result == 0 && i < 3; ++i) + result = iops->next(env, it); + if (result == 0) + result = -ENOTEMPTY; + else if (result == 1) + result = 0; + } else if (result == 0) + /* + * Huh? Index contains no zero key? + */ + result = -EIO; + + iops->put(env, it); + iops->fini(env, it); + } else + result = PTR_ERR(it); + RETURN(result); } +/** + * Determine if the target object can be hard linked, and right now it only + * checks if the link count reach the maximum limit. Note: for ldiskfs, the + * directory nlink count might exceed the maximum link count(see + * osd_object_ref_add), so it only check nlink for non-directories. + * + * \param[in] env thread environment + * \param[in] obj object being linked to + * \param[in] la attributes of \a obj + * + * \retval 0 if \a obj can be hard linked + * \retval negative error if \a obj is a directory or has too + * many links + */ static int __mdd_may_link(const struct lu_env *env, struct mdd_object *obj, const struct lu_attr *la) { @@ -296,26 +317,34 @@ static int __mdd_may_link(const struct lu_env *env, struct mdd_object *obj, LASSERT(la != NULL); - if (!S_ISDIR(la->la_mode)) - RETURN(0); - - /* - * Subdir count limitation can be broken through. - */ - if (la->la_nlink >= m->mdd_dt_conf.ddp_max_nlink) + /* Subdir count limitation can be broken through + * (see osd_object_ref_add), so only check non-directory here. 
*/ + if (!S_ISDIR(la->la_mode) && + la->la_nlink >= m->mdd_dt_conf.ddp_max_nlink) RETURN(-EMLINK); - else - RETURN(0); + + RETURN(0); } -/* +/** * Check whether it may create the cobj under the pobj. - * cobj maybe NULL + * + * \param[in] env execution environment + * \param[in] pobj the parent directory + * \param[in] pattr the attribute of the parent directory + * \param[in] cobj the child to be created + * \param[in] check_perm if check WRITE|EXEC permission for parent + * + * \retval = 0 create the child under this dir is allowed + * \retval negative errno create the child under this dir is + * not allowed */ -int mdd_may_create(const struct lu_env *env, - struct mdd_object *pobj, const struct lu_attr *pattr, - struct mdd_object *cobj, int check_perm, int check_nlink) +int mdd_may_create(const struct lu_env *env, struct mdd_object *pobj, + const struct lu_attr *pattr, struct mdd_object *cobj, + bool check_perm) { + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_buf *xbuf; int rc = 0; ENTRY; @@ -325,13 +354,23 @@ int mdd_may_create(const struct lu_env *env, if (mdd_is_dead_obj(pobj)) RETURN(-ENOENT); + /* If the parent is a sub-stripe, check whether it is dead */ + xbuf = mdd_buf_get(env, info->mti_key, sizeof(info->mti_key)); + rc = mdo_xattr_get(env, pobj, xbuf, XATTR_NAME_LMV, + mdd_object_capa(env, pobj)); + if (unlikely(rc > 0)) { + struct lmv_mds_md_v1 *lmv1 = xbuf->lb_buf; + + if (le32_to_cpu(lmv1->lmv_magic) == LMV_MAGIC_STRIPE && + le32_to_cpu(lmv1->lmv_hash_type) & LMV_HASH_FLAG_DEAD) + RETURN(-ESTALE); + } + rc = 0; + if (check_perm) rc = mdd_permission_internal_locked(env, pobj, pattr, MAY_WRITE | MAY_EXEC, MOR_TGT_PARENT); - if (!rc && check_nlink) - rc = __mdd_may_link(env, pobj, pattr); - RETURN(rc); } @@ -476,9 +515,20 @@ int mdd_may_delete(const struct lu_env *env, struct mdd_object *tpobj, RETURN(rc); } -/* - * tgt maybe NULL - * has mdd_write_lock on src already, but not on tgt yet +/** + * Check whether it can create the link 
file(linked to @src_obj) under + * the target directory(@tgt_obj), and src_obj has been locked by + * mdd_write_lock. + * + * \param[in] env execution environment + * \param[in] tgt_obj the target directory + * \param[in] tattr attributes of target directory + * \param[in] lname the link name + * \param[in] src_obj source object for link + * \param[in] cattr attributes for source object + * + * \retval = 0 it is allowed to create the link file under tgt_obj + * \retval negative error not allowed to create the link file */ static int mdd_link_sanity_check(const struct lu_env *env, struct mdd_object *tgt_obj, @@ -498,8 +548,9 @@ static int mdd_link_sanity_check(const struct lu_env *env, RETURN(-ESTALE); /* Local ops, no lookup before link, check filename length here. */ - if (lname && (lname->ln_namelen > m->mdd_dt_conf.ddp_max_name_len)) - RETURN(-ENAMETOOLONG); + rc = mdd_name_check(m, lname); + if (rc < 0) + RETURN(rc); if (mdd_is_immutable(src_obj) || mdd_is_append(src_obj)) RETURN(-EPERM); @@ -508,11 +559,9 @@ static int mdd_link_sanity_check(const struct lu_env *env, RETURN(-EPERM); LASSERT(src_obj != tgt_obj); - if (tgt_obj) { - rc = mdd_may_create(env, tgt_obj, tattr, NULL, 1, 0); - if (rc) - RETURN(rc); - } + rc = mdd_may_create(env, tgt_obj, tattr, NULL, true); + if (rc != 0) + RETURN(rc); rc = __mdd_may_link(env, src_obj, cattr); @@ -539,7 +588,8 @@ static int __mdd_index_delete_only(const struct lu_env *env, struct mdd_object * static int __mdd_index_insert_only(const struct lu_env *env, struct mdd_object *pobj, - const struct lu_fid *lf, const char *name, + const struct lu_fid *lf, __u32 type, + const char *name, struct thandle *handle, struct lustre_capa *capa) { @@ -548,12 +598,15 @@ static int __mdd_index_insert_only(const struct lu_env *env, ENTRY; if (dt_try_as_dir(env, next)) { - struct lu_ucred *uc = lu_ucred_check(env); - int ignore_quota; + struct dt_insert_rec *rec = &mdd_env_info(env)->mti_dt_rec; + struct lu_ucred *uc = lu_ucred_check(env); + 
int ignore_quota; + rec->rec_fid = lf; + rec->rec_type = type; ignore_quota = uc ? uc->uc_cap & CFS_CAP_SYS_RESOURCE_MASK : 1; rc = next->do_index_ops->dio_insert(env, next, - (struct dt_rec*)lf, + (const struct dt_rec *)rec, (const struct dt_key *)name, handle, capa, ignore_quota); } else { @@ -564,19 +617,21 @@ static int __mdd_index_insert_only(const struct lu_env *env, /* insert named index, add reference if isdir */ static int __mdd_index_insert(const struct lu_env *env, struct mdd_object *pobj, - const struct lu_fid *lf, const char *name, int is_dir, - struct thandle *handle, struct lustre_capa *capa) + const struct lu_fid *lf, __u32 type, + const char *name, struct thandle *handle, + struct lustre_capa *capa) { - int rc; - ENTRY; + int rc; + ENTRY; - rc = __mdd_index_insert_only(env, pobj, lf, name, handle, capa); - if (rc == 0 && is_dir) { - mdd_write_lock(env, pobj, MOR_TGT_PARENT); - mdo_ref_add(env, pobj, handle); - mdd_write_unlock(env, pobj); - } - RETURN(rc); + rc = __mdd_index_insert_only(env, pobj, lf, type, name, handle, capa); + if (rc == 0 && S_ISDIR(type)) { + mdd_write_lock(env, pobj, MOR_TGT_PARENT); + mdo_ref_add(env, pobj, handle); + mdd_write_unlock(env, pobj); + } + + RETURN(rc); } /* delete named index, drop reference if isdir */ @@ -700,7 +755,7 @@ int mdd_changelog_store(const struct lu_env *env, struct mdd_device *mdd, if (ctxt == NULL) return -ENXIO; - rc = llog_add(env, ctxt->loc_handle, &rec->cr_hdr, NULL, NULL, th); + rc = llog_add(env, ctxt->loc_handle, &rec->cr_hdr, NULL, th); llog_ctxt_put(ctxt); if (rc > 0) rc = 0; @@ -714,9 +769,9 @@ int mdd_changelog_store(const struct lu_env *env, struct mdd_device *mdd, * this will hopefully be fixed in llog rewrite * \retval 0 ok */ -int mdd_changelog_ext_store(const struct lu_env *env, struct mdd_device *mdd, - struct llog_changelog_ext_rec *rec, - struct thandle *th) +static int +mdd_changelog_ext_store(const struct lu_env *env, struct mdd_device *mdd, + struct llog_changelog_ext_rec 
*rec, struct thandle *th) { struct obd_device *obd = mdd2obd_dev(mdd); struct llog_ctxt *ctxt; @@ -739,7 +794,7 @@ int mdd_changelog_ext_store(const struct lu_env *env, struct mdd_device *mdd, return -ENXIO; /* nested journal transaction */ - rc = llog_add(env, ctxt->loc_handle, &rec->cr_hdr, NULL, NULL, th); + rc = llog_add(env, ctxt->loc_handle, &rec->cr_hdr, NULL, th); llog_ctxt_put(ctxt); if (rc > 0) rc = 0; @@ -1006,7 +1061,6 @@ int mdd_links_rename(const struct lu_env *env, struct linkea_data *ldata, int first, int check) { - int rc2 = 0; int rc = 0; ENTRY; @@ -1024,9 +1078,7 @@ int mdd_links_rename(const struct lu_env *env, rc = mdd_links_write(env, mdd_obj, ldata, handle); EXIT; out: - if (rc == 0) - rc = rc2; - if (rc) { + if (rc != 0) { int error = 1; if (rc == -EOVERFLOW || rc == -ENOSPC) error = 0; @@ -1061,10 +1113,10 @@ static inline int mdd_links_add(const struct lu_env *env, const struct lu_fid *pfid, const struct lu_name *lname, struct thandle *handle, - struct linkea_data *data, int first) + struct linkea_data *ldata, int first) { return mdd_links_rename(env, mdd_obj, NULL, NULL, - pfid, lname, handle, data, first, 0); + pfid, lname, handle, ldata, first, 0); } static inline int mdd_links_del(const struct lu_env *env, @@ -1183,18 +1235,19 @@ static inline int mdd_declare_links_del(const struct lu_env *env, } static int mdd_declare_link(const struct lu_env *env, - struct mdd_device *mdd, - struct mdd_object *p, - struct mdd_object *c, - const struct lu_name *name, + struct mdd_device *mdd, + struct mdd_object *p, + struct mdd_object *c, + const struct lu_name *name, struct thandle *handle, struct lu_attr *la, struct linkea_data *data) { - int rc; + int rc; - rc = mdo_declare_index_insert(env, p, mdo2fid(c), name->ln_name,handle); - if (rc) + rc = mdo_declare_index_insert(env, p, mdo2fid(c), mdd_object_type(c), + name->ln_name, handle); + if (rc != 0) return rc; rc = mdo_declare_ref_add(env, c, handle); @@ -1274,7 +1327,7 @@ static int 
mdd_link(const struct lu_env *env, struct md_object *tgt_obj, rc = __mdd_index_insert_only(env, mdd_tobj, mdo2fid(mdd_sobj), - name, handle, + mdd_object_type(mdd_sobj), name, handle, mdd_object_capa(env, mdd_tobj)); if (rc != 0) { mdo_ref_del(env, mdd_sobj, handle); @@ -1315,23 +1368,58 @@ out_pending: return rc; } +static int mdd_mark_dead_object(const struct lu_env *env, + struct mdd_object *obj, struct thandle *handle, + bool declare) +{ + struct lu_attr *attr = MDD_ENV_VAR(env, la_for_start); + int rc; + + if (!declare) + obj->mod_flags |= DEAD_OBJ; + + if (!S_ISDIR(mdd_object_type(obj))) + return 0; + + attr->la_valid = LA_FLAGS; + attr->la_flags = LUSTRE_SLAVE_DEAD_FL; + + if (declare) + rc = mdo_declare_attr_set(env, obj, attr, handle); + else + rc = mdo_attr_set(env, obj, attr, handle, + mdd_object_capa(env, obj)); + + return rc; +} + static int mdd_declare_finish_unlink(const struct lu_env *env, struct mdd_object *obj, struct thandle *handle) { int rc; + rc = mdd_mark_dead_object(env, obj, handle, true); + if (rc != 0) + return rc; + rc = orph_declare_index_insert(env, obj, mdd_object_type(obj), handle); - if (rc) + if (rc != 0) + return rc; + + rc = mdo_declare_destroy(env, obj, handle); + if (rc != 0) return rc; - return mdo_declare_destroy(env, obj, handle); + return mdd_declare_links_del(env, obj, handle); } /* caller should take a lock before calling */ int mdd_finish_unlink(const struct lu_env *env, - struct mdd_object *obj, struct md_attr *ma, - struct thandle *th) + struct mdd_object *obj, struct md_attr *ma, + const struct mdd_object *pobj, + const struct lu_name *lname, + struct thandle *th) { int rc = 0; int is_dir = S_ISDIR(ma->ma_attr.la_mode); @@ -1340,7 +1428,9 @@ int mdd_finish_unlink(const struct lu_env *env, LASSERT(mdd_write_locked(env, obj) != 0); if (ma->ma_attr.la_nlink == 0 || is_dir) { - obj->mod_flags |= DEAD_OBJ; + rc = mdd_mark_dead_object(env, obj, th, false); + if (rc != 0) + RETURN(rc); /* add new orphan and the object * will 
be deleted during mdd_close() */ @@ -1360,9 +1450,12 @@ int mdd_finish_unlink(const struct lu_env *env, } else { rc = mdo_destroy(env, obj, th); } - } + } else if (!is_dir) { + /* old files may not have link ea; ignore errors */ + mdd_links_del(env, obj, mdo2fid(pobj), lname, th); + } - RETURN(rc); + RETURN(rc); } /* @@ -1425,10 +1518,6 @@ static int mdd_declare_unlink(const struct lu_env *env, struct mdd_device *mdd, if (rc) return rc; - rc = mdd_declare_links_del(env, c, handle); - if (rc != 0) - return rc; - /* FIXME: need changelog for remove entry */ rc = mdd_declare_changelog_store(env, mdd, name, handle); } @@ -1545,6 +1634,7 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, if (rc != 0) { __mdd_index_insert_only(env, mdd_pobj, mdo2fid(mdd_cobj), + mdd_object_type(mdd_cobj), name, handle, mdd_object_capa(env, mdd_pobj)); GOTO(cleanup, rc); @@ -1584,16 +1674,12 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, /* XXX: this transfer to ma will be removed with LOD/OSP */ ma->ma_attr = *cattr; ma->ma_valid |= MA_INODE; - rc = mdd_finish_unlink(env, mdd_cobj, ma, handle); + rc = mdd_finish_unlink(env, mdd_cobj, ma, mdd_pobj, lname, handle); /* fetch updated nlink */ if (rc == 0) rc = mdd_la_get(env, mdd_cobj, cattr, BYPASS_CAPA); - if (!is_dir) - /* old files may not have link ea; ignore errors */ - mdd_links_del(env, mdd_cobj, mdo2fid(mdd_pobj), lname, handle); - /* if object is removed then we can't get its attrs, use last get */ if (cattr->la_nlink == 0) { ma->ma_attr = *cattr; @@ -1724,7 +1810,7 @@ static int mdd_declare_object_initialize(const struct lu_env *env, struct lu_attr *attr, struct thandle *handle) { - int rc; + int rc; ENTRY; /* @@ -1739,13 +1825,13 @@ static int mdd_declare_object_initialize(const struct lu_env *env, attr->la_valid |= LA_MODE | LA_TYPE; if (rc == 0 && S_ISDIR(attr->la_mode)) { rc = mdo_declare_index_insert(env, child, mdo2fid(child), - dot, handle); - if (rc == 0) - rc = 
mdo_declare_ref_add(env, child, handle); + S_IFDIR, dot, handle); + if (rc == 0) + rc = mdo_declare_ref_add(env, child, handle); rc = mdo_declare_index_insert(env, child, mdo2fid(parent), - dotdot, handle); - } + S_IFDIR, dotdot, handle); + } RETURN(rc); } @@ -1773,19 +1859,35 @@ static int mdd_object_initialize(const struct lu_env *env, /* Add "." and ".." for newly created dir */ mdo_ref_add(env, child, handle); rc = __mdd_index_insert_only(env, child, mdo2fid(child), - dot, handle, BYPASS_CAPA); - if (rc == 0) - rc = __mdd_index_insert_only(env, child, pfid, - dotdot, handle, - BYPASS_CAPA); - if (rc != 0) - mdo_ref_del(env, child, handle); - } + S_IFDIR, dot, handle, BYPASS_CAPA); + if (rc == 0) + rc = __mdd_index_insert_only(env, child, pfid, S_IFDIR, + dotdot, handle, + BYPASS_CAPA); + if (rc != 0) + mdo_ref_del(env, child, handle); + } RETURN(rc); } -/* has not lock on pobj yet */ +/** + * This function checks whether it can create a file/dir under the + * directory(@pobj). The directory(@pobj) is not being locked by + * mdd lock. 
+ * + * \param[in] env execution environment + * \param[in] pobj the directory to create files + * \param[in] pattr the attributes of the directory + * \param[in] lname the name of the created file/dir + * \param[in] cattr the attributes of the file/dir + * \param[in] spec create specification + * + * \retval = 0 it is allowed to create file/dir under + * the directory + * \retval negative error not allowed to create file/dir + * under the directory + */ static int mdd_create_sanity_check(const struct lu_env *env, struct md_object *pobj, const struct lu_attr *pattr, @@ -1793,12 +1895,13 @@ static int mdd_create_sanity_check(const struct lu_env *env, struct lu_attr *cattr, struct md_op_spec *spec) { - struct mdd_thread_info *info = mdd_env_info(env); - struct lu_fid *fid = &info->mti_fid; - struct mdd_object *obj = md2mdd_obj(pobj); - struct mdd_device *m = mdo2mdd(pobj); - int rc; - ENTRY; + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_fid *fid = &info->mti_fid; + struct mdd_object *obj = md2mdd_obj(pobj); + struct mdd_device *m = mdo2mdd(pobj); + bool check_perm = true; + int rc; + ENTRY; /* EEXIST check */ if (mdd_is_dead_obj(obj)) @@ -1817,20 +1920,17 @@ static int mdd_create_sanity_check(const struct lu_env *env, */ rc = __mdd_lookup(env, pobj, pattr, lname, fid, MAY_WRITE | MAY_EXEC); - if (rc != -ENOENT) - RETURN(rc ? : -EEXIST); - } else { - /* - * Check WRITE permission for the parent. - * EXEC permission have been checked - * when lookup before create already. - */ - rc = mdd_permission_internal_locked(env, obj, pattr, MAY_WRITE, - MOR_TGT_PARENT); - if (rc) - RETURN(rc); + if (rc != -ENOENT) + RETURN(rc ? 
: -EEXIST); + + /* Permission is already being checked in mdd_lookup */ + check_perm = false; } + rc = mdd_may_create(env, obj, pattr, NULL, check_perm); + if (rc != 0) + RETURN(rc); + /* sgid check */ if (pattr->la_mode & S_ISGID) { cattr->la_gid = pattr->la_gid; @@ -1840,6 +1940,10 @@ static int mdd_create_sanity_check(const struct lu_env *env, } } + rc = mdd_name_check(m, lname); + if (rc < 0) + RETURN(rc); + switch (cattr->la_mode & S_IFMT) { case S_IFLNK: { unsigned int symlen = strlen(spec->u.sp_symname) + 1; @@ -1958,17 +2062,18 @@ static int mdd_declare_create(const struct lu_env *env, struct mdd_device *mdd, GOTO(out, rc); } - if (spec->sp_cr_flags & MDS_OPEN_VOLATILE) { + if (unlikely(spec->sp_cr_flags & MDS_OPEN_VOLATILE)) { rc = orph_declare_index_insert(env, c, attr->la_mode, handle); if (rc) GOTO(out, rc); } else { struct lu_attr *la = &mdd_env_info(env)->mti_la_for_fix; - rc = mdo_declare_index_insert(env, p, mdo2fid(c), name->ln_name, - handle); - if (rc) + rc = mdo_declare_index_insert(env, p, mdo2fid(c), attr->la_mode, + name->ln_name, handle); + if (rc != 0) return rc; + rc = mdd_declare_links_add(env, c, handle, ldata); if (rc) return rc; @@ -1978,11 +2083,11 @@ static int mdd_declare_create(const struct lu_env *env, struct mdd_device *mdd, rc = mdo_declare_attr_set(env, p, la, handle); if (rc) return rc; - } - rc = mdd_declare_changelog_store(env, mdd, name, handle); - if (rc) - return rc; + rc = mdd_declare_changelog_store(env, mdd, name, handle); + if (rc) + return rc; + } /* XXX: For remote create, it should indicate the remote RPC * will be sent after local transaction is finished, which @@ -2054,6 +2159,43 @@ static int mdd_object_create(const struct lu_env *env, struct mdd_object *pobj, if (rc) GOTO(unlock, rc); + /* Note: In DNE phase I, for striped dir, though sub-stripes will be + * created in declare phase, they also needs to be added to master + * object as sub-directory entry. 
So it has to initialize the master + * object, then set dir striped EA.(in mdo_xattr_set) */ + rc = mdd_object_initialize(env, mdo2fid(pobj), son, attr, handle, + spec); + if (rc != 0) + GOTO(err_destroy, rc); + + /* + * in case of replay we just set LOVEA provided by the client + * XXX: I think it would be interesting to try "old" way where + * MDT calls this xattr_set(LOV) in a different transaction. + * probably this way we code can be made better. + */ + + /* During creation, there are only a few cases we need do xattr_set to + * create stripes. + * 1. regular file: see comments above. + * 2. create striped directory with provided stripeEA. + * 3. create striped directory because inherit default layout from the + * parent. */ + if (spec->no_create || + (S_ISREG(attr->la_mode) && spec->sp_cr_flags & MDS_OPEN_HAS_EA) || + S_ISDIR(attr->la_mode)) { + const struct lu_buf *buf; + + buf = mdd_buf_get_const(env, spec->u.sp_ea.eadata, + spec->u.sp_ea.eadatalen); + rc = mdo_xattr_set(env, son, buf, + S_ISDIR(attr->la_mode) ? XATTR_NAME_LMV : + XATTR_NAME_LOV, 0, + handle, BYPASS_CAPA); + if (rc != 0) + GOTO(err_destroy, rc); + } + #ifdef CONFIG_FS_POSIX_ACL if (def_acl_buf != NULL && def_acl_buf->lb_len > 0 && S_ISDIR(attr->la_mode)) { @@ -2074,29 +2216,6 @@ static int mdd_object_create(const struct lu_env *env, struct mdd_object *pobj, } #endif - rc = mdd_object_initialize(env, mdo2fid(pobj), son, attr, handle, - spec); - if (rc != 0) - GOTO(err_destroy, rc); - - /* - * in case of replay we just set LOVEA provided by the client - * XXX: I think it would be interesting to try "old" way where - * MDT calls this xattr_set(LOV) in a different transaction. - * probably this way we code can be made better. 
- */ - if (spec->no_create || (spec->sp_cr_flags & MDS_OPEN_HAS_EA && - S_ISREG(attr->la_mode))) { - const struct lu_buf *buf; - - buf = mdd_buf_get_const(env, spec->u.sp_ea.eadata, - spec->u.sp_ea.eadatalen); - rc = mdo_xattr_set(env, son, buf, XATTR_NAME_LOV, 0, handle, - BYPASS_CAPA); - if (rc != 0) - GOTO(err_destroy, rc); - } - if (S_ISLNK(attr->la_mode)) { struct lu_ucred *uc = lu_ucred_assert(env); struct dt_object *dt = mdd_object_child(son); @@ -2247,12 +2366,10 @@ static int mdd_create(const struct lu_env *env, struct md_object *pobj, if (unlikely(spec->sp_cr_flags & MDS_OPEN_VOLATILE)) { mdd_write_lock(env, son, MOR_TGT_CHILD); rc = __mdd_orphan_add(env, son, handle); - mdd_write_unlock(env, son); - if (rc != 0) - GOTO(err_created, rc); + GOTO(out_volatile, rc); } else { rc = __mdd_index_insert(env, mdd_pobj, mdo2fid(son), - name, S_ISDIR(attr->la_mode), handle, + attr->la_mode, name, handle, mdd_object_capa(env, mdd_pobj)); if (rc != 0) GOTO(err_created, rc); @@ -2286,29 +2403,37 @@ err_created: mdd_write_lock(env, son, MOR_TGT_CHILD); if (S_ISDIR(attr->la_mode)) { /* Drop the reference, no need to delete "."/"..", - * because the object to be destroied directly. */ + * because the object is to be destroyed directly. */ rc2 = mdo_ref_del(env, son, handle); if (rc2 != 0) { mdd_write_unlock(env, son); goto out_stop; } } +out_volatile: + /* For volatile files drop one link immediately, since there is + * no filename in the namespace, and save any error returned. 
*/ rc2 = mdo_ref_del(env, son, handle); if (rc2 != 0) { mdd_write_unlock(env, son); + if (unlikely(rc == 0)) + rc = rc2; goto out_stop; } - mdo_destroy(env, son, handle); + /* Don't destroy the volatile object on success */ + if (likely(rc != 0)) + mdo_destroy(env, son, handle); mdd_write_unlock(env, son); - } + } - if (rc == 0 && fid_is_namespace_visible(mdo2fid(son))) + if (rc == 0 && fid_is_namespace_visible(mdo2fid(son)) && + likely((spec->sp_cr_flags & MDS_OPEN_VOLATILE) == 0)) rc = mdd_changelog_ns_store(env, mdd, - S_ISDIR(attr->la_mode) ? CL_MKDIR : - S_ISREG(attr->la_mode) ? CL_CREATE : - S_ISLNK(attr->la_mode) ? CL_SOFTLINK : CL_MKNOD, - 0, son, mdd_pobj, lname, handle); + S_ISDIR(attr->la_mode) ? CL_MKDIR : + S_ISREG(attr->la_mode) ? CL_CREATE : + S_ISLNK(attr->la_mode) ? CL_SOFTLINK : CL_MKNOD, + 0, son, mdd_pobj, lname, handle); out_stop: rc2 = mdd_trans_stop(env, mdd, rc, handle); if (rc == 0) @@ -2318,11 +2443,11 @@ out_free: /* if we vmalloced a large buffer drop it */ lu_buf_free(ldata->ld_buf); - /* The child object shouldn't be cached anymore */ - if (rc) + /* The child object shouldn't be cached anymore */ + if (rc) set_bit(LU_OBJECT_HEARD_BANSHEE, - &child->mo_lu.lo_header->loh_flags); - return rc; + &child->mo_lu.lo_header->loh_flags); + return rc; } /* @@ -2396,9 +2521,9 @@ static int mdd_rename_sanity_check(const struct lu_env *env, * processed in cld_rename before mdd_rename and enable * MDS_PERM_BYPASS). * So check may_create, but not check may_unlink. 
*/ - if (!tobj) + if (tobj == NULL) rc = mdd_may_create(env, tgt_pobj, tpattr, NULL, - (src_pobj != tgt_pobj), 0); + (src_pobj != tgt_pobj)); else rc = mdd_may_delete(env, tgt_pobj, tpattr, tobj, tattr, cattr, (src_pobj != tgt_pobj), 1); @@ -2446,21 +2571,22 @@ static int mdd_declare_rename(const struct lu_env *env, if (mdd_spobj != mdd_tpobj) { rc = mdo_declare_index_delete(env, mdd_sobj, dotdot, handle); - if (rc) + if (rc != 0) return rc; rc = mdo_declare_index_insert(env, mdd_sobj, mdo2fid(mdd_tpobj), - dotdot, handle); - if (rc) + S_IFDIR, dotdot, handle); + if (rc != 0) return rc; } - /* new target child can be directory, - * counted by target dir's nlink */ - rc = mdo_declare_ref_add(env, mdd_tpobj, handle); - if (rc) - return rc; - } + + /* new target child can be directory, + * counted by target dir's nlink */ + rc = mdo_declare_ref_add(env, mdd_tpobj, handle); + if (rc != 0) + return rc; + } la->la_valid = LA_CTIME | LA_MTIME; rc = mdo_declare_attr_set(env, mdd_spobj, la, handle); @@ -2480,11 +2606,12 @@ static int mdd_declare_rename(const struct lu_env *env, if (rc) return rc; - /* new name */ - rc = mdo_declare_index_insert(env, mdd_tpobj, mdo2fid(mdd_sobj), - tname->ln_name, handle); - if (rc) - return rc; + /* new name */ + rc = mdo_declare_index_insert(env, mdd_tpobj, mdo2fid(mdd_sobj), + mdd_object_type(mdd_sobj), + tname->ln_name, handle); + if (rc != 0) + return rc; /* name from target dir (old name), we declare it unconditionally * as mdd_rename() calls delete unconditionally as well. 
so just @@ -2517,10 +2644,6 @@ static int mdd_declare_rename(const struct lu_env *env, if (rc) return rc; - rc = mdd_declare_links_del(env, mdd_tobj, handle); - if (rc) - return rc; - rc = mdd_declare_finish_unlink(env, mdd_tobj, handle); if (rc) return rc; @@ -2591,6 +2714,10 @@ static int mdd_rename(const struct lu_env *env, if (rc) GOTO(out_pending, rc); + rc = mdd_name_check(mdd, ltname); + if (rc < 0) + GOTO(out_pending, rc); + handle = mdd_trans_create(env, mdd); if (IS_ERR(handle)) GOTO(out_pending, rc = PTR_ERR(handle)); @@ -2624,12 +2751,13 @@ static int mdd_rename(const struct lu_env *env, if (is_dir && mdd_sobj && !lu_fid_eq(spobj_fid, tpobj_fid)) { rc = __mdd_index_delete_only(env, mdd_sobj, dotdot, handle, mdd_object_capa(env, mdd_sobj)); - if (rc) - GOTO(fixup_spobj2, rc); + if (rc != 0) + GOTO(fixup_spobj2, rc); - rc = __mdd_index_insert_only(env, mdd_sobj, tpobj_fid, dotdot, - handle, mdd_object_capa(env, mdd_sobj)); - if (rc) + rc = __mdd_index_insert_only(env, mdd_sobj, tpobj_fid, S_IFDIR, + dotdot, handle, + mdd_object_capa(env, mdd_sobj)); + if (rc != 0) GOTO(fixup_spobj, rc); } @@ -2649,9 +2777,9 @@ static int mdd_rename(const struct lu_env *env, } /* Insert new fid with target name into target dir */ - rc = __mdd_index_insert(env, mdd_tpobj, lf, tname, is_dir, handle, - mdd_object_capa(env, mdd_tpobj)); - if (rc) + rc = __mdd_index_insert(env, mdd_tpobj, lf, cattr->la_mode, + tname, handle, mdd_object_capa(env, mdd_tpobj)); + if (rc != 0) GOTO(fixup_tpobj, rc); LASSERT(ma->ma_attr.la_valid & LA_CTIME); @@ -2709,7 +2837,8 @@ static int mdd_rename(const struct lu_env *env, /* XXX: this transfer to ma will be removed with LOD/OSP */ ma->ma_attr = *tattr; ma->ma_valid |= MA_INODE; - rc = mdd_finish_unlink(env, mdd_tobj, ma, handle); + rc = mdd_finish_unlink(env, mdd_tobj, ma, mdd_tpobj, ltname, + handle); if (rc != 0) { CERROR("%s: Failed to unlink tobj " DFID": rc = %d\n", @@ -2780,15 +2909,14 @@ fixup_tpobj: mdo_ref_add(env, mdd_tobj, handle); } 
- rc2 = __mdd_index_insert(env, mdd_tpobj, - mdo2fid(mdd_tobj), tname, - is_dir, handle, - BYPASS_CAPA); - - if (rc2) - CWARN("tp obj fix error %d\n",rc2); - } - } + rc2 = __mdd_index_insert(env, mdd_tpobj, + mdo2fid(mdd_tobj), + mdd_object_type(mdd_tobj), + tname, handle, BYPASS_CAPA); + if (rc2 != 0) + CWARN("tp obj fix error: rc = %d\n", rc2); + } + } fixup_spobj: if (rc && is_dir && mdd_sobj && mdd_spobj != mdd_tpobj) { @@ -2800,32 +2928,36 @@ fixup_spobj: mdd2obd_dev(mdd)->obd_name, rc2); - rc2 = __mdd_index_insert_only(env, mdd_sobj, spobj_fid, + rc2 = __mdd_index_insert_only(env, mdd_sobj, spobj_fid, S_IFDIR, dotdot, handle, BYPASS_CAPA); - if (rc2) + if (rc2 != 0) CWARN("%s: sp obj dotdot insert error: rc = %d\n", mdd2obd_dev(mdd)->obd_name, rc2); } fixup_spobj2: - if (rc) { - rc2 = __mdd_index_insert(env, mdd_spobj, - lf, sname, is_dir, handle, BYPASS_CAPA); - if (rc2) - CWARN("sp obj fix error %d\n",rc2); - } + if (rc != 0) { + rc2 = __mdd_index_insert(env, mdd_spobj, lf, + mdd_object_type(mdd_sobj), sname, + handle, BYPASS_CAPA); + if (rc2 != 0) + CWARN("sp obj fix error: rc = %d\n", rc2); + } + cleanup: if (tobj_locked) mdd_write_unlock(env, mdd_tobj); + cleanup_unlocked: - if (rc == 0) + if (rc == 0) rc = mdd_changelog_ext_ns_store(env, mdd, CL_RENAME, cl_flags, mdd_tobj, tpobj_fid, lf, spobj_fid, ltname, lsname, handle); stop: - mdd_trans_stop(env, mdd, rc, handle); + mdd_trans_stop(env, mdd, rc, handle); + out_pending: mdd_object_put(env, mdd_sobj); return rc; @@ -2992,13 +3124,14 @@ static int mdd_update_linkea_internal(const struct lu_env *env, /* Insert new fid with target name into target dir */ rc = mdo_declare_index_delete(env, pobj, lname.ln_name, handle); - if (rc) + if (rc != 0) GOTO(next_put, rc); rc = mdo_declare_index_insert(env, pobj, - mdd_object_fid(mdd_tobj), - lname.ln_name, handle); - if (rc) + mdd_object_fid(mdd_tobj), + mdd_object_type(mdd_tobj), + lname.ln_name, handle); + if (rc != 0) GOTO(next_put, rc); rc = 
mdo_declare_ref_add(env, mdd_tobj, handle); @@ -3016,10 +3149,11 @@ static int mdd_update_linkea_internal(const struct lu_env *env, GOTO(next_put, rc); rc = __mdd_index_insert(env, pobj, - mdd_object_fid(mdd_tobj), - lname.ln_name, 0, handle, - mdd_object_capa(env, pobj)); - if (rc) + mdd_object_fid(mdd_tobj), + mdd_object_type(mdd_tobj), + lname.ln_name, handle, + mdd_object_capa(env, pobj)); + if (rc != 0) GOTO(next_put, rc); mdd_write_lock(env, mdd_tobj, MOR_SRC_CHILD); @@ -3192,7 +3326,7 @@ static int mdd_declare_migrate_create(const struct lu_env *env, return rc; } - mgr_easize = lmv_mds_md_size(2, LMV_MAGIC_MIGRATE); + mgr_easize = lmv_mds_md_size(2, LMV_MAGIC_V1); buf = mdd_buf_get_const(env, mgr_ea, mgr_easize); rc = mdo_declare_xattr_set(env, mdd_sobj, buf, XATTR_NAME_LMV, 0, handle); @@ -3200,7 +3334,7 @@ static int mdd_declare_migrate_create(const struct lu_env *env, return rc; la_flag->la_valid = LA_FLAGS; - la_flag->la_flags = LUSTRE_IMMUTABLE_FL; + la_flag->la_flags = la->la_flags | LUSTRE_IMMUTABLE_FL; mdd_flags_xlate(mdd_sobj, la_flag->la_flags); rc = mdo_declare_attr_set(env, mdd_sobj, la_flag, handle); @@ -3222,6 +3356,7 @@ static int mdd_migrate_create(const struct lu_env *env, struct thandle *handle; struct lmv_mds_md_v1 *mgr_ea; struct lu_attr *la_flag = MDD_ENV_VAR(env, la_for_fix); + struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint; int mgr_easize; int rc; ENTRY; @@ -3245,7 +3380,7 @@ static int mdd_migrate_create(const struct lu_env *env, RETURN(rc); } spec->u.sp_symname = link_buf.lb_buf; - } else{ + } else if (S_ISREG(la->la_mode)) { /* retrieve lov of the old object */ rc = mdd_get_lov_ea(env, mdd_sobj, &lmm_buf); if (rc != 0 && rc != -ENODATA) @@ -3258,13 +3393,16 @@ static int mdd_migrate_create(const struct lu_env *env, } mgr_ea = (struct lmv_mds_md_v1 *)info->mti_xattr_buf; - mgr_ea->lmv_magic = cpu_to_le32(LMV_MAGIC_MIGRATE); + memset(mgr_ea, 0, sizeof(*mgr_ea)); + mgr_ea->lmv_magic = cpu_to_le32(LMV_MAGIC_V1); 
mgr_ea->lmv_stripe_count = cpu_to_le32(2); mgr_ea->lmv_master_mdt_index = mdd_seq_site(mdd)->ss_node_id; - mgr_ea->lmv_hash_type = cpu_to_le32(LMV_HASH_TYPE_MIGRATION); + mgr_ea->lmv_hash_type = cpu_to_le32(LMV_HASH_FLAG_MIGRATION); fid_cpu_to_le(&mgr_ea->lmv_stripe_fids[0], mdd_object_fid(mdd_sobj)); fid_cpu_to_le(&mgr_ea->lmv_stripe_fids[1], mdd_object_fid(mdd_tobj)); + mdd_object_make_hint(env, mdd_pobj, mdd_tobj, la, spec, hint); + handle = mdd_trans_create(env, mdd); if (IS_ERR(handle)) GOTO(out_free, rc = PTR_ERR(handle)); @@ -3286,22 +3424,14 @@ static int mdd_migrate_create(const struct lu_env *env, /* create the target object */ rc = mdd_object_create(env, mdd_pobj, mdd_tobj, la, spec, NULL, NULL, - NULL, handle); + hint, handle); if (rc != 0) GOTO(stop_trans, rc); - if (lmm_buf.lb_buf != NULL && lmm_buf.lb_len != 0) { - buf = mdd_buf_get_const(env, lmm_buf.lb_buf, lmm_buf.lb_len); - rc = mdo_xattr_set(env, mdd_tobj, buf, XATTR_NAME_LOV, - 0, handle, mdd_object_capa(env, mdd_sobj)); - if (rc != 0) - GOTO(stop_trans, rc); - } - /* Set MIGRATE EA on the source inode, so once the migration needs * to be re-done during failover, the re-do process can locate the * target object which is already being created. 
*/ - mgr_easize = lmv_mds_md_size(2, LMV_MAGIC_MIGRATE); + mgr_easize = lmv_mds_md_size(2, LMV_MAGIC_V1); buf = mdd_buf_get_const(env, mgr_ea, mgr_easize); rc = mdo_xattr_set(env, mdd_sobj, buf, XATTR_NAME_LMV, 0, handle, mdd_object_capa(env, mdd_sobj)); @@ -3314,7 +3444,7 @@ static int mdd_migrate_create(const struct lu_env *env, * IMMUTALBE flag and MIGRATE EA, it need to clear IMMUTABLE * flag and approve the migration */ la_flag->la_valid = LA_FLAGS; - la_flag->la_flags = LUSTRE_IMMUTABLE_FL; + la_flag->la_flags = la->la_flags | LUSTRE_IMMUTABLE_FL; mdd_flags_xlate(mdd_sobj, la_flag->la_flags); rc = mdo_attr_set(env, mdd_sobj, la_flag, handle, mdd_object_capa(env, mdd_sobj)); @@ -3403,7 +3533,7 @@ static int mdd_migrate_entries(const struct lu_env *env, if (IS_ERR(child)) GOTO(out, rc = PTR_ERR(child)); - is_dir = S_ISDIR(lu_object_attr(&child->mod_obj.mo_lu)); + is_dir = S_ISDIR(mdd_object_type(child)); snprintf(name, ent->lde_namelen + 1, "%s", ent->lde_name); @@ -3430,6 +3560,7 @@ static int mdd_migrate_entries(const struct lu_env *env, if (likely(!target_exist)) { rc = mdo_declare_index_insert(env, mdd_tobj, &ent->lde_fid, + mdd_object_type(child), name, handle); if (rc != 0) GOTO(out_put, rc); @@ -3458,7 +3589,7 @@ static int mdd_migrate_entries(const struct lu_env *env, rc = mdo_declare_index_insert(env, child, mdd_object_fid(mdd_tobj), - dotdot, handle); + S_IFDIR, dotdot, handle); if (rc != 0) GOTO(out_put, rc); } @@ -3479,7 +3610,8 @@ static int mdd_migrate_entries(const struct lu_env *env, if (likely(!target_exist)) { rc = __mdd_index_insert(env, mdd_tobj, &ent->lde_fid, - name, is_dir, handle, + mdd_object_type(child), + name, handle, mdd_object_capa(env, mdd_tobj)); if (rc != 0) GOTO(out_put, rc); @@ -3504,7 +3636,7 @@ static int mdd_migrate_entries(const struct lu_env *env, GOTO(out_put, rc); rc = __mdd_index_insert_only(env, child, - mdd_object_fid(mdd_tobj), + mdd_object_fid(mdd_tobj), S_IFDIR, dotdot, handle, mdd_object_capa(env, child)); if (rc 
!= 0) @@ -3602,6 +3734,7 @@ static int mdd_declare_migrate_update_name(const struct lu_env *env, /* new name */ rc = mdo_declare_index_insert(env, mdd_pobj, mdo2fid(mdd_tobj), + mdd_object_type(mdd_tobj), lname->ln_name, handle); if (rc != 0) return rc; @@ -3718,8 +3851,9 @@ static int mdd_migrate_update_name(const struct lu_env *env, } /* Insert new fid with target name into target dir */ - rc = __mdd_index_insert(env, mdd_pobj, mdd_object_fid(mdd_tobj), name, - is_dir, handle, mdd_object_capa(env, mdd_pobj)); + rc = __mdd_index_insert(env, mdd_pobj, mdd_object_fid(mdd_tobj), + mdd_object_type(mdd_tobj), name, + handle, mdd_object_capa(env, mdd_pobj)); if (rc != 0) GOTO(stop_trans, rc); @@ -3740,7 +3874,7 @@ static int mdd_migrate_update_name(const struct lu_env *env, GOTO(stop_trans, rc); ma->ma_attr = *so_attr; ma->ma_valid |= MA_INODE; - rc = mdd_finish_unlink(env, mdd_sobj, ma, handle); + rc = mdd_finish_unlink(env, mdd_sobj, ma, mdd_pobj, lname, handle); if (rc != 0) GOTO(stop_trans, rc); @@ -3779,7 +3913,7 @@ static int mdd_migrate_sanity_check(const struct lu_env *env, ENTRY; - mgr_easize = lmv_mds_md_size(2, LMV_MAGIC_MIGRATE); + mgr_easize = lmv_mds_md_size(2, LMV_MAGIC_V1); mgr_buf = lu_buf_check_and_alloc(&info->mti_big_buf, mgr_easize); if (mgr_buf->lb_buf == NULL) RETURN(-ENOMEM); @@ -3793,8 +3927,8 @@ static int mdd_migrate_sanity_check(const struct lu_env *env, * is being set by previous migration process, so it * needs to override the IMMUTE flag, otherwise the * following sanity check will fail */ - if (le32_to_cpu(lmm->lmv_md_v1.lmv_magic) == - LMV_MAGIC_MIGRATE) { + if (le32_to_cpu(lmm->lmv_md_v1.lmv_hash_type) & + LMV_HASH_FLAG_MIGRATION) { struct mdd_device *mdd = mdo2mdd(&sobj->mod_obj); sattr->la_flags &= ~LUSTRE_IMMUTABLE_FL;