X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd%2Fosd_handler.c;h=cf602367bd367840c02fb98e8c44c7e427acde98;hb=19a76a56775630855a75508ad837aedaf9f6bdcd;hp=6be19ed20648fe476ff797aba5f6722b5d116a9d;hpb=fbfd488a2f87ea43332ae16341887f68c0ffbde5;p=fs%2Flustre-release.git diff --git a/lustre/osd/osd_handler.c b/lustre/osd/osd_handler.c index 6be19ed..cf60236 100644 --- a/lustre/osd/osd_handler.c +++ b/lustre/osd/osd_handler.c @@ -38,6 +38,7 @@ * Top-level entry points into osd module * * Author: Nikita Danilov + * Pravin Shelar : Added fid in dirent */ #ifndef EXPORT_SYMTAB @@ -55,15 +56,6 @@ #include /* XATTR_{REPLACE,CREATE} */ #include -/* - * XXX temporary stuff: direct access to ldiskfs/jdb. Interface between osd - * and file system is not yet specified. - */ -/* handle_t, journal_start(), journal_stop() */ -#include -/* LDISKFS_SB() */ -#include -#include /* simple_mkdir() */ #include @@ -106,18 +98,18 @@ struct osd_object { /** * to protect index ops. */ - struct rw_semaphore oo_ext_idx_sem; - struct rw_semaphore oo_sem; + cfs_rw_semaphore_t oo_ext_idx_sem; + cfs_rw_semaphore_t oo_sem; struct osd_directory *oo_dir; /** protects inode attributes. */ - spinlock_t oo_guard; + cfs_spinlock_t oo_guard; /** * Following two members are used to indicate the presence of dot and * dotdot in the given directory. This is required for interop mode * (b11826). 
*/ - int oo_compat_dot_created; - int oo_compat_dotdot_created; + int oo_compat_dot_created; + int oo_compat_dotdot_created; const struct lu_env *oo_owner; #ifdef CONFIG_LOCKDEP @@ -295,9 +287,9 @@ static struct lu_object *osd_object_alloc(const struct lu_env *env, mo->oo_dt.do_ops = &osd_obj_ops; l->lo_ops = &osd_lu_obj_ops; - init_rwsem(&mo->oo_sem); - init_rwsem(&mo->oo_ext_idx_sem); - spin_lock_init(&mo->oo_guard); + cfs_init_rwsem(&mo->oo_sem); + cfs_init_rwsem(&mo->oo_ext_idx_sem); + cfs_spin_lock_init(&mo->oo_guard); return l; } else return NULL; @@ -310,9 +302,16 @@ static struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, const struct osd_inode_id *id) { - struct inode *inode; + struct inode *inode = NULL; +#ifdef HAVE_EXT4_LDISKFS + inode = ldiskfs_iget(osd_sb(dev), id->oii_ino); + if (IS_ERR(inode)) + /* Newer kernels return an error instead of a NULL pointer */ + inode = NULL; +#else inode = iget(osd_sb(dev), id->oii_ino); +#endif if (inode == NULL) { CERROR("no inode\n"); inode = ERR_PTR(-EACCES); @@ -574,7 +573,7 @@ static struct thandle *osd_trans_start(const struct lu_env *env, * be used. 
*/ - jh = journal_start(osd_journal(dev), p->tp_credits); + jh = ldiskfs_journal_start_sb(osd_sb(dev), p->tp_credits); if (!IS_ERR(jh)) { oh->ot_handle = jh; th = &oh->ot_super; @@ -588,8 +587,8 @@ static struct thandle *osd_trans_start(const struct lu_env *env, /* add commit callback */ lu_context_init(&th->th_ctx, LCT_TX_HANDLE); lu_context_enter(&th->th_ctx); - journal_callback_set(jh, osd_trans_commit_cb, - (struct journal_callback *)&oh->ot_jcb); + osd_journal_callback_set(jh, osd_trans_commit_cb, + (struct journal_callback *)&oh->ot_jcb); LASSERT(oti->oti_txns == 0); LASSERT(oti->oti_r_locks == 0); LASSERT(oti->oti_w_locks == 0); @@ -631,7 +630,7 @@ static void osd_trans_stop(const struct lu_env *env, struct thandle *th) if (result != 0) CERROR("Failure in transaction hook: %d\n", result); oh->ot_handle = NULL; - result = journal_stop(hdl); + result = ldiskfs_journal_stop(hdl); if (result != 0) CERROR("Failure to stop transaction: %d\n", result); } @@ -712,7 +711,7 @@ static void osd_object_release(const struct lu_env *env, LASSERT(!lu_object_is_dying(l->lo_header)); if (o->oo_inode != NULL && osd_inode_unlinked(o->oo_inode)) - set_bit(LU_OBJECT_HEARD_BANSHEE, &l->lo_header->loh_flags); + cfs_set_bit(LU_OBJECT_HEARD_BANSHEE, &l->lo_header->loh_flags); } /* @@ -739,13 +738,13 @@ static int osd_object_print(const struct lu_env *env, void *cookie, * Concurrency: shouldn't matter. 
*/ int osd_statfs(const struct lu_env *env, struct dt_device *d, - struct kstatfs *sfs) + cfs_kstatfs_t *sfs) { struct osd_device *osd = osd_dt_dev(d); struct super_block *sb = osd_sb(osd); int result = 0; - spin_lock(&osd->od_osfs_lock); + cfs_spin_lock(&osd->od_osfs_lock); /* cache 1 second */ if (cfs_time_before_64(osd->od_osfs_age, cfs_time_shift_64(-1))) { result = ll_do_statfs(sb, &osd->od_kstatfs); @@ -755,7 +754,7 @@ int osd_statfs(const struct lu_env *env, struct dt_device *d, if (likely(result == 0)) *sfs = osd->od_kstatfs; - spin_unlock(&osd->od_osfs_lock); + cfs_spin_unlock(&osd->od_osfs_lock); return result; } @@ -909,8 +908,8 @@ static const int osd_dto_credits_noquota[DTO_NR] = { /** * Xattr set. The same as xattr of EXT3. * DATA_TRANS_BLOCKS(14) - * XXX Note: in original MDS implmentation INDEX_EXTRA_TRANS_BLOCKS are - * also counted in. Do not know why? + * XXX Note: in original MDS implmentation INDEX_EXTRA_TRANS_BLOCKS + * are also counted in. Do not know why? */ [DTO_XATTR_SET] = 14, [DTO_LOG_REC] = 14, @@ -924,9 +923,9 @@ static const int osd_dto_credits_noquota[DTO_NR] = { [DTO_WRITE_BLOCK] = 14, /** * Attr set credits for chown. - * 3 (inode bit, group, GDT) + * This is extra credits for setattr, and it is null without quota */ - [DTO_ATTR_SET_CHOWN]= 3 + [DTO_ATTR_SET_CHOWN]= 0 }; /** @@ -989,11 +988,11 @@ static const int osd_dto_credits_quota[DTO_NR] = { [DTO_WRITE_BLOCK] = 16, /** * Attr set credits for chown. 
- * 3 (inode bit, group, GDT) + + * It is added to already set setattr credits * 2 * QUOTA_INIT_BLOCKS(25) + * 2 * QUOTA_DEL_BLOCKS(9) */ - [DTO_ATTR_SET_CHOWN]= 71 + [DTO_ATTR_SET_CHOWN]= 68, }; static int osd_credit_get(const struct lu_env *env, struct dt_device *d, @@ -1033,7 +1032,7 @@ static void osd_object_read_lock(const struct lu_env *env, LINVRNT(osd_invariant(obj)); LASSERT(obj->oo_owner != env); - down_read_nested(&obj->oo_sem, role); + cfs_down_read_nested(&obj->oo_sem, role); LASSERT(obj->oo_owner == NULL); oti->oti_r_locks++; @@ -1048,7 +1047,7 @@ static void osd_object_write_lock(const struct lu_env *env, LINVRNT(osd_invariant(obj)); LASSERT(obj->oo_owner != env); - down_write_nested(&obj->oo_sem, role); + cfs_down_write_nested(&obj->oo_sem, role); LASSERT(obj->oo_owner == NULL); obj->oo_owner = env; @@ -1065,7 +1064,7 @@ static void osd_object_read_unlock(const struct lu_env *env, LASSERT(oti->oti_r_locks > 0); oti->oti_r_locks--; - up_read(&obj->oo_sem); + cfs_up_read(&obj->oo_sem); } static void osd_object_write_unlock(const struct lu_env *env, @@ -1080,7 +1079,7 @@ static void osd_object_write_unlock(const struct lu_env *env, LASSERT(oti->oti_w_locks > 0); oti->oti_w_locks--; obj->oo_owner = NULL; - up_write(&obj->oo_sem); + cfs_up_write(&obj->oo_sem); } static int osd_object_write_locked(const struct lu_env *env, @@ -1119,14 +1118,14 @@ static int capa_is_sane(const struct lu_env *env, RETURN(-ESTALE); } - spin_lock(&capa_lock); + cfs_spin_lock(&capa_lock); for (i = 0; i < 2; i++) { if (keys[i].lk_keyid == capa->lc_keyid) { oti->oti_capa_key = keys[i]; break; } } - spin_unlock(&capa_lock); + cfs_spin_unlock(&capa_lock); if (i == 2) { DEBUG_CAPA(D_ERROR, capa, "no matched capa key"); @@ -1241,9 +1240,9 @@ static int osd_attr_get(const struct lu_env *env, if (osd_object_auth(env, dt, capa, CAPA_OPC_META_READ)) return -EACCES; - spin_lock(&obj->oo_guard); + cfs_spin_lock(&obj->oo_guard); osd_inode_getattr(env, obj->oo_inode, attr); - 
spin_unlock(&obj->oo_guard); + cfs_spin_unlock(&obj->oo_guard); return 0; } @@ -1289,13 +1288,12 @@ static int osd_inode_setattr(const struct lu_env *env, i_size_write(inode, attr->la_size); } +#if 0 /* OSD should not change "i_blocks" which is used by quota. - * "i_blocks" should be changed by ldiskfs only. - * Enable this assignment for SOM purpose now, until it is - * stored in SOM EA. */ + * "i_blocks" should be changed by ldiskfs only. */ if (bits & LA_BLOCKS) inode->i_blocks = attr->la_blocks; - +#endif if (bits & LA_MODE) inode->i_mode = (inode->i_mode & S_IFMT) | (attr->la_mode & ~S_IFMT); @@ -1308,12 +1306,8 @@ static int osd_inode_setattr(const struct lu_env *env, if (bits & LA_RDEV) inode->i_rdev = attr->la_rdev; - if (bits & LA_FLAGS) { - struct ldiskfs_inode_info *li = LDISKFS_I(inode); - - li->i_flags = (li->i_flags & ~LDISKFS_FL_USER_MODIFIABLE) | - (attr->la_flags & LDISKFS_FL_USER_MODIFIABLE); - } + if (bits & LA_FLAGS) + inode->i_flags = ll_ext_to_inode_flags(attr->la_flags); return 0; } @@ -1333,9 +1327,9 @@ static int osd_attr_set(const struct lu_env *env, if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE)) return -EACCES; - spin_lock(&obj->oo_guard); + cfs_spin_lock(&obj->oo_guard); rc = osd_inode_setattr(env, obj->oo_inode, attr); - spin_unlock(&obj->oo_guard); + cfs_spin_unlock(&obj->oo_guard); if (!rc) mark_inode_dirty(obj->oo_inode); @@ -1382,7 +1376,7 @@ static struct dentry * osd_child_dentry_get(const struct lu_env *env, static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj, - umode_t mode, + cfs_umode_t mode, struct dt_allocation_hint *hint, struct thandle *th) { @@ -1524,7 +1518,7 @@ static int osd_mknod(struct osd_thread_info *info, struct osd_object *obj, struct dt_object_format *dof, struct thandle *th) { - umode_t mode = attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX); + cfs_umode_t mode = attr->la_mode & (S_IFMT | S_IRWXUGO | S_ISVTX); int result; LINVRNT(osd_invariant(obj)); @@ -1577,7 +1571,7 @@ static 
osd_obj_type_f osd_create_type_f(enum dt_format_type type) static void osd_ah_init(const struct lu_env *env, struct dt_allocation_hint *ah, - struct dt_object *parent, umode_t child_mode) + struct dt_object *parent, cfs_umode_t child_mode) { LASSERT(ah); @@ -1689,9 +1683,9 @@ static int __osd_xattr_set(const struct lu_env *env, struct dt_object *dt, rc = inode->i_op->setxattr(dentry, name, buf->lb_buf, buf->lb_len, fs_flags); /* ctime should not be updated with server-side time. */ - spin_lock(&obj->oo_guard); + cfs_spin_lock(&obj->oo_guard); inode->i_ctime = *t; - spin_unlock(&obj->oo_guard); + cfs_spin_unlock(&obj->oo_guard); mark_inode_dirty(inode); return rc; } @@ -1712,8 +1706,8 @@ static int osd_ea_fid_set(const struct lu_env *env, struct dt_object *dt, struct osd_thread_info *info = osd_oti_get(env); struct lustre_mdt_attrs *mdt_attrs = &info->oti_mdt_attrs; - fid_cpu_to_be(&mdt_attrs->lma_self_fid, fid); - + lustre_lma_init(mdt_attrs, fid); + lustre_lma_swab(mdt_attrs); return __osd_xattr_set(env, dt, osd_buf_get(env, mdt_attrs, sizeof *mdt_attrs), XATTR_NAME_LMA, LU_XATTR_CREATE); @@ -1730,7 +1724,7 @@ static inline void osd_igif_get(const struct lu_env *env, struct inode *inode, } /** - * Helper function to pack the fid + * Helper function to pack the fid, ldiskfs stores fid in packed format. */ void osd_fid_pack(struct osd_fid_pack *pack, const struct dt_rec *fid, struct lu_fid *befider) @@ -1740,6 +1734,24 @@ void osd_fid_pack(struct osd_fid_pack *pack, const struct dt_rec *fid, pack->fp_len = sizeof(*befider) + 1; } +/** + * ldiskfs supports fid in dirent, it is passed in dentry->d_fsdata. + * lustre 1.8 also uses d_fsdata for passing other info to ldiskfs. + * To have compatibility with 1.8 ldiskfs driver we need to have + * magic number at start of fid data. + * struct ldiskfs_dentry_param is used only to pass fid from osd to ldiskfs; + * it is an in-memory API. 
+ */ +void osd_get_ldiskfs_dirent_param(struct ldiskfs_dentry_param *param, + const struct dt_rec *fid) +{ + param->edp_magic = LDISKFS_LUFID_MAGIC; + param->edp_len = sizeof(struct lu_fid) + 1; /* fid size plus one, like fp_len in osd_fid_pack() */ + /* edp_data receives the fid as converted by fid_cpu_to_be() */ + fid_cpu_to_be((struct lu_fid *)param->edp_data, + (struct lu_fid *)fid); +} + int osd_fid_unpack(struct lu_fid *fid, const struct osd_fid_pack *pack) { int result; @@ -1761,9 +1773,9 @@ int osd_fid_unpack(struct lu_fid *fid, const struct osd_fid_pack *pack) * Try to read the fid from inode ea into dt_rec, if return value * i.e. rc is +ve, then we got fid, otherwise we will have to form igif * - * \param fid, object fid. + * \param fid object fid. * - * \retval 0, on success + * \retval 0 on success */ static int osd_ea_fid_get(const struct lu_env *env, struct osd_object *obj, __u32 ino, struct lu_fid *fid) @@ -1796,22 +1808,22 @@ static int osd_ea_fid_get(const struct lu_env *env, struct osd_object *obj, /* Check LMA compatibility */ if (rc > 0 && - (mdt_attrs->lma_incompat & ~cpu_to_be32(LMA_INCOMPAT_SUPP))) { + (mdt_attrs->lma_incompat & ~cpu_to_le32(LMA_INCOMPAT_SUPP))) { CWARN("Inode %lx: Unsupported incompat LMA feature(s) %#x\n", - inode->i_ino, be32_to_cpu(mdt_attrs->lma_incompat) & + inode->i_ino, le32_to_cpu(mdt_attrs->lma_incompat) & ~LMA_INCOMPAT_SUPP); return -ENOSYS; } if (rc > 0) { - fid_be_to_cpu(fid, &mdt_attrs->lma_self_fid); + lustre_lma_swab(mdt_attrs); + memcpy(fid, &mdt_attrs->lma_self_fid, sizeof(*fid)); rc = 0; } else if (rc == -ENODATA) { osd_igif_get(env, inode, fid); rc = 0; } iput(inode); - out: RETURN(rc); } @@ -1834,7 +1846,6 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt, struct osd_object *obj = osd_dt_obj(dt); struct osd_thread_info *info = osd_oti_get(env); int result; - int is_root = 0; ENTRY; @@ -1845,11 +1856,8 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt, result = __osd_object_create(info, obj, attr, hint, dof, th); - if (hint && hint->dah_parent) - is_root = 
osd_object_is_root(osd_dt_obj(hint->dah_parent)); - /* objects under osd root shld have igif fid, so dont add fid EA */ - if (result == 0 && is_root == 0) + if (result == 0 && fid_seq(fid) >= FID_SEQ_DISTRIBUTED_START) result = osd_ea_fid_set(env, dt, fid); if (result == 0) @@ -1875,10 +1883,10 @@ static void osd_object_ref_add(const struct lu_env *env, LASSERT(osd_write_locked(env, obj)); LASSERT(th != NULL); - spin_lock(&obj->oo_guard); + cfs_spin_lock(&obj->oo_guard); LASSERT(inode->i_nlink < LDISKFS_LINK_MAX); inode->i_nlink++; - spin_unlock(&obj->oo_guard); + cfs_spin_unlock(&obj->oo_guard); mark_inode_dirty(inode); LINVRNT(osd_invariant(obj)); } @@ -1898,10 +1906,10 @@ static void osd_object_ref_del(const struct lu_env *env, LASSERT(osd_write_locked(env, obj)); LASSERT(th != NULL); - spin_lock(&obj->oo_guard); + cfs_spin_lock(&obj->oo_guard); LASSERT(inode->i_nlink > 0); inode->i_nlink--; - spin_unlock(&obj->oo_guard); + cfs_spin_unlock(&obj->oo_guard); mark_inode_dirty(inode); LINVRNT(osd_invariant(obj)); } @@ -1998,9 +2006,9 @@ static int osd_xattr_del(const struct lu_env *env, *t = inode->i_ctime; rc = inode->i_op->removexattr(dentry, name); /* ctime should not be updated with server-side time. 
*/ - spin_lock(&obj->oo_guard); + cfs_spin_lock(&obj->oo_guard); inode->i_ctime = *t; - spin_unlock(&obj->oo_guard); + cfs_spin_unlock(&obj->oo_guard); mark_inode_dirty(inode); return rc; } @@ -2047,9 +2055,9 @@ static struct obd_capa *osd_capa_get(const struct lu_env *env, __u32 d[4], s[4]; s[0] = obj->oo_inode->i_uid; - get_random_bytes(&(s[1]), sizeof(__u32)); + ll_get_random_bytes(&(s[1]), sizeof(__u32)); s[2] = obj->oo_inode->i_gid; - get_random_bytes(&(s[3]), sizeof(__u32)); + ll_get_random_bytes(&(s[3]), sizeof(__u32)); rc = capa_encrypt_id(d, s, key->lk_key, CAPA_HMAC_KEY_MAX_LEN); if (unlikely(rc)) RETURN(ERR_PTR(rc)); @@ -2075,9 +2083,9 @@ static struct obd_capa *osd_capa_get(const struct lu_env *env, RETURN(oc); } - spin_lock(&capa_lock); + cfs_spin_lock(&capa_lock); *key = dev->od_capa_keys[1]; - spin_unlock(&capa_lock); + cfs_spin_unlock(&capa_lock); capa->lc_keyid = key->lk_keyid; capa->lc_expiry = cfs_time_current_sec() + dev->od_capa_timeout; @@ -2235,7 +2243,7 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt, OBD_ALLOC_PTR(dir); if (dir != NULL) { - spin_lock(&obj->oo_guard); + cfs_spin_lock(&obj->oo_guard); if (obj->oo_dir == NULL) obj->oo_dir = dir; else @@ -2243,12 +2251,12 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt, * Concurrent thread allocated container data. */ OBD_FREE_PTR(dir); - spin_unlock(&obj->oo_guard); + cfs_spin_unlock(&obj->oo_guard); /* * Now, that we have container data, serialize its * initialization. */ - down_write(&obj->oo_ext_idx_sem); + cfs_down_write(&obj->oo_ext_idx_sem); /* * recheck under lock. 
*/
@@ -2256,7 +2264,7 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt,
                                 result = osd_iam_container_init(env, obj, dir);
                         else
                                 result = 0;
-                        up_write(&obj->oo_ext_idx_sem);
+                        cfs_up_write(&obj->oo_ext_idx_sem);
                 } else
                         result = -ENOMEM;
         } else
@@ -2338,20 +2346,229 @@ static const struct dt_object_operations osd_obj_ea_ops = {
  *
  * which doesn't work for globally shared files like /last-received.
  */
-int fsfilt_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs);
-int fsfilt_ldiskfs_write_handle(struct inode *inode, void *buf, int bufsize,
-                                loff_t *offs, handle_t *handle);
+/**
+ * Copy \a buflen bytes from the start of the ldiskfs inode body (i_data)
+ * into \a buffer.
+ *
+ * The length is not checked here; osd_read() calls this only when the
+ * request is no larger than sizeof(i_data).
+ *
+ * \retval buflen always
+ */
+static int osd_ldiskfs_readlink(struct inode *inode, char *buffer, int buflen)
+{
+        struct ldiskfs_inode_info *ei = LDISKFS_I(inode);
+
+        memcpy(buffer, (char*)ei->i_data, buflen);
+
+        return buflen;
+}
+
+/**
+ * Read up to \a size bytes of \a inode, starting at \a *offs, into \a buf.
+ *
+ * The request is trimmed to the current inode size and served one block at
+ * a time through ldiskfs_bread(); \a *offs is advanced by every chunk copied.
+ *
+ * NOTE(review): when ldiskfs_bread() returns no buffer, \a err is returned
+ * as is.  If it can be 0 in that case (an unmapped block?), the caller sees
+ * 0 although \a *offs may already have moved -- confirm.
+ *
+ * \retval number of bytes copied (0 when \a *offs is exactly at EOF)
+ * \retval -EBADR when \a *offs is beyond EOF
+ * \retval value left in \a err by ldiskfs_bread() when a block is not read
+ */
+static int osd_ldiskfs_read(struct inode *inode, void *buf, int size,
+                            loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block;
+        int osize = size;
+        int blocksize;
+        int csize;
+        int boffs;
+        int err;
+
+        /* prevent reading after eof */
+        spin_lock(&inode->i_lock);
+        if (i_size_read(inode) < *offs + size) {
+                /* keep the difference in loff_t: narrowing it to int first
+                 * could make a large negative value look positive */
+                loff_t diff = i_size_read(inode) - *offs;
+
+                spin_unlock(&inode->i_lock);
+                if (diff < 0) {
+                        CDEBUG(D_EXT2, "size %llu is too short to read @%llu\n",
+                               i_size_read(inode), *offs);
+                        return -EBADR;
+                } else if (diff == 0) {
+                        return 0;
+                }
+                /* short read: return what is really copied, not the
+                 * length that was asked for */
+                osize = size = diff;
+        } else {
+                spin_unlock(&inode->i_lock);
+        }
+
+        blocksize = 1 << inode->i_blkbits;
+
+        while (size > 0) {
+                block = *offs >> inode->i_blkbits;
+                boffs = *offs & (blocksize - 1);
+                csize = min(blocksize - boffs, size);
+                bh = ldiskfs_bread(NULL, inode, block, 0, &err);
+                if (!bh) {
+                        CERROR("can't read block: %d\n", err);
+                        return err;
+                }
+
+                memcpy(buf, bh->b_data + boffs, csize);
+                brelse(bh);
+
+                *offs += csize;
+                buf += csize;
+                size -= csize;
+        }
+        return osize;
+}
 
+/**
+ * Body read method: copy object data into \a buf.
+ *
+ * A symlink object whose request fits in the inode body is answered from
+ * i_data, anything else goes through osd_ldiskfs_read().
+ *
+ * \retval -EACCES when osd_object_auth() returns non-zero
+ * \retval result of the reader that was used otherwise
+ */
 static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt,
                         struct lu_buf *buf, loff_t *pos,
                         struct lustre_capa *capa)
 {
-        struct inode *inode = osd_dt_obj(dt)->oo_inode;
+        struct osd_object *obj = osd_dt_obj(dt);
+        struct inode *inode = obj->oo_inode;
+        int rc;
 
         if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_READ))
                 RETURN(-EACCES);
 
-        return fsfilt_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos);
+        /* Read small symlink from inode body as we need to maintain correct
+         * on-disk symlinks for ldiskfs.
+         *
+         * NOTE(review): this tests the caller's buffer length with "<=",
+         * while osd_write() keeps a link in the inode body only when its
+         * length is "<" sizeof(i_data) -- confirm both paths always agree
+         * on where a link is stored.
+         */
+        if (S_ISLNK(obj->oo_dt.do_lu.lo_header->loh_attr) &&
+            (buf->lb_len <= sizeof (LDISKFS_I(inode)->i_data)))
+                rc = osd_ldiskfs_readlink(inode, buf->lb_buf, buf->lb_len);
+        else
+                rc = osd_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos);
+
+        return rc;
+}
+
+/**
+ * Store a \a buflen byte symlink target in the inode body (i_data), set
+ * i_size and i_disksize to \a buflen and call the superblock's
+ * ->dirty_inode().
+ *
+ * \retval 0 always
+ */
+static int osd_ldiskfs_writelink(struct inode *inode, char *buffer, int buflen)
+{
+
+        memcpy((char*)&LDISKFS_I(inode)->i_data, (char *)buffer,
+               buflen);
+        LDISKFS_I(inode)->i_disksize = buflen;
+        i_size_write(inode, buflen);
+        inode->i_sb->s_op->dirty_inode(inode);
+
+        return 0;
+}
+
+/**
+ * Write \a bufsize bytes from \a buf at \a *offs of \a inode inside the
+ * journal handle \a handle.
+ *
+ * For each block touched: ldiskfs_bread() with create = 1,
+ * ldiskfs_journal_get_write_access(), copy, ldiskfs_journal_dirty_metadata().
+ * i_size and i_disksize are then raised to cover the blocks that were
+ * written, even if the loop stopped on an error.
+ *
+ * \retval 0 on success; \a *offs is advanced past the written range
+ * \retval first error from the calls above; \a *offs is left unchanged
+ */
+static int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
+                                    loff_t *offs, handle_t *handle)
+{
+        struct buffer_head *bh = NULL;
+        loff_t offset = *offs;
+        loff_t new_size = i_size_read(inode);
+        unsigned long block;
+        int blocksize = 1 << inode->i_blkbits;
+        int err = 0;
+        int size;
+        int boffs;
+        int dirty_inode = 0;
+
+        while (bufsize > 0) {
+                if (bh != NULL)
+                        brelse(bh);
+
+                block = offset >> inode->i_blkbits;
+                boffs = offset & (blocksize - 1);
+                size = min(blocksize - boffs, bufsize);
+                bh = ldiskfs_bread(handle, inode, block, 1, &err);
+                if (!bh) {
+                        CERROR("can't read/create block: %d\n", err);
+                        break;
+                }
+
+                err = ldiskfs_journal_get_write_access(handle, bh);
+                if (err) {
+                        CERROR("journal_get_write_access() returned error %d\n",
+                               err);
+                        break;
+                }
+                LASSERTF(boffs + size <= bh->b_size,
+                         "boffs %d size %d bh->b_size %lu",
+                         boffs, size, (unsigned long)bh->b_size);
+                memcpy(bh->b_data + boffs, buf, size);
+                err = ldiskfs_journal_dirty_metadata(handle, bh);
+                if (err)
+                        break;
+
+                if (offset + size > new_size)
+                        new_size = offset + size;
+                offset += size;
+                bufsize -= size;
+                buf += size;
+        }
+        if (bh)
+                brelse(bh);
+
+        /* correct in-core and on-disk sizes */
+        if (new_size > i_size_read(inode)) {
+                spin_lock(&inode->i_lock);
+                if (new_size > i_size_read(inode))
+                        i_size_write(inode, new_size);
+                if (i_size_read(inode) > LDISKFS_I(inode)->i_disksize) {
+                        LDISKFS_I(inode)->i_disksize = i_size_read(inode);
+                        dirty_inode = 1;
+                }
+                spin_unlock(&inode->i_lock);
+                /* ->dirty_inode() may sleep, so call it only after the
+                 * i_lock spinlock has been dropped */
+                if (dirty_inode)
+                        inode->i_sb->s_op->dirty_inode(inode);
+        }
+
+        if (err == 0)
+                *offs = offset;
+        return err;
 }
 
 static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
@@ -2359,9 +2576,10 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
                          struct thandle *handle, struct lustre_capa *capa,
                          int ignore_quota)
 {
-        struct inode *inode = osd_dt_obj(dt)->oo_inode;
+        struct osd_object *obj = osd_dt_obj(dt);
+        struct inode *inode = obj->oo_inode;
         struct osd_thandle *oh;
-        ssize_t result;
+        ssize_t result = 0;
 #ifdef HAVE_QUOTA_SUPPORT
         cfs_cap_t save = current->cap_effective;
 #endif
@@ -2379,8 +2597,16 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
         else
                 current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
 #endif
-        result = fsfilt_ldiskfs_write_handle(inode, buf->lb_buf, buf->lb_len,
-                                             pos, oh->ot_handle);
+        /* Write small symlink to inode body as we need to maintain correct
+         * on-disk symlinks for ldiskfs.
+ */ + if(S_ISLNK(obj->oo_dt.do_lu.lo_header->loh_attr) && + (buf->lb_len < sizeof (LDISKFS_I(inode)->i_data))) + result = osd_ldiskfs_writelink(inode, buf->lb_buf, buf->lb_len); + else + result = osd_ldiskfs_write_record(inode, buf->lb_buf, + buf->lb_len, pos, + oh->ot_handle); #ifdef HAVE_QUOTA_SUPPORT current->cap_effective = save; #endif @@ -2398,7 +2557,7 @@ static const struct dt_body_operations osd_body_ops = { /** * delete a (key, value) pair from index \a dt specified by \a key * - * \param dt_object osd index object + * \param dt osd index object * \param key key for index * \param rec record reference * \param handle transaction handler @@ -2441,6 +2600,19 @@ static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt, RETURN(rc); } +static inline int osd_get_fid_from_dentry(struct ldiskfs_dir_entry_2 *de, + struct dt_rec *fid) +{ + struct osd_fid_pack *rec; + int rc = -ENODATA; /* stays -ENODATA when the dirent carries no fid */ + /* a dirent flagged LDISKFS_DIRENT_LUFID has a packed fid after its name plus one more byte; unpack it into fid */ + if (de->file_type & LDISKFS_DIRENT_LUFID) { + rec = (struct osd_fid_pack *) (de->name + de->name_len + 1); + rc = osd_fid_unpack((struct lu_fid *)fid, rec); + } + RETURN(rc); /* NOTE(review): RETURN() is used here without a matching ENTRY -- confirm that is intended */ +} + /** * Index delete function for interoperability mode (b11826). * It will remove the directory entry added by osd_index_ea_insert(). @@ -2480,8 +2652,8 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt, dentry = osd_child_dentry_get(env, obj, (char *)key, strlen((char *)key)); - down_write(&obj->oo_ext_idx_sem); - bh = ldiskfs_find_entry(dentry, &de); + cfs_down_write(&obj->oo_ext_idx_sem); + bh = ll_ldiskfs_find_entry(dir, dentry, &de); if (bh) { struct osd_thread_info *oti = osd_oti_get(env); struct timespec *ctime = &oti->oti_time; @@ -2492,16 +2664,16 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt, rc = ldiskfs_delete_entry(oh->ot_handle, dir, de, bh); /* xtime should not be updated with server-side time. 
*/ - spin_lock(&obj->oo_guard); + cfs_spin_lock(&obj->oo_guard); dir->i_ctime = *ctime; dir->i_mtime = *mtime; - spin_unlock(&obj->oo_guard); + cfs_spin_unlock(&obj->oo_guard); mark_inode_dirty(dir); brelse(bh); } else rc = -ENOENT; - up_write(&obj->oo_ext_idx_sem); + cfs_up_write(&obj->oo_ext_idx_sem); LASSERT(osd_invariant(obj)); RETURN(rc); } @@ -2509,7 +2681,7 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt, /** * Lookup index for \a key and copy record to \a rec. * - * \param dt_object osd index object + * \param dt osd index object * \param key key for index * \param rec record reference * @@ -2547,7 +2719,7 @@ static int osd_index_iam_lookup(const struct lu_env *env, struct dt_object *dt, rc = iam_it_get(it, (struct iam_key *)key); if (rc >= 0) { if (S_ISDIR(obj->oo_inode->i_mode)) - iam_rec = (struct iam_rec *)oti->oti_fid_packed; + iam_rec = (struct iam_rec *)oti->oti_ldp; else iam_rec = (struct iam_rec *) rec; @@ -2589,7 +2761,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt, cfs_cap_t save = current->cap_effective; #endif struct osd_thread_info *oti = osd_oti_get(env); - struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_fid_packed; + struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_ldp; int rc; ENTRY; @@ -2639,13 +2811,14 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt, */ static int __osd_ea_add_rec(struct osd_thread_info *info, struct osd_object *pobj, - struct osd_object *cobj, + struct inode *cinode, const char *name, + const struct dt_rec *fid, struct thandle *th) { + struct ldiskfs_dentry_param *ldp; struct dentry *child; struct osd_thandle *oth; - struct inode *cinode = cobj->oo_inode; int rc; oth = container_of(th, struct osd_thandle, ot_super); @@ -2653,6 +2826,14 @@ static int __osd_ea_add_rec(struct osd_thread_info *info, LASSERT(oth->ot_handle->h_transaction != NULL); child = osd_child_dentry_get(info->oti_env, pobj, name, 
strlen(name)); + + if (fid_is_igif((struct lu_fid *)fid) || + fid_seq((struct lu_fid *)fid) >= FID_SEQ_DISTRIBUTED_START) { + ldp = (struct ldiskfs_dentry_param *)info->oti_ldp; + osd_get_ldiskfs_dirent_param(ldp, fid); + child->d_fsdata = (void*) ldp; + } else + child->d_fsdata = NULL; rc = ldiskfs_add_entry(oth->ot_handle, child, cinode); RETURN(rc); @@ -2672,11 +2853,14 @@ static int __osd_ea_add_rec(struct osd_thread_info *info, */ static int osd_add_dot_dotdot(struct osd_thread_info *info, struct osd_object *dir, - struct osd_object *obj, const char *name, + struct inode *parent_dir, const char *name, + const struct dt_rec *dot_fid, + const struct dt_rec *dot_dot_fid, struct thandle *th) { - struct inode *parent_dir = obj->oo_inode; struct inode *inode = dir->oo_inode; + struct ldiskfs_dentry_param *dot_ldp; + struct ldiskfs_dentry_param *dot_dot_ldp; struct osd_thandle *oth; int result = 0; @@ -2688,17 +2872,31 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info, if (dir->oo_compat_dot_created) { result = -EEXIST; } else { - LASSERT(obj == dir); + LASSERT(inode == parent_dir); dir->oo_compat_dot_created = 1; result = 0; } } else if(strcmp(name, dotdot) == 0) { + dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp; + dot_dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp2; + if (!dir->oo_compat_dot_created) return -EINVAL; - if (dir->oo_compat_dotdot_created) - return __osd_ea_add_rec(info, dir, obj, name, th); + if (fid_seq((struct lu_fid *) dot_fid) >= FID_SEQ_DISTRIBUTED_START) { + osd_get_ldiskfs_dirent_param(dot_ldp, dot_fid); + osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid); + } else { + dot_ldp = NULL; + dot_dot_ldp = NULL; + } + /* in case of rename, dotdot is already created */ + if (dir->oo_compat_dotdot_created) { + return __osd_ea_add_rec(info, dir, parent_dir, name, + dot_dot_fid, th); + } - result = ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir, inode); + result = ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir, 
inode, + dot_ldp, dot_dot_ldp); if (result == 0) dir->oo_compat_dotdot_created = 1; } @@ -2713,8 +2911,9 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info, */ static int osd_ea_add_rec(const struct lu_env *env, struct osd_object *pobj, - struct osd_object *cobj, + struct inode *cinode, const char *name, + const struct dt_rec *fid, struct thandle *th) { struct osd_thread_info *info = osd_oti_get(env); @@ -2722,9 +2921,11 @@ static int osd_ea_add_rec(const struct lu_env *env, if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] =='\0'))) - rc = osd_add_dot_dotdot(info, pobj, cobj, name, th); + rc = osd_add_dot_dotdot(info, pobj, cinode, name, + (struct dt_rec *)lu_object_fid(&pobj->oo_dt.do_lu), + fid, th); else - rc = __osd_ea_add_rec(info, pobj, cobj, name, th); + rc = __osd_ea_add_rec(info, pobj, cinode, name, fid, th); return rc; } @@ -2753,26 +2954,30 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj, dentry = osd_child_dentry_get(env, obj, (char *)key, strlen((char *)key)); - down_read(&obj->oo_ext_idx_sem); - bh = ldiskfs_find_entry(dentry, &de); + cfs_down_read(&obj->oo_ext_idx_sem); + bh = ll_ldiskfs_find_entry(dir, dentry, &de); if (bh) { ino = le32_to_cpu(de->inode); + rc = osd_get_fid_from_dentry(de, rec); + + /* done with de, release bh */ brelse(bh); - rc = osd_ea_fid_get(env, obj, ino, fid); + if (rc != 0) + rc = osd_ea_fid_get(env, obj, ino, fid); } else rc = -ENOENT; - up_read(&obj->oo_ext_idx_sem); + cfs_up_read(&obj->oo_ext_idx_sem); RETURN (rc); } /** * Find the osd object for given fid. 
* - * \param fid, need to find the osd object having this fid + * \param fid need to find the osd object having this fid * - * \retval osd_object, on success - * \retval -ve, on error + * \retval osd_object on success + * \retval -ve on error */ struct osd_object *osd_object_find(const struct lu_env *env, struct dt_object *dt, @@ -2814,7 +3019,7 @@ struct osd_object *osd_object_find(const struct lu_env *env, /** * Put the osd object once done with it. * - * \param obj, osd object that needs to be put + * \param obj osd object that needs to be put */ static inline void osd_object_put(const struct lu_env *env, struct osd_object *obj) @@ -2827,8 +3032,8 @@ static inline void osd_object_put(const struct lu_env *env, * It will add the directory entry.This entry is needed to * maintain name->fid mapping. * - * \param key, it is key i.e. file entry to be inserted - * \param rec, it is value of given key i.e. fid + * \param key it is key i.e. file entry to be inserted + * \param rec it is value of given key i.e. fid * * \retval 0, on success * \retval -ve, on error @@ -2871,18 +3076,18 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt, else current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK; #endif - down_write(&obj->oo_ext_idx_sem); - rc = osd_ea_add_rec(env, obj, child, name, th); - up_write(&obj->oo_ext_idx_sem); + cfs_down_write(&obj->oo_ext_idx_sem); + rc = osd_ea_add_rec(env, obj, child->oo_inode, name, rec, th); + cfs_up_write(&obj->oo_ext_idx_sem); #ifdef HAVE_QUOTA_SUPPORT current->cap_effective = save; #endif osd_object_put(env, child); /* xtime should not be updated with server-side time. 
*/ - spin_lock(&obj->oo_guard); + cfs_spin_lock(&obj->oo_guard); inode->i_ctime = *ctime; inode->i_mtime = *mtime; - spin_unlock(&obj->oo_guard); + cfs_spin_unlock(&obj->oo_guard); mark_inode_dirty(inode); } else { rc = PTR_ERR(child); @@ -3183,7 +3388,7 @@ static struct dt_it *osd_it_ea_init(const struct lu_env *env, /** * Destroy or finishes iterator context. * - * \param di, struct osd_it_ea, iterator structure to be destroyed + * \param di iterator structure to be destroyed */ static void osd_it_ea_fini(const struct lu_env *env, struct dt_it *di) { @@ -3234,18 +3439,20 @@ static void osd_it_ea_put(const struct lu_env *env, struct dt_it *di) * iterator's in-memory data structure with required * information i.e. name, namelen, rec_size etc. * - * \param buf, in which information to be filled in. - * \param name, name of the file in given dir + * \param buf in which information to be filled in. + * \param name name of the file in given dir * - * \retval 0, on success - * \retval 1, on buffer full + * \retval 0 on success + * \retval 1 on buffer full */ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen, loff_t offset, __u64 ino, unsigned d_type) { - struct osd_it_ea *it = (struct osd_it_ea *)buf; - struct osd_it_ea_dirent *ent = it->oie_dirent; + struct osd_it_ea *it = (struct osd_it_ea *)buf; + struct osd_it_ea_dirent *ent = it->oie_dirent; + struct lu_fid *fid = &ent->oied_fid; + struct osd_fid_pack *rec; ENTRY; /* this should never happen */ @@ -3258,6 +3465,17 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen, OSD_IT_EA_BUFSIZE) RETURN(1); + if (d_type & LDISKFS_DIRENT_LUFID) { + rec = (struct osd_fid_pack*) (name + namelen + 1); + + if (osd_fid_unpack(fid, rec) != 0) + fid_zero(fid); + + d_type &= ~LDISKFS_DIRENT_LUFID; + } else { + fid_zero(fid); + } + ent->oied_ino = ino; ent->oied_off = offset; ent->oied_namelen = namelen; @@ -3266,7 +3484,7 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int 
namelen, memcpy(ent->oied_name, name, namelen); it->oie_rd_dirent++; - it->oie_dirent = (void *) ent + size_round(sizeof(*ent) + namelen); + it->oie_dirent = (void *) ent + cfs_size_round(sizeof(*ent) + namelen); RETURN(0); } @@ -3274,10 +3492,10 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen, * Calls ->readdir() to load a directory entry at a time * and stored it in iterator's in-memory data structure. * - * \param di, struct osd_it_ea, iterator's in memory structure + * \param di iterator's in memory structure * - * \retval 0, on success - * \retval -ve, on error + * \retval 0 on success + * \retval -ve on error */ static int osd_ldiskfs_it_fill(const struct dt_it *di) { @@ -3290,11 +3508,11 @@ static int osd_ldiskfs_it_fill(const struct dt_it *di) it->oie_dirent = it->oie_buf; it->oie_rd_dirent = 0; - down_read(&obj->oo_ext_idx_sem); + cfs_down_read(&obj->oo_ext_idx_sem); result = inode->i_fop->readdir(&it->oie_file, it, (filldir_t) osd_ldiskfs_filldir); - up_read(&obj->oo_ext_idx_sem); + cfs_up_read(&obj->oo_ext_idx_sem); if (it->oie_rd_dirent == 0) { result = -EIO; @@ -3311,11 +3529,11 @@ static int osd_ldiskfs_it_fill(const struct dt_it *di) * to load a directory entry at a time and stored it in * iterator's in-memory data structure. 
* - * \param di, struct osd_it_ea, iterator's in memory structure + * \param di iterator's in memory structure * - * \retval +ve, iterator reached to end - * \retval 0, iterator not reached to end - * \retval -ve, on error + * \retval +ve iterator reached to end + * \retval 0 iterator not reached to end + * \retval -ve on error */ static int osd_it_ea_next(const struct lu_env *env, struct dt_it *di) { @@ -3325,9 +3543,10 @@ static int osd_it_ea_next(const struct lu_env *env, struct dt_it *di) ENTRY; if (it->oie_it_dirent < it->oie_rd_dirent) { - it->oie_dirent = (void *) it->oie_dirent + - size_round(sizeof(struct osd_it_ea_dirent) + - it->oie_dirent->oied_namelen); + it->oie_dirent = + (void *) it->oie_dirent + + cfs_size_round(sizeof(struct osd_it_ea_dirent) + + it->oie_dirent->oied_namelen); it->oie_it_dirent++; RETURN(0); } else { @@ -3343,7 +3562,7 @@ static int osd_it_ea_next(const struct lu_env *env, struct dt_it *di) /** * Returns the key at current position from iterator's in memory structure. * - * \param di, struct osd_it_ea, iterator's in memory structure + * \param di iterator's in memory structure * * \retval key i.e. struct dt_key on success */ @@ -3358,7 +3577,7 @@ static struct dt_key *osd_it_ea_key(const struct lu_env *env, /** * Returns the key's size at current position from iterator's in memory structure. * - * \param di, struct osd_it_ea, iterator's in memory structure + * \param di iterator's in memory structure * * \retval key_size i.e. struct dt_key on success */ @@ -3374,12 +3593,12 @@ static int osd_it_ea_key_size(const struct lu_env *env, const struct dt_it *di) * Returns the value (i.e. fid/igif) at current position from iterator's * in memory structure. * - * \param di, struct osd_it_ea, iterator's in memory structure - * \param attr, attr requested for dirent. - * \param lde, lustre dirent + * \param di struct osd_it_ea, iterator's in memory structure + * \param attr attr requested for dirent. 
+ * \param lde lustre dirent * - * \retval 0, no error and \param lde has correct lustre dirent. - * \retval -ve, on error + * \retval 0 no error and \param lde has correct lustre dirent. + * \retval -ve on error */ static inline int osd_it_ea_rec(const struct lu_env *env, const struct dt_it *di, @@ -3388,13 +3607,13 @@ static inline int osd_it_ea_rec(const struct lu_env *env, { struct osd_it_ea *it = (struct osd_it_ea *)di; struct osd_object *obj = it->oie_obj; - struct osd_thread_info *info = osd_oti_get(env); - struct lu_fid *fid = &info->oti_fid; - int rc; + struct lu_fid *fid = &it->oie_dirent->oied_fid; + int rc = 0; ENTRY; - rc = osd_ea_fid_get(env, obj, it->oie_dirent->oied_ino, fid); + if (!fid_is_sane(fid)) + rc = osd_ea_fid_get(env, obj, it->oie_dirent->oied_ino, fid); if (rc == 0) osd_it_pack_dirent(lde, fid, it->oie_dirent->oied_off, @@ -3409,7 +3628,7 @@ static inline int osd_it_ea_rec(const struct lu_env *env, * Returns a cookie for current position of the iterator head, so that * user can use this cookie to load/start the iterator next time. * - * \param di, struct osd_it_ea, iterator's in memory structure + * \param di iterator's in memory structure * * \retval cookie for current position, on success */ @@ -3425,10 +3644,10 @@ static __u64 osd_it_ea_store(const struct lu_env *env, const struct dt_it *di) * to load a directory entry at a time and stored it i inn, * in iterator's in-memory data structure. 
* - * \param di, struct osd_it_ea, iterator's in memory structure + * \param di struct osd_it_ea, iterator's in memory structure * - * \retval +ve, on success - * \retval -ve, on error + * \retval +ve on success + * \retval -ve on error */ static int osd_it_ea_load(const struct lu_env *env, const struct dt_it *di, __u64 hash) @@ -3587,7 +3806,6 @@ static int osd_mount(const struct lu_env *env, struct lustre_sb_info *lsi; ENTRY; - if (o->od_mount != NULL) { CERROR("Already mounted (%s)\n", dev); RETURN(-EEXIST); @@ -3657,7 +3875,7 @@ static struct lu_device *osd_device_alloc(const struct lu_env *env, l = osd2lu_dev(o); l->ld_ops = &osd_lu_ops; o->od_dt_dev.dd_ops = &osd_dt_ops; - spin_lock_init(&o->od_osfs_lock); + cfs_spin_lock_init(&o->od_osfs_lock); o->od_osfs_age = cfs_time_shift_64(-1000); o->od_capa_hash = init_capa_hash(); if (o->od_capa_hash == NULL) { @@ -3710,10 +3928,6 @@ static int osd_process_config(const struct lu_env *env, static int osd_recovery_complete(const struct lu_env *env, struct lu_device *d) { - struct osd_device *o = osd_dev(d); - ENTRY; - /* TODO: orphans handling */ - ldiskfs_orphan_cleanup(osd_sb(o), LDISKFS_SB(osd_sb(o))->s_es); RETURN(0); }