X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flclient%2Flcommon_cl.c;h=16d7fbd91ae6d87c8be65f82fa8be882bd51243e;hp=dc2be2f68032f176d608429d88dc063d7759ca1e;hb=7e2ecb0b3a79b27a002ec42106e43c43a6e5dbaf;hpb=02b25c91f1d06bd352fcc1a9ae91eeaabfc9f698 diff --git a/lustre/lclient/lcommon_cl.c b/lustre/lclient/lcommon_cl.c index dc2be2f..16d7fbd 100644 --- a/lustre/lclient/lcommon_cl.c +++ b/lustre/lclient/lcommon_cl.c @@ -26,10 +26,13 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. */ /* + * Copyright (c) 2011 Whamcloud, Inc. + */ +/* * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. * @@ -60,16 +63,6 @@ #include #include #include -# include -# ifdef HAVE_XTIO_H -# include -# endif -# include -# include -# include -# ifdef HAVE_FILE_H -# include -# endif # include #endif @@ -300,7 +293,7 @@ static struct lu_env *ccc_inode_fini_env = NULL; * A mutex serializing calls to slp_inode_fini() under extreme memory * pressure, when environments cannot be allocated. */ -static DEFINE_MUTEX(ccc_inode_fini_guard); +static CFS_DEFINE_MUTEX(ccc_inode_fini_guard); static int dummy_refcheck; int ccc_global_init(struct lu_device_type *device_type) @@ -308,15 +301,26 @@ int ccc_global_init(struct lu_device_type *device_type) int result; result = lu_kmem_init(ccc_caches); - if (result == 0) { - result = lu_device_type_init(device_type); - ccc_inode_fini_env = cl_env_alloc(&dummy_refcheck, - LCT_REMEMBER|LCT_NOREF); - if (IS_ERR(ccc_inode_fini_env)) - result = PTR_ERR(ccc_inode_fini_env); - else - ccc_inode_fini_env->le_ctx.lc_cookie = 0x4; + if (result) + return result; + + result = lu_device_type_init(device_type); + if (result) + goto out_kmem; + + ccc_inode_fini_env = cl_env_alloc(&dummy_refcheck, + LCT_REMEMBER|LCT_NOREF); + if (IS_ERR(ccc_inode_fini_env)) { + result = PTR_ERR(ccc_inode_fini_env); + goto out_device; } + + ccc_inode_fini_env->le_ctx.lc_cookie = 0x4; + return 0; +out_device: + lu_device_type_fini(device_type); +out_kmem: + lu_kmem_fini(ccc_caches); return result; } @@ -629,9 +633,6 @@ int ccc_lock_fits_into(const struct lu_env *env, /* * Also, don't match incomplete write locks for read, otherwise read * would enqueue missing sub-locks in the write mode. - * - * XXX this is a candidate for generic locking policy, to be moved - * into cl_lock_lookup(). */ else if (need->cld_mode != descr->cld_mode) result = lock->cll_state >= CLS_ENQUEUED; @@ -685,16 +686,18 @@ void ccc_lock_state(const struct lu_env *env, if (rc == 0) { if (lock->cll_descr.cld_start == 0 && lock->cll_descr.cld_end == CL_PAGE_EOF) { - cl_isize_write(inode, attr->cat_kms); - CDEBUG(D_INODE, DFID" updating i_size %llu\n", + cl_isize_write_nolock(inode, attr->cat_kms); + CDEBUG(D_INODE|D_VFSTRACE, + DFID" updating i_size "LPU64"\n", PFID(lu_object_fid(&obj->co_lu)), (__u64)cl_isize_read(inode)); } cl_inode_mtime(inode) = attr->cat_mtime; cl_inode_atime(inode) = attr->cat_atime; cl_inode_ctime(inode) = attr->cat_ctime; - } else - CL_LOCK_DEBUG(D_ERROR, env, lock, "attr_get: %i\n", rc); + } else { + CL_LOCK_DEBUG(D_INFO, env, lock, "attr_get: %d\n", rc); + } cl_object_attr_unlock(obj); cl_isize_unlock(inode, 0); } @@ -725,7 +728,7 @@ int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io, CLOBINVRNT(env, obj, ccc_object_invariant(obj)); ENTRY; - CDEBUG(D_VFSTRACE, "lock: %i [%lu, %lu]\n", mode, start, end); + CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end); memset(&cio->cui_link, 0, sizeof cio->cui_link); @@ -738,8 +741,8 @@ int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io, descr->cld_obj = obj; descr->cld_start = start; descr->cld_end = end; + descr->cld_enq_flags = enqflags; - cio->cui_link.cill_enq_flags = enqflags; cl_io_lock_add(env, io, &cio->cui_link); RETURN(0); } @@ -751,12 +754,9 @@ void ccc_io_update_iov(const struct lu_env *env, size_t size = io->u.ci_rw.crw_count; cio->cui_iov_olen = 0; - if (!cl_is_normalio(env, io) || size == cio->cui_tot_count) + if (!cl_is_normalio(env, io)) return; - if (cio->cui_tot_nrsegs == 0) - cio->cui_tot_nrsegs = cio->cui_nrsegs; - for (i = 0; i < cio->cui_tot_nrsegs; i++) { struct iovec *iv = &cio->cui_iov[i]; @@ -934,9 +934,14 @@ int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, */ if (cl_isize_read(inode) < kms) { if (vfslock) - cl_isize_write(inode, kms); - else cl_isize_write_nolock(inode, kms); + else + cl_isize_write(inode, kms); + CDEBUG(D_VFSTRACE, + DFID" updating i_size "LPU64"\n", + PFID(lu_object_fid(&obj->co_lu)), + (__u64)cl_isize_read(inode)); + } } } @@ -955,6 +960,9 @@ void ccc_req_completion(const struct lu_env *env, { struct ccc_req *vrq; + if (ioret > 0) + cl_stats_tally(slice->crs_dev, slice->crs_req->crq_type, ioret); + vrq = cl2ccc_req(slice); OBD_SLAB_FREE_PTR(vrq, ccc_req_kmem); } @@ -967,13 +975,15 @@ void ccc_req_completion(const struct lu_env *env, * * - o_mode * - * - o_fid (filled with inode number?!) + * - o_parent_seq * * - o_[ug]id * - * - o_generation + * - o_parent_oid + * + * - o_parent_ver * - * - and IO epoch (stored in o_easize), + * - o_ioepoch, * * and capability. */ @@ -1001,13 +1011,24 @@ void ccc_req_attr_set(const struct lu_env *env, if (slice->crs_req->crq_type == CRT_WRITE) { if (flags & OBD_MD_FLEPOCH) { oa->o_valid |= OBD_MD_FLEPOCH; - oa->o_easize = cl_i2info(inode)->lli_ioepoch; + oa->o_ioepoch = cl_i2info(inode)->lli_ioepoch; valid_flags |= OBD_MD_FLMTIME|OBD_MD_FLCTIME| - OBD_MD_FLUID|OBD_MD_FLGID| - OBD_MD_FLFID|OBD_MD_FLGENER; + OBD_MD_FLUID|OBD_MD_FLGID; } } - obdo_from_inode(oa, inode, valid_flags & flags); + obdo_from_inode(oa, inode, &cl_i2info(inode)->lli_fid, + valid_flags & flags); +#ifdef __KERNEL__ + /* Bug11742 - set the OBD_FL_MMAP flag for memory mapped files */ + if (cfs_atomic_read(&(cl_inode2ccc(inode)->cob_mmap_cnt)) != 0) { + if (!(oa->o_valid & OBD_MD_FLFLAGS)) { + oa->o_valid |= OBD_MD_FLFLAGS; + oa->o_flags = OBD_FL_MMAP; + } else { + oa->o_flags |= OBD_FL_MMAP; + } + } +#endif } const struct cl_req_operations ccc_req_ops = { @@ -1015,9 +1036,8 @@ const struct cl_req_operations ccc_req_ops = { .cro_completion = ccc_req_completion }; -/* Setattr helpers */ -int cl_setattr_do_truncate(struct inode *inode, loff_t size, - struct obd_capa *capa) +int cl_setattr_ost(struct inode *inode, const struct iattr *attr, + struct obd_capa *capa) { struct lu_env *env; struct cl_io *io; @@ -1030,11 +1050,17 @@ int cl_setattr_do_truncate(struct inode *inode, loff_t size, if (IS_ERR(env)) RETURN(PTR_ERR(env)); - io = &ccc_env_info(env)->cti_io; + io = ccc_env_thread_io(env); io->ci_obj = cl_i2info(inode)->lli_clob; - io->u.ci_truncate.tr_size = size; - io->u.ci_truncate.tr_capa = capa; - if (cl_io_init(env, io, CIT_TRUNC, io->ci_obj) == 0) + + io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime); + io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime); + io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime); + io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size; + io->u.ci_setattr.sa_valid = attr->ia_valid; + io->u.ci_setattr.sa_capa = capa; + + if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) result = cl_io_loop(env, io); else result = io->ci_result; @@ -1043,45 +1069,6 @@ int cl_setattr_do_truncate(struct inode *inode, loff_t size, RETURN(result); } -int cl_setattr_ost(struct inode *inode, struct obd_capa *capa) -{ - struct cl_inode_info *lli = cl_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - int rc; - obd_flag flags; - struct obd_info oinfo = { { { 0 } } }; - struct obdo *oa; - - OBDO_ALLOC(oa); - if (oa) { - oa->o_id = lsm->lsm_object_id; - oa->o_gr = lsm->lsm_object_gr; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; - - flags = OBD_MD_FLTYPE | OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLFID | OBD_MD_FLGENER | - OBD_MD_FLGROUP; - - obdo_from_inode(oa, inode, flags); - - oinfo.oi_oa = oa; - oinfo.oi_md = lsm; - oinfo.oi_capa = capa; - - /* XXX: this looks unnecessary now. */ - rc = obd_setattr_rqset(cl_i2sbi(inode)->ll_dt_exp, &oinfo, - NULL); - if (rc) - CERROR("obd_setattr_async fails: rc=%d\n", rc); - OBDO_FREE(oa); - } else { - rc = -ENOMEM; - } - return rc; -} - - /***************************************************************************** * * Type conversions. @@ -1188,7 +1175,7 @@ int cl_inode_init(struct inode *inode, struct lustre_md *md) struct cl_object *clob; struct lu_site *site; struct lu_fid *fid; - const struct cl_object_conf conf = { + struct cl_object_conf conf = { .coc_inode = inode, .u = { .coc_md = md @@ -1197,7 +1184,6 @@ int cl_inode_init(struct inode *inode, struct lustre_md *md) int result = 0; int refcheck; - /* LASSERT(inode->i_state & I_NEW); */ LASSERT(md->body->valid & OBD_MD_FLID); if (!S_ISREG(cl_inode_mode(inode))) @@ -1213,6 +1199,14 @@ int cl_inode_init(struct inode *inode, struct lustre_md *md) LASSERT(fid_is_sane(fid)); if (lli->lli_clob == NULL) { + /* clob is slave of inode, empty lli_clob means for new inode, + * there is no clob in cache with the given fid, so it is + * unnecessary to perform lookup-alloc-lookup-insert, just + * alloc and insert directly. */ +#ifdef __KERNEL__ + LASSERT(inode->i_state & I_NEW); +#endif + conf.coc_lu.loc_flags = LOC_F_NEW; clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev), fid, &conf); if (!IS_ERR(clob)) { @@ -1236,6 +1230,43 @@ int cl_inode_init(struct inode *inode, struct lustre_md *md) return result; } +/** + * Wait for others drop their references of the object at first, then we drop + * the last one, which will lead to the object be destroyed immediately. + * Must be called after cl_object_kill() against this object. + * + * The reason we want to do this is: destroying top object will wait for sub + * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs) + * to initiate top object destroying which may deadlock. See bz22520. + */ +static void cl_object_put_last(struct lu_env *env, struct cl_object *obj) +{ + struct lu_object_header *header = obj->co_lu.lo_header; + cfs_waitlink_t waiter; + + if (unlikely(cfs_atomic_read(&header->loh_ref) != 1)) { + struct lu_site *site = obj->co_lu.lo_dev->ld_site; + struct lu_site_bkt_data *bkt; + + bkt = lu_site_bkt_from_fid(site, &header->loh_fid); + + cfs_waitlink_init(&waiter); + cfs_waitq_add(&bkt->lsb_marche_funebre, &waiter); + + while (1) { + cfs_set_current_state(CFS_TASK_UNINT); + if (cfs_atomic_read(&header->loh_ref) == 1) + break; + cfs_waitq_wait(&waiter, CFS_TASK_UNINT); + } + + cfs_set_current_state(CFS_TASK_RUNNING); + cfs_waitq_del(&bkt->lsb_marche_funebre, &waiter); + } + + cl_object_put(env, obj); +} + void cl_inode_fini(struct inode *inode) { struct lu_env *env; @@ -1251,7 +1282,7 @@ void cl_inode_fini(struct inode *inode) env = cl_env_get(&refcheck); emergency = IS_ERR(env); if (emergency) { - mutex_lock(&ccc_inode_fini_guard); + cfs_mutex_lock(&ccc_inode_fini_guard); LASSERT(ccc_inode_fini_env != NULL); cl_env_implant(ccc_inode_fini_env, &refcheck); env = ccc_inode_fini_env; @@ -1263,11 +1294,11 @@ void cl_inode_fini(struct inode *inode) */ cl_object_kill(env, clob); lu_object_ref_del(&clob->co_lu, "inode", inode); - cl_object_put(env, clob); + cl_object_put_last(env, clob); lli->lli_clob = NULL; if (emergency) { cl_env_unplant(ccc_inode_fini_env, &refcheck); - mutex_unlock(&ccc_inode_fini_guard); + cfs_mutex_unlock(&ccc_inode_fini_guard); } else cl_env_put(env, &refcheck); cl_env_reexit(cookie); @@ -1298,25 +1329,12 @@ __u16 ll_dirent_type_get(struct lu_dirent *ent) /** * build inode number from passed @fid */ -ino_t cl_fid_build_ino(const struct lu_fid *fid) +__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32) { - ino_t ino; - ENTRY; - - if (fid_is_igif(fid)) { - ino = lu_igif_ino(fid); - RETURN(ino); - } - - /* Very stupid and having many downsides inode allocation algorithm - * based on fid. */ - ino = fid_flatten(fid) & 0xFFFFFFFF; - - if (unlikely(ino == 0)) - /* the first result ino is 0xFFC001, so this is rarely used */ - ino = 0xffbcde; - ino = ino | 0x80000000; - RETURN(ino); + if (BITS_PER_LONG == 32 || api32) + RETURN(fid_flatten32(fid)); + else + RETURN(fid_flatten(fid)); } /**