X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flclient%2Flcommon_cl.c;h=fede3b43c16ed4f753558f562150bc32cea43f9e;hp=d2a4d1847746ad1857bfdbbdec7d074fb1e8f4c3;hb=96a5daa0c08d7b42ec368080a2a7f0dfb110ef98;hpb=c5607338d96aa222319bebe58aa16785f463315e diff --git a/lustre/lclient/lcommon_cl.c b/lustre/lclient/lcommon_cl.c index d2a4d18..fede3b4 100644 --- a/lustre/lclient/lcommon_cl.c +++ b/lustre/lclient/lcommon_cl.c @@ -26,7 +26,7 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. */ /* @@ -60,16 +60,6 @@ #include #include #include -# include -# ifdef HAVE_XTIO_H -# include -# endif -# include -# include -# include -# ifdef HAVE_FILE_H -# include -# endif # include #endif @@ -300,7 +290,7 @@ static struct lu_env *ccc_inode_fini_env = NULL; * A mutex serializing calls to slp_inode_fini() under extreme memory * pressure, when environments cannot be allocated. */ -static DEFINE_MUTEX(ccc_inode_fini_guard); +static CFS_DEFINE_MUTEX(ccc_inode_fini_guard); static int dummy_refcheck; int ccc_global_init(struct lu_device_type *device_type) @@ -601,7 +591,6 @@ int ccc_lock_wait(const struct lu_env *env, const struct cl_lock_slice *slice) * cached lock "fits" into io. * * \param slice lock to be checked - * * \param io IO that wants a lock. * * \see lov_lock_fits_into(). @@ -626,12 +615,10 @@ int ccc_lock_fits_into(const struct lu_env *env, */ if (cio->cui_glimpse) result = descr->cld_mode != CLM_WRITE; + /* * Also, don't match incomplete write locks for read, otherwise read * would enqueue missing sub-locks in the write mode. - * - * XXX this is a candidate for generic locking policy, to be moved - * into cl_lock_lookup(). */ else if (need->cld_mode != descr->cld_mode) result = lock->cll_state >= CLS_ENQUEUED; @@ -685,16 +672,18 @@ void ccc_lock_state(const struct lu_env *env, if (rc == 0) { if (lock->cll_descr.cld_start == 0 && lock->cll_descr.cld_end == CL_PAGE_EOF) { - cl_isize_write(inode, attr->cat_kms); - CDEBUG(D_INODE, DFID" updating i_size %llu\n", + cl_isize_write_nolock(inode, attr->cat_kms); + CDEBUG(D_INODE|D_VFSTRACE, + DFID" updating i_size "LPU64"\n", PFID(lu_object_fid(&obj->co_lu)), (__u64)cl_isize_read(inode)); } cl_inode_mtime(inode) = attr->cat_mtime; cl_inode_atime(inode) = attr->cat_atime; cl_inode_ctime(inode) = attr->cat_ctime; - } else - CL_LOCK_DEBUG(D_ERROR, env, lock, "attr_get: %i\n", rc); + } else { + CL_LOCK_DEBUG(D_INFO, env, lock, "attr_get: %d\n", rc); + } cl_object_attr_unlock(obj); cl_isize_unlock(inode, 0); } @@ -725,15 +714,21 @@ int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io, CLOBINVRNT(env, obj, ccc_object_invariant(obj)); ENTRY; - CDEBUG(D_VFSTRACE, "lock: %i [%lu, %lu]\n", mode, start, end); + CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end); memset(&cio->cui_link, 0, sizeof cio->cui_link); - descr->cld_mode = mode; + + if (cio->cui_fd && (cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { + descr->cld_mode = CLM_GROUP; + descr->cld_gid = cio->cui_fd->fd_grouplock.cg_gid; + } else { + descr->cld_mode = mode; + } descr->cld_obj = obj; descr->cld_start = start; descr->cld_end = end; + descr->cld_enq_flags = enqflags; - cio->cui_link.cill_enq_flags = enqflags; cl_io_lock_add(env, io, &cio->cui_link); RETURN(0); } @@ -745,12 +740,9 @@ void ccc_io_update_iov(const struct lu_env *env, size_t size = io->u.ci_rw.crw_count; cio->cui_iov_olen = 0; - if (cl_io_is_sendfile(io) || size == cio->cui_tot_count) + if (!cl_is_normalio(env, io)) return; - if (cio->cui_tot_nrsegs == 0) - cio->cui_tot_nrsegs = cio->cui_nrsegs; - for (i = 0; i < cio->cui_tot_nrsegs; i++) { struct iovec *iv = &cio->cui_iov[i]; @@ -793,7 +785,7 @@ void ccc_io_advance(const struct lu_env *env, CLOBINVRNT(env, obj, ccc_object_invariant(obj)); - if (!cl_io_is_sendfile(io) && io->ci_continue) { + if (cl_is_normalio(env, io) && io->ci_continue) { /* update the iov */ LASSERT(cio->cui_tot_nrsegs >= cio->cui_nrsegs); LASSERT(cio->cui_tot_count >= nob); @@ -928,9 +920,14 @@ int ccc_prep_size(const struct lu_env *env, struct cl_object *obj, */ if (cl_isize_read(inode) < kms) { if (vfslock) - cl_isize_write(inode, kms); - else cl_isize_write_nolock(inode, kms); + else + cl_isize_write(inode, kms); + CDEBUG(D_VFSTRACE, + DFID" updating i_size "LPU64"\n", + PFID(lu_object_fid(&obj->co_lu)), + (__u64)cl_isize_read(inode)); + } } } @@ -961,13 +958,15 @@ void ccc_req_completion(const struct lu_env *env, * * - o_mode * - * - o_fid (filled with inode number?!) + * - o_parent_seq * * - o_[ug]id * - * - o_generation + * - o_parent_oid + * + * - o_parent_ver * - * - and IO epoch (stored in o_easize), + * - o_ioepoch, * * and capability. */ @@ -995,13 +994,24 @@ void ccc_req_attr_set(const struct lu_env *env, if (slice->crs_req->crq_type == CRT_WRITE) { if (flags & OBD_MD_FLEPOCH) { oa->o_valid |= OBD_MD_FLEPOCH; - oa->o_easize = cl_i2info(inode)->lli_ioepoch; + oa->o_ioepoch = cl_i2info(inode)->lli_ioepoch; valid_flags |= OBD_MD_FLMTIME|OBD_MD_FLCTIME| - OBD_MD_FLUID|OBD_MD_FLGID| - OBD_MD_FLFID|OBD_MD_FLGENER; + OBD_MD_FLUID|OBD_MD_FLGID; + } + } + obdo_from_inode(oa, inode, &cl_i2info(inode)->lli_fid, + valid_flags & flags); +#ifdef __KERNEL__ + /* Bug11742 - set the OBD_FL_MMAP flag for memory mapped files */ + if (cfs_atomic_read(&(cl_inode2ccc(inode)->cob_mmap_cnt)) != 0) { + if (!(oa->o_valid & OBD_MD_FLFLAGS)) { + oa->o_valid |= OBD_MD_FLFLAGS; + oa->o_flags = OBD_FL_MMAP; + } else { + oa->o_flags |= OBD_FL_MMAP; } } - obdo_from_inode(oa, inode, valid_flags & flags); +#endif } const struct cl_req_operations ccc_req_ops = { @@ -1009,9 +1019,8 @@ const struct cl_req_operations ccc_req_ops = { .cro_completion = ccc_req_completion }; -/* Setattr helpers */ -int cl_setattr_do_truncate(struct inode *inode, loff_t size, - struct obd_capa *capa) +int cl_setattr_ost(struct inode *inode, const struct iattr *attr, + struct obd_capa *capa) { struct lu_env *env; struct cl_io *io; @@ -1024,11 +1033,17 @@ int cl_setattr_do_truncate(struct inode *inode, loff_t size, if (IS_ERR(env)) RETURN(PTR_ERR(env)); - io = &ccc_env_info(env)->cti_io; + io = ccc_env_thread_io(env); io->ci_obj = cl_i2info(inode)->lli_clob; - io->u.ci_truncate.tr_size = size; - io->u.ci_truncate.tr_capa = capa; - if (cl_io_init(env, io, CIT_TRUNC, io->ci_obj) == 0) + + io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime); + io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime); + io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime); + io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size; + io->u.ci_setattr.sa_valid = attr->ia_valid; + io->u.ci_setattr.sa_capa = capa; + + if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) result = cl_io_loop(env, io); else result = io->ci_result; @@ -1037,45 +1052,6 @@ int cl_setattr_do_truncate(struct inode *inode, loff_t size, RETURN(result); } -int cl_setattr_ost(struct inode *inode, struct obd_capa *capa) -{ - struct cl_inode_info *lli = cl_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - int rc; - obd_flag flags; - struct obd_info oinfo = { { { 0 } } }; - struct obdo *oa; - - OBDO_ALLOC(oa); - if (oa) { - oa->o_id = lsm->lsm_object_id; - oa->o_gr = lsm->lsm_object_gr; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; - - flags = OBD_MD_FLTYPE | OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLFID | OBD_MD_FLGENER | - OBD_MD_FLGROUP; - - obdo_from_inode(oa, inode, flags); - - oinfo.oi_oa = oa; - oinfo.oi_md = lsm; - oinfo.oi_capa = capa; - - /* XXX: this looks unnecessary now. */ - rc = obd_setattr_rqset(cl_i2sbi(inode)->ll_dt_exp, &oinfo, - NULL); - if (rc) - CERROR("obd_setattr_async fails: rc=%d\n", rc); - OBDO_FREE(oa); - } else { - rc = -ENOMEM; - } - return rc; -} - - /***************************************************************************** * * Type conversions. @@ -1230,6 +1206,43 @@ int cl_inode_init(struct inode *inode, struct lustre_md *md) return result; } +/** + * Wait for others drop their references of the object at first, then we drop + * the last one, which will lead to the object be destroyed immediately. + * Must be called after cl_object_kill() against this object. + * + * The reason we want to do this is: destroying top object will wait for sub + * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs) + * to initiate top object destroying which may deadlock. See bz22520. + */ +static void cl_object_put_last(struct lu_env *env, struct cl_object *obj) +{ + struct lu_object_header *header = obj->co_lu.lo_header; + cfs_waitlink_t waiter; + + if (unlikely(cfs_atomic_read(&header->loh_ref) != 1)) { + struct lu_site *site = obj->co_lu.lo_dev->ld_site; + struct lu_site_bkt_data *bkt; + + bkt = lu_site_bkt_from_fid(site, &header->loh_fid); + + cfs_waitlink_init(&waiter); + cfs_waitq_add(&bkt->lsb_marche_funebre, &waiter); + + while (1) { + cfs_set_current_state(CFS_TASK_UNINT); + if (cfs_atomic_read(&header->loh_ref) == 1) + break; + cfs_waitq_wait(&waiter, CFS_TASK_UNINT); + } + + cfs_set_current_state(CFS_TASK_RUNNING); + cfs_waitq_del(&bkt->lsb_marche_funebre, &waiter); + } + + cl_object_put(env, obj); +} + void cl_inode_fini(struct inode *inode) { struct lu_env *env; @@ -1245,7 +1258,7 @@ void cl_inode_fini(struct inode *inode) env = cl_env_get(&refcheck); emergency = IS_ERR(env); if (emergency) { - mutex_lock(&ccc_inode_fini_guard); + cfs_mutex_lock(&ccc_inode_fini_guard); LASSERT(ccc_inode_fini_env != NULL); cl_env_implant(ccc_inode_fini_env, &refcheck); env = ccc_inode_fini_env; @@ -1257,11 +1270,11 @@ void cl_inode_fini(struct inode *inode) */ cl_object_kill(env, clob); lu_object_ref_del(&clob->co_lu, "inode", inode); - cl_object_put(env, clob); + cl_object_put_last(env, clob); lli->lli_clob = NULL; if (emergency) { cl_env_unplant(ccc_inode_fini_env, &refcheck); - mutex_unlock(&ccc_inode_fini_guard); + cfs_mutex_unlock(&ccc_inode_fini_guard); } else cl_env_put(env, &refcheck); cl_env_reexit(cookie); @@ -1289,3 +1302,34 @@ __u16 ll_dirent_type_get(struct lu_dirent *ent) } return type; } + +/** + * build inode number from passed @fid */ +__u64 cl_fid_build_ino(const struct lu_fid *fid, int need_32bit) +{ +#if BITS_PER_LONG == 32 + RETURN(fid_flatten32(fid)); +#else + if (need_32bit) + RETURN(fid_flatten32(fid)); + else + RETURN(fid_flatten(fid)); +#endif +} + +/** + * build inode generation from passed @fid. If our FID overflows the 32-bit + * inode number then return a non-zero generation to distinguish them. */ +__u32 cl_fid_build_gen(const struct lu_fid *fid) +{ + __u32 gen; + ENTRY; + + if (fid_is_igif(fid)) { + gen = lu_igif_gen(fid); + RETURN(gen); + } + + gen = (fid_flatten(fid) >> 32); + RETURN(gen); +}