/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2014, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * cl code shared between vvp and liblustre (and other Lustre clients in the
 * future).
 *
 *   Author: Nikita Danilov <nikita.danilov@sun.com>
 */
#define DEBUG_SUBSYSTEM S_LLITE

#include <libcfs/libcfs.h>
#include <linux/sched.h>
#include <linux/quotaops.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/rbtree.h>

#include <obd_support.h>
#include <lustre_fid.h>
#include <lustre_dlm.h>
#include <lustre_ver.h>
#include <lustre_mdc.h>
#include <cl_object.h>

#include "llite_internal.h"
/*
 * ccc_ prefix stands for "Common Client Code".
 */

static struct kmem_cache *ccc_thread_kmem;

static struct lu_kmem_descr ccc_caches[] = {
        {
                .ckd_cache = &ccc_thread_kmem,
                .ckd_name  = "ccc_thread_kmem",
                .ckd_size  = sizeof(struct ccc_thread_info),
        },
        {
                .ckd_cache = NULL
        }
};
/*****************************************************************************
 *
 * Vvp device and device type functions.
 *
 */
void *ccc_key_init(const struct lu_context *ctx, struct lu_context_key *key)
{
        struct ccc_thread_info *info;

        OBD_SLAB_ALLOC_PTR_GFP(info, ccc_thread_kmem, GFP_NOFS);
        if (info == NULL)
                info = ERR_PTR(-ENOMEM);
        return info;
}
void ccc_key_fini(const struct lu_context *ctx,
                  struct lu_context_key *key, void *data)
{
        struct ccc_thread_info *info = data;

        OBD_SLAB_FREE_PTR(info, ccc_thread_kmem);
}
struct lu_context_key ccc_key = {
        .lct_tags = LCT_CL_THREAD,
        .lct_init = ccc_key_init,
        .lct_fini = ccc_key_fini
};
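
/*
 * Sketch of how per-thread scratch space is reached through the key above;
 * the exact helper shape is assumed here (the real accessor lives in the
 * headers), but lu_context_key_get() is the underlying primitive:
 *
 *      struct ccc_thread_info *info =
 *              lu_context_key_get(&env->le_ctx, &ccc_key);
 */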
/**
 * An `emergency' environment used by ccc_inode_fini() when cl_env_get()
 * fails. Access to this environment is serialized by the
 * ccc_inode_fini_guard mutex.
 */
static struct lu_env *ccc_inode_fini_env = NULL;
/**
 * A mutex serializing calls to slp_inode_fini() under extreme memory
 * pressure, when environments cannot be allocated.
 */
static DEFINE_MUTEX(ccc_inode_fini_guard);
static int dummy_refcheck;
int ccc_global_init(struct lu_device_type *device_type)
{
        int result;

        result = lu_kmem_init(ccc_caches);
        if (result != 0)
                return result;

        result = lu_device_type_init(device_type);
        if (result != 0)
                goto out_kmem;

        ccc_inode_fini_env = cl_env_alloc(&dummy_refcheck,
                                          LCT_REMEMBER | LCT_NOREF);
        if (IS_ERR(ccc_inode_fini_env)) {
                result = PTR_ERR(ccc_inode_fini_env);
                goto out_device;
        }

        ccc_inode_fini_env->le_ctx.lc_cookie = 0x4;
        return 0;

out_device:
        lu_device_type_fini(device_type);
out_kmem:
        lu_kmem_fini(ccc_caches);
        return result;
}
void ccc_global_fini(struct lu_device_type *device_type)
{
        if (ccc_inode_fini_env != NULL) {
                cl_env_put(ccc_inode_fini_env, &dummy_refcheck);
                ccc_inode_fini_env = NULL;
        }
        lu_device_type_fini(device_type);
        lu_kmem_fini(ccc_caches);
}
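
/**
 * Propagate attribute changes (size and [amc]time) to the OST objects
 * backing a regular file, by running a CIT_SETATTR cl_io against its
 * cl_object.
 */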
int cl_setattr_ost(struct inode *inode, const struct iattr *attr,
                   struct obd_capa *capa)
{
        struct lu_env *env;
        struct cl_io  *io;
        int            result;
        int            refcheck;
        ENTRY;

        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                RETURN(PTR_ERR(env));

        io = ccc_env_thread_io(env);
        io->ci_obj = ll_i2info(inode)->lli_clob;

        io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime);
        io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime);
        io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
        io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
        io->u.ci_setattr.sa_valid = attr->ia_valid;
        io->u.ci_setattr.sa_parent_fid = ll_inode2fid(inode);
        io->u.ci_setattr.sa_capa = capa;

again:
        if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
                struct vvp_io *vio = vvp_env_io(env);

                if (attr->ia_valid & ATTR_FILE)
                        /* populate the file descriptor for ftruncate to
                         * honor group lock - see LU-787 */
                        vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file);

                result = cl_io_loop(env, io);
        } else {
                result = io->ci_result;
        }
        cl_io_fini(env, io);
        if (unlikely(io->ci_need_restart))
                goto again;
        /* HSM import case: the file is released and cannot be restored, so
         * there is no need to fail unless restore registration itself
         * failed with -ENODATA. */
        if (result == -ENODATA && io->ci_restore_needed &&
            io->ci_result != -ENODATA)
                result = 0;
        cl_env_put(env, &refcheck);
        RETURN(result);
}
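
/*
 * Illustrative caller sketch (assumed, simplified from the truncate path;
 * "new_size" is hypothetical, and real callers also pass ATTR_FILE plus the
 * timestamps):
 *
 *      struct iattr attr = {
 *              .ia_valid = ATTR_SIZE,
 *              .ia_size  = new_size,
 *      };
 *      rc = cl_setattr_ost(inode, &attr, NULL);
 */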
/**
 * Initialize or update CLIO structures for regular files when new
 * meta-data arrives from the server.
 *
 * \param inode regular file inode
 * \param md    new file metadata from MDS
 *
 * - allocates a cl_object if necessary;
 * - updates the layout if the object already existed.
 */
int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
{
        struct lu_env        *env;
        struct ll_inode_info *lli;
        struct cl_object     *clob;
        struct lu_site       *site;
        struct lu_fid        *fid;
        struct cl_object_conf conf = {
                .coc_inode = inode,
                .u = {
                        .coc_md = md
                }
        };
        int result = 0;
        int refcheck;

        LASSERT(md->body->mbo_valid & OBD_MD_FLID);
        LASSERT(S_ISREG(inode->i_mode));

        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                return PTR_ERR(env);

        site = ll_i2sbi(inode)->ll_site;
        lli  = ll_i2info(inode);
        fid  = &lli->lli_fid;
        LASSERT(fid_is_sane(fid));

        if (lli->lli_clob == NULL) {
                /* clob is slave of inode, empty lli_clob means for new inode,
                 * there is no clob in cache with the given fid, so it is
                 * unnecessary to perform lookup-alloc-lookup-insert, just
                 * alloc and insert directly. */
                LASSERT(inode->i_state & I_NEW);
                conf.coc_lu.loc_flags = LOC_F_NEW;
                clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev),
                                      fid, &conf);
                if (!IS_ERR(clob)) {
                        /*
                         * No locking is necessary, as the new inode is
                         * locked by the I_NEW bit.
                         */
                        lli->lli_clob = clob;
                        lli->lli_has_smd = lsm_has_objects(md->lsm);
                        lu_object_ref_add(&clob->co_lu, "inode", inode);
                } else {
                        result = PTR_ERR(clob);
                }
        } else {
                result = cl_conf_set(env, lli->lli_clob, &conf);
        }

        cl_env_put(env, &refcheck);

        if (result != 0)
                CERROR("Failure to initialize cl object "DFID": %d\n",
                       PFID(fid), result);
        return result;
}
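
/*
 * Illustrative call site (a sketch, assumed rather than quoted): llite runs
 * this from its inode-update path once an MDS reply has been unpacked into a
 * struct lustre_md, along the lines of
 *
 *      if (S_ISREG(inode->i_mode))
 *              rc = cl_file_inode_init(inode, md);
 */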
/**
 * Wait for other users to drop their references to the object first, then
 * drop the last one ourselves, which causes the object to be destroyed
 * immediately. Must be called after cl_object_kill() against this object.
 *
 * The reason we want this is that destroying the top object waits for its
 * sub-objects to be destroyed first, so the bottom layer (e.g. an AST
 * callback) must not initiate destruction of the top object, which could
 * deadlock. See bz22520.
 */
static void cl_object_put_last(struct lu_env *env, struct cl_object *obj)
{
        struct lu_object_header *header = obj->co_lu.lo_header;
        wait_queue_t             waiter;

        if (unlikely(atomic_read(&header->loh_ref) != 1)) {
                struct lu_site *site = obj->co_lu.lo_dev->ld_site;
                struct lu_site_bkt_data *bkt;

                bkt = lu_site_bkt_from_fid(site, &header->loh_fid);

                init_waitqueue_entry_current(&waiter);
                add_wait_queue(&bkt->lsb_marche_funebre, &waiter);

                while (1) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&header->loh_ref) == 1)
                                break;
                        waitq_wait(&waiter, TASK_UNINTERRUPTIBLE);
                }

                set_current_state(TASK_RUNNING);
                remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
        }

        cl_object_put(env, obj);
}
void cl_inode_fini(struct inode *inode)
{
        struct lu_env        *env;
        struct ll_inode_info *lli  = ll_i2info(inode);
        struct cl_object     *clob = lli->lli_clob;
        int refcheck;
        int emergency;

        if (clob != NULL) {
                void *cookie;

                cookie = cl_env_reenter();
                env = cl_env_get(&refcheck);
                emergency = IS_ERR(env);
                if (emergency) {
                        mutex_lock(&ccc_inode_fini_guard);
                        LASSERT(ccc_inode_fini_env != NULL);
                        cl_env_implant(ccc_inode_fini_env, &refcheck);
                        env = ccc_inode_fini_env;
                }
                /*
                 * The cl_object cache is a slave to the inode cache (which,
                 * in turn, is a slave to the dentry cache); don't keep a
                 * cl_object in memory when its master has been evicted.
                 */
                cl_object_kill(env, clob);
                lu_object_ref_del(&clob->co_lu, "inode", inode);
                cl_object_put_last(env, clob);
                lli->lli_clob = NULL;
                if (emergency) {
                        cl_env_unplant(ccc_inode_fini_env, &refcheck);
                        mutex_unlock(&ccc_inode_fini_guard);
                } else {
                        cl_env_put(env, &refcheck);
                }
                cl_env_reexit(cookie);
        }
}
/**
 * Return the DT_* file type for a given lu_dirent entry. The DT_* flag
 * should be converted to the particular OS file type by the platform
 * llite module.
 */
__u16 ll_dirent_type_get(struct lu_dirent *ent)
{
        __u16 type = 0;
        struct luda_type *lt;
        int len = 0;

        if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) {
                const unsigned align = sizeof(struct luda_type) - 1;

                /* the type record follows the name, rounded up to the
                 * alignment of struct luda_type */
                len = le16_to_cpu(ent->lde_namelen);
                len = (len + align) & ~align;
                lt = (void *)ent->lde_name + len;
                type = IFTODT(le16_to_cpu(lt->lt_type));
        }
        return type;
}
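
/*
 * Worked example of the alignment arithmetic above: struct luda_type holds a
 * single __u16, so its size is 2 and align is 1. A name of length 5 rounds
 * up to len = (5 + 1) & ~1 = 6, placing lt on the next 2-byte boundary
 * after the name.
 */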
/**
 * Build an inode number from the passed @fid.
 */
__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
{
        if (BITS_PER_LONG == 32 || api32)
                RETURN(fid_flatten32(fid));
        else
                RETURN(fid_flatten(fid));
}
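
/*
 * Usage sketch (assumed, mirroring how llite fills st_ino; the 32-bit API
 * decision helper named here is not defined in this file):
 *
 *      inode->i_ino = cl_fid_build_ino(fid, ll_need_32bit_api(sbi));
 */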
/**
 * Build an inode generation from the passed @fid. If our FID overflows the
 * 32-bit inode number, return a non-zero generation to distinguish them.
 */
__u32 cl_fid_build_gen(const struct lu_fid *fid)
{
        __u32 gen;
        ENTRY;

        if (fid_is_igif(fid)) {
                gen = lu_igif_gen(fid);
                RETURN(gen);
        }

        gen = (fid_flatten(fid) >> 32);
        RETURN(gen);
}
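
/*
 * Worked example for the non-IGIF branch above: if fid_flatten(fid) were
 * 0x123456789ULL, the generation would be 0x123456789ULL >> 32 == 0x1,
 * i.e. non-zero exactly when the flattened FID no longer fits in 32 bits.
 */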
/* The lsm is unreliable after the HSM implementation, as the layout can be
 * changed at any time. It is kept only to support old, non-clio-ized
 * interfaces. Calling clio operations while holding this extra layout
 * refcount will deadlock: if the layout changes during the IO,
 * ll_layout_refresh() must wait for the refcount to drop to zero before it
 * can destroy the older layout.
 *
 * Note that the lsm returned by this function may be invalid unless it is
 * called under the layout lock - MDS_INODELOCK_LAYOUT. */
struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode)
{
        return lov_lsm_get(ll_i2info(inode)->lli_clob);
}
inline void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm)
{
        lov_lsm_put(ll_i2info(inode)->lli_clob, lsm);
}
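
/*
 * Illustrative usage (a sketch): callers pair the get/put and, per the
 * warning above, must not start clio operations while the reference is
 * held:
 *
 *      struct lov_stripe_md *lsm = ccc_inode_lsm_get(inode);
 *      if (lsm != NULL) {
 *              ... inspect lsm fields ...
 *              ccc_inode_lsm_put(inode, lsm);
 *      }
 */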