4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include <linux/user_namespace.h>
49 #ifdef HAVE_UIDGID_HEADER
50 # include <linux/uidgid.h>
52 #include <lustre/ll_fiemap.h>
53 #include <lustre_ioctl.h>
55 #include "cl_object.h"
57 #include "llite_internal.h"
58 #include "vvp_internal.h"
61 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
63 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
66 static enum llioc_iter
67 ll_iocontrol_call(struct inode *inode, struct file *file,
68 unsigned int cmd, unsigned long arg, int *rcp);
/*
 * Allocate a per-open ll_file_data from the dedicated slab.
 * GFP_NOFS prevents filesystem recursion during memory reclaim.
 * NOTE(review): elided listing — the NULL-check and RETURN lines
 * between the visible statements are not shown here.
 */
70 static struct ll_file_data *ll_file_data_get(void)
72 struct ll_file_data *fd;
74 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
/* start each open with a clean write-failure state */
78 fd->fd_write_failed = false;
/* Release a ll_file_data back to its slab (counterpart of ll_file_data_get). */
83 static void ll_file_data_put(struct ll_file_data *fd)
86 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Copy the inode's current attributes (mode, times, size, blocks, flags),
 * the open file handle and the MDS capability into @op_data, for use in
 * an MDS request. NOTE(review): elided listing — some lines missing.
 */
89 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
90 struct lustre_handle *fh)
92 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
93 op_data->op_attr.ia_mode = inode->i_mode;
94 op_data->op_attr.ia_atime = inode->i_atime;
95 op_data->op_attr.ia_mtime = inode->i_mtime;
96 op_data->op_attr.ia_ctime = inode->i_ctime;
97 op_data->op_attr.ia_size = i_size_read(inode);
98 op_data->op_attr_blocks = inode->i_blocks;
99 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
101 op_data->op_handle = *fh;
102 op_data->op_capa1 = ll_mdscapa_get(inode);
/* tell the MDS the data was modified locally so it refreshes attrs */
104 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
105 op_data->op_bias |= MDS_DATA_MODIFIED;
109 * Packs all the attributes into @op_data for the CLOSE rpc.
/*
 * Fill @op_data with everything the CLOSE RPC needs: the valid-attribute
 * mask, the packed inode attributes and the open handle.
 * Size/blocks are only sent when the handle had write access (condition
 * partially elided in this listing).
 */
111 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
112 struct obd_client_handle *och)
116 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
117 ATTR_MTIME | ATTR_MTIME_SET |
118 ATTR_CTIME | ATTR_CTIME_SET;
120 if (!(och->och_flags & FMODE_WRITE))
123 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
126 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
127 ll_prep_md_op_data(op_data, inode, NULL, NULL,
128 0, 0, LUSTRE_OPC_ANY, NULL);
133 * Perform a close, possibly with a bias.
134 * The meaning of "data" depends on the value of "bias".
136 * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
137 * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
/*
 * Send the MDS close RPC for an open handle, optionally biased for
 * HSM release (@data = data version) or layout swap (@data = other inode).
 * Frees @och state on the way out. NOTE(review): elided listing — the
 * switch head, several branches, RETURNs and braces are not visible here.
 */
140 static int ll_close_inode_openhandle(struct obd_export *md_exp,
141 struct obd_client_handle *och,
143 enum mds_op_bias bias,
146 struct obd_export *exp = ll_i2mdexp(inode);
147 struct md_op_data *op_data;
148 struct ptlrpc_request *req = NULL;
149 struct obd_device *obd = class_exp2obd(exp);
155 * XXX: in case of LMV, is this correct to access
158 CERROR("Invalid MDC connection handle "LPX64"\n",
159 ll_i2mdexp(inode)->exp_handle.h_cookie);
163 OBD_ALLOC_PTR(op_data);
165 /* XXX We leak openhandle and request here. */
166 GOTO(out, rc = -ENOMEM);
168 ll_prepare_close(inode, op_data, och);
/* bias cases: pack the per-bias payload alongside the lease handle */
170 case MDS_CLOSE_LAYOUT_SWAP:
171 LASSERT(data != NULL);
172 op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
173 op_data->op_data_version = 0;
174 op_data->op_lease_handle = och->och_lease_handle;
175 op_data->op_fid2 = *ll_inode2fid(data);
178 case MDS_HSM_RELEASE:
179 LASSERT(data != NULL);
180 op_data->op_bias |= MDS_HSM_RELEASE;
181 op_data->op_data_version = *(__u64 *)data;
182 op_data->op_lease_handle = och->och_lease_handle;
183 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
187 LASSERT(data == NULL);
191 rc = md_close(md_exp, op_data, och->och_mod, &req);
193 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
194 ll_i2mdexp(inode)->exp_obd->obd_name,
195 PFID(ll_inode2fid(inode)), rc);
198 /* DATA_MODIFIED flag was successfully sent on close, cancel data
199 * modification flag. */
200 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
201 struct ll_inode_info *lli = ll_i2info(inode);
203 spin_lock(&lli->lli_lock);
204 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
205 spin_unlock(&lli->lli_lock);
/* for biased closes, check the server actually executed the intent */
209 op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) {
210 struct mdt_body *body;
212 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
213 if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
217 ll_finish_md_op_data(op_data);
221 md_clear_open_replay_data(md_exp, och);
/* poison the handle so any later use is detectable */
222 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
225 if (req) /* This is close request */
226 ptlrpc_req_finished(req);
/*
 * Really close the MDS open handle of the given mode (write/exec/read),
 * but only when no other local user of that handle remains.
 * NOTE(review): elided listing — handle-swap under the mutex and the
 * RETURN path are not visible here.
 */
230 int ll_md_real_close(struct inode *inode, fmode_t fmode)
232 struct ll_inode_info *lli = ll_i2info(inode);
233 struct obd_client_handle **och_p;
234 struct obd_client_handle *och;
/* pick the per-mode handle slot and its use count */
239 if (fmode & FMODE_WRITE) {
240 och_p = &lli->lli_mds_write_och;
241 och_usecount = &lli->lli_open_fd_write_count;
242 } else if (fmode & FMODE_EXEC) {
243 och_p = &lli->lli_mds_exec_och;
244 och_usecount = &lli->lli_open_fd_exec_count;
246 LASSERT(fmode & FMODE_READ);
247 och_p = &lli->lli_mds_read_och;
248 och_usecount = &lli->lli_open_fd_read_count;
251 mutex_lock(&lli->lli_och_mutex);
252 if (*och_usecount > 0) {
253 /* There are still users of this handle, so skip
255 mutex_unlock(&lli->lli_och_mutex);
261 mutex_unlock(&lli->lli_och_mutex);
264 /* There might be a race and this handle may already
266 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
267 och, inode, 0, NULL);
/*
 * Per-struct-file close: drop group lock and lease if held, close any
 * private open handle, decrement the per-mode open count, and only talk
 * to the MDS (ll_md_real_close) if no matching OPEN lock lets us skip it.
 * Finally detach and free the ll_file_data.
 * NOTE(review): elided listing — braces, lock-mode setup and RETURN
 * lines are not visible here.
 */
273 static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
276 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
277 struct ll_inode_info *lli = ll_i2info(inode);
281 /* clear group lock, if present */
282 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
283 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
285 if (fd->fd_lease_och != NULL) {
288 /* Usually the lease is not released when the
289 * application crashed, we need to release here. */
290 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
291 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
292 PFID(&lli->lli_fid), rc, lease_broken);
294 fd->fd_lease_och = NULL;
297 if (fd->fd_och != NULL) {
298 rc = ll_close_inode_openhandle(md_exp, fd->fd_och, inode, 0,
304 /* Let's see if we have good enough OPEN lock on the file and if
305 we can skip talking to MDS */
306 if (file->f_dentry->d_inode) { /* Can this ever be false? */
308 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
309 struct lustre_handle lockh;
310 struct inode *inode = file->f_dentry->d_inode;
311 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
313 mutex_lock(&lli->lli_och_mutex);
/* drop the per-mode open-fd reference taken at open time */
314 if (fd->fd_omode & FMODE_WRITE) {
316 LASSERT(lli->lli_open_fd_write_count);
317 lli->lli_open_fd_write_count--;
318 } else if (fd->fd_omode & FMODE_EXEC) {
320 LASSERT(lli->lli_open_fd_exec_count);
321 lli->lli_open_fd_exec_count--;
324 LASSERT(lli->lli_open_fd_read_count);
325 lli->lli_open_fd_read_count--;
327 mutex_unlock(&lli->lli_och_mutex);
/* no cached OPEN ibits lock -> must close the handle on the MDS */
329 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
330 LDLM_IBITS, &policy, lockmode,
332 rc = ll_md_real_close(file->f_dentry->d_inode,
336 CERROR("released file has negative dentry: file = %p, "
337 "dentry = %p, name = %s\n",
338 file, file->f_dentry, file->f_dentry->d_name.name);
342 LUSTRE_FPRIVATE(file) = NULL;
343 ll_file_data_put(fd);
344 ll_capa_close(inode);
349 /* While this returns an error code, fput() the caller does not, so we need
350 * to make every effort to clean up all of our state here. Also, applications
351 * rarely check close errors and even if an error is returned they will not
352 * re-try the close call.
/*
 * VFS ->release() for Lustre files: tear down remote-ACL state on the
 * root inode, deauthorize statahead, clear async write errors on
 * regular files, then close via ll_md_close(). The root dentry is
 * special-cased and only frees the private data.
 * NOTE(review): elided listing — several lines (braces, RETURNs) missing.
 */
354 int ll_file_release(struct inode *inode, struct file *file)
356 struct ll_file_data *fd;
357 struct ll_sb_info *sbi = ll_i2sbi(inode);
358 struct ll_inode_info *lli = ll_i2info(inode);
362 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
363 PFID(ll_inode2fid(inode)), inode);
365 #ifdef CONFIG_FS_POSIX_ACL
/* remote-client ACL cleanup applies only to the filesystem root */
366 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
367 inode == inode->i_sb->s_root->d_inode) {
368 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
371 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
372 fd->fd_flags &= ~LL_FILE_RMTACL;
373 rct_del(&sbi->ll_rct, current_pid());
374 et_search_free(&sbi->ll_et, current_pid());
379 if (inode->i_sb->s_root != file->f_dentry)
380 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
381 fd = LUSTRE_FPRIVATE(file);
384 /* The last ref on @file, maybe not the the owner pid of statahead,
385 * because parent and child process can share the same file handle. */
386 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
387 ll_deauthorize_statahead(inode, fd);
389 if (inode->i_sb->s_root == file->f_dentry) {
390 LUSTRE_FPRIVATE(file) = NULL;
391 ll_file_data_put(fd);
/* propagate any deferred async write error to this close */
395 if (!S_ISDIR(inode->i_mode)) {
396 if (lli->lli_clob != NULL)
397 lov_read_and_clear_async_rc(lli->lli_clob);
398 lli->lli_async_rc = 0;
401 rc = ll_md_close(sbi->ll_md_exp, inode, file);
403 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
404 libcfs_debug_dumplog();
/*
 * Enqueue an intent OPEN to the MDS for @file, optionally packing the
 * dentry name (only when the server lacks open-by-fid) and layout @lmm.
 * On success, updates the inode from the reply and sets lock data.
 * NOTE(review): elided listing — error-path labels and RETURNs missing.
 */
409 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
410 struct lookup_intent *itp)
412 struct dentry *de = file->f_dentry;
413 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
414 struct dentry *parent = de->d_parent;
415 const char *name = NULL;
417 struct md_op_data *op_data;
418 struct ptlrpc_request *req = NULL;
422 LASSERT(parent != NULL);
423 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
425 /* if server supports open-by-fid, or file name is invalid, don't pack
426 * name in open request */
427 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
428 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
429 name = de->d_name.name;
430 len = de->d_name.len;
433 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
434 name, len, 0, LUSTRE_OPC_ANY, NULL);
436 RETURN(PTR_ERR(op_data));
437 op_data->op_data = lmm;
438 op_data->op_data_size = lmmsize;
440 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
441 &ll_md_blocking_ast, 0);
442 ll_finish_md_op_data(op_data);
444 /* reason for keep own exit path - don`t flood log
445 * with messages with -ESTALE errors.
447 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
448 it_open_error(DISP_OPEN_OPEN, itp))
450 ll_release_openhandle(de, itp);
454 if (it_disposition(itp, DISP_LOOKUP_NEG))
455 GOTO(out, rc = -ENOENT);
457 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
458 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
459 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
463 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
464 if (!rc && itp->d.lustre.it_lock_mode)
465 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
468 ptlrpc_req_finished(req);
469 ll_intent_drop_lock(itp);
/*
 * Populate an obd_client_handle from the MDT reply body carried in the
 * completed intent, then register the open for replay.
 */
474 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
475 struct obd_client_handle *och)
477 struct ptlrpc_request *req = it->d.lustre.it_data;
478 struct mdt_body *body;
480 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
481 och->och_fh = body->mbo_handle;
482 och->och_fid = body->mbo_fid1;
/* lease handle reuses the intent's lock handle cookie */
483 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
484 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
485 och->och_flags = it->it_flags;
487 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish the local (client-side) part of an open: optionally fill @och
 * from the intent, attach @fd as the file's private data and initialize
 * readahead and cl-context state.
 * NOTE(review): elided listing — the och NULL check and RETURN missing.
 */
490 static int ll_local_open(struct file *file, struct lookup_intent *it,
491 struct ll_file_data *fd, struct obd_client_handle *och)
493 struct inode *inode = file->f_dentry->d_inode;
496 LASSERT(!LUSTRE_FPRIVATE(file));
503 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
508 LUSTRE_FPRIVATE(file) = fd;
509 ll_readahead_init(inode, &fd->fd_ras);
510 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
512 /* ll_cl_context initialize */
513 rwlock_init(&fd->fd_lock);
514 INIT_LIST_HEAD(&fd->fd_lccs);
519 /* Open a file, and (for the very first open) create objects on the OSTs at
520 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
521 * creation or open until ll_lov_setstripe() ioctl is called.
523 * If we already have the stripe MD locally then we don't request it in
524 * md_open(), by passing a lmm_size = 0.
526 * It is up to the application to ensure no other processes open this file
527 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
528 * used. We might be able to avoid races of that sort by getting lli_open_sem
529 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
530 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * VFS ->open() for Lustre: reuse an existing per-mode MDS open handle if
 * one is cached on the inode, otherwise send an intent OPEN (with
 * MDS_OPEN_LOCK | MDS_OPEN_BY_FID) and stash the new handle. Directories
 * get statahead authorization; the root dentry is special-cased.
 * NOTE(review): elided listing — many branch bodies, labels and RETURNs
 * are not visible; code below left byte-identical.
 */
532 int ll_file_open(struct inode *inode, struct file *file)
534 struct ll_inode_info *lli = ll_i2info(inode);
535 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
536 .it_flags = file->f_flags };
537 struct obd_client_handle **och_p = NULL;
538 __u64 *och_usecount = NULL;
539 struct ll_file_data *fd;
543 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
544 PFID(ll_inode2fid(inode)), inode, file->f_flags);
546 it = file->private_data; /* XXX: compat macro */
547 file->private_data = NULL; /* prevent ll_local_open assertion */
549 fd = ll_file_data_get();
551 GOTO(out_openerr, rc = -ENOMEM);
554 if (S_ISDIR(inode->i_mode))
555 ll_authorize_statahead(inode, fd);
557 if (inode->i_sb->s_root == file->f_dentry) {
558 LUSTRE_FPRIVATE(file) = fd;
/* no intent from lookup: build our own open intent from f_flags */
562 if (!it || !it->d.lustre.it_disposition) {
563 /* Convert f_flags into access mode. We cannot use file->f_mode,
564 * because everything but O_ACCMODE mask was stripped from
566 if ((oit.it_flags + 1) & O_ACCMODE)
568 if (file->f_flags & O_TRUNC)
569 oit.it_flags |= FMODE_WRITE;
571 /* kernel only call f_op->open in dentry_open. filp_open calls
572 * dentry_open after call to open_namei that checks permissions.
573 * Only nfsd_open call dentry_open directly without checking
574 * permissions and because of that this code below is safe. */
575 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
576 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
578 /* We do not want O_EXCL here, presumably we opened the file
579 * already? XXX - NFS implications? */
580 oit.it_flags &= ~O_EXCL;
582 /* bug20584, if "it_flags" contains O_CREAT, the file will be
583 * created if necessary, then "IT_CREAT" should be set to keep
584 * consistent with it */
585 if (oit.it_flags & O_CREAT)
586 oit.it_op |= IT_CREAT;
592 /* Let's see if we have file open on MDS already. */
593 if (it->it_flags & FMODE_WRITE) {
594 och_p = &lli->lli_mds_write_och;
595 och_usecount = &lli->lli_open_fd_write_count;
596 } else if (it->it_flags & FMODE_EXEC) {
597 och_p = &lli->lli_mds_exec_och;
598 och_usecount = &lli->lli_open_fd_exec_count;
600 och_p = &lli->lli_mds_read_och;
601 och_usecount = &lli->lli_open_fd_read_count;
604 mutex_lock(&lli->lli_och_mutex);
605 if (*och_p) { /* Open handle is present */
606 if (it_disposition(it, DISP_OPEN_OPEN)) {
607 /* Well, there's extra open request that we do not need,
608 let's close it somehow. This will decref request. */
609 rc = it_open_error(DISP_OPEN_OPEN, it);
611 mutex_unlock(&lli->lli_och_mutex);
612 GOTO(out_openerr, rc);
615 ll_release_openhandle(file->f_dentry, it);
/* share the cached handle: local open only, no new MDS handle */
619 rc = ll_local_open(file, it, fd, NULL);
622 mutex_unlock(&lli->lli_och_mutex);
623 GOTO(out_openerr, rc);
626 LASSERT(*och_usecount == 0);
627 if (!it->d.lustre.it_disposition) {
628 /* We cannot just request lock handle now, new ELC code
629 means that one of other OPEN locks for this file
630 could be cancelled, and since blocking ast handler
631 would attempt to grab och_mutex as well, that would
632 result in a deadlock */
633 mutex_unlock(&lli->lli_och_mutex);
635 * Normally called under two situations:
637 * 2. A race/condition on MDS resulting in no open
638 * handle to be returned from LOOKUP|OPEN request,
639 * for example if the target entry was a symlink.
641 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
643 * Always specify MDS_OPEN_BY_FID because we don't want
644 * to get file with different fid.
646 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
647 rc = ll_intent_file_open(file, NULL, 0, it);
649 GOTO(out_openerr, rc);
653 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
655 GOTO(out_och_free, rc = -ENOMEM);
659 /* md_intent_lock() didn't get a request ref if there was an
660 * open error, so don't do cleanup on the request here
662 /* XXX (green): Should not we bail out on any error here, not
663 * just open error? */
664 rc = it_open_error(DISP_OPEN_OPEN, it);
666 GOTO(out_och_free, rc);
668 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
669 "inode %p: disposition %x, status %d\n", inode,
670 it_disposition(it, ~0), it->d.lustre.it_status);
672 rc = ll_local_open(file, it, fd, *och_p);
674 GOTO(out_och_free, rc);
676 mutex_unlock(&lli->lli_och_mutex);
679 /* Must do this outside lli_och_mutex lock to prevent deadlock where
680 different kind of OPEN lock for this same inode gets cancelled
681 by ldlm_cancel_lru */
682 if (!S_ISREG(inode->i_mode))
683 GOTO(out_och_free, rc);
687 cl_lov_delay_create_clear(&file->f_flags);
688 GOTO(out_och_free, rc);
/* error path: free a handle we allocated but never installed */
692 if (och_p && *och_p) {
693 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
694 *och_p = NULL; /* OBD_FREE writes some magic there */
697 mutex_unlock(&lli->lli_och_mutex);
700 if (lli->lli_opendir_key == fd)
701 ll_deauthorize_statahead(inode, fd);
703 ll_file_data_put(fd);
705 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
708 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
709 ptlrpc_req_finished(it->d.lustre.it_data);
710 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * LDLM blocking AST for lease locks: on conflict, cancel the lease lock
 * asynchronously (lease breaking); the CANCELING case body is elided in
 * this listing.
 */
716 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
717 struct ldlm_lock_desc *desc, void *data, int flag)
720 struct lustre_handle lockh;
724 case LDLM_CB_BLOCKING:
725 ldlm_lock2handle(lock, &lockh);
726 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
728 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
732 case LDLM_CB_CANCELING:
740 * Acquire a lease and open the file.
/*
 * Acquire an open lease (FMODE_READ or FMODE_WRITE only) via an intent
 * open with MDS_OPEN_LEASE. When @file is given, the existing open
 * handle is re-used/transferred so the MDT sees the same owner.
 * Returns the new obd_client_handle or ERR_PTR().
 * NOTE(review): elided listing — allocation of @och, some braces, labels
 * and RETURNs are not visible; code left byte-identical.
 */
742 static struct obd_client_handle *
743 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
746 struct lookup_intent it = { .it_op = IT_OPEN };
747 struct ll_sb_info *sbi = ll_i2sbi(inode);
748 struct md_op_data *op_data;
749 struct ptlrpc_request *req = NULL;
750 struct lustre_handle old_handle = { 0 };
751 struct obd_client_handle *och = NULL;
756 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
757 RETURN(ERR_PTR(-EINVAL));
760 struct ll_inode_info *lli = ll_i2info(inode);
761 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
762 struct obd_client_handle **och_p;
/* lease mode must be a subset of the file's open mode; no exec files */
765 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
766 RETURN(ERR_PTR(-EPERM));
768 /* Get the openhandle of the file */
770 mutex_lock(&lli->lli_och_mutex);
771 if (fd->fd_lease_och != NULL) {
772 mutex_unlock(&lli->lli_och_mutex);
776 if (fd->fd_och == NULL) {
777 if (file->f_mode & FMODE_WRITE) {
778 LASSERT(lli->lli_mds_write_och != NULL);
779 och_p = &lli->lli_mds_write_och;
780 och_usecount = &lli->lli_open_fd_write_count;
782 LASSERT(lli->lli_mds_read_och != NULL);
783 och_p = &lli->lli_mds_read_och;
784 och_usecount = &lli->lli_open_fd_read_count;
786 if (*och_usecount == 1) {
793 mutex_unlock(&lli->lli_och_mutex);
794 if (rc < 0) /* more than 1 opener */
797 LASSERT(fd->fd_och != NULL);
798 old_handle = fd->fd_och->och_fh;
803 RETURN(ERR_PTR(-ENOMEM));
805 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
806 LUSTRE_OPC_ANY, NULL);
808 GOTO(out, rc = PTR_ERR(op_data));
810 /* To tell the MDT this openhandle is from the same owner */
811 op_data->op_handle = old_handle;
813 it.it_flags = fmode | open_flags;
814 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
815 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
816 &ll_md_blocking_lease_ast,
817 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
818 * it can be cancelled which may mislead applications that the lease is
820 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
821 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
822 * doesn't deal with openhandle, so normal openhandle will be leaked. */
823 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
824 ll_finish_md_op_data(op_data);
825 ptlrpc_req_finished(req);
827 GOTO(out_release_it, rc);
829 if (it_disposition(&it, DISP_LOOKUP_NEG))
830 GOTO(out_release_it, rc = -ENOENT);
832 rc = it_open_error(DISP_OPEN_OPEN, &it);
834 GOTO(out_release_it, rc);
836 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
837 ll_och_fill(sbi->ll_md_exp, &it, och);
839 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
840 GOTO(out_close, rc = -EOPNOTSUPP);
842 /* already get lease, handle lease lock */
843 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
/* a lease without its OPEN ibits lock violates the protocol */
844 if (it.d.lustre.it_lock_mode == 0 ||
845 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
846 /* open lock must return for lease */
847 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
848 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
849 it.d.lustre.it_lock_bits);
850 GOTO(out_close, rc = -EPROTO);
853 ll_intent_release(&it);
857 /* Cancel open lock */
858 if (it.d.lustre.it_lock_mode != 0) {
859 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
860 it.d.lustre.it_lock_mode);
861 it.d.lustre.it_lock_mode = 0;
862 och->och_lease_handle.cookie = 0ULL;
864 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, och, inode, 0, NULL);
866 CERROR("%s: error closing file "DFID": %d\n",
867 ll_get_fsname(inode->i_sb, NULL, 0),
868 PFID(&ll_i2info(inode)->lli_fid), rc2);
869 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
871 ll_intent_release(&it);
879 * Check whether a layout swap can be done between two inodes.
881 * \param[in] inode1 First inode to check
882 * \param[in] inode2 Second inode to check
884 * \retval 0 on success, layout swap can be performed between both inodes
885 * \retval negative error code if requirements are not met
/*
 * Validate that a layout swap is allowed between two inodes: both must
 * be regular files, writable by the caller, and on the same superblock.
 * NOTE(review): elided listing — the returned error codes per check and
 * the success RETURN are not visible here.
 */
887 static int ll_check_swap_layouts_validity(struct inode *inode1,
888 struct inode *inode2)
890 if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
893 if (inode_permission(inode1, MAY_WRITE) ||
894 inode_permission(inode2, MAY_WRITE))
897 if (inode1->i_sb != inode2->i_sb)
/*
 * Close @och with the MDS_CLOSE_LAYOUT_SWAP bias so the MDT atomically
 * swaps layouts between @inode and @inode2 at close time. Swapping an
 * inode with itself (fid1 == fid2) is rejected.
 * NOTE(review): elided listing — out_free_och label and RETURN missing.
 */
903 static int ll_swap_layouts_close(struct obd_client_handle *och,
904 struct inode *inode, struct inode *inode2)
906 const struct lu_fid *fid1 = ll_inode2fid(inode);
907 const struct lu_fid *fid2;
911 CDEBUG(D_INODE, "%s: biased close of file "DFID"\n",
912 ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
914 rc = ll_check_swap_layouts_validity(inode, inode2);
916 GOTO(out_free_och, rc);
918 /* We now know that inode2 is a lustre inode */
919 fid2 = ll_inode2fid(inode2);
921 rc = lu_fid_cmp(fid1, fid2);
923 GOTO(out_free_och, rc = -EINVAL);
925 /* Close the file and swap layouts between inode & inode2.
926 * NB: lease lock handle is released in mdc_close_layout_swap_pack()
927 * because we still need it to pack l_remote_handle to MDT. */
928 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
929 MDS_CLOSE_LAYOUT_SWAP, inode2);
931 och = NULL; /* freed in ll_close_inode_openhandle() */
941 * Release lease and close the file.
942 * It will check if the lease has ever broken.
/*
 * Release a lease: detect whether the lease lock was already cancelled
 * (i.e. the lease was broken), cancel it if still held, report the
 * broken state via @lease_broken, then close the open handle on the MDS.
 * NOTE(review): elided listing — braces/RETURN not visible.
 */
944 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
947 struct ldlm_lock *lock;
948 bool cancelled = true;
952 lock = ldlm_handle2lock(&och->och_lease_handle);
954 lock_res_and_lock(lock);
955 cancelled = ldlm_is_cancel(lock);
956 unlock_res_and_lock(lock);
960 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
961 PFID(&ll_i2info(inode)->lli_fid), cancelled);
964 ldlm_cli_cancel(&och->och_lease_handle, 0);
965 if (lease_broken != NULL)
966 *lease_broken = cancelled;
968 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
/*
 * Merge inode attributes: start from MDS-provided timestamps cached in
 * ll_inode_info, query the cl_object (OST side) attributes, and keep
 * the most recent of each timestamp plus the OST size/blocks — all
 * under the inode size lock.
 * NOTE(review): elided listing — local declarations and RETURN missing.
 */
974 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
976 struct ll_inode_info *lli = ll_i2info(inode);
977 struct cl_object *obj = lli->lli_clob;
978 struct cl_attr *attr = vvp_env_thread_attr(env);
986 ll_inode_size_lock(inode);
988 /* merge timestamps the most recently obtained from mds with
989 timestamps obtained from osts */
990 LTIME_S(inode->i_atime) = lli->lli_atime;
991 LTIME_S(inode->i_mtime) = lli->lli_mtime;
992 LTIME_S(inode->i_ctime) = lli->lli_ctime;
994 atime = LTIME_S(inode->i_atime);
995 mtime = LTIME_S(inode->i_mtime);
996 ctime = LTIME_S(inode->i_ctime);
998 cl_object_attr_lock(obj);
999 rc = cl_object_attr_get(env, obj, attr);
1000 cl_object_attr_unlock(obj);
1003 GOTO(out_size_unlock, rc);
/* keep the newer of MDS vs OST timestamps */
1005 if (atime < attr->cat_atime)
1006 atime = attr->cat_atime;
1008 if (ctime < attr->cat_ctime)
1009 ctime = attr->cat_ctime;
1011 if (mtime < attr->cat_mtime)
1012 mtime = attr->cat_mtime;
1014 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1015 PFID(&lli->lli_fid), attr->cat_size);
1017 i_size_write(inode, attr->cat_size);
1018 inode->i_blocks = attr->cat_blocks;
1020 LTIME_S(inode->i_atime) = atime;
1021 LTIME_S(inode->i_mtime) = mtime;
1022 LTIME_S(inode->i_ctime) = ctime;
1025 ll_inode_size_unlock(inode);
/*
 * Decide whether atime updates should be suppressed for this file,
 * mirroring the kernel's file_accessed()/touch_atime() checks (file,
 * inode, mount and superblock noatime flags).
 * NOTE(review): elided listing — the "return true/false" lines between
 * the visible conditions are not shown.
 */
1030 static bool file_is_noatime(const struct file *file)
1032 const struct vfsmount *mnt = file->f_path.mnt;
1033 const struct inode *inode = file->f_path.dentry->d_inode;
1035 /* Adapted from file_accessed() and touch_atime().*/
1036 if (file->f_flags & O_NOATIME)
1039 if (inode->i_flags & S_NOATIME)
1042 if (IS_NOATIME(inode))
1045 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1048 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1051 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io for a read or write on @file: propagate
 * nonblock/append/sync flags, select the lock requirement policy
 * (never for nolock files, mandatory for O_APPEND, maybe otherwise)
 * and the noatime decision.
 */
1057 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
1059 struct inode *inode = file->f_dentry->d_inode;
1061 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1063 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1064 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1065 file->f_flags & O_DIRECT ||
1068 io->ci_obj = ll_i2info(inode)->lli_clob;
1069 io->ci_lockreq = CILR_MAYBE;
1070 if (ll_file_nolock(file)) {
1071 io->ci_lockreq = CILR_NEVER;
1072 io->ci_no_srvlock = 1;
1073 } else if (file->f_flags & O_APPEND) {
1074 io->ci_lockreq = CILR_MANDATORY;
1077 io->ci_noatime = file_is_noatime(file);
/*
 * Generic read/write engine shared by the aio/splice entry points:
 * initializes a cl_io, takes the range lock for writes (and O_DIRECT
 * reads — LU-6227), runs cl_io_loop(), restarts short IO when needed,
 * and accounts bytes into /proc stats.
 * NOTE(review): the return type line precedes this elided listing, and
 * many control-flow lines (restart loop, braces) are not visible.
 */
1081 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1082 struct file *file, enum cl_io_type iot,
1083 loff_t *ppos, size_t count)
1085 struct vvp_io *vio = vvp_env_io(env);
1086 struct inode *inode = file->f_dentry->d_inode;
1087 struct ll_inode_info *lli = ll_i2info(inode);
1088 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1092 struct range_lock range;
1096 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1097 file->f_dentry->d_name.name, iot, *ppos, count);
1100 io = vvp_env_thread_io(env);
1101 ll_io_init(io, file, iot == CIT_WRITE);
1103 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1104 bool range_locked = false;
/* O_APPEND writes lock to EOF since the final offset is unknown */
1106 if (file->f_flags & O_APPEND)
1107 range_lock_init(&range, 0, LUSTRE_EOF);
1109 range_lock_init(&range, *ppos, *ppos + count - 1);
1111 vio->vui_fd = LUSTRE_FPRIVATE(file);
1112 vio->vui_io_subtype = args->via_io_subtype;
1114 switch (vio->vui_io_subtype) {
1116 vio->vui_iov = args->u.normal.via_iov;
1117 vio->vui_nrsegs = args->u.normal.via_nrsegs;
1118 vio->vui_tot_nrsegs = vio->vui_nrsegs;
1119 vio->vui_iocb = args->u.normal.via_iocb;
1120 /* Direct IO reads must also take range lock,
1121 * or multiple reads will try to work on the same pages
1122 * See LU-6227 for details. */
1123 if (((iot == CIT_WRITE) ||
1124 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1125 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1126 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1128 rc = range_lock(&lli->lli_write_tree, &range);
1132 range_locked = true;
1136 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1137 vio->u.splice.vui_flags = args->u.splice.via_flags;
1140 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1144 ll_cl_add(file, env, io);
1145 rc = cl_io_loop(env, io);
1146 ll_cl_remove(file, env);
1149 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1151 range_unlock(&lli->lli_write_tree, &range);
1154 /* cl_io_rw_init() handled IO */
1158 if (io->ci_nob > 0) {
1159 result += io->ci_nob;
1160 count -= io->ci_nob;
1161 *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */
1163 /* prepare IO restart */
1164 if (count > 0 && args->via_io_subtype == IO_NORMAL) {
1165 args->u.normal.via_iov = vio->vui_iov;
1166 args->u.normal.via_nrsegs = vio->vui_tot_nrsegs;
1171 cl_io_fini(env, io);
/* restart when the layout changed mid-IO and bytes remain */
1173 if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
1175 "%s: restart %s from %lld, count:%zu, result: %zd\n",
1176 file->f_dentry->d_name.name,
1177 iot == CIT_READ ? "read" : "write",
1178 *ppos, count, result);
1182 if (iot == CIT_READ) {
1184 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1185 LPROC_LL_READ_BYTES, result);
1186 } else if (iot == CIT_WRITE) {
1188 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1189 LPROC_LL_WRITE_BYTES, result);
1190 fd->fd_write_failed = false;
1191 } else if (rc != -ERESTARTSYS) {
1192 fd->fd_write_failed = true;
1196 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1198 return result > 0 ? result : rc;
1202 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array and compute the total byte count, as in the
 * kernel's __generic_file_aio_write_nolock: reject negative lengths or
 * cumulative overflow, and truncate the segment list at the first
 * inaccessible segment.
 * NOTE(review): elided listing — accumulation and RETURN lines missing.
 */
1204 static int ll_file_get_iov_count(const struct iovec *iov,
1205 unsigned long *nr_segs, size_t *count)
1210 for (seg = 0; seg < *nr_segs; seg++) {
1211 const struct iovec *iv = &iov[seg];
1214 * If any segment has a negative length, or the cumulative
1215 * length ever wraps negative then return -EINVAL.
1218 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1220 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1225 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Async-style read entry: validate the iovec, copy it to a local buffer
 * (per-env scratch for the single-segment case, heap otherwise, since
 * the IO engine may modify it), then run ll_file_io_generic(CIT_READ).
 * NOTE(review): elided listing — single-vs-multi segment branch and
 * RETURN not fully visible.
 */
1232 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1233 unsigned long nr_segs, loff_t pos)
1236 struct vvp_io_args *args;
1237 struct iovec *local_iov;
1243 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1247 env = cl_env_get(&refcheck);
1249 RETURN(PTR_ERR(env));
1252 local_iov = &ll_env_info(env)->lti_local_iov;
1255 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1256 if (local_iov == NULL) {
1257 cl_env_put(env, &refcheck);
1261 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1264 args = ll_env_args(env, IO_NORMAL);
1265 args->u.normal.via_iov = local_iov;
1266 args->u.normal.via_nrsegs = nr_segs;
1267 args->u.normal.via_iocb = iocb;
1269 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1270 &iocb->ki_pos, count);
1272 cl_env_put(env, &refcheck);
1275 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous read: wrap the user buffer in a single iovec and a
 * sync kiocb, delegate to ll_file_aio_read(), then write back the
 * updated file position. HAVE_KIOCB_KI_LEFT handles the kernel API
 * rename of the kiocb byte-count field.
 */
1280 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1284 struct iovec iov = { .iov_base = buf, .iov_len = count };
1285 struct kiocb *kiocb;
1290 env = cl_env_get(&refcheck);
1292 RETURN(PTR_ERR(env));
1294 kiocb = &ll_env_info(env)->lti_kiocb;
1295 init_sync_kiocb(kiocb, file);
1296 kiocb->ki_pos = *ppos;
1297 #ifdef HAVE_KIOCB_KI_LEFT
1298 kiocb->ki_left = count;
1300 kiocb->ki_nbytes = count;
1303 result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos);
1304 *ppos = kiocb->ki_pos;
1306 cl_env_put(env, &refcheck);
1311 * Write to a file (through the page cache).
/*
 * Vectored (aio) write entry point; mirror of ll_file_aio_read() but
 * runs the generic client IO with CIT_WRITE.  The caller's iovec is
 * validated and copied into client-env storage first.
 */
1314 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1315 unsigned long nr_segs, loff_t pos)
1318 struct vvp_io_args *args;
1319 struct iovec *local_iov;
1325 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1329 env = cl_env_get(&refcheck);
1331 RETURN(PTR_ERR(env));
/* single-segment fast path: reuse the env-embedded scratch iovec */
1334 local_iov = &ll_env_info(env)->lti_local_iov;
1337 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1338 if (local_iov == NULL) {
1339 cl_env_put(env, &refcheck);
/* snapshot the caller's iovec: generic IO may modify it during restart */
1343 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1346 args = ll_env_args(env, IO_NORMAL);
1347 args->u.normal.via_iov = local_iov;
1348 args->u.normal.via_nrsegs = nr_segs;
1349 args->u.normal.via_iocb = iocb;
1351 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1352 &iocb->ki_pos, count);
1353 cl_env_put(env, &refcheck);
/* free only taken on the multi-segment (allocated) path */
1356 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous write(2) entry point: build a one-segment iovec and a
 * per-env sync kiocb, delegate to ll_file_aio_write(), and propagate
 * the advanced position back through *ppos.
 */
1361 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1362 size_t count, loff_t *ppos)
1365 struct iovec iov = { .iov_base = (void __user *)buf,
1367 struct kiocb *kiocb;
1372 env = cl_env_get(&refcheck);
1374 RETURN(PTR_ERR(env));
1376 kiocb = &ll_env_info(env)->lti_kiocb;
1377 init_sync_kiocb(kiocb, file);
1378 kiocb->ki_pos = *ppos;
/* the kiocb byte-count field was renamed across kernel versions */
1379 #ifdef HAVE_KIOCB_KI_LEFT
1380 kiocb->ki_left = count;
1382 kiocb->ki_nbytes = count;
1385 result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos);
1386 *ppos = kiocb->ki_pos;
1388 cl_env_put(env, &refcheck);
1393 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read handler: run a CIT_READ through ll_file_io_generic()
 * with IO_SPLICE args targeting the supplied pipe.
 */
1395 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1396 struct pipe_inode_info *pipe, size_t count,
1400 struct vvp_io_args *args;
1405 env = cl_env_get(&refcheck);
1407 RETURN(PTR_ERR(env));
1409 args = ll_env_args(env, IO_SPLICE);
1410 args->u.splice.via_pipe = pipe;
1411 args->u.splice.via_flags = flags;
1413 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1414 cl_env_put(env, &refcheck);
/*
 * Apply a striping EA (lov_user_md) to a file by re-opening it by FID
 * with an open intent carrying the layout, under the inode size lock.
 * The intent is always released and the delay-create flag cleared on
 * the way out, whether the open succeeded or not.
 */
1418 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1419 __u64 flags, struct lov_user_md *lum,
1422 struct lookup_intent oit = {
1424 .it_flags = flags | MDS_OPEN_BY_FID,
1429 ll_inode_size_lock(inode);
1430 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1432 GOTO(out_unlock, rc);
/* the open handle was only needed to carry the EA; close it again */
1434 ll_release_openhandle(file->f_dentry, &oit);
1437 ll_inode_size_unlock(inode);
1438 ll_intent_release(&oit);
1439 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA of @filename (child of @inode) from the MDT.
 * On success *lmmp points into the reply buffer of *request (caller
 * must keep the request until done with the EA) and *lmm_size is set.
 * The EA arrives in wire (little-endian) order; on a big-endian host
 * it is byte-swapped to host order below, taking care not to swab
 * object entries that do not exist (directories / released files).
 */
1444 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1445 struct lov_mds_md **lmmp, int *lmm_size,
1446 struct ptlrpc_request **request)
1448 struct ll_sb_info *sbi = ll_i2sbi(inode);
1449 struct mdt_body *body;
1450 struct lov_mds_md *lmm = NULL;
1451 struct ptlrpc_request *req = NULL;
1452 struct md_op_data *op_data;
1455 rc = ll_get_default_mdsize(sbi, &lmmsize);
1459 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1460 strlen(filename), lmmsize,
1461 LUSTRE_OPC_ANY, NULL);
1462 if (IS_ERR(op_data))
1463 RETURN(PTR_ERR(op_data));
1465 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1466 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1467 ll_finish_md_op_data(op_data);
1469 CDEBUG(D_INFO, "md_getattr_name failed "
1470 "on %s: rc %d\n", filename, rc);
1474 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1475 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1477 lmmsize = body->mbo_eadatasize;
/* no striping EA present at all -> -ENODATA */
1479 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1481 GOTO(out, rc = -ENODATA);
1484 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1485 LASSERT(lmm != NULL);
/* only plain V1/V3 layouts are understood here */
1487 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1488 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1489 GOTO(out, rc = -EPROTO);
1493 * This is coming from the MDS, so is probably in
1494 * little endian. We convert it to host endian before
1495 * passing it to userspace.
/* true only on big-endian hosts: wire order differs from host order */
1497 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1500 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1501 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1504 /* if function called for directory - we should
1505 * avoid swab not existent lsm objects */
1506 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1507 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1508 if (S_ISREG(body->mbo_mode))
1509 lustre_swab_lov_user_md_objects(
1510 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1512 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1513 lustre_swab_lov_user_md_v3(
1514 (struct lov_user_md_v3 *)lmm);
1515 if (S_ISREG(body->mbo_mode))
1516 lustre_swab_lov_user_md_objects(
1517 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1524 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: admin-only path that copies a lov_user_md
 * (sized for one extra OST entry) from userspace and applies it via
 * ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS | FMODE_WRITE.
 */
1529 static int ll_lov_setea(struct inode *inode, struct file *file,
1532 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1533 struct lov_user_md *lump;
1534 int lum_size = sizeof(struct lov_user_md) +
1535 sizeof(struct lov_user_ost_data);
/* setting explicit objects requires administrator privilege */
1539 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1542 OBD_ALLOC_LARGE(lump, lum_size);
1546 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1547 OBD_FREE_LARGE(lump, lum_size);
1551 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1553 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the file's striping information to the userspace @lum buffer
 * via cl_object_getstripe() on the inode's cl_object.
 */
1557 static int ll_file_getstripe(struct inode *inode,
1558 struct lov_user_md __user *lum)
1565 env = cl_env_get(&refcheck);
1567 RETURN(PTR_ERR(env));
1569 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1570 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user's lov_user_md into a
 * kernel buffer, apply it, then refresh the layout generation and
 * write the resulting stripe info back to the user buffer.
 */
1574 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1577 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1578 struct lov_user_md *klum;
1580 __u64 flags = FMODE_WRITE;
1583 rc = ll_copy_user_md(lum, &klum);
1588 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* NOTE(review): put_user() result is not checked on nearby lines of the
 * full source; stripe_count 0 tells userspace the default was used */
1592 put_user(0, &lum->lmm_stripe_count);
1594 ll_layout_refresh(inode, &gen);
1595 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1598 OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK: take a group (GID-based) extent lock for this
 * file descriptor.  Rejects gid 0, no-lock mounts, and a second group
 * lock on the same fd; re-checks under lli_lock after acquiring the
 * cl-layer lock to handle a racing thread.
 */
1603 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1605 struct ll_inode_info *lli = ll_i2info(inode);
1606 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1607 struct ll_grouplock grouplock;
1612 CWARN("group id for group lock must not be 0\n");
1616 if (ll_file_nolock(file))
1617 RETURN(-EOPNOTSUPP);
1619 spin_lock(&lli->lli_lock);
1620 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1621 CWARN("group lock already existed with gid %lu\n",
1622 fd->fd_grouplock.lg_gid);
1623 spin_unlock(&lli->lli_lock);
1626 LASSERT(fd->fd_grouplock.lg_lock == NULL);
1627 spin_unlock(&lli->lli_lock);
/* acquire the cl-layer group lock outside lli_lock (it may block) */
1629 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1630 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* re-check: another thread may have installed a lock meanwhile */
1634 spin_lock(&lli->lli_lock);
1635 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1636 spin_unlock(&lli->lli_lock);
1637 CERROR("another thread just won the race\n");
1638 cl_put_grouplock(&grouplock);
1642 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1643 fd->fd_grouplock = grouplock;
1644 spin_unlock(&lli->lli_lock);
1646 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK: drop the group lock held on this fd.  Fails if
 * no group lock is held or if @arg does not match the held gid.  The
 * fd state is cleared under lli_lock; the cl-layer lock is released
 * after dropping the spinlock.
 */
1650 static int ll_put_grouplock(struct inode *inode, struct file *file,
1653 struct ll_inode_info *lli = ll_i2info(inode);
1654 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1655 struct ll_grouplock grouplock;
1658 spin_lock(&lli->lli_lock);
1659 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1660 spin_unlock(&lli->lli_lock);
1661 CWARN("no group lock held\n");
1665 LASSERT(fd->fd_grouplock.lg_lock != NULL);
1667 if (fd->fd_grouplock.lg_gid != arg) {
1668 CWARN("group lock %lu doesn't match current id %lu\n",
1669 arg, fd->fd_grouplock.lg_gid);
1670 spin_unlock(&lli->lli_lock);
/* detach the lock from the fd before releasing the spinlock */
1674 grouplock = fd->fd_grouplock;
1675 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1676 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1677 spin_unlock(&lli->lli_lock);
1679 cl_put_grouplock(&grouplock);
1680 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1685 * Close inode open handle
1687 * \param dentry [in] dentry which contains the inode
1688 * \param it [in,out] intent which contains open info and result
1691 * \retval <0 failure
/*
 * Close the MDS open handle carried by an open intent (no-op for the
 * root dentry or if the intent holds no open).  Also drops the extra
 * request reference taken for ll_file_open when DISP_ENQ_OPEN_REF set.
 */
1693 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1695 struct inode *inode = dentry->d_inode;
1696 struct obd_client_handle *och;
1702 /* Root ? Do nothing. */
1703 if (dentry->d_inode->i_sb->s_root == dentry)
1706 /* No open handle to close? Move away */
1707 if (!it_disposition(it, DISP_OPEN_OPEN))
1710 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1712 OBD_ALLOC(och, sizeof(*och));
1714 GOTO(out, rc = -ENOMEM);
1716 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1718 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1719 och, inode, 0, NULL);
1721 /* this one is in place of ll_file_open */
1722 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1723 ptlrpc_req_finished(it->d.lustre.it_data);
1724 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1730 * Get size for inode for which FIEMAP mapping is requested.
1731 * Make the FIEMAP get_info call and returns the result.
1732 * \param fiemap kernel buffer to hold extens
1733 * \param num_bytes kernel buffer size
/*
 * Core FIEMAP: reject unsupported flags (reporting the compat mask
 * back to the caller), honor FIEMAP_FLAG_SYNC by flushing dirty pages,
 * glimpse the size if unknown, and hand the request to the cl layer.
 * A zero-size file short-circuits with zero mapped extents.
 */
1735 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1741 struct ll_fiemap_info_key fmkey = { .name = KEY_FIEMAP, };
1744 /* Checks for fiemap flags */
1745 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* tell userspace which flags we do support, then fail */
1746 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1750 /* Check for FIEMAP_FLAG_SYNC */
1751 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1752 rc = filemap_fdatawrite(inode->i_mapping);
1757 env = cl_env_get(&refcheck);
1759 RETURN(PTR_ERR(env));
/* size unknown locally: fetch it from the OSTs */
1761 if (i_size_read(inode) == 0) {
1762 rc = ll_glimpse_size(inode);
1767 fmkey.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1768 obdo_from_inode(&fmkey.oa, inode, OBD_MD_FLSIZE);
1769 obdo_set_parent_fid(&fmkey.oa, &ll_i2info(inode)->lli_fid);
1771 /* If filesize is 0, then there would be no objects for mapping */
1772 if (fmkey.oa.o_size == 0) {
1773 fiemap->fm_mapped_extents = 0;
1777 fmkey.fiemap = *fiemap;
1779 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1780 &fmkey, fiemap, &num_bytes);
1782 cl_env_put(env, &refcheck);
1786 int ll_fid2path(struct inode *inode, void __user *arg)
1788 struct obd_export *exp = ll_i2mdexp(inode);
1789 const struct getinfo_fid2path __user *gfin = arg;
1791 struct getinfo_fid2path *gfout;
1797 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1798 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1801 /* Only need to get the buflen */
1802 if (get_user(pathlen, &gfin->gf_pathlen))
1805 if (pathlen > PATH_MAX)
1808 outsize = sizeof(*gfout) + pathlen;
1809 OBD_ALLOC(gfout, outsize);
1813 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1814 GOTO(gf_free, rc = -EFAULT);
1816 /* Call mdc_iocontrol */
1817 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1821 if (copy_to_user(arg, gfout, outsize))
1825 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP: userspace wrapper around ll_do_fiemap().  Sizes a
 * kernel fiemap buffer from fm_extent_count (overflow-checked against
 * SIZE_MAX), copies in the header (and first extent when continuing a
 * previous mapping), runs the mapping, and copies the result back.
 */
1829 static int ll_ioctl_fiemap(struct inode *inode, struct fiemap __user *arg)
1831 struct fiemap *fiemap;
1837 /* Get the extent count so we can calculate the size of
1838 * required fiemap buffer */
1839 if (get_user(extent_count, &arg->fm_extent_count))
/* guard the multiplication below against size_t overflow */
1843 (SIZE_MAX - sizeof(*fiemap)) / sizeof(struct ll_fiemap_extent))
1845 num_bytes = sizeof(*fiemap) + (extent_count *
1846 sizeof(struct ll_fiemap_extent));
1848 OBD_ALLOC_LARGE(fiemap, num_bytes);
1852 /* get the fiemap value */
1853 if (copy_from_user(fiemap, arg, sizeof(*fiemap)))
1854 GOTO(error, rc = -EFAULT);
1856 /* If fm_extent_count is non-zero, read the first extent since
1857 * it is used to calculate end_offset and device from previous
1859 if (extent_count != 0) {
1860 if (copy_from_user(&fiemap->fm_extents[0],
1861 (char __user *)arg + sizeof(*fiemap),
1862 sizeof(struct ll_fiemap_extent)))
1863 GOTO(error, rc = -EFAULT);
1866 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* only copy back the header plus the extents actually mapped */
1870 ret_bytes = sizeof(struct fiemap);
1872 if (extent_count != 0)
1873 ret_bytes += (fiemap->fm_mapped_extents *
1874 sizeof(struct ll_fiemap_extent));
1876 if (copy_to_user((void __user *)arg, fiemap, ret_bytes))
1880 OBD_FREE_LARGE(fiemap, num_bytes);
1885 * Read the data_version for inode.
1887 * This value is computed using stripe object version on OST.
1888 * Version is computed using server side locking.
1890 * @param flags if do sync on the OST side;
1892 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1893 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * Runs a CIT_DATA_VERSION cl_io; restarted when the io layer asks for
 * it (layout change).  A file with no cl_object reports version 0.
 */
1895 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1897 struct cl_object *obj = ll_i2info(inode)->lli_clob;
1905 /* If no file object initialized, we consider its version is 0. */
1911 env = cl_env_get(&refcheck);
1913 RETURN(PTR_ERR(env));
1915 io = vvp_env_thread_io(env);
1917 io->u.ci_data_version.dv_data_version = 0;
1918 io->u.ci_data_version.dv_flags = flags;
1921 if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
1922 result = cl_io_loop(env, io);
1924 result = io->ci_result;
1926 *data_version = io->u.ci_data_version.dv_data_version;
1928 cl_io_fini(env, io);
/* layout may have changed mid-io; retry the whole operation */
1930 if (unlikely(io->ci_need_restart))
1933 cl_env_put(env, &refcheck);
1939 * Trigger a HSM release request for the provided inode.
/*
 * Take a write lease, flush and grab the latest data_version, merge
 * [am]time attributes, then close the open handle with MDS_HSM_RELEASE
 * so the MDT can release the file's OST objects.  The lease is closed
 * on all exit paths.
 */
1941 int ll_hsm_release(struct inode *inode)
1943 struct cl_env_nest nest;
1945 struct obd_client_handle *och = NULL;
1946 __u64 data_version = 0;
1950 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1951 ll_get_fsname(inode->i_sb, NULL, 0),
1952 PFID(&ll_i2info(inode)->lli_fid));
1954 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1956 GOTO(out, rc = PTR_ERR(och));
1958 /* Grab latest data_version and [am]time values */
1959 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1963 env = cl_env_nested_get(&nest);
1965 GOTO(out, rc = PTR_ERR(env));
1967 ll_merge_attr(env, inode);
1968 cl_env_nested_put(&nest, env);
1970 /* Release the file.
1971 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1972 * we still need it to pack l_remote_handle to MDT. */
1973 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
1974 MDS_HSM_RELEASE, &data_version);
1979 if (och != NULL && !IS_ERR(och)) /* close the file */
1980 ll_lease_close(och, inode, NULL);
/* Working state for ll_swap_layouts(); holds the (possibly reordered)
 * inode pair plus the data-version check values/flags (fields partly
 * outside this extract). */
1985 struct ll_swap_stack {
1988 struct inode *inode1;
1989 struct inode *inode2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS worker: swap the layouts of two files.
 * Orders the pair by FID to avoid deadlock, optionally takes group
 * locks (gid != 0) to flush/fence dirty cache, verifies requested
 * data versions have not changed, then sends the swap to the MDT via
 * obd_iocontrol with mdc_swap_layouts packed in op_data.
 */
1994 static int ll_swap_layouts(struct file *file1, struct file *file2,
1995 struct lustre_swap_layouts *lsl)
1997 struct mdc_swap_layouts msl;
1998 struct md_op_data *op_data;
2001 struct ll_swap_stack *llss = NULL;
2004 OBD_ALLOC_PTR(llss);
2008 llss->inode1 = file1->f_dentry->d_inode;
2009 llss->inode2 = file2->f_dentry->d_inode;
2011 rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
2015 /* we use 2 bool because it is easier to swap than 2 bits */
2016 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2017 llss->check_dv1 = true;
2019 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2020 llss->check_dv2 = true;
2022 /* we cannot use lsl->sl_dvX directly because we may swap them */
2023 llss->dv1 = lsl->sl_dv1;
2024 llss->dv2 = lsl->sl_dv2;
2026 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2027 if (rc == 0) /* same file, done! */
/* lock ordering: always operate on the lower FID first */
2030 if (rc < 0) { /* sequentialize it */
2031 swap(llss->inode1, llss->inode2);
2033 swap(llss->dv1, llss->dv2);
2034 swap(llss->check_dv1, llss->check_dv2);
2038 if (gid != 0) { /* application asks to flush dirty cache */
2039 rc = ll_get_grouplock(llss->inode1, file1, gid);
2043 rc = ll_get_grouplock(llss->inode2, file2, gid);
2045 ll_put_grouplock(llss->inode1, file1, gid);
2050 /* ultimate check, before swaping the layouts we check if
2051 * dataversion has changed (if requested) */
2052 if (llss->check_dv1) {
2053 rc = ll_data_version(llss->inode1, &dv, 0);
2056 if (dv != llss->dv1)
2057 GOTO(putgl, rc = -EAGAIN);
2060 if (llss->check_dv2) {
2061 rc = ll_data_version(llss->inode2, &dv, 0);
2064 if (dv != llss->dv2)
2065 GOTO(putgl, rc = -EAGAIN);
2068 /* struct md_op_data is used to send the swap args to the mdt
2069 * only flags is missing, so we use struct mdc_swap_layouts
2070 * through the md_op_data->op_data */
2071 /* flags from user space have to be converted before they are send to
2072 * server, no flag is sent today, they are only used on the client */
2075 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2076 0, LUSTRE_OPC_ANY, &msl);
2077 if (IS_ERR(op_data))
2078 GOTO(free, rc = PTR_ERR(op_data));
2080 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2081 sizeof(*op_data), op_data, NULL);
2082 ll_finish_md_op_data(op_data);
/* release group locks in reverse acquisition order */
2089 ll_put_grouplock(llss->inode2, file2, gid);
2090 ll_put_grouplock(llss->inode1, file1, gid);
/*
 * Apply an HSM state change (set/clear flag masks, archive id) on the
 * MDT.  Validates the masks against HSM_FLAGS_MASK, restricts
 * non-HSM_USER_MASK bits to CAP_SYS_ADMIN, and bounds the archive id.
 */
2100 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2102 struct md_op_data *op_data;
2106 /* Detect out-of range masks */
2107 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2110 /* Non-root users are forbidden to set or clear flags which are
2111 * NOT defined in HSM_USER_MASK. */
2112 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2113 !cfs_capable(CFS_CAP_SYS_ADMIN))
2116 /* Detect out-of range archive id */
2117 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2118 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2121 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2122 LUSTRE_OPC_ANY, hss);
2123 if (IS_ERR(op_data))
2124 RETURN(PTR_ERR(op_data));
2126 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2127 sizeof(*op_data), op_data, NULL);
2129 ll_finish_md_op_data(op_data);
/*
 * HSM import: mark a regular file ARCHIVED|EXISTS|RELEASED in the
 * given archive, then restore its saved attributes (mode, ownership,
 * size, [am]times) from the hsm_user_import record via ll_setattr_raw
 * under i_mutex.
 */
2134 static int ll_hsm_import(struct inode *inode, struct file *file,
2135 struct hsm_user_import *hui)
2137 struct hsm_state_set *hss = NULL;
2138 struct iattr *attr = NULL;
2142 if (!S_ISREG(inode->i_mode))
2148 GOTO(out, rc = -ENOMEM);
/* set HSM state first so the file is seen as released on the MDT */
2150 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2151 hss->hss_archive_id = hui->hui_archive_id;
2152 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2153 rc = ll_hsm_state_set(inode, hss);
2157 OBD_ALLOC_PTR(attr);
2159 GOTO(out, rc = -ENOMEM);
2161 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2162 attr->ia_mode |= S_IFREG;
2163 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2164 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2165 attr->ia_size = hui->hui_size;
2166 attr->ia_mtime.tv_sec = hui->hui_mtime;
2167 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2168 attr->ia_atime.tv_sec = hui->hui_atime;
2169 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
/* ATTR_FORCE bypasses permission checks for the import */
2171 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2172 ATTR_UID | ATTR_GID |
2173 ATTR_MTIME | ATTR_MTIME_SET |
2174 ATTR_ATIME | ATTR_ATIME_SET;
2176 mutex_lock(&inode->i_mutex);
2178 rc = ll_setattr_raw(file->f_dentry, attr, true);
2182 mutex_unlock(&inode->i_mutex);
/* Map an open fmode to the LL_LEASE_{RD,WR}LCK bits reported to
 * userspace by the lease ioctls. */
2194 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2196 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2197 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * File ioctl dispatcher for llite.  Handles llite-specific commands
 * (flags, striping, group locks, fiemap, fid2path, data version, HSM,
 * leases, layout swap) and falls through to the registered iocontrol
 * callbacks and then the data export for anything unrecognized.
 * NOTE(review): many RETURN/brace lines are missing from this extract;
 * case boundaries must be read against the full source.
 */
2201 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2203 struct inode *inode = file->f_dentry->d_inode;
2204 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2208 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2209 PFID(ll_inode2fid(inode)), inode, cmd);
2210 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2212 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2213 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2217 case LL_IOC_GETFLAGS:
2218 /* Get the current value of the file flags */
2219 return put_user(fd->fd_flags, (int __user *)arg);
2220 case LL_IOC_SETFLAGS:
2221 case LL_IOC_CLRFLAGS:
2222 /* Set or clear specific file flags */
2223 /* XXX This probably needs checks to ensure the flags are
2224 * not abused, and to handle any flag side effects.
2226 if (get_user(flags, (int __user *) arg))
2229 if (cmd == LL_IOC_SETFLAGS) {
2230 if ((flags & LL_FILE_IGNORE_LOCK) &&
2231 !(file->f_flags & O_DIRECT)) {
2232 CERROR("%s: unable to disable locking on "
2233 "non-O_DIRECT file\n", current->comm);
2237 fd->fd_flags |= flags;
2239 fd->fd_flags &= ~flags;
2242 case LL_IOC_LOV_SETSTRIPE:
2243 RETURN(ll_lov_setstripe(inode, file, arg));
2244 case LL_IOC_LOV_SETEA:
2245 RETURN(ll_lov_setea(inode, file, arg));
2246 case LL_IOC_LOV_SWAP_LAYOUTS: {
2248 struct lustre_swap_layouts lsl;
2250 if (copy_from_user(&lsl, (char __user *)arg,
2251 sizeof(struct lustre_swap_layouts)))
/* both files must be writable to have their layouts swapped */
2254 if ((file->f_flags & O_ACCMODE) == O_RDONLY)
2257 file2 = fget(lsl.sl_fd);
2261 /* O_WRONLY or O_RDWR */
2262 if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
2263 GOTO(out, rc = -EPERM);
2265 if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
2266 struct inode *inode2;
2267 struct ll_inode_info *lli;
2268 struct obd_client_handle *och = NULL;
2270 if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE)
2271 GOTO(out, rc = -EINVAL);
/* swap-and-close path requires a lease held on this fd */
2273 lli = ll_i2info(inode);
2274 mutex_lock(&lli->lli_och_mutex);
2275 if (fd->fd_lease_och != NULL) {
2276 och = fd->fd_lease_och;
2277 fd->fd_lease_och = NULL;
2279 mutex_unlock(&lli->lli_och_mutex);
2281 GOTO(out, rc = -ENOLCK);
2282 inode2 = file2->f_dentry->d_inode;
2283 rc = ll_swap_layouts_close(och, inode, inode2);
2285 rc = ll_swap_layouts(file, file2, &lsl);
2291 case LL_IOC_LOV_GETSTRIPE:
2292 RETURN(ll_file_getstripe(inode,
2293 (struct lov_user_md __user *)arg));
2294 case FSFILT_IOC_FIEMAP:
2295 RETURN(ll_ioctl_fiemap(inode, (struct fiemap __user *)arg));
2296 case FSFILT_IOC_GETFLAGS:
2297 case FSFILT_IOC_SETFLAGS:
2298 RETURN(ll_iocontrol(inode, file, cmd, arg));
2299 case FSFILT_IOC_GETVERSION_OLD:
2300 case FSFILT_IOC_GETVERSION:
2301 RETURN(put_user(inode->i_generation, (int __user *)arg));
2302 case LL_IOC_GROUP_LOCK:
2303 RETURN(ll_get_grouplock(inode, file, arg));
2304 case LL_IOC_GROUP_UNLOCK:
2305 RETURN(ll_put_grouplock(inode, file, arg));
2306 case IOC_OBD_STATFS:
2307 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2309 /* We need to special case any other ioctls we want to handle,
2310 * to send them to the MDS/OST as appropriate and to properly
2311 * network encode the arg field.
2312 case FSFILT_IOC_SETVERSION_OLD:
2313 case FSFILT_IOC_SETVERSION:
2315 case LL_IOC_FLUSHCTX:
2316 RETURN(ll_flush_ctx(inode));
2317 case LL_IOC_PATH2FID: {
2318 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2319 sizeof(struct lu_fid)))
2324 case LL_IOC_GETPARENT:
2325 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2327 case OBD_IOC_FID2PATH:
2328 RETURN(ll_fid2path(inode, (void __user *)arg));
2329 case LL_IOC_DATA_VERSION: {
2330 struct ioc_data_version idv;
2333 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* mask off anything but the supported flush flags */
2336 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2337 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2340 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2346 case LL_IOC_GET_MDTIDX: {
2349 mdtidx = ll_get_mdt_idx(inode);
2353 if (put_user((int)mdtidx, (int __user *)arg))
2358 case OBD_IOC_GETDTNAME:
2359 case OBD_IOC_GETMDNAME:
2360 RETURN(ll_get_obd_name(inode, cmd, arg));
2361 case LL_IOC_HSM_STATE_GET: {
2362 struct md_op_data *op_data;
2363 struct hsm_user_state *hus;
2370 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2371 LUSTRE_OPC_ANY, hus);
2372 if (IS_ERR(op_data)) {
2374 RETURN(PTR_ERR(op_data));
2377 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2380 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2383 ll_finish_md_op_data(op_data);
2387 case LL_IOC_HSM_STATE_SET: {
2388 struct hsm_state_set *hss;
2395 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2400 rc = ll_hsm_state_set(inode, hss);
2405 case LL_IOC_HSM_ACTION: {
2406 struct md_op_data *op_data;
2407 struct hsm_current_action *hca;
2414 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2415 LUSTRE_OPC_ANY, hca);
2416 if (IS_ERR(op_data)) {
2418 RETURN(PTR_ERR(op_data));
2421 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2424 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2427 ll_finish_md_op_data(op_data);
2431 case LL_IOC_SET_LEASE: {
2432 struct ll_inode_info *lli = ll_i2info(inode);
2433 struct obd_client_handle *och = NULL;
/* requested lease mode must be compatible with the open mode */
2438 case LL_LEASE_WRLCK:
2439 if (!(file->f_mode & FMODE_WRITE))
2441 fmode = FMODE_WRITE;
2443 case LL_LEASE_RDLCK:
2444 if (!(file->f_mode & FMODE_READ))
2448 case LL_LEASE_UNLCK:
2449 mutex_lock(&lli->lli_och_mutex);
2450 if (fd->fd_lease_och != NULL) {
2451 och = fd->fd_lease_och;
2452 fd->fd_lease_och = NULL;
2454 mutex_unlock(&lli->lli_och_mutex);
2459 fmode = och->och_flags;
2460 rc = ll_lease_close(och, inode, &lease_broken);
/* report the lease type that was actually held */
2467 RETURN(ll_lease_type_from_fmode(fmode));
2472 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2474 /* apply for lease */
2475 och = ll_lease_open(inode, file, fmode, 0);
2477 RETURN(PTR_ERR(och));
2480 mutex_lock(&lli->lli_och_mutex);
2481 if (fd->fd_lease_och == NULL) {
2482 fd->fd_lease_och = och;
2485 mutex_unlock(&lli->lli_och_mutex);
2487 /* impossible now that only excl is supported for now */
2488 ll_lease_close(och, inode, &lease_broken);
2493 case LL_IOC_GET_LEASE: {
2494 struct ll_inode_info *lli = ll_i2info(inode);
2495 struct ldlm_lock *lock = NULL;
2498 mutex_lock(&lli->lli_och_mutex);
2499 if (fd->fd_lease_och != NULL) {
2500 struct obd_client_handle *och = fd->fd_lease_och;
2502 lock = ldlm_handle2lock(&och->och_lease_handle);
2504 lock_res_and_lock(lock);
/* a cancelled lease lock no longer counts as held */
2505 if (!ldlm_is_cancel(lock))
2506 fmode = och->och_flags;
2508 unlock_res_and_lock(lock);
2509 LDLM_LOCK_PUT(lock);
2512 mutex_unlock(&lli->lli_och_mutex);
2514 RETURN(ll_lease_type_from_fmode(fmode));
2516 case LL_IOC_HSM_IMPORT: {
2517 struct hsm_user_import *hui;
2523 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2528 rc = ll_hsm_import(inode, file, hui);
/* unknown command: try registered handlers, then the data export */
2538 ll_iocontrol_call(inode, file, cmd, arg, &err))
2541 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2542 (void __user *)arg));
2547 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Commit a computed seek offset: reject negative offsets (unless the
 * file allows unsigned offsets) and offsets beyond @maxsize, and only
 * touch f_pos/f_version when the position actually changes.
 * Local fallback for kernels without generic_file_llseek_size().
 */
2548 static inline loff_t
2549 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2551 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2553 if (offset > maxsize)
2556 if (offset != file->f_pos) {
2557 file->f_pos = offset;
2558 file->f_version = 0;
/*
 * Fallback copy of the kernel's generic_file_llseek_size() for old
 * kernels (inside #ifndef HAVE_FILE_LLSEEK_SIZE).  Handles SEEK_CUR
 * without rewriting an unchanged f_pos, and SEEK_DATA/SEEK_HOLE with
 * the whole-file-is-data model.
 */
2564 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2565 loff_t maxsize, loff_t eof)
2567 struct inode *inode = file->f_dentry->d_inode;
2575 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2576 * position-querying operation. Avoid rewriting the "same"
2577 * f_pos value back to the file because a concurrent read(),
2578 * write() or lseek() might have altered it
2583 * f_lock protects against read/modify/write race with other
2584 * SEEK_CURs. Note that parallel writes and reads behave
2587 mutex_lock(&inode->i_mutex);
2588 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2589 mutex_unlock(&inode->i_mutex);
2593 * In the generic case the entire file is data, so as long as
2594 * offset isn't at the end of the file then the offset is data.
2601 * There is a virtual hole at the end of the file, so as long as
2602 * offset isn't i_size or larger, return i_size.
2610 return llseek_execute(file, offset, maxsize);
/*
 * llseek handler: for SEEK_END/SEEK_HOLE/SEEK_DATA first glimpse the
 * file size from the OSTs so eof is current, then delegate to the
 * (possibly local) generic_file_llseek_size with the client max-bytes
 * limit.
 */
2614 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2616 struct inode *inode = file->f_dentry->d_inode;
2617 loff_t retval, eof = 0;
/* provisional target, for the trace message only */
2620 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2621 (origin == SEEK_CUR) ? file->f_pos : 0);
2622 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2623 PFID(ll_inode2fid(inode)), inode, retval, retval,
2625 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2627 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2628 retval = ll_glimpse_size(inode);
2631 eof = i_size_read(inode);
2634 retval = ll_generic_file_llseek_size(file, offset, origin,
2635 ll_file_maxbytes(inode), eof);
/*
 * flush (close-time) handler: report any async writeback error that
 * was recorded against this inode/object, collapsing it to -EIO, but
 * only once per fd — errors already reported via fd_write_failed are
 * not repeated.
 */
2639 static int ll_flush(struct file *file, fl_owner_t id)
2641 struct inode *inode = file->f_dentry->d_inode;
2642 struct ll_inode_info *lli = ll_i2info(inode);
2643 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2646 LASSERT(!S_ISDIR(inode->i_mode));
2648 /* catch async errors that were recorded back when async writeback
2649 * failed for pages in this mapping. */
2650 rc = lli->lli_async_rc;
2651 lli->lli_async_rc = 0;
2652 if (lli->lli_clob != NULL) {
2653 err = lov_read_and_clear_async_rc(lli->lli_clob);
2658 /* The application has been told write failure already.
2659 * Do not report failure again. */
2660 if (fd->fd_write_failed)
2662 return rc ? -EIO : 0;
2666 * Called to make sure a portion of file has been written out.
2667 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2669 * Return how many pages have been written.
/*
 * Run a CIT_FSYNC cl_io over [start, end] with the given fsync mode;
 * on success the return value is fi_nr_written (pages written),
 * otherwise a negative errno.  The OSS write capability is attached
 * to the fsync io parameters.
 */
2671 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2672 enum cl_fsync_mode mode, int ignore_layout)
2674 struct cl_env_nest nest;
2677 struct obd_capa *capa = NULL;
2678 struct cl_fsync_io *fio;
2682 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2683 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2686 env = cl_env_nested_get(&nest);
2688 RETURN(PTR_ERR(env));
2690 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2692 io = vvp_env_thread_io(env);
2693 io->ci_obj = ll_i2info(inode)->lli_clob;
2694 io->ci_ignore_layout = ignore_layout;
2696 /* initialize parameters for sync */
2697 fio = &io->u.ci_fsync;
2698 fio->fi_capa = capa;
2699 fio->fi_start = start;
2701 fio->fi_fid = ll_inode2fid(inode);
2702 fio->fi_mode = mode;
2703 fio->fi_nr_written = 0;
2705 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2706 result = cl_io_loop(env, io);
2708 result = io->ci_result;
2710 result = fio->fi_nr_written;
2711 cl_io_fini(env, io);
2712 cl_env_nested_put(&nest, env);
2720 * When dentry is provided (the 'else' case), *file->f_dentry may be
2721 * null and dentry must be used directly rather than pulled from
2722 * *file->f_dentry as is done otherwise.
/*
 * fsync handler with three kernel-version-dependent signatures.
 * Flushes page cache (range on 4-arg kernels), reports recorded async
 * writeback errors, fsyncs metadata via the MDC, and for regular
 * files syncs the data range through cl_sync_file_range(CL_FSYNC_ALL),
 * tracking the per-fd write-failed state for ll_flush().
 */
2725 #ifdef HAVE_FILE_FSYNC_4ARGS
2726 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2728 struct dentry *dentry = file->f_dentry;
2729 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2730 int ll_fsync(struct file *file, int datasync)
2732 struct dentry *dentry = file->f_dentry;
2734 loff_t end = LLONG_MAX;
2736 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2739 loff_t end = LLONG_MAX;
2741 struct inode *inode = dentry->d_inode;
2742 struct ll_inode_info *lli = ll_i2info(inode);
2743 struct ptlrpc_request *req;
2744 struct obd_capa *oc;
2748 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2749 PFID(ll_inode2fid(inode)), inode);
2750 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2752 #ifdef HAVE_FILE_FSYNC_4ARGS
2753 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2754 mutex_lock(&inode->i_mutex);
2756 /* fsync's caller has already called _fdata{sync,write}, we want
2757 * that IO to finish before calling the osc and mdc sync methods */
2758 rc = filemap_fdatawait(inode->i_mapping);
2761 /* catch async errors that were recorded back when async writeback
2762 * failed for pages in this mapping. */
2763 if (!S_ISDIR(inode->i_mode)) {
2764 err = lli->lli_async_rc;
2765 lli->lli_async_rc = 0;
2768 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* sync the metadata on the MDT */
2773 oc = ll_mdscapa_get(inode);
2774 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2780 ptlrpc_req_finished(req);
2782 if (S_ISREG(inode->i_mode)) {
2783 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2785 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2786 if (rc == 0 && err < 0)
2789 fd->fd_write_failed = true;
2791 fd->fd_write_failed = false;
2794 #ifdef HAVE_FILE_FSYNC_4ARGS
2795 mutex_unlock(&inode->i_mutex);
/*
 * ll_file_flock(): handle flock(2)/fcntl(2) byte-range lock requests by
 * translating them into LDLM_FLOCK enqueues against the MDS.  Visible flow:
 * build an ldlm_policy_data_t from the struct file_lock, map the fcntl lock
 * type to an LDLM mode (LCK_PR/LCK_PW, LCK_NL for unlock), map the command
 * to LDLM flags (BLOCK_NOWAIT / TEST_LOCK), enqueue via md_enqueue(), then
 * mirror the result into the local VFS lock tables.
 * NOTE(review): fragmentary excerpt — switch labels and some cleanup lines
 * are elided; comments cover only what is shown.
 */
2801 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2803 struct inode *inode = file->f_dentry->d_inode;
2804 struct ll_sb_info *sbi = ll_i2sbi(inode);
2805 struct ldlm_enqueue_info einfo = {
2806 .ei_type = LDLM_FLOCK,
2807 .ei_cb_cp = ldlm_flock_completion_ast,
2808 .ei_cbdata = file_lock,
2810 struct md_op_data *op_data;
2811 struct lustre_handle lockh = {0};
2812 ldlm_policy_data_t flock = {{0}};
2813 int fl_type = file_lock->fl_type;
2819 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2820 PFID(ll_inode2fid(inode)), file_lock);
2822 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2824 if (file_lock->fl_flags & FL_FLOCK) {
2825 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2826 /* flocks are whole-file locks */
2827 flock.l_flock.end = OFFSET_MAX;
2828 /* For flocks owner is determined by the local file descriptor */
2829 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2830 } else if (file_lock->fl_flags & FL_POSIX) {
2831 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2832 flock.l_flock.start = file_lock->fl_start;
2833 flock.l_flock.end = file_lock->fl_end;
2837 flock.l_flock.pid = file_lock->fl_pid;
2839 /* Somewhat ugly workaround for svc lockd.
2840 * lockd installs custom fl_lmops->lm_compare_owner that checks
2841 * for the fl_owner to be the same (which it always is on local node
2842 * I guess between lockd processes) and then compares pid.
2843 * As such we assign pid to the owner field to make it all work,
2844 * conflict with normal locks is unlikely since pid space and
2845 * pointer space for current->files are not intersecting */
2846 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2847 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Map fcntl lock type -> LDLM mode (switch labels elided in excerpt). */
2851 einfo.ei_mode = LCK_PR;
2854 /* An unlock request may or may not have any relation to
2855 * existing locks so we may not be able to pass a lock handle
2856 * via a normal ldlm_lock_cancel() request. The request may even
2857 * unlock a byte range in the middle of an existing lock. In
2858 * order to process an unlock request we need all of the same
2859 * information that is given with a normal read or write record
2860 * lock request. To avoid creating another ldlm unlock (cancel)
2861 * message we'll treat a LCK_NL flock request as an unlock. */
2862 einfo.ei_mode = LCK_NL;
2865 einfo.ei_mode = LCK_PW;
2868 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* Map command -> enqueue flags (non-blocking vs. test-only). */
2883 flags = LDLM_FL_BLOCK_NOWAIT;
2889 flags = LDLM_FL_TEST_LOCK;
2892 CERROR("unknown fcntl lock command: %d\n", cmd);
2896 /* Save the old mode so that if the mode in the lock changes we
2897 * can decrement the appropriate reader or writer refcount. */
2898 file_lock->fl_type = einfo.ei_mode;
2900 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2901 LUSTRE_OPC_ANY, NULL);
2902 if (IS_ERR(op_data))
2903 RETURN(PTR_ERR(op_data));
2905 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2906 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2907 flock.l_flock.pid, flags, einfo.ei_mode,
2908 flock.l_flock.start, flock.l_flock.end);
2910 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2913 /* Restore the file lock type if not TEST lock. */
2914 if (!(flags & LDLM_FL_TEST_LOCK))
2915 file_lock->fl_type = fl_type;
/* Mirror a successful server-side lock into the local VFS lock lists so
 * deadlock detection and /proc reporting see it. */
2917 if ((file_lock->fl_flags & FL_FLOCK) &&
2918 (rc == 0 || file_lock->fl_type == F_UNLCK))
2919 rc2 = flock_lock_file_wait(file, file_lock);
2920 if ((file_lock->fl_flags & FL_POSIX) &&
2921 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2922 !(flags & LDLM_FL_TEST_LOCK))
2923 rc2 = posix_lock_file_wait(file, file_lock);
2925 if (rc2 && file_lock->fl_type != F_UNLCK) {
/* Local bookkeeping failed: release the server lock we just took by
 * enqueueing it again as LCK_NL (unlock). */
2926 einfo.ei_mode = LCK_NL;
2927 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2932 ll_finish_md_op_data(op_data);
/*
 * ll_get_fid_by_name(): look up the FID of @name (length @namelen) under
 * directory @parent via an MDS getattr-by-name RPC.
 * \param parent  parent directory inode
 * \param name    child name (not necessarily NUL-terminated; @namelen used)
 * \param fid     [out] filled with the child's FID on success
 * \retval 0 on success, negative errno on failure
 * NOTE(review): excerpt elides some lines (e.g. the rc check after
 * md_getattr_name); comments cover visible statements only.
 */
2937 int ll_get_fid_by_name(struct inode *parent, const char *name,
2938 int namelen, struct lu_fid *fid)
2940 struct md_op_data *op_data = NULL;
2941 struct mdt_body *body;
2942 struct ptlrpc_request *req;
2946 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2947 LUSTRE_OPC_ANY, NULL);
2948 if (IS_ERR(op_data))
2949 RETURN(PTR_ERR(op_data));
/* Only the FID is needed from the reply. */
2951 op_data->op_valid = OBD_MD_FLID;
2952 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2953 ll_finish_md_op_data(op_data);
2957 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2959 GOTO(out_req, rc = -EFAULT);
2961 *fid = body->mbo_fid1;
2963 ptlrpc_req_finished(req);
/*
 * ll_migrate(): migrate directory entry @name under @parent to MDT @mdtidx
 * by issuing a rename-to-self with CLI_MIGRATE set.  Visible flow: resolve
 * the child's FID (from the dcache if possible, otherwise via
 * ll_get_fid_by_name()), bail out if the child already lives on the target
 * MDT, then call md_rename() and invalidate the local child inode.
 * NOTE(review): fragmentary excerpt — error-path labels (out_free etc.)
 * and some checks are elided; comments describe visible statements only.
 */
2967 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2968 const char *name, int namelen)
2970 struct dentry *dchild = NULL;
2971 struct inode *child_inode = NULL;
2972 struct md_op_data *op_data;
2973 struct ptlrpc_request *request = NULL;
2978 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2979 name, PFID(ll_inode2fid(parent)), mdtidx);
2981 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2982 0, LUSTRE_OPC_ANY, NULL);
2983 if (IS_ERR(op_data))
2984 RETURN(PTR_ERR(op_data));
2986 /* Get child FID first */
2987 qstr.hash = full_name_hash(name, namelen);
/* Fast path: the child may already be in the dcache. */
2990 dchild = d_lookup(file->f_dentry, &qstr);
2991 if (dchild != NULL) {
2992 if (dchild->d_inode != NULL) {
2993 child_inode = igrab(dchild->d_inode);
2994 if (child_inode != NULL) {
/* Hold i_mutex across the migration; dentry aliases are
 * dropped so stale entries are not used afterwards. */
2995 mutex_lock(&child_inode->i_mutex);
2996 op_data->op_fid3 = *ll_inode2fid(child_inode);
2997 ll_invalidate_aliases(child_inode);
/* Slow path: ask the MDS for the child's FID. */
3002 rc = ll_get_fid_by_name(parent, name, namelen,
3008 if (!fid_is_sane(&op_data->op_fid3)) {
3009 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
3010 ll_get_fsname(parent->i_sb, NULL, 0), name,
3011 PFID(&op_data->op_fid3));
3012 GOTO(out_free, rc = -EINVAL);
/* Already on the target MDT?  Then there is nothing to do. */
3015 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
3020 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
3021 PFID(&op_data->op_fid3), mdtidx);
3022 GOTO(out_free, rc = 0);
/* Migration is implemented as a rename onto itself with CLI_MIGRATE. */
3025 op_data->op_mds = mdtidx;
3026 op_data->op_cli_flags = CLI_MIGRATE;
3027 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
3028 namelen, name, namelen, &request);
3030 ll_update_times(request, parent);
3032 ptlrpc_req_finished(request);
3037 if (child_inode != NULL) {
/* The old inode is stale after migration: force re-lookup. */
3038 clear_nlink(child_inode);
3039 mutex_unlock(&child_inode->i_mutex);
3043 ll_finish_md_op_data(op_data);
/* -o noflock handler: rejects flock/posix lock requests (body elided in
 * this excerpt; per the comment at the noflock ops table below, it is
 * expected to return ENOSYS). */
3048 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3056 * test if some locks matching bits and l_req_mode are acquired
3057 * - bits can be in different locks
3058 * - if found clear the common lock bits in *bits
3059 * - the bits not found, are kept in *bits
3061 * \param bits [IN] searched lock bits [IN]
3062 * \param l_req_mode [IN] searched lock mode
3063 * \retval boolean, true iff all bits are found
3065 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3067 struct lustre_handle lockh;
3068 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four modes at once. */
3069 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3070 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3079 fid = &ll_i2info(inode)->lli_fid;
3080 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3081 ldlm_lockname[mode]);
/* TEST_LOCK: match without taking a reference on the lock. */
3083 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe each inodebit individually; stop early once *bits is empty. */
3084 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3085 policy.l_inodebits.bits = *bits & (1 << i);
3086 if (policy.l_inodebits.bits == 0)
3089 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3090 &policy, mode, &lockh)) {
3091 struct ldlm_lock *lock;
3093 lock = ldlm_handle2lock(&lockh);
/* Clear every bit the matched lock covers, not just the probed one. */
3096 ~(lock->l_policy_data.l_inodebits.bits);
3097 LDLM_LOCK_PUT(lock);
3099 *bits &= ~policy.l_inodebits.bits;
/*
 * ll_take_md_lock(): try to match (and take a reference on) a cached MDS
 * inodebits lock covering @bits on @inode.
 * \retval the matched LDLM mode (0 if none); on success @lockh is filled
 *         and the caller owns a lock reference it must later release.
 */
3106 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3107 struct lustre_handle *lockh, __u64 flags,
3110 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3115 fid = &ll_i2info(inode)->lli_fid;
3116 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3118 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3119 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * ll_inode_revalidate_fini(): post-process the rc of a revalidation RPC.
 * -ENOENT on a non-regular, non-directory inode is treated as "already
 * unlinked" (success after nlink update on an elided line); other errors
 * are logged, with EACCES/EIDRM demoted to D_INFO since they are expected
 * under permission/identity revocation.
 */
3124 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3126 /* Already unlinked. Just update nlink and return success */
3127 if (rc == -ENOENT) {
3129 /* This path cannot be hit for regular files unless in
3130 * case of obscure races, so no need to to validate
3132 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3134 } else if (rc != 0) {
3135 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3136 "%s: revalidate FID "DFID" error: rc = %d\n",
3137 ll_get_fsname(inode->i_sb, NULL, 0),
3138 PFID(ll_inode2fid(inode)), rc);
/*
 * __ll_inode_revalidate(): refresh @dentry's inode attributes from the MDS.
 * Two visible strategies:
 *   - OBD_CONNECT_ATTRFID servers: an intent lock (IT_GETATTR, or IT_LOOKUP
 *     when only the LOOKUP bit is wanted) via md_intent_lock(), which both
 *     revalidates the dentry and refreshes attributes;
 *   - otherwise, if no matching MD lock is cached, a plain md_getattr()
 *     followed by ll_prep_inode().
 * NOTE(review): fragmentary excerpt — several rc checks and RETURN paths
 * are elided; comments describe only visible statements.
 */
3144 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3146 struct inode *inode = dentry->d_inode;
3147 struct ptlrpc_request *req = NULL;
3148 struct obd_export *exp;
3152 LASSERT(inode != NULL);
3154 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3155 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3157 exp = ll_i2mdexp(inode);
3159 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3160 * But under CMD case, it caused some lock issues, should be fixed
3161 * with new CMD ibits lock. See bug 12718 */
3162 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3163 struct lookup_intent oit = { .it_op = IT_GETATTR };
3164 struct md_op_data *op_data;
3166 if (ibits == MDS_INODELOCK_LOOKUP)
3167 oit.it_op = IT_LOOKUP;
3169 /* Call getattr by fid, so do not provide name at all. */
3170 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3171 dentry->d_inode, NULL, 0, 0,
3172 LUSTRE_OPC_ANY, NULL);
3173 if (IS_ERR(op_data))
3174 RETURN(PTR_ERR(op_data));
3176 rc = md_intent_lock(exp, op_data, &oit, &req,
3177 &ll_md_blocking_ast, 0);
3178 ll_finish_md_op_data(op_data);
3180 rc = ll_inode_revalidate_fini(inode, rc);
3184 rc = ll_revalidate_it_finish(req, &oit, dentry);
3186 ll_intent_release(&oit);
3190 /* Unlinked? Unhash dentry, so it is not picked up later by
3191 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3192 here to preserve get_cwd functionality on 2.6.
3194 if (!dentry->d_inode->i_nlink)
3195 d_lustre_invalidate(dentry, 0);
3197 ll_lookup_finish_locks(&oit, dentry);
3198 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3199 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3200 u64 valid = OBD_MD_FLGETATTR;
3201 struct md_op_data *op_data;
/* Regular files additionally need the striping EA, so size the reply
 * buffer for the default MD size. */
3204 if (S_ISREG(inode->i_mode)) {
3205 rc = ll_get_default_mdsize(sbi, &ealen);
3208 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3211 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3212 0, ealen, LUSTRE_OPC_ANY,
3214 if (IS_ERR(op_data))
3215 RETURN(PTR_ERR(op_data));
3217 op_data->op_valid = valid;
3218 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3219 * capa for this inode. Because we only keep capas of dirs
3221 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3222 ll_finish_md_op_data(op_data);
3224 rc = ll_inode_revalidate_fini(inode, rc);
3228 rc = ll_prep_inode(&inode, req, NULL, NULL);
3231 ptlrpc_req_finished(req);
/*
 * ll_merge_md_attr(): for a striped directory, merge the attributes of all
 * stripes (via md_merge_attr() on the inode's lsm_md) and fold the result
 * (nlink, blocks, size, a/m/ctime) back into the local inode/lli.
 */
3235 static int ll_merge_md_attr(struct inode *inode)
3237 struct cl_attr attr = { 0 };
3240 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3241 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3242 &attr, ll_md_blocking_ast);
3246 set_nlink(inode, attr.cat_nlink);
3247 inode->i_blocks = attr.cat_blocks;
3248 i_size_write(inode, attr.cat_size);
/* Timestamps are cached in lli and copied to the inode by callers. */
3250 ll_i2info(inode)->lli_atime = attr.cat_atime;
3251 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3252 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * ll_inode_revalidate(): full revalidation used by ->getattr.  First runs
 * __ll_inode_revalidate() for MDS attributes; then for striped directories
 * merges stripe attributes, copies cached timestamps into the inode, and
 * for regular files glimpses the size from the OSTs unless an HSM restore
 * is in progress (the MDT already supplied the correct size then).
 */
3258 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3260 struct inode *inode = dentry->d_inode;
3264 rc = __ll_inode_revalidate(dentry, ibits);
3268 /* if object isn't regular file, don't validate size */
3269 if (!S_ISREG(inode->i_mode)) {
3270 if (S_ISDIR(inode->i_mode) &&
3271 ll_i2info(inode)->lli_lsm_md != NULL) {
3272 rc = ll_merge_md_attr(inode);
3277 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3278 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3279 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3281 /* In case of restore, the MDT has the right size and has
3282 * already send it back without granting the layout lock,
3283 * inode is up-to-date so glimpse is useless.
3284 * Also to glimpse we need the layout, in case of a running
3285 * restore the MDT holds the layout lock so the glimpse will
3286 * block up to the end of restore (getattr will block)
3288 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3289 rc = ll_glimpse_size(inode);
/*
 * ll_getattr(): VFS ->getattr.  Revalidates UPDATE|LOOKUP ibits, then
 * copies inode attributes into the kstat.  When the client needs a 32-bit
 * ino API, the ino is synthesized from the FID via cl_fid_build_ino().
 */
3294 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3296 struct inode *inode = de->d_inode;
3297 struct ll_sb_info *sbi = ll_i2sbi(inode);
3298 struct ll_inode_info *lli = ll_i2info(inode);
3301 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3302 MDS_INODELOCK_LOOKUP);
3303 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3308 stat->dev = inode->i_sb->s_dev;
3309 if (ll_need_32bit_api(sbi))
3310 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3312 stat->ino = inode->i_ino;
3313 stat->mode = inode->i_mode;
3314 stat->uid = inode->i_uid;
3315 stat->gid = inode->i_gid;
3316 stat->rdev = inode->i_rdev;
3317 stat->atime = inode->i_atime;
3318 stat->mtime = inode->i_mtime;
3319 stat->ctime = inode->i_ctime;
3320 stat->blksize = 1 << inode->i_blkbits;
3322 stat->nlink = inode->i_nlink;
3323 stat->size = i_size_read(inode);
3324 stat->blocks = inode->i_blocks;
/*
 * ll_fiemap(): ->fiemap handler.  Marshals the kernel's fiemap_extent_info
 * into a ll_user_fiemap buffer (copying in the first extent, used by
 * FIEMAP to resume), runs ll_do_fiemap(), then copies the flags, mapped
 * extent count and extent array back out.
 * NOTE(review): allocation-failure and rc checks are elided in this
 * excerpt.
 */
3329 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3330 __u64 start, __u64 len)
3334 struct ll_user_fiemap *fiemap;
3335 unsigned int extent_count = fieinfo->fi_extents_max;
3337 num_bytes = sizeof(*fiemap) + (extent_count *
3338 sizeof(struct ll_fiemap_extent));
3339 OBD_ALLOC_LARGE(fiemap, num_bytes);
3344 fiemap->fm_flags = fieinfo->fi_flags;
3345 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3346 fiemap->fm_start = start;
3347 fiemap->fm_length = len;
/* Seed only the first extent: FIEMAP uses it to continue a prior call. */
3348 if (extent_count > 0)
3349 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3350 sizeof(struct ll_fiemap_extent));
3352 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3354 fieinfo->fi_flags = fiemap->fm_flags;
3355 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3356 if (extent_count > 0)
3357 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3358 fiemap->fm_mapped_extents *
3359 sizeof(struct ll_fiemap_extent));
3361 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * ll_get_acl(): return a referenced copy of the cached POSIX ACL for
 * @inode.  The lli_lock spinlock protects lli_posix_acl; the reference
 * taken by posix_acl_dup() is dropped by the VFS caller.
 */
3365 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3367 struct ll_inode_info *lli = ll_i2info(inode);
3368 struct posix_acl *acl = NULL;
3371 spin_lock(&lli->lli_lock);
3372 /* VFS' acl_permission_check->check_acl will release the refcount */
3373 acl = posix_acl_dup(lli->lli_posix_acl);
3374 spin_unlock(&lli->lli_lock);
/*
 * ll_check_acl(): ACL callback for generic_permission() on kernels without
 * the 2-arg variant.  Checks @mask against the inode's cached ACL; on
 * 4-arg kernels an RCU-walk (IPERM_FLAG_RCU) bails out early since
 * posix_acl_permission() may block.  Without CONFIG_FS_POSIX_ACL the
 * function is a stub (return value on an elided line).
 */
3379 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3381 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3382 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3384 ll_check_acl(struct inode *inode, int mask)
3387 # ifdef CONFIG_FS_POSIX_ACL
3388 struct posix_acl *acl;
3392 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3393 if (flags & IPERM_FLAG_RCU)
3396 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3401 rc = posix_acl_permission(inode, acl, mask);
3402 posix_acl_release(acl);
3405 # else /* !CONFIG_FS_POSIX_ACL */
3407 # endif /* CONFIG_FS_POSIX_ACL */
3409 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * ll_inode_permission(): VFS ->permission with three kernel-ABI variants.
 * Visible behavior: refuse RCU-walk (may block); revalidate the root inode
 * before checking it; apply root squashing by temporarily overriding
 * credentials (fsuid/fsgid set to the squash ids, filesystem capabilities
 * lowered); then delegate to lustre_check_remote_perm() for remote clients
 * or ll_generic_permission() otherwise, restoring credentials after.
 * NOTE(review): fragmentary excerpt — some returns/cleanup are elided.
 */
3411 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3412 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3414 # ifdef HAVE_INODE_PERMISION_2ARGS
3415 int ll_inode_permission(struct inode *inode, int mask)
3417 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3422 struct ll_sb_info *sbi;
3423 struct root_squash_info *squash;
3424 struct cred *cred = NULL;
3425 const struct cred *old_cred = NULL;
3427 bool squash_id = false;
/* Cannot service an RCU (non-blocking) permission walk: bail out so the
 * VFS retries in ref-walk mode. */
3430 #ifdef MAY_NOT_BLOCK
3431 if (mask & MAY_NOT_BLOCK)
3433 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3434 if (flags & IPERM_FLAG_RCU)
3438 /* as root inode are NOT getting validated in lookup operation,
3439 * need to do it before permission check. */
3441 if (inode == inode->i_sb->s_root->d_inode) {
3442 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3443 MDS_INODELOCK_LOOKUP);
3448 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3449 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3451 /* squash fsuid/fsgid if needed */
3452 sbi = ll_i2sbi(inode);
3453 squash = &sbi->ll_squash;
3454 if (unlikely(squash->rsi_uid != 0 &&
3455 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3456 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3460 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3461 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3462 squash->rsi_uid, squash->rsi_gid);
3464 /* update current process's credentials
3465 * and FS capability */
3466 cred = prepare_creds();
3470 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3471 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* Drop every filesystem-related capability from the squashed creds. */
3472 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3473 if ((1 << cap) & CFS_CAP_FS_MASK)
3474 cap_lower(cred->cap_effective, cap);
3476 old_cred = override_creds(cred);
3479 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3481 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3482 rc = lustre_check_remote_perm(inode, mask);
3484 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3486 /* restore current process's credentials and FS capability */
3488 revert_creds(old_cred);
3495 /* -o localflock - only provides locally consistent flock locks */
/* Default file ops: no .flock/.lock methods, so the VFS falls back to
 * purely local (per-node) lock semantics. */
3496 struct file_operations ll_file_operations = {
3497 .read = ll_file_read,
3498 .aio_read = ll_file_aio_read,
3499 .write = ll_file_write,
3500 .aio_write = ll_file_aio_write,
3501 .unlocked_ioctl = ll_file_ioctl,
3502 .open = ll_file_open,
3503 .release = ll_file_release,
3504 .mmap = ll_file_mmap,
3505 .llseek = ll_file_seek,
3506 .splice_read = ll_file_splice_read,
/* -o flock variant: identical to the default ops but routes flock(2) and
 * fcntl(2) locks through ll_file_flock() for cluster-wide consistency. */
3511 struct file_operations ll_file_operations_flock = {
3512 .read = ll_file_read,
3513 .aio_read = ll_file_aio_read,
3514 .write = ll_file_write,
3515 .aio_write = ll_file_aio_write,
3516 .unlocked_ioctl = ll_file_ioctl,
3517 .open = ll_file_open,
3518 .release = ll_file_release,
3519 .mmap = ll_file_mmap,
3520 .llseek = ll_file_seek,
3521 .splice_read = ll_file_splice_read,
3524 .flock = ll_file_flock,
3525 .lock = ll_file_flock
3528 /* These are for -o noflock - to return ENOSYS on flock calls */
3529 struct file_operations ll_file_operations_noflock = {
3530 .read = ll_file_read,
3531 .aio_read = ll_file_aio_read,
3532 .write = ll_file_write,
3533 .aio_write = ll_file_aio_write,
3534 .unlocked_ioctl = ll_file_ioctl,
3535 .open = ll_file_open,
3536 .release = ll_file_release,
3537 .mmap = ll_file_mmap,
3538 .llseek = ll_file_seek,
3539 .splice_read = ll_file_splice_read,
3542 .flock = ll_file_noflock,
3543 .lock = ll_file_noflock
/* Inode operations for regular files; .get_acl is only wired up on
 * kernels that have it in inode_operations (HAVE_IOP_GET_ACL). */
3546 struct inode_operations ll_file_inode_operations = {
3547 .setattr = ll_setattr,
3548 .getattr = ll_getattr,
3549 .permission = ll_inode_permission,
3550 .setxattr = ll_setxattr,
3551 .getxattr = ll_getxattr,
3552 .listxattr = ll_listxattr,
3553 .removexattr = ll_removexattr,
3554 .fiemap = ll_fiemap,
3555 #ifdef HAVE_IOP_GET_ACL
3556 .get_acl = ll_get_acl,
3560 /* dynamic ioctl number support routines */
/* Registry of dynamically registered ioctl handlers: a list of llioc_data
 * entries protected by an rwsem.  llioc_data holds the callback plus a
 * flexible array of the ioctl command numbers it serves. */
3561 static struct llioc_ctl_data {
3562 struct rw_semaphore ioc_sem;
3563 struct list_head ioc_head;
3565 __RWSEM_INITIALIZER(llioc.ioc_sem),
3566 LIST_HEAD_INIT(llioc.ioc_head)
3571 struct list_head iocd_list;
3572 unsigned int iocd_size;
3573 llioc_callback_t iocd_cb;
3574 unsigned int iocd_count;
3575 unsigned int iocd_cmd[0];
/*
 * ll_iocontrol_register(): register callback @cb for @count dynamic ioctl
 * command numbers in @cmd.  Allocates an llioc_data sized for the command
 * array and appends it to the global list under the write semaphore.
 * \retval opaque handle (the llioc_data, per the unregister magic check)
 *         on success; failure returns are on elided lines.
 */
3578 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3581 struct llioc_data *in_data = NULL;
3584 if (cb == NULL || cmd == NULL ||
3585 count > LLIOC_MAX_CMD || count < 0)
3588 size = sizeof(*in_data) + count * sizeof(unsigned int);
3589 OBD_ALLOC(in_data, size);
3590 if (in_data == NULL)
3593 memset(in_data, 0, sizeof(*in_data));
3594 in_data->iocd_size = size;
3595 in_data->iocd_cb = cb;
3596 in_data->iocd_count = count;
3597 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3599 down_write(&llioc.ioc_sem);
3600 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3601 up_write(&llioc.ioc_sem);
/*
 * ll_iocontrol_unregister(): remove and free the registration identified
 * by @magic (the handle returned by ll_iocontrol_register).  The matching
 * comparison is on an elided line; on a miss a warning is logged.
 */
3606 void ll_iocontrol_unregister(void *magic)
3608 struct llioc_data *tmp;
3613 down_write(&llioc.ioc_sem);
3614 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3616 unsigned int size = tmp->iocd_size;
3618 list_del(&tmp->iocd_list);
/* Drop the semaphore before freeing; list iteration ends here. */
3619 up_write(&llioc.ioc_sem);
3621 OBD_FREE(tmp, size);
3625 up_write(&llioc.ioc_sem);
3627 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3630 EXPORT_SYMBOL(ll_iocontrol_register);
3631 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * ll_iocontrol_call(): dispatch a dynamic ioctl.  Walks the registry under
 * the read semaphore; the first handler whose command table contains @cmd
 * is invoked, and iteration stops when a callback returns LLIOC_STOP.
 * \param rcp [out] receives the handler's return code (-EINVAL if no
 *            handler consumed the command).
 */
3633 static enum llioc_iter
3634 ll_iocontrol_call(struct inode *inode, struct file *file,
3635 unsigned int cmd, unsigned long arg, int *rcp)
3637 enum llioc_iter ret = LLIOC_CONT;
3638 struct llioc_data *data;
3639 int rc = -EINVAL, i;
3641 down_read(&llioc.ioc_sem);
3642 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3643 for (i = 0; i < data->iocd_count; i++) {
3644 if (cmd != data->iocd_cmd[i])
3647 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3651 if (ret == LLIOC_STOP)
3654 up_read(&llioc.ioc_sem);
/*
 * ll_layout_conf(): push a layout configuration into the cl_object stack
 * via cl_conf_set() inside a nested cl_env.  For OBJECT_CONF_SET the DLM
 * layout lock is only allowed to match (ldlm_lock_allow_match) after the
 * layout has been applied, so no false layout can be observed; the cached
 * layout generation is then refreshed from the object.
 */
3661 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3663 struct ll_inode_info *lli = ll_i2info(inode);
3664 struct cl_object *obj = lli->lli_clob;
3665 struct cl_env_nest nest;
3673 env = cl_env_nested_get(&nest);
3675 RETURN(PTR_ERR(env));
3677 rc = cl_conf_set(env, lli->lli_clob, conf);
3681 if (conf->coc_opc == OBJECT_CONF_SET) {
3682 struct ldlm_lock *lock = conf->coc_lock;
3683 struct cl_layout cl = {
3687 LASSERT(lock != NULL);
3688 LASSERT(ldlm_has_layout(lock));
3690 /* it can only be allowed to match after layout is
3691 * applied to inode otherwise false layout would be
3692 * seen. Applying layout shoud happen before dropping
3693 * the intent lock. */
3694 ldlm_lock_allow_match(lock);
3696 rc = cl_object_layout_get(env, obj, &cl);
3701 DFID": layout version change: %u -> %u\n",
3702 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3704 ll_layout_version_set(lli, cl.cl_layout_gen);
3708 cl_env_nested_put(&nest, env);
3713 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
/*
 * ll_layout_fetch(): if @lock's LVB does not already carry a ready layout,
 * fetch the LOV EA from the MDS via md_getxattr() and install a copy as
 * the lock's l_lvb_data (replacing any stale buffer under the resource
 * lock).  Needed when the layout lock was granted via completion AST,
 * whose LVB buffer may be too small to hold the layout.
 * NOTE(review): some rc checks and RETURN paths are elided in this excerpt.
 */
3714 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3717 struct ll_sb_info *sbi = ll_i2sbi(inode);
3718 struct obd_capa *oc;
3719 struct ptlrpc_request *req;
3720 struct mdt_body *body;
3727 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3728 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3729 lock->l_lvb_data, lock->l_lvb_len);
3731 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3734 /* if layout lock was granted right away, the layout is returned
3735 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3736 * blocked and then granted via completion ast, we have to fetch
3737 * layout here. Please note that we can't use the LVB buffer in
3738 * completion AST because it doesn't have a large enough buffer */
3739 oc = ll_mdscapa_get(inode);
3740 rc = ll_get_default_mdsize(sbi, &lmmsize);
3742 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3743 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3749 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3751 GOTO(out, rc = -EPROTO);
3753 lmmsize = body->mbo_eadatasize;
3754 if (lmmsize == 0) /* empty layout */
3757 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3759 GOTO(out, rc = -EFAULT);
3761 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3762 if (lvbdata == NULL)
3763 GOTO(out, rc = -ENOMEM);
/* Swap the new layout into the lock's LVB under the resource lock,
 * freeing any previous buffer. */
3765 memcpy(lvbdata, lmm, lmmsize);
3766 lock_res_and_lock(lock);
3767 if (lock->l_lvb_data != NULL)
3768 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3770 lock->l_lvb_data = lvbdata;
3771 lock->l_lvb_len = lmmsize;
3772 unlock_res_and_lock(lock);
3777 ptlrpc_req_finished(req);
3782 * Apply the layout to the inode. Layout lock is held and will be released
3785 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3786 struct inode *inode)
3788 struct ll_inode_info *lli = ll_i2info(inode);
3789 struct ll_sb_info *sbi = ll_i2sbi(inode);
3790 struct ldlm_lock *lock;
3791 struct lustre_md md = { NULL };
3792 struct cl_object_conf conf;
3795 bool wait_layout = false;
3798 LASSERT(lustre_handle_is_used(lockh));
3800 lock = ldlm_handle2lock(lockh);
3801 LASSERT(lock != NULL);
3802 LASSERT(ldlm_has_layout(lock));
3804 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured",
3805 PFID(&lli->lli_fid), inode);
3807 /* in case this is a caching lock and reinstate with new inode */
3808 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3810 lock_res_and_lock(lock);
3811 lvb_ready = ldlm_is_lvb_ready(lock);
3812 unlock_res_and_lock(lock);
3813 /* checking lvb_ready is racy but this is okay. The worst case is
3814 * that multi processes may configure the file on the same time. */
/* Make sure the layout is present in the lock's LVB before applying. */
3819 rc = ll_layout_fetch(inode, lock);
3823 /* for layout lock, lmm is returned in lock's lvb.
3824 * lvb_data is immutable if the lock is held so it's safe to access it
3825 * without res lock. See the description in ldlm_lock_decref_internal()
3826 * for the condition to free lvb_data of layout lock */
3827 if (lock->l_lvb_data != NULL) {
3828 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3829 lock->l_lvb_data, lock->l_lvb_len);
3831 CERROR("%s: file "DFID" unpackmd error: %d\n",
3832 ll_get_fsname(inode->i_sb, NULL, 0),
3833 PFID(&lli->lli_fid), rc);
3837 LASSERTF(md.lsm != NULL, "lvb_data = %p, lvb_len = %u\n",
3838 lock->l_lvb_data, lock->l_lvb_len);
3843 /* set layout to file. Unlikely this will fail as old layout was
3844 * surely eliminated */
3845 memset(&conf, 0, sizeof conf);
3846 conf.coc_opc = OBJECT_CONF_SET;
3847 conf.coc_inode = inode;
3848 conf.coc_lock = lock;
3849 conf.u.coc_md = &md;
3850 rc = ll_layout_conf(inode, &conf);
3853 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3855 /* refresh layout failed, need to wait */
3856 wait_layout = rc == -EBUSY;
3860 LDLM_LOCK_PUT(lock);
3861 ldlm_lock_decref(lockh, mode);
3863 /* wait for IO to complete if it's still being used. */
3865 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3866 ll_get_fsname(inode->i_sb, NULL, 0),
3867 PFID(&lli->lli_fid), inode);
/* OBJECT_CONF_WAIT blocks until outstanding IO under the old layout
 * drains, then the caller can retry the refresh. */
3869 memset(&conf, 0, sizeof conf);
3870 conf.coc_opc = OBJECT_CONF_WAIT;
3871 conf.coc_inode = inode;
3872 rc = ll_layout_conf(inode, &conf);
3876 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3877 ll_get_fsname(inode->i_sb, NULL, 0),
3878 PFID(&lli->lli_fid), rc);
/*
 * ll_layout_refresh_locked(): refresh the layout while holding
 * lli_layout_mutex (taken by the caller).  First tries to match a cached
 * layout lock in any mode; on a miss, enqueues an IT_LAYOUT intent lock
 * on the MDS and applies the resulting layout via ll_layout_lock_set().
 * NOTE(review): the retry path implied by "requeue" and some rc checks
 * are on elided lines; comments cover visible statements only.
 */
3883 static int ll_layout_refresh_locked(struct inode *inode)
3885 struct ll_inode_info *lli = ll_i2info(inode);
3886 struct ll_sb_info *sbi = ll_i2sbi(inode);
3887 struct md_op_data *op_data;
3888 struct lookup_intent it;
3889 struct lustre_handle lockh;
3891 struct ldlm_enqueue_info einfo = {
3892 .ei_type = LDLM_IBITS,
3894 .ei_cb_bl = &ll_md_blocking_ast,
3895 .ei_cb_cp = &ldlm_completion_ast,
3901 /* mostly layout lock is caching on the local side, so try to match
3902 * it before grabbing layout lock mutex. */
3903 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3904 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3905 if (mode != 0) { /* hit cached lock */
3906 rc = ll_layout_lock_set(&lockh, mode, inode);
3913 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3914 0, 0, LUSTRE_OPC_ANY, NULL);
3915 if (IS_ERR(op_data))
3916 RETURN(PTR_ERR(op_data));
3918 /* have to enqueue one */
3919 memset(&it, 0, sizeof(it));
3920 it.it_op = IT_LAYOUT;
3921 lockh.cookie = 0ULL;
3923 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3924 ll_get_fsname(inode->i_sb, NULL, 0),
3925 PFID(&lli->lli_fid), inode);
3927 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
/* The intent's reply request is not needed once the lock is granted. */
3928 if (it.d.lustre.it_data != NULL)
3929 ptlrpc_req_finished(it.d.lustre.it_data);
3930 it.d.lustre.it_data = NULL;
3932 ll_finish_md_op_data(op_data);
/* Transfer lock ownership out of the intent before dropping it. */
3934 mode = it.d.lustre.it_lock_mode;
3935 it.d.lustre.it_lock_mode = 0;
3936 ll_intent_drop_lock(&it);
3939 /* set lock data in case this is a new lock */
3940 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3941 rc = ll_layout_lock_set(&lockh, mode, inode);
3950 * This function checks if there exists a LAYOUT lock on the client side,
3951 * or enqueues it if it doesn't have one in cache.
3953 * This function will not hold layout lock so it may be revoked any time after
3954 * this function returns. Any operations depend on layout should be redone
3957 * This function should be called before lov_io_init() to get an uptodate
3958 * layout version, the caller should save the version number and after IO
3959 * is finished, this function should be called again to verify that layout
3960 * is not changed during IO time.
3962 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3964 struct ll_inode_info *lli = ll_i2info(inode);
3965 struct ll_sb_info *sbi = ll_i2sbi(inode);
/* Fast path: layout locking disabled, or a valid generation is cached. */
3969 *gen = ll_layout_version_get(lli);
3970 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
3974 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3975 LASSERT(S_ISREG(inode->i_mode));
3977 /* take layout lock mutex to enqueue layout lock exclusively. */
3978 mutex_lock(&lli->lli_layout_mutex);
3980 rc = ll_layout_refresh_locked(inode);
3984 *gen = ll_layout_version_get(lli);
3986 mutex_unlock(&lli->lli_layout_mutex);
3992 * This function send a restore request to the MDT
3994 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3996 struct hsm_user_request *hur;
4000 len = sizeof(struct hsm_user_request) +
4001 sizeof(struct hsm_user_item);
4002 OBD_ALLOC(hur, len);
4006 hur->hur_request.hr_action = HUA_RESTORE;
4007 hur->hur_request.hr_archive_id = 0;
4008 hur->hur_request.hr_flags = 0;
4009 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
4010 sizeof(hur->hur_user_item[0].hui_fid));
4011 hur->hur_user_item[0].hui_extent.offset = offset;
4012 hur->hur_user_item[0].hui_extent.length = length;
4013 hur->hur_request.hr_itemcount = 1;
4014 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,