4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include <linux/user_namespace.h>
49 #ifdef HAVE_UIDGID_HEADER
50 # include <linux/uidgid.h>
52 #include <lustre/ll_fiemap.h>
53 #include <lustre_ioctl.h>
55 #include "cl_object.h"
57 #include "llite_internal.h"
58 #include "vvp_internal.h"
61 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
63 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
66 static enum llioc_iter
67 ll_iocontrol_call(struct inode *inode, struct file *file,
68 unsigned int cmd, unsigned long arg, int *rcp);
/*
 * Allocate a per-open-file ll_file_data from its dedicated slab cache
 * (GFP_NOFS to avoid recursing into the filesystem under memory pressure)
 * and clear its write-failure flag.
 * NOTE(review): this excerpt is missing lines (the embedded numbering
 * jumps), so the NULL-allocation check and return path are not visible.
 */
70 static struct ll_file_data *ll_file_data_get(void)
72 struct ll_file_data *fd;
74 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
/* no async write error recorded yet for this open */
78 fd->fd_write_failed = false;
/* Release an ll_file_data back to the slab cache (pairs with
 * ll_file_data_get()). */
83 static void ll_file_data_put(struct ll_file_data *fd)
86 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
90 * Packs all the attributes into @op_data for the CLOSE rpc.
92 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
93 struct obd_client_handle *och)
97 ll_prep_md_op_data(op_data, inode, NULL, NULL,
98 0, 0, LUSTRE_OPC_ANY, NULL);
/* copy the inode's current mode, timestamps and size into the RPC so
 * the MDT sees the client's latest view at close time */
100 op_data->op_attr.ia_mode = inode->i_mode;
101 op_data->op_attr.ia_atime = inode->i_atime;
102 op_data->op_attr.ia_mtime = inode->i_mtime;
103 op_data->op_attr.ia_ctime = inode->i_ctime;
104 op_data->op_attr.ia_size = i_size_read(inode);
/* mark every copied attribute valid, including the *_SET variants so
 * the server applies the exact client timestamps */
105 op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
106 ATTR_MTIME | ATTR_MTIME_SET |
107 ATTR_CTIME | ATTR_CTIME_SET;
108 op_data->op_attr_blocks = inode->i_blocks;
109 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
/* identify which MDS open handle is being closed */
110 op_data->op_handle = och->och_fh;
112 if (och->och_flags & FMODE_WRITE &&
113 ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED))
114 /* For HSM: if inode data has been modified, pack it so that
115 * MDT can set data dirty flag in the archive. */
116 op_data->op_bias |= MDS_DATA_MODIFIED;
122 * Perform a close, possibly with a bias.
123 * The meaning of "data" depends on the value of "bias".
125 * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
126 * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
/*
 * NOTE(review): lines are missing from this excerpt (embedded numbering
 * jumps) — the parameter list, switch statement head, several error
 * checks and the return are not visible here.
 */
129 static int ll_close_inode_openhandle(struct obd_export *md_exp,
130 struct obd_client_handle *och,
132 enum mds_op_bias bias,
135 struct obd_export *exp = ll_i2mdexp(inode);
136 struct md_op_data *op_data;
137 struct ptlrpc_request *req = NULL;
138 struct obd_device *obd = class_exp2obd(exp);
144 * XXX: in case of LMV, is this correct to access
147 CERROR("Invalid MDC connection handle "LPX64"\n",
148 ll_i2mdexp(inode)->exp_handle.h_cookie);
152 OBD_ALLOC_PTR(op_data);
154 /* XXX We leak openhandle and request here. */
155 GOTO(out, rc = -ENOMEM);
/* fill op_data with the inode's attributes and the open handle */
157 ll_prepare_close(inode, op_data, och);
/* layout swap close: fid2 names the peer inode to swap with */
159 case MDS_CLOSE_LAYOUT_SWAP:
160 LASSERT(data != NULL);
161 op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
162 op_data->op_data_version = 0;
163 op_data->op_lease_handle = och->och_lease_handle;
164 op_data->op_fid2 = *ll_inode2fid(data);
/* HSM release close: *data carries the data version to verify */
167 case MDS_HSM_RELEASE:
168 LASSERT(data != NULL);
169 op_data->op_bias |= MDS_HSM_RELEASE;
170 op_data->op_data_version = *(__u64 *)data;
171 op_data->op_lease_handle = och->och_lease_handle;
172 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
176 LASSERT(data == NULL);
/* send the CLOSE RPC to the MDT */
180 rc = md_close(md_exp, op_data, och->och_mod, &req);
182 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
183 ll_i2mdexp(inode)->exp_obd->obd_name,
184 PFID(ll_inode2fid(inode)), rc);
/* for biased closes, check the server actually executed the intent */
188 op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) {
189 struct mdt_body *body;
191 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
192 if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
196 ll_finish_md_op_data(op_data);
200 md_clear_open_replay_data(md_exp, och);
/* poison the handle so reuse after close is detectable */
201 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
204 if (req) /* This is close request */
205 ptlrpc_req_finished(req);
/*
 * Close the MDS open handle of the given mode (write/exec/read) if this
 * caller holds the last reference to it; otherwise just drop out.
 * NOTE(review): lines are missing from this excerpt — the usecount
 * decrement and och detach under the mutex are not visible.
 */
209 int ll_md_real_close(struct inode *inode, fmode_t fmode)
211 struct ll_inode_info *lli = ll_i2info(inode);
212 struct obd_client_handle **och_p;
213 struct obd_client_handle *och;
/* pick the handle/usecount pair that matches the open mode */
218 if (fmode & FMODE_WRITE) {
219 och_p = &lli->lli_mds_write_och;
220 och_usecount = &lli->lli_open_fd_write_count;
221 } else if (fmode & FMODE_EXEC) {
222 och_p = &lli->lli_mds_exec_och;
223 och_usecount = &lli->lli_open_fd_exec_count;
225 LASSERT(fmode & FMODE_READ);
226 och_p = &lli->lli_mds_read_och;
227 och_usecount = &lli->lli_open_fd_read_count;
230 mutex_lock(&lli->lli_och_mutex);
231 if (*och_usecount > 0) {
232 /* There are still users of this handle, so skip
234 mutex_unlock(&lli->lli_och_mutex);
240 mutex_unlock(&lli->lli_och_mutex);
243 /* There might be a race and this handle may already
/* no bias, no data: plain close of the open handle on the MDT */
245 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
246 och, inode, 0, NULL);
/*
 * Per-file-descriptor close: release group lock and lease if held, drop
 * this fd's reference on the per-mode open count, and only talk to the
 * MDS (ll_md_real_close) when no cached OPEN lock covers the file.
 * Finally detach and free the ll_file_data.
 */
252 static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
/* match only the OPEN inodebit when probing for a cached lock */
255 ldlm_policy_data_t policy = {
256 .l_inodebits = { MDS_INODELOCK_OPEN },
/* TEST_LOCK: probe for a granted lock without taking a reference */
258 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
259 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
260 struct ll_inode_info *lli = ll_i2info(inode);
261 struct lustre_handle lockh;
266 /* clear group lock, if present */
267 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
268 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
270 if (fd->fd_lease_och != NULL) {
273 /* Usually the lease is not released when the
274 * application crashed, we need to release here. */
275 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
276 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
277 PFID(&lli->lli_fid), rc, lease_broken);
279 fd->fd_lease_och = NULL;
282 if (fd->fd_och != NULL) {
283 rc = ll_close_inode_openhandle(md_exp, fd->fd_och, inode, 0,
289 /* Let's see if we have good enough OPEN lock on the file and if
290 we can skip talking to MDS */
291 mutex_lock(&lli->lli_och_mutex);
/* drop this fd's contribution to the per-mode open counters */
292 if (fd->fd_omode & FMODE_WRITE) {
294 LASSERT(lli->lli_open_fd_write_count);
295 lli->lli_open_fd_write_count--;
296 } else if (fd->fd_omode & FMODE_EXEC) {
298 LASSERT(lli->lli_open_fd_exec_count);
299 lli->lli_open_fd_exec_count--;
302 LASSERT(lli->lli_open_fd_read_count);
303 lli->lli_open_fd_read_count--;
305 mutex_unlock(&lli->lli_och_mutex);
/* no matching cached OPEN lock -> must really close on the MDS */
307 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
308 LDLM_IBITS, &policy, lockmode, &lockh))
309 rc = ll_md_real_close(inode, fd->fd_omode);
312 LUSTRE_FPRIVATE(file) = NULL;
313 ll_file_data_put(fd);
318 /* While this returns an error code, fput() the caller does not, so we need
319 * to make every effort to clean up all of our state here. Also, applications
320 * rarely check close errors and even if an error is returned they will not
321 * re-try the close call.
323 int ll_file_release(struct inode *inode, struct file *file)
325 struct ll_file_data *fd;
326 struct ll_sb_info *sbi = ll_i2sbi(inode);
327 struct ll_inode_info *lli = ll_i2info(inode);
331 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
332 PFID(ll_inode2fid(inode)), inode);
/* remote-client ACL bookkeeping is torn down only on the root inode */
334 #ifdef CONFIG_FS_POSIX_ACL
335 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
336 inode == inode->i_sb->s_root->d_inode) {
337 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
340 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
341 fd->fd_flags &= ~LL_FILE_RMTACL;
342 rct_del(&sbi->ll_rct, current_pid());
343 et_search_free(&sbi->ll_et, current_pid());
/* don't account releases of the root dentry in /proc stats */
348 if (inode->i_sb->s_root != file->f_path.dentry)
349 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
350 fd = LUSTRE_FPRIVATE(file);
353 /* The last ref on @file, maybe not the the owner pid of statahead,
354 * because parent and child process can share the same file handle. */
355 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
356 ll_deauthorize_statahead(inode, fd);
/* root dentry: nothing was opened on the MDS, just free fd locally */
358 if (inode->i_sb->s_root == file->f_path.dentry) {
359 LUSTRE_FPRIVATE(file) = NULL;
360 ll_file_data_put(fd);
/* propagate any pending async write error into this close's rc */
364 if (!S_ISDIR(inode->i_mode)) {
365 if (lli->lli_clob != NULL)
366 lov_read_and_clear_async_rc(lli->lli_clob);
367 lli->lli_async_rc = 0;
370 rc = ll_md_close(sbi->ll_md_exp, inode, file);
372 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
373 libcfs_debug_dumplog();
/*
 * Issue an intent-based OPEN to the MDS for an already-looked-up dentry,
 * then update the local inode from the reply and attach the lock data.
 * @lmm/@lmmsize optionally carry striping metadata for the request.
 * NOTE(review): lines are missing from this excerpt — several error
 * branches and the exit labels are not fully visible.
 */
378 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
379 struct lookup_intent *itp)
381 struct dentry *de = file->f_path.dentry;
382 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
383 struct dentry *parent = de->d_parent;
384 const char *name = NULL;
386 struct md_op_data *op_data;
387 struct ptlrpc_request *req = NULL;
391 LASSERT(parent != NULL);
392 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
394 /* if server supports open-by-fid, or file name is invalid, don't pack
395 * name in open request */
396 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
397 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
398 name = de->d_name.name;
399 len = de->d_name.len;
402 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
403 name, len, 0, LUSTRE_OPC_ANY, NULL);
405 RETURN(PTR_ERR(op_data));
406 op_data->op_data = lmm;
407 op_data->op_data_size = lmmsize;
/* combined lookup+open enqueue against the MDS */
409 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
410 &ll_md_blocking_ast, 0);
411 ll_finish_md_op_data(op_data);
413 /* reason for keep own exit path - don`t flood log
414 * with messages with -ESTALE errors.
416 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
417 it_open_error(DISP_OPEN_OPEN, itp))
419 ll_release_openhandle(de, itp);
423 if (it_disposition(itp, DISP_LOOKUP_NEG))
424 GOTO(out, rc = -ENOENT);
426 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
427 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
428 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* refresh the inode from the server reply, then bind the granted
 * lock (if any) to the inode */
432 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
433 if (!rc && itp->d.lustre.it_lock_mode)
434 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
437 ptlrpc_req_finished(req);
438 ll_intent_drop_lock(itp);
/*
 * Populate an obd_client_handle from the MDT reply carried by the
 * lookup intent, then register it for open replay on recovery.
 */
443 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
444 struct obd_client_handle *och)
446 struct ptlrpc_request *req = it->d.lustre.it_data;
447 struct mdt_body *body;
449 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
/* server-assigned open handle and fid from the reply body */
450 och->och_fh = body->mbo_handle;
451 och->och_fid = body->mbo_fid1;
452 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
453 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
454 och->och_flags = it->it_flags;
456 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish the client-side part of an open: optionally fill @och from the
 * intent, then attach @fd to the struct file and initialize its
 * readahead state, open mode, and cl-context bookkeeping.
 */
459 static int ll_local_open(struct file *file, struct lookup_intent *it,
460 struct ll_file_data *fd, struct obd_client_handle *och)
462 struct inode *inode = file->f_path.dentry->d_inode;
465 LASSERT(!LUSTRE_FPRIVATE(file));
472 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
477 LUSTRE_FPRIVATE(file) = fd;
478 ll_readahead_init(inode, &fd->fd_ras);
/* remember only the access-mode bits for later close accounting */
479 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
481 /* ll_cl_context initialize */
482 rwlock_init(&fd->fd_lock);
483 INIT_LIST_HEAD(&fd->fd_lccs);
488 /* Open a file, and (for the very first open) create objects on the OSTs at
489 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
490 * creation or open until ll_lov_setstripe() ioctl is called.
492 * If we already have the stripe MD locally then we don't request it in
493 * md_open(), by passing a lmm_size = 0.
495 * It is up to the application to ensure no other processes open this file
496 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
497 * used. We might be able to avoid races of that sort by getting lli_open_sem
498 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
499 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * NOTE(review): this excerpt is missing lines throughout (embedded
 * numbering jumps) — several branches, labels and returns are not
 * visible; comments below describe only what is shown.
 */
501 int ll_file_open(struct inode *inode, struct file *file)
503 struct ll_inode_info *lli = ll_i2info(inode);
504 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
505 .it_flags = file->f_flags };
506 struct obd_client_handle **och_p = NULL;
507 __u64 *och_usecount = NULL;
508 struct ll_file_data *fd;
512 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
513 PFID(ll_inode2fid(inode)), inode, file->f_flags);
/* an intent may have been stashed by the lookup path */
515 it = file->private_data; /* XXX: compat macro */
516 file->private_data = NULL; /* prevent ll_local_open assertion */
518 fd = ll_file_data_get();
520 GOTO(out_openerr, rc = -ENOMEM);
523 if (S_ISDIR(inode->i_mode))
524 ll_authorize_statahead(inode, fd);
/* root dentry: no MDS open needed, just attach fd */
526 if (inode->i_sb->s_root == file->f_path.dentry) {
527 LUSTRE_FPRIVATE(file) = fd;
/* no usable intent from lookup: build our own (oit) from f_flags */
531 if (!it || !it->d.lustre.it_disposition) {
532 /* Convert f_flags into access mode. We cannot use file->f_mode,
533 * because everything but O_ACCMODE mask was stripped from
535 if ((oit.it_flags + 1) & O_ACCMODE)
537 if (file->f_flags & O_TRUNC)
538 oit.it_flags |= FMODE_WRITE;
540 /* kernel only call f_op->open in dentry_open. filp_open calls
541 * dentry_open after call to open_namei that checks permissions.
542 * Only nfsd_open call dentry_open directly without checking
543 * permissions and because of that this code below is safe. */
544 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
545 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
547 /* We do not want O_EXCL here, presumably we opened the file
548 * already? XXX - NFS implications? */
549 oit.it_flags &= ~O_EXCL;
551 /* bug20584, if "it_flags" contains O_CREAT, the file will be
552 * created if necessary, then "IT_CREAT" should be set to keep
553 * consistent with it */
554 if (oit.it_flags & O_CREAT)
555 oit.it_op |= IT_CREAT;
561 /* Let's see if we have file open on MDS already. */
562 if (it->it_flags & FMODE_WRITE) {
563 och_p = &lli->lli_mds_write_och;
564 och_usecount = &lli->lli_open_fd_write_count;
565 } else if (it->it_flags & FMODE_EXEC) {
566 och_p = &lli->lli_mds_exec_och;
567 och_usecount = &lli->lli_open_fd_exec_count;
569 och_p = &lli->lli_mds_read_och;
570 och_usecount = &lli->lli_open_fd_read_count;
573 mutex_lock(&lli->lli_och_mutex);
574 if (*och_p) { /* Open handle is present */
575 if (it_disposition(it, DISP_OPEN_OPEN)) {
576 /* Well, there's extra open request that we do not need,
577 let's close it somehow. This will decref request. */
578 rc = it_open_error(DISP_OPEN_OPEN, it);
580 mutex_unlock(&lli->lli_och_mutex);
581 GOTO(out_openerr, rc);
584 ll_release_openhandle(file->f_path.dentry, it);
/* reuse the existing MDS handle; och arg is NULL on purpose */
588 rc = ll_local_open(file, it, fd, NULL);
591 mutex_unlock(&lli->lli_och_mutex);
592 GOTO(out_openerr, rc);
595 LASSERT(*och_usecount == 0);
596 if (!it->d.lustre.it_disposition) {
597 /* We cannot just request lock handle now, new ELC code
598 means that one of other OPEN locks for this file
599 could be cancelled, and since blocking ast handler
600 would attempt to grab och_mutex as well, that would
601 result in a deadlock */
602 mutex_unlock(&lli->lli_och_mutex);
604 * Normally called under two situations:
606 * 2. A race/condition on MDS resulting in no open
607 * handle to be returned from LOOKUP|OPEN request,
608 * for example if the target entry was a symlink.
610 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
612 * Always specify MDS_OPEN_BY_FID because we don't want
613 * to get file with different fid.
615 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
616 rc = ll_intent_file_open(file, NULL, 0, it);
618 GOTO(out_openerr, rc);
622 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
624 GOTO(out_och_free, rc = -ENOMEM);
628 /* md_intent_lock() didn't get a request ref if there was an
629 * open error, so don't do cleanup on the request here
631 /* XXX (green): Should not we bail out on any error here, not
632 * just open error? */
633 rc = it_open_error(DISP_OPEN_OPEN, it);
635 GOTO(out_och_free, rc);
637 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
638 "inode %p: disposition %x, status %d\n", inode,
639 it_disposition(it, ~0), it->d.lustre.it_status);
641 rc = ll_local_open(file, it, fd, *och_p);
643 GOTO(out_och_free, rc);
645 mutex_unlock(&lli->lli_och_mutex);
648 /* Must do this outside lli_och_mutex lock to prevent deadlock where
649 different kind of OPEN lock for this same inode gets cancelled
650 by ldlm_cancel_lru */
651 if (!S_ISREG(inode->i_mode))
652 GOTO(out_och_free, rc);
654 cl_lov_delay_create_clear(&file->f_flags);
655 GOTO(out_och_free, rc);
/* error unwind: free the handle, drop counts, release fd */
659 if (och_p && *och_p) {
660 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
661 *och_p = NULL; /* OBD_FREE writes some magic there */
664 mutex_unlock(&lli->lli_och_mutex);
667 if (lli->lli_opendir_key == fd)
668 ll_deauthorize_statahead(inode, fd);
670 ll_file_data_put(fd);
672 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* drop the enqueue reference held on the open request, if any */
675 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
676 ptlrpc_req_finished(it->d.lustre.it_data);
677 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for a lease lock: on a blocking callback, cancel the
 * lease lock asynchronously (the lease is thereby broken).
 * NOTE(review): the CANCELING branch body is not visible in this excerpt.
 */
683 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
684 struct ldlm_lock_desc *desc, void *data, int flag)
687 struct lustre_handle lockh;
691 case LDLM_CB_BLOCKING:
692 ldlm_lock2handle(lock, &lockh);
693 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
695 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
699 case LDLM_CB_CANCELING:
707 * Acquire a lease and open the file.
/*
 * NOTE(review): this excerpt is missing lines (embedded numbering
 * jumps) — the open_flags parameter, several branches and returns are
 * not visible; comments below describe only what is shown.
 */
709 static struct obd_client_handle *
710 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
713 struct lookup_intent it = { .it_op = IT_OPEN };
714 struct ll_sb_info *sbi = ll_i2sbi(inode);
715 struct md_op_data *op_data;
716 struct ptlrpc_request *req = NULL;
717 struct lustre_handle old_handle = { 0 };
718 struct obd_client_handle *och = NULL;
/* leases are only granted for plain read or plain write opens */
723 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
724 RETURN(ERR_PTR(-EINVAL));
727 struct ll_inode_info *lli = ll_i2info(inode);
728 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
729 struct obd_client_handle **och_p;
/* requested mode must match how the file was opened; exec excluded */
732 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
733 RETURN(ERR_PTR(-EPERM));
735 /* Get the openhandle of the file */
737 mutex_lock(&lli->lli_och_mutex);
/* a lease is already held on this fd */
738 if (fd->fd_lease_och != NULL) {
739 mutex_unlock(&lli->lli_och_mutex);
743 if (fd->fd_och == NULL) {
744 if (file->f_mode & FMODE_WRITE) {
745 LASSERT(lli->lli_mds_write_och != NULL);
746 och_p = &lli->lli_mds_write_och;
747 och_usecount = &lli->lli_open_fd_write_count;
749 LASSERT(lli->lli_mds_read_och != NULL);
750 och_p = &lli->lli_mds_read_och;
751 och_usecount = &lli->lli_open_fd_read_count;
753 if (*och_usecount == 1) {
760 mutex_unlock(&lli->lli_och_mutex);
761 if (rc < 0) /* more than 1 opener */
764 LASSERT(fd->fd_och != NULL);
765 old_handle = fd->fd_och->och_fh;
770 RETURN(ERR_PTR(-ENOMEM));
772 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
773 LUSTRE_OPC_ANY, NULL);
775 GOTO(out, rc = PTR_ERR(op_data));
777 /* To tell the MDT this openhandle is from the same owner */
778 op_data->op_handle = old_handle;
780 it.it_flags = fmode | open_flags;
781 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
782 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
783 &ll_md_blocking_lease_ast,
784 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
785 * it can be cancelled which may mislead applications that the lease is
787 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
788 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
789 * doesn't deal with openhandle, so normal openhandle will be leaked. */
790 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
791 ll_finish_md_op_data(op_data);
792 ptlrpc_req_finished(req);
794 GOTO(out_release_it, rc);
796 if (it_disposition(&it, DISP_LOOKUP_NEG))
797 GOTO(out_release_it, rc = -ENOENT);
799 rc = it_open_error(DISP_OPEN_OPEN, &it);
801 GOTO(out_release_it, rc);
803 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
804 ll_och_fill(sbi->ll_md_exp, &it, och);
806 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
807 GOTO(out_close, rc = -EOPNOTSUPP);
809 /* already get lease, handle lease lock */
810 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
/* a lease without an OPEN-bit lock is a protocol violation */
811 if (it.d.lustre.it_lock_mode == 0 ||
812 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
813 /* open lock must return for lease */
814 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
815 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
816 it.d.lustre.it_lock_bits);
817 GOTO(out_close, rc = -EPROTO);
820 ll_intent_release(&it);
/* error unwind: cancel the lock, close the handle, release intent */
824 /* Cancel open lock */
825 if (it.d.lustre.it_lock_mode != 0) {
826 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
827 it.d.lustre.it_lock_mode);
828 it.d.lustre.it_lock_mode = 0;
829 och->och_lease_handle.cookie = 0ULL;
831 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, och, inode, 0, NULL);
833 CERROR("%s: error closing file "DFID": %d\n",
834 ll_get_fsname(inode->i_sb, NULL, 0),
835 PFID(&ll_i2info(inode)->lli_fid), rc2);
836 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
838 ll_intent_release(&it);
846 * Check whether a layout swap can be done between two inodes.
848 * \param[in] inode1 First inode to check
849 * \param[in] inode2 Second inode to check
851 * \retval 0 on success, layout swap can be performed between both inodes
852 * \retval negative error code if requirements are not met
854 static int ll_check_swap_layouts_validity(struct inode *inode1,
855 struct inode *inode2)
/* both must be regular files */
857 if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
/* caller must be allowed to write both */
860 if (inode_permission(inode1, MAY_WRITE) ||
861 inode_permission(inode2, MAY_WRITE))
/* both must live on the same filesystem */
864 if (inode1->i_sb != inode2->i_sb)
/*
 * Close @och on @inode with the MDS_CLOSE_LAYOUT_SWAP bias so the MDT
 * atomically swaps layouts between @inode and @inode2 at close time.
 * Validates the pair first and rejects swapping an inode with itself.
 */
870 static int ll_swap_layouts_close(struct obd_client_handle *och,
871 struct inode *inode, struct inode *inode2)
873 const struct lu_fid *fid1 = ll_inode2fid(inode);
874 const struct lu_fid *fid2;
878 CDEBUG(D_INODE, "%s: biased close of file "DFID"\n",
879 ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
881 rc = ll_check_swap_layouts_validity(inode, inode2);
883 GOTO(out_free_och, rc);
885 /* We now know that inode2 is a lustre inode */
886 fid2 = ll_inode2fid(inode2);
/* identical fids -> swapping with self, reject */
888 rc = lu_fid_cmp(fid1, fid2);
890 GOTO(out_free_och, rc = -EINVAL);
892 /* Close the file and swap layouts between inode & inode2.
893 * NB: lease lock handle is released in mdc_close_layout_swap_pack()
894 * because we still need it to pack l_remote_handle to MDT. */
895 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
896 MDS_CLOSE_LAYOUT_SWAP, inode2);
898 och = NULL; /* freed in ll_close_inode_openhandle() */
908 * Release lease and close the file.
909 * It will check if the lease has ever broken.
911 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
914 struct ldlm_lock *lock;
915 bool cancelled = true;
/* inspect the lease lock to learn whether it was already cancelled
 * (i.e. the lease was broken by a conflicting access) */
919 lock = ldlm_handle2lock(&och->och_lease_handle);
921 lock_res_and_lock(lock);
922 cancelled = ldlm_is_cancel(lock);
923 unlock_res_and_lock(lock);
927 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
928 PFID(&ll_i2info(inode)->lli_fid), cancelled);
931 ldlm_cli_cancel(&och->och_lease_handle, 0);
/* report lease-broken state to the caller if requested */
932 if (lease_broken != NULL)
933 *lease_broken = cancelled;
935 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
/*
 * Merge the MDS-provided timestamps cached in lli with the OST-side
 * attributes (size, blocks, times) obtained through the cl_object, and
 * write the most recent values back into the VFS inode.  Runs under the
 * inode size lock.
 */
941 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
943 struct ll_inode_info *lli = ll_i2info(inode);
944 struct cl_object *obj = lli->lli_clob;
945 struct cl_attr *attr = vvp_env_thread_attr(env);
953 ll_inode_size_lock(inode);
955 /* merge timestamps the most recently obtained from mds with
956 timestamps obtained from osts */
957 LTIME_S(inode->i_atime) = lli->lli_atime;
958 LTIME_S(inode->i_mtime) = lli->lli_mtime;
959 LTIME_S(inode->i_ctime) = lli->lli_ctime;
961 atime = LTIME_S(inode->i_atime);
962 mtime = LTIME_S(inode->i_mtime);
963 ctime = LTIME_S(inode->i_ctime);
/* fetch the OST-side view of the object attributes */
965 cl_object_attr_lock(obj);
966 rc = cl_object_attr_get(env, obj, attr);
967 cl_object_attr_unlock(obj);
970 GOTO(out_size_unlock, rc);
/* keep whichever timestamp is newer, MDS or OST */
972 if (atime < attr->cat_atime)
973 atime = attr->cat_atime;
975 if (ctime < attr->cat_ctime)
976 ctime = attr->cat_ctime;
978 if (mtime < attr->cat_mtime)
979 mtime = attr->cat_mtime;
981 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
982 PFID(&lli->lli_fid), attr->cat_size);
/* size and block count come from the OSTs */
984 i_size_write(inode, attr->cat_size);
985 inode->i_blocks = attr->cat_blocks;
987 LTIME_S(inode->i_atime) = atime;
988 LTIME_S(inode->i_mtime) = mtime;
989 LTIME_S(inode->i_ctime) = ctime;
992 ll_inode_size_unlock(inode);
/*
 * Decide whether atime updates should be suppressed for this file,
 * mirroring the kernel's own checks in file_accessed()/touch_atime():
 * O_NOATIME on the fd, S_NOATIME on the inode, and the mount/superblock
 * noatime / nodiratime / read-only flags.
 */
997 static bool file_is_noatime(const struct file *file)
999 const struct vfsmount *mnt = file->f_path.mnt;
1000 const struct inode *inode = file->f_path.dentry->d_inode;
1002 /* Adapted from file_accessed() and touch_atime().*/
1003 if (file->f_flags & O_NOATIME)
1006 if (inode->i_flags & S_NOATIME)
1009 if (IS_NOATIME(inode))
1012 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1015 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1018 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io for a read or write on @file: propagate the fd's
 * O_NONBLOCK/O_APPEND/O_SYNC/O_DIRECT flags, pick the lock requirement
 * (never for nolock files, mandatory for append, otherwise maybe), and
 * record whether atime updates are suppressed.
 */
1024 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
1026 struct inode *inode = file->f_path.dentry->d_inode;
1028 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1030 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1031 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1032 file->f_flags & O_DIRECT ||
1035 io->ci_obj = ll_i2info(inode)->lli_clob;
1036 io->ci_lockreq = CILR_MAYBE;
1037 if (ll_file_nolock(file)) {
/* locking disabled for this file: skip client and server locks */
1038 io->ci_lockreq = CILR_NEVER;
1039 io->ci_no_srvlock = 1;
1040 } else if (file->f_flags & O_APPEND) {
1041 io->ci_lockreq = CILR_MANDATORY;
1044 io->ci_noatime = file_is_noatime(file);
/*
 * Common read/write engine: set up a cl_io for @iot (CIT_READ or
 * CIT_WRITE), take the file range lock where required (writes, and
 * O_DIRECT reads — see LU-6227), run the cl_io loop, restart the IO if
 * it was short, and account the bytes moved in /proc stats.
 * NOTE(review): this excerpt is missing lines (embedded numbering
 * jumps) — the function's return type line, some switch labels and the
 * restart goto are not visible.
 */
1048 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1049 struct file *file, enum cl_io_type iot,
1050 loff_t *ppos, size_t count)
1052 struct vvp_io *vio = vvp_env_io(env);
1053 struct inode *inode = file->f_path.dentry->d_inode;
1054 struct ll_inode_info *lli = ll_i2info(inode);
1055 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1059 struct range_lock range;
1063 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1064 file->f_path.dentry->d_name.name, iot, *ppos, count);
1067 io = vvp_env_thread_io(env);
1068 ll_io_init(io, file, iot == CIT_WRITE);
1070 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1071 bool range_locked = false;
/* append writes may land anywhere -> lock to EOF; otherwise lock
 * exactly the byte range being transferred */
1073 if (file->f_flags & O_APPEND)
1074 range_lock_init(&range, 0, LUSTRE_EOF);
1076 range_lock_init(&range, *ppos, *ppos + count - 1);
1078 vio->vui_fd = LUSTRE_FPRIVATE(file);
1079 vio->vui_io_subtype = args->via_io_subtype;
1081 switch (vio->vui_io_subtype) {
1083 vio->vui_iter = args->u.normal.via_iter;
1084 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1085 vio->vui_tot_nrsegs = vio->vui_iter->nr_segs;
1086 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1087 vio->vui_iocb = args->u.normal.via_iocb;
1088 /* Direct IO reads must also take range lock,
1089 * or multiple reads will try to work on the same pages
1090 * See LU-6227 for details. */
1091 if (((iot == CIT_WRITE) ||
1092 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1093 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1094 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1096 rc = range_lock(&lli->lli_write_tree, &range);
1100 range_locked = true;
/* splice subtype: carry the pipe and flags instead of an iter */
1104 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1105 vio->u.splice.vui_flags = args->u.splice.via_flags;
1108 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
/* publish the io on the env so page ops can find it, then run it */
1112 ll_cl_add(file, env, io);
1113 rc = cl_io_loop(env, io);
1114 ll_cl_remove(file, env);
1117 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1119 range_unlock(&lli->lli_write_tree, &range);
1122 /* cl_io_rw_init() handled IO */
/* account partial progress and advance the file position */
1126 if (io->ci_nob > 0) {
1127 result += io->ci_nob;
1128 count -= io->ci_nob;
1129 *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */
1131 /* prepare IO restart */
1132 if (count > 0 && args->via_io_subtype == IO_NORMAL) {
1133 args->u.normal.via_iter = vio->vui_iter;
1134 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1135 args->u.normal.via_iter->nr_segs = vio->vui_tot_nrsegs;
1136 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1141 cl_io_fini(env, io);
/* restart short IOs when the cl_io layer asks for it */
1143 if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
1145 "%s: restart %s from %lld, count:%zu, result: %zd\n",
1146 file->f_path.dentry->d_name.name,
1147 iot == CIT_READ ? "read" : "write",
1148 *ppos, count, result);
1152 if (iot == CIT_READ) {
1154 ll_stats_ops_tally(ll_i2sbi(inode),
1155 LPROC_LL_READ_BYTES, result);
1156 } else if (iot == CIT_WRITE) {
1158 ll_stats_ops_tally(ll_i2sbi(inode),
1159 LPROC_LL_WRITE_BYTES, result);
1160 fd->fd_write_failed = false;
1161 } else if (rc != -ERESTARTSYS) {
/* remember the failure so ll_file_release can report it */
1162 fd->fd_write_failed = true;
1166 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1168 return result > 0 ? result : rc;
1172 * Read from a file (through the page cache).
1174 static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1176 struct vvp_io_args *args;
/* grab a cl environment for the duration of the IO */
1181 env = cl_env_get(&refcheck);
1183 return PTR_ERR(env);
/* package the iter/iocb as normal-IO args and run the common engine */
1185 args = ll_env_args(env, IO_NORMAL);
1186 args->u.normal.via_iter = to;
1187 args->u.normal.via_iocb = iocb;
1189 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1190 &iocb->ki_pos, iov_iter_count(to));
1191 cl_env_put(env, &refcheck);
1196 * Write to a file (through the page cache).
1198 static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1200 struct vvp_io_args *args;
/* grab a cl environment for the duration of the IO */
1205 env = cl_env_get(&refcheck);
1207 return PTR_ERR(env);
/* package the iter/iocb as normal-IO args and run the common engine */
1209 args = ll_env_args(env, IO_NORMAL);
1210 args->u.normal.via_iter = from;
1211 args->u.normal.via_iocb = iocb;
1213 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1214 &iocb->ki_pos, iov_iter_count(from));
1215 cl_env_put(env, &refcheck);
#ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1221 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
1223 static int ll_file_get_iov_count(const struct iovec *iov,
1224 unsigned long *nr_segs, size_t *count)
/* validate each segment and total the byte count, trimming nr_segs
 * at the first inaccessible segment (mirrors the old kernel helper) */
1229 for (seg = 0; seg < *nr_segs; seg++) {
1230 const struct iovec *iv = &iov[seg];
1233 * If any segment has a negative length, or the cumulative
1234 * length ever wraps negative then return -EINVAL.
1237 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1239 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1244 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Pre-iov_iter aio read entry point: validate the iovec, copy it to a
 * local buffer (env-embedded for the common single-segment case, heap
 * for larger vectors), build an iov_iter, and delegate to
 * ll_file_read_iter().
 * NOTE(review): lines are missing from this excerpt — the single- vs
 * multi-segment branch and the iter allocation are not fully visible.
 */
1251 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1252 unsigned long nr_segs, loff_t pos)
1254 struct iovec *local_iov;
1255 struct iov_iter *to;
1260 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1268 env = cl_env_get(&refcheck);
1270 RETURN(PTR_ERR(env));
/* small vectors reuse the per-env scratch iovec */
1272 local_iov = &ll_env_info(env)->lti_local_iov;
1275 cl_env_put(env, &refcheck);
1277 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1278 if (local_iov == NULL)
1281 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
/* iov_iter_init() signature changed across kernels; both forms build
 * a READ-direction iter over the copied iovec */
1289 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1290 iov_iter_init(to, READ, local_iov, nr_segs, iov_count);
1291 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1292 iov_iter_init(to, local_iov, nr_segs, iov_count, 0);
1293 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1295 result = ll_file_read_iter(iocb, to);
1300 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous read(2) entry point for pre-read_iter kernels: wraps the
 * user buffer in a single-segment iovec, builds a sync kiocb and calls
 * ll_file_aio_read(), then propagates the updated position to *ppos.
 */
1305 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1309 struct iovec iov = { .iov_base = buf, .iov_len = count };
1310 struct kiocb *kiocb;
1315 env = cl_env_get(&refcheck);
1317 RETURN(PTR_ERR(env));
1319 kiocb = &ll_env_info(env)->lti_kiocb;
1320 init_sync_kiocb(kiocb, file);
1321 kiocb->ki_pos = *ppos;
/* The remaining-byte-count field was renamed across kernel versions. */
1322 #ifdef HAVE_KIOCB_KI_LEFT
1323 kiocb->ki_left = count;
1324 #elif defined(HAVE_KI_NBYTES)
1325 kiocb->ki_nbytes = count;
1328 result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos);
1329 *ppos = kiocb->ki_pos;
1331 cl_env_put(env, &refcheck);
1336 * Write to a file (through the page cache).
/*
 * Legacy aio_write entry, symmetric to ll_file_aio_read(): validates and
 * copies the iovec, allocates an iov_iter, and forwards to
 * ll_file_write_iter().
 */
1339 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1340 unsigned long nr_segs, loff_t pos)
1342 struct iovec *local_iov;
1343 struct iov_iter *from;
1348 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1356 env = cl_env_get(&refcheck);
1358 RETURN(PTR_ERR(env));
/* Small requests reuse the pre-allocated per-env iovec. */
1360 local_iov = &ll_env_info(env)->lti_local_iov;
1363 cl_env_put(env, &refcheck);
1365 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1366 if (local_iov == NULL)
1369 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1372 OBD_ALLOC_PTR(from);
/* Older kernels' iov_iter_init() lacks the direction argument. */
1377 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1378 iov_iter_init(from, WRITE, local_iov, nr_segs, iov_count);
1379 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1380 iov_iter_init(from, local_iov, nr_segs, iov_count, 0);
1381 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1383 result = ll_file_write_iter(iocb, from);
1388 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous write(2) entry point for pre-write_iter kernels: mirrors
 * ll_file_read() but dispatches through ll_file_aio_write().
 */
1393 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1394 size_t count, loff_t *ppos)
1397 struct iovec iov = { .iov_base = (void __user *)buf,
1399 struct kiocb *kiocb;
1404 env = cl_env_get(&refcheck);
1406 RETURN(PTR_ERR(env));
1408 kiocb = &ll_env_info(env)->lti_kiocb;
1409 init_sync_kiocb(kiocb, file);
1410 kiocb->ki_pos = *ppos;
/* The remaining-byte-count field was renamed across kernel versions. */
1411 #ifdef HAVE_KIOCB_KI_LEFT
1412 kiocb->ki_left = count;
1413 #elif defined(HAVE_KI_NBYTES)
1414 kiocb->ki_nbytes = count;
1417 result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos);
1418 *ppos = kiocb->ki_pos;
1420 cl_env_put(env, &refcheck);
1423 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1426 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read() implementation: routes the request through the generic
 * cl_io read path (CIT_READ) using IO_SPLICE args carrying the pipe and
 * splice flags.
 */
1428 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1429 struct pipe_inode_info *pipe, size_t count,
1433 struct vvp_io_args *args;
1438 env = cl_env_get(&refcheck);
1440 RETURN(PTR_ERR(env));
1442 args = ll_env_args(env, IO_SPLICE);
1443 args->u.splice.via_pipe = pipe;
1444 args->u.splice.via_flags = flags;
1446 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1447 cl_env_put(env, &refcheck);
/*
 * Set striping (LOV EA) on a file by re-opening it by FID with the
 * supplied layout attached to the open intent.  The inode size lock is
 * held across the intent open; the open handle obtained purely for the
 * layout change is released immediately afterwards.
 */
1451 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1452 __u64 flags, struct lov_user_md *lum,
1455 struct lookup_intent oit = {
1457 .it_flags = flags | MDS_OPEN_BY_FID,
1462 ll_inode_size_lock(inode);
1463 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1465 GOTO(out_unlock, rc);
/* Open succeeded only to install the layout; drop the handle now. */
1467 ll_release_openhandle(file->f_path.dentry, &oit);
1470 ll_inode_size_unlock(inode);
1471 ll_intent_release(&oit);
/* Clear O_LOV_DELAY_CREATE now that the layout has been set. */
1472 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA (striping information) for @filename from the MDS.
 * On success *lmmp points into the reply buffer of *request (caller must
 * keep the request until done with the data) and *lmm_size is its size.
 * The EA is byte-swapped to host endianness for userspace on LE-vs-BE
 * mismatch.
 */
1477 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1478 struct lov_mds_md **lmmp, int *lmm_size,
1479 struct ptlrpc_request **request)
1481 struct ll_sb_info *sbi = ll_i2sbi(inode);
1482 struct mdt_body *body;
1483 struct lov_mds_md *lmm = NULL;
1484 struct ptlrpc_request *req = NULL;
1485 struct md_op_data *op_data;
/* Size the getattr reply buffer for the largest possible EA. */
1488 rc = ll_get_default_mdsize(sbi, &lmmsize);
1492 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1493 strlen(filename), lmmsize,
1494 LUSTRE_OPC_ANY, NULL);
1495 if (IS_ERR(op_data))
1496 RETURN(PTR_ERR(op_data));
1498 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1499 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1500 ll_finish_md_op_data(op_data);
1502 CDEBUG(D_INFO, "md_getattr_name failed "
1503 "on %s: rc %d\n", filename, rc);
1507 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1508 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1510 lmmsize = body->mbo_eadatasize;
/* No striping EA present (e.g. file with no layout) -> -ENODATA. */
1512 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1514 GOTO(out, rc = -ENODATA);
1517 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1518 LASSERT(lmm != NULL);
/* Only plain V1/V3 layouts are understood here. */
1520 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1521 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1522 GOTO(out, rc = -EPROTO);
1526 * This is coming from the MDS, so is probably in
1527 * little endian. We convert it to host endian before
1528 * passing it to userspace.
/* Only swab on big-endian hosts (LOV wire format is little endian). */
1530 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1533 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1534 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1537 /* if function called for directory - we should
1538 * avoid swab not existent lsm objects */
1539 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1540 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1541 if (S_ISREG(body->mbo_mode))
1542 lustre_swab_lov_user_md_objects(
1543 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1545 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1546 lustre_swab_lov_user_md_v3(
1547 (struct lov_user_md_v3 *)lmm);
1548 if (S_ISREG(body->mbo_mode))
1549 lustre_swab_lov_user_md_objects(
1550 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1557 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: privileged (CAP_SYS_ADMIN) path to install a
 * raw LOV EA with pre-existing objects (MDS_OPEN_HAS_OBJS).  Copies the
 * user's lov_user_md and forwards it to ll_lov_setstripe_ea_info().
 */
1562 static int ll_lov_setea(struct inode *inode, struct file *file,
1565 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1566 struct lov_user_md *lump;
/* One header plus one OST object entry. */
1567 int lum_size = sizeof(struct lov_user_md) +
1568 sizeof(struct lov_user_ost_data);
1572 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1575 OBD_ALLOC_LARGE(lump, lum_size);
1579 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1580 OBD_FREE_LARGE(lump, lum_size);
1584 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1586 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the file's striping information to the user buffer @lum via the
 * cl_object layer (cl_object_getstripe on the inode's cl object).
 */
1590 static int ll_file_getstripe(struct inode *inode,
1591 struct lov_user_md __user *lum)
1598 env = cl_env_get(&refcheck);
1600 RETURN(PTR_ERR(env));
1602 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1603 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copies the user layout into a kernel
 * buffer, applies it through ll_lov_setstripe_ea_info(), refreshes the
 * layout generation, and reads the resulting stripe info back to the
 * caller's buffer.
 */
1607 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1610 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1611 struct lov_user_md *klum;
1613 __u64 flags = FMODE_WRITE;
1616 rc = ll_copy_user_md(lum, &klum);
1621 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* NOTE(review): put_user() result is not checked here in the visible
 * code — best-effort zeroing of the user's stripe count. */
1625 put_user(0, &lum->lmm_stripe_count);
1627 ll_layout_refresh(inode, &gen);
1628 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1631 OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK handler: take a cluster-wide group lock with id @arg
 * on behalf of this file descriptor.  Only one group lock per fd is
 * allowed; fd state is protected by lli->lli_lock, and the lock itself
 * is obtained (possibly blocking, unless O_NONBLOCK) outside the
 * spinlock, so a lost race is detected and resolved afterwards.
 */
1636 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1638 struct ll_inode_info *lli = ll_i2info(inode);
1639 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1640 struct ll_grouplock grouplock;
1645 CWARN("group id for group lock must not be 0\n");
/* "nolock" mounts cannot support group locks. */
1649 if (ll_file_nolock(file))
1650 RETURN(-EOPNOTSUPP);
1652 spin_lock(&lli->lli_lock);
1653 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1654 CWARN("group lock already existed with gid %lu\n",
1655 fd->fd_grouplock.lg_gid);
1656 spin_unlock(&lli->lli_lock);
1659 LASSERT(fd->fd_grouplock.lg_lock == NULL);
1660 spin_unlock(&lli->lli_lock);
/* Enqueue the group lock without holding the spinlock (may block). */
1662 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1663 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check under the lock: another thread may have won meanwhile. */
1667 spin_lock(&lli->lli_lock);
1668 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1669 spin_unlock(&lli->lli_lock);
1670 CERROR("another thread just won the race\n");
1671 cl_put_grouplock(&grouplock);
1675 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1676 fd->fd_grouplock = grouplock;
1677 spin_unlock(&lli->lli_lock);
1679 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock with id @arg held
 * by this file descriptor.  Fails if no group lock is held or if the id
 * does not match.  The fd state is cleared under lli->lli_lock; the
 * actual lock release happens outside the spinlock.
 */
1683 static int ll_put_grouplock(struct inode *inode, struct file *file,
1686 struct ll_inode_info *lli = ll_i2info(inode);
1687 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1688 struct ll_grouplock grouplock;
1691 spin_lock(&lli->lli_lock);
1692 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1693 spin_unlock(&lli->lli_lock);
1694 CWARN("no group lock held\n");
1698 LASSERT(fd->fd_grouplock.lg_lock != NULL);
1700 if (fd->fd_grouplock.lg_gid != arg) {
1701 CWARN("group lock %lu doesn't match current id %lu\n",
1702 arg, fd->fd_grouplock.lg_gid);
1703 spin_unlock(&lli->lli_lock);
/* Detach the lock from the fd before dropping the spinlock. */
1707 grouplock = fd->fd_grouplock;
1708 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1709 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1710 spin_unlock(&lli->lli_lock);
1712 cl_put_grouplock(&grouplock);
1713 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1718 * Close inode open handle
1720 * \param dentry [in] dentry which contains the inode
1721 * \param it [in,out] intent which contains open info and result
1724 * \retval <0 failure
1726 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1728 struct inode *inode = dentry->d_inode;
1729 struct obd_client_handle *och;
1735 /* Root ? Do nothing. */
1736 if (dentry->d_inode->i_sb->s_root == dentry)
1739 /* No open handle to close? Move away */
1740 if (!it_disposition(it, DISP_OPEN_OPEN))
1743 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1745 OBD_ALLOC(och, sizeof(*och));
1747 GOTO(out, rc = -ENOMEM);
/* Populate the client handle from the intent's open reply, then close
 * it on the MDS. */
1749 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1751 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1752 och, inode, 0, NULL);
1754 /* this one is in place of ll_file_open */
1755 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1756 ptlrpc_req_finished(it->d.lustre.it_data);
1757 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1763 * Get size for inode for which FIEMAP mapping is requested.
1764 * Make the FIEMAP get_info call and returns the result.
1765 * \param fiemap kernel buffer to hold extents
1766 * \param num_bytes kernel buffer size
1768 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1774 struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
1777 /* Checks for fiemap flags */
1778 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* Report the unsupported flags back to the caller. */
1779 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1783 /* Check for FIEMAP_FLAG_SYNC */
1784 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1785 rc = filemap_fdatawrite(inode->i_mapping);
1790 env = cl_env_get(&refcheck);
1792 RETURN(PTR_ERR(env));
/* Make sure the cached size is valid before deciding on mapping. */
1794 if (i_size_read(inode) == 0) {
1795 rc = ll_glimpse_size(inode);
1800 fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1801 obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
1802 obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
1804 /* If filesize is 0, then there would be no objects for mapping */
1805 if (fmkey.lfik_oa.o_size == 0) {
1806 fiemap->fm_mapped_extents = 0;
1810 fmkey.lfik_fiemap = *fiemap;
1812 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1813 &fmkey, fiemap, &num_bytes);
1815 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.
 * Requires CAP_DAC_READ_SEARCH unless the mount allows user fid2path.
 * The user-supplied gf_pathlen bounds the reply buffer (<= PATH_MAX).
 */
1819 int ll_fid2path(struct inode *inode, void __user *arg)
1821 struct obd_export *exp = ll_i2mdexp(inode);
1822 const struct getinfo_fid2path __user *gfin = arg;
1824 struct getinfo_fid2path *gfout;
1830 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1831 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1834 /* Only need to get the buflen */
1835 if (get_user(pathlen, &gfin->gf_pathlen))
1838 if (pathlen > PATH_MAX)
/* Allocate header plus path buffer in one chunk. */
1841 outsize = sizeof(*gfout) + pathlen;
1842 OBD_ALLOC(gfout, outsize);
1846 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1847 GOTO(gf_free, rc = -EFAULT);
1849 /* Call mdc_iocontrol */
1850 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1854 if (copy_to_user(arg, gfout, outsize))
1858 OBD_FREE(gfout, outsize);
1863 * Read the data_version for inode.
1865 * This value is computed using stripe object version on OST.
1866 * Version is computed using server side locking.
1868 * @param flags if do sync on the OST side;
1870 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1871 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1873 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1875 struct cl_object *obj = ll_i2info(inode)->lli_clob;
1883 /* If no file object initialized, we consider its version is 0. */
1889 env = cl_env_get(&refcheck);
1891 RETURN(PTR_ERR(env));
/* Drive a CIT_DATA_VERSION cl_io through the client io stack. */
1893 io = vvp_env_thread_io(env);
1895 io->u.ci_data_version.dv_data_version = 0;
1896 io->u.ci_data_version.dv_flags = flags;
1899 if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
1900 result = cl_io_loop(env, io);
1902 result = io->ci_result;
1904 *data_version = io->u.ci_data_version.dv_data_version;
1906 cl_io_fini(env, io);
/* Layout change during the io: the elided code restarts the io. */
1908 if (unlikely(io->ci_need_restart))
1911 cl_env_put(env, &refcheck);
1917 * Trigger a HSM release request for the provided inode.
1919 int ll_hsm_release(struct inode *inode)
1921 struct cl_env_nest nest;
1923 struct obd_client_handle *och = NULL;
1924 __u64 data_version = 0;
1928 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1929 ll_get_fsname(inode->i_sb, NULL, 0),
1930 PFID(&ll_i2info(inode)->lli_fid));
/* Take a write lease so no other client modifies the file while it is
 * being released. */
1932 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1934 GOTO(out, rc = PTR_ERR(och));
1936 /* Grab latest data_version and [am]time values */
1937 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1941 env = cl_env_nested_get(&nest);
1943 GOTO(out, rc = PTR_ERR(env));
1945 ll_merge_attr(env, inode);
1946 cl_env_nested_put(&nest, env);
1948 /* Release the file.
1949 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1950 * we still need it to pack l_remote_handle to MDT. */
1951 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
1952 MDS_HSM_RELEASE, &data_version);
1957 if (och != NULL && !IS_ERR(och)) /* close the file */
1958 ll_lease_close(och, inode, NULL);
/* Per-call state for ll_swap_layouts(): the two inodes involved (plus,
 * in elided fields, their data versions and check flags), kept together
 * so the pair can be swapped to enforce a stable locking order. */
1963 struct ll_swap_stack {
1966 struct inode *inode1;
1967 struct inode *inode2;
/*
 * Swap the layouts of two files (LL_IOC_LOV_SWAP_LAYOUTS backend).
 * Orders the pair by FID to avoid deadlock, optionally takes group locks
 * to flush dirty cache, optionally verifies that data versions have not
 * changed, then sends the swap to the MDT via obd_iocontrol().
 */
1972 static int ll_swap_layouts(struct file *file1, struct file *file2,
1973 struct lustre_swap_layouts *lsl)
1975 struct mdc_swap_layouts msl;
1976 struct md_op_data *op_data;
1979 struct ll_swap_stack *llss = NULL;
1982 OBD_ALLOC_PTR(llss);
1986 llss->inode1 = file1->f_path.dentry->d_inode;
1987 llss->inode2 = file2->f_path.dentry->d_inode;
1989 rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
1993 /* we use 2 bool because it is easier to swap than 2 bits */
1994 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1995 llss->check_dv1 = true;
1997 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1998 llss->check_dv2 = true;
2000 /* we cannot use lsl->sl_dvX directly because we may swap them */
2001 llss->dv1 = lsl->sl_dv1;
2002 llss->dv2 = lsl->sl_dv2;
2004 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2005 if (rc == 0) /* same file, done! */
/* Always lock in ascending FID order to prevent A/B deadlocks. */
2008 if (rc < 0) { /* sequentialize it */
2009 swap(llss->inode1, llss->inode2);
2011 swap(llss->dv1, llss->dv2);
2012 swap(llss->check_dv1, llss->check_dv2);
2016 if (gid != 0) { /* application asks to flush dirty cache */
2017 rc = ll_get_grouplock(llss->inode1, file1, gid);
2021 rc = ll_get_grouplock(llss->inode2, file2, gid);
/* Second lock failed: undo the first before bailing out. */
2023 ll_put_grouplock(llss->inode1, file1, gid);
2028 /* ultimate check, before swapping the layouts we check if
2029 * dataversion has changed (if requested) */
2030 if (llss->check_dv1) {
2031 rc = ll_data_version(llss->inode1, &dv, 0);
2034 if (dv != llss->dv1)
2035 GOTO(putgl, rc = -EAGAIN);
2038 if (llss->check_dv2) {
2039 rc = ll_data_version(llss->inode2, &dv, 0);
2042 if (dv != llss->dv2)
2043 GOTO(putgl, rc = -EAGAIN);
2046 /* struct md_op_data is used to send the swap args to the mdt
2047 * only flags is missing, so we use struct mdc_swap_layouts
2048 * through the md_op_data->op_data */
2049 /* flags from user space have to be converted before they are send to
2050 * server, no flag is sent today, they are only used on the client */
2053 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2054 0, LUSTRE_OPC_ANY, &msl);
2055 if (IS_ERR(op_data))
2056 GOTO(free, rc = PTR_ERR(op_data));
2058 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2059 sizeof(*op_data), op_data, NULL);
2060 ll_finish_md_op_data(op_data);
/* Release group locks in reverse acquisition order. */
2067 ll_put_grouplock(llss->inode2, file2, gid);
2068 ll_put_grouplock(llss->inode1, file1, gid);
/*
 * Set/clear HSM state flags on @inode (LL_IOC_HSM_STATE_SET backend).
 * Validates the masks and archive id, restricts non-HSM_USER_MASK flags
 * to CAP_SYS_ADMIN, then sends the request to the MDT.
 */
2078 int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2080 struct md_op_data *op_data;
2084 /* Detect out-of range masks */
2085 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2088 /* Non-root users are forbidden to set or clear flags which are
2089 * NOT defined in HSM_USER_MASK. */
2090 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2091 !cfs_capable(CFS_CAP_SYS_ADMIN))
2094 /* Detect out-of range archive id */
2095 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2096 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2099 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2100 LUSTRE_OPC_ANY, hss);
2101 if (IS_ERR(op_data))
2102 RETURN(PTR_ERR(op_data));
2104 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2105 sizeof(*op_data), op_data, NULL);
2107 ll_finish_md_op_data(op_data);
/*
 * LL_IOC_HSM_IMPORT backend: mark a regular file as archived/released in
 * HSM state, then force its attributes (mode, owner, size, times) to the
 * values recorded in the user-supplied hsm_user_import.
 */
2112 static int ll_hsm_import(struct inode *inode, struct file *file,
2113 struct hsm_user_import *hui)
2115 struct hsm_state_set *hss = NULL;
2116 struct iattr *attr = NULL;
2120 if (!S_ISREG(inode->i_mode))
2126 GOTO(out, rc = -ENOMEM);
/* Imported file exists in the archive but has no data on OSTs yet. */
2128 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2129 hss->hss_archive_id = hui->hui_archive_id;
2130 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2131 rc = ll_hsm_state_set(inode, hss);
2135 OBD_ALLOC_PTR(attr);
2137 GOTO(out, rc = -ENOMEM);
/* Only permission bits are taken from the import; force S_IFREG. */
2139 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2140 attr->ia_mode |= S_IFREG;
2141 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2142 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2143 attr->ia_size = hui->hui_size;
2144 attr->ia_mtime.tv_sec = hui->hui_mtime;
2145 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2146 attr->ia_atime.tv_sec = hui->hui_atime;
2147 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
/* ATTR_FORCE bypasses the usual permission checks for this setattr. */
2149 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2150 ATTR_UID | ATTR_GID |
2151 ATTR_MTIME | ATTR_MTIME_SET |
2152 ATTR_ATIME | ATTR_ATIME_SET;
2154 mutex_lock(&inode->i_mutex);
2156 rc = ll_setattr_raw(file->f_path.dentry, attr, true);
2160 mutex_unlock(&inode->i_mutex);
/* Map an open fmode to the lease-type bits reported to userspace:
 * FMODE_READ -> LL_LEASE_RDLCK, FMODE_WRITE -> LL_LEASE_WRLCK. */
2172 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2174 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2175 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * Main ioctl dispatcher for regular files: flag get/set, striping,
 * layout swap, group locks, FID/path translation, data version, HSM
 * operations and lease management.  Unknown commands fall through to the
 * registered llite ioctl handlers and finally to the data export.
 */
2179 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2181 struct inode *inode = file->f_path.dentry->d_inode;
2182 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2186 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2187 PFID(ll_inode2fid(inode)), inode, cmd);
2188 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2190 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2191 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2195 case LL_IOC_GETFLAGS:
2196 /* Get the current value of the file flags */
2197 return put_user(fd->fd_flags, (int __user *)arg);
2198 case LL_IOC_SETFLAGS:
2199 case LL_IOC_CLRFLAGS:
2200 /* Set or clear specific file flags */
2201 /* XXX This probably needs checks to ensure the flags are
2202 * not abused, and to handle any flag side effects.
2204 if (get_user(flags, (int __user *) arg))
2207 if (cmd == LL_IOC_SETFLAGS) {
/* LL_FILE_IGNORE_LOCK is only meaningful for O_DIRECT io. */
2208 if ((flags & LL_FILE_IGNORE_LOCK) &&
2209 !(file->f_flags & O_DIRECT)) {
2210 CERROR("%s: unable to disable locking on "
2211 "non-O_DIRECT file\n", current->comm);
2215 fd->fd_flags |= flags;
2217 fd->fd_flags &= ~flags;
2220 case LL_IOC_LOV_SETSTRIPE:
2221 RETURN(ll_lov_setstripe(inode, file, arg));
2222 case LL_IOC_LOV_SETEA:
2223 RETURN(ll_lov_setea(inode, file, arg));
2224 case LL_IOC_LOV_SWAP_LAYOUTS: {
2226 struct lustre_swap_layouts lsl;
2228 if (copy_from_user(&lsl, (char __user *)arg,
2229 sizeof(struct lustre_swap_layouts)))
/* Both files must be writable to swap their layouts. */
2232 if ((file->f_flags & O_ACCMODE) == O_RDONLY)
2235 file2 = fget(lsl.sl_fd);
2239 /* O_WRONLY or O_RDWR */
2240 if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
2241 GOTO(out, rc = -EPERM);
2243 if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
2244 struct inode *inode2;
2245 struct ll_inode_info *lli;
2246 struct obd_client_handle *och = NULL;
/* SWAP_LAYOUTS_CLOSE must not be combined with other flags. */
2248 if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE)
2249 GOTO(out, rc = -EINVAL);
/* Consume this fd's lease handle for the swap-and-close. */
2251 lli = ll_i2info(inode);
2252 mutex_lock(&lli->lli_och_mutex);
2253 if (fd->fd_lease_och != NULL) {
2254 och = fd->fd_lease_och;
2255 fd->fd_lease_och = NULL;
2257 mutex_unlock(&lli->lli_och_mutex);
2259 GOTO(out, rc = -ENOLCK);
2260 inode2 = file2->f_path.dentry->d_inode;
2261 rc = ll_swap_layouts_close(och, inode, inode2);
2263 rc = ll_swap_layouts(file, file2, &lsl);
2269 case LL_IOC_LOV_GETSTRIPE:
2270 RETURN(ll_file_getstripe(inode,
2271 (struct lov_user_md __user *)arg));
2272 case FSFILT_IOC_GETFLAGS:
2273 case FSFILT_IOC_SETFLAGS:
2274 RETURN(ll_iocontrol(inode, file, cmd, arg));
2275 case FSFILT_IOC_GETVERSION_OLD:
2276 case FSFILT_IOC_GETVERSION:
2277 RETURN(put_user(inode->i_generation, (int __user *)arg));
2278 case LL_IOC_GROUP_LOCK:
2279 RETURN(ll_get_grouplock(inode, file, arg));
2280 case LL_IOC_GROUP_UNLOCK:
2281 RETURN(ll_put_grouplock(inode, file, arg));
2282 case IOC_OBD_STATFS:
2283 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2285 /* We need to special case any other ioctls we want to handle,
2286 * to send them to the MDS/OST as appropriate and to properly
2287 * network encode the arg field.
2288 case FSFILT_IOC_SETVERSION_OLD:
2289 case FSFILT_IOC_SETVERSION:
2291 case LL_IOC_FLUSHCTX:
2292 RETURN(ll_flush_ctx(inode));
2293 case LL_IOC_PATH2FID: {
2294 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2295 sizeof(struct lu_fid)))
2300 case LL_IOC_GETPARENT:
2301 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2303 case OBD_IOC_FID2PATH:
2304 RETURN(ll_fid2path(inode, (void __user *)arg));
2305 case LL_IOC_DATA_VERSION: {
2306 struct ioc_data_version idv;
2309 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* Only the two documented flush flags are honoured. */
2312 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2313 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2316 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2322 case LL_IOC_GET_MDTIDX: {
2325 mdtidx = ll_get_mdt_idx(inode);
2329 if (put_user((int)mdtidx, (int __user *)arg))
2334 case OBD_IOC_GETDTNAME:
2335 case OBD_IOC_GETMDNAME:
2336 RETURN(ll_get_obd_name(inode, cmd, arg));
2337 case LL_IOC_HSM_STATE_GET: {
2338 struct md_op_data *op_data;
2339 struct hsm_user_state *hus;
2346 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2347 LUSTRE_OPC_ANY, hus);
2348 if (IS_ERR(op_data)) {
2350 RETURN(PTR_ERR(op_data));
2353 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2356 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2359 ll_finish_md_op_data(op_data);
2363 case LL_IOC_HSM_STATE_SET: {
2364 struct hsm_state_set *hss;
2371 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2376 rc = ll_hsm_state_set(inode, hss);
2381 case LL_IOC_HSM_ACTION: {
2382 struct md_op_data *op_data;
2383 struct hsm_current_action *hca;
2390 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2391 LUSTRE_OPC_ANY, hca);
2392 if (IS_ERR(op_data)) {
2394 RETURN(PTR_ERR(op_data));
2397 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2400 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2403 ll_finish_md_op_data(op_data);
2407 case LL_IOC_SET_LEASE: {
2408 struct ll_inode_info *lli = ll_i2info(inode);
2409 struct obd_client_handle *och = NULL;
/* Requested lease mode must be compatible with the open mode. */
2414 case LL_LEASE_WRLCK:
2415 if (!(file->f_mode & FMODE_WRITE))
2417 fmode = FMODE_WRITE;
2419 case LL_LEASE_RDLCK:
2420 if (!(file->f_mode & FMODE_READ))
2424 case LL_LEASE_UNLCK:
/* Detach and close any lease currently held by this fd. */
2425 mutex_lock(&lli->lli_och_mutex);
2426 if (fd->fd_lease_och != NULL) {
2427 och = fd->fd_lease_och;
2428 fd->fd_lease_och = NULL;
2430 mutex_unlock(&lli->lli_och_mutex);
2435 fmode = och->och_flags;
2436 rc = ll_lease_close(och, inode, &lease_broken);
/* Report the mode the (unbroken) lease had. */
2443 RETURN(ll_lease_type_from_fmode(fmode));
2448 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2450 /* apply for lease */
2451 och = ll_lease_open(inode, file, fmode, 0);
2453 RETURN(PTR_ERR(och));
/* Install the new lease on the fd unless one appeared meanwhile. */
2456 mutex_lock(&lli->lli_och_mutex);
2457 if (fd->fd_lease_och == NULL) {
2458 fd->fd_lease_och = och;
2461 mutex_unlock(&lli->lli_och_mutex);
2463 /* impossible now that only excl is supported for now */
2464 ll_lease_close(och, inode, &lease_broken);
2469 case LL_IOC_GET_LEASE: {
2470 struct ll_inode_info *lli = ll_i2info(inode);
2471 struct ldlm_lock *lock = NULL;
2474 mutex_lock(&lli->lli_och_mutex);
2475 if (fd->fd_lease_och != NULL) {
2476 struct obd_client_handle *och = fd->fd_lease_och;
/* Report the lease mode only while the DLM lock is not cancelled. */
2478 lock = ldlm_handle2lock(&och->och_lease_handle);
2480 lock_res_and_lock(lock);
2481 if (!ldlm_is_cancel(lock))
2482 fmode = och->och_flags;
2484 unlock_res_and_lock(lock);
2485 LDLM_LOCK_PUT(lock);
2488 mutex_unlock(&lli->lli_och_mutex);
2490 RETURN(ll_lease_type_from_fmode(fmode));
2492 case LL_IOC_HSM_IMPORT: {
2493 struct hsm_user_import *hui;
2499 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2504 rc = ll_hsm_import(inode, file, hui);
/* Not handled above: try the registered llite ioctl handlers, then
 * fall back to the data (OST) export. */
2514 ll_iocontrol_call(inode, file, cmd, arg, &err))
2517 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2518 (void __user *)arg));
2523 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Validate @offset against sign/maxsize constraints and commit it to
 * file->f_pos (resetting f_version) when it actually changes.
 * Local fallback used when the kernel lacks generic_file_llseek_size().
 */
2524 static inline loff_t
2525 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2527 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2529 if (offset > maxsize)
2532 if (offset != file->f_pos) {
2533 file->f_pos = offset;
2534 file->f_version = 0;
/*
 * Local copy of the kernel's generic_file_llseek_size() for kernels
 * that do not provide it: computes the new position for the various
 * seek origins (including SEEK_HOLE/SEEK_DATA semantics against @eof)
 * and applies it via llseek_execute().
 */
2540 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2541 loff_t maxsize, loff_t eof)
2543 struct inode *inode = file->f_path.dentry->d_inode;
2551 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2552 * position-querying operation. Avoid rewriting the "same"
2553 * f_pos value back to the file because a concurrent read(),
2554 * write() or lseek() might have altered it
2559 * f_lock protects against read/modify/write race with other
2560 * SEEK_CURs. Note that parallel writes and reads behave
/* SEEK_CUR: adjust relative to f_pos under the inode mutex. */
2563 mutex_lock(&inode->i_mutex);
2564 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2565 mutex_unlock(&inode->i_mutex);
2569 * In the generic case the entire file is data, so as long as
2570 * offset isn't at the end of the file then the offset is data.
2577 * There is a virtual hole at the end of the file, so as long as
2578 * offset isn't i_size or larger, return i_size.
2586 return llseek_execute(file, offset, maxsize);
/*
 * llseek() entry point: for SEEK_END/SEEK_HOLE/SEEK_DATA the file size
 * must be up to date cluster-wide, so glimpse it from the OSTs first,
 * then delegate to the (possibly local) generic_file_llseek_size().
 */
2590 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2592 struct inode *inode = file->f_path.dentry->d_inode;
2593 loff_t retval, eof = 0;
/* Provisional target, used only for the trace message below. */
2596 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2597 (origin == SEEK_CUR) ? file->f_pos : 0);
2598 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2599 PFID(ll_inode2fid(inode)), inode, retval, retval,
2601 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2603 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2604 retval = ll_glimpse_size(inode);
2607 eof = i_size_read(inode);
2610 retval = ll_generic_file_llseek_size(file, offset, origin,
2611 ll_file_maxbytes(inode), eof);
/*
 * flush() (close-time) handler: report any async writeback error that
 * was recorded for this inode, clearing the saved state, but suppress
 * the error if this fd already reported a write failure.
 * Returns -EIO on error, 0 otherwise.
 */
2615 static int ll_flush(struct file *file, fl_owner_t id)
2617 struct inode *inode = file->f_path.dentry->d_inode;
2618 struct ll_inode_info *lli = ll_i2info(inode);
2619 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2622 LASSERT(!S_ISDIR(inode->i_mode));
2624 /* catch async errors that were recorded back when async writeback
2625 * failed for pages in this mapping. */
2626 rc = lli->lli_async_rc;
2627 lli->lli_async_rc = 0;
2628 if (lli->lli_clob != NULL) {
2629 err = lov_read_and_clear_async_rc(lli->lli_clob);
2634 /* The application has been told write failure already.
2635 * Do not report failure again. */
2636 if (fd->fd_write_failed)
2638 return rc ? -EIO : 0;
2642 * Called to make sure a portion of file has been written out.
2643 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2645 * Return how many pages have been written.
2647 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2648 enum cl_fsync_mode mode, int ignore_layout)
2650 struct cl_env_nest nest;
2653 struct cl_fsync_io *fio;
/* Reject any mode outside the defined cl_fsync_mode set. */
2657 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2658 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2661 env = cl_env_nested_get(&nest);
2663 RETURN(PTR_ERR(env));
/* Drive a CIT_FSYNC cl_io over [start, end] on the inode's object. */
2665 io = vvp_env_thread_io(env);
2666 io->ci_obj = ll_i2info(inode)->lli_clob;
2667 io->ci_ignore_layout = ignore_layout;
2669 /* initialize parameters for sync */
2670 fio = &io->u.ci_fsync;
2671 fio->fi_start = start;
2673 fio->fi_fid = ll_inode2fid(inode);
2674 fio->fi_mode = mode;
2675 fio->fi_nr_written = 0;
2677 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2678 result = cl_io_loop(env, io);
2680 result = io->ci_result;
/* On success, report the number of pages written out. */
2682 result = fio->fi_nr_written;
2683 cl_io_fini(env, io);
2684 cl_env_nested_put(&nest, env);
2690 * When dentry is provided (the 'else' case), *file->f_path.dentry may be
2691 * null and dentry must be used directly rather than pulled from
2692 * *file->f_path.dentry as is done otherwise.
/*
 * fsync() entry point; the prototype differs by kernel version, hence
 * the three #ifdef variants.  Flushes page-cache data, reports and
 * clears recorded async writeback errors, fsyncs metadata via the MDC
 * and then data via cl_sync_file_range(CL_FSYNC_ALL), maintaining the
 * per-fd fd_write_failed state.
 */
2695 #ifdef HAVE_FILE_FSYNC_4ARGS
2696 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2698 struct dentry *dentry = file->f_path.dentry;
2699 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2700 int ll_fsync(struct file *file, int datasync)
2702 struct dentry *dentry = file->f_path.dentry;
2704 loff_t end = LLONG_MAX;
2706 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2709 loff_t end = LLONG_MAX;
2711 struct inode *inode = dentry->d_inode;
2712 struct ll_inode_info *lli = ll_i2info(inode);
2713 struct ptlrpc_request *req;
2717 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2718 PFID(ll_inode2fid(inode)), inode);
2719 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2721 #ifdef HAVE_FILE_FSYNC_4ARGS
2722 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2723 mutex_lock(&inode->i_mutex);
2725 /* fsync's caller has already called _fdata{sync,write}, we want
2726 * that IO to finish before calling the osc and mdc sync methods */
2727 rc = filemap_fdatawait(inode->i_mapping);
2730 /* catch async errors that were recorded back when async writeback
2731 * failed for pages in this mapping. */
2732 if (!S_ISDIR(inode->i_mode)) {
2733 err = lli->lli_async_rc;
2734 lli->lli_async_rc = 0;
2737 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* Sync metadata on the MDT. */
2742 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
2746 ptlrpc_req_finished(req);
2748 if (S_ISREG(inode->i_mode)) {
2749 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
/* Sync file data to the OSTs; track per-fd write failure state. */
2751 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2752 if (rc == 0 && err < 0)
2755 fd->fd_write_failed = true;
2757 fd->fd_write_failed = false;
2760 #ifdef HAVE_FILE_FSYNC_4ARGS
2761 mutex_unlock(&inode->i_mutex);
/*
 * Handle fcntl(F_SETLK/F_SETLKW/F_GETLK) and flock() requests by
 * enqueueing an LDLM_FLOCK lock on the MDT, then mirroring the result
 * into the local VFS lock tables.
 *
 * NOTE(review): elided extract — the switch statements over fl_type and
 * cmd (and several closing braces / RETURNs) are missing between the
 * visible lines.
 */
2767 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2769 struct inode *inode = file->f_path.dentry->d_inode;
2770 struct ll_sb_info *sbi = ll_i2sbi(inode);
2771 struct ldlm_enqueue_info einfo = {
2772 .ei_type = LDLM_FLOCK,
2773 .ei_cb_cp = ldlm_flock_completion_ast,
2774 .ei_cbdata = file_lock,
2776 struct md_op_data *op_data;
2777 struct lustre_handle lockh = {0};
2778 ldlm_policy_data_t flock = {{0}};
2779 int fl_type = file_lock->fl_type;
2785 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2786 PFID(ll_inode2fid(inode)), file_lock);
2788 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
/* Translate the VFS lock description into an LDLM flock policy. */
2790 if (file_lock->fl_flags & FL_FLOCK) {
2791 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2792 /* flocks are whole-file locks */
2793 flock.l_flock.end = OFFSET_MAX;
2794 /* For flocks owner is determined by the local file descriptor*/
2795 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2796 } else if (file_lock->fl_flags & FL_POSIX) {
2797 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2798 flock.l_flock.start = file_lock->fl_start;
2799 flock.l_flock.end = file_lock->fl_end;
2803 flock.l_flock.pid = file_lock->fl_pid;
2805 /* Somewhat ugly workaround for svc lockd.
2806 * lockd installs custom fl_lmops->lm_compare_owner that checks
2807 * for the fl_owner to be the same (which it always is on local node
2808 * I guess between lockd processes) and then compares pid.
2809 * As such we assign pid to the owner field to make it all work,
2810 * conflict with normal locks is unlikely since pid space and
2811 * pointer space for current->files are not intersecting */
2812 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2813 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Map the request type to a DLM mode (read -> PR, write -> PW,
 * unlock -> NL); the enclosing switch on fl_type is elided here. */
2817 einfo.ei_mode = LCK_PR;
2820 /* An unlock request may or may not have any relation to
2821 * existing locks so we may not be able to pass a lock handle
2822 * via a normal ldlm_lock_cancel() request. The request may even
2823 * unlock a byte range in the middle of an existing lock. In
2824 * order to process an unlock request we need all of the same
2825 * information that is given with a normal read or write record
2826 * lock request. To avoid creating another ldlm unlock (cancel)
2827 * message we'll treat a LCK_NL flock request as an unlock. */
2828 einfo.ei_mode = LCK_NL;
2831 einfo.ei_mode = LCK_PW;
2834 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* Map cmd to enqueue flags (switch header elided): non-blocking set
 * requests use BLOCK_NOWAIT, F_GETLK-style queries use TEST_LOCK. */
2849 flags = LDLM_FL_BLOCK_NOWAIT;
2855 flags = LDLM_FL_TEST_LOCK;
2858 CERROR("unknown fcntl lock command: %d\n", cmd);
2862 /* Save the old mode so that if the mode in the lock changes we
2863 * can decrement the appropriate reader or writer refcount. */
2864 file_lock->fl_type = einfo.ei_mode;
2866 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2867 LUSTRE_OPC_ANY, NULL);
2868 if (IS_ERR(op_data))
2869 RETURN(PTR_ERR(op_data));
2871 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2872 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2873 flock.l_flock.pid, flags, einfo.ei_mode,
2874 flock.l_flock.start, flock.l_flock.end);
/* Enqueue the flock on the MDT. */
2876 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2879 /* Restore the file lock type if not TEST lock. */
2880 if (!(flags & LDLM_FL_TEST_LOCK))
2881 file_lock->fl_type = fl_type;
/* Mirror a successful (or unlock) result into the kernel's local
 * flock/posix lock tables so the VFS view stays consistent. */
2883 if ((file_lock->fl_flags & FL_FLOCK) &&
2884 (rc == 0 || file_lock->fl_type == F_UNLCK))
2885 rc2 = flock_lock_file_wait(file, file_lock);
2886 if ((file_lock->fl_flags & FL_POSIX) &&
2887 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2888 !(flags & LDLM_FL_TEST_LOCK))
2889 rc2 = posix_lock_file_wait(file, file_lock);
/* Local bookkeeping failed: release the server-side lock again by
 * enqueueing it with mode LCK_NL (treated as unlock, see above). */
2891 if (rc2 && file_lock->fl_type != F_UNLCK) {
2892 einfo.ei_mode = LCK_NL;
2893 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2898 ll_finish_md_op_data(op_data);
/*
 * Look up the FID of entry @name (length @namelen) under directory
 * @parent via an MDS getattr-by-name RPC and store it in *@fid.
 *
 * NOTE(review): elided extract — rc declaration, error-path checks and
 * the out_req label/RETURN are not visible here.
 */
2903 int ll_get_fid_by_name(struct inode *parent, const char *name,
2904 int namelen, struct lu_fid *fid)
2906 struct md_op_data *op_data = NULL;
2907 struct mdt_body *body;
2908 struct ptlrpc_request *req;
2912 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2913 LUSTRE_OPC_ANY, NULL);
2914 if (IS_ERR(op_data))
2915 RETURN(PTR_ERR(op_data));
/* Only the FID is needed from the reply. */
2917 op_data->op_valid = OBD_MD_FLID;
2918 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2919 ll_finish_md_op_data(op_data);
2923 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2925 GOTO(out_req, rc = -EFAULT);
2927 *fid = body->mbo_fid1;
2929 ptlrpc_req_finished(req);
/*
 * Migrate directory entry @name under @parent to MDT @mdtidx, using a
 * rename-to-self with CLI_MIGRATE set.
 *
 * NOTE(review): elided extract — qstr/rc declarations, several closing
 * braces, the dput() of dchild, iput() and the final RETURN are missing
 * between the visible lines.
 */
2933 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2934 const char *name, int namelen)
2936 struct dentry *dchild = NULL;
2937 struct inode *child_inode = NULL;
2938 struct md_op_data *op_data;
2939 struct ptlrpc_request *request = NULL;
2944 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2945 name, PFID(ll_inode2fid(parent)), mdtidx);
2947 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2948 0, LUSTRE_OPC_ANY, NULL);
2949 if (IS_ERR(op_data))
2950 RETURN(PTR_ERR(op_data));
2952 /* Get child FID first */
2953 qstr.hash = full_name_hash(name, namelen);
/* Prefer the cached dentry: take a reference on the child inode, lock
 * it across the migration and drop its aliases from the dcache. */
2956 dchild = d_lookup(file->f_path.dentry, &qstr);
2957 if (dchild != NULL) {
2958 if (dchild->d_inode != NULL) {
2959 child_inode = igrab(dchild->d_inode);
2960 if (child_inode != NULL) {
2961 mutex_lock(&child_inode->i_mutex);
2962 op_data->op_fid3 = *ll_inode2fid(child_inode);
2963 ll_invalidate_aliases(child_inode);
/* Not in the dcache — ask the MDS for the child's FID. */
2968 rc = ll_get_fid_by_name(parent, name, namelen,
2974 if (!fid_is_sane(&op_data->op_fid3)) {
2975 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
2976 ll_get_fsname(parent->i_sb, NULL, 0), name,
2977 PFID(&op_data->op_fid3));
2978 GOTO(out_free, rc = -EINVAL);
/* If the child already lives on the target MDT there is nothing to
 * do (the comparison against mdtidx is elided here). */
2981 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
2986 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
2987 PFID(&op_data->op_fid3), mdtidx);
2988 GOTO(out_free, rc = 0);
/* Migration is implemented as a same-name rename with CLI_MIGRATE. */
2991 op_data->op_mds = mdtidx;
2992 op_data->op_cli_flags = CLI_MIGRATE;
2993 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
2994 namelen, name, namelen, &request);
2996 ll_update_times(request, parent);
2998 ptlrpc_req_finished(request);
/* The old inode is stale after migration: zero its link count before
 * unlocking/releasing it. */
3003 if (child_inode != NULL) {
3004 clear_nlink(child_inode);
3005 mutex_unlock(&child_inode->i_mutex);
3009 ll_finish_md_op_data(op_data);
/*
 * Stub used for the -o noflock mount option; body elided in this
 * extract (presumably returns -ENOSYS — see the comment above
 * ll_file_operations_noflock below; TODO confirm).
 */
3014 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3022 * test if some locks matching bits and l_req_mode are acquired
3023 * - bits can be in different locks
3024 * - if found clear the common lock bits in *bits
3025 * - the bits not found, are kept in *bits
3027 * \param bits [IN] searched lock bits [IN]
3028 * \param l_req_mode [IN] searched lock mode
3029 * \retval boolean, true iff all bits are found
/*
 * NOTE(review): elided extract — fid/flags/i declarations, a NULL-inode
 * guard and the final RETURN are not visible here.
 */
3031 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3033 struct lustre_handle lockh;
3034 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four modes. */
3035 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3036 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3045 fid = &ll_i2info(inode)->lli_fid;
3046 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3047 ldlm_lockname[mode]);
/* TEST_LOCK: only check for a match, do not take a reference. */
3049 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe each requested inodebit individually; a matching lock may
 * cover several bits at once, all of which are cleared from *bits. */
3050 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3051 policy.l_inodebits.bits = *bits & (1 << i);
3052 if (policy.l_inodebits.bits == 0)
3055 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3056 &policy, mode, &lockh)) {
3057 struct ldlm_lock *lock;
3059 lock = ldlm_handle2lock(&lockh);
3062 ~(lock->l_policy_data.l_inodebits.bits);
3063 LDLM_LOCK_PUT(lock);
3065 *bits &= ~policy.l_inodebits.bits;
/*
 * Try to match (and take a reference on) an MD lock on @inode covering
 * inodebits @bits; on success *@lockh holds the matched lock handle and
 * the granted mode is returned (0 if no match).
 *
 * NOTE(review): elided extract — the mode parameter declaration, fid/rc
 * declarations and the RETURN are not visible here.
 */
3072 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3073 struct lustre_handle *lockh, __u64 flags,
3076 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3081 fid = &ll_i2info(inode)->lli_fid;
3082 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3084 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3085 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Post-process the result of an inode revalidation RPC: translate
 * -ENOENT for already-unlinked inodes and log other failures.
 *
 * NOTE(review): elided extract — the statements performed inside each
 * branch (and the final return) are not visible here.
 */
3090 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3092 /* Already unlinked. Just update nlink and return success */
3093 if (rc == -ENOENT) {
3095 /* If it is striped directory, and there is bad stripe
3096 * Let's revalidate the dentry again, instead of returning
3098 if (S_ISDIR(inode->i_mode) &&
3099 ll_i2info(inode)->lli_lsm_md != NULL)
3102 /* This path cannot be hit for regular files unless in
3103 * case of obscure races, so no need to to validate
3105 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3107 } else if (rc != 0) {
/* -EACCES/-EIDRM are expected under permission/identity churn, so
 * demote them to D_INFO; everything else is a real error. */
3108 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3109 "%s: revalidate FID "DFID" error: rc = %d\n",
3110 ll_get_fsname(inode->i_sb, NULL, 0),
3111 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate @dentry's inode attributes from the MDS, either via an
 * intent lock (when the server supports OBD_CONNECT_ATTRFID) or via a
 * plain getattr RPC guarded by an ibits-lock check.
 *
 * NOTE(review): elided extract — rc/req error handling between RPC
 * calls, several GOTOs/labels and the final RETURN are missing.
 */
3117 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3119 struct inode *inode = dentry->d_inode;
3120 struct ptlrpc_request *req = NULL;
3121 struct obd_export *exp;
3125 LASSERT(inode != NULL);
3127 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3128 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3130 exp = ll_i2mdexp(inode);
3132 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3133 * But under CMD case, it caused some lock issues, should be fixed
3134 * with new CMD ibits lock. See bug 12718 */
3135 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3136 struct lookup_intent oit = { .it_op = IT_GETATTR };
3137 struct md_op_data *op_data;
/* A pure lookup-bit request only needs IT_LOOKUP, not IT_GETATTR. */
3139 if (ibits == MDS_INODELOCK_LOOKUP)
3140 oit.it_op = IT_LOOKUP;
3142 /* Call getattr by fid, so do not provide name at all. */
3143 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3144 dentry->d_inode, NULL, 0, 0,
3145 LUSTRE_OPC_ANY, NULL);
3146 if (IS_ERR(op_data))
3147 RETURN(PTR_ERR(op_data));
3149 rc = md_intent_lock(exp, op_data, &oit, &req,
3150 &ll_md_blocking_ast, 0);
3151 ll_finish_md_op_data(op_data);
3153 rc = ll_inode_revalidate_fini(inode, rc);
3157 rc = ll_revalidate_it_finish(req, &oit, dentry);
3159 ll_intent_release(&oit);
3163 /* Unlinked? Unhash dentry, so it is not picked up later by
3164 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3165 here to preserve get_cwd functionality on 2.6.
3167 if (!dentry->d_inode->i_nlink)
3168 d_lustre_invalidate(dentry, 0);
3170 ll_lookup_finish_locks(&oit, dentry);
/* No ATTRFID support: fall back to a getattr RPC, but only when we
 * do not already hold covering ibits locks locally. */
3171 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3172 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3173 u64 valid = OBD_MD_FLGETATTR;
3174 struct md_op_data *op_data;
/* Regular files also fetch the striping EA; ealen sizes the reply
 * buffer accordingly. */
3177 if (S_ISREG(inode->i_mode)) {
3178 rc = ll_get_default_mdsize(sbi, &ealen);
3181 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3184 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3185 0, ealen, LUSTRE_OPC_ANY,
3187 if (IS_ERR(op_data))
3188 RETURN(PTR_ERR(op_data));
3190 op_data->op_valid = valid;
3191 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3192 ll_finish_md_op_data(op_data);
3194 rc = ll_inode_revalidate_fini(inode, rc);
/* Apply the fresh attributes from the reply to the inode. */
3198 rc = ll_prep_inode(&inode, req, NULL, NULL);
3201 ptlrpc_req_finished(req);
/*
 * For a striped directory, merge the per-stripe attributes from all
 * MDTs into the master inode (nlink, blocks, size, *times).
 *
 * NOTE(review): elided extract — rc declaration, the error check after
 * md_merge_attr() and the final RETURN are not visible here.
 */
3205 static int ll_merge_md_attr(struct inode *inode)
3207 struct cl_attr attr = { 0 };
3210 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3211 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3212 &attr, ll_md_blocking_ast);
3216 set_nlink(inode, attr.cat_nlink);
3217 inode->i_blocks = attr.cat_blocks;
3218 i_size_write(inode, attr.cat_size);
/* Times are cached in lli_* and copied to the VFS inode elsewhere
 * (see ll_inode_revalidate()). */
3220 ll_i2info(inode)->lli_atime = attr.cat_atime;
3221 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3222 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * Full revalidation: refresh MD attributes via __ll_inode_revalidate()
 * and then, for regular files, glimpse the size from the OSTs.
 *
 * NOTE(review): elided extract — the return-type line, rc checks and
 * the final RETURN are missing between the visible lines.
 */
3228 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3230 struct inode *inode = dentry->d_inode;
3234 rc = __ll_inode_revalidate(dentry, ibits);
3238 /* if object isn't regular file, don't validate size */
3239 if (!S_ISREG(inode->i_mode)) {
/* Striped directories need their attributes merged across MDTs. */
3240 if (S_ISDIR(inode->i_mode) &&
3241 ll_i2info(inode)->lli_lsm_md != NULL) {
3242 rc = ll_merge_md_attr(inode);
/* Propagate the MD-cached times into the VFS inode. */
3247 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3248 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3249 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3251 /* In case of restore, the MDT has the right size and has
3252 * already send it back without granting the layout lock,
3253 * inode is up-to-date so glimpse is useless.
3254 * Also to glimpse we need the layout, in case of a running
3255 * restore the MDT holds the layout lock so the glimpse will
3256 * block up to the end of restore (getattr will block)
3258 if (!ll_file_test_flag(ll_i2info(inode), LLIF_FILE_RESTORING))
3259 rc = ll_glimpse_size(inode);
/*
 * VFS ->getattr: revalidate the inode (UPDATE|LOOKUP bits) and fill
 * *@stat from the refreshed inode fields.
 *
 * NOTE(review): elided extract — res declaration, the error check after
 * revalidation and the final RETURN are not visible here.
 */
3264 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3266 struct inode *inode = de->d_inode;
3267 struct ll_sb_info *sbi = ll_i2sbi(inode);
3268 struct ll_inode_info *lli = ll_i2info(inode);
3271 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3272 MDS_INODELOCK_LOOKUP);
3273 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3278 stat->dev = inode->i_sb->s_dev;
/* 32-bit userspace cannot hold a 64-bit ino: build a squashed ino
 * from the FID instead. */
3279 if (ll_need_32bit_api(sbi))
3280 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3282 stat->ino = inode->i_ino;
3283 stat->mode = inode->i_mode;
3284 stat->uid = inode->i_uid;
3285 stat->gid = inode->i_gid;
3286 stat->rdev = inode->i_rdev;
3287 stat->atime = inode->i_atime;
3288 stat->mtime = inode->i_mtime;
3289 stat->ctime = inode->i_ctime;
3290 stat->blksize = 1 << inode->i_blkbits;
3292 stat->nlink = inode->i_nlink;
3293 stat->size = i_size_read(inode);
3294 stat->blocks = inode->i_blocks;
/*
 * ->fiemap handler: marshal the kernel's fiemap_extent_info into a
 * Lustre struct fiemap, run ll_do_fiemap(), and copy results back to
 * the user buffer.
 *
 * NOTE(review): elided extract — rc/num_bytes declarations, the
 * allocation-failure check and the out label/RETURN are missing.
 */
3299 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3300 __u64 start, __u64 len)
3304 struct fiemap *fiemap;
3305 unsigned int extent_count = fieinfo->fi_extents_max;
3307 num_bytes = sizeof(*fiemap) + (extent_count *
3308 sizeof(struct fiemap_extent));
3309 OBD_ALLOC_LARGE(fiemap, num_bytes);
3314 fiemap->fm_flags = fieinfo->fi_flags;
3315 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3316 fiemap->fm_start = start;
3317 fiemap->fm_length = len;
/* Only the first extent is copied in: it may carry a continuation
 * cookie from a previous FIEMAP call. */
3318 if (extent_count > 0 &&
3319 copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3320 sizeof(struct fiemap_extent)) != 0)
3321 GOTO(out, rc = -EFAULT);
3323 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* Copy the mapped extents back out to userspace. */
3325 fieinfo->fi_flags = fiemap->fm_flags;
3326 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3327 if (extent_count > 0 &&
3328 copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3329 fiemap->fm_mapped_extents *
3330 sizeof(struct fiemap_extent)) != 0)
3331 GOTO(out, rc = -EFAULT);
3333 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * Return a referenced copy of the cached POSIX ACL of @type for
 * @inode (lli_posix_acl), taken under lli_lock.
 *
 * NOTE(review): elided extract — the RETURN of @acl is not visible.
 */
3337 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3339 struct ll_inode_info *lli = ll_i2info(inode);
3340 struct posix_acl *acl = NULL;
3343 spin_lock(&lli->lli_lock);
3344 /* VFS' acl_permission_check->check_acl will release the refcount */
3345 acl = posix_acl_dup(lli->lli_posix_acl);
3346 spin_unlock(&lli->lli_lock);
/*
 * ACL-check callback passed to generic_permission() on kernels whose
 * generic_permission() takes more than two arguments; prototype varies
 * with the kernel version (3- vs 4-arg form).
 *
 * NOTE(review): elided extract — the return-type line, rc declaration,
 * the NULL-acl check and the #else/-EAGAIN paths are not visible.
 */
#ifndef HAVE_GENERIC_PERMISSION_2ARGS
3353 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3354 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3356 ll_check_acl(struct inode *inode, int mask)
3359 # ifdef CONFIG_FS_POSIX_ACL
3360 struct posix_acl *acl;
/* Cannot take locks / block in RCU walk mode. */
3364 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3365 if (flags & IPERM_FLAG_RCU)
3368 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3373 rc = posix_acl_permission(inode, acl, mask);
3374 posix_acl_release(acl);
3377 # else /* !CONFIG_FS_POSIX_ACL */
3379 # endif /* CONFIG_FS_POSIX_ACL */
3381 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * VFS ->permission: revalidate the root inode when needed, apply root
 * squashing by temporarily overriding credentials, and run the generic
 * (or remote) permission check.
 *
 * NOTE(review): elided extract — rc/cap declarations, -ECHILD returns
 * for the RCU cases, prepare_creds() failure handling, several closing
 * braces and the final RETURN are missing between the visible lines.
 */
#ifdef HAVE_GENERIC_PERMISSION_4ARGS
3384 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3386 # ifdef HAVE_INODE_PERMISION_2ARGS
3387 int ll_inode_permission(struct inode *inode, int mask)
3389 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3394 struct ll_sb_info *sbi;
3395 struct root_squash_info *squash;
3396 struct cred *cred = NULL;
3397 const struct cred *old_cred = NULL;
3399 bool squash_id = false;
/* RCU-walk mode cannot block; bail out (return value elided). */
3402 #ifdef MAY_NOT_BLOCK
3403 if (mask & MAY_NOT_BLOCK)
3405 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3406 if (flags & IPERM_FLAG_RCU)
3410 /* as root inode are NOT getting validated in lookup operation,
3411 * need to do it before permission check. */
3413 if (inode == inode->i_sb->s_root->d_inode) {
3414 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3415 MDS_INODELOCK_LOOKUP);
3420 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3421 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3423 /* squash fsuid/fsgid if needed */
3424 sbi = ll_i2sbi(inode);
3425 squash = &sbi->ll_squash;
/* Squash only when configured (rsi_uid != 0), the caller is root,
 * and the nosquash flag is not set on this mount. */
3426 if (unlikely(squash->rsi_uid != 0 &&
3427 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3428 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3432 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3433 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3434 squash->rsi_uid, squash->rsi_gid);
3436 /* update current process's credentials
3437 * and FS capability */
3438 cred = prepare_creds();
3442 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3443 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* Drop all filesystem-related capabilities while squashed. */
3444 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3445 if ((1 << cap) & CFS_CAP_FS_MASK)
3446 cap_lower(cred->cap_effective, cap);
3448 old_cred = override_creds(cred);
3451 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3453 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3454 rc = lustre_check_remote_perm(inode, mask);
3456 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3458 /* restore current process's credentials and FS capability */
3460 revert_creds(old_cred);
/* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no .flock/.lock entries, so the kernel's
 * local-only lock handling applies (localflock behavior). */
3468 struct file_operations ll_file_operations = {
3469 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3470 # ifdef HAVE_SYNC_READ_WRITE
3471 .read = new_sync_read,
3472 .write = new_sync_write,
3474 .read_iter = ll_file_read_iter,
3475 .write_iter = ll_file_write_iter,
3476 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3477 .read = ll_file_read,
3478 .aio_read = ll_file_aio_read,
3479 .write = ll_file_write,
3480 .aio_write = ll_file_aio_write,
3481 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3482 .unlocked_ioctl = ll_file_ioctl,
3483 .open = ll_file_open,
3484 .release = ll_file_release,
3485 .mmap = ll_file_mmap,
3486 .llseek = ll_file_seek,
3487 .splice_read = ll_file_splice_read,
/* file_operations used when cluster-coherent flock is enabled
 * (-o flock): identical to ll_file_operations plus .flock/.lock
 * wired to ll_file_flock(). */
3492 struct file_operations ll_file_operations_flock = {
3493 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3494 # ifdef HAVE_SYNC_READ_WRITE
3495 .read = new_sync_read,
3496 .write = new_sync_write,
3497 # endif /* HAVE_SYNC_READ_WRITE */
3498 .read_iter = ll_file_read_iter,
3499 .write_iter = ll_file_write_iter,
3500 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3501 .read = ll_file_read,
3502 .aio_read = ll_file_aio_read,
3503 .write = ll_file_write,
3504 .aio_write = ll_file_aio_write,
3505 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3506 .unlocked_ioctl = ll_file_ioctl,
3507 .open = ll_file_open,
3508 .release = ll_file_release,
3509 .mmap = ll_file_mmap,
3510 .llseek = ll_file_seek,
3511 .splice_read = ll_file_splice_read,
3514 .flock = ll_file_flock,
3515 .lock = ll_file_flock
3518 /* These are for -o noflock - to return ENOSYS on flock calls */
/* Same table again, but .flock/.lock point at the ll_file_noflock
 * stub so file-lock syscalls fail cleanly. */
3519 struct file_operations ll_file_operations_noflock = {
3520 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3521 # ifdef HAVE_SYNC_READ_WRITE
3522 .read = new_sync_read,
3523 .write = new_sync_write,
3524 # endif /* HAVE_SYNC_READ_WRITE */
3525 .read_iter = ll_file_read_iter,
3526 .write_iter = ll_file_write_iter,
3527 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3528 .read = ll_file_read,
3529 .aio_read = ll_file_aio_read,
3530 .write = ll_file_write,
3531 .aio_write = ll_file_aio_write,
3532 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3533 .unlocked_ioctl = ll_file_ioctl,
3534 .open = ll_file_open,
3535 .release = ll_file_release,
3536 .mmap = ll_file_mmap,
3537 .llseek = ll_file_seek,
3538 .splice_read = ll_file_splice_read,
3541 .flock = ll_file_noflock,
3542 .lock = ll_file_noflock
/* inode_operations for regular Lustre files. */
3545 struct inode_operations ll_file_inode_operations = {
3546 .setattr = ll_setattr,
3547 .getattr = ll_getattr,
3548 .permission = ll_inode_permission,
3549 .setxattr = ll_setxattr,
3550 .getxattr = ll_getxattr,
3551 .listxattr = ll_listxattr,
3552 .removexattr = ll_removexattr,
3553 .fiemap = ll_fiemap,
/* .get_acl only exists on kernels that dispatch ACLs through iops. */
3554 #ifdef HAVE_IOP_GET_ACL
3555 .get_acl = ll_get_acl,
3559 /* dynamic ioctl number support routines */
/* Registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries protected by an rwsem. NOTE(review): the
 * "} llioc = {" line joining the type to its initializer is elided
 * in this extract. */
3560 static struct llioc_ctl_data {
3561 struct rw_semaphore ioc_sem;
3562 struct list_head ioc_head;
3564 __RWSEM_INITIALIZER(llioc.ioc_sem),
3565 LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the ioctl commands it accepts
 * (iocd_cmd is a trailing variable-length array of iocd_count cmds). */
3570 struct list_head iocd_list;
3571 unsigned int iocd_size;
3572 llioc_callback_t iocd_cb;
3573 unsigned int iocd_count;
3574 unsigned int iocd_cmd[0];
/*
 * Register callback @cb for @count dynamic ioctl numbers listed in
 * @cmd; returns an opaque cookie (the allocated llioc_data) used later
 * by ll_iocontrol_unregister(), or NULL on bad args / ENOMEM.
 *
 * NOTE(review): elided extract — the NULL returns and the final RETURN
 * of in_data are not visible here.
 */
3577 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3580 struct llioc_data *in_data = NULL;
3583 if (cb == NULL || cmd == NULL ||
3584 count > LLIOC_MAX_CMD || count < 0)
3587 size = sizeof(*in_data) + count * sizeof(unsigned int);
3588 OBD_ALLOC(in_data, size);
3589 if (in_data == NULL)
3592 memset(in_data, 0, sizeof(*in_data));
3593 in_data->iocd_size = size;
3594 in_data->iocd_cb = cb;
3595 in_data->iocd_count = count;
3596 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* Publish under the write lock. */
3598 down_write(&llioc.ioc_sem);
3599 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3600 up_write(&llioc.ioc_sem);
/*
 * Remove and free the handler previously returned by
 * ll_iocontrol_register(); warns if @magic is not found.
 *
 * NOTE(review): elided extract — the NULL-magic guard and the
 * "tmp == magic" comparison line are not visible here.
 */
3605 void ll_iocontrol_unregister(void *magic)
3607 struct llioc_data *tmp;
3612 down_write(&llioc.ioc_sem);
3613 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3615 unsigned int size = tmp->iocd_size;
3617 list_del(&tmp->iocd_list);
3618 up_write(&llioc.ioc_sem);
3620 OBD_FREE(tmp, size);
3624 up_write(&llioc.ioc_sem);
3626 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3629 EXPORT_SYMBOL(ll_iocontrol_register);
3630 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch an unrecognized ioctl @cmd to any registered dynamic
 * handler; *@rcp receives the handler's result. Returns LLIOC_STOP if
 * a handler consumed the ioctl, LLIOC_CONT otherwise.
 *
 * NOTE(review): elided extract — the loop-exit break, the *rcp
 * assignment and the RETURN of @ret are not visible here.
 */
3632 static enum llioc_iter
3633 ll_iocontrol_call(struct inode *inode, struct file *file,
3634 unsigned int cmd, unsigned long arg, int *rcp)
3636 enum llioc_iter ret = LLIOC_CONT;
3637 struct llioc_data *data;
3638 int rc = -EINVAL, i;
3640 down_read(&llioc.ioc_sem);
3641 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3642 for (i = 0; i < data->iocd_count; i++) {
3643 if (cmd != data->iocd_cmd[i])
/* Matching command found: let the registered callback handle it. */
3646 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3650 if (ret == LLIOC_STOP)
3653 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration change (@conf) down to the cl_object
 * stack for @inode, and on OBJECT_CONF_SET make the layout lock
 * matchable and record the new layout generation.
 *
 * NOTE(review): elided extract — env/rc declarations, IS_ERR(env) and
 * rc checks, and the final RETURN are not visible here.
 */
3660 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3662 struct ll_inode_info *lli = ll_i2info(inode);
3663 struct cl_object *obj = lli->lli_clob;
3664 struct cl_env_nest nest;
3672 env = cl_env_nested_get(&nest);
3674 RETURN(PTR_ERR(env));
3676 rc = cl_conf_set(env, lli->lli_clob, conf);
3680 if (conf->coc_opc == OBJECT_CONF_SET) {
3681 struct ldlm_lock *lock = conf->coc_lock;
3682 struct cl_layout cl = {
3686 LASSERT(lock != NULL);
3687 LASSERT(ldlm_has_layout(lock));
3689 /* it can only be allowed to match after layout is
3690 * applied to inode otherwise false layout would be
3691 * seen. Applying layout shoud happen before dropping
3692 * the intent lock. */
3693 ldlm_lock_allow_match(lock);
/* Read back the generation that cl_conf_set installed and cache it
 * in the inode info. */
3695 rc = cl_object_layout_get(env, obj, &cl);
3700 DFID": layout version change: %u -> %u\n",
3701 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3703 ll_layout_version_set(lli, cl.cl_layout_gen);
3707 cl_env_nested_put(&nest, env);
3712 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
/*
 * NOTE(review): elided extract — lmm/lmmsize/lvbdata/rc declarations,
 * RETURN(0) fast paths, rc checks after the RPCs and the final RETURN
 * are not visible here.
 */
3713 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3716 struct ll_sb_info *sbi = ll_i2sbi(inode);
3717 struct ptlrpc_request *req;
3718 struct mdt_body *body;
3725 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3726 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3727 lock->l_lvb_data, lock->l_lvb_len);
/* Layout already attached to the lock — nothing to fetch. */
3729 if (lock->l_lvb_data != NULL)
3732 /* if layout lock was granted right away, the layout is returned
3733 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3734 * blocked and then granted via completion ast, we have to fetch
3735 * layout here. Please note that we can't use the LVB buffer in
3736 * completion AST because it doesn't have a large enough buffer */
3737 rc = ll_get_default_mdsize(sbi, &lmmsize);
3739 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
3740 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3745 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3747 GOTO(out, rc = -EPROTO);
3749 lmmsize = body->mbo_eadatasize;
3750 if (lmmsize == 0) /* empty layout */
3753 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3755 GOTO(out, rc = -EFAULT);
/* Copy the LOV EA into a private buffer and attach it to the lock as
 * its LVB, unless another thread raced us and attached one first. */
3757 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3758 if (lvbdata == NULL)
3759 GOTO(out, rc = -ENOMEM);
3761 memcpy(lvbdata, lmm, lmmsize);
3762 lock_res_and_lock(lock);
3763 if (unlikely(lock->l_lvb_data == NULL)) {
3764 lock->l_lvb_type = LVB_T_LAYOUT;
3765 lock->l_lvb_data = lvbdata;
3766 lock->l_lvb_len = lmmsize;
3769 unlock_res_and_lock(lock);
/* Lost the race: free our copy. */
3771 if (lvbdata != NULL)
3772 OBD_FREE_LARGE(lvbdata, lmmsize);
3777 ptlrpc_req_finished(req);
3782 * Apply the layout to the inode. Layout lock is held and will be released
3785 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3786 struct inode *inode)
/*
 * NOTE(review): elided extract — env/rc/lvb_ready declarations, the
 * lvb_ready early-out, several GOTO/out paths and the final RETURN are
 * not visible here.
 */
3788 struct ll_inode_info *lli = ll_i2info(inode);
3789 struct ll_sb_info *sbi = ll_i2sbi(inode);
3790 struct ldlm_lock *lock;
3791 struct cl_object_conf conf;
3794 bool wait_layout = false;
3797 LASSERT(lustre_handle_is_used(lockh));
3799 lock = ldlm_handle2lock(lockh);
3800 LASSERT(lock != NULL);
3801 LASSERT(ldlm_has_layout(lock));
3803 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured",
3804 PFID(&lli->lli_fid), inode);
3806 /* in case this is a caching lock and reinstate with new inode */
3807 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3809 lock_res_and_lock(lock);
3810 lvb_ready = ldlm_is_lvb_ready(lock);
3811 unlock_res_and_lock(lock);
3812 /* checking lvb_ready is racy but this is okay. The worst case is
3813 * that multi processes may configure the file on the same time. */
3818 rc = ll_layout_fetch(inode, lock);
3822 /* for layout lock, lmm is stored in lock's lvb.
3823 * lvb_data is immutable if the lock is held so it's safe to access it
3826 * set layout to file. Unlikely this will fail as old layout was
3827 * surely eliminated */
3828 memset(&conf, 0, sizeof conf);
3829 conf.coc_opc = OBJECT_CONF_SET;
3830 conf.coc_inode = inode;
3831 conf.coc_lock = lock;
3832 conf.u.coc_layout.lb_buf = lock->l_lvb_data;
3833 conf.u.coc_layout.lb_len = lock->l_lvb_len;
3834 rc = ll_layout_conf(inode, &conf);
3836 /* refresh layout failed, need to wait */
3837 wait_layout = rc == -EBUSY;
3841 LDLM_LOCK_PUT(lock);
3842 ldlm_lock_decref(lockh, mode);
3844 /* wait for IO to complete if it's still being used. */
3846 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3847 ll_get_fsname(inode->i_sb, NULL, 0),
3848 PFID(&lli->lli_fid), inode);
/* OBJECT_CONF_WAIT blocks until outstanding IO users of the old
 * layout drain. */
3850 memset(&conf, 0, sizeof conf);
3851 conf.coc_opc = OBJECT_CONF_WAIT;
3852 conf.coc_inode = inode;
3853 rc = ll_layout_conf(inode, &conf);
3857 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3858 ll_get_fsname(inode->i_sb, NULL, 0),
3859 PFID(&lli->lli_fid), rc);
/*
 * Ensure this client holds a layout lock for @inode: first try to
 * match a cached lock, otherwise enqueue an IT_LAYOUT intent on the
 * MDT and apply the resulting layout. Called with lli_layout_mutex
 * held (see ll_layout_refresh()).
 *
 * NOTE(review): elided extract — mode/rc declarations, remaining einfo
 * fields, a retry path and the final RETURN are not visible here.
 */
3864 static int ll_layout_refresh_locked(struct inode *inode)
3866 struct ll_inode_info *lli = ll_i2info(inode);
3867 struct ll_sb_info *sbi = ll_i2sbi(inode);
3868 struct md_op_data *op_data;
3869 struct lookup_intent it;
3870 struct lustre_handle lockh;
3872 struct ldlm_enqueue_info einfo = {
3873 .ei_type = LDLM_IBITS,
3875 .ei_cb_bl = &ll_md_blocking_ast,
3876 .ei_cb_cp = &ldlm_completion_ast,
3882 /* mostly layout lock is caching on the local side, so try to match
3883 * it before grabbing layout lock mutex. */
3884 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3885 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3886 if (mode != 0) { /* hit cached lock */
3887 rc = ll_layout_lock_set(&lockh, mode, inode);
3894 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3895 0, 0, LUSTRE_OPC_ANY, NULL);
3896 if (IS_ERR(op_data))
3897 RETURN(PTR_ERR(op_data));
3899 /* have to enqueue one */
3900 memset(&it, 0, sizeof(it));
3901 it.it_op = IT_LAYOUT;
3902 lockh.cookie = 0ULL;
3904 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3905 ll_get_fsname(inode->i_sb, NULL, 0),
3906 PFID(&lli->lli_fid), inode);
3908 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
/* Drop the intent's request and lock reference; the lock handle in
 * lockh is what we keep working with. */
3909 if (it.d.lustre.it_data != NULL)
3910 ptlrpc_req_finished(it.d.lustre.it_data);
3911 it.d.lustre.it_data = NULL;
3913 ll_finish_md_op_data(op_data);
3915 mode = it.d.lustre.it_lock_mode;
3916 it.d.lustre.it_lock_mode = 0;
3917 ll_intent_drop_lock(&it);
3920 /* set lock data in case this is a new lock */
3921 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3922 rc = ll_layout_lock_set(&lockh, mode, inode);
3931 * This function checks if there exists a LAYOUT lock on the client side,
3932 * or enqueues it if it doesn't have one in cache.
3934 * This function will not hold layout lock so it may be revoked any time after
3935 * this function returns. Any operations depend on layout should be redone
3938 * This function should be called before lov_io_init() to get an uptodate
3939 * layout version, the caller should save the version number and after IO
3940 * is finished, this function should be called again to verify that layout
3941 * is not changed during IO time.
/*
 * NOTE(review): elided extract — rc declaration, RETURN(0) for the
 * early-out and the final RETURN are not visible here.
 */
3943 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3945 struct ll_inode_info *lli = ll_i2info(inode);
3946 struct ll_sb_info *sbi = ll_i2sbi(inode);
3950 *gen = ll_layout_version_get(lli);
/* Fast path: layout locks disabled, or we already have a valid
 * generation cached. */
3951 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
3955 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3956 LASSERT(S_ISREG(inode->i_mode));
3958 /* take layout lock mutex to enqueue layout lock exclusively. */
3959 mutex_lock(&lli->lli_layout_mutex);
3961 rc = ll_layout_refresh_locked(inode);
3965 *gen = ll_layout_version_get(lli);
3967 mutex_unlock(&lli->lli_layout_mutex);
3973 * This function send a restore request to the MDT
3975 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3977 struct hsm_user_request *hur;
3981 len = sizeof(struct hsm_user_request) +
3982 sizeof(struct hsm_user_item);
3983 OBD_ALLOC(hur, len);
3987 hur->hur_request.hr_action = HUA_RESTORE;
3988 hur->hur_request.hr_archive_id = 0;
3989 hur->hur_request.hr_flags = 0;
3990 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3991 sizeof(hur->hur_user_item[0].hui_fid));
3992 hur->hur_user_item[0].hui_extent.offset = offset;
3993 hur->hur_user_item[0].hui_extent.length = length;
3994 hur->hur_request.hr_itemcount = 1;
3995 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,