4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2016, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
34 * Author: Peter Braam <braam@clusterfs.com>
35 * Author: Phil Schwan <phil@clusterfs.com>
36 * Author: Andreas Dilger <adilger@clusterfs.com>
39 #define DEBUG_SUBSYSTEM S_LLITE
40 #include <lustre_dlm.h>
41 #include <linux/pagemap.h>
42 #include <linux/file.h>
43 #include <linux/sched.h>
44 #include <linux/user_namespace.h>
45 #ifdef HAVE_UIDGID_HEADER
46 # include <linux/uidgid.h>
49 #include <uapi/linux/lustre/lustre_ioctl.h>
50 #include <lustre_swab.h>
52 #include "cl_object.h"
53 #include "llite_internal.h"
54 #include "vvp_internal.h"
57 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
59 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
/*
 * Allocate per-open-file private data from ll_file_data_slab.
 * GFP_NOFS avoids re-entering the filesystem during memory reclaim.
 * NOTE(review): this chunk elides lines (original numbering jumps);
 * the allocation-failure check and return statement are not visible here.
 */
62 static struct ll_file_data *ll_file_data_get(void)
64 struct ll_file_data *fd;
66 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
70 fd->fd_write_failed = false; /* no async write failure recorded yet */
/* Release per-open-file private data back to its slab cache. */
75 static void ll_file_data_put(struct ll_file_data *fd)
78 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
82 * Packs all the attributes into @op_data for the CLOSE rpc.
/*
 * Pack the inode's current attributes (mode, a/m/ctime, size, blocks,
 * flags) and the open handle @och into @op_data for the CLOSE RPC.
 * For a write handle on an HSM-modified file, also set MDS_DATA_MODIFIED
 * so the MDT can mark the archive copy dirty.
 */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 ll_prep_md_op_data(op_data, inode, NULL, NULL,
90 0, 0, LUSTRE_OPC_ANY, NULL);
92 op_data->op_attr.ia_mode = inode->i_mode;
93 op_data->op_attr.ia_atime = inode->i_atime;
94 op_data->op_attr.ia_mtime = inode->i_mtime;
95 op_data->op_attr.ia_ctime = inode->i_ctime;
96 op_data->op_attr.ia_size = i_size_read(inode);
97 op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
98 ATTR_MTIME | ATTR_MTIME_SET |
99 ATTR_CTIME | ATTR_CTIME_SET;
100 op_data->op_attr_blocks = inode->i_blocks;
101 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
102 op_data->op_handle = och->och_fh;
104 if (och->och_flags & FMODE_WRITE &&
105 ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED))
106 /* For HSM: if inode data has been modified, pack it so that
107 * MDT can set data dirty flag in the archive. */
108 op_data->op_bias |= MDS_DATA_MODIFIED;
114 * Perform a close, possibly with a bias.
115 * The meaning of "data" depends on the value of "bias".
117 * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
118 * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
/*
 * Send the MDS close RPC for open handle @och, applying @bias-specific
 * packing: for HSM release @data is the data version (__u64 *); for
 * layout swap/merge @data is the second inode. Frees @och state via
 * md_clear_open_replay_data() and poisons the file handle cookie.
 * NOTE(review): this chunk elides lines (the switch opener, error paths
 * and return are not visible) -- do not assume the full body is shown.
 */
121 static int ll_close_inode_openhandle(struct inode *inode,
122 struct obd_client_handle *och,
123 enum mds_op_bias bias, void *data)
125 struct obd_export *md_exp = ll_i2mdexp(inode);
126 const struct ll_inode_info *lli = ll_i2info(inode);
127 struct md_op_data *op_data;
128 struct ptlrpc_request *req = NULL;
132 if (class_exp2obd(md_exp) == NULL) {
133 CERROR("%s: invalid MDC connection handle closing "DFID"\n",
134 ll_get_fsname(inode->i_sb, NULL, 0),
135 PFID(&lli->lli_fid));
139 OBD_ALLOC_PTR(op_data);
140 /* We leak openhandle and request here on error, but not much to be
141 * done in OOM case since app won't retry close on error either. */
143 GOTO(out, rc = -ENOMEM);
145 ll_prepare_close(inode, op_data, och);
147 case MDS_CLOSE_LAYOUT_MERGE:
148 case MDS_CLOSE_LAYOUT_SWAP:
149 LASSERT(data != NULL);
150 op_data->op_bias |= bias;
151 op_data->op_data_version = 0;
152 op_data->op_lease_handle = och->och_lease_handle;
/* fid2 identifies the second file of the swap/merge pair */
153 op_data->op_fid2 = *ll_inode2fid(data);
156 case MDS_HSM_RELEASE:
157 LASSERT(data != NULL);
158 op_data->op_bias |= MDS_HSM_RELEASE;
159 op_data->op_data_version = *(__u64 *)data;
160 op_data->op_lease_handle = och->och_lease_handle;
161 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
165 LASSERT(data == NULL);
169 rc = md_close(md_exp, op_data, och->och_mod, &req);
170 if (rc != 0 && rc != -EINTR)
171 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
172 md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
/* On a biased close, verify the server actually executed the intent */
174 if (rc == 0 && op_data->op_bias & bias) {
175 struct mdt_body *body;
177 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
178 if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
182 ll_finish_md_op_data(op_data);
186 md_clear_open_replay_data(md_exp, och);
187 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
190 ptlrpc_req_finished(req); /* This is close request */
/*
 * Close the MDS open handle of the given mode (write/exec/read) if no
 * other local users of that handle remain; the matching usecount is
 * tracked per mode in ll_inode_info.
 * NOTE(review): interior lines elided in this chunk (usecount decrement,
 * handle detach, return path not visible).
 */
194 int ll_md_real_close(struct inode *inode, fmode_t fmode)
196 struct ll_inode_info *lli = ll_i2info(inode);
197 struct obd_client_handle **och_p;
198 struct obd_client_handle *och;
/* Select the per-mode handle slot and its user count */
203 if (fmode & FMODE_WRITE) {
204 och_p = &lli->lli_mds_write_och;
205 och_usecount = &lli->lli_open_fd_write_count;
206 } else if (fmode & FMODE_EXEC) {
207 och_p = &lli->lli_mds_exec_och;
208 och_usecount = &lli->lli_open_fd_exec_count;
210 LASSERT(fmode & FMODE_READ);
211 och_p = &lli->lli_mds_read_och;
212 och_usecount = &lli->lli_open_fd_read_count;
215 mutex_lock(&lli->lli_och_mutex);
216 if (*och_usecount > 0) {
217 /* There are still users of this handle, so skip
219 mutex_unlock(&lli->lli_och_mutex);
225 mutex_unlock(&lli->lli_och_mutex);
228 /* There might be a race and this handle may already
230 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
/*
 * Per-file-descriptor close: drop group lock and lease if held, close
 * any fd-private open handle, decrement the per-mode open counts, and
 * only talk to the MDS (ll_md_real_close) when no cached OPEN lock
 * covers the file. Frees the ll_file_data at the end.
 * NOTE(review): interior lines elided in this chunk.
 */
236 static int ll_md_close(struct inode *inode, struct file *file)
238 union ldlm_policy_data policy = {
239 .l_inodebits = { MDS_INODELOCK_OPEN },
241 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
242 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
243 struct ll_inode_info *lli = ll_i2info(inode);
244 struct lustre_handle lockh;
245 enum ldlm_mode lockmode;
249 /* clear group lock, if present */
250 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
251 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
253 if (fd->fd_lease_och != NULL) {
256 /* Usually the lease is not released when the
257 * application crashed, we need to release here. */
258 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
259 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
260 PFID(&lli->lli_fid), rc, lease_broken);
262 fd->fd_lease_och = NULL;
265 if (fd->fd_och != NULL) {
266 rc = ll_close_inode_openhandle(inode, fd->fd_och, 0, NULL);
271 /* Let's see if we have good enough OPEN lock on the file and if
272 we can skip talking to MDS */
273 mutex_lock(&lli->lli_och_mutex);
274 if (fd->fd_omode & FMODE_WRITE) {
276 LASSERT(lli->lli_open_fd_write_count);
277 lli->lli_open_fd_write_count--;
278 } else if (fd->fd_omode & FMODE_EXEC) {
280 LASSERT(lli->lli_open_fd_exec_count);
281 lli->lli_open_fd_exec_count--;
284 LASSERT(lli->lli_open_fd_read_count);
285 lli->lli_open_fd_read_count--;
287 mutex_unlock(&lli->lli_och_mutex);
/* No cached OPEN lock match -> must do the real close RPC */
289 if (!md_lock_match(ll_i2mdexp(inode), flags, ll_inode2fid(inode),
290 LDLM_IBITS, &policy, lockmode, &lockh))
291 rc = ll_md_real_close(inode, fd->fd_omode);
294 LUSTRE_FPRIVATE(file) = NULL;
295 ll_file_data_put(fd);
300 /* While this returns an error code, fput() the caller does not, so we need
301 * to make every effort to clean up all of our state here. Also, applications
302 * rarely check close errors and even if an error is returned they will not
303 * re-try the close call.
/*
 * VFS ->release() for Lustre files. Deauthorizes statahead for the last
 * closer of a directory, handles the root dentry specially (no MDS
 * close), clears async write errors on regular files, and delegates the
 * real close work to ll_md_close().
 * NOTE(review): interior lines elided in this chunk (NULL-fd check,
 * returns not visible).
 */
305 int ll_file_release(struct inode *inode, struct file *file)
307 struct ll_file_data *fd;
308 struct ll_sb_info *sbi = ll_i2sbi(inode);
309 struct ll_inode_info *lli = ll_i2info(inode);
313 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
314 PFID(ll_inode2fid(inode)), inode);
316 if (inode->i_sb->s_root != file_dentry(file))
317 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
318 fd = LUSTRE_FPRIVATE(file);
321 /* The last ref on @file, maybe not the the owner pid of statahead,
322 * because parent and child process can share the same file handle. */
323 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
324 ll_deauthorize_statahead(inode, fd);
/* Root dentry: no MDS open handle to close, just drop private data */
326 if (inode->i_sb->s_root == file_dentry(file)) {
327 LUSTRE_FPRIVATE(file) = NULL;
328 ll_file_data_put(fd);
332 if (!S_ISDIR(inode->i_mode)) {
333 if (lli->lli_clob != NULL)
334 lov_read_and_clear_async_rc(lli->lli_clob);
335 lli->lli_async_rc = 0;
338 rc = ll_md_close(inode, file);
340 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
341 libcfs_debug_dumplog();
/*
 * Issue an intent-based open to the MDS for @de. The name is packed
 * only when the server lacks OBD_CONNECT_OPEN_BY_FID and the dentry
 * name is valid; otherwise the open is purely by FID. On success the
 * reply is used to (re)initialize the inode and set lock data.
 * Returns -ESTALE (mapped from -ENOENT on IT_CREAT) so the VFS retries
 * the create via lookup with LOOKUP_REVAL.
 * NOTE(review): interior lines elided in this chunk.
 */
346 static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
347 struct lookup_intent *itp)
349 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
350 struct dentry *parent = de->d_parent;
351 const char *name = NULL;
353 struct md_op_data *op_data;
354 struct ptlrpc_request *req = NULL;
358 LASSERT(parent != NULL);
359 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
361 /* if server supports open-by-fid, or file name is invalid, don't pack
362 * name in open request */
363 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
364 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
365 name = de->d_name.name;
366 len = de->d_name.len;
369 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
370 name, len, 0, LUSTRE_OPC_ANY, NULL);
372 RETURN(PTR_ERR(op_data));
373 op_data->op_data = lmm;
374 op_data->op_data_size = lmmsize;
376 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
377 &ll_md_blocking_ast, 0);
378 ll_finish_md_op_data(op_data);
380 /* reason for keep own exit path - don`t flood log
381 * with messages with -ESTALE errors.
383 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
384 it_open_error(DISP_OPEN_OPEN, itp))
386 ll_release_openhandle(de, itp);
390 if (it_disposition(itp, DISP_LOOKUP_NEG))
391 GOTO(out, rc = -ENOENT);
393 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
394 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
395 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
399 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
400 if (!rc && itp->it_lock_mode)
401 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
404 ptlrpc_req_finished(req);
405 ll_intent_drop_lock(itp);
407 /* We did open by fid, but by the time we got to the server,
408 * the object disappeared. If this is a create, we cannot really
409 * tell the userspace that the file it was trying to create
410 * does not exist. Instead let's return -ESTALE, and the VFS will
411 * retry the create with LOOKUP_REVAL that we are going to catch
412 * in ll_revalidate_dentry() and use lookup then.
414 if (rc == -ENOENT && itp->it_op & IT_CREAT)
/*
 * Fill an obd_client_handle from the MDT_BODY of the intent reply:
 * open handle, FID, lease lock cookie, magic and open flags. Registers
 * the open for replay via md_set_open_replay_data().
 */
420 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
421 struct obd_client_handle *och)
423 struct mdt_body *body;
425 body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
426 och->och_fh = body->mbo_handle;
427 och->och_fid = body->mbo_fid1;
428 och->och_lease_handle.cookie = it->it_lock_handle;
429 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
430 och->och_flags = it->it_flags;
432 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish the client-local part of an open: optionally fill @och from
 * the intent reply, attach @fd as the file's private data, initialize
 * readahead state and record the open mode bits.
 * NOTE(review): interior lines elided in this chunk (och NULL check,
 * return not visible).
 */
435 static int ll_local_open(struct file *file, struct lookup_intent *it,
436 struct ll_file_data *fd, struct obd_client_handle *och)
438 struct inode *inode = file_inode(file);
441 LASSERT(!LUSTRE_FPRIVATE(file));
448 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
453 LUSTRE_FPRIVATE(file) = fd;
454 ll_readahead_init(inode, &fd->fd_ras);
455 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
457 /* ll_cl_context initialize */
458 rwlock_init(&fd->fd_lock);
459 INIT_LIST_HEAD(&fd->fd_lccs);
464 /* Open a file, and (for the very first open) create objects on the OSTs at
465 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
466 * creation or open until ll_lov_setstripe() ioctl is called.
468 * If we already have the stripe MD locally then we don't request it in
469 * md_open(), by passing a lmm_size = 0.
471 * It is up to the application to ensure no other processes open this file
472 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
473 * used. We might be able to avoid races of that sort by getting lli_open_sem
474 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
475 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * VFS ->open() for Lustre files. If a cached MDS open handle of the
 * matching mode exists it is reused; otherwise an intent open is sent
 * (always MDS_OPEN_BY_FID) and a new handle is filled in. Statahead is
 * authorized for directories and the root dentry short-circuits most of
 * the work. All handle bookkeeping is serialized on lli_och_mutex.
 * NOTE(review): this chunk elides many interior lines (error checks,
 * closing braces, retry/restart path) -- do not assume the full body
 * is shown.
 */
477 int ll_file_open(struct inode *inode, struct file *file)
479 struct ll_inode_info *lli = ll_i2info(inode);
480 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
481 .it_flags = file->f_flags };
482 struct obd_client_handle **och_p = NULL;
483 __u64 *och_usecount = NULL;
484 struct ll_file_data *fd;
488 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
489 PFID(ll_inode2fid(inode)), inode, file->f_flags);
491 it = file->private_data; /* XXX: compat macro */
492 file->private_data = NULL; /* prevent ll_local_open assertion */
494 fd = ll_file_data_get();
496 GOTO(out_openerr, rc = -ENOMEM);
499 if (S_ISDIR(inode->i_mode))
500 ll_authorize_statahead(inode, fd);
502 if (inode->i_sb->s_root == file_dentry(file)) {
503 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own IT_OPEN from f_flags */
507 if (!it || !it->it_disposition) {
508 /* Convert f_flags into access mode. We cannot use file->f_mode,
509 * because everything but O_ACCMODE mask was stripped from
511 if ((oit.it_flags + 1) & O_ACCMODE)
513 if (file->f_flags & O_TRUNC)
514 oit.it_flags |= FMODE_WRITE;
516 /* kernel only call f_op->open in dentry_open. filp_open calls
517 * dentry_open after call to open_namei that checks permissions.
518 * Only nfsd_open call dentry_open directly without checking
519 * permissions and because of that this code below is safe. */
520 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
521 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
523 /* We do not want O_EXCL here, presumably we opened the file
524 * already? XXX - NFS implications? */
525 oit.it_flags &= ~O_EXCL;
527 /* bug20584, if "it_flags" contains O_CREAT, the file will be
528 * created if necessary, then "IT_CREAT" should be set to keep
529 * consistent with it */
530 if (oit.it_flags & O_CREAT)
531 oit.it_op |= IT_CREAT;
537 /* Let's see if we have file open on MDS already. */
538 if (it->it_flags & FMODE_WRITE) {
539 och_p = &lli->lli_mds_write_och;
540 och_usecount = &lli->lli_open_fd_write_count;
541 } else if (it->it_flags & FMODE_EXEC) {
542 och_p = &lli->lli_mds_exec_och;
543 och_usecount = &lli->lli_open_fd_exec_count;
545 och_p = &lli->lli_mds_read_och;
546 och_usecount = &lli->lli_open_fd_read_count;
549 mutex_lock(&lli->lli_och_mutex);
550 if (*och_p) { /* Open handle is present */
551 if (it_disposition(it, DISP_OPEN_OPEN)) {
552 /* Well, there's extra open request that we do not need,
553 let's close it somehow. This will decref request. */
554 rc = it_open_error(DISP_OPEN_OPEN, it);
556 mutex_unlock(&lli->lli_och_mutex);
557 GOTO(out_openerr, rc);
560 ll_release_openhandle(file_dentry(file), it);
/* Reuse cached handle: no och passed, fd only */
564 rc = ll_local_open(file, it, fd, NULL);
567 mutex_unlock(&lli->lli_och_mutex);
568 GOTO(out_openerr, rc);
571 LASSERT(*och_usecount == 0);
572 if (!it->it_disposition) {
573 struct ll_dentry_data *ldd = ll_d2d(file->f_path.dentry);
574 /* We cannot just request lock handle now, new ELC code
575 means that one of other OPEN locks for this file
576 could be cancelled, and since blocking ast handler
577 would attempt to grab och_mutex as well, that would
578 result in a deadlock */
579 mutex_unlock(&lli->lli_och_mutex);
581 * Normally called under two situations:
583 * 2. A race/condition on MDS resulting in no open
584 * handle to be returned from LOOKUP|OPEN request,
585 * for example if the target entry was a symlink.
587 * Only fetch MDS_OPEN_LOCK if this is in NFS path,
588 * marked by a bit set in ll_iget_for_nfs. Clear the
589 * bit so that it's not confusing later callers.
591 * NB; when ldd is NULL, it must have come via normal
592 * lookup path only, since ll_iget_for_nfs always calls
595 if (ldd && ldd->lld_nfs_dentry) {
596 ldd->lld_nfs_dentry = 0;
597 it->it_flags |= MDS_OPEN_LOCK;
601 * Always specify MDS_OPEN_BY_FID because we don't want
602 * to get file with different fid.
604 it->it_flags |= MDS_OPEN_BY_FID;
605 rc = ll_intent_file_open(file_dentry(file), NULL, 0,
608 GOTO(out_openerr, rc);
612 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
614 GOTO(out_och_free, rc = -ENOMEM);
618 /* md_intent_lock() didn't get a request ref if there was an
619 * open error, so don't do cleanup on the request here
621 /* XXX (green): Should not we bail out on any error here, not
622 * just open error? */
623 rc = it_open_error(DISP_OPEN_OPEN, it);
625 GOTO(out_och_free, rc);
627 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
628 "inode %p: disposition %x, status %d\n", inode,
629 it_disposition(it, ~0), it->it_status);
631 rc = ll_local_open(file, it, fd, *och_p);
633 GOTO(out_och_free, rc);
635 mutex_unlock(&lli->lli_och_mutex);
638 /* Must do this outside lli_och_mutex lock to prevent deadlock where
639 different kind of OPEN lock for this same inode gets cancelled
640 by ldlm_cancel_lru */
641 if (!S_ISREG(inode->i_mode))
642 GOTO(out_och_free, rc);
644 cl_lov_delay_create_clear(&file->f_flags);
645 GOTO(out_och_free, rc);
/* Error unwinding: free handle slot, drop statahead, free fd */
649 if (och_p && *och_p) {
650 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
651 *och_p = NULL; /* OBD_FREE writes some magic there */
654 mutex_unlock(&lli->lli_och_mutex);
657 if (lli->lli_opendir_key == fd)
658 ll_deauthorize_statahead(inode, fd);
660 ll_file_data_put(fd);
662 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
665 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
666 ptlrpc_req_finished(it->it_request);
667 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease locks: on LDLM_CB_BLOCKING just cancel the
 * lock asynchronously (no openhandle handling here, by design -- see
 * the LDLM_FL_EXCL comment in ll_lease_open()).
 * NOTE(review): the switch body is truncated in this chunk; the
 * LDLM_CB_CANCELING branch content is not visible.
 */
673 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
674 struct ldlm_lock_desc *desc, void *data, int flag)
677 struct lustre_handle lockh;
681 case LDLM_CB_BLOCKING:
682 ldlm_lock2handle(lock, &lockh);
683 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
685 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
689 case LDLM_CB_CANCELING:
697 * When setting a lease on a file, we take ownership of the lli_mds_*_och
698 * and save it as fd->fd_och so as to force client to reopen the file even
699 * if it has an open lock in cache already.
/*
 * Take ownership of the inode's cached lli_mds_*_och open handle and
 * stash it in fd->fd_och so the client is forced to reopen the file
 * even when an OPEN lock is cached. Fails with -EBUSY when a lease is
 * already held on this fd or other users share the handle.
 * NOTE(review): interior lines elided (handle transfer and rc paths
 * not visible).
 */
701 static int ll_lease_och_acquire(struct inode *inode, struct file *file,
702 struct lustre_handle *old_handle)
704 struct ll_inode_info *lli = ll_i2info(inode);
705 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
706 struct obd_client_handle **och_p;
711 /* Get the openhandle of the file */
712 mutex_lock(&lli->lli_och_mutex);
713 if (fd->fd_lease_och != NULL)
714 GOTO(out_unlock, rc = -EBUSY);
716 if (fd->fd_och == NULL) {
717 if (file->f_mode & FMODE_WRITE) {
718 LASSERT(lli->lli_mds_write_och != NULL);
719 och_p = &lli->lli_mds_write_och;
720 och_usecount = &lli->lli_open_fd_write_count;
722 LASSERT(lli->lli_mds_read_och != NULL);
723 och_p = &lli->lli_mds_read_och;
724 och_usecount = &lli->lli_open_fd_read_count;
727 if (*och_usecount > 1)
728 GOTO(out_unlock, rc = -EBUSY);
735 *old_handle = fd->fd_och->och_fh;
739 mutex_unlock(&lli->lli_och_mutex);
744 * Release ownership on lli_mds_*_och when putting back a file lease.
/*
 * Return ownership of fd->fd_och to the inode's lli_mds_*_och slot when
 * a lease is put back. If another process re-opened the file meanwhile
 * (broken lease), the slot is occupied and the old handle is closed
 * instead of being restored.
 * NOTE(review): interior lines elided in this chunk.
 */
746 static int ll_lease_och_release(struct inode *inode, struct file *file)
748 struct ll_inode_info *lli = ll_i2info(inode);
749 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
750 struct obd_client_handle **och_p;
751 struct obd_client_handle *old_och = NULL;
756 mutex_lock(&lli->lli_och_mutex);
757 if (file->f_mode & FMODE_WRITE) {
758 och_p = &lli->lli_mds_write_och;
759 och_usecount = &lli->lli_open_fd_write_count;
761 och_p = &lli->lli_mds_read_och;
762 och_usecount = &lli->lli_open_fd_read_count;
765 /* The file may have been open by another process (broken lease) so
766 * *och_p is not NULL. In this case we should simply increase usecount
769 if (*och_p != NULL) {
770 old_och = fd->fd_och;
777 mutex_unlock(&lli->lli_och_mutex);
/* Close the displaced handle outside the mutex */
780 rc = ll_close_inode_openhandle(inode, old_och, 0, NULL);
786 * Acquire a lease and open the file.
/*
 * Acquire a file lease of the given mode (FMODE_READ or FMODE_WRITE
 * only) by sending an intent open with MDS_OPEN_LEASE. On success the
 * returned handle carries the lease lock; on any failure after the
 * open, the lock is cancelled and the handle closed before returning
 * an ERR_PTR.
 * NOTE(review): interior lines elided in this chunk (och allocation,
 * several rc checks and the final return are not visible).
 */
788 static struct obd_client_handle *
789 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
792 struct lookup_intent it = { .it_op = IT_OPEN };
793 struct ll_sb_info *sbi = ll_i2sbi(inode);
794 struct md_op_data *op_data;
795 struct ptlrpc_request *req = NULL;
796 struct lustre_handle old_handle = { 0 };
797 struct obd_client_handle *och = NULL;
/* Leases are read or write, never exec, never a combination */
802 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
803 RETURN(ERR_PTR(-EINVAL));
806 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
807 RETURN(ERR_PTR(-EPERM));
809 rc = ll_lease_och_acquire(inode, file, &old_handle);
816 RETURN(ERR_PTR(-ENOMEM));
818 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
819 LUSTRE_OPC_ANY, NULL);
821 GOTO(out, rc = PTR_ERR(op_data));
823 /* To tell the MDT this openhandle is from the same owner */
824 op_data->op_handle = old_handle;
826 it.it_flags = fmode | open_flags;
827 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
828 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
829 &ll_md_blocking_lease_ast,
830 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
831 * it can be cancelled which may mislead applications that the lease is
833 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
834 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
835 * doesn't deal with openhandle, so normal openhandle will be leaked. */
836 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
837 ll_finish_md_op_data(op_data);
838 ptlrpc_req_finished(req);
840 GOTO(out_release_it, rc);
842 if (it_disposition(&it, DISP_LOOKUP_NEG))
843 GOTO(out_release_it, rc = -ENOENT);
845 rc = it_open_error(DISP_OPEN_OPEN, &it);
847 GOTO(out_release_it, rc);
849 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
850 ll_och_fill(sbi->ll_md_exp, &it, och);
852 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
853 GOTO(out_close, rc = -EOPNOTSUPP);
855 /* already get lease, handle lease lock */
856 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
857 if (it.it_lock_mode == 0 ||
858 it.it_lock_bits != MDS_INODELOCK_OPEN) {
859 /* open lock must return for lease */
860 CERROR(DFID "lease granted but no open lock, %d/%llu.\n",
861 PFID(ll_inode2fid(inode)), it.it_lock_mode,
863 GOTO(out_close, rc = -EPROTO);
866 ll_intent_release(&it);
870 /* Cancel open lock */
871 if (it.it_lock_mode != 0) {
872 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
875 och->och_lease_handle.cookie = 0ULL;
877 rc2 = ll_close_inode_openhandle(inode, och, 0, NULL);
879 CERROR("%s: error closing file "DFID": %d\n",
880 ll_get_fsname(inode->i_sb, NULL, 0),
881 PFID(&ll_i2info(inode)->lli_fid), rc2);
882 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
884 ll_intent_release(&it);
892 * Check whether a layout swap can be done between two inodes.
894 * \param[in] inode1 First inode to check
895 * \param[in] inode2 Second inode to check
897 * \retval 0 on success, layout swap can be performed between both inodes
898 * \retval negative error code if requirements are not met
/*
 * Validate that a layout swap between @inode1 and @inode2 is allowed:
 * both regular files, both writable by the caller, same superblock.
 * NOTE(review): interior lines elided (error return values and the
 * success return are not visible in this chunk).
 */
900 static int ll_check_swap_layouts_validity(struct inode *inode1,
901 struct inode *inode2)
903 if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
906 if (inode_permission(inode1, MAY_WRITE) ||
907 inode_permission(inode2, MAY_WRITE))
910 if (inode1->i_sb != inode2->i_sb)
/*
 * Close @och while atomically swapping or merging layouts between
 * @inode and @inode2 on the MDT (bias MDS_CLOSE_LAYOUT_SWAP or
 * MDS_CLOSE_LAYOUT_MERGE). Identical FIDs are rejected with -EINVAL.
 * NOTE(review): interior lines elided (switch opener, out_free_och
 * label and return not visible in this chunk).
 */
916 static int ll_swap_layouts_close(struct obd_client_handle *och,
917 struct inode *inode, struct inode *inode2,
920 const struct lu_fid *fid1 = ll_inode2fid(inode);
921 const struct lu_fid *fid2;
922 enum mds_op_bias bias;
926 CDEBUG(D_INODE, "%s: biased close of file "DFID"\n",
927 ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
929 rc = ll_check_swap_layouts_validity(inode, inode2);
931 GOTO(out_free_och, rc);
933 /* We now know that inode2 is a lustre inode */
934 fid2 = ll_inode2fid(inode2);
/* Swapping a file's layout with itself makes no sense */
936 rc = lu_fid_cmp(fid1, fid2);
938 GOTO(out_free_och, rc = -EINVAL);
941 case SWAP_LAYOUTS_CLOSE:
942 bias = MDS_CLOSE_LAYOUT_SWAP;
944 case MERGE_LAYOUTS_CLOSE:
945 bias = MDS_CLOSE_LAYOUT_MERGE;
948 GOTO(out_free_och, rc = -EOPNOTSUPP);
951 /* Close the file and {swap,merge} layouts between inode & inode2.
952 * NB: lease lock handle is released in mdc_close_layout_swap_pack()
953 * because we still need it to pack l_remote_handle to MDT. */
954 rc = ll_close_inode_openhandle(inode, och, bias, inode2);
956 och = NULL; /* freed in ll_close_inode_openhandle() */
966 * Release lease and close the file.
967 * It will check if the lease has ever broken.
/*
 * Release a lease and close its open handle. Reports via *lease_broken
 * whether the lease lock had already been cancelled (i.e. the lease was
 * broken by a conflicting access) before we got here.
 * NOTE(review): interior lines elided (lock NULL check, LDLM_LOCK_PUT
 * and return not visible in this chunk).
 */
969 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
972 struct ldlm_lock *lock;
973 bool cancelled = true;
977 lock = ldlm_handle2lock(&och->och_lease_handle);
979 lock_res_and_lock(lock);
980 cancelled = ldlm_is_cancel(lock);
981 unlock_res_and_lock(lock);
985 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
986 PFID(&ll_i2info(inode)->lli_fid), cancelled);
/* Lease still intact: cancel it ourselves before closing */
989 ldlm_cli_cancel(&och->och_lease_handle, 0);
991 if (lease_broken != NULL)
992 *lease_broken = cancelled;
994 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
/*
 * Merge MDS-provided timestamps cached in ll_inode_info with the
 * OST-side attributes from the cl_object (taking the newer of each
 * timestamp), and update i_size/i_blocks, all under the inode size
 * lock. atime is only refreshed, never moved backwards (see comment
 * below on Lustre's relaxed atime semantics).
 * NOTE(review): interior lines elided in this chunk.
 */
998 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
1000 struct ll_inode_info *lli = ll_i2info(inode);
1001 struct cl_object *obj = lli->lli_clob;
1002 struct cl_attr *attr = vvp_env_thread_attr(env);
1010 ll_inode_size_lock(inode);
1012 /* Merge timestamps the most recently obtained from MDS with
1013 * timestamps obtained from OSTs.
1015 * Do not overwrite atime of inode because it may be refreshed
1016 * by file_accessed() function. If the read was served by cache
1017 * data, there is no RPC to be sent so that atime may not be
1018 * transferred to OSTs at all. MDT only updates atime at close time
1019 * if it's at least 'mdd.*.atime_diff' older.
1020 * All in all, the atime in Lustre does not strictly comply with
1021 * POSIX. Solving this problem needs to send an RPC to MDT for each
1022 * read, this will hurt performance. */
1023 if (LTIME_S(inode->i_atime) < lli->lli_atime || lli->lli_update_atime) {
1024 LTIME_S(inode->i_atime) = lli->lli_atime;
1025 lli->lli_update_atime = 0;
1027 LTIME_S(inode->i_mtime) = lli->lli_mtime;
1028 LTIME_S(inode->i_ctime) = lli->lli_ctime;
1030 atime = LTIME_S(inode->i_atime);
1031 mtime = LTIME_S(inode->i_mtime);
1032 ctime = LTIME_S(inode->i_ctime);
1034 cl_object_attr_lock(obj);
1035 rc = cl_object_attr_get(env, obj, attr);
1036 cl_object_attr_unlock(obj);
/* -ENODATA (e.g. no layout) is not an error for attribute merging */
1039 GOTO(out_size_unlock, rc = (rc == -ENODATA ? 0 : rc));
1041 if (atime < attr->cat_atime)
1042 atime = attr->cat_atime;
1044 if (ctime < attr->cat_ctime)
1045 ctime = attr->cat_ctime;
1047 if (mtime < attr->cat_mtime)
1048 mtime = attr->cat_mtime;
1050 CDEBUG(D_VFSTRACE, DFID" updating i_size %llu\n",
1051 PFID(&lli->lli_fid), attr->cat_size);
1053 i_size_write(inode, attr->cat_size);
1054 inode->i_blocks = attr->cat_blocks;
1056 LTIME_S(inode->i_atime) = atime;
1057 LTIME_S(inode->i_mtime) = mtime;
1058 LTIME_S(inode->i_ctime) = ctime;
1061 ll_inode_size_unlock(inode);
/*
 * Decide whether atime updates should be suppressed for this file,
 * mirroring the kernel's file_accessed()/touch_atime() checks: O_NOATIME,
 * S_NOATIME, IS_NOATIME(), and the mount/superblock noatime flags.
 * NOTE(review): the returns between conditions are elided in this chunk.
 */
1066 static bool file_is_noatime(const struct file *file)
1068 const struct vfsmount *mnt = file->f_path.mnt;
1069 const struct inode *inode = file_inode((struct file *)file);
1071 /* Adapted from file_accessed() and touch_atime().*/
1072 if (file->f_flags & O_NOATIME)
1075 if (inode->i_flags & S_NOATIME)
1078 if (IS_NOATIME(inode))
1081 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1084 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1087 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1093 static int ll_file_io_ptask(struct cfs_ptask *ptask);
/*
 * Initialize a cl_io for a read or write on @file: zeroed iter, sync
 * kiocb, nonblock/append/sync flags derived from f_flags, lock policy
 * (never for nolock files, mandatory for O_APPEND, maybe otherwise),
 * noatime handling, and parallel-IO eligibility (LL_SBI_PIO, but not
 * for appends).
 */
1095 static void ll_io_init(struct cl_io *io, struct file *file, enum cl_io_type iot)
1097 struct inode *inode = file_inode(file);
1098 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1100 memset(&io->u.ci_rw.rw_iter, 0, sizeof(io->u.ci_rw.rw_iter));
1101 init_sync_kiocb(&io->u.ci_rw.rw_iocb, file);
1102 io->u.ci_rw.rw_file = file;
1103 io->u.ci_rw.rw_ptask = ll_file_io_ptask;
1104 io->u.ci_rw.rw_nonblock = !!(file->f_flags & O_NONBLOCK);
1105 io->ci_lock_no_expand = fd->ll_lock_no_expand;
1107 if (iot == CIT_WRITE) {
1108 io->u.ci_rw.rw_append = !!(file->f_flags & O_APPEND);
1109 io->u.ci_rw.rw_sync = !!(file->f_flags & O_SYNC ||
1110 file->f_flags & O_DIRECT ||
1113 io->ci_obj = ll_i2info(inode)->lli_clob;
1114 io->ci_lockreq = CILR_MAYBE;
1115 if (ll_file_nolock(file)) {
1116 io->ci_lockreq = CILR_NEVER;
1117 io->ci_no_srvlock = 1;
1118 } else if (file->f_flags & O_APPEND) {
1119 io->ci_lockreq = CILR_MANDATORY;
1121 io->ci_noatime = file_is_noatime(file);
/* Parallel IO cannot be used for appends (offset is serialized) */
1122 if (ll_i2sbi(inode)->ll_flags & LL_SBI_PIO)
1123 io->ci_pio = !io->u.ci_rw.rw_append;
/*
 * Worker for a parallel-IO subtask: rebuild a cl_io from the cl_io_pt
 * snapshot (iter, iocb, position, count), run the cl_io loop with
 * ci_pio cleared (we are already inside a parallel task), accumulate
 * the bytes transferred into cip_result and advance the iter/iocb.
 * Restarts are logged; the task reports 0 when any progress was made.
 * NOTE(review): interior lines elided in this chunk (restart loop
 * control and some declarations not visible).
 */
1128 static int ll_file_io_ptask(struct cfs_ptask *ptask)
1130 struct cl_io_pt *pt = ptask->pt_cbdata;
1131 struct file *file = pt->cip_file;
1134 loff_t pos = pt->cip_pos;
1139 env = cl_env_get(&refcheck);
1141 RETURN(PTR_ERR(env));
1143 CDEBUG(D_VFSTRACE, "%s: %s range: [%llu, %llu)\n",
1144 file_dentry(file)->d_name.name,
1145 pt->cip_iot == CIT_READ ? "read" : "write",
1146 pos, pos + pt->cip_count);
1149 io = vvp_env_thread_io(env);
1150 ll_io_init(io, file, pt->cip_iot);
1151 io->u.ci_rw.rw_iter = pt->cip_iter;
1152 io->u.ci_rw.rw_iocb = pt->cip_iocb;
1153 io->ci_pio = 0; /* It's already in parallel task */
1155 rc = cl_io_rw_init(env, io, pt->cip_iot, pos,
1156 pt->cip_count - pt->cip_result);
1158 struct vvp_io *vio = vvp_env_io(env);
1160 vio->vui_io_subtype = IO_NORMAL;
1161 vio->vui_fd = LUSTRE_FPRIVATE(file);
1163 ll_cl_add(file, env, io, LCC_RW);
1164 rc = cl_io_loop(env, io);
1165 ll_cl_remove(file, env);
1167 /* cl_io_rw_init() handled IO */
1171 if (OBD_FAIL_CHECK_RESET(OBD_FAIL_LLITE_PTASK_IO_FAIL, 0)) {
/* Account partial progress and keep iter/iocb in sync with it */
1177 if (io->ci_nob > 0) {
1178 pt->cip_result += io->ci_nob;
1179 iov_iter_advance(&pt->cip_iter, io->ci_nob);
1181 pt->cip_iocb.ki_pos = pos;
1182 #ifdef HAVE_KIOCB_KI_LEFT
1183 pt->cip_iocb.ki_left = pt->cip_count - pt->cip_result;
1184 #elif defined(HAVE_KI_NBYTES)
1185 pt->cip_iocb.ki_nbytes = pt->cip_count - pt->cip_result;
1189 cl_io_fini(env, io);
1191 if ((rc == 0 || rc == -ENODATA) &&
1192 pt->cip_result < pt->cip_count &&
1193 io->ci_need_restart) {
1195 "%s: restart %s range: [%llu, %llu) ret: %zd, rc: %d\n",
1196 file_dentry(file)->d_name.name,
1197 pt->cip_iot == CIT_READ ? "read" : "write",
1198 pos, pos + pt->cip_count - pt->cip_result,
1199 pt->cip_result, rc);
1203 CDEBUG(D_VFSTRACE, "%s: %s ret: %zd, rc: %d\n",
1204 file_dentry(file)->d_name.name,
1205 pt->cip_iot == CIT_READ ? "read" : "write",
1206 pt->cip_result, rc);
1208 cl_env_put(env, &refcheck);
1209 RETURN(pt->cip_result > 0 ? 0 : rc);
/*
 * Common entry for read/write/splice I/O: initialize a cl_io of the requested
 * type, take the per-inode range lock for writes (and O_DIRECT reads, see
 * LU-6227), run the cl_io loop — restarting on layout change — and tally
 * per-mount read/write stats.  Returns bytes transferred, or error code.
 * NOTE(review): this listing is elided (embedded line numbers jump); verify
 * against the full source before relying on control-flow details.
 */
1213 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1214 struct file *file, enum cl_io_type iot,
1215 loff_t *ppos, size_t count)
1217 struct range_lock range;
1218 struct vvp_io *vio = vvp_env_io(env);
1219 struct inode *inode = file_inode(file);
1220 struct ll_inode_info *lli = ll_i2info(inode);
1221 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1229 CDEBUG(D_VFSTRACE, "%s: %s range: [%llu, %llu)\n",
1230 file_dentry(file)->d_name.name,
1231 iot == CIT_READ ? "read" : "write", pos, pos + count);
1234 io = vvp_env_thread_io(env);
1235 ll_io_init(io, file, iot);
1236 if (args->via_io_subtype == IO_NORMAL) {
1237 io->u.ci_rw.rw_iter = *args->u.normal.via_iter;
1238 io->u.ci_rw.rw_iocb = *args->u.normal.via_iocb;
1243 if (cl_io_rw_init(env, io, iot, pos, count) == 0) {
1244 bool range_locked = false;
/* O_APPEND writes lock to EOF since the final offset is not known yet */
1246 if (file->f_flags & O_APPEND)
1247 range_lock_init(&range, 0, LUSTRE_EOF);
1249 range_lock_init(&range, pos, pos + count - 1);
1251 vio->vui_fd = LUSTRE_FPRIVATE(file);
1252 vio->vui_io_subtype = args->via_io_subtype;
1254 switch (vio->vui_io_subtype) {
1256 /* Direct IO reads must also take range lock,
1257 * or multiple reads will try to work on the same pages
1258 * See LU-6227 for details. */
1259 if (((iot == CIT_WRITE) ||
1260 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1261 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1262 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1264 rc = range_lock(&lli->lli_write_tree, &range);
1268 range_locked = true;
1272 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1273 vio->u.splice.vui_flags = args->u.splice.via_flags;
1276 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1280 ll_cl_add(file, env, io, LCC_RW);
/* For parallel writes, take the inode lock here once instead of per sub-IO */
1281 if (io->ci_pio && iot == CIT_WRITE && !IS_NOSEC(inode) &&
1282 !lli->lli_inode_locked) {
1284 lli->lli_inode_locked = 1;
1286 rc = cl_io_loop(env, io);
1287 if (lli->lli_inode_locked) {
1288 lli->lli_inode_locked = 0;
1289 inode_unlock(inode);
1291 ll_cl_remove(file, env);
1294 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1296 range_unlock(&lli->lli_write_tree, &range);
1299 /* cl_io_rw_init() handled IO */
/* Account partial progress and advance the caller's iterator/iocb so a
 * restarted loop iteration continues where this one stopped. */
1303 if (io->ci_nob > 0) {
1304 result += io->ci_nob;
1305 count -= io->ci_nob;
1307 if (args->via_io_subtype == IO_NORMAL) {
1308 iov_iter_advance(args->u.normal.via_iter, io->ci_nob);
1310 args->u.normal.via_iocb->ki_pos = pos;
1311 #ifdef HAVE_KIOCB_KI_LEFT
1312 args->u.normal.via_iocb->ki_left = count;
1313 #elif defined(HAVE_KI_NBYTES)
1314 args->u.normal.via_iocb->ki_nbytes = count;
1318 pos = io->u.ci_rw.rw_range.cir_pos;
1322 cl_io_fini(env, io);
/* Restart the whole IO when the layout changed mid-flight and data remains */
1324 if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
1326 "%s: restart %s range: [%llu, %llu) ret: %zd, rc: %d\n",
1327 file_dentry(file)->d_name.name,
1328 iot == CIT_READ ? "read" : "write",
1329 pos, pos + count, result, rc);
1333 if (iot == CIT_READ) {
1335 ll_stats_ops_tally(ll_i2sbi(inode),
1336 LPROC_LL_READ_BYTES, result);
1337 } else if (iot == CIT_WRITE) {
1339 ll_stats_ops_tally(ll_i2sbi(inode),
1340 LPROC_LL_WRITE_BYTES, result);
1341 fd->fd_write_failed = false;
1342 } else if (result == 0 && rc == 0) {
1345 fd->fd_write_failed = true;
1347 fd->fd_write_failed = false;
1348 } else if (rc != -ERESTARTSYS) {
1349 fd->fd_write_failed = true;
1353 CDEBUG(D_VFSTRACE, "%s: %s *ppos: %llu, pos: %llu, ret: %zd, rc: %d\n",
1354 file_dentry(file)->d_name.name,
1355 iot == CIT_READ ? "read" : "write", *ppos, pos, result, rc);
1359 RETURN(result > 0 ? result : rc);
1363 * The purpose of fast read is to overcome per I/O overhead and improve IOPS
1364 * especially for small I/O.
1366 * To serve a read request, CLIO has to create and initialize a cl_io and
1367 * then request DLM lock. This has turned out to have significant overhead
1368 * and affects the performance of small I/O dramatically.
1370 * It's not necessary to create a cl_io for each I/O. Under the help of read
1371 * ahead, most of the pages being read are already in memory cache and we can
1372 * read those pages directly because if the pages exist, the corresponding DLM
1373 * lock must exist so that page content must be valid.
1375 * In fast read implementation, the llite speculatively finds and reads pages
1376 * in memory cache. There are three scenarios for fast read:
1377 * - If the page exists and is uptodate, kernel VM will provide the data and
1378 * CLIO won't be intervened;
1379 * - If the page was brought into memory by read ahead, it will be exported
1380 * and read ahead parameters will be updated;
1381 * - Otherwise the page is not in memory, we can't do fast read. Therefore,
1382 * it will go back and invoke normal read, i.e., a cl_io will be created
1383 * and DLM lock will be requested.
1385 * POSIX compliance: posix standard states that read is intended to be atomic.
1386 * Lustre read implementation is in line with Linux kernel read implementation
1387 * and neither of them complies with POSIX standard in this matter. Fast read
1388 * doesn't make the situation worse on single node but it may interleave write
1389 * results from multiple nodes due to short read handling in ll_file_aio_read().
1391 * \param env - lu_env
1392 * \param iocb - kiocb from kernel
1393 * \param iter - user space buffers where the data will be copied
1395 * \retval - number of bytes have been read, or error code if error occurred.
/*
 * Speculative "fast read" straight from the page cache, bypassing cl_io
 * setup.  Disabled for O_DIRECT and when the mount lacks the fast_read
 * feature.  generic_file_read_iter() returns -ENODATA (arranged by
 * ll_readpage()) when a page is not cached, signalling the caller to fall
 * back to the normal read path.
 */
1398 ll_do_fast_read(struct kiocb *iocb, struct iov_iter *iter)
1402 if (!ll_sbi_has_fast_read(ll_i2sbi(file_inode(iocb->ki_filp))))
1405 /* NB: we can't do direct IO for fast read because it will need a lock
1406 * to make IO engine happy. */
1407 if (iocb->ki_filp->f_flags & O_DIRECT)
1410 result = generic_file_read_iter(iocb, iter);
1412 /* If the first page is not in cache, generic_file_aio_read() will be
1413 * returned with -ENODATA.
1414 * See corresponding code in ll_readpage(). */
1415 if (result == -ENODATA)
1419 ll_stats_ops_tally(ll_i2sbi(file_inode(iocb->ki_filp)),
1420 LPROC_LL_READ_BYTES, result);
1426 * Read from a file (through the page cache).
/*
 * read_iter file operation: try the fast (page-cache) read first; if data
 * remains, fall through to the full cl_io path via ll_file_io_generic().
 */
1428 static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1431 struct vvp_io_args *args;
1436 result = ll_do_fast_read(iocb, to);
1437 if (result < 0 || iov_iter_count(to) == 0)
1440 env = cl_env_get(&refcheck);
1442 return PTR_ERR(env);
1444 args = ll_env_args(env, IO_NORMAL);
1445 args->u.normal.via_iter = to;
1446 args->u.normal.via_iocb = iocb;
1448 rc2 = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1449 &iocb->ki_pos, iov_iter_count(to));
1452 else if (result == 0)
1455 cl_env_put(env, &refcheck);
1461 * Write to a file (through the page cache).
/*
 * write_iter file operation: route the write through ll_file_io_generic()
 * with IO_NORMAL args built from the kernel-supplied iocb/iov_iter.
 */
1463 static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1465 struct vvp_io_args *args;
1470 env = cl_env_get(&refcheck);
1472 return PTR_ERR(env);
1474 args = ll_env_args(env, IO_NORMAL);
1475 args->u.normal.via_iter = from;
1476 args->u.normal.via_iocb = iocb;
1478 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1479 &iocb->ki_pos, iov_iter_count(from));
1480 cl_env_put(env, &refcheck);
1484 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1486 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array (length sign/overflow, user-address accessibility)
 * and accumulate the total byte count into *count.  May shrink *nr_segs at
 * the first inaccessible segment, mirroring kernel writev semantics.
 */
1488 static int ll_file_get_iov_count(const struct iovec *iov,
1489 unsigned long *nr_segs, size_t *count)
1494 for (seg = 0; seg < *nr_segs; seg++) {
1495 const struct iovec *iv = &iov[seg];
1498 * If any segment has a negative length, or the cumulative
1499 * length ever wraps negative then return -EINVAL.
1502 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1504 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1509 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Pre-iter-ops compat wrapper: validate the iovec, build an iov_iter (API
 * differs with HAVE_IOV_ITER_INIT_DIRECTION) and delegate to
 * ll_file_read_iter().
 */
1516 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1517 unsigned long nr_segs, loff_t pos)
1524 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1528 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1529 iov_iter_init(&to, READ, iov, nr_segs, iov_count);
1530 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1531 iov_iter_init(&to, iov, nr_segs, iov_count, 0);
1532 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1534 result = ll_file_read_iter(iocb, &to);
/*
 * Synchronous read compat entry (pre read_iter kernels): wrap the user
 * buffer in a single iovec + sync kiocb and call ll_file_aio_read().
 * Fix: the HAVE_KI_NBYTES branch assigned a non-existent member
 * "i_nbytes"; struct kiocb's field is ki_nbytes (as used at listing lines
 * 1185 and 1609 in this same file).
 */
1539 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1542 struct iovec iov = { .iov_base = buf, .iov_len = count };
1547 init_sync_kiocb(&kiocb, file);
1548 kiocb.ki_pos = *ppos;
1549 #ifdef HAVE_KIOCB_KI_LEFT
1550 kiocb.ki_left = count;
1551 #elif defined(HAVE_KI_NBYTES)
1552 kiocb.ki_nbytes = count;
1555 result = ll_file_aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
1556 *ppos = kiocb.ki_pos;
1562 * Write to a file (through the page cache).
/*
 * Pre-iter-ops compat wrapper for writes: validate the iovec, build an
 * iov_iter and delegate to ll_file_write_iter().
 */
1565 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1566 unsigned long nr_segs, loff_t pos)
1568 struct iov_iter from;
1573 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1577 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1578 iov_iter_init(&from, WRITE, iov, nr_segs, iov_count);
1579 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1580 iov_iter_init(&from, iov, nr_segs, iov_count, 0);
1581 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1583 result = ll_file_write_iter(iocb, &from);
/*
 * Synchronous write compat entry: uses the per-env kiocb from lu_env thread
 * info (lti_kiocb) rather than a stack kiocb, then calls ll_file_aio_write()
 * with a single-segment iovec and propagates the updated file position.
 */
1588 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1589 size_t count, loff_t *ppos)
1592 struct iovec iov = { .iov_base = (void __user *)buf,
1594 struct kiocb *kiocb;
1599 env = cl_env_get(&refcheck);
1601 RETURN(PTR_ERR(env));
1603 kiocb = &ll_env_info(env)->lti_kiocb;
1604 init_sync_kiocb(kiocb, file);
1605 kiocb->ki_pos = *ppos;
1606 #ifdef HAVE_KIOCB_KI_LEFT
1607 kiocb->ki_left = count;
1608 #elif defined(HAVE_KI_NBYTES)
1609 kiocb->ki_nbytes = count;
1612 result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos);
1613 *ppos = kiocb->ki_pos;
1615 cl_env_put(env, &refcheck);
1618 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1621 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read file operation: build IO_SPLICE args around the destination
 * pipe and run a CIT_READ through ll_file_io_generic().
 */
1623 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1624 struct pipe_inode_info *pipe, size_t count,
1628 struct vvp_io_args *args;
1633 env = cl_env_get(&refcheck);
1635 RETURN(PTR_ERR(env));
1637 args = ll_env_args(env, IO_SPLICE);
1638 args->u.splice.via_pipe = pipe;
1639 args->u.splice.via_flags = flags;
1641 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1642 cl_env_put(env, &refcheck);
/*
 * Apply striping (LOV EA) to a file via an intent open by FID, performed
 * under the inode size lock; the transient open handle is released before
 * returning.  Returns 0 or negative errno from the intent open.
 */
1646 int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
1647 __u64 flags, struct lov_user_md *lum, int lum_size)
1649 struct lookup_intent oit = {
1651 .it_flags = flags | MDS_OPEN_BY_FID,
1656 ll_inode_size_lock(inode);
1657 rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
1659 GOTO(out_unlock, rc);
1661 ll_release_openhandle(dentry, &oit);
1664 ll_inode_size_unlock(inode);
1665 ll_intent_release(&oit);
/*
 * Fetch the LOV EA for @filename (child of @inode) from the MDS via
 * md_getattr_name(), validate the LOV magic (V1/V3/COMP_V1) and, on
 * big-endian hosts, swab the reply to host byte order before it is handed
 * to userspace.  On success *lmmp/*lmm_size point into the reply buffer,
 * which stays pinned through *request — caller must release the request.
 */
1670 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1671 struct lov_mds_md **lmmp, int *lmm_size,
1672 struct ptlrpc_request **request)
1674 struct ll_sb_info *sbi = ll_i2sbi(inode);
1675 struct mdt_body *body;
1676 struct lov_mds_md *lmm = NULL;
1677 struct ptlrpc_request *req = NULL;
1678 struct md_op_data *op_data;
1681 rc = ll_get_default_mdsize(sbi, &lmmsize);
1685 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1686 strlen(filename), lmmsize,
1687 LUSTRE_OPC_ANY, NULL);
1688 if (IS_ERR(op_data))
1689 RETURN(PTR_ERR(op_data));
1691 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1692 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1693 ll_finish_md_op_data(op_data);
1695 CDEBUG(D_INFO, "md_getattr_name failed "
1696 "on %s: rc %d\n", filename, rc);
1700 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1701 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1703 lmmsize = body->mbo_eadatasize;
1705 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1707 GOTO(out, rc = -ENODATA);
1710 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1711 LASSERT(lmm != NULL);
1713 if (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1) &&
1714 lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3) &&
1715 lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_COMP_V1))
1716 GOTO(out, rc = -EPROTO);
1719 * This is coming from the MDS, so is probably in
1720 * little endian. We convert it to host endian before
1721 * passing it to userspace.
/* True only on big-endian hosts: LOV wire format is little endian */
1723 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1726 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1) ||
1727 lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1728 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1729 if (le32_to_cpu(lmm->lmm_pattern) &
1730 LOV_PATTERN_F_RELEASED)
1734 /* if function called for directory - we should
1735 * avoid swabbing non-existent lsm objects */
1736 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1737 lustre_swab_lov_user_md_v1(
1738 (struct lov_user_md_v1 *)lmm);
1739 if (S_ISREG(body->mbo_mode))
1740 lustre_swab_lov_user_md_objects(
1741 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1743 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1744 lustre_swab_lov_user_md_v3(
1745 (struct lov_user_md_v3 *)lmm);
1746 if (S_ISREG(body->mbo_mode))
1747 lustre_swab_lov_user_md_objects(
1748 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1750 } else if (lmm->lmm_magic ==
1751 cpu_to_le32(LOV_MAGIC_COMP_V1)) {
1752 lustre_swab_lov_comp_md_v1(
1753 (struct lov_comp_md_v1 *)lmm);
1759 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: admin-only (CFS_CAP_SYS_ADMIN) setstripe that
 * carries pre-existing object data (MDS_OPEN_HAS_OBJS).  Copies the
 * lov_user_md from userspace, applies it via ll_lov_setstripe_ea_info(),
 * then clears the delayed-create flag and frees the kernel copy.
 */
1764 static int ll_lov_setea(struct inode *inode, struct file *file,
1767 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1768 struct lov_user_md *lump;
1769 int lum_size = sizeof(struct lov_user_md) +
1770 sizeof(struct lov_user_ost_data);
1774 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1777 OBD_ALLOC_LARGE(lump, lum_size);
1781 if (copy_from_user(lump, arg, lum_size))
1782 GOTO(out_lump, rc = -EFAULT);
1784 rc = ll_lov_setstripe_ea_info(inode, file_dentry(file), flags, lump,
1786 cl_lov_delay_create_clear(&file->f_flags);
1789 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the file's striping info to the userspace buffer @lum (at most @size
 * bytes) via the cl_object layer.
 */
1793 static int ll_file_getstripe(struct inode *inode, void __user *lum, size_t size)
1800 env = cl_env_get(&refcheck);
1802 RETURN(PTR_ERR(env));
1804 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum, size);
1805 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user's lov_user_md into the kernel,
 * apply it, refresh the layout generation, and write the resulting stripe
 * info back to the user buffer.  Clears the delayed-create flag regardless.
 */
1809 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1812 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1813 struct lov_user_md *klum;
1815 __u64 flags = FMODE_WRITE;
1818 rc = ll_copy_user_md(lum, &klum);
1823 rc = ll_lov_setstripe_ea_info(inode, file_dentry(file), flags, klum,
/* zero stripe_count so the getstripe below reports actual layout */
1828 rc = put_user(0, &lum->lmm_stripe_count);
1832 rc = ll_layout_refresh(inode, &gen);
1836 rc = ll_file_getstripe(inode, arg, lum_size);
1838 cl_lov_delay_create_clear(&file->f_flags);
1841 OBD_FREE(klum, lum_size);
/*
 * Acquire a Lustre group lock (gid = @arg) on the file.  Rejects gid 0 and
 * nolock mounts, refuses a second group lock on the same fd, instantiates
 * all OST objects first for composite (PFL) layouts, then takes the group
 * lock and records it in the fd under lli_lock — losing a concurrent race
 * drops the just-acquired lock.
 */
1846 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1848 struct ll_inode_info *lli = ll_i2info(inode);
1849 struct cl_object *obj = lli->lli_clob;
1850 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1851 struct ll_grouplock grouplock;
1856 CWARN("group id for group lock must not be 0\n");
1860 if (ll_file_nolock(file))
1861 RETURN(-EOPNOTSUPP);
1863 spin_lock(&lli->lli_lock);
1864 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1865 CWARN("group lock already existed with gid %lu\n",
1866 fd->fd_grouplock.lg_gid);
1867 spin_unlock(&lli->lli_lock);
1870 LASSERT(fd->fd_grouplock.lg_lock == NULL);
1871 spin_unlock(&lli->lli_lock);
1874 * XXX: group lock needs to protect all OST objects while PFL
1875 * can add new OST objects during the IO, so we'd instantiate
1876 * all OST objects before getting its group lock.
1881 struct cl_layout cl = {
1882 .cl_is_composite = false,
1885 env = cl_env_get(&refcheck);
1887 RETURN(PTR_ERR(env));
1889 rc = cl_object_layout_get(env, obj, &cl);
1890 if (!rc && cl.cl_is_composite)
1891 rc = ll_layout_write_intent(inode, 0, OBD_OBJECT_EOF);
1893 cl_env_put(env, &refcheck);
1898 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1899 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check under the spinlock: another thread may have won the race */
1903 spin_lock(&lli->lli_lock);
1904 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1905 spin_unlock(&lli->lli_lock);
1906 CERROR("another thread just won the race\n");
1907 cl_put_grouplock(&grouplock);
1911 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1912 fd->fd_grouplock = grouplock;
1913 spin_unlock(&lli->lli_lock);
1915 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * Release the group lock with gid @arg held on this fd.  Warns and fails if
 * no group lock is held or the gid does not match; otherwise detaches the
 * lock from the fd under lli_lock and drops it outside the spinlock.
 */
1919 static int ll_put_grouplock(struct inode *inode, struct file *file,
1922 struct ll_inode_info *lli = ll_i2info(inode);
1923 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1924 struct ll_grouplock grouplock;
1927 spin_lock(&lli->lli_lock);
1928 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1929 spin_unlock(&lli->lli_lock);
1930 CWARN("no group lock held\n");
1934 LASSERT(fd->fd_grouplock.lg_lock != NULL);
1936 if (fd->fd_grouplock.lg_gid != arg) {
1937 CWARN("group lock %lu doesn't match current id %lu\n",
1938 arg, fd->fd_grouplock.lg_gid);
1939 spin_unlock(&lli->lli_lock);
1943 grouplock = fd->fd_grouplock;
1944 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1945 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1946 spin_unlock(&lli->lli_lock);
1948 cl_put_grouplock(&grouplock);
1949 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1954 * Close inode open handle
1956 * \param dentry [in] dentry which contains the inode
1957 * \param it [in,out] intent which contains open info and result
1960 * \retval <0 failure
/* Skips the fs root and intents that carry no open handle; otherwise fills
 * a temporary obd_client_handle from the intent and closes it on the MDS,
 * also dropping the intent's enqueue-open request reference if held. */
1962 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1964 struct inode *inode = dentry->d_inode;
1965 struct obd_client_handle *och;
1971 /* Root ? Do nothing. */
1972 if (dentry->d_inode->i_sb->s_root == dentry)
1975 /* No open handle to close? Move away */
1976 if (!it_disposition(it, DISP_OPEN_OPEN))
1979 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1981 OBD_ALLOC(och, sizeof(*och));
1983 GOTO(out, rc = -ENOMEM);
1985 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1987 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
1989 /* this one is in place of ll_file_open */
1990 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1991 ptlrpc_req_finished(it->it_request);
1992 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1998 * Get size for inode for which FIEMAP mapping is requested.
1999 * Make the FIEMAP get_info call and returns the result.
2000 * \param fiemap kernel buffer to hold extents
2001 * \param num_bytes kernel buffer size
/* Rejects unsupported fiemap flags (reporting the supported set back),
 * honors FIEMAP_FLAG_SYNC by flushing dirty pages, glimpses size if the
 * cached size is 0, and short-circuits with zero extents for empty files
 * before issuing cl_object_fiemap(). */
2003 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
2009 struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
2012 /* Checks for fiemap flags */
2013 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
2014 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
2018 /* Check for FIEMAP_FLAG_SYNC */
2019 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
2020 rc = filemap_fdatawrite(inode->i_mapping);
2025 env = cl_env_get(&refcheck);
2027 RETURN(PTR_ERR(env));
2029 if (i_size_read(inode) == 0) {
2030 rc = ll_glimpse_size(inode);
2035 fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2036 obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
2037 obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
2039 /* If filesize is 0, then there would be no objects for mapping */
2040 if (fmkey.lfik_oa.o_size == 0) {
2041 fiemap->fm_mapped_extents = 0;
2045 fmkey.lfik_fiemap = *fiemap;
2047 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
2048 &fmkey, fiemap, &num_bytes);
2050 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a path via the MDC.  Requires
 * CFS_CAP_DAC_READ_SEARCH unless the mount allows user fid2path.  The user
 * buffer length (gf_pathlen, capped at PATH_MAX) sizes the reply; the mount
 * root FID is appended after gfout so fileset-aware servers resolve the
 * correct subtree (older servers ignore it).
 */
2054 int ll_fid2path(struct inode *inode, void __user *arg)
2056 struct obd_export *exp = ll_i2mdexp(inode);
2057 const struct getinfo_fid2path __user *gfin = arg;
2059 struct getinfo_fid2path *gfout;
2065 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
2066 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
2069 /* Only need to get the buflen */
2070 if (get_user(pathlen, &gfin->gf_pathlen))
2073 if (pathlen > PATH_MAX)
2076 outsize = sizeof(*gfout) + pathlen;
2077 OBD_ALLOC(gfout, outsize);
2081 if (copy_from_user(gfout, arg, sizeof(*gfout)))
2082 GOTO(gf_free, rc = -EFAULT);
2083 /* append root FID after gfout to let MDT know the root FID so that it
2084 * can lookup the correct path, this is mainly for fileset.
2085 * old server without fileset mount support will ignore this. */
2086 *gfout->gf_u.gf_root_fid = *ll_inode2fid(inode);
2088 /* Call mdc_iocontrol */
2089 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
2093 if (copy_to_user(arg, gfout, outsize))
2097 OBD_FREE(gfout, outsize);
2102 * Read the data_version for inode.
2104 * This value is computed using stripe object version on OST.
2105 * Version is computed using server side locking.
2107 * @param flags if do sync on the OST side;
2109 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
2110 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/* Runs a CIT_DATA_VERSION cl_io; files with no cl_object report version 0.
 * NOTE(review): the ci_need_restart handling is partially elided in this
 * listing — confirm restart behavior against the full source. */
2112 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
2114 struct cl_object *obj = ll_i2info(inode)->lli_clob;
2122 /* If no file object initialized, we consider its version is 0. */
2128 env = cl_env_get(&refcheck);
2130 RETURN(PTR_ERR(env));
2132 io = vvp_env_thread_io(env);
2134 io->u.ci_data_version.dv_data_version = 0;
2135 io->u.ci_data_version.dv_flags = flags;
2138 if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
2139 result = cl_io_loop(env, io);
2141 result = io->ci_result;
2143 *data_version = io->u.ci_data_version.dv_data_version;
2145 cl_io_fini(env, io);
2147 if (unlikely(io->ci_need_restart))
2150 cl_env_put(env, &refcheck);
2156 * Trigger a HSM release request for the provided inode.
/* Takes a write lease with MDS_OPEN_RELEASE, flushes and records the
 * latest data_version and [am]time, then closes the handle with
 * MDS_HSM_RELEASE; the lease lock handle itself is released by
 * mdc_hsm_release_pack().  On error paths the lease is closed here. */
2158 int ll_hsm_release(struct inode *inode)
2161 struct obd_client_handle *och = NULL;
2162 __u64 data_version = 0;
2167 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
2168 ll_get_fsname(inode->i_sb, NULL, 0),
2169 PFID(&ll_i2info(inode)->lli_fid));
2171 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
2173 GOTO(out, rc = PTR_ERR(och));
2175 /* Grab latest data_version and [am]time values */
2176 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
2180 env = cl_env_get(&refcheck);
2182 GOTO(out, rc = PTR_ERR(env));
2184 ll_merge_attr(env, inode);
2185 cl_env_put(env, &refcheck);
2187 /* Release the file.
2188 * NB: lease lock handle is released in mdc_hsm_release_pack() because
2189 * we still need it to pack l_remote_handle to MDT. */
2190 rc = ll_close_inode_openhandle(inode, och, MDS_HSM_RELEASE,
2196 if (och != NULL && !IS_ERR(och)) /* close the file */
2197 ll_lease_close(och, inode, NULL);
/* Scratch state for ll_swap_layouts(): the two inodes plus (in elided
 * fields) their data versions and check flags, kept together so they can
 * be swapped as a unit to order the operation. */
2202 struct ll_swap_stack {
2205 struct inode *inode1;
2206 struct inode *inode2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS: atomically swap the layouts of two files.
 * Orders the pair by FID to sequentialize, optionally takes a group lock on
 * both to flush dirty cache (gid != 0), verifies the caller-supplied data
 * versions have not changed, then sends the swap to the MDT with the flags
 * carried in struct mdc_swap_layouts via md_op_data->op_data.
 */
2211 static int ll_swap_layouts(struct file *file1, struct file *file2,
2212 struct lustre_swap_layouts *lsl)
2214 struct mdc_swap_layouts msl;
2215 struct md_op_data *op_data;
2218 struct ll_swap_stack *llss = NULL;
2221 OBD_ALLOC_PTR(llss);
2225 llss->inode1 = file_inode(file1);
2226 llss->inode2 = file_inode(file2);
2228 rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
2232 /* we use 2 bool because it is easier to swap than 2 bits */
2233 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2234 llss->check_dv1 = true;
2236 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2237 llss->check_dv2 = true;
2239 /* we cannot use lsl->sl_dvX directly because we may swap them */
2240 llss->dv1 = lsl->sl_dv1;
2241 llss->dv2 = lsl->sl_dv2;
2243 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2244 if (rc == 0) /* same file, done! */
2247 if (rc < 0) { /* sequentialize it */
2248 swap(llss->inode1, llss->inode2);
2250 swap(llss->dv1, llss->dv2);
2251 swap(llss->check_dv1, llss->check_dv2);
2255 if (gid != 0) { /* application asks to flush dirty cache */
2256 rc = ll_get_grouplock(llss->inode1, file1, gid);
2260 rc = ll_get_grouplock(llss->inode2, file2, gid);
2262 ll_put_grouplock(llss->inode1, file1, gid);
2267 /* ultimate check, before swapping the layouts we check if
2268 * dataversion has changed (if requested) */
2269 if (llss->check_dv1) {
2270 rc = ll_data_version(llss->inode1, &dv, 0);
2273 if (dv != llss->dv1)
2274 GOTO(putgl, rc = -EAGAIN);
2277 if (llss->check_dv2) {
2278 rc = ll_data_version(llss->inode2, &dv, 0);
2281 if (dv != llss->dv2)
2282 GOTO(putgl, rc = -EAGAIN);
2285 /* struct md_op_data is used to send the swap args to the mdt
2286 * only flags is missing, so we use struct mdc_swap_layouts
2287 * through the md_op_data->op_data */
2288 /* flags from user space have to be converted before they are sent to
2289 * server, no flag is sent today, they are only used on the client */
2292 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2293 0, LUSTRE_OPC_ANY, &msl);
2294 if (IS_ERR(op_data))
2295 GOTO(free, rc = PTR_ERR(op_data));
2297 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2298 sizeof(*op_data), op_data, NULL);
2299 ll_finish_md_op_data(op_data);
2306 ll_put_grouplock(llss->inode2, file2, gid);
2307 ll_put_grouplock(llss->inode1, file1, gid);
/*
 * Set/clear HSM state flags on @inode via LL_IOC_HSM_STATE_SET.  Validates
 * the masks against HSM_FLAGS_MASK, restricts non-HSM_USER_MASK flags to
 * CFS_CAP_SYS_ADMIN, and bounds the archive id before dispatching to the MDT.
 */
2317 int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2319 struct md_op_data *op_data;
2323 /* Detect out-of range masks */
2324 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2327 /* Non-root users are forbidden to set or clear flags which are
2328 * NOT defined in HSM_USER_MASK. */
2329 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2330 !cfs_capable(CFS_CAP_SYS_ADMIN))
2333 /* Detect out-of range archive id */
2334 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2335 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2338 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2339 LUSTRE_OPC_ANY, hss);
2340 if (IS_ERR(op_data))
2341 RETURN(PTR_ERR(op_data));
2343 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2344 sizeof(*op_data), op_data, NULL);
2346 ll_finish_md_op_data(op_data);
/*
 * HSM import: mark a (regular) file as archived+exists+released, then
 * force-set the attributes (mode/uid/gid/size/times) supplied in
 * hsm_user_import so the client-side inode matches the archived copy.
 * The setattr is performed with ATTR_FORCE under the inode lock.
 */
2351 static int ll_hsm_import(struct inode *inode, struct file *file,
2352 struct hsm_user_import *hui)
2354 struct hsm_state_set *hss = NULL;
2355 struct iattr *attr = NULL;
2359 if (!S_ISREG(inode->i_mode))
2365 GOTO(out, rc = -ENOMEM);
2367 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2368 hss->hss_archive_id = hui->hui_archive_id;
2369 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2370 rc = ll_hsm_state_set(inode, hss);
2374 OBD_ALLOC_PTR(attr);
2376 GOTO(out, rc = -ENOMEM);
2378 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2379 attr->ia_mode |= S_IFREG;
2380 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2381 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2382 attr->ia_size = hui->hui_size;
2383 attr->ia_mtime.tv_sec = hui->hui_mtime;
2384 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2385 attr->ia_atime.tv_sec = hui->hui_atime;
2386 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2388 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2389 ATTR_UID | ATTR_GID |
2390 ATTR_MTIME | ATTR_MTIME_SET |
2391 ATTR_ATIME | ATTR_ATIME_SET;
2395 rc = ll_setattr_raw(file_dentry(file), attr, true);
2399 inode_unlock(inode);
/* Map an fmode (FMODE_READ/FMODE_WRITE bits) to the corresponding
 * LL_LEASE_RDLCK/LL_LEASE_WRLCK lease-type bitmask. */
2411 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2413 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2414 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * LL_IOC_FUTIMES_3: set atime/mtime/ctime (including ctime, unlike
 * utimes(2)) from the ll_futimes_3 payload.  CAP_SYS_ADMIN only, regular
 * files only; applied via ll_setattr_raw() under the inode lock.
 */
2417 static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu)
2419 struct inode *inode = file_inode(file);
2421 .ia_valid = ATTR_ATIME | ATTR_ATIME_SET |
2422 ATTR_MTIME | ATTR_MTIME_SET |
2423 ATTR_CTIME | ATTR_CTIME_SET,
2425 .tv_sec = lfu->lfu_atime_sec,
2426 .tv_nsec = lfu->lfu_atime_nsec,
2429 .tv_sec = lfu->lfu_mtime_sec,
2430 .tv_nsec = lfu->lfu_mtime_nsec,
2433 .tv_sec = lfu->lfu_ctime_sec,
2434 .tv_nsec = lfu->lfu_ctime_nsec,
2440 if (!capable(CAP_SYS_ADMIN))
2443 if (!S_ISREG(inode->i_mode))
2447 rc = ll_setattr_raw(file_dentry(file), &ia, false);
2448 inode_unlock(inode);
/* Translate a userspace lock mode (MODE_READ_USER/MODE_WRITE_USER) to the
 * kernel cl_lock_mode.  NOTE(review): the case bodies and default are
 * elided in this listing — verify mapping against the full source. */
2453 static enum cl_lock_mode cl_mode_user_to_kernel(enum lock_mode_user mode)
2456 case MODE_READ_USER:
2458 case MODE_WRITE_USER:
/* Printable names for user lock modes, indexed by enum lock_mode_user */
2465 static const char *const user_lockname[] = LOCK_MODE_NAMES;
2467 /* Used to allow the upper layers of the client to request an LDLM lock
2468 * without doing an actual read or write.
2470 * Used for ladvise lockahead to manually request specific locks.
2472 * \param[in] file file this ladvise lock request is on
2473 * \param[in] ladvise ladvise struct describing this lock request
2475 * \retval 0 success, no detailed result available (sync requests
2476 * and requests sent to the server [not handled locally]
2477 * cannot return detailed results)
2478 * \retval LLA_RESULT_{SAME,DIFFERENT} - detailed result of the lock request,
2479 * see definitions for details.
2480 * \retval negative negative errno on error
2482 int ll_file_lock_ahead(struct file *file, struct llapi_lu_ladvise *ladvise)
2484 struct lu_env *env = NULL;
2485 struct cl_io *io = NULL;
2486 struct cl_lock *lock = NULL;
2487 struct cl_lock_descr *descr = NULL;
2488 struct dentry *dentry = file->f_path.dentry;
2489 struct inode *inode = dentry->d_inode;
2490 enum cl_lock_mode cl_mode;
2491 off_t start = ladvise->lla_start;
2492 off_t end = ladvise->lla_end;
2498 CDEBUG(D_VFSTRACE, "Lock request: file=%.*s, inode=%p, mode=%s "
2499 "start=%llu, end=%llu\n", dentry->d_name.len,
2500 dentry->d_name.name, dentry->d_inode,
2501 user_lockname[ladvise->lla_lockahead_mode], (__u64) start,
2504 cl_mode = cl_mode_user_to_kernel(ladvise->lla_lockahead_mode);
2506 GOTO(out, result = cl_mode);
2508 /* Get IO environment */
2509 result = cl_io_get(inode, &env, &io, &refcheck);
2513 result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
2516 * nothing to do for this io. This currently happens when
2517 * stripe sub-object's are not yet created.
2519 result = io->ci_result;
2520 } else if (result == 0) {
2521 lock = vvp_env_lock(env);
2522 descr = &lock->cll_descr;
2524 descr->cld_obj = io->ci_obj;
2525 /* Convert byte offsets to pages */
2526 descr->cld_start = cl_index(io->ci_obj, start);
2527 descr->cld_end = cl_index(io->ci_obj, end);
2528 descr->cld_mode = cl_mode;
2529 /* CEF_MUST is used because we do not want to convert a
2530 * lockahead request to a lockless lock */
2531 descr->cld_enq_flags = CEF_MUST | CEF_LOCK_NO_EXPAND |
2534 if (ladvise->lla_peradvice_flags & LF_ASYNC)
2535 descr->cld_enq_flags |= CEF_SPECULATIVE;
2537 result = cl_lock_request(env, io, lock);
2539 /* On success, we need to release the lock */
2541 cl_lock_release(env, lock);
2543 cl_io_fini(env, io);
2544 cl_env_put(env, &refcheck);
2546 /* -ECANCELED indicates a matching lock with a different extent
2547 * was already present, and -EEXIST indicates a matching lock
2548 * on exactly the same extent was already present.
2549 * We convert them to positive values for userspace to make
2550 * recognizing true errors easier.
2551 * Note we can only return these detailed results on async requests,
2552 * as sync requests look the same as i/o requests for locking. */
2553 if (result == -ECANCELED)
2554 result = LLA_RESULT_DIFFERENT;
2555 else if (result == -EEXIST)
2556 result = LLA_RESULT_SAME;
/* Printable advice names, indexed by enum lu_ladvise_type */
2561 static const char *const ladvise_names[] = LU_LADVISE_NAMES;
/*
 * Validate one llapi_lu_ladvise record before processing: known advice
 * value, per-advice flag mask, valid lockahead mode, and a non-empty
 * start < end range for range-based advices.  Logs a D_VFSTRACE message
 * and returns a negative errno on each rejection.
 */
2563 static int ll_ladvise_sanity(struct inode *inode,
2564 struct llapi_lu_ladvise *ladvise)
2566 enum lu_ladvise_type advice = ladvise->lla_advice;
2567 /* Note the peradvice flags is a 32 bit field, so per advice flags must
2568 * be in the first 32 bits of enum ladvise_flags */
2569 __u32 flags = ladvise->lla_peradvice_flags;
2570 /* 3 lines at 80 characters per line, should be plenty */
2573 if (advice > LU_LADVISE_MAX || advice == LU_LADVISE_INVALID) {
2575 CDEBUG(D_VFSTRACE, "%s: advice with value '%d' not recognized,"
2576 "last supported advice is %s (value '%d'): rc = %d\n",
2577 ll_get_fsname(inode->i_sb, NULL, 0), advice,
2578 ladvise_names[LU_LADVISE_MAX-1], LU_LADVISE_MAX-1, rc);
2582 /* Per-advice checks */
2584 case LU_LADVISE_LOCKNOEXPAND:
2585 if (flags & ~LF_LOCKNOEXPAND_MASK) {
2587 CDEBUG(D_VFSTRACE, "%s: Invalid flags (%x) for %s: "
2589 ll_get_fsname(inode->i_sb, NULL, 0), flags,
2590 ladvise_names[advice], rc);
2594 case LU_LADVISE_LOCKAHEAD:
2595 /* Currently only READ and WRITE modes can be requested */
2596 if (ladvise->lla_lockahead_mode >= MODE_MAX_USER ||
2597 ladvise->lla_lockahead_mode == 0) {
2599 CDEBUG(D_VFSTRACE, "%s: Invalid mode (%d) for %s: "
2601 ll_get_fsname(inode->i_sb, NULL, 0),
2602 ladvise->lla_lockahead_mode,
2603 ladvise_names[advice], rc);
2606 case LU_LADVISE_WILLREAD:
2607 case LU_LADVISE_DONTNEED:
2609 /* Note fall through above - These checks apply to all advices
2610 * except LOCKNOEXPAND */
2611 if (flags & ~LF_DEFAULT_MASK) {
2613 CDEBUG(D_VFSTRACE, "%s: Invalid flags (%x) for %s: "
2615 ll_get_fsname(inode->i_sb, NULL, 0), flags,
2616 ladvise_names[advice], rc);
2619 if (ladvise->lla_start >= ladvise->lla_end) {
2621 CDEBUG(D_VFSTRACE, "%s: Invalid range (%llu to %llu) "
2622 "for %s: rc = %d\n",
2623 ll_get_fsname(inode->i_sb, NULL, 0),
2624 ladvise->lla_start, ladvise->lla_end,
2625 ladvise_names[advice], rc);
2637 * Give file access advice
2639 * The ladvise interface is similar to the Linux fadvise() system call,
2640 * except that it forwards the advice directly from the Lustre client to
2641 * the server. The server-side code will apply appropriate read-ahead and
2642 * caching techniques for the corresponding files.
2644 * A typical workload for ladvise is e.g. a bunch of different clients
2645 * doing small random reads of a file, so prefetching pages into OSS cache
2646 * with big linear reads before the random IO is a net benefit. Fetching
2647 * all that data into each client cache with fadvise() may not be, due to
2648 * much more data being sent to the client.
/* Submit one (already-validated) advice for @inode to the server by
 * running a CIT_LADVISE cl_io: fill io->u.ci_ladvise from @ladvise and
 * @flags, run the io loop, then tear down the environment. */
2650 static int ll_ladvise(struct inode *inode, struct file *file, __u64 flags,
2651 struct llapi_lu_ladvise *ladvise)
2655 struct cl_ladvise_io *lio;
2660 env = cl_env_get(&refcheck);
2662 RETURN(PTR_ERR(env));
2664 io = vvp_env_thread_io(env);
2665 io->ci_obj = ll_i2info(inode)->lli_clob;
2667 /* initialize parameters for ladvise */
2668 lio = &io->u.ci_ladvise;
2669 lio->li_start = ladvise->lla_start;
2670 lio->li_end = ladvise->lla_end;
2671 lio->li_fid = ll_inode2fid(inode);
2672 lio->li_advice = ladvise->lla_advice;
2673 lio->li_flags = flags;
/* Only run the io loop if initialization of the CIT_LADVISE io
 * succeeded; cl_io_fini() below handles both outcomes. */
2675 if (cl_io_init(env, io, CIT_LADVISE, io->ci_obj) == 0)
2676 rc = cl_io_loop(env, io);
2680 cl_io_fini(env, io);
2681 cl_env_put(env, &refcheck);
/* Record the "no lock expansion" preference on this file descriptor:
 * LF_UNSET in @flags clears the preference, otherwise it is enabled. */
2685 static int ll_lock_noexpand(struct file *file, int flags)
2687 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2689 fd->ll_lock_no_expand = !(flags & LF_UNSET);
/* FS_IOC_FSGETXATTR-style handler: copy the user's struct fsxattr in,
 * fill in the inode's project ID, and copy it back out. Returns -EFAULT
 * on a failed user copy (error paths elided here). */
2694 int ll_ioctl_fsgetxattr(struct inode *inode, unsigned int cmd,
2697 struct fsxattr fsxattr;
2699 if (copy_from_user(&fsxattr,
2700 (const struct fsxattr __user *)arg,
2704 fsxattr.fsx_projid = ll_i2info(inode)->lli_projid;
2705 if (copy_to_user((struct fsxattr __user *)arg,
2706 &fsxattr, sizeof(fsxattr)))
/* FS_IOC_FSSETXATTR-style handler: set the project ID on @inode by
 * sending an MDS setattr with MDS_ATTR_PROJID. Restricted to
 * CAP_SYS_ADMIN ("only root could change project ID"). Cleans up the
 * md_op_data and the reply request on all paths. */
2712 int ll_ioctl_fssetxattr(struct inode *inode, unsigned int cmd,
2716 struct md_op_data *op_data;
2717 struct ptlrpc_request *req = NULL;
2719 struct fsxattr fsxattr;
2721 /* only root could change project ID */
2722 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2725 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2726 LUSTRE_OPC_ANY, NULL);
2727 if (IS_ERR(op_data))
2728 RETURN(PTR_ERR(op_data));
2730 if (copy_from_user(&fsxattr,
2731 (const struct fsxattr __user *)arg,
2733 GOTO(out_fsxattr1, rc = -EFAULT);
2735 op_data->op_projid = fsxattr.fsx_projid;
2736 op_data->op_attr.ia_valid |= MDS_ATTR_PROJID;
2737 rc = md_setattr(ll_i2sbi(inode)->ll_md_exp, op_data, NULL,
2739 ptlrpc_req_finished(req);
2742 ll_finish_md_op_data(op_data);
/* Main ioctl dispatcher for regular Lustre files. Routes each cmd to the
 * matching helper (striping, layout swap, group locks, HSM, leases,
 * ladvise, project xattrs, ...) or falls through to obd_iocontrol() for
 * commands handled by the OSC/MDC layers. Also tallies LPROC_LL_IOCTL
 * stats for every call. */
2749 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2751 struct inode *inode = file_inode(file);
2752 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2756 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2757 PFID(ll_inode2fid(inode)), inode, cmd);
2758 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2760 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2761 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2765 case LL_IOC_GETFLAGS:
2766 /* Get the current value of the file flags */
2767 return put_user(fd->fd_flags, (int __user *)arg);
2768 case LL_IOC_SETFLAGS:
2769 case LL_IOC_CLRFLAGS:
2770 /* Set or clear specific file flags */
2771 /* XXX This probably needs checks to ensure the flags are
2772 * not abused, and to handle any flag side effects.
2774 if (get_user(flags, (int __user *) arg))
/* LL_FILE_IGNORE_LOCK is only permitted on O_DIRECT files. */
2777 if (cmd == LL_IOC_SETFLAGS) {
2778 if ((flags & LL_FILE_IGNORE_LOCK) &&
2779 !(file->f_flags & O_DIRECT)) {
2780 CERROR("%s: unable to disable locking on "
2781 "non-O_DIRECT file\n", current->comm);
2785 fd->fd_flags |= flags;
2787 fd->fd_flags &= ~flags;
2790 case LL_IOC_LOV_SETSTRIPE:
2791 case LL_IOC_LOV_SETSTRIPE_NEW:
2792 RETURN(ll_lov_setstripe(inode, file, (void __user *)arg));
2793 case LL_IOC_LOV_SETEA:
2794 RETURN(ll_lov_setea(inode, file, (void __user *)arg));
2795 case LL_IOC_LOV_SWAP_LAYOUTS: {
2797 struct lustre_swap_layouts lsl;
2800 if (copy_from_user(&lsl, (char __user *)arg,
2801 sizeof(struct lustre_swap_layouts)))
/* Swapping layouts requires write access to this file. */
2804 if ((file->f_flags & O_ACCMODE) == O_RDONLY)
2807 file2 = fget(lsl.sl_fd);
2811 /* O_WRONLY or O_RDWR */
2812 if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
2813 GOTO(out, rc = -EPERM);
2815 intent = lsl.sl_flags & INTENT_LAYOUTS_CLOSE;
2817 struct inode *inode2;
2818 struct ll_inode_info *lli;
2819 struct obd_client_handle *och = NULL;
/* Close-intent swap: consume this fd's lease handle (if any)
 * under lli_och_mutex and swap layouts as part of the close. */
2821 lli = ll_i2info(inode);
2822 mutex_lock(&lli->lli_och_mutex);
2823 if (fd->fd_lease_och != NULL) {
2824 och = fd->fd_lease_och;
2825 fd->fd_lease_och = NULL;
2827 mutex_unlock(&lli->lli_och_mutex);
2829 GOTO(out, rc = -ENOLCK);
2830 inode2 = file_inode(file2);
2831 rc = ll_swap_layouts_close(och, inode, inode2, intent);
2833 rc = ll_swap_layouts(file, file2, &lsl);
2839 case LL_IOC_LOV_GETSTRIPE:
2840 case LL_IOC_LOV_GETSTRIPE_NEW:
2841 RETURN(ll_file_getstripe(inode, (void __user *)arg, 0));
2842 case FSFILT_IOC_GETFLAGS:
2843 case FSFILT_IOC_SETFLAGS:
2844 RETURN(ll_iocontrol(inode, file, cmd, arg));
2845 case FSFILT_IOC_GETVERSION_OLD:
2846 case FSFILT_IOC_GETVERSION:
2847 RETURN(put_user(inode->i_generation, (int __user *)arg));
2848 case LL_IOC_GROUP_LOCK:
2849 RETURN(ll_get_grouplock(inode, file, arg));
2850 case LL_IOC_GROUP_UNLOCK:
2851 RETURN(ll_put_grouplock(inode, file, arg));
2852 case IOC_OBD_STATFS:
2853 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2855 /* We need to special case any other ioctls we want to handle,
2856 * to send them to the MDS/OST as appropriate and to properly
2857 * network encode the arg field.
2858 case FSFILT_IOC_SETVERSION_OLD:
2859 case FSFILT_IOC_SETVERSION:
2861 case LL_IOC_FLUSHCTX:
2862 RETURN(ll_flush_ctx(inode));
2863 case LL_IOC_PATH2FID: {
2864 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2865 sizeof(struct lu_fid)))
2870 case LL_IOC_GETPARENT:
2871 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2873 case OBD_IOC_FID2PATH:
2874 RETURN(ll_fid2path(inode, (void __user *)arg));
2875 case LL_IOC_DATA_VERSION: {
2876 struct ioc_data_version idv;
2879 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* Only the RD/WR flush bits are honored from userspace. */
2882 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2883 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2886 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2892 case LL_IOC_GET_MDTIDX: {
2895 mdtidx = ll_get_mdt_idx(inode);
2899 if (put_user((int)mdtidx, (int __user *)arg))
2904 case OBD_IOC_GETDTNAME:
2905 case OBD_IOC_GETMDNAME:
2906 RETURN(ll_get_obd_name(inode, cmd, arg));
2907 case LL_IOC_HSM_STATE_GET: {
2908 struct md_op_data *op_data;
2909 struct hsm_user_state *hus;
2916 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2917 LUSTRE_OPC_ANY, hus);
2918 if (IS_ERR(op_data)) {
2920 RETURN(PTR_ERR(op_data));
2923 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2926 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2929 ll_finish_md_op_data(op_data);
2933 case LL_IOC_HSM_STATE_SET: {
2934 struct hsm_state_set *hss;
2941 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2946 rc = ll_hsm_state_set(inode, hss);
2951 case LL_IOC_HSM_ACTION: {
2952 struct md_op_data *op_data;
2953 struct hsm_current_action *hca;
2960 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2961 LUSTRE_OPC_ANY, hca);
2962 if (IS_ERR(op_data)) {
2964 RETURN(PTR_ERR(op_data));
2967 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2970 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2973 ll_finish_md_op_data(op_data);
2977 case LL_IOC_SET_LEASE: {
2978 struct ll_inode_info *lli = ll_i2info(inode);
2979 struct obd_client_handle *och = NULL;
/* Lease mode must be compatible with how the file was opened. */
2984 case LL_LEASE_WRLCK:
2985 if (!(file->f_mode & FMODE_WRITE))
2987 fmode = FMODE_WRITE;
2989 case LL_LEASE_RDLCK:
2990 if (!(file->f_mode & FMODE_READ))
2994 case LL_LEASE_UNLCK:
2995 mutex_lock(&lli->lli_och_mutex);
2996 if (fd->fd_lease_och != NULL) {
2997 och = fd->fd_lease_och;
2998 fd->fd_lease_och = NULL;
3000 mutex_unlock(&lli->lli_och_mutex);
3005 fmode = och->och_flags;
3006 rc = ll_lease_close(och, inode, &lease_broken);
3010 rc = ll_lease_och_release(inode, file);
3017 RETURN(ll_lease_type_from_fmode(fmode));
3022 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
3024 /* apply for lease */
3025 och = ll_lease_open(inode, file, fmode, 0);
3027 RETURN(PTR_ERR(och));
/* Install the new lease handle only if the fd has none yet. */
3030 mutex_lock(&lli->lli_och_mutex);
3031 if (fd->fd_lease_och == NULL) {
3032 fd->fd_lease_och = och;
3035 mutex_unlock(&lli->lli_och_mutex);
3037 /* impossible now that only excl is supported for now */
3038 ll_lease_close(och, inode, &lease_broken);
3043 case LL_IOC_GET_LEASE: {
3044 struct ll_inode_info *lli = ll_i2info(inode);
3045 struct ldlm_lock *lock = NULL;
3048 mutex_lock(&lli->lli_och_mutex);
3049 if (fd->fd_lease_och != NULL) {
3050 struct obd_client_handle *och = fd->fd_lease_och;
/* Report the lease mode only while its DLM lock is not
 * being cancelled. */
3052 lock = ldlm_handle2lock(&och->och_lease_handle);
3054 lock_res_and_lock(lock);
3055 if (!ldlm_is_cancel(lock))
3056 fmode = och->och_flags;
3058 unlock_res_and_lock(lock);
3059 LDLM_LOCK_PUT(lock);
3062 mutex_unlock(&lli->lli_och_mutex);
3064 RETURN(ll_lease_type_from_fmode(fmode));
3066 case LL_IOC_HSM_IMPORT: {
3067 struct hsm_user_import *hui;
3073 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
3078 rc = ll_hsm_import(inode, file, hui);
3083 case LL_IOC_FUTIMES_3: {
3084 struct ll_futimes_3 lfu;
3086 if (copy_from_user(&lfu,
3087 (const struct ll_futimes_3 __user *)arg,
3091 RETURN(ll_file_futimes_3(file, &lfu));
3093 case LL_IOC_LADVISE: {
3094 struct llapi_ladvise_hdr *k_ladvise_hdr;
3095 struct llapi_ladvise_hdr __user *u_ladvise_hdr;
3098 int alloc_size = sizeof(*k_ladvise_hdr);
/* First copy just the header to learn lah_count, then
 * re-allocate and re-copy the full variable-length array. */
3101 u_ladvise_hdr = (void __user *)arg;
3102 OBD_ALLOC_PTR(k_ladvise_hdr);
3103 if (k_ladvise_hdr == NULL)
3106 if (copy_from_user(k_ladvise_hdr, u_ladvise_hdr, alloc_size))
3107 GOTO(out_ladvise, rc = -EFAULT);
3109 if (k_ladvise_hdr->lah_magic != LADVISE_MAGIC ||
3110 k_ladvise_hdr->lah_count < 1)
3111 GOTO(out_ladvise, rc = -EINVAL);
3113 num_advise = k_ladvise_hdr->lah_count;
3114 if (num_advise >= LAH_COUNT_MAX)
3115 GOTO(out_ladvise, rc = -EFBIG);
3117 OBD_FREE_PTR(k_ladvise_hdr);
3118 alloc_size = offsetof(typeof(*k_ladvise_hdr),
3119 lah_advise[num_advise]);
3120 OBD_ALLOC(k_ladvise_hdr, alloc_size);
3121 if (k_ladvise_hdr == NULL)
3125 * TODO: submit multiple advices to one server in a single RPC
3127 if (copy_from_user(k_ladvise_hdr, u_ladvise_hdr, alloc_size))
3128 GOTO(out_ladvise, rc = -EFAULT);
/* Validate and dispatch each advice entry in turn. */
3130 for (i = 0; i < num_advise; i++) {
3131 struct llapi_lu_ladvise *k_ladvise =
3132 &k_ladvise_hdr->lah_advise[i];
3133 struct llapi_lu_ladvise __user *u_ladvise =
3134 &u_ladvise_hdr->lah_advise[i];
3136 rc = ll_ladvise_sanity(inode, k_ladvise);
3138 GOTO(out_ladvise, rc);
3140 switch (k_ladvise->lla_advice) {
3141 case LU_LADVISE_LOCKNOEXPAND:
3142 rc = ll_lock_noexpand(file,
3143 k_ladvise->lla_peradvice_flags);
3144 GOTO(out_ladvise, rc);
3145 case LU_LADVISE_LOCKAHEAD:
3147 rc = ll_file_lock_ahead(file, k_ladvise);
3150 GOTO(out_ladvise, rc);
3153 &u_ladvise->lla_lockahead_result))
3154 GOTO(out_ladvise, rc = -EFAULT);
3157 rc = ll_ladvise(inode, file,
3158 k_ladvise_hdr->lah_flags,
3161 GOTO(out_ladvise, rc);
3168 OBD_FREE(k_ladvise_hdr, alloc_size);
3171 case LL_IOC_FSGETXATTR:
3172 RETURN(ll_ioctl_fsgetxattr(inode, cmd, arg));
3173 case LL_IOC_FSSETXATTR:
3174 RETURN(ll_ioctl_fssetxattr(inode, cmd, arg));
3176 RETURN(put_user(PAGE_SIZE, (int __user *)arg));
/* Default: pass unrecognized commands down to the data export. */
3178 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
3179 (void __user *)arg));
3183 #ifndef HAVE_FILE_LLSEEK_SIZE
/* Fallback for kernels without generic_file_llseek_size(): validate the
 * target offset against sign/maxsize limits and commit it to f_pos,
 * resetting f_version when the position actually changes. */
3184 static inline loff_t
3185 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
3187 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
3189 if (offset > maxsize)
3192 if (offset != file->f_pos) {
3193 file->f_pos = offset;
3194 file->f_version = 0;
/* Local copy of generic_file_llseek_size() for kernels lacking it:
 * handles SEEK_CUR (including the lockless lseek(fd, 0, SEEK_CUR)
 * fast path), SEEK_DATA and SEEK_HOLE against @eof, bounded by
 * @maxsize. */
3200 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
3201 loff_t maxsize, loff_t eof)
3203 struct inode *inode = file_inode(file);
3211 * Here we special-case the lseek(fd, 0, SEEK_CUR)
3212 * position-querying operation. Avoid rewriting the "same"
3213 * f_pos value back to the file because a concurrent read(),
3214 * write() or lseek() might have altered it
3219 * f_lock protects against read/modify/write race with other
3220 * SEEK_CURs. Note that parallel writes and reads behave
3224 offset = llseek_execute(file, file->f_pos + offset, maxsize);
3225 inode_unlock(inode);
3229 * In the generic case the entire file is data, so as long as
3230 * offset isn't at the end of the file then the offset is data.
3237 * There is a virtual hole at the end of the file, so as long as
3238 * offset isn't i_size or larger, return i_size.
3246 return llseek_execute(file, offset, maxsize);
/* llseek for Lustre files. For SEEK_END/SEEK_HOLE/SEEK_DATA the current
 * size must be fetched from the OSTs first (ll_glimpse_size()) so that
 * eof is accurate; the actual seek is then delegated to
 * ll_generic_file_llseek_size() bounded by ll_file_maxbytes(). */
3250 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
3252 struct inode *inode = file_inode(file);
3253 loff_t retval, eof = 0;
3256 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
3257 (origin == SEEK_CUR) ? file->f_pos : 0);
3258 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
3259 PFID(ll_inode2fid(inode)), inode, retval, retval,
3261 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
3263 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
3264 retval = ll_glimpse_size(inode);
3267 eof = i_size_read(inode);
3270 retval = ll_generic_file_llseek_size(file, offset, origin,
3271 ll_file_maxbytes(inode), eof);
/* ->flush() for regular files (called on every close of a file
 * descriptor): surface any asynchronous writeback errors recorded on the
 * inode/object as -EIO, but only once per descriptor — if this fd has
 * already reported a write failure, do not report it again. */
3275 static int ll_flush(struct file *file, fl_owner_t id)
3277 struct inode *inode = file_inode(file);
3278 struct ll_inode_info *lli = ll_i2info(inode);
3279 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
3282 LASSERT(!S_ISDIR(inode->i_mode));
3284 /* catch async errors that were recorded back when async writeback
3285 * failed for pages in this mapping. */
3286 rc = lli->lli_async_rc;
3287 lli->lli_async_rc = 0;
3288 if (lli->lli_clob != NULL) {
3289 err = lov_read_and_clear_async_rc(lli->lli_clob);
3294 /* The application has been told write failure already.
3295 * Do not report failure again. */
3296 if (fd->fd_write_failed)
3298 return rc ? -EIO : 0;
3302 * Called to make sure a portion of file has been written out.
3303 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
3305 * Return how many pages have been written.
3307 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
3308 enum cl_fsync_mode mode, int ignore_layout)
3312 struct cl_fsync_io *fio;
/* Reject unknown fsync modes up front. */
3317 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
3318 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
3321 env = cl_env_get(&refcheck);
3323 RETURN(PTR_ERR(env));
/* Build and run a CIT_FSYNC io over [start, end] of this object;
 * on success the result is the number of pages written. */
3325 io = vvp_env_thread_io(env);
3326 io->ci_obj = ll_i2info(inode)->lli_clob;
3327 io->ci_ignore_layout = ignore_layout;
3329 /* initialize parameters for sync */
3330 fio = &io->u.ci_fsync;
3331 fio->fi_start = start;
3333 fio->fi_fid = ll_inode2fid(inode);
3334 fio->fi_mode = mode;
3335 fio->fi_nr_written = 0;
3337 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
3338 result = cl_io_loop(env, io);
3340 result = io->ci_result;
3342 result = fio->fi_nr_written;
3343 cl_io_fini(env, io);
3344 cl_env_put(env, &refcheck);
3350 * When dentry is provided (the 'else' case), file_dentry() may be
3351 * null and dentry must be used directly rather than pulled from
3352 * file_dentry() as is done otherwise.
/* fsync()/fdatasync() entry point, with three signature variants for
 * different kernel generations (4-arg ranged, 2-arg, and the legacy
 * dentry form). Flushes dirty pages, collects recorded async writeback
 * errors, syncs metadata via md_fsync(), and for regular files runs a
 * CL_FSYNC_ALL range sync, updating fd_write_failed accordingly. */
3355 #ifdef HAVE_FILE_FSYNC_4ARGS
3356 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3358 struct dentry *dentry = file_dentry(file);
3360 #elif defined(HAVE_FILE_FSYNC_2ARGS)
3361 int ll_fsync(struct file *file, int datasync)
3363 struct dentry *dentry = file_dentry(file);
3365 loff_t end = LLONG_MAX;
3367 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
3370 loff_t end = LLONG_MAX;
3372 struct inode *inode = dentry->d_inode;
3373 struct ll_inode_info *lli = ll_i2info(inode);
3374 struct ptlrpc_request *req;
3378 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
3379 PFID(ll_inode2fid(inode)), inode);
3380 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
3382 #ifdef HAVE_FILE_FSYNC_4ARGS
3383 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
/* Only take the inode lock here if the caller does not already
 * hold it (lli_inode_locked). */
3384 lock_inode = !lli->lli_inode_locked;
3388 /* fsync's caller has already called _fdata{sync,write}, we want
3389 * that IO to finish before calling the osc and mdc sync methods */
3390 rc = filemap_fdatawait(inode->i_mapping);
3393 /* catch async errors that were recorded back when async writeback
3394 * failed for pages in this mapping. */
3395 if (!S_ISDIR(inode->i_mode)) {
3396 err = lli->lli_async_rc;
3397 lli->lli_async_rc = 0;
3400 if (lli->lli_clob != NULL) {
3401 err = lov_read_and_clear_async_rc(lli->lli_clob);
3407 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
3411 ptlrpc_req_finished(req);
3413 if (S_ISREG(inode->i_mode)) {
3414 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
3416 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
3417 if (rc == 0 && err < 0)
3420 fd->fd_write_failed = true;
3422 fd->fd_write_failed = false;
3425 #ifdef HAVE_FILE_FSYNC_4ARGS
3427 inode_unlock(inode);
/* ->lock()/->flock() handler: translate a kernel struct file_lock
 * (POSIX fcntl locks and BSD flocks) into an LDLM_FLOCK enqueue sent to
 * the MDS, then mirror the result into the local lock tables via
 * locks_lock_file_wait() (or the older flock/posix variants). If the
 * local step fails after a successful enqueue, the remote lock is
 * dropped again with an LCK_NL enqueue. */
3433 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
3435 struct inode *inode = file_inode(file);
3436 struct ll_sb_info *sbi = ll_i2sbi(inode);
3437 struct ldlm_enqueue_info einfo = {
3438 .ei_type = LDLM_FLOCK,
3439 .ei_cb_cp = ldlm_flock_completion_ast,
3440 .ei_cbdata = file_lock,
3442 struct md_op_data *op_data;
3443 struct lustre_handle lockh = { 0 };
3444 union ldlm_policy_data flock = { { 0 } };
3445 int fl_type = file_lock->fl_type;
3451 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
3452 PFID(ll_inode2fid(inode)), file_lock);
3454 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
3456 if (file_lock->fl_flags & FL_FLOCK) {
3457 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
3458 /* flocks are whole-file locks */
3459 flock.l_flock.end = OFFSET_MAX;
3460 /* For flocks owner is determined by the local file desctiptor*/
3461 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
3462 } else if (file_lock->fl_flags & FL_POSIX) {
3463 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
3464 flock.l_flock.start = file_lock->fl_start;
3465 flock.l_flock.end = file_lock->fl_end;
3469 flock.l_flock.pid = file_lock->fl_pid;
3471 /* Somewhat ugly workaround for svc lockd.
3472 * lockd installs custom fl_lmops->lm_compare_owner that checks
3473 * for the fl_owner to be the same (which it always is on local node
3474 * I guess between lockd processes) and then compares pid.
3475 * As such we assign pid to the owner field to make it all work,
3476 * conflict with normal locks is unlikely since pid space and
3477 * pointer space for current->files are not intersecting */
3478 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
3479 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Map the fcntl lock type onto an LDLM mode: read -> PR,
 * write -> PW, unlock -> NL (see comment below). */
3483 einfo.ei_mode = LCK_PR;
3486 /* An unlock request may or may not have any relation to
3487 * existing locks so we may not be able to pass a lock handle
3488 * via a normal ldlm_lock_cancel() request. The request may even
3489 * unlock a byte range in the middle of an existing lock. In
3490 * order to process an unlock request we need all of the same
3491 * information that is given with a normal read or write record
3492 * lock request. To avoid creating another ldlm unlock (cancel)
3493 * message we'll treat a LCK_NL flock request as an unlock. */
3494 einfo.ei_mode = LCK_NL;
3497 einfo.ei_mode = LCK_PW;
3500 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* Non-blocking requests map to LDLM_FL_BLOCK_NOWAIT; F_GETLK-style
 * queries map to LDLM_FL_TEST_LOCK. */
3515 flags = LDLM_FL_BLOCK_NOWAIT;
3521 flags = LDLM_FL_TEST_LOCK;
3524 CERROR("unknown fcntl lock command: %d\n", cmd);
3528 /* Save the old mode so that if the mode in the lock changes we
3529 * can decrement the appropriate reader or writer refcount. */
3530 file_lock->fl_type = einfo.ei_mode;
3532 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
3533 LUSTRE_OPC_ANY, NULL);
3534 if (IS_ERR(op_data))
3535 RETURN(PTR_ERR(op_data));
3537 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags=%#llx, mode=%u, "
3538 "start=%llu, end=%llu\n", PFID(ll_inode2fid(inode)),
3539 flock.l_flock.pid, flags, einfo.ei_mode,
3540 flock.l_flock.start, flock.l_flock.end);
3542 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, op_data, &lockh,
3545 /* Restore the file lock type if not TEST lock. */
3546 if (!(flags & LDLM_FL_TEST_LOCK))
3547 file_lock->fl_type = fl_type;
3549 #ifdef HAVE_LOCKS_LOCK_FILE_WAIT
3550 if ((rc == 0 || file_lock->fl_type == F_UNLCK) &&
3551 !(flags & LDLM_FL_TEST_LOCK))
3552 rc2 = locks_lock_file_wait(file, file_lock);
3554 if ((file_lock->fl_flags & FL_FLOCK) &&
3555 (rc == 0 || file_lock->fl_type == F_UNLCK))
3556 rc2 = flock_lock_file_wait(file, file_lock);
3557 if ((file_lock->fl_flags & FL_POSIX) &&
3558 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
3559 !(flags & LDLM_FL_TEST_LOCK))
3560 rc2 = posix_lock_file_wait(file, file_lock);
3561 #endif /* HAVE_LOCKS_LOCK_FILE_WAIT */
/* Local bookkeeping failed: undo the remote lock we just took by
 * enqueueing it again in NL (unlock) mode. */
3563 if (rc2 && file_lock->fl_type != F_UNLCK) {
3564 einfo.ei_mode = LCK_NL;
3565 md_enqueue(sbi->ll_md_exp, &einfo, &flock, op_data,
3570 ll_finish_md_op_data(op_data);
/* Look up the FID of @name under @parent with an MDS getattr-by-name
 * RPC; on success store it in *fid and, when @inode is non-NULL,
 * instantiate the inode from the reply via ll_prep_inode(). */
3575 int ll_get_fid_by_name(struct inode *parent, const char *name,
3576 int namelen, struct lu_fid *fid,
3577 struct inode **inode)
3579 struct md_op_data *op_data = NULL;
3580 struct mdt_body *body;
3581 struct ptlrpc_request *req;
3585 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
3586 LUSTRE_OPC_ANY, NULL);
3587 if (IS_ERR(op_data))
3588 RETURN(PTR_ERR(op_data));
3590 op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
3591 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
3592 ll_finish_md_op_data(op_data);
/* A reply without an MDT body is treated as -EFAULT. */
3596 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3598 GOTO(out_req, rc = -EFAULT);
3600 *fid = body->mbo_fid1;
3603 rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
3605 ptlrpc_req_finished(req);
/* Migrate the entry @name under @parent to MDT @mdtidx ("lfs migrate -m").
 * Resolves the child inode (dcache first, then by-name RPC), refuses to
 * migrate the filesystem root, skips the move if the child already lives
 * on the target MDT, and for regular files takes a write lease plus data
 * version so the server can detect concurrent modification. The actual
 * move is an MDS rename with CLI_MIGRATE/MDS_RENAME_MIGRATE set. */
3609 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
3610 const char *name, int namelen)
3612 struct dentry *dchild = NULL;
3613 struct inode *child_inode = NULL;
3614 struct md_op_data *op_data;
3615 struct ptlrpc_request *request = NULL;
3616 struct obd_client_handle *och = NULL;
3618 struct mdt_body *body;
3620 __u64 data_version = 0;
3623 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
3624 name, PFID(ll_inode2fid(parent)), mdtidx);
3626 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
3627 0, LUSTRE_OPC_ANY, NULL);
3628 if (IS_ERR(op_data))
3629 RETURN(PTR_ERR(op_data));
3631 /* Get child FID first */
3632 qstr.hash = ll_full_name_hash(file_dentry(file), name, namelen);
3635 dchild = d_lookup(file_dentry(file), &qstr);
3636 if (dchild != NULL) {
3637 if (dchild->d_inode != NULL)
3638 child_inode = igrab(dchild->d_inode);
/* Not in the dcache: resolve the FID with a by-name RPC. */
3642 if (child_inode == NULL) {
3643 rc = ll_get_fid_by_name(parent, name, namelen,
3644 &op_data->op_fid3, &child_inode);
3649 if (child_inode == NULL)
3650 GOTO(out_free, rc = -EINVAL);
3653 * lfs migrate command needs to be blocked on the client
3654 * by checking the migrate FID against the FID of the
3657 if (child_inode == parent->i_sb->s_root->d_inode)
3658 GOTO(out_iput, rc = -EINVAL);
3660 inode_lock(child_inode);
3661 op_data->op_fid3 = *ll_inode2fid(child_inode);
3662 if (!fid_is_sane(&op_data->op_fid3)) {
3663 CERROR("%s: migrate %s, but FID "DFID" is insane\n",
3664 ll_get_fsname(parent->i_sb, NULL, 0), name,
3665 PFID(&op_data->op_fid3));
3666 GOTO(out_unlock, rc = -EINVAL);
3669 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
3671 GOTO(out_unlock, rc);
/* Already on the requested MDT: nothing to do. */
3674 CDEBUG(D_INFO, "%s: "DFID" is already on MDT%04x\n", name,
3675 PFID(&op_data->op_fid3), mdtidx);
3676 GOTO(out_unlock, rc = 0);
3679 if (S_ISREG(child_inode->i_mode)) {
3680 och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
3684 GOTO(out_unlock, rc);
3687 rc = ll_data_version(child_inode, &data_version,
3690 GOTO(out_close, rc);
3692 op_data->op_handle = och->och_fh;
3693 op_data->op_data = och->och_mod;
3694 op_data->op_data_version = data_version;
3695 op_data->op_lease_handle = och->och_lease_handle;
3696 op_data->op_bias |= MDS_RENAME_MIGRATE;
3699 op_data->op_mds = mdtidx;
3700 op_data->op_cli_flags = CLI_MIGRATE;
3701 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
3702 namelen, name, namelen, &request);
3704 LASSERT(request != NULL);
3705 ll_update_times(request, parent);
3707 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
3708 LASSERT(body != NULL);
3710 /* If the server does release layout lock, then we cleanup
3711 * the client och here, otherwise release it in out_close: */
3713 body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
3714 obd_mod_put(och->och_mod);
3715 md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp,
3717 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
3723 if (request != NULL) {
3724 ptlrpc_req_finished(request);
3728 /* Try again if the file layout has changed. */
3729 if (rc == -EAGAIN && S_ISREG(child_inode->i_mode))
3733 if (och != NULL) /* close the file */
3734 ll_lease_close(och, child_inode, NULL);
3736 clear_nlink(child_inode);
3738 inode_unlock(child_inode);
3742 ll_finish_md_op_data(op_data);
/* Lock handler used when flock support is disabled; body not visible
 * here — presumably rejects the request (TODO confirm against full
 * source). */
3747 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3755 * test if some locks matching bits and l_req_mode are acquired
3756 * - bits can be in different locks
3757 * - if found clear the common lock bits in *bits
3758 * - the bits not found, are kept in *bits
3760 * \param bits [IN] searched lock bits [IN]
3761 * \param l_req_mode [IN] searched lock mode
3762 * \retval boolean, true iff all bits are found
3764 int ll_have_md_lock(struct inode *inode, __u64 *bits, enum ldlm_mode l_req_mode)
3766 struct lustre_handle lockh;
3767 union ldlm_policy_data policy;
/* LCK_MINMODE means "any mode": match against all of CR/CW/PR/PW. */
3768 enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
3769 (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
3778 fid = &ll_i2info(inode)->lli_fid;
3779 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3780 ldlm_lockname[mode]);
3782 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe each requested inodebit individually; a matched lock may
 * cover several bits, all of which are cleared from *bits. */
3783 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3784 policy.l_inodebits.bits = *bits & (1 << i);
3785 if (policy.l_inodebits.bits == 0)
3788 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3789 &policy, mode, &lockh)) {
3790 struct ldlm_lock *lock;
3792 lock = ldlm_handle2lock(&lockh);
3795 ~(lock->l_policy_data.l_inodebits.bits);
3796 LDLM_LOCK_PUT(lock);
3798 *bits &= ~policy.l_inodebits.bits;
/* Try to match (and, unlike ll_have_md_lock, take a reference on) an
 * existing granted MDS inodebits lock covering @bits in mode @mode;
 * the matched handle is returned through @lockh. */
3805 enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
3806 struct lustre_handle *lockh, __u64 flags,
3807 enum ldlm_mode mode)
3809 union ldlm_policy_data policy = { .l_inodebits = { bits } };
3814 fid = &ll_i2info(inode)->lli_fid;
3815 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3817 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3818 fid, LDLM_IBITS, &policy, mode, lockh);
/* Post-process the result of an inode revalidation RPC: -ENOENT on a
 * plain (non-striped) inode means the object was unlinked and is mapped
 * to success after nlink handling; striped directories with bad stripes
 * are asked to revalidate again; other errors are logged (quietly for
 * -EACCES/-EIDRM) and propagated. */
3823 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3825 /* Already unlinked. Just update nlink and return success */
3826 if (rc == -ENOENT) {
3828 /* If it is striped directory, and there is bad stripe
3829 * Let's revalidate the dentry again, instead of returning
3831 if (S_ISDIR(inode->i_mode) &&
3832 ll_i2info(inode)->lli_lsm_md != NULL)
3835 /* This path cannot be hit for regular files unless in
3836 * case of obscure races, so no need to to validate
3838 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3840 } else if (rc != 0) {
3841 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3842 "%s: revalidate FID "DFID" error: rc = %d\n",
3843 ll_get_fsname(inode->i_sb, NULL, 0),
3844 PFID(ll_inode2fid(inode)), rc);
/* Refresh the inode's metadata from the MDS for the lock bits in @ibits.
 * If the server supports OBD_CONNECT_ATTRFID, do a by-FID intent
 * getattr/lookup (md_intent_lock) and finish the dentry revalidation;
 * otherwise, only when no matching MDS lock is already held locally,
 * fall back to a plain md_getattr and rebuild the inode from the reply. */
3850 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3852 struct inode *inode = dentry->d_inode;
3853 struct ptlrpc_request *req = NULL;
3854 struct obd_export *exp;
3858 LASSERT(inode != NULL);
3860 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3861 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3863 exp = ll_i2mdexp(inode);
3865 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3866 * But under CMD case, it caused some lock issues, should be fixed
3867 * with new CMD ibits lock. See bug 12718 */
3868 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3869 struct lookup_intent oit = { .it_op = IT_GETATTR };
3870 struct md_op_data *op_data;
/* A pure LOOKUP-bit revalidation uses an IT_LOOKUP intent. */
3872 if (ibits == MDS_INODELOCK_LOOKUP)
3873 oit.it_op = IT_LOOKUP;
3875 /* Call getattr by fid, so do not provide name at all. */
3876 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3877 dentry->d_inode, NULL, 0, 0,
3878 LUSTRE_OPC_ANY, NULL);
3879 if (IS_ERR(op_data))
3880 RETURN(PTR_ERR(op_data));
3882 rc = md_intent_lock(exp, op_data, &oit, &req,
3883 &ll_md_blocking_ast, 0);
3884 ll_finish_md_op_data(op_data);
3886 rc = ll_inode_revalidate_fini(inode, rc);
3890 rc = ll_revalidate_it_finish(req, &oit, dentry);
3892 ll_intent_release(&oit);
3896 /* Unlinked? Unhash dentry, so it is not picked up later by
3897 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3898 here to preserve get_cwd functionality on 2.6.
3900 if (!dentry->d_inode->i_nlink) {
3901 ll_lock_dcache(inode);
3902 d_lustre_invalidate(dentry, 0);
3903 ll_unlock_dcache(inode);
3906 ll_lookup_finish_locks(&oit, dentry);
3907 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3908 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3909 u64 valid = OBD_MD_FLGETATTR;
3910 struct md_op_data *op_data;
/* Regular files also need the EA (layout) size in the reply. */
3913 if (S_ISREG(inode->i_mode)) {
3914 rc = ll_get_default_mdsize(sbi, &ealen);
3917 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3920 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3921 0, ealen, LUSTRE_OPC_ANY,
3923 if (IS_ERR(op_data))
3924 RETURN(PTR_ERR(op_data));
3926 op_data->op_valid = valid;
3927 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3928 ll_finish_md_op_data(op_data);
3930 rc = ll_inode_revalidate_fini(inode, rc);
3934 rc = ll_prep_inode(&inode, req, NULL, NULL);
3937 ptlrpc_req_finished(req);
/* For a striped directory, aggregate the per-stripe attributes from all
 * MDTs (md_merge_attr) and apply the merged nlink/blocks/size and
 * a/m/ctime values to the master inode. */
3941 static int ll_merge_md_attr(struct inode *inode)
3943 struct cl_attr attr = { 0 };
3946 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3947 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3948 &attr, ll_md_blocking_ast);
3952 set_nlink(inode, attr.cat_nlink);
3953 inode->i_blocks = attr.cat_blocks;
3954 i_size_write(inode, attr.cat_size);
3956 ll_i2info(inode)->lli_atime = attr.cat_atime;
3957 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3958 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/* Full revalidation: refresh MDS metadata via __ll_inode_revalidate(),
 * then for non-regular inodes merge striped-dir attributes and copy the
 * cached a/m/ctime into the inode; for regular files glimpse the size
 * from the OSTs unless an HSM restore is in progress (the MDT already
 * returned the right size and glimpse would block on the layout lock). */
3964 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3966 struct inode *inode = dentry->d_inode;
3970 rc = __ll_inode_revalidate(dentry, ibits);
3974 /* if object isn't regular file, don't validate size */
3975 if (!S_ISREG(inode->i_mode)) {
3976 if (S_ISDIR(inode->i_mode) &&
3977 ll_i2info(inode)->lli_lsm_md != NULL) {
3978 rc = ll_merge_md_attr(inode);
3983 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3984 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3985 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3987 /* In case of restore, the MDT has the right size and has
3988 * already send it back without granting the layout lock,
3989 * inode is up-to-date so glimpse is useless.
3990 * Also to glimpse we need the layout, in case of a running
3991 * restore the MDT holds the layout lock so the glimpse will
3992 * block up to the end of restore (getattr will block)
3994 if (!ll_file_test_flag(ll_i2info(inode), LLIF_FILE_RESTORING))
3995 rc = ll_glimpse_size(inode);
/* Squash a dev_t so both major and minor fit in 8 bits, as required by
 * the compat (32-bit) stat syscalls; see the comment below. */
4000 static inline dev_t ll_compat_encode_dev(dev_t dev)
4002 /* The compat_sys_*stat*() syscalls will fail unless the
4003 * device majors and minors are both less than 256. Note that
4004 * the value returned here will be passed through
4005 * old_encode_dev() in cp_compat_stat(). And so we are not
4006 * trying to return a valid compat (u16) device number, just
4007 * one that will pass the old_valid_dev() check. */
4009 return MKDEV(MAJOR(dev) & 0xff, MINOR(dev) & 0xff);
/*
 * ->getattr() for Lustre files: revalidate LOOKUP|UPDATE inodebits from
 * the MDS, then fill *stat from the (now current) in-core inode.  Two
 * prototypes are supported: the modern path/request_mask form (kernels
 * with HAVE_INODEOPS_ENHANCED_GETATTR) and the legacy vfsmount/dentry
 * form.  NOTE(review): interior lines are elided in this excerpt.
 */
4012 #ifdef HAVE_INODEOPS_ENHANCED_GETATTR
4013 int ll_getattr(const struct path *path, struct kstat *stat,
4014 u32 request_mask, unsigned int flags)
4017 struct dentry *de = path->dentry;
4019 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
4022 struct inode *inode = de->d_inode;
4023 struct ll_sb_info *sbi = ll_i2sbi(inode);
4024 struct ll_inode_info *lli = ll_i2info(inode);
/* Refresh name and attribute bits from the MDS before reporting. */
4027 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
4028 MDS_INODELOCK_LOOKUP);
4029 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
/* Fault-injection hook used by tests to delay getattr. */
4034 OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30);
/* 32-bit API callers need a squashed ino/dev that fits old structs. */
4036 if (ll_need_32bit_api(sbi)) {
4037 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
4038 stat->dev = ll_compat_encode_dev(inode->i_sb->s_dev);
4039 stat->rdev = ll_compat_encode_dev(inode->i_rdev);
4041 stat->ino = inode->i_ino;
4042 stat->dev = inode->i_sb->s_dev;
4043 stat->rdev = inode->i_rdev;
4046 stat->mode = inode->i_mode;
4047 stat->uid = inode->i_uid;
4048 stat->gid = inode->i_gid;
4049 stat->atime = inode->i_atime;
4050 stat->mtime = inode->i_mtime;
4051 stat->ctime = inode->i_ctime;
/* Admin-tunable stat blocksize overrides the inode's if set. */
4052 stat->blksize = sbi->ll_stat_blksize ?: 1 << inode->i_blkbits;
4054 stat->nlink = inode->i_nlink;
4055 stat->size = i_size_read(inode);
4056 stat->blocks = inode->i_blocks;
/*
 * ->fiemap() handler: translate the kernel's fiemap_extent_info into a
 * Lustre struct fiemap, issue the mapping request (ll_do_fiemap), and
 * copy the resulting extents back to the userspace extent array.
 * NOTE(review): interior lines are elided in this excerpt (error
 * checks and the final return are not visible).
 */
4061 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4062 __u64 start, __u64 len)
4066 struct fiemap *fiemap;
4067 unsigned int extent_count = fieinfo->fi_extents_max;
/* One allocation holds the header plus the full extent array. */
4069 num_bytes = sizeof(*fiemap) + (extent_count *
4070 sizeof(struct fiemap_extent));
4071 OBD_ALLOC_LARGE(fiemap, num_bytes);
4076 fiemap->fm_flags = fieinfo->fi_flags;
4077 fiemap->fm_extent_count = fieinfo->fi_extents_max;
4078 fiemap->fm_start = start;
4079 fiemap->fm_length = len;
/* Only the first user extent is copied in; presumably it seeds a
 * continuation of a previous mapping — TODO confirm against
 * ll_do_fiemap()'s contract. */
4080 if (extent_count > 0 &&
4081 copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
4082 sizeof(struct fiemap_extent)) != 0)
4083 GOTO(out, rc = -EFAULT);
4085 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* Report flags/extent count back, then copy mapped extents to user. */
4087 fieinfo->fi_flags = fiemap->fm_flags;
4088 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
4089 if (extent_count > 0 &&
4090 copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
4091 fiemap->fm_mapped_extents *
4092 sizeof(struct fiemap_extent)) != 0)
4093 GOTO(out, rc = -EFAULT);
4095 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * ->get_acl() handler: hand the VFS a referenced copy of the POSIX ACL
 * cached in ll_inode_info, under lli_lock.  Returns NULL when no ACL is
 * cached (posix_acl_dup(NULL) is NULL-safe in this usage — the caller
 * treats NULL as "no ACL").
 */
4099 struct posix_acl *ll_get_acl(struct inode *inode, int type)
4101 struct ll_inode_info *lli = ll_i2info(inode);
4102 struct posix_acl *acl = NULL;
4105 spin_lock(&lli->lli_lock);
4106 /* VFS' acl_permission_check->check_acl will release the refcount */
4107 acl = posix_acl_dup(lli->lli_posix_acl);
4108 spin_unlock(&lli->lli_lock);
/*
 * ->set_acl() handler: serialize the POSIX ACL to its xattr form and
 * store it via __vfs_setxattr() under the appropriate name
 * (system.posix_acl_access / system.posix_acl_default), then refresh
 * the VFS ACL cache.  Default ACLs are only valid on directories.
 * NOTE(review): interior lines (declarations, frees, return) are
 * elided in this excerpt.
 */
4113 #ifdef HAVE_IOP_SET_ACL
4114 #ifdef CONFIG_FS_POSIX_ACL
4115 int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type)
4117 const char *name = NULL;
4124 case ACL_TYPE_ACCESS:
/* Setting an access ACL may also rewrite the file mode bits. */
4126 rc = posix_acl_update_mode(inode, &inode->i_mode, &acl);
4130 name = XATTR_NAME_POSIX_ACL_ACCESS;
4132 case ACL_TYPE_DEFAULT:
/* Default ACLs only make sense on directories. */
4133 if (!S_ISDIR(inode->i_mode))
4134 GOTO(out, rc = acl ? -EACCES : 0);
4135 name = XATTR_NAME_POSIX_ACL_DEFAULT;
4138 GOTO(out, rc = -EINVAL);
/* Serialize the ACL into xattr wire format before storing it. */
4142 size = posix_acl_xattr_size(acl->a_count);
4143 value = kmalloc(size, GFP_NOFS);
4145 GOTO(out, rc = -ENOMEM);
4147 rc = posix_acl_to_xattr(&init_user_ns, acl, value, size);
4152 /* dentry is only used for *.lov attributes so it's safe to be NULL */
4153 rc = __vfs_setxattr(NULL, inode, name, value, size, XATTR_CREATE);
/* Keep the VFS ACL cache coherent with what was stored. */
4158 set_cached_acl(inode, type, acl);
4160 forget_cached_acl(inode, type);
4163 #endif /* CONFIG_FS_POSIX_ACL */
4164 #endif /* HAVE_IOP_SET_ACL */
/*
 * ACL permission callback for older kernels whose generic_permission()
 * takes a check_acl function pointer.  Fetches the cached access ACL
 * and evaluates it against the requested mask; under RCU walk
 * (IPERM_FLAG_RCU) blocking is not allowed, so the 4-arg variant bails
 * out early.  NOTE(review): interior lines are elided in this excerpt.
 */
4166 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
4168 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
4169 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
4171 ll_check_acl(struct inode *inode, int mask)
4174 # ifdef CONFIG_FS_POSIX_ACL
4175 struct posix_acl *acl;
4179 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* RCU-walk mode: cannot block here, let the VFS retry in ref-walk. */
4180 if (flags & IPERM_FLAG_RCU)
4183 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
4188 rc = posix_acl_permission(inode, acl, mask);
/* Drop the reference ll_get_acl() handed us. */
4189 posix_acl_release(acl);
4192 # else /* !CONFIG_FS_POSIX_ACL */
4194 # endif /* CONFIG_FS_POSIX_ACL */
4196 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * ->permission() handler.  Revalidates the root inode on first access,
 * applies root-squash (replacing root's fsuid/fsgid and dropping
 * filesystem capabilities via override_creds()) when configured, and
 * then defers to generic_permission().  Three prototypes cover the
 * kernel API variations.  NOTE(review): interior lines (declarations,
 * error paths, final return) are elided in this excerpt.
 */
4198 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
4199 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
4201 # ifdef HAVE_INODE_PERMISION_2ARGS
4202 int ll_inode_permission(struct inode *inode, int mask)
4204 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
4209 struct ll_sb_info *sbi;
4210 struct root_squash_info *squash;
4211 struct cred *cred = NULL;
4212 const struct cred *old_cred = NULL;
4214 bool squash_id = false;
/* Cannot block during RCU path walk — bail and let VFS retry. */
4217 #ifdef MAY_NOT_BLOCK
4218 if (mask & MAY_NOT_BLOCK)
4220 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
4221 if (flags & IPERM_FLAG_RCU)
4225 /* as root inode are NOT getting validated in lookup operation,
4226 * need to do it before permission check. */
4228 if (inode == inode->i_sb->s_root->d_inode) {
4229 rc = __ll_inode_revalidate(inode->i_sb->s_root,
4230 MDS_INODELOCK_LOOKUP);
4235 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
4236 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
4238 /* squash fsuid/fsgid if needed */
4239 sbi = ll_i2sbi(inode);
4240 squash = &sbi->ll_squash;
/* Squash only when configured, caller is root, and the mount does not
 * carry the norootsquash flag. */
4241 if (unlikely(squash->rsi_uid != 0 &&
4242 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
4243 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
4247 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
4248 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
4249 squash->rsi_uid, squash->rsi_gid);
4251 /* update current process's credentials
4252 * and FS capability */
4253 cred = prepare_creds();
4257 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
4258 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* Drop all filesystem-related capabilities from the squashed creds. */
4259 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
4260 if ((1 << cap) & CFS_CAP_FS_MASK)
4261 cap_lower(cred->cap_effective, cap);
4263 old_cred = override_creds(cred);
4266 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
4267 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
4268 /* restore current process's credentials and FS capability */
4270 revert_creds(old_cred);
/* -o localflock - only provides locally consistent flock locks */
/*
 * Default file_operations (no .flock/.lock entries): flock falls back
 * to the VFS's local-only semantics.  The #ifdef ladder picks the
 * read/write entry points matching the kernel's iter-based or aio-based
 * file I/O API.
 */
4278 struct file_operations ll_file_operations = {
4279 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
4280 # ifdef HAVE_SYNC_READ_WRITE
4281 .read = new_sync_read,
4282 .write = new_sync_write,
4284 .read_iter = ll_file_read_iter,
4285 .write_iter = ll_file_write_iter,
4286 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
4287 .read = ll_file_read,
4288 .aio_read = ll_file_aio_read,
4289 .write = ll_file_write,
4290 .aio_write = ll_file_aio_write,
4291 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
4292 .unlocked_ioctl = ll_file_ioctl,
4293 .open = ll_file_open,
4294 .release = ll_file_release,
4295 .mmap = ll_file_mmap,
4296 .llseek = ll_file_seek,
4297 .splice_read = ll_file_splice_read,
/*
 * file_operations for the default/-o flock mounts: identical to
 * ll_file_operations except .flock and .lock route through
 * ll_file_flock for cluster-coherent file locking.
 */
4302 struct file_operations ll_file_operations_flock = {
4303 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
4304 # ifdef HAVE_SYNC_READ_WRITE
4305 .read = new_sync_read,
4306 .write = new_sync_write,
4307 # endif /* HAVE_SYNC_READ_WRITE */
4308 .read_iter = ll_file_read_iter,
4309 .write_iter = ll_file_write_iter,
4310 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
4311 .read = ll_file_read,
4312 .aio_read = ll_file_aio_read,
4313 .write = ll_file_write,
4314 .aio_write = ll_file_aio_write,
4315 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
4316 .unlocked_ioctl = ll_file_ioctl,
4317 .open = ll_file_open,
4318 .release = ll_file_release,
4319 .mmap = ll_file_mmap,
4320 .llseek = ll_file_seek,
4321 .splice_read = ll_file_splice_read,
4324 .flock = ll_file_flock,
4325 .lock = ll_file_flock
4328 /* These are for -o noflock - to return ENOSYS on flock calls */
/*
 * file_operations for -o noflock mounts: same I/O entry points, but
 * .flock/.lock go to ll_file_noflock so locking requests fail
 * explicitly instead of being silently local-only.
 */
4329 struct file_operations ll_file_operations_noflock = {
4330 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
4331 # ifdef HAVE_SYNC_READ_WRITE
4332 .read = new_sync_read,
4333 .write = new_sync_write,
4334 # endif /* HAVE_SYNC_READ_WRITE */
4335 .read_iter = ll_file_read_iter,
4336 .write_iter = ll_file_write_iter,
4337 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
4338 .read = ll_file_read,
4339 .aio_read = ll_file_aio_read,
4340 .write = ll_file_write,
4341 .aio_write = ll_file_aio_write,
4342 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
4343 .unlocked_ioctl = ll_file_ioctl,
4344 .open = ll_file_open,
4345 .release = ll_file_release,
4346 .mmap = ll_file_mmap,
4347 .llseek = ll_file_seek,
4348 .splice_read = ll_file_splice_read,
4351 .flock = ll_file_noflock,
4352 .lock = ll_file_noflock
/*
 * inode_operations for regular Lustre files.  xattr and ACL entries
 * are conditional on the kernel providing the corresponding iop hooks.
 */
4355 struct inode_operations ll_file_inode_operations = {
4356 .setattr = ll_setattr,
4357 .getattr = ll_getattr,
4358 .permission = ll_inode_permission,
4359 #ifdef HAVE_IOP_XATTR
4360 .setxattr = ll_setxattr,
4361 .getxattr = ll_getxattr,
4362 .removexattr = ll_removexattr,
4364 .listxattr = ll_listxattr,
4365 .fiemap = ll_fiemap,
4366 #ifdef HAVE_IOP_GET_ACL
4367 .get_acl = ll_get_acl,
4369 #ifdef HAVE_IOP_SET_ACL
4370 .set_acl = ll_set_acl,
/*
 * Push a layout configuration into the cl_object stack for this inode
 * (cl_conf_set).  For OBJECT_CONF_SET the associated ldlm layout lock
 * is only allowed to match other requests after the layout has been
 * applied, and the cached layout generation is refreshed from the
 * object.  NOTE(review): interior lines are elided in this excerpt.
 */
4374 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
4376 struct ll_inode_info *lli = ll_i2info(inode);
4377 struct cl_object *obj = lli->lli_clob;
4386 env = cl_env_get(&refcheck);
4388 RETURN(PTR_ERR(env));
4390 rc = cl_conf_set(env, lli->lli_clob, conf);
4394 if (conf->coc_opc == OBJECT_CONF_SET) {
4395 struct ldlm_lock *lock = conf->coc_lock;
4396 struct cl_layout cl = {
4400 LASSERT(lock != NULL);
4401 LASSERT(ldlm_has_layout(lock));
4403 /* it can only be allowed to match after layout is
4404 * applied to inode otherwise false layout would be
4405 * seen. Applying layout shoud happen before dropping
4406 * the intent lock. */
4407 ldlm_lock_allow_match(lock);
/* Record the new layout generation for later staleness checks. */
4409 rc = cl_object_layout_get(env, obj, &cl);
4414 DFID": layout version change: %u -> %u\n",
4415 PFID(&lli->lli_fid), ll_layout_version_get(lli),
4417 ll_layout_version_set(lli, cl.cl_layout_gen);
4421 cl_env_put(env, &refcheck);
4426 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
/*
 * If the layout lock's LVB buffer is empty (lock granted via completion
 * AST rather than immediately), fetch the LOV EA from the MDT with a
 * getxattr RPC and attach a copy to the lock as its LVB under the
 * resource lock.  NOTE(review): interior lines (declarations, some
 * error branches, final return) are elided in this excerpt.
 */
4427 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
4430 struct ll_sb_info *sbi = ll_i2sbi(inode);
4431 struct ptlrpc_request *req;
4432 struct mdt_body *body;
4439 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
4440 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
4441 lock->l_lvb_data, lock->l_lvb_len);
/* Already have LVB data attached — nothing to fetch. */
4443 if (lock->l_lvb_data != NULL)
4446 /* if layout lock was granted right away, the layout is returned
4447 * within DLM_LVB of dlm reply; otherwise if the lock was ever
4448 * blocked and then granted via completion ast, we have to fetch
4449 * layout here. Please note that we can't use the LVB buffer in
4450 * completion AST because it doesn't have a large enough buffer */
4451 rc = ll_get_default_mdsize(sbi, &lmmsize);
4453 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
4454 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
4459 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
4461 GOTO(out, rc = -EPROTO);
4463 lmmsize = body->mbo_eadatasize;
4464 if (lmmsize == 0) /* empty layout */
4467 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
4469 GOTO(out, rc = -EFAULT);
/* Copy the EA out of the RPC reply so the lock owns its own buffer. */
4471 OBD_ALLOC_LARGE(lvbdata, lmmsize);
4472 if (lvbdata == NULL)
4473 GOTO(out, rc = -ENOMEM);
4475 memcpy(lvbdata, lmm, lmmsize);
/* Attach under the resource lock; another thread may have raced us,
 * in which case our copy is freed below. */
4476 lock_res_and_lock(lock);
4477 if (unlikely(lock->l_lvb_data == NULL)) {
4478 lock->l_lvb_type = LVB_T_LAYOUT;
4479 lock->l_lvb_data = lvbdata;
4480 lock->l_lvb_len = lmmsize;
4483 unlock_res_and_lock(lock);
4486 OBD_FREE_LARGE(lvbdata, lmmsize);
4491 ptlrpc_req_finished(req);
4496 * Apply the layout to the inode. Layout lock is held and will be released
/*
 * Given a granted layout lock handle, fetch the layout LVB if needed,
 * apply it to the inode's cl_object (OBJECT_CONF_SET), release the
 * lock, and — if the configuration hit -EBUSY because I/O still uses
 * the old layout — wait for that I/O via OBJECT_CONF_WAIT.
 * NOTE(review): interior lines (loop/retry structure, early-out when
 * lvb_ready, final return) are elided in this excerpt.
 */
4499 static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
4500 struct inode *inode)
4502 struct ll_inode_info *lli = ll_i2info(inode);
4503 struct ll_sb_info *sbi = ll_i2sbi(inode);
4504 struct ldlm_lock *lock;
4505 struct cl_object_conf conf;
4508 bool wait_layout = false;
4511 LASSERT(lustre_handle_is_used(lockh));
4513 lock = ldlm_handle2lock(lockh);
4514 LASSERT(lock != NULL);
4515 LASSERT(ldlm_has_layout(lock));
4517 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured",
4518 PFID(&lli->lli_fid), inode);
4520 /* in case this is a caching lock and reinstate with new inode */
4521 md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL);
4523 lock_res_and_lock(lock);
4524 lvb_ready = ldlm_is_lvb_ready(lock);
4525 unlock_res_and_lock(lock);
4527 /* checking lvb_ready is racy but this is okay. The worst case is
4528 * that multi processes may configure the file on the same time. */
4532 rc = ll_layout_fetch(inode, lock);
4536 /* for layout lock, lmm is stored in lock's lvb.
4537 * lvb_data is immutable if the lock is held so it's safe to access it
4540 * set layout to file. Unlikely this will fail as old layout was
4541 * surely eliminated */
4542 memset(&conf, 0, sizeof conf);
4543 conf.coc_opc = OBJECT_CONF_SET;
4544 conf.coc_inode = inode;
4545 conf.coc_lock = lock;
4546 conf.u.coc_layout.lb_buf = lock->l_lvb_data;
4547 conf.u.coc_layout.lb_len = lock->l_lvb_len;
4548 rc = ll_layout_conf(inode, &conf);
4550 /* refresh layout failed, need to wait */
4551 wait_layout = rc == -EBUSY;
/* Done with the lock reference and the DLM lock itself. */
4554 LDLM_LOCK_PUT(lock);
4555 ldlm_lock_decref(lockh, mode);
4557 /* wait for IO to complete if it's still being used. */
4559 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
4560 ll_get_fsname(inode->i_sb, NULL, 0),
4561 PFID(&lli->lli_fid), inode);
/* OBJECT_CONF_WAIT blocks until in-flight I/O drains from the old
 * layout. */
4563 memset(&conf, 0, sizeof conf);
4564 conf.coc_opc = OBJECT_CONF_WAIT;
4565 conf.coc_inode = inode;
4566 rc = ll_layout_conf(inode, &conf);
4570 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
4571 ll_get_fsname(inode->i_sb, NULL, 0),
4572 PFID(&lli->lli_fid), rc);
4578 * Issue layout intent RPC to MDS.
4579 * \param inode [in] file inode
4580 * \param intent [in] layout intent
4582 * \retval 0 on success
4583 * \retval < 0 error code
/*
 * Sends an IT_LAYOUT intent-lock RPC carrying the layout_intent as
 * opaque op_data; on success the returned lock data is attached to the
 * inode.  Write/truncate intents request FMODE_WRITE so the MDT can
 * instantiate components.  NOTE(review): interior lines are elided in
 * this excerpt.
 */
4585 static int ll_layout_intent(struct inode *inode, struct layout_intent *intent)
4587 struct ll_inode_info *lli = ll_i2info(inode);
4588 struct ll_sb_info *sbi = ll_i2sbi(inode);
4589 struct md_op_data *op_data;
4590 struct lookup_intent it;
4591 struct ptlrpc_request *req;
4595 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
4596 0, 0, LUSTRE_OPC_ANY, NULL);
4597 if (IS_ERR(op_data))
4598 RETURN(PTR_ERR(op_data));
/* The layout intent travels as opaque data in the op_data. */
4600 op_data->op_data = intent;
4601 op_data->op_data_size = sizeof(*intent);
4603 memset(&it, 0, sizeof(it));
4604 it.it_op = IT_LAYOUT;
4605 if (intent->li_opc == LAYOUT_INTENT_WRITE ||
4606 intent->li_opc == LAYOUT_INTENT_TRUNC)
4607 it.it_flags = FMODE_WRITE;
4609 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
4610 ll_get_fsname(inode->i_sb, NULL, 0),
4611 PFID(&lli->lli_fid), inode);
4613 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
4614 &ll_md_blocking_ast, 0);
/* The reply request is not needed once the intent result is parsed. */
4615 if (it.it_request != NULL)
4616 ptlrpc_req_finished(it.it_request);
4617 it.it_request = NULL;
4619 ll_finish_md_op_data(op_data);
4621 /* set lock data in case this is a new lock */
4623 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
4625 ll_intent_drop_lock(&it);
4631 * This function checks if there exists a LAYOUT lock on the client side,
4632 * or enqueues it if it doesn't have one in cache.
4634 * This function will not hold layout lock so it may be revoked any time after
4635 * this function returns. Any operations depend on layout should be redone
4638 * This function should be called before lov_io_init() to get an uptodate
4639 * layout version, the caller should save the version number and after IO
4640 * is finished, this function should be called again to verify that layout
4641 * is not changed during IO time.
/*
 * NOTE(review): interior lines (retry loop, error handling between the
 * cached-lock match and the intent enqueue) are elided in this excerpt.
 */
4643 int ll_layout_refresh(struct inode *inode, __u32 *gen)
4645 struct ll_inode_info *lli = ll_i2info(inode);
4646 struct ll_sb_info *sbi = ll_i2sbi(inode);
4647 struct lustre_handle lockh;
4648 struct layout_intent intent = {
4649 .li_opc = LAYOUT_INTENT_ACCESS,
4651 enum ldlm_mode mode;
/* Fast path: layout locking disabled, or a valid generation cached. */
4655 *gen = ll_layout_version_get(lli);
4656 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
4660 LASSERT(fid_is_sane(ll_inode2fid(inode)));
4661 LASSERT(S_ISREG(inode->i_mode));
4663 /* take layout lock mutex to enqueue layout lock exclusively. */
4664 mutex_lock(&lli->lli_layout_mutex);
4667 /* mostly layout lock is caching on the local side, so try to
4668 * match it before grabbing layout lock mutex. */
4669 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
4670 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
4671 if (mode != 0) { /* hit cached lock */
4672 rc = ll_layout_lock_set(&lockh, mode, inode);
/* No cached lock — ask the MDS via a layout access intent. */
4678 rc = ll_layout_intent(inode, &intent);
4684 *gen = ll_layout_version_get(lli);
4685 mutex_unlock(&lli->lli_layout_mutex);
4691 * Issue layout intent RPC indicating where in a file an IO is about to write.
4693 * \param[in] inode file inode.
4694 * \param[in] start start offset of fille in bytes where an IO is about to
4696 * \param[in] end exclusive end offset in bytes of the write range.
4698 * \retval 0 on success
4699 * \retval < 0 error code
/*
 * Thin wrapper over ll_layout_intent() with a LAYOUT_INTENT_WRITE opc;
 * lets the MDT instantiate layout components covering [start, end).
 * NOTE(review): extent-field initialization and the return statement
 * are elided in this excerpt.
 */
4701 int ll_layout_write_intent(struct inode *inode, __u64 start, __u64 end)
4703 struct layout_intent intent = {
4704 .li_opc = LAYOUT_INTENT_WRITE,
4711 rc = ll_layout_intent(inode, &intent);
4717 * This function send a restore request to the MDT
4719 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
4721 struct hsm_user_request *hur;
4725 len = sizeof(struct hsm_user_request) +
4726 sizeof(struct hsm_user_item);
4727 OBD_ALLOC(hur, len);
4731 hur->hur_request.hr_action = HUA_RESTORE;
4732 hur->hur_request.hr_archive_id = 0;
4733 hur->hur_request.hr_flags = 0;
4734 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
4735 sizeof(hur->hur_user_item[0].hui_fid));
4736 hur->hur_user_item[0].hui_extent.offset = offset;
4737 hur->hur_user_item[0].hui_extent.length = length;
4738 hur->hur_request.hr_itemcount = 1;
4739 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,