4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include <linux/user_namespace.h>
49 #ifdef HAVE_UIDGID_HEADER
50 # include <linux/uidgid.h>
52 #include <lustre/ll_fiemap.h>
54 #include <lustre_ioctl.h>
55 #include <lustre_swab.h>
57 #include "cl_object.h"
58 #include "llite_internal.h"
59 #include "vvp_internal.h"
62 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
64 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
67 static enum llioc_iter
68 ll_iocontrol_call(struct inode *inode, struct file *file,
69 unsigned int cmd, unsigned long arg, int *rcp);
/*
 * Allocate a per-open ll_file_data from the dedicated slab cache.
 * GFP_NOFS avoids re-entering the filesystem during reclaim.
 * NOTE(review): the NULL-check on @fd and the return statement are not
 * visible in this excerpt -- confirm against the full source.
 */
71 static struct ll_file_data *ll_file_data_get(void)
73 struct ll_file_data *fd;
75 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
/* Fresh descriptor: no failed writes recorded yet. */
79 fd->fd_write_failed = false;
/* Return a ll_file_data descriptor to its slab cache. */
84 static void ll_file_data_put(struct ll_file_data *fd)
87 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
91 * Packs all the attributes into @op_data for the CLOSE rpc.
/*
 * Snapshot the client-side inode state (mode, timestamps, size, blocks,
 * flags) into @op_data so the MDT can update its copy at close time.
 */
93 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
94 struct obd_client_handle *och)
98 ll_prep_md_op_data(op_data, inode, NULL, NULL,
99 0, 0, LUSTRE_OPC_ANY, NULL);
101 op_data->op_attr.ia_mode = inode->i_mode;
102 op_data->op_attr.ia_atime = inode->i_atime;
103 op_data->op_attr.ia_mtime = inode->i_mtime;
104 op_data->op_attr.ia_ctime = inode->i_ctime;
105 op_data->op_attr.ia_size = i_size_read(inode);
/* The *_SET variants tell the MDT to use the supplied timestamps verbatim. */
106 op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
107 ATTR_MTIME | ATTR_MTIME_SET |
108 ATTR_CTIME | ATTR_CTIME_SET;
109 op_data->op_attr_blocks = inode->i_blocks;
110 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
/* Identify which open handle this close applies to. */
111 op_data->op_handle = och->och_fh;
113 if (och->och_flags & FMODE_WRITE &&
114 ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED))
115 /* For HSM: if inode data has been modified, pack it so that
116 * MDT can set data dirty flag in the archive. */
117 op_data->op_bias |= MDS_DATA_MODIFIED;
123 * Perform a close, possibly with a bias.
124 * The meaning of "data" depends on the value of "bias".
126 * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
127 * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
/*
 * Send the MDS_CLOSE rpc for @och, optionally carrying an HSM-release or
 * layout-swap intent, then tear down the open-replay state for the handle.
 */
130 static int ll_close_inode_openhandle(struct inode *inode,
131 struct obd_client_handle *och,
132 enum mds_op_bias bias, void *data)
134 struct obd_export *md_exp = ll_i2mdexp(inode);
135 const struct ll_inode_info *lli = ll_i2info(inode);
136 struct md_op_data *op_data;
137 struct ptlrpc_request *req = NULL;
/* No MDC obd behind the export: connection already torn down. */
141 if (class_exp2obd(md_exp) == NULL) {
142 CERROR("%s: invalid MDC connection handle closing "DFID"\n",
143 ll_get_fsname(inode->i_sb, NULL, 0),
144 PFID(&lli->lli_fid));
148 OBD_ALLOC_PTR(op_data);
149 /* We leak openhandle and request here on error, but not much to be
150 * done in OOM case since app won't retry close on error either. */
152 GOTO(out, rc = -ENOMEM);
154 ll_prepare_close(inode, op_data, och);
/* Layout swap: @data is the peer inode whose layout we exchange with. */
156 case MDS_CLOSE_LAYOUT_SWAP:
157 LASSERT(data != NULL);
158 op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
159 op_data->op_data_version = 0;
160 op_data->op_lease_handle = och->och_lease_handle;
161 op_data->op_fid2 = *ll_inode2fid(data);
/* HSM release: @data is the expected data version of the file. */
164 case MDS_HSM_RELEASE:
165 LASSERT(data != NULL);
166 op_data->op_bias |= MDS_HSM_RELEASE;
167 op_data->op_data_version = *(__u64 *)data;
168 op_data->op_lease_handle = och->och_lease_handle;
169 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* Plain close carries no auxiliary data. */
173 LASSERT(data == NULL);
177 rc = md_close(md_exp, op_data, och->och_mod, &req);
/* -EINTR is an expected interruption, do not spam the log for it. */
178 if (rc != 0 && rc != -EINTR)
179 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
180 md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
/* For biased closes, check whether the MDT actually executed the intent. */
183 op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) {
184 struct mdt_body *body;
186 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
187 if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
191 ll_finish_md_op_data(op_data);
/* Handle is gone either way; poison it so stale use is detectable. */
195 md_clear_open_replay_data(md_exp, och);
196 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
199 ptlrpc_req_finished(req); /* This is close request */
/*
 * Drop one reference on the per-mode (read/write/exec) MDS open handle of
 * @inode and, when the last user is gone, send the actual close rpc.
 */
203 int ll_md_real_close(struct inode *inode, fmode_t fmode)
205 struct ll_inode_info *lli = ll_i2info(inode);
206 struct obd_client_handle **och_p;
207 struct obd_client_handle *och;
/* Pick the handle/usecount pair matching the open mode. */
212 if (fmode & FMODE_WRITE) {
213 och_p = &lli->lli_mds_write_och;
214 och_usecount = &lli->lli_open_fd_write_count;
215 } else if (fmode & FMODE_EXEC) {
216 och_p = &lli->lli_mds_exec_och;
217 och_usecount = &lli->lli_open_fd_exec_count;
219 LASSERT(fmode & FMODE_READ);
220 och_p = &lli->lli_mds_read_och;
221 och_usecount = &lli->lli_open_fd_read_count;
224 mutex_lock(&lli->lli_och_mutex);
225 if (*och_usecount > 0) {
226 /* There are still users of this handle, so skip
228 mutex_unlock(&lli->lli_och_mutex);
234 mutex_unlock(&lli->lli_och_mutex);
237 /* There might be a race and this handle may already
239 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
/*
 * Per-file-descriptor close: drop group lock and lease if held, decrement
 * the per-mode open count, and only talk to the MDS when we do not hold a
 * matching OPEN dlm lock locally.  Frees the ll_file_data on the way out.
 */
245 static int ll_md_close(struct inode *inode, struct file *file)
247 union ldlm_policy_data policy = {
248 .l_inodebits = { MDS_INODELOCK_OPEN },
/* TEST_LOCK: probe only, do not take a new reference on the lock. */
250 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
251 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
252 struct ll_inode_info *lli = ll_i2info(inode);
253 struct lustre_handle lockh;
254 enum ldlm_mode lockmode;
258 /* clear group lock, if present */
259 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
260 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
262 if (fd->fd_lease_och != NULL) {
265 /* Usually the lease is not released when the
266 * application crashed, we need to release here. */
267 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
268 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
269 PFID(&lli->lli_fid), rc, lease_broken);
271 fd->fd_lease_och = NULL;
/* A private open handle (from a lease open) is closed directly. */
274 if (fd->fd_och != NULL) {
275 rc = ll_close_inode_openhandle(inode, fd->fd_och, 0, NULL);
280 /* Let's see if we have good enough OPEN lock on the file and if
281 we can skip talking to MDS */
282 mutex_lock(&lli->lli_och_mutex);
283 if (fd->fd_omode & FMODE_WRITE) {
285 LASSERT(lli->lli_open_fd_write_count);
286 lli->lli_open_fd_write_count--;
287 } else if (fd->fd_omode & FMODE_EXEC) {
289 LASSERT(lli->lli_open_fd_exec_count);
290 lli->lli_open_fd_exec_count--;
293 LASSERT(lli->lli_open_fd_read_count);
294 lli->lli_open_fd_read_count--;
296 mutex_unlock(&lli->lli_och_mutex);
/* No cached OPEN ibits lock: must do the real close with the MDS. */
298 if (!md_lock_match(ll_i2mdexp(inode), flags, ll_inode2fid(inode),
299 LDLM_IBITS, &policy, lockmode, &lockh))
300 rc = ll_md_real_close(inode, fd->fd_omode);
303 LUSTRE_FPRIVATE(file) = NULL;
304 ll_file_data_put(fd);
309 /* While this returns an error code, fput() the caller does not, so we need
310 * to make every effort to clean up all of our state here. Also, applications
311 * rarely check close errors and even if an error is returned they will not
312 * re-try the close call.
/*
 * VFS ->release() hook: clean up remote-ACL state, statahead authorization,
 * async write errors, and finally the MDS open handle via ll_md_close().
 */
314 int ll_file_release(struct inode *inode, struct file *file)
316 struct ll_file_data *fd;
317 struct ll_sb_info *sbi = ll_i2sbi(inode);
318 struct ll_inode_info *lli = ll_i2info(inode);
322 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
323 PFID(ll_inode2fid(inode)), inode);
/* Remote-client ACL bookkeeping is only kept on the fs root inode. */
325 #ifdef CONFIG_FS_POSIX_ACL
326 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
327 inode == inode->i_sb->s_root->d_inode) {
328 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
331 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
332 fd->fd_flags &= ~LL_FILE_RMTACL;
333 rct_del(&sbi->ll_rct, current_pid());
334 et_search_free(&sbi->ll_et, current_pid());
/* Do not count releases of the root inode in the stats. */
339 if (inode->i_sb->s_root != file->f_path.dentry)
340 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
341 fd = LUSTRE_FPRIVATE(file);
344 /* The last ref on @file, maybe not the owner pid of statahead,
345 * because parent and child process can share the same file handle. */
346 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
347 ll_deauthorize_statahead(inode, fd);
/* Root inode has no MDS open handle to close -- just free the fd. */
349 if (inode->i_sb->s_root == file->f_path.dentry) {
350 LUSTRE_FPRIVATE(file) = NULL;
351 ll_file_data_put(fd);
/* Surface any asynchronous write error to this close() call. */
355 if (!S_ISDIR(inode->i_mode)) {
356 if (lli->lli_clob != NULL)
357 lov_read_and_clear_async_rc(lli->lli_clob);
358 lli->lli_async_rc = 0;
361 rc = ll_md_close(inode, file);
363 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
364 libcfs_debug_dumplog();
/*
 * Issue an intent-based open to the MDS for @file (open-by-fid), update the
 * local inode from the reply, and attach the returned dlm lock data.
 * @lmm/@lmmsize optionally carry striping info for the request.
 */
369 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
370 struct lookup_intent *itp)
372 struct dentry *de = file->f_path.dentry;
373 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
374 struct dentry *parent = de->d_parent;
375 const char *name = NULL;
377 struct md_op_data *op_data;
378 struct ptlrpc_request *req = NULL;
382 LASSERT(parent != NULL);
383 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
385 /* if server supports open-by-fid, or file name is invalid, don't pack
386 * name in open request */
387 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
388 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
389 name = de->d_name.name;
390 len = de->d_name.len;
393 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
394 name, len, 0, LUSTRE_OPC_ANY, NULL);
396 RETURN(PTR_ERR(op_data));
397 op_data->op_data = lmm;
398 op_data->op_data_size = lmmsize;
400 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
401 &ll_md_blocking_ast, 0);
402 ll_finish_md_op_data(op_data);
404 /* reason for keep own exit path - don`t flood log
405 * with messages with -ESTALE errors.
/* Open did not happen (or failed): release any server-side handle. */
407 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
408 it_open_error(DISP_OPEN_OPEN, itp))
410 ll_release_openhandle(de, itp);
414 if (it_disposition(itp, DISP_LOOKUP_NEG))
415 GOTO(out, rc = -ENOENT);
417 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
418 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
419 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the reply, then stash the lock handle on it. */
423 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
424 if (!rc && itp->d.lustre.it_lock_mode)
425 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
428 ptlrpc_req_finished(req);
429 ll_intent_drop_lock(itp);
/*
 * Populate @och from the mdt_body in the intent's reply (open handle, fid,
 * lease lock handle, flags) and register it for open replay on recovery.
 */
434 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
435 struct obd_client_handle *och)
437 struct ptlrpc_request *req = it->d.lustre.it_data;
438 struct mdt_body *body;
440 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
441 och->och_fh = body->mbo_handle;
442 och->och_fid = body->mbo_fid1;
443 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
444 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
445 och->och_flags = it->it_flags;
447 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish the client-side part of an open: optionally fill @och from the
 * intent, then attach the initialized ll_file_data to the struct file.
 */
450 static int ll_local_open(struct file *file, struct lookup_intent *it,
451 struct ll_file_data *fd, struct obd_client_handle *och)
453 struct inode *inode = file->f_path.dentry->d_inode;
456 LASSERT(!LUSTRE_FPRIVATE(file));
463 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
468 LUSTRE_FPRIVATE(file) = fd;
469 ll_readahead_init(inode, &fd->fd_ras);
/* Remember the effective open mode for the eventual close. */
470 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
472 /* ll_cl_context initialize */
473 rwlock_init(&fd->fd_lock);
474 INIT_LIST_HEAD(&fd->fd_lccs);
479 /* Open a file, and (for the very first open) create objects on the OSTs at
480 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
481 * creation or open until ll_lov_setstripe() ioctl is called.
483 * If we already have the stripe MD locally then we don't request it in
484 * md_open(), by passing a lmm_size = 0.
486 * It is up to the application to ensure no other processes open this file
487 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
488 * used. We might be able to avoid races of that sort by getting lli_open_sem
489 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
490 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * VFS ->open() hook.  Either reuses an intent prepared by lookup (stored in
 * file->private_data) or builds a fresh IT_OPEN intent, shares the per-mode
 * MDS open handle among fds, and wires up the ll_file_data.
 */
492 int ll_file_open(struct inode *inode, struct file *file)
494 struct ll_inode_info *lli = ll_i2info(inode);
495 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
496 .it_flags = file->f_flags };
497 struct obd_client_handle **och_p = NULL;
498 __u64 *och_usecount = NULL;
499 struct ll_file_data *fd;
503 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
504 PFID(ll_inode2fid(inode)), inode, file->f_flags);
506 it = file->private_data; /* XXX: compat macro */
507 file->private_data = NULL; /* prevent ll_local_open assertion */
509 fd = ll_file_data_get();
511 GOTO(out_openerr, rc = -ENOMEM);
514 if (S_ISDIR(inode->i_mode))
515 ll_authorize_statahead(inode, fd);
/* Opening the fs root needs no MDS open handle. */
517 if (inode->i_sb->s_root == file->f_path.dentry) {
518 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own IT_OPEN from f_flags. */
522 if (!it || !it->d.lustre.it_disposition) {
523 /* Convert f_flags into access mode. We cannot use file->f_mode,
524 * because everything but O_ACCMODE mask was stripped from
/* O_ACCMODE trick: (flags + 1) maps O_RDONLY/O_WRONLY/O_RDWR to FMODE bits. */
526 if ((oit.it_flags + 1) & O_ACCMODE)
528 if (file->f_flags & O_TRUNC)
529 oit.it_flags |= FMODE_WRITE;
531 /* kernel only call f_op->open in dentry_open. filp_open calls
532 * dentry_open after call to open_namei that checks permissions.
533 * Only nfsd_open call dentry_open directly without checking
534 * permissions and because of that this code below is safe. */
535 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
536 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
538 /* We do not want O_EXCL here, presumably we opened the file
539 * already? XXX - NFS implications? */
540 oit.it_flags &= ~O_EXCL;
542 /* bug20584, if "it_flags" contains O_CREAT, the file will be
543 * created if necessary, then "IT_CREAT" should be set to keep
544 * consistent with it */
545 if (oit.it_flags & O_CREAT)
546 oit.it_op |= IT_CREAT;
552 /* Let's see if we have file open on MDS already. */
553 if (it->it_flags & FMODE_WRITE) {
554 och_p = &lli->lli_mds_write_och;
555 och_usecount = &lli->lli_open_fd_write_count;
556 } else if (it->it_flags & FMODE_EXEC) {
557 och_p = &lli->lli_mds_exec_och;
558 och_usecount = &lli->lli_open_fd_exec_count;
560 och_p = &lli->lli_mds_read_och;
561 och_usecount = &lli->lli_open_fd_read_count;
564 mutex_lock(&lli->lli_och_mutex);
565 if (*och_p) { /* Open handle is present */
566 if (it_disposition(it, DISP_OPEN_OPEN)) {
567 /* Well, there's extra open request that we do not need,
568 let's close it somehow. This will decref request. */
569 rc = it_open_error(DISP_OPEN_OPEN, it);
571 mutex_unlock(&lli->lli_och_mutex);
572 GOTO(out_openerr, rc);
575 ll_release_openhandle(file->f_path.dentry, it);
/* Reuse the cached handle; och==NULL means "do not fill a new one". */
579 rc = ll_local_open(file, it, fd, NULL);
582 mutex_unlock(&lli->lli_och_mutex);
583 GOTO(out_openerr, rc);
586 LASSERT(*och_usecount == 0);
587 if (!it->d.lustre.it_disposition) {
588 /* We cannot just request lock handle now, new ELC code
589 means that one of other OPEN locks for this file
590 could be cancelled, and since blocking ast handler
591 would attempt to grab och_mutex as well, that would
592 result in a deadlock */
593 mutex_unlock(&lli->lli_och_mutex);
595 * Normally called under two situations:
597 * 2. A race/condition on MDS resulting in no open
598 * handle to be returned from LOOKUP|OPEN request,
599 * for example if the target entry was a symlink.
601 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
603 * Always specify MDS_OPEN_BY_FID because we don't want
604 * to get file with different fid.
606 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
607 rc = ll_intent_file_open(file, NULL, 0, it);
609 GOTO(out_openerr, rc);
613 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
615 GOTO(out_och_free, rc = -ENOMEM);
619 /* md_intent_lock() didn't get a request ref if there was an
620 * open error, so don't do cleanup on the request here
622 /* XXX (green): Should not we bail out on any error here, not
623 * just open error? */
624 rc = it_open_error(DISP_OPEN_OPEN, it);
626 GOTO(out_och_free, rc);
628 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
629 "inode %p: disposition %x, status %d\n", inode,
630 it_disposition(it, ~0), it->d.lustre.it_status);
632 rc = ll_local_open(file, it, fd, *och_p);
634 GOTO(out_och_free, rc);
636 mutex_unlock(&lli->lli_och_mutex);
639 /* Must do this outside lli_och_mutex lock to prevent deadlock where
640 different kind of OPEN lock for this same inode gets cancelled
641 by ldlm_cancel_lru */
642 if (!S_ISREG(inode->i_mode))
643 GOTO(out_och_free, rc);
645 cl_lov_delay_create_clear(&file->f_flags);
646 GOTO(out_och_free, rc);
/* Error/common exit: undo handle allocation and fd state as needed. */
650 if (och_p && *och_p) {
651 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
652 *och_p = NULL; /* OBD_FREE writes some magic there */
655 mutex_unlock(&lli->lli_och_mutex);
658 if (lli->lli_opendir_key == fd)
659 ll_deauthorize_statahead(inode, fd);
661 ll_file_data_put(fd);
663 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* Drop the extra request reference held for DISP_ENQ_OPEN_REF. */
666 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
667 ptlrpc_req_finished(it->d.lustre.it_data);
668 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease locks: when another client conflicts, cancel the
 * lease lock asynchronously.  Unlike ll_md_blocking_ast() it does not touch
 * the open handle (see the LDLM_FL_EXCL note in ll_lease_open()).
 */
674 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
675 struct ldlm_lock_desc *desc, void *data, int flag)
678 struct lustre_handle lockh;
682 case LDLM_CB_BLOCKING:
683 ldlm_lock2handle(lock, &lockh);
684 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
686 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
690 case LDLM_CB_CANCELING:
698 * Acquire a lease and open the file.
/*
 * Open @inode with an MDS lease in @fmode (read or write only).  If @file
 * is given, the caller must already hold a compatible sole open of the file
 * and the existing open handle is passed along so the MDT can match owners.
 * Returns the new obd_client_handle or an ERR_PTR on failure.
 */
700 static struct obd_client_handle *
701 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
704 struct lookup_intent it = { .it_op = IT_OPEN };
705 struct ll_sb_info *sbi = ll_i2sbi(inode);
706 struct md_op_data *op_data;
707 struct ptlrpc_request *req = NULL;
708 struct lustre_handle old_handle = { 0 };
709 struct obd_client_handle *och = NULL;
/* Leases are only defined for pure read or pure write mode. */
714 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
715 RETURN(ERR_PTR(-EINVAL));
718 struct ll_inode_info *lli = ll_i2info(inode);
719 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
720 struct obd_client_handle **och_p;
/* Requested mode must be covered by the fd's mode; exec fds excluded. */
723 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
724 RETURN(ERR_PTR(-EPERM));
726 /* Get the openhandle of the file */
728 mutex_lock(&lli->lli_och_mutex);
/* Only one lease per fd. */
729 if (fd->fd_lease_och != NULL) {
730 mutex_unlock(&lli->lli_och_mutex);
734 if (fd->fd_och == NULL) {
735 if (file->f_mode & FMODE_WRITE) {
736 LASSERT(lli->lli_mds_write_och != NULL);
737 och_p = &lli->lli_mds_write_och;
738 och_usecount = &lli->lli_open_fd_write_count;
740 LASSERT(lli->lli_mds_read_och != NULL);
741 och_p = &lli->lli_mds_read_och;
742 och_usecount = &lli->lli_open_fd_read_count;
/* Steal the shared handle only when this fd is its sole user. */
744 if (*och_usecount == 1) {
751 mutex_unlock(&lli->lli_och_mutex);
752 if (rc < 0) /* more than 1 opener */
755 LASSERT(fd->fd_och != NULL);
756 old_handle = fd->fd_och->och_fh;
761 RETURN(ERR_PTR(-ENOMEM));
763 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
764 LUSTRE_OPC_ANY, NULL);
766 GOTO(out, rc = PTR_ERR(op_data));
768 /* To tell the MDT this openhandle is from the same owner */
769 op_data->op_handle = old_handle;
771 it.it_flags = fmode | open_flags;
772 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
773 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
774 &ll_md_blocking_lease_ast,
775 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
776 * it can be cancelled which may mislead applications that the lease is
778 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
779 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
780 * doesn't deal with openhandle, so normal openhandle will be leaked. */
781 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
782 ll_finish_md_op_data(op_data);
783 ptlrpc_req_finished(req);
785 GOTO(out_release_it, rc);
787 if (it_disposition(&it, DISP_LOOKUP_NEG))
788 GOTO(out_release_it, rc = -ENOENT);
790 rc = it_open_error(DISP_OPEN_OPEN, &it);
792 GOTO(out_release_it, rc);
794 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
795 ll_och_fill(sbi->ll_md_exp, &it, och);
797 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
798 GOTO(out_close, rc = -EOPNOTSUPP);
800 /* already get lease, handle lease lock */
801 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
802 if (it.d.lustre.it_lock_mode == 0 ||
803 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
804 /* open lock must return for lease */
805 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
806 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
807 it.d.lustre.it_lock_bits);
808 GOTO(out_close, rc = -EPROTO);
811 ll_intent_release(&it);
/* Error paths: undo lock, handle and intent in reverse order. */
815 /* Cancel open lock */
816 if (it.d.lustre.it_lock_mode != 0) {
817 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
818 it.d.lustre.it_lock_mode);
819 it.d.lustre.it_lock_mode = 0;
820 och->och_lease_handle.cookie = 0ULL;
822 rc2 = ll_close_inode_openhandle(inode, och, 0, NULL);
824 CERROR("%s: error closing file "DFID": %d\n",
825 ll_get_fsname(inode->i_sb, NULL, 0),
826 PFID(&ll_i2info(inode)->lli_fid), rc2);
827 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
829 ll_intent_release(&it);
837 * Check whether a layout swap can be done between two inodes.
839 * \param[in] inode1 First inode to check
840 * \param[in] inode2 Second inode to check
842 * \retval 0 on success, layout swap can be performed between both inodes
843 * \retval negative error code if requirements are not met
/*
 * Requirements: both regular files, both writable by the caller, and both
 * on the same superblock (same Lustre filesystem).
 */
845 static int ll_check_swap_layouts_validity(struct inode *inode1,
846 struct inode *inode2)
848 if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
851 if (inode_permission(inode1, MAY_WRITE) ||
852 inode_permission(inode2, MAY_WRITE))
855 if (inode1->i_sb != inode2->i_sb)
/*
 * Close @och with a MDS_CLOSE_LAYOUT_SWAP bias so the MDT atomically swaps
 * the layouts of @inode and @inode2 as part of the close.  Takes ownership
 * of @och (it is freed by ll_close_inode_openhandle() on the success path).
 */
861 static int ll_swap_layouts_close(struct obd_client_handle *och,
862 struct inode *inode, struct inode *inode2)
864 const struct lu_fid *fid1 = ll_inode2fid(inode);
865 const struct lu_fid *fid2;
869 CDEBUG(D_INODE, "%s: biased close of file "DFID"\n",
870 ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
872 rc = ll_check_swap_layouts_validity(inode, inode2);
874 GOTO(out_free_och, rc);
876 /* We now know that inode2 is a lustre inode */
877 fid2 = ll_inode2fid(inode2);
/* Swapping a file with itself is meaningless -- reject equal fids. */
879 rc = lu_fid_cmp(fid1, fid2);
881 GOTO(out_free_och, rc = -EINVAL);
883 /* Close the file and swap layouts between inode & inode2.
884 * NB: lease lock handle is released in mdc_close_layout_swap_pack()
885 * because we still need it to pack l_remote_handle to MDT. */
886 rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP,
889 och = NULL; /* freed in ll_close_inode_openhandle() */
899 * Release lease and close the file.
900 * It will check if the lease has ever broken.
/*
 * Cancel the lease lock (if still granted), report via @lease_broken
 * whether it had already been cancelled by a conflicting access, then
 * close the open handle on the MDS.
 */
902 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
905 struct ldlm_lock *lock;
906 bool cancelled = true;
910 lock = ldlm_handle2lock(&och->och_lease_handle);
912 lock_res_and_lock(lock);
913 cancelled = ldlm_is_cancel(lock);
914 unlock_res_and_lock(lock);
918 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
919 PFID(&ll_i2info(inode)->lli_fid), cancelled);
922 ldlm_cli_cancel(&och->och_lease_handle, 0);
923 if (lease_broken != NULL)
924 *lease_broken = cancelled;
926 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
/*
 * Merge MDS-provided timestamps with the OST-side attributes (size, blocks,
 * a/m/ctime) cached in the cl_object, keeping the newest timestamp of each
 * kind, all under the inode size lock.
 */
930 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
932 struct ll_inode_info *lli = ll_i2info(inode);
933 struct cl_object *obj = lli->lli_clob;
934 struct cl_attr *attr = vvp_env_thread_attr(env);
942 ll_inode_size_lock(inode);
944 /* merge timestamps the most recently obtained from mds with
945 timestamps obtained from osts */
946 LTIME_S(inode->i_atime) = lli->lli_atime;
947 LTIME_S(inode->i_mtime) = lli->lli_mtime;
948 LTIME_S(inode->i_ctime) = lli->lli_ctime;
950 atime = LTIME_S(inode->i_atime);
951 mtime = LTIME_S(inode->i_mtime);
952 ctime = LTIME_S(inode->i_ctime);
954 cl_object_attr_lock(obj);
955 rc = cl_object_attr_get(env, obj, attr);
956 cl_object_attr_unlock(obj);
959 GOTO(out_size_unlock, rc);
/* Prefer OST timestamps when they are more recent than the MDS ones. */
961 if (atime < attr->cat_atime)
962 atime = attr->cat_atime;
964 if (ctime < attr->cat_ctime)
965 ctime = attr->cat_ctime;
967 if (mtime < attr->cat_mtime)
968 mtime = attr->cat_mtime;
970 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
971 PFID(&lli->lli_fid), attr->cat_size);
973 i_size_write(inode, attr->cat_size);
974 inode->i_blocks = attr->cat_blocks;
976 LTIME_S(inode->i_atime) = atime;
977 LTIME_S(inode->i_mtime) = mtime;
978 LTIME_S(inode->i_ctime) = ctime;
981 ll_inode_size_unlock(inode);
/*
 * Decide whether atime updates should be suppressed for @file, mirroring
 * the kernel's own file_accessed()/touch_atime() checks (open flags, inode
 * flags, mount flags, and the nodiratime cases for directories).
 */
986 static bool file_is_noatime(const struct file *file)
988 const struct vfsmount *mnt = file->f_path.mnt;
989 const struct inode *inode = file->f_path.dentry->d_inode;
991 /* Adapted from file_accessed() and touch_atime().*/
992 if (file->f_flags & O_NOATIME)
995 if (inode->i_flags & S_NOATIME)
998 if (IS_NOATIME(inode))
1001 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1004 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1007 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io from the file's open flags: non-blocking/append/sync
 * behavior, lock policy (never for nolock files, mandatory for O_APPEND,
 * otherwise "maybe"), and atime suppression.
 */
1013 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
1015 struct inode *inode = file->f_path.dentry->d_inode;
1017 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1019 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1020 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1021 file->f_flags & O_DIRECT ||
1024 io->ci_obj = ll_i2info(inode)->lli_clob;
1025 io->ci_lockreq = CILR_MAYBE;
1026 if (ll_file_nolock(file)) {
1027 io->ci_lockreq = CILR_NEVER;
1028 io->ci_no_srvlock = 1;
1029 } else if (file->f_flags & O_APPEND) {
/* O_APPEND writes must serialize against concurrent size changes. */
1030 io->ci_lockreq = CILR_MANDATORY;
1033 io->ci_noatime = file_is_noatime(file);
/*
 * Common read/write engine: sets up the cl_io, takes the range lock where
 * needed (writes, and O_DIRECT reads -- see LU-6227), runs the cl_io loop,
 * restarts partially-completed IO, and maintains per-fd/stat accounting.
 * Returns bytes transferred, or a negative errno if nothing was moved.
 */
1037 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1038 struct file *file, enum cl_io_type iot,
1039 loff_t *ppos, size_t count)
1041 struct vvp_io *vio = vvp_env_io(env);
1042 struct inode *inode = file->f_path.dentry->d_inode;
1043 struct ll_inode_info *lli = ll_i2info(inode);
1044 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1048 struct range_lock range;
1052 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1053 file->f_path.dentry->d_name.name, iot, *ppos, count);
1056 io = vvp_env_thread_io(env);
1057 ll_io_init(io, file, iot == CIT_WRITE);
1059 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1060 bool range_locked = false;
/* Appends land at EOF, so the lock must cover to the end of file. */
1062 if (file->f_flags & O_APPEND)
1063 range_lock_init(&range, 0, LUSTRE_EOF);
1065 range_lock_init(&range, *ppos, *ppos + count - 1);
1067 vio->vui_fd = LUSTRE_FPRIVATE(file);
1068 vio->vui_io_subtype = args->via_io_subtype;
1070 switch (vio->vui_io_subtype) {
1072 vio->vui_iter = args->u.normal.via_iter;
1073 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1074 vio->vui_tot_nrsegs = vio->vui_iter->nr_segs;
1075 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1076 vio->vui_iocb = args->u.normal.via_iocb;
1077 /* Direct IO reads must also take range lock,
1078 * or multiple reads will try to work on the same pages
1079 * See LU-6227 for details. */
1080 if (((iot == CIT_WRITE) ||
1081 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1082 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1083 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1085 rc = range_lock(&lli->lli_write_tree, &range);
1089 range_locked = true;
/* Splice IO hands the pipe through instead of an iov iterator. */
1093 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1094 vio->u.splice.vui_flags = args->u.splice.via_flags;
1097 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1101 ll_cl_add(file, env, io);
1102 rc = cl_io_loop(env, io);
1103 ll_cl_remove(file, env);
1106 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1108 range_unlock(&lli->lli_write_tree, &range);
1111 /* cl_io_rw_init() handled IO */
/* Account partial progress and advance position for a possible restart. */
1115 if (io->ci_nob > 0) {
1116 result += io->ci_nob;
1117 count -= io->ci_nob;
1118 *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */
1120 /* prepare IO restart */
1121 if (count > 0 && args->via_io_subtype == IO_NORMAL) {
1122 args->u.normal.via_iter = vio->vui_iter;
1123 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1124 args->u.normal.via_iter->nr_segs = vio->vui_tot_nrsegs;
1125 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1130 cl_io_fini(env, io);
/* Layout changed (or similar) mid-IO: loop and retry the remainder. */
1132 if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
1134 "%s: restart %s from %lld, count:%zu, result: %zd\n",
1135 file->f_path.dentry->d_name.name,
1136 iot == CIT_READ ? "read" : "write",
1137 *ppos, count, result);
1141 if (iot == CIT_READ) {
1143 ll_stats_ops_tally(ll_i2sbi(inode),
1144 LPROC_LL_READ_BYTES, result);
1145 } else if (iot == CIT_WRITE) {
1147 ll_stats_ops_tally(ll_i2sbi(inode),
1148 LPROC_LL_WRITE_BYTES, result);
/* Track write outcome so fsync/close can report deferred failures. */
1149 fd->fd_write_failed = false;
1150 } else if (result == 0 && rc == 0) {
1153 fd->fd_write_failed = true;
1155 fd->fd_write_failed = false;
1156 } else if (rc != -ERESTARTSYS) {
1157 fd->fd_write_failed = true;
1161 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1163 return result > 0 ? result : rc;
1167 * Read from a file (through the page cache).
/*
 * ->read_iter() entry point: wrap the iov_iter/kiocb into vvp_io_args and
 * delegate to ll_file_io_generic() inside a cl environment.
 */
1169 static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1171 struct vvp_io_args *args;
1176 env = cl_env_get(&refcheck);
1178 return PTR_ERR(env);
1180 args = ll_env_args(env, IO_NORMAL);
1181 args->u.normal.via_iter = to;
1182 args->u.normal.via_iocb = iocb;
1184 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1185 &iocb->ki_pos, iov_iter_count(to));
1186 cl_env_put(env, &refcheck);
1191 * Write to a file (through the page cache).
/*
 * ->write_iter() entry point: mirror of ll_file_read_iter() for CIT_WRITE.
 */
1193 static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1195 struct vvp_io_args *args;
1200 env = cl_env_get(&refcheck);
1202 return PTR_ERR(env);
1204 args = ll_env_args(env, IO_NORMAL);
1205 args->u.normal.via_iter = from;
1206 args->u.normal.via_iocb = iocb;
1208 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1209 &iocb->ki_pos, iov_iter_count(from));
1210 cl_env_put(env, &refcheck);
1214 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1216 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array: compute the total byte count in *count, clamping
 * *nr_segs at the first inaccessible segment, and rejecting negative or
 * wrapping lengths.  Compat path for kernels without read/write_iter.
 */
1218 static int ll_file_get_iov_count(const struct iovec *iov,
1219 unsigned long *nr_segs, size_t *count)
1224 for (seg = 0; seg < *nr_segs; seg++) {
1225 const struct iovec *iv = &iov[seg];
1228 * If any segment has a negative length, or the cumulative
1229 * length ever wraps negative then return -EINVAL.
1232 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1234 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1239 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Compat aio_read: copy the caller's iovec (it must not be modified by the
 * iterator), build an iov_iter -- honoring the init-direction API change --
 * and forward to ll_file_read_iter().
 */
1246 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1247 unsigned long nr_segs, loff_t pos)
1249 struct iovec *local_iov;
1250 struct iov_iter *to;
1253 struct lu_env *env = NULL;
1257 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1263 env = cl_env_get(&refcheck);
1265 RETURN(PTR_ERR(env));
/* Single-segment fast path reuses the per-env scratch iovec. */
1267 local_iov = &ll_env_info(env)->lti_local_iov;
1271 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1272 if (local_iov == NULL)
1275 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1283 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1284 iov_iter_init(to, READ, local_iov, nr_segs, iov_count);
1285 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1286 iov_iter_init(to, local_iov, nr_segs, iov_count, 0);
1287 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1289 result = ll_file_read_iter(iocb, to);
1294 cl_env_put(env, &refcheck);
1296 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Plain read(2) entry point for pre-read_iter kernels: wrap the user
 * buffer in a one-element iovec and a synchronous kiocb, then delegate
 * to ll_file_aio_read().
 */
1301 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1305 struct iovec iov = { .iov_base = buf, .iov_len = count };
1306 struct kiocb *kiocb;
1311 env = cl_env_get(&refcheck);
1313 RETURN(PTR_ERR(env));
1315 kiocb = &ll_env_info(env)->lti_kiocb;
1316 init_sync_kiocb(kiocb, file);
1317 kiocb->ki_pos = *ppos;
/* The field carrying the residual byte count was renamed over kernel
 * versions: ki_left -> ki_nbytes -> (gone entirely). */
1318 #ifdef HAVE_KIOCB_KI_LEFT
1319 kiocb->ki_left = count;
1320 #elif defined(HAVE_KI_NBYTES)
1321 kiocb->ki_nbytes = count;
1324 result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos);
/* Reflect the advanced position back to the caller's *ppos. */
1325 *ppos = kiocb->ki_pos;
1327 cl_env_put(env, &refcheck);
1332 * Write to a file (through the page cache).
/*
 * aio_write compatibility wrapper for kernels without write_iter; the
 * write-side twin of ll_file_aio_read() above.
 */
1335 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1336 unsigned long nr_segs, loff_t pos)
1338 struct iovec *local_iov;
1339 struct iov_iter *from;
1342 struct lu_env *env = NULL;
/* Validate segments and obtain the total transfer size. */
1346 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1351 env = cl_env_get(&refcheck);
1353 RETURN(PTR_ERR(env));
/* Single-segment fast path reuses the env-embedded iovec... */
1355 local_iov = &ll_env_info(env)->lti_local_iov;
/* ...otherwise copy the caller's array into a private allocation. */
1358 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1359 if (local_iov == NULL)
1362 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
/* Unlike the read path, the iov_iter itself is heap-allocated here. */
1365 OBD_ALLOC_PTR(from);
1370 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1371 iov_iter_init(from, WRITE, local_iov, nr_segs, iov_count);
1372 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1373 iov_iter_init(from, local_iov, nr_segs, iov_count, 0);
1374 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1376 result = ll_file_write_iter(iocb, from);
1381 cl_env_put(env, &refcheck);
1383 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Plain write(2) entry point for pre-write_iter kernels: build a
 * one-element iovec and synchronous kiocb, delegate to
 * ll_file_aio_write(), and propagate the new file position.
 */
1388 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1389 size_t count, loff_t *ppos)
1392 struct iovec iov = { .iov_base = (void __user *)buf,
1394 struct kiocb *kiocb;
1399 env = cl_env_get(&refcheck);
1401 RETURN(PTR_ERR(env));
1403 kiocb = &ll_env_info(env)->lti_kiocb;
1404 init_sync_kiocb(kiocb, file);
1405 kiocb->ki_pos = *ppos;
/* Residual-count field name differs across kernel versions. */
1406 #ifdef HAVE_KIOCB_KI_LEFT
1407 kiocb->ki_left = count;
1408 #elif defined(HAVE_KI_NBYTES)
1409 kiocb->ki_nbytes = count;
1412 result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos);
1413 *ppos = kiocb->ki_pos;
1415 cl_env_put(env, &refcheck);
1418 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1421 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read entry point: send file content (through the page cache)
 * to a pipe via the IO_SPLICE variant of ll_file_io_generic().
 */
1423 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1424 struct pipe_inode_info *pipe, size_t count,
1428 struct vvp_io_args *args;
1433 env = cl_env_get(&refcheck);
1435 RETURN(PTR_ERR(env));
1437 args = ll_env_args(env, IO_SPLICE);
1438 args->u.splice.via_pipe = pipe;
1439 args->u.splice.via_flags = flags;
/* Splice is a read as far as the cl_io machinery is concerned. */
1441 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1442 cl_env_put(env, &refcheck);
/*
 * Set striping (LOV EA) on a file by re-opening it on the MDS with the
 * supplied lov_user_md, under the inode size lock. The transient open
 * handle is released before returning.
 */
1446 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1447 __u64 flags, struct lov_user_md *lum,
/* Open by FID so the intent carries the layout rather than a lookup. */
1450 struct lookup_intent oit = {
1452 .it_flags = flags | MDS_OPEN_BY_FID,
1457 ll_inode_size_lock(inode);
1458 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1460 GOTO(out_unlock, rc);
/* Close the MDS open handle created purely for the setstripe. */
1462 ll_release_openhandle(file->f_path.dentry, &oit);
1465 ll_inode_size_unlock(inode);
1466 ll_intent_release(&oit);
/* Clear the O_LOV_DELAY_CREATE bit now that the layout exists. */
1467 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA (striping info) for @filename from the MDS via
 * md_getattr_name(), byte-swapping it to host endianness for userspace
 * on big-endian hosts. On success *lmmp/*lmm_size point into the still-
 * referenced reply in *request, which the caller must release.
 */
1472 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1473 struct lov_mds_md **lmmp, int *lmm_size,
1474 struct ptlrpc_request **request)
1476 struct ll_sb_info *sbi = ll_i2sbi(inode);
1477 struct mdt_body *body;
1478 struct lov_mds_md *lmm = NULL;
1479 struct ptlrpc_request *req = NULL;
1480 struct md_op_data *op_data;
/* Size the getattr reply buffer to the MDT's default EA size. */
1483 rc = ll_get_default_mdsize(sbi, &lmmsize);
1487 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1488 strlen(filename), lmmsize,
1489 LUSTRE_OPC_ANY, NULL);
1490 if (IS_ERR(op_data))
1491 RETURN(PTR_ERR(op_data));
1493 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1494 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1495 ll_finish_md_op_data(op_data);
1497 CDEBUG(D_INFO, "md_getattr_name failed "
1498 "on %s: rc %d\n", filename, rc);
1502 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1503 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1505 lmmsize = body->mbo_eadatasize;
/* No layout EA present (e.g. file without striping): report -ENODATA. */
1507 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1509 GOTO(out, rc = -ENODATA);
1512 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1513 LASSERT(lmm != NULL);
/* Only plain V1/V3 layouts are understood here. */
1515 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1516 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1517 GOTO(out, rc = -EPROTO);
1521 * This is coming from the MDS, so is probably in
1522 * little endian. We convert it to host endian before
1523 * passing it to userspace.
/* True only on big-endian hosts, where swabbing is required. */
1525 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1528 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1529 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1532 /* if function called for directory - we should
1533 * avoid swab not existent lsm objects */
1534 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1535 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
/* Directories carry no per-object array, hence the S_ISREG guard. */
1536 if (S_ISREG(body->mbo_mode))
1537 lustre_swab_lov_user_md_objects(
1538 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1540 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1541 lustre_swab_lov_user_md_v3(
1542 (struct lov_user_md_v3 *)lmm);
1543 if (S_ISREG(body->mbo_mode))
1544 lustre_swab_lov_user_md_objects(
1545 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1552 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (with one OST object
 * entry) from userspace and apply it with MDS_OPEN_HAS_OBJS, i.e. the
 * caller supplies pre-existing objects. Requires CAP_SYS_ADMIN.
 */
1557 static int ll_lov_setea(struct inode *inode, struct file *file,
1560 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1561 struct lov_user_md *lump;
1562 int lum_size = sizeof(struct lov_user_md) +
1563 sizeof(struct lov_user_ost_data);
1567 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1570 OBD_ALLOC_LARGE(lump, lum_size);
1574 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1575 OBD_FREE_LARGE(lump, lum_size);
1579 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1581 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the file's striping information to the userspace lov_user_md
 * buffer via the cl_object layer.
 */
1585 static int ll_file_getstripe(struct inode *inode,
1586 struct lov_user_md __user *lum)
1593 env = cl_env_get(&refcheck);
1595 RETURN(PTR_ERR(env));
1597 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1598 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user's lov_user_md into the
 * kernel, apply it, then refresh the layout and echo the resulting
 * striping back to the same user buffer.
 */
1602 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1605 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1606 struct lov_user_md *klum;
1608 __u64 flags = FMODE_WRITE;
1611 rc = ll_copy_user_md(lum, &klum);
1616 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* Zero the user's stripe_count so getstripe below fills real values.
 * NOTE(review): put_user's return appears unchecked in this excerpt. */
1620 put_user(0, &lum->lmm_stripe_count);
1622 ll_layout_refresh(inode, &gen);
1623 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1626 OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK handler: take a cluster-wide group lock with id
 * @arg on the file. Only one group lock per file descriptor; the
 * lli_lock spinlock guards the fd_flags/fd_grouplock check-and-set,
 * with a re-check after the (sleeping) cl_get_grouplock() call.
 */
1631 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1633 struct ll_inode_info *lli = ll_i2info(inode);
1634 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1635 struct ll_grouplock grouplock;
/* gid 0 is reserved / invalid for group locks. */
1640 CWARN("group id for group lock must not be 0\n");
1644 if (ll_file_nolock(file))
1645 RETURN(-EOPNOTSUPP);
1647 spin_lock(&lli->lli_lock);
1648 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1649 CWARN("group lock already existed with gid %lu\n",
1650 fd->fd_grouplock.lg_gid);
1651 spin_unlock(&lli->lli_lock);
1654 LASSERT(fd->fd_grouplock.lg_lock == NULL);
/* Drop the spinlock: cl_get_grouplock() may block. */
1655 spin_unlock(&lli->lli_lock);
1657 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1658 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check under the lock: another thread may have won the race
 * while we were enqueueing; if so, give our lock back. */
1662 spin_lock(&lli->lli_lock);
1663 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1664 spin_unlock(&lli->lli_lock);
1665 CERROR("another thread just won the race\n");
1666 cl_put_grouplock(&grouplock);
1670 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1671 fd->fd_grouplock = grouplock;
1672 spin_unlock(&lli->lli_lock);
1674 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock held on this file
 * descriptor, verifying that @arg matches the gid it was taken with.
 * State is detached under lli_lock; the actual release happens after
 * dropping the spinlock since cl_put_grouplock() may block.
 */
1678 static int ll_put_grouplock(struct inode *inode, struct file *file,
1681 struct ll_inode_info *lli = ll_i2info(inode);
1682 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1683 struct ll_grouplock grouplock;
1686 spin_lock(&lli->lli_lock);
1687 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1688 spin_unlock(&lli->lli_lock);
1689 CWARN("no group lock held\n");
1693 LASSERT(fd->fd_grouplock.lg_lock != NULL);
/* Refuse to release a lock the caller did not take. */
1695 if (fd->fd_grouplock.lg_gid != arg) {
1696 CWARN("group lock %lu doesn't match current id %lu\n",
1697 arg, fd->fd_grouplock.lg_gid);
1698 spin_unlock(&lli->lli_lock);
/* Snapshot and clear the per-fd state while still under lli_lock. */
1702 grouplock = fd->fd_grouplock;
1703 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1704 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1705 spin_unlock(&lli->lli_lock);
1707 cl_put_grouplock(&grouplock);
1708 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1713 * Close inode open handle
1715 * \param dentry [in] dentry which contains the inode
1716 * \param it [in,out] intent which contains open info and result
1719 * \retval <0 failure
/*
 * Close the MDS open handle carried by a lookup intent (see the \param
 * block above). No-op for the filesystem root or when the intent holds
 * no DISP_OPEN_OPEN disposition.
 */
1721 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1723 struct inode *inode = dentry->d_inode;
1724 struct obd_client_handle *och;
1730 /* Root ? Do nothing. */
1731 if (dentry->d_inode->i_sb->s_root == dentry)
1734 /* No open handle to close? Move away */
1735 if (!it_disposition(it, DISP_OPEN_OPEN))
1738 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1740 OBD_ALLOC(och, sizeof(*och));
1742 GOTO(out, rc = -ENOMEM);
/* Populate the client handle from the intent, then close it on the MDS. */
1744 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1746 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
1748 /* this one is in place of ll_file_open */
1749 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1750 ptlrpc_req_finished(it->d.lustre.it_data);
1751 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1757 * Get size for inode for which FIEMAP mapping is requested.
1758 * Make the FIEMAP get_info call and returns the result.
1759 * \param fiemap kernel buffer to hold extens
1760 * \param num_bytes kernel buffer size
/*
 * Perform a FIEMAP request (see \param block above): validate flags,
 * optionally flush dirty data, glimpse the size if needed, then ask the
 * cl_object layer to fill the extent mapping.
 */
1762 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1768 struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
1771 /* Checks for fiemap flags */
/* Unsupported flags: report back which ones we do support and fail. */
1772 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1773 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1777 /* Check for FIEMAP_FLAG_SYNC */
1778 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1779 rc = filemap_fdatawrite(inode->i_mapping);
1784 env = cl_env_get(&refcheck);
1786 RETURN(PTR_ERR(env));
/* A zero cached size may just be stale — glimpse the OSTs first. */
1788 if (i_size_read(inode) == 0) {
1789 rc = ll_glimpse_size(inode);
1794 fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1795 obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
1796 obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
1798 /* If filesize is 0, then there would be no objects for mapping */
1799 if (fmkey.lfik_oa.o_size == 0) {
1800 fiemap->fm_mapped_extents = 0;
1804 fmkey.lfik_fiemap = *fiemap;
1806 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1807 &fmkey, fiemap, &num_bytes);
1809 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.
 * Requires CAP_DAC_READ_SEARCH unless the mount allows user fid2path.
 * The output buffer is sized from the user-supplied gf_pathlen.
 */
1813 int ll_fid2path(struct inode *inode, void __user *arg)
1815 struct obd_export *exp = ll_i2mdexp(inode);
1816 const struct getinfo_fid2path __user *gfin = arg;
1818 struct getinfo_fid2path *gfout;
1824 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1825 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1828 /* Only need to get the buflen */
1829 if (get_user(pathlen, &gfin->gf_pathlen))
/* Bound the allocation by the user-controlled length. */
1832 if (pathlen > PATH_MAX)
1835 outsize = sizeof(*gfout) + pathlen;
1836 OBD_ALLOC(gfout, outsize);
1840 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1841 GOTO(gf_free, rc = -EFAULT);
1843 /* Call mdc_iocontrol */
1844 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
/* Copy the header plus resolved path back to userspace. */
1848 if (copy_to_user(arg, gfout, outsize))
1852 OBD_FREE(gfout, outsize);
1857 * Read the data_version for inode.
1859 * This value is computed using stripe object version on OST.
1860 * Version is computed using server side locking.
1862 * @param flags if do sync on the OST side;
1864 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1865 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * Read the inode's data_version via a CIT_DATA_VERSION cl_io (see the
 * doc comment above for @flags semantics). Restarts the whole io when
 * the layout changed underneath it (ci_need_restart).
 */
1867 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1869 struct cl_object *obj = ll_i2info(inode)->lli_clob;
1877 /* If no file object initialized, we consider its version is 0. */
1883 env = cl_env_get(&refcheck);
1885 RETURN(PTR_ERR(env));
1887 io = vvp_env_thread_io(env);
1889 io->u.ci_data_version.dv_data_version = 0;
1890 io->u.ci_data_version.dv_flags = flags;
/* cl_io_init() == 0 means layers accepted the io; otherwise use
 * the init result directly. */
1893 if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
1894 result = cl_io_loop(env, io);
1896 result = io->ci_result;
1898 *data_version = io->u.ci_data_version.dv_data_version;
1900 cl_io_fini(env, io);
/* Layout changed mid-io: retry from the top. */
1902 if (unlikely(io->ci_need_restart))
1905 cl_env_put(env, &refcheck);
1911 * Trigger a HSM release request for the provided inode.
/*
 * Trigger an HSM release for @inode: take a write lease, capture the
 * latest data_version (flushing dirty pages), merge attributes, then
 * close the file with MDS_HSM_RELEASE so the MDT drops the OST objects.
 */
1913 int ll_hsm_release(struct inode *inode)
1915 struct cl_env_nest nest;
1917 struct obd_client_handle *och = NULL;
1918 __u64 data_version = 0;
1922 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1923 ll_get_fsname(inode->i_sb, NULL, 0),
1924 PFID(&ll_i2info(inode)->lli_fid));
/* Exclusive write lease guards against concurrent modification. */
1926 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1928 GOTO(out, rc = PTR_ERR(och));
1930 /* Grab latest data_version and [am]time values */
1931 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1935 env = cl_env_nested_get(&nest);
1937 GOTO(out, rc = PTR_ERR(env));
1939 ll_merge_attr(env, inode);
1940 cl_env_nested_put(&nest, env);
1942 /* Release the file.
1943 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1944 * we still need it to pack l_remote_handle to MDT. */
1945 rc = ll_close_inode_openhandle(inode, och, MDS_HSM_RELEASE,
/* Error path: if the lease was obtained, close it explicitly. */
1951 if (och != NULL && !IS_ERR(och)) /* close the file */
1952 ll_lease_close(och, inode, NULL);
/* Working state for ll_swap_layouts(): the two inodes plus (elsewhere
 * in the struct) the data versions and check flags, kept together so
 * they can be swapped as a unit when the FIDs are reordered. */
1957 struct ll_swap_stack {
1960 struct inode *inode1;
1961 struct inode *inode2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS worker: atomically exchange the layouts of
 * two files on the MDT. Inputs are ordered by FID to avoid deadlock,
 * optionally protected by a group lock (gid != 0) and guarded by
 * data-version checks when the caller requested them.
 */
1966 static int ll_swap_layouts(struct file *file1, struct file *file2,
1967 struct lustre_swap_layouts *lsl)
1969 struct mdc_swap_layouts msl;
1970 struct md_op_data *op_data;
1973 struct ll_swap_stack *llss = NULL;
1976 OBD_ALLOC_PTR(llss);
1980 llss->inode1 = file1->f_path.dentry->d_inode;
1981 llss->inode2 = file2->f_path.dentry->d_inode;
1983 rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
1987 /* we use 2 bool because it is easier to swap than 2 bits */
1988 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1989 llss->check_dv1 = true;
1991 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1992 llss->check_dv2 = true;
1994 /* we cannot use lsl->sl_dvX directly because we may swap them */
1995 llss->dv1 = lsl->sl_dv1;
1996 llss->dv2 = lsl->sl_dv2;
/* Order the pair by FID so locks are always taken in the same order. */
1998 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1999 if (rc == 0) /* same file, done! */
2002 if (rc < 0) { /* sequentialize it */
2003 swap(llss->inode1, llss->inode2);
2005 swap(llss->dv1, llss->dv2);
2006 swap(llss->check_dv1, llss->check_dv2);
/* Group lock flushes dirty cache on both files before the swap. */
2010 if (gid != 0) { /* application asks to flush dirty cache */
2011 rc = ll_get_grouplock(llss->inode1, file1, gid);
2015 rc = ll_get_grouplock(llss->inode2, file2, gid);
2017 ll_put_grouplock(llss->inode1, file1, gid);
2022 /* ultimate check, before swaping the layouts we check if
2023 * dataversion has changed (if requested) */
2024 if (llss->check_dv1) {
2025 rc = ll_data_version(llss->inode1, &dv, 0);
2028 if (dv != llss->dv1)
2029 GOTO(putgl, rc = -EAGAIN);
2032 if (llss->check_dv2) {
2033 rc = ll_data_version(llss->inode2, &dv, 0);
2036 if (dv != llss->dv2)
2037 GOTO(putgl, rc = -EAGAIN);
2040 /* struct md_op_data is used to send the swap args to the mdt
2041 * only flags is missing, so we use struct mdc_swap_layouts
2042 * through the md_op_data->op_data */
2043 /* flags from user space have to be converted before they are send to
2044 * server, no flag is sent today, they are only used on the client */
2047 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2048 0, LUSTRE_OPC_ANY, &msl);
2049 if (IS_ERR(op_data))
2050 GOTO(free, rc = PTR_ERR(op_data));
2052 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2053 sizeof(*op_data), op_data, NULL);
2054 ll_finish_md_op_data(op_data);
/* Release group locks in reverse acquisition order. */
2061 ll_put_grouplock(llss->inode2, file2, gid);
2062 ll_put_grouplock(llss->inode1, file1, gid);
/*
 * Set/clear HSM state flags on @inode via the MDC. Validates the masks
 * and archive id, and restricts non-HSM_USER_MASK bits to CAP_SYS_ADMIN.
 */
2072 int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2074 struct md_op_data *op_data;
2078 /* Detect out-of range masks */
2079 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2082 /* Non-root users are forbidden to set or clear flags which are
2083 * NOT defined in HSM_USER_MASK. */
2084 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2085 !cfs_capable(CFS_CAP_SYS_ADMIN))
2088 /* Detect out-of range archive id */
2089 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2090 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
/* hss travels to the MDT inside op_data->op_data. */
2093 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2094 LUSTRE_OPC_ANY, hss);
2095 if (IS_ERR(op_data))
2096 RETURN(PTR_ERR(op_data));
2098 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2099 sizeof(*op_data), op_data, NULL);
2101 ll_finish_md_op_data(op_data);
/*
 * LL_IOC_HSM_IMPORT handler: register an already-archived file. Marks
 * the file HS_ARCHIVED|HS_EXISTS|HS_RELEASED, then restores the saved
 * mode/owner/size/times from @hui via ll_setattr_raw().
 */
2106 static int ll_hsm_import(struct inode *inode, struct file *file,
2107 struct hsm_user_import *hui)
2109 struct hsm_state_set *hss = NULL;
2110 struct iattr *attr = NULL;
2114 if (!S_ISREG(inode->i_mode))
2120 GOTO(out, rc = -ENOMEM);
/* Step 1: set HSM flags so the MDT sees the file as released. */
2122 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2123 hss->hss_archive_id = hui->hui_archive_id;
2124 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2125 rc = ll_hsm_state_set(inode, hss);
2129 OBD_ALLOC_PTR(attr);
2131 GOTO(out, rc = -ENOMEM);
/* Step 2: rebuild the attributes recorded at archive time. */
2133 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2134 attr->ia_mode |= S_IFREG;
2135 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2136 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2137 attr->ia_size = hui->hui_size;
2138 attr->ia_mtime.tv_sec = hui->hui_mtime;
2139 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2140 attr->ia_atime.tv_sec = hui->hui_atime;
2141 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2143 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2144 ATTR_UID | ATTR_GID |
2145 ATTR_MTIME | ATTR_MTIME_SET |
2146 ATTR_ATIME | ATTR_ATIME_SET;
2148 mutex_lock(&inode->i_mutex);
2150 rc = ll_setattr_raw(file->f_path.dentry, attr, true);
2154 mutex_unlock(&inode->i_mutex);
/* Translate an fmode_t (FMODE_READ/FMODE_WRITE bits) into the
 * LL_LEASE_RDLCK/LL_LEASE_WRLCK bitmask reported to userspace. */
2166 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2168 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2169 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * LL_IOC_FUTIMES_3 handler: set atime, mtime AND ctime from @lfu in a
 * single ll_setattr_raw() call, under i_mutex. Restricted to
 * CAP_SYS_ADMIN on regular files (ctime is normally kernel-managed).
 */
2172 static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu)
2174 struct inode *inode = file->f_path.dentry->d_inode;
2176 .ia_valid = ATTR_ATIME | ATTR_ATIME_SET |
2177 ATTR_MTIME | ATTR_MTIME_SET |
2178 ATTR_CTIME | ATTR_CTIME_SET,
2180 .tv_sec = lfu->lfu_atime_sec,
2181 .tv_nsec = lfu->lfu_atime_nsec,
2184 .tv_sec = lfu->lfu_mtime_sec,
2185 .tv_nsec = lfu->lfu_mtime_nsec,
2188 .tv_sec = lfu->lfu_ctime_sec,
2189 .tv_nsec = lfu->lfu_ctime_nsec,
2195 if (!capable(CAP_SYS_ADMIN))
2198 if (!S_ISREG(inode->i_mode))
2201 mutex_lock(&inode->i_mutex);
2202 rc = ll_setattr_raw(file->f_path.dentry, &ia, false);
2203 mutex_unlock(&inode->i_mutex);
/*
 * Main ioctl dispatcher for regular Lustre files. Handles file flags,
 * striping (LOV) get/set/swap, group locks, FID/path conversion, HSM
 * state, data-version, leases and timestamps; unknown commands fall
 * through to the dynamic ioctl table and finally to the OSC/MDC layer.
 */
2209 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2211 struct inode *inode = file->f_path.dentry->d_inode;
2212 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2216 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2217 PFID(ll_inode2fid(inode)), inode, cmd);
2218 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2220 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2221 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2225 case LL_IOC_GETFLAGS:
2226 /* Get the current value of the file flags */
2227 return put_user(fd->fd_flags, (int __user *)arg);
2228 case LL_IOC_SETFLAGS:
2229 case LL_IOC_CLRFLAGS:
2230 /* Set or clear specific file flags */
2231 /* XXX This probably needs checks to ensure the flags are
2232 * not abused, and to handle any flag side effects.
2234 if (get_user(flags, (int __user *) arg))
2237 if (cmd == LL_IOC_SETFLAGS) {
/* Lockless IO is only permitted for O_DIRECT files. */
2238 if ((flags & LL_FILE_IGNORE_LOCK) &&
2239 !(file->f_flags & O_DIRECT)) {
2240 CERROR("%s: unable to disable locking on "
2241 "non-O_DIRECT file\n", current->comm);
2245 fd->fd_flags |= flags;
2247 fd->fd_flags &= ~flags;
2250 case LL_IOC_LOV_SETSTRIPE:
2251 RETURN(ll_lov_setstripe(inode, file, arg));
2252 case LL_IOC_LOV_SETEA:
2253 RETURN(ll_lov_setea(inode, file, arg));
2254 case LL_IOC_LOV_SWAP_LAYOUTS: {
2256 struct lustre_swap_layouts lsl;
2258 if (copy_from_user(&lsl, (char __user *)arg,
2259 sizeof(struct lustre_swap_layouts)))
/* Both files must be writable for a layout swap. */
2262 if ((file->f_flags & O_ACCMODE) == O_RDONLY)
2265 file2 = fget(lsl.sl_fd);
2269 /* O_WRONLY or O_RDWR */
2270 if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
2271 GOTO(out, rc = -EPERM);
/* CLOSE variant: swap via the lease-close path using the held
 * lease; requires CLOSE to be the only flag set. */
2273 if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
2274 struct inode *inode2;
2275 struct ll_inode_info *lli;
2276 struct obd_client_handle *och = NULL;
2278 if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE)
2279 GOTO(out, rc = -EINVAL);
2281 lli = ll_i2info(inode);
2282 mutex_lock(&lli->lli_och_mutex);
2283 if (fd->fd_lease_och != NULL) {
2284 och = fd->fd_lease_och;
2285 fd->fd_lease_och = NULL;
2287 mutex_unlock(&lli->lli_och_mutex);
2289 GOTO(out, rc = -ENOLCK);
2290 inode2 = file2->f_path.dentry->d_inode;
2291 rc = ll_swap_layouts_close(och, inode, inode2);
2293 rc = ll_swap_layouts(file, file2, &lsl);
2299 case LL_IOC_LOV_GETSTRIPE:
2300 RETURN(ll_file_getstripe(inode,
2301 (struct lov_user_md __user *)arg));
2302 case FSFILT_IOC_GETFLAGS:
2303 case FSFILT_IOC_SETFLAGS:
2304 RETURN(ll_iocontrol(inode, file, cmd, arg));
2305 case FSFILT_IOC_GETVERSION_OLD:
2306 case FSFILT_IOC_GETVERSION:
2307 RETURN(put_user(inode->i_generation, (int __user *)arg));
2308 case LL_IOC_GROUP_LOCK:
2309 RETURN(ll_get_grouplock(inode, file, arg));
2310 case LL_IOC_GROUP_UNLOCK:
2311 RETURN(ll_put_grouplock(inode, file, arg));
2312 case IOC_OBD_STATFS:
2313 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2315 /* We need to special case any other ioctls we want to handle,
2316 * to send them to the MDS/OST as appropriate and to properly
2317 * network encode the arg field.
2318 case FSFILT_IOC_SETVERSION_OLD:
2319 case FSFILT_IOC_SETVERSION:
2321 case LL_IOC_FLUSHCTX:
2322 RETURN(ll_flush_ctx(inode));
2323 case LL_IOC_PATH2FID: {
2324 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2325 sizeof(struct lu_fid)))
2330 case LL_IOC_GETPARENT:
2331 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2333 case OBD_IOC_FID2PATH:
2334 RETURN(ll_fid2path(inode, (void __user *)arg));
2335 case LL_IOC_DATA_VERSION: {
2336 struct ioc_data_version idv;
2339 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* Mask out anything but the supported flush flags. */
2342 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2343 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2346 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2352 case LL_IOC_GET_MDTIDX: {
2355 mdtidx = ll_get_mdt_idx(inode);
2359 if (put_user((int)mdtidx, (int __user *)arg))
2364 case OBD_IOC_GETDTNAME:
2365 case OBD_IOC_GETMDNAME:
2366 RETURN(ll_get_obd_name(inode, cmd, arg));
2367 case LL_IOC_HSM_STATE_GET: {
2368 struct md_op_data *op_data;
2369 struct hsm_user_state *hus;
/* hus is carried to the MDT inside op_data->op_data. */
2376 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2377 LUSTRE_OPC_ANY, hus);
2378 if (IS_ERR(op_data)) {
2380 RETURN(PTR_ERR(op_data));
2383 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2386 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2389 ll_finish_md_op_data(op_data);
2393 case LL_IOC_HSM_STATE_SET: {
2394 struct hsm_state_set *hss;
2401 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2406 rc = ll_hsm_state_set(inode, hss);
2411 case LL_IOC_HSM_ACTION: {
2412 struct md_op_data *op_data;
2413 struct hsm_current_action *hca;
2420 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2421 LUSTRE_OPC_ANY, hca);
2422 if (IS_ERR(op_data)) {
2424 RETURN(PTR_ERR(op_data));
2427 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2430 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2433 ll_finish_md_op_data(op_data);
2437 case LL_IOC_SET_LEASE: {
2438 struct ll_inode_info *lli = ll_i2info(inode);
2439 struct obd_client_handle *och = NULL;
/* Requested lease mode must be compatible with how the file
 * was opened (write lease needs FMODE_WRITE, etc.). */
2444 case LL_LEASE_WRLCK:
2445 if (!(file->f_mode & FMODE_WRITE))
2447 fmode = FMODE_WRITE;
2449 case LL_LEASE_RDLCK:
2450 if (!(file->f_mode & FMODE_READ))
2454 case LL_LEASE_UNLCK:
2455 mutex_lock(&lli->lli_och_mutex);
2456 if (fd->fd_lease_och != NULL) {
2457 och = fd->fd_lease_och;
2458 fd->fd_lease_och = NULL;
2460 mutex_unlock(&lli->lli_och_mutex);
2465 fmode = och->och_flags;
2466 rc = ll_lease_close(och, inode, &lease_broken);
2473 RETURN(ll_lease_type_from_fmode(fmode));
2478 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2480 /* apply for lease */
2481 och = ll_lease_open(inode, file, fmode, 0);
2483 RETURN(PTR_ERR(och));
/* Install the new lease handle unless one appeared meanwhile. */
2486 mutex_lock(&lli->lli_och_mutex);
2487 if (fd->fd_lease_och == NULL) {
2488 fd->fd_lease_och = och;
2491 mutex_unlock(&lli->lli_och_mutex);
2493 /* impossible now that only excl is supported for now */
2494 ll_lease_close(och, inode, &lease_broken);
2499 case LL_IOC_GET_LEASE: {
2500 struct ll_inode_info *lli = ll_i2info(inode);
2501 struct ldlm_lock *lock = NULL;
2504 mutex_lock(&lli->lli_och_mutex);
2505 if (fd->fd_lease_och != NULL) {
2506 struct obd_client_handle *och = fd->fd_lease_och;
/* Only report the lease if its DLM lock is still valid. */
2508 lock = ldlm_handle2lock(&och->och_lease_handle);
2510 lock_res_and_lock(lock);
2511 if (!ldlm_is_cancel(lock))
2512 fmode = och->och_flags;
2514 unlock_res_and_lock(lock);
2515 LDLM_LOCK_PUT(lock);
2518 mutex_unlock(&lli->lli_och_mutex);
2520 RETURN(ll_lease_type_from_fmode(fmode));
2522 case LL_IOC_HSM_IMPORT: {
2523 struct hsm_user_import *hui;
2529 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2534 rc = ll_hsm_import(inode, file, hui);
2539 case LL_IOC_FUTIMES_3: {
2540 struct ll_futimes_3 lfu;
2542 if (copy_from_user(&lfu,
2543 (const struct ll_futimes_3 __user *)arg,
2547 RETURN(ll_file_futimes_3(file, &lfu));
/* Unknown command: try dynamically-registered handlers, then
 * pass through to the data export (OSC). */
2553 ll_iocontrol_call(inode, file, cmd, arg, &err))
2556 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2557 (void __user *)arg));
2562 #ifndef HAVE_FILE_LLSEEK_SIZE
/* Clamp and apply a new file offset: reject negative offsets (unless
 * FMODE_UNSIGNED_OFFSET) and offsets beyond maxsize, and reset
 * f_version when f_pos actually changes. Local fallback for kernels
 * lacking generic_file_llseek_size(). */
2563 static inline loff_t
2564 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2566 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2568 if (offset > maxsize)
2571 if (offset != file->f_pos) {
2572 file->f_pos = offset;
2573 file->f_version = 0;
/*
 * Backport of the kernel's generic_file_llseek_size() for old kernels:
 * handle SEEK_SET/CUR/END plus SEEK_DATA/SEEK_HOLE against @eof,
 * bounded by @maxsize. See the in-body comments from the original.
 */
2579 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2580 loff_t maxsize, loff_t eof)
2582 struct inode *inode = file->f_path.dentry->d_inode;
2590 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2591 * position-querying operation. Avoid rewriting the "same"
2592 * f_pos value back to the file because a concurrent read(),
2593 * write() or lseek() might have altered it
2598 * f_lock protects against read/modify/write race with other
2599 * SEEK_CURs. Note that parallel writes and reads behave
/* SEEK_CUR is serialized against other seekers via i_mutex here. */
2602 mutex_lock(&inode->i_mutex);
2603 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2604 mutex_unlock(&inode->i_mutex);
2608 * In the generic case the entire file is data, so as long as
2609 * offset isn't at the end of the file then the offset is data.
2616 * There is a virtual hole at the end of the file, so as long as
2617 * offset isn't i_size or larger, return i_size.
2625 return llseek_execute(file, offset, maxsize);
/*
 * llseek entry point: for SEEK_END/SEEK_HOLE/SEEK_DATA first glimpse
 * the true size from the OSTs, then delegate to the (possibly
 * backported) generic_file_llseek_size with the Lustre max byte limit.
 */
2629 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2631 struct inode *inode = file->f_path.dentry->d_inode;
2632 loff_t retval, eof = 0;
/* Compute the absolute target (for the debug trace below). */
2635 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2636 (origin == SEEK_CUR) ? file->f_pos : 0);
2637 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2638 PFID(ll_inode2fid(inode)), inode, retval, retval,
2640 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
/* Size-relative seeks need the authoritative size from the OSTs. */
2642 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2643 retval = ll_glimpse_size(inode);
2646 eof = i_size_read(inode);
2649 retval = ll_generic_file_llseek_size(file, offset, origin,
2650 ll_file_maxbytes(inode), eof);
/*
 * ->flush handler (close(2) time): surface any async writeback errors
 * recorded on the inode/objects as -EIO, but only once — a failure
 * already reported through fd_write_failed is not reported again.
 */
2654 static int ll_flush(struct file *file, fl_owner_t id)
2656 struct inode *inode = file->f_path.dentry->d_inode;
2657 struct ll_inode_info *lli = ll_i2info(inode);
2658 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2661 LASSERT(!S_ISDIR(inode->i_mode));
2663 /* catch async errors that were recorded back when async writeback
2664 * failed for pages in this mapping. */
/* Consume-and-clear semantics: the error is reported only once. */
2665 rc = lli->lli_async_rc;
2666 lli->lli_async_rc = 0;
2667 if (lli->lli_clob != NULL) {
2668 err = lov_read_and_clear_async_rc(lli->lli_clob);
2673 /* The application has been told write failure already.
2674 * Do not report failure again. */
2675 if (fd->fd_write_failed)
2677 return rc ? -EIO : 0;
2681 * Called to make sure a portion of file has been written out.
2682 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2684 * Return how many pages have been written.
/*
 * Write out [start, end] of @inode via a CIT_FSYNC cl_io. Unless @mode
 * is CL_FSYNC_LOCAL this also sends OST_SYNC RPCs to the servers.
 * Returns the number of pages written on success (see doc above).
 */
2686 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2687 enum cl_fsync_mode mode, int ignore_layout)
2689 struct cl_env_nest nest;
2692 struct cl_fsync_io *fio;
/* Reject any mode outside the four defined fsync modes. */
2696 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2697 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2700 env = cl_env_nested_get(&nest);
2702 RETURN(PTR_ERR(env));
2704 io = vvp_env_thread_io(env);
2705 io->ci_obj = ll_i2info(inode)->lli_clob;
2706 io->ci_ignore_layout = ignore_layout;
2708 /* initialize parameters for sync */
2709 fio = &io->u.ci_fsync;
2710 fio->fi_start = start;
2712 fio->fi_fid = ll_inode2fid(inode);
2713 fio->fi_mode = mode;
2714 fio->fi_nr_written = 0;
2716 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2717 result = cl_io_loop(env, io);
2719 result = io->ci_result;
/* On success report how many pages were written. */
2721 result = fio->fi_nr_written;
2722 cl_io_fini(env, io);
2723 cl_env_nested_put(&nest, env);
2729 * When dentry is provided (the 'else' case), *file->f_path.dentry may be
2730 * null and dentry must be used directly rather than pulled from
2731 * *file->f_path.dentry as is done otherwise.
2734 #ifdef HAVE_FILE_FSYNC_4ARGS
2735 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2737 struct dentry *dentry = file->f_path.dentry;
2738 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2739 int ll_fsync(struct file *file, int datasync)
2741 struct dentry *dentry = file->f_path.dentry;
2743 loff_t end = LLONG_MAX;
2745 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2748 loff_t end = LLONG_MAX;
2750 struct inode *inode = dentry->d_inode;
2751 struct ll_inode_info *lli = ll_i2info(inode);
2752 struct ptlrpc_request *req;
2756 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2757 PFID(ll_inode2fid(inode)), inode);
2758 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2760 #ifdef HAVE_FILE_FSYNC_4ARGS
2761 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2762 mutex_lock(&inode->i_mutex);
2764 /* fsync's caller has already called _fdata{sync,write}, we want
2765 * that IO to finish before calling the osc and mdc sync methods */
2766 rc = filemap_fdatawait(inode->i_mapping);
2769 /* catch async errors that were recorded back when async writeback
2770 * failed for pages in this mapping. */
2771 if (!S_ISDIR(inode->i_mode)) {
2772 err = lli->lli_async_rc;
2773 lli->lli_async_rc = 0;
2776 err = lov_read_and_clear_async_rc(lli->lli_clob);
2781 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
2785 ptlrpc_req_finished(req);
2787 if (S_ISREG(inode->i_mode)) {
2788 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2790 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2791 if (rc == 0 && err < 0)
2794 fd->fd_write_failed = true;
2796 fd->fd_write_failed = false;
2799 #ifdef HAVE_FILE_FSYNC_4ARGS
2800 mutex_unlock(&inode->i_mutex);
/* ->flock()/->lock() handler: translate a kernel struct file_lock into an
 * LDLM_FLOCK enqueue on the MDT, then mirror the result into the local
 * kernel lock tables so the VFS sees a consistent state.
 * NOTE(review): lines are elided in this excerpt (switch labels, returns,
 * brace closures); comments describe only the visible statements. */
2806 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2808 struct inode *inode = file->f_path.dentry->d_inode;
2809 struct ll_sb_info *sbi = ll_i2sbi(inode);
2810 struct ldlm_enqueue_info einfo = {
2811 .ei_type = LDLM_FLOCK,
2812 .ei_cb_cp = ldlm_flock_completion_ast,
2813 .ei_cbdata = file_lock,
2815 struct md_op_data *op_data;
2816 struct lustre_handle lockh = { 0 };
2817 union ldlm_policy_data flock = { { 0 } };
2818 int fl_type = file_lock->fl_type;
2824 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2825 PFID(ll_inode2fid(inode)), file_lock);
2827 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
/* Fill in the flock policy: BSD flock() locks cover the whole file and
 * are owned per struct file; POSIX locks carry an explicit byte range
 * and are owned per fl_owner (the files_struct). */
2829 if (file_lock->fl_flags & FL_FLOCK) {
2830 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2831 /* flocks are whole-file locks */
2832 flock.l_flock.end = OFFSET_MAX;
2833 /* For flocks owner is determined by the local file descriptor */
2834 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2835 } else if (file_lock->fl_flags & FL_POSIX) {
2836 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2837 flock.l_flock.start = file_lock->fl_start;
2838 flock.l_flock.end = file_lock->fl_end;
2842 flock.l_flock.pid = file_lock->fl_pid;
2844 /* Somewhat ugly workaround for svc lockd.
2845 * lockd installs custom fl_lmops->lm_compare_owner that checks
2846 * for the fl_owner to be the same (which it always is on local node
2847 * I guess between lockd processes) and then compares pid.
2848 * As such we assign pid to the owner field to make it all work,
2849 * conflict with normal locks is unlikely since pid space and
2850 * pointer space for current->files are not intersecting */
2851 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2852 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Map the fcntl lock type onto an LDLM mode: read -> PR, write -> PW,
 * unlock -> NL (see comment below). */
2856 einfo.ei_mode = LCK_PR;
2859 /* An unlock request may or may not have any relation to
2860 * existing locks so we may not be able to pass a lock handle
2861 * via a normal ldlm_lock_cancel() request. The request may even
2862 * unlock a byte range in the middle of an existing lock. In
2863 * order to process an unlock request we need all of the same
2864 * information that is given with a normal read or write record
2865 * lock request. To avoid creating another ldlm unlock (cancel)
2866 * message we'll treat a LCK_NL flock request as an unlock. */
2867 einfo.ei_mode = LCK_NL;
2870 einfo.ei_mode = LCK_PW;
2873 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* Map the fcntl command onto LDLM flags: non-blocking set ->
 * BLOCK_NOWAIT, GETLK -> TEST_LOCK. */
2888 flags = LDLM_FL_BLOCK_NOWAIT;
2894 flags = LDLM_FL_TEST_LOCK;
2897 CERROR("unknown fcntl lock command: %d\n", cmd);
2901 /* Save the old mode so that if the mode in the lock changes we
2902 * can decrement the appropriate reader or writer refcount. */
2903 file_lock->fl_type = einfo.ei_mode;
2905 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2906 LUSTRE_OPC_ANY, NULL);
2907 if (IS_ERR(op_data))
2908 RETURN(PTR_ERR(op_data));
2910 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2911 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2912 flock.l_flock.pid, flags, einfo.ei_mode,
2913 flock.l_flock.start, flock.l_flock.end);
2915 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2918 /* Restore the file lock type if not TEST lock. */
2919 if (!(flags & LDLM_FL_TEST_LOCK))
2920 file_lock->fl_type = fl_type;
/* On success (or unlock) mirror the server decision into the kernel's
 * local flock/posix lock tables so the VFS bookkeeping matches. */
2922 if ((file_lock->fl_flags & FL_FLOCK) &&
2923 (rc == 0 || file_lock->fl_type == F_UNLCK))
2924 rc2 = flock_lock_file_wait(file, file_lock);
2925 if ((file_lock->fl_flags & FL_POSIX) &&
2926 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2927 !(flags & LDLM_FL_TEST_LOCK))
2928 rc2 = posix_lock_file_wait(file, file_lock);
/* Local bookkeeping failed after the server granted the lock: release
 * the server-side lock again (enqueue with LCK_NL == unlock). */
2930 if (rc2 && file_lock->fl_type != F_UNLCK) {
2931 einfo.ei_mode = LCK_NL;
2932 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2937 ll_finish_md_op_data(op_data);
/* Look up the FID of entry @name (length @namelen) under @parent via a
 * getattr-by-name RPC to the MDT.  On success *fid is filled in and, if
 * @inode is non-NULL, the inode is instantiated from the reply.
 * NOTE(review): some error-path lines are elided from this excerpt. */
2942 int ll_get_fid_by_name(struct inode *parent, const char *name,
2943 int namelen, struct lu_fid *fid,
2944 struct inode **inode)
2946 struct md_op_data *op_data = NULL;
2947 struct mdt_body *body;
2948 struct ptlrpc_request *req;
2952 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2953 LUSTRE_OPC_ANY, NULL);
2954 if (IS_ERR(op_data))
2955 RETURN(PTR_ERR(op_data));
/* Only the FID and file type are needed from the MDT. */
2957 op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
2958 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2959 ll_finish_md_op_data(op_data);
2963 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2965 GOTO(out_req, rc = -EFAULT);
2967 *fid = body->mbo_fid1;
2970 rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
2972 ptlrpc_req_finished(req);
/* Migrate directory entry @name under @parent to MDT @mdtidx.  For
 * regular files a write lease is taken and the data version recorded so
 * the MDT can detect concurrent modification; the migration itself is
 * performed as a rename-to-self with CLI_MIGRATE/MDS_RENAME_MIGRATE.
 * NOTE(review): lines are elided in this excerpt (the EAGAIN retry body,
 * several brace closures); comments cover only the visible statements. */
2976 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2977 const char *name, int namelen)
2979 struct dentry *dchild = NULL;
2980 struct inode *child_inode = NULL;
2981 struct md_op_data *op_data;
2982 struct ptlrpc_request *request = NULL;
2983 struct obd_client_handle *och = NULL;
2985 struct mdt_body *body;
2987 __u64 data_version = 0;
2990 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2991 name, PFID(ll_inode2fid(parent)), mdtidx);
2993 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2994 0, LUSTRE_OPC_ANY, NULL);
2995 if (IS_ERR(op_data))
2996 RETURN(PTR_ERR(op_data));
2998 /* Get child FID first: prefer the cached dentry, fall back to an
2999 * RPC lookup by name if the child is not in the dcache. */
2999 qstr.hash = full_name_hash(name, namelen);
3002 dchild = d_lookup(file->f_path.dentry, &qstr);
3003 if (dchild != NULL) {
3004 if (dchild->d_inode != NULL)
3005 child_inode = igrab(dchild->d_inode);
3009 if (child_inode == NULL) {
3010 rc = ll_get_fid_by_name(parent, name, namelen,
3011 &op_data->op_fid3, &child_inode);
3016 if (child_inode == NULL)
3017 GOTO(out_free, rc = -EINVAL);
/* Hold the child's i_mutex across the whole migration. */
3019 mutex_lock(&child_inode->i_mutex);
3020 op_data->op_fid3 = *ll_inode2fid(child_inode);
3021 if (!fid_is_sane(&op_data->op_fid3)) {
3022 CERROR("%s: migrate %s, but FID "DFID" is insane\n",
3023 ll_get_fsname(parent->i_sb, NULL, 0), name,
3024 PFID(&op_data->op_fid3));
3025 GOTO(out_unlock, rc = -EINVAL);
/* Nothing to do if the child already lives on the target MDT. */
3028 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
3030 GOTO(out_unlock, rc);
3033 CDEBUG(D_INFO, "%s: "DFID" is already on MDT%04x\n", name,
3034 PFID(&op_data->op_fid3), mdtidx);
3035 GOTO(out_unlock, rc = 0);
/* For regular files take a write lease and snapshot the data version
 * so the server can verify the file was not modified concurrently. */
3038 if (S_ISREG(child_inode->i_mode)) {
3039 och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
3043 GOTO(out_unlock, rc);
3046 rc = ll_data_version(child_inode, &data_version,
3049 GOTO(out_close, rc);
3051 op_data->op_handle = och->och_fh;
3052 op_data->op_data = och->och_mod;
3053 op_data->op_data_version = data_version;
3054 op_data->op_lease_handle = och->och_lease_handle;
3055 op_data->op_bias |= MDS_RENAME_MIGRATE;
/* Migration is implemented as a rename of the entry onto itself with
 * the CLI_MIGRATE flag and the target MDT index. */
3058 op_data->op_mds = mdtidx;
3059 op_data->op_cli_flags = CLI_MIGRATE;
3060 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
3061 namelen, name, namelen, &request);
3063 ll_update_times(request, parent);
3065 if (request != NULL) {
3066 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
3068 ptlrpc_req_finished(request);
3069 GOTO(out_close, rc = -EPROTO);
3072 /* If the server does release layout lock, then we cleanup
3073 * the client och here, otherwise release it in out_close: */
3075 body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
3076 obd_mod_put(och->och_mod);
3077 md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp,
3079 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
3083 ptlrpc_req_finished(request);
3086 /* Try again if the file layout has changed. */
3087 if (rc == -EAGAIN && S_ISREG(child_inode->i_mode)) {
3092 if (och != NULL) /* close the file */
3093 ll_lease_close(och, child_inode, NULL);
/* The old entry is gone from this MDT after migration. */
3095 clear_nlink(child_inode);
3097 mutex_unlock(&child_inode->i_mutex);
3100 ll_finish_md_op_data(op_data);
/* Stub lock handler for "-o noflock" mounts.  NOTE(review): the body is
 * elided from this excerpt; per the ll_file_operations_noflock comment it
 * presumably returns -ENOSYS — confirm in the full source. */
3105 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3113 * test if some locks matching bits and l_req_mode are acquired
3114 * - bits can be in different locks
3115 * - if found clear the common lock bits in *bits
3116 * - the bits not found, are kept in *bits
3118 * \param bits [IN] searched lock bits [IN]
3119 * \param l_req_mode [IN] searched lock mode
3120 * \retval boolean, true iff all bits are found
3122 int ll_have_md_lock(struct inode *inode, __u64 *bits, enum ldlm_mode l_req_mode)
3124 struct lustre_handle lockh;
3125 union ldlm_policy_data policy;
/* LCK_MINMODE means "any mode": match against all four modes at once. */
3126 enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
3127 (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
3136 fid = &ll_i2info(inode)->lli_fid;
3137 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3138 ldlm_lockname[mode]);
/* TEST_LOCK: only probe for a matching lock, do not take a reference. */
3140 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe each requested inodebit individually; a single cached lock may
 * cover several bits, so clear everything the matched lock grants. */
3141 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3142 policy.l_inodebits.bits = *bits & (1 << i);
3143 if (policy.l_inodebits.bits == 0)
3146 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3147 &policy, mode, &lockh)) {
3148 struct ldlm_lock *lock;
3150 lock = ldlm_handle2lock(&lockh);
3153 ~(lock->l_policy_data.l_inodebits.bits);
3154 LDLM_LOCK_PUT(lock);
3156 *bits &= ~policy.l_inodebits.bits;
/* Try to match (and reference, unless LDLM_FL_TEST_LOCK is in @flags) a
 * cached MD inodebits lock covering @bits in one of @mode's modes.
 * Returns the matched mode (0 if none); the handle is stored in *lockh. */
3163 enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
3164 struct lustre_handle *lockh, __u64 flags,
3165 enum ldlm_mode mode)
3167 union ldlm_policy_data policy = { .l_inodebits = { bits } };
3172 fid = &ll_i2info(inode)->lli_fid;
3173 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3175 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3176 fid, LDLM_IBITS, &policy, mode, lockh);
/* Post-process the result of a revalidate RPC: translate -ENOENT on an
 * already-unlinked inode into success (after dropping nlink), and log
 * other failures.  NOTE(review): some lines are elided in this excerpt. */
3181 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3183 /* Already unlinked. Just update nlink and return success */
3184 if (rc == -ENOENT) {
3186 /* If it is striped directory, and there is bad stripe
3187 * Let's revalidate the dentry again, instead of returning
3189 if (S_ISDIR(inode->i_mode) &&
3190 ll_i2info(inode)->lli_lsm_md != NULL)
3193 /* This path cannot be hit for regular files unless in
3194 * case of obscure races, so no need to to validate
3196 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3198 } else if (rc != 0) {
/* EACCES/EIDRM are expected (permission/identity) — log quietly. */
3199 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3200 "%s: revalidate FID "DFID" error: rc = %d\n",
3201 ll_get_fsname(inode->i_sb, NULL, 0),
3202 PFID(ll_inode2fid(inode)), rc);
/* Revalidate the MD attributes of @dentry's inode, making sure the ibits
 * locks in @ibits are held.  Two strategies: an intent getattr-by-FID
 * when the server supports OBD_CONNECT_ATTRFID, otherwise a plain
 * md_getattr if no matching MD lock is already cached.
 * NOTE(review): several lines (error checks, brace closures) are elided
 * from this excerpt. */
3208 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3210 struct inode *inode = dentry->d_inode;
3211 struct ptlrpc_request *req = NULL;
3212 struct obd_export *exp;
3216 LASSERT(inode != NULL);
3218 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3219 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3221 exp = ll_i2mdexp(inode);
3223 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3224 * But under CMD case, it caused some lock issues, should be fixed
3225 * with new CMD ibits lock. See bug 12718 */
3226 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3227 struct lookup_intent oit = { .it_op = IT_GETATTR };
3228 struct md_op_data *op_data;
3230 if (ibits == MDS_INODELOCK_LOOKUP)
3231 oit.it_op = IT_LOOKUP;
3233 /* Call getattr by fid, so do not provide name at all. */
3234 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3235 dentry->d_inode, NULL, 0, 0,
3236 LUSTRE_OPC_ANY, NULL);
3237 if (IS_ERR(op_data))
3238 RETURN(PTR_ERR(op_data));
3240 rc = md_intent_lock(exp, op_data, &oit, &req,
3241 &ll_md_blocking_ast, 0);
3242 ll_finish_md_op_data(op_data);
3244 rc = ll_inode_revalidate_fini(inode, rc);
3248 rc = ll_revalidate_it_finish(req, &oit, dentry);
3250 ll_intent_release(&oit);
3254 /* Unlinked? Unhash dentry, so it is not picked up later by
3255 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3256 here to preserve get_cwd functionality on 2.6.
3258 if (!dentry->d_inode->i_nlink)
3259 d_lustre_invalidate(dentry, 0);
3261 ll_lookup_finish_locks(&oit, dentry);
3262 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
/* No cached lock covers @ibits: fetch attributes with a plain
 * getattr RPC, requesting EA data for regular files. */
3263 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3264 u64 valid = OBD_MD_FLGETATTR;
3265 struct md_op_data *op_data;
3268 if (S_ISREG(inode->i_mode)) {
3269 rc = ll_get_default_mdsize(sbi, &ealen);
3272 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3275 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3276 0, ealen, LUSTRE_OPC_ANY,
3278 if (IS_ERR(op_data))
3279 RETURN(PTR_ERR(op_data));
3281 op_data->op_valid = valid;
3282 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3283 ll_finish_md_op_data(op_data);
3285 rc = ll_inode_revalidate_fini(inode, rc);
3289 rc = ll_prep_inode(&inode, req, NULL, NULL);
3292 ptlrpc_req_finished(req);
/* For a striped directory, merge the per-stripe attributes from all MDTs
 * into a single set and apply them (nlink/blocks/size to the inode,
 * timestamps to the llite inode info). */
3296 static int ll_merge_md_attr(struct inode *inode)
3298 struct cl_attr attr = { 0 };
3301 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3302 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3303 &attr, ll_md_blocking_ast);
3307 set_nlink(inode, attr.cat_nlink);
3308 inode->i_blocks = attr.cat_blocks;
3309 i_size_write(inode, attr.cat_size);
3311 ll_i2info(inode)->lli_atime = attr.cat_atime;
3312 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3313 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/* Revalidate MD attributes (via __ll_inode_revalidate) and then bring
 * the size/time attributes up to date: merged MD attrs for striped
 * directories, cached lli times for other non-regular inodes, and a
 * glimpse of the OST size for regular files (unless an HSM restore is
 * in progress, when the MDT-provided size is already authoritative). */
3319 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3321 struct inode *inode = dentry->d_inode;
3325 rc = __ll_inode_revalidate(dentry, ibits);
3329 /* if object isn't regular file, don't validate size */
3330 if (!S_ISREG(inode->i_mode)) {
3331 if (S_ISDIR(inode->i_mode) &&
3332 ll_i2info(inode)->lli_lsm_md != NULL) {
3333 rc = ll_merge_md_attr(inode);
3338 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3339 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3340 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3342 /* In case of restore, the MDT has the right size and has
3343 * already send it back without granting the layout lock,
3344 * inode is up-to-date so glimpse is useless.
3345 * Also to glimpse we need the layout, in case of a running
3346 * restore the MDT holds the layout lock so the glimpse will
3347 * block up to the end of restore (getattr will block)
3349 if (!ll_file_test_flag(ll_i2info(inode), LLIF_FILE_RESTORING))
3350 rc = ll_glimpse_size(inode);
/* VFS ->getattr(): revalidate UPDATE|LOOKUP ibits, then copy the inode
 * attributes into @stat.  32-bit userspace on a 64-bit kernel gets an
 * ino squashed into 32 bits via cl_fid_build_ino(). */
3355 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3357 struct inode *inode = de->d_inode;
3358 struct ll_sb_info *sbi = ll_i2sbi(inode);
3359 struct ll_inode_info *lli = ll_i2info(inode);
3362 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3363 MDS_INODELOCK_LOOKUP);
3364 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3369 stat->dev = inode->i_sb->s_dev;
3370 if (ll_need_32bit_api(sbi))
3371 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3373 stat->ino = inode->i_ino;
3374 stat->mode = inode->i_mode;
3375 stat->uid = inode->i_uid;
3376 stat->gid = inode->i_gid;
3377 stat->rdev = inode->i_rdev;
3378 stat->atime = inode->i_atime;
3379 stat->mtime = inode->i_mtime;
3380 stat->ctime = inode->i_ctime;
3381 stat->blksize = 1 << inode->i_blkbits;
3383 stat->nlink = inode->i_nlink;
3384 stat->size = i_size_read(inode);
3385 stat->blocks = inode->i_blocks;
/* ->fiemap() handler: marshal the kernel's fiemap_extent_info into a
 * struct fiemap buffer (including the user's first extent, used by
 * Lustre to continue an interrupted mapping), run ll_do_fiemap(), and
 * copy the resulting extents back to userspace.
 * NOTE(review): allocation-failure and return lines are elided here. */
3390 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3391 __u64 start, __u64 len)
3395 struct fiemap *fiemap;
3396 unsigned int extent_count = fieinfo->fi_extents_max;
3398 num_bytes = sizeof(*fiemap) + (extent_count *
3399 sizeof(struct fiemap_extent));
3400 OBD_ALLOC_LARGE(fiemap, num_bytes);
3405 fiemap->fm_flags = fieinfo->fi_flags;
3406 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3407 fiemap->fm_start = start;
3408 fiemap->fm_length = len;
/* Copy in only the first user extent — presumably used as the
 * continuation cookie for a restarted FIEMAP call; confirm against
 * ll_do_fiemap() in the full source. */
3409 if (extent_count > 0 &&
3410 copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3411 sizeof(struct fiemap_extent)) != 0)
3412 GOTO(out, rc = -EFAULT);
3414 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3416 fieinfo->fi_flags = fiemap->fm_flags;
3417 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3418 if (extent_count > 0 &&
3419 copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3420 fiemap->fm_mapped_extents *
3421 sizeof(struct fiemap_extent)) != 0)
3422 GOTO(out, rc = -EFAULT);
3424 OBD_FREE_LARGE(fiemap, num_bytes);
/* ->get_acl(): return a referenced copy of the cached POSIX ACL.
 * The lli_lock protects lli_posix_acl against concurrent update. */
3428 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3430 struct ll_inode_info *lli = ll_i2info(inode);
3431 struct posix_acl *acl = NULL;
3434 spin_lock(&lli->lli_lock);
3435 /* VFS' acl_permission_check->check_acl will release the refcount */
3436 acl = posix_acl_dup(lli->lli_posix_acl);
3437 spin_unlock(&lli->lli_lock);
/* ACL check callback for kernels where generic_permission() takes a
 * check_acl function pointer.  In RCU walk mode (IPERM_FLAG_RCU) we
 * cannot block, so bail out (elided line presumably returns -ECHILD —
 * confirm in full source).  Without CONFIG_FS_POSIX_ACL this collapses
 * to a stub. */
3442 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3444 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3445 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3447 ll_check_acl(struct inode *inode, int mask)
3450 # ifdef CONFIG_FS_POSIX_ACL
3451 struct posix_acl *acl;
3455 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3456 if (flags & IPERM_FLAG_RCU)
3459 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3464 rc = posix_acl_permission(inode, acl, mask);
3465 posix_acl_release(acl);
3468 # else /* !CONFIG_FS_POSIX_ACL */
3470 # endif /* CONFIG_FS_POSIX_ACL */
3472 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/* VFS ->permission() entry point (three kernel-API variants).  Performs
 * root-squash credential substitution when configured, revalidates the
 * root inode on first access, and delegates the actual check to either
 * the remote-permission path or generic_permission().
 * NOTE(review): lines are elided in this excerpt (RCU bail-out returns,
 * prepare_creds() failure handling); comments cover visible code only. */
3474 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3475 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3477 # ifdef HAVE_INODE_PERMISION_2ARGS
3478 int ll_inode_permission(struct inode *inode, int mask)
3480 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3485 struct ll_sb_info *sbi;
3486 struct root_squash_info *squash;
3487 struct cred *cred = NULL;
3488 const struct cred *old_cred = NULL;
3490 bool squash_id = false;
/* Cannot block in RCU-walk mode: the revalidate below does RPCs. */
3493 #ifdef MAY_NOT_BLOCK
3494 if (mask & MAY_NOT_BLOCK)
3496 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3497 if (flags & IPERM_FLAG_RCU)
3501 /* as root inode are NOT getting validated in lookup operation,
3502 * need to do it before permission check. */
3504 if (inode == inode->i_sb->s_root->d_inode) {
3505 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3506 MDS_INODELOCK_LOOKUP);
3511 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3512 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3514 /* squash fsuid/fsgid if needed */
3515 sbi = ll_i2sbi(inode);
3516 squash = &sbi->ll_squash;
3517 if (unlikely(squash->rsi_uid != 0 &&
3518 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3519 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3523 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3524 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3525 squash->rsi_uid, squash->rsi_gid);
3527 /* update current process's credentials
3528 * and FS capability */
3529 cred = prepare_creds();
3533 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3534 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* Drop all filesystem-related capabilities for the squashed creds. */
3535 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3536 if ((1 << cap) & CFS_CAP_FS_MASK)
3537 cap_lower(cred->cap_effective, cap);
3539 old_cred = override_creds(cred);
3542 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3544 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3545 rc = lustre_check_remote_perm(inode, mask);
3547 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3549 /* restore current process's credentials and FS capability */
3551 revert_creds(old_cred);
3558 /* -o localflock - only provides locally consistent flock locks */
/* Default file operations: note there are no .flock/.lock entries, so
 * the kernel falls back to purely local (per-node) lock handling. */
3559 struct file_operations ll_file_operations = {
3560 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3561 # ifdef HAVE_SYNC_READ_WRITE
3562 .read = new_sync_read,
3563 .write = new_sync_write,
3565 .read_iter = ll_file_read_iter,
3566 .write_iter = ll_file_write_iter,
3567 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3568 .read = ll_file_read,
3569 .aio_read = ll_file_aio_read,
3570 .write = ll_file_write,
3571 .aio_write = ll_file_aio_write,
3572 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3573 .unlocked_ioctl = ll_file_ioctl,
3574 .open = ll_file_open,
3575 .release = ll_file_release,
3576 .mmap = ll_file_mmap,
3577 .llseek = ll_file_seek,
3578 .splice_read = ll_file_splice_read,
/* File operations for "-o flock" mounts: identical to the default table
 * plus .flock/.lock wired to ll_file_flock for cluster-wide locking. */
3583 struct file_operations ll_file_operations_flock = {
3584 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3585 # ifdef HAVE_SYNC_READ_WRITE
3586 .read = new_sync_read,
3587 .write = new_sync_write,
3588 # endif /* HAVE_SYNC_READ_WRITE */
3589 .read_iter = ll_file_read_iter,
3590 .write_iter = ll_file_write_iter,
3591 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3592 .read = ll_file_read,
3593 .aio_read = ll_file_aio_read,
3594 .write = ll_file_write,
3595 .aio_write = ll_file_aio_write,
3596 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3597 .unlocked_ioctl = ll_file_ioctl,
3598 .open = ll_file_open,
3599 .release = ll_file_release,
3600 .mmap = ll_file_mmap,
3601 .llseek = ll_file_seek,
3602 .splice_read = ll_file_splice_read,
3605 .flock = ll_file_flock,
3606 .lock = ll_file_flock
3609 /* These are for -o noflock - to return ENOSYS on flock calls */
3610 struct file_operations ll_file_operations_noflock = {
3611 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3612 # ifdef HAVE_SYNC_READ_WRITE
3613 .read = new_sync_read,
3614 .write = new_sync_write,
3615 # endif /* HAVE_SYNC_READ_WRITE */
3616 .read_iter = ll_file_read_iter,
3617 .write_iter = ll_file_write_iter,
3618 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3619 .read = ll_file_read,
3620 .aio_read = ll_file_aio_read,
3621 .write = ll_file_write,
3622 .aio_write = ll_file_aio_write,
3623 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3624 .unlocked_ioctl = ll_file_ioctl,
3625 .open = ll_file_open,
3626 .release = ll_file_release,
3627 .mmap = ll_file_mmap,
3628 .llseek = ll_file_seek,
3629 .splice_read = ll_file_splice_read,
/* Both lock hooks reject requests via the noflock stub. */
3632 .flock = ll_file_noflock,
3633 .lock = ll_file_noflock
/* Inode operations for regular files. */
3636 struct inode_operations ll_file_inode_operations = {
3637 .setattr = ll_setattr,
3638 .getattr = ll_getattr,
3639 .permission = ll_inode_permission,
3640 .setxattr = ll_setxattr,
3641 .getxattr = ll_getxattr,
3642 .listxattr = ll_listxattr,
3643 .removexattr = ll_removexattr,
3644 .fiemap = ll_fiemap,
3645 #ifdef HAVE_IOP_GET_ACL
3646 .get_acl = ll_get_acl,
3650 /* dynamic ioctl number support routines */
/* Global registry of dynamically-registered ioctl handlers: a list of
 * llioc_data entries protected by a rw_semaphore (readers iterate in
 * ll_iocontrol_call, writers register/unregister). */
3651 static struct llioc_ctl_data {
3652 struct rw_semaphore ioc_sem;
3653 struct list_head ioc_head;
3655 __RWSEM_INITIALIZER(llioc.ioc_sem),
3656 LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the array of ioctl command
 * numbers it services (flexible trailing array, iocd_count entries). */
3661 struct list_head iocd_list;
3662 unsigned int iocd_size;
3663 llioc_callback_t iocd_cb;
3664 unsigned int iocd_count;
3665 unsigned int iocd_cmd[0];
/* Register callback @cb for the @count ioctl commands in @cmd.  Returns
 * an opaque handle (the allocation itself) for later unregistration, or
 * NULL on bad arguments / allocation failure (elided lines). */
3668 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3671 struct llioc_data *in_data = NULL;
3674 if (cb == NULL || cmd == NULL ||
3675 count > LLIOC_MAX_CMD || count < 0)
3678 size = sizeof(*in_data) + count * sizeof(unsigned int);
3679 OBD_ALLOC(in_data, size);
3680 if (in_data == NULL)
3683 memset(in_data, 0, sizeof(*in_data));
3684 in_data->iocd_size = size;
3685 in_data->iocd_cb = cb;
3686 in_data->iocd_count = count;
3687 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3689 down_write(&llioc.ioc_sem);
3690 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3691 up_write(&llioc.ioc_sem);
/* Remove and free the registration identified by @magic (the pointer
 * returned by ll_iocontrol_register).  Warns if not found.
 * NOTE(review): the magic-comparison line is elided from this excerpt. */
3696 void ll_iocontrol_unregister(void *magic)
3698 struct llioc_data *tmp;
3703 down_write(&llioc.ioc_sem);
3704 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3706 unsigned int size = tmp->iocd_size;
3708 list_del(&tmp->iocd_list);
/* Drop the semaphore before freeing; the entry is already unlinked. */
3709 up_write(&llioc.ioc_sem);
3711 OBD_FREE(tmp, size);
3715 up_write(&llioc.ioc_sem);
3717 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3720 EXPORT_SYMBOL(ll_iocontrol_register);
3721 EXPORT_SYMBOL(ll_iocontrol_unregister);
/* Dispatch ioctl @cmd to the dynamically-registered handlers: scan each
 * registration's command list under the read lock and invoke its
 * callback on a match; a callback returning LLIOC_STOP ends the scan.
 * The callback's status is passed back through *rcp. */
3723 static enum llioc_iter
3724 ll_iocontrol_call(struct inode *inode, struct file *file,
3725 unsigned int cmd, unsigned long arg, int *rcp)
3727 enum llioc_iter ret = LLIOC_CONT;
3728 struct llioc_data *data;
3729 int rc = -EINVAL, i;
3731 down_read(&llioc.ioc_sem);
3732 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3733 for (i = 0; i < data->iocd_count; i++) {
3734 if (cmd != data->iocd_cmd[i])
3737 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3741 if (ret == LLIOC_STOP)
3744 up_read(&llioc.ioc_sem);
/* Push a layout configuration change down to the cl_object stack.  For
 * OBJECT_CONF_SET, also allow the layout lock to be matched afterwards
 * and record the new layout generation in the inode.
 * NOTE(review): lines are elided here (env error check, cl struct
 * initializers, result checks); comments cover visible code only. */
3751 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3753 struct ll_inode_info *lli = ll_i2info(inode);
3754 struct cl_object *obj = lli->lli_clob;
3755 struct cl_env_nest nest;
3763 env = cl_env_nested_get(&nest);
3765 RETURN(PTR_ERR(env));
3767 rc = cl_conf_set(env, lli->lli_clob, conf);
3771 if (conf->coc_opc == OBJECT_CONF_SET) {
3772 struct ldlm_lock *lock = conf->coc_lock;
3773 struct cl_layout cl = {
3777 LASSERT(lock != NULL);
3778 LASSERT(ldlm_has_layout(lock));
3780 /* it can only be allowed to match after layout is
3781 * applied to inode otherwise false layout would be
3782 * seen. Applying layout should happen before dropping
3783 * the intent lock. */
3784 ldlm_lock_allow_match(lock);
3786 rc = cl_object_layout_get(env, obj, &cl);
3791 DFID": layout version change: %u -> %u\n",
3792 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3794 ll_layout_version_set(lli, cl.cl_layout_gen);
3798 cl_env_nested_put(&nest, env);
3803 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3804 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3807 struct ll_sb_info *sbi = ll_i2sbi(inode);
3808 struct ptlrpc_request *req;
3809 struct mdt_body *body;
3816 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3817 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3818 lock->l_lvb_data, lock->l_lvb_len);
/* Layout already cached in the lock's LVB: nothing to fetch. */
3820 if (lock->l_lvb_data != NULL)
3823 /* if layout lock was granted right away, the layout is returned
3824 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3825 * blocked and then granted via completion ast, we have to fetch
3826 * layout here. Please note that we can't use the LVB buffer in
3827 * completion AST because it doesn't have a large enough buffer */
3828 rc = ll_get_default_mdsize(sbi, &lmmsize);
3830 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
3831 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3836 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3838 GOTO(out, rc = -EPROTO);
3840 lmmsize = body->mbo_eadatasize;
3841 if (lmmsize == 0) /* empty layout */
3844 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3846 GOTO(out, rc = -EFAULT);
/* Copy the LOV EA into a private buffer and attach it to the lock as
 * its LVB; if another thread raced us and attached one first, free our
 * copy (the lvbdata pointer is presumably cleared in the elided branch
 * — confirm in full source). */
3848 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3849 if (lvbdata == NULL)
3850 GOTO(out, rc = -ENOMEM);
3852 memcpy(lvbdata, lmm, lmmsize);
3853 lock_res_and_lock(lock);
3854 if (unlikely(lock->l_lvb_data == NULL)) {
3855 lock->l_lvb_type = LVB_T_LAYOUT;
3856 lock->l_lvb_data = lvbdata;
3857 lock->l_lvb_len = lmmsize;
3860 unlock_res_and_lock(lock);
3862 if (lvbdata != NULL)
3863 OBD_FREE_LARGE(lvbdata, lmmsize);
3868 ptlrpc_req_finished(req);
3873 * Apply the layout to the inode. Layout lock is held and will be released
3876 static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
3877 struct inode *inode)
3879 struct ll_inode_info *lli = ll_i2info(inode);
3880 struct ll_sb_info *sbi = ll_i2sbi(inode);
3881 struct ldlm_lock *lock;
3882 struct cl_object_conf conf;
3885 bool wait_layout = false;
3888 LASSERT(lustre_handle_is_used(lockh));
3890 lock = ldlm_handle2lock(lockh);
3891 LASSERT(lock != NULL);
3892 LASSERT(ldlm_has_layout(lock));
3894 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured",
3895 PFID(&lli->lli_fid), inode);
3897 /* in case this is a caching lock and reinstate with new inode */
3898 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3900 lock_res_and_lock(lock);
3901 lvb_ready = ldlm_is_lvb_ready(lock);
3902 unlock_res_and_lock(lock);
3903 /* checking lvb_ready is racy but this is okay. The worst case is
3904 * that multi processes may configure the file on the same time. */
/* Fetch the layout (LOV EA) into the lock's LVB if it isn't there. */
3909 rc = ll_layout_fetch(inode, lock);
3913 /* for layout lock, lmm is stored in lock's lvb.
3914 * lvb_data is immutable if the lock is held so it's safe to access it
3917 * set layout to file. Unlikely this will fail as old layout was
3918 * surely eliminated */
3919 memset(&conf, 0, sizeof conf);
3920 conf.coc_opc = OBJECT_CONF_SET;
3921 conf.coc_inode = inode;
3922 conf.coc_lock = lock;
3923 conf.u.coc_layout.lb_buf = lock->l_lvb_data;
3924 conf.u.coc_layout.lb_len = lock->l_lvb_len;
3925 rc = ll_layout_conf(inode, &conf);
3927 /* refresh layout failed, need to wait */
3928 wait_layout = rc == -EBUSY;
/* Done with the lock reference and the DLM ref taken by the caller. */
3932 LDLM_LOCK_PUT(lock);
3933 ldlm_lock_decref(lockh, mode);
3935 /* wait for IO to complete if it's still being used. */
3937 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3938 ll_get_fsname(inode->i_sb, NULL, 0),
3939 PFID(&lli->lli_fid), inode);
/* OBJECT_CONF_WAIT blocks until outstanding IO using the old layout
 * drains, then the caller retries the refresh. */
3941 memset(&conf, 0, sizeof conf);
3942 conf.coc_opc = OBJECT_CONF_WAIT;
3943 conf.coc_inode = inode;
3944 rc = ll_layout_conf(inode, &conf);
3948 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3949 ll_get_fsname(inode->i_sb, NULL, 0),
3950 PFID(&lli->lli_fid), rc);
/* Refresh the file layout while holding lli_layout_mutex: first try to
 * match a cached layout lock; if none, enqueue an IT_LAYOUT intent on
 * the MDT and apply the returned layout via ll_layout_lock_set().
 * NOTE(review): retry/goto lines are elided from this excerpt. */
3955 static int ll_layout_refresh_locked(struct inode *inode)
3957 struct ll_inode_info *lli = ll_i2info(inode);
3958 struct ll_sb_info *sbi = ll_i2sbi(inode);
3959 struct md_op_data *op_data;
3960 struct lookup_intent it;
3961 struct lustre_handle lockh;
3962 enum ldlm_mode mode;
3963 struct ldlm_enqueue_info einfo = {
3964 .ei_type = LDLM_IBITS,
3966 .ei_cb_bl = &ll_md_blocking_ast,
3967 .ei_cb_cp = &ldlm_completion_ast,
3973 /* mostly layout lock is caching on the local side, so try to match
3974 * it before grabbing layout lock mutex. */
3975 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3976 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3977 if (mode != 0) { /* hit cached lock */
3978 rc = ll_layout_lock_set(&lockh, mode, inode);
3985 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3986 0, 0, LUSTRE_OPC_ANY, NULL);
3987 if (IS_ERR(op_data))
3988 RETURN(PTR_ERR(op_data));
3990 /* have to enqueue one */
3991 memset(&it, 0, sizeof(it));
3992 it.it_op = IT_LAYOUT;
3993 lockh.cookie = 0ULL;
3995 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3996 ll_get_fsname(inode->i_sb, NULL, 0),
3997 PFID(&lli->lli_fid), inode);
3999 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
/* The intent request is no longer needed once the lock is granted. */
4000 if (it.d.lustre.it_data != NULL)
4001 ptlrpc_req_finished(it.d.lustre.it_data);
4002 it.d.lustre.it_data = NULL;
4004 ll_finish_md_op_data(op_data);
/* Take over the lock mode from the intent before dropping it, then
 * apply the layout carried by the new lock. */
4006 mode = it.d.lustre.it_lock_mode;
4007 it.d.lustre.it_lock_mode = 0;
4008 ll_intent_drop_lock(&it);
4011 /* set lock data in case this is a new lock */
4012 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
4013 rc = ll_layout_lock_set(&lockh, mode, inode);
4022 * This function checks if there exists a LAYOUT lock on the client side,
4023 * or enqueues it if it doesn't have one in cache.
4025 * This function will not hold layout lock so it may be revoked any time after
4026 * this function returns. Any operations depend on layout should be redone
4029 * This function should be called before lov_io_init() to get an uptodate
4030 * layout version, the caller should save the version number and after IO
4031 * is finished, this function should be called again to verify that layout
4032 * is not changed during IO time.
4034 int ll_layout_refresh(struct inode *inode, __u32 *gen)
4036 struct ll_inode_info *lli = ll_i2info(inode);
4037 struct ll_sb_info *sbi = ll_i2sbi(inode);
/* Fast path: layout locking disabled, or a generation is already known. */
4041 *gen = ll_layout_version_get(lli);
4042 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
4046 LASSERT(fid_is_sane(ll_inode2fid(inode)));
4047 LASSERT(S_ISREG(inode->i_mode));
4049 /* take layout lock mutex to enqueue layout lock exclusively. */
4050 mutex_lock(&lli->lli_layout_mutex);
4052 rc = ll_layout_refresh_locked(inode);
4056 *gen = ll_layout_version_get(lli);
4058 mutex_unlock(&lli->lli_layout_mutex);
4064 * This function send a restore request to the MDT
4066 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
4068 struct hsm_user_request *hur;
4072 len = sizeof(struct hsm_user_request) +
4073 sizeof(struct hsm_user_item);
4074 OBD_ALLOC(hur, len);
4078 hur->hur_request.hr_action = HUA_RESTORE;
4079 hur->hur_request.hr_archive_id = 0;
4080 hur->hur_request.hr_flags = 0;
4081 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
4082 sizeof(hur->hur_user_item[0].hui_fid));
4083 hur->hur_user_item[0].hui_extent.offset = offset;
4084 hur->hur_user_item[0].hui_extent.length = length;
4085 hur->hur_request.hr_itemcount = 1;
4086 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,