4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include <linux/user_namespace.h>
49 #ifdef HAVE_UIDGID_HEADER
50 # include <linux/uidgid.h>
52 #include <lustre/ll_fiemap.h>
54 #include <lustre_ioctl.h>
55 #include <lustre_swab.h>
57 #include "cl_object.h"
58 #include "llite_internal.h"
59 #include "vvp_internal.h"
/* Forward declarations for helpers used before their definitions below. */
62 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
64 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
67 static enum llioc_iter
68 ll_iocontrol_call(struct inode *inode, struct file *file,
69 unsigned int cmd, unsigned long arg, int *rcp);
/*
 * Allocate a per-open ll_file_data object from the dedicated slab cache
 * (GFP_NOFS to avoid filesystem re-entry) and reset its write-failure flag.
 * NOTE(review): the NULL-check and return statement are not visible in this
 * extract -- presumably returns NULL on allocation failure; confirm against
 * the full source.
 */
71 static struct ll_file_data *ll_file_data_get(void)
73 struct ll_file_data *fd;
75 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
79 fd->fd_write_failed = false;
/* Release a ll_file_data object back to its slab cache. */
84 static void ll_file_data_put(struct ll_file_data *fd)
87 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Snapshot the inode's cached attributes (mode, a/m/ctime, size, blocks,
 * flags) plus the MDS open handle into @op_data for the CLOSE rpc, and
 * set MDS_DATA_MODIFIED when a write-opened file was dirtied (HSM).
 */
91 * Packs all the attributes into @op_data for the CLOSE rpc.
93 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
94 struct obd_client_handle *och)
98 ll_prep_md_op_data(op_data, inode, NULL, NULL,
99 0, 0, LUSTRE_OPC_ANY, NULL);
101 op_data->op_attr.ia_mode = inode->i_mode;
102 op_data->op_attr.ia_atime = inode->i_atime;
103 op_data->op_attr.ia_mtime = inode->i_mtime;
104 op_data->op_attr.ia_ctime = inode->i_ctime;
105 op_data->op_attr.ia_size = i_size_read(inode);
/* Mark every packed attribute valid so the MDT applies all of them. */
106 op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
107 ATTR_MTIME | ATTR_MTIME_SET |
108 ATTR_CTIME | ATTR_CTIME_SET;
109 op_data->op_attr_blocks = inode->i_blocks;
110 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
111 op_data->op_handle = och->och_fh;
113 if (och->och_flags & FMODE_WRITE &&
114 ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED))
115 /* For HSM: if inode data has been modified, pack it so that
116 * MDT can set data dirty flag in the archive. */
117 op_data->op_bias |= MDS_DATA_MODIFIED;
/*
 * Close an MDS open handle, optionally with a close bias:
 *  - MDS_CLOSE_LAYOUT_SWAP: @data is the second inode to swap layouts with;
 *  - MDS_HSM_RELEASE: @data points to the data version to release;
 *  - no bias: @data must be NULL.
 * Sends the CLOSE rpc via md_close(), checks the intent-executed flag in
 * the reply for biased closes, clears replay data and poisons the handle.
 * NOTE(review): several lines (rc checks, GOTO labels, RETURN) are not
 * visible in this extract; the reproduced code below is verbatim.
 */
123 * Perform a close, possibly with a bias.
124 * The meaning of "data" depends on the value of "bias".
126 * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
127 * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
130 static int ll_close_inode_openhandle(struct inode *inode,
131 struct obd_client_handle *och,
132 enum mds_op_bias bias, void *data)
134 struct obd_export *md_exp = ll_i2mdexp(inode);
135 const struct ll_inode_info *lli = ll_i2info(inode);
136 struct md_op_data *op_data;
137 struct ptlrpc_request *req = NULL;
/* Sanity: without a live MDC connection the close cannot be sent. */
141 if (class_exp2obd(md_exp) == NULL) {
142 CERROR("%s: invalid MDC connection handle closing "DFID"\n",
143 ll_get_fsname(inode->i_sb, NULL, 0),
144 PFID(&lli->lli_fid));
148 OBD_ALLOC_PTR(op_data);
149 /* We leak openhandle and request here on error, but not much to be
150 * done in OOM case since app won't retry close on error either. */
152 GOTO(out, rc = -ENOMEM);
154 ll_prepare_close(inode, op_data, och);
156 case MDS_CLOSE_LAYOUT_SWAP:
157 LASSERT(data != NULL);
158 op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
159 op_data->op_data_version = 0;
160 op_data->op_lease_handle = och->och_lease_handle;
/* fid2 carries the peer inode whose layout is swapped in. */
161 op_data->op_fid2 = *ll_inode2fid(data);
164 case MDS_HSM_RELEASE:
165 LASSERT(data != NULL);
166 op_data->op_bias |= MDS_HSM_RELEASE;
167 op_data->op_data_version = *(__u64 *)data;
168 op_data->op_lease_handle = och->och_lease_handle;
169 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
173 LASSERT(data == NULL);
177 rc = md_close(md_exp, op_data, och->och_mod, &req);
/* -EINTR is an expected interruption, not worth a console error. */
178 if (rc != 0 && rc != -EINTR)
179 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
180 md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
183 op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) {
184 struct mdt_body *body;
/* Biased close only succeeded if the MDT executed the intent. */
186 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
187 if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
191 ll_finish_md_op_data(op_data);
195 md_clear_open_replay_data(md_exp, och);
/* Poison the file handle so stale use is detectable. */
196 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
199 ptlrpc_req_finished(req); /* This is close request */
/*
 * Really close the MDS open handle for the given open mode (read, write
 * or exec), but only when no other local users of that handle remain.
 * Picks the per-mode handle/usecount pair under lli_och_mutex; if the
 * usecount is still positive the close is skipped.
 */
203 int ll_md_real_close(struct inode *inode, fmode_t fmode)
205 struct ll_inode_info *lli = ll_i2info(inode);
206 struct obd_client_handle **och_p;
207 struct obd_client_handle *och;
/* Select the handle slot matching the open mode. */
212 if (fmode & FMODE_WRITE) {
213 och_p = &lli->lli_mds_write_och;
214 och_usecount = &lli->lli_open_fd_write_count;
215 } else if (fmode & FMODE_EXEC) {
216 och_p = &lli->lli_mds_exec_och;
217 och_usecount = &lli->lli_open_fd_exec_count;
219 LASSERT(fmode & FMODE_READ);
220 och_p = &lli->lli_mds_read_och;
221 och_usecount = &lli->lli_open_fd_read_count;
224 mutex_lock(&lli->lli_och_mutex);
225 if (*och_usecount > 0) {
226 /* There are still users of this handle, so skip
228 mutex_unlock(&lli->lli_och_mutex);
234 mutex_unlock(&lli->lli_och_mutex);
237 /* There might be a race and this handle may already
239 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
/*
 * Per-file-descriptor close path: drop any group lock, clean up a lease
 * left behind by a crashed application, close fd_och if present, decrement
 * the per-mode open counter, and talk to the MDS only if we do not hold a
 * matching OPEN DLM lock (md_lock_match with LDLM_FL_TEST_LOCK).
 * Finally detaches and frees the ll_file_data.
 */
245 static int ll_md_close(struct inode *inode, struct file *file)
247 union ldlm_policy_data policy = {
248 .l_inodebits = { MDS_INODELOCK_OPEN },
250 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
251 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
252 struct ll_inode_info *lli = ll_i2info(inode);
253 struct lustre_handle lockh;
254 enum ldlm_mode lockmode;
258 /* clear group lock, if present */
259 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
260 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
262 if (fd->fd_lease_och != NULL) {
265 /* Usually the lease is not released when the
266 * application crashed, we need to release here. */
267 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
268 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
269 PFID(&lli->lli_fid), rc, lease_broken);
271 fd->fd_lease_och = NULL;
274 if (fd->fd_och != NULL) {
275 rc = ll_close_inode_openhandle(inode, fd->fd_och, 0, NULL);
280 /* Let's see if we have good enough OPEN lock on the file and if
281 we can skip talking to MDS */
282 mutex_lock(&lli->lli_och_mutex);
283 if (fd->fd_omode & FMODE_WRITE) {
285 LASSERT(lli->lli_open_fd_write_count);
286 lli->lli_open_fd_write_count--;
287 } else if (fd->fd_omode & FMODE_EXEC) {
289 LASSERT(lli->lli_open_fd_exec_count);
290 lli->lli_open_fd_exec_count--;
293 LASSERT(lli->lli_open_fd_read_count);
294 lli->lli_open_fd_read_count--;
296 mutex_unlock(&lli->lli_och_mutex);
/* No cached OPEN lock of the right mode: must do the real MDS close. */
298 if (!md_lock_match(ll_i2mdexp(inode), flags, ll_inode2fid(inode),
299 LDLM_IBITS, &policy, lockmode, &lockh))
300 rc = ll_md_real_close(inode, fd->fd_omode);
303 LUSTRE_FPRIVATE(file) = NULL;
304 ll_file_data_put(fd);
/*
 * VFS ->release() implementation: remote-ACL cleanup on the root inode,
 * statahead deauthorization, async-rc collection for regular files, then
 * ll_md_close(). Root-directory descriptors skip the MDS close entirely.
 */
309 /* While this returns an error code, fput() the caller does not, so we need
310 * to make every effort to clean up all of our state here. Also, applications
311 * rarely check close errors and even if an error is returned they will not
312 * re-try the close call.
314 int ll_file_release(struct inode *inode, struct file *file)
316 struct ll_file_data *fd;
317 struct ll_sb_info *sbi = ll_i2sbi(inode);
318 struct ll_inode_info *lli = ll_i2info(inode);
322 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
323 PFID(ll_inode2fid(inode)), inode);
325 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping is only kept on the root inode. */
326 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
327 inode == inode->i_sb->s_root->d_inode) {
328 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
331 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
332 fd->fd_flags &= ~LL_FILE_RMTACL;
333 rct_del(&sbi->ll_rct, current_pid());
334 et_search_free(&sbi->ll_et, current_pid());
339 if (inode->i_sb->s_root != file->f_path.dentry)
340 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
341 fd = LUSTRE_FPRIVATE(file);
344 /* The last ref on @file, maybe not the owner pid of statahead,
345 * because parent and child process can share the same file handle. */
346 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
347 ll_deauthorize_statahead(inode, fd);
/* The root dentry has no MDS open handle to close; just free fd. */
349 if (inode->i_sb->s_root == file->f_path.dentry) {
350 LUSTRE_FPRIVATE(file) = NULL;
351 ll_file_data_put(fd);
355 if (!S_ISDIR(inode->i_mode)) {
356 if (lli->lli_clob != NULL)
357 lov_read_and_clear_async_rc(lli->lli_clob);
358 lli->lli_async_rc = 0;
361 rc = ll_md_close(inode, file);
363 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
364 libcfs_debug_dumplog();
/*
 * Issue an IT_OPEN intent lock to the MDS for @file. The name is packed
 * only when the server lacks OBD_CONNECT_OPEN_BY_FID and the dentry name
 * is valid; otherwise open-by-fid is used. On success the reply inode is
 * instantiated via ll_prep_inode() and lock data recorded.
 */
369 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
370 struct lookup_intent *itp)
372 struct dentry *de = file->f_path.dentry;
373 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
374 struct dentry *parent = de->d_parent;
375 const char *name = NULL;
377 struct md_op_data *op_data;
378 struct ptlrpc_request *req = NULL;
382 LASSERT(parent != NULL);
383 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
385 /* if server supports open-by-fid, or file name is invalid, don't pack
386 * name in open request */
387 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
388 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
389 name = de->d_name.name;
390 len = de->d_name.len;
393 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
394 name, len, 0, LUSTRE_OPC_ANY, NULL);
396 RETURN(PTR_ERR(op_data));
/* lmm/lmmsize carry an optional striping request to the MDS. */
397 op_data->op_data = lmm;
398 op_data->op_data_size = lmmsize;
400 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
401 &ll_md_blocking_ast, 0);
402 ll_finish_md_op_data(op_data);
404 /* reason for keep own exit path - don`t flood log
405 * with messages with -ESTALE errors.
407 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
408 it_open_error(DISP_OPEN_OPEN, itp))
410 ll_release_openhandle(de, itp);
414 if (it_disposition(itp, DISP_LOOKUP_NEG))
415 GOTO(out, rc = -ENOENT);
417 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
418 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
419 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
423 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
424 if (!rc && itp->d.lustre.it_lock_mode)
425 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
428 ptlrpc_req_finished(req);
429 ll_intent_drop_lock(itp);
/*
 * Fill an obd_client_handle from the MDT reply body carried by the
 * completed intent: file handle, fid, lease lock cookie, magic, flags.
 * Returns the result of registering the handle for open replay.
 */
434 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
435 struct obd_client_handle *och)
437 struct ptlrpc_request *req = it->d.lustre.it_data;
438 struct mdt_body *body;
440 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
441 och->och_fh = body->mbo_handle;
442 och->och_fid = body->mbo_fid1;
443 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
444 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
445 och->och_flags = it->it_flags;
447 return md_set_open_replay_data(md_exp, och, it);
/*
 * Complete the client-local part of an open: optionally fill @och from the
 * intent, attach @fd as the file's private data, initialize readahead
 * state, record the open mode and set up the ll_cl_context lock/list.
 */
450 static int ll_local_open(struct file *file, struct lookup_intent *it,
451 struct ll_file_data *fd, struct obd_client_handle *och)
453 struct inode *inode = file->f_path.dentry->d_inode;
456 LASSERT(!LUSTRE_FPRIVATE(file));
463 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
468 LUSTRE_FPRIVATE(file) = fd;
469 ll_readahead_init(inode, &fd->fd_ras);
470 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
472 /* ll_cl_context initialize */
473 rwlock_init(&fd->fd_lock);
474 INIT_LIST_HEAD(&fd->fd_lccs);
/*
 * VFS ->open() implementation. Reuses an existing per-mode MDS open handle
 * when one is cached on the inode; otherwise sends an IT_OPEN intent
 * (ll_intent_file_open) and records the new handle. Statahead is
 * authorized for directories and the root dentry takes a fast path.
 * NOTE(review): error/cleanup labels (out_och_free/out_openerr) and some
 * branch lines are missing from this extract; code below is verbatim.
 */
479 /* Open a file, and (for the very first open) create objects on the OSTs at
480 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
481 * creation or open until ll_lov_setstripe() ioctl is called.
483 * If we already have the stripe MD locally then we don't request it in
484 * md_open(), by passing a lmm_size = 0.
486 * It is up to the application to ensure no other processes open this file
487 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
488 * used. We might be able to avoid races of that sort by getting lli_open_sem
489 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
490 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
492 int ll_file_open(struct inode *inode, struct file *file)
494 struct ll_inode_info *lli = ll_i2info(inode);
495 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
496 .it_flags = file->f_flags };
497 struct obd_client_handle **och_p = NULL;
498 __u64 *och_usecount = NULL;
499 struct ll_file_data *fd;
503 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
504 PFID(ll_inode2fid(inode)), inode, file->f_flags);
506 it = file->private_data; /* XXX: compat macro */
507 file->private_data = NULL; /* prevent ll_local_open assertion */
509 fd = ll_file_data_get();
511 GOTO(out_openerr, rc = -ENOMEM);
514 if (S_ISDIR(inode->i_mode))
515 ll_authorize_statahead(inode, fd);
/* Root dentry: no MDS open handle is needed, just attach fd. */
517 if (inode->i_sb->s_root == file->f_path.dentry) {
518 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own IT_OPEN intent. */
522 if (!it || !it->d.lustre.it_disposition) {
523 /* Convert f_flags into access mode. We cannot use file->f_mode,
524 * because everything but O_ACCMODE mask was stripped from
526 if ((oit.it_flags + 1) & O_ACCMODE)
528 if (file->f_flags & O_TRUNC)
529 oit.it_flags |= FMODE_WRITE;
531 /* kernel only call f_op->open in dentry_open. filp_open calls
532 * dentry_open after call to open_namei that checks permissions.
533 * Only nfsd_open call dentry_open directly without checking
534 * permissions and because of that this code below is safe. */
535 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
536 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
538 /* We do not want O_EXCL here, presumably we opened the file
539 * already? XXX - NFS implications? */
540 oit.it_flags &= ~O_EXCL;
542 /* bug20584, if "it_flags" contains O_CREAT, the file will be
543 * created if necessary, then "IT_CREAT" should be set to keep
544 * consistent with it */
545 if (oit.it_flags & O_CREAT)
546 oit.it_op |= IT_CREAT;
552 /* Let's see if we have file open on MDS already. */
553 if (it->it_flags & FMODE_WRITE) {
554 och_p = &lli->lli_mds_write_och;
555 och_usecount = &lli->lli_open_fd_write_count;
556 } else if (it->it_flags & FMODE_EXEC) {
557 och_p = &lli->lli_mds_exec_och;
558 och_usecount = &lli->lli_open_fd_exec_count;
560 och_p = &lli->lli_mds_read_och;
561 och_usecount = &lli->lli_open_fd_read_count;
564 mutex_lock(&lli->lli_och_mutex);
565 if (*och_p) { /* Open handle is present */
566 if (it_disposition(it, DISP_OPEN_OPEN)) {
567 /* Well, there's extra open request that we do not need,
568 let's close it somehow. This will decref request. */
569 rc = it_open_error(DISP_OPEN_OPEN, it);
571 mutex_unlock(&lli->lli_och_mutex);
572 GOTO(out_openerr, rc);
575 ll_release_openhandle(file->f_path.dentry, it);
/* Reuse the cached handle; NULL och means "do not fill a new one". */
579 rc = ll_local_open(file, it, fd, NULL);
582 mutex_unlock(&lli->lli_och_mutex);
583 GOTO(out_openerr, rc);
586 LASSERT(*och_usecount == 0);
587 if (!it->d.lustre.it_disposition) {
588 /* We cannot just request lock handle now, new ELC code
589 means that one of other OPEN locks for this file
590 could be cancelled, and since blocking ast handler
591 would attempt to grab och_mutex as well, that would
592 result in a deadlock */
593 mutex_unlock(&lli->lli_och_mutex);
595 * Normally called under two situations:
597 * 2. A race/condition on MDS resulting in no open
598 * handle to be returned from LOOKUP|OPEN request,
599 * for example if the target entry was a symlink.
601 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
603 * Always specify MDS_OPEN_BY_FID because we don't want
604 * to get file with different fid.
606 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
607 rc = ll_intent_file_open(file, NULL, 0, it);
609 GOTO(out_openerr, rc);
613 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
615 GOTO(out_och_free, rc = -ENOMEM);
619 /* md_intent_lock() didn't get a request ref if there was an
620 * open error, so don't do cleanup on the request here
622 /* XXX (green): Should not we bail out on any error here, not
623 * just open error? */
624 rc = it_open_error(DISP_OPEN_OPEN, it);
626 GOTO(out_och_free, rc);
628 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
629 "inode %p: disposition %x, status %d\n", inode,
630 it_disposition(it, ~0), it->d.lustre.it_status);
632 rc = ll_local_open(file, it, fd, *och_p);
634 GOTO(out_och_free, rc);
636 mutex_unlock(&lli->lli_och_mutex);
639 /* Must do this outside lli_och_mutex lock to prevent deadlock where
640 different kind of OPEN lock for this same inode gets cancelled
641 by ldlm_cancel_lru */
642 if (!S_ISREG(inode->i_mode))
643 GOTO(out_och_free, rc);
645 cl_lov_delay_create_clear(&file->f_flags);
646 GOTO(out_och_free, rc);
/* Error path: free a half-installed handle and undo fd/statahead. */
650 if (och_p && *och_p) {
651 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
652 *och_p = NULL; /* OBD_FREE writes some magic there */
655 mutex_unlock(&lli->lli_och_mutex);
658 if (lli->lli_opendir_key == fd)
659 ll_deauthorize_statahead(inode, fd);
661 ll_file_data_put(fd);
663 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
666 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
667 ptlrpc_req_finished(it->d.lustre.it_data);
668 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease locks: on LDLM_CB_BLOCKING, cancel the lease
 * lock asynchronously; LDLM_CB_CANCELING handling is below (lines not
 * visible in this extract).
 */
674 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
675 struct ldlm_lock_desc *desc, void *data, int flag)
678 struct lustre_handle lockh;
682 case LDLM_CB_BLOCKING:
683 ldlm_lock2handle(lock, &lockh);
684 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
686 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
690 case LDLM_CB_CANCELING:
/*
 * Acquire an FMODE_READ or FMODE_WRITE lease on @inode and open the file.
 * When called with an already-open @file, the existing MDS open handle is
 * located (it must have exactly one opener and no existing lease) so the
 * MDT can match ownership via op_handle. The lease lock is requested with
 * LDLM_FL_NO_LRU | LDLM_FL_EXCL and must come back as an OPEN ibits lock.
 * Returns the new obd_client_handle or an ERR_PTR on failure.
 * NOTE(review): several branch/return lines are missing from this extract;
 * code below is verbatim.
 */
698 * Acquire a lease and open the file.
700 static struct obd_client_handle *
701 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
704 struct lookup_intent it = { .it_op = IT_OPEN };
705 struct ll_sb_info *sbi = ll_i2sbi(inode);
706 struct md_op_data *op_data;
707 struct ptlrpc_request *req = NULL;
708 struct lustre_handle old_handle = { 0 };
709 struct obd_client_handle *och = NULL;
/* Leases exist only for plain read or plain write mode. */
714 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
715 RETURN(ERR_PTR(-EINVAL));
718 struct ll_inode_info *lli = ll_i2info(inode);
719 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
720 struct obd_client_handle **och_p;
723 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
724 RETURN(ERR_PTR(-EPERM));
726 /* Get the openhandle of the file */
728 mutex_lock(&lli->lli_och_mutex);
/* Only one lease per file descriptor is allowed. */
729 if (fd->fd_lease_och != NULL) {
730 mutex_unlock(&lli->lli_och_mutex);
734 if (fd->fd_och == NULL) {
735 if (file->f_mode & FMODE_WRITE) {
736 LASSERT(lli->lli_mds_write_och != NULL);
737 och_p = &lli->lli_mds_write_och;
738 och_usecount = &lli->lli_open_fd_write_count;
740 LASSERT(lli->lli_mds_read_och != NULL);
741 och_p = &lli->lli_mds_read_och;
742 och_usecount = &lli->lli_open_fd_read_count;
744 if (*och_usecount == 1) {
751 mutex_unlock(&lli->lli_och_mutex);
752 if (rc < 0) /* more than 1 opener */
755 LASSERT(fd->fd_och != NULL);
756 old_handle = fd->fd_och->och_fh;
761 RETURN(ERR_PTR(-ENOMEM));
763 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
764 LUSTRE_OPC_ANY, NULL);
766 GOTO(out, rc = PTR_ERR(op_data));
768 /* To tell the MDT this openhandle is from the same owner */
769 op_data->op_handle = old_handle;
771 it.it_flags = fmode | open_flags;
772 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
773 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
774 &ll_md_blocking_lease_ast,
775 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
776 * it can be cancelled which may mislead applications that the lease is
778 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
779 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
780 * doesn't deal with openhandle, so normal openhandle will be leaked. */
781 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
782 ll_finish_md_op_data(op_data);
783 ptlrpc_req_finished(req);
785 GOTO(out_release_it, rc);
787 if (it_disposition(&it, DISP_LOOKUP_NEG))
788 GOTO(out_release_it, rc = -ENOENT);
790 rc = it_open_error(DISP_OPEN_OPEN, &it);
792 GOTO(out_release_it, rc);
794 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
795 ll_och_fill(sbi->ll_md_exp, &it, och);
797 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
798 GOTO(out_close, rc = -EOPNOTSUPP);
800 /* already get lease, handle lease lock */
801 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
802 if (it.d.lustre.it_lock_mode == 0 ||
803 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
804 /* open lock must return for lease */
805 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
806 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
807 it.d.lustre.it_lock_bits);
808 GOTO(out_close, rc = -EPROTO);
811 ll_intent_release(&it);
/* Error path: drop the open lock, close the handle, release the intent. */
815 /* Cancel open lock */
816 if (it.d.lustre.it_lock_mode != 0) {
817 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
818 it.d.lustre.it_lock_mode);
819 it.d.lustre.it_lock_mode = 0;
820 och->och_lease_handle.cookie = 0ULL;
822 rc2 = ll_close_inode_openhandle(inode, och, 0, NULL);
824 CERROR("%s: error closing file "DFID": %d\n",
825 ll_get_fsname(inode->i_sb, NULL, 0),
826 PFID(&ll_i2info(inode)->lli_fid), rc2);
827 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
829 ll_intent_release(&it);
/*
 * Validate that a layout swap is permitted: both inodes must be regular
 * files, writable by the caller, and live on the same superblock.
 * NOTE(review): the specific error codes returned by each failing check
 * are on lines not visible in this extract.
 */
837 * Check whether a layout swap can be done between two inodes.
839 * \param[in] inode1 First inode to check
840 * \param[in] inode2 Second inode to check
842 * \retval 0 on success, layout swap can be performed between both inodes
843 * \retval negative error code if requirements are not met
845 static int ll_check_swap_layouts_validity(struct inode *inode1,
846 struct inode *inode2)
848 if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
851 if (inode_permission(inode1, MAY_WRITE) ||
852 inode_permission(inode2, MAY_WRITE))
855 if (inode1->i_sb != inode2->i_sb)
/*
 * Close @inode's lease open handle with MDS_CLOSE_LAYOUT_SWAP bias so the
 * MDT atomically swaps layouts between @inode and @inode2. Rejects the
 * swap when validity checks fail or both fids are identical.
 */
861 static int ll_swap_layouts_close(struct obd_client_handle *och,
862 struct inode *inode, struct inode *inode2)
864 const struct lu_fid *fid1 = ll_inode2fid(inode);
865 const struct lu_fid *fid2;
869 CDEBUG(D_INODE, "%s: biased close of file "DFID"\n",
870 ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
872 rc = ll_check_swap_layouts_validity(inode, inode2);
874 GOTO(out_free_och, rc);
876 /* We now know that inode2 is a lustre inode */
877 fid2 = ll_inode2fid(inode2);
/* Swapping a layout with itself makes no sense. */
879 rc = lu_fid_cmp(fid1, fid2);
881 GOTO(out_free_och, rc = -EINVAL);
883 /* Close the file and swap layouts between inode & inode2.
884 * NB: lease lock handle is released in mdc_close_layout_swap_pack()
885 * because we still need it to pack l_remote_handle to MDT. */
886 rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP,
889 och = NULL; /* freed in ll_close_inode_openhandle() */
/*
 * Release a lease and close the underlying open handle. Reports via
 * @lease_broken whether the lease lock was already cancelled (i.e. the
 * lease had been broken) before we cancel it ourselves.
 */
899 * Release lease and close the file.
900 * It will check if the lease has ever broken.
902 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
905 struct ldlm_lock *lock;
906 bool cancelled = true;
910 lock = ldlm_handle2lock(&och->och_lease_handle);
912 lock_res_and_lock(lock);
913 cancelled = ldlm_is_cancel(lock);
914 unlock_res_and_lock(lock);
918 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
919 PFID(&ll_i2info(inode)->lli_fid), cancelled);
922 ldlm_cli_cancel(&och->och_lease_handle, 0);
923 if (lease_broken != NULL)
924 *lease_broken = cancelled;
926 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
/*
 * Merge MDS-cached timestamps (lli_{a,m,c}time) with attributes obtained
 * from the OSTs via cl_object_attr_get(), keeping the most recent of each
 * timestamp, and update the inode's size and block count. All under the
 * inode size lock.
 */
930 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
932 struct ll_inode_info *lli = ll_i2info(inode);
933 struct cl_object *obj = lli->lli_clob;
934 struct cl_attr *attr = vvp_env_thread_attr(env);
942 ll_inode_size_lock(inode);
944 /* merge timestamps the most recently obtained from mds with
945 timestamps obtained from osts */
946 LTIME_S(inode->i_atime) = lli->lli_atime;
947 LTIME_S(inode->i_mtime) = lli->lli_mtime;
948 LTIME_S(inode->i_ctime) = lli->lli_ctime;
950 atime = LTIME_S(inode->i_atime);
951 mtime = LTIME_S(inode->i_mtime);
952 ctime = LTIME_S(inode->i_ctime);
954 cl_object_attr_lock(obj);
955 rc = cl_object_attr_get(env, obj, attr);
956 cl_object_attr_unlock(obj);
959 GOTO(out_size_unlock, rc);
/* Take the newer of MDS and OST timestamps for each field. */
961 if (atime < attr->cat_atime)
962 atime = attr->cat_atime;
964 if (ctime < attr->cat_ctime)
965 ctime = attr->cat_ctime;
967 if (mtime < attr->cat_mtime)
968 mtime = attr->cat_mtime;
970 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
971 PFID(&lli->lli_fid), attr->cat_size);
973 i_size_write(inode, attr->cat_size);
974 inode->i_blocks = attr->cat_blocks;
976 LTIME_S(inode->i_atime) = atime;
977 LTIME_S(inode->i_mtime) = mtime;
978 LTIME_S(inode->i_ctime) = ctime;
981 ll_inode_size_unlock(inode);
/*
 * Decide whether atime updates are suppressed for this open file,
 * mirroring the kernel's file_accessed()/touch_atime() checks: O_NOATIME,
 * S_NOATIME, IS_NOATIME, mount noatime/read-only, and the nodiratime
 * variants for directories.
 */
986 static bool file_is_noatime(const struct file *file)
988 const struct vfsmount *mnt = file->f_path.mnt;
989 const struct inode *inode = file->f_path.dentry->d_inode;
991 /* Adapted from file_accessed() and touch_atime().*/
992 if (file->f_flags & O_NOATIME)
995 if (inode->i_flags & S_NOATIME)
998 if (IS_NOATIME(inode))
1001 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1004 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1007 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io for a read or write on @file: propagate O_NONBLOCK,
 * O_APPEND and sync-write flags, pick the lock requirement (never for
 * nolock files, mandatory for append, maybe otherwise) and the noatime
 * setting.
 */
1013 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
1015 struct inode *inode = file->f_path.dentry->d_inode;
1017 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1019 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1020 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1021 file->f_flags & O_DIRECT ||
1024 io->ci_obj = ll_i2info(inode)->lli_clob;
1025 io->ci_lockreq = CILR_MAYBE;
1026 if (ll_file_nolock(file)) {
1027 io->ci_lockreq = CILR_NEVER;
1028 io->ci_no_srvlock = 1;
1029 } else if (file->f_flags & O_APPEND) {
1030 io->ci_lockreq = CILR_MANDATORY;
1033 io->ci_noatime = file_is_noatime(file);
/*
 * Common read/write engine. Sets up a cl_io, takes the per-inode range
 * lock for writes (and O_DIRECT reads, see LU-6227) unless group-locked,
 * runs cl_io_loop(), accumulates partial results, restarts the IO when
 * cl_io reports ci_need_restart, and tallies read/write statistics and
 * the fd_write_failed state.
 * NOTE(review): the restart loop head and a few result-handling lines are
 * missing from this extract; code below is verbatim.
 */
1037 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1038 struct file *file, enum cl_io_type iot,
1039 loff_t *ppos, size_t count)
1041 struct vvp_io *vio = vvp_env_io(env);
1042 struct inode *inode = file->f_path.dentry->d_inode;
1043 struct ll_inode_info *lli = ll_i2info(inode);
1044 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1048 struct range_lock range;
1052 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1053 file->f_path.dentry->d_name.name, iot, *ppos, count);
1056 io = vvp_env_thread_io(env);
1057 ll_io_init(io, file, iot == CIT_WRITE);
1059 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1060 bool range_locked = false;
/* Append writes may land anywhere, so lock to EOF. */
1062 if (file->f_flags & O_APPEND)
1063 range_lock_init(&range, 0, LUSTRE_EOF);
1065 range_lock_init(&range, *ppos, *ppos + count - 1);
1067 vio->vui_fd = LUSTRE_FPRIVATE(file);
1068 vio->vui_io_subtype = args->via_io_subtype;
1070 switch (vio->vui_io_subtype) {
1072 vio->vui_iter = args->u.normal.via_iter;
1073 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1074 vio->vui_tot_nrsegs = vio->vui_iter->nr_segs;
1075 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1076 vio->vui_iocb = args->u.normal.via_iocb;
1077 /* Direct IO reads must also take range lock,
1078 * or multiple reads will try to work on the same pages
1079 * See LU-6227 for details. */
1080 if (((iot == CIT_WRITE) ||
1081 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1082 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1083 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1085 rc = range_lock(&lli->lli_write_tree, &range);
1089 range_locked = true;
1093 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1094 vio->u.splice.vui_flags = args->u.splice.via_flags;
1097 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
/* Publish the IO on the env so page fault paths can find it. */
1101 ll_cl_add(file, env, io);
1102 rc = cl_io_loop(env, io);
1103 ll_cl_remove(file, env);
1106 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1108 range_unlock(&lli->lli_write_tree, &range);
1111 /* cl_io_rw_init() handled IO */
1115 if (io->ci_nob > 0) {
1116 result += io->ci_nob;
1117 count -= io->ci_nob;
1118 *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */
1120 /* prepare IO restart */
1121 if (count > 0 && args->via_io_subtype == IO_NORMAL) {
1122 args->u.normal.via_iter = vio->vui_iter;
1123 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1124 args->u.normal.via_iter->nr_segs = vio->vui_tot_nrsegs;
1125 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1130 cl_io_fini(env, io);
1132 if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
1134 "%s: restart %s from %lld, count:%zu, result: %zd\n",
1135 file->f_path.dentry->d_name.name,
1136 iot == CIT_READ ? "read" : "write",
1137 *ppos, count, result);
1141 if (iot == CIT_READ) {
1143 ll_stats_ops_tally(ll_i2sbi(inode),
1144 LPROC_LL_READ_BYTES, result);
1145 } else if (iot == CIT_WRITE) {
1147 ll_stats_ops_tally(ll_i2sbi(inode),
1148 LPROC_LL_WRITE_BYTES, result);
1149 fd->fd_write_failed = false;
1150 } else if (result == 0 && rc == 0) {
1153 fd->fd_write_failed = true;
1155 fd->fd_write_failed = false;
1156 } else if (rc != -ERESTARTSYS) {
1157 fd->fd_write_failed = true;
1161 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1163 return result > 0 ? result : rc;
/*
 * ->read_iter() entry point: grab a cl env, fill the per-thread
 * vvp_io_args with the iov_iter and iocb, and run the generic IO engine
 * as CIT_READ.
 */
1167 * Read from a file (through the page cache).
1169 static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1171 struct vvp_io_args *args;
1176 env = cl_env_get(&refcheck);
1178 return PTR_ERR(env);
1180 args = ll_env_args(env, IO_NORMAL);
1181 args->u.normal.via_iter = to;
1182 args->u.normal.via_iocb = iocb;
1184 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1185 &iocb->ki_pos, iov_iter_count(to));
1186 cl_env_put(env, &refcheck);
/*
 * ->write_iter() entry point: mirror of ll_file_read_iter() with
 * CIT_WRITE.
 */
1191 * Write to a file (through the page cache).
1193 static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1195 struct vvp_io_args *args;
1200 env = cl_env_get(&refcheck);
1202 return PTR_ERR(env);
1204 args = ll_env_args(env, IO_NORMAL);
1205 args->u.normal.via_iter = from;
1206 args->u.normal.via_iocb = iocb;
1208 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1209 &iocb->ki_pos, iov_iter_count(from));
1210 cl_env_put(env, &refcheck);
/*
 * Compute the total byte count of an iovec array, truncating at the first
 * inaccessible segment (access_ok failure) and rejecting negative or
 * wrapping lengths. Compat path for kernels without read_iter/write_iter.
 */
1214 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1216 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
1218 static int ll_file_get_iov_count(const struct iovec *iov,
1219 unsigned long *nr_segs, size_t *count)
1224 for (seg = 0; seg < *nr_segs; seg++) {
1225 const struct iovec *iv = &iov[seg];
1228 * If any segment has a negative length, or the cumulative
1229 * length ever wraps negative then return -EINVAL.
1232 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1234 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1239 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Compat aio_read: validate the iovec, copy it to a private buffer (small
 * requests reuse lti_local_iov from the env; larger ones are allocated),
 * build an iov_iter and delegate to ll_file_read_iter().
 * NOTE(review): the small/large-iovec branching lines and the iov_iter
 * allocation are partially missing from this extract; code is verbatim.
 */
1246 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1247 unsigned long nr_segs, loff_t pos)
1249 struct iovec *local_iov;
1250 struct iov_iter *to;
1253 struct lu_env *env = NULL;
1257 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1263 env = cl_env_get(&refcheck);
1265 RETURN(PTR_ERR(env));
1267 local_iov = &ll_env_info(env)->lti_local_iov;
1271 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1272 if (local_iov == NULL)
1275 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1283 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1284 iov_iter_init(to, READ, local_iov, nr_segs, iov_count);
1285 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1286 iov_iter_init(to, local_iov, nr_segs, iov_count, 0);
1287 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1289 result = ll_file_read_iter(iocb, to);
1294 cl_env_put(env, &refcheck);
1296 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Legacy read(2) entry point: build a single-segment iovec plus a synchronous
 * kiocb and dispatch through ll_file_aio_read(); propagate the updated file
 * position back to *ppos. (Excerpt: interior lines elided.)
 */
1301 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1305 struct iovec iov = { .iov_base = buf, .iov_len = count };
1306 struct kiocb *kiocb;
1311 env = cl_env_get(&refcheck);
1313 RETURN(PTR_ERR(env));
1315 kiocb = &ll_env_info(env)->lti_kiocb;
1316 init_sync_kiocb(kiocb, file);
1317 kiocb->ki_pos = *ppos;
/* field name for the remaining byte count varies across kernel versions */
1318 #ifdef HAVE_KIOCB_KI_LEFT
1319 kiocb->ki_left = count;
1320 #elif defined(HAVE_KI_NBYTES)
1321 kiocb->ki_nbytes = count;
1324 result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos);
1325 *ppos = kiocb->ki_pos;
1327 cl_env_put(env, &refcheck);
1332 * Write to a file (through the page cache).
/*
 * Pre-iter-API aio write entry point; mirror of ll_file_aio_read() using
 * a WRITE-direction iov_iter and ll_file_write_iter().
 * NOTE(review): excerpt elides the branch choosing lti_local_iov vs.
 * OBD_ALLOC, and the iov_iter allocation error path — confirm in full source.
 */
1335 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1336 unsigned long nr_segs, loff_t pos)
1338 struct iovec *local_iov;
1339 struct iov_iter *from;
1342 struct lu_env *env = NULL;
1346 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1351 env = cl_env_get(&refcheck);
1353 RETURN(PTR_ERR(env));
1355 local_iov = &ll_env_info(env)->lti_local_iov;
1358 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1359 if (local_iov == NULL)
1362 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1365 OBD_ALLOC_PTR(from);
1370 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1371 iov_iter_init(from, WRITE, local_iov, nr_segs, iov_count);
1372 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1373 iov_iter_init(from, local_iov, nr_segs, iov_count, 0);
1374 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1376 result = ll_file_write_iter(iocb, from);
1381 cl_env_put(env, &refcheck);
1383 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Legacy write(2) entry point: single-segment iovec + synchronous kiocb,
 * dispatched through ll_file_aio_write(); writes the new position to *ppos.
 * (Excerpt: interior lines elided.)
 */
1388 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1389 size_t count, loff_t *ppos)
1392 struct iovec iov = { .iov_base = (void __user *)buf,
1394 struct kiocb *kiocb;
1399 env = cl_env_get(&refcheck);
1401 RETURN(PTR_ERR(env));
1403 kiocb = &ll_env_info(env)->lti_kiocb;
1404 init_sync_kiocb(kiocb, file);
1405 kiocb->ki_pos = *ppos;
1406 #ifdef HAVE_KIOCB_KI_LEFT
1407 kiocb->ki_left = count;
1408 #elif defined(HAVE_KI_NBYTES)
1409 kiocb->ki_nbytes = count;
1412 result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos);
1413 *ppos = kiocb->ki_pos;
1415 cl_env_put(env, &refcheck);
1418 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1421 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read implementation: route pagecache data into a pipe via
 * ll_file_io_generic() with IO_SPLICE args and CIT_READ.
 * (Excerpt: interior lines elided.)
 */
1423 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1424 struct pipe_inode_info *pipe, size_t count,
1428 struct vvp_io_args *args;
1433 env = cl_env_get(&refcheck);
1435 RETURN(PTR_ERR(env));
1437 args = ll_env_args(env, IO_SPLICE);
1438 args->u.splice.via_pipe = pipe;
1439 args->u.splice.via_flags = flags;
1441 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1442 cl_env_put(env, &refcheck);
/*
 * Apply a LOV striping EA to a file by re-opening it by FID with an intent
 * carrying the layout, then immediately releasing the open handle. The
 * inode size lock is held across the intent open. (Excerpt: error-path and
 * declaration lines elided.)
 */
1446 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1447 __u64 flags, struct lov_user_md *lum,
1450 struct lookup_intent oit = {
1452 .it_flags = flags | MDS_OPEN_BY_FID,
1457 ll_inode_size_lock(inode);
1458 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1460 GOTO(out_unlock, rc);
/* the server-side open did the work; drop the handle we just got */
1462 ll_release_openhandle(file->f_path.dentry, &oit);
1465 ll_inode_size_unlock(inode);
1466 ll_intent_release(&oit);
1467 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV striping EA for @filename from the MDS via
 * md_getattr_name(), validate the magic (V1/V3 only), and byte-swap the
 * reply to host endianness on big-endian clients before handing it back
 * through *lmmp/*lmm_size. Caller receives the ptlrpc request in *request
 * and is responsible for releasing it. (Excerpt: some lines elided.)
 */
1472 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1473 struct lov_mds_md **lmmp, int *lmm_size,
1474 struct ptlrpc_request **request)
1476 struct ll_sb_info *sbi = ll_i2sbi(inode);
1477 struct mdt_body *body;
1478 struct lov_mds_md *lmm = NULL;
1479 struct ptlrpc_request *req = NULL;
1480 struct md_op_data *op_data;
1483 rc = ll_get_default_mdsize(sbi, &lmmsize);
1487 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1488 strlen(filename), lmmsize,
1489 LUSTRE_OPC_ANY, NULL);
1490 if (IS_ERR(op_data))
1491 RETURN(PTR_ERR(op_data));
1493 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1494 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1495 ll_finish_md_op_data(op_data);
1497 CDEBUG(D_INFO, "md_getattr_name failed "
1498 "on %s: rc %d\n", filename, rc);
1502 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1503 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1505 lmmsize = body->mbo_eadatasize;
/* no EA bits valid, or (elided) zero-size EA => nothing to return */
1507 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1509 GOTO(out, rc = -ENODATA);
1512 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1513 LASSERT(lmm != NULL);
1515 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1516 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1517 GOTO(out, rc = -EPROTO);
1521 * This is coming from the MDS, so is probably in
1522 * little endian. We convert it to host endian before
1523 * passing it to userspace.
/* true only on a big-endian host: wire format is little endian */
1525 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1528 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1529 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1532 /* if function called for directory - we should
1533 * avoid swab not existent lsm objects */
1534 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1535 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1536 if (S_ISREG(body->mbo_mode))
1537 lustre_swab_lov_user_md_objects(
1538 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1540 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1541 lustre_swab_lov_user_md_v3(
1542 (struct lov_user_md_v3 *)lmm);
1543 if (S_ISREG(body->mbo_mode))
1544 lustre_swab_lov_user_md_objects(
1545 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1552 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: admin-only (CFS_CAP_SYS_ADMIN) path that copies
 * a lov_user_md (with one ost_data entry) from userspace and applies it via
 * ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS. (Excerpt: elided lines.)
 */
1557 static int ll_lov_setea(struct inode *inode, struct file *file,
1560 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1561 struct lov_user_md *lump;
1562 int lum_size = sizeof(struct lov_user_md) +
1563 sizeof(struct lov_user_ost_data);
1567 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1570 OBD_ALLOC_LARGE(lump, lum_size);
1574 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size))
1575 GOTO(out_lump, rc = -EFAULT);
1577 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1580 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the file's current striping to the userspace lov_user_md buffer by
 * querying the cl_object layer. (Excerpt: interior lines elided.)
 */
1584 static int ll_file_getstripe(struct inode *inode,
1585 struct lov_user_md __user *lum)
1592 env = cl_env_get(&refcheck);
1594 RETURN(PTR_ERR(env));
1596 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1597 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user layout, apply it, then refresh
 * the layout generation and echo the resulting stripe info back to the user
 * buffer. NOTE(review): the put_user(0, &lum->lmm_stripe_count) presumably
 * pre-clears the count for the getstripe copy-back — confirm in full source.
 */
1601 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1604 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1605 struct lov_user_md *klum;
1607 __u64 flags = FMODE_WRITE;
1610 rc = ll_copy_user_md(lum, &klum);
1615 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
1619 put_user(0, &lum->lmm_stripe_count);
1621 ll_layout_refresh(inode, &gen);
1622 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1625 OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK: take a Lustre group lock with id @arg on this file.
 * fd_flags/fd_grouplock are protected by lli->lli_lock; the lock is acquired
 * outside the spinlock, then a second check handles the thread that lost a
 * concurrent race. (Excerpt: some declarations/returns elided.)
 */
1630 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1632 struct ll_inode_info *lli = ll_i2info(inode);
1633 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1634 struct ll_grouplock grouplock;
1639 CWARN("group id for group lock must not be 0\n");
1643 if (ll_file_nolock(file))
1644 RETURN(-EOPNOTSUPP);
1646 spin_lock(&lli->lli_lock);
/* only one group lock per file descriptor is allowed */
1647 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1648 CWARN("group lock already existed with gid %lu\n",
1649 fd->fd_grouplock.lg_gid);
1650 spin_unlock(&lli->lli_lock);
1653 LASSERT(fd->fd_grouplock.lg_lock == NULL);
1654 spin_unlock(&lli->lli_lock);
/* blocking behaviour follows the file's O_NONBLOCK flag */
1656 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1657 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1661 spin_lock(&lli->lli_lock);
1662 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1663 spin_unlock(&lli->lli_lock);
1664 CERROR("another thread just won the race\n");
1665 cl_put_grouplock(&grouplock);
1669 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1670 fd->fd_grouplock = grouplock;
1671 spin_unlock(&lli->lli_lock);
1673 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK: release the group lock with id @arg held on this file
 * descriptor. State is snapshotted and cleared under lli->lli_lock; the
 * actual cl_put_grouplock() happens after dropping the spinlock.
 * (Excerpt: some return paths elided.)
 */
1677 static int ll_put_grouplock(struct inode *inode, struct file *file,
1680 struct ll_inode_info *lli = ll_i2info(inode);
1681 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1682 struct ll_grouplock grouplock;
1685 spin_lock(&lli->lli_lock);
1686 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1687 spin_unlock(&lli->lli_lock);
1688 CWARN("no group lock held\n");
1692 LASSERT(fd->fd_grouplock.lg_lock != NULL);
/* refuse to drop a lock whose gid does not match the caller's */
1694 if (fd->fd_grouplock.lg_gid != arg) {
1695 CWARN("group lock %lu doesn't match current id %lu\n",
1696 arg, fd->fd_grouplock.lg_gid);
1697 spin_unlock(&lli->lli_lock);
1701 grouplock = fd->fd_grouplock;
1702 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1703 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1704 spin_unlock(&lli->lli_lock);
1706 cl_put_grouplock(&grouplock);
1707 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1712 * Close inode open handle
1714 * \param dentry [in] dentry which contains the inode
1715 * \param it [in,out] intent which contains open info and result
1718 * \retval <0 failure
/*
 * Close the MDS open handle carried by a lookup intent (see the \param doc
 * above): skip the root dentry and intents with no DISP_OPEN_OPEN, otherwise
 * fill an obd_client_handle from the intent and close it on the MDS.
 * (Excerpt: some return paths elided.)
 */
1720 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1722 struct inode *inode = dentry->d_inode;
1723 struct obd_client_handle *och;
1729 /* Root ? Do nothing. */
1730 if (dentry->d_inode->i_sb->s_root == dentry)
1733 /* No open handle to close? Move away */
1734 if (!it_disposition(it, DISP_OPEN_OPEN))
1737 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1739 OBD_ALLOC(och, sizeof(*och));
1741 GOTO(out, rc = -ENOMEM);
1743 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1745 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
1747 /* this one is in place of ll_file_open */
1748 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1749 ptlrpc_req_finished(it->d.lustre.it_data);
1750 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1756 * Get size for inode for which FIEMAP mapping is requested.
1757 * Make the FIEMAP get_info call and returns the result.
1758 * \param fiemap kernel buffer to hold extens
1759 * \param num_bytes kernel buffer size
/*
 * Perform a FIEMAP extent query (see comment above): validate flags,
 * optionally flush dirty data for FIEMAP_FLAG_SYNC, glimpse a zero size,
 * then hand the request to cl_object_fiemap(). A file of size 0 trivially
 * maps zero extents. (Excerpt: some error paths elided.)
 */
1761 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1767 struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
1770 /* Checks for fiemap flags */
1771 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* report back which flags we do not support */
1772 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1776 /* Check for FIEMAP_FLAG_SYNC */
1777 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1778 rc = filemap_fdatawrite(inode->i_mapping);
1783 env = cl_env_get(&refcheck);
1785 RETURN(PTR_ERR(env));
1787 if (i_size_read(inode) == 0) {
1788 rc = ll_glimpse_size(inode);
1793 fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1794 obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
1795 obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
1797 /* If filesize is 0, then there would be no objects for mapping */
1798 if (fmkey.lfik_oa.o_size == 0) {
1799 fiemap->fm_mapped_extents = 0;
1803 fmkey.lfik_fiemap = *fiemap;
1805 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1806 &fmkey, fiemap, &num_bytes);
1808 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a pathname via the MDC. Permission is
 * CFS_CAP_DAC_READ_SEARCH or the LL_SBI_USER_FID2PATH mount flag. The output
 * buffer is sized from the user-supplied gf_pathlen (capped at PATH_MAX).
 * (Excerpt: some error returns elided.)
 */
1812 int ll_fid2path(struct inode *inode, void __user *arg)
1814 struct obd_export *exp = ll_i2mdexp(inode);
1815 const struct getinfo_fid2path __user *gfin = arg;
1817 struct getinfo_fid2path *gfout;
1823 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1824 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1827 /* Only need to get the buflen */
1828 if (get_user(pathlen, &gfin->gf_pathlen))
1831 if (pathlen > PATH_MAX)
1834 outsize = sizeof(*gfout) + pathlen;
1835 OBD_ALLOC(gfout, outsize);
1839 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1840 GOTO(gf_free, rc = -EFAULT);
1842 /* Call mdc_iocontrol */
1843 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1847 if (copy_to_user(arg, gfout, outsize))
1851 OBD_FREE(gfout, outsize);
1856 * Read the data_version for inode.
1858 * This value is computed using stripe object version on OST.
1859 * Version is computed using server side locking.
1861 * @param flags if do sync on the OST side;
1863 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1864 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * Compute the inode's data_version (see comment block above) by running a
 * CIT_DATA_VERSION cl_io; @flags selects the OST flush mode (LL_DV_RD_FLUSH /
 * LL_DV_WR_FLUSH). Restarts the io loop on ci_need_restart.
 * (Excerpt: some declarations and the restart goto target elided.)
 */
1866 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1868 struct cl_object *obj = ll_i2info(inode)->lli_clob;
1876 /* If no file object initialized, we consider its version is 0. */
1882 env = cl_env_get(&refcheck);
1884 RETURN(PTR_ERR(env));
1886 io = vvp_env_thread_io(env);
1888 io->u.ci_data_version.dv_data_version = 0;
1889 io->u.ci_data_version.dv_flags = flags;
1892 if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
1893 result = cl_io_loop(env, io);
1895 result = io->ci_result;
1897 *data_version = io->u.ci_data_version.dv_data_version;
1899 cl_io_fini(env, io);
/* layout changed mid-io: the (elided) branch retries the whole io */
1901 if (unlikely(io->ci_need_restart))
1904 cl_env_put(env, &refcheck);
1910 * Trigger a HSM release request for the provided inode.
/*
 * HSM release: take a write lease with MDS_OPEN_RELEASE, flush and fetch the
 * latest data_version, merge attributes, then close the open handle with
 * MDS_HSM_RELEASE so the MDT can drop the file's OST objects. The lease lock
 * handle is intentionally kept until mdc_hsm_release_pack() (see comment).
 * (Excerpt: some error-path lines elided.)
 */
1912 int ll_hsm_release(struct inode *inode)
1914 struct cl_env_nest nest;
1916 struct obd_client_handle *och = NULL;
1917 __u64 data_version = 0;
1921 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1922 ll_get_fsname(inode->i_sb, NULL, 0),
1923 PFID(&ll_i2info(inode)->lli_fid));
1925 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1927 GOTO(out, rc = PTR_ERR(och));
1929 /* Grab latest data_version and [am]time values */
1930 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1934 env = cl_env_nested_get(&nest);
1936 GOTO(out, rc = PTR_ERR(env));
1938 ll_merge_attr(env, inode);
1939 cl_env_nested_put(&nest, env);
1941 /* Release the file.
1942 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1943 * we still need it to pack l_remote_handle to MDT. */
1944 rc = ll_close_inode_openhandle(inode, och, MDS_HSM_RELEASE,
1950 if (och != NULL && !IS_ERR(och)) /* close the file */
1951 ll_lease_close(och, inode, NULL);
/*
 * Working state for ll_swap_layouts(): the two inodes plus (elided here)
 * their data versions and check flags, so the pair can be reordered by FID
 * without touching the caller's lustre_swap_layouts.
 */
1956 struct ll_swap_stack {
1959 struct inode *inode1;
1960 struct inode *inode2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS worker: swap the layouts of two files via the MDT.
 * Orders the pair by FID to avoid lock inversion, optionally takes group
 * locks on both files to flush dirty cache, re-checks the requested data
 * versions (-EAGAIN if they moved), and sends the swap as an obd_iocontrol
 * with mdc_swap_layouts piggybacked in op_data. (Excerpt: elided lines.)
 */
1965 static int ll_swap_layouts(struct file *file1, struct file *file2,
1966 struct lustre_swap_layouts *lsl)
1968 struct mdc_swap_layouts msl;
1969 struct md_op_data *op_data;
1972 struct ll_swap_stack *llss = NULL;
1975 OBD_ALLOC_PTR(llss);
1979 llss->inode1 = file1->f_path.dentry->d_inode;
1980 llss->inode2 = file2->f_path.dentry->d_inode;
1982 rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
1986 /* we use 2 bool because it is easier to swap than 2 bits */
1987 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1988 llss->check_dv1 = true;
1990 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1991 llss->check_dv2 = true;
1993 /* we cannot use lsl->sl_dvX directly because we may swap them */
1994 llss->dv1 = lsl->sl_dv1;
1995 llss->dv2 = lsl->sl_dv2;
1997 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1998 if (rc == 0) /* same file, done! */
/* canonical ordering by FID prevents A/B vs B/A deadlocks */
2001 if (rc < 0) { /* sequentialize it */
2002 swap(llss->inode1, llss->inode2);
2004 swap(llss->dv1, llss->dv2);
2005 swap(llss->check_dv1, llss->check_dv2);
2009 if (gid != 0) { /* application asks to flush dirty cache */
2010 rc = ll_get_grouplock(llss->inode1, file1, gid);
2014 rc = ll_get_grouplock(llss->inode2, file2, gid);
2016 ll_put_grouplock(llss->inode1, file1, gid);
2021 /* ultimate check, before swaping the layouts we check if
2022 * dataversion has changed (if requested) */
2023 if (llss->check_dv1) {
2024 rc = ll_data_version(llss->inode1, &dv, 0);
2027 if (dv != llss->dv1)
2028 GOTO(putgl, rc = -EAGAIN);
2031 if (llss->check_dv2) {
2032 rc = ll_data_version(llss->inode2, &dv, 0);
2035 if (dv != llss->dv2)
2036 GOTO(putgl, rc = -EAGAIN);
2039 /* struct md_op_data is used to send the swap args to the mdt
2040 * only flags is missing, so we use struct mdc_swap_layouts
2041 * through the md_op_data->op_data */
2042 /* flags from user space have to be converted before they are send to
2043 * server, no flag is sent today, they are only used on the client */
2046 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2047 0, LUSTRE_OPC_ANY, &msl);
2048 if (IS_ERR(op_data))
2049 GOTO(free, rc = PTR_ERR(op_data));
2051 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2052 sizeof(*op_data), op_data, NULL);
2053 ll_finish_md_op_data(op_data);
2060 ll_put_grouplock(llss->inode2, file2, gid);
2061 ll_put_grouplock(llss->inode1, file1, gid);
/*
 * Set/clear HSM state flags on an inode: reject out-of-range masks, restrict
 * non-HSM_USER_MASK bits to CFS_CAP_SYS_ADMIN, bound the archive id, then
 * forward the request to the MDT as an LL_IOC_HSM_STATE_SET iocontrol.
 * (Excerpt: some -EINVAL/-EPERM return lines elided.)
 */
2071 int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2073 struct md_op_data *op_data;
2077 /* Detect out-of range masks */
2078 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2081 /* Non-root users are forbidden to set or clear flags which are
2082 * NOT defined in HSM_USER_MASK. */
2083 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2084 !cfs_capable(CFS_CAP_SYS_ADMIN))
2087 /* Detect out-of range archive id */
2088 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2089 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2092 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2093 LUSTRE_OPC_ANY, hss);
2094 if (IS_ERR(op_data))
2095 RETURN(PTR_ERR(op_data));
2097 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2098 sizeof(*op_data), op_data, NULL);
2100 ll_finish_md_op_data(op_data);
/*
 * HSM import: mark a regular file as archived/exists/released, then force
 * its mode/uid/gid/size/times to the values recorded in the HSM user import
 * descriptor via ll_setattr_raw() under i_mutex.
 * (Excerpt: allocation checks and cleanup lines elided.)
 */
2105 static int ll_hsm_import(struct inode *inode, struct file *file,
2106 struct hsm_user_import *hui)
2108 struct hsm_state_set *hss = NULL;
2109 struct iattr *attr = NULL;
2113 if (!S_ISREG(inode->i_mode))
2119 GOTO(out, rc = -ENOMEM);
2121 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2122 hss->hss_archive_id = hui->hui_archive_id;
2123 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2124 rc = ll_hsm_state_set(inode, hss);
2128 OBD_ALLOC_PTR(attr);
2130 GOTO(out, rc = -ENOMEM);
/* only permission bits are taken from the import; type forced to S_IFREG */
2132 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2133 attr->ia_mode |= S_IFREG;
2134 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2135 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2136 attr->ia_size = hui->hui_size;
2137 attr->ia_mtime.tv_sec = hui->hui_mtime;
2138 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2139 attr->ia_atime.tv_sec = hui->hui_atime;
2140 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2142 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2143 ATTR_UID | ATTR_GID |
2144 ATTR_MTIME | ATTR_MTIME_SET |
2145 ATTR_ATIME | ATTR_ATIME_SET;
2147 mutex_lock(&inode->i_mutex);
2149 rc = ll_setattr_raw(file->f_path.dentry, attr, true);
2153 mutex_unlock(&inode->i_mutex);
/* Map an open fmode to the LL_LEASE_{RD,WR}LCK bits reported to userspace. */
2165 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2167 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2168 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * LL_IOC_FUTIMES_3 worker: admin-only setattr of atime/mtime/ctime
 * (including ctime, which plain utimes(2) cannot set) on a regular file,
 * performed under i_mutex with ll_setattr_raw().
 * (Excerpt: iattr field names and -EPERM/-EINVAL returns partly elided.)
 */
2171 static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu)
2173 struct inode *inode = file->f_path.dentry->d_inode;
2175 .ia_valid = ATTR_ATIME | ATTR_ATIME_SET |
2176 ATTR_MTIME | ATTR_MTIME_SET |
2177 ATTR_CTIME | ATTR_CTIME_SET,
2179 .tv_sec = lfu->lfu_atime_sec,
2180 .tv_nsec = lfu->lfu_atime_nsec,
2183 .tv_sec = lfu->lfu_mtime_sec,
2184 .tv_nsec = lfu->lfu_mtime_nsec,
2187 .tv_sec = lfu->lfu_ctime_sec,
2188 .tv_nsec = lfu->lfu_ctime_nsec,
2194 if (!capable(CAP_SYS_ADMIN))
2197 if (!S_ISREG(inode->i_mode))
2200 mutex_lock(&inode->i_mutex);
2201 rc = ll_setattr_raw(file->f_path.dentry, &ia, false);
2202 mutex_unlock(&inode->i_mutex);
/*
 * Main ioctl dispatcher for regular Lustre files. Handles per-fd flag
 * manipulation, striping get/set, layout swap, group locks, HSM state/
 * action/import, leases, fid<->path conversion and data versions; anything
 * unrecognized falls through to the dynamic ioctl table and then to the
 * data export. tty ioctls are explicitly rejected (-ENOTTY) up front.
 * (Excerpt: many RETURN/brace lines elided between visible statements.)
 */
2208 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2210 struct inode *inode = file->f_path.dentry->d_inode;
2211 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2215 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2216 PFID(ll_inode2fid(inode)), inode, cmd);
2217 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2219 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2220 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2224 case LL_IOC_GETFLAGS:
2225 /* Get the current value of the file flags */
2226 return put_user(fd->fd_flags, (int __user *)arg);
2227 case LL_IOC_SETFLAGS:
2228 case LL_IOC_CLRFLAGS:
2229 /* Set or clear specific file flags */
2230 /* XXX This probably needs checks to ensure the flags are
2231 * not abused, and to handle any flag side effects.
2233 if (get_user(flags, (int __user *) arg))
2236 if (cmd == LL_IOC_SETFLAGS) {
/* LL_FILE_IGNORE_LOCK only makes sense for O_DIRECT i/o */
2237 if ((flags & LL_FILE_IGNORE_LOCK) &&
2238 !(file->f_flags & O_DIRECT)) {
2239 CERROR("%s: unable to disable locking on "
2240 "non-O_DIRECT file\n", current->comm);
2244 fd->fd_flags |= flags;
2246 fd->fd_flags &= ~flags;
2249 case LL_IOC_LOV_SETSTRIPE:
2250 RETURN(ll_lov_setstripe(inode, file, arg));
2251 case LL_IOC_LOV_SETEA:
2252 RETURN(ll_lov_setea(inode, file, arg));
2253 case LL_IOC_LOV_SWAP_LAYOUTS: {
2255 struct lustre_swap_layouts lsl;
2257 if (copy_from_user(&lsl, (char __user *)arg,
2258 sizeof(struct lustre_swap_layouts)))
2261 if ((file->f_flags & O_ACCMODE) == O_RDONLY)
2264 file2 = fget(lsl.sl_fd);
2268 /* O_WRONLY or O_RDWR */
2269 if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
2270 GOTO(out, rc = -EPERM);
2272 if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
2273 struct inode *inode2;
2274 struct ll_inode_info *lli;
2275 struct obd_client_handle *och = NULL;
/* CLOSE must be the only flag; it swaps via the lease handle */
2277 if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE)
2278 GOTO(out, rc = -EINVAL);
2280 lli = ll_i2info(inode);
2281 mutex_lock(&lli->lli_och_mutex);
2282 if (fd->fd_lease_och != NULL) {
2283 och = fd->fd_lease_och;
2284 fd->fd_lease_och = NULL;
2286 mutex_unlock(&lli->lli_och_mutex);
2288 GOTO(out, rc = -ENOLCK);
2289 inode2 = file2->f_path.dentry->d_inode;
2290 rc = ll_swap_layouts_close(och, inode, inode2);
2292 rc = ll_swap_layouts(file, file2, &lsl);
2298 case LL_IOC_LOV_GETSTRIPE:
2299 RETURN(ll_file_getstripe(inode,
2300 (struct lov_user_md __user *)arg));
2301 case FSFILT_IOC_GETFLAGS:
2302 case FSFILT_IOC_SETFLAGS:
2303 RETURN(ll_iocontrol(inode, file, cmd, arg));
2304 case FSFILT_IOC_GETVERSION_OLD:
2305 case FSFILT_IOC_GETVERSION:
2306 RETURN(put_user(inode->i_generation, (int __user *)arg));
2307 case LL_IOC_GROUP_LOCK:
2308 RETURN(ll_get_grouplock(inode, file, arg));
2309 case LL_IOC_GROUP_UNLOCK:
2310 RETURN(ll_put_grouplock(inode, file, arg));
2311 case IOC_OBD_STATFS:
2312 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2314 /* We need to special case any other ioctls we want to handle,
2315 * to send them to the MDS/OST as appropriate and to properly
2316 * network encode the arg field.
2317 case FSFILT_IOC_SETVERSION_OLD:
2318 case FSFILT_IOC_SETVERSION:
2320 case LL_IOC_FLUSHCTX:
2321 RETURN(ll_flush_ctx(inode));
2322 case LL_IOC_PATH2FID: {
2323 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2324 sizeof(struct lu_fid)))
2329 case LL_IOC_GETPARENT:
2330 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2332 case OBD_IOC_FID2PATH:
2333 RETURN(ll_fid2path(inode, (void __user *)arg));
2334 case LL_IOC_DATA_VERSION: {
2335 struct ioc_data_version idv;
2338 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
2341 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2342 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2345 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2351 case LL_IOC_GET_MDTIDX: {
2354 mdtidx = ll_get_mdt_idx(inode);
2358 if (put_user((int)mdtidx, (int __user *)arg))
2363 case OBD_IOC_GETDTNAME:
2364 case OBD_IOC_GETMDNAME:
2365 RETURN(ll_get_obd_name(inode, cmd, arg));
2366 case LL_IOC_HSM_STATE_GET: {
2367 struct md_op_data *op_data;
2368 struct hsm_user_state *hus;
2375 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2376 LUSTRE_OPC_ANY, hus);
2377 if (IS_ERR(op_data)) {
2379 RETURN(PTR_ERR(op_data));
2382 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2385 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2388 ll_finish_md_op_data(op_data);
2392 case LL_IOC_HSM_STATE_SET: {
2393 struct hsm_state_set *hss;
2400 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2405 rc = ll_hsm_state_set(inode, hss);
2410 case LL_IOC_HSM_ACTION: {
2411 struct md_op_data *op_data;
2412 struct hsm_current_action *hca;
2419 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2420 LUSTRE_OPC_ANY, hca);
2421 if (IS_ERR(op_data)) {
2423 RETURN(PTR_ERR(op_data));
2426 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2429 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2432 ll_finish_md_op_data(op_data);
2436 case LL_IOC_SET_LEASE: {
2437 struct ll_inode_info *lli = ll_i2info(inode);
2438 struct obd_client_handle *och = NULL;
2443 case LL_LEASE_WRLCK:
/* write lease requires the fd to be open for write */
2444 if (!(file->f_mode & FMODE_WRITE))
2446 fmode = FMODE_WRITE;
2448 case LL_LEASE_RDLCK:
2449 if (!(file->f_mode & FMODE_READ))
2453 case LL_LEASE_UNLCK:
2454 mutex_lock(&lli->lli_och_mutex);
2455 if (fd->fd_lease_och != NULL) {
2456 och = fd->fd_lease_och;
2457 fd->fd_lease_och = NULL;
2459 mutex_unlock(&lli->lli_och_mutex);
2464 fmode = och->och_flags;
2465 rc = ll_lease_close(och, inode, &lease_broken);
2472 RETURN(ll_lease_type_from_fmode(fmode));
2477 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2479 /* apply for lease */
2480 och = ll_lease_open(inode, file, fmode, 0);
2482 RETURN(PTR_ERR(och));
2485 mutex_lock(&lli->lli_och_mutex);
2486 if (fd->fd_lease_och == NULL) {
2487 fd->fd_lease_och = och;
2490 mutex_unlock(&lli->lli_och_mutex);
2492 /* impossible now that only excl is supported for now */
2493 ll_lease_close(och, inode, &lease_broken);
2498 case LL_IOC_GET_LEASE: {
2499 struct ll_inode_info *lli = ll_i2info(inode);
2500 struct ldlm_lock *lock = NULL;
2503 mutex_lock(&lli->lli_och_mutex);
2504 if (fd->fd_lease_och != NULL) {
2505 struct obd_client_handle *och = fd->fd_lease_och;
2507 lock = ldlm_handle2lock(&och->och_lease_handle);
2509 lock_res_and_lock(lock);
/* a cancelled lease lock no longer counts as held */
2510 if (!ldlm_is_cancel(lock))
2511 fmode = och->och_flags;
2513 unlock_res_and_lock(lock);
2514 LDLM_LOCK_PUT(lock);
2517 mutex_unlock(&lli->lli_och_mutex);
2519 RETURN(ll_lease_type_from_fmode(fmode));
2521 case LL_IOC_HSM_IMPORT: {
2522 struct hsm_user_import *hui;
2528 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2533 rc = ll_hsm_import(inode, file, hui);
2538 case LL_IOC_FUTIMES_3: {
2539 struct ll_futimes_3 lfu;
2541 if (copy_from_user(&lfu,
2542 (const struct ll_futimes_3 __user *)arg,
2546 RETURN(ll_file_futimes_3(file, &lfu));
/* unknown cmd: try dynamically registered handlers, then the OSC layer */
2552 ll_iocontrol_call(inode, file, cmd, arg, &err))
2555 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2556 (void __user *)arg));
2561 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Compat helper (kernels without generic_file_llseek_size): validate the
 * new offset against [0, maxsize] and commit it to f_pos, resetting
 * f_version on a position change. (Excerpt: -EINVAL returns elided.)
 */
2562 static inline loff_t
2563 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2565 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2567 if (offset > maxsize)
2570 if (offset != file->f_pos) {
2571 file->f_pos = offset;
2572 file->f_version = 0;
/*
 * Compat copy of the kernel's generic_file_llseek_size(): handle SEEK_END/
 * SEEK_CUR/SEEK_DATA/SEEK_HOLE against @maxsize and @eof, with the documented
 * SEEK_CUR(0) fast path, then commit via llseek_execute().
 * (Excerpt: the switch/case skeleton is elided between visible lines.)
 */
2578 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2579 loff_t maxsize, loff_t eof)
2581 struct inode *inode = file->f_path.dentry->d_inode;
2589 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2590 * position-querying operation. Avoid rewriting the "same"
2591 * f_pos value back to the file because a concurrent read(),
2592 * write() or lseek() might have altered it
2597 * f_lock protects against read/modify/write race with other
2598 * SEEK_CURs. Note that parallel writes and reads behave
/* i_mutex serializes the SEEK_CUR read-modify-write of f_pos */
2601 mutex_lock(&inode->i_mutex);
2602 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2603 mutex_unlock(&inode->i_mutex);
2607 * In the generic case the entire file is data, so as long as
2608 * offset isn't at the end of the file then the offset is data.
2615 * There is a virtual hole at the end of the file, so as long as
2616 * offset isn't i_size or larger, return i_size.
2624 return llseek_execute(file, offset, maxsize);
/*
 * llseek entry point: for SEEK_END/SEEK_HOLE/SEEK_DATA first glimpse the
 * size from the OSTs so eof is current, then delegate to
 * ll_generic_file_llseek_size() bounded by ll_file_maxbytes().
 * (Excerpt: some lines elided.)
 */
2628 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2630 struct inode *inode = file->f_path.dentry->d_inode;
2631 loff_t retval, eof = 0;
2634 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2635 (origin == SEEK_CUR) ? file->f_pos : 0);
2636 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2637 PFID(ll_inode2fid(inode)), inode, retval, retval,
2639 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2641 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2642 retval = ll_glimpse_size(inode);
2645 eof = i_size_read(inode);
2648 retval = ll_generic_file_llseek_size(file, offset, origin,
2649 ll_file_maxbytes(inode), eof);
/*
 * flush (close(2) time) handler: harvest async writeback errors recorded on
 * the inode and its cl_object, but suppress the error if this fd was already
 * told about the write failure. Returns -EIO on unreported error, else 0.
 * (Excerpt: some lines elided.)
 */
2653 static int ll_flush(struct file *file, fl_owner_t id)
2655 struct inode *inode = file->f_path.dentry->d_inode;
2656 struct ll_inode_info *lli = ll_i2info(inode);
2657 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2660 LASSERT(!S_ISDIR(inode->i_mode));
2662 /* catch async errors that were recorded back when async writeback
2663 * failed for pages in this mapping. */
2664 rc = lli->lli_async_rc;
2665 lli->lli_async_rc = 0;
2666 if (lli->lli_clob != NULL) {
2667 err = lov_read_and_clear_async_rc(lli->lli_clob);
2672 /* The application has been told write failure already.
2673 * Do not report failure again. */
2674 if (fd->fd_write_failed)
2676 return rc ? -EIO : 0;
2680 * Called to make sure a portion of file has been written out.
2681 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2683 * Return how many pages have been written.
/*
 * Sync a byte range of the file via a CIT_FSYNC cl_io (see comment above);
 * returns the number of pages written on success. @mode must be one of the
 * CL_FSYNC_* values; @ignore_layout lets the sync proceed across layout
 * changes. (Excerpt: -EINVAL return and result plumbing partly elided.)
 */
2685 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2686 enum cl_fsync_mode mode, int ignore_layout)
2688 struct cl_env_nest nest;
2691 struct cl_fsync_io *fio;
2695 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2696 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2699 env = cl_env_nested_get(&nest);
2701 RETURN(PTR_ERR(env));
2703 io = vvp_env_thread_io(env);
2704 io->ci_obj = ll_i2info(inode)->lli_clob;
2705 io->ci_ignore_layout = ignore_layout;
2707 /* initialize parameters for sync */
2708 fio = &io->u.ci_fsync;
2709 fio->fi_start = start;
2711 fio->fi_fid = ll_inode2fid(inode);
2712 fio->fi_mode = mode;
2713 fio->fi_nr_written = 0;
2715 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2716 result = cl_io_loop(env, io);
2718 result = io->ci_result;
/* success is reported as the page count accumulated by the io */
2720 result = fio->fi_nr_written;
2721 cl_io_fini(env, io);
2722 cl_env_nested_put(&nest, env);
2728 * When dentry is provided (the 'else' case), *file->f_path.dentry may be
2729 * null and dentry must be used directly rather than pulled from
2730 * *file->f_path.dentry as is done otherwise.
/*
 * fsync entry point, with three signature variants selected by kernel
 * version (4-arg ranged, 2-arg, and legacy dentry form). Waits for local
 * writeback, harvests recorded async errors, fsyncs the MDS inode, then for
 * regular files forces an OST sync via cl_sync_file_range(CL_FSYNC_ALL) and
 * updates fd_write_failed accordingly. (Excerpt: some lines elided.)
 */
2733 #ifdef HAVE_FILE_FSYNC_4ARGS
2734 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2736 struct dentry *dentry = file->f_path.dentry;
2737 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2738 int ll_fsync(struct file *file, int datasync)
2740 struct dentry *dentry = file->f_path.dentry;
2742 loff_t end = LLONG_MAX;
2744 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2747 loff_t end = LLONG_MAX;
2749 struct inode *inode = dentry->d_inode;
2750 struct ll_inode_info *lli = ll_i2info(inode);
2751 struct ptlrpc_request *req;
2755 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2756 PFID(ll_inode2fid(inode)), inode);
2757 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2759 #ifdef HAVE_FILE_FSYNC_4ARGS
2760 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2761 mutex_lock(&inode->i_mutex);
2763 /* fsync's caller has already called _fdata{sync,write}, we want
2764 * that IO to finish before calling the osc and mdc sync methods */
2765 rc = filemap_fdatawait(inode->i_mapping);
2768 /* catch async errors that were recorded back when async writeback
2769 * failed for pages in this mapping. */
2770 if (!S_ISDIR(inode->i_mode)) {
2771 err = lli->lli_async_rc;
2772 lli->lli_async_rc = 0;
2775 err = lov_read_and_clear_async_rc(lli->lli_clob);
2780 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
2784 ptlrpc_req_finished(req);
2786 if (S_ISREG(inode->i_mode)) {
2787 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2789 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2790 if (rc == 0 && err < 0)
/* remember the outcome so ll_flush() doesn't re-report old failures */
2793 fd->fd_write_failed = true;
2795 fd->fd_write_failed = false;
2798 #ifdef HAVE_FILE_FSYNC_4ARGS
2799 mutex_unlock(&inode->i_mutex);
/* ll_file_flock(): ->flock/->lock handler for POSIX (fcntl) and BSD
 * (flock) locks.  Translates the kernel's struct file_lock into an LDLM
 * flock enqueue on the MDT, then mirrors the result into the local VFS
 * lock bookkeeping so the kernel's deadlock/ownership checks stay
 * consistent with the cluster-wide state.
 * NOTE(review): the switch statements on fl_type/cmd are partially
 * elided here (case labels and some RETURNs are missing from view). */
2805 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2807 struct inode *inode = file->f_path.dentry->d_inode;
2808 struct ll_sb_info *sbi = ll_i2sbi(inode);
2809 struct ldlm_enqueue_info einfo = {
2810 .ei_type = LDLM_FLOCK,
2811 .ei_cb_cp = ldlm_flock_completion_ast,
2812 .ei_cbdata = file_lock,
2814 struct md_op_data *op_data;
2815 struct lustre_handle lockh = { 0 };
2816 union ldlm_policy_data flock = { { 0 } };
/* Saved because einfo.ei_mode is written into fl_type below and must
 * be restored for non-TEST requests before returning to the VFS. */
2817 int fl_type = file_lock->fl_type;
2823 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2824 PFID(ll_inode2fid(inode)), file_lock);
2826 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2828 if (file_lock->fl_flags & FL_FLOCK) {
2829 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2830 /* flocks are whole-file locks */
2831 flock.l_flock.end = OFFSET_MAX;
2832 /* For flocks owner is determined by the local file desctiptor*/
2833 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2834 } else if (file_lock->fl_flags & FL_POSIX) {
2835 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2836 flock.l_flock.start = file_lock->fl_start;
2837 flock.l_flock.end = file_lock->fl_end;
2841 flock.l_flock.pid = file_lock->fl_pid;
2843 /* Somewhat ugly workaround for svc lockd.
2844 * lockd installs custom fl_lmops->lm_compare_owner that checks
2845 * for the fl_owner to be the same (which it always is on local node
2846 * I guess between lockd processes) and then compares pid.
2847 * As such we assign pid to the owner field to make it all work,
2848 * conflict with normal locks is unlikely since pid space and
2849 * pointer space for current->files are not intersecting */
2850 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2851 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Map fl_type to an LDLM mode: read lock -> PR ... */
2855 einfo.ei_mode = LCK_PR;
2858 /* An unlock request may or may not have any relation to
2859 * existing locks so we may not be able to pass a lock handle
2860 * via a normal ldlm_lock_cancel() request. The request may even
2861 * unlock a byte range in the middle of an existing lock. In
2862 * order to process an unlock request we need all of the same
2863 * information that is given with a normal read or write record
2864 * lock request. To avoid creating another ldlm unlock (cancel)
2865 * message we'll treat a LCK_NL flock request as an unlock. */
2866 einfo.ei_mode = LCK_NL;
/* ... write lock -> PW. */
2869 einfo.ei_mode = LCK_PW;
2872 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* Map cmd to enqueue flags: SETLK is non-blocking ... */
2887 flags = LDLM_FL_BLOCK_NOWAIT;
/* ... GETLK only tests for conflicts. */
2893 flags = LDLM_FL_TEST_LOCK;
2896 CERROR("unknown fcntl lock command: %d\n", cmd);
2900 /* Save the old mode so that if the mode in the lock changes we
2901 * can decrement the appropriate reader or writer refcount. */
2902 file_lock->fl_type = einfo.ei_mode;
2904 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2905 LUSTRE_OPC_ANY, NULL);
2906 if (IS_ERR(op_data))
2907 RETURN(PTR_ERR(op_data));
2909 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2910 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2911 flock.l_flock.pid, flags, einfo.ei_mode,
2912 flock.l_flock.start, flock.l_flock.end);
2914 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2917 /* Restore the file lock type if not TEST lock. */
2918 if (!(flags & LDLM_FL_TEST_LOCK))
2919 file_lock->fl_type = fl_type;
2921 #ifdef HAVE_LOCKS_LOCK_FILE_WAIT
/* Record the granted/released lock locally so the VFS lock lists
 * match the cluster state (API name varies by kernel version). */
2922 if ((rc == 0 || file_lock->fl_type == F_UNLCK) &&
2923 !(flags & LDLM_FL_TEST_LOCK))
2924 rc2 = locks_lock_file_wait(file, file_lock);
2926 if ((file_lock->fl_flags & FL_FLOCK) &&
2927 (rc == 0 || file_lock->fl_type == F_UNLCK))
2928 rc2 = flock_lock_file_wait(file, file_lock);
2929 if ((file_lock->fl_flags & FL_POSIX) &&
2930 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2931 !(flags & LDLM_FL_TEST_LOCK))
2932 rc2 = posix_lock_file_wait(file, file_lock);
2933 #endif /* HAVE_LOCKS_LOCK_FILE_WAIT */
/* Local bookkeeping failed after a successful server lock: undo on
 * the server by re-enqueueing as LCK_NL (i.e. unlock). */
2935 if (rc2 && file_lock->fl_type != F_UNLCK) {
2936 einfo.ei_mode = LCK_NL;
2937 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2942 ll_finish_md_op_data(op_data);
/* ll_get_fid_by_name(): look up the FID of @name under @parent via an
 * MDS getattr-by-name RPC.  On success stores the FID in *fid and, if
 * @inode is non-NULL, instantiates the inode via ll_prep_inode().
 * Caller owns the returned inode reference.
 * NOTE(review): the req_finished label and final RETURN are elided in
 * this excerpt. */
2947 int ll_get_fid_by_name(struct inode *parent, const char *name,
2948 int namelen, struct lu_fid *fid,
2949 struct inode **inode)
2951 struct md_op_data *op_data = NULL;
2952 struct mdt_body *body;
2953 struct ptlrpc_request *req;
2957 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2958 LUSTRE_OPC_ANY, NULL);
2959 if (IS_ERR(op_data))
2960 RETURN(PTR_ERR(op_data));
/* Only need the FID and file type back from the MDT. */
2962 op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
2963 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2964 ll_finish_md_op_data(op_data);
2968 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2970 GOTO(out_req, rc = -EFAULT);
2972 *fid = body->mbo_fid1;
2975 rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
2977 ptlrpc_req_finished(req);
/* ll_migrate(): move directory entry @name (under @parent) to MDT
 * @mdtidx via a MDS_RENAME_MIGRATE rename RPC.  For regular files a
 * write lease is taken first and the data version is recorded so the
 * MDT can detect concurrent modification; the lease is closed (and the
 * local inode invalidated) on completion.
 * NOTE(review): several control-flow lines (gotos, retry loop bounds,
 * some braces) are elided in this excerpt. */
2981 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2982 const char *name, int namelen)
2984 struct dentry *dchild = NULL;
2985 struct inode *child_inode = NULL;
2986 struct md_op_data *op_data;
2987 struct ptlrpc_request *request = NULL;
2988 struct obd_client_handle *och = NULL;
2990 struct mdt_body *body;
2992 __u64 data_version = 0;
2995 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2996 name, PFID(ll_inode2fid(parent)), mdtidx);
2998 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2999 0, LUSTRE_OPC_ANY, NULL);
3000 if (IS_ERR(op_data))
3001 RETURN(PTR_ERR(op_data));
3003 /* Get child FID first */
/* Prefer the dcache: a cached dentry gives us the inode without an
 * extra RPC. */
3004 qstr.hash = full_name_hash(name, namelen);
3007 dchild = d_lookup(file->f_path.dentry, &qstr);
3008 if (dchild != NULL) {
3009 if (dchild->d_inode != NULL)
3010 child_inode = igrab(dchild->d_inode);
/* Not in dcache: ask the MDT for the FID/inode. */
3014 if (child_inode == NULL) {
3015 rc = ll_get_fid_by_name(parent, name, namelen,
3016 &op_data->op_fid3, &child_inode);
3021 if (child_inode == NULL)
3022 GOTO(out_free, rc = -EINVAL);
/* Hold the child's i_mutex across the whole migration so local
 * operations cannot race with the rename. */
3024 mutex_lock(&child_inode->i_mutex);
3025 op_data->op_fid3 = *ll_inode2fid(child_inode);
3026 if (!fid_is_sane(&op_data->op_fid3)) {
3027 CERROR("%s: migrate %s, but FID "DFID" is insane\n",
3028 ll_get_fsname(parent->i_sb, NULL, 0), name,
3029 PFID(&op_data->op_fid3));
3030 GOTO(out_unlock, rc = -EINVAL);
3033 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
3035 GOTO(out_unlock, rc);
/* Already resident on the target MDT: nothing to do. */
3038 CDEBUG(D_INFO, "%s: "DFID" is already on MDT%04x\n", name,
3039 PFID(&op_data->op_fid3), mdtidx);
3040 GOTO(out_unlock, rc = 0);
3043 if (S_ISREG(child_inode->i_mode)) {
/* Take a write lease so we notice if anyone else opens/writes
 * the file during the migration. */
3044 och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
3048 GOTO(out_unlock, rc);
3051 rc = ll_data_version(child_inode, &data_version,
3054 GOTO(out_close, rc);
3056 op_data->op_handle = och->och_fh;
3057 op_data->op_data = och->och_mod;
3058 op_data->op_data_version = data_version;
3059 op_data->op_lease_handle = och->och_lease_handle;
3060 op_data->op_bias |= MDS_RENAME_MIGRATE;
3063 op_data->op_mds = mdtidx;
3064 op_data->op_cli_flags = CLI_MIGRATE;
/* Migration is expressed as a same-name rename with CLI_MIGRATE. */
3065 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
3066 namelen, name, namelen, &request);
3068 ll_update_times(request, parent);
3070 if (request != NULL) {
3071 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
3073 ptlrpc_req_finished(request);
3074 GOTO(out_close, rc = -EPROTO);
3077 /* If the server does release layout lock, then we cleanup
3078 * the client och here, otherwise release it in out_close: */
3080 body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
3081 obd_mod_put(och->och_mod);
3082 md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp,
3084 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
3088 ptlrpc_req_finished(request);
3091 /* Try again if the file layout has changed. */
3092 if (rc == -EAGAIN && S_ISREG(child_inode->i_mode)) {
3097 if (och != NULL) /* close the file */
3098 ll_lease_close(och, child_inode, NULL);
/* The old inode now refers to the pre-migration object; drop its
 * link count so it gets re-fetched. */
3100 clear_nlink(child_inode);
3102 mutex_unlock(&child_inode->i_mutex);
3105 ll_finish_md_op_data(op_data);
/* ll_file_noflock(): lock method used by the "-o noflock" mount option;
 * backs .flock/.lock in ll_file_operations_noflock to reject lock
 * requests (body elided in this excerpt). */
3110 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3118 * test if some locks matching bits and l_req_mode are acquired
3119 * - bits can be in different locks
3120 * - if found clear the common lock bits in *bits
3121 * - the bits not found, are kept in *bits
3123 * \param bits [IN] searched lock bits [IN]
3124 * \param l_req_mode [IN] searched lock mode
3125 * \retval boolean, true iff all bits are found
3127 int ll_have_md_lock(struct inode *inode, __u64 *bits, enum ldlm_mode l_req_mode)
3129 struct lustre_handle lockh;
3130 union ldlm_policy_data policy;
/* LCK_MINMODE means "any mode": match against all four modes. */
3131 enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
3132 (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
3141 fid = &ll_i2info(inode)->lli_fid;
3142 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3143 ldlm_lockname[mode]);
/* TEST_LOCK: match without taking a reference or blocking. */
3145 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe each requested inodebit separately; different bits may be
 * covered by different locks.  NOTE(review): `1 << i` is int-width —
 * fine while MDS_INODELOCK_MAXSHIFT < 31, worth confirming. */
3146 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3147 policy.l_inodebits.bits = *bits & (1 << i);
3148 if (policy.l_inodebits.bits == 0)
3151 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3152 &policy, mode, &lockh)) {
3153 struct ldlm_lock *lock;
3155 lock = ldlm_handle2lock(&lockh);
/* Clear every bit the matched lock covers, not just the probe
 * bit, to avoid redundant matches on later iterations. */
3158 ~(lock->l_policy_data.l_inodebits.bits);
3159 LDLM_LOCK_PUT(lock);
3161 *bits &= ~policy.l_inodebits.bits;
/* ll_take_md_lock(): try to match (and reference) a granted MDC ibits
 * lock covering @bits on @inode.  Returns the matched mode (lock handle
 * stored in *lockh) or 0 when no lock matches; caller must drop the
 * reference with ldlm_lock_decref(). */
3168 enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
3169 struct lustre_handle *lockh, __u64 flags,
3170 enum ldlm_mode mode)
3172 union ldlm_policy_data policy = { .l_inodebits = { bits } };
3177 fid = &ll_i2info(inode)->lli_fid;
3178 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3180 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3181 fid, LDLM_IBITS, &policy, mode, lockh);
/* ll_inode_revalidate_fini(): post-process the rc of a revalidation
 * RPC.  -ENOENT from the server means the object was unlinked; that is
 * converted to success (with nlink handling elided here) except for
 * striped directories, which must be revalidated again.  Other errors
 * are logged (quietly for expected -EACCES/-EIDRM) and passed through. */
3186 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3188 /* Already unlinked. Just update nlink and return success */
3189 if (rc == -ENOENT) {
3191 /* If it is striped directory, and there is bad stripe
3192 * Let's revalidate the dentry again, instead of returning
3194 if (S_ISDIR(inode->i_mode) &&
3195 ll_i2info(inode)->lli_lsm_md != NULL)
3198 /* This path cannot be hit for regular files unless in
3199 * case of obscure races, so no need to to validate
3201 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3203 } else if (rc != 0) {
/* -EACCES/-EIDRM are expected (permission/identity revocation);
 * log those at D_INFO, everything else as an error. */
3204 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3205 "%s: revalidate FID "DFID" error: rc = %d\n",
3206 ll_get_fsname(inode->i_sb, NULL, 0),
3207 PFID(ll_inode2fid(inode)), rc);
/* __ll_inode_revalidate(): refresh dentry/inode attributes from the
 * MDT, requesting the ibits in @ibits.  Two strategies:
 *   - if the server supports OBD_CONNECT_ATTRFID, take an intent
 *     getattr/lookup lock by FID (keeps dcache state consistent);
 *   - otherwise, if no matching ibits lock is cached locally, issue a
 *     plain md_getattr and merge the reply into the inode.
 * NOTE(review): some early-return/cleanup lines are elided here. */
3213 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3215 struct inode *inode = dentry->d_inode;
3216 struct ptlrpc_request *req = NULL;
3217 struct obd_export *exp;
3221 LASSERT(inode != NULL);
3223 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3224 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3226 exp = ll_i2mdexp(inode);
3228 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3229 * But under CMD case, it caused some lock issues, should be fixed
3230 * with new CMD ibits lock. See bug 12718 */
3231 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3232 struct lookup_intent oit = { .it_op = IT_GETATTR };
3233 struct md_op_data *op_data;
/* LOOKUP-only revalidation uses a cheaper IT_LOOKUP intent. */
3235 if (ibits == MDS_INODELOCK_LOOKUP)
3236 oit.it_op = IT_LOOKUP;
3238 /* Call getattr by fid, so do not provide name at all. */
3239 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3240 dentry->d_inode, NULL, 0, 0,
3241 LUSTRE_OPC_ANY, NULL);
3242 if (IS_ERR(op_data))
3243 RETURN(PTR_ERR(op_data));
3245 rc = md_intent_lock(exp, op_data, &oit, &req,
3246 &ll_md_blocking_ast, 0);
3247 ll_finish_md_op_data(op_data);
3249 rc = ll_inode_revalidate_fini(inode, rc);
3253 rc = ll_revalidate_it_finish(req, &oit, dentry);
3255 ll_intent_release(&oit);
3259 /* Unlinked? Unhash dentry, so it is not picked up later by
3260 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3261 here to preserve get_cwd functionality on 2.6.
3263 if (!dentry->d_inode->i_nlink)
3264 d_lustre_invalidate(dentry, 0);
3266 ll_lookup_finish_locks(&oit, dentry);
3267 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3268 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3269 u64 valid = OBD_MD_FLGETATTR;
3270 struct md_op_data *op_data;
/* Regular files also need striping EA; size the reply buffer for
 * the default MDS EA size. */
3273 if (S_ISREG(inode->i_mode)) {
3274 rc = ll_get_default_mdsize(sbi, &ealen);
3277 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3280 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3281 0, ealen, LUSTRE_OPC_ANY,
3283 if (IS_ERR(op_data))
3284 RETURN(PTR_ERR(op_data));
3286 op_data->op_valid = valid;
3287 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3288 ll_finish_md_op_data(op_data);
3290 rc = ll_inode_revalidate_fini(inode, rc);
3294 rc = ll_prep_inode(&inode, req, NULL, NULL);
3297 ptlrpc_req_finished(req);
/* ll_merge_md_attr(): for a striped directory, merge the per-stripe
 * attributes (nlink, blocks, size, a/m/ctime) gathered from all MDTs
 * into the master inode.  Times are cached in ll_inode_info and copied
 * into the VFS inode by the caller (ll_inode_revalidate). */
3301 static int ll_merge_md_attr(struct inode *inode)
3303 struct cl_attr attr = { 0 };
3306 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3307 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3308 &attr, ll_md_blocking_ast);
3312 set_nlink(inode, attr.cat_nlink);
3313 inode->i_blocks = attr.cat_blocks;
3314 i_size_write(inode, attr.cat_size);
3316 ll_i2info(inode)->lli_atime = attr.cat_atime;
3317 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3318 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/* ll_inode_revalidate(): full revalidation — metadata via
 * __ll_inode_revalidate(), then size/times: striped directories merge
 * per-stripe attributes, non-regular files copy cached times into the
 * VFS inode, and regular files glimpse the size from the OSTs (unless
 * an HSM restore is in progress, when the MDT-provided size is already
 * authoritative and a glimpse would block on the layout lock). */
3324 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3326 struct inode *inode = dentry->d_inode;
3330 rc = __ll_inode_revalidate(dentry, ibits);
3334 /* if object isn't regular file, don't validate size */
3335 if (!S_ISREG(inode->i_mode)) {
3336 if (S_ISDIR(inode->i_mode) &&
3337 ll_i2info(inode)->lli_lsm_md != NULL) {
3338 rc = ll_merge_md_attr(inode);
3343 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3344 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3345 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3347 /* In case of restore, the MDT has the right size and has
3348 * already send it back without granting the layout lock,
3349 * inode is up-to-date so glimpse is useless.
3350 * Also to glimpse we need the layout, in case of a running
3351 * restore the MDT holds the layout lock so the glimpse will
3352 * block up to the end of restore (getattr will block)
3354 if (!ll_file_test_flag(ll_i2info(inode), LLIF_FILE_RESTORING))
3355 rc = ll_glimpse_size(inode);
/* ll_getattr(): VFS ->getattr.  Revalidates UPDATE|LOOKUP ibits then
 * copies inode attributes into *stat.  With a 32-bit client API the
 * inode number is synthesized from the FID via cl_fid_build_ino(). */
3360 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3362 struct inode *inode = de->d_inode;
3363 struct ll_sb_info *sbi = ll_i2sbi(inode);
3364 struct ll_inode_info *lli = ll_i2info(inode);
3367 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3368 MDS_INODELOCK_LOOKUP);
3369 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3374 stat->dev = inode->i_sb->s_dev;
3375 if (ll_need_32bit_api(sbi))
3376 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3378 stat->ino = inode->i_ino;
3379 stat->mode = inode->i_mode;
3380 stat->uid = inode->i_uid;
3381 stat->gid = inode->i_gid;
3382 stat->rdev = inode->i_rdev;
3383 stat->atime = inode->i_atime;
3384 stat->mtime = inode->i_mtime;
3385 stat->ctime = inode->i_ctime;
3386 stat->blksize = 1 << inode->i_blkbits;
3388 stat->nlink = inode->i_nlink;
3389 stat->size = i_size_read(inode);
3390 stat->blocks = inode->i_blocks;
/* ll_fiemap(): ->fiemap handler.  Builds a struct fiemap sized for the
 * caller's extent budget, copies any caller-supplied first extent in,
 * runs ll_do_fiemap(), and copies the mapped extents back out.
 * NOTE(review): extent_count comes from userspace via the VFS; the
 * num_bytes multiplication is presumably bounded by the VFS/ioctl
 * layer — confirm there is an upstream overflow check. */
3395 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3396 __u64 start, __u64 len)
3400 struct fiemap *fiemap;
3401 unsigned int extent_count = fieinfo->fi_extents_max;
3403 num_bytes = sizeof(*fiemap) + (extent_count *
3404 sizeof(struct fiemap_extent));
3405 OBD_ALLOC_LARGE(fiemap, num_bytes);
3410 fiemap->fm_flags = fieinfo->fi_flags;
3411 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3412 fiemap->fm_start = start;
3413 fiemap->fm_length = len;
/* Only the first extent may carry caller state (e.g. continuation). */
3414 if (extent_count > 0 &&
3415 copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3416 sizeof(struct fiemap_extent)) != 0)
3417 GOTO(out, rc = -EFAULT);
3419 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3421 fieinfo->fi_flags = fiemap->fm_flags;
3422 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3423 if (extent_count > 0 &&
3424 copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3425 fiemap->fm_mapped_extents *
3426 sizeof(struct fiemap_extent)) != 0)
3427 GOTO(out, rc = -EFAULT);
3429 OBD_FREE_LARGE(fiemap, num_bytes);
/* ll_get_acl(): ->get_acl — return a duplicated reference to the
 * cached POSIX ACL under lli_lock; the VFS releases the reference. */
3433 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3435 struct ll_inode_info *lli = ll_i2info(inode);
3436 struct posix_acl *acl = NULL;
3439 spin_lock(&lli->lli_lock);
3440 /* VFS' acl_permission_check->check_acl will release the refcount */
3441 acl = posix_acl_dup(lli->lli_posix_acl);
3442 spin_unlock(&lli->lli_lock);
/* ll_check_acl(): ACL callback for generic_permission() on kernels
 * without the 2-arg generic_permission.  Refuses to run in RCU-walk
 * mode (4-arg variant), otherwise checks @mask against the cached ACL.
 * Compiles to a stub when CONFIG_FS_POSIX_ACL is off.
 * NOTE(review): return statements are elided in this excerpt. */
3447 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3449 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3450 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3452 ll_check_acl(struct inode *inode, int mask)
3455 # ifdef CONFIG_FS_POSIX_ACL
3456 struct posix_acl *acl;
3460 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* Cannot block in RCU path walk; bail so the VFS retries ref-walk. */
3461 if (flags & IPERM_FLAG_RCU)
3464 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3469 rc = posix_acl_permission(inode, acl, mask);
3470 posix_acl_release(acl);
3473 # else /* !CONFIG_FS_POSIX_ACL */
3475 # endif /* CONFIG_FS_POSIX_ACL */
3477 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/* ll_inode_permission(): VFS ->permission with three kernel-API
 * variants.  Rejects RCU-walk mode, revalidates the root inode (which
 * lookup never revalidates), optionally squashes root credentials per
 * the root_squash configuration, then delegates to remote-permission
 * checking or generic_permission().
 * NOTE(review): some RETURN/brace lines are elided in this excerpt. */
3479 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3480 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3482 # ifdef HAVE_INODE_PERMISION_2ARGS
3483 int ll_inode_permission(struct inode *inode, int mask)
3485 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3490 struct ll_sb_info *sbi;
3491 struct root_squash_info *squash;
3492 struct cred *cred = NULL;
3493 const struct cred *old_cred = NULL;
3495 bool squash_id = false;
/* May need to issue RPCs below: refuse non-blocking (RCU) mode. */
3498 #ifdef MAY_NOT_BLOCK
3499 if (mask & MAY_NOT_BLOCK)
3501 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3502 if (flags & IPERM_FLAG_RCU)
3506 /* as root inode are NOT getting validated in lookup operation,
3507 * need to do it before permission check. */
3509 if (inode == inode->i_sb->s_root->d_inode) {
3510 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3511 MDS_INODELOCK_LOOKUP);
3516 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3517 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3519 /* squash fsuid/fsgid if needed */
3520 sbi = ll_i2sbi(inode);
3521 squash = &sbi->ll_squash;
3522 if (unlikely(squash->rsi_uid != 0 &&
3523 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3524 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3528 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3529 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3530 squash->rsi_uid, squash->rsi_gid);
3532 /* update current process's credentials
3533 * and FS capability */
3534 cred = prepare_creds();
3538 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3539 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* Drop all filesystem-related capabilities so the squashed
 * identity cannot bypass permission checks. */
3540 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3541 if ((1 << cap) & CFS_CAP_FS_MASK)
3542 cap_lower(cred->cap_effective, cap);
3544 old_cred = override_creds(cred);
3547 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3549 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3550 rc = lustre_check_remote_perm(inode, mask);
3552 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3554 /* restore current process's credentials and FS capability */
3556 revert_creds(old_cred);
3563 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations table (no .flock/.lock: flock is handled
 * locally by the VFS).  read/write entries depend on whether the
 * kernel uses the read_iter/write_iter API. */
3564 struct file_operations ll_file_operations = {
3565 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3566 # ifdef HAVE_SYNC_READ_WRITE
3567 .read = new_sync_read,
3568 .write = new_sync_write,
3570 .read_iter = ll_file_read_iter,
3571 .write_iter = ll_file_write_iter,
3572 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3573 .read = ll_file_read,
3574 .aio_read = ll_file_aio_read,
3575 .write = ll_file_write,
3576 .aio_write = ll_file_aio_write,
3577 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3578 .unlocked_ioctl = ll_file_ioctl,
3579 .open = ll_file_open,
3580 .release = ll_file_release,
3581 .mmap = ll_file_mmap,
3582 .llseek = ll_file_seek,
3583 .splice_read = ll_file_splice_read,
/* file_operations for "-o flock": identical to the default table but
 * routes .flock and .lock through ll_file_flock for cluster-coherent
 * locking via the MDT. */
3588 struct file_operations ll_file_operations_flock = {
3589 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3590 # ifdef HAVE_SYNC_READ_WRITE
3591 .read = new_sync_read,
3592 .write = new_sync_write,
3593 # endif /* HAVE_SYNC_READ_WRITE */
3594 .read_iter = ll_file_read_iter,
3595 .write_iter = ll_file_write_iter,
3596 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3597 .read = ll_file_read,
3598 .aio_read = ll_file_aio_read,
3599 .write = ll_file_write,
3600 .aio_write = ll_file_aio_write,
3601 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3602 .unlocked_ioctl = ll_file_ioctl,
3603 .open = ll_file_open,
3604 .release = ll_file_release,
3605 .mmap = ll_file_mmap,
3606 .llseek = ll_file_seek,
3607 .splice_read = ll_file_splice_read,
3610 .flock = ll_file_flock,
3611 .lock = ll_file_flock
3614 /* These are for -o noflock - to return ENOSYS on flock calls */
/* file_operations for "-o noflock": .flock/.lock point at the
 * ll_file_noflock stub so lock requests fail explicitly. */
3615 struct file_operations ll_file_operations_noflock = {
3616 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3617 # ifdef HAVE_SYNC_READ_WRITE
3618 .read = new_sync_read,
3619 .write = new_sync_write,
3620 # endif /* HAVE_SYNC_READ_WRITE */
3621 .read_iter = ll_file_read_iter,
3622 .write_iter = ll_file_write_iter,
3623 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3624 .read = ll_file_read,
3625 .aio_read = ll_file_aio_read,
3626 .write = ll_file_write,
3627 .aio_write = ll_file_aio_write,
3628 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3629 .unlocked_ioctl = ll_file_ioctl,
3630 .open = ll_file_open,
3631 .release = ll_file_release,
3632 .mmap = ll_file_mmap,
3633 .llseek = ll_file_seek,
3634 .splice_read = ll_file_splice_read,
3637 .flock = ll_file_noflock,
3638 .lock = ll_file_noflock
/* inode_operations for regular Lustre files; .get_acl is only wired
 * up on kernels that have the iop (HAVE_IOP_GET_ACL). */
3641 struct inode_operations ll_file_inode_operations = {
3642 .setattr = ll_setattr,
3643 .getattr = ll_getattr,
3644 .permission = ll_inode_permission,
3645 .setxattr = ll_setxattr,
3646 .getxattr = ll_getxattr,
3647 .listxattr = ll_listxattr,
3648 .removexattr = ll_removexattr,
3649 .fiemap = ll_fiemap,
3650 #ifdef HAVE_IOP_GET_ACL
3651 .get_acl = ll_get_acl,
3655 /* dynamic ioctl number support routins */
/* Global registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries protected by an rwsem.  Readers (dispatch) take
 * it shared; register/unregister take it exclusive. */
3656 static struct llioc_ctl_data {
3657 struct rw_semaphore ioc_sem;
3658 struct list_head ioc_head;
3660 __RWSEM_INITIALIZER(llioc.ioc_sem),
3661 LIST_HEAD_INIT(llioc.ioc_head)
/* One registration: callback plus a flexible array of the ioctl
 * command numbers it handles (iocd_count entries). */
3666 struct list_head iocd_list;
3667 unsigned int iocd_size;
3668 llioc_callback_t iocd_cb;
3669 unsigned int iocd_count;
3670 unsigned int iocd_cmd[0];
/* ll_iocontrol_register(): register callback @cb for @count ioctl
 * commands listed in @cmd.  Returns an opaque handle (the allocation
 * itself, elided RETURN) for later ll_iocontrol_unregister(), or NULL
 * on bad arguments / allocation failure. */
3673 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3676 struct llioc_data *in_data = NULL;
3679 if (cb == NULL || cmd == NULL ||
3680 count > LLIOC_MAX_CMD || count < 0)
/* Flexible-array allocation: header + count command slots. */
3683 size = sizeof(*in_data) + count * sizeof(unsigned int);
3684 OBD_ALLOC(in_data, size);
3685 if (in_data == NULL)
3688 memset(in_data, 0, sizeof(*in_data));
3689 in_data->iocd_size = size;
3690 in_data->iocd_cb = cb;
3691 in_data->iocd_count = count;
3692 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3694 down_write(&llioc.ioc_sem);
3695 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3696 up_write(&llioc.ioc_sem);
/* ll_iocontrol_unregister(): remove and free the registration whose
 * handle is @magic (the pointer returned by ll_iocontrol_register);
 * warns if no matching entry is found. */
3701 void ll_iocontrol_unregister(void *magic)
3703 struct llioc_data *tmp;
3708 down_write(&llioc.ioc_sem);
3709 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* Size saved before freeing: OBD_FREE needs the alloc size. */
3711 unsigned int size = tmp->iocd_size;
3713 list_del(&tmp->iocd_list);
3714 up_write(&llioc.ioc_sem);
3716 OBD_FREE(tmp, size);
3720 up_write(&llioc.ioc_sem);
3722 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3725 EXPORT_SYMBOL(ll_iocontrol_register);
3726 EXPORT_SYMBOL(ll_iocontrol_unregister);
/* ll_iocontrol_call(): dispatch @cmd to the first registered handler
 * that claims it.  Stores the handler's result in *rcp and returns
 * LLIOC_STOP when a handler consumed the ioctl, LLIOC_CONT otherwise. */
3728 static enum llioc_iter
3729 ll_iocontrol_call(struct inode *inode, struct file *file,
3730 unsigned int cmd, unsigned long arg, int *rcp)
3732 enum llioc_iter ret = LLIOC_CONT;
3733 struct llioc_data *data;
3734 int rc = -EINVAL, i;
3736 down_read(&llioc.ioc_sem);
3737 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3738 for (i = 0; i < data->iocd_count; i++) {
3739 if (cmd != data->iocd_cmd[i])
3742 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
/* First handler that returns STOP wins; stop scanning. */
3746 if (ret == LLIOC_STOP)
3749 up_read(&llioc.ioc_sem);
/* ll_layout_conf(): push a layout configuration into the cl_object
 * stack via cl_conf_set().  For OBJECT_CONF_SET, additionally allow
 * the layout lock to be matched (only after the layout is applied, so
 * no one can observe a stale layout) and log any generation change. */
3756 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3758 struct ll_inode_info *lli = ll_i2info(inode);
3759 struct cl_object *obj = lli->lli_clob;
3760 struct cl_env_nest nest;
3768 env = cl_env_nested_get(&nest);
3770 RETURN(PTR_ERR(env));
3772 rc = cl_conf_set(env, lli->lli_clob, conf);
3776 if (conf->coc_opc == OBJECT_CONF_SET) {
3777 struct ldlm_lock *lock = conf->coc_lock;
3778 struct cl_layout cl = {
3782 LASSERT(lock != NULL);
3783 LASSERT(ldlm_has_layout(lock));
3785 /* it can only be allowed to match after layout is
3786 * applied to inode otherwise false layout would be
3787 * seen. Applying layout shoud happen before dropping
3788 * the intent lock. */
3789 ldlm_lock_allow_match(lock);
3791 rc = cl_object_layout_get(env, obj, &cl);
3796 DFID": layout version change: %u -> %u\n",
3797 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3799 ll_layout_version_set(lli, cl.cl_layout_gen);
3803 cl_env_nested_put(&nest, env);
3808 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
/* ll_layout_fetch(): if @lock has no LVB layout data (lock was granted
 * via a completion AST rather than inline), fetch the LOV EA from the
 * MDT with a getxattr RPC and install it as the lock's LVB.  The copy
 * is freed if another thread installed an LVB first. */
3809 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3812 struct ll_sb_info *sbi = ll_i2sbi(inode);
3813 struct ptlrpc_request *req;
3814 struct mdt_body *body;
3821 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3822 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3823 lock->l_lvb_data, lock->l_lvb_len);
/* Layout already attached to the lock: nothing to fetch. */
3825 if (lock->l_lvb_data != NULL)
3828 /* if layout lock was granted right away, the layout is returned
3829 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3830 * blocked and then granted via completion ast, we have to fetch
3831 * layout here. Please note that we can't use the LVB buffer in
3832 * completion AST because it doesn't have a large enough buffer */
3833 rc = ll_get_default_mdsize(sbi, &lmmsize);
3835 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
3836 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3841 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3843 GOTO(out, rc = -EPROTO);
3845 lmmsize = body->mbo_eadatasize;
3846 if (lmmsize == 0) /* empty layout */
3849 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3851 GOTO(out, rc = -EFAULT);
/* The RPC buffer is freed with the request; keep a private copy to
 * hang off the lock. */
3853 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3854 if (lvbdata == NULL)
3855 GOTO(out, rc = -ENOMEM);
3857 memcpy(lvbdata, lmm, lmmsize);
3858 lock_res_and_lock(lock);
/* Install only if we weren't raced by another fetcher. */
3859 if (unlikely(lock->l_lvb_data == NULL)) {
3860 lock->l_lvb_type = LVB_T_LAYOUT;
3861 lock->l_lvb_data = lvbdata;
3862 lock->l_lvb_len = lmmsize;
3865 unlock_res_and_lock(lock);
3868 OBD_FREE_LARGE(lvbdata, lmmsize);
3873 ptlrpc_req_finished(req);
3878 * Apply the layout to the inode. Layout lock is held and will be released
/* ll_layout_lock_set(): with the layout lock in @lockh held at @mode,
 * fetch the layout (if needed), apply it to the cl_object stack via
 * ll_layout_conf(OBJECT_CONF_SET), release the lock reference, and if
 * the old layout was still busy (-EBUSY), wait for in-flight IO with
 * OBJECT_CONF_WAIT.  The lock reference is always dropped on return.
 * NOTE(review): the lvb_ready early-exit and goto targets are elided
 * in this excerpt. */
3881 static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
3882 struct inode *inode)
3884 struct ll_inode_info *lli = ll_i2info(inode);
3885 struct ll_sb_info *sbi = ll_i2sbi(inode);
3886 struct ldlm_lock *lock;
3887 struct cl_object_conf conf;
3890 bool wait_layout = false;
3893 LASSERT(lustre_handle_is_used(lockh));
3895 lock = ldlm_handle2lock(lockh);
3896 LASSERT(lock != NULL);
3897 LASSERT(ldlm_has_layout(lock));
3899 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured",
3900 PFID(&lli->lli_fid), inode);
3902 /* in case this is a caching lock and reinstate with new inode */
3903 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3905 lock_res_and_lock(lock);
3906 lvb_ready = ldlm_is_lvb_ready(lock);
3907 unlock_res_and_lock(lock);
3908 /* checking lvb_ready is racy but this is okay. The worst case is
3909 * that multi processes may configure the file on the same time. */
3914 rc = ll_layout_fetch(inode, lock);
3918 /* for layout lock, lmm is stored in lock's lvb.
3919 * lvb_data is immutable if the lock is held so it's safe to access it
3922 * set layout to file. Unlikely this will fail as old layout was
3923 * surely eliminated */
3924 memset(&conf, 0, sizeof conf);
3925 conf.coc_opc = OBJECT_CONF_SET;
3926 conf.coc_inode = inode;
3927 conf.coc_lock = lock;
3928 conf.u.coc_layout.lb_buf = lock->l_lvb_data;
3929 conf.u.coc_layout.lb_len = lock->l_lvb_len;
3930 rc = ll_layout_conf(inode, &conf);
3932 /* refresh layout failed, need to wait */
3933 wait_layout = rc == -EBUSY;
3937 LDLM_LOCK_PUT(lock);
3938 ldlm_lock_decref(lockh, mode);
3940 /* wait for IO to complete if it's still being used. */
3942 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3943 ll_get_fsname(inode->i_sb, NULL, 0),
3944 PFID(&lli->lli_fid), inode);
3946 memset(&conf, 0, sizeof conf);
3947 conf.coc_opc = OBJECT_CONF_WAIT;
3948 conf.coc_inode = inode;
3949 rc = ll_layout_conf(inode, &conf);
3953 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3954 ll_get_fsname(inode->i_sb, NULL, 0),
3955 PFID(&lli->lli_fid), rc);
/* ll_layout_refresh_locked(): refresh the layout while holding
 * lli_layout_mutex.  Fast path: match a cached layout lock with
 * ll_take_md_lock().  Slow path: enqueue an IT_LAYOUT intent lock on
 * the MDT and apply the returned layout via ll_layout_lock_set().
 * NOTE(review): the retry-on-EAGAIN path and final RETURN are elided
 * in this excerpt. */
3960 static int ll_layout_refresh_locked(struct inode *inode)
3962 struct ll_inode_info *lli = ll_i2info(inode);
3963 struct ll_sb_info *sbi = ll_i2sbi(inode);
3964 struct md_op_data *op_data;
3965 struct lookup_intent it;
3966 struct lustre_handle lockh;
3967 enum ldlm_mode mode;
3968 struct ldlm_enqueue_info einfo = {
3969 .ei_type = LDLM_IBITS,
3971 .ei_cb_bl = &ll_md_blocking_ast,
3972 .ei_cb_cp = &ldlm_completion_ast,
3978 /* mostly layout lock is caching on the local side, so try to match
3979 * it before grabbing layout lock mutex. */
3980 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3981 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3982 if (mode != 0) { /* hit cached lock */
3983 rc = ll_layout_lock_set(&lockh, mode, inode);
3990 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3991 0, 0, LUSTRE_OPC_ANY, NULL);
3992 if (IS_ERR(op_data))
3993 RETURN(PTR_ERR(op_data));
3995 /* have to enqueue one */
3996 memset(&it, 0, sizeof(it));
3997 it.it_op = IT_LAYOUT;
3998 lockh.cookie = 0ULL;
4000 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
4001 ll_get_fsname(inode->i_sb, NULL, 0),
4002 PFID(&lli->lli_fid), inode);
4004 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
/* The intent keeps the reply request pinned; release it now that
 * the lock handle is ours. */
4005 if (it.d.lustre.it_data != NULL)
4006 ptlrpc_req_finished(it.d.lustre.it_data);
4007 it.d.lustre.it_data = NULL;
4009 ll_finish_md_op_data(op_data);
/* Take ownership of the intent's lock reference before dropping
 * the intent itself. */
4011 mode = it.d.lustre.it_lock_mode;
4012 it.d.lustre.it_lock_mode = 0;
4013 ll_intent_drop_lock(&it);
4016 /* set lock data in case this is a new lock */
4017 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
4018 rc = ll_layout_lock_set(&lockh, mode, inode);
4027 * This function checks if there exists a LAYOUT lock on the client side,
4028 * or enqueues it if it doesn't have one in cache.
4030 * This function will not hold layout lock so it may be revoked any time after
4031 * this function returns. Any operations depend on layout should be redone
4034 * This function should be called before lov_io_init() to get an uptodate
4035 * layout version, the caller should save the version number and after IO
4036 * is finished, this function should be called again to verify that layout
4037 * is not changed during IO time.
4039 int ll_layout_refresh(struct inode *inode, __u32 *gen)
4041 struct ll_inode_info *lli = ll_i2info(inode);
4042 struct ll_sb_info *sbi = ll_i2sbi(inode);
/* Fast path: layout locking disabled, or a valid generation is
 * already cached — return it without any locking. */
4046 *gen = ll_layout_version_get(lli);
4047 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
4051 LASSERT(fid_is_sane(ll_inode2fid(inode)));
4052 LASSERT(S_ISREG(inode->i_mode));
4054 /* take layout lock mutex to enqueue layout lock exclusively. */
4055 mutex_lock(&lli->lli_layout_mutex);
4057 rc = ll_layout_refresh_locked(inode);
4061 *gen = ll_layout_version_get(lli);
4063 mutex_unlock(&lli->lli_layout_mutex);
4069 * This function send a restore request to the MDT
4071 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
4073 struct hsm_user_request *hur;
4077 len = sizeof(struct hsm_user_request) +
4078 sizeof(struct hsm_user_item);
4079 OBD_ALLOC(hur, len);
4083 hur->hur_request.hr_action = HUA_RESTORE;
4084 hur->hur_request.hr_archive_id = 0;
4085 hur->hur_request.hr_flags = 0;
4086 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
4087 sizeof(hur->hur_user_item[0].hui_fid));
4088 hur->hur_user_item[0].hui_extent.offset = offset;
4089 hur->hur_user_item[0].hui_extent.length = length;
4090 hur->hur_request.hr_itemcount = 1;
4091 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,