4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include <linux/user_namespace.h>
49 #ifdef HAVE_UIDGID_HEADER
50 # include <linux/uidgid.h>
52 #include <lustre/ll_fiemap.h>
53 #include <lustre_ioctl.h>
55 #include "cl_object.h"
57 #include "llite_internal.h"
58 #include "vvp_internal.h"
/* Forward declarations for static helpers defined later in this file.
 * NOTE(review): this extract is fragmentary — intermediate lines of the
 * original file are missing throughout. */
61 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
63 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
66 static enum llioc_iter
67 ll_iocontrol_call(struct inode *inode, struct file *file,
68 unsigned int cmd, unsigned long arg, int *rcp);
/* Allocate a per-file-descriptor ll_file_data from ll_file_data_slab
 * (GFP_NOFS to avoid filesystem recursion during reclaim) and reset the
 * write-failure flag.  NOTE(review): allocation-failure check and return
 * are not visible in this fragment — confirm against the full source. */
70 static struct ll_file_data *ll_file_data_get(void)
72 struct ll_file_data *fd;
74 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
78 fd->fd_write_failed = false;
/* Release a ll_file_data back to its slab cache (pairs with
 * ll_file_data_get()). */
83 static void ll_file_data_put(struct ll_file_data *fd)
86 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
90 * Packs all the attributes into @op_data for the CLOSE rpc.
/* Copies the inode's mode/times/size/blocks/flags and the open handle
 * into @op_data so the MDT can apply them at close time.  The ATTR_*
 * bits below mark exactly the fields packed here as valid. */
92 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
93 struct obd_client_handle *och)
97 ll_prep_md_op_data(op_data, inode, NULL, NULL,
98 0, 0, LUSTRE_OPC_ANY, NULL);
100 op_data->op_attr.ia_mode = inode->i_mode;
101 op_data->op_attr.ia_atime = inode->i_atime;
102 op_data->op_attr.ia_mtime = inode->i_mtime;
103 op_data->op_attr.ia_ctime = inode->i_ctime;
104 op_data->op_attr.ia_size = i_size_read(inode);
105 op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
106 ATTR_MTIME | ATTR_MTIME_SET |
107 ATTR_CTIME | ATTR_CTIME_SET;
108 op_data->op_attr_blocks = inode->i_blocks;
109 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
110 op_data->op_handle = och->och_fh;
/* LLIF_DATA_MODIFIED is test-and-cleared: the MDS_DATA_MODIFIED bias is
 * sent at most once per modification, and only for write opens. */
112 if (och->och_flags & FMODE_WRITE &&
113 ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED))
114 /* For HSM: if inode data has been modified, pack it so that
115 * MDT can set data dirty flag in the archive. */
116 op_data->op_bias |= MDS_DATA_MODIFIED;
122 * Perform a close, possibly with a bias.
123 * The meaning of "data" depends on the value of "bias".
125 * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
126 * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
/* Sends the MDS close RPC for @och, optionally carrying an HSM-release
 * or layout-swap intent.  On return the handle is poisoned with
 * DEAD_HANDLE_MAGIC.  NOTE(review): fragment is missing lines (entry
 * checks, switch head, labels) — do not infer full control flow. */
129 static int ll_close_inode_openhandle(struct obd_export *md_exp,
130 struct obd_client_handle *och,
132 enum mds_op_bias bias,
135 struct obd_export *exp = ll_i2mdexp(inode);
136 struct md_op_data *op_data;
137 struct ptlrpc_request *req = NULL;
138 struct obd_device *obd = class_exp2obd(exp);
144 * XXX: in case of LMV, is this correct to access
147 CERROR("Invalid MDC connection handle "LPX64"\n",
148 ll_i2mdexp(inode)->exp_handle.h_cookie);
152 OBD_ALLOC_PTR(op_data);
154 /* XXX We leak openhandle and request here. */
155 GOTO(out, rc = -ENOMEM);
157 ll_prepare_close(inode, op_data, och);
/* Layout swap: @data is the second inode; its FID goes in op_fid2. */
159 case MDS_CLOSE_LAYOUT_SWAP:
160 LASSERT(data != NULL);
161 op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
162 op_data->op_data_version = 0;
163 op_data->op_lease_handle = och->och_lease_handle;
164 op_data->op_fid2 = *ll_inode2fid(data);
/* HSM release: @data is a pointer to the data version to release at. */
167 case MDS_HSM_RELEASE:
168 LASSERT(data != NULL);
169 op_data->op_bias |= MDS_HSM_RELEASE;
170 op_data->op_data_version = *(__u64 *)data;
171 op_data->op_lease_handle = och->och_lease_handle;
172 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
176 LASSERT(data == NULL);
180 rc = md_close(md_exp, op_data, och->och_mod, &req);
182 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
183 ll_i2mdexp(inode)->exp_obd->obd_name,
184 PFID(ll_inode2fid(inode)), rc);
/* For biased closes, check the reply body to see whether the server
 * actually executed the close intent. */
188 op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) {
189 struct mdt_body *body;
191 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
192 if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
196 ll_finish_md_op_data(op_data);
200 md_clear_open_replay_data(md_exp, och);
201 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
204 if (req) /* This is close request */
205 ptlrpc_req_finished(req);
/* Close the per-mode (write/exec/read) MDS open handle cached on the
 * inode, but only when no other file descriptors still use it
 * (och_usecount > 0 means skip).  NOTE(review): lines between the
 * usecount check and the close call are missing from this extract. */
209 int ll_md_real_close(struct inode *inode, fmode_t fmode)
211 struct ll_inode_info *lli = ll_i2info(inode);
212 struct obd_client_handle **och_p;
213 struct obd_client_handle *och;
/* Pick the handle/refcount pair matching the open mode. */
218 if (fmode & FMODE_WRITE) {
219 och_p = &lli->lli_mds_write_och;
220 och_usecount = &lli->lli_open_fd_write_count;
221 } else if (fmode & FMODE_EXEC) {
222 och_p = &lli->lli_mds_exec_och;
223 och_usecount = &lli->lli_open_fd_exec_count;
225 LASSERT(fmode & FMODE_READ);
226 och_p = &lli->lli_mds_read_och;
227 och_usecount = &lli->lli_open_fd_read_count;
230 mutex_lock(&lli->lli_och_mutex);
231 if (*och_usecount > 0) {
232 /* There are still users of this handle, so skip
234 mutex_unlock(&lli->lli_och_mutex);
240 mutex_unlock(&lli->lli_och_mutex);
243 /* There might be a race and this handle may already
245 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
246 och, inode, 0, NULL);
/* Per-descriptor close: drops group lock and lease if held, closes any
 * private och, decrements the mode refcount, and falls back to
 * ll_md_real_close() unless a matching OPEN DLM lock lets us skip the
 * MDS round trip.  Frees the ll_file_data last. */
252 static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
255 union ldlm_policy_data policy = {
256 .l_inodebits = { MDS_INODELOCK_OPEN },
258 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
259 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
260 struct ll_inode_info *lli = ll_i2info(inode);
261 struct lustre_handle lockh;
262 enum ldlm_mode lockmode;
266 /* clear group lock, if present */
267 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
268 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
270 if (fd->fd_lease_och != NULL) {
273 /* Usually the lease is not released when the
274 * application crashed, we need to release here. */
275 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
276 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
277 PFID(&lli->lli_fid), rc, lease_broken);
279 fd->fd_lease_och = NULL;
282 if (fd->fd_och != NULL) {
283 rc = ll_close_inode_openhandle(md_exp, fd->fd_och, inode, 0,
289 /* Let's see if we have good enough OPEN lock on the file and if
290 we can skip talking to MDS */
291 mutex_lock(&lli->lli_och_mutex);
292 if (fd->fd_omode & FMODE_WRITE) {
294 LASSERT(lli->lli_open_fd_write_count);
295 lli->lli_open_fd_write_count--;
296 } else if (fd->fd_omode & FMODE_EXEC) {
298 LASSERT(lli->lli_open_fd_exec_count);
299 lli->lli_open_fd_exec_count--;
302 LASSERT(lli->lli_open_fd_read_count);
303 lli->lli_open_fd_read_count--;
305 mutex_unlock(&lli->lli_och_mutex);
/* LDLM_FL_TEST_LOCK above: md_lock_match only tests for a matching
 * OPEN ibits lock; if none matches we must do the real close. */
307 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
308 LDLM_IBITS, &policy, lockmode, &lockh))
309 rc = ll_md_real_close(inode, fd->fd_omode);
312 LUSTRE_FPRIVATE(file) = NULL;
313 ll_file_data_put(fd);
318 /* While this returns an error code, fput() the caller does not, so we need
319 * to make every effort to clean up all of our state here. Also, applications
320 * rarely check close errors and even if an error is returned they will not
321 * re-try the close call.
/* VFS ->release() hook: tear down remote-ACL state, statahead
 * authorization, async-rc bookkeeping, then ll_md_close().  The root
 * dentry is special-cased (fd freed directly, no MDS close path). */
323 int ll_file_release(struct inode *inode, struct file *file)
325 struct ll_file_data *fd;
326 struct ll_sb_info *sbi = ll_i2sbi(inode);
327 struct ll_inode_info *lli = ll_i2info(inode);
331 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
332 PFID(ll_inode2fid(inode)), inode);
334 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL cleanup applies only to the filesystem root. */
335 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
336 inode == inode->i_sb->s_root->d_inode) {
337 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
340 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
341 fd->fd_flags &= ~LL_FILE_RMTACL;
342 rct_del(&sbi->ll_rct, current_pid());
343 et_search_free(&sbi->ll_et, current_pid());
348 if (inode->i_sb->s_root != file->f_path.dentry)
349 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
350 fd = LUSTRE_FPRIVATE(file);
353 /* The last ref on @file, maybe not the the owner pid of statahead,
354 * because parent and child process can share the same file handle. */
355 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
356 ll_deauthorize_statahead(inode, fd);
358 if (inode->i_sb->s_root == file->f_path.dentry) {
359 LUSTRE_FPRIVATE(file) = NULL;
360 ll_file_data_put(fd);
/* For regular files, fold any async write errors into lli_async_rc so
 * they can be reported at close. */
364 if (!S_ISDIR(inode->i_mode)) {
365 if (lli->lli_clob != NULL)
366 lov_read_and_clear_async_rc(lli->lli_clob);
367 lli->lli_async_rc = 0;
370 rc = ll_md_close(sbi->ll_md_exp, inode, file);
372 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
373 libcfs_debug_dumplog();
/* Issue an IT_OPEN intent lock request to the MDS for @file.  The name
 * is packed only when the server lacks OBD_CONNECT_OPEN_BY_FID support
 * and the dentry name is valid; otherwise the open is by FID alone. */
378 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
379 struct lookup_intent *itp)
381 struct dentry *de = file->f_path.dentry;
382 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
383 struct dentry *parent = de->d_parent;
384 const char *name = NULL;
386 struct md_op_data *op_data;
387 struct ptlrpc_request *req = NULL;
391 LASSERT(parent != NULL);
392 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
394 /* if server supports open-by-fid, or file name is invalid, don't pack
395 * name in open request */
396 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
397 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
398 name = de->d_name.name;
399 len = de->d_name.len;
402 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
403 name, len, 0, LUSTRE_OPC_ANY, NULL);
405 RETURN(PTR_ERR(op_data));
406 op_data->op_data = lmm;
407 op_data->op_data_size = lmmsize;
409 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
410 &ll_md_blocking_ast, 0);
411 ll_finish_md_op_data(op_data);
413 /* reason for keep own exit path - don`t flood log
414 * with messages with -ESTALE errors.
416 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
417 it_open_error(DISP_OPEN_OPEN, itp))
419 ll_release_openhandle(de, itp);
423 if (it_disposition(itp, DISP_LOOKUP_NEG))
424 GOTO(out, rc = -ENOENT);
426 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
427 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
428 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* On success, refresh the inode from the reply and attach the granted
 * DLM lock data to it. */
432 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
433 if (!rc && itp->d.lustre.it_lock_mode)
434 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
437 ptlrpc_req_finished(req);
438 ll_intent_drop_lock(itp);
/* Populate an obd_client_handle from the MDT reply body carried by the
 * completed intent, then register it for open replay on recovery. */
443 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
444 struct obd_client_handle *och)
446 struct ptlrpc_request *req = it->d.lustre.it_data;
447 struct mdt_body *body;
449 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
450 och->och_fh = body->mbo_handle;
451 och->och_fid = body->mbo_fid1;
452 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
453 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
454 och->och_flags = it->it_flags;
456 return md_set_open_replay_data(md_exp, och, it);
/* Complete the local side of an open: optionally fill @och from the
 * intent, attach @fd as the file's private data, and initialize
 * readahead and cl_context state.  fd_omode records only the access
 * mode bits of the open flags. */
459 static int ll_local_open(struct file *file, struct lookup_intent *it,
460 struct ll_file_data *fd, struct obd_client_handle *och)
462 struct inode *inode = file->f_path.dentry->d_inode;
465 LASSERT(!LUSTRE_FPRIVATE(file));
472 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
477 LUSTRE_FPRIVATE(file) = fd;
478 ll_readahead_init(inode, &fd->fd_ras);
479 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
481 /* ll_cl_context initialize */
482 rwlock_init(&fd->fd_lock);
483 INIT_LIST_HEAD(&fd->fd_lccs);
488 /* Open a file, and (for the very first open) create objects on the OSTs at
489 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
490 * creation or open until ll_lov_setstripe() ioctl is called.
492 * If we already have the stripe MD locally then we don't request it in
493 * md_open(), by passing a lmm_size = 0.
495 * It is up to the application to ensure no other processes open this file
496 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
497 * used. We might be able to avoid races of that sort by getting lli_open_sem
498 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
499 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* VFS ->open() hook.  Either reuses a cached per-mode MDS open handle
 * (och) under lli_och_mutex, or sends a fresh IT_OPEN intent via
 * ll_intent_file_open().  NOTE(review): many lines are missing from
 * this extract (loop/retry structure, some labels); read the full
 * source before reasoning about control flow here. */
501 int ll_file_open(struct inode *inode, struct file *file)
503 struct ll_inode_info *lli = ll_i2info(inode);
504 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
505 .it_flags = file->f_flags };
506 struct obd_client_handle **och_p = NULL;
507 __u64 *och_usecount = NULL;
508 struct ll_file_data *fd;
512 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
513 PFID(ll_inode2fid(inode)), inode, file->f_flags);
515 it = file->private_data; /* XXX: compat macro */
516 file->private_data = NULL; /* prevent ll_local_open assertion */
518 fd = ll_file_data_get();
520 GOTO(out_openerr, rc = -ENOMEM);
523 if (S_ISDIR(inode->i_mode))
524 ll_authorize_statahead(inode, fd);
526 if (inode->i_sb->s_root == file->f_path.dentry) {
527 LUSTRE_FPRIVATE(file) = fd;
/* No intent from lookup: build one locally from f_flags. */
531 if (!it || !it->d.lustre.it_disposition) {
532 /* Convert f_flags into access mode. We cannot use file->f_mode,
533 * because everything but O_ACCMODE mask was stripped from
535 if ((oit.it_flags + 1) & O_ACCMODE)
537 if (file->f_flags & O_TRUNC)
538 oit.it_flags |= FMODE_WRITE;
540 /* kernel only call f_op->open in dentry_open. filp_open calls
541 * dentry_open after call to open_namei that checks permissions.
542 * Only nfsd_open call dentry_open directly without checking
543 * permissions and because of that this code below is safe. */
544 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
545 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
547 /* We do not want O_EXCL here, presumably we opened the file
548 * already? XXX - NFS implications? */
549 oit.it_flags &= ~O_EXCL;
551 /* bug20584, if "it_flags" contains O_CREAT, the file will be
552 * created if necessary, then "IT_CREAT" should be set to keep
553 * consistent with it */
554 if (oit.it_flags & O_CREAT)
555 oit.it_op |= IT_CREAT;
561 /* Let's see if we have file open on MDS already. */
562 if (it->it_flags & FMODE_WRITE) {
563 och_p = &lli->lli_mds_write_och;
564 och_usecount = &lli->lli_open_fd_write_count;
565 } else if (it->it_flags & FMODE_EXEC) {
566 och_p = &lli->lli_mds_exec_och;
567 och_usecount = &lli->lli_open_fd_exec_count;
569 och_p = &lli->lli_mds_read_och;
570 och_usecount = &lli->lli_open_fd_read_count;
573 mutex_lock(&lli->lli_och_mutex);
574 if (*och_p) { /* Open handle is present */
575 if (it_disposition(it, DISP_OPEN_OPEN)) {
576 /* Well, there's extra open request that we do not need,
577 let's close it somehow. This will decref request. */
578 rc = it_open_error(DISP_OPEN_OPEN, it);
580 mutex_unlock(&lli->lli_och_mutex);
581 GOTO(out_openerr, rc);
584 ll_release_openhandle(file->f_path.dentry, it);
588 rc = ll_local_open(file, it, fd, NULL);
591 mutex_unlock(&lli->lli_och_mutex);
592 GOTO(out_openerr, rc);
595 LASSERT(*och_usecount == 0);
596 if (!it->d.lustre.it_disposition) {
597 /* We cannot just request lock handle now, new ELC code
598 means that one of other OPEN locks for this file
599 could be cancelled, and since blocking ast handler
600 would attempt to grab och_mutex as well, that would
601 result in a deadlock */
602 mutex_unlock(&lli->lli_och_mutex);
604 * Normally called under two situations:
606 * 2. A race/condition on MDS resulting in no open
607 * handle to be returned from LOOKUP|OPEN request,
608 * for example if the target entry was a symlink.
610 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
612 * Always specify MDS_OPEN_BY_FID because we don't want
613 * to get file with different fid.
615 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
616 rc = ll_intent_file_open(file, NULL, 0, it);
618 GOTO(out_openerr, rc);
622 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
624 GOTO(out_och_free, rc = -ENOMEM);
628 /* md_intent_lock() didn't get a request ref if there was an
629 * open error, so don't do cleanup on the request here
631 /* XXX (green): Should not we bail out on any error here, not
632 * just open error? */
633 rc = it_open_error(DISP_OPEN_OPEN, it);
635 GOTO(out_och_free, rc);
637 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
638 "inode %p: disposition %x, status %d\n", inode,
639 it_disposition(it, ~0), it->d.lustre.it_status);
641 rc = ll_local_open(file, it, fd, *och_p);
643 GOTO(out_och_free, rc);
645 mutex_unlock(&lli->lli_och_mutex);
648 /* Must do this outside lli_och_mutex lock to prevent deadlock where
649 different kind of OPEN lock for this same inode gets cancelled
650 by ldlm_cancel_lru */
651 if (!S_ISREG(inode->i_mode))
652 GOTO(out_och_free, rc);
654 cl_lov_delay_create_clear(&file->f_flags);
655 GOTO(out_och_free, rc);
/* Error/exit paths: free the unused och, undo statahead authorization,
 * release the fd, and drop the extra intent request reference. */
659 if (och_p && *och_p) {
660 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
661 *och_p = NULL; /* OBD_FREE writes some magic there */
664 mutex_unlock(&lli->lli_och_mutex);
667 if (lli->lli_opendir_key == fd)
668 ll_deauthorize_statahead(inode, fd);
670 ll_file_data_put(fd);
672 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
675 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
676 ptlrpc_req_finished(it->d.lustre.it_data);
677 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/* LDLM blocking AST for lease locks: on a blocking callback, cancel the
 * lease lock asynchronously (the application detects the broken lease
 * later).  The CANCELING case body is not visible in this extract. */
683 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
684 struct ldlm_lock_desc *desc, void *data, int flag)
687 struct lustre_handle lockh;
691 case LDLM_CB_BLOCKING:
692 ldlm_lock2handle(lock, &lockh);
693 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
695 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
699 case LDLM_CB_CANCELING:
707 * Acquire a lease and open the file.
/* Sends an IT_OPEN intent with MDS_OPEN_LEASE.  When @file is given,
 * the existing openhandle is passed as op_handle so the MDT can match
 * the owner; only FMODE_READ or FMODE_WRITE leases are allowed.
 * Returns the new och, or an ERR_PTR.  NOTE(review): several lines
 * (och allocation, some braces) are missing from this extract. */
709 static struct obd_client_handle *
710 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
713 struct lookup_intent it = { .it_op = IT_OPEN };
714 struct ll_sb_info *sbi = ll_i2sbi(inode);
715 struct md_op_data *op_data;
716 struct ptlrpc_request *req = NULL;
717 struct lustre_handle old_handle = { 0 };
718 struct obd_client_handle *och = NULL;
723 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
724 RETURN(ERR_PTR(-EINVAL));
727 struct ll_inode_info *lli = ll_i2info(inode);
728 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
729 struct obd_client_handle **och_p;
/* Lease mode must be a subset of the file's open mode, and exec
 * opens cannot take leases. */
732 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
733 RETURN(ERR_PTR(-EPERM));
735 /* Get the openhandle of the file */
737 mutex_lock(&lli->lli_och_mutex);
738 if (fd->fd_lease_och != NULL) {
739 mutex_unlock(&lli->lli_och_mutex);
743 if (fd->fd_och == NULL) {
744 if (file->f_mode & FMODE_WRITE) {
745 LASSERT(lli->lli_mds_write_och != NULL);
746 och_p = &lli->lli_mds_write_och;
747 och_usecount = &lli->lli_open_fd_write_count;
749 LASSERT(lli->lli_mds_read_och != NULL);
750 och_p = &lli->lli_mds_read_och;
751 och_usecount = &lli->lli_open_fd_read_count;
753 if (*och_usecount == 1) {
760 mutex_unlock(&lli->lli_och_mutex);
761 if (rc < 0) /* more than 1 opener */
764 LASSERT(fd->fd_och != NULL);
765 old_handle = fd->fd_och->och_fh;
770 RETURN(ERR_PTR(-ENOMEM));
772 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
773 LUSTRE_OPC_ANY, NULL);
775 GOTO(out, rc = PTR_ERR(op_data));
777 /* To tell the MDT this openhandle is from the same owner */
778 op_data->op_handle = old_handle;
780 it.it_flags = fmode | open_flags;
781 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
782 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
783 &ll_md_blocking_lease_ast,
784 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
785 * it can be cancelled which may mislead applications that the lease is
787 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
788 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
789 * doesn't deal with openhandle, so normal openhandle will be leaked. */
790 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
791 ll_finish_md_op_data(op_data);
792 ptlrpc_req_finished(req);
794 GOTO(out_release_it, rc);
796 if (it_disposition(&it, DISP_LOOKUP_NEG))
797 GOTO(out_release_it, rc = -ENOENT);
799 rc = it_open_error(DISP_OPEN_OPEN, &it);
801 GOTO(out_release_it, rc);
803 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
804 ll_och_fill(sbi->ll_md_exp, &it, och);
806 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
807 GOTO(out_close, rc = -EOPNOTSUPP);
809 /* already get lease, handle lease lock */
810 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
811 if (it.d.lustre.it_lock_mode == 0 ||
812 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
813 /* open lock must return for lease */
814 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
815 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
816 it.d.lustre.it_lock_bits);
817 GOTO(out_close, rc = -EPROTO);
820 ll_intent_release(&it);
/* Error unwinding: cancel the open lock if granted, close the handle,
 * then release the intent. */
824 /* Cancel open lock */
825 if (it.d.lustre.it_lock_mode != 0) {
826 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
827 it.d.lustre.it_lock_mode);
828 it.d.lustre.it_lock_mode = 0;
829 och->och_lease_handle.cookie = 0ULL;
831 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, och, inode, 0, NULL);
833 CERROR("%s: error closing file "DFID": %d\n",
834 ll_get_fsname(inode->i_sb, NULL, 0),
835 PFID(&ll_i2info(inode)->lli_fid), rc2);
836 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
838 ll_intent_release(&it);
846 * Check whether a layout swap can be done between two inodes.
848 * \param[in] inode1 First inode to check
849 * \param[in] inode2 Second inode to check
851 * \retval 0 on success, layout swap can be performed between both inodes
852 * \retval negative error code if requirements are not met
/* Both inodes must be regular files, writable by the caller, and on
 * the same superblock.  Error constants for the failure branches are
 * not visible in this extract. */
854 static int ll_check_swap_layouts_validity(struct inode *inode1,
855 struct inode *inode2)
857 if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
860 if (inode_permission(inode1, MAY_WRITE) ||
861 inode_permission(inode2, MAY_WRITE))
864 if (inode1->i_sb != inode2->i_sb)
/* Close @och with the MDS_CLOSE_LAYOUT_SWAP bias so the MDT atomically
 * swaps layouts between @inode and @inode2.  Identical FIDs are
 * rejected; validity is checked first. */
870 static int ll_swap_layouts_close(struct obd_client_handle *och,
871 struct inode *inode, struct inode *inode2)
873 const struct lu_fid *fid1 = ll_inode2fid(inode);
874 const struct lu_fid *fid2;
878 CDEBUG(D_INODE, "%s: biased close of file "DFID"\n",
879 ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
881 rc = ll_check_swap_layouts_validity(inode, inode2);
883 GOTO(out_free_och, rc);
885 /* We now know that inode2 is a lustre inode */
886 fid2 = ll_inode2fid(inode2);
888 rc = lu_fid_cmp(fid1, fid2);
890 GOTO(out_free_och, rc = -EINVAL);
892 /* Close the file and swap layouts between inode & inode2.
893 * NB: lease lock handle is released in mdc_close_layout_swap_pack()
894 * because we still need it to pack l_remote_handle to MDT. */
895 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
896 MDS_CLOSE_LAYOUT_SWAP, inode2);
898 och = NULL; /* freed in ll_close_inode_openhandle() */
908 * Release lease and close the file.
909 * It will check if the lease has ever broken.
/* If the lease lock was not already cancelled (lease intact), cancel
 * it explicitly; report broken/intact state via *lease_broken, then
 * close the openhandle. */
911 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
914 struct ldlm_lock *lock;
915 bool cancelled = true;
919 lock = ldlm_handle2lock(&och->och_lease_handle);
921 lock_res_and_lock(lock);
922 cancelled = ldlm_is_cancel(lock);
923 unlock_res_and_lock(lock);
927 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
928 PFID(&ll_i2info(inode)->lli_fid), cancelled);
931 ldlm_cli_cancel(&och->och_lease_handle, 0);
932 if (lease_broken != NULL)
933 *lease_broken = cancelled;
935 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
/* Merge MDS-cached timestamps (lli_{a,m,c}time) with the attributes
 * obtained from the OSTs via cl_object_attr_get(): the newest of each
 * timestamp wins, and size/blocks come from the cl layer.  Runs under
 * the inode size lock. */
941 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
943 struct ll_inode_info *lli = ll_i2info(inode);
944 struct cl_object *obj = lli->lli_clob;
945 struct cl_attr *attr = vvp_env_thread_attr(env);
953 ll_inode_size_lock(inode);
955 /* merge timestamps the most recently obtained from mds with
956 timestamps obtained from osts */
957 LTIME_S(inode->i_atime) = lli->lli_atime;
958 LTIME_S(inode->i_mtime) = lli->lli_mtime;
959 LTIME_S(inode->i_ctime) = lli->lli_ctime;
961 atime = LTIME_S(inode->i_atime);
962 mtime = LTIME_S(inode->i_mtime);
963 ctime = LTIME_S(inode->i_ctime);
965 cl_object_attr_lock(obj);
966 rc = cl_object_attr_get(env, obj, attr);
967 cl_object_attr_unlock(obj);
970 GOTO(out_size_unlock, rc);
/* Keep whichever timestamp is most recent per field. */
972 if (atime < attr->cat_atime)
973 atime = attr->cat_atime;
975 if (ctime < attr->cat_ctime)
976 ctime = attr->cat_ctime;
978 if (mtime < attr->cat_mtime)
979 mtime = attr->cat_mtime;
981 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
982 PFID(&lli->lli_fid), attr->cat_size);
984 i_size_write(inode, attr->cat_size);
985 inode->i_blocks = attr->cat_blocks;
987 LTIME_S(inode->i_atime) = atime;
988 LTIME_S(inode->i_mtime) = mtime;
989 LTIME_S(inode->i_ctime) = ctime;
992 ll_inode_size_unlock(inode);
/* Decide whether atime updates should be suppressed for @file, checking
 * the same flag sources as the kernel's file_accessed()/touch_atime():
 * O_NOATIME, S_NOATIME, MS_NOATIME, mount noatime/ro, and the
 * nodiratime cases for directories. */
997 static bool file_is_noatime(const struct file *file)
999 const struct vfsmount *mnt = file->f_path.mnt;
1000 const struct inode *inode = file->f_path.dentry->d_inode;
1002 /* Adapted from file_accessed() and touch_atime().*/
1003 if (file->f_flags & O_NOATIME)
1006 if (inode->i_flags & S_NOATIME)
1009 if (IS_NOATIME(inode))
1012 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1015 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1018 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/* Initialize a cl_io from @file's open flags: nonblock/append/sync
 * hints, lock-request policy (never for nolock files, mandatory for
 * O_APPEND, maybe otherwise), and the noatime decision. */
1024 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
1026 struct inode *inode = file->f_path.dentry->d_inode;
1028 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1030 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1031 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1032 file->f_flags & O_DIRECT ||
1035 io->ci_obj = ll_i2info(inode)->lli_clob;
1036 io->ci_lockreq = CILR_MAYBE;
1037 if (ll_file_nolock(file)) {
1038 io->ci_lockreq = CILR_NEVER;
1039 io->ci_no_srvlock = 1;
1040 } else if (file->f_flags & O_APPEND) {
1041 io->ci_lockreq = CILR_MANDATORY;
1044 io->ci_noatime = file_is_noatime(file);
/* Common driver for all read/write entry points.  Sets up a cl_io,
 * takes the file range lock where required (writes, and O_DIRECT
 * reads — see LU-6227), runs cl_io_loop(), accumulates partial
 * results, and restarts the IO when the cl layer asks for it.
 * NOTE(review): the restart loop head and some labels are missing
 * from this extract. */
1048 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1049 struct file *file, enum cl_io_type iot,
1050 loff_t *ppos, size_t count)
1052 struct vvp_io *vio = vvp_env_io(env);
1053 struct inode *inode = file->f_path.dentry->d_inode;
1054 struct ll_inode_info *lli = ll_i2info(inode);
1055 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1059 struct range_lock range;
1063 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1064 file->f_path.dentry->d_name.name, iot, *ppos, count);
1067 io = vvp_env_thread_io(env);
1068 ll_io_init(io, file, iot == CIT_WRITE);
1070 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1071 bool range_locked = false;
/* O_APPEND writes must lock to EOF since the final offset is
 * unknown until the write executes. */
1073 if (file->f_flags & O_APPEND)
1074 range_lock_init(&range, 0, LUSTRE_EOF);
1076 range_lock_init(&range, *ppos, *ppos + count - 1);
1078 vio->vui_fd = LUSTRE_FPRIVATE(file);
1079 vio->vui_io_subtype = args->via_io_subtype;
1081 switch (vio->vui_io_subtype) {
1083 vio->vui_iter = args->u.normal.via_iter;
1084 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1085 vio->vui_tot_nrsegs = vio->vui_iter->nr_segs;
1086 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1087 vio->vui_iocb = args->u.normal.via_iocb;
1088 /* Direct IO reads must also take range lock,
1089 * or multiple reads will try to work on the same pages
1090 * See LU-6227 for details. */
1091 if (((iot == CIT_WRITE) ||
1092 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1093 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1094 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1096 rc = range_lock(&lli->lli_write_tree, &range);
1100 range_locked = true;
1104 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1105 vio->u.splice.vui_flags = args->u.splice.via_flags;
1108 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1112 ll_cl_add(file, env, io);
1113 rc = cl_io_loop(env, io);
1114 ll_cl_remove(file, env);
1117 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1119 range_unlock(&lli->lli_write_tree, &range);
1122 /* cl_io_rw_init() handled IO */
/* Accumulate progress (ci_nob) and advance *ppos so a restart
 * resumes where the previous pass stopped. */
1126 if (io->ci_nob > 0) {
1127 result += io->ci_nob;
1128 count -= io->ci_nob;
1129 *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */
1131 /* prepare IO restart */
1132 if (count > 0 && args->via_io_subtype == IO_NORMAL) {
1133 args->u.normal.via_iter = vio->vui_iter;
1134 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1135 args->u.normal.via_iter->nr_segs = vio->vui_tot_nrsegs;
1136 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1141 cl_io_fini(env, io);
1143 if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
1145 "%s: restart %s from %lld, count:%zu, result: %zd\n",
1146 file->f_path.dentry->d_name.name,
1147 iot == CIT_READ ? "read" : "write",
1148 *ppos, count, result);
/* Tally byte counters and track write-failure state for close-time
 * error reporting (fd_write_failed). */
1152 if (iot == CIT_READ) {
1154 ll_stats_ops_tally(ll_i2sbi(inode),
1155 LPROC_LL_READ_BYTES, result);
1156 } else if (iot == CIT_WRITE) {
1158 ll_stats_ops_tally(ll_i2sbi(inode),
1159 LPROC_LL_WRITE_BYTES, result);
1160 fd->fd_write_failed = false;
1161 } else if (rc != -ERESTARTSYS) {
1162 fd->fd_write_failed = true;
1166 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1168 return result > 0 ? result : rc;
1172 * Read from a file (through the page cache).
/* iov_iter-based read entry point: acquire a cl environment, fill the
 * normal-IO args, and delegate to ll_file_io_generic(CIT_READ). */
1174 static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1176 struct vvp_io_args *args;
1181 env = cl_env_get(&refcheck);
1183 return PTR_ERR(env);
1185 args = ll_env_args(env, IO_NORMAL);
1186 args->u.normal.via_iter = to;
1187 args->u.normal.via_iocb = iocb;
1189 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1190 &iocb->ki_pos, iov_iter_count(to));
1191 cl_env_put(env, &refcheck);
1196 * Write to a file (through the page cache).
/* iov_iter-based write entry point; mirrors ll_file_read_iter() but
 * dispatches CIT_WRITE. */
1198 static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1200 struct vvp_io_args *args;
1205 env = cl_env_get(&refcheck);
1207 return PTR_ERR(env);
1209 args = ll_env_args(env, IO_NORMAL);
1210 args->u.normal.via_iter = from;
1211 args->u.normal.via_iocb = iocb;
1213 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1214 &iocb->ki_pos, iov_iter_count(from));
1215 cl_env_put(env, &refcheck);
1219 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1221 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute the total byte count, truncating
 * at the first inaccessible segment (mirrors the kernel helper it was
 * copied from).  NOTE(review): return statements are not visible in
 * this fragment. */
1223 static int ll_file_get_iov_count(const struct iovec *iov,
1224 unsigned long *nr_segs, size_t *count)
1229 for (seg = 0; seg < *nr_segs; seg++) {
1230 const struct iovec *iv = &iov[seg];
1233 * If any segment has a negative length, or the cumulative
1234 * length ever wraps negative then return -EINVAL.
1237 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1239 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1244 cnt -= iv->iov_len; /* This segment is no good */
/* Compat aio_read for kernels without read_iter: copy the caller's
 * iovec (env-embedded buffer for the single-segment fast path, heap
 * otherwise), build an iov_iter, and call ll_file_read_iter(). */
1251 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1252 unsigned long nr_segs, loff_t pos)
1254 struct iovec *local_iov;
1255 struct iov_iter *to;
1260 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1268 env = cl_env_get(&refcheck);
1270 RETURN(PTR_ERR(env));
1272 local_iov = &ll_env_info(env)->lti_local_iov;
1275 cl_env_put(env, &refcheck);
1277 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1278 if (local_iov == NULL)
1281 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1289 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1290 iov_iter_init(to, READ, local_iov, nr_segs, iov_count);
1291 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1292 iov_iter_init(to, local_iov, nr_segs, iov_count, 0);
1293 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1295 result = ll_file_read_iter(iocb, to);
1300 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous read(2) entry point: wraps the user buffer in a single-segment
 * iovec plus a sync kiocb, then reuses the aio path.  *ppos is updated from
 * the kiocb afterwards.
 */
1305 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1309 struct iovec iov = { .iov_base = buf, .iov_len = count };
1310 struct kiocb *kiocb;
1315 env = cl_env_get(&refcheck);
1317 RETURN(PTR_ERR(env));
1319 kiocb = &ll_env_info(env)->lti_kiocb;
1320 init_sync_kiocb(kiocb, file);
1321 kiocb->ki_pos = *ppos;
/* Field holding the remaining byte count was renamed across kernels. */
1322 #ifdef HAVE_KIOCB_KI_LEFT
1323 kiocb->ki_left = count;
1324 #elif defined(HAVE_KI_NBYTES)
1325 kiocb->ki_nbytes = count;
1328 result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos);
1329 *ppos = kiocb->ki_pos;
1331 cl_env_put(env, &refcheck);
/*
 * aio_write counterpart of ll_file_aio_read(): copies the caller's iovec,
 * builds a WRITE-direction iov_iter, and forwards to ll_file_write_iter().
 * NOTE(review): listing is elided; error/cleanup branches are not fully
 * visible (e.g. the failure path after OBD_ALLOC_PTR(from)).
 */
1336 * Write to a file (through the page cache).
1339 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1340 unsigned long nr_segs, loff_t pos)
1342 struct iovec *local_iov;
1343 struct iov_iter *from;
1348 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1356 env = cl_env_get(&refcheck);
1358 RETURN(PTR_ERR(env));
/* Reuse the per-env scratch iovec when the segment count allows it. */
1360 local_iov = &ll_env_info(env)->lti_local_iov;
1363 cl_env_put(env, &refcheck);
1365 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1366 if (local_iov == NULL)
1369 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1372 OBD_ALLOC_PTR(from);
1377 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1378 iov_iter_init(from, WRITE, local_iov, nr_segs, iov_count);
1379 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1380 iov_iter_init(from, local_iov, nr_segs, iov_count, 0);
1381 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1383 result = ll_file_write_iter(iocb, from);
1388 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous write(2) entry point: single-segment iovec + sync kiocb,
 * forwarded through the aio write path; *ppos is refreshed from the kiocb.
 */
1393 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1394 size_t count, loff_t *ppos)
/* Cast drops const: struct iovec's iov_base is non-const by definition. */
1397 struct iovec iov = { .iov_base = (void __user *)buf,
1399 struct kiocb *kiocb;
1404 env = cl_env_get(&refcheck);
1406 RETURN(PTR_ERR(env));
1408 kiocb = &ll_env_info(env)->lti_kiocb;
1409 init_sync_kiocb(kiocb, file);
1410 kiocb->ki_pos = *ppos;
1411 #ifdef HAVE_KIOCB_KI_LEFT
1412 kiocb->ki_left = count;
1413 #elif defined(HAVE_KI_NBYTES)
1414 kiocb->ki_nbytes = count;
1417 result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos);
1418 *ppos = kiocb->ki_pos;
1420 cl_env_put(env, &refcheck);
1423 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
/*
 * splice_read implementation: routes pagecache content into a pipe via the
 * generic IO engine using the IO_SPLICE argument variant.
 */
1426 * Send file content (through pagecache) somewhere with helper
1428 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1429 struct pipe_inode_info *pipe, size_t count,
1433 struct vvp_io_args *args;
1438 env = cl_env_get(&refcheck);
1440 RETURN(PTR_ERR(env));
1442 args = ll_env_args(env, IO_SPLICE);
1443 args->u.splice.via_pipe = pipe;
1444 args->u.splice.via_flags = flags;
1446 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1447 cl_env_put(env, &refcheck);
/*
 * Apply striping EA info to a file by re-opening it by FID with the given
 * lum under the inode size lock, then releasing the open handle.  The
 * delay-create flag is cleared on the way out regardless of outcome.
 * NOTE(review): listing is elided; intent initialization and intermediate
 * error handling are not fully visible.
 */
1451 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1452 __u64 flags, struct lov_user_md *lum,
1455 struct lookup_intent oit = {
/* MDS_OPEN_BY_FID: reopen the existing inode rather than doing a lookup. */
1457 .it_flags = flags | MDS_OPEN_BY_FID,
1462 ll_inode_size_lock(inode);
1463 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1465 GOTO(out_unlock, rc);
1467 ll_release_openhandle(file->f_path.dentry, &oit);
1470 ll_inode_size_unlock(inode);
1471 ll_intent_release(&oit);
1472 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA (striping metadata) for @filename from the MDS.
 * On success *lmmp points into the reply buffer of *request (caller must
 * keep the request until done with the data and then release it) and
 * *lmm_size holds its length.  On little-endian-from-MDS data the EA is
 * byte-swapped to host order before being handed back; for directories the
 * per-object array is not swabbed since no objects exist (see comment below).
 * NOTE(review): listing is elided; several error branches and the swab of
 * lmm_objects element counts are not fully visible.
 */
1477 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1478 struct lov_mds_md **lmmp, int *lmm_size,
1479 struct ptlrpc_request **request)
1481 struct ll_sb_info *sbi = ll_i2sbi(inode);
1482 struct mdt_body *body;
1483 struct lov_mds_md *lmm = NULL;
1484 struct ptlrpc_request *req = NULL;
1485 struct md_op_data *op_data;
/* Size the getattr reply buffer from the MDS default EA size. */
1488 rc = ll_get_default_mdsize(sbi, &lmmsize);
1492 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1493 strlen(filename), lmmsize,
1494 LUSTRE_OPC_ANY, NULL);
1495 if (IS_ERR(op_data))
1496 RETURN(PTR_ERR(op_data));
1498 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1499 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1500 ll_finish_md_op_data(op_data);
1502 CDEBUG(D_INFO, "md_getattr_name failed "
1503 "on %s: rc %d\n", filename, rc);
1507 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1508 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1510 lmmsize = body->mbo_eadatasize;
/* No EA present (or empty) -> report -ENODATA to the caller. */
1512 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1514 GOTO(out, rc = -ENODATA);
1517 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1518 LASSERT(lmm != NULL);
/* Only plain V1/V3 LOV magics are understood here. */
1520 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1521 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1522 GOTO(out, rc = -EPROTO);
1526 * This is coming from the MDS, so is probably in
1527 * little endian. We convert it to host endian before
1528 * passing it to userspace.
/* Swab only needed on big-endian hosts (LOV_MAGIC differs from LE form). */
1530 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1533 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1534 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1537 /* if function called for directory - we should
1538 * avoid swab not existent lsm objects */
1539 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1540 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1541 if (S_ISREG(body->mbo_mode))
1542 lustre_swab_lov_user_md_objects(
1543 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1545 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1546 lustre_swab_lov_user_md_v3(
1547 (struct lov_user_md_v3 *)lmm);
1548 if (S_ISREG(body->mbo_mode))
1549 lustre_swab_lov_user_md_objects(
1550 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1557 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copies a lov_user_md (with one OST entry) from
 * userspace and applies it via ll_lov_setstripe_ea_info().  Requires
 * CAP_SYS_ADMIN since MDS_OPEN_HAS_OBJS trusts caller-supplied objects.
 */
1562 static int ll_lov_setea(struct inode *inode, struct file *file,
1565 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1566 struct lov_user_md *lump;
/* Fixed-size buffer: header plus exactly one OST object entry. */
1567 int lum_size = sizeof(struct lov_user_md) +
1568 sizeof(struct lov_user_ost_data);
1572 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1575 OBD_ALLOC_LARGE(lump, lum_size);
1579 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1580 OBD_FREE_LARGE(lump, lum_size);
1584 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1586 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_GETSTRIPE helper: asks the cl_object layer to fill the
 * user-supplied lov_user_md buffer with the file's striping info.
 */
1590 static int ll_file_getstripe(struct inode *inode,
1591 struct lov_user_md __user *lum)
1598 env = cl_env_get(&refcheck);
1600 RETURN(PTR_ERR(env));
1602 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1603 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copies the user lum into a kernel buffer,
 * applies it, and on success refreshes the layout and echoes the resulting
 * stripe info back to the caller's buffer.
 * NOTE(review): listing is elided; error branches between the visible lines
 * are not shown.
 */
1607 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1610 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1611 struct lov_user_md *klum;
1613 __u64 flags = FMODE_WRITE;
1616 rc = ll_copy_user_md(lum, &klum);
1621 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* Zero the user's stripe_count first so getstripe fills it afresh. */
1625 put_user(0, &lum->lmm_stripe_count);
1627 ll_layout_refresh(inode, &gen);
1628 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1631 OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK handler: take a Lustre group lock (gid = @arg) on the
 * file.  Rejects gid 0 and files opened with no-lock semantics; detects a
 * pre-existing group lock under lli_lock, drops the spinlock around the
 * (potentially blocking) cl_get_grouplock() call, then re-checks for a
 * racing acquirer before publishing the lock in the file descriptor.
 */
1636 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1638 struct ll_inode_info *lli = ll_i2info(inode);
1639 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1640 struct ll_grouplock grouplock;
1645 CWARN("group id for group lock must not be 0\n");
1649 if (ll_file_nolock(file))
1650 RETURN(-EOPNOTSUPP);
1652 spin_lock(&lli->lli_lock);
/* Only one group lock per file descriptor is allowed. */
1653 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1654 CWARN("group lock already existed with gid %lu\n",
1655 fd->fd_grouplock.lg_gid);
1656 spin_unlock(&lli->lli_lock);
1659 LASSERT(fd->fd_grouplock.lg_lock == NULL);
1660 spin_unlock(&lli->lli_lock);
/* May block unless the file was opened O_NONBLOCK. */
1662 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1663 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1667 spin_lock(&lli->lli_lock);
/* Re-check under the lock: another thread may have won meanwhile. */
1668 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1669 spin_unlock(&lli->lli_lock);
1670 CERROR("another thread just won the race\n");
1671 cl_put_grouplock(&grouplock);
1675 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1676 fd->fd_grouplock = grouplock;
1677 spin_unlock(&lli->lli_lock);
1679 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock held on this file
 * descriptor, verifying under lli_lock that one is held and that its gid
 * matches @arg.  The grouplock is copied out and cleared while locked, and
 * the actual cl_put_grouplock() happens after the spinlock is dropped.
 */
1683 static int ll_put_grouplock(struct inode *inode, struct file *file,
1686 struct ll_inode_info *lli = ll_i2info(inode);
1687 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1688 struct ll_grouplock grouplock;
1691 spin_lock(&lli->lli_lock);
1692 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1693 spin_unlock(&lli->lli_lock);
1694 CWARN("no group lock held\n");
1698 LASSERT(fd->fd_grouplock.lg_lock != NULL);
/* Caller must pass the same gid the lock was taken with. */
1700 if (fd->fd_grouplock.lg_gid != arg) {
1701 CWARN("group lock %lu doesn't match current id %lu\n",
1702 arg, fd->fd_grouplock.lg_gid);
1703 spin_unlock(&lli->lli_lock);
1707 grouplock = fd->fd_grouplock;
1708 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1709 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1710 spin_unlock(&lli->lli_lock);
/* Drop the underlying cl-layer lock outside the spinlock. */
1712 cl_put_grouplock(&grouplock);
1713 CDEBUG(D_INFO, "group lock %lu released\n", arg);
/*
 * Close the MDS open handle carried by a lookup intent.  No-ops for the
 * filesystem root and for intents with no DISP_OPEN_OPEN disposition; the
 * open request reference (DISP_ENQ_OPEN_REF) is dropped on the way out.
 */
1718 * Close inode open handle
1720 * \param dentry [in] dentry which contains the inode
1721 * \param it [in,out] intent which contains open info and result
1724 * \retval <0 failure
1726 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1728 struct inode *inode = dentry->d_inode;
1729 struct obd_client_handle *och;
1735 /* Root ? Do nothing. */
1736 if (dentry->d_inode->i_sb->s_root == dentry)
1739 /* No open handle to close? Move away */
1740 if (!it_disposition(it, DISP_OPEN_OPEN))
1743 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1745 OBD_ALLOC(och, sizeof(*och));
1747 GOTO(out, rc = -ENOMEM);
/* Populate the client handle from the intent's open reply. */
1749 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1751 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1752 och, inode, 0, NULL);
1754 /* this one is in place of ll_file_open */
1755 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1756 ptlrpc_req_finished(it->d.lustre.it_data);
1757 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Core FIEMAP implementation: validates flags (rejecting unsupported ones
 * by echoing the compat mask back through fm_flags), flushes dirty pages if
 * FIEMAP_FLAG_SYNC is set, refreshes size via glimpse when i_size is 0,
 * and finally asks the cl_object layer to fill the extent mapping.
 */
1763 * Get size for inode for which FIEMAP mapping is requested.
1764 * Make the FIEMAP get_info call and returns the result.
1765 * \param fiemap kernel buffer to hold extens
1766 * \param num_bytes kernel buffer size
1768 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1774 struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
1777 /* Checks for fiemap flags */
1778 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* Report the supported subset back to the caller before failing. */
1779 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1783 /* Check for FIEMAP_FLAG_SYNC */
1784 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1785 rc = filemap_fdatawrite(inode->i_mapping);
1790 env = cl_env_get(&refcheck);
1792 RETURN(PTR_ERR(env));
/* Size may be stale/unset locally; glimpse it from the OSTs. */
1794 if (i_size_read(inode) == 0) {
1795 rc = ll_glimpse_size(inode);
1800 fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1801 obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
1802 obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
1804 /* If filesize is 0, then there would be no objects for mapping */
1805 if (fmkey.lfik_oa.o_size == 0) {
1806 fiemap->fm_mapped_extents = 0;
1810 fmkey.lfik_fiemap = *fiemap;
1812 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1813 &fmkey, fiemap, &num_bytes);
1815 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.  Reads the
 * caller's requested path buffer length, allocates a matching gfout, copies
 * the input header in, performs the ioctl on the MD export, and copies the
 * whole result back.  Permission: CAP_DAC_READ_SEARCH or the
 * LL_SBI_USER_FID2PATH mount flag.
 */
1819 int ll_fid2path(struct inode *inode, void __user *arg)
1821 struct obd_export *exp = ll_i2mdexp(inode);
1822 const struct getinfo_fid2path __user *gfin = arg;
1824 struct getinfo_fid2path *gfout;
1830 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1831 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1834 /* Only need to get the buflen */
1835 if (get_user(pathlen, &gfin->gf_pathlen))
/* Cap the user-controlled allocation size. */
1838 if (pathlen > PATH_MAX)
1841 outsize = sizeof(*gfout) + pathlen;
1842 OBD_ALLOC(gfout, outsize);
1846 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1847 GOTO(gf_free, rc = -EFAULT);
1849 /* Call mdc_iocontrol */
1850 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1854 if (copy_to_user(arg, gfout, outsize))
1858 OBD_FREE(gfout, outsize);
/*
 * Compute the file's data version through a CIT_DATA_VERSION cl_io.
 * Restarts the whole io if the layout changed mid-flight
 * (ci_need_restart).  A file with no cl_object is treated as version 0.
 */
1863 * Read the data_version for inode.
1865 * This value is computed using stripe object version on OST.
1866 * Version is computed using server side locking.
1868 * @param flags if do sync on the OST side;
1870 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1871 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1873 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1875 struct cl_object *obj = ll_i2info(inode)->lli_clob;
1883 /* If no file object initialized, we consider its version is 0. */
1889 env = cl_env_get(&refcheck);
1891 RETURN(PTR_ERR(env));
1893 io = vvp_env_thread_io(env);
1895 io->u.ci_data_version.dv_data_version = 0;
1896 io->u.ci_data_version.dv_flags = flags;
/* If init fails (e.g. layout mismatch) take ci_result instead of looping. */
1899 if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
1900 result = cl_io_loop(env, io);
1902 result = io->ci_result;
1904 *data_version = io->u.ci_data_version.dv_data_version;
1906 cl_io_fini(env, io);
/* Layout changed while the io ran: redo the whole operation. */
1908 if (unlikely(io->ci_need_restart))
1911 cl_env_put(env, &refcheck);
/*
 * HSM release: open a write lease on the file, grab the latest data version
 * with a write flush, merge attributes, then close the open handle with
 * MDS_HSM_RELEASE so the MDT can free the OST objects.  The lease itself is
 * closed in the error/cleanup path; its lock handle is released later by
 * mdc_hsm_release_pack() (see comment below).
 */
1917 * Trigger a HSM release request for the provided inode.
1919 int ll_hsm_release(struct inode *inode)
1921 struct cl_env_nest nest;
1923 struct obd_client_handle *och = NULL;
1924 __u64 data_version = 0;
1928 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1929 ll_get_fsname(inode->i_sb, NULL, 0),
1930 PFID(&ll_i2info(inode)->lli_fid));
/* Exclusive write lease guards against concurrent users during release. */
1932 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1934 GOTO(out, rc = PTR_ERR(och));
1936 /* Grab latest data_version and [am]time values */
1937 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1941 env = cl_env_nested_get(&nest);
1943 GOTO(out, rc = PTR_ERR(env));
1945 ll_merge_attr(env, inode);
1946 cl_env_nested_put(&nest, env);
1948 /* Release the file.
1949 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1950 * we still need it to pack l_remote_handle to MDT. */
1951 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
1952 MDS_HSM_RELEASE, &data_version);
1957 if (och != NULL && !IS_ERR(och)) /* close the file */
1958 ll_lease_close(och, inode, NULL);
/*
 * Scratch state for ll_swap_layouts(): the two inodes being swapped plus
 * (in elided fields) their data versions and check flags.
 */
1963 struct ll_swap_stack {
1966 struct inode *inode1;
1967 struct inode *inode2;
/*
 * Swap the layouts of two open files.  Orders the pair deterministically by
 * FID (swapping the per-file state along with the inodes) to avoid
 * deadlocks, optionally takes group locks on both to flush dirty cache,
 * re-validates each file's data version if requested (-EAGAIN if it moved),
 * and finally issues LL_IOC_LOV_SWAP_LAYOUTS to the MDC with the swap args
 * packed into md_op_data via a struct mdc_swap_layouts.
 * NOTE(review): listing is elided; gid extraction from lsl and several
 * error branches are not visible.
 */
1972 static int ll_swap_layouts(struct file *file1, struct file *file2,
1973 struct lustre_swap_layouts *lsl)
1975 struct mdc_swap_layouts msl;
1976 struct md_op_data *op_data;
1979 struct ll_swap_stack *llss = NULL;
1982 OBD_ALLOC_PTR(llss);
1986 llss->inode1 = file1->f_path.dentry->d_inode;
1987 llss->inode2 = file2->f_path.dentry->d_inode;
1989 rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
1993 /* we use 2 bool because it is easier to swap than 2 bits */
1994 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1995 llss->check_dv1 = true;
1997 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1998 llss->check_dv2 = true;
2000 /* we cannot use lsl->sl_dvX directly because we may swap them */
2001 llss->dv1 = lsl->sl_dv1;
2002 llss->dv2 = lsl->sl_dv2;
2004 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2005 if (rc == 0) /* same file, done! */
/* Canonical FID order prevents lock-order deadlock between two swappers. */
2008 if (rc < 0) { /* sequentialize it */
2009 swap(llss->inode1, llss->inode2);
2011 swap(llss->dv1, llss->dv2);
2012 swap(llss->check_dv1, llss->check_dv2);
2016 if (gid != 0) { /* application asks to flush dirty cache */
2017 rc = ll_get_grouplock(llss->inode1, file1, gid);
2021 rc = ll_get_grouplock(llss->inode2, file2, gid);
2023 ll_put_grouplock(llss->inode1, file1, gid);
2028 /* ultimate check, before swaping the layouts we check if
2029 * dataversion has changed (if requested) */
2030 if (llss->check_dv1) {
2031 rc = ll_data_version(llss->inode1, &dv, 0);
2034 if (dv != llss->dv1)
2035 GOTO(putgl, rc = -EAGAIN);
2038 if (llss->check_dv2) {
2039 rc = ll_data_version(llss->inode2, &dv, 0);
2042 if (dv != llss->dv2)
2043 GOTO(putgl, rc = -EAGAIN);
2046 /* struct md_op_data is used to send the swap args to the mdt
2047 * only flags is missing, so we use struct mdc_swap_layouts
2048 * through the md_op_data->op_data */
2049 /* flags from user space have to be converted before they are send to
2050 * server, no flag is sent today, they are only used on the client */
2053 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2054 0, LUSTRE_OPC_ANY, &msl);
2055 if (IS_ERR(op_data))
2056 GOTO(free, rc = PTR_ERR(op_data));
2058 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2059 sizeof(*op_data), op_data, NULL);
2060 ll_finish_md_op_data(op_data);
/* Release group locks in reverse acquisition order. */
2067 ll_put_grouplock(llss->inode2, file2, gid);
2068 ll_put_grouplock(llss->inode1, file1, gid);
/*
 * Set/clear HSM state flags on a file via LL_IOC_HSM_STATE_SET to the MDC.
 * Validates the masks against HSM_FLAGS_MASK, restricts non-HSM_USER_MASK
 * bits to CAP_SYS_ADMIN, and bounds the archive id.
 */
2078 int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2080 struct md_op_data *op_data;
2084 /* Detect out-of range masks */
2085 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2088 /* Non-root users are forbidden to set or clear flags which are
2089 * NOT defined in HSM_USER_MASK. */
2090 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2091 !cfs_capable(CFS_CAP_SYS_ADMIN))
2094 /* Detect out-of range archive id */
2095 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2096 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
/* hss travels to the MDT packed inside md_op_data. */
2099 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2100 LUSTRE_OPC_ANY, hss);
2101 if (IS_ERR(op_data))
2102 RETURN(PTR_ERR(op_data));
2104 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2105 sizeof(*op_data), op_data, NULL);
2107 ll_finish_md_op_data(op_data);
/*
 * HSM import: mark a regular file ARCHIVED|EXISTS|RELEASED with the given
 * archive id, then force its ownership, mode, size and [am]times to the
 * values recorded in the user-supplied hsm_user_import, via ll_setattr_raw
 * under i_mutex.  Used to register an already-archived file without copying
 * its data in.
 */
2112 static int ll_hsm_import(struct inode *inode, struct file *file,
2113 struct hsm_user_import *hui)
2115 struct hsm_state_set *hss = NULL;
2116 struct iattr *attr = NULL;
2120 if (!S_ISREG(inode->i_mode))
2126 GOTO(out, rc = -ENOMEM);
/* Step 1: set the HSM flags so the file is seen as released-from-archive. */
2128 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2129 hss->hss_archive_id = hui->hui_archive_id;
2130 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2131 rc = ll_hsm_state_set(inode, hss);
2135 OBD_ALLOC_PTR(attr);
2137 GOTO(out, rc = -ENOMEM);
/* Step 2: impose the archived attributes; ATTR_FORCE skips permission
 * checks since values come from the archive's metadata. */
2139 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2140 attr->ia_mode |= S_IFREG;
2141 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2142 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2143 attr->ia_size = hui->hui_size;
2144 attr->ia_mtime.tv_sec = hui->hui_mtime;
2145 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2146 attr->ia_atime.tv_sec = hui->hui_atime;
2147 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2149 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2150 ATTR_UID | ATTR_GID |
2151 ATTR_MTIME | ATTR_MTIME_SET |
2152 ATTR_ATIME | ATTR_ATIME_SET;
2154 mutex_lock(&inode->i_mutex);
2156 rc = ll_setattr_raw(file->f_path.dentry, attr, true);
2160 mutex_unlock(&inode->i_mutex);
/* Translate an open fmode into the LL_LEASE_{RD,WR}LCK bitmask reported
 * to userspace by the lease ioctls. */
2172 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2174 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2175 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * LL_IOC_FUTIMES_3 handler: set atime, mtime AND ctime of a regular file
 * from the given ll_futimes_3 in a single ll_setattr_raw call under
 * i_mutex.  CAP_SYS_ADMIN only, since setting ctime is not normally
 * possible through the VFS.
 */
2178 static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu)
2180 struct inode *inode = file->f_path.dentry->d_inode;
2182 .ia_valid = ATTR_ATIME | ATTR_ATIME_SET |
2183 ATTR_MTIME | ATTR_MTIME_SET |
2184 ATTR_CTIME | ATTR_CTIME_SET,
2186 .tv_sec = lfu->lfu_atime_sec,
2187 .tv_nsec = lfu->lfu_atime_nsec,
2190 .tv_sec = lfu->lfu_mtime_sec,
2191 .tv_nsec = lfu->lfu_mtime_nsec,
2194 .tv_sec = lfu->lfu_ctime_sec,
2195 .tv_nsec = lfu->lfu_ctime_nsec,
2201 if (!capable(CAP_SYS_ADMIN))
2204 if (!S_ISREG(inode->i_mode))
2207 mutex_lock(&inode->i_mutex);
2208 rc = ll_setattr_raw(file->f_path.dentry, &ia, false);
2209 mutex_unlock(&inode->i_mutex);
/*
 * Main ioctl dispatcher for regular files.  Handles per-fd flags, striping
 * get/set, layout swap, group locks, HSM state/action/import, data version,
 * FID/path translation, leases and timestamps; anything unrecognized is
 * first offered to registered llioc handlers and then forwarded to the data
 * export via obd_iocontrol().
 * NOTE(review): listing is elided; many RETURN statements, allocations and
 * closing braces between the visible lines are not shown.
 */
2215 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2217 struct inode *inode = file->f_path.dentry->d_inode;
2218 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2222 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2223 PFID(ll_inode2fid(inode)), inode, cmd);
2224 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2226 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2227 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2231 case LL_IOC_GETFLAGS:
2232 /* Get the current value of the file flags */
2233 return put_user(fd->fd_flags, (int __user *)arg);
2234 case LL_IOC_SETFLAGS:
2235 case LL_IOC_CLRFLAGS:
2236 /* Set or clear specific file flags */
2237 /* XXX This probably needs checks to ensure the flags are
2238 * not abused, and to handle any flag side effects.
2240 if (get_user(flags, (int __user *) arg))
/* IGNORE_LOCK only makes sense for O_DIRECT IO. */
2243 if (cmd == LL_IOC_SETFLAGS) {
2244 if ((flags & LL_FILE_IGNORE_LOCK) &&
2245 !(file->f_flags & O_DIRECT)) {
2246 CERROR("%s: unable to disable locking on "
2247 "non-O_DIRECT file\n", current->comm);
2251 fd->fd_flags |= flags;
2253 fd->fd_flags &= ~flags;
2256 case LL_IOC_LOV_SETSTRIPE:
2257 RETURN(ll_lov_setstripe(inode, file, arg));
2258 case LL_IOC_LOV_SETEA:
2259 RETURN(ll_lov_setea(inode, file, arg));
2260 case LL_IOC_LOV_SWAP_LAYOUTS: {
2262 struct lustre_swap_layouts lsl;
2264 if (copy_from_user(&lsl, (char __user *)arg,
2265 sizeof(struct lustre_swap_layouts)))
/* Both files involved in a swap must be open for writing. */
2268 if ((file->f_flags & O_ACCMODE) == O_RDONLY)
2271 file2 = fget(lsl.sl_fd);
2275 /* O_WRONLY or O_RDWR */
2276 if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
2277 GOTO(out, rc = -EPERM);
2279 if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
2280 struct inode *inode2;
2281 struct ll_inode_info *lli;
2282 struct obd_client_handle *och = NULL;
/* CLOSE must be the only flag: it swaps via lease close. */
2284 if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE)
2285 GOTO(out, rc = -EINVAL);
2287 lli = ll_i2info(inode);
2288 mutex_lock(&lli->lli_och_mutex);
2289 if (fd->fd_lease_och != NULL) {
2290 och = fd->fd_lease_och;
2291 fd->fd_lease_och = NULL;
2293 mutex_unlock(&lli->lli_och_mutex);
2295 GOTO(out, rc = -ENOLCK);
2296 inode2 = file2->f_path.dentry->d_inode;
2297 rc = ll_swap_layouts_close(och, inode, inode2);
2299 rc = ll_swap_layouts(file, file2, &lsl);
2305 case LL_IOC_LOV_GETSTRIPE:
2306 RETURN(ll_file_getstripe(inode,
2307 (struct lov_user_md __user *)arg));
2308 case FSFILT_IOC_GETFLAGS:
2309 case FSFILT_IOC_SETFLAGS:
2310 RETURN(ll_iocontrol(inode, file, cmd, arg));
2311 case FSFILT_IOC_GETVERSION_OLD:
2312 case FSFILT_IOC_GETVERSION:
2313 RETURN(put_user(inode->i_generation, (int __user *)arg));
2314 case LL_IOC_GROUP_LOCK:
2315 RETURN(ll_get_grouplock(inode, file, arg));
2316 case LL_IOC_GROUP_UNLOCK:
2317 RETURN(ll_put_grouplock(inode, file, arg));
2318 case IOC_OBD_STATFS:
2319 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2321 /* We need to special case any other ioctls we want to handle,
2322 * to send them to the MDS/OST as appropriate and to properly
2323 * network encode the arg field.
2324 case FSFILT_IOC_SETVERSION_OLD:
2325 case FSFILT_IOC_SETVERSION:
2327 case LL_IOC_FLUSHCTX:
2328 RETURN(ll_flush_ctx(inode));
2329 case LL_IOC_PATH2FID: {
2330 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2331 sizeof(struct lu_fid)))
2336 case LL_IOC_GETPARENT:
2337 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2339 case OBD_IOC_FID2PATH:
2340 RETURN(ll_fid2path(inode, (void __user *)arg));
2341 case LL_IOC_DATA_VERSION: {
2342 struct ioc_data_version idv;
2345 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* Only the two documented flush flags may reach ll_data_version(). */
2348 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2349 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2352 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2358 case LL_IOC_GET_MDTIDX: {
2361 mdtidx = ll_get_mdt_idx(inode);
2365 if (put_user((int)mdtidx, (int __user *)arg))
2370 case OBD_IOC_GETDTNAME:
2371 case OBD_IOC_GETMDNAME:
2372 RETURN(ll_get_obd_name(inode, cmd, arg));
2373 case LL_IOC_HSM_STATE_GET: {
2374 struct md_op_data *op_data;
2375 struct hsm_user_state *hus;
2382 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2383 LUSTRE_OPC_ANY, hus);
2384 if (IS_ERR(op_data)) {
2386 RETURN(PTR_ERR(op_data));
2389 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2392 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2395 ll_finish_md_op_data(op_data);
2399 case LL_IOC_HSM_STATE_SET: {
2400 struct hsm_state_set *hss;
2407 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2412 rc = ll_hsm_state_set(inode, hss);
2417 case LL_IOC_HSM_ACTION: {
2418 struct md_op_data *op_data;
2419 struct hsm_current_action *hca;
2426 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2427 LUSTRE_OPC_ANY, hca);
2428 if (IS_ERR(op_data)) {
2430 RETURN(PTR_ERR(op_data));
2433 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2436 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2439 ll_finish_md_op_data(op_data);
2443 case LL_IOC_SET_LEASE: {
2444 struct ll_inode_info *lli = ll_i2info(inode);
2445 struct obd_client_handle *och = NULL;
/* Requested lease mode must be compatible with the open mode. */
2450 case LL_LEASE_WRLCK:
2451 if (!(file->f_mode & FMODE_WRITE))
2453 fmode = FMODE_WRITE;
2455 case LL_LEASE_RDLCK:
2456 if (!(file->f_mode & FMODE_READ))
2460 case LL_LEASE_UNLCK:
2461 mutex_lock(&lli->lli_och_mutex);
2462 if (fd->fd_lease_och != NULL) {
2463 och = fd->fd_lease_och;
2464 fd->fd_lease_och = NULL;
2466 mutex_unlock(&lli->lli_och_mutex);
2471 fmode = och->och_flags;
2472 rc = ll_lease_close(och, inode, &lease_broken);
/* Report the mode of the lease we just dropped (0 if it was broken). */
2479 RETURN(ll_lease_type_from_fmode(fmode));
2484 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2486 /* apply for lease */
2487 och = ll_lease_open(inode, file, fmode, 0);
2489 RETURN(PTR_ERR(och));
2492 mutex_lock(&lli->lli_och_mutex);
2493 if (fd->fd_lease_och == NULL) {
2494 fd->fd_lease_och = och;
2497 mutex_unlock(&lli->lli_och_mutex);
2499 /* impossible now that only excl is supported for now */
2500 ll_lease_close(och, inode, &lease_broken);
2505 case LL_IOC_GET_LEASE: {
2506 struct ll_inode_info *lli = ll_i2info(inode);
2507 struct ldlm_lock *lock = NULL;
2510 mutex_lock(&lli->lli_och_mutex);
2511 if (fd->fd_lease_och != NULL) {
2512 struct obd_client_handle *och = fd->fd_lease_och;
2514 lock = ldlm_handle2lock(&och->och_lease_handle);
2516 lock_res_and_lock(lock);
/* A cancelled lease lock no longer counts as held. */
2517 if (!ldlm_is_cancel(lock))
2518 fmode = och->och_flags;
2520 unlock_res_and_lock(lock);
2521 LDLM_LOCK_PUT(lock);
2524 mutex_unlock(&lli->lli_och_mutex);
2526 RETURN(ll_lease_type_from_fmode(fmode));
2528 case LL_IOC_HSM_IMPORT: {
2529 struct hsm_user_import *hui;
2535 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2540 rc = ll_hsm_import(inode, file, hui);
2545 case LL_IOC_FUTIMES_3: {
2546 struct ll_futimes_3 lfu;
2548 if (copy_from_user(&lfu,
2549 (const struct ll_futimes_3 __user *)arg,
2553 RETURN(ll_file_futimes_3(file, &lfu));
/* Fallback: try registered llioc handlers, then the data export. */
2559 ll_iocontrol_call(inode, file, cmd, arg, &err))
2562 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2563 (void __user *)arg));
2568 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Commit a computed seek offset to file->f_pos after range checks;
 * local copy for kernels lacking generic_file_llseek_size.
 * NOTE(review): listing is elided; the error returns after the range
 * checks are not visible.
 */
2569 static inline loff_t
2570 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2572 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2574 if (offset > maxsize)
2577 if (offset != file->f_pos) {
2578 file->f_pos = offset;
/* Position changed: invalidate f_version-based readdir/seek caching. */
2579 file->f_version = 0;
/*
 * Local backport of generic_file_llseek_size(): size-aware llseek
 * supporting SEEK_CUR without rewriting an unchanged f_pos, and SEEK_DATA /
 * SEEK_HOLE treating the whole file as data with a virtual hole at @eof.
 * SEEK_CUR updates happen under i_mutex to avoid racing other seekers.
 * NOTE(review): listing is elided; the switch on @origin and the SEEK_DATA/
 * SEEK_HOLE branches are only partially visible.
 */
2585 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2586 loff_t maxsize, loff_t eof)
2588 struct inode *inode = file->f_path.dentry->d_inode;
2596 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2597 * position-querying operation. Avoid rewriting the "same"
2598 * f_pos value back to the file because a concurrent read(),
2599 * write() or lseek() might have altered it
2604 * f_lock protects against read/modify/write race with other
2605 * SEEK_CURs. Note that parallel writes and reads behave
2608 mutex_lock(&inode->i_mutex);
2609 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2610 mutex_unlock(&inode->i_mutex);
2614 * In the generic case the entire file is data, so as long as
2615 * offset isn't at the end of the file then the offset is data.
2622 * There is a virtual hole at the end of the file, so as long as
2623 * offset isn't i_size or larger, return i_size.
2631 return llseek_execute(file, offset, maxsize);
/*
 * llseek entry point: glimpses the current file size from the OSTs when the
 * origin depends on it (SEEK_END/SEEK_HOLE/SEEK_DATA), then delegates to
 * the size-aware generic llseek bounded by the client's max file size.
 */
2635 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2637 struct inode *inode = file->f_path.dentry->d_inode;
2638 loff_t retval, eof = 0;
/* Tentative target, for the trace message only. */
2641 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2642 (origin == SEEK_CUR) ? file->f_pos : 0);
2643 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2644 PFID(ll_inode2fid(inode)), inode, retval, retval,
2646 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2648 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2649 retval = ll_glimpse_size(inode);
2652 eof = i_size_read(inode);
2655 retval = ll_generic_file_llseek_size(file, offset, origin,
2656 ll_file_maxbytes(inode), eof);
/*
 * flush() (close-time) handler: surface any async writeback errors recorded
 * against this inode/object as -EIO, but only once per descriptor —
 * fd_write_failed suppresses repeat reports.
 */
2660 static int ll_flush(struct file *file, fl_owner_t id)
2662 struct inode *inode = file->f_path.dentry->d_inode;
2663 struct ll_inode_info *lli = ll_i2info(inode);
2664 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2667 LASSERT(!S_ISDIR(inode->i_mode));
2669 /* catch async errors that were recorded back when async writeback
2670 * failed for pages in this mapping. */
/* Read-and-clear: the error is consumed by this flush. */
2671 rc = lli->lli_async_rc;
2672 lli->lli_async_rc = 0;
2673 if (lli->lli_clob != NULL) {
2674 err = lov_read_and_clear_async_rc(lli->lli_clob);
2679 /* The application has been told write failure already.
2680 * Do not report failure again. */
2681 if (fd->fd_write_failed)
2683 return rc ? -EIO : 0;
/*
 * Run a CIT_FSYNC cl_io over [start, end] with the given mode.  Validates
 * the mode, builds fi_* parameters, loops the io, and on success returns
 * the number of pages written (fi_nr_written) instead of 0.
 */
2687 * Called to make sure a portion of file has been written out.
2688 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2690 * Return how many pages have been written.
2692 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2693 enum cl_fsync_mode mode, int ignore_layout)
2695 struct cl_env_nest nest;
2698 struct cl_fsync_io *fio;
2702 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2703 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2706 env = cl_env_nested_get(&nest);
2708 RETURN(PTR_ERR(env));
2710 io = vvp_env_thread_io(env);
2711 io->ci_obj = ll_i2info(inode)->lli_clob;
/* Allow fsync to proceed even across a layout change when requested. */
2712 io->ci_ignore_layout = ignore_layout;
2714 /* initialize parameters for sync */
2715 fio = &io->u.ci_fsync;
2716 fio->fi_start = start;
2718 fio->fi_fid = ll_inode2fid(inode);
2719 fio->fi_mode = mode;
2720 fio->fi_nr_written = 0;
2722 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2723 result = cl_io_loop(env, io);
2725 result = io->ci_result;
/* Success is reported as the page count, not zero. */
2727 result = fio->fi_nr_written;
2728 cl_io_fini(env, io);
2729 cl_env_nested_put(&nest, env);
/*
 * fsync() handler across three kernel API generations (4-arg ranged, 2-arg,
 * and the old dentry form — see the #ifdef ladder).  Waits for in-flight
 * pagecache IO, harvests recorded async write errors, fsyncs the MD server
 * side, then for regular files forces an OST-side CL_FSYNC_ALL over the
 * range, tracking fd_write_failed accordingly.
 */
2735 * When dentry is provided (the 'else' case), *file->f_path.dentry may be
2736 * null and dentry must be used directly rather than pulled from
2737 * *file->f_path.dentry as is done otherwise.
2740 #ifdef HAVE_FILE_FSYNC_4ARGS
2741 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2743 struct dentry *dentry = file->f_path.dentry;
2744 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2745 int ll_fsync(struct file *file, int datasync)
2747 struct dentry *dentry = file->f_path.dentry;
/* Older APIs have no range: sync the whole file. */
2749 loff_t end = LLONG_MAX;
2751 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2754 loff_t end = LLONG_MAX;
2756 struct inode *inode = dentry->d_inode;
2757 struct ll_inode_info *lli = ll_i2info(inode);
2758 struct ptlrpc_request *req;
2762 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2763 PFID(ll_inode2fid(inode)), inode);
2764 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2766 #ifdef HAVE_FILE_FSYNC_4ARGS
2767 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2768 mutex_lock(&inode->i_mutex);
2770 /* fsync's caller has already called _fdata{sync,write}, we want
2771 * that IO to finish before calling the osc and mdc sync methods */
2772 rc = filemap_fdatawait(inode->i_mapping);
2775 /* catch async errors that were recorded back when async writeback
2776 * failed for pages in this mapping. */
2777 if (!S_ISDIR(inode->i_mode)) {
2778 err = lli->lli_async_rc;
2779 lli->lli_async_rc = 0;
2782 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* Metadata-side sync via the MDC. */
2787 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
2791 ptlrpc_req_finished(req);
2793 if (S_ISREG(inode->i_mode)) {
2794 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2796 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2797 if (rc == 0 && err < 0)
/* Remember the outcome so ll_flush() reports failures only once. */
2800 fd->fd_write_failed = true;
2802 fd->fd_write_failed = false;
2805 #ifdef HAVE_FILE_FSYNC_4ARGS
2806 mutex_unlock(&inode->i_mutex);
/*
 * ll_file_flock() - handle both flock(2) (FL_FLOCK) and POSIX fcntl(2)
 * (FL_POSIX) lock requests by enqueueing an LDLM_FLOCK lock on the MDT,
 * then mirroring the result into the local kernel lock tables.
 */
2812 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2814 struct inode *inode = file->f_path.dentry->d_inode;
2815 struct ll_sb_info *sbi = ll_i2sbi(inode);
2816 struct ldlm_enqueue_info einfo = {
2817 .ei_type = LDLM_FLOCK,
2818 .ei_cb_cp = ldlm_flock_completion_ast,
2819 .ei_cbdata = file_lock,
2821 struct md_op_data *op_data;
2822 struct lustre_handle lockh = { 0 };
2823 union ldlm_policy_data flock = { { 0 } };
2824 int fl_type = file_lock->fl_type;
2830 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2831 PFID(ll_inode2fid(inode)), file_lock);
2833 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2835 if (file_lock->fl_flags & FL_FLOCK) {
2836 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2837 /* flocks are whole-file locks */
2838 flock.l_flock.end = OFFSET_MAX;
2839 /* For flocks owner is determined by the local file descriptor*/
2840 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2841 } else if (file_lock->fl_flags & FL_POSIX) {
2842 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2843 flock.l_flock.start = file_lock->fl_start;
2844 flock.l_flock.end = file_lock->fl_end;
2848 flock.l_flock.pid = file_lock->fl_pid;
2850 /* Somewhat ugly workaround for svc lockd.
2851 * lockd installs custom fl_lmops->lm_compare_owner that checks
2852 * for the fl_owner to be the same (which it always is on local node
2853 * I guess between lockd processes) and then compares pid.
2854 * As such we assign pid to the owner field to make it all work,
2855 * conflict with normal locks is unlikely since pid space and
2856 * pointer space for current->files are not intersecting */
2857 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2858 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* map fcntl lock types onto LDLM lock modes: read -> PR, write -> PW */
2862 einfo.ei_mode = LCK_PR;
2865 /* An unlock request may or may not have any relation to
2866 * existing locks so we may not be able to pass a lock handle
2867 * via a normal ldlm_lock_cancel() request. The request may even
2868 * unlock a byte range in the middle of an existing lock. In
2869 * order to process an unlock request we need all of the same
2870 * information that is given with a normal read or write record
2871 * lock request. To avoid creating another ldlm unlock (cancel)
2872 * message we'll treat a LCK_NL flock request as an unlock. */
2873 einfo.ei_mode = LCK_NL;
2876 einfo.ei_mode = LCK_PW;
2879 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* map the fcntl command onto enqueue flags: non-blocking set requests
 * use BLOCK_NOWAIT, GETLK-style queries use TEST_LOCK */
2894 flags = LDLM_FL_BLOCK_NOWAIT;
2900 flags = LDLM_FL_TEST_LOCK;
2903 CERROR("unknown fcntl lock command: %d\n", cmd);
2907 /* Save the old mode so that if the mode in the lock changes we
2908 * can decrement the appropriate reader or writer refcount. */
2909 file_lock->fl_type = einfo.ei_mode;
2911 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2912 LUSTRE_OPC_ANY, NULL);
2913 if (IS_ERR(op_data))
2914 RETURN(PTR_ERR(op_data));
2916 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2917 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2918 flock.l_flock.pid, flags, einfo.ei_mode,
2919 flock.l_flock.start, flock.l_flock.end);
/* enqueue the flock on the MDT; may block for F_SETLKW */
2921 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2924 /* Restore the file lock type if not TEST lock. */
2925 if (!(flags & LDLM_FL_TEST_LOCK))
2926 file_lock->fl_type = fl_type;
/* on success (or unlock) mirror the lock into the local VFS tables */
2928 if ((file_lock->fl_flags & FL_FLOCK) &&
2929 (rc == 0 || file_lock->fl_type == F_UNLCK))
2930 rc2 = flock_lock_file_wait(file, file_lock);
2931 if ((file_lock->fl_flags & FL_POSIX) &&
2932 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2933 !(flags & LDLM_FL_TEST_LOCK))
2934 rc2 = posix_lock_file_wait(file, file_lock);
2936 if (rc2 && file_lock->fl_type != F_UNLCK) {
/* local bookkeeping failed: undo the server-side lock with an
 * LCK_NL (unlock) enqueue so client and MDT stay consistent */
2937 einfo.ei_mode = LCK_NL;
2938 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2943 ll_finish_md_op_data(op_data);
/*
 * ll_get_fid_by_name() - look up @name (of @namelen bytes) under @parent
 * with an MDS getattr-by-name RPC and return its FID in @fid.  If @inode
 * is non-NULL, also instantiate the inode from the reply.
 * Returns 0 on success or a negative errno.
 */
2948 int ll_get_fid_by_name(struct inode *parent, const char *name,
2949 int namelen, struct lu_fid *fid,
2950 struct inode **inode)
2952 struct md_op_data *op_data = NULL;
2953 struct mdt_body *body;
2954 struct ptlrpc_request *req;
2958 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2959 LUSTRE_OPC_ANY, NULL);
2960 if (IS_ERR(op_data))
2961 RETURN(PTR_ERR(op_data));
/* only the FID and file type are needed from the MDS */
2963 op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
2964 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2965 ll_finish_md_op_data(op_data);
2969 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2971 GOTO(out_req, rc = -EFAULT);
2973 *fid = body->mbo_fid1;
2976 rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
2978 ptlrpc_req_finished(req);
/*
 * ll_migrate() - migrate directory entry @name under @parent to MDT
 * @mdtidx.  Resolves the child inode (dcache first, then by-name RPC),
 * takes a write lease on regular files to freeze the data version, and
 * performs the migration through an md_rename() call with CLI_MIGRATE.
 * Returns 0 on success (or if the entry is already on @mdtidx), negative
 * errno otherwise.
 */
2982 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2983 const char *name, int namelen)
2985 struct dentry *dchild = NULL;
2986 struct inode *child_inode = NULL;
2987 struct md_op_data *op_data;
2988 struct ptlrpc_request *request = NULL;
2989 struct obd_client_handle *och = NULL;
2991 struct mdt_body *body;
2993 __u64 data_version = 0;
2996 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2997 name, PFID(ll_inode2fid(parent)), mdtidx);
2999 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
3000 0, LUSTRE_OPC_ANY, NULL);
3001 if (IS_ERR(op_data))
3002 RETURN(PTR_ERR(op_data));
3004 /* Get child FID first */
3005 qstr.hash = full_name_hash(name, namelen);
/* try the dcache before issuing a by-name getattr RPC */
3008 dchild = d_lookup(file->f_path.dentry, &qstr);
3009 if (dchild != NULL) {
3010 if (dchild->d_inode != NULL)
3011 child_inode = igrab(dchild->d_inode);
3015 if (child_inode == NULL) {
3016 rc = ll_get_fid_by_name(parent, name, namelen,
3017 &op_data->op_fid3, &child_inode);
3022 if (child_inode == NULL)
3023 GOTO(out_free, rc = -EINVAL);
/* serialize against other operations on the child for the duration of
 * the migration */
3025 mutex_lock(&child_inode->i_mutex);
3026 op_data->op_fid3 = *ll_inode2fid(child_inode);
3027 if (!fid_is_sane(&op_data->op_fid3)) {
3028 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
3029 ll_get_fsname(parent->i_sb, NULL, 0), name,
3030 PFID(&op_data->op_fid3));
3031 GOTO(out_free, rc = -EINVAL);
/* nothing to do when the entry already lives on the target MDT */
3034 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
3039 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
3040 PFID(&op_data->op_fid3), mdtidx);
3041 GOTO(out_free, rc = 0);
3044 if (S_ISREG(child_inode->i_mode)) {
/* take a write lease so the data version stays stable while the
 * file's metadata moves between MDTs */
3045 och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
3052 rc = ll_data_version(child_inode, &data_version,
3057 op_data->op_handle = och->och_fh;
3058 op_data->op_data = och->och_mod;
3059 op_data->op_data_version = data_version;
3060 op_data->op_lease_handle = och->och_lease_handle;
3061 op_data->op_bias |= MDS_RENAME_MIGRATE;
/* migration is implemented as a rename onto itself with CLI_MIGRATE */
3064 op_data->op_mds = mdtidx;
3065 op_data->op_cli_flags = CLI_MIGRATE;
3066 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
3067 namelen, name, namelen, &request);
3069 ll_update_times(request, parent);
3071 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
3073 GOTO(out_free, rc = -EPROTO);
3075 /* If the server does release layout lock, then we cleanup
3076 * the client och here, otherwise release it in out_free: */
3077 if (och != NULL && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
3078 obd_mod_put(och->och_mod);
3079 md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp, och);
3080 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
3085 ptlrpc_req_finished(request);
3086 /* Try again if the file layout has changed. */
3087 if (rc == -EAGAIN && S_ISREG(child_inode->i_mode))
3090 if (child_inode != NULL) {
3091 if (och != NULL) /* close the file */
3092 ll_lease_close(och, child_inode, NULL);
/* drop the cached nlink so the child's attributes are refetched */
3093 clear_nlink(child_inode);
3094 mutex_unlock(&child_inode->i_mutex);
3098 ll_finish_md_op_data(op_data);
/* Stub lock handler installed by the "-o noflock" file_operations tables
 * below; per that table's comment it rejects flock calls with ENOSYS. */
3103 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3111 * test if some locks matching bits and l_req_mode are acquired
3112 * - bits can be in different locks
3113 * - if found clear the common lock bits in *bits
3114 * - the bits not found, are kept in *bits
3116 * \param bits [IN] searched lock bits [IN]
3117 * \param l_req_mode [IN] searched lock mode
3118 * \retval boolean, true iff all bits are found
/* See the descriptive comment above: tests whether MD locks covering *bits
 * are already held (possibly spread over several locks); matched bits are
 * cleared from *bits, unmatched bits remain.  Returns true iff all bits
 * were found. */
3120 int ll_have_md_lock(struct inode *inode, __u64 *bits, enum ldlm_mode l_req_mode)
3122 struct lustre_handle lockh;
3123 union ldlm_policy_data policy;
/* LCK_MINMODE means "any mode": match against all four modes at once */
3124 enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
3125 (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
3134 fid = &ll_i2info(inode)->lli_fid;
3135 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3136 ldlm_lockname[mode]);
/* TEST_LOCK: match without taking a reference on the found lock */
3138 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
3139 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3140 policy.l_inodebits.bits = *bits & (1 << i);
3141 if (policy.l_inodebits.bits == 0)
3144 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3145 &policy, mode, &lockh)) {
3146 struct ldlm_lock *lock;
3148 lock = ldlm_handle2lock(&lockh);
3151 ~(lock->l_policy_data.l_inodebits.bits);
3152 LDLM_LOCK_PUT(lock);
3154 *bits &= ~policy.l_inodebits.bits;
/*
 * ll_take_md_lock() - try to match (and take a reference on) an existing
 * MD inodebits lock covering @bits in one of the modes in @mode.  Returns
 * the matched mode (0 if none); on success @lockh holds the lock handle.
 */
3161 enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
3162 struct lustre_handle *lockh, __u64 flags,
3163 enum ldlm_mode mode)
3165 union ldlm_policy_data policy = { .l_inodebits = { bits } };
3170 fid = &ll_i2info(inode)->lli_fid;
3171 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3173 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3174 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * ll_inode_revalidate_fini() - post-process the result of an inode
 * revalidation RPC: translate -ENOENT for already-unlinked objects and
 * log other failures (quietly for -EACCES/-EIDRM).
 */
3179 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3181 /* Already unlinked. Just update nlink and return success */
3182 if (rc == -ENOENT) {
3184 /* If it is striped directory, and there is bad stripe
3185 * Let's revalidate the dentry again, instead of returning
3187 if (S_ISDIR(inode->i_mode) &&
3188 ll_i2info(inode)->lli_lsm_md != NULL)
3191 /* This path cannot be hit for regular files unless in
3192 * case of obscure races, so no need to validate
3194 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3196 } else if (rc != 0) {
3197 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3198 "%s: revalidate FID "DFID" error: rc = %d\n",
3199 ll_get_fsname(inode->i_sb, NULL, 0),
3200 PFID(ll_inode2fid(inode)), rc);
/*
 * __ll_inode_revalidate() - refresh @dentry's inode attributes from the
 * MDS for the lock bits in @ibits.  With OBD_CONNECT_ATTRFID support an
 * intent getattr/lookup by FID is used; otherwise, if no matching MD lock
 * is cached, a plain md_getattr RPC fetches the attributes.
 */
3206 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3208 struct inode *inode = dentry->d_inode;
3209 struct ptlrpc_request *req = NULL;
3210 struct obd_export *exp;
3214 LASSERT(inode != NULL);
3216 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3217 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3219 exp = ll_i2mdexp(inode);
3221 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3222 * But under CMD case, it caused some lock issues, should be fixed
3223 * with new CMD ibits lock. See bug 12718 */
3224 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3225 struct lookup_intent oit = { .it_op = IT_GETATTR };
3226 struct md_op_data *op_data;
3228 if (ibits == MDS_INODELOCK_LOOKUP)
3229 oit.it_op = IT_LOOKUP;
3231 /* Call getattr by fid, so do not provide name at all. */
3232 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3233 dentry->d_inode, NULL, 0, 0,
3234 LUSTRE_OPC_ANY, NULL);
3235 if (IS_ERR(op_data))
3236 RETURN(PTR_ERR(op_data));
3238 rc = md_intent_lock(exp, op_data, &oit, &req,
3239 &ll_md_blocking_ast, 0);
3240 ll_finish_md_op_data(op_data);
3242 rc = ll_inode_revalidate_fini(inode, rc);
3246 rc = ll_revalidate_it_finish(req, &oit, dentry);
3248 ll_intent_release(&oit);
3252 /* Unlinked? Unhash dentry, so it is not picked up later by
3253 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3254 here to preserve get_cwd functionality on 2.6.
3256 if (!dentry->d_inode->i_nlink)
3257 d_lustre_invalidate(dentry, 0);
3259 ll_lookup_finish_locks(&oit, dentry);
3260 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
/* no cached MD lock covers @ibits: fetch attributes explicitly */
3261 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3262 u64 valid = OBD_MD_FLGETATTR;
3263 struct md_op_data *op_data;
/* regular files also need striping (EA) data sized appropriately */
3266 if (S_ISREG(inode->i_mode)) {
3267 rc = ll_get_default_mdsize(sbi, &ealen);
3270 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3273 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3274 0, ealen, LUSTRE_OPC_ANY,
3276 if (IS_ERR(op_data))
3277 RETURN(PTR_ERR(op_data));
3279 op_data->op_valid = valid;
3280 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3281 ll_finish_md_op_data(op_data);
3283 rc = ll_inode_revalidate_fini(inode, rc);
3287 rc = ll_prep_inode(&inode, req, NULL, NULL);
3290 ptlrpc_req_finished(req);
/*
 * ll_merge_md_attr() - for a striped directory, merge the per-stripe
 * attributes (nlink, blocks, size, and the three timestamps) from all
 * MDTs into the master inode.  Requires lli_lsm_md to be set.
 */
3294 static int ll_merge_md_attr(struct inode *inode)
3296 struct cl_attr attr = { 0 };
3299 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3300 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3301 &attr, ll_md_blocking_ast);
3305 set_nlink(inode, attr.cat_nlink);
3306 inode->i_blocks = attr.cat_blocks;
3307 i_size_write(inode, attr.cat_size);
3309 ll_i2info(inode)->lli_atime = attr.cat_atime;
3310 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3311 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * ll_inode_revalidate() - full revalidation: refresh MD attributes via
 * __ll_inode_revalidate(), then bring size/timestamps up to date — merged
 * stripe attributes for striped directories, a glimpse for regular files
 * (skipped while an HSM restore is running, see comment below).
 */
3317 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3319 struct inode *inode = dentry->d_inode;
3323 rc = __ll_inode_revalidate(dentry, ibits);
3327 /* if object isn't regular file, don't validate size */
3328 if (!S_ISREG(inode->i_mode)) {
3329 if (S_ISDIR(inode->i_mode) &&
3330 ll_i2info(inode)->lli_lsm_md != NULL) {
3331 rc = ll_merge_md_attr(inode);
3336 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3337 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3338 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3340 /* In case of restore, the MDT has the right size and has
3341 * already send it back without granting the layout lock,
3342 * inode is up-to-date so glimpse is useless.
3343 * Also to glimpse we need the layout, in case of a running
3344 * restore the MDT holds the layout lock so the glimpse will
3345 * block up to the end of restore (getattr will block)
3347 if (!ll_file_test_flag(ll_i2info(inode), LLIF_FILE_RESTORING))
3348 rc = ll_glimpse_size(inode);
/*
 * ll_getattr() - VFS ->getattr: revalidate UPDATE|LOOKUP bits and copy
 * inode attributes into @stat.  With a 32-bit API client the inode number
 * is built from the FID instead of i_ino.
 */
3353 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3355 struct inode *inode = de->d_inode;
3356 struct ll_sb_info *sbi = ll_i2sbi(inode);
3357 struct ll_inode_info *lli = ll_i2info(inode);
3360 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3361 MDS_INODELOCK_LOOKUP);
3362 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3367 stat->dev = inode->i_sb->s_dev;
3368 if (ll_need_32bit_api(sbi))
3369 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3371 stat->ino = inode->i_ino;
3372 stat->mode = inode->i_mode;
3373 stat->uid = inode->i_uid;
3374 stat->gid = inode->i_gid;
3375 stat->rdev = inode->i_rdev;
3376 stat->atime = inode->i_atime;
3377 stat->mtime = inode->i_mtime;
3378 stat->ctime = inode->i_ctime;
3379 stat->blksize = 1 << inode->i_blkbits;
3381 stat->nlink = inode->i_nlink;
3382 stat->size = i_size_read(inode);
3383 stat->blocks = inode->i_blocks;
/*
 * ll_fiemap() - VFS ->fiemap: marshal fieinfo/user extents into a
 * struct fiemap, run ll_do_fiemap(), and copy mapped extents back out.
 */
3388 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3389 __u64 start, __u64 len)
3393 struct fiemap *fiemap;
3394 unsigned int extent_count = fieinfo->fi_extents_max;
3396 num_bytes = sizeof(*fiemap) + (extent_count *
3397 sizeof(struct fiemap_extent));
3398 OBD_ALLOC_LARGE(fiemap, num_bytes);
3403 fiemap->fm_flags = fieinfo->fi_flags;
3404 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3405 fiemap->fm_start = start;
3406 fiemap->fm_length = len;
/* only the first extent is copied in from user space — presumably used
 * as the continuation point for a restarted mapping; TODO confirm */
3407 if (extent_count > 0 &&
3408 copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3409 sizeof(struct fiemap_extent)) != 0)
3410 GOTO(out, rc = -EFAULT);
3412 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3414 fieinfo->fi_flags = fiemap->fm_flags;
3415 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3416 if (extent_count > 0 &&
3417 copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3418 fiemap->fm_mapped_extents *
3419 sizeof(struct fiemap_extent)) != 0)
3420 GOTO(out, rc = -EFAULT);
3422 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * ll_get_acl() - return a referenced copy of the cached POSIX ACL;
 * lli_lock protects lli_posix_acl against concurrent update.
 */
3426 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3428 struct ll_inode_info *lli = ll_i2info(inode);
3429 struct posix_acl *acl = NULL;
3432 spin_lock(&lli->lli_lock);
3433 /* VFS' acl_permission_check->check_acl will release the refcount */
3434 acl = posix_acl_dup(lli->lli_posix_acl);
3435 spin_unlock(&lli->lli_lock);
3440 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
/*
 * ll_check_acl() - ACL callback for generic_permission() on kernels where
 * the 2-arg form is unavailable; checks @mask against the cached POSIX
 * ACL.  With the 4-arg API, RCU-walk mode cannot block on ACL lookup.
 */
3442 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3443 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3445 ll_check_acl(struct inode *inode, int mask)
3448 # ifdef CONFIG_FS_POSIX_ACL
3449 struct posix_acl *acl;
3453 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3454 if (flags & IPERM_FLAG_RCU)
3457 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3462 rc = posix_acl_permission(inode, acl, mask);
3463 posix_acl_release(acl);
3466 # else /* !CONFIG_FS_POSIX_ACL */
3468 # endif /* CONFIG_FS_POSIX_ACL */
3470 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
3472 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
/*
 * ll_inode_permission() - VFS ->permission handler (kernel-API variants
 * selected at configure time).  Revalidates the root inode when needed,
 * applies root squashing by temporarily overriding the task credentials,
 * then delegates to the remote-permission or generic permission check.
 */
3473 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3475 # ifdef HAVE_INODE_PERMISION_2ARGS
3476 int ll_inode_permission(struct inode *inode, int mask)
3478 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3483 struct ll_sb_info *sbi;
3484 struct root_squash_info *squash;
3485 struct cred *cred = NULL;
3486 const struct cred *old_cred = NULL;
3488 bool squash_id = false;
3491 #ifdef MAY_NOT_BLOCK
/* RCU-walk mode: this path may block, so defer to ref-walk */
3492 if (mask & MAY_NOT_BLOCK)
3494 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3495 if (flags & IPERM_FLAG_RCU)
3499 /* as root inode are NOT getting validated in lookup operation,
3500 * need to do it before permission check. */
3502 if (inode == inode->i_sb->s_root->d_inode) {
3503 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3504 MDS_INODELOCK_LOOKUP);
3509 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3510 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3512 /* squash fsuid/fsgid if needed */
3513 sbi = ll_i2sbi(inode);
3514 squash = &sbi->ll_squash;
3515 if (unlikely(squash->rsi_uid != 0 &&
3516 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3517 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3521 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3522 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3523 squash->rsi_uid, squash->rsi_gid);
3525 /* update current process's credentials
3526 * and FS capability */
3527 cred = prepare_creds();
3531 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3532 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* drop all filesystem-related capabilities from the squashed creds */
3533 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3534 if ((1 << cap) & CFS_CAP_FS_MASK)
3535 cap_lower(cred->cap_effective, cap);
3537 old_cred = override_creds(cred);
3540 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3542 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3543 rc = lustre_check_remote_perm(inode, mask);
3545 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3547 /* restore current process's credentials and FS capability */
3549 revert_creds(old_cred);
3556 /* -o localflock - only provides locally consistent flock locks */
/* Default table: no ->flock/->lock methods, so flock(2)/fcntl(2) locks
 * are handled locally by the kernel (locally-consistent only). */
3557 struct file_operations ll_file_operations = {
3558 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3559 # ifdef HAVE_SYNC_READ_WRITE
3560 .read = new_sync_read,
3561 .write = new_sync_write,
3563 .read_iter = ll_file_read_iter,
3564 .write_iter = ll_file_write_iter,
3565 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3566 .read = ll_file_read,
3567 .aio_read = ll_file_aio_read,
3568 .write = ll_file_write,
3569 .aio_write = ll_file_aio_write,
3570 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3571 .unlocked_ioctl = ll_file_ioctl,
3572 .open = ll_file_open,
3573 .release = ll_file_release,
3574 .mmap = ll_file_mmap,
3575 .llseek = ll_file_seek,
3576 .splice_read = ll_file_splice_read,
/* "-o flock" table: identical to the default except that ->flock and
 * ->lock route through ll_file_flock() for cluster-wide locking. */
3581 struct file_operations ll_file_operations_flock = {
3582 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3583 # ifdef HAVE_SYNC_READ_WRITE
3584 .read = new_sync_read,
3585 .write = new_sync_write,
3586 # endif /* HAVE_SYNC_READ_WRITE */
3587 .read_iter = ll_file_read_iter,
3588 .write_iter = ll_file_write_iter,
3589 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3590 .read = ll_file_read,
3591 .aio_read = ll_file_aio_read,
3592 .write = ll_file_write,
3593 .aio_write = ll_file_aio_write,
3594 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3595 .unlocked_ioctl = ll_file_ioctl,
3596 .open = ll_file_open,
3597 .release = ll_file_release,
3598 .mmap = ll_file_mmap,
3599 .llseek = ll_file_seek,
3600 .splice_read = ll_file_splice_read,
3603 .flock = ll_file_flock,
3604 .lock = ll_file_flock
3607 /* These are for -o noflock - to return ENOSYS on flock calls */
3608 struct file_operations ll_file_operations_noflock = {
3609 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3610 # ifdef HAVE_SYNC_READ_WRITE
3611 .read = new_sync_read,
3612 .write = new_sync_write,
3613 # endif /* HAVE_SYNC_READ_WRITE */
3614 .read_iter = ll_file_read_iter,
3615 .write_iter = ll_file_write_iter,
3616 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3617 .read = ll_file_read,
3618 .aio_read = ll_file_aio_read,
3619 .write = ll_file_write,
3620 .aio_write = ll_file_aio_write,
3621 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3622 .unlocked_ioctl = ll_file_ioctl,
3623 .open = ll_file_open,
3624 .release = ll_file_release,
3625 .mmap = ll_file_mmap,
3626 .llseek = ll_file_seek,
3627 .splice_read = ll_file_splice_read,
/* both lock entry points rejected via the ll_file_noflock stub */
3630 .flock = ll_file_noflock,
3631 .lock = ll_file_noflock
/* Inode operations shared by all three file_operations variants above. */
3634 struct inode_operations ll_file_inode_operations = {
3635 .setattr = ll_setattr,
3636 .getattr = ll_getattr,
3637 .permission = ll_inode_permission,
3638 .setxattr = ll_setxattr,
3639 .getxattr = ll_getxattr,
3640 .listxattr = ll_listxattr,
3641 .removexattr = ll_removexattr,
3642 .fiemap = ll_fiemap,
3643 #ifdef HAVE_IOP_GET_ACL
3644 .get_acl = ll_get_acl,
3648 /* dynamic ioctl number support routines */
/* Registry of dynamically-registered ioctl handlers: an rwsem-protected
 * list of llioc_data entries, consulted by ll_iocontrol_call(). */
3649 static struct llioc_ctl_data {
3650 struct rw_semaphore ioc_sem;
3651 struct list_head ioc_head;
3653 __RWSEM_INITIALIZER(llioc.ioc_sem),
3654 LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the ioctl commands it accepts
 * (iocd_cmd is a trailing variable-length array; iocd_size is the full
 * allocation size, used when freeing). */
3659 struct list_head iocd_list;
3660 unsigned int iocd_size;
3661 llioc_callback_t iocd_cb;
3662 unsigned int iocd_count;
3663 unsigned int iocd_cmd[0];
/*
 * ll_iocontrol_register() - register callback @cb for @count ioctl
 * commands in @cmd.  Returns an opaque cookie (used as "magic" by
 * ll_iocontrol_unregister()) or NULL on bad arguments / allocation
 * failure.
 */
3666 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3669 struct llioc_data *in_data = NULL;
3672 if (cb == NULL || cmd == NULL ||
3673 count > LLIOC_MAX_CMD || count < 0)
3676 size = sizeof(*in_data) + count * sizeof(unsigned int);
3677 OBD_ALLOC(in_data, size);
3678 if (in_data == NULL)
3681 memset(in_data, 0, sizeof(*in_data));
3682 in_data->iocd_size = size;
3683 in_data->iocd_cb = cb;
3684 in_data->iocd_count = count;
3685 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3687 down_write(&llioc.ioc_sem);
3688 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3689 up_write(&llioc.ioc_sem);
/*
 * ll_iocontrol_unregister() - remove and free the handler identified by
 * the @magic cookie returned from ll_iocontrol_register(); warns if no
 * matching registration is found.
 */
3694 void ll_iocontrol_unregister(void *magic)
3696 struct llioc_data *tmp;
3701 down_write(&llioc.ioc_sem);
3702 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3704 unsigned int size = tmp->iocd_size;
3706 list_del(&tmp->iocd_list);
/* drop the sem before freeing; the entry is already unlinked */
3707 up_write(&llioc.ioc_sem);
3709 OBD_FREE(tmp, size);
3713 up_write(&llioc.ioc_sem);
3715 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3718 EXPORT_SYMBOL(ll_iocontrol_register);
3719 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * ll_iocontrol_call() - dispatch ioctl @cmd to the registered dynamic
 * handlers in order; stops at the first handler returning LLIOC_STOP.
 * The handler's status is passed back through *rcp.
 */
3721 static enum llioc_iter
3722 ll_iocontrol_call(struct inode *inode, struct file *file,
3723 unsigned int cmd, unsigned long arg, int *rcp)
3725 enum llioc_iter ret = LLIOC_CONT;
3726 struct llioc_data *data;
3727 int rc = -EINVAL, i;
3729 down_read(&llioc.ioc_sem);
3730 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3731 for (i = 0; i < data->iocd_count; i++) {
3732 if (cmd != data->iocd_cmd[i])
3735 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3739 if (ret == LLIOC_STOP)
3742 up_read(&llioc.ioc_sem);
/*
 * ll_layout_conf() - push a layout configuration into the cl_object via
 * cl_conf_set().  For OBJECT_CONF_SET, the layout lock is made matchable
 * only after the layout has been applied, and the cached layout
 * generation is refreshed from the object.
 */
3749 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3751 struct ll_inode_info *lli = ll_i2info(inode);
3752 struct cl_object *obj = lli->lli_clob;
3753 struct cl_env_nest nest;
3761 env = cl_env_nested_get(&nest);
3763 RETURN(PTR_ERR(env));
3765 rc = cl_conf_set(env, lli->lli_clob, conf);
3769 if (conf->coc_opc == OBJECT_CONF_SET) {
3770 struct ldlm_lock *lock = conf->coc_lock;
3771 struct cl_layout cl = {
3775 LASSERT(lock != NULL);
3776 LASSERT(ldlm_has_layout(lock));
3778 /* it can only be allowed to match after layout is
3779 * applied to inode otherwise false layout would be
3780 * seen. Applying layout should happen before dropping
3781 * the intent lock. */
3782 ldlm_lock_allow_match(lock);
3784 rc = cl_object_layout_get(env, obj, &cl);
3789 DFID": layout version change: %u -> %u\n",
3790 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3792 ll_layout_version_set(lli, cl.cl_layout_gen);
3796 cl_env_nested_put(&nest, env);
3801 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
/*
 * ll_layout_fetch() - populate @lock's LVB with the file layout when it
 * was not delivered with the lock grant: fetch the LOV xattr from the
 * MDT and attach it as the lock's LVB under the lock's resource lock.
 */
3802 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3805 struct ll_sb_info *sbi = ll_i2sbi(inode);
3806 struct ptlrpc_request *req;
3807 struct mdt_body *body;
3814 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3815 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3816 lock->l_lvb_data, lock->l_lvb_len);
3818 if (lock->l_lvb_data != NULL)
3821 /* if layout lock was granted right away, the layout is returned
3822 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3823 * blocked and then granted via completion ast, we have to fetch
3824 * layout here. Please note that we can't use the LVB buffer in
3825 * completion AST because it doesn't have a large enough buffer */
3826 rc = ll_get_default_mdsize(sbi, &lmmsize);
3828 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
3829 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3834 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3836 GOTO(out, rc = -EPROTO);
3838 lmmsize = body->mbo_eadatasize;
3839 if (lmmsize == 0) /* empty layout */
3842 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3844 GOTO(out, rc = -EFAULT);
3846 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3847 if (lvbdata == NULL)
3848 GOTO(out, rc = -ENOMEM);
3850 memcpy(lvbdata, lmm, lmmsize);
3851 lock_res_and_lock(lock);
/* attach the layout as LVB only if no one raced us to it; otherwise
 * keep the existing LVB and free our copy below */
3852 if (unlikely(lock->l_lvb_data == NULL)) {
3853 lock->l_lvb_type = LVB_T_LAYOUT;
3854 lock->l_lvb_data = lvbdata;
3855 lock->l_lvb_len = lmmsize;
3858 unlock_res_and_lock(lock);
3860 if (lvbdata != NULL)
3861 OBD_FREE_LARGE(lvbdata, lmmsize);
3866 ptlrpc_req_finished(req);
3871 * Apply the layout to the inode. Layout lock is held and will be released
/*
 * ll_layout_lock_set() - apply the layout carried by the layout lock
 * @lockh (held in @mode) to @inode, fetching the layout first if the
 * lock has no LVB yet; see also the descriptive comment above.  The lock
 * reference is dropped before returning.  If applying the layout fails
 * with -EBUSY, wait for in-flight IO via an OBJECT_CONF_WAIT pass.
 */
3874 static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
3875 struct inode *inode)
3877 struct ll_inode_info *lli = ll_i2info(inode);
3878 struct ll_sb_info *sbi = ll_i2sbi(inode);
3879 struct ldlm_lock *lock;
3880 struct cl_object_conf conf;
3883 bool wait_layout = false;
3886 LASSERT(lustre_handle_is_used(lockh));
3888 lock = ldlm_handle2lock(lockh);
3889 LASSERT(lock != NULL);
3890 LASSERT(ldlm_has_layout(lock));
3892 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured",
3893 PFID(&lli->lli_fid), inode);
3895 /* in case this is a caching lock and reinstate with new inode */
3896 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3898 lock_res_and_lock(lock);
3899 lvb_ready = ldlm_is_lvb_ready(lock);
3900 unlock_res_and_lock(lock);
3901 /* checking lvb_ready is racy but this is okay. The worst case is
3902 * that multi processes may configure the file on the same time. */
3907 rc = ll_layout_fetch(inode, lock);
3911 /* for layout lock, lmm is stored in lock's lvb.
3912 * lvb_data is immutable if the lock is held so it's safe to access it
3915 * set layout to file. Unlikely this will fail as old layout was
3916 * surely eliminated */
3917 memset(&conf, 0, sizeof conf);
3918 conf.coc_opc = OBJECT_CONF_SET;
3919 conf.coc_inode = inode;
3920 conf.coc_lock = lock;
3921 conf.u.coc_layout.lb_buf = lock->l_lvb_data;
3922 conf.u.coc_layout.lb_len = lock->l_lvb_len;
3923 rc = ll_layout_conf(inode, &conf);
3925 /* refresh layout failed, need to wait */
3926 wait_layout = rc == -EBUSY;
3930 LDLM_LOCK_PUT(lock);
3931 ldlm_lock_decref(lockh, mode);
3933 /* wait for IO to complete if it's still being used. */
3935 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3936 ll_get_fsname(inode->i_sb, NULL, 0),
3937 PFID(&lli->lli_fid), inode);
3939 memset(&conf, 0, sizeof conf);
3940 conf.coc_opc = OBJECT_CONF_WAIT;
3941 conf.coc_inode = inode;
3942 rc = ll_layout_conf(inode, &conf);
3946 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3947 ll_get_fsname(inode->i_sb, NULL, 0),
3948 PFID(&lli->lli_fid), rc);
/*
 * ll_layout_refresh_locked() - refresh the layout while holding
 * lli_layout_mutex (see ll_layout_refresh()): first try to match a
 * cached layout lock; otherwise enqueue an IT_LAYOUT intent on the MDT
 * and apply the granted lock's layout via ll_layout_lock_set().
 */
3953 static int ll_layout_refresh_locked(struct inode *inode)
3955 struct ll_inode_info *lli = ll_i2info(inode);
3956 struct ll_sb_info *sbi = ll_i2sbi(inode);
3957 struct md_op_data *op_data;
3958 struct lookup_intent it;
3959 struct lustre_handle lockh;
3960 enum ldlm_mode mode;
3961 struct ldlm_enqueue_info einfo = {
3962 .ei_type = LDLM_IBITS,
3964 .ei_cb_bl = &ll_md_blocking_ast,
3965 .ei_cb_cp = &ldlm_completion_ast,
3971 /* mostly layout lock is caching on the local side, so try to match
3972 * it before grabbing layout lock mutex. */
3973 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3974 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3975 if (mode != 0) { /* hit cached lock */
3976 rc = ll_layout_lock_set(&lockh, mode, inode);
3983 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3984 0, 0, LUSTRE_OPC_ANY, NULL);
3985 if (IS_ERR(op_data))
3986 RETURN(PTR_ERR(op_data));
3988 /* have to enqueue one */
3989 memset(&it, 0, sizeof(it));
3990 it.it_op = IT_LAYOUT;
3991 lockh.cookie = 0ULL;
3993 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3994 ll_get_fsname(inode->i_sb, NULL, 0),
3995 PFID(&lli->lli_fid), inode);
3997 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
/* the intent request itself is no longer needed once the lock state
 * has been extracted */
3998 if (it.d.lustre.it_data != NULL)
3999 ptlrpc_req_finished(it.d.lustre.it_data);
4000 it.d.lustre.it_data = NULL;
4002 ll_finish_md_op_data(op_data);
4004 mode = it.d.lustre.it_lock_mode;
4005 it.d.lustre.it_lock_mode = 0;
4006 ll_intent_drop_lock(&it);
4009 /* set lock data in case this is a new lock */
4010 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
4011 rc = ll_layout_lock_set(&lockh, mode, inode);
4020 * This function checks if there exists a LAYOUT lock on the client side,
4021 * or enqueues it if it doesn't have one in cache.
4023 * This function will not hold layout lock so it may be revoked any time after
4024 * this function returns. Any operations depend on layout should be redone
4027 * This function should be called before lov_io_init() to get an uptodate
4028 * layout version, the caller should save the version number and after IO
4029 * is finished, this function should be called again to verify that layout
4030 * is not changed during IO time.
/* See the descriptive comment above: returns the current layout
 * generation in *gen, enqueueing a layout lock first when none is
 * cached.  lli_layout_mutex serializes concurrent enqueues. */
4032 int ll_layout_refresh(struct inode *inode, __u32 *gen)
4034 struct ll_inode_info *lli = ll_i2info(inode);
4035 struct ll_sb_info *sbi = ll_i2sbi(inode);
4039 *gen = ll_layout_version_get(lli);
/* fast path: layout lock disabled, or a valid generation is cached */
4040 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
4044 LASSERT(fid_is_sane(ll_inode2fid(inode)));
4045 LASSERT(S_ISREG(inode->i_mode));
4047 /* take layout lock mutex to enqueue layout lock exclusively. */
4048 mutex_lock(&lli->lli_layout_mutex);
4050 rc = ll_layout_refresh_locked(inode);
4054 *gen = ll_layout_version_get(lli);
4056 mutex_unlock(&lli->lli_layout_mutex);
4062 * This function send a restore request to the MDT
4064 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
4066 struct hsm_user_request *hur;
4070 len = sizeof(struct hsm_user_request) +
4071 sizeof(struct hsm_user_item);
4072 OBD_ALLOC(hur, len);
4076 hur->hur_request.hr_action = HUA_RESTORE;
4077 hur->hur_request.hr_archive_id = 0;
4078 hur->hur_request.hr_flags = 0;
4079 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
4080 sizeof(hur->hur_user_item[0].hui_fid));
4081 hur->hur_user_item[0].hui_extent.offset = offset;
4082 hur->hur_user_item[0].hui_extent.length = length;
4083 hur->hur_request.hr_itemcount = 1;
4084 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,