4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include <linux/user_namespace.h>
49 #ifdef HAVE_UIDGID_HEADER
50 # include <linux/uidgid.h>
52 #include <lustre/ll_fiemap.h>
53 #include <lustre_ioctl.h>
55 #include "cl_object.h"
57 #include "llite_internal.h"
58 #include "vvp_internal.h"
/* Forward declarations for helpers defined later in this file. */
61 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
63 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
66 static enum llioc_iter
67 ll_iocontrol_call(struct inode *inode, struct file *file,
68 unsigned int cmd, unsigned long arg, int *rcp);
/*
 * Allocate a per-open ll_file_data from its dedicated slab cache.
 * GFP_NOFS avoids recursing back into the filesystem under memory
 * pressure.  NOTE(review): the allocation-failure check and return
 * statement are not visible in this excerpt — confirm against the
 * full source.
 */
70 static struct ll_file_data *ll_file_data_get(void)
72 struct ll_file_data *fd;
74 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
78 fd->fd_write_failed = false;
/* Return a ll_file_data to the slab cache it was allocated from. */
83 static void ll_file_data_put(struct ll_file_data *fd)
86 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Copy the inode's current attributes (mode, atime/mtime/ctime, size,
 * blocks, flags) plus the MDS open handle into @op_data so the CLOSE
 * RPC carries the client's final view of the file.
 */
90 * Packs all the attributes into @op_data for the CLOSE rpc.
92 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
93 struct obd_client_handle *och)
97 ll_prep_md_op_data(op_data, inode, NULL, NULL,
98 0, 0, LUSTRE_OPC_ANY, NULL);
100 op_data->op_attr.ia_mode = inode->i_mode;
101 op_data->op_attr.ia_atime = inode->i_atime;
102 op_data->op_attr.ia_mtime = inode->i_mtime;
103 op_data->op_attr.ia_ctime = inode->i_ctime;
104 op_data->op_attr.ia_size = i_size_read(inode);
/* Mark every copied attribute valid so the MDT applies them all. */
105 op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
106 ATTR_MTIME | ATTR_MTIME_SET |
107 ATTR_CTIME | ATTR_CTIME_SET;
108 op_data->op_attr_blocks = inode->i_blocks;
109 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
110 op_data->op_handle = och->och_fh;
112 if (och->och_flags & FMODE_WRITE &&
113 ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED))
114 /* For HSM: if inode data has been modified, pack it so that
115 * MDT can set data dirty flag in the archive. */
116 op_data->op_bias |= MDS_DATA_MODIFIED;
/*
 * Send the MDS_CLOSE RPC for an open handle, optionally biased for an
 * HSM release or a layout swap.  The @data argument is interpreted per
 * the bias, as documented below.  On return the handle is marked dead
 * so it cannot be reused.
 */
122 * Perform a close, possibly with a bias.
123 * The meaning of "data" depends on the value of "bias".
125 * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
126 * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
129 static int ll_close_inode_openhandle(struct obd_export *md_exp,
130 struct obd_client_handle *och,
132 enum mds_op_bias bias, void *data)
134 const struct ll_inode_info *lli = ll_i2info(inode);
135 struct md_op_data *op_data;
136 struct ptlrpc_request *req = NULL;
/* Sanity: the export must still be connected to an MDC device. */
140 if (class_exp2obd(md_exp) == NULL) {
141 CERROR("%s: invalid MDC connection handle closing "DFID"\n",
142 ll_get_fsname(inode->i_sb, NULL, 0),
143 PFID(&lli->lli_fid));
147 OBD_ALLOC_PTR(op_data);
148 /* We leak openhandle and request here on error, but not much to be
149 * done in OOM case since app won't retry close on error either. */
151 GOTO(out, rc = -ENOMEM);
153 ll_prepare_close(inode, op_data, och);
/* Per-bias packing; cases below come from a switch on @bias. */
155 case MDS_CLOSE_LAYOUT_SWAP:
156 LASSERT(data != NULL);
157 op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
158 op_data->op_data_version = 0;
159 op_data->op_lease_handle = och->och_lease_handle;
/* @data is the second inode whose layout is swapped in. */
160 op_data->op_fid2 = *ll_inode2fid(data);
163 case MDS_HSM_RELEASE:
164 LASSERT(data != NULL);
165 op_data->op_bias |= MDS_HSM_RELEASE;
/* @data is a pointer to the data version to release against. */
166 op_data->op_data_version = *(__u64 *)data;
167 op_data->op_lease_handle = och->och_lease_handle;
168 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
172 LASSERT(data == NULL);
176 rc = md_close(md_exp, op_data, och->och_mod, &req);
/* -EINTR is expected when the close is interrupted; don't log it. */
177 if (rc != 0 && rc != -EINTR)
178 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
179 md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
/* For biased closes, check the reply to see whether the MDT actually
 * executed the close intent (release/swap). */
182 op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) {
183 struct mdt_body *body;
185 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
186 if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
190 ll_finish_md_op_data(op_data);
194 md_clear_open_replay_data(md_exp, och);
/* Poison the file handle so any later use is detectable. */
195 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
198 ptlrpc_req_finished(req); /* This is close request */
/*
 * Drop one reference on the per-mode (write/exec/read) MDS open handle
 * cached in the inode; when the last user is gone, send the actual
 * close RPC to the MDS.
 */
202 int ll_md_real_close(struct inode *inode, fmode_t fmode)
204 struct ll_inode_info *lli = ll_i2info(inode);
205 struct obd_client_handle **och_p;
206 struct obd_client_handle *och;
/* Select the cached handle slot matching the open mode. */
211 if (fmode & FMODE_WRITE) {
212 och_p = &lli->lli_mds_write_och;
213 och_usecount = &lli->lli_open_fd_write_count;
214 } else if (fmode & FMODE_EXEC) {
215 och_p = &lli->lli_mds_exec_och;
216 och_usecount = &lli->lli_open_fd_exec_count;
218 LASSERT(fmode & FMODE_READ);
219 och_p = &lli->lli_mds_read_och;
220 och_usecount = &lli->lli_open_fd_read_count;
223 mutex_lock(&lli->lli_och_mutex);
224 if (*och_usecount > 0) {
225 /* There are still users of this handle, so skip
227 mutex_unlock(&lli->lli_och_mutex);
233 mutex_unlock(&lli->lli_och_mutex);
236 /* There might be a race and this handle may already
238 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
239 och, inode, 0, NULL);
/*
 * Per-file-descriptor close: release group locks and leases held by
 * this fd, drop the fd's reference on the inode's open handle, and
 * free the ll_file_data.  If a matching OPEN DLM lock is still cached
 * (md_lock_match with LDLM_FL_TEST_LOCK) the real MDS close is
 * deferred until lock cancellation.
 */
245 static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
248 union ldlm_policy_data policy = {
249 .l_inodebits = { MDS_INODELOCK_OPEN },
251 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
252 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
253 struct ll_inode_info *lli = ll_i2info(inode);
254 struct lustre_handle lockh;
255 enum ldlm_mode lockmode;
259 /* clear group lock, if present */
260 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
261 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
263 if (fd->fd_lease_och != NULL) {
266 /* Usually the lease is not released when the
267 * application crashed, we need to release here. */
268 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
269 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
270 PFID(&lli->lli_fid), rc, lease_broken);
272 fd->fd_lease_och = NULL;
275 if (fd->fd_och != NULL) {
276 rc = ll_close_inode_openhandle(md_exp, fd->fd_och, inode, 0,
282 /* Let's see if we have good enough OPEN lock on the file and if
283 we can skip talking to MDS */
284 mutex_lock(&lli->lli_och_mutex);
/* Drop this fd's contribution to the per-mode open counters. */
285 if (fd->fd_omode & FMODE_WRITE) {
287 LASSERT(lli->lli_open_fd_write_count);
288 lli->lli_open_fd_write_count--;
289 } else if (fd->fd_omode & FMODE_EXEC) {
291 LASSERT(lli->lli_open_fd_exec_count);
292 lli->lli_open_fd_exec_count--;
295 LASSERT(lli->lli_open_fd_read_count);
296 lli->lli_open_fd_read_count--;
298 mutex_unlock(&lli->lli_och_mutex);
/* No cached OPEN lock of the right mode: close on the MDS now. */
300 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
301 LDLM_IBITS, &policy, lockmode, &lockh))
302 rc = ll_md_real_close(inode, fd->fd_omode);
305 LUSTRE_FPRIVATE(file) = NULL;
306 ll_file_data_put(fd);
311 /* While this returns an error code, fput() the caller does not, so we need
312 * to make every effort to clean up all of our state here. Also, applications
313 * rarely check close errors and even if an error is returned they will not
314 * re-try the close call.
/*
 * VFS ->release() entry point: tears down per-fd state (remote-ACL
 * bookkeeping, statahead authorization, cached async write errors)
 * and delegates the MDS-side close to ll_md_close().  The root dentry
 * is special-cased: its fd is freed without an MDS close.
 */
316 int ll_file_release(struct inode *inode, struct file *file)
318 struct ll_file_data *fd;
319 struct ll_sb_info *sbi = ll_i2sbi(inode);
320 struct ll_inode_info *lli = ll_i2info(inode);
324 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
325 PFID(ll_inode2fid(inode)), inode);
327 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL cleanup applies only to the filesystem root. */
328 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
329 inode == inode->i_sb->s_root->d_inode) {
330 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
333 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
334 fd->fd_flags &= ~LL_FILE_RMTACL;
335 rct_del(&sbi->ll_rct, current_pid());
336 et_search_free(&sbi->ll_et, current_pid());
341 if (inode->i_sb->s_root != file->f_path.dentry)
342 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
343 fd = LUSTRE_FPRIVATE(file);
346 /* The last ref on @file, maybe not the owner pid of statahead,
347 * because parent and child process can share the same file handle. */
348 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
349 ll_deauthorize_statahead(inode, fd);
/* Root of the mount: free fd locally, no MDS close required. */
351 if (inode->i_sb->s_root == file->f_path.dentry) {
352 LUSTRE_FPRIVATE(file) = NULL;
353 ll_file_data_put(fd);
/* Surface any async write error recorded by the OSC layer. */
357 if (!S_ISDIR(inode->i_mode)) {
358 if (lli->lli_clob != NULL)
359 lov_read_and_clear_async_rc(lli->lli_clob);
360 lli->lli_async_rc = 0;
363 rc = ll_md_close(sbi->ll_md_exp, inode, file);
365 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
366 libcfs_debug_dumplog();
/*
 * Enqueue an intent-OPEN lock request to the MDS for @file.  Packs the
 * dentry name only when the server lacks OBD_CONNECT_OPEN_BY_FID; on
 * success fills the inode from the reply and attaches the granted DLM
 * lock.  @lmm/@lmmsize carry an optional client-supplied layout.
 */
371 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
372 struct lookup_intent *itp)
374 struct dentry *de = file->f_path.dentry;
375 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
376 struct dentry *parent = de->d_parent;
377 const char *name = NULL;
379 struct md_op_data *op_data;
380 struct ptlrpc_request *req = NULL;
384 LASSERT(parent != NULL);
385 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
387 /* if server supports open-by-fid, or file name is invalid, don't pack
388 * name in open request */
389 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
390 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
391 name = de->d_name.name;
392 len = de->d_name.len;
395 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
396 name, len, 0, LUSTRE_OPC_ANY, NULL);
398 RETURN(PTR_ERR(op_data));
399 op_data->op_data = lmm;
400 op_data->op_data_size = lmmsize;
402 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
403 &ll_md_blocking_ast, 0);
404 ll_finish_md_op_data(op_data);
406 /* reason for keeping own exit path - don't flood log
407 * with messages with -ESTALE errors.
/* If the server opened a handle we will not use, release it. */
409 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
410 it_open_error(DISP_OPEN_OPEN, itp))
412 ll_release_openhandle(de, itp);
416 if (it_disposition(itp, DISP_LOOKUP_NEG))
417 GOTO(out, rc = -ENOENT);
419 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
420 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
421 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
425 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
426 if (!rc && itp->d.lustre.it_lock_mode)
427 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
430 ptlrpc_req_finished(req);
431 ll_intent_drop_lock(itp);
/*
 * Populate an obd_client_handle from a completed open intent's reply
 * (file handle, FID, lease lock cookie, open flags) and register it
 * for open replay with the MD layer.
 */
436 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
437 struct obd_client_handle *och)
439 struct ptlrpc_request *req = it->d.lustre.it_data;
440 struct mdt_body *body;
442 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
443 och->och_fh = body->mbo_handle;
444 och->och_fid = body->mbo_fid1;
445 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
446 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
447 och->och_flags = it->it_flags;
449 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish a successful open locally: optionally fill @och from the
 * intent reply, attach the ll_file_data to the struct file, and
 * initialize readahead state and the cl-context bookkeeping.
 */
452 static int ll_local_open(struct file *file, struct lookup_intent *it,
453 struct ll_file_data *fd, struct obd_client_handle *och)
455 struct inode *inode = file->f_path.dentry->d_inode;
458 LASSERT(!LUSTRE_FPRIVATE(file));
465 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
470 LUSTRE_FPRIVATE(file) = fd;
471 ll_readahead_init(inode, &fd->fd_ras);
/* Remember the effective open mode for the eventual close. */
472 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
474 /* ll_cl_context initialize */
475 rwlock_init(&fd->fd_lock);
476 INIT_LIST_HEAD(&fd->fd_lccs);
481 /* Open a file, and (for the very first open) create objects on the OSTs at
482 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
483 * creation or open until ll_lov_setstripe() ioctl is called.
485 * If we already have the stripe MD locally then we don't request it in
486 * md_open(), by passing a lmm_size = 0.
488 * It is up to the application to ensure no other processes open this file
489 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
490 * used. We might be able to avoid races of that sort by getting lli_open_sem
491 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
492 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * VFS ->open() entry point.  Reuses a cached per-mode MDS open handle
 * when one exists; otherwise issues (or completes) an intent-open and
 * caches the resulting handle on the inode under lli_och_mutex.
 */
494 int ll_file_open(struct inode *inode, struct file *file)
496 struct ll_inode_info *lli = ll_i2info(inode);
497 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
498 .it_flags = file->f_flags };
499 struct obd_client_handle **och_p = NULL;
500 __u64 *och_usecount = NULL;
501 struct ll_file_data *fd;
505 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
506 PFID(ll_inode2fid(inode)), inode, file->f_flags);
/* An intent may have been stashed by the lookup path. */
508 it = file->private_data; /* XXX: compat macro */
509 file->private_data = NULL; /* prevent ll_local_open assertion */
511 fd = ll_file_data_get();
513 GOTO(out_openerr, rc = -ENOMEM);
516 if (S_ISDIR(inode->i_mode))
517 ll_authorize_statahead(inode, fd);
/* Root of the mount: no MDS open needed, just attach the fd. */
519 if (inode->i_sb->s_root == file->f_path.dentry) {
520 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own open intent. */
524 if (!it || !it->d.lustre.it_disposition) {
525 /* Convert f_flags into access mode. We cannot use file->f_mode,
526 * because everything but O_ACCMODE mask was stripped from
528 if ((oit.it_flags + 1) & O_ACCMODE)
530 if (file->f_flags & O_TRUNC)
531 oit.it_flags |= FMODE_WRITE;
533 /* kernel only call f_op->open in dentry_open. filp_open calls
534 * dentry_open after call to open_namei that checks permissions.
535 * Only nfsd_open call dentry_open directly without checking
536 * permissions and because of that this code below is safe. */
537 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
538 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
540 /* We do not want O_EXCL here, presumably we opened the file
541 * already? XXX - NFS implications? */
542 oit.it_flags &= ~O_EXCL;
544 /* bug20584, if "it_flags" contains O_CREAT, the file will be
545 * created if necessary, then "IT_CREAT" should be set to keep
546 * consistent with it */
547 if (oit.it_flags & O_CREAT)
548 oit.it_op |= IT_CREAT;
554 /* Let's see if we have file open on MDS already. */
555 if (it->it_flags & FMODE_WRITE) {
556 och_p = &lli->lli_mds_write_och;
557 och_usecount = &lli->lli_open_fd_write_count;
558 } else if (it->it_flags & FMODE_EXEC) {
559 och_p = &lli->lli_mds_exec_och;
560 och_usecount = &lli->lli_open_fd_exec_count;
562 och_p = &lli->lli_mds_read_och;
563 och_usecount = &lli->lli_open_fd_read_count;
566 mutex_lock(&lli->lli_och_mutex);
567 if (*och_p) { /* Open handle is present */
568 if (it_disposition(it, DISP_OPEN_OPEN)) {
569 /* Well, there's extra open request that we do not need,
570 let's close it somehow. This will decref request. */
571 rc = it_open_error(DISP_OPEN_OPEN, it);
573 mutex_unlock(&lli->lli_och_mutex);
574 GOTO(out_openerr, rc);
577 ll_release_openhandle(file->f_path.dentry, it);
/* Reuse the cached handle; NULL och means "already cached". */
581 rc = ll_local_open(file, it, fd, NULL);
584 mutex_unlock(&lli->lli_och_mutex);
585 GOTO(out_openerr, rc);
588 LASSERT(*och_usecount == 0);
589 if (!it->d.lustre.it_disposition) {
590 /* We cannot just request lock handle now, new ELC code
591 means that one of other OPEN locks for this file
592 could be cancelled, and since blocking ast handler
593 would attempt to grab och_mutex as well, that would
594 result in a deadlock */
595 mutex_unlock(&lli->lli_och_mutex);
597 * Normally called under two situations:
599 * 2. A race/condition on MDS resulting in no open
600 * handle to be returned from LOOKUP|OPEN request,
601 * for example if the target entry was a symlink.
603 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
605 * Always specify MDS_OPEN_BY_FID because we don't want
606 * to get file with different fid.
608 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
609 rc = ll_intent_file_open(file, NULL, 0, it);
611 GOTO(out_openerr, rc);
615 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
617 GOTO(out_och_free, rc = -ENOMEM);
621 /* md_intent_lock() didn't get a request ref if there was an
622 * open error, so don't do cleanup on the request here
624 /* XXX (green): Should not we bail out on any error here, not
625 * just open error? */
626 rc = it_open_error(DISP_OPEN_OPEN, it);
628 GOTO(out_och_free, rc);
630 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
631 "inode %p: disposition %x, status %d\n", inode,
632 it_disposition(it, ~0), it->d.lustre.it_status);
634 rc = ll_local_open(file, it, fd, *och_p);
636 GOTO(out_och_free, rc);
638 mutex_unlock(&lli->lli_och_mutex);
641 /* Must do this outside lli_och_mutex lock to prevent deadlock where
642 different kind of OPEN lock for this same inode gets cancelled
643 by ldlm_cancel_lru */
644 if (!S_ISREG(inode->i_mode))
645 GOTO(out_och_free, rc);
647 cl_lov_delay_create_clear(&file->f_flags);
648 GOTO(out_och_free, rc);
/* Error/cleanup paths below (labels dropped in this excerpt). */
652 if (och_p && *och_p) {
653 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
654 *och_p = NULL; /* OBD_FREE writes some magic there */
657 mutex_unlock(&lli->lli_och_mutex);
660 if (lli->lli_opendir_key == fd)
661 ll_deauthorize_statahead(inode, fd);
663 ll_file_data_put(fd);
665 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* Drop the extra reply-request reference held by the intent. */
668 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
669 ptlrpc_req_finished(it->d.lustre.it_data);
670 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease locks: on a blocking callback, cancel the
 * lease lock asynchronously.  Unlike ll_md_blocking_ast() this does
 * not touch the open handle.
 */
676 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
677 struct ldlm_lock_desc *desc, void *data, int flag)
680 struct lustre_handle lockh;
684 case LDLM_CB_BLOCKING:
685 ldlm_lock2handle(lock, &lockh);
686 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
688 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
692 case LDLM_CB_CANCELING:
700 * Acquire a lease and open the file.
/*
 * Open @inode with an MDS_OPEN_LEASE intent and return the resulting
 * open handle.  If @file is given, its existing open handle must be
 * the sole opener so the MDT can recognize the same owner.  The lease
 * lock is kept out of the LRU (LDLM_FL_NO_LRU) and marked exclusive
 * (LDLM_FL_EXCL) so it is only dropped via an explicit lease break.
 * Returns the och on success or an ERR_PTR on failure.
 */
702 static struct obd_client_handle *
703 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
706 struct lookup_intent it = { .it_op = IT_OPEN };
707 struct ll_sb_info *sbi = ll_i2sbi(inode);
708 struct md_op_data *op_data;
709 struct ptlrpc_request *req = NULL;
710 struct lustre_handle old_handle = { 0 };
711 struct obd_client_handle *och = NULL;
/* Leases are only defined for pure read or pure write opens. */
716 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
717 RETURN(ERR_PTR(-EINVAL));
720 struct ll_inode_info *lli = ll_i2info(inode);
721 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
722 struct obd_client_handle **och_p;
725 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
726 RETURN(ERR_PTR(-EPERM));
728 /* Get the openhandle of the file */
730 mutex_lock(&lli->lli_och_mutex);
/* Only one lease per file descriptor is allowed. */
731 if (fd->fd_lease_och != NULL) {
732 mutex_unlock(&lli->lli_och_mutex);
736 if (fd->fd_och == NULL) {
737 if (file->f_mode & FMODE_WRITE) {
738 LASSERT(lli->lli_mds_write_och != NULL);
739 och_p = &lli->lli_mds_write_och;
740 och_usecount = &lli->lli_open_fd_write_count;
742 LASSERT(lli->lli_mds_read_och != NULL);
743 och_p = &lli->lli_mds_read_och;
744 och_usecount = &lli->lli_open_fd_read_count;
746 if (*och_usecount == 1) {
753 mutex_unlock(&lli->lli_och_mutex);
754 if (rc < 0) /* more than 1 opener */
757 LASSERT(fd->fd_och != NULL);
758 old_handle = fd->fd_och->och_fh;
763 RETURN(ERR_PTR(-ENOMEM));
765 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
766 LUSTRE_OPC_ANY, NULL);
768 GOTO(out, rc = PTR_ERR(op_data));
770 /* To tell the MDT this openhandle is from the same owner */
771 op_data->op_handle = old_handle;
773 it.it_flags = fmode | open_flags;
774 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
775 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
776 &ll_md_blocking_lease_ast,
777 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
778 * it can be cancelled which may mislead applications that the lease is
780 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
781 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
782 * doesn't deal with openhandle, so normal openhandle will be leaked. */
783 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
784 ll_finish_md_op_data(op_data);
785 ptlrpc_req_finished(req);
787 GOTO(out_release_it, rc);
789 if (it_disposition(&it, DISP_LOOKUP_NEG))
790 GOTO(out_release_it, rc = -ENOENT);
792 rc = it_open_error(DISP_OPEN_OPEN, &it);
794 GOTO(out_release_it, rc);
796 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
797 ll_och_fill(sbi->ll_md_exp, &it, och);
799 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
800 GOTO(out_close, rc = -EOPNOTSUPP);
802 /* already get lease, handle lease lock */
803 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
804 if (it.d.lustre.it_lock_mode == 0 ||
805 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
806 /* open lock must return for lease */
807 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
808 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
809 it.d.lustre.it_lock_bits);
810 GOTO(out_close, rc = -EPROTO);
813 ll_intent_release(&it);
/* Error path: cancel the lease lock and close the open handle. */
817 /* Cancel open lock */
818 if (it.d.lustre.it_lock_mode != 0) {
819 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
820 it.d.lustre.it_lock_mode);
821 it.d.lustre.it_lock_mode = 0;
822 och->och_lease_handle.cookie = 0ULL;
824 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, och, inode, 0, NULL);
826 CERROR("%s: error closing file "DFID": %d\n",
827 ll_get_fsname(inode->i_sb, NULL, 0),
828 PFID(&ll_i2info(inode)->lli_fid), rc2);
829 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
831 ll_intent_release(&it);
839 * Check whether a layout swap can be done between two inodes.
841 * \param[in] inode1 First inode to check
842 * \param[in] inode2 Second inode to check
844 * \retval 0 on success, layout swap can be performed between both inodes
845 * \retval negative error code if requirements are not met
/*
 * Requirements visible here: both inodes are regular files, the caller
 * has write permission on both, and both live on the same superblock.
 */
847 static int ll_check_swap_layouts_validity(struct inode *inode1,
848 struct inode *inode2)
850 if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
853 if (inode_permission(inode1, MAY_WRITE) ||
854 inode_permission(inode2, MAY_WRITE))
857 if (inode1->i_sb != inode2->i_sb)
/*
 * Close @och on @inode with the MDS_CLOSE_LAYOUT_SWAP bias, atomically
 * swapping layouts with @inode2 on the MDT.  Rejects the swap when the
 * two FIDs are identical.
 */
863 static int ll_swap_layouts_close(struct obd_client_handle *och,
864 struct inode *inode, struct inode *inode2)
866 const struct lu_fid *fid1 = ll_inode2fid(inode);
867 const struct lu_fid *fid2;
871 CDEBUG(D_INODE, "%s: biased close of file "DFID"\n",
872 ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
874 rc = ll_check_swap_layouts_validity(inode, inode2);
876 GOTO(out_free_och, rc);
878 /* We now know that inode2 is a lustre inode */
879 fid2 = ll_inode2fid(inode2);
/* Swapping a file's layout with itself makes no sense. */
881 rc = lu_fid_cmp(fid1, fid2);
883 GOTO(out_free_och, rc = -EINVAL);
885 /* Close the file and swap layouts between inode & inode2.
886 * NB: lease lock handle is released in mdc_close_layout_swap_pack()
887 * because we still need it to pack l_remote_handle to MDT. */
888 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
889 MDS_CLOSE_LAYOUT_SWAP, inode2);
891 och = NULL; /* freed in ll_close_inode_openhandle() */
901 * Release lease and close the file.
902 * It will check if the lease has ever broken.
/*
 * Cancel the lease lock (if it was not already cancelled by a lease
 * break), report via @lease_broken whether the lease had been broken,
 * then close the open handle on the MDS.
 */
904 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
907 struct ldlm_lock *lock;
908 bool cancelled = true;
912 lock = ldlm_handle2lock(&och->och_lease_handle);
914 lock_res_and_lock(lock);
915 cancelled = ldlm_is_cancel(lock);
916 unlock_res_and_lock(lock);
920 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
921 PFID(&ll_i2info(inode)->lli_fid), cancelled);
/* Lease still intact: cancel it ourselves before closing. */
924 ldlm_cli_cancel(&och->och_lease_handle, 0);
925 if (lease_broken != NULL)
926 *lease_broken = cancelled;
928 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
/*
 * Merge MDS-provided timestamps (cached in lli) with the attributes
 * reported by the OSTs via cl_object_attr_get(), keeping the most
 * recent of each timestamp, and update the inode's size and blocks.
 * Runs under the inode size lock.
 */
934 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
936 struct ll_inode_info *lli = ll_i2info(inode);
937 struct cl_object *obj = lli->lli_clob;
938 struct cl_attr *attr = vvp_env_thread_attr(env);
946 ll_inode_size_lock(inode);
948 /* merge timestamps the most recently obtained from mds with
949 timestamps obtained from osts */
950 LTIME_S(inode->i_atime) = lli->lli_atime;
951 LTIME_S(inode->i_mtime) = lli->lli_mtime;
952 LTIME_S(inode->i_ctime) = lli->lli_ctime;
954 atime = LTIME_S(inode->i_atime);
955 mtime = LTIME_S(inode->i_mtime);
956 ctime = LTIME_S(inode->i_ctime);
958 cl_object_attr_lock(obj);
959 rc = cl_object_attr_get(env, obj, attr);
960 cl_object_attr_unlock(obj);
963 GOTO(out_size_unlock, rc);
/* Keep whichever timestamp is newer: MDS-cached or OST-reported. */
965 if (atime < attr->cat_atime)
966 atime = attr->cat_atime;
968 if (ctime < attr->cat_ctime)
969 ctime = attr->cat_ctime;
971 if (mtime < attr->cat_mtime)
972 mtime = attr->cat_mtime;
974 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
975 PFID(&lli->lli_fid), attr->cat_size);
977 i_size_write(inode, attr->cat_size);
978 inode->i_blocks = attr->cat_blocks;
980 LTIME_S(inode->i_atime) = atime;
981 LTIME_S(inode->i_mtime) = mtime;
982 LTIME_S(inode->i_ctime) = ctime;
985 ll_inode_size_unlock(inode);
/*
 * Return true when atime updates are disabled for this open, checking
 * the open flags, inode flags, and mount flags in the same order as
 * the kernel's file_accessed()/touch_atime() logic.
 */
990 static bool file_is_noatime(const struct file *file)
992 const struct vfsmount *mnt = file->f_path.mnt;
993 const struct inode *inode = file->f_path.dentry->d_inode;
995 /* Adapted from file_accessed() and touch_atime().*/
996 if (file->f_flags & O_NOATIME)
999 if (inode->i_flags & S_NOATIME)
1002 if (IS_NOATIME(inode))
1005 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1008 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1011 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io from the struct file's open flags: non-blocking,
 * append and sync behavior for writes, lock policy (never for nolock
 * files, mandatory for O_APPEND, otherwise "maybe"), and noatime.
 */
1017 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
1019 struct inode *inode = file->f_path.dentry->d_inode;
1021 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1023 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1024 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1025 file->f_flags & O_DIRECT ||
1028 io->ci_obj = ll_i2info(inode)->lli_clob;
1029 io->ci_lockreq = CILR_MAYBE;
1030 if (ll_file_nolock(file)) {
1031 io->ci_lockreq = CILR_NEVER;
1032 io->ci_no_srvlock = 1;
1033 } else if (file->f_flags & O_APPEND) {
1034 io->ci_lockreq = CILR_MANDATORY;
1037 io->ci_noatime = file_is_noatime(file);
/*
 * Common read/write engine: sets up a cl_io for @iot (CIT_READ or
 * CIT_WRITE), takes the per-inode range lock where required to
 * serialize overlapping writers (and O_DIRECT readers, see LU-6227),
 * runs cl_io_loop(), and restarts the IO when the cl layer asks for
 * it.  Updates *ppos and the per-mount read/write statistics.
 */
1041 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1042 struct file *file, enum cl_io_type iot,
1043 loff_t *ppos, size_t count)
1045 struct vvp_io *vio = vvp_env_io(env);
1046 struct inode *inode = file->f_path.dentry->d_inode;
1047 struct ll_inode_info *lli = ll_i2info(inode);
1048 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1052 struct range_lock range;
1056 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1057 file->f_path.dentry->d_name.name, iot, *ppos, count);
1060 io = vvp_env_thread_io(env);
1061 ll_io_init(io, file, iot == CIT_WRITE);
1063 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1064 bool range_locked = false;
/* O_APPEND writes can land anywhere, so lock to EOF. */
1066 if (file->f_flags & O_APPEND)
1067 range_lock_init(&range, 0, LUSTRE_EOF);
1069 range_lock_init(&range, *ppos, *ppos + count - 1);
1071 vio->vui_fd = LUSTRE_FPRIVATE(file);
1072 vio->vui_io_subtype = args->via_io_subtype;
1074 switch (vio->vui_io_subtype) {
1076 vio->vui_iter = args->u.normal.via_iter;
1077 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1078 vio->vui_tot_nrsegs = vio->vui_iter->nr_segs;
1079 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1080 vio->vui_iocb = args->u.normal.via_iocb;
1081 /* Direct IO reads must also take range lock,
1082 * or multiple reads will try to work on the same pages
1083 * See LU-6227 for details. */
1084 if (((iot == CIT_WRITE) ||
1085 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1086 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1087 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1089 rc = range_lock(&lli->lli_write_tree, &range);
1093 range_locked = true;
1097 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1098 vio->u.splice.vui_flags = args->u.splice.via_flags;
1101 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1105 ll_cl_add(file, env, io);
1106 rc = cl_io_loop(env, io);
1107 ll_cl_remove(file, env);
1110 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1112 range_unlock(&lli->lli_write_tree, &range);
1115 /* cl_io_rw_init() handled IO */
/* Accumulate bytes transferred and advance the file position. */
1119 if (io->ci_nob > 0) {
1120 result += io->ci_nob;
1121 count -= io->ci_nob;
1122 *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */
1124 /* prepare IO restart */
1125 if (count > 0 && args->via_io_subtype == IO_NORMAL) {
1126 args->u.normal.via_iter = vio->vui_iter;
1127 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1128 args->u.normal.via_iter->nr_segs = vio->vui_tot_nrsegs;
1129 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1134 cl_io_fini(env, io);
1136 if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
1138 "%s: restart %s from %lld, count:%zu, result: %zd\n",
1139 file->f_path.dentry->d_name.name,
1140 iot == CIT_READ ? "read" : "write",
1141 *ppos, count, result);
1145 if (iot == CIT_READ) {
1147 ll_stats_ops_tally(ll_i2sbi(inode),
1148 LPROC_LL_READ_BYTES, result);
1149 } else if (iot == CIT_WRITE) {
1151 ll_stats_ops_tally(ll_i2sbi(inode),
1152 LPROC_LL_WRITE_BYTES, result);
1153 fd->fd_write_failed = false;
1154 } else if (rc != -ERESTARTSYS) {
/* Remember the failure so fsync/close can report it. */
1155 fd->fd_write_failed = true;
1159 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1161 return result > 0 ? result : rc;
1165 * Read from a file (through the page cache).
/*
 * ->read_iter() entry point: packages the iov_iter and kiocb into
 * vvp_io_args and dispatches to ll_file_io_generic() as CIT_READ.
 */
1167 static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1169 struct vvp_io_args *args;
1174 env = cl_env_get(&refcheck);
1176 return PTR_ERR(env);
1178 args = ll_env_args(env, IO_NORMAL);
1179 args->u.normal.via_iter = to;
1180 args->u.normal.via_iocb = iocb;
1182 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1183 &iocb->ki_pos, iov_iter_count(to));
1184 cl_env_put(env, &refcheck);
1189 * Write to a file (through the page cache).
/*
 * ->write_iter() entry point: mirrors ll_file_read_iter() but
 * dispatches to ll_file_io_generic() as CIT_WRITE.
 */
1191 static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1193 struct vvp_io_args *args;
1198 env = cl_env_get(&refcheck);
1200 return PTR_ERR(env);
1202 args = ll_env_args(env, IO_NORMAL);
1203 args->u.normal.via_iter = from;
1204 args->u.normal.via_iocb = iocb;
1206 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1207 &iocb->ki_pos, iov_iter_count(from));
1208 cl_env_put(env, &refcheck);
1212 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1214 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array and compute the total byte count, rejecting
 * segments that would overflow ssize_t or are not readable from user
 * space (access_ok).  Truncates *nr_segs at the first bad segment.
 */
1216 static int ll_file_get_iov_count(const struct iovec *iov,
1217 unsigned long *nr_segs, size_t *count)
1222 for (seg = 0; seg < *nr_segs; seg++) {
1223 const struct iovec *iv = &iov[seg];
1226 * If any segment has a negative length, or the cumulative
1227 * length ever wraps negative then return -EINVAL.
1230 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1232 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1237 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Legacy ->aio_read() path for kernels without read_iter: validates
 * the iovec, copies it (using the per-env single iovec when nr_segs
 * is 1, otherwise a heap copy), builds an iov_iter and forwards to
 * ll_file_read_iter().
 */
1244 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1245 unsigned long nr_segs, loff_t pos)
1247 struct iovec *local_iov;
1248 struct iov_iter *to;
1253 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1261 env = cl_env_get(&refcheck);
1263 RETURN(PTR_ERR(env));
1265 local_iov = &ll_env_info(env)->lti_local_iov;
1268 cl_env_put(env, &refcheck);
/* Multi-segment case: copy the caller's iovec array. */
1270 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1271 if (local_iov == NULL)
1274 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1282 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1283 iov_iter_init(to, READ, local_iov, nr_segs, iov_count);
1284 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1285 iov_iter_init(to, local_iov, nr_segs, iov_count, 0);
1286 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1288 result = ll_file_read_iter(iocb, to);
1293 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous read(2) entry point for pre-iter kernels: wraps the user
 * buffer in a one-segment iovec, builds a sync kiocb at *ppos, calls
 * ll_file_aio_read(), then writes the advanced position back to *ppos.
 * NOTE(review): elided lines in this extract; comments only.
 */
1298 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1302 struct iovec iov = { .iov_base = buf, .iov_len = count };
1303 struct kiocb *kiocb;
1308 env = cl_env_get(&refcheck);
1310 RETURN(PTR_ERR(env));
1312 kiocb = &ll_env_info(env)->lti_kiocb;
1313 init_sync_kiocb(kiocb, file);
1314 kiocb->ki_pos = *ppos;
/* Field naming for the request length varies across kernel versions. */
1315 #ifdef HAVE_KIOCB_KI_LEFT
1316 kiocb->ki_left = count;
1317 #elif defined(HAVE_KI_NBYTES)
1318 kiocb->ki_nbytes = count;
1321 result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos);
1322 *ppos = kiocb->ki_pos;
1324 cl_env_put(env, &refcheck);
/*
 * Compatibility aio_write for kernels without write_iter: same structure
 * as ll_file_aio_read() (iovec copy, iov_iter setup, forward to
 * ll_file_write_iter()), but with a WRITE-direction iterator.
 * NOTE(review): elided lines include error checks and cleanup labels.
 */
1329 * Write to a file (through the page cache).
1332 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1333 unsigned long nr_segs, loff_t pos)
1335 struct iovec *local_iov;
1336 struct iov_iter *from;
1341 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1349 env = cl_env_get(&refcheck);
1351 RETURN(PTR_ERR(env));
/* Single-segment fast path borrows the per-env iovec. */
1353 local_iov = &ll_env_info(env)->lti_local_iov;
1356 cl_env_put(env, &refcheck);
1358 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1359 if (local_iov == NULL)
1362 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1365 OBD_ALLOC_PTR(from);
1370 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1371 iov_iter_init(from, WRITE, local_iov, nr_segs, iov_count);
1372 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1373 iov_iter_init(from, local_iov, nr_segs, iov_count, 0);
1374 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1376 result = ll_file_write_iter(iocb, from);
1381 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous write(2) entry point for pre-iter kernels: mirror of
 * ll_file_read() using ll_file_aio_write().
 * NOTE(review): elided lines in this extract; comments only.
 */
1386 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1387 size_t count, loff_t *ppos)
1390 struct iovec iov = { .iov_base = (void __user *)buf,
1392 struct kiocb *kiocb;
1397 env = cl_env_get(&refcheck);
1399 RETURN(PTR_ERR(env));
1401 kiocb = &ll_env_info(env)->lti_kiocb;
1402 init_sync_kiocb(kiocb, file);
1403 kiocb->ki_pos = *ppos;
/* Request-length field name differs per kernel version. */
1404 #ifdef HAVE_KIOCB_KI_LEFT
1405 kiocb->ki_left = count;
1406 #elif defined(HAVE_KI_NBYTES)
1407 kiocb->ki_nbytes = count;
1410 result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos);
1411 *ppos = kiocb->ki_pos;
1413 cl_env_put(env, &refcheck);
1416 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
/*
 * splice_read implementation: routes page-cache data into a pipe by
 * packing the pipe/flags into IO_SPLICE args and running the generic
 * CIT_READ path.
 * NOTE(review): elided lines in this extract; comments only.
 */
1419 * Send file content (through pagecache) somewhere with helper
1421 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1422 struct pipe_inode_info *pipe, size_t count,
1426 struct vvp_io_args *args;
1431 env = cl_env_get(&refcheck);
1433 RETURN(PTR_ERR(env));
1435 args = ll_env_args(env, IO_SPLICE);
1436 args->u.splice.via_pipe = pipe;
1437 args->u.splice.via_flags = flags;
1439 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1440 cl_env_put(env, &refcheck);
/*
 * Apply striping EA to an inode via an MDS_OPEN_BY_FID open intent, then
 * release the transient open handle. Runs under the inode size lock;
 * always clears the LOV delay-create flag on the way out.
 * NOTE(review): elided lines (declarations, final RETURN) in this
 * extract; comments only.
 */
1444 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1445 __u64 flags, struct lov_user_md *lum,
1448 struct lookup_intent oit = {
1450 .it_flags = flags | MDS_OPEN_BY_FID,
1455 ll_inode_size_lock(inode);
1456 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1458 GOTO(out_unlock, rc);
/* The open was only needed to carry the EA; drop the handle now. */
1460 ll_release_openhandle(file->f_path.dentry, &oit);
1463 ll_inode_size_unlock(inode);
1464 ll_intent_release(&oit);
1465 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA (striping metadata) for @filename from the MDS via
 * md_getattr_name(), validate the magic, and byte-swap it to host order
 * for userspace on big-endian machines. On success *lmmp/*lmm_size point
 * into the reply buffer, which stays pinned through *request.
 * NOTE(review): elided lines throughout (declarations, several GOTOs,
 * swab call arguments); comments only added.
 */
1470 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1471 struct lov_mds_md **lmmp, int *lmm_size,
1472 struct ptlrpc_request **request)
1474 struct ll_sb_info *sbi = ll_i2sbi(inode);
1475 struct mdt_body *body;
1476 struct lov_mds_md *lmm = NULL;
1477 struct ptlrpc_request *req = NULL;
1478 struct md_op_data *op_data;
/* Use the default MD size as the reply buffer estimate. */
1481 rc = ll_get_default_mdsize(sbi, &lmmsize);
1485 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1486 strlen(filename), lmmsize,
1487 LUSTRE_OPC_ANY, NULL);
1488 if (IS_ERR(op_data))
1489 RETURN(PTR_ERR(op_data));
1491 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1492 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1493 ll_finish_md_op_data(op_data);
1495 CDEBUG(D_INFO, "md_getattr_name failed "
1496 "on %s: rc %d\n", filename, rc);
1500 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1501 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1503 lmmsize = body->mbo_eadatasize;
/* No striping EA present at all -> report -ENODATA to the caller. */
1505 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1507 GOTO(out, rc = -ENODATA);
1510 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1511 LASSERT(lmm != NULL);
/* Only V1/V3 LOV magics are understood here. */
1513 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1514 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1515 GOTO(out, rc = -EPROTO);
1519 * This is coming from the MDS, so is probably in
1520 * little endian. We convert it to host endian before
1521 * passing it to userspace.
/* Swab only needed when host order differs from LE wire order. */
1523 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1526 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1527 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1530 /* if function called for directory - we should
1531 * avoid swab not existent lsm objects */
1532 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1533 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
/* Object array only exists for regular files, not dir defaults. */
1534 if (S_ISREG(body->mbo_mode))
1535 lustre_swab_lov_user_md_objects(
1536 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1538 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1539 lustre_swab_lov_user_md_v3(
1540 (struct lov_user_md_v3 *)lmm);
1541 if (S_ISREG(body->mbo_mode))
1542 lustre_swab_lov_user_md_objects(
1543 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1550 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: admin-only path that copies a raw LOV EA
 * (one lov_user_md + one ost_data entry) from userspace and applies it
 * via ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS.
 * NOTE(review): elided lines (capability-RETURN, alloc check, final
 * RETURN) in this extract; comments only.
 */
1555 static int ll_lov_setea(struct inode *inode, struct file *file,
1558 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1559 struct lov_user_md *lump;
1560 int lum_size = sizeof(struct lov_user_md) +
1561 sizeof(struct lov_user_ost_data);
/* Raw EA injection is restricted to CAP_SYS_ADMIN. */
1565 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1568 OBD_ALLOC_LARGE(lump, lum_size);
1572 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1573 OBD_FREE_LARGE(lump, lum_size);
1577 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1579 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_GETSTRIPE helper: hand the userspace lum buffer to the cl
 * layer, which fills in the inode's striping.
 * NOTE(review): elided lines in this extract; comments only.
 */
1583 static int ll_file_getstripe(struct inode *inode,
1584 struct lov_user_md __user *lum)
1591 env = cl_env_get(&refcheck);
1593 RETURN(PTR_ERR(env));
1595 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1596 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user's lov_user_md into a
 * kernel buffer, apply it, refresh the layout generation, and echo the
 * resulting striping back through the same user buffer.
 * NOTE(review): elided lines (rc checks, braces) in this extract.
 */
1600 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1603 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1604 struct lov_user_md *klum;
1606 __u64 flags = FMODE_WRITE;
1609 rc = ll_copy_user_md(lum, &klum);
1614 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* presumably zeroed so getstripe below fills a clean count — confirm */
1618 put_user(0, &lum->lmm_stripe_count);
1620 ll_layout_refresh(inode, &gen);
1621 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1624 OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK handler: take a cluster-wide group lock with gid
 * @arg on this file. Checks under lli_lock that no group lock is held
 * yet, acquires via cl_get_grouplock(), then re-checks for a racing
 * winner before publishing the lock in the file descriptor.
 * NOTE(review): elided lines (returns, braces) in this extract.
 */
1629 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1631 struct ll_inode_info *lli = ll_i2info(inode);
1632 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1633 struct ll_grouplock grouplock;
/* gid 0 is reserved as "no group lock". */
1638 CWARN("group id for group lock must not be 0\n");
1642 if (ll_file_nolock(file))
1643 RETURN(-EOPNOTSUPP);
1645 spin_lock(&lli->lli_lock);
1646 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1647 CWARN("group lock already existed with gid %lu\n",
1648 fd->fd_grouplock.lg_gid);
1649 spin_unlock(&lli->lli_lock);
1652 LASSERT(fd->fd_grouplock.lg_lock == NULL);
1653 spin_unlock(&lli->lli_lock);
/* Acquire outside the spinlock; cl_get_grouplock() may block unless
 * O_NONBLOCK was set on the file. */
1655 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1656 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1660 spin_lock(&lli->lli_lock);
/* Re-check: another thread may have installed a lock while we slept. */
1661 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1662 spin_unlock(&lli->lli_lock);
1663 CERROR("another thread just won the race\n");
1664 cl_put_grouplock(&grouplock);
1668 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1669 fd->fd_grouplock = grouplock;
1670 spin_unlock(&lli->lli_lock);
1672 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock with gid @arg.
 * Verifies under lli_lock that a group lock is held and the gid matches,
 * detaches it from the fd, then drops the cl-layer lock outside the
 * spinlock.
 * NOTE(review): elided lines (returns, braces) in this extract.
 */
1676 static int ll_put_grouplock(struct inode *inode, struct file *file,
1679 struct ll_inode_info *lli = ll_i2info(inode);
1680 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1681 struct ll_grouplock grouplock;
1684 spin_lock(&lli->lli_lock);
1685 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1686 spin_unlock(&lli->lli_lock);
1687 CWARN("no group lock held\n");
1691 LASSERT(fd->fd_grouplock.lg_lock != NULL);
/* Unlock must present the same gid that was used to lock. */
1693 if (fd->fd_grouplock.lg_gid != arg) {
1694 CWARN("group lock %lu doesn't match current id %lu\n",
1695 arg, fd->fd_grouplock.lg_gid);
1696 spin_unlock(&lli->lli_lock);
/* Detach under the spinlock, release outside it. */
1700 grouplock = fd->fd_grouplock;
1701 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1702 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1703 spin_unlock(&lli->lli_lock);
1705 cl_put_grouplock(&grouplock);
1706 CDEBUG(D_INFO, "group lock %lu released\n", arg);
/*
 * Close the MDS open handle carried by a lookup intent: skip the fs
 * root and intents with no DISP_OPEN_OPEN disposition, otherwise fill a
 * transient obd_client_handle from the intent and close it.
 * NOTE(review): elided lines (alloc check, final RETURN) in this
 * extract; comments only.
 */
1711 * Close inode open handle
1713 * \param dentry [in] dentry which contains the inode
1714 * \param it [in,out] intent which contains open info and result
1717 * \retval <0 failure
1719 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1721 struct inode *inode = dentry->d_inode;
1722 struct obd_client_handle *och;
1728 /* Root ? Do nothing. */
1729 if (dentry->d_inode->i_sb->s_root == dentry)
1732 /* No open handle to close? Move away */
1733 if (!it_disposition(it, DISP_OPEN_OPEN))
1736 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1738 OBD_ALLOC(och, sizeof(*och));
1740 GOTO(out, rc = -ENOMEM);
1742 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1744 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1745 och, inode, 0, NULL);
/* Drop the enqueue reference ll_file_open would normally consume. */
1747 /* this one is in place of ll_file_open */
1748 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1749 ptlrpc_req_finished(it->d.lustre.it_data);
1750 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Core FIEMAP implementation: sanitize the request flags, optionally
 * flush dirty pages for FIEMAP_FLAG_SYNC, glimpse the size when it is
 * unknown, then ask the cl layer for the extent mapping keyed by
 * ll_fiemap_info_key.
 * NOTE(review): elided lines (rc checks, GOTOs, final RETURN) in this
 * extract; comments only.
 */
1756 * Get size for inode for which FIEMAP mapping is requested.
1757 * Make the FIEMAP get_info call and returns the result.
1758 * \param fiemap kernel buffer to hold extens
1759 * \param num_bytes kernel buffer size
1761 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1767 struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
/* Unsupported flags: strip them and (per FIEMAP convention) report the
 * supported set back to the caller. */
1770 /* Checks for fiemap flags */
1771 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1772 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1776 /* Check for FIEMAP_FLAG_SYNC */
1777 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1778 rc = filemap_fdatawrite(inode->i_mapping);
1783 env = cl_env_get(&refcheck);
1785 RETURN(PTR_ERR(env));
/* A zero cached size may just be stale; glimpse to get the real one. */
1787 if (i_size_read(inode) == 0) {
1788 rc = ll_glimpse_size(inode);
1793 fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1794 obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
1795 obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
1797 /* If filesize is 0, then there would be no objects for mapping */
1798 if (fmkey.lfik_oa.o_size == 0) {
1799 fiemap->fm_mapped_extents = 0;
1803 fmkey.lfik_fiemap = *fiemap;
1805 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1806 &fmkey, fiemap, &num_bytes);
1808 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.
 * Reads only gf_pathlen from the user struct first to size the output
 * buffer, bounds it at PATH_MAX, then round-trips the full
 * getinfo_fid2path through obd_iocontrol().
 * NOTE(review): elided lines (permission RETURN, alloc check, copyout
 * rc) in this extract; comments only.
 */
1812 int ll_fid2path(struct inode *inode, void __user *arg)
1814 struct obd_export *exp = ll_i2mdexp(inode);
1815 const struct getinfo_fid2path __user *gfin = arg;
1817 struct getinfo_fid2path *gfout;
/* Allowed for CAP_DAC_READ_SEARCH, or anyone if the fs was mounted
 * with the user_fid2path option. */
1823 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1824 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1827 /* Only need to get the buflen */
1828 if (get_user(pathlen, &gfin->gf_pathlen))
1831 if (pathlen > PATH_MAX)
1834 outsize = sizeof(*gfout) + pathlen;
1835 OBD_ALLOC(gfout, outsize);
1839 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1840 GOTO(gf_free, rc = -EFAULT);
1842 /* Call mdc_iocontrol */
1843 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1847 if (copy_to_user(arg, gfout, outsize))
1851 OBD_FREE(gfout, outsize);
/*
 * Compute the inode's data_version by running a CIT_DATA_VERSION cl_io
 * over the stripe objects; @flags selects whether dirty pages are
 * flushed (LL_DV_RD_FLUSH) or dropped (LL_DV_WR_FLUSH) first. Restarts
 * the io loop on layout change (ci_need_restart).
 * NOTE(review): elided lines (declarations, restart goto, RETURN) in
 * this extract; comments only.
 */
1856 * Read the data_version for inode.
1858 * This value is computed using stripe object version on OST.
1859 * Version is computed using server side locking.
1861 * @param flags if do sync on the OST side;
1863 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1864 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1866 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1868 struct cl_object *obj = ll_i2info(inode)->lli_clob;
1876 /* If no file object initialized, we consider its version is 0. */
1882 env = cl_env_get(&refcheck);
1884 RETURN(PTR_ERR(env));
1886 io = vvp_env_thread_io(env);
1888 io->u.ci_data_version.dv_data_version = 0;
1889 io->u.ci_data_version.dv_flags = flags;
/* cl_io_init() != 0 means no io needed; take ci_result directly. */
1892 if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
1893 result = cl_io_loop(env, io);
1895 result = io->ci_result;
1897 *data_version = io->u.ci_data_version.dv_data_version;
1899 cl_io_fini(env, io);
/* Layout changed mid-io: retry the whole operation. */
1901 if (unlikely(io->ci_need_restart))
1904 cl_env_put(env, &refcheck);
/*
 * HSM release: take a write lease, flush and record the data_version,
 * merge size/time attributes, then close the file with MDS_HSM_RELEASE
 * so the MDT can drop the OST objects if the version still matches.
 * NOTE(review): elided lines (rc checks, out label, RETURN) in this
 * extract; comments only.
 */
1910 * Trigger a HSM release request for the provided inode.
1912 int ll_hsm_release(struct inode *inode)
1914 struct cl_env_nest nest;
1916 struct obd_client_handle *och = NULL;
1917 __u64 data_version = 0;
1921 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1922 ll_get_fsname(inode->i_sb, NULL, 0),
1923 PFID(&ll_i2info(inode)->lli_fid));
/* Exclusive write lease guarantees no concurrent writers. */
1925 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1927 GOTO(out, rc = PTR_ERR(och));
1929 /* Grab latest data_version and [am]time values */
1930 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1934 env = cl_env_nested_get(&nest);
1936 GOTO(out, rc = PTR_ERR(env));
1938 ll_merge_attr(env, inode);
1939 cl_env_nested_put(&nest, env);
1941 /* Release the file.
1942 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1943 * we still need it to pack l_remote_handle to MDT. */
1944 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
1945 MDS_HSM_RELEASE, &data_version);
/* On the error path the lease open handle must still be closed. */
1950 if (och != NULL && !IS_ERR(och)) /* close the file */
1951 ll_lease_close(och, inode, NULL);
/* State for a layout swap between two files: the two inodes, their
 * expected data versions, and whether each version must be verified. */
1956 struct ll_swap_stack {
1959 struct inode *inode1;
1960 struct inode *inode2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS worker: swap the layouts of file1/file2.
 * Orders the pair by FID to avoid deadlock, optionally takes group
 * locks on both (gid != 0) to flush caches, verifies data versions if
 * requested, then sends MDS_SWAP_LAYOUTS... actually the swap goes via
 * obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS) on the MDC below.
 * NOTE(review): many lines elided (alloc check, rc checks, gid setup,
 * free label); comments only added.
 */
1965 static int ll_swap_layouts(struct file *file1, struct file *file2,
1966 struct lustre_swap_layouts *lsl)
1968 struct mdc_swap_layouts msl;
1969 struct md_op_data *op_data;
1972 struct ll_swap_stack *llss = NULL;
1975 OBD_ALLOC_PTR(llss);
1979 llss->inode1 = file1->f_path.dentry->d_inode;
1980 llss->inode2 = file2->f_path.dentry->d_inode;
1982 rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
1986 /* we use 2 bool because it is easier to swap than 2 bits */
1987 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1988 llss->check_dv1 = true;
1990 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1991 llss->check_dv2 = true;
1993 /* we cannot use lsl->sl_dvX directly because we may swap them */
1994 llss->dv1 = lsl->sl_dv1;
1995 llss->dv2 = lsl->sl_dv2;
/* Canonical ordering by FID so concurrent swaps can't deadlock. */
1997 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1998 if (rc == 0) /* same file, done! */
2001 if (rc < 0) { /* sequentialize it */
2002 swap(llss->inode1, llss->inode2);
2004 swap(llss->dv1, llss->dv2);
2005 swap(llss->check_dv1, llss->check_dv2);
2009 if (gid != 0) { /* application asks to flush dirty cache */
2010 rc = ll_get_grouplock(llss->inode1, file1, gid);
2014 rc = ll_get_grouplock(llss->inode2, file2, gid);
2016 ll_put_grouplock(llss->inode1, file1, gid);
2021 /* ultimate check, before swaping the layouts we check if
2022 * dataversion has changed (if requested) */
2023 if (llss->check_dv1) {
2024 rc = ll_data_version(llss->inode1, &dv, 0);
2027 if (dv != llss->dv1)
2028 GOTO(putgl, rc = -EAGAIN);
2031 if (llss->check_dv2) {
2032 rc = ll_data_version(llss->inode2, &dv, 0);
2035 if (dv != llss->dv2)
2036 GOTO(putgl, rc = -EAGAIN);
2039 /* struct md_op_data is used to send the swap args to the mdt
2040 * only flags is missing, so we use struct mdc_swap_layouts
2041 * through the md_op_data->op_data */
2042 /* flags from user space have to be converted before they are send to
2043 * server, no flag is sent today, they are only used on the client */
2046 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2047 0, LUSTRE_OPC_ANY, &msl);
2048 if (IS_ERR(op_data))
2049 GOTO(free, rc = PTR_ERR(op_data));
2051 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2052 sizeof(*op_data), op_data, NULL);
2053 ll_finish_md_op_data(op_data);
/* Group locks released in reverse acquisition order. */
2060 ll_put_grouplock(llss->inode2, file2, gid);
2061 ll_put_grouplock(llss->inode1, file1, gid);
/*
 * Set/clear HSM state flags on an inode via LL_IOC_HSM_STATE_SET to the
 * MDC. Rejects out-of-range masks, restricts non-HSM_USER_MASK bits to
 * CAP_SYS_ADMIN, and bounds the archive id.
 * NOTE(review): elided lines (RETURNs for the early checks) in this
 * extract; comments only.
 */
2071 int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2073 struct md_op_data *op_data;
2077 /* Detect out-of range masks */
2078 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2081 /* Non-root users are forbidden to set or clear flags which are
2082 * NOT defined in HSM_USER_MASK. */
2083 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2084 !cfs_capable(CFS_CAP_SYS_ADMIN))
2087 /* Detect out-of range archive id */
2088 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2089 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
/* hss rides to the MDT inside op_data->op_data. */
2092 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2093 LUSTRE_OPC_ANY, hss);
2094 if (IS_ERR(op_data))
2095 RETURN(PTR_ERR(op_data));
2097 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2098 sizeof(*op_data), op_data, NULL);
2100 ll_finish_md_op_data(op_data);
/*
 * HSM import: mark a regular file ARCHIVED|EXISTS|RELEASED (its data
 * lives only in the archive), then restore the recorded mode, owner,
 * size and timestamps via ll_setattr_raw() under i_mutex.
 * NOTE(review): elided lines (allocs, rc checks, out label) in this
 * extract; comments only.
 */
2105 static int ll_hsm_import(struct inode *inode, struct file *file,
2106 struct hsm_user_import *hui)
2108 struct hsm_state_set *hss = NULL;
2109 struct iattr *attr = NULL;
/* Import only makes sense for regular files. */
2113 if (!S_ISREG(inode->i_mode))
2119 GOTO(out, rc = -ENOMEM);
2121 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2122 hss->hss_archive_id = hui->hui_archive_id;
2123 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2124 rc = ll_hsm_state_set(inode, hss);
2128 OBD_ALLOC_PTR(attr);
2130 GOTO(out, rc = -ENOMEM);
/* Restore the archived file's identity; force S_IFREG in the mode. */
2132 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2133 attr->ia_mode |= S_IFREG;
2134 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2135 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2136 attr->ia_size = hui->hui_size;
2137 attr->ia_mtime.tv_sec = hui->hui_mtime;
2138 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2139 attr->ia_atime.tv_sec = hui->hui_atime;
2140 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2142 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2143 ATTR_UID | ATTR_GID |
2144 ATTR_MTIME | ATTR_MTIME_SET |
2145 ATTR_ATIME | ATTR_ATIME_SET;
2147 mutex_lock(&inode->i_mutex);
2149 rc = ll_setattr_raw(file->f_path.dentry, attr, true);
2153 mutex_unlock(&inode->i_mutex);
/* Map an fmode_t to the LL_LEASE_{RD,WR}LCK bitmask reported to userspace. */
2165 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2167 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2168 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * LL_IOC_FUTIMES_3 handler: set atime/mtime/ctime (all three, hence the
 * name) from a ll_futimes_3 struct. Admin-only, regular files only;
 * applied through ll_setattr_raw() under i_mutex.
 * NOTE(review): elided lines (RETURNs, struct field nesting) in this
 * extract; comments only.
 */
2171 static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu)
2173 struct inode *inode = file->f_path.dentry->d_inode;
2175 .ia_valid = ATTR_ATIME | ATTR_ATIME_SET |
2176 ATTR_MTIME | ATTR_MTIME_SET |
2177 ATTR_CTIME | ATTR_CTIME_SET,
2179 .tv_sec = lfu->lfu_atime_sec,
2180 .tv_nsec = lfu->lfu_atime_nsec,
2183 .tv_sec = lfu->lfu_mtime_sec,
2184 .tv_nsec = lfu->lfu_mtime_nsec,
2187 .tv_sec = lfu->lfu_ctime_sec,
2188 .tv_nsec = lfu->lfu_ctime_nsec,
/* Setting ctime explicitly is a privileged operation. */
2194 if (!capable(CAP_SYS_ADMIN))
2197 if (!S_ISREG(inode->i_mode))
2200 mutex_lock(&inode->i_mutex);
2201 rc = ll_setattr_raw(file->f_path.dentry, &ia, false);
2202 mutex_unlock(&inode->i_mutex);
/*
 * unlocked_ioctl entry point for regular files: dispatches every
 * LL_IOC_* / FSFILT_IOC_* / OBD_IOC_* command on an open file, falling
 * through to the registered ll_iocontrol_call() handlers and finally to
 * obd_iocontrol() on the data export for anything unrecognized.
 * NOTE(review): this is an elided extract — per-case declarations,
 * RETURNs, allocs and closing braces are missing; comments only added.
 */
2208 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2210 struct inode *inode = file->f_path.dentry->d_inode;
2211 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2215 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2216 PFID(ll_inode2fid(inode)), inode, cmd);
2217 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2219 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2220 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2224 case LL_IOC_GETFLAGS:
2225 /* Get the current value of the file flags */
2226 return put_user(fd->fd_flags, (int __user *)arg);
2227 case LL_IOC_SETFLAGS:
2228 case LL_IOC_CLRFLAGS:
2229 /* Set or clear specific file flags */
2230 /* XXX This probably needs checks to ensure the flags are
2231 * not abused, and to handle any flag side effects.
2233 if (get_user(flags, (int __user *) arg))
2236 if (cmd == LL_IOC_SETFLAGS) {
/* Lockless IO is only permitted on O_DIRECT files. */
2237 if ((flags & LL_FILE_IGNORE_LOCK) &&
2238 !(file->f_flags & O_DIRECT)) {
2239 CERROR("%s: unable to disable locking on "
2240 "non-O_DIRECT file\n", current->comm);
2244 fd->fd_flags |= flags;
2246 fd->fd_flags &= ~flags;
2249 case LL_IOC_LOV_SETSTRIPE:
2250 RETURN(ll_lov_setstripe(inode, file, arg));
2251 case LL_IOC_LOV_SETEA:
2252 RETURN(ll_lov_setea(inode, file, arg));
2253 case LL_IOC_LOV_SWAP_LAYOUTS: {
2255 struct lustre_swap_layouts lsl;
2257 if (copy_from_user(&lsl, (char __user *)arg,
2258 sizeof(struct lustre_swap_layouts)))
/* Both files must be writable to have their layouts swapped. */
2261 if ((file->f_flags & O_ACCMODE) == O_RDONLY)
2264 file2 = fget(lsl.sl_fd);
2268 /* O_WRONLY or O_RDWR */
2269 if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
2270 GOTO(out, rc = -EPERM);
2272 if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
2273 struct inode *inode2;
2274 struct ll_inode_info *lli;
2275 struct obd_client_handle *och = NULL;
/* SWAP_LAYOUTS_CLOSE must be the only flag set. */
2277 if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE)
2278 GOTO(out, rc = -EINVAL);
2280 lli = ll_i2info(inode);
/* Steal the lease handle so the swap-and-close owns it. */
2281 mutex_lock(&lli->lli_och_mutex);
2282 if (fd->fd_lease_och != NULL) {
2283 och = fd->fd_lease_och;
2284 fd->fd_lease_och = NULL;
2286 mutex_unlock(&lli->lli_och_mutex);
2288 GOTO(out, rc = -ENOLCK);
2289 inode2 = file2->f_path.dentry->d_inode;
2290 rc = ll_swap_layouts_close(och, inode, inode2);
2292 rc = ll_swap_layouts(file, file2, &lsl);
2298 case LL_IOC_LOV_GETSTRIPE:
2299 RETURN(ll_file_getstripe(inode,
2300 (struct lov_user_md __user *)arg));
2301 case FSFILT_IOC_GETFLAGS:
2302 case FSFILT_IOC_SETFLAGS:
2303 RETURN(ll_iocontrol(inode, file, cmd, arg));
2304 case FSFILT_IOC_GETVERSION_OLD:
2305 case FSFILT_IOC_GETVERSION:
2306 RETURN(put_user(inode->i_generation, (int __user *)arg));
2307 case LL_IOC_GROUP_LOCK:
2308 RETURN(ll_get_grouplock(inode, file, arg));
2309 case LL_IOC_GROUP_UNLOCK:
2310 RETURN(ll_put_grouplock(inode, file, arg));
2311 case IOC_OBD_STATFS:
2312 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2314 /* We need to special case any other ioctls we want to handle,
2315 * to send them to the MDS/OST as appropriate and to properly
2316 * network encode the arg field.
2317 case FSFILT_IOC_SETVERSION_OLD:
2318 case FSFILT_IOC_SETVERSION:
2320 case LL_IOC_FLUSHCTX:
2321 RETURN(ll_flush_ctx(inode));
2322 case LL_IOC_PATH2FID: {
2323 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2324 sizeof(struct lu_fid)))
2329 case LL_IOC_GETPARENT:
2330 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2332 case OBD_IOC_FID2PATH:
2333 RETURN(ll_fid2path(inode, (void __user *)arg));
2334 case LL_IOC_DATA_VERSION: {
2335 struct ioc_data_version idv;
2338 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* Sanitize user-supplied flags to the two supported flush modes. */
2341 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2342 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2345 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2351 case LL_IOC_GET_MDTIDX: {
2354 mdtidx = ll_get_mdt_idx(inode);
2358 if (put_user((int)mdtidx, (int __user *)arg))
2363 case OBD_IOC_GETDTNAME:
2364 case OBD_IOC_GETMDNAME:
2365 RETURN(ll_get_obd_name(inode, cmd, arg));
2366 case LL_IOC_HSM_STATE_GET: {
2367 struct md_op_data *op_data;
2368 struct hsm_user_state *hus;
2375 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2376 LUSTRE_OPC_ANY, hus);
2377 if (IS_ERR(op_data)) {
2379 RETURN(PTR_ERR(op_data));
2382 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2385 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2388 ll_finish_md_op_data(op_data);
2392 case LL_IOC_HSM_STATE_SET: {
2393 struct hsm_state_set *hss;
2400 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2405 rc = ll_hsm_state_set(inode, hss);
2410 case LL_IOC_HSM_ACTION: {
2411 struct md_op_data *op_data;
2412 struct hsm_current_action *hca;
2419 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2420 LUSTRE_OPC_ANY, hca);
2421 if (IS_ERR(op_data)) {
2423 RETURN(PTR_ERR(op_data));
2426 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2429 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2432 ll_finish_md_op_data(op_data);
2436 case LL_IOC_SET_LEASE: {
2437 struct ll_inode_info *lli = ll_i2info(inode);
2438 struct obd_client_handle *och = NULL;
/* Requested lease mode must be compatible with the open mode. */
2443 case LL_LEASE_WRLCK:
2444 if (!(file->f_mode & FMODE_WRITE))
2446 fmode = FMODE_WRITE;
2448 case LL_LEASE_RDLCK:
2449 if (!(file->f_mode & FMODE_READ))
2453 case LL_LEASE_UNLCK:
2454 mutex_lock(&lli->lli_och_mutex);
2455 if (fd->fd_lease_och != NULL) {
2456 och = fd->fd_lease_och;
2457 fd->fd_lease_och = NULL;
2459 mutex_unlock(&lli->lli_och_mutex);
2464 fmode = och->och_flags;
2465 rc = ll_lease_close(och, inode, &lease_broken);
/* Report the lease type that was just dropped. */
2472 RETURN(ll_lease_type_from_fmode(fmode));
2477 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2479 /* apply for lease */
2480 och = ll_lease_open(inode, file, fmode, 0);
2482 RETURN(PTR_ERR(och));
2485 mutex_lock(&lli->lli_och_mutex);
2486 if (fd->fd_lease_och == NULL) {
2487 fd->fd_lease_och = och;
2490 mutex_unlock(&lli->lli_och_mutex);
/* Lost the install race; give the freshly-opened lease back. */
2492 /* impossible now that only excl is supported for now */
2493 ll_lease_close(och, inode, &lease_broken);
2498 case LL_IOC_GET_LEASE: {
2499 struct ll_inode_info *lli = ll_i2info(inode);
2500 struct ldlm_lock *lock = NULL;
2503 mutex_lock(&lli->lli_och_mutex);
2504 if (fd->fd_lease_och != NULL) {
2505 struct obd_client_handle *och = fd->fd_lease_och;
/* A cancelled lease lock means the lease is no longer valid. */
2507 lock = ldlm_handle2lock(&och->och_lease_handle);
2509 lock_res_and_lock(lock);
2510 if (!ldlm_is_cancel(lock))
2511 fmode = och->och_flags;
2513 unlock_res_and_lock(lock);
2514 LDLM_LOCK_PUT(lock);
2517 mutex_unlock(&lli->lli_och_mutex);
2519 RETURN(ll_lease_type_from_fmode(fmode));
2521 case LL_IOC_HSM_IMPORT: {
2522 struct hsm_user_import *hui;
2528 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2533 rc = ll_hsm_import(inode, file, hui);
2538 case LL_IOC_FUTIMES_3: {
2539 struct ll_futimes_3 lfu;
2541 if (copy_from_user(&lfu,
2542 (const struct ll_futimes_3 __user *)arg,
2546 RETURN(ll_file_futimes_3(file, &lfu));
/* Unknown command: try registered dynamic handlers, then punt the
 * ioctl to the data (OST) export. */
2552 ll_iocontrol_call(inode, file, cmd, arg, &err))
2555 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2556 (void __user *)arg));
2561 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Backport helper (kernels without generic_file_llseek_size): validate
 * @offset against sign/maxsize rules and commit it to file->f_pos,
 * resetting f_version on movement.
 * NOTE(review): elided lines (error RETURNs, braces) in this extract.
 */
2562 static inline loff_t
2563 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2565 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2567 if (offset > maxsize)
2570 if (offset != file->f_pos) {
2571 file->f_pos = offset;
2572 file->f_version = 0;
/*
 * Backported generic_file_llseek_size(): llseek supporting SEEK_CUR
 * without relocking for offset 0, plus SEEK_DATA/SEEK_HOLE semantics
 * against the supplied @eof.
 * NOTE(review): elided lines (switch cases, returns) in this extract;
 * comments only.
 */
2578 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2579 loff_t maxsize, loff_t eof)
2581 struct inode *inode = file->f_path.dentry->d_inode;
2589 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2590 * position-querying operation. Avoid rewriting the "same"
2591 * f_pos value back to the file because a concurrent read(),
2592 * write() or lseek() might have altered it
2597 * f_lock protects against read/modify/write race with other
2598 * SEEK_CURs. Note that parallel writes and reads behave
/* i_mutex serializes the read-modify-write of f_pos for SEEK_CUR. */
2601 mutex_lock(&inode->i_mutex);
2602 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2603 mutex_unlock(&inode->i_mutex);
2607 * In the generic case the entire file is data, so as long as
2608 * offset isn't at the end of the file then the offset is data.
2615 * There is a virtual hole at the end of the file, so as long as
2616 * offset isn't i_size or larger, return i_size.
2624 return llseek_execute(file, offset, maxsize);
/*
 * llseek entry point: for origins that depend on the file size
 * (SEEK_END/SEEK_HOLE/SEEK_DATA) glimpse the current size from the OSTs
 * first, then delegate to the generic llseek-with-size helper bounded
 * by ll_file_maxbytes().
 * NOTE(review): elided lines (rc check, RETURN) in this extract.
 */
2628 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2630 struct inode *inode = file->f_path.dentry->d_inode;
2631 loff_t retval, eof = 0;
/* Computed only for the trace message below. */
2634 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2635 (origin == SEEK_CUR) ? file->f_pos : 0);
2636 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2637 PFID(ll_inode2fid(inode)), inode, retval, retval,
2639 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2641 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2642 retval = ll_glimpse_size(inode);
2645 eof = i_size_read(inode);
2648 retval = ll_generic_file_llseek_size(file, offset, origin,
2649 ll_file_maxbytes(inode), eof);
/*
 * flush (close(2)-time) hook: surface any async writeback error that
 * was recorded on the inode or its cl object, but report each failure
 * to the application only once (fd_write_failed gating below).
 * NOTE(review): elided lines (rc merging, flag updates) in this
 * extract; comments only.
 */
2653 static int ll_flush(struct file *file, fl_owner_t id)
2655 struct inode *inode = file->f_path.dentry->d_inode;
2656 struct ll_inode_info *lli = ll_i2info(inode);
2657 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2660 LASSERT(!S_ISDIR(inode->i_mode));
2662 /* catch async errors that were recorded back when async writeback
2663 * failed for pages in this mapping. */
/* Read-and-clear: each async error is reported at most once. */
2664 rc = lli->lli_async_rc;
2665 lli->lli_async_rc = 0;
2666 if (lli->lli_clob != NULL) {
2667 err = lov_read_and_clear_async_rc(lli->lli_clob);
2672 /* The application has been told write failure already.
2673 * Do not report failure again. */
2674 if (fd->fd_write_failed)
/* Any recorded error is reported uniformly as -EIO. */
2676 return rc ? -EIO : 0;
/*
 * Run a CIT_FSYNC cl_io over [start, end] of the inode. @mode selects
 * local flush, OST sync, discard, or all; on success the return value
 * is the number of pages written (fi_nr_written).
 * NOTE(review): elided lines (fi_end assignment, result checks, RETURN)
 * in this extract; comments only.
 */
2680 * Called to make sure a portion of file has been written out.
2681 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2683 * Return how many pages have been written.
2685 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2686 enum cl_fsync_mode mode, int ignore_layout)
2688 struct cl_env_nest nest;
2691 struct cl_fsync_io *fio;
/* Reject any mode outside the four defined fsync modes. */
2695 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2696 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2699 env = cl_env_nested_get(&nest);
2701 RETURN(PTR_ERR(env));
2703 io = vvp_env_thread_io(env);
2704 io->ci_obj = ll_i2info(inode)->lli_clob;
2705 io->ci_ignore_layout = ignore_layout;
2707 /* initialize parameters for sync */
2708 fio = &io->u.ci_fsync;
2709 fio->fi_start = start;
2711 fio->fi_fid = ll_inode2fid(inode);
2712 fio->fi_mode = mode;
2713 fio->fi_nr_written = 0;
2715 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2716 result = cl_io_loop(env, io);
2718 result = io->ci_result;
/* Success is reported as the page count, not zero. */
2720 result = fio->fi_nr_written;
2721 cl_io_fini(env, io);
2722 cl_env_nested_put(&nest, env);
/*
 * fsync entry point, compiled for three kernel fsync signatures
 * (4-arg range fsync, 2-arg, and legacy 3-arg with dentry). Waits for
 * page writeback, surfaces recorded async errors, fsyncs the MD handle
 * on the MDT, then runs CL_FSYNC_ALL over the data range, tracking
 * fd_write_failed for ll_flush().
 * NOTE(review): elided lines throughout (rc merging, RETURN); comments
 * only added.
 */
2728 * When dentry is provided (the 'else' case), *file->f_path.dentry may be
2729 * null and dentry must be used directly rather than pulled from
2730 * *file->f_path.dentry as is done otherwise.
2733 #ifdef HAVE_FILE_FSYNC_4ARGS
2734 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2736 struct dentry *dentry = file->f_path.dentry;
2737 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2738 int ll_fsync(struct file *file, int datasync)
2740 struct dentry *dentry = file->f_path.dentry;
2742 loff_t end = LLONG_MAX;
2744 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2747 loff_t end = LLONG_MAX;
2749 struct inode *inode = dentry->d_inode;
2750 struct ll_inode_info *lli = ll_i2info(inode);
2751 struct ptlrpc_request *req;
2755 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2756 PFID(ll_inode2fid(inode)), inode);
2757 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
/* 4-arg kernels do the range write-and-wait here under i_mutex;
 * older kernels already had the data written by the VFS caller. */
2759 #ifdef HAVE_FILE_FSYNC_4ARGS
2760 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2761 mutex_lock(&inode->i_mutex);
2763 /* fsync's caller has already called _fdata{sync,write}, we want
2764 * that IO to finish before calling the osc and mdc sync methods */
2765 rc = filemap_fdatawait(inode->i_mapping);
2768 /* catch async errors that were recorded back when async writeback
2769 * failed for pages in this mapping. */
2770 if (!S_ISDIR(inode->i_mode)) {
2771 err = lli->lli_async_rc;
2772 lli->lli_async_rc = 0;
2775 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* Sync the metadata handle on the MDT. */
2780 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
2784 ptlrpc_req_finished(req);
2786 if (S_ISREG(inode->i_mode)) {
2787 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2789 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2790 if (rc == 0 && err < 0)
/* Remember write outcome so ll_flush() reports it exactly once. */
2793 fd->fd_write_failed = true;
2795 fd->fd_write_failed = false;
2798 #ifdef HAVE_FILE_FSYNC_4ARGS
2799 mutex_unlock(&inode->i_mutex);
/*
 * Advisory-locking entry point for both .flock (BSD flock) and .lock
 * (POSIX fcntl) file_operations. Translates the kernel's struct
 * file_lock into an LDLM_FLOCK policy and enqueues it on the MDS, then
 * mirrors the result into the local kernel lock lists so the VFS stays
 * consistent with the cluster-wide state.
 *
 * NOTE(review): several lines (braces, switch labels, RETURN) are
 * missing from this extraction; confirm the switch structure on
 * fl_type/cmd against the original file.
 */
2805 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2807 struct inode *inode = file->f_path.dentry->d_inode;
2808 struct ll_sb_info *sbi = ll_i2sbi(inode);
2809 struct ldlm_enqueue_info einfo = {
2810 .ei_type = LDLM_FLOCK,
2811 .ei_cb_cp = ldlm_flock_completion_ast,
2812 .ei_cbdata = file_lock,
2814 struct md_op_data *op_data;
2815 struct lustre_handle lockh = { 0 };
2816 union ldlm_policy_data flock = { { 0 } };
2817 int fl_type = file_lock->fl_type;
2823 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2824 PFID(ll_inode2fid(inode)), file_lock);
2826 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2828 if (file_lock->fl_flags & FL_FLOCK) {
2829 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2830 /* flocks are whole-file locks */
2831 flock.l_flock.end = OFFSET_MAX;
2832 /* For flocks owner is determined by the local file desctiptor*/
2833 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2834 } else if (file_lock->fl_flags & FL_POSIX) {
	 /* POSIX locks are byte-range and owned per-process */
2835 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2836 flock.l_flock.start = file_lock->fl_start;
2837 flock.l_flock.end = file_lock->fl_end;
2841 flock.l_flock.pid = file_lock->fl_pid;
2843 /* Somewhat ugly workaround for svc lockd.
2844 * lockd installs custom fl_lmops->lm_compare_owner that checks
2845 * for the fl_owner to be the same (which it always is on local node
2846 * I guess between lockd processes) and then compares pid.
2847 * As such we assign pid to the owner field to make it all work,
2848 * conflict with normal locks is unlikely since pid space and
2849 * pointer space for current->files are not intersecting */
2850 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2851 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
	 /* map fl_type to an LDLM mode: read -> PR */
2855 einfo.ei_mode = LCK_PR;
2858 /* An unlock request may or may not have any relation to
2859 * existing locks so we may not be able to pass a lock handle
2860 * via a normal ldlm_lock_cancel() request. The request may even
2861 * unlock a byte range in the middle of an existing lock. In
2862 * order to process an unlock request we need all of the same
2863 * information that is given with a normal read or write record
2864 * lock request. To avoid creating another ldlm unlock (cancel)
2865 * message we'll treat a LCK_NL flock request as an unlock. */
2866 einfo.ei_mode = LCK_NL;
	 /* write lock -> PW */
2869 einfo.ei_mode = LCK_PW;
2872 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
	 /* non-blocking set request */
2887 flags = LDLM_FL_BLOCK_NOWAIT;
	 /* F_GETLK-style query: test only, never grant */
2893 flags = LDLM_FL_TEST_LOCK;
2896 CERROR("unknown fcntl lock command: %d\n", cmd);
2900 /* Save the old mode so that if the mode in the lock changes we
2901 * can decrement the appropriate reader or writer refcount. */
2902 file_lock->fl_type = einfo.ei_mode;
2904 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2905 LUSTRE_OPC_ANY, NULL);
2906 if (IS_ERR(op_data))
2907 RETURN(PTR_ERR(op_data));
2909 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2910 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2911 flock.l_flock.pid, flags, einfo.ei_mode,
2912 flock.l_flock.start, flock.l_flock.end);
	 /* enqueue the flock on the MDS; may block for F_SETLKW */
2914 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2917 /* Restore the file lock type if not TEST lock. */
2918 if (!(flags & LDLM_FL_TEST_LOCK))
2919 file_lock->fl_type = fl_type;
	 /* mirror the server decision into the kernel's lock lists */
2921 if ((file_lock->fl_flags & FL_FLOCK) &&
2922 (rc == 0 || file_lock->fl_type == F_UNLCK))
2923 rc2 = flock_lock_file_wait(file, file_lock);
2924 if ((file_lock->fl_flags & FL_POSIX) &&
2925 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2926 !(flags & LDLM_FL_TEST_LOCK))
2927 rc2 = posix_lock_file_wait(file, file_lock);
2929 if (rc2 && file_lock->fl_type != F_UNLCK) {
	 /* local bookkeeping failed: release the server-side lock by
	  * re-enqueueing it as LCK_NL (treated as unlock, see above) */
2930 einfo.ei_mode = LCK_NL;
2931 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2936 ll_finish_md_op_data(op_data);
/*
 * Look up the FID of @name under @parent via an MDS getattr-by-name RPC.
 *
 * \param parent	parent directory inode
 * \param name/namelen	entry to resolve
 * \param fid  [out]	filled with the child's FID on success
 * \param inode [out]	if non-NULL (presumably — the guard line is not
 *			visible here), a new inode is instantiated from
 *			the reply via ll_prep_inode()
 * \retval 0 on success, negative errno on failure
 */
2941 int ll_get_fid_by_name(struct inode *parent, const char *name,
2942 int namelen, struct lu_fid *fid,
2943 struct inode **inode)
2945 struct md_op_data *op_data = NULL;
2946 struct mdt_body *body;
2947 struct ptlrpc_request *req;
2951 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2952 LUSTRE_OPC_ANY, NULL);
2953 if (IS_ERR(op_data))
2954 RETURN(PTR_ERR(op_data));
	 /* only need the FID and file type back from the MDS */
2956 op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
2957 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2958 ll_finish_md_op_data(op_data);
2962 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2964 GOTO(out_req, rc = -EFAULT);
2966 *fid = body->mbo_fid1;
2969 rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
2971 ptlrpc_req_finished(req);
/*
 * Migrate directory entry @name under @parent to MDT @mdtidx using a
 * rename-with-CLI_MIGRATE RPC. For regular files a write lease is taken
 * first and the data version is captured so the server can detect
 * concurrent modification; -EAGAIN from the server triggers a retry
 * (retry jump target not visible in this extraction).
 */
2975 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2976 const char *name, int namelen)
2978 struct dentry *dchild = NULL;
2979 struct inode *child_inode = NULL;
2980 struct md_op_data *op_data;
2981 struct ptlrpc_request *request = NULL;
2982 struct obd_client_handle *och = NULL;
2984 struct mdt_body *body;
2986 __u64 data_version = 0;
2989 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2990 name, PFID(ll_inode2fid(parent)), mdtidx);
2992 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2993 0, LUSTRE_OPC_ANY, NULL);
2994 if (IS_ERR(op_data))
2995 RETURN(PTR_ERR(op_data));
2997 /* Get child FID first */
2998 qstr.hash = full_name_hash(name, namelen);
	 /* prefer the cached dentry: avoids an RPC if present */
3001 dchild = d_lookup(file->f_path.dentry, &qstr);
3002 if (dchild != NULL) {
3003 if (dchild->d_inode != NULL)
3004 child_inode = igrab(dchild->d_inode);
3008 if (child_inode == NULL) {
	 /* no cached inode: resolve the FID via an MDS lookup */
3009 rc = ll_get_fid_by_name(parent, name, namelen,
3010 &op_data->op_fid3, &child_inode);
3015 if (child_inode == NULL)
3016 GOTO(out_free, rc = -EINVAL);
3018 mutex_lock(&child_inode->i_mutex);
3019 op_data->op_fid3 = *ll_inode2fid(child_inode);
3020 if (!fid_is_sane(&op_data->op_fid3)) {
3021 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
3022 ll_get_fsname(parent->i_sb, NULL, 0), name,
3023 PFID(&op_data->op_fid3));
3024 GOTO(out_free, rc = -EINVAL);
	 /* nothing to do if the child already lives on the target MDT */
3027 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
3032 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
3033 PFID(&op_data->op_fid3), mdtidx);
3034 GOTO(out_free, rc = 0);
3037 if (S_ISREG(child_inode->i_mode)) {
	 /* take a write lease and snapshot the data version so the
	  * server can verify the file is unchanged during migration */
3038 och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
3045 rc = ll_data_version(child_inode, &data_version,
3050 op_data->op_handle = och->och_fh;
3051 op_data->op_data = och->och_mod;
3052 op_data->op_data_version = data_version;
3053 op_data->op_lease_handle = och->och_lease_handle;
3054 op_data->op_bias |= MDS_RENAME_MIGRATE;
3057 op_data->op_mds = mdtidx;
3058 op_data->op_cli_flags = CLI_MIGRATE;
	 /* migration is implemented as a same-name rename */
3059 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
3060 namelen, name, namelen, &request);
3062 ll_update_times(request, parent);
3064 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
3066 GOTO(out_free, rc = -EPROTO);
3068 /* If the server does release layout lock, then we cleanup
3069 * the client och here, otherwise release it in out_free: */
3070 if (och != NULL && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
3071 obd_mod_put(och->och_mod);
3072 md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp, och);
3073 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
3078 ptlrpc_req_finished(request);
3079 /* Try again if the file layout has changed. */
3080 if (rc == -EAGAIN && S_ISREG(child_inode->i_mode))
3083 if (child_inode != NULL) {
3084 if (och != NULL) /* close the file */
3085 ll_lease_close(och, child_inode, NULL);
	 /* drop the now-stale local inode; it will be re-fetched */
3086 clear_nlink(child_inode);
3087 mutex_unlock(&child_inode->i_mutex);
3091 ll_finish_md_op_data(op_data);
3096 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3104 * test if some locks matching bits and l_req_mode are acquired
3105 * - bits can be in different locks
3106 * - if found clear the common lock bits in *bits
3107 * - the bits not found, are kept in *bits
3109 * \param bits [IN] searched lock bits [IN]
3110 * \param l_req_mode [IN] searched lock mode
3111 * \retval boolean, true iff all bits are found
/*
 * Test whether cached MD (IBITS) locks cover the requested bits in
 * @l_req_mode (LCK_MINMODE matches any of CR/CW/PR/PW). Matching is
 * non-invasive (LDLM_FL_TEST_LOCK). Bits found are cleared from *bits;
 * bits not found remain set. Returns true iff all bits were found.
 */
3113 int ll_have_md_lock(struct inode *inode, __u64 *bits, enum ldlm_mode l_req_mode)
3115 struct lustre_handle lockh;
3116 union ldlm_policy_data policy;
3117 enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
3118 (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
3127 fid = &ll_i2info(inode)->lli_fid;
3128 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3129 ldlm_lockname[mode]);
3131 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
	 /* probe each inodebit separately: the covering locks may differ */
3132 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3133 policy.l_inodebits.bits = *bits & (1 << i);
3134 if (policy.l_inodebits.bits == 0)
3137 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3138 &policy, mode, &lockh)) {
3139 struct ldlm_lock *lock;
3141 lock = ldlm_handle2lock(&lockh);
	 /* clear every bit the matched lock actually grants */
3144 ~(lock->l_policy_data.l_inodebits.bits);
3145 LDLM_LOCK_PUT(lock);
3147 *bits &= ~policy.l_inodebits.bits;
/*
 * Match (and take a reference on) a cached MD IBITS lock covering @bits
 * in one of the modes in @mode. On a hit the handle is stored in
 * @lockh and the granted mode is returned (md_lock_match return value);
 * the caller must eventually drop the reference.
 */
3154 enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
3155 struct lustre_handle *lockh, __u64 flags,
3156 enum ldlm_mode mode)
3158 union ldlm_policy_data policy = { .l_inodebits = { bits } };
3163 fid = &ll_i2info(inode)->lli_fid;
3164 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3166 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3167 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Post-process the result of a revalidation RPC. -ENOENT on a
 * non-striped object means the file was unlinked remotely and is
 * tolerated (striped directories with a bad stripe are re-validated
 * instead). Other errors are logged (rate-limited for EACCES/EIDRM)
 * and propagated.
 */
3172 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3174 /* Already unlinked. Just update nlink and return success */
3175 if (rc == -ENOENT) {
3177 /* If it is striped directory, and there is bad stripe
3178 * Let's revalidate the dentry again, instead of returning
3180 if (S_ISDIR(inode->i_mode) &&
3181 ll_i2info(inode)->lli_lsm_md != NULL)
3184 /* This path cannot be hit for regular files unless in
3185 * case of obscure races, so no need to to validate
3187 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3189 } else if (rc != 0) {
3190 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3191 "%s: revalidate FID "DFID" error: rc = %d\n",
3192 ll_get_fsname(inode->i_sb, NULL, 0),
3193 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate a dentry's inode attributes against the MDS for the lock
 * bits in @ibits. Two paths:
 *  - OBD_CONNECT_ATTRFID servers: an intent getattr/lookup by FID,
 *    which also refreshes dentry validity and DLM locks;
 *  - otherwise: a plain md_getattr, skipped entirely if the needed
 *    IBITS locks are already cached (ll_have_md_lock).
 * Returns 0 or negative errno (via ll_inode_revalidate_fini).
 */
3199 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3201 struct inode *inode = dentry->d_inode;
3202 struct ptlrpc_request *req = NULL;
3203 struct obd_export *exp;
3207 LASSERT(inode != NULL);
3209 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3210 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3212 exp = ll_i2mdexp(inode);
3214 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3215 * But under CMD case, it caused some lock issues, should be fixed
3216 * with new CMD ibits lock. See bug 12718 */
3217 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3218 struct lookup_intent oit = { .it_op = IT_GETATTR };
3219 struct md_op_data *op_data;
3221 if (ibits == MDS_INODELOCK_LOOKUP)
3222 oit.it_op = IT_LOOKUP;
3224 /* Call getattr by fid, so do not provide name at all. */
3225 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3226 dentry->d_inode, NULL, 0, 0,
3227 LUSTRE_OPC_ANY, NULL);
3228 if (IS_ERR(op_data))
3229 RETURN(PTR_ERR(op_data));
3231 rc = md_intent_lock(exp, op_data, &oit, &req,
3232 &ll_md_blocking_ast, 0);
3233 ll_finish_md_op_data(op_data);
3235 rc = ll_inode_revalidate_fini(inode, rc);
3239 rc = ll_revalidate_it_finish(req, &oit, dentry);
3241 ll_intent_release(&oit);
3245 /* Unlinked? Unhash dentry, so it is not picked up later by
3246 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3247 here to preserve get_cwd functionality on 2.6.
3249 if (!dentry->d_inode->i_nlink)
3250 d_lustre_invalidate(dentry, 0);
3252 ll_lookup_finish_locks(&oit, dentry);
3253 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3254 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3255 u64 valid = OBD_MD_FLGETATTR;
3256 struct md_op_data *op_data;
	 /* regular files also need striping EA; size the reply buffer */
3259 if (S_ISREG(inode->i_mode)) {
3260 rc = ll_get_default_mdsize(sbi, &ealen);
3263 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3266 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3267 0, ealen, LUSTRE_OPC_ANY,
3269 if (IS_ERR(op_data))
3270 RETURN(PTR_ERR(op_data));
3272 op_data->op_valid = valid;
3273 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3274 ll_finish_md_op_data(op_data);
3276 rc = ll_inode_revalidate_fini(inode, rc);
	 /* update the local inode from the getattr reply */
3280 rc = ll_prep_inode(&inode, req, NULL, NULL);
3283 ptlrpc_req_finished(req);
/*
 * For a striped directory, merge the per-stripe attributes from all
 * MDTs (md_merge_attr) into the master inode: nlink, blocks, size and
 * the cached a/m/ctime in ll_inode_info.
 */
3287 static int ll_merge_md_attr(struct inode *inode)
3289 struct cl_attr attr = { 0 };
3292 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3293 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3294 &attr, ll_md_blocking_ast);
3298 set_nlink(inode, attr.cat_nlink);
3299 inode->i_blocks = attr.cat_blocks;
3300 i_size_write(inode, attr.cat_size);
	 /* timestamps are cached in lli and copied to the inode later
	  * (see ll_inode_revalidate) */
3302 ll_i2info(inode)->lli_atime = attr.cat_atime;
3303 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3304 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * Full revalidation: refresh MD attributes via __ll_inode_revalidate(),
 * then for non-regular files copy the cached lli timestamps into the
 * inode (merging stripe attrs for striped dirs), and for regular files
 * glimpse the size from the OSTs — unless a restore is in progress, in
 * which case the MDT already supplied the correct size and a glimpse
 * would block until restore completes.
 */
3310 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3312 struct inode *inode = dentry->d_inode;
3316 rc = __ll_inode_revalidate(dentry, ibits);
3320 /* if object isn't regular file, don't validate size */
3321 if (!S_ISREG(inode->i_mode)) {
3322 if (S_ISDIR(inode->i_mode) &&
3323 ll_i2info(inode)->lli_lsm_md != NULL) {
3324 rc = ll_merge_md_attr(inode);
3329 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3330 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3331 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3333 /* In case of restore, the MDT has the right size and has
3334 * already send it back without granting the layout lock,
3335 * inode is up-to-date so glimpse is useless.
3336 * Also to glimpse we need the layout, in case of a running
3337 * restore the MDT holds the layout lock so the glimpse will
3338 * block up to the end of restore (getattr will block)
3340 if (!ll_file_test_flag(ll_i2info(inode), LLIF_FILE_RESTORING))
3341 rc = ll_glimpse_size(inode);
/*
 * VFS .getattr: revalidate UPDATE|LOOKUP bits then fill *stat from the
 * (now fresh) inode. The inode number is mapped through
 * cl_fid_build_ino() when a 32-bit-safe ino is required.
 */
3346 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3348 struct inode *inode = de->d_inode;
3349 struct ll_sb_info *sbi = ll_i2sbi(inode);
3350 struct ll_inode_info *lli = ll_i2info(inode);
3353 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3354 MDS_INODELOCK_LOOKUP);
3355 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3360 stat->dev = inode->i_sb->s_dev;
3361 if (ll_need_32bit_api(sbi))
	 /* squash the 128-bit FID into a 32-bit ino for old userspace */
3362 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3364 stat->ino = inode->i_ino;
3365 stat->mode = inode->i_mode;
3366 stat->uid = inode->i_uid;
3367 stat->gid = inode->i_gid;
3368 stat->rdev = inode->i_rdev;
3369 stat->atime = inode->i_atime;
3370 stat->mtime = inode->i_mtime;
3371 stat->ctime = inode->i_ctime;
3372 stat->blksize = 1 << inode->i_blkbits;
3374 stat->nlink = inode->i_nlink;
3375 stat->size = i_size_read(inode);
3376 stat->blocks = inode->i_blocks;
/*
 * FIEMAP ioctl backend: marshal the kernel's fiemap_extent_info into a
 * contiguous struct fiemap buffer (header + extent array), run
 * ll_do_fiemap(), and copy flags/extents back to userspace.
 * NOTE(review): extent_count comes from userspace; the allocation-size
 * multiply is assumed bounded by the caller/VFS — confirm upstream.
 */
3381 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3382 __u64 start, __u64 len)
3386 struct fiemap *fiemap;
3387 unsigned int extent_count = fieinfo->fi_extents_max;
3389 num_bytes = sizeof(*fiemap) + (extent_count *
3390 sizeof(struct fiemap_extent));
3391 OBD_ALLOC_LARGE(fiemap, num_bytes);
3396 fiemap->fm_flags = fieinfo->fi_flags;
3397 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3398 fiemap->fm_start = start;
3399 fiemap->fm_length = len;
	 /* seed with the first user extent (may carry FIEMAP_EXTENT_* hints) */
3400 if (extent_count > 0 &&
3401 copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3402 sizeof(struct fiemap_extent)) != 0)
3403 GOTO(out, rc = -EFAULT);
3405 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3407 fieinfo->fi_flags = fiemap->fm_flags;
3408 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3409 if (extent_count > 0 &&
3410 copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3411 fiemap->fm_mapped_extents *
3412 sizeof(struct fiemap_extent)) != 0)
3413 GOTO(out, rc = -EFAULT);
3415 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * .get_acl: return a referenced copy of the cached POSIX ACL stored in
 * ll_inode_info (protected by lli_lock). @type is not consulted in the
 * visible code — presumably only ACL_TYPE_ACCESS is cached; confirm.
 */
3419 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3421 struct ll_inode_info *lli = ll_i2info(inode);
3422 struct posix_acl *acl = NULL;
3425 spin_lock(&lli->lli_lock);
3426 /* VFS' acl_permission_check->check_acl will release the refcount */
3427 acl = posix_acl_dup(lli->lli_posix_acl);
3428 spin_unlock(&lli->lli_lock);
/*
 * ACL checker used by ll_generic_permission() on kernels without
 * generic_permission()'s 2-arg form. Under RCU walk (IPERM_FLAG_RCU)
 * it bails out (return value line not visible here); otherwise it
 * checks @mask against the cached access ACL. Compiled away entirely
 * without CONFIG_FS_POSIX_ACL.
 */
3433 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3435 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3436 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3438 ll_check_acl(struct inode *inode, int mask)
3441 # ifdef CONFIG_FS_POSIX_ACL
3442 struct posix_acl *acl;
3446 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3447 if (flags & IPERM_FLAG_RCU)
3450 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3455 rc = posix_acl_permission(inode, acl, mask);
3456 posix_acl_release(acl);
3459 # else /* !CONFIG_FS_POSIX_ACL */
3461 # endif /* CONFIG_FS_POSIX_ACL */
3463 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * .permission: Lustre permission check with root-squash support.
 * If the caller is root and root squash is configured (and not disabled
 * via LL_SBI_NOROOTSQUASH), temporarily override credentials with the
 * squashed fsuid/fsgid and drop filesystem capabilities before the
 * generic (or remote) permission check, restoring them afterwards.
 * The root inode is revalidated first since lookup never validates it.
 */
3465 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3466 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3468 # ifdef HAVE_INODE_PERMISION_2ARGS
3469 int ll_inode_permission(struct inode *inode, int mask)
3471 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3476 struct ll_sb_info *sbi;
3477 struct root_squash_info *squash;
3478 struct cred *cred = NULL;
3479 const struct cred *old_cred = NULL;
3481 bool squash_id = false;
	 /* cannot block under RCU-walk: ask the VFS to retry in ref-walk */
3484 #ifdef MAY_NOT_BLOCK
3485 if (mask & MAY_NOT_BLOCK)
3487 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3488 if (flags & IPERM_FLAG_RCU)
3492 /* as root inode are NOT getting validated in lookup operation,
3493 * need to do it before permission check. */
3495 if (inode == inode->i_sb->s_root->d_inode) {
3496 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3497 MDS_INODELOCK_LOOKUP);
3502 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3503 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3505 /* squash fsuid/fsgid if needed */
3506 sbi = ll_i2sbi(inode);
3507 squash = &sbi->ll_squash;
3508 if (unlikely(squash->rsi_uid != 0 &&
3509 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3510 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3514 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3515 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3516 squash->rsi_uid, squash->rsi_gid);
3518 /* update current process's credentials
3519 * and FS capability */
3520 cred = prepare_creds();
3524 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3525 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
	 /* drop all filesystem-related capabilities while squashed */
3526 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3527 if ((1 << cap) & CFS_CAP_FS_MASK)
3528 cap_lower(cred->cap_effective, cap);
3530 old_cred = override_creds(cred);
3533 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3535 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3536 rc = lustre_check_remote_perm(inode, mask);
3538 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3540 /* restore current process's credentials and FS capability */
3542 revert_creds(old_cred);
/* -o localflock - only provides locally consistent flock locks */
/* Default file_operations table: no .flock/.lock entries, so flock()
 * and fcntl() locking fall back to the kernel's local-only semantics. */
3550 struct file_operations ll_file_operations = {
3551 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3552 # ifdef HAVE_SYNC_READ_WRITE
3553 .read = new_sync_read,
3554 .write = new_sync_write,
3556 .read_iter = ll_file_read_iter,
3557 .write_iter = ll_file_write_iter,
3558 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3559 .read = ll_file_read,
3560 .aio_read = ll_file_aio_read,
3561 .write = ll_file_write,
3562 .aio_write = ll_file_aio_write,
3563 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3564 .unlocked_ioctl = ll_file_ioctl,
3565 .open = ll_file_open,
3566 .release = ll_file_release,
3567 .mmap = ll_file_mmap,
3568 .llseek = ll_file_seek,
3569 .splice_read = ll_file_splice_read,
/* file_operations for -o flock mounts: identical to the default table
 * but routes both flock() and fcntl() locking through ll_file_flock()
 * for cluster-wide lock consistency. */
3574 struct file_operations ll_file_operations_flock = {
3575 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3576 # ifdef HAVE_SYNC_READ_WRITE
3577 .read = new_sync_read,
3578 .write = new_sync_write,
3579 # endif /* HAVE_SYNC_READ_WRITE */
3580 .read_iter = ll_file_read_iter,
3581 .write_iter = ll_file_write_iter,
3582 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3583 .read = ll_file_read,
3584 .aio_read = ll_file_aio_read,
3585 .write = ll_file_write,
3586 .aio_write = ll_file_aio_write,
3587 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3588 .unlocked_ioctl = ll_file_ioctl,
3589 .open = ll_file_open,
3590 .release = ll_file_release,
3591 .mmap = ll_file_mmap,
3592 .llseek = ll_file_seek,
3593 .splice_read = ll_file_splice_read,
3596 .flock = ll_file_flock,
3597 .lock = ll_file_flock
/* These are for -o noflock - to return ENOSYS on flock calls */
/* Same table as above except locking is stubbed with ll_file_noflock. */
3601 struct file_operations ll_file_operations_noflock = {
3602 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3603 # ifdef HAVE_SYNC_READ_WRITE
3604 .read = new_sync_read,
3605 .write = new_sync_write,
3606 # endif /* HAVE_SYNC_READ_WRITE */
3607 .read_iter = ll_file_read_iter,
3608 .write_iter = ll_file_write_iter,
3609 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3610 .read = ll_file_read,
3611 .aio_read = ll_file_aio_read,
3612 .write = ll_file_write,
3613 .aio_write = ll_file_aio_write,
3614 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3615 .unlocked_ioctl = ll_file_ioctl,
3616 .open = ll_file_open,
3617 .release = ll_file_release,
3618 .mmap = ll_file_mmap,
3619 .llseek = ll_file_seek,
3620 .splice_read = ll_file_splice_read,
3623 .flock = ll_file_noflock,
3624 .lock = ll_file_noflock
/* inode_operations for regular Lustre files: attribute, xattr, fiemap
 * and (when the kernel supports .get_acl) POSIX ACL handlers. */
3627 struct inode_operations ll_file_inode_operations = {
3628 .setattr = ll_setattr,
3629 .getattr = ll_getattr,
3630 .permission = ll_inode_permission,
3631 .setxattr = ll_setxattr,
3632 .getxattr = ll_getxattr,
3633 .listxattr = ll_listxattr,
3634 .removexattr = ll_removexattr,
3635 .fiemap = ll_fiemap,
3636 #ifdef HAVE_IOP_GET_ACL
3637 .get_acl = ll_get_acl,
/* dynamic ioctl number support routins */
/* Registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries guarded by an rwsem. Each entry carries a
 * callback and the array of ioctl command numbers it serves
 * (iocd_cmd is a trailing flexible array, allocated with the entry). */
3642 static struct llioc_ctl_data {
3643 struct rw_semaphore ioc_sem;
3644 struct list_head ioc_head;
3646 __RWSEM_INITIALIZER(llioc.ioc_sem),
3647 LIST_HEAD_INIT(llioc.ioc_head)
3652 struct list_head iocd_list;
3653 unsigned int iocd_size;	/* total allocation size, saved for OBD_FREE */
3654 llioc_callback_t iocd_cb;	/* handler invoked by ll_iocontrol_call() */
3655 unsigned int iocd_count;	/* number of entries in iocd_cmd[] */
3656 unsigned int iocd_cmd[0];	/* ioctl command numbers served */
/*
 * Register a dynamic ioctl handler @cb for @count command numbers in
 * @cmd. Returns an opaque cookie (the allocated llioc_data, presumably
 * — the return line is not visible here) to pass to
 * ll_iocontrol_unregister(), or an error/NULL on bad arguments or OOM.
 */
3659 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3662 struct llioc_data *in_data = NULL;
3665 if (cb == NULL || cmd == NULL ||
3666 count > LLIOC_MAX_CMD || count < 0)
	 /* header plus trailing command array in one allocation */
3669 size = sizeof(*in_data) + count * sizeof(unsigned int);
3670 OBD_ALLOC(in_data, size);
3671 if (in_data == NULL)
3674 memset(in_data, 0, sizeof(*in_data));
3675 in_data->iocd_size = size;
3676 in_data->iocd_cb = cb;
3677 in_data->iocd_count = count;
3678 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3680 down_write(&llioc.ioc_sem);
3681 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3682 up_write(&llioc.ioc_sem);
/*
 * Unregister a handler previously returned by ll_iocontrol_register().
 * Finds the entry matching @magic under the write lock, unlinks and
 * frees it; warns if the cookie is unknown.
 */
3687 void ll_iocontrol_unregister(void *magic)
3689 struct llioc_data *tmp;
3694 down_write(&llioc.ioc_sem);
3695 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3697 unsigned int size = tmp->iocd_size;
3699 list_del(&tmp->iocd_list);
	 /* drop the lock before freeing; entry is already unlinked */
3700 up_write(&llioc.ioc_sem);
3702 OBD_FREE(tmp, size);
3706 up_write(&llioc.ioc_sem);
3708 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3711 EXPORT_SYMBOL(ll_iocontrol_register);
3712 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch ioctl @cmd to registered dynamic handlers. Walks the
 * registry under the read lock and invokes each handler whose command
 * table contains @cmd, stopping when one returns LLIOC_STOP. The
 * handler's result code is passed back through *rcp (rc starts at
 * -EINVAL for "nobody handled it").
 */
3714 static enum llioc_iter
3715 ll_iocontrol_call(struct inode *inode, struct file *file,
3716 unsigned int cmd, unsigned long arg, int *rcp)
3718 enum llioc_iter ret = LLIOC_CONT;
3719 struct llioc_data *data;
3720 int rc = -EINVAL, i;
3722 down_read(&llioc.ioc_sem);
3723 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3724 for (i = 0; i < data->iocd_count; i++) {
3725 if (cmd != data->iocd_cmd[i])
3728 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3732 if (ret == LLIOC_STOP)
3735 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration into the cl_object stack via
 * cl_conf_set(). For OBJECT_CONF_SET the layout DLM lock is only
 * allowed to match after the layout has been applied (otherwise other
 * threads could see a stale layout), and the cached layout generation
 * in ll_inode_info is refreshed from the object.
 */
3742 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3744 struct ll_inode_info *lli = ll_i2info(inode);
3745 struct cl_object *obj = lli->lli_clob;
3746 struct cl_env_nest nest;
3754 env = cl_env_nested_get(&nest);
3756 RETURN(PTR_ERR(env));
3758 rc = cl_conf_set(env, lli->lli_clob, conf);
3762 if (conf->coc_opc == OBJECT_CONF_SET) {
3763 struct ldlm_lock *lock = conf->coc_lock;
3764 struct cl_layout cl = {
3768 LASSERT(lock != NULL);
3769 LASSERT(ldlm_has_layout(lock));
3771 /* it can only be allowed to match after layout is
3772 * applied to inode otherwise false layout would be
3773 * seen. Applying layout shoud happen before dropping
3774 * the intent lock. */
3775 ldlm_lock_allow_match(lock);
	 /* refresh the cached layout generation from the object */
3777 rc = cl_object_layout_get(env, obj, &cl);
3782 DFID": layout version change: %u -> %u\n",
3783 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3785 ll_layout_version_set(lli, cl.cl_layout_gen);
3789 cl_env_nested_put(&nest, env);
/* Fetch layout from MDT with getxattr request, if it's not ready yet */
/*
 * If the layout lock was granted immediately, the layout arrived in the
 * DLM LVB and lock->l_lvb_data is already set; otherwise (granted via
 * completion AST) fetch the LOV EA with a getxattr RPC and attach a
 * copy to the lock as its LVB, racing safely against another thread
 * doing the same (only the first attach wins; losers free their copy).
 */
3795 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3798 struct ll_sb_info *sbi = ll_i2sbi(inode);
3799 struct ptlrpc_request *req;
3800 struct mdt_body *body;
3807 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3808 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3809 lock->l_lvb_data, lock->l_lvb_len);
	 /* layout already attached: nothing to fetch */
3811 if (lock->l_lvb_data != NULL)
3814 /* if layout lock was granted right away, the layout is returned
3815 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3816 * blocked and then granted via completion ast, we have to fetch
3817 * layout here. Please note that we can't use the LVB buffer in
3818 * completion AST because it doesn't have a large enough buffer */
3819 rc = ll_get_default_mdsize(sbi, &lmmsize);
3821 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
3822 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3827 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3829 GOTO(out, rc = -EPROTO);
3831 lmmsize = body->mbo_eadatasize;
3832 if (lmmsize == 0) /* empty layout */
3835 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3837 GOTO(out, rc = -EFAULT);
3839 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3840 if (lvbdata == NULL)
3841 GOTO(out, rc = -ENOMEM);
3843 memcpy(lvbdata, lmm, lmmsize);
3844 lock_res_and_lock(lock);
	 /* attach only if no one beat us to it */
3845 if (unlikely(lock->l_lvb_data == NULL)) {
3846 lock->l_lvb_type = LVB_T_LAYOUT;
3847 lock->l_lvb_data = lvbdata;
3848 lock->l_lvb_len = lmmsize;
3851 unlock_res_and_lock(lock);
	 /* lost the race: free our copy */
3853 if (lvbdata != NULL)
3854 OBD_FREE_LARGE(lvbdata, lmmsize);
3859 ptlrpc_req_finished(req);
3864 * Apply the layout to the inode. Layout lock is held and will be released
3867 static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
3868 struct inode *inode)
3870 struct ll_inode_info *lli = ll_i2info(inode);
3871 struct ll_sb_info *sbi = ll_i2sbi(inode);
3872 struct ldlm_lock *lock;
3873 struct cl_object_conf conf;
3876 bool wait_layout = false;
3879 LASSERT(lustre_handle_is_used(lockh));
3881 lock = ldlm_handle2lock(lockh);
3882 LASSERT(lock != NULL);
3883 LASSERT(ldlm_has_layout(lock));
3885 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured",
3886 PFID(&lli->lli_fid), inode);
3888 /* in case this is a caching lock and reinstate with new inode */
3889 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3891 lock_res_and_lock(lock);
3892 lvb_ready = ldlm_is_lvb_ready(lock);
3893 unlock_res_and_lock(lock);
3894 /* checking lvb_ready is racy but this is okay. The worst case is
3895 * that multi processes may configure the file on the same time. */
	 /* make sure the layout blob is attached to the lock's LVB */
3900 rc = ll_layout_fetch(inode, lock);
3904 /* for layout lock, lmm is stored in lock's lvb.
3905 * lvb_data is immutable if the lock is held so it's safe to access it
3908 * set layout to file. Unlikely this will fail as old layout was
3909 * surely eliminated */
3910 memset(&conf, 0, sizeof conf);
3911 conf.coc_opc = OBJECT_CONF_SET;
3912 conf.coc_inode = inode;
3913 conf.coc_lock = lock;
3914 conf.u.coc_layout.lb_buf = lock->l_lvb_data;
3915 conf.u.coc_layout.lb_len = lock->l_lvb_len;
3916 rc = ll_layout_conf(inode, &conf);
3918 /* refresh layout failed, need to wait */
3919 wait_layout = rc == -EBUSY;
3923 LDLM_LOCK_PUT(lock);
3924 ldlm_lock_decref(lockh, mode);
3926 /* wait for IO to complete if it's still being used. */
3928 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3929 ll_get_fsname(inode->i_sb, NULL, 0),
3930 PFID(&lli->lli_fid), inode);
	 /* OBJECT_CONF_WAIT blocks until in-flight IO under the old
	  * layout drains, then the caller can retry the refresh */
3932 memset(&conf, 0, sizeof conf);
3933 conf.coc_opc = OBJECT_CONF_WAIT;
3934 conf.coc_inode = inode;
3935 rc = ll_layout_conf(inode, &conf);
3939 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3940 ll_get_fsname(inode->i_sb, NULL, 0),
3941 PFID(&lli->lli_fid), rc);
/*
 * Body of the layout refresh, called with lli_layout_mutex held.
 * First tries to match a cached layout lock locally (cheap path); on a
 * miss, enqueues an IT_LAYOUT intent on the MDS and applies whatever
 * lock/layout comes back via ll_layout_lock_set(). Retry on -EAGAIN is
 * presumably handled by a loop whose lines are not visible here.
 */
3946 static int ll_layout_refresh_locked(struct inode *inode)
3948 struct ll_inode_info *lli = ll_i2info(inode);
3949 struct ll_sb_info *sbi = ll_i2sbi(inode);
3950 struct md_op_data *op_data;
3951 struct lookup_intent it;
3952 struct lustre_handle lockh;
3953 enum ldlm_mode mode;
3954 struct ldlm_enqueue_info einfo = {
3955 .ei_type = LDLM_IBITS,
3957 .ei_cb_bl = &ll_md_blocking_ast,
3958 .ei_cb_cp = &ldlm_completion_ast,
3964 /* mostly layout lock is caching on the local side, so try to match
3965 * it before grabbing layout lock mutex. */
3966 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3967 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3968 if (mode != 0) { /* hit cached lock */
3969 rc = ll_layout_lock_set(&lockh, mode, inode);
3976 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3977 0, 0, LUSTRE_OPC_ANY, NULL);
3978 if (IS_ERR(op_data))
3979 RETURN(PTR_ERR(op_data));
3981 /* have to enqueue one */
3982 memset(&it, 0, sizeof(it));
3983 it.it_op = IT_LAYOUT;
3984 lockh.cookie = 0ULL;
3986 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3987 ll_get_fsname(inode->i_sb, NULL, 0),
3988 PFID(&lli->lli_fid), inode);
3990 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
	 /* intent reply request is no longer needed once parsed */
3991 if (it.d.lustre.it_data != NULL)
3992 ptlrpc_req_finished(it.d.lustre.it_data);
3993 it.d.lustre.it_data = NULL;
3995 ll_finish_md_op_data(op_data);
	 /* take ownership of the lock mode out of the intent */
3997 mode = it.d.lustre.it_lock_mode;
3998 it.d.lustre.it_lock_mode = 0;
3999 ll_intent_drop_lock(&it);
4002 /* set lock data in case this is a new lock */
4003 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
4004 rc = ll_layout_lock_set(&lockh, mode, inode);
4013 * This function checks if there exists a LAYOUT lock on the client side,
4014 * or enqueues it if it doesn't have one in cache.
4016 * This function will not hold layout lock so it may be revoked any time after
4017 * this function returns. Any operations depend on layout should be redone
4020 * This function should be called before lov_io_init() to get an uptodate
4021 * layout version, the caller should save the version number and after IO
4022 * is finished, this function should be called again to verify that layout
4023 * is not changed during IO time.
4025 int ll_layout_refresh(struct inode *inode, __u32 *gen)
4027 struct ll_inode_info *lli = ll_i2info(inode);
4028 struct ll_sb_info *sbi = ll_i2sbi(inode);
	 /* fast path: layout already known, or server lacks layout locks */
4032 *gen = ll_layout_version_get(lli);
4033 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
4037 LASSERT(fid_is_sane(ll_inode2fid(inode)));
4038 LASSERT(S_ISREG(inode->i_mode));
4040 /* take layout lock mutex to enqueue layout lock exclusively. */
4041 mutex_lock(&lli->lli_layout_mutex);
4043 rc = ll_layout_refresh_locked(inode);
	 /* report the (possibly updated) generation back to the caller */
4047 *gen = ll_layout_version_get(lli);
4049 mutex_unlock(&lli->lli_layout_mutex);
4055 * This function send a restore request to the MDT
4057 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
4059 struct hsm_user_request *hur;
4063 len = sizeof(struct hsm_user_request) +
4064 sizeof(struct hsm_user_item);
4065 OBD_ALLOC(hur, len);
4069 hur->hur_request.hr_action = HUA_RESTORE;
4070 hur->hur_request.hr_archive_id = 0;
4071 hur->hur_request.hr_flags = 0;
4072 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
4073 sizeof(hur->hur_user_item[0].hui_fid));
4074 hur->hur_user_item[0].hui_extent.offset = offset;
4075 hur->hur_user_item[0].hui_extent.length = length;
4076 hur->hur_request.hr_itemcount = 1;
4077 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,