4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include <linux/user_namespace.h>
49 #ifdef HAVE_UIDGID_HEADER
50 # include <linux/uidgid.h>
52 #include <lustre/ll_fiemap.h>
53 #include <lustre_ioctl.h>
55 #include "cl_object.h"
57 #include "llite_internal.h"
58 #include "vvp_internal.h"
/*
 * Forward declarations for helpers defined later in this file.
 * NOTE(review): neighboring lines are elided in this excerpt, so the
 * return type of ll_put_grouplock() and the trailing parameters of
 * ll_lease_close() are not visible here.
 */
61 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
63 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
66 static enum llioc_iter
67 ll_iocontrol_call(struct inode *inode, struct file *file,
68 unsigned int cmd, unsigned long arg, int *rcp);
/*
 * Allocate a per-open ll_file_data from its dedicated slab cache.
 * GFP_NOFS avoids re-entering the filesystem during reclaim.
 * NOTE(review): the allocation-failure check, remaining field
 * initialization and return statement are elided in this excerpt.
 */
70 static struct ll_file_data *ll_file_data_get(void)
72 struct ll_file_data *fd;
74 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
/* start each open with a clean write-error state */
78 fd->fd_write_failed = false;
/* Return a ll_file_data obtained from ll_file_data_get() to the slab. */
83 static void ll_file_data_put(struct ll_file_data *fd)
86 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
90 * Packs all the attributes into @op_data for the CLOSE rpc.
92 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
93 struct obd_client_handle *och)
/* No name/parent is needed for a close; only the inode itself. */
97 ll_prep_md_op_data(op_data, inode, NULL, NULL,
98 0, 0, LUSTRE_OPC_ANY, NULL);
/* Snapshot the client's view of the inode so the MDT stores the
 * final attribute state on close. */
100 op_data->op_attr.ia_mode = inode->i_mode;
101 op_data->op_attr.ia_atime = inode->i_atime;
102 op_data->op_attr.ia_mtime = inode->i_mtime;
103 op_data->op_attr.ia_ctime = inode->i_ctime;
104 op_data->op_attr.ia_size = i_size_read(inode);
105 op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
106 ATTR_MTIME | ATTR_MTIME_SET |
107 ATTR_CTIME | ATTR_CTIME_SET;
108 op_data->op_attr_blocks = inode->i_blocks;
109 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
/* Identify which MDT open handle this close refers to. */
110 op_data->op_handle = och->och_fh;
112 if (och->och_flags & FMODE_WRITE &&
113 ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED))
114 /* For HSM: if inode data has been modified, pack it so that
115 * MDT can set data dirty flag in the archive. */
116 op_data->op_bias |= MDS_DATA_MODIFIED;
122 * Perform a close, possibly with a bias.
123 * The meaning of "data" depends on the value of "bias".
125 * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
126 * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
129 static int ll_close_inode_openhandle(struct obd_export *md_exp,
130 struct obd_client_handle *och,
132 enum mds_op_bias bias, void *data)
134 const struct ll_inode_info *lli = ll_i2info(inode);
135 struct md_op_data *op_data;
136 struct ptlrpc_request *req = NULL;
/* Sanity: the MDC export must still be connected. */
140 if (class_exp2obd(md_exp) == NULL) {
141 CERROR("%s: invalid MDC connection handle closing "DFID"\n",
142 ll_get_fsname(inode->i_sb, NULL, 0),
143 PFID(&lli->lli_fid));
147 OBD_ALLOC_PTR(op_data);
148 /* We leak openhandle and request here on error, but not much to be
149 * done in OOM case since app won't retry close on error either. */
151 GOTO(out, rc = -ENOMEM);
153 ll_prepare_close(inode, op_data, och);
/* NOTE(review): the switch (bias) opener is elided in this excerpt;
 * the cases below dispatch on the close bias. */
155 case MDS_CLOSE_LAYOUT_SWAP:
156 LASSERT(data != NULL);
157 op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
158 op_data->op_data_version = 0;
159 op_data->op_lease_handle = och->och_lease_handle;
/* data is the second inode whose layout we swap with */
160 op_data->op_fid2 = *ll_inode2fid(data);
163 case MDS_HSM_RELEASE:
164 LASSERT(data != NULL);
165 op_data->op_bias |= MDS_HSM_RELEASE;
/* data is a pointer to the data version to release */
166 op_data->op_data_version = *(__u64 *)data;
167 op_data->op_lease_handle = och->och_lease_handle;
168 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* default (no-bias) case: no auxiliary data expected */
172 LASSERT(data == NULL);
176 rc = md_close(md_exp, op_data, och->och_mod, &req);
177 if (rc != 0 && rc != -EINTR)
178 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
179 md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
/* For biased closes, check whether the MDT actually executed the
 * close intent (lease may have been broken meanwhile). */
182 op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) {
183 struct mdt_body *body;
185 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
186 if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
190 ll_finish_md_op_data(op_data);
/* Drop replay data and poison the handle against reuse. */
194 md_clear_open_replay_data(md_exp, och);
195 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
198 ptlrpc_req_finished(req); /* This is close request */
/*
 * Close the MDS open handle for @inode matching open mode @fmode,
 * unless other users of the same handle remain.
 * NOTE(review): several lines (braces, use-count decrement, RETURN)
 * are elided in this excerpt.
 */
202 int ll_md_real_close(struct inode *inode, fmode_t fmode)
204 struct ll_inode_info *lli = ll_i2info(inode);
205 struct obd_client_handle **och_p;
206 struct obd_client_handle *och;
/* Select the per-mode open handle and its use counter. */
211 if (fmode & FMODE_WRITE) {
212 och_p = &lli->lli_mds_write_och;
213 och_usecount = &lli->lli_open_fd_write_count;
214 } else if (fmode & FMODE_EXEC) {
215 och_p = &lli->lli_mds_exec_och;
216 och_usecount = &lli->lli_open_fd_exec_count;
218 LASSERT(fmode & FMODE_READ);
219 och_p = &lli->lli_mds_read_och;
220 och_usecount = &lli->lli_open_fd_read_count;
223 mutex_lock(&lli->lli_och_mutex);
224 if (*och_usecount > 0) {
225 /* There are still users of this handle, so skip
227 mutex_unlock(&lli->lli_och_mutex);
233 mutex_unlock(&lli->lli_och_mutex);
236 /* There might be a race and this handle may already
238 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
239 och, inode, 0, NULL);
/*
 * Per-file-descriptor close: release group lock and lease if held,
 * drop this fd's mode use count, and close the MDS open handle if we
 * do not hold a matching OPEN DLM lock that lets us skip the RPC.
 */
245 static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
248 union ldlm_policy_data policy = {
249 .l_inodebits = { MDS_INODELOCK_OPEN },
/* TEST_LOCK: only probe for a granted OPEN lock, don't take a ref. */
251 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
252 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
253 struct ll_inode_info *lli = ll_i2info(inode);
254 struct lustre_handle lockh;
255 enum ldlm_mode lockmode;
259 /* clear group lock, if present */
260 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
261 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
263 if (fd->fd_lease_och != NULL) {
266 /* Usually the lease is not released when the
267 * application crashed, we need to release here. */
268 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
269 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
270 PFID(&lli->lli_fid), rc, lease_broken);
272 fd->fd_lease_och = NULL;
275 if (fd->fd_och != NULL) {
276 rc = ll_close_inode_openhandle(md_exp, fd->fd_och, inode, 0,
282 /* Let's see if we have good enough OPEN lock on the file and if
283 we can skip talking to MDS */
284 mutex_lock(&lli->lli_och_mutex);
285 if (fd->fd_omode & FMODE_WRITE) {
287 LASSERT(lli->lli_open_fd_write_count);
288 lli->lli_open_fd_write_count--;
289 } else if (fd->fd_omode & FMODE_EXEC) {
291 LASSERT(lli->lli_open_fd_exec_count);
292 lli->lli_open_fd_exec_count--;
295 LASSERT(lli->lli_open_fd_read_count);
296 lli->lli_open_fd_read_count--;
298 mutex_unlock(&lli->lli_och_mutex);
/* No matching OPEN lock cached locally: must close on the MDS. */
300 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
301 LDLM_IBITS, &policy, lockmode, &lockh))
302 rc = ll_md_real_close(inode, fd->fd_omode);
/* This fd is done: detach and free its private data. */
305 LUSTRE_FPRIVATE(file) = NULL;
306 ll_file_data_put(fd);
311 /* While this returns an error code, fput() the caller does not, so we need
312 * to make every effort to clean up all of our state here. Also, applications
313 * rarely check close errors and even if an error is returned they will not
314 * re-try the close call.
316 int ll_file_release(struct inode *inode, struct file *file)
318 struct ll_file_data *fd;
319 struct ll_sb_info *sbi = ll_i2sbi(inode);
320 struct ll_inode_info *lli = ll_i2info(inode);
324 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
325 PFID(ll_inode2fid(inode)), inode);
327 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping, done only when releasing the root
 * inode of the mount. */
328 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
329 inode == inode->i_sb->s_root->d_inode) {
330 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
333 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
334 fd->fd_flags &= ~LL_FILE_RMTACL;
335 rct_del(&sbi->ll_rct, current_pid());
336 et_search_free(&sbi->ll_et, current_pid());
/* Don't count releases of the root dentry in the stats. */
341 if (inode->i_sb->s_root != file->f_path.dentry)
342 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
343 fd = LUSTRE_FPRIVATE(file);
346 /* The last ref on @file, maybe not the owner pid of statahead,
347 * because parent and child process can share the same file handle. */
348 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
349 ll_deauthorize_statahead(inode, fd);
/* Root dentry: no MDS close needed, just drop the fd data. */
351 if (inode->i_sb->s_root == file->f_path.dentry) {
352 LUSTRE_FPRIVATE(file) = NULL;
353 ll_file_data_put(fd);
/* Regular files: pick up any deferred async write errors so this
 * close can report them. */
357 if (!S_ISDIR(inode->i_mode)) {
358 if (lli->lli_clob != NULL)
359 lov_read_and_clear_async_rc(lli->lli_clob);
360 lli->lli_async_rc = 0;
363 rc = ll_md_close(sbi->ll_md_exp, inode, file);
365 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
366 libcfs_debug_dumplog();
/*
 * Send an IT_OPEN intent to the MDS for @file, optionally packing
 * striping metadata (@lmm/@lmmsize) into the request, and install the
 * resulting lock/inode data on success.
 */
371 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
372 struct lookup_intent *itp)
374 struct dentry *de = file->f_path.dentry;
375 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
376 struct dentry *parent = de->d_parent;
377 const char *name = NULL;
379 struct md_op_data *op_data;
380 struct ptlrpc_request *req = NULL;
384 LASSERT(parent != NULL);
385 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
387 /* if server supports open-by-fid, or file name is invalid, don't pack
388 * name in open request */
389 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
390 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
391 name = de->d_name.name;
392 len = de->d_name.len;
395 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
396 name, len, 0, LUSTRE_OPC_ANY, NULL);
398 RETURN(PTR_ERR(op_data));
/* hand striping data to the intent machinery */
399 op_data->op_data = lmm;
400 op_data->op_data_size = lmmsize;
402 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
403 &ll_md_blocking_ast, 0);
404 ll_finish_md_op_data(op_data);
406 /* reason for keep own exit path - don't flood log
407 * with messages with -ESTALE errors.
409 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
410 it_open_error(DISP_OPEN_OPEN, itp))
412 ll_release_openhandle(de, itp);
416 if (it_disposition(itp, DISP_LOOKUP_NEG))
417 GOTO(out, rc = -ENOENT);
419 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
420 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
421 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Success: refresh the inode from the reply and attach lock data. */
425 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
426 if (!rc && itp->d.lustre.it_lock_mode)
427 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
430 ptlrpc_req_finished(req);
431 ll_intent_drop_lock(itp);
/*
 * Populate an obd_client_handle from the MDT reply carried by the
 * completed intent @it, then register it for open replay.
 */
436 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
437 struct obd_client_handle *och)
439 struct ptlrpc_request *req = it->d.lustre.it_data;
440 struct mdt_body *body;
442 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
443 och->och_fh = body->mbo_handle;
444 och->och_fid = body->mbo_fid1;
/* lease handle shares the cookie of the granted intent lock */
445 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
446 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
447 och->och_flags = it->it_flags;
449 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish a local open: optionally fill @och from the intent, then
 * attach @fd to the struct file and initialize its per-fd state.
 * NOTE(review): the och != NULL guard around ll_och_fill() is elided
 * in this excerpt.
 */
452 static int ll_local_open(struct file *file, struct lookup_intent *it,
453 struct ll_file_data *fd, struct obd_client_handle *och)
455 struct inode *inode = file->f_path.dentry->d_inode;
458 LASSERT(!LUSTRE_FPRIVATE(file));
465 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
470 LUSTRE_FPRIVATE(file) = fd;
471 ll_readahead_init(inode, &fd->fd_ras);
472 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
474 /* ll_cl_context initialize */
475 rwlock_init(&fd->fd_lock);
476 INIT_LIST_HEAD(&fd->fd_lccs);
481 /* Open a file, and (for the very first open) create objects on the OSTs at
482 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
483 * creation or open until ll_lov_setstripe() ioctl is called.
485 * If we already have the stripe MD locally then we don't request it in
486 * md_open(), by passing a lmm_size = 0.
488 * It is up to the application to ensure no other processes open this file
489 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
490 * used. We might be able to avoid races of that sort by getting lli_open_sem
491 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
492 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
494 int ll_file_open(struct inode *inode, struct file *file)
496 struct ll_inode_info *lli = ll_i2info(inode);
497 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
498 .it_flags = file->f_flags };
499 struct obd_client_handle **och_p = NULL;
500 __u64 *och_usecount = NULL;
501 struct ll_file_data *fd;
505 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
506 PFID(ll_inode2fid(inode)), inode, file->f_flags);
/* An atomic-open path may have stashed a prepared intent here. */
508 it = file->private_data; /* XXX: compat macro */
509 file->private_data = NULL; /* prevent ll_local_open assertion */
511 fd = ll_file_data_get();
513 GOTO(out_openerr, rc = -ENOMEM);
516 if (S_ISDIR(inode->i_mode))
517 ll_authorize_statahead(inode, fd);
/* Root of the mount: no MDS open needed. */
519 if (inode->i_sb->s_root == file->f_path.dentry) {
520 LUSTRE_FPRIVATE(file) = fd;
524 if (!it || !it->d.lustre.it_disposition) {
525 /* Convert f_flags into access mode. We cannot use file->f_mode,
526 * because everything but O_ACCMODE mask was stripped from
528 if ((oit.it_flags + 1) & O_ACCMODE)
530 if (file->f_flags & O_TRUNC)
531 oit.it_flags |= FMODE_WRITE;
533 /* kernel only call f_op->open in dentry_open. filp_open calls
534 * dentry_open after call to open_namei that checks permissions.
535 * Only nfsd_open call dentry_open directly without checking
536 * permissions and because of that this code below is safe. */
537 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
538 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
540 /* We do not want O_EXCL here, presumably we opened the file
541 * already? XXX - NFS implications? */
542 oit.it_flags &= ~O_EXCL;
544 /* bug20584, if "it_flags" contains O_CREAT, the file will be
545 * created if necessary, then "IT_CREAT" should be set to keep
546 * consistent with it */
547 if (oit.it_flags & O_CREAT)
548 oit.it_op |= IT_CREAT;
554 /* Let's see if we have file open on MDS already. */
555 if (it->it_flags & FMODE_WRITE) {
556 och_p = &lli->lli_mds_write_och;
557 och_usecount = &lli->lli_open_fd_write_count;
558 } else if (it->it_flags & FMODE_EXEC) {
559 och_p = &lli->lli_mds_exec_och;
560 och_usecount = &lli->lli_open_fd_exec_count;
562 och_p = &lli->lli_mds_read_och;
563 och_usecount = &lli->lli_open_fd_read_count;
566 mutex_lock(&lli->lli_och_mutex);
567 if (*och_p) { /* Open handle is present */
568 if (it_disposition(it, DISP_OPEN_OPEN)) {
569 /* Well, there's extra open request that we do not need,
570 let's close it somehow. This will decref request. */
571 rc = it_open_error(DISP_OPEN_OPEN, it);
573 mutex_unlock(&lli->lli_och_mutex);
574 GOTO(out_openerr, rc);
577 ll_release_openhandle(file->f_path.dentry, it);
/* Reuse the existing handle; no och passed to ll_local_open. */
581 rc = ll_local_open(file, it, fd, NULL);
584 mutex_unlock(&lli->lli_och_mutex);
585 GOTO(out_openerr, rc);
588 LASSERT(*och_usecount == 0);
589 if (!it->d.lustre.it_disposition) {
590 /* We cannot just request lock handle now, new ELC code
591 means that one of other OPEN locks for this file
592 could be cancelled, and since blocking ast handler
593 would attempt to grab och_mutex as well, that would
594 result in a deadlock */
595 mutex_unlock(&lli->lli_och_mutex);
597 * Normally called under two situations:
599 * 2. A race/condition on MDS resulting in no open
600 * handle to be returned from LOOKUP|OPEN request,
601 * for example if the target entry was a symlink.
603 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
605 * Always specify MDS_OPEN_BY_FID because we don't want
606 * to get file with different fid.
608 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
609 rc = ll_intent_file_open(file, NULL, 0, it);
611 GOTO(out_openerr, rc);
615 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
617 GOTO(out_och_free, rc = -ENOMEM);
621 /* md_intent_lock() didn't get a request ref if there was an
622 * open error, so don't do cleanup on the request here
624 /* XXX (green): Should not we bail out on any error here, not
625 * just open error? */
626 rc = it_open_error(DISP_OPEN_OPEN, it);
628 GOTO(out_och_free, rc);
630 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
631 "inode %p: disposition %x, status %d\n", inode,
632 it_disposition(it, ~0), it->d.lustre.it_status);
634 rc = ll_local_open(file, it, fd, *och_p);
636 GOTO(out_och_free, rc);
638 mutex_unlock(&lli->lli_och_mutex);
641 /* Must do this outside lli_och_mutex lock to prevent deadlock where
642 different kind of OPEN lock for this same inode gets cancelled
643 by ldlm_cancel_lru */
644 if (!S_ISREG(inode->i_mode))
645 GOTO(out_och_free, rc);
647 cl_lov_delay_create_clear(&file->f_flags);
648 GOTO(out_och_free, rc);
/* Error path: free a half-constructed open handle. */
652 if (och_p && *och_p) {
653 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
654 *och_p = NULL; /* OBD_FREE writes some magic there */
657 mutex_unlock(&lli->lli_och_mutex);
660 if (lli->lli_opendir_key == fd)
661 ll_deauthorize_statahead(inode, fd);
663 ll_file_data_put(fd);
665 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* Drop the extra open-request reference taken by the intent. */
668 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
669 ptlrpc_req_finished(it->d.lustre.it_data);
670 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease locks: on conflict, cancel the lease lock
 * asynchronously.  Unlike ll_md_blocking_ast() this does not touch
 * the open handle (see the comment at the ll_lease_open() call site).
 */
676 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
677 struct ldlm_lock_desc *desc, void *data, int flag)
680 struct lustre_handle lockh;
684 case LDLM_CB_BLOCKING:
685 ldlm_lock2handle(lock, &lockh);
686 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
688 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
/* cancel-side callback: nothing extra to do in this excerpt */
692 case LDLM_CB_CANCELING:
700 * Acquire a lease and open the file.
702 static struct obd_client_handle *
703 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
706 struct lookup_intent it = { .it_op = IT_OPEN };
707 struct ll_sb_info *sbi = ll_i2sbi(inode);
708 struct md_op_data *op_data;
709 struct ptlrpc_request *req = NULL;
710 struct lustre_handle old_handle = { 0 };
711 struct obd_client_handle *och = NULL;
/* Lease must be taken for exactly one of read or write. */
716 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
717 RETURN(ERR_PTR(-EINVAL));
720 struct ll_inode_info *lli = ll_i2info(inode);
721 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
722 struct obd_client_handle **och_p;
/* The file must already be open in a compatible, non-exec mode. */
725 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
726 RETURN(ERR_PTR(-EPERM));
728 /* Get the openhandle of the file */
730 mutex_lock(&lli->lli_och_mutex);
/* Only one lease per fd. */
731 if (fd->fd_lease_och != NULL) {
732 mutex_unlock(&lli->lli_och_mutex);
736 if (fd->fd_och == NULL) {
737 if (file->f_mode & FMODE_WRITE) {
738 LASSERT(lli->lli_mds_write_och != NULL);
739 och_p = &lli->lli_mds_write_och;
740 och_usecount = &lli->lli_open_fd_write_count;
742 LASSERT(lli->lli_mds_read_och != NULL);
743 och_p = &lli->lli_mds_read_och;
744 och_usecount = &lli->lli_open_fd_read_count;
/* NOTE(review): the handle-stealing logic for the sole-opener
 * case (*och_usecount == 1) is elided in this excerpt. */
746 if (*och_usecount == 1) {
753 mutex_unlock(&lli->lli_och_mutex);
754 if (rc < 0) /* more than 1 opener */
757 LASSERT(fd->fd_och != NULL);
758 old_handle = fd->fd_och->och_fh;
763 RETURN(ERR_PTR(-ENOMEM));
765 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
766 LUSTRE_OPC_ANY, NULL);
768 GOTO(out, rc = PTR_ERR(op_data));
770 /* To tell the MDT this openhandle is from the same owner */
771 op_data->op_handle = old_handle;
773 it.it_flags = fmode | open_flags;
774 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
775 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
776 &ll_md_blocking_lease_ast,
777 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
778 * it can be cancelled which may mislead applications that the lease is
780 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
781 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
782 * doesn't deal with openhandle, so normal openhandle will be leaked. */
783 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
784 ll_finish_md_op_data(op_data);
785 ptlrpc_req_finished(req);
787 GOTO(out_release_it, rc);
789 if (it_disposition(&it, DISP_LOOKUP_NEG))
790 GOTO(out_release_it, rc = -ENOENT);
792 rc = it_open_error(DISP_OPEN_OPEN, &it);
794 GOTO(out_release_it, rc);
796 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
797 ll_och_fill(sbi->ll_md_exp, &it, och);
799 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
800 GOTO(out_close, rc = -EOPNOTSUPP);
802 /* already get lease, handle lease lock */
803 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
804 if (it.d.lustre.it_lock_mode == 0 ||
805 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
806 /* open lock must return for lease */
807 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
808 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
809 it.d.lustre.it_lock_bits);
810 GOTO(out_close, rc = -EPROTO);
813 ll_intent_release(&it);
/* Error path: drop the lease lock and close the handle we created. */
817 /* Cancel open lock */
818 if (it.d.lustre.it_lock_mode != 0) {
819 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
820 it.d.lustre.it_lock_mode);
821 it.d.lustre.it_lock_mode = 0;
822 och->och_lease_handle.cookie = 0ULL;
824 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, och, inode, 0, NULL);
826 CERROR("%s: error closing file "DFID": %d\n",
827 ll_get_fsname(inode->i_sb, NULL, 0),
828 PFID(&ll_i2info(inode)->lli_fid), rc2);
829 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
831 ll_intent_release(&it);
839 * Check whether a layout swap can be done between two inodes.
841 * \param[in] inode1 First inode to check
842 * \param[in] inode2 Second inode to check
844 * \retval 0 on success, layout swap can be performed between both inodes
845 * \retval negative error code if requirements are not met
847 static int ll_check_swap_layouts_validity(struct inode *inode1,
848 struct inode *inode2)
/* both must be regular files */
850 if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
/* caller must be allowed to write both */
853 if (inode_permission(inode1, MAY_WRITE) ||
854 inode_permission(inode2, MAY_WRITE))
/* both must live on the same Lustre mount */
857 if (inode1->i_sb != inode2->i_sb)
/*
 * Close @och on @inode with the MDS_CLOSE_LAYOUT_SWAP bias so that the
 * MDT atomically swaps layouts between @inode and @inode2 at close time.
 */
863 static int ll_swap_layouts_close(struct obd_client_handle *och,
864 struct inode *inode, struct inode *inode2)
866 const struct lu_fid *fid1 = ll_inode2fid(inode);
867 const struct lu_fid *fid2;
871 CDEBUG(D_INODE, "%s: biased close of file "DFID"\n",
872 ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
874 rc = ll_check_swap_layouts_validity(inode, inode2);
876 GOTO(out_free_och, rc);
878 /* We now know that inode2 is a lustre inode */
879 fid2 = ll_inode2fid(inode2);
/* swapping a file with itself makes no sense */
881 rc = lu_fid_cmp(fid1, fid2);
883 GOTO(out_free_och, rc = -EINVAL);
885 /* Close the file and swap layouts between inode & inode2.
886 * NB: lease lock handle is released in mdc_close_layout_swap_pack()
887 * because we still need it to pack l_remote_handle to MDT. */
888 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
889 MDS_CLOSE_LAYOUT_SWAP, inode2);
891 och = NULL; /* freed in ll_close_inode_openhandle() */
901 * Release lease and close the file.
902 * It will check if the lease has ever broken.
904 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
907 struct ldlm_lock *lock;
/* If the lock cannot be found it was already cancelled: treat the
 * lease as broken. */
908 bool cancelled = true;
912 lock = ldlm_handle2lock(&och->och_lease_handle);
914 lock_res_and_lock(lock);
915 cancelled = ldlm_is_cancel(lock);
916 unlock_res_and_lock(lock);
920 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
921 PFID(&ll_i2info(inode)->lli_fid), cancelled);
/* lease still intact: cancel it ourselves before closing */
924 ldlm_cli_cancel(&och->och_lease_handle, 0);
925 if (lease_broken != NULL)
926 *lease_broken = cancelled;
928 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
/*
 * Merge MDS-provided inode attributes with the size/blocks/timestamps
 * reported by the OSTs, keeping the most recent timestamps, all under
 * the inode size lock.
 */
934 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
936 struct ll_inode_info *lli = ll_i2info(inode);
937 struct cl_object *obj = lli->lli_clob;
938 struct cl_attr *attr = vvp_env_thread_attr(env);
946 ll_inode_size_lock(inode);
948 /* merge timestamps the most recently obtained from mds with
949 timestamps obtained from osts */
950 LTIME_S(inode->i_atime) = lli->lli_atime;
951 LTIME_S(inode->i_mtime) = lli->lli_mtime;
952 LTIME_S(inode->i_ctime) = lli->lli_ctime;
954 atime = LTIME_S(inode->i_atime);
955 mtime = LTIME_S(inode->i_mtime);
956 ctime = LTIME_S(inode->i_ctime);
/* fetch the OST-side view of size/blocks/times */
958 cl_object_attr_lock(obj);
959 rc = cl_object_attr_get(env, obj, attr);
960 cl_object_attr_unlock(obj);
963 GOTO(out_size_unlock, rc);
/* keep the newer of MDS and OST timestamps */
965 if (atime < attr->cat_atime)
966 atime = attr->cat_atime;
968 if (ctime < attr->cat_ctime)
969 ctime = attr->cat_ctime;
971 if (mtime < attr->cat_mtime)
972 mtime = attr->cat_mtime;
974 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
975 PFID(&lli->lli_fid), attr->cat_size);
977 i_size_write(inode, attr->cat_size);
978 inode->i_blocks = attr->cat_blocks;
980 LTIME_S(inode->i_atime) = atime;
981 LTIME_S(inode->i_mtime) = mtime;
982 LTIME_S(inode->i_ctime) = ctime;
985 ll_inode_size_unlock(inode);
/*
 * Return true if atime updates are disabled for @file via any of the
 * per-file, per-inode, per-mount or per-superblock noatime settings.
 * NOTE(review): the return statements between checks are elided in
 * this excerpt.
 */
990 static bool file_is_noatime(const struct file *file)
992 const struct vfsmount *mnt = file->f_path.mnt;
993 const struct inode *inode = file->f_path.dentry->d_inode;
995 /* Adapted from file_accessed() and touch_atime().*/
996 if (file->f_flags & O_NOATIME)
999 if (inode->i_flags & S_NOATIME)
1002 if (IS_NOATIME(inode))
1005 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1008 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1011 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io from the open flags of @file: non-blocking,
 * append/sync behavior for writes, lock-request policy and noatime.
 */
1017 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
1019 struct inode *inode = file->f_path.dentry->d_inode;
1021 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1023 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1024 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1025 file->f_flags & O_DIRECT ||
1028 io->ci_obj = ll_i2info(inode)->lli_clob;
1029 io->ci_lockreq = CILR_MAYBE;
1030 if (ll_file_nolock(file)) {
1031 io->ci_lockreq = CILR_NEVER;
1032 io->ci_no_srvlock = 1;
1033 } else if (file->f_flags & O_APPEND) {
/* appends must be fully serialized server-side */
1034 io->ci_lockreq = CILR_MANDATORY;
1037 io->ci_noatime = file_is_noatime(file);
/*
 * Common read/write engine: set up a cl_io, take the per-inode range
 * lock where required, run the cl_io loop (restarting on layout
 * change), and fold the result into the fd/write statistics.
 */
1041 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1042 struct file *file, enum cl_io_type iot,
1043 loff_t *ppos, size_t count)
1045 struct vvp_io *vio = vvp_env_io(env);
1046 struct inode *inode = file->f_path.dentry->d_inode;
1047 struct ll_inode_info *lli = ll_i2info(inode);
1048 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1052 struct range_lock range;
1056 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1057 file->f_path.dentry->d_name.name, iot, *ppos, count);
1060 io = vvp_env_thread_io(env);
1061 ll_io_init(io, file, iot == CIT_WRITE);
1063 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1064 bool range_locked = false;
/* O_APPEND writes lock the whole file; others just [ppos, end). */
1066 if (file->f_flags & O_APPEND)
1067 range_lock_init(&range, 0, LUSTRE_EOF);
1069 range_lock_init(&range, *ppos, *ppos + count - 1);
1071 vio->vui_fd = LUSTRE_FPRIVATE(file);
1072 vio->vui_io_subtype = args->via_io_subtype;
1074 switch (vio->vui_io_subtype) {
1076 vio->vui_iter = args->u.normal.via_iter;
1077 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1078 vio->vui_tot_nrsegs = vio->vui_iter->nr_segs;
1079 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1080 vio->vui_iocb = args->u.normal.via_iocb;
1081 /* Direct IO reads must also take range lock,
1082 * or multiple reads will try to work on the same pages
1083 * See LU-6227 for details. */
1084 if (((iot == CIT_WRITE) ||
1085 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1086 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1087 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1089 rc = range_lock(&lli->lli_write_tree, &range);
1093 range_locked = true;
/* splice subtype: pipe target and flags come from the args */
1097 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1098 vio->u.splice.vui_flags = args->u.splice.via_flags;
1101 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1105 ll_cl_add(file, env, io);
1106 rc = cl_io_loop(env, io);
1107 ll_cl_remove(file, env);
1110 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1112 range_unlock(&lli->lli_write_tree, &range);
1115 /* cl_io_rw_init() handled IO */
/* Account for partial progress even when the loop errored. */
1119 if (io->ci_nob > 0) {
1120 result += io->ci_nob;
1121 count -= io->ci_nob;
1122 *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */
1124 /* prepare IO restart */
1125 if (count > 0 && args->via_io_subtype == IO_NORMAL) {
1126 args->u.normal.via_iter = vio->vui_iter;
1127 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1128 args->u.normal.via_iter->nr_segs = vio->vui_tot_nrsegs;
1129 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1134 cl_io_fini(env, io);
/* Layout changed mid-IO: restart the remaining bytes. */
1136 if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
1138 "%s: restart %s from %lld, count:%zu, result: %zd\n",
1139 file->f_path.dentry->d_name.name,
1140 iot == CIT_READ ? "read" : "write",
1141 *ppos, count, result);
1145 if (iot == CIT_READ) {
1147 ll_stats_ops_tally(ll_i2sbi(inode),
1148 LPROC_LL_READ_BYTES, result);
1149 } else if (iot == CIT_WRITE) {
1151 ll_stats_ops_tally(ll_i2sbi(inode),
1152 LPROC_LL_WRITE_BYTES, result);
/* remember whether writes on this fd have been failing, so
 * fsync/close can report it */
1153 fd->fd_write_failed = false;
1154 } else if (result == 0 && rc == 0) {
1157 fd->fd_write_failed = true;
1159 fd->fd_write_failed = false;
1160 } else if (rc != -ERESTARTSYS) {
1161 fd->fd_write_failed = true;
1165 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1167 return result > 0 ? result : rc;
1171 * Read from a file (through the page cache).
1173 static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1175 struct vvp_io_args *args;
1180 env = cl_env_get(&refcheck);
1182 return PTR_ERR(env);
/* package the iterator/iocb and delegate to the common engine */
1184 args = ll_env_args(env, IO_NORMAL);
1185 args->u.normal.via_iter = to;
1186 args->u.normal.via_iocb = iocb;
1188 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1189 &iocb->ki_pos, iov_iter_count(to));
1190 cl_env_put(env, &refcheck);
1195 * Write to a file (through the page cache).
1197 static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1199 struct vvp_io_args *args;
1204 env = cl_env_get(&refcheck);
1206 return PTR_ERR(env);
/* package the iterator/iocb and delegate to the common engine */
1208 args = ll_env_args(env, IO_NORMAL);
1209 args->u.normal.via_iter = from;
1210 args->u.normal.via_iocb = iocb;
1212 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1213 &iocb->ki_pos, iov_iter_count(from));
1214 cl_env_put(env, &refcheck);
1218 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1220 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
1222 static int ll_file_get_iov_count(const struct iovec *iov,
1223 unsigned long *nr_segs, size_t *count)
1228 for (seg = 0; seg < *nr_segs; seg++) {
1229 const struct iovec *iv = &iov[seg];
1232 * If any segment has a negative length, or the cumulative
1233 * length ever wraps negative then return -EINVAL.
1236 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
/* truncate the vector at the first inaccessible segment */
1238 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1243 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Compat aio_read entry for kernels without read_iter: validate the
 * iovec, copy it (the iterator may mutate it), build an iov_iter and
 * forward to ll_file_read_iter().
 */
1250 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1251 unsigned long nr_segs, loff_t pos)
1253 struct iovec *local_iov;
1254 struct iov_iter *to;
1259 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1267 env = cl_env_get(&refcheck);
1269 RETURN(PTR_ERR(env));
/* single-segment fast path reuses the per-env iovec ... */
1271 local_iov = &ll_env_info(env)->lti_local_iov;
1274 cl_env_put(env, &refcheck);
/* ... otherwise allocate a private copy of the whole vector */
1276 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1277 if (local_iov == NULL)
1280 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1288 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1289 iov_iter_init(to, READ, local_iov, nr_segs, iov_count);
1290 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1291 iov_iter_init(to, local_iov, nr_segs, iov_count, 0);
1292 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1294 result = ll_file_read_iter(iocb, to);
1299 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * ll_file_read(): synchronous ->read for old kernels.  Builds a one-segment
 * iovec plus a sync kiocb (per-env scratch) and calls ll_file_aio_read(),
 * then propagates the advanced position back to *ppos.
 * NOTE(review): sampled extract — lines elided.
 */
1304 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1308 struct iovec iov = { .iov_base = buf, .iov_len = count };
1309 struct kiocb *kiocb;
1314 env = cl_env_get(&refcheck);
1316 RETURN(PTR_ERR(env));
1318 kiocb = &ll_env_info(env)->lti_kiocb;
1319 init_sync_kiocb(kiocb, file);
1320 kiocb->ki_pos = *ppos;
/* field name for the byte count varies across kernel versions */
1321 #ifdef HAVE_KIOCB_KI_LEFT
1322 kiocb->ki_left = count;
1323 #elif defined(HAVE_KI_NBYTES)
1324 kiocb->ki_nbytes = count;
1327 result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos);
1328 *ppos = kiocb->ki_pos;
1330 cl_env_put(env, &refcheck);
/*
 * ll_file_aio_write(): compat aio_write, mirror of ll_file_aio_read() but
 * forwards to ll_file_write_iter() with a WRITE-direction iov_iter.
 * NOTE(review): sampled extract — lines elided.
 */
1335 * Write to a file (through the page cache).
1338 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1339 unsigned long nr_segs, loff_t pos)
1341 struct iovec *local_iov;
1342 struct iov_iter *from;
1347 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1355 env = cl_env_get(&refcheck);
1357 RETURN(PTR_ERR(env));
/* small iovec: reuse the per-env scratch iovec, avoiding an allocation */
1359 local_iov = &ll_env_info(env)->lti_local_iov;
1362 cl_env_put(env, &refcheck);
1364 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1365 if (local_iov == NULL)
1368 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1371 OBD_ALLOC_PTR(from);
1376 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1377 iov_iter_init(from, WRITE, local_iov, nr_segs, iov_count);
1378 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1379 iov_iter_init(from, local_iov, nr_segs, iov_count, 0);
1380 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1382 result = ll_file_write_iter(iocb, from);
1387 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * ll_file_write(): synchronous ->write for old kernels, mirror of
 * ll_file_read().  Wraps buf/count in a single iovec and sync kiocb,
 * calls ll_file_aio_write(), and updates *ppos from the kiocb.
 * NOTE(review): sampled extract — lines elided.
 */
1392 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1393 size_t count, loff_t *ppos)
1396 struct iovec iov = { .iov_base = (void __user *)buf,
1398 struct kiocb *kiocb;
1403 env = cl_env_get(&refcheck);
1405 RETURN(PTR_ERR(env));
1407 kiocb = &ll_env_info(env)->lti_kiocb;
1408 init_sync_kiocb(kiocb, file);
1409 kiocb->ki_pos = *ppos;
1410 #ifdef HAVE_KIOCB_KI_LEFT
1411 kiocb->ki_left = count;
1412 #elif defined(HAVE_KI_NBYTES)
1413 kiocb->ki_nbytes = count;
1416 result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos);
1417 *ppos = kiocb->ki_pos;
1419 cl_env_put(env, &refcheck);
1422 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
/*
 * ll_file_splice_read(): ->splice_read entry.  Same shape as the iter
 * paths but uses IO_SPLICE args (pipe + flags) and runs a CIT_READ.
 * NOTE(review): sampled extract — lines elided.
 */
1425 * Send file content (through pagecache) somewhere with helper
1427 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1428 struct pipe_inode_info *pipe, size_t count,
1432 struct vvp_io_args *args;
1437 env = cl_env_get(&refcheck);
1439 RETURN(PTR_ERR(env));
1441 args = ll_env_args(env, IO_SPLICE);
1442 args->u.splice.via_pipe = pipe;
1443 args->u.splice.via_flags = flags;
1445 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1446 cl_env_put(env, &refcheck);
/*
 * ll_lov_setstripe_ea_info(): apply striping (lum) to a file by re-opening
 * it by FID with an intent carrying the layout EA, under the inode size
 * lock; the transient open handle is released immediately afterwards.
 * NOTE(review): sampled extract — error paths and returns elided.
 */
1450 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1451 __u64 flags, struct lov_user_md *lum,
1454 struct lookup_intent oit = {
1456 .it_flags = flags | MDS_OPEN_BY_FID,
1461 ll_inode_size_lock(inode);
1462 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1464 GOTO(out_unlock, rc);
/* the open was only a vehicle for the layout EA; close it right away */
1466 ll_release_openhandle(file->f_path.dentry, &oit);
1469 ll_inode_size_unlock(inode);
1470 ll_intent_release(&oit);
1471 cl_lov_delay_create_clear(&file->f_flags);
/*
 * ll_lov_getstripe_ea_info(): fetch the LOV EA (layout) for @filename via
 * md_getattr_name(), validate its magic (V1/V3 only), and byte-swap it to
 * host endianness for userspace when the host is big-endian.  On success
 * *lmmp/*lmm_size point into the reply held by *request; caller releases
 * the request.  NOTE(review): sampled extract — some lines elided.
 */
1476 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1477 struct lov_mds_md **lmmp, int *lmm_size,
1478 struct ptlrpc_request **request)
1480 struct ll_sb_info *sbi = ll_i2sbi(inode);
1481 struct mdt_body *body;
1482 struct lov_mds_md *lmm = NULL;
1483 struct ptlrpc_request *req = NULL;
1484 struct md_op_data *op_data;
1487 rc = ll_get_default_mdsize(sbi, &lmmsize);
1491 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1492 strlen(filename), lmmsize,
1493 LUSTRE_OPC_ANY, NULL);
1494 if (IS_ERR(op_data))
1495 RETURN(PTR_ERR(op_data));
1497 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1498 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1499 ll_finish_md_op_data(op_data);
1501 CDEBUG(D_INFO, "md_getattr_name failed "
1502 "on %s: rc %d\n", filename, rc);
1506 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1507 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1509 lmmsize = body->mbo_eadatasize;
/* no EA bits valid (or empty EA, per elided condition) -> no layout */
1511 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1513 GOTO(out, rc = -ENODATA);
1516 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1517 LASSERT(lmm != NULL);
/* only plain and pool-aware layouts are understood here */
1519 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1520 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1521 GOTO(out, rc = -EPROTO);
1525 * This is coming from the MDS, so is probably in
1526 * little endian. We convert it to host endian before
1527 * passing it to userspace.
/* true only on big-endian hosts: wire format is little-endian */
1529 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1532 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1533 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1536 /* if function called for directory - we should
1537 * avoid swab not existent lsm objects */
1538 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1539 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1540 if (S_ISREG(body->mbo_mode))
1541 lustre_swab_lov_user_md_objects(
1542 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1544 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1545 lustre_swab_lov_user_md_v3(
1546 (struct lov_user_md_v3 *)lmm);
1547 if (S_ISREG(body->mbo_mode))
1548 lustre_swab_lov_user_md_objects(
1549 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1556 *lmm_size = lmmsize;
/*
 * ll_lov_setea(): LL_IOC_LOV_SETEA handler — admin-only (CAP_SYS_ADMIN)
 * path that copies a lov_user_md (with one ost_data slot) from userspace
 * and applies it via ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS.
 * NOTE(review): sampled extract — returns/error codes elided.
 */
1561 static int ll_lov_setea(struct inode *inode, struct file *file,
1564 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1565 struct lov_user_md *lump;
1566 int lum_size = sizeof(struct lov_user_md) +
1567 sizeof(struct lov_user_ost_data);
1571 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1574 OBD_ALLOC_LARGE(lump, lum_size);
1578 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1579 OBD_FREE_LARGE(lump, lum_size);
1583 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1585 OBD_FREE_LARGE(lump, lum_size);
/*
 * ll_file_getstripe(): copy the file's striping info to the userspace
 * buffer @lum by delegating to cl_object_getstripe() on the cl object.
 * NOTE(review): sampled extract — braces/returns elided.
 */
1589 static int ll_file_getstripe(struct inode *inode,
1590 struct lov_user_md __user *lum)
1597 env = cl_env_get(&refcheck);
1599 RETURN(PTR_ERR(env));
1601 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1602 cl_env_put(env, &refcheck);
/*
 * ll_lov_setstripe(): LL_IOC_LOV_SETSTRIPE handler.  Copies the user's
 * lov_user_md into the kernel, applies it, then refreshes the layout and
 * echoes the resulting striping back to the same user buffer.
 * NOTE(review): sampled extract — intermediate checks elided.
 */
1606 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1609 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1610 struct lov_user_md *klum;
1612 __u64 flags = FMODE_WRITE;
1615 rc = ll_copy_user_md(lum, &klum);
1620 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* zero stripe_count in the user buffer before getstripe rewrites it;
 * put_user() result is ignored here — presumably deliberate, verify */
1624 put_user(0, &lum->lmm_stripe_count);
1626 ll_layout_refresh(inode, &gen);
1627 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1630 OBD_FREE(klum, lum_size);
/*
 * ll_get_grouplock(): take a group lock (gid = @arg) on the file.  Checks
 * under lli_lock that no group lock is already held on this fd, acquires
 * the cl-layer group lock (dropping the spinlock across the blocking
 * call), then re-checks for a racing acquirer before publishing it in fd.
 * NOTE(review): sampled extract — returns elided.
 */
1635 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1637 struct ll_inode_info *lli = ll_i2info(inode);
1638 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1639 struct ll_grouplock grouplock;
1644 CWARN("group id for group lock must not be 0\n");
/* group locks conflict with the nolock mount/fd mode */
1648 if (ll_file_nolock(file))
1649 RETURN(-EOPNOTSUPP);
1651 spin_lock(&lli->lli_lock);
1652 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1653 CWARN("group lock already existed with gid %lu\n",
1654 fd->fd_grouplock.lg_gid);
1655 spin_unlock(&lli->lli_lock);
1658 LASSERT(fd->fd_grouplock.lg_lock == NULL);
1659 spin_unlock(&lli->lli_lock);
/* may block (unless O_NONBLOCK); must not hold lli_lock across it */
1661 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1662 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1666 spin_lock(&lli->lli_lock);
1667 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1668 spin_unlock(&lli->lli_lock);
1669 CERROR("another thread just won the race\n");
1670 cl_put_grouplock(&grouplock);
1674 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1675 fd->fd_grouplock = grouplock;
1676 spin_unlock(&lli->lli_lock);
1678 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * ll_put_grouplock(): release the group lock with gid @arg held on this
 * fd.  Validates under lli_lock that a lock is held and the gid matches,
 * detaches it from fd state, then drops the cl-layer lock outside the
 * spinlock.  NOTE(review): sampled extract — returns elided.
 */
1682 static int ll_put_grouplock(struct inode *inode, struct file *file,
1685 struct ll_inode_info *lli = ll_i2info(inode);
1686 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1687 struct ll_grouplock grouplock;
1690 spin_lock(&lli->lli_lock);
1691 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1692 spin_unlock(&lli->lli_lock);
1693 CWARN("no group lock held\n");
1697 LASSERT(fd->fd_grouplock.lg_lock != NULL);
1699 if (fd->fd_grouplock.lg_gid != arg) {
1700 CWARN("group lock %lu doesn't match current id %lu\n",
1701 arg, fd->fd_grouplock.lg_gid);
1702 spin_unlock(&lli->lli_lock);
/* detach first, release after unlocking: cl_put_grouplock can block */
1706 grouplock = fd->fd_grouplock;
1707 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1708 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1709 spin_unlock(&lli->lli_lock);
1711 cl_put_grouplock(&grouplock);
1712 CDEBUG(D_INFO, "group lock %lu released\n", arg);
/*
 * ll_release_openhandle(): close the MDS open handle carried in @it.
 * No-ops for the root dentry or when the intent carries no open; otherwise
 * fills a transient obd_client_handle from the intent and closes it.
 * NOTE(review): sampled extract — returns and labels elided.
 */
1717 * Close inode open handle
1719 * \param dentry [in] dentry which contains the inode
1720 * \param it [in,out] intent which contains open info and result
1723 * \retval <0 failure
1725 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1727 struct inode *inode = dentry->d_inode;
1728 struct obd_client_handle *och;
1734 /* Root ? Do nothing. */
1735 if (dentry->d_inode->i_sb->s_root == dentry)
1738 /* No open handle to close? Move away */
1739 if (!it_disposition(it, DISP_OPEN_OPEN))
1742 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1744 OBD_ALLOC(och, sizeof(*och));
1746 GOTO(out, rc = -ENOMEM);
1748 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1750 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1751 och, inode, 0, NULL);
1753 /* this one is in place of ll_file_open */
1754 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1755 ptlrpc_req_finished(it->d.lustre.it_data);
1756 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * ll_do_fiemap(): service a FIEMAP request.  Sanitizes unsupported flags
 * (reporting the compat mask back), honors FIEMAP_FLAG_SYNC by flushing
 * dirty pages, glimpses the size if needed, and delegates extent mapping
 * to cl_object_fiemap().  NOTE(review): sampled extract — lines elided.
 */
1762 * Get size for inode for which FIEMAP mapping is requested.
1763 * Make the FIEMAP get_info call and returns the result.
1764 * \param fiemap kernel buffer to hold extens
1765 * \param num_bytes kernel buffer size
1767 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1773 struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
1776 /* Checks for fiemap flags */
1777 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* report back which flags we do support, per FIEMAP convention */
1778 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1782 /* Check for FIEMAP_FLAG_SYNC */
1783 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1784 rc = filemap_fdatawrite(inode->i_mapping);
1789 env = cl_env_get(&refcheck);
1791 RETURN(PTR_ERR(env));
/* a zero cached size may just be stale — glimpse OSTs to confirm */
1793 if (i_size_read(inode) == 0) {
1794 rc = ll_glimpse_size(inode);
1799 fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1800 obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
1801 obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
1803 /* If filesize is 0, then there would be no objects for mapping */
1804 if (fmkey.lfik_oa.o_size == 0) {
1805 fiemap->fm_mapped_extents = 0;
1809 fmkey.lfik_fiemap = *fiemap;
1811 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1812 &fmkey, fiemap, &num_bytes);
1814 cl_env_put(env, &refcheck);
/*
 * ll_fid2path(): OBD_IOC_FID2PATH handler — resolve a FID to a path via
 * the MDC.  Requires CAP_DAC_READ_SEARCH unless the fs allows user
 * fid2path.  Allocates an output buffer sized by the user-supplied
 * gf_pathlen (capped at PATH_MAX) and round-trips it through
 * obd_iocontrol().  NOTE(review): sampled extract — error codes elided.
 */
1818 int ll_fid2path(struct inode *inode, void __user *arg)
1820 struct obd_export *exp = ll_i2mdexp(inode);
1821 const struct getinfo_fid2path __user *gfin = arg;
1823 struct getinfo_fid2path *gfout;
1829 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1830 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1833 /* Only need to get the buflen */
1834 if (get_user(pathlen, &gfin->gf_pathlen))
/* bound the allocation by the user-controlled length */
1837 if (pathlen > PATH_MAX)
1840 outsize = sizeof(*gfout) + pathlen;
1841 OBD_ALLOC(gfout, outsize);
1845 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1846 GOTO(gf_free, rc = -EFAULT);
1848 /* Call mdc_iocontrol */
1849 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1853 if (copy_to_user(arg, gfout, outsize))
1857 OBD_FREE(gfout, outsize);
/*
 * ll_data_version(): run a CIT_DATA_VERSION cl_io to obtain the file's
 * data version (computed from stripe object versions on the OSTs).
 * Restarts the io when the cl layer asks for it (ci_need_restart).
 * NOTE(review): sampled extract — loop/label structure elided.
 */
1862 * Read the data_version for inode.
1864 * This value is computed using stripe object version on OST.
1865 * Version is computed using server side locking.
1867 * @param flags if do sync on the OST side;
1869 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1870 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1872 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1874 struct cl_object *obj = ll_i2info(inode)->lli_clob;
1882 /* If no file object initialized, we consider its version is 0. */
1888 env = cl_env_get(&refcheck);
1890 RETURN(PTR_ERR(env));
1892 io = vvp_env_thread_io(env);
1894 io->u.ci_data_version.dv_data_version = 0;
1895 io->u.ci_data_version.dv_flags = flags;
1898 if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
1899 result = cl_io_loop(env, io);
1901 result = io->ci_result;
1903 *data_version = io->u.ci_data_version.dv_data_version;
1905 cl_io_fini(env, io);
/* layout changed underneath us — the elided code re-runs the io */
1907 if (unlikely(io->ci_need_restart))
1910 cl_env_put(env, &refcheck);
/*
 * ll_hsm_release(): release a file's data to its HSM archive copy.
 * Takes a write lease open with MDS_OPEN_RELEASE, flushes and grabs the
 * latest data_version, merges [am]time attributes, then closes with
 * MDS_HSM_RELEASE carrying that data_version so the MDT can verify the
 * archive copy is current.  NOTE(review): sampled extract — lines elided.
 */
1916 * Trigger a HSM release request for the provided inode.
1918 int ll_hsm_release(struct inode *inode)
1920 struct cl_env_nest nest;
1922 struct obd_client_handle *och = NULL;
1923 __u64 data_version = 0;
1927 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1928 ll_get_fsname(inode->i_sb, NULL, 0),
1929 PFID(&ll_i2info(inode)->lli_fid));
1931 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1933 GOTO(out, rc = PTR_ERR(och));
1935 /* Grab latest data_version and [am]time values */
1936 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1940 env = cl_env_nested_get(&nest);
1942 GOTO(out, rc = PTR_ERR(env));
1944 ll_merge_attr(env, inode);
1945 cl_env_nested_put(&nest, env);
1947 /* Release the file.
1948 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1949 * we still need it to pack l_remote_handle to MDT. */
1950 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
1951 MDS_HSM_RELEASE, &data_version);
1956 if (och != NULL && !IS_ERR(och)) /* close the file */
1957 ll_lease_close(och, inode, NULL);
/*
 * Scratch state for ll_swap_layouts(): the two inodes being swapped plus
 * (in elided fields) their expected data versions and check flags.
 */
1962 struct ll_swap_stack {
1965 struct inode *inode1;
1966 struct inode *inode2;
/*
 * ll_swap_layouts(): swap the layouts of two files (LL_IOC_LOV_SWAP_LAYOUTS
 * non-lease path).  Orders the pair by FID to avoid deadlocks, optionally
 * group-locks both to flush dirty cache, optionally verifies user-supplied
 * data versions (-EAGAIN on mismatch), then asks the MDT to swap via
 * obd_iocontrol().  NOTE(review): sampled extract — labels/returns elided.
 */
1971 static int ll_swap_layouts(struct file *file1, struct file *file2,
1972 struct lustre_swap_layouts *lsl)
1974 struct mdc_swap_layouts msl;
1975 struct md_op_data *op_data;
1978 struct ll_swap_stack *llss = NULL;
1981 OBD_ALLOC_PTR(llss);
1985 llss->inode1 = file1->f_path.dentry->d_inode;
1986 llss->inode2 = file2->f_path.dentry->d_inode;
1988 rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
1992 /* we use 2 bool because it is easier to swap than 2 bits */
1993 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1994 llss->check_dv1 = true;
1996 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1997 llss->check_dv2 = true;
1999 /* we cannot use lsl->sl_dvX directly because we may swap them */
2000 llss->dv1 = lsl->sl_dv1;
2001 llss->dv2 = lsl->sl_dv2;
2003 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2004 if (rc == 0) /* same file, done! */
/* canonical FID order so concurrent swaps cannot deadlock */
2007 if (rc < 0) { /* sequentialize it */
2008 swap(llss->inode1, llss->inode2);
2010 swap(llss->dv1, llss->dv2);
2011 swap(llss->check_dv1, llss->check_dv2);
2015 if (gid != 0) { /* application asks to flush dirty cache */
2016 rc = ll_get_grouplock(llss->inode1, file1, gid);
2020 rc = ll_get_grouplock(llss->inode2, file2, gid);
2022 ll_put_grouplock(llss->inode1, file1, gid);
2027 /* ultimate check, before swaping the layouts we check if
2028 * dataversion has changed (if requested) */
2029 if (llss->check_dv1) {
2030 rc = ll_data_version(llss->inode1, &dv, 0);
2033 if (dv != llss->dv1)
2034 GOTO(putgl, rc = -EAGAIN);
2037 if (llss->check_dv2) {
2038 rc = ll_data_version(llss->inode2, &dv, 0);
2041 if (dv != llss->dv2)
2042 GOTO(putgl, rc = -EAGAIN);
2045 /* struct md_op_data is used to send the swap args to the mdt
2046 * only flags is missing, so we use struct mdc_swap_layouts
2047 * through the md_op_data->op_data */
2048 /* flags from user space have to be converted before they are send to
2049 * server, no flag is sent today, they are only used on the client */
2052 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2053 0, LUSTRE_OPC_ANY, &msl);
2054 if (IS_ERR(op_data))
2055 GOTO(free, rc = PTR_ERR(op_data));
2057 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2058 sizeof(*op_data), op_data, NULL);
2059 ll_finish_md_op_data(op_data);
/* release in reverse order of acquisition */
2066 ll_put_grouplock(llss->inode2, file2, gid);
2067 ll_put_grouplock(llss->inode1, file1, gid);
/*
 * ll_hsm_state_set(): set/clear HSM flags on a file via the MDT.
 * Validates the masks (only HSM_USER_MASK bits without CAP_SYS_ADMIN)
 * and the archive id before sending LL_IOC_HSM_STATE_SET to the MDC.
 * NOTE(review): sampled extract — error returns elided.
 */
2077 int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2079 struct md_op_data *op_data;
2083 /* Detect out-of range masks */
2084 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2087 /* Non-root users are forbidden to set or clear flags which are
2088 * NOT defined in HSM_USER_MASK. */
2089 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2090 !cfs_capable(CFS_CAP_SYS_ADMIN))
2093 /* Detect out-of range archive id */
2094 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2095 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2098 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2099 LUSTRE_OPC_ANY, hss);
2100 if (IS_ERR(op_data))
2101 RETURN(PTR_ERR(op_data));
2103 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2104 sizeof(*op_data), op_data, NULL);
2106 ll_finish_md_op_data(op_data);
/*
 * ll_hsm_import(): import a file that exists only in the HSM archive.
 * Marks it ARCHIVED|EXISTS|RELEASED via ll_hsm_state_set(), then restores
 * the archived attributes (mode/uid/gid/size/times) from @hui with a
 * forced setattr under i_mutex.  Regular files only.
 * NOTE(review): sampled extract — allocation checks/returns elided.
 */
2111 static int ll_hsm_import(struct inode *inode, struct file *file,
2112 struct hsm_user_import *hui)
2114 struct hsm_state_set *hss = NULL;
2115 struct iattr *attr = NULL;
2119 if (!S_ISREG(inode->i_mode))
2125 GOTO(out, rc = -ENOMEM);
2127 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2128 hss->hss_archive_id = hui->hui_archive_id;
2129 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2130 rc = ll_hsm_state_set(inode, hss);
2134 OBD_ALLOC_PTR(attr);
2136 GOTO(out, rc = -ENOMEM);
/* restore archived attributes; force S_IFREG into the mode bits */
2138 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2139 attr->ia_mode |= S_IFREG;
2140 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2141 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2142 attr->ia_size = hui->hui_size;
2143 attr->ia_mtime.tv_sec = hui->hui_mtime;
2144 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2145 attr->ia_atime.tv_sec = hui->hui_atime;
2146 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2148 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2149 ATTR_UID | ATTR_GID |
2150 ATTR_MTIME | ATTR_MTIME_SET |
2151 ATTR_ATIME | ATTR_ATIME_SET;
2153 mutex_lock(&inode->i_mutex);
2155 rc = ll_setattr_raw(file->f_path.dentry, attr, true);
2159 mutex_unlock(&inode->i_mutex);
/* Map an fmode_t to the LL_LEASE_{RD,WR}LCK bit mask reported to users. */
2171 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2173 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2174 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * ll_file_futimes_3(): LL_IOC_FUTIMES_3 helper — set atime, mtime AND
 * ctime from @lfu in one setattr.  CAP_SYS_ADMIN only (ctime is normally
 * not user-settable); regular files only; runs under i_mutex.
 * NOTE(review): sampled extract — iattr initializer partially elided.
 */
2177 static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu)
2179 struct inode *inode = file->f_path.dentry->d_inode;
2181 .ia_valid = ATTR_ATIME | ATTR_ATIME_SET |
2182 ATTR_MTIME | ATTR_MTIME_SET |
2183 ATTR_CTIME | ATTR_CTIME_SET,
2185 .tv_sec = lfu->lfu_atime_sec,
2186 .tv_nsec = lfu->lfu_atime_nsec,
2189 .tv_sec = lfu->lfu_mtime_sec,
2190 .tv_nsec = lfu->lfu_mtime_nsec,
2193 .tv_sec = lfu->lfu_ctime_sec,
2194 .tv_nsec = lfu->lfu_ctime_nsec,
2200 if (!capable(CAP_SYS_ADMIN))
2203 if (!S_ISREG(inode->i_mode))
2206 mutex_lock(&inode->i_mutex);
2207 rc = ll_setattr_raw(file->f_path.dentry, &ia, false);
2208 mutex_unlock(&inode->i_mutex);
/*
 * ll_file_ioctl(): the llite ->unlocked_ioctl dispatcher for regular
 * files.  Handles fd-flag get/set, striping (setstripe/setea/getstripe),
 * layout swap, group locks, FID/path translation, data version, HSM
 * state/action/import, leases, futimes, and finally falls through to the
 * registered ll_iocontrol_call() handlers and the DT-device obd_iocontrol.
 * NOTE(review): sampled extract — many braces/returns/labels elided.
 */
2214 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2216 struct inode *inode = file->f_path.dentry->d_inode;
2217 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2221 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2222 PFID(ll_inode2fid(inode)), inode, cmd);
2223 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2225 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2226 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2230 case LL_IOC_GETFLAGS:
2231 /* Get the current value of the file flags */
2232 return put_user(fd->fd_flags, (int __user *)arg);
2233 case LL_IOC_SETFLAGS:
2234 case LL_IOC_CLRFLAGS:
2235 /* Set or clear specific file flags */
2236 /* XXX This probably needs checks to ensure the flags are
2237 * not abused, and to handle any flag side effects.
2239 if (get_user(flags, (int __user *) arg))
2242 if (cmd == LL_IOC_SETFLAGS) {
2243 if ((flags & LL_FILE_IGNORE_LOCK) &&
2244 !(file->f_flags & O_DIRECT)) {
2245 CERROR("%s: unable to disable locking on "
2246 "non-O_DIRECT file\n", current->comm);
2250 fd->fd_flags |= flags;
2252 fd->fd_flags &= ~flags;
2255 case LL_IOC_LOV_SETSTRIPE:
2256 RETURN(ll_lov_setstripe(inode, file, arg));
2257 case LL_IOC_LOV_SETEA:
2258 RETURN(ll_lov_setea(inode, file, arg));
2259 case LL_IOC_LOV_SWAP_LAYOUTS: {
2261 struct lustre_swap_layouts lsl;
2263 if (copy_from_user(&lsl, (char __user *)arg,
2264 sizeof(struct lustre_swap_layouts)))
/* both files must be open for write to swap layouts */
2267 if ((file->f_flags & O_ACCMODE) == O_RDONLY)
2270 file2 = fget(lsl.sl_fd);
2274 /* O_WRONLY or O_RDWR */
2275 if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
2276 GOTO(out, rc = -EPERM);
2278 if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
2279 struct inode *inode2;
2280 struct ll_inode_info *lli;
2281 struct obd_client_handle *och = NULL;
2283 if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE)
2284 GOTO(out, rc = -EINVAL);
2286 lli = ll_i2info(inode);
2287 mutex_lock(&lli->lli_och_mutex);
/* take ownership of the lease handle for the close-and-swap */
2288 if (fd->fd_lease_och != NULL) {
2289 och = fd->fd_lease_och;
2290 fd->fd_lease_och = NULL;
2292 mutex_unlock(&lli->lli_och_mutex);
2294 GOTO(out, rc = -ENOLCK);
2295 inode2 = file2->f_path.dentry->d_inode;
2296 rc = ll_swap_layouts_close(och, inode, inode2);
2298 rc = ll_swap_layouts(file, file2, &lsl);
2304 case LL_IOC_LOV_GETSTRIPE:
2305 RETURN(ll_file_getstripe(inode,
2306 (struct lov_user_md __user *)arg));
2307 case FSFILT_IOC_GETFLAGS:
2308 case FSFILT_IOC_SETFLAGS:
2309 RETURN(ll_iocontrol(inode, file, cmd, arg));
2310 case FSFILT_IOC_GETVERSION_OLD:
2311 case FSFILT_IOC_GETVERSION:
2312 RETURN(put_user(inode->i_generation, (int __user *)arg));
2313 case LL_IOC_GROUP_LOCK:
2314 RETURN(ll_get_grouplock(inode, file, arg));
2315 case LL_IOC_GROUP_UNLOCK:
2316 RETURN(ll_put_grouplock(inode, file, arg));
2317 case IOC_OBD_STATFS:
2318 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2320 /* We need to special case any other ioctls we want to handle,
2321 * to send them to the MDS/OST as appropriate and to properly
2322 * network encode the arg field.
2323 case FSFILT_IOC_SETVERSION_OLD:
2324 case FSFILT_IOC_SETVERSION:
2326 case LL_IOC_FLUSHCTX:
2327 RETURN(ll_flush_ctx(inode));
2328 case LL_IOC_PATH2FID: {
2329 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2330 sizeof(struct lu_fid)))
2335 case LL_IOC_GETPARENT:
2336 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2338 case OBD_IOC_FID2PATH:
2339 RETURN(ll_fid2path(inode, (void __user *)arg));
2340 case LL_IOC_DATA_VERSION: {
2341 struct ioc_data_version idv;
2344 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* only the flush flags are accepted from userspace */
2347 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2348 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2351 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2357 case LL_IOC_GET_MDTIDX: {
2360 mdtidx = ll_get_mdt_idx(inode);
2364 if (put_user((int)mdtidx, (int __user *)arg))
2369 case OBD_IOC_GETDTNAME:
2370 case OBD_IOC_GETMDNAME:
2371 RETURN(ll_get_obd_name(inode, cmd, arg));
2372 case LL_IOC_HSM_STATE_GET: {
2373 struct md_op_data *op_data;
2374 struct hsm_user_state *hus;
2381 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2382 LUSTRE_OPC_ANY, hus);
2383 if (IS_ERR(op_data)) {
2385 RETURN(PTR_ERR(op_data));
2388 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2391 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2394 ll_finish_md_op_data(op_data);
2398 case LL_IOC_HSM_STATE_SET: {
2399 struct hsm_state_set *hss;
2406 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2411 rc = ll_hsm_state_set(inode, hss);
2416 case LL_IOC_HSM_ACTION: {
2417 struct md_op_data *op_data;
2418 struct hsm_current_action *hca;
2425 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2426 LUSTRE_OPC_ANY, hca);
2427 if (IS_ERR(op_data)) {
2429 RETURN(PTR_ERR(op_data));
2432 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2435 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2438 ll_finish_md_op_data(op_data);
2442 case LL_IOC_SET_LEASE: {
2443 struct ll_inode_info *lli = ll_i2info(inode);
2444 struct obd_client_handle *och = NULL;
/* requested lease mode must be compatible with the open mode */
2449 case LL_LEASE_WRLCK:
2450 if (!(file->f_mode & FMODE_WRITE))
2452 fmode = FMODE_WRITE;
2454 case LL_LEASE_RDLCK:
2455 if (!(file->f_mode & FMODE_READ))
2459 case LL_LEASE_UNLCK:
2460 mutex_lock(&lli->lli_och_mutex);
2461 if (fd->fd_lease_och != NULL) {
2462 och = fd->fd_lease_och;
2463 fd->fd_lease_och = NULL;
2465 mutex_unlock(&lli->lli_och_mutex);
2470 fmode = och->och_flags;
2471 rc = ll_lease_close(och, inode, &lease_broken);
2478 RETURN(ll_lease_type_from_fmode(fmode));
2483 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2485 /* apply for lease */
2486 och = ll_lease_open(inode, file, fmode, 0);
2488 RETURN(PTR_ERR(och));
2491 mutex_lock(&lli->lli_och_mutex);
2492 if (fd->fd_lease_och == NULL) {
2493 fd->fd_lease_och = och;
2496 mutex_unlock(&lli->lli_och_mutex);
2498 /* impossible now that only excl is supported for now */
2499 ll_lease_close(och, inode, &lease_broken);
2504 case LL_IOC_GET_LEASE: {
2505 struct ll_inode_info *lli = ll_i2info(inode);
2506 struct ldlm_lock *lock = NULL;
2509 mutex_lock(&lli->lli_och_mutex);
2510 if (fd->fd_lease_och != NULL) {
2511 struct obd_client_handle *och = fd->fd_lease_och;
2513 lock = ldlm_handle2lock(&och->och_lease_handle);
2515 lock_res_and_lock(lock);
/* a cancelled lease lock no longer grants any lease type */
2516 if (!ldlm_is_cancel(lock))
2517 fmode = och->och_flags;
2519 unlock_res_and_lock(lock);
2520 LDLM_LOCK_PUT(lock);
2523 mutex_unlock(&lli->lli_och_mutex);
2525 RETURN(ll_lease_type_from_fmode(fmode));
2527 case LL_IOC_HSM_IMPORT: {
2528 struct hsm_user_import *hui;
2534 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2539 rc = ll_hsm_import(inode, file, hui);
2544 case LL_IOC_FUTIMES_3: {
2545 struct ll_futimes_3 lfu;
2547 if (copy_from_user(&lfu,
2548 (const struct ll_futimes_3 __user *)arg,
2552 RETURN(ll_file_futimes_3(file, &lfu));
/* default: try registered dynamic handlers, then the DT device */
2558 ll_iocontrol_call(inode, file, cmd, arg, &err))
2561 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2562 (void __user *)arg));
2567 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * llseek_execute(): clamp/validate @offset against @maxsize and commit it
 * to file->f_pos (resetting f_version) if it changed.  Compat helper used
 * only when the kernel lacks generic_file_llseek_size.
 * NOTE(review): sampled extract — error returns elided.
 */
2568 static inline loff_t
2569 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2571 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2573 if (offset > maxsize)
2576 if (offset != file->f_pos) {
2577 file->f_pos = offset;
2578 file->f_version = 0;
/*
 * generic_file_llseek_size(): backport of the kernel helper — llseek with
 * caller-supplied max size and EOF, handling SEEK_CUR under i_mutex and
 * SEEK_DATA/SEEK_HOLE against the virtual data/hole model at @eof.
 * NOTE(review): sampled extract — switch structure and returns elided.
 */
2584 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2585 loff_t maxsize, loff_t eof)
2587 struct inode *inode = file->f_path.dentry->d_inode;
2595 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2596 * position-querying operation. Avoid rewriting the "same"
2597 * f_pos value back to the file because a concurrent read(),
2598 * write() or lseek() might have altered it
2603 * f_lock protects against read/modify/write race with other
2604 * SEEK_CURs. Note that parallel writes and reads behave
2607 mutex_lock(&inode->i_mutex);
2608 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2609 mutex_unlock(&inode->i_mutex);
2613 * In the generic case the entire file is data, so as long as
2614 * offset isn't at the end of the file then the offset is data.
2621 * There is a virtual hole at the end of the file, so as long as
2622 * offset isn't i_size or larger, return i_size.
2630 return llseek_execute(file, offset, maxsize);
/*
 * ll_file_seek(): ->llseek entry.  For origins that depend on file size
 * (SEEK_END/SEEK_HOLE/SEEK_DATA) glimpse the OSTs first so i_size is
 * current, then delegate to the (possibly backported) llseek-size helper
 * bounded by ll_file_maxbytes().  NOTE(review): sampled extract.
 */
2634 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2636 struct inode *inode = file->f_path.dentry->d_inode;
2637 loff_t retval, eof = 0;
2640 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2641 (origin == SEEK_CUR) ? file->f_pos : 0);
2642 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2643 PFID(ll_inode2fid(inode)), inode, retval, retval,
2645 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2647 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2648 retval = ll_glimpse_size(inode);
2651 eof = i_size_read(inode);
2654 retval = ll_generic_file_llseek_size(file, offset, origin,
2655 ll_file_maxbytes(inode), eof);
/*
 * ll_flush(): ->flush entry (called on close(2)).  Reports -EIO if async
 * writeback previously failed for this mapping, consuming the recorded
 * per-inode and per-object async error state; a failure already reported
 * through this fd (fd_write_failed) is not reported twice.
 * NOTE(review): sampled extract — some statements elided.
 */
2659 static int ll_flush(struct file *file, fl_owner_t id)
2661 struct inode *inode = file->f_path.dentry->d_inode;
2662 struct ll_inode_info *lli = ll_i2info(inode);
2663 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2666 LASSERT(!S_ISDIR(inode->i_mode));
2668 /* catch async errors that were recorded back when async writeback
2669 * failed for pages in this mapping. */
2670 rc = lli->lli_async_rc;
2671 lli->lli_async_rc = 0;
2672 if (lli->lli_clob != NULL) {
2673 err = lov_read_and_clear_async_rc(lli->lli_clob);
2678 /* The application has been told write failure already.
2679 * Do not report failure again. */
2680 if (fd->fd_write_failed)
2682 return rc ? -EIO : 0;
/*
 * cl_sync_file_range(): run a CIT_FSYNC cl_io over [start, end] with the
 * given fsync mode; on success the result is the number of pages written
 * (fi_nr_written).  Unless mode is CL_FSYNC_LOCAL this reaches the OSTs
 * with OST_SYNC RPCs.  NOTE(review): sampled extract — lines elided.
 */
2686 * Called to make sure a portion of file has been written out.
2687 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2689 * Return how many pages have been written.
2691 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2692 enum cl_fsync_mode mode, int ignore_layout)
2694 struct cl_env_nest nest;
2697 struct cl_fsync_io *fio;
2701 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2702 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2705 env = cl_env_nested_get(&nest);
2707 RETURN(PTR_ERR(env));
2709 io = vvp_env_thread_io(env);
2710 io->ci_obj = ll_i2info(inode)->lli_clob;
2711 io->ci_ignore_layout = ignore_layout;
2713 /* initialize parameters for sync */
2714 fio = &io->u.ci_fsync;
2715 fio->fi_start = start;
2717 fio->fi_fid = ll_inode2fid(inode);
2718 fio->fi_mode = mode;
2719 fio->fi_nr_written = 0;
2721 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2722 result = cl_io_loop(env, io);
2724 result = io->ci_result;
/* success: report the page count accumulated by the fsync io */
2726 result = fio->fi_nr_written;
2727 cl_io_fini(env, io);
2728 cl_env_nested_put(&nest, env);
/*
 * ll_fsync(): ->fsync entry, with three kernel-API variants selected by
 * configure checks (4-arg range fsync, 2-arg, or dentry-based).  Waits
 * for page writeback, folds in recorded async write errors, fsyncs the
 * MD handle, then CL_FSYNC_ALL syncs the data range, tracking the fd's
 * write-failed state.  NOTE(review): sampled extract — lines elided.
 */
2734 * When dentry is provided (the 'else' case), *file->f_path.dentry may be
2735 * null and dentry must be used directly rather than pulled from
2736 * *file->f_path.dentry as is done otherwise.
2739 #ifdef HAVE_FILE_FSYNC_4ARGS
2740 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2742 struct dentry *dentry = file->f_path.dentry;
2743 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2744 int ll_fsync(struct file *file, int datasync)
2746 struct dentry *dentry = file->f_path.dentry;
2748 loff_t end = LLONG_MAX;
2750 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2753 loff_t end = LLONG_MAX;
2755 struct inode *inode = dentry->d_inode;
2756 struct ll_inode_info *lli = ll_i2info(inode);
2757 struct ptlrpc_request *req;
2761 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2762 PFID(ll_inode2fid(inode)), inode);
2763 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2765 #ifdef HAVE_FILE_FSYNC_4ARGS
2766 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2767 mutex_lock(&inode->i_mutex);
2769 /* fsync's caller has already called _fdata{sync,write}, we want
2770 * that IO to finish before calling the osc and mdc sync methods */
2771 rc = filemap_fdatawait(inode->i_mapping);
2774 /* catch async errors that were recorded back when async writeback
2775 * failed for pages in this mapping. */
2776 if (!S_ISDIR(inode->i_mode)) {
2777 err = lli->lli_async_rc;
2778 lli->lli_async_rc = 0;
2781 err = lov_read_and_clear_async_rc(lli->lli_clob);
2786 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
2790 ptlrpc_req_finished(req);
2792 if (S_ISREG(inode->i_mode)) {
2793 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2795 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2796 if (rc == 0 && err < 0)
2799 fd->fd_write_failed = true;
2801 fd->fd_write_failed = false;
2804 #ifdef HAVE_FILE_FSYNC_4ARGS
2805 mutex_unlock(&inode->i_mutex);
/*
 * ll_file_flock(): implement flock(2)/fcntl(2) byte-range locking by
 * enqueuing an LDLM_FLOCK lock on the MDT.
 *
 * FL_FLOCK requests lock the whole file and are owned by the struct file;
 * FL_POSIX requests carry the caller's range and fl_owner.  The fcntl
 * lock type is mapped to an ldlm mode (LCK_PR/LCK_PW, LCK_NL for unlock)
 * and F_SETLK/F_GETLK variants select LDLM_FL_BLOCK_NOWAIT /
 * LDLM_FL_TEST_LOCK flags.  On success the local kernel lock tables are
 * updated via flock_lock_file_wait()/posix_lock_file_wait(); if that
 * fails, the just-granted server lock is released with an LCK_NL enqueue.
 */
2811 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2813 struct inode *inode = file->f_path.dentry->d_inode;
2814 struct ll_sb_info *sbi = ll_i2sbi(inode);
2815 struct ldlm_enqueue_info einfo = {
2816 .ei_type = LDLM_FLOCK,
2817 .ei_cb_cp = ldlm_flock_completion_ast,
2818 .ei_cbdata = file_lock,
2820 struct md_op_data *op_data;
2821 struct lustre_handle lockh = { 0 };
2822 union ldlm_policy_data flock = { { 0 } };
2823 int fl_type = file_lock->fl_type;
2829 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2830 PFID(ll_inode2fid(inode)), file_lock);
2832 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2834 if (file_lock->fl_flags & FL_FLOCK) {
2835 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2836 /* flocks are whole-file locks */
2837 flock.l_flock.end = OFFSET_MAX;
2838 /* For flocks owner is determined by the local file descriptor */
2839 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2840 } else if (file_lock->fl_flags & FL_POSIX) {
2841 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2842 flock.l_flock.start = file_lock->fl_start;
2843 flock.l_flock.end = file_lock->fl_end;
2847 flock.l_flock.pid = file_lock->fl_pid;
2849 /* Somewhat ugly workaround for svc lockd.
2850 * lockd installs custom fl_lmops->lm_compare_owner that checks
2851 * for the fl_owner to be the same (which it always is on local node
2852 * I guess between lockd processes) and then compares pid.
2853 * As such we assign pid to the owner field to make it all work,
2854 * conflict with normal locks is unlikely since pid space and
2855 * pointer space for current->files are not intersecting */
2856 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2857 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* map fcntl lock type -> ldlm mode (switch on fl_type; elided here) */
2861 einfo.ei_mode = LCK_PR;
2864 /* An unlock request may or may not have any relation to
2865 * existing locks so we may not be able to pass a lock handle
2866 * via a normal ldlm_lock_cancel() request. The request may even
2867 * unlock a byte range in the middle of an existing lock. In
2868 * order to process an unlock request we need all of the same
2869 * information that is given with a normal read or write record
2870 * lock request. To avoid creating another ldlm unlock (cancel)
2871 * message we'll treat a LCK_NL flock request as an unlock. */
2872 einfo.ei_mode = LCK_NL;
2875 einfo.ei_mode = LCK_PW;
2878 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* map cmd -> enqueue flags (switch on cmd; elided here) */
2893 flags = LDLM_FL_BLOCK_NOWAIT;
2899 flags = LDLM_FL_TEST_LOCK;
2902 CERROR("unknown fcntl lock command: %d\n", cmd);
2906 /* Save the old mode so that if the mode in the lock changes we
2907 * can decrement the appropriate reader or writer refcount. */
2908 file_lock->fl_type = einfo.ei_mode;
2910 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2911 LUSTRE_OPC_ANY, NULL);
2912 if (IS_ERR(op_data))
2913 RETURN(PTR_ERR(op_data));
2915 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2916 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2917 flock.l_flock.pid, flags, einfo.ei_mode,
2918 flock.l_flock.start, flock.l_flock.end);
2920 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2923 /* Restore the file lock type if not TEST lock. */
2924 if (!(flags & LDLM_FL_TEST_LOCK))
2925 file_lock->fl_type = fl_type;
/* mirror the server grant into the local kernel lock tables */
2927 if ((file_lock->fl_flags & FL_FLOCK) &&
2928 (rc == 0 || file_lock->fl_type == F_UNLCK))
2929 rc2 = flock_lock_file_wait(file, file_lock);
2930 if ((file_lock->fl_flags & FL_POSIX) &&
2931 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2932 !(flags & LDLM_FL_TEST_LOCK))
2933 rc2 = posix_lock_file_wait(file, file_lock);
2935 if (rc2 && file_lock->fl_type != F_UNLCK) {
/* local bookkeeping failed: give the server lock back (LCK_NL) */
2936 einfo.ei_mode = LCK_NL;
2937 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2942 ll_finish_md_op_data(op_data);
/*
 * ll_get_fid_by_name(): look up @name under @parent on the MDT and return
 * its FID in @fid; if @inode is non-NULL, also instantiate the inode via
 * ll_prep_inode().
 *
 * Returns 0 on success or a negative errno (-EFAULT if the reply lacks
 * an MDT body).  The ptlrpc request is released before returning.
 */
2947 int ll_get_fid_by_name(struct inode *parent, const char *name,
2948 int namelen, struct lu_fid *fid,
2949 struct inode **inode)
2951 struct md_op_data *op_data = NULL;
2952 struct mdt_body *body;
2953 struct ptlrpc_request *req;
2957 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2958 LUSTRE_OPC_ANY, NULL);
2959 if (IS_ERR(op_data))
2960 RETURN(PTR_ERR(op_data));
/* only need the FID and type back from the MDT */
2962 op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
2963 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2964 ll_finish_md_op_data(op_data);
2968 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2970 GOTO(out_req, rc = -EFAULT);
2972 *fid = body->mbo_fid1;
2975 rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
2977 ptlrpc_req_finished(req);
/*
 * ll_migrate(): migrate the entry @name under @parent to MDT @mdtidx.
 *
 * Finds the child inode (dcache first, then ll_get_fid_by_name()), bails
 * out early if it already lives on the target MDT, takes a write lease
 * plus data version for regular files so the server can verify the data
 * was not modified, and issues the migration as an md_rename() with
 * CLI_MIGRATE / MDS_RENAME_MIGRATE.  Retries on -EAGAIN for regular
 * files (layout changed during migration).
 */
2981 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2982 const char *name, int namelen)
2984 struct dentry *dchild = NULL;
2985 struct inode *child_inode = NULL;
2986 struct md_op_data *op_data;
2987 struct ptlrpc_request *request = NULL;
2988 struct obd_client_handle *och = NULL;
2990 struct mdt_body *body;
2992 __u64 data_version = 0;
2995 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2996 name, PFID(ll_inode2fid(parent)), mdtidx);
2998 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2999 0, LUSTRE_OPC_ANY, NULL);
3000 if (IS_ERR(op_data))
3001 RETURN(PTR_ERR(op_data));
3003 /* Get child FID first */
3004 qstr.hash = full_name_hash(name, namelen);
3007 dchild = d_lookup(file->f_path.dentry, &qstr);
3008 if (dchild != NULL) {
3009 if (dchild->d_inode != NULL)
3010 child_inode = igrab(dchild->d_inode);
/* not in the dcache: ask the MDT for the FID/inode */
3014 if (child_inode == NULL) {
3015 rc = ll_get_fid_by_name(parent, name, namelen,
3016 &op_data->op_fid3, &child_inode);
3021 if (child_inode == NULL)
3022 GOTO(out_free, rc = -EINVAL);
/* hold i_mutex across the whole migration of the child */
3024 mutex_lock(&child_inode->i_mutex);
3025 op_data->op_fid3 = *ll_inode2fid(child_inode);
3026 if (!fid_is_sane(&op_data->op_fid3)) {
3027 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
3028 ll_get_fsname(parent->i_sb, NULL, 0), name,
3029 PFID(&op_data->op_fid3));
3030 GOTO(out_free, rc = -EINVAL);
3033 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
/* nothing to do if the child is already on the target MDT */
3038 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
3039 PFID(&op_data->op_fid3), mdtidx);
3040 GOTO(out_free, rc = 0);
3043 if (S_ISREG(child_inode->i_mode)) {
/* take a write lease + data version so the server can detect
 * concurrent modification of the file data */
3044 och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
3051 rc = ll_data_version(child_inode, &data_version,
3056 op_data->op_handle = och->och_fh;
3057 op_data->op_data = och->och_mod;
3058 op_data->op_data_version = data_version;
3059 op_data->op_lease_handle = och->och_lease_handle;
3060 op_data->op_bias |= MDS_RENAME_MIGRATE;
3063 op_data->op_mds = mdtidx;
3064 op_data->op_cli_flags = CLI_MIGRATE;
/* migration is implemented as a same-name rename to another MDT */
3065 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
3066 namelen, name, namelen, &request);
3068 ll_update_times(request, parent);
3070 if (request != NULL) {
3071 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
3073 ptlrpc_req_finished(request);
3074 GOTO(out_free, rc = -EPROTO);
3077 /* If the server does release layout lock, then we cleanup
3078 * the client och here, otherwise release it in out_free: */
3080 body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
3081 obd_mod_put(och->och_mod);
3082 md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp,
3084 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
3088 ptlrpc_req_finished(request);
3091 /* Try again if the file layout has changed. */
3092 if (rc == -EAGAIN && S_ISREG(child_inode->i_mode)) {
3097 if (child_inode != NULL) {
3098 if (och != NULL) /* close the file */
3099 ll_lease_close(och, child_inode, NULL);
3100 clear_nlink(child_inode);
3101 mutex_unlock(&child_inode->i_mutex);
3105 ll_finish_md_op_data(op_data);
/* flock/lock handler for "-o noflock" mounts (body elided in this view;
 * per the file_operations comment below it presumably returns -ENOSYS). */
3110 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3118 * test if some locks matching bits and l_req_mode are acquired
3119 * - bits can be in different locks
3120 * - if found clear the common lock bits in *bits
3121 * - the bits not found, are kept in *bits
3123 * \param bits [IN] searched lock bits [IN]
3124 * \param l_req_mode [IN] searched lock mode
3125 * \retval boolean, true iff all bits are found
3127 int ll_have_md_lock(struct inode *inode, __u64 *bits, enum ldlm_mode l_req_mode)
3129 struct lustre_handle lockh;
3130 union ldlm_policy_data policy;
/* LCK_MINMODE means "any mode": match against all four */
3131 enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
3132 (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
3141 fid = &ll_i2info(inode)->lli_fid;
3142 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3143 ldlm_lockname[mode]);
/* TEST_LOCK: only probe for a match, do not take references */
3145 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* probe each inodebit individually; stop early once all bits found */
3146 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3147 policy.l_inodebits.bits = *bits & (1 << i);
3148 if (policy.l_inodebits.bits == 0)
3151 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3152 &policy, mode, &lockh)) {
3153 struct ldlm_lock *lock;
3155 lock = ldlm_handle2lock(&lockh);
3158 ~(lock->l_policy_data.l_inodebits.bits);
3159 LDLM_LOCK_PUT(lock);
3161 *bits &= ~policy.l_inodebits.bits;
/*
 * ll_take_md_lock(): try to match (and reference) a cached MDS ibits lock
 * covering @bits with one of the modes in @mode; on a hit the handle is
 * returned in @lockh.  Unlike ll_have_md_lock() this does NOT use
 * LDLM_FL_TEST_LOCK, so the caller owns a reference on the matched lock.
 * Returns the matched mode, or 0 if no lock matched.
 */
3168 enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
3169 struct lustre_handle *lockh, __u64 flags,
3170 enum ldlm_mode mode)
3172 union ldlm_policy_data policy = { .l_inodebits = { bits } };
3177 fid = &ll_i2info(inode)->lli_fid;
3178 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3180 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3181 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * ll_inode_revalidate_fini(): post-process the revalidation result @rc.
 * -ENOENT on a plain file/dir means the object was unlinked and is
 * treated as success (striped directories with a bad stripe get
 * revalidated again instead).  Other errors are logged, with EACCES/EIDRM
 * demoted to D_INFO since they are expected under permission changes.
 */
3186 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3188 /* Already unlinked. Just update nlink and return success */
3189 if (rc == -ENOENT) {
3191 /* If it is striped directory, and there is bad stripe
3192 * Let's revalidate the dentry again, instead of returning
3194 if (S_ISDIR(inode->i_mode) &&
3195 ll_i2info(inode)->lli_lsm_md != NULL)
3198 /* This path cannot be hit for regular files unless in
3199 * case of obscure races, so no need to to validate
3201 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3203 } else if (rc != 0) {
3204 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3205 "%s: revalidate FID "DFID" error: rc = %d\n",
3206 ll_get_fsname(inode->i_sb, NULL, 0),
3207 PFID(ll_inode2fid(inode)), rc);
/*
 * __ll_inode_revalidate(): refresh the inode attributes protected by the
 * MDS ibits in @ibits.
 *
 * If the MDT supports OBD_CONNECT_ATTRFID, an intent getattr/lookup by
 * FID is issued (taking the ibits lock as a side effect); otherwise, if
 * no matching lock is cached locally, a plain md_getattr() fetches the
 * attributes (including EA size for regular files) and the reply is
 * applied with ll_prep_inode().
 */
3213 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3215 struct inode *inode = dentry->d_inode;
3216 struct ptlrpc_request *req = NULL;
3217 struct obd_export *exp;
3221 LASSERT(inode != NULL);
3223 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3224 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3226 exp = ll_i2mdexp(inode);
3228 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3229 * But under CMD case, it caused some lock issues, should be fixed
3230 * with new CMD ibits lock. See bug 12718 */
3231 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3232 struct lookup_intent oit = { .it_op = IT_GETATTR };
3233 struct md_op_data *op_data;
3235 if (ibits == MDS_INODELOCK_LOOKUP)
3236 oit.it_op = IT_LOOKUP;
3238 /* Call getattr by fid, so do not provide name at all. */
3239 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3240 dentry->d_inode, NULL, 0, 0,
3241 LUSTRE_OPC_ANY, NULL);
3242 if (IS_ERR(op_data))
3243 RETURN(PTR_ERR(op_data));
3245 rc = md_intent_lock(exp, op_data, &oit, &req,
3246 &ll_md_blocking_ast, 0);
3247 ll_finish_md_op_data(op_data);
3249 rc = ll_inode_revalidate_fini(inode, rc);
3253 rc = ll_revalidate_it_finish(req, &oit, dentry);
3255 ll_intent_release(&oit);
3259 /* Unlinked? Unhash dentry, so it is not picked up later by
3260 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3261 here to preserve get_cwd functionality on 2.6.
3263 if (!dentry->d_inode->i_nlink)
3264 d_lustre_invalidate(dentry, 0);
3266 ll_lookup_finish_locks(&oit, dentry);
3267 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3268 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3269 u64 valid = OBD_MD_FLGETATTR;
3270 struct md_op_data *op_data;
/* regular files: also fetch the striping EA, sized to the MDT default */
3273 if (S_ISREG(inode->i_mode)) {
3274 rc = ll_get_default_mdsize(sbi, &ealen);
3277 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3280 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3281 0, ealen, LUSTRE_OPC_ANY,
3283 if (IS_ERR(op_data))
3284 RETURN(PTR_ERR(op_data));
3286 op_data->op_valid = valid;
3287 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3288 ll_finish_md_op_data(op_data);
3290 rc = ll_inode_revalidate_fini(inode, rc);
3294 rc = ll_prep_inode(&inode, req, NULL, NULL);
3297 ptlrpc_req_finished(req);
/*
 * ll_merge_md_attr(): for a striped directory, merge the attributes of
 * all stripes (via md_merge_attr()) into the master inode: nlink,
 * blocks, size, and a/m/ctime cached in ll_inode_info.
 */
3301 static int ll_merge_md_attr(struct inode *inode)
3303 struct cl_attr attr = { 0 };
3306 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3307 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3308 &attr, ll_md_blocking_ast);
3312 set_nlink(inode, attr.cat_nlink);
3313 inode->i_blocks = attr.cat_blocks;
3314 i_size_write(inode, attr.cat_size);
3316 ll_i2info(inode)->lli_atime = attr.cat_atime;
3317 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3318 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * ll_inode_revalidate(): revalidate MDS attributes for @ibits, then fix
 * up size information: striped directories get merged stripe attributes,
 * other non-regular inodes just copy cached timestamps, and regular
 * files glimpse the size from the OSTs unless an HSM restore is running
 * (in which case the MDT already provided the correct size).
 */
3324 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3326 struct inode *inode = dentry->d_inode;
3330 rc = __ll_inode_revalidate(dentry, ibits);
3334 /* if object isn't regular file, don't validate size */
3335 if (!S_ISREG(inode->i_mode)) {
3336 if (S_ISDIR(inode->i_mode) &&
3337 ll_i2info(inode)->lli_lsm_md != NULL) {
3338 rc = ll_merge_md_attr(inode);
3343 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3344 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3345 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3347 /* In case of restore, the MDT has the right size and has
3348 * already send it back without granting the layout lock,
3349 * inode is up-to-date so glimpse is useless.
3350 * Also to glimpse we need the layout, in case of a running
3351 * restore the MDT holds the layout lock so the glimpse will
3352 * block up to the end of restore (getattr will block)
3354 if (!ll_file_test_flag(ll_i2info(inode), LLIF_FILE_RESTORING))
3355 rc = ll_glimpse_size(inode);
/*
 * ll_getattr(): VFS ->getattr.  Revalidates UPDATE|LOOKUP ibits, then
 * fills *stat from the (now fresh) inode.  With a 32-bit API client the
 * inode number is derived from the FID via cl_fid_build_ino().
 */
3360 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3362 struct inode *inode = de->d_inode;
3363 struct ll_sb_info *sbi = ll_i2sbi(inode);
3364 struct ll_inode_info *lli = ll_i2info(inode);
3367 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3368 MDS_INODELOCK_LOOKUP);
3369 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3374 stat->dev = inode->i_sb->s_dev;
3375 if (ll_need_32bit_api(sbi))
3376 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3378 stat->ino = inode->i_ino;
3379 stat->mode = inode->i_mode;
3380 stat->uid = inode->i_uid;
3381 stat->gid = inode->i_gid;
3382 stat->rdev = inode->i_rdev;
3383 stat->atime = inode->i_atime;
3384 stat->mtime = inode->i_mtime;
3385 stat->ctime = inode->i_ctime;
3386 stat->blksize = 1 << inode->i_blkbits;
3388 stat->nlink = inode->i_nlink;
3389 stat->size = i_size_read(inode);
3390 stat->blocks = inode->i_blocks;
/*
 * ll_fiemap(): FIEMAP ioctl handler.  Builds a struct fiemap sized for
 * the caller's extent array, copies in the first user extent (used by
 * the FIEMAP_FLAG_DEVICE_ORDER continuation protocol), runs the mapping
 * via ll_do_fiemap(), and copies flags/extent data back to user space.
 */
3395 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3396 __u64 start, __u64 len)
3400 struct fiemap *fiemap;
3401 unsigned int extent_count = fieinfo->fi_extents_max;
3403 num_bytes = sizeof(*fiemap) + (extent_count *
3404 sizeof(struct fiemap_extent));
3405 OBD_ALLOC_LARGE(fiemap, num_bytes);
3410 fiemap->fm_flags = fieinfo->fi_flags;
3411 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3412 fiemap->fm_start = start;
3413 fiemap->fm_length = len;
/* copy in only the first extent; used to resume a previous mapping */
3414 if (extent_count > 0 &&
3415 copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3416 sizeof(struct fiemap_extent)) != 0)
3417 GOTO(out, rc = -EFAULT);
3419 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3421 fieinfo->fi_flags = fiemap->fm_flags;
3422 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3423 if (extent_count > 0 &&
3424 copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3425 fiemap->fm_mapped_extents *
3426 sizeof(struct fiemap_extent)) != 0)
3427 GOTO(out, rc = -EFAULT);
3429 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * ll_get_acl(): return a referenced copy of the cached POSIX ACL for
 * @inode (lli_posix_acl), taken under lli_lock.  @type is the ACL type
 * requested by the VFS; only the cached ACL is consulted here.
 */
3433 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3435 struct ll_inode_info *lli = ll_i2info(inode);
3436 struct posix_acl *acl = NULL;
3439 spin_lock(&lli->lli_lock);
3440 /* VFS' acl_permission_check->check_acl will release the refcount */
3441 acl = posix_acl_dup(lli->lli_posix_acl);
3442 spin_unlock(&lli->lli_lock);
/*
 * ll_check_acl(): ACL callback for generic_permission() on kernels whose
 * generic_permission() takes a check_acl function pointer.  Without
 * CONFIG_FS_POSIX_ACL this compiles to a stub (elided here); under RCU
 * walk (IPERM_FLAG_RCU) it bails out since ll_get_acl() may sleep.
 */
3447 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3449 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3450 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3452 ll_check_acl(struct inode *inode, int mask)
3455 # ifdef CONFIG_FS_POSIX_ACL
3456 struct posix_acl *acl;
3460 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3461 if (flags & IPERM_FLAG_RCU)
3464 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3469 rc = posix_acl_permission(inode, acl, mask);
3470 posix_acl_release(acl);
3473 # else /* !CONFIG_FS_POSIX_ACL */
3475 # endif /* CONFIG_FS_POSIX_ACL */
3477 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * ll_inode_permission(): VFS ->permission for Lustre (three prototypes
 * for the different kernel APIs).
 *
 * Bails out of RCU-walk mode (may block), revalidates the root inode on
 * first access, applies root squash by overriding the current creds
 * (fsuid/fsgid set to the squash ids, FS capabilities lowered), then
 * delegates to lustre_check_remote_perm() for remote clients or
 * generic_permission()/ll_check_acl otherwise, restoring creds at the
 * end.
 */
3479 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3480 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3482 # ifdef HAVE_INODE_PERMISION_2ARGS
3483 int ll_inode_permission(struct inode *inode, int mask)
3485 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3490 struct ll_sb_info *sbi;
3491 struct root_squash_info *squash;
3492 struct cred *cred = NULL;
3493 const struct cred *old_cred = NULL;
3495 bool squash_id = false;
/* cannot service permission checks under RCU walk: we may block */
3498 #ifdef MAY_NOT_BLOCK
3499 if (mask & MAY_NOT_BLOCK)
3501 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3502 if (flags & IPERM_FLAG_RCU)
3506 /* as root inode are NOT getting validated in lookup operation,
3507 * need to do it before permission check. */
3509 if (inode == inode->i_sb->s_root->d_inode) {
3510 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3511 MDS_INODELOCK_LOOKUP);
3516 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3517 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3519 /* squash fsuid/fsgid if needed */
3520 sbi = ll_i2sbi(inode);
3521 squash = &sbi->ll_squash;
3522 if (unlikely(squash->rsi_uid != 0 &&
3523 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3524 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3528 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3529 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3530 squash->rsi_uid, squash->rsi_gid);
3532 /* update current process's credentials
3533 * and FS capability */
3534 cred = prepare_creds();
3538 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3539 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* drop all filesystem-related capabilities for the squashed creds */
3540 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3541 if ((1 << cap) & CFS_CAP_FS_MASK)
3542 cap_lower(cred->cap_effective, cap);
3544 old_cred = override_creds(cred);
3547 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3549 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3550 rc = lustre_check_remote_perm(inode, mask);
3552 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3554 /* restore current process's credentials and FS capability */
3556 revert_creds(old_cred);
3563 /* -o localflock - only provides locally consistent flock locks */
/* Default file operations: no .flock/.lock methods, so flock falls back
 * to the kernel's local (single-node) implementation. */
3564 struct file_operations ll_file_operations = {
3565 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3566 # ifdef HAVE_SYNC_READ_WRITE
3567 .read = new_sync_read,
3568 .write = new_sync_write,
3570 .read_iter = ll_file_read_iter,
3571 .write_iter = ll_file_write_iter,
3572 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3573 .read = ll_file_read,
3574 .aio_read = ll_file_aio_read,
3575 .write = ll_file_write,
3576 .aio_write = ll_file_aio_write,
3577 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3578 .unlocked_ioctl = ll_file_ioctl,
3579 .open = ll_file_open,
3580 .release = ll_file_release,
3581 .mmap = ll_file_mmap,
3582 .llseek = ll_file_seek,
3583 .splice_read = ll_file_splice_read,
/* File operations for "-o flock" mounts: cluster-coherent locking via
 * ll_file_flock() for both flock(2) and fcntl(2) locks. */
3588 struct file_operations ll_file_operations_flock = {
3589 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3590 # ifdef HAVE_SYNC_READ_WRITE
3591 .read = new_sync_read,
3592 .write = new_sync_write,
3593 # endif /* HAVE_SYNC_READ_WRITE */
3594 .read_iter = ll_file_read_iter,
3595 .write_iter = ll_file_write_iter,
3596 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3597 .read = ll_file_read,
3598 .aio_read = ll_file_aio_read,
3599 .write = ll_file_write,
3600 .aio_write = ll_file_aio_write,
3601 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3602 .unlocked_ioctl = ll_file_ioctl,
3603 .open = ll_file_open,
3604 .release = ll_file_release,
3605 .mmap = ll_file_mmap,
3606 .llseek = ll_file_seek,
3607 .splice_read = ll_file_splice_read,
3610 .flock = ll_file_flock,
3611 .lock = ll_file_flock
3614 /* These are for -o noflock - to return ENOSYS on flock calls */
3615 struct file_operations ll_file_operations_noflock = {
3616 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3617 # ifdef HAVE_SYNC_READ_WRITE
3618 .read = new_sync_read,
3619 .write = new_sync_write,
3620 # endif /* HAVE_SYNC_READ_WRITE */
3621 .read_iter = ll_file_read_iter,
3622 .write_iter = ll_file_write_iter,
3623 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3624 .read = ll_file_read,
3625 .aio_read = ll_file_aio_read,
3626 .write = ll_file_write,
3627 .aio_write = ll_file_aio_write,
3628 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3629 .unlocked_ioctl = ll_file_ioctl,
3630 .open = ll_file_open,
3631 .release = ll_file_release,
3632 .mmap = ll_file_mmap,
3633 .llseek = ll_file_seek,
3634 .splice_read = ll_file_splice_read,
3637 .flock = ll_file_noflock,
3638 .lock = ll_file_noflock
/* Inode operations for regular Lustre files. */
3641 struct inode_operations ll_file_inode_operations = {
3642 .setattr = ll_setattr,
3643 .getattr = ll_getattr,
3644 .permission = ll_inode_permission,
3645 .setxattr = ll_setxattr,
3646 .getxattr = ll_getxattr,
3647 .listxattr = ll_listxattr,
3648 .removexattr = ll_removexattr,
3649 .fiemap = ll_fiemap,
3650 #ifdef HAVE_IOP_GET_ACL
3651 .get_acl = ll_get_acl,
3655 /* dynamic ioctl number support routines */
/* Global registry of dynamically registered ioctl handlers, protected by
 * an rwsem; iterated by ll_iocontrol_call(). */
3656 static struct llioc_ctl_data {
3657 struct rw_semaphore ioc_sem;
3658 struct list_head ioc_head;
3660 __RWSEM_INITIALIZER(llioc.ioc_sem),
3661 LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback + the ioctl commands it serves
 * (iocd_cmd is a flexible array of iocd_count entries). */
3666 struct list_head iocd_list;
3667 unsigned int iocd_size;
3668 llioc_callback_t iocd_cb;
3669 unsigned int iocd_count;
3670 unsigned int iocd_cmd[0];
/*
 * ll_iocontrol_register(): register callback @cb for @count dynamic ioctl
 * commands listed in @cmd.  Returns an opaque cookie (the allocated
 * llioc_data) used later by ll_iocontrol_unregister(), or NULL on bad
 * arguments / allocation failure (elided RETURNs).
 */
3673 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3676 struct llioc_data *in_data = NULL;
3679 if (cb == NULL || cmd == NULL ||
3680 count > LLIOC_MAX_CMD || count < 0)
3683 size = sizeof(*in_data) + count * sizeof(unsigned int);
3684 OBD_ALLOC(in_data, size);
3685 if (in_data == NULL)
3688 memset(in_data, 0, sizeof(*in_data));
3689 in_data->iocd_size = size;
3690 in_data->iocd_cb = cb;
3691 in_data->iocd_count = count;
3692 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3694 down_write(&llioc.ioc_sem);
3695 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3696 up_write(&llioc.ioc_sem);
/*
 * ll_iocontrol_unregister(): remove and free the handler identified by
 * @magic (the cookie returned by ll_iocontrol_register()).  Warns if no
 * matching registration is found.
 */
3701 void ll_iocontrol_unregister(void *magic)
3703 struct llioc_data *tmp;
3708 down_write(&llioc.ioc_sem);
3709 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3711 unsigned int size = tmp->iocd_size;
3713 list_del(&tmp->iocd_list);
3714 up_write(&llioc.ioc_sem);
3716 OBD_FREE(tmp, size);
3720 up_write(&llioc.ioc_sem);
3722 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3725 EXPORT_SYMBOL(ll_iocontrol_register);
3726 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * ll_iocontrol_call(): dispatch a dynamic ioctl @cmd to the registered
 * handlers in order; the handler's return value in *rcp and the iterator
 * result (LLIOC_STOP ends the scan) are propagated to the caller.
 */
3728 static enum llioc_iter
3729 ll_iocontrol_call(struct inode *inode, struct file *file,
3730 unsigned int cmd, unsigned long arg, int *rcp)
3732 enum llioc_iter ret = LLIOC_CONT;
3733 struct llioc_data *data;
3734 int rc = -EINVAL, i;
3736 down_read(&llioc.ioc_sem);
3737 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3738 for (i = 0; i < data->iocd_count; i++) {
3739 if (cmd != data->iocd_cmd[i])
3742 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3746 if (ret == LLIOC_STOP)
3749 up_read(&llioc.ioc_sem);
/*
 * ll_layout_conf(): apply a layout configuration @conf to the inode's
 * cl_object via cl_conf_set().  For OBJECT_CONF_SET the layout lock is
 * made matchable (ldlm_lock_allow_match()) only after the layout has
 * been applied, and the cached layout generation is refreshed from the
 * object.
 */
3756 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3758 struct ll_inode_info *lli = ll_i2info(inode);
3759 struct cl_object *obj = lli->lli_clob;
3760 struct cl_env_nest nest;
3768 env = cl_env_nested_get(&nest);
3770 RETURN(PTR_ERR(env));
3772 rc = cl_conf_set(env, lli->lli_clob, conf);
3776 if (conf->coc_opc == OBJECT_CONF_SET) {
3777 struct ldlm_lock *lock = conf->coc_lock;
3778 struct cl_layout cl = {
3782 LASSERT(lock != NULL);
3783 LASSERT(ldlm_has_layout(lock));
3785 /* it can only be allowed to match after layout is
3786 * applied to inode otherwise false layout would be
3787 * seen. Applying layout should happen before dropping
3788 * the intent lock. */
3789 ldlm_lock_allow_match(lock);
3791 rc = cl_object_layout_get(env, obj, &cl);
3796 DFID": layout version change: %u -> %u\n",
3797 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3799 ll_layout_version_set(lli, cl.cl_layout_gen);
3803 cl_env_nested_put(&nest, env);
3808 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3809 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3812 struct ll_sb_info *sbi = ll_i2sbi(inode);
3813 struct ptlrpc_request *req;
3814 struct mdt_body *body;
3821 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3822 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3823 lock->l_lvb_data, lock->l_lvb_len);
/* layout already attached to the lock: nothing to fetch */
3825 if (lock->l_lvb_data != NULL)
3828 /* if layout lock was granted right away, the layout is returned
3829 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3830 * blocked and then granted via completion ast, we have to fetch
3831 * layout here. Please note that we can't use the LVB buffer in
3832 * completion AST because it doesn't have a large enough buffer */
3833 rc = ll_get_default_mdsize(sbi, &lmmsize);
3835 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
3836 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3841 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3843 GOTO(out, rc = -EPROTO);
3845 lmmsize = body->mbo_eadatasize;
3846 if (lmmsize == 0) /* empty layout */
3849 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3851 GOTO(out, rc = -EFAULT);
3853 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3854 if (lvbdata == NULL)
3855 GOTO(out, rc = -ENOMEM);
/* attach the layout to the lock's LVB unless someone raced us */
3857 memcpy(lvbdata, lmm, lmmsize);
3858 lock_res_and_lock(lock);
3859 if (unlikely(lock->l_lvb_data == NULL)) {
3860 lock->l_lvb_type = LVB_T_LAYOUT;
3861 lock->l_lvb_data = lvbdata;
3862 lock->l_lvb_len = lmmsize;
3865 unlock_res_and_lock(lock);
3867 if (lvbdata != NULL)
3868 OBD_FREE_LARGE(lvbdata, lmmsize)
3873 ptlrpc_req_finished(req);
3878 * Apply the layout to the inode. Layout lock is held and will be released
3881 static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
3882 struct inode *inode)
3884 struct ll_inode_info *lli = ll_i2info(inode);
3885 struct ll_sb_info *sbi = ll_i2sbi(inode);
3886 struct ldlm_lock *lock;
3887 struct cl_object_conf conf;
3890 bool wait_layout = false;
3893 LASSERT(lustre_handle_is_used(lockh));
3895 lock = ldlm_handle2lock(lockh);
3896 LASSERT(lock != NULL);
3897 LASSERT(ldlm_has_layout(lock));
3899 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured",
3900 PFID(&lli->lli_fid), inode);
3902 /* in case this is a caching lock and reinstate with new inode */
3903 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3905 lock_res_and_lock(lock);
3906 lvb_ready = ldlm_is_lvb_ready(lock);
3907 unlock_res_and_lock(lock);
3908 /* checking lvb_ready is racy but this is okay. The worst case is
3909 * that multi processes may configure the file on the same time. */
/* make sure the layout blob is attached to the lock's LVB */
3914 rc = ll_layout_fetch(inode, lock);
3918 /* for layout lock, lmm is stored in lock's lvb.
3919 * lvb_data is immutable if the lock is held so it's safe to access it
3922 * set layout to file. Unlikely this will fail as old layout was
3923 * surely eliminated */
3924 memset(&conf, 0, sizeof conf);
3925 conf.coc_opc = OBJECT_CONF_SET;
3926 conf.coc_inode = inode;
3927 conf.coc_lock = lock;
3928 conf.u.coc_layout.lb_buf = lock->l_lvb_data;
3929 conf.u.coc_layout.lb_len = lock->l_lvb_len;
3930 rc = ll_layout_conf(inode, &conf);
3932 /* refresh layout failed, need to wait */
3933 wait_layout = rc == -EBUSY;
3937 LDLM_LOCK_PUT(lock);
3938 ldlm_lock_decref(lockh, mode);
3940 /* wait for IO to complete if it's still being used. */
3942 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3943 ll_get_fsname(inode->i_sb, NULL, 0),
3944 PFID(&lli->lli_fid), inode);
3946 memset(&conf, 0, sizeof conf);
3947 conf.coc_opc = OBJECT_CONF_WAIT;
3948 conf.coc_inode = inode;
3949 rc = ll_layout_conf(inode, &conf);
3953 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3954 ll_get_fsname(inode->i_sb, NULL, 0),
3955 PFID(&lli->lli_fid), rc);
/*
 * ll_layout_refresh_locked(): take the layout lock for @inode, either by
 * matching a cached MDS lock (any of CR/CW/PR/PW) or by enqueuing a new
 * IT_LAYOUT intent on the MDT, then apply the layout to the inode with
 * ll_layout_lock_set().  The caller holds lli_layout_mutex.
 */
3960 static int ll_layout_refresh_locked(struct inode *inode)
3962 struct ll_inode_info *lli = ll_i2info(inode);
3963 struct ll_sb_info *sbi = ll_i2sbi(inode);
3964 struct md_op_data *op_data;
3965 struct lookup_intent it;
3966 struct lustre_handle lockh;
3967 enum ldlm_mode mode;
3968 struct ldlm_enqueue_info einfo = {
3969 .ei_type = LDLM_IBITS,
3971 .ei_cb_bl = &ll_md_blocking_ast,
3972 .ei_cb_cp = &ldlm_completion_ast,
3978 /* mostly layout lock is caching on the local side, so try to match
3979 * it before grabbing layout lock mutex. */
3980 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3981 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3982 if (mode != 0) { /* hit cached lock */
3983 rc = ll_layout_lock_set(&lockh, mode, inode);
3990 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3991 0, 0, LUSTRE_OPC_ANY, NULL);
3992 if (IS_ERR(op_data))
3993 RETURN(PTR_ERR(op_data));
3995 /* have to enqueue one */
3996 memset(&it, 0, sizeof(it));
3997 it.it_op = IT_LAYOUT;
3998 lockh.cookie = 0ULL;
4000 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
4001 ll_get_fsname(inode->i_sb, NULL, 0),
4002 PFID(&lli->lli_fid), inode);
4004 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
/* the enqueue reply request is not needed once the lock is granted */
4005 if (it.d.lustre.it_data != NULL)
4006 ptlrpc_req_finished(it.d.lustre.it_data);
4007 it.d.lustre.it_data = NULL;
4009 ll_finish_md_op_data(op_data);
4011 mode = it.d.lustre.it_lock_mode;
4012 it.d.lustre.it_lock_mode = 0;
4013 ll_intent_drop_lock(&it);
4016 /* set lock data in case this is a new lock */
4017 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
4018 rc = ll_layout_lock_set(&lockh, mode, inode);
4027 * This function checks if there exists a LAYOUT lock on the client side,
4028 * or enqueues it if it doesn't have one in cache.
4030 * This function will not hold layout lock so it may be revoked any time after
4031 * this function returns. Any operations depend on layout should be redone
4034 * This function should be called before lov_io_init() to get an uptodate
4035 * layout version, the caller should save the version number and after IO
4036 * is finished, this function should be called again to verify that layout
4037 * is not changed during IO time.
4039 int ll_layout_refresh(struct inode *inode, __u32 *gen)
4041 struct ll_inode_info *lli = ll_i2info(inode);
4042 struct ll_sb_info *sbi = ll_i2sbi(inode);
/* fast path: layout lock disabled, or generation already valid */
4046 *gen = ll_layout_version_get(lli);
4047 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
4051 LASSERT(fid_is_sane(ll_inode2fid(inode)));
4052 LASSERT(S_ISREG(inode->i_mode));
4054 /* take layout lock mutex to enqueue layout lock exclusively. */
4055 mutex_lock(&lli->lli_layout_mutex);
4057 rc = ll_layout_refresh_locked(inode);
4061 *gen = ll_layout_version_get(lli);
4063 mutex_unlock(&lli->lli_layout_mutex);
4069 * This function send a restore request to the MDT
4071 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
4073 struct hsm_user_request *hur;
4077 len = sizeof(struct hsm_user_request) +
4078 sizeof(struct hsm_user_item);
4079 OBD_ALLOC(hur, len);
4083 hur->hur_request.hr_action = HUA_RESTORE;
4084 hur->hur_request.hr_archive_id = 0;
4085 hur->hur_request.hr_flags = 0;
4086 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
4087 sizeof(hur->hur_user_item[0].hui_fid));
4088 hur->hur_user_item[0].hui_extent.offset = offset;
4089 hur->hur_user_item[0].hui_extent.length = length;
4090 hur->hur_request.hr_itemcount = 1;
4091 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,