4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include <linux/user_namespace.h>
49 #ifdef HAVE_UIDGID_HEADER
50 # include <linux/uidgid.h>
52 #include <lustre/ll_fiemap.h>
53 #include <lustre_ioctl.h>
55 #include "cl_object.h"
57 #include "llite_internal.h"
58 #include "vvp_internal.h"
/* Forward declarations for helpers defined later in this file. */
61 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
63 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
66 static enum llioc_iter
67 ll_iocontrol_call(struct inode *inode, struct file *file,
68 unsigned int cmd, unsigned long arg, int *rcp);
/*
 * Allocate per-open file state from the ll_file_data_slab cache.
 * GFP_NOFS avoids re-entering the filesystem during memory reclaim.
 * NOTE(review): the NULL check and return path are not visible in this
 * chunk of the file.
 */
70 static struct ll_file_data *ll_file_data_get(void)
72 struct ll_file_data *fd;
74 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
/* fresh descriptor: no async write failures recorded yet */
78 fd->fd_write_failed = false;
/* Release per-open file state back to its slab cache (inverse of
 * ll_file_data_get()). */
83 static void ll_file_data_put(struct ll_file_data *fd)
86 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
90 * Packs all the attributes into @op_data for the CLOSE rpc.
92 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
93 struct obd_client_handle *och)
97 ll_prep_md_op_data(op_data, inode, NULL, NULL,
98 0, 0, LUSTRE_OPC_ANY, NULL);
/* Snapshot the client's view of the inode attributes so the MDT sees the
 * final state at close time. */
100 op_data->op_attr.ia_mode = inode->i_mode;
101 op_data->op_attr.ia_atime = inode->i_atime;
102 op_data->op_attr.ia_mtime = inode->i_mtime;
103 op_data->op_attr.ia_ctime = inode->i_ctime;
104 op_data->op_attr.ia_size = i_size_read(inode);
105 op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
106 ATTR_MTIME | ATTR_MTIME_SET |
107 ATTR_CTIME | ATTR_CTIME_SET;
108 op_data->op_attr_blocks = inode->i_blocks;
109 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
/* identify which MDS open handle this close refers to */
110 op_data->op_handle = och->och_fh;
112 if (och->och_flags & FMODE_WRITE &&
113 ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED))
114 /* For HSM: if inode data has been modified, pack it so that
115 * MDT can set data dirty flag in the archive. */
116 op_data->op_bias |= MDS_DATA_MODIFIED;
122 * Perform a close, possibly with a bias.
123 * The meaning of "data" depends on the value of "bias".
125 * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
126 * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
129 static int ll_close_inode_openhandle(struct inode *inode,
130 struct obd_client_handle *och,
131 enum mds_op_bias bias, void *data)
133 struct obd_export *md_exp = ll_i2mdexp(inode);
134 const struct ll_inode_info *lli = ll_i2info(inode);
135 struct md_op_data *op_data;
136 struct ptlrpc_request *req = NULL;
/* a NULL obd means the MDC export was already disconnected */
140 if (class_exp2obd(md_exp) == NULL) {
141 CERROR("%s: invalid MDC connection handle closing "DFID"\n",
142 ll_get_fsname(inode->i_sb, NULL, 0),
143 PFID(&lli->lli_fid));
147 OBD_ALLOC_PTR(op_data);
148 /* We leak openhandle and request here on error, but not much to be
149 * done in OOM case since app won't retry close on error either. */
151 GOTO(out, rc = -ENOMEM);
153 ll_prepare_close(inode, op_data, och);
/* bias-specific packing; \a data's meaning depends on the bias (above) */
155 case MDS_CLOSE_LAYOUT_SWAP:
156 LASSERT(data != NULL);
157 op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
158 op_data->op_data_version = 0;
159 op_data->op_lease_handle = och->och_lease_handle;
160 op_data->op_fid2 = *ll_inode2fid(data);
163 case MDS_HSM_RELEASE:
164 LASSERT(data != NULL);
165 op_data->op_bias |= MDS_HSM_RELEASE;
166 op_data->op_data_version = *(__u64 *)data;
167 op_data->op_lease_handle = och->och_lease_handle;
168 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
172 LASSERT(data == NULL);
176 rc = md_close(md_exp, op_data, och->och_mod, &req);
/* -EINTR is expected when the close is interrupted; don't log it */
177 if (rc != 0 && rc != -EINTR)
178 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
179 md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc);
/* for biased closes, check whether the MDT actually executed the
 * close intent (release / layout swap) */
182 op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)) {
183 struct mdt_body *body;
185 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
186 if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
190 ll_finish_md_op_data(op_data);
/* the handle is dead from now on; poison it against reuse */
194 md_clear_open_replay_data(md_exp, och);
195 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
198 ptlrpc_req_finished(req); /* This is close request */
/*
 * Drop one reference on the MDS open handle matching \a fmode and, when
 * the last user is gone, send the actual close to the MDT.
 */
202 int ll_md_real_close(struct inode *inode, fmode_t fmode)
204 struct ll_inode_info *lli = ll_i2info(inode);
205 struct obd_client_handle **och_p;
206 struct obd_client_handle *och;
/* pick the per-mode open handle and its use count */
211 if (fmode & FMODE_WRITE) {
212 och_p = &lli->lli_mds_write_och;
213 och_usecount = &lli->lli_open_fd_write_count;
214 } else if (fmode & FMODE_EXEC) {
215 och_p = &lli->lli_mds_exec_och;
216 och_usecount = &lli->lli_open_fd_exec_count;
218 LASSERT(fmode & FMODE_READ);
219 och_p = &lli->lli_mds_read_och;
220 och_usecount = &lli->lli_open_fd_read_count;
223 mutex_lock(&lli->lli_och_mutex);
224 if (*och_usecount > 0) {
225 /* There are still users of this handle, so skip
227 mutex_unlock(&lli->lli_och_mutex);
233 mutex_unlock(&lli->lli_och_mutex);
236 /* There might be a race and this handle may already
238 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
/*
 * Per-file-descriptor close: release group lock/lease/och state held by
 * this fd, drop the matching open-mode refcount, and talk to the MDS only
 * if no OPEN lock lets us skip it.
 */
244 static int ll_md_close(struct inode *inode, struct file *file)
246 union ldlm_policy_data policy = {
247 .l_inodebits = { MDS_INODELOCK_OPEN },
/* TEST_LOCK: only probe for a granted OPEN lock, don't take a ref */
249 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
250 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
251 struct ll_inode_info *lli = ll_i2info(inode);
252 struct lustre_handle lockh;
253 enum ldlm_mode lockmode;
257 /* clear group lock, if present */
258 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
259 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
261 if (fd->fd_lease_och != NULL) {
264 /* Usually the lease is not released when the
265 * application crashed, we need to release here. */
266 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
267 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
268 PFID(&lli->lli_fid), rc, lease_broken);
270 fd->fd_lease_och = NULL;
/* close the fd-private open handle, if any (used by leases) */
273 if (fd->fd_och != NULL) {
274 rc = ll_close_inode_openhandle(inode, fd->fd_och, 0, NULL);
279 /* Let's see if we have good enough OPEN lock on the file and if
280 we can skip talking to MDS */
281 mutex_lock(&lli->lli_och_mutex);
282 if (fd->fd_omode & FMODE_WRITE) {
284 LASSERT(lli->lli_open_fd_write_count);
285 lli->lli_open_fd_write_count--;
286 } else if (fd->fd_omode & FMODE_EXEC) {
288 LASSERT(lli->lli_open_fd_exec_count);
289 lli->lli_open_fd_exec_count--;
292 LASSERT(lli->lli_open_fd_read_count);
293 lli->lli_open_fd_read_count--;
295 mutex_unlock(&lli->lli_och_mutex);
/* no cached OPEN lock matched: the MDS must be told about this close */
297 if (!md_lock_match(ll_i2mdexp(inode), flags, ll_inode2fid(inode),
298 LDLM_IBITS, &policy, lockmode, &lockh))
299 rc = ll_md_real_close(inode, fd->fd_omode);
302 LUSTRE_FPRIVATE(file) = NULL;
303 ll_file_data_put(fd);
308 /* While this returns an error code, fput() the caller does not, so we need
309 * to make every effort to clean up all of our state here. Also, applications
310 * rarely check close errors and even if an error is returned they will not
311 * re-try the close call.
313 int ll_file_release(struct inode *inode, struct file *file)
315 struct ll_file_data *fd;
316 struct ll_sb_info *sbi = ll_i2sbi(inode);
317 struct ll_inode_info *lli = ll_i2info(inode);
321 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
322 PFID(ll_inode2fid(inode)), inode);
/* remote-client ACL state is tracked per-root; tear it down on the
 * last release of the root inode */
324 #ifdef CONFIG_FS_POSIX_ACL
325 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
326 inode == inode->i_sb->s_root->d_inode) {
327 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
330 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
331 fd->fd_flags &= ~LL_FILE_RMTACL;
332 rct_del(&sbi->ll_rct, current_pid());
333 et_search_free(&sbi->ll_et, current_pid());
/* root is not counted in release stats */
338 if (inode->i_sb->s_root != file->f_path.dentry)
339 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
340 fd = LUSTRE_FPRIVATE(file);
343 /* The last ref on @file, maybe not the owner pid of statahead,
344 * because parent and child process can share the same file handle. */
345 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
346 ll_deauthorize_statahead(inode, fd);
/* root inode never had an MDS open handle; free fd state and bail */
348 if (inode->i_sb->s_root == file->f_path.dentry) {
349 LUSTRE_FPRIVATE(file) = NULL;
350 ll_file_data_put(fd);
/* propagate deferred async write errors to this close */
354 if (!S_ISDIR(inode->i_mode)) {
355 if (lli->lli_clob != NULL)
356 lov_read_and_clear_async_rc(lli->lli_clob);
357 lli->lli_async_rc = 0;
360 rc = ll_md_close(inode, file);
362 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
363 libcfs_debug_dumplog();
/*
 * Send an OPEN intent to the MDS for \a file, optionally packing striping
 * data (\a lmm / \a lmmsize), and install the resulting lock/inode data.
 */
368 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
369 struct lookup_intent *itp)
371 struct dentry *de = file->f_path.dentry;
372 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
373 struct dentry *parent = de->d_parent;
374 const char *name = NULL;
376 struct md_op_data *op_data;
377 struct ptlrpc_request *req = NULL;
381 LASSERT(parent != NULL);
382 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
384 /* if server supports open-by-fid, or file name is invalid, don't pack
385 * name in open request */
386 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
387 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
388 name = de->d_name.name;
389 len = de->d_name.len;
392 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
393 name, len, 0, LUSTRE_OPC_ANY, NULL);
395 RETURN(PTR_ERR(op_data));
396 op_data->op_data = lmm;
397 op_data->op_data_size = lmmsize;
399 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
400 &ll_md_blocking_ast, 0);
401 ll_finish_md_op_data(op_data);
403 /* reason for keep own exit path - don`t flood log
404 * with messages with -ESTALE errors.
/* open handle was granted but the open itself failed: release it so it
 * is not leaked on the MDS */
406 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
407 it_open_error(DISP_OPEN_OPEN, itp))
409 ll_release_openhandle(de, itp);
413 if (it_disposition(itp, DISP_LOOKUP_NEG))
414 GOTO(out, rc = -ENOENT);
416 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
417 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
418 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* update the inode from the reply and attach the granted lock */
422 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
423 if (!rc && itp->d.lustre.it_lock_mode)
424 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
427 ptlrpc_req_finished(req);
428 ll_intent_drop_lock(itp);
/*
 * Populate an obd_client_handle from a successful OPEN intent reply and
 * register it for open replay after MDS recovery.
 */
433 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
434 struct obd_client_handle *och)
436 struct ptlrpc_request *req = it->d.lustre.it_data;
437 struct mdt_body *body;
439 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
440 och->och_fh = body->mbo_handle;
441 och->och_fid = body->mbo_fid1;
/* lease handle reuses the intent's lock handle cookie */
442 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
443 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
444 och->och_flags = it->it_flags;
446 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish the client-side part of an open: optionally fill \a och from the
 * intent, then attach \a fd to the file and initialise its readahead and
 * cl_io bookkeeping.
 */
449 static int ll_local_open(struct file *file, struct lookup_intent *it,
450 struct ll_file_data *fd, struct obd_client_handle *och)
452 struct inode *inode = file->f_path.dentry->d_inode;
455 LASSERT(!LUSTRE_FPRIVATE(file));
462 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
467 LUSTRE_FPRIVATE(file) = fd;
468 ll_readahead_init(inode, &fd->fd_ras);
/* remember the open mode for matching at close time */
469 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
471 /* ll_cl_context initialize */
472 rwlock_init(&fd->fd_lock);
473 INIT_LIST_HEAD(&fd->fd_lccs);
478 /* Open a file, and (for the very first open) create objects on the OSTs at
479 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
480 * creation or open until ll_lov_setstripe() ioctl is called.
482 * If we already have the stripe MD locally then we don't request it in
483 * md_open(), by passing a lmm_size = 0.
485 * It is up to the application to ensure no other processes open this file
486 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
487 * used. We might be able to avoid races of that sort by getting lli_open_sem
488 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
489 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
491 int ll_file_open(struct inode *inode, struct file *file)
493 struct ll_inode_info *lli = ll_i2info(inode);
494 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
495 .it_flags = file->f_flags };
496 struct obd_client_handle **och_p = NULL;
497 __u64 *och_usecount = NULL;
498 struct ll_file_data *fd;
502 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
503 PFID(ll_inode2fid(inode)), inode, file->f_flags);
/* the lookup path may have stashed a pre-enqueued intent here */
505 it = file->private_data; /* XXX: compat macro */
506 file->private_data = NULL; /* prevent ll_local_open assertion */
508 fd = ll_file_data_get();
510 GOTO(out_openerr, rc = -ENOMEM);
513 if (S_ISDIR(inode->i_mode))
514 ll_authorize_statahead(inode, fd);
/* the root inode needs no MDS open handle */
516 if (inode->i_sb->s_root == file->f_path.dentry) {
517 LUSTRE_FPRIVATE(file) = fd;
/* no usable intent from lookup: build our own OPEN intent */
521 if (!it || !it->d.lustre.it_disposition) {
522 /* Convert f_flags into access mode. We cannot use file->f_mode,
523 * because everything but O_ACCMODE mask was stripped from
525 if ((oit.it_flags + 1) & O_ACCMODE)
527 if (file->f_flags & O_TRUNC)
528 oit.it_flags |= FMODE_WRITE;
530 /* kernel only call f_op->open in dentry_open. filp_open calls
531 * dentry_open after call to open_namei that checks permissions.
532 * Only nfsd_open call dentry_open directly without checking
533 * permissions and because of that this code below is safe. */
534 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
535 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
537 /* We do not want O_EXCL here, presumably we opened the file
538 * already? XXX - NFS implications? */
539 oit.it_flags &= ~O_EXCL;
541 /* bug20584, if "it_flags" contains O_CREAT, the file will be
542 * created if necessary, then "IT_CREAT" should be set to keep
543 * consistent with it */
544 if (oit.it_flags & O_CREAT)
545 oit.it_op |= IT_CREAT;
551 /* Let's see if we have file open on MDS already. */
552 if (it->it_flags & FMODE_WRITE) {
553 och_p = &lli->lli_mds_write_och;
554 och_usecount = &lli->lli_open_fd_write_count;
555 } else if (it->it_flags & FMODE_EXEC) {
556 och_p = &lli->lli_mds_exec_och;
557 och_usecount = &lli->lli_open_fd_exec_count;
559 och_p = &lli->lli_mds_read_och;
560 och_usecount = &lli->lli_open_fd_read_count;
563 mutex_lock(&lli->lli_och_mutex);
564 if (*och_p) { /* Open handle is present */
565 if (it_disposition(it, DISP_OPEN_OPEN)) {
566 /* Well, there's extra open request that we do not need,
567 let's close it somehow. This will decref request. */
568 rc = it_open_error(DISP_OPEN_OPEN, it);
570 mutex_unlock(&lli->lli_och_mutex);
571 GOTO(out_openerr, rc);
574 ll_release_openhandle(file->f_path.dentry, it);
/* reuse the existing MDS open handle for this new fd */
578 rc = ll_local_open(file, it, fd, NULL);
581 mutex_unlock(&lli->lli_och_mutex);
582 GOTO(out_openerr, rc);
585 LASSERT(*och_usecount == 0);
586 if (!it->d.lustre.it_disposition) {
587 /* We cannot just request lock handle now, new ELC code
588 means that one of other OPEN locks for this file
589 could be cancelled, and since blocking ast handler
590 would attempt to grab och_mutex as well, that would
591 result in a deadlock */
592 mutex_unlock(&lli->lli_och_mutex);
594 * Normally called under two situations:
596 * 2. A race/condition on MDS resulting in no open
597 * handle to be returned from LOOKUP|OPEN request,
598 * for example if the target entry was a symlink.
600 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
602 * Always specify MDS_OPEN_BY_FID because we don't want
603 * to get file with different fid.
605 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
606 rc = ll_intent_file_open(file, NULL, 0, it);
608 GOTO(out_openerr, rc);
612 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
614 GOTO(out_och_free, rc = -ENOMEM);
618 /* md_intent_lock() didn't get a request ref if there was an
619 * open error, so don't do cleanup on the request here
621 /* XXX (green): Should not we bail out on any error here, not
622 * just open error? */
623 rc = it_open_error(DISP_OPEN_OPEN, it);
625 GOTO(out_och_free, rc);
627 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
628 "inode %p: disposition %x, status %d\n", inode,
629 it_disposition(it, ~0), it->d.lustre.it_status);
631 rc = ll_local_open(file, it, fd, *och_p);
633 GOTO(out_och_free, rc);
635 mutex_unlock(&lli->lli_och_mutex);
638 /* Must do this outside lli_och_mutex lock to prevent deadlock where
639 different kind of OPEN lock for this same inode gets cancelled
640 by ldlm_cancel_lru */
641 if (!S_ISREG(inode->i_mode))
642 GOTO(out_och_free, rc);
644 cl_lov_delay_create_clear(&file->f_flags);
645 GOTO(out_och_free, rc);
/* error/cleanup paths: free the handle, undo statahead, drop fd state */
649 if (och_p && *och_p) {
650 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
651 *och_p = NULL; /* OBD_FREE writes some magic there */
654 mutex_unlock(&lli->lli_och_mutex);
657 if (lli->lli_opendir_key == fd)
658 ll_deauthorize_statahead(inode, fd);
660 ll_file_data_put(fd);
662 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* drop the intent's request reference if we still own it */
665 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
666 ptlrpc_req_finished(it->d.lustre.it_data);
667 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease locks: on conflict, cancel the lease lock
 * asynchronously. Unlike ll_md_blocking_ast() it does not manage any
 * open handle.
 */
673 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
674 struct ldlm_lock_desc *desc, void *data, int flag)
677 struct lustre_handle lockh;
681 case LDLM_CB_BLOCKING:
682 ldlm_lock2handle(lock, &lockh);
683 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
685 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
689 case LDLM_CB_CANCELING:
697 * Acquire a lease and open the file.
699 static struct obd_client_handle *
700 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
703 struct lookup_intent it = { .it_op = IT_OPEN };
704 struct ll_sb_info *sbi = ll_i2sbi(inode);
705 struct md_op_data *op_data;
706 struct ptlrpc_request *req = NULL;
707 struct lustre_handle old_handle = { 0 };
708 struct obd_client_handle *och = NULL;
/* a lease is either read or write, never exec or a combination */
713 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
714 RETURN(ERR_PTR(-EINVAL));
717 struct ll_inode_info *lli = ll_i2info(inode);
718 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
719 struct obd_client_handle **och_p;
/* the fd's open mode must cover the requested lease mode */
722 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
723 RETURN(ERR_PTR(-EPERM));
725 /* Get the openhandle of the file */
727 mutex_lock(&lli->lli_och_mutex);
/* only one lease per fd */
728 if (fd->fd_lease_och != NULL) {
729 mutex_unlock(&lli->lli_och_mutex);
/* steal the shared MDS open handle into the fd if we are the sole
 * opener, so the lease is tied to this descriptor */
733 if (fd->fd_och == NULL) {
734 if (file->f_mode & FMODE_WRITE) {
735 LASSERT(lli->lli_mds_write_och != NULL);
736 och_p = &lli->lli_mds_write_och;
737 och_usecount = &lli->lli_open_fd_write_count;
739 LASSERT(lli->lli_mds_read_och != NULL);
740 och_p = &lli->lli_mds_read_och;
741 och_usecount = &lli->lli_open_fd_read_count;
743 if (*och_usecount == 1) {
750 mutex_unlock(&lli->lli_och_mutex);
751 if (rc < 0) /* more than 1 opener */
754 LASSERT(fd->fd_och != NULL);
755 old_handle = fd->fd_och->och_fh;
760 RETURN(ERR_PTR(-ENOMEM));
762 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
763 LUSTRE_OPC_ANY, NULL);
765 GOTO(out, rc = PTR_ERR(op_data));
767 /* To tell the MDT this openhandle is from the same owner */
768 op_data->op_handle = old_handle;
770 it.it_flags = fmode | open_flags;
771 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
772 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
773 &ll_md_blocking_lease_ast,
774 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
775 * it can be cancelled which may mislead applications that the lease is
777 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
778 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
779 * doesn't deal with openhandle, so normal openhandle will be leaked. */
780 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
781 ll_finish_md_op_data(op_data);
782 ptlrpc_req_finished(req);
784 GOTO(out_release_it, rc);
786 if (it_disposition(&it, DISP_LOOKUP_NEG))
787 GOTO(out_release_it, rc = -ENOENT);
789 rc = it_open_error(DISP_OPEN_OPEN, &it);
791 GOTO(out_release_it, rc);
793 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
794 ll_och_fill(sbi->ll_md_exp, &it, och);
/* servers without lease support never set DISP_OPEN_LEASE */
796 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
797 GOTO(out_close, rc = -EOPNOTSUPP);
799 /* already get lease, handle lease lock */
800 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
801 if (it.d.lustre.it_lock_mode == 0 ||
802 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
803 /* open lock must return for lease */
804 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
805 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
806 it.d.lustre.it_lock_bits);
807 GOTO(out_close, rc = -EPROTO);
810 ll_intent_release(&it);
/* error unwinding: cancel the open lock, close the handle, drop intent */
814 /* Cancel open lock */
815 if (it.d.lustre.it_lock_mode != 0) {
816 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
817 it.d.lustre.it_lock_mode);
818 it.d.lustre.it_lock_mode = 0;
819 och->och_lease_handle.cookie = 0ULL;
821 rc2 = ll_close_inode_openhandle(inode, och, 0, NULL);
823 CERROR("%s: error closing file "DFID": %d\n",
824 ll_get_fsname(inode->i_sb, NULL, 0),
825 PFID(&ll_i2info(inode)->lli_fid), rc2);
826 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
828 ll_intent_release(&it);
836 * Check whether a layout swap can be done between two inodes.
838 * \param[in] inode1 First inode to check
839 * \param[in] inode2 Second inode to check
841 * \retval 0 on success, layout swap can be performed between both inodes
842 * \retval negative error code if requirements are not met
844 static int ll_check_swap_layouts_validity(struct inode *inode1,
845 struct inode *inode2)
/* layout swap only makes sense between two regular files */
847 if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
/* caller needs write permission on both files */
850 if (inode_permission(inode1, MAY_WRITE) ||
851 inode_permission(inode2, MAY_WRITE))
/* both inodes must live on the same Lustre mount */
854 if (inode1->i_sb != inode2->i_sb)
/*
 * Close \a inode's open handle with a MDS_CLOSE_LAYOUT_SWAP bias so the
 * MDT atomically swaps layouts between \a inode and \a inode2.
 */
860 static int ll_swap_layouts_close(struct obd_client_handle *och,
861 struct inode *inode, struct inode *inode2)
863 const struct lu_fid *fid1 = ll_inode2fid(inode);
864 const struct lu_fid *fid2;
868 CDEBUG(D_INODE, "%s: biased close of file "DFID"\n",
869 ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
871 rc = ll_check_swap_layouts_validity(inode, inode2);
873 GOTO(out_free_och, rc);
875 /* We now know that inode2 is a lustre inode */
876 fid2 = ll_inode2fid(inode2);
/* swapping a file's layout with itself is meaningless */
878 rc = lu_fid_cmp(fid1, fid2);
880 GOTO(out_free_och, rc = -EINVAL);
882 /* Close the file and swap layouts between inode & inode2.
883 * NB: lease lock handle is released in mdc_close_layout_swap_pack()
884 * because we still need it to pack l_remote_handle to MDT. */
885 rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP,
888 och = NULL; /* freed in ll_close_inode_openhandle() */
898 * Release lease and close the file.
899 * It will check if the lease has ever broken.
901 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
904 struct ldlm_lock *lock;
905 bool cancelled = true;
909 lock = ldlm_handle2lock(&och->och_lease_handle);
/* a lease counts as broken if its lock was already cancelled */
911 lock_res_and_lock(lock);
912 cancelled = ldlm_is_cancel(lock);
913 unlock_res_and_lock(lock);
917 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
918 PFID(&ll_i2info(inode)->lli_fid), cancelled);
/* lease still intact: cancel the lock ourselves before closing */
921 ldlm_cli_cancel(&och->och_lease_handle, 0);
922 if (lease_broken != NULL)
923 *lease_broken = cancelled;
925 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
/*
 * Merge MDS-provided metadata with OST-provided size/blocks/timestamps
 * into the VFS inode, under the inode size lock. For each timestamp the
 * newer of the MDS and OST values wins.
 */
929 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
931 struct ll_inode_info *lli = ll_i2info(inode);
932 struct cl_object *obj = lli->lli_clob;
933 struct cl_attr *attr = vvp_env_thread_attr(env);
941 ll_inode_size_lock(inode);
943 /* merge timestamps the most recently obtained from mds with
944 timestamps obtained from osts */
945 LTIME_S(inode->i_atime) = lli->lli_atime;
946 LTIME_S(inode->i_mtime) = lli->lli_mtime;
947 LTIME_S(inode->i_ctime) = lli->lli_ctime;
949 atime = LTIME_S(inode->i_atime);
950 mtime = LTIME_S(inode->i_mtime);
951 ctime = LTIME_S(inode->i_ctime);
/* fetch aggregated size/blocks/times from the cl_object (OST side) */
953 cl_object_attr_lock(obj);
954 rc = cl_object_attr_get(env, obj, attr);
955 cl_object_attr_unlock(obj);
958 GOTO(out_size_unlock, rc);
/* keep whichever timestamp is more recent */
960 if (atime < attr->cat_atime)
961 atime = attr->cat_atime;
963 if (ctime < attr->cat_ctime)
964 ctime = attr->cat_ctime;
966 if (mtime < attr->cat_mtime)
967 mtime = attr->cat_mtime;
969 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
970 PFID(&lli->lli_fid), attr->cat_size);
972 i_size_write(inode, attr->cat_size);
973 inode->i_blocks = attr->cat_blocks;
975 LTIME_S(inode->i_atime) = atime;
976 LTIME_S(inode->i_mtime) = mtime;
977 LTIME_S(inode->i_ctime) = ctime;
980 ll_inode_size_unlock(inode);
/*
 * Return true when atime updates should be suppressed for \a file,
 * checking the open flags, inode flags, mount flags and superblock flags.
 */
985 static bool file_is_noatime(const struct file *file)
987 const struct vfsmount *mnt = file->f_path.mnt;
988 const struct inode *inode = file->f_path.dentry->d_inode;
990 /* Adapted from file_accessed() and touch_atime().*/
991 if (file->f_flags & O_NOATIME)
994 if (inode->i_flags & S_NOATIME)
997 if (IS_NOATIME(inode))
1000 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1003 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1006 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialise a cl_io from the file's open flags: non-blocking/append/sync
 * behaviour, the target cl_object, lock-request policy and noatime.
 */
1012 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
1014 struct inode *inode = file->f_path.dentry->d_inode;
1016 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1018 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1019 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1020 file->f_flags & O_DIRECT ||
1023 io->ci_obj = ll_i2info(inode)->lli_clob;
1024 io->ci_lockreq = CILR_MAYBE;
/* nolock mounts never take DLM locks; O_APPEND writes must */
1025 if (ll_file_nolock(file)) {
1026 io->ci_lockreq = CILR_NEVER;
1027 io->ci_no_srvlock = 1;
1028 } else if (file->f_flags & O_APPEND) {
1029 io->ci_lockreq = CILR_MANDATORY;
1032 io->ci_noatime = file_is_noatime(file);
/*
 * Common read/write engine: set up a cl_io of type \a iot at \a *ppos for
 * \a count bytes, take the range lock where needed, run the cl_io loop,
 * and restart the IO when the lower layers ask for it. Updates *ppos and
 * the read/write statistics; returns bytes transferred or a negative rc.
 */
1036 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1037 struct file *file, enum cl_io_type iot,
1038 loff_t *ppos, size_t count)
1040 struct vvp_io *vio = vvp_env_io(env);
1041 struct inode *inode = file->f_path.dentry->d_inode;
1042 struct ll_inode_info *lli = ll_i2info(inode);
1043 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1047 struct range_lock range;
1051 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1052 file->f_path.dentry->d_name.name, iot, *ppos, count);
1055 io = vvp_env_thread_io(env);
1056 ll_io_init(io, file, iot == CIT_WRITE);
1058 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1059 bool range_locked = false;
/* O_APPEND writes land at EOF, so the whole file must be covered */
1061 if (file->f_flags & O_APPEND)
1062 range_lock_init(&range, 0, LUSTRE_EOF);
1064 range_lock_init(&range, *ppos, *ppos + count - 1);
1066 vio->vui_fd = LUSTRE_FPRIVATE(file);
1067 vio->vui_io_subtype = args->via_io_subtype;
1069 switch (vio->vui_io_subtype) {
1071 vio->vui_iter = args->u.normal.via_iter;
1072 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1073 vio->vui_tot_nrsegs = vio->vui_iter->nr_segs;
1074 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1075 vio->vui_iocb = args->u.normal.via_iocb;
1076 /* Direct IO reads must also take range lock,
1077 * or multiple reads will try to work on the same pages
1078 * See LU-6227 for details. */
1079 if (((iot == CIT_WRITE) ||
1080 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1081 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1082 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1084 rc = range_lock(&lli->lli_write_tree, &range);
1088 range_locked = true;
1092 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1093 vio->u.splice.vui_flags = args->u.splice.via_flags;
1096 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
/* register the io in the cl context so lower layers can find it */
1100 ll_cl_add(file, env, io);
1101 rc = cl_io_loop(env, io);
1102 ll_cl_remove(file, env);
1105 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1107 range_unlock(&lli->lli_write_tree, &range);
1110 /* cl_io_rw_init() handled IO */
/* account partial progress and prepare a possible restart */
1114 if (io->ci_nob > 0) {
1115 result += io->ci_nob;
1116 count -= io->ci_nob;
1117 *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */
1119 /* prepare IO restart */
1120 if (count > 0 && args->via_io_subtype == IO_NORMAL) {
1121 args->u.normal.via_iter = vio->vui_iter;
1122 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1123 args->u.normal.via_iter->nr_segs = vio->vui_tot_nrsegs;
1124 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1129 cl_io_fini(env, io);
/* lower layer asked for a restart (e.g. layout change) and data remains */
1131 if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
1133 "%s: restart %s from %lld, count:%zu, result: %zd\n",
1134 file->f_path.dentry->d_name.name,
1135 iot == CIT_READ ? "read" : "write",
1136 *ppos, count, result);
1140 if (iot == CIT_READ) {
1142 ll_stats_ops_tally(ll_i2sbi(inode),
1143 LPROC_LL_READ_BYTES, result);
1144 } else if (iot == CIT_WRITE) {
1146 ll_stats_ops_tally(ll_i2sbi(inode),
1147 LPROC_LL_WRITE_BYTES, result);
/* track write failure state for fsync error reporting */
1148 fd->fd_write_failed = false;
1149 } else if (result == 0 && rc == 0) {
1152 fd->fd_write_failed = true;
1154 fd->fd_write_failed = false;
1155 } else if (rc != -ERESTARTSYS) {
1156 fd->fd_write_failed = true;
1160 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1162 return result > 0 ? result : rc;
1166 * Read from a file (through the page cache).
1168 static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1170 struct vvp_io_args *args;
1175 env = cl_env_get(&refcheck);
1177 return PTR_ERR(env);
/* package the iterator/iocb and delegate to the common IO engine */
1179 args = ll_env_args(env, IO_NORMAL);
1180 args->u.normal.via_iter = to;
1181 args->u.normal.via_iocb = iocb;
1183 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1184 &iocb->ki_pos, iov_iter_count(to));
1185 cl_env_put(env, &refcheck);
1190 * Write to a file (through the page cache).
1192 static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1194 struct vvp_io_args *args;
1199 env = cl_env_get(&refcheck);
1201 return PTR_ERR(env);
/* package the iterator/iocb and delegate to the common IO engine */
1203 args = ll_env_args(env, IO_NORMAL);
1204 args->u.normal.via_iter = from;
1205 args->u.normal.via_iocb = iocb;
1207 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1208 &iocb->ki_pos, iov_iter_count(from));
1209 cl_env_put(env, &refcheck);
1213 #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
1215 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
1217 static int ll_file_get_iov_count(const struct iovec *iov,
1218 unsigned long *nr_segs, size_t *count)
/* Validate an iovec array and compute the total byte count, truncating
 * *nr_segs at the first inaccessible segment (matches kernel semantics). */
1223 for (seg = 0; seg < *nr_segs; seg++) {
1224 const struct iovec *iv = &iov[seg];
1227 * If any segment has a negative length, or the cumulative
1228 * length ever wraps negative then return -EINVAL.
1231 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1233 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1238 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Legacy aio_read entry point (pre read_iter kernels): copy the caller's
 * iovec, wrap it in an iov_iter and forward to ll_file_read_iter().
 */
1245 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1246 unsigned long nr_segs, loff_t pos)
1248 struct iovec *local_iov;
1249 struct iov_iter *to;
1252 struct lu_env *env = NULL;
1256 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1262 env = cl_env_get(&refcheck);
1264 RETURN(PTR_ERR(env));
/* small vectors reuse the per-env scratch iovec; larger ones are
 * heap-allocated and freed below */
1266 local_iov = &ll_env_info(env)->lti_local_iov;
1270 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1271 if (local_iov == NULL)
1274 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1282 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1283 iov_iter_init(to, READ, local_iov, nr_segs, iov_count);
1284 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1285 iov_iter_init(to, local_iov, nr_segs, iov_count, 0);
1286 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1288 result = ll_file_read_iter(iocb, to);
1293 cl_env_put(env, &refcheck);
1295 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous read(2) entry for old kernels: build a one-segment iovec
 * and a sync kiocb, then reuse the aio path.  *ppos is updated from
 * the kiocb position on return.
 */
1300 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1304 struct iovec iov = { .iov_base = buf, .iov_len = count };
1305 struct kiocb *kiocb;
1310 env = cl_env_get(&refcheck);
1312 RETURN(PTR_ERR(env));
1314 kiocb = &ll_env_info(env)->lti_kiocb;
1315 init_sync_kiocb(kiocb, file);
1316 kiocb->ki_pos = *ppos;
/* field holding the remaining byte count was renamed across kernels */
1317 #ifdef HAVE_KIOCB_KI_LEFT
1318 kiocb->ki_left = count;
1319 #elif defined(HAVE_KI_NBYTES)
1320 kiocb->ki_nbytes = count;
1323 result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos);
1324 *ppos = kiocb->ki_pos;
1326 cl_env_put(env, &refcheck);
1331 * Write to a file (through the page cache).
/*
 * Legacy aio_write entry: duplicate the caller's iovec, build a WRITE
 * iov_iter and forward to ll_file_write_iter().  Mirrors
 * ll_file_aio_read() above.
 */
1334 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1335 unsigned long nr_segs, loff_t pos)
1337 struct iovec *local_iov;
1338 struct iov_iter *from;
1341 struct lu_env *env = NULL;
1345 result = ll_file_get_iov_count(iov, &nr_segs, &iov_count);
1350 env = cl_env_get(&refcheck);
1352 RETURN(PTR_ERR(env));
1354 local_iov = &ll_env_info(env)->lti_local_iov;
1357 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1358 if (local_iov == NULL)
1361 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1364 OBD_ALLOC_PTR(from);
1369 # ifdef HAVE_IOV_ITER_INIT_DIRECTION
1370 iov_iter_init(from, WRITE, local_iov, nr_segs, iov_count);
1371 # else /* !HAVE_IOV_ITER_INIT_DIRECTION */
1372 iov_iter_init(from, local_iov, nr_segs, iov_count, 0);
1373 # endif /* HAVE_IOV_ITER_INIT_DIRECTION */
1375 result = ll_file_write_iter(iocb, from);
1380 cl_env_put(env, &refcheck);
1382 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous write(2) entry for old kernels: single-segment iovec plus
 * a sync kiocb, routed through the aio write path; *ppos is refreshed
 * from kiocb->ki_pos afterwards.
 */
1387 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1388 size_t count, loff_t *ppos)
1391 struct iovec iov = { .iov_base = (void __user *)buf,
1393 struct kiocb *kiocb;
1398 env = cl_env_get(&refcheck);
1400 RETURN(PTR_ERR(env));
1402 kiocb = &ll_env_info(env)->lti_kiocb;
1403 init_sync_kiocb(kiocb, file);
1404 kiocb->ki_pos = *ppos;
/* field holding the remaining byte count was renamed across kernels */
1405 #ifdef HAVE_KIOCB_KI_LEFT
1406 kiocb->ki_left = count;
1407 #elif defined(HAVE_KI_NBYTES)
1408 kiocb->ki_nbytes = count;
1411 result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos);
1412 *ppos = kiocb->ki_pos;
1414 cl_env_put(env, &refcheck);
1417 #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
1420 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read: package the pipe and flags into IO_SPLICE args and run
 * a CIT_READ through ll_file_io_generic().
 */
1422 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1423 struct pipe_inode_info *pipe, size_t count,
1427 struct vvp_io_args *args;
1432 env = cl_env_get(&refcheck);
1434 RETURN(PTR_ERR(env));
1436 args = ll_env_args(env, IO_SPLICE);
1437 args->u.splice.via_pipe = pipe;
1438 args->u.splice.via_flags = flags;
1440 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1441 cl_env_put(env, &refcheck);
/*
 * Set striping (LOV EA) on an inode via an open-by-FID intent.  Holds
 * the inode size lock across the intent open, releases the MDS open
 * handle afterwards, and clears the delay-create flag on the file.
 */
1445 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1446 __u64 flags, struct lov_user_md *lum,
1449 struct lookup_intent oit = {
1451 .it_flags = flags | MDS_OPEN_BY_FID,
1456 ll_inode_size_lock(inode);
1457 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1459 GOTO(out_unlock, rc);
/* the open handle was only needed to carry the EA; close it now */
1461 ll_release_openhandle(file->f_path.dentry, &oit);
1464 ll_inode_size_unlock(inode);
1465 ll_intent_release(&oit);
1466 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA for @filename from the MDS via md_getattr_name().
 * On success *lmmp points into the (still-referenced) reply buffer and
 * *request must be released by the caller with ptlrpc_req_finished().
 * The EA is byte-swapped to host endianness on big-endian machines.
 */
1471 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1472 struct lov_mds_md **lmmp, int *lmm_size,
1473 struct ptlrpc_request **request)
1475 struct ll_sb_info *sbi = ll_i2sbi(inode);
1476 struct mdt_body *body;
1477 struct lov_mds_md *lmm = NULL;
1478 struct ptlrpc_request *req = NULL;
1479 struct md_op_data *op_data;
1482 rc = ll_get_default_mdsize(sbi, &lmmsize);
1486 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1487 strlen(filename), lmmsize,
1488 LUSTRE_OPC_ANY, NULL);
1489 if (IS_ERR(op_data))
1490 RETURN(PTR_ERR(op_data));
1492 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1493 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1494 ll_finish_md_op_data(op_data);
1496 CDEBUG(D_INFO, "md_getattr_name failed "
1497 "on %s: rc %d\n", filename, rc);
1501 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1502 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1504 lmmsize = body->mbo_eadatasize;
/* no EA present (or zero-length) means the file has no striping */
1506 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1508 GOTO(out, rc = -ENODATA);
1511 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1512 LASSERT(lmm != NULL);
1514 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1515 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1516 GOTO(out, rc = -EPROTO);
1520 * This is coming from the MDS, so is probably in
1521 * little endian. We convert it to host endian before
1522 * passing it to userspace.
/* only swab on big-endian hosts: LOV_MAGIC differs from its LE form */
1524 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1527 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1528 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1531 /* if function called for directory - we should
1532 * avoid swabbing non-existent lsm objects */
1533 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1534 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1535 if (S_ISREG(body->mbo_mode))
1536 lustre_swab_lov_user_md_objects(
1537 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1539 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1540 lustre_swab_lov_user_md_v3(
1541 (struct lov_user_md_v3 *)lmm);
1542 if (S_ISREG(body->mbo_mode))
1543 lustre_swab_lov_user_md_objects(
1544 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1551 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (with one OST entry)
 * from userspace and apply it with MDS_OPEN_HAS_OBJS.  Root-only
 * (CFS_CAP_SYS_ADMIN) since it names pre-existing objects.
 */
1556 static int ll_lov_setea(struct inode *inode, struct file *file,
1559 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1560 struct lov_user_md *lump;
1561 int lum_size = sizeof(struct lov_user_md) +
1562 sizeof(struct lov_user_ost_data);
1566 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1569 OBD_ALLOC_LARGE(lump, lum_size);
1573 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1574 OBD_FREE_LARGE(lump, lum_size);
1578 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1580 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the file's striping information to the userspace lov_user_md
 * buffer via the cl_object layer.
 */
1584 static int ll_file_getstripe(struct inode *inode,
1585 struct lov_user_md __user *lum)
1592 env = cl_env_get(&refcheck);
1594 RETURN(PTR_ERR(env));
1596 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1597 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user's lov_user_md into a
 * kernel buffer, apply it, then refresh the layout and echo the
 * resulting stripe info back to the caller's buffer.
 */
1601 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1604 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1605 struct lov_user_md *klum;
1607 __u64 flags = FMODE_WRITE;
1610 rc = ll_copy_user_md(lum, &klum);
1615 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* zero the user's stripe_count so getstripe below fills in actuals */
1619 put_user(0, &lum->lmm_stripe_count);
1621 ll_layout_refresh(inode, &gen);
1622 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1625 OBD_FREE(klum, lum_size);
/*
 * Take a group lock with group id @arg on behalf of this file
 * descriptor.  The lli_lock spinlock guards fd_flags/fd_grouplock;
 * the lock itself is acquired outside the spinlock, so a racing
 * thread is re-checked (and this acquisition dropped) afterwards.
 */
1630 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1632 struct ll_inode_info *lli = ll_i2info(inode);
1633 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1634 struct ll_grouplock grouplock;
1639 CWARN("group id for group lock must not be 0\n");
1643 if (ll_file_nolock(file))
1644 RETURN(-EOPNOTSUPP);
1646 spin_lock(&lli->lli_lock);
1647 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1648 CWARN("group lock already existed with gid %lu\n",
1649 fd->fd_grouplock.lg_gid);
1650 spin_unlock(&lli->lli_lock);
1653 LASSERT(fd->fd_grouplock.lg_lock == NULL);
1654 spin_unlock(&lli->lli_lock);
/* may block unless the file was opened O_NONBLOCK */
1656 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1657 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1661 spin_lock(&lli->lli_lock);
1662 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1663 spin_unlock(&lli->lli_lock);
1664 CERROR("another thread just won the race\n");
1665 cl_put_grouplock(&grouplock);
1669 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1670 fd->fd_grouplock = grouplock;
1671 spin_unlock(&lli->lli_lock);
1673 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * Release the group lock with gid @arg previously taken on this file
 * descriptor.  Fails if no group lock is held or the gid mismatches.
 * The fd state is cleared under lli_lock; the cl lock is dropped after.
 */
1677 static int ll_put_grouplock(struct inode *inode, struct file *file,
1680 struct ll_inode_info *lli = ll_i2info(inode);
1681 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1682 struct ll_grouplock grouplock;
1685 spin_lock(&lli->lli_lock);
1686 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1687 spin_unlock(&lli->lli_lock);
1688 CWARN("no group lock held\n");
1692 LASSERT(fd->fd_grouplock.lg_lock != NULL);
1694 if (fd->fd_grouplock.lg_gid != arg) {
1695 CWARN("group lock %lu doesn't match current id %lu\n",
1696 arg, fd->fd_grouplock.lg_gid);
1697 spin_unlock(&lli->lli_lock);
/* copy out, then clear, so cl_put_grouplock runs without the spinlock */
1701 grouplock = fd->fd_grouplock;
1702 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1703 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1704 spin_unlock(&lli->lli_lock);
1706 cl_put_grouplock(&grouplock);
1707 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1712 * Close inode open handle
1714 * \param dentry [in] dentry which contains the inode
1715 * \param it [in,out] intent which contains open info and result
1718 * \retval <0 failure
1720 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1722 struct inode *inode = dentry->d_inode;
1723 struct obd_client_handle *och;
1729 /* Root ? Do nothing. */
1730 if (dentry->d_inode->i_sb->s_root == dentry)
1733 /* No open handle to close? Move away */
1734 if (!it_disposition(it, DISP_OPEN_OPEN))
1737 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1739 OBD_ALLOC(och, sizeof(*och));
1741 GOTO(out, rc = -ENOMEM);
/* fill the handle from the intent's open reply, then close it on MDS */
1743 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1745 rc = ll_close_inode_openhandle(inode, och, 0, NULL);
1747 /* this one is in place of ll_file_open */
1748 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1749 ptlrpc_req_finished(it->d.lustre.it_data);
1750 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1756 * Get size for inode for which FIEMAP mapping is requested.
1757 * Make the FIEMAP get_info call and returns the result.
1758 * \param fiemap kernel buffer to hold extents
1759 * \param num_bytes kernel buffer size
1761 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1767 struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, };
1770 /* Checks for fiemap flags */
1771 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* report back which flags we do NOT support, per FIEMAP convention */
1772 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1776 /* Check for FIEMAP_FLAG_SYNC */
1777 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1778 rc = filemap_fdatawrite(inode->i_mapping);
1783 env = cl_env_get(&refcheck);
1785 RETURN(PTR_ERR(env));
/* a zero cached size may just be stale; glimpse to be sure */
1787 if (i_size_read(inode) == 0) {
1788 rc = ll_glimpse_size(inode);
1793 fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1794 obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE);
1795 obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid);
1797 /* If filesize is 0, then there would be no objects for mapping */
1798 if (fmkey.lfik_oa.o_size == 0) {
1799 fiemap->fm_mapped_extents = 0;
1803 fmkey.lfik_fiemap = *fiemap;
1805 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1806 &fmkey, fiemap, &num_bytes);
1808 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.
 * Requires CAP_DAC_READ_SEARCH unless the mount allows user fid2path.
 * The output buffer is sized from the user-provided gf_pathlen.
 */
1812 int ll_fid2path(struct inode *inode, void __user *arg)
1814 struct obd_export *exp = ll_i2mdexp(inode);
1815 const struct getinfo_fid2path __user *gfin = arg;
1817 struct getinfo_fid2path *gfout;
1823 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1824 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1827 /* Only need to get the buflen */
1828 if (get_user(pathlen, &gfin->gf_pathlen))
/* cap the user-controlled length before allocating */
1831 if (pathlen > PATH_MAX)
1834 outsize = sizeof(*gfout) + pathlen;
1835 OBD_ALLOC(gfout, outsize);
1839 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1840 GOTO(gf_free, rc = -EFAULT);
1842 /* Call mdc_iocontrol */
1843 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1847 if (copy_to_user(arg, gfout, outsize))
1851 OBD_FREE(gfout, outsize);
1856 * Read the data_version for inode.
1858 * This value is computed using stripe object version on OST.
1859 * Version is computed using server side locking.
1861 * @param flags if do sync on the OST side;
1863 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1864 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1866 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1868 struct cl_object *obj = ll_i2info(inode)->lli_clob;
1876 /* If no file object initialized, we consider its version is 0. */
1882 env = cl_env_get(&refcheck);
1884 RETURN(PTR_ERR(env));
/* run a CIT_DATA_VERSION io through the cl_io state machine */
1886 io = vvp_env_thread_io(env);
1888 io->u.ci_data_version.dv_data_version = 0;
1889 io->u.ci_data_version.dv_flags = flags;
1892 if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
1893 result = cl_io_loop(env, io);
1895 result = io->ci_result;
1897 *data_version = io->u.ci_data_version.dv_data_version;
1899 cl_io_fini(env, io);
/* layout may have changed mid-io; retry the whole operation */
1901 if (unlikely(io->ci_need_restart))
1904 cl_env_put(env, &refcheck);
1910 * Trigger a HSM release request for the provided inode.
1912 int ll_hsm_release(struct inode *inode)
1914 struct cl_env_nest nest;
1916 struct obd_client_handle *och = NULL;
1917 __u64 data_version = 0;
1921 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1922 ll_get_fsname(inode->i_sb, NULL, 0),
1923 PFID(&ll_i2info(inode)->lli_fid));
/* take a write lease so nobody modifies the file during release */
1925 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1927 GOTO(out, rc = PTR_ERR(och));
1929 /* Grab latest data_version and [am]time values */
1930 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1934 env = cl_env_nested_get(&nest);
1936 GOTO(out, rc = PTR_ERR(env));
1938 ll_merge_attr(env, inode);
1939 cl_env_nested_put(&nest, env);
1941 /* Release the file.
1942 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1943 * we still need it to pack l_remote_handle to MDT. */
1944 rc = ll_close_inode_openhandle(inode, och, MDS_HSM_RELEASE,
1950 if (och != NULL && !IS_ERR(och)) /* close the file */
1951 ll_lease_close(och, inode, NULL);
/* scratch state for ll_swap_layouts(): the two inodes plus the
 * data-version checks requested by the caller (fields elided here) */
1956 struct ll_swap_stack {
1959 struct inode *inode1;
1960 struct inode *inode2;
/*
 * Swap the LOV layouts of two open files (LL_IOC_LOV_SWAP_LAYOUTS).
 * Inodes are ordered by FID to avoid lock-order deadlocks; optional
 * group locks flush dirty cache, and optional data-version checks
 * abort with -EAGAIN if either file changed since the caller sampled
 * its version.
 */
1965 static int ll_swap_layouts(struct file *file1, struct file *file2,
1966 struct lustre_swap_layouts *lsl)
1968 struct mdc_swap_layouts msl;
1969 struct md_op_data *op_data;
1972 struct ll_swap_stack *llss = NULL;
1975 OBD_ALLOC_PTR(llss);
1979 llss->inode1 = file1->f_path.dentry->d_inode;
1980 llss->inode2 = file2->f_path.dentry->d_inode;
1982 rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
1986 /* we use 2 bool because it is easier to swap than 2 bits */
1987 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1988 llss->check_dv1 = true;
1990 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1991 llss->check_dv2 = true;
1993 /* we cannot use lsl->sl_dvX directly because we may swap them */
1994 llss->dv1 = lsl->sl_dv1;
1995 llss->dv2 = lsl->sl_dv2;
1997 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1998 if (rc == 0) /* same file, done! */
2001 if (rc < 0) { /* sequentialize it */
2002 swap(llss->inode1, llss->inode2);
2004 swap(llss->dv1, llss->dv2);
2005 swap(llss->check_dv1, llss->check_dv2);
2009 if (gid != 0) { /* application asks to flush dirty cache */
2010 rc = ll_get_grouplock(llss->inode1, file1, gid);
2014 rc = ll_get_grouplock(llss->inode2, file2, gid);
2016 ll_put_grouplock(llss->inode1, file1, gid);
2021 /* ultimate check, before swapping the layouts we check if
2022 * dataversion has changed (if requested) */
2023 if (llss->check_dv1) {
2024 rc = ll_data_version(llss->inode1, &dv, 0);
2027 if (dv != llss->dv1)
2028 GOTO(putgl, rc = -EAGAIN);
2031 if (llss->check_dv2) {
2032 rc = ll_data_version(llss->inode2, &dv, 0);
2035 if (dv != llss->dv2)
2036 GOTO(putgl, rc = -EAGAIN);
2039 /* struct md_op_data is used to send the swap args to the mdt
2040 * only flags is missing, so we use struct mdc_swap_layouts
2041 * through the md_op_data->op_data */
2042 /* flags from user space have to be converted before they are sent to
2043 * server, no flag is sent today, they are only used on the client */
2046 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2047 0, LUSTRE_OPC_ANY, &msl);
2048 if (IS_ERR(op_data))
2049 GOTO(free, rc = PTR_ERR(op_data));
2051 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2052 sizeof(*op_data), op_data, NULL);
2053 ll_finish_md_op_data(op_data);
/* release group locks in reverse acquisition order */
2060 ll_put_grouplock(llss->inode2, file2, gid);
2061 ll_put_grouplock(llss->inode1, file1, gid);
/*
 * Set/clear HSM state flags on an inode (LL_IOC_HSM_STATE_SET).
 * Validates the masks and archive id, then forwards to the MDT via
 * obd_iocontrol().  Non-root callers may only touch HSM_USER_MASK bits.
 */
2071 int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2073 struct md_op_data *op_data;
2077 /* Detect out-of range masks */
2078 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2081 /* Non-root users are forbidden to set or clear flags which are
2082 * NOT defined in HSM_USER_MASK. */
2083 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2084 !cfs_capable(CFS_CAP_SYS_ADMIN))
2087 /* Detect out-of range archive id */
2088 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2089 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2092 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2093 LUSTRE_OPC_ANY, hss);
2094 if (IS_ERR(op_data))
2095 RETURN(PTR_ERR(op_data));
2097 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2098 sizeof(*op_data), op_data, NULL);
2100 ll_finish_md_op_data(op_data);
/*
 * Import a file into HSM (LL_IOC_HSM_IMPORT): mark it
 * ARCHIVED|EXISTS|RELEASED in the given archive, then force its
 * mode/owner/size/times to the values supplied by the copytool.
 * Regular files only.
 */
2105 static int ll_hsm_import(struct inode *inode, struct file *file,
2106 struct hsm_user_import *hui)
2108 struct hsm_state_set *hss = NULL;
2109 struct iattr *attr = NULL;
2113 if (!S_ISREG(inode->i_mode))
2119 GOTO(out, rc = -ENOMEM);
2121 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2122 hss->hss_archive_id = hui->hui_archive_id;
2123 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2124 rc = ll_hsm_state_set(inode, hss);
2128 OBD_ALLOC_PTR(attr);
2130 GOTO(out, rc = -ENOMEM);
/* import always creates a regular file with the copytool's attrs */
2132 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2133 attr->ia_mode |= S_IFREG;
2134 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2135 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2136 attr->ia_size = hui->hui_size;
2137 attr->ia_mtime.tv_sec = hui->hui_mtime;
2138 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2139 attr->ia_atime.tv_sec = hui->hui_atime;
2140 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2142 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2143 ATTR_UID | ATTR_GID |
2144 ATTR_MTIME | ATTR_MTIME_SET |
2145 ATTR_ATIME | ATTR_ATIME_SET;
2147 mutex_lock(&inode->i_mutex);
2149 rc = ll_setattr_raw(file->f_path.dentry, attr, true);
2153 mutex_unlock(&inode->i_mutex);
/* Translate an open fmode into the LL_LEASE_{RD,WR}LCK bitmask. */
2165 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2167 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2168 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * LL_IOC_FUTIMES_3 backend: set atime/mtime/ctime (including ctime,
 * which utimes(2) cannot do) from an ll_futimes_3 payload.
 * Root-only, regular files only.
 */
2171 static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu)
2173 struct inode *inode = file->f_path.dentry->d_inode;
2175 .ia_valid = ATTR_ATIME | ATTR_ATIME_SET |
2176 ATTR_MTIME | ATTR_MTIME_SET |
2177 ATTR_CTIME | ATTR_CTIME_SET,
2179 .tv_sec = lfu->lfu_atime_sec,
2180 .tv_nsec = lfu->lfu_atime_nsec,
2183 .tv_sec = lfu->lfu_mtime_sec,
2184 .tv_nsec = lfu->lfu_mtime_nsec,
2187 .tv_sec = lfu->lfu_ctime_sec,
2188 .tv_nsec = lfu->lfu_ctime_nsec,
2194 if (!capable(CAP_SYS_ADMIN))
2197 if (!S_ISREG(inode->i_mode))
2200 mutex_lock(&inode->i_mutex);
2201 rc = ll_setattr_raw(file->f_path.dentry, &ia, false);
2202 mutex_unlock(&inode->i_mutex);
/*
 * Main ioctl dispatcher for regular files.  Handles the LL_IOC_*,
 * FSFILT_IOC_* and OBD_IOC_* families, falling through to the
 * registered llioc handlers and finally to the data export's
 * obd_iocontrol() for anything unrecognized.
 */
2208 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2210 struct inode *inode = file->f_path.dentry->d_inode;
2211 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2215 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2216 PFID(ll_inode2fid(inode)), inode, cmd);
2217 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2219 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2220 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2224 case LL_IOC_GETFLAGS:
2225 /* Get the current value of the file flags */
2226 return put_user(fd->fd_flags, (int __user *)arg);
2227 case LL_IOC_SETFLAGS:
2228 case LL_IOC_CLRFLAGS:
2229 /* Set or clear specific file flags */
2230 /* XXX This probably needs checks to ensure the flags are
2231 * not abused, and to handle any flag side effects.
2233 if (get_user(flags, (int __user *) arg))
2236 if (cmd == LL_IOC_SETFLAGS) {
/* lockless i/o only makes sense with O_DIRECT */
2237 if ((flags & LL_FILE_IGNORE_LOCK) &&
2238 !(file->f_flags & O_DIRECT)) {
2239 CERROR("%s: unable to disable locking on "
2240 "non-O_DIRECT file\n", current->comm);
2244 fd->fd_flags |= flags;
2246 fd->fd_flags &= ~flags;
2249 case LL_IOC_LOV_SETSTRIPE:
2250 RETURN(ll_lov_setstripe(inode, file, arg));
2251 case LL_IOC_LOV_SETEA:
2252 RETURN(ll_lov_setea(inode, file, arg));
2253 case LL_IOC_LOV_SWAP_LAYOUTS: {
2255 struct lustre_swap_layouts lsl;
2257 if (copy_from_user(&lsl, (char __user *)arg,
2258 sizeof(struct lustre_swap_layouts)))
/* both files must be writable to have their layouts swapped */
2261 if ((file->f_flags & O_ACCMODE) == O_RDONLY)
2264 file2 = fget(lsl.sl_fd);
2268 /* O_WRONLY or O_RDWR */
2269 if ((file2->f_flags & O_ACCMODE) == O_RDONLY)
2270 GOTO(out, rc = -EPERM);
2272 if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
2273 struct inode *inode2;
2274 struct ll_inode_info *lli;
2275 struct obd_client_handle *och = NULL;
2277 if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE)
2278 GOTO(out, rc = -EINVAL);
2280 lli = ll_i2info(inode);
2281 mutex_lock(&lli->lli_och_mutex);
/* swap-and-close requires a lease; consume it from the fd */
2282 if (fd->fd_lease_och != NULL) {
2283 och = fd->fd_lease_och;
2284 fd->fd_lease_och = NULL;
2286 mutex_unlock(&lli->lli_och_mutex);
2288 GOTO(out, rc = -ENOLCK);
2289 inode2 = file2->f_path.dentry->d_inode;
2290 rc = ll_swap_layouts_close(och, inode, inode2);
2292 rc = ll_swap_layouts(file, file2, &lsl);
2298 case LL_IOC_LOV_GETSTRIPE:
2299 RETURN(ll_file_getstripe(inode,
2300 (struct lov_user_md __user *)arg));
2301 case FSFILT_IOC_GETFLAGS:
2302 case FSFILT_IOC_SETFLAGS:
2303 RETURN(ll_iocontrol(inode, file, cmd, arg));
2304 case FSFILT_IOC_GETVERSION_OLD:
2305 case FSFILT_IOC_GETVERSION:
2306 RETURN(put_user(inode->i_generation, (int __user *)arg));
2307 case LL_IOC_GROUP_LOCK:
2308 RETURN(ll_get_grouplock(inode, file, arg));
2309 case LL_IOC_GROUP_UNLOCK:
2310 RETURN(ll_put_grouplock(inode, file, arg));
2311 case IOC_OBD_STATFS:
2312 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2314 /* We need to special case any other ioctls we want to handle,
2315 * to send them to the MDS/OST as appropriate and to properly
2316 * network encode the arg field.
2317 case FSFILT_IOC_SETVERSION_OLD:
2318 case FSFILT_IOC_SETVERSION:
2320 case LL_IOC_FLUSHCTX:
2321 RETURN(ll_flush_ctx(inode));
2322 case LL_IOC_PATH2FID: {
2323 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2324 sizeof(struct lu_fid)))
2329 case LL_IOC_GETPARENT:
2330 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2332 case OBD_IOC_FID2PATH:
2333 RETURN(ll_fid2path(inode, (void __user *)arg));
2334 case LL_IOC_DATA_VERSION: {
2335 struct ioc_data_version idv;
2338 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* only the flush flags are honoured from userspace */
2341 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2342 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2345 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2351 case LL_IOC_GET_MDTIDX: {
2354 mdtidx = ll_get_mdt_idx(inode);
2358 if (put_user((int)mdtidx, (int __user *)arg))
2363 case OBD_IOC_GETDTNAME:
2364 case OBD_IOC_GETMDNAME:
2365 RETURN(ll_get_obd_name(inode, cmd, arg));
2366 case LL_IOC_HSM_STATE_GET: {
2367 struct md_op_data *op_data;
2368 struct hsm_user_state *hus;
2375 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2376 LUSTRE_OPC_ANY, hus);
2377 if (IS_ERR(op_data)) {
2379 RETURN(PTR_ERR(op_data));
2382 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2385 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2388 ll_finish_md_op_data(op_data);
2392 case LL_IOC_HSM_STATE_SET: {
2393 struct hsm_state_set *hss;
2400 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2405 rc = ll_hsm_state_set(inode, hss);
2410 case LL_IOC_HSM_ACTION: {
2411 struct md_op_data *op_data;
2412 struct hsm_current_action *hca;
2419 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2420 LUSTRE_OPC_ANY, hca);
2421 if (IS_ERR(op_data)) {
2423 RETURN(PTR_ERR(op_data));
2426 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2429 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2432 ll_finish_md_op_data(op_data);
2436 case LL_IOC_SET_LEASE: {
2437 struct ll_inode_info *lli = ll_i2info(inode);
2438 struct obd_client_handle *och = NULL;
/* requested lease mode must be compatible with the open mode */
2443 case LL_LEASE_WRLCK:
2444 if (!(file->f_mode & FMODE_WRITE))
2446 fmode = FMODE_WRITE;
2448 case LL_LEASE_RDLCK:
2449 if (!(file->f_mode & FMODE_READ))
2453 case LL_LEASE_UNLCK:
2454 mutex_lock(&lli->lli_och_mutex);
2455 if (fd->fd_lease_och != NULL) {
2456 och = fd->fd_lease_och;
2457 fd->fd_lease_och = NULL;
2459 mutex_unlock(&lli->lli_och_mutex);
2464 fmode = och->och_flags;
2465 rc = ll_lease_close(och, inode, &lease_broken);
2472 RETURN(ll_lease_type_from_fmode(fmode));
2477 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2479 /* apply for lease */
2480 och = ll_lease_open(inode, file, fmode, 0);
2482 RETURN(PTR_ERR(och));
2485 mutex_lock(&lli->lli_och_mutex);
2486 if (fd->fd_lease_och == NULL) {
2487 fd->fd_lease_och = och;
2490 mutex_unlock(&lli->lli_och_mutex);
2492 /* impossible now that only excl is supported for now */
2493 ll_lease_close(och, inode, &lease_broken);
2498 case LL_IOC_GET_LEASE: {
2499 struct ll_inode_info *lli = ll_i2info(inode);
2500 struct ldlm_lock *lock = NULL;
2503 mutex_lock(&lli->lli_och_mutex);
2504 if (fd->fd_lease_och != NULL) {
2505 struct obd_client_handle *och = fd->fd_lease_och;
/* report the lease only if its dlm lock is still valid */
2507 lock = ldlm_handle2lock(&och->och_lease_handle);
2509 lock_res_and_lock(lock);
2510 if (!ldlm_is_cancel(lock))
2511 fmode = och->och_flags;
2513 unlock_res_and_lock(lock);
2514 LDLM_LOCK_PUT(lock);
2517 mutex_unlock(&lli->lli_och_mutex);
2519 RETURN(ll_lease_type_from_fmode(fmode));
2521 case LL_IOC_HSM_IMPORT: {
2522 struct hsm_user_import *hui;
2528 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2533 rc = ll_hsm_import(inode, file, hui);
2538 case LL_IOC_FUTIMES_3: {
2539 struct ll_futimes_3 lfu;
2541 if (copy_from_user(&lfu,
2542 (const struct ll_futimes_3 __user *)arg,
2546 RETURN(ll_file_futimes_3(file, &lfu));
/* unknown cmd: try registered llioc handlers, then the data export */
2552 ll_iocontrol_call(inode, file, cmd, arg, &err))
2555 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2556 (void __user *)arg));
2561 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Commit a computed seek offset to file->f_pos after range checks.
 * Local fallback for kernels lacking generic_file_llseek_size().
 */
2562 static inline loff_t
2563 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2565 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2567 if (offset > maxsize)
2570 if (offset != file->f_pos) {
2571 file->f_pos = offset;
/* f_version is reset so readdir-style users notice the reposition */
2572 file->f_version = 0;
/*
 * Compat copy of the kernel's generic_file_llseek_size() for kernels
 * that do not export it: handles SEEK_SET/CUR/END plus SEEK_DATA and
 * SEEK_HOLE against the supplied eof and maxsize.
 */
2578 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2579 loff_t maxsize, loff_t eof)
2581 struct inode *inode = file->f_path.dentry->d_inode;
2589 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2590 * position-querying operation. Avoid rewriting the "same"
2591 * f_pos value back to the file because a concurrent read(),
2592 * write() or lseek() might have altered it
2597 * f_lock protects against read/modify/write race with other
2598 * SEEK_CURs. Note that parallel writes and reads behave
2601 mutex_lock(&inode->i_mutex);
2602 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2603 mutex_unlock(&inode->i_mutex);
2607 * In the generic case the entire file is data, so as long as
2608 * offset isn't at the end of the file then the offset is data.
2615 * There is a virtual hole at the end of the file, so as long as
2616 * offset isn't i_size or larger, return i_size.
2624 return llseek_execute(file, offset, maxsize);
/*
 * llseek entry: for SEEK_END/HOLE/DATA first glimpse the size from the
 * OSTs (cached i_size may be stale), then defer to the generic helper
 * bounded by the filesystem's maximum file size.
 */
2628 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2630 struct inode *inode = file->f_path.dentry->d_inode;
2631 loff_t retval, eof = 0;
2634 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2635 (origin == SEEK_CUR) ? file->f_pos : 0);
2636 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2637 PFID(ll_inode2fid(inode)), inode, retval, retval,
2639 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2641 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2642 retval = ll_glimpse_size(inode);
2645 eof = i_size_read(inode);
2648 retval = ll_generic_file_llseek_size(file, offset, origin,
2649 ll_file_maxbytes(inode), eof);
/*
 * flush (close(2)) handler: report any async writeback error recorded
 * for this inode.  Errors already surfaced through fd_write_failed are
 * not reported twice.
 */
2653 static int ll_flush(struct file *file, fl_owner_t id)
2655 struct inode *inode = file->f_path.dentry->d_inode;
2656 struct ll_inode_info *lli = ll_i2info(inode);
2657 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2660 LASSERT(!S_ISDIR(inode->i_mode));
2662 /* catch async errors that were recorded back when async writeback
2663 * failed for pages in this mapping. */
2664 rc = lli->lli_async_rc;
2665 lli->lli_async_rc = 0;
2666 if (lli->lli_clob != NULL) {
2667 err = lov_read_and_clear_async_rc(lli->lli_clob);
2672 /* The application has been told write failure already.
2673 * Do not report failure again. */
2674 if (fd->fd_write_failed)
2676 return rc ? -EIO : 0;
2680 * Called to make sure a portion of file has been written out.
2681 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2683 * Return how many pages have been written.
2685 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2686 enum cl_fsync_mode mode, int ignore_layout)
2688 struct cl_env_nest nest;
2691 struct cl_fsync_io *fio;
2695 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2696 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2699 env = cl_env_nested_get(&nest);
2701 RETURN(PTR_ERR(env));
/* drive a CIT_FSYNC io over [start, end] through the cl_io machinery */
2703 io = vvp_env_thread_io(env);
2704 io->ci_obj = ll_i2info(inode)->lli_clob;
2705 io->ci_ignore_layout = ignore_layout;
2707 /* initialize parameters for sync */
2708 fio = &io->u.ci_fsync;
2709 fio->fi_start = start;
2711 fio->fi_fid = ll_inode2fid(inode);
2712 fio->fi_mode = mode;
2713 fio->fi_nr_written = 0;
2715 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2716 result = cl_io_loop(env, io);
2718 result = io->ci_result;
2720 result = fio->fi_nr_written;
2721 cl_io_fini(env, io);
2722 cl_env_nested_put(&nest, env);
2728 * When dentry is provided (the 'else' case), *file->f_path.dentry may be
2729 * null and dentry must be used directly rather than pulled from
2730 * *file->f_path.dentry as is done otherwise.
/*
 * fsync handler (signature varies by kernel version): wait for cached
 * dirty pages, surface recorded async writeback errors, fsync metadata
 * via the MDC, then force a full OST sync for regular files.
 * fd_write_failed is updated so ll_flush() does not double-report.
 */
2733 #ifdef HAVE_FILE_FSYNC_4ARGS
2734 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2736 struct dentry *dentry = file->f_path.dentry;
2737 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2738 int ll_fsync(struct file *file, int datasync)
2740 struct dentry *dentry = file->f_path.dentry;
2742 loff_t end = LLONG_MAX;
2744 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2747 loff_t end = LLONG_MAX;
2749 struct inode *inode = dentry->d_inode;
2750 struct ll_inode_info *lli = ll_i2info(inode);
2751 struct ptlrpc_request *req;
2755 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2756 PFID(ll_inode2fid(inode)), inode);
2757 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2759 #ifdef HAVE_FILE_FSYNC_4ARGS
2760 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2761 mutex_lock(&inode->i_mutex);
2763 /* fsync's caller has already called _fdata{sync,write}, we want
2764 * that IO to finish before calling the osc and mdc sync methods */
2765 rc = filemap_fdatawait(inode->i_mapping);
2768 /* catch async errors that were recorded back when async writeback
2769 * failed for pages in this mapping. */
2770 if (!S_ISDIR(inode->i_mode)) {
2771 err = lli->lli_async_rc;
2772 lli->lli_async_rc = 0;
2775 err = lov_read_and_clear_async_rc(lli->lli_clob);
2780 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req);
2784 ptlrpc_req_finished(req);
2786 if (S_ISREG(inode->i_mode)) {
2787 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2789 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2790 if (rc == 0 && err < 0)
2793 fd->fd_write_failed = true;
2795 fd->fd_write_failed = false;
2798 #ifdef HAVE_FILE_FSYNC_4ARGS
2799 mutex_unlock(&inode->i_mutex);
/*
 * ll_file_flock(): VFS ->lock/->flock handler. Translates a kernel
 * file_lock (POSIX or BSD flock) into an LDLM_FLOCK enqueue on the MDS,
 * then mirrors the result into the local lock tables.
 * NOTE(review): some lines (mode-selection switch labels, RETURN, etc.)
 * are elided in this listing.
 */
2805 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2807 struct inode *inode = file->f_path.dentry->d_inode;
2808 struct ll_sb_info *sbi = ll_i2sbi(inode);
2809 struct ldlm_enqueue_info einfo = {
2810 .ei_type = LDLM_FLOCK,
2811 .ei_cb_cp = ldlm_flock_completion_ast,
2812 .ei_cbdata = file_lock,
2814 struct md_op_data *op_data;
2815 struct lustre_handle lockh = { 0 };
2816 union ldlm_policy_data flock = { { 0 } };
2817 int fl_type = file_lock->fl_type;
2823 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2824 PFID(ll_inode2fid(inode)), file_lock);
2826 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2828 if (file_lock->fl_flags & FL_FLOCK) {
2829 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2830 /* flocks are whole-file locks */
2831 flock.l_flock.end = OFFSET_MAX;
2832 /* For flocks owner is determined by the local file descriptor */
2833 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2834 } else if (file_lock->fl_flags & FL_POSIX) {
2835 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2836 flock.l_flock.start = file_lock->fl_start;
2837 flock.l_flock.end = file_lock->fl_end;
2841 flock.l_flock.pid = file_lock->fl_pid;
2843 /* Somewhat ugly workaround for svc lockd.
2844 * lockd installs custom fl_lmops->lm_compare_owner that checks
2845 * for the fl_owner to be the same (which it always is on local node
2846 * I guess between lockd processes) and then compares pid.
2847 * As such we assign pid to the owner field to make it all work,
2848 * conflict with normal locks is unlikely since pid space and
2849 * pointer space for current->files are not intersecting */
2850 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2851 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* map fl_type to an LDLM mode: read -> PR, unlock -> NL, write -> PW */
2855 einfo.ei_mode = LCK_PR;
2858 /* An unlock request may or may not have any relation to
2859 * existing locks so we may not be able to pass a lock handle
2860 * via a normal ldlm_lock_cancel() request. The request may even
2861 * unlock a byte range in the middle of an existing lock. In
2862 * order to process an unlock request we need all of the same
2863 * information that is given with a normal read or write record
2864 * lock request. To avoid creating another ldlm unlock (cancel)
2865 * message we'll treat a LCK_NL flock request as an unlock. */
2866 einfo.ei_mode = LCK_NL;
2869 einfo.ei_mode = LCK_PW;
2872 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* map cmd to enqueue flags: non-blocking set -> BLOCK_NOWAIT, get -> TEST */
2887 flags = LDLM_FL_BLOCK_NOWAIT;
2893 flags = LDLM_FL_TEST_LOCK;
2896 CERROR("unknown fcntl lock command: %d\n", cmd);
2900 /* Save the old mode so that if the mode in the lock changes we
2901 * can decrement the appropriate reader or writer refcount. */
2902 file_lock->fl_type = einfo.ei_mode;
2904 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2905 LUSTRE_OPC_ANY, NULL);
2906 if (IS_ERR(op_data))
2907 RETURN(PTR_ERR(op_data));
2909 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2910 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2911 flock.l_flock.pid, flags, einfo.ei_mode,
2912 flock.l_flock.start, flock.l_flock.end);
2914 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2917 /* Restore the file lock type if not TEST lock. */
2918 if (!(flags & LDLM_FL_TEST_LOCK))
2919 file_lock->fl_type = fl_type;
/* mirror a successful (or unlock) result into the kernel's lock lists */
2921 if ((file_lock->fl_flags & FL_FLOCK) &&
2922 (rc == 0 || file_lock->fl_type == F_UNLCK))
2923 rc2 = flock_lock_file_wait(file, file_lock);
2924 if ((file_lock->fl_flags & FL_POSIX) &&
2925 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2926 !(flags & LDLM_FL_TEST_LOCK))
2927 rc2 = posix_lock_file_wait(file, file_lock);
/* local bookkeeping failed after a server grant: undo via LCK_NL enqueue */
2929 if (rc2 && file_lock->fl_type != F_UNLCK) {
2930 einfo.ei_mode = LCK_NL;
2931 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2936 ll_finish_md_op_data(op_data);
/*
 * ll_get_fid_by_name(): resolve @name under @parent to a FID via a
 * getattr-by-name RPC to the MDS; copies the FID into *fid and, when
 * @inode is non-NULL (presumably — elided branch), instantiates the
 * inode from the reply via ll_prep_inode().
 * NOTE(review): error-check branches and RETURN are elided in this listing.
 */
2941 int ll_get_fid_by_name(struct inode *parent, const char *name,
2942 int namelen, struct lu_fid *fid,
2943 struct inode **inode)
2945 struct md_op_data *op_data = NULL;
2946 struct mdt_body *body;
2947 struct ptlrpc_request *req;
2951 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2952 LUSTRE_OPC_ANY, NULL);
2953 if (IS_ERR(op_data))
2954 RETURN(PTR_ERR(op_data));
/* only FID and type are needed from the MDS */
2956 op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
2957 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2958 ll_finish_md_op_data(op_data);
2962 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2964 GOTO(out_req, rc = -EFAULT);
2966 *fid = body->mbo_fid1;
2969 rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
2971 ptlrpc_req_finished(req);
/*
 * ll_migrate(): migrate directory entry @name under @parent to MDT
 * @mdtidx. Looks up the child inode (dcache first, then by-name RPC),
 * takes a write lease for regular files, and issues a rename RPC with
 * CLI_MIGRATE / MDS_RENAME_MIGRATE set.
 * NOTE(review): several lines (qstr setup, dput, retry loop body, RETURN)
 * are elided in this listing.
 */
2975 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2976 const char *name, int namelen)
2978 struct dentry *dchild = NULL;
2979 struct inode *child_inode = NULL;
2980 struct md_op_data *op_data;
2981 struct ptlrpc_request *request = NULL;
2982 struct obd_client_handle *och = NULL;
2984 struct mdt_body *body;
2986 __u64 data_version = 0;
2989 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2990 name, PFID(ll_inode2fid(parent)), mdtidx);
2992 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2993 0, LUSTRE_OPC_ANY, NULL);
2994 if (IS_ERR(op_data))
2995 RETURN(PTR_ERR(op_data));
2997 /* Get child FID first */
2998 qstr.hash = full_name_hash(name, namelen);
/* try the dcache before going to the MDS */
3001 dchild = d_lookup(file->f_path.dentry, &qstr);
3002 if (dchild != NULL) {
3003 if (dchild->d_inode != NULL)
3004 child_inode = igrab(dchild->d_inode);
3008 if (child_inode == NULL) {
3009 rc = ll_get_fid_by_name(parent, name, namelen,
3010 &op_data->op_fid3, &child_inode);
3015 if (child_inode == NULL)
3016 GOTO(out_free, rc = -EINVAL);
/* serialize against other users of the child while migrating */
3018 mutex_lock(&child_inode->i_mutex);
3019 op_data->op_fid3 = *ll_inode2fid(child_inode);
3020 if (!fid_is_sane(&op_data->op_fid3)) {
3021 CERROR("%s: migrate %s, but FID "DFID" is insane\n",
3022 ll_get_fsname(parent->i_sb, NULL, 0), name,
3023 PFID(&op_data->op_fid3));
3024 GOTO(out_unlock, rc = -EINVAL);
3027 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
3029 GOTO(out_unlock, rc);
/* nothing to do if the child already lives on the target MDT */
3032 CDEBUG(D_INFO, "%s: "DFID" is already on MDT%04x\n", name,
3033 PFID(&op_data->op_fid3), mdtidx);
3034 GOTO(out_unlock, rc = 0);
/* for regular files take a write lease and capture the data version
 * so the server can detect concurrent modification during migration */
3037 if (S_ISREG(child_inode->i_mode)) {
3038 och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
3042 GOTO(out_unlock, rc);
3045 rc = ll_data_version(child_inode, &data_version,
3048 GOTO(out_close, rc);
3050 op_data->op_handle = och->och_fh;
3051 op_data->op_data = och->och_mod;
3052 op_data->op_data_version = data_version;
3053 op_data->op_lease_handle = och->och_lease_handle;
3054 op_data->op_bias |= MDS_RENAME_MIGRATE;
3057 op_data->op_mds = mdtidx;
3058 op_data->op_cli_flags = CLI_MIGRATE;
/* migration is implemented as a same-name rename to the target MDT */
3059 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
3060 namelen, name, namelen, &request);
3062 ll_update_times(request, parent);
3064 if (request != NULL) {
3065 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
3067 ptlrpc_req_finished(request);
3068 GOTO(out_close, rc = -EPROTO);
3071 /* If the server does release layout lock, then we cleanup
3072 * the client och here, otherwise release it in out_close: */
3074 body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
3075 obd_mod_put(och->och_mod);
3076 md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp,
3078 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
3082 ptlrpc_req_finished(request);
3085 /* Try again if the file layout has changed. */
3086 if (rc == -EAGAIN && S_ISREG(child_inode->i_mode)) {
3091 if (och != NULL) /* close the file */
3092 ll_lease_close(och, child_inode, NULL);
3094 clear_nlink(child_inode);
3096 mutex_unlock(&child_inode->i_mutex);
3099 ll_finish_md_op_data(op_data);
/*
 * ll_file_noflock(): ->lock/->flock stub for the "-o noflock" mount
 * option (see ll_file_operations_noflock below). Body elided in this
 * listing; per the noflock comment it returns an error for flock calls.
 */
3104 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3112 * test if some locks matching bits and l_req_mode are acquired
3113 * - bits can be in different locks
3114 * - if found clear the common lock bits in *bits
3115 * - the bits not found, are kept in *bits
3117 * \param bits [IN] searched lock bits [IN]
3118 * \param l_req_mode [IN] searched lock mode
3119 * \retval boolean, true iff all bits are found
3121 int ll_have_md_lock(struct inode *inode, __u64 *bits, enum ldlm_mode l_req_mode)
3123 struct lustre_handle lockh;
3124 union ldlm_policy_data policy;
/* LCK_MINMODE means "any mode": match against the union of all modes */
3125 enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
3126 (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
3135 fid = &ll_i2info(inode)->lli_fid;
3136 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3137 ldlm_lockname[mode]);
/* TEST_LOCK: probe only, do not take a reference on a matched lock */
3139 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* probe each requested inodebit individually until all are accounted for */
3140 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3141 policy.l_inodebits.bits = *bits & (1 << i);
3142 if (policy.l_inodebits.bits == 0)
3145 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3146 &policy, mode, &lockh)) {
3147 struct ldlm_lock *lock;
3149 lock = ldlm_handle2lock(&lockh);
3152 ~(lock->l_policy_data.l_inodebits.bits);
3153 LDLM_LOCK_PUT(lock);
3155 *bits &= ~policy.l_inodebits.bits;
/*
 * ll_take_md_lock(): match (and, unlike ll_have_md_lock, keep a
 * reference on) a cached MD ibits lock covering @bits; returns the
 * matched mode (0 on no match — RETURN elided in this listing) and the
 * lock handle in *lockh.
 */
3162 enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
3163 struct lustre_handle *lockh, __u64 flags,
3164 enum ldlm_mode mode)
3166 union ldlm_policy_data policy = { .l_inodebits = { bits } };
3171 fid = &ll_i2info(inode)->lli_fid;
3172 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3174 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3175 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * ll_inode_revalidate_fini(): post-process the rc of a revalidate RPC.
 * Downgrades -ENOENT for already-unlinked objects and logs other
 * failures (quietly for -EACCES/-EIDRM, loudly otherwise).
 */
3180 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3182 /* Already unlinked. Just update nlink and return success */
3183 if (rc == -ENOENT) {
3185 /* If it is striped directory, and there is bad stripe
3186 * Let's revalidate the dentry again, instead of returning
3188 if (S_ISDIR(inode->i_mode) &&
3189 ll_i2info(inode)->lli_lsm_md != NULL)
3192 /* This path cannot be hit for regular files unless in
3193 * case of obscure races, so no need to to validate
3195 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3197 } else if (rc != 0) {
3198 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3199 "%s: revalidate FID "DFID" error: rc = %d\n",
3200 ll_get_fsname(inode->i_sb, NULL, 0),
3201 PFID(ll_inode2fid(inode)), rc);
/*
 * __ll_inode_revalidate(): refresh cached inode attributes from the MDS
 * for the inodebits in @ibits. Two strategies: an intent getattr/lookup
 * by FID when the server supports OBD_CONNECT_ATTRFID, otherwise a plain
 * md_getattr() — but only if no covering MD lock is already cached.
 * NOTE(review): several lines (ENTRY, some error branches, RETURN) are
 * elided in this listing.
 */
3207 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3209 struct inode *inode = dentry->d_inode;
3210 struct ptlrpc_request *req = NULL;
3211 struct obd_export *exp;
3215 LASSERT(inode != NULL);
3217 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3218 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3220 exp = ll_i2mdexp(inode);
3222 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3223 * But under CMD case, it caused some lock issues, should be fixed
3224 * with new CMD ibits lock. See bug 12718 */
3225 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3226 struct lookup_intent oit = { .it_op = IT_GETATTR };
3227 struct md_op_data *op_data;
3229 if (ibits == MDS_INODELOCK_LOOKUP)
3230 oit.it_op = IT_LOOKUP;
3232 /* Call getattr by fid, so do not provide name at all. */
3233 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3234 dentry->d_inode, NULL, 0, 0,
3235 LUSTRE_OPC_ANY, NULL);
3236 if (IS_ERR(op_data))
3237 RETURN(PTR_ERR(op_data));
3239 rc = md_intent_lock(exp, op_data, &oit, &req,
3240 &ll_md_blocking_ast, 0);
3241 ll_finish_md_op_data(op_data);
3243 rc = ll_inode_revalidate_fini(inode, rc);
3247 rc = ll_revalidate_it_finish(req, &oit, dentry);
3249 ll_intent_release(&oit);
3253 /* Unlinked? Unhash dentry, so it is not picked up later by
3254 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3255 here to preserve get_cwd functionality on 2.6.
3257 if (!dentry->d_inode->i_nlink)
3258 d_lustre_invalidate(dentry, 0);
3260 ll_lookup_finish_locks(&oit, dentry);
3261 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3262 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3263 u64 valid = OBD_MD_FLGETATTR;
3264 struct md_op_data *op_data;
/* regular files: also fetch striping EA, sized to the MDS default */
3267 if (S_ISREG(inode->i_mode)) {
3268 rc = ll_get_default_mdsize(sbi, &ealen);
3271 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3274 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3275 0, ealen, LUSTRE_OPC_ANY,
3277 if (IS_ERR(op_data))
3278 RETURN(PTR_ERR(op_data));
3280 op_data->op_valid = valid;
3281 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3282 ll_finish_md_op_data(op_data);
3284 rc = ll_inode_revalidate_fini(inode, rc);
3288 rc = ll_prep_inode(&inode, req, NULL, NULL);
3291 ptlrpc_req_finished(req);
/*
 * ll_merge_md_attr(): for a striped directory, merge per-stripe MD
 * attributes from all MDTs into one cl_attr and apply nlink/blocks/size
 * to the inode, caching a/m/ctime in the ll_inode_info.
 */
3295 static int ll_merge_md_attr(struct inode *inode)
3297 struct cl_attr attr = { 0 };
3300 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3301 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3302 &attr, ll_md_blocking_ast);
3306 set_nlink(inode, attr.cat_nlink);
3307 inode->i_blocks = attr.cat_blocks;
3308 i_size_write(inode, attr.cat_size);
3310 ll_i2info(inode)->lli_atime = attr.cat_atime;
3311 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3312 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * ll_inode_revalidate(): revalidate MD attributes via
 * __ll_inode_revalidate(), then for non-regular files copy the cached
 * times into the inode (merging striped-dir attrs first); regular-file
 * size is refreshed by glimpse elsewhere (branch elided in listing).
 */
3318 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3320 struct inode *inode = dentry->d_inode;
3324 rc = __ll_inode_revalidate(dentry, ibits);
3328 /* if object isn't regular file, don't validate size */
3329 if (!S_ISREG(inode->i_mode)) {
3330 if (S_ISDIR(inode->i_mode) &&
3331 ll_i2info(inode)->lli_lsm_md != NULL) {
3332 rc = ll_merge_md_attr(inode);
3337 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3338 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3339 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3341 /* In case of restore, the MDT has the right size and has
3342 * already send it back without granting the layout lock,
3343 * inode is up-to-date so glimpse is useless.
3344 * Also to glimpse we need the layout, in case of a running
3345 * restore the MDT holds the layout lock so the glimpse will
3346 * block up to the end of restore (getattr will block)
3348 if (!ll_file_test_flag(ll_i2info(inode), LLIF_FILE_RESTORING))
3349 rc = ll_glimpse_size(inode);
/*
 * ll_getattr(): VFS ->getattr. Revalidates UPDATE|LOOKUP ibits, then
 * fills *stat from the (now fresh) inode fields. Inode number comes
 * from the FID when the caller needs a 32-bit API.
 */
3354 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3356 struct inode *inode = de->d_inode;
3357 struct ll_sb_info *sbi = ll_i2sbi(inode);
3358 struct ll_inode_info *lli = ll_i2info(inode);
3361 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3362 MDS_INODELOCK_LOOKUP);
3363 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3368 stat->dev = inode->i_sb->s_dev;
3369 if (ll_need_32bit_api(sbi))
3370 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3372 stat->ino = inode->i_ino;
3373 stat->mode = inode->i_mode;
3374 stat->uid = inode->i_uid;
3375 stat->gid = inode->i_gid;
3376 stat->rdev = inode->i_rdev;
3377 stat->atime = inode->i_atime;
3378 stat->mtime = inode->i_mtime;
3379 stat->ctime = inode->i_ctime;
3380 stat->blksize = 1 << inode->i_blkbits;
3382 stat->nlink = inode->i_nlink;
3383 stat->size = i_size_read(inode);
3384 stat->blocks = inode->i_blocks;
/*
 * ll_fiemap(): ->fiemap handler. Marshals fiemap_extent_info into a
 * struct fiemap sized for fi_extents_max extents, runs ll_do_fiemap(),
 * and copies the mapped extents back to the user buffer.
 * NOTE(review): only the first extent is copied in (2843-style seeding);
 * allocation-failure and RETURN lines are elided in this listing.
 */
3389 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3390 __u64 start, __u64 len)
3394 struct fiemap *fiemap;
3395 unsigned int extent_count = fieinfo->fi_extents_max;
3397 num_bytes = sizeof(*fiemap) + (extent_count *
3398 sizeof(struct fiemap_extent));
3399 OBD_ALLOC_LARGE(fiemap, num_bytes);
3404 fiemap->fm_flags = fieinfo->fi_flags;
3405 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3406 fiemap->fm_start = start;
3407 fiemap->fm_length = len;
/* seed the first extent from userspace (used for continuation) */
3408 if (extent_count > 0 &&
3409 copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3410 sizeof(struct fiemap_extent)) != 0)
3411 GOTO(out, rc = -EFAULT);
3413 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3415 fieinfo->fi_flags = fiemap->fm_flags;
3416 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3417 if (extent_count > 0 &&
3418 copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3419 fiemap->fm_mapped_extents *
3420 sizeof(struct fiemap_extent)) != 0)
3421 GOTO(out, rc = -EFAULT);
3423 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * ll_get_acl(): ->get_acl. Returns a duplicated reference to the cached
 * POSIX ACL under lli_lock; caller (VFS) releases the refcount.
 */
3427 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3429 struct ll_inode_info *lli = ll_i2info(inode);
3430 struct posix_acl *acl = NULL;
3433 spin_lock(&lli->lli_lock);
3434 /* VFS' acl_permission_check->check_acl will release the refcount */
3435 acl = posix_acl_dup(lli->lli_posix_acl);
3436 spin_unlock(&lli->lli_lock);
3441 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3443 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3444 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3446 ll_check_acl(struct inode *inode, int mask)
3449 # ifdef CONFIG_FS_POSIX_ACL
3450 struct posix_acl *acl;
3454 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3455 if (flags & IPERM_FLAG_RCU)
3458 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3463 rc = posix_acl_permission(inode, acl, mask);
3464 posix_acl_release(acl);
3467 # else /* !CONFIG_FS_POSIX_ACL */
3469 # endif /* CONFIG_FS_POSIX_ACL */
3471 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
3473 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3474 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3476 # ifdef HAVE_INODE_PERMISION_2ARGS
3477 int ll_inode_permission(struct inode *inode, int mask)
3479 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3484 struct ll_sb_info *sbi;
3485 struct root_squash_info *squash;
3486 struct cred *cred = NULL;
3487 const struct cred *old_cred = NULL;
3489 bool squash_id = false;
3492 #ifdef MAY_NOT_BLOCK
3493 if (mask & MAY_NOT_BLOCK)
3495 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3496 if (flags & IPERM_FLAG_RCU)
3500 /* as root inode are NOT getting validated in lookup operation,
3501 * need to do it before permission check. */
3503 if (inode == inode->i_sb->s_root->d_inode) {
3504 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3505 MDS_INODELOCK_LOOKUP);
3510 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3511 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3513 /* squash fsuid/fsgid if needed */
3514 sbi = ll_i2sbi(inode);
3515 squash = &sbi->ll_squash;
3516 if (unlikely(squash->rsi_uid != 0 &&
3517 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3518 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3522 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3523 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3524 squash->rsi_uid, squash->rsi_gid);
3526 /* update current process's credentials
3527 * and FS capability */
3528 cred = prepare_creds();
3532 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3533 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
3534 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3535 if ((1 << cap) & CFS_CAP_FS_MASK)
3536 cap_lower(cred->cap_effective, cap);
3538 old_cred = override_creds(cred);
3541 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3543 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3544 rc = lustre_check_remote_perm(inode, mask);
3546 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3548 /* restore current process's credentials and FS capability */
3550 revert_creds(old_cred);
3557 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no .flock/.lock handlers, so the kernel's
 * local posix/flock handling applies. read/write entries are selected
 * by the kernel's read_write_iter capability macros. */
3558 struct file_operations ll_file_operations = {
3559 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3560 # ifdef HAVE_SYNC_READ_WRITE
3561 .read = new_sync_read,
3562 .write = new_sync_write,
3564 .read_iter = ll_file_read_iter,
3565 .write_iter = ll_file_write_iter,
3566 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3567 .read = ll_file_read,
3568 .aio_read = ll_file_aio_read,
3569 .write = ll_file_write,
3570 .aio_write = ll_file_aio_write,
3571 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3572 .unlocked_ioctl = ll_file_ioctl,
3573 .open = ll_file_open,
3574 .release = ll_file_release,
3575 .mmap = ll_file_mmap,
3576 .llseek = ll_file_seek,
3577 .splice_read = ll_file_splice_read,
/* file_operations for "-o flock": identical to the default table except
 * .flock/.lock route through ll_file_flock for cluster-wide locks. */
3582 struct file_operations ll_file_operations_flock = {
3583 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3584 # ifdef HAVE_SYNC_READ_WRITE
3585 .read = new_sync_read,
3586 .write = new_sync_write,
3587 # endif /* HAVE_SYNC_READ_WRITE */
3588 .read_iter = ll_file_read_iter,
3589 .write_iter = ll_file_write_iter,
3590 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3591 .read = ll_file_read,
3592 .aio_read = ll_file_aio_read,
3593 .write = ll_file_write,
3594 .aio_write = ll_file_aio_write,
3595 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3596 .unlocked_ioctl = ll_file_ioctl,
3597 .open = ll_file_open,
3598 .release = ll_file_release,
3599 .mmap = ll_file_mmap,
3600 .llseek = ll_file_seek,
3601 .splice_read = ll_file_splice_read,
3604 .flock = ll_file_flock,
3605 .lock = ll_file_flock
3608 /* These are for -o noflock - to return ENOSYS on flock calls */
3609 struct file_operations ll_file_operations_noflock = {
3610 #ifdef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
3611 # ifdef HAVE_SYNC_READ_WRITE
3612 .read = new_sync_read,
3613 .write = new_sync_write,
3614 # endif /* HAVE_SYNC_READ_WRITE */
3615 .read_iter = ll_file_read_iter,
3616 .write_iter = ll_file_write_iter,
3617 #else /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3618 .read = ll_file_read,
3619 .aio_read = ll_file_aio_read,
3620 .write = ll_file_write,
3621 .aio_write = ll_file_aio_write,
3622 #endif /* HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
3623 .unlocked_ioctl = ll_file_ioctl,
3624 .open = ll_file_open,
3625 .release = ll_file_release,
3626 .mmap = ll_file_mmap,
3627 .llseek = ll_file_seek,
3628 .splice_read = ll_file_splice_read,
/* both lock entry points rejected via ll_file_noflock */
3631 .flock = ll_file_noflock,
3632 .lock = ll_file_noflock
/* inode_operations shared by regular files (xattr ops, fiemap, ACL). */
3635 struct inode_operations ll_file_inode_operations = {
3636 .setattr = ll_setattr,
3637 .getattr = ll_getattr,
3638 .permission = ll_inode_permission,
3639 .setxattr = ll_setxattr,
3640 .getxattr = ll_getxattr,
3641 .listxattr = ll_listxattr,
3642 .removexattr = ll_removexattr,
3643 .fiemap = ll_fiemap,
3644 #ifdef HAVE_IOP_GET_ACL
3645 .get_acl = ll_get_acl,
3649 /* dynamic ioctl number support routines */
/* registry of externally-registered ioctl handlers, protected by a
 * rwsem; entries live on ioc_head (statically initialized, elided) */
3650 static struct llioc_ctl_data {
3651 struct rw_semaphore ioc_sem;
3652 struct list_head ioc_head;
3654 __RWSEM_INITIALIZER(llioc.ioc_sem),
3655 LIST_HEAD_INIT(llioc.ioc_head)
/* one registration record: callback plus its array of ioctl numbers */
3660 struct list_head iocd_list;
3661 unsigned int iocd_size;          /* total allocation size, for freeing */
3662 llioc_callback_t iocd_cb;        /* handler invoked for matching cmds */
3663 unsigned int iocd_count;         /* number of entries in iocd_cmd[] */
3664 unsigned int iocd_cmd[0];        /* trailing array (old-style flexible member) */
/*
 * ll_iocontrol_register(): register callback @cb for @count dynamic
 * ioctl numbers in @cmd. Returns an opaque cookie (the allocated record
 * — final RETURN elided in this listing) used later for unregistering.
 */
3667 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3670 struct llioc_data *in_data = NULL;
3673 if (cb == NULL || cmd == NULL ||
3674 count > LLIOC_MAX_CMD || count < 0)
3677 size = sizeof(*in_data) + count * sizeof(unsigned int);
3678 OBD_ALLOC(in_data, size);
3679 if (in_data == NULL)
3682 memset(in_data, 0, sizeof(*in_data));
3683 in_data->iocd_size = size;
3684 in_data->iocd_cb = cb;
3685 in_data->iocd_count = count;
3686 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3688 down_write(&llioc.ioc_sem);
3689 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3690 up_write(&llioc.ioc_sem);
/*
 * ll_iocontrol_unregister(): remove and free the registration matching
 * @magic (the cookie returned by ll_iocontrol_register); warns if not
 * found. The match test against @magic is elided in this listing.
 */
3695 void ll_iocontrol_unregister(void *magic)
3697 struct llioc_data *tmp;
3702 down_write(&llioc.ioc_sem);
3703 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3705 unsigned int size = tmp->iocd_size;
3707 list_del(&tmp->iocd_list);
3708 up_write(&llioc.ioc_sem);
3710 OBD_FREE(tmp, size);
3714 up_write(&llioc.ioc_sem);
3716 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3719 EXPORT_SYMBOL(ll_iocontrol_register);
3720 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * ll_iocontrol_call(): dispatch @cmd to the first registered handler
 * that claims it; the handler's rc is reported via *rcp (assignment
 * elided in this listing). Stops iterating when a handler returns
 * LLIOC_STOP.
 */
3722 static enum llioc_iter
3723 ll_iocontrol_call(struct inode *inode, struct file *file,
3724 unsigned int cmd, unsigned long arg, int *rcp)
3726 enum llioc_iter ret = LLIOC_CONT;
3727 struct llioc_data *data;
3728 int rc = -EINVAL, i;
3730 down_read(&llioc.ioc_sem);
3731 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3732 for (i = 0; i < data->iocd_count; i++) {
3733 if (cmd != data->iocd_cmd[i])
3736 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3740 if (ret == LLIOC_STOP)
3743 up_read(&llioc.ioc_sem);
/*
 * ll_layout_conf(): push a layout configuration into the cl_object via
 * cl_conf_set(); on OBJECT_CONF_SET, allow the layout lock to match and
 * record the new layout generation on the inode.
 */
3750 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3752 struct ll_inode_info *lli = ll_i2info(inode);
3753 struct cl_object *obj = lli->lli_clob;
3754 struct cl_env_nest nest;
3762 env = cl_env_nested_get(&nest);
3764 RETURN(PTR_ERR(env));
3766 rc = cl_conf_set(env, lli->lli_clob, conf);
3770 if (conf->coc_opc == OBJECT_CONF_SET) {
3771 struct ldlm_lock *lock = conf->coc_lock;
3772 struct cl_layout cl = {
3776 LASSERT(lock != NULL);
3777 LASSERT(ldlm_has_layout(lock));
3779 /* it can only be allowed to match after layout is
3780 * applied to inode otherwise false layout would be
3781 * seen. Applying layout should happen before dropping
3782 * the intent lock. */
3783 ldlm_lock_allow_match(lock);
3785 rc = cl_object_layout_get(env, obj, &cl);
3790 DFID": layout version change: %u -> %u\n",
3791 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3793 ll_layout_version_set(lli, cl.cl_layout_gen);
3797 cl_env_nested_put(&nest, env);
3802 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3803 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3806 struct ll_sb_info *sbi = ll_i2sbi(inode);
3807 struct ptlrpc_request *req;
3808 struct mdt_body *body;
3815 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3816 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3817 lock->l_lvb_data, lock->l_lvb_len);
/* LVB already populated: nothing to fetch */
3819 if (lock->l_lvb_data != NULL)
3822 /* if layout lock was granted right away, the layout is returned
3823 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3824 * blocked and then granted via completion ast, we have to fetch
3825 * layout here. Please note that we can't use the LVB buffer in
3826 * completion AST because it doesn't have a large enough buffer */
3827 rc = ll_get_default_mdsize(sbi, &lmmsize);
3829 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode),
3830 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3835 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3837 GOTO(out, rc = -EPROTO);
3839 lmmsize = body->mbo_eadatasize;
3840 if (lmmsize == 0) /* empty layout */
3843 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3845 GOTO(out, rc = -EFAULT);
3847 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3848 if (lvbdata == NULL)
3849 GOTO(out, rc = -ENOMEM);
/* install the fetched layout as the lock's LVB, unless someone beat us */
3851 memcpy(lvbdata, lmm, lmmsize);
3852 lock_res_and_lock(lock);
3853 if (unlikely(lock->l_lvb_data == NULL)) {
3854 lock->l_lvb_type = LVB_T_LAYOUT;
3855 lock->l_lvb_data = lvbdata;
3856 lock->l_lvb_len = lmmsize;
3859 unlock_res_and_lock(lock);
/* lvbdata cleared on successful install (elided); free if still ours */
3861 if (lvbdata != NULL)
3862 OBD_FREE_LARGE(lvbdata, lmmsize);
3867 ptlrpc_req_finished(req);
3872 * Apply the layout to the inode. Layout lock is held and will be released
3875 static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
3876 struct inode *inode)
3878 struct ll_inode_info *lli = ll_i2info(inode);
3879 struct ll_sb_info *sbi = ll_i2sbi(inode);
3880 struct ldlm_lock *lock;
3881 struct cl_object_conf conf;
3884 bool wait_layout = false;
3887 LASSERT(lustre_handle_is_used(lockh));
3889 lock = ldlm_handle2lock(lockh);
3890 LASSERT(lock != NULL);
3891 LASSERT(ldlm_has_layout(lock));
3893 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured",
3894 PFID(&lli->lli_fid), inode);
3896 /* in case this is a caching lock and reinstate with new inode */
3897 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3899 lock_res_and_lock(lock);
3900 lvb_ready = ldlm_is_lvb_ready(lock);
3901 unlock_res_and_lock(lock);
3902 /* checking lvb_ready is racy but this is okay. The worst case is
3903 * that multi processes may configure the file on the same time. */
/* fetch the layout into the lock LVB if it isn't there yet */
3908 rc = ll_layout_fetch(inode, lock);
3912 /* for layout lock, lmm is stored in lock's lvb.
3913 * lvb_data is immutable if the lock is held so it's safe to access it
3916 * set layout to file. Unlikely this will fail as old layout was
3917 * surely eliminated */
3918 memset(&conf, 0, sizeof conf);
3919 conf.coc_opc = OBJECT_CONF_SET;
3920 conf.coc_inode = inode;
3921 conf.coc_lock = lock;
3922 conf.u.coc_layout.lb_buf = lock->l_lvb_data;
3923 conf.u.coc_layout.lb_len = lock->l_lvb_len;
3924 rc = ll_layout_conf(inode, &conf);
3926 /* refresh layout failed, need to wait */
3927 wait_layout = rc == -EBUSY;
3931 LDLM_LOCK_PUT(lock);
3932 ldlm_lock_decref(lockh, mode);
3934 /* wait for IO to complete if it's still being used. */
3936 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3937 ll_get_fsname(inode->i_sb, NULL, 0),
3938 PFID(&lli->lli_fid), inode);
/* OBJECT_CONF_WAIT blocks until in-flight IO under the old layout drains */
3940 memset(&conf, 0, sizeof conf);
3941 conf.coc_opc = OBJECT_CONF_WAIT;
3942 conf.coc_inode = inode;
3943 rc = ll_layout_conf(inode, &conf);
3947 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3948 ll_get_fsname(inode->i_sb, NULL, 0),
3949 PFID(&lli->lli_fid), rc);
/*
 * ll_layout_refresh_locked(): ensure this client holds a layout lock
 * for the inode. Fast path: match a cached MD LAYOUT lock; otherwise
 * enqueue an IT_LAYOUT intent and apply the returned layout via
 * ll_layout_lock_set(). Caller holds lli_layout_mutex.
 * NOTE(review): retry/ERR-handling lines are elided in this listing.
 */
3954 static int ll_layout_refresh_locked(struct inode *inode)
3956 struct ll_inode_info *lli = ll_i2info(inode);
3957 struct ll_sb_info *sbi = ll_i2sbi(inode);
3958 struct md_op_data *op_data;
3959 struct lookup_intent it;
3960 struct lustre_handle lockh;
3961 enum ldlm_mode mode;
3962 struct ldlm_enqueue_info einfo = {
3963 .ei_type = LDLM_IBITS,
3965 .ei_cb_bl = &ll_md_blocking_ast,
3966 .ei_cb_cp = &ldlm_completion_ast,
3972 /* mostly layout lock is caching on the local side, so try to match
3973 * it before grabbing layout lock mutex. */
3974 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3975 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3976 if (mode != 0) { /* hit cached lock */
3977 rc = ll_layout_lock_set(&lockh, mode, inode);
3984 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3985 0, 0, LUSTRE_OPC_ANY, NULL);
3986 if (IS_ERR(op_data))
3987 RETURN(PTR_ERR(op_data));
3989 /* have to enqueue one */
3990 memset(&it, 0, sizeof(it));
3991 it.it_op = IT_LAYOUT;
3992 lockh.cookie = 0ULL;
3994 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3995 ll_get_fsname(inode->i_sb, NULL, 0),
3996 PFID(&lli->lli_fid), inode);
3998 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
3999 if (it.d.lustre.it_data != NULL)
4000 ptlrpc_req_finished(it.d.lustre.it_data);
4001 it.d.lustre.it_data = NULL;
4003 ll_finish_md_op_data(op_data);
/* take ownership of the granted mode before dropping the intent */
4005 mode = it.d.lustre.it_lock_mode;
4006 it.d.lustre.it_lock_mode = 0;
4007 ll_intent_drop_lock(&it);
4010 /* set lock data in case this is a new lock */
4011 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
4012 rc = ll_layout_lock_set(&lockh, mode, inode);
4021 * This function checks if there exists a LAYOUT lock on the client side,
4022 * or enqueues it if it doesn't have one in cache.
4024 * This function will not hold layout lock so it may be revoked any time after
4025 * this function returns. Any operations depend on layout should be redone
4028 * This function should be called before lov_io_init() to get an uptodate
4029 * layout version, the caller should save the version number and after IO
4030 * is finished, this function should be called again to verify that layout
4031 * is not changed during IO time.
4033 int ll_layout_refresh(struct inode *inode, __u32 *gen)
4035 struct ll_inode_info *lli = ll_i2info(inode);
4036 struct ll_sb_info *sbi = ll_i2sbi(inode);
4040 *gen = ll_layout_version_get(lli);
/* skip if layout locks are disabled or a generation is already known */
4041 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
4045 LASSERT(fid_is_sane(ll_inode2fid(inode)));
4046 LASSERT(S_ISREG(inode->i_mode));
4048 /* take layout lock mutex to enqueue layout lock exclusively. */
4049 mutex_lock(&lli->lli_layout_mutex);
4051 rc = ll_layout_refresh_locked(inode);
4055 *gen = ll_layout_version_get(lli);
4057 mutex_unlock(&lli->lli_layout_mutex);
4063 * This function send a restore request to the MDT
4065 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
4067 struct hsm_user_request *hur;
4071 len = sizeof(struct hsm_user_request) +
4072 sizeof(struct hsm_user_item);
4073 OBD_ALLOC(hur, len);
4077 hur->hur_request.hr_action = HUA_RESTORE;
4078 hur->hur_request.hr_archive_id = 0;
4079 hur->hur_request.hr_flags = 0;
4080 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
4081 sizeof(hur->hur_user_item[0].hui_fid));
4082 hur->hur_user_item[0].hui_extent.offset = offset;
4083 hur->hur_user_item[0].hui_extent.length = length;
4084 hur->hur_request.hr_itemcount = 1;
4085 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,