4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
50 #include <lustre_ioctl.h>
52 #include "cl_object.h"
55 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
57 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
60 static enum llioc_iter
61 ll_iocontrol_call(struct inode *inode, struct file *file,
62 unsigned int cmd, unsigned long arg, int *rcp);
/* Allocate the per-open-file private data (stored in file->private_data via
 * LUSTRE_FPRIVATE) from its dedicated slab. GFP_NOFS prevents reclaim from
 * re-entering the filesystem while we allocate.
 * NOTE(review): extraction is fragmentary — the NULL check and return of @fd
 * presumably follow; confirm against the full source. */
64 static struct ll_file_data *ll_file_data_get(void)
66 struct ll_file_data *fd;
68 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
72 fd->fd_write_failed = false;
/* Release per-open-file private data back to the ll_file_data slab cache.
 * Counterpart of ll_file_data_get(). */
77 static void ll_file_data_put(struct ll_file_data *fd)
80 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Copy the client-cached VFS inode attributes (mode, a/m/ctime, size,
 * blocks, flags), the open handle @fh and the MDS capability into @op_data
 * so they can be shipped to the MDT in a metadata RPC.
 * If the inode carries LLIF_DATA_MODIFIED, also request the
 * MDS_DATA_MODIFIED bias so the server learns the data changed. */
83 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
84 struct lustre_handle *fh)
86 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
87 op_data->op_attr.ia_mode = inode->i_mode;
88 op_data->op_attr.ia_atime = inode->i_atime;
89 op_data->op_attr.ia_mtime = inode->i_mtime;
90 op_data->op_attr.ia_ctime = inode->i_ctime;
91 op_data->op_attr.ia_size = i_size_read(inode);
92 op_data->op_attr_blocks = inode->i_blocks;
93 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
95 op_data->op_handle = *fh;
96 op_data->op_capa1 = ll_mdscapa_get(inode);
98 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
99 op_data->op_bias |= MDS_DATA_MODIFIED;
103 * Packs all the attributes into @op_data for the CLOSE rpc.
105 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
106 struct obd_client_handle *och)
/* Always send the mode and timestamps back on close. */
110 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
111 ATTR_MTIME | ATTR_MTIME_SET |
112 ATTR_CTIME | ATTR_CTIME_SET;
/* NOTE(review): lines between the FMODE_WRITE test and the ia_valid |=
 * below are missing from this extract — presumably size/blocks are only
 * sent for write opens; confirm against the full source. */
114 if (!(och->och_flags & FMODE_WRITE))
117 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
120 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
121 ll_prep_md_op_data(op_data, inode, NULL, NULL,
122 0, 0, LUSTRE_OPC_ANY, NULL);
/* Send the MDS_CLOSE RPC for open handle @och on @inode.
 * @data_version non-NULL turns the close into an HSM release (the version is
 * shipped so the MDT can verify the file was not modified meanwhile).
 * On success, clears LLIF_DATA_MODIFIED (if it was sent) and invalidates the
 * handle cookie with DEAD_HANDLE_MAGIC. */
126 static int ll_close_inode_openhandle(struct obd_export *md_exp,
128 struct obd_client_handle *och,
129 const __u64 *data_version)
131 struct obd_export *exp = ll_i2mdexp(inode);
132 struct md_op_data *op_data;
133 struct ptlrpc_request *req = NULL;
134 struct obd_device *obd = class_exp2obd(exp);
140 * XXX: in case of LMV, is this correct to access
143 CERROR("Invalid MDC connection handle "LPX64"\n",
144 ll_i2mdexp(inode)->exp_handle.h_cookie);
148 OBD_ALLOC_PTR(op_data);
150 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
152 ll_prepare_close(inode, op_data, och);
153 if (data_version != NULL) {
154 /* Pass in data_version implies release. */
155 op_data->op_bias |= MDS_HSM_RELEASE;
156 op_data->op_data_version = *data_version;
157 op_data->op_lease_handle = och->och_lease_handle;
158 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
161 rc = md_close(md_exp, op_data, och->och_mod, &req);
163 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
164 ll_i2mdexp(inode)->exp_obd->obd_name,
165 PFID(ll_inode2fid(inode)), rc);
168 /* DATA_MODIFIED flag was successfully sent on close, cancel data
169 * modification flag. */
170 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
171 struct ll_inode_info *lli = ll_i2info(inode);
173 spin_lock(&lli->lli_lock);
174 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
175 spin_unlock(&lli->lli_lock);
/* For an HSM release, check the server actually released the file. */
178 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
179 struct mdt_body *body;
180 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
181 if (!(body->mbo_valid & OBD_MD_FLRELEASED))
185 ll_finish_md_op_data(op_data);
/* Replay data is no longer needed once the close is on the wire. */
189 md_clear_open_replay_data(md_exp, och);
190 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
193 if (req) /* This is close request */
194 ptlrpc_req_finished(req);
/* Drop one reference on the MDS open handle of the given @fmode class
 * (write / exec / read) and, when this was the last user, actually close it
 * on the MDS via ll_close_inode_openhandle(). Serialized by lli_och_mutex. */
198 int ll_md_real_close(struct inode *inode, fmode_t fmode)
200 struct ll_inode_info *lli = ll_i2info(inode);
201 struct obd_client_handle **och_p;
202 struct obd_client_handle *och;
/* Pick the handle slot and usecount matching the open mode. */
207 if (fmode & FMODE_WRITE) {
208 och_p = &lli->lli_mds_write_och;
209 och_usecount = &lli->lli_open_fd_write_count;
210 } else if (fmode & FMODE_EXEC) {
211 och_p = &lli->lli_mds_exec_och;
212 och_usecount = &lli->lli_open_fd_exec_count;
214 LASSERT(fmode & FMODE_READ);
215 och_p = &lli->lli_mds_read_och;
216 och_usecount = &lli->lli_open_fd_read_count;
219 mutex_lock(&lli->lli_och_mutex);
220 if (*och_usecount > 0) {
221 /* There are still users of this handle, so skip
223 mutex_unlock(&lli->lli_och_mutex);
229 mutex_unlock(&lli->lli_och_mutex);
232 /* There might be a race and this handle may already
234 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file-descriptor close: releases group lock, any leftover lease and
 * extra open handle held by this fd, decrements the per-mode open counters,
 * and calls ll_md_real_close() unless a cached OPEN DLM lock lets us skip
 * talking to the MDS. Finally frees the ll_file_data. */
241 static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
244 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
245 struct ll_inode_info *lli = ll_i2info(inode);
249 /* clear group lock, if present */
250 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
251 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
253 if (fd->fd_lease_och != NULL) {
256 /* Usually the lease is not released when the
257 * application crashed, we need to release here. */
258 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
259 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
260 PFID(&lli->lli_fid), rc, lease_broken);
262 fd->fd_lease_och = NULL;
265 if (fd->fd_och != NULL) {
266 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
271 /* Let's see if we have good enough OPEN lock on the file and if
272 we can skip talking to MDS */
273 if (file->f_dentry->d_inode) { /* Can this ever be false? */
275 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
276 struct lustre_handle lockh;
277 struct inode *inode = file->f_dentry->d_inode;
278 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Drop this fd's contribution to the per-mode open counters. */
280 mutex_lock(&lli->lli_och_mutex);
281 if (fd->fd_omode & FMODE_WRITE) {
283 LASSERT(lli->lli_open_fd_write_count);
284 lli->lli_open_fd_write_count--;
285 } else if (fd->fd_omode & FMODE_EXEC) {
287 LASSERT(lli->lli_open_fd_exec_count);
288 lli->lli_open_fd_exec_count--;
291 LASSERT(lli->lli_open_fd_read_count);
292 lli->lli_open_fd_read_count--;
294 mutex_unlock(&lli->lli_och_mutex);
/* No matching cached OPEN lock -> must really close on the MDS. */
296 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
297 LDLM_IBITS, &policy, lockmode,
299 rc = ll_md_real_close(file->f_dentry->d_inode,
303 CERROR("released file has negative dentry: file = %p, "
304 "dentry = %p, name = %s\n",
305 file, file->f_dentry, file->f_dentry->d_name.name);
309 LUSTRE_FPRIVATE(file) = NULL;
310 ll_file_data_put(fd);
311 ll_capa_close(inode);
316 /* While this returns an error code, fput() the caller does not, so we need
317 * to make every effort to clean up all of our state here. Also, applications
318 * rarely check close errors and even if an error is returned they will not
319 * re-try the close call.
321 int ll_file_release(struct inode *inode, struct file *file)
323 struct ll_file_data *fd;
324 struct ll_sb_info *sbi = ll_i2sbi(inode);
325 struct ll_inode_info *lli = ll_i2info(inode);
329 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
330 PFID(ll_inode2fid(inode)), inode);
/* Remote-client ACL bookkeeping on the root inode only. */
332 #ifdef CONFIG_FS_POSIX_ACL
333 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
334 inode == inode->i_sb->s_root->d_inode) {
335 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
338 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
339 fd->fd_flags &= ~LL_FILE_RMTACL;
340 rct_del(&sbi->ll_rct, current_pid());
341 et_search_free(&sbi->ll_et, current_pid());
346 if (inode->i_sb->s_root != file->f_dentry)
347 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
348 fd = LUSTRE_FPRIVATE(file);
351 /* The last ref on @file, maybe not the the owner pid of statahead,
352 * because parent and child process can share the same file handle. */
353 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
354 ll_deauthorize_statahead(inode, fd);
/* Root inode never has an MDS open handle: just free the fd data. */
356 if (inode->i_sb->s_root == file->f_dentry) {
357 LUSTRE_FPRIVATE(file) = NULL;
358 ll_file_data_put(fd);
/* Flush any deferred async write error into lli before close. */
362 if (!S_ISDIR(inode->i_mode)) {
363 if (lli->lli_clob != NULL)
364 lov_read_and_clear_async_rc(lli->lli_clob);
365 lli->lli_async_rc = 0;
368 rc = ll_md_close(sbi->ll_md_exp, inode, file);
370 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
371 libcfs_debug_dumplog();
/* Send an IT_OPEN intent lock request to the MDS for @file's dentry.
 * Packs the name only when the server lacks OBD_CONNECT_OPEN_BY_FID support
 * (and the name is valid); @lmm/@lmmsize carry an optional striping layout.
 * On success, fills the inode from the reply and attaches the lock data. */
376 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
377 struct lookup_intent *itp)
379 struct dentry *de = file->f_dentry;
380 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
381 struct dentry *parent = de->d_parent;
382 const char *name = NULL;
384 struct md_op_data *op_data;
385 struct ptlrpc_request *req = NULL;
389 LASSERT(parent != NULL);
390 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
392 /* if server supports open-by-fid, or file name is invalid, don't pack
393 * name in open request */
394 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
395 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
396 name = de->d_name.name;
397 len = de->d_name.len;
400 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
401 name, len, 0, LUSTRE_OPC_ANY, NULL);
403 RETURN(PTR_ERR(op_data));
404 op_data->op_data = lmm;
405 op_data->op_data_size = lmmsize;
407 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
408 &ll_md_blocking_ast, 0);
409 ll_finish_md_op_data(op_data);
411 /* reason for keep own exit path - don`t flood log
412 * with messages with -ESTALE errors.
/* Open succeeded but something else failed: give the handle back. */
414 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
415 it_open_error(DISP_OPEN_OPEN, itp))
417 ll_release_openhandle(de, itp);
421 if (it_disposition(itp, DISP_LOOKUP_NEG))
422 GOTO(out, rc = -ENOENT);
424 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
425 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
426 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
430 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
431 if (!rc && itp->d.lustre.it_lock_mode)
432 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
435 ptlrpc_req_finished(req);
436 ll_intent_drop_lock(itp);
/* Populate an obd_client_handle from the server's open reply carried in the
 * intent (@it): file handle, FID, lease-lock cookie, magic and open flags.
 * Then register the open for replay in case of MDS recovery. */
441 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
442 struct obd_client_handle *och)
444 struct ptlrpc_request *req = it->d.lustre.it_data;
445 struct mdt_body *body;
447 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
448 och->och_fh = body->mbo_handle;
449 och->och_fid = body->mbo_fid1;
450 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
451 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
452 och->och_flags = it->it_flags;
454 return md_set_open_replay_data(md_exp, och, it);
/* Finish the client-local part of an open: optionally fill @och from the
 * intent reply, then install @fd as the file's private data and initialize
 * its readahead state, open mode, and cl_io context bookkeeping. */
457 static int ll_local_open(struct file *file, struct lookup_intent *it,
458 struct ll_file_data *fd, struct obd_client_handle *och)
460 struct inode *inode = file->f_dentry->d_inode;
463 LASSERT(!LUSTRE_FPRIVATE(file));
470 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
475 LUSTRE_FPRIVATE(file) = fd;
476 ll_readahead_init(inode, &fd->fd_ras);
477 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
479 /* ll_cl_context initialize */
480 rwlock_init(&fd->fd_lock);
481 INIT_LIST_HEAD(&fd->fd_lccs);
486 /* Open a file, and (for the very first open) create objects on the OSTs at
487 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
488 * creation or open until ll_lov_setstripe() ioctl is called.
490 * If we already have the stripe MD locally then we don't request it in
491 * md_open(), by passing a lmm_size = 0.
493 * It is up to the application to ensure no other processes open this file
494 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
495 * used. We might be able to avoid races of that sort by getting lli_open_sem
496 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
497 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
499 int ll_file_open(struct inode *inode, struct file *file)
501 struct ll_inode_info *lli = ll_i2info(inode);
502 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
503 .it_flags = file->f_flags };
504 struct obd_client_handle **och_p = NULL;
505 __u64 *och_usecount = NULL;
506 struct ll_file_data *fd;
510 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
511 PFID(ll_inode2fid(inode)), inode, file->f_flags);
/* An intent may have been stashed here by the lookup path. */
513 it = file->private_data; /* XXX: compat macro */
514 file->private_data = NULL; /* prevent ll_local_open assertion */
516 fd = ll_file_data_get();
518 GOTO(out_openerr, rc = -ENOMEM);
521 if (S_ISDIR(inode->i_mode))
522 ll_authorize_statahead(inode, fd);
/* Root inode: no MDS open needed, install fd and we are done. */
524 if (inode->i_sb->s_root == file->f_dentry) {
525 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own (oit) from f_flags. */
529 if (!it || !it->d.lustre.it_disposition) {
530 /* Convert f_flags into access mode. We cannot use file->f_mode,
531 * because everything but O_ACCMODE mask was stripped from
533 if ((oit.it_flags + 1) & O_ACCMODE)
535 if (file->f_flags & O_TRUNC)
536 oit.it_flags |= FMODE_WRITE;
538 /* kernel only call f_op->open in dentry_open. filp_open calls
539 * dentry_open after call to open_namei that checks permissions.
540 * Only nfsd_open call dentry_open directly without checking
541 * permissions and because of that this code below is safe. */
542 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
543 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
545 /* We do not want O_EXCL here, presumably we opened the file
546 * already? XXX - NFS implications? */
547 oit.it_flags &= ~O_EXCL;
549 /* bug20584, if "it_flags" contains O_CREAT, the file will be
550 * created if necessary, then "IT_CREAT" should be set to keep
551 * consistent with it */
552 if (oit.it_flags & O_CREAT)
553 oit.it_op |= IT_CREAT;
559 /* Let's see if we have file open on MDS already. */
560 if (it->it_flags & FMODE_WRITE) {
561 och_p = &lli->lli_mds_write_och;
562 och_usecount = &lli->lli_open_fd_write_count;
563 } else if (it->it_flags & FMODE_EXEC) {
564 och_p = &lli->lli_mds_exec_och;
565 och_usecount = &lli->lli_open_fd_exec_count;
567 och_p = &lli->lli_mds_read_och;
568 och_usecount = &lli->lli_open_fd_read_count;
571 mutex_lock(&lli->lli_och_mutex);
572 if (*och_p) { /* Open handle is present */
573 if (it_disposition(it, DISP_OPEN_OPEN)) {
574 /* Well, there's extra open request that we do not need,
575 let's close it somehow. This will decref request. */
576 rc = it_open_error(DISP_OPEN_OPEN, it);
578 mutex_unlock(&lli->lli_och_mutex);
579 GOTO(out_openerr, rc);
582 ll_release_openhandle(file->f_dentry, it);
/* Reuse the existing MDS open handle for this fd. */
586 rc = ll_local_open(file, it, fd, NULL);
589 mutex_unlock(&lli->lli_och_mutex);
590 GOTO(out_openerr, rc);
593 LASSERT(*och_usecount == 0);
594 if (!it->d.lustre.it_disposition) {
595 /* We cannot just request lock handle now, new ELC code
596 means that one of other OPEN locks for this file
597 could be cancelled, and since blocking ast handler
598 would attempt to grab och_mutex as well, that would
599 result in a deadlock */
600 mutex_unlock(&lli->lli_och_mutex);
602 * Normally called under two situations:
604 * 2. A race/condition on MDS resulting in no open
605 * handle to be returned from LOOKUP|OPEN request,
606 * for example if the target entry was a symlink.
608 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
610 * Always specify MDS_OPEN_BY_FID because we don't want
611 * to get file with different fid.
613 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
614 rc = ll_intent_file_open(file, NULL, 0, it);
616 GOTO(out_openerr, rc);
620 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
622 GOTO(out_och_free, rc = -ENOMEM);
626 /* md_intent_lock() didn't get a request ref if there was an
627 * open error, so don't do cleanup on the request here
629 /* XXX (green): Should not we bail out on any error here, not
630 * just open error? */
631 rc = it_open_error(DISP_OPEN_OPEN, it);
633 GOTO(out_och_free, rc);
635 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
636 "inode %p: disposition %x, status %d\n", inode,
637 it_disposition(it, ~0), it->d.lustre.it_status);
639 rc = ll_local_open(file, it, fd, *och_p);
641 GOTO(out_och_free, rc);
643 mutex_unlock(&lli->lli_och_mutex);
646 /* Must do this outside lli_och_mutex lock to prevent deadlock where
647 different kind of OPEN lock for this same inode gets cancelled
648 by ldlm_cancel_lru */
649 if (!S_ISREG(inode->i_mode))
650 GOTO(out_och_free, rc);
/* O_LOV_DELAY_CREATE (or read-only open of a layout-less file):
 * postpone OST object creation until ll_lov_setstripe(). */
654 if (!lli->lli_has_smd &&
655 (cl_is_lov_delay_create(file->f_flags) ||
656 (file->f_mode & FMODE_WRITE) == 0)) {
657 CDEBUG(D_INODE, "object creation was delayed\n");
658 GOTO(out_och_free, rc);
660 cl_lov_delay_create_clear(&file->f_flags);
661 GOTO(out_och_free, rc);
/* Error path: free a half-installed och and undo statahead/fd state. */
665 if (och_p && *och_p) {
666 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
667 *och_p = NULL; /* OBD_FREE writes some magic there */
670 mutex_unlock(&lli->lli_och_mutex);
673 if (lli->lli_opendir_key == fd)
674 ll_deauthorize_statahead(inode, fd);
676 ll_file_data_put(fd);
678 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
681 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
682 ptlrpc_req_finished(it->d.lustre.it_data);
683 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/* Blocking AST for lease locks: on a blocking callback, cancel the lease
 * lock asynchronously (the application detects the broken lease later).
 * NOTE(review): the LDLM_CB_CANCELING arm is truncated in this extract. */
689 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
690 struct ldlm_lock_desc *desc, void *data, int flag)
693 struct lustre_handle lockh;
697 case LDLM_CB_BLOCKING:
698 ldlm_lock2handle(lock, &lockh);
699 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
701 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
705 case LDLM_CB_CANCELING:
713 * Acquire a lease and open the file.
715 static struct obd_client_handle *
716 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
719 struct lookup_intent it = { .it_op = IT_OPEN };
720 struct ll_sb_info *sbi = ll_i2sbi(inode);
721 struct md_op_data *op_data;
722 struct ptlrpc_request *req = NULL;
723 struct lustre_handle old_handle = { 0 };
724 struct obd_client_handle *och = NULL;
/* Leases are only defined for plain read or plain write mode. */
729 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
730 RETURN(ERR_PTR(-EINVAL));
733 struct ll_inode_info *lli = ll_i2info(inode);
734 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
735 struct obd_client_handle **och_p;
/* The requested mode must be a subset of how @file was opened. */
738 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
739 RETURN(ERR_PTR(-EPERM));
741 /* Get the openhandle of the file */
743 mutex_lock(&lli->lli_och_mutex);
744 if (fd->fd_lease_och != NULL) {
745 mutex_unlock(&lli->lli_och_mutex);
749 if (fd->fd_och == NULL) {
750 if (file->f_mode & FMODE_WRITE) {
751 LASSERT(lli->lli_mds_write_och != NULL);
752 och_p = &lli->lli_mds_write_och;
753 och_usecount = &lli->lli_open_fd_write_count;
755 LASSERT(lli->lli_mds_read_och != NULL);
756 och_p = &lli->lli_mds_read_och;
757 och_usecount = &lli->lli_open_fd_read_count;
759 if (*och_usecount == 1) {
766 mutex_unlock(&lli->lli_och_mutex);
767 if (rc < 0) /* more than 1 opener */
/* Pass our existing openhandle so the MDT matches owners. */
770 LASSERT(fd->fd_och != NULL);
771 old_handle = fd->fd_och->och_fh;
776 RETURN(ERR_PTR(-ENOMEM));
778 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
779 LUSTRE_OPC_ANY, NULL);
781 GOTO(out, rc = PTR_ERR(op_data));
783 /* To tell the MDT this openhandle is from the same owner */
784 op_data->op_handle = old_handle;
786 it.it_flags = fmode | open_flags;
787 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
788 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
789 &ll_md_blocking_lease_ast,
790 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
791 * it can be cancelled which may mislead applications that the lease is
793 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
794 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
795 * doesn't deal with openhandle, so normal openhandle will be leaked. */
796 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
797 ll_finish_md_op_data(op_data);
798 ptlrpc_req_finished(req);
800 GOTO(out_release_it, rc);
802 if (it_disposition(&it, DISP_LOOKUP_NEG))
803 GOTO(out_release_it, rc = -ENOENT);
805 rc = it_open_error(DISP_OPEN_OPEN, &it);
807 GOTO(out_release_it, rc);
809 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
810 ll_och_fill(sbi->ll_md_exp, &it, och);
812 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
813 GOTO(out_close, rc = -EOPNOTSUPP);
815 /* already get lease, handle lease lock */
816 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
817 if (it.d.lustre.it_lock_mode == 0 ||
818 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
819 /* open lock must return for lease */
820 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
821 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
822 it.d.lustre.it_lock_bits);
823 GOTO(out_close, rc = -EPROTO);
826 ll_intent_release(&it);
/* Error cleanup: drop the lease lock (if granted) and close on MDS. */
830 /* Cancel open lock */
831 if (it.d.lustre.it_lock_mode != 0) {
832 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
833 it.d.lustre.it_lock_mode);
834 it.d.lustre.it_lock_mode = 0;
835 och->och_lease_handle.cookie = 0ULL;
837 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
839 CERROR("%s: error closing file "DFID": %d\n",
840 ll_get_fsname(inode->i_sb, NULL, 0),
841 PFID(&ll_i2info(inode)->lli_fid), rc2);
842 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
844 ll_intent_release(&it);
852 * Release lease and close the file.
853 * It will check if the lease has ever broken.
855 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
858 struct ldlm_lock *lock;
859 bool cancelled = true;
/* Determine whether the lease lock was already cancelled (= broken). */
863 lock = ldlm_handle2lock(&och->och_lease_handle);
865 lock_res_and_lock(lock);
866 cancelled = ldlm_is_cancel(lock);
867 unlock_res_and_lock(lock);
871 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
872 PFID(&ll_i2info(inode)->lli_fid), cancelled);
875 ldlm_cli_cancel(&och->och_lease_handle, 0);
876 if (lease_broken != NULL)
877 *lease_broken = cancelled;
879 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
884 /* Fills the obdo with the attributes for the lsm */
885 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
886 struct obd_capa *capa, struct obdo *obdo,
889 struct ptlrpc_request_set *set;
890 struct obd_info oinfo = { { { 0 } } };
895 LASSERT(lsm != NULL);
/* Request id/type/size/blocks/times/group/data-version from the OSTs. */
899 oinfo.oi_oa->o_oi = lsm->lsm_oi;
900 oinfo.oi_oa->o_mode = S_IFREG;
901 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
902 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
903 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
904 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
905 OBD_MD_FLGROUP | OBD_MD_FLDATAVERSION;
906 oinfo.oi_capa = capa;
/* Flush requests take a server-side lock so dirty data is committed
 * before the data version is sampled. */
907 if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
908 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
909 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
910 if (dv_flags & LL_DV_WR_FLUSH)
911 oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
914 set = ptlrpc_prep_set();
916 CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM);
919 rc = obd_getattr_async(exp, &oinfo, set);
921 rc = ptlrpc_set_wait(set);
922 ptlrpc_set_destroy(set);
/* Keep only the fields that are meaningful in the merged result. */
925 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
926 OBD_MD_FLATIME | OBD_MD_FLMTIME |
927 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
928 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
/* If a write flush was requested but the server did not confirm it,
 * the caller cannot trust the returned data version. */
929 if (dv_flags & LL_DV_WR_FLUSH &&
930 !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
931 oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
/* Merge MDS-cached timestamps (lli_atime/mtime/ctime) with the OST-side
 * attributes obtained through cl_object_attr_get(): each timestamp takes
 * the newer of the two sources, and size/blocks come from the OSTs.
 * Runs under the inode size lock. */
937 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
939 struct ll_inode_info *lli = ll_i2info(inode);
940 struct cl_object *obj = lli->lli_clob;
941 struct cl_attr *attr = ccc_env_thread_attr(env);
949 ll_inode_size_lock(inode);
951 /* merge timestamps the most recently obtained from mds with
952 timestamps obtained from osts */
953 LTIME_S(inode->i_atime) = lli->lli_atime;
954 LTIME_S(inode->i_mtime) = lli->lli_mtime;
955 LTIME_S(inode->i_ctime) = lli->lli_ctime;
957 atime = LTIME_S(inode->i_atime);
958 mtime = LTIME_S(inode->i_mtime);
959 ctime = LTIME_S(inode->i_ctime);
961 cl_object_attr_lock(obj);
962 rc = cl_object_attr_get(env, obj, attr);
963 cl_object_attr_unlock(obj);
966 GOTO(out_size_unlock, rc);
/* Each timestamp is the max of the MDS and OST values. */
968 if (atime < attr->cat_atime)
969 atime = attr->cat_atime;
971 if (ctime < attr->cat_ctime)
972 ctime = attr->cat_ctime;
974 if (mtime < attr->cat_mtime)
975 mtime = attr->cat_mtime;
977 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
978 PFID(&lli->lli_fid), attr->cat_size);
980 i_size_write(inode, attr->cat_size);
981 inode->i_blocks = attr->cat_blocks;
983 LTIME_S(inode->i_atime) = atime;
984 LTIME_S(inode->i_mtime) = mtime;
985 LTIME_S(inode->i_ctime) = ctime;
988 ll_inode_size_unlock(inode);
/* ioctl helper: glimpse the OST attributes for @lsm and copy the
 * size/blocks/timestamps into the caller-supplied stat buffer @st. */
993 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
996 struct obdo obdo = { 0 };
999 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0);
1001 st->st_size = obdo.o_size;
1002 st->st_blocks = obdo.o_blocks;
1003 st->st_mtime = obdo.o_mtime;
1004 st->st_atime = obdo.o_atime;
1005 st->st_ctime = obdo.o_ctime;
/* Decide whether atime updates should be suppressed for @file, checking the
 * same conditions as the kernel's file_accessed()/touch_atime(): O_NOATIME,
 * S_NOATIME on the inode, IS_NOATIME, and the mount/superblock
 * noatime/nodiratime/read-only flags. */
1012 static bool file_is_noatime(const struct file *file)
1014 const struct vfsmount *mnt = file->f_path.mnt;
1015 const struct inode *inode = file->f_path.dentry->d_inode;
1017 /* Adapted from file_accessed() and touch_atime().*/
1018 if (file->f_flags & O_NOATIME)
1019 if (inode->i_flags & S_NOATIME)
1022 if (IS_NOATIME(inode))
1025 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1028 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1031 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/* Initialize a cl_io from @file's open flags: nonblocking/append/sync write
 * hints, the cl_object, and the lock-request policy (never for nolock files,
 * mandatory for O_APPEND, "maybe" otherwise). Also records noatime. */
1037 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
1039 struct inode *inode = file->f_dentry->d_inode;
1041 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1043 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1044 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1045 file->f_flags & O_DIRECT ||
1048 io->ci_obj = ll_i2info(inode)->lli_clob;
1049 io->ci_lockreq = CILR_MAYBE;
1050 if (ll_file_nolock(file)) {
1051 io->ci_lockreq = CILR_NEVER;
1052 io->ci_no_srvlock = 1;
1053 } else if (file->f_flags & O_APPEND) {
1054 io->ci_lockreq = CILR_MANDATORY;
1057 io->ci_noatime = file_is_noatime(file);
/* Common engine for all read/write paths (normal and splice): sets up the
 * cl_io, enforces the Lustre maximum file size on writes, takes the range
 * lock for normal writes (whole-file range for O_APPEND) and lli_trunc_sem,
 * runs cl_io_loop(), restarts short generic IO when needed, and tallies
 * per-mount read/write statistics. Returns bytes transferred or -errno. */
1061 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1062 struct file *file, enum cl_io_type iot,
1063 loff_t *ppos, size_t count)
1065 struct inode *inode = file->f_dentry->d_inode;
1066 struct ll_inode_info *lli = ll_i2info(inode);
1068 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1071 struct range_lock range;
1074 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1075 file->f_dentry->d_name.name, iot, *ppos, count);
1078 io = ccc_env_thread_io(env);
1079 ll_io_init(io, file, iot == CIT_WRITE);
1081 /* The maximum Lustre file size is variable, based on the
1082 * OST maximum object size and number of stripes. This
1083 * needs another check in addition to the VFS checks earlier. */
1084 end = (io->u.ci_wr.wr_append ? i_size_read(inode) : *ppos) + count;
1085 if (end > ll_file_maxbytes(inode)) {
1087 CDEBUG(D_INODE, "%s: file "DFID" offset %llu > maxbytes "LPU64
1088 ": rc = %zd\n", ll_get_fsname(inode->i_sb, NULL, 0),
1089 PFID(&lli->lli_fid), end, ll_file_maxbytes(inode),
1094 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1095 struct vvp_io *vio = vvp_env_io(env);
1096 bool range_locked = false;
/* O_APPEND must serialize against all writers: lock to EOF. */
1098 if (file->f_flags & O_APPEND)
1099 range_lock_init(&range, 0, LUSTRE_EOF);
1101 range_lock_init(&range, *ppos, *ppos + count - 1);
1103 vio->vui_fd = LUSTRE_FPRIVATE(file);
1104 vio->vui_io_subtype = args->via_io_subtype;
1106 switch (vio->vui_io_subtype) {
1108 vio->vui_iov = args->u.normal.via_iov;
1109 vio->vui_nrsegs = args->u.normal.via_nrsegs;
1110 vio->vui_tot_nrsegs = vio->vui_nrsegs;
1111 vio->vui_iocb = args->u.normal.via_iocb;
/* Group-locked files skip the range lock (already exclusive). */
1112 if ((iot == CIT_WRITE) &&
1113 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1114 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1116 result = range_lock(&lli->lli_write_tree,
1121 range_locked = true;
1123 down_read(&lli->lli_trunc_sem);
1126 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1127 vio->u.splice.vui_flags = args->u.splice.via_flags;
1130 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1134 ll_cl_add(file, env, io);
1135 result = cl_io_loop(env, io);
1136 ll_cl_remove(file, env);
1138 if (args->via_io_subtype == IO_NORMAL)
1139 up_read(&lli->lli_trunc_sem);
1141 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1143 range_unlock(&lli->lli_write_tree, &range);
1146 /* cl_io_rw_init() handled IO */
1147 result = io->ci_result;
1150 if (io->ci_nob > 0) {
1151 result = io->ci_nob;
1152 *ppos = io->u.ci_wr.wr.crw_pos;
1156 cl_io_fini(env, io);
1157 /* If any bit been read/written (result != 0), we just return
1158 * short read/write instead of restart io. */
1159 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1160 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zu\n",
1161 iot == CIT_READ ? "read" : "write",
1162 file->f_dentry->d_name.name, *ppos, count);
1163 LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);
1167 if (iot == CIT_READ) {
1169 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1170 LPROC_LL_READ_BYTES, result);
1171 } else if (iot == CIT_WRITE) {
1173 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1174 LPROC_LL_WRITE_BYTES, result);
1175 fd->fd_write_failed = false;
1176 } else if (result != -ERESTARTSYS) {
1177 fd->fd_write_failed = true;
1180 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1187 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array: rejects negative segment lengths and cumulative
 * overflow, and verifies each user buffer with access_ok(); on a failed
 * check the count is truncated at the bad segment (kernel semantics). */
1189 static int ll_file_get_iov_count(const struct iovec *iov,
1190 unsigned long *nr_segs, size_t *count)
1195 for (seg = 0; seg < *nr_segs; seg++) {
1196 const struct iovec *iv = &iov[seg];
1199 * If any segment has a negative length, or the cumulative
1200 * length ever wraps negative then return -EINVAL.
1203 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1205 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1210 cnt -= iv->iov_len; /* This segment is no good */
/* aio_read entry point: validate the iovec, grab a cl_env, fill the
 * IO_NORMAL vvp args, and delegate to ll_file_io_generic(CIT_READ). */
1217 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1218 unsigned long nr_segs, loff_t pos)
1221 struct vvp_io_args *args;
1227 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1231 env = cl_env_get(&refcheck);
1233 RETURN(PTR_ERR(env));
1235 args = vvp_env_args(env, IO_NORMAL);
1236 args->u.normal.via_iov = (struct iovec *)iov;
1237 args->u.normal.via_nrsegs = nr_segs;
1238 args->u.normal.via_iocb = iocb;
1240 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1241 &iocb->ki_pos, count);
1242 cl_env_put(env, &refcheck);
/* Synchronous read(2): wrap the user buffer in a single-segment iovec and a
 * sync kiocb (both from per-env scratch space), then call ll_file_aio_read()
 * and propagate the updated position back to *ppos. */
1246 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1250 struct iovec *local_iov;
1251 struct kiocb *kiocb;
1256 env = cl_env_get(&refcheck);
1258 RETURN(PTR_ERR(env));
1260 local_iov = &vvp_env_info(env)->vti_local_iov;
1261 kiocb = &vvp_env_info(env)->vti_kiocb;
1262 local_iov->iov_base = (void __user *)buf;
1263 local_iov->iov_len = count;
1264 init_sync_kiocb(kiocb, file);
1265 kiocb->ki_pos = *ppos;
/* Kernel-version compat: the remaining-bytes field was renamed. */
1266 #ifdef HAVE_KIOCB_KI_LEFT
1267 kiocb->ki_left = count;
1269 kiocb->ki_nbytes = count;
1272 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1273 *ppos = kiocb->ki_pos;
1275 cl_env_put(env, &refcheck);
1280 * Write to a file (through the page cache).
/*
 * Async write entry point: mirror image of ll_file_aio_read() but runs
 * the generic client I/O path with CIT_WRITE.
 * NOTE(review): local declarations and early error checks sit on lines
 * elided from this extract.
 */
1283 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1284 unsigned long nr_segs, loff_t pos)
1287 struct vvp_io_args *args;
1293 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1297 env = cl_env_get(&refcheck);
1299 RETURN(PTR_ERR(env));
1301 args = vvp_env_args(env, IO_NORMAL);
1302 args->u.normal.via_iov = (struct iovec *)iov;
1303 args->u.normal.via_nrsegs = nr_segs;
1304 args->u.normal.via_iocb = iocb;
1306 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1307 &iocb->ki_pos, count);
1308 cl_env_put(env, &refcheck);
/*
 * Synchronous write(): build a one-segment iovec and a sync kiocb, then
 * delegate to ll_file_aio_write().  Structure matches ll_file_read().
 */
1312 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1313 size_t count, loff_t *ppos)
1316 struct iovec *local_iov;
1317 struct kiocb *kiocb;
1322 env = cl_env_get(&refcheck);
1324 RETURN(PTR_ERR(env));
1326 local_iov = &vvp_env_info(env)->vti_local_iov;
1327 kiocb = &vvp_env_info(env)->vti_kiocb;
/* the const qualifier on buf is dropped here; iov_base is a plain
 * user pointer used read-only by the write path */
1328 local_iov->iov_base = (void __user *)buf;
1329 local_iov->iov_len = count;
1330 init_sync_kiocb(kiocb, file);
1331 kiocb->ki_pos = *ppos;
1332 #ifdef HAVE_KIOCB_KI_LEFT
1333 kiocb->ki_left = count;
1335 kiocb->ki_nbytes = count;
1338 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1339 *ppos = kiocb->ki_pos;
1341 cl_env_put(env, &refcheck);
1346 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read(): feed file data into a pipe via the generic client I/O
 * path, using the IO_SPLICE flavour of vvp_io_args.
 */
1348 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1349 struct pipe_inode_info *pipe, size_t count,
1353 struct vvp_io_args *args;
1358 env = cl_env_get(&refcheck);
1360 RETURN(PTR_ERR(env));
1362 args = vvp_env_args(env, IO_SPLICE);
1363 args->u.splice.via_pipe = pipe;
1364 args->u.splice.via_flags = flags;
1366 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1367 cl_env_put(env, &refcheck);
/*
 * Apply striping (a lov_user_md EA) to an inode by re-opening it by FID
 * with an intent carrying the layout.  Fails with -EEXIST if the inode
 * already has a layout.  The size lock serializes against concurrent
 * size/layout changes; the delay-create flag is cleared on success paths.
 */
1371 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1372 __u64 flags, struct lov_user_md *lum,
1375 struct lov_stripe_md *lsm = NULL;
1376 struct lookup_intent oit = {
1378 .it_flags = flags | MDS_OPEN_BY_FID,
/* refuse to restripe a file that already has objects */
1383 lsm = ccc_inode_lsm_get(inode);
1385 ccc_inode_lsm_put(inode, lsm);
1386 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1387 PFID(ll_inode2fid(inode)));
1388 GOTO(out, rc = -EEXIST);
1391 ll_inode_size_lock(inode);
1392 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1394 GOTO(out_unlock, rc);
1396 rc = oit.d.lustre.it_status;
1398 GOTO(out_unlock, rc);
/* drop the MDS open handle created purely to carry the layout */
1400 ll_release_openhandle(file->f_dentry, &oit);
1403 ll_inode_size_unlock(inode);
1404 ll_intent_release(&oit);
1405 ccc_inode_lsm_put(inode, lsm);
1407 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA (layout) of @filename relative to @inode via a
 * getattr-by-name to the MDS.  On success *lmmp points into the reply
 * buffer (caller keeps *request alive while using it) and *lmm_size is
 * set.  The EA is byte-swapped to host endianness on big-endian hosts.
 */
1412 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1413 struct lov_mds_md **lmmp, int *lmm_size,
1414 struct ptlrpc_request **request)
1416 struct ll_sb_info *sbi = ll_i2sbi(inode);
1417 struct mdt_body *body;
1418 struct lov_mds_md *lmm = NULL;
1419 struct ptlrpc_request *req = NULL;
1420 struct md_op_data *op_data;
/* ask the MDS how large an EA buffer to reserve in the request */
1423 rc = ll_get_default_mdsize(sbi, &lmmsize);
1427 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1428 strlen(filename), lmmsize,
1429 LUSTRE_OPC_ANY, NULL);
1430 if (IS_ERR(op_data))
1431 RETURN(PTR_ERR(op_data));
1433 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1434 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1435 ll_finish_md_op_data(op_data);
1437 CDEBUG(D_INFO, "md_getattr_name failed "
1438 "on %s: rc %d\n", filename, rc);
1442 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1443 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1445 lmmsize = body->mbo_eadatasize;
/* no EA returned, or a zero-sized one: nothing to hand back */
1447 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1449 GOTO(out, rc = -ENODATA);
1452 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1453 LASSERT(lmm != NULL);
1455 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1456 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1457 GOTO(out, rc = -EPROTO);
1461 * This is coming from the MDS, so is probably in
1462 * little endian. We convert it to host endian before
1463 * passing it to userspace.
/* only big-endian hosts (where LOV_MAGIC != its LE form) need swabbing */
1465 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1468 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1469 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1472 /* if function called for directory - we should
1473 * avoid swab not existent lsm objects */
1474 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1475 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1476 if (S_ISREG(body->mbo_mode))
1477 lustre_swab_lov_user_md_objects(
1478 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1480 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1481 lustre_swab_lov_user_md_v3(
1482 (struct lov_user_md_v3 *)lmm);
1483 if (S_ISREG(body->mbo_mode))
1484 lustre_swab_lov_user_md_objects(
1485 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1492 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (with one OST object
 * entry) from userspace and apply it via ll_lov_setstripe_ea_info().
 * Restricted to CAP_SYS_ADMIN since it names specific objects.
 */
1497 static int ll_lov_setea(struct inode *inode, struct file *file,
1500 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1501 struct lov_user_md *lump;
1502 int lum_size = sizeof(struct lov_user_md) +
1503 sizeof(struct lov_user_ost_data);
1507 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1510 OBD_ALLOC_LARGE(lump, lum_size);
1514 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1515 OBD_FREE_LARGE(lump, lum_size);
1519 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1521 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the inode's layout to a userspace lov_user_md via the cl_object
 * layer; thin wrapper that just manages the cl env lifetime.
 */
1525 static int ll_file_getstripe(struct inode *inode,
1526 struct lov_user_md __user *lum)
1533 env = cl_env_get(&refcheck);
1535 RETURN(PTR_ERR(env));
1537 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1538 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user layout in, apply it, then
 * refresh the layout generation and copy the resulting layout back out
 * so the caller sees what was actually created.
 */
1542 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1545 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1546 struct lov_user_md *klum;
1548 __u64 flags = FMODE_WRITE;
1551 rc = ll_copy_user_md(lum, &klum);
1556 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* NOTE(review): put_user() result is not checked on the visible line */
1560 put_user(0, &lum->lmm_stripe_count);
1562 ll_layout_refresh(inode, &gen);
1563 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1566 OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK handler: take a cluster-wide group lock with id @arg
 * on the file.  The fd-level flag/handle pair is published under
 * lli_lock; the race re-check after cl_get_grouplock() handles two
 * threads acquiring concurrently on the same fd.
 */
1571 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1573 struct ll_inode_info *lli = ll_i2info(inode);
1574 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1575 struct ccc_grouplock grouplock;
1580 CWARN("group id for group lock must not be 0\n");
/* group locks and nolock mode are mutually exclusive */
1584 if (ll_file_nolock(file))
1585 RETURN(-EOPNOTSUPP);
1587 spin_lock(&lli->lli_lock);
1588 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1589 CWARN("group lock already existed with gid %lu\n",
1590 fd->fd_grouplock.cg_gid);
1591 spin_unlock(&lli->lli_lock);
1594 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1595 spin_unlock(&lli->lli_lock);
/* acquire outside the spinlock: this may block unless O_NONBLOCK */
1597 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1598 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1602 spin_lock(&lli->lli_lock);
1603 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1604 spin_unlock(&lli->lli_lock);
1605 CERROR("another thread just won the race\n");
1606 cl_put_grouplock(&grouplock);
1610 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1611 fd->fd_grouplock = grouplock;
1612 spin_unlock(&lli->lli_lock);
1614 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock held on this fd,
 * verifying that one is held and that the caller's gid matches.  The
 * handle is copied out and cleared under lli_lock, then released after
 * the lock is dropped.
 */
1618 static int ll_put_grouplock(struct inode *inode, struct file *file,
1621 struct ll_inode_info *lli = ll_i2info(inode);
1622 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1623 struct ccc_grouplock grouplock;
1626 spin_lock(&lli->lli_lock);
1627 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1628 spin_unlock(&lli->lli_lock);
1629 CWARN("no group lock held\n");
1632 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1634 if (fd->fd_grouplock.cg_gid != arg) {
1635 CWARN("group lock %lu doesn't match current id %lu\n",
1636 arg, fd->fd_grouplock.cg_gid);
1637 spin_unlock(&lli->lli_lock);
1641 grouplock = fd->fd_grouplock;
1642 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1643 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1644 spin_unlock(&lli->lli_lock);
/* release outside the spinlock */
1646 cl_put_grouplock(&grouplock);
1647 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1652 * Close inode open handle
1654 * \param dentry [in] dentry which contains the inode
1655 * \param it [in,out] intent which contains open info and result
1658 * \retval <0 failure
/*
 * Close the MDS open handle carried by a lookup intent (see the comment
 * block above).  No-op for the fs root or when the intent holds no open
 * disposition; otherwise a temporary obd_client_handle is filled from
 * the intent and closed, and the enqueue open reference is dropped.
 */
1660 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1662 struct inode *inode = dentry->d_inode;
1663 struct obd_client_handle *och;
1669 /* Root ? Do nothing. */
1670 if (dentry->d_inode->i_sb->s_root == dentry)
1673 /* No open handle to close? Move away */
1674 if (!it_disposition(it, DISP_OPEN_OPEN))
1677 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1679 OBD_ALLOC(och, sizeof(*och));
1681 GOTO(out, rc = -ENOMEM);
1683 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1685 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1688 /* this one is in place of ll_file_open */
1689 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1690 ptlrpc_req_finished(it->d.lustre.it_data);
1691 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1697 * Get size for inode for which FIEMAP mapping is requested.
1698 * Make the FIEMAP get_info call and returns the result.
/*
 * Execute a FIEMAP request against the data export.  Validates flags,
 * optionally flushes dirty pages (FIEMAP_FLAG_SYNC), refuses multi-stripe
 * mappings unless the caller understands DEVICE_ORDER, and short-circuits
 * empty files.  The mapping itself goes through obd_get_info(KEY_FIEMAP).
 */
1700 static int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1703 struct obd_export *exp = ll_i2dtexp(inode);
1704 struct lov_stripe_md *lsm = NULL;
1705 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1706 __u32 vallen = num_bytes;
1710 /* Checks for fiemap flags */
1711 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* report back which flags we do support before failing */
1712 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1716 /* Check for FIEMAP_FLAG_SYNC */
1717 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1718 rc = filemap_fdatawrite(inode->i_mapping);
1723 lsm = ccc_inode_lsm_get(inode);
1727 /* If the stripe_count > 1 and the application does not understand
1728 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1730 if (lsm->lsm_stripe_count > 1 &&
1731 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1732 GOTO(out, rc = -EOPNOTSUPP);
1734 fm_key.oa.o_oi = lsm->lsm_oi;
1735 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
/* make sure the cached size is current before deciding "empty" */
1737 if (i_size_read(inode) == 0) {
1738 rc = ll_glimpse_size(inode);
1743 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1744 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1745 /* If filesize is 0, then there would be no objects for mapping */
1746 if (fm_key.oa.o_size == 0) {
1747 fiemap->fm_mapped_extents = 0;
1751 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1753 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1756 CERROR("obd_get_info failed: rc = %d\n", rc);
1759 ccc_inode_lsm_put(inode, lsm);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.  The
 * user buffer length is read first to size the kernel copy; access is
 * limited to CAP_DAC_READ_SEARCH unless the user_fid2path mount flag
 * is set.
 */
1763 int ll_fid2path(struct inode *inode, void __user *arg)
1765 struct obd_export *exp = ll_i2mdexp(inode);
1766 const struct getinfo_fid2path __user *gfin = arg;
1768 struct getinfo_fid2path *gfout;
1774 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1775 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1778 /* Only need to get the buflen */
1779 if (get_user(pathlen, &gfin->gf_pathlen))
1782 if (pathlen > PATH_MAX)
1785 outsize = sizeof(*gfout) + pathlen;
1786 OBD_ALLOC(gfout, outsize);
1790 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1791 GOTO(gf_free, rc = -EFAULT);
1793 /* Call mdc_iocontrol */
1794 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1798 if (copy_to_user(arg, gfout, outsize))
1802 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size a kernel fiemap buffer from the
 * user-supplied extent count (with overflow check against SIZE_MAX),
 * copy the request in, run ll_do_fiemap(), and copy the header plus
 * mapped extents back out.
 */
1806 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1808 struct ll_user_fiemap *fiemap_s;
1809 size_t num_bytes, ret_bytes;
1810 unsigned int extent_count;
1813 /* Get the extent count so we can calculate the size of
1814 * required fiemap buffer */
1815 if (get_user(extent_count,
1816 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
/* reject counts whose extent array would overflow size_t */
1820 (SIZE_MAX - sizeof(*fiemap_s)) / sizeof(struct ll_fiemap_extent))
1822 num_bytes = sizeof(*fiemap_s) + (extent_count *
1823 sizeof(struct ll_fiemap_extent));
1825 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1826 if (fiemap_s == NULL)
1829 /* get the fiemap value */
1830 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1832 GOTO(error, rc = -EFAULT);
1834 /* If fm_extent_count is non-zero, read the first extent since
1835 * it is used to calculate end_offset and device from previous
1838 if (copy_from_user(&fiemap_s->fm_extents[0],
1839 (char __user *)arg + sizeof(*fiemap_s),
1840 sizeof(struct ll_fiemap_extent)))
1841 GOTO(error, rc = -EFAULT);
1844 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1848 ret_bytes = sizeof(struct ll_user_fiemap);
1850 if (extent_count != 0)
1851 ret_bytes += (fiemap_s->fm_mapped_extents *
1852 sizeof(struct ll_fiemap_extent));
1854 if (copy_to_user((void __user *)arg, fiemap_s, ret_bytes))
1858 OBD_FREE_LARGE(fiemap_s, num_bytes);
1863 * Read the data_version for inode.
1865 * This value is computed using stripe object version on OST.
1866 * Version is computed using server side locking.
1868 * @param sync if do sync on the OST side;
1870 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1871 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * Fetch the file's data_version from its OST objects (see the comment
 * block above for the flag semantics).  A file with no objects reports
 * version 0.  The obdo is heap-allocated and the lsm reference is put
 * on exit.
 */
1873 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1875 struct lov_stripe_md *lsm = NULL;
1876 struct ll_sb_info *sbi = ll_i2sbi(inode);
1877 struct obdo *obdo = NULL;
1881 /* If no stripe, we consider version is 0. */
1882 lsm = ccc_inode_lsm_get(inode);
1883 if (!lsm_has_objects(lsm)) {
1885 CDEBUG(D_INODE, "No object for inode\n");
1889 OBD_ALLOC_PTR(obdo);
1891 GOTO(out, rc = -ENOMEM);
1893 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, flags);
1895 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1898 *data_version = obdo->o_data_version;
1904 ccc_inode_lsm_put(inode, lsm);
1909 * Trigger a HSM release request for the provided inode.
/*
 * HSM release: take a write lease on the file, flush and grab the latest
 * data_version and [am]time, then close the inode open handle with
 * MDS_HSM_RELEASE semantics so the MDT can free the OST objects.  The
 * lease is closed on the error path.
 */
1911 int ll_hsm_release(struct inode *inode)
1913 struct cl_env_nest nest;
1915 struct obd_client_handle *och = NULL;
1916 __u64 data_version = 0;
1920 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1921 ll_get_fsname(inode->i_sb, NULL, 0),
1922 PFID(&ll_i2info(inode)->lli_fid));
1924 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1926 GOTO(out, rc = PTR_ERR(och));
1928 /* Grab latest data_version and [am]time values */
1929 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1933 env = cl_env_nested_get(&nest);
1935 GOTO(out, rc = PTR_ERR(env));
1937 ll_merge_attr(env, inode);
1938 cl_env_nested_put(&nest, env);
1940 /* Release the file.
1941 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1942 * we still need it to pack l_remote_handle to MDT. */
1943 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
1949 if (och != NULL && !IS_ERR(och)) /* close the file */
1950 ll_lease_close(och, inode, NULL);
/* Per-call scratch state for ll_swap_layouts(): the two inodes, saved
 * [am]times to restore after the swap, and the data-version checks the
 * caller requested for each side. */
1955 struct ll_swap_stack {
1956 struct iattr ia1, ia2;
1958 struct inode *inode1, *inode2;
1959 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS: atomically exchange the layouts of two files
 * on the MDT.  Ordering by FID avoids lock inversion; an optional group
 * lock flushes dirty cache, optional data-version checks abort with
 * -EAGAIN if either file changed, and [am]times are restored afterwards
 * when the KEEP_* flags are set.
 */
1962 static int ll_swap_layouts(struct file *file1, struct file *file2,
1963 struct lustre_swap_layouts *lsl)
1965 struct mdc_swap_layouts msl;
1966 struct md_op_data *op_data;
1969 struct ll_swap_stack *llss = NULL;
1972 OBD_ALLOC_PTR(llss);
1976 llss->inode1 = file1->f_dentry->d_inode;
1977 llss->inode2 = file2->f_dentry->d_inode;
1979 if (!S_ISREG(llss->inode2->i_mode))
1980 GOTO(free, rc = -EINVAL);
1982 if (inode_permission(llss->inode1, MAY_WRITE) ||
1983 inode_permission(llss->inode2, MAY_WRITE))
1984 GOTO(free, rc = -EPERM);
1986 if (llss->inode2->i_sb != llss->inode1->i_sb)
1987 GOTO(free, rc = -EXDEV);
1989 /* we use 2 bool because it is easier to swap than 2 bits */
1990 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1991 llss->check_dv1 = true;
1993 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1994 llss->check_dv2 = true;
1996 /* we cannot use lsl->sl_dvX directly because we may swap them */
1997 llss->dv1 = lsl->sl_dv1;
1998 llss->dv2 = lsl->sl_dv2;
2000 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2001 if (rc == 0) /* same file, done! */
/* canonical ordering: always operate with the smaller FID first */
2004 if (rc < 0) { /* sequentialize it */
2005 swap(llss->inode1, llss->inode2);
2007 swap(llss->dv1, llss->dv2);
2008 swap(llss->check_dv1, llss->check_dv2);
2012 if (gid != 0) { /* application asks to flush dirty cache */
2013 rc = ll_get_grouplock(llss->inode1, file1, gid);
2017 rc = ll_get_grouplock(llss->inode2, file2, gid);
2019 ll_put_grouplock(llss->inode1, file1, gid);
2024 /* to be able to restore mtime and atime after swap
2025 * we need to first save them */
2027 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2028 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2029 llss->ia1.ia_atime = llss->inode1->i_atime;
2030 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2031 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2032 llss->ia2.ia_atime = llss->inode2->i_atime;
2033 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2036 /* ultimate check, before swaping the layouts we check if
2037 * dataversion has changed (if requested) */
2038 if (llss->check_dv1) {
2039 rc = ll_data_version(llss->inode1, &dv, 0);
2042 if (dv != llss->dv1)
2043 GOTO(putgl, rc = -EAGAIN);
2046 if (llss->check_dv2) {
2047 rc = ll_data_version(llss->inode2, &dv, 0);
2050 if (dv != llss->dv2)
2051 GOTO(putgl, rc = -EAGAIN);
2054 /* struct md_op_data is used to send the swap args to the mdt
2055 * only flags is missing, so we use struct mdc_swap_layouts
2056 * through the md_op_data->op_data */
2057 /* flags from user space have to be converted before they are send to
2058 * server, no flag is sent today, they are only used on the client */
2061 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2062 0, LUSTRE_OPC_ANY, &msl);
2063 if (IS_ERR(op_data))
2064 GOTO(free, rc = PTR_ERR(op_data));
2066 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2067 sizeof(*op_data), op_data, NULL);
2068 ll_finish_md_op_data(op_data);
2072 ll_put_grouplock(llss->inode2, file2, gid);
2073 ll_put_grouplock(llss->inode1, file1, gid);
2076 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2080 /* clear useless flags */
2081 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2082 llss->ia1.ia_valid &= ~ATTR_MTIME;
2083 llss->ia2.ia_valid &= ~ATTR_MTIME;
2086 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2087 llss->ia1.ia_valid &= ~ATTR_ATIME;
2088 llss->ia2.ia_valid &= ~ATTR_ATIME;
2091 /* update time if requested */
/* ia2 was saved from inode2, which after the swap holds inode1's old
 * data; apply it to file1 (and vice versa below) */
2093 if (llss->ia2.ia_valid != 0) {
2094 mutex_lock(&llss->inode1->i_mutex);
2095 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2096 mutex_unlock(&llss->inode1->i_mutex);
2099 if (llss->ia1.ia_valid != 0) {
2102 mutex_lock(&llss->inode2->i_mutex);
2103 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2104 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Set/clear HSM state flags on an inode via an MDC ioctl.  Flags outside
 * HSM_USER_MASK require CAP_SYS_ADMIN.
 */
2116 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2118 struct md_op_data *op_data;
2121 /* Non-root users are forbidden to set or clear flags which are
2122 * NOT defined in HSM_USER_MASK. */
2123 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2124 !cfs_capable(CFS_CAP_SYS_ADMIN))
2127 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2128 LUSTRE_OPC_ANY, hss);
2129 if (IS_ERR(op_data))
2130 RETURN(PTR_ERR(op_data));
2132 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2133 sizeof(*op_data), op_data, NULL);
2135 ll_finish_md_op_data(op_data);
/*
 * HSM import: mark a regular file as archived/exists/released, then force
 * its mode, ownership, size and timestamps to the values recorded in the
 * user-supplied hsm_user_import, so the file appears as the archived copy.
 */
2140 static int ll_hsm_import(struct inode *inode, struct file *file,
2141 struct hsm_user_import *hui)
2143 struct hsm_state_set *hss = NULL;
2144 struct iattr *attr = NULL;
2148 if (!S_ISREG(inode->i_mode))
2154 GOTO(out, rc = -ENOMEM);
2156 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2157 hss->hss_archive_id = hui->hui_archive_id;
2158 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2159 rc = ll_hsm_state_set(inode, hss);
2163 OBD_ALLOC_PTR(attr);
2165 GOTO(out, rc = -ENOMEM);
/* keep only permission bits from the import; force regular-file type */
2167 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2168 attr->ia_mode |= S_IFREG;
2169 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2170 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2171 attr->ia_size = hui->hui_size;
2172 attr->ia_mtime.tv_sec = hui->hui_mtime;
2173 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2174 attr->ia_atime.tv_sec = hui->hui_atime;
2175 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
/* ATTR_FORCE bypasses the usual permission checks in setattr */
2177 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2178 ATTR_UID | ATTR_GID |
2179 ATTR_MTIME | ATTR_MTIME_SET |
2180 ATTR_ATIME | ATTR_ATIME_SET;
2182 mutex_lock(&inode->i_mutex);
2184 rc = ll_setattr_raw(file->f_dentry, attr, true);
2188 mutex_unlock(&inode->i_mutex);
/* Translate an open-mode mask into the LL_LEASE_{RD,WR}LCK bitmask
 * returned to userspace by the lease ioctls. */
2200 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2202 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2203 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * Main ioctl dispatcher for llite files.  Handles per-fd flags, striping
 * get/set, layout swap, FIEMAP, group locks, FID/path translation, data
 * version, HSM state/import, and lease get/set/unlock; anything
 * unrecognized falls through to the registered iocontrol callbacks and
 * finally to the data export.
 * NOTE(review): many lines (braces, RETURNs, some case labels) are elided
 * from this extract; the structure below is incomplete as shown.
 */
2207 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2209 struct inode *inode = file->f_dentry->d_inode;
2210 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2214 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2215 PFID(ll_inode2fid(inode)), inode, cmd);
2216 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2218 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2219 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2223 case LL_IOC_GETFLAGS:
2224 /* Get the current value of the file flags */
2225 return put_user(fd->fd_flags, (int __user *)arg);
2226 case LL_IOC_SETFLAGS:
2227 case LL_IOC_CLRFLAGS:
2228 /* Set or clear specific file flags */
2229 /* XXX This probably needs checks to ensure the flags are
2230 * not abused, and to handle any flag side effects.
2232 if (get_user(flags, (int __user *) arg))
2235 if (cmd == LL_IOC_SETFLAGS) {
2236 if ((flags & LL_FILE_IGNORE_LOCK) &&
2237 !(file->f_flags & O_DIRECT)) {
2238 CERROR("%s: unable to disable locking on "
2239 "non-O_DIRECT file\n", current->comm);
2243 fd->fd_flags |= flags;
2245 fd->fd_flags &= ~flags;
2248 case LL_IOC_LOV_SETSTRIPE:
2249 RETURN(ll_lov_setstripe(inode, file, arg));
2250 case LL_IOC_LOV_SETEA:
2251 RETURN(ll_lov_setea(inode, file, arg));
2252 case LL_IOC_LOV_SWAP_LAYOUTS: {
2254 struct lustre_swap_layouts lsl;
2256 if (copy_from_user(&lsl, (char __user *)arg,
2257 sizeof(struct lustre_swap_layouts)))
/* both files must be writable for a layout swap */
2260 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2263 file2 = fget(lsl.sl_fd);
2268 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2269 rc = ll_swap_layouts(file, file2, &lsl);
2273 case LL_IOC_LOV_GETSTRIPE:
2274 RETURN(ll_file_getstripe(inode,
2275 (struct lov_user_md __user *)arg));
2276 case FSFILT_IOC_FIEMAP:
2277 RETURN(ll_ioctl_fiemap(inode, arg));
2278 case FSFILT_IOC_GETFLAGS:
2279 case FSFILT_IOC_SETFLAGS:
2280 RETURN(ll_iocontrol(inode, file, cmd, arg));
2281 case FSFILT_IOC_GETVERSION_OLD:
2282 case FSFILT_IOC_GETVERSION:
2283 RETURN(put_user(inode->i_generation, (int __user *)arg));
2284 case LL_IOC_GROUP_LOCK:
2285 RETURN(ll_get_grouplock(inode, file, arg));
2286 case LL_IOC_GROUP_UNLOCK:
2287 RETURN(ll_put_grouplock(inode, file, arg));
2288 case IOC_OBD_STATFS:
2289 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2291 /* We need to special case any other ioctls we want to handle,
2292 * to send them to the MDS/OST as appropriate and to properly
2293 * network encode the arg field.
2294 case FSFILT_IOC_SETVERSION_OLD:
2295 case FSFILT_IOC_SETVERSION:
2297 case LL_IOC_FLUSHCTX:
2298 RETURN(ll_flush_ctx(inode));
2299 case LL_IOC_PATH2FID: {
2300 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2301 sizeof(struct lu_fid)))
2306 case LL_IOC_GETPARENT:
2307 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2309 case OBD_IOC_FID2PATH:
2310 RETURN(ll_fid2path(inode, (void __user *)arg));
2311 case LL_IOC_DATA_VERSION: {
2312 struct ioc_data_version idv;
2315 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* mask to the flush flags ll_data_version() understands */
2318 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2319 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2322 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2328 case LL_IOC_GET_MDTIDX: {
2331 mdtidx = ll_get_mdt_idx(inode);
2335 if (put_user((int)mdtidx, (int __user *)arg))
2340 case OBD_IOC_GETDTNAME:
2341 case OBD_IOC_GETMDNAME:
2342 RETURN(ll_get_obd_name(inode, cmd, arg));
2343 case LL_IOC_HSM_STATE_GET: {
2344 struct md_op_data *op_data;
2345 struct hsm_user_state *hus;
2352 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2353 LUSTRE_OPC_ANY, hus);
2354 if (IS_ERR(op_data)) {
2356 RETURN(PTR_ERR(op_data));
2359 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2362 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2365 ll_finish_md_op_data(op_data);
2369 case LL_IOC_HSM_STATE_SET: {
2370 struct hsm_state_set *hss;
2377 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2382 rc = ll_hsm_state_set(inode, hss);
2387 case LL_IOC_HSM_ACTION: {
2388 struct md_op_data *op_data;
2389 struct hsm_current_action *hca;
2396 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2397 LUSTRE_OPC_ANY, hca);
2398 if (IS_ERR(op_data)) {
2400 RETURN(PTR_ERR(op_data));
2403 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2406 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2409 ll_finish_md_op_data(op_data);
2413 case LL_IOC_SET_LEASE: {
2414 struct ll_inode_info *lli = ll_i2info(inode);
2415 struct obd_client_handle *och = NULL;
/* the requested lease mode must be compatible with the open mode */
2420 case LL_LEASE_WRLCK:
2421 if (!(file->f_mode & FMODE_WRITE))
2423 fmode = FMODE_WRITE;
2425 case LL_LEASE_RDLCK:
2426 if (!(file->f_mode & FMODE_READ))
2430 case LL_LEASE_UNLCK:
2431 mutex_lock(&lli->lli_och_mutex);
2432 if (fd->fd_lease_och != NULL) {
2433 och = fd->fd_lease_och;
2434 fd->fd_lease_och = NULL;
2436 mutex_unlock(&lli->lli_och_mutex);
2441 fmode = och->och_flags;
2442 rc = ll_lease_close(och, inode, &lease_broken);
2449 RETURN(ll_lease_type_from_fmode(fmode));
2454 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2456 /* apply for lease */
2457 och = ll_lease_open(inode, file, fmode, 0);
2459 RETURN(PTR_ERR(och));
2462 mutex_lock(&lli->lli_och_mutex);
2463 if (fd->fd_lease_och == NULL) {
2464 fd->fd_lease_och = och;
2467 mutex_unlock(&lli->lli_och_mutex);
2469 /* impossible now that only excl is supported for now */
2470 ll_lease_close(och, inode, &lease_broken);
2475 case LL_IOC_GET_LEASE: {
2476 struct ll_inode_info *lli = ll_i2info(inode);
2477 struct ldlm_lock *lock = NULL;
2480 mutex_lock(&lli->lli_och_mutex);
2481 if (fd->fd_lease_och != NULL) {
2482 struct obd_client_handle *och = fd->fd_lease_och;
2484 lock = ldlm_handle2lock(&och->och_lease_handle);
2486 lock_res_and_lock(lock);
/* a cancelled lease lock reports no lease held */
2487 if (!ldlm_is_cancel(lock))
2488 fmode = och->och_flags;
2490 unlock_res_and_lock(lock);
2491 LDLM_LOCK_PUT(lock);
2494 mutex_unlock(&lli->lli_och_mutex);
2496 RETURN(ll_lease_type_from_fmode(fmode));
2498 case LL_IOC_HSM_IMPORT: {
2499 struct hsm_user_import *hui;
2505 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2510 rc = ll_hsm_import(inode, file, hui);
/* last resort: registered iocontrol callbacks, then the data export */
2520 ll_iocontrol_call(inode, file, cmd, arg, &err))
2523 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2524 (void __user *)arg));
2529 #ifndef HAVE_FILE_LLSEEK_SIZE
/* Commit a computed seek offset: reject negative (unless the file allows
 * unsigned offsets) or beyond-maxsize values, and update f_pos/f_version
 * only when the position actually changes.  Compat helper used when the
 * kernel lacks generic_file_llseek_size(). */
2530 static inline loff_t
2531 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2533 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2535 if (offset > maxsize)
2538 if (offset != file->f_pos) {
2539 file->f_pos = offset;
2540 file->f_version = 0;
/*
 * Compat copy of the kernel's generic_file_llseek_size() for kernels
 * without it: resolve SEEK_CUR/END/HOLE/DATA against @eof and clamp via
 * llseek_execute().  i_mutex serializes the SEEK_CUR read-modify-write
 * of f_pos.
 */
2546 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2547 loff_t maxsize, loff_t eof)
2549 struct inode *inode = file->f_dentry->d_inode;
2557 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2558 * position-querying operation. Avoid rewriting the "same"
2559 * f_pos value back to the file because a concurrent read(),
2560 * write() or lseek() might have altered it
2565 * f_lock protects against read/modify/write race with other
2566 * SEEK_CURs. Note that parallel writes and reads behave
2569 mutex_lock(&inode->i_mutex);
2570 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2571 mutex_unlock(&inode->i_mutex);
2575 * In the generic case the entire file is data, so as long as
2576 * offset isn't at the end of the file then the offset is data.
2583 * There is a virtual hole at the end of the file, so as long as
2584 * offset isn't i_size or larger, return i_size.
2592 return llseek_execute(file, offset, maxsize);
/*
 * llseek(): for SEEK_END/HOLE/DATA first glimpse the cluster-wide size
 * from the OSTs so eof is current, then delegate to the llseek_size
 * helper with the filesystem's max byte limit.
 */
2596 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2598 struct inode *inode = file->f_dentry->d_inode;
2599 loff_t retval, eof = 0;
/* retval here is only the tentative target used for the debug trace */
2602 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2603 (origin == SEEK_CUR) ? file->f_pos : 0);
2604 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2605 PFID(ll_inode2fid(inode)), inode, retval, retval,
2607 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2609 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2610 retval = ll_glimpse_size(inode);
2613 eof = i_size_read(inode);
2616 retval = ll_generic_file_llseek_size(file, offset, origin,
2617 ll_file_maxbytes(inode), eof);
/*
 * flush(): called on close(fd).  Report (and clear) any async writeback
 * error recorded on the inode or its cl_object, but suppress the error
 * if this fd already reported a write failure, so the application is not
 * told twice.
 */
2621 static int ll_flush(struct file *file, fl_owner_t id)
2623 struct inode *inode = file->f_dentry->d_inode;
2624 struct ll_inode_info *lli = ll_i2info(inode);
2625 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2628 LASSERT(!S_ISDIR(inode->i_mode));
2630 /* catch async errors that were recorded back when async writeback
2631 * failed for pages in this mapping. */
2632 rc = lli->lli_async_rc;
2633 lli->lli_async_rc = 0;
2634 if (lli->lli_clob != NULL) {
2635 err = lov_read_and_clear_async_rc(lli->lli_clob);
2640 /* The application has been told write failure already.
2641 * Do not report failure again. */
2642 if (fd->fd_write_failed)
2644 return rc ? -EIO : 0;
2648 * Called to make sure a portion of file has been written out.
2649 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2651 * Return how many pages have been written.
/*
 * Sync a byte range via a CIT_FSYNC cl_io (see the comment block above:
 * non-LOCAL modes send OST_SYNC RPCs; returns pages written on success).
 */
2653 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2654 enum cl_fsync_mode mode, int ignore_layout)
2656 struct cl_env_nest nest;
2659 struct obd_capa *capa = NULL;
2660 struct cl_fsync_io *fio;
2664 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2665 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2668 env = cl_env_nested_get(&nest);
2670 RETURN(PTR_ERR(env));
2672 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2674 io = ccc_env_thread_io(env);
2675 io->ci_obj = ll_i2info(inode)->lli_clob;
2676 io->ci_ignore_layout = ignore_layout;
2678 /* initialize parameters for sync */
2679 fio = &io->u.ci_fsync;
2680 fio->fi_capa = capa;
2681 fio->fi_start = start;
2683 fio->fi_fid = ll_inode2fid(inode);
2684 fio->fi_mode = mode;
2685 fio->fi_nr_written = 0;
2687 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2688 result = cl_io_loop(env, io);
2690 result = io->ci_result;
/* success: report how many pages were written */
2692 result = fio->fi_nr_written;
2693 cl_io_fini(env, io);
2694 cl_env_nested_put(&nest, env);
2702 * When dentry is provided (the 'else' case), *file->f_dentry may be
2703 * null and dentry must be used directly rather than pulled from
2704 * *file->f_dentry as is done otherwise.
2707 #ifdef HAVE_FILE_FSYNC_4ARGS
2708 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2710 struct dentry *dentry = file->f_dentry;
2711 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2712 int ll_fsync(struct file *file, int datasync)
2714 struct dentry *dentry = file->f_dentry;
2716 loff_t end = LLONG_MAX;
2718 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2721 loff_t end = LLONG_MAX;
2723 struct inode *inode = dentry->d_inode;
2724 struct ll_inode_info *lli = ll_i2info(inode);
2725 struct ptlrpc_request *req;
2726 struct obd_capa *oc;
2730 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2731 PFID(ll_inode2fid(inode)), inode);
2732 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2734 #ifdef HAVE_FILE_FSYNC_4ARGS
2735 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2736 mutex_lock(&inode->i_mutex);
2738 /* fsync's caller has already called _fdata{sync,write}, we want
2739 * that IO to finish before calling the osc and mdc sync methods */
2740 rc = filemap_fdatawait(inode->i_mapping);
2743 /* catch async errors that were recorded back when async writeback
2744 * failed for pages in this mapping. */
2745 if (!S_ISDIR(inode->i_mode)) {
2746 err = lli->lli_async_rc;
2747 lli->lli_async_rc = 0;
2750 err = lov_read_and_clear_async_rc(lli->lli_clob);
2755 oc = ll_mdscapa_get(inode);
2756 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2762 ptlrpc_req_finished(req);
2764 if (S_ISREG(inode->i_mode)) {
2765 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2767 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2768 if (rc == 0 && err < 0)
2771 fd->fd_write_failed = true;
2773 fd->fd_write_failed = false;
2776 #ifdef HAVE_FILE_FSYNC_4ARGS
2777 mutex_unlock(&inode->i_mutex);
/*
 * ll_file_flock(): .flock/.lock method — translate a VFS file_lock
 * (flock or POSIX) into an LDLM_FLOCK enqueue on the MDT, then mirror
 * the granted state into the kernel's local lock bookkeeping via
 * flock_lock_file_wait()/posix_lock_file_wait().  On local bookkeeping
 * failure the server lock is dropped again with an LCK_NL enqueue.
 */
2783 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2785 struct inode *inode = file->f_dentry->d_inode;
2786 struct ll_sb_info *sbi = ll_i2sbi(inode);
2787 struct ldlm_enqueue_info einfo = {
2788 .ei_type = LDLM_FLOCK,
2789 .ei_cb_cp = ldlm_flock_completion_ast,
2790 .ei_cbdata = file_lock,
2792 struct md_op_data *op_data;
2793 struct lustre_handle lockh = {0};
2794 ldlm_policy_data_t flock = {{0}};
/* Remember caller's type; einfo.ei_mode temporarily overwrites it. */
2795 int fl_type = file_lock->fl_type;
2801 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2802 PFID(ll_inode2fid(inode)), file_lock);
2804 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
/* Fill the flock policy: BSD flock vs POSIX record lock. */
2806 if (file_lock->fl_flags & FL_FLOCK) {
2807 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2808 /* flocks are whole-file locks */
2809 flock.l_flock.end = OFFSET_MAX;
2810 /* For flocks owner is determined by the local file desctiptor*/
2811 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2812 } else if (file_lock->fl_flags & FL_POSIX) {
2813 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2814 flock.l_flock.start = file_lock->fl_start;
2815 flock.l_flock.end = file_lock->fl_end;
2819 flock.l_flock.pid = file_lock->fl_pid;
2821 /* Somewhat ugly workaround for svc lockd.
2822 * lockd installs custom fl_lmops->lm_compare_owner that checks
2823 * for the fl_owner to be the same (which it always is on local node
2824 * I guess between lockd processes) and then compares pid.
2825 * As such we assign pid to the owner field to make it all work,
2826 * conflict with normal locks is unlikely since pid space and
2827 * pointer space for current->files are not intersecting */
2828 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2829 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Map fcntl lock type to LDLM mode (elided switch on fl_type). */
2833 einfo.ei_mode = LCK_PR;
2836 /* An unlock request may or may not have any relation to
2837 * existing locks so we may not be able to pass a lock handle
2838 * via a normal ldlm_lock_cancel() request. The request may even
2839 * unlock a byte range in the middle of an existing lock. In
2840 * order to process an unlock request we need all of the same
2841 * information that is given with a normal read or write record
2842 * lock request. To avoid creating another ldlm unlock (cancel)
2843 * message we'll treat a LCK_NL flock request as an unlock. */
2844 einfo.ei_mode = LCK_NL;
2847 einfo.ei_mode = LCK_PW;
2850 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* Map the fcntl command to enqueue flags (elided switch on cmd). */
2865 flags = LDLM_FL_BLOCK_NOWAIT;
2871 flags = LDLM_FL_TEST_LOCK;
2874 CERROR("unknown fcntl lock command: %d\n", cmd);
2878 /* Save the old mode so that if the mode in the lock changes we
2879 * can decrement the appropriate reader or writer refcount. */
2880 file_lock->fl_type = einfo.ei_mode;
2882 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2883 LUSTRE_OPC_ANY, NULL);
2884 if (IS_ERR(op_data))
2885 RETURN(PTR_ERR(op_data));
2887 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2888 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2889 flock.l_flock.pid, flags, einfo.ei_mode,
2890 flock.l_flock.start, flock.l_flock.end);
/* Enqueue the flock on the MDT. */
2892 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2895 /* Restore the file lock type if not TEST lock. */
2896 if (!(flags & LDLM_FL_TEST_LOCK))
2897 file_lock->fl_type = fl_type;
/* Mirror the server-side result into the kernel's local lock lists. */
2899 if ((file_lock->fl_flags & FL_FLOCK) &&
2900 (rc == 0 || file_lock->fl_type == F_UNLCK))
2901 rc2 = flock_lock_file_wait(file, file_lock);
2902 if ((file_lock->fl_flags & FL_POSIX) &&
2903 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2904 !(flags & LDLM_FL_TEST_LOCK))
2905 rc2 = posix_lock_file_wait(file, file_lock);
/* Local bookkeeping failed: release the server lock with LCK_NL. */
2907 if (rc2 && file_lock->fl_type != F_UNLCK) {
2908 einfo.ei_mode = LCK_NL;
2909 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2914 ll_finish_md_op_data(op_data);
/*
 * ll_get_fid_by_name(): look up the FID of @name under @parent with a
 * getattr-by-name RPC to the MDT.  On success *fid is filled from the
 * reply's mdt_body.  Returns 0 or negative errno (-EFAULT if the reply
 * body is missing).
 */
2919 int ll_get_fid_by_name(struct inode *parent, const char *name,
2920 int namelen, struct lu_fid *fid)
2922 struct md_op_data *op_data = NULL;
2923 struct mdt_body *body;
2924 struct ptlrpc_request *req;
2928 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2929 LUSTRE_OPC_ANY, NULL);
2930 if (IS_ERR(op_data))
2931 RETURN(PTR_ERR(op_data));
/* Only the FID is needed from the getattr reply. */
2933 op_data->op_valid = OBD_MD_FLID;
2934 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2935 ll_finish_md_op_data(op_data);
2939 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2941 GOTO(out_req, rc = -EFAULT);
2943 *fid = body->mbo_fid1;
2945 ptlrpc_req_finished(req);
/*
 * ll_migrate(): move directory entry @name under @parent to MDT @mdtidx
 * using a CLI_MIGRATE rename RPC.  The child FID is found via dcache
 * lookup (invalidating cached aliases) or, failing that, via
 * ll_get_fid_by_name().  Migration is skipped (rc = 0) if the object is
 * already on the target MDT.
 */
2949 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2950 const char *name, int namelen)
2952 struct dentry *dchild = NULL;
2953 struct inode *child_inode = NULL;
2954 struct md_op_data *op_data;
2955 struct ptlrpc_request *request = NULL;
2960 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2961 name, PFID(ll_inode2fid(parent)), mdtidx);
2963 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2964 0, LUSTRE_OPC_ANY, NULL);
2965 if (IS_ERR(op_data))
2966 RETURN(PTR_ERR(op_data));
2968 /* Get child FID first */
2969 qstr.hash = full_name_hash(name, namelen);
/* Prefer the dcache; hold child's i_mutex across the migration. */
2972 dchild = d_lookup(file->f_dentry, &qstr);
2973 if (dchild != NULL) {
2974 if (dchild->d_inode != NULL) {
2975 child_inode = igrab(dchild->d_inode);
2976 if (child_inode != NULL) {
2977 mutex_lock(&child_inode->i_mutex);
2978 op_data->op_fid3 = *ll_inode2fid(child_inode);
2979 ll_invalidate_aliases(child_inode);
/* Fall back to an MDT lookup when dcache has no entry. */
2984 rc = ll_get_fid_by_name(parent, name, namelen,
2990 if (!fid_is_sane(&op_data->op_fid3)) {
2991 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
2992 ll_get_fsname(parent->i_sb, NULL, 0), name,
2993 PFID(&op_data->op_fid3));
2994 GOTO(out_free, rc = -EINVAL);
/* Nothing to do if the object already lives on the target MDT. */
2997 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
3002 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
3003 PFID(&op_data->op_fid3), mdtidx);
3004 GOTO(out_free, rc = 0);
/* Migration is implemented as a rename onto itself with CLI_MIGRATE. */
3007 op_data->op_mds = mdtidx;
3008 op_data->op_cli_flags = CLI_MIGRATE;
3009 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
3010 namelen, name, namelen, &request);
3012 ll_update_times(request, parent);
3014 ptlrpc_req_finished(request);
/* Drop the (now stale) local child inode and release its mutex. */
3019 if (child_inode != NULL) {
3020 clear_nlink(child_inode);
3021 mutex_unlock(&child_inode->i_mutex);
3025 ll_finish_md_op_data(op_data);
3030 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * ll_have_md_lock(): non-blocking check whether metadata ibits locks
 * covering *bits are cached locally.  Found bits are cleared from
 * *bits; the return value is true iff all requested bits were matched.
 */
3038 * test if some locks matching bits and l_req_mode are acquired
3039 * - bits can be in different locks
3040 * - if found clear the common lock bits in *bits
3041 * - the bits not found, are kept in *bits
3043 * \param bits [IN] searched lock bits [IN]
3044 * \param l_req_mode [IN] searched lock mode
3045 * \retval boolean, true iff all bits are found
3047 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3049 struct lustre_handle lockh;
3050 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four modes. */
3051 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3052 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3061 fid = &ll_i2info(inode)->lli_fid;
3062 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3063 ldlm_lockname[mode]);
/* TEST_LOCK: match only, take no references on the found lock. */
3065 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe one inodebit at a time; stop early once all bits are found. */
3066 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3067 policy.l_inodebits.bits = *bits & (1 << i);
3068 if (policy.l_inodebits.bits == 0)
3071 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3072 &policy, mode, &lockh)) {
3073 struct ldlm_lock *lock;
3075 lock = ldlm_handle2lock(&lockh);
3078 ~(lock->l_policy_data.l_inodebits.bits);
3079 LDLM_LOCK_PUT(lock);
3081 *bits &= ~policy.l_inodebits.bits;
/*
 * ll_take_md_lock(): try to match a cached metadata ibits lock for
 * @bits; unlike ll_have_md_lock() this takes a reference (no
 * LDLM_FL_TEST_LOCK) and returns the matched mode (0 if none), with the
 * handle in *lockh for the caller to decref later.
 */
3088 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3089 struct lustre_handle *lockh, __u64 flags,
3092 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3097 fid = &ll_i2info(inode)->lli_fid;
3098 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3100 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3101 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * ll_inode_revalidate_fini(): post-process a revalidate RPC result.
 * -ENOENT on a non-regular/non-directory inode is treated as a benign
 * race (object already unlinked); other errors are logged, with EACCES
 * and EIDRM demoted to D_INFO verbosity.
 */
3106 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3108 /* Already unlinked. Just update nlink and return success */
3109 if (rc == -ENOENT) {
3111 /* This path cannot be hit for regular files unless in
3112 * case of obscure races, so no need to to validate
3114 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3116 } else if (rc != 0) {
3117 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3118 "%s: revalidate FID "DFID" error: rc = %d\n",
3119 ll_get_fsname(inode->i_sb, NULL, 0),
3120 PFID(ll_inode2fid(inode)), rc);
/*
 * __ll_inode_revalidate(): refresh cached attributes of @dentry's inode
 * from the MDT.  With OBD_CONNECT_ATTRFID the refresh is done through an
 * IT_GETATTR/IT_LOOKUP intent lock (which also revalidates the dentry);
 * otherwise, if no matching ibits lock is cached, a plain md_getattr RPC
 * is issued and the reply applied via ll_prep_inode().
 */
3126 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3128 struct inode *inode = dentry->d_inode;
3129 struct ptlrpc_request *req = NULL;
3130 struct obd_export *exp;
3134 LASSERT(inode != NULL);
3136 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3137 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3139 exp = ll_i2mdexp(inode);
3141 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3142 * But under CMD case, it caused some lock issues, should be fixed
3143 * with new CMD ibits lock. See bug 12718 */
3144 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3145 struct lookup_intent oit = { .it_op = IT_GETATTR };
3146 struct md_op_data *op_data;
/* Pure LOOKUP bits only need the cheaper IT_LOOKUP intent. */
3148 if (ibits == MDS_INODELOCK_LOOKUP)
3149 oit.it_op = IT_LOOKUP;
3151 /* Call getattr by fid, so do not provide name at all. */
3152 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3153 dentry->d_inode, NULL, 0, 0,
3154 LUSTRE_OPC_ANY, NULL);
3155 if (IS_ERR(op_data))
3156 RETURN(PTR_ERR(op_data));
3158 rc = md_intent_lock(exp, op_data, &oit, &req,
3159 &ll_md_blocking_ast, 0);
3160 ll_finish_md_op_data(op_data);
3162 rc = ll_inode_revalidate_fini(inode, rc);
3166 rc = ll_revalidate_it_finish(req, &oit, dentry);
3168 ll_intent_release(&oit);
3172 /* Unlinked? Unhash dentry, so it is not picked up later by
3173 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3174 here to preserve get_cwd functionality on 2.6.
3176 if (!dentry->d_inode->i_nlink)
3177 d_lustre_invalidate(dentry, 0);
3179 ll_lookup_finish_locks(&oit, dentry);
/* No ATTRFID: refresh only if attributes are not lock-protected. */
3180 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3181 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3182 u64 valid = OBD_MD_FLGETATTR;
3183 struct md_op_data *op_data;
/* Regular files also need striping EA; size the reply buffer. */
3186 if (S_ISREG(inode->i_mode)) {
3187 rc = ll_get_default_mdsize(sbi, &ealen);
3190 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3193 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3194 0, ealen, LUSTRE_OPC_ANY,
3196 if (IS_ERR(op_data))
3197 RETURN(PTR_ERR(op_data));
3199 op_data->op_valid = valid;
3200 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3201 * capa for this inode. Because we only keep capas of dirs
3203 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3204 ll_finish_md_op_data(op_data);
3206 rc = ll_inode_revalidate_fini(inode, rc);
3210 rc = ll_prep_inode(&inode, req, NULL, NULL);
3213 ptlrpc_req_finished(req);
/*
 * ll_merge_md_attr(): for a striped directory, merge the attributes of
 * all stripes (via md_merge_attr on lli_lsm_md) and apply the combined
 * nlink/blocks/size/times to the master inode.
 */
3217 static int ll_merge_md_attr(struct inode *inode)
3219 struct cl_attr attr = { 0 };
3222 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3223 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3224 &attr, ll_md_blocking_ast);
3228 set_nlink(inode, attr.cat_nlink);
3229 inode->i_blocks = attr.cat_blocks;
3230 i_size_write(inode, attr.cat_size);
/* Cache merged timestamps in lli; copied to the inode by callers. */
3232 ll_i2info(inode)->lli_atime = attr.cat_atime;
3233 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3234 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * ll_inode_revalidate(): revalidate metadata via __ll_inode_revalidate()
 * and then bring the size up to date — merged stripe attrs for striped
 * directories, cached lli times for other non-regular inodes, or a
 * glimpse RPC for regular files (skipped during HSM restore, when the
 * MDT already supplied the correct size).
 */
3240 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3242 struct inode *inode = dentry->d_inode;
3246 rc = __ll_inode_revalidate(dentry, ibits);
3250 /* if object isn't regular file, don't validate size */
3251 if (!S_ISREG(inode->i_mode)) {
3252 if (S_ISDIR(inode->i_mode) &&
3253 ll_i2info(inode)->lli_lsm_md != NULL) {
3254 rc = ll_merge_md_attr(inode);
3259 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3260 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3261 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3263 /* In case of restore, the MDT has the right size and has
3264 * already send it back without granting the layout lock,
3265 * inode is up-to-date so glimpse is useless.
3266 * Also to glimpse we need the layout, in case of a running
3267 * restore the MDT holds the layout lock so the glimpse will
3268 * block up to the end of restore (getattr will block)
3270 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3271 rc = ll_glimpse_size(inode);
/*
 * ll_getattr(): VFS getattr — revalidate UPDATE|LOOKUP metadata, then
 * copy inode attributes into *stat.  With a 32-bit API client the inode
 * number is rebuilt from the FID instead of using i_ino.
 */
3276 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3278 struct inode *inode = de->d_inode;
3279 struct ll_sb_info *sbi = ll_i2sbi(inode);
3280 struct ll_inode_info *lli = ll_i2info(inode);
3283 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3284 MDS_INODELOCK_LOOKUP);
3285 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3290 stat->dev = inode->i_sb->s_dev;
/* 32-bit userland needs an ino that fits in 32 bits. */
3291 if (ll_need_32bit_api(sbi))
3292 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3294 stat->ino = inode->i_ino;
3295 stat->mode = inode->i_mode;
3296 stat->uid = inode->i_uid;
3297 stat->gid = inode->i_gid;
3298 stat->rdev = inode->i_rdev;
3299 stat->atime = inode->i_atime;
3300 stat->mtime = inode->i_mtime;
3301 stat->ctime = inode->i_ctime;
3302 stat->blksize = 1 << inode->i_blkbits;
3304 stat->nlink = inode->i_nlink;
3305 stat->size = i_size_read(inode);
3306 stat->blocks = inode->i_blocks;
/*
 * ll_fiemap(): .fiemap inode operation — marshal the kernel's
 * fiemap_extent_info into a Lustre ll_user_fiemap buffer (sized for
 * fi_extents_max extents), run ll_do_fiemap(), and copy flags and
 * mapped extents back to the caller.
 */
3311 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3312 __u64 start, __u64 len)
3316 struct ll_user_fiemap *fiemap;
3317 unsigned int extent_count = fieinfo->fi_extents_max;
3319 num_bytes = sizeof(*fiemap) + (extent_count *
3320 sizeof(struct ll_fiemap_extent));
3321 OBD_ALLOC_LARGE(fiemap, num_bytes);
3326 fiemap->fm_flags = fieinfo->fi_flags;
3327 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3328 fiemap->fm_start = start;
3329 fiemap->fm_length = len;
/* Seed only the first extent from userspace (continuation support). */
3330 if (extent_count > 0)
3331 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3332 sizeof(struct ll_fiemap_extent));
3334 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* Copy the mapping results back to the kernel's fieinfo buffer. */
3336 fieinfo->fi_flags = fiemap->fm_flags;
3337 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3338 if (extent_count > 0)
3339 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3340 fiemap->fm_mapped_extents *
3341 sizeof(struct ll_fiemap_extent));
3343 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * ll_get_acl(): return a referenced copy of the cached POSIX ACL for
 * @inode, taken under lli_lock.  The VFS releases the reference.
 */
3347 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3349 struct ll_inode_info *lli = ll_i2info(inode);
3350 struct posix_acl *acl = NULL;
3353 spin_lock(&lli->lli_lock);
3354 /* VFS' acl_permission_check->check_acl will release the refcount */
3355 acl = posix_acl_dup(lli->lli_posix_acl);
3356 spin_unlock(&lli->lli_lock);
/*
 * ll_check_acl(): ACL callback for generic_permission() on kernels
 * without the 2-args variant.  Checks posix_acl_permission() against
 * the cached ACL; bails out (elided return) in RCU walk mode since the
 * ACL lookup may block.  Stub when CONFIG_FS_POSIX_ACL is off.
 */
3361 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3363 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3364 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3366 ll_check_acl(struct inode *inode, int mask)
3369 # ifdef CONFIG_FS_POSIX_ACL
3370 struct posix_acl *acl;
3374 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3375 if (flags & IPERM_FLAG_RCU)
3378 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3383 rc = posix_acl_permission(inode, acl, mask);
3384 posix_acl_release(acl);
3387 # else /* !CONFIG_FS_POSIX_ACL */
3389 # endif /* CONFIG_FS_POSIX_ACL */
3391 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * ll_inode_permission(): .permission inode operation (three prototype
 * variants for different kernels).  Revalidates the root inode on first
 * access, applies root squash by overriding credentials (fsuid/fsgid and
 * FS capabilities) when the caller is root and squashing is enabled,
 * then delegates to remote-perm checking or generic_permission().
 */
3393 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3394 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3396 # ifdef HAVE_INODE_PERMISION_2ARGS
3397 int ll_inode_permission(struct inode *inode, int mask)
3399 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3404 struct ll_sb_info *sbi;
3405 struct root_squash_info *squash;
3406 struct cred *cred = NULL;
3407 const struct cred *old_cred = NULL;
3409 bool squash_id = false;
/* Cannot block in RCU-walk mode; tell the VFS to retry (elided). */
3412 #ifdef MAY_NOT_BLOCK
3413 if (mask & MAY_NOT_BLOCK)
3415 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3416 if (flags & IPERM_FLAG_RCU)
3420 /* as root inode are NOT getting validated in lookup operation,
3421 * need to do it before permission check. */
3423 if (inode == inode->i_sb->s_root->d_inode) {
3424 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3425 MDS_INODELOCK_LOOKUP);
3430 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3431 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3433 /* squash fsuid/fsgid if needed */
3434 sbi = ll_i2sbi(inode);
3435 squash = &sbi->ll_squash;
3436 if (unlikely(squash->rsi_uid != 0 &&
3437 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3438 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3442 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3443 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3444 squash->rsi_uid, squash->rsi_gid);
3446 /* update current process's credentials
3447 * and FS capability */
3448 cred = prepare_creds();
3452 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3453 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* Drop every filesystem-related capability from the squashed creds. */
3454 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3455 if ((1 << cap) & CFS_CAP_FS_MASK)
3456 cap_lower(cred->cap_effective, cap);
3458 old_cred = override_creds(cred);
3461 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3463 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3464 rc = lustre_check_remote_perm(inode, mask);
3466 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3468 /* restore current process's credentials and FS capability */
3470 revert_creds(old_cred);
/* Default file_operations table ("-o localflock": kernel-local flock
 * semantics only — no .flock/.lock methods are installed). */
3477 /* -o localflock - only provides locally consistent flock locks */
3478 struct file_operations ll_file_operations = {
3479 .read = ll_file_read,
3480 .aio_read = ll_file_aio_read,
3481 .write = ll_file_write,
3482 .aio_write = ll_file_aio_write,
3483 .unlocked_ioctl = ll_file_ioctl,
3484 .open = ll_file_open,
3485 .release = ll_file_release,
3486 .mmap = ll_file_mmap,
3487 .llseek = ll_file_seek,
3488 .splice_read = ll_file_splice_read,
/* file_operations table with cluster-wide locking: .flock and .lock
 * both route through ll_file_flock() (LDLM_FLOCK enqueues). */
3493 struct file_operations ll_file_operations_flock = {
3494 .read = ll_file_read,
3495 .aio_read = ll_file_aio_read,
3496 .write = ll_file_write,
3497 .aio_write = ll_file_aio_write,
3498 .unlocked_ioctl = ll_file_ioctl,
3499 .open = ll_file_open,
3500 .release = ll_file_release,
3501 .mmap = ll_file_mmap,
3502 .llseek = ll_file_seek,
3503 .splice_read = ll_file_splice_read,
3506 .flock = ll_file_flock,
3507 .lock = ll_file_flock
/* file_operations table for "-o noflock": lock requests are refused
 * via ll_file_noflock(). */
3510 /* These are for -o noflock - to return ENOSYS on flock calls */
3511 struct file_operations ll_file_operations_noflock = {
3512 .read = ll_file_read,
3513 .aio_read = ll_file_aio_read,
3514 .write = ll_file_write,
3515 .aio_write = ll_file_aio_write,
3516 .unlocked_ioctl = ll_file_ioctl,
3517 .open = ll_file_open,
3518 .release = ll_file_release,
3519 .mmap = ll_file_mmap,
3520 .llseek = ll_file_seek,
3521 .splice_read = ll_file_splice_read,
3524 .flock = ll_file_noflock,
3525 .lock = ll_file_noflock
/* inode_operations for regular files: attribute, xattr, permission and
 * fiemap handlers defined above in this file. */
3528 struct inode_operations ll_file_inode_operations = {
3529 .setattr = ll_setattr,
3530 .getattr = ll_getattr,
3531 .permission = ll_inode_permission,
3532 .setxattr = ll_setxattr,
3533 .getxattr = ll_getxattr,
3534 .listxattr = ll_listxattr,
3535 .removexattr = ll_removexattr,
3536 .fiemap = ll_fiemap,
3537 #ifdef HAVE_IOP_GET_ACL
3538 .get_acl = ll_get_acl,
/* Registry for dynamically registered ioctl handlers: a global list of
 * llioc_data entries protected by an rwsem.  Each entry carries a
 * callback and a variable-length array of ioctl command numbers. */
3542 /* dynamic ioctl number support routins */
3543 static struct llioc_ctl_data {
3544 struct rw_semaphore ioc_sem;
3545 struct list_head ioc_head;
3547 __RWSEM_INITIALIZER(llioc.ioc_sem),
3548 LIST_HEAD_INIT(llioc.ioc_head)
3553 struct list_head iocd_list;
3554 unsigned int iocd_size;
3555 llioc_callback_t iocd_cb;
3556 unsigned int iocd_count;
/* Flexible-style trailing array of iocd_count ioctl command numbers. */
3557 unsigned int iocd_cmd[0];
/*
 * ll_iocontrol_register(): register callback @cb for @count ioctl
 * command numbers in @cmd.  Returns an opaque cookie (used by
 * ll_iocontrol_unregister as 'magic' — presumably the allocated
 * in_data; the elided return is not visible here).
 */
3560 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3563 struct llioc_data *in_data = NULL;
3566 if (cb == NULL || cmd == NULL ||
3567 count > LLIOC_MAX_CMD || count < 0)
/* One allocation holds the header plus the command-number array. */
3570 size = sizeof(*in_data) + count * sizeof(unsigned int);
3571 OBD_ALLOC(in_data, size);
3572 if (in_data == NULL)
3575 memset(in_data, 0, sizeof(*in_data));
3576 in_data->iocd_size = size;
3577 in_data->iocd_cb = cb;
3578 in_data->iocd_count = count;
3579 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3581 down_write(&llioc.ioc_sem);
3582 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3583 up_write(&llioc.ioc_sem);
/*
 * ll_iocontrol_unregister(): remove and free the registration whose
 * cookie is @magic; warns if no matching entry is found.
 */
3588 void ll_iocontrol_unregister(void *magic)
3590 struct llioc_data *tmp;
3595 down_write(&llioc.ioc_sem);
3596 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* Save the size before freeing; needed for OBD_FREE accounting. */
3598 unsigned int size = tmp->iocd_size;
3600 list_del(&tmp->iocd_list);
3601 up_write(&llioc.ioc_sem);
3603 OBD_FREE(tmp, size);
3607 up_write(&llioc.ioc_sem);
3609 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3612 EXPORT_SYMBOL(ll_iocontrol_register);
3613 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * ll_iocontrol_call(): dispatch @cmd to the first registered handler
 * that claims it.  Iteration stops when a callback returns LLIOC_STOP;
 * the callback's rc (default -EINVAL) is reported through *rcp.
 */
3615 static enum llioc_iter
3616 ll_iocontrol_call(struct inode *inode, struct file *file,
3617 unsigned int cmd, unsigned long arg, int *rcp)
3619 enum llioc_iter ret = LLIOC_CONT;
3620 struct llioc_data *data;
3621 int rc = -EINVAL, i;
3623 down_read(&llioc.ioc_sem);
3624 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3625 for (i = 0; i < data->iocd_count; i++) {
3626 if (cmd != data->iocd_cmd[i])
3629 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3633 if (ret == LLIOC_STOP)
3636 up_read(&llioc.ioc_sem);
/*
 * ll_layout_conf(): push layout configuration @conf into the cl_object
 * stack via cl_conf_set().  For OBJECT_CONF_SET the layout lock is made
 * matchable only after the layout is applied, and the cached layout
 * generation is updated from the new LSM (LL_LAYOUT_GEN_EMPTY if none).
 */
3643 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3645 struct ll_inode_info *lli = ll_i2info(inode);
3646 struct cl_env_nest nest;
3651 if (lli->lli_clob == NULL)
3654 env = cl_env_nested_get(&nest);
3656 RETURN(PTR_ERR(env));
3658 result = cl_conf_set(env, lli->lli_clob, conf);
3659 cl_env_nested_put(&nest, env);
3661 if (conf->coc_opc == OBJECT_CONF_SET) {
3662 struct ldlm_lock *lock = conf->coc_lock;
3664 LASSERT(lock != NULL);
3665 LASSERT(ldlm_has_layout(lock));
3667 struct lustre_md *md = conf->u.coc_md;
3668 __u32 gen = LL_LAYOUT_GEN_EMPTY;
3670 /* it can only be allowed to match after layout is
3671 * applied to inode otherwise false layout would be
3672 * seen. Applying layout shoud happen before dropping
3673 * the intent lock. */
3674 ldlm_lock_allow_match(lock);
3676 lli->lli_has_smd = lsm_has_objects(md->lsm);
3677 if (md->lsm != NULL)
3678 gen = md->lsm->lsm_layout_gen;
3681 DFID ": layout version change: %u -> %u\n",
3682 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3684 ll_layout_version_set(lli, gen);
/*
 * ll_layout_fetch(): if @lock's LVB does not yet hold a ready layout,
 * fetch the LOV xattr from the MDT and install a copy as the lock's
 * lvb_data (replacing any previous buffer under the resource lock).
 * No-op when lvb_data is already present and LVB_READY is set.
 */
3690 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3691 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3694 struct ll_sb_info *sbi = ll_i2sbi(inode);
3695 struct obd_capa *oc;
3696 struct ptlrpc_request *req;
3697 struct mdt_body *body;
3704 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3705 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3706 lock->l_lvb_data, lock->l_lvb_len);
3708 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3711 /* if layout lock was granted right away, the layout is returned
3712 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3713 * blocked and then granted via completion ast, we have to fetch
3714 * layout here. Please note that we can't use the LVB buffer in
3715 * completion AST because it doesn't have a large enough buffer */
3716 oc = ll_mdscapa_get(inode);
3717 rc = ll_get_default_mdsize(sbi, &lmmsize);
3719 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3720 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3726 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3728 GOTO(out, rc = -EPROTO);
3730 lmmsize = body->mbo_eadatasize;
3731 if (lmmsize == 0) /* empty layout */
3734 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3736 GOTO(out, rc = -EFAULT);
/* Copy into a private buffer; reply buffers die with the request. */
3738 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3739 if (lvbdata == NULL)
3740 GOTO(out, rc = -ENOMEM);
3742 memcpy(lvbdata, lmm, lmmsize);
/* Swap the new LVB in under the resource lock; free any old buffer. */
3743 lock_res_and_lock(lock);
3744 if (lock->l_lvb_data != NULL)
3745 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3747 lock->l_lvb_data = lvbdata;
3748 lock->l_lvb_len = lmmsize;
3749 unlock_res_and_lock(lock);
3754 ptlrpc_req_finished(req);
/*
 * ll_layout_lock_set(): apply the layout carried by the granted layout
 * lock @lockh to @inode.  If the LVB is already ready (or !reconf) only
 * the cached generation is returned.  Otherwise the layout is fetched
 * if needed, unpacked from lvb_data, and pushed into the cl_object via
 * ll_layout_conf(); an -EBUSY result means in-flight IO still uses the
 * old layout, so the function waits via OBJECT_CONF_WAIT after dropping
 * the lock.  The lock reference is always released (elided decref path).
 */
3759 * Apply the layout to the inode. Layout lock is held and will be released
3762 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3763 struct inode *inode, __u32 *gen, bool reconf)
3765 struct ll_inode_info *lli = ll_i2info(inode);
3766 struct ll_sb_info *sbi = ll_i2sbi(inode);
3767 struct ldlm_lock *lock;
3768 struct lustre_md md = { NULL };
3769 struct cl_object_conf conf;
3772 bool wait_layout = false;
3775 LASSERT(lustre_handle_is_used(lockh));
3777 lock = ldlm_handle2lock(lockh);
3778 LASSERT(lock != NULL);
3779 LASSERT(ldlm_has_layout(lock));
3781 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d",
3782 PFID(&lli->lli_fid), inode, reconf);
3784 /* in case this is a caching lock and reinstate with new inode */
3785 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3787 lock_res_and_lock(lock);
3788 lvb_ready = ldlm_is_lvb_ready(lock);
3789 unlock_res_and_lock(lock);
3790 /* checking lvb_ready is racy but this is okay. The worst case is
3791 * that multi processes may configure the file on the same time. */
3793 if (lvb_ready || !reconf) {
3796 /* layout_gen must be valid if layout lock is not
3797 * cancelled and stripe has already set */
3798 *gen = ll_layout_version_get(lli);
3804 rc = ll_layout_fetch(inode, lock);
3808 /* for layout lock, lmm is returned in lock's lvb.
3809 * lvb_data is immutable if the lock is held so it's safe to access it
3810 * without res lock. See the description in ldlm_lock_decref_internal()
3811 * for the condition to free lvb_data of layout lock */
3812 if (lock->l_lvb_data != NULL) {
3813 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3814 lock->l_lvb_data, lock->l_lvb_len);
3816 *gen = LL_LAYOUT_GEN_EMPTY;
3818 *gen = md.lsm->lsm_layout_gen;
3821 CERROR("%s: file "DFID" unpackmd error: %d\n",
3822 ll_get_fsname(inode->i_sb, NULL, 0),
3823 PFID(&lli->lli_fid), rc);
3829 /* set layout to file. Unlikely this will fail as old layout was
3830 * surely eliminated */
3831 memset(&conf, 0, sizeof conf);
3832 conf.coc_opc = OBJECT_CONF_SET;
3833 conf.coc_inode = inode;
3834 conf.coc_lock = lock;
3835 conf.u.coc_md = &md;
3836 rc = ll_layout_conf(inode, &conf);
3839 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3841 /* refresh layout failed, need to wait */
3842 wait_layout = rc == -EBUSY;
3846 LDLM_LOCK_PUT(lock);
3847 ldlm_lock_decref(lockh, mode);
3849 /* wait for IO to complete if it's still being used. */
3851 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3852 ll_get_fsname(inode->i_sb, NULL, 0),
3853 PFID(&lli->lli_fid), inode);
3855 memset(&conf, 0, sizeof conf);
3856 conf.coc_opc = OBJECT_CONF_WAIT;
3857 conf.coc_inode = inode;
3858 rc = ll_layout_conf(inode, &conf);
3862 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3863 ll_get_fsname(inode->i_sb, NULL, 0),
3864 PFID(&lli->lli_fid), rc);
/*
 * ll_layout_refresh(): ensure a valid layout generation in *gen.  Fast
 * path: layout locking disabled or generation already set.  Otherwise,
 * under lli_layout_mutex, try to match a cached layout lock and apply
 * it; failing that, enqueue a new IT_LAYOUT intent lock on the MDT and
 * apply the returned layout via ll_layout_lock_set().
 */
3870 * This function checks if there exists a LAYOUT lock on the client side,
3871 * or enqueues it if it doesn't have one in cache.
3873 * This function will not hold layout lock so it may be revoked any time after
3874 * this function returns. Any operations depend on layout should be redone
3877 * This function should be called before lov_io_init() to get an uptodate
3878 * layout version, the caller should save the version number and after IO
3879 * is finished, this function should be called again to verify that layout
3880 * is not changed during IO time.
3882 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3884 struct ll_inode_info *lli = ll_i2info(inode);
3885 struct ll_sb_info *sbi = ll_i2sbi(inode);
3886 struct md_op_data *op_data;
3887 struct lookup_intent it;
3888 struct lustre_handle lockh;
3890 struct ldlm_enqueue_info einfo = {
3891 .ei_type = LDLM_IBITS,
3893 .ei_cb_bl = &ll_md_blocking_ast,
3894 .ei_cb_cp = &ldlm_completion_ast,
3899 *gen = ll_layout_version_get(lli);
3900 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
3904 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3905 LASSERT(S_ISREG(inode->i_mode));
3907 /* take layout lock mutex to enqueue layout lock exclusively. */
3908 mutex_lock(&lli->lli_layout_mutex);
3911 /* mostly layout lock is caching on the local side, so try to match
3912 * it before grabbing layout lock mutex. */
3913 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3914 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3915 if (mode != 0) { /* hit cached lock */
3916 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3920 mutex_unlock(&lli->lli_layout_mutex);
3924 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3925 0, 0, LUSTRE_OPC_ANY, NULL);
3926 if (IS_ERR(op_data)) {
3927 mutex_unlock(&lli->lli_layout_mutex);
3928 RETURN(PTR_ERR(op_data));
3931 /* have to enqueue one */
3932 memset(&it, 0, sizeof(it));
3933 it.it_op = IT_LAYOUT;
3934 lockh.cookie = 0ULL;
3936 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3937 ll_get_fsname(inode->i_sb, NULL, 0),
3938 PFID(&lli->lli_fid), inode);
3940 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
/* The intent's reply request is no longer needed once enqueue returns. */
3941 if (it.d.lustre.it_data != NULL)
3942 ptlrpc_req_finished(it.d.lustre.it_data);
3943 it.d.lustre.it_data = NULL;
3945 ll_finish_md_op_data(op_data);
/* Steal the lock mode from the intent so the intent won't drop it. */
3947 mode = it.d.lustre.it_lock_mode;
3948 it.d.lustre.it_lock_mode = 0;
3949 ll_intent_drop_lock(&it);
3952 /* set lock data in case this is a new lock */
3953 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3954 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3958 mutex_unlock(&lli->lli_layout_mutex);
3964 * This function send a restore request to the MDT
3966 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3968 struct hsm_user_request *hur;
3972 len = sizeof(struct hsm_user_request) +
3973 sizeof(struct hsm_user_item);
3974 OBD_ALLOC(hur, len);
3978 hur->hur_request.hr_action = HUA_RESTORE;
3979 hur->hur_request.hr_archive_id = 0;
3980 hur->hur_request.hr_flags = 0;
3981 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3982 sizeof(hur->hur_user_item[0].hui_fid));
3983 hur->hur_user_item[0].hui_extent.offset = offset;
3984 hur->hur_user_item[0].hui_extent.length = length;
3985 hur->hur_request.hr_itemcount = 1;
3986 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,