4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
50 #include <lustre_ioctl.h>
52 #include "cl_object.h"
55 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
57 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
60 static enum llioc_iter
61 ll_iocontrol_call(struct inode *inode, struct file *file,
62 unsigned int cmd, unsigned long arg, int *rcp);
/*
 * Allocate and minimally initialize a per-open ll_file_data from the
 * dedicated slab. GFP_NOFS avoids filesystem re-entry under memory pressure.
 * NOTE(review): this extract elides lines (NULL check / return path not
 * visible); code left byte-identical.
 */
64 static struct ll_file_data *ll_file_data_get(void)
66 struct ll_file_data *fd;
68 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
72 fd->fd_write_failed = false;
/*
 * Return a ll_file_data to its slab. Counterpart of ll_file_data_get().
 * NOTE(review): extract elides lines; code left byte-identical.
 */
77 static void ll_file_data_put(struct ll_file_data *fd)
80 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Copy the inode's current attributes (mode, times, size, blocks, flags),
 * the given open handle @fh, and the MDS capability into @op_data so they
 * can be sent to the MDT (used when preparing a close request).
 * If local data modifications are pending (LLIF_DATA_MODIFIED), tag the
 * request with MDS_DATA_MODIFIED so the server learns about them.
 * NOTE(review): extract elides lines; code left byte-identical.
 */
83 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
84 struct lustre_handle *fh)
86 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
87 op_data->op_attr.ia_mode = inode->i_mode;
88 op_data->op_attr.ia_atime = inode->i_atime;
89 op_data->op_attr.ia_mtime = inode->i_mtime;
90 op_data->op_attr.ia_ctime = inode->i_ctime;
91 op_data->op_attr.ia_size = i_size_read(inode);
92 op_data->op_attr_blocks = inode->i_blocks;
93 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
95 op_data->op_handle = *fh;
96 op_data->op_capa1 = ll_mdscapa_get(inode);
98 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
99 op_data->op_bias |= MDS_DATA_MODIFIED;
103 * Packs all the attributes into @op_data for the CLOSE rpc.
/*
 * Mode and timestamps are always sent; size/blocks are added only when the
 * handle was opened for write (a read-only handle has no authority over
 * size). The actual values are filled in by ll_pack_inode2opdata().
 * NOTE(review): extract elides lines; code left byte-identical.
 */
105 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
106 struct obd_client_handle *och)
110 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
111 ATTR_MTIME | ATTR_MTIME_SET |
112 ATTR_CTIME | ATTR_CTIME_SET;
/* presumably ATTR_SIZE|ATTR_BLOCKS is gated on FMODE_WRITE — the elided
 * line(s) between the test and the |= likely invert/complete this branch;
 * confirm against full source */
114 if (!(och->och_flags & FMODE_WRITE))
117 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
120 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
121 ll_prep_md_op_data(op_data, inode, NULL, NULL,
122 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send an MDS_CLOSE RPC for the open handle @och on @inode via @md_exp.
 * If @data_version is non-NULL this close is an HSM release: the data
 * version and lease handle are packed and MDS_HSM_RELEASE is set.
 * On success the local DATA_MODIFIED flag is cleared (the server now knows),
 * and the handle is poisoned with DEAD_HANDLE_MAGIC after replay data is
 * dropped. The close request, if any, is released at the end.
 * NOTE(review): extract elides lines (sanity checks, GOTO targets, RETURN);
 * code left byte-identical.
 */
126 static int ll_close_inode_openhandle(struct obd_export *md_exp,
128 struct obd_client_handle *och,
129 const __u64 *data_version)
131 struct obd_export *exp = ll_i2mdexp(inode);
132 struct md_op_data *op_data;
133 struct ptlrpc_request *req = NULL;
134 struct obd_device *obd = class_exp2obd(exp);
140 * XXX: in case of LMV, is this correct to access
143 CERROR("Invalid MDC connection handle "LPX64"\n",
144 ll_i2mdexp(inode)->exp_handle.h_cookie);
148 OBD_ALLOC_PTR(op_data);
150 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
152 ll_prepare_close(inode, op_data, och);
153 if (data_version != NULL) {
154 /* Pass in data_version implies release. */
155 op_data->op_bias |= MDS_HSM_RELEASE;
156 op_data->op_data_version = *data_version;
157 op_data->op_lease_handle = och->och_lease_handle;
158 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
161 rc = md_close(md_exp, op_data, och->och_mod, &req);
163 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
164 ll_i2mdexp(inode)->exp_obd->obd_name,
165 PFID(ll_inode2fid(inode)), rc);
168 /* DATA_MODIFIED flag was successfully sent on close, cancel data
169 * modification flag. */
170 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
171 struct ll_inode_info *lli = ll_i2info(inode);
/* lli_flags is protected by lli_lock */
173 spin_lock(&lli->lli_lock);
174 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
175 spin_unlock(&lli->lli_lock);
/* HSM release: check the server actually released the file */
178 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
179 struct mdt_body *body;
180 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
181 if (!(body->mbo_valid & OBD_MD_FLRELEASED))
185 ll_finish_md_op_data(op_data);
189 md_clear_open_replay_data(md_exp, och);
/* poison the cookie so a stale use of this handle is detectable */
190 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
193 if (req) /* This is close request */
194 ptlrpc_req_finished(req);
/*
 * Close the per-inode MDS open handle matching @fmode (write/exec/read)
 * if this caller is the last user. The handle pointer and its use count
 * are selected from ll_inode_info under lli_och_mutex; if other users
 * remain, the close is skipped.
 * NOTE(review): extract elides lines (use-count decrement, handle swap,
 * RETURN); code left byte-identical.
 */
198 int ll_md_real_close(struct inode *inode, fmode_t fmode)
200 struct ll_inode_info *lli = ll_i2info(inode);
201 struct obd_client_handle **och_p;
202 struct obd_client_handle *och;
/* pick the handle slot + refcount that matches the open mode */
207 if (fmode & FMODE_WRITE) {
208 och_p = &lli->lli_mds_write_och;
209 och_usecount = &lli->lli_open_fd_write_count;
210 } else if (fmode & FMODE_EXEC) {
211 och_p = &lli->lli_mds_exec_och;
212 och_usecount = &lli->lli_open_fd_exec_count;
214 LASSERT(fmode & FMODE_READ);
215 och_p = &lli->lli_mds_read_och;
216 och_usecount = &lli->lli_open_fd_read_count;
219 mutex_lock(&lli->lli_och_mutex);
220 if (*och_usecount > 0) {
221 /* There are still users of this handle, so skip
223 mutex_unlock(&lli->lli_och_mutex);
229 mutex_unlock(&lli->lli_och_mutex);
231 /* There might be a race and this handle may already
234 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-struct-file close path: drop group lock, lease and fd-private open
 * handle, decrement the matching per-inode open count, and — unless an
 * OPEN DLM lock still covers the file — do the real MDS close via
 * ll_md_real_close(). Finally detach and free the ll_file_data.
 * NOTE(review): extract elides lines (lease_broken decl, lockmode setup,
 * md_lock_match result handling, RETURN); code left byte-identical.
 */
241 static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
244 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
245 struct ll_inode_info *lli = ll_i2info(inode);
249 /* clear group lock, if present */
250 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
251 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
253 if (fd->fd_lease_och != NULL) {
256 /* Usually the lease is not released when the
257 * application crashed, we need to release here. */
258 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
259 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
260 PFID(&lli->lli_fid), rc, lease_broken);
262 fd->fd_lease_och = NULL;
265 if (fd->fd_och != NULL) {
266 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
271 /* Let's see if we have good enough OPEN lock on the file and if
272 we can skip talking to MDS */
273 if (file->f_dentry->d_inode) { /* Can this ever be false? */
275 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
276 struct lustre_handle lockh;
277 struct inode *inode = file->f_dentry->d_inode;
278 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* drop this fd's contribution to the per-mode open count */
280 mutex_lock(&lli->lli_och_mutex);
281 if (fd->fd_omode & FMODE_WRITE) {
283 LASSERT(lli->lli_open_fd_write_count);
284 lli->lli_open_fd_write_count--;
285 } else if (fd->fd_omode & FMODE_EXEC) {
287 LASSERT(lli->lli_open_fd_exec_count);
288 lli->lli_open_fd_exec_count--;
291 LASSERT(lli->lli_open_fd_read_count);
292 lli->lli_open_fd_read_count--;
294 mutex_unlock(&lli->lli_och_mutex);
/* LDLM_FL_TEST_LOCK: only probe for a matching OPEN ibits lock,
 * don't take a reference; if none, really close on the MDS */
296 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
297 LDLM_IBITS, &policy, lockmode,
299 rc = ll_md_real_close(file->f_dentry->d_inode,
303 CERROR("released file has negative dentry: file = %p, "
304 "dentry = %p, name = %s\n",
305 file, file->f_dentry, file->f_dentry->d_name.name);
309 LUSTRE_FPRIVATE(file) = NULL;
310 ll_file_data_put(fd);
311 ll_capa_close(inode);
316 /* While this returns an error code, fput() the caller does not, so we need
317 * to make every effort to clean up all of our state here. Also, applications
318 * rarely check close errors and even if an error is returned they will not
319 * re-try the close call.
/*
 * VFS ->release() entry point. Handles the remote-ACL bookkeeping for the
 * root inode, statahead deauthorization, async write error collection for
 * regular files, and delegates the real work to ll_md_close(). The root
 * dentry takes a short-circuit path that only frees the ll_file_data.
 * NOTE(review): extract elides lines (rc decl, NULL-fd check, RETURN);
 * code left byte-identical.
 */
321 int ll_file_release(struct inode *inode, struct file *file)
323 struct ll_file_data *fd;
324 struct ll_sb_info *sbi = ll_i2sbi(inode);
325 struct ll_inode_info *lli = ll_i2info(inode);
329 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
330 PFID(ll_inode2fid(inode)), inode);
332 #ifdef CONFIG_FS_POSIX_ACL
/* remote-client ACL cleanup applies only to the root inode */
333 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
334 inode == inode->i_sb->s_root->d_inode) {
335 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
338 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
339 fd->fd_flags &= ~LL_FILE_RMTACL;
340 rct_del(&sbi->ll_rct, current_pid());
341 et_search_free(&sbi->ll_et, current_pid());
/* don't count releases of the root dentry in stats */
346 if (inode->i_sb->s_root != file->f_dentry)
347 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
348 fd = LUSTRE_FPRIVATE(file);
351 /* The last ref on @file, maybe not the the owner pid of statahead,
352 * because parent and child process can share the same file handle. */
353 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
354 ll_deauthorize_statahead(inode, fd);
356 if (inode->i_sb->s_root == file->f_dentry) {
357 LUSTRE_FPRIVATE(file) = NULL;
358 ll_file_data_put(fd);
/* fold any async write errors into lli_async_rc before close */
362 if (!S_ISDIR(inode->i_mode)) {
363 if (lli->lli_clob != NULL)
364 lov_read_and_clear_async_rc(lli->lli_clob);
365 lli->lli_async_rc = 0;
368 rc = ll_md_close(sbi->ll_md_exp, inode, file);
370 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
371 libcfs_debug_dumplog();
/*
 * Send an IT_OPEN intent lock request to the MDS for @file. The name is
 * packed only when the server lacks OBD_CONNECT_OPEN_BY_FID support and the
 * dentry name is valid; otherwise open-by-FID is used. On success the reply
 * is used to refresh the inode (ll_prep_inode) and set DLM lock data.
 * @lmm/@lmmsize: optional striping metadata passed through op_data.
 * NOTE(review): extract elides lines (len/rc decls, error branches,
 * GOTO/RETURN targets); code left byte-identical.
 */
376 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
377 struct lookup_intent *itp)
379 struct dentry *de = file->f_dentry;
380 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
381 struct dentry *parent = de->d_parent;
382 const char *name = NULL;
384 struct md_op_data *op_data;
385 struct ptlrpc_request *req = NULL;
389 LASSERT(parent != NULL);
390 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
392 /* if server supports open-by-fid, or file name is invalid, don't pack
393 * name in open request */
394 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
395 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
396 name = de->d_name.name;
397 len = de->d_name.len;
400 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
401 name, len, 0, LUSTRE_OPC_ANY, NULL);
403 RETURN(PTR_ERR(op_data));
404 op_data->op_data = lmm;
405 op_data->op_data_size = lmmsize;
407 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
408 &ll_md_blocking_ast, 0);
409 ll_finish_md_op_data(op_data);
411 /* reason for keep own exit path - don`t flood log
412 * with messages with -ESTALE errors.
/* an open handle we don't need (e.g. on error path) must be released */
414 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
415 it_open_error(DISP_OPEN_OPEN, itp))
417 ll_release_openhandle(de, itp);
421 if (it_disposition(itp, DISP_LOOKUP_NEG))
422 GOTO(out, rc = -ENOENT);
424 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
425 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
426 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
430 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
431 if (!rc && itp->d.lustre.it_lock_mode)
432 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
435 ptlrpc_req_finished(req);
436 ll_intent_drop_lock(itp);
/*
 * Populate an obd_client_handle from the MDT reply body carried in the
 * intent (open handle, FID, lease cookie, open flags) and register it for
 * open replay so the handle survives MDS recovery.
 * NOTE(review): extract elides lines; code left byte-identical.
 */
441 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
442 struct obd_client_handle *och)
444 struct ptlrpc_request *req = it->d.lustre.it_data;
445 struct mdt_body *body;
447 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
448 och->och_fh = body->mbo_handle;
449 och->och_fid = body->mbo_fid1;
450 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
451 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
452 och->och_flags = it->it_flags;
454 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish the local (client-side) part of an open: optionally fill @och from
 * the intent reply, then attach @fd as the file's private data and set up
 * readahead state, open mode and the cl-context lock/list.
 * NOTE(review): extract elides lines (och NULL check, rc handling, RETURN);
 * code left byte-identical.
 */
457 static int ll_local_open(struct file *file, struct lookup_intent *it,
458 struct ll_file_data *fd, struct obd_client_handle *och)
460 struct inode *inode = file->f_dentry->d_inode;
463 LASSERT(!LUSTRE_FPRIVATE(file));
470 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
475 LUSTRE_FPRIVATE(file) = fd;
476 ll_readahead_init(inode, &fd->fd_ras);
477 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
479 /* ll_cl_context initialize */
480 rwlock_init(&fd->fd_lock);
481 INIT_LIST_HEAD(&fd->fd_lccs);
486 /* Open a file, and (for the very first open) create objects on the OSTs at
487 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
488 * creation or open until ll_lov_setstripe() ioctl is called.
490 * If we already have the stripe MD locally then we don't request it in
491 * md_open(), by passing a lmm_size = 0.
493 * It is up to the application to ensure no other processes open this file
494 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
495 * used. We might be able to avoid races of that sort by getting lli_open_sem
496 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
497 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * VFS ->open() entry point. Either reuses an intent prepared by lookup
 * (file->private_data) or builds a fresh IT_OPEN intent from f_flags, then
 * reuses an existing per-inode MDS open handle when one matches the mode,
 * or performs ll_intent_file_open() and records a new handle.
 * NOTE(review): extract elides many lines (rc decl, GOTO targets, restart
 * path, else branches); code left byte-identical.
 */
499 int ll_file_open(struct inode *inode, struct file *file)
501 struct ll_inode_info *lli = ll_i2info(inode);
502 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
503 .it_flags = file->f_flags };
504 struct obd_client_handle **och_p = NULL;
505 __u64 *och_usecount = NULL;
506 struct ll_file_data *fd;
510 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
511 PFID(ll_inode2fid(inode)), inode, file->f_flags);
513 it = file->private_data; /* XXX: compat macro */
514 file->private_data = NULL; /* prevent ll_local_open assertion */
516 fd = ll_file_data_get();
518 GOTO(out_openerr, rc = -ENOMEM);
521 if (S_ISDIR(inode->i_mode))
522 ll_authorize_statahead(inode, fd);
/* root dentry: no MDS open needed, just attach the fd */
524 if (inode->i_sb->s_root == file->f_dentry) {
525 LUSTRE_FPRIVATE(file) = fd;
/* no usable intent from lookup — construct one from f_flags */
529 if (!it || !it->d.lustre.it_disposition) {
530 /* Convert f_flags into access mode. We cannot use file->f_mode,
531 * because everything but O_ACCMODE mask was stripped from
533 if ((oit.it_flags + 1) & O_ACCMODE)
535 if (file->f_flags & O_TRUNC)
536 oit.it_flags |= FMODE_WRITE;
538 /* kernel only call f_op->open in dentry_open. filp_open calls
539 * dentry_open after call to open_namei that checks permissions.
540 * Only nfsd_open call dentry_open directly without checking
541 * permissions and because of that this code below is safe. */
542 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
543 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
545 /* We do not want O_EXCL here, presumably we opened the file
546 * already? XXX - NFS implications? */
547 oit.it_flags &= ~O_EXCL;
549 /* bug20584, if "it_flags" contains O_CREAT, the file will be
550 * created if necessary, then "IT_CREAT" should be set to keep
551 * consistent with it */
552 if (oit.it_flags & O_CREAT)
553 oit.it_op |= IT_CREAT;
559 /* Let's see if we have file open on MDS already. */
560 if (it->it_flags & FMODE_WRITE) {
561 och_p = &lli->lli_mds_write_och;
562 och_usecount = &lli->lli_open_fd_write_count;
563 } else if (it->it_flags & FMODE_EXEC) {
564 och_p = &lli->lli_mds_exec_och;
565 och_usecount = &lli->lli_open_fd_exec_count;
567 och_p = &lli->lli_mds_read_och;
568 och_usecount = &lli->lli_open_fd_read_count;
571 mutex_lock(&lli->lli_och_mutex);
572 if (*och_p) { /* Open handle is present */
573 if (it_disposition(it, DISP_OPEN_OPEN)) {
574 /* Well, there's extra open request that we do not need,
575 let's close it somehow. This will decref request. */
576 rc = it_open_error(DISP_OPEN_OPEN, it);
578 mutex_unlock(&lli->lli_och_mutex);
579 GOTO(out_openerr, rc);
582 ll_release_openhandle(file->f_dentry, it);
/* reuse existing handle: local-only open, och arg is NULL */
586 rc = ll_local_open(file, it, fd, NULL);
589 mutex_unlock(&lli->lli_och_mutex);
590 GOTO(out_openerr, rc);
593 LASSERT(*och_usecount == 0);
594 if (!it->d.lustre.it_disposition) {
595 /* We cannot just request lock handle now, new ELC code
596 means that one of other OPEN locks for this file
597 could be cancelled, and since blocking ast handler
598 would attempt to grab och_mutex as well, that would
599 result in a deadlock */
600 mutex_unlock(&lli->lli_och_mutex);
602 * Normally called under two situations:
604 * 2. A race/condition on MDS resulting in no open
605 * handle to be returned from LOOKUP|OPEN request,
606 * for example if the target entry was a symlink.
608 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
610 * Always specify MDS_OPEN_BY_FID because we don't want
611 * to get file with different fid.
613 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
614 rc = ll_intent_file_open(file, NULL, 0, it);
616 GOTO(out_openerr, rc);
620 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
622 GOTO(out_och_free, rc = -ENOMEM);
626 /* md_intent_lock() didn't get a request ref if there was an
627 * open error, so don't do cleanup on the request here
629 /* XXX (green): Should not we bail out on any error here, not
630 * just open error? */
631 rc = it_open_error(DISP_OPEN_OPEN, it);
633 GOTO(out_och_free, rc);
635 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
636 "inode %p: disposition %x, status %d\n", inode,
637 it_disposition(it, ~0), it->d.lustre.it_status);
639 rc = ll_local_open(file, it, fd, *och_p);
641 GOTO(out_och_free, rc);
643 mutex_unlock(&lli->lli_och_mutex);
646 /* Must do this outside lli_och_mutex lock to prevent deadlock where
647 different kind of OPEN lock for this same inode gets cancelled
648 by ldlm_cancel_lru */
649 if (!S_ISREG(inode->i_mode))
650 GOTO(out_och_free, rc);
/* O_LOV_DELAY_CREATE or read-only open of an unstriped file:
 * object creation is deferred until setstripe / first write */
654 if (!lli->lli_has_smd &&
655 (cl_is_lov_delay_create(file->f_flags) ||
656 (file->f_mode & FMODE_WRITE) == 0)) {
657 CDEBUG(D_INODE, "object creation was delayed\n");
658 GOTO(out_och_free, rc);
660 cl_lov_delay_create_clear(&file->f_flags);
661 GOTO(out_och_free, rc);
/* error unwinding: free any half-initialized handle, drop fd */
665 if (och_p && *och_p) {
666 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
667 *och_p = NULL; /* OBD_FREE writes some magic there */
670 mutex_unlock(&lli->lli_och_mutex);
673 if (lli->lli_opendir_key == fd)
674 ll_deauthorize_statahead(inode, fd);
676 ll_file_data_put(fd);
678 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* drop the enqueue reference taken on the open request, if any */
681 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
682 ptlrpc_req_finished(it->d.lustre.it_data);
683 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease DLM locks: on LDLM_CB_BLOCKING, cancel the lease
 * lock asynchronously (lease is considered broken by the conflicting access).
 * NOTE(review): extract elides lines (switch header, CANCELING body, return);
 * code left byte-identical.
 */
689 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
690 struct ldlm_lock_desc *desc, void *data, int flag)
693 struct lustre_handle lockh;
697 case LDLM_CB_BLOCKING:
698 ldlm_lock2handle(lock, &lockh);
699 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
701 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
705 case LDLM_CB_CANCELING:
713 * Acquire a lease and open the file.
/*
 * Open @inode with an MDS_OPEN_LEASE intent and return the resulting
 * obd_client_handle. If @file is given, its existing openhandle is passed
 * as op_handle so the MDT knows the lease request comes from the same
 * owner; only sole openers may convert their handle. The open lock must be
 * granted (MDS_INODELOCK_OPEN) or the lease is rejected with -EPROTO.
 * Returns the handle on success, ERR_PTR on failure; error paths cancel the
 * lease lock and close the handle.
 * NOTE(review): extract elides many lines (och alloc, och_usecount decl,
 * rc decls, branch bodies, RETURNs); code left byte-identical.
 */
715 static struct obd_client_handle *
716 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
719 struct lookup_intent it = { .it_op = IT_OPEN };
720 struct ll_sb_info *sbi = ll_i2sbi(inode);
721 struct md_op_data *op_data;
722 struct ptlrpc_request *req = NULL;
723 struct lustre_handle old_handle = { 0 };
724 struct obd_client_handle *och = NULL;
/* only plain read or write leases are supported */
729 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
730 RETURN(ERR_PTR(-EINVAL));
733 struct ll_inode_info *lli = ll_i2info(inode);
734 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
735 struct obd_client_handle **och_p;
/* requested mode must be a subset of the file's open mode */
738 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
739 RETURN(ERR_PTR(-EPERM));
741 /* Get the openhandle of the file */
743 mutex_lock(&lli->lli_och_mutex);
744 if (fd->fd_lease_och != NULL) {
745 mutex_unlock(&lli->lli_och_mutex);
749 if (fd->fd_och == NULL) {
750 if (file->f_mode & FMODE_WRITE) {
751 LASSERT(lli->lli_mds_write_och != NULL);
752 och_p = &lli->lli_mds_write_och;
753 och_usecount = &lli->lli_open_fd_write_count;
755 LASSERT(lli->lli_mds_read_och != NULL);
756 och_p = &lli->lli_mds_read_och;
757 och_usecount = &lli->lli_open_fd_read_count;
759 if (*och_usecount == 1) {
766 mutex_unlock(&lli->lli_och_mutex);
767 if (rc < 0) /* more than 1 opener */
770 LASSERT(fd->fd_och != NULL);
771 old_handle = fd->fd_och->och_fh;
776 RETURN(ERR_PTR(-ENOMEM));
778 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
779 LUSTRE_OPC_ANY, NULL);
781 GOTO(out, rc = PTR_ERR(op_data));
783 /* To tell the MDT this openhandle is from the same owner */
784 op_data->op_handle = old_handle;
786 it.it_flags = fmode | open_flags;
787 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
788 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
789 &ll_md_blocking_lease_ast,
790 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
791 * it can be cancelled which may mislead applications that the lease is
793 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
794 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
795 * doesn't deal with openhandle, so normal openhandle will be leaked. */
796 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
797 ll_finish_md_op_data(op_data);
798 ptlrpc_req_finished(req);
800 GOTO(out_release_it, rc);
802 if (it_disposition(&it, DISP_LOOKUP_NEG))
803 GOTO(out_release_it, rc = -ENOENT);
805 rc = it_open_error(DISP_OPEN_OPEN, &it);
807 GOTO(out_release_it, rc);
809 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
810 ll_och_fill(sbi->ll_md_exp, &it, och);
812 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
813 GOTO(out_close, rc = -EOPNOTSUPP);
815 /* already get lease, handle lease lock */
816 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
817 if (it.d.lustre.it_lock_mode == 0 ||
818 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
819 /* open lock must return for lease */
820 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
821 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
822 it.d.lustre.it_lock_bits);
823 GOTO(out_close, rc = -EPROTO);
826 ll_intent_release(&it);
830 /* Cancel open lock */
831 if (it.d.lustre.it_lock_mode != 0) {
832 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
833 it.d.lustre.it_lock_mode);
834 it.d.lustre.it_lock_mode = 0;
835 och->och_lease_handle.cookie = 0ULL;
837 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
839 CERROR("%s: error closing file "DFID": %d\n",
840 ll_get_fsname(inode->i_sb, NULL, 0),
841 PFID(&ll_i2info(inode)->lli_fid), rc2);
842 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
844 ll_intent_release(&it);
852 * Release lease and close the file.
853 * It will check if the lease has ever broken.
/*
 * If the lease DLM lock still exists, read its CANCEL flag under the
 * resource lock to learn whether the lease was broken; if not yet broken,
 * cancel it explicitly before closing the open handle on the MDS.
 * @lease_broken (optional out): whether the lease had been broken.
 * NOTE(review): extract elides lines (lock NULL check, put, RETURN);
 * code left byte-identical.
 */
855 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
858 struct ldlm_lock *lock;
859 bool cancelled = true;
863 lock = ldlm_handle2lock(&och->och_lease_handle);
865 lock_res_and_lock(lock);
866 cancelled = ldlm_is_cancel(lock);
867 unlock_res_and_lock(lock);
871 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
872 PFID(&ll_i2info(inode)->lli_fid), cancelled);
875 ldlm_cli_cancel(&och->och_lease_handle, 0);
876 if (lease_broken != NULL)
877 *lease_broken = cancelled;
879 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
884 /* Fills the obdo with the attributes for the lsm */
/*
 * Issue an async OST getattr over all stripes of @lsm and wait for the
 * result. LL_DV_WR_FLUSH/LL_DV_RD_FLUSH in @dv_flags request server-side
 * lock flushing (OBD_FL_SRVLOCK / OBD_FL_FLUSH) so the returned data
 * version reflects flushed data. After the RPC, o_valid is masked down to
 * the attributes this caller consumes; a write-flush that the server did
 * not confirm is treated as a failure (elided branch).
 * NOTE(review): extract elides lines (oi_oa setup, rc decl/checks, RETURN);
 * code left byte-identical.
 */
885 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
886 struct obd_capa *capa, struct obdo *obdo,
889 struct ptlrpc_request_set *set;
890 struct obd_info oinfo = { { { 0 } } };
895 LASSERT(lsm != NULL);
899 oinfo.oi_oa->o_oi = lsm->lsm_oi;
900 oinfo.oi_oa->o_mode = S_IFREG;
901 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
902 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
903 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
904 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
905 OBD_MD_FLGROUP | OBD_MD_FLDATAVERSION;
906 oinfo.oi_capa = capa;
907 if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
908 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
909 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
910 if (dv_flags & LL_DV_WR_FLUSH)
911 oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
914 set = ptlrpc_prep_set();
916 CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM);
919 rc = obd_getattr_async(exp, &oinfo, set);
921 rc = ptlrpc_set_wait(set);
922 ptlrpc_set_destroy(set);
925 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
926 OBD_MD_FLATIME | OBD_MD_FLMTIME |
927 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
928 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
929 if (dv_flags & LL_DV_WR_FLUSH &&
930 !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
931 oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
/*
 * Merge MDS-provided timestamps (cached in lli) with OST object attributes
 * obtained through the cl_object layer: each timestamp is the max of the
 * two sources, while size and blocks are taken from the OSTs. All updates
 * happen under the inode size lock.
 * NOTE(review): extract elides lines (atime/mtime/ctime decls, rc decl,
 * RETURN); code left byte-identical.
 */
937 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
939 struct ll_inode_info *lli = ll_i2info(inode);
940 struct cl_object *obj = lli->lli_clob;
941 struct cl_attr *attr = ccc_env_thread_attr(env);
949 ll_inode_size_lock(inode);
951 /* merge timestamps the most recently obtained from mds with
952 timestamps obtained from osts */
953 LTIME_S(inode->i_atime) = lli->lli_atime;
954 LTIME_S(inode->i_mtime) = lli->lli_mtime;
955 LTIME_S(inode->i_ctime) = lli->lli_ctime;
957 atime = LTIME_S(inode->i_atime);
958 mtime = LTIME_S(inode->i_mtime);
959 ctime = LTIME_S(inode->i_ctime);
961 cl_object_attr_lock(obj);
962 rc = cl_object_attr_get(env, obj, attr);
963 cl_object_attr_unlock(obj);
966 GOTO(out_size_unlock, rc);
/* keep whichever layer has the newer timestamp */
968 if (atime < attr->cat_atime)
969 atime = attr->cat_atime;
971 if (ctime < attr->cat_ctime)
972 ctime = attr->cat_ctime;
974 if (mtime < attr->cat_mtime)
975 mtime = attr->cat_mtime;
977 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
978 PFID(&lli->lli_fid), attr->cat_size);
980 i_size_write(inode, attr->cat_size);
981 inode->i_blocks = attr->cat_blocks;
983 LTIME_S(inode->i_atime) = atime;
984 LTIME_S(inode->i_mtime) = mtime;
985 LTIME_S(inode->i_ctime) = ctime;
988 ll_inode_size_unlock(inode);
/*
 * Fetch OST attributes for @lsm via ll_lsm_getattr() and copy size, blocks
 * and timestamps into the caller-supplied stat structure.
 * NOTE(review): extract elides lines (st parameter decl, rc check, RETURN);
 * code left byte-identical.
 */
993 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
996 struct obdo obdo = { 0 };
999 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0);
1001 st->st_size = obdo.o_size;
1002 st->st_blocks = obdo.o_blocks;
1003 st->st_mtime = obdo.o_mtime;
1004 st->st_atime = obdo.o_atime;
1005 st->st_ctime = obdo.o_ctime;
/*
 * Decide whether atime updates should be suppressed for @file, checking
 * (in order) the open flags, inode flags, mount flags, and the nodiratime
 * cases for directories — mirroring the kernel's file_accessed()/
 * touch_atime() logic.
 * NOTE(review): extract elides lines (the `return true;`/`return false;`
 * bodies between checks); code left byte-identical.
 */
1012 static bool file_is_noatime(const struct file *file)
1014 const struct vfsmount *mnt = file->f_path.mnt;
1015 const struct inode *inode = file->f_path.dentry->d_inode;
1017 /* Adapted from file_accessed() and touch_atime().*/
1018 if (file->f_flags & O_NOATIME)
1021 if (inode->i_flags & S_NOATIME)
1024 if (IS_NOATIME(inode))
1027 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1030 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1033 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io from the file's open flags: nonblocking, append and
 * sync-write modes, the backing cl_object, lock policy (never for nolock
 * files, mandatory for O_APPEND, "maybe" otherwise) and the noatime flag.
 * NOTE(review): extract elides lines (the write-only branch header);
 * code left byte-identical.
 */
1037 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
1039 struct inode *inode = file->f_dentry->d_inode;
1041 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1043 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1044 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1045 file->f_flags & O_DIRECT ||
1048 io->ci_obj = ll_i2info(inode)->lli_clob;
1049 io->ci_lockreq = CILR_MAYBE;
1050 if (ll_file_nolock(file)) {
1051 io->ci_lockreq = CILR_NEVER;
1052 io->ci_no_srvlock = 1;
1053 } else if (file->f_flags & O_APPEND) {
1054 io->ci_lockreq = CILR_MANDATORY;
1057 io->ci_noatime = file_is_noatime(file);
/*
 * Common engine for all read/write paths (normal and splice). Sets up a
 * cl_io for @iot at *@ppos/@count, enforces the Lustre max file size for
 * writes, takes the per-file range lock for non-group-locked normal writes
 * (whole-file range for O_APPEND), runs cl_io_loop(), and retries once on
 * ci_need_restart when nothing was transferred. On success *@ppos advances
 * and per-mount read/write byte stats are tallied; write failures (other
 * than -ERESTARTSYS) set fd->fd_write_failed.
 * NOTE(review): extract elides many lines (io/result decls, restart loop
 * header, GOTO targets, RETURN); code left byte-identical.
 */
1061 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1062 struct file *file, enum cl_io_type iot,
1063 loff_t *ppos, size_t count)
1065 struct inode *inode = file->f_dentry->d_inode;
1066 struct ll_inode_info *lli = ll_i2info(inode);
1068 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1071 struct range_lock range;
1074 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1075 file->f_dentry->d_name.name, iot, *ppos, count);
1078 io = ccc_env_thread_io(env);
1079 ll_io_init(io, file, iot == CIT_WRITE);
1081 /* The maximum Lustre file size is variable, based on the
1082 * OST maximum object size and number of stripes. This
1083 * needs another check in addition to the VFS checks earlier. */
1084 end = (io->u.ci_wr.wr_append ? i_size_read(inode) : *ppos) + count;
1085 if (end > ll_file_maxbytes(inode)) {
1087 CDEBUG(D_INODE, "%s: file "DFID" offset %llu > maxbytes "LPU64
1088 ": rc = %zd\n", ll_get_fsname(inode->i_sb, NULL, 0),
1089 PFID(&lli->lli_fid), end, ll_file_maxbytes(inode),
1094 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1095 struct vvp_io *vio = vvp_env_io(env);
1096 bool range_locked = false;
/* O_APPEND must serialize against all writers: lock to EOF */
1098 if (file->f_flags & O_APPEND)
1099 range_lock_init(&range, 0, LUSTRE_EOF);
1101 range_lock_init(&range, *ppos, *ppos + count - 1);
1103 vio->vui_fd = LUSTRE_FPRIVATE(file);
1104 vio->vui_io_subtype = args->via_io_subtype;
1106 switch (vio->vui_io_subtype) {
1108 vio->vui_iov = args->u.normal.via_iov;
1109 vio->vui_nrsegs = args->u.normal.via_nrsegs;
1110 vio->vui_tot_nrsegs = vio->vui_nrsegs;
1111 vio->vui_iocb = args->u.normal.via_iocb;
/* group-locked files already exclude concurrent writers,
 * so the range lock is only taken otherwise */
1112 if ((iot == CIT_WRITE) &&
1113 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1114 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1116 result = range_lock(&lli->lli_write_tree,
1121 range_locked = true;
1123 down_read(&lli->lli_trunc_sem);
1126 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1127 vio->u.splice.vui_flags = args->u.splice.via_flags;
1130 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1134 ll_cl_add(file, env, io);
1135 result = cl_io_loop(env, io);
1136 ll_cl_remove(file, env);
1138 if (args->via_io_subtype == IO_NORMAL)
1139 up_read(&lli->lli_trunc_sem);
1141 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1143 range_unlock(&lli->lli_write_tree, &range);
1146 /* cl_io_rw_init() handled IO */
1147 result = io->ci_result;
1150 if (io->ci_nob > 0) {
1151 result = io->ci_nob;
1152 *ppos = io->u.ci_wr.wr.crw_pos;
1156 cl_io_fini(env, io);
1157 /* If any bit been read/written (result != 0), we just return
1158 * short read/write instead of restart io. */
1159 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1160 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zu\n",
1161 iot == CIT_READ ? "read" : "write",
1162 file->f_dentry->d_name.name, *ppos, count);
1163 LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);
1167 if (iot == CIT_READ) {
1169 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1170 LPROC_LL_READ_BYTES, result);
1171 } else if (iot == CIT_WRITE) {
1173 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1174 LPROC_LL_WRITE_BYTES, result);
1175 fd->fd_write_failed = false;
1176 } else if (result != -ERESTARTSYS) {
1177 fd->fd_write_failed = true;
1180 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1187 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array: reject negative/overflowing cumulative lengths
 * and truncate *nr_segs at the first segment failing access_ok(), returning
 * the total byte count through *count.
 * NOTE(review): extract elides lines (cnt/seg decls, accumulation, returns);
 * code left byte-identical.
 */
1189 static int ll_file_get_iov_count(const struct iovec *iov,
1190 unsigned long *nr_segs, size_t *count)
1195 for (seg = 0; seg < *nr_segs; seg++) {
1196 const struct iovec *iv = &iov[seg];
1199 * If any segment has a negative length, or the cumulative
1200 * length ever wraps negative then return -EINVAL.
1203 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1205 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1210 cnt -= iv->iov_len; /* This segment is no good */
/*
 * ->aio_read entry point: validate the iovec, grab a cl environment, pack
 * the normal-IO args and delegate to ll_file_io_generic(CIT_READ).
 * NOTE(review): extract elides lines (env/count/result decls, error checks,
 * RETURN); code left byte-identical.
 */
1217 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1218 unsigned long nr_segs, loff_t pos)
1221 struct vvp_io_args *args;
1227 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1231 env = cl_env_get(&refcheck);
1233 RETURN(PTR_ERR(env));
1235 args = vvp_env_args(env, IO_NORMAL);
1236 args->u.normal.via_iov = (struct iovec *)iov;
1237 args->u.normal.via_nrsegs = nr_segs;
1238 args->u.normal.via_iocb = iocb;
1240 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1241 &iocb->ki_pos, count);
1242 cl_env_put(env, &refcheck);
/*
 * Synchronous ->read wrapper: build a single-segment iovec and a sync kiocb
 * in per-env scratch space, then call ll_file_aio_read() and propagate the
 * updated position back to *ppos.
 * NOTE(review): extract elides lines (env/result decls, RETURN);
 * code left byte-identical.
 */
1246 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1250 struct iovec *local_iov;
1251 struct kiocb *kiocb;
1256 env = cl_env_get(&refcheck);
1258 RETURN(PTR_ERR(env));
1260 local_iov = &vvp_env_info(env)->vti_local_iov;
1261 kiocb = &vvp_env_info(env)->vti_kiocb;
1262 local_iov->iov_base = (void __user *)buf;
1263 local_iov->iov_len = count;
1264 init_sync_kiocb(kiocb, file);
1265 kiocb->ki_pos = *ppos;
/* kernel-version compat: the kiocb byte-count field was renamed */
1266 #ifdef HAVE_KIOCB_KI_LEFT
1267 kiocb->ki_left = count;
1269 kiocb->ki_nbytes = count;
1272 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1273 *ppos = kiocb->ki_pos;
1275 cl_env_put(env, &refcheck);
1280  * Write to a file (through the page cache).
/*
 * AIO write entry point (f_op->aio_write).
 *
 * Mirror image of ll_file_aio_read(): validates the iovec, fills the
 * per-thread vvp_io_args and runs the request through
 * ll_file_io_generic() as CIT_WRITE, advancing iocb->ki_pos.
 */
1283 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1284                                 unsigned long nr_segs, loff_t pos)
1287        struct vvp_io_args *args;
1293        result = ll_file_get_iov_count(iov, &nr_segs, &count);
1297        env = cl_env_get(&refcheck);
1299                RETURN(PTR_ERR(env));
1301        args = vvp_env_args(env, IO_NORMAL);
1302        args->u.normal.via_iov = (struct iovec *)iov;
1303        args->u.normal.via_nrsegs = nr_segs;
1304        args->u.normal.via_iocb = iocb;
1306        result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1307                                    &iocb->ki_pos, count);
1308        cl_env_put(env, &refcheck);
/*
 * Synchronous write entry point (f_op->write).
 *
 * Same structure as ll_file_read(): single-segment iovec plus a sync
 * kiocb built in per-thread storage, delegated to ll_file_aio_write(),
 * with *ppos refreshed from the kiocb afterwards.
 */
1312 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1313                             size_t count, loff_t *ppos)
1316        struct iovec *local_iov;
1317        struct kiocb *kiocb;
1322        env = cl_env_get(&refcheck);
1324                RETURN(PTR_ERR(env));
1326        local_iov = &vvp_env_info(env)->vti_local_iov;
1327        kiocb = &vvp_env_info(env)->vti_kiocb;
/* const is dropped here only to store into the shared iovec; the write
 * path reads from, never writes to, this user buffer. */
1328        local_iov->iov_base = (void __user *)buf;
1329        local_iov->iov_len = count;
1330        init_sync_kiocb(kiocb, file);
1331        kiocb->ki_pos = *ppos;
1332 #ifdef HAVE_KIOCB_KI_LEFT
1333        kiocb->ki_left = count;
1335        kiocb->ki_nbytes = count;
1338        result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1339        *ppos = kiocb->ki_pos;
1341        cl_env_put(env, &refcheck);
1346  * Send file content (through pagecache) somewhere with helper
/*
 * splice_read entry point: feed page-cache contents of in_file into a
 * pipe.  Uses the IO_SPLICE argument variant instead of an iovec and
 * runs through the same generic CIT_READ path as normal reads.
 */
1348 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1349                                   struct pipe_inode_info *pipe, size_t count,
1353        struct vvp_io_args *args;
1358        env = cl_env_get(&refcheck);
1360                RETURN(PTR_ERR(env));
1362        args = vvp_env_args(env, IO_SPLICE);
1363        args->u.splice.via_pipe = pipe;
1364        args->u.splice.via_flags = flags;
1366        result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1367        cl_env_put(env, &refcheck);
/*
 * Set the striping EA (layout) on a file via an MDS open-by-FID intent.
 *
 * Fails with -EEXIST if the inode already has a layout (striping can
 * only be set on a layout-less file).  The open handle created by the
 * intent is released immediately — only the layout side effect on the
 * MDS is wanted.  Size lock serializes against concurrent size updates.
 */
1371 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1372                             __u64 flags, struct lov_user_md *lum,
1375        struct lov_stripe_md *lsm = NULL;
1376        struct lookup_intent oit = {
1378                .it_flags = flags | MDS_OPEN_BY_FID,
/* Peek at the current layout: non-NULL means stripes already exist. */
1383        lsm = ccc_inode_lsm_get(inode);
1385                ccc_inode_lsm_put(inode, lsm);
1386                CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1387                       PFID(ll_inode2fid(inode)));
1388                GOTO(out, rc = -EEXIST);
1391        ll_inode_size_lock(inode);
1392        rc = ll_intent_file_open(file, lum, lum_size, &oit);
1394                GOTO(out_unlock, rc);
1396        rc = oit.d.lustre.it_status;
1398                GOTO(out_unlock, rc);
/* Drop the MDS open handle created purely to install the layout. */
1400        ll_release_openhandle(file->f_dentry, &oit);
1403        ll_inode_size_unlock(inode);
1404        ll_intent_release(&oit);
1405        ccc_inode_lsm_put(inode, lsm);
/* Clear O_LOV_DELAY_CREATE so later I/O no longer defers layout creation. */
1407        cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA (striping metadata) for @filename under @inode from
 * the MDS via md_getattr_name().
 *
 * On success *lmmp points into the reply buffer (caller must keep
 * *request alive while using it and eventually release it), and
 * *lmm_size is set.  The EA arrives little-endian from the MDS and is
 * byte-swapped in place for big-endian hosts.
 * Returns 0 or negative errno (-ENODATA if no EA present).
 */
1412 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1413                             struct lov_mds_md **lmmp, int *lmm_size,
1414                             struct ptlrpc_request **request)
1416        struct ll_sb_info *sbi = ll_i2sbi(inode);
1417        struct mdt_body  *body;
1418        struct lov_mds_md *lmm = NULL;
1419        struct ptlrpc_request *req = NULL;
1420        struct md_op_data *op_data;
/* Size the getattr reply buffer for the largest default EA on this MDS. */
1423        rc = ll_get_default_mdsize(sbi, &lmmsize);
1427        op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1428                                     strlen(filename), lmmsize,
1429                                     LUSTRE_OPC_ANY, NULL);
1430        if (IS_ERR(op_data))
1431                RETURN(PTR_ERR(op_data));
1433        op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1434        rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1435        ll_finish_md_op_data(op_data);
1437                CDEBUG(D_INFO, "md_getattr_name failed "
1438                       "on %s: rc %d\n", filename, rc);
1442        body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1443        LASSERT(body != NULL); /* checked by mdc_getattr_name */
1445        lmmsize = body->mbo_eadatasize;
/* No EA bit set or zero size: the file simply has no striping data. */
1447        if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1449                GOTO(out, rc = -ENODATA);
1452        lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1453        LASSERT(lmm != NULL);
1455        if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1456            (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1457                GOTO(out, rc = -EPROTO);
1461         * This is coming from the MDS, so is probably in
1462         * little endian.  We convert it to host endian before
1463         * passing it to userspace.
/* True only on big-endian hosts: wire format is LE, so swab is needed. */
1465        if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1468                stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1469                if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1472                /* if function called for directory - we should
1473                 * avoid swab not existent lsm objects */
1474                if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1475                        lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
/* Only regular files carry per-object entries worth swabbing. */
1476                        if (S_ISREG(body->mbo_mode))
1477                                lustre_swab_lov_user_md_objects(
1478                                 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1480                } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1481                        lustre_swab_lov_user_md_v3(
1482                                (struct lov_user_md_v3 *)lmm);
1483                        if (S_ISREG(body->mbo_mode))
1484                                lustre_swab_lov_user_md_objects(
1485                                 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1492        *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: set a striping EA with explicit objects
 * (MDS_OPEN_HAS_OBJS).  Root-only (CFS_CAP_SYS_ADMIN).  Copies the
 * lov_user_md (plus one ost_data entry) from userspace and forwards to
 * ll_lov_setstripe_ea_info().
 */
1497 static int ll_lov_setea(struct inode *inode, struct file *file,
1500        __u64                    flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1501        struct lov_user_md      *lump;
/* Fixed size: header + exactly one OST object entry. */
1502        int                      lum_size = sizeof(struct lov_user_md) +
1503                                            sizeof(struct lov_user_ost_data);
1507        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1510        OBD_ALLOC_LARGE(lump, lum_size);
1514        if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1515                OBD_FREE_LARGE(lump, lum_size);
1519        rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1521        OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the file's striping information to the userspace lov_user_md
 * buffer via the cl_object layer (cl_object_getstripe handles the
 * copy_to_user itself).
 */
1525 static int ll_file_getstripe(struct inode *inode,
1526                             struct lov_user_md __user *lum)
1533        env = cl_env_get(&refcheck);
1535                RETURN(PTR_ERR(env));
1537        rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1538        cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user's lov_user_md into a
 * kernel buffer, install it as the file layout, then refresh the
 * layout generation and echo the resulting stripe info back to
 * userspace so the caller sees what was actually applied.
 */
1542 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1545        struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1546        struct lov_user_md        *klum;
1548        __u64                      flags = FMODE_WRITE;
/* Copies and validates the user md into kernel memory (sets lum_size). */
1551        rc = ll_copy_user_md(lum, &klum);
1556        rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* NOTE(review): return value of put_user is apparently ignored in the
 * elided context — confirm against the full source. */
1560                put_user(0, &lum->lmm_stripe_count);
1562                ll_layout_refresh(inode, &gen);
1563                rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1566        OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK: take a cluster-wide group lock with group id @arg
 * on behalf of this file descriptor.
 *
 * Only one group lock per fd; gid 0 is invalid; files opened with
 * nolock cannot take one.  The fd flag/grouplock pair is published
 * under lli_lock, with a re-check after the (sleeping) enqueue to
 * handle a racing second caller on the same fd.
 */
1571 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1573        struct ll_inode_info   *lli = ll_i2info(inode);
1574        struct ll_file_data    *fd = LUSTRE_FPRIVATE(file);
1575        struct ccc_grouplock    grouplock;
1580                CWARN("group id for group lock must not be 0\n");
1584        if (ll_file_nolock(file))
1585                RETURN(-EOPNOTSUPP);
1587        spin_lock(&lli->lli_lock);
1588        if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1589                CWARN("group lock already existed with gid %lu\n",
1590                      fd->fd_grouplock.cg_gid);
1591                spin_unlock(&lli->lli_lock);
1594        LASSERT(fd->fd_grouplock.cg_lock == NULL);
/* Drop the spinlock: cl_get_grouplock() may block (O_NONBLOCK honored). */
1595        spin_unlock(&lli->lli_lock);
1597        rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1598                              arg, (file->f_flags & O_NONBLOCK), &grouplock);
1602        spin_lock(&lli->lli_lock);
/* Re-check: another thread may have installed its lock while we slept. */
1603        if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1604                spin_unlock(&lli->lli_lock);
1605                CERROR("another thread just won the race\n");
1606                cl_put_grouplock(&grouplock);
1610        fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1611        fd->fd_grouplock = grouplock;
1612        spin_unlock(&lli->lli_lock);
1614        CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK: release the group lock held by this fd, but only
 * if one is held and its gid matches @arg.  The fd state is cleared
 * under lli_lock; the actual lock release happens outside the spinlock.
 */
1618 static int ll_put_grouplock(struct inode *inode, struct file *file,
1621        struct ll_inode_info   *lli = ll_i2info(inode);
1622        struct ll_file_data    *fd = LUSTRE_FPRIVATE(file);
1623        struct ccc_grouplock    grouplock;
1626        spin_lock(&lli->lli_lock);
1627        if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1628                spin_unlock(&lli->lli_lock);
1629                CWARN("no group lock held\n");
1632        LASSERT(fd->fd_grouplock.cg_lock != NULL);
1634        if (fd->fd_grouplock.cg_gid != arg) {
1635                CWARN("group lock %lu doesn't match current id %lu\n",
1636                      arg, fd->fd_grouplock.cg_gid);
1637                spin_unlock(&lli->lli_lock);
/* Detach lock state from the fd first, release it after unlocking. */
1641        grouplock = fd->fd_grouplock;
1642        memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1643        fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1644        spin_unlock(&lli->lli_lock);
1646        cl_put_grouplock(&grouplock);
1647        CDEBUG(D_INFO, "group lock %lu released\n", arg);
1652  * Close inode open handle
1654  * \param dentry [in] dentry which contains the inode
1655  * \param it [in,out] intent which contains open info and result
1658  * \retval <0 failure
/*
 * Release the MDS open handle carried by an open intent that the VFS
 * will never turn into a struct file (e.g. layout-only opens).  No-op
 * for the root dentry or when the intent performed no open.
 */
1660 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1662        struct inode *inode = dentry->d_inode;
1663        struct obd_client_handle *och;
1669        /* Root ? Do nothing. */
1670        if (dentry->d_inode->i_sb->s_root == dentry)
1673        /* No open handle to close? Move away */
1674        if (!it_disposition(it, DISP_OPEN_OPEN))
1677        LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1679        OBD_ALLOC(och, sizeof(*och));
1681                GOTO(out, rc = -ENOMEM);
/* Transfer the intent's open handle into och, then close it on the MDS. */
1683        ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1685        rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1688        /* this one is in place of ll_file_open */
1689        if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1690                ptlrpc_req_finished(it->d.lustre.it_data);
1691                it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1697  * Get size for inode for which FIEMAP mapping is requested.
1698  * Make the FIEMAP get_info call and returns the result.
1699  * \param fiemap       kernel buffer to hold extens
1700  * \param num_bytes    kernel buffer size
/*
 * Core FIEMAP implementation: validate flags, optionally flush dirty
 * pages (FIEMAP_FLAG_SYNC), make sure the size is known (glimpse), and
 * ask the cl_object layer to fill in the extent mapping.
 */
1702 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1708        struct ll_fiemap_info_key fmkey = { .name = KEY_FIEMAP, };
1711        /* Checks for fiemap flags */
/* Unsupported flag requested: report back which flags we DO support. */
1712        if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1713                fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1717        /* Check for FIEMAP_FLAG_SYNC */
1718        if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1719                rc = filemap_fdatawrite(inode->i_mapping);
1724        env = cl_env_get(&refcheck);
1726                RETURN(PTR_ERR(env));
/* A zero cached size may just be stale — glimpse OSTs for the real one. */
1728        if (i_size_read(inode) == 0) {
1729                rc = ll_glimpse_size(inode);
1734        fmkey.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1735        obdo_from_inode(&fmkey.oa, inode, OBD_MD_FLSIZE);
1736        obdo_set_parent_fid(&fmkey.oa, &ll_i2info(inode)->lli_fid);
1738        /* If filesize is 0, then there would be no objects for mapping */
1739        if (fmkey.oa.o_size == 0) {
1740                fiemap->fm_mapped_extents = 0;
1744        fmkey.fiemap = *fiemap;
1746        rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1747                              &fmkey, fiemap, &num_bytes);
1749        cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a pathname via the MDC.
 *
 * Permission: CFS_CAP_DAC_READ_SEARCH or the LL_SBI_USER_FID2PATH
 * mount option.  The output buffer is sized from the user-supplied
 * gf_pathlen (capped at PATH_MAX) and the whole getinfo_fid2path
 * struct is round-tripped through obd_iocontrol().
 */
1753 int ll_fid2path(struct inode *inode, void __user *arg)
1755        struct obd_export       *exp = ll_i2mdexp(inode);
1756        const struct getinfo_fid2path __user *gfin = arg;
1758        struct getinfo_fid2path *gfout;
1764        if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1765            !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1768        /* Only need to get the buflen */
1769        if (get_user(pathlen, &gfin->gf_pathlen))
/* Cap before the allocation below — prevents a user-controlled huge alloc. */
1772        if (pathlen > PATH_MAX)
1775        outsize = sizeof(*gfout) + pathlen;
1776        OBD_ALLOC(gfout, outsize);
1780        if (copy_from_user(gfout, arg, sizeof(*gfout)))
1781                GOTO(gf_free, rc = -EFAULT);
1783        /* Call mdc_iocontrol */
1784        rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1788        if (copy_to_user(arg, gfout, outsize))
1792        OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: userspace marshalling wrapper around
 * ll_do_fiemap().  Sizes a kernel buffer from the caller's
 * fm_extent_count (with overflow check), copies the header (and first
 * extent, which seeds continuation of a previous call) in, runs the
 * mapping, and copies the header plus mapped extents back out.
 */
1796 static int ll_ioctl_fiemap(struct inode *inode, struct fiemap __user *arg)
1798        struct fiemap   *fiemap;
1804        /* Get the extent count so we can calculate the size of
1805         * required fiemap buffer */
1806        if (get_user(extent_count, &arg->fm_extent_count))
/* Reject counts whose byte size would overflow SIZE_MAX. */
1810            (SIZE_MAX - sizeof(*fiemap)) / sizeof(struct ll_fiemap_extent))
1812        num_bytes = sizeof(*fiemap) + (extent_count *
1813                                       sizeof(struct ll_fiemap_extent));
1815        OBD_ALLOC_LARGE(fiemap, num_bytes);
1819        /* get the fiemap value */
1820        if (copy_from_user(fiemap, arg, sizeof(*fiemap)))
1821                GOTO(error, rc = -EFAULT);
1823        /* If fm_extent_count is non-zero, read the first extent since
1824         * it is used to calculate end_offset and device from previous
1826        if (extent_count != 0) {
1827                if (copy_from_user(&fiemap->fm_extents[0],
1828                                   (char __user *)arg + sizeof(*fiemap),
1829                                   sizeof(struct ll_fiemap_extent)))
1830                        GOTO(error, rc = -EFAULT);
1833        rc = ll_do_fiemap(inode, fiemap, num_bytes);
1837        ret_bytes = sizeof(struct fiemap);
/* Copy back only the header plus extents that were actually mapped. */
1839        if (extent_count != 0)
1840                ret_bytes += (fiemap->fm_mapped_extents *
1841                              sizeof(struct ll_fiemap_extent));
1843        if (copy_to_user((void __user *)arg, fiemap, ret_bytes))
1847        OBD_FREE_LARGE(fiemap, num_bytes);
1852  * Read the data_version for inode.
1854  * This value is computed using stripe object version on OST.
1855  * Version is computed using server side locking.
1857  * @param flags if do sync on the OST side;
1859  *              LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1860  *              LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1862 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1869        /* If no file object initialized, we consider its version is 0. */
1870        if (ll_i2info(inode)->lli_clob == NULL) {
1875        env = cl_env_get(&refcheck);
1877                RETURN(PTR_ERR(env));
1879        rc = cl_object_data_version(env, ll_i2info(inode)->lli_clob,
1880                                    data_version, flags);
1881        cl_env_put(env, &refcheck);
1886  * Trigger a HSM release request for the provided inode.
/*
 * Release the OST data of an HSM-archived file: take a write lease,
 * flush and record the latest data_version, merge [am]time into the
 * inode, then close with MDS_HSM_RELEASE so the MDT drops the objects
 * only if the data_version still matches.  On any failure the lease is
 * closed normally so the file is left intact.
 */
1888 int ll_hsm_release(struct inode *inode)
1890        struct cl_env_nest nest;
1892        struct obd_client_handle *och = NULL;
1893        __u64 data_version = 0;
1897        CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1898               ll_get_fsname(inode->i_sb, NULL, 0),
1899               PFID(&ll_i2info(inode)->lli_fid));
1901        och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1903                GOTO(out, rc = PTR_ERR(och));
1905        /* Grab latest data_version and [am]time values */
/* WR_FLUSH drops client cache so the version reflects what's on the OSTs. */
1906        rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1910        env = cl_env_nested_get(&nest);
1912                GOTO(out, rc = PTR_ERR(env));
1914        ll_merge_attr(env, inode);
1915        cl_env_nested_put(&nest, env);
1917        /* Release the file.
1918         * NB: lease lock handle is released in mdc_hsm_release_pack() because
1919         * we still need it to pack l_remote_handle to MDT. */
1920        rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
/* Error path: close the lease we opened above (och may also be ERR_PTR). */
1926        if (och != NULL && !IS_ERR(och)) /* close the file */
1927                ll_lease_close(och, inode, NULL);
/* Scratch state for ll_swap_layouts(): saved [am]times (ia1/ia2), the
 * two inodes involved, and whether each side's data_version must be
 * verified before the swap. */
1932 struct ll_swap_stack {
1933        struct iattr             ia1, ia2;
1935        struct inode            *inode1, *inode2;
1936        bool                     check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS: atomically exchange the layouts of two
 * files on the MDT.
 *
 * Sequence: sanity checks (regular files, writable, same fs) →
 * order the pair by FID so two concurrent swaps lock consistently →
 * optional group locks to flush caches → optional data_version
 * guard (-EAGAIN if either file changed) → MDT swap RPC → restore
 * mtime/atime if the caller asked to keep them.
 */
1939 static int ll_swap_layouts(struct file *file1, struct file *file2,
1940                           struct lustre_swap_layouts *lsl)
1942        struct mdc_swap_layouts  msl;
1943        struct md_op_data       *op_data;
1946        struct ll_swap_stack    *llss = NULL;
1949        OBD_ALLOC_PTR(llss);
1953        llss->inode1 = file1->f_dentry->d_inode;
1954        llss->inode2 = file2->f_dentry->d_inode;
/* NOTE(review): the S_ISREG check for inode1 is in elided lines. */
1956        if (!S_ISREG(llss->inode2->i_mode))
1957                GOTO(free, rc = -EINVAL);
1959        if (inode_permission(llss->inode1, MAY_WRITE) ||
1960            inode_permission(llss->inode2, MAY_WRITE))
1961                GOTO(free, rc = -EPERM);
1963        if (llss->inode2->i_sb != llss->inode1->i_sb)
1964                GOTO(free, rc = -EXDEV);
1966        /* we use 2 bool because it is easier to swap than 2 bits */
1967        if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1968                llss->check_dv1 = true;
1970        if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1971                llss->check_dv2 = true;
1973        /* we cannot use lsl->sl_dvX directly because we may swap them */
1974        llss->dv1 = lsl->sl_dv1;
1975        llss->dv2 = lsl->sl_dv2;
1977        rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1978        if (rc == 0) /* same file, done! */
/* Canonical FID ordering avoids lock-order deadlock between two swaps. */
1981        if (rc < 0) { /* sequentialize it */
1982                swap(llss->inode1, llss->inode2);
1984                swap(llss->dv1, llss->dv2);
1985                swap(llss->check_dv1, llss->check_dv2);
1989        if (gid != 0) { /* application asks to flush dirty cache */
1990                rc = ll_get_grouplock(llss->inode1, file1, gid);
1994                rc = ll_get_grouplock(llss->inode2, file2, gid);
1996                        ll_put_grouplock(llss->inode1, file1, gid);
2001        /* to be able to restore mtime and atime after swap
2002         * we need to first save them */
2004            (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2005                llss->ia1.ia_mtime = llss->inode1->i_mtime;
2006                llss->ia1.ia_atime = llss->inode1->i_atime;
2007                llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2008                llss->ia2.ia_mtime = llss->inode2->i_mtime;
2009                llss->ia2.ia_atime = llss->inode2->i_atime;
2010                llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2013        /* ultimate check, before swaping the layouts we check if
2014         * dataversion has changed (if requested) */
2015        if (llss->check_dv1) {
2016                rc = ll_data_version(llss->inode1, &dv, 0);
2019                if (dv != llss->dv1)
2020                        GOTO(putgl, rc = -EAGAIN);
2023        if (llss->check_dv2) {
2024                rc = ll_data_version(llss->inode2, &dv, 0);
2027                if (dv != llss->dv2)
2028                        GOTO(putgl, rc = -EAGAIN);
2031        /* struct md_op_data is used to send the swap args to the mdt
2032         * only flags is missing, so we use struct mdc_swap_layouts
2033         * through the md_op_data->op_data */
2034        /* flags from user space have to be converted before they are send to
2035         * server, no flag is sent today, they are only used on the client */
2038        op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2039                                     0, LUSTRE_OPC_ANY, &msl);
2040        if (IS_ERR(op_data))
2041                GOTO(free, rc = PTR_ERR(op_data));
2043        rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2044                           sizeof(*op_data), op_data, NULL);
2045        ll_finish_md_op_data(op_data);
/* Release in reverse acquisition order. */
2049                ll_put_grouplock(llss->inode2, file2, gid);
2050                ll_put_grouplock(llss->inode1, file1, gid);
2053        /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2057        /* clear useless flags */
2058        if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2059                llss->ia1.ia_valid &= ~ATTR_MTIME;
2060                llss->ia2.ia_valid &= ~ATTR_MTIME;
2063        if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2064                llss->ia1.ia_valid &= ~ATTR_ATIME;
2065                llss->ia2.ia_valid &= ~ATTR_ATIME;
2068        /* update time if requested */
/* After the swap, ia2's saved times are applied to inode1 and vice
 * versa — the layouts moved, so the times follow the data. */
2070        if (llss->ia2.ia_valid != 0) {
2071                mutex_lock(&llss->inode1->i_mutex);
2072                rc = ll_setattr(file1->f_dentry, &llss->ia2);
2073                mutex_unlock(&llss->inode1->i_mutex);
2076        if (llss->ia1.ia_valid != 0) {
2079                mutex_lock(&llss->inode2->i_mutex);
2080                rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2081                mutex_unlock(&llss->inode2->i_mutex);
/*
 * LL_IOC_HSM_STATE_SET: set/clear HSM flags (and optionally the archive
 * id) on a file via an MDT iocontrol.
 *
 * Validation: masks must lie within HSM_FLAGS_MASK; non-admin callers
 * may only touch HSM_USER_MASK bits; archive id capped at
 * LL_HSM_MAX_ARCHIVE.
 */
2093 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2095        struct md_op_data       *op_data;
2099        /* Detect out-of range masks */
2100        if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2103        /* Non-root users are forbidden to set or clear flags which are
2104         * NOT defined in HSM_USER_MASK. */
2105        if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2106            !cfs_capable(CFS_CAP_SYS_ADMIN))
2109        /* Detect out-of range archive id */
2110        if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2111            (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2114        op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2115                                     LUSTRE_OPC_ANY, hss);
2116        if (IS_ERR(op_data))
2117                RETURN(PTR_ERR(op_data));
2119        rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2120                           sizeof(*op_data), op_data, NULL);
2122        ll_finish_md_op_data(op_data);
/*
 * LL_IOC_HSM_IMPORT: register an already-archived file with HSM.
 *
 * Marks the file ARCHIVED|EXISTS|RELEASED for the given archive id,
 * then forces the inode attributes (mode/uid/gid/size/times) to the
 * values recorded at archive time via ll_setattr_raw().
 */
2127 static int ll_hsm_import(struct inode *inode, struct file *file,
2128                         struct hsm_user_import *hui)
2130        struct hsm_state_set    *hss = NULL;
2131        struct iattr            *attr = NULL;
2135        if (!S_ISREG(inode->i_mode))
2141                GOTO(out, rc = -ENOMEM);
2143        hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2144        hss->hss_archive_id = hui->hui_archive_id;
/* RELEASED: the data lives only in the archive; no OST objects yet. */
2145        hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2146        rc = ll_hsm_state_set(inode, hss);
2150        OBD_ALLOC_PTR(attr);
2152                GOTO(out, rc = -ENOMEM);
2154        attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2155        attr->ia_mode |= S_IFREG;
2156        attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2157        attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2158        attr->ia_size = hui->hui_size;
2159        attr->ia_mtime.tv_sec = hui->hui_mtime;
2160        attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2161        attr->ia_atime.tv_sec = hui->hui_atime;
2162        attr->ia_atime.tv_nsec = hui->hui_atime_ns;
/* ATTR_FORCE skips the usual permission checks for this import. */
2164        attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2165                         ATTR_UID | ATTR_GID |
2166                         ATTR_MTIME | ATTR_MTIME_SET |
2167                         ATTR_ATIME | ATTR_ATIME_SET;
2169        mutex_lock(&inode->i_mutex);
2171        rc = ll_setattr_raw(file->f_dentry, attr, true);
2175        mutex_unlock(&inode->i_mutex);
/* Map a lease's fmode (read/write bits) to the LL_LEASE_{RD,WR}LCK
 * bitmask returned to userspace by the lease ioctls. */
2187 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2189        return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2190               ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * unlocked_ioctl entry point: dispatch on cmd to the file-level ioctl
 * handlers (striping, group locks, FIEMAP, FID/path, data version,
 * HSM state/action/import, leases) or fall through to the registered
 * ioctl handlers / the OSC layer for anything unknown.
 */
2194 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2196        struct inode            *inode = file->f_dentry->d_inode;
2197        struct ll_file_data     *fd = LUSTRE_FPRIVATE(file);
2201        CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2202               PFID(ll_inode2fid(inode)), inode, cmd);
2203        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2205        /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
/* tty ioctls reach files too; refuse them (ENOTTY path is elided). */
2206        if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2210        case LL_IOC_GETFLAGS:
2211                /* Get the current value of the file flags */
2212                return put_user(fd->fd_flags, (int __user *)arg);
2213        case LL_IOC_SETFLAGS:
2214        case LL_IOC_CLRFLAGS:
2215                /* Set or clear specific file flags */
2216                /* XXX This probably needs checks to ensure the flags are
2217                 *     not abused, and to handle any flag side effects.
2219                if (get_user(flags, (int __user *) arg))
2222                if (cmd == LL_IOC_SETFLAGS) {
/* Lock bypass is only sane for O_DIRECT I/O (no shared page cache). */
2223                        if ((flags & LL_FILE_IGNORE_LOCK) &&
2224                            !(file->f_flags & O_DIRECT)) {
2225                                CERROR("%s: unable to disable locking on "
2226                                       "non-O_DIRECT file\n", current->comm);
2230                        fd->fd_flags |= flags;
2232                        fd->fd_flags &= ~flags;
2235        case LL_IOC_LOV_SETSTRIPE:
2236                RETURN(ll_lov_setstripe(inode, file, arg));
2237        case LL_IOC_LOV_SETEA:
2238                RETURN(ll_lov_setea(inode, file, arg));
2239        case LL_IOC_LOV_SWAP_LAYOUTS: {
2241                struct lustre_swap_layouts lsl;
2243                if (copy_from_user(&lsl, (char __user *)arg,
2244                                   sizeof(struct lustre_swap_layouts)))
/* Both fds must be open for write for a swap. */
2247                if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2250                file2 = fget(lsl.sl_fd);
2255                if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2256                        rc = ll_swap_layouts(file, file2, &lsl);
2260        case LL_IOC_LOV_GETSTRIPE:
2261                RETURN(ll_file_getstripe(inode,
2262                                         (struct lov_user_md __user *)arg));
2263        case FSFILT_IOC_FIEMAP:
2264                RETURN(ll_ioctl_fiemap(inode, (struct fiemap __user *)arg));
2265        case FSFILT_IOC_GETFLAGS:
2266        case FSFILT_IOC_SETFLAGS:
2267                RETURN(ll_iocontrol(inode, file, cmd, arg));
2268        case FSFILT_IOC_GETVERSION_OLD:
2269        case FSFILT_IOC_GETVERSION:
2270                RETURN(put_user(inode->i_generation, (int __user *)arg));
2271        case LL_IOC_GROUP_LOCK:
2272                RETURN(ll_get_grouplock(inode, file, arg));
2273        case LL_IOC_GROUP_UNLOCK:
2274                RETURN(ll_put_grouplock(inode, file, arg));
2275        case IOC_OBD_STATFS:
2276                RETURN(ll_obd_statfs(inode, (void __user *)arg));
2278        /* We need to special case any other ioctls we want to handle,
2279         * to send them to the MDS/OST as appropriate and to properly
2280         * network encode the arg field.
2281        case FSFILT_IOC_SETVERSION_OLD:
2282        case FSFILT_IOC_SETVERSION:
2284        case LL_IOC_FLUSHCTX:
2285                RETURN(ll_flush_ctx(inode));
2286        case LL_IOC_PATH2FID: {
2287                if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2288                                 sizeof(struct lu_fid)))
2293        case LL_IOC_GETPARENT:
2294                RETURN(ll_getparent(file, (struct getparent __user *)arg));
2296        case OBD_IOC_FID2PATH:
2297                RETURN(ll_fid2path(inode, (void __user *)arg));
2298        case LL_IOC_DATA_VERSION: {
2299                struct ioc_data_version idv;
2302                if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* Only the flush-mode bits are honored from userspace. */
2305                idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2306                rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2309                    copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2315        case LL_IOC_GET_MDTIDX: {
2318                mdtidx = ll_get_mdt_idx(inode);
2322                if (put_user((int)mdtidx, (int __user *)arg))
2327        case OBD_IOC_GETDTNAME:
2328        case OBD_IOC_GETMDNAME:
2329                RETURN(ll_get_obd_name(inode, cmd, arg));
2330        case LL_IOC_HSM_STATE_GET: {
2331                struct md_op_data       *op_data;
2332                struct hsm_user_state   *hus;
2339                op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2340                                             LUSTRE_OPC_ANY, hus);
2341                if (IS_ERR(op_data)) {
2343                        RETURN(PTR_ERR(op_data));
2346                rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2349                if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2352                ll_finish_md_op_data(op_data);
2356        case LL_IOC_HSM_STATE_SET: {
2357                struct hsm_state_set    *hss;
2364                if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2369                rc = ll_hsm_state_set(inode, hss);
2374        case LL_IOC_HSM_ACTION: {
2375                struct md_op_data               *op_data;
2376                struct hsm_current_action       *hca;
2383                op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2384                                             LUSTRE_OPC_ANY, hca);
2385                if (IS_ERR(op_data)) {
2387                        RETURN(PTR_ERR(op_data));
2390                rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2393                if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2396                ll_finish_md_op_data(op_data);
2400        case LL_IOC_SET_LEASE: {
2401                struct ll_inode_info *lli = ll_i2info(inode);
2402                struct obd_client_handle *och = NULL;
/* Lease mode must be compatible with how the fd itself was opened. */
2407                case LL_LEASE_WRLCK:
2408                        if (!(file->f_mode & FMODE_WRITE))
2410                        fmode = FMODE_WRITE;
2412                case LL_LEASE_RDLCK:
2413                        if (!(file->f_mode & FMODE_READ))
2417                case LL_LEASE_UNLCK:
2418                        mutex_lock(&lli->lli_och_mutex);
2419                        if (fd->fd_lease_och != NULL) {
2420                                och = fd->fd_lease_och;
2421                                fd->fd_lease_och = NULL;
2423                        mutex_unlock(&lli->lli_och_mutex);
2428                        fmode = och->och_flags;
2429                        rc = ll_lease_close(och, inode, &lease_broken);
/* Report lease type held at unlock time (0 if it had been broken). */
2436                        RETURN(ll_lease_type_from_fmode(fmode));
2441                CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2443                /* apply for lease */
2444                och = ll_lease_open(inode, file, fmode, 0);
2446                        RETURN(PTR_ERR(och));
2449                mutex_lock(&lli->lli_och_mutex);
2450                if (fd->fd_lease_och == NULL) {
2451                        fd->fd_lease_och = och;
2454                mutex_unlock(&lli->lli_och_mutex);
2456                /* impossible now that only excl is supported for now */
2457                ll_lease_close(och, inode, &lease_broken);
2462        case LL_IOC_GET_LEASE: {
2463                struct ll_inode_info *lli = ll_i2info(inode);
2464                struct ldlm_lock *lock = NULL;
2467                mutex_lock(&lli->lli_och_mutex);
2468                if (fd->fd_lease_och != NULL) {
2469                        struct obd_client_handle *och = fd->fd_lease_och;
2471                        lock = ldlm_handle2lock(&och->och_lease_handle);
2473                                lock_res_and_lock(lock);
/* A cancelled lease lock means the lease is gone: report no lease. */
2474                                if (!ldlm_is_cancel(lock))
2475                                        fmode = och->och_flags;
2477                                unlock_res_and_lock(lock);
2478                                LDLM_LOCK_PUT(lock);
2481                mutex_unlock(&lli->lli_och_mutex);
2483                RETURN(ll_lease_type_from_fmode(fmode));
2485        case LL_IOC_HSM_IMPORT: {
2486                struct hsm_user_import *hui;
2492                if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2497                rc = ll_hsm_import(inode, file, hui);
/* Unknown cmd: try registered external handlers, then the data layer. */
2507                     ll_iocontrol_call(inode, file, cmd, arg, &err))
2510                RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2511                                     (void __user *)arg));
/* Compat copies of the kernel's llseek helpers for kernels that lack
 * generic_file_llseek_size(). */
2516 #ifndef HAVE_FILE_LLSEEK_SIZE
/* Validate @offset against sign/maxsize rules and commit it to f_pos,
 * resetting f_version on any position change. */
2517 static inline loff_t
2518 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2520        if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2522        if (offset > maxsize)
2525        if (offset != file->f_pos) {
2526                file->f_pos = offset;
2527                file->f_version = 0;
/*
 * Compat implementation of the kernel's generic_file_llseek_size():
 * handle SEEK_END/SEEK_CUR/SEEK_DATA/SEEK_HOLE against a caller-
 * supplied end-of-file and maximum offset, then commit via
 * llseek_execute().
 */
2533 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2534                loff_t maxsize, loff_t eof)
2536        struct inode *inode = file->f_dentry->d_inode;
2544                 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2545                 * position-querying operation.  Avoid rewriting the "same"
2546                 * f_pos value back to the file because a concurrent read(),
2547                 * write() or lseek() might have altered it
2552                 * f_lock protects against read/modify/write race with other
2553                 * SEEK_CURs. Note that parallel writes and reads behave
/* NOTE(review): this compat path serializes SEEK_CUR with i_mutex
 * rather than f_lock — the comment above describes the upstream form. */
2556                mutex_lock(&inode->i_mutex);
2557                offset = llseek_execute(file, file->f_pos + offset, maxsize);
2558                mutex_unlock(&inode->i_mutex);
2562                 * In the generic case the entire file is data, so as long as
2563                 * offset isn't at the end of the file then the offset is data.
2570                 * There is a virtual hole at the end of the file, so as long as
2571                 * offset isn't i_size or larger, return i_size.
2579        return llseek_execute(file, offset, maxsize);
/*
 * llseek entry point.  For SEEK_END/SEEK_HOLE/SEEK_DATA the real file
 * size must be fetched from the OSTs first (glimpse); then delegate to
 * the (possibly compat) generic_file_llseek_size with Lustre's maximum
 * file size as the bound.
 */
2583 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2585        struct inode *inode = file->f_dentry->d_inode;
2586        loff_t retval, eof = 0;
/* Computed here only for the debug message below. */
2589        retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2590                           (origin == SEEK_CUR) ? file->f_pos : 0);
2591        CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2592               PFID(ll_inode2fid(inode)), inode, retval, retval,
2594        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2596        if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2597                retval = ll_glimpse_size(inode);
2600                eof = i_size_read(inode);
2603        retval = ll_generic_file_llseek_size(file, offset, origin,
2604                                          ll_file_maxbytes(inode), eof);
/*
 * flush entry point (called on close()).  Does not write data; it only
 * harvests previously recorded async writeback errors from the inode
 * and the cl_object layer and reports -EIO once — unless this fd was
 * already told about the failure (fd_write_failed).
 */
2608 static int ll_flush(struct file *file, fl_owner_t id)
2610        struct inode *inode = file->f_dentry->d_inode;
2611        struct ll_inode_info *lli = ll_i2info(inode);
2612        struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2615        LASSERT(!S_ISDIR(inode->i_mode));
2617        /* catch async errors that were recorded back when async writeback
2618         * failed for pages in this mapping. */
/* Read-and-clear: each error is reported to exactly one closer. */
2619        rc = lli->lli_async_rc;
2620        lli->lli_async_rc = 0;
2621        if (lli->lli_clob != NULL) {
2622                err = lov_read_and_clear_async_rc(lli->lli_clob);
2627        /* The application has been told write failure already.
2628         * Do not report failure again. */
2629        if (fd->fd_write_failed)
2631        return rc ? -EIO : 0;
2635  * Called to make sure a portion of file has been written out.
2636  * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2638  * Return how many pages have been written.
/*
 * Build and run a CIT_FSYNC cl_io over [start, end] with the given
 * fsync mode.  Returns the number of pages written on success, or a
 * negative errno.  @ignore_layout lets the sync proceed across a
 * layout change.
 */
2640 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2641                       enum cl_fsync_mode mode, int ignore_layout)
2643        struct cl_env_nest nest;
2646        struct obd_capa *capa = NULL;
2647        struct cl_fsync_io *fio;
2651        if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2652            mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2655        env = cl_env_nested_get(&nest);
2657                RETURN(PTR_ERR(env));
/* OSS capability for the write, when capabilities are enabled. */
2659        capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2661        io = ccc_env_thread_io(env);
2662        io->ci_obj = ll_i2info(inode)->lli_clob;
2663        io->ci_ignore_layout = ignore_layout;
2665        /* initialize parameters for sync */
2666        fio = &io->u.ci_fsync;
2667        fio->fi_capa = capa;
2668        fio->fi_start = start;
2670        fio->fi_fid = ll_inode2fid(inode);
2671        fio->fi_mode = mode;
2672        fio->fi_nr_written = 0;
2674        if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2675                result = cl_io_loop(env, io);
2677                result = io->ci_result;
/* Success: report how many pages the fsync actually wrote. */
2679                result = fio->fi_nr_written;
2680        cl_io_fini(env, io);
2681        cl_env_nested_put(&nest, env);
2689  * When dentry is provided (the 'else' case), *file->f_dentry may be
2690  * null and dentry must be used directly rather than pulled from
2691  * *file->f_dentry as is done otherwise.
/*
 * fsync entry point, with three kernel-ABI variants selected at build
 * time (4-arg range fsync, 2-arg, or old 3-arg with dentry).  Waits
 * for in-flight writeback, harvests recorded async errors, syncs the
 * MDS (md_fsync) and, for regular files, the OSTs via
 * cl_sync_file_range(CL_FSYNC_ALL), updating fd_write_failed.
 */
2694 #ifdef HAVE_FILE_FSYNC_4ARGS
2695 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2697        struct dentry *dentry = file->f_dentry;
2698 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2699 int ll_fsync(struct file *file, int datasync)
2701        struct dentry *dentry = file->f_dentry;
2703        loff_t end = LLONG_MAX;
2705 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2708        loff_t end = LLONG_MAX;
2710        struct inode *inode = dentry->d_inode;
2711        struct ll_inode_info *lli = ll_i2info(inode);
2712        struct ptlrpc_request *req;
2713        struct obd_capa *oc;
2717        CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2718               PFID(ll_inode2fid(inode)), inode);
2719        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2721 #ifdef HAVE_FILE_FSYNC_4ARGS
2722        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2723        mutex_lock(&inode->i_mutex);
2725        /* fsync's caller has already called _fdata{sync,write}, we want
2726         * that IO to finish before calling the osc and mdc sync methods */
2727        rc = filemap_fdatawait(inode->i_mapping);
2730        /* catch async errors that were recorded back when async writeback
2731         * failed for pages in this mapping. */
2732        if (!S_ISDIR(inode->i_mode)) {
2733                err = lli->lli_async_rc;
2734                lli->lli_async_rc = 0;
2737                        err = lov_read_and_clear_async_rc(lli->lli_clob);
/* MDS capability for the metadata fsync. */
2742        oc = ll_mdscapa_get(inode);
2743        err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2749                ptlrpc_req_finished(req);
2751        if (S_ISREG(inode->i_mode)) {
2752                struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2754                err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2755                if (rc == 0 && err < 0)
/* Remember data-sync outcome so ll_flush() doesn't double-report. */
2758                        fd->fd_write_failed = true;
2760                        fd->fd_write_failed = false;
2763 #ifdef HAVE_FILE_FSYNC_4ARGS
2764        mutex_unlock(&inode->i_mutex);
/*
 * ll_file_flock() - handle flock()/fcntl() byte-range and whole-file
 * locks by translating them into LDLM_FLOCK enqueues on the MDT, then
 * mirroring the result into the local VFS lock tables
 * (flock_lock_file_wait()/posix_lock_file_wait()).  On a local VFS
 * failure the server lock is dropped again with an LCK_NL enqueue so
 * client and server state stay consistent.
 */
2770 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2772 struct inode *inode = file->f_dentry->d_inode;
2773 struct ll_sb_info *sbi = ll_i2sbi(inode);
2774 struct ldlm_enqueue_info einfo = {
2775 .ei_type = LDLM_FLOCK,
2776 .ei_cb_cp = ldlm_flock_completion_ast,
2777 .ei_cbdata = file_lock,
2779 struct md_op_data *op_data;
2780 struct lustre_handle lockh = {0};
2781 ldlm_policy_data_t flock = {{0}};
2782 int fl_type = file_lock->fl_type;
2788 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2789 PFID(ll_inode2fid(inode)), file_lock);
2791 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2793 if (file_lock->fl_flags & FL_FLOCK) {
2794 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2795 /* flocks are whole-file locks */
2796 flock.l_flock.end = OFFSET_MAX;
2797 /* For flocks owner is determined by the local file descriptor */
2798 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2799 } else if (file_lock->fl_flags & FL_POSIX) {
2800 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2801 flock.l_flock.start = file_lock->fl_start;
2802 flock.l_flock.end = file_lock->fl_end;
2806 flock.l_flock.pid = file_lock->fl_pid;
2808 /* Somewhat ugly workaround for svc lockd.
2809 * lockd installs custom fl_lmops->lm_compare_owner that checks
2810 * for the fl_owner to be the same (which it always is on local node
2811 * I guess between lockd processes) and then compares pid.
2812 * As such we assign pid to the owner field to make it all work,
2813 * conflict with normal locks is unlikely since pid space and
2814 * pointer space for current->files are not intersecting */
2815 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2816 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
2820 einfo.ei_mode = LCK_PR;
2823 /* An unlock request may or may not have any relation to
2824 * existing locks so we may not be able to pass a lock handle
2825 * via a normal ldlm_lock_cancel() request. The request may even
2826 * unlock a byte range in the middle of an existing lock. In
2827 * order to process an unlock request we need all of the same
2828 * information that is given with a normal read or write record
2829 * lock request. To avoid creating another ldlm unlock (cancel)
2830 * message we'll treat a LCK_NL flock request as an unlock. */
2831 einfo.ei_mode = LCK_NL;
2834 einfo.ei_mode = LCK_PW;
2837 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
2852 flags = LDLM_FL_BLOCK_NOWAIT;
2858 flags = LDLM_FL_TEST_LOCK;
2861 CERROR("unknown fcntl lock command: %d\n", cmd);
2865 /* Save the old mode so that if the mode in the lock changes we
2866 * can decrement the appropriate reader or writer refcount. */
2867 file_lock->fl_type = einfo.ei_mode;
2869 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2870 LUSTRE_OPC_ANY, NULL);
2871 if (IS_ERR(op_data))
2872 RETURN(PTR_ERR(op_data));
2874 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2875 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2876 flock.l_flock.pid, flags, einfo.ei_mode,
2877 flock.l_flock.start, flock.l_flock.end);
2879 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2882 /* Restore the file lock type if not TEST lock. */
2883 if (!(flags & LDLM_FL_TEST_LOCK))
2884 file_lock->fl_type = fl_type;
2886 if ((file_lock->fl_flags & FL_FLOCK) &&
2887 (rc == 0 || file_lock->fl_type == F_UNLCK))
2888 rc2 = flock_lock_file_wait(file, file_lock);
2889 if ((file_lock->fl_flags & FL_POSIX) &&
2890 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2891 !(flags & LDLM_FL_TEST_LOCK))
2892 rc2 = posix_lock_file_wait(file, file_lock);
2894 if (rc2 && file_lock->fl_type != F_UNLCK) {
2895 einfo.ei_mode = LCK_NL;
2896 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2901 ll_finish_md_op_data(op_data);
/*
 * ll_get_fid_by_name() - look up the FID of @name under @parent via an
 * MDS getattr-by-name RPC and store it in *@fid.  Returns 0 or a
 * negative errno (-EFAULT if the reply carries no MDT body).
 */
2906 int ll_get_fid_by_name(struct inode *parent, const char *name,
2907 int namelen, struct lu_fid *fid)
2909 struct md_op_data *op_data = NULL;
2910 struct mdt_body *body;
2911 struct ptlrpc_request *req;
2915 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2916 LUSTRE_OPC_ANY, NULL);
2917 if (IS_ERR(op_data))
2918 RETURN(PTR_ERR(op_data));
2920 op_data->op_valid = OBD_MD_FLID;
2921 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2922 ll_finish_md_op_data(op_data);
2926 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2928 GOTO(out_req, rc = -EFAULT);
2930 *fid = body->mbo_fid1;
2932 ptlrpc_req_finished(req);
/*
 * ll_migrate() - migrate entry @name under @parent to MDT @mdtidx.
 * Resolves the child's FID (from the dcache if possible, otherwise by
 * name lookup), verifies the target MDT differs from the current one,
 * then issues a rename RPC with CLI_MIGRATE set.  A dcache-resolved
 * child inode is held with i_mutex locked across the migration and its
 * aliases invalidated; nlink is cleared on the stale local inode before
 * release.
 */
2936 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2937 const char *name, int namelen)
2939 struct dentry *dchild = NULL;
2940 struct inode *child_inode = NULL;
2941 struct md_op_data *op_data;
2942 struct ptlrpc_request *request = NULL;
2947 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2948 name, PFID(ll_inode2fid(parent)), mdtidx);
2950 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2951 0, LUSTRE_OPC_ANY, NULL);
2952 if (IS_ERR(op_data))
2953 RETURN(PTR_ERR(op_data));
2955 /* Get child FID first */
2956 qstr.hash = full_name_hash(name, namelen);
2959 dchild = d_lookup(file->f_dentry, &qstr);
2960 if (dchild != NULL) {
2961 if (dchild->d_inode != NULL) {
2962 child_inode = igrab(dchild->d_inode);
2963 if (child_inode != NULL) {
2964 mutex_lock(&child_inode->i_mutex);
2965 op_data->op_fid3 = *ll_inode2fid(child_inode);
2966 ll_invalidate_aliases(child_inode);
2971 rc = ll_get_fid_by_name(parent, name, namelen,
2977 if (!fid_is_sane(&op_data->op_fid3)) {
2978 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
2979 ll_get_fsname(parent->i_sb, NULL, 0), name,
2980 PFID(&op_data->op_fid3));
2981 GOTO(out_free, rc = -EINVAL);
2984 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
2989 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
2990 PFID(&op_data->op_fid3), mdtidx);
2991 GOTO(out_free, rc = 0);
2994 op_data->op_mds = mdtidx;
2995 op_data->op_cli_flags = CLI_MIGRATE;
2996 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
2997 namelen, name, namelen, &request);
2999 ll_update_times(request, parent);
3001 ptlrpc_req_finished(request);
3006 if (child_inode != NULL) {
3007 clear_nlink(child_inode);
3008 mutex_unlock(&child_inode->i_mutex);
3012 ll_finish_md_op_data(op_data);
3017 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3025 * test if some locks matching bits and l_req_mode are acquired
3026 * - bits can be in different locks
3027 * - if found clear the common lock bits in *bits
3028 * - the bits not found, are kept in *bits
3030 * \param bits [IN] searched lock bits
3031 * \param l_req_mode [IN] searched lock mode
3032 * \retval boolean, true iff all bits are found
3034 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3036 struct lustre_handle lockh;
3037 ldlm_policy_data_t policy;
3038 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3039 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3048 fid = &ll_i2info(inode)->lli_fid;
3049 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3050 ldlm_lockname[mode]);
3052 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe each inodebit individually; a matched lock may cover several
 * bits, so all bits granted by that lock are cleared at once. */
3053 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3054 policy.l_inodebits.bits = *bits & (1 << i);
3055 if (policy.l_inodebits.bits == 0)
3058 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3059 &policy, mode, &lockh)) {
3060 struct ldlm_lock *lock;
3062 lock = ldlm_handle2lock(&lockh);
3065 ~(lock->l_policy_data.l_inodebits.bits);
3066 LDLM_LOCK_PUT(lock);
3068 *bits &= ~policy.l_inodebits.bits;
/*
 * ll_take_md_lock() - try to match (and take a reference on) a granted
 * MD lock covering inodebits @bits in one of the modes in @mode; the
 * matched handle is returned in *@lockh.  Returns the matched mode, or
 * 0 when no lock matched.
 */
3075 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3076 struct lustre_handle *lockh, __u64 flags,
3079 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3084 fid = &ll_i2info(inode)->lli_fid;
3085 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3087 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3088 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * ll_inode_revalidate_fini() - post-process a revalidation RPC result:
 * -ENOENT on a non-regular/non-directory inode is downgraded (object
 * already unlinked), other errors are logged at a severity that depends
 * on whether they are expected (-EACCES/-EIDRM) or not.
 */
3093 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3095 /* Already unlinked. Just update nlink and return success */
3096 if (rc == -ENOENT) {
3098 /* This path cannot be hit for regular files unless in
3099 * case of obscure races, so no need to validate
3101 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3103 } else if (rc != 0) {
3104 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3105 "%s: revalidate FID "DFID" error: rc = %d\n",
3106 ll_get_fsname(inode->i_sb, NULL, 0),
3107 PFID(ll_inode2fid(inode)), rc);
/*
 * __ll_inode_revalidate() - refresh dentry/inode attributes from the
 * MDS for the inodebits in @ibits.  With OBD_CONNECT_ATTRFID the server
 * supports getattr by FID through an intent lock (IT_GETATTR, or
 * IT_LOOKUP when only the LOOKUP bit is wanted); otherwise a plain
 * md_getattr() is issued, but only if no matching MD lock is already
 * cached locally.
 */
3113 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3115 struct inode *inode = dentry->d_inode;
3116 struct ptlrpc_request *req = NULL;
3117 struct obd_export *exp;
3121 LASSERT(inode != NULL);
3123 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3124 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3126 exp = ll_i2mdexp(inode);
3128 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3129 * But under CMD case, it caused some lock issues, should be fixed
3130 * with new CMD ibits lock. See bug 12718 */
3131 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3132 struct lookup_intent oit = { .it_op = IT_GETATTR };
3133 struct md_op_data *op_data;
3135 if (ibits == MDS_INODELOCK_LOOKUP)
3136 oit.it_op = IT_LOOKUP;
3138 /* Call getattr by fid, so do not provide name at all. */
3139 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3140 dentry->d_inode, NULL, 0, 0,
3141 LUSTRE_OPC_ANY, NULL);
3142 if (IS_ERR(op_data))
3143 RETURN(PTR_ERR(op_data));
3145 rc = md_intent_lock(exp, op_data, &oit, &req,
3146 &ll_md_blocking_ast, 0);
3147 ll_finish_md_op_data(op_data);
3149 rc = ll_inode_revalidate_fini(inode, rc);
3153 rc = ll_revalidate_it_finish(req, &oit, dentry);
3155 ll_intent_release(&oit);
3159 /* Unlinked? Unhash dentry, so it is not picked up later by
3160 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3161 here to preserve get_cwd functionality on 2.6.
3163 if (!dentry->d_inode->i_nlink)
3164 d_lustre_invalidate(dentry, 0);
3166 ll_lookup_finish_locks(&oit, dentry);
3167 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3168 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3169 u64 valid = OBD_MD_FLGETATTR;
3170 struct md_op_data *op_data;
/* Regular files also fetch striping EA; size the reply buffer for it. */
3173 if (S_ISREG(inode->i_mode)) {
3174 rc = ll_get_default_mdsize(sbi, &ealen);
3177 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3180 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3181 0, ealen, LUSTRE_OPC_ANY,
3183 if (IS_ERR(op_data))
3184 RETURN(PTR_ERR(op_data));
3186 op_data->op_valid = valid;
3187 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3188 * capa for this inode. Because we only keep capas of dirs
3190 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3191 ll_finish_md_op_data(op_data);
3193 rc = ll_inode_revalidate_fini(inode, rc);
3197 rc = ll_prep_inode(&inode, req, NULL, NULL);
3200 ptlrpc_req_finished(req);
/*
 * ll_merge_md_attr() - for a striped directory, merge the attributes of
 * all stripes (via md_merge_attr()) and apply the combined nlink,
 * blocks, size and a/m/ctime to the master inode.
 */
3204 static int ll_merge_md_attr(struct inode *inode)
3206 struct cl_attr attr = { 0 };
3209 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3210 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3211 &attr, ll_md_blocking_ast);
3215 set_nlink(inode, attr.cat_nlink);
3216 inode->i_blocks = attr.cat_blocks;
3217 i_size_write(inode, attr.cat_size);
3219 ll_i2info(inode)->lli_atime = attr.cat_atime;
3220 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3221 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * ll_inode_revalidate() - revalidate MD attributes via
 * __ll_inode_revalidate(), then make the size/time attributes current:
 * striped directories merge stripe attributes, other non-regular inodes
 * copy cached a/m/ctime, and regular files glimpse the size from the
 * OSTs unless an HSM restore is in progress (the MDT already supplied
 * the correct size and glimpse would block until the restore ends).
 */
3227 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3229 struct inode *inode = dentry->d_inode;
3233 rc = __ll_inode_revalidate(dentry, ibits);
3237 /* if object isn't regular file, don't validate size */
3238 if (!S_ISREG(inode->i_mode)) {
3239 if (S_ISDIR(inode->i_mode) &&
3240 ll_i2info(inode)->lli_lsm_md != NULL) {
3241 rc = ll_merge_md_attr(inode);
3246 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3247 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3248 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3250 /* In case of restore, the MDT has the right size and has
3251 * already send it back without granting the layout lock,
3252 * inode is up-to-date so glimpse is useless.
3253 * Also to glimpse we need the layout, in case of a running
3254 * restore the MDT holds the layout lock so the glimpse will
3255 * block up to the end of restore (getattr will block)
3257 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3258 rc = ll_glimpse_size(inode);
/*
 * ll_getattr() - VFS getattr: revalidate UPDATE|LOOKUP ibits, then fill
 * *stat from the (now current) inode.  With a 32-bit userspace API the
 * inode number is rebuilt from the FID so it fits in 32 bits.
 */
3263 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3265 struct inode *inode = de->d_inode;
3266 struct ll_sb_info *sbi = ll_i2sbi(inode);
3267 struct ll_inode_info *lli = ll_i2info(inode);
3270 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3271 MDS_INODELOCK_LOOKUP);
3272 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3277 stat->dev = inode->i_sb->s_dev;
3278 if (ll_need_32bit_api(sbi))
3279 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3281 stat->ino = inode->i_ino;
3282 stat->mode = inode->i_mode;
3283 stat->uid = inode->i_uid;
3284 stat->gid = inode->i_gid;
3285 stat->rdev = inode->i_rdev;
3286 stat->atime = inode->i_atime;
3287 stat->mtime = inode->i_mtime;
3288 stat->ctime = inode->i_ctime;
3289 stat->blksize = 1 << inode->i_blkbits;
3291 stat->nlink = inode->i_nlink;
3292 stat->size = i_size_read(inode);
3293 stat->blocks = inode->i_blocks;
/*
 * ll_fiemap() - FIEMAP inode operation: marshal the kernel's
 * fiemap_extent_info into a Lustre ll_user_fiemap buffer, run
 * ll_do_fiemap(), and copy flags/extent results back to the caller.
 * NOTE(review): only the first extent is copied in (one
 * sizeof(struct ll_fiemap_extent)) — presumably to seed fm_extents[0]
 * as a continuation cookie; confirm against ll_do_fiemap().
 */
3298 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3299 __u64 start, __u64 len)
3303 struct ll_user_fiemap *fiemap;
3304 unsigned int extent_count = fieinfo->fi_extents_max;
3306 num_bytes = sizeof(*fiemap) + (extent_count *
3307 sizeof(struct ll_fiemap_extent));
3308 OBD_ALLOC_LARGE(fiemap, num_bytes);
3313 fiemap->fm_flags = fieinfo->fi_flags;
3314 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3315 fiemap->fm_start = start;
3316 fiemap->fm_length = len;
3317 if (extent_count > 0)
3318 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3319 sizeof(struct ll_fiemap_extent));
3321 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3323 fieinfo->fi_flags = fiemap->fm_flags;
3324 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3325 if (extent_count > 0)
3326 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3327 fiemap->fm_mapped_extents *
3328 sizeof(struct ll_fiemap_extent));
3330 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * ll_get_acl() - return a referenced copy of the cached POSIX ACL,
 * taken under lli_lock.  The caller (VFS permission checking) releases
 * the reference.
 */
3334 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3336 struct ll_inode_info *lli = ll_i2info(inode);
3337 struct posix_acl *acl = NULL;
3340 spin_lock(&lli->lli_lock);
3341 /* VFS' acl_permission_check->check_acl will release the refcount */
3342 acl = posix_acl_dup(lli->lli_posix_acl);
3343 spin_unlock(&lli->lli_lock);
/*
 * ll_check_acl() - ACL checker passed to generic_permission() on kernels
 * without the 2-arg generic_permission().  Under RCU walk
 * (IPERM_FLAG_RCU) it bails out early; without CONFIG_FS_POSIX_ACL it
 * is a stub.
 */
3348 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3350 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3351 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3353 ll_check_acl(struct inode *inode, int mask)
3356 # ifdef CONFIG_FS_POSIX_ACL
3357 struct posix_acl *acl;
3361 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3362 if (flags & IPERM_FLAG_RCU)
3365 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3370 rc = posix_acl_permission(inode, acl, mask);
3371 posix_acl_release(acl);
3374 # else /* !CONFIG_FS_POSIX_ACL */
3376 # endif /* CONFIG_FS_POSIX_ACL */
3378 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * ll_inode_permission() - VFS permission check.  Revalidates the root
 * inode on first access, optionally squashes root credentials
 * (fsuid/fsgid and FS capabilities) per the configured root_squash
 * settings, then delegates to the remote-permission path or to
 * generic_permission().  Squashed credentials are reverted before
 * returning.
 */
3380 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3381 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3383 # ifdef HAVE_INODE_PERMISION_2ARGS
3384 int ll_inode_permission(struct inode *inode, int mask)
3386 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3391 struct ll_sb_info *sbi;
3392 struct root_squash_info *squash;
3393 struct cred *cred = NULL;
3394 const struct cred *old_cred = NULL;
3396 bool squash_id = false;
/* Cannot block during RCU path walk; ask VFS to retry in ref-walk mode. */
3399 #ifdef MAY_NOT_BLOCK
3400 if (mask & MAY_NOT_BLOCK)
3402 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3403 if (flags & IPERM_FLAG_RCU)
3407 /* as root inode are NOT getting validated in lookup operation,
3408 * need to do it before permission check. */
3410 if (inode == inode->i_sb->s_root->d_inode) {
3411 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3412 MDS_INODELOCK_LOOKUP);
3417 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3418 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3420 /* squash fsuid/fsgid if needed */
3421 sbi = ll_i2sbi(inode);
3422 squash = &sbi->ll_squash;
3423 if (unlikely(squash->rsi_uid != 0 &&
3424 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3425 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3429 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3430 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3431 squash->rsi_uid, squash->rsi_gid);
3433 /* update current process's credentials
3434 * and FS capability */
3435 cred = prepare_creds();
3439 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3440 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
3441 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3442 if ((1 << cap) & CFS_CAP_FS_MASK)
3443 cap_lower(cred->cap_effective, cap);
3445 old_cred = override_creds(cred);
3448 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3450 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3451 rc = lustre_check_remote_perm(inode, mask);
3453 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3455 /* restore current process's credentials and FS capability */
3457 revert_creds(old_cred);
/* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no .flock/.lock methods, so the kernel's
 * local lock handling applies. */
3465 struct file_operations ll_file_operations = {
3466 .read = ll_file_read,
3467 .aio_read = ll_file_aio_read,
3468 .write = ll_file_write,
3469 .aio_write = ll_file_aio_write,
3470 .unlocked_ioctl = ll_file_ioctl,
3471 .open = ll_file_open,
3472 .release = ll_file_release,
3473 .mmap = ll_file_mmap,
3474 .llseek = ll_file_seek,
3475 .splice_read = ll_file_splice_read,
/* file_operations for -o flock: cluster-coherent flock/posix locks via
 * ll_file_flock(). */
3480 struct file_operations ll_file_operations_flock = {
3481 .read = ll_file_read,
3482 .aio_read = ll_file_aio_read,
3483 .write = ll_file_write,
3484 .aio_write = ll_file_aio_write,
3485 .unlocked_ioctl = ll_file_ioctl,
3486 .open = ll_file_open,
3487 .release = ll_file_release,
3488 .mmap = ll_file_mmap,
3489 .llseek = ll_file_seek,
3490 .splice_read = ll_file_splice_read,
3493 .flock = ll_file_flock,
3494 .lock = ll_file_flock
/* These are for -o noflock - to return ENOSYS on flock calls */
3498 struct file_operations ll_file_operations_noflock = {
3499 .read = ll_file_read,
3500 .aio_read = ll_file_aio_read,
3501 .write = ll_file_write,
3502 .aio_write = ll_file_aio_write,
3503 .unlocked_ioctl = ll_file_ioctl,
3504 .open = ll_file_open,
3505 .release = ll_file_release,
3506 .mmap = ll_file_mmap,
3507 .llseek = ll_file_seek,
3508 .splice_read = ll_file_splice_read,
3511 .flock = ll_file_noflock,
3512 .lock = ll_file_noflock
/* inode_operations for regular Lustre files. */
3515 struct inode_operations ll_file_inode_operations = {
3516 .setattr = ll_setattr,
3517 .getattr = ll_getattr,
3518 .permission = ll_inode_permission,
3519 .setxattr = ll_setxattr,
3520 .getxattr = ll_getxattr,
3521 .listxattr = ll_listxattr,
3522 .removexattr = ll_removexattr,
3523 .fiemap = ll_fiemap,
3524 #ifdef HAVE_IOP_GET_ACL
3525 .get_acl = ll_get_acl,
/* dynamic ioctl number support routines */
/* Registry of externally registered ioctl handlers, protected by a
 * read/write semaphore. */
3530 static struct llioc_ctl_data {
3531 struct rw_semaphore ioc_sem;
3532 struct list_head ioc_head;
3534 __RWSEM_INITIALIZER(llioc.ioc_sem),
3535 LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the ioctl command numbers it
 * serves (iocd_cmd is a flexible trailing array of iocd_count entries). */
3540 struct list_head iocd_list;
3541 unsigned int iocd_size;
3542 llioc_callback_t iocd_cb;
3543 unsigned int iocd_count;
3544 unsigned int iocd_cmd[0];
/*
 * ll_iocontrol_register() - register callback @cb for @count dynamic
 * ioctl commands listed in @cmd.  Returns an opaque cookie (the
 * allocated record) to pass to ll_iocontrol_unregister(), or NULL on
 * invalid arguments or allocation failure.
 */
3547 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3550 struct llioc_data *in_data = NULL;
3553 if (cb == NULL || cmd == NULL ||
3554 count > LLIOC_MAX_CMD || count < 0)
3557 size = sizeof(*in_data) + count * sizeof(unsigned int);
3558 OBD_ALLOC(in_data, size);
3559 if (in_data == NULL)
3562 memset(in_data, 0, sizeof(*in_data));
3563 in_data->iocd_size = size;
3564 in_data->iocd_cb = cb;
3565 in_data->iocd_count = count;
3566 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3568 down_write(&llioc.ioc_sem);
3569 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3570 up_write(&llioc.ioc_sem);
/*
 * ll_iocontrol_unregister() - remove and free the handler record
 * identified by @magic (the cookie from ll_iocontrol_register()); warns
 * if no matching record is found.
 */
3575 void ll_iocontrol_unregister(void *magic)
3577 struct llioc_data *tmp;
3582 down_write(&llioc.ioc_sem);
3583 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3585 unsigned int size = tmp->iocd_size;
3587 list_del(&tmp->iocd_list);
3588 up_write(&llioc.ioc_sem);
3590 OBD_FREE(tmp, size);
3594 up_write(&llioc.ioc_sem);
3596 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3599 EXPORT_SYMBOL(ll_iocontrol_register);
3600 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * ll_iocontrol_call() - dispatch @cmd to the registered dynamic ioctl
 * handlers under the read lock; iteration stops when a handler returns
 * LLIOC_STOP.  The handler's return code is passed back via *rcp.
 */
3602 static enum llioc_iter
3603 ll_iocontrol_call(struct inode *inode, struct file *file,
3604 unsigned int cmd, unsigned long arg, int *rcp)
3606 enum llioc_iter ret = LLIOC_CONT;
3607 struct llioc_data *data;
3608 int rc = -EINVAL, i;
3610 down_read(&llioc.ioc_sem);
3611 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3612 for (i = 0; i < data->iocd_count; i++) {
3613 if (cmd != data->iocd_cmd[i])
3616 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3620 if (ret == LLIOC_STOP)
3623 up_read(&llioc.ioc_sem);
/*
 * ll_layout_conf() - push a layout configuration into the cl_object via
 * cl_conf_set().  For OBJECT_CONF_SET the new layout generation is
 * recorded and the layout lock is only made matchable
 * (ldlm_lock_allow_match()) after the layout has been applied, so other
 * threads never match a lock whose layout is not yet visible.
 */
3630 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3632 struct ll_inode_info *lli = ll_i2info(inode);
3633 struct cl_env_nest nest;
3638 if (lli->lli_clob == NULL)
3641 env = cl_env_nested_get(&nest);
3643 RETURN(PTR_ERR(env));
3645 result = cl_conf_set(env, lli->lli_clob, conf);
3646 cl_env_nested_put(&nest, env);
3648 if (conf->coc_opc == OBJECT_CONF_SET) {
3649 struct ldlm_lock *lock = conf->coc_lock;
3651 LASSERT(lock != NULL);
3652 LASSERT(ldlm_has_layout(lock));
3654 struct lustre_md *md = conf->u.coc_md;
3655 __u32 gen = LL_LAYOUT_GEN_EMPTY;
3657 /* it can only be allowed to match after layout is
3658 * applied to inode otherwise false layout would be
3659 * seen. Applying layout should happen before dropping
3660 * the intent lock. */
3661 ldlm_lock_allow_match(lock);
3663 lli->lli_has_smd = lsm_has_objects(md->lsm);
3664 if (md->lsm != NULL)
3665 gen = md->lsm->lsm_layout_gen;
3668 DFID ": layout version change: %u -> %u\n",
3669 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3671 ll_layout_version_set(lli, gen);
/* Fetch layout from MDT with getxattr request, if it's not ready yet */
3678 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3681 struct ll_sb_info *sbi = ll_i2sbi(inode);
3682 struct obd_capa *oc;
3683 struct ptlrpc_request *req;
3684 struct mdt_body *body;
3691 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3692 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3693 lock->l_lvb_data, lock->l_lvb_len);
/* Nothing to do if the LVB already carries a ready layout. */
3695 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3698 /* if layout lock was granted right away, the layout is returned
3699 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3700 * blocked and then granted via completion ast, we have to fetch
3701 * layout here. Please note that we can't use the LVB buffer in
3702 * completion AST because it doesn't have a large enough buffer */
3703 oc = ll_mdscapa_get(inode);
3704 rc = ll_get_default_mdsize(sbi, &lmmsize);
3706 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3707 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3713 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3715 GOTO(out, rc = -EPROTO);
3717 lmmsize = body->mbo_eadatasize;
3718 if (lmmsize == 0) /* empty layout */
3721 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3723 GOTO(out, rc = -EFAULT);
/* Stash a private copy of the LOV EA as the lock's LVB, replacing any
 * stale buffer under the resource lock. */
3725 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3726 if (lvbdata == NULL)
3727 GOTO(out, rc = -ENOMEM);
3729 memcpy(lvbdata, lmm, lmmsize);
3730 lock_res_and_lock(lock);
3731 if (lock->l_lvb_data != NULL)
3732 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3734 lock->l_lvb_data = lvbdata;
3735 lock->l_lvb_len = lmmsize;
3736 unlock_res_and_lock(lock);
3741 ptlrpc_req_finished(req);
3746 * Apply the layout to the inode. Layout lock is held and will be released
/* Unpacks the layout from the lock's LVB, applies it to the cl_object
 * via ll_layout_conf(), and reports the resulting layout generation in
 * *gen.  When applying fails with -EBUSY (object still in use) it waits
 * for outstanding IO via OBJECT_CONF_WAIT.  The lock reference taken
 * here is always dropped before returning. */
3749 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3750 struct inode *inode, __u32 *gen, bool reconf)
3752 struct ll_inode_info *lli = ll_i2info(inode);
3753 struct ll_sb_info *sbi = ll_i2sbi(inode);
3754 struct ldlm_lock *lock;
3755 struct lustre_md md = { NULL };
3756 struct cl_object_conf conf;
3759 bool wait_layout = false;
3762 LASSERT(lustre_handle_is_used(lockh));
3764 lock = ldlm_handle2lock(lockh);
3765 LASSERT(lock != NULL);
3766 LASSERT(ldlm_has_layout(lock));
3768 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d",
3769 PFID(&lli->lli_fid), inode, reconf);
3771 /* in case this is a caching lock and reinstate with new inode */
3772 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3774 lock_res_and_lock(lock);
3775 lvb_ready = ldlm_is_lvb_ready(lock);
3776 unlock_res_and_lock(lock);
3777 /* checking lvb_ready is racy but this is okay. The worst case is
3778 * that multi processes may configure the file on the same time. */
3780 if (lvb_ready || !reconf) {
3783 /* layout_gen must be valid if layout lock is not
3784 * cancelled and stripe has already set */
3785 *gen = ll_layout_version_get(lli);
3791 rc = ll_layout_fetch(inode, lock);
3795 /* for layout lock, lmm is returned in lock's lvb.
3796 * lvb_data is immutable if the lock is held so it's safe to access it
3797 * without res lock. See the description in ldlm_lock_decref_internal()
3798 * for the condition to free lvb_data of layout lock */
3799 if (lock->l_lvb_data != NULL) {
3800 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3801 lock->l_lvb_data, lock->l_lvb_len);
3803 *gen = LL_LAYOUT_GEN_EMPTY;
3805 *gen = md.lsm->lsm_layout_gen;
3808 CERROR("%s: file "DFID" unpackmd error: %d\n",
3809 ll_get_fsname(inode->i_sb, NULL, 0),
3810 PFID(&lli->lli_fid), rc);
3816 /* set layout to file. Unlikely this will fail as old layout was
3817 * surely eliminated */
3818 memset(&conf, 0, sizeof conf);
3819 conf.coc_opc = OBJECT_CONF_SET;
3820 conf.coc_inode = inode;
3821 conf.coc_lock = lock;
3822 conf.u.coc_md = &md;
3823 rc = ll_layout_conf(inode, &conf);
3826 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3828 /* refresh layout failed, need to wait */
3829 wait_layout = rc == -EBUSY;
3833 LDLM_LOCK_PUT(lock);
3834 ldlm_lock_decref(lockh, mode);
3836 /* wait for IO to complete if it's still being used. */
3838 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3839 ll_get_fsname(inode->i_sb, NULL, 0),
3840 PFID(&lli->lli_fid), inode);
3842 memset(&conf, 0, sizeof conf);
3843 conf.coc_opc = OBJECT_CONF_WAIT;
3844 conf.coc_inode = inode;
3845 rc = ll_layout_conf(inode, &conf);
3849 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3850 ll_get_fsname(inode->i_sb, NULL, 0),
3851 PFID(&lli->lli_fid), rc);
3857 * This function checks if there exists a LAYOUT lock on the client side,
3858 * or enqueues it if it doesn't have one in cache.
3860 * This function will not hold layout lock so it may be revoked any time after
3861 * this function returns. Any operations depend on layout should be redone
3864 * This function should be called before lov_io_init() to get an uptodate
3865 * layout version, the caller should save the version number and after IO
3866 * is finished, this function should be called again to verify that layout
3867 * is not changed during IO time.
3869 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3871 struct ll_inode_info *lli = ll_i2info(inode);
3872 struct ll_sb_info *sbi = ll_i2sbi(inode);
3873 struct md_op_data *op_data;
3874 struct lookup_intent it;
3875 struct lustre_handle lockh;
3877 struct ldlm_enqueue_info einfo = {
3878 .ei_type = LDLM_IBITS,
3880 .ei_cb_bl = &ll_md_blocking_ast,
3881 .ei_cb_cp = &ldlm_completion_ast,
/* Fast path: layout lock feature disabled or a valid generation is
 * already cached. */
3886 *gen = ll_layout_version_get(lli);
3887 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
3891 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3892 LASSERT(S_ISREG(inode->i_mode));
3894 /* take layout lock mutex to enqueue layout lock exclusively. */
3895 mutex_lock(&lli->lli_layout_mutex);
3898 /* mostly layout lock is caching on the local side, so try to match
3899 * it before grabbing layout lock mutex. */
3900 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3901 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3902 if (mode != 0) { /* hit cached lock */
3903 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3907 mutex_unlock(&lli->lli_layout_mutex);
3911 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3912 0, 0, LUSTRE_OPC_ANY, NULL);
3913 if (IS_ERR(op_data)) {
3914 mutex_unlock(&lli->lli_layout_mutex);
3915 RETURN(PTR_ERR(op_data));
3918 /* have to enqueue one */
3919 memset(&it, 0, sizeof(it));
3920 it.it_op = IT_LAYOUT;
3921 lockh.cookie = 0ULL;
3923 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3924 ll_get_fsname(inode->i_sb, NULL, 0),
3925 PFID(&lli->lli_fid), inode);
3927 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
3928 if (it.d.lustre.it_data != NULL)
3929 ptlrpc_req_finished(it.d.lustre.it_data);
3930 it.d.lustre.it_data = NULL;
3932 ll_finish_md_op_data(op_data);
3934 mode = it.d.lustre.it_lock_mode;
3935 it.d.lustre.it_lock_mode = 0;
3936 ll_intent_drop_lock(&it);
3939 /* set lock data in case this is a new lock */
3940 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3941 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3945 mutex_unlock(&lli->lli_layout_mutex);
3951 * This function send a restore request to the MDT
3953 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3955 struct hsm_user_request *hur;
3959 len = sizeof(struct hsm_user_request) +
3960 sizeof(struct hsm_user_item);
3961 OBD_ALLOC(hur, len);
3965 hur->hur_request.hr_action = HUA_RESTORE;
3966 hur->hur_request.hr_archive_id = 0;
3967 hur->hur_request.hr_flags = 0;
3968 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3969 sizeof(hur->hur_user_item[0].hui_fid));
3970 hur->hur_user_item[0].hui_extent.offset = offset;
3971 hur->hur_user_item[0].hui_extent.length = length;
3972 hur->hur_request.hr_itemcount = 1;
3973 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,