4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
50 #include <lustre_ioctl.h>
52 #include "cl_object.h"
55 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
57 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
60 static enum llioc_iter
61 ll_iocontrol_call(struct inode *inode, struct file *file,
62 unsigned int cmd, unsigned long arg, int *rcp);
/* Allocate a per-open-file ll_file_data from its slab cache.
 * GFP_NOFS avoids filesystem re-entry during memory reclaim.
 * NOTE(review): the allocation-failure check and return statement are
 * elided in this extract — confirm NULL is handled before dereference. */
64 static struct ll_file_data *ll_file_data_get(void)
66 struct ll_file_data *fd;
68 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
/* Start with a clean write-failure state for the new descriptor. */
72 fd->fd_write_failed = false;
/* Release a per-open-file ll_file_data back to its slab cache. */
77 static void ll_file_data_put(struct ll_file_data *fd)
80 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Copy the inode's current attributes (FID, mode, a/m/ctime, size,
 * blocks, flags), the open handle @fh and the MDS capability into
 * @op_data so they can be shipped to the MDS. */
83 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
84 struct lustre_handle *fh)
86 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
87 op_data->op_attr.ia_mode = inode->i_mode;
88 op_data->op_attr.ia_atime = inode->i_atime;
89 op_data->op_attr.ia_mtime = inode->i_mtime;
90 op_data->op_attr.ia_ctime = inode->i_ctime;
91 op_data->op_attr.ia_size = i_size_read(inode);
92 op_data->op_attr_blocks = inode->i_blocks;
/* i_flags are translated to the on-wire (ext) flag format. */
93 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
95 op_data->op_handle = *fh;
96 op_data->op_capa1 = ll_mdscapa_get(inode);
/* If we locally modified data the MDS doesn't know about yet, ask it
 * to record DATA_MODIFIED with this operation. */
98 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
99 op_data->op_bias |= MDS_DATA_MODIFIED;
103 * Packs all the attributes into @op_data for the CLOSE rpc.
105 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
106 struct obd_client_handle *och)
/* Mode and all three timestamps are always sent back on close. */
110 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
111 ATTR_MTIME | ATTR_MTIME_SET |
112 ATTR_CTIME | ATTR_CTIME_SET;
/* NOTE(review): the lines between this condition and the ia_valid
 * update below are elided here — confirm which branch actually adds
 * ATTR_SIZE | ATTR_BLOCKS for non-write open handles. */
114 if (!(och->och_flags & FMODE_WRITE))
117 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* Fill in the inode attributes and the open handle, then finish
 * generic md_op_data setup for an ANY-type MD operation. */
120 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
121 ll_prep_md_op_data(op_data, inode, NULL, NULL,
122 0, 0, LUSTRE_OPC_ANY, NULL);
/* Send an MDS close RPC for the open handle @och on @inode.
 * If @data_version is non-NULL this close is an HSM release: the data
 * version and lease handle are packed so the MDT can verify the file
 * was not modified before releasing it.  On return the handle is
 * poisoned with DEAD_HANDLE_MAGIC and its replay data is cleared. */
126 static int ll_close_inode_openhandle(struct obd_export *md_exp,
128 struct obd_client_handle *och,
129 const __u64 *data_version)
131 struct obd_export *exp = ll_i2mdexp(inode);
132 struct md_op_data *op_data;
133 struct ptlrpc_request *req = NULL;
134 struct obd_device *obd = class_exp2obd(exp);
140 * XXX: in case of LMV, is this correct to access
143 CERROR("Invalid MDC connection handle "LPX64"\n",
144 ll_i2mdexp(inode)->exp_handle.h_cookie);
148 OBD_ALLOC_PTR(op_data);
150 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
152 ll_prepare_close(inode, op_data, och);
153 if (data_version != NULL) {
154 /* Pass in data_version implies release. */
155 op_data->op_bias |= MDS_HSM_RELEASE;
156 op_data->op_data_version = *data_version;
157 op_data->op_lease_handle = och->och_lease_handle;
158 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
161 rc = md_close(md_exp, op_data, och->och_mod, &req);
163 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
164 ll_i2mdexp(inode)->exp_obd->obd_name,
165 PFID(ll_inode2fid(inode)), rc);
168 /* DATA_MODIFIED flag was successfully sent on close, cancel data
169 * modification flag. */
170 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
171 struct ll_inode_info *lli = ll_i2info(inode);
/* lli_flags is protected by lli_lock. */
173 spin_lock(&lli->lli_lock);
174 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
175 spin_unlock(&lli->lli_lock);
/* For HSM release, check the reply body to see whether the MDT
 * actually released the file. */
178 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
179 struct mdt_body *body;
180 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
181 if (!(body->mbo_valid & OBD_MD_FLRELEASED))
185 ll_finish_md_op_data(op_data);
/* The handle must never be replayed or reused after this point. */
189 md_clear_open_replay_data(md_exp, och);
190 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
193 if (req) /* This is close request */
194 ptlrpc_req_finished(req);
/* Really close the MDS open handle of the given open mode (@fmode is
 * one of FMODE_WRITE / FMODE_EXEC / FMODE_READ) for @inode, but only
 * when no other local opens still use that handle. */
198 int ll_md_real_close(struct inode *inode, fmode_t fmode)
200 struct ll_inode_info *lli = ll_i2info(inode);
201 struct obd_client_handle **och_p;
202 struct obd_client_handle *och;
/* Pick the per-mode handle pointer and its use count. */
207 if (fmode & FMODE_WRITE) {
208 och_p = &lli->lli_mds_write_och;
209 och_usecount = &lli->lli_open_fd_write_count;
210 } else if (fmode & FMODE_EXEC) {
211 och_p = &lli->lli_mds_exec_och;
212 och_usecount = &lli->lli_open_fd_exec_count;
214 LASSERT(fmode & FMODE_READ);
215 och_p = &lli->lli_mds_read_och;
216 och_usecount = &lli->lli_open_fd_read_count;
/* lli_och_mutex serializes handle lookup/teardown. */
219 mutex_lock(&lli->lli_och_mutex);
220 if (*och_usecount > 0) {
221 /* There are still users of this handle, so skip
223 mutex_unlock(&lli->lli_och_mutex);
229 mutex_unlock(&lli->lli_och_mutex);
232 /* There might be a race and this handle may already
234 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file-descriptor close: drop group locks and leases held through
 * this fd, decrement the per-mode open counts, and — unless a matching
 * OPEN DLM lock lets us skip it — tell the MDS about the close via
 * ll_md_real_close().  Finally free the ll_file_data. */
241 static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
244 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
245 struct ll_inode_info *lli = ll_i2info(inode);
249 /* clear group lock, if present */
250 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
251 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
253 if (fd->fd_lease_och != NULL) {
256 /* Usually the lease is not released when the
257 * application crashed, we need to release here. */
258 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
259 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
260 PFID(&lli->lli_fid), rc, lease_broken);
262 fd->fd_lease_och = NULL;
/* A lease open keeps a private och on the fd; close it here. */
265 if (fd->fd_och != NULL) {
266 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
271 /* Let's see if we have good enough OPEN lock on the file and if
272 we can skip talking to MDS */
273 if (file->f_dentry->d_inode) { /* Can this ever be false? */
275 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
276 struct lustre_handle lockh;
277 struct inode *inode = file->f_dentry->d_inode;
278 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Drop this fd's reference on the per-mode open counts. */
280 mutex_lock(&lli->lli_och_mutex);
281 if (fd->fd_omode & FMODE_WRITE) {
283 LASSERT(lli->lli_open_fd_write_count);
284 lli->lli_open_fd_write_count--;
285 } else if (fd->fd_omode & FMODE_EXEC) {
287 LASSERT(lli->lli_open_fd_exec_count);
288 lli->lli_open_fd_exec_count--;
291 LASSERT(lli->lli_open_fd_read_count);
292 lli->lli_open_fd_read_count--;
294 mutex_unlock(&lli->lli_och_mutex);
/* No matching OPEN ibit lock means the MDS must be told now. */
296 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
297 LDLM_IBITS, &policy, lockmode,
299 rc = ll_md_real_close(file->f_dentry->d_inode,
303 CERROR("released file has negative dentry: file = %p, "
304 "dentry = %p, name = %s\n",
305 file, file->f_dentry, file->f_dentry->d_name.name);
/* Detach and free the per-open private data. */
309 LUSTRE_FPRIVATE(file) = NULL;
310 ll_file_data_put(fd);
311 ll_capa_close(inode);
316 /* While this returns an error code, fput() the caller does not, so we need
317 * to make every effort to clean up all of our state here. Also, applications
318 * rarely check close errors and even if an error is returned they will not
319 * re-try the close call.
321 int ll_file_release(struct inode *inode, struct file *file)
323 struct ll_file_data *fd;
324 struct ll_sb_info *sbi = ll_i2sbi(inode);
325 struct ll_inode_info *lli = ll_i2info(inode);
329 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
330 PFID(ll_inode2fid(inode)), inode);
/* Remote-client ACL bookkeeping is only kept on the root inode. */
332 #ifdef CONFIG_FS_POSIX_ACL
333 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
334 inode == inode->i_sb->s_root->d_inode) {
335 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
338 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
339 fd->fd_flags &= ~LL_FILE_RMTACL;
340 rct_del(&sbi->ll_rct, current_pid());
341 et_search_free(&sbi->ll_et, current_pid());
/* Root release is not counted in the RELEASE stats. */
346 if (inode->i_sb->s_root != file->f_dentry)
347 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
348 fd = LUSTRE_FPRIVATE(file);
351 /* The last ref on @file, maybe not the owner pid of statahead,
352 * because parent and child process can share the same file handle. */
353 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
354 ll_deauthorize_statahead(inode, fd);
/* Root has no MDS open handle: just free the fd and be done. */
356 if (inode->i_sb->s_root == file->f_dentry) {
357 LUSTRE_FPRIVATE(file) = NULL;
358 ll_file_data_put(fd);
/* Propagate any deferred async write error to this close. */
362 if (!S_ISDIR(inode->i_mode)) {
363 if (lli->lli_clob != NULL)
364 lov_read_and_clear_async_rc(lli->lli_clob);
365 lli->lli_async_rc = 0;
368 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/* Fault-injection hook: dump debug log on demand. */
370 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
371 libcfs_debug_dumplog();
/* Enqueue an OPEN intent lock on the MDS for @file (by FID; the name
 * is only packed when the server lacks OBD_CONNECT_OPEN_BY_FID).
 * @lmm/@lmmsize carry optional striping data.  On success the reply
 * is used to refresh the inode and attach the granted lock. */
376 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
377 struct lookup_intent *itp)
379 struct dentry *de = file->f_dentry;
380 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
381 struct dentry *parent = de->d_parent;
382 const char *name = NULL;
384 struct md_op_data *op_data;
385 struct ptlrpc_request *req = NULL;
389 LASSERT(parent != NULL);
390 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
392 /* if server supports open-by-fid, or file name is invalid, don't pack
393 * name in open request */
394 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
395 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
396 name = de->d_name.name;
397 len = de->d_name.len;
400 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
401 name, len, 0, LUSTRE_OPC_ANY, NULL);
403 RETURN(PTR_ERR(op_data));
404 op_data->op_data = lmm;
405 op_data->op_data_size = lmmsize;
407 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
408 &ll_md_blocking_ast, 0);
409 ll_finish_md_op_data(op_data);
411 /* reason for keep own exit path - don't flood log
412 * with messages with -ESTALE errors.
/* If the open did not succeed, release any server openhandle that
 * may nonetheless have been granted. */
414 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
415 it_open_error(DISP_OPEN_OPEN, itp))
417 ll_release_openhandle(de, itp);
421 if (it_disposition(itp, DISP_LOOKUP_NEG))
422 GOTO(out, rc = -ENOENT);
424 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
425 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
426 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the reply and attach the granted lock. */
430 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
431 if (!rc && itp->d.lustre.it_lock_mode)
432 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
435 ptlrpc_req_finished(req);
436 ll_intent_drop_lock(itp);
/* Populate an obd_client_handle from the server reply carried by the
 * intent @it (open handle, FID, lease lock cookie, flags), then
 * register the handle for open replay. */
441 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
442 struct obd_client_handle *och)
444 struct ptlrpc_request *req = it->d.lustre.it_data;
445 struct mdt_body *body;
447 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
448 och->och_fh = body->mbo_handle;
449 och->och_fid = body->mbo_fid1;
450 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
451 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
452 och->och_flags = it->it_flags;
454 return md_set_open_replay_data(md_exp, och, it);
/* Finish an open locally: optionally fill @och from the intent reply,
 * attach @fd as the file's private data, and initialize readahead,
 * the recorded open mode, and the cl_io context bookkeeping. */
457 static int ll_local_open(struct file *file, struct lookup_intent *it,
458 struct ll_file_data *fd, struct obd_client_handle *och)
460 struct inode *inode = file->f_dentry->d_inode;
463 LASSERT(!LUSTRE_FPRIVATE(file));
470 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
475 LUSTRE_FPRIVATE(file) = fd;
476 ll_readahead_init(inode, &fd->fd_ras);
477 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
479 /* ll_cl_context initialize */
480 rwlock_init(&fd->fd_lock);
481 INIT_LIST_HEAD(&fd->fd_lccs);
486 /* Open a file, and (for the very first open) create objects on the OSTs at
487 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
488 * creation or open until ll_lov_setstripe() ioctl is called.
490 * If we already have the stripe MD locally then we don't request it in
491 * md_open(), by passing a lmm_size = 0.
493 * It is up to the application to ensure no other processes open this file
494 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
495 * used. We might be able to avoid races of that sort by getting lli_open_sem
496 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
497 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
499 int ll_file_open(struct inode *inode, struct file *file)
501 struct ll_inode_info *lli = ll_i2info(inode);
502 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
503 .it_flags = file->f_flags };
504 struct obd_client_handle **och_p = NULL;
505 __u64 *och_usecount = NULL;
506 struct ll_file_data *fd;
510 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
511 PFID(ll_inode2fid(inode)), inode, file->f_flags);
/* An intent left by the lookup path may ride in via private_data. */
513 it = file->private_data; /* XXX: compat macro */
514 file->private_data = NULL; /* prevent ll_local_open assertion */
516 fd = ll_file_data_get();
518 GOTO(out_openerr, rc = -ENOMEM);
521 if (S_ISDIR(inode->i_mode))
522 ll_authorize_statahead(inode, fd);
/* Root needs no MDS open handle; attach fd and we are done. */
524 if (inode->i_sb->s_root == file->f_dentry) {
525 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent: build our own OPEN intent from f_flags. */
529 if (!it || !it->d.lustre.it_disposition) {
530 /* Convert f_flags into access mode. We cannot use file->f_mode,
531 * because everything but O_ACCMODE mask was stripped from
533 if ((oit.it_flags + 1) & O_ACCMODE)
535 if (file->f_flags & O_TRUNC)
536 oit.it_flags |= FMODE_WRITE;
538 /* kernel only call f_op->open in dentry_open. filp_open calls
539 * dentry_open after call to open_namei that checks permissions.
540 * Only nfsd_open call dentry_open directly without checking
541 * permissions and because of that this code below is safe. */
542 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
543 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
545 /* We do not want O_EXCL here, presumably we opened the file
546 * already? XXX - NFS implications? */
547 oit.it_flags &= ~O_EXCL;
549 /* bug20584, if "it_flags" contains O_CREAT, the file will be
550 * created if necessary, then "IT_CREAT" should be set to keep
551 * consistent with it */
552 if (oit.it_flags & O_CREAT)
553 oit.it_op |= IT_CREAT;
559 /* Let's see if we have file open on MDS already. */
560 if (it->it_flags & FMODE_WRITE) {
561 och_p = &lli->lli_mds_write_och;
562 och_usecount = &lli->lli_open_fd_write_count;
563 } else if (it->it_flags & FMODE_EXEC) {
564 och_p = &lli->lli_mds_exec_och;
565 och_usecount = &lli->lli_open_fd_exec_count;
567 och_p = &lli->lli_mds_read_och;
568 och_usecount = &lli->lli_open_fd_read_count;
571 mutex_lock(&lli->lli_och_mutex);
572 if (*och_p) { /* Open handle is present */
573 if (it_disposition(it, DISP_OPEN_OPEN)) {
574 /* Well, there's extra open request that we do not need,
575 let's close it somehow. This will decref request. */
576 rc = it_open_error(DISP_OPEN_OPEN, it);
578 mutex_unlock(&lli->lli_och_mutex);
579 GOTO(out_openerr, rc);
582 ll_release_openhandle(file->f_dentry, it);
/* Reuse the existing handle for this new local open. */
586 rc = ll_local_open(file, it, fd, NULL);
589 mutex_unlock(&lli->lli_och_mutex);
590 GOTO(out_openerr, rc);
593 LASSERT(*och_usecount == 0);
594 if (!it->d.lustre.it_disposition) {
595 /* We cannot just request lock handle now, new ELC code
596 means that one of other OPEN locks for this file
597 could be cancelled, and since blocking ast handler
598 would attempt to grab och_mutex as well, that would
599 result in a deadlock */
600 mutex_unlock(&lli->lli_och_mutex);
602 * Normally called under two situations:
604 * 2. A race/condition on MDS resulting in no open
605 * handle to be returned from LOOKUP|OPEN request,
606 * for example if the target entry was a symlink.
608 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
610 * Always specify MDS_OPEN_BY_FID because we don't want
611 * to get file with different fid.
613 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
614 rc = ll_intent_file_open(file, NULL, 0, it);
616 GOTO(out_openerr, rc);
620 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
622 GOTO(out_och_free, rc = -ENOMEM);
626 /* md_intent_lock() didn't get a request ref if there was an
627 * open error, so don't do cleanup on the request here
629 /* XXX (green): Should not we bail out on any error here, not
630 * just open error? */
631 rc = it_open_error(DISP_OPEN_OPEN, it);
633 GOTO(out_och_free, rc);
635 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
636 "inode %p: disposition %x, status %d\n", inode,
637 it_disposition(it, ~0), it->d.lustre.it_status);
639 rc = ll_local_open(file, it, fd, *och_p);
641 GOTO(out_och_free, rc);
643 mutex_unlock(&lli->lli_och_mutex);
646 /* Must do this outside lli_och_mutex lock to prevent deadlock where
647 different kind of OPEN lock for this same inode gets cancelled
648 by ldlm_cancel_lru */
649 if (!S_ISREG(inode->i_mode))
650 GOTO(out_och_free, rc);
/* Delay OST object creation for O_LOV_DELAY_CREATE / read-only opens
 * of files with no striping yet. */
654 if (!lli->lli_has_smd &&
655 (cl_is_lov_delay_create(file->f_flags) ||
656 (file->f_mode & FMODE_WRITE) == 0)) {
657 CDEBUG(D_INODE, "object creation was delayed\n");
658 GOTO(out_och_free, rc);
660 cl_lov_delay_create_clear(&file->f_flags);
661 GOTO(out_och_free, rc);
/* Error cleanup: free the och allocated above, drop statahead
 * authorization and the fd, and release any intent open ref. */
665 if (och_p && *och_p) {
666 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
667 *och_p = NULL; /* OBD_FREE writes some magic there */
670 mutex_unlock(&lli->lli_och_mutex);
673 if (lli->lli_opendir_key == fd)
674 ll_deauthorize_statahead(inode, fd);
676 ll_file_data_put(fd);
678 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
681 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
682 ptlrpc_req_finished(it->d.lustre.it_data);
683 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/* Blocking AST for lease locks: on LDLM_CB_BLOCKING, asynchronously
 * cancel our own lock (which is how a lease gets broken); the
 * LDLM_CB_CANCELING arm's body is elided in this extract. */
689 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
690 struct ldlm_lock_desc *desc, void *data, int flag)
693 struct lustre_handle lockh;
697 case LDLM_CB_BLOCKING:
698 ldlm_lock2handle(lock, &lockh);
699 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
701 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
705 case LDLM_CB_CANCELING:
713 * Acquire a lease and open the file.
715 static struct obd_client_handle *
716 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
719 struct lookup_intent it = { .it_op = IT_OPEN };
720 struct ll_sb_info *sbi = ll_i2sbi(inode);
721 struct md_op_data *op_data;
722 struct ptlrpc_request *req = NULL;
723 struct lustre_handle old_handle = { 0 };
724 struct obd_client_handle *och = NULL;
/* Leases are only defined for plain read or plain write opens. */
729 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
730 RETURN(ERR_PTR(-EINVAL));
733 struct ll_inode_info *lli = ll_i2info(inode);
734 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
735 struct obd_client_handle **och_p;
/* The requested lease mode must match how the file was opened,
 * and exec opens cannot take a lease. */
738 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
739 RETURN(ERR_PTR(-EPERM));
741 /* Get the openhandle of the file */
743 mutex_lock(&lli->lli_och_mutex);
/* Only one lease per fd. */
744 if (fd->fd_lease_och != NULL) {
745 mutex_unlock(&lli->lli_och_mutex);
749 if (fd->fd_och == NULL) {
750 if (file->f_mode & FMODE_WRITE) {
751 LASSERT(lli->lli_mds_write_och != NULL);
752 och_p = &lli->lli_mds_write_och;
753 och_usecount = &lli->lli_open_fd_write_count;
755 LASSERT(lli->lli_mds_read_och != NULL);
756 och_p = &lli->lli_mds_read_och;
757 och_usecount = &lli->lli_open_fd_read_count;
/* A lease needs sole ownership of the openhandle. */
759 if (*och_usecount == 1) {
766 mutex_unlock(&lli->lli_och_mutex);
767 if (rc < 0) /* more than 1 opener */
770 LASSERT(fd->fd_och != NULL);
771 old_handle = fd->fd_och->och_fh;
776 RETURN(ERR_PTR(-ENOMEM));
778 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
779 LUSTRE_OPC_ANY, NULL);
781 GOTO(out, rc = PTR_ERR(op_data));
783 /* To tell the MDT this openhandle is from the same owner */
784 op_data->op_handle = old_handle;
786 it.it_flags = fmode | open_flags;
787 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
788 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
789 &ll_md_blocking_lease_ast,
790 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
791 * it can be cancelled which may mislead applications that the lease is
793 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
794 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
795 * doesn't deal with openhandle, so normal openhandle will be leaked. */
796 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
797 ll_finish_md_op_data(op_data);
798 ptlrpc_req_finished(req);
800 GOTO(out_release_it, rc);
802 if (it_disposition(&it, DISP_LOOKUP_NEG))
803 GOTO(out_release_it, rc = -ENOENT);
805 rc = it_open_error(DISP_OPEN_OPEN, &it);
807 GOTO(out_release_it, rc);
809 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
810 ll_och_fill(sbi->ll_md_exp, &it, och);
812 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
813 GOTO(out_close, rc = -EOPNOTSUPP);
815 /* already get lease, handle lease lock */
816 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
817 if (it.d.lustre.it_lock_mode == 0 ||
818 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
819 /* open lock must return for lease */
820 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
821 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
822 it.d.lustre.it_lock_bits);
823 GOTO(out_close, rc = -EPROTO);
826 ll_intent_release(&it);
/* Error path: drop the open lock, close the handle, release intent. */
830 /* Cancel open lock */
831 if (it.d.lustre.it_lock_mode != 0) {
832 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
833 it.d.lustre.it_lock_mode);
834 it.d.lustre.it_lock_mode = 0;
835 och->och_lease_handle.cookie = 0ULL;
837 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
839 CERROR("%s: error closing file "DFID": %d\n",
840 ll_get_fsname(inode->i_sb, NULL, 0),
841 PFID(&ll_i2info(inode)->lli_fid), rc2);
842 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
844 ll_intent_release(&it);
852 * Release lease and close the file.
853 * It will check if the lease has ever broken.
855 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
858 struct ldlm_lock *lock;
859 bool cancelled = true;
/* Inspect the lease lock to learn whether it was already cancelled
 * (i.e. the lease was broken by a conflicting access). */
863 lock = ldlm_handle2lock(&och->och_lease_handle);
865 lock_res_and_lock(lock);
866 cancelled = ldlm_is_cancel(lock);
867 unlock_res_and_lock(lock);
871 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
872 PFID(&ll_i2info(inode)->lli_fid), cancelled);
/* NOTE(review): the condition guarding this cancel is elided here —
 * presumably it only runs when the lease was not already broken. */
875 ldlm_cli_cancel(&och->och_lease_handle, 0);
876 if (lease_broken != NULL)
877 *lease_broken = cancelled;
879 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
884 /* Fills the obdo with the attributes for the lsm */
885 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
886 struct obd_capa *capa, struct obdo *obdo,
889 struct ptlrpc_request_set *set;
890 struct obd_info oinfo = { { { 0 } } };
895 LASSERT(lsm != NULL);
/* Request id/type/size/blocks/times/group/data-version from the OSTs. */
899 oinfo.oi_oa->o_oi = lsm->lsm_oi;
900 oinfo.oi_oa->o_mode = S_IFREG;
901 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
902 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
903 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
904 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
905 OBD_MD_FLGROUP | OBD_MD_FLDATAVERSION;
906 oinfo.oi_capa = capa;
/* Flush requests take a server-side lock so the version is stable. */
907 if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
908 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
909 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
910 if (dv_flags & LL_DV_WR_FLUSH)
911 oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
914 set = ptlrpc_prep_set();
916 CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM);
919 rc = obd_getattr_async(exp, &oinfo, set);
921 rc = ptlrpc_set_wait(set);
922 ptlrpc_set_destroy(set);
/* Keep only the attribute bits the caller can rely on. */
925 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
926 OBD_MD_FLATIME | OBD_MD_FLMTIME |
927 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
928 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
/* A write-flush that did not actually flush is a failure. */
929 if (dv_flags & LL_DV_WR_FLUSH &&
930 !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
931 oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
/* Merge the MDS-provided timestamps cached in ll_inode_info with the
 * OST-side attributes (size, blocks, times) from the cl_object, taking
 * the newest timestamp of each kind, under the inode size lock. */
937 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
939 struct ll_inode_info *lli = ll_i2info(inode);
940 struct cl_object *obj = lli->lli_clob;
941 struct cl_attr *attr = ccc_env_thread_attr(env);
949 ll_inode_size_lock(inode);
951 /* merge timestamps the most recently obtained from mds with
952 timestamps obtained from osts */
953 LTIME_S(inode->i_atime) = lli->lli_atime;
954 LTIME_S(inode->i_mtime) = lli->lli_mtime;
955 LTIME_S(inode->i_ctime) = lli->lli_ctime;
957 atime = LTIME_S(inode->i_atime);
958 mtime = LTIME_S(inode->i_mtime);
959 ctime = LTIME_S(inode->i_ctime);
961 cl_object_attr_lock(obj);
962 rc = cl_object_attr_get(env, obj, attr);
963 cl_object_attr_unlock(obj);
966 GOTO(out_size_unlock, rc);
/* Keep whichever timestamp is newer, MDS cache or OST attr. */
968 if (atime < attr->cat_atime)
969 atime = attr->cat_atime;
971 if (ctime < attr->cat_ctime)
972 ctime = attr->cat_ctime;
974 if (mtime < attr->cat_mtime)
975 mtime = attr->cat_mtime;
977 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
978 PFID(&lli->lli_fid), attr->cat_size);
/* Size and blocks always come from the OSTs. */
980 i_size_write(inode, attr->cat_size);
981 inode->i_blocks = attr->cat_blocks;
983 LTIME_S(inode->i_atime) = atime;
984 LTIME_S(inode->i_mtime) = mtime;
985 LTIME_S(inode->i_ctime) = ctime;
988 ll_inode_size_unlock(inode);
/* Fetch OST attributes for @lsm via ll_lsm_getattr() and copy the
 * size/blocks/timestamps into the user-visible stat structure @st. */
993 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
996 struct obdo obdo = { 0 };
999 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0);
1001 st->st_size = obdo.o_size;
1002 st->st_blocks = obdo.o_blocks;
1003 st->st_mtime = obdo.o_mtime;
1004 st->st_atime = obdo.o_atime;
1005 st->st_ctime = obdo.o_ctime;
/* Decide whether atime updates should be suppressed for @file, by the
 * same criteria the VFS uses: O_NOATIME on the fd, S_NOATIME on the
 * inode, noatime/ro mount flags, and nodiratime for directories. */
1010 static bool file_is_noatime(const struct file *file)
1012 const struct vfsmount *mnt = file->f_path.mnt;
1013 const struct inode *inode = file->f_path.dentry->d_inode;
1015 /* Adapted from file_accessed() and touch_atime().*/
1016 if (file->f_flags & O_NOATIME)
1019 if (inode->i_flags & S_NOATIME)
1022 if (IS_NOATIME(inode))
1025 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1028 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1031 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/* Initialize a cl_io for a read (@write == 0) or write (@write != 0)
 * on @file: propagate O_NONBLOCK/O_APPEND/O_SYNC/O_DIRECT into the io
 * descriptor and choose the DLM locking policy. */
1037 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
1039 struct inode *inode = file->f_dentry->d_inode;
1041 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1043 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1044 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1045 file->f_flags & O_DIRECT ||
1048 io->ci_obj = ll_i2info(inode)->lli_clob;
1049 io->ci_lockreq = CILR_MAYBE;
/* nolock mounts never take DLM extent locks; O_APPEND always must. */
1050 if (ll_file_nolock(file)) {
1051 io->ci_lockreq = CILR_NEVER;
1052 io->ci_no_srvlock = 1;
1053 } else if (file->f_flags & O_APPEND) {
1054 io->ci_lockreq = CILR_MANDATORY;
1057 io->ci_noatime = file_is_noatime(file);
/* Common read/write engine: set up a cl_io of type @iot at *@ppos for
 * @count bytes, take the per-file range lock for normal writes, run
 * the cl_io loop, restart on live-layout changes, and account the
 * bytes moved in the per-sb stats.  *@ppos is advanced on success. */
1061 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1062 struct file *file, enum cl_io_type iot,
1063 loff_t *ppos, size_t count)
1065 struct inode *inode = file->f_dentry->d_inode;
1066 struct ll_inode_info *lli = ll_i2info(inode);
1068 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1071 struct range_lock range;
1074 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1075 file->f_dentry->d_name.name, iot, *ppos, count);
1078 io = ccc_env_thread_io(env);
1079 ll_io_init(io, file, iot == CIT_WRITE);
1081 /* The maximum Lustre file size is variable, based on the
1082 * OST maximum object size and number of stripes. This
1083 * needs another check in addition to the VFS checks earlier. */
1084 end = (io->u.ci_wr.wr_append ? i_size_read(inode) : *ppos) + count;
1085 if (end > ll_file_maxbytes(inode)) {
1087 CDEBUG(D_INODE, "%s: file "DFID" offset %llu > maxbytes "LPU64
1088 ": rc = %zd\n", ll_get_fsname(inode->i_sb, NULL, 0),
1089 PFID(&lli->lli_fid), end, ll_file_maxbytes(inode),
1094 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1095 struct vvp_io *vio = vvp_env_io(env);
1096 bool range_locked = false;
/* O_APPEND writes must lock to EOF; others just their range. */
1098 if (file->f_flags & O_APPEND)
1099 range_lock_init(&range, 0, LUSTRE_EOF);
1101 range_lock_init(&range, *ppos, *ppos + count - 1);
1103 vio->vui_fd = LUSTRE_FPRIVATE(file);
1104 vio->vui_io_subtype = args->via_io_subtype;
1106 switch (vio->vui_io_subtype) {
1108 vio->vui_iov = args->u.normal.via_iov;
1109 vio->vui_nrsegs = args->u.normal.via_nrsegs;
1110 vio->vui_tot_nrsegs = vio->vui_nrsegs;
1111 vio->vui_iocb = args->u.normal.via_iocb;
/* Group-locked files skip the range lock: the group lock
 * already serializes conflicting IO. */
1112 if ((iot == CIT_WRITE) &&
1113 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1114 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1116 result = range_lock(&lli->lli_write_tree,
1121 range_locked = true;
1123 down_read(&lli->lli_trunc_sem);
1126 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1127 vio->u.splice.vui_flags = args->u.splice.via_flags;
1130 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1134 ll_cl_add(file, env, io);
1135 result = cl_io_loop(env, io);
1136 ll_cl_remove(file, env);
1138 if (args->via_io_subtype == IO_NORMAL)
1139 up_read(&lli->lli_trunc_sem);
1141 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1143 range_unlock(&lli->lli_write_tree, &range);
1146 /* cl_io_rw_init() handled IO */
1147 result = io->ci_result;
1150 if (io->ci_nob > 0) {
1151 result = io->ci_nob;
1152 *ppos = io->u.ci_wr.wr.crw_pos;
1156 cl_io_fini(env, io);
1157 /* If any bit been read/written (result != 0), we just return
1158 * short read/write instead of restart io. */
1159 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1160 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zu\n",
1161 iot == CIT_READ ? "read" : "write",
1162 file->f_dentry->d_name.name, *ppos, count);
1163 LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);
/* Stats accounting and write-failure tracking. */
1167 if (iot == CIT_READ) {
1169 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1170 LPROC_LL_READ_BYTES, result);
1171 } else if (iot == CIT_WRITE) {
1173 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1174 LPROC_LL_WRITE_BYTES, result);
1175 fd->fd_write_failed = false;
1176 } else if (result != -ERESTARTSYS) {
1177 fd->fd_write_failed = true;
1180 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1187 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute the total byte count into
 * *@count; *@nr_segs may be shortened at the first unreadable
 * segment, matching the kernel's partial-transfer semantics. */
1189 static int ll_file_get_iov_count(const struct iovec *iov,
1190 unsigned long *nr_segs, size_t *count)
1195 for (seg = 0; seg < *nr_segs; seg++) {
1196 const struct iovec *iv = &iov[seg];
1199 * If any segment has a negative length, or the cumulative
1200 * length ever wraps negative then return -EINVAL.
1203 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
/* NOTE(review): the statement this condition guards is elided;
 * confirm the access_ok() success path continues the loop. */
1205 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1210 cnt -= iv->iov_len; /* This segment is no good */
/* AIO read entry point: validate the iovec, obtain a cl_env, pack the
 * iovec/iocb into vvp_io_args, and delegate to ll_file_io_generic()
 * with CIT_READ. */
1217 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1218 unsigned long nr_segs, loff_t pos)
1221 struct vvp_io_args *args;
1227 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1231 env = cl_env_get(&refcheck);
1233 RETURN(PTR_ERR(env));
1235 args = vvp_env_args(env, IO_NORMAL);
1236 args->u.normal.via_iov = (struct iovec *)iov;
1237 args->u.normal.via_nrsegs = nr_segs;
1238 args->u.normal.via_iocb = iocb;
1240 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1241 &iocb->ki_pos, count);
1242 cl_env_put(env, &refcheck);
/* Synchronous read: wrap the user buffer in a single-segment iovec
 * and a sync kiocb, then reuse ll_file_aio_read().  *@ppos is updated
 * from the kiocb position afterwards. */
1246 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1250 struct iovec *local_iov;
1251 struct kiocb *kiocb;
1256 env = cl_env_get(&refcheck);
1258 RETURN(PTR_ERR(env));
1260 local_iov = &vvp_env_info(env)->vti_local_iov;
1261 kiocb = &vvp_env_info(env)->vti_kiocb;
1262 local_iov->iov_base = (void __user *)buf;
1263 local_iov->iov_len = count;
1264 init_sync_kiocb(kiocb, file);
1265 kiocb->ki_pos = *ppos;
/* Field name for the remaining-byte count differs across kernels. */
1266 #ifdef HAVE_KIOCB_KI_LEFT
1267 kiocb->ki_left = count;
1269 kiocb->ki_nbytes = count;
1272 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1273 *ppos = kiocb->ki_pos;
1275 cl_env_put(env, &refcheck);
1280 * Write to a file (through the page cache).
/*
 * Vectored write entry point (aio_write): mirror of ll_file_aio_read()
 * but issues CIT_WRITE through ll_file_io_generic().
 * NOTE(review): excerpt elides lines (declarations and final RETURN
 * not visible).
 */
1283 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1284 unsigned long nr_segs, loff_t pos)
1287 struct vvp_io_args *args;
1293 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1297 env = cl_env_get(&refcheck);
1299 RETURN(PTR_ERR(env));
1301 args = vvp_env_args(env, IO_NORMAL);
1302 args->u.normal.via_iov = (struct iovec *)iov;
1303 args->u.normal.via_nrsegs = nr_segs;
1304 args->u.normal.via_iocb = iocb;
1306 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1307 &iocb->ki_pos, count);
1308 cl_env_put(env, &refcheck);
/*
 * Synchronous write(2) entry: mirror of ll_file_read() — wrap the user
 * buffer in a one-segment iovec plus a sync kiocb and delegate to
 * ll_file_aio_write(), updating *ppos afterwards.
 * NOTE(review): excerpt elides lines; final RETURN not visible.
 */
1312 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1313 size_t count, loff_t *ppos)
1316 struct iovec *local_iov;
1317 struct kiocb *kiocb;
1322 env = cl_env_get(&refcheck);
1324 RETURN(PTR_ERR(env));
1326 local_iov = &vvp_env_info(env)->vti_local_iov;
1327 kiocb = &vvp_env_info(env)->vti_kiocb;
1328 local_iov->iov_base = (void __user *)buf;
1329 local_iov->iov_len = count;
1330 init_sync_kiocb(kiocb, file);
1331 kiocb->ki_pos = *ppos;
/* Kernel-version compat: ki_left was renamed to ki_nbytes upstream. */
1332 #ifdef HAVE_KIOCB_KI_LEFT
1333 kiocb->ki_left = count;
1335 kiocb->ki_nbytes = count;
1338 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1339 *ppos = kiocb->ki_pos;
1341 cl_env_put(env, &refcheck);
1346 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read entry: route pagecache data into a pipe by packing the
 * pipe/flags into IO_SPLICE args and running the generic CIT_READ path.
 * NOTE(review): excerpt elides lines; declarations and final RETURN
 * not visible.
 */
1348 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1349 struct pipe_inode_info *pipe, size_t count,
1353 struct vvp_io_args *args;
1358 env = cl_env_get(&refcheck);
1360 RETURN(PTR_ERR(env));
1362 args = vvp_env_args(env, IO_SPLICE);
1363 args->u.splice.via_pipe = pipe;
1364 args->u.splice.via_flags = flags;
1366 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1367 cl_env_put(env, &refcheck);
/*
 * Apply a user-supplied striping EA (lov_user_md) to @inode via an MDS
 * open-by-FID intent.  Fails with -EEXIST when the inode already has a
 * layout (striping is create-time-only here).  Serializes against size
 * changes with ll_inode_size_lock() for the intent open.
 * NOTE(review): excerpt elides lines; the out/out_unlock labels and
 * final return are not fully visible.
 */
1371 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1372 __u64 flags, struct lov_user_md *lum,
1375 struct lov_stripe_md *lsm = NULL;
1376 struct lookup_intent oit = {
1378 .it_flags = flags | MDS_OPEN_BY_FID,
/* A pre-existing layout means the stripe cannot be (re)set. */
1383 lsm = ccc_inode_lsm_get(inode);
1385 ccc_inode_lsm_put(inode, lsm);
1386 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1387 PFID(ll_inode2fid(inode)));
1388 GOTO(out, rc = -EEXIST);
1391 ll_inode_size_lock(inode);
1392 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1394 GOTO(out_unlock, rc);
1396 rc = oit.d.lustre.it_status;
1398 GOTO(out_unlock, rc);
/* The open handle was only needed to carry the EA; close it now. */
1400 ll_release_openhandle(file->f_dentry, &oit);
1403 ll_inode_size_unlock(inode);
1404 ll_intent_release(&oit);
1405 ccc_inode_lsm_put(inode, lsm);
1407 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA of @filename (child of @inode) from the MDS with
 * md_getattr_name(), validate its magic, and byte-swap the layout to
 * host endian when the host is not little-endian, before it is handed
 * back through *lmmp/*lmm_size.
 * NOTE(review): excerpt elides lines; *lmmp assignment, error paths and
 * the final return are not fully visible.  *request presumably carries
 * the reply buffer ownership to the caller — TODO confirm against
 * callers.
 */
1412 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1413 struct lov_mds_md **lmmp, int *lmm_size,
1414 struct ptlrpc_request **request)
1416 struct ll_sb_info *sbi = ll_i2sbi(inode);
1417 struct mdt_body *body;
1418 struct lov_mds_md *lmm = NULL;
1419 struct ptlrpc_request *req = NULL;
1420 struct md_op_data *op_data;
1423 rc = ll_get_default_mdsize(sbi, &lmmsize);
1427 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1428 strlen(filename), lmmsize,
1429 LUSTRE_OPC_ANY, NULL);
1430 if (IS_ERR(op_data))
1431 RETURN(PTR_ERR(op_data));
1433 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1434 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1435 ll_finish_md_op_data(op_data);
1437 CDEBUG(D_INFO, "md_getattr_name failed "
1438 "on %s: rc %d\n", filename, rc);
1442 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1443 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1445 lmmsize = body->mbo_eadatasize;
/* No EA bits valid or empty EA -> no striping data to return. */
1447 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1449 GOTO(out, rc = -ENODATA);
1452 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1453 LASSERT(lmm != NULL);
/* Only plain V1/V3 layouts are understood here. */
1455 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1456 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1457 GOTO(out, rc = -EPROTO);
1461 * This is coming from the MDS, so is probably in
1462 * little endian. We convert it to host endian before
1463 * passing it to userspace.
/* Swab only when host endianness differs from wire (LE) order. */
1465 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1468 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1469 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1472 /* if function called for directory - we should
1473 * avoid swab not existent lsm objects */
1474 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1475 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1476 if (S_ISREG(body->mbo_mode))
1477 lustre_swab_lov_user_md_objects(
1478 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1480 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1481 lustre_swab_lov_user_md_v3(
1482 (struct lov_user_md_v3 *)lmm);
1483 if (S_ISREG(body->mbo_mode))
1484 lustre_swab_lov_user_md_objects(
1485 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1492 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (with one OST object
 * entry) from userspace and apply it via ll_lov_setstripe_ea_info().
 * Restricted to CAP_SYS_ADMIN since MDS_OPEN_HAS_OBJS trusts the
 * caller-supplied object list.
 * NOTE(review): excerpt elides lines; some returns not visible.
 */
1497 static int ll_lov_setea(struct inode *inode, struct file *file,
1500 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1501 struct lov_user_md *lump;
1502 int lum_size = sizeof(struct lov_user_md) +
1503 sizeof(struct lov_user_ost_data);
1507 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1510 OBD_ALLOC_LARGE(lump, lum_size);
1514 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1515 OBD_FREE_LARGE(lump, lum_size);
1519 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1521 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the file's striping information to the user buffer @lum by
 * delegating to cl_object_getstripe() on the inode's cl_object.
 * NOTE(review): excerpt elides lines; final RETURN not visible.
 */
1525 static int ll_file_getstripe(struct inode *inode,
1526 struct lov_user_md __user *lum)
1533 env = cl_env_get(&refcheck);
1535 RETURN(PTR_ERR(env));
1537 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1538 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user layout into a kernel
 * buffer, apply it, refresh the layout generation and echo the
 * resulting stripe info back to the caller's buffer.
 * NOTE(review): excerpt elides lines; error paths and final return not
 * visible.
 */
1542 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1545 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1546 struct lov_user_md *klum;
1548 __u64 flags = FMODE_WRITE;
1551 rc = ll_copy_user_md(lum, &klum);
1556 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* Zero the user's stripe_count so the getstripe below fills it in. */
1560 put_user(0, &lum->lmm_stripe_count);
1562 ll_layout_refresh(inode, &gen);
1563 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1566 OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK handler: take a cl-layer group lock with group id
 * @arg and record it in the file's private data.  fd_flags is checked
 * under lli_lock both before and after the (sleeping) cl_get_grouplock()
 * call to close the race with a concurrent locker.
 * NOTE(review): excerpt elides lines; some returns/braces not visible.
 */
1571 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1573 struct ll_inode_info *lli = ll_i2info(inode);
1574 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1575 struct ccc_grouplock grouplock;
1580 CWARN("group id for group lock must not be 0\n");
1584 if (ll_file_nolock(file))
1585 RETURN(-EOPNOTSUPP);
/* First check: bail out early if this fd already holds a group lock. */
1587 spin_lock(&lli->lli_lock);
1588 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1589 CWARN("group lock already existed with gid %lu\n",
1590 fd->fd_grouplock.cg_gid);
1591 spin_unlock(&lli->lli_lock);
1594 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1595 spin_unlock(&lli->lli_lock);
/* Acquire outside the spinlock; cl_get_grouplock() may block. */
1597 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1598 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check: another thread may have won while we were blocked. */
1602 spin_lock(&lli->lli_lock);
1603 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1604 spin_unlock(&lli->lli_lock);
1605 CERROR("another thread just won the race\n");
1606 cl_put_grouplock(&grouplock);
1610 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1611 fd->fd_grouplock = grouplock;
1612 spin_unlock(&lli->lli_lock);
1614 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock recorded in the
 * file's private data, verifying under lli_lock that one is held and
 * that its gid matches @arg.  The actual cl_put_grouplock() happens
 * after dropping the spinlock.
 * NOTE(review): excerpt elides lines; some returns not visible.
 */
1618 static int ll_put_grouplock(struct inode *inode, struct file *file,
1621 struct ll_inode_info *lli = ll_i2info(inode);
1622 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1623 struct ccc_grouplock grouplock;
1626 spin_lock(&lli->lli_lock);
1627 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1628 spin_unlock(&lli->lli_lock);
1629 CWARN("no group lock held\n");
1632 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1634 if (fd->fd_grouplock.cg_gid != arg) {
1635 CWARN("group lock %lu doesn't match current id %lu\n",
1636 arg, fd->fd_grouplock.cg_gid);
1637 spin_unlock(&lli->lli_lock);
/* Detach the lock from the fd before releasing it outside the lock. */
1641 grouplock = fd->fd_grouplock;
1642 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1643 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1644 spin_unlock(&lli->lli_lock);
1646 cl_put_grouplock(&grouplock);
1647 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1652 * Close inode open handle
1654 * \param dentry [in] dentry which contains the inode
1655 * \param it [in,out] intent which contains open info and result
1658 * \retval <0 failure
/*
 * Close the MDS open handle carried by @it (see the comment block
 * above): allocate an obd_client_handle, fill it from the intent and
 * close it, then drop the intent's open-reference request if present.
 * No-ops for the root dentry or when the intent holds no open handle.
 * NOTE(review): excerpt elides lines; the out label and final return
 * are not visible.
 */
1660 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1662 struct inode *inode = dentry->d_inode;
1663 struct obd_client_handle *och;
1669 /* Root ? Do nothing. */
1670 if (dentry->d_inode->i_sb->s_root == dentry)
1673 /* No open handle to close? Move away */
1674 if (!it_disposition(it, DISP_OPEN_OPEN))
1677 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1679 OBD_ALLOC(och, sizeof(*och));
1681 GOTO(out, rc = -ENOMEM);
1683 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1685 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1688 /* this one is in place of ll_file_open */
1689 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1690 ptlrpc_req_finished(it->d.lustre.it_data);
1691 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1697 * Get size for inode for which FIEMAP mapping is requested.
1698 * Make the FIEMAP get_info call and returns the result.
1699 * \param fiemap kernel buffer to hold extens
1700 * \param num_bytes kernel buffer size
/*
 * Core FIEMAP implementation (see comment block above): validate the
 * request flags, optionally flush dirty pages for FIEMAP_FLAG_SYNC,
 * glimpse the file size if needed, and issue cl_object_fiemap() with
 * an ll_fiemap_info_key built from the inode.
 * NOTE(review): excerpt elides lines; early returns/labels not visible.
 */
1702 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1708 struct ll_fiemap_info_key fmkey = { .name = KEY_FIEMAP, };
1711 /* Checks for fiemap flags */
/* Unknown flags are reported back to the caller via fm_flags. */
1712 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1713 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1717 /* Check for FIEMAP_FLAG_SYNC */
1718 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1719 rc = filemap_fdatawrite(inode->i_mapping);
1724 env = cl_env_get(&refcheck);
1726 RETURN(PTR_ERR(env));
/* A zero cached size may just mean we never fetched it; glimpse. */
1728 if (i_size_read(inode) == 0) {
1729 rc = ll_glimpse_size(inode);
1734 fmkey.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1735 obdo_from_inode(&fmkey.oa, inode, OBD_MD_FLSIZE);
1736 obdo_set_parent_fid(&fmkey.oa, &ll_i2info(inode)->lli_fid);
1738 /* If filesize is 0, then there would be no objects for mapping */
1739 if (fmkey.oa.o_size == 0) {
1740 fiemap->fm_mapped_extents = 0;
1744 fmkey.fiemap = *fiemap;
1746 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1747 &fmkey, fiemap, &num_bytes);
1749 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.
 * Reads gf_pathlen from the user struct to size the kernel buffer,
 * copies the request in, calls obd_iocontrol(OBD_IOC_FID2PATH) and
 * copies the result back.  Gated on CAP_DAC_READ_SEARCH unless the
 * mount allows user fid2path.
 * NOTE(review): excerpt elides lines; gf_free label and final return
 * not fully visible.
 */
1753 int ll_fid2path(struct inode *inode, void __user *arg)
1755 struct obd_export *exp = ll_i2mdexp(inode);
1756 const struct getinfo_fid2path __user *gfin = arg;
1758 struct getinfo_fid2path *gfout;
1764 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1765 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1768 /* Only need to get the buflen */
1769 if (get_user(pathlen, &gfin->gf_pathlen))
/* Bound the user-controlled allocation size. */
1772 if (pathlen > PATH_MAX)
1775 outsize = sizeof(*gfout) + pathlen;
1776 OBD_ALLOC(gfout, outsize);
1780 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1781 GOTO(gf_free, rc = -EFAULT);
1783 /* Call mdc_iocontrol */
1784 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1788 if (copy_to_user(arg, gfout, outsize))
1792 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size and allocate the kernel fiemap
 * buffer from the user's fm_extent_count (overflow-checked), copy the
 * request (and the first extent, used as a continuation cursor) from
 * userspace, run ll_do_fiemap() and copy header + mapped extents back.
 * NOTE(review): excerpt elides lines; error label and final return not
 * fully visible.
 */
1796 static int ll_ioctl_fiemap(struct inode *inode, struct fiemap __user *arg)
1798 struct fiemap *fiemap;
1804 /* Get the extent count so we can calculate the size of
1805 * required fiemap buffer */
1806 if (get_user(extent_count, &arg->fm_extent_count))
/* Reject counts whose byte size would overflow SIZE_MAX. */
1810 (SIZE_MAX - sizeof(*fiemap)) / sizeof(struct ll_fiemap_extent))
1812 num_bytes = sizeof(*fiemap) + (extent_count *
1813 sizeof(struct ll_fiemap_extent));
1815 OBD_ALLOC_LARGE(fiemap, num_bytes);
1819 /* get the fiemap value */
1820 if (copy_from_user(fiemap, arg, sizeof(*fiemap)))
1821 GOTO(error, rc = -EFAULT);
1823 /* If fm_extent_count is non-zero, read the first extent since
1824 * it is used to calculate end_offset and device from previous
1826 if (extent_count != 0) {
1827 if (copy_from_user(&fiemap->fm_extents[0],
1828 (char __user *)arg + sizeof(*fiemap),
1829 sizeof(struct ll_fiemap_extent)))
1830 GOTO(error, rc = -EFAULT);
1833 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* Copy back only the header plus the extents actually mapped. */
1837 ret_bytes = sizeof(struct fiemap);
1839 if (extent_count != 0)
1840 ret_bytes += (fiemap->fm_mapped_extents *
1841 sizeof(struct ll_fiemap_extent));
1843 if (copy_to_user((void __user *)arg, fiemap, ret_bytes))
1847 OBD_FREE_LARGE(fiemap, num_bytes);
1852 * Read the data_version for inode.
1854 * This value is computed using stripe object version on OST.
1855 * Version is computed using server side locking.
1857 * @param sync if do sync on the OST side;
1859 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1860 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * Compute *data_version for @inode (see comment block above): a file
 * with no stripe objects is treated as version 0; otherwise the
 * version comes from an OST getattr (ll_lsm_getattr) with @flags
 * controlling flush behavior (LL_DV_RD_FLUSH/LL_DV_WR_FLUSH).
 * NOTE(review): excerpt elides lines; out label, obdo free and final
 * return not visible.
 */
1862 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1864 struct lov_stripe_md *lsm = NULL;
1865 struct ll_sb_info *sbi = ll_i2sbi(inode);
1866 struct obdo *obdo = NULL;
1870 /* If no stripe, we consider version is 0. */
1871 lsm = ccc_inode_lsm_get(inode);
1872 if (!lsm_has_objects(lsm)) {
1874 CDEBUG(D_INODE, "No object for inode\n");
1878 OBD_ALLOC_PTR(obdo);
1880 GOTO(out, rc = -ENOMEM);
1882 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, flags);
/* The OST must confirm the version field is valid before we use it. */
1884 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1887 *data_version = obdo->o_data_version;
1893 ccc_inode_lsm_put(inode, lsm);
1898 * Trigger a HSM release request for the provided inode.
/*
 * Trigger an HSM release for @inode: take a write lease with
 * MDS_OPEN_RELEASE, flush and grab the latest data_version
 * (LL_DV_WR_FLUSH), merge attributes, then close the open handle to
 * perform the release on the MDT.  The lease is closed on the error
 * path if still held.
 * NOTE(review): excerpt elides lines; out label and final return not
 * fully visible.
 */
1900 int ll_hsm_release(struct inode *inode)
1902 struct cl_env_nest nest;
1904 struct obd_client_handle *och = NULL;
1905 __u64 data_version = 0;
1909 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1910 ll_get_fsname(inode->i_sb, NULL, 0),
1911 PFID(&ll_i2info(inode)->lli_fid));
1913 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1915 GOTO(out, rc = PTR_ERR(och));
1917 /* Grab latest data_version and [am]time values */
1918 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1922 env = cl_env_nested_get(&nest);
1924 GOTO(out, rc = PTR_ERR(env));
1926 ll_merge_attr(env, inode);
1927 cl_env_nested_put(&nest, env);
1929 /* Release the file.
1930 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1931 * we still need it to pack l_remote_handle to MDT. */
1932 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
1938 if (och != NULL && !IS_ERR(och)) /* close the file */
1939 ll_lease_close(och, inode, NULL);
/*
 * Scratch state for ll_swap_layouts(): saved [am]times (ia1/ia2), the
 * two inodes involved, and whether each side's data version must be
 * verified before the swap.
 */
1944 struct ll_swap_stack {
1945 struct iattr ia1, ia2;
1947 struct inode *inode1, *inode2;
1948 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS implementation: atomically swap the layouts
 * of two regular files on the same filesystem via the MDT.  Steps:
 * permission/sanity checks, canonical ordering by FID (swapping the
 * per-side state to match), optional group locks to flush dirty
 * cache, optional data-version verification, the MDT swap RPC, and
 * optional restoration of [am]times afterwards.
 * NOTE(review): excerpt elides many lines (labels, gid/dv locals,
 * msl setup, free path); comments below only cover what is visible.
 */
1951 static int ll_swap_layouts(struct file *file1, struct file *file2,
1952 struct lustre_swap_layouts *lsl)
1954 struct mdc_swap_layouts msl;
1955 struct md_op_data *op_data;
1958 struct ll_swap_stack *llss = NULL;
1961 OBD_ALLOC_PTR(llss);
1965 llss->inode1 = file1->f_dentry->d_inode;
1966 llss->inode2 = file2->f_dentry->d_inode;
1968 if (!S_ISREG(llss->inode2->i_mode))
1969 GOTO(free, rc = -EINVAL);
1971 if (inode_permission(llss->inode1, MAY_WRITE) ||
1972 inode_permission(llss->inode2, MAY_WRITE))
1973 GOTO(free, rc = -EPERM);
1975 if (llss->inode2->i_sb != llss->inode1->i_sb)
1976 GOTO(free, rc = -EXDEV);
1978 /* we use 2 bool because it is easier to swap than 2 bits */
1979 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1980 llss->check_dv1 = true;
1982 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1983 llss->check_dv2 = true;
1985 /* we cannot use lsl->sl_dvX directly because we may swap them */
1986 llss->dv1 = lsl->sl_dv1;
1987 llss->dv2 = lsl->sl_dv2;
/* Order the pair by FID so locking order is deterministic. */
1989 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1990 if (rc == 0) /* same file, done! */
1993 if (rc < 0) { /* sequentialize it */
1994 swap(llss->inode1, llss->inode2);
1996 swap(llss->dv1, llss->dv2);
1997 swap(llss->check_dv1, llss->check_dv2);
2001 if (gid != 0) { /* application asks to flush dirty cache */
2002 rc = ll_get_grouplock(llss->inode1, file1, gid);
2006 rc = ll_get_grouplock(llss->inode2, file2, gid);
2008 ll_put_grouplock(llss->inode1, file1, gid);
2013 /* to be able to restore mtime and atime after swap
2014 * we need to first save them */
2016 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2017 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2018 llss->ia1.ia_atime = llss->inode1->i_atime;
2019 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2020 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2021 llss->ia2.ia_atime = llss->inode2->i_atime;
2022 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2025 /* ultimate check, before swaping the layouts we check if
2026 * dataversion has changed (if requested) */
2027 if (llss->check_dv1) {
2028 rc = ll_data_version(llss->inode1, &dv, 0);
2031 if (dv != llss->dv1)
2032 GOTO(putgl, rc = -EAGAIN);
2035 if (llss->check_dv2) {
2036 rc = ll_data_version(llss->inode2, &dv, 0);
2039 if (dv != llss->dv2)
2040 GOTO(putgl, rc = -EAGAIN);
2043 /* struct md_op_data is used to send the swap args to the mdt
2044 * only flags is missing, so we use struct mdc_swap_layouts
2045 * through the md_op_data->op_data */
2046 /* flags from user space have to be converted before they are send to
2047 * server, no flag is sent today, they are only used on the client */
2050 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2051 0, LUSTRE_OPC_ANY, &msl);
2052 if (IS_ERR(op_data))
2053 GOTO(free, rc = PTR_ERR(op_data));
2055 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2056 sizeof(*op_data), op_data, NULL);
2057 ll_finish_md_op_data(op_data);
2061 ll_put_grouplock(llss->inode2, file2, gid);
2062 ll_put_grouplock(llss->inode1, file1, gid);
2065 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2069 /* clear useless flags */
2070 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2071 llss->ia1.ia_valid &= ~ATTR_MTIME;
2072 llss->ia2.ia_valid &= ~ATTR_MTIME;
2075 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2076 llss->ia1.ia_valid &= ~ATTR_ATIME;
2077 llss->ia2.ia_valid &= ~ATTR_ATIME;
2080 /* update time if requested */
/* Note ia2 is applied to inode1 and ia1 to inode2: the saved times
 * follow the swapped layouts. */
2082 if (llss->ia2.ia_valid != 0) {
2083 mutex_lock(&llss->inode1->i_mutex);
2084 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2085 mutex_unlock(&llss->inode1->i_mutex);
2088 if (llss->ia1.ia_valid != 0) {
2091 mutex_lock(&llss->inode2->i_mutex);
2092 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2093 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Apply an HSM state change (@hss) to @inode on the MDT after
 * validating the masks: flags must lie within HSM_FLAGS_MASK,
 * non-root callers may only touch HSM_USER_MASK bits, and an archive
 * id (if given) must not exceed LL_HSM_MAX_ARCHIVE.
 * NOTE(review): excerpt elides lines; early RETURNs and the final
 * return are not fully visible.
 */
2105 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2107 struct md_op_data *op_data;
2111 /* Detect out-of range masks */
2112 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2115 /* Non-root users are forbidden to set or clear flags which are
2116 * NOT defined in HSM_USER_MASK. */
2117 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2118 !cfs_capable(CFS_CAP_SYS_ADMIN))
2121 /* Detect out-of range archive id */
2122 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2123 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2126 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2127 LUSTRE_OPC_ANY, hss);
2128 if (IS_ERR(op_data))
2129 RETURN(PTR_ERR(op_data));
2131 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2132 sizeof(*op_data), op_data, NULL);
2134 ll_finish_md_op_data(op_data);
/*
 * LL_IOC_HSM_IMPORT: register a regular file as archived+released
 * (HS_ARCHIVED|HS_EXISTS|HS_RELEASED in the given archive), then force
 * its metadata (mode/uid/gid/size/times) to the values recorded in
 * @hui via ll_setattr_raw() under i_mutex.
 * NOTE(review): excerpt elides lines; allocations' NULL checks, out
 * label and frees are not visible.
 */
2139 static int ll_hsm_import(struct inode *inode, struct file *file,
2140 struct hsm_user_import *hui)
2142 struct hsm_state_set *hss = NULL;
2143 struct iattr *attr = NULL;
2147 if (!S_ISREG(inode->i_mode))
2153 GOTO(out, rc = -ENOMEM);
2155 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2156 hss->hss_archive_id = hui->hui_archive_id;
2157 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2158 rc = ll_hsm_state_set(inode, hss);
2162 OBD_ALLOC_PTR(attr);
2164 GOTO(out, rc = -ENOMEM);
/* Import restores the archived copy's attributes; force S_IFREG. */
2166 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2167 attr->ia_mode |= S_IFREG;
2168 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2169 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2170 attr->ia_size = hui->hui_size;
2171 attr->ia_mtime.tv_sec = hui->hui_mtime;
2172 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2173 attr->ia_atime.tv_sec = hui->hui_atime;
2174 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2176 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2177 ATTR_UID | ATTR_GID |
2178 ATTR_MTIME | ATTR_MTIME_SET |
2179 ATTR_ATIME | ATTR_ATIME_SET;
2181 mutex_lock(&inode->i_mutex);
2183 rc = ll_setattr_raw(file->f_dentry, attr, true);
2187 mutex_unlock(&inode->i_mutex);
/*
 * Map an fmode_t to the LL_LEASE_{RD,WR}LCK bitmask reported to
 * userspace by the lease ioctls below.
 */
2199 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2201 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2202 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * Main ioctl dispatcher for regular-file file descriptors: handles the
 * LL_IOC_*, FSFILT_IOC_*, OBD_IOC_* and HSM families, mostly by
 * delegating to the helpers defined above, and falls back to the
 * registered ll_iocontrol_call() handlers / a raw obd_iocontrol() on
 * the data export for anything unrecognized.
 * NOTE(review): excerpt elides many lines (locals, some RETURNs,
 * allocation checks, closing braces); comments below cover only
 * visible code.
 */
2206 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2208 struct inode *inode = file->f_dentry->d_inode;
2209 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2213 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2214 PFID(ll_inode2fid(inode)), inode, cmd);
2215 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2217 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2218 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2222 case LL_IOC_GETFLAGS:
2223 /* Get the current value of the file flags */
2224 return put_user(fd->fd_flags, (int __user *)arg);
2225 case LL_IOC_SETFLAGS:
2226 case LL_IOC_CLRFLAGS:
2227 /* Set or clear specific file flags */
2228 /* XXX This probably needs checks to ensure the flags are
2229 * not abused, and to handle any flag side effects.
2231 if (get_user(flags, (int __user *) arg))
2234 if (cmd == LL_IOC_SETFLAGS) {
/* Disabling locking is only safe when the app bypasses pagecache. */
2235 if ((flags & LL_FILE_IGNORE_LOCK) &&
2236 !(file->f_flags & O_DIRECT)) {
2237 CERROR("%s: unable to disable locking on "
2238 "non-O_DIRECT file\n", current->comm);
2242 fd->fd_flags |= flags;
2244 fd->fd_flags &= ~flags;
2247 case LL_IOC_LOV_SETSTRIPE:
2248 RETURN(ll_lov_setstripe(inode, file, arg));
2249 case LL_IOC_LOV_SETEA:
2250 RETURN(ll_lov_setea(inode, file, arg));
2251 case LL_IOC_LOV_SWAP_LAYOUTS: {
2253 struct lustre_swap_layouts lsl;
2255 if (copy_from_user(&lsl, (char __user *)arg,
2256 sizeof(struct lustre_swap_layouts)))
/* Both fds must be writable for a layout swap. */
2259 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2262 file2 = fget(lsl.sl_fd);
2267 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2268 rc = ll_swap_layouts(file, file2, &lsl);
2272 case LL_IOC_LOV_GETSTRIPE:
2273 RETURN(ll_file_getstripe(inode,
2274 (struct lov_user_md __user *)arg));
2275 case FSFILT_IOC_FIEMAP:
2276 RETURN(ll_ioctl_fiemap(inode, (struct fiemap __user *)arg));
2277 case FSFILT_IOC_GETFLAGS:
2278 case FSFILT_IOC_SETFLAGS:
2279 RETURN(ll_iocontrol(inode, file, cmd, arg));
2280 case FSFILT_IOC_GETVERSION_OLD:
2281 case FSFILT_IOC_GETVERSION:
2282 RETURN(put_user(inode->i_generation, (int __user *)arg));
2283 case LL_IOC_GROUP_LOCK:
2284 RETURN(ll_get_grouplock(inode, file, arg));
2285 case LL_IOC_GROUP_UNLOCK:
2286 RETURN(ll_put_grouplock(inode, file, arg));
2287 case IOC_OBD_STATFS:
2288 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2290 /* We need to special case any other ioctls we want to handle,
2291 * to send them to the MDS/OST as appropriate and to properly
2292 * network encode the arg field.
2293 case FSFILT_IOC_SETVERSION_OLD:
2294 case FSFILT_IOC_SETVERSION:
2296 case LL_IOC_FLUSHCTX:
2297 RETURN(ll_flush_ctx(inode));
2298 case LL_IOC_PATH2FID: {
2299 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2300 sizeof(struct lu_fid)))
2305 case LL_IOC_GETPARENT:
2306 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2308 case OBD_IOC_FID2PATH:
2309 RETURN(ll_fid2path(inode, (void __user *)arg));
2310 case LL_IOC_DATA_VERSION: {
2311 struct ioc_data_version idv;
2314 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* Mask user flags down to the two supported flush modes. */
2317 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2318 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2321 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2327 case LL_IOC_GET_MDTIDX: {
2330 mdtidx = ll_get_mdt_idx(inode);
2334 if (put_user((int)mdtidx, (int __user *)arg))
2339 case OBD_IOC_GETDTNAME:
2340 case OBD_IOC_GETMDNAME:
2341 RETURN(ll_get_obd_name(inode, cmd, arg));
2342 case LL_IOC_HSM_STATE_GET: {
2343 struct md_op_data *op_data;
2344 struct hsm_user_state *hus;
2351 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2352 LUSTRE_OPC_ANY, hus);
2353 if (IS_ERR(op_data)) {
2355 RETURN(PTR_ERR(op_data));
2358 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2361 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2364 ll_finish_md_op_data(op_data);
2368 case LL_IOC_HSM_STATE_SET: {
2369 struct hsm_state_set *hss;
2376 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2381 rc = ll_hsm_state_set(inode, hss);
2386 case LL_IOC_HSM_ACTION: {
2387 struct md_op_data *op_data;
2388 struct hsm_current_action *hca;
2395 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2396 LUSTRE_OPC_ANY, hca);
2397 if (IS_ERR(op_data)) {
2399 RETURN(PTR_ERR(op_data));
2402 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2405 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2408 ll_finish_md_op_data(op_data);
2412 case LL_IOC_SET_LEASE: {
2413 struct ll_inode_info *lli = ll_i2info(inode);
2414 struct obd_client_handle *och = NULL;
/* Requested lease mode must not exceed the fd's open mode. */
2419 case LL_LEASE_WRLCK:
2420 if (!(file->f_mode & FMODE_WRITE))
2422 fmode = FMODE_WRITE;
2424 case LL_LEASE_RDLCK:
2425 if (!(file->f_mode & FMODE_READ))
2429 case LL_LEASE_UNLCK:
2430 mutex_lock(&lli->lli_och_mutex);
2431 if (fd->fd_lease_och != NULL) {
2432 och = fd->fd_lease_och;
2433 fd->fd_lease_och = NULL;
2435 mutex_unlock(&lli->lli_och_mutex);
2440 fmode = och->och_flags;
2441 rc = ll_lease_close(och, inode, &lease_broken);
2448 RETURN(ll_lease_type_from_fmode(fmode));
2453 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2455 /* apply for lease */
2456 och = ll_lease_open(inode, file, fmode, 0);
2458 RETURN(PTR_ERR(och));
/* Install the lease handle unless another thread beat us to it. */
2461 mutex_lock(&lli->lli_och_mutex);
2462 if (fd->fd_lease_och == NULL) {
2463 fd->fd_lease_och = och;
2466 mutex_unlock(&lli->lli_och_mutex);
2468 /* impossible now that only excl is supported for now */
2469 ll_lease_close(och, inode, &lease_broken);
2474 case LL_IOC_GET_LEASE: {
2475 struct ll_inode_info *lli = ll_i2info(inode);
2476 struct ldlm_lock *lock = NULL;
2479 mutex_lock(&lli->lli_och_mutex);
2480 if (fd->fd_lease_och != NULL) {
2481 struct obd_client_handle *och = fd->fd_lease_och;
/* Report the lease only if its DLM lock has not been cancelled. */
2483 lock = ldlm_handle2lock(&och->och_lease_handle);
2485 lock_res_and_lock(lock);
2486 if (!ldlm_is_cancel(lock))
2487 fmode = och->och_flags;
2489 unlock_res_and_lock(lock);
2490 LDLM_LOCK_PUT(lock);
2493 mutex_unlock(&lli->lli_och_mutex);
2495 RETURN(ll_lease_type_from_fmode(fmode));
2497 case LL_IOC_HSM_IMPORT: {
2498 struct hsm_user_import *hui;
2504 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2509 rc = ll_hsm_import(inode, file, hui);
/* Unhandled cmd: try registered handlers, then the raw dt export. */
2519 ll_iocontrol_call(inode, file, cmd, arg, &err))
2522 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2523 (void __user *)arg));
2528 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Compat fallback (no generic_file_llseek_size in this kernel):
 * validate @offset against [0, maxsize] per FMODE_UNSIGNED_OFFSET and
 * commit it to f_pos, resetting f_version on change.
 * NOTE(review): excerpt elides lines; error returns not visible.
 */
2529 static inline loff_t
2530 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2532 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2534 if (offset > maxsize)
2537 if (offset != file->f_pos) {
2538 file->f_pos = offset;
2539 file->f_version = 0;
/*
 * Compat copy of the upstream generic_file_llseek_size(): compute the
 * new position for the supported origins, with special handling for
 * SEEK_CUR(0) position queries and i_mutex protection for relative
 * seeks, then commit via llseek_execute().
 * NOTE(review): excerpt elides lines; the switch statement and several
 * branches are not visible.
 */
2545 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2546 loff_t maxsize, loff_t eof)
2548 struct inode *inode = file->f_dentry->d_inode;
2556 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2557 * position-querying operation. Avoid rewriting the "same"
2558 * f_pos value back to the file because a concurrent read(),
2559 * write() or lseek() might have altered it
2564 * f_lock protects against read/modify/write race with other
2565 * SEEK_CURs. Note that parallel writes and reads behave
2568 mutex_lock(&inode->i_mutex);
2569 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2570 mutex_unlock(&inode->i_mutex);
2574 * In the generic case the entire file is data, so as long as
2575 * offset isn't at the end of the file then the offset is data.
2582 * There is a virtual hole at the end of the file, so as long as
2583 * offset isn't i_size or larger, return i_size.
2591 return llseek_execute(file, offset, maxsize);
/*
 * llseek file operation: for SEEK_END/SEEK_HOLE/SEEK_DATA glimpse the
 * cluster-wide size first so eof is accurate, then delegate to
 * ll_generic_file_llseek_size() bounded by the fs max byte count.
 * NOTE(review): excerpt elides lines; final RETURN not visible.
 */
2595 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2597 struct inode *inode = file->f_dentry->d_inode;
2598 loff_t retval, eof = 0;
2601 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2602 (origin == SEEK_CUR) ? file->f_pos : 0);
2603 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2604 PFID(ll_inode2fid(inode)), inode, retval, retval,
2606 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
/* These origins depend on the real file size; fetch it via glimpse. */
2608 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2609 retval = ll_glimpse_size(inode);
2612 eof = i_size_read(inode);
2615 retval = ll_generic_file_llseek_size(file, offset, origin,
2616 ll_file_maxbytes(inode), eof);
/*
 * flush file operation (called on close(2)): report -EIO if async
 * writeback errors were recorded on the inode or its cl_object, unless
 * the failure was already reported to this fd (fd_write_failed).
 * NOTE(review): excerpt elides lines; err combination and the
 * fd_write_failed branch body are not fully visible.
 */
2620 static int ll_flush(struct file *file, fl_owner_t id)
2622 struct inode *inode = file->f_dentry->d_inode;
2623 struct ll_inode_info *lli = ll_i2info(inode);
2624 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2627 LASSERT(!S_ISDIR(inode->i_mode));
2629 /* catch async errors that were recorded back when async writeback
2630 * failed for pages in this mapping. */
/* Read-and-clear: each recorded async error is reported once. */
2631 rc = lli->lli_async_rc;
2632 lli->lli_async_rc = 0;
2633 if (lli->lli_clob != NULL) {
2634 err = lov_read_and_clear_async_rc(lli->lli_clob);
2639 /* The application has been told write failure already.
2640 * Do not report failure again. */
2641 if (fd->fd_write_failed)
2643 return rc ? -EIO : 0;
2647 * Called to make sure a portion of file has been written out.
2648 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2650 * Return how many pages have been written.
/*
 * Write out [start, end] of @inode through a CIT_FSYNC cl_io (see
 * comment block above).  @mode selects none/local/discard/all sync;
 * @ignore_layout is propagated to the io.  On success the number of
 * pages written (fi_nr_written) is returned as the result.
 * NOTE(review): excerpt elides lines; fio->fi_end assignment, capa
 * release and final return are not visible.
 */
2652 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2653 enum cl_fsync_mode mode, int ignore_layout)
2655 struct cl_env_nest nest;
2658 struct obd_capa *capa = NULL;
2659 struct cl_fsync_io *fio;
2663 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2664 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2667 env = cl_env_nested_get(&nest);
2669 RETURN(PTR_ERR(env));
2671 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2673 io = ccc_env_thread_io(env);
2674 io->ci_obj = ll_i2info(inode)->lli_clob;
2675 io->ci_ignore_layout = ignore_layout;
2677 /* initialize parameters for sync */
2678 fio = &io->u.ci_fsync;
2679 fio->fi_capa = capa;
2680 fio->fi_start = start;
2682 fio->fi_fid = ll_inode2fid(inode);
2683 fio->fi_mode = mode;
2684 fio->fi_nr_written = 0;
2686 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2687 result = cl_io_loop(env, io);
2689 result = io->ci_result;
/* Success is reported as the page count actually written. */
2691 result = fio->fi_nr_written;
2692 cl_io_fini(env, io);
2693 cl_env_nested_put(&nest, env);
2701 * When dentry is provided (the 'else' case), *file->f_dentry may be
2702 * null and dentry must be used directly rather than pulled from
2703 * *file->f_dentry as is done otherwise.
/*
 * fsync file operation (signature varies by kernel, hence the #ifdef
 * prologue; see the comment block above about dentry usage): flush
 * the page cache range, clear and report recorded async write errors,
 * fsync metadata via the MDC, and for regular files run a
 * CL_FSYNC_ALL cl_sync_file_range(), tracking fd_write_failed.
 * NOTE(review): excerpt elides lines (rc/err combination, req/oc
 * cleanup, final RETURN are not visible).
 */
2706 #ifdef HAVE_FILE_FSYNC_4ARGS
2707 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2709 struct dentry *dentry = file->f_dentry;
2710 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2711 int ll_fsync(struct file *file, int datasync)
2713 struct dentry *dentry = file->f_dentry;
2715 loff_t end = LLONG_MAX;
2717 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2720 loff_t end = LLONG_MAX;
2722 struct inode *inode = dentry->d_inode;
2723 struct ll_inode_info *lli = ll_i2info(inode);
2724 struct ptlrpc_request *req;
2725 struct obd_capa *oc;
2729 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2730 PFID(ll_inode2fid(inode)), inode);
2731 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2733 #ifdef HAVE_FILE_FSYNC_4ARGS
2734 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2735 mutex_lock(&inode->i_mutex);
2737 /* fsync's caller has already called _fdata{sync,write}, we want
2738 * that IO to finish before calling the osc and mdc sync methods */
2739 rc = filemap_fdatawait(inode->i_mapping);
2742 /* catch async errors that were recorded back when async writeback
2743 * failed for pages in this mapping. */
2744 if (!S_ISDIR(inode->i_mode)) {
2745 err = lli->lli_async_rc;
2746 lli->lli_async_rc = 0;
2749 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* Sync MDS-side metadata for this inode. */
2754 oc = ll_mdscapa_get(inode);
2755 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2761 ptlrpc_req_finished(req);
2763 if (S_ISREG(inode->i_mode)) {
2764 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2766 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2767 if (rc == 0 && err < 0)
/* Remember the outcome so ll_flush() reports errors only once. */
2770 fd->fd_write_failed = true;
2772 fd->fd_write_failed = false;
2775 #ifdef HAVE_FILE_FSYNC_4ARGS
2776 mutex_unlock(&inode->i_mutex);
/*
 * File-lock (flock/POSIX fcntl lock) handler.  Translates the kernel's
 * struct file_lock into an LDLM flock enqueue on the MDT, then mirrors
 * the result into the local lock tables (flock_lock_file_wait /
 * posix_lock_file_wait) so the VFS view stays consistent.
 */
2782 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2784 struct inode *inode = file->f_dentry->d_inode;
2785 struct ll_sb_info *sbi = ll_i2sbi(inode);
2786 struct ldlm_enqueue_info einfo = {
2787 .ei_type = LDLM_FLOCK,
2788 .ei_cb_cp = ldlm_flock_completion_ast,
2789 .ei_cbdata = file_lock,
2791 struct md_op_data *op_data;
2792 struct lustre_handle lockh = {0};
2793 ldlm_policy_data_t flock = {{0}};
/* saved so it can be restored after einfo.ei_mode is stored in it */
2794 int fl_type = file_lock->fl_type;
2800 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2801 PFID(ll_inode2fid(inode)), file_lock);
2803 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2805 if (file_lock->fl_flags & FL_FLOCK) {
2806 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2807 /* flocks are whole-file locks */
2808 flock.l_flock.end = OFFSET_MAX;
2809 /* For flocks owner is determined by the local file descriptor*/
2810 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2811 } else if (file_lock->fl_flags & FL_POSIX) {
2812 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2813 flock.l_flock.start = file_lock->fl_start;
2814 flock.l_flock.end = file_lock->fl_end;
2818 flock.l_flock.pid = file_lock->fl_pid;
2820 /* Somewhat ugly workaround for svc lockd.
2821 * lockd installs custom fl_lmops->lm_compare_owner that checks
2822 * for the fl_owner to be the same (which it always is on local node
2823 * I guess between lockd processes) and then compares pid.
2824 * As such we assign pid to the owner field to make it all work,
2825 * conflict with normal locks is unlikely since pid space and
2826 * pointer space for current->files are not intersecting */
2827 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2828 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* map the fcntl lock type to an LDLM lock mode */
2832 einfo.ei_mode = LCK_PR;
2835 /* An unlock request may or may not have any relation to
2836 * existing locks so we may not be able to pass a lock handle
2837 * via a normal ldlm_lock_cancel() request. The request may even
2838 * unlock a byte range in the middle of an existing lock. In
2839 * order to process an unlock request we need all of the same
2840 * information that is given with a normal read or write record
2841 * lock request. To avoid creating another ldlm unlock (cancel)
2842 * message we'll treat a LCK_NL flock request as an unlock. */
2843 einfo.ei_mode = LCK_NL;
2846 einfo.ei_mode = LCK_PW;
2849 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* map the fcntl command to enqueue flags (nowait / test-only) */
2864 flags = LDLM_FL_BLOCK_NOWAIT;
2870 flags = LDLM_FL_TEST_LOCK;
2873 CERROR("unknown fcntl lock command: %d\n", cmd);
2877 /* Save the old mode so that if the mode in the lock changes we
2878 * can decrement the appropriate reader or writer refcount. */
2879 file_lock->fl_type = einfo.ei_mode;
2881 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2882 LUSTRE_OPC_ANY, NULL);
2883 if (IS_ERR(op_data))
2884 RETURN(PTR_ERR(op_data));
2886 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2887 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2888 flock.l_flock.pid, flags, einfo.ei_mode,
2889 flock.l_flock.start, flock.l_flock.end);
2891 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2894 /* Restore the file lock type if not TEST lock. */
2895 if (!(flags & LDLM_FL_TEST_LOCK))
2896 file_lock->fl_type = fl_type;
/* reflect the server-side result in the kernel's local lock lists */
2898 if ((file_lock->fl_flags & FL_FLOCK) &&
2899 (rc == 0 || file_lock->fl_type == F_UNLCK))
2900 rc2 = flock_lock_file_wait(file, file_lock);
2901 if ((file_lock->fl_flags & FL_POSIX) &&
2902 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2903 !(flags & LDLM_FL_TEST_LOCK))
2904 rc2 = posix_lock_file_wait(file, file_lock);
2906 if (rc2 && file_lock->fl_type != F_UNLCK) {
/* local bookkeeping failed: undo the server lock with a LCK_NL enqueue */
2907 einfo.ei_mode = LCK_NL;
2908 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2913 ll_finish_md_op_data(op_data);
/*
 * Look up the FID of @name under directory @parent via a getattr-by-name
 * RPC to the MDT.  On success the FID is copied into *fid.
 * Caller retains ownership of @name; the reply request is released here.
 */
2918 int ll_get_fid_by_name(struct inode *parent, const char *name,
2919 int namelen, struct lu_fid *fid)
2921 struct md_op_data *op_data = NULL;
2922 struct mdt_body *body;
2923 struct ptlrpc_request *req;
2927 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2928 LUSTRE_OPC_ANY, NULL);
2929 if (IS_ERR(op_data))
2930 RETURN(PTR_ERR(op_data));
/* only the FID is needed from the server */
2932 op_data->op_valid = OBD_MD_FLID;
2933 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2934 ll_finish_md_op_data(op_data);
2938 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2940 GOTO(out_req, rc = -EFAULT);
2942 *fid = body->mbo_fid1;
2944 ptlrpc_req_finished(req);
/*
 * Migrate the entry @name under @parent to MDT @mdtidx.  Resolves the
 * child FID (from the dcache if possible, otherwise by name lookup on
 * the MDT), skips the migration if the object already lives on the
 * target MDT, and otherwise issues a rename RPC with CLI_MIGRATE set.
 */
2948 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2949 const char *name, int namelen)
2951 struct dentry *dchild = NULL;
2952 struct inode *child_inode = NULL;
2953 struct md_op_data *op_data;
2954 struct ptlrpc_request *request = NULL;
2959 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2960 name, PFID(ll_inode2fid(parent)), mdtidx);
2962 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2963 0, LUSTRE_OPC_ANY, NULL);
2964 if (IS_ERR(op_data))
2965 RETURN(PTR_ERR(op_data));
2967 /* Get child FID first */
2968 qstr.hash = full_name_hash(name, namelen);
2971 dchild = d_lookup(file->f_dentry, &qstr);
2972 if (dchild != NULL) {
2973 if (dchild->d_inode != NULL) {
2974 child_inode = igrab(dchild->d_inode);
2975 if (child_inode != NULL) {
/* hold i_mutex across the migration; released at out_unlock below */
2976 mutex_lock(&child_inode->i_mutex);
2977 op_data->op_fid3 = *ll_inode2fid(child_inode);
/* drop stale aliases: the object is about to move to another MDT */
2978 ll_invalidate_aliases(child_inode);
/* not in the dcache: ask the MDT for the FID by name */
2983 rc = ll_get_fid_by_name(parent, name, namelen,
2989 if (!fid_is_sane(&op_data->op_fid3)) {
2990 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
2991 ll_get_fsname(parent->i_sb, NULL, 0), name,
2992 PFID(&op_data->op_fid3));
2993 GOTO(out_free, rc = -EINVAL);
2996 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
/* nothing to do if the object already resides on the target MDT */
3001 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
3002 PFID(&op_data->op_fid3), mdtidx);
3003 GOTO(out_free, rc = 0);
3006 op_data->op_mds = mdtidx;
3007 op_data->op_cli_flags = CLI_MIGRATE;
/* migration is implemented as a same-name rename with CLI_MIGRATE */
3008 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
3009 namelen, name, namelen, &request);
3011 ll_update_times(request, parent);
3013 ptlrpc_req_finished(request);
3018 if (child_inode != NULL) {
/* the old inode is defunct after migration; force it to be dropped */
3019 clear_nlink(child_inode);
3020 mutex_unlock(&child_inode->i_mutex);
3024 ll_finish_md_op_data(op_data);
3029 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3038 * test if some locks matching bits and l_req_mode are acquired
3039 * - bits can be in different locks
3040 * - if found, clear the common lock bits in *bits
3041 * - the bits not found are kept in *bits
3042 * \param bits [IN] searched lock bits
3043 * \param l_req_mode [IN] searched lock mode
3044 * \retval boolean, true iff all bits are found
3046 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3048 struct lustre_handle lockh;
3049 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four modes at once */
3050 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3051 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3060 fid = &ll_i2info(inode)->lli_fid;
3061 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3062 ldlm_lockname[mode]);
/* TEST_LOCK: only probe for a match, do not take a reference */
3064 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* probe one inodebit at a time; stop early once all bits are matched */
3065 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3066 policy.l_inodebits.bits = *bits & (1 << i);
3067 if (policy.l_inodebits.bits == 0)
3070 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3071 &policy, mode, &lockh)) {
3072 struct ldlm_lock *lock;
3074 lock = ldlm_handle2lock(&lockh);
3077 ~(lock->l_policy_data.l_inodebits.bits);
3078 LDLM_LOCK_PUT(lock);
3080 *bits &= ~policy.l_inodebits.bits;
/*
 * Try to match (and reference) a granted MD lock on @inode covering the
 * given inodebits.  Returns the matched mode (0 if none); on success the
 * lock handle is returned in *lockh and the caller must drop the ref.
 */
3087 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3088 struct lustre_handle *lockh, __u64 flags,
3091 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3096 fid = &ll_i2info(inode)->lli_fid;
3097 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3099 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3100 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Post-process the result of an inode revalidation RPC: -ENOENT on a
 * non-regular/non-dir inode is tolerated (object already unlinked);
 * other errors are logged with rate limiting and passed through.
 */
3105 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3107 /* Already unlinked. Just update nlink and return success */
3108 if (rc == -ENOENT) {
3110 /* This path cannot be hit for regular files unless in
3111 * case of obscure races, so no need to validate
3113 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3115 } else if (rc != 0) {
/* EACCES/EIDRM are expected under permission/identity churn: log quietly */
3116 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3117 "%s: revalidate FID "DFID" error: rc = %d\n",
3118 ll_get_fsname(inode->i_sb, NULL, 0),
3119 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate @dentry's inode attributes against the MDT for the lock
 * bits in @ibits.  Two strategies: an intent getattr-by-fid when the
 * server supports OBD_CONNECT_ATTRFID, otherwise a plain md_getattr,
 * skipped entirely when a matching MD lock is already cached locally.
 */
3125 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3127 struct inode *inode = dentry->d_inode;
3128 struct ptlrpc_request *req = NULL;
3129 struct obd_export *exp;
3133 LASSERT(inode != NULL);
3135 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3136 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3138 exp = ll_i2mdexp(inode);
3140 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3141 * But under CMD case, it caused some lock issues, should be fixed
3142 * with new CMD ibits lock. See bug 12718 */
3143 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3144 struct lookup_intent oit = { .it_op = IT_GETATTR };
3145 struct md_op_data *op_data;
/* lookup-only revalidation needs only the LOOKUP intent */
3147 if (ibits == MDS_INODELOCK_LOOKUP)
3148 oit.it_op = IT_LOOKUP;
3150 /* Call getattr by fid, so do not provide name at all. */
3151 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3152 dentry->d_inode, NULL, 0, 0,
3153 LUSTRE_OPC_ANY, NULL);
3154 if (IS_ERR(op_data))
3155 RETURN(PTR_ERR(op_data));
3157 rc = md_intent_lock(exp, op_data, &oit, &req,
3158 &ll_md_blocking_ast, 0);
3159 ll_finish_md_op_data(op_data);
3161 rc = ll_inode_revalidate_fini(inode, rc);
3165 rc = ll_revalidate_it_finish(req, &oit, dentry);
3167 ll_intent_release(&oit);
3171 /* Unlinked? Unhash dentry, so it is not picked up later by
3172 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3173 here to preserve get_cwd functionality on 2.6.
3175 if (!dentry->d_inode->i_nlink)
3176 d_lustre_invalidate(dentry, 0);
3178 ll_lookup_finish_locks(&oit, dentry);
3179 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3180 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3181 u64 valid = OBD_MD_FLGETATTR;
3182 struct md_op_data *op_data;
/* regular files also need the striping EA, sized to the MDS default */
3185 if (S_ISREG(inode->i_mode)) {
3186 rc = ll_get_default_mdsize(sbi, &ealen);
3189 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3192 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3193 0, ealen, LUSTRE_OPC_ANY,
3195 if (IS_ERR(op_data))
3196 RETURN(PTR_ERR(op_data));
3198 op_data->op_valid = valid;
3199 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3200 * capa for this inode. Because we only keep capas of dirs
3202 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3203 ll_finish_md_op_data(op_data);
3205 rc = ll_inode_revalidate_fini(inode, rc);
3209 rc = ll_prep_inode(&inode, req, NULL, NULL);
3212 ptlrpc_req_finished(req);
/*
 * For a striped directory, merge the per-stripe attributes from all MDTs
 * (via md_merge_attr) into this inode: nlink, blocks, size and the
 * cached a/m/ctime in ll_inode_info.
 */
3216 static int ll_merge_md_attr(struct inode *inode)
3218 struct cl_attr attr = { 0 };
/* only meaningful for striped dirs: lli_lsm_md must be set */
3221 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3222 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3223 &attr, ll_md_blocking_ast);
3227 set_nlink(inode, attr.cat_nlink);
3228 inode->i_blocks = attr.cat_blocks;
3229 i_size_write(inode, attr.cat_size);
3231 ll_i2info(inode)->lli_atime = attr.cat_atime;
3232 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3233 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * Full revalidation: refresh MD attributes via __ll_inode_revalidate(),
 * then bring the size up to date — merged stripe attrs for striped
 * directories, a glimpse RPC for regular files (unless a restore is in
 * progress, in which case the MDT already sent the authoritative size).
 */
3239 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3241 struct inode *inode = dentry->d_inode;
3245 rc = __ll_inode_revalidate(dentry, ibits);
3249 /* if object isn't regular file, don't validate size */
3250 if (!S_ISREG(inode->i_mode)) {
3251 if (S_ISDIR(inode->i_mode) &&
3252 ll_i2info(inode)->lli_lsm_md != NULL) {
3253 rc = ll_merge_md_attr(inode);
/* propagate the timestamps cached in ll_inode_info to the VFS inode */
3258 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3259 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3260 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3262 /* In case of restore, the MDT has the right size and has
3263 * already send it back without granting the layout lock,
3264 * inode is up-to-date so glimpse is useless.
3265 * Also to glimpse we need the layout, in case of a running
3266 * restore the MDT holds the layout lock so the glimpse will
3267 * block up to the end of restore (getattr will block)
3269 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3270 rc = ll_glimpse_size(inode);
/*
 * VFS ->getattr: revalidate UPDATE|LOOKUP bits against the MDT, then
 * fill *stat from the (now current) inode fields.
 */
3275 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3277 struct inode *inode = de->d_inode;
3278 struct ll_sb_info *sbi = ll_i2sbi(inode);
3279 struct ll_inode_info *lli = ll_i2info(inode);
3282 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3283 MDS_INODELOCK_LOOKUP);
3284 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3289 stat->dev = inode->i_sb->s_dev;
/* 32-bit clients get an inode number squeezed from the FID */
3290 if (ll_need_32bit_api(sbi))
3291 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3293 stat->ino = inode->i_ino;
3294 stat->mode = inode->i_mode;
3295 stat->uid = inode->i_uid;
3296 stat->gid = inode->i_gid;
3297 stat->rdev = inode->i_rdev;
3298 stat->atime = inode->i_atime;
3299 stat->mtime = inode->i_mtime;
3300 stat->ctime = inode->i_ctime;
3301 stat->blksize = 1 << inode->i_blkbits;
3303 stat->nlink = inode->i_nlink;
3304 stat->size = i_size_read(inode);
3305 stat->blocks = inode->i_blocks;
/*
 * VFS ->fiemap: marshal the kernel's fiemap_extent_info into a
 * ll_user_fiemap buffer, run ll_do_fiemap(), and copy the mapped
 * extents back to the caller's user buffer.
 */
3310 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3311 __u64 start, __u64 len)
3315 struct ll_user_fiemap *fiemap;
3316 unsigned int extent_count = fieinfo->fi_extents_max;
/* header plus room for the requested number of extents */
3318 num_bytes = sizeof(*fiemap) + (extent_count *
3319 sizeof(struct ll_fiemap_extent));
3320 OBD_ALLOC_LARGE(fiemap, num_bytes);
3325 fiemap->fm_flags = fieinfo->fi_flags;
3326 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3327 fiemap->fm_start = start;
3328 fiemap->fm_length = len;
/* seed the first extent from the caller (used for continuation) */
3329 if (extent_count > 0)
3330 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3331 sizeof(struct ll_fiemap_extent));
3333 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3335 fieinfo->fi_flags = fiemap->fm_flags;
3336 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3337 if (extent_count > 0)
3338 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3339 fiemap->fm_mapped_extents *
3340 sizeof(struct ll_fiemap_extent));
3342 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * Return a referenced copy of the cached POSIX ACL of @type for @inode.
 * lli_lock guards lli_posix_acl against concurrent update.
 */
3346 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3348 struct ll_inode_info *lli = ll_i2info(inode);
3349 struct posix_acl *acl = NULL;
3352 spin_lock(&lli->lli_lock);
3353 /* VFS' acl_permission_check->check_acl will release the refcount */
3354 acl = posix_acl_dup(lli->lli_posix_acl);
3355 spin_unlock(&lli->lli_lock);
/*
 * ACL permission check callback for older kernels where
 * generic_permission() takes a check_acl function.  Variants are
 * selected by the HAVE_GENERIC_PERMISSION_* / CONFIG_FS_POSIX_ACL
 * macros below.
 */
3360 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3362 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3363 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3365 ll_check_acl(struct inode *inode, int mask)
3368 # ifdef CONFIG_FS_POSIX_ACL
3369 struct posix_acl *acl;
3373 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* cannot block in RCU walk; let the caller retry in ref-walk mode */
3374 if (flags & IPERM_FLAG_RCU)
3377 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3382 rc = posix_acl_permission(inode, acl, mask);
3383 posix_acl_release(acl);
3386 # else /* !CONFIG_FS_POSIX_ACL */
3388 # endif /* CONFIG_FS_POSIX_ACL */
3390 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * VFS ->permission: revalidates the root inode when needed, applies
 * root-squash by temporarily overriding the task credentials, then
 * delegates to the remote-perm check or generic_permission().
 * Signature varies with kernel version (see HAVE_* macros).
 */
3392 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3393 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3395 # ifdef HAVE_INODE_PERMISION_2ARGS
3396 int ll_inode_permission(struct inode *inode, int mask)
3398 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3403 struct ll_sb_info *sbi;
3404 struct root_squash_info *squash;
3405 struct cred *cred = NULL;
3406 const struct cred *old_cred = NULL;
3408 bool squash_id = false;
/* refuse RCU-walk: the revalidation below may sleep */
3411 #ifdef MAY_NOT_BLOCK
3412 if (mask & MAY_NOT_BLOCK)
3414 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3415 if (flags & IPERM_FLAG_RCU)
3419 /* as root inode are NOT getting validated in lookup operation,
3420 * need to do it before permission check. */
3422 if (inode == inode->i_sb->s_root->d_inode) {
3423 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3424 MDS_INODELOCK_LOOKUP);
3429 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3430 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3432 /* squash fsuid/fsgid if needed */
3433 sbi = ll_i2sbi(inode);
3434 squash = &sbi->ll_squash;
3435 if (unlikely(squash->rsi_uid != 0 &&
3436 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3437 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3441 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3442 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3443 squash->rsi_uid, squash->rsi_gid);
3445 /* update current process's credentials
3446 * and FS capability */
3447 cred = prepare_creds();
3451 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3452 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* drop all filesystem-related capabilities for the squashed identity */
3453 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3454 if ((1 << cap) & CFS_CAP_FS_MASK)
3455 cap_lower(cred->cap_effective, cap);
3457 old_cred = override_creds(cred);
3460 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3462 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3463 rc = lustre_check_remote_perm(inode, mask);
3465 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3467 /* restore current process's credentials and FS capability */
3469 revert_creds(old_cred);
3476 /* -o localflock - only provides locally consistent flock locks */
/* default file_operations: no .flock/.lock, so flock falls back to the
 * kernel's local implementation */
3477 struct file_operations ll_file_operations = {
3478 .read = ll_file_read,
3479 .aio_read = ll_file_aio_read,
3480 .write = ll_file_write,
3481 .aio_write = ll_file_aio_write,
3482 .unlocked_ioctl = ll_file_ioctl,
3483 .open = ll_file_open,
3484 .release = ll_file_release,
3485 .mmap = ll_file_mmap,
3486 .llseek = ll_file_seek,
3487 .splice_read = ll_file_splice_read,
/* -o flock: cluster-coherent locks, .flock/.lock routed through the MDT */
3492 struct file_operations ll_file_operations_flock = {
3493 .read = ll_file_read,
3494 .aio_read = ll_file_aio_read,
3495 .write = ll_file_write,
3496 .aio_write = ll_file_aio_write,
3497 .unlocked_ioctl = ll_file_ioctl,
3498 .open = ll_file_open,
3499 .release = ll_file_release,
3500 .mmap = ll_file_mmap,
3501 .llseek = ll_file_seek,
3502 .splice_read = ll_file_splice_read,
3505 .flock = ll_file_flock,
3506 .lock = ll_file_flock
3509 /* These are for -o noflock - to return ENOSYS on flock calls */
3510 struct file_operations ll_file_operations_noflock = {
3511 .read = ll_file_read,
3512 .aio_read = ll_file_aio_read,
3513 .write = ll_file_write,
3514 .aio_write = ll_file_aio_write,
3515 .unlocked_ioctl = ll_file_ioctl,
3516 .open = ll_file_open,
3517 .release = ll_file_release,
3518 .mmap = ll_file_mmap,
3519 .llseek = ll_file_seek,
3520 .splice_read = ll_file_splice_read,
3523 .flock = ll_file_noflock,
3524 .lock = ll_file_noflock
/* inode_operations for regular files */
3527 struct inode_operations ll_file_inode_operations = {
3528 .setattr = ll_setattr,
3529 .getattr = ll_getattr,
3530 .permission = ll_inode_permission,
3531 .setxattr = ll_setxattr,
3532 .getxattr = ll_getxattr,
3533 .listxattr = ll_listxattr,
3534 .removexattr = ll_removexattr,
3535 .fiemap = ll_fiemap,
3536 #ifdef HAVE_IOP_GET_ACL
3537 .get_acl = ll_get_acl,
3541 /* dynamic ioctl number support routines */
/* registry of dynamically registered ioctl handlers, protected by an rwsem */
3542 static struct llioc_ctl_data {
3543 struct rw_semaphore ioc_sem;
3544 struct list_head ioc_head;
3546 __RWSEM_INITIALIZER(llioc.ioc_sem),
3547 LIST_HEAD_INIT(llioc.ioc_head)
/* one registered handler: a callback plus the ioctl numbers it serves */
3552 struct list_head iocd_list;
3553 unsigned int iocd_size;
3554 llioc_callback_t iocd_cb;
3555 unsigned int iocd_count;
/* flexible array of iocd_count ioctl command numbers */
3556 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl callback for @count commands in @cmd.
 * Returns an opaque cookie (the allocated record) used later with
 * ll_iocontrol_unregister(), or NULL-equivalent on bad args / OOM.
 */
3559 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3562 struct llioc_data *in_data = NULL;
3565 if (cb == NULL || cmd == NULL ||
3566 count > LLIOC_MAX_CMD || count < 0)
3569 size = sizeof(*in_data) + count * sizeof(unsigned int);
3570 OBD_ALLOC(in_data, size);
3571 if (in_data == NULL)
3574 memset(in_data, 0, sizeof(*in_data));
3575 in_data->iocd_size = size;
3576 in_data->iocd_cb = cb;
3577 in_data->iocd_count = count;
3578 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3580 down_write(&llioc.ioc_sem);
3581 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3582 up_write(&llioc.ioc_sem);
/*
 * Unregister a dynamic ioctl callback previously returned by
 * ll_iocontrol_register(); @magic is that cookie.  Warns if the
 * cookie is not found in the registry.
 */
3587 void ll_iocontrol_unregister(void *magic)
3589 struct llioc_data *tmp;
3594 down_write(&llioc.ioc_sem);
3595 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3597 unsigned int size = tmp->iocd_size;
3599 list_del(&tmp->iocd_list);
3600 up_write(&llioc.ioc_sem);
3602 OBD_FREE(tmp, size);
3606 up_write(&llioc.ioc_sem);
3608 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3611 EXPORT_SYMBOL(ll_iocontrol_register);
3612 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch @cmd to the registered dynamic ioctl handlers.  Walks the
 * registry under the read lock and calls the first handler that claims
 * the command; the handler's rc is returned via *rcp.  Iteration stops
 * when a handler returns LLIOC_STOP.
 */
3614 static enum llioc_iter
3615 ll_iocontrol_call(struct inode *inode, struct file *file,
3616 unsigned int cmd, unsigned long arg, int *rcp)
3618 enum llioc_iter ret = LLIOC_CONT;
3619 struct llioc_data *data;
3620 int rc = -EINVAL, i;
3622 down_read(&llioc.ioc_sem);
3623 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3624 for (i = 0; i < data->iocd_count; i++) {
3625 if (cmd != data->iocd_cmd[i])
3628 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3632 if (ret == LLIOC_STOP)
3635 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration into the cl_object stack for @inode.
 * For OBJECT_CONF_SET this also allows the layout lock to be matched
 * and records the new layout generation in ll_inode_info.
 */
3642 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3644 struct ll_inode_info *lli = ll_i2info(inode);
3645 struct cl_env_nest nest;
3650 if (lli->lli_clob == NULL)
3653 env = cl_env_nested_get(&nest);
3655 RETURN(PTR_ERR(env));
3657 result = cl_conf_set(env, lli->lli_clob, conf);
3658 cl_env_nested_put(&nest, env);
3660 if (conf->coc_opc == OBJECT_CONF_SET) {
3661 struct ldlm_lock *lock = conf->coc_lock;
3663 LASSERT(lock != NULL);
3664 LASSERT(ldlm_has_layout(lock));
3666 struct lustre_md *md = conf->u.coc_md;
3667 __u32 gen = LL_LAYOUT_GEN_EMPTY;
3669 /* it can only be allowed to match after layout is
3670 * applied to inode otherwise false layout would be
3671 * seen. Applying layout should happen before dropping
3672 * the intent lock. */
3673 ldlm_lock_allow_match(lock);
3675 lli->lli_has_smd = lsm_has_objects(md->lsm);
3676 if (md->lsm != NULL)
3677 gen = md->lsm->lsm_layout_gen;
3680 DFID ": layout version change: %u -> %u\n",
3681 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3683 ll_layout_version_set(lli, gen);
3689 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3690 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3693 struct ll_sb_info *sbi = ll_i2sbi(inode);
3694 struct obd_capa *oc;
3695 struct ptlrpc_request *req;
3696 struct mdt_body *body;
3703 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3704 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3705 lock->l_lvb_data, lock->l_lvb_len);
/* already have an up-to-date LVB: nothing to fetch */
3707 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3710 /* if layout lock was granted right away, the layout is returned
3711 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3712 * blocked and then granted via completion ast, we have to fetch
3713 * layout here. Please note that we can't use the LVB buffer in
3714 * completion AST because it doesn't have a large enough buffer */
3715 oc = ll_mdscapa_get(inode);
3716 rc = ll_get_default_mdsize(sbi, &lmmsize);
3718 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3719 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3725 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3727 GOTO(out, rc = -EPROTO);
3729 lmmsize = body->mbo_eadatasize;
3730 if (lmmsize == 0) /* empty layout */
3733 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3735 GOTO(out, rc = -EFAULT);
/* stash a private copy of the layout into the lock's LVB, replacing
 * any stale buffer that was there before */
3737 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3738 if (lvbdata == NULL)
3739 GOTO(out, rc = -ENOMEM);
3741 memcpy(lvbdata, lmm, lmmsize);
3742 lock_res_and_lock(lock);
3743 if (lock->l_lvb_data != NULL)
3744 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3746 lock->l_lvb_data = lvbdata;
3747 lock->l_lvb_len = lmmsize;
3748 unlock_res_and_lock(lock);
3753 ptlrpc_req_finished(req);
3758 * Apply the layout to the inode. Layout lock is held and will be released
/* On exit the lock reference taken by the caller is dropped here
 * (ldlm_lock_decref below); *gen receives the resulting layout version. */
3761 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3762 struct inode *inode, __u32 *gen, bool reconf)
3764 struct ll_inode_info *lli = ll_i2info(inode);
3765 struct ll_sb_info *sbi = ll_i2sbi(inode);
3766 struct ldlm_lock *lock;
3767 struct lustre_md md = { NULL };
3768 struct cl_object_conf conf;
3771 bool wait_layout = false;
3774 LASSERT(lustre_handle_is_used(lockh));
3776 lock = ldlm_handle2lock(lockh);
3777 LASSERT(lock != NULL);
3778 LASSERT(ldlm_has_layout(lock));
3780 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d",
3781 PFID(&lli->lli_fid), inode, reconf);
3783 /* in case this is a caching lock and reinstate with new inode */
3784 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3786 lock_res_and_lock(lock);
3787 lvb_ready = ldlm_is_lvb_ready(lock);
3788 unlock_res_and_lock(lock);
3789 /* checking lvb_ready is racy but this is okay. The worst case is
3790 * that multi processes may configure the file on the same time. */
3792 if (lvb_ready || !reconf) {
3795 /* layout_gen must be valid if layout lock is not
3796 * cancelled and stripe has already set */
3797 *gen = ll_layout_version_get(lli);
/* make sure the lock's LVB actually contains the layout */
3803 rc = ll_layout_fetch(inode, lock);
3807 /* for layout lock, lmm is returned in lock's lvb.
3808 * lvb_data is immutable if the lock is held so it's safe to access it
3809 * without res lock. See the description in ldlm_lock_decref_internal()
3810 * for the condition to free lvb_data of layout lock */
3811 if (lock->l_lvb_data != NULL) {
3812 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3813 lock->l_lvb_data, lock->l_lvb_len);
3815 *gen = LL_LAYOUT_GEN_EMPTY;
3817 *gen = md.lsm->lsm_layout_gen;
3820 CERROR("%s: file "DFID" unpackmd error: %d\n",
3821 ll_get_fsname(inode->i_sb, NULL, 0),
3822 PFID(&lli->lli_fid), rc);
3828 /* set layout to file. Unlikely this will fail as old layout was
3829 * surely eliminated */
3830 memset(&conf, 0, sizeof conf);
3831 conf.coc_opc = OBJECT_CONF_SET;
3832 conf.coc_inode = inode;
3833 conf.coc_lock = lock;
3834 conf.u.coc_md = &md;
3835 rc = ll_layout_conf(inode, &conf);
3838 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3840 /* refresh layout failed, need to wait */
3841 wait_layout = rc == -EBUSY;
/* drop our lock reference; we are done looking at the LVB */
3845 LDLM_LOCK_PUT(lock);
3846 ldlm_lock_decref(lockh, mode);
3848 /* wait for IO to complete if it's still being used. */
3850 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3851 ll_get_fsname(inode->i_sb, NULL, 0),
3852 PFID(&lli->lli_fid), inode);
3854 memset(&conf, 0, sizeof conf);
3855 conf.coc_opc = OBJECT_CONF_WAIT;
3856 conf.coc_inode = inode;
3857 rc = ll_layout_conf(inode, &conf);
3861 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3862 ll_get_fsname(inode->i_sb, NULL, 0),
3863 PFID(&lli->lli_fid), rc);
3869 * This function checks if there exists a LAYOUT lock on the client side,
3870 * or enqueues it if it doesn't have one in cache.
3872 * This function will not hold the layout lock, so it may be revoked any time
3873 * after this function returns. Any operation that depends on the layout
3873 * should be redone in that case.
3876 * This function should be called before lov_io_init() to get an uptodate
3877 * layout version; the caller should save the version number, and after IO
3878 * is finished, this function should be called again to verify that the
3879 * layout was not changed during IO time.
/*
 * Ensure the client has an up-to-date layout for @inode and return its
 * generation in *gen.  First tries to match a cached layout lock; if
 * none, enqueues an IT_LAYOUT intent under lli_layout_mutex so only one
 * thread fetches the layout at a time.
 */
3881 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3883 struct ll_inode_info *lli = ll_i2info(inode);
3884 struct ll_sb_info *sbi = ll_i2sbi(inode);
3885 struct md_op_data *op_data;
3886 struct lookup_intent it;
3887 struct lustre_handle lockh;
3889 struct ldlm_enqueue_info einfo = {
3890 .ei_type = LDLM_IBITS,
3892 .ei_cb_bl = &ll_md_blocking_ast,
3893 .ei_cb_cp = &ldlm_completion_ast,
3898 *gen = ll_layout_version_get(lli);
/* fast path: layout lock disabled, or a valid generation is cached */
3899 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
3903 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3904 LASSERT(S_ISREG(inode->i_mode));
3906 /* take layout lock mutex to enqueue layout lock exclusively. */
3907 mutex_lock(&lli->lli_layout_mutex);
3910 /* mostly layout lock is caching on the local side, so try to match
3911 * it before grabbing layout lock mutex. */
3912 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3913 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3914 if (mode != 0) { /* hit cached lock */
3915 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3919 mutex_unlock(&lli->lli_layout_mutex);
3923 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3924 0, 0, LUSTRE_OPC_ANY, NULL);
3925 if (IS_ERR(op_data)) {
3926 mutex_unlock(&lli->lli_layout_mutex);
3927 RETURN(PTR_ERR(op_data));
3930 /* have to enqueue one */
3931 memset(&it, 0, sizeof(it));
3932 it.it_op = IT_LAYOUT;
3933 lockh.cookie = 0ULL;
3935 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3936 ll_get_fsname(inode->i_sb, NULL, 0),
3937 PFID(&lli->lli_fid), inode);
3939 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
/* release the intent's reply request; only the lock itself is kept */
3940 if (it.d.lustre.it_data != NULL)
3941 ptlrpc_req_finished(it.d.lustre.it_data);
3942 it.d.lustre.it_data = NULL;
3944 ll_finish_md_op_data(op_data);
/* transfer the lock reference from the intent to the local handle */
3946 mode = it.d.lustre.it_lock_mode;
3947 it.d.lustre.it_lock_mode = 0;
3948 ll_intent_drop_lock(&it);
3951 /* set lock data in case this is a new lock */
3952 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3953 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3957 mutex_unlock(&lli->lli_layout_mutex);
3963 * This function send a restore request to the MDT
3965 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3967 struct hsm_user_request *hur;
3971 len = sizeof(struct hsm_user_request) +
3972 sizeof(struct hsm_user_item);
3973 OBD_ALLOC(hur, len);
3977 hur->hur_request.hr_action = HUA_RESTORE;
3978 hur->hur_request.hr_archive_id = 0;
3979 hur->hur_request.hr_flags = 0;
3980 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3981 sizeof(hur->hur_user_item[0].hui_fid));
3982 hur->hur_user_item[0].hui_extent.offset = offset;
3983 hur->hur_user_item[0].hui_extent.length = length;
3984 hur->hur_request.hr_itemcount = 1;
3985 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,