4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
50 #include <lustre_ioctl.h>
52 #include "cl_object.h"
55 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
57 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
60 static enum llioc_iter
61 ll_iocontrol_call(struct inode *inode, struct file *file,
62 unsigned int cmd, unsigned long arg, int *rcp);
/*
 * Allocate a per-open ll_file_data from its dedicated slab cache.
 * GFP_NOFS avoids re-entering the filesystem under memory pressure.
 * NOTE(review): excerpt elides the allocation-failure check and the
 * return statement — confirm against the full source.
 */
static struct ll_file_data *ll_file_data_get(void)
	struct ll_file_data *fd;

	OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
	/* a fresh open has not seen a failed write yet */
	fd->fd_write_failed = false;
/* Return a ll_file_data previously obtained from ll_file_data_get()
 * back to its slab cache. */
static void ll_file_data_put(struct ll_file_data *fd)
	OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Copy the inode's current attributes (fid, mode, timestamps, size,
 * blocks, flags), the open file handle @fh and an MDS capability into
 * @op_data for transmission to the MDT.
 */
void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
			  struct lustre_handle *fh)
	op_data->op_fid1 = ll_i2info(inode)->lli_fid;
	op_data->op_attr.ia_mode = inode->i_mode;
	op_data->op_attr.ia_atime = inode->i_atime;
	op_data->op_attr.ia_mtime = inode->i_mtime;
	op_data->op_attr.ia_ctime = inode->i_ctime;
	op_data->op_attr.ia_size = i_size_read(inode);
	op_data->op_attr_blocks = inode->i_blocks;
	/* translate in-memory inode flags to the on-wire ext flag format */
	op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
	op_data->op_handle = *fh;
	op_data->op_capa1 = ll_mdscapa_get(inode);
	/* propagate the locally recorded data-modified state to the server */
	if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
		op_data->op_bias |= MDS_DATA_MODIFIED;
/*
 * Packs all the attributes into @op_data for the CLOSE rpc.
 */
static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
			     struct obd_client_handle *och)
	op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
				    ATTR_MTIME | ATTR_MTIME_SET |
				    ATTR_CTIME | ATTR_CTIME_SET;

	/* NOTE(review): the body of this condition is elided in this
	 * excerpt; size/blocks are added to ia_valid below */
	if (!(och->och_flags & FMODE_WRITE))

	op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;

	ll_pack_inode2opdata(inode, op_data, &och->och_fh);
	ll_prep_md_op_data(op_data, inode, NULL, NULL,
			   0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send a CLOSE (or an HSM release when @data_version is supplied) for
 * the open handle @och to the MDS, clear open-replay data, and poison
 * the handle cookie.  NOTE(review): this excerpt elides several lines,
 * including an inode parameter in the signature and most error paths.
 */
static int ll_close_inode_openhandle(struct obd_export *md_exp,
				     struct obd_client_handle *och,
				     const __u64 *data_version)
	struct obd_export *exp = ll_i2mdexp(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;
	struct obd_device *obd = class_exp2obd(exp);

	/* XXX: in case of LMV, is this correct to access
	 * (comment truncated in this excerpt) */
	CERROR("Invalid MDC connection handle "LPX64"\n",
	       ll_i2mdexp(inode)->exp_handle.h_cookie);

	OBD_ALLOC_PTR(op_data);
	GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.

	ll_prepare_close(inode, op_data, och);
	if (data_version != NULL) {
		/* Pass in data_version implies release. */
		op_data->op_bias |= MDS_HSM_RELEASE;
		op_data->op_data_version = *data_version;
		op_data->op_lease_handle = och->och_lease_handle;
		op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;

	rc = md_close(md_exp, op_data, och->och_mod, &req);
	CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
	       ll_i2mdexp(inode)->exp_obd->obd_name,
	       PFID(ll_inode2fid(inode)), rc);

	/* DATA_MODIFIED flag was successfully sent on close, cancel data
	 * modification flag. */
	if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
		struct ll_inode_info *lli = ll_i2info(inode);

		spin_lock(&lli->lli_lock);
		lli->lli_flags &= ~LLIF_DATA_MODIFIED;
		spin_unlock(&lli->lli_lock);

	/* a successful HSM release must be confirmed by the server with
	 * OBD_MD_FLRELEASED in the reply body */
	if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
		struct mdt_body *body;
		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
		if (!(body->mbo_valid & OBD_MD_FLRELEASED))

	ll_finish_md_op_data(op_data);

	md_clear_open_replay_data(md_exp, och);
	/* poison the cookie so later use of a stale handle is detectable */
	och->och_fh.cookie = DEAD_HANDLE_MAGIC;

	if (req) /* This is close request */
		ptlrpc_req_finished(req);
/*
 * Close the MDS open handle that matches @fmode (write, exec or read),
 * but only when no other local opens still use it.
 * NOTE(review): excerpt elides several lines (else keywords, use-count
 * handling and the final return).
 */
int ll_md_real_close(struct inode *inode, fmode_t fmode)
	struct ll_inode_info *lli = ll_i2info(inode);
	struct obd_client_handle **och_p;
	struct obd_client_handle *och;

	/* pick the open-handle slot and use count matching the open mode */
	if (fmode & FMODE_WRITE) {
		och_p = &lli->lli_mds_write_och;
		och_usecount = &lli->lli_open_fd_write_count;
	} else if (fmode & FMODE_EXEC) {
		och_p = &lli->lli_mds_exec_och;
		och_usecount = &lli->lli_open_fd_exec_count;
		LASSERT(fmode & FMODE_READ);
		och_p = &lli->lli_mds_read_och;
		och_usecount = &lli->lli_open_fd_read_count;

	mutex_lock(&lli->lli_och_mutex);
	if (*och_usecount > 0) {
		/* There are still users of this handle, so skip
		 * freeing it. */
		mutex_unlock(&lli->lli_och_mutex);

	mutex_unlock(&lli->lli_och_mutex);

	/* There might be a race and this handle may already
	 * be closed. */
	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-fd close work: drop any group lock, clean up a leftover lease,
 * close fd-private open handles, decrement the matching open use count,
 * and only talk to the MDS (ll_md_real_close()) when no cached OPEN DLM
 * lock lets us skip it.  NOTE(review): excerpt elides lines, including
 * the `struct file *file` parameter in the signature.
 */
static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ll_inode_info *lli = ll_i2info(inode);

	/* clear group lock, if present */
	if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
		ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);

	if (fd->fd_lease_och != NULL) {
		/* Usually the lease is not released when the
		 * application crashed, we need to release here. */
		rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
		CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
		       PFID(&lli->lli_fid), rc, lease_broken);
		fd->fd_lease_och = NULL;

	if (fd->fd_och != NULL) {
		rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);

	/* Let's see if we have good enough OPEN lock on the file and if
	   we can skip talking to MDS */
	if (file->f_dentry->d_inode) { /* Can this ever be false? */
		__u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
		struct lustre_handle lockh;
		struct inode *inode = file->f_dentry->d_inode;
		ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};

		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_omode & FMODE_WRITE) {
			LASSERT(lli->lli_open_fd_write_count);
			lli->lli_open_fd_write_count--;
		} else if (fd->fd_omode & FMODE_EXEC) {
			LASSERT(lli->lli_open_fd_exec_count);
			lli->lli_open_fd_exec_count--;
			LASSERT(lli->lli_open_fd_read_count);
			lli->lli_open_fd_read_count--;

		mutex_unlock(&lli->lli_och_mutex);

		/* LDLM_FL_TEST_LOCK above: only probe for a matching OPEN
		 * lock, do not take a reference on it */
		if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
				   LDLM_IBITS, &policy, lockmode,
			rc = ll_md_real_close(file->f_dentry->d_inode,
		CERROR("released file has negative dentry: file = %p, "
		       "dentry = %p, name = %s\n",
		       file, file->f_dentry, file->f_dentry->d_name.name);

	LUSTRE_FPRIVATE(file) = NULL;
	ll_file_data_put(fd);
	ll_capa_close(inode);
/* While this returns an error code, fput() the caller does not, so we need
 * to make every effort to clean up all of our state here.  Also, applications
 * rarely check close errors and even if an error is returned they will not
 * re-try the close call. */
int ll_file_release(struct inode *inode, struct file *file)
	struct ll_file_data *fd;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ll_inode_info *lli = ll_i2info(inode);

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
	       PFID(ll_inode2fid(inode)), inode);

#ifdef CONFIG_FS_POSIX_ACL
	/* remote-client ACL state is tracked per pid on the root inode;
	 * drop this process's entries on the last close */
	if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
	    inode == inode->i_sb->s_root->d_inode) {
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
			fd->fd_flags &= ~LL_FILE_RMTACL;
			rct_del(&sbi->ll_rct, current_pid());
			et_search_free(&sbi->ll_et, current_pid());

	/* the root dentry release is not counted in stats */
	if (inode->i_sb->s_root != file->f_dentry)
		ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
	fd = LUSTRE_FPRIVATE(file);

	/* The last ref on @file, maybe not the owner pid of statahead,
	 * because parent and child process can share the same file handle. */
	if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
		ll_deauthorize_statahead(inode, fd);

	if (inode->i_sb->s_root == file->f_dentry) {
		LUSTRE_FPRIVATE(file) = NULL;
		ll_file_data_put(fd);

	if (!S_ISDIR(inode->i_mode)) {
		/* collect any deferred async write errors for this inode */
		if (lli->lli_clob != NULL)
			lov_read_and_clear_async_rc(lli->lli_clob);
		lli->lli_async_rc = 0;

	rc = ll_md_close(sbi->ll_md_exp, inode, file);

	/* fault-injection hook: optionally dump the debug log on close */
	if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
		libcfs_debug_dumplog();
/*
 * Enqueue an IT_OPEN intent on the MDS for @file (optionally packing the
 * striping layout @lmm), then instantiate/refresh the local inode from
 * the reply.  NOTE(review): excerpt elides error-path lines.
 */
static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
			       struct lookup_intent *itp)
	struct dentry *de = file->f_dentry;
	struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
	struct dentry *parent = de->d_parent;
	const char *name = NULL;
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;

	LASSERT(parent != NULL);
	LASSERT(itp->it_flags & MDS_OPEN_BY_FID);

	/* if server supports open-by-fid, or file name is invalid, don't pack
	 * name in open request */
	if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
	    lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
		name = de->d_name.name;
		len = de->d_name.len;

	op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
				     name, len, 0, LUSTRE_OPC_ANY, NULL);
	RETURN(PTR_ERR(op_data));
	op_data->op_data = lmm;
	op_data->op_data_size = lmmsize;

	rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
			    &ll_md_blocking_ast, 0);
	ll_finish_md_op_data(op_data);

	/* reason for keep own exit path - don`t flood log
	 * with messages with -ESTALE errors. */
	if (!it_disposition(itp, DISP_OPEN_OPEN) ||
	    it_open_error(DISP_OPEN_OPEN, itp))
		ll_release_openhandle(de, itp);

	if (it_disposition(itp, DISP_LOOKUP_NEG))
		GOTO(out, rc = -ENOENT);

	if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
		rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
		CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);

	rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
	/* a granted lock came back with the reply — attach it to the inode */
	if (!rc && itp->d.lustre.it_lock_mode)
		ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);

	ptlrpc_req_finished(req);
	ll_intent_drop_lock(itp);
/*
 * Populate @och from the MDT reply carried inside the intent @it
 * (file handle, fid, lease-lock cookie), then register the handle for
 * open replay after recovery.
 */
static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
		       struct obd_client_handle *och)
	struct ptlrpc_request *req = it->d.lustre.it_data;
	struct mdt_body *body;

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	och->och_fh = body->mbo_handle;
	och->och_fid = body->mbo_fid1;
	och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
	och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
	och->och_flags = it->it_flags;

	return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish an open locally: optionally fill @och from the intent, attach
 * the ll_file_data to the struct file, and initialize per-fd readahead
 * and cl-context state.
 */
static int ll_local_open(struct file *file, struct lookup_intent *it,
			 struct ll_file_data *fd, struct obd_client_handle *och)
	struct inode *inode = file->f_dentry->d_inode;

	LASSERT(!LUSTRE_FPRIVATE(file));

	rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);

	LUSTRE_FPRIVATE(file) = fd;
	ll_readahead_init(inode, &fd->fd_ras);
	/* remember the effective open mode for later close accounting */
	fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);

	/* ll_cl_context initialize */
	rwlock_init(&fd->fd_lock);
	INIT_LIST_HEAD(&fd->fd_lccs);
/* Open a file, and (for the very first open) create objects on the OSTs at
 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
 * creation or open until ll_lov_setstripe() ioctl is called.
 *
 * If we already have the stripe MD locally then we don't request it in
 * md_open(), by passing a lmm_size = 0.
 *
 * It is up to the application to ensure no other processes open this file
 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
 * used. We might be able to avoid races of that sort by getting lli_open_sem
 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
 *
 * NOTE(review): this excerpt elides many lines (else branches, error
 * checks, labels); read alongside the full source.
 */
int ll_file_open(struct inode *inode, struct file *file)
	struct ll_inode_info *lli = ll_i2info(inode);
	struct lookup_intent *it, oit = { .it_op = IT_OPEN,
					  .it_flags = file->f_flags };
	struct obd_client_handle **och_p = NULL;
	__u64 *och_usecount = NULL;
	struct ll_file_data *fd;

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
	       PFID(ll_inode2fid(inode)), inode, file->f_flags);

	it = file->private_data; /* XXX: compat macro */
	file->private_data = NULL; /* prevent ll_local_open assertion */

	fd = ll_file_data_get();
	GOTO(out_openerr, rc = -ENOMEM);

	if (S_ISDIR(inode->i_mode))
		ll_authorize_statahead(inode, fd);

	/* root of the mount gets only local state, no MDS open below */
	if (inode->i_sb->s_root == file->f_dentry) {
		LUSTRE_FPRIVATE(file) = fd;

	if (!it || !it->d.lustre.it_disposition) {
		/* Convert f_flags into access mode. We cannot use file->f_mode,
		 * because everything but O_ACCMODE mask was stripped from it */
		if ((oit.it_flags + 1) & O_ACCMODE)
		if (file->f_flags & O_TRUNC)
			oit.it_flags |= FMODE_WRITE;

		/* kernel only call f_op->open in dentry_open. filp_open calls
		 * dentry_open after call to open_namei that checks permissions.
		 * Only nfsd_open call dentry_open directly without checking
		 * permissions and because of that this code below is safe. */
		if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
			oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;

		/* We do not want O_EXCL here, presumably we opened the file
		 * already? XXX - NFS implications? */
		oit.it_flags &= ~O_EXCL;

		/* bug20584, if "it_flags" contains O_CREAT, the file will be
		 * created if necessary, then "IT_CREAT" should be set to keep
		 * consistent with it */
		if (oit.it_flags & O_CREAT)
			oit.it_op |= IT_CREAT;

	/* Let's see if we have file open on MDS already. */
	if (it->it_flags & FMODE_WRITE) {
		och_p = &lli->lli_mds_write_och;
		och_usecount = &lli->lli_open_fd_write_count;
	} else if (it->it_flags & FMODE_EXEC) {
		och_p = &lli->lli_mds_exec_och;
		och_usecount = &lli->lli_open_fd_exec_count;
		och_p = &lli->lli_mds_read_och;
		och_usecount = &lli->lli_open_fd_read_count;

	mutex_lock(&lli->lli_och_mutex);
	if (*och_p) { /* Open handle is present */
		if (it_disposition(it, DISP_OPEN_OPEN)) {
			/* Well, there's extra open request that we do not need,
			 * let's close it somehow. This will decref request. */
			rc = it_open_error(DISP_OPEN_OPEN, it);
			mutex_unlock(&lli->lli_och_mutex);
			GOTO(out_openerr, rc);

			ll_release_openhandle(file->f_dentry, it);

		rc = ll_local_open(file, it, fd, NULL);
		mutex_unlock(&lli->lli_och_mutex);
		GOTO(out_openerr, rc);

		LASSERT(*och_usecount == 0);
		if (!it->d.lustre.it_disposition) {
			/* We cannot just request lock handle now, new ELC code
			 * means that one of other OPEN locks for this file
			 * could be cancelled, and since blocking ast handler
			 * would attempt to grab och_mutex as well, that would
			 * result in a deadlock */
			mutex_unlock(&lli->lli_och_mutex);
			/*
			 * Normally called under two situations:
			 * (case 1 elided in this excerpt)
			 * 2. A race/condition on MDS resulting in no open
			 *    handle to be returned from LOOKUP|OPEN request,
			 *    for example if the target entry was a symlink.
			 *
			 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
			 *
			 * Always specify MDS_OPEN_BY_FID because we don't want
			 * to get file with different fid.
			 */
			it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
			rc = ll_intent_file_open(file, NULL, 0, it);
			GOTO(out_openerr, rc);

		OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
		GOTO(out_och_free, rc = -ENOMEM);

		/* md_intent_lock() didn't get a request ref if there was an
		 * open error, so don't do cleanup on the request here */
		/* XXX (green): Should not we bail out on any error here, not
		 * just open error? */
		rc = it_open_error(DISP_OPEN_OPEN, it);
		GOTO(out_och_free, rc);

		LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
			 "inode %p: disposition %x, status %d\n", inode,
			 it_disposition(it, ~0), it->d.lustre.it_status);

		rc = ll_local_open(file, it, fd, *och_p);
		GOTO(out_och_free, rc);

	mutex_unlock(&lli->lli_och_mutex);

	/* Must do this outside lli_och_mutex lock to prevent deadlock where
	 * different kind of OPEN lock for this same inode gets cancelled
	 * by ldlm_cancel_lru */
	if (!S_ISREG(inode->i_mode))
		GOTO(out_och_free, rc);

	/* delayed-create open without write intent: skip object creation */
	if (!lli->lli_has_smd &&
	    (cl_is_lov_delay_create(file->f_flags) ||
	     (file->f_mode & FMODE_WRITE) == 0)) {
		CDEBUG(D_INODE, "object creation was delayed\n");
		GOTO(out_och_free, rc);

	cl_lov_delay_create_clear(&file->f_flags);
	GOTO(out_och_free, rc);

	if (och_p && *och_p) {
		OBD_FREE(*och_p, sizeof (struct obd_client_handle));
		*och_p = NULL; /* OBD_FREE writes some magic there */

	mutex_unlock(&lli->lli_och_mutex);

	if (lli->lli_opendir_key == fd)
		ll_deauthorize_statahead(inode, fd);
	ll_file_data_put(fd);

	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);

	/* drop the reply reference held via DISP_ENQ_OPEN_REF */
	if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
		ptlrpc_req_finished(it->d.lustre.it_data);
		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for the lease DLM lock: on a blocking callback the lease
 * lock is cancelled asynchronously (LCF_ASYNC).  NOTE(review): the
 * switch statement and return lines are elided in this excerpt.
 */
static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
				    struct ldlm_lock_desc *desc, void *data, int flag)
	struct lustre_handle lockh;

	case LDLM_CB_BLOCKING:
		ldlm_lock2handle(lock, &lockh);
		rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
		CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
	case LDLM_CB_CANCELING:
/*
 * Acquire a lease and open the file.
 * When @file is given, the lease is taken over this file's existing MDS
 * open handle (old_handle) so the MDT can recognize the same owner.
 * NOTE(review): this excerpt elides many lines (open_flags parameter,
 * error branches, labels).
 */
static struct obd_client_handle *
ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
	struct lookup_intent it = { .it_op = IT_OPEN };
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct md_op_data *op_data;
	struct ptlrpc_request *req = NULL;
	struct lustre_handle old_handle = { 0 };
	struct obd_client_handle *och = NULL;

	/* a lease must be a plain read or a plain write open */
	if (fmode != FMODE_WRITE && fmode != FMODE_READ)
		RETURN(ERR_PTR(-EINVAL));

		struct ll_inode_info *lli = ll_i2info(inode);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
		struct obd_client_handle **och_p;

		/* requested mode must match this fd and execs get no lease */
		if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
			RETURN(ERR_PTR(-EPERM));

		/* Get the openhandle of the file */
		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och != NULL) {
			mutex_unlock(&lli->lli_och_mutex);

		if (fd->fd_och == NULL) {
			if (file->f_mode & FMODE_WRITE) {
				LASSERT(lli->lli_mds_write_och != NULL);
				och_p = &lli->lli_mds_write_och;
				och_usecount = &lli->lli_open_fd_write_count;
				LASSERT(lli->lli_mds_read_och != NULL);
				och_p = &lli->lli_mds_read_och;
				och_usecount = &lli->lli_open_fd_read_count;
			if (*och_usecount == 1) {

		mutex_unlock(&lli->lli_och_mutex);
		if (rc < 0) /* more than 1 opener */

		LASSERT(fd->fd_och != NULL);
		old_handle = fd->fd_och->och_fh;

	RETURN(ERR_PTR(-ENOMEM));

	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
				     LUSTRE_OPC_ANY, NULL);
	GOTO(out, rc = PTR_ERR(op_data));

	/* To tell the MDT this openhandle is from the same owner */
	op_data->op_handle = old_handle;

	it.it_flags = fmode | open_flags;
	it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
	rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
			    &ll_md_blocking_lease_ast,
	/* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
	 * it can be cancelled which may mislead applications that the lease is
	 * not broken.
	 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
	 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
	 * doesn't deal with openhandle, so normal openhandle will be leaked. */
			    LDLM_FL_NO_LRU | LDLM_FL_EXCL);
	ll_finish_md_op_data(op_data);
	ptlrpc_req_finished(req);
	GOTO(out_release_it, rc);

	if (it_disposition(&it, DISP_LOOKUP_NEG))
		GOTO(out_release_it, rc = -ENOENT);

	rc = it_open_error(DISP_OPEN_OPEN, &it);
	GOTO(out_release_it, rc);

	LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
	ll_och_fill(sbi->ll_md_exp, &it, och);

	if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
		GOTO(out_close, rc = -EOPNOTSUPP);

	/* already get lease, handle lease lock */
	ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
	if (it.d.lustre.it_lock_mode == 0 ||
	    it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
		/* open lock must return for lease */
		CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
		       PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
		       it.d.lustre.it_lock_bits);
		GOTO(out_close, rc = -EPROTO);

	ll_intent_release(&it);

	/* Cancel open lock */
	if (it.d.lustre.it_lock_mode != 0) {
		ldlm_lock_decref_and_cancel(&och->och_lease_handle,
					    it.d.lustre.it_lock_mode);
		it.d.lustre.it_lock_mode = 0;
		och->och_lease_handle.cookie = 0ULL;

	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
	CERROR("%s: error closing file "DFID": %d\n",
	       ll_get_fsname(inode->i_sb, NULL, 0),
	       PFID(&ll_i2info(inode)->lli_fid), rc2);
	och = NULL; /* och has been freed in ll_close_inode_openhandle() */

	ll_intent_release(&it);
/*
 * Release lease and close the file.
 * It will check if the lease has ever broken (i.e. the lease lock was
 * cancelled) and report that through @lease_broken.
 */
static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
	struct ldlm_lock *lock;
	bool cancelled = true;

	lock = ldlm_handle2lock(&och->och_lease_handle);
	/* inspect the cancel flag under the resource lock */
	lock_res_and_lock(lock);
	cancelled = ldlm_is_cancel(lock);
	unlock_res_and_lock(lock);

	CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
	       PFID(&ll_i2info(inode)->lli_fid), cancelled);

	ldlm_cli_cancel(&och->och_lease_handle, 0);

	if (lease_broken != NULL)
		*lease_broken = cancelled;

	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
/* Fills the obdo with the attributes for the lsm */
static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
			  struct obd_capa *capa, struct obdo *obdo,
	struct ptlrpc_request_set *set;
	struct obd_info oinfo = { { { 0 } } };

	LASSERT(lsm != NULL);

	oinfo.oi_oa->o_oi = lsm->lsm_oi;
	oinfo.oi_oa->o_mode = S_IFREG;
	oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
			       OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
			       OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
			       OBD_MD_FLMTIME | OBD_MD_FLCTIME |
			       OBD_MD_FLGROUP | OBD_MD_FLDATAVERSION;
	oinfo.oi_capa = capa;
	/* data-version flush requested: set OBD_FL_SRVLOCK (and
	 * OBD_FL_FLUSH for a write flush) in the getattr */
	if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
		oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
		oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
		if (dv_flags & LL_DV_WR_FLUSH)
			oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;

	set = ptlrpc_prep_set();
	CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM);

	rc = obd_getattr_async(exp, &oinfo, set);
	rc = ptlrpc_set_wait(set);
	ptlrpc_set_destroy(set);

	/* keep only the fields the OSTs are authoritative for */
	oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
				 OBD_MD_FLATIME | OBD_MD_FLMTIME |
				 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
				 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
	/* a write flush was requested but the reply does not confirm the
	 * flush flag — treat as failure (error value elided in excerpt) */
	if (dv_flags & LL_DV_WR_FLUSH &&
	    !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
	      oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
/*
 * Merge the MDS-supplied timestamps cached in ll_inode_info with the
 * OST object attributes (size, blocks, times) obtained through the cl
 * layer, keeping the most recent of each timestamp, and store the
 * result in the VFS inode under the inode size lock.
 */
int ll_merge_attr(const struct lu_env *env, struct inode *inode)
	struct ll_inode_info *lli = ll_i2info(inode);
	struct cl_object *obj = lli->lli_clob;
	struct cl_attr *attr = ccc_env_thread_attr(env);

	ll_inode_size_lock(inode);

	/* merge timestamps the most recently obtained from mds with
	 * timestamps obtained from osts */
	LTIME_S(inode->i_atime) = lli->lli_atime;
	LTIME_S(inode->i_mtime) = lli->lli_mtime;
	LTIME_S(inode->i_ctime) = lli->lli_ctime;

	atime = LTIME_S(inode->i_atime);
	mtime = LTIME_S(inode->i_mtime);
	ctime = LTIME_S(inode->i_ctime);

	cl_object_attr_lock(obj);
	rc = cl_object_attr_get(env, obj, attr);
	cl_object_attr_unlock(obj);

	GOTO(out_size_unlock, rc);

	/* the newer of MDS and OST timestamps wins */
	if (atime < attr->cat_atime)
		atime = attr->cat_atime;

	if (ctime < attr->cat_ctime)
		ctime = attr->cat_ctime;

	if (mtime < attr->cat_mtime)
		mtime = attr->cat_mtime;

	CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
	       PFID(&lli->lli_fid), attr->cat_size);

	i_size_write(inode, attr->cat_size);
	inode->i_blocks = attr->cat_blocks;

	LTIME_S(inode->i_atime) = atime;
	LTIME_S(inode->i_mtime) = mtime;
	LTIME_S(inode->i_ctime) = ctime;

	ll_inode_size_unlock(inode);
/*
 * Fetch current OST attributes for @lsm via ll_lsm_getattr() and copy
 * size/blocks/times into the caller-supplied stat structure @st.
 * NOTE(review): the rc check between getattr and the copies is elided
 * in this excerpt.
 */
int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
	struct obdo obdo = { 0 };

	rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0);
	st->st_size = obdo.o_size;
	st->st_blocks = obdo.o_blocks;
	st->st_mtime = obdo.o_mtime;
	st->st_atime = obdo.o_atime;
	st->st_ctime = obdo.o_ctime;
/*
 * Decide whether accesses through @file should skip atime updates,
 * checking the open flags, inode flags, mount flags and superblock
 * flags in turn.  NOTE(review): the individual "return true/false"
 * lines between the checks are elided in this excerpt.
 */
static bool file_is_noatime(const struct file *file)
	const struct vfsmount *mnt = file->f_path.mnt;
	const struct inode *inode = file->f_path.dentry->d_inode;

	/* Adapted from file_accessed() and touch_atime(). */
	if (file->f_flags & O_NOATIME)
	if (inode->i_flags & S_NOATIME)
	if (IS_NOATIME(inode))
	if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
	if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
	if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize common cl_io fields for a read (@write == 0) or write
 * (@write != 0) on @file: non-blocking/append/sync flags, the cl
 * object, lock requirements and noatime handling.
 */
static void ll_io_init(struct cl_io *io, const struct file *file, int write)
	struct inode *inode = file->f_dentry->d_inode;

	io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
	io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
	/* O_SYNC/O_DIRECT writes are synchronous (condition continues on
	 * a line elided in this excerpt) */
	io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
			      file->f_flags & O_DIRECT ||

	io->ci_obj = ll_i2info(inode)->lli_clob;
	io->ci_lockreq = CILR_MAYBE;
	if (ll_file_nolock(file)) {
		/* nolock open: take no DLM locks and no server-side lock */
		io->ci_lockreq = CILR_NEVER;
		io->ci_no_srvlock = 1;
	} else if (file->f_flags & O_APPEND) {
		/* append requires a mandatory lock */
		io->ci_lockreq = CILR_MANDATORY;

	io->ci_noatime = file_is_noatime(file);
/*
 * Common back end for the read/write/splice paths: build a cl_io, check
 * the file-size limit, take the per-inode range lock for normal
 * (non-group-locked) writes, run the cl_io loop, and update position
 * and per-mount stats.  NOTE(review): excerpt elides several lines
 * (return type, else keywords, case labels, restart loop control).
 */
ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
		   struct file *file, enum cl_io_type iot,
		   loff_t *ppos, size_t count)
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct range_lock range;

	CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
	       file->f_dentry->d_name.name, iot, *ppos, count);

	io = ccc_env_thread_io(env);
	ll_io_init(io, file, iot == CIT_WRITE);

	/* The maximum Lustre file size is variable, based on the
	 * OST maximum object size and number of stripes. This
	 * needs another check in addition to the VFS checks earlier. */
	end = (io->u.ci_wr.wr_append ? i_size_read(inode) : *ppos) + count;
	if (end > ll_file_maxbytes(inode)) {
		CDEBUG(D_INODE, "%s: file "DFID" offset %llu > maxbytes "LPU64
		       ": rc = %zd\n", ll_get_fsname(inode->i_sb, NULL, 0),
		       PFID(&lli->lli_fid), end, ll_file_maxbytes(inode),

	if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
		struct vvp_io *vio = vvp_env_io(env);
		bool range_locked = false;

		/* O_APPEND does not know the final write range up front,
		 * so it locks to EOF */
		if (file->f_flags & O_APPEND)
			range_lock_init(&range, 0, LUSTRE_EOF);
			range_lock_init(&range, *ppos, *ppos + count - 1);

		vio->vui_fd = LUSTRE_FPRIVATE(file);
		vio->vui_io_subtype = args->via_io_subtype;

		switch (vio->vui_io_subtype) {
			vio->vui_iov = args->u.normal.via_iov;
			vio->vui_nrsegs = args->u.normal.via_nrsegs;
			vio->vui_tot_nrsegs = vio->vui_nrsegs;
			vio->vui_iocb = args->u.normal.via_iocb;
			/* group-locked files skip the range lock */
			if ((iot == CIT_WRITE) &&
			    !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
				CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
				result = range_lock(&lli->lli_write_tree,
				range_locked = true;

			down_read(&lli->lli_trunc_sem);
			vio->u.splice.vui_pipe = args->u.splice.via_pipe;
			vio->u.splice.vui_flags = args->u.splice.via_flags;
			CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);

		ll_cl_add(file, env, io);
		result = cl_io_loop(env, io);
		ll_cl_remove(file, env);

		if (args->via_io_subtype == IO_NORMAL)
			up_read(&lli->lli_trunc_sem);
		CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
		range_unlock(&lli->lli_write_tree, &range);

		/* cl_io_rw_init() handled IO */
		result = io->ci_result;

	if (io->ci_nob > 0) {
		result = io->ci_nob;
		*ppos = io->u.ci_wr.wr.crw_pos;

	cl_io_fini(env, io);
	/* If any bit been read/written (result != 0), we just return
	 * short read/write instead of restart io. */
	if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
		CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zu\n",
		       iot == CIT_READ ? "read" : "write",
		       file->f_dentry->d_name.name, *ppos, count);
		LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);

	if (iot == CIT_READ) {
		ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
				   LPROC_LL_READ_BYTES, result);
	} else if (iot == CIT_WRITE) {
		ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
				   LPROC_LL_WRITE_BYTES, result);
		fd->fd_write_failed = false;
	} else if (result != -ERESTARTSYS) {
		fd->fd_write_failed = true;

	CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
/*
 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
 *
 * Validate an iovec array for a read/write: reject negative or
 * overflowing segment lengths and inaccessible user buffers, and
 * accumulate the usable byte count into *count.
 */
static int ll_file_get_iov_count(const struct iovec *iov,
				 unsigned long *nr_segs, size_t *count)
	for (seg = 0; seg < *nr_segs; seg++) {
		const struct iovec *iv = &iov[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
		cnt -= iv->iov_len;	/* This segment is no good */
/*
 * AIO read entry point: validate the iovec, package it into the
 * per-env vvp_io_args and run the generic IO path as CIT_READ,
 * advancing iocb->ki_pos.
 */
static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos)
	struct vvp_io_args *args;

	result = ll_file_get_iov_count(iov, &nr_segs, &count);

	env = cl_env_get(&refcheck);
	RETURN(PTR_ERR(env));

	args = vvp_env_args(env, IO_NORMAL);
	args->u.normal.via_iov = (struct iovec *)iov;
	args->u.normal.via_nrsegs = nr_segs;
	args->u.normal.via_iocb = iocb;

	result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
				    &iocb->ki_pos, count);
	cl_env_put(env, &refcheck);
/*
 * Synchronous read(2) path: wrap the user buffer into a single iovec
 * plus a sync kiocb taken from the per-env scratch area, delegate to
 * ll_file_aio_read(), then write back the updated position to *ppos.
 */
static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
	struct iovec *local_iov;
	struct kiocb *kiocb;

	env = cl_env_get(&refcheck);
	RETURN(PTR_ERR(env));

	local_iov = &vvp_env_info(env)->vti_local_iov;
	kiocb = &vvp_env_info(env)->vti_kiocb;
	local_iov->iov_base = (void __user *)buf;
	local_iov->iov_len = count;
	init_sync_kiocb(kiocb, file);
	kiocb->ki_pos = *ppos;
	/* kernel-version compatibility: length field name differs */
#ifdef HAVE_KIOCB_KI_LEFT
	kiocb->ki_left = count;
	kiocb->ki_nbytes = count;

	result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
	*ppos = kiocb->ki_pos;

	cl_env_put(env, &refcheck);
1280 * Write to a file (through the page cache).
/*
 * Vector (AIO) write entry point; mirror of ll_file_aio_read() but runs
 * the generic client I/O path as CIT_WRITE.
 */
1283 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1284 unsigned long nr_segs, loff_t pos)
1287 struct vvp_io_args *args;
1293 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1297 env = cl_env_get(&refcheck);
1299 RETURN(PTR_ERR(env));
1301 args = vvp_env_args(env, IO_NORMAL);
1302 args->u.normal.via_iov = (struct iovec *)iov;
1303 args->u.normal.via_nrsegs = nr_segs;
1304 args->u.normal.via_iocb = iocb;
1306 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1307 &iocb->ki_pos, count);
1308 cl_env_put(env, &refcheck);
/*
 * Synchronous write(2) path; mirror of ll_file_read(): builds a
 * one-segment iovec and a sync kiocb, calls ll_file_aio_write(), and
 * updates *ppos from the kiocb position.
 */
1312 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1313 size_t count, loff_t *ppos)
1316 struct iovec *local_iov;
1317 struct kiocb *kiocb;
1322 env = cl_env_get(&refcheck);
1324 RETURN(PTR_ERR(env));
1326 local_iov = &vvp_env_info(env)->vti_local_iov;
1327 kiocb = &vvp_env_info(env)->vti_kiocb;
1328 local_iov->iov_base = (void __user *)buf;
1329 local_iov->iov_len = count;
1330 init_sync_kiocb(kiocb, file);
1331 kiocb->ki_pos = *ppos;
/* kernel versions name the kiocb byte-count field differently */
1332 #ifdef HAVE_KIOCB_KI_LEFT
1333 kiocb->ki_left = count;
1335 kiocb->ki_nbytes = count;
1338 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1339 *ppos = kiocb->ki_pos;
1341 cl_env_put(env, &refcheck);
1346 * Send file content (through pagecache) somewhere with helper
/*
 * splice-read entry: runs the generic client I/O path in IO_SPLICE
 * mode, feeding page-cache pages into the given pipe.
 */
1348 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1349 struct pipe_inode_info *pipe, size_t count,
1353 struct vvp_io_args *args;
1358 env = cl_env_get(&refcheck);
1360 RETURN(PTR_ERR(env));
1362 args = vvp_env_args(env, IO_SPLICE);
1363 args->u.splice.via_pipe = pipe;
1364 args->u.splice.via_flags = flags;
1366 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1367 cl_env_put(env, &refcheck);
/*
 * Set the striping (LOV EA) on an inode by doing an intent open by FID
 * with the supplied lov_user_md.  Fails with -EEXIST if the inode
 * already has a layout (striping can only be set once).  The open
 * handle obtained for the intent is released immediately; the
 * delay-create flag is cleared on the file afterwards.
 */
1371 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1372 __u64 flags, struct lov_user_md *lum,
1375 struct lov_stripe_md *lsm = NULL;
1376 struct lookup_intent oit = {
1378 .it_flags = flags | MDS_OPEN_BY_FID,
/* a pre-existing layout means the stripe cannot be (re)set */
1383 lsm = ccc_inode_lsm_get(inode);
1385 ccc_inode_lsm_put(inode, lsm);
1386 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1387 PFID(ll_inode2fid(inode)));
1388 GOTO(out, rc = -EEXIST);
1391 ll_inode_size_lock(inode);
1392 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1394 GOTO(out_unlock, rc);
1396 rc = oit.d.lustre.it_status;
1398 GOTO(out_unlock, rc);
/* the MDS open handle is only needed to carry the layout; close it */
1400 ll_release_openhandle(file->f_dentry, &oit);
1403 ll_inode_size_unlock(inode);
1404 ll_intent_release(&oit);
1405 ccc_inode_lsm_put(inode, lsm);
1407 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV metadata (striping EA) of @filename from the MDS via
 * md_getattr_name(), validate the LOV magic, and byte-swap the EA to
 * host endianness so it can be handed to userspace.
 *
 * On success *lmmp points into the reply buffer of *request, so the
 * caller owns *request and must keep it alive while using *lmmp
 * (presumably releasing it with ptlrpc_req_finished — confirm with
 * callers, the teardown is not visible here).
 */
1412 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1413 struct lov_mds_md **lmmp, int *lmm_size,
1414 struct ptlrpc_request **request)
1416 struct ll_sb_info *sbi = ll_i2sbi(inode);
1417 struct mdt_body *body;
1418 struct lov_mds_md *lmm = NULL;
1419 struct ptlrpc_request *req = NULL;
1420 struct md_op_data *op_data;
1423 rc = ll_get_default_mdsize(sbi, &lmmsize);
1427 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1428 strlen(filename), lmmsize,
1429 LUSTRE_OPC_ANY, NULL);
1430 if (IS_ERR(op_data))
1431 RETURN(PTR_ERR(op_data));
1433 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1434 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1435 ll_finish_md_op_data(op_data);
1437 CDEBUG(D_INFO, "md_getattr_name failed "
1438 "on %s: rc %d\n", filename, rc);
1442 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1443 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1445 lmmsize = body->mbo_eadatasize;
/* no EA present (or zero-sized) means there is no striping to return */
1447 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1449 GOTO(out, rc = -ENODATA);
1452 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1453 LASSERT(lmm != NULL);
1455 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1456 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1457 GOTO(out, rc = -EPROTO);
1461 * This is coming from the MDS, so is probably in
1462 * little endian. We convert it to host endian before
1463 * passing it to userspace.
/* only swab on big-endian hosts, where LOV_MAGIC != its LE form */
1465 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1468 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1469 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1472 /* if function called for directory - we should
1473 * avoid swab not existent lsm objects */
1474 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1475 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1476 if (S_ISREG(body->mbo_mode))
1477 lustre_swab_lov_user_md_objects(
1478 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1480 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1481 lustre_swab_lov_user_md_v3(
1482 (struct lov_user_md_v3 *)lmm);
1483 if (S_ISREG(body->mbo_mode))
1484 lustre_swab_lov_user_md_objects(
1485 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1492 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (with room for one OST
 * entry) from userspace and apply it through
 * ll_lov_setstripe_ea_info().  Restricted to CAP_SYS_ADMIN because the
 * EA names explicit objects (MDS_OPEN_HAS_OBJS).
 */
1497 static int ll_lov_setea(struct inode *inode, struct file *file,
1500 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1501 struct lov_user_md *lump;
1502 int lum_size = sizeof(struct lov_user_md) +
1503 sizeof(struct lov_user_ost_data);
1507 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1510 OBD_ALLOC_LARGE(lump, lum_size);
1514 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1515 OBD_FREE_LARGE(lump, lum_size);
1519 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1521 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the inode's striping information to the userspace buffer @lum by
 * delegating to cl_object_getstripe() on the cl object.
 */
1525 static int ll_file_getstripe(struct inode *inode,
1526 struct lov_user_md __user *lum)
1533 env = cl_env_get(&refcheck);
1535 RETURN(PTR_ERR(env));
1537 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1538 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user's lov_user_md into a
 * kernel buffer, apply it via ll_lov_setstripe_ea_info(), then refresh
 * the layout generation and read the resulting stripe info back into
 * the caller's buffer.
 */
1542 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1545 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1546 struct lov_user_md *klum;
1548 __u64 flags = FMODE_WRITE;
1551 rc = ll_copy_user_md(lum, &klum);
1556 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* NOTE(review): appears to clear the user's stripe_count before the
 * getstripe call refills the buffer — confirm against full source */
1560 put_user(0, &lum->lmm_stripe_count);
1562 ll_layout_refresh(inode, &gen);
1563 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1566 OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK: acquire a group lock with group id @arg on behalf
 * of this file descriptor.  Rejects gid 0 and "nolock" mounts, refuses
 * a second group lock on the same fd, and re-checks under lli_lock
 * after the (blocking) cl_get_grouplock() call in case another thread
 * raced us to it.  On success the grouplock handle is stashed in the
 * per-fd state and LL_FILE_GROUP_LOCKED is set.
 */
1571 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1573 struct ll_inode_info *lli = ll_i2info(inode);
1574 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1575 struct ccc_grouplock grouplock;
1580 CWARN("group id for group lock must not be 0\n");
1584 if (ll_file_nolock(file))
1585 RETURN(-EOPNOTSUPP);
1587 spin_lock(&lli->lli_lock);
1588 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1589 CWARN("group lock already existed with gid %lu\n",
1590 fd->fd_grouplock.cg_gid);
1591 spin_unlock(&lli->lli_lock);
1594 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1595 spin_unlock(&lli->lli_lock);
/* may block; must not hold lli_lock across it */
1597 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1598 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* re-check: another thread may have installed a lock while we slept */
1602 spin_lock(&lli->lli_lock);
1603 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1604 spin_unlock(&lli->lli_lock);
1605 CERROR("another thread just won the race\n");
1606 cl_put_grouplock(&grouplock);
1610 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1611 fd->fd_grouplock = grouplock;
1612 spin_unlock(&lli->lli_lock);
1614 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK: release the group lock with id @arg held by this
 * fd.  Verifies under lli_lock that a group lock is held and its gid
 * matches, clears the per-fd state, then drops the lock reference
 * outside the spinlock via cl_put_grouplock().
 */
1618 static int ll_put_grouplock(struct inode *inode, struct file *file,
1621 struct ll_inode_info *lli = ll_i2info(inode);
1622 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1623 struct ccc_grouplock grouplock;
1626 spin_lock(&lli->lli_lock);
1627 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1628 spin_unlock(&lli->lli_lock);
1629 CWARN("no group lock held\n");
1632 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1634 if (fd->fd_grouplock.cg_gid != arg) {
1635 CWARN("group lock %lu doesn't match current id %lu\n",
1636 arg, fd->fd_grouplock.cg_gid);
1637 spin_unlock(&lli->lli_lock);
/* take a local copy so the release can happen outside lli_lock */
1641 grouplock = fd->fd_grouplock;
1642 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1643 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1644 spin_unlock(&lli->lli_lock);
1646 cl_put_grouplock(&grouplock);
1647 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1652 * Close inode open handle
1654 * \param dentry [in] dentry which contains the inode
1655 * \param it [in,out] intent which contains open info and result
1658 * \retval <0 failure
/*
 * Releases the MDS open handle carried by an intent (e.g. one created
 * only to fetch a layout): fills a temporary obd_client_handle from the
 * intent and closes it, then drops the DISP_ENQ_OPEN_REF request
 * reference if the intent still holds one.  No-op for the fs root or
 * for intents without an open disposition.
 */
1660 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1662 struct inode *inode = dentry->d_inode;
1663 struct obd_client_handle *och;
1669 /* Root ? Do nothing. */
1670 if (dentry->d_inode->i_sb->s_root == dentry)
1673 /* No open handle to close? Move away */
1674 if (!it_disposition(it, DISP_OPEN_OPEN))
1677 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1679 OBD_ALLOC(och, sizeof(*och));
1681 GOTO(out, rc = -ENOMEM);
1683 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1685 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1688 /* this one is in place of ll_file_open */
1689 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1690 ptlrpc_req_finished(it->d.lustre.it_data);
1691 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1697 * Get size for inode for which FIEMAP mapping is requested.
1698 * Make the FIEMAP get_info call and returns the result.
1699 * \param fiemap kernel buffer to hold extens
1700 * \param num_bytes kernel buffer size
/*
 * Validates the FIEMAP flags (writing the supported set back to the
 * caller on mismatch), honours FIEMAP_FLAG_SYNC by starting writeback,
 * glimpses the file size if it is not yet known, and then asks the cl
 * layer (cl_object_fiemap) to fill in the extent mapping.  A file of
 * size 0 has no objects, so it reports zero mapped extents directly.
 */
1702 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1708 struct ll_fiemap_info_key fmkey = { .name = KEY_FIEMAP, };
1711 /* Checks for fiemap flags */
1712 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1713 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1717 /* Check for FIEMAP_FLAG_SYNC */
1718 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1719 rc = filemap_fdatawrite(inode->i_mapping);
1724 env = cl_env_get(&refcheck);
1726 RETURN(PTR_ERR(env));
1728 if (i_size_read(inode) == 0) {
1729 rc = ll_glimpse_size(inode);
1734 fmkey.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1735 obdo_from_inode(&fmkey.oa, inode, OBD_MD_FLSIZE);
1736 obdo_set_parent_fid(&fmkey.oa, &ll_i2info(inode)->lli_fid);
1738 /* If filesize is 0, then there would be no objects for mapping */
1739 if (fmkey.oa.o_size == 0) {
1740 fiemap->fm_mapped_extents = 0;
1744 fmkey.fiemap = *fiemap;
1746 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1747 &fmkey, fiemap, &num_bytes);
1749 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a path on behalf of userspace.
 * Requires CAP_DAC_READ_SEARCH unless the mount allows user fid2path.
 * Reads gf_pathlen first to size the output buffer (bounded by
 * PATH_MAX), copies the request in, asks the MDC via obd_iocontrol(),
 * and copies the filled getinfo_fid2path back out.
 */
1753 int ll_fid2path(struct inode *inode, void __user *arg)
1755 struct obd_export *exp = ll_i2mdexp(inode);
1756 const struct getinfo_fid2path __user *gfin = arg;
1758 struct getinfo_fid2path *gfout;
1764 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1765 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1768 /* Only need to get the buflen */
1769 if (get_user(pathlen, &gfin->gf_pathlen))
1772 if (pathlen > PATH_MAX)
1775 outsize = sizeof(*gfout) + pathlen;
1776 OBD_ALLOC(gfout, outsize);
1780 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1781 GOTO(gf_free, rc = -EFAULT);
1783 /* Call mdc_iocontrol */
1784 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1788 if (copy_to_user(arg, gfout, outsize))
1792 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: read fm_extent_count from the user header
 * to size the kernel fiemap buffer (with an explicit overflow check on
 * the multiplication), copy the header (plus the first extent, which
 * seeds the continuation offset/device), run ll_do_fiemap(), and copy
 * the header and mapped extents back to userspace.
 */
1796 static int ll_ioctl_fiemap(struct inode *inode, struct fiemap __user *arg)
1798 struct fiemap *fiemap;
1804 /* Get the extent count so we can calculate the size of
1805 * required fiemap buffer */
1806 if (get_user(extent_count, &arg->fm_extent_count))
/* guard the multiplication below against size_t overflow */
1810 (SIZE_MAX - sizeof(*fiemap)) / sizeof(struct ll_fiemap_extent))
1812 num_bytes = sizeof(*fiemap) + (extent_count *
1813 sizeof(struct ll_fiemap_extent));
1815 OBD_ALLOC_LARGE(fiemap, num_bytes);
1819 /* get the fiemap value */
1820 if (copy_from_user(fiemap, arg, sizeof(*fiemap)))
1821 GOTO(error, rc = -EFAULT);
1823 /* If fm_extent_count is non-zero, read the first extent since
1824 * it is used to calculate end_offset and device from previous
1826 if (extent_count != 0) {
1827 if (copy_from_user(&fiemap->fm_extents[0],
1828 (char __user *)arg + sizeof(*fiemap),
1829 sizeof(struct ll_fiemap_extent)))
1830 GOTO(error, rc = -EFAULT);
1833 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* only copy back the header plus the extents actually mapped */
1837 ret_bytes = sizeof(struct fiemap);
1839 if (extent_count != 0)
1840 ret_bytes += (fiemap->fm_mapped_extents *
1841 sizeof(struct ll_fiemap_extent));
1843 if (copy_to_user((void __user *)arg, fiemap, ret_bytes))
1847 OBD_FREE_LARGE(fiemap, num_bytes);
1852 * Read the data_version for inode.
1854 * This value is computed using stripe object version on OST.
1855 * Version is computed using server side locking.
1857 * @param sync if do sync on the OST side;
1859 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1860 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * An inode with no stripe objects is treated as version 0.  Otherwise
 * the version is obtained through ll_lsm_getattr() and taken from
 * obdo->o_data_version when the OST reports OBD_MD_FLDATAVERSION.
 */
1862 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1864 struct lov_stripe_md *lsm = NULL;
1865 struct ll_sb_info *sbi = ll_i2sbi(inode);
1866 struct obdo *obdo = NULL;
1870 /* If no stripe, we consider version is 0. */
1871 lsm = ccc_inode_lsm_get(inode);
1872 if (!lsm_has_objects(lsm)) {
1874 CDEBUG(D_INODE, "No object for inode\n");
1878 OBD_ALLOC_PTR(obdo);
1880 GOTO(out, rc = -ENOMEM);
1882 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, flags);
1884 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1887 *data_version = obdo->o_data_version;
1893 ccc_inode_lsm_put(inode, lsm);
1898 * Trigger a HSM release request for the provided inode.
/*
 * Takes a write lease with MDS_OPEN_RELEASE, flushes/drops cached pages
 * while grabbing the latest data_version (LL_DV_WR_FLUSH), merges the
 * cl-layer attributes into the inode, then closes the open handle to
 * perform the release on the MDT.  The lease lock handle is released in
 * mdc_hsm_release_pack() (see comment below); the lease itself is
 * closed in the error/exit path.
 */
1900 int ll_hsm_release(struct inode *inode)
1902 struct cl_env_nest nest;
1904 struct obd_client_handle *och = NULL;
1905 __u64 data_version = 0;
1909 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1910 ll_get_fsname(inode->i_sb, NULL, 0),
1911 PFID(&ll_i2info(inode)->lli_fid));
1913 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1915 GOTO(out, rc = PTR_ERR(och));
1917 /* Grab latest data_version and [am]time values */
1918 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1922 env = cl_env_nested_get(&nest);
1924 GOTO(out, rc = PTR_ERR(env));
1926 ll_merge_attr(env, inode);
1927 cl_env_nested_put(&nest, env);
1929 /* Release the file.
1930 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1931 * we still need it to pack l_remote_handle to MDT. */
1932 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
1938 if (och != NULL && !IS_ERR(och)) /* close the file */
1939 ll_lease_close(och, inode, NULL);
/*
 * Scratch state for ll_swap_layouts(): the two inodes being swapped,
 * their data versions to verify, which versions to check, and saved
 * iattrs used to restore mtime/atime after the swap.
 */
1944 struct ll_swap_stack {
1945 struct iattr ia1, ia2;
1947 struct inode *inode1, *inode2;
1948 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS: atomically exchange the layouts of two
 * files.  Steps: validate both files (regular, writable, same sb);
 * order the pair by FID to avoid lock inversion; optionally take group
 * locks (gid != 0) to flush dirty cache; save mtime/atime if the caller
 * asked to keep them; verify data versions if requested; send the swap
 * to the MDT via obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS); drop the group
 * locks and finally restore the saved timestamps.
 */
1951 static int ll_swap_layouts(struct file *file1, struct file *file2,
1952 struct lustre_swap_layouts *lsl)
1954 struct mdc_swap_layouts msl;
1955 struct md_op_data *op_data;
1958 struct ll_swap_stack *llss = NULL;
1961 OBD_ALLOC_PTR(llss);
1965 llss->inode1 = file1->f_dentry->d_inode;
1966 llss->inode2 = file2->f_dentry->d_inode;
1968 if (!S_ISREG(llss->inode2->i_mode))
1969 GOTO(free, rc = -EINVAL);
1971 if (inode_permission(llss->inode1, MAY_WRITE) ||
1972 inode_permission(llss->inode2, MAY_WRITE))
1973 GOTO(free, rc = -EPERM);
1975 if (llss->inode2->i_sb != llss->inode1->i_sb)
1976 GOTO(free, rc = -EXDEV);
1978 /* we use 2 bool because it is easier to swap than 2 bits */
1979 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1980 llss->check_dv1 = true;
1982 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1983 llss->check_dv2 = true;
1985 /* we cannot use lsl->sl_dvX directly because we may swap them */
1986 llss->dv1 = lsl->sl_dv1;
1987 llss->dv2 = lsl->sl_dv2;
/* canonical FID ordering prevents deadlock between concurrent swaps */
1989 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1990 if (rc == 0) /* same file, done! */
1993 if (rc < 0) { /* sequentialize it */
1994 swap(llss->inode1, llss->inode2);
1996 swap(llss->dv1, llss->dv2);
1997 swap(llss->check_dv1, llss->check_dv2);
2001 if (gid != 0) { /* application asks to flush dirty cache */
2002 rc = ll_get_grouplock(llss->inode1, file1, gid);
2006 rc = ll_get_grouplock(llss->inode2, file2, gid);
2008 ll_put_grouplock(llss->inode1, file1, gid);
2013 /* to be able to restore mtime and atime after swap
2014 * we need to first save them */
2016 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2017 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2018 llss->ia1.ia_atime = llss->inode1->i_atime;
2019 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2020 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2021 llss->ia2.ia_atime = llss->inode2->i_atime;
2022 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2025 /* ultimate check, before swaping the layouts we check if
2026 * dataversion has changed (if requested) */
2027 if (llss->check_dv1) {
2028 rc = ll_data_version(llss->inode1, &dv, 0);
2031 if (dv != llss->dv1)
2032 GOTO(putgl, rc = -EAGAIN);
2035 if (llss->check_dv2) {
2036 rc = ll_data_version(llss->inode2, &dv, 0);
2039 if (dv != llss->dv2)
2040 GOTO(putgl, rc = -EAGAIN);
2043 /* struct md_op_data is used to send the swap args to the mdt
2044 * only flags is missing, so we use struct mdc_swap_layouts
2045 * through the md_op_data->op_data */
2046 /* flags from user space have to be converted before they are send to
2047 * server, no flag is sent today, they are only used on the client */
2050 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2051 0, LUSTRE_OPC_ANY, &msl);
2052 if (IS_ERR(op_data))
2053 GOTO(free, rc = PTR_ERR(op_data));
2055 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2056 sizeof(*op_data), op_data, NULL);
2057 ll_finish_md_op_data(op_data);
2061 ll_put_grouplock(llss->inode2, file2, gid);
2062 ll_put_grouplock(llss->inode1, file1, gid);
2065 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2069 /* clear useless flags */
2070 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2071 llss->ia1.ia_valid &= ~ATTR_MTIME;
2072 llss->ia2.ia_valid &= ~ATTR_MTIME;
2075 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2076 llss->ia1.ia_valid &= ~ATTR_ATIME;
2077 llss->ia2.ia_valid &= ~ATTR_ATIME;
/* ia1/ia2 were possibly swapped with the inodes above, so ia2 now
 * belongs to file1 and vice versa */
2080 /* update time if requested */
2082 if (llss->ia2.ia_valid != 0) {
2083 mutex_lock(&llss->inode1->i_mutex);
2084 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2085 mutex_unlock(&llss->inode1->i_mutex);
2088 if (llss->ia1.ia_valid != 0) {
2091 mutex_lock(&llss->inode2->i_mutex);
2092 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2093 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Apply an HSM state change (set/clear masks) on the MDT.  Non-root
 * callers may only touch flags within HSM_USER_MASK; anything else
 * requires CAP_SYS_ADMIN.  The hss is carried to the MDC through
 * md_op_data->op_data.
 */
2105 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2107 struct md_op_data *op_data;
2110 /* Non-root users are forbidden to set or clear flags which are
2111 * NOT defined in HSM_USER_MASK. */
2112 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2113 !cfs_capable(CFS_CAP_SYS_ADMIN))
2116 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2117 LUSTRE_OPC_ANY, hss);
2118 if (IS_ERR(op_data))
2119 RETURN(PTR_ERR(op_data));
2121 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2122 sizeof(*op_data), op_data, NULL);
2124 ll_finish_md_op_data(op_data);
/*
 * LL_IOC_HSM_IMPORT: register a file that already exists in the HSM
 * backend.  Marks the inode HS_ARCHIVED|HS_EXISTS|HS_RELEASED with the
 * given archive id, then forces the mode/uid/gid/size/times supplied by
 * userspace onto the inode via ll_setattr_raw() under i_mutex.  Only
 * valid on regular files.
 */
2129 static int ll_hsm_import(struct inode *inode, struct file *file,
2130 struct hsm_user_import *hui)
2132 struct hsm_state_set *hss = NULL;
2133 struct iattr *attr = NULL;
2137 if (!S_ISREG(inode->i_mode))
2143 GOTO(out, rc = -ENOMEM);
2145 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2146 hss->hss_archive_id = hui->hui_archive_id;
2147 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2148 rc = ll_hsm_state_set(inode, hss);
2152 OBD_ALLOC_PTR(attr);
2154 GOTO(out, rc = -ENOMEM);
/* apply the caller-supplied attributes; ATTR_FORCE bypasses the
 * usual permission checks for this import */
2156 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2157 attr->ia_mode |= S_IFREG;
2158 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2159 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2160 attr->ia_size = hui->hui_size;
2161 attr->ia_mtime.tv_sec = hui->hui_mtime;
2162 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2163 attr->ia_atime.tv_sec = hui->hui_atime;
2164 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2166 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2167 ATTR_UID | ATTR_GID |
2168 ATTR_MTIME | ATTR_MTIME_SET |
2169 ATTR_ATIME | ATTR_ATIME_SET;
2171 mutex_lock(&inode->i_mutex);
2173 rc = ll_setattr_raw(file->f_dentry, attr, true);
2177 mutex_unlock(&inode->i_mutex);
/* Map an open mode to the lease-type flags reported to userspace:
 * FMODE_READ -> LL_LEASE_RDLCK, FMODE_WRITE -> LL_LEASE_WRLCK. */
2189 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2191 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2192 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * Main ioctl dispatcher for regular files: handles per-fd flag get/set,
 * striping (setstripe/setea/getstripe/swap-layouts), FIEMAP, group
 * locks, FID/path translation, data versions, HSM state/action/import,
 * and lease get/set.  Unrecognized commands are first offered to
 * registered llite ioctl handlers (ll_iocontrol_call) and finally
 * passed through to the data export via obd_iocontrol().
 */
2196 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2198 struct inode *inode = file->f_dentry->d_inode;
2199 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2203 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2204 PFID(ll_inode2fid(inode)), inode, cmd);
2205 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2207 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2208 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2212 case LL_IOC_GETFLAGS:
2213 /* Get the current value of the file flags */
2214 return put_user(fd->fd_flags, (int __user *)arg);
2215 case LL_IOC_SETFLAGS:
2216 case LL_IOC_CLRFLAGS:
2217 /* Set or clear specific file flags */
2218 /* XXX This probably needs checks to ensure the flags are
2219 * not abused, and to handle any flag side effects.
2221 if (get_user(flags, (int __user *) arg))
2224 if (cmd == LL_IOC_SETFLAGS) {
/* lockless I/O is only safe when the page cache is bypassed */
2225 if ((flags & LL_FILE_IGNORE_LOCK) &&
2226 !(file->f_flags & O_DIRECT)) {
2227 CERROR("%s: unable to disable locking on "
2228 "non-O_DIRECT file\n", current->comm);
2232 fd->fd_flags |= flags;
2234 fd->fd_flags &= ~flags;
2237 case LL_IOC_LOV_SETSTRIPE:
2238 RETURN(ll_lov_setstripe(inode, file, arg));
2239 case LL_IOC_LOV_SETEA:
2240 RETURN(ll_lov_setea(inode, file, arg));
2241 case LL_IOC_LOV_SWAP_LAYOUTS: {
2243 struct lustre_swap_layouts lsl;
2245 if (copy_from_user(&lsl, (char __user *)arg,
2246 sizeof(struct lustre_swap_layouts)))
/* both files must be open for writing to swap layouts */
2249 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2252 file2 = fget(lsl.sl_fd);
2257 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2258 rc = ll_swap_layouts(file, file2, &lsl);
2262 case LL_IOC_LOV_GETSTRIPE:
2263 RETURN(ll_file_getstripe(inode,
2264 (struct lov_user_md __user *)arg));
2265 case FSFILT_IOC_FIEMAP:
2266 RETURN(ll_ioctl_fiemap(inode, (struct fiemap __user *)arg));
2267 case FSFILT_IOC_GETFLAGS:
2268 case FSFILT_IOC_SETFLAGS:
2269 RETURN(ll_iocontrol(inode, file, cmd, arg));
2270 case FSFILT_IOC_GETVERSION_OLD:
2271 case FSFILT_IOC_GETVERSION:
2272 RETURN(put_user(inode->i_generation, (int __user *)arg));
2273 case LL_IOC_GROUP_LOCK:
2274 RETURN(ll_get_grouplock(inode, file, arg));
2275 case LL_IOC_GROUP_UNLOCK:
2276 RETURN(ll_put_grouplock(inode, file, arg));
2277 case IOC_OBD_STATFS:
2278 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2280 /* We need to special case any other ioctls we want to handle,
2281 * to send them to the MDS/OST as appropriate and to properly
2282 * network encode the arg field.
2283 case FSFILT_IOC_SETVERSION_OLD:
2284 case FSFILT_IOC_SETVERSION:
2286 case LL_IOC_FLUSHCTX:
2287 RETURN(ll_flush_ctx(inode));
2288 case LL_IOC_PATH2FID: {
2289 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2290 sizeof(struct lu_fid)))
2295 case LL_IOC_GETPARENT:
2296 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2298 case OBD_IOC_FID2PATH:
2299 RETURN(ll_fid2path(inode, (void __user *)arg));
2300 case LL_IOC_DATA_VERSION: {
2301 struct ioc_data_version idv;
2304 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* only the documented flush flags are accepted from userspace */
2307 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2308 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2311 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2317 case LL_IOC_GET_MDTIDX: {
2320 mdtidx = ll_get_mdt_idx(inode);
2324 if (put_user((int)mdtidx, (int __user *)arg))
2329 case OBD_IOC_GETDTNAME:
2330 case OBD_IOC_GETMDNAME:
2331 RETURN(ll_get_obd_name(inode, cmd, arg));
2332 case LL_IOC_HSM_STATE_GET: {
2333 struct md_op_data *op_data;
2334 struct hsm_user_state *hus;
2341 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2342 LUSTRE_OPC_ANY, hus);
2343 if (IS_ERR(op_data)) {
2345 RETURN(PTR_ERR(op_data));
2348 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2351 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2354 ll_finish_md_op_data(op_data);
2358 case LL_IOC_HSM_STATE_SET: {
2359 struct hsm_state_set *hss;
2366 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2371 rc = ll_hsm_state_set(inode, hss);
2376 case LL_IOC_HSM_ACTION: {
2377 struct md_op_data *op_data;
2378 struct hsm_current_action *hca;
2385 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2386 LUSTRE_OPC_ANY, hca);
2387 if (IS_ERR(op_data)) {
2389 RETURN(PTR_ERR(op_data));
2392 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2395 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2398 ll_finish_md_op_data(op_data);
2402 case LL_IOC_SET_LEASE: {
2403 struct ll_inode_info *lli = ll_i2info(inode);
2404 struct obd_client_handle *och = NULL;
/* requested lease mode must be compatible with the open mode */
2409 case LL_LEASE_WRLCK:
2410 if (!(file->f_mode & FMODE_WRITE))
2412 fmode = FMODE_WRITE;
2414 case LL_LEASE_RDLCK:
2415 if (!(file->f_mode & FMODE_READ))
2419 case LL_LEASE_UNLCK:
2420 mutex_lock(&lli->lli_och_mutex);
2421 if (fd->fd_lease_och != NULL) {
2422 och = fd->fd_lease_och;
2423 fd->fd_lease_och = NULL;
2425 mutex_unlock(&lli->lli_och_mutex);
2430 fmode = och->och_flags;
2431 rc = ll_lease_close(och, inode, &lease_broken);
2438 RETURN(ll_lease_type_from_fmode(fmode));
2443 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2445 /* apply for lease */
2446 och = ll_lease_open(inode, file, fmode, 0);
2448 RETURN(PTR_ERR(och));
2451 mutex_lock(&lli->lli_och_mutex);
2452 if (fd->fd_lease_och == NULL) {
2453 fd->fd_lease_och = och;
2456 mutex_unlock(&lli->lli_och_mutex);
2458 /* impossible now that only excl is supported for now */
2459 ll_lease_close(och, inode, &lease_broken);
2464 case LL_IOC_GET_LEASE: {
2465 struct ll_inode_info *lli = ll_i2info(inode);
2466 struct ldlm_lock *lock = NULL;
2469 mutex_lock(&lli->lli_och_mutex);
2470 if (fd->fd_lease_och != NULL) {
2471 struct obd_client_handle *och = fd->fd_lease_och;
2473 lock = ldlm_handle2lock(&och->och_lease_handle);
2475 lock_res_and_lock(lock);
/* a cancelled lease lock no longer counts as held */
2476 if (!ldlm_is_cancel(lock))
2477 fmode = och->och_flags;
2479 unlock_res_and_lock(lock);
2480 LDLM_LOCK_PUT(lock);
2483 mutex_unlock(&lli->lli_och_mutex);
2485 RETURN(ll_lease_type_from_fmode(fmode));
2487 case LL_IOC_HSM_IMPORT: {
2488 struct hsm_user_import *hui;
2494 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2499 rc = ll_hsm_import(inode, file, hui);
/* fall back to registered handlers, then to the data export */
2509 ll_iocontrol_call(inode, file, cmd, arg, &err))
2512 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2513 (void __user *)arg));
2518 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Compat helper (only built when the kernel lacks
 * generic_file_llseek_size): validate @offset against the sign rule and
 * @maxsize, and commit it to file->f_pos, resetting f_version when the
 * position actually changes.
 */
2519 static inline loff_t
2520 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2522 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2524 if (offset > maxsize)
2527 if (offset != file->f_pos) {
2528 file->f_pos = offset;
2529 file->f_version = 0;
/*
 * Compat copy of the kernel's generic_file_llseek_size() for kernels
 * that lack it: llseek honouring a filesystem-specific @maxsize and a
 * caller-supplied @eof (needed since Lustre must glimpse the size from
 * the OSTs).  SEEK_CUR with offset 0 is answered without touching f_pos
 * (see comment below); other SEEK_CUR updates are serialized under
 * i_mutex.
 */
2535 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2536 loff_t maxsize, loff_t eof)
2538 struct inode *inode = file->f_dentry->d_inode;
2546 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2547 * position-querying operation. Avoid rewriting the "same"
2548 * f_pos value back to the file because a concurrent read(),
2549 * write() or lseek() might have altered it
2554 * f_lock protects against read/modify/write race with other
2555 * SEEK_CURs. Note that parallel writes and reads behave
2558 mutex_lock(&inode->i_mutex);
2559 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2560 mutex_unlock(&inode->i_mutex);
2564 * In the generic case the entire file is data, so as long as
2565 * offset isn't at the end of the file then the offset is data.
2572 * There is a virtual hole at the end of the file, so as long as
2573 * offset isn't i_size or larger, return i_size.
2581 return llseek_execute(file, offset, maxsize);
/*
 * llseek entry point: for SEEK_END/SEEK_HOLE/SEEK_DATA the true file
 * size must first be fetched from the OSTs via ll_glimpse_size(); then
 * the compat generic llseek runs with the filesystem's max byte limit
 * and that size as eof.
 */
2585 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2587 struct inode *inode = file->f_dentry->d_inode;
2588 loff_t retval, eof = 0;
/* retval here is only the prospective target used for the trace log */
2591 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2592 (origin == SEEK_CUR) ? file->f_pos : 0);
2593 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2594 PFID(ll_inode2fid(inode)), inode, retval, retval,
2596 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2598 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2599 retval = ll_glimpse_size(inode);
2602 eof = i_size_read(inode);
2605 retval = ll_generic_file_llseek_size(file, offset, origin,
2606 ll_file_maxbytes(inode), eof);
/*
 * flush(2)/close-time hook: collect async writeback errors recorded on
 * the inode (and the cl object's per-OSC async rc) and report them as
 * -EIO — but only once per fd: if the error was already surfaced to the
 * application (fd_write_failed), it is not reported again.
 */
2610 static int ll_flush(struct file *file, fl_owner_t id)
2612 struct inode *inode = file->f_dentry->d_inode;
2613 struct ll_inode_info *lli = ll_i2info(inode);
2614 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2617 LASSERT(!S_ISDIR(inode->i_mode));
2619 /* catch async errors that were recorded back when async writeback
2620 * failed for pages in this mapping. */
2621 rc = lli->lli_async_rc;
2622 lli->lli_async_rc = 0;
2623 if (lli->lli_clob != NULL) {
2624 err = lov_read_and_clear_async_rc(lli->lli_clob);
2629 /* The application has been told write failure already.
2630 * Do not report failure again. */
2631 if (fd->fd_write_failed)
2633 return rc ? -EIO : 0;
2637 * Called to make sure a portion of file has been written out.
2638 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2640 * Return how many pages have been written.
/*
 * Builds a CIT_FSYNC cl_io for [start, end] with the given fsync mode
 * (NONE/LOCAL/DISCARD/ALL) and runs the cl_io loop; on success the
 * return value is fio->fi_nr_written, otherwise a negative errno.
 */
2642 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2643 enum cl_fsync_mode mode, int ignore_layout)
2645 struct cl_env_nest nest;
2648 struct obd_capa *capa = NULL;
2649 struct cl_fsync_io *fio;
2653 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2654 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2657 env = cl_env_nested_get(&nest);
2659 RETURN(PTR_ERR(env));
2661 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2663 io = ccc_env_thread_io(env);
2664 io->ci_obj = ll_i2info(inode)->lli_clob;
2665 io->ci_ignore_layout = ignore_layout;
2667 /* initialize parameters for sync */
2668 fio = &io->u.ci_fsync;
2669 fio->fi_capa = capa;
2670 fio->fi_start = start;
2672 fio->fi_fid = ll_inode2fid(inode);
2673 fio->fi_mode = mode;
2674 fio->fi_nr_written = 0;
2676 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2677 result = cl_io_loop(env, io);
2679 result = io->ci_result;
2681 result = fio->fi_nr_written;
2682 cl_io_fini(env, io);
2683 cl_env_nested_put(&nest, env);
2691 * When dentry is provided (the 'else' case), *file->f_dentry may be
2692 * null and dentry must be used directly rather than pulled from
2693 * *file->f_dentry as is done otherwise.
/*
 * fsync(2) entry point, compiled in one of three kernel-API shapes
 * (4-arg range fsync, 2-arg, or the old 3-arg with an explicit dentry).
 * Sequence: wait for page-cache writeback, collect recorded async
 * write errors, md_fsync() to the MDT, then for regular files an OST
 * sync of the range via cl_sync_file_range(CL_FSYNC_ALL); the fd's
 * fd_write_failed flag tracks whether an error was reported.
 */
2696 #ifdef HAVE_FILE_FSYNC_4ARGS
2697 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2699 struct dentry *dentry = file->f_dentry;
2700 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2701 int ll_fsync(struct file *file, int datasync)
2703 struct dentry *dentry = file->f_dentry;
2705 loff_t end = LLONG_MAX;
2707 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2710 loff_t end = LLONG_MAX;
2712 struct inode *inode = dentry->d_inode;
2713 struct ll_inode_info *lli = ll_i2info(inode);
2714 struct ptlrpc_request *req;
2715 struct obd_capa *oc;
2719 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2720 PFID(ll_inode2fid(inode)), inode);
2721 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2723 #ifdef HAVE_FILE_FSYNC_4ARGS
2724 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2725 mutex_lock(&inode->i_mutex);
2727 /* fsync's caller has already called _fdata{sync,write}, we want
2728 * that IO to finish before calling the osc and mdc sync methods */
2729 rc = filemap_fdatawait(inode->i_mapping);
2732 /* catch async errors that were recorded back when async writeback
2733 * failed for pages in this mapping. */
2734 if (!S_ISDIR(inode->i_mode)) {
2735 err = lli->lli_async_rc;
2736 lli->lli_async_rc = 0;
2739 err = lov_read_and_clear_async_rc(lli->lli_clob);
2744 oc = ll_mdscapa_get(inode);
2745 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2751 ptlrpc_req_finished(req);
2753 if (S_ISREG(inode->i_mode)) {
2754 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2756 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2757 if (rc == 0 && err < 0)
2760 fd->fd_write_failed = true;
2762 fd->fd_write_failed = false;
2765 #ifdef HAVE_FILE_FSYNC_4ARGS
2766 mutex_unlock(&inode->i_mutex);
/*
 * ll_file_flock(): VFS .lock/.flock handler. Translates a kernel
 * struct file_lock (POSIX fcntl lock or BSD flock) into an LDLM_FLOCK
 * enqueue against the MDS, then mirrors the result into the local VFS
 * lock lists so the kernel's bookkeeping stays consistent.
 *
 * NOTE(review): listing is missing intermediate lines (switch labels,
 * error paths); excerpt only.
 */
2772 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2774 struct inode *inode = file->f_dentry->d_inode;
2775 struct ll_sb_info *sbi = ll_i2sbi(inode);
2776 struct ldlm_enqueue_info einfo = {
2777 .ei_type = LDLM_FLOCK,
2778 .ei_cb_cp = ldlm_flock_completion_ast,
2779 .ei_cbdata = file_lock,
2781 struct md_op_data *op_data;
2782 struct lustre_handle lockh = {0};
2783 ldlm_policy_data_t flock = {{0}};
2784 int fl_type = file_lock->fl_type;
2790 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2791 PFID(ll_inode2fid(inode)), file_lock);
2793 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2795 if (file_lock->fl_flags & FL_FLOCK) {
2796 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2797 /* flocks are whole-file locks */
2798 flock.l_flock.end = OFFSET_MAX;
2799 /* For flocks owner is determined by the local file descriptor */
2800 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2801 } else if (file_lock->fl_flags & FL_POSIX) {
2802 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2803 flock.l_flock.start = file_lock->fl_start;
2804 flock.l_flock.end = file_lock->fl_end;
2808 flock.l_flock.pid = file_lock->fl_pid;
2810 /* Somewhat ugly workaround for svc lockd.
2811 * lockd installs custom fl_lmops->lm_compare_owner that checks
2812 * for the fl_owner to be the same (which it always is on local node
2813 * I guess between lockd processes) and then compares pid.
2814 * As such we assign pid to the owner field to make it all work,
2815 * conflict with normal locks is unlikely since pid space and
2816 * pointer space for current->files are not intersecting */
2817 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2818 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Map fcntl lock type to LDLM mode: F_RDLCK->PR, F_WRLCK->PW, F_UNLCK->NL. */
2822 einfo.ei_mode = LCK_PR;
2825 /* An unlock request may or may not have any relation to
2826 * existing locks so we may not be able to pass a lock handle
2827 * via a normal ldlm_lock_cancel() request. The request may even
2828 * unlock a byte range in the middle of an existing lock. In
2829 * order to process an unlock request we need all of the same
2830 * information that is given with a normal read or write record
2831 * lock request. To avoid creating another ldlm unlock (cancel)
2832 * message we'll treat a LCK_NL flock request as an unlock. */
2833 einfo.ei_mode = LCK_NL;
2836 einfo.ei_mode = LCK_PW;
2839 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* Map the command: non-blocking set -> BLOCK_NOWAIT, F_GETLK -> TEST_LOCK. */
2854 flags = LDLM_FL_BLOCK_NOWAIT;
2860 flags = LDLM_FL_TEST_LOCK;
2863 CERROR("unknown fcntl lock command: %d\n", cmd);
2867 /* Save the old mode so that if the mode in the lock changes we
2868 * can decrement the appropriate reader or writer refcount. */
2869 file_lock->fl_type = einfo.ei_mode;
2871 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2872 LUSTRE_OPC_ANY, NULL);
2873 if (IS_ERR(op_data))
2874 RETURN(PTR_ERR(op_data));
2876 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2877 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2878 flock.l_flock.pid, flags, einfo.ei_mode,
2879 flock.l_flock.start, flock.l_flock.end);
2881 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2884 /* Restore the file lock type if not TEST lock. */
2885 if (!(flags & LDLM_FL_TEST_LOCK))
2886 file_lock->fl_type = fl_type;
/* Mirror the server-side result into the kernel's local lock lists. */
2888 if ((file_lock->fl_flags & FL_FLOCK) &&
2889 (rc == 0 || file_lock->fl_type == F_UNLCK))
2890 rc2 = flock_lock_file_wait(file, file_lock);
2891 if ((file_lock->fl_flags & FL_POSIX) &&
2892 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2893 !(flags & LDLM_FL_TEST_LOCK))
2894 rc2 = posix_lock_file_wait(file, file_lock);
/* Local bookkeeping failed: undo the server lock by enqueueing LCK_NL. */
2896 if (rc2 && file_lock->fl_type != F_UNLCK) {
2897 einfo.ei_mode = LCK_NL;
2898 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2903 ll_finish_md_op_data(op_data);
/*
 * ll_get_fid_by_name(): look up the FID of @name under @parent via a
 * getattr-by-name RPC to the MDS. On success *fid is filled from the
 * returned MDT body. Caller owns no request state afterwards.
 *
 * NOTE(review): intermediate lines (rc check after md_getattr_name,
 * GOTO labels) are missing from this listing.
 */
2908 int ll_get_fid_by_name(struct inode *parent, const char *name,
2909 int namelen, struct lu_fid *fid)
2911 struct md_op_data *op_data = NULL;
2912 struct mdt_body *body;
2913 struct ptlrpc_request *req;
2917 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2918 LUSTRE_OPC_ANY, NULL);
2919 if (IS_ERR(op_data))
2920 RETURN(PTR_ERR(op_data));
/* Only the FID is needed from the reply. */
2922 op_data->op_valid = OBD_MD_FLID;
2923 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2924 ll_finish_md_op_data(op_data);
2928 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2930 GOTO(out_req, rc = -EFAULT);
2932 *fid = body->mbo_fid1;
2934 ptlrpc_req_finished(req);
/*
 * ll_migrate(): migrate directory entry @name under @parent to MDT
 * @mdtidx, implemented as a rename-to-self with CLI_MIGRATE set.
 * Resolves the child FID either from the dcache (locking the child
 * inode and invalidating its aliases) or via ll_get_fid_by_name().
 *
 * NOTE(review): listing is missing intermediate lines (qstr setup,
 * dput, out_free label, iput); excerpt only.
 */
2938 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2939 const char *name, int namelen)
2941 struct dentry *dchild = NULL;
2942 struct inode *child_inode = NULL;
2943 struct md_op_data *op_data;
2944 struct ptlrpc_request *request = NULL;
2949 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2950 name, PFID(ll_inode2fid(parent)), mdtidx);
2952 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2953 0, LUSTRE_OPC_ANY, NULL);
2954 if (IS_ERR(op_data))
2955 RETURN(PTR_ERR(op_data));
2957 /* Get child FID first */
2958 qstr.hash = full_name_hash(name, namelen);
2961 dchild = d_lookup(file->f_dentry, &qstr);
2962 if (dchild != NULL) {
2963 if (dchild->d_inode != NULL) {
2964 child_inode = igrab(dchild->d_inode);
2965 if (child_inode != NULL) {
/* Hold i_mutex across the migration; dropped in the cleanup path below. */
2966 mutex_lock(&child_inode->i_mutex);
2967 op_data->op_fid3 = *ll_inode2fid(child_inode);
2968 ll_invalidate_aliases(child_inode);
/* Child not in dcache: ask the MDS for its FID. */
2973 rc = ll_get_fid_by_name(parent, name, namelen,
2979 if (!fid_is_sane(&op_data->op_fid3)) {
2980 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
2981 ll_get_fsname(parent->i_sb, NULL, 0), name,
2982 PFID(&op_data->op_fid3));
2983 GOTO(out_free, rc = -EINVAL);
/* Nothing to do when the child already lives on the target MDT. */
2986 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
2991 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
2992 PFID(&op_data->op_fid3), mdtidx);
2993 GOTO(out_free, rc = 0);
2996 op_data->op_mds = mdtidx;
2997 op_data->op_cli_flags = CLI_MIGRATE;
2998 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
2999 namelen, name, namelen, &request);
3001 ll_update_times(request, parent);
3003 ptlrpc_req_finished(request);
/* Drop the stale local inode: the object now lives on another MDT. */
3008 if (child_inode != NULL) {
3009 clear_nlink(child_inode);
3010 mutex_unlock(&child_inode->i_mutex);
3014 ll_finish_md_op_data(op_data);
3019 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3027 * test if some locks matching bits and l_req_mode are acquired
3028 * - bits can be in different locks
3029 * - if found clear the common lock bits in *bits
3030 * - the bits not found, are kept in *bits
3032 * \param bits [IN] searched lock bits
3033 * \param l_req_mode [IN] searched lock mode
3034 * \retval boolean, true iff all bits are found
/*
 * ll_have_md_lock(): test (without taking references) whether MD ibits
 * locks covering *bits are cached locally in mode @l_req_mode.
 * Matched bits are cleared from *bits; unmatched bits remain set.
 * Returns true iff all requested bits were found.
 */
3036 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3038 struct lustre_handle lockh;
3039 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode"; expand it to the full compatible set. */
3040 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3041 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3050 fid = &ll_i2info(inode)->lli_fid;
3051 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3052 ldlm_lockname[mode]);
/* TEST_LOCK: match only, do not take a reference on the found lock. */
3054 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe one bit at a time, since the bits may live in different locks.
 * NOTE(review): `1 << i` — presumably MDS_INODELOCK_MAXSHIFT < 31 so an
 * int shift suffices for the __u64 bits; confirm against the header. */
3055 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3056 policy.l_inodebits.bits = *bits & (1 << i);
3057 if (policy.l_inodebits.bits == 0)
3060 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3061 &policy, mode, &lockh)) {
3062 struct ldlm_lock *lock;
3064 lock = ldlm_handle2lock(&lockh);
/* Clear every bit the matched lock grants, not just the probed one. */
3067 ~(lock->l_policy_data.l_inodebits.bits);
3068 LDLM_LOCK_PUT(lock);
3070 *bits &= ~policy.l_inodebits.bits;
/*
 * ll_take_md_lock(): match a cached MD ibits lock covering @bits in one
 * of the modes in @mode; on success *lockh references the lock (caller
 * must decref). Returns the matched mode, or 0 when nothing matched.
 */
3077 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3078 struct lustre_handle *lockh, __u64 flags,
3081 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3086 fid = &ll_i2info(inode)->lli_fid;
3087 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3089 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3090 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * ll_inode_revalidate_fini(): post-process the rc of a revalidate RPC.
 * -ENOENT on a plain file/dir is treated as "unlinked remotely" and
 * mapped to success (with a debug message for the odd cases); any other
 * error is logged at a severity matching its cause.
 */
3095 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3097 /* Already unlinked. Just update nlink and return success */
3098 if (rc == -ENOENT) {
3100 /* This path cannot be hit for regular files unless in
3101 * case of obscure races, so no need to validate
3103 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
/* EACCES/EIDRM are expected (permission/capa races) -> D_INFO only. */
3105 } else if (rc != 0) {
3106 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3107 "%s: revalidate FID "DFID" error: rc = %d\n",
3108 ll_get_fsname(inode->i_sb, NULL, 0),
3109 PFID(ll_inode2fid(inode)), rc);
/*
 * __ll_inode_revalidate(): refresh cached metadata for @dentry's inode,
 * for the lock bits in @ibits. Two strategies:
 *  - OBD_CONNECT_ATTRFID servers: intent getattr/lookup by FID
 *    (md_intent_lock), which also refreshes dcache validity;
 *  - otherwise: skip the RPC when a matching MD lock is cached, else a
 *    plain md_getattr followed by ll_prep_inode.
 *
 * NOTE(review): listing is missing lines (rc checks, out label, ENTRY);
 * excerpt only.
 */
3115 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3117 struct inode *inode = dentry->d_inode;
3118 struct ptlrpc_request *req = NULL;
3119 struct obd_export *exp;
3123 LASSERT(inode != NULL);
3125 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3126 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3128 exp = ll_i2mdexp(inode);
3130 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3131 * But under CMD case, it caused some lock issues, should be fixed
3132 * with new CMD ibits lock. See bug 12718 */
3133 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3134 struct lookup_intent oit = { .it_op = IT_GETATTR };
3135 struct md_op_data *op_data;
/* Lookup-only revalidation is cheaper than full getattr. */
3137 if (ibits == MDS_INODELOCK_LOOKUP)
3138 oit.it_op = IT_LOOKUP;
3140 /* Call getattr by fid, so do not provide name at all. */
3141 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3142 dentry->d_inode, NULL, 0, 0,
3143 LUSTRE_OPC_ANY, NULL);
3144 if (IS_ERR(op_data))
3145 RETURN(PTR_ERR(op_data));
3147 rc = md_intent_lock(exp, op_data, &oit, &req,
3148 &ll_md_blocking_ast, 0);
3149 ll_finish_md_op_data(op_data);
3151 rc = ll_inode_revalidate_fini(inode, rc);
3155 rc = ll_revalidate_it_finish(req, &oit, dentry);
3157 ll_intent_release(&oit);
3161 /* Unlinked? Unhash dentry, so it is not picked up later by
3162 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3163 here to preserve get_cwd functionality on 2.6.
3165 if (!dentry->d_inode->i_nlink)
3166 d_lustre_invalidate(dentry, 0);
3168 ll_lookup_finish_locks(&oit, dentry);
/* No ATTRFID: only issue the RPC if no matching MD lock is cached. */
3169 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3170 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3171 u64 valid = OBD_MD_FLGETATTR;
3172 struct md_op_data *op_data;
/* Regular files also need striping EA; size the reply buffer for it. */
3175 if (S_ISREG(inode->i_mode)) {
3176 rc = ll_get_default_mdsize(sbi, &ealen);
3179 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3182 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3183 0, ealen, LUSTRE_OPC_ANY,
3185 if (IS_ERR(op_data))
3186 RETURN(PTR_ERR(op_data));
3188 op_data->op_valid = valid;
3189 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3190 * capa for this inode. Because we only keep capas of dirs
3192 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3193 ll_finish_md_op_data(op_data);
3195 rc = ll_inode_revalidate_fini(inode, rc);
3199 rc = ll_prep_inode(&inode, req, NULL, NULL);
3202 ptlrpc_req_finished(req);
/*
 * ll_merge_md_attr(): for a striped (DNE) directory, merge attributes
 * from all stripes via md_merge_attr() and apply the result (nlink,
 * blocks, size, [amc]time) to the local inode / lli cache.
 */
3206 static int ll_merge_md_attr(struct inode *inode)
3208 struct cl_attr attr = { 0 };
3211 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3212 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3213 &attr, ll_md_blocking_ast);
3217 set_nlink(inode, attr.cat_nlink);
3218 inode->i_blocks = attr.cat_blocks;
3219 i_size_write(inode, attr.cat_size);
/* Times are cached in lli; copied to inode->i_*time by the caller. */
3221 ll_i2info(inode)->lli_atime = attr.cat_atime;
3222 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3223 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * ll_inode_revalidate(): full revalidation = metadata refresh via
 * __ll_inode_revalidate() plus, for regular files, a size glimpse
 * against the OSTs; striped directories merge per-stripe attributes
 * instead.
 */
3229 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3231 struct inode *inode = dentry->d_inode;
3235 rc = __ll_inode_revalidate(dentry, ibits);
3239 /* if object isn't regular file, don't validate size */
3240 if (!S_ISREG(inode->i_mode)) {
3241 if (S_ISDIR(inode->i_mode) &&
3242 ll_i2info(inode)->lli_lsm_md != NULL) {
3243 rc = ll_merge_md_attr(inode);
/* Propagate the cached lli times into the VFS inode. */
3248 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3249 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3250 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3252 /* In case of restore, the MDT has the right size and has
3253 * already send it back without granting the layout lock,
3254 * inode is up-to-date so glimpse is useless.
3255 * Also to glimpse we need the layout, in case of a running
3256 * restore the MDT holds the layout lock so the glimpse will
3257 * block up to the end of restore (getattr will block)
3259 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3260 rc = ll_glimpse_size(inode);
/*
 * ll_getattr(): VFS .getattr — revalidate UPDATE|LOOKUP bits, then fill
 * struct kstat from the (now fresh) inode. Uses a FID-derived inode
 * number when the mount requires 32-bit-safe inode numbers.
 */
3265 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3267 struct inode *inode = de->d_inode;
3268 struct ll_sb_info *sbi = ll_i2sbi(inode);
3269 struct ll_inode_info *lli = ll_i2info(inode);
3272 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3273 MDS_INODELOCK_LOOKUP);
3274 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3279 stat->dev = inode->i_sb->s_dev;
/* 32-bit userspace cannot take a 64-bit ino; build one from the FID. */
3280 if (ll_need_32bit_api(sbi))
3281 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3283 stat->ino = inode->i_ino;
3284 stat->mode = inode->i_mode;
3285 stat->uid = inode->i_uid;
3286 stat->gid = inode->i_gid;
3287 stat->rdev = inode->i_rdev;
3288 stat->atime = inode->i_atime;
3289 stat->mtime = inode->i_mtime;
3290 stat->ctime = inode->i_ctime;
3291 stat->blksize = 1 << inode->i_blkbits;
3293 stat->nlink = inode->i_nlink;
3294 stat->size = i_size_read(inode);
3295 stat->blocks = inode->i_blocks;
/*
 * ll_fiemap(): VFS .fiemap — marshal the kernel fiemap_extent_info into
 * a Lustre ll_user_fiemap, run ll_do_fiemap(), and copy the mapped
 * extents back to the caller's user buffer.
 */
3300 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3301 __u64 start, __u64 len)
3305 struct ll_user_fiemap *fiemap;
3306 unsigned int extent_count = fieinfo->fi_extents_max;
3308 num_bytes = sizeof(*fiemap) + (extent_count *
3309 sizeof(struct ll_fiemap_extent));
3310 OBD_ALLOC_LARGE(fiemap, num_bytes);
3315 fiemap->fm_flags = fieinfo->fi_flags;
3316 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3317 fiemap->fm_start = start;
3318 fiemap->fm_length = len;
/* NOTE(review): only the first extent is copied in — presumably seeding
 * a continuation (e.g. after FIEMAP_EXTENT_LAST); confirm intent. */
3319 if (extent_count > 0)
3320 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3321 sizeof(struct ll_fiemap_extent));
3323 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3325 fieinfo->fi_flags = fiemap->fm_flags;
3326 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3327 if (extent_count > 0)
3328 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3329 fiemap->fm_mapped_extents *
3330 sizeof(struct ll_fiemap_extent));
3332 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * ll_get_acl(): return a referenced copy of the cached POSIX ACL,
 * taken under lli_lock. Caller (the VFS) releases the reference.
 */
3336 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3338 struct ll_inode_info *lli = ll_i2info(inode);
3339 struct posix_acl *acl = NULL;
3342 spin_lock(&lli->lli_lock);
3343 /* VFS' acl_permission_check->check_acl will release the refcount */
3344 acl = posix_acl_dup(lli->lli_posix_acl);
3345 spin_unlock(&lli->lli_lock);
/*
 * ll_check_acl(): ACL callback for generic_permission() on kernels
 * without the 2-arg variant. Evaluates the cached access ACL; bails out
 * under RCU-walk (IPERM_FLAG_RCU) since ll_get_acl() may block.
 */
3350 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3352 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3353 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3355 ll_check_acl(struct inode *inode, int mask)
3358 # ifdef CONFIG_FS_POSIX_ACL
3359 struct posix_acl *acl;
3363 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3364 if (flags & IPERM_FLAG_RCU)
3367 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3372 rc = posix_acl_permission(inode, acl, mask);
3373 posix_acl_release(acl);
3376 # else /* !CONFIG_FS_POSIX_ACL */
3378 # endif /* CONFIG_FS_POSIX_ACL */
3380 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * ll_inode_permission(): VFS .permission (prototype varies by kernel).
 * Revalidates the root inode on first touch, applies root-squash by
 * temporarily overriding credentials, then delegates to remote-perm
 * checking or generic_permission()/ll_check_acl.
 *
 * NOTE(review): listing is missing lines (RCU bail-out returns, cred
 * error path); excerpt only.
 */
3382 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3383 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3385 # ifdef HAVE_INODE_PERMISION_2ARGS
3386 int ll_inode_permission(struct inode *inode, int mask)
3388 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3393 struct ll_sb_info *sbi;
3394 struct root_squash_info *squash;
3395 struct cred *cred = NULL;
3396 const struct cred *old_cred = NULL;
3398 bool squash_id = false;
/* Cannot block under RCU-walk; tell the VFS to retry in ref-walk mode. */
3401 #ifdef MAY_NOT_BLOCK
3402 if (mask & MAY_NOT_BLOCK)
3404 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3405 if (flags & IPERM_FLAG_RCU)
3409 /* as root inode are NOT getting validated in lookup operation,
3410 * need to do it before permission check. */
3412 if (inode == inode->i_sb->s_root->d_inode) {
3413 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3414 MDS_INODELOCK_LOOKUP);
3419 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3420 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3422 /* squash fsuid/fsgid if needed */
3423 sbi = ll_i2sbi(inode);
3424 squash = &sbi->ll_squash;
3425 if (unlikely(squash->rsi_uid != 0 &&
3426 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3427 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3431 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3432 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3433 squash->rsi_uid, squash->rsi_gid);
3435 /* update current process's credentials
3436 * and FS capability */
3437 cred = prepare_creds();
3441 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3442 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* Drop every filesystem-related capability from the squashed creds. */
3443 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3444 if ((1 << cap) & CFS_CAP_FS_MASK)
3445 cap_lower(cred->cap_effective, cap);
3447 old_cred = override_creds(cred);
3450 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3452 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3453 rc = lustre_check_remote_perm(inode, mask);
3455 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3457 /* restore current process's credentials and FS capability */
3459 revert_creds(old_cred);
/* File operations for the default/localflock mount: no .flock/.lock
 * entries, so the kernel provides only locally consistent flock locks. */
3466 /* -o localflock - only provides locally consistent flock locks */
3467 struct file_operations ll_file_operations = {
3468 .read = ll_file_read,
3469 .aio_read = ll_file_aio_read,
3470 .write = ll_file_write,
3471 .aio_write = ll_file_aio_write,
3472 .unlocked_ioctl = ll_file_ioctl,
3473 .open = ll_file_open,
3474 .release = ll_file_release,
3475 .mmap = ll_file_mmap,
3476 .llseek = ll_file_seek,
3477 .splice_read = ll_file_splice_read,
/* File operations for -o flock: cluster-coherent locking, routing both
 * BSD flock and POSIX fcntl locks through ll_file_flock(). */
3482 struct file_operations ll_file_operations_flock = {
3483 .read = ll_file_read,
3484 .aio_read = ll_file_aio_read,
3485 .write = ll_file_write,
3486 .aio_write = ll_file_aio_write,
3487 .unlocked_ioctl = ll_file_ioctl,
3488 .open = ll_file_open,
3489 .release = ll_file_release,
3490 .mmap = ll_file_mmap,
3491 .llseek = ll_file_seek,
3492 .splice_read = ll_file_splice_read,
3495 .flock = ll_file_flock,
3496 .lock = ll_file_flock
/* File operations for -o noflock: lock entry points wired to
 * ll_file_noflock so every lock attempt is refused explicitly. */
3499 /* These are for -o noflock - to return ENOSYS on flock calls */
3500 struct file_operations ll_file_operations_noflock = {
3501 .read = ll_file_read,
3502 .aio_read = ll_file_aio_read,
3503 .write = ll_file_write,
3504 .aio_write = ll_file_aio_write,
3505 .unlocked_ioctl = ll_file_ioctl,
3506 .open = ll_file_open,
3507 .release = ll_file_release,
3508 .mmap = ll_file_mmap,
3509 .llseek = ll_file_seek,
3510 .splice_read = ll_file_splice_read,
3513 .flock = ll_file_noflock,
3514 .lock = ll_file_noflock
/* Inode operations shared by all regular-file mount flavours. */
3517 struct inode_operations ll_file_inode_operations = {
3518 .setattr = ll_setattr,
3519 .getattr = ll_getattr,
3520 .permission = ll_inode_permission,
3521 .setxattr = ll_setxattr,
3522 .getxattr = ll_getxattr,
3523 .listxattr = ll_listxattr,
3524 .removexattr = ll_removexattr,
3525 .fiemap = ll_fiemap,
3526 #ifdef HAVE_IOP_GET_ACL
3527 .get_acl = ll_get_acl,
/* Registry for dynamically registered ioctl handlers: a global list of
 * llioc_data entries protected by an rwsem. Each entry carries a
 * callback plus the flexible array of ioctl cmd numbers it serves. */
3531 /* dynamic ioctl number support routines */
3532 static struct llioc_ctl_data {
3533 struct rw_semaphore ioc_sem;
3534 struct list_head ioc_head;
3536 __RWSEM_INITIALIZER(llioc.ioc_sem),
3537 LIST_HEAD_INIT(llioc.ioc_head)
3542 struct list_head iocd_list;
3543 unsigned int iocd_size;
3544 llioc_callback_t iocd_cb;
3545 unsigned int iocd_count;
/* Flexible array of handled ioctl command numbers (iocd_count entries). */
3546 unsigned int iocd_cmd[0];
/*
 * ll_iocontrol_register(): register callback @cb for @count ioctl
 * command numbers in @cmd. Returns an opaque cookie (the allocated
 * entry) for later ll_iocontrol_unregister(), or NULL on bad args /
 * allocation failure.
 */
3549 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3552 struct llioc_data *in_data = NULL;
3555 if (cb == NULL || cmd == NULL ||
3556 count > LLIOC_MAX_CMD || count < 0)
3559 size = sizeof(*in_data) + count * sizeof(unsigned int);
3560 OBD_ALLOC(in_data, size);
3561 if (in_data == NULL)
3564 memset(in_data, 0, sizeof(*in_data));
3565 in_data->iocd_size = size;
3566 in_data->iocd_cb = cb;
3567 in_data->iocd_count = count;
3568 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* Publish under the write lock. */
3570 down_write(&llioc.ioc_sem);
3571 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3572 up_write(&llioc.ioc_sem);
/*
 * ll_iocontrol_unregister(): remove and free the registration matching
 * cookie @magic; warns (without failing) if the cookie is unknown.
 */
3577 void ll_iocontrol_unregister(void *magic)
3579 struct llioc_data *tmp;
3584 down_write(&llioc.ioc_sem);
3585 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3587 unsigned int size = tmp->iocd_size;
3589 list_del(&tmp->iocd_list);
/* Drop the lock before freeing; the entry is already unlinked. */
3590 up_write(&llioc.ioc_sem);
3592 OBD_FREE(tmp, size);
3596 up_write(&llioc.ioc_sem);
3598 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3601 EXPORT_SYMBOL(ll_iocontrol_register);
3602 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * ll_iocontrol_call(): dispatch ioctl @cmd to the registered handlers
 * in order; a handler returning LLIOC_STOP ends the scan. The handler's
 * rc is passed back through *rcp (defaults to -EINVAL when unhandled).
 */
3604 static enum llioc_iter
3605 ll_iocontrol_call(struct inode *inode, struct file *file,
3606 unsigned int cmd, unsigned long arg, int *rcp)
3608 enum llioc_iter ret = LLIOC_CONT;
3609 struct llioc_data *data;
3610 int rc = -EINVAL, i;
3612 down_read(&llioc.ioc_sem);
3613 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3614 for (i = 0; i < data->iocd_count; i++) {
3615 if (cmd != data->iocd_cmd[i])
3618 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3622 if (ret == LLIOC_STOP)
3625 up_read(&llioc.ioc_sem);
/*
 * ll_layout_conf(): push a layout configuration (@conf) into the cl
 * object stack via cl_conf_set(). For OBJECT_CONF_SET, additionally
 * allow the layout lock to be matched and record the new layout
 * generation in lli.
 */
3632 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3634 struct ll_inode_info *lli = ll_i2info(inode);
3635 struct cl_env_nest nest;
/* No cl object yet -> nothing to configure. */
3640 if (lli->lli_clob == NULL)
3643 env = cl_env_nested_get(&nest);
3645 RETURN(PTR_ERR(env));
3647 result = cl_conf_set(env, lli->lli_clob, conf);
3648 cl_env_nested_put(&nest, env);
3650 if (conf->coc_opc == OBJECT_CONF_SET) {
3651 struct ldlm_lock *lock = conf->coc_lock;
3653 LASSERT(lock != NULL);
3654 LASSERT(ldlm_has_layout(lock));
3656 struct lustre_md *md = conf->u.coc_md;
3657 __u32 gen = LL_LAYOUT_GEN_EMPTY;
3659 /* it can only be allowed to match after layout is
3660 * applied to inode otherwise false layout would be
3661 * seen. Applying layout should happen before dropping
3662 * the intent lock. */
3663 ldlm_lock_allow_match(lock);
3665 lli->lli_has_smd = lsm_has_objects(md->lsm);
3666 if (md->lsm != NULL)
3667 gen = md->lsm->lsm_layout_gen;
3670 DFID ": layout version change: %u -> %u\n",
3671 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3673 ll_layout_version_set(lli, gen);
/*
 * ll_layout_fetch(): ensure @lock (a layout lock) carries LVB data.
 * If the lock was granted via completion AST its LVB buffer may be
 * absent or too small, so fetch the LOV EA from the MDT with a
 * getxattr RPC and install it as the lock's l_lvb_data.
 *
 * NOTE(review): listing is missing lines (rc checks, out label, capa
 * put); excerpt only.
 */
3679 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3680 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3683 struct ll_sb_info *sbi = ll_i2sbi(inode);
3684 struct obd_capa *oc;
3685 struct ptlrpc_request *req;
3686 struct mdt_body *body;
3693 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3694 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3695 lock->l_lvb_data, lock->l_lvb_len);
/* Fast path: LVB already populated and marked ready. */
3697 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3700 /* if layout lock was granted right away, the layout is returned
3701 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3702 * blocked and then granted via completion ast, we have to fetch
3703 * layout here. Please note that we can't use the LVB buffer in
3704 * completion AST because it doesn't have a large enough buffer */
3705 oc = ll_mdscapa_get(inode);
3706 rc = ll_get_default_mdsize(sbi, &lmmsize);
3708 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3709 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3715 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3717 GOTO(out, rc = -EPROTO);
3719 lmmsize = body->mbo_eadatasize;
3720 if (lmmsize == 0) /* empty layout */
3723 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3725 GOTO(out, rc = -EFAULT);
3727 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3728 if (lvbdata == NULL)
3729 GOTO(out, rc = -ENOMEM);
/* Swap the new LVB in under the resource lock, freeing any old buffer. */
3731 memcpy(lvbdata, lmm, lmmsize);
3732 lock_res_and_lock(lock);
3733 if (lock->l_lvb_data != NULL)
3734 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3736 lock->l_lvb_data = lvbdata;
3737 lock->l_lvb_len = lmmsize;
3738 unlock_res_and_lock(lock);
3743 ptlrpc_req_finished(req);
/*
 * ll_layout_lock_set(): apply the layout carried by the lock in @lockh
 * to @inode and report the resulting layout generation in *gen.
 * Fetches the LVB if needed, unpacks it into an lsm, configures the cl
 * object (OBJECT_CONF_SET), and — if the object is busy (-EBUSY) —
 * waits for in-flight IO via OBJECT_CONF_WAIT before the caller
 * retries. The lock reference is dropped on exit.
 *
 * NOTE(review): listing is missing lines (out label, retry logic);
 * excerpt only.
 */
3748 * Apply the layout to the inode. Layout lock is held and will be released
3751 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3752 struct inode *inode, __u32 *gen, bool reconf)
3754 struct ll_inode_info *lli = ll_i2info(inode);
3755 struct ll_sb_info *sbi = ll_i2sbi(inode);
3756 struct ldlm_lock *lock;
3757 struct lustre_md md = { NULL };
3758 struct cl_object_conf conf;
3761 bool wait_layout = false;
3764 LASSERT(lustre_handle_is_used(lockh));
3766 lock = ldlm_handle2lock(lockh);
3767 LASSERT(lock != NULL);
3768 LASSERT(ldlm_has_layout(lock));
3770 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d",
3771 PFID(&lli->lli_fid), inode, reconf);
3773 /* in case this is a caching lock and reinstate with new inode */
3774 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3776 lock_res_and_lock(lock);
3777 lvb_ready = ldlm_is_lvb_ready(lock);
3778 unlock_res_and_lock(lock);
3779 /* checking lvb_ready is racy but this is okay. The worst case is
3780 * that multi processes may configure the file on the same time. */
3782 if (lvb_ready || !reconf) {
3785 /* layout_gen must be valid if layout lock is not
3786 * cancelled and stripe has already set */
3787 *gen = ll_layout_version_get(lli);
3793 rc = ll_layout_fetch(inode, lock);
3797 /* for layout lock, lmm is returned in lock's lvb.
3798 * lvb_data is immutable if the lock is held so it's safe to access it
3799 * without res lock. See the description in ldlm_lock_decref_internal()
3800 * for the condition to free lvb_data of layout lock */
3801 if (lock->l_lvb_data != NULL) {
3802 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3803 lock->l_lvb_data, lock->l_lvb_len);
3805 *gen = LL_LAYOUT_GEN_EMPTY;
3807 *gen = md.lsm->lsm_layout_gen;
3810 CERROR("%s: file "DFID" unpackmd error: %d\n",
3811 ll_get_fsname(inode->i_sb, NULL, 0),
3812 PFID(&lli->lli_fid), rc);
3818 /* set layout to file. Unlikely this will fail as old layout was
3819 * surely eliminated */
3820 memset(&conf, 0, sizeof conf);
3821 conf.coc_opc = OBJECT_CONF_SET;
3822 conf.coc_inode = inode;
3823 conf.coc_lock = lock;
3824 conf.u.coc_md = &md;
3825 rc = ll_layout_conf(inode, &conf);
3828 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3830 /* refresh layout failed, need to wait */
3831 wait_layout = rc == -EBUSY;
3835 LDLM_LOCK_PUT(lock);
3836 ldlm_lock_decref(lockh, mode);
3838 /* wait for IO to complete if it's still being used. */
3840 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3841 ll_get_fsname(inode->i_sb, NULL, 0),
3842 PFID(&lli->lli_fid), inode);
3844 memset(&conf, 0, sizeof conf);
3845 conf.coc_opc = OBJECT_CONF_WAIT;
3846 conf.coc_inode = inode;
3847 rc = ll_layout_conf(inode, &conf);
3851 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3852 ll_get_fsname(inode->i_sb, NULL, 0),
3853 PFID(&lli->lli_fid), rc);
3859 * This function checks if there exists a LAYOUT lock on the client side,
3860 * or enqueues it if it doesn't have one in cache.
3862 * This function will not hold layout lock so it may be revoked any time after
3863 * this function returns. Any operation that depends on the layout should be redone
3866 * This function should be called before lov_io_init() to get an uptodate
3867 * layout version, the caller should save the version number and after IO
3868 * is finished, this function should be called again to verify that layout
3869 * is not changed during IO time.
/*
 * ll_layout_refresh(): make sure the client holds an up-to-date layout
 * for @inode and return its generation in *gen. Tries a cached layout
 * lock first; otherwise enqueues an IT_LAYOUT intent under
 * lli_layout_mutex to serialize concurrent refreshers.
 *
 * NOTE(review): listing is missing lines (einfo mode field, retry jump
 * after EBUSY); excerpt only.
 */
3871 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3873 struct ll_inode_info *lli = ll_i2info(inode);
3874 struct ll_sb_info *sbi = ll_i2sbi(inode);
3875 struct md_op_data *op_data;
3876 struct lookup_intent it;
3877 struct lustre_handle lockh;
3879 struct ldlm_enqueue_info einfo = {
3880 .ei_type = LDLM_IBITS,
3882 .ei_cb_bl = &ll_md_blocking_ast,
3883 .ei_cb_cp = &ldlm_completion_ast,
/* Fast path: layout locks disabled, or a generation is already known. */
3888 *gen = ll_layout_version_get(lli);
3889 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
3893 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3894 LASSERT(S_ISREG(inode->i_mode));
3896 /* take layout lock mutex to enqueue layout lock exclusively. */
3897 mutex_lock(&lli->lli_layout_mutex);
3900 /* mostly layout lock is caching on the local side, so try to match
3901 * it before grabbing layout lock mutex. */
3902 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3903 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3904 if (mode != 0) { /* hit cached lock */
3905 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3909 mutex_unlock(&lli->lli_layout_mutex);
3913 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3914 0, 0, LUSTRE_OPC_ANY, NULL);
3915 if (IS_ERR(op_data)) {
3916 mutex_unlock(&lli->lli_layout_mutex);
3917 RETURN(PTR_ERR(op_data));
3920 /* have to enqueue one */
3921 memset(&it, 0, sizeof(it));
3922 it.it_op = IT_LAYOUT;
3923 lockh.cookie = 0ULL;
3925 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3926 ll_get_fsname(inode->i_sb, NULL, 0),
3927 PFID(&lli->lli_fid), inode);
3929 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
/* The intent request is not needed past the enqueue; drop it early. */
3930 if (it.d.lustre.it_data != NULL)
3931 ptlrpc_req_finished(it.d.lustre.it_data);
3932 it.d.lustre.it_data = NULL;
3934 ll_finish_md_op_data(op_data);
/* Take ownership of the granted mode out of the intent before release. */
3936 mode = it.d.lustre.it_lock_mode;
3937 it.d.lustre.it_lock_mode = 0;
3938 ll_intent_drop_lock(&it);
3941 /* set lock data in case this is a new lock */
3942 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3943 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3947 mutex_unlock(&lli->lli_layout_mutex);
3953 * This function send a restore request to the MDT
3955 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3957 struct hsm_user_request *hur;
3961 len = sizeof(struct hsm_user_request) +
3962 sizeof(struct hsm_user_item);
3963 OBD_ALLOC(hur, len);
3967 hur->hur_request.hr_action = HUA_RESTORE;
3968 hur->hur_request.hr_archive_id = 0;
3969 hur->hur_request.hr_flags = 0;
3970 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3971 sizeof(hur->hur_user_item[0].hui_fid));
3972 hur->hur_user_item[0].hui_extent.offset = offset;
3973 hur->hur_user_item[0].hui_extent.length = length;
3974 hur->hur_request.hr_itemcount = 1;
3975 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,