4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
50 #include <lustre_ioctl.h>
52 #include "cl_object.h"
/*
 * Forward declarations for static helpers defined later in this file.
 * NOTE(review): this listing is a sampled extraction; parts of these
 * prototypes (e.g. the return type on the first one, trailing parameters
 * of ll_lease_close) fall on source lines missing from view.
 */
55 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
57 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
60 static enum llioc_iter
61 ll_iocontrol_call(struct inode *inode, struct file *file,
62 unsigned int cmd, unsigned long arg, int *rcp);
/*
 * Allocate per-open-file private data (struct ll_file_data) from the
 * dedicated slab with GFP_NOFS (we may be called from within fs reclaim),
 * and clear the cached write-failure flag.
 * NOTE(review): the allocation-failure check and the return statement are
 * on source lines not visible in this extraction.
 */
64 static struct ll_file_data *ll_file_data_get(void)
66 struct ll_file_data *fd;
68 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
72 fd->fd_write_failed = false;
/*
 * Release per-open-file private data back to ll_file_data_slab.
 * Counterpart of ll_file_data_get().
 */
77 static void ll_file_data_put(struct ll_file_data *fd)
80 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Copy the inode's current metadata (fid, mode, a/m/ctime, size, blocks,
 * flags) plus the given open handle into @op_data for an MDS request.
 * Also takes an MDS capability reference and, when the inode carries the
 * LLIF_DATA_MODIFIED flag, sets the MDS_DATA_MODIFIED bias so the server
 * knows file data changed under this handle.
 */
83 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
84 struct lustre_handle *fh)
86 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
87 op_data->op_attr.ia_mode = inode->i_mode;
88 op_data->op_attr.ia_atime = inode->i_atime;
89 op_data->op_attr.ia_mtime = inode->i_mtime;
90 op_data->op_attr.ia_ctime = inode->i_ctime;
91 op_data->op_attr.ia_size = i_size_read(inode);
92 op_data->op_attr_blocks = inode->i_blocks;
/* Translate kernel inode flags to the on-the-wire ext-style flag bits. */
93 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
95 op_data->op_handle = *fh;
96 op_data->op_capa1 = ll_mdscapa_get(inode);
98 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
99 op_data->op_bias |= MDS_DATA_MODIFIED;
/*
 * Packs all the attributes into @op_data for the CLOSE rpc.
 *
 * Builds the ATTR_* validity mask, then delegates attribute packing to
 * ll_pack_inode2opdata() using the handle stored in @och.
 * NOTE(review): the branch body between the FMODE_WRITE test and the
 * ATTR_SIZE|ATTR_BLOCKS line is on lines missing from this extraction,
 * so the exact conditional structure cannot be confirmed here — the
 * apparent intent is that size/blocks are only sent for write handles.
 */
105 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
106 struct obd_client_handle *och)
110 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
111 ATTR_MTIME | ATTR_MTIME_SET |
112 ATTR_CTIME | ATTR_CTIME_SET;
114 if (!(och->och_flags & FMODE_WRITE))
117 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
120 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
121 ll_prep_md_op_data(op_data, inode, NULL, NULL,
122 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send a CLOSE RPC to the MDS for open handle @och on @inode.
 *
 * When @data_version is non-NULL this close is an HSM release: the
 * MDS_HSM_RELEASE bias, the data version and the lease handle are packed
 * so the MDT can atomically verify and release the file data.
 * On success the local LLIF_DATA_MODIFIED flag is cleared (the server has
 * now seen it), open-replay data is dropped and the handle cookie is
 * poisoned with DEAD_HANDLE_MAGIC.
 * NOTE(review): several lines (ENTRY/RETURN, the och free path, parts of
 * the HSM-release body check) are missing from this extraction.
 */
126 static int ll_close_inode_openhandle(struct obd_export *md_exp,
128 struct obd_client_handle *och,
129 const __u64 *data_version)
131 struct obd_export *exp = ll_i2mdexp(inode);
132 struct md_op_data *op_data;
133 struct ptlrpc_request *req = NULL;
134 struct obd_device *obd = class_exp2obd(exp);
140 * XXX: in case of LMV, is this correct to access
143 CERROR("Invalid MDC connection handle "LPX64"\n",
144 ll_i2mdexp(inode)->exp_handle.h_cookie);
148 OBD_ALLOC_PTR(op_data);
150 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
152 ll_prepare_close(inode, op_data, och);
153 if (data_version != NULL) {
154 /* Pass in data_version implies release. */
155 op_data->op_bias |= MDS_HSM_RELEASE;
156 op_data->op_data_version = *data_version;
157 op_data->op_lease_handle = och->och_lease_handle;
158 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
161 rc = md_close(md_exp, op_data, och->och_mod, &req);
163 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
164 ll_i2mdexp(inode)->exp_obd->obd_name,
165 PFID(ll_inode2fid(inode)), rc);
168 /* DATA_MODIFIED flag was successfully sent on close, cancel data
169 * modification flag. */
170 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
171 struct ll_inode_info *lli = ll_i2info(inode);
/* lli_flags is protected by lli_lock. */
173 spin_lock(&lli->lli_lock);
174 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
175 spin_unlock(&lli->lli_lock);
178 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
179 struct mdt_body *body;
180 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
181 if (!(body->mbo_valid & OBD_MD_FLRELEASED))
185 ll_finish_md_op_data(op_data);
189 md_clear_open_replay_data(md_exp, och);
/* Poison the handle so accidental reuse is detectable. */
190 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
193 if (req) /* This is close request */
194 ptlrpc_req_finished(req);
/*
 * Drop one reference on the MDS open handle matching @fmode
 * (write / exec / read) and, when this was the last user, actually
 * close the handle on the MDS via ll_close_inode_openhandle().
 * All och bookkeeping is serialized by lli_och_mutex.
 * NOTE(review): the handle-swap under the mutex and the final RETURN are
 * on lines missing from this extraction.
 */
198 int ll_md_real_close(struct inode *inode, fmode_t fmode)
200 struct ll_inode_info *lli = ll_i2info(inode);
201 struct obd_client_handle **och_p;
202 struct obd_client_handle *och;
/* Select the och slot and use-count matching the open mode. */
207 if (fmode & FMODE_WRITE) {
208 och_p = &lli->lli_mds_write_och;
209 och_usecount = &lli->lli_open_fd_write_count;
210 } else if (fmode & FMODE_EXEC) {
211 och_p = &lli->lli_mds_exec_och;
212 och_usecount = &lli->lli_open_fd_exec_count;
214 LASSERT(fmode & FMODE_READ);
215 och_p = &lli->lli_mds_read_och;
216 och_usecount = &lli->lli_open_fd_read_count;
219 mutex_lock(&lli->lli_och_mutex);
220 if (*och_usecount > 0) {
221 /* There are still users of this handle, so skip
223 mutex_unlock(&lli->lli_och_mutex);
229 mutex_unlock(&lli->lli_och_mutex);
232 /* There might be a race and this handle may already
234 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-struct-file close path, called from ll_file_release().
 *
 * Cleans up everything attached to the file descriptor: group lock,
 * lease handle, private och; decrements the matching open-mode use count
 * under lli_och_mutex; then tries to avoid the MDS close RPC entirely if
 * we still hold a matching OPEN ibits lock (md_lock_match). Finally
 * detaches and frees the ll_file_data.
 * NOTE(review): lines between several statements (lease_broken decl,
 * lockmode setup, RETURN) are missing from this extraction.
 */
241 static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
244 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
245 struct ll_inode_info *lli = ll_i2info(inode);
249 /* clear group lock, if present */
250 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
251 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
253 if (fd->fd_lease_och != NULL) {
256 /* Usually the lease is not released when the
257 * application crashed, we need to release here. */
258 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
259 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
260 PFID(&lli->lli_fid), rc, lease_broken);
262 fd->fd_lease_och = NULL;
265 if (fd->fd_och != NULL) {
266 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
271 /* Let's see if we have good enough OPEN lock on the file and if
272 we can skip talking to MDS */
273 if (file->f_dentry->d_inode) { /* Can this ever be false? */
275 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
276 struct lustre_handle lockh;
277 struct inode *inode = file->f_dentry->d_inode;
278 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
280 mutex_lock(&lli->lli_och_mutex);
281 if (fd->fd_omode & FMODE_WRITE) {
283 LASSERT(lli->lli_open_fd_write_count);
284 lli->lli_open_fd_write_count--;
285 } else if (fd->fd_omode & FMODE_EXEC) {
287 LASSERT(lli->lli_open_fd_exec_count);
288 lli->lli_open_fd_exec_count--;
291 LASSERT(lli->lli_open_fd_read_count);
292 lli->lli_open_fd_read_count--;
294 mutex_unlock(&lli->lli_och_mutex);
/* No matching OPEN lock cached locally: must close on the MDS. */
296 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
297 LDLM_IBITS, &policy, lockmode,
299 rc = ll_md_real_close(file->f_dentry->d_inode,
303 CERROR("released file has negative dentry: file = %p, "
304 "dentry = %p, name = %s\n",
305 file, file->f_dentry, file->f_dentry->d_name.name);
309 LUSTRE_FPRIVATE(file) = NULL;
310 ll_file_data_put(fd);
311 ll_capa_close(inode);
/* While this returns an error code, fput() the caller does not, so we need
 * to make every effort to clean up all of our state here. Also, applications
 * rarely check close errors and even if an error is returned they will not
 * re-try the close call.
 *
 * VFS ->release() entry point: remote-ACL teardown for the root inode,
 * statahead deauthorization, async-rc harvesting for regular files, then
 * the real close work in ll_md_close().
 * NOTE(review): extraction is missing lines here too (e.g. the early
 * return for the root dentry, the fail-timeout RETURN path).
 */
321 int ll_file_release(struct inode *inode, struct file *file)
323 struct ll_file_data *fd;
324 struct ll_sb_info *sbi = ll_i2sbi(inode);
325 struct ll_inode_info *lli = ll_i2info(inode);
329 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
330 PFID(ll_inode2fid(inode)), inode);
332 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL state is only attached to the filesystem root. */
333 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
334 inode == inode->i_sb->s_root->d_inode) {
335 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
338 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
339 fd->fd_flags &= ~LL_FILE_RMTACL;
340 rct_del(&sbi->ll_rct, current_pid());
341 et_search_free(&sbi->ll_et, current_pid());
346 if (inode->i_sb->s_root != file->f_dentry)
347 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
348 fd = LUSTRE_FPRIVATE(file);
351 /* The last ref on @file, maybe not the the owner pid of statahead,
352 * because parent and child process can share the same file handle. */
353 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
354 ll_deauthorize_statahead(inode, fd);
/* Root inode: nothing was opened on the MDS; just drop the fd. */
356 if (inode->i_sb->s_root == file->f_dentry) {
357 LUSTRE_FPRIVATE(file) = NULL;
358 ll_file_data_put(fd);
/* Collect any async write errors recorded by the cl_object layer. */
362 if (!S_ISDIR(inode->i_mode)) {
363 if (lli->lli_clob != NULL)
364 lov_read_and_clear_async_rc(lli->lli_clob);
365 lli->lli_async_rc = 0;
368 rc = ll_md_close(sbi->ll_md_exp, inode, file);
370 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
371 libcfs_debug_dumplog();
/*
 * Send an IT_OPEN intent lock request to the MDS for @file.
 *
 * The dentry name is packed only when the server does NOT support
 * open-by-fid and the name is valid; otherwise the fid alone identifies
 * the target. On a successful enqueue the reply is used to update the
 * inode (ll_prep_inode) and stash the granted lock (ll_set_lock_data).
 * NOTE(review): error-path lines between several statements are missing
 * from this extraction.
 */
376 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
377 struct lookup_intent *itp)
379 struct dentry *de = file->f_dentry;
380 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
381 struct dentry *parent = de->d_parent;
382 const char *name = NULL;
384 struct md_op_data *op_data;
385 struct ptlrpc_request *req = NULL;
389 LASSERT(parent != NULL);
390 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
392 /* if server supports open-by-fid, or file name is invalid, don't pack
393 * name in open request */
394 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
395 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
396 name = de->d_name.name;
397 len = de->d_name.len;
400 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
401 name, len, 0, LUSTRE_OPC_ANY, NULL);
403 RETURN(PTR_ERR(op_data));
404 op_data->op_data = lmm;
405 op_data->op_data_size = lmmsize;
407 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
408 &ll_md_blocking_ast, 0);
409 ll_finish_md_op_data(op_data);
411 /* reason for keep own exit path - don`t flood log
412 * with messages with -ESTALE errors.
414 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
415 it_open_error(DISP_OPEN_OPEN, itp))
417 ll_release_openhandle(de, itp);
421 if (it_disposition(itp, DISP_LOOKUP_NEG))
422 GOTO(out, rc = -ENOENT);
424 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
425 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
426 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
430 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
431 if (!rc && itp->d.lustre.it_lock_mode)
432 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
435 ptlrpc_req_finished(req);
436 ll_intent_drop_lock(itp);
/*
 * Populate an obd_client_handle from the MDT reply body carried by the
 * completed intent @it (handle, fid, lease-lock cookie, magic, flags),
 * then register the open for replay on MDS recovery.
 */
441 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
442 struct obd_client_handle *och)
444 struct ptlrpc_request *req = it->d.lustre.it_data;
445 struct mdt_body *body;
447 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
448 och->och_fh = body->mbo_handle;
449 och->och_fid = body->mbo_fid1;
450 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
451 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
452 och->och_flags = it->it_flags;
454 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish the client-local part of an open: optionally fill @och from the
 * intent reply, attach @fd as the file's private data and initialize its
 * readahead state, open mode and cl_io context (fd_lock/fd_lccs).
 * NOTE(review): the "if (och)" guard around ll_och_fill() and the RETURN
 * are on lines missing from this extraction.
 */
457 static int ll_local_open(struct file *file, struct lookup_intent *it,
458 struct ll_file_data *fd, struct obd_client_handle *och)
460 struct inode *inode = file->f_dentry->d_inode;
463 LASSERT(!LUSTRE_FPRIVATE(file));
470 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
475 LUSTRE_FPRIVATE(file) = fd;
476 ll_readahead_init(inode, &fd->fd_ras);
477 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
479 /* ll_cl_context initialize */
480 rwlock_init(&fd->fd_lock);
481 INIT_LIST_HEAD(&fd->fd_lccs);
/* Open a file, and (for the very first open) create objects on the OSTs at
 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
 * creation or open until ll_lov_setstripe() ioctl is called.
 *
 * If we already have the stripe MD locally then we don't request it in
 * md_open(), by passing a lmm_size = 0.
 *
 * It is up to the application to ensure no other processes open this file
 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
 * used. We might be able to avoid races of that sort by getting lli_open_sem
 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
 *
 * NOTE(review): this listing is a sampled extraction; loop/retry
 * structure, some GOTO targets and the final RETURN are on lines missing
 * from view below.
 */
499 int ll_file_open(struct inode *inode, struct file *file)
501 struct ll_inode_info *lli = ll_i2info(inode);
502 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
503 .it_flags = file->f_flags };
504 struct obd_client_handle **och_p = NULL;
505 __u64 *och_usecount = NULL;
506 struct ll_file_data *fd;
510 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
511 PFID(ll_inode2fid(inode)), inode, file->f_flags);
/* An intent left here by the lookup/atomic-open path, if any. */
513 it = file->private_data; /* XXX: compat macro */
514 file->private_data = NULL; /* prevent ll_local_open assertion */
516 fd = ll_file_data_get();
518 GOTO(out_openerr, rc = -ENOMEM);
521 if (S_ISDIR(inode->i_mode))
522 ll_authorize_statahead(inode, fd);
/* Filesystem root: no MDS open needed, just attach the fd. */
524 if (inode->i_sb->s_root == file->f_dentry) {
525 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own IT_OPEN intent. */
529 if (!it || !it->d.lustre.it_disposition) {
530 /* Convert f_flags into access mode. We cannot use file->f_mode,
531 * because everything but O_ACCMODE mask was stripped from
533 if ((oit.it_flags + 1) & O_ACCMODE)
535 if (file->f_flags & O_TRUNC)
536 oit.it_flags |= FMODE_WRITE;
538 /* kernel only call f_op->open in dentry_open. filp_open calls
539 * dentry_open after call to open_namei that checks permissions.
540 * Only nfsd_open call dentry_open directly without checking
541 * permissions and because of that this code below is safe. */
542 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
543 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
545 /* We do not want O_EXCL here, presumably we opened the file
546 * already? XXX - NFS implications? */
547 oit.it_flags &= ~O_EXCL;
549 /* bug20584, if "it_flags" contains O_CREAT, the file will be
550 * created if necessary, then "IT_CREAT" should be set to keep
551 * consistent with it */
552 if (oit.it_flags & O_CREAT)
553 oit.it_op |= IT_CREAT;
559 /* Let's see if we have file open on MDS already. */
560 if (it->it_flags & FMODE_WRITE) {
561 och_p = &lli->lli_mds_write_och;
562 och_usecount = &lli->lli_open_fd_write_count;
563 } else if (it->it_flags & FMODE_EXEC) {
564 och_p = &lli->lli_mds_exec_och;
565 och_usecount = &lli->lli_open_fd_exec_count;
567 och_p = &lli->lli_mds_read_och;
568 och_usecount = &lli->lli_open_fd_read_count;
571 mutex_lock(&lli->lli_och_mutex);
572 if (*och_p) { /* Open handle is present */
573 if (it_disposition(it, DISP_OPEN_OPEN)) {
574 /* Well, there's extra open request that we do not need,
575 let's close it somehow. This will decref request. */
576 rc = it_open_error(DISP_OPEN_OPEN, it);
578 mutex_unlock(&lli->lli_och_mutex);
579 GOTO(out_openerr, rc);
582 ll_release_openhandle(file->f_dentry, it);
586 rc = ll_local_open(file, it, fd, NULL);
589 mutex_unlock(&lli->lli_och_mutex);
590 GOTO(out_openerr, rc);
593 LASSERT(*och_usecount == 0);
594 if (!it->d.lustre.it_disposition) {
595 /* We cannot just request lock handle now, new ELC code
596 means that one of other OPEN locks for this file
597 could be cancelled, and since blocking ast handler
598 would attempt to grab och_mutex as well, that would
599 result in a deadlock */
600 mutex_unlock(&lli->lli_och_mutex);
602 * Normally called under two situations:
604 * 2. A race/condition on MDS resulting in no open
605 * handle to be returned from LOOKUP|OPEN request,
606 * for example if the target entry was a symlink.
608 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
610 * Always specify MDS_OPEN_BY_FID because we don't want
611 * to get file with different fid.
613 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
614 rc = ll_intent_file_open(file, NULL, 0, it);
616 GOTO(out_openerr, rc);
620 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
622 GOTO(out_och_free, rc = -ENOMEM);
626 /* md_intent_lock() didn't get a request ref if there was an
627 * open error, so don't do cleanup on the request here
629 /* XXX (green): Should not we bail out on any error here, not
630 * just open error? */
631 rc = it_open_error(DISP_OPEN_OPEN, it);
633 GOTO(out_och_free, rc);
635 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
636 "inode %p: disposition %x, status %d\n", inode,
637 it_disposition(it, ~0), it->d.lustre.it_status);
639 rc = ll_local_open(file, it, fd, *och_p);
641 GOTO(out_och_free, rc);
643 mutex_unlock(&lli->lli_och_mutex);
646 /* Must do this outside lli_och_mutex lock to prevent deadlock where
647 different kind of OPEN lock for this same inode gets cancelled
648 by ldlm_cancel_lru */
649 if (!S_ISREG(inode->i_mode))
650 GOTO(out_och_free, rc);
/* Delay OST object creation for O_LOV_DELAY_CREATE / read-only opens. */
654 if (!lli->lli_has_smd &&
655 (cl_is_lov_delay_create(file->f_flags) ||
656 (file->f_mode & FMODE_WRITE) == 0)) {
657 CDEBUG(D_INODE, "object creation was delayed\n");
658 GOTO(out_och_free, rc);
660 cl_lov_delay_create_clear(&file->f_flags);
661 GOTO(out_och_free, rc);
665 if (och_p && *och_p) {
666 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
667 *och_p = NULL; /* OBD_FREE writes some magic there */
670 mutex_unlock(&lli->lli_och_mutex);
673 if (lli->lli_opendir_key == fd)
674 ll_deauthorize_statahead(inode, fd);
676 ll_file_data_put(fd);
678 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
681 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
682 ptlrpc_req_finished(it->d.lustre.it_data);
683 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * LDLM blocking AST for lease locks: on a blocking callback, cancel the
 * lock asynchronously; the CANCELING phase (body not fully visible here)
 * performs no openhandle cleanup by design — see ll_lease_open().
 */
689 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
690 struct ldlm_lock_desc *desc, void *data, int flag)
693 struct lustre_handle lockh;
697 case LDLM_CB_BLOCKING:
698 ldlm_lock2handle(lock, &lockh);
699 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
701 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
705 case LDLM_CB_CANCELING:
/*
 * Acquire a lease and open the file.
 *
 * @fmode must be exactly FMODE_READ or FMODE_WRITE. When @file is given,
 * the lease reuses that file's existing MDS openhandle (only allowed when
 * this descriptor is the sole opener); old_handle tells the MDT the lease
 * request comes from the same owner. The intent carries
 * MDS_OPEN_LOCK|MDS_OPEN_BY_FID|MDS_OPEN_LEASE and the lease lock is
 * enqueued with LDLM_FL_NO_LRU|LDLM_FL_EXCL (see comment below).
 * Returns the new och on success, ERR_PTR on failure; on the error paths
 * the open lock is cancelled and the openhandle closed.
 * NOTE(review): this extraction is missing lines (och allocation, some
 * error branches, final RETURNs), so control flow is partly inferred.
 */
715 static struct obd_client_handle *
716 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
719 struct lookup_intent it = { .it_op = IT_OPEN };
720 struct ll_sb_info *sbi = ll_i2sbi(inode);
721 struct md_op_data *op_data;
722 struct ptlrpc_request *req = NULL;
723 struct lustre_handle old_handle = { 0 };
724 struct obd_client_handle *och = NULL;
729 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
730 RETURN(ERR_PTR(-EINVAL));
733 struct ll_inode_info *lli = ll_i2info(inode);
734 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
735 struct obd_client_handle **och_p;
/* The lease mode must be covered by the file's open mode. */
738 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
739 RETURN(ERR_PTR(-EPERM));
741 /* Get the openhandle of the file */
743 mutex_lock(&lli->lli_och_mutex);
744 if (fd->fd_lease_och != NULL) {
745 mutex_unlock(&lli->lli_och_mutex);
749 if (fd->fd_och == NULL) {
750 if (file->f_mode & FMODE_WRITE) {
751 LASSERT(lli->lli_mds_write_och != NULL);
752 och_p = &lli->lli_mds_write_och;
753 och_usecount = &lli->lli_open_fd_write_count;
755 LASSERT(lli->lli_mds_read_och != NULL);
756 och_p = &lli->lli_mds_read_och;
757 och_usecount = &lli->lli_open_fd_read_count;
759 if (*och_usecount == 1) {
766 mutex_unlock(&lli->lli_och_mutex);
767 if (rc < 0) /* more than 1 opener */
770 LASSERT(fd->fd_och != NULL);
771 old_handle = fd->fd_och->och_fh;
776 RETURN(ERR_PTR(-ENOMEM));
778 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
779 LUSTRE_OPC_ANY, NULL);
781 GOTO(out, rc = PTR_ERR(op_data));
783 /* To tell the MDT this openhandle is from the same owner */
784 op_data->op_handle = old_handle;
786 it.it_flags = fmode | open_flags;
787 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
788 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
789 &ll_md_blocking_lease_ast,
790 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
791 * it can be cancelled which may mislead applications that the lease is
793 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
794 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
795 * doesn't deal with openhandle, so normal openhandle will be leaked. */
796 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
797 ll_finish_md_op_data(op_data);
798 ptlrpc_req_finished(req);
800 GOTO(out_release_it, rc);
802 if (it_disposition(&it, DISP_LOOKUP_NEG))
803 GOTO(out_release_it, rc = -ENOENT);
805 rc = it_open_error(DISP_OPEN_OPEN, &it);
807 GOTO(out_release_it, rc);
809 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
810 ll_och_fill(sbi->ll_md_exp, &it, och);
812 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
813 GOTO(out_close, rc = -EOPNOTSUPP);
815 /* already get lease, handle lease lock */
816 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
817 if (it.d.lustre.it_lock_mode == 0 ||
818 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
819 /* open lock must return for lease */
820 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
821 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
822 it.d.lustre.it_lock_bits);
823 GOTO(out_close, rc = -EPROTO);
826 ll_intent_release(&it);
830 /* Cancel open lock */
831 if (it.d.lustre.it_lock_mode != 0) {
832 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
833 it.d.lustre.it_lock_mode);
834 it.d.lustre.it_lock_mode = 0;
835 och->och_lease_handle.cookie = 0ULL;
837 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
839 CERROR("%s: error closing file "DFID": %d\n",
840 ll_get_fsname(inode->i_sb, NULL, 0),
841 PFID(&ll_i2info(inode)->lli_fid), rc2);
842 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
844 ll_intent_release(&it);
/*
 * Release lease and close the file.
 * It will check if the lease has ever broken.
 *
 * Inspects the lease LDLM lock's cancel bit (under the resource lock) to
 * decide whether the lease was broken, cancels it if still intact,
 * reports breakage through @lease_broken, then closes the openhandle.
 * NOTE(review): the lock-NULL branch and the "if (!cancelled)" guard
 * around ldlm_cli_cancel() fall on lines missing from this extraction.
 */
855 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
858 struct ldlm_lock *lock;
859 bool cancelled = true;
863 lock = ldlm_handle2lock(&och->och_lease_handle);
865 lock_res_and_lock(lock);
866 cancelled = ldlm_is_cancel(lock);
867 unlock_res_and_lock(lock);
871 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
872 PFID(&ll_i2info(inode)->lli_fid), cancelled);
875 ldlm_cli_cancel(&och->och_lease_handle, 0);
876 if (lease_broken != NULL)
877 *lease_broken = cancelled;
879 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
/*
 * Merge MDS-sourced inode attributes with OST-sourced ones under the
 * inode size lock: start from the lli_* timestamps (from the MDS), fetch
 * the cl_object attributes (from the OSTs), keep the newer of each
 * timestamp, and take size/blocks from the OST view.
 * NOTE(review): local declarations (atime/mtime/ctime, rc) and the final
 * RETURN are on lines missing from this extraction.
 */
884 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
886 struct ll_inode_info *lli = ll_i2info(inode);
887 struct cl_object *obj = lli->lli_clob;
888 struct cl_attr *attr = ccc_env_thread_attr(env);
896 ll_inode_size_lock(inode);
898 /* merge timestamps the most recently obtained from mds with
899 timestamps obtained from osts */
900 LTIME_S(inode->i_atime) = lli->lli_atime;
901 LTIME_S(inode->i_mtime) = lli->lli_mtime;
902 LTIME_S(inode->i_ctime) = lli->lli_ctime;
904 atime = LTIME_S(inode->i_atime);
905 mtime = LTIME_S(inode->i_mtime);
906 ctime = LTIME_S(inode->i_ctime);
908 cl_object_attr_lock(obj);
909 rc = cl_object_attr_get(env, obj, attr);
910 cl_object_attr_unlock(obj);
913 GOTO(out_size_unlock, rc);
/* Keep whichever side has the more recent timestamps. */
915 if (atime < attr->cat_atime)
916 atime = attr->cat_atime;
918 if (ctime < attr->cat_ctime)
919 ctime = attr->cat_ctime;
921 if (mtime < attr->cat_mtime)
922 mtime = attr->cat_mtime;
924 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
925 PFID(&lli->lli_fid), attr->cat_size);
927 i_size_write(inode, attr->cat_size);
928 inode->i_blocks = attr->cat_blocks;
930 LTIME_S(inode->i_atime) = atime;
931 LTIME_S(inode->i_mtime) = mtime;
932 LTIME_S(inode->i_ctime) = ctime;
935 ll_inode_size_unlock(inode);
/*
 * Decide whether atime updates should be suppressed for @file, checking
 * O_NOATIME, inode/superblock/mount noatime flags, read-only mounts and
 * the nodiratime variants for directories.
 * NOTE(review): the "return true/false" lines between the conditions are
 * missing from this extraction (adapted from kernel file_accessed()).
 */
940 static bool file_is_noatime(const struct file *file)
942 const struct vfsmount *mnt = file->f_path.mnt;
943 const struct inode *inode = file->f_path.dentry->d_inode;
945 /* Adapted from file_accessed() and touch_atime().*/
946 if (file->f_flags & O_NOATIME)
949 if (inode->i_flags & S_NOATIME)
952 if (IS_NOATIME(inode))
955 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
958 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
961 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io from the file's flags: nonblock/append/sync bits,
 * the target cl_object, lock-request policy (never for nolock files,
 * mandatory for O_APPEND, maybe otherwise) and the noatime setting.
 * NOTE(review): part of the wr_sync expression (after O_DIRECT ||) is on
 * a line missing from this extraction.
 */
967 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
969 struct inode *inode = file->f_dentry->d_inode;
971 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
973 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
974 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
975 file->f_flags & O_DIRECT ||
978 io->ci_obj = ll_i2info(inode)->lli_clob;
979 io->ci_lockreq = CILR_MAYBE;
980 if (ll_file_nolock(file)) {
981 io->ci_lockreq = CILR_NEVER;
982 io->ci_no_srvlock = 1;
983 } else if (file->f_flags & O_APPEND) {
984 io->ci_lockreq = CILR_MANDATORY;
987 io->ci_noatime = file_is_noatime(file);
/*
 * Common driver for all read/write paths (normal and splice).
 *
 * Sets up the cl_io, enforces the per-file maxbytes limit, takes the
 * per-inode range lock for normal (non-grouplocked) writes — whole-file
 * for O_APPEND — plus lli_trunc_sem, runs cl_io_loop(), and restarts the
 * whole I/O when the cl_io layer asks for it and nothing was transferred
 * yet. On completion it updates *ppos and the read/write statistics.
 * NOTE(review): this extraction is missing lines throughout (restart
 * label/loop, several case labels, range-unlock guard, RETURN), so
 * control flow around the restart path is partly inferred.
 */
991 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
992 struct file *file, enum cl_io_type iot,
993 loff_t *ppos, size_t count)
995 struct inode *inode = file->f_dentry->d_inode;
996 struct ll_inode_info *lli = ll_i2info(inode);
998 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1001 struct range_lock range;
1004 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1005 file->f_dentry->d_name.name, iot, *ppos, count);
1008 io = ccc_env_thread_io(env);
1009 ll_io_init(io, file, iot == CIT_WRITE);
1011 /* The maximum Lustre file size is variable, based on the
1012 * OST maximum object size and number of stripes. This
1013 * needs another check in addition to the VFS checks earlier. */
1014 end = (io->u.ci_wr.wr_append ? i_size_read(inode) : *ppos) + count;
1015 if (end > ll_file_maxbytes(inode)) {
1017 CDEBUG(D_INODE, "%s: file "DFID" offset %llu > maxbytes "LPU64
1018 ": rc = %zd\n", ll_get_fsname(inode->i_sb, NULL, 0),
1019 PFID(&lli->lli_fid), end, ll_file_maxbytes(inode),
1024 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1025 struct vvp_io *vio = vvp_env_io(env);
1026 bool range_locked = false;
/* O_APPEND writes must exclude the whole file range. */
1028 if (file->f_flags & O_APPEND)
1029 range_lock_init(&range, 0, LUSTRE_EOF);
1031 range_lock_init(&range, *ppos, *ppos + count - 1);
1033 vio->vui_fd = LUSTRE_FPRIVATE(file);
1034 vio->vui_io_subtype = args->via_io_subtype;
1036 switch (vio->vui_io_subtype) {
1038 vio->vui_iov = args->u.normal.via_iov;
1039 vio->vui_nrsegs = args->u.normal.via_nrsegs;
1040 vio->vui_tot_nrsegs = vio->vui_nrsegs;
1041 vio->vui_iocb = args->u.normal.via_iocb;
/* Group-locked files already hold an exclusive extent lock. */
1042 if ((iot == CIT_WRITE) &&
1043 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1044 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1046 result = range_lock(&lli->lli_write_tree,
1051 range_locked = true;
1053 down_read(&lli->lli_trunc_sem);
1056 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1057 vio->u.splice.vui_flags = args->u.splice.via_flags;
1060 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1064 ll_cl_add(file, env, io);
1065 result = cl_io_loop(env, io);
1066 ll_cl_remove(file, env);
1068 if (args->via_io_subtype == IO_NORMAL)
1069 up_read(&lli->lli_trunc_sem);
1071 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1073 range_unlock(&lli->lli_write_tree, &range);
1076 /* cl_io_rw_init() handled IO */
1077 result = io->ci_result;
1080 if (io->ci_nob > 0) {
1081 result = io->ci_nob;
1082 *ppos = io->u.ci_wr.wr.crw_pos;
1086 cl_io_fini(env, io);
1087 /* If any bit been read/written (result != 0), we just return
1088 * short read/write instead of restart io. */
1089 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1090 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zu\n",
1091 iot == CIT_READ ? "read" : "write",
1092 file->f_dentry->d_name.name, *ppos, count);
1093 LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);
1097 if (iot == CIT_READ) {
1099 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1100 LPROC_LL_READ_BYTES, result);
1101 } else if (iot == CIT_WRITE) {
1103 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1104 LPROC_LL_WRITE_BYTES, result);
1105 fd->fd_write_failed = false;
1106 } else if (result != -ERESTARTSYS) {
1107 fd->fd_write_failed = true;
1110 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
/*
 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
 *
 * Validate an iovec array and compute the total byte count, truncating
 * *nr_segs at the first inaccessible segment (partial transfer allowed,
 * matching the kernel original).
 * NOTE(review): the accumulation into cnt, the *count/*nr_segs writeback
 * and the returns are on lines missing from this extraction.
 */
1119 static int ll_file_get_iov_count(const struct iovec *iov,
1120 unsigned long *nr_segs, size_t *count)
1125 for (seg = 0; seg < *nr_segs; seg++) {
1126 const struct iovec *iv = &iov[seg];
1129 * If any segment has a negative length, or the cumulative
1130 * length ever wraps negative then return -EINVAL.
1133 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1135 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1140 cnt -= iv->iov_len; /* This segment is no good */
/*
 * aio_read entry point: validate the iovec, package it into vvp_io_args
 * and run the common I/O driver with CIT_READ, advancing iocb->ki_pos.
 */
1147 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1148 unsigned long nr_segs, loff_t pos)
1151 struct vvp_io_args *args;
1157 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1161 env = cl_env_get(&refcheck);
1163 RETURN(PTR_ERR(env));
1165 args = vvp_env_args(env, IO_NORMAL);
1166 args->u.normal.via_iov = (struct iovec *)iov;
1167 args->u.normal.via_nrsegs = nr_segs;
1168 args->u.normal.via_iocb = iocb;
1170 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1171 &iocb->ki_pos, count);
1172 cl_env_put(env, &refcheck);
/*
 * Synchronous read(): build a single-segment iovec and a sync kiocb in
 * per-env scratch space, delegate to ll_file_aio_read() and propagate
 * the updated position back through *ppos. The ki_left/ki_nbytes split
 * covers both kernel kiocb ABIs.
 */
1176 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1180 struct iovec *local_iov;
1181 struct kiocb *kiocb;
1186 env = cl_env_get(&refcheck);
1188 RETURN(PTR_ERR(env));
1190 local_iov = &vvp_env_info(env)->vti_local_iov;
1191 kiocb = &vvp_env_info(env)->vti_kiocb;
1192 local_iov->iov_base = (void __user *)buf;
1193 local_iov->iov_len = count;
1194 init_sync_kiocb(kiocb, file);
1195 kiocb->ki_pos = *ppos;
1196 #ifdef HAVE_KIOCB_KI_LEFT
1197 kiocb->ki_left = count;
1199 kiocb->ki_nbytes = count;
1202 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1203 *ppos = kiocb->ki_pos;
1205 cl_env_put(env, &refcheck);
/*
 * Write to a file (through the page cache).
 *
 * aio_write entry point: mirror of ll_file_aio_read() with CIT_WRITE.
 */
1213 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1214 unsigned long nr_segs, loff_t pos)
1217 struct vvp_io_args *args;
1223 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1227 env = cl_env_get(&refcheck);
1229 RETURN(PTR_ERR(env));
1231 args = vvp_env_args(env, IO_NORMAL);
1232 args->u.normal.via_iov = (struct iovec *)iov;
1233 args->u.normal.via_nrsegs = nr_segs;
1234 args->u.normal.via_iocb = iocb;
1236 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1237 &iocb->ki_pos, count);
1238 cl_env_put(env, &refcheck);
/*
 * Synchronous write(): single-segment iovec + sync kiocb wrapper around
 * ll_file_aio_write(); mirror of ll_file_read().
 */
1242 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1243 size_t count, loff_t *ppos)
1246 struct iovec *local_iov;
1247 struct kiocb *kiocb;
1252 env = cl_env_get(&refcheck);
1254 RETURN(PTR_ERR(env));
1256 local_iov = &vvp_env_info(env)->vti_local_iov;
1257 kiocb = &vvp_env_info(env)->vti_kiocb;
1258 local_iov->iov_base = (void __user *)buf;
1259 local_iov->iov_len = count;
1260 init_sync_kiocb(kiocb, file);
1261 kiocb->ki_pos = *ppos;
1262 #ifdef HAVE_KIOCB_KI_LEFT
1263 kiocb->ki_left = count;
1265 kiocb->ki_nbytes = count;
1268 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1269 *ppos = kiocb->ki_pos;
1271 cl_env_put(env, &refcheck);
/*
 * Send file content (through pagecache) somewhere with helper
 *
 * splice_read entry point: package the pipe + flags into IO_SPLICE args
 * and run the common I/O driver with CIT_READ.
 */
1278 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1279 struct pipe_inode_info *pipe, size_t count,
1283 struct vvp_io_args *args;
1288 env = cl_env_get(&refcheck);
1290 RETURN(PTR_ERR(env));
1292 args = vvp_env_args(env, IO_SPLICE);
1293 args->u.splice.via_pipe = pipe;
1294 args->u.splice.via_flags = flags;
1296 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1297 cl_env_put(env, &refcheck);
/*
 * Apply striping EA (lum/lum_size) to a file via an intent open against the
 * MDS. Fails with -EEXIST if the inode already has a layout; the open handle
 * obtained for the intent is released immediately afterwards.
 */
1301 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1302 __u64 flags, struct lov_user_md *lum,
1305 struct lov_stripe_md *lsm = NULL;
1306 struct lookup_intent oit = {
/* open by FID so the intent targets this exact inode */
1308 .it_flags = flags | MDS_OPEN_BY_FID,
/* a pre-existing layout means the stripe can no longer be (re)set */
1313 lsm = ccc_inode_lsm_get(inode);
1315 ccc_inode_lsm_put(inode, lsm);
1316 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1317 PFID(ll_inode2fid(inode)));
1318 GOTO(out, rc = -EEXIST);
1321 ll_inode_size_lock(inode);
1322 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1324 GOTO(out_unlock, rc);
1326 rc = oit.d.lustre.it_status;
1328 GOTO(out_unlock, rc);
/* the intent open left an MDS open handle; close it right away */
1330 ll_release_openhandle(file->f_dentry, &oit);
1333 ll_inode_size_unlock(inode);
1334 ll_intent_release(&oit);
1335 ccc_inode_lsm_put(inode, lsm);
/* layout set (or attempt finished): drop the O_LOV_DELAY_CREATE marker */
1337 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA (layout) for @filename from the MDS via
 * md_getattr_name(). On success *lmmp/*lmm_size point into the reply
 * buffer, so *request is handed back to the caller, who must keep the
 * request alive while using the EA and free it afterwards.
 */
1342 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1343 struct lov_mds_md **lmmp, int *lmm_size,
1344 struct ptlrpc_request **request)
1346 struct ll_sb_info *sbi = ll_i2sbi(inode);
1347 struct mdt_body *body;
1348 struct lov_mds_md *lmm = NULL;
1349 struct ptlrpc_request *req = NULL;
1350 struct md_op_data *op_data;
/* size the reply buffer by the MDT's default max EA size */
1353 rc = ll_get_default_mdsize(sbi, &lmmsize);
1357 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1358 strlen(filename), lmmsize,
1359 LUSTRE_OPC_ANY, NULL);
1360 if (IS_ERR(op_data))
1361 RETURN(PTR_ERR(op_data));
1363 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1364 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1365 ll_finish_md_op_data(op_data);
1367 CDEBUG(D_INFO, "md_getattr_name failed "
1368 "on %s: rc %d\n", filename, rc);
1372 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1373 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1375 lmmsize = body->mbo_eadatasize;
/* no EA returned (or EA bits not valid) means the file has no layout */
1377 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1379 GOTO(out, rc = -ENODATA);
1382 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1383 LASSERT(lmm != NULL);
1385 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1386 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1387 GOTO(out, rc = -EPROTO);
1391 * This is coming from the MDS, so is probably in
1392 * little endian. We convert it to host endian before
1393 * passing it to userspace.
/* only swab on big-endian hosts, where LOV_MAGIC != its LE encoding */
1395 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1398 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1399 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1402 /* if function called for a directory - we should
1403 * avoid swabbing non-existent lsm objects */
1404 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1405 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1406 if (S_ISREG(body->mbo_mode))
1407 lustre_swab_lov_user_md_objects(
1408 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1410 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1411 lustre_swab_lov_user_md_v3(
1412 (struct lov_user_md_v3 *)lmm);
1413 if (S_ISREG(body->mbo_mode))
1414 lustre_swab_lov_user_md_objects(
1415 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1422 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA: root-only path to set a raw striping EA copied from
 * userspace, opening with MDS_OPEN_HAS_OBJS (objects already allocated).
 */
1427 static int ll_lov_setea(struct inode *inode, struct file *file,
1430 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1431 struct lov_user_md *lump;
/* buffer sized for a v1 lum plus a single OST object entry — confirm
 * against full source whether larger EAs are handled elsewhere */
1432 int lum_size = sizeof(struct lov_user_md) +
1433 sizeof(struct lov_user_ost_data);
/* privileged operation: raw EAs can reference arbitrary objects */
1437 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1440 OBD_ALLOC_LARGE(lump, lum_size);
1444 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1445 OBD_FREE_LARGE(lump, lum_size);
1449 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1451 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the file's layout to the userspace lum buffer via the cl_object
 * layer (handles size negotiation and copy_to_user internally).
 */
1455 static int ll_file_getstripe(struct inode *inode,
1456 struct lov_user_md __user *lum)
1463 env = cl_env_get(&refcheck);
1465 RETURN(PTR_ERR(env));
1467 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1468 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE: copy the user's lum into a kernel buffer, apply it
 * as the file layout, then refresh the layout generation and echo the
 * resulting (server-filled) layout back to the caller's buffer.
 */
1472 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1475 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1476 struct lov_user_md *klum;
1478 __u64 flags = FMODE_WRITE;
/* copies and validates the user md; presumably also sets lum_size — confirm */
1481 rc = ll_copy_user_md(lum, &klum);
1486 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* zero stripe_count in the user buffer before reading back the real layout */
1490 put_user(0, &lum->lmm_stripe_count);
1492 ll_layout_refresh(inode, &gen);
1493 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1496 OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK: take a group lock with gid @arg on behalf of this open
 * file descriptor. Only one group lock per fd; the fd_flags check is done
 * under lli_lock both before and after the (sleeping) cl_get_grouplock()
 * call, since a racing thread may have acquired one in between.
 */
1501 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1503 struct ll_inode_info *lli = ll_i2info(inode);
1504 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1505 struct ccc_grouplock grouplock;
1510 CWARN("group id for group lock must not be 0\n");
/* nolock mounts cannot support group locks */
1514 if (ll_file_nolock(file))
1515 RETURN(-EOPNOTSUPP);
1517 spin_lock(&lli->lli_lock);
1518 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1519 CWARN("group lock already existed with gid %lu\n",
1520 fd->fd_grouplock.cg_gid);
1521 spin_unlock(&lli->lli_lock);
1524 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1525 spin_unlock(&lli->lli_lock);
/* may block unless the file was opened O_NONBLOCK */
1527 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1528 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* recheck under the lock: another thread may have won while we slept */
1532 spin_lock(&lli->lli_lock);
1533 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1534 spin_unlock(&lli->lli_lock);
1535 CERROR("another thread just won the race\n");
1536 cl_put_grouplock(&grouplock);
1540 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1541 fd->fd_grouplock = grouplock;
1542 spin_unlock(&lli->lli_lock);
1544 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK: release the group lock with gid @arg held by this
 * fd. The fd's grouplock state is detached under lli_lock, then the lock
 * itself is dropped outside the spinlock (cl_put_grouplock may sleep).
 */
1548 static int ll_put_grouplock(struct inode *inode, struct file *file,
1551 struct ll_inode_info *lli = ll_i2info(inode);
1552 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1553 struct ccc_grouplock grouplock;
1556 spin_lock(&lli->lli_lock);
1557 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1558 spin_unlock(&lli->lli_lock);
1559 CWARN("no group lock held\n");
1562 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* refuse to release a lock the caller doesn't actually hold */
1564 if (fd->fd_grouplock.cg_gid != arg) {
1565 CWARN("group lock %lu doesn't match current id %lu\n",
1566 arg, fd->fd_grouplock.cg_gid);
1567 spin_unlock(&lli->lli_lock);
/* detach state first so the fd is consistent before we drop the lock */
1571 grouplock = fd->fd_grouplock;
1572 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1573 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1574 spin_unlock(&lli->lli_lock);
1576 cl_put_grouplock(&grouplock);
1577 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1582 * Close inode open handle
1584 * \param dentry [in] dentry which contains the inode
1585 * \param it [in,out] intent which contains open info and result
1588 * \retval <0 failure
/*
 * Close the MDS open handle carried by a successful open intent (see the
 * doc comment above). No-op for the filesystem root or when the intent did
 * not actually perform an open.
 */
1590 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1592 struct inode *inode = dentry->d_inode;
1593 struct obd_client_handle *och;
1599 /* Root ? Do nothing. */
1600 if (dentry->d_inode->i_sb->s_root == dentry)
1603 /* No open handle to close? Move away */
1604 if (!it_disposition(it, DISP_OPEN_OPEN))
1607 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1609 OBD_ALLOC(och, sizeof(*och));
1611 GOTO(out, rc = -ENOMEM);
/* transfer open-handle info from the intent into och */
1613 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1615 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1618 /* this one is in place of ll_file_open */
1619 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1620 ptlrpc_req_finished(it->d.lustre.it_data);
1621 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1627 * Get size for inode for which FIEMAP mapping is requested.
1628 * Make the FIEMAP get_info call and returns the result.
1629 * \param fiemap kernel buffer to hold extents
1630 * \param num_bytes kernel buffer size
/*
 * Core FIEMAP worker (see the doc comment above): validate the requested
 * flags, optionally flush dirty pages for FIEMAP_FLAG_SYNC, then build an
 * ll_fiemap_info_key and ask the cl_object layer to fill in the extents.
 */
1632 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1638 struct ll_fiemap_info_key fmkey = { .name = KEY_FIEMAP, };
1641 /* Checks for fiemap flags */
1642 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* report back which flags we support before failing — confirm error path */
1643 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1647 /* Check for FIEMAP_FLAG_SYNC */
1648 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1649 rc = filemap_fdatawrite(inode->i_mapping);
1654 env = cl_env_get(&refcheck);
1656 RETURN(PTR_ERR(env));
/* a zero cached size may just mean we never glimpsed; fetch it from OSTs */
1658 if (i_size_read(inode) == 0) {
1659 rc = ll_glimpse_size(inode);
1664 fmkey.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1665 obdo_from_inode(&fmkey.oa, inode, OBD_MD_FLSIZE);
1666 obdo_set_parent_fid(&fmkey.oa, &ll_i2info(inode)->lli_fid);
1668 /* If filesize is 0, then there would be no objects for mapping */
1669 if (fmkey.oa.o_size == 0) {
1670 fiemap->fm_mapped_extents = 0;
1674 fmkey.fiemap = *fiemap;
1676 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1677 &fmkey, fiemap, &num_bytes);
1679 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a path via the MDC. Permitted for
 * CAP_DAC_READ_SEARCH holders, or for everyone if the mount has the
 * user_fid2path flag. The user-supplied gf_pathlen sizes the reply buffer.
 */
1683 int ll_fid2path(struct inode *inode, void __user *arg)
1685 struct obd_export *exp = ll_i2mdexp(inode);
1686 const struct getinfo_fid2path __user *gfin = arg;
1688 struct getinfo_fid2path *gfout;
1694 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1695 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1698 /* Only need to get the buflen */
1699 if (get_user(pathlen, &gfin->gf_pathlen))
/* bound the allocation by the user-controlled length */
1702 if (pathlen > PATH_MAX)
1705 outsize = sizeof(*gfout) + pathlen;
1706 OBD_ALLOC(gfout, outsize);
/* copy only the fixed header; the path buffer is filled by the server */
1710 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1711 GOTO(gf_free, rc = -EFAULT);
1713 /* Call mdc_iocontrol */
1714 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1718 if (copy_to_user(arg, gfout, outsize))
1722 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size and allocate a kernel fiemap buffer from
 * the user's fm_extent_count (with overflow check), copy the request in,
 * run ll_do_fiemap(), and copy the header plus mapped extents back out.
 */
1726 static int ll_ioctl_fiemap(struct inode *inode, struct fiemap __user *arg)
1728 struct fiemap *fiemap;
1734 /* Get the extent count so we can calculate the size of
1735 * required fiemap buffer */
1736 if (get_user(extent_count, &arg->fm_extent_count))
/* reject counts that would overflow the num_bytes computation below */
1740 (SIZE_MAX - sizeof(*fiemap)) / sizeof(struct ll_fiemap_extent))
1742 num_bytes = sizeof(*fiemap) + (extent_count *
1743 sizeof(struct ll_fiemap_extent));
1745 OBD_ALLOC_LARGE(fiemap, num_bytes);
1749 /* get the fiemap value */
1750 if (copy_from_user(fiemap, arg, sizeof(*fiemap)))
1751 GOTO(error, rc = -EFAULT);
1753 /* If fm_extent_count is non-zero, read the first extent since
1754 * it is used to calculate end_offset and device from previous
1756 if (extent_count != 0) {
1757 if (copy_from_user(&fiemap->fm_extents[0],
1758 (char __user *)arg + sizeof(*fiemap),
1759 sizeof(struct ll_fiemap_extent)))
1760 GOTO(error, rc = -EFAULT);
1763 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* copy back the header plus only the extents actually mapped */
1767 ret_bytes = sizeof(struct fiemap);
1769 if (extent_count != 0)
1770 ret_bytes += (fiemap->fm_mapped_extents *
1771 sizeof(struct ll_fiemap_extent));
1773 if (copy_to_user((void __user *)arg, fiemap, ret_bytes))
1777 OBD_FREE_LARGE(fiemap, num_bytes);
1782 * Read the data_version for inode.
1784 * This value is computed using stripe object version on OST.
1785 * Version is computed using server side locking.
1787 * @param flags if do sync on the OST side;
1789 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1790 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * Compute the inode's data version via the cl_object layer (see the doc
 * comment above for flag semantics). An inode with no cl object is treated
 * as version 0.
 */
1792 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1799 /* If no file object initialized, we consider its version is 0. */
1800 if (ll_i2info(inode)->lli_clob == NULL) {
1805 env = cl_env_get(&refcheck);
1807 RETURN(PTR_ERR(env));
1809 rc = cl_object_data_version(env, ll_i2info(inode)->lli_clob,
1810 data_version, flags);
1811 cl_env_put(env, &refcheck);
1816 * Trigger a HSM release request for the provided inode.
/*
 * HSM release: take a write lease on the file, flush and snapshot its
 * data_version, merge attributes, then ask the MDT to release the file's
 * OST data via a close with MDS_HSM_RELEASE semantics.
 */
1818 int ll_hsm_release(struct inode *inode)
1820 struct cl_env_nest nest;
1822 struct obd_client_handle *och = NULL;
1823 __u64 data_version = 0;
1827 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1828 ll_get_fsname(inode->i_sb, NULL, 0),
1829 PFID(&ll_i2info(inode)->lli_fid));
/* an exclusive lease guarantees nobody writes while we release */
1831 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1833 GOTO(out, rc = PTR_ERR(och));
1835 /* Grab latest data_version and [am]time values */
1836 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1840 env = cl_env_nested_get(&nest);
1842 GOTO(out, rc = PTR_ERR(env));
1844 ll_merge_attr(env, inode);
1845 cl_env_nested_put(&nest, env);
1847 /* Release the file.
1848 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1849 * we still need it to pack l_remote_handle to MDT. */
1850 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
/* cleanup path: the lease (if obtained) must always be closed */
1856 if (och != NULL && !IS_ERR(och)) /* close the file */
1857 ll_lease_close(och, inode, NULL);
/*
 * Scratch state for ll_swap_layouts(): both inodes, saved [am]times for
 * optional restore, and per-file data-version check requests. Kept in one
 * heap struct so the pairs can be swap()ed to sequentialize lock ordering.
 */
1862 struct ll_swap_stack {
1863 struct iattr ia1, ia2;
1865 struct inode *inode1, *inode2;
1866 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS: atomically exchange the layouts of two regular
 * files on the same filesystem via the MDT. Optionally flushes dirty cache
 * (group lock), verifies data versions, and preserves [am]times.
 */
1869 static int ll_swap_layouts(struct file *file1, struct file *file2,
1870 struct lustre_swap_layouts *lsl)
1872 struct mdc_swap_layouts msl;
1873 struct md_op_data *op_data;
1876 struct ll_swap_stack *llss = NULL;
1879 OBD_ALLOC_PTR(llss);
1883 llss->inode1 = file1->f_dentry->d_inode;
1884 llss->inode2 = file2->f_dentry->d_inode;
1886 if (!S_ISREG(llss->inode2->i_mode))
1887 GOTO(free, rc = -EINVAL);
/* both files must be writable by the caller */
1889 if (inode_permission(llss->inode1, MAY_WRITE) ||
1890 inode_permission(llss->inode2, MAY_WRITE))
1891 GOTO(free, rc = -EPERM);
1893 if (llss->inode2->i_sb != llss->inode1->i_sb)
1894 GOTO(free, rc = -EXDEV);
1896 /* we use 2 bool because it is easier to swap than 2 bits */
1897 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1898 llss->check_dv1 = true;
1900 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1901 llss->check_dv2 = true;
1903 /* we cannot use lsl->sl_dvX directly because we may swap them */
1904 llss->dv1 = lsl->sl_dv1;
1905 llss->dv2 = lsl->sl_dv2;
1907 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1908 if (rc == 0) /* same file, done! */
/* order by FID so two concurrent swaps cannot deadlock on lock order */
1911 if (rc < 0) { /* sequentialize it */
1912 swap(llss->inode1, llss->inode2);
1914 swap(llss->dv1, llss->dv2);
1915 swap(llss->check_dv1, llss->check_dv2);
1919 if (gid != 0) { /* application asks to flush dirty cache */
1920 rc = ll_get_grouplock(llss->inode1, file1, gid);
1924 rc = ll_get_grouplock(llss->inode2, file2, gid);
1926 ll_put_grouplock(llss->inode1, file1, gid);
1931 /* to be able to restore mtime and atime after swap
1932 * we need to first save them */
1934 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
1935 llss->ia1.ia_mtime = llss->inode1->i_mtime;
1936 llss->ia1.ia_atime = llss->inode1->i_atime;
1937 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
1938 llss->ia2.ia_mtime = llss->inode2->i_mtime;
1939 llss->ia2.ia_atime = llss->inode2->i_atime;
1940 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
1943 /* ultimate check, before swapping the layouts we check if
1944 * dataversion has changed (if requested) */
1945 if (llss->check_dv1) {
1946 rc = ll_data_version(llss->inode1, &dv, 0);
1949 if (dv != llss->dv1)
1950 GOTO(putgl, rc = -EAGAIN);
1953 if (llss->check_dv2) {
1954 rc = ll_data_version(llss->inode2, &dv, 0);
1957 if (dv != llss->dv2)
1958 GOTO(putgl, rc = -EAGAIN);
1961 /* struct md_op_data is used to send the swap args to the mdt
1962 * only flags is missing, so we use struct mdc_swap_layouts
1963 * through the md_op_data->op_data */
1964 /* flags from user space have to be converted before they are sent to
1965 * server, no flag is sent today, they are only used on the client */
1968 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
1969 0, LUSTRE_OPC_ANY, &msl);
1970 if (IS_ERR(op_data))
1971 GOTO(free, rc = PTR_ERR(op_data));
1973 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
1974 sizeof(*op_data), op_data, NULL);
1975 ll_finish_md_op_data(op_data);
/* release the group locks in reverse acquisition order */
1979 ll_put_grouplock(llss->inode2, file2, gid);
1980 ll_put_grouplock(llss->inode1, file1, gid);
1983 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
1987 /* clear useless flags */
1988 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
1989 llss->ia1.ia_valid &= ~ATTR_MTIME;
1990 llss->ia2.ia_valid &= ~ATTR_MTIME;
1993 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
1994 llss->ia1.ia_valid &= ~ATTR_ATIME;
1995 llss->ia2.ia_valid &= ~ATTR_ATIME;
1998 /* update time if requested */
/* note the crossover: ia2 (saved from inode2) is applied to file1 and
 * vice versa, since the layouts — and data — have been exchanged */
2000 if (llss->ia2.ia_valid != 0) {
2001 mutex_lock(&llss->inode1->i_mutex);
2002 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2003 mutex_unlock(&llss->inode1->i_mutex);
2006 if (llss->ia1.ia_valid != 0) {
2009 mutex_lock(&llss->inode2->i_mutex);
2010 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2011 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Validate and forward an HSM state-set request to the MDT. Rejects
 * unknown flag bits, privileged flags from non-root users, and archive
 * ids beyond the supported range.
 */
2023 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2025 struct md_op_data *op_data;
2029 /* Detect out-of-range masks */
2030 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2033 /* Non-root users are forbidden to set or clear flags which are
2034 * NOT defined in HSM_USER_MASK. */
2035 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2036 !cfs_capable(CFS_CAP_SYS_ADMIN))
2039 /* Detect out-of-range archive id */
2040 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2041 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
/* hss travels to the MDT packed inside op_data */
2044 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2045 LUSTRE_OPC_ANY, hss);
2046 if (IS_ERR(op_data))
2047 RETURN(PTR_ERR(op_data));
2049 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2050 sizeof(*op_data), op_data, NULL);
2052 ll_finish_md_op_data(op_data);
/*
 * HSM import: mark a regular file as archived+exists+released for the given
 * archive id, then restore the owner/mode/size/timestamps recorded in the
 * hsm_user_import descriptor via ll_setattr_raw().
 */
2057 static int ll_hsm_import(struct inode *inode, struct file *file,
2058 struct hsm_user_import *hui)
2060 struct hsm_state_set *hss = NULL;
2061 struct iattr *attr = NULL;
2065 if (!S_ISREG(inode->i_mode))
2071 GOTO(out, rc = -ENOMEM);
/* set HSM state: archived copy exists, local data released */
2073 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2074 hss->hss_archive_id = hui->hui_archive_id;
2075 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2076 rc = ll_hsm_state_set(inode, hss);
2080 OBD_ALLOC_PTR(attr);
2082 GOTO(out, rc = -ENOMEM);
/* rebuild the file attributes from the import descriptor */
2084 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2085 attr->ia_mode |= S_IFREG;
2086 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2087 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2088 attr->ia_size = hui->hui_size;
2089 attr->ia_mtime.tv_sec = hui->hui_mtime;
2090 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2091 attr->ia_atime.tv_sec = hui->hui_atime;
2092 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2094 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2095 ATTR_UID | ATTR_GID |
2096 ATTR_MTIME | ATTR_MTIME_SET |
2097 ATTR_ATIME | ATTR_ATIME_SET;
2099 mutex_lock(&inode->i_mutex);
2101 rc = ll_setattr_raw(file->f_dentry, attr, true);
2105 mutex_unlock(&inode->i_mutex);
/* Map an open fmode to the LL_LEASE_{RD,WR}LCK bitmask reported to userspace. */
2117 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2119 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2120 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * Main ioctl dispatcher for regular files: handles llite-specific commands
 * (striping, group locks, HSM, leases, fiemap, fid2path, ...) locally and
 * falls through to registered llioc handlers and finally the DT export.
 * NOTE(review): many interleaved lines (declarations, error returns, braces)
 * are elided in this dump — verify control flow against the full source.
 */
2124 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2126 struct inode *inode = file->f_dentry->d_inode;
2127 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2131 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2132 PFID(ll_inode2fid(inode)), inode, cmd);
2133 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2135 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2136 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2140 case LL_IOC_GETFLAGS:
2141 /* Get the current value of the file flags */
2142 return put_user(fd->fd_flags, (int __user *)arg);
2143 case LL_IOC_SETFLAGS:
2144 case LL_IOC_CLRFLAGS:
2145 /* Set or clear specific file flags */
2146 /* XXX This probably needs checks to ensure the flags are
2147 * not abused, and to handle any flag side effects.
2149 if (get_user(flags, (int __user *) arg))
2152 if (cmd == LL_IOC_SETFLAGS) {
/* IGNORE_LOCK only makes sense for O_DIRECT IO */
2153 if ((flags & LL_FILE_IGNORE_LOCK) &&
2154 !(file->f_flags & O_DIRECT)) {
2155 CERROR("%s: unable to disable locking on "
2156 "non-O_DIRECT file\n", current->comm);
2160 fd->fd_flags |= flags;
2162 fd->fd_flags &= ~flags;
2165 case LL_IOC_LOV_SETSTRIPE:
2166 RETURN(ll_lov_setstripe(inode, file, arg));
2167 case LL_IOC_LOV_SETEA:
2168 RETURN(ll_lov_setea(inode, file, arg));
2169 case LL_IOC_LOV_SWAP_LAYOUTS: {
2171 struct lustre_swap_layouts lsl;
2173 if (copy_from_user(&lsl, (char __user *)arg,
2174 sizeof(struct lustre_swap_layouts)))
/* both fds must be open for writing for a layout swap */
2177 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2180 file2 = fget(lsl.sl_fd);
2185 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2186 rc = ll_swap_layouts(file, file2, &lsl);
2190 case LL_IOC_LOV_GETSTRIPE:
2191 RETURN(ll_file_getstripe(inode,
2192 (struct lov_user_md __user *)arg));
2193 case FSFILT_IOC_FIEMAP:
2194 RETURN(ll_ioctl_fiemap(inode, (struct fiemap __user *)arg));
2195 case FSFILT_IOC_GETFLAGS:
2196 case FSFILT_IOC_SETFLAGS:
2197 RETURN(ll_iocontrol(inode, file, cmd, arg));
2198 case FSFILT_IOC_GETVERSION_OLD:
2199 case FSFILT_IOC_GETVERSION:
2200 RETURN(put_user(inode->i_generation, (int __user *)arg));
2201 case LL_IOC_GROUP_LOCK:
2202 RETURN(ll_get_grouplock(inode, file, arg));
2203 case LL_IOC_GROUP_UNLOCK:
2204 RETURN(ll_put_grouplock(inode, file, arg));
2205 case IOC_OBD_STATFS:
2206 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2208 /* We need to special case any other ioctls we want to handle,
2209 * to send them to the MDS/OST as appropriate and to properly
2210 * network encode the arg field.
2211 case FSFILT_IOC_SETVERSION_OLD:
2212 case FSFILT_IOC_SETVERSION:
2214 case LL_IOC_FLUSHCTX:
2215 RETURN(ll_flush_ctx(inode));
2216 case LL_IOC_PATH2FID: {
2217 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2218 sizeof(struct lu_fid)))
2223 case LL_IOC_GETPARENT:
2224 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2226 case OBD_IOC_FID2PATH:
2227 RETURN(ll_fid2path(inode, (void __user *)arg));
2228 case LL_IOC_DATA_VERSION: {
2229 struct ioc_data_version idv;
2232 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* only the flush flags are meaningful from userspace */
2235 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2236 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2239 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2245 case LL_IOC_GET_MDTIDX: {
2248 mdtidx = ll_get_mdt_idx(inode);
2252 if (put_user((int)mdtidx, (int __user *)arg))
2257 case OBD_IOC_GETDTNAME:
2258 case OBD_IOC_GETMDNAME:
2259 RETURN(ll_get_obd_name(inode, cmd, arg));
2260 case LL_IOC_HSM_STATE_GET: {
2261 struct md_op_data *op_data;
2262 struct hsm_user_state *hus;
2269 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2270 LUSTRE_OPC_ANY, hus);
2271 if (IS_ERR(op_data)) {
2273 RETURN(PTR_ERR(op_data));
2276 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2279 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2282 ll_finish_md_op_data(op_data);
2286 case LL_IOC_HSM_STATE_SET: {
2287 struct hsm_state_set *hss;
2294 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2299 rc = ll_hsm_state_set(inode, hss);
2304 case LL_IOC_HSM_ACTION: {
2305 struct md_op_data *op_data;
2306 struct hsm_current_action *hca;
2313 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2314 LUSTRE_OPC_ANY, hca);
2315 if (IS_ERR(op_data)) {
2317 RETURN(PTR_ERR(op_data));
2320 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2323 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2326 ll_finish_md_op_data(op_data);
2330 case LL_IOC_SET_LEASE: {
2331 struct ll_inode_info *lli = ll_i2info(inode);
2332 struct obd_client_handle *och = NULL;
/* requested lease mode must not exceed the fd's open mode */
2337 case LL_LEASE_WRLCK:
2338 if (!(file->f_mode & FMODE_WRITE))
2340 fmode = FMODE_WRITE;
2342 case LL_LEASE_RDLCK:
2343 if (!(file->f_mode & FMODE_READ))
2347 case LL_LEASE_UNLCK:
/* detach the fd's lease under the mutex, close it outside */
2348 mutex_lock(&lli->lli_och_mutex);
2349 if (fd->fd_lease_och != NULL) {
2350 och = fd->fd_lease_och;
2351 fd->fd_lease_och = NULL;
2353 mutex_unlock(&lli->lli_och_mutex);
2358 fmode = och->och_flags;
2359 rc = ll_lease_close(och, inode, &lease_broken);
/* report the lease type that was just released */
2366 RETURN(ll_lease_type_from_fmode(fmode));
2371 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2373 /* apply for lease */
2374 och = ll_lease_open(inode, file, fmode, 0);
2376 RETURN(PTR_ERR(och));
2379 mutex_lock(&lli->lli_och_mutex);
2380 if (fd->fd_lease_och == NULL) {
2381 fd->fd_lease_och = och;
2384 mutex_unlock(&lli->lli_och_mutex);
2386 /* impossible now that only excl is supported for now */
2387 ll_lease_close(och, inode, &lease_broken);
2392 case LL_IOC_GET_LEASE: {
2393 struct ll_inode_info *lli = ll_i2info(inode);
2394 struct ldlm_lock *lock = NULL;
2397 mutex_lock(&lli->lli_och_mutex);
2398 if (fd->fd_lease_och != NULL) {
2399 struct obd_client_handle *och = fd->fd_lease_och;
/* lease is live only while the backing DLM lock is not cancelled */
2401 lock = ldlm_handle2lock(&och->och_lease_handle);
2403 lock_res_and_lock(lock);
2404 if (!ldlm_is_cancel(lock))
2405 fmode = och->och_flags;
2407 unlock_res_and_lock(lock);
2408 LDLM_LOCK_PUT(lock);
2411 mutex_unlock(&lli->lli_och_mutex);
2413 RETURN(ll_lease_type_from_fmode(fmode));
2415 case LL_IOC_HSM_IMPORT: {
2416 struct hsm_user_import *hui;
2422 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2427 rc = ll_hsm_import(inode, file, hui);
/* unknown command: try registered llioc handlers, then the DT export */
2437 ll_iocontrol_call(inode, file, cmd, arg, &err))
2440 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2441 (void __user *)arg));
2446 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Commit a computed seek offset: reject negative offsets (unless the file
 * allows unsigned offsets) and offsets beyond maxsize, otherwise store the
 * new position and reset f_version.
 */
2447 static inline loff_t
2448 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2450 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2452 if (offset > maxsize)
2455 if (offset != file->f_pos) {
2456 file->f_pos = offset;
2457 file->f_version = 0;
/*
 * Local copy of generic_file_llseek_size() for kernels that lack it
 * (guarded by #ifndef HAVE_FILE_LLSEEK_SIZE): handle SEEK_CUR under
 * i_mutex to avoid races, and SEEK_DATA/SEEK_HOLE against a simple
 * "all data up to eof, virtual hole after" model.
 */
2463 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2464 loff_t maxsize, loff_t eof)
2466 struct inode *inode = file->f_dentry->d_inode;
2474 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2475 * position-querying operation. Avoid rewriting the "same"
2476 * f_pos value back to the file because a concurrent read(),
2477 * write() or lseek() might have altered it
2482 * f_lock protects against read/modify/write race with other
2483 * SEEK_CURs. Note that parallel writes and reads behave
2486 mutex_lock(&inode->i_mutex);
2487 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2488 mutex_unlock(&inode->i_mutex);
2492 * In the generic case the entire file is data, so as long as
2493 * offset isn't at the end of the file then the offset is data.
2500 * There is a virtual hole at the end of the file, so as long as
2501 * offset isn't i_size or larger, return i_size.
2509 return llseek_execute(file, offset, maxsize);
/*
 * llseek handler: for SEEK_END/SEEK_HOLE/SEEK_DATA first glimpse the true
 * size from the OSTs, then delegate to the (possibly local) generic
 * llseek-with-size helper bounded by the filesystem's max byte offset.
 */
2513 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2515 struct inode *inode = file->f_dentry->d_inode;
2516 loff_t retval, eof = 0;
/* provisional target, for tracing only */
2519 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2520 (origin == SEEK_CUR) ? file->f_pos : 0);
2521 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2522 PFID(ll_inode2fid(inode)), inode, retval, retval,
2524 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
/* these origins need an up-to-date size from the OSTs */
2526 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2527 retval = ll_glimpse_size(inode);
2530 eof = i_size_read(inode);
2533 retval = ll_generic_file_llseek_size(file, offset, origin,
2534 ll_file_maxbytes(inode), eof);
/*
 * flush handler (called on close(2)): surface any async writeback errors
 * recorded on the inode/cl-object as -EIO, but only once — errors already
 * reported through fd_write_failed are not reported again.
 */
2538 static int ll_flush(struct file *file, fl_owner_t id)
2540 struct inode *inode = file->f_dentry->d_inode;
2541 struct ll_inode_info *lli = ll_i2info(inode);
2542 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2545 LASSERT(!S_ISDIR(inode->i_mode));
2547 /* catch async errors that were recorded back when async writeback
2548 * failed for pages in this mapping. */
2549 rc = lli->lli_async_rc;
/* read-and-clear: each error is consumed exactly once */
2550 lli->lli_async_rc = 0;
2551 if (lli->lli_clob != NULL) {
2552 err = lov_read_and_clear_async_rc(lli->lli_clob);
2557 /* The application has been told write failure already.
2558 * Do not report failure again. */
2559 if (fd->fd_write_failed)
2561 return rc ? -EIO : 0;
2565 * Called to make sure a portion of file has been written out.
2566 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2568 * Return how many pages have been written.
/*
 * Run a CIT_FSYNC io over [start, end] with the given fsync mode (see the
 * doc comment above). On success returns the number of pages written;
 * OSS write capability is attached to the fsync descriptor when enabled.
 */
2570 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2571 enum cl_fsync_mode mode, int ignore_layout)
2573 struct cl_env_nest nest;
2576 struct obd_capa *capa = NULL;
2577 struct cl_fsync_io *fio;
/* reject any mode outside the known set */
2581 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2582 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2585 env = cl_env_nested_get(&nest);
2587 RETURN(PTR_ERR(env));
2589 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2591 io = ccc_env_thread_io(env);
2592 io->ci_obj = ll_i2info(inode)->lli_clob;
2593 io->ci_ignore_layout = ignore_layout;
2595 /* initialize parameters for sync */
2596 fio = &io->u.ci_fsync;
2597 fio->fi_capa = capa;
2598 fio->fi_start = start;
2600 fio->fi_fid = ll_inode2fid(inode);
2601 fio->fi_mode = mode;
2602 fio->fi_nr_written = 0;
2604 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2605 result = cl_io_loop(env, io);
2607 result = io->ci_result;
/* success: report pages written rather than 0 */
2609 result = fio->fi_nr_written;
2610 cl_io_fini(env, io);
2611 cl_env_nested_put(&nest, env);
2619 * When dentry is provided (the 'else' case), *file->f_dentry may be
2620 * null and dentry must be used directly rather than pulled from
2621 * *file->f_dentry as is done otherwise.
/*
 * fsync handler, with three signature variants for kernel compatibility
 * (4-arg range fsync, 2-arg, and the old dentry-taking form; see the note
 * above about using 'dentry' directly in the old form). Waits for in-flight
 * writeback, surfaces recorded async errors, syncs metadata via md_fsync()
 * and data via cl_sync_file_range(CL_FSYNC_ALL).
 */
2624 #ifdef HAVE_FILE_FSYNC_4ARGS
2625 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2627 struct dentry *dentry = file->f_dentry;
2628 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2629 int ll_fsync(struct file *file, int datasync)
2631 struct dentry *dentry = file->f_dentry;
2633 loff_t end = LLONG_MAX;
2635 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2638 loff_t end = LLONG_MAX;
2640 struct inode *inode = dentry->d_inode;
2641 struct ll_inode_info *lli = ll_i2info(inode);
2642 struct ptlrpc_request *req;
2643 struct obd_capa *oc;
2647 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2648 PFID(ll_inode2fid(inode)), inode);
2649 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
/* 4-arg kernels call us without i_mutex: flush the range and lock here */
2651 #ifdef HAVE_FILE_FSYNC_4ARGS
2652 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2653 mutex_lock(&inode->i_mutex);
2655 /* fsync's caller has already called _fdata{sync,write}, we want
2656 * that IO to finish before calling the osc and mdc sync methods */
2657 rc = filemap_fdatawait(inode->i_mapping);
2660 /* catch async errors that were recorded back when async writeback
2661 * failed for pages in this mapping. */
2662 if (!S_ISDIR(inode->i_mode)) {
2663 err = lli->lli_async_rc;
2664 lli->lli_async_rc = 0;
2667 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* sync the metadata on the MDT */
2672 oc = ll_mdscapa_get(inode);
2673 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2679 ptlrpc_req_finished(req);
2681 if (S_ISREG(inode->i_mode)) {
2682 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2684 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2685 if (rc == 0 && err < 0)
/* remember write failure so ll_flush() won't report it twice */
2688 fd->fd_write_failed = true;
2690 fd->fd_write_failed = false;
2693 #ifdef HAVE_FILE_FSYNC_4ARGS
2694 mutex_unlock(&inode->i_mutex);
2700 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2702 struct inode *inode = file->f_dentry->d_inode;
2703 struct ll_sb_info *sbi = ll_i2sbi(inode);
2704 struct ldlm_enqueue_info einfo = {
2705 .ei_type = LDLM_FLOCK,
2706 .ei_cb_cp = ldlm_flock_completion_ast,
2707 .ei_cbdata = file_lock,
2709 struct md_op_data *op_data;
2710 struct lustre_handle lockh = {0};
2711 ldlm_policy_data_t flock = {{0}};
2712 int fl_type = file_lock->fl_type;
2718 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2719 PFID(ll_inode2fid(inode)), file_lock);
2721 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2723 if (file_lock->fl_flags & FL_FLOCK) {
2724 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2725 /* flocks are whole-file locks */
2726 flock.l_flock.end = OFFSET_MAX;
2727 /* For flocks owner is determined by the local file descriptor */
2728 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2729 } else if (file_lock->fl_flags & FL_POSIX) {
2730 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2731 flock.l_flock.start = file_lock->fl_start;
2732 flock.l_flock.end = file_lock->fl_end;
2736 flock.l_flock.pid = file_lock->fl_pid;
2738 /* Somewhat ugly workaround for svc lockd.
2739 * lockd installs custom fl_lmops->lm_compare_owner that checks
2740 * for the fl_owner to be the same (which it always is on local node
2741 * I guess between lockd processes) and then compares pid.
2742 * As such we assign pid to the owner field to make it all work,
2743 * conflict with normal locks is unlikely since pid space and
2744 * pointer space for current->files are not intersecting */
2745 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2746 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
2750 einfo.ei_mode = LCK_PR;
2753 /* An unlock request may or may not have any relation to
2754 * existing locks so we may not be able to pass a lock handle
2755 * via a normal ldlm_lock_cancel() request. The request may even
2756 * unlock a byte range in the middle of an existing lock. In
2757 * order to process an unlock request we need all of the same
2758 * information that is given with a normal read or write record
2759 * lock request. To avoid creating another ldlm unlock (cancel)
2760 * message we'll treat a LCK_NL flock request as an unlock. */
2761 einfo.ei_mode = LCK_NL;
2764 einfo.ei_mode = LCK_PW;
2767 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
2782 flags = LDLM_FL_BLOCK_NOWAIT;
2788 flags = LDLM_FL_TEST_LOCK;
2791 CERROR("unknown fcntl lock command: %d\n", cmd);
2795 /* Save the old mode so that if the mode in the lock changes we
2796 * can decrement the appropriate reader or writer refcount. */
2797 file_lock->fl_type = einfo.ei_mode;
2799 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2800 LUSTRE_OPC_ANY, NULL);
2801 if (IS_ERR(op_data))
2802 RETURN(PTR_ERR(op_data));
2804 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2805 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2806 flock.l_flock.pid, flags, einfo.ei_mode,
2807 flock.l_flock.start, flock.l_flock.end);
2809 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2812 /* Restore the file lock type if not TEST lock. */
2813 if (!(flags & LDLM_FL_TEST_LOCK))
2814 file_lock->fl_type = fl_type;
2816 if ((file_lock->fl_flags & FL_FLOCK) &&
2817 (rc == 0 || file_lock->fl_type == F_UNLCK))
2818 rc2 = flock_lock_file_wait(file, file_lock);
2819 if ((file_lock->fl_flags & FL_POSIX) &&
2820 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2821 !(flags & LDLM_FL_TEST_LOCK))
2822 rc2 = posix_lock_file_wait(file, file_lock);
2824 if (rc2 && file_lock->fl_type != F_UNLCK) {
2825 einfo.ei_mode = LCK_NL;
2826 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2831 ll_finish_md_op_data(op_data);
/*
 * Look up the FID of entry \a name under directory \a parent via an
 * MDS getattr-by-name RPC.
 *
 * \param parent   parent directory inode
 * \param name     entry name (need not be NUL-terminated)
 * \param namelen  length of \a name
 * \param fid[out] set to the entry's FID from the reply body on success
 * \retval 0 on success, negative errno on failure
 */
2836 int ll_get_fid_by_name(struct inode *parent, const char *name,
2837 int namelen, struct lu_fid *fid)
2839 struct md_op_data *op_data = NULL;
2840 struct mdt_body *body;
2841 struct ptlrpc_request *req;
2845 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2846 LUSTRE_OPC_ANY, NULL);
2847 if (IS_ERR(op_data))
2848 RETURN(PTR_ERR(op_data));
/* only the FID is needed from the getattr reply */
2850 op_data->op_valid = OBD_MD_FLID;
2851 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2852 ll_finish_md_op_data(op_data);
2856 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2858 GOTO(out_req, rc = -EFAULT);
2860 *fid = body->mbo_fid1;
2862 ptlrpc_req_finished(req);
/*
 * Migrate directory entry \a name under \a parent to MDT \a mdtidx
 * using a rename RPC with the CLI_MIGRATE flag.
 *
 * The child's FID is taken from the dcache if a cached alias exists,
 * otherwise fetched from the MDS by name. If the object already lives
 * on the target MDT the call is a no-op returning 0.
 *
 * \retval 0 on success (or already on target MDT), negative errno otherwise
 */
2866 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2867 const char *name, int namelen)
2869 struct dentry *dchild = NULL;
2870 struct inode *child_inode = NULL;
2871 struct md_op_data *op_data;
2872 struct ptlrpc_request *request = NULL;
2877 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2878 name, PFID(ll_inode2fid(parent)), mdtidx);
2880 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2881 0, LUSTRE_OPC_ANY, NULL);
2882 if (IS_ERR(op_data))
2883 RETURN(PTR_ERR(op_data));
2885 /* Get child FID first */
2886 qstr.hash = full_name_hash(name, namelen);
2889 dchild = d_lookup(file->f_dentry, &qstr);
2890 if (dchild != NULL) {
2891 if (dchild->d_inode != NULL) {
2892 child_inode = igrab(dchild->d_inode);
2893 if (child_inode != NULL) {
/* hold i_mutex across the migration; released in the cleanup
 * path near the end of this function */
2894 mutex_lock(&child_inode->i_mutex);
2895 op_data->op_fid3 = *ll_inode2fid(child_inode);
/* drop cached aliases: the object is about to move to another MDT */
2896 ll_invalidate_aliases(child_inode);
/* no usable dcache entry: ask the MDS for the FID by name */
2901 rc = ll_get_fid_by_name(parent, name, namelen,
2907 if (!fid_is_sane(&op_data->op_fid3)) {
2908 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
2909 ll_get_fsname(parent->i_sb, NULL, 0), name,
2910 PFID(&op_data->op_fid3));
2911 GOTO(out_free, rc = -EINVAL);
/* skip the rename if the object is already on the target MDT */
2914 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
2919 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
2920 PFID(&op_data->op_fid3), mdtidx);
2921 GOTO(out_free, rc = 0);
2924 op_data->op_mds = mdtidx;
2925 op_data->op_cli_flags = CLI_MIGRATE;
/* migration is implemented as a same-name rename to the target MDT */
2926 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
2927 namelen, name, namelen, &request);
2929 ll_update_times(request, parent);
2931 ptlrpc_req_finished(request);
2936 if (child_inode != NULL) {
/* the old inode is stale after migration; drop its link count
 * so it is not reused */
2937 clear_nlink(child_inode);
2938 mutex_unlock(&child_inode->i_mutex);
2942 ll_finish_md_op_data(op_data);
/* Lock handler used by the -o noflock file_operations (see the
 * ll_file_operations_noflock table below). */
2947 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2955 * test if some locks matching bits and l_req_mode are acquired
2956 * - bits can be in different locks
2957 * - if found clear the common lock bits in *bits
2958 * - the bits not found, are kept in *bits
2960 * \param bits [IN] searched lock bits
2961 * \param l_req_mode [IN] searched lock mode
2962 * \retval boolean, true iff all bits are found
/* See the doxygen comment above: test each requested inodebits bit
 * against already-granted MDC locks; found bits are cleared from *bits. */
2964 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2966 struct lustre_handle lockh;
2967 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four normal modes */
2968 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2969 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2978 fid = &ll_i2info(inode)->lli_fid;
2979 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2980 ldlm_lockname[mode]);
/* TEST_LOCK: probe only, do not take a reference on a matched lock */
2982 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2983 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
/* probe one bit at a time; bits may live in different locks */
2984 policy.l_inodebits.bits = *bits & (1 << i);
2985 if (policy.l_inodebits.bits == 0)
2988 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2989 &policy, mode, &lockh)) {
2990 struct ldlm_lock *lock;
2992 lock = ldlm_handle2lock(&lockh);
2995 ~(lock->l_policy_data.l_inodebits.bits);
2996 LDLM_LOCK_PUT(lock);
2998 *bits &= ~policy.l_inodebits.bits;
/*
 * Match (and take a reference on) a granted MDC inodebits lock covering
 * \a bits in one of the modes in \a mode. Returns the matched mode
 * (0 if none); the lock handle is returned via \a lockh.
 */
3005 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3006 struct lustre_handle *lockh, __u64 flags,
3009 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3014 fid = &ll_i2info(inode)->lli_fid;
3015 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3017 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3018 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Post-process the result of an inode revalidation RPC: translate
 * -ENOENT on special files into success (object already unlinked) and
 * log any other failure.
 */
3023 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3025 /* Already unlinked. Just update nlink and return success */
3026 if (rc == -ENOENT) {
3028 /* This path cannot be hit for regular files unless in
3029 * case of obscure races, so no need to validate
3031 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3033 } else if (rc != 0) {
/* -EACCES/-EIDRM are expected (permission/identity) and only logged
 * at D_INFO; anything else is a real error */
3034 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3035 "%s: revalidate FID "DFID" error: rc = %d\n",
3036 ll_get_fsname(inode->i_sb, NULL, 0),
3037 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate \a dentry's inode attributes with the MDS.
 *
 * Two paths: if the server supports OBD_CONNECT_ATTRFID, an intent
 * getattr/lookup by FID is used (also refreshing the dentry); otherwise
 * a plain md_getattr is issued, but only when no matching ibits lock is
 * already cached locally.
 *
 * \param ibits  MDS inodebits that must be valid after the call
 * \retval 0 on success, negative errno on failure
 */
3043 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3045 struct inode *inode = dentry->d_inode;
3046 struct ptlrpc_request *req = NULL;
3047 struct obd_export *exp;
3051 LASSERT(inode != NULL);
3053 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3054 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3056 exp = ll_i2mdexp(inode);
3058 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3059 * But under CMD case, it caused some lock issues, should be fixed
3060 * with new CMD ibits lock. See bug 12718 */
3061 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3062 struct lookup_intent oit = { .it_op = IT_GETATTR };
3063 struct md_op_data *op_data;
/* only the LOOKUP bit requested: a lighter lookup intent suffices */
3065 if (ibits == MDS_INODELOCK_LOOKUP)
3066 oit.it_op = IT_LOOKUP;
3068 /* Call getattr by fid, so do not provide name at all. */
3069 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3070 dentry->d_inode, NULL, 0, 0,
3071 LUSTRE_OPC_ANY, NULL);
3072 if (IS_ERR(op_data))
3073 RETURN(PTR_ERR(op_data));
3075 rc = md_intent_lock(exp, op_data, &oit, &req,
3076 &ll_md_blocking_ast, 0);
3077 ll_finish_md_op_data(op_data);
3079 rc = ll_inode_revalidate_fini(inode, rc);
3083 rc = ll_revalidate_it_finish(req, &oit, dentry);
3085 ll_intent_release(&oit);
3089 /* Unlinked? Unhash dentry, so it is not picked up later by
3090 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3091 here to preserve get_cwd functionality on 2.6.
3093 if (!dentry->d_inode->i_nlink)
3094 d_lustre_invalidate(dentry, 0);
3096 ll_lookup_finish_locks(&oit, dentry);
3097 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3098 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3099 u64 valid = OBD_MD_FLGETATTR;
3100 struct md_op_data *op_data;
/* regular files also need striping (EA) data in the reply */
3103 if (S_ISREG(inode->i_mode)) {
3104 rc = ll_get_default_mdsize(sbi, &ealen);
3107 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3110 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3111 0, ealen, LUSTRE_OPC_ANY,
3113 if (IS_ERR(op_data))
3114 RETURN(PTR_ERR(op_data));
3116 op_data->op_valid = valid;
3117 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3118 * capa for this inode. Because we only keep capas of dirs
3120 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3121 ll_finish_md_op_data(op_data);
3123 rc = ll_inode_revalidate_fini(inode, rc);
3127 rc = ll_prep_inode(&inode, req, NULL, NULL);
3130 ptlrpc_req_finished(req);
/*
 * For a striped directory, merge attributes (nlink, blocks, size,
 * a/m/ctime) from all stripes via md_merge_attr() and store them in
 * the inode / ll_inode_info.
 */
3134 static int ll_merge_md_attr(struct inode *inode)
3136 struct cl_attr attr = { 0 };
3139 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3140 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3141 &attr, ll_md_blocking_ast);
3145 set_nlink(inode, attr.cat_nlink);
3146 inode->i_blocks = attr.cat_blocks;
3147 i_size_write(inode, attr.cat_size);
/* timestamps are cached in lli_*time and copied to the inode by the
 * callers (see ll_inode_revalidate) */
3149 ll_i2info(inode)->lli_atime = attr.cat_atime;
3150 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3151 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * Full revalidation: refresh MDS attributes via __ll_inode_revalidate(),
 * then, for regular files, glimpse the file size from the OSTs; for
 * striped directories merge per-stripe attributes instead.
 */
3157 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3159 struct inode *inode = dentry->d_inode;
3163 rc = __ll_inode_revalidate(dentry, ibits);
3167 /* if object isn't regular file, don't validate size */
3168 if (!S_ISREG(inode->i_mode)) {
3169 if (S_ISDIR(inode->i_mode) &&
3170 ll_i2info(inode)->lli_lsm_md != NULL) {
3171 rc = ll_merge_md_attr(inode);
/* propagate the cached MDS timestamps into the VFS inode */
3176 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3177 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3178 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3180 /* In case of restore, the MDT has the right size and has
3181 * already send it back without granting the layout lock,
3182 * inode is up-to-date so glimpse is useless.
3183 * Also to glimpse we need the layout, in case of a running
3184 * restore the MDT holds the layout lock so the glimpse will
3185 * block up to the end of restore (getattr will block)
3187 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3188 rc = ll_glimpse_size(inode);
/*
 * VFS ->getattr: revalidate UPDATE|LOOKUP ibits with the MDS, then fill
 * struct kstat from the (now fresh) inode fields.
 */
3193 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3195 struct inode *inode = de->d_inode;
3196 struct ll_sb_info *sbi = ll_i2sbi(inode);
3197 struct ll_inode_info *lli = ll_i2info(inode);
3200 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3201 MDS_INODELOCK_LOOKUP);
3202 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3207 stat->dev = inode->i_sb->s_dev;
/* 32-bit userspace needs an inode number that fits in 32 bits */
3208 if (ll_need_32bit_api(sbi))
3209 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3211 stat->ino = inode->i_ino;
3212 stat->mode = inode->i_mode;
3213 stat->uid = inode->i_uid;
3214 stat->gid = inode->i_gid;
3215 stat->rdev = inode->i_rdev;
3216 stat->atime = inode->i_atime;
3217 stat->mtime = inode->i_mtime;
3218 stat->ctime = inode->i_ctime;
3219 stat->blksize = 1 << inode->i_blkbits;
3221 stat->nlink = inode->i_nlink;
3222 stat->size = i_size_read(inode);
3223 stat->blocks = inode->i_blocks;
/*
 * VFS ->fiemap handler: marshal the kernel's fiemap_extent_info into a
 * Lustre ll_user_fiemap buffer, run the mapping via ll_do_fiemap(), and
 * copy the resulting extents back to userspace-visible storage.
 */
3228 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3229 __u64 start, __u64 len)
3233 struct ll_user_fiemap *fiemap;
3234 unsigned int extent_count = fieinfo->fi_extents_max;
3236 num_bytes = sizeof(*fiemap) + (extent_count *
3237 sizeof(struct ll_fiemap_extent));
3238 OBD_ALLOC_LARGE(fiemap, num_bytes);
3243 fiemap->fm_flags = fieinfo->fi_flags;
3244 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3245 fiemap->fm_start = start;
3246 fiemap->fm_length = len;
/* only the first extent can carry input data, so a single-extent
 * copy-in is sufficient here */
3247 if (extent_count > 0)
3248 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3249 sizeof(struct ll_fiemap_extent));
3251 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* copy all mapped extents back out */
3253 fieinfo->fi_flags = fiemap->fm_flags;
3254 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3255 if (extent_count > 0)
3256 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3257 fiemap->fm_mapped_extents *
3258 sizeof(struct ll_fiemap_extent));
3260 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * Return a referenced copy of the cached POSIX ACL for \a inode.
 * NOTE(review): \a type is not consulted here — presumably only
 * ACL_TYPE_ACCESS is cached in lli_posix_acl; confirm against callers.
 */
3264 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3266 struct ll_inode_info *lli = ll_i2info(inode);
3267 struct posix_acl *acl = NULL;
3270 spin_lock(&lli->lli_lock);
3271 /* VFS' acl_permission_check->check_acl will release the refcount */
3272 acl = posix_acl_dup(lli->lli_posix_acl);
3273 spin_unlock(&lli->lli_lock);
/* ACL checking callback for kernels whose generic_permission() takes a
 * check_acl function pointer; signature differs with kernel version. */
#ifndef HAVE_GENERIC_PERMISSION_2ARGS
# ifdef HAVE_GENERIC_PERMISSION_4ARGS
3281 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3283 ll_check_acl(struct inode *inode, int mask)
# ifdef CONFIG_FS_POSIX_ACL
3287 struct posix_acl *acl;
# ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* cannot take the spinlock in ll_get_acl() under RCU walk */
3292 if (flags & IPERM_FLAG_RCU)
3295 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3300 rc = posix_acl_permission(inode, acl, mask);
3301 posix_acl_release(acl);
# else /* !CONFIG_FS_POSIX_ACL */
# endif /* CONFIG_FS_POSIX_ACL */
#endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * VFS ->permission: revalidate the root inode if needed, optionally
 * squash root credentials (root squash), then delegate to the generic
 * permission check (or remote permission check for remote clients).
 * Signature varies with kernel version, hence the #ifdef ladder.
 */
#ifdef HAVE_GENERIC_PERMISSION_4ARGS
3311 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
# ifdef HAVE_INODE_PERMISION_2ARGS
3314 int ll_inode_permission(struct inode *inode, int mask)
3316 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3321 struct ll_sb_info *sbi;
3322 struct root_squash_info *squash;
3323 struct cred *cred = NULL;
3324 const struct cred *old_cred = NULL;
3326 bool squash_id = false;
/* RCU-walk mode: we may block below (revalidate, cred allocation),
 * so bail out and let the VFS retry in ref-walk mode */
#ifdef MAY_NOT_BLOCK
3330 if (mask & MAY_NOT_BLOCK)
#elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3333 if (flags & IPERM_FLAG_RCU)
3337 /* as root inode are NOT getting validated in lookup operation,
3338 * need to do it before permission check. */
3340 if (inode == inode->i_sb->s_root->d_inode) {
3341 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3342 MDS_INODELOCK_LOOKUP);
3347 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3348 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3350 /* squash fsuid/fsgid if needed */
3351 sbi = ll_i2sbi(inode);
3352 squash = &sbi->ll_squash;
3353 if (unlikely(squash->rsi_uid != 0 &&
3354 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3355 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3359 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3360 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3361 squash->rsi_uid, squash->rsi_gid);
3363 /* update current process's credentials
3364 * and FS capability */
3365 cred = prepare_creds();
3369 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3370 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* drop all filesystem-related capabilities from the squashed creds */
3371 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3372 if ((1 << cap) & CFS_CAP_FS_MASK)
3373 cap_lower(cred->cap_effective, cap);
3375 old_cred = override_creds(cred);
3378 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3380 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3381 rc = lustre_check_remote_perm(inode, mask);
3383 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3385 /* restore current process's credentials and FS capability */
3387 revert_creds(old_cred);
/* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no ->flock/->lock handlers, so the kernel's
 * built-in (node-local) advisory locking applies. */
3395 struct file_operations ll_file_operations = {
3396 .read = ll_file_read,
3397 .aio_read = ll_file_aio_read,
3398 .write = ll_file_write,
3399 .aio_write = ll_file_aio_write,
3400 .unlocked_ioctl = ll_file_ioctl,
3401 .open = ll_file_open,
3402 .release = ll_file_release,
3403 .mmap = ll_file_mmap,
3404 .llseek = ll_file_seek,
3405 .splice_read = ll_file_splice_read,
/* -o flock: cluster-coherent locking via ll_file_flock() for both
 * flock(2) (.flock) and fcntl(2) POSIX locks (.lock). */
3410 struct file_operations ll_file_operations_flock = {
3411 .read = ll_file_read,
3412 .aio_read = ll_file_aio_read,
3413 .write = ll_file_write,
3414 .aio_write = ll_file_aio_write,
3415 .unlocked_ioctl = ll_file_ioctl,
3416 .open = ll_file_open,
3417 .release = ll_file_release,
3418 .mmap = ll_file_mmap,
3419 .llseek = ll_file_seek,
3420 .splice_read = ll_file_splice_read,
3423 .flock = ll_file_flock,
3424 .lock = ll_file_flock
/* These are for -o noflock - to return ENOSYS on flock calls */
3428 struct file_operations ll_file_operations_noflock = {
3429 .read = ll_file_read,
3430 .aio_read = ll_file_aio_read,
3431 .write = ll_file_write,
3432 .aio_write = ll_file_aio_write,
3433 .unlocked_ioctl = ll_file_ioctl,
3434 .open = ll_file_open,
3435 .release = ll_file_release,
3436 .mmap = ll_file_mmap,
3437 .llseek = ll_file_seek,
3438 .splice_read = ll_file_splice_read,
3441 .flock = ll_file_noflock,
3442 .lock = ll_file_noflock
/* inode_operations for regular Lustre files */
3445 struct inode_operations ll_file_inode_operations = {
3446 .setattr = ll_setattr,
3447 .getattr = ll_getattr,
3448 .permission = ll_inode_permission,
3449 .setxattr = ll_setxattr,
3450 .getxattr = ll_getxattr,
3451 .listxattr = ll_listxattr,
3452 .removexattr = ll_removexattr,
3453 .fiemap = ll_fiemap,
/* newer kernels fetch ACLs via ->get_acl instead of a check_acl hook */
#ifdef HAVE_IOP_GET_ACL
3455 .get_acl = ll_get_acl,
/* dynamic ioctl number support routines */
/* Global registry of dynamically-registered ioctl handlers, protected
 * by a read/write semaphore. */
3460 static struct llioc_ctl_data {
3461 struct rw_semaphore ioc_sem;
3462 struct list_head ioc_head;
3464 __RWSEM_INITIALIZER(llioc.ioc_sem),
3465 LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the ioctl command numbers it
 * serves (iocd_cmd is a flexible trailing array, iocd_count entries). */
3470 struct list_head iocd_list;
3471 unsigned int iocd_size;
3472 llioc_callback_t iocd_cb;
3473 unsigned int iocd_count;
3474 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler \a cb for \a count commands in \a cmd.
 * Returns an opaque cookie (the allocation itself) to be passed to
 * ll_iocontrol_unregister(), or NULL on bad arguments / allocation failure.
 */
3477 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3480 struct llioc_data *in_data = NULL;
3483 if (cb == NULL || cmd == NULL ||
3484 count > LLIOC_MAX_CMD || count < 0)
/* allocation includes space for the trailing iocd_cmd[] array */
3487 size = sizeof(*in_data) + count * sizeof(unsigned int);
3488 OBD_ALLOC(in_data, size);
3489 if (in_data == NULL)
3492 memset(in_data, 0, sizeof(*in_data));
3493 in_data->iocd_size = size;
3494 in_data->iocd_cb = cb;
3495 in_data->iocd_count = count;
3496 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3498 down_write(&llioc.ioc_sem);
3499 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3500 up_write(&llioc.ioc_sem);
/*
 * Remove and free the handler previously returned by
 * ll_iocontrol_register(). Warns if \a magic is not found.
 */
3505 void ll_iocontrol_unregister(void *magic)
3507 struct llioc_data *tmp;
3512 down_write(&llioc.ioc_sem);
3513 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3515 unsigned int size = tmp->iocd_size;
3517 list_del(&tmp->iocd_list);
3518 up_write(&llioc.ioc_sem);
3520 OBD_FREE(tmp, size);
3524 up_write(&llioc.ioc_sem);
3526 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3529 EXPORT_SYMBOL(ll_iocontrol_register);
3530 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch ioctl \a cmd to the registered dynamic handlers in order.
 * The first handler that claims the command may stop iteration by
 * returning LLIOC_STOP; its result code is stored in *rcp.
 */
3532 static enum llioc_iter
3533 ll_iocontrol_call(struct inode *inode, struct file *file,
3534 unsigned int cmd, unsigned long arg, int *rcp)
3536 enum llioc_iter ret = LLIOC_CONT;
3537 struct llioc_data *data;
3538 int rc = -EINVAL, i;
3540 down_read(&llioc.ioc_sem);
3541 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3542 for (i = 0; i < data->iocd_count; i++) {
3543 if (cmd != data->iocd_cmd[i])
3546 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3550 if (ret == LLIOC_STOP)
3553 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration into the cl_object stack for \a inode.
 * For OBJECT_CONF_SET, also allow matching on the layout lock and
 * record the new layout generation in the inode.
 */
3560 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3562 struct ll_inode_info *lli = ll_i2info(inode);
3563 struct cl_env_nest nest;
/* no cl_object yet: nothing to configure */
3568 if (lli->lli_clob == NULL)
3571 env = cl_env_nested_get(&nest);
3573 RETURN(PTR_ERR(env));
3575 result = cl_conf_set(env, lli->lli_clob, conf);
3576 cl_env_nested_put(&nest, env);
3578 if (conf->coc_opc == OBJECT_CONF_SET) {
3579 struct ldlm_lock *lock = conf->coc_lock;
3581 LASSERT(lock != NULL);
3582 LASSERT(ldlm_has_layout(lock));
3584 struct lustre_md *md = conf->u.coc_md;
3585 __u32 gen = LL_LAYOUT_GEN_EMPTY;
3587 /* it can only be allowed to match after layout is
3588 * applied to inode otherwise false layout would be
3589 * seen. Applying layout should happen before dropping
3590 * the intent lock. */
3591 ldlm_lock_allow_match(lock);
3593 lli->lli_has_smd = lsm_has_objects(md->lsm);
3594 if (md->lsm != NULL)
3595 gen = md->lsm->lsm_layout_gen;
3598 DFID ": layout version change: %u -> %u\n",
3599 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3601 ll_layout_version_set(lli, gen);
/* Fetch layout from MDT with getxattr request, if it's not ready yet */
3608 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3611 struct ll_sb_info *sbi = ll_i2sbi(inode);
3612 struct obd_capa *oc;
3613 struct ptlrpc_request *req;
3614 struct mdt_body *body;
3621 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3622 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3623 lock->l_lvb_data, lock->l_lvb_len);
/* LVB already filled (lock granted immediately): nothing to fetch */
3625 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3628 /* if layout lock was granted right away, the layout is returned
3629 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3630 * blocked and then granted via completion ast, we have to fetch
3631 * layout here. Please note that we can't use the LVB buffer in
3632 * completion AST because it doesn't have a large enough buffer */
3633 oc = ll_mdscapa_get(inode);
3634 rc = ll_get_default_mdsize(sbi, &lmmsize);
3636 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3637 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3643 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3645 GOTO(out, rc = -EPROTO);
3647 lmmsize = body->mbo_eadatasize;
3648 if (lmmsize == 0) /* empty layout */
3651 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3653 GOTO(out, rc = -EFAULT);
/* copy the LOV EA into a fresh buffer and install it as the lock's LVB,
 * replacing any stale LVB data under the resource lock */
3655 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3656 if (lvbdata == NULL)
3657 GOTO(out, rc = -ENOMEM);
3659 memcpy(lvbdata, lmm, lmmsize);
3660 lock_res_and_lock(lock);
3661 if (lock->l_lvb_data != NULL)
3662 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3664 lock->l_lvb_data = lvbdata;
3665 lock->l_lvb_len = lmmsize;
3666 unlock_res_and_lock(lock);
3671 ptlrpc_req_finished(req);
* Apply the layout to the inode. Layout lock is held and will be released
3679 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3680 struct inode *inode, __u32 *gen, bool reconf)
3682 struct ll_inode_info *lli = ll_i2info(inode);
3683 struct ll_sb_info *sbi = ll_i2sbi(inode);
3684 struct ldlm_lock *lock;
3685 struct lustre_md md = { NULL };
3686 struct cl_object_conf conf;
3689 bool wait_layout = false;
3692 LASSERT(lustre_handle_is_used(lockh));
3694 lock = ldlm_handle2lock(lockh);
3695 LASSERT(lock != NULL);
3696 LASSERT(ldlm_has_layout(lock));
3698 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d",
3699 PFID(&lli->lli_fid), inode, reconf);
3701 /* in case this is a caching lock and reinstate with new inode */
3702 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3704 lock_res_and_lock(lock);
3705 lvb_ready = ldlm_is_lvb_ready(lock);
3706 unlock_res_and_lock(lock);
3707 /* checking lvb_ready is racy but this is okay. The worst case is
3708 * that multi processes may configure the file on the same time. */
3710 if (lvb_ready || !reconf) {
3713 /* layout_gen must be valid if layout lock is not
3714 * cancelled and stripe has already set */
3715 *gen = ll_layout_version_get(lli);
/* make sure the lock's LVB carries the layout before unpacking it */
3721 rc = ll_layout_fetch(inode, lock);
3725 /* for layout lock, lmm is returned in lock's lvb.
3726 * lvb_data is immutable if the lock is held so it's safe to access it
3727 * without res lock. See the description in ldlm_lock_decref_internal()
3728 * for the condition to free lvb_data of layout lock */
3729 if (lock->l_lvb_data != NULL) {
3730 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3731 lock->l_lvb_data, lock->l_lvb_len);
3733 *gen = LL_LAYOUT_GEN_EMPTY;
3735 *gen = md.lsm->lsm_layout_gen;
3738 CERROR("%s: file "DFID" unpackmd error: %d\n",
3739 ll_get_fsname(inode->i_sb, NULL, 0),
3740 PFID(&lli->lli_fid), rc);
3746 /* set layout to file. Unlikely this will fail as old layout was
3747 * surely eliminated */
3748 memset(&conf, 0, sizeof conf);
3749 conf.coc_opc = OBJECT_CONF_SET;
3750 conf.coc_inode = inode;
3751 conf.coc_lock = lock;
3752 conf.u.coc_md = &md;
3753 rc = ll_layout_conf(inode, &conf);
3756 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3758 /* refresh layout failed, need to wait */
3759 wait_layout = rc == -EBUSY;
3763 LDLM_LOCK_PUT(lock);
3764 ldlm_lock_decref(lockh, mode);
3766 /* wait for IO to complete if it's still being used. */
3768 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3769 ll_get_fsname(inode->i_sb, NULL, 0),
3770 PFID(&lli->lli_fid), inode);
/* OBJECT_CONF_WAIT blocks until outstanding users of the old
 * layout are done, then the caller can retry the refresh */
3772 memset(&conf, 0, sizeof conf);
3773 conf.coc_opc = OBJECT_CONF_WAIT;
3774 conf.coc_inode = inode;
3775 rc = ll_layout_conf(inode, &conf);
3779 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3780 ll_get_fsname(inode->i_sb, NULL, 0),
3781 PFID(&lli->lli_fid), rc);
3787 * This function checks if there exists a LAYOUT lock on the client side,
3788 * or enqueues it if it doesn't have one in cache.
3790 * This function will not hold layout lock so it may be revoked any time after
3791 this function returns. Any operation that depends on the layout should be redone
3794 * This function should be called before lov_io_init() to get an uptodate
3795 * layout version, the caller should save the version number and after IO
3796 * is finished, this function should be called again to verify that layout
3797 * is not changed during IO time.
/* See the doxygen comment above: ensure a layout lock exists (matching
 * a cached one or enqueueing IT_LAYOUT) and return the layout version. */
3799 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3801 struct ll_inode_info *lli = ll_i2info(inode);
3802 struct ll_sb_info *sbi = ll_i2sbi(inode);
3803 struct md_op_data *op_data;
3804 struct lookup_intent it;
3805 struct lustre_handle lockh;
3807 struct ldlm_enqueue_info einfo = {
3808 .ei_type = LDLM_IBITS,
3810 .ei_cb_bl = &ll_md_blocking_ast,
3811 .ei_cb_cp = &ldlm_completion_ast,
/* fast path: layout locks disabled or version already known */
3816 *gen = ll_layout_version_get(lli);
3817 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
3821 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3822 LASSERT(S_ISREG(inode->i_mode));
3824 /* take layout lock mutex to enqueue layout lock exclusively. */
3825 mutex_lock(&lli->lli_layout_mutex);
3828 /* mostly layout lock is caching on the local side, so try to match
3829 * it before grabbing layout lock mutex. */
3830 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3831 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3832 if (mode != 0) { /* hit cached lock */
3833 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3837 mutex_unlock(&lli->lli_layout_mutex);
3841 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3842 0, 0, LUSTRE_OPC_ANY, NULL);
3843 if (IS_ERR(op_data)) {
3844 mutex_unlock(&lli->lli_layout_mutex);
3845 RETURN(PTR_ERR(op_data));
3848 /* have to enqueue one */
3849 memset(&it, 0, sizeof(it));
3850 it.it_op = IT_LAYOUT;
3851 lockh.cookie = 0ULL;
3853 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3854 ll_get_fsname(inode->i_sb, NULL, 0),
3855 PFID(&lli->lli_fid), inode);
3857 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
/* drop the intent's request/lock references: the layout lock itself
 * is kept via lockh and processed by ll_layout_lock_set() below */
3858 if (it.d.lustre.it_data != NULL)
3859 ptlrpc_req_finished(it.d.lustre.it_data);
3860 it.d.lustre.it_data = NULL;
3862 ll_finish_md_op_data(op_data);
3864 mode = it.d.lustre.it_lock_mode;
3865 it.d.lustre.it_lock_mode = 0;
3866 ll_intent_drop_lock(&it);
3869 /* set lock data in case this is a new lock */
3870 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3871 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3875 mutex_unlock(&lli->lli_layout_mutex);
3881 * This function send a restore request to the MDT
3883 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3885 struct hsm_user_request *hur;
3889 len = sizeof(struct hsm_user_request) +
3890 sizeof(struct hsm_user_item);
3891 OBD_ALLOC(hur, len);
3895 hur->hur_request.hr_action = HUA_RESTORE;
3896 hur->hur_request.hr_archive_id = 0;
3897 hur->hur_request.hr_flags = 0;
3898 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3899 sizeof(hur->hur_user_item[0].hui_fid));
3900 hur->hur_user_item[0].hui_extent.offset = offset;
3901 hur->hur_user_item[0].hui_extent.length = length;
3902 hur->hur_request.hr_itemcount = 1;
3903 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,