4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include <linux/user_namespace.h>
49 #ifdef HAVE_UIDGID_HEADER
50 # include <linux/uidgid.h>
52 #include <lustre/ll_fiemap.h>
53 #include <lustre_ioctl.h>
55 #include "cl_object.h"
57 #include "llite_internal.h"
58 #include "vvp_internal.h"
61 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
63 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
66 static enum llioc_iter
67 ll_iocontrol_call(struct inode *inode, struct file *file,
68 unsigned int cmd, unsigned long arg, int *rcp);
/*
 * Allocate per-open-file state (struct ll_file_data) from its slab.
 * GFP_NOFS prevents memory reclaim from re-entering the filesystem
 * while Lustre locks may be held.
 * NOTE(review): lines elided in this excerpt — the allocation-failure
 * path (presumably returning NULL) is not visible; confirm in full file.
 */
70 static struct ll_file_data *ll_file_data_get(void)
72 struct ll_file_data *fd;
74 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
/* Fresh descriptor has not failed any write yet. */
78 fd->fd_write_failed = false;
/* Return a struct ll_file_data to the slab; counterpart of
 * ll_file_data_get(). */
83 static void ll_file_data_put(struct ll_file_data *fd)
86 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Copy the inode's current attributes (fid, mode, times, size, blocks,
 * flags), the open file handle @fh and the MDS capability into @op_data
 * so they can be shipped to the MDS in a subsequent RPC.
 */
89 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
90 struct lustre_handle *fh)
92 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
93 op_data->op_attr.ia_mode = inode->i_mode;
94 op_data->op_attr.ia_atime = inode->i_atime;
95 op_data->op_attr.ia_mtime = inode->i_mtime;
96 op_data->op_attr.ia_ctime = inode->i_ctime;
97 op_data->op_attr.ia_size = i_size_read(inode);
98 op_data->op_attr_blocks = inode->i_blocks;
99 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
101 op_data->op_handle = *fh;
102 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Propagate the locally recorded "data modified" state as an RPC bias
 * so the MDS knows the file data changed on this client. */
104 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
105 op_data->op_bias |= MDS_DATA_MODIFIED;
109 * Packs all the attributes into @op_data for the CLOSE rpc.
111 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
112 struct obd_client_handle *och)
/* Always send mode and all three timestamps on close. */
116 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
117 ATTR_MTIME | ATTR_MTIME_SET |
118 ATTR_CTIME | ATTR_CTIME_SET;
/* NOTE(review): control flow between this test and the ia_valid update
 * below is elided in this excerpt; size/blocks appear to be sent only
 * for handles that could have modified the file — confirm in full file. */
120 if (!(och->och_flags & FMODE_WRITE))
123 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
126 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
127 ll_prep_md_op_data(op_data, inode, NULL, NULL,
128 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send an MDS close RPC for open handle @och on @inode and tear the
 * handle down.  If @data_version is non-NULL the close is treated as an
 * HSM release: the version and lease handle are packed and the MDS is
 * asked to release the file data.
 */
132 static int ll_close_inode_openhandle(struct obd_export *md_exp,
134 struct obd_client_handle *och,
135 const __u64 *data_version)
137 struct obd_export *exp = ll_i2mdexp(inode);
138 struct md_op_data *op_data;
139 struct ptlrpc_request *req = NULL;
140 struct obd_device *obd = class_exp2obd(exp);
146 * XXX: in case of LMV, is this correct to access
149 CERROR("Invalid MDC connection handle "LPX64"\n",
150 ll_i2mdexp(inode)->exp_handle.h_cookie);
154 OBD_ALLOC_PTR(op_data);
156 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
158 ll_prepare_close(inode, op_data, och);
159 if (data_version != NULL) {
160 /* Pass in data_version implies release. */
161 op_data->op_bias |= MDS_HSM_RELEASE;
162 op_data->op_data_version = *data_version;
163 op_data->op_lease_handle = och->och_lease_handle;
164 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
167 rc = md_close(md_exp, op_data, och->och_mod, &req);
169 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
170 ll_i2mdexp(inode)->exp_obd->obd_name,
171 PFID(ll_inode2fid(inode)), rc);
174 /* DATA_MODIFIED flag was successfully sent on close, cancel data
175 * modification flag. */
176 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
177 struct ll_inode_info *lli = ll_i2info(inode);
179 spin_lock(&lli->lli_lock);
180 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
181 spin_unlock(&lli->lli_lock);
/* For an HSM release, check the server reply to see whether the MDS
 * actually released the file data. */
184 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
185 struct mdt_body *body;
186 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
187 if (!(body->mbo_valid & OBD_MD_FLRELEASED))
191 ll_finish_md_op_data(op_data);
/* Stop open-replay for this handle and poison the cookie so stale
 * use of the handle is detectable. */
195 md_clear_open_replay_data(md_exp, och);
196 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
199 if (req) /* This is close request */
200 ptlrpc_req_finished(req);
/*
 * Drop one user of the per-inode MDS open handle matching @fmode
 * (write/exec/read) and, when the use count reaches zero, actually
 * close the handle on the MDS via ll_close_inode_openhandle().
 */
204 int ll_md_real_close(struct inode *inode, fmode_t fmode)
206 struct ll_inode_info *lli = ll_i2info(inode);
207 struct obd_client_handle **och_p;
208 struct obd_client_handle *och;
/* Select the open handle slot and its use counter by open mode. */
213 if (fmode & FMODE_WRITE) {
214 och_p = &lli->lli_mds_write_och;
215 och_usecount = &lli->lli_open_fd_write_count;
216 } else if (fmode & FMODE_EXEC) {
217 och_p = &lli->lli_mds_exec_och;
218 och_usecount = &lli->lli_open_fd_exec_count;
220 LASSERT(fmode & FMODE_READ);
221 och_p = &lli->lli_mds_read_och;
222 och_usecount = &lli->lli_open_fd_read_count;
225 mutex_lock(&lli->lli_och_mutex);
226 if (*och_usecount > 0) {
227 /* There are still users of this handle, so skip
229 mutex_unlock(&lli->lli_och_mutex);
235 mutex_unlock(&lli->lli_och_mutex);
238 /* There might be a race and this handle may already
240 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close: release group lock, lease and private open
 * handle if present, drop this descriptor's reference on the shared MDS
 * open handle, and free the ll_file_data.  If a matching OPEN ibit lock
 * is still cached locally, the MDS close RPC can be deferred until lock
 * cancellation.
 */
247 static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
250 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
251 struct ll_inode_info *lli = ll_i2info(inode);
255 /* clear group lock, if present */
256 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
257 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
259 if (fd->fd_lease_och != NULL) {
262 /* Usually the lease is not released when the
263 * application crashed, we need to release here. */
264 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
265 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
266 PFID(&lli->lli_fid), rc, lease_broken);
268 fd->fd_lease_och = NULL;
/* A descriptor-private open handle (e.g. from a lease swap) is closed
 * directly rather than through the shared per-inode handles. */
271 if (fd->fd_och != NULL) {
272 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
277 /* Let's see if we have good enough OPEN lock on the file and if
278 we can skip talking to MDS */
279 if (file->f_dentry->d_inode) { /* Can this ever be false? */
281 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
282 struct lustre_handle lockh;
283 struct inode *inode = file->f_dentry->d_inode;
284 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Drop this fd's reference on the use count of the matching mode. */
286 mutex_lock(&lli->lli_och_mutex);
287 if (fd->fd_omode & FMODE_WRITE) {
289 LASSERT(lli->lli_open_fd_write_count);
290 lli->lli_open_fd_write_count--;
291 } else if (fd->fd_omode & FMODE_EXEC) {
293 LASSERT(lli->lli_open_fd_exec_count);
294 lli->lli_open_fd_exec_count--;
297 LASSERT(lli->lli_open_fd_read_count);
298 lli->lli_open_fd_read_count--;
300 mutex_unlock(&lli->lli_och_mutex);
/* No cached OPEN lock matched: tell the MDS about the close now. */
302 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
303 LDLM_IBITS, &policy, lockmode,
305 rc = ll_md_real_close(file->f_dentry->d_inode,
309 CERROR("released file has negative dentry: file = %p, "
310 "dentry = %p, name = %s\n",
311 file, file->f_dentry, file->f_dentry->d_name.name);
315 LUSTRE_FPRIVATE(file) = NULL;
316 ll_file_data_put(fd);
317 ll_capa_close(inode);
322 /* While this returns an error code, fput() the caller does not, so we need
323 * to make every effort to clean up all of our state here. Also, applications
324 * rarely check close errors and even if an error is returned they will not
325 * re-try the close call.
327 int ll_file_release(struct inode *inode, struct file *file)
329 struct ll_file_data *fd;
330 struct ll_sb_info *sbi = ll_i2sbi(inode);
331 struct ll_inode_info *lli = ll_i2info(inode);
335 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
336 PFID(ll_inode2fid(inode)), inode);
/* Remote-client ACL bookkeeping only applies to the root inode. */
338 #ifdef CONFIG_FS_POSIX_ACL
339 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
340 inode == inode->i_sb->s_root->d_inode) {
341 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
344 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
345 fd->fd_flags &= ~LL_FILE_RMTACL;
346 rct_del(&sbi->ll_rct, current_pid());
347 et_search_free(&sbi->ll_et, current_pid());
352 if (inode->i_sb->s_root != file->f_dentry)
353 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
354 fd = LUSTRE_FPRIVATE(file);
357 /* The last ref on @file, but maybe not the owner pid of statahead,
358 * because parent and child process can share the same file handle. */
359 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
360 ll_deauthorize_statahead(inode, fd);
/* Root of the mount has no MDS open handle; just free our state. */
362 if (inode->i_sb->s_root == file->f_dentry) {
363 LUSTRE_FPRIVATE(file) = NULL;
364 ll_file_data_put(fd);
/* Fold any deferred async write errors into the close return code. */
368 if (!S_ISDIR(inode->i_mode)) {
369 if (lli->lli_clob != NULL)
370 lov_read_and_clear_async_rc(lli->lli_clob);
371 lli->lli_async_rc = 0;
374 rc = ll_md_close(sbi->ll_md_exp, inode, file);
376 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
377 libcfs_debug_dumplog();
/*
 * Enqueue an intent-OPEN lock request to the MDS for @file, optionally
 * packing striping information @lmm/@lmmsize into the request, and
 * update the local inode from the reply.
 */
382 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
383 struct lookup_intent *itp)
385 struct dentry *de = file->f_dentry;
386 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
387 struct dentry *parent = de->d_parent;
388 const char *name = NULL;
390 struct md_op_data *op_data;
391 struct ptlrpc_request *req = NULL;
395 LASSERT(parent != NULL);
396 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
398 /* if server supports open-by-fid, or file name is invalid, don't pack
399 * name in open request */
400 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
401 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
402 name = de->d_name.name;
403 len = de->d_name.len;
406 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
407 name, len, 0, LUSTRE_OPC_ANY, NULL);
409 RETURN(PTR_ERR(op_data));
410 op_data->op_data = lmm;
411 op_data->op_data_size = lmmsize;
413 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
414 &ll_md_blocking_ast, 0);
415 ll_finish_md_op_data(op_data);
417 /* Keep a separate exit path here so we do not flood the log
418 * with -ESTALE error messages.
/* Server granted an open we cannot use: release the handle so it is
 * not leaked on the MDS. */
420 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
421 it_open_error(DISP_OPEN_OPEN, itp))
423 ll_release_openhandle(de, itp);
427 if (it_disposition(itp, DISP_LOOKUP_NEG))
428 GOTO(out, rc = -ENOENT);
430 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
431 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
432 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the reply and attach the granted lock. */
436 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
437 if (!rc && itp->d.lustre.it_lock_mode)
438 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
441 ptlrpc_req_finished(req);
442 ll_intent_drop_lock(itp);
/*
 * Populate an obd_client_handle from the MDS reply carried in the
 * completed intent @it (file handle, fid, lease lock cookie, flags)
 * and register it for open replay on recovery.
 */
447 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
448 struct obd_client_handle *och)
450 struct ptlrpc_request *req = it->d.lustre.it_data;
451 struct mdt_body *body;
453 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
454 och->och_fh = body->mbo_handle;
455 och->och_fid = body->mbo_fid1;
456 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
457 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
458 och->och_flags = it->it_flags;
460 return md_set_open_replay_data(md_exp, och, it);
/*
 * Complete the client-local part of an open: optionally fill @och from
 * the intent reply, then attach @fd to the struct file and initialise
 * its readahead state, open mode and cl_io context bookkeeping.
 */
463 static int ll_local_open(struct file *file, struct lookup_intent *it,
464 struct ll_file_data *fd, struct obd_client_handle *och)
466 struct inode *inode = file->f_dentry->d_inode;
469 LASSERT(!LUSTRE_FPRIVATE(file));
476 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
481 LUSTRE_FPRIVATE(file) = fd;
482 ll_readahead_init(inode, &fd->fd_ras);
483 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
485 /* ll_cl_context initialize */
486 rwlock_init(&fd->fd_lock);
487 INIT_LIST_HEAD(&fd->fd_lccs);
492 /* Open a file, and (for the very first open) create objects on the OSTs at
493 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
494 * creation or open until ll_lov_setstripe() ioctl is called.
496 * If we already have the stripe MD locally then we don't request it in
497 * md_open(), by passing a lmm_size = 0.
499 * It is up to the application to ensure no other processes open this file
500 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
501 * used. We might be able to avoid races of that sort by getting lli_open_sem
502 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
503 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
505 int ll_file_open(struct inode *inode, struct file *file)
507 struct ll_inode_info *lli = ll_i2info(inode);
508 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
509 .it_flags = file->f_flags };
510 struct obd_client_handle **och_p = NULL;
511 __u64 *och_usecount = NULL;
512 struct ll_file_data *fd;
516 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
517 PFID(ll_inode2fid(inode)), inode, file->f_flags);
/* An intent prepared by lookup may be stashed in private_data. */
519 it = file->private_data; /* XXX: compat macro */
520 file->private_data = NULL; /* prevent ll_local_open assertion */
522 fd = ll_file_data_get();
524 GOTO(out_openerr, rc = -ENOMEM);
527 if (S_ISDIR(inode->i_mode))
528 ll_authorize_statahead(inode, fd);
/* Root of the mount needs no MDS open handle. */
530 if (inode->i_sb->s_root == file->f_dentry) {
531 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own IT_OPEN intent. */
535 if (!it || !it->d.lustre.it_disposition) {
536 /* Convert f_flags into access mode. We cannot use file->f_mode,
537 * because everything but O_ACCMODE mask was stripped from
539 if ((oit.it_flags + 1) & O_ACCMODE)
541 if (file->f_flags & O_TRUNC)
542 oit.it_flags |= FMODE_WRITE;
544 /* The kernel only calls f_op->open in dentry_open. filp_open calls
545 * dentry_open after open_namei, which checks permissions.
546 * Only nfsd_open calls dentry_open directly without checking
547 * permissions, and because of that the code below is safe. */
548 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
549 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
551 /* We do not want O_EXCL here, presumably we opened the file
552 * already? XXX - NFS implications? */
553 oit.it_flags &= ~O_EXCL;
555 /* bug20584, if "it_flags" contains O_CREAT, the file will be
556 * created if necessary, then "IT_CREAT" should be set to keep
557 * consistent with it */
558 if (oit.it_flags & O_CREAT)
559 oit.it_op |= IT_CREAT;
565 /* Let's see if we have file open on MDS already. */
566 if (it->it_flags & FMODE_WRITE) {
567 och_p = &lli->lli_mds_write_och;
568 och_usecount = &lli->lli_open_fd_write_count;
569 } else if (it->it_flags & FMODE_EXEC) {
570 och_p = &lli->lli_mds_exec_och;
571 och_usecount = &lli->lli_open_fd_exec_count;
573 och_p = &lli->lli_mds_read_och;
574 och_usecount = &lli->lli_open_fd_read_count;
577 mutex_lock(&lli->lli_och_mutex);
578 if (*och_p) { /* Open handle is present */
579 if (it_disposition(it, DISP_OPEN_OPEN)) {
580 /* Well, there's extra open request that we do not need,
581 let's close it somehow. This will decref request. */
582 rc = it_open_error(DISP_OPEN_OPEN, it);
584 mutex_unlock(&lli->lli_och_mutex);
585 GOTO(out_openerr, rc);
588 ll_release_openhandle(file->f_dentry, it);
/* Reuse the existing MDS open handle for this descriptor. */
592 rc = ll_local_open(file, it, fd, NULL);
595 mutex_unlock(&lli->lli_och_mutex);
596 GOTO(out_openerr, rc);
599 LASSERT(*och_usecount == 0);
600 if (!it->d.lustre.it_disposition) {
601 /* We cannot just request lock handle now, new ELC code
602 means that one of other OPEN locks for this file
603 could be cancelled, and since blocking ast handler
604 would attempt to grab och_mutex as well, that would
605 result in a deadlock */
606 mutex_unlock(&lli->lli_och_mutex);
608 * Normally called under two situations:
610 * 2. A race/condition on MDS resulting in no open
611 * handle to be returned from LOOKUP|OPEN request,
612 * for example if the target entry was a symlink.
614 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
616 * Always specify MDS_OPEN_BY_FID because we don't want
617 * to get file with different fid.
619 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
620 rc = ll_intent_file_open(file, NULL, 0, it);
622 GOTO(out_openerr, rc);
626 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
628 GOTO(out_och_free, rc = -ENOMEM);
632 /* md_intent_lock() didn't get a request ref if there was an
633 * open error, so don't do cleanup on the request here
635 /* XXX (green): Should not we bail out on any error here, not
636 * just open error? */
637 rc = it_open_error(DISP_OPEN_OPEN, it);
639 GOTO(out_och_free, rc);
641 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
642 "inode %p: disposition %x, status %d\n", inode,
643 it_disposition(it, ~0), it->d.lustre.it_status);
645 rc = ll_local_open(file, it, fd, *och_p);
647 GOTO(out_och_free, rc);
649 mutex_unlock(&lli->lli_och_mutex);
652 /* Must do this outside lli_och_mutex lock to prevent deadlock where
653 different kind of OPEN lock for this same inode gets cancelled
654 by ldlm_cancel_lru */
655 if (!S_ISREG(inode->i_mode))
656 GOTO(out_och_free, rc);
660 cl_lov_delay_create_clear(&file->f_flags);
661 GOTO(out_och_free, rc);
/* Error path: free a half-initialised open handle slot. */
665 if (och_p && *och_p) {
666 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
667 *och_p = NULL; /* OBD_FREE writes some magic there */
670 mutex_unlock(&lli->lli_och_mutex);
673 if (lli->lli_opendir_key == fd)
674 ll_deauthorize_statahead(inode, fd);
676 ll_file_data_put(fd);
678 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* Drop the extra request reference held by the intent, if any. */
681 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
682 ptlrpc_req_finished(it->d.lustre.it_data);
683 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * LDLM blocking AST for lease locks: on a blocking callback, cancel the
 * lease lock asynchronously (this is what "breaks" the lease).  Unlike
 * ll_md_blocking_ast() it does not touch the open handle.
 */
689 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
690 struct ldlm_lock_desc *desc, void *data, int flag)
693 struct lustre_handle lockh;
697 case LDLM_CB_BLOCKING:
698 ldlm_lock2handle(lock, &lockh);
699 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
701 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
705 case LDLM_CB_CANCELING:
713 * Acquire a lease and open the file.
715 static struct obd_client_handle *
716 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
719 struct lookup_intent it = { .it_op = IT_OPEN };
720 struct ll_sb_info *sbi = ll_i2sbi(inode);
721 struct md_op_data *op_data;
722 struct ptlrpc_request *req = NULL;
723 struct lustre_handle old_handle = { 0 };
724 struct obd_client_handle *och = NULL;
/* Leases are only defined for plain read or plain write mode. */
729 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
730 RETURN(ERR_PTR(-EINVAL));
733 struct ll_inode_info *lli = ll_i2info(inode);
734 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
735 struct obd_client_handle **och_p;
/* The lease mode must be covered by the file's open mode, and exec
 * opens cannot take a lease. */
738 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
739 RETURN(ERR_PTR(-EPERM));
741 /* Get the openhandle of the file */
743 mutex_lock(&lli->lli_och_mutex);
/* Only one lease per file descriptor is allowed. */
744 if (fd->fd_lease_och != NULL) {
745 mutex_unlock(&lli->lli_och_mutex);
749 if (fd->fd_och == NULL) {
750 if (file->f_mode & FMODE_WRITE) {
751 LASSERT(lli->lli_mds_write_och != NULL);
752 och_p = &lli->lli_mds_write_och;
753 och_usecount = &lli->lli_open_fd_write_count;
755 LASSERT(lli->lli_mds_read_och != NULL);
756 och_p = &lli->lli_mds_read_och;
757 och_usecount = &lli->lli_open_fd_read_count;
/* NOTE(review): lines elided — the shared open handle appears to be
 * transferred to this fd only when it is the sole opener; confirm. */
759 if (*och_usecount == 1) {
766 mutex_unlock(&lli->lli_och_mutex);
767 if (rc < 0) /* more than 1 opener */
770 LASSERT(fd->fd_och != NULL);
771 old_handle = fd->fd_och->och_fh;
776 RETURN(ERR_PTR(-ENOMEM));
778 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
779 LUSTRE_OPC_ANY, NULL);
781 GOTO(out, rc = PTR_ERR(op_data));
783 /* To tell the MDT this openhandle is from the same owner */
784 op_data->op_handle = old_handle;
786 it.it_flags = fmode | open_flags;
787 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
788 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
789 &ll_md_blocking_lease_ast,
790 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
791 * it can be cancelled which may mislead applications that the lease is
793 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
794 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
795 * doesn't deal with openhandle, so normal openhandle will be leaked. */
796 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
797 ll_finish_md_op_data(op_data);
798 ptlrpc_req_finished(req);
800 GOTO(out_release_it, rc);
802 if (it_disposition(&it, DISP_LOOKUP_NEG))
803 GOTO(out_release_it, rc = -ENOENT);
805 rc = it_open_error(DISP_OPEN_OPEN, &it);
807 GOTO(out_release_it, rc);
809 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
810 ll_och_fill(sbi->ll_md_exp, &it, och);
812 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
813 GOTO(out_close, rc = -EOPNOTSUPP);
815 /* already get lease, handle lease lock */
816 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
817 if (it.d.lustre.it_lock_mode == 0 ||
818 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
819 /* open lock must return for lease */
820 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
821 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
822 it.d.lustre.it_lock_bits);
823 GOTO(out_close, rc = -EPROTO);
826 ll_intent_release(&it);
830 /* Cancel open lock */
831 if (it.d.lustre.it_lock_mode != 0) {
832 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
833 it.d.lustre.it_lock_mode);
834 it.d.lustre.it_lock_mode = 0;
835 och->och_lease_handle.cookie = 0ULL;
837 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
839 CERROR("%s: error closing file "DFID": %d\n",
840 ll_get_fsname(inode->i_sb, NULL, 0),
841 PFID(&ll_i2info(inode)->lli_fid), rc2);
842 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
844 ll_intent_release(&it);
852 * Release the lease and close the file.
853 * It also checks whether the lease was ever broken.
855 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
858 struct ldlm_lock *lock;
859 bool cancelled = true;
/* Inspect the lease lock: if it is already marked for cancel, the
 * lease was broken by a conflicting operation. */
863 lock = ldlm_handle2lock(&och->och_lease_handle);
865 lock_res_and_lock(lock);
866 cancelled = ldlm_is_cancel(lock);
867 unlock_res_and_lock(lock);
871 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
872 PFID(&ll_i2info(inode)->lli_fid), cancelled);
875 ldlm_cli_cancel(&och->och_lease_handle, 0);
876 if (lease_broken != NULL)
877 *lease_broken = cancelled;
879 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
/*
 * Merge MDS-cached attributes with OST-side attributes: take the newest
 * of each timestamp and adopt the OST view of size and block count,
 * all under the inode size lock.
 */
884 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
886 struct ll_inode_info *lli = ll_i2info(inode);
887 struct cl_object *obj = lli->lli_clob;
888 struct cl_attr *attr = vvp_env_thread_attr(env);
896 ll_inode_size_lock(inode);
898 /* merge the timestamps most recently obtained from the MDS with
899 timestamps obtained from the OSTs */
900 LTIME_S(inode->i_atime) = lli->lli_atime;
901 LTIME_S(inode->i_mtime) = lli->lli_mtime;
902 LTIME_S(inode->i_ctime) = lli->lli_ctime;
904 atime = LTIME_S(inode->i_atime);
905 mtime = LTIME_S(inode->i_mtime);
906 ctime = LTIME_S(inode->i_ctime);
908 cl_object_attr_lock(obj);
909 rc = cl_object_attr_get(env, obj, attr);
910 cl_object_attr_unlock(obj);
913 GOTO(out_size_unlock, rc);
/* Keep whichever timestamp is newer, MDS or OST. */
915 if (atime < attr->cat_atime)
916 atime = attr->cat_atime;
918 if (ctime < attr->cat_ctime)
919 ctime = attr->cat_ctime;
921 if (mtime < attr->cat_mtime)
922 mtime = attr->cat_mtime;
924 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
925 PFID(&lli->lli_fid), attr->cat_size);
927 i_size_write(inode, attr->cat_size);
928 inode->i_blocks = attr->cat_blocks;
930 LTIME_S(inode->i_atime) = atime;
931 LTIME_S(inode->i_mtime) = mtime;
932 LTIME_S(inode->i_ctime) = ctime;
935 ll_inode_size_unlock(inode);
/*
 * Decide whether access-time updates should be suppressed for @file,
 * checking the open flags, the inode, the mount and the superblock.
 */
940 static bool file_is_noatime(const struct file *file)
942 const struct vfsmount *mnt = file->f_path.mnt;
943 const struct inode *inode = file->f_path.dentry->d_inode;
945 /* Adapted from file_accessed() and touch_atime(). */
946 if (file->f_flags & O_NOATIME)
949 if (inode->i_flags & S_NOATIME)
952 if (IS_NOATIME(inode))
955 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
958 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
961 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialise a cl_io from the struct file's open flags: non-blocking,
 * append and sync behaviour, the cl object to operate on, the lock
 * requirement policy and atime suppression.
 */
967 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
969 struct inode *inode = file->f_dentry->d_inode;
971 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
973 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
974 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
975 file->f_flags & O_DIRECT ||
978 io->ci_obj = ll_i2info(inode)->lli_clob;
/* No-lock files never take DLM locks; O_APPEND writes always must. */
979 io->ci_lockreq = CILR_MAYBE;
980 if (ll_file_nolock(file)) {
981 io->ci_lockreq = CILR_NEVER;
982 io->ci_no_srvlock = 1;
983 } else if (file->f_flags & O_APPEND) {
984 io->ci_lockreq = CILR_MANDATORY;
987 io->ci_noatime = file_is_noatime(file);
/*
 * Common read/write engine for normal and splice I/O.  Sets up the
 * cl_io, takes the per-inode range lock where required (writes, and
 * O_DIRECT reads — see LU-6227), runs the cl_io loop, accumulates the
 * byte count, and restarts the I/O when the lower layers ask for it.
 * Returns bytes transferred if positive, else the error code.
 */
991 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
992 struct file *file, enum cl_io_type iot,
993 loff_t *ppos, size_t count)
995 struct vvp_io *vio = vvp_env_io(env);
996 struct inode *inode = file->f_dentry->d_inode;
997 struct ll_inode_info *lli = ll_i2info(inode);
998 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1002 struct range_lock range;
1006 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1007 file->f_dentry->d_name.name, iot, *ppos, count);
1010 io = vvp_env_thread_io(env);
1011 ll_io_init(io, file, iot == CIT_WRITE);
1013 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1014 bool range_locked = false;
/* O_APPEND writes land at EOF, so lock the whole file range. */
1016 if (file->f_flags & O_APPEND)
1017 range_lock_init(&range, 0, LUSTRE_EOF);
1019 range_lock_init(&range, *ppos, *ppos + count - 1);
1021 vio->vui_fd = LUSTRE_FPRIVATE(file);
1022 vio->vui_io_subtype = args->via_io_subtype;
1024 switch (vio->vui_io_subtype) {
1026 vio->vui_iov = args->u.normal.via_iov;
1027 vio->vui_nrsegs = args->u.normal.via_nrsegs;
1028 vio->vui_tot_nrsegs = vio->vui_nrsegs;
1029 vio->vui_iocb = args->u.normal.via_iocb;
1030 /* Direct IO reads must also take range lock,
1031 * or multiple reads will try to work on the same pages
1032 * See LU-6227 for details. */
1033 if (((iot == CIT_WRITE) ||
1034 (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
1035 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1036 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1038 rc = range_lock(&lli->lli_write_tree, &range);
1042 range_locked = true;
/* Hold off concurrent truncate for the duration of normal I/O. */
1044 down_read(&lli->lli_trunc_sem);
1047 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1048 vio->u.splice.vui_flags = args->u.splice.via_flags;
1051 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1055 ll_cl_add(file, env, io);
1056 rc = cl_io_loop(env, io);
1057 ll_cl_remove(file, env);
1059 if (args->via_io_subtype == IO_NORMAL)
1060 up_read(&lli->lli_trunc_sem);
1062 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1064 range_unlock(&lli->lli_write_tree, &range);
1067 /* cl_io_rw_init() handled IO */
/* Accumulate progress and advance the file position. */
1071 if (io->ci_nob > 0) {
1072 result += io->ci_nob;
1073 count -= io->ci_nob;
1074 *ppos = io->u.ci_wr.wr.crw_pos; /* for splice */
1076 /* prepare IO restart */
1077 if (count > 0 && args->via_io_subtype == IO_NORMAL) {
1078 args->u.normal.via_iov = vio->vui_iov;
1079 args->u.normal.via_nrsegs = vio->vui_tot_nrsegs;
1084 cl_io_fini(env, io);
/* Lower layers may request a restart (e.g. layout change). */
1086 if ((rc == 0 || rc == -ENODATA) && count > 0 && io->ci_need_restart) {
1088 "%s: restart %s from %lld, count:%zu, result: %zd\n",
1089 file->f_dentry->d_name.name,
1090 iot == CIT_READ ? "read" : "write",
1091 *ppos, count, result);
/* Tally statistics and track write failure state for fsync. */
1095 if (iot == CIT_READ) {
1097 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1098 LPROC_LL_READ_BYTES, result);
1099 } else if (iot == CIT_WRITE) {
1101 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1102 LPROC_LL_WRITE_BYTES, result);
1103 fd->fd_write_failed = false;
1104 } else if (rc != -ERESTARTSYS) {
1105 fd->fd_write_failed = true;
1109 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1111 return result > 0 ? result : rc;
1115 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array and compute the total byte count into @count,
 * possibly shrinking *nr_segs at the first inaccessible segment.
 * NOTE(review): several lines are elided in this excerpt (the continue/
 * break handling after access_ok); see the upstream kernel helper.
 */
1117 static int ll_file_get_iov_count(const struct iovec *iov,
1118 unsigned long *nr_segs, size_t *count)
1123 for (seg = 0; seg < *nr_segs; seg++) {
1124 const struct iovec *iv = &iov[seg];
1127 * If any segment has a negative length, or the cumulative
1128 * length ever wraps negative then return -EINVAL.
1131 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1133 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1138 cnt -= iv->iov_len; /* This segment is no good */
/*
 * aio read entry point: validate the iovec, copy it to per-env (or
 * heap-allocated) storage because the cl_io machinery may modify it,
 * then run the generic I/O engine for CIT_READ.
 */
1145 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1146 unsigned long nr_segs, loff_t pos)
1149 struct vvp_io_args *args;
1150 struct iovec *local_iov;
1156 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1160 env = cl_env_get(&refcheck);
1162 RETURN(PTR_ERR(env));
/* Single-segment I/O can reuse the per-env iovec; larger arrays are
 * heap-allocated and freed below. */
1165 local_iov = &ll_env_info(env)->lti_local_iov;
1168 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1169 if (local_iov == NULL) {
1170 cl_env_put(env, &refcheck);
1174 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1177 args = ll_env_args(env, IO_NORMAL);
1178 args->u.normal.via_iov = local_iov;
1179 args->u.normal.via_nrsegs = nr_segs;
1180 args->u.normal.via_iocb = iocb;
1182 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1183 &iocb->ki_pos, count);
1185 cl_env_put(env, &refcheck);
1188 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous read(): wrap the user buffer in a single-segment iovec
 * and a sync kiocb, then delegate to ll_file_aio_read().
 */
1193 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1197 struct iovec iov = { .iov_base = buf, .iov_len = count };
1198 struct kiocb *kiocb;
1203 env = cl_env_get(&refcheck);
1205 RETURN(PTR_ERR(env));
1207 kiocb = &ll_env_info(env)->lti_kiocb;
1208 init_sync_kiocb(kiocb, file);
1209 kiocb->ki_pos = *ppos;
/* Field name differs across kernel versions. */
1210 #ifdef HAVE_KIOCB_KI_LEFT
1211 kiocb->ki_left = count;
1213 kiocb->ki_nbytes = count;
1216 result = ll_file_aio_read(kiocb, &iov, 1, kiocb->ki_pos);
1217 *ppos = kiocb->ki_pos;
1219 cl_env_put(env, &refcheck);
1224 * Write to a file (through the page cache).
/*
 * aio write entry point: mirror of ll_file_aio_read() for CIT_WRITE —
 * validate the iovec, copy it to stable storage and run the generic
 * I/O engine.
 */
1227 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1228 unsigned long nr_segs, loff_t pos)
1231 struct vvp_io_args *args;
1232 struct iovec *local_iov;
1238 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1242 env = cl_env_get(&refcheck);
1244 RETURN(PTR_ERR(env));
/* Single-segment I/O can reuse the per-env iovec; larger arrays are
 * heap-allocated and freed below. */
1247 local_iov = &ll_env_info(env)->lti_local_iov;
1250 OBD_ALLOC(local_iov, sizeof(*iov) * nr_segs);
1251 if (local_iov == NULL) {
1252 cl_env_put(env, &refcheck);
1256 memcpy(local_iov, iov, sizeof(*iov) * nr_segs);
1259 args = ll_env_args(env, IO_NORMAL);
1260 args->u.normal.via_iov = local_iov;
1261 args->u.normal.via_nrsegs = nr_segs;
1262 args->u.normal.via_iocb = iocb;
1264 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1265 &iocb->ki_pos, count);
1266 cl_env_put(env, &refcheck);
1269 OBD_FREE(local_iov, sizeof(*iov) * nr_segs);
/*
 * Synchronous write(): wrap the user buffer in a single-segment iovec
 * and a sync kiocb, then delegate to ll_file_aio_write().
 */
1274 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1275 size_t count, loff_t *ppos)
1278 struct iovec iov = { .iov_base = (void __user *)buf,
1280 struct kiocb *kiocb;
1285 env = cl_env_get(&refcheck);
1287 RETURN(PTR_ERR(env));
1289 kiocb = &ll_env_info(env)->lti_kiocb;
1290 init_sync_kiocb(kiocb, file);
1291 kiocb->ki_pos = *ppos;
/* Field name differs across kernel versions. */
1292 #ifdef HAVE_KIOCB_KI_LEFT
1293 kiocb->ki_left = count;
1295 kiocb->ki_nbytes = count;
1298 result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos);
1299 *ppos = kiocb->ki_pos;
1301 cl_env_put(env, &refcheck);
1306 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read entry point: send page-cache file content to a pipe.
 * Fills IO_SPLICE args with the pipe and flags, then runs a CIT_READ
 * through ll_file_io_generic().
 */
1308 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1309 struct pipe_inode_info *pipe, size_t count,
1313 struct vvp_io_args *args;
1318 env = cl_env_get(&refcheck);
1320 RETURN(PTR_ERR(env));
1322 args = ll_env_args(env, IO_SPLICE);
1323 args->u.splice.via_pipe = pipe;
1324 args->u.splice.via_flags = flags;
1326 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1327 cl_env_put(env, &refcheck);
/*
 * Set the LOV striping EA on a file by re-opening it with an intent that
 * carries the user's lov_user_md. The open handle is released immediately;
 * only the layout side effect is wanted.
 * NOTE(review): elided chunk — intent initialization and error paths are
 * partially hidden.
 */
1331 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1332 __u64 flags, struct lov_user_md *lum,
1335 struct lookup_intent oit = {
1337 .it_flags = flags | MDS_OPEN_BY_FID,
/* Size lock serializes against concurrent size/layout updates. */
1342 ll_inode_size_lock(inode);
1343 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1345 GOTO(out_unlock, rc);
/* The open was only a vehicle for the setstripe; drop the handle now. */
1347 ll_release_openhandle(file->f_dentry, &oit);
1350 ll_inode_size_unlock(inode);
1351 ll_intent_release(&oit);
1352 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA (striping metadata) for @filename from the MDS.
 *
 * On success *lmmp points into the reply buffer of *request (caller must
 * keep the request until done with the EA) and *lmm_size is its length.
 * The EA arrives little-endian from the MDS and is swabbed to host order
 * on big-endian machines before being handed back.
 */
1357 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1358 struct lov_mds_md **lmmp, int *lmm_size,
1359 struct ptlrpc_request **request)
1361 struct ll_sb_info *sbi = ll_i2sbi(inode);
1362 struct mdt_body *body;
1363 struct lov_mds_md *lmm = NULL;
1364 struct ptlrpc_request *req = NULL;
1365 struct md_op_data *op_data;
/* Ask the MDC layer for the default EA buffer size to request. */
1368 rc = ll_get_default_mdsize(sbi, &lmmsize);
1372 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1373 strlen(filename), lmmsize,
1374 LUSTRE_OPC_ANY, NULL);
1375 if (IS_ERR(op_data))
1376 RETURN(PTR_ERR(op_data));
1378 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1379 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1380 ll_finish_md_op_data(op_data);
1382 CDEBUG(D_INFO, "md_getattr_name failed "
1383 "on %s: rc %d\n", filename, rc);
1387 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1388 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1390 lmmsize = body->mbo_eadatasize;
/* No striping EA present (e.g. file never striped) -> -ENODATA. */
1392 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1394 GOTO(out, rc = -ENODATA);
1397 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1398 LASSERT(lmm != NULL);
/* Only plain V1/V3 layouts are understood here. */
1400 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1401 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1402 GOTO(out, rc = -EPROTO);
1406 * This is coming from the MDS, so is probably in
1407 * little endian. We convert it to host endian before
1408 * passing it to userspace.
/* True only on big-endian hosts: swab is needed there. */
1410 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1413 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1414 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1417 /* if function called for directory - we should
1418 * avoid swab not existent lsm objects */
1419 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1420 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1421 if (S_ISREG(body->mbo_mode))
1422 lustre_swab_lov_user_md_objects(
1423 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1425 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1426 lustre_swab_lov_user_md_v3(
1427 (struct lov_user_md_v3 *)lmm);
1428 if (S_ISREG(body->mbo_mode))
1429 lustre_swab_lov_user_md_objects(
1430 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1437 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: admin-only path that sets a raw LOV EA
 * (including object IDs, hence MDS_OPEN_HAS_OBJS). Copies the user's
 * lov_user_md+one ost_data entry and applies it via
 * ll_lov_setstripe_ea_info().
 */
1442 static int ll_lov_setea(struct inode *inode, struct file *file,
1445 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1446 struct lov_user_md *lump;
1447 int lum_size = sizeof(struct lov_user_md) +
1448 sizeof(struct lov_user_ost_data);
/* Raw EAs can name arbitrary objects: restrict to CAP_SYS_ADMIN. */
1452 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1455 OBD_ALLOC_LARGE(lump, lum_size);
1459 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1460 OBD_FREE_LARGE(lump, lum_size);
1464 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1466 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the file's striping information to the user buffer @lum via the
 * cl_object layer (LL_IOC_LOV_GETSTRIPE backend).
 */
1470 static int ll_file_getstripe(struct inode *inode,
1471 struct lov_user_md __user *lum)
1478 env = cl_env_get(&refcheck);
1480 RETURN(PTR_ERR(env));
1482 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1483 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user's lov_user_md into kernel
 * space, apply it, then refresh the layout and echo the resulting stripe
 * info back to the caller's buffer.
 * NOTE(review): elided chunk — the branch structure around the put_user /
 * refresh calls is not fully visible.
 */
1487 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1490 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1491 struct lov_user_md *klum;
1493 __u64 flags = FMODE_WRITE;
1496 rc = ll_copy_user_md(lum, &klum);
1501 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* presumably clears the user's stripe_count on a specific path — the
 * surrounding condition is elided; verify against the full source. */
1505 put_user(0, &lum->lmm_stripe_count);
1507 ll_layout_refresh(inode, &gen);
1508 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1511 OBD_FREE(klum, lum_size);
/*
 * LL_IOC_GROUP_LOCK: take a group lock with gid @arg on the file.
 *
 * Only one group lock per open file descriptor is allowed; fd_flags is
 * protected by lli_lock. cl_get_grouplock() is called outside the
 * spinlock (it can block), so a post-acquisition re-check handles the
 * race where another thread got the lock first.
 */
1516 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1518 struct ll_inode_info *lli = ll_i2info(inode);
1519 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1520 struct ll_grouplock grouplock;
/* gid 0 is reserved as "no group lock". */
1525 CWARN("group id for group lock must not be 0\n");
1529 if (ll_file_nolock(file))
1530 RETURN(-EOPNOTSUPP);
1532 spin_lock(&lli->lli_lock);
1533 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1534 CWARN("group lock already existed with gid %lu\n",
1535 fd->fd_grouplock.lg_gid);
1536 spin_unlock(&lli->lli_lock);
1539 LASSERT(fd->fd_grouplock.lg_lock == NULL);
1540 spin_unlock(&lli->lli_lock);
/* Blocking call — must not hold lli_lock here. */
1542 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1543 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check under the lock: someone may have won the race meanwhile. */
1547 spin_lock(&lli->lli_lock);
1548 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1549 spin_unlock(&lli->lli_lock);
1550 CERROR("another thread just won the race\n");
1551 cl_put_grouplock(&grouplock);
1555 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1556 fd->fd_grouplock = grouplock;
1557 spin_unlock(&lli->lli_lock);
1559 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK: release the group lock with gid @arg held on this
 * file descriptor. The fd state is cleared under lli_lock; the actual
 * lock release (cl_put_grouplock) happens after dropping the spinlock.
 */
1563 static int ll_put_grouplock(struct inode *inode, struct file *file,
1566 struct ll_inode_info *lli = ll_i2info(inode);
1567 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1568 struct ll_grouplock grouplock;
1571 spin_lock(&lli->lli_lock);
1572 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1573 spin_unlock(&lli->lli_lock);
1574 CWARN("no group lock held\n");
1578 LASSERT(fd->fd_grouplock.lg_lock != NULL);
/* The caller must release the same gid it acquired. */
1580 if (fd->fd_grouplock.lg_gid != arg) {
1581 CWARN("group lock %lu doesn't match current id %lu\n",
1582 arg, fd->fd_grouplock.lg_gid);
1583 spin_unlock(&lli->lli_lock);
/* Take a local copy so the release can run outside the spinlock. */
1587 grouplock = fd->fd_grouplock;
1588 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1589 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1590 spin_unlock(&lli->lli_lock);
1592 cl_put_grouplock(&grouplock);
1593 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1598 * Close inode open handle
1600 * \param dentry [in] dentry which contains the inode
1601 * \param it [in,out] intent which contains open info and result
1604 * \retval <0 failure
/*
 * Close the MDS open handle carried in @it (an intent that performed an
 * open as a side effect). No-op for the root dentry or when the intent
 * holds no open disposition.
 */
1606 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1608 struct inode *inode = dentry->d_inode;
1609 struct obd_client_handle *och;
1615 /* Root ? Do nothing. */
1616 if (dentry->d_inode->i_sb->s_root == dentry)
1619 /* No open handle to close? Move away */
1620 if (!it_disposition(it, DISP_OPEN_OPEN))
1623 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1625 OBD_ALLOC(och, sizeof(*och));
1627 GOTO(out, rc = -ENOMEM);
/* Build the client handle from the intent, then close it on the MDS. */
1629 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1631 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1634 /* this one is in place of ll_file_open */
1635 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1636 ptlrpc_req_finished(it->d.lustre.it_data);
1637 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1643 * Get size for inode for which FIEMAP mapping is requested.
1644 * Make the FIEMAP get_info call and returns the result.
1645 * \param fiemap kernel buffer to hold extens
1646 * \param num_bytes kernel buffer size
/*
 * Perform the FIEMAP extent-mapping call for @inode into the kernel
 * buffer @fiemap (of @num_bytes). Validates flags, optionally flushes
 * dirty pages (FIEMAP_FLAG_SYNC), and delegates to cl_object_fiemap().
 */
1648 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1654 struct ll_fiemap_info_key fmkey = { .name = KEY_FIEMAP, };
1657 /* Checks for fiemap flags */
1658 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* Report back which flags we do support (FIEMAP convention). */
1659 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1663 /* Check for FIEMAP_FLAG_SYNC */
1664 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1665 rc = filemap_fdatawrite(inode->i_mapping);
1670 env = cl_env_get(&refcheck);
1672 RETURN(PTR_ERR(env));
/* Make sure we have an up-to-date size before deciding on extents. */
1674 if (i_size_read(inode) == 0) {
1675 rc = ll_glimpse_size(inode);
1680 fmkey.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1681 obdo_from_inode(&fmkey.oa, inode, OBD_MD_FLSIZE);
1682 obdo_set_parent_fid(&fmkey.oa, &ll_i2info(inode)->lli_fid);
1684 /* If filesize is 0, then there would be no objects for mapping */
1685 if (fmkey.oa.o_size == 0) {
1686 fiemap->fm_mapped_extents = 0;
1690 fmkey.fiemap = *fiemap;
1692 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1693 &fmkey, fiemap, &num_bytes);
1695 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a path via the MDC. Allocates a
 * getinfo_fid2path buffer sized by the caller-supplied gf_pathlen,
 * forwards the request with obd_iocontrol(), and copies the result back.
 * Restricted to CAP_DAC_READ_SEARCH unless the mount allows user fid2path.
 */
1699 int ll_fid2path(struct inode *inode, void __user *arg)
1701 struct obd_export *exp = ll_i2mdexp(inode);
1702 const struct getinfo_fid2path __user *gfin = arg;
1704 struct getinfo_fid2path *gfout;
1710 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1711 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1714 /* Only need to get the buflen */
1715 if (get_user(pathlen, &gfin->gf_pathlen))
/* Bound the allocation: user controls pathlen. */
1718 if (pathlen > PATH_MAX)
1721 outsize = sizeof(*gfout) + pathlen;
1722 OBD_ALLOC(gfout, outsize);
1726 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1727 GOTO(gf_free, rc = -EFAULT);
1729 /* Call mdc_iocontrol */
1730 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1734 if (copy_to_user(arg, gfout, outsize))
1738 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size and allocate a kernel fiemap buffer
 * from the user's fm_extent_count (with overflow check), copy the header
 * (and first extent, if any) in, run ll_do_fiemap(), and copy the header
 * plus mapped extents back out.
 */
1742 static int ll_ioctl_fiemap(struct inode *inode, struct fiemap __user *arg)
1744 struct fiemap *fiemap;
1750 /* Get the extent count so we can calculate the size of
1751 * required fiemap buffer */
1752 if (get_user(extent_count, &arg->fm_extent_count))
/* Reject counts whose buffer size would overflow size_t. */
1756 (SIZE_MAX - sizeof(*fiemap)) / sizeof(struct ll_fiemap_extent))
1758 num_bytes = sizeof(*fiemap) + (extent_count *
1759 sizeof(struct ll_fiemap_extent));
1761 OBD_ALLOC_LARGE(fiemap, num_bytes);
1765 /* get the fiemap value */
1766 if (copy_from_user(fiemap, arg, sizeof(*fiemap)))
1767 GOTO(error, rc = -EFAULT);
1769 /* If fm_extent_count is non-zero, read the first extent since
1770 * it is used to calculate end_offset and device from previous
1772 if (extent_count != 0) {
1773 if (copy_from_user(&fiemap->fm_extents[0],
1774 (char __user *)arg + sizeof(*fiemap),
1775 sizeof(struct ll_fiemap_extent)))
1776 GOTO(error, rc = -EFAULT);
1779 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* Copy back the header plus only the extents actually mapped. */
1783 ret_bytes = sizeof(struct fiemap);
1785 if (extent_count != 0)
1786 ret_bytes += (fiemap->fm_mapped_extents *
1787 sizeof(struct ll_fiemap_extent));
1789 if (copy_to_user((void __user *)arg, fiemap, ret_bytes))
1793 OBD_FREE_LARGE(fiemap, num_bytes);
1798 * Read the data_version for inode.
1800 * This value is computed using stripe object version on OST.
1801 * Version is computed using server side locking.
1803 * @param flags if do sync on the OST side;
1805 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1806 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * Compute the file's data_version (stripe object versions on the OSTs)
 * by running a CIT_DATA_VERSION cl_io. @flags selects the OST-side flush
 * behaviour (LL_DV_RD_FLUSH / LL_DV_WR_FLUSH). Restarts the io if the
 * layout changed underneath (ci_need_restart — loop structure elided).
 */
1808 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1810 struct cl_object *obj = ll_i2info(inode)->lli_clob;
1818 /* If no file object initialized, we consider its version is 0. */
1824 env = cl_env_get(&refcheck);
1826 RETURN(PTR_ERR(env));
1828 io = vvp_env_thread_io(env);
1830 io->u.ci_data_version.dv_data_version = 0;
1831 io->u.ci_data_version.dv_flags = flags;
1834 if (cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj) == 0)
1835 result = cl_io_loop(env, io);
1837 result = io->ci_result;
1839 *data_version = io->u.ci_data_version.dv_data_version;
1841 cl_io_fini(env, io);
/* Layout change raced with us: redo the io (retry target elided). */
1843 if (unlikely(io->ci_need_restart))
1846 cl_env_put(env, &refcheck);
1852 * Trigger a HSM release request for the provided inode.
/*
 * Trigger an HSM release for @inode: take a write lease opened with
 * MDS_OPEN_RELEASE, flush and grab the latest data_version, merge
 * attributes, then close the handle so the MDT performs the release.
 * The lease lock handle stays alive until mdc_hsm_release_pack().
 */
1854 int ll_hsm_release(struct inode *inode)
1856 struct cl_env_nest nest;
1858 struct obd_client_handle *och = NULL;
1859 __u64 data_version = 0;
1863 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1864 ll_get_fsname(inode->i_sb, NULL, 0),
1865 PFID(&ll_i2info(inode)->lli_fid));
1867 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1869 GOTO(out, rc = PTR_ERR(och));
1871 /* Grab latest data_version and [am]time values */
1872 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1876 env = cl_env_nested_get(&nest);
1878 GOTO(out, rc = PTR_ERR(env));
1880 ll_merge_attr(env, inode);
1881 cl_env_nested_put(&nest, env);
1883 /* Release the file.
1884 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1885 * we still need it to pack l_remote_handle to MDT. */
1886 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
/* Error path: make sure the lease is closed if it was obtained. */
1892 if (och != NULL && !IS_ERR(och)) /* close the file */
1893 ll_lease_close(och, inode, NULL);
/* Scratch state for ll_swap_layouts(): saved attrs, the two inodes, and
 * per-side data_version check flags (dv value fields elided from view). */
1898 struct ll_swap_stack {
1899 struct iattr ia1, ia2;
1901 struct inode *inode1, *inode2;
1902 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS backend: atomically exchange the layouts of
 * two regular files on the same filesystem.
 *
 * Visible flow: permission/same-fs checks, FID ordering to sequentialize
 * lock acquisition, optional group locks to flush dirty cache, optional
 * data_version checks, then the MDT swap via obd_iocontrol() with a
 * mdc_swap_layouts payload; finally mtime/atime restoration if requested.
 * NOTE(review): elided chunk — several guard/label lines are hidden.
 */
1905 static int ll_swap_layouts(struct file *file1, struct file *file2,
1906 struct lustre_swap_layouts *lsl)
1908 struct mdc_swap_layouts msl;
1909 struct md_op_data *op_data;
1912 struct ll_swap_stack *llss = NULL;
1915 OBD_ALLOC_PTR(llss);
1919 llss->inode1 = file1->f_dentry->d_inode;
1920 llss->inode2 = file2->f_dentry->d_inode;
1922 if (!S_ISREG(llss->inode2->i_mode))
1923 GOTO(free, rc = -EINVAL);
1925 if (inode_permission(llss->inode1, MAY_WRITE) ||
1926 inode_permission(llss->inode2, MAY_WRITE))
1927 GOTO(free, rc = -EPERM);
1929 if (llss->inode2->i_sb != llss->inode1->i_sb)
1930 GOTO(free, rc = -EXDEV);
1932 /* we use 2 bool because it is easier to swap than 2 bits */
1933 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1934 llss->check_dv1 = true;
1936 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1937 llss->check_dv2 = true;
1939 /* we cannot use lsl->sl_dvX directly because we may swap them */
1940 llss->dv1 = lsl->sl_dv1;
1941 llss->dv2 = lsl->sl_dv2;
1943 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1944 if (rc == 0) /* same file, done! */
/* Order the pair by FID so two concurrent swaps cannot deadlock. */
1947 if (rc < 0) { /* sequentialize it */
1948 swap(llss->inode1, llss->inode2);
1950 swap(llss->dv1, llss->dv2);
1951 swap(llss->check_dv1, llss->check_dv2);
1955 if (gid != 0) { /* application asks to flush dirty cache */
1956 rc = ll_get_grouplock(llss->inode1, file1, gid);
1960 rc = ll_get_grouplock(llss->inode2, file2, gid);
1962 ll_put_grouplock(llss->inode1, file1, gid);
1967 /* to be able to restore mtime and atime after swap
1968 * we need to first save them */
1970 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
1971 llss->ia1.ia_mtime = llss->inode1->i_mtime;
1972 llss->ia1.ia_atime = llss->inode1->i_atime;
1973 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
1974 llss->ia2.ia_mtime = llss->inode2->i_mtime;
1975 llss->ia2.ia_atime = llss->inode2->i_atime;
1976 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
1979 /* ultimate check, before swaping the layouts we check if
1980 * dataversion has changed (if requested) */
1981 if (llss->check_dv1) {
1982 rc = ll_data_version(llss->inode1, &dv, 0);
1985 if (dv != llss->dv1)
1986 GOTO(putgl, rc = -EAGAIN);
1989 if (llss->check_dv2) {
1990 rc = ll_data_version(llss->inode2, &dv, 0);
1993 if (dv != llss->dv2)
1994 GOTO(putgl, rc = -EAGAIN);
1997 /* struct md_op_data is used to send the swap args to the mdt
1998 * only flags is missing, so we use struct mdc_swap_layouts
1999 * through the md_op_data->op_data */
2000 /* flags from user space have to be converted before they are send to
2001 * server, no flag is sent today, they are only used on the client */
2004 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2005 0, LUSTRE_OPC_ANY, &msl);
2006 if (IS_ERR(op_data))
2007 GOTO(free, rc = PTR_ERR(op_data));
2009 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2010 sizeof(*op_data), op_data, NULL);
2011 ll_finish_md_op_data(op_data);
/* Drop group locks in reverse acquisition order. */
2015 ll_put_grouplock(llss->inode2, file2, gid);
2016 ll_put_grouplock(llss->inode1, file1, gid);
2019 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2023 /* clear useless flags */
2024 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2025 llss->ia1.ia_valid &= ~ATTR_MTIME;
2026 llss->ia2.ia_valid &= ~ATTR_MTIME;
2029 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2030 llss->ia1.ia_valid &= ~ATTR_ATIME;
2031 llss->ia2.ia_valid &= ~ATTR_ATIME;
2034 /* update time if requested */
/* Note the cross-application: ia2 (saved from inode2) goes to file1 and
 * vice versa, because the layouts (and data) were exchanged. */
2036 if (llss->ia2.ia_valid != 0) {
2037 mutex_lock(&llss->inode1->i_mutex);
2038 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2039 mutex_unlock(&llss->inode1->i_mutex);
2042 if (llss->ia1.ia_valid != 0) {
2045 mutex_lock(&llss->inode2->i_mutex);
2046 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2047 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Set/clear HSM state flags on @inode via the MDT. Validates the masks
 * (range, privilege for non-HSM_USER_MASK bits, archive id bound) before
 * forwarding LL_IOC_HSM_STATE_SET through obd_iocontrol().
 */
2059 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2061 struct md_op_data *op_data;
2065 /* Detect out-of range masks */
2066 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2069 /* Non-root users are forbidden to set or clear flags which are
2070 * NOT defined in HSM_USER_MASK. */
2071 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2072 !cfs_capable(CFS_CAP_SYS_ADMIN))
2075 /* Detect out-of range archive id */
2076 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2077 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2080 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2081 LUSTRE_OPC_ANY, hss);
2082 if (IS_ERR(op_data))
2083 RETURN(PTR_ERR(op_data));
2085 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2086 sizeof(*op_data), op_data, NULL);
2088 ll_finish_md_op_data(op_data);
/*
 * HSM import: mark a freshly-created stub file as ARCHIVED|EXISTS|RELEASED
 * in the given archive, then force-set its mode/uid/gid/size/times from
 * the hsm_user_import record so the stub mirrors the archived object.
 */
2093 static int ll_hsm_import(struct inode *inode, struct file *file,
2094 struct hsm_user_import *hui)
2096 struct hsm_state_set *hss = NULL;
2097 struct iattr *attr = NULL;
2101 if (!S_ISREG(inode->i_mode))
2107 GOTO(out, rc = -ENOMEM);
/* Step 1: set the HSM flags so the MDT treats the file as released. */
2109 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2110 hss->hss_archive_id = hui->hui_archive_id;
2111 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2112 rc = ll_hsm_state_set(inode, hss);
2116 OBD_ALLOC_PTR(attr);
2118 GOTO(out, rc = -ENOMEM);
/* Step 2: stamp the stub with the archived file's attributes. */
2120 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2121 attr->ia_mode |= S_IFREG;
2122 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2123 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2124 attr->ia_size = hui->hui_size;
2125 attr->ia_mtime.tv_sec = hui->hui_mtime;
2126 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2127 attr->ia_atime.tv_sec = hui->hui_atime;
2128 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
/* ATTR_FORCE: apply even though the caller may not own the file. */
2130 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2131 ATTR_UID | ATTR_GID |
2132 ATTR_MTIME | ATTR_MTIME_SET |
2133 ATTR_ATIME | ATTR_ATIME_SET;
2135 mutex_lock(&inode->i_mutex);
2137 rc = ll_setattr_raw(file->f_dentry, attr, true);
2141 mutex_unlock(&inode->i_mutex);
/* Translate an och_flags fmode into the LL_LEASE_{RD,WR}LCK bitmask
 * reported to userspace by the lease ioctls. */
2153 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2155 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2156 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * Main ioctl dispatcher for regular files. Each case either handles the
 * command inline (flag twiddling, copy_{from,to}_user plumbing) or
 * forwards to a dedicated helper / obd_iocontrol(). Unrecognized
 * commands fall through to the registered ioctl handlers and finally to
 * the data export.
 * NOTE(review): elided chunk — many RETURN/brace lines between cases are
 * hidden; comments describe only the visible statements.
 */
2160 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2162 struct inode *inode = file->f_dentry->d_inode;
2163 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2167 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2168 PFID(ll_inode2fid(inode)), inode, cmd);
2169 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2171 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2172 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2176 case LL_IOC_GETFLAGS:
2177 /* Get the current value of the file flags */
2178 return put_user(fd->fd_flags, (int __user *)arg);
2179 case LL_IOC_SETFLAGS:
2180 case LL_IOC_CLRFLAGS:
2181 /* Set or clear specific file flags */
2182 /* XXX This probably needs checks to ensure the flags are
2183 * not abused, and to handle any flag side effects.
2185 if (get_user(flags, (int __user *) arg))
2188 if (cmd == LL_IOC_SETFLAGS) {
/* LL_FILE_IGNORE_LOCK only makes sense for O_DIRECT files. */
2189 if ((flags & LL_FILE_IGNORE_LOCK) &&
2190 !(file->f_flags & O_DIRECT)) {
2191 CERROR("%s: unable to disable locking on "
2192 "non-O_DIRECT file\n", current->comm);
2196 fd->fd_flags |= flags;
2198 fd->fd_flags &= ~flags;
2201 case LL_IOC_LOV_SETSTRIPE:
2202 RETURN(ll_lov_setstripe(inode, file, arg));
2203 case LL_IOC_LOV_SETEA:
2204 RETURN(ll_lov_setea(inode, file, arg));
2205 case LL_IOC_LOV_SWAP_LAYOUTS: {
2207 struct lustre_swap_layouts lsl;
2209 if (copy_from_user(&lsl, (char __user *)arg,
2210 sizeof(struct lustre_swap_layouts)))
/* Both files must be writable for a layout swap. */
2213 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2216 file2 = fget(lsl.sl_fd);
2221 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2222 rc = ll_swap_layouts(file, file2, &lsl);
2226 case LL_IOC_LOV_GETSTRIPE:
2227 RETURN(ll_file_getstripe(inode,
2228 (struct lov_user_md __user *)arg));
2229 case FSFILT_IOC_FIEMAP:
2230 RETURN(ll_ioctl_fiemap(inode, (struct fiemap __user *)arg));
2231 case FSFILT_IOC_GETFLAGS:
2232 case FSFILT_IOC_SETFLAGS:
2233 RETURN(ll_iocontrol(inode, file, cmd, arg));
2234 case FSFILT_IOC_GETVERSION_OLD:
2235 case FSFILT_IOC_GETVERSION:
2236 RETURN(put_user(inode->i_generation, (int __user *)arg));
2237 case LL_IOC_GROUP_LOCK:
2238 RETURN(ll_get_grouplock(inode, file, arg));
2239 case LL_IOC_GROUP_UNLOCK:
2240 RETURN(ll_put_grouplock(inode, file, arg));
2241 case IOC_OBD_STATFS:
2242 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2244 /* We need to special case any other ioctls we want to handle,
2245 * to send them to the MDS/OST as appropriate and to properly
2246 * network encode the arg field.
2247 case FSFILT_IOC_SETVERSION_OLD:
2248 case FSFILT_IOC_SETVERSION:
2250 case LL_IOC_FLUSHCTX:
2251 RETURN(ll_flush_ctx(inode));
2252 case LL_IOC_PATH2FID: {
2253 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2254 sizeof(struct lu_fid)))
2259 case LL_IOC_GETPARENT:
2260 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2262 case OBD_IOC_FID2PATH:
2263 RETURN(ll_fid2path(inode, (void __user *)arg));
2264 case LL_IOC_DATA_VERSION: {
2265 struct ioc_data_version idv;
2268 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* Only the defined flush flags are honoured from userspace. */
2271 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2272 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2275 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2281 case LL_IOC_GET_MDTIDX: {
2284 mdtidx = ll_get_mdt_idx(inode);
2288 if (put_user((int)mdtidx, (int __user *)arg))
2293 case OBD_IOC_GETDTNAME:
2294 case OBD_IOC_GETMDNAME:
2295 RETURN(ll_get_obd_name(inode, cmd, arg));
2296 case LL_IOC_HSM_STATE_GET: {
2297 struct md_op_data *op_data;
2298 struct hsm_user_state *hus;
2305 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2306 LUSTRE_OPC_ANY, hus);
2307 if (IS_ERR(op_data)) {
2309 RETURN(PTR_ERR(op_data));
2312 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2315 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2318 ll_finish_md_op_data(op_data);
2322 case LL_IOC_HSM_STATE_SET: {
2323 struct hsm_state_set *hss;
2330 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2335 rc = ll_hsm_state_set(inode, hss);
2340 case LL_IOC_HSM_ACTION: {
2341 struct md_op_data *op_data;
2342 struct hsm_current_action *hca;
2349 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2350 LUSTRE_OPC_ANY, hca);
2351 if (IS_ERR(op_data)) {
2353 RETURN(PTR_ERR(op_data));
2356 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2359 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2362 ll_finish_md_op_data(op_data);
2366 case LL_IOC_SET_LEASE: {
2367 struct ll_inode_info *lli = ll_i2info(inode);
2368 struct obd_client_handle *och = NULL;
/* A lease mode may not exceed the file's own open mode. */
2373 case LL_LEASE_WRLCK:
2374 if (!(file->f_mode & FMODE_WRITE))
2376 fmode = FMODE_WRITE;
2378 case LL_LEASE_RDLCK:
2379 if (!(file->f_mode & FMODE_READ))
2383 case LL_LEASE_UNLCK:
2384 mutex_lock(&lli->lli_och_mutex);
2385 if (fd->fd_lease_och != NULL) {
2386 och = fd->fd_lease_och;
2387 fd->fd_lease_och = NULL;
2389 mutex_unlock(&lli->lli_och_mutex);
2394 fmode = och->och_flags;
2395 rc = ll_lease_close(och, inode, &lease_broken);
/* Report the lease type that was just released. */
2402 RETURN(ll_lease_type_from_fmode(fmode));
2407 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2409 /* apply for lease */
2410 och = ll_lease_open(inode, file, fmode, 0);
2412 RETURN(PTR_ERR(och));
2415 mutex_lock(&lli->lli_och_mutex);
2416 if (fd->fd_lease_och == NULL) {
2417 fd->fd_lease_och = och;
2420 mutex_unlock(&lli->lli_och_mutex);
2422 /* impossible now that only excl is supported for now */
2423 ll_lease_close(och, inode, &lease_broken);
2428 case LL_IOC_GET_LEASE: {
2429 struct ll_inode_info *lli = ll_i2info(inode);
2430 struct ldlm_lock *lock = NULL;
2433 mutex_lock(&lli->lli_och_mutex);
2434 if (fd->fd_lease_och != NULL) {
2435 struct obd_client_handle *och = fd->fd_lease_och;
/* Lease is only "held" while its DLM lock is not cancelled. */
2437 lock = ldlm_handle2lock(&och->och_lease_handle);
2439 lock_res_and_lock(lock);
2440 if (!ldlm_is_cancel(lock))
2441 fmode = och->och_flags;
2443 unlock_res_and_lock(lock);
2444 LDLM_LOCK_PUT(lock);
2447 mutex_unlock(&lli->lli_och_mutex);
2449 RETURN(ll_lease_type_from_fmode(fmode));
2451 case LL_IOC_HSM_IMPORT: {
2452 struct hsm_user_import *hui;
2458 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2463 rc = ll_hsm_import(inode, file, hui);
/* Last resort: registered ioctl handlers, then the data export. */
2473 ll_iocontrol_call(inode, file, cmd, arg, &err))
2476 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2477 (void __user *)arg));
2482 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Compat helper (pre-generic_file_llseek_size kernels): validate @offset
 * against sign/maxsize rules and commit it to file->f_pos, resetting
 * f_version when the position actually changes.
 */
2483 static inline loff_t
2484 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2486 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2488 if (offset > maxsize)
2491 if (offset != file->f_pos) {
2492 file->f_pos = offset;
2493 file->f_version = 0;
/*
 * Compat copy of the kernel's generic_file_llseek_size() for kernels
 * that lack it. Handles SEEK_CUR position-query fast path, SEEK_DATA /
 * SEEK_HOLE virtual-hole semantics, then commits via llseek_execute().
 * NOTE(review): the switch/case labels are elided; comments follow the
 * upstream implementation's visible fragments.
 */
2499 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2500 loff_t maxsize, loff_t eof)
2502 struct inode *inode = file->f_dentry->d_inode;
2510 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2511 * position-querying operation. Avoid rewriting the "same"
2512 * f_pos value back to the file because a concurrent read(),
2513 * write() or lseek() might have altered it
2518 * f_lock protects against read/modify/write race with other
2519 * SEEK_CURs. Note that parallel writes and reads behave
/* i_mutex serializes the read-modify-write of f_pos for SEEK_CUR. */
2522 mutex_lock(&inode->i_mutex);
2523 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2524 mutex_unlock(&inode->i_mutex);
2528 * In the generic case the entire file is data, so as long as
2529 * offset isn't at the end of the file then the offset is data.
2536 * There is a virtual hole at the end of the file, so as long as
2537 * offset isn't i_size or larger, return i_size.
2545 return llseek_execute(file, offset, maxsize);
/*
 * llseek entry point. For SEEK_END/SEEK_HOLE/SEEK_DATA the file size
 * must be current, so a glimpse is performed first; the actual seek is
 * delegated to ll_generic_file_llseek_size() bounded by the filesystem's
 * maximum byte offset.
 */
2549 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2551 struct inode *inode = file->f_dentry->d_inode;
2552 loff_t retval, eof = 0;
/* Precompute target for the trace message only. */
2555 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2556 (origin == SEEK_CUR) ? file->f_pos : 0);
2557 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2558 PFID(ll_inode2fid(inode)), inode, retval, retval,
2560 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
/* These origins depend on an accurate i_size: glimpse the OSTs first. */
2562 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2563 retval = ll_glimpse_size(inode);
2566 eof = i_size_read(inode);
2569 retval = ll_generic_file_llseek_size(file, offset, origin,
2570 ll_file_maxbytes(inode), eof);
/*
 * flush (close-time) handler: surface any async writeback errors that
 * were recorded for this mapping. Errors already reported to the
 * application via fd_write_failed are not reported twice.
 */
2574 static int ll_flush(struct file *file, fl_owner_t id)
2576 struct inode *inode = file->f_dentry->d_inode;
2577 struct ll_inode_info *lli = ll_i2info(inode);
2578 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2581 LASSERT(!S_ISDIR(inode->i_mode));
2583 /* catch async errors that were recorded back when async writeback
2584 * failed for pages in this mapping. */
2585 rc = lli->lli_async_rc;
2586 lli->lli_async_rc = 0;
2587 if (lli->lli_clob != NULL) {
2588 err = lov_read_and_clear_async_rc(lli->lli_clob);
2593 /* The application has been told write failure already.
2594 * Do not report failure again. */
2595 if (fd->fd_write_failed)
2597 return rc ? -EIO : 0;
2601 * Called to make sure a portion of file has been written out.
2602 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2604 * Return how many pages have been written.
/*
 * Write out [start, end] of @inode via a CIT_FSYNC cl_io. Unless @mode
 * is CL_FSYNC_LOCAL, OST_SYNC RPCs are issued. Returns the number of
 * pages written on success (fi_nr_written), negative errno on failure.
 */
2606 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2607 enum cl_fsync_mode mode, int ignore_layout)
2609 struct cl_env_nest nest;
2612 struct obd_capa *capa = NULL;
2613 struct cl_fsync_io *fio;
/* Only the four defined fsync modes are accepted. */
2617 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2618 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2621 env = cl_env_nested_get(&nest);
2623 RETURN(PTR_ERR(env));
2625 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2627 io = vvp_env_thread_io(env);
2628 io->ci_obj = ll_i2info(inode)->lli_clob;
2629 io->ci_ignore_layout = ignore_layout;
2631 /* initialize parameters for sync */
2632 fio = &io->u.ci_fsync;
2633 fio->fi_capa = capa;
2634 fio->fi_start = start;
2636 fio->fi_fid = ll_inode2fid(inode);
2637 fio->fi_mode = mode;
2638 fio->fi_nr_written = 0;
2640 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2641 result = cl_io_loop(env, io);
2643 result = io->ci_result;
/* On success report how many pages were pushed out. */
2645 result = fio->fi_nr_written;
2646 cl_io_fini(env, io);
2647 cl_env_nested_put(&nest, env);
2655 * When dentry is provided (the 'else' case), *file->f_dentry may be
2656 * null and dentry must be used directly rather than pulled from
2657 * *file->f_dentry as is done otherwise.
/*
 * fsync entry point, with kernel-version compat for the 4-arg, 2-arg and
 * dentry variants. Flushes the page cache, collects recorded async
 * writeback errors, syncs metadata on the MDS (md_fsync), then syncs
 * data on the OSTs via cl_sync_file_range(CL_FSYNC_ALL) for regular
 * files, updating fd_write_failed accordingly.
 */
2660 #ifdef HAVE_FILE_FSYNC_4ARGS
2661 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2663 struct dentry *dentry = file->f_dentry;
2664 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2665 int ll_fsync(struct file *file, int datasync)
2667 struct dentry *dentry = file->f_dentry;
2669 loff_t end = LLONG_MAX;
2671 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2674 loff_t end = LLONG_MAX;
2676 struct inode *inode = dentry->d_inode;
2677 struct ll_inode_info *lli = ll_i2info(inode);
2678 struct ptlrpc_request *req;
2679 struct obd_capa *oc;
2683 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2684 PFID(ll_inode2fid(inode)), inode);
2685 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2687 #ifdef HAVE_FILE_FSYNC_4ARGS
/* Newer kernels: we must write+wait the range and take i_mutex here. */
2688 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2689 mutex_lock(&inode->i_mutex);
2691 /* fsync's caller has already called _fdata{sync,write}, we want
2692 * that IO to finish before calling the osc and mdc sync methods */
2693 rc = filemap_fdatawait(inode->i_mapping);
2696 /* catch async errors that were recorded back when async writeback
2697 * failed for pages in this mapping. */
2698 if (!S_ISDIR(inode->i_mode)) {
2699 err = lli->lli_async_rc;
2700 lli->lli_async_rc = 0;
2703 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* Sync metadata on the MDS. */
2708 oc = ll_mdscapa_get(inode);
2709 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2715 ptlrpc_req_finished(req);
2717 if (S_ISREG(inode->i_mode)) {
2718 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
/* Sync data on the OSTs and remember success/failure for ll_flush(). */
2720 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2721 if (rc == 0 && err < 0)
2724 fd->fd_write_failed = true;
2726 fd->fd_write_failed = false;
2729 #ifdef HAVE_FILE_FSYNC_4ARGS
2730 mutex_unlock(&inode->i_mutex);
/*
 * Implement flock(2)/fcntl(2) advisory locking for a Lustre file by
 * enqueueing an LDLM_FLOCK lock with the MDT.  Both BSD-style flock
 * (FL_FLOCK, whole-file) and POSIX byte-range locks (FL_POSIX) are
 * mapped onto the same DLM flock policy.  After a successful server-side
 * enqueue the lock is also registered with the local VFS lock lists so
 * the kernel's own bookkeeping stays consistent.
 * NOTE(review): several statements (variable declarations, switch
 * labels, RETURN) are not visible in this extract.
 */
2736 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2738 struct inode *inode = file->f_dentry->d_inode;
2739 struct ll_sb_info *sbi = ll_i2sbi(inode);
2740 struct ldlm_enqueue_info einfo = {
2741 .ei_type = LDLM_FLOCK,
2742 .ei_cb_cp = ldlm_flock_completion_ast,
2743 .ei_cbdata = file_lock,
2745 struct md_op_data *op_data;
2746 struct lustre_handle lockh = {0};
2747 ldlm_policy_data_t flock = {{0}};
/* Remember the caller's lock type: fl_type is overwritten with the DLM
 * mode below and restored before the VFS-side lock bookkeeping. */
2748 int fl_type = file_lock->fl_type;
2754 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2755 PFID(ll_inode2fid(inode)), file_lock);
2757 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2759 if (file_lock->fl_flags & FL_FLOCK) {
2760 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2761 /* flocks are whole-file locks */
2762 flock.l_flock.end = OFFSET_MAX;
2763 /* For flocks owner is determined by the local file descriptor */
2764 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2765 } else if (file_lock->fl_flags & FL_POSIX) {
2766 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2767 flock.l_flock.start = file_lock->fl_start;
2768 flock.l_flock.end = file_lock->fl_end;
2772 flock.l_flock.pid = file_lock->fl_pid;
2774 /* Somewhat ugly workaround for svc lockd.
2775 * lockd installs custom fl_lmops->lm_compare_owner that checks
2776 * for the fl_owner to be the same (which it always is on local node
2777 * I guess between lockd processes) and then compares pid.
2778 * As such we assign pid to the owner field to make it all work,
2779 * conflict with normal locks is unlikely since pid space and
2780 * pointer space for current->files are not intersecting */
2781 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2782 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Read lock maps to a protected-read DLM mode. */
2786 einfo.ei_mode = LCK_PR;
2789 /* An unlock request may or may not have any relation to
2790 * existing locks so we may not be able to pass a lock handle
2791 * via a normal ldlm_lock_cancel() request. The request may even
2792 * unlock a byte range in the middle of an existing lock. In
2793 * order to process an unlock request we need all of the same
2794 * information that is given with a normal read or write record
2795 * lock request. To avoid creating another ldlm unlock (cancel)
2796 * message we'll treat a LCK_NL flock request as an unlock. */
2797 einfo.ei_mode = LCK_NL;
/* Write lock maps to a protected-write DLM mode. */
2800 einfo.ei_mode = LCK_PW;
2803 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
/* Non-blocking request: fail instead of waiting for a conflict. */
2818 flags = LDLM_FL_BLOCK_NOWAIT;
/* F_GETLK-style request: only test for conflicts, never acquire. */
2824 flags = LDLM_FL_TEST_LOCK;
2827 CERROR("unknown fcntl lock command: %d\n", cmd);
2831 /* Save the old mode so that if the mode in the lock changes we
2832 * can decrement the appropriate reader or writer refcount. */
2833 file_lock->fl_type = einfo.ei_mode;
2835 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2836 LUSTRE_OPC_ANY, NULL);
2837 if (IS_ERR(op_data))
2838 RETURN(PTR_ERR(op_data));
2840 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2841 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2842 flock.l_flock.pid, flags, einfo.ei_mode,
2843 flock.l_flock.start, flock.l_flock.end);
/* Enqueue the flock with the MDT; this may block until granted. */
2845 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2848 /* Restore the file lock type if not TEST lock. */
2849 if (!(flags & LDLM_FL_TEST_LOCK))
2850 file_lock->fl_type = fl_type;
/* Mirror the granted/released lock into the kernel's local lists. */
2852 if ((file_lock->fl_flags & FL_FLOCK) &&
2853 (rc == 0 || file_lock->fl_type == F_UNLCK))
2854 rc2 = flock_lock_file_wait(file, file_lock);
2855 if ((file_lock->fl_flags & FL_POSIX) &&
2856 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2857 !(flags & LDLM_FL_TEST_LOCK))
2858 rc2 = posix_lock_file_wait(file, file_lock);
/* Local bookkeeping failed after the server granted the lock: undo the
 * server-side lock by enqueueing an LCK_NL (unlock) request. */
2860 if (rc2 && file_lock->fl_type != F_UNLCK) {
2861 einfo.ei_mode = LCK_NL;
2862 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2867 ll_finish_md_op_data(op_data);
/*
 * Look up the FID of entry \a name under directory \a parent with a
 * getattr-by-name RPC to the MDS.  Only OBD_MD_FLID is requested, so
 * just the identity is fetched, not full attributes.
 *
 * \param parent	directory inode to search in
 * \param name		entry name (length given explicitly)
 * \param namelen	length of \a name
 * \param fid		[out] FID of the entry on success
 * \retval 0 on success, negative errno on failure
 */
2872 int ll_get_fid_by_name(struct inode *parent, const char *name,
2873 int namelen, struct lu_fid *fid)
2875 struct md_op_data *op_data = NULL;
2876 struct mdt_body *body;
2877 struct ptlrpc_request *req;
2881 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2882 LUSTRE_OPC_ANY, NULL);
2883 if (IS_ERR(op_data))
2884 RETURN(PTR_ERR(op_data));
/* Request only the FID, nothing else. */
2886 op_data->op_valid = OBD_MD_FLID;
2887 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2888 ll_finish_md_op_data(op_data);
2892 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
/* -EFAULT if the reply does not carry an MDT body. */
2894 GOTO(out_req, rc = -EFAULT);
2896 *fid = body->mbo_fid1;
2898 ptlrpc_req_finished(req);
/*
 * Migrate the entry \a name under \a parent to MDT index \a mdtidx.
 * The migration is implemented as a special rename of the entry onto
 * itself with CLI_MIGRATE set and op_mds pointing at the target MDT.
 * The child's FID is taken from the dcache when possible, otherwise
 * fetched from the MDS by name.
 *
 * \retval 0 on success (including "already on target MDT"), negative
 *	   errno on failure.
 */
2902 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2903 const char *name, int namelen)
2905 struct dentry *dchild = NULL;
2906 struct inode *child_inode = NULL;
2907 struct md_op_data *op_data;
2908 struct ptlrpc_request *request = NULL;
2913 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2914 name, PFID(ll_inode2fid(parent)), mdtidx);
2916 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2917 0, LUSTRE_OPC_ANY, NULL);
2918 if (IS_ERR(op_data))
2919 RETURN(PTR_ERR(op_data));
2921 /* Get child FID first */
2922 qstr.hash = full_name_hash(name, namelen);
/* Fast path: the child is in the dcache; pin its inode, record its FID
 * and invalidate its aliases since it is about to move MDTs. */
2925 dchild = d_lookup(file->f_dentry, &qstr);
2926 if (dchild != NULL) {
2927 if (dchild->d_inode != NULL) {
2928 child_inode = igrab(dchild->d_inode);
2929 if (child_inode != NULL) {
2930 mutex_lock(&child_inode->i_mutex);
2931 op_data->op_fid3 = *ll_inode2fid(child_inode);
2932 ll_invalidate_aliases(child_inode);
/* Slow path: no cached inode, ask the MDS for the FID by name. */
2937 rc = ll_get_fid_by_name(parent, name, namelen,
2943 if (!fid_is_sane(&op_data->op_fid3)) {
2944 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
2945 ll_get_fsname(parent->i_sb, NULL, 0), name,
2946 PFID(&op_data->op_fid3));
2947 GOTO(out_free, rc = -EINVAL);
/* Nothing to do when the child already lives on the target MDT. */
2950 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
2955 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
2956 PFID(&op_data->op_fid3), mdtidx);
2957 GOTO(out_free, rc = 0);
/* Issue the migrating rename: same source and target name, with
 * CLI_MIGRATE telling the MDT to move the object to op_mds. */
2960 op_data->op_mds = mdtidx;
2961 op_data->op_cli_flags = CLI_MIGRATE;
2962 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
2963 namelen, name, namelen, &request);
2965 ll_update_times(request, parent);
2967 ptlrpc_req_finished(request);
/* Drop the stale local inode: the object has been re-created on the
 * target MDT, so the cached one must not be reused. */
2972 if (child_inode != NULL) {
2973 clear_nlink(child_inode);
2974 mutex_unlock(&child_inode->i_mutex);
2978 ll_finish_md_op_data(op_data);
/*
 * .flock/.lock handler installed for "-o noflock" mounts; per the
 * ll_file_operations_noflock table comment it rejects flock calls
 * (returning ENOSYS).  Body not visible in this extract -- TODO confirm.
 */
2983 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2991 * test if some locks matching bits and l_req_mode are acquired
2992 * - bits can be in different locks
2993 * - if found clear the common lock bits in *bits
2994 * - the bits not found, are kept in *bits
2996 * \param bits [IN/OUT] searched lock bits; found bits are cleared
2997 * \param l_req_mode [IN] searched lock mode
2998 * \retval boolean, true iff all bits are found
3000 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3002 struct lustre_handle lockh;
3003 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four modes at once. */
3004 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3005 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3014 fid = &ll_i2info(inode)->lli_fid;
3015 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3016 ldlm_lockname[mode]);
/* TEST_LOCK: match only, do not take a reference on the found lock. */
3018 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe one inodebit at a time because the searched bits may be
 * spread over several different locks. */
3019 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3020 policy.l_inodebits.bits = *bits & (1 << i);
3021 if (policy.l_inodebits.bits == 0)
3024 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3025 &policy, mode, &lockh)) {
3026 struct ldlm_lock *lock;
3028 lock = ldlm_handle2lock(&lockh);
/* Clear every bit the matched lock covers, not just the probed one. */
3031 ~(lock->l_policy_data.l_inodebits.bits);
3032 LDLM_LOCK_PUT(lock);
3034 *bits &= ~policy.l_inodebits.bits;
/*
 * Try to match (and take a reference on) a granted MDS inodebits lock
 * covering \a bits on \a inode.  Unlike ll_have_md_lock() this does not
 * use LDLM_FL_TEST_LOCK, so on success the caller holds the lock via
 * \a lockh and must eventually drop it.
 *
 * \retval the matched lock mode, or 0 if no lock was found.
 */
3041 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3042 struct lustre_handle *lockh, __u64 flags,
3045 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3050 fid = &ll_i2info(inode)->lli_fid;
3051 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3053 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3054 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Post-process the result of an inode revalidation RPC: translate
 * -ENOENT on an already-unlinked inode into success, and log any other
 * failure (quietly for -EACCES/-EIDRM, loudly otherwise).
 */
3059 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3061 /* Already unlinked. Just update nlink and return success */
3062 if (rc == -ENOENT) {
3064 /* This path cannot be hit for regular files unless in
3065 * case of obscure races, so no need to to validate
3067 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3069 } else if (rc != 0) {
/* Permission/identity errors are expected operational noise; anything
 * else is reported at D_ERROR. */
3070 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3071 "%s: revalidate FID "DFID" error: rc = %d\n",
3072 ll_get_fsname(inode->i_sb, NULL, 0),
3073 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate the MDS attributes of \a dentry's inode, guarded by the
 * inodebits in \a ibits.  Two strategies:
 *  - if the server supports OBD_CONNECT_ATTRFID, do an intent getattr
 *    (or lookup) by FID, which also refreshes the dentry/lock state;
 *  - otherwise, if no matching MD lock is cached locally, do a plain
 *    md_getattr and rebuild the inode from the reply.
 */
3079 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3081 struct inode *inode = dentry->d_inode;
3082 struct ptlrpc_request *req = NULL;
3083 struct obd_export *exp;
3087 LASSERT(inode != NULL);
3089 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3090 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3092 exp = ll_i2mdexp(inode);
3094 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3095 * But under CMD case, it caused some lock issues, should be fixed
3096 * with new CMD ibits lock. See bug 12718 */
3097 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3098 struct lookup_intent oit = { .it_op = IT_GETATTR };
3099 struct md_op_data *op_data;
/* A pure LOOKUP-bit revalidation does not need full getattr. */
3101 if (ibits == MDS_INODELOCK_LOOKUP)
3102 oit.it_op = IT_LOOKUP;
3104 /* Call getattr by fid, so do not provide name at all. */
3105 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3106 dentry->d_inode, NULL, 0, 0,
3107 LUSTRE_OPC_ANY, NULL);
3108 if (IS_ERR(op_data))
3109 RETURN(PTR_ERR(op_data));
3111 rc = md_intent_lock(exp, op_data, &oit, &req,
3112 &ll_md_blocking_ast, 0);
3113 ll_finish_md_op_data(op_data);
3115 rc = ll_inode_revalidate_fini(inode, rc);
3119 rc = ll_revalidate_it_finish(req, &oit, dentry);
3121 ll_intent_release(&oit);
3125 /* Unlinked? Unhash dentry, so it is not picked up later by
3126 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3127 here to preserve get_cwd functionality on 2.6.
3129 if (!dentry->d_inode->i_nlink)
3130 d_lustre_invalidate(dentry, 0);
3132 ll_lookup_finish_locks(&oit, dentry);
/* No ATTRFID support: only issue a getattr RPC when none of the
 * requested inodebits are already protected by a cached MD lock. */
3133 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3134 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3135 u64 valid = OBD_MD_FLGETATTR;
3136 struct md_op_data *op_data;
/* Regular files also need striping (EA) data in the reply. */
3139 if (S_ISREG(inode->i_mode)) {
3140 rc = ll_get_default_mdsize(sbi, &ealen);
3143 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3146 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3147 0, ealen, LUSTRE_OPC_ANY,
3149 if (IS_ERR(op_data))
3150 RETURN(PTR_ERR(op_data));
3152 op_data->op_valid = valid;
3153 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3154 * capa for this inode. Because we only keep capas of dirs
3156 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3157 ll_finish_md_op_data(op_data);
3159 rc = ll_inode_revalidate_fini(inode, rc);
/* Rebuild the in-core inode from the getattr reply. */
3163 rc = ll_prep_inode(&inode, req, NULL, NULL);
3166 ptlrpc_req_finished(req);
/*
 * For a striped directory, merge the attributes of all stripes (as
 * collected by md_merge_attr from the LMV stripe objects) into the
 * master inode: nlink, blocks, size and the cached a/m/ctime.
 */
3170 static int ll_merge_md_attr(struct inode *inode)
3172 struct cl_attr attr = { 0 };
/* Only meaningful for striped directories. */
3175 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3176 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3177 &attr, ll_md_blocking_ast);
3181 set_nlink(inode, attr.cat_nlink);
3182 inode->i_blocks = attr.cat_blocks;
3183 i_size_write(inode, attr.cat_size);
/* Timestamps are cached in lli_* and copied to the inode by the
 * caller (ll_inode_revalidate). */
3185 ll_i2info(inode)->lli_atime = attr.cat_atime;
3186 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3187 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * Full revalidation entry point: refresh MDS attributes via
 * __ll_inode_revalidate(), then bring the size up to date -- merged
 * stripe attributes for striped directories, a glimpse on the OSTs for
 * regular files (unless an HSM restore is in progress).
 */
3193 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3195 struct inode *inode = dentry->d_inode;
3199 rc = __ll_inode_revalidate(dentry, ibits);
3203 /* if object isn't regular file, don't validate size */
3204 if (!S_ISREG(inode->i_mode)) {
3205 if (S_ISDIR(inode->i_mode) &&
3206 ll_i2info(inode)->lli_lsm_md != NULL) {
3207 rc = ll_merge_md_attr(inode);
/* Propagate the cached MDS timestamps into the VFS inode. */
3212 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3213 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3214 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3216 /* In case of restore, the MDT has the right size and has
3217 * already send it back without granting the layout lock,
3218 * inode is up-to-date so glimpse is useless.
3219 * Also to glimpse we need the layout, in case of a running
3220 * restore the MDT holds the layout lock so the glimpse will
3221 * block up to the end of restore (getattr will block)
3223 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3224 rc = ll_glimpse_size(inode);
/*
 * VFS ->getattr(): revalidate the inode (UPDATE|LOOKUP bits) and then
 * fill *stat from the in-core inode.  The inode number is squashed to
 * 32 bits when the mount requires a 32-bit API.
 */
3229 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3231 struct inode *inode = de->d_inode;
3232 struct ll_sb_info *sbi = ll_i2sbi(inode);
3233 struct ll_inode_info *lli = ll_i2info(inode);
3236 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3237 MDS_INODELOCK_LOOKUP);
3238 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3243 stat->dev = inode->i_sb->s_dev;
/* 32-bit userland: derive a 32-bit ino from the FID. */
3244 if (ll_need_32bit_api(sbi))
3245 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3247 stat->ino = inode->i_ino;
3248 stat->mode = inode->i_mode;
3249 stat->uid = inode->i_uid;
3250 stat->gid = inode->i_gid;
3251 stat->rdev = inode->i_rdev;
3252 stat->atime = inode->i_atime;
3253 stat->mtime = inode->i_mtime;
3254 stat->ctime = inode->i_ctime;
3255 stat->blksize = 1 << inode->i_blkbits;
3257 stat->nlink = inode->i_nlink;
3258 stat->size = i_size_read(inode);
3259 stat->blocks = inode->i_blocks;
/*
 * VFS ->fiemap(): translate the kernel's fiemap_extent_info into a
 * Lustre ll_user_fiemap buffer (header + extent array), run the mapping
 * via ll_do_fiemap(), and copy flags/extents back to the caller.
 */
3264 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3265 __u64 start, __u64 len)
3269 struct ll_user_fiemap *fiemap;
3270 unsigned int extent_count = fieinfo->fi_extents_max;
/* One header plus room for the caller's maximum extent count. */
3272 num_bytes = sizeof(*fiemap) + (extent_count *
3273 sizeof(struct ll_fiemap_extent));
3274 OBD_ALLOC_LARGE(fiemap, num_bytes);
3279 fiemap->fm_flags = fieinfo->fi_flags;
3280 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3281 fiemap->fm_start = start;
3282 fiemap->fm_length = len;
/* Seed with the first user-provided extent (continuation support);
 * note only one extent is copied in here. */
3283 if (extent_count > 0)
3284 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3285 sizeof(struct ll_fiemap_extent));
3287 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* Copy results back: flags, mapped-extent count, and the extents. */
3289 fieinfo->fi_flags = fiemap->fm_flags;
3290 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3291 if (extent_count > 0)
3292 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3293 fiemap->fm_mapped_extents *
3294 sizeof(struct ll_fiemap_extent));
3296 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * VFS ->get_acl(): return a reference on the POSIX ACL cached in
 * ll_inode_info.  lli_lock protects lli_posix_acl; the dup'd reference
 * is released by the VFS permission-check machinery.
 */
3300 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3302 struct ll_inode_info *lli = ll_i2info(inode);
3303 struct posix_acl *acl = NULL;
3306 spin_lock(&lli->lli_lock);
3307 /* VFS' acl_permission_check->check_acl will release the refcount */
3308 acl = posix_acl_dup(lli->lli_posix_acl);
3309 spin_unlock(&lli->lli_lock);
/*
 * ACL permission check used by older kernels without 2-arg
 * generic_permission(); the signature varies with the kernel API
 * (3-arg with IPERM flags vs 2-arg).  With CONFIG_FS_POSIX_ACL it
 * fetches the access ACL and evaluates posix_acl_permission();
 * without it, the !CONFIG_FS_POSIX_ACL branch (not visible here)
 * presumably returns a fixed result -- TODO confirm.
 */
3314 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3316 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3317 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3319 ll_check_acl(struct inode *inode, int mask)
3322 # ifdef CONFIG_FS_POSIX_ACL
3323 struct posix_acl *acl;
/* Cannot block while in RCU walk mode. */
3327 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3328 if (flags & IPERM_FLAG_RCU)
3331 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3336 rc = posix_acl_permission(inode, acl, mask);
3337 posix_acl_release(acl);
3340 # else /* !CONFIG_FS_POSIX_ACL */
3342 # endif /* CONFIG_FS_POSIX_ACL */
3344 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * VFS ->permission(): check access to \a inode for \a mask.
 * Extra Lustre behavior on top of generic_permission():
 *  - the root inode is revalidated first (it skips lookup validation);
 *  - root-squash: if configured and the caller is root, temporarily
 *    override creds with the squashed fsuid/fsgid and drop FS caps;
 *  - remote clients go through lustre_check_remote_perm() instead.
 * Signature varies with kernel API (flags / nameidata / 2-arg).
 */
3346 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3347 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3349 # ifdef HAVE_INODE_PERMISION_2ARGS
3350 int ll_inode_permission(struct inode *inode, int mask)
3352 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3357 struct ll_sb_info *sbi;
3358 struct root_squash_info *squash;
3359 struct cred *cred = NULL;
3360 const struct cred *old_cred = NULL;
3362 bool squash_id = false;
/* RCU-walk mode must not block; bail out so VFS retries in ref-walk. */
3365 #ifdef MAY_NOT_BLOCK
3366 if (mask & MAY_NOT_BLOCK)
3368 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3369 if (flags & IPERM_FLAG_RCU)
3373 /* as root inode are NOT getting validated in lookup operation,
3374 * need to do it before permission check. */
3376 if (inode == inode->i_sb->s_root->d_inode) {
3377 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3378 MDS_INODELOCK_LOOKUP);
3383 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3384 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3386 /* squash fsuid/fsgid if needed */
3387 sbi = ll_i2sbi(inode);
3388 squash = &sbi->ll_squash;
3389 if (unlikely(squash->rsi_uid != 0 &&
3390 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3391 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3395 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3396 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3397 squash->rsi_uid, squash->rsi_gid);
3399 /* update current process's credentials
3400 * and FS capability */
3401 cred = prepare_creds();
3405 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3406 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* Drop every filesystem-related capability bit from the squashed
 * credentials so root cannot bypass the permission check. */
3407 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3408 if ((1 << cap) & CFS_CAP_FS_MASK)
3409 cap_lower(cred->cap_effective, cap);
3411 old_cred = override_creds(cred);
3414 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3416 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3417 rc = lustre_check_remote_perm(inode, mask);
3419 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3421 /* restore current process's credentials and FS capability */
3423 revert_creds(old_cred);
3430 /* -o localflock - only provides locally consistent flock locks */
3431 struct file_operations ll_file_operations = {
3432 .read = ll_file_read,
3433 .aio_read = ll_file_aio_read,
3434 .write = ll_file_write,
3435 .aio_write = ll_file_aio_write,
3436 .unlocked_ioctl = ll_file_ioctl,
3437 .open = ll_file_open,
3438 .release = ll_file_release,
3439 .mmap = ll_file_mmap,
3440 .llseek = ll_file_seek,
3441 .splice_read = ll_file_splice_read,
/* Default table (-o flock): cluster-coherent flock/posix locks routed
 * through ll_file_flock(). */
3446 struct file_operations ll_file_operations_flock = {
3447 .read = ll_file_read,
3448 .aio_read = ll_file_aio_read,
3449 .write = ll_file_write,
3450 .aio_write = ll_file_aio_write,
3451 .unlocked_ioctl = ll_file_ioctl,
3452 .open = ll_file_open,
3453 .release = ll_file_release,
3454 .mmap = ll_file_mmap,
3455 .llseek = ll_file_seek,
3456 .splice_read = ll_file_splice_read,
3459 .flock = ll_file_flock,
3460 .lock = ll_file_flock
3463 /* These are for -o noflock - to return ENOSYS on flock calls */
3464 struct file_operations ll_file_operations_noflock = {
3465 .read = ll_file_read,
3466 .aio_read = ll_file_aio_read,
3467 .write = ll_file_write,
3468 .aio_write = ll_file_aio_write,
3469 .unlocked_ioctl = ll_file_ioctl,
3470 .open = ll_file_open,
3471 .release = ll_file_release,
3472 .mmap = ll_file_mmap,
3473 .llseek = ll_file_seek,
3474 .splice_read = ll_file_splice_read,
3477 .flock = ll_file_noflock,
3478 .lock = ll_file_noflock
/* Inode operations for regular Lustre files: attributes, permissions,
 * xattrs, fiemap, and (when supported) ->get_acl. */
3481 struct inode_operations ll_file_inode_operations = {
3482 .setattr = ll_setattr,
3483 .getattr = ll_getattr,
3484 .permission = ll_inode_permission,
3485 .setxattr = ll_setxattr,
3486 .getxattr = ll_getxattr,
3487 .listxattr = ll_listxattr,
3488 .removexattr = ll_removexattr,
3489 .fiemap = ll_fiemap,
3490 #ifdef HAVE_IOP_GET_ACL
3491 .get_acl = ll_get_acl,
3495 /* dynamic ioctl number support routines */
/* Registry of dynamically registered ioctl handlers, protected by an
 * rw_semaphore (readers: dispatch; writers: register/unregister). */
3496 static struct llioc_ctl_data {
3497 struct rw_semaphore ioc_sem;
3498 struct list_head ioc_head;
3500 __RWSEM_INITIALIZER(llioc.ioc_sem),
3501 LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: its callback plus the ioctl commands it
 * serves (iocd_cmd is a trailing variable-length array). */
3506 struct list_head iocd_list;
3507 unsigned int iocd_size;
3508 llioc_callback_t iocd_cb;
3509 unsigned int iocd_count;
3510 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: callback \a cb serving the \a count
 * commands in \a cmd.  Returns an opaque cookie used as the "magic" for
 * ll_iocontrol_unregister() (NULL on bad arguments or allocation
 * failure -- the failure returns are not visible in this extract).
 */
3513 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3516 struct llioc_data *in_data = NULL;
3519 if (cb == NULL || cmd == NULL ||
3520 count > LLIOC_MAX_CMD || count < 0)
/* Header plus trailing array of 'count' command numbers. */
3523 size = sizeof(*in_data) + count * sizeof(unsigned int);
3524 OBD_ALLOC(in_data, size);
3525 if (in_data == NULL)
3528 memset(in_data, 0, sizeof(*in_data));
3529 in_data->iocd_size = size;
3530 in_data->iocd_cb = cb;
3531 in_data->iocd_count = count;
3532 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3534 down_write(&llioc.ioc_sem);
3535 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3536 up_write(&llioc.ioc_sem);
/*
 * Unregister a dynamic ioctl handler previously returned by
 * ll_iocontrol_register() (identified by the opaque \a magic cookie)
 * and free it.  Warns if the cookie is not found.
 */
3541 void ll_iocontrol_unregister(void *magic)
3543 struct llioc_data *tmp;
3548 down_write(&llioc.ioc_sem);
3549 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* Save the size before the entry is freed. */
3551 unsigned int size = tmp->iocd_size;
3553 list_del(&tmp->iocd_list);
3554 up_write(&llioc.ioc_sem);
3556 OBD_FREE(tmp, size);
3560 up_write(&llioc.ioc_sem);
3562 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3565 EXPORT_SYMBOL(ll_iocontrol_register);
3566 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch ioctl \a cmd through the registered dynamic handlers.  Scans
 * each handler's command table and invokes its callback on a match; the
 * callback's return controls continuation (LLIOC_STOP ends the scan).
 * The handler's result code is passed back through \a rcp.
 */
3568 static enum llioc_iter
3569 ll_iocontrol_call(struct inode *inode, struct file *file,
3570 unsigned int cmd, unsigned long arg, int *rcp)
3572 enum llioc_iter ret = LLIOC_CONT;
3573 struct llioc_data *data;
3574 int rc = -EINVAL, i;
3576 down_read(&llioc.ioc_sem);
3577 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3578 for (i = 0; i < data->iocd_count; i++) {
3579 if (cmd != data->iocd_cmd[i])
3582 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3586 if (ret == LLIOC_STOP)
3589 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration change down to the cl_object stack via
 * cl_conf_set().  For OBJECT_CONF_SET (applying a new layout under a
 * layout lock), afterwards allow the lock to be matched and record the
 * new layout generation in the inode.
 */
3596 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3598 struct ll_inode_info *lli = ll_i2info(inode);
3599 struct cl_object *obj = lli->lli_clob;
3600 struct cl_env_nest nest;
3608 env = cl_env_nested_get(&nest);
3610 RETURN(PTR_ERR(env));
3612 rc = cl_conf_set(env, lli->lli_clob, conf);
3616 if (conf->coc_opc == OBJECT_CONF_SET) {
3617 struct ldlm_lock *lock = conf->coc_lock;
3618 struct cl_layout cl = {
3622 LASSERT(lock != NULL);
3623 LASSERT(ldlm_has_layout(lock));
3625 /* it can only be allowed to match after layout is
3626 * applied to inode otherwise false layout would be
3627 * seen. Applying layout should happen before dropping
3628 * the intent lock. */
3629 ldlm_lock_allow_match(lock);
/* Read back the generation of the layout just applied. */
3631 rc = cl_object_layout_get(env, obj, &cl);
3636 DFID": layout version change: %u -> %u\n",
3637 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3639 ll_layout_version_set(lli, cl.cl_layout_gen);
3643 cl_env_nested_put(&nest, env);
3648 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3649 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3652 struct ll_sb_info *sbi = ll_i2sbi(inode);
3653 struct obd_capa *oc;
3654 struct ptlrpc_request *req;
3655 struct mdt_body *body;
3662 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3663 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3664 lock->l_lvb_data, lock->l_lvb_len);
/* Layout already present in the lock's LVB: nothing to fetch. */
3666 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3669 /* if layout lock was granted right away, the layout is returned
3670 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3671 * blocked and then granted via completion ast, we have to fetch
3672 * layout here. Please note that we can't use the LVB buffer in
3673 * completion AST because it doesn't have a large enough buffer */
3674 oc = ll_mdscapa_get(inode);
3675 rc = ll_get_default_mdsize(sbi, &lmmsize);
/* Fetch the LOV EA (the layout) with a getxattr RPC. */
3677 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3678 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3684 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3686 GOTO(out, rc = -EPROTO);
3688 lmmsize = body->mbo_eadatasize;
3689 if (lmmsize == 0) /* empty layout */
3692 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3694 GOTO(out, rc = -EFAULT);
/* Copy the layout into a buffer owned by the lock and install it as
 * the lock's LVB, replacing any previous (stale/short) LVB data. */
3696 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3697 if (lvbdata == NULL)
3698 GOTO(out, rc = -ENOMEM);
3700 memcpy(lvbdata, lmm, lmmsize);
3701 lock_res_and_lock(lock);
3702 if (lock->l_lvb_data != NULL)
3703 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3705 lock->l_lvb_data = lvbdata;
3706 lock->l_lvb_len = lmmsize;
3707 unlock_res_and_lock(lock);
3712 ptlrpc_req_finished(req);
3717 * Apply the layout to the inode. Layout lock is held and will be released
3720 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3721 struct inode *inode)
3723 struct ll_inode_info *lli = ll_i2info(inode);
3724 struct ll_sb_info *sbi = ll_i2sbi(inode);
3725 struct ldlm_lock *lock;
3726 struct lustre_md md = { NULL };
3727 struct cl_object_conf conf;
3730 bool wait_layout = false;
3733 LASSERT(lustre_handle_is_used(lockh));
3735 lock = ldlm_handle2lock(lockh);
3736 LASSERT(lock != NULL);
3737 LASSERT(ldlm_has_layout(lock));
3739 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured",
3740 PFID(&lli->lli_fid), inode);
3742 /* in case this is a caching lock and reinstate with new inode */
3743 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3745 lock_res_and_lock(lock);
3746 lvb_ready = ldlm_is_lvb_ready(lock);
3747 unlock_res_and_lock(lock);
3748 /* checking lvb_ready is racy but this is okay. The worst case is
3749 * that multi processes may configure the file on the same time. */
/* Make sure the layout is in the lock's LVB before unpacking it. */
3754 rc = ll_layout_fetch(inode, lock);
3758 /* for layout lock, lmm is returned in lock's lvb.
3759 * lvb_data is immutable if the lock is held so it's safe to access it
3760 * without res lock. See the description in ldlm_lock_decref_internal()
3761 * for the condition to free lvb_data of layout lock */
3762 if (lock->l_lvb_data != NULL) {
3763 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3764 lock->l_lvb_data, lock->l_lvb_len);
3766 CERROR("%s: file "DFID" unpackmd error: %d\n",
3767 ll_get_fsname(inode->i_sb, NULL, 0),
3768 PFID(&lli->lli_fid), rc);
3772 LASSERTF(md.lsm != NULL, "lvb_data = %p, lvb_len = %u\n",
3773 lock->l_lvb_data, lock->l_lvb_len);
3778 /* set layout to file. Unlikely this will fail as old layout was
3779 * surely eliminated */
3780 memset(&conf, 0, sizeof conf);
3781 conf.coc_opc = OBJECT_CONF_SET;
3782 conf.coc_inode = inode;
3783 conf.coc_lock = lock;
3784 conf.u.coc_md = &md;
3785 rc = ll_layout_conf(inode, &conf);
/* The unpacked stripe md was copied by the cl stack; free our copy. */
3788 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3790 /* refresh layout failed, need to wait */
3791 wait_layout = rc == -EBUSY;
3795 LDLM_LOCK_PUT(lock);
3796 ldlm_lock_decref(lockh, mode);
3798 /* wait for IO to complete if it's still being used. */
3800 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3801 ll_get_fsname(inode->i_sb, NULL, 0),
3802 PFID(&lli->lli_fid), inode);
/* OBJECT_CONF_WAIT blocks until in-flight IO under the old layout
 * drains, then the caller retries the refresh. */
3804 memset(&conf, 0, sizeof conf);
3805 conf.coc_opc = OBJECT_CONF_WAIT;
3806 conf.coc_inode = inode;
3807 rc = ll_layout_conf(inode, &conf);
3811 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3812 ll_get_fsname(inode->i_sb, NULL, 0),
3813 PFID(&lli->lli_fid), rc);
/*
 * Refresh the file layout while holding lli_layout_mutex: first try to
 * match a cached layout lock; on a miss, enqueue an IT_LAYOUT intent
 * with the MDT and apply the returned layout via ll_layout_lock_set().
 */
3818 static int ll_layout_refresh_locked(struct inode *inode)
3820 struct ll_inode_info *lli = ll_i2info(inode);
3821 struct ll_sb_info *sbi = ll_i2sbi(inode);
3822 struct md_op_data *op_data;
3823 struct lookup_intent it;
3824 struct lustre_handle lockh;
3826 struct ldlm_enqueue_info einfo = {
3827 .ei_type = LDLM_IBITS,
3829 .ei_cb_bl = &ll_md_blocking_ast,
3830 .ei_cb_cp = &ldlm_completion_ast,
3836 /* mostly layout lock is caching on the local side, so try to match
3837 * it before grabbing layout lock mutex. */
3838 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3839 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3840 if (mode != 0) { /* hit cached lock */
3841 rc = ll_layout_lock_set(&lockh, mode, inode);
3848 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3849 0, 0, LUSTRE_OPC_ANY, NULL);
3850 if (IS_ERR(op_data))
3851 RETURN(PTR_ERR(op_data));
3853 /* have to enqueue one */
3854 memset(&it, 0, sizeof(it));
3855 it.it_op = IT_LAYOUT;
3856 lockh.cookie = 0ULL;
3858 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3859 ll_get_fsname(inode->i_sb, NULL, 0),
3860 PFID(&lli->lli_fid), inode);
3862 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
/* Drop the intent's RPC reference; the layout we need lives in the
 * lock's LVB, not in the request. */
3863 if (it.d.lustre.it_data != NULL)
3864 ptlrpc_req_finished(it.d.lustre.it_data);
3865 it.d.lustre.it_data = NULL;
3867 ll_finish_md_op_data(op_data);
/* Take over the lock reference from the intent before releasing it. */
3869 mode = it.d.lustre.it_lock_mode;
3870 it.d.lustre.it_lock_mode = 0;
3871 ll_intent_drop_lock(&it);
3874 /* set lock data in case this is a new lock */
3875 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3876 rc = ll_layout_lock_set(&lockh, mode, inode);
3885 * This function checks if there exists a LAYOUT lock on the client side,
3886 * or enqueues it if it doesn't have one in cache.
3888 * This function will not hold layout lock so it may be revoked any time after
3889 * this function returns. Any operations depend on layout should be redone
3892 * This function should be called before lov_io_init() to get an uptodate
3893 * layout version, the caller should save the version number and after IO
3894 * is finished, this function should be called again to verify that layout
3895 * is not changed during IO time.
3897 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3899 struct ll_inode_info *lli = ll_i2info(inode);
3900 struct ll_sb_info *sbi = ll_i2sbi(inode);
/* Fast path: layout locking disabled, or a generation is already
 * known -- report it without any RPC. */
3904 *gen = ll_layout_version_get(lli);
3905 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE)
3909 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3910 LASSERT(S_ISREG(inode->i_mode));
3912 /* take layout lock mutex to enqueue layout lock exclusively. */
3913 mutex_lock(&lli->lli_layout_mutex);
3915 rc = ll_layout_refresh_locked(inode);
/* Return the generation of the freshly applied layout. */
3919 *gen = ll_layout_version_get(lli);
3921 mutex_unlock(&lli->lli_layout_mutex);
3927 * This function send a restore request to the MDT
3929 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3931 struct hsm_user_request *hur;
3935 len = sizeof(struct hsm_user_request) +
3936 sizeof(struct hsm_user_item);
3937 OBD_ALLOC(hur, len);
3941 hur->hur_request.hr_action = HUA_RESTORE;
3942 hur->hur_request.hr_archive_id = 0;
3943 hur->hur_request.hr_flags = 0;
3944 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3945 sizeof(hur->hur_user_item[0].hui_fid));
3946 hur->hur_user_item[0].hui_extent.offset = offset;
3947 hur->hur_user_item[0].hui_extent.length = length;
3948 hur->hur_request.hr_itemcount = 1;
3949 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,