4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2014, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <linux/pagemap.h>
46 #include <linux/file.h>
47 #include <linux/sched.h>
48 #include <lustre/ll_fiemap.h>
49 #include <lustre_ioctl.h>
51 #include "cl_object.h"
53 #include "llite_internal.h"
54 #include "vvp_internal.h"
/* Forward declarations for helpers defined later in this file. */
57 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
59 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
62 static enum llioc_iter
63 ll_iocontrol_call(struct inode *inode, struct file *file,
64 unsigned int cmd, unsigned long arg, int *rcp);
/* Allocate a per-open ll_file_data from its slab cache (GFP_NOFS to
 * avoid re-entering the filesystem under memory pressure) and reset the
 * write-failure flag.  NOTE(review): intermediate lines (NULL check,
 * other field init, return) are elided in this excerpt. */
66 static struct ll_file_data *ll_file_data_get(void)
68 struct ll_file_data *fd;
70 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, GFP_NOFS);
74 fd->fd_write_failed = false;
/* Return a ll_file_data to the slab cache; pairs with ll_file_data_get(). */
79 static void ll_file_data_put(struct ll_file_data *fd)
82 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Copy the inode's current attributes (fid, mode, a/m/ctime, size,
 * blocks, ext-style flags), the MDS capability and the open handle @fh
 * into @op_data for an outgoing MD RPC.  If the inode has locally
 * modified data (LLIF_DATA_MODIFIED), tell the MDT via MDS_DATA_MODIFIED. */
85 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
86 struct lustre_handle *fh)
88 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
89 op_data->op_attr.ia_mode = inode->i_mode;
90 op_data->op_attr.ia_atime = inode->i_atime;
91 op_data->op_attr.ia_mtime = inode->i_mtime;
92 op_data->op_attr.ia_ctime = inode->i_ctime;
93 op_data->op_attr.ia_size = i_size_read(inode);
94 op_data->op_attr_blocks = inode->i_blocks;
95 op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags);
97 op_data->op_handle = *fh;
98 op_data->op_capa1 = ll_mdscapa_get(inode);
100 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
101 op_data->op_bias |= MDS_DATA_MODIFIED;
105 * Packs all the attributes into @op_data for the CLOSE rpc.
/* Mark which attributes are valid for the close: mode and all
 * timestamps always; size/blocks only when the handle was opened for
 * write (a read-only open cannot have changed them).  Then fill
 * op_data from the inode and finish preparing the op. */
107 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
108 struct obd_client_handle *och)
112 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
113 ATTR_MTIME | ATTR_MTIME_SET |
114 ATTR_CTIME | ATTR_CTIME_SET;
116 if (!(och->och_flags & FMODE_WRITE))
/* NOTE(review): the branch body between these lines is elided here;
 * ATTR_SIZE | ATTR_BLOCKS is added on the write path. */
119 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
122 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
123 ll_prep_md_op_data(op_data, inode, NULL, NULL,
124 0, 0, LUSTRE_OPC_ANY, NULL);
/* Send an MDS close RPC for openhandle @och.  A non-NULL @data_version
 * turns the close into an HSM release (MDS_HSM_RELEASE bias, passing
 * the data version and lease handle so the MDT can validate it).  On
 * success the local LLIF_DATA_MODIFIED flag is cleared if the RPC
 * carried MDS_DATA_MODIFIED.  Always clears open-replay data and
 * poisons the file handle cookie before returning.
 * NOTE(review): several error-path and cleanup lines are elided in
 * this excerpt. */
128 static int ll_close_inode_openhandle(struct obd_export *md_exp,
130 struct obd_client_handle *och,
131 const __u64 *data_version)
133 struct obd_export *exp = ll_i2mdexp(inode);
134 struct md_op_data *op_data;
135 struct ptlrpc_request *req = NULL;
136 struct obd_device *obd = class_exp2obd(exp);
142 * XXX: in case of LMV, is this correct to access
145 CERROR("Invalid MDC connection handle "LPX64"\n",
146 ll_i2mdexp(inode)->exp_handle.h_cookie);
150 OBD_ALLOC_PTR(op_data);
152 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
154 ll_prepare_close(inode, op_data, och);
155 if (data_version != NULL) {
156 /* Pass in data_version implies release. */
157 op_data->op_bias |= MDS_HSM_RELEASE;
158 op_data->op_data_version = *data_version;
159 op_data->op_lease_handle = och->och_lease_handle;
160 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
163 rc = md_close(md_exp, op_data, och->och_mod, &req);
165 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
166 ll_i2mdexp(inode)->exp_obd->obd_name,
167 PFID(ll_inode2fid(inode)), rc);
170 /* DATA_MODIFIED flag was successfully sent on close, cancel data
171 * modification flag. */
172 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
173 struct ll_inode_info *lli = ll_i2info(inode);
/* lli_lock protects lli_flags; clear the flag now the MDT knows. */
175 spin_lock(&lli->lli_lock);
176 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
177 spin_unlock(&lli->lli_lock);
180 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
181 struct mdt_body *body;
182 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
/* If the MDT did not actually release the file, report it to the
 * caller (body elided in this excerpt). */
183 if (!(body->mbo_valid & OBD_MD_FLRELEASED))
187 ll_finish_md_op_data(op_data);
191 md_clear_open_replay_data(md_exp, och);
192 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
195 if (req) /* This is close request */
196 ptlrpc_req_finished(req);
/* Close the MDS openhandle of @inode for the open mode @fmode
 * (write / exec / read, checked in that order).  If other users still
 * hold the handle (usecount > 0) the close is skipped; otherwise the
 * handle is taken under lli_och_mutex and sent to the MDS via
 * ll_close_inode_openhandle().
 * NOTE(review): lines detaching *och_p under the mutex are elided. */
200 int ll_md_real_close(struct inode *inode, fmode_t fmode)
202 struct ll_inode_info *lli = ll_i2info(inode);
203 struct obd_client_handle **och_p;
204 struct obd_client_handle *och;
/* Pick the per-mode handle slot and its reference counter. */
209 if (fmode & FMODE_WRITE) {
210 och_p = &lli->lli_mds_write_och;
211 och_usecount = &lli->lli_open_fd_write_count;
212 } else if (fmode & FMODE_EXEC) {
213 och_p = &lli->lli_mds_exec_och;
214 och_usecount = &lli->lli_open_fd_exec_count;
216 LASSERT(fmode & FMODE_READ);
217 och_p = &lli->lli_mds_read_och;
218 och_usecount = &lli->lli_open_fd_read_count;
221 mutex_lock(&lli->lli_och_mutex);
222 if (*och_usecount > 0) {
223 /* There are still users of this handle, so skip
225 mutex_unlock(&lli->lli_och_mutex);
231 mutex_unlock(&lli->lli_och_mutex);
234 /* There might be a race and this handle may already
236 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-struct-file close: drop any group lock, clean up a leftover
 * lease (e.g. when the application crashed while holding one), close
 * a private openhandle, decrement the per-mode open counter, and —
 * unless we still hold a matching MDS OPEN dlm lock that lets us skip
 * the RPC — do the real MDS close.  Finally detach and free the
 * ll_file_data.  NOTE(review): several lines are elided in this
 * excerpt, including where `lockmode` is derived from fd_omode. */
243 static int ll_md_close(struct obd_export *md_exp, struct inode *inode,
246 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
247 struct ll_inode_info *lli = ll_i2info(inode);
251 /* clear group lock, if present */
252 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
253 ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid);
255 if (fd->fd_lease_och != NULL) {
258 /* Usually the lease is not released when the
259 * application crashed, we need to release here. */
260 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
261 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
262 PFID(&lli->lli_fid), rc, lease_broken);
264 fd->fd_lease_och = NULL;
267 if (fd->fd_och != NULL) {
268 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
273 /* Let's see if we have good enough OPEN lock on the file and if
274 we can skip talking to MDS */
275 if (file->f_dentry->d_inode) { /* Can this ever be false? */
277 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
278 struct lustre_handle lockh;
279 struct inode *inode = file->f_dentry->d_inode;
280 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Drop our reference on the per-mode open counter under the mutex. */
282 mutex_lock(&lli->lli_och_mutex);
283 if (fd->fd_omode & FMODE_WRITE) {
285 LASSERT(lli->lli_open_fd_write_count);
286 lli->lli_open_fd_write_count--;
287 } else if (fd->fd_omode & FMODE_EXEC) {
289 LASSERT(lli->lli_open_fd_exec_count);
290 lli->lli_open_fd_exec_count--;
293 LASSERT(lli->lli_open_fd_read_count);
294 lli->lli_open_fd_read_count--;
296 mutex_unlock(&lli->lli_och_mutex);
/* TEST_LOCK match: if no cached OPEN lock, do the real MDS close. */
298 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
299 LDLM_IBITS, &policy, lockmode,
301 rc = ll_md_real_close(file->f_dentry->d_inode,
305 CERROR("released file has negative dentry: file = %p, "
306 "dentry = %p, name = %s\n",
307 file, file->f_dentry, file->f_dentry->d_name.name);
311 LUSTRE_FPRIVATE(file) = NULL;
312 ll_file_data_put(fd);
313 ll_capa_close(inode);
318 /* While this returns an error code, fput() the caller does not, so we need
319 * to make every effort to clean up all of our state here. Also, applications
320 * rarely check close errors and even if an error is returned they will not
321 * re-try the close call.
/* VFS ->release() hook.  Handles the remote-client RMTACL bookkeeping
 * on the root inode, statahead deauthorization for directories, the
 * root-dentry fast path (no MDS close needed), collection of async
 * write errors into lli_async_rc, and finally the MDS close via
 * ll_md_close().  NOTE(review): some lines are elided in this excerpt
 * (e.g. the early return for the root fast path). */
323 int ll_file_release(struct inode *inode, struct file *file)
325 struct ll_file_data *fd;
326 struct ll_sb_info *sbi = ll_i2sbi(inode);
327 struct ll_inode_info *lli = ll_i2info(inode);
331 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
332 PFID(ll_inode2fid(inode)), inode);
334 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL state is only tracked on the filesystem root. */
335 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
336 inode == inode->i_sb->s_root->d_inode) {
337 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
340 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
341 fd->fd_flags &= ~LL_FILE_RMTACL;
342 rct_del(&sbi->ll_rct, current_pid());
343 et_search_free(&sbi->ll_et, current_pid());
348 if (inode->i_sb->s_root != file->f_dentry)
349 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
350 fd = LUSTRE_FPRIVATE(file);
353 /* The last ref on @file, maybe not the the owner pid of statahead,
354 * because parent and child process can share the same file handle. */
355 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
356 ll_deauthorize_statahead(inode, fd);
/* Root dentry: nothing was opened on the MDS, just free fd. */
358 if (inode->i_sb->s_root == file->f_dentry) {
359 LUSTRE_FPRIVATE(file) = NULL;
360 ll_file_data_put(fd);
364 if (!S_ISDIR(inode->i_mode)) {
365 if (lli->lli_clob != NULL)
366 lov_read_and_clear_async_rc(lli->lli_clob);
367 lli->lli_async_rc = 0;
370 rc = ll_md_close(sbi->ll_md_exp, inode, file);
372 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
373 libcfs_debug_dumplog();
/* Send an IT_OPEN intent lock request to the MDS for @file.  The name
 * is only packed when the server lacks OBD_CONNECT_OPEN_BY_FID and the
 * dentry name is valid; otherwise open-by-fid is used.  @lmm/@lmmsize
 * carry an optional striping EA.  On success the resulting MDS reply
 * updates the inode (ll_prep_inode) and lock data is attached.
 * NOTE(review): the -ESTALE special-case exit path is partially elided
 * in this excerpt. */
378 static int ll_intent_file_open(struct file *file, void *lmm, int lmmsize,
379 struct lookup_intent *itp)
381 struct dentry *de = file->f_dentry;
382 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
383 struct dentry *parent = de->d_parent;
384 const char *name = NULL;
386 struct md_op_data *op_data;
387 struct ptlrpc_request *req = NULL;
391 LASSERT(parent != NULL);
392 LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
394 /* if server supports open-by-fid, or file name is invalid, don't pack
395 * name in open request */
396 if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
397 lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
398 name = de->d_name.name;
399 len = de->d_name.len;
402 op_data = ll_prep_md_op_data(NULL, parent->d_inode, de->d_inode,
403 name, len, 0, LUSTRE_OPC_ANY, NULL);
405 RETURN(PTR_ERR(op_data));
406 op_data->op_data = lmm;
407 op_data->op_data_size = lmmsize;
409 rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
410 &ll_md_blocking_ast, 0);
411 ll_finish_md_op_data(op_data);
413 /* reason for keep own exit path - don`t flood log
414 * with messages with -ESTALE errors.
/* If the open was granted but errored, release the server-side
 * openhandle so it is not leaked. */
416 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
417 it_open_error(DISP_OPEN_OPEN, itp))
419 ll_release_openhandle(de, itp);
423 if (it_disposition(itp, DISP_LOOKUP_NEG))
424 GOTO(out, rc = -ENOENT);
426 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
427 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
428 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
432 rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
433 if (!rc && itp->d.lustre.it_lock_mode)
434 ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
437 ptlrpc_req_finished(req);
438 ll_intent_drop_lock(itp);
/* Populate an obd_client_handle from the MDT reply body carried by the
 * completed intent @it (file handle, fid, lease lock cookie, flags),
 * then register it for open replay in case of MDS recovery. */
443 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
444 struct obd_client_handle *och)
446 struct ptlrpc_request *req = it->d.lustre.it_data;
447 struct mdt_body *body;
449 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
450 och->och_fh = body->mbo_handle;
451 och->och_fid = body->mbo_fid1;
452 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
453 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
454 och->och_flags = it->it_flags;
456 return md_set_open_replay_data(md_exp, och, it);
/* Finish the client-local part of an open: fill @och from the intent
 * (when one is being established), attach @fd as the file's private
 * data, initialize readahead state, record the open mode, and set up
 * the cl_io context fields (fd_lock / fd_lccs).
 * NOTE(review): the NULL-och branch and return are elided here. */
459 static int ll_local_open(struct file *file, struct lookup_intent *it,
460 struct ll_file_data *fd, struct obd_client_handle *och)
462 struct inode *inode = file->f_dentry->d_inode;
465 LASSERT(!LUSTRE_FPRIVATE(file));
472 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
477 LUSTRE_FPRIVATE(file) = fd;
478 ll_readahead_init(inode, &fd->fd_ras);
479 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
481 /* ll_cl_context initialize */
482 rwlock_init(&fd->fd_lock);
483 INIT_LIST_HEAD(&fd->fd_lccs);
488 /* Open a file, and (for the very first open) create objects on the OSTs at
489 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
490 * creation or open until ll_lov_setstripe() ioctl is called.
492 * If we already have the stripe MD locally then we don't request it in
493 * md_open(), by passing a lmm_size = 0.
495 * It is up to the application to ensure no other processes open this file
496 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
497 * used. We might be able to avoid races of that sort by getting lli_open_sem
498 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
499 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* VFS ->open() hook.  Either reuses an intent prepared by lookup
 * (file->private_data) or builds a fresh IT_OPEN from f_flags, reuses
 * an existing per-mode MDS openhandle when one is cached, and
 * otherwise performs the open RPC and local setup.
 * NOTE(review): this excerpt elides many lines, including the normal
 * return path, retry logic and parts of error cleanup. */
501 int ll_file_open(struct inode *inode, struct file *file)
503 struct ll_inode_info *lli = ll_i2info(inode);
504 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
505 .it_flags = file->f_flags };
506 struct obd_client_handle **och_p = NULL;
507 __u64 *och_usecount = NULL;
508 struct ll_file_data *fd;
512 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
513 PFID(ll_inode2fid(inode)), inode, file->f_flags);
515 it = file->private_data; /* XXX: compat macro */
516 file->private_data = NULL; /* prevent ll_local_open assertion */
518 fd = ll_file_data_get();
520 GOTO(out_openerr, rc = -ENOMEM);
523 if (S_ISDIR(inode->i_mode))
524 ll_authorize_statahead(inode, fd);
/* Root dentry: no MDS open was done by lookup; short-circuit. */
526 if (inode->i_sb->s_root == file->f_dentry) {
527 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: synthesize one from f_flags. */
531 if (!it || !it->d.lustre.it_disposition) {
532 /* Convert f_flags into access mode. We cannot use file->f_mode,
533 * because everything but O_ACCMODE mask was stripped from
535 if ((oit.it_flags + 1) & O_ACCMODE)
537 if (file->f_flags & O_TRUNC)
538 oit.it_flags |= FMODE_WRITE;
540 /* kernel only call f_op->open in dentry_open. filp_open calls
541 * dentry_open after call to open_namei that checks permissions.
542 * Only nfsd_open call dentry_open directly without checking
543 * permissions and because of that this code below is safe. */
544 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
545 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
547 /* We do not want O_EXCL here, presumably we opened the file
548 * already? XXX - NFS implications? */
549 oit.it_flags &= ~O_EXCL;
551 /* bug20584, if "it_flags" contains O_CREAT, the file will be
552 * created if necessary, then "IT_CREAT" should be set to keep
553 * consistent with it */
554 if (oit.it_flags & O_CREAT)
555 oit.it_op |= IT_CREAT;
/* Select the per-mode cached openhandle slot and its usecount. */
561 /* Let's see if we have file open on MDS already. */
562 if (it->it_flags & FMODE_WRITE) {
563 och_p = &lli->lli_mds_write_och;
564 och_usecount = &lli->lli_open_fd_write_count;
565 } else if (it->it_flags & FMODE_EXEC) {
566 och_p = &lli->lli_mds_exec_och;
567 och_usecount = &lli->lli_open_fd_exec_count;
569 och_p = &lli->lli_mds_read_och;
570 och_usecount = &lli->lli_open_fd_read_count;
573 mutex_lock(&lli->lli_och_mutex);
574 if (*och_p) { /* Open handle is present */
575 if (it_disposition(it, DISP_OPEN_OPEN)) {
576 /* Well, there's extra open request that we do not need,
577 let's close it somehow. This will decref request. */
578 rc = it_open_error(DISP_OPEN_OPEN, it);
580 mutex_unlock(&lli->lli_och_mutex);
581 GOTO(out_openerr, rc);
584 ll_release_openhandle(file->f_dentry, it);
588 rc = ll_local_open(file, it, fd, NULL);
591 mutex_unlock(&lli->lli_och_mutex);
592 GOTO(out_openerr, rc);
595 LASSERT(*och_usecount == 0);
596 if (!it->d.lustre.it_disposition) {
597 /* We cannot just request lock handle now, new ELC code
598 means that one of other OPEN locks for this file
599 could be cancelled, and since blocking ast handler
600 would attempt to grab och_mutex as well, that would
601 result in a deadlock */
602 mutex_unlock(&lli->lli_och_mutex);
604 * Normally called under two situations:
606 * 2. A race/condition on MDS resulting in no open
607 * handle to be returned from LOOKUP|OPEN request,
608 * for example if the target entry was a symlink.
610 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
612 * Always specify MDS_OPEN_BY_FID because we don't want
613 * to get file with different fid.
615 it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
616 rc = ll_intent_file_open(file, NULL, 0, it);
618 GOTO(out_openerr, rc);
622 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
624 GOTO(out_och_free, rc = -ENOMEM);
628 /* md_intent_lock() didn't get a request ref if there was an
629 * open error, so don't do cleanup on the request here
631 /* XXX (green): Should not we bail out on any error here, not
632 * just open error? */
633 rc = it_open_error(DISP_OPEN_OPEN, it);
635 GOTO(out_och_free, rc);
637 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
638 "inode %p: disposition %x, status %d\n", inode,
639 it_disposition(it, ~0), it->d.lustre.it_status);
641 rc = ll_local_open(file, it, fd, *och_p);
643 GOTO(out_och_free, rc);
645 mutex_unlock(&lli->lli_och_mutex);
648 /* Must do this outside lli_och_mutex lock to prevent deadlock where
649 different kind of OPEN lock for this same inode gets cancelled
650 by ldlm_cancel_lru */
651 if (!S_ISREG(inode->i_mode))
652 GOTO(out_och_free, rc);
/* Delay OST object creation for O_LOV_DELAY_CREATE / read-only opens
 * when there is no striping yet. */
656 if (!lli->lli_has_smd &&
657 (cl_is_lov_delay_create(file->f_flags) ||
658 (file->f_mode & FMODE_WRITE) == 0)) {
659 CDEBUG(D_INODE, "object creation was delayed\n");
660 GOTO(out_och_free, rc);
662 cl_lov_delay_create_clear(&file->f_flags);
663 GOTO(out_och_free, rc);
/* Error/cleanup labels (partially elided in this excerpt). */
667 if (och_p && *och_p) {
668 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
669 *och_p = NULL; /* OBD_FREE writes some magic there */
672 mutex_unlock(&lli->lli_och_mutex);
675 if (lli->lli_opendir_key == fd)
676 ll_deauthorize_statahead(inode, fd);
678 ll_file_data_put(fd);
680 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
683 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
684 ptlrpc_req_finished(it->d.lustre.it_data);
685 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/* Blocking AST for a lease lock: on LDLM_CB_BLOCKING, asynchronously
 * cancel the lock (which breaks the lease); the CANCELING case body is
 * elided in this excerpt. */
691 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
692 struct ldlm_lock_desc *desc, void *data, int flag)
695 struct lustre_handle lockh;
699 case LDLM_CB_BLOCKING:
700 ldlm_lock2handle(lock, &lockh);
701 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
703 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
707 case LDLM_CB_CANCELING:
715 * Acquire a lease and open the file.
/* Open @inode with a lease (MDS_OPEN_LEASE).  Only plain FMODE_READ or
 * FMODE_WRITE leases are supported.  When @file is given, the lease
 * must reuse its existing openhandle (old_handle) so the MDT sees the
 * same owner; a second lease on the same fd, or more than one opener,
 * is rejected.  Returns the och on success, ERR_PTR on failure; on
 * partial failure the openhandle is closed and the open lock cancelled.
 * NOTE(review): several lines are elided in this excerpt (och
 * allocation, some branches and returns). */
717 static struct obd_client_handle *
718 ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
721 struct lookup_intent it = { .it_op = IT_OPEN };
722 struct ll_sb_info *sbi = ll_i2sbi(inode);
723 struct md_op_data *op_data;
724 struct ptlrpc_request *req = NULL;
725 struct lustre_handle old_handle = { 0 };
726 struct obd_client_handle *och = NULL;
731 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
732 RETURN(ERR_PTR(-EINVAL));
735 struct ll_inode_info *lli = ll_i2info(inode);
736 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
737 struct obd_client_handle **och_p;
/* The requested lease mode must match how the file was opened. */
740 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
741 RETURN(ERR_PTR(-EPERM));
743 /* Get the openhandle of the file */
745 mutex_lock(&lli->lli_och_mutex);
746 if (fd->fd_lease_och != NULL) {
747 mutex_unlock(&lli->lli_och_mutex);
751 if (fd->fd_och == NULL) {
752 if (file->f_mode & FMODE_WRITE) {
753 LASSERT(lli->lli_mds_write_och != NULL);
754 och_p = &lli->lli_mds_write_och;
755 och_usecount = &lli->lli_open_fd_write_count;
757 LASSERT(lli->lli_mds_read_och != NULL);
758 och_p = &lli->lli_mds_read_och;
759 och_usecount = &lli->lli_open_fd_read_count;
761 if (*och_usecount == 1) {
768 mutex_unlock(&lli->lli_och_mutex);
769 if (rc < 0) /* more than 1 opener */
772 LASSERT(fd->fd_och != NULL);
773 old_handle = fd->fd_och->och_fh;
778 RETURN(ERR_PTR(-ENOMEM));
780 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
781 LUSTRE_OPC_ANY, NULL);
783 GOTO(out, rc = PTR_ERR(op_data));
785 /* To tell the MDT this openhandle is from the same owner */
786 op_data->op_handle = old_handle;
788 it.it_flags = fmode | open_flags;
789 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
790 rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
791 &ll_md_blocking_lease_ast,
792 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
793 * it can be cancelled which may mislead applications that the lease is
795 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
796 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
797 * doesn't deal with openhandle, so normal openhandle will be leaked. */
798 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
799 ll_finish_md_op_data(op_data);
800 ptlrpc_req_finished(req);
802 GOTO(out_release_it, rc);
804 if (it_disposition(&it, DISP_LOOKUP_NEG))
805 GOTO(out_release_it, rc = -ENOENT);
807 rc = it_open_error(DISP_OPEN_OPEN, &it);
809 GOTO(out_release_it, rc);
811 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
812 ll_och_fill(sbi->ll_md_exp, &it, och);
/* Servers predating lease support never return DISP_OPEN_LEASE. */
814 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
815 GOTO(out_close, rc = -EOPNOTSUPP);
817 /* already get lease, handle lease lock */
818 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
819 if (it.d.lustre.it_lock_mode == 0 ||
820 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
821 /* open lock must return for lease */
822 CERROR(DFID "lease granted but no open lock, %d/"LPU64".\n",
823 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
824 it.d.lustre.it_lock_bits);
825 GOTO(out_close, rc = -EPROTO);
828 ll_intent_release(&it);
/* Error path: drop the open lock, then close the openhandle. */
832 /* Cancel open lock */
833 if (it.d.lustre.it_lock_mode != 0) {
834 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
835 it.d.lustre.it_lock_mode);
836 it.d.lustre.it_lock_mode = 0;
837 och->och_lease_handle.cookie = 0ULL;
839 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
841 CERROR("%s: error closing file "DFID": %d\n",
842 ll_get_fsname(inode->i_sb, NULL, 0),
843 PFID(&ll_i2info(inode)->lli_fid), rc2);
844 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
846 ll_intent_release(&it);
854 * Release lease and close the file.
855 * It will check if the lease has ever broken.
/* Check whether the lease lock was already cancelled (broken); if not,
 * cancel it ourselves.  Reports breakage via @lease_broken, then closes
 * the openhandle on the MDS.  NOTE(review): LDLM_LOCK_PUT and some
 * branch lines are elided in this excerpt. */
857 static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
860 struct ldlm_lock *lock;
861 bool cancelled = true;
865 lock = ldlm_handle2lock(&och->och_lease_handle);
867 lock_res_and_lock(lock);
868 cancelled = ldlm_is_cancel(lock);
869 unlock_res_and_lock(lock);
873 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
874 PFID(&ll_i2info(inode)->lli_fid), cancelled);
877 ldlm_cli_cancel(&och->och_lease_handle, 0);
878 if (lease_broken != NULL)
879 *lease_broken = cancelled;
881 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
/* Merge MDS-provided timestamps (cached in lli_*time) with the
 * attributes reported by the OSTs via cl_object_attr_get(): each
 * timestamp takes the newer of the two sources, and size/blocks come
 * from the OST view.  Runs under the inode size lock. */
886 int ll_merge_attr(const struct lu_env *env, struct inode *inode)
888 struct ll_inode_info *lli = ll_i2info(inode);
889 struct cl_object *obj = lli->lli_clob;
890 struct cl_attr *attr = vvp_env_thread_attr(env);
898 ll_inode_size_lock(inode);
900 /* merge timestamps the most recently obtained from mds with
901 timestamps obtained from osts */
902 LTIME_S(inode->i_atime) = lli->lli_atime;
903 LTIME_S(inode->i_mtime) = lli->lli_mtime;
904 LTIME_S(inode->i_ctime) = lli->lli_ctime;
906 atime = LTIME_S(inode->i_atime);
907 mtime = LTIME_S(inode->i_mtime);
908 ctime = LTIME_S(inode->i_ctime);
910 cl_object_attr_lock(obj);
911 rc = cl_object_attr_get(env, obj, attr);
912 cl_object_attr_unlock(obj);
915 GOTO(out_size_unlock, rc);
/* Take the newer timestamp from MDS vs. OST for each field. */
917 if (atime < attr->cat_atime)
918 atime = attr->cat_atime;
920 if (ctime < attr->cat_ctime)
921 ctime = attr->cat_ctime;
923 if (mtime < attr->cat_mtime)
924 mtime = attr->cat_mtime;
926 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
927 PFID(&lli->lli_fid), attr->cat_size);
929 i_size_write(inode, attr->cat_size);
930 inode->i_blocks = attr->cat_blocks;
932 LTIME_S(inode->i_atime) = atime;
933 LTIME_S(inode->i_mtime) = mtime;
934 LTIME_S(inode->i_ctime) = ctime;
937 ll_inode_size_unlock(inode);
/* Decide whether atime updates should be suppressed for @file, checking
 * the same conditions as the kernel's file_accessed()/touch_atime():
 * O_NOATIME, S_NOATIME, IS_NOATIME, mount flags, and the nodiratime
 * cases for directories.  NOTE(review): the `return true`/`return
 * false` lines between the checks are elided in this excerpt. */
942 static bool file_is_noatime(const struct file *file)
944 const struct vfsmount *mnt = file->f_path.mnt;
945 const struct inode *inode = file->f_path.dentry->d_inode;
947 /* Adapted from file_accessed() and touch_atime().*/
948 if (file->f_flags & O_NOATIME)
951 if (inode->i_flags & S_NOATIME)
954 if (IS_NOATIME(inode))
957 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
960 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
963 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/* Initialize a cl_io for a read or write on @file: propagate
 * O_NONBLOCK/O_APPEND/O_SYNC/O_DIRECT into the io descriptor, pick the
 * lock requirement (never for nolock files, mandatory for append,
 * maybe otherwise) and the noatime decision. */
969 static void ll_io_init(struct cl_io *io, const struct file *file, int write)
971 struct inode *inode = file->f_dentry->d_inode;
973 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
975 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
976 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
977 file->f_flags & O_DIRECT ||
980 io->ci_obj = ll_i2info(inode)->lli_clob;
981 io->ci_lockreq = CILR_MAYBE;
982 if (ll_file_nolock(file)) {
983 io->ci_lockreq = CILR_NEVER;
984 io->ci_no_srvlock = 1;
985 } else if (file->f_flags & O_APPEND) {
986 io->ci_lockreq = CILR_MANDATORY;
989 io->ci_noatime = file_is_noatime(file);
/* Common driver for all read/write paths (normal and splice).  Sets up
 * the cl_io, enforces the per-filesystem maximum file size, takes the
 * per-file range lock for non-grouplock writes (whole-file range for
 * O_APPEND), runs the cl_io loop under lli_trunc_sem, and accounts
 * read/write byte statistics.  A restartable io (ci_need_restart with
 * nothing transferred) loops again; short transfers are returned as-is.
 * NOTE(review): many lines are elided in this excerpt, including the
 * restart loop construct and some error branches. */
993 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
994 struct file *file, enum cl_io_type iot,
995 loff_t *ppos, size_t count)
997 struct inode *inode = file->f_dentry->d_inode;
998 struct ll_inode_info *lli = ll_i2info(inode);
1000 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1003 struct range_lock range;
1006 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zu\n",
1007 file->f_dentry->d_name.name, iot, *ppos, count);
1010 io = vvp_env_thread_io(env);
1011 ll_io_init(io, file, iot == CIT_WRITE);
1013 /* The maximum Lustre file size is variable, based on the
1014 * OST maximum object size and number of stripes. This
1015 * needs another check in addition to the VFS checks earlier. */
1016 end = (io->u.ci_wr.wr_append ? i_size_read(inode) : *ppos) + count;
1017 if (end > ll_file_maxbytes(inode)) {
1019 CDEBUG(D_INODE, "%s: file "DFID" offset %llu > maxbytes "LPU64
1020 ": rc = %zd\n", ll_get_fsname(inode->i_sb, NULL, 0),
1021 PFID(&lli->lli_fid), end, ll_file_maxbytes(inode),
1026 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1027 struct vvp_io *vio = vvp_env_io(env);
1028 bool range_locked = false;
/* Append writes must lock to EOF; others lock just their range. */
1030 if (file->f_flags & O_APPEND)
1031 range_lock_init(&range, 0, LUSTRE_EOF);
1033 range_lock_init(&range, *ppos, *ppos + count - 1);
1035 vio->vui_fd = LUSTRE_FPRIVATE(file);
1036 vio->vui_io_subtype = args->via_io_subtype;
1038 switch (vio->vui_io_subtype) {
1040 vio->vui_iov = args->u.normal.via_iov;
1041 vio->vui_nrsegs = args->u.normal.via_nrsegs;
1042 vio->vui_tot_nrsegs = vio->vui_nrsegs;
1043 vio->vui_iocb = args->u.normal.via_iocb;
/* Group-locked files already serialize; skip the range lock. */
1044 if ((iot == CIT_WRITE) &&
1045 !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1046 CDEBUG(D_VFSTRACE, "Range lock "RL_FMT"\n",
1048 result = range_lock(&lli->lli_write_tree,
1053 range_locked = true;
1055 down_read(&lli->lli_trunc_sem);
1058 vio->u.splice.vui_pipe = args->u.splice.via_pipe;
1059 vio->u.splice.vui_flags = args->u.splice.via_flags;
1062 CERROR("unknown IO subtype %u\n", vio->vui_io_subtype);
1066 ll_cl_add(file, env, io);
1067 result = cl_io_loop(env, io);
1068 ll_cl_remove(file, env);
1070 if (args->via_io_subtype == IO_NORMAL)
1071 up_read(&lli->lli_trunc_sem);
1073 CDEBUG(D_VFSTRACE, "Range unlock "RL_FMT"\n",
1075 range_unlock(&lli->lli_write_tree, &range);
1078 /* cl_io_rw_init() handled IO */
1079 result = io->ci_result;
1082 if (io->ci_nob > 0) {
1083 result = io->ci_nob;
1084 *ppos = io->u.ci_wr.wr.crw_pos;
1088 cl_io_fini(env, io);
1089 /* If any bit been read/written (result != 0), we just return
1090 * short read/write instead of restart io. */
1091 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1092 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zu\n",
1093 iot == CIT_READ ? "read" : "write",
1094 file->f_dentry->d_name.name, *ppos, count);
1095 LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);
1099 if (iot == CIT_READ) {
1101 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1102 LPROC_LL_READ_BYTES, result);
1103 } else if (iot == CIT_WRITE) {
1105 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1106 LPROC_LL_WRITE_BYTES, result);
1107 fd->fd_write_failed = false;
1108 } else if (result != -ERESTARTSYS) {
1109 fd->fd_write_failed = true;
1112 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1119 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array: reject negative/overflowing cumulative
 * lengths and shorten *nr_segs at the first segment failing
 * access_ok(); returns the total byte count via *count.
 * NOTE(review): several lines (the cnt accumulation and returns) are
 * elided in this excerpt. */
1121 static int ll_file_get_iov_count(const struct iovec *iov,
1122 unsigned long *nr_segs, size_t *count)
1127 for (seg = 0; seg < *nr_segs; seg++) {
1128 const struct iovec *iv = &iov[seg];
1131 * If any segment has a negative length, or the cumulative
1132 * length ever wraps negative then return -EINVAL.
1135 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1137 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1142 cnt -= iv->iov_len; /* This segment is no good */
1149 }
/* aio_read entry point: validate the iovec, grab a cl environment,
 * fill the normal-IO args, and hand off to ll_file_io_generic() with
 * CIT_READ, advancing iocb->ki_pos. */
1149 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1150 unsigned long nr_segs, loff_t pos)
1153 struct vvp_io_args *args;
1159 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1163 env = cl_env_get(&refcheck);
1165 RETURN(PTR_ERR(env));
1167 args = ll_env_args(env, IO_NORMAL);
1168 args->u.normal.via_iov = (struct iovec *)iov;
1169 args->u.normal.via_nrsegs = nr_segs;
1170 args->u.normal.via_iocb = iocb;
1172 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1173 &iocb->ki_pos, count);
1174 cl_env_put(env, &refcheck);
/* Synchronous read: build a one-segment iovec and a sync kiocb in the
 * per-env scratch space, delegate to ll_file_aio_read(), then copy the
 * updated position back to *ppos.  The HAVE_KIOCB_KI_LEFT ifdef covers
 * the kernel API rename of ki_left to ki_nbytes. */
1178 static ssize_t ll_file_read(struct file *file, char __user *buf, size_t count,
1182 struct iovec *local_iov;
1183 struct kiocb *kiocb;
1188 env = cl_env_get(&refcheck);
1190 RETURN(PTR_ERR(env));
1192 local_iov = &ll_env_info(env)->lti_local_iov;
1193 kiocb = &ll_env_info(env)->lti_kiocb;
1194 local_iov->iov_base = (void __user *)buf;
1195 local_iov->iov_len = count;
1196 init_sync_kiocb(kiocb, file);
1197 kiocb->ki_pos = *ppos;
1198 #ifdef HAVE_KIOCB_KI_LEFT
1199 kiocb->ki_left = count;
1201 kiocb->ki_nbytes = count;
1204 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1205 *ppos = kiocb->ki_pos;
1207 cl_env_put(env, &refcheck);
1212 * Write to a file (through the page cache).
/* aio_write entry point: mirrors ll_file_aio_read() but drives
 * ll_file_io_generic() with CIT_WRITE. */
1215 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1216 unsigned long nr_segs, loff_t pos)
1219 struct vvp_io_args *args;
1225 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1229 env = cl_env_get(&refcheck);
1231 RETURN(PTR_ERR(env));
1233 args = ll_env_args(env, IO_NORMAL);
1234 args->u.normal.via_iov = (struct iovec *)iov;
1235 args->u.normal.via_nrsegs = nr_segs;
1236 args->u.normal.via_iocb = iocb;
1238 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1239 &iocb->ki_pos, count);
1240 cl_env_put(env, &refcheck);
/* Synchronous write: same one-segment-iovec + sync-kiocb shim as
 * ll_file_read(), delegating to ll_file_aio_write(). */
1244 static ssize_t ll_file_write(struct file *file, const char __user *buf,
1245 size_t count, loff_t *ppos)
1248 struct iovec *local_iov;
1249 struct kiocb *kiocb;
1254 env = cl_env_get(&refcheck);
1256 RETURN(PTR_ERR(env));
1258 local_iov = &ll_env_info(env)->lti_local_iov;
1259 kiocb = &ll_env_info(env)->lti_kiocb;
1260 local_iov->iov_base = (void __user *)buf;
1261 local_iov->iov_len = count;
1262 init_sync_kiocb(kiocb, file);
1263 kiocb->ki_pos = *ppos;
1264 #ifdef HAVE_KIOCB_KI_LEFT
1265 kiocb->ki_left = count;
1267 kiocb->ki_nbytes = count;
1270 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1271 *ppos = kiocb->ki_pos;
1273 cl_env_put(env, &refcheck);
1278 * Send file content (through pagecache) somewhere with helper
/* splice_read entry point: set up IO_SPLICE args (pipe + flags) and
 * run ll_file_io_generic() with CIT_READ. */
1280 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1281 struct pipe_inode_info *pipe, size_t count,
1285 struct vvp_io_args *args;
1290 env = cl_env_get(&refcheck);
1292 RETURN(PTR_ERR(env));
1294 args = ll_env_args(env, IO_SPLICE);
1295 args->u.splice.via_pipe = pipe;
1296 args->u.splice.via_flags = flags;
1298 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1299 cl_env_put(env, &refcheck);
/*
 * Create/set the LOV striping EA for @inode via an intent open against
 * the MDS.  Fails with -EEXIST if the inode already carries a layout,
 * and clears the delayed-create flag on @file when done.
 */
1303 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1304 __u64 flags, struct lov_user_md *lum,
1307 struct lov_stripe_md *lsm = NULL;
1308 struct lookup_intent oit = {
1310 .it_flags = flags | MDS_OPEN_BY_FID,
/* A layout already attached to the inode means the stripe exists. */
1315 lsm = ccc_inode_lsm_get(inode);
1317 ccc_inode_lsm_put(inode, lsm);
1318 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1319 PFID(ll_inode2fid(inode)));
1320 GOTO(out, rc = -EEXIST);
/* Size lock serializes against concurrent size/layout updates. */
1323 ll_inode_size_lock(inode);
1324 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1326 GOTO(out_unlock, rc);
1328 rc = oit.d.lustre.it_status;
1330 GOTO(out_unlock, rc);
/* The open was only needed to install the EA; drop the handle again. */
1332 ll_release_openhandle(file->f_dentry, &oit);
1335 ll_inode_size_unlock(inode);
1336 ll_intent_release(&oit);
1337 ccc_inode_lsm_put(inode, lsm);
1339 cl_lov_delay_create_clear(&file->f_flags);
/*
 * Fetch the LOV EA (striping information) of @filename from the MDS.
 * On success *lmmp points into @request's reply buffer — the caller must
 * hold the request until finished — and *lmm_size is its length.  The EA
 * is converted from little-endian wire order to host order on big-endian
 * machines before being handed to userspace.
 */
1344 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1345 struct lov_mds_md **lmmp, int *lmm_size,
1346 struct ptlrpc_request **request)
1348 struct ll_sb_info *sbi = ll_i2sbi(inode);
1349 struct mdt_body *body;
1350 struct lov_mds_md *lmm = NULL;
1351 struct ptlrpc_request *req = NULL;
1352 struct md_op_data *op_data;
1355 rc = ll_get_default_mdsize(sbi, &lmmsize);
1359 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1360 strlen(filename), lmmsize,
1361 LUSTRE_OPC_ANY, NULL);
1362 if (IS_ERR(op_data))
1363 RETURN(PTR_ERR(op_data));
1365 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1366 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1367 ll_finish_md_op_data(op_data);
1369 CDEBUG(D_INFO, "md_getattr_name failed "
1370 "on %s: rc %d\n", filename, rc);
1374 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1375 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1377 lmmsize = body->mbo_eadatasize;
/* No EA present (or zero-length) means the file has no striping. */
1379 if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1381 GOTO(out, rc = -ENODATA);
1384 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1385 LASSERT(lmm != NULL);
/* Only plain V1/V3 LOV magics are understood here. */
1387 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1388 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1389 GOTO(out, rc = -EPROTO);
1393 * This is coming from the MDS, so is probably in
1394 * little endian. We convert it to host endian before
1395 * passing it to userspace.
/* Swab only on big-endian hosts, where LE wire order != host order. */
1397 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1400 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1401 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1404 /* if function called for directory - we should
1405 * avoid swabbing non-existent lsm objects */
1406 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1407 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1408 if (S_ISREG(body->mbo_mode))
1409 lustre_swab_lov_user_md_objects(
1410 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1412 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1413 lustre_swab_lov_user_md_v3(
1414 (struct lov_user_md_v3 *)lmm);
1415 if (S_ISREG(body->mbo_mode))
1416 lustre_swab_lov_user_md_objects(
1417 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1424 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: admin-only path that copies a raw
 * lov_user_md (plus one OST object entry) from userspace and applies it
 * via ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS.
 */
1429 static int ll_lov_setea(struct inode *inode, struct file *file,
1432 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1433 struct lov_user_md *lump;
1434 int lum_size = sizeof(struct lov_user_md) +
1435 sizeof(struct lov_user_ost_data);
/* Naming explicit OST objects is restricted to CAP_SYS_ADMIN. */
1439 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1442 OBD_ALLOC_LARGE(lump, lum_size);
1446 if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) {
1447 OBD_FREE_LARGE(lump, lum_size);
1451 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1453 OBD_FREE_LARGE(lump, lum_size);
/*
 * Copy the inode's striping information to the userspace @lum buffer by
 * querying the cl_object layer (cl_object_getstripe).
 */
1457 static int ll_file_getstripe(struct inode *inode,
1458 struct lov_user_md __user *lum)
1465 env = cl_env_get(&refcheck);
1467 RETURN(PTR_ERR(env));
1469 rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
1470 cl_env_put(env, &refcheck);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user-supplied layout into the
 * kernel, apply it, then refresh the layout generation and echo the
 * resulting striping back to userspace via ll_file_getstripe().
 */
1474 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1477 struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
1478 struct lov_user_md *klum;
1480 __u64 flags = FMODE_WRITE;
1483 rc = ll_copy_user_md(lum, &klum);
1488 rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
/* NOTE(review): put_user() result is unchecked here — the visible code
 * treats zeroing the user's stripe count as best-effort; confirm. */
1492 put_user(0, &lum->lmm_stripe_count);
1494 ll_layout_refresh(inode, &gen);
1495 rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
1498 OBD_FREE(klum, lum_size);
/*
 * Take a Lustre group lock (gid == @arg) on behalf of this open file.
 * Only one group lock may be held per file descriptor; fd_flags and
 * fd_grouplock are protected by lli_lock, and the race between the
 * unlocked enqueue and a concurrent winner is resolved by rechecking
 * under the spinlock afterwards.
 */
1503 ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1505 struct ll_inode_info *lli = ll_i2info(inode);
1506 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1507 struct ll_grouplock grouplock;
1512 CWARN("group id for group lock must not be 0\n");
1516 if (ll_file_nolock(file))
1517 RETURN(-EOPNOTSUPP);
1519 spin_lock(&lli->lli_lock);
1520 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1521 CWARN("group lock already existed with gid %lu\n",
1522 fd->fd_grouplock.lg_gid);
1523 spin_unlock(&lli->lli_lock);
1526 LASSERT(fd->fd_grouplock.lg_lock == NULL);
1527 spin_unlock(&lli->lli_lock);
/* Enqueue outside the spinlock: cl_get_grouplock() may block. */
1529 rc = cl_get_grouplock(ll_i2info(inode)->lli_clob,
1530 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Recheck: another thread may have installed a lock while we slept. */
1534 spin_lock(&lli->lli_lock);
1535 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1536 spin_unlock(&lli->lli_lock);
1537 CERROR("another thread just won the race\n");
1538 cl_put_grouplock(&grouplock);
1542 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1543 fd->fd_grouplock = grouplock;
1544 spin_unlock(&lli->lli_lock);
1546 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * Drop the group lock with gid == @arg previously taken by
 * ll_get_grouplock() on this file descriptor.  Verifies that a group
 * lock is actually held and that its gid matches before releasing.
 */
1550 static int ll_put_grouplock(struct inode *inode, struct file *file,
1553 struct ll_inode_info *lli = ll_i2info(inode);
1554 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1555 struct ll_grouplock grouplock;
1558 spin_lock(&lli->lli_lock);
1559 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1560 spin_unlock(&lli->lli_lock);
1561 CWARN("no group lock held\n");
1565 LASSERT(fd->fd_grouplock.lg_lock != NULL);
1567 if (fd->fd_grouplock.lg_gid != arg) {
1568 CWARN("group lock %lu doesn't match current id %lu\n",
1569 arg, fd->fd_grouplock.lg_gid);
1570 spin_unlock(&lli->lli_lock);
/* Detach the state under the spinlock; release the lock outside it. */
1574 grouplock = fd->fd_grouplock;
1575 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1576 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1577 spin_unlock(&lli->lli_lock);
1579 cl_put_grouplock(&grouplock);
1580 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1585 * Close inode open handle
1587 * \param dentry [in] dentry which contains the inode
1588 * \param it [in,out] intent which contains open info and result
1591 * \retval <0 failure
1593 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1595 struct inode *inode = dentry->d_inode;
1596 struct obd_client_handle *och;
1602 /* Root ? Do nothing. */
1603 if (dentry->d_inode->i_sb->s_root == dentry)
1606 /* No open handle to close? Move away */
1607 if (!it_disposition(it, DISP_OPEN_OPEN))
1610 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1612 OBD_ALLOC(och, sizeof(*och));
1614 GOTO(out, rc = -ENOMEM);
/* Fill the client handle from the intent, then close it on the MDS. */
1616 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1618 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1621 /* this one is in place of ll_file_open */
1622 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1623 ptlrpc_req_finished(it->d.lustre.it_data);
1624 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1630 * Get size for inode for which FIEMAP mapping is requested.
1631 * Make the FIEMAP get_info call and returns the result.
1632 * \param fiemap kernel buffer to hold extents
1633 * \param num_bytes kernel buffer size
1635 static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap,
1641 struct ll_fiemap_info_key fmkey = { .name = KEY_FIEMAP, };
1644 /* Checks for fiemap flags */
1645 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* Report back which flags we do support before failing. */
1646 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1650 /* Check for FIEMAP_FLAG_SYNC */
1651 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1652 rc = filemap_fdatawrite(inode->i_mapping);
1657 env = cl_env_get(&refcheck);
1659 RETURN(PTR_ERR(env));
/* A zero cached size may just be stale; glimpse the OSTs to be sure. */
1661 if (i_size_read(inode) == 0) {
1662 rc = ll_glimpse_size(inode);
1667 fmkey.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1668 obdo_from_inode(&fmkey.oa, inode, OBD_MD_FLSIZE);
1669 obdo_set_parent_fid(&fmkey.oa, &ll_i2info(inode)->lli_fid);
1671 /* If filesize is 0, then there would be no objects for mapping */
1672 if (fmkey.oa.o_size == 0) {
1673 fiemap->fm_mapped_extents = 0;
1677 fmkey.fiemap = *fiemap;
1679 rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob,
1680 &fmkey, fiemap, &num_bytes);
1682 cl_env_put(env, &refcheck);
/*
 * OBD_IOC_FID2PATH handler: resolve this inode's FID to a path through
 * the MDC.  @arg is a getinfo_fid2path carrying a user-specified path
 * buffer length; the result is copied back to the same buffer.
 */
1686 int ll_fid2path(struct inode *inode, void __user *arg)
1688 struct obd_export *exp = ll_i2mdexp(inode);
1689 const struct getinfo_fid2path __user *gfin = arg;
1691 struct getinfo_fid2path *gfout;
/* fid2path can reveal namespace layout; require a capability unless the
 * filesystem was mounted with the user_fid2path option. */
1697 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1698 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1701 /* Only need to get the buflen */
1702 if (get_user(pathlen, &gfin->gf_pathlen))
1705 if (pathlen > PATH_MAX)
1708 outsize = sizeof(*gfout) + pathlen;
1709 OBD_ALLOC(gfout, outsize);
1713 if (copy_from_user(gfout, arg, sizeof(*gfout)))
1714 GOTO(gf_free, rc = -EFAULT);
1716 /* Call mdc_iocontrol */
1717 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1721 if (copy_to_user(arg, gfout, outsize))
1725 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size the kernel fiemap buffer from the
 * user-requested extent count (with an overflow guard), run the mapping
 * via ll_do_fiemap(), then copy the header plus the mapped extents back.
 */
1729 static int ll_ioctl_fiemap(struct inode *inode, struct fiemap __user *arg)
1731 struct fiemap *fiemap;
1737 /* Get the extent count so we can calculate the size of
1738 * required fiemap buffer */
1739 if (get_user(extent_count, &arg->fm_extent_count))
/* Guard the size multiplication below against size_t overflow. */
1743 (SIZE_MAX - sizeof(*fiemap)) / sizeof(struct ll_fiemap_extent))
1745 num_bytes = sizeof(*fiemap) + (extent_count *
1746 sizeof(struct ll_fiemap_extent));
1748 OBD_ALLOC_LARGE(fiemap, num_bytes);
1752 /* get the fiemap value */
1753 if (copy_from_user(fiemap, arg, sizeof(*fiemap)))
1754 GOTO(error, rc = -EFAULT);
1756 /* If fm_extent_count is non-zero, read the first extent since
1757 * it is used to calculate end_offset and device from previous
1759 if (extent_count != 0) {
1760 if (copy_from_user(&fiemap->fm_extents[0],
1761 (char __user *)arg + sizeof(*fiemap),
1762 sizeof(struct ll_fiemap_extent)))
1763 GOTO(error, rc = -EFAULT);
1766 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* Copy back only the header plus the extents actually mapped. */
1770 ret_bytes = sizeof(struct fiemap);
1772 if (extent_count != 0)
1773 ret_bytes += (fiemap->fm_mapped_extents *
1774 sizeof(struct ll_fiemap_extent));
1776 if (copy_to_user((void __user *)arg, fiemap, ret_bytes))
1780 OBD_FREE_LARGE(fiemap, num_bytes);
1785 * Read the data_version for inode.
1787 * This value is computed using stripe object version on OST.
1788 * Version is computed using server side locking.
1790 * @param flags if do sync on the OST side;
1792 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1793 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1795 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1802 /* If no file object initialized, we consider its version is 0. */
1803 if (ll_i2info(inode)->lli_clob == NULL) {
1808 env = cl_env_get(&refcheck);
1810 RETURN(PTR_ERR(env));
/* Delegate to the cl_object layer to gather per-stripe versions. */
1812 rc = cl_object_data_version(env, ll_i2info(inode)->lli_clob,
1813 data_version, flags);
1814 cl_env_put(env, &refcheck);
1819 * Trigger a HSM release request for the provided inode.
1821 int ll_hsm_release(struct inode *inode)
1823 struct cl_env_nest nest;
1825 struct obd_client_handle *och = NULL;
1826 __u64 data_version = 0;
1830 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1831 ll_get_fsname(inode->i_sb, NULL, 0),
1832 PFID(&ll_i2info(inode)->lli_fid));
/* Take a write lease so the release can be invalidated if any other
 * client opens the file while it is being released. */
1834 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1836 GOTO(out, rc = PTR_ERR(och));
1838 /* Grab latest data_version and [am]time values */
1839 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
1843 env = cl_env_nested_get(&nest);
1845 GOTO(out, rc = PTR_ERR(env));
1847 ll_merge_attr(env, inode);
1848 cl_env_nested_put(&nest, env);
1850 /* Release the file.
1851 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1852 * we still need it to pack l_remote_handle to MDT. */
1853 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
/* On the error path the lease (and open handle) must still be closed. */
1859 if (och != NULL && !IS_ERR(och)) /* close the file */
1860 ll_lease_close(och, inode, NULL);
/* Saved state for ll_swap_layouts(): the two inodes, the [am]time values
 * to restore after the swap, and which data_versions must be verified.
 * NOTE(review): the dv fields are elided from this excerpt. */
1865 struct ll_swap_stack {
1866 struct iattr ia1, ia2;
1868 struct inode *inode1, *inode2;
1869 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS implementation: exchange the layouts of file1
 * and file2 on the MDT, optionally guarded by group locks and
 * data_version checks, and optionally preserving mtime/atime.
 */
1872 static int ll_swap_layouts(struct file *file1, struct file *file2,
1873 struct lustre_swap_layouts *lsl)
1875 struct mdc_swap_layouts msl;
1876 struct md_op_data *op_data;
1879 struct ll_swap_stack *llss = NULL;
1882 OBD_ALLOC_PTR(llss);
1886 llss->inode1 = file1->f_dentry->d_inode;
1887 llss->inode2 = file2->f_dentry->d_inode;
1889 if (!S_ISREG(llss->inode2->i_mode))
1890 GOTO(free, rc = -EINVAL);
/* Both files must be writable by the caller and on the same fs. */
1892 if (inode_permission(llss->inode1, MAY_WRITE) ||
1893 inode_permission(llss->inode2, MAY_WRITE))
1894 GOTO(free, rc = -EPERM);
1896 if (llss->inode2->i_sb != llss->inode1->i_sb)
1897 GOTO(free, rc = -EXDEV);
1899 /* we use 2 bool because it is easier to swap than 2 bits */
1900 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
1901 llss->check_dv1 = true;
1903 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
1904 llss->check_dv2 = true;
1906 /* we cannot use lsl->sl_dvX directly because we may swap them */
1907 llss->dv1 = lsl->sl_dv1;
1908 llss->dv2 = lsl->sl_dv2;
1910 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
1911 if (rc == 0) /* same file, done! */
/* Order the pair by FID so concurrent swaps cannot deadlock. */
1914 if (rc < 0) { /* sequentialize it */
1915 swap(llss->inode1, llss->inode2);
1917 swap(llss->dv1, llss->dv2);
1918 swap(llss->check_dv1, llss->check_dv2);
1922 if (gid != 0) { /* application asks to flush dirty cache */
1923 rc = ll_get_grouplock(llss->inode1, file1, gid);
1927 rc = ll_get_grouplock(llss->inode2, file2, gid);
1929 ll_put_grouplock(llss->inode1, file1, gid);
1934 /* to be able to restore mtime and atime after swap
1935 * we need to first save them */
1937 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
1938 llss->ia1.ia_mtime = llss->inode1->i_mtime;
1939 llss->ia1.ia_atime = llss->inode1->i_atime;
1940 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
1941 llss->ia2.ia_mtime = llss->inode2->i_mtime;
1942 llss->ia2.ia_atime = llss->inode2->i_atime;
1943 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
1946 /* ultimate check, before swapping the layouts we check if
1947 * dataversion has changed (if requested) */
1948 if (llss->check_dv1) {
1949 rc = ll_data_version(llss->inode1, &dv, 0);
1952 if (dv != llss->dv1)
1953 GOTO(putgl, rc = -EAGAIN);
1956 if (llss->check_dv2) {
1957 rc = ll_data_version(llss->inode2, &dv, 0);
1960 if (dv != llss->dv2)
1961 GOTO(putgl, rc = -EAGAIN);
1964 /* struct md_op_data is used to send the swap args to the mdt
1965 * only flags is missing, so we use struct mdc_swap_layouts
1966 * through the md_op_data->op_data */
1967 /* flags from user space have to be converted before they are sent to
1968 * the server, no flag is sent today, they are only used on the client */
1971 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
1972 0, LUSTRE_OPC_ANY, &msl);
1973 if (IS_ERR(op_data))
1974 GOTO(free, rc = PTR_ERR(op_data));
1976 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
1977 sizeof(*op_data), op_data, NULL);
1978 ll_finish_md_op_data(op_data);
/* Drop the group locks in reverse acquisition order. */
1982 ll_put_grouplock(llss->inode2, file2, gid);
1983 ll_put_grouplock(llss->inode1, file1, gid);
1986 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
1990 /* clear useless flags */
1991 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
1992 llss->ia1.ia_valid &= ~ATTR_MTIME;
1993 llss->ia2.ia_valid &= ~ATTR_MTIME;
1996 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
1997 llss->ia1.ia_valid &= ~ATTR_ATIME;
1998 llss->ia2.ia_valid &= ~ATTR_ATIME;
2001 /* update time if requested */
/* Restore the saved timestamps under each inode's i_mutex. */
2003 if (llss->ia2.ia_valid != 0) {
2004 mutex_lock(&llss->inode1->i_mutex);
2005 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2006 mutex_unlock(&llss->inode1->i_mutex);
2009 if (llss->ia1.ia_valid != 0) {
2012 mutex_lock(&llss->inode2->i_mutex);
2013 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2014 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Set/clear HSM flags (and optionally the archive id) on @inode with an
 * LL_IOC_HSM_STATE_SET md ioctl, after validating the masks against
 * HSM_FLAGS_MASK/HSM_USER_MASK and the archive id range.
 */
2026 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2028 struct md_op_data *op_data;
2032 /* Detect out-of range masks */
2033 if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK)
2036 /* Non-root users are forbidden to set or clear flags which are
2037 * NOT defined in HSM_USER_MASK. */
2038 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2039 !cfs_capable(CFS_CAP_SYS_ADMIN))
2042 /* Detect out-of range archive id */
2043 if ((hss->hss_valid & HSS_ARCHIVE_ID) &&
2044 (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE))
2047 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2048 LUSTRE_OPC_ANY, hss);
2049 if (IS_ERR(op_data))
2050 RETURN(PTR_ERR(op_data));
2052 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2053 sizeof(*op_data), op_data, NULL);
2055 ll_finish_md_op_data(op_data);
/*
 * HSM import: mark a regular file ARCHIVED|EXISTS|RELEASED in the given
 * archive, then force its mode/owner/size/times to the values the
 * copytool recorded, via ll_setattr_raw() under i_mutex.
 */
2060 static int ll_hsm_import(struct inode *inode, struct file *file,
2061 struct hsm_user_import *hui)
2063 struct hsm_state_set *hss = NULL;
2064 struct iattr *attr = NULL;
2068 if (!S_ISREG(inode->i_mode))
2074 GOTO(out, rc = -ENOMEM);
/* Step 1: set the HSM state so the file is seen as released. */
2076 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2077 hss->hss_archive_id = hui->hui_archive_id;
2078 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2079 rc = ll_hsm_state_set(inode, hss);
2083 OBD_ALLOC_PTR(attr);
2085 GOTO(out, rc = -ENOMEM);
/* Step 2: restore the attributes recorded at archive time; only the
 * permission bits of hui_mode are honoured, and S_IFREG is forced. */
2087 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2088 attr->ia_mode |= S_IFREG;
2089 attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid);
2090 attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid);
2091 attr->ia_size = hui->hui_size;
2092 attr->ia_mtime.tv_sec = hui->hui_mtime;
2093 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2094 attr->ia_atime.tv_sec = hui->hui_atime;
2095 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2097 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2098 ATTR_UID | ATTR_GID |
2099 ATTR_MTIME | ATTR_MTIME_SET |
2100 ATTR_ATIME | ATTR_ATIME_SET;
2102 mutex_lock(&inode->i_mutex);
2104 rc = ll_setattr_raw(file->f_dentry, attr, true);
2108 mutex_unlock(&inode->i_mutex);
/* Translate an fmode (FMODE_READ/FMODE_WRITE bits) into the
 * LL_LEASE_{RD,WR}LCK bits reported by the lease ioctls. */
2120 static inline long ll_lease_type_from_fmode(fmode_t fmode)
2122 return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
2123 ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
/*
 * Main ioctl dispatcher for regular files.  Handles Lustre-specific
 * commands (striping, layout swap, group locks, fid2path, fiemap, HSM,
 * leases, data_version, ...) and finally falls through to the registered
 * ll_iocontrol handlers and the data export for anything unrecognized.
 */
2127 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2129 struct inode *inode = file->f_dentry->d_inode;
2130 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2134 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2135 PFID(ll_inode2fid(inode)), inode, cmd);
2136 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2138 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2139 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2143 case LL_IOC_GETFLAGS:
2144 /* Get the current value of the file flags */
2145 return put_user(fd->fd_flags, (int __user *)arg);
2146 case LL_IOC_SETFLAGS:
2147 case LL_IOC_CLRFLAGS:
2148 /* Set or clear specific file flags */
2149 /* XXX This probably needs checks to ensure the flags are
2150 * not abused, and to handle any flag side effects.
2152 if (get_user(flags, (int __user *) arg))
2155 if (cmd == LL_IOC_SETFLAGS) {
/* Disabling locking is only safe when the file bypasses the cache. */
2156 if ((flags & LL_FILE_IGNORE_LOCK) &&
2157 !(file->f_flags & O_DIRECT)) {
2158 CERROR("%s: unable to disable locking on "
2159 "non-O_DIRECT file\n", current->comm);
2163 fd->fd_flags |= flags;
2165 fd->fd_flags &= ~flags;
2168 case LL_IOC_LOV_SETSTRIPE:
2169 RETURN(ll_lov_setstripe(inode, file, arg));
2170 case LL_IOC_LOV_SETEA:
2171 RETURN(ll_lov_setea(inode, file, arg));
2172 case LL_IOC_LOV_SWAP_LAYOUTS: {
2174 struct lustre_swap_layouts lsl;
2176 if (copy_from_user(&lsl, (char __user *)arg,
2177 sizeof(struct lustre_swap_layouts)))
/* Both files must be open for writing to allow a swap. */
2180 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2183 file2 = fget(lsl.sl_fd);
2188 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2189 rc = ll_swap_layouts(file, file2, &lsl);
2193 case LL_IOC_LOV_GETSTRIPE:
2194 RETURN(ll_file_getstripe(inode,
2195 (struct lov_user_md __user *)arg));
2196 case FSFILT_IOC_FIEMAP:
2197 RETURN(ll_ioctl_fiemap(inode, (struct fiemap __user *)arg));
2198 case FSFILT_IOC_GETFLAGS:
2199 case FSFILT_IOC_SETFLAGS:
2200 RETURN(ll_iocontrol(inode, file, cmd, arg));
2201 case FSFILT_IOC_GETVERSION_OLD:
2202 case FSFILT_IOC_GETVERSION:
2203 RETURN(put_user(inode->i_generation, (int __user *)arg));
2204 case LL_IOC_GROUP_LOCK:
2205 RETURN(ll_get_grouplock(inode, file, arg));
2206 case LL_IOC_GROUP_UNLOCK:
2207 RETURN(ll_put_grouplock(inode, file, arg));
2208 case IOC_OBD_STATFS:
2209 RETURN(ll_obd_statfs(inode, (void __user *)arg));
2211 /* We need to special case any other ioctls we want to handle,
2212 * to send them to the MDS/OST as appropriate and to properly
2213 * network encode the arg field.
2214 case FSFILT_IOC_SETVERSION_OLD:
2215 case FSFILT_IOC_SETVERSION:
2217 case LL_IOC_FLUSHCTX:
2218 RETURN(ll_flush_ctx(inode));
2219 case LL_IOC_PATH2FID: {
2220 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
2221 sizeof(struct lu_fid)))
2226 case LL_IOC_GETPARENT:
2227 RETURN(ll_getparent(file, (struct getparent __user *)arg));
2229 case OBD_IOC_FID2PATH:
2230 RETURN(ll_fid2path(inode, (void __user *)arg));
2231 case LL_IOC_DATA_VERSION: {
2232 struct ioc_data_version idv;
2235 if (copy_from_user(&idv, (char __user *)arg, sizeof(idv)))
/* Only the two documented flush flags are honoured. */
2238 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2239 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2242 copy_to_user((char __user *)arg, &idv, sizeof(idv)))
2248 case LL_IOC_GET_MDTIDX: {
2251 mdtidx = ll_get_mdt_idx(inode);
2255 if (put_user((int)mdtidx, (int __user *)arg))
2260 case OBD_IOC_GETDTNAME:
2261 case OBD_IOC_GETMDNAME:
2262 RETURN(ll_get_obd_name(inode, cmd, arg));
2263 case LL_IOC_HSM_STATE_GET: {
2264 struct md_op_data *op_data;
2265 struct hsm_user_state *hus;
2272 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2273 LUSTRE_OPC_ANY, hus);
2274 if (IS_ERR(op_data)) {
2276 RETURN(PTR_ERR(op_data));
2279 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2282 if (copy_to_user((void __user *)arg, hus, sizeof(*hus)))
2285 ll_finish_md_op_data(op_data);
2289 case LL_IOC_HSM_STATE_SET: {
2290 struct hsm_state_set *hss;
2297 if (copy_from_user(hss, (char __user *)arg, sizeof(*hss))) {
2302 rc = ll_hsm_state_set(inode, hss);
2307 case LL_IOC_HSM_ACTION: {
2308 struct md_op_data *op_data;
2309 struct hsm_current_action *hca;
2316 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2317 LUSTRE_OPC_ANY, hca);
2318 if (IS_ERR(op_data)) {
2320 RETURN(PTR_ERR(op_data));
2323 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2326 if (copy_to_user((char __user *)arg, hca, sizeof(*hca)))
2329 ll_finish_md_op_data(op_data);
/* Lease handling: per-fd lease open handle guarded by lli_och_mutex. */
2333 case LL_IOC_SET_LEASE: {
2334 struct ll_inode_info *lli = ll_i2info(inode);
2335 struct obd_client_handle *och = NULL;
2340 case LL_LEASE_WRLCK:
2341 if (!(file->f_mode & FMODE_WRITE))
2343 fmode = FMODE_WRITE;
2345 case LL_LEASE_RDLCK:
2346 if (!(file->f_mode & FMODE_READ))
2350 case LL_LEASE_UNLCK:
2351 mutex_lock(&lli->lli_och_mutex);
2352 if (fd->fd_lease_och != NULL) {
2353 och = fd->fd_lease_och;
2354 fd->fd_lease_och = NULL;
2356 mutex_unlock(&lli->lli_och_mutex);
2361 fmode = och->och_flags;
2362 rc = ll_lease_close(och, inode, &lease_broken);
/* On successful unlock, report which lease type was released. */
2369 RETURN(ll_lease_type_from_fmode(fmode));
2374 CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
2376 /* apply for lease */
2377 och = ll_lease_open(inode, file, fmode, 0);
2379 RETURN(PTR_ERR(och));
2382 mutex_lock(&lli->lli_och_mutex);
2383 if (fd->fd_lease_och == NULL) {
2384 fd->fd_lease_och = och;
2387 mutex_unlock(&lli->lli_och_mutex);
2389 /* impossible now that only excl is supported for now */
2390 ll_lease_close(och, inode, &lease_broken);
2395 case LL_IOC_GET_LEASE: {
2396 struct ll_inode_info *lli = ll_i2info(inode);
2397 struct ldlm_lock *lock = NULL;
2400 mutex_lock(&lli->lli_och_mutex);
2401 if (fd->fd_lease_och != NULL) {
2402 struct obd_client_handle *och = fd->fd_lease_och;
/* The lease is only reported as held if its lock isn't cancelled. */
2404 lock = ldlm_handle2lock(&och->och_lease_handle);
2406 lock_res_and_lock(lock);
2407 if (!ldlm_is_cancel(lock))
2408 fmode = och->och_flags;
2410 unlock_res_and_lock(lock);
2411 LDLM_LOCK_PUT(lock);
2414 mutex_unlock(&lli->lli_och_mutex);
2416 RETURN(ll_lease_type_from_fmode(fmode));
2418 case LL_IOC_HSM_IMPORT: {
2419 struct hsm_user_import *hui;
2425 if (copy_from_user(hui, (void __user *)arg, sizeof(*hui))) {
2430 rc = ll_hsm_import(inode, file, hui);
/* Unknown command: try registered handlers, then the data export. */
2440 ll_iocontrol_call(inode, file, cmd, arg, &err))
2443 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2444 (void __user *)arg));
2449 #ifndef HAVE_FILE_LLSEEK_SIZE
/* Compat copy of the kernel's lseek helper: validate @offset against
 * FMODE_UNSIGNED_OFFSET and @maxsize, then update f_pos/f_version. */
2450 static inline loff_t
2451 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2453 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2455 if (offset > maxsize)
2458 if (offset != file->f_pos) {
2459 file->f_pos = offset;
/* A position change invalidates the cached file version. */
2460 file->f_version = 0;
/*
 * Compat implementation of generic_file_llseek_size() for kernels that
 * lack it (see the !HAVE_FILE_LLSEEK_SIZE guard above): bounded seek
 * with SEEK_CUR/SEEK_DATA/SEEK_HOLE special cases against @maxsize and
 * the supplied @eof.
 */
2466 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2467 loff_t maxsize, loff_t eof)
2469 struct inode *inode = file->f_dentry->d_inode;
2477 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2478 * position-querying operation. Avoid rewriting the "same"
2479 * f_pos value back to the file because a concurrent read(),
2480 * write() or lseek() might have altered it
2485 * f_lock protects against read/modify/write race with other
2486 * SEEK_CURs. Note that parallel writes and reads behave
2489 mutex_lock(&inode->i_mutex);
2490 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2491 mutex_unlock(&inode->i_mutex);
2495 * In the generic case the entire file is data, so as long as
2496 * offset isn't at the end of the file then the offset is data.
2503 * There is a virtual hole at the end of the file, so as long as
2504 * offset isn't i_size or larger, return i_size.
2512 return llseek_execute(file, offset, maxsize);
/*
 * llseek for Lustre files: SEEK_END/SEEK_HOLE/SEEK_DATA need an accurate
 * file size, so glimpse the OSTs first, then defer to the (possibly
 * compat) generic llseek helper bounded by ll_file_maxbytes().
 */
2516 static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2518 struct inode *inode = file->f_dentry->d_inode;
2519 loff_t retval, eof = 0;
2522 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2523 (origin == SEEK_CUR) ? file->f_pos : 0);
2524 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2525 PFID(ll_inode2fid(inode)), inode, retval, retval,
2527 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
/* These origins need the true size, not the possibly-stale cache. */
2529 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2530 retval = ll_glimpse_size(inode);
2533 eof = i_size_read(inode);
2536 retval = ll_generic_file_llseek_size(file, offset, origin,
2537 ll_file_maxbytes(inode), eof);
/*
 * flush(2): report (and clear) any async writeback error recorded for
 * this inode plus any per-fd write failure, collapsed to -EIO.  It does
 * not itself push dirty pages.
 */
2541 static int ll_flush(struct file *file, fl_owner_t id)
2543 struct inode *inode = file->f_dentry->d_inode;
2544 struct ll_inode_info *lli = ll_i2info(inode);
2545 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2548 LASSERT(!S_ISDIR(inode->i_mode));
2550 /* catch async errors that were recorded back when async writeback
2551 * failed for pages in this mapping. */
2552 rc = lli->lli_async_rc;
2553 lli->lli_async_rc = 0;
2554 if (lli->lli_clob != NULL) {
2555 err = lov_read_and_clear_async_rc(lli->lli_clob);
2560 /* The application has been told write failure already.
2561 * Do not report failure again. */
2562 if (fd->fd_write_failed)
2564 return rc ? -EIO : 0;
2568 * Called to make sure a portion of file has been written out.
2569 * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST.
2571 * Return how many pages have been written.
2573 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2574 enum cl_fsync_mode mode, int ignore_layout)
2576 struct cl_env_nest nest;
2579 struct obd_capa *capa = NULL;
2580 struct cl_fsync_io *fio;
/* Reject any mode outside the four defined cl_fsync modes. */
2584 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2585 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2588 env = cl_env_nested_get(&nest);
2590 RETURN(PTR_ERR(env));
2592 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2594 io = vvp_env_thread_io(env);
2595 io->ci_obj = ll_i2info(inode)->lli_clob;
2596 io->ci_ignore_layout = ignore_layout;
2598 /* initialize parameters for sync */
2599 fio = &io->u.ci_fsync;
2600 fio->fi_capa = capa;
2601 fio->fi_start = start;
2603 fio->fi_fid = ll_inode2fid(inode);
2604 fio->fi_mode = mode;
2605 fio->fi_nr_written = 0;
/* Run a CIT_FSYNC cl_io; on success report the written-page count. */
2607 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2608 result = cl_io_loop(env, io);
2610 result = io->ci_result;
2612 result = fio->fi_nr_written;
2613 cl_io_fini(env, io);
2614 cl_env_nested_put(&nest, env);
2622 * When dentry is provided (the 'else' case), *file->f_dentry may be
2623 * null and dentry must be used directly rather than pulled from
2624 * *file->f_dentry as is done otherwise.
/*
 * fsync(2)/fdatasync(2): wait for dirty pages, surface recorded async
 * errors, fsync metadata on the MDS (md_fsync) and, for regular files,
 * force an OST sync of [start, end] while tracking fd_write_failed.
 * The three prototypes match the different kernel fsync signatures.
 */
2627 #ifdef HAVE_FILE_FSYNC_4ARGS
2628 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2630 struct dentry *dentry = file->f_dentry;
2631 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2632 int ll_fsync(struct file *file, int datasync)
2634 struct dentry *dentry = file->f_dentry;
2636 loff_t end = LLONG_MAX;
2638 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2641 loff_t end = LLONG_MAX;
2643 struct inode *inode = dentry->d_inode;
2644 struct ll_inode_info *lli = ll_i2info(inode);
2645 struct ptlrpc_request *req;
2646 struct obd_capa *oc;
2650 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2651 PFID(ll_inode2fid(inode)), inode);
2652 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2654 #ifdef HAVE_FILE_FSYNC_4ARGS
2655 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2656 mutex_lock(&inode->i_mutex);
2658 /* fsync's caller has already called _fdata{sync,write}, we want
2659 * that IO to finish before calling the osc and mdc sync methods */
2660 rc = filemap_fdatawait(inode->i_mapping);
2663 /* catch async errors that were recorded back when async writeback
2664 * failed for pages in this mapping. */
2665 if (!S_ISDIR(inode->i_mode)) {
2666 err = lli->lli_async_rc;
2667 lli->lli_async_rc = 0;
2670 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* Sync the metadata on the MDS. */
2675 oc = ll_mdscapa_get(inode);
2676 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2682 ptlrpc_req_finished(req);
2684 if (S_ISREG(inode->i_mode)) {
2685 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2687 err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0);
2688 if (rc == 0 && err < 0)
/* Remember the write outcome so ll_flush() can report it once. */
2691 fd->fd_write_failed = true;
2693 fd->fd_write_failed = false;
2696 #ifdef HAVE_FILE_FSYNC_4ARGS
2697 mutex_unlock(&inode->i_mutex);
2703 ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2705 struct inode *inode = file->f_dentry->d_inode;
2706 struct ll_sb_info *sbi = ll_i2sbi(inode);
2707 struct ldlm_enqueue_info einfo = {
2708 .ei_type = LDLM_FLOCK,
2709 .ei_cb_cp = ldlm_flock_completion_ast,
2710 .ei_cbdata = file_lock,
2712 struct md_op_data *op_data;
2713 struct lustre_handle lockh = {0};
2714 ldlm_policy_data_t flock = {{0}};
2715 int fl_type = file_lock->fl_type;
2721 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2722 PFID(ll_inode2fid(inode)), file_lock);
2724 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2726 if (file_lock->fl_flags & FL_FLOCK) {
2727 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2728 /* flocks are whole-file locks */
2729 flock.l_flock.end = OFFSET_MAX;
2730 /* For flocks owner is determined by the local file descriptor */
2731 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2732 } else if (file_lock->fl_flags & FL_POSIX) {
2733 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2734 flock.l_flock.start = file_lock->fl_start;
2735 flock.l_flock.end = file_lock->fl_end;
2739 flock.l_flock.pid = file_lock->fl_pid;
2741 /* Somewhat ugly workaround for svc lockd.
2742 * lockd installs custom fl_lmops->lm_compare_owner that checks
2743 * for the fl_owner to be the same (which it always is on local node
2744 * I guess between lockd processes) and then compares pid.
2745 * As such we assign pid to the owner field to make it all work,
2746 * conflict with normal locks is unlikely since pid space and
2747 * pointer space for current->files are not intersecting */
2748 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2749 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
2753 einfo.ei_mode = LCK_PR;
2756 /* An unlock request may or may not have any relation to
2757 * existing locks so we may not be able to pass a lock handle
2758 * via a normal ldlm_lock_cancel() request. The request may even
2759 * unlock a byte range in the middle of an existing lock. In
2760 * order to process an unlock request we need all of the same
2761 * information that is given with a normal read or write record
2762 * lock request. To avoid creating another ldlm unlock (cancel)
2763 * message we'll treat a LCK_NL flock request as an unlock. */
2764 einfo.ei_mode = LCK_NL;
2767 einfo.ei_mode = LCK_PW;
2770 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
2785 flags = LDLM_FL_BLOCK_NOWAIT;
2791 flags = LDLM_FL_TEST_LOCK;
2794 CERROR("unknown fcntl lock command: %d\n", cmd);
2798 /* Save the old mode so that if the mode in the lock changes we
2799 * can decrement the appropriate reader or writer refcount. */
2800 file_lock->fl_type = einfo.ei_mode;
2802 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2803 LUSTRE_OPC_ANY, NULL);
2804 if (IS_ERR(op_data))
2805 RETURN(PTR_ERR(op_data));
2807 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2808 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2809 flock.l_flock.pid, flags, einfo.ei_mode,
2810 flock.l_flock.start, flock.l_flock.end);
2812 rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
2815 /* Restore the file lock type if not TEST lock. */
2816 if (!(flags & LDLM_FL_TEST_LOCK))
2817 file_lock->fl_type = fl_type;
2819 if ((file_lock->fl_flags & FL_FLOCK) &&
2820 (rc == 0 || file_lock->fl_type == F_UNLCK))
2821 rc2 = flock_lock_file_wait(file, file_lock);
2822 if ((file_lock->fl_flags & FL_POSIX) &&
2823 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2824 !(flags & LDLM_FL_TEST_LOCK))
2825 rc2 = posix_lock_file_wait(file, file_lock);
2827 if (rc2 && file_lock->fl_type != F_UNLCK) {
2828 einfo.ei_mode = LCK_NL;
2829 md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
2834 ll_finish_md_op_data(op_data);
/*
 * Look up the FID of entry \a name (length \a namelen) under directory
 * \a parent via an MDS getattr-by-name RPC, storing the result in *fid.
 * NOTE(review): sampled excerpt — some error-check lines are elided.
 */
2839 int ll_get_fid_by_name(struct inode *parent, const char *name,
2840 int namelen, struct lu_fid *fid)
2842 struct md_op_data *op_data = NULL;
2843 struct mdt_body *body;
2844 struct ptlrpc_request *req;
2848 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
2849 LUSTRE_OPC_ANY, NULL);
2850 if (IS_ERR(op_data))
2851 RETURN(PTR_ERR(op_data));
/* Only the FID is needed from the reply. */
2853 op_data->op_valid = OBD_MD_FLID;
2854 rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
2855 ll_finish_md_op_data(op_data);
2859 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2861 GOTO(out_req, rc = -EFAULT);
2863 *fid = body->mbo_fid1;
2865 ptlrpc_req_finished(req);
/*
 * Migrate the directory entry \a name under \a parent to MDT \a mdtidx.
 * Implemented as a rename-to-self with CLI_MIGRATE set, which makes the
 * MDS relocate the object.  The child inode, if cached, is locked and
 * its dcache aliases invalidated for the duration of the operation.
 * NOTE(review): sampled excerpt — qstr setup and several error paths
 * are elided from this listing.
 */
2869 int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
2870 const char *name, int namelen)
2872 struct dentry *dchild = NULL;
2873 struct inode *child_inode = NULL;
2874 struct md_op_data *op_data;
2875 struct ptlrpc_request *request = NULL;
2880 CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
2881 name, PFID(ll_inode2fid(parent)), mdtidx);
2883 op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
2884 0, LUSTRE_OPC_ANY, NULL);
2885 if (IS_ERR(op_data))
2886 RETURN(PTR_ERR(op_data));
2888 /* Get child FID first */
2889 qstr.hash = full_name_hash(name, namelen);
/* Prefer the cached dentry; fall back to an MDS lookup below. */
2892 dchild = d_lookup(file->f_dentry, &qstr);
2893 if (dchild != NULL) {
2894 if (dchild->d_inode != NULL) {
2895 child_inode = igrab(dchild->d_inode);
2896 if (child_inode != NULL) {
2897 mutex_lock(&child_inode->i_mutex);
2898 op_data->op_fid3 = *ll_inode2fid(child_inode);
2899 ll_invalidate_aliases(child_inode);
2904 rc = ll_get_fid_by_name(parent, name, namelen,
2910 if (!fid_is_sane(&op_data->op_fid3)) {
2911 CERROR("%s: migrate %s , but fid "DFID" is insane\n",
2912 ll_get_fsname(parent->i_sb, NULL, 0), name,
2913 PFID(&op_data->op_fid3));
2914 GOTO(out_free, rc = -EINVAL);
/* Nothing to do if the object already lives on the target MDT. */
2917 rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
2922 CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
2923 PFID(&op_data->op_fid3), mdtidx);
2924 GOTO(out_free, rc = 0);
2927 op_data->op_mds = mdtidx;
2928 op_data->op_cli_flags = CLI_MIGRATE;
/* rename onto itself: the CLI_MIGRATE flag turns this into migration */
2929 rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
2930 namelen, name, namelen, &request);
2932 ll_update_times(request, parent);
2934 ptlrpc_req_finished(request);
/* Drop the stale cached child; it will be re-fetched from the new MDT. */
2939 if (child_inode != NULL) {
2940 clear_nlink(child_inode);
2941 mutex_unlock(&child_inode->i_mutex);
2945 ll_finish_md_op_data(op_data);
/*
 * flock/lock handler for the "-o noflock" mount option.
 * NOTE(review): the body is elided from this excerpt; per the table
 * comment below it returns ENOSYS for all flock calls.
 */
2950 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2958 * test if some locks matching bits and l_req_mode are acquired
2959 * - bits can be in different locks
2960 * - if found clear the common lock bits in *bits
2961 * - the bits not found, are kept in *bits
2963  * \param bits [IN] searched lock bits
2964 * \param l_req_mode [IN] searched lock mode
2965 * \retval boolean, true iff all bits are found
2967 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2969 struct lustre_handle lockh;
2970 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four regular modes. */
2971 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2972 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2981 fid = &ll_i2info(inode)->lli_fid;
2982 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2983 ldlm_lockname[mode]);
/* TEST_LOCK: probe only, do not take a reference on a matched lock. */
2985 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe each requested inodebit individually; clear from *bits every
 * bit covered by some granted lock, leaving the unfound bits set. */
2986 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2987 policy.l_inodebits.bits = *bits & (1 << i);
2988 if (policy.l_inodebits.bits == 0)
2991 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2992 &policy, mode, &lockh)) {
2993 struct ldlm_lock *lock;
2995 lock = ldlm_handle2lock(&lockh);
2998 ~(lock->l_policy_data.l_inodebits.bits);
2999 LDLM_LOCK_PUT(lock);
3001 *bits &= ~policy.l_inodebits.bits;
/*
 * Try to match an existing granted MDS inodebits lock covering \a bits.
 * On success the matched mode is returned and \a lockh references the
 * lock (caller must drop it); 0 means no matching lock was found.
 */
3008 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3009 struct lustre_handle *lockh, __u64 flags,
3012 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3017 fid = &ll_i2info(inode)->lli_fid;
3018 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3020 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3021 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Post-process a revalidation RPC result: -ENOENT on a non-regular,
 * non-directory inode is tolerated (object already unlinked); any other
 * error is logged.  NOTE(review): the success-path return is elided
 * from this excerpt.
 */
3026 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3028 /* Already unlinked. Just update nlink and return success */
3029 if (rc == -ENOENT) {
3031 /* This path cannot be hit for regular files unless in
3032 * case of obscure races, so no need to validate
3034 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3036 } else if (rc != 0) {
/* EACCES/EIDRM are expected under permission/identity races; keep
 * them at D_INFO to avoid console noise. */
3037 CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
3038 "%s: revalidate FID "DFID" error: rc = %d\n",
3039 ll_get_fsname(inode->i_sb, NULL, 0),
3040 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate the dentry's inode attributes against the MDS for the lock
 * bits in \a ibits.  Two strategies: an intent getattr-by-FID when the
 * server supports OBD_CONNECT_ATTRFID, otherwise a plain getattr — but
 * only if no suitable MDS lock is already cached locally.
 * NOTE(review): sampled excerpt — some error handling and the final
 * return are elided.
 */
3046 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3048 struct inode *inode = dentry->d_inode;
3049 struct ptlrpc_request *req = NULL;
3050 struct obd_export *exp;
3054 LASSERT(inode != NULL);
3056 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3057 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3059 exp = ll_i2mdexp(inode);
3061 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3062 * But under CMD case, it caused some lock issues, should be fixed
3063 * with new CMD ibits lock. See bug 12718 */
3064 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3065 struct lookup_intent oit = { .it_op = IT_GETATTR };
3066 struct md_op_data *op_data;
3068 if (ibits == MDS_INODELOCK_LOOKUP)
3069 oit.it_op = IT_LOOKUP;
3071 /* Call getattr by fid, so do not provide name at all. */
3072 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3073 dentry->d_inode, NULL, 0, 0,
3074 LUSTRE_OPC_ANY, NULL);
3075 if (IS_ERR(op_data))
3076 RETURN(PTR_ERR(op_data));
3078 rc = md_intent_lock(exp, op_data, &oit, &req,
3079 &ll_md_blocking_ast, 0);
3080 ll_finish_md_op_data(op_data);
3082 rc = ll_inode_revalidate_fini(inode, rc);
3086 rc = ll_revalidate_it_finish(req, &oit, dentry);
3088 ll_intent_release(&oit);
3092 /* Unlinked? Unhash dentry, so it is not picked up later by
3093 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3094 here to preserve get_cwd functionality on 2.6.
3096 if (!dentry->d_inode->i_nlink)
3097 d_lustre_invalidate(dentry, 0);
3099 ll_lookup_finish_locks(&oit, dentry);
3100 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3101 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3102 u64 valid = OBD_MD_FLGETATTR;
3103 struct md_op_data *op_data;
/* Regular files also need striping (EA) data in the reply. */
3106 if (S_ISREG(inode->i_mode)) {
3107 rc = ll_get_default_mdsize(sbi, &ealen);
3110 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3113 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3114 0, ealen, LUSTRE_OPC_ANY,
3116 if (IS_ERR(op_data))
3117 RETURN(PTR_ERR(op_data));
3119 op_data->op_valid = valid;
3120 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3121 * capa for this inode. Because we only keep capas of dirs
3123 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3124 ll_finish_md_op_data(op_data);
3126 rc = ll_inode_revalidate_fini(inode, rc);
3130 rc = ll_prep_inode(&inode, req, NULL, NULL);
3133 ptlrpc_req_finished(req);
/*
 * For a striped directory, merge the per-stripe attributes (nlink,
 * blocks, size, a/m/ctime) from all MDTs into the master inode.
 * Requires lli_lsm_md to be set (asserted).
 */
3137 static int ll_merge_md_attr(struct inode *inode)
3139 struct cl_attr attr = { 0 };
3142 LASSERT(ll_i2info(inode)->lli_lsm_md != NULL);
3143 rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
3144 &attr, ll_md_blocking_ast);
3148 set_nlink(inode, attr.cat_nlink);
3149 inode->i_blocks = attr.cat_blocks;
3150 i_size_write(inode, attr.cat_size);
/* Cache merged timestamps in the Lustre inode info; the VFS fields
 * are refreshed from these in ll_inode_revalidate(). */
3152 ll_i2info(inode)->lli_atime = attr.cat_atime;
3153 ll_i2info(inode)->lli_mtime = attr.cat_mtime;
3154 ll_i2info(inode)->lli_ctime = attr.cat_ctime;
/*
 * Full revalidation: refresh MDS attributes via __ll_inode_revalidate(),
 * then for regular files glimpse the OSTs for an up-to-date size, and
 * for striped directories merge per-stripe attributes.
 */
3160 ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
3162 struct inode *inode = dentry->d_inode;
3166 rc = __ll_inode_revalidate(dentry, ibits);
3170 /* if object isn't regular file, don't validate size */
3171 if (!S_ISREG(inode->i_mode)) {
3172 if (S_ISDIR(inode->i_mode) &&
3173 ll_i2info(inode)->lli_lsm_md != NULL) {
3174 rc = ll_merge_md_attr(inode);
/* Propagate the cached MDS timestamps into the VFS inode. */
3179 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
3180 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
3181 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
3183 /* In case of restore, the MDT has the right size and has
3184 * already send it back without granting the layout lock,
3185 * inode is up-to-date so glimpse is useless.
3186 * Also to glimpse we need the layout, in case of a running
3187 * restore the MDT holds the layout lock so the glimpse will
3188 * block up to the end of restore (getattr will block)
3190 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3191 rc = ll_glimpse_size(inode);
/*
 * VFS ->getattr: revalidate UPDATE|LOOKUP bits with the MDS, then copy
 * the (now fresh) inode attributes into *stat.
 */
3196 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3198 struct inode *inode = de->d_inode;
3199 struct ll_sb_info *sbi = ll_i2sbi(inode);
3200 struct ll_inode_info *lli = ll_i2info(inode);
3203 res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE |
3204 MDS_INODELOCK_LOOKUP);
3205 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3210 stat->dev = inode->i_sb->s_dev;
/* 32-bit userspace needs an ino that fits in 32 bits. */
3211 if (ll_need_32bit_api(sbi))
3212 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3214 stat->ino = inode->i_ino;
3215 stat->mode = inode->i_mode;
3216 stat->uid = inode->i_uid;
3217 stat->gid = inode->i_gid;
3218 stat->rdev = inode->i_rdev;
3219 stat->atime = inode->i_atime;
3220 stat->mtime = inode->i_mtime;
3221 stat->ctime = inode->i_ctime;
3222 stat->blksize = 1 << inode->i_blkbits;
3224 stat->nlink = inode->i_nlink;
3225 stat->size = i_size_read(inode);
3226 stat->blocks = inode->i_blocks;
/*
 * VFS ->fiemap: translate the kernel's fiemap_extent_info into a Lustre
 * ll_user_fiemap request, run ll_do_fiemap(), and copy the mapped
 * extents back to the caller's buffer.
 * NOTE(review): extent_count is taken from untrusted-ish fi_extents_max;
 * the num_bytes computation has no visible overflow check in this
 * excerpt — confirm upstream validation.
 */
3231 static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3232 __u64 start, __u64 len)
3236 struct ll_user_fiemap *fiemap;
3237 unsigned int extent_count = fieinfo->fi_extents_max;
3239 num_bytes = sizeof(*fiemap) + (extent_count *
3240 sizeof(struct ll_fiemap_extent));
3241 OBD_ALLOC_LARGE(fiemap, num_bytes);
3246 fiemap->fm_flags = fieinfo->fi_flags;
3247 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3248 fiemap->fm_start = start;
3249 fiemap->fm_length = len;
/* Seed only the first extent from the caller (continuation support). */
3250 if (extent_count > 0)
3251 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3252 sizeof(struct ll_fiemap_extent));
3254 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3256 fieinfo->fi_flags = fiemap->fm_flags;
3257 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3258 if (extent_count > 0)
3259 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3260 fiemap->fm_mapped_extents *
3261 sizeof(struct ll_fiemap_extent));
3263 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * Return a referenced copy of the cached POSIX ACL for \a inode.
 * The lli_lock guards lli_posix_acl against concurrent update.
 */
3267 struct posix_acl *ll_get_acl(struct inode *inode, int type)
3269 struct ll_inode_info *lli = ll_i2info(inode);
3270 struct posix_acl *acl = NULL;
3273 spin_lock(&lli->lli_lock);
3274 /* VFS' acl_permission_check->check_acl will release the refcount */
3275 acl = posix_acl_dup(lli->lli_posix_acl);
3276 spin_unlock(&lli->lli_lock);
/*
 * ACL-check callback for generic_permission() on kernels that take a
 * check_acl function pointer.  Signature varies with kernel version
 * (HAVE_GENERIC_PERMISSION_4ARGS adds a flags argument).
 * NOTE(review): sampled excerpt — several return statements are elided.
 */
3281 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3283 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3284 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3286 ll_check_acl(struct inode *inode, int mask)
3289 # ifdef CONFIG_FS_POSIX_ACL
3290 struct posix_acl *acl;
3294 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* Cannot block in RCU-walk mode; bail out (elided return). */
3295 if (flags & IPERM_FLAG_RCU)
3298 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3303 rc = posix_acl_permission(inode, acl, mask);
3304 posix_acl_release(acl);
3307 # else /* !CONFIG_FS_POSIX_ACL */
3309 # endif /* CONFIG_FS_POSIX_ACL */
3311 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * VFS ->permission: revalidate the root inode on first access, apply
 * root squashing (temporarily overriding fsuid/fsgid and dropping FS
 * capabilities), then delegate to generic/remote permission checks.
 * Signature varies with kernel version.
 * NOTE(review): sampled excerpt — RCU-walk early returns, cred error
 * handling and the final RETURN are elided.
 */
3313 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3314 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3316 # ifdef HAVE_INODE_PERMISION_2ARGS
3317 int ll_inode_permission(struct inode *inode, int mask)
3319 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3324 struct ll_sb_info *sbi;
3325 struct root_squash_info *squash;
3326 struct cred *cred = NULL;
3327 const struct cred *old_cred = NULL;
3329 bool squash_id = false;
/* May need to block below (revalidate RPC); refuse RCU-walk lookups. */
3332 #ifdef MAY_NOT_BLOCK
3333 if (mask & MAY_NOT_BLOCK)
3335 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3336 if (flags & IPERM_FLAG_RCU)
3340 /* as root inode are NOT getting validated in lookup operation,
3341 * need to do it before permission check. */
3343 if (inode == inode->i_sb->s_root->d_inode) {
3344 rc = __ll_inode_revalidate(inode->i_sb->s_root,
3345 MDS_INODELOCK_LOOKUP);
3350 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3351 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3353 /* squash fsuid/fsgid if needed */
3354 sbi = ll_i2sbi(inode);
3355 squash = &sbi->ll_squash;
3356 if (unlikely(squash->rsi_uid != 0 &&
3357 uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
3358 !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
3362 CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
3363 __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
3364 squash->rsi_uid, squash->rsi_gid);
3366 /* update current process's credentials
3367 * and FS capability */
3368 cred = prepare_creds();
3372 cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
3373 cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
/* Drop every filesystem-related capability bit for the squashed cred. */
3374 for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
3375 if ((1 << cap) & CFS_CAP_FS_MASK)
3376 cap_lower(cred->cap_effective, cap);
3378 old_cred = override_creds(cred);
3381 ll_stats_ops_tally(sbi, LPROC_LL_INODE_PERM, 1);
3383 if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
3384 rc = lustre_check_remote_perm(inode, mask);
3386 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3388 /* restore current process's credentials and FS capability */
3390 revert_creds(old_cred);
/* -o localflock - only provides locally consistent flock locks */
/* Default file operations: no .flock/.lock entries, so the kernel's
 * local-only lock handling applies (locally consistent flocks). */
3398 struct file_operations ll_file_operations = {
3399 .read = ll_file_read,
3400 .aio_read = ll_file_aio_read,
3401 .write = ll_file_write,
3402 .aio_write = ll_file_aio_write,
3403 .unlocked_ioctl = ll_file_ioctl,
3404 .open = ll_file_open,
3405 .release = ll_file_release,
3406 .mmap = ll_file_mmap,
3407 .llseek = ll_file_seek,
3408 .splice_read = ll_file_splice_read,
/* File operations for "-o flock": cluster-coherent flock/POSIX locks
 * routed through ll_file_flock(). */
3413 struct file_operations ll_file_operations_flock = {
3414 .read = ll_file_read,
3415 .aio_read = ll_file_aio_read,
3416 .write = ll_file_write,
3417 .aio_write = ll_file_aio_write,
3418 .unlocked_ioctl = ll_file_ioctl,
3419 .open = ll_file_open,
3420 .release = ll_file_release,
3421 .mmap = ll_file_mmap,
3422 .llseek = ll_file_seek,
3423 .splice_read = ll_file_splice_read,
3426 .flock = ll_file_flock,
3427 .lock = ll_file_flock
3430 /* These are for -o noflock - to return ENOSYS on flock calls */
3431 struct file_operations ll_file_operations_noflock = {
3432 .read = ll_file_read,
3433 .aio_read = ll_file_aio_read,
3434 .write = ll_file_write,
3435 .aio_write = ll_file_aio_write,
3436 .unlocked_ioctl = ll_file_ioctl,
3437 .open = ll_file_open,
3438 .release = ll_file_release,
3439 .mmap = ll_file_mmap,
3440 .llseek = ll_file_seek,
3441 .splice_read = ll_file_splice_read,
3444 .flock = ll_file_noflock,
3445 .lock = ll_file_noflock
/* Inode operations for regular files. */
3448 struct inode_operations ll_file_inode_operations = {
3449 .setattr = ll_setattr,
3450 .getattr = ll_getattr,
3451 .permission = ll_inode_permission,
3452 .setxattr = ll_setxattr,
3453 .getxattr = ll_getxattr,
3454 .listxattr = ll_listxattr,
3455 .removexattr = ll_removexattr,
3456 .fiemap = ll_fiemap,
3457 #ifdef HAVE_IOP_GET_ACL
3458 .get_acl = ll_get_acl,
3462 /* dynamic ioctl number support routines */
/* Registry of dynamically registered ioctl handlers, protected by an
 * rwsem (readers iterate during dispatch, writers add/remove). */
3463 static struct llioc_ctl_data {
3464 struct rw_semaphore ioc_sem;
3465 struct list_head ioc_head;
3467 __RWSEM_INITIALIZER(llioc.ioc_sem),
3468 LIST_HEAD_INIT(llioc.ioc_head)
/* One registration: a callback plus the ioctl numbers it handles
 * (iocd_cmd is a flexible trailing array sized by iocd_count). */
3473 struct list_head iocd_list;
3474 unsigned int iocd_size;
3475 llioc_callback_t iocd_cb;
3476 unsigned int iocd_count;
3477 unsigned int iocd_cmd[0];
/*
 * Register callback \a cb for \a count dynamic ioctl numbers in \a cmd.
 * Returns an opaque handle (the registration record) used later by
 * ll_iocontrol_unregister(); NULL-ish returns on bad args are elided
 * from this excerpt.
 */
3480 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3483 struct llioc_data *in_data = NULL;
3486 if (cb == NULL || cmd == NULL ||
3487 count > LLIOC_MAX_CMD || count < 0)
3490 size = sizeof(*in_data) + count * sizeof(unsigned int);
3491 OBD_ALLOC(in_data, size);
3492 if (in_data == NULL)
3495 memset(in_data, 0, sizeof(*in_data));
3496 in_data->iocd_size = size;
3497 in_data->iocd_cb = cb;
3498 in_data->iocd_count = count;
3499 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3501 down_write(&llioc.ioc_sem);
3502 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3503 up_write(&llioc.ioc_sem);
/*
 * Remove and free the registration identified by \a magic (the pointer
 * returned by ll_iocontrol_register).  Warns if the handle is unknown.
 * NOTE(review): the magic-comparison line inside the loop is elided
 * from this excerpt.
 */
3508 void ll_iocontrol_unregister(void *magic)
3510 struct llioc_data *tmp;
3515 down_write(&llioc.ioc_sem);
3516 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3518 unsigned int size = tmp->iocd_size;
3520 list_del(&tmp->iocd_list);
3521 up_write(&llioc.ioc_sem);
3523 OBD_FREE(tmp, size);
3527 up_write(&llioc.ioc_sem);
3529 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3532 EXPORT_SYMBOL(ll_iocontrol_register);
3533 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch an unrecognized ioctl to the registered dynamic handlers.
 * Iterates registrations under a read lock and stops at the first
 * callback that returns LLIOC_STOP; its rc is passed back via *rcp.
 */
3535 static enum llioc_iter
3536 ll_iocontrol_call(struct inode *inode, struct file *file,
3537 unsigned int cmd, unsigned long arg, int *rcp)
3539 enum llioc_iter ret = LLIOC_CONT;
3540 struct llioc_data *data;
3541 int rc = -EINVAL, i;
3543 down_read(&llioc.ioc_sem);
3544 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3545 for (i = 0; i < data->iocd_count; i++) {
3546 if (cmd != data->iocd_cmd[i])
3549 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3553 if (ret == LLIOC_STOP)
3556 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration into the inode's cl_object.  For
 * OBJECT_CONF_SET, also allow DLM matching on the layout lock and
 * record the new layout generation.
 */
3563 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3565 struct ll_inode_info *lli = ll_i2info(inode);
3566 struct cl_env_nest nest;
3571 if (lli->lli_clob == NULL)
3574 env = cl_env_nested_get(&nest);
3576 RETURN(PTR_ERR(env));
3578 result = cl_conf_set(env, lli->lli_clob, conf);
3579 cl_env_nested_put(&nest, env);
3581 if (conf->coc_opc == OBJECT_CONF_SET) {
3582 struct ldlm_lock *lock = conf->coc_lock;
3584 LASSERT(lock != NULL);
3585 LASSERT(ldlm_has_layout(lock));
3587 struct lustre_md *md = conf->u.coc_md;
3588 __u32 gen = LL_LAYOUT_GEN_EMPTY;
3590 /* it can only be allowed to match after layout is
3591 * applied to inode otherwise false layout would be
3592 * seen. Applying layout should happen before dropping
3593 * the intent lock. */
3594 ldlm_lock_allow_match(lock);
3596 lli->lli_has_smd = lsm_has_objects(md->lsm);
3597 if (md->lsm != NULL)
3598 gen = md->lsm->lsm_layout_gen;
3601 DFID ": layout version change: %u -> %u\n",
3602 PFID(&lli->lli_fid), ll_layout_version_get(lli),
3604 ll_layout_version_set(lli, gen);
3610 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3611 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3614 struct ll_sb_info *sbi = ll_i2sbi(inode);
3615 struct obd_capa *oc;
3616 struct ptlrpc_request *req;
3617 struct mdt_body *body;
3624 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3625 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3626 lock->l_lvb_data, lock->l_lvb_len);
/* Nothing to do if the layout already arrived in the lock's LVB. */
3628 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3631 /* if layout lock was granted right away, the layout is returned
3632 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3633 * blocked and then granted via completion ast, we have to fetch
3634 * layout here. Please note that we can't use the LVB buffer in
3635 * completion AST because it doesn't have a large enough buffer */
3636 oc = ll_mdscapa_get(inode);
3637 rc = ll_get_default_mdsize(sbi, &lmmsize);
3639 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3640 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3646 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3648 GOTO(out, rc = -EPROTO);
3650 lmmsize = body->mbo_eadatasize;
3651 if (lmmsize == 0) /* empty layout */
3654 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3656 GOTO(out, rc = -EFAULT);
/* Stash a private copy of the layout into the lock's LVB, replacing
 * any previous (smaller/stale) buffer under the resource lock. */
3658 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3659 if (lvbdata == NULL)
3660 GOTO(out, rc = -ENOMEM);
3662 memcpy(lvbdata, lmm, lmmsize);
3663 lock_res_and_lock(lock);
3664 if (lock->l_lvb_data != NULL)
3665 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3667 lock->l_lvb_data = lvbdata;
3668 lock->l_lvb_len = lmmsize;
3669 unlock_res_and_lock(lock);
3674 ptlrpc_req_finished(req);
3679 * Apply the layout to the inode. Layout lock is held and will be released
3682 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3683 struct inode *inode, __u32 *gen, bool reconf)
3685 struct ll_inode_info *lli = ll_i2info(inode);
3686 struct ll_sb_info *sbi = ll_i2sbi(inode);
3687 struct ldlm_lock *lock;
3688 struct lustre_md md = { NULL };
3689 struct cl_object_conf conf;
3692 bool wait_layout = false;
3695 LASSERT(lustre_handle_is_used(lockh));
3697 lock = ldlm_handle2lock(lockh);
3698 LASSERT(lock != NULL);
3699 LASSERT(ldlm_has_layout(lock));
3701 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d",
3702 PFID(&lli->lli_fid), inode, reconf);
3704 /* in case this is a caching lock and reinstate with new inode */
3705 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3707 lock_res_and_lock(lock);
3708 lvb_ready = ldlm_is_lvb_ready(lock);
3709 unlock_res_and_lock(lock);
3710 /* checking lvb_ready is racy but this is okay. The worst case is
3711 * that multi processes may configure the file on the same time. */
3713 if (lvb_ready || !reconf) {
3716 /* layout_gen must be valid if layout lock is not
3717 * cancelled and stripe has already set */
3718 *gen = ll_layout_version_get(lli);
3724 rc = ll_layout_fetch(inode, lock);
3728 /* for layout lock, lmm is returned in lock's lvb.
3729 * lvb_data is immutable if the lock is held so it's safe to access it
3730 * without res lock. See the description in ldlm_lock_decref_internal()
3731 * for the condition to free lvb_data of layout lock */
3732 if (lock->l_lvb_data != NULL) {
3733 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3734 lock->l_lvb_data, lock->l_lvb_len);
3736 *gen = LL_LAYOUT_GEN_EMPTY;
3738 *gen = md.lsm->lsm_layout_gen;
3741 CERROR("%s: file "DFID" unpackmd error: %d\n",
3742 ll_get_fsname(inode->i_sb, NULL, 0),
3743 PFID(&lli->lli_fid), rc);
3749 /* set layout to file. Unlikely this will fail as old layout was
3750 * surely eliminated */
3751 memset(&conf, 0, sizeof conf);
3752 conf.coc_opc = OBJECT_CONF_SET;
3753 conf.coc_inode = inode;
3754 conf.coc_lock = lock;
3755 conf.u.coc_md = &md;
3756 rc = ll_layout_conf(inode, &conf);
3759 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3761 /* refresh layout failed, need to wait */
3762 wait_layout = rc == -EBUSY;
3766 LDLM_LOCK_PUT(lock);
3767 ldlm_lock_decref(lockh, mode);
3769 /* wait for IO to complete if it's still being used. */
3771 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3772 ll_get_fsname(inode->i_sb, NULL, 0),
3773 PFID(&lli->lli_fid), inode);
/* OBJECT_CONF_WAIT blocks until in-flight IO under the old layout
 * drains, after which the caller may retry the refresh. */
3775 memset(&conf, 0, sizeof conf);
3776 conf.coc_opc = OBJECT_CONF_WAIT;
3777 conf.coc_inode = inode;
3778 rc = ll_layout_conf(inode, &conf);
3782 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3783 ll_get_fsname(inode->i_sb, NULL, 0),
3784 PFID(&lli->lli_fid), rc);
3790 * This function checks if there exists a LAYOUT lock on the client side,
3791 * or enqueues it if it doesn't have one in cache.
3793 * This function will not hold layout lock so it may be revoked any time after
3794 * this function returns. Any operations depend on layout should be redone
3797 * This function should be called before lov_io_init() to get an uptodate
3798 * layout version, the caller should save the version number and after IO
3799 * is finished, this function should be called again to verify that layout
3800 * is not changed during IO time.
3802 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3804 struct ll_inode_info *lli = ll_i2info(inode);
3805 struct ll_sb_info *sbi = ll_i2sbi(inode);
3806 struct md_op_data *op_data;
3807 struct lookup_intent it;
3808 struct lustre_handle lockh;
3810 struct ldlm_enqueue_info einfo = {
3811 .ei_type = LDLM_IBITS,
3813 .ei_cb_bl = &ll_md_blocking_ast,
3814 .ei_cb_cp = &ldlm_completion_ast,
/* Fast path: layout locking disabled, or a valid generation cached. */
3819 *gen = ll_layout_version_get(lli);
3820 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != LL_LAYOUT_GEN_NONE)
3824 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3825 LASSERT(S_ISREG(inode->i_mode));
3827 /* take layout lock mutex to enqueue layout lock exclusively. */
3828 mutex_lock(&lli->lli_layout_mutex);
3831 /* mostly layout lock is caching on the local side, so try to match
3832 * it before grabbing layout lock mutex. */
3833 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3834 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3835 if (mode != 0) { /* hit cached lock */
3836 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3840 mutex_unlock(&lli->lli_layout_mutex);
3844 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3845 0, 0, LUSTRE_OPC_ANY, NULL);
3846 if (IS_ERR(op_data)) {
3847 mutex_unlock(&lli->lli_layout_mutex);
3848 RETURN(PTR_ERR(op_data));
3851 /* have to enqueue one */
3852 memset(&it, 0, sizeof(it));
3853 it.it_op = IT_LAYOUT;
3854 lockh.cookie = 0ULL;
3856 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)",
3857 ll_get_fsname(inode->i_sb, NULL, 0),
3858 PFID(&lli->lli_fid), inode);
3860 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
3861 if (it.d.lustre.it_data != NULL)
3862 ptlrpc_req_finished(it.d.lustre.it_data);
3863 it.d.lustre.it_data = NULL;
3865 ll_finish_md_op_data(op_data);
/* Transfer the lock reference out of the intent before dropping it. */
3867 mode = it.d.lustre.it_lock_mode;
3868 it.d.lustre.it_lock_mode = 0;
3869 ll_intent_drop_lock(&it);
3872 /* set lock data in case this is a new lock */
3873 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3874 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3878 mutex_unlock(&lli->lli_layout_mutex);
3884 * This function send a restore request to the MDT
3886 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3888 struct hsm_user_request *hur;
3892 len = sizeof(struct hsm_user_request) +
3893 sizeof(struct hsm_user_item);
3894 OBD_ALLOC(hur, len);
3898 hur->hur_request.hr_action = HUA_RESTORE;
3899 hur->hur_request.hr_archive_id = 0;
3900 hur->hur_request.hr_flags = 0;
3901 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3902 sizeof(hur->hur_user_item[0].hui_fid));
3903 hur->hur_user_item[0].hui_extent.offset = offset;
3904 hur->hur_user_item[0].hui_extent.length = length;
3905 hur->hur_request.hr_itemcount = 1;
3906 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp,