4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/*
 * Allocate a per-open ll_file_data object from the ll_file_data_slab cache
 * and initialize its write-failure flag.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this excerpt -- confirm against the full source.
 */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
/* Slab allocation; presumably __GFP_IO restricts reclaim to avoid fs
 * re-entry under memory pressure -- confirm. */
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
61 fd->fd_write_failed = false;
/* Return an ll_file_data previously obtained from ll_file_data_get()
 * to the slab cache. */
66 static void ll_file_data_put(struct ll_file_data *fd)
69 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the in-core attributes of @inode (mode, a/m/ctime, size, blocks,
 * flags), the current IO epoch, the open handle @fh and the MDS capability
 * into @op_data for an MDS request.
 */
72 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
73 struct lustre_handle *fh)
75 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
76 op_data->op_attr.ia_mode = inode->i_mode;
77 op_data->op_attr.ia_atime = inode->i_atime;
78 op_data->op_attr.ia_mtime = inode->i_mtime;
79 op_data->op_attr.ia_ctime = inode->i_ctime;
80 op_data->op_attr.ia_size = i_size_read(inode);
81 op_data->op_attr_blocks = inode->i_blocks;
/* The cast presumably exposes an ia_attr_flags member that plain
 * struct iattr does not carry -- confirm against struct ll_iattr. */
82 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
83 ll_inode_to_ext_flags(inode->i_flags);
84 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
86 op_data->op_handle = *fh;
87 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Propagate the locally-recorded data-modified hint to the server. */
89 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
90 op_data->op_bias |= MDS_DATA_MODIFIED;
/*
 * Prepare @op_data for a close RPC on @inode using open handle @och:
 * select which attributes are valid, close the IO epoch and pack the
 * inode attributes.
 */
94 * Closes the IO epoch and packs all the attributes into @op_data for
97 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
98 struct obd_client_handle *och)
102 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
103 ATTR_MTIME | ATTR_MTIME_SET |
104 ATTR_CTIME | ATTR_CTIME_SET;
/* NOTE(review): the bodies of the two conditionals below are partly
 * missing from this excerpt -- confirm control flow in full source. */
106 if (!(och->och_flags & FMODE_WRITE))
/* Size/blocks go to the MDS when SOM is unsupported or the file is
 * not a regular file. */
109 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
110 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
112 ll_ioepoch_close(inode, op_data, &och, 0);
115 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
116 ll_prep_md_op_data(op_data, inode, NULL, NULL,
117 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send an MDS close RPC for open handle @och on @inode.
 * A non-NULL @data_version turns the close into an HSM release
 * (MDS_HSM_RELEASE bias).  On a Size-on-MDS epoch close the client sends
 * a follow-up SOM update; on success the LLIF_DATA_MODIFIED flag is
 * cleared and OST objects named in the close reply are destroyed.
 * Finally the handle cookie is poisoned with DEAD_HANDLE_MAGIC.
 */
121 static int ll_close_inode_openhandle(struct obd_export *md_exp,
123 struct obd_client_handle *och,
124 const __u64 *data_version)
126 struct obd_export *exp = ll_i2mdexp(inode);
127 struct md_op_data *op_data;
128 struct ptlrpc_request *req = NULL;
129 struct obd_device *obd = class_exp2obd(exp);
136 * XXX: in case of LMV, is this correct to access
139 CERROR("Invalid MDC connection handle "LPX64"\n",
140 ll_i2mdexp(inode)->exp_handle.h_cookie);
144 OBD_ALLOC_PTR(op_data);
146 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
148 ll_prepare_close(inode, op_data, och);
149 if (data_version != NULL) {
150 /* Pass in data_version implies release. */
151 op_data->op_bias |= MDS_HSM_RELEASE;
152 op_data->op_data_version = *data_version;
153 op_data->op_lease_handle = och->och_lease_handle;
154 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
156 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
157 rc = md_close(md_exp, op_data, och->och_mod, &req);
/* NOTE(review): the branch structure around the md_close() result is
 * partly missing from this excerpt -- confirm in full source. */
159 /* This close must have the epoch closed. */
160 LASSERT(epoch_close);
161 /* MDS has instructed us to obtain Size-on-MDS attribute from
162 * OSTs and send setattr to back to MDS. */
163 rc = ll_som_update(inode, op_data);
165 CERROR("inode %lu mdc Size-on-MDS update failed: "
166 "rc = %d\n", inode->i_ino, rc);
170 CERROR("inode %lu mdc close failed: rc = %d\n",
174 /* DATA_MODIFIED flag was successfully sent on close, cancel data
175 * modification flag. */
176 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
177 struct ll_inode_info *lli = ll_i2info(inode);
/* lli_flags is protected by lli_lock. */
179 spin_lock(&lli->lli_lock);
180 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
181 spin_unlock(&lli->lli_lock);
185 rc = ll_objects_destroy(req, inode);
187 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* For an HSM release, verify the server actually released the file. */
191 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
192 struct mdt_body *body;
193 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
194 if (!(body->valid & OBD_MD_FLRELEASED))
198 ll_finish_md_op_data(op_data);
/* SOM write close without an epoch close: defer the DONE_WRITING rpc. */
202 if (exp_connect_som(exp) && !epoch_close &&
203 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
204 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
206 md_clear_open_replay_data(md_exp, och);
207 /* Free @och if it is not waiting for DONE_WRITING. */
208 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
211 if (req) /* This is close request */
212 ptlrpc_req_finished(req);
/*
 * Close the MDS open handle of the mode selected by @flags
 * (write / exec / read) for @inode, but only when no other local users
 * still reference that handle (tracked by the *och_usecount counters
 * under lli_och_mutex).
 */
216 int ll_md_real_close(struct inode *inode, int flags)
218 struct ll_inode_info *lli = ll_i2info(inode);
219 struct obd_client_handle **och_p;
220 struct obd_client_handle *och;
/* Pick the handle slot and use-counter matching the open mode. */
225 if (flags & FMODE_WRITE) {
226 och_p = &lli->lli_mds_write_och;
227 och_usecount = &lli->lli_open_fd_write_count;
228 } else if (flags & FMODE_EXEC) {
229 och_p = &lli->lli_mds_exec_och;
230 och_usecount = &lli->lli_open_fd_exec_count;
232 LASSERT(flags & FMODE_READ);
233 och_p = &lli->lli_mds_read_och;
234 och_usecount = &lli->lli_open_fd_read_count;
237 mutex_lock(&lli->lli_och_mutex);
238 if (*och_usecount) { /* There are still users of this handle, so
240 mutex_unlock(&lli->lli_och_mutex);
245 mutex_unlock(&lli->lli_och_mutex);
/* Outside the mutex another thread may already have freed the och. */
247 if (och) { /* There might be a race and somebody have freed this och
249 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close path: drop group lock and lease if held,
 * close a private open handle, decrement the per-mode open counter, and
 * only talk to the MDS (ll_md_real_close) when no cached OPEN DLM lock
 * lets us skip the RPC.  Frees the ll_file_data at the end.
 */
256 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
259 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
260 struct ll_inode_info *lli = ll_i2info(inode);
264 /* clear group lock, if present */
265 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
266 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
268 if (fd->fd_lease_och != NULL) {
271 /* Usually the lease is not released when the
272 * application crashed, we need to release here. */
273 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
274 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
275 PFID(&lli->lli_fid), rc, lease_broken);
277 fd->fd_lease_och = NULL;
280 if (fd->fd_och != NULL) {
281 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
286 /* Let's see if we have good enough OPEN lock on the file and if
287 we can skip talking to MDS */
288 if (file->f_dentry->d_inode) { /* Can this ever be false? */
/* Only test for a granted lock; do not enqueue a new one. */
290 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
291 struct lustre_handle lockh;
292 struct inode *inode = file->f_dentry->d_inode;
293 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
295 mutex_lock(&lli->lli_och_mutex);
296 if (fd->fd_omode & FMODE_WRITE) {
298 LASSERT(lli->lli_open_fd_write_count);
299 lli->lli_open_fd_write_count--;
300 } else if (fd->fd_omode & FMODE_EXEC) {
302 LASSERT(lli->lli_open_fd_exec_count);
303 lli->lli_open_fd_exec_count--;
306 LASSERT(lli->lli_open_fd_read_count);
307 lli->lli_open_fd_read_count--;
309 mutex_unlock(&lli->lli_och_mutex);
/* No matching OPEN lock cached locally: must close on the MDS. */
311 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
312 LDLM_IBITS, &policy, lockmode,
314 rc = ll_md_real_close(file->f_dentry->d_inode,
318 CERROR("Releasing a file %p with negative dentry %p. Name %s",
319 file, file->f_dentry, file->f_dentry->d_name.name);
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
325 ll_capa_close(inode);
330 /* While this returns an error code, fput() the caller does not, so we need
331 * to make every effort to clean up all of our state here. Also, applications
332 * rarely check close errors and even if an error is returned they will not
333 * re-try the close call.
/*
 * VFS ->release handler: tears down remote-ACL state for the root inode,
 * stops a statahead thread this fd owns, clears async write errors on
 * regular files and delegates the MDS close to ll_md_close().
 */
335 int ll_file_release(struct inode *inode, struct file *file)
337 struct ll_file_data *fd;
338 struct ll_sb_info *sbi = ll_i2sbi(inode);
339 struct ll_inode_info *lli = ll_i2info(inode);
343 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
344 inode->i_generation, inode);
346 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL cleanup applies only to the filesystem root. */
347 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
348 inode == inode->i_sb->s_root->d_inode) {
349 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
352 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
353 fd->fd_flags &= ~LL_FILE_RMTACL;
354 rct_del(&sbi->ll_rct, cfs_curproc_pid());
355 et_search_free(&sbi->ll_et, cfs_curproc_pid());
/* Do not count releases of the root dentry in the stats. */
360 if (inode->i_sb->s_root != file->f_dentry)
361 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
362 fd = LUSTRE_FPRIVATE(file);
365 /* The last ref on @file, maybe not the the owner pid of statahead.
366 * Different processes can open the same dir, "ll_opendir_key" means:
367 * it is me that should stop the statahead thread. */
368 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
369 lli->lli_opendir_pid != 0)
370 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root dentry never has an MDS open handle to close -- just free fd. */
372 if (inode->i_sb->s_root == file->f_dentry) {
373 LUSTRE_FPRIVATE(file) = NULL;
374 ll_file_data_put(fd);
378 if (!S_ISDIR(inode->i_mode)) {
/* Surface any asynchronous OST write error on close. */
379 lov_read_and_clear_async_rc(lli->lli_clob);
380 lli->lli_async_rc = 0;
383 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/* Fault-injection hook used by tests to dump the debug log. */
385 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
386 libcfs_debug_dumplog();
/*
 * Resolve an open intent @itp for @file against the MDS
 * (md_intent_lock), then initialize the inode from the reply and attach
 * the returned lock data.  @lmm/@lmmsize non-zero means we are only
 * setting stripe info, so no OPEN lock is requested.
 */
391 static int ll_intent_file_open(struct file *file, void *lmm,
392 int lmmsize, struct lookup_intent *itp)
394 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
395 struct dentry *parent = file->f_dentry->d_parent;
396 struct md_op_data *op_data;
397 struct ptlrpc_request *req;
398 __u32 opc = LUSTRE_OPC_ANY;
405 /* Usually we come here only for NFSD, and we want open lock.
406 But we can also get here with pre 2.6.15 patchless kernels, and in
407 that case that lock is also ok */
408 /* We can also get here if there was cached open handle in revalidate_it
409 * but it disappeared while we were getting from there to ll_file_open.
410 * But this means this file was closed and immediatelly opened which
411 * makes a good candidate for using OPEN lock */
412 /* If lmmsize & lmm are not 0, we are just setting stripe info
413 * parameters. No need for the open lock */
414 if (lmm == NULL && lmmsize == 0) {
415 itp->it_flags |= MDS_OPEN_LOCK;
416 if (itp->it_flags & FMODE_WRITE)
417 opc = LUSTRE_OPC_CREATE;
420 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
421 file->f_dentry->d_inode, NULL, 0,
425 RETURN(PTR_ERR(op_data));
427 itp->it_flags |= MDS_OPEN_BY_FID;
428 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
429 0 /*unused */, &req, ll_md_blocking_ast, 0);
430 ll_finish_md_op_data(op_data);
/* NOTE(review): error-path structure below is partly elided in this
 * excerpt -- confirm branch nesting in the full source. */
432 /* reason for keep own exit path - don`t flood log
433 * with messages with -ESTALE errors.
435 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
436 it_open_error(DISP_OPEN_OPEN, itp))
438 ll_release_openhandle(file->f_dentry, itp);
442 if (it_disposition(itp, DISP_LOOKUP_NEG))
443 GOTO(out, rc = -ENOENT);
445 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
446 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
447 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
451 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
452 if (!rc && itp->d.lustre.it_lock_mode)
453 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
457 ptlrpc_req_finished(itp->d.lustre.it_data);
458 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
459 ll_intent_drop_lock(itp);
465 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
466 * not believe attributes if a few ioepoch holders exist. Attributes for
467 * previous ioepoch if new one is opened are also skipped by MDS.
469 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a new, non-zero epoch; re-opening the same epoch is a no-op. */
471 if (ioepoch && lli->lli_ioepoch != ioepoch) {
472 lli->lli_ioepoch = ioepoch;
473 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
474 ioepoch, PFID(&lli->lli_fid));
/*
 * Populate an obd_client_handle from the MDT reply body attached to the
 * intent @it (open fh, fid, lease lock handle, open flags) and register
 * the open for replay.  Returns the md_set_open_replay_data() result.
 */
478 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
479 struct obd_client_handle *och)
481 struct ptlrpc_request *req = it->d.lustre.it_data;
482 struct mdt_body *body;
484 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
485 och->och_fh = body->handle;
486 och->och_fid = body->fid1;
487 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
488 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
489 och->och_flags = it->it_flags;
491 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish the client-side part of an open: optionally fill @och from the
 * intent reply, record the ioepoch, attach @fd as the file's private
 * data and initialize readahead state.
 */
494 int ll_local_open(struct file *file, struct lookup_intent *it,
495 struct ll_file_data *fd, struct obd_client_handle *och)
497 struct inode *inode = file->f_dentry->d_inode;
498 struct ll_inode_info *lli = ll_i2info(inode);
/* Private data must not be set twice for the same struct file. */
501 LASSERT(!LUSTRE_FPRIVATE(file));
506 struct ptlrpc_request *req = it->d.lustre.it_data;
507 struct mdt_body *body;
510 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
514 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
515 ll_ioepoch_open(lli, body->ioepoch);
518 LUSTRE_FPRIVATE(file) = fd;
519 ll_readahead_init(inode, &fd->fd_ras);
/* Remember only the access-mode bits of the open flags. */
520 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
525 /* Open a file, and (for the very first open) create objects on the OSTs at
526 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
527 * creation or open until ll_lov_setstripe() ioctl is called.
529 * If we already have the stripe MD locally then we don't request it in
530 * md_open(), by passing a lmm_size = 0.
532 * It is up to the application to ensure no other processes open this file
533 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
534 * used. We might be able to avoid races of that sort by getting lli_open_sem
535 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
536 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * VFS ->open handler.  Reuses an existing per-mode MDS open handle when
 * one is cached on the inode; otherwise builds/reuses an open intent,
 * performs the MDS open and records the new handle.  Statahead ownership
 * for directories is claimed under lli_sa_lock.
 */
538 int ll_file_open(struct inode *inode, struct file *file)
540 struct ll_inode_info *lli = ll_i2info(inode);
541 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
542 .it_flags = file->f_flags };
543 struct obd_client_handle **och_p = NULL;
544 __u64 *och_usecount = NULL;
545 struct ll_file_data *fd;
546 int rc = 0, opendir_set = 0;
549 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
550 inode->i_generation, inode, file->f_flags);
552 it = file->private_data; /* XXX: compat macro */
553 file->private_data = NULL; /* prevent ll_local_open assertion */
555 fd = ll_file_data_get();
557 GOTO(out_openerr, rc = -ENOMEM);
/* First opener of a directory claims statahead ownership. */
560 if (S_ISDIR(inode->i_mode)) {
561 spin_lock(&lli->lli_sa_lock);
562 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
563 lli->lli_opendir_pid == 0) {
564 lli->lli_opendir_key = fd;
565 lli->lli_opendir_pid = cfs_curproc_pid();
568 spin_unlock(&lli->lli_sa_lock);
/* Opening the root needs no MDS open handle. */
571 if (inode->i_sb->s_root == file->f_dentry) {
572 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: synthesize one from f_flags. */
576 if (!it || !it->d.lustre.it_disposition) {
577 /* Convert f_flags into access mode. We cannot use file->f_mode,
578 * because everything but O_ACCMODE mask was stripped from
580 if ((oit.it_flags + 1) & O_ACCMODE)
582 if (file->f_flags & O_TRUNC)
583 oit.it_flags |= FMODE_WRITE;
585 /* kernel only call f_op->open in dentry_open. filp_open calls
586 * dentry_open after call to open_namei that checks permissions.
587 * Only nfsd_open call dentry_open directly without checking
588 * permissions and because of that this code below is safe. */
589 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
590 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
592 /* We do not want O_EXCL here, presumably we opened the file
593 * already? XXX - NFS implications? */
594 oit.it_flags &= ~O_EXCL;
596 /* bug20584, if "it_flags" contains O_CREAT, the file will be
597 * created if necessary, then "IT_CREAT" should be set to keep
598 * consistent with it */
599 if (oit.it_flags & O_CREAT)
600 oit.it_op |= IT_CREAT;
606 /* Let's see if we have file open on MDS already. */
607 if (it->it_flags & FMODE_WRITE) {
608 och_p = &lli->lli_mds_write_och;
609 och_usecount = &lli->lli_open_fd_write_count;
610 } else if (it->it_flags & FMODE_EXEC) {
611 och_p = &lli->lli_mds_exec_och;
612 och_usecount = &lli->lli_open_fd_exec_count;
614 och_p = &lli->lli_mds_read_och;
615 och_usecount = &lli->lli_open_fd_read_count;
618 mutex_lock(&lli->lli_och_mutex);
619 if (*och_p) { /* Open handle is present */
620 if (it_disposition(it, DISP_OPEN_OPEN)) {
621 /* Well, there's extra open request that we do not need,
622 let's close it somehow. This will decref request. */
623 rc = it_open_error(DISP_OPEN_OPEN, it);
625 mutex_unlock(&lli->lli_och_mutex);
626 GOTO(out_openerr, rc);
629 ll_release_openhandle(file->f_dentry, it);
633 rc = ll_local_open(file, it, fd, NULL);
636 mutex_unlock(&lli->lli_och_mutex);
637 GOTO(out_openerr, rc);
640 LASSERT(*och_usecount == 0);
641 if (!it->d.lustre.it_disposition) {
642 /* We cannot just request lock handle now, new ELC code
643 means that one of other OPEN locks for this file
644 could be cancelled, and since blocking ast handler
645 would attempt to grab och_mutex as well, that would
646 result in a deadlock */
647 mutex_unlock(&lli->lli_och_mutex);
648 it->it_create_mode |= M_CHECK_STALE;
649 rc = ll_intent_file_open(file, NULL, 0, it);
650 it->it_create_mode &= ~M_CHECK_STALE;
652 GOTO(out_openerr, rc);
656 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
658 GOTO(out_och_free, rc = -ENOMEM);
662 /* md_intent_lock() didn't get a request ref if there was an
663 * open error, so don't do cleanup on the request here
665 /* XXX (green): Should not we bail out on any error here, not
666 * just open error? */
667 rc = it_open_error(DISP_OPEN_OPEN, it);
669 GOTO(out_och_free, rc);
671 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
673 rc = ll_local_open(file, it, fd, *och_p);
675 GOTO(out_och_free, rc);
677 mutex_unlock(&lli->lli_och_mutex);
680 /* Must do this outside lli_och_mutex lock to prevent deadlock where
681 different kind of OPEN lock for this same inode gets cancelled
682 by ldlm_cancel_lru */
683 if (!S_ISREG(inode->i_mode))
684 GOTO(out_och_free, rc);
/* No stripe metadata yet: object creation may be deferred. */
688 if (!lli->lli_has_smd) {
689 if (file->f_flags & O_LOV_DELAY_CREATE ||
690 !(file->f_mode & FMODE_WRITE)) {
691 CDEBUG(D_INODE, "object creation was delayed\n");
692 GOTO(out_och_free, rc);
695 file->f_flags &= ~O_LOV_DELAY_CREATE;
696 GOTO(out_och_free, rc);
/* Error/cleanup paths below. */
700 if (och_p && *och_p) {
701 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
702 *och_p = NULL; /* OBD_FREE writes some magic there */
705 mutex_unlock(&lli->lli_och_mutex);
708 if (opendir_set != 0)
709 ll_stop_statahead(inode, lli->lli_opendir_key);
711 ll_file_data_put(fd);
713 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
716 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
717 ptlrpc_req_finished(it->d.lustre.it_data);
718 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease locks: on LDLM_CB_BLOCKING, asynchronously
 * cancel the lease lock so the conflicting request can proceed.
 * NOTE(review): the LDLM_CB_CANCELING branch body is not visible in
 * this excerpt.
 */
724 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
725 struct ldlm_lock_desc *desc, void *data, int flag)
728 struct lustre_handle lockh;
732 case LDLM_CB_BLOCKING:
733 ldlm_lock2handle(lock, &lockh);
734 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
736 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
740 case LDLM_CB_CANCELING:
748 * Acquire a lease and open the file.
/*
 * Open @inode with an MDS lease of mode @fmode (FMODE_READ or
 * FMODE_WRITE only).  When @file is given, reuse its existing open
 * handle (passed via op_handle) so the MDT ties the lease to the same
 * owner.  Returns the new obd_client_handle or an ERR_PTR.
 */
750 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
751 fmode_t fmode, __u64 open_flags)
753 struct lookup_intent it = { .it_op = IT_OPEN };
754 struct ll_sb_info *sbi = ll_i2sbi(inode);
755 struct md_op_data *op_data;
756 struct ptlrpc_request *req;
757 struct lustre_handle old_handle = { 0 };
758 struct obd_client_handle *och = NULL;
/* Exactly one of read/write -- a lease has a single mode. */
763 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
764 RETURN(ERR_PTR(-EINVAL));
767 struct ll_inode_info *lli = ll_i2info(inode);
768 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
769 struct obd_client_handle **och_p;
/* The fd's mode must cover the lease mode; exec fds are excluded. */
772 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
773 RETURN(ERR_PTR(-EPERM));
775 /* Get the openhandle of the file */
777 mutex_lock(&lli->lli_och_mutex);
/* Only one lease per file descriptor. */
778 if (fd->fd_lease_och != NULL) {
779 mutex_unlock(&lli->lli_och_mutex);
783 if (fd->fd_och == NULL) {
784 if (file->f_mode & FMODE_WRITE) {
785 LASSERT(lli->lli_mds_write_och != NULL);
786 och_p = &lli->lli_mds_write_och;
787 och_usecount = &lli->lli_open_fd_write_count;
789 LASSERT(lli->lli_mds_read_och != NULL);
790 och_p = &lli->lli_mds_read_och;
791 och_usecount = &lli->lli_open_fd_read_count;
/* The shared handle can only be taken over by a sole opener. */
793 if (*och_usecount == 1) {
800 mutex_unlock(&lli->lli_och_mutex);
801 if (rc < 0) /* more than 1 opener */
804 LASSERT(fd->fd_och != NULL);
805 old_handle = fd->fd_och->och_fh;
810 RETURN(ERR_PTR(-ENOMEM));
812 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
813 LUSTRE_OPC_ANY, NULL);
815 GOTO(out, rc = PTR_ERR(op_data));
817 /* To tell the MDT this openhandle is from the same owner */
818 op_data->op_handle = old_handle;
820 it.it_flags = fmode | open_flags;
821 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
822 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
823 ll_md_blocking_lease_ast,
824 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
825 * it can be cancelled which may mislead applications that the lease is
827 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
828 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
829 * doesn't deal with openhandle, so normal openhandle will be leaked. */
830 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
831 ll_finish_md_op_data(op_data);
833 ptlrpc_req_finished(req);
834 it_clear_disposition(&it, DISP_ENQ_COMPLETE);
837 GOTO(out_release_it, rc);
839 if (it_disposition(&it, DISP_LOOKUP_NEG))
840 GOTO(out_release_it, rc = -ENOENT);
842 rc = it_open_error(DISP_OPEN_OPEN, &it);
844 GOTO(out_release_it, rc);
846 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
847 ll_och_fill(sbi->ll_md_exp, &it, och);
/* A server without lease support grants the open but not the lease. */
849 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
850 GOTO(out_close, rc = -EOPNOTSUPP);
852 /* already get lease, handle lease lock */
853 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
854 if (it.d.lustre.it_lock_mode == 0 ||
855 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
856 /* open lock must return for lease */
857 CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
858 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
859 it.d.lustre.it_lock_bits);
860 GOTO(out_close, rc = -EPROTO);
863 ll_intent_release(&it);
/* Error path: drop the open lock and close the handle we created. */
867 /* Cancel open lock */
868 if (it.d.lustre.it_lock_mode != 0) {
869 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
870 it.d.lustre.it_lock_mode);
871 it.d.lustre.it_lock_mode = 0;
872 och->och_lease_handle.cookie = 0ULL;
874 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
876 CERROR("%s: error closing file "DFID": %d\n",
877 ll_get_fsname(inode->i_sb, NULL, 0),
878 PFID(&ll_i2info(inode)->lli_fid), rc2);
879 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
881 ll_intent_release(&it);
887 EXPORT_SYMBOL(ll_lease_open);
890 * Release lease and close the file.
891 * It will check if the lease has ever broken.
/*
 * Close a lease obtained from ll_lease_open(): detect whether the lease
 * lock was already cancelled (broken), report that via @lease_broken,
 * cancel the lock and close the open handle on the MDS.
 */
893 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
896 struct ldlm_lock *lock;
897 bool cancelled = true;
901 lock = ldlm_handle2lock(&och->och_lease_handle);
/* Cancel flag must be sampled under the resource lock. */
903 lock_res_and_lock(lock);
904 cancelled = ldlm_is_cancel(lock);
905 unlock_res_and_lock(lock);
909 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
910 PFID(&ll_i2info(inode)->lli_fid), cancelled);
913 ldlm_cli_cancel(&och->och_lease_handle, 0);
914 if (lease_broken != NULL)
915 *lease_broken = cancelled;
917 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
921 EXPORT_SYMBOL(ll_lease_close);
923 /* Fills the obdo with the attributes for the lsm */
/*
 * Issue an asynchronous OST getattr for @lsm and wait for the result.
 * @ioepoch is packed into the request; @sync requests the getattr under
 * a server-side lock (OBD_FL_SRVLOCK).  On success the returned o_valid
 * is masked down to the fields the caller may trust.
 */
924 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
925 struct obd_capa *capa, struct obdo *obdo,
926 __u64 ioepoch, int sync)
928 struct ptlrpc_request_set *set;
929 struct obd_info oinfo = { { { 0 } } };
934 LASSERT(lsm != NULL);
938 oinfo.oi_oa->o_oi = lsm->lsm_oi;
939 oinfo.oi_oa->o_mode = S_IFREG;
940 oinfo.oi_oa->o_ioepoch = ioepoch;
941 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
942 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
943 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
944 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
945 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
946 OBD_MD_FLDATAVERSION;
947 oinfo.oi_capa = capa;
/* Sync mode: ask the OST to take the lock server-side. */
949 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
950 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
953 set = ptlrpc_prep_set();
955 CERROR("can't allocate ptlrpc set\n");
958 rc = obd_getattr_async(exp, &oinfo, set);
960 rc = ptlrpc_set_wait(set);
961 ptlrpc_set_destroy(set);
/* Keep only the attributes actually aggregated from the OSTs. */
964 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
965 OBD_MD_FLATIME | OBD_MD_FLMTIME |
966 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
967 OBD_MD_FLDATAVERSION);
972 * Performs the getattr on the inode and updates its fields.
973 * If @sync != 0, perform the getattr under the server-side lock.
975 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
976 __u64 ioepoch, int sync)
978 struct obd_capa *capa = ll_mdscapa_get(inode);
979 struct lov_stripe_md *lsm;
983 lsm = ccc_inode_lsm_get(inode);
984 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
985 capa, obdo, ioepoch, sync);
/* On success refresh the in-core inode from the returned obdo. */
988 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
990 obdo_refresh_inode(inode, obdo, obdo->o_valid);
991 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
992 " blksize %lu\n", POSTID(oi), i_size_read(inode),
993 (unsigned long long)inode->i_blocks,
994 (unsigned long)ll_inode_blksize(inode));
996 ccc_inode_lsm_put(inode, lsm);
/*
 * Merge MDS-provided timestamps (lli_lvb) with OST attributes obtained
 * through the cl_object layer, taking the most recent of each timestamp,
 * then update the inode's size and block count under the size lock.
 */
1000 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
1002 struct ll_inode_info *lli = ll_i2info(inode);
1003 struct cl_object *obj = lli->lli_clob;
1004 struct cl_attr *attr = ccc_env_thread_attr(env);
1010 ll_inode_size_lock(inode);
1011 /* merge timestamps the most recently obtained from mds with
1012 timestamps obtained from osts */
1013 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
1014 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
1015 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
1016 inode_init_lvb(inode, &lvb);
1018 cl_object_attr_lock(obj);
1019 rc = cl_object_attr_get(env, obj, attr);
1020 cl_object_attr_unlock(obj);
/* Keep the newer of each MDS/OST timestamp. */
1023 if (lvb.lvb_atime < attr->cat_atime)
1024 lvb.lvb_atime = attr->cat_atime;
1025 if (lvb.lvb_ctime < attr->cat_ctime)
1026 lvb.lvb_ctime = attr->cat_ctime;
1027 if (lvb.lvb_mtime < attr->cat_mtime)
1028 lvb.lvb_mtime = attr->cat_mtime;
1030 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1031 PFID(&lli->lli_fid), attr->cat_size);
1032 cl_isize_write_nolock(inode, attr->cat_size);
1034 inode->i_blocks = attr->cat_blocks;
1036 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1037 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1038 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1040 ll_inode_size_unlock(inode);
/*
 * Glimpse helper for ioctls: fetch current OST attributes for @lsm and
 * copy size/blocks/timestamps into the user-visible stat @st.
 */
1045 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1048 struct obdo obdo = { 0 };
1051 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1053 st->st_size = obdo.o_size;
1054 st->st_blocks = obdo.o_blocks;
1055 st->st_mtime = obdo.o_mtime;
1056 st->st_atime = obdo.o_atime;
1057 st->st_ctime = obdo.o_ctime;
/*
 * Decide whether atime updates should be suppressed for @file, checking
 * the open flags, inode flags, mount flags and superblock flags.
 * NOTE(review): the return statements between the checks are not
 * visible in this excerpt -- each condition presumably returns true.
 */
1062 static bool file_is_noatime(const struct file *file)
1064 const struct vfsmount *mnt = file->f_path.mnt;
1065 const struct inode *inode = file->f_path.dentry->d_inode;
1067 /* Adapted from file_accessed() and touch_atime().*/
1068 if (file->f_flags & O_NOATIME)
1071 if (inode->i_flags & S_NOATIME)
1074 if (IS_NOATIME(inode))
1077 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1080 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1083 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: nonblocking/append/sync flags, lock policy (never for nolock
 * files, mandatory for O_APPEND, maybe otherwise) and noatime handling.
 */
1089 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1091 struct inode *inode = file->f_dentry->d_inode;
1093 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1095 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1096 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1097 file->f_flags & O_DIRECT ||
1100 io->ci_obj = ll_i2info(inode)->lli_clob;
1101 io->ci_lockreq = CILR_MAYBE;
1102 if (ll_file_nolock(file)) {
/* Lockless file: take no DLM locks and skip server-side locking. */
1103 io->ci_lockreq = CILR_NEVER;
1104 io->ci_no_srvlock = 1;
1105 } else if (file->f_flags & O_APPEND) {
/* Appends need an exclusive lock to serialize on the file size. */
1106 io->ci_lockreq = CILR_MANDATORY;
1109 io->ci_noatime = file_is_noatime(file);
/*
 * Common body for all llite read/write paths (normal iov, sendfile,
 * splice).  Sets up the cl_io for @iot at *@ppos/@count, takes
 * lli_write_mutex for non-group-locked writes (lli_trunc_sem for
 * reads), runs the cl_io loop, updates *@ppos and tallies read/write
 * byte statistics.
 */
1113 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1114 struct file *file, enum cl_io_type iot,
1115 loff_t *ppos, size_t count)
1117 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1118 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1124 io = ccc_env_thread_io(env);
1125 ll_io_init(io, file, iot == CIT_WRITE);
1127 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1128 struct vvp_io *vio = vvp_env_io(env);
1129 struct ccc_io *cio = ccc_env_io(env);
1130 int write_mutex_locked = 0;
1132 cio->cui_fd = LUSTRE_FPRIVATE(file);
1133 vio->cui_io_subtype = args->via_io_subtype;
/* NOTE(review): switch-case labels for the IO subtypes are partly
 * missing from this excerpt -- confirm case boundaries. */
1135 switch (vio->cui_io_subtype) {
1137 cio->cui_iov = args->u.normal.via_iov;
1138 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1139 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1140 cio->cui_iocb = args->u.normal.via_iocb;
1141 if ((iot == CIT_WRITE) &&
1142 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1143 if (mutex_lock_interruptible(&lli->
1145 GOTO(out, result = -ERESTARTSYS);
1146 write_mutex_locked = 1;
1147 } else if (iot == CIT_READ) {
1148 down_read(&lli->lli_trunc_sem);
1152 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1153 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
1156 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1157 vio->u.splice.cui_flags = args->u.splice.via_flags;
1160 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
1163 result = cl_io_loop(env, io);
1164 if (write_mutex_locked)
1165 mutex_unlock(&lli->lli_write_mutex);
1166 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
1167 up_read(&lli->lli_trunc_sem);
1169 /* cl_io_rw_init() handled IO */
1170 result = io->ci_result;
/* A positive byte count from the cl_io wins over the rc. */
1173 if (io->ci_nob > 0) {
1174 result = io->ci_nob;
1175 *ppos = io->u.ci_wr.wr.crw_pos;
1179 cl_io_fini(env, io);
1180 /* If any bit been read/written (result != 0), we just return
1181 * short read/write instead of restart io. */
1182 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1183 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1184 iot == CIT_READ ? "read" : "write",
1185 file->f_dentry->d_name.name, *ppos, count);
1186 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1190 if (iot == CIT_READ) {
1192 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1193 LPROC_LL_READ_BYTES, result);
1194 } else if (iot == CIT_WRITE) {
1196 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1197 LPROC_LL_WRITE_BYTES, result);
1198 fd->fd_write_failed = false;
1199 } else if (result != -ERESTARTSYS) {
/* Remember the failure so fsync/close can report it. */
1200 fd->fd_write_failed = true;
1209 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array: accumulate the total byte count into *count,
 * rejecting negative lengths / wrap-around and truncating *nr_segs at
 * the first segment that fails access_ok().
 */
1211 static int ll_file_get_iov_count(const struct iovec *iov,
1212 unsigned long *nr_segs, size_t *count)
1217 for (seg = 0; seg < *nr_segs; seg++) {
1218 const struct iovec *iv = &iov[seg];
1221 * If any segment has a negative length, or the cumulative
1222 * length ever wraps negative then return -EINVAL.
1225 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1227 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
/* Unreadable segment: drop it and everything after it. */
1232 cnt -= iv->iov_len; /* This segment is no good */
/*
 * AIO read entry point: validate the iovec, grab a cl_env, package the
 * iovec/iocb into vvp_io_args and hand off to ll_file_io_generic() with
 * CIT_READ.  Updates iocb->ki_pos through the generic helper.
 */
1239 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1240 unsigned long nr_segs, loff_t pos)
1243 struct vvp_io_args *args;
1249 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1253 env = cl_env_get(&refcheck);
1255 RETURN(PTR_ERR(env));
1257 args = vvp_env_args(env, IO_NORMAL);
/* Cast away const: via_iov is non-const but the iov is only read here
 * — presumably; confirm against vvp_io users. */
1258 args->u.normal.via_iov = (struct iovec *)iov;
1259 args->u.normal.via_nrsegs = nr_segs;
1260 args->u.normal.via_iocb = iocb;
1262 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1263 &iocb->ki_pos, count);
1264 cl_env_put(env, &refcheck);
/*
 * Synchronous read(2) path: wrap the user buffer in a single-segment
 * iovec plus a sync kiocb (both stored in the per-env vvp thread info
 * to avoid stack/heap allocation) and delegate to ll_file_aio_read().
 * *ppos is updated from the kiocb position on return.
 */
1268 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1272 struct iovec *local_iov;
1273 struct kiocb *kiocb;
1278 env = cl_env_get(&refcheck);
1280 RETURN(PTR_ERR(env));
1282 local_iov = &vvp_env_info(env)->vti_local_iov;
1283 kiocb = &vvp_env_info(env)->vti_kiocb;
1284 local_iov->iov_base = (void __user *)buf;
1285 local_iov->iov_len = count;
1286 init_sync_kiocb(kiocb, file);
1287 kiocb->ki_pos = *ppos;
1288 kiocb->ki_left = count;
1290 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1291 *ppos = kiocb->ki_pos;
1293 cl_env_put(env, &refcheck);
1298 * Write to a file (through the page cache).
/*
 * AIO write entry point; mirrors ll_file_aio_read() exactly but issues
 * CIT_WRITE through ll_file_io_generic().
 */
1301 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1302 unsigned long nr_segs, loff_t pos)
1305 struct vvp_io_args *args;
1311 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1315 env = cl_env_get(&refcheck);
1317 RETURN(PTR_ERR(env));
1319 args = vvp_env_args(env, IO_NORMAL);
1320 args->u.normal.via_iov = (struct iovec *)iov;
1321 args->u.normal.via_nrsegs = nr_segs;
1322 args->u.normal.via_iocb = iocb;
1324 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1325 &iocb->ki_pos, count);
1326 cl_env_put(env, &refcheck);
/*
 * Synchronous write(2) path; same single-iovec/sync-kiocb wrapping as
 * ll_file_read(), delegating to ll_file_aio_write().
 */
1330 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1334 struct iovec *local_iov;
1335 struct kiocb *kiocb;
1340 env = cl_env_get(&refcheck);
1342 RETURN(PTR_ERR(env));
1344 local_iov = &vvp_env_info(env)->vti_local_iov;
1345 kiocb = &vvp_env_info(env)->vti_kiocb;
/* Drops const from 'buf'; the iovec is only used as a read source. */
1346 local_iov->iov_base = (void __user *)buf;
1347 local_iov->iov_len = count;
1348 init_sync_kiocb(kiocb, file);
1349 kiocb->ki_pos = *ppos;
1350 kiocb->ki_left = count;
1352 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1353 *ppos = kiocb->ki_pos;
1355 cl_env_put(env, &refcheck);
1360 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read handler: package the pipe and flags into IO_SPLICE
 * vvp_io_args and run a CIT_READ through ll_file_io_generic().
 */
1362 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1363 struct pipe_inode_info *pipe, size_t count,
1367 struct vvp_io_args *args;
1372 env = cl_env_get(&refcheck);
1374 RETURN(PTR_ERR(env));
1376 args = vvp_env_args(env, IO_SPLICE);
1377 args->u.splice.via_pipe = pipe;
1378 args->u.splice.via_flags = flags;
1380 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1381 cl_env_put(env, &refcheck);
/*
 * Re-create an OST object for this inode on OST index 'ost_idx':
 * duplicate the inode's stripe metadata (lsm), mark the obdo with
 * OBD_FL_RECREATE_OBJS and issue obd_create() under the inode size
 * lock.  Returns -ENOENT if the file has no objects.
 * NOTE(review): the obdo allocation and its error check are elided
 * from this capture (between lines 1407 and 1412).
 */
1385 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
1388 struct obd_export *exp = ll_i2dtexp(inode);
1389 struct obd_trans_info oti = { 0 };
1390 struct obdo *oa = NULL;
1393 struct lov_stripe_md *lsm = NULL, *lsm2;
1400 lsm = ccc_inode_lsm_get(inode);
1401 if (!lsm_has_objects(lsm))
1402 GOTO(out, rc = -ENOENT);
1404 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1405 (lsm->lsm_stripe_count));
1407 OBD_ALLOC_LARGE(lsm2, lsm_size);
1409 GOTO(out, rc = -ENOMEM);
/* o_nlink is (ab)used here to carry the target OST index — see the
 * OBD_FL_RECREATE_OBJS handling on the server/LOV side. */
1412 oa->o_nlink = ost_idx;
1413 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1414 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1415 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1416 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1417 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1418 memcpy(lsm2, lsm, lsm_size);
1419 ll_inode_size_lock(inode);
1420 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1421 ll_inode_size_unlock(inode);
1423 OBD_FREE_LARGE(lsm2, lsm_size);
1426 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_RECREATE_OBJ ioctl: root-only.  Copy the ll_recreate_obj
 * request from userspace, build an MDT0 ost_id from lrc_id and
 * delegate to ll_lov_recreate() for the requested OST index.
 */
1431 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1433 struct ll_recreate_obj ucreat;
1437 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1440 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1444 ostid_set_seq_mdt0(&oi);
1445 ostid_set_id(&oi, ucreat.lrc_id);
1446 RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID ioctl: root-only.  Convert a user-supplied FID
 * to an ost_id and recreate the object; the OST index is recovered
 * from bits 16-31 of the FID sequence number.
 */
1449 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1456 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1459 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1462 fid_to_ostid(&fid, &oi);
/* IDIF-style FIDs encode the OST index in seq[16:31]. */
1463 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1464 RETURN(ll_lov_recreate(inode, &oi, ost_idx));
/*
 * Apply striping (lov_user_md) to a file by re-opening it with an
 * IT_OPEN intent carrying the layout EA.  Fails early (with a debug
 * message) if the inode already has a stripe.  The open handle created
 * by the intent is released immediately — only the layout side effect
 * is wanted.  NOTE(review): several error-path lines are elided here.
 */
1467 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1468 __u64 flags, struct lov_user_md *lum,
1471 struct lov_stripe_md *lsm = NULL;
1472 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1476 lsm = ccc_inode_lsm_get(inode);
1478 ccc_inode_lsm_put(inode, lsm);
1479 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1484 ll_inode_size_lock(inode);
1485 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1488 rc = oit.d.lustre.it_status;
1490 GOTO(out_req_free, rc);
/* Open succeeded purely to install the layout; drop the MDS open
 * handle right away. */
1492 ll_release_openhandle(file->f_dentry, &oit);
1495 ll_inode_size_unlock(inode);
1496 ll_intent_release(&oit);
1497 ccc_inode_lsm_put(inode, lsm);
1500 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (layout) of 'filename' (child of 'inode') from the
 * MDS via md_getattr_name().  On success *lmmp points into the reply
 * buffer (caller keeps *request alive and finishes it), *lmm_size is
 * set, and on little-endian-mismatched hosts the lov_user_md is
 * swabbed to host order before being handed back.
 */
1504 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1505 struct lov_mds_md **lmmp, int *lmm_size,
1506 struct ptlrpc_request **request)
1508 struct ll_sb_info *sbi = ll_i2sbi(inode);
1509 struct mdt_body *body;
1510 struct lov_mds_md *lmm = NULL;
1511 struct ptlrpc_request *req = NULL;
1512 struct md_op_data *op_data;
1515 rc = ll_get_max_mdsize(sbi, &lmmsize);
1519 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1520 strlen(filename), lmmsize,
1521 LUSTRE_OPC_ANY, NULL);
1522 if (IS_ERR(op_data))
1523 RETURN(PTR_ERR(op_data));
1525 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1526 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1527 ll_finish_md_op_data(op_data);
1529 CDEBUG(D_INFO, "md_getattr_name failed "
1530 "on %s: rc %d\n", filename, rc);
1534 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1535 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1537 lmmsize = body->eadatasize;
/* No EA bits valid, or (elided check) zero-size EA: no layout. */
1539 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1541 GOTO(out, rc = -ENODATA);
1544 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1545 LASSERT(lmm != NULL);
/* Only V1/V3 layouts are understood here (wire values are LE). */
1547 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1548 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1549 GOTO(out, rc = -EPROTO);
1553 * This is coming from the MDS, so is probably in
1554 * little endian. We convert it to host endian before
1555 * passing it to userspace.
/* Swab only on big-endian hosts (LOV_MAGIC differs from its LE form). */
1557 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1560 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1561 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1564 /* if function called for directory - we should
1565 * avoid swab not existent lsm objects */
1566 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1567 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1568 if (S_ISREG(body->mode))
1569 lustre_swab_lov_user_md_objects(
1570 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1572 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1573 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1574 if (S_ISREG(body->mode))
1575 lustre_swab_lov_user_md_objects(
1576 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1583 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA ioctl: root-only.  Copy a lov_user_md with one
 * trailing lov_user_ost_data from userspace and set it as the file's
 * layout via ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS.
 */
1588 static int ll_lov_setea(struct inode *inode, struct file *file,
1591 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1592 struct lov_user_md *lump;
1593 int lum_size = sizeof(struct lov_user_md) +
1594 sizeof(struct lov_user_ost_data);
1598 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1601 OBD_ALLOC_LARGE(lump, lum_size);
1605 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1606 OBD_FREE_LARGE(lump, lum_size);
1610 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1612 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE ioctl: read a lov_user_md (trying V1 first,
 * upgrading to V3 if the magic says so), apply it as the layout, then
 * — on success, presumably (the branch condition at 1639/1640 is
 * elided) — report the resulting stripe info back to userspace via
 * LL_IOC_LOV_GETSTRIPE after refreshing the layout generation.
 */
1616 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1619 struct lov_user_md_v3 lumv3;
1620 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1621 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1622 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1624 __u64 flags = FMODE_WRITE;
1627 /* first try with v1 which is smaller than v3 */
1628 lum_size = sizeof(struct lov_user_md_v1);
1629 if (copy_from_user(lumv1, lumv1p, lum_size))
/* V3 magic: re-copy the full (larger) structure. */
1632 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1633 lum_size = sizeof(struct lov_user_md_v3);
1634 if (copy_from_user(&lumv3, lumv3p, lum_size))
1638 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1640 struct lov_stripe_md *lsm;
1643 put_user(0, &lumv1p->lmm_stripe_count);
1645 ll_layout_refresh(inode, &gen);
1646 lsm = ccc_inode_lsm_get(inode);
1647 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1648 0, lsm, (void *)arg);
1649 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_LOV_GETSTRIPE ioctl: copy the inode's current stripe layout
 * out to userspace via the LOV obd_iocontrol handler.
 */
1654 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1656 struct lov_stripe_md *lsm;
1660 lsm = ccc_inode_lsm_get(inode);
1662 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1664 ccc_inode_lsm_put(inode, lsm);
/*
 * Take a group lock (gid in 'arg') on the file's cl_object and record
 * it in the per-open file data.  lli_lock serializes the fd_flags /
 * fd_grouplock pair; cl_get_grouplock() is called outside the spinlock
 * (it may block), so a second check afterwards handles the race where
 * another thread acquired the lock in the meantime.
 */
1668 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1670 struct ll_inode_info *lli = ll_i2info(inode);
1671 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1672 struct ccc_grouplock grouplock;
1676 if (ll_file_nolock(file))
1677 RETURN(-EOPNOTSUPP);
1679 spin_lock(&lli->lli_lock);
1680 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1681 CWARN("group lock already existed with gid %lu\n",
1682 fd->fd_grouplock.cg_gid);
1683 spin_unlock(&lli->lli_lock);
1686 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1687 spin_unlock(&lli->lli_lock);
/* May sleep; O_NONBLOCK makes the enqueue non-blocking. */
1689 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1690 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1694 spin_lock(&lli->lli_lock);
/* Re-check: another thread may have won while we were enqueuing. */
1695 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1696 spin_unlock(&lli->lli_lock);
1697 CERROR("another thread just won the race\n");
1698 cl_put_grouplock(&grouplock);
1702 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1703 fd->fd_grouplock = grouplock;
1704 spin_unlock(&lli->lli_lock);
1706 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * Release the group lock recorded in the file data, verifying that a
 * lock is actually held and that its gid matches 'arg'.  The lock is
 * copied out and cleared under lli_lock, then dropped outside it.
 */
1710 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1712 struct ll_inode_info *lli = ll_i2info(inode);
1713 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1714 struct ccc_grouplock grouplock;
1717 spin_lock(&lli->lli_lock);
1718 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1719 spin_unlock(&lli->lli_lock);
1720 CWARN("no group lock held\n");
1723 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1725 if (fd->fd_grouplock.cg_gid != arg) {
1726 CWARN("group lock %lu doesn't match current id %lu\n",
1727 arg, fd->fd_grouplock.cg_gid);
1728 spin_unlock(&lli->lli_lock);
/* Take a local copy so cl_put_grouplock() runs without lli_lock. */
1732 grouplock = fd->fd_grouplock;
1733 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1734 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1735 spin_unlock(&lli->lli_lock);
1737 cl_put_grouplock(&grouplock);
1738 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1743 * Close inode open handle
1745 * \param dentry [in] dentry which contains the inode
1746 * \param it [in,out] intent which contains open info and result
/* \retval 0 on success (elided line), */
1749 * \retval <0 failure
1751 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1753 struct inode *inode = dentry->d_inode;
1754 struct obd_client_handle *och;
1760 /* Root ? Do nothing. */
1761 if (dentry->d_inode->i_sb->s_root == dentry)
1764 /* No open handle to close? Move away */
1765 if (!it_disposition(it, DISP_OPEN_OPEN))
1768 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1770 OBD_ALLOC(och, sizeof(*och));
1772 GOTO(out, rc = -ENOMEM);
/* Populate the client handle from the intent, then close it on the
 * MDS.  NOTE(review): the och free on the close path is elided from
 * this capture. */
1774 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1776 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1779 /* this one is in place of ll_file_open */
1780 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1781 ptlrpc_req_finished(it->d.lustre.it_data);
1782 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1788 * Get size for inode for which FIEMAP mapping is requested.
1789 * Make the FIEMAP get_info call and returns the result.
/*
 * Validate fiemap flags, optionally flush dirty pages (FIEMAP_FLAG_SYNC),
 * then ask the OSTs for the extent mapping through obd_get_info(KEY_FIEMAP).
 * Striped files (>1 stripe) require the caller to understand
 * FIEMAP_FLAG_DEVICE_ORDER.  A zero-size file short-circuits to zero
 * mapped extents.
 */
1791 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1794 struct obd_export *exp = ll_i2dtexp(inode);
1795 struct lov_stripe_md *lsm = NULL;
1796 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1797 int vallen = num_bytes;
1801 /* Checks for fiemap flags */
1802 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* Report back which flags we do NOT support by stripping the
 * compatible ones (the EBADR return is elided from this capture). */
1803 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1807 /* Check for FIEMAP_FLAG_SYNC */
1808 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1809 rc = filemap_fdatawrite(inode->i_mapping);
1814 lsm = ccc_inode_lsm_get(inode);
1818 /* If the stripe_count > 1 and the application does not understand
1819 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1821 if (lsm->lsm_stripe_count > 1 &&
1822 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1823 GOTO(out, rc = -EOPNOTSUPP);
1825 fm_key.oa.o_oi = lsm->lsm_oi;
1826 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1828 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1829 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1830 /* If filesize is 0, then there would be no objects for mapping */
1831 if (fm_key.oa.o_size == 0) {
1832 fiemap->fm_mapped_extents = 0;
1836 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1838 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1841 CERROR("obd_get_info failed: rc = %d\n", rc);
1844 ccc_inode_lsm_put(inode, lsm);
/*
 * OBD_IOC_FID2PATH ioctl: translate a FID to a pathname via the MDC.
 * Requires CAP_DAC_READ_SEARCH unless the mount allows user fid2path.
 * Reads the fixed-size request first to learn gf_pathlen, allocates an
 * output buffer of that size, and copies the result back to userspace.
 */
1848 int ll_fid2path(struct inode *inode, void *arg)
1850 struct obd_export *exp = ll_i2mdexp(inode);
1851 struct getinfo_fid2path *gfout, *gfin;
1855 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1856 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1859 /* Need to get the buflen */
1860 OBD_ALLOC_PTR(gfin);
1863 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* Output = fixed header + user-requested path buffer length. */
1868 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1869 OBD_ALLOC(gfout, outsize);
1870 if (gfout == NULL) {
1874 memcpy(gfout, gfin, sizeof(*gfout));
1877 /* Call mdc_iocontrol */
1878 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1882 if (copy_to_user(arg, gfout, outsize))
1886 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP ioctl wrapper: size a kernel fiemap buffer from
 * the user's fm_extent_count, copy the request (plus the first extent,
 * used to continue a previous mapping) in, run ll_do_fiemap(), and
 * copy the header plus mapped extents back out.
 * NOTE(review): extent_count comes from userspace and feeds the
 * num_bytes multiplication; the overflow/limit check, if any, is in
 * lines elided from this capture — verify in the full source.
 */
1890 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1892 struct ll_user_fiemap *fiemap_s;
1893 size_t num_bytes, ret_bytes;
1894 unsigned int extent_count;
1897 /* Get the extent count so we can calculate the size of
1898 * required fiemap buffer */
1899 if (get_user(extent_count,
1900 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1902 num_bytes = sizeof(*fiemap_s) + (extent_count *
1903 sizeof(struct ll_fiemap_extent));
1905 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1906 if (fiemap_s == NULL)
1909 /* get the fiemap value */
1910 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1912 GOTO(error, rc = -EFAULT);
1914 /* If fm_extent_count is non-zero, read the first extent since
1915 * it is used to calculate end_offset and device from previous
/* (continuation of a multi-call FIEMAP; guard condition elided.) */
1918 if (copy_from_user(&fiemap_s->fm_extents[0],
1919 (char __user *)arg + sizeof(*fiemap_s),
1920 sizeof(struct ll_fiemap_extent)))
1921 GOTO(error, rc = -EFAULT);
1924 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1928 ret_bytes = sizeof(struct ll_user_fiemap);
1930 if (extent_count != 0)
1931 ret_bytes += (fiemap_s->fm_mapped_extents *
1932 sizeof(struct ll_fiemap_extent));
1934 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1938 OBD_FREE_LARGE(fiemap_s, num_bytes);
1943 * Read the data_version for inode.
1945 * This value is computed using stripe object version on OST.
1946 * Version is computed using server side locking.
1948 * @param extent_lock Take extent lock. Not needed if a process is already
1949 * holding the OST object group locks.
1951 int ll_data_version(struct inode *inode, __u64 *data_version,
1954 struct lov_stripe_md *lsm = NULL;
1955 struct ll_sb_info *sbi = ll_i2sbi(inode);
1956 struct obdo *obdo = NULL;
1960 /* If no stripe, we consider version is 0. */
1961 lsm = ccc_inode_lsm_get(inode);
1962 if (!lsm_has_objects(lsm)) {
1964 CDEBUG(D_INODE, "No object for inode\n");
1968 OBD_ALLOC_PTR(obdo);
1970 GOTO(out, rc = -ENOMEM);
/* Fetch OST attributes; only trust o_data_version if the server set
 * the corresponding valid bit. */
1972 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
1974 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1977 *data_version = obdo->o_data_version;
1983 ccc_inode_lsm_put(inode, lsm);
1988 * Trigger a HSM release request for the provided inode.
/*
 * Flow: take a write lease with MDS_OPEN_RELEASE, snapshot the data
 * version (with extent locking), merge size/time attributes from the
 * OSTs (ll_merge_lvb under a nested cl_env), then close the open
 * handle — the MDT performs the actual release using the lease handle
 * packed into the close (see comment at 2020-2021).  On any failure
 * the lease is closed explicitly.
 */
1990 int ll_hsm_release(struct inode *inode)
1992 struct cl_env_nest nest;
1994 struct obd_client_handle *och = NULL;
1995 __u64 data_version = 0;
1999 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
2000 ll_get_fsname(inode->i_sb, NULL, 0),
2001 PFID(&ll_i2info(inode)->lli_fid));
2003 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
2005 GOTO(out, rc = PTR_ERR(och));
2007 /* Grab latest data_version and [am]time values */
2008 rc = ll_data_version(inode, &data_version, 1);
2012 env = cl_env_nested_get(&nest);
2014 GOTO(out, rc = PTR_ERR(env));
2016 ll_merge_lvb(env, inode);
2017 cl_env_nested_put(&nest, env);
2019 /* Release the file.
2020 * NB: lease lock handle is released in mdc_hsm_release_pack() because
2021 * we still need it to pack l_remote_handle to MDT. */
2022 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
2028 if (och != NULL && !IS_ERR(och)) /* close the file */
2029 ll_lease_close(och, inode, NULL);
/*
 * Scratch state for ll_swap_layouts(): both inodes, saved [am]time
 * attrs for restoration after the swap, data versions to verify, and
 * which of the two versions must be checked.  Kept swappable as
 * (1,2) pairs so the inodes can be ordered by FID.
 */
2034 struct ll_swap_stack {
2035 struct iattr ia1, ia2;
2037 struct inode *inode1, *inode2;
2038 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS implementation: atomically exchange the
 * layouts of two regular files on the same filesystem.
 * Steps: permission/same-sb checks; order the pair by FID (swapping
 * all per-file state along with the inodes) to get a stable locking
 * order; optionally take group locks on both files to flush dirty
 * cache; save mtime/atime if the caller asked to preserve them; verify
 * data versions if requested; send the swap to the MDT via
 * obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS); drop group locks; restore
 * saved times with ll_setattr() under each inode's mutex.
 * NOTE(review): 'gid' derivation from lsl and several error branches
 * are elided from this capture.
 */
2041 static int ll_swap_layouts(struct file *file1, struct file *file2,
2042 struct lustre_swap_layouts *lsl)
2044 struct mdc_swap_layouts msl;
2045 struct md_op_data *op_data;
2048 struct ll_swap_stack *llss = NULL;
2051 OBD_ALLOC_PTR(llss);
2055 llss->inode1 = file1->f_dentry->d_inode;
2056 llss->inode2 = file2->f_dentry->d_inode;
2058 if (!S_ISREG(llss->inode2->i_mode))
2059 GOTO(free, rc = -EINVAL);
2061 if (inode_permission(llss->inode1, MAY_WRITE) ||
2062 inode_permission(llss->inode2, MAY_WRITE))
2063 GOTO(free, rc = -EPERM);
2065 if (llss->inode2->i_sb != llss->inode1->i_sb)
2066 GOTO(free, rc = -EXDEV);
2068 /* we use 2 bool because it is easier to swap than 2 bits */
2069 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2070 llss->check_dv1 = true;
2072 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2073 llss->check_dv2 = true;
2075 /* we cannot use lsl->sl_dvX directly because we may swap them */
2076 llss->dv1 = lsl->sl_dv1;
2077 llss->dv2 = lsl->sl_dv2;
/* Canonical FID ordering gives a deterministic lock order for the
 * two-file operation. */
2079 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2080 if (rc == 0) /* same file, done! */
2083 if (rc < 0) { /* sequentialize it */
2084 swap(llss->inode1, llss->inode2);
2086 swap(llss->dv1, llss->dv2);
2087 swap(llss->check_dv1, llss->check_dv2);
2091 if (gid != 0) { /* application asks to flush dirty cache */
2092 rc = ll_get_grouplock(llss->inode1, file1, gid);
2096 rc = ll_get_grouplock(llss->inode2, file2, gid);
2098 ll_put_grouplock(llss->inode1, file1, gid);
2103 /* to be able to restore mtime and atime after swap
2104 * we need to first save them */
2106 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2107 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2108 llss->ia1.ia_atime = llss->inode1->i_atime;
2109 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2110 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2111 llss->ia2.ia_atime = llss->inode2->i_atime;
2112 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2115 /* ultimate check, before swaping the layouts we check if
2116 * dataversion has changed (if requested) */
2117 if (llss->check_dv1) {
2118 rc = ll_data_version(llss->inode1, &dv, 0);
2121 if (dv != llss->dv1)
2122 GOTO(putgl, rc = -EAGAIN);
2125 if (llss->check_dv2) {
2126 rc = ll_data_version(llss->inode2, &dv, 0);
2129 if (dv != llss->dv2)
2130 GOTO(putgl, rc = -EAGAIN);
2133 /* struct md_op_data is used to send the swap args to the mdt
2134 * only flags is missing, so we use struct mdc_swap_layouts
2135 * through the md_op_data->op_data */
2136 /* flags from user space have to be converted before they are send to
2137 * server, no flag is sent today, they are only used on the client */
2140 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2141 0, LUSTRE_OPC_ANY, &msl);
2142 if (IS_ERR(op_data))
2143 GOTO(free, rc = PTR_ERR(op_data));
2145 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2146 sizeof(*op_data), op_data, NULL);
2147 ll_finish_md_op_data(op_data);
2151 ll_put_grouplock(llss->inode2, file2, gid);
2152 ll_put_grouplock(llss->inode1, file1, gid);
2155 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2159 /* clear useless flags */
2160 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2161 llss->ia1.ia_valid &= ~ATTR_MTIME;
2162 llss->ia2.ia_valid &= ~ATTR_MTIME;
2165 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2166 llss->ia1.ia_valid &= ~ATTR_ATIME;
2167 llss->ia2.ia_valid &= ~ATTR_ATIME;
2170 /* update time if requested */
/* Note the cross-assignment: ia2 (saved from inode2) is applied to
 * inode1 and vice versa — the layouts (and with them the data) were
 * exchanged, so the times follow the data. */
2172 if (llss->ia2.ia_valid != 0) {
2173 mutex_lock(&llss->inode1->i_mutex);
2174 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2175 mutex_unlock(&llss->inode1->i_mutex);
2178 if (llss->ia1.ia_valid != 0) {
2181 mutex_lock(&llss->inode2->i_mutex);
2182 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2183 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Send an HSM state-set request (hss) for 'inode' to the MDT.
 * Non-root callers may only touch flags inside HSM_USER_MASK.
 */
2195 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2197 struct md_op_data *op_data;
2200 /* Non-root users are forbidden to set or clear flags which are
2201 * NOT defined in HSM_USER_MASK. */
2202 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2203 !cfs_capable(CFS_CAP_SYS_ADMIN))
2206 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2207 LUSTRE_OPC_ANY, hss);
2208 if (IS_ERR(op_data))
2209 RETURN(PTR_ERR(op_data));
2211 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2212 sizeof(*op_data), op_data, NULL);
2214 ll_finish_md_op_data(op_data);
/*
 * Import a file into HSM: mark it ARCHIVED|EXISTS|RELEASED with the
 * given archive id, then force the mode/uid/gid/size/[am]times from
 * the hsm_user_import request onto the inode via ll_setattr_raw()
 * with ATTR_FORCE.  Regular files only.
 */
2219 static int ll_hsm_import(struct inode *inode, struct file *file,
2220 struct hsm_user_import *hui)
2222 struct hsm_state_set *hss = NULL;
2223 struct iattr *attr = NULL;
2227 if (!S_ISREG(inode->i_mode))
2233 GOTO(out, rc = -ENOMEM);
2235 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2236 hss->hss_archive_id = hui->hui_archive_id;
2237 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2238 rc = ll_hsm_state_set(inode, hss);
2242 OBD_ALLOC_PTR(attr);
2244 GOTO(out, rc = -ENOMEM);
/* Mode is masked to permission bits and forced to a regular file. */
2246 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2247 attr->ia_mode |= S_IFREG;
2248 attr->ia_uid = hui->hui_uid;
2249 attr->ia_gid = hui->hui_gid;
2250 attr->ia_size = hui->hui_size;
2251 attr->ia_mtime.tv_sec = hui->hui_mtime;
2252 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2253 attr->ia_atime.tv_sec = hui->hui_atime;
2254 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2256 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2257 ATTR_UID | ATTR_GID |
2258 ATTR_MTIME | ATTR_MTIME_SET |
2259 ATTR_ATIME | ATTR_ATIME_SET;
2261 rc = ll_setattr_raw(file->f_dentry, attr, true);
/*
 * Main ioctl dispatcher for Lustre regular files.  Handles Lustre
 * flag get/set, stripe/layout operations (setstripe, setea, getstripe,
 * swap-layouts, recreate), FIEMAP, group locks, FID/path translation,
 * data-version queries, HSM state/action/import, and file leases;
 * anything unrecognized falls through to an external handler and then
 * to the data export's obd_iocontrol.  NOTE(review): many RETURN/
 * closing-brace lines are elided from this capture; per-case comments
 * describe only the visible logic.
 */
2275 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2277 struct inode *inode = file->f_dentry->d_inode;
2278 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2282 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
2283 inode->i_generation, inode, cmd);
2284 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2286 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2287 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2291 case LL_IOC_GETFLAGS:
2292 /* Get the current value of the file flags */
2293 return put_user(fd->fd_flags, (int *)arg);
2294 case LL_IOC_SETFLAGS:
2295 case LL_IOC_CLRFLAGS:
2296 /* Set or clear specific file flags */
2297 /* XXX This probably needs checks to ensure the flags are
2298 * not abused, and to handle any flag side effects.
2300 if (get_user(flags, (int *) arg))
2303 if (cmd == LL_IOC_SETFLAGS) {
/* IGNORE_LOCK only makes sense with O_DIRECT I/O. */
2304 if ((flags & LL_FILE_IGNORE_LOCK) &&
2305 !(file->f_flags & O_DIRECT)) {
2306 CERROR("%s: unable to disable locking on "
2307 "non-O_DIRECT file\n", current->comm);
2311 fd->fd_flags |= flags;
2313 fd->fd_flags &= ~flags;
2316 case LL_IOC_LOV_SETSTRIPE:
2317 RETURN(ll_lov_setstripe(inode, file, arg));
2318 case LL_IOC_LOV_SETEA:
2319 RETURN(ll_lov_setea(inode, file, arg));
2320 case LL_IOC_LOV_SWAP_LAYOUTS: {
2322 struct lustre_swap_layouts lsl;
2324 if (copy_from_user(&lsl, (char *)arg,
2325 sizeof(struct lustre_swap_layouts)))
/* Both files must be writable: this fd must not be O_RDONLY... */
2328 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2331 file2 = fget(lsl.sl_fd);
/* ...and the second fd must be O_WRONLY or O_RDWR. */
2336 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2337 rc = ll_swap_layouts(file, file2, &lsl);
2341 case LL_IOC_LOV_GETSTRIPE:
2342 RETURN(ll_lov_getstripe(inode, arg));
2343 case LL_IOC_RECREATE_OBJ:
2344 RETURN(ll_lov_recreate_obj(inode, arg));
2345 case LL_IOC_RECREATE_FID:
2346 RETURN(ll_lov_recreate_fid(inode, arg));
2347 case FSFILT_IOC_FIEMAP:
2348 RETURN(ll_ioctl_fiemap(inode, arg));
2349 case FSFILT_IOC_GETFLAGS:
2350 case FSFILT_IOC_SETFLAGS:
2351 RETURN(ll_iocontrol(inode, file, cmd, arg));
2352 case FSFILT_IOC_GETVERSION_OLD:
2353 case FSFILT_IOC_GETVERSION:
2354 RETURN(put_user(inode->i_generation, (int *)arg));
2355 case LL_IOC_GROUP_LOCK:
2356 RETURN(ll_get_grouplock(inode, file, arg));
2357 case LL_IOC_GROUP_UNLOCK:
2358 RETURN(ll_put_grouplock(inode, file, arg));
2359 case IOC_OBD_STATFS:
2360 RETURN(ll_obd_statfs(inode, (void *)arg));
2362 /* We need to special case any other ioctls we want to handle,
2363 * to send them to the MDS/OST as appropriate and to properly
2364 * network encode the arg field.
2365 case FSFILT_IOC_SETVERSION_OLD:
2366 case FSFILT_IOC_SETVERSION:
2368 case LL_IOC_FLUSHCTX:
2369 RETURN(ll_flush_ctx(inode));
2370 case LL_IOC_PATH2FID: {
2371 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2372 sizeof(struct lu_fid)))
2377 case OBD_IOC_FID2PATH:
2378 RETURN(ll_fid2path(inode, (void *)arg));
2379 case LL_IOC_DATA_VERSION: {
2380 struct ioc_data_version idv;
2383 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
2386 rc = ll_data_version(inode, &idv.idv_version,
2387 !(idv.idv_flags & LL_DV_NOFLUSH));
2389 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2395 case LL_IOC_GET_MDTIDX: {
2398 mdtidx = ll_get_mdt_idx(inode);
2402 if (put_user((int)mdtidx, (int*)arg))
2407 case OBD_IOC_GETDTNAME:
2408 case OBD_IOC_GETMDNAME:
2409 RETURN(ll_get_obd_name(inode, cmd, arg));
2410 case LL_IOC_HSM_STATE_GET: {
2411 struct md_op_data *op_data;
2412 struct hsm_user_state *hus;
2419 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2420 LUSTRE_OPC_ANY, hus);
2421 if (IS_ERR(op_data)) {
2423 RETURN(PTR_ERR(op_data));
2426 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2429 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2432 ll_finish_md_op_data(op_data);
2436 case LL_IOC_HSM_STATE_SET: {
2437 struct hsm_state_set *hss;
2444 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2449 rc = ll_hsm_state_set(inode, hss);
2454 case LL_IOC_HSM_ACTION: {
2455 struct md_op_data *op_data;
2456 struct hsm_current_action *hca;
2463 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2464 LUSTRE_OPC_ANY, hca);
2465 if (IS_ERR(op_data)) {
2467 RETURN(PTR_ERR(op_data));
2470 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2473 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2476 ll_finish_md_op_data(op_data);
2480 case LL_IOC_SET_LEASE: {
2481 struct ll_inode_info *lli = ll_i2info(inode);
2482 struct obd_client_handle *och = NULL;
/* Requested lease mode must be a subset of the open mode. */
2488 if (!(file->f_mode & FMODE_WRITE))
2493 if (!(file->f_mode & FMODE_READ))
2498 mutex_lock(&lli->lli_och_mutex);
/* Unlock path: detach any existing lease handle under the mutex,
 * then close it outside. */
2499 if (fd->fd_lease_och != NULL) {
2500 och = fd->fd_lease_och;
2501 fd->fd_lease_och = NULL;
2503 mutex_unlock(&lli->lli_och_mutex);
2506 mode = och->och_flags &(FMODE_READ|FMODE_WRITE);
2507 rc = ll_lease_close(och, inode, &lease_broken);
2508 if (rc == 0 && lease_broken)
2514 /* return the type of lease or error */
2515 RETURN(rc < 0 ? rc : (int)mode);
2520 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2522 /* apply for lease */
2523 och = ll_lease_open(inode, file, mode, 0);
2525 RETURN(PTR_ERR(och));
2528 mutex_lock(&lli->lli_och_mutex);
2529 if (fd->fd_lease_och == NULL) {
2530 fd->fd_lease_och = och;
2533 mutex_unlock(&lli->lli_och_mutex);
2535 /* impossible now that only excl is supported for now */
2536 ll_lease_close(och, inode, &lease_broken);
2541 case LL_IOC_GET_LEASE: {
2542 struct ll_inode_info *lli = ll_i2info(inode);
2543 struct ldlm_lock *lock = NULL;
2546 mutex_lock(&lli->lli_och_mutex);
2547 if (fd->fd_lease_och != NULL) {
2548 struct obd_client_handle *och = fd->fd_lease_och;
2550 lock = ldlm_handle2lock(&och->och_lease_handle);
2552 lock_res_and_lock(lock);
/* Lease is only reported as held while its lock is not cancelled. */
2553 if (!ldlm_is_cancel(lock))
2554 rc = och->och_flags &
2555 (FMODE_READ | FMODE_WRITE);
2556 unlock_res_and_lock(lock);
2557 LDLM_LOCK_PUT(lock);
2560 mutex_unlock(&lli->lli_och_mutex);
2563 case LL_IOC_HSM_IMPORT: {
2564 struct hsm_user_import *hui;
2570 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2575 rc = ll_hsm_import(inode, file, hui);
/* Fall-through default: try the registered external ioctl handlers,
 * then pass the command down to the data (OST) export. */
2584 ll_iocontrol_call(inode, file, cmd, arg, &err))
2587 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2593 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Local copy of the kernel's llseek_execute() (only built when the
 * kernel does not export generic_file_llseek_size): validate the
 * offset against sign/maxsize constraints and commit it to f_pos,
 * resetting f_version on movement.  Error returns (-EINVAL/-EOVERFLOW,
 * presumably) are on lines elided from this capture.
 */
2594 static inline loff_t
2595 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2597 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2599 if (offset > maxsize)
2602 if (offset != file->f_pos) {
2603 file->f_pos = offset;
2604 file->f_version = 0;
/*
 * Local copy of the kernel's generic_file_llseek_size() (compat for
 * kernels lacking it): handles SEEK_SET/CUR/END plus SEEK_DATA/
 * SEEK_HOLE against a caller-supplied eof, clamped to maxsize.  The
 * SEEK_CUR case is serialized on i_mutex; the switch and several
 * SEEK_* branches are elided from this capture.
 */
2610 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2611 loff_t maxsize, loff_t eof)
2613 struct inode *inode = file->f_dentry->d_inode;
2621 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2622 * position-querying operation. Avoid rewriting the "same"
2623 * f_pos value back to the file because a concurrent read(),
2624 * write() or lseek() might have altered it
2629 * f_lock protects against read/modify/write race with other
2630 * SEEK_CURs. Note that parallel writes and reads behave
2633 mutex_lock(&inode->i_mutex);
2634 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2635 mutex_unlock(&inode->i_mutex);
2639 * In the generic case the entire file is data, so as long as
2640 * offset isn't at the end of the file then the offset is data.
2647 * There is a virtual hole at the end of the file, so as long as
2648 * offset isn't i_size or larger, return i_size.
2656 return llseek_execute(file, offset, maxsize);
/*
 * Lustre llseek: for SEEK_END/SEEK_HOLE/SEEK_DATA the file size must
 * be current, so glimpse it from the OSTs first, then delegate to the
 * (possibly local) generic_file_llseek_size with ll_file_maxbytes as
 * the limit.
 */
2660 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2662 struct inode *inode = file->f_dentry->d_inode;
2663 loff_t retval, eof = 0;
/* Pre-compute the target only for the trace message below. */
2666 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2667 (origin == SEEK_CUR) ? file->f_pos : 0);
2668 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2669 inode->i_ino, inode->i_generation, inode, retval, retval,
2671 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2673 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2674 retval = ll_glimpse_size(inode);
2677 eof = i_size_read(inode);
2680 retval = ll_generic_file_llseek_size(file, offset, origin,
2681 ll_file_maxbytes(inode), eof);
/*
 * flush() (close-time) handler: collect async write errors recorded on
 * the inode and its cl_object, clear them, and report -EIO once —
 * unless this fd already saw the write failure (fd_write_failed), in
 * which case the error was delivered to the application earlier.
 */
2685 int ll_flush(struct file *file, fl_owner_t id)
2687 struct inode *inode = file->f_dentry->d_inode;
2688 struct ll_inode_info *lli = ll_i2info(inode);
2689 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2692 LASSERT(!S_ISDIR(inode->i_mode));
2694 /* catch async errors that were recorded back when async writeback
2695 * failed for pages in this mapping. */
2696 rc = lli->lli_async_rc;
2697 lli->lli_async_rc = 0;
2698 err = lov_read_and_clear_async_rc(lli->lli_clob);
2702 /* The application has been told write failure already.
2703 * Do not report failure again. */
2704 if (fd->fd_write_failed)
2706 return rc ? -EIO : 0;
2710 * Called to make sure a portion of file has been written out.
2711 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2713 * Return how many pages have been written.
/*
 * Builds a CIT_FSYNC cl_io for [start, end] with the given fsync mode,
 * runs it via cl_io_loop(), and on success returns fio->fi_nr_written
 * (the page count).  An OSS write capability is taken for the duration.
 */
2715 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2716 enum cl_fsync_mode mode, int ignore_layout)
2718 struct cl_env_nest nest;
2721 struct obd_capa *capa = NULL;
2722 struct cl_fsync_io *fio;
2726 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2727 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2730 env = cl_env_nested_get(&nest);
2732 RETURN(PTR_ERR(env));
2734 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2736 io = ccc_env_thread_io(env);
2737 io->ci_obj = cl_i2info(inode)->lli_clob;
2738 io->ci_ignore_layout = ignore_layout;
2740 /* initialize parameters for sync */
2741 fio = &io->u.ci_fsync;
2742 fio->fi_capa = capa;
2743 fio->fi_start = start;
2745 fio->fi_fid = ll_inode2fid(inode);
2746 fio->fi_mode = mode;
2747 fio->fi_nr_written = 0;
2749 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2750 result = cl_io_loop(env, io);
2752 result = io->ci_result;
2754 result = fio->fi_nr_written;
2755 cl_io_fini(env, io);
2756 cl_env_nested_put(&nest, env);
2764 * When dentry is provided (the 'else' case), *file->f_dentry may be
2765 * null and dentry must be used directly rather than pulled from
2766 * *file->f_dentry as is done otherwise.
/*
 * fsync handler; the three prototypes cover different kernel ABIs
 * (4-arg range fsync, 2-arg, and the old dentry-taking variant).
 * Sequence: flush/wait dirty pages, fold in recorded async writeback
 * errors, md_sync() the metadata to the MDS, and for datasync on a
 * regular file run cl_sync_file_range() over the whole object,
 * updating fd_write_failed accordingly.
 */
2769 #ifdef HAVE_FILE_FSYNC_4ARGS
2770 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2772 struct dentry *dentry = file->f_dentry;
2773 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2774 int ll_fsync(struct file *file, int datasync)
2776 struct dentry *dentry = file->f_dentry;
2778 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2781 struct inode *inode = dentry->d_inode;
2782 struct ll_inode_info *lli = ll_i2info(inode);
2783 struct ptlrpc_request *req;
2784 struct obd_capa *oc;
2788 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2789 inode->i_generation, inode);
2790 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2792 #ifdef HAVE_FILE_FSYNC_4ARGS
2793 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2794 mutex_lock(&inode->i_mutex);
2796 /* fsync's caller has already called _fdata{sync,write}, we want
2797 * that IO to finish before calling the osc and mdc sync methods */
2798 rc = filemap_fdatawait(inode->i_mapping);
2801 /* catch async errors that were recorded back when async writeback
2802 * failed for pages in this mapping. */
2803 if (!S_ISDIR(inode->i_mode)) {
2804 err = lli->lli_async_rc;
2805 lli->lli_async_rc = 0;
2808 err = lov_read_and_clear_async_rc(lli->lli_clob);
2813 oc = ll_mdscapa_get(inode);
2814 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2820 ptlrpc_req_finished(req);
2822 if (datasync && S_ISREG(inode->i_mode)) {
2823 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2825 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2827 if (rc == 0 && err < 0)
2830 fd->fd_write_failed = true;
2832 fd->fd_write_failed = false;
2835 #ifdef HAVE_FILE_FSYNC_4ARGS
2836 mutex_unlock(&inode->i_mutex);
/*
 * flock/POSIX-lock handler: translate a VFS struct file_lock into an
 * LDLM_FLOCK enqueue on the MDS.  FL_FLOCK locks are whole-file and
 * keyed by the struct file pointer; FL_POSIX locks carry the caller's
 * byte range and fl_owner.  F_UNLCK is expressed as an LCK_NL enqueue
 * (see the in-code comment).  After md_enqueue(), the lock is mirrored
 * into the local VFS lock tables; if that fails and this was not an
 * unlock, the remote lock is released again with a second LCK_NL
 * enqueue so client and server state stay consistent.
 */
2841 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2843 struct inode *inode = file->f_dentry->d_inode;
2844 struct ll_sb_info *sbi = ll_i2sbi(inode);
2845 struct ldlm_enqueue_info einfo = {
2846 .ei_type = LDLM_FLOCK,
2847 .ei_cb_cp = ldlm_flock_completion_ast,
2848 .ei_cbdata = file_lock,
2850 struct md_op_data *op_data;
2851 struct lustre_handle lockh = {0};
2852 ldlm_policy_data_t flock = {{0}};
2858 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2859 inode->i_ino, file_lock);
2861 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2863 if (file_lock->fl_flags & FL_FLOCK) {
2864 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2865 /* flocks are whole-file locks */
2866 flock.l_flock.end = OFFSET_MAX;
2867 /* For flocks owner is determined by the local file desctiptor*/
2868 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2869 } else if (file_lock->fl_flags & FL_POSIX) {
2870 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2871 flock.l_flock.start = file_lock->fl_start;
2872 flock.l_flock.end = file_lock->fl_end;
2876 flock.l_flock.pid = file_lock->fl_pid;
2878 /* Somewhat ugly workaround for svc lockd.
2879 * lockd installs custom fl_lmops->lm_compare_owner that checks
2880 * for the fl_owner to be the same (which it always is on local node
2881 * I guess between lockd processes) and then compares pid.
2882 * As such we assign pid to the owner field to make it all work,
2883 * conflict with normal locks is unlikely since pid space and
2884 * pointer space for current->files are not intersecting */
2885 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2886 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
2888 switch (file_lock->fl_type) {
2890 einfo.ei_mode = LCK_PR;
2893 /* An unlock request may or may not have any relation to
2894 * existing locks so we may not be able to pass a lock handle
2895 * via a normal ldlm_lock_cancel() request. The request may even
2896 * unlock a byte range in the middle of an existing lock. In
2897 * order to process an unlock request we need all of the same
2898 * information that is given with a normal read or write record
2899 * lock request. To avoid creating another ldlm unlock (cancel)
2900 * message we'll treat a LCK_NL flock request as an unlock. */
2901 einfo.ei_mode = LCK_NL;
2904 einfo.ei_mode = LCK_PW;
2907 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2908 file_lock->fl_type);
2923 flags = LDLM_FL_BLOCK_NOWAIT;
2929 flags = LDLM_FL_TEST_LOCK;
2930 /* Save the old mode so that if the mode in the lock changes we
2931 * can decrement the appropriate reader or writer refcount. */
2932 file_lock->fl_type = einfo.ei_mode;
2935 CERROR("unknown fcntl lock command: %d\n", cmd);
2939 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2940 LUSTRE_OPC_ANY, NULL);
2941 if (IS_ERR(op_data))
2942 RETURN(PTR_ERR(op_data));
2944 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2945 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2946 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2948 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2949 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2951 if ((file_lock->fl_flags & FL_FLOCK) &&
2952 (rc == 0 || file_lock->fl_type == F_UNLCK))
2953 rc2 = flock_lock_file_wait(file, file_lock);
2954 if ((file_lock->fl_flags & FL_POSIX) &&
2955 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2956 !(flags & LDLM_FL_TEST_LOCK))
2957 rc2 = posix_lock_file_wait(file, file_lock);
2959 if (rc2 && file_lock->fl_type != F_UNLCK) {
2960 einfo.ei_mode = LCK_NL;
2961 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2962 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2966 ll_finish_md_op_data(op_data);
/* Lock handler installed for -o noflock mounts (body elided in this
 * extract; per the table comment below, it returns ENOSYS). */
2971 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2979 * test if some locks matching bits and l_req_mode are acquired
2980 * - bits can be in different locks
2981 * - if found clear the common lock bits in *bits
2982 * - the bits not found, are kept in *bits
2984 * \param bits [IN] searched lock bits [IN]
2985 * \param l_req_mode [IN] searched lock mode
2986 * \retval boolean, true iff all bits are found
/*
 * Walks each inodebit set in *bits and tries md_lock_match() with
 * LDLM_FL_TEST_LOCK (no reference is kept); matched bits are cleared
 * from *bits.  LCK_MINMODE means "any of CR|CW|PR|PW".
 */
2988 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2990 struct lustre_handle lockh;
2991 ldlm_policy_data_t policy;
2992 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2993 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3002 fid = &ll_i2info(inode)->lli_fid;
3003 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3004 ldlm_lockname[mode]);
3006 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
3007 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3008 policy.l_inodebits.bits = *bits & (1 << i);
3009 if (policy.l_inodebits.bits == 0)
3012 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3013 &policy, mode, &lockh)) {
3014 struct ldlm_lock *lock;
3016 lock = ldlm_handle2lock(&lockh);
3019 ~(lock->l_policy_data.l_inodebits.bits);
3020 LDLM_LOCK_PUT(lock);
3022 *bits &= ~policy.l_inodebits.bits;
/*
 * Try to match (and take a reference on) a granted MD lock covering
 * @bits on this inode; returns the matched mode (0 if none) and fills
 * *lockh.  Unlike ll_have_md_lock() this does NOT use TEST_LOCK, so
 * the caller owns a lock reference on success.
 */
3029 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3030 struct lustre_handle *lockh, __u64 flags,
3033 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3038 fid = &ll_i2info(inode)->lli_fid;
3039 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3041 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3042 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Post-process a revalidate RPC result: -ENOENT on a non-regular,
 * non-directory inode is tolerated (object already unlinked); any
 * other non-zero rc is logged as an error.
 */
3047 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3049 /* Already unlinked. Just update nlink and return success */
3050 if (rc == -ENOENT) {
3052 /* This path cannot be hit for regular files unless in
3053 * case of obscure races, so no need to to validate
3055 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3057 } else if (rc != 0) {
3058 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
3059 ll_get_fsname(inode->i_sb, NULL, 0),
3060 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate the dentry's inode attributes against the MDS.
 * Two paths: if the server supports OBD_CONNECT_ATTRFID, an intent
 * getattr-by-FID (md_intent_lock) is used, including unhashing the
 * dentry when the inode turned out to be unlinked; otherwise, if no
 * matching MD lock is cached locally, a plain md_getattr RPC fetches
 * the attributes (plus EA size for regular files) and ll_prep_inode()
 * applies the reply.
 */
3066 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3069 struct inode *inode = dentry->d_inode;
3070 struct ptlrpc_request *req = NULL;
3071 struct obd_export *exp;
3075 LASSERT(inode != NULL);
3077 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
3078 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
3080 exp = ll_i2mdexp(inode);
3082 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3083 * But under CMD case, it caused some lock issues, should be fixed
3084 * with new CMD ibits lock. See bug 12718 */
3085 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3086 struct lookup_intent oit = { .it_op = IT_GETATTR };
3087 struct md_op_data *op_data;
3089 if (ibits == MDS_INODELOCK_LOOKUP)
3090 oit.it_op = IT_LOOKUP;
3092 /* Call getattr by fid, so do not provide name at all. */
3093 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
3094 dentry->d_inode, NULL, 0, 0,
3095 LUSTRE_OPC_ANY, NULL);
3096 if (IS_ERR(op_data))
3097 RETURN(PTR_ERR(op_data));
3099 oit.it_create_mode |= M_CHECK_STALE;
3100 rc = md_intent_lock(exp, op_data, NULL, 0,
3101 /* we are not interested in name
3104 ll_md_blocking_ast, 0);
3105 ll_finish_md_op_data(op_data);
3106 oit.it_create_mode &= ~M_CHECK_STALE;
3108 rc = ll_inode_revalidate_fini(inode, rc);
3112 rc = ll_revalidate_it_finish(req, &oit, dentry);
3114 ll_intent_release(&oit);
3118 /* Unlinked? Unhash dentry, so it is not picked up later by
3119 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3120 here to preserve get_cwd functionality on 2.6.
3122 if (!dentry->d_inode->i_nlink)
3123 d_lustre_invalidate(dentry, 0);
3125 ll_lookup_finish_locks(&oit, dentry);
3126 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3127 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3128 obd_valid valid = OBD_MD_FLGETATTR;
3129 struct md_op_data *op_data;
3132 if (S_ISREG(inode->i_mode)) {
3133 rc = ll_get_max_mdsize(sbi, &ealen);
3136 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3139 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3140 0, ealen, LUSTRE_OPC_ANY,
3142 if (IS_ERR(op_data))
3143 RETURN(PTR_ERR(op_data));
3145 op_data->op_valid = valid;
3146 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3147 * capa for this inode. Because we only keep capas of dirs
3149 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3150 ll_finish_md_op_data(op_data);
3152 rc = ll_inode_revalidate_fini(inode, rc);
3156 rc = ll_prep_inode(&inode, req, NULL, NULL);
3159 ptlrpc_req_finished(req);
/*
 * Wrapper around __ll_inode_revalidate_it(): after the MD revalidate,
 * refresh size/times.  Non-regular files copy a/m/ctime from the LVB;
 * regular files glimpse the size from the OSTs unless an HSM restore
 * is in progress (LLIF_FILE_RESTORING), in which case the MDT-provided
 * size is already correct and a glimpse would block on the layout lock.
 */
3163 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3166 struct inode *inode = dentry->d_inode;
3170 rc = __ll_inode_revalidate_it(dentry, it, ibits);
3174 /* if object isn't regular file, don't validate size */
3175 if (!S_ISREG(inode->i_mode)) {
3176 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
3177 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
3178 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
3180 /* In case of restore, the MDT has the right size and has
3181 * already send it back without granting the layout lock,
3182 * inode is up-to-date so glimpse is useless.
3183 * Also to glimpse we need the layout, in case of a running
3184 * restore the MDT holds the layout lock so the glimpse will
3185 * block up to the end of restore (getattr will block)
3187 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3188 rc = ll_glimpse_size(inode);
/*
 * getattr with an explicit lookup intent: revalidate UPDATE|LOOKUP
 * ibits, then copy the (now fresh) inode attributes into *stat.
 * For 32-bit-API clients the ino is derived from the FID via
 * cl_fid_build_ino().
 */
3193 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
3194 struct lookup_intent *it, struct kstat *stat)
3196 struct inode *inode = de->d_inode;
3197 struct ll_sb_info *sbi = ll_i2sbi(inode);
3198 struct ll_inode_info *lli = ll_i2info(inode);
3201 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3202 MDS_INODELOCK_LOOKUP);
3203 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3208 stat->dev = inode->i_sb->s_dev;
3209 if (ll_need_32bit_api(sbi))
3210 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3212 stat->ino = inode->i_ino;
3213 stat->mode = inode->i_mode;
3214 stat->nlink = inode->i_nlink;
3215 stat->uid = inode->i_uid;
3216 stat->gid = inode->i_gid;
3217 stat->rdev = inode->i_rdev;
3218 stat->atime = inode->i_atime;
3219 stat->mtime = inode->i_mtime;
3220 stat->ctime = inode->i_ctime;
3221 stat->blksize = 1 << inode->i_blkbits;
3223 stat->size = i_size_read(inode);
3224 stat->blocks = inode->i_blocks;
/* VFS ->getattr entry point: delegate with a fresh IT_GETATTR intent. */
3228 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3230 struct lookup_intent it = { .it_op = IT_GETATTR };
3232 return ll_getattr_it(mnt, de, &it, stat);
/*
 * FIEMAP handler: marshal the kernel fiemap_extent_info into a
 * ll_user_fiemap buffer sized for fi_extents_max extents, run
 * ll_do_fiemap(), and copy flags/extent data back to userspace's
 * fi_extents_start array.
 */
3235 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3236 __u64 start, __u64 len)
3240 struct ll_user_fiemap *fiemap;
3241 unsigned int extent_count = fieinfo->fi_extents_max;
3243 num_bytes = sizeof(*fiemap) + (extent_count *
3244 sizeof(struct ll_fiemap_extent));
3245 OBD_ALLOC_LARGE(fiemap, num_bytes);
3250 fiemap->fm_flags = fieinfo->fi_flags;
3251 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3252 fiemap->fm_start = start;
3253 fiemap->fm_length = len;
3254 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3255 sizeof(struct ll_fiemap_extent));
3257 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3259 fieinfo->fi_flags = fiemap->fm_flags;
3260 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3261 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3262 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3264 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * Return a referenced copy of the cached POSIX ACL under lli_lock;
 * the VFS permission path releases the reference (see in-code note).
 */
3268 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3270 struct ll_inode_info *lli = ll_i2info(inode);
3271 struct posix_acl *acl = NULL;
3274 spin_lock(&lli->lli_lock);
3275 /* VFS' acl_permission_check->check_acl will release the refcount */
3276 acl = posix_acl_dup(lli->lli_posix_acl);
3277 spin_unlock(&lli->lli_lock);
/*
 * ACL check callback for older kernels lacking the 2-arg
 * generic_permission(); variants cover the 4-arg flags ABI.
 * RCU-walk lookups bail out (cannot block); otherwise the cached ACL
 * is consulted via posix_acl_permission().  Without CONFIG_FS_POSIX_ACL
 * the function degenerates (body elided in this extract).
 */
3282 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3284 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3285 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3287 ll_check_acl(struct inode *inode, int mask)
3290 # ifdef CONFIG_FS_POSIX_ACL
3291 struct posix_acl *acl;
3295 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3296 if (flags & IPERM_FLAG_RCU)
3299 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3304 rc = posix_acl_permission(inode, acl, mask);
3305 posix_acl_release(acl);
3308 # else /* !CONFIG_FS_POSIX_ACL */
3310 # endif /* CONFIG_FS_POSIX_ACL */
3312 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * VFS ->permission handler (three ABI variants).  RCU-walk requests
 * return early; the root inode is revalidated first because it never
 * goes through lookup; remote clients (LL_SBI_RMT_CLIENT) defer to
 * lustre_check_remote_perm(); everyone else uses generic permission
 * checking with ll_check_acl.
 */
3314 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3315 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3317 # ifdef HAVE_INODE_PERMISION_2ARGS
3318 int ll_inode_permission(struct inode *inode, int mask)
3320 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3327 #ifdef MAY_NOT_BLOCK
3328 if (mask & MAY_NOT_BLOCK)
3330 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3331 if (flags & IPERM_FLAG_RCU)
3335 /* as root inode are NOT getting validated in lookup operation,
3336 * need to do it before permission check. */
3338 if (inode == inode->i_sb->s_root->d_inode) {
3339 struct lookup_intent it = { .it_op = IT_LOOKUP };
3341 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3342 MDS_INODELOCK_LOOKUP);
3347 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
3348 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
3350 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3351 return lustre_check_remote_perm(inode, mask);
3353 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3354 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3359 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations table: no .flock/.lock entries, so the VFS
 * falls back to purely local (client-side) lock bookkeeping. */
3360 struct file_operations ll_file_operations = {
3361 .read = ll_file_read,
3362 .aio_read = ll_file_aio_read,
3363 .write = ll_file_write,
3364 .aio_write = ll_file_aio_write,
3365 .unlocked_ioctl = ll_file_ioctl,
3366 .open = ll_file_open,
3367 .release = ll_file_release,
3368 .mmap = ll_file_mmap,
3369 .llseek = ll_file_seek,
3370 .splice_read = ll_file_splice_read,
/* file_operations for -o flock mounts: cluster-coherent locking via
 * ll_file_flock for both BSD flock and POSIX fcntl locks. */
3375 struct file_operations ll_file_operations_flock = {
3376 .read = ll_file_read,
3377 .aio_read = ll_file_aio_read,
3378 .write = ll_file_write,
3379 .aio_write = ll_file_aio_write,
3380 .unlocked_ioctl = ll_file_ioctl,
3381 .open = ll_file_open,
3382 .release = ll_file_release,
3383 .mmap = ll_file_mmap,
3384 .llseek = ll_file_seek,
3385 .splice_read = ll_file_splice_read,
3388 .flock = ll_file_flock,
3389 .lock = ll_file_flock
3392 /* These are for -o noflock - to return ENOSYS on flock calls */
/* file_operations for -o noflock mounts: lock ops wired to
 * ll_file_noflock so every flock/fcntl lock call fails. */
3393 struct file_operations ll_file_operations_noflock = {
3394 .read = ll_file_read,
3395 .aio_read = ll_file_aio_read,
3396 .write = ll_file_write,
3397 .aio_write = ll_file_aio_write,
3398 .unlocked_ioctl = ll_file_ioctl,
3399 .open = ll_file_open,
3400 .release = ll_file_release,
3401 .mmap = ll_file_mmap,
3402 .llseek = ll_file_seek,
3403 .splice_read = ll_file_splice_read,
3406 .flock = ll_file_noflock,
3407 .lock = ll_file_noflock
/* inode_operations for regular Lustre files; .get_acl only exists on
 * kernels providing that inode-op hook. */
3410 struct inode_operations ll_file_inode_operations = {
3411 .setattr = ll_setattr,
3412 .getattr = ll_getattr,
3413 .permission = ll_inode_permission,
3414 .setxattr = ll_setxattr,
3415 .getxattr = ll_getxattr,
3416 .listxattr = ll_listxattr,
3417 .removexattr = ll_removexattr,
3418 .fiemap = ll_fiemap,
3419 #ifdef HAVE_IOP_GET_ACL
3420 .get_acl = ll_get_acl,
3424 /* dynamic ioctl number support routins */
/* Registry of dynamically-registered ioctl handlers: an rwsem-guarded
 * list of llioc_data entries, each carrying a callback plus a
 * flexible array of the ioctl numbers it services. */
3425 static struct llioc_ctl_data {
3426 struct rw_semaphore ioc_sem;
3427 cfs_list_t ioc_head;
3429 __RWSEM_INITIALIZER(llioc.ioc_sem),
3430 CFS_LIST_HEAD_INIT(llioc.ioc_head)
3435 cfs_list_t iocd_list;
3436 unsigned int iocd_size;
3437 llioc_callback_t iocd_cb;
3438 unsigned int iocd_count;
3439 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler for @count commands in @cmd.
 * Allocates an llioc_data (header + cmd array), fills it, and appends
 * it to llioc.ioc_head under the write lock.  Returns the entry
 * pointer as an opaque "magic" token for later unregistration.
 */
3442 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3445 struct llioc_data *in_data = NULL;
3448 if (cb == NULL || cmd == NULL ||
3449 count > LLIOC_MAX_CMD || count < 0)
3452 size = sizeof(*in_data) + count * sizeof(unsigned int);
3453 OBD_ALLOC(in_data, size);
3454 if (in_data == NULL)
3457 memset(in_data, 0, sizeof(*in_data));
3458 in_data->iocd_size = size;
3459 in_data->iocd_cb = cb;
3460 in_data->iocd_count = count;
3461 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3463 down_write(&llioc.ioc_sem);
3464 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3465 up_write(&llioc.ioc_sem);
/*
 * Remove and free the handler previously returned by
 * ll_iocontrol_register() (matched by pointer identity -- the
 * comparison line is elided in this extract).  Warns if @magic is not
 * found in the registry.
 */
3470 void ll_iocontrol_unregister(void *magic)
3472 struct llioc_data *tmp;
3477 down_write(&llioc.ioc_sem);
3478 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3480 unsigned int size = tmp->iocd_size;
3482 cfs_list_del(&tmp->iocd_list);
3483 up_write(&llioc.ioc_sem);
3485 OBD_FREE(tmp, size);
3489 up_write(&llioc.ioc_sem);
3491 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3494 EXPORT_SYMBOL(ll_iocontrol_register);
3495 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch an ioctl to registered dynamic handlers: under the read
 * lock, scan each entry's iocd_cmd table for @cmd and invoke its
 * callback; iteration stops when a callback returns LLIOC_STOP.
 * The callback's rc is propagated through *rcp.
 */
3497 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3498 unsigned int cmd, unsigned long arg, int *rcp)
3500 enum llioc_iter ret = LLIOC_CONT;
3501 struct llioc_data *data;
3502 int rc = -EINVAL, i;
3504 down_read(&llioc.ioc_sem);
3505 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3506 for (i = 0; i < data->iocd_count; i++) {
3507 if (cmd != data->iocd_cmd[i])
3510 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3514 if (ret == LLIOC_STOP)
3517 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration to the cl_object via cl_conf_set().
 * For OBJECT_CONF_SET the layout lock is only allowed to match
 * (ldlm_lock_allow_match) after the layout is applied, so concurrent
 * matchers never observe a stale layout.
 */
3524 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3526 struct ll_inode_info *lli = ll_i2info(inode);
3527 struct cl_env_nest nest;
3532 if (lli->lli_clob == NULL)
3535 env = cl_env_nested_get(&nest);
3537 RETURN(PTR_ERR(env));
3539 result = cl_conf_set(env, lli->lli_clob, conf);
3540 cl_env_nested_put(&nest, env);
3542 if (conf->coc_opc == OBJECT_CONF_SET) {
3543 struct ldlm_lock *lock = conf->coc_lock;
3545 LASSERT(lock != NULL);
3546 LASSERT(ldlm_has_layout(lock));
3548 /* it can only be allowed to match after layout is
3549 * applied to inode otherwise false layout would be
3550 * seen. Applying layout shoud happen before dropping
3551 * the intent lock. */
3552 ldlm_lock_allow_match(lock);
3558 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
/*
 * If the lock's LVB is not yet populated (LVB_READY + l_lvb_data), get
 * the LOV EA by FID via md_getxattr() and install a copy as the lock's
 * l_lvb_data/l_lvb_len under the resource lock, replacing any previous
 * buffer.  Needed when the layout lock was granted through a blocked
 * completion AST, whose LVB buffer is too small to carry the layout.
 */
3559 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3562 struct ll_sb_info *sbi = ll_i2sbi(inode);
3563 struct obd_capa *oc;
3564 struct ptlrpc_request *req;
3565 struct mdt_body *body;
3572 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3573 PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
3574 lock->l_lvb_data, lock->l_lvb_len);
3576 if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY))
3579 /* if layout lock was granted right away, the layout is returned
3580 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3581 * blocked and then granted via completion ast, we have to fetch
3582 * layout here. Please note that we can't use the LVB buffer in
3583 * completion AST because it doesn't have a large enough buffer */
3584 oc = ll_mdscapa_get(inode);
3585 rc = ll_get_max_mdsize(sbi, &lmmsize);
3587 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3588 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3594 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3595 if (body == NULL || body->eadatasize > lmmsize)
3596 GOTO(out, rc = -EPROTO);
3598 lmmsize = body->eadatasize;
3599 if (lmmsize == 0) /* empty layout */
3602 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3604 GOTO(out, rc = -EFAULT);
3606 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3607 if (lvbdata == NULL)
3608 GOTO(out, rc = -ENOMEM);
3610 memcpy(lvbdata, lmm, lmmsize);
3611 lock_res_and_lock(lock);
3612 if (lock->l_lvb_data != NULL)
3613 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3615 lock->l_lvb_data = lvbdata;
3616 lock->l_lvb_len = lmmsize;
3617 unlock_res_and_lock(lock);
3622 ptlrpc_req_finished(req);
3627 * Apply the layout to the inode. Layout lock is held and will be released
/*
 * With a granted layout lock (lockh/mode), fetch the LVB if needed,
 * unpack the LOV md to learn the layout generation (*gen; empty layout
 * yields LL_LAYOUT_GEN_EMPTY), and apply it to the cl_object via an
 * OBJECT_CONF_SET ll_layout_conf().  If the cl layer reports -EBUSY,
 * the lock is dropped first and an OBJECT_CONF_WAIT pass blocks until
 * in-flight IO under the old layout drains.  The lock reference is
 * always released before returning.
 */
3630 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3631 struct inode *inode, __u32 *gen, bool reconf)
3633 struct ll_inode_info *lli = ll_i2info(inode);
3634 struct ll_sb_info *sbi = ll_i2sbi(inode);
3635 struct ldlm_lock *lock;
3636 struct lustre_md md = { NULL };
3637 struct cl_object_conf conf;
3640 bool wait_layout = false;
3643 LASSERT(lustre_handle_is_used(lockh));
3645 lock = ldlm_handle2lock(lockh);
3646 LASSERT(lock != NULL);
3647 LASSERT(ldlm_has_layout(lock));
3649 LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
3650 inode, PFID(&lli->lli_fid), reconf);
3652 /* in case this is a caching lock and reinstate with new inode */
3653 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3655 lock_res_and_lock(lock);
3656 lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
3657 unlock_res_and_lock(lock);
3658 /* checking lvb_ready is racy but this is okay. The worst case is
3659 * that multi processes may configure the file on the same time. */
3661 if (lvb_ready || !reconf) {
3664 /* layout_gen must be valid if layout lock is not
3665 * cancelled and stripe has already set */
3666 *gen = lli->lli_layout_gen;
3672 rc = ll_layout_fetch(inode, lock);
3676 /* for layout lock, lmm is returned in lock's lvb.
3677 * lvb_data is immutable if the lock is held so it's safe to access it
3678 * without res lock. See the description in ldlm_lock_decref_internal()
3679 * for the condition to free lvb_data of layout lock */
3680 if (lock->l_lvb_data != NULL) {
3681 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3682 lock->l_lvb_data, lock->l_lvb_len);
3684 *gen = LL_LAYOUT_GEN_EMPTY;
3686 *gen = md.lsm->lsm_layout_gen;
3689 CERROR("%s: file "DFID" unpackmd error: %d\n",
3690 ll_get_fsname(inode->i_sb, NULL, 0),
3691 PFID(&lli->lli_fid), rc);
3697 /* set layout to file. Unlikely this will fail as old layout was
3698 * surely eliminated */
3699 memset(&conf, 0, sizeof conf);
3700 conf.coc_opc = OBJECT_CONF_SET;
3701 conf.coc_inode = inode;
3702 conf.coc_lock = lock;
3703 conf.u.coc_md = &md;
3704 rc = ll_layout_conf(inode, &conf);
3707 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3709 /* refresh layout failed, need to wait */
3710 wait_layout = rc == -EBUSY;
3714 LDLM_LOCK_PUT(lock);
3715 ldlm_lock_decref(lockh, mode);
3717 /* wait for IO to complete if it's still being used. */
3719 CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
3720 ll_get_fsname(inode->i_sb, NULL, 0),
3721 inode, PFID(&lli->lli_fid));
3723 memset(&conf, 0, sizeof conf);
3724 conf.coc_opc = OBJECT_CONF_WAIT;
3725 conf.coc_inode = inode;
3726 rc = ll_layout_conf(inode, &conf);
3730 CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
3731 PFID(&lli->lli_fid), rc);
3737 * This function checks if there exists a LAYOUT lock on the client side,
3738 * or enqueues it if it doesn't have one in cache.
3740 * This function will not hold layout lock so it may be revoked any time after
3741 * this function returns. Any operations depend on layout should be redone
3744 * This function should be called before lov_io_init() to get an uptodate
3745 * layout version, the caller should save the version number and after IO
3746 * is finished, this function should be called again to verify that layout
3747 * is not changed during IO time.
/*
 * Sequence: fast-path match of a cached layout lock; otherwise take
 * lli_layout_mutex (to serialize enqueues and avoid starvation),
 * re-check the cache, then enqueue an IT_LAYOUT intent on the MDS and
 * apply the result through ll_layout_lock_set(), which fills *gen.
 * No-op when the server lacks LL_SBI_LAYOUT_LOCK support.
 */
3749 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3751 struct ll_inode_info *lli = ll_i2info(inode);
3752 struct ll_sb_info *sbi = ll_i2sbi(inode);
3753 struct md_op_data *op_data;
3754 struct lookup_intent it;
3755 struct lustre_handle lockh;
3757 struct ldlm_enqueue_info einfo = {
3758 .ei_type = LDLM_IBITS,
3760 .ei_cb_bl = ll_md_blocking_ast,
3761 .ei_cb_cp = ldlm_completion_ast,
3766 *gen = lli->lli_layout_gen;
3767 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3771 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3772 LASSERT(S_ISREG(inode->i_mode));
3774 /* mostly layout lock is caching on the local side, so try to match
3775 * it before grabbing layout lock mutex. */
3776 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3777 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3778 if (mode != 0) { /* hit cached lock */
3779 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3783 /* better hold lli_layout_mutex to try again otherwise
3784 * it will have starvation problem. */
3787 /* take layout lock mutex to enqueue layout lock exclusively. */
3788 mutex_lock(&lli->lli_layout_mutex);
3791 /* try again. Maybe somebody else has done this. */
3792 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3793 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3794 if (mode != 0) { /* hit cached lock */
3795 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3799 mutex_unlock(&lli->lli_layout_mutex);
3803 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3804 0, 0, LUSTRE_OPC_ANY, NULL);
3805 if (IS_ERR(op_data)) {
3806 mutex_unlock(&lli->lli_layout_mutex);
3807 RETURN(PTR_ERR(op_data));
3810 /* have to enqueue one */
3811 memset(&it, 0, sizeof(it));
3812 it.it_op = IT_LAYOUT;
3813 lockh.cookie = 0ULL;
3815 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
3816 ll_get_fsname(inode->i_sb, NULL, 0), inode,
3817 PFID(&lli->lli_fid));
3819 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
3821 if (it.d.lustre.it_data != NULL)
3822 ptlrpc_req_finished(it.d.lustre.it_data);
3823 it.d.lustre.it_data = NULL;
3825 ll_finish_md_op_data(op_data);
3827 mode = it.d.lustre.it_lock_mode;
3828 it.d.lustre.it_lock_mode = 0;
3829 ll_intent_drop_lock(&it);
3832 /* set lock data in case this is a new lock */
3833 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3834 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3838 mutex_unlock(&lli->lli_layout_mutex);
3844 * This function send a restore request to the MDT
3846 int ll_layout_restore(struct inode *inode)
3848 struct hsm_user_request *hur;
3852 len = sizeof(struct hsm_user_request) +
3853 sizeof(struct hsm_user_item);
3854 OBD_ALLOC(hur, len);
3858 hur->hur_request.hr_action = HUA_RESTORE;
3859 hur->hur_request.hr_archive_id = 0;
3860 hur->hur_request.hr_flags = 0;
3861 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3862 sizeof(hur->hur_user_item[0].hui_fid));
3863 hur->hur_user_item[0].hui_extent.length = -1;
3864 hur->hur_request.hr_itemcount = 1;
3865 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,