4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/*
 * Allocate and initialize a per-open ll_file_data from the
 * ll_file_data_slab cache (__GFP_IO to avoid recursion into the fs).
 * NOTE(review): intermediate lines are elided in this view — presumably
 * NULL is returned on allocation failure and fd on success; confirm
 * against the full source.
 */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
/* Start each open with a clean "no write error seen yet" state. */
61 fd->fd_write_failed = false;
/* Release a ll_file_data back to its slab cache (counterpart of
 * ll_file_data_get()). */
66 static void ll_file_data_put(struct ll_file_data *fd)
69 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the inode's current attributes (mode/times/size/blocks/flags),
 * IO epoch, open handle @fh and MDS capability into @op_data so they can
 * be sent to the MDS (used on close / Size-on-MDS update paths).
 */
72 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
73 struct lustre_handle *fh)
75 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
76 op_data->op_attr.ia_mode = inode->i_mode;
77 op_data->op_attr.ia_atime = inode->i_atime;
78 op_data->op_attr.ia_mtime = inode->i_mtime;
79 op_data->op_attr.ia_ctime = inode->i_ctime;
80 op_data->op_attr.ia_size = i_size_read(inode);
81 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the Lustre-extended iattr; translate the VFS
 * inode flags to their on-wire (ext) representation. */
82 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
83 ll_inode_to_ext_flags(inode->i_flags);
84 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
86 op_data->op_handle = *fh;
/* NOTE(review): ll_mdscapa_get() presumably takes a capability ref that
 * the op_data consumer must drop — confirm in full source. */
87 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Tell the MDS the data was modified so it can update SOM attributes. */
89 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
90 op_data->op_bias |= MDS_DATA_MODIFIED;
/*
 * Prepare @op_data for an MDS close of openhandle @och:
 */
94 * Closes the IO epoch and packs all the attributes into @op_data for
97 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
98 struct obd_client_handle *och)
/* Always send mode and all three timestamps back to the MDS on close. */
102 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
103 ATTR_MTIME | ATTR_MTIME_SET |
104 ATTR_CTIME | ATTR_CTIME_SET;
/* NOTE(review): lines between the two conditions are elided here; the
 * size/blocks validity and the epoch close below apply only on paths
 * the elided control flow selects — confirm in full source. */
106 if (!(och->och_flags & FMODE_WRITE))
/* Without Size-on-MDS support (or for non-regular files) the client is
 * authoritative for size/blocks, so send them on close. */
109 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
110 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
112 ll_ioepoch_close(inode, op_data, &och, 0);
115 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
116 ll_prep_md_op_data(op_data, inode, NULL, NULL,
117 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send an MDS close for openhandle @och on @inode.  If @data_version is
 * non-NULL this is an HSM release-on-close.  Handles the Size-on-MDS
 * update when the MDS requests it, clears LLIF_DATA_MODIFIED on success,
 * destroys OST objects named in the close reply, and frees the request.
 */
121 static int ll_close_inode_openhandle(struct obd_export *md_exp,
123 struct obd_client_handle *och,
124 const __u64 *data_version)
126 struct obd_export *exp = ll_i2mdexp(inode);
127 struct md_op_data *op_data;
128 struct ptlrpc_request *req = NULL;
129 struct obd_device *obd = class_exp2obd(exp);
/* Sanity check: bail out early if the MDC export handle is dead. */
136 * XXX: in case of LMV, is this correct to access
139 CERROR("Invalid MDC connection handle "LPX64"\n",
140 ll_i2mdexp(inode)->exp_handle.h_cookie);
144 OBD_ALLOC_PTR(op_data);
146 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
148 ll_prepare_close(inode, op_data, och);
149 if (data_version != NULL) {
150 /* Pass in data_version implies release. */
151 op_data->op_bias |= MDS_HSM_RELEASE;
152 op_data->op_data_version = *data_version;
153 op_data->op_lease_handle = och->och_lease_handle;
154 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* Remember whether ll_prepare_close() closed the epoch before op_data
 * is consumed/freed below. */
156 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
157 rc = md_close(md_exp, op_data, och->och_mod, &req);
/* NOTE(review): the branch conditions around the SOM update and the
 * error CERROR below are elided in this view — the SOM update runs on
 * the rc value indicating "MDS wants Size-on-MDS attributes"; confirm
 * the exact rc checks in the full source. */
159 /* This close must have the epoch closed. */
160 LASSERT(epoch_close);
161 /* MDS has instructed us to obtain Size-on-MDS attribute from
162 * OSTs and send setattr to back to MDS. */
163 rc = ll_som_update(inode, op_data);
165 CERROR("inode %lu mdc Size-on-MDS update failed: "
166 "rc = %d\n", inode->i_ino, rc);
170 CERROR("inode %lu mdc close failed: rc = %d\n",
174 /* DATA_MODIFIED flag was successfully sent on close, cancel data
175 * modification flag. */
176 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
177 struct ll_inode_info *lli = ll_i2info(inode);
179 spin_lock(&lli->lli_lock);
180 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
181 spin_unlock(&lli->lli_lock);
/* Destroy OST objects the MDS told us to drop in the close reply. */
185 rc = ll_objects_destroy(req, inode);
187 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* For HSM release: verify the MDS actually released the file. */
191 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
192 struct mdt_body *body;
193 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
194 if (!(body->valid & OBD_MD_FLRELEASED))
198 ll_finish_md_op_data(op_data);
/* If SOM is enabled and the epoch stayed open on a write handle, queue
 * the inode for DONE_WRITING processing. */
202 if (exp_connect_som(exp) && !epoch_close &&
203 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
204 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
206 md_clear_open_replay_data(md_exp, och);
207 /* Free @och if it is not waiting for DONE_WRITING. */
208 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
211 if (req) /* This is close request */
212 ptlrpc_req_finished(req);
/*
 * Drop the last local user of the MDS open handle of the given mode
 * (FMODE_WRITE / FMODE_EXEC / FMODE_READ) and, if nobody else still uses
 * it, detach it under lli_och_mutex and close it on the MDS.
 */
216 int ll_md_real_close(struct inode *inode, int flags)
218 struct ll_inode_info *lli = ll_i2info(inode);
219 struct obd_client_handle **och_p;
220 struct obd_client_handle *och;
/* Select the handle slot and use-counter matching the open mode. */
225 if (flags & FMODE_WRITE) {
226 och_p = &lli->lli_mds_write_och;
227 och_usecount = &lli->lli_open_fd_write_count;
228 } else if (flags & FMODE_EXEC) {
229 och_p = &lli->lli_mds_exec_och;
230 och_usecount = &lli->lli_open_fd_exec_count;
232 LASSERT(flags & FMODE_READ);
233 och_p = &lli->lli_mds_read_och;
234 och_usecount = &lli->lli_open_fd_read_count;
237 mutex_lock(&lli->lli_och_mutex);
238 if (*och_usecount) { /* There are still users of this handle, so
240 mutex_unlock(&lli->lli_och_mutex);
/* NOTE(review): the *och_p fetch/clear between these unlocks is elided
 * in this view — och is detached under the mutex; confirm in source. */
245 mutex_unlock(&lli->lli_och_mutex);
247 if (och) { /* There might be a race and somebody have freed this och
249 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file close: drop group lock and lease if held, decrement the
 * per-mode open counters, and — unless a cached OPEN DLM lock lets us
 * skip it — do the real MDS close.  Finally frees the ll_file_data.
 */
256 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
259 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
260 struct ll_inode_info *lli = ll_i2info(inode);
264 /* clear group lock, if present */
265 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
266 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
268 if (fd->fd_lease_och != NULL) {
271 /* Usually the lease is not released when the
272 * application crashed, we need to release here. */
273 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
274 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
275 PFID(&lli->lli_fid), rc, lease_broken);
277 fd->fd_lease_och = NULL;
/* Close the private openhandle a lease handed back to this fd. */
280 if (fd->fd_och != NULL) {
281 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
286 /* Let's see if we have good enough OPEN lock on the file and if
287 we can skip talking to MDS */
288 if (file->f_dentry->d_inode) { /* Can this ever be false? */
290 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
291 struct lustre_handle lockh;
292 struct inode *inode = file->f_dentry->d_inode;
293 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
295 mutex_lock(&lli->lli_och_mutex);
/* Decrement the counter matching the mode this fd was opened with;
 * the counter must be positive — it was bumped at open time. */
296 if (fd->fd_omode & FMODE_WRITE) {
298 LASSERT(lli->lli_open_fd_write_count);
299 lli->lli_open_fd_write_count--;
300 } else if (fd->fd_omode & FMODE_EXEC) {
302 LASSERT(lli->lli_open_fd_exec_count);
303 lli->lli_open_fd_exec_count--;
306 LASSERT(lli->lli_open_fd_read_count);
307 lli->lli_open_fd_read_count--;
309 mutex_unlock(&lli->lli_och_mutex);
/* Only talk to the MDS when no matching cached OPEN lock exists. */
311 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
312 LDLM_IBITS, &policy, lockmode,
314 rc = ll_md_real_close(file->f_dentry->d_inode,
318 CERROR("Releasing a file %p with negative dentry %p. Name %s",
319 file, file->f_dentry, file->f_dentry->d_name.name);
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
325 ll_capa_close(inode);
330 /* While this returns an error code, fput() the caller does not, so we need
331 * to make every effort to clean up all of our state here. Also, applications
332 * rarely check close errors and even if an error is returned they will not
333 * re-try the close call.
/* .release handler for Lustre files: cleans up remote-ACL state, stops
 * statahead if this fd started it, clears async write errors, and calls
 * ll_md_close() for non-root files.  The root dentry is special-cased
 * because it has no MDS open handle to close. */
335 int ll_file_release(struct inode *inode, struct file *file)
337 struct ll_file_data *fd;
338 struct ll_sb_info *sbi = ll_i2sbi(inode);
339 struct ll_inode_info *lli = ll_i2info(inode);
343 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
344 inode->i_generation, inode);
346 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping is only kept on the root inode. */
347 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
348 inode == inode->i_sb->s_root->d_inode) {
349 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
352 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
353 fd->fd_flags &= ~LL_FILE_RMTACL;
354 rct_del(&sbi->ll_rct, cfs_curproc_pid());
355 et_search_free(&sbi->ll_et, cfs_curproc_pid());
360 if (inode->i_sb->s_root != file->f_dentry)
361 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
362 fd = LUSTRE_FPRIVATE(file);
365 /* The last ref on @file, maybe not the the owner pid of statahead.
366 * Different processes can open the same dir, "ll_opendir_key" means:
367 * it is me that should stop the statahead thread. */
368 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
369 lli->lli_opendir_pid != 0)
370 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root has no MDS openhandle: just free the fd and return. */
372 if (inode->i_sb->s_root == file->f_dentry) {
373 LUSTRE_FPRIVATE(file) = NULL;
374 ll_file_data_put(fd);
/* Pick up any asynchronous write errors so close reports them. */
378 if (!S_ISDIR(inode->i_mode)) {
379 lov_read_and_clear_async_rc(lli->lli_clob);
380 lli->lli_async_rc = 0;
383 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/* Fault-injection hook: optionally dump the debug log on close. */
385 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
386 libcfs_debug_dumplog();
/*
 * Issue an MDS intent-open RPC for @file (used by NFSD / revalidate
 * fallback paths and by ll_lov_setstripe when @lmm/@lmmsize are set).
 * On success the reply updates the inode and the intent carries the
 * open disposition and lock data.
 */
391 static int ll_intent_file_open(struct file *file, void *lmm,
392 int lmmsize, struct lookup_intent *itp)
394 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
395 struct dentry *parent = file->f_dentry->d_parent;
396 struct md_op_data *op_data;
397 struct ptlrpc_request *req;
398 __u32 opc = LUSTRE_OPC_ANY;
405 /* Usually we come here only for NFSD, and we want open lock.
406 But we can also get here with pre 2.6.15 patchless kernels, and in
407 that case that lock is also ok */
408 /* We can also get here if there was cached open handle in revalidate_it
409 * but it disappeared while we were getting from there to ll_file_open.
410 * But this means this file was closed and immediatelly opened which
411 * makes a good candidate for using OPEN lock */
412 /* If lmmsize & lmm are not 0, we are just setting stripe info
413 * parameters. No need for the open lock */
414 if (lmm == NULL && lmmsize == 0) {
415 itp->it_flags |= MDS_OPEN_LOCK;
416 if (itp->it_flags & FMODE_WRITE)
417 opc = LUSTRE_OPC_CREATE;
420 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
421 file->f_dentry->d_inode, NULL, 0,
425 RETURN(PTR_ERR(op_data));
/* We already know the FID, so open by FID rather than by name. */
427 itp->it_flags |= MDS_OPEN_BY_FID;
428 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
429 0 /*unused */, &req, ll_md_blocking_ast, 0);
430 ll_finish_md_op_data(op_data);
432 /* reason for keep own exit path - don`t flood log
433 * with messages with -ESTALE errors.
/* NOTE(review): the -ESTALE branch condition preceding these lines is
 * elided in this view — confirm control flow in the full source. */
435 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
436 it_open_error(DISP_OPEN_OPEN, itp))
438 ll_release_openhandle(file->f_dentry, itp);
442 if (it_disposition(itp, DISP_LOOKUP_NEG))
443 GOTO(out, rc = -ENOENT);
445 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
446 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
447 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Fill/refresh the inode from the reply, then attach lock data. */
451 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
452 if (!rc && itp->d.lustre.it_lock_mode)
453 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
/* Cleanup path: drop the request ref held by the intent and release
 * any lock it still pins. */
457 ptlrpc_req_finished(itp->d.lustre.it_data);
458 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
459 ll_intent_drop_lock(itp);
/*
 * Record a newly obtained IO epoch on the inode (no locking needed —
 * see the original rationale below).
 */
465 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
466 * not believe attributes if a few ioepoch holders exist. Attributes for
467 * previous ioepoch if new one is opened are also skipped by MDS.
469 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only update on a real epoch change; epoch 0 means "none". */
471 if (ioepoch && lli->lli_ioepoch != ioepoch) {
472 lli->lli_ioepoch = ioepoch;
473 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
474 ioepoch, PFID(&lli->lli_fid));
/*
 * Fill an obd_client_handle from the MDT reply carried by the intent
 * (open handle, FID, lease lock cookie, flags) and register it for
 * open replay.  Returns md_set_open_replay_data() result.
 */
478 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
479 struct obd_client_handle *och)
481 struct ptlrpc_request *req = it->d.lustre.it_data;
482 struct mdt_body *body;
484 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
485 och->och_fh = body->handle;
486 och->och_fid = body->fid1;
/* Remember the DLM lock handle so a later lease close can cancel it. */
487 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
488 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
489 och->och_flags = it->it_flags;
491 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish the client-local part of an open: optionally fill @och from the
 * intent reply (and pick up the IO epoch), then attach @fd to the file
 * and initialize readahead state.  @och may be NULL when an existing MDS
 * open handle is being reused.
 */
494 int ll_local_open(struct file *file, struct lookup_intent *it,
495 struct ll_file_data *fd, struct obd_client_handle *och)
497 struct inode *inode = file->f_dentry->d_inode;
498 struct ll_inode_info *lli = ll_i2info(inode);
501 LASSERT(!LUSTRE_FPRIVATE(file));
506 struct ptlrpc_request *req = it->d.lustre.it_data;
507 struct mdt_body *body;
510 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
514 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
515 ll_ioepoch_open(lli, body->ioepoch);
518 LUSTRE_FPRIVATE(file) = fd;
519 ll_readahead_init(inode, &fd->fd_ras);
/* Remember the effective open mode for counter bookkeeping at close. */
520 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
525 /* Open a file, and (for the very first open) create objects on the OSTs at
526 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
527 * creation or open until ll_lov_setstripe() ioctl is called.
529 * If we already have the stripe MD locally then we don't request it in
530 * md_open(), by passing a lmm_size = 0.
532 * It is up to the application to ensure no other processes open this file
533 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
534 * used. We might be able to avoid races of that sort by getting lli_open_sem
535 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
536 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* .open handler.  Either reuses an MDS open handle cached on the inode
 * (per open mode) or performs a new intent open, then completes the
 * local open via ll_local_open().  NOTE(review): many error-path and
 * brace lines are elided in this view; the GOTO labels (out_openerr,
 * out_och_free) and their cleanup order should be checked in the full
 * source before modifying this function. */
538 int ll_file_open(struct inode *inode, struct file *file)
540 struct ll_inode_info *lli = ll_i2info(inode);
541 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
542 .it_flags = file->f_flags };
543 struct obd_client_handle **och_p = NULL;
544 __u64 *och_usecount = NULL;
545 struct ll_file_data *fd;
546 int rc = 0, opendir_set = 0;
549 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
550 inode->i_generation, inode, file->f_flags);
552 it = file->private_data; /* XXX: compat macro */
553 file->private_data = NULL; /* prevent ll_local_open assertion */
555 fd = ll_file_data_get();
557 GOTO(out_openerr, rc = -ENOMEM);
/* First opener of a directory becomes the statahead owner. */
560 if (S_ISDIR(inode->i_mode)) {
561 spin_lock(&lli->lli_sa_lock);
562 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
563 lli->lli_opendir_pid == 0) {
564 lli->lli_opendir_key = fd;
565 lli->lli_opendir_pid = cfs_curproc_pid();
568 spin_unlock(&lli->lli_sa_lock);
/* The root dentry needs no MDS open handle. */
571 if (inode->i_sb->s_root == file->f_dentry) {
572 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own open intent flags. */
576 if (!it || !it->d.lustre.it_disposition) {
577 /* Convert f_flags into access mode. We cannot use file->f_mode,
578 * because everything but O_ACCMODE mask was stripped from
580 if ((oit.it_flags + 1) & O_ACCMODE)
582 if (file->f_flags & O_TRUNC)
583 oit.it_flags |= FMODE_WRITE;
585 /* kernel only call f_op->open in dentry_open. filp_open calls
586 * dentry_open after call to open_namei that checks permissions.
587 * Only nfsd_open call dentry_open directly without checking
588 * permissions and because of that this code below is safe. */
589 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
590 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
592 /* We do not want O_EXCL here, presumably we opened the file
593 * already? XXX - NFS implications? */
594 oit.it_flags &= ~O_EXCL;
596 /* bug20584, if "it_flags" contains O_CREAT, the file will be
597 * created if necessary, then "IT_CREAT" should be set to keep
598 * consistent with it */
599 if (oit.it_flags & O_CREAT)
600 oit.it_op |= IT_CREAT;
606 /* Let's see if we have file open on MDS already. */
607 if (it->it_flags & FMODE_WRITE) {
608 och_p = &lli->lli_mds_write_och;
609 och_usecount = &lli->lli_open_fd_write_count;
610 } else if (it->it_flags & FMODE_EXEC) {
611 och_p = &lli->lli_mds_exec_och;
612 och_usecount = &lli->lli_open_fd_exec_count;
614 och_p = &lli->lli_mds_read_och;
615 och_usecount = &lli->lli_open_fd_read_count;
618 mutex_lock(&lli->lli_och_mutex);
619 if (*och_p) { /* Open handle is present */
620 if (it_disposition(it, DISP_OPEN_OPEN)) {
621 /* Well, there's extra open request that we do not need,
622 let's close it somehow. This will decref request. */
623 rc = it_open_error(DISP_OPEN_OPEN, it);
625 mutex_unlock(&lli->lli_och_mutex);
626 GOTO(out_openerr, rc);
629 ll_release_openhandle(file->f_dentry, it);
/* Reuse the cached handle: local open only, och arg is NULL. */
633 rc = ll_local_open(file, it, fd, NULL);
636 mutex_unlock(&lli->lli_och_mutex);
637 GOTO(out_openerr, rc);
640 LASSERT(*och_usecount == 0);
641 if (!it->d.lustre.it_disposition) {
642 /* We cannot just request lock handle now, new ELC code
643 means that one of other OPEN locks for this file
644 could be cancelled, and since blocking ast handler
645 would attempt to grab och_mutex as well, that would
646 result in a deadlock */
647 mutex_unlock(&lli->lli_och_mutex);
648 it->it_create_mode |= M_CHECK_STALE;
649 rc = ll_intent_file_open(file, NULL, 0, it);
650 it->it_create_mode &= ~M_CHECK_STALE;
652 GOTO(out_openerr, rc);
656 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
658 GOTO(out_och_free, rc = -ENOMEM);
662 /* md_intent_lock() didn't get a request ref if there was an
663 * open error, so don't do cleanup on the request here
665 /* XXX (green): Should not we bail out on any error here, not
666 * just open error? */
667 rc = it_open_error(DISP_OPEN_OPEN, it);
669 GOTO(out_och_free, rc);
671 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
673 rc = ll_local_open(file, it, fd, *och_p);
675 GOTO(out_och_free, rc);
677 mutex_unlock(&lli->lli_och_mutex);
680 /* Must do this outside lli_och_mutex lock to prevent deadlock where
681 different kind of OPEN lock for this same inode gets cancelled
682 by ldlm_cancel_lru */
683 if (!S_ISREG(inode->i_mode))
684 GOTO(out_och_free, rc);
/* No stripe metadata yet: either delay object creation (read-only or
 * O_LOV_DELAY_CREATE) or fall through to create default striping. */
688 if (!lli->lli_has_smd) {
689 if (file->f_flags & O_LOV_DELAY_CREATE ||
690 !(file->f_mode & FMODE_WRITE)) {
691 CDEBUG(D_INODE, "object creation was delayed\n");
692 GOTO(out_och_free, rc);
695 file->f_flags &= ~O_LOV_DELAY_CREATE;
696 GOTO(out_och_free, rc);
/* Error cleanup: free a handle we allocated but failed to use. */
700 if (och_p && *och_p) {
701 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
702 *och_p = NULL; /* OBD_FREE writes some magic there */
705 mutex_unlock(&lli->lli_och_mutex);
708 if (opendir_set != 0)
709 ll_stop_statahead(inode, lli->lli_opendir_key);
711 ll_file_data_put(fd);
713 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
716 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
717 ptlrpc_req_finished(it->d.lustre.it_data);
718 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease locks: on a blocking callback, cancel the lease
 * lock asynchronously (the application learns the lease broke the next
 * time it checks).  CANCELING requires no extra work here.
 */
724 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
725 struct ldlm_lock_desc *desc, void *data, int flag)
728 struct lustre_handle lockh;
732 case LDLM_CB_BLOCKING:
733 ldlm_lock2handle(lock, &lockh);
734 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
736 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
740 case LDLM_CB_CANCELING:
748 * Acquire a lease and open the file.
/* Open @inode with a lease of mode @fmode (FMODE_READ or FMODE_WRITE
 * only).  If @file is given and is the sole opener, its existing MDS
 * openhandle is handed to the MDT (op_handle) to prove same-owner.
 * Returns the new obd_client_handle or ERR_PTR on failure. */
750 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
751 fmode_t fmode, __u64 open_flags)
753 struct lookup_intent it = { .it_op = IT_OPEN };
754 struct ll_sb_info *sbi = ll_i2sbi(inode);
755 struct md_op_data *op_data;
756 struct ptlrpc_request *req;
757 struct lustre_handle old_handle = { 0 };
758 struct obd_client_handle *och = NULL;
/* Leases are only defined for pure read or pure write opens. */
763 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
764 RETURN(ERR_PTR(-EINVAL));
767 struct ll_inode_info *lli = ll_i2info(inode);
768 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
769 struct obd_client_handle **och_p;
/* The lease mode must be compatible with how @file was opened. */
772 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
773 RETURN(ERR_PTR(-EPERM));
775 /* Get the openhandle of the file */
777 mutex_lock(&lli->lli_och_mutex);
/* Only one lease per fd. */
778 if (fd->fd_lease_och != NULL) {
779 mutex_unlock(&lli->lli_och_mutex);
783 if (fd->fd_och == NULL) {
784 if (file->f_mode & FMODE_WRITE) {
785 LASSERT(lli->lli_mds_write_och != NULL);
786 och_p = &lli->lli_mds_write_och;
787 och_usecount = &lli->lli_open_fd_write_count;
789 LASSERT(lli->lli_mds_read_och != NULL);
790 och_p = &lli->lli_mds_read_och;
791 och_usecount = &lli->lli_open_fd_read_count;
/* NOTE(review): lines where the shared och is moved to fd->fd_och when
 * usecount == 1 are elided in this view — confirm in full source. */
793 if (*och_usecount == 1) {
800 mutex_unlock(&lli->lli_och_mutex);
801 if (rc < 0) /* more than 1 opener */
804 LASSERT(fd->fd_och != NULL);
805 old_handle = fd->fd_och->och_fh;
810 RETURN(ERR_PTR(-ENOMEM));
812 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
813 LUSTRE_OPC_ANY, NULL);
815 GOTO(out, rc = PTR_ERR(op_data));
817 /* To tell the MDT this openhandle is from the same owner */
818 op_data->op_handle = old_handle;
820 it.it_flags = fmode | open_flags;
821 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
822 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
823 ll_md_blocking_lease_ast,
824 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
825 * it can be cancelled which may mislead applications that the lease is
827 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
828 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
829 * doesn't deal with openhandle, so normal openhandle will be leaked. */
830 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
831 ll_finish_md_op_data(op_data);
833 ptlrpc_req_finished(req);
834 it_clear_disposition(&it, DISP_ENQ_COMPLETE);
837 GOTO(out_release_it, rc);
839 if (it_disposition(&it, DISP_LOOKUP_NEG))
840 GOTO(out_release_it, rc = -ENOENT);
842 rc = it_open_error(DISP_OPEN_OPEN, &it);
844 GOTO(out_release_it, rc);
846 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
847 ll_och_fill(sbi->ll_md_exp, &it, och);
/* A server that doesn't grant DISP_OPEN_LEASE is too old for leases. */
849 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
850 GOTO(out_close, rc = -EOPNOTSUPP);
852 /* already get lease, handle lease lock */
853 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
854 if (it.d.lustre.it_lock_mode == 0 ||
855 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
856 /* open lock must return for lease */
857 CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
858 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
859 it.d.lustre.it_lock_bits);
860 GOTO(out_close, rc = -EPROTO);
863 ll_intent_release(&it);
/* Error path: close the openhandle we obtained and drop the lock. */
867 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
869 CERROR("Close openhandle returned %d\n", rc2);
871 /* cancel open lock */
872 if (it.d.lustre.it_lock_mode != 0) {
873 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
874 it.d.lustre.it_lock_mode);
875 it.d.lustre.it_lock_mode = 0;
878 ll_intent_release(&it);
883 EXPORT_SYMBOL(ll_lease_open);
886 * Release lease and close the file.
887 * It will check if the lease has ever broken.
/* Cancel the lease lock (if not already cancelled), report whether the
 * lease was broken via @lease_broken, then close the openhandle on the
 * MDS.  Returns the close result. */
889 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
892 struct ldlm_lock *lock;
/* Default to "broken" when the lock can no longer be found. */
893 bool cancelled = true;
897 lock = ldlm_handle2lock(&och->och_lease_handle);
899 lock_res_and_lock(lock);
900 cancelled = ldlm_is_cancel(lock);
901 unlock_res_and_lock(lock);
905 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
906 PFID(&ll_i2info(inode)->lli_fid), cancelled);
909 ldlm_cli_cancel(&och->och_lease_handle, 0);
910 if (lease_broken != NULL)
911 *lease_broken = cancelled;
913 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
917 EXPORT_SYMBOL(ll_lease_close);
919 /* Fills the obdo with the attributes for the lsm */
/* Perform an async OST getattr for all stripes of @lsm and merge the
 * result into @obdo.  @sync requests a server-side lock (OBD_FL_SRVLOCK);
 * @ioepoch tags the request.  On success o_valid is masked down to the
 * fields the caller may trust. */
920 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
921 struct obd_capa *capa, struct obdo *obdo,
922 __u64 ioepoch, int sync)
924 struct ptlrpc_request_set *set;
925 struct obd_info oinfo = { { { 0 } } };
930 LASSERT(lsm != NULL);
934 oinfo.oi_oa->o_oi = lsm->lsm_oi;
935 oinfo.oi_oa->o_mode = S_IFREG;
936 oinfo.oi_oa->o_ioepoch = ioepoch;
937 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
938 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
939 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
940 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
941 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
942 OBD_MD_FLDATAVERSION;
943 oinfo.oi_capa = capa;
/* NOTE(review): the `if (sync)` guard above these two lines is elided
 * in this view — SRVLOCK applies only in the sync case. */
945 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
946 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
949 set = ptlrpc_prep_set();
951 CERROR("can't allocate ptlrpc set\n");
954 rc = obd_getattr_async(exp, &oinfo, set);
956 rc = ptlrpc_set_wait(set);
957 ptlrpc_set_destroy(set);
/* Restrict o_valid to the attributes the OSTs actually provided. */
960 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
961 OBD_MD_FLATIME | OBD_MD_FLMTIME |
962 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
963 OBD_MD_FLDATAVERSION);
968 * Performs the getattr on the inode and updates its fields.
969 * If @sync != 0, perform the getattr under the server-side lock.
971 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
972 __u64 ioepoch, int sync)
/* Grab the MDS capability and stripe metadata refs for the RPC. */
974 struct obd_capa *capa = ll_mdscapa_get(inode);
975 struct lov_stripe_md *lsm;
979 lsm = ccc_inode_lsm_get(inode);
980 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
981 capa, obdo, ioepoch, sync);
/* On success, push the merged OST attributes into the VFS inode. */
984 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
986 obdo_refresh_inode(inode, obdo, obdo->o_valid);
987 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
988 " blksize %lu\n", POSTID(oi), i_size_read(inode),
989 (unsigned long long)inode->i_blocks,
990 (unsigned long)ll_inode_blksize(inode));
992 ccc_inode_lsm_put(inode, lsm);
/*
 * Merge MDS-provided timestamps with OST-provided attributes (via the
 * cl_object attr cache) and write the result — including i_size — back
 * into the VFS inode, all under the inode size lock.
 */
996 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
998 struct ll_inode_info *lli = ll_i2info(inode);
999 struct cl_object *obj = lli->lli_clob;
1000 struct cl_attr *attr = ccc_env_thread_attr(env);
1006 ll_inode_size_lock(inode);
1007 /* merge timestamps the most recently obtained from mds with
1008 timestamps obtained from osts */
1009 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
1010 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
1011 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
1012 inode_init_lvb(inode, &lvb);
1014 cl_object_attr_lock(obj);
1015 rc = cl_object_attr_get(env, obj, attr);
1016 cl_object_attr_unlock(obj);
/* Keep the newest of each timestamp (MDS vs OST view). */
1019 if (lvb.lvb_atime < attr->cat_atime)
1020 lvb.lvb_atime = attr->cat_atime;
1021 if (lvb.lvb_ctime < attr->cat_ctime)
1022 lvb.lvb_ctime = attr->cat_ctime;
1023 if (lvb.lvb_mtime < attr->cat_mtime)
1024 lvb.lvb_mtime = attr->cat_mtime;
1026 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1027 PFID(&lli->lli_fid), attr->cat_size);
/* Safe without the i_size seq/spin lock: we hold the inode size lock. */
1028 cl_isize_write_nolock(inode, attr->cat_size);
1030 inode->i_blocks = attr->cat_blocks;
1032 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1033 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1034 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1036 ll_inode_size_unlock(inode);
/*
 * Glimpse ioctl helper: fetch OST attributes for @lsm and copy
 * size/blocks/times into the user-visible stat structure @st.
 */
1041 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1044 struct obdo obdo = { 0 };
1047 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1049 st->st_size = obdo.o_size;
1050 st->st_blocks = obdo.o_blocks;
1051 st->st_mtime = obdo.o_mtime;
1052 st->st_atime = obdo.o_atime;
1053 st->st_ctime = obdo.o_ctime;
/*
 * Decide whether this open should skip atime updates, mirroring the
 * checks done by the kernel's file_accessed()/touch_atime() (O_NOATIME,
 * inode/superblock/mount noatime flags, read-only mounts, nodiratime
 * for directories).  NOTE(review): the `return` statements after each
 * condition are elided in this view.
 */
1058 static bool file_is_noatime(const struct file *file)
1060 const struct vfsmount *mnt = file->f_path.mnt;
1061 const struct inode *inode = file->f_path.dentry->d_inode;
1063 /* Adapted from file_accessed() and touch_atime().*/
1064 if (file->f_flags & O_NOATIME)
1067 if (inode->i_flags & S_NOATIME)
1070 if (IS_NOATIME(inode))
1073 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1076 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1079 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: nonblocking/append/sync flags, lock requirements, and noatime.
 */
1085 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1087 struct inode *inode = file->f_dentry->d_inode;
1089 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
/* NOTE(review): the `if (write)` guard above these lines is elided —
 * wr_append/wr_sync are write-only fields. */
1091 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1092 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1093 file->f_flags & O_DIRECT ||
1096 io->ci_obj = ll_i2info(inode)->lli_clob;
1097 io->ci_lockreq = CILR_MAYBE;
/* Group-lock / liblustre-style opens never take DLM extent locks, but
 * then the server must lock for us (no_srvlock disabled => srvlock). */
1098 if (ll_file_nolock(file)) {
1099 io->ci_lockreq = CILR_NEVER;
1100 io->ci_no_srvlock = 1;
1101 } else if (file->f_flags & O_APPEND) {
1102 io->ci_lockreq = CILR_MANDATORY;
1105 io->ci_noatime = file_is_noatime(file);
/*
 * Common engine for all read/write entry points (normal iov, sendfile,
 * splice).  Sets up the cl_io, takes lli_write_mutex for non-grouplock
 * writes (lli_trunc_sem for reads), runs the IO loop, updates *ppos,
 * restarts once if a short, restartable IO happened, and maintains
 * per-mount stats and fd_write_failed.
 */
1109 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1110 struct file *file, enum cl_io_type iot,
1111 loff_t *ppos, size_t count)
1113 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1114 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1120 io = ccc_env_thread_io(env);
1121 ll_io_init(io, file, iot == CIT_WRITE);
1123 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1124 struct vvp_io *vio = vvp_env_io(env);
1125 struct ccc_io *cio = ccc_env_io(env);
1126 int write_mutex_locked = 0;
1128 cio->cui_fd = LUSTRE_FPRIVATE(file);
1129 vio->cui_io_subtype = args->via_io_subtype;
/* NOTE(review): the case labels (IO_NORMAL/IO_SENDFILE/IO_SPLICE) for
 * this switch are elided in this view. */
1131 switch (vio->cui_io_subtype) {
1133 cio->cui_iov = args->u.normal.via_iov;
1134 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1135 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1136 cio->cui_iocb = args->u.normal.via_iocb;
/* Serialize writers unless a group lock already does so. */
1137 if ((iot == CIT_WRITE) &&
1138 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1139 if (mutex_lock_interruptible(&lli->
1141 GOTO(out, result = -ERESTARTSYS);
1142 write_mutex_locked = 1;
1143 } else if (iot == CIT_READ) {
/* Readers only need to exclude concurrent truncate. */
1144 down_read(&lli->lli_trunc_sem);
1148 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1149 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
1152 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1153 vio->u.splice.cui_flags = args->u.splice.via_flags;
1156 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
1159 result = cl_io_loop(env, io);
1160 if (write_mutex_locked)
1161 mutex_unlock(&lli->lli_write_mutex);
1162 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
1163 up_read(&lli->lli_trunc_sem);
1165 /* cl_io_rw_init() handled IO */
1166 result = io->ci_result;
/* ci_nob is the number of bytes actually transferred. */
1169 if (io->ci_nob > 0) {
1170 result = io->ci_nob;
1171 *ppos = io->u.ci_wr.wr.crw_pos;
1175 cl_io_fini(env, io);
1176 /* If any bit been read/written (result != 0), we just return
1177 * short read/write instead of restart io. */
1178 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1179 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1180 iot == CIT_READ ? "read" : "write",
1181 file->f_dentry->d_name.name, *ppos, count);
1182 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1186 if (iot == CIT_READ) {
1188 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1189 LPROC_LL_READ_BYTES, result);
1190 } else if (iot == CIT_WRITE) {
1192 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1193 LPROC_LL_WRITE_BYTES, result);
1194 fd->fd_write_failed = false;
/* -ERESTARTSYS is not a real failure; anything else marks the fd so
 * a later fsync/close can report the write error. */
1195 } else if (result != -ERESTARTSYS) {
1196 fd->fd_write_failed = true;
1205 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array: reject negative lengths / wrapping totals,
 * truncate the segment count at the first inaccessible segment, and
 * return the cumulative byte count through *count. */
1207 static int ll_file_get_iov_count(const struct iovec *iov,
1208 unsigned long *nr_segs, size_t *count)
1213 for (seg = 0; seg < *nr_segs; seg++) {
1214 const struct iovec *iv = &iov[seg];
1217 * If any segment has a negative length, or the cumulative
1218 * length ever wraps negative then return -EINVAL.
1221 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
/* NOTE(review): in the upstream kernel this access_ok() check is
 * followed by `continue;` with the truncation path below it — the
 * intervening lines are elided in this view, so the condition only
 * LOOKS inverted.  Confirm against __generic_file_aio_write_nolock. */
1223 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1228 cnt -= iv->iov_len; /* This segment is no good */
/*
 * AIO read entry point: validate the iovec, package it into the
 * per-env vvp_io_args (IO_NORMAL), and run the generic client I/O
 * path as CIT_READ, advancing iocb->ki_pos.
 */
1235 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1236 unsigned long nr_segs, loff_t pos)
1239 struct vvp_io_args *args;
1245 result = ll_file_get_iov_count(iov, &nr_segs, &count);
/* acquire a cl_env context; must be paired with cl_env_put() below */
1249 env = cl_env_get(&refcheck);
1251 RETURN(PTR_ERR(env));
1253 args = vvp_env_args(env, IO_NORMAL);
1254 args->u.normal.via_iov = (struct iovec *)iov;
1255 args->u.normal.via_nrsegs = nr_segs;
1256 args->u.normal.via_iocb = iocb;
1258 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1259 &iocb->ki_pos, count);
1260 cl_env_put(env, &refcheck);
/*
 * Synchronous read(2) path: build a single-segment iovec and a sync
 * kiocb in the env-local scratch area, then delegate to
 * ll_file_aio_read().  The updated position is copied back to *ppos.
 */
1264 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1268 struct iovec *local_iov;
1269 struct kiocb *kiocb;
1274 env = cl_env_get(&refcheck);
1276 RETURN(PTR_ERR(env));
1278 local_iov = &vvp_env_info(env)->vti_local_iov;
1279 kiocb = &vvp_env_info(env)->vti_kiocb;
1280 local_iov->iov_base = (void __user *)buf;
1281 local_iov->iov_len = count;
1282 init_sync_kiocb(kiocb, file);
1283 kiocb->ki_pos = *ppos;
1284 kiocb->ki_left = count;
1286 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
/* propagate the (possibly advanced) file position back to the caller */
1287 *ppos = kiocb->ki_pos;
1289 cl_env_put(env, &refcheck);
1294 * Write to a file (through the page cache).
/*
 * AIO write entry point: symmetric to ll_file_aio_read() but runs the
 * generic client I/O path as CIT_WRITE, advancing iocb->ki_pos.
 */
1297 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1298 unsigned long nr_segs, loff_t pos)
1301 struct vvp_io_args *args;
1307 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1311 env = cl_env_get(&refcheck);
1313 RETURN(PTR_ERR(env));
1315 args = vvp_env_args(env, IO_NORMAL);
1316 args->u.normal.via_iov = (struct iovec *)iov;
1317 args->u.normal.via_nrsegs = nr_segs;
1318 args->u.normal.via_iocb = iocb;
1320 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1321 &iocb->ki_pos, count);
1322 cl_env_put(env, &refcheck);
/*
 * Synchronous write(2) path: build a single-segment iovec and a sync
 * kiocb, delegate to ll_file_aio_write(), and copy the resulting file
 * position back to *ppos.
 */
1326 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1330 struct iovec *local_iov;
1331 struct kiocb *kiocb;
1336 env = cl_env_get(&refcheck);
1338 RETURN(PTR_ERR(env));
1340 local_iov = &vvp_env_info(env)->vti_local_iov;
1341 kiocb = &vvp_env_info(env)->vti_kiocb;
1342 local_iov->iov_base = (void __user *)buf;
1343 local_iov->iov_len = count;
1344 init_sync_kiocb(kiocb, file);
1345 kiocb->ki_pos = *ppos;
1346 kiocb->ki_left = count;
1348 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1349 *ppos = kiocb->ki_pos;
1351 cl_env_put(env, &refcheck);
1356 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read handler: package the pipe/flags into IO_SPLICE args and
 * run the generic client I/O path as CIT_READ from *ppos.
 */
1358 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1359 struct pipe_inode_info *pipe, size_t count,
1363 struct vvp_io_args *args;
1368 env = cl_env_get(&refcheck);
1370 RETURN(PTR_ERR(env));
1372 args = vvp_env_args(env, IO_SPLICE);
1373 args->u.splice.via_pipe = pipe;
1374 args->u.splice.via_flags = flags;
1376 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1377 cl_env_put(env, &refcheck);
/*
 * Ask the data export to re-create the OST objects backing this
 * inode's stripes.  A private copy of the stripe metadata (lsm2) is
 * handed to obd_create() with OBD_FL_RECREATE_OBJS set; the target OST
 * index is smuggled through oa->o_nlink.  Fails with -ENOENT if the
 * inode has no stripe objects.
 */
1381 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
1384 struct obd_export *exp = ll_i2dtexp(inode);
1385 struct obd_trans_info oti = { 0 };
1386 struct obdo *oa = NULL;
1389 struct lov_stripe_md *lsm = NULL, *lsm2;
1396 lsm = ccc_inode_lsm_get(inode);
1397 if (!lsm_has_objects(lsm))
1398 GOTO(out, rc = -ENOENT);
/* lsm is a header plus one lov_oinfo per stripe */
1400 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1401 (lsm->lsm_stripe_count));
1403 OBD_ALLOC_LARGE(lsm2, lsm_size);
1405 GOTO(out, rc = -ENOMEM);
/* o_nlink carries the OST index for the recreate request */
1408 oa->o_nlink = ost_idx;
1409 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1410 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1411 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1412 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1413 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1414 memcpy(lsm2, lsm, lsm_size);
/* hold the size lock across the create, as for other size-changing ops */
1415 ll_inode_size_lock(inode);
1416 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1417 ll_inode_size_unlock(inode);
1419 OBD_FREE_LARGE(lsm2, lsm_size);
1422 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_RECREATE_OBJ: admin-only ioctl helper.  Copies a
 * ll_recreate_obj request from userspace, builds an MDT0-sequence
 * ost_id from lrc_id and recreates the object on lrc_ost_idx.
 */
1427 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1429 struct ll_recreate_obj ucreat;
1433 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1436 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1440 ostid_set_seq_mdt0(&oi);
1441 ostid_set_id(&oi, ucreat.lrc_id);
1442 RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID: admin-only ioctl helper.  Copies a lu_fid from
 * userspace, converts it to an ost_id, and derives the OST index from
 * bits 16..31 of the FID sequence before recreating the object.
 */
1445 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1452 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1455 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1458 fid_to_ostid(&fid, &oi);
1459 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1460 RETURN(ll_lov_recreate(inode, &oi, ost_idx));
/*
 * Apply striping (a lov_user_md EA) to a file by re-opening it through
 * an IT_OPEN intent carrying the layout.  Fails early if the inode
 * already has a stripe (layouts cannot be replaced this way).  The
 * open handle obtained for the intent is released before returning.
 */
1463 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1464 __u64 flags, struct lov_user_md *lum,
1467 struct lov_stripe_md *lsm = NULL;
1468 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1472 lsm = ccc_inode_lsm_get(inode);
1474 ccc_inode_lsm_put(inode, lsm);
1475 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
/* size lock serializes against concurrent size/layout updates */
1480 ll_inode_size_lock(inode);
1481 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1484 rc = oit.d.lustre.it_status;
1486 GOTO(out_req_free, rc);
/* the intent-open handle is only needed to set the EA; close it now */
1488 ll_release_openhandle(file->f_dentry, &oit);
1491 ll_inode_size_unlock(inode);
1492 ll_intent_release(&oit);
1493 ccc_inode_lsm_put(inode, lsm);
1496 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (striping metadata) for @filename via an MDS
 * getattr-by-name.  On success *lmmp points into the reply buffer of
 * *request (caller owns the request and must finish it), and *lmm_size
 * is set.  On a little-endian-differing host the EA is byte-swapped in
 * place to host order before being returned.
 */
1500 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1501 struct lov_mds_md **lmmp, int *lmm_size,
1502 struct ptlrpc_request **request)
1504 struct ll_sb_info *sbi = ll_i2sbi(inode);
1505 struct mdt_body *body;
1506 struct lov_mds_md *lmm = NULL;
1507 struct ptlrpc_request *req = NULL;
1508 struct md_op_data *op_data;
1511 rc = ll_get_max_mdsize(sbi, &lmmsize);
1515 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1516 strlen(filename), lmmsize,
1517 LUSTRE_OPC_ANY, NULL);
1518 if (IS_ERR(op_data))
1519 RETURN(PTR_ERR(op_data));
1521 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1522 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1523 ll_finish_md_op_data(op_data);
1525 CDEBUG(D_INFO, "md_getattr_name failed "
1526 "on %s: rc %d\n", filename, rc);
1530 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1531 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1533 lmmsize = body->eadatasize;
/* no EA bits valid, or empty EA => the object has no striping */
1535 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1537 GOTO(out, rc = -ENODATA);
1540 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1541 LASSERT(lmm != NULL);
/* only V1/V3 layouts are understood here; anything else is a protocol error */
1543 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1544 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1545 GOTO(out, rc = -EPROTO);
1549 * This is coming from the MDS, so is probably in
1550 * little endian. We convert it to host endian before
1551 * passing it to userspace.
1553 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1556 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1557 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1560 /* if function called for directory - we should
1561 * avoid swab not existent lsm objects */
1562 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1563 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1564 if (S_ISREG(body->mode))
1565 lustre_swab_lov_user_md_objects(
1566 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1568 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1569 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1570 if (S_ISREG(body->mode))
1571 lustre_swab_lov_user_md_objects(
1572 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1579 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA: admin-only.  Copies a lov_user_md (with one
 * trailing ost_data entry) from userspace and applies it via
 * ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS|FMODE_WRITE.
 */
1584 static int ll_lov_setea(struct inode *inode, struct file *file,
1587 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1588 struct lov_user_md *lump;
1589 int lum_size = sizeof(struct lov_user_md) +
1590 sizeof(struct lov_user_ost_data);
1594 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1597 OBD_ALLOC_LARGE(lump, lum_size);
1601 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1602 OBD_FREE_LARGE(lump, lum_size);
1606 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1608 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE: copy the user layout request (first as V1,
 * re-copied as V3 if the magic says so), apply it, then refresh the
 * layout generation and echo the resulting stripe info back to the
 * caller's buffer via LL_IOC_LOV_GETSTRIPE.
 */
1612 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1615 struct lov_user_md_v3 lumv3;
1616 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1617 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1618 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1620 __u64 flags = FMODE_WRITE;
1623 /* first try with v1 which is smaller than v3 */
1624 lum_size = sizeof(struct lov_user_md_v1);
1625 if (copy_from_user(lumv1, lumv1p, lum_size))
1628 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1629 lum_size = sizeof(struct lov_user_md_v3);
1630 if (copy_from_user(&lumv3, lumv3p, lum_size))
1634 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1636 struct lov_stripe_md *lsm;
/* clear the user's stripe_count first in case GETSTRIPE fails */
1639 put_user(0, &lumv1p->lmm_stripe_count);
1641 ll_layout_refresh(inode, &gen);
1642 lsm = ccc_inode_lsm_get(inode);
1643 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1644 0, lsm, (void *)arg);
1645 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_LOV_GETSTRIPE: hand the inode's stripe metadata to the LOV
 * iocontrol, which copies the layout out to the user buffer in @arg
 * (NOTE(review): copy-out is performed inside obd_iocontrol — confirm).
 */
1650 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1652 struct lov_stripe_md *lsm;
1656 lsm = ccc_inode_lsm_get(inode);
1658 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1660 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_GROUP_LOCK: take a cl-layer group lock with group id @arg and
 * record it in the per-open-file data.  lli_lock is dropped while
 * cl_get_grouplock() may block, so the LL_FILE_GROUP_LOCKED flag is
 * re-checked afterwards to handle a racing thread.
 */
1664 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1666 struct ll_inode_info *lli = ll_i2info(inode);
1667 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1668 struct ccc_grouplock grouplock;
1672 if (ll_file_nolock(file))
1673 RETURN(-EOPNOTSUPP);
1675 spin_lock(&lli->lli_lock);
1676 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1677 CWARN("group lock already existed with gid %lu\n",
1678 fd->fd_grouplock.cg_gid);
1679 spin_unlock(&lli->lli_lock);
1682 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1683 spin_unlock(&lli->lli_lock);
/* may block unless the file was opened O_NONBLOCK */
1685 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1686 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* re-check under the lock: another thread may have won meanwhile */
1690 spin_lock(&lli->lli_lock);
1691 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1692 spin_unlock(&lli->lli_lock);
1693 CERROR("another thread just won the race\n");
1694 cl_put_grouplock(&grouplock);
1698 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1699 fd->fd_grouplock = grouplock;
1700 spin_unlock(&lli->lli_lock);
1702 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK: release the group lock with id @arg held on
 * this open file.  The stored handle is detached from fd under
 * lli_lock, then released outside the spinlock.
 */
1706 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1708 struct ll_inode_info *lli = ll_i2info(inode);
1709 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1710 struct ccc_grouplock grouplock;
1713 spin_lock(&lli->lli_lock);
1714 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1715 spin_unlock(&lli->lli_lock);
1716 CWARN("no group lock held\n");
1719 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* refuse to release a lock with a different group id */
1721 if (fd->fd_grouplock.cg_gid != arg) {
1722 CWARN("group lock %lu doesn't match current id %lu\n",
1723 arg, fd->fd_grouplock.cg_gid);
1724 spin_unlock(&lli->lli_lock);
1728 grouplock = fd->fd_grouplock;
1729 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1730 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1731 spin_unlock(&lli->lli_lock);
/* drop the lock outside lli_lock */
1733 cl_put_grouplock(&grouplock);
1734 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1739 * Close inode open handle
1741 * \param dentry [in] dentry which contains the inode
1742 * \param it [in,out] intent which contains open info and result
1745 * \retval <0 failure
1747 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1749 struct inode *inode = dentry->d_inode;
1750 struct obd_client_handle *och;
1756 /* Root ? Do nothing. */
1757 if (dentry->d_inode->i_sb->s_root == dentry)
1760 /* No open handle to close? Move away */
1761 if (!it_disposition(it, DISP_OPEN_OPEN))
1764 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
/* temporary och, filled from the intent and closed immediately */
1766 OBD_ALLOC(och, sizeof(*och));
1768 GOTO(out, rc = -ENOMEM);
1770 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1772 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1775 /* this one is in place of ll_file_open */
1776 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1777 ptlrpc_req_finished(it->d.lustre.it_data);
1778 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1784 * Get size for inode for which FIEMAP mapping is requested.
1785 * Make the FIEMAP get_info call and returns the result.
/*
 * Execute a FIEMAP request against the data export.  Rejects
 * unsupported flags (reporting the supported set back to the caller),
 * honours FIEMAP_FLAG_SYNC by flushing dirty pages first, and refuses
 * multi-stripe mappings unless the caller accepts DEVICE_ORDER.  A
 * zero-size file short-circuits with no extents.
 */
1787 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1790 struct obd_export *exp = ll_i2dtexp(inode);
1791 struct lov_stripe_md *lsm = NULL;
1792 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1793 int vallen = num_bytes;
1797 /* Checks for fiemap flags */
1798 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* tell userspace which flags we do support */
1799 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1803 /* Check for FIEMAP_FLAG_SYNC */
1804 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1805 rc = filemap_fdatawrite(inode->i_mapping);
1810 lsm = ccc_inode_lsm_get(inode);
1814 /* If the stripe_count > 1 and the application does not understand
1815 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1817 if (lsm->lsm_stripe_count > 1 &&
1818 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1819 GOTO(out, rc = -EOPNOTSUPP);
1821 fm_key.oa.o_oi = lsm->lsm_oi;
1822 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1824 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1825 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1826 /* If filesize is 0, then there would be no objects for mapping */
1827 if (fm_key.oa.o_size == 0) {
1828 fiemap->fm_mapped_extents = 0;
1832 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1834 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1837 CERROR("obd_get_info failed: rc = %d\n", rc);
1840 ccc_inode_lsm_put(inode, lsm);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a path via the MD export.
 * Permission requires CAP_DAC_READ_SEARCH unless the mount allows
 * user fid2path.  The output buffer is sized from the user-supplied
 * gf_pathlen and the result copied back in full.
 */
1844 int ll_fid2path(struct inode *inode, void *arg)
1846 struct obd_export *exp = ll_i2mdexp(inode);
1847 struct getinfo_fid2path *gfout, *gfin;
1851 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1852 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1855 /* Need to get the buflen */
1856 OBD_ALLOC_PTR(gfin);
1859 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* output = fixed header + caller-requested path length */
1864 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1865 OBD_ALLOC(gfout, outsize);
1866 if (gfout == NULL) {
1870 memcpy(gfout, gfin, sizeof(*gfout));
1873 /* Call mdc_iocontrol */
1874 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1878 if (copy_to_user(arg, gfout, outsize))
1882 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP entry: size the kernel fiemap buffer from the
 * user's fm_extent_count, copy the request (and, when extents were
 * requested, the first extent, used for continuation), run
 * ll_do_fiemap() and copy the header plus mapped extents back.
 */
1886 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1888 struct ll_user_fiemap *fiemap_s;
1889 size_t num_bytes, ret_bytes;
1890 unsigned int extent_count;
1893 /* Get the extent count so we can calculate the size of
1894 * required fiemap buffer */
1895 if (get_user(extent_count,
1896 &((struct ll_user_fiemap __user *)arg)->fm_extent_count)
1898 num_bytes = sizeof(*fiemap_s) + (extent_count *
1899 sizeof(struct ll_fiemap_extent));
1901 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1902 if (fiemap_s == NULL)
1905 /* get the fiemap value */
1906 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1908 GOTO(error, rc = -EFAULT);
1910 /* If fm_extent_count is non-zero, read the first extent since
1911 * it is used to calculate end_offset and device from previous
1914 if (copy_from_user(&fiemap_s->fm_extents[0],
1915 (char __user *)arg + sizeof(*fiemap_s),
1916 sizeof(struct ll_fiemap_extent)))
1917 GOTO(error, rc = -EFAULT);
1920 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
/* copy back the header, plus extents only if any were requested */
1924 ret_bytes = sizeof(struct ll_user_fiemap);
1926 if (extent_count != 0)
1927 ret_bytes += (fiemap_s->fm_mapped_extents *
1928 sizeof(struct ll_fiemap_extent));
1930 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1934 OBD_FREE_LARGE(fiemap_s, num_bytes);
1939 * Read the data_version for inode.
1941 * This value is computed using stripe object version on OST.
1942 * Version is computed using server side locking.
1944 * @param extent_lock Take extent lock. Not needed if a process is already
1945 * holding the OST object group locks.
1947 int ll_data_version(struct inode *inode, __u64 *data_version,
1950 struct lov_stripe_md *lsm = NULL;
1951 struct ll_sb_info *sbi = ll_i2sbi(inode);
1952 struct obdo *obdo = NULL;
1956 /* If no stripe, we consider version is 0. */
1957 lsm = ccc_inode_lsm_get(inode);
1958 if (!lsm_has_objects(lsm)) {
1960 CDEBUG(D_INODE, "No object for inode\n");
1964 OBD_ALLOC_PTR(obdo);
1966 GOTO(out, rc = -ENOMEM);
/* extent_lock==0 relies on the caller already holding OST group locks */
1968 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
1970 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1973 *data_version = obdo->o_data_version;
1979 ccc_inode_lsm_put(inode, lsm);
1984 * Trigger a HSM release request for the provided inode.
1986 int ll_hsm_release(struct inode *inode)
1988 struct cl_env_nest nest;
1990 struct obd_client_handle *och = NULL;
1991 __u64 data_version = 0;
1995 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1996 ll_get_fsname(inode->i_sb, NULL, 0),
1997 PFID(&ll_i2info(inode)->lli_fid));
/* take a write lease so the release is atomic w.r.t. other opens */
1999 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
2001 GOTO(out, rc = PTR_ERR(och));
2003 /* Grab latest data_version and [am]time values */
2004 rc = ll_data_version(inode, &data_version, 1);
2008 env = cl_env_nested_get(&nest);
2010 GOTO(out, rc = PTR_ERR(env));
/* fold OST attributes into the inode before the close RPC */
2012 ll_merge_lvb(env, inode);
2013 cl_env_nested_put(&nest, env);
2015 /* Release the file.
2016 * NB: lease lock handle is released in mdc_hsm_release_pack() because
2017 * we still need it to pack l_remote_handle to MDT. */
2018 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
2024 if (och != NULL && !IS_ERR(och)) /* close the file */
2025 ll_lease_close(och, inode, NULL);
/* Scratch state for ll_swap_layouts(): saved [am]times for restore,
 * the two inodes (kept in FID order), and the data-version checks
 * requested by the caller. */
2030 struct ll_swap_stack {
2031 struct iattr ia1, ia2;
2033 struct inode *inode1, *inode2;
2034 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS implementation: atomically exchange the
 * layouts of two regular files on the same filesystem.  Inodes are
 * ordered by FID to avoid lock inversion; an optional group lock (gid)
 * flushes dirty cache; optional data-version checks abort with -EAGAIN
 * if either file changed; [am]times may be preserved on request.
 */
2037 static int ll_swap_layouts(struct file *file1, struct file *file2,
2038 struct lustre_swap_layouts *lsl)
2040 struct mdc_swap_layouts msl;
2041 struct md_op_data *op_data;
2044 struct ll_swap_stack *llss = NULL;
2047 OBD_ALLOC_PTR(llss);
2051 llss->inode1 = file1->f_dentry->d_inode;
2052 llss->inode2 = file2->f_dentry->d_inode;
2054 if (!S_ISREG(llss->inode2->i_mode))
2055 GOTO(free, rc = -EINVAL);
2057 if (inode_permission(llss->inode1, MAY_WRITE) ||
2058 inode_permission(llss->inode2, MAY_WRITE))
2059 GOTO(free, rc = -EPERM);
2061 if (llss->inode2->i_sb != llss->inode1->i_sb)
2062 GOTO(free, rc = -EXDEV);
2064 /* we use 2 bool because it is easier to swap than 2 bits */
2065 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2066 llss->check_dv1 = true;
2068 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2069 llss->check_dv2 = true;
2071 /* we cannot use lsl->sl_dvX directly because we may swap them */
2072 llss->dv1 = lsl->sl_dv1;
2073 llss->dv2 = lsl->sl_dv2;
2075 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2076 if (rc == 0) /* same file, done! */
/* order by FID so two concurrent swaps cannot deadlock */
2079 if (rc < 0) { /* sequentialize it */
2080 swap(llss->inode1, llss->inode2);
2082 swap(llss->dv1, llss->dv2);
2083 swap(llss->check_dv1, llss->check_dv2);
2087 if (gid != 0) { /* application asks to flush dirty cache */
2088 rc = ll_get_grouplock(llss->inode1, file1, gid);
2092 rc = ll_get_grouplock(llss->inode2, file2, gid);
2094 ll_put_grouplock(llss->inode1, file1, gid);
2099 /* to be able to restore mtime and atime after swap
2100 * we need to first save them */
2102 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2103 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2104 llss->ia1.ia_atime = llss->inode1->i_atime;
2105 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2106 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2107 llss->ia2.ia_atime = llss->inode2->i_atime;
2108 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2111 /* ultimate check, before swaping the layouts we check if
2112 * dataversion has changed (if requested) */
2113 if (llss->check_dv1) {
2114 rc = ll_data_version(llss->inode1, &dv, 0);
2117 if (dv != llss->dv1)
2118 GOTO(putgl, rc = -EAGAIN);
2121 if (llss->check_dv2) {
2122 rc = ll_data_version(llss->inode2, &dv, 0);
2125 if (dv != llss->dv2)
2126 GOTO(putgl, rc = -EAGAIN);
2129 /* struct md_op_data is used to send the swap args to the mdt
2130 * only flags is missing, so we use struct mdc_swap_layouts
2131 * through the md_op_data->op_data */
2132 /* flags from user space have to be converted before they are send to
2133 * server, no flag is sent today, they are only used on the client */
2136 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2137 0, LUSTRE_OPC_ANY, &msl);
2138 if (IS_ERR(op_data))
2139 GOTO(free, rc = PTR_ERR(op_data));
2141 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2142 sizeof(*op_data), op_data, NULL);
2143 ll_finish_md_op_data(op_data);
2147 ll_put_grouplock(llss->inode2, file2, gid);
2148 ll_put_grouplock(llss->inode1, file1, gid);
2151 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2155 /* clear useless flags */
2156 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2157 llss->ia1.ia_valid &= ~ATTR_MTIME;
2158 llss->ia2.ia_valid &= ~ATTR_MTIME;
2161 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2162 llss->ia1.ia_valid &= ~ATTR_ATIME;
2163 llss->ia2.ia_valid &= ~ATTR_ATIME;
2166 /* update time if requested */
/* note: ia2 is applied to inode1 and vice versa — the times follow the data */
2168 if (llss->ia2.ia_valid != 0) {
2169 mutex_lock(&llss->inode1->i_mutex);
2170 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2171 mutex_unlock(&llss->inode1->i_mutex);
2174 if (llss->ia1.ia_valid != 0) {
2177 mutex_lock(&llss->inode2->i_mutex);
2178 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2179 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Send an HSM state-set request (set/clear flag masks) for @inode to
 * the MDT.  Flags outside HSM_USER_MASK require CAP_SYS_ADMIN.
 */
2191 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2193 struct md_op_data *op_data;
2196 /* Non-root users are forbidden to set or clear flags which are
2197 * NOT defined in HSM_USER_MASK. */
2198 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2199 !cfs_capable(CFS_CAP_SYS_ADMIN))
2202 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2203 LUSTRE_OPC_ANY, hss);
2204 if (IS_ERR(op_data))
2205 RETURN(PTR_ERR(op_data));
2207 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2208 sizeof(*op_data), op_data, NULL);
2210 ll_finish_md_op_data(op_data);
/*
 * LL_IOC_HSM_IMPORT: register a file that already exists in the HSM
 * archive.  Marks the file ARCHIVED|EXISTS|RELEASED via
 * ll_hsm_state_set(), then forces the user-supplied mode/ownership/
 * size/timestamps onto the inode with ll_setattr_raw().
 */
2215 static int ll_hsm_import(struct inode *inode, struct file *file,
2216 struct hsm_user_import *hui)
2218 struct hsm_state_set *hss = NULL;
2219 struct iattr *attr = NULL;
2223 if (!S_ISREG(inode->i_mode))
2229 GOTO(out, rc = -ENOMEM);
2231 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2232 hss->hss_archive_id = hui->hui_archive_id;
2233 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2234 rc = ll_hsm_state_set(inode, hss);
2238 OBD_ALLOC_PTR(attr);
2240 GOTO(out, rc = -ENOMEM);
/* only permission bits are taken from the user; type is forced to regular */
2242 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2243 attr->ia_mode |= S_IFREG;
2244 attr->ia_uid = hui->hui_uid;
2245 attr->ia_gid = hui->hui_gid;
2246 attr->ia_size = hui->hui_size;
2247 attr->ia_mtime.tv_sec = hui->hui_mtime;
2248 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2249 attr->ia_atime.tv_sec = hui->hui_atime;
2250 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2252 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2253 ATTR_UID | ATTR_GID |
2254 ATTR_MTIME | ATTR_MTIME_SET |
2255 ATTR_ATIME | ATTR_ATIME_SET;
2257 rc = ll_setattr_raw(file->f_dentry, attr, true);
/*
 * Main ioctl dispatcher for regular files.  Each case is largely a
 * thin wrapper around a helper above; unknown commands fall through to
 * ll_iocontrol_call() and finally the data export's obd_iocontrol().
 */
2271 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2273 struct inode *inode = file->f_dentry->d_inode;
2274 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2278 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
2279 inode->i_generation, inode, cmd);
2280 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2282 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2283 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2287 case LL_IOC_GETFLAGS:
2288 /* Get the current value of the file flags */
2289 return put_user(fd->fd_flags, (int *)arg);
2290 case LL_IOC_SETFLAGS:
2291 case LL_IOC_CLRFLAGS:
2292 /* Set or clear specific file flags */
2293 /* XXX This probably needs checks to ensure the flags are
2294 * not abused, and to handle any flag side effects.
2296 if (get_user(flags, (int *) arg))
2299 if (cmd == LL_IOC_SETFLAGS) {
/* IGNORE_LOCK is only safe for O_DIRECT I/O */
2300 if ((flags & LL_FILE_IGNORE_LOCK) &&
2301 !(file->f_flags & O_DIRECT)) {
2302 CERROR("%s: unable to disable locking on "
2303 "non-O_DIRECT file\n", current->comm);
2307 fd->fd_flags |= flags;
2309 fd->fd_flags &= ~flags;
2312 case LL_IOC_LOV_SETSTRIPE:
2313 RETURN(ll_lov_setstripe(inode, file, arg));
2314 case LL_IOC_LOV_SETEA:
2315 RETURN(ll_lov_setea(inode, file, arg));
2316 case LL_IOC_LOV_SWAP_LAYOUTS: {
2318 struct lustre_swap_layouts lsl;
2320 if (copy_from_user(&lsl, (char *)arg,
2321 sizeof(struct lustre_swap_layouts)))
/* both files must be writable for a layout swap */
2324 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2327 file2 = fget(lsl.sl_fd);
2332 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2333 rc = ll_swap_layouts(file, file2, &lsl);
2337 case LL_IOC_LOV_GETSTRIPE:
2338 RETURN(ll_lov_getstripe(inode, arg));
2339 case LL_IOC_RECREATE_OBJ:
2340 RETURN(ll_lov_recreate_obj(inode, arg));
2341 case LL_IOC_RECREATE_FID:
2342 RETURN(ll_lov_recreate_fid(inode, arg));
2343 case FSFILT_IOC_FIEMAP:
2344 RETURN(ll_ioctl_fiemap(inode, arg));
2345 case FSFILT_IOC_GETFLAGS:
2346 case FSFILT_IOC_SETFLAGS:
2347 RETURN(ll_iocontrol(inode, file, cmd, arg));
2348 case FSFILT_IOC_GETVERSION_OLD:
2349 case FSFILT_IOC_GETVERSION:
2350 RETURN(put_user(inode->i_generation, (int *)arg));
2351 case LL_IOC_GROUP_LOCK:
2352 RETURN(ll_get_grouplock(inode, file, arg));
2353 case LL_IOC_GROUP_UNLOCK:
2354 RETURN(ll_put_grouplock(inode, file, arg));
2355 case IOC_OBD_STATFS:
2356 RETURN(ll_obd_statfs(inode, (void *)arg));
2358 /* We need to special case any other ioctls we want to handle,
2359 * to send them to the MDS/OST as appropriate and to properly
2360 * network encode the arg field.
2361 case FSFILT_IOC_SETVERSION_OLD:
2362 case FSFILT_IOC_SETVERSION:
2364 case LL_IOC_FLUSHCTX:
2365 RETURN(ll_flush_ctx(inode));
2366 case LL_IOC_PATH2FID: {
2367 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2368 sizeof(struct lu_fid)))
2373 case OBD_IOC_FID2PATH:
2374 RETURN(ll_fid2path(inode, (void *)arg));
2375 case LL_IOC_DATA_VERSION: {
2376 struct ioc_data_version idv;
2379 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
2382 rc = ll_data_version(inode, &idv.idv_version,
2383 !(idv.idv_flags & LL_DV_NOFLUSH));
2385 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2391 case LL_IOC_GET_MDTIDX: {
2394 mdtidx = ll_get_mdt_idx(inode);
2398 if (put_user((int)mdtidx, (int*)arg))
2403 case OBD_IOC_GETDTNAME:
2404 case OBD_IOC_GETMDNAME:
2405 RETURN(ll_get_obd_name(inode, cmd, arg));
2406 case LL_IOC_HSM_STATE_GET: {
2407 struct md_op_data *op_data;
2408 struct hsm_user_state *hus;
2415 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2416 LUSTRE_OPC_ANY, hus);
2417 if (IS_ERR(op_data)) {
2419 RETURN(PTR_ERR(op_data));
2422 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2425 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2428 ll_finish_md_op_data(op_data);
2432 case LL_IOC_HSM_STATE_SET: {
2433 struct hsm_state_set *hss;
2440 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2445 rc = ll_hsm_state_set(inode, hss);
2450 case LL_IOC_HSM_ACTION: {
2451 struct md_op_data *op_data;
2452 struct hsm_current_action *hca;
2459 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2460 LUSTRE_OPC_ANY, hca);
2461 if (IS_ERR(op_data)) {
2463 RETURN(PTR_ERR(op_data));
2466 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2469 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2472 ll_finish_md_op_data(op_data);
2476 case LL_IOC_SET_LEASE: {
2477 struct ll_inode_info *lli = ll_i2info(inode);
2478 struct obd_client_handle *och = NULL;
/* lease mode must match the file's open mode */
2484 if (!(file->f_mode & FMODE_WRITE))
2489 if (!(file->f_mode & FMODE_READ))
2494 mutex_lock(&lli->lli_och_mutex);
2495 if (fd->fd_lease_och != NULL) {
2496 och = fd->fd_lease_och;
2497 fd->fd_lease_och = NULL;
2499 mutex_unlock(&lli->lli_och_mutex);
2502 mode = och->och_flags &(FMODE_READ|FMODE_WRITE);
2503 rc = ll_lease_close(och, inode, &lease_broken);
2504 if (rc == 0 && lease_broken)
2510 /* return the type of lease or error */
2511 RETURN(rc < 0 ? rc : (int)mode);
2516 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2518 /* apply for lease */
2519 och = ll_lease_open(inode, file, mode, 0);
2521 RETURN(PTR_ERR(och));
2524 mutex_lock(&lli->lli_och_mutex);
2525 if (fd->fd_lease_och == NULL) {
2526 fd->fd_lease_och = och;
2529 mutex_unlock(&lli->lli_och_mutex);
2531 /* impossible now that only excl is supported for now */
2532 ll_lease_close(och, inode, &lease_broken);
2537 case LL_IOC_GET_LEASE: {
2538 struct ll_inode_info *lli = ll_i2info(inode);
2539 struct ldlm_lock *lock = NULL;
2542 mutex_lock(&lli->lli_och_mutex);
2543 if (fd->fd_lease_och != NULL) {
2544 struct obd_client_handle *och = fd->fd_lease_och;
/* report the lease mode only while the lock is still valid */
2546 lock = ldlm_handle2lock(&och->och_lease_handle);
2548 lock_res_and_lock(lock);
2549 if (!ldlm_is_cancel(lock))
2550 rc = och->och_flags &
2551 (FMODE_READ | FMODE_WRITE);
2552 unlock_res_and_lock(lock);
2553 LDLM_LOCK_PUT(lock);
2556 mutex_unlock(&lli->lli_och_mutex);
2559 case LL_IOC_HSM_IMPORT: {
2560 struct hsm_user_import *hui;
2566 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2571 rc = ll_hsm_import(inode, file, hui);
/* unrecognized commands: try registered handlers, then the data export */
2580 ll_iocontrol_call(inode, file, cmd, arg, &err))
2583 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2589 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Commit a computed offset: reject negative offsets (unless
 * FMODE_UNSIGNED_OFFSET) and offsets beyond maxsize, and reset
 * f_version when the position actually changes.
 */
2590 static inline loff_t
2591 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2593 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2595 if (offset > maxsize)
2598 if (offset != file->f_pos) {
2599 file->f_pos = offset;
2600 file->f_version = 0;
/*
 * Backport of the kernel's generic_file_llseek_size() (built only when
 * HAVE_FILE_LLSEEK_SIZE is not set): origin-aware llseek against a
 * caller-supplied eof, serialized on i_mutex for SEEK_CUR updates.
 */
2606 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2607 loff_t maxsize, loff_t eof)
2609 struct inode *inode = file->f_dentry->d_inode;
2617 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2618 * position-querying operation. Avoid rewriting the "same"
2619 * f_pos value back to the file because a concurrent read(),
2620 * write() or lseek() might have altered it
2625 * f_lock protects against read/modify/write race with other
2626 * SEEK_CURs. Note that parallel writes and reads behave
2629 mutex_lock(&inode->i_mutex);
2630 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2631 mutex_unlock(&inode->i_mutex);
2635 * In the generic case the entire file is data, so as long as
2636 * offset isn't at the end of the file then the offset is data.
2643 * There is a virtual hole at the end of the file, so as long as
2644 * offset isn't i_size or larger, return i_size.
2652 return llseek_execute(file, offset, maxsize);
/*
 * llseek handler.  SEEK_END/SEEK_HOLE/SEEK_DATA need an up-to-date
 * size, so ll_glimpse_size() is called first; the actual seek is
 * delegated to ll_generic_file_llseek_size() bounded by the
 * filesystem's max byte offset.
 */
2656 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2658 struct inode *inode = file->f_dentry->d_inode;
2659 loff_t retval, eof = 0;
2662 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2663 (origin == SEEK_CUR) ? file->f_pos : 0);
2664 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2665 inode->i_ino, inode->i_generation, inode, retval, retval,
2667 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2669 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2670 retval = ll_glimpse_size(inode);
2673 eof = i_size_read(inode);
2676 retval = ll_generic_file_llseek_size(file, offset, origin,
2677 ll_file_maxbytes(inode), eof);
/*
 * flush handler (called on close(2)): surface any async writeback
 * errors recorded against this inode as -EIO, unless the application
 * was already told about the write failure on this fd.
 */
2681 int ll_flush(struct file *file, fl_owner_t id)
2683 struct inode *inode = file->f_dentry->d_inode;
2684 struct ll_inode_info *lli = ll_i2info(inode);
2685 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2688 LASSERT(!S_ISDIR(inode->i_mode));
2690 /* catch async errors that were recorded back when async writeback
2691 * failed for pages in this mapping. */
2692 rc = lli->lli_async_rc;
2693 lli->lli_async_rc = 0;
2694 err = lov_read_and_clear_async_rc(lli->lli_clob);
2698 /* The application has been told write failure already.
2699 * Do not report failure again. */
2700 if (fd->fd_write_failed)
2702 return rc ? -EIO : 0;
2706 * Called to make sure a portion of file has been written out.
2707 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2709 * Return how many pages have been written.
/* NOTE(review): the comment above refers to @local_only but the visible
 * signature takes 'mode'/'ignore_layout' — likely stale; confirm. */
2711 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2712 enum cl_fsync_mode mode, int ignore_layout)
2714 struct cl_env_nest nest;
2717 struct obd_capa *capa = NULL;
2718 struct cl_fsync_io *fio;
/* reject unknown fsync modes up front */
2722 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2723 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2726 env = cl_env_nested_get(&nest);
2728 RETURN(PTR_ERR(env));
2730 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2732 io = ccc_env_thread_io(env);
2733 io->ci_obj = cl_i2info(inode)->lli_clob;
2734 io->ci_ignore_layout = ignore_layout;
2736 /* initialize parameters for sync */
2737 fio = &io->u.ci_fsync;
2738 fio->fi_capa = capa;
2739 fio->fi_start = start;
2741 fio->fi_fid = ll_inode2fid(inode);
2742 fio->fi_mode = mode;
2743 fio->fi_nr_written = 0;
2745 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2746 result = cl_io_loop(env, io);
2748 result = io->ci_result;
/* on success, report the number of pages written to the caller */
2750 result = fio->fi_nr_written;
2751 cl_io_fini(env, io);
2752 cl_env_nested_put(&nest, env);
2760 * When dentry is provided (the 'else' case), *file->f_dentry may be
2761 * null and dentry must be used directly rather than pulled from
2762 * *file->f_dentry as is done otherwise.
/*
 * fsync()/fdatasync() handler; the signature varies with the kernel
 * version (HAVE_FILE_FSYNC_4ARGS / _2ARGS / legacy 3-arg).  Waits for
 * dirty pages, reports recorded async write errors, syncs the MDT via
 * md_sync(), and for regular files forces an OST sync of the whole
 * object via cl_sync_file_range(), tracking fd_write_failed.
 */
2765 #ifdef HAVE_FILE_FSYNC_4ARGS
2766 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2768 struct dentry *dentry = file->f_dentry;
2769 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2770 int ll_fsync(struct file *file, int datasync)
2772 struct dentry *dentry = file->f_dentry;
2774 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2777 struct inode *inode = dentry->d_inode;
2778 struct ll_inode_info *lli = ll_i2info(inode);
2779 struct ptlrpc_request *req;
2780 struct obd_capa *oc;
2784 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2785 inode->i_generation, inode);
2786 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2788 #ifdef HAVE_FILE_FSYNC_4ARGS
2789 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2790 mutex_lock(&inode->i_mutex)
2792 /* fsync's caller has already called _fdata{sync,write}, we want
2793 * that IO to finish before calling the osc and mdc sync methods */
2794 rc = filemap_fdatawait(inode->i_mapping);
2797 /* catch async errors that were recorded back when async writeback
2798 * failed for pages in this mapping. */
2799 if (!S_ISDIR(inode->i_mode)) {
2800 err = lli->lli_async_rc;
2801 lli->lli_async_rc = 0;
2804 err = lov_read_and_clear_async_rc(lli->lli_clob);
2809 oc = ll_mdscapa_get(inode);
2810 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2816 ptlrpc_req_finished(req);
2818 if (datasync && S_ISREG(inode->i_mode)) {
2819 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2821 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
/* remember OST-sync failure so ll_flush() does not double-report */
2823 if (rc == 0 && err < 0)
2826 fd->fd_write_failed = true;
2828 fd->fd_write_failed = false;
2831 #ifdef HAVE_FILE_FSYNC_4ARGS
2832 mutex_unlock(&inode->i_mutex);
/*
 * flock()/fcntl() advisory-lock handler.  Translates the VFS
 * file_lock into an LDLM_FLOCK enqueue on the MDT, then mirrors the
 * result into the local lock tables via flock_lock_file_wait() /
 * posix_lock_file_wait().  If the local step fails after the server
 * granted the lock, the server lock is released with an LCK_NL
 * (unlock) enqueue to keep both sides consistent.
 */
2837 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2839 struct inode *inode = file->f_dentry->d_inode;
2840 struct ll_sb_info *sbi = ll_i2sbi(inode);
2841 struct ldlm_enqueue_info einfo = {
2842 .ei_type = LDLM_FLOCK,
2843 .ei_cb_cp = ldlm_flock_completion_ast,
2844 .ei_cbdata = file_lock,
2846 struct md_op_data *op_data;
2847 struct lustre_handle lockh = {0};
2848 ldlm_policy_data_t flock = {{0}};
2854 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2855 inode->i_ino, file_lock);
2857 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2859 if (file_lock->fl_flags & FL_FLOCK) {
2860 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2861 /* flocks are whole-file locks */
2862 flock.l_flock.end = OFFSET_MAX;
2863 /* For flocks owner is determined by the local file descriptor */
2864 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2865 } else if (file_lock->fl_flags & FL_POSIX) {
2866 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2867 flock.l_flock.start = file_lock->fl_start;
2868 flock.l_flock.end = file_lock->fl_end;
2872 flock.l_flock.pid = file_lock->fl_pid;
2874 /* Somewhat ugly workaround for svc lockd.
2875 * lockd installs custom fl_lmops->lm_compare_owner that checks
2876 * for the fl_owner to be the same (which it always is on local node
2877 * I guess between lockd processes) and then compares pid.
2878 * As such we assign pid to the owner field to make it all work,
2879 * conflict with normal locks is unlikely since pid space and
2880 * pointer space for current->files are not intersecting */
2881 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2882 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* map fcntl lock type to an LDLM mode: read -> PR, write -> PW */
2884 switch (file_lock->fl_type) {
2886 einfo.ei_mode = LCK_PR;
2889 /* An unlock request may or may not have any relation to
2890 * existing locks so we may not be able to pass a lock handle
2891 * via a normal ldlm_lock_cancel() request. The request may even
2892 * unlock a byte range in the middle of an existing lock. In
2893 * order to process an unlock request we need all of the same
2894 * information that is given with a normal read or write record
2895 * lock request. To avoid creating another ldlm unlock (cancel)
2896 * message we'll treat a LCK_NL flock request as an unlock. */
2897 einfo.ei_mode = LCK_NL;
2900 einfo.ei_mode = LCK_PW;
2903 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2904 file_lock->fl_type);
2919 flags = LDLM_FL_BLOCK_NOWAIT;
2925 flags = LDLM_FL_TEST_LOCK;
2926 /* Save the old mode so that if the mode in the lock changes we
2927 * can decrement the appropriate reader or writer refcount. */
2928 file_lock->fl_type = einfo.ei_mode;
2931 CERROR("unknown fcntl lock command: %d\n", cmd);
2935 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2936 LUSTRE_OPC_ANY, NULL);
2937 if (IS_ERR(op_data))
2938 RETURN(PTR_ERR(op_data));
2940 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2941 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2942 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2944 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2945 op_data, &lockh, &flock, 0, NULL /* req */, flags);
/* mirror the server decision into the local VFS lock tables */
2947 if ((file_lock->fl_flags & FL_FLOCK) &&
2948 (rc == 0 || file_lock->fl_type == F_UNLCK))
2949 rc2 = flock_lock_file_wait(file, file_lock);
2950 if ((file_lock->fl_flags & FL_POSIX) &&
2951 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2952 !(flags & LDLM_FL_TEST_LOCK))
2953 rc2 = posix_lock_file_wait(file, file_lock);
/* local bookkeeping failed: drop the lock on the server too */
2955 if (rc2 && file_lock->fl_type != F_UNLCK) {
2956 einfo.ei_mode = LCK_NL;
2957 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2958 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2962 ll_finish_md_op_data(op_data);
/* lock/flock method used by ll_file_operations_noflock (-o noflock),
 * which per the struct's comment returns ENOSYS for flock calls. */
2967 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2975 * test if some locks matching bits and l_req_mode are acquired
2976 * - bits can be in different locks
2977 * - if found clear the common lock bits in *bits
2978 * - the bits not found, are kept in *bits
2980 * \param bits [IN] searched lock bits [IN]
2981 * \param l_req_mode [IN] searched lock mode
2982 * \retval boolean, true iff all bits are found
2984 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2986 struct lustre_handle lockh;
2987 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against every normal mode */
2988 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2989 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2998 fid = &ll_i2info(inode)->lli_fid;
2999 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3000 ldlm_lockname[mode]);
/* TEST_LOCK: probe only, do not take references on matched locks */
3002 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* probe each requested inodebit individually until all are resolved */
3003 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3004 policy.l_inodebits.bits = *bits & (1 << i);
3005 if (policy.l_inodebits.bits == 0)
3008 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3009 &policy, mode, &lockh)) {
3010 struct ldlm_lock *lock;
3012 lock = ldlm_handle2lock(&lockh);
3015 ~(lock->l_policy_data.l_inodebits.bits);
3016 LDLM_LOCK_PUT(lock);
3018 *bits &= ~policy.l_inodebits.bits;
/*
 * Try to match a granted MDS IBITS lock covering @bits; on success the
 * matched mode is returned and *lockh references the lock (the caller
 * owns the resulting reference and must decref it).
 */
3025 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3026 struct lustre_handle *lockh, __u64 flags,
3029 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3034 fid = &ll_i2info(inode)->lli_fid;
3035 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3037 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3038 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Post-process the rc of a revalidate RPC: -ENOENT on a non-regular,
 * non-directory inode is tolerated (object already unlinked), any
 * other error is logged with the inode FID.
 */
3043 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3045 /* Already unlinked. Just update nlink and return success */
3046 if (rc == -ENOENT) {
3048 /* This path cannot be hit for regular files unless in
3049 * case of obscure races, so no need to validate
3051 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3053 } else if (rc != 0) {
3054 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
3055 ll_get_fsname(inode->i_sb, NULL, 0),
3056 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate a dentry/inode against the MDT.  Two paths:
 *  - OBD_CONNECT_ATTRFID servers: getattr-by-FID via md_intent_lock()
 *    with an IT_GETATTR/IT_LOOKUP intent;
 *  - otherwise: plain md_getattr() unless a matching MD lock for
 *    @ibits is already cached locally (ll_have_md_lock()).
 */
3062 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3065 struct inode *inode = dentry->d_inode;
3066 struct ptlrpc_request *req = NULL;
3067 struct obd_export *exp;
3071 LASSERT(inode != NULL);
3073 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
3074 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
3076 exp = ll_i2mdexp(inode);
3078 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3079 * But under CMD case, it caused some lock issues, should be fixed
3080 * with new CMD ibits lock. See bug 12718 */
3081 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3082 struct lookup_intent oit = { .it_op = IT_GETATTR };
3083 struct md_op_data *op_data;
3085 if (ibits == MDS_INODELOCK_LOOKUP)
3086 oit.it_op = IT_LOOKUP;
3088 /* Call getattr by fid, so do not provide name at all. */
3089 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
3090 dentry->d_inode, NULL, 0, 0,
3091 LUSTRE_OPC_ANY, NULL);
3092 if (IS_ERR(op_data))
3093 RETURN(PTR_ERR(op_data));
3095 oit.it_create_mode |= M_CHECK_STALE;
3096 rc = md_intent_lock(exp, op_data, NULL, 0,
3097 /* we are not interested in name
3100 ll_md_blocking_ast, 0);
3101 ll_finish_md_op_data(op_data);
3102 oit.it_create_mode &= ~M_CHECK_STALE;
3104 rc = ll_inode_revalidate_fini(inode, rc);
3108 rc = ll_revalidate_it_finish(req, &oit, dentry);
3110 ll_intent_release(&oit);
3114 /* Unlinked? Unhash dentry, so it is not picked up later by
3115 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3116 here to preserve get_cwd functionality on 2.6.
3118 if (!dentry->d_inode->i_nlink)
3119 d_lustre_invalidate(dentry, 0);
3121 ll_lookup_finish_locks(&oit, dentry);
3122 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3123 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3124 obd_valid valid = OBD_MD_FLGETATTR;
3125 struct md_op_data *op_data;
/* regular files also need striping EA; size the reply buffer for it */
3128 if (S_ISREG(inode->i_mode)) {
3129 rc = ll_get_max_mdsize(sbi, &ealen);
3132 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3135 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3136 0, ealen, LUSTRE_OPC_ANY,
3138 if (IS_ERR(op_data))
3139 RETURN(PTR_ERR(op_data));
3141 op_data->op_valid = valid;
3142 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3143 * capa for this inode. Because we only keep capas of dirs
3145 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3146 ll_finish_md_op_data(op_data);
3148 rc = ll_inode_revalidate_fini(inode, rc);
3152 rc = ll_prep_inode(&inode, req, NULL, NULL);
3155 ptlrpc_req_finished(req);
/*
 * Revalidate metadata and, for regular files not under HSM restore,
 * refresh the size with a glimpse.  Non-regular files only copy the
 * cached LVB timestamps into the inode.
 */
3159 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3162 struct inode *inode = dentry->d_inode;
3166 rc = __ll_inode_revalidate_it(dentry, it, ibits);
3170 /* if object isn't regular file, don't validate size */
3171 if (!S_ISREG(inode->i_mode)) {
3172 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
3173 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
3174 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
3176 /* In case of restore, the MDT has the right size and has
3177 * already send it back without granting the layout lock,
3178 * inode is up-to-date so glimpse is useless.
3179 * Also to glimpse we need the layout, in case of a running
3180 * restore the MDT holds the layout lock so the glimpse will
3181 * block up to the end of restore (getattr will block)
3183 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3184 rc = ll_glimpse_size(inode);
/*
 * getattr worker: revalidate UPDATE|LOOKUP ibits then populate the
 * kstat from the (now fresh) inode fields.  The inode number is
 * squashed to a 32-bit FID-derived ino when the caller needs a
 * 32-bit API (ll_need_32bit_api()).
 */
3189 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
3190 struct lookup_intent *it, struct kstat *stat)
3192 struct inode *inode = de->d_inode;
3193 struct ll_sb_info *sbi = ll_i2sbi(inode);
3194 struct ll_inode_info *lli = ll_i2info(inode);
3197 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3198 MDS_INODELOCK_LOOKUP);
3199 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3204 stat->dev = inode->i_sb->s_dev;
3205 if (ll_need_32bit_api(sbi))
3206 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3208 stat->ino = inode->i_ino;
3209 stat->mode = inode->i_mode;
3210 stat->nlink = inode->i_nlink;
3211 stat->uid = inode->i_uid;
3212 stat->gid = inode->i_gid;
3213 stat->rdev = inode->i_rdev;
3214 stat->atime = inode->i_atime;
3215 stat->mtime = inode->i_mtime;
3216 stat->ctime = inode->i_ctime;
3217 stat->blksize = 1 << inode->i_blkbits;
3219 stat->size = i_size_read(inode);
3220 stat->blocks = inode->i_blocks;
/* VFS ->getattr entry point: delegates with an IT_GETATTR intent. */
3224 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3226 struct lookup_intent it = { .it_op = IT_GETATTR };
3228 return ll_getattr_it(mnt, de, &it, stat);
/*
 * VFS ->fiemap handler: marshal fiemap_extent_info into an
 * ll_user_fiemap buffer, run ll_do_fiemap(), and copy the mapped
 * extents back to the caller's buffer.
 */
3231 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3232 __u64 start, __u64 len)
3236 struct ll_user_fiemap *fiemap;
3237 unsigned int extent_count = fieinfo->fi_extents_max;
3239 num_bytes = sizeof(*fiemap) + (extent_count *
3240 sizeof(struct ll_fiemap_extent));
3241 OBD_ALLOC_LARGE(fiemap, num_bytes);
3246 fiemap->fm_flags = fieinfo->fi_flags;
3247 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3248 fiemap->fm_start = start;
3249 fiemap->fm_length = len;
/* NOTE(review): only one extent's worth is copied in here (used to
 * carry a continuation cookie?) — confirm against ll_do_fiemap(). */
3250 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3251 sizeof(struct ll_fiemap_extent));
3253 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3255 fieinfo->fi_flags = fiemap->fm_flags;
3256 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3257 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3258 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3260 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * Return a referenced copy of the cached POSIX ACL for @inode.
 * lli_lock guards the cached lli_posix_acl pointer during the dup.
 */
3264 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3266 struct ll_inode_info *lli = ll_i2info(inode);
3267 struct posix_acl *acl = NULL;
3270 spin_lock(&lli->lli_lock);
3271 /* VFS' acl_permission_check->check_acl will release the refcount */
3272 acl = posix_acl_dup(lli->lli_posix_acl);
3273 spin_unlock(&lli->lli_lock);
/*
 * ACL checking callback for kernels whose generic_permission() does
 * not take a check_acl argument pair (pre-2ARGS API).  With
 * CONFIG_FS_POSIX_ACL it evaluates the cached ACL; RCU-walk lookups
 * (IPERM_FLAG_RCU) cannot sleep and bail out early.
 */
3278 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3280 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3281 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3283 ll_check_acl(struct inode *inode, int mask)
3286 # ifdef CONFIG_FS_POSIX_ACL
3287 struct posix_acl *acl;
3291 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3292 if (flags & IPERM_FLAG_RCU)
3295 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3300 rc = posix_acl_permission(inode, acl, mask);
3301 posix_acl_release(acl);
3304 # else /* !CONFIG_FS_POSIX_ACL */
3306 # endif /* CONFIG_FS_POSIX_ACL */
3308 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * VFS ->permission handler (signature varies by kernel version).
 * Revalidates the root inode first (it is not revalidated by lookup),
 * defers to remote-permission checking on RMT_CLIENT mounts, and
 * otherwise runs the generic permission check with ll_check_acl.
 */
3310 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3311 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3313 # ifdef HAVE_INODE_PERMISION_2ARGS
3314 int ll_inode_permission(struct inode *inode, int mask)
3316 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
/* RCU-walk cannot block; the revalidate below may sleep */
3323 #ifdef MAY_NOT_BLOCK
3324 if (mask & MAY_NOT_BLOCK)
3326 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3327 if (flags & IPERM_FLAG_RCU)
3331 /* as root inode are NOT getting validated in lookup operation,
3332 * need to do it before permission check. */
3334 if (inode == inode->i_sb->s_root->d_inode) {
3335 struct lookup_intent it = { .it_op = IT_LOOKUP };
3337 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3338 MDS_INODELOCK_LOOKUP);
3343 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
3344 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
3346 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3347 return lustre_check_remote_perm(inode, mask);
3349 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3350 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
/* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no ->flock/->lock methods, so flock/fcntl
 * locks are handled locally by the VFS, not across the cluster. */
3356 struct file_operations ll_file_operations = {
3357 .read = ll_file_read,
3358 .aio_read = ll_file_aio_read,
3359 .write = ll_file_write,
3360 .aio_write = ll_file_aio_write,
3361 .unlocked_ioctl = ll_file_ioctl,
3362 .open = ll_file_open,
3363 .release = ll_file_release,
3364 .mmap = ll_file_mmap,
3365 .llseek = ll_file_seek,
3366 .splice_read = ll_file_splice_read,
/* file_operations for -o flock mounts: cluster-coherent flock/fcntl
 * locks via ll_file_flock (LDLM_FLOCK enqueues on the MDT). */
3371 struct file_operations ll_file_operations_flock = {
3372 .read = ll_file_read,
3373 .aio_read = ll_file_aio_read,
3374 .write = ll_file_write,
3375 .aio_write = ll_file_aio_write,
3376 .unlocked_ioctl = ll_file_ioctl,
3377 .open = ll_file_open,
3378 .release = ll_file_release,
3379 .mmap = ll_file_mmap,
3380 .llseek = ll_file_seek,
3381 .splice_read = ll_file_splice_read,
3384 .flock = ll_file_flock,
3385 .lock = ll_file_flock
3389 /* These are for -o noflock - to return ENOSYS on flock calls */
3390 struct file_operations ll_file_operations_noflock = {
3391 .read = ll_file_read,
3392 .aio_read = ll_file_aio_read,
3393 .write = ll_file_write,
3394 .aio_write = ll_file_aio_write,
3395 .unlocked_ioctl = ll_file_ioctl,
3396 .open = ll_file_open,
3397 .release = ll_file_release,
3398 .mmap = ll_file_mmap,
3399 .llseek = ll_file_seek,
3400 .splice_read = ll_file_splice_read,
3402 .flock = ll_file_noflock,
3403 .lock = ll_file_noflock
/* inode_operations for regular Lustre files */
3406 struct inode_operations ll_file_inode_operations = {
3407 .setattr = ll_setattr,
3408 .getattr = ll_getattr,
3409 .permission = ll_inode_permission,
3410 .setxattr = ll_setxattr,
3411 .getxattr = ll_getxattr,
3412 .listxattr = ll_listxattr,
3413 .removexattr = ll_removexattr,
3414 .fiemap = ll_fiemap,
3415 #ifdef HAVE_IOP_GET_ACL
3416 .get_acl = ll_get_acl,
/* dynamic ioctl number support routines */
/* Registry of externally registered ioctl handlers: a list of
 * llioc_data entries guarded by a rw_semaphore. */
3421 static struct llioc_ctl_data {
3422 struct rw_semaphore ioc_sem;
3423 cfs_list_t ioc_head;
3425 __RWSEM_INITIALIZER(llioc.ioc_sem),
3426 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the ioctl commands it claims.
 * iocd_cmd is a trailing variable-length array sized by iocd_count. */
3431 cfs_list_t iocd_list;
3432 unsigned int iocd_size;
3433 llioc_callback_t iocd_cb;
3434 unsigned int iocd_count;
3435 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler for @count commands in @cmd.
 * Returns an opaque handle (the allocated llioc_data) to be passed to
 * ll_iocontrol_unregister(), or NULL on bad arguments / allocation
 * failure (exact failure returns are in elided lines).
 */
3438 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3441 struct llioc_data *in_data = NULL;
3444 if (cb == NULL || cmd == NULL ||
3445 count > LLIOC_MAX_CMD || count < 0)
3448 size = sizeof(*in_data) + count * sizeof(unsigned int);
3449 OBD_ALLOC(in_data, size);
3450 if (in_data == NULL)
3453 memset(in_data, 0, sizeof(*in_data));
3454 in_data->iocd_size = size;
3455 in_data->iocd_cb = cb;
3456 in_data->iocd_count = count;
3457 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3459 down_write(&llioc.ioc_sem);
3460 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3461 up_write(&llioc.ioc_sem);
/*
 * Remove and free the handler previously returned by
 * ll_iocontrol_register(); warns if @magic is not found in the list.
 */
3466 void ll_iocontrol_unregister(void *magic)
3468 struct llioc_data *tmp;
3473 down_write(&llioc.ioc_sem);
3474 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3476 unsigned int size = tmp->iocd_size;
3478 cfs_list_del(&tmp->iocd_list);
/* drop the semaphore before freeing; entry is already unlinked */
3479 up_write(&llioc.ioc_sem);
3481 OBD_FREE(tmp, size);
3485 up_write(&llioc.ioc_sem);
3487 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3490 EXPORT_SYMBOL(ll_iocontrol_register);
3491 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch @cmd to the registered dynamic ioctl handlers in order;
 * stop as soon as one returns LLIOC_STOP.  The handler's rc is
 * returned through *rcp (visible assignment is in elided lines).
 */
3493 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3494 unsigned int cmd, unsigned long arg, int *rcp)
3496 enum llioc_iter ret = LLIOC_CONT;
3497 struct llioc_data *data;
3498 int rc = -EINVAL, i;
3500 down_read(&llioc.ioc_sem);
3501 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3502 for (i = 0; i < data->iocd_count; i++) {
3503 if (cmd != data->iocd_cmd[i])
3506 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3510 if (ret == LLIOC_STOP)
3513 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration into the cl_object for @inode.  For
 * OBJECT_CONF_SET the associated layout lock is only made matchable
 * (ldlm_lock_allow_match) after the layout has been applied, so other
 * threads never observe a stale layout through a matched lock.
 */
3520 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3522 struct ll_inode_info *lli = ll_i2info(inode);
3523 struct cl_env_nest nest;
3528 if (lli->lli_clob == NULL)
3531 env = cl_env_nested_get(&nest);
3533 RETURN(PTR_ERR(env));
3535 result = cl_conf_set(env, lli->lli_clob, conf);
3536 cl_env_nested_put(&nest, env);
3538 if (conf->coc_opc == OBJECT_CONF_SET) {
3539 struct ldlm_lock *lock = conf->coc_lock;
3541 LASSERT(lock != NULL);
3542 LASSERT(ldlm_has_layout(lock));
3544 /* it can only be allowed to match after layout is
3545 * applied to inode otherwise false layout would be
3546 * seen. Applying layout should happen before dropping
3547 * the intent lock. */
3548 ldlm_lock_allow_match(lock);
3554 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
/*
 * If the layout lock's LVB is not ready, fetch the LOV EA from the MDT
 * via md_getxattr() and install it as the lock's LVB data under the
 * resource lock.  No-op when LVB data is already present and ready.
 */
3555 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3558 struct ll_sb_info *sbi = ll_i2sbi(inode);
3559 struct obd_capa *oc;
3560 struct ptlrpc_request *req;
3561 struct mdt_body *body;
3568 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3569 PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
3570 lock->l_lvb_data, lock->l_lvb_len);
3572 if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY))
3575 /* if layout lock was granted right away, the layout is returned
3576 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3577 * blocked and then granted via completion ast, we have to fetch
3578 * layout here. Please note that we can't use the LVB buffer in
3579 * completion AST because it doesn't have a large enough buffer */
3580 oc = ll_mdscapa_get(inode);
3581 rc = ll_get_max_mdsize(sbi, &lmmsize);
3583 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3584 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3590 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3591 if (body == NULL || body->eadatasize > lmmsize)
3592 GOTO(out, rc = -EPROTO);
3594 lmmsize = body->eadatasize;
3595 if (lmmsize == 0) /* empty layout */
3598 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3600 GOTO(out, rc = -EFAULT);
3602 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3603 if (lvbdata == NULL)
3604 GOTO(out, rc = -ENOMEM);
3606 memcpy(lvbdata, lmm, lmmsize);
/* swap in the new LVB under the resource lock, freeing any old one */
3607 lock_res_and_lock(lock);
3608 if (lock->l_lvb_data != NULL)
3609 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3611 lock->l_lvb_data = lvbdata;
3612 lock->l_lvb_len = lmmsize;
3613 unlock_res_and_lock(lock);
3618 ptlrpc_req_finished(req);
3623 * Apply the layout to the inode. Layout lock is held and will be released
/*
 * Consumes the caller's layout-lock reference (lockh/mode): fetches the
 * layout LVB if needed, unpacks it into an lsm, configures the
 * cl_object via ll_layout_conf(), returns the layout generation in
 * *gen, and finally drops the lock.  If reconfiguration hit -EBUSY it
 * waits (OBJECT_CONF_WAIT) for in-flight IO to drain.
 */
3626 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3627 struct inode *inode, __u32 *gen, bool reconf)
3629 struct ll_inode_info *lli = ll_i2info(inode);
3630 struct ll_sb_info *sbi = ll_i2sbi(inode);
3631 struct ldlm_lock *lock;
3632 struct lustre_md md = { NULL };
3633 struct cl_object_conf conf;
3636 bool wait_layout = false;
3639 LASSERT(lustre_handle_is_used(lockh));
3641 lock = ldlm_handle2lock(lockh);
3642 LASSERT(lock != NULL);
3643 LASSERT(ldlm_has_layout(lock));
3645 LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
3646 inode, PFID(&lli->lli_fid), reconf);
3648 /* in case this is a caching lock and reinstate with new inode */
3649 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3651 lock_res_and_lock(lock);
3652 lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
3653 unlock_res_and_lock(lock);
3654 /* checking lvb_ready is racy but this is okay. The worst case is
3655 * that multi processes may configure the file on the same time. */
3657 if (lvb_ready || !reconf) {
3660 /* layout_gen must be valid if layout lock is not
3661 * cancelled and stripe has already set */
3662 *gen = lli->lli_layout_gen;
3668 rc = ll_layout_fetch(inode, lock);
3672 /* for layout lock, lmm is returned in lock's lvb.
3673 * lvb_data is immutable if the lock is held so it's safe to access it
3674 * without res lock. See the description in ldlm_lock_decref_internal()
3675 * for the condition to free lvb_data of layout lock */
3676 if (lock->l_lvb_data != NULL) {
3677 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3678 lock->l_lvb_data, lock->l_lvb_len);
3680 *gen = LL_LAYOUT_GEN_EMPTY;
3682 *gen = md.lsm->lsm_layout_gen;
3685 CERROR("%s: file "DFID" unpackmd error: %d\n",
3686 ll_get_fsname(inode->i_sb, NULL, 0),
3687 PFID(&lli->lli_fid), rc);
3693 /* set layout to file. Unlikely this will fail as old layout was
3694 * surely eliminated */
3695 memset(&conf, 0, sizeof conf);
3696 conf.coc_opc = OBJECT_CONF_SET;
3697 conf.coc_inode = inode;
3698 conf.coc_lock = lock;
3699 conf.u.coc_md = &md;
3700 rc = ll_layout_conf(inode, &conf);
3703 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3705 /* refresh layout failed, need to wait */
3706 wait_layout = rc == -EBUSY;
3710 LDLM_LOCK_PUT(lock);
3711 ldlm_lock_decref(lockh, mode);
3713 /* wait for IO to complete if it's still being used. */
3715 CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
3716 ll_get_fsname(inode->i_sb, NULL, 0),
3717 inode, PFID(&lli->lli_fid));
3719 memset(&conf, 0, sizeof conf);
3720 conf.coc_opc = OBJECT_CONF_WAIT;
3721 conf.coc_inode = inode;
3722 rc = ll_layout_conf(inode, &conf);
3726 CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
3727 PFID(&lli->lli_fid), rc);
3733 * This function checks if there exists a LAYOUT lock on the client side,
3734 * or enqueues it if it doesn't have one in cache.
3736 * This function will not hold layout lock so it may be revoked any time after
3737 * this function returns. Any operations depend on layout should be redone
3740 * This function should be called before lov_io_init() to get an uptodate
3741 * layout version, the caller should save the version number and after IO
3742 * is finished, this function should be called again to verify that layout
3743 * is not changed during IO time.
3745 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3747 struct ll_inode_info *lli = ll_i2info(inode);
3748 struct ll_sb_info *sbi = ll_i2sbi(inode);
3749 struct md_op_data *op_data;
3750 struct lookup_intent it;
3751 struct lustre_handle lockh;
3753 struct ldlm_enqueue_info einfo = {
3754 .ei_type = LDLM_IBITS,
3756 .ei_cb_bl = ll_md_blocking_ast,
3757 .ei_cb_cp = ldlm_completion_ast,
3762 *gen = lli->lli_layout_gen;
/* nothing to do when the server does not support layout locks */
3763 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3767 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3768 LASSERT(S_ISREG(inode->i_mode));
3770 /* mostly layout lock is caching on the local side, so try to match
3771 * it before grabbing layout lock mutex. */
3772 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3773 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3774 if (mode != 0) { /* hit cached lock */
3775 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3779 /* better hold lli_layout_mutex to try again otherwise
3780 * it will have starvation problem. */
3783 /* take layout lock mutex to enqueue layout lock exclusively. */
3784 mutex_lock(&lli->lli_layout_mutex);
3787 /* try again. Maybe somebody else has done this. */
3788 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3789 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3790 if (mode != 0) { /* hit cached lock */
3791 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3795 mutex_unlock(&lli->lli_layout_mutex);
3799 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3800 0, 0, LUSTRE_OPC_ANY, NULL);
3801 if (IS_ERR(op_data)) {
3802 mutex_unlock(&lli->lli_layout_mutex);
3803 RETURN(PTR_ERR(op_data));
3806 /* have to enqueue one */
3807 memset(&it, 0, sizeof(it));
3808 it.it_op = IT_LAYOUT;
3809 lockh.cookie = 0ULL;
3811 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
3812 ll_get_fsname(inode->i_sb, NULL, 0), inode,
3813 PFID(&lli->lli_fid));
3815 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
/* intent request is no longer needed once the lock handle is ours */
3817 if (it.d.lustre.it_data != NULL)
3818 ptlrpc_req_finished(it.d.lustre.it_data);
3819 it.d.lustre.it_data = NULL;
3821 ll_finish_md_op_data(op_data);
/* take ownership of the granted mode out of the intent */
3823 mode = it.d.lustre.it_lock_mode;
3824 it.d.lustre.it_lock_mode = 0;
3825 ll_intent_drop_lock(&it);
3828 /* set lock data in case this is a new lock */
3829 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3830 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3834 mutex_unlock(&lli->lli_layout_mutex);
3840 * This function send a restore request to the MDT
3842 int ll_layout_restore(struct inode *inode)
3844 struct hsm_user_request *hur;
3848 len = sizeof(struct hsm_user_request) +
3849 sizeof(struct hsm_user_item);
3850 OBD_ALLOC(hur, len);
3854 hur->hur_request.hr_action = HUA_RESTORE;
3855 hur->hur_request.hr_archive_id = 0;
3856 hur->hur_request.hr_flags = 0;
3857 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3858 sizeof(hur->hur_user_item[0].hui_fid));
3859 hur->hur_user_item[0].hui_extent.length = -1;
3860 hur->hur_request.hr_itemcount = 1;
3861 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,