4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/*
 * Allocate a per-open-file ll_file_data from ll_file_data_slab (GFP
 * includes __GFP_IO) and clear its write-failure flag.
 * NOTE(review): lines are elided from this extract — the allocation
 * NULL check and the return of @fd are not visible here.
 */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
61 fd->fd_write_failed = false;
/* Return a ll_file_data to its slab cache; counterpart of ll_file_data_get(). */
66 static void ll_file_data_put(struct ll_file_data *fd)
69 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the current attributes of @inode (fid, mode, times, size, blocks,
 * ext flags, IO epoch), the open handle @fh and an MDS capability into
 * @op_data for an MDS close/setattr request.
 */
72 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
73 struct lustre_handle *fh)
75 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
76 op_data->op_attr.ia_mode = inode->i_mode;
77 op_data->op_attr.ia_atime = inode->i_atime;
78 op_data->op_attr.ia_mtime = inode->i_mtime;
79 op_data->op_attr.ia_ctime = inode->i_ctime;
80 op_data->op_attr.ia_size = i_size_read(inode);
81 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the Lustre-extended iattr wrapper, hence the cast. */
82 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
83 ll_inode_to_ext_flags(inode->i_flags);
84 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
86 op_data->op_handle = *fh;
87 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Tell the MDS the data was modified while open so it refreshes state. */
89 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
90 op_data->op_bias |= MDS_DATA_MODIFIED;
94 * Closes the IO epoch and packs all the attributes into @op_data for
/*
 * the close RPC.  Mode and timestamps are always sent; size/blocks are
 * only sent when Size-on-MDS is unsupported by the export or the inode
 * is not a regular file (otherwise the MDS obtains them separately).
 * NOTE(review): interior lines are elided from this extract.
 */
97 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
98 struct obd_client_handle *och)
102 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
103 ATTR_MTIME | ATTR_MTIME_SET |
104 ATTR_CTIME | ATTR_CTIME_SET;
106 if (!(och->och_flags & FMODE_WRITE))
109 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
110 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* Close the IO epoch (may set MF_EPOCH_CLOSE in op_data->op_flags). */
112 ll_ioepoch_close(inode, op_data, &och, 0);
115 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
116 ll_prep_md_op_data(op_data, inode, NULL, NULL,
117 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send the MDS close RPC for open handle @och on @inode and tear down
 * the client-side open state.  If @data_version is non-NULL the close
 * is an HSM release (MDS_HSM_RELEASE bias, lease handle and data
 * version are packed).  Also performs the Size-on-MDS update when the
 * MDS requests it, clears LLIF_DATA_MODIFIED on success, and destroys
 * OST objects recorded in the close reply.
 * NOTE(review): interior lines (error checks, epoch handling, RETURN)
 * are elided from this extract; do not infer full control flow.
 */
121 static int ll_close_inode_openhandle(struct obd_export *md_exp,
123 struct obd_client_handle *och,
124 const __u64 *data_version)
126 struct obd_export *exp = ll_i2mdexp(inode);
127 struct md_op_data *op_data;
128 struct ptlrpc_request *req = NULL;
129 struct obd_device *obd = class_exp2obd(exp);
136 * XXX: in case of LMV, is this correct to access
139 CERROR("Invalid MDC connection handle "LPX64"\n",
140 ll_i2mdexp(inode)->exp_handle.h_cookie);
144 OBD_ALLOC_PTR(op_data);
146 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
148 ll_prepare_close(inode, op_data, och);
149 if (data_version != NULL) {
150 /* Pass in data_version implies release. */
151 op_data->op_bias |= MDS_HSM_RELEASE;
152 op_data->op_data_version = *data_version;
153 op_data->op_lease_handle = och->och_lease_handle;
154 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
156 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
157 rc = md_close(md_exp, op_data, och->och_mod, &req);
159 /* This close must have the epoch closed. */
160 LASSERT(epoch_close);
161 /* MDS has instructed us to obtain Size-on-MDS attribute from
162 * OSTs and send setattr back to MDS. */
163 rc = ll_som_update(inode, op_data);
165 CERROR("inode %lu mdc Size-on-MDS update failed: "
166 "rc = %d\n", inode->i_ino, rc);
170 CERROR("inode %lu mdc close failed: rc = %d\n",
174 /* DATA_MODIFIED flag was successfully sent on close, cancel data
175 * modification flag. */
176 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
177 struct ll_inode_info *lli = ll_i2info(inode);
/* lli_flags is protected by lli_lock. */
179 spin_lock(&lli->lli_lock);
180 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
181 spin_unlock(&lli->lli_lock);
185 rc = ll_objects_destroy(req, inode);
187 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* For HSM release, verify the server actually released the file. */
191 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
192 struct mdt_body *body;
193 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
194 if (!(body->valid & OBD_MD_FLRELEASED))
198 ll_finish_md_op_data(op_data);
/* SOM write close with the epoch still open: defer DONE_WRITING. */
202 if (exp_connect_som(exp) && !epoch_close &&
203 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
204 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
206 md_clear_open_replay_data(md_exp, och);
207 /* Free @och if it is not waiting for DONE_WRITING. */
208 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
211 if (req) /* This is close request */
212 ptlrpc_req_finished(req);
/*
 * Close the per-inode MDS open handle of the class selected by @flags
 * (write / exec / read), unless other local users still hold it.
 * Picks the matching och pointer and use count from ll_inode_info,
 * and calls ll_close_inode_openhandle() when the handle can be freed.
 * NOTE(review): lines that clear *och_p and decide the race outcome
 * are elided from this extract.
 */
216 int ll_md_real_close(struct inode *inode, int flags)
218 struct ll_inode_info *lli = ll_i2info(inode);
219 struct obd_client_handle **och_p;
220 struct obd_client_handle *och;
/* Select the handle slot and use count matching the open mode. */
225 if (flags & FMODE_WRITE) {
226 och_p = &lli->lli_mds_write_och;
227 och_usecount = &lli->lli_open_fd_write_count;
228 } else if (flags & FMODE_EXEC) {
229 och_p = &lli->lli_mds_exec_och;
230 och_usecount = &lli->lli_open_fd_exec_count;
232 LASSERT(flags & FMODE_READ);
233 och_p = &lli->lli_mds_read_och;
234 och_usecount = &lli->lli_open_fd_read_count;
237 mutex_lock(&lli->lli_och_mutex);
238 if (*och_usecount) { /* There are still users of this handle, so
240 mutex_unlock(&lli->lli_och_mutex);
245 mutex_unlock(&lli->lli_och_mutex);
247 if (och) { /* There might be a race and somebody have freed this och
249 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close: drop group lock, clean up a leftover lease
 * and/or private open handle, decrement the per-mode open-fd count, and
 * only talk to the MDS (ll_md_real_close()) when no cached OPEN lock
 * covers the inode.  Finally detaches and frees the ll_file_data.
 * NOTE(review): interior lines (lockmode setup, md_lock_match tail
 * arguments, RETURN) are elided from this extract.
 */
256 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
259 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
260 struct ll_inode_info *lli = ll_i2info(inode);
264 /* clear group lock, if present */
265 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
266 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
268 if (fd->fd_lease_och != NULL) {
271 /* Usually the lease is not released when the
272 * application crashed, we need to release here. */
273 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
274 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
275 PFID(&lli->lli_fid), rc, lease_broken);
277 fd->fd_lease_och = NULL;
280 if (fd->fd_och != NULL) {
281 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
286 /* Let's see if we have good enough OPEN lock on the file and if
287 we can skip talking to MDS */
288 if (file->f_dentry->d_inode) { /* Can this ever be false? */
290 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
291 struct lustre_handle lockh;
292 struct inode *inode = file->f_dentry->d_inode;
293 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
295 mutex_lock(&lli->lli_och_mutex);
296 if (fd->fd_omode & FMODE_WRITE) {
298 LASSERT(lli->lli_open_fd_write_count);
299 lli->lli_open_fd_write_count--;
300 } else if (fd->fd_omode & FMODE_EXEC) {
302 LASSERT(lli->lli_open_fd_exec_count);
303 lli->lli_open_fd_exec_count--;
306 LASSERT(lli->lli_open_fd_read_count);
307 lli->lli_open_fd_read_count--;
309 mutex_unlock(&lli->lli_och_mutex);
/* No matching cached OPEN lock: must do the real MDS close. */
311 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
312 LDLM_IBITS, &policy, lockmode,
314 rc = ll_md_real_close(file->f_dentry->d_inode,
318 CERROR("Releasing a file %p with negative dentry %p. Name %s",
319 file, file->f_dentry, file->f_dentry->d_name.name);
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
325 ll_capa_close(inode);
330 /* While this returns an error code, fput() the caller does not, so we need
331 * to make every effort to clean up all of our state here. Also, applications
332 * rarely check close errors and even if an error is returned they will not
333 * re-try the close call.
/*
 * VFS ->release() entry point for Lustre files.  Handles remote-client
 * ACL teardown for the root inode, stops a statahead thread this fd
 * started, short-circuits for the root dentry, clears async write
 * errors on regular files, then delegates to ll_md_close().
 * NOTE(review): interior lines are elided from this extract.
 */
335 int ll_file_release(struct inode *inode, struct file *file)
337 struct ll_file_data *fd;
338 struct ll_sb_info *sbi = ll_i2sbi(inode);
339 struct ll_inode_info *lli = ll_i2info(inode);
343 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
344 inode->i_generation, inode);
346 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL state is only attached to the root inode. */
347 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
348 inode == inode->i_sb->s_root->d_inode) {
349 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
352 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
353 fd->fd_flags &= ~LL_FILE_RMTACL;
354 rct_del(&sbi->ll_rct, cfs_curproc_pid());
355 et_search_free(&sbi->ll_et, cfs_curproc_pid());
360 if (inode->i_sb->s_root != file->f_dentry)
361 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
362 fd = LUSTRE_FPRIVATE(file);
365 /* The last ref on @file, maybe not the owner pid of statahead.
366 * Different processes can open the same dir, "ll_opendir_key" means:
367 * it is me that should stop the statahead thread. */
368 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
369 lli->lli_opendir_pid != 0)
370 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root dentry never had an MDS open handle; just free local state. */
372 if (inode->i_sb->s_root == file->f_dentry) {
373 LUSTRE_FPRIVATE(file) = NULL;
374 ll_file_data_put(fd);
378 if (!S_ISDIR(inode->i_mode)) {
379 lov_read_and_clear_async_rc(lli->lli_clob);
380 lli->lli_async_rc = 0;
383 rc = ll_md_close(sbi->ll_md_exp, inode, file);
385 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
386 libcfs_debug_dumplog();
/*
 * Perform an intent-based open RPC against the MDS for @file.  When no
 * striping data is being set (@lmm == NULL, @lmmsize == 0) the request
 * also asks for an OPEN lock.  On success the inode is refreshed from
 * the reply and lock data is installed.
 * NOTE(review): interior lines (ESTALE path, out label, RETURN) are
 * elided from this extract.
 */
391 static int ll_intent_file_open(struct file *file, void *lmm,
392 int lmmsize, struct lookup_intent *itp)
394 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
395 struct dentry *parent = file->f_dentry->d_parent;
396 struct md_op_data *op_data;
397 struct ptlrpc_request *req;
398 __u32 opc = LUSTRE_OPC_ANY;
405 /* Usually we come here only for NFSD, and we want open lock.
406 But we can also get here with pre 2.6.15 patchless kernels, and in
407 that case that lock is also ok */
408 /* We can also get here if there was cached open handle in revalidate_it
409 * but it disappeared while we were getting from there to ll_file_open.
410 * But this means this file was closed and immediately opened which
411 * makes a good candidate for using OPEN lock */
412 /* If lmmsize & lmm are not 0, we are just setting stripe info
413 * parameters. No need for the open lock */
414 if (lmm == NULL && lmmsize == 0) {
415 itp->it_flags |= MDS_OPEN_LOCK;
416 if (itp->it_flags & FMODE_WRITE)
417 opc = LUSTRE_OPC_CREATE;
420 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
421 file->f_dentry->d_inode, NULL, 0,
425 RETURN(PTR_ERR(op_data));
427 itp->it_flags |= MDS_OPEN_BY_FID;
428 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
429 0 /*unused */, &req, ll_md_blocking_ast, 0);
430 ll_finish_md_op_data(op_data);
432 /* reason for keeping own exit path - don't flood log
433 * with messages with -ESTALE errors.
435 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
436 it_open_error(DISP_OPEN_OPEN, itp))
438 ll_release_openhandle(file->f_dentry, itp);
442 if (it_disposition(itp, DISP_LOOKUP_NEG))
443 GOTO(out, rc = -ENOENT);
445 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
446 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
447 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh local inode attributes/striping from the open reply. */
451 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
452 if (!rc && itp->d.lustre.it_lock_mode)
453 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
457 ptlrpc_req_finished(itp->d.lustre.it_data);
458 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
459 ll_intent_drop_lock(itp);
465 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
466 * not believe attributes if a few ioepoch holders exist. Attributes for
467 * previous ioepoch if new one is opened are also skipped by MDS.
469 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a non-zero epoch that differs from the current one. */
471 if (ioepoch && lli->lli_ioepoch != ioepoch) {
472 lli->lli_ioepoch = ioepoch;
473 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
474 ioepoch, PFID(&lli->lli_fid));
/*
 * Populate an obd_client_handle from the MDT reply body of an intent
 * open (fh, fid, lease handle, flags) and register the open for replay.
 * Returns the result of md_set_open_replay_data().
 */
478 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
479 struct obd_client_handle *och)
481 struct ptlrpc_request *req = it->d.lustre.it_data;
482 struct mdt_body *body;
484 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
485 och->och_fh = body->handle;
486 och->och_fid = body->fid1;
/* The lock handle doubles as the lease handle for lease opens. */
487 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
488 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
489 och->och_flags = it->it_flags;
491 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish an open locally: optionally fill @och from the intent reply
 * (and record the IO epoch from the MDT body), then attach @fd as the
 * file's private data and initialise readahead state and open mode.
 * NOTE(review): interior lines (och != NULL guard, error handling,
 * RETURN) are elided from this extract.
 */
494 int ll_local_open(struct file *file, struct lookup_intent *it,
495 struct ll_file_data *fd, struct obd_client_handle *och)
497 struct inode *inode = file->f_dentry->d_inode;
498 struct ll_inode_info *lli = ll_i2info(inode);
/* ll_file_open() cleared private_data earlier; it must be unset here. */
501 LASSERT(!LUSTRE_FPRIVATE(file));
506 struct ptlrpc_request *req = it->d.lustre.it_data;
507 struct mdt_body *body;
510 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
514 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
515 ll_ioepoch_open(lli, body->ioepoch);
518 LUSTRE_FPRIVATE(file) = fd;
519 ll_readahead_init(inode, &fd->fd_ras);
/* Remember only the access-mode bits for later close accounting. */
520 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
525 /* Open a file, and (for the very first open) create objects on the OSTs at
526 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
527 * creation or open until ll_lov_setstripe() ioctl is called.
529 * If we already have the stripe MD locally then we don't request it in
530 * md_open(), by passing a lmm_size = 0.
532 * It is up to the application to ensure no other processes open this file
533 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
534 * used. We might be able to avoid races of that sort by getting lli_open_sem
535 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
536 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * VFS ->open() entry point.  Reuses an existing per-mode MDS open
 * handle when one is cached on the inode, otherwise performs an intent
 * open (ll_intent_file_open) and records the new handle; then completes
 * the open locally via ll_local_open().  Directory opens may register
 * this fd as the statahead owner.
 * NOTE(review): many interior lines (loop/restart structure, NULL
 * checks, several else branches and labels) are elided from this
 * extract; control flow shown here is incomplete.
 */
538 int ll_file_open(struct inode *inode, struct file *file)
540 struct ll_inode_info *lli = ll_i2info(inode);
541 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
542 .it_flags = file->f_flags };
543 struct obd_client_handle **och_p = NULL;
544 __u64 *och_usecount = NULL;
545 struct ll_file_data *fd;
546 int rc = 0, opendir_set = 0;
549 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
550 inode->i_generation, inode, file->f_flags);
552 it = file->private_data; /* XXX: compat macro */
553 file->private_data = NULL; /* prevent ll_local_open assertion */
555 fd = ll_file_data_get();
557 GOTO(out_openerr, rc = -ENOMEM);
/* First opener of a directory becomes the statahead owner. */
560 if (S_ISDIR(inode->i_mode)) {
561 spin_lock(&lli->lli_sa_lock);
562 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
563 lli->lli_opendir_pid == 0) {
564 lli->lli_opendir_key = fd;
565 lli->lli_opendir_pid = cfs_curproc_pid();
568 spin_unlock(&lli->lli_sa_lock);
/* Root dentry: no MDS open needed, just attach the file data. */
571 if (inode->i_sb->s_root == file->f_dentry) {
572 LUSTRE_FPRIVATE(file) = fd;
576 if (!it || !it->d.lustre.it_disposition) {
577 /* Convert f_flags into access mode. We cannot use file->f_mode,
578 * because everything but O_ACCMODE mask was stripped from
580 if ((oit.it_flags + 1) & O_ACCMODE)
582 if (file->f_flags & O_TRUNC)
583 oit.it_flags |= FMODE_WRITE;
585 /* kernel only call f_op->open in dentry_open. filp_open calls
586 * dentry_open after call to open_namei that checks permissions.
587 * Only nfsd_open call dentry_open directly without checking
588 * permissions and because of that this code below is safe. */
589 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
590 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
592 /* We do not want O_EXCL here, presumably we opened the file
593 * already? XXX - NFS implications? */
594 oit.it_flags &= ~O_EXCL;
596 /* bug20584, if "it_flags" contains O_CREAT, the file will be
597 * created if necessary, then "IT_CREAT" should be set to keep
598 * consistent with it */
599 if (oit.it_flags & O_CREAT)
600 oit.it_op |= IT_CREAT;
606 /* Let's see if we have file open on MDS already. */
607 if (it->it_flags & FMODE_WRITE) {
608 och_p = &lli->lli_mds_write_och;
609 och_usecount = &lli->lli_open_fd_write_count;
610 } else if (it->it_flags & FMODE_EXEC) {
611 och_p = &lli->lli_mds_exec_och;
612 och_usecount = &lli->lli_open_fd_exec_count;
614 och_p = &lli->lli_mds_read_och;
615 och_usecount = &lli->lli_open_fd_read_count;
618 mutex_lock(&lli->lli_och_mutex);
619 if (*och_p) { /* Open handle is present */
620 if (it_disposition(it, DISP_OPEN_OPEN)) {
621 /* Well, there's extra open request that we do not need,
622 let's close it somehow. This will decref request. */
623 rc = it_open_error(DISP_OPEN_OPEN, it);
625 mutex_unlock(&lli->lli_och_mutex);
626 GOTO(out_openerr, rc);
629 ll_release_openhandle(file->f_dentry, it);
633 rc = ll_local_open(file, it, fd, NULL);
636 mutex_unlock(&lli->lli_och_mutex);
637 GOTO(out_openerr, rc);
640 LASSERT(*och_usecount == 0);
641 if (!it->d.lustre.it_disposition) {
642 /* We cannot just request lock handle now, new ELC code
643 means that one of other OPEN locks for this file
644 could be cancelled, and since blocking ast handler
645 would attempt to grab och_mutex as well, that would
646 result in a deadlock */
647 mutex_unlock(&lli->lli_och_mutex);
648 it->it_create_mode |= M_CHECK_STALE;
649 rc = ll_intent_file_open(file, NULL, 0, it);
650 it->it_create_mode &= ~M_CHECK_STALE;
652 GOTO(out_openerr, rc);
656 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
658 GOTO(out_och_free, rc = -ENOMEM);
662 /* md_intent_lock() didn't get a request ref if there was an
663 * open error, so don't do cleanup on the request here
665 /* XXX (green): Should not we bail out on any error here, not
666 * just open error? */
667 rc = it_open_error(DISP_OPEN_OPEN, it);
669 GOTO(out_och_free, rc);
671 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
673 rc = ll_local_open(file, it, fd, *och_p);
675 GOTO(out_och_free, rc);
677 mutex_unlock(&lli->lli_och_mutex);
680 /* Must do this outside lli_och_mutex lock to prevent deadlock where
681 different kind of OPEN lock for this same inode gets cancelled
682 by ldlm_cancel_lru */
683 if (!S_ISREG(inode->i_mode))
684 GOTO(out_och_free, rc);
688 if (!lli->lli_has_smd) {
689 if (file->f_flags & O_LOV_DELAY_CREATE ||
690 !(file->f_mode & FMODE_WRITE)) {
691 CDEBUG(D_INODE, "object creation was delayed\n");
692 GOTO(out_och_free, rc);
695 file->f_flags &= ~O_LOV_DELAY_CREATE;
696 GOTO(out_och_free, rc);
/* Error cleanup: release the handle slot allocated above. */
700 if (och_p && *och_p) {
701 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
702 *och_p = NULL; /* OBD_FREE writes some magic there */
705 mutex_unlock(&lli->lli_och_mutex);
708 if (opendir_set != 0)
709 ll_stop_statahead(inode, lli->lli_opendir_key);
711 ll_file_data_put(fd);
713 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* Drop the extra intent-open request reference if we still hold it. */
716 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
717 ptlrpc_req_finished(it->d.lustre.it_data);
718 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * LDLM blocking AST for lease locks: on a blocking callback, cancel the
 * lease lock asynchronously (which is how lease revocation is
 * signalled); the CANCELING case body is elided from this extract.
 */
724 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
725 struct ldlm_lock_desc *desc, void *data, int flag)
728 struct lustre_handle lockh;
732 case LDLM_CB_BLOCKING:
733 ldlm_lock2handle(lock, &lockh);
734 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
736 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
740 case LDLM_CB_CANCELING:
748 * Acquire a lease and open the file.
/*
 * Performs an MDS_OPEN_LEASE intent open on @inode for @fmode
 * (FMODE_READ or FMODE_WRITE only).  When @file is given, the existing
 * openhandle is reused (passed as op_handle) so the MDT recognises the
 * same owner; this is refused if the fd already holds a lease or the
 * file has other openers.  Returns the new obd_client_handle or an
 * ERR_PTR.  Error paths close the openhandle and cancel the open lock.
 * NOTE(review): interior lines (och allocation, rc checks, RETURNs,
 * labels) are elided from this extract.
 */
750 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
751 fmode_t fmode, __u64 open_flags)
753 struct lookup_intent it = { .it_op = IT_OPEN };
754 struct ll_sb_info *sbi = ll_i2sbi(inode);
755 struct md_op_data *op_data;
756 struct ptlrpc_request *req;
757 struct lustre_handle old_handle = { 0 };
758 struct obd_client_handle *och = NULL;
763 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
764 RETURN(ERR_PTR(-EINVAL));
767 struct ll_inode_info *lli = ll_i2info(inode);
768 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
769 struct obd_client_handle **och_p;
/* The lease mode must be a subset of the file's open mode. */
772 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
773 RETURN(ERR_PTR(-EPERM));
775 /* Get the openhandle of the file */
777 mutex_lock(&lli->lli_och_mutex);
778 if (fd->fd_lease_och != NULL) {
779 mutex_unlock(&lli->lli_och_mutex);
783 if (fd->fd_och == NULL) {
784 if (file->f_mode & FMODE_WRITE) {
785 LASSERT(lli->lli_mds_write_och != NULL);
786 och_p = &lli->lli_mds_write_och;
787 och_usecount = &lli->lli_open_fd_write_count;
789 LASSERT(lli->lli_mds_read_och != NULL);
790 och_p = &lli->lli_mds_read_och;
791 och_usecount = &lli->lli_open_fd_read_count;
793 if (*och_usecount == 1) {
800 mutex_unlock(&lli->lli_och_mutex);
801 if (rc < 0) /* more than 1 opener */
804 LASSERT(fd->fd_och != NULL);
805 old_handle = fd->fd_och->och_fh;
810 RETURN(ERR_PTR(-ENOMEM));
812 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
813 LUSTRE_OPC_ANY, NULL);
815 GOTO(out, rc = PTR_ERR(op_data));
817 /* To tell the MDT this openhandle is from the same owner */
818 op_data->op_handle = old_handle;
820 it.it_flags = fmode | open_flags;
821 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
822 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
823 ll_md_blocking_lease_ast,
824 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
825 * it can be cancelled which may mislead applications that the lease is
827 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
828 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
829 * doesn't deal with openhandle, so normal openhandle will be leaked. */
830 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
831 ll_finish_md_op_data(op_data);
833 ptlrpc_req_finished(req);
834 it_clear_disposition(&it, DISP_ENQ_COMPLETE);
837 GOTO(out_release_it, rc);
839 if (it_disposition(&it, DISP_LOOKUP_NEG))
840 GOTO(out_release_it, rc = -ENOENT);
842 rc = it_open_error(DISP_OPEN_OPEN, &it);
844 GOTO(out_release_it, rc);
846 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
847 ll_och_fill(sbi->ll_md_exp, &it, och);
849 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
850 GOTO(out_close, rc = -EOPNOTSUPP);
852 /* already get lease, handle lease lock */
853 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
854 if (it.d.lustre.it_lock_mode == 0 ||
855 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
856 /* open lock must return for lease */
857 CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
858 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
859 it.d.lustre.it_lock_bits);
860 GOTO(out_close, rc = -EPROTO);
863 ll_intent_release(&it);
/* Error path: close the openhandle we obtained above. */
867 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
869 CERROR("Close openhandle returned %d\n", rc2);
871 /* cancel open lock */
872 if (it.d.lustre.it_lock_mode != 0) {
873 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
874 it.d.lustre.it_lock_mode);
875 it.d.lustre.it_lock_mode = 0;
878 ll_intent_release(&it);
883 EXPORT_SYMBOL(ll_lease_open);
886 * Release lease and close the file.
887 * It will check if the lease has ever broken.
/*
 * @lease_broken (optional) is set to whether the lease lock had already
 * been cancelled (i.e. the lease was broken) before we released it.
 * NOTE(review): interior lines (LDLM_LOCK_PUT, RETURN) are elided from
 * this extract.
 */
889 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
892 struct ldlm_lock *lock;
893 bool cancelled = true;
897 lock = ldlm_handle2lock(&och->och_lease_handle);
899 lock_res_and_lock(lock);
900 cancelled = ldlm_is_cancel(lock);
901 unlock_res_and_lock(lock);
905 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
906 PFID(&ll_i2info(inode)->lli_fid), cancelled);
/* Drop the lease lock ourselves if it was not already cancelled. */
909 ldlm_cli_cancel(&och->och_lease_handle, 0);
910 if (lease_broken != NULL)
911 *lease_broken = cancelled;
913 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
917 EXPORT_SYMBOL(ll_lease_close);
919 /* Fills the obdo with the attributes for the lsm */
/*
 * Issue an async OST getattr over all stripes of @lsm and wait for the
 * result set.  @ioepoch is packed into the obdo; when @sync is set the
 * getattr is performed under a server-side lock (OBD_FL_SRVLOCK).
 * On return o_valid is masked down to the fields merged from the OSTs.
 * NOTE(review): interior lines (oa setup, rc checks, RETURN) are elided
 * from this extract.
 */
920 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
921 struct obd_capa *capa, struct obdo *obdo,
922 __u64 ioepoch, int sync)
924 struct ptlrpc_request_set *set;
925 struct obd_info oinfo = { { { 0 } } };
930 LASSERT(lsm != NULL);
934 oinfo.oi_oa->o_oi = lsm->lsm_oi;
935 oinfo.oi_oa->o_mode = S_IFREG;
936 oinfo.oi_oa->o_ioepoch = ioepoch;
937 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
938 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
939 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
940 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
941 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
942 OBD_MD_FLDATAVERSION;
943 oinfo.oi_capa = capa;
/* sync: ask the OST to take the lock server-side for this getattr. */
945 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
946 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
949 set = ptlrpc_prep_set();
951 CERROR("can't allocate ptlrpc set\n");
954 rc = obd_getattr_async(exp, &oinfo, set);
956 rc = ptlrpc_set_wait(set);
957 ptlrpc_set_destroy(set);
/* Keep only the attributes actually obtained from the OSTs. */
960 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
961 OBD_MD_FLATIME | OBD_MD_FLMTIME |
962 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
963 OBD_MD_FLDATAVERSION);
968 * Performs the getattr on the inode and updates its fields.
969 * If @sync != 0, perform the getattr under the server-side lock.
/*
 * Gets the inode's layout and MDS capability, delegates to
 * ll_lsm_getattr(), then refreshes the in-core inode from the returned
 * obdo.  NOTE(review): interior lines (capa_put, rc check, RETURN) are
 * elided from this extract.
 */
971 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
972 __u64 ioepoch, int sync)
974 struct obd_capa *capa = ll_mdscapa_get(inode);
975 struct lov_stripe_md *lsm;
979 lsm = ccc_inode_lsm_get(inode);
980 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
981 capa, obdo, ioepoch, sync);
984 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
986 obdo_refresh_inode(inode, obdo, obdo->o_valid);
987 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
988 " blksize %lu\n", POSTID(oi), i_size_read(inode),
989 (unsigned long long)inode->i_blocks,
990 (unsigned long)ll_inode_blksize(inode));
992 ccc_inode_lsm_put(inode, lsm);
/*
 * Merge MDS-provided timestamps (cached in lli_lvb) with the attributes
 * obtained from the cl_object (OSTs), taking the newest of each
 * timestamp, and update the inode's size/blocks under the size lock.
 * NOTE(review): interior lines (rc check after attr_get, RETURN) are
 * elided from this extract.
 */
996 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
998 struct ll_inode_info *lli = ll_i2info(inode);
999 struct cl_object *obj = lli->lli_clob;
1000 struct cl_attr *attr = ccc_env_thread_attr(env);
1006 ll_inode_size_lock(inode);
1007 /* merge timestamps the most recently obtained from mds with
1008 timestamps obtained from osts */
1009 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
1010 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
1011 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
1012 inode_init_lvb(inode, &lvb);
1014 cl_object_attr_lock(obj);
1015 rc = cl_object_attr_get(env, obj, attr);
1016 cl_object_attr_unlock(obj);
/* Take the most recent of each timestamp (MDS vs. OST). */
1019 if (lvb.lvb_atime < attr->cat_atime)
1020 lvb.lvb_atime = attr->cat_atime;
1021 if (lvb.lvb_ctime < attr->cat_ctime)
1022 lvb.lvb_ctime = attr->cat_ctime;
1023 if (lvb.lvb_mtime < attr->cat_mtime)
1024 lvb.lvb_mtime = attr->cat_mtime;
1026 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1027 PFID(&lli->lli_fid), attr->cat_size);
1028 cl_isize_write_nolock(inode, attr->cat_size);
1030 inode->i_blocks = attr->cat_blocks;
1032 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1033 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1034 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1036 ll_inode_size_unlock(inode);
/*
 * Glimpse helper for ioctls: get the OST attributes for @lsm into a
 * temporary obdo and copy size/blocks/timestamps into the stat buffer.
 * NOTE(review): interior lines (rc check, RETURN) are elided from this
 * extract.
 */
1041 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1044 struct obdo obdo = { 0 };
1047 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1049 st->st_size = obdo.o_size;
1050 st->st_blocks = obdo.o_blocks;
1051 st->st_mtime = obdo.o_mtime;
1052 st->st_atime = obdo.o_atime;
1053 st->st_ctime = obdo.o_ctime;
/*
 * Initialise a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: nonblocking/append/sync flags from f_flags, the cl_object from
 * the inode, and the lock-request mode (never for nolock files,
 * mandatory for O_APPEND, otherwise "maybe").
 */
1058 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1060 struct inode *inode = file->f_dentry->d_inode;
1062 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1064 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1065 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1066 file->f_flags & O_DIRECT ||
1069 io->ci_obj = ll_i2info(inode)->lli_clob;
1070 io->ci_lockreq = CILR_MAYBE;
1071 if (ll_file_nolock(file)) {
1072 io->ci_lockreq = CILR_NEVER;
1073 io->ci_no_srvlock = 1;
1074 } else if (file->f_flags & O_APPEND) {
1075 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common driver for all read/write paths (normal iovec, sendfile,
 * splice).  Sets up the cl_io from @args, takes lli_write_mutex for
 * non-group-locked normal writes and lli_trunc_sem (read) for normal
 * reads, runs cl_io_loop(), updates *ppos and per-sb stats, and tracks
 * fd_write_failed for fsync error reporting.
 * NOTE(review): interior lines (case labels, restart loop, out label,
 * RETURN) are elided from this extract; the restart logic shown is
 * incomplete.
 */
1080 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1081 struct file *file, enum cl_io_type iot,
1082 loff_t *ppos, size_t count)
1084 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1085 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1091 io = ccc_env_thread_io(env);
1092 ll_io_init(io, file, iot == CIT_WRITE);
1094 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1095 struct vvp_io *vio = vvp_env_io(env);
1096 struct ccc_io *cio = ccc_env_io(env);
1097 int write_mutex_locked = 0;
1099 cio->cui_fd = LUSTRE_FPRIVATE(file);
1100 vio->cui_io_subtype = args->via_io_subtype;
1102 switch (vio->cui_io_subtype) {
1104 cio->cui_iov = args->u.normal.via_iov;
1105 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1106 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1107 cio->cui_iocb = args->u.normal.via_iocb;
/* Serialize non-group-lock writes; group-locked IO skips the mutex. */
1108 if ((iot == CIT_WRITE) &&
1109 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1110 if (mutex_lock_interruptible(&lli->
1112 GOTO(out, result = -ERESTARTSYS);
1113 write_mutex_locked = 1;
1114 } else if (iot == CIT_READ) {
1115 down_read(&lli->lli_trunc_sem);
1119 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1120 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
1123 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1124 vio->u.splice.cui_flags = args->u.splice.via_flags;
1127 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
1130 result = cl_io_loop(env, io);
1131 if (write_mutex_locked)
1132 mutex_unlock(&lli->lli_write_mutex);
1133 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
1134 up_read(&lli->lli_trunc_sem);
1136 /* cl_io_rw_init() handled IO */
1137 result = io->ci_result;
1140 if (io->ci_nob > 0) {
1141 result = io->ci_nob;
1142 *ppos = io->u.ci_wr.wr.crw_pos;
1146 cl_io_fini(env, io);
1147 /* If any bit been read/written (result != 0), we just return
1148 * short read/write instead of restart io. */
1149 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1150 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1151 iot == CIT_READ ? "read" : "write",
1152 file->f_dentry->d_name.name, *ppos, count);
1153 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1157 if (iot == CIT_READ) {
1159 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1160 LPROC_LL_READ_BYTES, result);
1161 } else if (iot == CIT_WRITE) {
1163 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1164 LPROC_LL_WRITE_BYTES, result);
1165 fd->fd_write_failed = false;
1166 } else if (result != -ERESTARTSYS) {
1167 fd->fd_write_failed = true;
1176 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array and compute the total byte count into *count,
 * possibly shrinking *nr_segs at the first inaccessible segment (same
 * semantics as the kernel's generic segment check).
 * NOTE(review): interior lines (cnt accumulation, truncation of
 * *nr_segs, return) are elided from this extract.
 */
1178 static int ll_file_get_iov_count(const struct iovec *iov,
1179 unsigned long *nr_segs, size_t *count)
1184 for (seg = 0; seg < *nr_segs; seg++) {
1185 const struct iovec *iv = &iov[seg];
1188 * If any segment has a negative length, or the cumulative
1189 * length ever wraps negative then return -EINVAL.
1192 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1194 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1199 cnt -= iv->iov_len; /* This segment is no good */
/*
 * ->aio_read handler: validate the iovec, obtain a cl environment, fill
 * vvp_io_args for a normal (iovec) IO and run the generic IO path for
 * CIT_READ, advancing iocb->ki_pos.
 * NOTE(review): interior lines (rc check, RETURN) are elided from this
 * extract.
 */
1206 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1207 unsigned long nr_segs, loff_t pos)
1210 struct vvp_io_args *args;
1216 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1220 env = cl_env_get(&refcheck);
1222 RETURN(PTR_ERR(env));
1224 args = vvp_env_args(env, IO_NORMAL);
/* Cast away const: the IO machinery stores a mutable iov pointer. */
1225 args->u.normal.via_iov = (struct iovec *)iov;
1226 args->u.normal.via_nrsegs = nr_segs;
1227 args->u.normal.via_iocb = iocb;
1229 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1230 &iocb->ki_pos, count);
1231 cl_env_put(env, &refcheck);
/*
 * Synchronous read (.read): wrap the user buffer in a single-segment
 * iovec plus a sync kiocb (both stored in the cl environment's thread
 * info to avoid stack usage) and forward to ll_file_aio_read().
 */
1235 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1239 struct iovec *local_iov;
1240 struct kiocb *kiocb;
1245 env = cl_env_get(&refcheck);
1247 RETURN(PTR_ERR(env));
1249 local_iov = &vvp_env_info(env)->vti_local_iov;
1250 kiocb = &vvp_env_info(env)->vti_kiocb;
1251 local_iov->iov_base = (void __user *)buf;
1252 local_iov->iov_len = count;
1253 init_sync_kiocb(kiocb, file);
1254 kiocb->ki_pos = *ppos;
1255 kiocb->ki_left = count;
1257 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
/* Propagate the (possibly advanced) position back to the caller. */
1258 *ppos = kiocb->ki_pos;
1260 cl_env_put(env, &refcheck);
1265 * Write to a file (through the page cache).
/*
 * Async write entry point (.aio_write): mirror of ll_file_aio_read()
 * but runs the generic client IO path in CIT_WRITE mode.
 */
1268 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1269 unsigned long nr_segs, loff_t pos)
1272 struct vvp_io_args *args;
1278 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1282 env = cl_env_get(&refcheck);
1284 RETURN(PTR_ERR(env));
1286 args = vvp_env_args(env, IO_NORMAL);
/* Cast away const: the args struct stores a mutable iovec pointer. */
1287 args->u.normal.via_iov = (struct iovec *)iov;
1288 args->u.normal.via_nrsegs = nr_segs;
1289 args->u.normal.via_iocb = iocb;
1291 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1292 &iocb->ki_pos, count);
1293 cl_env_put(env, &refcheck);
/*
 * Synchronous write (.write): wrap the user buffer in a single-segment
 * iovec plus a sync kiocb (from the cl environment's thread info) and
 * forward to ll_file_aio_write(). Mirror of ll_file_read().
 */
1297 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1301 struct iovec *local_iov;
1302 struct kiocb *kiocb;
1307 env = cl_env_get(&refcheck);
1309 RETURN(PTR_ERR(env));
1311 local_iov = &vvp_env_info(env)->vti_local_iov;
1312 kiocb = &vvp_env_info(env)->vti_kiocb;
1313 local_iov->iov_base = (void __user *)buf;
1314 local_iov->iov_len = count;
1315 init_sync_kiocb(kiocb, file);
1316 kiocb->ki_pos = *ppos;
1317 kiocb->ki_left = count;
1319 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
/* Propagate the (possibly advanced) position back to the caller. */
1320 *ppos = kiocb->ki_pos;
1322 cl_env_put(env, &refcheck);
1327 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read handler: run the generic client IO path in CIT_READ mode
 * with IO_SPLICE args (pipe + splice flags) instead of an iovec.
 */
1329 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1330 struct pipe_inode_info *pipe, size_t count,
1334 struct vvp_io_args *args;
1339 env = cl_env_get(&refcheck);
1341 RETURN(PTR_ERR(env));
1343 args = vvp_env_args(env, IO_SPLICE);
1344 args->u.splice.via_pipe = pipe;
1345 args->u.splice.via_flags = flags;
1347 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1348 cl_env_put(env, &refcheck);
/*
 * Re-create the OST objects for @inode's layout (used after an OST
 * object is lost). Clones the in-core stripe md, marks the obdo with
 * OBD_FL_RECREATE_OBJS and issues obd_create() under the inode size
 * lock so the size cannot change during recreation.
 */
1352 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
1355 struct obd_export *exp = ll_i2dtexp(inode);
1356 struct obd_trans_info oti = { 0 };
1357 struct obdo *oa = NULL;
1360 struct lov_stripe_md *lsm = NULL, *lsm2;
1367 lsm = ccc_inode_lsm_get(inode);
/* No layout/objects yet: nothing to recreate. */
1368 if (!lsm_has_objects(lsm))
1369 GOTO(out, rc = -ENOENT);
1371 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1372 (lsm->lsm_stripe_count));
1374 OBD_ALLOC_LARGE(lsm2, lsm_size);
1376 GOTO(out, rc = -ENOMEM);
/* o_nlink is overloaded here to carry the target OST index. */
1379 oa->o_nlink = ost_idx;
1380 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1381 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1382 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1383 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1384 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1385 memcpy(lsm2, lsm, lsm_size);
1386 ll_inode_size_lock(inode);
1387 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1388 ll_inode_size_unlock(inode);
1390 OBD_FREE_LARGE(lsm2, lsm_size);
1393 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_RECREATE_OBJ handler: copy the ll_recreate_obj request from
 * userspace, build an MDT0 ost_id from the requested object id and
 * delegate to ll_lov_recreate(). Root-only (CFS_CAP_SYS_ADMIN).
 */
1398 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1400 struct ll_recreate_obj ucreat;
1404 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1407 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1411 ostid_set_seq_mdt0(&oi);
1412 ostid_set_id(&oi, ucreat.lrc_id);
1413 RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID handler: copy a lu_fid from userspace, convert it
 * to an ost_id, derive the OST index from the fid sequence and delegate
 * to ll_lov_recreate(). Root-only (CFS_CAP_SYS_ADMIN).
 */
1416 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1423 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1426 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1429 fid_to_ostid(&fid, &oi);
/* OST index is encoded in bits 16..31 of the fid sequence. */
1430 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1431 RETURN(ll_lov_recreate(inode, &oi, ost_idx));
/*
 * Set the striping EA on @inode by re-opening the file with an IT_OPEN
 * intent carrying the lov_user_md. Fails early if a layout already
 * exists (striping can only be set once). Runs under the inode size
 * lock; releases the open handle and intent before returning.
 */
1434 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1435 __u64 flags, struct lov_user_md *lum,
1438 struct lov_stripe_md *lsm = NULL;
1439 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1443 lsm = ccc_inode_lsm_get(inode);
1445 ccc_inode_lsm_put(inode, lsm);
1446 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1451 ll_inode_size_lock(inode);
1452 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1455 rc = oit.d.lustre.it_status;
1457 GOTO(out_req_free, rc);
/* Close the MDS open handle acquired by the intent open above. */
1459 ll_release_openhandle(file->f_dentry, &oit);
1462 ll_inode_size_unlock(inode);
1463 ll_intent_release(&oit);
1464 ccc_inode_lsm_put(inode, lsm);
/* Error path: drop the request pinned by the intent before exit. */
1467 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA for @filename (a child of directory @inode) via
 * md_getattr_name. On success *lmmp points into the reply buffer
 * (caller keeps *request pinned until done) and *lmm_size is set.
 * The EA arrives little-endian from the MDS and is byte-swapped to
 * host order on big-endian machines before being handed to userspace.
 */
1471 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1472 struct lov_mds_md **lmmp, int *lmm_size,
1473 struct ptlrpc_request **request)
1475 struct ll_sb_info *sbi = ll_i2sbi(inode);
1476 struct mdt_body *body;
1477 struct lov_mds_md *lmm = NULL;
1478 struct ptlrpc_request *req = NULL;
1479 struct md_op_data *op_data;
1482 rc = ll_get_max_mdsize(sbi, &lmmsize);
1486 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1487 strlen(filename), lmmsize,
1488 LUSTRE_OPC_ANY, NULL);
1489 if (IS_ERR(op_data))
1490 RETURN(PTR_ERR(op_data));
1492 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1493 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1494 ll_finish_md_op_data(op_data);
1496 CDEBUG(D_INFO, "md_getattr_name failed "
1497 "on %s: rc %d\n", filename, rc);
1501 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1502 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1504 lmmsize = body->eadatasize;
/* No EA present (or zero-sized): report -ENODATA to the caller. */
1506 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1508 GOTO(out, rc = -ENODATA);
1511 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1512 LASSERT(lmm != NULL);
/* Only V1/V3 layouts are understood here; anything else is a protocol error. */
1514 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1515 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1516 GOTO(out, rc = -EPROTO);
1520 * This is coming from the MDS, so is probably in
1521 * little endian. We convert it to host endian before
1522 * passing it to userspace.
/* True only on big-endian hosts: the swab below is a no-op on LE. */
1524 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1527 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1528 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1531 /* if function called for directory - we should
1532 * avoid swab not existent lsm objects */
1533 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1534 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1535 if (S_ISREG(body->mode))
1536 lustre_swab_lov_user_md_objects(
1537 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1539 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1540 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1541 if (S_ISREG(body->mode))
1542 lustre_swab_lov_user_md_objects(
1543 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1550 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (with one ost_data slot)
 * from userspace and apply it via ll_lov_setstripe_ea_info() with
 * MDS_OPEN_HAS_OBJS, i.e. the caller supplies pre-existing objects.
 * Root-only (CFS_CAP_SYS_ADMIN).
 */
1555 static int ll_lov_setea(struct inode *inode, struct file *file,
1558 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1559 struct lov_user_md *lump;
1560 int lum_size = sizeof(struct lov_user_md) +
1561 sizeof(struct lov_user_ost_data);
1565 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1568 OBD_ALLOC_LARGE(lump, lum_size);
1572 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1573 OBD_FREE_LARGE(lump, lum_size);
1577 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1579 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: read the user's lov_user_md (first as
 * V1; re-read as V3 if the magic says so), set the stripe EA, then on
 * success refresh the layout and copy the resulting striping back to
 * the user buffer via LL_IOC_LOV_GETSTRIPE.
 */
1583 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1586 struct lov_user_md_v3 lumv3;
1587 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1588 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1589 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1591 __u64 flags = FMODE_WRITE;
1594 /* first try with v1 which is smaller than v3 */
1595 lum_size = sizeof(struct lov_user_md_v1);
1596 if (copy_from_user(lumv1, lumv1p, lum_size))
1599 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1600 lum_size = sizeof(struct lov_user_md_v3);
1601 if (copy_from_user(&lumv3, lumv3p, lum_size))
1605 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1607 struct lov_stripe_md *lsm;
/* Clear the user's stripe_count so GETSTRIPE fills in real values. */
1610 put_user(0, &lumv1p->lmm_stripe_count);
1612 ll_layout_refresh(inode, &gen);
1613 lsm = ccc_inode_lsm_get(inode);
1614 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1615 0, lsm, (void *)arg);
1616 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_LOV_GETSTRIPE handler: hand the inode's stripe md to the LOV
 * layer, which copies the striping description out to userspace @arg.
 */
1621 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1623 struct lov_stripe_md *lsm;
1627 lsm = ccc_inode_lsm_get(inode);
1629 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1631 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_GROUP_LOCK handler: take a cl-layer group lock with gid @arg
 * and record it in the per-open-file ll_file_data. Only one group lock
 * per file descriptor; the fd_flags check/store is serialized by
 * lli_lock, and a race with another thread after the unlocked
 * cl_get_grouplock() call is detected and resolved below.
 */
1635 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1637 struct ll_inode_info *lli = ll_i2info(inode);
1638 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1639 struct ccc_grouplock grouplock;
1643 if (ll_file_nolock(file))
1644 RETURN(-EOPNOTSUPP);
1646 spin_lock(&lli->lli_lock);
1647 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1648 CWARN("group lock already existed with gid %lu\n",
1649 fd->fd_grouplock.cg_gid);
1650 spin_unlock(&lli->lli_lock);
1653 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1654 spin_unlock(&lli->lli_lock);
/* Enqueue outside the spinlock; may block unless O_NONBLOCK is set. */
1656 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1657 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1661 spin_lock(&lli->lli_lock);
1662 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1663 spin_unlock(&lli->lli_lock);
1664 CERROR("another thread just won the race\n");
1665 cl_put_grouplock(&grouplock);
1669 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1670 fd->fd_grouplock = grouplock;
1671 spin_unlock(&lli->lli_lock);
1673 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock recorded on this
 * file descriptor, verifying the caller's gid matches the one held.
 * The fd state is cleared under lli_lock; the actual cl-layer release
 * happens after dropping the spinlock.
 */
1679 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1681 struct ll_inode_info *lli = ll_i2info(inode);
1682 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1683 struct ccc_grouplock grouplock;
1684 spin_lock(&lli->lli_lock);
1685 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1686 spin_unlock(&lli->lli_lock);
1687 CWARN("no group lock held\n");
1690 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1692 if (fd->fd_grouplock.cg_gid != arg) {
1693 CWARN("group lock %lu doesn't match current id %lu\n",
1694 arg, fd->fd_grouplock.cg_gid);
1695 spin_unlock(&lli->lli_lock);
/* Snapshot then clear fd state so the release runs lock-free. */
1699 grouplock = fd->fd_grouplock;
1700 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1701 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1702 spin_unlock(&lli->lli_lock);
1704 cl_put_grouplock(&grouplock);
1705 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1710 * Close inode open handle
1712 * \param dentry [in] dentry which contains the inode
1713 * \param it [in,out] intent which contains open info and result
1716 * \retval <0 failure
/*
 * Close the MDS open handle that an intent open left behind (see the
 * comment block above for parameter/return documentation). No-op for
 * the filesystem root or when the intent carries no open disposition.
 */
1718 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1720 struct inode *inode = dentry->d_inode;
1721 struct obd_client_handle *och;
1727 /* Root ? Do nothing. */
1728 if (dentry->d_inode->i_sb->s_root == dentry)
1731 /* No open handle to close? Move away */
1732 if (!it_disposition(it, DISP_OPEN_OPEN))
1735 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1737 OBD_ALLOC(och, sizeof(*och));
1739 GOTO(out, rc = -ENOMEM);
/* Populate the client handle from the intent's open reply. */
1741 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1743 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1746 /* this one is in place of ll_file_open */
1747 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1748 ptlrpc_req_finished(it->d.lustre.it_data);
1749 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1755 * Get size for inode for which FIEMAP mapping is requested.
1756 * Make the FIEMAP get_info call and returns the result.
/*
 * Perform the FIEMAP extent-mapping request for @inode: validate the
 * flags, optionally flush dirty pages (FIEMAP_FLAG_SYNC), then send the
 * fiemap key to the OSTs via obd_get_info(KEY_FIEMAP). The result is
 * written back into @fiemap (@num_bytes is the full buffer size).
 */
1758 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1761 struct obd_export *exp = ll_i2dtexp(inode);
1762 struct lov_stripe_md *lsm = NULL;
1763 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1764 int vallen = num_bytes;
1768 /* Checks for fiemap flags */
1769 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* Report back which flags we do NOT support, per FIEMAP convention. */
1770 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1774 /* Check for FIEMAP_FLAG_SYNC */
1775 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1776 rc = filemap_fdatawrite(inode->i_mapping);
1781 lsm = ccc_inode_lsm_get(inode);
1785 /* If the stripe_count > 1 and the application does not understand
1786 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1788 if (lsm->lsm_stripe_count > 1 &&
1789 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1790 GOTO(out, rc = -EOPNOTSUPP);
1792 fm_key.oa.o_oi = lsm->lsm_oi;
1793 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1795 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1796 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1797 /* If filesize is 0, then there would be no objects for mapping */
1798 if (fm_key.oa.o_size == 0) {
1799 fiemap->fm_mapped_extents = 0;
1803 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1805 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1808 CERROR("obd_get_info failed: rc = %d\n", rc);
1811 ccc_inode_lsm_put(inode, lsm);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.
 * Reads a getinfo_fid2path header from userspace to learn the caller's
 * path buffer length, allocates an output buffer of that size, runs
 * the iocontrol, and copies the result back. Restricted to callers
 * with DAC_READ_SEARCH unless the mount allows user fid2path.
 */
1815 int ll_fid2path(struct inode *inode, void *arg)
1817 struct obd_export *exp = ll_i2mdexp(inode);
1818 struct getinfo_fid2path *gfout, *gfin;
1822 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1823 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1826 /* Need to get the buflen */
1827 OBD_ALLOC_PTR(gfin);
1830 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* Output = fixed header + user-requested pathname space. */
1835 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1836 OBD_ALLOC(gfout, outsize);
1837 if (gfout == NULL) {
1841 memcpy(gfout, gfin, sizeof(*gfout));
1844 /* Call mdc_iocontrol */
1845 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1849 if (copy_to_user(arg, gfout, outsize))
1853 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size a kernel fiemap buffer from the
 * user's fm_extent_count, copy the request (and, when extents were
 * requested, the first extent used as the continuation cookie) from
 * userspace, run ll_do_fiemap() and copy the header plus mapped
 * extents back.
 */
1857 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1859 struct ll_user_fiemap *fiemap_s;
1860 size_t num_bytes, ret_bytes;
1861 unsigned int extent_count;
1864 /* Get the extent count so we can calculate the size of
1865 * required fiemap buffer */
1866 if (get_user(extent_count,
1867 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1869 num_bytes = sizeof(*fiemap_s) + (extent_count *
1870 sizeof(struct ll_fiemap_extent));
1872 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1873 if (fiemap_s == NULL)
1876 /* get the fiemap value */
1877 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1879 GOTO(error, rc = -EFAULT);
1881 /* If fm_extent_count is non-zero, read the first extent since
1882 * it is used to calculate end_offset and device from previous
1885 if (copy_from_user(&fiemap_s->fm_extents[0],
1886 (char __user *)arg + sizeof(*fiemap_s),
1887 sizeof(struct ll_fiemap_extent)))
1888 GOTO(error, rc = -EFAULT);
1891 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
/* Copy back the header, plus only the extents actually mapped. */
1895 ret_bytes = sizeof(struct ll_user_fiemap);
1897 if (extent_count != 0)
1898 ret_bytes += (fiemap_s->fm_mapped_extents *
1899 sizeof(struct ll_fiemap_extent));
1901 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1905 OBD_FREE_LARGE(fiemap_s, num_bytes);
1910 * Read the data_version for inode.
1912 * This value is computed using stripe object version on OST.
1913 * Version is computed using server side locking.
1915 * @param extent_lock Take extent lock. Not needed if a process is already
1916 * holding the OST object group locks.
/*
 * Fetch the inode's data_version from its OST objects (see the comment
 * block above). A file with no objects reports version 0; a reply
 * lacking OBD_MD_FLDATAVERSION leaves *data_version unset on that path.
 */
1918 int ll_data_version(struct inode *inode, __u64 *data_version,
1921 struct lov_stripe_md *lsm = NULL;
1922 struct ll_sb_info *sbi = ll_i2sbi(inode);
1923 struct obdo *obdo = NULL;
1927 /* If no stripe, we consider version is 0. */
1928 lsm = ccc_inode_lsm_get(inode);
1929 if (!lsm_has_objects(lsm)) {
1931 CDEBUG(D_INODE, "No object for inode\n");
1935 OBD_ALLOC_PTR(obdo);
1937 GOTO(out, rc = -ENOMEM);
/* extent_lock = 0 assumes the caller already holds OST group locks. */
1939 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
1941 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1944 *data_version = obdo->o_data_version;
1950 ccc_inode_lsm_put(inode, lsm);
1955 * Trigger a HSM release request for the provided inode.
/*
 * HSM release: take a write lease on the file, snapshot its
 * data_version and merged attributes, then close the handle with
 * MDS_OPEN_RELEASE so the MDT can punch the file's data while keeping
 * the archived copy. The lease is closed on the error path.
 */
1957 int ll_hsm_release(struct inode *inode)
1959 struct cl_env_nest nest;
1961 struct obd_client_handle *och = NULL;
1962 __u64 data_version = 0;
1966 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
1967 ll_get_fsname(inode->i_sb, NULL, 0),
1968 PFID(&ll_i2info(inode)->lli_fid));
1970 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
1972 GOTO(out, rc = PTR_ERR(och));
1974 /* Grab latest data_version and [am]time values */
1975 rc = ll_data_version(inode, &data_version, 1);
1979 env = cl_env_nested_get(&nest);
1981 GOTO(out, rc = PTR_ERR(env));
/* Merge OST attributes into the inode before sending the close. */
1983 ll_merge_lvb(env, inode);
1984 cl_env_nested_put(&nest, env);
1986 /* Release the file.
1987 * NB: lease lock handle is released in mdc_hsm_release_pack() because
1988 * we still need it to pack l_remote_handle to MDT. */
1989 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
1995 if (och != NULL && !IS_ERR(och)) /* close the file */
1996 ll_lease_close(och, inode, NULL);
/*
 * Scratch state for ll_swap_layouts(): saved timestamps (ia1/ia2) and
 * the two inodes plus per-inode data-version check requests. Kept in a
 * heap-allocated struct to avoid a large stack frame.
 */
2001 struct ll_swap_stack {
2002 struct iattr ia1, ia2;
2004 struct inode *inode1, *inode2;
2005 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS implementation: atomically exchange the
 * layouts of two regular files on the same filesystem. Optionally
 * takes group locks to flush dirty cache, verifies data versions if
 * requested, sends the swap through md_op_data to the MDT, and can
 * restore the original mtime/atime afterwards.
 */
2008 static int ll_swap_layouts(struct file *file1, struct file *file2,
2009 struct lustre_swap_layouts *lsl)
2011 struct mdc_swap_layouts msl;
2012 struct md_op_data *op_data;
2015 struct ll_swap_stack *llss = NULL;
2018 OBD_ALLOC_PTR(llss);
2022 llss->inode1 = file1->f_dentry->d_inode;
2023 llss->inode2 = file2->f_dentry->d_inode;
2025 if (!S_ISREG(llss->inode2->i_mode))
2026 GOTO(free, rc = -EINVAL);
2028 if (inode_permission(llss->inode1, MAY_WRITE) ||
2029 inode_permission(llss->inode2, MAY_WRITE))
2030 GOTO(free, rc = -EPERM);
2032 if (llss->inode2->i_sb != llss->inode1->i_sb)
2033 GOTO(free, rc = -EXDEV);
2035 /* we use 2 bool because it is easier to swap than 2 bits */
2036 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2037 llss->check_dv1 = true;
2039 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2040 llss->check_dv2 = true;
2042 /* we cannot use lsl->sl_dvX directly because we may swap them */
2043 llss->dv1 = lsl->sl_dv1;
2044 llss->dv2 = lsl->sl_dv2;
2046 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2047 if (rc == 0) /* same file, done! */
/* Order the pair by FID to get a canonical locking/processing order. */
2050 if (rc < 0) { /* sequentialize it */
2051 swap(llss->inode1, llss->inode2);
2053 swap(llss->dv1, llss->dv2);
2054 swap(llss->check_dv1, llss->check_dv2);
2058 if (gid != 0) { /* application asks to flush dirty cache */
2059 rc = ll_get_grouplock(llss->inode1, file1, gid);
2063 rc = ll_get_grouplock(llss->inode2, file2, gid);
2065 ll_put_grouplock(llss->inode1, file1, gid);
2070 /* to be able to restore mtime and atime after swap
2071 * we need to first save them */
2073 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2074 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2075 llss->ia1.ia_atime = llss->inode1->i_atime;
2076 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2077 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2078 llss->ia2.ia_atime = llss->inode2->i_atime;
2079 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2082 /* ultimate check, before swaping the layouts we check if
2083 * dataversion has changed (if requested) */
2084 if (llss->check_dv1) {
2085 rc = ll_data_version(llss->inode1, &dv, 0);
2088 if (dv != llss->dv1)
2089 GOTO(putgl, rc = -EAGAIN);
2092 if (llss->check_dv2) {
2093 rc = ll_data_version(llss->inode2, &dv, 0);
2096 if (dv != llss->dv2)
2097 GOTO(putgl, rc = -EAGAIN);
2100 /* struct md_op_data is used to send the swap args to the mdt
2101 * only flags is missing, so we use struct mdc_swap_layouts
2102 * through the md_op_data->op_data */
2103 /* flags from user space have to be converted before they are send to
2104 * server, no flag is sent today, they are only used on the client */
2107 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2108 0, LUSTRE_OPC_ANY, &msl);
2109 if (IS_ERR(op_data))
2110 GOTO(free, rc = PTR_ERR(op_data));
2112 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2113 sizeof(*op_data), op_data, NULL);
2114 ll_finish_md_op_data(op_data);
2118 ll_put_grouplock(llss->inode2, file2, gid);
2119 ll_put_grouplock(llss->inode1, file1, gid);
2122 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2126 /* clear useless flags */
2127 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2128 llss->ia1.ia_valid &= ~ATTR_MTIME;
2129 llss->ia2.ia_valid &= ~ATTR_MTIME;
2132 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2133 llss->ia1.ia_valid &= ~ATTR_ATIME;
2134 llss->ia2.ia_valid &= ~ATTR_ATIME;
2137 /* update time if requested */
/* Note the saved attrs are applied crosswise: ia2 -> inode1, ia1 -> inode2,
 * because the layouts (and hence the data) were exchanged above. */
2139 if (llss->ia2.ia_valid != 0) {
2140 mutex_lock(&llss->inode1->i_mutex);
2141 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2142 mutex_unlock(&llss->inode1->i_mutex);
2145 if (llss->ia1.ia_valid != 0) {
2148 mutex_lock(&llss->inode2->i_mutex);
2149 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2150 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Send an HSM state-set request (set/clear flag masks) for @inode to
 * the MDT. Non-root callers may only touch flags within HSM_USER_MASK.
 */
2162 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2164 struct md_op_data *op_data;
2167 /* Non-root users are forbidden to set or clear flags which are
2168 * NOT defined in HSM_USER_MASK. */
2169 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2170 !cfs_capable(CFS_CAP_SYS_ADMIN))
2173 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2174 LUSTRE_OPC_ANY, hss);
2175 if (IS_ERR(op_data))
2176 RETURN(PTR_ERR(op_data));
2178 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2179 sizeof(*op_data), op_data, NULL);
2181 ll_finish_md_op_data(op_data);
/*
 * HSM import: mark a freshly created regular file as an existing,
 * archived, released copy (HS_ARCHIVED|HS_EXISTS|HS_RELEASED with the
 * given archive id), then restore the metadata recorded in
 * @hui (mode/uid/gid/size/times) via ll_setattr_raw().
 */
2186 static int ll_hsm_import(struct inode *inode, struct file *file,
2187 struct hsm_user_import *hui)
2189 struct hsm_state_set *hss = NULL;
2190 struct iattr *attr = NULL;
2194 if (!S_ISREG(inode->i_mode))
2200 GOTO(out, rc = -ENOMEM);
2202 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2203 hss->hss_archive_id = hui->hui_archive_id;
2204 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2205 rc = ll_hsm_state_set(inode, hss);
2209 OBD_ALLOC_PTR(attr);
2211 GOTO(out, rc = -ENOMEM);
/* Force S_IFREG and strip everything but the permission bits. */
2213 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2214 attr->ia_mode |= S_IFREG;
2215 attr->ia_uid = hui->hui_uid;
2216 attr->ia_gid = hui->hui_gid;
2217 attr->ia_size = hui->hui_size;
2218 attr->ia_mtime.tv_sec = hui->hui_mtime;
2219 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2220 attr->ia_atime.tv_sec = hui->hui_atime;
2221 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2223 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2224 ATTR_UID | ATTR_GID |
2225 ATTR_MTIME | ATTR_MTIME_SET |
2226 ATTR_ATIME | ATTR_ATIME_SET;
2228 rc = ll_setattr_raw(file->f_dentry, attr, true);
/*
 * Main unlocked_ioctl dispatcher for regular Lustre files. Handles
 * llite-private commands (flags, striping, group locks, FID/path
 * translation, data version, HSM, leases, layout swap) and falls
 * through to generic OBD iocontrol for everything else.
 */
2242 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2244 struct inode *inode = file->f_dentry->d_inode;
2245 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2249 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
2250 inode->i_generation, inode, cmd);
2251 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2253 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2254 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2258 case LL_IOC_GETFLAGS:
2259 /* Get the current value of the file flags */
2260 return put_user(fd->fd_flags, (int *)arg);
2261 case LL_IOC_SETFLAGS:
2262 case LL_IOC_CLRFLAGS:
2263 /* Set or clear specific file flags */
2264 /* XXX This probably needs checks to ensure the flags are
2265 * not abused, and to handle any flag side effects.
2267 if (get_user(flags, (int *) arg))
2270 if (cmd == LL_IOC_SETFLAGS) {
/* Locking can only be disabled for O_DIRECT files. */
2271 if ((flags & LL_FILE_IGNORE_LOCK) &&
2272 !(file->f_flags & O_DIRECT)) {
2273 CERROR("%s: unable to disable locking on "
2274 "non-O_DIRECT file\n", current->comm);
2278 fd->fd_flags |= flags;
2280 fd->fd_flags &= ~flags;
2283 case LL_IOC_LOV_SETSTRIPE:
2284 RETURN(ll_lov_setstripe(inode, file, arg));
2285 case LL_IOC_LOV_SETEA:
2286 RETURN(ll_lov_setea(inode, file, arg));
2287 case LL_IOC_LOV_SWAP_LAYOUTS: {
2289 struct lustre_swap_layouts lsl;
2291 if (copy_from_user(&lsl, (char *)arg,
2292 sizeof(struct lustre_swap_layouts)))
/* Both files must be writable for a layout swap. */
2295 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2298 file2 = fget(lsl.sl_fd);
2303 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2304 rc = ll_swap_layouts(file, file2, &lsl);
2308 case LL_IOC_LOV_GETSTRIPE:
2309 RETURN(ll_lov_getstripe(inode, arg));
2310 case LL_IOC_RECREATE_OBJ:
2311 RETURN(ll_lov_recreate_obj(inode, arg));
2312 case LL_IOC_RECREATE_FID:
2313 RETURN(ll_lov_recreate_fid(inode, arg));
2314 case FSFILT_IOC_FIEMAP:
2315 RETURN(ll_ioctl_fiemap(inode, arg));
2316 case FSFILT_IOC_GETFLAGS:
2317 case FSFILT_IOC_SETFLAGS:
2318 RETURN(ll_iocontrol(inode, file, cmd, arg));
2319 case FSFILT_IOC_GETVERSION_OLD:
2320 case FSFILT_IOC_GETVERSION:
2321 RETURN(put_user(inode->i_generation, (int *)arg));
2322 case LL_IOC_GROUP_LOCK:
2323 RETURN(ll_get_grouplock(inode, file, arg));
2324 case LL_IOC_GROUP_UNLOCK:
2325 RETURN(ll_put_grouplock(inode, file, arg));
2326 case IOC_OBD_STATFS:
2327 RETURN(ll_obd_statfs(inode, (void *)arg));
2329 /* We need to special case any other ioctls we want to handle,
2330 * to send them to the MDS/OST as appropriate and to properly
2331 * network encode the arg field.
2332 case FSFILT_IOC_SETVERSION_OLD:
2333 case FSFILT_IOC_SETVERSION:
2335 case LL_IOC_FLUSHCTX:
2336 RETURN(ll_flush_ctx(inode));
2337 case LL_IOC_PATH2FID: {
2338 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2339 sizeof(struct lu_fid)))
2344 case OBD_IOC_FID2PATH:
2345 RETURN(ll_fid2path(inode, (void *)arg));
2346 case LL_IOC_DATA_VERSION: {
2347 struct ioc_data_version idv;
2350 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
2353 rc = ll_data_version(inode, &idv.idv_version,
2354 !(idv.idv_flags & LL_DV_NOFLUSH));
2356 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2362 case LL_IOC_GET_MDTIDX: {
2365 mdtidx = ll_get_mdt_idx(inode);
2369 if (put_user((int)mdtidx, (int*)arg))
2374 case OBD_IOC_GETDTNAME:
2375 case OBD_IOC_GETMDNAME:
2376 RETURN(ll_get_obd_name(inode, cmd, arg));
2377 case LL_IOC_HSM_STATE_GET: {
2378 struct md_op_data *op_data;
2379 struct hsm_user_state *hus;
2386 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2387 LUSTRE_OPC_ANY, hus);
2388 if (IS_ERR(op_data)) {
2390 RETURN(PTR_ERR(op_data));
2393 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2396 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2399 ll_finish_md_op_data(op_data);
2403 case LL_IOC_HSM_STATE_SET: {
2404 struct hsm_state_set *hss;
2411 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2416 rc = ll_hsm_state_set(inode, hss);
2421 case LL_IOC_HSM_ACTION: {
2422 struct md_op_data *op_data;
2423 struct hsm_current_action *hca;
2430 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2431 LUSTRE_OPC_ANY, hca);
2432 if (IS_ERR(op_data)) {
2434 RETURN(PTR_ERR(op_data));
2437 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2440 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2443 ll_finish_md_op_data(op_data);
2447 case LL_IOC_SET_LEASE: {
2448 struct ll_inode_info *lli = ll_i2info(inode);
2449 struct obd_client_handle *och = NULL;
/* Lease mode must be compatible with the file's open mode. */
2455 if (!(file->f_mode & FMODE_WRITE))
2460 if (!(file->f_mode & FMODE_READ))
/* Unlock path: detach and close any lease held on this fd. */
2465 mutex_lock(&lli->lli_och_mutex);
2466 if (fd->fd_lease_och != NULL) {
2467 och = fd->fd_lease_och;
2468 fd->fd_lease_och = NULL;
2470 mutex_unlock(&lli->lli_och_mutex);
2473 mode = och->och_flags &(FMODE_READ|FMODE_WRITE);
2474 rc = ll_lease_close(och, inode, &lease_broken);
2475 if (rc == 0 && lease_broken)
2481 /* return the type of lease or error */
2482 RETURN(rc < 0 ? rc : (int)mode);
2487 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2489 /* apply for lease */
2490 och = ll_lease_open(inode, file, mode, 0);
2492 RETURN(PTR_ERR(och));
2495 mutex_lock(&lli->lli_och_mutex);
2496 if (fd->fd_lease_och == NULL) {
2497 fd->fd_lease_och = och;
2500 mutex_unlock(&lli->lli_och_mutex);
2502 /* impossible now that only excl is supported for now */
2503 ll_lease_close(och, inode, &lease_broken);
2508 case LL_IOC_GET_LEASE: {
2509 struct ll_inode_info *lli = ll_i2info(inode);
2510 struct ldlm_lock *lock = NULL;
2513 mutex_lock(&lli->lli_och_mutex);
2514 if (fd->fd_lease_och != NULL) {
2515 struct obd_client_handle *och = fd->fd_lease_och;
2517 lock = ldlm_handle2lock(&och->och_lease_handle);
2519 lock_res_and_lock(lock);
/* Report the lease mode only while the DLM lock is still valid. */
2520 if (!ldlm_is_cancel(lock))
2521 rc = och->och_flags &
2522 (FMODE_READ | FMODE_WRITE);
2523 unlock_res_and_lock(lock);
2524 LDLM_LOCK_PUT(lock);
2527 mutex_unlock(&lli->lli_och_mutex);
2530 case LL_IOC_HSM_IMPORT: {
2531 struct hsm_user_import *hui;
2537 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2542 rc = ll_hsm_import(inode, file, hui);
/* Unknown command: try registered iocontrol hooks, then the OSC. */
2551 ll_iocontrol_call(inode, file, cmd, arg, &err))
2554 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2560 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Compat helper (only built without HAVE_FILE_LLSEEK_SIZE): validate
 * @offset against the file mode and @maxsize, then commit it to
 * file->f_pos, resetting f_version when the position actually moves.
 */
2561 static inline loff_t
2562 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2564 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2566 if (offset > maxsize)
2569 if (offset != file->f_pos) {
2570 file->f_pos = offset;
2571 file->f_version = 0;
/*
 * Compat copy of the kernel's generic_file_llseek_size(): handle
 * SEEK_CUR without rewriting an unchanged f_pos, and treat SEEK_DATA /
 * SEEK_HOLE against @eof per their "virtual hole at EOF" semantics,
 * clamping to @maxsize via llseek_execute().
 */
2577 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2578 loff_t maxsize, loff_t eof)
2580 struct inode *inode = file->f_dentry->d_inode;
2588 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2589 * position-querying operation. Avoid rewriting the "same"
2590 * f_pos value back to the file because a concurrent read(),
2591 * write() or lseek() might have altered it
2596 * f_lock protects against read/modify/write race with other
2597 * SEEK_CURs. Note that parallel writes and reads behave
2600 mutex_lock(&inode->i_mutex);
2601 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2602 mutex_unlock(&inode->i_mutex);
2606 * In the generic case the entire file is data, so as long as
2607 * offset isn't at the end of the file then the offset is data.
2614 * There is a virtual hole at the end of the file, so as long as
2615 * offset isn't i_size or larger, return i_size.
2623 return llseek_execute(file, offset, maxsize);
/*
 * .llseek handler: for SEEK_END/SEEK_HOLE/SEEK_DATA the file size must
 * be current, so glimpse it from the OSTs first; then delegate to the
 * (compat) generic llseek with the Lustre maximum file size as limit.
 */
2627 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2629 struct inode *inode = file->f_dentry->d_inode;
2630 loff_t retval, eof = 0;
/* Provisional target, used only for the trace message below. */
2633 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2634 (origin == SEEK_CUR) ? file->f_pos : 0);
2635 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2636 inode->i_ino, inode->i_generation, inode, retval, retval,
2638 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2640 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2641 retval = ll_glimpse_size(inode);
2644 eof = i_size_read(inode);
2647 retval = ll_generic_file_llseek_size(file, offset, origin,
2648 ll_file_maxbytes(inode), eof);
/*
 * .flush handler (called on close()): surface async writeback errors
 * recorded on the inode or its cl object as -EIO, unless this fd was
 * already told about a write failure.
 */
2652 int ll_flush(struct file *file, fl_owner_t id)
2654 struct inode *inode = file->f_dentry->d_inode;
2655 struct ll_inode_info *lli = ll_i2info(inode);
2656 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2659 LASSERT(!S_ISDIR(inode->i_mode));
2661 /* catch async errors that were recorded back when async writeback
2662 * failed for pages in this mapping. */
2663 rc = lli->lli_async_rc;
2664 lli->lli_async_rc = 0;
2665 err = lov_read_and_clear_async_rc(lli->lli_clob);
2669 /* The application has been told write failure already.
2670 * Do not report failure again. */
2671 if (fd->fd_write_failed)
2673 return rc ? -EIO : 0;
2677 * Called to make sure a portion of file has been written out.
2678 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2680 * Return how many pages have been written.
/*
 * Sync a byte range of @inode through the cl-layer CIT_FSYNC io (see
 * the comment block above). @mode selects none/local/discard/all
 * semantics; on success returns the number of pages written.
 */
2682 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2683 enum cl_fsync_mode mode, int ignore_layout)
2685 struct cl_env_nest nest;
2688 struct obd_capa *capa = NULL;
2689 struct cl_fsync_io *fio;
2693 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2694 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2697 env = cl_env_nested_get(&nest);
2699 RETURN(PTR_ERR(env));
2701 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2703 io = ccc_env_thread_io(env);
2704 io->ci_obj = cl_i2info(inode)->lli_clob;
2705 io->ci_ignore_layout = ignore_layout;
2707 /* initialize parameters for sync */
2708 fio = &io->u.ci_fsync;
2709 fio->fi_capa = capa;
2710 fio->fi_start = start;
2712 fio->fi_fid = ll_inode2fid(inode);
2713 fio->fi_mode = mode;
2714 fio->fi_nr_written = 0;
2716 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2717 result = cl_io_loop(env, io);
/* cl_io_init() failed: report its stored result instead. */
2719 result = io->ci_result;
2721 result = fio->fi_nr_written;
2722 cl_io_fini(env, io);
2723 cl_env_nested_put(&nest, env);
2731 * When dentry is provided (the 'else' case), *file->f_dentry may be
2732 * null and dentry must be used directly rather than pulled from
2733 * *file->f_dentry as is done otherwise.
/*
 * fsync() handler; the three signature variants match different kernel
 * VFS APIs, selected by the HAVE_FILE_FSYNC_* configure macros.
 * Flow: flush/wait page cache, harvest async errors, md_sync() to the
 * MDT, and for datasync on regular files push data via
 * cl_sync_file_range(); fd_write_failed tracks whether -EIO was already
 * reported. NOTE(review): excerpt omits some original lines.
 */
2736 #ifdef HAVE_FILE_FSYNC_4ARGS
2737 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2739 struct dentry *dentry = file->f_dentry;
2740 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2741 int ll_fsync(struct file *file, int datasync)
2743 struct dentry *dentry = file->f_dentry;
2745 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2748 struct inode *inode = dentry->d_inode;
2749 struct ll_inode_info *lli = ll_i2info(inode);
2750 struct ptlrpc_request *req;
2751 struct obd_capa *oc;
2755 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2756 inode->i_generation, inode);
2757 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
/* 4-args kernels: the caller no longer holds i_mutex, so write out the
 * requested range and take i_mutex ourselves. */
2759 #ifdef HAVE_FILE_FSYNC_4ARGS
2760 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2761 mutex_lock(&inode->i_mutex);
2763 /* fsync's caller has already called _fdata{sync,write}, we want
2764 * that IO to finish before calling the osc and mdc sync methods */
2765 rc = filemap_fdatawait(inode->i_mapping);
2768 /* catch async errors that were recorded back when async writeback
2769 * failed for pages in this mapping. */
2770 if (!S_ISDIR(inode->i_mode)) {
2771 err = lli->lli_async_rc;
2772 lli->lli_async_rc = 0;
2775 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* Sync metadata with the MDT. */
2780 oc = ll_mdscapa_get(inode);
2781 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2787 ptlrpc_req_finished(req);
/* For data sync on a regular file, flush the whole object range to
 * the OSTs and update the write-failure bookkeeping. */
2789 if (datasync && S_ISREG(inode->i_mode)) {
2790 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2792 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2794 if (rc == 0 && err < 0)
2797 fd->fd_write_failed = true;
2799 fd->fd_write_failed = false;
2802 #ifdef HAVE_FILE_FSYNC_4ARGS
2803 mutex_unlock(&inode->i_mutex);
/*
 * flock()/fcntl() file-locking handler: translates a kernel file_lock
 * (BSD FL_FLOCK or POSIX FL_POSIX) into an LDLM_FLOCK enqueue on the
 * MDT, then mirrors the server's decision into the local kernel lock
 * lists. NOTE(review): excerpt omits some original lines (switch cases,
 * braces, RETURN paths).
 */
2808 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2810 struct inode *inode = file->f_dentry->d_inode;
2811 struct ll_sb_info *sbi = ll_i2sbi(inode);
2812 struct ldlm_enqueue_info einfo = {
2813 .ei_type = LDLM_FLOCK,
2814 .ei_cb_cp = ldlm_flock_completion_ast,
2815 .ei_cbdata = file_lock,
2817 struct md_op_data *op_data;
2818 struct lustre_handle lockh = {0};
2819 ldlm_policy_data_t flock = {{0}};
2825 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2826 inode->i_ino, file_lock);
2828 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
/* Fill the ldlm flock policy from the kernel lock description. */
2830 if (file_lock->fl_flags & FL_FLOCK) {
2831 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2832 /* flocks are whole-file locks */
2833 flock.l_flock.end = OFFSET_MAX;
2834 /* For flocks owner is determined by the local file descriptor*/
2835 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2836 } else if (file_lock->fl_flags & FL_POSIX) {
2837 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2838 flock.l_flock.start = file_lock->fl_start;
2839 flock.l_flock.end = file_lock->fl_end;
2843 flock.l_flock.pid = file_lock->fl_pid;
2845 /* Somewhat ugly workaround for svc lockd.
2846 * lockd installs custom fl_lmops->lm_compare_owner that checks
2847 * for the fl_owner to be the same (which it always is on local node
2848 * I guess between lockd processes) and then compares pid.
2849 * As such we assign pid to the owner field to make it all work,
2850 * conflict with normal locks is unlikely since pid space and
2851 * pointer space for current->files are not intersecting */
2852 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2853 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Map the requested lock type to an ldlm lock mode. */
2855 switch (file_lock->fl_type) {
2857 einfo.ei_mode = LCK_PR;
2860 /* An unlock request may or may not have any relation to
2861 * existing locks so we may not be able to pass a lock handle
2862 * via a normal ldlm_lock_cancel() request. The request may even
2863 * unlock a byte range in the middle of an existing lock. In
2864 * order to process an unlock request we need all of the same
2865 * information that is given with a normal read or write record
2866 * lock request. To avoid creating another ldlm unlock (cancel)
2867 * message we'll treat a LCK_NL flock request as an unlock. */
2868 einfo.ei_mode = LCK_NL;
2871 einfo.ei_mode = LCK_PW;
2874 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2875 file_lock->fl_type);
/* Map the lock command to enqueue flags (non-blocking / test-only). */
2890 flags = LDLM_FL_BLOCK_NOWAIT;
2896 flags = LDLM_FL_TEST_LOCK;
2897 /* Save the old mode so that if the mode in the lock changes we
2898 * can decrement the appropriate reader or writer refcount. */
2899 file_lock->fl_type = einfo.ei_mode;
2902 CERROR("unknown fcntl lock command: %d\n", cmd);
2906 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2907 LUSTRE_OPC_ANY, NULL);
2908 if (IS_ERR(op_data))
2909 RETURN(PTR_ERR(op_data));
2911 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2912 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2913 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
/* Enqueue the flock on the MDT; it arbitrates cluster-wide. */
2915 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2916 op_data, &lockh, &flock, 0, NULL /* req */, flags);
/* Mirror a successful (or unlock) result into the local kernel
 * lock lists so local bookkeeping stays consistent. */
2918 if ((file_lock->fl_flags & FL_FLOCK) &&
2919 (rc == 0 || file_lock->fl_type == F_UNLCK))
2920 rc2 = flock_lock_file_wait(file, file_lock);
2921 if ((file_lock->fl_flags & FL_POSIX) &&
2922 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2923 !(flags & LDLM_FL_TEST_LOCK))
2924 rc2 = posix_lock_file_wait(file, file_lock);
/* Local bookkeeping failed after a server grant: release the server
 * lock again by enqueueing it as LCK_NL (treated as unlock above). */
2926 if (rc2 && file_lock->fl_type != F_UNLCK) {
2927 einfo.ei_mode = LCK_NL;
2928 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2929 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2933 ll_finish_md_op_data(op_data);
/*
 * Lock handler installed for "-o noflock" mounts; body is not visible in
 * this excerpt. Per the ll_file_operations_noflock comment below it
 * exists to refuse flock/lock calls (ENOSYS) - TODO confirm against the
 * full source.
 */
2938 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2946 * test if some locks matching bits and l_req_mode are acquired
2947 * - bits can be in different locks
2948 * - if found clear the common lock bits in *bits
2949 * - the bits not found, are kept in *bits
2951 * \param bits [IN] searched lock bits [IN]
2952 * \param l_req_mode [IN] searched lock mode
2953 * \retval boolean, true iff all bits are found
2955 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2957 struct lustre_handle lockh;
2958 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four modes. */
2959 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2960 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2969 fid = &ll_i2info(inode)->lli_fid;
2970 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2971 ldlm_lockname[mode]);
/* TEST_LOCK: probe only, do not take references on matched locks. */
2973 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe each requested inodebit individually until all are resolved. */
2974 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2975 policy.l_inodebits.bits = *bits & (1 << i);
2976 if (policy.l_inodebits.bits == 0)
2979 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2980 &policy, mode, &lockh)) {
2981 struct ldlm_lock *lock;
2983 lock = ldlm_handle2lock(&lockh);
2986 ~(lock->l_policy_data.l_inodebits.bits);
2987 LDLM_LOCK_PUT(lock);
2989 *bits &= ~policy.l_inodebits.bits;
/*
 * Try to match (and take a reference on) a granted MDS ibits lock on
 * this inode covering the given bits; returns the matched lock mode
 * (0 if none) and fills *lockh on success.
 */
2996 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2997 struct lustre_handle *lockh, __u64 flags,
3000 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3005 fid = &ll_i2info(inode)->lli_fid;
3006 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3008 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3009 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Post-process a revalidation RPC result: -ENOENT on a non-regular,
 * non-directory inode is treated as "already unlinked" (tolerated);
 * other errors are logged. NOTE(review): excerpt omits some lines.
 */
3014 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3016 /* Already unlinked. Just update nlink and return success */
3017 if (rc == -ENOENT) {
3019 /* This path cannot be hit for regular files unless in
3020 * case of obscure races, so no need to to validate
3022 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3024 } else if (rc != 0) {
3025 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
3026 ll_get_fsname(inode->i_sb, NULL, 0),
3027 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate a dentry/inode against the MDT.
 * Two strategies: with OBD_CONNECT_ATTRFID, an intent getattr/lookup by
 * FID (no name) which also refreshes dentry state; otherwise, if no
 * matching MDS ibits lock is cached, a plain md_getattr RPC.
 * NOTE(review): excerpt omits some original lines.
 */
3033 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3036 struct inode *inode = dentry->d_inode;
3037 struct ptlrpc_request *req = NULL;
3038 struct obd_export *exp;
3042 LASSERT(inode != NULL);
3044 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
3045 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
3047 exp = ll_i2mdexp(inode);
3049 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3050 * But under CMD case, it caused some lock issues, should be fixed
3051 * with new CMD ibits lock. See bug 12718 */
3052 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3053 struct lookup_intent oit = { .it_op = IT_GETATTR };
3054 struct md_op_data *op_data;
/* A pure LOOKUP revalidation only needs an IT_LOOKUP intent. */
3056 if (ibits == MDS_INODELOCK_LOOKUP)
3057 oit.it_op = IT_LOOKUP;
3059 /* Call getattr by fid, so do not provide name at all. */
3060 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
3061 dentry->d_inode, NULL, 0, 0,
3062 LUSTRE_OPC_ANY, NULL);
3063 if (IS_ERR(op_data))
3064 RETURN(PTR_ERR(op_data));
/* M_CHECK_STALE asks the MDT to verify the inode is not stale. */
3066 oit.it_create_mode |= M_CHECK_STALE;
3067 rc = md_intent_lock(exp, op_data, NULL, 0,
3068 /* we are not interested in name
3071 ll_md_blocking_ast, 0);
3072 ll_finish_md_op_data(op_data);
3073 oit.it_create_mode &= ~M_CHECK_STALE;
3075 rc = ll_inode_revalidate_fini(inode, rc);
3079 rc = ll_revalidate_it_finish(req, &oit, dentry);
3081 ll_intent_release(&oit);
3085 /* Unlinked? Unhash dentry, so it is not picked up later by
3086 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3087 here to preserve get_cwd functionality on 2.6.
3089 if (!dentry->d_inode->i_nlink)
3090 d_lustre_invalidate(dentry, 0);
3092 ll_lookup_finish_locks(&oit, dentry);
/* No ATTRFID support: only issue a getattr RPC when no suitable
 * MDS ibits lock is already cached locally. */
3093 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3094 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3095 obd_valid valid = OBD_MD_FLGETATTR;
3096 struct md_op_data *op_data;
/* Regular files also need striping EA data in the reply. */
3099 if (S_ISREG(inode->i_mode)) {
3100 rc = ll_get_max_mdsize(sbi, &ealen);
3103 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3106 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3107 0, ealen, LUSTRE_OPC_ANY,
3109 if (IS_ERR(op_data))
3110 RETURN(PTR_ERR(op_data));
3112 op_data->op_valid = valid;
3113 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3114 * capa for this inode. Because we only keep capas of dirs
3116 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3117 ll_finish_md_op_data(op_data);
3119 rc = ll_inode_revalidate_fini(inode, rc);
3123 rc = ll_prep_inode(&inode, req, NULL, NULL);
3126 ptlrpc_req_finished(req);
/*
 * Revalidate metadata, then (for regular files, unless an HSM restore is
 * in progress) glimpse the OSTs to refresh the size; for non-regular
 * files copy the cached LVB timestamps into the inode instead.
 * NOTE(review): excerpt omits some original lines.
 */
3130 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3133 struct inode *inode = dentry->d_inode;
3137 rc = __ll_inode_revalidate_it(dentry, it, ibits);
3141 /* if object isn't regular file, don't validate size */
3142 if (!S_ISREG(inode->i_mode)) {
3143 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
3144 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
3145 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
3147 /* In case of restore, the MDT has the right size and has
3148 * already send it back without granting the layout lock,
3149 * inode is up-to-date so glimpse is useless.
3150 * Also to glimpse we need the layout, in case of a running
3151 * restore the MDT holds the layout lock so the glimpse will
3152 * block up to the end of restore (getattr will block)
3154 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3155 rc = ll_glimpse_size(inode);
/*
 * getattr worker: revalidate UPDATE|LOOKUP metadata, then populate the
 * kstat from the (now fresh) inode fields. 32-bit-API clients get an
 * inode number synthesized from the FID. NOTE(review): excerpt omits
 * some original lines.
 */
3160 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
3161 struct lookup_intent *it, struct kstat *stat)
3163 struct inode *inode = de->d_inode;
3164 struct ll_sb_info *sbi = ll_i2sbi(inode);
3165 struct ll_inode_info *lli = ll_i2info(inode);
3168 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3169 MDS_INODELOCK_LOOKUP);
3170 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3175 stat->dev = inode->i_sb->s_dev;
/* 32-bit userland needs an ino that fits in 32 bits. */
3176 if (ll_need_32bit_api(sbi))
3177 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3179 stat->ino = inode->i_ino;
3180 stat->mode = inode->i_mode;
3181 stat->nlink = inode->i_nlink;
3182 stat->uid = inode->i_uid;
3183 stat->gid = inode->i_gid;
3184 stat->rdev = inode->i_rdev;
3185 stat->atime = inode->i_atime;
3186 stat->mtime = inode->i_mtime;
3187 stat->ctime = inode->i_ctime;
3188 stat->blksize = 1 << inode->i_blkbits;
3190 stat->size = i_size_read(inode);
3191 stat->blocks = inode->i_blocks;
/* VFS ->getattr entry point: delegate with a fresh IT_GETATTR intent. */
3195 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3197 struct lookup_intent it = { .it_op = IT_GETATTR };
3199 return ll_getattr_it(mnt, de, &it, stat);
3202 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * fiemap inode operation: marshal the kernel fiemap_extent_info into a
 * Lustre ll_user_fiemap buffer, run ll_do_fiemap(), and copy the mapped
 * extents back. NOTE(review): excerpt omits some original lines
 * (allocation failure check, RETURN).
 */
3203 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3204 __u64 start, __u64 len)
3208 struct ll_user_fiemap *fiemap;
3209 unsigned int extent_count = fieinfo->fi_extents_max;
3211 num_bytes = sizeof(*fiemap) + (extent_count *
3212 sizeof(struct ll_fiemap_extent));
3213 OBD_ALLOC_LARGE(fiemap, num_bytes);
3218 fiemap->fm_flags = fieinfo->fi_flags;
3219 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3220 fiemap->fm_start = start;
3221 fiemap->fm_length = len;
/* Only the first extent is copied in; it may carry continuation data. */
3222 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3223 sizeof(struct ll_fiemap_extent));
3225 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3227 fieinfo->fi_flags = fiemap->fm_flags;
3228 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3229 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3230 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3232 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * ->get_acl inode operation: return a referenced copy of the cached
 * POSIX ACL under lli_lock; the VFS releases the reference.
 */
3237 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3239 struct ll_inode_info *lli = ll_i2info(inode);
3240 struct posix_acl *acl = NULL;
3243 spin_lock(&lli->lli_lock);
3244 /* VFS' acl_permission_check->check_acl will release the refcount */
3245 acl = posix_acl_dup(lli->lli_posix_acl);
3246 spin_unlock(&lli->lli_lock);
3251 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
/*
 * check_acl callback passed to generic_permission() on kernels whose
 * generic_permission takes more than two arguments; evaluates the cached
 * POSIX ACL against the mask. Without CONFIG_FS_POSIX_ACL it is a stub.
 * NOTE(review): excerpt omits some original lines (RCU bail-out return,
 * NULL-acl check).
 */
3253 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3254 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3256 ll_check_acl(struct inode *inode, int mask)
3259 # ifdef CONFIG_FS_POSIX_ACL
3260 struct posix_acl *acl;
/* ACL evaluation may block; refuse RCU-walk lookups. */
3264 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3265 if (flags & IPERM_FLAG_RCU)
3268 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3273 rc = posix_acl_permission(inode, acl, mask);
3274 posix_acl_release(acl);
3277 # else /* !CONFIG_FS_POSIX_ACL */
3279 # endif /* CONFIG_FS_POSIX_ACL */
3281 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * ->permission inode operation (three kernel-API variants).
 * Refuses RCU-walk, revalidates the root inode before checking it,
 * delegates to the remote-permission path for RMT_CLIENT mounts, and
 * otherwise runs generic permission checking with ll_check_acl.
 * NOTE(review): excerpt omits some original lines.
 */
3283 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3284 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3286 # ifdef HAVE_INODE_PERMISION_2ARGS
3287 int ll_inode_permission(struct inode *inode, int mask)
3289 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
/* Permission checking may block (RPCs); bail out of RCU-walk. */
3296 #ifdef MAY_NOT_BLOCK
3297 if (mask & MAY_NOT_BLOCK)
3299 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3300 if (flags & IPERM_FLAG_RCU)
3304 /* as root inode are NOT getting validated in lookup operation,
3305 * need to do it before permission check. */
3307 if (inode == inode->i_sb->s_root->d_inode) {
3308 struct lookup_intent it = { .it_op = IT_LOOKUP };
3310 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3311 MDS_INODELOCK_LOOKUP);
3316 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
3317 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
/* Remote clients defer permission checks to the MDT. */
3319 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3320 return lustre_check_remote_perm(inode, mask);
3322 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3323 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3328 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no .flock/.lock, so the kernel falls back to
 * node-local locking only (see -o localflock comment above). */
3329 struct file_operations ll_file_operations = {
3330 .read = ll_file_read,
3331 .aio_read = ll_file_aio_read,
3332 .write = ll_file_write,
3333 .aio_write = ll_file_aio_write,
3334 .unlocked_ioctl = ll_file_ioctl,
3335 .open = ll_file_open,
3336 .release = ll_file_release,
3337 .mmap = ll_file_mmap,
3338 .llseek = ll_file_seek,
3339 .splice_read = ll_file_splice_read,
/* file_operations used when cluster-coherent locking is enabled:
 * identical to the default table plus .flock/.lock -> ll_file_flock. */
3344 struct file_operations ll_file_operations_flock = {
3345 .read = ll_file_read,
3346 .aio_read = ll_file_aio_read,
3347 .write = ll_file_write,
3348 .aio_write = ll_file_aio_write,
3349 .unlocked_ioctl = ll_file_ioctl,
3350 .open = ll_file_open,
3351 .release = ll_file_release,
3352 .mmap = ll_file_mmap,
3353 .llseek = ll_file_seek,
3354 .splice_read = ll_file_splice_read,
3357 .flock = ll_file_flock,
3358 .lock = ll_file_flock
3361 /* These are for -o noflock - to return ENOSYS on flock calls */
/* file_operations for -o noflock mounts: .flock/.lock reject locking. */
3362 struct file_operations ll_file_operations_noflock = {
3363 .read = ll_file_read,
3364 .aio_read = ll_file_aio_read,
3365 .write = ll_file_write,
3366 .aio_write = ll_file_aio_write,
3367 .unlocked_ioctl = ll_file_ioctl,
3368 .open = ll_file_open,
3369 .release = ll_file_release,
3370 .mmap = ll_file_mmap,
3371 .llseek = ll_file_seek,
3372 .splice_read = ll_file_splice_read,
3375 .flock = ll_file_noflock,
3376 .lock = ll_file_noflock
/* inode_operations for regular Lustre files; fiemap and get_acl entries
 * are conditional on kernel support macros. */
3379 struct inode_operations ll_file_inode_operations = {
3380 .setattr = ll_setattr,
3381 .getattr = ll_getattr,
3382 .permission = ll_inode_permission,
3383 .setxattr = ll_setxattr,
3384 .getxattr = ll_getxattr,
3385 .listxattr = ll_listxattr,
3386 .removexattr = ll_removexattr,
3387 #ifdef HAVE_LINUX_FIEMAP_H
3388 .fiemap = ll_fiemap,
3390 #ifdef HAVE_IOP_GET_ACL
3391 .get_acl = ll_get_acl,
3395 /* dynamic ioctl number support routins */
/* Registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries protected by an rw_semaphore. NOTE(review): the
 * opening line of the second struct definition is missing from this
 * excerpt; iocd_cmd[0] is an old-style flexible array of handled ioctl
 * command numbers. */
3396 static struct llioc_ctl_data {
3397 struct rw_semaphore ioc_sem;
3398 cfs_list_t ioc_head;
3400 __RWSEM_INITIALIZER(llioc.ioc_sem),
3401 CFS_LIST_HEAD_INIT(llioc.ioc_head)
3406 cfs_list_t iocd_list;
3407 unsigned int iocd_size;
3408 llioc_callback_t iocd_cb;
3409 unsigned int iocd_count;
3410 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler for the given command numbers.
 * Returns an opaque cookie (the allocated llioc_data) to pass to
 * ll_iocontrol_unregister(), or NULL on bad arguments / allocation
 * failure. NOTE(review): excerpt omits some original lines.
 */
3413 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3416 struct llioc_data *in_data = NULL;
3419 if (cb == NULL || cmd == NULL ||
3420 count > LLIOC_MAX_CMD || count < 0)
/* One allocation holds the descriptor plus its command array. */
3423 size = sizeof(*in_data) + count * sizeof(unsigned int);
3424 OBD_ALLOC(in_data, size);
3425 if (in_data == NULL)
3428 memset(in_data, 0, sizeof(*in_data));
3429 in_data->iocd_size = size;
3430 in_data->iocd_cb = cb;
3431 in_data->iocd_count = count;
3432 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3434 down_write(&llioc.ioc_sem);
3435 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3436 up_write(&llioc.ioc_sem);
/*
 * Unregister a handler previously returned by ll_iocontrol_register();
 * finds it by cookie identity under the write lock, unlinks and frees
 * it. Warns if the cookie is unknown. NOTE(review): excerpt omits the
 * cookie-match comparison line.
 */
3441 void ll_iocontrol_unregister(void *magic)
3443 struct llioc_data *tmp;
3448 down_write(&llioc.ioc_sem);
3449 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* Save the size before freeing; it is needed by OBD_FREE. */
3451 unsigned int size = tmp->iocd_size;
3453 cfs_list_del(&tmp->iocd_list);
3454 up_write(&llioc.ioc_sem);
3456 OBD_FREE(tmp, size);
3460 up_write(&llioc.ioc_sem);
3462 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3465 EXPORT_SYMBOL(ll_iocontrol_register);
3466 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch an ioctl to registered dynamic handlers: walk the registry
 * under the read lock, call the first handler claiming this cmd, and
 * stop when one returns LLIOC_STOP; its rc is passed back via *rcp.
 */
3468 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3469 unsigned int cmd, unsigned long arg, int *rcp)
3471 enum llioc_iter ret = LLIOC_CONT;
3472 struct llioc_data *data;
3473 int rc = -EINVAL, i;
3475 down_read(&llioc.ioc_sem);
3476 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3477 for (i = 0; i < data->iocd_count; i++) {
3478 if (cmd != data->iocd_cmd[i])
3481 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3485 if (ret == LLIOC_STOP)
3488 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration into the cl_object stack via cl_conf_set.
 * For OBJECT_CONF_SET the layout lock is only allowed to match after the
 * layout has been applied, so stale layouts are never observed.
 * NOTE(review): excerpt omits some original lines.
 */
3495 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3497 struct ll_inode_info *lli = ll_i2info(inode);
3498 struct cl_env_nest nest;
/* Nothing to configure without a cl object. */
3503 if (lli->lli_clob == NULL)
3506 env = cl_env_nested_get(&nest);
3508 RETURN(PTR_ERR(env));
3510 result = cl_conf_set(env, lli->lli_clob, conf);
3511 cl_env_nested_put(&nest, env);
3513 if (conf->coc_opc == OBJECT_CONF_SET) {
3514 struct ldlm_lock *lock = conf->coc_lock;
3516 LASSERT(lock != NULL);
3517 LASSERT(ldlm_has_layout(lock));
3519 /* it can only be allowed to match after layout is
3520 * applied to inode otherwise false layout would be
3521 * seen. Applying layout shoud happen before dropping
3522 * the intent lock. */
3523 ldlm_lock_allow_match(lock);
3529 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
/*
 * If the layout lock's LVB is not already populated, fetch the LOV EA
 * via md_getxattr and install it as the lock's lvb_data (replacing any
 * previous buffer under the resource lock). NOTE(review): excerpt omits
 * some original lines (error checks, out label, RETURN).
 */
3530 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3533 struct ll_sb_info *sbi = ll_i2sbi(inode);
3534 struct obd_capa *oc;
3535 struct ptlrpc_request *req;
3536 struct mdt_body *body;
3543 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3544 PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
3545 lock->l_lvb_data, lock->l_lvb_len);
/* Already have a ready LVB - nothing to fetch. */
3547 if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY))
3550 /* if layout lock was granted right away, the layout is returned
3551 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3552 * blocked and then granted via completion ast, we have to fetch
3553 * layout here. Please note that we can't use the LVB buffer in
3554 * completion AST because it doesn't have a large enough buffer */
3555 oc = ll_mdscapa_get(inode);
3556 rc = ll_get_max_mdsize(sbi, &lmmsize);
3558 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3559 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3565 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3566 if (body == NULL || body->eadatasize > lmmsize)
3567 GOTO(out, rc = -EPROTO);
3569 lmmsize = body->eadatasize;
3570 if (lmmsize == 0) /* empty layout */
3573 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3575 GOTO(out, rc = -EFAULT);
3577 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3578 if (lvbdata == NULL)
3579 GOTO(out, rc = -ENOMEM);
/* Swap the new LVB buffer in under the resource lock. */
3581 memcpy(lvbdata, lmm, lmmsize);
3582 lock_res_and_lock(lock);
3583 if (lock->l_lvb_data != NULL)
3584 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3586 lock->l_lvb_data = lvbdata;
3587 lock->l_lvb_len = lmmsize;
3588 unlock_res_and_lock(lock);
3593 ptlrpc_req_finished(req);
3598 * Apply the layout to the inode. Layout lock is held and will be released
/*
 * Unpack the layout carried in the lock's LVB (fetching it first if
 * needed), configure it into the cl_object stack, and return the layout
 * generation in *gen. If reconfiguration hits -EBUSY, wait for in-flight
 * IO to drain via an OBJECT_CONF_WAIT. The lock reference passed in is
 * always dropped. NOTE(review): excerpt omits some original lines.
 */
3601 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3602 struct inode *inode, __u32 *gen, bool reconf)
3604 struct ll_inode_info *lli = ll_i2info(inode);
3605 struct ll_sb_info *sbi = ll_i2sbi(inode);
3606 struct ldlm_lock *lock;
3607 struct lustre_md md = { NULL };
3608 struct cl_object_conf conf;
3611 bool wait_layout = false;
3614 LASSERT(lustre_handle_is_used(lockh));
3616 lock = ldlm_handle2lock(lockh);
3617 LASSERT(lock != NULL);
3618 LASSERT(ldlm_has_layout(lock));
3620 LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
3621 inode, PFID(&lli->lli_fid), reconf);
3623 /* in case this is a caching lock and reinstate with new inode */
3624 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3626 lock_res_and_lock(lock);
3627 lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
3628 unlock_res_and_lock(lock);
3629 /* checking lvb_ready is racy but this is okay. The worst case is
3630 * that multi processes may configure the file on the same time. */
3632 if (lvb_ready || !reconf) {
3635 /* layout_gen must be valid if layout lock is not
3636 * cancelled and stripe has already set */
3637 *gen = lli->lli_layout_gen;
/* Make sure the LVB actually holds the layout before unpacking. */
3643 rc = ll_layout_fetch(inode, lock);
3647 /* for layout lock, lmm is returned in lock's lvb.
3648 * lvb_data is immutable if the lock is held so it's safe to access it
3649 * without res lock. See the description in ldlm_lock_decref_internal()
3650 * for the condition to free lvb_data of layout lock */
3651 if (lock->l_lvb_data != NULL) {
3652 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3653 lock->l_lvb_data, lock->l_lvb_len);
3655 *gen = LL_LAYOUT_GEN_EMPTY;
3657 *gen = md.lsm->lsm_layout_gen;
3660 CERROR("%s: file "DFID" unpackmd error: %d\n",
3661 ll_get_fsname(inode->i_sb, NULL, 0),
3662 PFID(&lli->lli_fid), rc);
3668 /* set layout to file. Unlikely this will fail as old layout was
3669 * surely eliminated */
3670 memset(&conf, 0, sizeof conf);
3671 conf.coc_opc = OBJECT_CONF_SET;
3672 conf.coc_inode = inode;
3673 conf.coc_lock = lock;
3674 conf.u.coc_md = &md;
3675 rc = ll_layout_conf(inode, &conf);
3678 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3680 /* refresh layout failed, need to wait */
3681 wait_layout = rc == -EBUSY;
3685 LDLM_LOCK_PUT(lock);
3686 ldlm_lock_decref(lockh, mode);
3688 /* wait for IO to complete if it's still being used. */
3690 CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
3691 ll_get_fsname(inode->i_sb, NULL, 0),
3692 inode, PFID(&lli->lli_fid));
/* OBJECT_CONF_WAIT blocks until outstanding IO against the old
 * layout has drained. */
3694 memset(&conf, 0, sizeof conf);
3695 conf.coc_opc = OBJECT_CONF_WAIT;
3696 conf.coc_inode = inode;
3697 rc = ll_layout_conf(inode, &conf);
3701 CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
3702 PFID(&lli->lli_fid), rc);
3708 * This function checks if there exists a LAYOUT lock on the client side,
3709 * or enqueues it if it doesn't have one in cache.
3711 * This function will not hold layout lock so it may be revoked any time after
3712 * this function returns. Any operations depend on layout should be redone
3715 * This function should be called before lov_io_init() to get an uptodate
3716 * layout version, the caller should save the version number and after IO
3717 * is finished, this function should be called again to verify that layout
3718 * is not changed during IO time.
/* NOTE(review): excerpt omits some original lines (retry loop edges,
 * enqueue argument tail, RETURN paths). */
3720 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3722 struct ll_inode_info *lli = ll_i2info(inode);
3723 struct ll_sb_info *sbi = ll_i2sbi(inode);
3724 struct md_op_data *op_data;
3725 struct lookup_intent it;
3726 struct lustre_handle lockh;
3728 struct ldlm_enqueue_info einfo = {
3729 .ei_type = LDLM_IBITS,
3731 .ei_cb_bl = ll_md_blocking_ast,
3732 .ei_cb_cp = ldlm_completion_ast,
/* Without server-side layout lock support, the cached generation
 * is all there is. */
3737 *gen = lli->lli_layout_gen;
3738 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3742 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3743 LASSERT(S_ISREG(inode->i_mode));
3745 /* mostly layout lock is caching on the local side, so try to match
3746 * it before grabbing layout lock mutex. */
3747 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3748 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3749 if (mode != 0) { /* hit cached lock */
3750 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3754 /* better hold lli_layout_mutex to try again otherwise
3755 * it will have starvation problem. */
3758 /* take layout lock mutex to enqueue layout lock exclusively. */
3759 mutex_lock(&lli->lli_layout_mutex);
3762 /* try again. Maybe somebody else has done this. */
3763 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3764 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3765 if (mode != 0) { /* hit cached lock */
3766 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3770 mutex_unlock(&lli->lli_layout_mutex);
3774 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3775 0, 0, LUSTRE_OPC_ANY, NULL);
3776 if (IS_ERR(op_data)) {
3777 mutex_unlock(&lli->lli_layout_mutex);
3778 RETURN(PTR_ERR(op_data));
3781 /* have to enqueue one */
3782 memset(&it, 0, sizeof(it));
3783 it.it_op = IT_LAYOUT;
3784 lockh.cookie = 0ULL;
3786 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
3787 ll_get_fsname(inode->i_sb, NULL, 0), inode,
3788 PFID(&lli->lli_fid));
3790 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
/* The intent request is no longer needed once the lock state has
 * been extracted; drop it before processing the lock. */
3792 if (it.d.lustre.it_data != NULL)
3793 ptlrpc_req_finished(it.d.lustre.it_data);
3794 it.d.lustre.it_data = NULL;
3796 ll_finish_md_op_data(op_data);
3798 mode = it.d.lustre.it_lock_mode;
3799 it.d.lustre.it_lock_mode = 0;
3800 ll_intent_drop_lock(&it);
3803 /* set lock data in case this is a new lock */
3804 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3805 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3809 mutex_unlock(&lli->lli_layout_mutex);
3815 * This function send a restore request to the MDT
3817 int ll_layout_restore(struct inode *inode)
3819 struct hsm_user_request *hur;
3823 len = sizeof(struct hsm_user_request) +
3824 sizeof(struct hsm_user_item);
3825 OBD_ALLOC(hur, len);
3829 hur->hur_request.hr_action = HUA_RESTORE;
3830 hur->hur_request.hr_archive_id = 0;
3831 hur->hur_request.hr_flags = 0;
3832 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3833 sizeof(hur->hur_user_item[0].hui_fid));
3834 hur->hur_user_item[0].hui_extent.length = -1;
3835 hur->hur_request.hr_itemcount = 1;
3836 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,