4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/*
 * Allocate a per-open-file ll_file_data from the dedicated slab cache
 * (__GFP_IO allocation context) and reset its write-failure flag.
 * NOTE(review): this listing is incomplete — the allocation-failure
 * check and the return of @fd are not visible here.
 */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
61 fd->fd_write_failed = false;
/* Return a ll_file_data obtained from ll_file_data_get() to its slab. */
66 static void ll_file_data_put(struct ll_file_data *fd)
69 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the inode's current state into @op_data for an MDS request:
 * fid, mode, a/m/ctime, size, blocks, ext-converted inode flags, the
 * current IO epoch, the open file handle @fh, and an MDS capability.
 * If the inode carries LLIF_DATA_MODIFIED, also set the
 * MDS_DATA_MODIFIED bias so the MDS learns data was changed.
 */
72 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
73 struct lustre_handle *fh)
75 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
76 op_data->op_attr.ia_mode = inode->i_mode;
77 op_data->op_attr.ia_atime = inode->i_atime;
78 op_data->op_attr.ia_mtime = inode->i_mtime;
79 op_data->op_attr.ia_ctime = inode->i_ctime;
80 op_data->op_attr.ia_size = i_size_read(inode);
81 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the Lustre-extended iattr wrapper, hence the cast. */
82 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
83 ll_inode_to_ext_flags(inode->i_flags);
84 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
86 op_data->op_handle = *fh;
87 op_data->op_capa1 = ll_mdscapa_get(inode);
89 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
90 op_data->op_bias |= MDS_DATA_MODIFIED;
/*
 * Closes the IO epoch and packs all the attributes into @op_data for
 * the CLOSE rpc.  Mode/time attributes are always valid; size/blocks
 * are included only for writers when Size-on-MDS is not connected (or
 * for non-regular files).  NOTE(review): listing is incomplete — the
 * early-out for non-write handles and the epoch-close call ordering
 * around the `och` pointer are only partially visible.
 */
97 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
98 struct obd_client_handle *och)
102 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
103 ATTR_MTIME | ATTR_MTIME_SET |
104 ATTR_CTIME | ATTR_CTIME_SET;
106 if (!(och->och_flags & FMODE_WRITE))
109 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
110 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* Close the IO epoch; may clear the caller's och pointer (passed by address). */
112 ll_ioepoch_close(inode, op_data, &och, 0);
115 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
116 ll_prep_md_op_data(op_data, inode, NULL, NULL,
117 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send a CLOSE rpc to the MDS for open handle @och on @inode.
 * If @data_version is non-NULL, the close is an HSM release: the data
 * version and lease handle are packed and MDS_HSM_RELEASE is set.
 * After md_close(): optionally performs the Size-on-MDS update the MDS
 * asked for, clears LLIF_DATA_MODIFIED on success, destroys OST
 * objects recorded in the reply, and checks OBD_MD_FLRELEASED for HSM
 * release.  Finally clears replay data and poisons the handle cookie.
 * NOTE(review): listing is incomplete — error branches, the epoch/SOM
 * retry path, and several closing braces are not visible here.
 */
121 static int ll_close_inode_openhandle(struct obd_export *md_exp,
123 struct obd_client_handle *och,
124 const __u64 *data_version)
126 struct obd_export *exp = ll_i2mdexp(inode);
127 struct md_op_data *op_data;
128 struct ptlrpc_request *req = NULL;
129 struct obd_device *obd = class_exp2obd(exp);
136 * XXX: in case of LMV, is this correct to access
139 CERROR("Invalid MDC connection handle "LPX64"\n",
140 ll_i2mdexp(inode)->exp_handle.h_cookie);
144 OBD_ALLOC_PTR(op_data);
146 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
148 ll_prepare_close(inode, op_data, och);
149 if (data_version != NULL) {
150 /* Pass in data_version implies release. */
151 op_data->op_bias |= MDS_HSM_RELEASE;
152 op_data->op_data_version = *data_version;
153 op_data->op_lease_handle = och->och_lease_handle;
154 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
156 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
157 rc = md_close(md_exp, op_data, och->och_mod, &req);
159 /* This close must have the epoch closed. */
160 LASSERT(epoch_close);
161 /* MDS has instructed us to obtain Size-on-MDS attribute from
162 * OSTs and send setattr to back to MDS. */
163 rc = ll_som_update(inode, op_data);
165 CERROR("inode %lu mdc Size-on-MDS update failed: "
166 "rc = %d\n", inode->i_ino, rc);
170 CERROR("inode %lu mdc close failed: rc = %d\n",
174 /* DATA_MODIFIED flag was successfully sent on close, cancel data
175 * modification flag. */
176 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
177 struct ll_inode_info *lli = ll_i2info(inode);
179 spin_lock(&lli->lli_lock);
180 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
181 spin_unlock(&lli->lli_lock);
/* Destroy OST objects listed in the close reply (unlinked-open cleanup). */
185 rc = ll_objects_destroy(req, inode);
187 CERROR("inode %lu ll_objects destroy: rc = %d\n",
191 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
192 struct mdt_body *body;
193 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
194 if (!(body->valid & OBD_MD_FLRELEASED))
198 ll_finish_md_op_data(op_data);
/* If SOM is enabled and the epoch is still open for a writer, queue the
 * DONE_WRITING work instead of dropping the state now. */
202 if (exp_connect_som(exp) && !epoch_close &&
203 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
204 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
206 md_clear_open_replay_data(md_exp, och);
207 /* Free @och if it is not waiting for DONE_WRITING. */
208 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
211 if (req) /* This is close request */
212 ptlrpc_req_finished(req);
/*
 * Really close the MDS open handle of the given mode (read/write/exec)
 * if no users remain.  Picks the och pointer and use-count matching
 * @flags, and — under lli_och_mutex — takes ownership of the handle
 * when the use count has dropped to zero, then closes it via
 * ll_close_inode_openhandle().  NOTE(review): listing is incomplete —
 * the och detach/NULLing inside the mutex is not visible here.
 */
216 int ll_md_real_close(struct inode *inode, int flags)
218 struct ll_inode_info *lli = ll_i2info(inode);
219 struct obd_client_handle **och_p;
220 struct obd_client_handle *och;
225 if (flags & FMODE_WRITE) {
226 och_p = &lli->lli_mds_write_och;
227 och_usecount = &lli->lli_open_fd_write_count;
228 } else if (flags & FMODE_EXEC) {
229 och_p = &lli->lli_mds_exec_och;
230 och_usecount = &lli->lli_open_fd_exec_count;
232 LASSERT(flags & FMODE_READ);
233 och_p = &lli->lli_mds_read_och;
234 och_usecount = &lli->lli_open_fd_read_count;
237 mutex_lock(&lli->lli_och_mutex);
238 if (*och_usecount) { /* There are still users of this handle, so
240 mutex_unlock(&lli->lli_och_mutex);
245 mutex_unlock(&lli->lli_och_mutex);
247 if (och) { /* There might be a race and somebody have freed this och
249 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file close bookkeeping: drop any group lock, clean up a lease
 * left behind by the application, close a lease-converted open handle,
 * decrement the open-mode use count, and — unless a matching OPEN DLM
 * lock lets us skip it — call ll_md_real_close() to talk to the MDS.
 * Finally detaches and frees the ll_file_data and closes capabilities.
 * NOTE(review): listing is incomplete — lockmode selection and several
 * closing braces are not visible here.
 */
256 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
259 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
260 struct ll_inode_info *lli = ll_i2info(inode);
264 /* clear group lock, if present */
265 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
266 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
268 if (fd->fd_lease_och != NULL) {
271 /* Usually the lease is not released when the
272 * application crashed, we need to release here. */
273 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
274 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
275 PFID(&lli->lli_fid), rc, lease_broken);
277 fd->fd_lease_och = NULL;
280 if (fd->fd_och != NULL) {
281 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
286 /* Let's see if we have good enough OPEN lock on the file and if
287 we can skip talking to MDS */
288 if (file->f_dentry->d_inode) { /* Can this ever be false? */
290 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
291 struct lustre_handle lockh;
292 struct inode *inode = file->f_dentry->d_inode;
293 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
295 mutex_lock(&lli->lli_och_mutex);
296 if (fd->fd_omode & FMODE_WRITE) {
298 LASSERT(lli->lli_open_fd_write_count);
299 lli->lli_open_fd_write_count--;
300 } else if (fd->fd_omode & FMODE_EXEC) {
302 LASSERT(lli->lli_open_fd_exec_count);
303 lli->lli_open_fd_exec_count--;
306 LASSERT(lli->lli_open_fd_read_count);
307 lli->lli_open_fd_read_count--;
309 mutex_unlock(&lli->lli_och_mutex);
/* Only issue the real close if no cached OPEN lock matches (TEST_LOCK
 * keeps the match side-effect free). */
311 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
312 LDLM_IBITS, &policy, lockmode,
314 rc = ll_md_real_close(file->f_dentry->d_inode,
318 CERROR("Releasing a file %p with negative dentry %p. Name %s",
319 file, file->f_dentry, file->f_dentry->d_name.name);
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
325 ll_capa_close(inode);
/* While this returns an error code, fput() the caller does not, so we need
 * to make every effort to clean up all of our state here. Also, applications
 * rarely check close errors and even if an error is returned they will not
 * re-try the close call.
 *
 * VFS ->release() entry point: tears down remote-ACL state for the
 * root inode, tallies stats, stops a statahead thread this opener
 * started, short-circuits for the root dentry, clears async write
 * errors on regular files, then calls ll_md_close().
 */
335 int ll_file_release(struct inode *inode, struct file *file)
337 struct ll_file_data *fd;
338 struct ll_sb_info *sbi = ll_i2sbi(inode);
339 struct ll_inode_info *lli = ll_i2info(inode);
343 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
344 inode->i_generation, inode);
346 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL cleanup applies only to the filesystem root. */
347 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
348 inode == inode->i_sb->s_root->d_inode) {
349 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
352 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
353 fd->fd_flags &= ~LL_FILE_RMTACL;
354 rct_del(&sbi->ll_rct, cfs_curproc_pid());
355 et_search_free(&sbi->ll_et, cfs_curproc_pid());
360 if (inode->i_sb->s_root != file->f_dentry)
361 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
362 fd = LUSTRE_FPRIVATE(file);
365 /* The last ref on @file, maybe not the the owner pid of statahead.
366 * Different processes can open the same dir, "ll_opendir_key" means:
367 * it is me that should stop the statahead thread. */
368 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
369 lli->lli_opendir_pid != 0)
370 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root dentry never has an MDS open handle to close; just free fd. */
372 if (inode->i_sb->s_root == file->f_dentry) {
373 LUSTRE_FPRIVATE(file) = NULL;
374 ll_file_data_put(fd);
378 if (!S_ISDIR(inode->i_mode)) {
379 lov_read_and_clear_async_rc(lli->lli_clob);
380 lli->lli_async_rc = 0;
383 rc = ll_md_close(sbi->ll_md_exp, inode, file);
385 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
386 libcfs_debug_dumplog();
/*
 * Issue an intent-based OPEN to the MDS for @file.  Requests an OPEN
 * lock (unless only setting stripe info via @lmm/@lmmsize), sends the
 * intent through md_intent_lock(), and on success primes the dentry's
 * inode from the reply and attaches lock data.  Failure paths release
 * any server open handle and drop the intent's lock/request refs.
 */
391 static int ll_intent_file_open(struct file *file, void *lmm,
392 int lmmsize, struct lookup_intent *itp)
394 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
395 struct dentry *parent = file->f_dentry->d_parent;
396 struct md_op_data *op_data;
397 struct ptlrpc_request *req;
398 __u32 opc = LUSTRE_OPC_ANY;
405 /* Usually we come here only for NFSD, and we want open lock.
406 But we can also get here with pre 2.6.15 patchless kernels, and in
407 that case that lock is also ok */
408 /* We can also get here if there was cached open handle in revalidate_it
409 * but it disappeared while we were getting from there to ll_file_open.
410 * But this means this file was closed and immediatelly opened which
411 * makes a good candidate for using OPEN lock */
412 /* If lmmsize & lmm are not 0, we are just setting stripe info
413 * parameters. No need for the open lock */
414 if (lmm == NULL && lmmsize == 0) {
415 itp->it_flags |= MDS_OPEN_LOCK;
416 if (itp->it_flags & FMODE_WRITE)
417 opc = LUSTRE_OPC_CREATE;
420 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
421 file->f_dentry->d_inode, NULL, 0,
425 RETURN(PTR_ERR(op_data));
/* The child fid is already known, so open by fid rather than by name. */
427 itp->it_flags |= MDS_OPEN_BY_FID;
428 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
429 0 /*unused */, &req, ll_md_blocking_ast, 0);
430 ll_finish_md_op_data(op_data);
432 /* reason for keep own exit path - don`t flood log
433 * with messages with -ESTALE errors.
435 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
436 it_open_error(DISP_OPEN_OPEN, itp))
/* Server granted an open we can no longer use; close it again. */
438 ll_release_openhandle(file->f_dentry, itp);
442 if (it_disposition(itp, DISP_LOOKUP_NEG))
443 GOTO(out, rc = -ENOENT);
445 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
446 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
447 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
451 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
452 if (!rc && itp->d.lustre.it_lock_mode)
453 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
457 ptlrpc_req_finished(itp->d.lustre.it_data);
458 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
459 ll_intent_drop_lock(itp);
/*
 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
 * not believe attributes if a few ioepoch holders exist. Attributes for
 * previous ioepoch if new one is opened are also skipped by MDS.
 * A zero @ioepoch or one equal to the current epoch is ignored.
 */
469 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
471 if (ioepoch && lli->lli_ioepoch != ioepoch) {
472 lli->lli_ioepoch = ioepoch;
473 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
474 ioepoch, PFID(&lli->lli_fid));
/*
 * Fill an obd_client_handle from the MDS open reply carried by intent
 * @it: file handle, fid, lease-lock cookie, magic and open flags.
 * Returns the result of registering the open for replay with the MDC.
 */
478 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
479 struct obd_client_handle *och)
481 struct ptlrpc_request *req = it->d.lustre.it_data;
482 struct mdt_body *body;
484 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
485 och->och_fh = body->handle;
486 och->och_fid = body->fid1;
487 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
488 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
489 och->och_flags = it->it_flags;
491 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish a local open: when @och is given, fill it from the intent's
 * open reply and pick up the new IO epoch; then attach @fd as the
 * file's private data, init readahead state, and record the open mode.
 * NOTE(review): listing is incomplete — the conditional around the
 * och-fill section and the success return are not visible here.
 */
494 int ll_local_open(struct file *file, struct lookup_intent *it,
495 struct ll_file_data *fd, struct obd_client_handle *och)
497 struct inode *inode = file->f_dentry->d_inode;
498 struct ll_inode_info *lli = ll_i2info(inode);
501 LASSERT(!LUSTRE_FPRIVATE(file));
506 struct ptlrpc_request *req = it->d.lustre.it_data;
507 struct mdt_body *body;
510 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
514 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
515 ll_ioepoch_open(lli, body->ioepoch);
518 LUSTRE_FPRIVATE(file) = fd;
519 ll_readahead_init(inode, &fd->fd_ras);
520 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
/* Open a file, and (for the very first open) create objects on the OSTs at
 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
 * creation or open until ll_lov_setstripe() ioctl is called.
 *
 * If we already have the stripe MD locally then we don't request it in
 * md_open(), by passing a lmm_size = 0.
 *
 * It is up to the application to ensure no other processes open this file
 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
 * used. We might be able to avoid races of that sort by getting lli_open_sem
 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
 *
 * VFS ->open() entry point.  Flow: allocate ll_file_data; register as
 * statahead owner for directories; fast-path the root dentry; if no
 * intent disposition was passed down, build a fresh IT_OPEN intent
 * from f_flags; reuse a cached MDS open handle of the matching mode
 * when present, otherwise issue ll_intent_file_open() outside
 * lli_och_mutex (to avoid a deadlock with the blocking AST) and stash
 * the new handle; finally complete via ll_local_open().
 * NOTE(review): listing is incomplete — retry loop, several error
 * labels and closing braces are not visible here.
 */
538 int ll_file_open(struct inode *inode, struct file *file)
540 struct ll_inode_info *lli = ll_i2info(inode);
541 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
542 .it_flags = file->f_flags };
543 struct obd_client_handle **och_p = NULL;
544 __u64 *och_usecount = NULL;
545 struct ll_file_data *fd;
546 int rc = 0, opendir_set = 0;
549 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
550 inode->i_generation, inode, file->f_flags);
552 it = file->private_data; /* XXX: compat macro */
553 file->private_data = NULL; /* prevent ll_local_open assertion */
555 fd = ll_file_data_get();
557 GOTO(out_openerr, rc = -ENOMEM);
/* First opener of a directory becomes the statahead owner. */
560 if (S_ISDIR(inode->i_mode)) {
561 spin_lock(&lli->lli_sa_lock);
562 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
563 lli->lli_opendir_pid == 0) {
564 lli->lli_opendir_key = fd;
565 lli->lli_opendir_pid = cfs_curproc_pid();
568 spin_unlock(&lli->lli_sa_lock);
571 if (inode->i_sb->s_root == file->f_dentry) {
572 LUSTRE_FPRIVATE(file) = fd;
576 if (!it || !it->d.lustre.it_disposition) {
577 /* Convert f_flags into access mode. We cannot use file->f_mode,
578 * because everything but O_ACCMODE mask was stripped from
580 if ((oit.it_flags + 1) & O_ACCMODE)
582 if (file->f_flags & O_TRUNC)
583 oit.it_flags |= FMODE_WRITE;
585 /* kernel only call f_op->open in dentry_open. filp_open calls
586 * dentry_open after call to open_namei that checks permissions.
587 * Only nfsd_open call dentry_open directly without checking
588 * permissions and because of that this code below is safe. */
589 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
590 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
592 /* We do not want O_EXCL here, presumably we opened the file
593 * already? XXX - NFS implications? */
594 oit.it_flags &= ~O_EXCL;
596 /* bug20584, if "it_flags" contains O_CREAT, the file will be
597 * created if necessary, then "IT_CREAT" should be set to keep
598 * consistent with it */
599 if (oit.it_flags & O_CREAT)
600 oit.it_op |= IT_CREAT;
606 /* Let's see if we have file open on MDS already. */
607 if (it->it_flags & FMODE_WRITE) {
608 och_p = &lli->lli_mds_write_och;
609 och_usecount = &lli->lli_open_fd_write_count;
610 } else if (it->it_flags & FMODE_EXEC) {
611 och_p = &lli->lli_mds_exec_och;
612 och_usecount = &lli->lli_open_fd_exec_count;
614 och_p = &lli->lli_mds_read_och;
615 och_usecount = &lli->lli_open_fd_read_count;
618 mutex_lock(&lli->lli_och_mutex);
619 if (*och_p) { /* Open handle is present */
620 if (it_disposition(it, DISP_OPEN_OPEN)) {
621 /* Well, there's extra open request that we do not need,
622 let's close it somehow. This will decref request. */
623 rc = it_open_error(DISP_OPEN_OPEN, it);
625 mutex_unlock(&lli->lli_och_mutex);
626 GOTO(out_openerr, rc);
629 ll_release_openhandle(file->f_dentry, it);
633 rc = ll_local_open(file, it, fd, NULL);
636 mutex_unlock(&lli->lli_och_mutex);
637 GOTO(out_openerr, rc);
640 LASSERT(*och_usecount == 0);
641 if (!it->d.lustre.it_disposition) {
642 /* We cannot just request lock handle now, new ELC code
643 means that one of other OPEN locks for this file
644 could be cancelled, and since blocking ast handler
645 would attempt to grab och_mutex as well, that would
646 result in a deadlock */
647 mutex_unlock(&lli->lli_och_mutex);
648 it->it_create_mode |= M_CHECK_STALE;
649 rc = ll_intent_file_open(file, NULL, 0, it);
650 it->it_create_mode &= ~M_CHECK_STALE;
652 GOTO(out_openerr, rc);
656 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
658 GOTO(out_och_free, rc = -ENOMEM);
662 /* md_intent_lock() didn't get a request ref if there was an
663 * open error, so don't do cleanup on the request here
665 /* XXX (green): Should not we bail out on any error here, not
666 * just open error? */
667 rc = it_open_error(DISP_OPEN_OPEN, it);
669 GOTO(out_och_free, rc);
671 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
673 rc = ll_local_open(file, it, fd, *och_p);
675 GOTO(out_och_free, rc);
677 mutex_unlock(&lli->lli_och_mutex);
680 /* Must do this outside lli_och_mutex lock to prevent deadlock where
681 different kind of OPEN lock for this same inode gets cancelled
682 by ldlm_cancel_lru */
683 if (!S_ISREG(inode->i_mode))
684 GOTO(out_och_free, rc);
688 if (!lli->lli_has_smd) {
689 if (file->f_flags & O_LOV_DELAY_CREATE ||
690 !(file->f_mode & FMODE_WRITE)) {
691 CDEBUG(D_INODE, "object creation was delayed\n");
692 GOTO(out_och_free, rc);
695 file->f_flags &= ~O_LOV_DELAY_CREATE;
696 GOTO(out_och_free, rc);
/* Error/cleanup paths below: free the handle, undo statahead
 * registration, release fd, and drop the intent's open reference. */
700 if (och_p && *och_p) {
701 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
702 *och_p = NULL; /* OBD_FREE writes some magic there */
705 mutex_unlock(&lli->lli_och_mutex);
708 if (opendir_set != 0)
709 ll_stop_statahead(inode, lli->lli_opendir_key);
711 ll_file_data_put(fd);
713 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
716 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
717 ptlrpc_req_finished(it->d.lustre.it_data);
718 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for a lease lock.  On LDLM_CB_BLOCKING it simply
 * cancels the lease lock asynchronously; unlike ll_md_blocking_ast it
 * performs no openhandle handling (see the comment in ll_lease_open).
 */
724 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
725 struct ldlm_lock_desc *desc, void *data, int flag)
728 struct lustre_handle lockh;
732 case LDLM_CB_BLOCKING:
733 ldlm_lock2handle(lock, &lockh);
734 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
736 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
740 case LDLM_CB_CANCELING:
/*
 * Acquire a lease and open the file.
 *
 * @fmode must be exactly FMODE_READ or FMODE_WRITE.  When @file is
 * given, reuses that descriptor's existing MDS open handle (only if
 * this process is its sole opener) so the MDT can match ownership via
 * op_handle.  Sends an IT_OPEN intent with MDS_OPEN_LEASE and a
 * dedicated blocking AST; validates that a lease (OPEN ibits lock)
 * was actually granted, else closes the handle and fails.
 * Returns the obd_client_handle on success, ERR_PTR on failure.
 * NOTE(review): listing is incomplete — och allocation, some error
 * labels and the final return are not visible here.
 */
750 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
751 fmode_t fmode, __u64 open_flags)
753 struct lookup_intent it = { .it_op = IT_OPEN };
754 struct ll_sb_info *sbi = ll_i2sbi(inode);
755 struct md_op_data *op_data;
756 struct ptlrpc_request *req;
757 struct lustre_handle old_handle = { 0 };
758 struct obd_client_handle *och = NULL;
763 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
764 RETURN(ERR_PTR(-EINVAL));
767 struct ll_inode_info *lli = ll_i2info(inode);
768 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
769 struct obd_client_handle **och_p;
/* The requested lease mode must be within the file's open mode. */
772 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
773 RETURN(ERR_PTR(-EPERM));
775 /* Get the openhandle of the file */
777 mutex_lock(&lli->lli_och_mutex);
778 if (fd->fd_lease_och != NULL) {
779 mutex_unlock(&lli->lli_och_mutex);
783 if (fd->fd_och == NULL) {
784 if (file->f_mode & FMODE_WRITE) {
785 LASSERT(lli->lli_mds_write_och != NULL);
786 och_p = &lli->lli_mds_write_och;
787 och_usecount = &lli->lli_open_fd_write_count;
789 LASSERT(lli->lli_mds_read_och != NULL);
790 och_p = &lli->lli_mds_read_och;
791 och_usecount = &lli->lli_open_fd_read_count;
793 if (*och_usecount == 1) {
800 mutex_unlock(&lli->lli_och_mutex);
801 if (rc < 0) /* more than 1 opener */
804 LASSERT(fd->fd_och != NULL);
805 old_handle = fd->fd_och->och_fh;
810 RETURN(ERR_PTR(-ENOMEM));
812 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
813 LUSTRE_OPC_ANY, NULL);
815 GOTO(out, rc = PTR_ERR(op_data));
817 /* To tell the MDT this openhandle is from the same owner */
818 op_data->op_handle = old_handle;
820 it.it_flags = fmode | open_flags;
821 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
822 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
823 ll_md_blocking_lease_ast,
824 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
825 * it can be cancelled which may mislead applications that the lease is
827 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
828 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
829 * doesn't deal with openhandle, so normal openhandle will be leaked. */
830 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
831 ll_finish_md_op_data(op_data);
833 ptlrpc_req_finished(req);
834 it_clear_disposition(&it, DISP_ENQ_COMPLETE);
837 GOTO(out_release_it, rc);
839 if (it_disposition(&it, DISP_LOOKUP_NEG))
840 GOTO(out_release_it, rc = -ENOENT);
842 rc = it_open_error(DISP_OPEN_OPEN, &it);
844 GOTO(out_release_it, rc);
846 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
847 ll_och_fill(sbi->ll_md_exp, &it, och);
849 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
850 GOTO(out_close, rc = -EOPNOTSUPP);
852 /* already get lease, handle lease lock */
853 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
854 if (it.d.lustre.it_lock_mode == 0 ||
855 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
856 /* open lock must return for lease */
857 CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
858 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
859 it.d.lustre.it_lock_bits);
860 GOTO(out_close, rc = -EPROTO);
863 ll_intent_release(&it);
/* out_close: undo the open we just performed, then drop the lease lock. */
867 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
869 CERROR("Close openhandle returned %d\n", rc2);
871 /* cancel open lock */
872 if (it.d.lustre.it_lock_mode != 0) {
873 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
874 it.d.lustre.it_lock_mode);
875 it.d.lustre.it_lock_mode = 0;
878 ll_intent_release(&it);
883 EXPORT_SYMBOL(ll_lease_open);
/*
 * Release lease and close the file.
 * It will check if the lease has ever broken.
 *
 * Looks up the lease lock to see whether it was already cancelled
 * (i.e. the lease was broken); reports that via @lease_broken, cancels
 * the lock if still held, then closes the open handle on the MDS.
 */
889 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
892 struct ldlm_lock *lock;
893 bool cancelled = true;
897 lock = ldlm_handle2lock(&och->och_lease_handle);
899 lock_res_and_lock(lock);
900 cancelled = ldlm_is_cancel(lock);
901 unlock_res_and_lock(lock);
905 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
906 PFID(&ll_i2info(inode)->lli_fid), cancelled);
909 ldlm_cli_cancel(&och->och_lease_handle, 0);
910 if (lease_broken != NULL)
911 *lease_broken = cancelled;
913 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
917 EXPORT_SYMBOL(ll_lease_close);
/* Fills the obdo with the attributes for the lsm */
/*
 * Issue an async OST getattr over all stripes of @lsm and wait for the
 * merged result in @obdo.  @sync requests the server-side lock
 * (OBD_FL_SRVLOCK); @ioepoch tags the request.  On success the o_valid
 * mask is trimmed to the attributes callers actually consume.
 * NOTE(review): listing is incomplete — oi_oa setup, the sync guard
 * and error returns are only partially visible.
 */
920 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
921 struct obd_capa *capa, struct obdo *obdo,
922 __u64 ioepoch, int sync)
924 struct ptlrpc_request_set *set;
925 struct obd_info oinfo = { { { 0 } } };
930 LASSERT(lsm != NULL);
934 oinfo.oi_oa->o_oi = lsm->lsm_oi;
935 oinfo.oi_oa->o_mode = S_IFREG;
936 oinfo.oi_oa->o_ioepoch = ioepoch;
937 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
938 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
939 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
940 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
941 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
942 OBD_MD_FLDATAVERSION;
943 oinfo.oi_capa = capa;
945 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
946 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
949 set = ptlrpc_prep_set();
951 CERROR("can't allocate ptlrpc set\n");
954 rc = obd_getattr_async(exp, &oinfo, set);
956 rc = ptlrpc_set_wait(set);
957 ptlrpc_set_destroy(set);
/* Only pass back the attributes the callers rely on. */
960 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
961 OBD_MD_FLATIME | OBD_MD_FLMTIME |
962 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
963 OBD_MD_FLDATAVERSION);
/*
 * Performs the getattr on the inode and updates its fields.
 * If @sync != 0, perform the getattr under the server-side lock.
 *
 * Grabs the inode's MDS capability and stripe metadata, delegates to
 * ll_lsm_getattr(), then refreshes the in-core inode from @obdo and
 * logs the resulting size/blocks.
 */
971 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
972 __u64 ioepoch, int sync)
974 struct obd_capa *capa = ll_mdscapa_get(inode);
975 struct lov_stripe_md *lsm;
979 lsm = ccc_inode_lsm_get(inode);
980 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
981 capa, obdo, ioepoch, sync);
984 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
986 obdo_refresh_inode(inode, obdo, obdo->o_valid);
987 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
988 " blksize %lu\n", POSTID(oi), i_size_read(inode),
989 (unsigned long long)inode->i_blocks,
990 (unsigned long)ll_inode_blksize(inode));
992 ccc_inode_lsm_put(inode, lsm);
/*
 * Merge MDS-provided timestamps (cached in lli_lvb) with attributes
 * obtained from the OSTs via the cl_object layer, taking the newest of
 * each timestamp, and update the inode's size/blocks under the inode
 * size lock.
 */
996 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
998 struct ll_inode_info *lli = ll_i2info(inode);
999 struct cl_object *obj = lli->lli_clob;
1000 struct cl_attr *attr = ccc_env_thread_attr(env);
1006 ll_inode_size_lock(inode);
1007 /* merge timestamps the most recently obtained from mds with
1008 timestamps obtained from osts */
1009 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
1010 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
1011 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
1012 inode_init_lvb(inode, &lvb);
1014 cl_object_attr_lock(obj);
1015 rc = cl_object_attr_get(env, obj, attr);
1016 cl_object_attr_unlock(obj);
/* Keep the most recent timestamp from either source. */
1019 if (lvb.lvb_atime < attr->cat_atime)
1020 lvb.lvb_atime = attr->cat_atime;
1021 if (lvb.lvb_ctime < attr->cat_ctime)
1022 lvb.lvb_ctime = attr->cat_ctime;
1023 if (lvb.lvb_mtime < attr->cat_mtime)
1024 lvb.lvb_mtime = attr->cat_mtime;
1026 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1027 PFID(&lli->lli_fid), attr->cat_size);
1028 cl_isize_write_nolock(inode, attr->cat_size);
1030 inode->i_blocks = attr->cat_blocks;
1032 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1033 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1034 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1036 ll_inode_size_unlock(inode);
/*
 * Glimpse helper for ioctls: fetch OST attributes for @lsm into a
 * temporary obdo and copy size/blocks/times into the caller's stat
 * structure.
 */
1041 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1044 struct obdo obdo = { 0 };
1047 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1049 st->st_size = obdo.o_size;
1050 st->st_blocks = obdo.o_blocks;
1051 st->st_mtime = obdo.o_mtime;
1052 st->st_atime = obdo.o_atime;
1053 st->st_ctime = obdo.o_ctime;
/*
 * Initialize a cl_io from the file's open flags: non-blocking mode,
 * append/sync behaviour for writes, and the lock request policy
 * (never for no-lock files, mandatory for O_APPEND, otherwise maybe).
 */
1058 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1060 struct inode *inode = file->f_dentry->d_inode;
1062 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1064 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1065 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1066 file->f_flags & O_DIRECT ||
1069 io->ci_obj = ll_i2info(inode)->lli_clob;
1070 io->ci_lockreq = CILR_MAYBE;
1071 if (ll_file_nolock(file)) {
1072 io->ci_lockreq = CILR_NEVER;
1073 io->ci_no_srvlock = 1;
1074 } else if (file->f_flags & O_APPEND) {
1075 io->ci_lockreq = CILR_MANDATORY;
/*
 * Generic read/write engine shared by the normal, sendfile and splice
 * paths.  Sets up a cl_io for [*ppos, *ppos+count), copies the
 * per-subtype arguments into the vvp/ccc io state, serializes writes
 * against truncate via lli_write_mutex (readers take lli_trunc_sem),
 * runs the cl_io loop, advances *ppos by the bytes transferred, and
 * tallies read/write stats.  Restarts short transfers when the io
 * layer requests it and nothing was transferred yet.
 * NOTE(review): listing is incomplete — the restart loop head, switch
 * case labels and some cleanup are not visible here.
 */
1080 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1081 struct file *file, enum cl_io_type iot,
1082 loff_t *ppos, size_t count)
1084 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1085 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1091 io = ccc_env_thread_io(env);
1092 ll_io_init(io, file, iot == CIT_WRITE);
1094 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1095 struct vvp_io *vio = vvp_env_io(env);
1096 struct ccc_io *cio = ccc_env_io(env);
1097 int write_mutex_locked = 0;
1099 cio->cui_fd = LUSTRE_FPRIVATE(file);
1100 vio->cui_io_subtype = args->via_io_subtype;
1102 switch (vio->cui_io_subtype) {
1104 cio->cui_iov = args->u.normal.via_iov;
1105 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1106 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1107 cio->cui_iocb = args->u.normal.via_iocb;
/* Writers (except group-locked ones) serialize against truncate. */
1108 if ((iot == CIT_WRITE) &&
1109 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1110 if (mutex_lock_interruptible(&lli->
1112 GOTO(out, result = -ERESTARTSYS);
1113 write_mutex_locked = 1;
1114 } else if (iot == CIT_READ) {
1115 down_read(&lli->lli_trunc_sem);
1119 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1120 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
1123 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1124 vio->u.splice.cui_flags = args->u.splice.via_flags;
1127 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
1130 result = cl_io_loop(env, io);
1131 if (write_mutex_locked)
1132 mutex_unlock(&lli->lli_write_mutex);
1133 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
1134 up_read(&lli->lli_trunc_sem);
1136 /* cl_io_rw_init() handled IO */
1137 result = io->ci_result;
1140 if (io->ci_nob > 0) {
1141 result = io->ci_nob;
1142 *ppos = io->u.ci_wr.wr.crw_pos;
1146 cl_io_fini(env, io);
1147 /* If any bit been read/written (result != 0), we just return
1148 * short read/write instead of restart io. */
1149 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1150 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1151 iot == CIT_READ ? "read" : "write",
1152 file->f_dentry->d_name.name, *ppos, count);
1153 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1157 if (iot == CIT_READ) {
1159 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1160 LPROC_LL_READ_BYTES, result);
1161 } else if (iot == CIT_WRITE) {
1163 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1164 LPROC_LL_WRITE_BYTES, result);
1165 fd->fd_write_failed = false;
1166 } else if (result != -ERESTARTSYS) {
1167 fd->fd_write_failed = true;
/*
 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
 *
 * Validate an iovec array and compute the total byte count into
 * *count, truncating *nr_segs at the first inaccessible segment.
 * NOTE(review): listing is incomplete — the cnt accumulation and
 * return statements are only partially visible.
 */
1178 static int ll_file_get_iov_count(const struct iovec *iov,
1179 unsigned long *nr_segs, size_t *count)
1184 for (seg = 0; seg < *nr_segs; seg++) {
1185 const struct iovec *iv = &iov[seg];
1188 * If any segment has a negative length, or the cumulative
1189 * length ever wraps negative then return -EINVAL.
1192 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1194 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1199 cnt -= iv->iov_len; /* This segment is no good */
/*
 * ->aio_read implementation: validate the iovec, obtain a cl
 * environment, pack the normal-IO args and run the read through
 * ll_file_io_generic() at iocb->ki_pos.
 */
1206 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1207 unsigned long nr_segs, loff_t pos)
1210 struct vvp_io_args *args;
1216 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1220 env = cl_env_get(&refcheck);
1222 RETURN(PTR_ERR(env));
1224 args = vvp_env_args(env, IO_NORMAL);
1225 args->u.normal.via_iov = (struct iovec *)iov;
1226 args->u.normal.via_nrsegs = nr_segs;
1227 args->u.normal.via_iocb = iocb;
1229 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1230 &iocb->ki_pos, count);
1231 cl_env_put(env, &refcheck);
/*
 * Synchronous read(2) path: wrap the flat user buffer in a single-segment
 * iovec and a sync kiocb, then reuse ll_file_aio_read().
 */
static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
	struct iovec *local_iov;
	struct kiocb *kiocb;

	env = cl_env_get(&refcheck);
		RETURN(PTR_ERR(env));

	/* Per-thread scratch iovec/kiocb live in the vvp thread info. */
	local_iov = &vvp_env_info(env)->vti_local_iov;
	kiocb = &vvp_env_info(env)->vti_kiocb;
	local_iov->iov_base = (void __user *)buf;
	local_iov->iov_len = count;
	init_sync_kiocb(kiocb, file);
	kiocb->ki_pos = *ppos;
	kiocb->ki_left = count;

	result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
	/* Propagate the advanced file position back to the caller. */
	*ppos = kiocb->ki_pos;

	cl_env_put(env, &refcheck);
/*
 * Write to a file (through the page cache).
 *
 * Mirrors ll_file_aio_read() but issues a CIT_WRITE io.
 */
static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                                unsigned long nr_segs, loff_t pos)
	struct vvp_io_args *args;

	/* Validate the iovec and obtain the aggregate byte count. */
	result = ll_file_get_iov_count(iov, &nr_segs, &count);

	env = cl_env_get(&refcheck);
		RETURN(PTR_ERR(env));

	args = vvp_env_args(env, IO_NORMAL);
	args->u.normal.via_iov = (struct iovec *)iov;
	args->u.normal.via_nrsegs = nr_segs;
	args->u.normal.via_iocb = iocb;

	result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
				    &iocb->ki_pos, count);
	cl_env_put(env, &refcheck);
/*
 * Synchronous write(2) path: wrap the flat user buffer in a single-segment
 * iovec and a sync kiocb, then reuse ll_file_aio_write().
 */
static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
	struct iovec *local_iov;
	struct kiocb *kiocb;

	env = cl_env_get(&refcheck);
		RETURN(PTR_ERR(env));

	/* Per-thread scratch iovec/kiocb live in the vvp thread info. */
	local_iov = &vvp_env_info(env)->vti_local_iov;
	kiocb = &vvp_env_info(env)->vti_kiocb;
	local_iov->iov_base = (void __user *)buf;
	local_iov->iov_len = count;
	init_sync_kiocb(kiocb, file);
	kiocb->ki_pos = *ppos;
	kiocb->ki_left = count;

	result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
	/* Propagate the advanced file position back to the caller. */
	*ppos = kiocb->ki_pos;

	cl_env_put(env, &refcheck);
/*
 * Send file content (through pagecache) somewhere with helper
 */
static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
                                   struct pipe_inode_info *pipe, size_t count,
	struct vvp_io_args *args;

	env = cl_env_get(&refcheck);
		RETURN(PTR_ERR(env));

	/* Splice uses its own arg flavour: destination pipe + splice flags. */
	args = vvp_env_args(env, IO_SPLICE);
	args->u.splice.via_pipe = pipe;
	args->u.splice.via_flags = flags;

	result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
	cl_env_put(env, &refcheck);
/*
 * Recreate the OST object(s) backing @inode on OST index @ost_idx by
 * replaying obd_create() with OBD_FL_RECREATE_OBJS set on a copy of the
 * current stripe metadata.
 */
static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
	struct obd_export *exp = ll_i2dtexp(inode);
	struct obd_trans_info oti = { 0 };
	struct obdo *oa = NULL;
	struct lov_stripe_md *lsm = NULL, *lsm2;

	lsm = ccc_inode_lsm_get(inode);
	/* nothing to recreate if the file has no OST objects */
	if (!lsm_has_objects(lsm))
		GOTO(out, rc = -ENOENT);

	/* lsm is followed by one lov_oinfo per stripe */
	lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
		   (lsm->lsm_stripe_count));

	OBD_ALLOC_LARGE(lsm2, lsm_size);
		GOTO(out, rc = -ENOMEM);

	/* o_nlink carries the target OST index for the recreate request */
	oa->o_nlink = ost_idx;
	oa->o_flags |= OBD_FL_RECREATE_OBJS;
	oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
	obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
				   OBD_MD_FLMTIME | OBD_MD_FLCTIME);
	obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
	/* recreate against a private copy of the stripe md */
	memcpy(lsm2, lsm, lsm_size);
	ll_inode_size_lock(inode);
	rc = obd_create(NULL, exp, oa, &lsm2, &oti);
	ll_inode_size_unlock(inode);

	OBD_FREE_LARGE(lsm2, lsm_size);
	ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_RECREATE_OBJ: recreate an OST object identified by an MDT0
 * sequence plus object id taken from userspace.
 */
static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
	struct ll_recreate_obj ucreat;

	/* object recreation is an administrative operation */
	if (!cfs_capable(CFS_CAP_SYS_ADMIN))

	if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,

	ostid_set_seq_mdt0(&oi);
	ostid_set_id(&oi, ucreat.lrc_id);
	RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID: recreate an OST object identified by a FID
 * taken from userspace.
 */
static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
	if (!cfs_capable(CFS_CAP_SYS_ADMIN))

	if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))

	fid_to_ostid(&fid, &oi);
	/* NOTE(review): the OST index appears to be packed into bits 16..31
	 * of the FID sequence here — confirm against the FID<->OSTID rules. */
	ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
	RETURN(ll_lov_recreate(inode, &oi, ost_idx));
/*
 * Set the striping EA on @inode by replaying an IT_OPEN intent that
 * carries the user-supplied lov_user_md; fails if a layout already exists
 * (striping can only be set once, at creation).
 */
int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
                             int flags, struct lov_user_md *lum, int lum_size)
	struct lov_stripe_md *lsm = NULL;
	struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};

	lsm = ccc_inode_lsm_get(inode);
		/* a layout is already attached: refuse to overwrite it */
		ccc_inode_lsm_put(inode, lsm);
		CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",

	ll_inode_size_lock(inode);
	rc = ll_intent_file_open(file, lum, lum_size, &oit);
	rc = oit.d.lustre.it_status;
		GOTO(out_req_free, rc);

	/* the intent opened an MDS handle we don't need; close it again */
	ll_release_openhandle(file->f_dentry, &oit);

	ll_inode_size_unlock(inode);
	ll_intent_release(&oit);
	ccc_inode_lsm_put(inode, lsm);
	ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (striping information) for @filename from the MDS and
 * return it, byte-swapped to host endianness for userspace, via @lmmp /
 * @lmm_size.  The enclosing request is returned through @request so the
 * caller controls the lifetime of the returned buffer.
 */
int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
                             struct lov_mds_md **lmmp, int *lmm_size,
                             struct ptlrpc_request **request)
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct mdt_body *body;
	struct lov_mds_md *lmm = NULL;
	struct ptlrpc_request *req = NULL;
	struct md_op_data *op_data;

	/* size the getattr reply buffer for the largest possible EA */
	rc = ll_get_max_mdsize(sbi, &lmmsize);

	op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
				     strlen(filename), lmmsize,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		RETURN(PTR_ERR(op_data));

	op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
	rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
	ll_finish_md_op_data(op_data);
		CDEBUG(D_INFO, "md_getattr_name failed "
		       "on %s: rc %d\n", filename, rc);

	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	LASSERT(body != NULL); /* checked by mdc_getattr_name */

	lmmsize = body->eadatasize;

	/* no EA present on either a file or a directory */
	if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
		GOTO(out, rc = -ENODATA);

	lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
	LASSERT(lmm != NULL);

	/* only plain v1/v3 layouts are understood here */
	if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
	    (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
		GOTO(out, rc = -EPROTO);

	/*
	 * This is coming from the MDS, so is probably in
	 * little endian. We convert it to host endian before
	 * passing it to userspace.
	 */
	if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
		stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
		if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)

		/* if function called for directory - we should
		 * avoid swab not existent lsm objects */
		if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
			lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
			if (S_ISREG(body->mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
		} else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
			lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
			if (S_ISREG(body->mode))
				lustre_swab_lov_user_md_objects(
				 ((struct lov_user_md_v3 *)lmm)->lmm_objects,

	*lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA: set a raw striping EA (admin-only; implies that the
 * OST objects already exist, hence MDS_OPEN_HAS_OBJS).
 */
static int ll_lov_setea(struct inode *inode, struct file *file,
	int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
	struct lov_user_md *lump;
	/* room for the header plus a single ost_data entry */
	int lum_size = sizeof(struct lov_user_md) +
		       sizeof(struct lov_user_ost_data);

	if (!cfs_capable(CFS_CAP_SYS_ADMIN))

	OBD_ALLOC_LARGE(lump, lum_size);

	if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
		OBD_FREE_LARGE(lump, lum_size);

	rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);

	OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE: copy the user's lov_user_md (v1 or v3), apply it
 * as the file's striping EA, then echo the resulting layout back.
 */
static int ll_lov_setstripe(struct inode *inode, struct file *file,
	struct lov_user_md_v3 lumv3;
	/* v1 is a prefix of v3, so one buffer serves both layouts */
	struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
	struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
	struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
	int flags = FMODE_WRITE;

	/* first try with v1 which is smaller than v3 */
	lum_size = sizeof(struct lov_user_md_v1);
	if (copy_from_user(lumv1, lumv1p, lum_size))

	/* magic says v3: re-copy the full v3 structure */
	if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
		lum_size = sizeof(struct lov_user_md_v3);
		if (copy_from_user(&lumv3, lumv3p, lum_size))

	rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
		struct lov_stripe_md *lsm;

		put_user(0, &lumv1p->lmm_stripe_count);

		/* refresh the layout and report it back to userspace */
		ll_layout_refresh(inode, &gen);
		lsm = ccc_inode_lsm_get(inode);
		rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
				   0, lsm, (void *)arg);
		ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_LOV_GETSTRIPE: copy the file's current striping information
 * out to the userspace buffer at @arg.
 */
static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
	struct lov_stripe_md *lsm;

	lsm = ccc_inode_lsm_get(inode);
	rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
	ccc_inode_lsm_put(inode, lsm);
/*
 * Take a group lock (gid == @arg) on the file's cl object and record it
 * in the per-open file data.  Only one group lock per open file handle.
 */
int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;

	if (ll_file_nolock(file))
		RETURN(-EOPNOTSUPP);

	spin_lock(&lli->lli_lock);
	/* this fd already holds a group lock: don't take a second one */
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		CWARN("group lock already existed with gid %lu\n",
		      fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);
	LASSERT(fd->fd_grouplock.cg_lock == NULL);
	spin_unlock(&lli->lli_lock);

	/* blocking unless the file was opened O_NONBLOCK */
	rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
			      arg, (file->f_flags & O_NONBLOCK), &grouplock);

	spin_lock(&lli->lli_lock);
	/* re-check under the lock: a racing thread may have won */
	if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
		spin_unlock(&lli->lli_lock);
		CERROR("another thread just won the race\n");
		cl_put_grouplock(&grouplock);

	fd->fd_flags |= LL_FILE_GROUP_LOCKED;
	fd->fd_grouplock = grouplock;
	spin_unlock(&lli->lli_lock);

	CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * Drop the group lock (gid == @arg) previously taken on this open file;
 * warns and fails if no lock is held or the gid does not match.
 */
int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
	struct ccc_grouplock grouplock;

	spin_lock(&lli->lli_lock);
	if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
		spin_unlock(&lli->lli_lock);
		CWARN("no group lock held\n");
	LASSERT(fd->fd_grouplock.cg_lock != NULL);

	/* the caller must release the same gid it acquired */
	if (fd->fd_grouplock.cg_gid != arg) {
		CWARN("group lock %lu doesn't match current id %lu\n",
		      arg, fd->fd_grouplock.cg_gid);
		spin_unlock(&lli->lli_lock);

	/* detach the lock from the fd before releasing it */
	grouplock = fd->fd_grouplock;
	memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
	fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
	spin_unlock(&lli->lli_lock);

	cl_put_grouplock(&grouplock);
	CDEBUG(D_INFO, "group lock %lu released\n", arg);
/**
 * Close inode open handle
 *
 * \param dentry [in]     dentry which contains the inode
 * \param it     [in,out] intent which contains open info and result
 *
 * \retval <0 failure
 */
int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
	struct inode *inode = dentry->d_inode;
	struct obd_client_handle *och;

	/* Root ? Do nothing. */
	if (dentry->d_inode->i_sb->s_root == dentry)

	/* No open handle to close? Move away */
	if (!it_disposition(it, DISP_OPEN_OPEN))

	LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);

	OBD_ALLOC(och, sizeof(*och));
		GOTO(out, rc = -ENOMEM);

	/* build the client handle from the intent's open reply */
	ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);

	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,

	/* this one is in place of ll_file_open */
	if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
		ptlrpc_req_finished(it->d.lustre.it_data);
		it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/**
 * Get size for inode for which FIEMAP mapping is requested.
 * Make the FIEMAP get_info call and returns the result.
 */
int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
	struct obd_export *exp = ll_i2dtexp(inode);
	struct lov_stripe_md *lsm = NULL;
	struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
	int vallen = num_bytes;

	/* Checks for fiemap flags */
	if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
		/* report the unsupported flags back to the caller */
		fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;

	/* Check for FIEMAP_FLAG_SYNC */
	if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
		rc = filemap_fdatawrite(inode->i_mapping);

	lsm = ccc_inode_lsm_get(inode);

	/* If the stripe_count > 1 and the application does not understand
	 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
	 */
	if (lsm->lsm_stripe_count > 1 &&
	    !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
		GOTO(out, rc = -EOPNOTSUPP);

	fm_key.oa.o_oi = lsm->lsm_oi;
	fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;

	obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
	obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
	/* If filesize is 0, then there would be no objects for mapping */
	if (fm_key.oa.o_size == 0) {
		fiemap->fm_mapped_extents = 0;

	/* ship the caller's fiemap request down in the get_info key */
	memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));

	rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
		CERROR("obd_get_info failed: rc = %d\n", rc);

	ccc_inode_lsm_put(inode, lsm);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a pathname via the MDC and copy the
 * result (getinfo_fid2path + path buffer) back to userspace.
 */
int ll_fid2path(struct inode *inode, void *arg)
	struct obd_export *exp = ll_i2mdexp(inode);
	struct getinfo_fid2path *gfout, *gfin;

	/* restricted unless the mount allows user fid2path */
	if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
	    !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))

	/* Need to get the buflen */
	OBD_ALLOC_PTR(gfin);
	if (copy_from_user(gfin, arg, sizeof(*gfin))) {

	/* allocate the out buffer with room for the requested path length */
	outsize = sizeof(*gfout) + gfin->gf_pathlen;
	OBD_ALLOC(gfout, outsize);
	if (gfout == NULL) {
	memcpy(gfout, gfin, sizeof(*gfout));

	/* Call mdc_iocontrol */
	rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);

	if (copy_to_user(arg, gfout, outsize))

	OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP: size a kernel fiemap buffer from the user-supplied
 * extent count, run ll_do_fiemap(), and copy header + mapped extents back.
 */
static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
	struct ll_user_fiemap *fiemap_s;
	size_t num_bytes, ret_bytes;
	unsigned int extent_count;

	/* Get the extent count so we can calculate the size of
	 * required fiemap buffer */
	if (get_user(extent_count,
		     &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
	num_bytes = sizeof(*fiemap_s) + (extent_count *
					 sizeof(struct ll_fiemap_extent));

	OBD_ALLOC_LARGE(fiemap_s, num_bytes);
	if (fiemap_s == NULL)

	/* get the fiemap value */
	if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
		GOTO(error, rc = -EFAULT);

	/* If fm_extent_count is non-zero, read the first extent since
	 * it is used to calculate end_offset and device from previous
	 */
	if (copy_from_user(&fiemap_s->fm_extents[0],
			   (char __user *)arg + sizeof(*fiemap_s),
			   sizeof(struct ll_fiemap_extent)))
		GOTO(error, rc = -EFAULT);

	rc = ll_do_fiemap(inode, fiemap_s, num_bytes);

	/* copy back header plus whatever extents were actually mapped */
	ret_bytes = sizeof(struct ll_user_fiemap);

	if (extent_count != 0)
		ret_bytes += (fiemap_s->fm_mapped_extents *
			      sizeof(struct ll_fiemap_extent));

	if (copy_to_user((void *)arg, fiemap_s, ret_bytes))

	OBD_FREE_LARGE(fiemap_s, num_bytes);
/**
 * Read the data_version for inode.
 *
 * This value is computed using stripe object version on OST.
 * Version is computed using server side locking.
 *
 * @param extent_lock  Take extent lock. Not needed if a process is already
 *		       holding the OST object group locks.
 */
int ll_data_version(struct inode *inode, __u64 *data_version,
	struct lov_stripe_md *lsm = NULL;
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct obdo *obdo = NULL;

	/* If no stripe, we consider version is 0. */
	lsm = ccc_inode_lsm_get(inode);
	if (!lsm_has_objects(lsm)) {
		CDEBUG(D_INODE, "No object for inode\n");

	OBD_ALLOC_PTR(obdo);
		GOTO(out, rc = -ENOMEM);

	rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
		/* the OST must actually have reported a data version */
		if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
			*data_version = obdo->o_data_version;

	ccc_inode_lsm_put(inode, lsm);
/**
 * Trigger a HSM release request for the provided inode.
 */
int ll_hsm_release(struct inode *inode)
	struct cl_env_nest nest;
	struct obd_client_handle *och = NULL;
	__u64 data_version = 0;

	CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
	       ll_get_fsname(inode->i_sb, NULL, 0),
	       PFID(&ll_i2info(inode)->lli_fid));

	/* take a write lease so no other client can modify the file
	 * while it is being released */
	och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
		GOTO(out, rc = PTR_ERR(och));

	/* Grab latest data_version and [am]time values */
	rc = ll_data_version(inode, &data_version, 1);

	env = cl_env_nested_get(&nest);
		GOTO(out, rc = PTR_ERR(env));

	ll_merge_lvb(env, inode);
	cl_env_nested_put(&nest, env);

	/* Release the file.
	 * NB: lease lock handle is released in mdc_hsm_release_pack() because
	 * we still need it to pack l_remote_handle to MDT. */
	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,

	if (och != NULL && !IS_ERR(och)) /* close the file */
		ll_lease_close(och, inode, NULL);
/* State carried through ll_swap_layouts(): saved timestamps, the two
 * inodes involved, and which data versions the caller asked to verify. */
struct ll_swap_stack {
	struct iattr ia1, ia2;       /* saved [am]time attrs to restore */
	struct inode *inode1, *inode2;
	bool check_dv1, check_dv2;   /* verify data version before swap */
/*
 * Atomically swap the layouts (striping) of two regular files via the MDT,
 * optionally verifying data versions first and preserving [am]times after.
 */
static int ll_swap_layouts(struct file *file1, struct file *file2,
                           struct lustre_swap_layouts *lsl)
	struct mdc_swap_layouts msl;
	struct md_op_data *op_data;
	struct ll_swap_stack *llss = NULL;

	OBD_ALLOC_PTR(llss);

	llss->inode1 = file1->f_dentry->d_inode;
	llss->inode2 = file2->f_dentry->d_inode;

	/* layouts only exist on regular files */
	if (!S_ISREG(llss->inode2->i_mode))
		GOTO(free, rc = -EINVAL);

	if (inode_permission(llss->inode1, MAY_WRITE) ||
	    inode_permission(llss->inode2, MAY_WRITE))
		GOTO(free, rc = -EPERM);

	/* both files must live in the same filesystem */
	if (llss->inode2->i_sb != llss->inode1->i_sb)
		GOTO(free, rc = -EXDEV);

	/* we use 2 bool because it is easier to swap than 2 bits */
	if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
		llss->check_dv1 = true;

	if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
		llss->check_dv2 = true;

	/* we cannot use lsl->sl_dvX directly because we may swap them */
	llss->dv1 = lsl->sl_dv1;
	llss->dv2 = lsl->sl_dv2;

	rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
	if (rc == 0) /* same file, done! */

	if (rc < 0) { /* sequentialize it */
		/* always operate in FID order to avoid lock inversion */
		swap(llss->inode1, llss->inode2);
		swap(llss->dv1, llss->dv2);
		swap(llss->check_dv1, llss->check_dv2);

	if (gid != 0) { /* application asks to flush dirty cache */
		rc = ll_get_grouplock(llss->inode1, file1, gid);

		rc = ll_get_grouplock(llss->inode2, file2, gid);
			ll_put_grouplock(llss->inode1, file1, gid);

	/* to be able to restore mtime and atime after swap
	 * we need to first save them */
	    (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
		llss->ia1.ia_mtime = llss->inode1->i_mtime;
		llss->ia1.ia_atime = llss->inode1->i_atime;
		llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
		llss->ia2.ia_mtime = llss->inode2->i_mtime;
		llss->ia2.ia_atime = llss->inode2->i_atime;
		llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;

	/* ultimate check, before swaping the layouts we check if
	 * dataversion has changed (if requested) */
	if (llss->check_dv1) {
		rc = ll_data_version(llss->inode1, &dv, 0);
		if (dv != llss->dv1)
			GOTO(putgl, rc = -EAGAIN);

	if (llss->check_dv2) {
		rc = ll_data_version(llss->inode2, &dv, 0);
		if (dv != llss->dv2)
			GOTO(putgl, rc = -EAGAIN);

	/* struct md_op_data is used to send the swap args to the mdt
	 * only flags is missing, so we use struct mdc_swap_layouts
	 * through the md_op_data->op_data */
	/* flags from user space have to be converted before they are send to
	 * server, no flag is sent today, they are only used on the client */
	op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
				     0, LUSTRE_OPC_ANY, &msl);
	if (IS_ERR(op_data))
		GOTO(free, rc = PTR_ERR(op_data));

	rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
			   sizeof(*op_data), op_data, NULL);
	ll_finish_md_op_data(op_data);

		ll_put_grouplock(llss->inode2, file2, gid);
		ll_put_grouplock(llss->inode1, file1, gid);

	/* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */

	/* clear useless flags */
	if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
		llss->ia1.ia_valid &= ~ATTR_MTIME;
		llss->ia2.ia_valid &= ~ATTR_MTIME;

	if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
		llss->ia1.ia_valid &= ~ATTR_ATIME;
		llss->ia2.ia_valid &= ~ATTR_ATIME;

	/* update time if requested */
	/* NB: attrs were saved per-inode, so after the swap ia2 applies to
	 * file1 and ia1 to file2 */
	if (llss->ia2.ia_valid != 0) {
		mutex_lock(&llss->inode1->i_mutex);
		rc = ll_setattr(file1->f_dentry, &llss->ia2);
		mutex_unlock(&llss->inode1->i_mutex);

	if (llss->ia1.ia_valid != 0) {
		mutex_lock(&llss->inode2->i_mutex);
		rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
		mutex_unlock(&llss->inode2->i_mutex);
/*
 * Apply a HSM state set/clear request for @inode by forwarding the
 * hsm_state_set through an MDC ioctl.
 */
static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
	struct md_op_data *op_data;

	/* Non-root users are forbidden to set or clear flags which are
	 * NOT defined in HSM_USER_MASK. */
	if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
	    !cfs_capable(CFS_CAP_SYS_ADMIN))

	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
				     LUSTRE_OPC_ANY, hss);
	if (IS_ERR(op_data))
		RETURN(PTR_ERR(op_data));

	rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
			   sizeof(*op_data), op_data, NULL);

	ll_finish_md_op_data(op_data);
/*
 * Import an HSM-archived file: mark it ARCHIVED|EXISTS|RELEASED and then
 * restore the ownership, mode, size and timestamps recorded in @hui.
 */
static int ll_hsm_import(struct inode *inode, struct file *file,
                         struct hsm_user_import *hui)
	struct hsm_state_set *hss = NULL;
	struct iattr *attr = NULL;

	/* import only makes sense for regular files */
	if (!S_ISREG(inode->i_mode))

		GOTO(out, rc = -ENOMEM);

	/* set HSM flags before setting the attributes */
	hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
	hss->hss_archive_id = hui->hui_archive_id;
	hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
	rc = ll_hsm_state_set(inode, hss);

	OBD_ALLOC_PTR(attr);
		GOTO(out, rc = -ENOMEM);

	/* restore the attributes recorded at archive time */
	attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
	attr->ia_mode |= S_IFREG;
	attr->ia_uid = hui->hui_uid;
	attr->ia_gid = hui->hui_gid;
	attr->ia_size = hui->hui_size;
	attr->ia_mtime.tv_sec = hui->hui_mtime;
	attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
	attr->ia_atime.tv_sec = hui->hui_atime;
	attr->ia_atime.tv_nsec = hui->hui_atime_ns;

	attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
			 ATTR_UID | ATTR_GID |
			 ATTR_MTIME | ATTR_MTIME_SET |
			 ATTR_ATIME | ATTR_ATIME_SET;

	rc = ll_setattr_raw(file->f_dentry, attr, true);
/*
 * Main ioctl dispatcher for regular Lustre files: handles file flags,
 * striping, layout swap, fiemap, group locks, HSM, leases, etc.; anything
 * unrecognized falls through to the registered handlers / OSC export.
 */
long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
	       inode->i_generation, inode, cmd);
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);

	/* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
	if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */

	case LL_IOC_GETFLAGS:
		/* Get the current value of the file flags */
		return put_user(fd->fd_flags, (int *)arg);
	case LL_IOC_SETFLAGS:
	case LL_IOC_CLRFLAGS:
		/* Set or clear specific file flags */
		/* XXX This probably needs checks to ensure the flags are
		 * not abused, and to handle any flag side effects. */
		if (get_user(flags, (int *) arg))

		if (cmd == LL_IOC_SETFLAGS) {
			/* IGNORE_LOCK is only safe with direct IO */
			if ((flags & LL_FILE_IGNORE_LOCK) &&
			    !(file->f_flags & O_DIRECT)) {
				CERROR("%s: unable to disable locking on "
				       "non-O_DIRECT file\n", current->comm);
			fd->fd_flags |= flags;
			fd->fd_flags &= ~flags;
	case LL_IOC_LOV_SETSTRIPE:
		RETURN(ll_lov_setstripe(inode, file, arg));
	case LL_IOC_LOV_SETEA:
		RETURN(ll_lov_setea(inode, file, arg));
	case LL_IOC_LOV_SWAP_LAYOUTS: {
		struct lustre_swap_layouts lsl;

		if (copy_from_user(&lsl, (char *)arg,
				   sizeof(struct lustre_swap_layouts)))

		/* source file must be open for write */
		if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */

		file2 = fget(lsl.sl_fd);

		/* target must be open for write too */
		if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
			rc = ll_swap_layouts(file, file2, &lsl);
	case LL_IOC_LOV_GETSTRIPE:
		RETURN(ll_lov_getstripe(inode, arg));
	case LL_IOC_RECREATE_OBJ:
		RETURN(ll_lov_recreate_obj(inode, arg));
	case LL_IOC_RECREATE_FID:
		RETURN(ll_lov_recreate_fid(inode, arg));
	case FSFILT_IOC_FIEMAP:
		RETURN(ll_ioctl_fiemap(inode, arg));
	case FSFILT_IOC_GETFLAGS:
	case FSFILT_IOC_SETFLAGS:
		RETURN(ll_iocontrol(inode, file, cmd, arg));
	case FSFILT_IOC_GETVERSION_OLD:
	case FSFILT_IOC_GETVERSION:
		RETURN(put_user(inode->i_generation, (int *)arg));
	case LL_IOC_GROUP_LOCK:
		RETURN(ll_get_grouplock(inode, file, arg));
	case LL_IOC_GROUP_UNLOCK:
		RETURN(ll_put_grouplock(inode, file, arg));
	case IOC_OBD_STATFS:
		RETURN(ll_obd_statfs(inode, (void *)arg));

	/* We need to special case any other ioctls we want to handle,
	 * to send them to the MDS/OST as appropriate and to properly
	 * network encode the arg field.
	case FSFILT_IOC_SETVERSION_OLD:
	case FSFILT_IOC_SETVERSION:
	*/
	case LL_IOC_FLUSHCTX:
		RETURN(ll_flush_ctx(inode));
	case LL_IOC_PATH2FID: {
		if (copy_to_user((void *)arg, ll_inode2fid(inode),
				 sizeof(struct lu_fid)))
	case OBD_IOC_FID2PATH:
		RETURN(ll_fid2path(inode, (void *)arg));
	case LL_IOC_DATA_VERSION: {
		struct ioc_data_version idv;

		if (copy_from_user(&idv, (char *)arg, sizeof(idv)))

		rc = ll_data_version(inode, &idv.idv_version,
				     !(idv.idv_flags & LL_DV_NOFLUSH));

		if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))

	case LL_IOC_GET_MDTIDX: {
		mdtidx = ll_get_mdt_idx(inode);

		if (put_user((int)mdtidx, (int*)arg))

	case OBD_IOC_GETDTNAME:
	case OBD_IOC_GETMDNAME:
		RETURN(ll_get_obd_name(inode, cmd, arg));
	case LL_IOC_HSM_STATE_GET: {
		struct md_op_data *op_data;
		struct hsm_user_state *hus;

		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
					     LUSTRE_OPC_ANY, hus);
		if (IS_ERR(op_data)) {
			RETURN(PTR_ERR(op_data));

		rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),

		if (copy_to_user((void *)arg, hus, sizeof(*hus)))

		ll_finish_md_op_data(op_data);
	case LL_IOC_HSM_STATE_SET: {
		struct hsm_state_set *hss;

		if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {

		rc = ll_hsm_state_set(inode, hss);
	case LL_IOC_HSM_ACTION: {
		struct md_op_data *op_data;
		struct hsm_current_action *hca;

		op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
					     LUSTRE_OPC_ANY, hca);
		if (IS_ERR(op_data)) {
			RETURN(PTR_ERR(op_data));

		rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),

		if (copy_to_user((char *)arg, hca, sizeof(*hca)))

		ll_finish_md_op_data(op_data);
	case LL_IOC_SET_LEASE: {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct obd_client_handle *och = NULL;

		/* requested lease mode must be compatible with open mode */
		if (!(file->f_mode & FMODE_WRITE))
		if (!(file->f_mode & FMODE_READ))

		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och != NULL) {
			/* detach and release the currently held lease */
			och = fd->fd_lease_och;
			fd->fd_lease_och = NULL;
		mutex_unlock(&lli->lli_och_mutex);

		mode = och->och_flags &(FMODE_READ|FMODE_WRITE);
		rc = ll_lease_close(och, inode, &lease_broken);
		if (rc == 0 && lease_broken)

		/* return the type of lease or error */
		RETURN(rc < 0 ? rc : (int)mode);

		CDEBUG(D_INODE, "Set lease with mode %d\n", mode);

		/* apply for lease */
		och = ll_lease_open(inode, file, mode, 0);
			RETURN(PTR_ERR(och));

		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och == NULL) {
			fd->fd_lease_och = och;
		mutex_unlock(&lli->lli_och_mutex);

		/* impossible now that only excl is supported for now */
		ll_lease_close(och, inode, &lease_broken);
	case LL_IOC_GET_LEASE: {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct ldlm_lock *lock = NULL;

		mutex_lock(&lli->lli_och_mutex);
		if (fd->fd_lease_och != NULL) {
			struct obd_client_handle *och = fd->fd_lease_och;

			lock = ldlm_handle2lock(&och->och_lease_handle);
				lock_res_and_lock(lock);
				/* report the mode only while not cancelled */
				if (!ldlm_is_cancel(lock))
					rc = och->och_flags &
					     (FMODE_READ | FMODE_WRITE);
				unlock_res_and_lock(lock);
				ldlm_lock_put(lock);
		mutex_unlock(&lli->lli_och_mutex);
	case LL_IOC_HSM_IMPORT: {
		struct hsm_user_import *hui;

		if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {

		rc = ll_hsm_import(inode, file, hui);

		/* fall back to registered handlers, then to the OSC */
		    ll_iocontrol_call(inode, file, cmd, arg, &err))

		RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
#ifndef HAVE_FILE_LLSEEK_SIZE
/* Clamp @offset against [0, maxsize] and commit it to file->f_pos,
 * resetting f_version when the position actually changes. */
static inline loff_t
llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
	if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
	if (offset > maxsize)

	if (offset != file->f_pos) {
		file->f_pos = offset;
		/* invalidate any cached readdir/seek state */
		file->f_version = 0;
/* Local fallback for kernels without generic_file_llseek_size():
 * llseek honouring a maximum file size and an end-of-file value. */
generic_file_llseek_size(struct file *file, loff_t offset, int origin,
			 loff_t maxsize, loff_t eof)
	struct inode *inode = file->f_dentry->d_inode;

		/*
		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
		 * position-querying operation. Avoid rewriting the "same"
		 * f_pos value back to the file because a concurrent read(),
		 * write() or lseek() might have altered it
		 */

		/*
		 * f_lock protects against read/modify/write race with other
		 * SEEK_CURs. Note that parallel writes and reads behave
		 */
		mutex_lock(&inode->i_mutex);
		offset = llseek_execute(file, file->f_pos + offset, maxsize);
		mutex_unlock(&inode->i_mutex);

		/*
		 * In the generic case the entire file is data, so as long as
		 * offset isn't at the end of the file then the offset is data.
		 */

		/*
		 * There is a virtual hole at the end of the file, so as long as
		 * offset isn't i_size or larger, return i_size.
		 */

	return llseek_execute(file, offset, maxsize);
/*
 * llseek entry point: glimpse the current size from the OSTs for
 * END/HOLE/DATA seeks, then defer to the generic size-aware llseek.
 */
loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
	struct inode *inode = file->f_dentry->d_inode;
	loff_t retval, eof = 0;

	/* compute target position for the debug trace below */
	retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
			   (origin == SEEK_CUR) ? file->f_pos : 0);
	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
	       inode->i_ino, inode->i_generation, inode, retval, retval,
	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);

	if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
		/* refresh i_size from the OSTs before using it */
		retval = ll_glimpse_size(inode);
		eof = i_size_read(inode);

	retval = ll_generic_file_llseek_size(file, offset, origin,
					  ll_file_maxbytes(inode), eof);
/*
 * flush(2) (called on close): surface any async writeback errors recorded
 * on the inode/cl-object, unless they were already reported to the app.
 */
int ll_flush(struct file *file, fl_owner_t id)
	struct inode *inode = file->f_dentry->d_inode;
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

	LASSERT(!S_ISDIR(inode->i_mode));

	/* catch async errors that were recorded back when async writeback
	 * failed for pages in this mapping. */
	rc = lli->lli_async_rc;
	lli->lli_async_rc = 0;
	err = lov_read_and_clear_async_rc(lli->lli_clob);

	/* The application has been told write failure already.
	 * Do not report failure again. */
	if (fd->fd_write_failed)
	return rc ? -EIO : 0;
/**
 * Called to make sure a portion of file has been written out.
 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
 *
 * Return how many pages have been written.
 */
int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
                       enum cl_fsync_mode mode, int ignore_layout)
	struct cl_env_nest nest;
	struct obd_capa *capa = NULL;
	struct cl_fsync_io *fio;

	/* reject unknown fsync modes up front */
	if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
	    mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)

	env = cl_env_nested_get(&nest);
		RETURN(PTR_ERR(env));

	capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);

	io = ccc_env_thread_io(env);
	io->ci_obj = cl_i2info(inode)->lli_clob;
	io->ci_ignore_layout = ignore_layout;

	/* initialize parameters for sync */
	fio = &io->u.ci_fsync;
	fio->fi_capa = capa;
	fio->fi_start = start;
	fio->fi_fid = ll_inode2fid(inode);
	fio->fi_mode = mode;
	fio->fi_nr_written = 0;

	if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
		result = cl_io_loop(env, io);
		result = io->ci_result;
		/* on success report the number of pages written */
		result = fio->fi_nr_written;
	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);
/*
 * fsync() handler.  Three kernel-API variants are selected by the
 * HAVE_FILE_FSYNC_* configure macros (4-arg range fsync, 2-arg, and the
 * old 3-arg form taking an explicit dentry).  Flushes dirty pages, clears
 * recorded async write errors, issues an MDS sync (md_sync), and for
 * datasync on regular files runs cl_sync_file_range() over the whole
 * object, updating fd->fd_write_failed accordingly.
 * NOTE(review): excerpt elides lines; the md_sync request argument and
 * several error-combining statements are not visible here.
 */
2730 * When dentry is provided (the 'else' case), *file->f_dentry may be
2731 * null and dentry must be used directly rather than pulled from
2732 * *file->f_dentry as is done otherwise.
2735 #ifdef HAVE_FILE_FSYNC_4ARGS
2736 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2738 struct dentry *dentry = file->f_dentry;
2739 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2740 int ll_fsync(struct file *file, int datasync)
2742 struct dentry *dentry = file->f_dentry;
2744 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2747 struct inode *inode = dentry->d_inode;
2748 struct ll_inode_info *lli = ll_i2info(inode);
2749 struct ptlrpc_request *req;
2750 struct obd_capa *oc;
2754 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2755 inode->i_generation, inode);
2756 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2758 #ifdef HAVE_FILE_FSYNC_4ARGS
/* 4-arg variant: the VFS no longer pre-writes the range for us. */
2759 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2760 mutex_lock(&inode->i_mutex);
2762 /* fsync's caller has already called _fdata{sync,write}, we want
2763 * that IO to finish before calling the osc and mdc sync methods */
2764 rc = filemap_fdatawait(inode->i_mapping);
2767 /* catch async errors that were recorded back when async writeback
2768 * failed for pages in this mapping. */
2769 if (!S_ISDIR(inode->i_mode)) {
2770 err = lli->lli_async_rc;
2771 lli->lli_async_rc = 0;
2774 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* Sync metadata on the MDS; MDS capability accompanies the RPC. */
2779 oc = ll_mdscapa_get(inode);
2780 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2786 ptlrpc_req_finished(req);
2788 if (datasync && S_ISREG(inode->i_mode)) {
2789 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
/* Sync the whole object's data to the OSTs. */
2791 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2793 if (rc == 0 && err < 0)
/* Remember whether a write failure has now been reported. */
2796 fd->fd_write_failed = true;
2798 fd->fd_write_failed = false;
2801 #ifdef HAVE_FILE_FSYNC_4ARGS
2802 mutex_unlock(&inode->i_mutex);
/*
 * flock()/fcntl() file-locking handler.  Translates the kernel's
 * struct file_lock (both FL_FLOCK and FL_POSIX flavours) into an LDLM
 * flock enqueue on the MDS, then mirrors the result into the local VFS
 * lock lists (flock_lock_file_wait / posix_lock_file_wait).  If the local
 * step fails, the server-side lock is rolled back with an LCK_NL enqueue.
 * NOTE(review): excerpt elides lines; several case labels, the 'flags'
 * switch on cmd, and the final return are not visible here.
 */
2807 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2809 struct inode *inode = file->f_dentry->d_inode;
2810 struct ll_sb_info *sbi = ll_i2sbi(inode);
2811 struct ldlm_enqueue_info einfo = {
2812 .ei_type = LDLM_FLOCK,
2813 .ei_cb_cp = ldlm_flock_completion_ast,
2814 .ei_cbdata = file_lock,
2816 struct md_op_data *op_data;
2817 struct lustre_handle lockh = {0};
2818 ldlm_policy_data_t flock = {{0}};
2824 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2825 inode->i_ino, file_lock);
2827 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2829 if (file_lock->fl_flags & FL_FLOCK) {
2830 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2831 /* flocks are whole-file locks */
2832 flock.l_flock.end = OFFSET_MAX;
2833 /* For flocks owner is determined by the local file desctiptor*/
2834 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2835 } else if (file_lock->fl_flags & FL_POSIX) {
/* POSIX record locks carry owner and an explicit byte range. */
2836 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2837 flock.l_flock.start = file_lock->fl_start;
2838 flock.l_flock.end = file_lock->fl_end;
2842 flock.l_flock.pid = file_lock->fl_pid;
2844 /* Somewhat ugly workaround for svc lockd.
2845 * lockd installs custom fl_lmops->lm_compare_owner that checks
2846 * for the fl_owner to be the same (which it always is on local node
2847 * I guess between lockd processes) and then compares pid.
2848 * As such we assign pid to the owner field to make it all work,
2849 * conflict with normal locks is unlikely since pid space and
2850 * pointer space for current->files are not intersecting */
2851 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2852 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Map the VFS lock type onto an LDLM lock mode. */
2854 switch (file_lock->fl_type) {
2856 einfo.ei_mode = LCK_PR;
2859 /* An unlock request may or may not have any relation to
2860 * existing locks so we may not be able to pass a lock handle
2861 * via a normal ldlm_lock_cancel() request. The request may even
2862 * unlock a byte range in the middle of an existing lock. In
2863 * order to process an unlock request we need all of the same
2864 * information that is given with a normal read or write record
2865 * lock request. To avoid creating another ldlm unlock (cancel)
2866 * message we'll treat a LCK_NL flock request as an unlock. */
2867 einfo.ei_mode = LCK_NL;
2870 einfo.ei_mode = LCK_PW;
2873 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2874 file_lock->fl_type);
/* Non-blocking set request: fail rather than wait on conflict. */
2889 flags = LDLM_FL_BLOCK_NOWAIT;
/* Test-only request (F_GETLK family): don't actually take the lock. */
2895 flags = LDLM_FL_TEST_LOCK;
2896 /* Save the old mode so that if the mode in the lock changes we
2897 * can decrement the appropriate reader or writer refcount. */
2898 file_lock->fl_type = einfo.ei_mode;
2901 CERROR("unknown fcntl lock command: %d\n", cmd);
2905 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2906 LUSTRE_OPC_ANY, NULL);
2907 if (IS_ERR(op_data))
2908 RETURN(PTR_ERR(op_data));
2910 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2911 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2912 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
/* Enqueue the flock lock on the MDS. */
2914 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2915 op_data, &lockh, &flock, 0, NULL /* req */, flags);
/* Mirror the server's decision into the local VFS lock lists. */
2917 if ((file_lock->fl_flags & FL_FLOCK) &&
2918 (rc == 0 || file_lock->fl_type == F_UNLCK))
2919 rc2 = flock_lock_file_wait(file, file_lock);
2920 if ((file_lock->fl_flags & FL_POSIX) &&
2921 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2922 !(flags & LDLM_FL_TEST_LOCK))
2923 rc2 = posix_lock_file_wait(file, file_lock);
2925 if (rc2 && file_lock->fl_type != F_UNLCK) {
/* Local bookkeeping failed: undo the server-side lock via LCK_NL. */
2926 einfo.ei_mode = LCK_NL;
2927 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2928 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2932 ll_finish_md_op_data(op_data);
2937 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Check whether MD (inodebits) locks covering *bits are already cached
 * locally, clearing from *bits each bit found.  Matching is done with
 * LDLM_FL_TEST_LOCK so no reference is actually taken.
 */
2945 * test if some locks matching bits and l_req_mode are acquired
2946 * - bits can be in different locks
2947 * - if found clear the common lock bits in *bits
2948 * - the bits not found, are kept in *bits
2950 * \param bits [IN] searched lock bits [IN]
2951 * \param l_req_mode [IN] searched lock mode
2952 * \retval boolean, true iff all bits are found
2954 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2956 struct lustre_handle lockh;
2957 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four normal modes. */
2958 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2959 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2968 fid = &ll_i2info(inode)->lli_fid;
2969 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2970 ldlm_lockname[mode]);
2972 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe each requested bit individually; bits may live in separate locks. */
2973 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2974 policy.l_inodebits.bits = *bits & (1 << i);
2975 if (policy.l_inodebits.bits == 0)
2978 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2979 &policy, mode, &lockh)) {
2980 struct ldlm_lock *lock;
2982 lock = ldlm_handle2lock(&lockh);
2985 ~(lock->l_policy_data.l_inodebits.bits);
2986 LDLM_LOCK_PUT(lock);
/* Found: remove the satisfied bit(s) from the caller's mask. */
2988 *bits &= ~policy.l_inodebits.bits;
/*
 * Try to match (and take a reference on) a cached MD inodebits lock
 * covering 'bits' in one of the modes in 'mode'.  Returns the matched
 * mode (0 if none); on success the handle is stored in *lockh.
 */
2995 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2996 struct lustre_handle *lockh, __u64 flags,
2999 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3004 fid = &ll_i2info(inode)->lli_fid;
3005 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3007 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3008 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Common tail for inode revalidation: translate the RPC result.
 * -ENOENT on an already-unlinked inode is treated as success (for
 * non-regular, non-directory inodes); other errors are logged.
 */
3013 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3015 /* Already unlinked. Just update nlink and return success */
3016 if (rc == -ENOENT) {
3018 /* This path cannot be hit for regular files unless in
3019 * case of obscure races, so no need to to validate
3021 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3023 } else if (rc != 0) {
3024 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
3025 ll_get_fsname(inode->i_sb, NULL, 0),
3026 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate an inode's metadata against the MDS.
 *
 * Two strategies: if the server supports OBD_CONNECT_ATTRFID, issue a
 * getattr-by-FID intent (md_intent_lock) and finish via
 * ll_revalidate_it_finish(); otherwise, only if no suitable MD lock is
 * already cached (ll_have_md_lock), fall back to a plain md_getattr and
 * rebuild the inode from the reply (ll_prep_inode).
 * NOTE(review): excerpt elides lines; some error paths and the final
 * return are not visible here.
 */
3032 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3035 struct inode *inode = dentry->d_inode;
3036 struct ptlrpc_request *req = NULL;
3037 struct obd_export *exp;
3041 LASSERT(inode != NULL);
3043 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
3044 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
3046 exp = ll_i2mdexp(inode);
3048 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3049 * But under CMD case, it caused some lock issues, should be fixed
3050 * with new CMD ibits lock. See bug 12718 */
3051 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3052 struct lookup_intent oit = { .it_op = IT_GETATTR };
3053 struct md_op_data *op_data;
/* Pure lookup-bit revalidation can use the cheaper IT_LOOKUP intent. */
3055 if (ibits == MDS_INODELOCK_LOOKUP)
3056 oit.it_op = IT_LOOKUP;
3058 /* Call getattr by fid, so do not provide name at all. */
3059 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
3060 dentry->d_inode, NULL, 0, 0,
3061 LUSTRE_OPC_ANY, NULL);
3062 if (IS_ERR(op_data))
3063 RETURN(PTR_ERR(op_data));
3065 oit.it_create_mode |= M_CHECK_STALE;
3066 rc = md_intent_lock(exp, op_data, NULL, 0,
3067 /* we are not interested in name
3070 ll_md_blocking_ast, 0);
3071 ll_finish_md_op_data(op_data);
3072 oit.it_create_mode &= ~M_CHECK_STALE;
3074 rc = ll_inode_revalidate_fini(inode, rc);
3078 rc = ll_revalidate_it_finish(req, &oit, dentry);
3080 ll_intent_release(&oit);
3084 /* Unlinked? Unhash dentry, so it is not picked up later by
3085 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3086 here to preserve get_cwd functionality on 2.6.
3088 if (!dentry->d_inode->i_nlink)
3089 d_lustre_invalidate(dentry, 0);
3091 ll_lookup_finish_locks(&oit, dentry);
3092 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3093 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3094 obd_valid valid = OBD_MD_FLGETATTR;
3095 struct md_op_data *op_data;
/* Regular files also need striping EA data in the getattr reply. */
3098 if (S_ISREG(inode->i_mode)) {
3099 rc = ll_get_max_mdsize(sbi, &ealen);
3102 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3105 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3106 0, ealen, LUSTRE_OPC_ANY,
3108 if (IS_ERR(op_data))
3109 RETURN(PTR_ERR(op_data));
3111 op_data->op_valid = valid;
3112 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3113 * capa for this inode. Because we only keep capas of dirs
3115 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3116 ll_finish_md_op_data(op_data);
3118 rc = ll_inode_revalidate_fini(inode, rc);
3122 rc = ll_prep_inode(&inode, req, NULL, NULL);
3125 ptlrpc_req_finished(req);
/*
 * Revalidate the inode (metadata via __ll_inode_revalidate_it), then
 * refresh timestamps from the cached LVB for non-regular files, or
 * glimpse the size from the OSTs for regular files — unless an HSM
 * restore is running, in which case the MDT-provided size is current
 * and a glimpse would block until restore completes.
 */
3129 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3132 struct inode *inode = dentry->d_inode;
3136 rc = __ll_inode_revalidate_it(dentry, it, ibits);
3140 /* if object isn't regular file, don't validate size */
3141 if (!S_ISREG(inode->i_mode)) {
3142 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
3143 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
3144 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
3146 /* In case of restore, the MDT has the right size and has
3147 * already send it back without granting the layout lock,
3148 * inode is up-to-date so glimpse is useless.
3149 * Also to glimpse we need the layout, in case of a running
3150 * restore the MDT holds the layout lock so the glimpse will
3151 * block up to the end of restore (getattr will block)
3153 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3154 rc = ll_glimpse_size(inode);
/*
 * getattr with an explicit lookup intent: revalidate the inode
 * (UPDATE|LOOKUP bits), then fill struct kstat from the inode.
 */
3159 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
3160 struct lookup_intent *it, struct kstat *stat)
3162 struct inode *inode = de->d_inode;
3163 struct ll_sb_info *sbi = ll_i2sbi(inode);
3164 struct ll_inode_info *lli = ll_i2info(inode);
3167 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3168 MDS_INODELOCK_LOOKUP);
3169 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3174 stat->dev = inode->i_sb->s_dev;
/* 32-bit userspace needs an inode number that fits in 32 bits. */
3175 if (ll_need_32bit_api(sbi))
3176 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3178 stat->ino = inode->i_ino;
3179 stat->mode = inode->i_mode;
3180 stat->nlink = inode->i_nlink;
3181 stat->uid = inode->i_uid;
3182 stat->gid = inode->i_gid;
3183 stat->rdev = inode->i_rdev;
3184 stat->atime = inode->i_atime;
3185 stat->mtime = inode->i_mtime;
3186 stat->ctime = inode->i_ctime;
3187 stat->blksize = 1 << inode->i_blkbits;
3189 stat->size = i_size_read(inode);
3190 stat->blocks = inode->i_blocks;
/* Standard ->getattr entry point: delegate with an IT_GETATTR intent. */
3194 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3196 struct lookup_intent it = { .it_op = IT_GETATTR };
3198 return ll_getattr_it(mnt, de, &it, stat);
/*
 * FIEMAP ioctl handler: marshal the kernel's fiemap_extent_info into a
 * Lustre ll_user_fiemap buffer, run ll_do_fiemap(), and copy flags and
 * mapped extents back.
 * NOTE(review): the initial memcpy copies only ONE ll_fiemap_extent from
 * fi_extents_start — appears deliberate (seeding the first extent), but
 * worth confirming against the full source.
 */
3201 #ifdef HAVE_LINUX_FIEMAP_H
3202 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3203 __u64 start, __u64 len)
3207 struct ll_user_fiemap *fiemap;
3208 unsigned int extent_count = fieinfo->fi_extents_max;
3210 num_bytes = sizeof(*fiemap) + (extent_count *
3211 sizeof(struct ll_fiemap_extent));
3212 OBD_ALLOC_LARGE(fiemap, num_bytes);
3217 fiemap->fm_flags = fieinfo->fi_flags;
3218 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3219 fiemap->fm_start = start;
3220 fiemap->fm_length = len;
3221 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3222 sizeof(struct ll_fiemap_extent));
3224 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* Copy results (flags + mapped extents) back to the kernel structure. */
3226 fieinfo->fi_flags = fiemap->fm_flags;
3227 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3228 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3229 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3231 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * Return a referenced copy of the cached POSIX ACL for the inode.
 * The reference taken by posix_acl_dup() is released by the VFS caller.
 */
3236 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3238 struct ll_inode_info *lli = ll_i2info(inode);
3239 struct posix_acl *acl = NULL;
/* lli_lock guards lli_posix_acl against concurrent update. */
3242 spin_lock(&lli->lli_lock);
3243 /* VFS' acl_permission_check->check_acl will release the refcount */
3244 acl = posix_acl_dup(lli->lli_posix_acl);
3245 spin_unlock(&lli->lli_lock);
/*
 * ACL check callback used by ll_generic_permission() on kernels without
 * the 2-arg generic_permission().  With CONFIG_FS_POSIX_ACL it fetches
 * the cached ACL and applies posix_acl_permission(); under RCU-walk
 * (IPERM_FLAG_RCU) it bails out early (elided line — presumably
 * returning -ECHILD; confirm against full source).
 */
3250 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3252 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3253 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3255 ll_check_acl(struct inode *inode, int mask)
3258 # ifdef CONFIG_FS_POSIX_ACL
3259 struct posix_acl *acl;
3263 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3264 if (flags & IPERM_FLAG_RCU)
3267 acl = ll_get_acl(inode, ACL_TYPE_ACCESS)
3272 rc = posix_acl_permission(inode, acl, mask);
3273 posix_acl_release(acl);
3276 # else /* !CONFIG_FS_POSIX_ACL */
3278 # endif /* CONFIG_FS_POSIX_ACL */
3280 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * ->permission handler (three kernel-API variants).  Declines RCU-walk,
 * revalidates the root inode before checking (the root is not validated
 * during lookup), dispatches remote-client permission checks to
 * lustre_check_remote_perm(), and otherwise falls through to
 * ll_generic_permission() with ll_check_acl.
 */
3282 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3283 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3285 # ifdef HAVE_INODE_PERMISION_2ARGS
3286 int ll_inode_permission(struct inode *inode, int mask)
3288 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
/* Cannot service permission checks under RCU-walk: bail out so the VFS
 * retries in ref-walk mode. */
3295 #ifdef MAY_NOT_BLOCK
3296 if (mask & MAY_NOT_BLOCK)
3298 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3299 if (flags & IPERM_FLAG_RCU)
3303 /* as root inode are NOT getting validated in lookup operation,
3304 * need to do it before permission check. */
3306 if (inode == inode->i_sb->s_root->d_inode) {
3307 struct lookup_intent it = { .it_op = IT_LOOKUP };
3309 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3310 MDS_INODELOCK_LOOKUP);
3315 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
3316 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
/* Remote clients get server-side permission checking. */
3318 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3319 return lustre_check_remote_perm(inode, mask);
3321 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3322 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
/* Default file operations: no .flock/.lock entries, so flock/fcntl
 * locking falls back to the kernel's local-only behavior. */
3327 /* -o localflock - only provides locally consistent flock locks */
3328 struct file_operations ll_file_operations = {
3329 .read = ll_file_read,
3330 .aio_read = ll_file_aio_read,
3331 .write = ll_file_write,
3332 .aio_write = ll_file_aio_write,
3333 .unlocked_ioctl = ll_file_ioctl,
3334 .open = ll_file_open,
3335 .release = ll_file_release,
3336 .mmap = ll_file_mmap,
3337 .llseek = ll_file_seek,
3338 .splice_read = ll_file_splice_read,
/* File operations with cluster-wide coherent locking: flock() and
 * fcntl() both route through ll_file_flock (MDS-backed LDLM flocks). */
3343 struct file_operations ll_file_operations_flock = {
3344 .read = ll_file_read,
3345 .aio_read = ll_file_aio_read,
3346 .write = ll_file_write,
3347 .aio_write = ll_file_aio_write,
3348 .unlocked_ioctl = ll_file_ioctl,
3349 .open = ll_file_open,
3350 .release = ll_file_release,
3351 .mmap = ll_file_mmap,
3352 .llseek = ll_file_seek,
3353 .splice_read = ll_file_splice_read,
3356 .flock = ll_file_flock,
3357 .lock = ll_file_flock
/* File operations for -o noflock mounts: lock requests are routed to
 * ll_file_noflock instead of the real flock implementation. */
3360 /* These are for -o noflock - to return ENOSYS on flock calls */
3361 struct file_operations ll_file_operations_noflock = {
3362 .read = ll_file_read,
3363 .aio_read = ll_file_aio_read,
3364 .write = ll_file_write,
3365 .aio_write = ll_file_aio_write,
3366 .unlocked_ioctl = ll_file_ioctl,
3367 .open = ll_file_open,
3368 .release = ll_file_release,
3369 .mmap = ll_file_mmap,
3370 .llseek = ll_file_seek,
3371 .splice_read = ll_file_splice_read,
3374 .flock = ll_file_noflock,
3375 .lock = ll_file_noflock
/* Inode operations for regular files; fiemap and get_acl are compiled
 * in only when the running kernel provides the corresponding hooks. */
3378 struct inode_operations ll_file_inode_operations = {
3379 .setattr = ll_setattr,
3380 .getattr = ll_getattr,
3381 .permission = ll_inode_permission,
3382 .setxattr = ll_setxattr,
3383 .getxattr = ll_getxattr,
3384 .listxattr = ll_listxattr,
3385 .removexattr = ll_removexattr,
3386 #ifdef HAVE_LINUX_FIEMAP_H
3387 .fiemap = ll_fiemap,
3389 #ifdef HAVE_IOP_GET_ACL
3390 .get_acl = ll_get_acl,
/*
 * Registry for dynamically registered ioctl handlers.  'llioc' holds the
 * list head and the rw-semaphore protecting it; struct llioc_data is one
 * registration: a callback plus a flexible array of iocd_count ioctl
 * command numbers it services.
 */
3394 /* dynamic ioctl number support routins */
3395 static struct llioc_ctl_data {
3396 struct rw_semaphore ioc_sem;
3397 cfs_list_t ioc_head;
3399 __RWSEM_INITIALIZER(llioc.ioc_sem),
3400 CFS_LIST_HEAD_INIT(llioc.ioc_head)
3405 cfs_list_t iocd_list;
3406 unsigned int iocd_size;
3407 llioc_callback_t iocd_cb;
3408 unsigned int iocd_count;
/* Flexible trailing array of supported ioctl command numbers. */
3409 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: allocate an llioc_data holding the
 * callback and its command list, and append it to llioc.ioc_head under
 * the write semaphore.  The returned pointer is the opaque "magic" used
 * later by ll_iocontrol_unregister().
 */
3412 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3415 struct llioc_data *in_data = NULL;
3418 if (cb == NULL || cmd == NULL ||
3419 count > LLIOC_MAX_CMD || count < 0)
3422 size = sizeof(*in_data) + count * sizeof(unsigned int);
3423 OBD_ALLOC(in_data, size);
3424 if (in_data == NULL)
3427 memset(in_data, 0, sizeof(*in_data));
3428 in_data->iocd_size = size;
3429 in_data->iocd_cb = cb;
3430 in_data->iocd_count = count;
3431 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3433 down_write(&llioc.ioc_sem);
3434 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3435 up_write(&llioc.ioc_sem);
/*
 * Unregister a handler previously returned by ll_iocontrol_register():
 * find the matching entry under the write semaphore, unlink and free it.
 * Warns if the magic cookie is unknown.
 */
3440 void ll_iocontrol_unregister(void *magic)
3442 struct llioc_data *tmp;
3447 down_write(&llioc.ioc_sem);
3448 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3450 unsigned int size = tmp->iocd_size;
3452 cfs_list_del(&tmp->iocd_list);
/* Drop the lock before freeing; the entry is already unlinked. */
3453 up_write(&llioc.ioc_sem);
3455 OBD_FREE(tmp, size);
3459 up_write(&llioc.ioc_sem);
3461 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3464 EXPORT_SYMBOL(ll_iocontrol_register);
3465 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch an ioctl to the dynamically registered handlers.  Walks the
 * registry under the read semaphore; the first handler listing 'cmd' is
 * invoked, and iteration stops when a handler returns LLIOC_STOP.  The
 * handler's result code is passed back through *rcp (elided line).
 */
3467 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3468 unsigned int cmd, unsigned long arg, int *rcp)
3470 enum llioc_iter ret = LLIOC_CONT;
3471 struct llioc_data *data;
3472 int rc = -EINVAL, i;
3474 down_read(&llioc.ioc_sem);
3475 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3476 for (i = 0; i < data->iocd_count; i++) {
3477 if (cmd != data->iocd_cmd[i])
3480 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3484 if (ret == LLIOC_STOP)
3487 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration into the cl_object stack via cl_conf_set().
 * For OBJECT_CONF_SET the associated layout lock is only allowed to be
 * matched after the layout has been applied, so concurrent matchers
 * never see a stale layout.
 */
3494 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3496 struct ll_inode_info *lli = ll_i2info(inode);
3497 struct cl_env_nest nest;
/* No cl_object yet: nothing to configure. */
3502 if (lli->lli_clob == NULL)
3505 env = cl_env_nested_get(&nest);
3507 RETURN(PTR_ERR(env));
3509 result = cl_conf_set(env, lli->lli_clob, conf);
3510 cl_env_nested_put(&nest, env);
3512 if (conf->coc_opc == OBJECT_CONF_SET) {
3513 struct ldlm_lock *lock = conf->coc_lock;
3515 LASSERT(lock != NULL);
3516 LASSERT(ldlm_has_layout(lock));
3518 /* it can only be allowed to match after layout is
3519 * applied to inode otherwise false layout would be
3520 * seen. Applying layout shoud happen before dropping
3521 * the intent lock. */
3522 ldlm_lock_allow_match(lock);
/*
 * Ensure the layout lock carries LVB (layout) data.  If the lock was
 * granted via a blocked completion AST its LVB buffer may be missing or
 * too small, so fetch the LOV xattr from the MDT and install it as the
 * lock's l_lvb_data under the resource lock.
 */
3528 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3529 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3532 struct ll_sb_info *sbi = ll_i2sbi(inode);
3533 struct obd_capa *oc;
3534 struct ptlrpc_request *req;
3535 struct mdt_body *body;
3542 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3543 PFID(ll_inode2fid(inode)), !!(lock->l_flags & LDLM_FL_LVB_READY),
3544 lock->l_lvb_data, lock->l_lvb_len);
/* LVB already present and ready: nothing to fetch. */
3546 if ((lock->l_lvb_data != NULL) && (lock->l_flags & LDLM_FL_LVB_READY))
3549 /* if layout lock was granted right away, the layout is returned
3550 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3551 * blocked and then granted via completion ast, we have to fetch
3552 * layout here. Please note that we can't use the LVB buffer in
3553 * completion AST because it doesn't have a large enough buffer */
3554 oc = ll_mdscapa_get(inode);
3555 rc = ll_get_max_mdsize(sbi, &lmmsize);
3557 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3558 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3564 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3565 if (body == NULL || body->eadatasize > lmmsize)
3566 GOTO(out, rc = -EPROTO);
3568 lmmsize = body->eadatasize;
3569 if (lmmsize == 0) /* empty layout */
3572 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3574 GOTO(out, rc = -EFAULT);
3576 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3577 if (lvbdata == NULL)
3578 GOTO(out, rc = -ENOMEM);
/* Install the fetched layout as the lock's LVB, replacing any old one. */
3580 memcpy(lvbdata, lmm, lmmsize);
3581 lock_res_and_lock(lock);
3582 if (lock->l_lvb_data != NULL)
3583 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3585 lock->l_lvb_data = lvbdata;
3586 lock->l_lvb_len = lmmsize;
3587 unlock_res_and_lock(lock);
3592 ptlrpc_req_finished(req);
/*
 * Apply a granted layout lock's layout to the inode.  Unpacks the LVB
 * into a lov_stripe_md, configures the cl_object (OBJECT_CONF_SET), and
 * reports the resulting layout generation through *gen.  If the new
 * layout cannot be applied while IO is in flight (-EBUSY), waits for IO
 * to drain via OBJECT_CONF_WAIT after releasing the lock.
 * NOTE(review): excerpt elides lines, including the loop/retry structure
 * around the wait path and the final return.
 */
3597 * Apply the layout to the inode. Layout lock is held and will be released
3600 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3601 struct inode *inode, __u32 *gen, bool reconf)
3603 struct ll_inode_info *lli = ll_i2info(inode);
3604 struct ll_sb_info *sbi = ll_i2sbi(inode);
3605 struct ldlm_lock *lock;
3606 struct lustre_md md = { NULL };
3607 struct cl_object_conf conf;
3610 bool wait_layout = false;
3613 LASSERT(lustre_handle_is_used(lockh));
3615 lock = ldlm_handle2lock(lockh);
3616 LASSERT(lock != NULL);
3617 LASSERT(ldlm_has_layout(lock));
3619 LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
3620 inode, PFID(&lli->lli_fid), reconf);
3622 /* in case this is a caching lock and reinstate with new inode */
3623 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3625 lock_res_and_lock(lock);
3626 lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
3627 unlock_res_and_lock(lock);
3628 /* checking lvb_ready is racy but this is okay. The worst case is
3629 * that multi processes may configure the file on the same time. */
3631 if (lvb_ready || !reconf) {
3634 /* layout_gen must be valid if layout lock is not
3635 * cancelled and stripe has already set */
3636 *gen = lli->lli_layout_gen;
3642 rc = ll_layout_fetch(inode, lock);
3646 /* for layout lock, lmm is returned in lock's lvb.
3647 * lvb_data is immutable if the lock is held so it's safe to access it
3648 * without res lock. See the description in ldlm_lock_decref_internal()
3649 * for the condition to free lvb_data of layout lock */
3650 if (lock->l_lvb_data != NULL) {
3651 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3652 lock->l_lvb_data, lock->l_lvb_len);
/* Empty LVB means the file has no layout yet. */
3654 *gen = LL_LAYOUT_GEN_EMPTY;
3656 *gen = md.lsm->lsm_layout_gen;
3659 CERROR("%s: file "DFID" unpackmd error: %d\n",
3660 ll_get_fsname(inode->i_sb, NULL, 0),
3661 PFID(&lli->lli_fid), rc);
3667 /* set layout to file. Unlikely this will fail as old layout was
3668 * surely eliminated */
3669 memset(&conf, 0, sizeof conf);
3670 conf.coc_opc = OBJECT_CONF_SET;
3671 conf.coc_inode = inode;
3672 conf.coc_lock = lock;
3673 conf.u.coc_md = &md;
3674 rc = ll_layout_conf(inode, &conf);
3677 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3679 /* refresh layout failed, need to wait */
3680 wait_layout = rc == -EBUSY;
3684 LDLM_LOCK_PUT(lock);
3685 ldlm_lock_decref(lockh, mode);
3687 /* wait for IO to complete if it's still being used. */
3689 CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
3690 ll_get_fsname(inode->i_sb, NULL, 0),
3691 inode, PFID(&lli->lli_fid));
/* OBJECT_CONF_WAIT blocks until in-flight IO drains, after which the
 * new layout can be applied. */
3693 memset(&conf, 0, sizeof conf);
3694 conf.coc_opc = OBJECT_CONF_WAIT;
3695 conf.coc_inode = inode;
3696 rc = ll_layout_conf(inode, &conf);
3700 CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
3701 PFID(&lli->lli_fid), rc);
/*
 * Ensure an up-to-date layout and return its generation in *gen.
 * Fast path: match a cached layout lock (ll_take_md_lock) and apply it.
 * Slow path: take lli_layout_mutex, retry the cached match, and
 * otherwise enqueue an IT_LAYOUT intent lock on the MDS, then apply the
 * granted lock via ll_layout_lock_set().
 * NOTE(review): excerpt elides lines, including the retry/goto structure
 * between the enqueue and the final return.
 */
3707 * This function checks if there exists a LAYOUT lock on the client side,
3708 * or enqueues it if it doesn't have one in cache.
3710 * This function will not hold layout lock so it may be revoked any time after
3711 * this function returns. Any operations depend on layout should be redone
3714 * This function should be called before lov_io_init() to get an uptodate
3715 * layout version, the caller should save the version number and after IO
3716 * is finished, this function should be called again to verify that layout
3717 * is not changed during IO time.
3719 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3721 struct ll_inode_info *lli = ll_i2info(inode);
3722 struct ll_sb_info *sbi = ll_i2sbi(inode);
3723 struct md_op_data *op_data;
3724 struct lookup_intent it;
3725 struct lustre_handle lockh;
3727 struct ldlm_enqueue_info einfo = {
3728 .ei_type = LDLM_IBITS,
3730 .ei_cb_bl = ll_md_blocking_ast,
3731 .ei_cb_cp = ldlm_completion_ast,
3736 *gen = lli->lli_layout_gen;
/* Layout locks disabled (server or mount option): keep cached gen. */
3737 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3741 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3742 LASSERT(S_ISREG(inode->i_mode));
3744 /* mostly layout lock is caching on the local side, so try to match
3745 * it before grabbing layout lock mutex. */
3746 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3747 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3748 if (mode != 0) { /* hit cached lock */
3749 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3753 /* better hold lli_layout_mutex to try again otherwise
3754 * it will have starvation problem. */
3757 /* take layout lock mutex to enqueue layout lock exclusively. */
3758 mutex_lock(&lli->lli_layout_mutex);
3761 /* try again. Maybe somebody else has done this. */
3762 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3763 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3764 if (mode != 0) { /* hit cached lock */
3765 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3769 mutex_unlock(&lli->lli_layout_mutex);
3773 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3774 0, 0, LUSTRE_OPC_ANY, NULL);
3775 if (IS_ERR(op_data)) {
3776 mutex_unlock(&lli->lli_layout_mutex);
3777 RETURN(PTR_ERR(op_data));
3780 /* have to enqueue one */
3781 memset(&it, 0, sizeof(it));
3782 it.it_op = IT_LAYOUT;
3783 lockh.cookie = 0ULL;
3785 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
3786 ll_get_fsname(inode->i_sb, NULL, 0), inode,
3787 PFID(&lli->lli_fid));
3789 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
/* Drop the intent's RPC reply; only the granted lock matters here. */
3791 if (it.d.lustre.it_data != NULL)
3792 ptlrpc_req_finished(it.d.lustre.it_data);
3793 it.d.lustre.it_data = NULL;
3795 ll_finish_md_op_data(op_data);
/* Take over the lock mode from the intent before releasing it. */
3797 mode = it.d.lustre.it_lock_mode;
3798 it.d.lustre.it_lock_mode = 0;
3799 ll_intent_drop_lock(&it);
3802 /* set lock data in case this is a new lock */
3803 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3804 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3808 mutex_unlock(&lli->lli_layout_mutex);
3814 * This function send a restore request to the MDT
3816 int ll_layout_restore(struct inode *inode)
3818 struct hsm_user_request *hur;
3822 len = sizeof(struct hsm_user_request) +
3823 sizeof(struct hsm_user_item);
3824 OBD_ALLOC(hur, len);
3828 hur->hur_request.hr_action = HUA_RESTORE;
3829 hur->hur_request.hr_archive_id = 0;
3830 hur->hur_request.hr_flags = 0;
3831 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3832 sizeof(hur->hur_user_item[0].hui_fid));
3833 hur->hur_user_item[0].hui_extent.length = -1;
3834 hur->hur_request.hr_itemcount = 1;
3835 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,