4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/*
 * Allocate a per-open ll_file_data from its dedicated slab cache.
 * __GFP_IO limits allocation-time reclaim so we do not recurse back
 * into filesystem I/O while servicing an open.
 * NOTE(review): some lines of this function are elided in this excerpt
 * (allocation-failure check and return) — verify against full source.
 */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
/* Fresh open starts with a clean write-error state. */
61 fd->fd_write_failed = false;
/* Return a ll_file_data obtained from ll_file_data_get() to the slab. */
66 static void ll_file_data_put(struct ll_file_data *fd)
69 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the inode's current attributes into @op_data for an MDS RPC
 * (used on close / Size-on-MDS update paths).
 *
 * Copies mode, a/m/ctime, size, block count and flags from @inode,
 * records the current IO epoch, the open handle @fh and the MDS
 * capability.  If the inode carries LLIF_DATA_MODIFIED, the
 * MDS_DATA_MODIFIED bias is added so the server learns the data changed.
 */
72 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
73 struct lustre_handle *fh)
75 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
76 op_data->op_attr.ia_mode = inode->i_mode;
77 op_data->op_attr.ia_atime = inode->i_atime;
78 op_data->op_attr.ia_mtime = inode->i_mtime;
79 op_data->op_attr.ia_ctime = inode->i_ctime;
80 op_data->op_attr.ia_size = i_size_read(inode);
81 op_data->op_attr_blocks = inode->i_blocks;
/* Convert in-kernel inode flags to the on-wire ext-style flag format. */
82 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
83 ll_inode_to_ext_flags(inode->i_flags);
84 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
86 op_data->op_handle = *fh;
/* ll_mdscapa_get() takes a capability reference; released elsewhere. */
87 op_data->op_capa1 = ll_mdscapa_get(inode);
89 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
90 op_data->op_bias |= MDS_DATA_MODIFIED;
94 * Closes the IO epoch and packs all the attributes into @op_data for
/*
 * the close RPC.  Time attributes are always valid; size/blocks are only
 * valid for a write-open when SOM (Size-on-MDS) is not in effect or the
 * file is not a regular file.  Finishes by packing the inode attributes
 * and preparing the md_op_data for the MDS close request.
 */
97 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
98 struct obd_client_handle *och)
102 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
103 ATTR_MTIME | ATTR_MTIME_SET |
104 ATTR_CTIME | ATTR_CTIME_SET;
106 if (!(och->och_flags & FMODE_WRITE))
/* Without SOM support (or for non-regular files) the client's
 * size/blocks are authoritative enough to send on close. */
109 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
110 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
112 ll_ioepoch_close(inode, op_data, &och, 0);
115 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
116 ll_prep_md_op_data(op_data, inode, NULL, NULL,
117 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send the MDS_CLOSE RPC for an open handle and perform all follow-up
 * bookkeeping:
 *  - optional HSM release when @data_version is supplied;
 *  - Size-on-MDS attribute update if the MDS requests one;
 *  - clearing of LLIF_DATA_MODIFIED once the flag was sent;
 *  - destruction of orphan OST objects named in the close reply;
 *  - clearing of open-replay data and invalidation of @och.
 * NOTE(review): several interior lines (error checks, epoch handling,
 * och free) are elided in this excerpt — consult the full source.
 */
121 static int ll_close_inode_openhandle(struct obd_export *md_exp,
123 struct obd_client_handle *och,
124 const __u64 *data_version)
126 struct obd_export *exp = ll_i2mdexp(inode);
127 struct md_op_data *op_data;
128 struct ptlrpc_request *req = NULL;
129 struct obd_device *obd = class_exp2obd(exp);
136 * XXX: in case of LMV, is this correct to access
139 CERROR("Invalid MDC connection handle "LPX64"\n",
140 ll_i2mdexp(inode)->exp_handle.h_cookie);
144 OBD_ALLOC_PTR(op_data);
146 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
148 ll_prepare_close(inode, op_data, och);
149 if (data_version != NULL) {
150 /* Pass in data_version implies release. */
151 op_data->op_bias |= MDS_HSM_RELEASE;
152 op_data->op_data_version = *data_version;
153 op_data->op_lease_handle = och->och_lease_handle;
154 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* Remember whether this close ends the IO epoch before the RPC. */
156 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
157 rc = md_close(md_exp, op_data, och->och_mod, &req);
159 /* This close must have the epoch closed. */
160 LASSERT(epoch_close);
161 /* MDS has instructed us to obtain Size-on-MDS attribute from
162 * OSTs and send setattr to back to MDS. */
163 rc = ll_som_update(inode, op_data);
165 CERROR("%s: inode "DFID" mdc Size-on-MDS update"
166 " failed: rc = %d\n",
167 ll_i2mdexp(inode)->exp_obd->obd_name,
168 PFID(ll_inode2fid(inode)), rc);
172 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
173 ll_i2mdexp(inode)->exp_obd->obd_name,
174 PFID(ll_inode2fid(inode)), rc);
177 /* DATA_MODIFIED flag was successfully sent on close, cancel data
178 * modification flag. */
179 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
180 struct ll_inode_info *lli = ll_i2info(inode);
/* lli_flags is shared state; update under lli_lock. */
182 spin_lock(&lli->lli_lock);
183 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
184 spin_unlock(&lli->lli_lock);
/* Destroy OST objects the MDS told us are now orphans. */
188 rc = ll_objects_destroy(req, inode);
190 CERROR("%s: inode "DFID
191 " ll_objects destroy: rc = %d\n",
192 ll_i2mdexp(inode)->exp_obd->obd_name,
193 PFID(ll_inode2fid(inode)), rc);
/* For an HSM release, confirm the server actually released the file. */
196 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
197 struct mdt_body *body;
198 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
199 if (!(body->valid & OBD_MD_FLRELEASED))
203 ll_finish_md_op_data(op_data);
/* SOM write-open without a closed epoch still owes a DONE_WRITING. */
207 if (exp_connect_som(exp) && !epoch_close &&
208 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
209 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
211 md_clear_open_replay_data(md_exp, och);
212 /* Free @och if it is not waiting for DONE_WRITING. */
213 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
216 if (req) /* This is close request */
217 ptlrpc_req_finished(req);
/*
 * Really close the MDS open handle selected by @flags (write/exec/read).
 * Under lli_och_mutex: if other users still hold the handle, bail out;
 * otherwise detach the och and close it via ll_close_inode_openhandle().
 */
221 int ll_md_real_close(struct inode *inode, int flags)
223 struct ll_inode_info *lli = ll_i2info(inode);
224 struct obd_client_handle **och_p;
225 struct obd_client_handle *och;
/* Pick the handle slot and its use count for this open mode. */
230 if (flags & FMODE_WRITE) {
231 och_p = &lli->lli_mds_write_och;
232 och_usecount = &lli->lli_open_fd_write_count;
233 } else if (flags & FMODE_EXEC) {
234 och_p = &lli->lli_mds_exec_och;
235 och_usecount = &lli->lli_open_fd_exec_count;
237 LASSERT(flags & FMODE_READ);
238 och_p = &lli->lli_mds_read_och;
239 och_usecount = &lli->lli_open_fd_read_count;
242 mutex_lock(&lli->lli_och_mutex);
243 if (*och_usecount) { /* There are still users of this handle, so
245 mutex_unlock(&lli->lli_och_mutex);
250 mutex_unlock(&lli->lli_och_mutex);
252 if (och) { /* There might be a race and somebody have freed this och
254 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-struct-file close bookkeeping for the MDS side:
 *  - drops a group lock left behind by the application;
 *  - cleans up a leaked lease handle (e.g. application crashed);
 *  - closes fd_och if this fd owns a private open handle;
 *  - decrements the per-mode open count and, if no matching OPEN
 *    lock is cached, performs the real MDS close;
 *  - finally releases the ll_file_data and the inode capability.
 */
261 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
264 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
265 struct ll_inode_info *lli = ll_i2info(inode);
269 /* clear group lock, if present */
270 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
271 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
273 if (fd->fd_lease_och != NULL) {
276 /* Usually the lease is not released when the
277 * application crashed, we need to release here. */
278 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
279 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
280 PFID(&lli->lli_fid), rc, lease_broken);
282 fd->fd_lease_och = NULL;
285 if (fd->fd_och != NULL) {
286 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
291 /* Let's see if we have good enough OPEN lock on the file and if
292 we can skip talking to MDS */
293 if (file->f_dentry->d_inode) { /* Can this ever be false? */
/* Non-blocking probe: LDLM_FL_TEST_LOCK only checks for a match. */
295 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
296 struct lustre_handle lockh;
297 struct inode *inode = file->f_dentry->d_inode;
298 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
300 mutex_lock(&lli->lli_och_mutex);
301 if (fd->fd_omode & FMODE_WRITE) {
303 LASSERT(lli->lli_open_fd_write_count);
304 lli->lli_open_fd_write_count--;
305 } else if (fd->fd_omode & FMODE_EXEC) {
307 LASSERT(lli->lli_open_fd_exec_count);
308 lli->lli_open_fd_exec_count--;
311 LASSERT(lli->lli_open_fd_read_count);
312 lli->lli_open_fd_read_count--;
314 mutex_unlock(&lli->lli_och_mutex);
/* No cached OPEN lock -> the MDS must be told about the close now. */
316 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
317 LDLM_IBITS, &policy, lockmode,
319 rc = ll_md_real_close(file->f_dentry->d_inode,
323 CERROR("Releasing a file %p with negative dentry %p. Name %s",
324 file, file->f_dentry, file->f_dentry->d_name.name);
328 LUSTRE_FPRIVATE(file) = NULL;
329 ll_file_data_put(fd);
330 ll_capa_close(inode);
335 /* While this returns an error code, fput() the caller does not, so we need
336 * to make every effort to clean up all of our state here. Also, applications
337 * rarely check close errors and even if an error is returned they will not
338 * re-try the close call.
/*
 * VFS ->release() entry point for Lustre files and directories.
 * Cleans up remote-ACL state for the root inode, stops the statahead
 * thread when this fd started it, clears pending async write errors,
 * and delegates the MDS-side close to ll_md_close().
 */
340 int ll_file_release(struct inode *inode, struct file *file)
342 struct ll_file_data *fd;
343 struct ll_sb_info *sbi = ll_i2sbi(inode);
344 struct ll_inode_info *lli = ll_i2info(inode);
348 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
349 PFID(ll_inode2fid(inode)), inode);
351 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL cleanup applies only to the filesystem root. */
352 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
353 inode == inode->i_sb->s_root->d_inode) {
354 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
357 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
358 fd->fd_flags &= ~LL_FILE_RMTACL;
359 rct_del(&sbi->ll_rct, current_pid());
360 et_search_free(&sbi->ll_et, current_pid());
365 if (inode->i_sb->s_root != file->f_dentry)
366 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
367 fd = LUSTRE_FPRIVATE(file);
370 /* The last ref on @file, maybe not the owner pid of statahead.
371 * Different processes can open the same dir, "ll_opendir_key" means:
372 * it is me that should stop the statahead thread. */
373 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
374 lli->lli_opendir_pid != 0)
375 ll_stop_statahead(inode, lli->lli_opendir_key);
/* The root directory has no MDS open handle; just drop local state. */
377 if (inode->i_sb->s_root == file->f_dentry) {
378 LUSTRE_FPRIVATE(file) = NULL;
379 ll_file_data_put(fd);
/* Pick up any asynchronous write error to report from this close. */
383 if (!S_ISDIR(inode->i_mode)) {
384 lov_read_and_clear_async_rc(lli->lli_clob);
385 lli->lli_async_rc = 0;
388 rc = ll_md_close(sbi->ll_md_exp, inode, file);
390 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
391 libcfs_debug_dumplog();
/*
 * Perform an intent-based open RPC to the MDS for @file, optionally
 * carrying striping info (@lmm/@lmmsize) when setting stripes.
 * On success the reply is used to refresh the inode and stash the
 * returned lock data; the request and intent lock are released on exit.
 */
396 static int ll_intent_file_open(struct file *file, void *lmm,
397 int lmmsize, struct lookup_intent *itp)
399 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
400 struct dentry *parent = file->f_dentry->d_parent;
401 const char *name = file->f_dentry->d_name.name;
402 const int len = file->f_dentry->d_name.len;
403 struct md_op_data *op_data;
404 struct ptlrpc_request *req;
405 __u32 opc = LUSTRE_OPC_ANY;
412 /* Usually we come here only for NFSD, and we want open lock.
413 But we can also get here with pre 2.6.15 patchless kernels, and in
414 that case that lock is also ok */
415 /* We can also get here if there was cached open handle in revalidate_it
416 * but it disappeared while we were getting from there to ll_file_open.
417 * But this means this file was closed and immediately opened which
418 * makes a good candidate for using OPEN lock */
419 /* If lmmsize & lmm are not 0, we are just setting stripe info
420 * parameters. No need for the open lock */
421 if (lmm == NULL && lmmsize == 0) {
422 itp->it_flags |= MDS_OPEN_LOCK;
423 if (itp->it_flags & FMODE_WRITE)
424 opc = LUSTRE_OPC_CREATE;
427 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
428 file->f_dentry->d_inode, name, len,
431 RETURN(PTR_ERR(op_data));
433 itp->it_flags |= MDS_OPEN_BY_FID;
434 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
435 0 /*unused */, &req, ll_md_blocking_ast, 0);
436 ll_finish_md_op_data(op_data);
438 /* reason for keep own exit path - don't flood log
439 * with messages with -ESTALE errors.
/* An open that succeeded on the server but failed locally leaves an
 * open handle behind; release it before propagating the error. */
441 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
442 it_open_error(DISP_OPEN_OPEN, itp))
444 ll_release_openhandle(file->f_dentry, itp);
448 if (it_disposition(itp, DISP_LOOKUP_NEG))
449 GOTO(out, rc = -ENOENT);
451 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
452 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
453 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the reply and record the granted lock. */
457 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
458 if (!rc && itp->d.lustre.it_lock_mode)
459 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
463 ptlrpc_req_finished(req);
464 ll_intent_drop_lock(itp);
470 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
471 * not believe attributes if a few ioepoch holders exist. Attributes for
472 * previous ioepoch if new one is opened are also skipped by MDS.
474 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a non-zero epoch that differs from the current one. */
476 if (ioepoch && lli->lli_ioepoch != ioepoch) {
477 lli->lli_ioepoch = ioepoch;
478 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
479 ioepoch, PFID(&lli->lli_fid));
/*
 * Populate an obd_client_handle from the MDS open reply carried in the
 * intent (@it): file handle, fid, lease-lock cookie and open flags.
 * Registers the handle for open replay; returns the md layer's result.
 */
483 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
484 struct obd_client_handle *och)
486 struct ptlrpc_request *req = it->d.lustre.it_data;
487 struct mdt_body *body;
489 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
490 och->och_fh = body->handle;
491 och->och_fid = body->fid1;
/* The intent's lock handle doubles as the lease cookie. */
492 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
493 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
494 och->och_flags = it->it_flags;
496 return md_set_open_replay_data(md_exp, och, it);
/*
 * Complete the client-local part of an open: optionally fill @och from
 * the intent reply (recording the new IO epoch), then attach @fd to the
 * struct file, initialize readahead state and remember the open mode.
 */
499 int ll_local_open(struct file *file, struct lookup_intent *it,
500 struct ll_file_data *fd, struct obd_client_handle *och)
502 struct inode *inode = file->f_dentry->d_inode;
503 struct ll_inode_info *lli = ll_i2info(inode);
/* The caller must not have installed file-private data yet. */
506 LASSERT(!LUSTRE_FPRIVATE(file));
511 struct ptlrpc_request *req = it->d.lustre.it_data;
512 struct mdt_body *body;
515 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
519 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
520 ll_ioepoch_open(lli, body->ioepoch);
523 LUSTRE_FPRIVATE(file) = fd;
524 ll_readahead_init(inode, &fd->fd_ras);
/* Keep only the access-mode bits relevant to open-handle selection. */
525 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
530 /* Open a file, and (for the very first open) create objects on the OSTs at
531 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
532 * creation or open until ll_lov_setstripe() ioctl is called.
534 * If we already have the stripe MD locally then we don't request it in
535 * md_open(), by passing a lmm_size = 0.
537 * It is up to the application to ensure no other processes open this file
538 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
539 * used. We might be able to avoid races of that sort by getting lli_open_sem
540 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
541 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
543 int ll_file_open(struct inode *inode, struct file *file)
545 struct ll_inode_info *lli = ll_i2info(inode);
546 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
547 .it_flags = file->f_flags };
548 struct obd_client_handle **och_p = NULL;
549 __u64 *och_usecount = NULL;
550 struct ll_file_data *fd;
551 int rc = 0, opendir_set = 0;
554 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
555 PFID(ll_inode2fid(inode)), inode, file->f_flags);
557 it = file->private_data; /* XXX: compat macro */
558 file->private_data = NULL; /* prevent ll_local_open assertion */
560 fd = ll_file_data_get();
562 GOTO(out_openerr, rc = -ENOMEM);
/* First opener of a directory registers itself as the statahead owner. */
565 if (S_ISDIR(inode->i_mode)) {
566 spin_lock(&lli->lli_sa_lock);
567 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
568 lli->lli_opendir_pid == 0) {
569 lli->lli_opendir_key = fd;
570 lli->lli_opendir_pid = current_pid();
573 spin_unlock(&lli->lli_sa_lock);
/* The filesystem root needs no MDS open; attach fd and be done. */
576 if (inode->i_sb->s_root == file->f_dentry) {
577 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup — build our own open intent (oit). */
581 if (!it || !it->d.lustre.it_disposition) {
582 /* Convert f_flags into access mode. We cannot use file->f_mode,
583 * because everything but O_ACCMODE mask was stripped from
585 if ((oit.it_flags + 1) & O_ACCMODE)
587 if (file->f_flags & O_TRUNC)
588 oit.it_flags |= FMODE_WRITE;
590 /* kernel only call f_op->open in dentry_open. filp_open calls
591 * dentry_open after call to open_namei that checks permissions.
592 * Only nfsd_open call dentry_open directly without checking
593 * permissions and because of that this code below is safe. */
594 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
595 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
597 /* We do not want O_EXCL here, presumably we opened the file
598 * already? XXX - NFS implications? */
599 oit.it_flags &= ~O_EXCL;
601 /* bug20584, if "it_flags" contains O_CREAT, the file will be
602 * created if necessary, then "IT_CREAT" should be set to keep
603 * consistent with it */
604 if (oit.it_flags & O_CREAT)
605 oit.it_op |= IT_CREAT;
611 /* Let's see if we have file open on MDS already. */
612 if (it->it_flags & FMODE_WRITE) {
613 och_p = &lli->lli_mds_write_och;
614 och_usecount = &lli->lli_open_fd_write_count;
615 } else if (it->it_flags & FMODE_EXEC) {
616 och_p = &lli->lli_mds_exec_och;
617 och_usecount = &lli->lli_open_fd_exec_count;
619 och_p = &lli->lli_mds_read_och;
620 och_usecount = &lli->lli_open_fd_read_count;
623 mutex_lock(&lli->lli_och_mutex);
624 if (*och_p) { /* Open handle is present */
625 if (it_disposition(it, DISP_OPEN_OPEN)) {
626 /* Well, there's extra open request that we do not need,
627 let's close it somehow. This will decref request. */
628 rc = it_open_error(DISP_OPEN_OPEN, it);
630 mutex_unlock(&lli->lli_och_mutex);
631 GOTO(out_openerr, rc);
634 ll_release_openhandle(file->f_dentry, it);
/* Reuse the existing handle: och == NULL on purpose here. */
638 rc = ll_local_open(file, it, fd, NULL);
641 mutex_unlock(&lli->lli_och_mutex);
642 GOTO(out_openerr, rc);
645 LASSERT(*och_usecount == 0);
646 if (!it->d.lustre.it_disposition) {
647 /* We cannot just request lock handle now, new ELC code
648 means that one of other OPEN locks for this file
649 could be cancelled, and since blocking ast handler
650 would attempt to grab och_mutex as well, that would
651 result in a deadlock */
652 mutex_unlock(&lli->lli_och_mutex);
653 it->it_create_mode |= M_CHECK_STALE;
654 rc = ll_intent_file_open(file, NULL, 0, it);
655 it->it_create_mode &= ~M_CHECK_STALE;
657 GOTO(out_openerr, rc);
661 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
663 GOTO(out_och_free, rc = -ENOMEM);
667 /* md_intent_lock() didn't get a request ref if there was an
668 * open error, so don't do cleanup on the request here
670 /* XXX (green): Should not we bail out on any error here, not
671 * just open error? */
672 rc = it_open_error(DISP_OPEN_OPEN, it);
674 GOTO(out_och_free, rc);
676 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
677 "inode %p: disposition %x, status %d\n", inode,
678 it_disposition(it, ~0), it->d.lustre.it_status);
680 rc = ll_local_open(file, it, fd, *och_p);
682 GOTO(out_och_free, rc);
684 mutex_unlock(&lli->lli_och_mutex);
687 /* Must do this outside lli_och_mutex lock to prevent deadlock where
688 different kind of OPEN lock for this same inode gets cancelled
689 by ldlm_cancel_lru */
690 if (!S_ISREG(inode->i_mode))
691 GOTO(out_och_free, rc);
/* No stripe metadata yet: either delay object creation or fall through. */
695 if (!lli->lli_has_smd) {
696 if (file->f_flags & O_LOV_DELAY_CREATE ||
697 !(file->f_mode & FMODE_WRITE)) {
698 CDEBUG(D_INODE, "object creation was delayed\n");
699 GOTO(out_och_free, rc);
702 file->f_flags &= ~O_LOV_DELAY_CREATE;
703 GOTO(out_och_free, rc);
/* Error path: free the allocated handle slot. */
707 if (och_p && *och_p) {
708 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
709 *och_p = NULL; /* OBD_FREE writes some magic there */
712 mutex_unlock(&lli->lli_och_mutex);
715 if (opendir_set != 0)
716 ll_stop_statahead(inode, lli->lli_opendir_key);
718 ll_file_data_put(fd);
720 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* Drop the extra request reference taken for DISP_ENQ_OPEN_REF. */
723 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
724 ptlrpc_req_finished(it->d.lustre.it_data);
725 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * LDLM blocking AST for lease locks: on a blocking callback, cancel the
 * lease lock asynchronously (the lease is thereby considered broken).
 * Unlike ll_md_blocking_ast this does not manage the open handle.
 */
731 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
732 struct ldlm_lock_desc *desc, void *data, int flag)
735 struct lustre_handle lockh;
739 case LDLM_CB_BLOCKING:
740 ldlm_lock2handle(lock, &lockh);
741 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
743 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
747 case LDLM_CB_CANCELING:
755 * Acquire a lease and open the file.
/*
 * @fmode must be exactly FMODE_READ or FMODE_WRITE.  When @file is
 * given, the lease is tied to that fd's existing open handle (only one
 * opener may hold it).  Sends an intent open with MDS_OPEN_LEASE and
 * verifies the server granted a lease-backed OPEN lock.  Returns the
 * new obd_client_handle or an ERR_PTR on failure.
 */
757 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
758 fmode_t fmode, __u64 open_flags)
760 struct lookup_intent it = { .it_op = IT_OPEN };
761 struct ll_sb_info *sbi = ll_i2sbi(inode);
762 struct md_op_data *op_data;
763 struct ptlrpc_request *req;
764 struct lustre_handle old_handle = { 0 };
765 struct obd_client_handle *och = NULL;
770 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
771 RETURN(ERR_PTR(-EINVAL));
774 struct ll_inode_info *lli = ll_i2info(inode);
775 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
776 struct obd_client_handle **och_p;
/* The fd must include the requested mode and must not be exec. */
779 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
780 RETURN(ERR_PTR(-EPERM));
782 /* Get the openhandle of the file */
784 mutex_lock(&lli->lli_och_mutex);
/* Only one lease per fd. */
785 if (fd->fd_lease_och != NULL) {
786 mutex_unlock(&lli->lli_och_mutex);
790 if (fd->fd_och == NULL) {
791 if (file->f_mode & FMODE_WRITE) {
792 LASSERT(lli->lli_mds_write_och != NULL);
793 och_p = &lli->lli_mds_write_och;
794 och_usecount = &lli->lli_open_fd_write_count;
796 LASSERT(lli->lli_mds_read_och != NULL);
797 och_p = &lli->lli_mds_read_och;
798 och_usecount = &lli->lli_open_fd_read_count;
800 if (*och_usecount == 1) {
807 mutex_unlock(&lli->lli_och_mutex);
808 if (rc < 0) /* more than 1 opener */
811 LASSERT(fd->fd_och != NULL);
/* Remember the previous handle so the MDT can match the owner. */
812 old_handle = fd->fd_och->och_fh;
817 RETURN(ERR_PTR(-ENOMEM));
819 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
820 LUSTRE_OPC_ANY, NULL);
822 GOTO(out, rc = PTR_ERR(op_data));
824 /* To tell the MDT this openhandle is from the same owner */
825 op_data->op_handle = old_handle;
827 it.it_flags = fmode | open_flags;
828 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
829 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
830 ll_md_blocking_lease_ast,
831 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
832 * it can be cancelled which may mislead applications that the lease is
834 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
835 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
836 * doesn't deal with openhandle, so normal openhandle will be leaked. */
837 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
838 ll_finish_md_op_data(op_data);
839 ptlrpc_req_finished(req);
841 GOTO(out_release_it, rc);
843 if (it_disposition(&it, DISP_LOOKUP_NEG))
844 GOTO(out_release_it, rc = -ENOENT);
846 rc = it_open_error(DISP_OPEN_OPEN, &it);
848 GOTO(out_release_it, rc);
850 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
851 ll_och_fill(sbi->ll_md_exp, &it, och);
853 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
854 GOTO(out_close, rc = -EOPNOTSUPP);
856 /* already got the lease, handle lease lock */
857 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
858 if (it.d.lustre.it_lock_mode == 0 ||
859 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
860 /* open lock must return for lease */
861 CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
862 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
863 it.d.lustre.it_lock_bits);
864 GOTO(out_close, rc = -EPROTO);
867 ll_intent_release(&it);
871 /* Cancel open lock */
872 if (it.d.lustre.it_lock_mode != 0) {
873 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
874 it.d.lustre.it_lock_mode);
875 it.d.lustre.it_lock_mode = 0;
876 och->och_lease_handle.cookie = 0ULL;
878 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
880 CERROR("%s: error closing file "DFID": %d\n",
881 ll_get_fsname(inode->i_sb, NULL, 0),
882 PFID(&ll_i2info(inode)->lli_fid), rc2);
883 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
885 ll_intent_release(&it);
891 EXPORT_SYMBOL(ll_lease_open);
894 * Release lease and close the file.
895 * It will check if the lease has ever broken.
/*
 * If the lease lock still exists, inspect its cancel flag to learn
 * whether the lease was broken; otherwise assume broken.  Cancels the
 * lease lock, reports the broken state via @lease_broken (if non-NULL)
 * and closes the MDS open handle.
 */
897 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
900 struct ldlm_lock *lock;
901 bool cancelled = true;
905 lock = ldlm_handle2lock(&och->och_lease_handle);
907 lock_res_and_lock(lock);
908 cancelled = ldlm_is_cancel(lock);
909 unlock_res_and_lock(lock);
913 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
914 PFID(&ll_i2info(inode)->lli_fid), cancelled);
917 ldlm_cli_cancel(&och->och_lease_handle, 0);
918 if (lease_broken != NULL)
919 *lease_broken = cancelled;
921 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
925 EXPORT_SYMBOL(ll_lease_close);
927 /* Fills the obdo with the attributes for the lsm */
/*
 * Issue an async OST getattr for @lsm and wait for completion.
 * @ioepoch is passed through to the OSTs; @dv_flags (LL_DV_RD_FLUSH /
 * LL_DV_WR_FLUSH) request server-side lock flushing for data-version
 * reads.  On success only the attribute bits we trust are kept valid.
 */
928 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
929 struct obd_capa *capa, struct obdo *obdo,
930 __u64 ioepoch, int dv_flags)
932 struct ptlrpc_request_set *set;
933 struct obd_info oinfo = { { { 0 } } };
938 LASSERT(lsm != NULL);
942 oinfo.oi_oa->o_oi = lsm->lsm_oi;
943 oinfo.oi_oa->o_mode = S_IFREG;
944 oinfo.oi_oa->o_ioepoch = ioepoch;
945 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
946 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
947 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
948 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
949 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
950 OBD_MD_FLDATAVERSION;
951 oinfo.oi_capa = capa;
/* Flush requests are implemented via a server-side lock (SRVLOCK). */
952 if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
953 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
954 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
955 if (dv_flags & LL_DV_WR_FLUSH)
956 oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
959 set = ptlrpc_prep_set();
961 CERROR("can't allocate ptlrpc set\n");
964 rc = obd_getattr_async(exp, &oinfo, set);
966 rc = ptlrpc_set_wait(set);
967 ptlrpc_set_destroy(set);
/* Keep only the attributes this path is allowed to report back. */
970 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
971 OBD_MD_FLATIME | OBD_MD_FLMTIME |
972 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
973 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
/* A write-flush that the server did not acknowledge is a failure. */
974 if (dv_flags & LL_DV_WR_FLUSH &&
975 !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
976 oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
983 * Performs the getattr on the inode and updates its fields.
984 * If @sync != 0, perform the getattr under the server-side lock.
986 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
987 __u64 ioepoch, int sync)
989 struct obd_capa *capa = ll_mdscapa_get(inode);
990 struct lov_stripe_md *lsm;
/* Take a reference on the stripe metadata for the duration of the RPC. */
994 lsm = ccc_inode_lsm_get(inode);
995 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
996 capa, obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
999 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
/* Merge the freshly fetched OST attributes into the VFS inode. */
1001 obdo_refresh_inode(inode, obdo, obdo->o_valid);
1002 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
1003 " blksize %lu\n", POSTID(oi), i_size_read(inode),
1004 (unsigned long long)inode->i_blocks,
1005 (unsigned long)ll_inode_blksize(inode));
1007 ccc_inode_lsm_put(inode, lsm);
/*
 * Merge MDS-provided timestamps with OST-derived attributes (size,
 * blocks, times) under the inode size lock, taking the most recent
 * value of each timestamp.  The merged size is written with
 * cl_isize_write_nolock() since the size lock is already held.
 */
1011 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
1013 struct ll_inode_info *lli = ll_i2info(inode);
1014 struct cl_object *obj = lli->lli_clob;
1015 struct cl_attr *attr = ccc_env_thread_attr(env);
1021 ll_inode_size_lock(inode);
1022 /* merge timestamps the most recently obtained from mds with
1023 timestamps obtained from osts */
1024 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
1025 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
1026 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
1027 inode_init_lvb(inode, &lvb);
1029 cl_object_attr_lock(obj);
1030 rc = cl_object_attr_get(env, obj, attr);
1031 cl_object_attr_unlock(obj);
/* Keep the newer of each timestamp (MDS vs. OST view). */
1034 if (lvb.lvb_atime < attr->cat_atime)
1035 lvb.lvb_atime = attr->cat_atime;
1036 if (lvb.lvb_ctime < attr->cat_ctime)
1037 lvb.lvb_ctime = attr->cat_ctime;
1038 if (lvb.lvb_mtime < attr->cat_mtime)
1039 lvb.lvb_mtime = attr->cat_mtime;
1041 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1042 PFID(&lli->lli_fid), attr->cat_size);
1043 cl_isize_write_nolock(inode, attr->cat_size);
1045 inode->i_blocks = attr->cat_blocks;
1047 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1048 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1049 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1051 ll_inode_size_unlock(inode);
/*
 * Glimpse helper for ioctl: fetch current OST attributes for @lsm and
 * copy size/blocks/times into the caller-supplied stat structure.
 */
1056 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1059 struct obdo obdo = { 0 };
1062 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1064 st->st_size = obdo.o_size;
1065 st->st_blocks = obdo.o_blocks;
1066 st->st_mtime = obdo.o_mtime;
1067 st->st_atime = obdo.o_atime;
1068 st->st_ctime = obdo.o_ctime;
/*
 * Decide whether atime updates should be suppressed for @file,
 * honoring O_NOATIME, the inode's S_NOATIME, the mount's
 * noatime/readonly/nodiratime flags, and the superblock's nodiratime.
 */
1073 static bool file_is_noatime(const struct file *file)
1075 const struct vfsmount *mnt = file->f_path.mnt;
1076 const struct inode *inode = file->f_path.dentry->d_inode;
1078 /* Adapted from file_accessed() and touch_atime().*/
1079 if (file->f_flags & O_NOATIME)
1082 if (inode->i_flags & S_NOATIME)
1085 if (IS_NOATIME(inode))
1088 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1091 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1094 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io for a read or write on @file: propagate
 * O_NONBLOCK/O_APPEND/O_SYNC/O_DIRECT semantics, choose the lock
 * requirement (never for nolock files, mandatory for append, maybe
 * otherwise) and record the noatime decision.
 */
1100 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1102 struct inode *inode = file->f_dentry->d_inode;
1104 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1106 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1107 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1108 file->f_flags & O_DIRECT ||
1111 io->ci_obj = ll_i2info(inode)->lli_clob;
1112 io->ci_lockreq = CILR_MAYBE;
1113 if (ll_file_nolock(file)) {
1114 io->ci_lockreq = CILR_NEVER;
1115 io->ci_no_srvlock = 1;
1116 } else if (file->f_flags & O_APPEND) {
/* Appends must serialize against concurrent writers. */
1117 io->ci_lockreq = CILR_MANDATORY;
1120 io->ci_noatime = file_is_noatime(file);
/*
 * Common driver for all read/write paths (normal iov, sendfile,
 * splice).  Sets up the cl_io, takes lli_write_mutex for non-grouplock
 * writes and lli_trunc_sem for normal I/O, runs cl_io_loop(), updates
 * the file position from the io, restarts on short transfers where
 * nothing was consumed, and accounts bytes in the llite stats.
 */
1124 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1125 struct file *file, enum cl_io_type iot,
1126 loff_t *ppos, size_t count)
1128 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1129 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1134 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zd\n",
1135 file->f_dentry->d_name.name, iot, *ppos, count);
1138 io = ccc_env_thread_io(env);
1139 ll_io_init(io, file, iot == CIT_WRITE);
1141 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1142 struct vvp_io *vio = vvp_env_io(env);
1143 struct ccc_io *cio = ccc_env_io(env);
1144 int write_mutex_locked = 0;
1146 cio->cui_fd = LUSTRE_FPRIVATE(file);
1147 vio->cui_io_subtype = args->via_io_subtype;
/* Per-subtype setup: copy the caller's iov/actor/pipe into the io. */
1149 switch (vio->cui_io_subtype) {
1151 cio->cui_iov = args->u.normal.via_iov;
1152 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1153 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1154 cio->cui_iocb = args->u.normal.via_iocb;
/* Writes without a group lock serialize on lli_write_mutex. */
1155 if ((iot == CIT_WRITE) &&
1156 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1157 if (mutex_lock_interruptible(&lli->
1159 GOTO(out, result = -ERESTARTSYS);
1160 write_mutex_locked = 1;
/* Block concurrent truncate while normal I/O is in flight. */
1162 down_read(&lli->lli_trunc_sem);
1165 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1166 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
1169 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1170 vio->u.splice.cui_flags = args->u.splice.via_flags;
1173 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
1176 result = cl_io_loop(env, io);
1177 if (args->via_io_subtype == IO_NORMAL)
1178 up_read(&lli->lli_trunc_sem);
1179 if (write_mutex_locked)
1180 mutex_unlock(&lli->lli_write_mutex);
1182 /* cl_io_rw_init() handled IO */
1183 result = io->ci_result;
/* ci_nob is the number of bytes actually transferred. */
1186 if (io->ci_nob > 0) {
1187 result = io->ci_nob;
1188 *ppos = io->u.ci_wr.wr.crw_pos;
1192 cl_io_fini(env, io);
1193 /* If any bit been read/written (result != 0), we just return
1194 * short read/write instead of restart io. */
1195 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1196 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1197 iot == CIT_READ ? "read" : "write",
1198 file->f_dentry->d_name.name, *ppos, count);
1199 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1203 if (iot == CIT_READ) {
1205 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1206 LPROC_LL_READ_BYTES, result);
1207 } else if (iot == CIT_WRITE) {
1209 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1210 LPROC_LL_WRITE_BYTES, result);
/* Track write failure state so close can report async errors. */
1211 fd->fd_write_failed = false;
1212 } else if (result != -ERESTARTSYS) {
1213 fd->fd_write_failed = true;
1216 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1223 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate a user-supplied iovec array and sum its total byte count.
 * Exact copy of the kernel's __generic_file_aio_write_nolock() helper
 * (see the comment above).  Segments the user cannot read are cut off;
 * a negative/wrapping length yields -EINVAL.
 * NOTE(review): several body lines are elided in this chunk; comments
 * describe only the visible code.
 */
1225 static int ll_file_get_iov_count(const struct iovec *iov,
1226 unsigned long *nr_segs, size_t *count)
1231 for (seg = 0; seg < *nr_segs; seg++) {
1232 const struct iovec *iv = &iov[seg];
1235 * If any segment has a negative length, or the cumulative
1236 * length ever wraps negative then return -EINVAL.
1239 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
/* Segment is readable by the caller: keep it and continue. */
1241 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1246 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Async read entry point (file_operations.aio_read).
 * Validates the iovec, builds per-thread vvp_io_args in the cl_env and
 * delegates to ll_file_io_generic() with CIT_READ.
 */
1253 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1254 unsigned long nr_segs, loff_t pos)
1257 struct vvp_io_args *args;
1263 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1267 env = cl_env_get(&refcheck);
1269 RETURN(PTR_ERR(env));
/* IO_NORMAL: iovec-based I/O (vs. splice/sendfile subtypes). */
1271 args = vvp_env_args(env, IO_NORMAL);
1272 args->u.normal.via_iov = (struct iovec *)iov;
1273 args->u.normal.via_nrsegs = nr_segs;
1274 args->u.normal.via_iocb = iocb;
1276 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1277 &iocb->ki_pos, count);
1278 cl_env_put(env, &refcheck);
/*
 * Synchronous read entry point (file_operations.read).
 * Wraps the user buffer in a single-segment iovec plus a sync kiocb
 * (both stored in the per-env vti scratch area) and forwards to
 * ll_file_aio_read(); *ppos is updated from the kiocb afterwards.
 */
1282 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1286 struct iovec *local_iov;
1287 struct kiocb *kiocb;
1292 env = cl_env_get(&refcheck);
1294 RETURN(PTR_ERR(env));
1296 local_iov = &vvp_env_info(env)->vti_local_iov;
1297 kiocb = &vvp_env_info(env)->vti_kiocb;
1298 local_iov->iov_base = (void __user *)buf;
1299 local_iov->iov_len = count;
1300 init_sync_kiocb(kiocb, file);
1301 kiocb->ki_pos = *ppos;
1302 kiocb->ki_left = count;
1304 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1305 *ppos = kiocb->ki_pos;
1307 cl_env_put(env, &refcheck);
1312 * Write to a file (through the page cache).
/*
 * Async write entry point (file_operations.aio_write); mirrors
 * ll_file_aio_read() but runs the generic I/O path with CIT_WRITE.
 */
1315 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1316 unsigned long nr_segs, loff_t pos)
1319 struct vvp_io_args *args;
1325 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1329 env = cl_env_get(&refcheck);
1331 RETURN(PTR_ERR(env));
1333 args = vvp_env_args(env, IO_NORMAL);
1334 args->u.normal.via_iov = (struct iovec *)iov;
1335 args->u.normal.via_nrsegs = nr_segs;
1336 args->u.normal.via_iocb = iocb;
1338 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1339 &iocb->ki_pos, count);
1340 cl_env_put(env, &refcheck);
/*
 * Synchronous write entry point (file_operations.write); mirrors
 * ll_file_read(): one-segment iovec + sync kiocb, then
 * ll_file_aio_write(), then the new position is copied back to *ppos.
 */
1344 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1348 struct iovec *local_iov;
1349 struct kiocb *kiocb;
1354 env = cl_env_get(&refcheck);
1356 RETURN(PTR_ERR(env));
1358 local_iov = &vvp_env_info(env)->vti_local_iov;
1359 kiocb = &vvp_env_info(env)->vti_kiocb;
1360 local_iov->iov_base = (void __user *)buf;
1361 local_iov->iov_len = count;
1362 init_sync_kiocb(kiocb, file);
1363 kiocb->ki_pos = *ppos;
1364 kiocb->ki_left = count;
1366 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1367 *ppos = kiocb->ki_pos;
1369 cl_env_put(env, &refcheck);
1374 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read entry point: read file data into a pipe.  Uses the
 * IO_SPLICE args variant and the same generic CIT_READ path as the
 * normal read calls.
 */
1376 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1377 struct pipe_inode_info *pipe, size_t count,
1381 struct vvp_io_args *args;
1386 env = cl_env_get(&refcheck);
1388 RETURN(PTR_ERR(env));
1390 args = vvp_env_args(env, IO_SPLICE);
1391 args->u.splice.via_pipe = pipe;
1392 args->u.splice.via_flags = flags;
1394 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1395 cl_env_put(env, &refcheck);
/*
 * Re-create the OST object(s) of an inode on a given OST index.
 * Copies the current stripe md, marks the obdo with
 * OBD_FL_RECREATE_OBJS and calls obd_create() under the inode size
 * lock.  Fails with -ENOENT if the file has no objects.
 */
1399 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
1402 struct obd_export *exp = ll_i2dtexp(inode);
1403 struct obd_trans_info oti = { 0 };
1404 struct obdo *oa = NULL;
1407 struct lov_stripe_md *lsm = NULL, *lsm2;
1414 lsm = ccc_inode_lsm_get(inode);
1415 if (!lsm_has_objects(lsm))
1416 GOTO(out, rc = -ENOENT);
1418 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1419 (lsm->lsm_stripe_count));
1421 OBD_ALLOC_LARGE(lsm2, lsm_size);
1423 GOTO(out, rc = -ENOMEM);
/* o_nlink carries the target OST index for the recreate RPC. */
1426 oa->o_nlink = ost_idx;
1427 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1428 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1429 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1430 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1431 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1432 memcpy(lsm2, lsm, lsm_size);
1433 ll_inode_size_lock(inode);
1434 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1435 ll_inode_size_unlock(inode);
1437 OBD_FREE_LARGE(lsm2, lsm_size);
1440 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_RECREATE_OBJ handler: copy a ll_recreate_obj request from
 * userspace and recreate the object by its (mdt0) object id.
 * Requires CAP_SYS_ADMIN.
 */
1445 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1447 struct ll_recreate_obj ucreat;
1451 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1454 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1458 ostid_set_seq_mdt0(&oi);
1459 ostid_set_id(&oi, ucreat.lrc_id);
1460 RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID handler: recreate an object identified by FID.
 * The OST index is unpacked from bits 16..31 of the FID sequence.
 * Requires CAP_SYS_ADMIN.
 */
1463 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1470 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1473 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1476 fid_to_ostid(&fid, &oi);
1477 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1478 RETURN(ll_lov_recreate(inode, &oi, ost_idx));
/*
 * Apply a user-supplied striping EA (lov_user_md) to an inode by
 * re-opening it with an IT_OPEN intent carrying the layout.  If a
 * stripe already exists the call is a no-op for the layout (logged at
 * D_IOCTL).  Runs under the inode size lock.
 */
1481 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1482 __u64 flags, struct lov_user_md *lum,
1485 struct lov_stripe_md *lsm = NULL;
1486 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1490 lsm = ccc_inode_lsm_get(inode);
1492 ccc_inode_lsm_put(inode, lsm);
1493 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1494 PFID(ll_inode2fid(inode)));
1498 ll_inode_size_lock(inode);
1499 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1502 rc = oit.d.lustre.it_status;
1504 GOTO(out_req_free, rc);
/* Close the MDS open handle created by the intent open above. */
1506 ll_release_openhandle(file->f_dentry, &oit);
1509 ll_inode_size_unlock(inode);
1510 ll_intent_release(&oit);
1511 ccc_inode_lsm_put(inode, lsm);
1514 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (striping information) of @filename from the MDS.
 * On success *lmmp points into the reply buffer (kept alive via
 * *request, which the caller must release), *lmm_size is its length.
 * The EA arrives little-endian from the MDS and is swabbed to host
 * order for userspace consumption when needed.
 */
1518 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1519 struct lov_mds_md **lmmp, int *lmm_size,
1520 struct ptlrpc_request **request)
1522 struct ll_sb_info *sbi = ll_i2sbi(inode);
1523 struct mdt_body *body;
1524 struct lov_mds_md *lmm = NULL;
1525 struct ptlrpc_request *req = NULL;
1526 struct md_op_data *op_data;
1529 rc = ll_get_max_mdsize(sbi, &lmmsize);
1533 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1534 strlen(filename), lmmsize,
1535 LUSTRE_OPC_ANY, NULL);
1536 if (IS_ERR(op_data))
1537 RETURN(PTR_ERR(op_data));
1539 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1540 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1541 ll_finish_md_op_data(op_data);
1543 CDEBUG(D_INFO, "md_getattr_name failed "
1544 "on %s: rc %d\n", filename, rc);
1548 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1549 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1551 lmmsize = body->eadatasize;
1553 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1555 GOTO(out, rc = -ENODATA);
1558 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1559 LASSERT(lmm != NULL);
/* Only plain (v1) and pool-aware (v3) LOV magics are understood. */
1561 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1562 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1563 GOTO(out, rc = -EPROTO);
1567 * This is coming from the MDS, so is probably in
1568 * little endian. We convert it to host endian before
1569 * passing it to userspace.
1571 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1574 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1575 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
/* if called for a directory - we should
 * avoid swabbing the non-existent lsm objects */
1580 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1581 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1582 if (S_ISREG(body->mode))
1583 lustre_swab_lov_user_md_objects(
1584 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1586 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1587 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1588 if (S_ISREG(body->mode))
1589 lustre_swab_lov_user_md_objects(
1590 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1597 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a raw lov_user_md (with one ost_data
 * entry) from userspace and apply it via ll_lov_setstripe_ea_info()
 * with MDS_OPEN_HAS_OBJS.  Requires CAP_SYS_ADMIN.
 */
1602 static int ll_lov_setea(struct inode *inode, struct file *file,
1605 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1606 struct lov_user_md *lump;
1607 int lum_size = sizeof(struct lov_user_md) +
1608 sizeof(struct lov_user_ost_data);
1612 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1615 OBD_ALLOC_LARGE(lump, lum_size);
1619 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1620 OBD_FREE_LARGE(lump, lum_size);
1624 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1626 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: read the user's lov_user_md (first as
 * v1, upgrading the copy to v3 if the magic says so), apply the layout,
 * then refresh the layout generation and echo the resulting striping
 * back through LL_IOC_LOV_GETSTRIPE semantics.
 */
1630 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1633 struct lov_user_md_v3 lumv3;
1634 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1635 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1636 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1638 __u64 flags = FMODE_WRITE;
1641 /* first try with v1 which is smaller than v3 */
1642 lum_size = sizeof(struct lov_user_md_v1);
1643 if (copy_from_user(lumv1, lumv1p, lum_size))
1646 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1647 lum_size = sizeof(struct lov_user_md_v3);
1648 if (copy_from_user(&lumv3, lumv3p, lum_size))
1652 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1654 struct lov_stripe_md *lsm;
/* NOTE(review): put_user() return value is not checked here. */
1657 put_user(0, &lumv1p->lmm_stripe_count);
1659 ll_layout_refresh(inode, &gen);
1660 lsm = ccc_inode_lsm_get(inode);
1661 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1662 0, lsm, (void *)arg);
1663 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_LOV_GETSTRIPE handler: hand the inode's stripe md to the LOV
 * layer, which copies the striping info out to the user buffer @arg.
 */
1668 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1670 struct lov_stripe_md *lsm;
1674 lsm = ccc_inode_lsm_get(inode);
1676 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1678 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_GROUP_LOCK handler: take a cluster-wide group lock with gid
 * @arg on behalf of this open file.  Only one group lock per fd; the
 * fd_flags/fd_grouplock pair is protected by lli_lock, and a second
 * racing thread detected after cl_get_grouplock() drops its lock again.
 */
1682 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1684 struct ll_inode_info *lli = ll_i2info(inode);
1685 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1686 struct ccc_grouplock grouplock;
1690 if (ll_file_nolock(file))
1691 RETURN(-EOPNOTSUPP);
1693 spin_lock(&lli->lli_lock);
1694 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1695 CWARN("group lock already existed with gid %lu\n",
1696 fd->fd_grouplock.cg_gid);
1697 spin_unlock(&lli->lli_lock);
1700 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1701 spin_unlock(&lli->lli_lock);
/* Enqueue outside the spinlock; may block unless O_NONBLOCK. */
1703 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1704 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1708 spin_lock(&lli->lli_lock);
1709 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1710 spin_unlock(&lli->lli_lock);
1711 CERROR("another thread just won the race\n");
1712 cl_put_grouplock(&grouplock);
1716 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1717 fd->fd_grouplock = grouplock;
1718 spin_unlock(&lli->lli_lock);
1720 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock held on this fd,
 * verifying under lli_lock that one is held and that its gid matches
 * @arg before clearing the fd state and dropping the cl-level lock.
 */
1724 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1726 struct ll_inode_info *lli = ll_i2info(inode);
1727 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1728 struct ccc_grouplock grouplock;
1731 spin_lock(&lli->lli_lock);
1732 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1733 spin_unlock(&lli->lli_lock);
1734 CWARN("no group lock held\n");
1737 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1739 if (fd->fd_grouplock.cg_gid != arg) {
1740 CWARN("group lock %lu doesn't match current id %lu\n",
1741 arg, fd->fd_grouplock.cg_gid);
1742 spin_unlock(&lli->lli_lock);
/* Detach from the fd under the spinlock, release outside it. */
1746 grouplock = fd->fd_grouplock;
1747 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1748 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1749 spin_unlock(&lli->lli_lock);
1751 cl_put_grouplock(&grouplock);
1752 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1757 * Close the MDS open handle carried by a lookup intent.
1759 * \param dentry [in] dentry which contains the inode
1760 * \param it [in,out] intent which contains open info and result
* \retval 0 success, or nothing to do
1763 * \retval <0 failure
/*
 * Close the MDS open handle carried by a lookup intent (see doxygen
 * comment above).  No-op for the filesystem root or when the intent
 * produced no open handle.
 */
1765 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1767 struct inode *inode = dentry->d_inode;
1768 struct obd_client_handle *och;
1774 /* Root ? Do nothing. */
1775 if (dentry->d_inode->i_sb->s_root == dentry)
1778 /* No open handle to close? Move away */
1779 if (!it_disposition(it, DISP_OPEN_OPEN))
1782 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1784 OBD_ALLOC(och, sizeof(*och));
1786 GOTO(out, rc = -ENOMEM);
1788 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1790 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1793 /* this one is in place of ll_file_open */
1794 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1795 ptlrpc_req_finished(it->d.lustre.it_data);
1796 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1802 * Get the size of the inode for which the FIEMAP mapping is requested.
1803 * Make the FIEMAP get_info call and return the result.
/*
 * Perform a FIEMAP request against the OSTs via obd_get_info(KEY_FIEMAP).
 * Rejects unsupported flags (echoing the unsupported set back to the
 * caller), honours FIEMAP_FLAG_SYNC by flushing dirty pages first, and
 * short-circuits with zero extents for size-0 files.
 */
1805 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1808 struct obd_export *exp = ll_i2dtexp(inode);
1809 struct lov_stripe_md *lsm = NULL;
1810 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1811 int vallen = num_bytes;
1815 /* Checks for fiemap flags */
1816 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1817 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1821 /* Check for FIEMAP_FLAG_SYNC */
1822 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1823 rc = filemap_fdatawrite(inode->i_mapping);
1828 lsm = ccc_inode_lsm_get(inode);
1832 /* If the stripe_count > 1 and the application does not understand
1833 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1835 if (lsm->lsm_stripe_count > 1 &&
1836 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1837 GOTO(out, rc = -EOPNOTSUPP);
1839 fm_key.oa.o_oi = lsm->lsm_oi;
1840 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1842 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1843 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1844 /* If filesize is 0, then there would be no objects for mapping */
1845 if (fm_key.oa.o_size == 0) {
1846 fiemap->fm_mapped_extents = 0;
1850 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1852 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1855 CERROR("obd_get_info failed: rc = %d\n", rc);
1858 ccc_inode_lsm_put(inode, lsm);
/*
 * OBD_IOC_FID2PATH handler: translate a FID into a pathname via the
 * MDC.  Reads the request header to learn gf_pathlen, allocates an
 * output buffer of that size and copies the filled result back.
 * Restricted unless the mount allows user fid2path.
 */
1862 int ll_fid2path(struct inode *inode, void *arg)
1864 struct obd_export *exp = ll_i2mdexp(inode);
1865 struct getinfo_fid2path *gfout, *gfin;
1869 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1870 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1873 /* Need to get the buflen */
1874 OBD_ALLOC_PTR(gfin);
1877 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1882 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1883 OBD_ALLOC(gfout, outsize);
1884 if (gfout == NULL) {
1888 memcpy(gfout, gfin, sizeof(*gfout));
1891 /* Call mdc_iocontrol */
1892 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1896 if (copy_to_user(arg, gfout, outsize))
1900 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size the kernel fiemap buffer from the
 * user's fm_extent_count, copy the request (and first extent, used for
 * continuation) in, run ll_do_fiemap() and copy the header plus mapped
 * extents back out.
 * NOTE(review): num_bytes is derived from a user-controlled count; the
 * visible code shows no overflow check on the multiplication — confirm
 * against elided lines.
 */
1904 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1906 struct ll_user_fiemap *fiemap_s;
1907 size_t num_bytes, ret_bytes;
1908 unsigned int extent_count;
1911 /* Get the extent count so we can calculate the size of
1912 * required fiemap buffer */
1913 if (get_user(extent_count,
1914 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1916 num_bytes = sizeof(*fiemap_s) + (extent_count *
1917 sizeof(struct ll_fiemap_extent));
1919 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1920 if (fiemap_s == NULL)
1923 /* get the fiemap value */
1924 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1926 GOTO(error, rc = -EFAULT);
1928 /* If fm_extent_count is non-zero, read the first extent since
1929 * it is used to calculate end_offset and device from previous
1932 if (copy_from_user(&fiemap_s->fm_extents[0],
1933 (char __user *)arg + sizeof(*fiemap_s),
1934 sizeof(struct ll_fiemap_extent)))
1935 GOTO(error, rc = -EFAULT);
1938 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1942 ret_bytes = sizeof(struct ll_user_fiemap);
1944 if (extent_count != 0)
1945 ret_bytes += (fiemap_s->fm_mapped_extents *
1946 sizeof(struct ll_fiemap_extent));
1948 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1952 OBD_FREE_LARGE(fiemap_s, num_bytes);
1957 * Read the data_version for inode.
1959 * This value is computed using stripe object version on OST.
1960 * Version is computed using server side locking.
1962 * @param flags whether to sync on the OST side; one of:
1964 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1965 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * Fetch the OST-computed data version of an inode (see comment above).
 * A stripeless file reports version 0; @flags selects the flush mode
 * used by ll_lsm_getattr() on the OST side.
 */
1967 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1969 struct lov_stripe_md *lsm = NULL;
1970 struct ll_sb_info *sbi = ll_i2sbi(inode);
1971 struct obdo *obdo = NULL;
1975 /* If no stripe, we consider version is 0. */
1976 lsm = ccc_inode_lsm_get(inode);
1977 if (!lsm_has_objects(lsm)) {
1979 CDEBUG(D_INODE, "No object for inode\n");
1983 OBD_ALLOC_PTR(obdo);
1985 GOTO(out, rc = -ENOMEM);
1987 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, flags);
1989 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1992 *data_version = obdo->o_data_version;
1998 ccc_inode_lsm_put(inode, lsm);
2003 * Trigger a HSM release request for the provided inode.
/*
 * Trigger an HSM release for @inode: take a write lease with
 * MDS_OPEN_RELEASE, flush and grab the data version, merge size/time
 * attributes, then close the handle with the release flag so the MDT
 * frees the OST objects.  The lease is closed on any failure path.
 */
2005 int ll_hsm_release(struct inode *inode)
2007 struct cl_env_nest nest;
2009 struct obd_client_handle *och = NULL;
2010 __u64 data_version = 0;
2014 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
2015 ll_get_fsname(inode->i_sb, NULL, 0),
2016 PFID(&ll_i2info(inode)->lli_fid));
2018 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
2020 GOTO(out, rc = PTR_ERR(och));
2022 /* Grab latest data_version and [am]time values */
2023 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
2027 env = cl_env_nested_get(&nest);
2029 GOTO(out, rc = PTR_ERR(env));
2031 ll_merge_lvb(env, inode);
2032 cl_env_nested_put(&nest, env);
2034 /* Release the file.
2035 * NB: lease lock handle is released in mdc_hsm_release_pack() because
2036 * we still need it to pack l_remote_handle to MDT. */
2037 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
2043 if (och != NULL && !IS_ERR(och)) /* close the file */
2044 ll_lease_close(och, inode, NULL);
/* Scratch state for ll_swap_layouts(): saved [am]time attrs to restore
 * after the swap, the two inodes (possibly reordered by FID), and
 * whether each side's data version must be verified.
 * NOTE(review): dv1/dv2 members are used later but elided here. */
2049 struct ll_swap_stack {
2050 struct iattr ia1, ia2;
2052 struct inode *inode1, *inode2;
2053 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS implementation: atomically exchange the
 * layouts of two regular files on the same filesystem.  Steps:
 * permission/same-sb checks, FID-ordering to avoid lock inversion,
 * optional group locks to flush caches, optional data-version
 * verification, the MDT swap RPC, then restoration of [am]times if
 * requested.
 */
2056 static int ll_swap_layouts(struct file *file1, struct file *file2,
2057 struct lustre_swap_layouts *lsl)
2059 struct mdc_swap_layouts msl;
2060 struct md_op_data *op_data;
2063 struct ll_swap_stack *llss = NULL;
2066 OBD_ALLOC_PTR(llss);
2070 llss->inode1 = file1->f_dentry->d_inode;
2071 llss->inode2 = file2->f_dentry->d_inode;
2073 if (!S_ISREG(llss->inode2->i_mode))
2074 GOTO(free, rc = -EINVAL);
2076 if (inode_permission(llss->inode1, MAY_WRITE) ||
2077 inode_permission(llss->inode2, MAY_WRITE))
2078 GOTO(free, rc = -EPERM);
2080 if (llss->inode2->i_sb != llss->inode1->i_sb)
2081 GOTO(free, rc = -EXDEV);
2083 /* we use 2 bool because it is easier to swap than 2 bits */
2084 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2085 llss->check_dv1 = true;
2087 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2088 llss->check_dv2 = true;
2090 /* we cannot use lsl->sl_dvX directly because we may swap them */
2091 llss->dv1 = lsl->sl_dv1;
2092 llss->dv2 = lsl->sl_dv2;
2094 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2095 if (rc == 0) /* same file, done! */
/* Order the pair by FID so locks are always taken consistently. */
2098 if (rc < 0) { /* sequentialize it */
2099 swap(llss->inode1, llss->inode2);
2101 swap(llss->dv1, llss->dv2);
2102 swap(llss->check_dv1, llss->check_dv2);
2106 if (gid != 0) { /* application asks to flush dirty cache */
2107 rc = ll_get_grouplock(llss->inode1, file1, gid);
2111 rc = ll_get_grouplock(llss->inode2, file2, gid);
2113 ll_put_grouplock(llss->inode1, file1, gid);
2118 /* to be able to restore mtime and atime after swap
2119 * we need to first save them */
2121 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2122 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2123 llss->ia1.ia_atime = llss->inode1->i_atime;
2124 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2125 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2126 llss->ia2.ia_atime = llss->inode2->i_atime;
2127 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2130 /* ultimate check, before swapping the layouts we check if
2131 * dataversion has changed (if requested) */
2132 if (llss->check_dv1) {
2133 rc = ll_data_version(llss->inode1, &dv, 0);
2136 if (dv != llss->dv1)
2137 GOTO(putgl, rc = -EAGAIN);
2140 if (llss->check_dv2) {
2141 rc = ll_data_version(llss->inode2, &dv, 0);
2144 if (dv != llss->dv2)
2145 GOTO(putgl, rc = -EAGAIN);
2148 /* struct md_op_data is used to send the swap args to the mdt
2149 * only flags is missing, so we use struct mdc_swap_layouts
2150 * through the md_op_data->op_data */
2151 /* flags from user space have to be converted before they are send to
2152 * server, no flag is sent today, they are only used on the client */
2155 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2156 0, LUSTRE_OPC_ANY, &msl);
2157 if (IS_ERR(op_data))
2158 GOTO(free, rc = PTR_ERR(op_data));
2160 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2161 sizeof(*op_data), op_data, NULL);
2162 ll_finish_md_op_data(op_data);
2166 ll_put_grouplock(llss->inode2, file2, gid);
2167 ll_put_grouplock(llss->inode1, file1, gid);
2170 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2174 /* clear useless flags */
2175 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2176 llss->ia1.ia_valid &= ~ATTR_MTIME;
2177 llss->ia2.ia_valid &= ~ATTR_MTIME;
2180 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2181 llss->ia1.ia_valid &= ~ATTR_ATIME;
2182 llss->ia2.ia_valid &= ~ATTR_ATIME;
/* ia1/ia2 were saved from the pre-swap inodes, so apply each to the
 * opposite file now that the layouts have been exchanged. */
2185 /* update time if requested */
2187 if (llss->ia2.ia_valid != 0) {
2188 mutex_lock(&llss->inode1->i_mutex);
2189 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2190 mutex_unlock(&llss->inode1->i_mutex);
2193 if (llss->ia1.ia_valid != 0) {
2196 mutex_lock(&llss->inode2->i_mutex);
2197 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2198 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Send an LL_IOC_HSM_STATE_SET request to the MDT.  Non-root callers
 * may only touch flags within HSM_USER_MASK.
 */
2210 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2212 struct md_op_data *op_data;
2215 /* Non-root users are forbidden to set or clear flags which are
2216 * NOT defined in HSM_USER_MASK. */
2217 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2218 !cfs_capable(CFS_CAP_SYS_ADMIN))
2221 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2222 LUSTRE_OPC_ANY, hss);
2223 if (IS_ERR(op_data))
2224 RETURN(PTR_ERR(op_data));
2226 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2227 sizeof(*op_data), op_data, NULL);
2229 ll_finish_md_op_data(op_data);
/*
 * Import a file from HSM: mark the (regular) file ARCHIVED | EXISTS |
 * RELEASED with the given archive id, then force the mode, ownership,
 * size and timestamps supplied by the copytool via ll_setattr_raw().
 */
2234 static int ll_hsm_import(struct inode *inode, struct file *file,
2235 struct hsm_user_import *hui)
2237 struct hsm_state_set *hss = NULL;
2238 struct iattr *attr = NULL;
2242 if (!S_ISREG(inode->i_mode))
2248 GOTO(out, rc = -ENOMEM);
2250 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2251 hss->hss_archive_id = hui->hui_archive_id;
2252 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2253 rc = ll_hsm_state_set(inode, hss);
2257 OBD_ALLOC_PTR(attr);
2259 GOTO(out, rc = -ENOMEM);
/* Force a regular-file mode; only rwx bits are taken from the user. */
2261 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2262 attr->ia_mode |= S_IFREG;
2263 attr->ia_uid = hui->hui_uid;
2264 attr->ia_gid = hui->hui_gid;
2265 attr->ia_size = hui->hui_size;
2266 attr->ia_mtime.tv_sec = hui->hui_mtime;
2267 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2268 attr->ia_atime.tv_sec = hui->hui_atime;
2269 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2271 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2272 ATTR_UID | ATTR_GID |
2273 ATTR_MTIME | ATTR_MTIME_SET |
2274 ATTR_ATIME | ATTR_ATIME_SET;
2276 rc = ll_setattr_raw(file->f_dentry, attr, true);
/*
 * Main ioctl dispatcher for regular files (file_operations
 * .unlocked_ioctl).  Decodes @cmd and routes to the per-feature
 * helpers: striping (SETSTRIPE/GETSTRIPE/SETEA), layout swap, FIEMAP,
 * group locks, FID/path translation, data version, HSM state and
 * leases.  Unknown commands fall through to the registered ioctl
 * handlers and finally to obd_iocontrol() on the data export.
 */
2290 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2292 struct inode *inode = file->f_dentry->d_inode;
2293 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2297 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2298 PFID(ll_inode2fid(inode)), inode, cmd);
2299 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2301 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2302 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2306 case LL_IOC_GETFLAGS:
2307 /* Get the current value of the file flags */
2308 return put_user(fd->fd_flags, (int *)arg);
2309 case LL_IOC_SETFLAGS:
2310 case LL_IOC_CLRFLAGS:
2311 /* Set or clear specific file flags */
2312 /* XXX This probably needs checks to ensure the flags are
2313 * not abused, and to handle any flag side effects.
2315 if (get_user(flags, (int *) arg))
2318 if (cmd == LL_IOC_SETFLAGS) {
2319 if ((flags & LL_FILE_IGNORE_LOCK) &&
2320 !(file->f_flags & O_DIRECT)) {
2321 CERROR("%s: unable to disable locking on "
2322 "non-O_DIRECT file\n", current->comm);
2326 fd->fd_flags |= flags;
2328 fd->fd_flags &= ~flags;
2331 case LL_IOC_LOV_SETSTRIPE:
2332 RETURN(ll_lov_setstripe(inode, file, arg));
2333 case LL_IOC_LOV_SETEA:
2334 RETURN(ll_lov_setea(inode, file, arg));
2335 case LL_IOC_LOV_SWAP_LAYOUTS: {
2337 struct lustre_swap_layouts lsl;
2339 if (copy_from_user(&lsl, (char *)arg,
2340 sizeof(struct lustre_swap_layouts)))
/* Both files must be open for writing for a layout swap. */
2343 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2346 file2 = fget(lsl.sl_fd);
2351 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2352 rc = ll_swap_layouts(file, file2, &lsl);
2356 case LL_IOC_LOV_GETSTRIPE:
2357 RETURN(ll_lov_getstripe(inode, arg));
2358 case LL_IOC_RECREATE_OBJ:
2359 RETURN(ll_lov_recreate_obj(inode, arg));
2360 case LL_IOC_RECREATE_FID:
2361 RETURN(ll_lov_recreate_fid(inode, arg));
2362 case FSFILT_IOC_FIEMAP:
2363 RETURN(ll_ioctl_fiemap(inode, arg));
2364 case FSFILT_IOC_GETFLAGS:
2365 case FSFILT_IOC_SETFLAGS:
2366 RETURN(ll_iocontrol(inode, file, cmd, arg));
2367 case FSFILT_IOC_GETVERSION_OLD:
2368 case FSFILT_IOC_GETVERSION:
2369 RETURN(put_user(inode->i_generation, (int *)arg));
2370 case LL_IOC_GROUP_LOCK:
2371 RETURN(ll_get_grouplock(inode, file, arg));
2372 case LL_IOC_GROUP_UNLOCK:
2373 RETURN(ll_put_grouplock(inode, file, arg));
2374 case IOC_OBD_STATFS:
2375 RETURN(ll_obd_statfs(inode, (void *)arg));
2377 /* We need to special case any other ioctls we want to handle,
2378 * to send them to the MDS/OST as appropriate and to properly
2379 * network encode the arg field.
2380 case FSFILT_IOC_SETVERSION_OLD:
2381 case FSFILT_IOC_SETVERSION:
2383 case LL_IOC_FLUSHCTX:
2384 RETURN(ll_flush_ctx(inode));
2385 case LL_IOC_PATH2FID: {
2386 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2387 sizeof(struct lu_fid)))
2392 case OBD_IOC_FID2PATH:
2393 RETURN(ll_fid2path(inode, (void *)arg));
2394 case LL_IOC_DATA_VERSION: {
2395 struct ioc_data_version idv;
2398 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
/* Only the two flush flags are meaningful; mask the rest. */
2401 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2402 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2404 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2410 case LL_IOC_GET_MDTIDX: {
2413 mdtidx = ll_get_mdt_idx(inode);
2417 if (put_user((int)mdtidx, (int*)arg))
2422 case OBD_IOC_GETDTNAME:
2423 case OBD_IOC_GETMDNAME:
2424 RETURN(ll_get_obd_name(inode, cmd, arg));
2425 case LL_IOC_HSM_STATE_GET: {
2426 struct md_op_data *op_data;
2427 struct hsm_user_state *hus;
2434 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2435 LUSTRE_OPC_ANY, hus);
2436 if (IS_ERR(op_data)) {
2438 RETURN(PTR_ERR(op_data));
2441 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2444 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2447 ll_finish_md_op_data(op_data);
2451 case LL_IOC_HSM_STATE_SET: {
2452 struct hsm_state_set *hss;
2459 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2464 rc = ll_hsm_state_set(inode, hss);
2469 case LL_IOC_HSM_ACTION: {
2470 struct md_op_data *op_data;
2471 struct hsm_current_action *hca;
2478 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2479 LUSTRE_OPC_ANY, hca);
2480 if (IS_ERR(op_data)) {
2482 RETURN(PTR_ERR(op_data));
2485 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2488 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2491 ll_finish_md_op_data(op_data);
/* Lease handling: take (SET) or query (GET) a lease on this fd;
 * fd_lease_och is guarded by lli_och_mutex. */
2495 case LL_IOC_SET_LEASE: {
2496 struct ll_inode_info *lli = ll_i2info(inode);
2497 struct obd_client_handle *och = NULL;
2503 if (!(file->f_mode & FMODE_WRITE))
2508 if (!(file->f_mode & FMODE_READ))
2513 mutex_lock(&lli->lli_och_mutex);
2514 if (fd->fd_lease_och != NULL) {
2515 och = fd->fd_lease_och;
2516 fd->fd_lease_och = NULL;
2518 mutex_unlock(&lli->lli_och_mutex);
2521 mode = och->och_flags &(FMODE_READ|FMODE_WRITE);
2522 rc = ll_lease_close(och, inode, &lease_broken);
2523 if (rc == 0 && lease_broken)
2529 /* return the type of lease or error */
2530 RETURN(rc < 0 ? rc : (int)mode);
2535 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2537 /* apply for lease */
2538 och = ll_lease_open(inode, file, mode, 0);
2540 RETURN(PTR_ERR(och));
2543 mutex_lock(&lli->lli_och_mutex);
2544 if (fd->fd_lease_och == NULL) {
2545 fd->fd_lease_och = och;
2548 mutex_unlock(&lli->lli_och_mutex);
2550 /* impossible now that only excl is supported for now */
2551 ll_lease_close(och, inode, &lease_broken);
2556 case LL_IOC_GET_LEASE: {
2557 struct ll_inode_info *lli = ll_i2info(inode);
2558 struct ldlm_lock *lock = NULL;
2561 mutex_lock(&lli->lli_och_mutex);
2562 if (fd->fd_lease_och != NULL) {
2563 struct obd_client_handle *och = fd->fd_lease_och;
2565 lock = ldlm_handle2lock(&och->och_lease_handle);
2567 lock_res_and_lock(lock);
2568 if (!ldlm_is_cancel(lock))
2569 rc = och->och_flags &
2570 (FMODE_READ | FMODE_WRITE);
2571 unlock_res_and_lock(lock);
2572 LDLM_LOCK_PUT(lock);
2575 mutex_unlock(&lli->lli_och_mutex);
2578 case LL_IOC_HSM_IMPORT: {
2579 struct hsm_user_import *hui;
2585 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2590 rc = ll_hsm_import(inode, file, hui);
/* Fall back to registered handlers, then to the data export. */
2599 ll_iocontrol_call(inode, file, cmd, arg, &err))
2602 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2608 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Commit an lseek result: reject out-of-range offsets (negative ones
 * only when FMODE_UNSIGNED_OFFSET is not set, or beyond @maxsize) and
 * update f_pos/f_version only when the position actually changes.
 * Compat helper used when the kernel lacks generic_file_llseek_size().
 */
2609 static inline loff_t
2610 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2612 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2614 if (offset > maxsize)
2617 if (offset != file->f_pos) {
2618 file->f_pos = offset;
2619 file->f_version = 0;
/*
 * Compat copy of the kernel's generic_file_llseek_size() for kernels
 * without HAVE_FILE_LLSEEK_SIZE: llseek honoring an explicit maximum
 * file size and a caller-supplied eof (for SEEK_END/SEEK_HOLE/
 * SEEK_DATA semantics).
 */
2625 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2626 loff_t maxsize, loff_t eof)
2628 struct inode *inode = file->f_dentry->d_inode;
2636 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2637 * position-querying operation. Avoid rewriting the "same"
2638 * f_pos value back to the file because a concurrent read(),
2639 * write() or lseek() might have altered it
2644 * f_lock protects against read/modify/write race with other
2645 * SEEK_CURs. Note that parallel writes and reads behave
2648 mutex_lock(&inode->i_mutex);
2649 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2650 mutex_unlock(&inode->i_mutex);
2654 * In the generic case the entire file is data, so as long as
2655 * offset isn't at the end of the file then the offset is data.
2662 * There is a virtual hole at the end of the file, so as long as
2663 * offset isn't i_size or larger, return i_size.
2671 return llseek_execute(file, offset, maxsize);
/*
 * llseek entry point (file_operations.llseek).  For SEEK_END /
 * SEEK_HOLE / SEEK_DATA the cluster-wide size must be current, so the
 * inode is glimpsed first; then the (compat) generic llseek helper is
 * used with ll_file_maxbytes() as the limit.
 */
2675 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2677 struct inode *inode = file->f_dentry->d_inode;
2678 loff_t retval, eof = 0;
2681 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2682 (origin == SEEK_CUR) ? file->f_pos : 0);
2683 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2684 PFID(ll_inode2fid(inode)), inode, retval, retval,
2686 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2688 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2689 retval = ll_glimpse_size(inode);
2692 eof = i_size_read(inode);
2695 retval = ll_generic_file_llseek_size(file, offset, origin,
2696 ll_file_maxbytes(inode), eof);
2700 int ll_flush(struct file *file, fl_owner_t id)
2702 struct inode *inode = file->f_dentry->d_inode;
2703 struct ll_inode_info *lli = ll_i2info(inode);
2704 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2707 LASSERT(!S_ISDIR(inode->i_mode));
2709 /* catch async errors that were recorded back when async writeback
2710 * failed for pages in this mapping. */
2711 rc = lli->lli_async_rc;
2712 lli->lli_async_rc = 0;
2713 err = lov_read_and_clear_async_rc(lli->lli_clob);
2717 /* The application has been told write failure already.
2718 * Do not report failure again. */
2719 if (fd->fd_write_failed)
2721 return rc ? -EIO : 0;
2725 * Called to make sure a portion of file has been written out.
2726 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2728 * Return how many pages have been written.
2730 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2731 enum cl_fsync_mode mode, int ignore_layout)
2733 struct cl_env_nest nest;
2736 struct obd_capa *capa = NULL;
2737 struct cl_fsync_io *fio;
2741 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2742 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2745 env = cl_env_nested_get(&nest);
2747 RETURN(PTR_ERR(env));
2749 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2751 io = ccc_env_thread_io(env);
2752 io->ci_obj = cl_i2info(inode)->lli_clob;
2753 io->ci_ignore_layout = ignore_layout;
2755 /* initialize parameters for sync */
2756 fio = &io->u.ci_fsync;
2757 fio->fi_capa = capa;
2758 fio->fi_start = start;
2760 fio->fi_fid = ll_inode2fid(inode);
2761 fio->fi_mode = mode;
2762 fio->fi_nr_written = 0;
2764 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2765 result = cl_io_loop(env, io);
2767 result = io->ci_result;
2769 result = fio->fi_nr_written;
2770 cl_io_fini(env, io);
2771 cl_env_nested_put(&nest, env);
2779 * When dentry is provided (the 'else' case), *file->f_dentry may be
2780 * null and dentry must be used directly rather than pulled from
2781 * *file->f_dentry as is done otherwise.
2784 #ifdef HAVE_FILE_FSYNC_4ARGS
2785 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2787 struct dentry *dentry = file->f_dentry;
2788 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2789 int ll_fsync(struct file *file, int datasync)
2791 struct dentry *dentry = file->f_dentry;
2793 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2796 struct inode *inode = dentry->d_inode;
2797 struct ll_inode_info *lli = ll_i2info(inode);
2798 struct ptlrpc_request *req;
2799 struct obd_capa *oc;
2803 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2804 PFID(ll_inode2fid(inode)), inode);
2805 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2807 #ifdef HAVE_FILE_FSYNC_4ARGS
2808 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2809 mutex_lock(&inode->i_mutex);
2811 /* fsync's caller has already called _fdata{sync,write}, we want
2812 * that IO to finish before calling the osc and mdc sync methods */
2813 rc = filemap_fdatawait(inode->i_mapping);
2816 /* catch async errors that were recorded back when async writeback
2817 * failed for pages in this mapping. */
2818 if (!S_ISDIR(inode->i_mode)) {
2819 err = lli->lli_async_rc;
2820 lli->lli_async_rc = 0;
2823 err = lov_read_and_clear_async_rc(lli->lli_clob);
2828 oc = ll_mdscapa_get(inode);
2829 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2835 ptlrpc_req_finished(req);
2837 if (datasync && S_ISREG(inode->i_mode)) {
2838 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2840 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2842 if (rc == 0 && err < 0)
2845 fd->fd_write_failed = true;
2847 fd->fd_write_failed = false;
2850 #ifdef HAVE_FILE_FSYNC_4ARGS
2851 mutex_unlock(&inode->i_mutex);
2856 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2858 struct inode *inode = file->f_dentry->d_inode;
2859 struct ll_sb_info *sbi = ll_i2sbi(inode);
2860 struct ldlm_enqueue_info einfo = {
2861 .ei_type = LDLM_FLOCK,
2862 .ei_cb_cp = ldlm_flock_completion_ast,
2863 .ei_cbdata = file_lock,
2865 struct md_op_data *op_data;
2866 struct lustre_handle lockh = {0};
2867 ldlm_policy_data_t flock = {{0}};
2873 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2874 PFID(ll_inode2fid(inode)), file_lock);
2876 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2878 if (file_lock->fl_flags & FL_FLOCK) {
2879 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2880 /* flocks are whole-file locks */
2881 flock.l_flock.end = OFFSET_MAX;
2882 /* For flocks owner is determined by the local file desctiptor*/
2883 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2884 } else if (file_lock->fl_flags & FL_POSIX) {
2885 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2886 flock.l_flock.start = file_lock->fl_start;
2887 flock.l_flock.end = file_lock->fl_end;
2891 flock.l_flock.pid = file_lock->fl_pid;
2893 /* Somewhat ugly workaround for svc lockd.
2894 * lockd installs custom fl_lmops->lm_compare_owner that checks
2895 * for the fl_owner to be the same (which it always is on local node
2896 * I guess between lockd processes) and then compares pid.
2897 * As such we assign pid to the owner field to make it all work,
2898 * conflict with normal locks is unlikely since pid space and
2899 * pointer space for current->files are not intersecting */
2900 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2901 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
2903 switch (file_lock->fl_type) {
2905 einfo.ei_mode = LCK_PR;
2908 /* An unlock request may or may not have any relation to
2909 * existing locks so we may not be able to pass a lock handle
2910 * via a normal ldlm_lock_cancel() request. The request may even
2911 * unlock a byte range in the middle of an existing lock. In
2912 * order to process an unlock request we need all of the same
2913 * information that is given with a normal read or write record
2914 * lock request. To avoid creating another ldlm unlock (cancel)
2915 * message we'll treat a LCK_NL flock request as an unlock. */
2916 einfo.ei_mode = LCK_NL;
2919 einfo.ei_mode = LCK_PW;
2922 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2923 file_lock->fl_type);
2938 flags = LDLM_FL_BLOCK_NOWAIT;
2944 flags = LDLM_FL_TEST_LOCK;
2945 /* Save the old mode so that if the mode in the lock changes we
2946 * can decrement the appropriate reader or writer refcount. */
2947 file_lock->fl_type = einfo.ei_mode;
2950 CERROR("unknown fcntl lock command: %d\n", cmd);
2954 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2955 LUSTRE_OPC_ANY, NULL);
2956 if (IS_ERR(op_data))
2957 RETURN(PTR_ERR(op_data));
2959 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2960 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2961 flock.l_flock.pid, flags, einfo.ei_mode,
2962 flock.l_flock.start, flock.l_flock.end);
2964 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2965 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2967 if ((file_lock->fl_flags & FL_FLOCK) &&
2968 (rc == 0 || file_lock->fl_type == F_UNLCK))
2969 rc2 = flock_lock_file_wait(file, file_lock);
2970 if ((file_lock->fl_flags & FL_POSIX) &&
2971 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2972 !(flags & LDLM_FL_TEST_LOCK))
2973 rc2 = posix_lock_file_wait(file, file_lock);
2975 if (rc2 && file_lock->fl_type != F_UNLCK) {
2976 einfo.ei_mode = LCK_NL;
2977 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2978 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2982 ll_finish_md_op_data(op_data);
2987 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2995 * test if some locks matching bits and l_req_mode are acquired
2996 * - bits can be in different locks
2997 * - if found clear the common lock bits in *bits
2998 * - the bits not found, are kept in *bits
3000 * \param bits [IN] searched lock bits [IN]
3001 * \param l_req_mode [IN] searched lock mode
3002 * \retval boolean, true iff all bits are found
3004 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3006 struct lustre_handle lockh;
3007 ldlm_policy_data_t policy;
3008 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3009 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3018 fid = &ll_i2info(inode)->lli_fid;
3019 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3020 ldlm_lockname[mode]);
3022 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
3023 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3024 policy.l_inodebits.bits = *bits & (1 << i);
3025 if (policy.l_inodebits.bits == 0)
3028 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3029 &policy, mode, &lockh)) {
3030 struct ldlm_lock *lock;
3032 lock = ldlm_handle2lock(&lockh);
3035 ~(lock->l_policy_data.l_inodebits.bits);
3036 LDLM_LOCK_PUT(lock);
3038 *bits &= ~policy.l_inodebits.bits;
3045 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3046 struct lustre_handle *lockh, __u64 flags,
3049 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3054 fid = &ll_i2info(inode)->lli_fid;
3055 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3057 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3058 fid, LDLM_IBITS, &policy, mode, lockh);
3063 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3065 /* Already unlinked. Just update nlink and return success */
3066 if (rc == -ENOENT) {
3068 /* This path cannot be hit for regular files unless in
3069 * case of obscure races, so no need to to validate
3071 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3073 } else if (rc != 0) {
3074 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
3075 ll_get_fsname(inode->i_sb, NULL, 0),
3076 PFID(ll_inode2fid(inode)), rc);
3082 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3085 struct inode *inode = dentry->d_inode;
3086 struct ptlrpc_request *req = NULL;
3087 struct obd_export *exp;
3091 LASSERT(inode != NULL);
3093 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3094 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3096 exp = ll_i2mdexp(inode);
3098 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3099 * But under CMD case, it caused some lock issues, should be fixed
3100 * with new CMD ibits lock. See bug 12718 */
3101 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3102 struct lookup_intent oit = { .it_op = IT_GETATTR };
3103 struct md_op_data *op_data;
3105 if (ibits == MDS_INODELOCK_LOOKUP)
3106 oit.it_op = IT_LOOKUP;
3108 /* Call getattr by fid, so do not provide name at all. */
3109 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3110 dentry->d_inode, NULL, 0, 0,
3111 LUSTRE_OPC_ANY, NULL);
3112 if (IS_ERR(op_data))
3113 RETURN(PTR_ERR(op_data));
3115 oit.it_create_mode |= M_CHECK_STALE;
3116 rc = md_intent_lock(exp, op_data, NULL, 0,
3117 /* we are not interested in name
3120 ll_md_blocking_ast, 0);
3121 ll_finish_md_op_data(op_data);
3122 oit.it_create_mode &= ~M_CHECK_STALE;
3124 rc = ll_inode_revalidate_fini(inode, rc);
3128 rc = ll_revalidate_it_finish(req, &oit, dentry);
3130 ll_intent_release(&oit);
3134 /* Unlinked? Unhash dentry, so it is not picked up later by
3135 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3136 here to preserve get_cwd functionality on 2.6.
3138 if (!dentry->d_inode->i_nlink)
3139 d_lustre_invalidate(dentry, 0);
3141 ll_lookup_finish_locks(&oit, dentry);
3142 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3143 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3144 obd_valid valid = OBD_MD_FLGETATTR;
3145 struct md_op_data *op_data;
3148 if (S_ISREG(inode->i_mode)) {
3149 rc = ll_get_max_mdsize(sbi, &ealen);
3152 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3155 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3156 0, ealen, LUSTRE_OPC_ANY,
3158 if (IS_ERR(op_data))
3159 RETURN(PTR_ERR(op_data));
3161 op_data->op_valid = valid;
3162 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3163 * capa for this inode. Because we only keep capas of dirs
3165 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3166 ll_finish_md_op_data(op_data);
3168 rc = ll_inode_revalidate_fini(inode, rc);
3172 rc = ll_prep_inode(&inode, req, NULL, NULL);
3175 ptlrpc_req_finished(req);
3179 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3182 struct inode *inode = dentry->d_inode;
3186 rc = __ll_inode_revalidate_it(dentry, it, ibits);
3190 /* if object isn't regular file, don't validate size */
3191 if (!S_ISREG(inode->i_mode)) {
3192 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
3193 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
3194 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
3196 /* In case of restore, the MDT has the right size and has
3197 * already send it back without granting the layout lock,
3198 * inode is up-to-date so glimpse is useless.
3199 * Also to glimpse we need the layout, in case of a running
3200 * restore the MDT holds the layout lock so the glimpse will
3201 * block up to the end of restore (getattr will block)
3203 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3204 rc = ll_glimpse_size(inode);
3209 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
3210 struct lookup_intent *it, struct kstat *stat)
3212 struct inode *inode = de->d_inode;
3213 struct ll_sb_info *sbi = ll_i2sbi(inode);
3214 struct ll_inode_info *lli = ll_i2info(inode);
3217 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3218 MDS_INODELOCK_LOOKUP);
3219 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3224 stat->dev = inode->i_sb->s_dev;
3225 if (ll_need_32bit_api(sbi))
3226 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3228 stat->ino = inode->i_ino;
3229 stat->mode = inode->i_mode;
3230 stat->nlink = inode->i_nlink;
3231 stat->uid = inode->i_uid;
3232 stat->gid = inode->i_gid;
3233 stat->rdev = inode->i_rdev;
3234 stat->atime = inode->i_atime;
3235 stat->mtime = inode->i_mtime;
3236 stat->ctime = inode->i_ctime;
3237 stat->blksize = 1 << inode->i_blkbits;
3239 stat->size = i_size_read(inode);
3240 stat->blocks = inode->i_blocks;
3244 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3246 struct lookup_intent it = { .it_op = IT_GETATTR };
3248 return ll_getattr_it(mnt, de, &it, stat);
3251 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3252 __u64 start, __u64 len)
3256 struct ll_user_fiemap *fiemap;
3257 unsigned int extent_count = fieinfo->fi_extents_max;
3259 num_bytes = sizeof(*fiemap) + (extent_count *
3260 sizeof(struct ll_fiemap_extent));
3261 OBD_ALLOC_LARGE(fiemap, num_bytes);
3266 fiemap->fm_flags = fieinfo->fi_flags;
3267 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3268 fiemap->fm_start = start;
3269 fiemap->fm_length = len;
3270 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3271 sizeof(struct ll_fiemap_extent));
3273 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3275 fieinfo->fi_flags = fiemap->fm_flags;
3276 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3277 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3278 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3280 OBD_FREE_LARGE(fiemap, num_bytes);
3284 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3286 struct ll_inode_info *lli = ll_i2info(inode);
3287 struct posix_acl *acl = NULL;
3290 spin_lock(&lli->lli_lock);
3291 /* VFS' acl_permission_check->check_acl will release the refcount */
3292 acl = posix_acl_dup(lli->lli_posix_acl);
3293 spin_unlock(&lli->lli_lock);
3298 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3300 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3301 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3303 ll_check_acl(struct inode *inode, int mask)
3306 # ifdef CONFIG_FS_POSIX_ACL
3307 struct posix_acl *acl;
3311 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3312 if (flags & IPERM_FLAG_RCU)
3315 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3320 rc = posix_acl_permission(inode, acl, mask);
3321 posix_acl_release(acl);
3324 # else /* !CONFIG_FS_POSIX_ACL */
3326 # endif /* CONFIG_FS_POSIX_ACL */
3328 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
3330 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3331 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3333 # ifdef HAVE_INODE_PERMISION_2ARGS
3334 int ll_inode_permission(struct inode *inode, int mask)
3336 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3343 #ifdef MAY_NOT_BLOCK
3344 if (mask & MAY_NOT_BLOCK)
3346 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3347 if (flags & IPERM_FLAG_RCU)
3351 /* as root inode are NOT getting validated in lookup operation,
3352 * need to do it before permission check. */
3354 if (inode == inode->i_sb->s_root->d_inode) {
3355 struct lookup_intent it = { .it_op = IT_LOOKUP };
3357 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3358 MDS_INODELOCK_LOOKUP);
3363 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3364 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3366 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3367 return lustre_check_remote_perm(inode, mask);
3369 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3370 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3375 /* -o localflock - only provides locally consistent flock locks */
3376 struct file_operations ll_file_operations = {
3377 .read = ll_file_read,
3378 .aio_read = ll_file_aio_read,
3379 .write = ll_file_write,
3380 .aio_write = ll_file_aio_write,
3381 .unlocked_ioctl = ll_file_ioctl,
3382 .open = ll_file_open,
3383 .release = ll_file_release,
3384 .mmap = ll_file_mmap,
3385 .llseek = ll_file_seek,
3386 .splice_read = ll_file_splice_read,
3391 struct file_operations ll_file_operations_flock = {
3392 .read = ll_file_read,
3393 .aio_read = ll_file_aio_read,
3394 .write = ll_file_write,
3395 .aio_write = ll_file_aio_write,
3396 .unlocked_ioctl = ll_file_ioctl,
3397 .open = ll_file_open,
3398 .release = ll_file_release,
3399 .mmap = ll_file_mmap,
3400 .llseek = ll_file_seek,
3401 .splice_read = ll_file_splice_read,
3404 .flock = ll_file_flock,
3405 .lock = ll_file_flock
3408 /* These are for -o noflock - to return ENOSYS on flock calls */
3409 struct file_operations ll_file_operations_noflock = {
3410 .read = ll_file_read,
3411 .aio_read = ll_file_aio_read,
3412 .write = ll_file_write,
3413 .aio_write = ll_file_aio_write,
3414 .unlocked_ioctl = ll_file_ioctl,
3415 .open = ll_file_open,
3416 .release = ll_file_release,
3417 .mmap = ll_file_mmap,
3418 .llseek = ll_file_seek,
3419 .splice_read = ll_file_splice_read,
3422 .flock = ll_file_noflock,
3423 .lock = ll_file_noflock
3426 struct inode_operations ll_file_inode_operations = {
3427 .setattr = ll_setattr,
3428 .getattr = ll_getattr,
3429 .permission = ll_inode_permission,
3430 .setxattr = ll_setxattr,
3431 .getxattr = ll_getxattr,
3432 .listxattr = ll_listxattr,
3433 .removexattr = ll_removexattr,
3434 .fiemap = ll_fiemap,
3435 #ifdef HAVE_IOP_GET_ACL
3436 .get_acl = ll_get_acl,
3440 /* dynamic ioctl number support routins */
3441 static struct llioc_ctl_data {
3442 struct rw_semaphore ioc_sem;
3443 cfs_list_t ioc_head;
3445 __RWSEM_INITIALIZER(llioc.ioc_sem),
3446 CFS_LIST_HEAD_INIT(llioc.ioc_head)
3451 cfs_list_t iocd_list;
3452 unsigned int iocd_size;
3453 llioc_callback_t iocd_cb;
3454 unsigned int iocd_count;
3455 unsigned int iocd_cmd[0];
3458 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3461 struct llioc_data *in_data = NULL;
3464 if (cb == NULL || cmd == NULL ||
3465 count > LLIOC_MAX_CMD || count < 0)
3468 size = sizeof(*in_data) + count * sizeof(unsigned int);
3469 OBD_ALLOC(in_data, size);
3470 if (in_data == NULL)
3473 memset(in_data, 0, sizeof(*in_data));
3474 in_data->iocd_size = size;
3475 in_data->iocd_cb = cb;
3476 in_data->iocd_count = count;
3477 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3479 down_write(&llioc.ioc_sem);
3480 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3481 up_write(&llioc.ioc_sem);
3486 void ll_iocontrol_unregister(void *magic)
3488 struct llioc_data *tmp;
3493 down_write(&llioc.ioc_sem);
3494 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3496 unsigned int size = tmp->iocd_size;
3498 cfs_list_del(&tmp->iocd_list);
3499 up_write(&llioc.ioc_sem);
3501 OBD_FREE(tmp, size);
3505 up_write(&llioc.ioc_sem);
3507 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3510 EXPORT_SYMBOL(ll_iocontrol_register);
3511 EXPORT_SYMBOL(ll_iocontrol_unregister);
3513 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3514 unsigned int cmd, unsigned long arg, int *rcp)
3516 enum llioc_iter ret = LLIOC_CONT;
3517 struct llioc_data *data;
3518 int rc = -EINVAL, i;
3520 down_read(&llioc.ioc_sem);
3521 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3522 for (i = 0; i < data->iocd_count; i++) {
3523 if (cmd != data->iocd_cmd[i])
3526 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3530 if (ret == LLIOC_STOP)
3533 up_read(&llioc.ioc_sem);
3540 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3542 struct ll_inode_info *lli = ll_i2info(inode);
3543 struct cl_env_nest nest;
3548 if (lli->lli_clob == NULL)
3551 env = cl_env_nested_get(&nest);
3553 RETURN(PTR_ERR(env));
3555 result = cl_conf_set(env, lli->lli_clob, conf);
3556 cl_env_nested_put(&nest, env);
3558 if (conf->coc_opc == OBJECT_CONF_SET) {
3559 struct ldlm_lock *lock = conf->coc_lock;
3561 LASSERT(lock != NULL);
3562 LASSERT(ldlm_has_layout(lock));
3564 /* it can only be allowed to match after layout is
3565 * applied to inode otherwise false layout would be
3566 * seen. Applying layout shoud happen before dropping
3567 * the intent lock. */
3568 ldlm_lock_allow_match(lock);
3574 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3575 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3578 struct ll_sb_info *sbi = ll_i2sbi(inode);
3579 struct obd_capa *oc;
3580 struct ptlrpc_request *req;
3581 struct mdt_body *body;
3588 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3589 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3590 lock->l_lvb_data, lock->l_lvb_len);
3592 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3595 /* if layout lock was granted right away, the layout is returned
3596 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3597 * blocked and then granted via completion ast, we have to fetch
3598 * layout here. Please note that we can't use the LVB buffer in
3599 * completion AST because it doesn't have a large enough buffer */
3600 oc = ll_mdscapa_get(inode);
3601 rc = ll_get_max_mdsize(sbi, &lmmsize);
3603 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3604 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3610 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3611 if (body == NULL || body->eadatasize > lmmsize)
3612 GOTO(out, rc = -EPROTO);
3614 lmmsize = body->eadatasize;
3615 if (lmmsize == 0) /* empty layout */
3618 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3620 GOTO(out, rc = -EFAULT);
3622 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3623 if (lvbdata == NULL)
3624 GOTO(out, rc = -ENOMEM);
3626 memcpy(lvbdata, lmm, lmmsize);
3627 lock_res_and_lock(lock);
3628 if (lock->l_lvb_data != NULL)
3629 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3631 lock->l_lvb_data = lvbdata;
3632 lock->l_lvb_len = lmmsize;
3633 unlock_res_and_lock(lock);
3638 ptlrpc_req_finished(req);
3643 * Apply the layout to the inode. Layout lock is held and will be released
3646 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3647 struct inode *inode, __u32 *gen, bool reconf)
3649 struct ll_inode_info *lli = ll_i2info(inode);
3650 struct ll_sb_info *sbi = ll_i2sbi(inode);
3651 struct ldlm_lock *lock;
3652 struct lustre_md md = { NULL };
3653 struct cl_object_conf conf;
3656 bool wait_layout = false;
3659 LASSERT(lustre_handle_is_used(lockh));
3661 lock = ldlm_handle2lock(lockh);
3662 LASSERT(lock != NULL);
3663 LASSERT(ldlm_has_layout(lock));
3665 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d\n",
3666 PFID(&lli->lli_fid), inode, reconf);
3668 /* in case this is a caching lock and reinstate with new inode */
3669 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3671 lock_res_and_lock(lock);
3672 lvb_ready = ldlm_is_lvb_ready(lock);
3673 unlock_res_and_lock(lock);
3674 /* checking lvb_ready is racy but this is okay. The worst case is
3675 * that multi processes may configure the file on the same time. */
3677 if (lvb_ready || !reconf) {
3680 /* layout_gen must be valid if layout lock is not
3681 * cancelled and stripe has already set */
3682 *gen = lli->lli_layout_gen;
3688 rc = ll_layout_fetch(inode, lock);
3692 /* for layout lock, lmm is returned in lock's lvb.
3693 * lvb_data is immutable if the lock is held so it's safe to access it
3694 * without res lock. See the description in ldlm_lock_decref_internal()
3695 * for the condition to free lvb_data of layout lock */
3696 if (lock->l_lvb_data != NULL) {
3697 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3698 lock->l_lvb_data, lock->l_lvb_len);
3700 *gen = LL_LAYOUT_GEN_EMPTY;
3702 *gen = md.lsm->lsm_layout_gen;
3705 CERROR("%s: file "DFID" unpackmd error: %d\n",
3706 ll_get_fsname(inode->i_sb, NULL, 0),
3707 PFID(&lli->lli_fid), rc);
3713 /* set layout to file. Unlikely this will fail as old layout was
3714 * surely eliminated */
3715 memset(&conf, 0, sizeof conf);
3716 conf.coc_opc = OBJECT_CONF_SET;
3717 conf.coc_inode = inode;
3718 conf.coc_lock = lock;
3719 conf.u.coc_md = &md;
3720 rc = ll_layout_conf(inode, &conf);
3723 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3725 /* refresh layout failed, need to wait */
3726 wait_layout = rc == -EBUSY;
3730 LDLM_LOCK_PUT(lock);
3731 ldlm_lock_decref(lockh, mode);
3733 /* wait for IO to complete if it's still being used. */
3735 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3736 ll_get_fsname(inode->i_sb, NULL, 0),
3737 PFID(&lli->lli_fid), inode);
3739 memset(&conf, 0, sizeof conf);
3740 conf.coc_opc = OBJECT_CONF_WAIT;
3741 conf.coc_inode = inode;
3742 rc = ll_layout_conf(inode, &conf);
3746 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3747 ll_get_fsname(inode->i_sb, NULL, 0),
3748 PFID(&lli->lli_fid), rc);
3754 * This function checks if there exists a LAYOUT lock on the client side,
3755 * or enqueues it if it doesn't have one in cache.
3757 * This function will not hold layout lock so it may be revoked any time after
3758 * this function returns. Any operations depend on layout should be redone
3761 * This function should be called before lov_io_init() to get an uptodate
3762 * layout version, the caller should save the version number and after IO
3763 * is finished, this function should be called again to verify that layout
3764 * is not changed during IO time.
3766 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3768 struct ll_inode_info *lli = ll_i2info(inode);
3769 struct ll_sb_info *sbi = ll_i2sbi(inode);
3770 struct md_op_data *op_data;
3771 struct lookup_intent it;
3772 struct lustre_handle lockh;
3774 struct ldlm_enqueue_info einfo = {
3775 .ei_type = LDLM_IBITS,
3777 .ei_cb_bl = ll_md_blocking_ast,
3778 .ei_cb_cp = ldlm_completion_ast,
3783 *gen = lli->lli_layout_gen;
3784 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3788 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3789 LASSERT(S_ISREG(inode->i_mode));
3791 /* mostly layout lock is caching on the local side, so try to match
3792 * it before grabbing layout lock mutex. */
3793 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3794 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3795 if (mode != 0) { /* hit cached lock */
3796 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3800 /* better hold lli_layout_mutex to try again otherwise
3801 * it will have starvation problem. */
3804 /* take layout lock mutex to enqueue layout lock exclusively. */
3805 mutex_lock(&lli->lli_layout_mutex);
3808 /* try again. Maybe somebody else has done this. */
3809 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3810 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3811 if (mode != 0) { /* hit cached lock */
3812 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3816 mutex_unlock(&lli->lli_layout_mutex);
3820 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3821 0, 0, LUSTRE_OPC_ANY, NULL);
3822 if (IS_ERR(op_data)) {
3823 mutex_unlock(&lli->lli_layout_mutex);
3824 RETURN(PTR_ERR(op_data));
3827 /* have to enqueue one */
3828 memset(&it, 0, sizeof(it));
3829 it.it_op = IT_LAYOUT;
3830 lockh.cookie = 0ULL;
3832 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)\n",
3833 ll_get_fsname(inode->i_sb, NULL, 0),
3834 PFID(&lli->lli_fid), inode);
3836 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
3838 if (it.d.lustre.it_data != NULL)
3839 ptlrpc_req_finished(it.d.lustre.it_data);
3840 it.d.lustre.it_data = NULL;
3842 ll_finish_md_op_data(op_data);
3844 mode = it.d.lustre.it_lock_mode;
3845 it.d.lustre.it_lock_mode = 0;
3846 ll_intent_drop_lock(&it);
3849 /* set lock data in case this is a new lock */
3850 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3851 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3855 mutex_unlock(&lli->lli_layout_mutex);
3861 * This function send a restore request to the MDT
3863 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3865 struct hsm_user_request *hur;
3869 len = sizeof(struct hsm_user_request) +
3870 sizeof(struct hsm_user_item);
3871 OBD_ALLOC(hur, len);
3875 hur->hur_request.hr_action = HUA_RESTORE;
3876 hur->hur_request.hr_archive_id = 0;
3877 hur->hur_request.hr_flags = 0;
3878 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3879 sizeof(hur->hur_user_item[0].hui_fid));
3880 hur->hur_user_item[0].hui_extent.offset = offset;
3881 hur->hur_user_item[0].hui_extent.length = length;
3882 hur->hur_request.hr_itemcount = 1;
3883 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,