4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/*
 * Allocate per-open-file private data from the ll_file_data_slab cache
 * (__GFP_IO allocation) and clear the write-failure flag.
 * NOTE(review): the excerpt omits interior lines — the allocation-failure
 * check and the return statement are not visible here.
 */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
61 fd->fd_write_failed = false;
/* Return a struct ll_file_data to its slab cache. */
66 static void ll_file_data_put(struct ll_file_data *fd)
69 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the inode's current attributes into @op_data for an MDS request:
 * FID, mode, a/m/ctime, size, block count, external flags, the current
 * IO epoch, the open handle @fh and an MDS capability.  If the inode is
 * marked LLIF_DATA_MODIFIED, add the MDS_DATA_MODIFIED bias so the MDS
 * learns the data changed.
 */
72 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
73 struct lustre_handle *fh)
75 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
76 op_data->op_attr.ia_mode = inode->i_mode;
77 op_data->op_attr.ia_atime = inode->i_atime;
78 op_data->op_attr.ia_mtime = inode->i_mtime;
79 op_data->op_attr.ia_ctime = inode->i_ctime;
80 op_data->op_attr.ia_size = i_size_read(inode);
81 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the Lustre-extended iattr, hence the cast. */
82 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
83 ll_inode_to_ext_flags(inode->i_flags);
84 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
86 op_data->op_handle = *fh;
87 op_data->op_capa1 = ll_mdscapa_get(inode);
89 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
90 op_data->op_bias |= MDS_DATA_MODIFIED;
94 * Closes the IO epoch and packs all the attributes into @op_data for
/*
 * the close RPC.  Mode and timestamps are always sent; size/blocks are
 * added only for a write handle when SOM (size-on-MDS) is not supported
 * by the server or the file is not a regular file.
 */
97 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
98 struct obd_client_handle *och)
102 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
103 ATTR_MTIME | ATTR_MTIME_SET |
104 ATTR_CTIME | ATTR_CTIME_SET;
106 if (!(och->och_flags & FMODE_WRITE))
109 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
110 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* Close the IO epoch for this open handle before packing attrs. */
112 ll_ioepoch_close(inode, op_data, &och, 0);
115 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
116 ll_prep_md_op_data(op_data, inode, NULL, NULL,
117 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send the MDS close RPC for an open handle @och.  If @data_version is
 * non-NULL this close is an HSM release (MDS_HSM_RELEASE bias).  On a
 * successful close the DATA_MODIFIED inode flag is cleared, destroyed
 * OST objects are cleaned up, and done-writing is queued if the epoch
 * stayed open under SOM.  The request, op_data and replay data are
 * released before returning.
 */
121 static int ll_close_inode_openhandle(struct obd_export *md_exp,
123 struct obd_client_handle *och,
124 const __u64 *data_version)
126 struct obd_export *exp = ll_i2mdexp(inode);
127 struct md_op_data *op_data;
128 struct ptlrpc_request *req = NULL;
129 struct obd_device *obd = class_exp2obd(exp);
136 * XXX: in case of LMV, is this correct to access
139 CERROR("Invalid MDC connection handle "LPX64"\n",
140 ll_i2mdexp(inode)->exp_handle.h_cookie);
144 OBD_ALLOC_PTR(op_data);
146 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
148 ll_prepare_close(inode, op_data, och);
149 if (data_version != NULL) {
150 /* Pass in data_version implies release. */
151 op_data->op_bias |= MDS_HSM_RELEASE;
152 op_data->op_data_version = *data_version;
153 op_data->op_lease_handle = och->och_lease_handle;
154 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* Remember whether this close also closes the IO epoch. */
156 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
157 rc = md_close(md_exp, op_data, och->och_mod, &req);
159 /* This close must have the epoch closed. */
160 LASSERT(epoch_close);
161 /* MDS has instructed us to obtain Size-on-MDS attribute from
162 * OSTs and send setattr to back to MDS. */
163 rc = ll_som_update(inode, op_data);
165 CERROR("%s: inode "DFID" mdc Size-on-MDS update"
166 " failed: rc = %d\n",
167 ll_i2mdexp(inode)->exp_obd->obd_name,
168 PFID(ll_inode2fid(inode)), rc);
172 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
173 ll_i2mdexp(inode)->exp_obd->obd_name,
174 PFID(ll_inode2fid(inode)), rc);
177 /* DATA_MODIFIED flag was successfully sent on close, cancel data
178 * modification flag. */
179 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
180 struct ll_inode_info *lli = ll_i2info(inode);
182 spin_lock(&lli->lli_lock);
183 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
184 spin_unlock(&lli->lli_lock);
188 rc = ll_objects_destroy(req, inode);
190 CERROR("%s: inode "DFID
191 " ll_objects destroy: rc = %d\n",
192 ll_i2mdexp(inode)->exp_obd->obd_name,
193 PFID(ll_inode2fid(inode)), rc);
/* For HSM release, verify the server actually released the file. */
196 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
197 struct mdt_body *body;
198 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
199 if (!(body->valid & OBD_MD_FLRELEASED))
203 ll_finish_md_op_data(op_data);
/* SOM with an epoch left open on a written regular file: defer the
 * final attribute update to the DONE_WRITING path. */
207 if (exp_connect_som(exp) && !epoch_close &&
208 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
209 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
211 md_clear_open_replay_data(md_exp, och);
212 /* Free @och if it is not waiting for DONE_WRITING. */
213 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
216 if (req) /* This is close request */
217 ptlrpc_req_finished(req);
/*
 * Really close the MDS open handle of the given kind (write/exec/read,
 * selected by @flags) if no other local user still holds it.  The
 * per-mode handle pointer and use count are chosen under lli_och_mutex;
 * the handle (if any) is then closed via ll_close_inode_openhandle().
 */
221 int ll_md_real_close(struct inode *inode, int flags)
223 struct ll_inode_info *lli = ll_i2info(inode);
224 struct obd_client_handle **och_p;
225 struct obd_client_handle *och;
230 if (flags & FMODE_WRITE) {
231 och_p = &lli->lli_mds_write_och;
232 och_usecount = &lli->lli_open_fd_write_count;
233 } else if (flags & FMODE_EXEC) {
234 och_p = &lli->lli_mds_exec_och;
235 och_usecount = &lli->lli_open_fd_exec_count;
237 LASSERT(flags & FMODE_READ);
238 och_p = &lli->lli_mds_read_och;
239 och_usecount = &lli->lli_open_fd_read_count;
242 mutex_lock(&lli->lli_och_mutex);
243 if (*och_usecount) { /* There are still users of this handle, so
245 mutex_unlock(&lli->lli_och_mutex);
250 mutex_unlock(&lli->lli_och_mutex);
252 if (och) { /* There might be a race and somebody have freed this och
254 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close: drop any group lock, clean up a leftover
 * lease and private open handle, decrement the per-mode open counter,
 * and talk to the MDS (ll_md_real_close) only when no cached OPEN DLM
 * lock lets us skip it.  Finally free the file's private data and its
 * capability.
 */
261 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
264 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
265 struct ll_inode_info *lli = ll_i2info(inode);
269 /* clear group lock, if present */
270 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
271 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
273 if (fd->fd_lease_och != NULL) {
276 /* Usually the lease is not released when the
277 * application crashed, we need to release here. */
278 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
279 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
280 PFID(&lli->lli_fid), rc, lease_broken);
282 fd->fd_lease_och = NULL;
285 if (fd->fd_och != NULL) {
286 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
291 /* Let's see if we have good enough OPEN lock on the file and if
292 we can skip talking to MDS */
293 if (file->f_dentry->d_inode) { /* Can this ever be false? */
295 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
296 struct lustre_handle lockh;
297 struct inode *inode = file->f_dentry->d_inode;
298 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
300 mutex_lock(&lli->lli_och_mutex);
301 if (fd->fd_omode & FMODE_WRITE) {
303 LASSERT(lli->lli_open_fd_write_count);
304 lli->lli_open_fd_write_count--;
305 } else if (fd->fd_omode & FMODE_EXEC) {
307 LASSERT(lli->lli_open_fd_exec_count);
308 lli->lli_open_fd_exec_count--;
311 LASSERT(lli->lli_open_fd_read_count);
312 lli->lli_open_fd_read_count--;
314 mutex_unlock(&lli->lli_och_mutex);
/* No matching cached OPEN lock: must do the real MDS close. */
316 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
317 LDLM_IBITS, &policy, lockmode,
319 rc = ll_md_real_close(file->f_dentry->d_inode,
323 CERROR("Releasing a file %p with negative dentry %p. Name %s",
324 file, file->f_dentry, file->f_dentry->d_name.name);
328 LUSTRE_FPRIVATE(file) = NULL;
329 ll_file_data_put(fd);
330 ll_capa_close(inode);
335 /* While this returns an error code, fput() the caller does not, so we need
336 * to make every effort to clean up all of our state here. Also, applications
337 * rarely check close errors and even if an error is returned they will not
338 * re-try the close call.
/*
 * VFS ->release() for Lustre files: tear down remote-ACL state (remote
 * client on the root inode), update stats, stop a statahead thread this
 * fd started, short-circuit for the root dentry, clear pending async
 * write errors on regular files, then perform the MDS close.
 */
340 int ll_file_release(struct inode *inode, struct file *file)
342 struct ll_file_data *fd;
343 struct ll_sb_info *sbi = ll_i2sbi(inode);
344 struct ll_inode_info *lli = ll_i2info(inode);
348 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
349 PFID(ll_inode2fid(inode)), inode);
351 #ifdef CONFIG_FS_POSIX_ACL
352 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
353 inode == inode->i_sb->s_root->d_inode) {
354 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
357 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
358 fd->fd_flags &= ~LL_FILE_RMTACL;
359 rct_del(&sbi->ll_rct, current_pid());
360 et_search_free(&sbi->ll_et, current_pid());
365 if (inode->i_sb->s_root != file->f_dentry)
366 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
367 fd = LUSTRE_FPRIVATE(file);
370 /* The last ref on @file, maybe not the owner pid of statahead.
371 * Different processes can open the same dir, "ll_opendir_key" means:
372 * it is me that should stop the statahead thread. */
373 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
374 lli->lli_opendir_pid != 0)
375 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root dentry has no MDS open handle to close; just drop fd data. */
377 if (inode->i_sb->s_root == file->f_dentry) {
378 LUSTRE_FPRIVATE(file) = NULL;
379 ll_file_data_put(fd);
383 if (!S_ISDIR(inode->i_mode)) {
384 lov_read_and_clear_async_rc(lli->lli_clob);
385 lli->lli_async_rc = 0;
388 rc = ll_md_close(sbi->ll_md_exp, inode, file);
390 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
391 libcfs_debug_dumplog();
/*
 * Enqueue an open intent on the MDS for @file (used e.g. by the NFSD
 * path).  Requests an OPEN DLM lock unless stripe parameters are being
 * set (@lmm/@lmmsize non-zero), performs md_intent_lock(), then fills
 * the inode from the reply and records the lock.  The intent's request
 * reference and lock are dropped before returning.
 */
396 static int ll_intent_file_open(struct file *file, void *lmm,
397 int lmmsize, struct lookup_intent *itp)
399 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
400 struct dentry *parent = file->f_dentry->d_parent;
401 const char *name = file->f_dentry->d_name.name;
402 const int len = file->f_dentry->d_name.len;
403 struct md_op_data *op_data;
404 struct ptlrpc_request *req;
405 __u32 opc = LUSTRE_OPC_ANY;
412 /* Usually we come here only for NFSD, and we want open lock.
413 But we can also get here with pre 2.6.15 patchless kernels, and in
414 that case that lock is also ok */
415 /* We can also get here if there was cached open handle in revalidate_it
416 * but it disappeared while we were getting from there to ll_file_open.
417 * But this means this file was closed and immediately opened which
418 * makes a good candidate for using OPEN lock */
419 /* If lmmsize & lmm are not 0, we are just setting stripe info
420 * parameters. No need for the open lock */
421 if (lmm == NULL && lmmsize == 0) {
422 itp->it_flags |= MDS_OPEN_LOCK;
423 if (itp->it_flags & FMODE_WRITE)
424 opc = LUSTRE_OPC_CREATE;
427 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
428 file->f_dentry->d_inode, name, len,
431 RETURN(PTR_ERR(op_data));
433 itp->it_flags |= MDS_OPEN_BY_FID;
434 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
435 0 /*unused */, &req, ll_md_blocking_ast, 0);
436 ll_finish_md_op_data(op_data);
438 /* reason for keep own exit path - don't flood log
439 * with messages with -ESTALE errors.
/* Enqueue failed after the server opened the file: close that stray
 * open handle so it is not leaked. */
441 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
442 it_open_error(DISP_OPEN_OPEN, itp))
444 ll_release_openhandle(file->f_dentry, itp);
448 if (it_disposition(itp, DISP_LOOKUP_NEG))
449 GOTO(out, rc = -ENOENT);
451 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
452 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
453 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
457 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
458 if (!rc && itp->d.lustre.it_lock_mode)
459 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
463 ptlrpc_req_finished(itp->d.lustre.it_data);
464 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
465 ll_intent_drop_lock(itp);
471 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
472 * not believe attributes if a few ioepoch holders exist. Attributes for
473 * previous ioepoch if new one is opened are also skipped by MDS.
475 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a non-zero epoch that differs from the current one. */
477 if (ioepoch && lli->lli_ioepoch != ioepoch) {
478 lli->lli_ioepoch = ioepoch;
479 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
480 ioepoch, PFID(&lli->lli_fid));
/*
 * Fill @och from the MDS reply carried by the open intent @it (open
 * handle, FID, lease lock handle, open flags) and register it for
 * replay on MDS recovery.
 */
484 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
485 struct obd_client_handle *och)
487 struct ptlrpc_request *req = it->d.lustre.it_data;
488 struct mdt_body *body;
490 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
491 och->och_fh = body->handle;
492 och->och_fid = body->fid1;
493 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
494 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
495 och->och_flags = it->it_flags;
497 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish the client-side part of an open: optionally fill @och from the
 * intent reply and record the returned IO epoch, then attach @fd as the
 * file's private data, initialise read-ahead state and remember the
 * access mode the file was opened with.
 */
500 int ll_local_open(struct file *file, struct lookup_intent *it,
501 struct ll_file_data *fd, struct obd_client_handle *och)
503 struct inode *inode = file->f_dentry->d_inode;
504 struct ll_inode_info *lli = ll_i2info(inode);
507 LASSERT(!LUSTRE_FPRIVATE(file));
512 struct ptlrpc_request *req = it->d.lustre.it_data;
513 struct mdt_body *body;
516 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
520 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
521 ll_ioepoch_open(lli, body->ioepoch);
524 LUSTRE_FPRIVATE(file) = fd;
525 ll_readahead_init(inode, &fd->fd_ras);
526 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
531 /* Open a file, and (for the very first open) create objects on the OSTs at
532 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
533 * creation or open until ll_lov_setstripe() ioctl is called.
535 * If we already have the stripe MD locally then we don't request it in
536 * md_open(), by passing a lmm_size = 0.
538 * It is up to the application to ensure no other processes open this file
539 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
540 * used. We might be able to avoid races of that sort by getting lli_open_sem
541 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
542 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
544 int ll_file_open(struct inode *inode, struct file *file)
546 struct ll_inode_info *lli = ll_i2info(inode);
547 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
548 .it_flags = file->f_flags };
549 struct obd_client_handle **och_p = NULL;
550 __u64 *och_usecount = NULL;
551 struct ll_file_data *fd;
552 int rc = 0, opendir_set = 0;
555 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
556 PFID(ll_inode2fid(inode)), inode, file->f_flags);
558 it = file->private_data; /* XXX: compat macro */
559 file->private_data = NULL; /* prevent ll_local_open assertion */
561 fd = ll_file_data_get();
563 GOTO(out_openerr, rc = -ENOMEM);
/* First opener of a directory becomes the statahead owner. */
566 if (S_ISDIR(inode->i_mode)) {
567 spin_lock(&lli->lli_sa_lock);
568 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
569 lli->lli_opendir_pid == 0) {
570 lli->lli_opendir_key = fd;
571 lli->lli_opendir_pid = current_pid();
574 spin_unlock(&lli->lli_sa_lock);
/* Root dentry: nothing to open on the MDS, just attach fd data. */
577 if (inode->i_sb->s_root == file->f_dentry) {
578 LUSTRE_FPRIVATE(file) = fd;
/* No intent from lookup: build our own open intent from f_flags. */
582 if (!it || !it->d.lustre.it_disposition) {
583 /* Convert f_flags into access mode. We cannot use file->f_mode,
584 * because everything but O_ACCMODE mask was stripped from
586 if ((oit.it_flags + 1) & O_ACCMODE)
588 if (file->f_flags & O_TRUNC)
589 oit.it_flags |= FMODE_WRITE;
591 /* kernel only call f_op->open in dentry_open. filp_open calls
592 * dentry_open after call to open_namei that checks permissions.
593 * Only nfsd_open call dentry_open directly without checking
594 * permissions and because of that this code below is safe. */
595 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
596 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
598 /* We do not want O_EXCL here, presumably we opened the file
599 * already? XXX - NFS implications? */
600 oit.it_flags &= ~O_EXCL;
602 /* bug20584, if "it_flags" contains O_CREAT, the file will be
603 * created if necessary, then "IT_CREAT" should be set to keep
604 * consistent with it */
605 if (oit.it_flags & O_CREAT)
606 oit.it_op |= IT_CREAT;
612 /* Let's see if we have file open on MDS already. */
613 if (it->it_flags & FMODE_WRITE) {
614 och_p = &lli->lli_mds_write_och;
615 och_usecount = &lli->lli_open_fd_write_count;
616 } else if (it->it_flags & FMODE_EXEC) {
617 och_p = &lli->lli_mds_exec_och;
618 och_usecount = &lli->lli_open_fd_exec_count;
620 och_p = &lli->lli_mds_read_och;
621 och_usecount = &lli->lli_open_fd_read_count;
624 mutex_lock(&lli->lli_och_mutex);
625 if (*och_p) { /* Open handle is present */
626 if (it_disposition(it, DISP_OPEN_OPEN)) {
627 /* Well, there's extra open request that we do not need,
628 let's close it somehow. This will decref request. */
629 rc = it_open_error(DISP_OPEN_OPEN, it);
631 mutex_unlock(&lli->lli_och_mutex);
632 GOTO(out_openerr, rc);
635 ll_release_openhandle(file->f_dentry, it);
/* Reuse the cached handle: NULL och means "existing handle". */
639 rc = ll_local_open(file, it, fd, NULL);
642 mutex_unlock(&lli->lli_och_mutex);
643 GOTO(out_openerr, rc);
646 LASSERT(*och_usecount == 0);
647 if (!it->d.lustre.it_disposition) {
648 /* We cannot just request lock handle now, new ELC code
649 means that one of other OPEN locks for this file
650 could be cancelled, and since blocking ast handler
651 would attempt to grab och_mutex as well, that would
652 result in a deadlock */
653 mutex_unlock(&lli->lli_och_mutex);
654 it->it_create_mode |= M_CHECK_STALE;
655 rc = ll_intent_file_open(file, NULL, 0, it);
656 it->it_create_mode &= ~M_CHECK_STALE;
658 GOTO(out_openerr, rc);
662 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
664 GOTO(out_och_free, rc = -ENOMEM);
668 /* md_intent_lock() didn't get a request ref if there was an
669 * open error, so don't do cleanup on the request here
671 /* XXX (green): Should not we bail out on any error here, not
672 * just open error? */
673 rc = it_open_error(DISP_OPEN_OPEN, it);
675 GOTO(out_och_free, rc);
677 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
679 rc = ll_local_open(file, it, fd, *och_p);
681 GOTO(out_och_free, rc);
683 mutex_unlock(&lli->lli_och_mutex);
686 /* Must do this outside lli_och_mutex lock to prevent deadlock where
687 different kind of OPEN lock for this same inode gets cancelled
688 by ldlm_cancel_lru */
689 if (!S_ISREG(inode->i_mode))
690 GOTO(out_och_free, rc);
/* No stripe metadata yet: delayed object creation cases. */
694 if (!lli->lli_has_smd) {
695 if (file->f_flags & O_LOV_DELAY_CREATE ||
696 !(file->f_mode & FMODE_WRITE)) {
697 CDEBUG(D_INODE, "object creation was delayed\n");
698 GOTO(out_och_free, rc);
701 file->f_flags &= ~O_LOV_DELAY_CREATE;
702 GOTO(out_och_free, rc);
/* Error unwinding: free the handle, undo statahead ownership, drop
 * fd data, and release the intent's open reference. */
706 if (och_p && *och_p) {
707 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
708 *och_p = NULL; /* OBD_FREE writes some magic there */
711 mutex_unlock(&lli->lli_och_mutex);
714 if (opendir_set != 0)
715 ll_stop_statahead(inode, lli->lli_opendir_key);
717 ll_file_data_put(fd);
719 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
722 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
723 ptlrpc_req_finished(it->d.lustre.it_data);
724 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease locks: on LDLM_CB_BLOCKING, cancel the lease
 * lock asynchronously (the lease is thereby broken).  Unlike the normal
 * open blocking AST this does not touch the open handle.
 */
730 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
731 struct ldlm_lock_desc *desc, void *data, int flag)
734 struct lustre_handle lockh;
738 case LDLM_CB_BLOCKING:
739 ldlm_lock2handle(lock, &lockh);
740 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
742 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
746 case LDLM_CB_CANCELING:
754 * Acquire a lease and open the file.
/*
 * @fmode must be exactly FMODE_READ or FMODE_WRITE.  If @file is given,
 * its existing open handle is reused (op_data->op_handle) so the MDT can
 * match the owner; this requires the caller to be the only opener in
 * that mode and to hold no lease already.  On success returns the new
 * obd_client_handle carrying the lease lock; on any failure after the
 * open succeeded, the handle is closed again and an ERR_PTR is
 * presumably returned (error paths are partly outside this excerpt).
 */
756 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
757 fmode_t fmode, __u64 open_flags)
759 struct lookup_intent it = { .it_op = IT_OPEN };
760 struct ll_sb_info *sbi = ll_i2sbi(inode);
761 struct md_op_data *op_data;
762 struct ptlrpc_request *req;
763 struct lustre_handle old_handle = { 0 };
764 struct obd_client_handle *och = NULL;
769 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
770 RETURN(ERR_PTR(-EINVAL));
773 struct ll_inode_info *lli = ll_i2info(inode);
774 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
775 struct obd_client_handle **och_p;
778 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
779 RETURN(ERR_PTR(-EPERM));
781 /* Get the openhandle of the file */
783 mutex_lock(&lli->lli_och_mutex);
/* A lease is already held on this fd; refuse a second one. */
784 if (fd->fd_lease_och != NULL) {
785 mutex_unlock(&lli->lli_och_mutex);
789 if (fd->fd_och == NULL) {
790 if (file->f_mode & FMODE_WRITE) {
791 LASSERT(lli->lli_mds_write_och != NULL);
792 och_p = &lli->lli_mds_write_och;
793 och_usecount = &lli->lli_open_fd_write_count;
795 LASSERT(lli->lli_mds_read_och != NULL);
796 och_p = &lli->lli_mds_read_och;
797 och_usecount = &lli->lli_open_fd_read_count;
799 if (*och_usecount == 1) {
806 mutex_unlock(&lli->lli_och_mutex);
807 if (rc < 0) /* more than 1 opener */
810 LASSERT(fd->fd_och != NULL);
811 old_handle = fd->fd_och->och_fh;
816 RETURN(ERR_PTR(-ENOMEM));
818 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
819 LUSTRE_OPC_ANY, NULL);
821 GOTO(out, rc = PTR_ERR(op_data));
823 /* To tell the MDT this openhandle is from the same owner */
824 op_data->op_handle = old_handle;
826 it.it_flags = fmode | open_flags;
827 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
828 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
829 ll_md_blocking_lease_ast,
830 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
831 * it can be cancelled which may mislead applications that the lease is
833 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
834 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
835 * doesn't deal with openhandle, so normal openhandle will be leaked. */
836 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
837 ll_finish_md_op_data(op_data);
839 ptlrpc_req_finished(req);
840 it_clear_disposition(&it, DISP_ENQ_COMPLETE);
843 GOTO(out_release_it, rc);
845 if (it_disposition(&it, DISP_LOOKUP_NEG))
846 GOTO(out_release_it, rc = -ENOENT);
848 rc = it_open_error(DISP_OPEN_OPEN, &it);
850 GOTO(out_release_it, rc);
852 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
853 ll_och_fill(sbi->ll_md_exp, &it, och);
855 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
856 GOTO(out_close, rc = -EOPNOTSUPP);
858 /* already get lease, handle lease lock */
859 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
860 if (it.d.lustre.it_lock_mode == 0 ||
861 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
862 /* open lock must return for lease */
863 CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
864 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
865 it.d.lustre.it_lock_bits);
866 GOTO(out_close, rc = -EPROTO);
869 ll_intent_release(&it);
873 /* Cancel open lock */
874 if (it.d.lustre.it_lock_mode != 0) {
875 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
876 it.d.lustre.it_lock_mode);
877 it.d.lustre.it_lock_mode = 0;
878 och->och_lease_handle.cookie = 0ULL;
880 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
882 CERROR("%s: error closing file "DFID": %d\n",
883 ll_get_fsname(inode->i_sb, NULL, 0),
884 PFID(&ll_i2info(inode)->lli_fid), rc2);
885 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
887 ll_intent_release(&it);
893 EXPORT_SYMBOL(ll_lease_open);
896 * Release lease and close the file.
897 * It will check if the lease has ever broken.
899 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
902 struct ldlm_lock *lock;
/* Default to "broken" when the lease lock cannot be found. */
903 bool cancelled = true;
907 lock = ldlm_handle2lock(&och->och_lease_handle);
909 lock_res_and_lock(lock);
910 cancelled = ldlm_is_cancel(lock);
911 unlock_res_and_lock(lock);
915 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
916 PFID(&ll_i2info(inode)->lli_fid), cancelled);
919 ldlm_cli_cancel(&och->och_lease_handle, 0);
920 if (lease_broken != NULL)
921 *lease_broken = cancelled;
923 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
927 EXPORT_SYMBOL(ll_lease_close);
929 /* Fills the obdo with the attributes for the lsm */
/*
 * Issue an async OST getattr for the stripes of @lsm and wait for the
 * replies.  @dv_flags (LL_DV_RD_FLUSH / LL_DV_WR_FLUSH) requests a
 * server-side lock flush for data-version purposes; with LL_DV_WR_FLUSH
 * the reply must confirm OBD_FL_FLUSH or the call fails (error path
 * outside this excerpt).
 */
930 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
931 struct obd_capa *capa, struct obdo *obdo,
932 __u64 ioepoch, int dv_flags)
934 struct ptlrpc_request_set *set;
935 struct obd_info oinfo = { { { 0 } } };
940 LASSERT(lsm != NULL);
944 oinfo.oi_oa->o_oi = lsm->lsm_oi;
945 oinfo.oi_oa->o_mode = S_IFREG;
946 oinfo.oi_oa->o_ioepoch = ioepoch;
947 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
948 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
949 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
950 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
951 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
952 OBD_MD_FLDATAVERSION;
953 oinfo.oi_capa = capa;
954 if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
955 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
956 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
957 if (dv_flags & LL_DV_WR_FLUSH)
958 oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
961 set = ptlrpc_prep_set();
963 CERROR("can't allocate ptlrpc set\n");
966 rc = obd_getattr_async(exp, &oinfo, set);
968 rc = ptlrpc_set_wait(set);
969 ptlrpc_set_destroy(set);
/* Keep only the attributes the caller is entitled to trust. */
972 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
973 OBD_MD_FLATIME | OBD_MD_FLMTIME |
974 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
975 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
976 if (dv_flags & LL_DV_WR_FLUSH &&
977 !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
978 oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
985 * Performs the getattr on the inode and updates its fields.
986 * If @sync != 0, perform the getattr under the server-side lock.
988 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
989 __u64 ioepoch, int sync)
991 struct obd_capa *capa = ll_mdscapa_get(inode);
992 struct lov_stripe_md *lsm;
996 lsm = ccc_inode_lsm_get(inode);
997 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
998 capa, obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
1001 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
/* Fold the OST-reported attributes back into the VFS inode. */
1003 obdo_refresh_inode(inode, obdo, obdo->o_valid);
1004 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
1005 " blksize %lu\n", POSTID(oi), i_size_read(inode),
1006 (unsigned long long)inode->i_blocks,
1007 (unsigned long)ll_inode_blksize(inode));
1009 ccc_inode_lsm_put(inode, lsm);
/*
 * Merge MDS-provided timestamps (cached in lli_lvb) with the attributes
 * obtained from the OSTs via the cl_object layer, keeping the newest of
 * each timestamp, and update the inode's size and block count under the
 * inode size lock.
 */
1013 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
1015 struct ll_inode_info *lli = ll_i2info(inode);
1016 struct cl_object *obj = lli->lli_clob;
1017 struct cl_attr *attr = ccc_env_thread_attr(env);
1023 ll_inode_size_lock(inode);
1024 /* merge timestamps the most recently obtained from mds with
1025 timestamps obtained from osts */
1026 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
1027 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
1028 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
1029 inode_init_lvb(inode, &lvb);
1031 cl_object_attr_lock(obj);
1032 rc = cl_object_attr_get(env, obj, attr);
1033 cl_object_attr_unlock(obj);
/* Take the newer of each timestamp (MDS copy vs OST copy). */
1036 if (lvb.lvb_atime < attr->cat_atime)
1037 lvb.lvb_atime = attr->cat_atime;
1038 if (lvb.lvb_ctime < attr->cat_ctime)
1039 lvb.lvb_ctime = attr->cat_ctime;
1040 if (lvb.lvb_mtime < attr->cat_mtime)
1041 lvb.lvb_mtime = attr->cat_mtime;
1043 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1044 PFID(&lli->lli_fid), attr->cat_size);
1045 cl_isize_write_nolock(inode, attr->cat_size);
1047 inode->i_blocks = attr->cat_blocks;
1049 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1050 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1051 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1053 ll_inode_size_unlock(inode);
/*
 * Glimpse helper for an ioctl: fetch current OST attributes for @lsm
 * and copy size, blocks and timestamps into the caller's stat buffer.
 */
1058 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1061 struct obdo obdo = { 0 };
1064 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1066 st->st_size = obdo.o_size;
1067 st->st_blocks = obdo.o_blocks;
1068 st->st_mtime = obdo.o_mtime;
1069 st->st_atime = obdo.o_atime;
1070 st->st_ctime = obdo.o_ctime;
/*
 * Decide whether atime updates should be suppressed for @file, checking
 * the same flag sources as the kernel's file_accessed()/touch_atime():
 * O_NOATIME on the open, S_NOATIME/IS_NOATIME on the inode, mount flags
 * (noatime/read-only, nodiratime for dirs) and the superblock.
 * NOTE(review): the individual "return true" lines fall outside this
 * excerpt.
 */
1075 static bool file_is_noatime(const struct file *file)
1077 const struct vfsmount *mnt = file->f_path.mnt;
1078 const struct inode *inode = file->f_path.dentry->d_inode;
1080 /* Adapted from file_accessed() and touch_atime().*/
1081 if (file->f_flags & O_NOATIME)
1084 if (inode->i_flags & S_NOATIME)
1087 if (IS_NOATIME(inode))
1090 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1093 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1096 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialise a cl_io for a read or write on @file: non-blocking and
 * append/sync flags from f_flags, the target cl_object, the lock
 * request mode (never for nolock files, mandatory for O_APPEND, maybe
 * otherwise) and the noatime decision.
 */
1102 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1104 struct inode *inode = file->f_dentry->d_inode;
1106 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1108 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1109 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1110 file->f_flags & O_DIRECT ||
1113 io->ci_obj = ll_i2info(inode)->lli_clob;
1114 io->ci_lockreq = CILR_MAYBE;
1115 if (ll_file_nolock(file)) {
1116 io->ci_lockreq = CILR_NEVER;
1117 io->ci_no_srvlock = 1;
1118 } else if (file->f_flags & O_APPEND) {
1119 io->ci_lockreq = CILR_MANDATORY;
1122 io->ci_noatime = file_is_noatime(file);
/*
 * Generic IO engine shared by read/write/sendfile/splice paths: set up
 * a cl_io, take lli_write_mutex for non-grouplock writes (or
 * lli_trunc_sem for reads) in the normal-iovec case, run cl_io_loop(),
 * then update the file position, account the transferred bytes in
 * per-sb stats and track write failure state.  On a zero-byte restartable
 * result the IO is retried rather than returning a short count.
 */
1126 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1127 struct file *file, enum cl_io_type iot,
1128 loff_t *ppos, size_t count)
1130 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1131 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1137 io = ccc_env_thread_io(env);
1138 ll_io_init(io, file, iot == CIT_WRITE);
1140 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1141 struct vvp_io *vio = vvp_env_io(env);
1142 struct ccc_io *cio = ccc_env_io(env);
1143 int write_mutex_locked = 0;
1145 cio->cui_fd = LUSTRE_FPRIVATE(file);
1146 vio->cui_io_subtype = args->via_io_subtype;
1148 switch (vio->cui_io_subtype) {
1150 cio->cui_iov = args->u.normal.via_iov;
1151 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1152 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1153 cio->cui_iocb = args->u.normal.via_iocb;
1154 if ((iot == CIT_WRITE) &&
1155 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1156 if (mutex_lock_interruptible(&lli->
1158 GOTO(out, result = -ERESTARTSYS);
1159 write_mutex_locked = 1;
1160 } else if (iot == CIT_READ) {
1161 down_read(&lli->lli_trunc_sem);
1165 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1166 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
1169 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1170 vio->u.splice.cui_flags = args->u.splice.via_flags;
1173 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
1176 result = cl_io_loop(env, io);
1177 if (write_mutex_locked)
1178 mutex_unlock(&lli->lli_write_mutex);
1179 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
1180 up_read(&lli->lli_trunc_sem);
1182 /* cl_io_rw_init() handled IO */
1183 result = io->ci_result;
/* Bytes were moved: report them and advance the file position. */
1186 if (io->ci_nob > 0) {
1187 result = io->ci_nob;
1188 *ppos = io->u.ci_wr.wr.crw_pos;
1192 cl_io_fini(env, io);
1193 /* If any bit been read/written (result != 0), we just return
1194 * short read/write instead of restart io. */
1195 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1196 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1197 iot == CIT_READ ? "read" : "write",
1198 file->f_dentry->d_name.name, *ppos, count);
1199 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1203 if (iot == CIT_READ) {
1205 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1206 LPROC_LL_READ_BYTES, result);
1207 } else if (iot == CIT_WRITE) {
1209 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1210 LPROC_LL_WRITE_BYTES, result);
1211 fd->fd_write_failed = false;
1212 } else if (result != -ERESTARTSYS) {
1213 fd->fd_write_failed = true;
1222 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate a user-supplied iovec array before an IO: each segment's base
 * pointer is checked with access_ok() and the per-segment/cumulative length
 * is checked for negative/wrapping values.
 * NOTE(review): this listing has lines elided — the declarations of the
 * loop counter and byte count, and the return paths, are not visible here.
 */
1224 static int ll_file_get_iov_count(const struct iovec *iov,
1225 unsigned long *nr_segs, size_t *count)
1230 for (seg = 0; seg < *nr_segs; seg++) {
1231 const struct iovec *iv = &iov[seg];
1234 * If any segment has a negative length, or the cumulative
1235 * length ever wraps negative then return -EINVAL.
1238 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1240 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1245 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Async read entry point (.aio_read): validate the iovec with
 * ll_file_get_iov_count(), obtain a cl environment, fill vvp_io_args for a
 * normal (iovec-based) IO, and delegate to ll_file_io_generic() with
 * CIT_READ, updating iocb->ki_pos.
 */
1252 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1253 unsigned long nr_segs, loff_t pos)
1256 struct vvp_io_args *args;
1262 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1266 env = cl_env_get(&refcheck);
1268 RETURN(PTR_ERR(env));
1270 args = vvp_env_args(env, IO_NORMAL);
/* cast away const: the args structure stores a mutable iovec pointer */
1271 args->u.normal.via_iov = (struct iovec *)iov;
1272 args->u.normal.via_nrsegs = nr_segs;
1273 args->u.normal.via_iocb = iocb;
1275 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1276 &iocb->ki_pos, count);
1277 cl_env_put(env, &refcheck);
/*
 * Synchronous read entry point (.read): build a single-segment iovec and a
 * sync kiocb in per-thread vvp_env_info storage, then reuse the AIO path
 * via ll_file_aio_read(); *ppos is updated from the kiocb on completion.
 */
1281 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1285 struct iovec *local_iov;
1286 struct kiocb *kiocb;
1291 env = cl_env_get(&refcheck);
1293 RETURN(PTR_ERR(env));
1295 local_iov = &vvp_env_info(env)->vti_local_iov;
1296 kiocb = &vvp_env_info(env)->vti_kiocb;
1297 local_iov->iov_base = (void __user *)buf;
1298 local_iov->iov_len = count;
1299 init_sync_kiocb(kiocb, file);
1300 kiocb->ki_pos = *ppos;
1301 kiocb->ki_left = count;
1303 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1304 *ppos = kiocb->ki_pos;
1306 cl_env_put(env, &refcheck);
1311 * Write to a file (through the page cache).
/*
 * Async write entry point (.aio_write): mirrors ll_file_aio_read() but
 * submits the IO as CIT_WRITE through ll_file_io_generic().
 */
1314 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1315 unsigned long nr_segs, loff_t pos)
1318 struct vvp_io_args *args;
1324 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1328 env = cl_env_get(&refcheck);
1330 RETURN(PTR_ERR(env));
1332 args = vvp_env_args(env, IO_NORMAL);
1333 args->u.normal.via_iov = (struct iovec *)iov;
1334 args->u.normal.via_nrsegs = nr_segs;
1335 args->u.normal.via_iocb = iocb;
1337 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1338 &iocb->ki_pos, count);
1339 cl_env_put(env, &refcheck);
/*
 * Synchronous write entry point (.write): same single-iovec/sync-kiocb
 * wrapper as ll_file_read(), but routed through ll_file_aio_write().
 */
1343 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1347 struct iovec *local_iov;
1348 struct kiocb *kiocb;
1353 env = cl_env_get(&refcheck);
1355 RETURN(PTR_ERR(env));
1357 local_iov = &vvp_env_info(env)->vti_local_iov;
1358 kiocb = &vvp_env_info(env)->vti_kiocb;
1359 local_iov->iov_base = (void __user *)buf;
1360 local_iov->iov_len = count;
1361 init_sync_kiocb(kiocb, file);
1362 kiocb->ki_pos = *ppos;
1363 kiocb->ki_left = count;
1365 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1366 *ppos = kiocb->ki_pos;
1368 cl_env_put(env, &refcheck);
1373 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read entry point: set up IO_SPLICE vvp args (pipe + flags) and run
 * a CIT_READ through ll_file_io_generic().
 */
1375 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1376 struct pipe_inode_info *pipe, size_t count,
1380 struct vvp_io_args *args;
1385 env = cl_env_get(&refcheck);
1387 RETURN(PTR_ERR(env));
1389 args = vvp_env_args(env, IO_SPLICE);
1390 args->u.splice.via_pipe = pipe;
1391 args->u.splice.via_flags = flags;
1393 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1394 cl_env_put(env, &refcheck);
/*
 * Re-create the OST objects backing this inode on the given OST index:
 * clone the inode's stripe md (lsm), mark the obdo with
 * OBD_FL_RECREATE_OBJS, and call obd_create() under the inode size lock.
 * NOTE(review): lines elided in this listing — the obdo allocation and some
 * error paths are not visible here.
 */
1398 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
1401 struct obd_export *exp = ll_i2dtexp(inode);
1402 struct obd_trans_info oti = { 0 };
1403 struct obdo *oa = NULL;
1406 struct lov_stripe_md *lsm = NULL, *lsm2;
1413 lsm = ccc_inode_lsm_get(inode);
1414 if (!lsm_has_objects(lsm))
1415 GOTO(out, rc = -ENOENT);
1417 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1418 (lsm->lsm_stripe_count));
1420 OBD_ALLOC_LARGE(lsm2, lsm_size);
1422 GOTO(out, rc = -ENOMEM);
/* o_nlink carries the target OST index for the recreate request */
1425 oa->o_nlink = ost_idx;
1426 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1427 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1428 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1429 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1430 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1431 memcpy(lsm2, lsm, lsm_size);
1432 ll_inode_size_lock(inode);
1433 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1434 ll_inode_size_unlock(inode);
1436 OBD_FREE_LARGE(lsm2, lsm_size);
1439 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_RECREATE_OBJ ioctl handler: admin-only; copy the
 * ll_recreate_obj request from userspace, build an MDT0 ost_id from the
 * requested object id and recreate the object via ll_lov_recreate().
 */
1444 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1446 struct ll_recreate_obj ucreat;
1450 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1453 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1457 ostid_set_seq_mdt0(&oi);
1458 ostid_set_id(&oi, ucreat.lrc_id);
1459 RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID ioctl handler: admin-only; copy a lu_fid from
 * userspace, convert it to an ost_id, derive the OST index from the fid
 * sequence, and recreate via ll_lov_recreate().
 */
1462 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1469 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1472 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1475 fid_to_ostid(&fid, &oi);
/* OST index is encoded in bits 16..31 of the fid sequence */
1476 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1477 RETURN(ll_lov_recreate(inode, &oi, ost_idx));
/*
 * Set striping information (lov EA) on an inode via an IT_OPEN intent:
 * fails early if a layout already exists, otherwise opens the file with the
 * supplied lov_user_md under the inode size lock and releases the resulting
 * open handle.  NOTE(review): lines elided — some error/cleanup paths are
 * not visible in this listing.
 */
1480 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1481 __u64 flags, struct lov_user_md *lum,
1484 struct lov_stripe_md *lsm = NULL;
1485 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1489 lsm = ccc_inode_lsm_get(inode);
/* a layout already exists: setting stripes again is not allowed */
1491 ccc_inode_lsm_put(inode, lsm);
1492 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1493 PFID(ll_inode2fid(inode)));
1497 ll_inode_size_lock(inode);
1498 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1501 rc = oit.d.lustre.it_status;
1503 GOTO(out_req_free, rc);
1505 ll_release_openhandle(file->f_dentry, &oit);
1508 ll_inode_size_unlock(inode);
1509 ll_intent_release(&oit);
1510 ccc_inode_lsm_put(inode, lsm);
1513 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the lov EA (striping info) for @filename under @inode from the MDS
 * via md_getattr_name(), validate the magic, and byte-swap the reply to
 * host endianness on big-endian machines before returning it through
 * *lmmp/*lmm_size.  The caller is responsible for finishing *request.
 * NOTE(review): lines elided — stripe_count adjustment for released files
 * and the swab argument lists are partially missing from this listing.
 */
1517 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1518 struct lov_mds_md **lmmp, int *lmm_size,
1519 struct ptlrpc_request **request)
1521 struct ll_sb_info *sbi = ll_i2sbi(inode);
1522 struct mdt_body *body;
1523 struct lov_mds_md *lmm = NULL;
1524 struct ptlrpc_request *req = NULL;
1525 struct md_op_data *op_data;
1528 rc = ll_get_max_mdsize(sbi, &lmmsize);
1532 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1533 strlen(filename), lmmsize,
1534 LUSTRE_OPC_ANY, NULL);
1535 if (IS_ERR(op_data))
1536 RETURN(PTR_ERR(op_data));
1538 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1539 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1540 ll_finish_md_op_data(op_data);
1542 CDEBUG(D_INFO, "md_getattr_name failed "
1543 "on %s: rc %d\n", filename, rc);
1547 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1548 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1550 lmmsize = body->eadatasize;
1552 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1554 GOTO(out, rc = -ENODATA);
1557 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1558 LASSERT(lmm != NULL);
/* only plain V1/V3 layouts are understood here */
1560 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1561 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1562 GOTO(out, rc = -EPROTO);
1566 * This is coming from the MDS, so is probably in
1567 * little endian. We convert it to host endian before
1568 * passing it to userspace.
/* true only on big-endian hosts: wire format is little endian */
1570 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1573 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1574 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1577 /* if function called for directory - we should
1578 * avoid swab not existent lsm objects */
1579 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1580 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1581 if (S_ISREG(body->mode))
1582 lustre_swab_lov_user_md_objects(
1583 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1585 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1586 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1587 if (S_ISREG(body->mode))
1588 lustre_swab_lov_user_md_objects(
1589 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1596 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA ioctl handler: admin-only; copy a lov_user_md (with one
 * ost_data entry) from userspace and apply it through
 * ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS.
 */
1601 static int ll_lov_setea(struct inode *inode, struct file *file,
1604 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1605 struct lov_user_md *lump;
1606 int lum_size = sizeof(struct lov_user_md) +
1607 sizeof(struct lov_user_ost_data);
1611 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1614 OBD_ALLOC_LARGE(lump, lum_size);
1618 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1619 OBD_FREE_LARGE(lump, lum_size);
1623 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1625 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE ioctl handler: copy the user's lov_user_md (first as
 * V1, re-copied as V3 if the magic says so), set the stripe EA, then — on
 * success (presumably; the branch condition is elided from this listing) —
 * refresh the layout generation and echo the resulting stripe info back to
 * userspace via LL_IOC_LOV_GETSTRIPE.
 */
1629 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1632 struct lov_user_md_v3 lumv3;
1633 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1634 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1635 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1637 __u64 flags = FMODE_WRITE;
1640 /* first try with v1 which is smaller than v3 */
1641 lum_size = sizeof(struct lov_user_md_v1);
1642 if (copy_from_user(lumv1, lumv1p, lum_size))
1645 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1646 lum_size = sizeof(struct lov_user_md_v3);
1647 if (copy_from_user(&lumv3, lumv3p, lum_size))
1651 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1653 struct lov_stripe_md *lsm;
/* clear the user's stripe_count so the getstripe below fills it in */
1656 put_user(0, &lumv1p->lmm_stripe_count);
1658 ll_layout_refresh(inode, &gen);
1659 lsm = ccc_inode_lsm_get(inode);
1660 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1661 0, lsm, (void *)arg);
1662 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_LOV_GETSTRIPE ioctl handler: grab the inode's stripe md and ask
 * the data export to encode it into the userspace buffer at @arg.
 */
1667 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1669 struct lov_stripe_md *lsm;
1673 lsm = ccc_inode_lsm_get(inode);
1675 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1677 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_GROUP_LOCK handler: take a cluster-wide group lock with gid @arg
 * for this open file.  fd_flags/fd_grouplock are protected by lli_lock;
 * the lock is acquired outside the spinlock so a second check is needed to
 * detect a racing thread, in which case the fresh lock is dropped.
 */
1681 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1683 struct ll_inode_info *lli = ll_i2info(inode);
1684 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1685 struct ccc_grouplock grouplock;
1689 if (ll_file_nolock(file))
1690 RETURN(-EOPNOTSUPP);
1692 spin_lock(&lli->lli_lock);
1693 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1694 CWARN("group lock already existed with gid %lu\n",
1695 fd->fd_grouplock.cg_gid);
1696 spin_unlock(&lli->lli_lock);
1699 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1700 spin_unlock(&lli->lli_lock);
/* may block (unless O_NONBLOCK), so it runs without lli_lock held */
1702 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1703 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1707 spin_lock(&lli->lli_lock);
1708 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1709 spin_unlock(&lli->lli_lock);
1710 CERROR("another thread just won the race\n");
1711 cl_put_grouplock(&grouplock);
1715 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1716 fd->fd_grouplock = grouplock;
1717 spin_unlock(&lli->lli_lock);
1719 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock with gid @arg held on
 * this open file.  Validates under lli_lock that a lock is held and that
 * the gid matches, detaches it from the fd, then drops the spinlock before
 * the (potentially blocking) cl_put_grouplock().
 */
1723 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1725 struct ll_inode_info *lli = ll_i2info(inode);
1726 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1727 struct ccc_grouplock grouplock;
1730 spin_lock(&lli->lli_lock);
1731 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1732 spin_unlock(&lli->lli_lock);
1733 CWARN("no group lock held\n");
1736 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1738 if (fd->fd_grouplock.cg_gid != arg) {
1739 CWARN("group lock %lu doesn't match current id %lu\n",
1740 arg, fd->fd_grouplock.cg_gid);
1741 spin_unlock(&lli->lli_lock);
/* take a local copy and clear the fd state before unlocking */
1745 grouplock = fd->fd_grouplock;
1746 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1747 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1748 spin_unlock(&lli->lli_lock);
1750 cl_put_grouplock(&grouplock);
1751 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1756 * Close inode open handle
1758 * \param dentry [in] dentry which contains the inode
1759 * \param it [in,out] intent which contains open info and result
1762 * \retval <0 failure
/*
 * If the intent carries an MDS open handle (DISP_OPEN_OPEN), allocate an
 * obd_client_handle, fill it from the intent and close it on the MDS.
 * Also drops the intent's enqueue-open request reference if present.
 * The root dentry and intents without an open handle are no-ops.
 */
1764 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1766 struct inode *inode = dentry->d_inode;
1767 struct obd_client_handle *och;
1773 /* Root ? Do nothing. */
1774 if (dentry->d_inode->i_sb->s_root == dentry)
1777 /* No open handle to close? Move away */
1778 if (!it_disposition(it, DISP_OPEN_OPEN))
1781 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1783 OBD_ALLOC(och, sizeof(*och));
1785 GOTO(out, rc = -ENOMEM);
1787 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1789 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1792 /* this one is in place of ll_file_open */
1793 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1794 ptlrpc_req_finished(it->d.lustre.it_data);
1795 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1801 * Get size for inode for which FIEMAP mapping is requested.
1802 * Make the FIEMAP get_info call and returns the result.
/*
 * Validate the fiemap flags (reporting supported flags via -EBADR semantics
 * — the unsupported bits are masked back into fm_flags for the caller),
 * honour FIEMAP_FLAG_SYNC by flushing dirty pages, then build a
 * ll_fiemap_info_key from the inode's stripe md and query extent mapping
 * from the data export with obd_get_info(KEY_FIEMAP).
 */
1804 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1807 struct obd_export *exp = ll_i2dtexp(inode);
1808 struct lov_stripe_md *lsm = NULL;
1809 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1810 int vallen = num_bytes;
1814 /* Checks for fiemap flags */
1815 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1816 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1820 /* Check for FIEMAP_FLAG_SYNC */
1821 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1822 rc = filemap_fdatawrite(inode->i_mapping);
1827 lsm = ccc_inode_lsm_get(inode);
1831 /* If the stripe_count > 1 and the application does not understand
1832 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1834 if (lsm->lsm_stripe_count > 1 &&
1835 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1836 GOTO(out, rc = -EOPNOTSUPP);
1838 fm_key.oa.o_oi = lsm->lsm_oi;
1839 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1841 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1842 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1843 /* If filesize is 0, then there would be no objects for mapping */
1844 if (fm_key.oa.o_size == 0) {
1845 fiemap->fm_mapped_extents = 0;
1849 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1851 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1854 CERROR("obd_get_info failed: rc = %d\n", rc);
1857 ccc_inode_lsm_put(inode, lsm);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.  Reads a
 * getinfo_fid2path header from userspace to learn gf_pathlen, allocates an
 * output buffer of that size, performs OBD_IOC_FID2PATH on the MD export,
 * and copies the result back.  Restricted to CAP_DAC_READ_SEARCH unless the
 * mount allows user fid2path.
 */
1861 int ll_fid2path(struct inode *inode, void *arg)
1863 struct obd_export *exp = ll_i2mdexp(inode);
1864 struct getinfo_fid2path *gfout, *gfin;
1868 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1869 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1872 /* Need to get the buflen */
1873 OBD_ALLOC_PTR(gfin);
1876 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1881 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1882 OBD_ALLOC(gfout, outsize);
1883 if (gfout == NULL) {
1887 memcpy(gfout, gfin, sizeof(*gfout));
1890 /* Call mdc_iocontrol */
1891 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1895 if (copy_to_user(arg, gfout, outsize))
1899 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size the kernel fiemap buffer from the
 * userspace fm_extent_count, copy the request (and the first extent, used
 * to continue a previous mapping), run ll_do_fiemap(), then copy the header
 * plus the mapped extents back to userspace.
 * NOTE(review): extent_count * sizeof(extent) is computed without an
 * explicit overflow check in the visible lines — confirm against the full
 * source.
 */
1903 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1905 struct ll_user_fiemap *fiemap_s;
1906 size_t num_bytes, ret_bytes;
1907 unsigned int extent_count;
1910 /* Get the extent count so we can calculate the size of
1911 * required fiemap buffer */
1912 if (get_user(extent_count,
1913 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1915 num_bytes = sizeof(*fiemap_s) + (extent_count *
1916 sizeof(struct ll_fiemap_extent));
1918 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1919 if (fiemap_s == NULL)
1922 /* get the fiemap value */
1923 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1925 GOTO(error, rc = -EFAULT);
1927 /* If fm_extent_count is non-zero, read the first extent since
1928 * it is used to calculate end_offset and device from previous
1931 if (copy_from_user(&fiemap_s->fm_extents[0],
1932 (char __user *)arg + sizeof(*fiemap_s),
1933 sizeof(struct ll_fiemap_extent)))
1934 GOTO(error, rc = -EFAULT);
1937 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1941 ret_bytes = sizeof(struct ll_user_fiemap);
1943 if (extent_count != 0)
1944 ret_bytes += (fiemap_s->fm_mapped_extents *
1945 sizeof(struct ll_fiemap_extent));
1947 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1951 OBD_FREE_LARGE(fiemap_s, num_bytes);
1956 * Read the data_version for inode.
1958 * This value is computed using stripe object version on OST.
1959 * Version is computed using server side locking.
1961 * @param sync if do sync on the OST side;
1963 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1964 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * Implementation: files with no stripe objects report version 0; otherwise
 * ll_lsm_getattr() fetches the obdo and o_data_version is returned via
 * *data_version when OBD_MD_FLDATAVERSION is valid.
 */
1966 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1968 struct lov_stripe_md *lsm = NULL;
1969 struct ll_sb_info *sbi = ll_i2sbi(inode);
1970 struct obdo *obdo = NULL;
1974 /* If no stripe, we consider version is 0. */
1975 lsm = ccc_inode_lsm_get(inode);
1976 if (!lsm_has_objects(lsm)) {
1978 CDEBUG(D_INODE, "No object for inode\n");
1982 OBD_ALLOC_PTR(obdo);
1984 GOTO(out, rc = -ENOMEM);
1986 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, flags);
1988 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1991 *data_version = obdo->o_data_version;
1997 ccc_inode_lsm_put(inode, lsm);
2002 * Trigger a HSM release request for the provided inode.
/*
 * Open a write lease with MDS_OPEN_RELEASE, grab the latest data_version
 * (flushing/dropping cached pages via LL_DV_WR_FLUSH), merge attributes
 * from the lvb, then close the open handle which carries the release to the
 * MDT.  On failure the lease is closed explicitly in the error path.
 */
2004 int ll_hsm_release(struct inode *inode)
2006 struct cl_env_nest nest;
2008 struct obd_client_handle *och = NULL;
2009 __u64 data_version = 0;
2013 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
2014 ll_get_fsname(inode->i_sb, NULL, 0),
2015 PFID(&ll_i2info(inode)->lli_fid));
2017 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
2019 GOTO(out, rc = PTR_ERR(och));
2021 /* Grab latest data_version and [am]time values */
2022 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
2026 env = cl_env_nested_get(&nest);
2028 GOTO(out, rc = PTR_ERR(env));
2030 ll_merge_lvb(env, inode);
2031 cl_env_nested_put(&nest, env);
2033 /* Release the file.
2034 * NB: lease lock handle is released in mdc_hsm_release_pack() because
2035 * we still need it to pack l_remote_handle to MDT. */
2036 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
2042 if (och != NULL && !IS_ERR(och)) /* close the file */
2043 ll_lease_close(och, inode, NULL);
/*
 * Scratch state for ll_swap_layouts(): saved [am]time attrs, the two inodes
 * being swapped, and per-side data-version check requests/values.  Both
 * sides are kept together so they can be swapped as a unit when the inodes
 * are reordered by FID.
 */
2048 struct ll_swap_stack {
2049 struct iattr ia1, ia2;
2051 struct inode *inode1, *inode2;
2052 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS implementation: swap the layouts of two regular
 * files on the same filesystem.  Steps: permission and same-sb checks;
 * order the pair by FID (swapping all per-side state) so locking is
 * deterministic; optionally take a group lock on both files to flush dirty
 * cache; optionally save mtime/atime for restoration; verify requested
 * data versions have not changed; send the swap to the MDT via
 * obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS); drop group locks and restore
 * saved times.  NOTE(review): lines elided — gid initialization, msl setup
 * and several labels are not visible in this listing.
 */
2055 static int ll_swap_layouts(struct file *file1, struct file *file2,
2056 struct lustre_swap_layouts *lsl)
2058 struct mdc_swap_layouts msl;
2059 struct md_op_data *op_data;
2062 struct ll_swap_stack *llss = NULL;
2065 OBD_ALLOC_PTR(llss);
2069 llss->inode1 = file1->f_dentry->d_inode;
2070 llss->inode2 = file2->f_dentry->d_inode;
2072 if (!S_ISREG(llss->inode2->i_mode))
2073 GOTO(free, rc = -EINVAL);
2075 if (inode_permission(llss->inode1, MAY_WRITE) ||
2076 inode_permission(llss->inode2, MAY_WRITE))
2077 GOTO(free, rc = -EPERM);
2079 if (llss->inode2->i_sb != llss->inode1->i_sb)
2080 GOTO(free, rc = -EXDEV);
2082 /* we use 2 bool because it is easier to swap than 2 bits */
2083 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2084 llss->check_dv1 = true;
2086 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2087 llss->check_dv2 = true;
2089 /* we cannot use lsl->sl_dvX directly because we may swap them */
2090 llss->dv1 = lsl->sl_dv1;
2091 llss->dv2 = lsl->sl_dv2;
2093 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2094 if (rc == 0) /* same file, done! */
2097 if (rc < 0) { /* sequentialize it */
2098 swap(llss->inode1, llss->inode2);
2100 swap(llss->dv1, llss->dv2);
2101 swap(llss->check_dv1, llss->check_dv2);
2105 if (gid != 0) { /* application asks to flush dirty cache */
2106 rc = ll_get_grouplock(llss->inode1, file1, gid);
2110 rc = ll_get_grouplock(llss->inode2, file2, gid);
2112 ll_put_grouplock(llss->inode1, file1, gid);
2117 /* to be able to restore mtime and atime after swap
2118 * we need to first save them */
2120 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2121 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2122 llss->ia1.ia_atime = llss->inode1->i_atime;
2123 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2124 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2125 llss->ia2.ia_atime = llss->inode2->i_atime;
2126 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2129 /* ultimate check, before swaping the layouts we check if
2130 * dataversion has changed (if requested) */
2131 if (llss->check_dv1) {
2132 rc = ll_data_version(llss->inode1, &dv, 0);
2135 if (dv != llss->dv1)
2136 GOTO(putgl, rc = -EAGAIN);
2139 if (llss->check_dv2) {
2140 rc = ll_data_version(llss->inode2, &dv, 0);
2143 if (dv != llss->dv2)
2144 GOTO(putgl, rc = -EAGAIN);
2147 /* struct md_op_data is used to send the swap args to the mdt
2148 * only flags is missing, so we use struct mdc_swap_layouts
2149 * through the md_op_data->op_data */
2150 /* flags from user space have to be converted before they are send to
2151 * server, no flag is sent today, they are only used on the client */
2154 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2155 0, LUSTRE_OPC_ANY, &msl);
2156 if (IS_ERR(op_data))
2157 GOTO(free, rc = PTR_ERR(op_data));
2159 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2160 sizeof(*op_data), op_data, NULL);
2161 ll_finish_md_op_data(op_data);
2165 ll_put_grouplock(llss->inode2, file2, gid);
2166 ll_put_grouplock(llss->inode1, file1, gid);
2169 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2173 /* clear useless flags */
2174 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2175 llss->ia1.ia_valid &= ~ATTR_MTIME;
2176 llss->ia2.ia_valid &= ~ATTR_MTIME;
2179 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2180 llss->ia1.ia_valid &= ~ATTR_ATIME;
2181 llss->ia2.ia_valid &= ~ATTR_ATIME;
2184 /* update time if requested */
/* ia2's saved times go to inode1 (and vice versa): layouts were swapped */
2186 if (llss->ia2.ia_valid != 0) {
2187 mutex_lock(&llss->inode1->i_mutex);
2188 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2189 mutex_unlock(&llss->inode1->i_mutex);
2192 if (llss->ia1.ia_valid != 0) {
2195 mutex_lock(&llss->inode2->i_mutex);
2196 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2197 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Apply an HSM state change (set/clear masks) on @inode via
 * LL_IOC_HSM_STATE_SET on the MD export.  Flags outside HSM_USER_MASK
 * require CAP_SYS_ADMIN.
 */
2209 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2211 struct md_op_data *op_data;
2214 /* Non-root users are forbidden to set or clear flags which are
2215 * NOT defined in HSM_USER_MASK. */
2216 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2217 !cfs_capable(CFS_CAP_SYS_ADMIN))
2220 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2221 LUSTRE_OPC_ANY, hss);
2222 if (IS_ERR(op_data))
2223 RETURN(PTR_ERR(op_data));
2225 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2226 sizeof(*op_data), op_data, NULL);
2228 ll_finish_md_op_data(op_data);
/*
 * HSM import: mark a regular file ARCHIVED|EXISTS|RELEASED with the given
 * archive id, then force its mode/uid/gid/size/times to the values recorded
 * in the hsm_user_import request via ll_setattr_raw().
 */
2233 static int ll_hsm_import(struct inode *inode, struct file *file,
2234 struct hsm_user_import *hui)
2236 struct hsm_state_set *hss = NULL;
2237 struct iattr *attr = NULL;
2241 if (!S_ISREG(inode->i_mode))
2247 GOTO(out, rc = -ENOMEM);
2249 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2250 hss->hss_archive_id = hui->hui_archive_id;
2251 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2252 rc = ll_hsm_state_set(inode, hss);
2256 OBD_ALLOC_PTR(attr);
2258 GOTO(out, rc = -ENOMEM);
/* only permission bits from the request; force a regular-file mode */
2260 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2261 attr->ia_mode |= S_IFREG;
2262 attr->ia_uid = hui->hui_uid;
2263 attr->ia_gid = hui->hui_gid;
2264 attr->ia_size = hui->hui_size;
2265 attr->ia_mtime.tv_sec = hui->hui_mtime;
2266 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2267 attr->ia_atime.tv_sec = hui->hui_atime;
2268 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2270 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2271 ATTR_UID | ATTR_GID |
2272 ATTR_MTIME | ATTR_MTIME_SET |
2273 ATTR_ATIME | ATTR_ATIME_SET;
2275 rc = ll_setattr_raw(file->f_dentry, attr, true);
/*
 * Main .unlocked_ioctl dispatcher for regular files: per-fd flag get/set,
 * LOV stripe/EA operations, layout swap, fiemap, group locks, fid/path
 * translation, data version, HSM state/action/import, and file leases.
 * Unrecognized commands fall through to the registered ll_iocontrol_call()
 * handlers and finally to obd_iocontrol() on the data export.
 * NOTE(review): this listing has many lines elided (RETURNs, closing
 * braces, some allocations) — comments below describe only what is
 * visible.
 */
2289 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2291 struct inode *inode = file->f_dentry->d_inode;
2292 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2296 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2297 PFID(ll_inode2fid(inode)), inode, cmd);
2298 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2300 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2301 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2305 case LL_IOC_GETFLAGS:
2306 /* Get the current value of the file flags */
2307 return put_user(fd->fd_flags, (int *)arg);
2308 case LL_IOC_SETFLAGS:
2309 case LL_IOC_CLRFLAGS:
2310 /* Set or clear specific file flags */
2311 /* XXX This probably needs checks to ensure the flags are
2312 * not abused, and to handle any flag side effects.
2314 if (get_user(flags, (int *) arg))
2317 if (cmd == LL_IOC_SETFLAGS) {
2318 if ((flags & LL_FILE_IGNORE_LOCK) &&
2319 !(file->f_flags & O_DIRECT)) {
2320 CERROR("%s: unable to disable locking on "
2321 "non-O_DIRECT file\n", current->comm);
2325 fd->fd_flags |= flags;
2327 fd->fd_flags &= ~flags;
2330 case LL_IOC_LOV_SETSTRIPE:
2331 RETURN(ll_lov_setstripe(inode, file, arg));
2332 case LL_IOC_LOV_SETEA:
2333 RETURN(ll_lov_setea(inode, file, arg));
2334 case LL_IOC_LOV_SWAP_LAYOUTS: {
2336 struct lustre_swap_layouts lsl;
2338 if (copy_from_user(&lsl, (char *)arg,
2339 sizeof(struct lustre_swap_layouts)))
2342 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2345 file2 = fget(lsl.sl_fd);
/* both files must be open for writing before swapping layouts */
2350 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2351 rc = ll_swap_layouts(file, file2, &lsl);
2355 case LL_IOC_LOV_GETSTRIPE:
2356 RETURN(ll_lov_getstripe(inode, arg));
2357 case LL_IOC_RECREATE_OBJ:
2358 RETURN(ll_lov_recreate_obj(inode, arg));
2359 case LL_IOC_RECREATE_FID:
2360 RETURN(ll_lov_recreate_fid(inode, arg));
2361 case FSFILT_IOC_FIEMAP:
2362 RETURN(ll_ioctl_fiemap(inode, arg));
2363 case FSFILT_IOC_GETFLAGS:
2364 case FSFILT_IOC_SETFLAGS:
2365 RETURN(ll_iocontrol(inode, file, cmd, arg));
2366 case FSFILT_IOC_GETVERSION_OLD:
2367 case FSFILT_IOC_GETVERSION:
2368 RETURN(put_user(inode->i_generation, (int *)arg));
2369 case LL_IOC_GROUP_LOCK:
2370 RETURN(ll_get_grouplock(inode, file, arg));
2371 case LL_IOC_GROUP_UNLOCK:
2372 RETURN(ll_put_grouplock(inode, file, arg));
2373 case IOC_OBD_STATFS:
2374 RETURN(ll_obd_statfs(inode, (void *)arg));
2376 /* We need to special case any other ioctls we want to handle,
2377 * to send them to the MDS/OST as appropriate and to properly
2378 * network encode the arg field.
2379 case FSFILT_IOC_SETVERSION_OLD:
2380 case FSFILT_IOC_SETVERSION:
2382 case LL_IOC_FLUSHCTX:
2383 RETURN(ll_flush_ctx(inode));
2384 case LL_IOC_PATH2FID: {
2385 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2386 sizeof(struct lu_fid)))
2391 case OBD_IOC_FID2PATH:
2392 RETURN(ll_fid2path(inode, (void *)arg));
2393 case LL_IOC_DATA_VERSION: {
2394 struct ioc_data_version idv;
2397 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
2400 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2401 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2403 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2409 case LL_IOC_GET_MDTIDX: {
2412 mdtidx = ll_get_mdt_idx(inode);
2416 if (put_user((int)mdtidx, (int*)arg))
2421 case OBD_IOC_GETDTNAME:
2422 case OBD_IOC_GETMDNAME:
2423 RETURN(ll_get_obd_name(inode, cmd, arg));
2424 case LL_IOC_HSM_STATE_GET: {
2425 struct md_op_data *op_data;
2426 struct hsm_user_state *hus;
2433 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2434 LUSTRE_OPC_ANY, hus);
2435 if (IS_ERR(op_data)) {
2437 RETURN(PTR_ERR(op_data));
2440 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2443 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2446 ll_finish_md_op_data(op_data);
2450 case LL_IOC_HSM_STATE_SET: {
2451 struct hsm_state_set *hss;
2458 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2463 rc = ll_hsm_state_set(inode, hss);
2468 case LL_IOC_HSM_ACTION: {
2469 struct md_op_data *op_data;
2470 struct hsm_current_action *hca;
2477 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2478 LUSTRE_OPC_ANY, hca);
2479 if (IS_ERR(op_data)) {
2481 RETURN(PTR_ERR(op_data));
2484 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2487 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2490 ll_finish_md_op_data(op_data);
2494 case LL_IOC_SET_LEASE: {
2495 struct ll_inode_info *lli = ll_i2info(inode);
2496 struct obd_client_handle *och = NULL;
/* lease mode must match the file's open mode */
2502 if (!(file->f_mode & FMODE_WRITE))
2507 if (!(file->f_mode & FMODE_READ))
2512 mutex_lock(&lli->lli_och_mutex);
2513 if (fd->fd_lease_och != NULL) {
2514 och = fd->fd_lease_och;
2515 fd->fd_lease_och = NULL;
2517 mutex_unlock(&lli->lli_och_mutex);
2520 mode = och->och_flags &(FMODE_READ|FMODE_WRITE);
2521 rc = ll_lease_close(och, inode, &lease_broken);
2522 if (rc == 0 && lease_broken)
2528 /* return the type of lease or error */
2529 RETURN(rc < 0 ? rc : (int)mode);
2534 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2536 /* apply for lease */
2537 och = ll_lease_open(inode, file, mode, 0);
2539 RETURN(PTR_ERR(och));
2542 mutex_lock(&lli->lli_och_mutex);
2543 if (fd->fd_lease_och == NULL) {
2544 fd->fd_lease_och = och;
2547 mutex_unlock(&lli->lli_och_mutex);
2549 /* impossible now that only excl is supported for now */
2550 ll_lease_close(och, inode, &lease_broken);
2555 case LL_IOC_GET_LEASE: {
2556 struct ll_inode_info *lli = ll_i2info(inode);
2557 struct ldlm_lock *lock = NULL;
2560 mutex_lock(&lli->lli_och_mutex);
2561 if (fd->fd_lease_och != NULL) {
2562 struct obd_client_handle *och = fd->fd_lease_och;
2564 lock = ldlm_handle2lock(&och->och_lease_handle);
2566 lock_res_and_lock(lock);
2567 if (!ldlm_is_cancel(lock))
2568 rc = och->och_flags &
2569 (FMODE_READ | FMODE_WRITE);
2570 unlock_res_and_lock(lock);
2571 LDLM_LOCK_PUT(lock);
2574 mutex_unlock(&lli->lli_och_mutex);
2577 case LL_IOC_HSM_IMPORT: {
2578 struct hsm_user_import *hui;
2584 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2589 rc = ll_hsm_import(inode, file, hui);
2598 ll_iocontrol_call(inode, file, cmd, arg, &err))
2601 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2607 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Compat helper (kernels without generic_file_llseek_size): validate the
 * new offset against 0/maxsize and commit it to file->f_pos, resetting
 * f_version on change.  Error returns for the invalid cases are elided in
 * this listing.
 */
2608 static inline loff_t
2609 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2611 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2613 if (offset > maxsize)
2616 if (offset != file->f_pos) {
2617 file->f_pos = offset;
2618 file->f_version = 0;
/*
 * Compat copy of the kernel's generic_file_llseek_size(): handle the
 * SEEK_CUR race-free f_pos update under i_mutex and the SEEK_DATA/SEEK_HOLE
 * conventions against @eof.  NOTE(review): the switch statement and several
 * case labels are elided in this listing; return type line is also missing.
 */
2624 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2625 loff_t maxsize, loff_t eof)
2627 struct inode *inode = file->f_dentry->d_inode;
2635 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2636 * position-querying operation. Avoid rewriting the "same"
2637 * f_pos value back to the file because a concurrent read(),
2638 * write() or lseek() might have altered it
2643 * f_lock protects against read/modify/write race with other
2644 * SEEK_CURs. Note that parallel writes and reads behave
2647 mutex_lock(&inode->i_mutex);
2648 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2649 mutex_unlock(&inode->i_mutex);
2653 * In the generic case the entire file is data, so as long as
2654 * offset isn't at the end of the file then the offset is data.
2661 * There is a virtual hole at the end of the file, so as long as
2662 * offset isn't i_size or larger, return i_size.
2670 return llseek_execute(file, offset, maxsize);
/*
 * .llseek entry point: for SEEK_END/SEEK_HOLE/SEEK_DATA the file size must
 * be current, so glimpse it from the OSTs first; then delegate to
 * ll_generic_file_llseek_size() bounded by ll_file_maxbytes().
 */
2674 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2676 struct inode *inode = file->f_dentry->d_inode;
2677 loff_t retval, eof = 0;
2680 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2681 (origin == SEEK_CUR) ? file->f_pos : 0);
2682 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2683 PFID(ll_inode2fid(inode)), inode, retval, retval,
2685 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2687 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2688 retval = ll_glimpse_size(inode);
2691 eof = i_size_read(inode);
2694 retval = ll_generic_file_llseek_size(file, offset, origin,
2695 ll_file_maxbytes(inode), eof);
/*
 * flush() file operation: report any asynchronous write error recorded
 * against this inode/file back to the application at close(2) time.
 *
 * Does not force dirty pages out; it only collects and clears the
 * recorded async return codes.  Returns -EIO if an unreported write
 * failure is pending, 0 otherwise.
 */
2699 int ll_flush(struct file *file, fl_owner_t id)
2701 struct inode *inode = file->f_dentry->d_inode;
2702 struct ll_inode_info *lli = ll_i2info(inode);
2703 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2706 LASSERT(!S_ISDIR(inode->i_mode));
2708 /* catch async errors that were recorded back when async writeback
2709 * failed for pages in this mapping. */
2710 rc = lli->lli_async_rc;
2711 lli->lli_async_rc = 0;
2712 err = lov_read_and_clear_async_rc(lli->lli_clob);
2716 /* The application has been told write failure already.
2717 * Do not report failure again. */
2718 if (fd->fd_write_failed)
2720 return rc ? -EIO : 0;
2724 * Called to make sure a portion of file has been written out.
2725 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2727 * Return how many pages have been written.
2729 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2730 enum cl_fsync_mode mode, int ignore_layout)
2732 struct cl_env_nest nest;
2735 struct obd_capa *capa = NULL;
2736 struct cl_fsync_io *fio;
/* only the four defined fsync modes are accepted */
2740 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2741 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2744 env = cl_env_nested_get(&nest);
2746 RETURN(PTR_ERR(env));
/* OSS write capability accompanies the sync RPCs */
2748 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2750 io = ccc_env_thread_io(env);
2751 io->ci_obj = cl_i2info(inode)->lli_clob;
2752 io->ci_ignore_layout = ignore_layout;
2754 /* initialize parameters for sync */
2755 fio = &io->u.ci_fsync;
2756 fio->fi_capa = capa;
2757 fio->fi_start = start;
2759 fio->fi_fid = ll_inode2fid(inode);
2760 fio->fi_mode = mode;
2761 fio->fi_nr_written = 0;
2763 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2764 result = cl_io_loop(env, io);
2766 result = io->ci_result;
/* on success report the page count accumulated by the fsync io */
2768 result = fio->fi_nr_written;
2769 cl_io_fini(env, io);
2770 cl_env_nested_put(&nest, env);
2778 * When dentry is provided (the 'else' case), *file->f_dentry may be
2779 * null and dentry must be used directly rather than pulled from
2780 * *file->f_dentry as is done otherwise.
/*
 * fsync() file operation.  Three kernel-version-dependent prototypes
 * are supported via the HAVE_FILE_FSYNC_* configure checks: 4-arg
 * (file, start, end, datasync), 2-arg (file, datasync) and the old
 * 3-arg form taking an explicit dentry.
 */
2783 #ifdef HAVE_FILE_FSYNC_4ARGS
2784 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2786 struct dentry *dentry = file->f_dentry;
2787 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2788 int ll_fsync(struct file *file, int datasync)
2790 struct dentry *dentry = file->f_dentry;
2792 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2795 struct inode *inode = dentry->d_inode;
2796 struct ll_inode_info *lli = ll_i2info(inode);
2797 struct ptlrpc_request *req;
2798 struct obd_capa *oc;
2802 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2803 PFID(ll_inode2fid(inode)), inode);
2804 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2806 #ifdef HAVE_FILE_FSYNC_4ARGS
2807 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2808 mutex_lock(&inode->i_mutex);
2810 /* fsync's caller has already called _fdata{sync,write}, we want
2811 * that IO to finish before calling the osc and mdc sync methods */
2812 rc = filemap_fdatawait(inode->i_mapping);
2815 /* catch async errors that were recorded back when async writeback
2816 * failed for pages in this mapping. */
2817 if (!S_ISDIR(inode->i_mode)) {
2818 err = lli->lli_async_rc;
2819 lli->lli_async_rc = 0;
2822 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* sync metadata on the MDT, guarded by an MDS capability */
2827 oc = ll_mdscapa_get(inode);
2828 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2834 ptlrpc_req_finished(req);
2836 if (datasync && S_ISREG(inode->i_mode)) {
2837 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
/* force data out to the OSTs over the whole file range */
2839 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2841 if (rc == 0 && err < 0)
/* remember the outcome so ll_flush() does not double-report */
2844 fd->fd_write_failed = true;
2846 fd->fd_write_failed = false;
2849 #ifdef HAVE_FILE_FSYNC_4ARGS
2850 mutex_unlock(&inode->i_mutex);
/*
 * flock()/fcntl() byte-range and whole-file lock handler.
 *
 * Translates the kernel's struct file_lock into an LDLM_FLOCK enqueue
 * against the MDT, then mirrors a successful server-side lock into the
 * local VFS lock tables (flock_lock_file_wait/posix_lock_file_wait).
 * If the local step fails, the server lock is released again by
 * re-enqueueing with LCK_NL (unlock).
 */
2855 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2857 struct inode *inode = file->f_dentry->d_inode;
2858 struct ll_sb_info *sbi = ll_i2sbi(inode);
2859 struct ldlm_enqueue_info einfo = {
2860 .ei_type = LDLM_FLOCK,
2861 .ei_cb_cp = ldlm_flock_completion_ast,
2862 .ei_cbdata = file_lock,
2864 struct md_op_data *op_data;
2865 struct lustre_handle lockh = {0};
2866 ldlm_policy_data_t flock = {{0}};
2872 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2873 PFID(ll_inode2fid(inode)), file_lock);
2875 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2877 if (file_lock->fl_flags & FL_FLOCK) {
2878 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2879 /* flocks are whole-file locks */
2880 flock.l_flock.end = OFFSET_MAX;
2881 /* For flocks owner is determined by the local file descriptor*/
2882 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2883 } else if (file_lock->fl_flags & FL_POSIX) {
2884 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2885 flock.l_flock.start = file_lock->fl_start;
2886 flock.l_flock.end = file_lock->fl_end;
2890 flock.l_flock.pid = file_lock->fl_pid;
2892 /* Somewhat ugly workaround for svc lockd.
2893 * lockd installs custom fl_lmops->lm_compare_owner that checks
2894 * for the fl_owner to be the same (which it always is on local node
2895 * I guess between lockd processes) and then compares pid.
2896 * As such we assign pid to the owner field to make it all work,
2897 * conflict with normal locks is unlikely since pid space and
2898 * pointer space for current->files are not intersecting */
2899 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2900 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* map the VFS lock type to an LDLM lock mode */
2902 switch (file_lock->fl_type) {
2904 einfo.ei_mode = LCK_PR;
2907 /* An unlock request may or may not have any relation to
2908 * existing locks so we may not be able to pass a lock handle
2909 * via a normal ldlm_lock_cancel() request. The request may even
2910 * unlock a byte range in the middle of an existing lock. In
2911 * order to process an unlock request we need all of the same
2912 * information that is given with a normal read or write record
2913 * lock request. To avoid creating another ldlm unlock (cancel)
2914 * message we'll treat a LCK_NL flock request as an unlock. */
2915 einfo.ei_mode = LCK_NL;
2918 einfo.ei_mode = LCK_PW;
2921 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2922 file_lock->fl_type);
2937 flags = LDLM_FL_BLOCK_NOWAIT;
2943 flags = LDLM_FL_TEST_LOCK;
2944 /* Save the old mode so that if the mode in the lock changes we
2945 * can decrement the appropriate reader or writer refcount. */
2946 file_lock->fl_type = einfo.ei_mode;
2949 CERROR("unknown fcntl lock command: %d\n", cmd);
2953 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2954 LUSTRE_OPC_ANY, NULL);
2955 if (IS_ERR(op_data))
2956 RETURN(PTR_ERR(op_data));
2958 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2959 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2960 flock.l_flock.pid, flags, einfo.ei_mode,
2961 flock.l_flock.start, flock.l_flock.end);
/* take (or release/test) the lock on the MDT */
2963 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2964 op_data, &lockh, &flock, 0, NULL /* req */, flags);
/* mirror the server result into the local VFS lock state */
2966 if ((file_lock->fl_flags & FL_FLOCK) &&
2967 (rc == 0 || file_lock->fl_type == F_UNLCK))
2968 rc2 = flock_lock_file_wait(file, file_lock);
2969 if ((file_lock->fl_flags & FL_POSIX) &&
2970 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2971 !(flags & LDLM_FL_TEST_LOCK))
2972 rc2 = posix_lock_file_wait(file, file_lock);
2974 if (rc2 && file_lock->fl_type != F_UNLCK) {
/* local bookkeeping failed: undo the server lock via LCK_NL */
2975 einfo.ei_mode = LCK_NL;
2976 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2977 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2981 ll_finish_md_op_data(op_data);
2986 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2994 * test if some locks matching bits and l_req_mode are acquired
2995 * - bits can be in different locks
2996 * - if found clear the common lock bits in *bits
2997 * - the bits not found, are kept in *bits
2999 * \param bits [IN] searched lock bits [IN]
3000 * \param l_req_mode [IN] searched lock mode
3001 * \retval boolean, true iff all bits are found
3003 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3005 struct lustre_handle lockh;
3006 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four modes */
3007 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3008 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3017 fid = &ll_i2info(inode)->lli_fid;
3018 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3019 ldlm_lockname[mode]);
/* TEST_LOCK: matching must not take references on the found lock */
3021 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* probe each requested inodebit individually */
3022 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3023 policy.l_inodebits.bits = *bits & (1 << i);
3024 if (policy.l_inodebits.bits == 0)
3027 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3028 &policy, mode, &lockh)) {
3029 struct ldlm_lock *lock;
3031 lock = ldlm_handle2lock(&lockh);
3034 ~(lock->l_policy_data.l_inodebits.bits);
3035 LDLM_LOCK_PUT(lock);
3037 *bits &= ~policy.l_inodebits.bits;
/*
 * Try to match (and take a reference on) a granted MD lock on @inode
 * covering the given inodebits.  Returns the matched lock mode (0 if
 * none); on success the handle is returned in @lockh and the caller is
 * responsible for the matching decref.
 */
3044 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3045 struct lustre_handle *lockh, __u64 flags,
3048 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3053 fid = &ll_i2info(inode)->lli_fid;
3054 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3056 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3057 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Post-process the result of an inode revalidation RPC: -ENOENT on a
 * non-regular, non-directory inode is tolerated (object already
 * unlinked); any other error is logged and propagated.
 */
3062 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3064 /* Already unlinked. Just update nlink and return success */
3065 if (rc == -ENOENT) {
3067 /* This path cannot be hit for regular files unless in
3068 * case of obscure races, so no need to validate
3070 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3072 } else if (rc != 0) {
3073 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
3074 ll_get_fsname(inode->i_sb, NULL, 0),
3075 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate a dentry's inode attributes against the MDT.
 *
 * Two paths: if the server supports OBD_CONNECT_ATTRFID, an intent
 * getattr-by-FID is issued (no name needed); otherwise, if no suitable
 * MD lock is cached locally, a plain md_getattr RPC refreshes the
 * attributes (including EA size for regular files).
 */
3081 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3084 struct inode *inode = dentry->d_inode;
3085 struct ptlrpc_request *req = NULL;
3086 struct obd_export *exp;
3090 LASSERT(inode != NULL);
3092 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3093 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3095 exp = ll_i2mdexp(inode);
3097 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3098 * But under CMD case, it caused some lock issues, should be fixed
3099 * with new CMD ibits lock. See bug 12718 */
3100 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3101 struct lookup_intent oit = { .it_op = IT_GETATTR };
3102 struct md_op_data *op_data;
3104 if (ibits == MDS_INODELOCK_LOOKUP)
3105 oit.it_op = IT_LOOKUP;
3107 /* Call getattr by fid, so do not provide name at all. */
3108 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3109 dentry->d_inode, NULL, 0, 0,
3110 LUSTRE_OPC_ANY, NULL);
3111 if (IS_ERR(op_data))
3112 RETURN(PTR_ERR(op_data));
3114 oit.it_create_mode |= M_CHECK_STALE;
3115 rc = md_intent_lock(exp, op_data, NULL, 0,
3116 /* we are not interested in name
3119 ll_md_blocking_ast, 0);
3120 ll_finish_md_op_data(op_data);
3121 oit.it_create_mode &= ~M_CHECK_STALE;
3123 rc = ll_inode_revalidate_fini(inode, rc);
3127 rc = ll_revalidate_it_finish(req, &oit, dentry);
3129 ll_intent_release(&oit);
3133 /* Unlinked? Unhash dentry, so it is not picked up later by
3134 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3135 here to preserve get_cwd functionality on 2.6.
3137 if (!dentry->d_inode->i_nlink)
3138 d_lustre_invalidate(dentry, 0);
3140 ll_lookup_finish_locks(&oit, dentry);
3141 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3142 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3143 obd_valid valid = OBD_MD_FLGETATTR;
3144 struct md_op_data *op_data;
/* regular files also need striping EA sized to the MDT max */
3147 if (S_ISREG(inode->i_mode)) {
3148 rc = ll_get_max_mdsize(sbi, &ealen);
3151 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3154 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3155 0, ealen, LUSTRE_OPC_ANY,
3157 if (IS_ERR(op_data))
3158 RETURN(PTR_ERR(op_data));
3160 op_data->op_valid = valid;
3161 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3162 * capa for this inode. Because we only keep capas of dirs
3164 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3165 ll_finish_md_op_data(op_data);
3167 rc = ll_inode_revalidate_fini(inode, rc);
3171 rc = ll_prep_inode(&inode, req, NULL, NULL);
3174 ptlrpc_req_finished(req);
/*
 * Revalidate attributes and, for regular files, the size.  Non-regular
 * inodes copy cached LVB times instead; the size glimpse is skipped
 * while an HSM restore is running because the MDT already supplied the
 * correct size and holds the layout lock (a glimpse would block until
 * the restore completes).
 */
3178 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3181 struct inode *inode = dentry->d_inode;
3185 rc = __ll_inode_revalidate_it(dentry, it, ibits);
3189 /* if object isn't regular file, don't validate size */
3190 if (!S_ISREG(inode->i_mode)) {
3191 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
3192 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
3193 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
3195 /* In case of restore, the MDT has the right size and has
3196 * already send it back without granting the layout lock,
3197 * inode is up-to-date so glimpse is useless.
3198 * Also to glimpse we need the layout, in case of a running
3199 * restore the MDT holds the layout lock so the glimpse will
3200 * block up to the end of restore (getattr will block)
3202 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3203 rc = ll_glimpse_size(inode);
/*
 * Fill *stat from the (freshly revalidated) inode.  UPDATE|LOOKUP bits
 * are revalidated first; inode numbers are squashed to 32 bits when the
 * mount requires a 32-bit API.
 */
3208 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
3209 struct lookup_intent *it, struct kstat *stat)
3211 struct inode *inode = de->d_inode;
3212 struct ll_sb_info *sbi = ll_i2sbi(inode);
3213 struct ll_inode_info *lli = ll_i2info(inode);
3216 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3217 MDS_INODELOCK_LOOKUP);
3218 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3223 stat->dev = inode->i_sb->s_dev;
3224 if (ll_need_32bit_api(sbi))
3225 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3227 stat->ino = inode->i_ino;
3228 stat->mode = inode->i_mode;
3229 stat->nlink = inode->i_nlink;
3230 stat->uid = inode->i_uid;
3231 stat->gid = inode->i_gid;
3232 stat->rdev = inode->i_rdev;
3233 stat->atime = inode->i_atime;
3234 stat->mtime = inode->i_mtime;
3235 stat->ctime = inode->i_ctime;
3236 stat->blksize = 1 << inode->i_blkbits;
3238 stat->size = i_size_read(inode);
3239 stat->blocks = inode->i_blocks;
/* getattr() inode operation: wraps ll_getattr_it() with an IT_GETATTR
 * intent. */
3243 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3245 struct lookup_intent it = { .it_op = IT_GETATTR };
3247 return ll_getattr_it(mnt, de, &it, stat);
/*
 * fiemap() inode operation: translate the kernel's fiemap_extent_info
 * into a Lustre ll_user_fiemap request, run it via ll_do_fiemap(), and
 * copy the mapped extents back to the caller's buffer.
 */
3250 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3251 __u64 start, __u64 len)
3255 struct ll_user_fiemap *fiemap;
3256 unsigned int extent_count = fieinfo->fi_extents_max;
3258 num_bytes = sizeof(*fiemap) + (extent_count *
3259 sizeof(struct ll_fiemap_extent));
3260 OBD_ALLOC_LARGE(fiemap, num_bytes);
3265 fiemap->fm_flags = fieinfo->fi_flags;
3266 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3267 fiemap->fm_start = start;
3268 fiemap->fm_length = len;
/* seed with the first user extent (presumably carries the
 * FIEMAP_EXTENT_LAST continuation state — confirm with ll_do_fiemap) */
3269 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3270 sizeof(struct ll_fiemap_extent));
3272 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3274 fieinfo->fi_flags = fiemap->fm_flags;
3275 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3276 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3277 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3279 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * Return a referenced copy of the cached POSIX ACL for @inode.
 * lli_lock guards the cached lli_posix_acl pointer.
 */
3283 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3285 struct ll_inode_info *lli = ll_i2info(inode);
3286 struct posix_acl *acl = NULL;
3289 spin_lock(&lli->lli_lock);
3290 /* VFS' acl_permission_check->check_acl will release the refcount */
3291 acl = posix_acl_dup(lli->lli_posix_acl);
3292 spin_unlock(&lli->lli_lock);
3297 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
/*
 * check_acl callback for older generic_permission() variants: evaluate
 * the cached POSIX ACL against @mask.  Compiled out when the kernel's
 * 2-arg generic_permission handles ACLs itself; a no-op stub when
 * CONFIG_FS_POSIX_ACL is disabled.
 */
3299 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3300 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3302 ll_check_acl(struct inode *inode, int mask)
3305 # ifdef CONFIG_FS_POSIX_ACL
3306 struct posix_acl *acl;
3310 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* cannot consult the ACL under RCU walk */
3311 if (flags & IPERM_FLAG_RCU)
3314 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3319 rc = posix_acl_permission(inode, acl, mask);
3320 posix_acl_release(acl);
3323 # else /* !CONFIG_FS_POSIX_ACL */
3325 # endif /* CONFIG_FS_POSIX_ACL */
3327 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
3329 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
/*
 * permission() inode operation (three kernel-version-dependent
 * prototypes).  Revalidates the root inode on first use, defers to
 * remote permission checking on RMT_CLIENT mounts, and otherwise falls
 * through to generic permission checking with ll_check_acl.
 */
3330 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3332 # ifdef HAVE_INODE_PERMISION_2ARGS
3333 int ll_inode_permission(struct inode *inode, int mask)
3335 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
/* permission checks may need RPCs, which cannot run under RCU walk */
3342 #ifdef MAY_NOT_BLOCK
3343 if (mask & MAY_NOT_BLOCK)
3345 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3346 if (flags & IPERM_FLAG_RCU)
3350 /* as root inode are NOT getting validated in lookup operation,
3351 * need to do it before permission check. */
3353 if (inode == inode->i_sb->s_root->d_inode) {
3354 struct lookup_intent it = { .it_op = IT_LOOKUP };
3356 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3357 MDS_INODELOCK_LOOKUP);
3362 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3363 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3365 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3366 return lustre_check_remote_perm(inode, mask);
3368 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3369 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3374 /* -o localflock - only provides locally consistent flock locks */
/* Default file operations: no .flock/.lock entries, so flock/fcntl
 * locking falls back to the kernel's local-only handling. */
3375 struct file_operations ll_file_operations = {
3376 .read = ll_file_read,
3377 .aio_read = ll_file_aio_read,
3378 .write = ll_file_write,
3379 .aio_write = ll_file_aio_write,
3380 .unlocked_ioctl = ll_file_ioctl,
3381 .open = ll_file_open,
3382 .release = ll_file_release,
3383 .mmap = ll_file_mmap,
3384 .llseek = ll_file_seek,
3385 .splice_read = ll_file_splice_read,
/* File operations used with -o flock: cluster-coherent flock/fcntl
 * locking via ll_file_flock (LDLM-backed). */
3390 struct file_operations ll_file_operations_flock = {
3391 .read = ll_file_read,
3392 .aio_read = ll_file_aio_read,
3393 .write = ll_file_write,
3394 .aio_write = ll_file_aio_write,
3395 .unlocked_ioctl = ll_file_ioctl,
3396 .open = ll_file_open,
3397 .release = ll_file_release,
3398 .mmap = ll_file_mmap,
3399 .llseek = ll_file_seek,
3400 .splice_read = ll_file_splice_read,
3403 .flock = ll_file_flock,
3404 .lock = ll_file_flock
3407 /* These are for -o noflock - to return ENOSYS on flock calls */
3408 struct file_operations ll_file_operations_noflock = {
3409 .read = ll_file_read,
3410 .aio_read = ll_file_aio_read,
3411 .write = ll_file_write,
3412 .aio_write = ll_file_aio_write,
3413 .unlocked_ioctl = ll_file_ioctl,
3414 .open = ll_file_open,
3415 .release = ll_file_release,
3416 .mmap = ll_file_mmap,
3417 .llseek = ll_file_seek,
3418 .splice_read = ll_file_splice_read,
3421 .flock = ll_file_noflock,
3422 .lock = ll_file_noflock
/* Inode operations for regular Lustre files. */
3425 struct inode_operations ll_file_inode_operations = {
3426 .setattr = ll_setattr,
3427 .getattr = ll_getattr,
3428 .permission = ll_inode_permission,
3429 .setxattr = ll_setxattr,
3430 .getxattr = ll_getxattr,
3431 .listxattr = ll_listxattr,
3432 .removexattr = ll_removexattr,
3433 .fiemap = ll_fiemap,
3434 #ifdef HAVE_IOP_GET_ACL
3435 .get_acl = ll_get_acl,
3439 /* dynamic ioctl number support routines */
/* Global registry of dynamically-registered ioctl handlers, protected
 * by ioc_sem (readers iterate, writers register/unregister). */
3440 static struct llioc_ctl_data {
3441 struct rw_semaphore ioc_sem;
3442 cfs_list_t ioc_head;
3444 __RWSEM_INITIALIZER(llioc.ioc_sem),
3445 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registration: a callback plus the ioctl numbers it handles
 * (iocd_cmd is a trailing variable-length array). */
3450 cfs_list_t iocd_list;
3451 unsigned int iocd_size;
3452 llioc_callback_t iocd_cb;
3453 unsigned int iocd_count;
3454 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler @cb for @count ioctl numbers in
 * @cmd.  Returns an opaque cookie (the allocation itself) to pass to
 * ll_iocontrol_unregister(), or NULL on bad arguments / allocation
 * failure.
 */
3457 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3460 struct llioc_data *in_data = NULL;
3463 if (cb == NULL || cmd == NULL ||
3464 count > LLIOC_MAX_CMD || count < 0)
3467 size = sizeof(*in_data) + count * sizeof(unsigned int);
3468 OBD_ALLOC(in_data, size);
3469 if (in_data == NULL)
3472 memset(in_data, 0, sizeof(*in_data));
3473 in_data->iocd_size = size;
3474 in_data->iocd_cb = cb;
3475 in_data->iocd_count = count;
3476 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3478 down_write(&llioc.ioc_sem);
3479 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3480 up_write(&llioc.ioc_sem);
/*
 * Remove and free the registration identified by @magic (the cookie
 * returned from ll_iocontrol_register).  Warns if no matching entry is
 * found.
 */
3485 void ll_iocontrol_unregister(void *magic)
3487 struct llioc_data *tmp;
3492 down_write(&llioc.ioc_sem);
3493 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3495 unsigned int size = tmp->iocd_size;
3497 cfs_list_del(&tmp->iocd_list);
3498 up_write(&llioc.ioc_sem);
3500 OBD_FREE(tmp, size);
3504 up_write(&llioc.ioc_sem);
3506 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3509 EXPORT_SYMBOL(ll_iocontrol_register);
3510 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch @cmd to registered dynamic ioctl handlers in registration
 * order.  Stops when a handler returns LLIOC_STOP; the handler's result
 * code is returned through *rcp.
 */
3512 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3513 unsigned int cmd, unsigned long arg, int *rcp)
3515 enum llioc_iter ret = LLIOC_CONT;
3516 struct llioc_data *data;
3517 int rc = -EINVAL, i;
3519 down_read(&llioc.ioc_sem);
3520 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3521 for (i = 0; i < data->iocd_count; i++) {
3522 if (cmd != data->iocd_cmd[i])
3525 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3529 if (ret == LLIOC_STOP)
3532 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration into the cl_object behind @inode.  For
 * OBJECT_CONF_SET the associated layout lock is allowed to match only
 * after the layout has been applied, so no stale layout can be seen.
 */
3539 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3541 struct ll_inode_info *lli = ll_i2info(inode);
3542 struct cl_env_nest nest;
3547 if (lli->lli_clob == NULL)
3550 env = cl_env_nested_get(&nest);
3552 RETURN(PTR_ERR(env));
3554 result = cl_conf_set(env, lli->lli_clob, conf);
3555 cl_env_nested_put(&nest, env);
3557 if (conf->coc_opc == OBJECT_CONF_SET) {
3558 struct ldlm_lock *lock = conf->coc_lock;
3560 LASSERT(lock != NULL);
3561 LASSERT(ldlm_has_layout(lock));
3563 /* it can only be allowed to match after layout is
3564 * applied to inode otherwise false layout would be
3565 * seen. Applying layout should happen before dropping
3566 * the intent lock. */
3567 ldlm_lock_allow_match(lock);
3573 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
/*
 * If the layout lock's LVB is not yet populated, fetch the LOV EA from
 * the MDT via md_getxattr() and install it as the lock's l_lvb_data.
 * Needed when the lock was granted through a completion AST, where the
 * reply LVB buffer may have been too small to carry the layout.
 */
3574 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3577 struct ll_sb_info *sbi = ll_i2sbi(inode);
3578 struct obd_capa *oc;
3579 struct ptlrpc_request *req;
3580 struct mdt_body *body;
3587 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3588 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3589 lock->l_lvb_data, lock->l_lvb_len);
/* layout already cached in the lock — nothing to fetch */
3591 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3594 /* if layout lock was granted right away, the layout is returned
3595 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3596 * blocked and then granted via completion ast, we have to fetch
3597 * layout here. Please note that we can't use the LVB buffer in
3598 * completion AST because it doesn't have a large enough buffer */
3599 oc = ll_mdscapa_get(inode);
3600 rc = ll_get_max_mdsize(sbi, &lmmsize);
3602 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3603 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3609 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3610 if (body == NULL || body->eadatasize > lmmsize)
3611 GOTO(out, rc = -EPROTO);
3613 lmmsize = body->eadatasize;
3614 if (lmmsize == 0) /* empty layout */
3617 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3619 GOTO(out, rc = -EFAULT);
/* copy out of the RPC reply so the lock can own the buffer */
3621 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3622 if (lvbdata == NULL)
3623 GOTO(out, rc = -ENOMEM);
3625 memcpy(lvbdata, lmm, lmmsize);
3626 lock_res_and_lock(lock);
3627 if (lock->l_lvb_data != NULL)
3628 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3630 lock->l_lvb_data = lvbdata;
3631 lock->l_lvb_len = lmmsize;
3632 unlock_res_and_lock(lock);
3637 ptlrpc_req_finished(req);
3642 * Apply the layout to the inode. Layout lock is held and will be released
3645 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3646 struct inode *inode, __u32 *gen, bool reconf)
3648 struct ll_inode_info *lli = ll_i2info(inode);
3649 struct ll_sb_info *sbi = ll_i2sbi(inode);
3650 struct ldlm_lock *lock;
3651 struct lustre_md md = { NULL };
3652 struct cl_object_conf conf;
3655 bool wait_layout = false;
3658 LASSERT(lustre_handle_is_used(lockh));
3660 lock = ldlm_handle2lock(lockh);
3661 LASSERT(lock != NULL);
3662 LASSERT(ldlm_has_layout(lock));
3664 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d\n",
3665 PFID(&lli->lli_fid), inode, reconf);
3667 /* in case this is a caching lock and reinstate with new inode */
3668 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3670 lock_res_and_lock(lock);
3671 lvb_ready = ldlm_is_lvb_ready(lock);
3672 unlock_res_and_lock(lock);
3673 /* checking lvb_ready is racy but this is okay. The worst case is
3674 * that multi processes may configure the file on the same time. */
3676 if (lvb_ready || !reconf) {
3679 /* layout_gen must be valid if layout lock is not
3680 * cancelled and stripe has already set */
3681 *gen = lli->lli_layout_gen;
/* make sure the layout EA is present in the lock's LVB */
3687 rc = ll_layout_fetch(inode, lock);
3691 /* for layout lock, lmm is returned in lock's lvb.
3692 * lvb_data is immutable if the lock is held so it's safe to access it
3693 * without res lock. See the description in ldlm_lock_decref_internal()
3694 * for the condition to free lvb_data of layout lock */
3695 if (lock->l_lvb_data != NULL) {
3696 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3697 lock->l_lvb_data, lock->l_lvb_len);
3699 *gen = LL_LAYOUT_GEN_EMPTY;
3701 *gen = md.lsm->lsm_layout_gen;
3704 CERROR("%s: file "DFID" unpackmd error: %d\n",
3705 ll_get_fsname(inode->i_sb, NULL, 0),
3706 PFID(&lli->lli_fid), rc);
3712 /* set layout to file. Unlikely this will fail as old layout was
3713 * surely eliminated */
3714 memset(&conf, 0, sizeof conf);
3715 conf.coc_opc = OBJECT_CONF_SET;
3716 conf.coc_inode = inode;
3717 conf.coc_lock = lock;
3718 conf.u.coc_md = &md;
3719 rc = ll_layout_conf(inode, &conf);
3722 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3724 /* refresh layout failed, need to wait */
3725 wait_layout = rc == -EBUSY;
/* release the lock taken by the caller in all paths */
3729 LDLM_LOCK_PUT(lock);
3730 ldlm_lock_decref(lockh, mode);
3732 /* wait for IO to complete if it's still being used. */
3734 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3735 ll_get_fsname(inode->i_sb, NULL, 0),
3736 PFID(&lli->lli_fid), inode);
3738 memset(&conf, 0, sizeof conf);
3739 conf.coc_opc = OBJECT_CONF_WAIT;
3740 conf.coc_inode = inode;
3741 rc = ll_layout_conf(inode, &conf);
3745 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3746 ll_get_fsname(inode->i_sb, NULL, 0),
3747 PFID(&lli->lli_fid), rc);
3753 * This function checks if there exists a LAYOUT lock on the client side,
3754 * or enqueues it if it doesn't have one in cache.
3756 * This function will not hold layout lock so it may be revoked any time after
3757 * this function returns. Any operations depend on layout should be redone
3760 * This function should be called before lov_io_init() to get an uptodate
3761 * layout version, the caller should save the version number and after IO
3762 * is finished, this function should be called again to verify that layout
3763 * is not changed during IO time.
3765 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3767 struct ll_inode_info *lli = ll_i2info(inode);
3768 struct ll_sb_info *sbi = ll_i2sbi(inode);
3769 struct md_op_data *op_data;
3770 struct lookup_intent it;
3771 struct lustre_handle lockh;
3773 struct ldlm_enqueue_info einfo = {
3774 .ei_type = LDLM_IBITS,
3776 .ei_cb_bl = ll_md_blocking_ast,
3777 .ei_cb_cp = ldlm_completion_ast,
3782 *gen = lli->lli_layout_gen;
3783 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3787 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3788 LASSERT(S_ISREG(inode->i_mode));
3790 /* mostly layout lock is caching on the local side, so try to match
3791 * it before grabbing layout lock mutex. */
3792 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3793 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3794 if (mode != 0) { /* hit cached lock */
3795 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3799 /* better hold lli_layout_mutex to try again otherwise
3800 * it will have starvation problem. */
3803 /* take layout lock mutex to enqueue layout lock exclusively. */
3804 mutex_lock(&lli->lli_layout_mutex);
3807 /* try again. Maybe somebody else has done this. */
3808 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3809 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3810 if (mode != 0) { /* hit cached lock */
3811 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3815 mutex_unlock(&lli->lli_layout_mutex);
3819 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3820 0, 0, LUSTRE_OPC_ANY, NULL);
3821 if (IS_ERR(op_data)) {
3822 mutex_unlock(&lli->lli_layout_mutex);
3823 RETURN(PTR_ERR(op_data));
3826 /* have to enqueue one */
3827 memset(&it, 0, sizeof(it));
3828 it.it_op = IT_LAYOUT;
3829 lockh.cookie = 0ULL;
3831 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)\n",
3832 ll_get_fsname(inode->i_sb, NULL, 0),
3833 PFID(&lli->lli_fid), inode);
3835 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
/* the intent's request is no longer needed once enqueue returns */
3837 if (it.d.lustre.it_data != NULL)
3838 ptlrpc_req_finished(it.d.lustre.it_data);
3839 it.d.lustre.it_data = NULL;
3841 ll_finish_md_op_data(op_data);
/* transfer lock ownership out of the intent before dropping it */
3843 mode = it.d.lustre.it_lock_mode;
3844 it.d.lustre.it_lock_mode = 0;
3845 ll_intent_drop_lock(&it);
3848 /* set lock data in case this is a new lock */
3849 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3850 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3854 mutex_unlock(&lli->lli_layout_mutex);
3860 * This function send a restore request to the MDT
3862 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3864 struct hsm_user_request *hur;
3868 len = sizeof(struct hsm_user_request) +
3869 sizeof(struct hsm_user_item);
3870 OBD_ALLOC(hur, len);
3874 hur->hur_request.hr_action = HUA_RESTORE;
3875 hur->hur_request.hr_archive_id = 0;
3876 hur->hur_request.hr_flags = 0;
3877 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3878 sizeof(hur->hur_user_item[0].hui_fid));
3879 hur->hur_user_item[0].hui_extent.offset = offset;
3880 hur->hur_user_item[0].hui_extent.length = length;
3881 hur->hur_request.hr_itemcount = 1;
3882 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,