4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
61 fd->fd_write_failed = false;
66 static void ll_file_data_put(struct ll_file_data *fd)
69 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
72 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
73 struct lustre_handle *fh)
75 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
76 op_data->op_attr.ia_mode = inode->i_mode;
77 op_data->op_attr.ia_atime = inode->i_atime;
78 op_data->op_attr.ia_mtime = inode->i_mtime;
79 op_data->op_attr.ia_ctime = inode->i_ctime;
80 op_data->op_attr.ia_size = i_size_read(inode);
81 op_data->op_attr_blocks = inode->i_blocks;
82 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
83 ll_inode_to_ext_flags(inode->i_flags);
84 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
86 op_data->op_handle = *fh;
87 op_data->op_capa1 = ll_mdscapa_get(inode);
89 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
90 op_data->op_bias |= MDS_DATA_MODIFIED;
94 * Closes the IO epoch and packs all the attributes into @op_data for
97 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
98 struct obd_client_handle *och)
102 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
103 ATTR_MTIME | ATTR_MTIME_SET |
104 ATTR_CTIME | ATTR_CTIME_SET;
106 if (!(och->och_flags & FMODE_WRITE))
109 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
110 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
112 ll_ioepoch_close(inode, op_data, &och, 0);
115 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
116 ll_prep_md_op_data(op_data, inode, NULL, NULL,
117 0, 0, LUSTRE_OPC_ANY, NULL);
121 static int ll_close_inode_openhandle(struct obd_export *md_exp,
123 struct obd_client_handle *och,
124 const __u64 *data_version)
126 struct obd_export *exp = ll_i2mdexp(inode);
127 struct md_op_data *op_data;
128 struct ptlrpc_request *req = NULL;
129 struct obd_device *obd = class_exp2obd(exp);
136 * XXX: in case of LMV, is this correct to access
139 CERROR("Invalid MDC connection handle "LPX64"\n",
140 ll_i2mdexp(inode)->exp_handle.h_cookie);
144 OBD_ALLOC_PTR(op_data);
146 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
148 ll_prepare_close(inode, op_data, och);
149 if (data_version != NULL) {
150 /* Pass in data_version implies release. */
151 op_data->op_bias |= MDS_HSM_RELEASE;
152 op_data->op_data_version = *data_version;
153 op_data->op_lease_handle = och->och_lease_handle;
154 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
156 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
157 rc = md_close(md_exp, op_data, och->och_mod, &req);
159 /* This close must have the epoch closed. */
160 LASSERT(epoch_close);
161 /* MDS has instructed us to obtain Size-on-MDS attribute from
162 * OSTs and send setattr to back to MDS. */
163 rc = ll_som_update(inode, op_data);
165 CERROR("%s: inode "DFID" mdc Size-on-MDS update"
166 " failed: rc = %d\n",
167 ll_i2mdexp(inode)->exp_obd->obd_name,
168 PFID(ll_inode2fid(inode)), rc);
172 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
173 ll_i2mdexp(inode)->exp_obd->obd_name,
174 PFID(ll_inode2fid(inode)), rc);
177 /* DATA_MODIFIED flag was successfully sent on close, cancel data
178 * modification flag. */
179 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
180 struct ll_inode_info *lli = ll_i2info(inode);
182 spin_lock(&lli->lli_lock);
183 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
184 spin_unlock(&lli->lli_lock);
188 rc = ll_objects_destroy(req, inode);
190 CERROR("%s: inode "DFID
191 " ll_objects destroy: rc = %d\n",
192 ll_i2mdexp(inode)->exp_obd->obd_name,
193 PFID(ll_inode2fid(inode)), rc);
196 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
197 struct mdt_body *body;
198 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
199 if (!(body->valid & OBD_MD_FLRELEASED))
203 ll_finish_md_op_data(op_data);
207 if (exp_connect_som(exp) && !epoch_close &&
208 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
209 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
211 md_clear_open_replay_data(md_exp, och);
212 /* Free @och if it is not waiting for DONE_WRITING. */
213 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
216 if (req) /* This is close request */
217 ptlrpc_req_finished(req);
221 int ll_md_real_close(struct inode *inode, int flags)
223 struct ll_inode_info *lli = ll_i2info(inode);
224 struct obd_client_handle **och_p;
225 struct obd_client_handle *och;
230 if (flags & FMODE_WRITE) {
231 och_p = &lli->lli_mds_write_och;
232 och_usecount = &lli->lli_open_fd_write_count;
233 } else if (flags & FMODE_EXEC) {
234 och_p = &lli->lli_mds_exec_och;
235 och_usecount = &lli->lli_open_fd_exec_count;
237 LASSERT(flags & FMODE_READ);
238 och_p = &lli->lli_mds_read_och;
239 och_usecount = &lli->lli_open_fd_read_count;
242 mutex_lock(&lli->lli_och_mutex);
243 if (*och_usecount) { /* There are still users of this handle, so
245 mutex_unlock(&lli->lli_och_mutex);
250 mutex_unlock(&lli->lli_och_mutex);
252 if (och) { /* There might be a race and somebody have freed this och
254 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
261 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
264 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
265 struct ll_inode_info *lli = ll_i2info(inode);
269 /* clear group lock, if present */
270 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
271 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
273 if (fd->fd_lease_och != NULL) {
276 /* Usually the lease is not released when the
277 * application crashed, we need to release here. */
278 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
279 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
280 PFID(&lli->lli_fid), rc, lease_broken);
282 fd->fd_lease_och = NULL;
285 if (fd->fd_och != NULL) {
286 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
291 /* Let's see if we have good enough OPEN lock on the file and if
292 we can skip talking to MDS */
293 if (file->f_dentry->d_inode) { /* Can this ever be false? */
295 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
296 struct lustre_handle lockh;
297 struct inode *inode = file->f_dentry->d_inode;
298 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
300 mutex_lock(&lli->lli_och_mutex);
301 if (fd->fd_omode & FMODE_WRITE) {
303 LASSERT(lli->lli_open_fd_write_count);
304 lli->lli_open_fd_write_count--;
305 } else if (fd->fd_omode & FMODE_EXEC) {
307 LASSERT(lli->lli_open_fd_exec_count);
308 lli->lli_open_fd_exec_count--;
311 LASSERT(lli->lli_open_fd_read_count);
312 lli->lli_open_fd_read_count--;
314 mutex_unlock(&lli->lli_och_mutex);
316 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
317 LDLM_IBITS, &policy, lockmode,
319 rc = ll_md_real_close(file->f_dentry->d_inode,
323 CERROR("Releasing a file %p with negative dentry %p. Name %s",
324 file, file->f_dentry, file->f_dentry->d_name.name);
328 LUSTRE_FPRIVATE(file) = NULL;
329 ll_file_data_put(fd);
330 ll_capa_close(inode);
335 /* While this returns an error code, fput() the caller does not, so we need
336 * to make every effort to clean up all of our state here. Also, applications
337 * rarely check close errors and even if an error is returned they will not
338 * re-try the close call.
340 int ll_file_release(struct inode *inode, struct file *file)
342 struct ll_file_data *fd;
343 struct ll_sb_info *sbi = ll_i2sbi(inode);
344 struct ll_inode_info *lli = ll_i2info(inode);
348 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
349 PFID(ll_inode2fid(inode)), inode);
351 #ifdef CONFIG_FS_POSIX_ACL
352 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
353 inode == inode->i_sb->s_root->d_inode) {
354 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
357 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
358 fd->fd_flags &= ~LL_FILE_RMTACL;
359 rct_del(&sbi->ll_rct, current_pid());
360 et_search_free(&sbi->ll_et, current_pid());
365 if (inode->i_sb->s_root != file->f_dentry)
366 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
367 fd = LUSTRE_FPRIVATE(file);
370 /* The last ref on @file, maybe not the the owner pid of statahead.
371 * Different processes can open the same dir, "ll_opendir_key" means:
372 * it is me that should stop the statahead thread. */
373 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
374 lli->lli_opendir_pid != 0)
375 ll_stop_statahead(inode, lli->lli_opendir_key);
377 if (inode->i_sb->s_root == file->f_dentry) {
378 LUSTRE_FPRIVATE(file) = NULL;
379 ll_file_data_put(fd);
383 if (!S_ISDIR(inode->i_mode)) {
384 lov_read_and_clear_async_rc(lli->lli_clob);
385 lli->lli_async_rc = 0;
388 rc = ll_md_close(sbi->ll_md_exp, inode, file);
390 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
391 libcfs_debug_dumplog();
396 static int ll_intent_file_open(struct file *file, void *lmm,
397 int lmmsize, struct lookup_intent *itp)
399 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
400 struct dentry *parent = file->f_dentry->d_parent;
401 const char *name = file->f_dentry->d_name.name;
402 const int len = file->f_dentry->d_name.len;
403 struct md_op_data *op_data;
404 struct ptlrpc_request *req;
405 __u32 opc = LUSTRE_OPC_ANY;
412 /* Usually we come here only for NFSD, and we want open lock.
413 But we can also get here with pre 2.6.15 patchless kernels, and in
414 that case that lock is also ok */
415 /* We can also get here if there was cached open handle in revalidate_it
416 * but it disappeared while we were getting from there to ll_file_open.
417 * But this means this file was closed and immediatelly opened which
418 * makes a good candidate for using OPEN lock */
419 /* If lmmsize & lmm are not 0, we are just setting stripe info
420 * parameters. No need for the open lock */
421 if (lmm == NULL && lmmsize == 0) {
422 itp->it_flags |= MDS_OPEN_LOCK;
423 if (itp->it_flags & FMODE_WRITE)
424 opc = LUSTRE_OPC_CREATE;
427 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
428 file->f_dentry->d_inode, name, len,
431 RETURN(PTR_ERR(op_data));
433 itp->it_flags |= MDS_OPEN_BY_FID;
434 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
435 0 /*unused */, &req, ll_md_blocking_ast, 0);
436 ll_finish_md_op_data(op_data);
438 /* reason for keep own exit path - don`t flood log
439 * with messages with -ESTALE errors.
441 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
442 it_open_error(DISP_OPEN_OPEN, itp))
444 ll_release_openhandle(file->f_dentry, itp);
448 if (it_disposition(itp, DISP_LOOKUP_NEG))
449 GOTO(out, rc = -ENOENT);
451 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
452 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
453 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
457 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
458 if (!rc && itp->d.lustre.it_lock_mode)
459 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
463 ptlrpc_req_finished(req);
464 ll_intent_drop_lock(itp);
470 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
471 * not believe attributes if a few ioepoch holders exist. Attributes for
472 * previous ioepoch if new one is opened are also skipped by MDS.
474 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
476 if (ioepoch && lli->lli_ioepoch != ioepoch) {
477 lli->lli_ioepoch = ioepoch;
478 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
479 ioepoch, PFID(&lli->lli_fid));
483 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
484 struct obd_client_handle *och)
486 struct ptlrpc_request *req = it->d.lustre.it_data;
487 struct mdt_body *body;
489 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
490 och->och_fh = body->handle;
491 och->och_fid = body->fid1;
492 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
493 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
494 och->och_flags = it->it_flags;
496 return md_set_open_replay_data(md_exp, och, it);
499 int ll_local_open(struct file *file, struct lookup_intent *it,
500 struct ll_file_data *fd, struct obd_client_handle *och)
502 struct inode *inode = file->f_dentry->d_inode;
503 struct ll_inode_info *lli = ll_i2info(inode);
506 LASSERT(!LUSTRE_FPRIVATE(file));
511 struct ptlrpc_request *req = it->d.lustre.it_data;
512 struct mdt_body *body;
515 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
519 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
520 ll_ioepoch_open(lli, body->ioepoch);
523 LUSTRE_FPRIVATE(file) = fd;
524 ll_readahead_init(inode, &fd->fd_ras);
525 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
530 /* Open a file, and (for the very first open) create objects on the OSTs at
531 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
532 * creation or open until ll_lov_setstripe() ioctl is called.
534 * If we already have the stripe MD locally then we don't request it in
535 * md_open(), by passing a lmm_size = 0.
537 * It is up to the application to ensure no other processes open this file
538 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
539 * used. We might be able to avoid races of that sort by getting lli_open_sem
540 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
541 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
543 int ll_file_open(struct inode *inode, struct file *file)
545 struct ll_inode_info *lli = ll_i2info(inode);
546 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
547 .it_flags = file->f_flags };
548 struct obd_client_handle **och_p = NULL;
549 __u64 *och_usecount = NULL;
550 struct ll_file_data *fd;
551 int rc = 0, opendir_set = 0;
554 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
555 PFID(ll_inode2fid(inode)), inode, file->f_flags);
557 it = file->private_data; /* XXX: compat macro */
558 file->private_data = NULL; /* prevent ll_local_open assertion */
560 fd = ll_file_data_get();
562 GOTO(out_openerr, rc = -ENOMEM);
565 if (S_ISDIR(inode->i_mode)) {
566 spin_lock(&lli->lli_sa_lock);
567 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
568 lli->lli_opendir_pid == 0) {
569 lli->lli_opendir_key = fd;
570 lli->lli_opendir_pid = current_pid();
573 spin_unlock(&lli->lli_sa_lock);
576 if (inode->i_sb->s_root == file->f_dentry) {
577 LUSTRE_FPRIVATE(file) = fd;
581 if (!it || !it->d.lustre.it_disposition) {
582 /* Convert f_flags into access mode. We cannot use file->f_mode,
583 * because everything but O_ACCMODE mask was stripped from
585 if ((oit.it_flags + 1) & O_ACCMODE)
587 if (file->f_flags & O_TRUNC)
588 oit.it_flags |= FMODE_WRITE;
590 /* kernel only call f_op->open in dentry_open. filp_open calls
591 * dentry_open after call to open_namei that checks permissions.
592 * Only nfsd_open call dentry_open directly without checking
593 * permissions and because of that this code below is safe. */
594 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
595 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
597 /* We do not want O_EXCL here, presumably we opened the file
598 * already? XXX - NFS implications? */
599 oit.it_flags &= ~O_EXCL;
601 /* bug20584, if "it_flags" contains O_CREAT, the file will be
602 * created if necessary, then "IT_CREAT" should be set to keep
603 * consistent with it */
604 if (oit.it_flags & O_CREAT)
605 oit.it_op |= IT_CREAT;
611 /* Let's see if we have file open on MDS already. */
612 if (it->it_flags & FMODE_WRITE) {
613 och_p = &lli->lli_mds_write_och;
614 och_usecount = &lli->lli_open_fd_write_count;
615 } else if (it->it_flags & FMODE_EXEC) {
616 och_p = &lli->lli_mds_exec_och;
617 och_usecount = &lli->lli_open_fd_exec_count;
619 och_p = &lli->lli_mds_read_och;
620 och_usecount = &lli->lli_open_fd_read_count;
623 mutex_lock(&lli->lli_och_mutex);
624 if (*och_p) { /* Open handle is present */
625 if (it_disposition(it, DISP_OPEN_OPEN)) {
626 /* Well, there's extra open request that we do not need,
627 let's close it somehow. This will decref request. */
628 rc = it_open_error(DISP_OPEN_OPEN, it);
630 mutex_unlock(&lli->lli_och_mutex);
631 GOTO(out_openerr, rc);
634 ll_release_openhandle(file->f_dentry, it);
638 rc = ll_local_open(file, it, fd, NULL);
641 mutex_unlock(&lli->lli_och_mutex);
642 GOTO(out_openerr, rc);
645 LASSERT(*och_usecount == 0);
646 if (!it->d.lustre.it_disposition) {
647 /* We cannot just request lock handle now, new ELC code
648 means that one of other OPEN locks for this file
649 could be cancelled, and since blocking ast handler
650 would attempt to grab och_mutex as well, that would
651 result in a deadlock */
652 mutex_unlock(&lli->lli_och_mutex);
653 it->it_create_mode |= M_CHECK_STALE;
654 rc = ll_intent_file_open(file, NULL, 0, it);
655 it->it_create_mode &= ~M_CHECK_STALE;
657 GOTO(out_openerr, rc);
661 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
663 GOTO(out_och_free, rc = -ENOMEM);
667 /* md_intent_lock() didn't get a request ref if there was an
668 * open error, so don't do cleanup on the request here
670 /* XXX (green): Should not we bail out on any error here, not
671 * just open error? */
672 rc = it_open_error(DISP_OPEN_OPEN, it);
674 GOTO(out_och_free, rc);
676 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
677 "inode %p: disposition %x, status %d\n", inode,
678 it_disposition(it, ~0), it->d.lustre.it_status);
680 rc = ll_local_open(file, it, fd, *och_p);
682 GOTO(out_och_free, rc);
684 mutex_unlock(&lli->lli_och_mutex);
687 /* Must do this outside lli_och_mutex lock to prevent deadlock where
688 different kind of OPEN lock for this same inode gets cancelled
689 by ldlm_cancel_lru */
690 if (!S_ISREG(inode->i_mode))
691 GOTO(out_och_free, rc);
695 if (!lli->lli_has_smd) {
696 if (file->f_flags & O_LOV_DELAY_CREATE ||
697 !(file->f_mode & FMODE_WRITE)) {
698 CDEBUG(D_INODE, "object creation was delayed\n");
699 GOTO(out_och_free, rc);
702 file->f_flags &= ~O_LOV_DELAY_CREATE;
703 GOTO(out_och_free, rc);
707 if (och_p && *och_p) {
708 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
709 *och_p = NULL; /* OBD_FREE writes some magic there */
712 mutex_unlock(&lli->lli_och_mutex);
715 if (opendir_set != 0)
716 ll_stop_statahead(inode, lli->lli_opendir_key);
718 ll_file_data_put(fd);
720 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
723 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
724 ptlrpc_req_finished(it->d.lustre.it_data);
725 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
731 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
732 struct ldlm_lock_desc *desc, void *data, int flag)
735 struct lustre_handle lockh;
739 case LDLM_CB_BLOCKING:
740 ldlm_lock2handle(lock, &lockh);
741 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
743 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
747 case LDLM_CB_CANCELING:
755 * Acquire a lease and open the file.
757 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
758 fmode_t fmode, __u64 open_flags)
760 struct lookup_intent it = { .it_op = IT_OPEN };
761 struct ll_sb_info *sbi = ll_i2sbi(inode);
762 struct md_op_data *op_data;
763 struct ptlrpc_request *req;
764 struct lustre_handle old_handle = { 0 };
765 struct obd_client_handle *och = NULL;
770 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
771 RETURN(ERR_PTR(-EINVAL));
774 struct ll_inode_info *lli = ll_i2info(inode);
775 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
776 struct obd_client_handle **och_p;
779 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
780 RETURN(ERR_PTR(-EPERM));
782 /* Get the openhandle of the file */
784 mutex_lock(&lli->lli_och_mutex);
785 if (fd->fd_lease_och != NULL) {
786 mutex_unlock(&lli->lli_och_mutex);
790 if (fd->fd_och == NULL) {
791 if (file->f_mode & FMODE_WRITE) {
792 LASSERT(lli->lli_mds_write_och != NULL);
793 och_p = &lli->lli_mds_write_och;
794 och_usecount = &lli->lli_open_fd_write_count;
796 LASSERT(lli->lli_mds_read_och != NULL);
797 och_p = &lli->lli_mds_read_och;
798 och_usecount = &lli->lli_open_fd_read_count;
800 if (*och_usecount == 1) {
807 mutex_unlock(&lli->lli_och_mutex);
808 if (rc < 0) /* more than 1 opener */
811 LASSERT(fd->fd_och != NULL);
812 old_handle = fd->fd_och->och_fh;
817 RETURN(ERR_PTR(-ENOMEM));
819 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
820 LUSTRE_OPC_ANY, NULL);
822 GOTO(out, rc = PTR_ERR(op_data));
824 /* To tell the MDT this openhandle is from the same owner */
825 op_data->op_handle = old_handle;
827 it.it_flags = fmode | open_flags;
828 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
829 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
830 ll_md_blocking_lease_ast,
831 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
832 * it can be cancelled which may mislead applications that the lease is
834 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
835 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
836 * doesn't deal with openhandle, so normal openhandle will be leaked. */
837 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
838 ll_finish_md_op_data(op_data);
839 ptlrpc_req_finished(req);
841 GOTO(out_release_it, rc);
843 if (it_disposition(&it, DISP_LOOKUP_NEG))
844 GOTO(out_release_it, rc = -ENOENT);
846 rc = it_open_error(DISP_OPEN_OPEN, &it);
848 GOTO(out_release_it, rc);
850 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
851 ll_och_fill(sbi->ll_md_exp, &it, och);
853 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
854 GOTO(out_close, rc = -EOPNOTSUPP);
856 /* already get lease, handle lease lock */
857 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
858 if (it.d.lustre.it_lock_mode == 0 ||
859 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
860 /* open lock must return for lease */
861 CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
862 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
863 it.d.lustre.it_lock_bits);
864 GOTO(out_close, rc = -EPROTO);
867 ll_intent_release(&it);
871 /* Cancel open lock */
872 if (it.d.lustre.it_lock_mode != 0) {
873 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
874 it.d.lustre.it_lock_mode);
875 it.d.lustre.it_lock_mode = 0;
876 och->och_lease_handle.cookie = 0ULL;
878 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
880 CERROR("%s: error closing file "DFID": %d\n",
881 ll_get_fsname(inode->i_sb, NULL, 0),
882 PFID(&ll_i2info(inode)->lli_fid), rc2);
883 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
885 ll_intent_release(&it);
891 EXPORT_SYMBOL(ll_lease_open);
894 * Release lease and close the file.
895 * It will check if the lease has ever broken.
897 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
900 struct ldlm_lock *lock;
901 bool cancelled = true;
905 lock = ldlm_handle2lock(&och->och_lease_handle);
907 lock_res_and_lock(lock);
908 cancelled = ldlm_is_cancel(lock);
909 unlock_res_and_lock(lock);
913 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
914 PFID(&ll_i2info(inode)->lli_fid), cancelled);
917 ldlm_cli_cancel(&och->och_lease_handle, 0);
918 if (lease_broken != NULL)
919 *lease_broken = cancelled;
921 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
925 EXPORT_SYMBOL(ll_lease_close);
927 /* Fills the obdo with the attributes for the lsm */
928 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
929 struct obd_capa *capa, struct obdo *obdo,
930 __u64 ioepoch, int dv_flags)
932 struct ptlrpc_request_set *set;
933 struct obd_info oinfo = { { { 0 } } };
938 LASSERT(lsm != NULL);
942 oinfo.oi_oa->o_oi = lsm->lsm_oi;
943 oinfo.oi_oa->o_mode = S_IFREG;
944 oinfo.oi_oa->o_ioepoch = ioepoch;
945 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
946 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
947 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
948 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
949 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
950 OBD_MD_FLDATAVERSION;
951 oinfo.oi_capa = capa;
952 if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
953 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
954 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
955 if (dv_flags & LL_DV_WR_FLUSH)
956 oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
959 set = ptlrpc_prep_set();
961 CERROR("can't allocate ptlrpc set\n");
964 rc = obd_getattr_async(exp, &oinfo, set);
966 rc = ptlrpc_set_wait(set);
967 ptlrpc_set_destroy(set);
970 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
971 OBD_MD_FLATIME | OBD_MD_FLMTIME |
972 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
973 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
974 if (dv_flags & LL_DV_WR_FLUSH &&
975 !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
976 oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
983 * Performs the getattr on the inode and updates its fields.
984 * If @sync != 0, perform the getattr under the server-side lock.
986 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
987 __u64 ioepoch, int sync)
989 struct obd_capa *capa = ll_mdscapa_get(inode);
990 struct lov_stripe_md *lsm;
994 lsm = ccc_inode_lsm_get(inode);
995 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
996 capa, obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
999 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
1001 obdo_refresh_inode(inode, obdo, obdo->o_valid);
1002 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
1003 " blksize %lu\n", POSTID(oi), i_size_read(inode),
1004 (unsigned long long)inode->i_blocks,
1005 (unsigned long)ll_inode_blksize(inode));
1007 ccc_inode_lsm_put(inode, lsm);
1011 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
1013 struct ll_inode_info *lli = ll_i2info(inode);
1014 struct cl_object *obj = lli->lli_clob;
1015 struct cl_attr *attr = ccc_env_thread_attr(env);
1021 ll_inode_size_lock(inode);
1022 /* merge timestamps the most recently obtained from mds with
1023 timestamps obtained from osts */
1024 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
1025 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
1026 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
1027 inode_init_lvb(inode, &lvb);
1029 cl_object_attr_lock(obj);
1030 rc = cl_object_attr_get(env, obj, attr);
1031 cl_object_attr_unlock(obj);
1034 if (lvb.lvb_atime < attr->cat_atime)
1035 lvb.lvb_atime = attr->cat_atime;
1036 if (lvb.lvb_ctime < attr->cat_ctime)
1037 lvb.lvb_ctime = attr->cat_ctime;
1038 if (lvb.lvb_mtime < attr->cat_mtime)
1039 lvb.lvb_mtime = attr->cat_mtime;
1041 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1042 PFID(&lli->lli_fid), attr->cat_size);
1043 cl_isize_write_nolock(inode, attr->cat_size);
1045 inode->i_blocks = attr->cat_blocks;
1047 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1048 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1049 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1051 ll_inode_size_unlock(inode);
1056 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1059 struct obdo obdo = { 0 };
1062 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1064 st->st_size = obdo.o_size;
1065 st->st_blocks = obdo.o_blocks;
1066 st->st_mtime = obdo.o_mtime;
1067 st->st_atime = obdo.o_atime;
1068 st->st_ctime = obdo.o_ctime;
1073 static bool file_is_noatime(const struct file *file)
1075 const struct vfsmount *mnt = file->f_path.mnt;
1076 const struct inode *inode = file->f_path.dentry->d_inode;
1078 /* Adapted from file_accessed() and touch_atime().*/
1079 if (file->f_flags & O_NOATIME)
1082 if (inode->i_flags & S_NOATIME)
1085 if (IS_NOATIME(inode))
1088 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1091 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1094 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1100 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1102 struct inode *inode = file->f_dentry->d_inode;
1104 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1106 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1107 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1108 file->f_flags & O_DIRECT ||
1111 io->ci_obj = ll_i2info(inode)->lli_clob;
1112 io->ci_lockreq = CILR_MAYBE;
1113 if (ll_file_nolock(file)) {
1114 io->ci_lockreq = CILR_NEVER;
1115 io->ci_no_srvlock = 1;
1116 } else if (file->f_flags & O_APPEND) {
1117 io->ci_lockreq = CILR_MANDATORY;
1120 io->ci_noatime = file_is_noatime(file);
1124 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1125 struct file *file, enum cl_io_type iot,
1126 loff_t *ppos, size_t count)
1128 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1129 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1135 io = ccc_env_thread_io(env);
1136 ll_io_init(io, file, iot == CIT_WRITE);
1138 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1139 struct vvp_io *vio = vvp_env_io(env);
1140 struct ccc_io *cio = ccc_env_io(env);
1141 int write_mutex_locked = 0;
1143 cio->cui_fd = LUSTRE_FPRIVATE(file);
1144 vio->cui_io_subtype = args->via_io_subtype;
1146 switch (vio->cui_io_subtype) {
1148 cio->cui_iov = args->u.normal.via_iov;
1149 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1150 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1151 cio->cui_iocb = args->u.normal.via_iocb;
1152 if ((iot == CIT_WRITE) &&
1153 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1154 if (mutex_lock_interruptible(&lli->
1156 GOTO(out, result = -ERESTARTSYS);
1157 write_mutex_locked = 1;
1158 } else if (iot == CIT_READ) {
1159 down_read(&lli->lli_trunc_sem);
1163 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1164 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
1167 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1168 vio->u.splice.cui_flags = args->u.splice.via_flags;
1171 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
1174 result = cl_io_loop(env, io);
1175 if (write_mutex_locked)
1176 mutex_unlock(&lli->lli_write_mutex);
1177 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
1178 up_read(&lli->lli_trunc_sem);
1180 /* cl_io_rw_init() handled IO */
1181 result = io->ci_result;
1184 if (io->ci_nob > 0) {
1185 result = io->ci_nob;
1186 *ppos = io->u.ci_wr.wr.crw_pos;
1190 cl_io_fini(env, io);
1191 /* If any bit been read/written (result != 0), we just return
1192 * short read/write instead of restart io. */
1193 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1194 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1195 iot == CIT_READ ? "read" : "write",
1196 file->f_dentry->d_name.name, *ppos, count);
1197 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1201 if (iot == CIT_READ) {
1203 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1204 LPROC_LL_READ_BYTES, result);
1205 } else if (iot == CIT_WRITE) {
1207 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1208 LPROC_LL_WRITE_BYTES, result);
1209 fd->fd_write_failed = false;
1210 } else if (result != -ERESTARTSYS) {
1211 fd->fd_write_failed = true;
1220 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
1222 static int ll_file_get_iov_count(const struct iovec *iov,
1223 unsigned long *nr_segs, size_t *count)
1228 for (seg = 0; seg < *nr_segs; seg++) {
1229 const struct iovec *iv = &iov[seg];
1232 * If any segment has a negative length, or the cumulative
1233 * length ever wraps negative then return -EINVAL.
1236 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1238 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1243 cnt -= iv->iov_len; /* This segment is no good */
1250 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1251 unsigned long nr_segs, loff_t pos)
1254 struct vvp_io_args *args;
1260 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1264 env = cl_env_get(&refcheck);
1266 RETURN(PTR_ERR(env));
1268 args = vvp_env_args(env, IO_NORMAL);
1269 args->u.normal.via_iov = (struct iovec *)iov;
1270 args->u.normal.via_nrsegs = nr_segs;
1271 args->u.normal.via_iocb = iocb;
1273 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1274 &iocb->ki_pos, count);
1275 cl_env_put(env, &refcheck);
1279 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1283 struct iovec *local_iov;
1284 struct kiocb *kiocb;
1289 env = cl_env_get(&refcheck);
1291 RETURN(PTR_ERR(env));
1293 local_iov = &vvp_env_info(env)->vti_local_iov;
1294 kiocb = &vvp_env_info(env)->vti_kiocb;
1295 local_iov->iov_base = (void __user *)buf;
1296 local_iov->iov_len = count;
1297 init_sync_kiocb(kiocb, file);
1298 kiocb->ki_pos = *ppos;
1299 kiocb->ki_left = count;
1301 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1302 *ppos = kiocb->ki_pos;
1304 cl_env_put(env, &refcheck);
1309 * Write to a file (through the page cache).
1312 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1313 unsigned long nr_segs, loff_t pos)
1316 struct vvp_io_args *args;
1322 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1326 env = cl_env_get(&refcheck);
1328 RETURN(PTR_ERR(env));
1330 args = vvp_env_args(env, IO_NORMAL);
1331 args->u.normal.via_iov = (struct iovec *)iov;
1332 args->u.normal.via_nrsegs = nr_segs;
1333 args->u.normal.via_iocb = iocb;
1335 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1336 &iocb->ki_pos, count);
1337 cl_env_put(env, &refcheck);
1341 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1345 struct iovec *local_iov;
1346 struct kiocb *kiocb;
1351 env = cl_env_get(&refcheck);
1353 RETURN(PTR_ERR(env));
1355 local_iov = &vvp_env_info(env)->vti_local_iov;
1356 kiocb = &vvp_env_info(env)->vti_kiocb;
1357 local_iov->iov_base = (void __user *)buf;
1358 local_iov->iov_len = count;
1359 init_sync_kiocb(kiocb, file);
1360 kiocb->ki_pos = *ppos;
1361 kiocb->ki_left = count;
1363 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1364 *ppos = kiocb->ki_pos;
1366 cl_env_put(env, &refcheck);
1371 * Send file content (through pagecache) somewhere with helper
1373 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1374 struct pipe_inode_info *pipe, size_t count,
1378 struct vvp_io_args *args;
1383 env = cl_env_get(&refcheck);
1385 RETURN(PTR_ERR(env));
1387 args = vvp_env_args(env, IO_SPLICE);
1388 args->u.splice.via_pipe = pipe;
1389 args->u.splice.via_flags = flags;
1391 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1392 cl_env_put(env, &refcheck);
1396 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
1399 struct obd_export *exp = ll_i2dtexp(inode);
1400 struct obd_trans_info oti = { 0 };
1401 struct obdo *oa = NULL;
1404 struct lov_stripe_md *lsm = NULL, *lsm2;
1411 lsm = ccc_inode_lsm_get(inode);
1412 if (!lsm_has_objects(lsm))
1413 GOTO(out, rc = -ENOENT);
1415 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1416 (lsm->lsm_stripe_count));
1418 OBD_ALLOC_LARGE(lsm2, lsm_size);
1420 GOTO(out, rc = -ENOMEM);
1423 oa->o_nlink = ost_idx;
1424 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1425 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1426 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1427 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1428 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1429 memcpy(lsm2, lsm, lsm_size);
1430 ll_inode_size_lock(inode);
1431 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1432 ll_inode_size_unlock(inode);
1434 OBD_FREE_LARGE(lsm2, lsm_size);
1437 ccc_inode_lsm_put(inode, lsm);
1442 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1444 struct ll_recreate_obj ucreat;
1448 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1451 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1455 ostid_set_seq_mdt0(&oi);
1456 ostid_set_id(&oi, ucreat.lrc_id);
1457 RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
1460 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1467 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1470 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1473 fid_to_ostid(&fid, &oi);
1474 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1475 RETURN(ll_lov_recreate(inode, &oi, ost_idx));
1478 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1479 __u64 flags, struct lov_user_md *lum,
1482 struct lov_stripe_md *lsm = NULL;
1483 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1487 lsm = ccc_inode_lsm_get(inode);
1489 ccc_inode_lsm_put(inode, lsm);
1490 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1491 PFID(ll_inode2fid(inode)));
1495 ll_inode_size_lock(inode);
1496 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1499 rc = oit.d.lustre.it_status;
1501 GOTO(out_req_free, rc);
1503 ll_release_openhandle(file->f_dentry, &oit);
1506 ll_inode_size_unlock(inode);
1507 ll_intent_release(&oit);
1508 ccc_inode_lsm_put(inode, lsm);
1511 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1515 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1516 struct lov_mds_md **lmmp, int *lmm_size,
1517 struct ptlrpc_request **request)
1519 struct ll_sb_info *sbi = ll_i2sbi(inode);
1520 struct mdt_body *body;
1521 struct lov_mds_md *lmm = NULL;
1522 struct ptlrpc_request *req = NULL;
1523 struct md_op_data *op_data;
1526 rc = ll_get_max_mdsize(sbi, &lmmsize);
1530 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1531 strlen(filename), lmmsize,
1532 LUSTRE_OPC_ANY, NULL);
1533 if (IS_ERR(op_data))
1534 RETURN(PTR_ERR(op_data));
1536 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1537 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1538 ll_finish_md_op_data(op_data);
1540 CDEBUG(D_INFO, "md_getattr_name failed "
1541 "on %s: rc %d\n", filename, rc);
1545 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1546 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1548 lmmsize = body->eadatasize;
1550 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1552 GOTO(out, rc = -ENODATA);
1555 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1556 LASSERT(lmm != NULL);
1558 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1559 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1560 GOTO(out, rc = -EPROTO);
1564 * This is coming from the MDS, so is probably in
1565 * little endian. We convert it to host endian before
1566 * passing it to userspace.
1568 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1571 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1572 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1575 /* if function called for directory - we should
1576 * avoid swab not existent lsm objects */
1577 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1578 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1579 if (S_ISREG(body->mode))
1580 lustre_swab_lov_user_md_objects(
1581 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1583 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1584 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1585 if (S_ISREG(body->mode))
1586 lustre_swab_lov_user_md_objects(
1587 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1594 *lmm_size = lmmsize;
1599 static int ll_lov_setea(struct inode *inode, struct file *file,
1602 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1603 struct lov_user_md *lump;
1604 int lum_size = sizeof(struct lov_user_md) +
1605 sizeof(struct lov_user_ost_data);
1609 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1612 OBD_ALLOC_LARGE(lump, lum_size);
1616 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1617 OBD_FREE_LARGE(lump, lum_size);
1621 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1623 OBD_FREE_LARGE(lump, lum_size);
1627 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1630 struct lov_user_md_v3 lumv3;
1631 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1632 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1633 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1635 __u64 flags = FMODE_WRITE;
1638 /* first try with v1 which is smaller than v3 */
1639 lum_size = sizeof(struct lov_user_md_v1);
1640 if (copy_from_user(lumv1, lumv1p, lum_size))
1643 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1644 lum_size = sizeof(struct lov_user_md_v3);
1645 if (copy_from_user(&lumv3, lumv3p, lum_size))
1649 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1651 struct lov_stripe_md *lsm;
1654 put_user(0, &lumv1p->lmm_stripe_count);
1656 ll_layout_refresh(inode, &gen);
1657 lsm = ccc_inode_lsm_get(inode);
1658 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1659 0, lsm, (void *)arg);
1660 ccc_inode_lsm_put(inode, lsm);
1665 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1667 struct lov_stripe_md *lsm;
1671 lsm = ccc_inode_lsm_get(inode);
1673 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1675 ccc_inode_lsm_put(inode, lsm);
1679 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1681 struct ll_inode_info *lli = ll_i2info(inode);
1682 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1683 struct ccc_grouplock grouplock;
1687 if (ll_file_nolock(file))
1688 RETURN(-EOPNOTSUPP);
1690 spin_lock(&lli->lli_lock);
1691 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1692 CWARN("group lock already existed with gid %lu\n",
1693 fd->fd_grouplock.cg_gid);
1694 spin_unlock(&lli->lli_lock);
1697 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1698 spin_unlock(&lli->lli_lock);
1700 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1701 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1705 spin_lock(&lli->lli_lock);
1706 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1707 spin_unlock(&lli->lli_lock);
1708 CERROR("another thread just won the race\n");
1709 cl_put_grouplock(&grouplock);
1713 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1714 fd->fd_grouplock = grouplock;
1715 spin_unlock(&lli->lli_lock);
1717 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
1721 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1723 struct ll_inode_info *lli = ll_i2info(inode);
1724 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1725 struct ccc_grouplock grouplock;
1728 spin_lock(&lli->lli_lock);
1729 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1730 spin_unlock(&lli->lli_lock);
1731 CWARN("no group lock held\n");
1734 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1736 if (fd->fd_grouplock.cg_gid != arg) {
1737 CWARN("group lock %lu doesn't match current id %lu\n",
1738 arg, fd->fd_grouplock.cg_gid);
1739 spin_unlock(&lli->lli_lock);
1743 grouplock = fd->fd_grouplock;
1744 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1745 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1746 spin_unlock(&lli->lli_lock);
1748 cl_put_grouplock(&grouplock);
1749 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1754 * Close inode open handle
1756 * \param dentry [in] dentry which contains the inode
1757 * \param it [in,out] intent which contains open info and result
1760 * \retval <0 failure
1762 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1764 struct inode *inode = dentry->d_inode;
1765 struct obd_client_handle *och;
1771 /* Root ? Do nothing. */
1772 if (dentry->d_inode->i_sb->s_root == dentry)
1775 /* No open handle to close? Move away */
1776 if (!it_disposition(it, DISP_OPEN_OPEN))
1779 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1781 OBD_ALLOC(och, sizeof(*och));
1783 GOTO(out, rc = -ENOMEM);
1785 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1787 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1790 /* this one is in place of ll_file_open */
1791 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1792 ptlrpc_req_finished(it->d.lustre.it_data);
1793 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1799 * Get size for inode for which FIEMAP mapping is requested.
1800 * Make the FIEMAP get_info call and returns the result.
1802 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1805 struct obd_export *exp = ll_i2dtexp(inode);
1806 struct lov_stripe_md *lsm = NULL;
1807 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1808 int vallen = num_bytes;
1812 /* Checks for fiemap flags */
1813 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1814 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1818 /* Check for FIEMAP_FLAG_SYNC */
1819 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1820 rc = filemap_fdatawrite(inode->i_mapping);
1825 lsm = ccc_inode_lsm_get(inode);
1829 /* If the stripe_count > 1 and the application does not understand
1830 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1832 if (lsm->lsm_stripe_count > 1 &&
1833 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1834 GOTO(out, rc = -EOPNOTSUPP);
1836 fm_key.oa.o_oi = lsm->lsm_oi;
1837 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1839 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1840 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1841 /* If filesize is 0, then there would be no objects for mapping */
1842 if (fm_key.oa.o_size == 0) {
1843 fiemap->fm_mapped_extents = 0;
1847 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1849 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1852 CERROR("obd_get_info failed: rc = %d\n", rc);
1855 ccc_inode_lsm_put(inode, lsm);
1859 int ll_fid2path(struct inode *inode, void *arg)
1861 struct obd_export *exp = ll_i2mdexp(inode);
1862 struct getinfo_fid2path *gfout, *gfin;
1866 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1867 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1870 /* Need to get the buflen */
1871 OBD_ALLOC_PTR(gfin);
1874 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1879 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1880 OBD_ALLOC(gfout, outsize);
1881 if (gfout == NULL) {
1885 memcpy(gfout, gfin, sizeof(*gfout));
1888 /* Call mdc_iocontrol */
1889 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1893 if (copy_to_user(arg, gfout, outsize))
1897 OBD_FREE(gfout, outsize);
1901 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1903 struct ll_user_fiemap *fiemap_s;
1904 size_t num_bytes, ret_bytes;
1905 unsigned int extent_count;
1908 /* Get the extent count so we can calculate the size of
1909 * required fiemap buffer */
1910 if (get_user(extent_count,
1911 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1913 num_bytes = sizeof(*fiemap_s) + (extent_count *
1914 sizeof(struct ll_fiemap_extent));
1916 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1917 if (fiemap_s == NULL)
1920 /* get the fiemap value */
1921 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1923 GOTO(error, rc = -EFAULT);
1925 /* If fm_extent_count is non-zero, read the first extent since
1926 * it is used to calculate end_offset and device from previous
1929 if (copy_from_user(&fiemap_s->fm_extents[0],
1930 (char __user *)arg + sizeof(*fiemap_s),
1931 sizeof(struct ll_fiemap_extent)))
1932 GOTO(error, rc = -EFAULT);
1935 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1939 ret_bytes = sizeof(struct ll_user_fiemap);
1941 if (extent_count != 0)
1942 ret_bytes += (fiemap_s->fm_mapped_extents *
1943 sizeof(struct ll_fiemap_extent));
1945 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1949 OBD_FREE_LARGE(fiemap_s, num_bytes);
1954 * Read the data_version for inode.
1956 * This value is computed using stripe object version on OST.
1957 * Version is computed using server side locking.
1959 * @param sync if do sync on the OST side;
1961 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1962 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1964 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1966 struct lov_stripe_md *lsm = NULL;
1967 struct ll_sb_info *sbi = ll_i2sbi(inode);
1968 struct obdo *obdo = NULL;
1972 /* If no stripe, we consider version is 0. */
1973 lsm = ccc_inode_lsm_get(inode);
1974 if (!lsm_has_objects(lsm)) {
1976 CDEBUG(D_INODE, "No object for inode\n");
1980 OBD_ALLOC_PTR(obdo);
1982 GOTO(out, rc = -ENOMEM);
1984 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, flags);
1986 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1989 *data_version = obdo->o_data_version;
1995 ccc_inode_lsm_put(inode, lsm);
2000 * Trigger a HSM release request for the provided inode.
2002 int ll_hsm_release(struct inode *inode)
2004 struct cl_env_nest nest;
2006 struct obd_client_handle *och = NULL;
2007 __u64 data_version = 0;
2011 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
2012 ll_get_fsname(inode->i_sb, NULL, 0),
2013 PFID(&ll_i2info(inode)->lli_fid));
2015 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
2017 GOTO(out, rc = PTR_ERR(och));
2019 /* Grab latest data_version and [am]time values */
2020 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
2024 env = cl_env_nested_get(&nest);
2026 GOTO(out, rc = PTR_ERR(env));
2028 ll_merge_lvb(env, inode);
2029 cl_env_nested_put(&nest, env);
2031 /* Release the file.
2032 * NB: lease lock handle is released in mdc_hsm_release_pack() because
2033 * we still need it to pack l_remote_handle to MDT. */
2034 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
2040 if (och != NULL && !IS_ERR(och)) /* close the file */
2041 ll_lease_close(och, inode, NULL);
2046 struct ll_swap_stack {
2047 struct iattr ia1, ia2;
2049 struct inode *inode1, *inode2;
2050 bool check_dv1, check_dv2;
2053 static int ll_swap_layouts(struct file *file1, struct file *file2,
2054 struct lustre_swap_layouts *lsl)
2056 struct mdc_swap_layouts msl;
2057 struct md_op_data *op_data;
2060 struct ll_swap_stack *llss = NULL;
2063 OBD_ALLOC_PTR(llss);
2067 llss->inode1 = file1->f_dentry->d_inode;
2068 llss->inode2 = file2->f_dentry->d_inode;
2070 if (!S_ISREG(llss->inode2->i_mode))
2071 GOTO(free, rc = -EINVAL);
2073 if (inode_permission(llss->inode1, MAY_WRITE) ||
2074 inode_permission(llss->inode2, MAY_WRITE))
2075 GOTO(free, rc = -EPERM);
2077 if (llss->inode2->i_sb != llss->inode1->i_sb)
2078 GOTO(free, rc = -EXDEV);
2080 /* we use 2 bool because it is easier to swap than 2 bits */
2081 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2082 llss->check_dv1 = true;
2084 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2085 llss->check_dv2 = true;
2087 /* we cannot use lsl->sl_dvX directly because we may swap them */
2088 llss->dv1 = lsl->sl_dv1;
2089 llss->dv2 = lsl->sl_dv2;
2091 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2092 if (rc == 0) /* same file, done! */
2095 if (rc < 0) { /* sequentialize it */
2096 swap(llss->inode1, llss->inode2);
2098 swap(llss->dv1, llss->dv2);
2099 swap(llss->check_dv1, llss->check_dv2);
2103 if (gid != 0) { /* application asks to flush dirty cache */
2104 rc = ll_get_grouplock(llss->inode1, file1, gid);
2108 rc = ll_get_grouplock(llss->inode2, file2, gid);
2110 ll_put_grouplock(llss->inode1, file1, gid);
2115 /* to be able to restore mtime and atime after swap
2116 * we need to first save them */
2118 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2119 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2120 llss->ia1.ia_atime = llss->inode1->i_atime;
2121 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2122 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2123 llss->ia2.ia_atime = llss->inode2->i_atime;
2124 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2127 /* ultimate check, before swaping the layouts we check if
2128 * dataversion has changed (if requested) */
2129 if (llss->check_dv1) {
2130 rc = ll_data_version(llss->inode1, &dv, 0);
2133 if (dv != llss->dv1)
2134 GOTO(putgl, rc = -EAGAIN);
2137 if (llss->check_dv2) {
2138 rc = ll_data_version(llss->inode2, &dv, 0);
2141 if (dv != llss->dv2)
2142 GOTO(putgl, rc = -EAGAIN);
2145 /* struct md_op_data is used to send the swap args to the mdt
2146 * only flags is missing, so we use struct mdc_swap_layouts
2147 * through the md_op_data->op_data */
2148 /* flags from user space have to be converted before they are send to
2149 * server, no flag is sent today, they are only used on the client */
2152 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2153 0, LUSTRE_OPC_ANY, &msl);
2154 if (IS_ERR(op_data))
2155 GOTO(free, rc = PTR_ERR(op_data));
2157 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2158 sizeof(*op_data), op_data, NULL);
2159 ll_finish_md_op_data(op_data);
2163 ll_put_grouplock(llss->inode2, file2, gid);
2164 ll_put_grouplock(llss->inode1, file1, gid);
2167 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2171 /* clear useless flags */
2172 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2173 llss->ia1.ia_valid &= ~ATTR_MTIME;
2174 llss->ia2.ia_valid &= ~ATTR_MTIME;
2177 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2178 llss->ia1.ia_valid &= ~ATTR_ATIME;
2179 llss->ia2.ia_valid &= ~ATTR_ATIME;
2182 /* update time if requested */
2184 if (llss->ia2.ia_valid != 0) {
2185 mutex_lock(&llss->inode1->i_mutex);
2186 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2187 mutex_unlock(&llss->inode1->i_mutex);
2190 if (llss->ia1.ia_valid != 0) {
2193 mutex_lock(&llss->inode2->i_mutex);
2194 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2195 mutex_unlock(&llss->inode2->i_mutex);
2207 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2209 struct md_op_data *op_data;
2212 /* Non-root users are forbidden to set or clear flags which are
2213 * NOT defined in HSM_USER_MASK. */
2214 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2215 !cfs_capable(CFS_CAP_SYS_ADMIN))
2218 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2219 LUSTRE_OPC_ANY, hss);
2220 if (IS_ERR(op_data))
2221 RETURN(PTR_ERR(op_data));
2223 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2224 sizeof(*op_data), op_data, NULL);
2226 ll_finish_md_op_data(op_data);
2231 static int ll_hsm_import(struct inode *inode, struct file *file,
2232 struct hsm_user_import *hui)
2234 struct hsm_state_set *hss = NULL;
2235 struct iattr *attr = NULL;
2239 if (!S_ISREG(inode->i_mode))
2245 GOTO(out, rc = -ENOMEM);
2247 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2248 hss->hss_archive_id = hui->hui_archive_id;
2249 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2250 rc = ll_hsm_state_set(inode, hss);
2254 OBD_ALLOC_PTR(attr);
2256 GOTO(out, rc = -ENOMEM);
2258 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2259 attr->ia_mode |= S_IFREG;
2260 attr->ia_uid = hui->hui_uid;
2261 attr->ia_gid = hui->hui_gid;
2262 attr->ia_size = hui->hui_size;
2263 attr->ia_mtime.tv_sec = hui->hui_mtime;
2264 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2265 attr->ia_atime.tv_sec = hui->hui_atime;
2266 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2268 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2269 ATTR_UID | ATTR_GID |
2270 ATTR_MTIME | ATTR_MTIME_SET |
2271 ATTR_ATIME | ATTR_ATIME_SET;
2273 rc = ll_setattr_raw(file->f_dentry, attr, true);
2287 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2289 struct inode *inode = file->f_dentry->d_inode;
2290 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2294 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2295 PFID(ll_inode2fid(inode)), inode, cmd);
2296 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2298 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2299 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2303 case LL_IOC_GETFLAGS:
2304 /* Get the current value of the file flags */
2305 return put_user(fd->fd_flags, (int *)arg);
2306 case LL_IOC_SETFLAGS:
2307 case LL_IOC_CLRFLAGS:
2308 /* Set or clear specific file flags */
2309 /* XXX This probably needs checks to ensure the flags are
2310 * not abused, and to handle any flag side effects.
2312 if (get_user(flags, (int *) arg))
2315 if (cmd == LL_IOC_SETFLAGS) {
2316 if ((flags & LL_FILE_IGNORE_LOCK) &&
2317 !(file->f_flags & O_DIRECT)) {
2318 CERROR("%s: unable to disable locking on "
2319 "non-O_DIRECT file\n", current->comm);
2323 fd->fd_flags |= flags;
2325 fd->fd_flags &= ~flags;
2328 case LL_IOC_LOV_SETSTRIPE:
2329 RETURN(ll_lov_setstripe(inode, file, arg));
2330 case LL_IOC_LOV_SETEA:
2331 RETURN(ll_lov_setea(inode, file, arg));
2332 case LL_IOC_LOV_SWAP_LAYOUTS: {
2334 struct lustre_swap_layouts lsl;
2336 if (copy_from_user(&lsl, (char *)arg,
2337 sizeof(struct lustre_swap_layouts)))
2340 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2343 file2 = fget(lsl.sl_fd);
2348 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2349 rc = ll_swap_layouts(file, file2, &lsl);
2353 case LL_IOC_LOV_GETSTRIPE:
2354 RETURN(ll_lov_getstripe(inode, arg));
2355 case LL_IOC_RECREATE_OBJ:
2356 RETURN(ll_lov_recreate_obj(inode, arg));
2357 case LL_IOC_RECREATE_FID:
2358 RETURN(ll_lov_recreate_fid(inode, arg));
2359 case FSFILT_IOC_FIEMAP:
2360 RETURN(ll_ioctl_fiemap(inode, arg));
2361 case FSFILT_IOC_GETFLAGS:
2362 case FSFILT_IOC_SETFLAGS:
2363 RETURN(ll_iocontrol(inode, file, cmd, arg));
2364 case FSFILT_IOC_GETVERSION_OLD:
2365 case FSFILT_IOC_GETVERSION:
2366 RETURN(put_user(inode->i_generation, (int *)arg));
2367 case LL_IOC_GROUP_LOCK:
2368 RETURN(ll_get_grouplock(inode, file, arg));
2369 case LL_IOC_GROUP_UNLOCK:
2370 RETURN(ll_put_grouplock(inode, file, arg));
2371 case IOC_OBD_STATFS:
2372 RETURN(ll_obd_statfs(inode, (void *)arg));
2374 /* We need to special case any other ioctls we want to handle,
2375 * to send them to the MDS/OST as appropriate and to properly
2376 * network encode the arg field.
2377 case FSFILT_IOC_SETVERSION_OLD:
2378 case FSFILT_IOC_SETVERSION:
2380 case LL_IOC_FLUSHCTX:
2381 RETURN(ll_flush_ctx(inode));
2382 case LL_IOC_PATH2FID: {
2383 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2384 sizeof(struct lu_fid)))
2389 case OBD_IOC_FID2PATH:
2390 RETURN(ll_fid2path(inode, (void *)arg));
2391 case LL_IOC_DATA_VERSION: {
2392 struct ioc_data_version idv;
2395 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
2398 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2399 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2401 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2407 case LL_IOC_GET_MDTIDX: {
2410 mdtidx = ll_get_mdt_idx(inode);
2414 if (put_user((int)mdtidx, (int*)arg))
2419 case OBD_IOC_GETDTNAME:
2420 case OBD_IOC_GETMDNAME:
2421 RETURN(ll_get_obd_name(inode, cmd, arg));
2422 case LL_IOC_HSM_STATE_GET: {
2423 struct md_op_data *op_data;
2424 struct hsm_user_state *hus;
2431 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2432 LUSTRE_OPC_ANY, hus);
2433 if (IS_ERR(op_data)) {
2435 RETURN(PTR_ERR(op_data));
2438 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2441 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2444 ll_finish_md_op_data(op_data);
2448 case LL_IOC_HSM_STATE_SET: {
2449 struct hsm_state_set *hss;
2456 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2461 rc = ll_hsm_state_set(inode, hss);
2466 case LL_IOC_HSM_ACTION: {
2467 struct md_op_data *op_data;
2468 struct hsm_current_action *hca;
2475 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2476 LUSTRE_OPC_ANY, hca);
2477 if (IS_ERR(op_data)) {
2479 RETURN(PTR_ERR(op_data));
2482 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2485 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2488 ll_finish_md_op_data(op_data);
2492 case LL_IOC_SET_LEASE: {
2493 struct ll_inode_info *lli = ll_i2info(inode);
2494 struct obd_client_handle *och = NULL;
2500 if (!(file->f_mode & FMODE_WRITE))
2505 if (!(file->f_mode & FMODE_READ))
2510 mutex_lock(&lli->lli_och_mutex);
2511 if (fd->fd_lease_och != NULL) {
2512 och = fd->fd_lease_och;
2513 fd->fd_lease_och = NULL;
2515 mutex_unlock(&lli->lli_och_mutex);
2518 mode = och->och_flags &(FMODE_READ|FMODE_WRITE);
2519 rc = ll_lease_close(och, inode, &lease_broken);
2520 if (rc == 0 && lease_broken)
2526 /* return the type of lease or error */
2527 RETURN(rc < 0 ? rc : (int)mode);
2532 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2534 /* apply for lease */
2535 och = ll_lease_open(inode, file, mode, 0);
2537 RETURN(PTR_ERR(och));
2540 mutex_lock(&lli->lli_och_mutex);
2541 if (fd->fd_lease_och == NULL) {
2542 fd->fd_lease_och = och;
2545 mutex_unlock(&lli->lli_och_mutex);
2547 /* impossible now that only excl is supported for now */
2548 ll_lease_close(och, inode, &lease_broken);
2553 case LL_IOC_GET_LEASE: {
2554 struct ll_inode_info *lli = ll_i2info(inode);
2555 struct ldlm_lock *lock = NULL;
2558 mutex_lock(&lli->lli_och_mutex);
2559 if (fd->fd_lease_och != NULL) {
2560 struct obd_client_handle *och = fd->fd_lease_och;
2562 lock = ldlm_handle2lock(&och->och_lease_handle);
2564 lock_res_and_lock(lock);
2565 if (!ldlm_is_cancel(lock))
2566 rc = och->och_flags &
2567 (FMODE_READ | FMODE_WRITE);
2568 unlock_res_and_lock(lock);
2569 LDLM_LOCK_PUT(lock);
2572 mutex_unlock(&lli->lli_och_mutex);
2575 case LL_IOC_HSM_IMPORT: {
2576 struct hsm_user_import *hui;
2582 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2587 rc = ll_hsm_import(inode, file, hui);
2596 ll_iocontrol_call(inode, file, cmd, arg, &err))
2599 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2605 #ifndef HAVE_FILE_LLSEEK_SIZE
2606 static inline loff_t
2607 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2609 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2611 if (offset > maxsize)
2614 if (offset != file->f_pos) {
2615 file->f_pos = offset;
2616 file->f_version = 0;
2622 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2623 loff_t maxsize, loff_t eof)
2625 struct inode *inode = file->f_dentry->d_inode;
2633 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2634 * position-querying operation. Avoid rewriting the "same"
2635 * f_pos value back to the file because a concurrent read(),
2636 * write() or lseek() might have altered it
2641 * f_lock protects against read/modify/write race with other
2642 * SEEK_CURs. Note that parallel writes and reads behave
2645 mutex_lock(&inode->i_mutex);
2646 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2647 mutex_unlock(&inode->i_mutex);
2651 * In the generic case the entire file is data, so as long as
2652 * offset isn't at the end of the file then the offset is data.
2659 * There is a virtual hole at the end of the file, so as long as
2660 * offset isn't i_size or larger, return i_size.
2668 return llseek_execute(file, offset, maxsize);
2672 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2674 struct inode *inode = file->f_dentry->d_inode;
2675 loff_t retval, eof = 0;
2678 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2679 (origin == SEEK_CUR) ? file->f_pos : 0);
2680 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2681 PFID(ll_inode2fid(inode)), inode, retval, retval,
2683 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2685 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2686 retval = ll_glimpse_size(inode);
2689 eof = i_size_read(inode);
2692 retval = ll_generic_file_llseek_size(file, offset, origin,
2693 ll_file_maxbytes(inode), eof);
2697 int ll_flush(struct file *file, fl_owner_t id)
2699 struct inode *inode = file->f_dentry->d_inode;
2700 struct ll_inode_info *lli = ll_i2info(inode);
2701 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2704 LASSERT(!S_ISDIR(inode->i_mode));
2706 /* catch async errors that were recorded back when async writeback
2707 * failed for pages in this mapping. */
2708 rc = lli->lli_async_rc;
2709 lli->lli_async_rc = 0;
2710 err = lov_read_and_clear_async_rc(lli->lli_clob);
2714 /* The application has been told write failure already.
2715 * Do not report failure again. */
2716 if (fd->fd_write_failed)
2718 return rc ? -EIO : 0;
2722 * Called to make sure a portion of file has been written out.
2723 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2725 * Return how many pages have been written.
2727 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2728 enum cl_fsync_mode mode, int ignore_layout)
2730 struct cl_env_nest nest;
2733 struct obd_capa *capa = NULL;
2734 struct cl_fsync_io *fio;
2738 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2739 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2742 env = cl_env_nested_get(&nest);
2744 RETURN(PTR_ERR(env));
2746 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2748 io = ccc_env_thread_io(env);
2749 io->ci_obj = cl_i2info(inode)->lli_clob;
2750 io->ci_ignore_layout = ignore_layout;
2752 /* initialize parameters for sync */
2753 fio = &io->u.ci_fsync;
2754 fio->fi_capa = capa;
2755 fio->fi_start = start;
2757 fio->fi_fid = ll_inode2fid(inode);
2758 fio->fi_mode = mode;
2759 fio->fi_nr_written = 0;
2761 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2762 result = cl_io_loop(env, io);
2764 result = io->ci_result;
2766 result = fio->fi_nr_written;
2767 cl_io_fini(env, io);
2768 cl_env_nested_put(&nest, env);
2776 * When dentry is provided (the 'else' case), *file->f_dentry may be
2777 * null and dentry must be used directly rather than pulled from
2778 * *file->f_dentry as is done otherwise.
2781 #ifdef HAVE_FILE_FSYNC_4ARGS
2782 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2784 struct dentry *dentry = file->f_dentry;
2785 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2786 int ll_fsync(struct file *file, int datasync)
2788 struct dentry *dentry = file->f_dentry;
2790 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2793 struct inode *inode = dentry->d_inode;
2794 struct ll_inode_info *lli = ll_i2info(inode);
2795 struct ptlrpc_request *req;
2796 struct obd_capa *oc;
2800 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2801 PFID(ll_inode2fid(inode)), inode);
2802 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2804 #ifdef HAVE_FILE_FSYNC_4ARGS
2805 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2806 mutex_lock(&inode->i_mutex);
2808 /* fsync's caller has already called _fdata{sync,write}, we want
2809 * that IO to finish before calling the osc and mdc sync methods */
2810 rc = filemap_fdatawait(inode->i_mapping);
2813 /* catch async errors that were recorded back when async writeback
2814 * failed for pages in this mapping. */
2815 if (!S_ISDIR(inode->i_mode)) {
2816 err = lli->lli_async_rc;
2817 lli->lli_async_rc = 0;
2820 err = lov_read_and_clear_async_rc(lli->lli_clob);
2825 oc = ll_mdscapa_get(inode);
2826 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2832 ptlrpc_req_finished(req);
2834 if (datasync && S_ISREG(inode->i_mode)) {
2835 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2837 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2839 if (rc == 0 && err < 0)
2842 fd->fd_write_failed = true;
2844 fd->fd_write_failed = false;
2847 #ifdef HAVE_FILE_FSYNC_4ARGS
2848 mutex_unlock(&inode->i_mutex);
2853 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2855 struct inode *inode = file->f_dentry->d_inode;
2856 struct ll_sb_info *sbi = ll_i2sbi(inode);
2857 struct ldlm_enqueue_info einfo = {
2858 .ei_type = LDLM_FLOCK,
2859 .ei_cb_cp = ldlm_flock_completion_ast,
2860 .ei_cbdata = file_lock,
2862 struct md_op_data *op_data;
2863 struct lustre_handle lockh = {0};
2864 ldlm_policy_data_t flock = {{0}};
2870 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2871 PFID(ll_inode2fid(inode)), file_lock);
2873 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2875 if (file_lock->fl_flags & FL_FLOCK) {
2876 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2877 /* flocks are whole-file locks */
2878 flock.l_flock.end = OFFSET_MAX;
2879 /* For flocks owner is determined by the local file desctiptor*/
2880 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2881 } else if (file_lock->fl_flags & FL_POSIX) {
2882 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2883 flock.l_flock.start = file_lock->fl_start;
2884 flock.l_flock.end = file_lock->fl_end;
2888 flock.l_flock.pid = file_lock->fl_pid;
2890 /* Somewhat ugly workaround for svc lockd.
2891 * lockd installs custom fl_lmops->lm_compare_owner that checks
2892 * for the fl_owner to be the same (which it always is on local node
2893 * I guess between lockd processes) and then compares pid.
2894 * As such we assign pid to the owner field to make it all work,
2895 * conflict with normal locks is unlikely since pid space and
2896 * pointer space for current->files are not intersecting */
2897 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2898 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
2900 switch (file_lock->fl_type) {
2902 einfo.ei_mode = LCK_PR;
2905 /* An unlock request may or may not have any relation to
2906 * existing locks so we may not be able to pass a lock handle
2907 * via a normal ldlm_lock_cancel() request. The request may even
2908 * unlock a byte range in the middle of an existing lock. In
2909 * order to process an unlock request we need all of the same
2910 * information that is given with a normal read or write record
2911 * lock request. To avoid creating another ldlm unlock (cancel)
2912 * message we'll treat a LCK_NL flock request as an unlock. */
2913 einfo.ei_mode = LCK_NL;
2916 einfo.ei_mode = LCK_PW;
2919 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2920 file_lock->fl_type);
2935 flags = LDLM_FL_BLOCK_NOWAIT;
2941 flags = LDLM_FL_TEST_LOCK;
2942 /* Save the old mode so that if the mode in the lock changes we
2943 * can decrement the appropriate reader or writer refcount. */
2944 file_lock->fl_type = einfo.ei_mode;
2947 CERROR("unknown fcntl lock command: %d\n", cmd);
2951 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2952 LUSTRE_OPC_ANY, NULL);
2953 if (IS_ERR(op_data))
2954 RETURN(PTR_ERR(op_data));
2956 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2957 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2958 flock.l_flock.pid, flags, einfo.ei_mode,
2959 flock.l_flock.start, flock.l_flock.end);
2961 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2962 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2964 if ((file_lock->fl_flags & FL_FLOCK) &&
2965 (rc == 0 || file_lock->fl_type == F_UNLCK))
2966 rc2 = flock_lock_file_wait(file, file_lock);
2967 if ((file_lock->fl_flags & FL_POSIX) &&
2968 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2969 !(flags & LDLM_FL_TEST_LOCK))
2970 rc2 = posix_lock_file_wait(file, file_lock);
2972 if (rc2 && file_lock->fl_type != F_UNLCK) {
2973 einfo.ei_mode = LCK_NL;
2974 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2975 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2979 ll_finish_md_op_data(op_data);
2984 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2992 * test if some locks matching bits and l_req_mode are acquired
2993 * - bits can be in different locks
2994 * - if found clear the common lock bits in *bits
2995 * - the bits not found, are kept in *bits
2997 * \param bits [IN] searched lock bits [IN]
2998 * \param l_req_mode [IN] searched lock mode
2999 * \retval boolean, true iff all bits are found
3001 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3003 struct lustre_handle lockh;
3004 ldlm_policy_data_t policy;
3005 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3006 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3015 fid = &ll_i2info(inode)->lli_fid;
3016 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3017 ldlm_lockname[mode]);
3019 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
3020 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3021 policy.l_inodebits.bits = *bits & (1 << i);
3022 if (policy.l_inodebits.bits == 0)
3025 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3026 &policy, mode, &lockh)) {
3027 struct ldlm_lock *lock;
3029 lock = ldlm_handle2lock(&lockh);
3032 ~(lock->l_policy_data.l_inodebits.bits);
3033 LDLM_LOCK_PUT(lock);
3035 *bits &= ~policy.l_inodebits.bits;
3042 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3043 struct lustre_handle *lockh, __u64 flags,
3046 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3051 fid = &ll_i2info(inode)->lli_fid;
3052 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3054 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3055 fid, LDLM_IBITS, &policy, mode, lockh);
3060 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3062 /* Already unlinked. Just update nlink and return success */
3063 if (rc == -ENOENT) {
3065 /* This path cannot be hit for regular files unless in
3066 * case of obscure races, so no need to to validate
3068 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3070 } else if (rc != 0) {
3071 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
3072 ll_get_fsname(inode->i_sb, NULL, 0),
3073 PFID(ll_inode2fid(inode)), rc);
3079 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3082 struct inode *inode = dentry->d_inode;
3083 struct ptlrpc_request *req = NULL;
3084 struct obd_export *exp;
3088 LASSERT(inode != NULL);
3090 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3091 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3093 exp = ll_i2mdexp(inode);
3095 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3096 * But under CMD case, it caused some lock issues, should be fixed
3097 * with new CMD ibits lock. See bug 12718 */
3098 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3099 struct lookup_intent oit = { .it_op = IT_GETATTR };
3100 struct md_op_data *op_data;
3102 if (ibits == MDS_INODELOCK_LOOKUP)
3103 oit.it_op = IT_LOOKUP;
3105 /* Call getattr by fid, so do not provide name at all. */
3106 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3107 dentry->d_inode, NULL, 0, 0,
3108 LUSTRE_OPC_ANY, NULL);
3109 if (IS_ERR(op_data))
3110 RETURN(PTR_ERR(op_data));
3112 oit.it_create_mode |= M_CHECK_STALE;
3113 rc = md_intent_lock(exp, op_data, NULL, 0,
3114 /* we are not interested in name
3117 ll_md_blocking_ast, 0);
3118 ll_finish_md_op_data(op_data);
3119 oit.it_create_mode &= ~M_CHECK_STALE;
3121 rc = ll_inode_revalidate_fini(inode, rc);
3125 rc = ll_revalidate_it_finish(req, &oit, dentry);
3127 ll_intent_release(&oit);
3131 /* Unlinked? Unhash dentry, so it is not picked up later by
3132 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3133 here to preserve get_cwd functionality on 2.6.
3135 if (!dentry->d_inode->i_nlink)
3136 d_lustre_invalidate(dentry, 0);
3138 ll_lookup_finish_locks(&oit, dentry);
3139 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3140 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3141 obd_valid valid = OBD_MD_FLGETATTR;
3142 struct md_op_data *op_data;
3145 if (S_ISREG(inode->i_mode)) {
3146 rc = ll_get_max_mdsize(sbi, &ealen);
3149 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3152 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3153 0, ealen, LUSTRE_OPC_ANY,
3155 if (IS_ERR(op_data))
3156 RETURN(PTR_ERR(op_data));
3158 op_data->op_valid = valid;
3159 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3160 * capa for this inode. Because we only keep capas of dirs
3162 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3163 ll_finish_md_op_data(op_data);
3165 rc = ll_inode_revalidate_fini(inode, rc);
3169 rc = ll_prep_inode(&inode, req, NULL, NULL);
3172 ptlrpc_req_finished(req);
3176 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3179 struct inode *inode = dentry->d_inode;
3183 rc = __ll_inode_revalidate_it(dentry, it, ibits);
3187 /* if object isn't regular file, don't validate size */
3188 if (!S_ISREG(inode->i_mode)) {
3189 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
3190 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
3191 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
3193 /* In case of restore, the MDT has the right size and has
3194 * already send it back without granting the layout lock,
3195 * inode is up-to-date so glimpse is useless.
3196 * Also to glimpse we need the layout, in case of a running
3197 * restore the MDT holds the layout lock so the glimpse will
3198 * block up to the end of restore (getattr will block)
3200 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3201 rc = ll_glimpse_size(inode);
3206 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
3207 struct lookup_intent *it, struct kstat *stat)
3209 struct inode *inode = de->d_inode;
3210 struct ll_sb_info *sbi = ll_i2sbi(inode);
3211 struct ll_inode_info *lli = ll_i2info(inode);
3214 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3215 MDS_INODELOCK_LOOKUP);
3216 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3221 stat->dev = inode->i_sb->s_dev;
3222 if (ll_need_32bit_api(sbi))
3223 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3225 stat->ino = inode->i_ino;
3226 stat->mode = inode->i_mode;
3227 stat->nlink = inode->i_nlink;
3228 stat->uid = inode->i_uid;
3229 stat->gid = inode->i_gid;
3230 stat->rdev = inode->i_rdev;
3231 stat->atime = inode->i_atime;
3232 stat->mtime = inode->i_mtime;
3233 stat->ctime = inode->i_ctime;
3234 stat->blksize = 1 << inode->i_blkbits;
3236 stat->size = i_size_read(inode);
3237 stat->blocks = inode->i_blocks;
3241 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3243 struct lookup_intent it = { .it_op = IT_GETATTR };
3245 return ll_getattr_it(mnt, de, &it, stat);
3248 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3249 __u64 start, __u64 len)
3253 struct ll_user_fiemap *fiemap;
3254 unsigned int extent_count = fieinfo->fi_extents_max;
3256 num_bytes = sizeof(*fiemap) + (extent_count *
3257 sizeof(struct ll_fiemap_extent));
3258 OBD_ALLOC_LARGE(fiemap, num_bytes);
3263 fiemap->fm_flags = fieinfo->fi_flags;
3264 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3265 fiemap->fm_start = start;
3266 fiemap->fm_length = len;
3267 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3268 sizeof(struct ll_fiemap_extent));
3270 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3272 fieinfo->fi_flags = fiemap->fm_flags;
3273 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3274 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3275 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3277 OBD_FREE_LARGE(fiemap, num_bytes);
3281 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3283 struct ll_inode_info *lli = ll_i2info(inode);
3284 struct posix_acl *acl = NULL;
3287 spin_lock(&lli->lli_lock);
3288 /* VFS' acl_permission_check->check_acl will release the refcount */
3289 acl = posix_acl_dup(lli->lli_posix_acl);
3290 spin_unlock(&lli->lli_lock);
3295 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3297 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3298 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3300 ll_check_acl(struct inode *inode, int mask)
3303 # ifdef CONFIG_FS_POSIX_ACL
3304 struct posix_acl *acl;
3308 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3309 if (flags & IPERM_FLAG_RCU)
3312 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3317 rc = posix_acl_permission(inode, acl, mask);
3318 posix_acl_release(acl);
3321 # else /* !CONFIG_FS_POSIX_ACL */
3323 # endif /* CONFIG_FS_POSIX_ACL */
3325 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
3327 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3328 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3330 # ifdef HAVE_INODE_PERMISION_2ARGS
3331 int ll_inode_permission(struct inode *inode, int mask)
3333 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
3340 #ifdef MAY_NOT_BLOCK
3341 if (mask & MAY_NOT_BLOCK)
3343 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3344 if (flags & IPERM_FLAG_RCU)
3348 /* as root inode are NOT getting validated in lookup operation,
3349 * need to do it before permission check. */
3351 if (inode == inode->i_sb->s_root->d_inode) {
3352 struct lookup_intent it = { .it_op = IT_LOOKUP };
3354 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3355 MDS_INODELOCK_LOOKUP);
3360 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3361 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3363 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3364 return lustre_check_remote_perm(inode, mask);
3366 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3367 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3372 /* -o localflock - only provides locally consistent flock locks */
3373 struct file_operations ll_file_operations = {
3374 .read = ll_file_read,
3375 .aio_read = ll_file_aio_read,
3376 .write = ll_file_write,
3377 .aio_write = ll_file_aio_write,
3378 .unlocked_ioctl = ll_file_ioctl,
3379 .open = ll_file_open,
3380 .release = ll_file_release,
3381 .mmap = ll_file_mmap,
3382 .llseek = ll_file_seek,
3383 .splice_read = ll_file_splice_read,
3388 struct file_operations ll_file_operations_flock = {
3389 .read = ll_file_read,
3390 .aio_read = ll_file_aio_read,
3391 .write = ll_file_write,
3392 .aio_write = ll_file_aio_write,
3393 .unlocked_ioctl = ll_file_ioctl,
3394 .open = ll_file_open,
3395 .release = ll_file_release,
3396 .mmap = ll_file_mmap,
3397 .llseek = ll_file_seek,
3398 .splice_read = ll_file_splice_read,
3401 .flock = ll_file_flock,
3402 .lock = ll_file_flock
3405 /* These are for -o noflock - to return ENOSYS on flock calls */
3406 struct file_operations ll_file_operations_noflock = {
3407 .read = ll_file_read,
3408 .aio_read = ll_file_aio_read,
3409 .write = ll_file_write,
3410 .aio_write = ll_file_aio_write,
3411 .unlocked_ioctl = ll_file_ioctl,
3412 .open = ll_file_open,
3413 .release = ll_file_release,
3414 .mmap = ll_file_mmap,
3415 .llseek = ll_file_seek,
3416 .splice_read = ll_file_splice_read,
3419 .flock = ll_file_noflock,
3420 .lock = ll_file_noflock
3423 struct inode_operations ll_file_inode_operations = {
3424 .setattr = ll_setattr,
3425 .getattr = ll_getattr,
3426 .permission = ll_inode_permission,
3427 .setxattr = ll_setxattr,
3428 .getxattr = ll_getxattr,
3429 .listxattr = ll_listxattr,
3430 .removexattr = ll_removexattr,
3431 .fiemap = ll_fiemap,
3432 #ifdef HAVE_IOP_GET_ACL
3433 .get_acl = ll_get_acl,
3437 /* dynamic ioctl number support routins */
3438 static struct llioc_ctl_data {
3439 struct rw_semaphore ioc_sem;
3440 cfs_list_t ioc_head;
3442 __RWSEM_INITIALIZER(llioc.ioc_sem),
3443 CFS_LIST_HEAD_INIT(llioc.ioc_head)
3448 cfs_list_t iocd_list;
3449 unsigned int iocd_size;
3450 llioc_callback_t iocd_cb;
3451 unsigned int iocd_count;
3452 unsigned int iocd_cmd[0];
3455 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3458 struct llioc_data *in_data = NULL;
3461 if (cb == NULL || cmd == NULL ||
3462 count > LLIOC_MAX_CMD || count < 0)
3465 size = sizeof(*in_data) + count * sizeof(unsigned int);
3466 OBD_ALLOC(in_data, size);
3467 if (in_data == NULL)
3470 memset(in_data, 0, sizeof(*in_data));
3471 in_data->iocd_size = size;
3472 in_data->iocd_cb = cb;
3473 in_data->iocd_count = count;
3474 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3476 down_write(&llioc.ioc_sem);
3477 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3478 up_write(&llioc.ioc_sem);
3483 void ll_iocontrol_unregister(void *magic)
3485 struct llioc_data *tmp;
3490 down_write(&llioc.ioc_sem);
3491 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3493 unsigned int size = tmp->iocd_size;
3495 cfs_list_del(&tmp->iocd_list);
3496 up_write(&llioc.ioc_sem);
3498 OBD_FREE(tmp, size);
3502 up_write(&llioc.ioc_sem);
3504 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3507 EXPORT_SYMBOL(ll_iocontrol_register);
3508 EXPORT_SYMBOL(ll_iocontrol_unregister);
3510 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3511 unsigned int cmd, unsigned long arg, int *rcp)
3513 enum llioc_iter ret = LLIOC_CONT;
3514 struct llioc_data *data;
3515 int rc = -EINVAL, i;
3517 down_read(&llioc.ioc_sem);
3518 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3519 for (i = 0; i < data->iocd_count; i++) {
3520 if (cmd != data->iocd_cmd[i])
3523 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3527 if (ret == LLIOC_STOP)
3530 up_read(&llioc.ioc_sem);
3537 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3539 struct ll_inode_info *lli = ll_i2info(inode);
3540 struct cl_env_nest nest;
3545 if (lli->lli_clob == NULL)
3548 env = cl_env_nested_get(&nest);
3550 RETURN(PTR_ERR(env));
3552 result = cl_conf_set(env, lli->lli_clob, conf);
3553 cl_env_nested_put(&nest, env);
3555 if (conf->coc_opc == OBJECT_CONF_SET) {
3556 struct ldlm_lock *lock = conf->coc_lock;
3558 LASSERT(lock != NULL);
3559 LASSERT(ldlm_has_layout(lock));
3561 /* it can only be allowed to match after layout is
3562 * applied to inode otherwise false layout would be
3563 * seen. Applying layout shoud happen before dropping
3564 * the intent lock. */
3565 ldlm_lock_allow_match(lock);
3571 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3572 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3575 struct ll_sb_info *sbi = ll_i2sbi(inode);
3576 struct obd_capa *oc;
3577 struct ptlrpc_request *req;
3578 struct mdt_body *body;
3585 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3586 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3587 lock->l_lvb_data, lock->l_lvb_len);
3589 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3592 /* if layout lock was granted right away, the layout is returned
3593 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3594 * blocked and then granted via completion ast, we have to fetch
3595 * layout here. Please note that we can't use the LVB buffer in
3596 * completion AST because it doesn't have a large enough buffer */
3597 oc = ll_mdscapa_get(inode);
3598 rc = ll_get_max_mdsize(sbi, &lmmsize);
3600 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3601 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3607 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3608 if (body == NULL || body->eadatasize > lmmsize)
3609 GOTO(out, rc = -EPROTO);
3611 lmmsize = body->eadatasize;
3612 if (lmmsize == 0) /* empty layout */
3615 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3617 GOTO(out, rc = -EFAULT);
3619 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3620 if (lvbdata == NULL)
3621 GOTO(out, rc = -ENOMEM);
3623 memcpy(lvbdata, lmm, lmmsize);
3624 lock_res_and_lock(lock);
3625 if (lock->l_lvb_data != NULL)
3626 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3628 lock->l_lvb_data = lvbdata;
3629 lock->l_lvb_len = lmmsize;
3630 unlock_res_and_lock(lock);
3635 ptlrpc_req_finished(req);
3640 * Apply the layout to the inode. Layout lock is held and will be released
3643 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3644 struct inode *inode, __u32 *gen, bool reconf)
3646 struct ll_inode_info *lli = ll_i2info(inode);
3647 struct ll_sb_info *sbi = ll_i2sbi(inode);
3648 struct ldlm_lock *lock;
3649 struct lustre_md md = { NULL };
3650 struct cl_object_conf conf;
3653 bool wait_layout = false;
3656 LASSERT(lustre_handle_is_used(lockh));
3658 lock = ldlm_handle2lock(lockh);
3659 LASSERT(lock != NULL);
3660 LASSERT(ldlm_has_layout(lock));
3662 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d\n",
3663 PFID(&lli->lli_fid), inode, reconf);
3665 /* in case this is a caching lock and reinstate with new inode */
3666 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3668 lock_res_and_lock(lock);
3669 lvb_ready = ldlm_is_lvb_ready(lock);
3670 unlock_res_and_lock(lock);
3671 /* checking lvb_ready is racy but this is okay. The worst case is
3672 * that multi processes may configure the file on the same time. */
3674 if (lvb_ready || !reconf) {
3677 /* layout_gen must be valid if layout lock is not
3678 * cancelled and stripe has already set */
3679 *gen = lli->lli_layout_gen;
3685 rc = ll_layout_fetch(inode, lock);
3689 /* for layout lock, lmm is returned in lock's lvb.
3690 * lvb_data is immutable if the lock is held so it's safe to access it
3691 * without res lock. See the description in ldlm_lock_decref_internal()
3692 * for the condition to free lvb_data of layout lock */
3693 if (lock->l_lvb_data != NULL) {
3694 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3695 lock->l_lvb_data, lock->l_lvb_len);
3697 *gen = LL_LAYOUT_GEN_EMPTY;
3699 *gen = md.lsm->lsm_layout_gen;
3702 CERROR("%s: file "DFID" unpackmd error: %d\n",
3703 ll_get_fsname(inode->i_sb, NULL, 0),
3704 PFID(&lli->lli_fid), rc);
3710 /* set layout to file. Unlikely this will fail as old layout was
3711 * surely eliminated */
3712 memset(&conf, 0, sizeof conf);
3713 conf.coc_opc = OBJECT_CONF_SET;
3714 conf.coc_inode = inode;
3715 conf.coc_lock = lock;
3716 conf.u.coc_md = &md;
3717 rc = ll_layout_conf(inode, &conf);
3720 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3722 /* refresh layout failed, need to wait */
3723 wait_layout = rc == -EBUSY;
3727 LDLM_LOCK_PUT(lock);
3728 ldlm_lock_decref(lockh, mode);
3730 /* wait for IO to complete if it's still being used. */
3732 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3733 ll_get_fsname(inode->i_sb, NULL, 0),
3734 PFID(&lli->lli_fid), inode);
3736 memset(&conf, 0, sizeof conf);
3737 conf.coc_opc = OBJECT_CONF_WAIT;
3738 conf.coc_inode = inode;
3739 rc = ll_layout_conf(inode, &conf);
3743 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3744 ll_get_fsname(inode->i_sb, NULL, 0),
3745 PFID(&lli->lli_fid), rc);
3751 * This function checks if there exists a LAYOUT lock on the client side,
3752 * or enqueues it if it doesn't have one in cache.
3754 * This function will not hold layout lock so it may be revoked any time after
3755 * this function returns. Any operations depend on layout should be redone
3758 * This function should be called before lov_io_init() to get an uptodate
3759 * layout version, the caller should save the version number and after IO
3760 * is finished, this function should be called again to verify that layout
3761 * is not changed during IO time.
3763 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3765 struct ll_inode_info *lli = ll_i2info(inode);
3766 struct ll_sb_info *sbi = ll_i2sbi(inode);
3767 struct md_op_data *op_data;
3768 struct lookup_intent it;
3769 struct lustre_handle lockh;
3771 struct ldlm_enqueue_info einfo = {
3772 .ei_type = LDLM_IBITS,
3774 .ei_cb_bl = ll_md_blocking_ast,
3775 .ei_cb_cp = ldlm_completion_ast,
3780 *gen = lli->lli_layout_gen;
3781 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3785 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3786 LASSERT(S_ISREG(inode->i_mode));
3788 /* mostly layout lock is caching on the local side, so try to match
3789 * it before grabbing layout lock mutex. */
3790 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3791 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3792 if (mode != 0) { /* hit cached lock */
3793 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3797 /* better hold lli_layout_mutex to try again otherwise
3798 * it will have starvation problem. */
3801 /* take layout lock mutex to enqueue layout lock exclusively. */
3802 mutex_lock(&lli->lli_layout_mutex);
3805 /* try again. Maybe somebody else has done this. */
3806 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3807 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3808 if (mode != 0) { /* hit cached lock */
3809 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3813 mutex_unlock(&lli->lli_layout_mutex);
3817 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3818 0, 0, LUSTRE_OPC_ANY, NULL);
3819 if (IS_ERR(op_data)) {
3820 mutex_unlock(&lli->lli_layout_mutex);
3821 RETURN(PTR_ERR(op_data));
3824 /* have to enqueue one */
3825 memset(&it, 0, sizeof(it));
3826 it.it_op = IT_LAYOUT;
3827 lockh.cookie = 0ULL;
3829 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)\n",
3830 ll_get_fsname(inode->i_sb, NULL, 0),
3831 PFID(&lli->lli_fid), inode);
3833 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
3835 if (it.d.lustre.it_data != NULL)
3836 ptlrpc_req_finished(it.d.lustre.it_data);
3837 it.d.lustre.it_data = NULL;
3839 ll_finish_md_op_data(op_data);
3841 mode = it.d.lustre.it_lock_mode;
3842 it.d.lustre.it_lock_mode = 0;
3843 ll_intent_drop_lock(&it);
3846 /* set lock data in case this is a new lock */
3847 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3848 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3852 mutex_unlock(&lli->lli_layout_mutex);
3858 * This function send a restore request to the MDT
3860 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3862 struct hsm_user_request *hur;
3866 len = sizeof(struct hsm_user_request) +
3867 sizeof(struct hsm_user_item);
3868 OBD_ALLOC(hur, len);
3872 hur->hur_request.hr_action = HUA_RESTORE;
3873 hur->hur_request.hr_archive_id = 0;
3874 hur->hur_request.hr_flags = 0;
3875 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3876 sizeof(hur->hur_user_item[0].hui_fid));
3877 hur->hur_user_item[0].hui_extent.offset = offset;
3878 hur->hur_user_item[0].hui_extent.length = length;
3879 hur->hur_request.hr_itemcount = 1;
3880 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,