4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/*
 * Allocate per-open file data from the ll_file_data_slab cache.
 * NOTE(review): this dump elides lines (numbering gaps) — the allocation
 * NULL-check and the return statement are not visible here; confirm
 * against the full source.
 */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
/* Start each open with a clean write-failure flag. */
61 fd->fd_write_failed = false;
/*
 * Return a struct ll_file_data to its slab cache.
 * Counterpart of ll_file_data_get().
 * NOTE(review): a NULL guard may exist in the elided lines — confirm.
 */
66 static void ll_file_data_put(struct ll_file_data *fd)
69 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Copy the current inode attributes (mode, times, size, blocks, flags),
 * the IO epoch, the open handle @fh and an MDS capability into @op_data
 * so they can be shipped to the MDS (used on close / SOM update paths).
 */
72 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
73 struct lustre_handle *fh)
75 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
76 op_data->op_attr.ia_mode = inode->i_mode;
77 op_data->op_attr.ia_atime = inode->i_atime;
78 op_data->op_attr.ia_mtime = inode->i_mtime;
79 op_data->op_attr.ia_ctime = inode->i_ctime;
80 op_data->op_attr.ia_size = i_size_read(inode);
81 op_data->op_attr_blocks = inode->i_blocks;
/* Flags travel in the Lustre-specific ia_attr_flags extension field. */
82 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
83 ll_inode_to_ext_flags(inode->i_flags);
84 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
86 op_data->op_handle = *fh;
87 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Tell the MDS that data was modified so it can update its view. */
89 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
90 op_data->op_bias |= MDS_DATA_MODIFIED;
94 * Closes the IO epoch and packs all the attributes into @op_data for
97 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
98 struct obd_client_handle *och)
/* Always send mode and all three timestamps on close. */
102 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
103 ATTR_MTIME | ATTR_MTIME_SET |
104 ATTR_CTIME | ATTR_CTIME_SET;
/* NOTE(review): body of this branch is elided in this dump — presumably
 * it skips the size/epoch work for read-only handles; confirm. */
106 if (!(och->och_flags & FMODE_WRITE))
/* Without Size-on-MDS support (or for non-regular files) also send
 * size and block count directly. */
109 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
110 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
112 ll_ioepoch_close(inode, op_data, &och, 0);
115 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
116 ll_prep_md_op_data(op_data, inode, NULL, NULL,
117 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send the MDS close RPC for open handle @och, optionally as an HSM
 * release (@data_version != NULL), then perform post-close cleanup:
 * Size-on-MDS update, DATA_MODIFIED flag clearing, object destruction,
 * and invalidation of the handle cookie.
 *
 * NOTE(review): many control-flow lines (braces, GOTO/RETURN targets,
 * variable declarations such as rc/epoch_close/inode) are elided in this
 * dump; the commentary below describes only what the visible lines show.
 */
121 static int ll_close_inode_openhandle(struct obd_export *md_exp,
123 struct obd_client_handle *och,
124 const __u64 *data_version)
126 struct obd_export *exp = ll_i2mdexp(inode);
127 struct md_op_data *op_data;
128 struct ptlrpc_request *req = NULL;
129 struct obd_device *obd = class_exp2obd(exp);
136 * XXX: in case of LMV, is this correct to access
139 CERROR("Invalid MDC connection handle "LPX64"\n",
140 ll_i2mdexp(inode)->exp_handle.h_cookie);
144 OBD_ALLOC_PTR(op_data);
146 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
148 ll_prepare_close(inode, op_data, och);
149 if (data_version != NULL) {
150 /* Pass in data_version implies release. */
151 op_data->op_bias |= MDS_HSM_RELEASE;
152 op_data->op_data_version = *data_version;
153 op_data->op_lease_handle = och->och_lease_handle;
154 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* Remember whether ll_prepare_close() marked this as an epoch close;
 * checked again after the RPC below. */
156 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
157 rc = md_close(md_exp, op_data, och->och_mod, &req);
/* NOTE(review): the branch condition guarding this SOM path is elided;
 * presumably it handles an MDS request for a Size-on-MDS update. */
159 /* This close must have the epoch closed. */
160 LASSERT(epoch_close);
161 /* MDS has instructed us to obtain Size-on-MDS attribute from
162 * OSTs and send setattr to back to MDS. */
163 rc = ll_som_update(inode, op_data);
165 CERROR("%s: inode "DFID" mdc Size-on-MDS update"
166 " failed: rc = %d\n",
167 ll_i2mdexp(inode)->exp_obd->obd_name,
168 PFID(ll_inode2fid(inode)), rc);
172 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
173 ll_i2mdexp(inode)->exp_obd->obd_name,
174 PFID(ll_inode2fid(inode)), rc);
177 /* DATA_MODIFIED flag was successfully sent on close, cancel data
178 * modification flag. */
179 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
180 struct ll_inode_info *lli = ll_i2info(inode);
/* lli_flags is protected by lli_lock. */
182 spin_lock(&lli->lli_lock);
183 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
184 spin_unlock(&lli->lli_lock);
188 rc = ll_objects_destroy(req, inode);
190 CERROR("%s: inode "DFID
191 " ll_objects destroy: rc = %d\n",
192 ll_i2mdexp(inode)->exp_obd->obd_name,
193 PFID(ll_inode2fid(inode)), rc);
/* For an HSM release close, verify the server actually released. */
196 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
197 struct mdt_body *body;
198 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
199 if (!(body->valid & OBD_MD_FLRELEASED))
203 ll_finish_md_op_data(op_data);
/* If SOM is on and the epoch is still open on a written regular file,
 * queue a DONE_WRITING to close it later. */
207 if (exp_connect_som(exp) && !epoch_close &&
208 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
209 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
211 md_clear_open_replay_data(md_exp, och);
212 /* Free @och if it is not waiting for DONE_WRITING. */
213 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
216 if (req) /* This is close request */
217 ptlrpc_req_finished(req);
/*
 * Drop the inode's cached MDS open handle for the given open @flags
 * (write / exec / read) when no users remain, sending the actual close
 * RPC via ll_close_inode_openhandle().
 * NOTE(review): the handle-swap under the mutex and the early-return
 * path are partially elided in this dump — confirm against full source.
 */
221 int ll_md_real_close(struct inode *inode, int flags)
223 struct ll_inode_info *lli = ll_i2info(inode);
224 struct obd_client_handle **och_p;
225 struct obd_client_handle *och;
/* Select the per-mode handle slot and its use count. */
230 if (flags & FMODE_WRITE) {
231 och_p = &lli->lli_mds_write_och;
232 och_usecount = &lli->lli_open_fd_write_count;
233 } else if (flags & FMODE_EXEC) {
234 och_p = &lli->lli_mds_exec_och;
235 och_usecount = &lli->lli_open_fd_exec_count;
237 LASSERT(flags & FMODE_READ);
238 och_p = &lli->lli_mds_read_och;
239 och_usecount = &lli->lli_open_fd_read_count;
242 mutex_lock(&lli->lli_och_mutex);
243 if (*och_usecount) { /* There are still users of this handle, so
245 mutex_unlock(&lli->lli_och_mutex);
250 mutex_unlock(&lli->lli_och_mutex);
252 if (och) { /* There might be a race and somebody have freed this och
254 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file close bookkeeping: drop group lock and lease if held, close
 * any direct open handle, decrement the per-mode open count, and — if a
 * matching OPEN DLM lock is still cached — skip the MDS round trip,
 * otherwise call ll_md_real_close().  Finally frees the file data and
 * releases the capability.
 * NOTE(review): several lines (declarations of rc/lockmode/lease_broken,
 * braces, the md_lock_match trailing arguments) are elided in this dump.
 */
261 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
264 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
265 struct ll_inode_info *lli = ll_i2info(inode);
269 /* clear group lock, if present */
270 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
271 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
273 if (fd->fd_lease_och != NULL) {
276 /* Usually the lease is not released when the
277 * application crashed, we need to release here. */
278 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
279 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
280 PFID(&lli->lli_fid), rc, lease_broken);
282 fd->fd_lease_och = NULL;
/* A directly-held open handle (e.g. from lease open) is closed here. */
285 if (fd->fd_och != NULL) {
286 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
291 /* Let's see if we have good enough OPEN lock on the file and if
292 we can skip talking to MDS */
293 if (file->f_dentry->d_inode) { /* Can this ever be false? */
295 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
296 struct lustre_handle lockh;
297 struct inode *inode = file->f_dentry->d_inode;
298 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Drop our reference on the per-mode open count under the mutex. */
300 mutex_lock(&lli->lli_och_mutex);
301 if (fd->fd_omode & FMODE_WRITE) {
303 LASSERT(lli->lli_open_fd_write_count);
304 lli->lli_open_fd_write_count--;
305 } else if (fd->fd_omode & FMODE_EXEC) {
307 LASSERT(lli->lli_open_fd_exec_count);
308 lli->lli_open_fd_exec_count--;
311 LASSERT(lli->lli_open_fd_read_count);
312 lli->lli_open_fd_read_count--;
314 mutex_unlock(&lli->lli_och_mutex);
/* No cached OPEN ibits lock => must really close on the MDS. */
316 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
317 LDLM_IBITS, &policy, lockmode,
319 rc = ll_md_real_close(file->f_dentry->d_inode,
323 CERROR("Releasing a file %p with negative dentry %p. Name %s",
324 file, file->f_dentry, file->f_dentry->d_name.name);
328 LUSTRE_FPRIVATE(file) = NULL;
329 ll_file_data_put(fd);
330 ll_capa_close(inode);
335 /* While this returns an error code, fput() the caller does not, so we need
336 * to make every effort to clean up all of our state here. Also, applications
337 * rarely check close errors and even if an error is returned they will not
338 * re-try the close call.
340 int ll_file_release(struct inode *inode, struct file *file)
342 struct ll_file_data *fd;
343 struct ll_sb_info *sbi = ll_i2sbi(inode);
344 struct ll_inode_info *lli = ll_i2info(inode);
348 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
349 PFID(ll_inode2fid(inode)), inode);
/* Remote-client ACL bookkeeping for the filesystem root only. */
351 #ifdef CONFIG_FS_POSIX_ACL
352 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
353 inode == inode->i_sb->s_root->d_inode) {
354 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
357 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
358 fd->fd_flags &= ~LL_FILE_RMTACL;
359 rct_del(&sbi->ll_rct, current_pid());
360 et_search_free(&sbi->ll_et, current_pid());
/* The root dentry is not counted in the release statistics. */
365 if (inode->i_sb->s_root != file->f_dentry)
366 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
367 fd = LUSTRE_FPRIVATE(file);
370 /* The last ref on @file, maybe not the the owner pid of statahead.
371 * Different processes can open the same dir, "ll_opendir_key" means:
372 * it is me that should stop the statahead thread. */
373 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
374 lli->lli_opendir_pid != 0)
375 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root has no MDS open handle: just free local state and return. */
377 if (inode->i_sb->s_root == file->f_dentry) {
378 LUSTRE_FPRIVATE(file) = NULL;
379 ll_file_data_put(fd);
/* For regular files, fold any deferred async write errors into rc. */
383 if (!S_ISDIR(inode->i_mode)) {
384 lov_read_and_clear_async_rc(lli->lli_clob);
385 lli->lli_async_rc = 0;
388 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/* Fault-injection hook for debug-log dumping. */
390 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
391 libcfs_debug_dumplog();
/*
 * Perform an intent-based open RPC to the MDS for @file, requesting an
 * OPEN lock where possible, and install the resulting inode/lock data.
 * @lmm/@lmmsize carry striping info when setting stripe parameters.
 * NOTE(review): declarations of req/rc and some cleanup labels are
 * elided in this dump — confirm against the full source.
 */
396 static int ll_intent_file_open(struct file *file, void *lmm,
397 int lmmsize, struct lookup_intent *itp)
399 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
400 struct dentry *parent = file->f_dentry->d_parent;
401 const char *name = file->f_dentry->d_name.name;
402 const int len = file->f_dentry->d_name.len;
403 struct md_op_data *op_data;
404 struct ptlrpc_request *req;
405 __u32 opc = LUSTRE_OPC_ANY;
412 /* Usually we come here only for NFSD, and we want open lock.
413 But we can also get here with pre 2.6.15 patchless kernels, and in
414 that case that lock is also ok */
415 /* We can also get here if there was cached open handle in revalidate_it
416 * but it disappeared while we were getting from there to ll_file_open.
417 * But this means this file was closed and immediatelly opened which
418 * makes a good candidate for using OPEN lock */
419 /* If lmmsize & lmm are not 0, we are just setting stripe info
420 * parameters. No need for the open lock */
421 if (lmm == NULL && lmmsize == 0) {
422 itp->it_flags |= MDS_OPEN_LOCK;
423 if (itp->it_flags & FMODE_WRITE)
424 opc = LUSTRE_OPC_CREATE;
427 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
428 file->f_dentry->d_inode, name, len,
431 RETURN(PTR_ERR(op_data));
/* The inode is already known, so the MDS can look it up by FID. */
433 itp->it_flags |= MDS_OPEN_BY_FID;
434 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
435 0 /*unused */, &req, ll_md_blocking_ast, 0);
436 ll_finish_md_op_data(op_data);
438 /* reason for keep own exit path - don`t flood log
439 * with messages with -ESTALE errors.
/* On a successfully-opened-but-failed intent, drop the server handle. */
441 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
442 it_open_error(DISP_OPEN_OPEN, itp))
444 ll_release_openhandle(file->f_dentry, itp);
448 if (it_disposition(itp, DISP_LOOKUP_NEG))
449 GOTO(out, rc = -ENOENT);
451 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
452 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
453 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the reply and attach the granted lock. */
457 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
458 if (!rc && itp->d.lustre.it_lock_mode)
459 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
463 ptlrpc_req_finished(req);
464 ll_intent_drop_lock(itp);
470 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
471 * not believe attributes if a few ioepoch holders exist. Attributes for
472 * previous ioepoch if new one is opened are also skipped by MDS.
474 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a new, non-zero epoch; re-opening the same epoch is a no-op. */
476 if (ioepoch && lli->lli_ioepoch != ioepoch) {
477 lli->lli_ioepoch = ioepoch;
478 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
479 ioepoch, PFID(&lli->lli_fid));
/*
 * Populate an obd_client_handle from the MDT reply body attached to the
 * intent (@it): file handle, FID, lease lock cookie, flags — then
 * register the handle for open replay.  Returns the result of
 * md_set_open_replay_data().
 */
483 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
484 struct obd_client_handle *och)
486 struct ptlrpc_request *req = it->d.lustre.it_data;
487 struct mdt_body *body;
489 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
490 och->och_fh = body->handle;
491 och->och_fid = body->fid1;
/* The lease handle reuses the intent's lock handle cookie. */
492 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
493 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
494 och->och_flags = it->it_flags;
496 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish the client-local part of an open: fill @och from the intent
 * reply (when provided), record the IO epoch, attach @fd as the file's
 * private data, and initialize readahead state and the open mode.
 * NOTE(review): the guard around the och-filling branch and the return
 * are elided in this dump.
 */
499 int ll_local_open(struct file *file, struct lookup_intent *it,
500 struct ll_file_data *fd, struct obd_client_handle *och)
502 struct inode *inode = file->f_dentry->d_inode;
503 struct ll_inode_info *lli = ll_i2info(inode);
/* Private data must not be set twice for the same file. */
506 LASSERT(!LUSTRE_FPRIVATE(file));
511 struct ptlrpc_request *req = it->d.lustre.it_data;
512 struct mdt_body *body;
515 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
519 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
520 ll_ioepoch_open(lli, body->ioepoch);
523 LUSTRE_FPRIVATE(file) = fd;
524 ll_readahead_init(inode, &fd->fd_ras);
/* Cache only the access-mode bits relevant to handle selection. */
525 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
530 /* Open a file, and (for the very first open) create objects on the OSTs at
531 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
532 * creation or open until ll_lov_setstripe() ioctl is called.
534 * If we already have the stripe MD locally then we don't request it in
535 * md_open(), by passing a lmm_size = 0.
537 * It is up to the application to ensure no other processes open this file
538 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
539 * used. We might be able to avoid races of that sort by getting lli_open_sem
540 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
541 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
543 int ll_file_open(struct inode *inode, struct file *file)
545 struct ll_inode_info *lli = ll_i2info(inode);
546 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
547 .it_flags = file->f_flags };
548 struct obd_client_handle **och_p = NULL;
549 __u64 *och_usecount = NULL;
550 struct ll_file_data *fd;
551 int rc = 0, opendir_set = 0;
554 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
555 PFID(ll_inode2fid(inode)), inode, file->f_flags);
/* An intent may have been stashed by the lookup path. */
557 it = file->private_data; /* XXX: compat macro */
558 file->private_data = NULL; /* prevent ll_local_open assertion */
560 fd = ll_file_data_get();
562 GOTO(out_openerr, rc = -ENOMEM);
/* For directories, try to become the statahead owner for this open. */
565 if (S_ISDIR(inode->i_mode)) {
566 spin_lock(&lli->lli_sa_lock);
567 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
568 lli->lli_opendir_pid == 0) {
569 lli->lli_opendir_key = fd;
570 lli->lli_opendir_pid = current_pid();
573 spin_unlock(&lli->lli_sa_lock);
/* Root needs no MDS open handle; just attach the file data. */
576 if (inode->i_sb->s_root == file->f_dentry) {
577 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own open intent (oit). */
581 if (!it || !it->d.lustre.it_disposition) {
582 /* Convert f_flags into access mode. We cannot use file->f_mode,
583 * because everything but O_ACCMODE mask was stripped from
/* NOTE(review): the body converting O_ACCMODE bits is elided here. */
585 if ((oit.it_flags + 1) & O_ACCMODE)
587 if (file->f_flags & O_TRUNC)
588 oit.it_flags |= FMODE_WRITE;
590 /* kernel only call f_op->open in dentry_open. filp_open calls
591 * dentry_open after call to open_namei that checks permissions.
592 * Only nfsd_open call dentry_open directly without checking
593 * permissions and because of that this code below is safe. */
594 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
595 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
597 /* We do not want O_EXCL here, presumably we opened the file
598 * already? XXX - NFS implications? */
599 oit.it_flags &= ~O_EXCL;
601 /* bug20584, if "it_flags" contains O_CREAT, the file will be
602 * created if necessary, then "IT_CREAT" should be set to keep
603 * consistent with it */
604 if (oit.it_flags & O_CREAT)
605 oit.it_op |= IT_CREAT;
611 /* Let's see if we have file open on MDS already. */
612 if (it->it_flags & FMODE_WRITE) {
613 och_p = &lli->lli_mds_write_och;
614 och_usecount = &lli->lli_open_fd_write_count;
615 } else if (it->it_flags & FMODE_EXEC) {
616 och_p = &lli->lli_mds_exec_och;
617 och_usecount = &lli->lli_open_fd_exec_count;
619 och_p = &lli->lli_mds_read_och;
620 och_usecount = &lli->lli_open_fd_read_count;
623 mutex_lock(&lli->lli_och_mutex);
624 if (*och_p) { /* Open handle is present */
625 if (it_disposition(it, DISP_OPEN_OPEN)) {
626 /* Well, there's extra open request that we do not need,
627 let's close it somehow. This will decref request. */
628 rc = it_open_error(DISP_OPEN_OPEN, it);
630 mutex_unlock(&lli->lli_och_mutex);
631 GOTO(out_openerr, rc);
634 ll_release_openhandle(file->f_dentry, it);
/* Reuse the cached handle for this local open. */
638 rc = ll_local_open(file, it, fd, NULL);
641 mutex_unlock(&lli->lli_och_mutex);
642 GOTO(out_openerr, rc);
645 LASSERT(*och_usecount == 0);
646 if (!it->d.lustre.it_disposition) {
647 /* We cannot just request lock handle now, new ELC code
648 means that one of other OPEN locks for this file
649 could be cancelled, and since blocking ast handler
650 would attempt to grab och_mutex as well, that would
651 result in a deadlock */
652 mutex_unlock(&lli->lli_och_mutex);
653 it->it_create_mode |= M_CHECK_STALE;
654 rc = ll_intent_file_open(file, NULL, 0, it);
655 it->it_create_mode &= ~M_CHECK_STALE;
657 GOTO(out_openerr, rc);
661 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
663 GOTO(out_och_free, rc = -ENOMEM);
667 /* md_intent_lock() didn't get a request ref if there was an
668 * open error, so don't do cleanup on the request here
670 /* XXX (green): Should not we bail out on any error here, not
671 * just open error? */
672 rc = it_open_error(DISP_OPEN_OPEN, it);
674 GOTO(out_och_free, rc);
676 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
678 rc = ll_local_open(file, it, fd, *och_p);
680 GOTO(out_och_free, rc);
682 mutex_unlock(&lli->lli_och_mutex);
685 /* Must do this outside lli_och_mutex lock to prevent deadlock where
686 different kind of OPEN lock for this same inode gets cancelled
687 by ldlm_cancel_lru */
688 if (!S_ISREG(inode->i_mode))
689 GOTO(out_och_free, rc);
/* No stripe metadata yet: object creation may be deferred. */
693 if (!lli->lli_has_smd) {
694 if (file->f_flags & O_LOV_DELAY_CREATE ||
695 !(file->f_mode & FMODE_WRITE)) {
696 CDEBUG(D_INODE, "object creation was delayed\n");
697 GOTO(out_och_free, rc);
700 file->f_flags &= ~O_LOV_DELAY_CREATE;
701 GOTO(out_och_free, rc);
/* NOTE(review): cleanup labels (out_och_free/out_openerr) are elided in
 * this dump; the lines below belong to the error-unwinding path. */
705 if (och_p && *och_p) {
706 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
707 *och_p = NULL; /* OBD_FREE writes some magic there */
710 mutex_unlock(&lli->lli_och_mutex);
713 if (opendir_set != 0)
714 ll_stop_statahead(inode, lli->lli_opendir_key);
716 ll_file_data_put(fd);
718 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* Drop the open-reply reference kept by the intent, if any. */
721 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
722 ptlrpc_req_finished(it->d.lustre.it_data);
723 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease locks: on a blocking callback, asynchronously
 * cancel the lease lock.  Unlike ll_md_blocking_ast(), this handler does
 * no openhandle processing (the lease holder cleans up itself).
 * NOTE(review): the switch statement and CANCELING-case body are
 * partially elided in this dump.
 */
729 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
730 struct ldlm_lock_desc *desc, void *data, int flag)
733 struct lustre_handle lockh;
737 case LDLM_CB_BLOCKING:
738 ldlm_lock2handle(lock, &lockh);
739 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
741 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
745 case LDLM_CB_CANCELING:
753 * Acquire a lease and open the file.
755 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
756 fmode_t fmode, __u64 open_flags)
758 struct lookup_intent it = { .it_op = IT_OPEN };
759 struct ll_sb_info *sbi = ll_i2sbi(inode);
760 struct md_op_data *op_data;
761 struct ptlrpc_request *req;
762 struct lustre_handle old_handle = { 0 };
763 struct obd_client_handle *och = NULL;
/* A lease must be exactly read or exactly write, never both/neither. */
768 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
769 RETURN(ERR_PTR(-EINVAL));
/* NOTE(review): the condition guarding this file-based branch is elided;
 * presumably it runs only when @file != NULL. */
772 struct ll_inode_info *lli = ll_i2info(inode);
773 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
774 struct obd_client_handle **och_p;
/* Requested mode must be compatible with the file's open mode. */
777 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
778 RETURN(ERR_PTR(-EPERM));
780 /* Get the openhandle of the file */
782 mutex_lock(&lli->lli_och_mutex);
/* Only one lease per file descriptor. */
783 if (fd->fd_lease_och != NULL) {
784 mutex_unlock(&lli->lli_och_mutex);
788 if (fd->fd_och == NULL) {
789 if (file->f_mode & FMODE_WRITE) {
790 LASSERT(lli->lli_mds_write_och != NULL);
791 och_p = &lli->lli_mds_write_och;
792 och_usecount = &lli->lli_open_fd_write_count;
794 LASSERT(lli->lli_mds_read_och != NULL);
795 och_p = &lli->lli_mds_read_och;
796 och_usecount = &lli->lli_open_fd_read_count;
/* NOTE(review): the handle-takeover body for the single-opener case
 * is elided in this dump. */
798 if (*och_usecount == 1) {
805 mutex_unlock(&lli->lli_och_mutex);
806 if (rc < 0) /* more than 1 opener */
809 LASSERT(fd->fd_och != NULL);
810 old_handle = fd->fd_och->och_fh;
/* NOTE(review): och allocation is elided; -ENOMEM on failure. */
815 RETURN(ERR_PTR(-ENOMEM));
817 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
818 LUSTRE_OPC_ANY, NULL);
820 GOTO(out, rc = PTR_ERR(op_data));
822 /* To tell the MDT this openhandle is from the same owner */
823 op_data->op_handle = old_handle;
825 it.it_flags = fmode | open_flags;
826 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
827 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
828 ll_md_blocking_lease_ast,
829 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
830 * it can be cancelled which may mislead applications that the lease is
832 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
833 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
834 * doesn't deal with openhandle, so normal openhandle will be leaked. */
835 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
836 ll_finish_md_op_data(op_data);
837 ptlrpc_req_finished(req);
839 GOTO(out_release_it, rc);
841 if (it_disposition(&it, DISP_LOOKUP_NEG))
842 GOTO(out_release_it, rc = -ENOENT);
844 rc = it_open_error(DISP_OPEN_OPEN, &it);
846 GOTO(out_release_it, rc);
848 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
849 ll_och_fill(sbi->ll_md_exp, &it, och);
/* A server that does not grant leases is treated as unsupported. */
851 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
852 GOTO(out_close, rc = -EOPNOTSUPP);
854 /* already get lease, handle lease lock */
855 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
856 if (it.d.lustre.it_lock_mode == 0 ||
857 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
858 /* open lock must return for lease */
859 CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
860 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
861 it.d.lustre.it_lock_bits);
862 GOTO(out_close, rc = -EPROTO);
865 ll_intent_release(&it);
/* NOTE(review): error-unwinding labels (out_close/out_release_it/out)
 * are elided in this dump; the lines below are the cleanup path. */
869 /* Cancel open lock */
870 if (it.d.lustre.it_lock_mode != 0) {
871 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
872 it.d.lustre.it_lock_mode);
873 it.d.lustre.it_lock_mode = 0;
874 och->och_lease_handle.cookie = 0ULL;
876 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
878 CERROR("%s: error closing file "DFID": %d\n",
879 ll_get_fsname(inode->i_sb, NULL, 0),
880 PFID(&ll_i2info(inode)->lli_fid), rc2);
881 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
883 ll_intent_release(&it);
889 EXPORT_SYMBOL(ll_lease_open);
892 * Release lease and close the file.
893 * It will check if the lease has ever broken.
895 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
898 struct ldlm_lock *lock;
899 bool cancelled = true;
/* Look up the lease lock to learn whether it was already cancelled
 * (i.e. the lease was broken by a conflicting access). */
903 lock = ldlm_handle2lock(&och->och_lease_handle);
905 lock_res_and_lock(lock);
906 cancelled = ldlm_is_cancel(lock);
907 unlock_res_and_lock(lock);
911 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
912 PFID(&ll_i2info(inode)->lli_fid), cancelled);
/* NOTE(review): the condition guarding this cancel is elided —
 * presumably it cancels only when the lease was not already broken. */
915 ldlm_cli_cancel(&och->och_lease_handle, 0);
916 if (lease_broken != NULL)
917 *lease_broken = cancelled;
919 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
923 EXPORT_SYMBOL(ll_lease_close);
925 /* Fills the obdo with the attributes for the lsm */
926 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
927 struct obd_capa *capa, struct obdo *obdo,
928 __u64 ioepoch, int dv_flags)
930 struct ptlrpc_request_set *set;
931 struct obd_info oinfo = { { { 0 } } };
936 LASSERT(lsm != NULL);
/* NOTE(review): oinfo.oi_md / oi_oa assignments appear elided here. */
940 oinfo.oi_oa->o_oi = lsm->lsm_oi;
941 oinfo.oi_oa->o_mode = S_IFREG;
942 oinfo.oi_oa->o_ioepoch = ioepoch;
/* Request the full set of OST-side attributes including data version. */
943 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
944 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
945 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
946 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
947 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
948 OBD_MD_FLDATAVERSION;
949 oinfo.oi_capa = capa;
/* Flush requests take the getattr under a server-side lock. */
950 if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
951 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
952 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
953 if (dv_flags & LL_DV_WR_FLUSH)
954 oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
957 set = ptlrpc_prep_set();
959 CERROR("can't allocate ptlrpc set\n");
/* Issue the getattr asynchronously, then wait on the set. */
962 rc = obd_getattr_async(exp, &oinfo, set);
964 rc = ptlrpc_set_wait(set);
965 ptlrpc_set_destroy(set);
/* Keep only the attribute bits callers are allowed to trust. */
968 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
969 OBD_MD_FLATIME | OBD_MD_FLMTIME |
970 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
971 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
/* A write-flush must come back with the server's FLUSH confirmation. */
972 if (dv_flags & LL_DV_WR_FLUSH &&
973 !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
974 oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
981 * Performs the getattr on the inode and updates its fields.
982 * If @sync != 0, perform the getattr under the server-side lock.
984 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
985 __u64 ioepoch, int sync)
987 struct obd_capa *capa = ll_mdscapa_get(inode);
988 struct lov_stripe_md *lsm;
992 lsm = ccc_inode_lsm_get(inode);
993 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
994 capa, obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
/* NOTE(review): capa_put and the success-check guarding this block are
 * elided in this dump. */
997 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
/* Fold the freshly fetched attributes back into the VFS inode. */
999 obdo_refresh_inode(inode, obdo, obdo->o_valid);
1000 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
1001 " blksize %lu\n", POSTID(oi), i_size_read(inode),
1002 (unsigned long long)inode->i_blocks,
1003 (unsigned long)ll_inode_blksize(inode));
1005 ccc_inode_lsm_put(inode, lsm);
/*
 * Merge MDS-provided timestamps with OST-side attributes (size, blocks,
 * times) into the inode, under the inode size lock.
 * NOTE(review): declarations of lvb/rc and some braces are elided in
 * this dump.
 */
1009 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
1011 struct ll_inode_info *lli = ll_i2info(inode);
1012 struct cl_object *obj = lli->lli_clob;
1013 struct cl_attr *attr = ccc_env_thread_attr(env);
1019 ll_inode_size_lock(inode);
1020 /* merge timestamps the most recently obtained from mds with
1021 timestamps obtained from osts */
1022 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
1023 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
1024 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
1025 inode_init_lvb(inode, &lvb);
/* Fetch the cl_object's merged attributes under its attr lock. */
1027 cl_object_attr_lock(obj);
1028 rc = cl_object_attr_get(env, obj, attr);
1029 cl_object_attr_unlock(obj);
/* Take the newer of each timestamp between MDS and OST views. */
1032 if (lvb.lvb_atime < attr->cat_atime)
1033 lvb.lvb_atime = attr->cat_atime;
1034 if (lvb.lvb_ctime < attr->cat_ctime)
1035 lvb.lvb_ctime = attr->cat_ctime;
1036 if (lvb.lvb_mtime < attr->cat_mtime)
1037 lvb.lvb_mtime = attr->cat_mtime;
1039 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1040 PFID(&lli->lli_fid), attr->cat_size);
1041 cl_isize_write_nolock(inode, attr->cat_size);
1043 inode->i_blocks = attr->cat_blocks;
1045 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1046 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1047 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1049 ll_inode_size_unlock(inode);
/*
 * Fill a stat structure from a glimpse (OST getattr) of @lsm: size,
 * blocks and timestamps only.
 * NOTE(review): the stat argument declaration and the success check are
 * elided in this dump.
 */
1054 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1057 struct obdo obdo = { 0 };
1060 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1062 st->st_size = obdo.o_size;
1063 st->st_blocks = obdo.o_blocks;
1064 st->st_mtime = obdo.o_mtime;
1065 st->st_atime = obdo.o_atime;
1066 st->st_ctime = obdo.o_ctime;
/*
 * Decide whether atime updates should be suppressed for @file, checking
 * the file, inode, mount and superblock noatime-related flags.
 * NOTE(review): the "return true/false" lines between these conditions
 * are elided in this dump — each visible condition presumably returns
 * true; confirm against the full source.
 */
1071 static bool file_is_noatime(const struct file *file)
1073 const struct vfsmount *mnt = file->f_path.mnt;
1074 const struct inode *inode = file->f_path.dentry->d_inode;
1076 /* Adapted from file_accessed() and touch_atime().*/
1077 if (file->f_flags & O_NOATIME)
1080 if (inode->i_flags & S_NOATIME)
1083 if (IS_NOATIME(inode))
1086 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1089 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1092 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: nonblocking/append/sync flags, lock policy and noatime.
 * NOTE(review): the branch structure around the write-only settings is
 * partially elided in this dump.
 */
1098 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1100 struct inode *inode = file->f_dentry->d_inode;
1102 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1104 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
/* O_SYNC and O_DIRECT writes go synchronously to the servers. */
1105 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1106 file->f_flags & O_DIRECT ||
1109 io->ci_obj = ll_i2info(inode)->lli_clob;
1110 io->ci_lockreq = CILR_MAYBE;
/* nolock files never take DLM locks; append needs a mandatory lock. */
1111 if (ll_file_nolock(file)) {
1112 io->ci_lockreq = CILR_NEVER;
1113 io->ci_no_srvlock = 1;
1114 } else if (file->f_flags & O_APPEND) {
1115 io->ci_lockreq = CILR_MANDATORY;
1118 io->ci_noatime = file_is_noatime(file);
/*
 * Generic read/write engine: set up a cl_io for @iot (CIT_READ or
 * CIT_WRITE), dispatch on the IO subtype (normal iovec / sendfile /
 * splice), run the cl_io loop with the appropriate lli locking, update
 * *ppos and per-mount statistics.
 * NOTE(review): the return type, several declarations (env refs, result,
 * io), switch/case labels, braces and the restart path are elided in
 * this dump; the commentary covers only the visible lines.
 */
1122 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1123 struct file *file, enum cl_io_type iot,
1124 loff_t *ppos, size_t count)
1126 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1127 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1133 io = ccc_env_thread_io(env);
1134 ll_io_init(io, file, iot == CIT_WRITE);
1136 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1137 struct vvp_io *vio = vvp_env_io(env);
1138 struct ccc_io *cio = ccc_env_io(env);
1139 int write_mutex_locked = 0;
1141 cio->cui_fd = LUSTRE_FPRIVATE(file);
1142 vio->cui_io_subtype = args->via_io_subtype;
1144 switch (vio->cui_io_subtype) {
/* Normal iovec-based IO: copy iov state into the ccc_io. */
1146 cio->cui_iov = args->u.normal.via_iov;
1147 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1148 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1149 cio->cui_iocb = args->u.normal.via_iocb;
/* Non-group-locked writes serialize on lli_write_mutex; reads take
 * the truncate semaphore shared. */
1150 if ((iot == CIT_WRITE) &&
1151 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1152 if (mutex_lock_interruptible(&lli->
1154 GOTO(out, result = -ERESTARTSYS);
1155 write_mutex_locked = 1;
1156 } else if (iot == CIT_READ) {
1157 down_read(&lli->lli_trunc_sem);
1161 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1162 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
1165 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1166 vio->u.splice.cui_flags = args->u.splice.via_flags;
1169 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
1172 result = cl_io_loop(env, io);
1173 if (write_mutex_locked)
1174 mutex_unlock(&lli->lli_write_mutex);
1175 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
1176 up_read(&lli->lli_trunc_sem);
1178 /* cl_io_rw_init() handled IO */
1179 result = io->ci_result;
/* Bytes transferred become the return value and advance the offset. */
1182 if (io->ci_nob > 0) {
1183 result = io->ci_nob;
1184 *ppos = io->u.ci_wr.wr.crw_pos;
1188 cl_io_fini(env, io);
1189 /* If any bit been read/written (result != 0), we just return
1190 * short read/write instead of restart io. */
1191 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1192 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1193 iot == CIT_READ ? "read" : "write",
1194 file->f_dentry->d_name.name, *ppos, count);
1195 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
/* Account transferred bytes and maintain the write-failed flag. */
1199 if (iot == CIT_READ) {
1201 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1202 LPROC_LL_READ_BYTES, result);
1203 } else if (iot == CIT_WRITE) {
1205 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1206 LPROC_LL_WRITE_BYTES, result);
1207 fd->fd_write_failed = false;
1208 } else if (result != -ERESTARTSYS) {
1209 fd->fd_write_failed = true;
1218 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
1220 static int ll_file_get_iov_count(const struct iovec *iov,
1221 unsigned long *nr_segs, size_t *count)
1226 for (seg = 0; seg < *nr_segs; seg++) {
1227 const struct iovec *iv = &iov[seg];
1230 * If any segment has a negative length, or the cumulative
1231 * length ever wraps negative then return -EINVAL.
1234 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1236 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1241 cnt -= iv->iov_len; /* This segment is no good */
1248 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1249 unsigned long nr_segs, loff_t pos)
1252 struct vvp_io_args *args;
1258 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1262 env = cl_env_get(&refcheck);
1264 RETURN(PTR_ERR(env));
1266 args = vvp_env_args(env, IO_NORMAL);
1267 args->u.normal.via_iov = (struct iovec *)iov;
1268 args->u.normal.via_nrsegs = nr_segs;
1269 args->u.normal.via_iocb = iocb;
1271 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1272 &iocb->ki_pos, count);
1273 cl_env_put(env, &refcheck);
1277 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1281 struct iovec *local_iov;
1282 struct kiocb *kiocb;
1287 env = cl_env_get(&refcheck);
1289 RETURN(PTR_ERR(env));
1291 local_iov = &vvp_env_info(env)->vti_local_iov;
1292 kiocb = &vvp_env_info(env)->vti_kiocb;
1293 local_iov->iov_base = (void __user *)buf;
1294 local_iov->iov_len = count;
1295 init_sync_kiocb(kiocb, file);
1296 kiocb->ki_pos = *ppos;
1297 kiocb->ki_left = count;
1299 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1300 *ppos = kiocb->ki_pos;
1302 cl_env_put(env, &refcheck);
1307 * Write to a file (through the page cache).
1310 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1311 unsigned long nr_segs, loff_t pos)
1314 struct vvp_io_args *args;
1320 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1324 env = cl_env_get(&refcheck);
1326 RETURN(PTR_ERR(env));
1328 args = vvp_env_args(env, IO_NORMAL);
1329 args->u.normal.via_iov = (struct iovec *)iov;
1330 args->u.normal.via_nrsegs = nr_segs;
1331 args->u.normal.via_iocb = iocb;
1333 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1334 &iocb->ki_pos, count);
1335 cl_env_put(env, &refcheck);
1339 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1343 struct iovec *local_iov;
1344 struct kiocb *kiocb;
1349 env = cl_env_get(&refcheck);
1351 RETURN(PTR_ERR(env));
1353 local_iov = &vvp_env_info(env)->vti_local_iov;
1354 kiocb = &vvp_env_info(env)->vti_kiocb;
1355 local_iov->iov_base = (void __user *)buf;
1356 local_iov->iov_len = count;
1357 init_sync_kiocb(kiocb, file);
1358 kiocb->ki_pos = *ppos;
1359 kiocb->ki_left = count;
1361 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1362 *ppos = kiocb->ki_pos;
1364 cl_env_put(env, &refcheck);
1369 * Send file content (through pagecache) somewhere with helper
1371 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1372 struct pipe_inode_info *pipe, size_t count,
1376 struct vvp_io_args *args;
1381 env = cl_env_get(&refcheck);
1383 RETURN(PTR_ERR(env));
1385 args = vvp_env_args(env, IO_SPLICE);
1386 args->u.splice.via_pipe = pipe;
1387 args->u.splice.via_flags = flags;
1389 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1390 cl_env_put(env, &refcheck);
1394 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
1397 struct obd_export *exp = ll_i2dtexp(inode);
1398 struct obd_trans_info oti = { 0 };
1399 struct obdo *oa = NULL;
1402 struct lov_stripe_md *lsm = NULL, *lsm2;
1409 lsm = ccc_inode_lsm_get(inode);
1410 if (!lsm_has_objects(lsm))
1411 GOTO(out, rc = -ENOENT);
1413 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1414 (lsm->lsm_stripe_count));
1416 OBD_ALLOC_LARGE(lsm2, lsm_size);
1418 GOTO(out, rc = -ENOMEM);
1421 oa->o_nlink = ost_idx;
1422 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1423 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1424 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1425 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1426 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1427 memcpy(lsm2, lsm, lsm_size);
1428 ll_inode_size_lock(inode);
1429 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1430 ll_inode_size_unlock(inode);
1432 OBD_FREE_LARGE(lsm2, lsm_size);
1435 ccc_inode_lsm_put(inode, lsm);
1440 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1442 struct ll_recreate_obj ucreat;
1446 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1449 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1453 ostid_set_seq_mdt0(&oi);
1454 ostid_set_id(&oi, ucreat.lrc_id);
1455 RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
1458 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1465 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1468 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1471 fid_to_ostid(&fid, &oi);
1472 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1473 RETURN(ll_lov_recreate(inode, &oi, ost_idx));
1476 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1477 __u64 flags, struct lov_user_md *lum,
1480 struct lov_stripe_md *lsm = NULL;
1481 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1485 lsm = ccc_inode_lsm_get(inode);
1487 ccc_inode_lsm_put(inode, lsm);
1488 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1489 PFID(ll_inode2fid(inode)));
1493 ll_inode_size_lock(inode);
1494 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1497 rc = oit.d.lustre.it_status;
1499 GOTO(out_req_free, rc);
1501 ll_release_openhandle(file->f_dentry, &oit);
1504 ll_inode_size_unlock(inode);
1505 ll_intent_release(&oit);
1506 ccc_inode_lsm_put(inode, lsm);
1509 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1513 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1514 struct lov_mds_md **lmmp, int *lmm_size,
1515 struct ptlrpc_request **request)
1517 struct ll_sb_info *sbi = ll_i2sbi(inode);
1518 struct mdt_body *body;
1519 struct lov_mds_md *lmm = NULL;
1520 struct ptlrpc_request *req = NULL;
1521 struct md_op_data *op_data;
1524 rc = ll_get_max_mdsize(sbi, &lmmsize);
1528 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1529 strlen(filename), lmmsize,
1530 LUSTRE_OPC_ANY, NULL);
1531 if (IS_ERR(op_data))
1532 RETURN(PTR_ERR(op_data));
1534 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1535 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1536 ll_finish_md_op_data(op_data);
1538 CDEBUG(D_INFO, "md_getattr_name failed "
1539 "on %s: rc %d\n", filename, rc);
1543 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1544 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1546 lmmsize = body->eadatasize;
1548 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1550 GOTO(out, rc = -ENODATA);
1553 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1554 LASSERT(lmm != NULL);
1556 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1557 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1558 GOTO(out, rc = -EPROTO);
1562 * This is coming from the MDS, so is probably in
1563 * little endian. We convert it to host endian before
1564 * passing it to userspace.
1566 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1569 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1570 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1573 /* if function called for directory - we should
1574 * avoid swab not existent lsm objects */
1575 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1576 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1577 if (S_ISREG(body->mode))
1578 lustre_swab_lov_user_md_objects(
1579 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1581 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1582 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1583 if (S_ISREG(body->mode))
1584 lustre_swab_lov_user_md_objects(
1585 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1592 *lmm_size = lmmsize;
1597 static int ll_lov_setea(struct inode *inode, struct file *file,
1600 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1601 struct lov_user_md *lump;
1602 int lum_size = sizeof(struct lov_user_md) +
1603 sizeof(struct lov_user_ost_data);
1607 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1610 OBD_ALLOC_LARGE(lump, lum_size);
1614 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1615 OBD_FREE_LARGE(lump, lum_size);
1619 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1621 OBD_FREE_LARGE(lump, lum_size);
1625 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1628 struct lov_user_md_v3 lumv3;
1629 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1630 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1631 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1633 __u64 flags = FMODE_WRITE;
1636 /* first try with v1 which is smaller than v3 */
1637 lum_size = sizeof(struct lov_user_md_v1);
1638 if (copy_from_user(lumv1, lumv1p, lum_size))
1641 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1642 lum_size = sizeof(struct lov_user_md_v3);
1643 if (copy_from_user(&lumv3, lumv3p, lum_size))
1647 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1649 struct lov_stripe_md *lsm;
1652 put_user(0, &lumv1p->lmm_stripe_count);
1654 ll_layout_refresh(inode, &gen);
1655 lsm = ccc_inode_lsm_get(inode);
1656 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1657 0, lsm, (void *)arg);
1658 ccc_inode_lsm_put(inode, lsm);
1663 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1665 struct lov_stripe_md *lsm;
1669 lsm = ccc_inode_lsm_get(inode);
1671 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1673 ccc_inode_lsm_put(inode, lsm);
1677 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1679 struct ll_inode_info *lli = ll_i2info(inode);
1680 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1681 struct ccc_grouplock grouplock;
1685 if (ll_file_nolock(file))
1686 RETURN(-EOPNOTSUPP);
1688 spin_lock(&lli->lli_lock);
1689 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1690 CWARN("group lock already existed with gid %lu\n",
1691 fd->fd_grouplock.cg_gid);
1692 spin_unlock(&lli->lli_lock);
1695 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1696 spin_unlock(&lli->lli_lock);
1698 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1699 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1703 spin_lock(&lli->lli_lock);
1704 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1705 spin_unlock(&lli->lli_lock);
1706 CERROR("another thread just won the race\n");
1707 cl_put_grouplock(&grouplock);
1711 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1712 fd->fd_grouplock = grouplock;
1713 spin_unlock(&lli->lli_lock);
1715 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
1719 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1721 struct ll_inode_info *lli = ll_i2info(inode);
1722 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1723 struct ccc_grouplock grouplock;
1726 spin_lock(&lli->lli_lock);
1727 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1728 spin_unlock(&lli->lli_lock);
1729 CWARN("no group lock held\n");
1732 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1734 if (fd->fd_grouplock.cg_gid != arg) {
1735 CWARN("group lock %lu doesn't match current id %lu\n",
1736 arg, fd->fd_grouplock.cg_gid);
1737 spin_unlock(&lli->lli_lock);
1741 grouplock = fd->fd_grouplock;
1742 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1743 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1744 spin_unlock(&lli->lli_lock);
1746 cl_put_grouplock(&grouplock);
1747 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1752 * Close inode open handle
1754 * \param dentry [in] dentry which contains the inode
1755 * \param it [in,out] intent which contains open info and result
1758 * \retval <0 failure
1760 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1762 struct inode *inode = dentry->d_inode;
1763 struct obd_client_handle *och;
1769 /* Root ? Do nothing. */
1770 if (dentry->d_inode->i_sb->s_root == dentry)
1773 /* No open handle to close? Move away */
1774 if (!it_disposition(it, DISP_OPEN_OPEN))
1777 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1779 OBD_ALLOC(och, sizeof(*och));
1781 GOTO(out, rc = -ENOMEM);
1783 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1785 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1788 /* this one is in place of ll_file_open */
1789 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1790 ptlrpc_req_finished(it->d.lustre.it_data);
1791 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1797 * Get size for inode for which FIEMAP mapping is requested.
1798 * Make the FIEMAP get_info call and returns the result.
1800 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1803 struct obd_export *exp = ll_i2dtexp(inode);
1804 struct lov_stripe_md *lsm = NULL;
1805 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1806 int vallen = num_bytes;
1810 /* Checks for fiemap flags */
1811 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1812 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1816 /* Check for FIEMAP_FLAG_SYNC */
1817 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1818 rc = filemap_fdatawrite(inode->i_mapping);
1823 lsm = ccc_inode_lsm_get(inode);
1827 /* If the stripe_count > 1 and the application does not understand
1828 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1830 if (lsm->lsm_stripe_count > 1 &&
1831 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1832 GOTO(out, rc = -EOPNOTSUPP);
1834 fm_key.oa.o_oi = lsm->lsm_oi;
1835 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1837 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1838 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1839 /* If filesize is 0, then there would be no objects for mapping */
1840 if (fm_key.oa.o_size == 0) {
1841 fiemap->fm_mapped_extents = 0;
1845 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1847 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1850 CERROR("obd_get_info failed: rc = %d\n", rc);
1853 ccc_inode_lsm_put(inode, lsm);
1857 int ll_fid2path(struct inode *inode, void *arg)
1859 struct obd_export *exp = ll_i2mdexp(inode);
1860 struct getinfo_fid2path *gfout, *gfin;
1864 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1865 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1868 /* Need to get the buflen */
1869 OBD_ALLOC_PTR(gfin);
1872 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1877 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1878 OBD_ALLOC(gfout, outsize);
1879 if (gfout == NULL) {
1883 memcpy(gfout, gfin, sizeof(*gfout));
1886 /* Call mdc_iocontrol */
1887 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1891 if (copy_to_user(arg, gfout, outsize))
1895 OBD_FREE(gfout, outsize);
1899 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1901 struct ll_user_fiemap *fiemap_s;
1902 size_t num_bytes, ret_bytes;
1903 unsigned int extent_count;
1906 /* Get the extent count so we can calculate the size of
1907 * required fiemap buffer */
1908 if (get_user(extent_count,
1909 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1911 num_bytes = sizeof(*fiemap_s) + (extent_count *
1912 sizeof(struct ll_fiemap_extent));
1914 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1915 if (fiemap_s == NULL)
1918 /* get the fiemap value */
1919 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1921 GOTO(error, rc = -EFAULT);
1923 /* If fm_extent_count is non-zero, read the first extent since
1924 * it is used to calculate end_offset and device from previous
1927 if (copy_from_user(&fiemap_s->fm_extents[0],
1928 (char __user *)arg + sizeof(*fiemap_s),
1929 sizeof(struct ll_fiemap_extent)))
1930 GOTO(error, rc = -EFAULT);
1933 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1937 ret_bytes = sizeof(struct ll_user_fiemap);
1939 if (extent_count != 0)
1940 ret_bytes += (fiemap_s->fm_mapped_extents *
1941 sizeof(struct ll_fiemap_extent));
1943 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1947 OBD_FREE_LARGE(fiemap_s, num_bytes);
1952 * Read the data_version for inode.
1954 * This value is computed using stripe object version on OST.
1955 * Version is computed using server side locking.
1957 * @param flags whether/how to sync on the OST side;
1959 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1960 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
1962 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1964 struct lov_stripe_md *lsm = NULL;
1965 struct ll_sb_info *sbi = ll_i2sbi(inode);
1966 struct obdo *obdo = NULL;
1970 /* If no stripe, we consider version is 0. */
1971 lsm = ccc_inode_lsm_get(inode);
1972 if (!lsm_has_objects(lsm)) {
1974 CDEBUG(D_INODE, "No object for inode\n");
1978 OBD_ALLOC_PTR(obdo);
1980 GOTO(out, rc = -ENOMEM);
1982 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, flags);
1984 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1987 *data_version = obdo->o_data_version;
1993 ccc_inode_lsm_put(inode, lsm);
1998 * Trigger a HSM release request for the provided inode.
2000 int ll_hsm_release(struct inode *inode)
2002 struct cl_env_nest nest;
2004 struct obd_client_handle *och = NULL;
2005 __u64 data_version = 0;
2009 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
2010 ll_get_fsname(inode->i_sb, NULL, 0),
2011 PFID(&ll_i2info(inode)->lli_fid));
2013 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
2015 GOTO(out, rc = PTR_ERR(och));
2017 /* Grab latest data_version and [am]time values */
2018 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
2022 env = cl_env_nested_get(&nest);
2024 GOTO(out, rc = PTR_ERR(env));
2026 ll_merge_lvb(env, inode);
2027 cl_env_nested_put(&nest, env);
2029 /* Release the file.
2030 * NB: lease lock handle is released in mdc_hsm_release_pack() because
2031 * we still need it to pack l_remote_handle to MDT. */
2032 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
2038 if (och != NULL && !IS_ERR(och)) /* close the file */
2039 ll_lease_close(och, inode, NULL);
2044 struct ll_swap_stack {
2045 struct iattr ia1, ia2;
2047 struct inode *inode1, *inode2;
2048 bool check_dv1, check_dv2;
2051 static int ll_swap_layouts(struct file *file1, struct file *file2,
2052 struct lustre_swap_layouts *lsl)
2054 struct mdc_swap_layouts msl;
2055 struct md_op_data *op_data;
2058 struct ll_swap_stack *llss = NULL;
2061 OBD_ALLOC_PTR(llss);
2065 llss->inode1 = file1->f_dentry->d_inode;
2066 llss->inode2 = file2->f_dentry->d_inode;
2068 if (!S_ISREG(llss->inode2->i_mode))
2069 GOTO(free, rc = -EINVAL);
2071 if (inode_permission(llss->inode1, MAY_WRITE) ||
2072 inode_permission(llss->inode2, MAY_WRITE))
2073 GOTO(free, rc = -EPERM);
2075 if (llss->inode2->i_sb != llss->inode1->i_sb)
2076 GOTO(free, rc = -EXDEV);
2078 /* we use 2 bool because it is easier to swap than 2 bits */
2079 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2080 llss->check_dv1 = true;
2082 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2083 llss->check_dv2 = true;
2085 /* we cannot use lsl->sl_dvX directly because we may swap them */
2086 llss->dv1 = lsl->sl_dv1;
2087 llss->dv2 = lsl->sl_dv2;
2089 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2090 if (rc == 0) /* same file, done! */
2093 if (rc < 0) { /* sequentialize it */
2094 swap(llss->inode1, llss->inode2);
2096 swap(llss->dv1, llss->dv2);
2097 swap(llss->check_dv1, llss->check_dv2);
2101 if (gid != 0) { /* application asks to flush dirty cache */
2102 rc = ll_get_grouplock(llss->inode1, file1, gid);
2106 rc = ll_get_grouplock(llss->inode2, file2, gid);
2108 ll_put_grouplock(llss->inode1, file1, gid);
2113 /* to be able to restore mtime and atime after swap
2114 * we need to first save them */
2116 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2117 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2118 llss->ia1.ia_atime = llss->inode1->i_atime;
2119 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2120 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2121 llss->ia2.ia_atime = llss->inode2->i_atime;
2122 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2125 /* ultimate check, before swaping the layouts we check if
2126 * dataversion has changed (if requested) */
2127 if (llss->check_dv1) {
2128 rc = ll_data_version(llss->inode1, &dv, 0);
2131 if (dv != llss->dv1)
2132 GOTO(putgl, rc = -EAGAIN);
2135 if (llss->check_dv2) {
2136 rc = ll_data_version(llss->inode2, &dv, 0);
2139 if (dv != llss->dv2)
2140 GOTO(putgl, rc = -EAGAIN);
2143 /* struct md_op_data is used to send the swap args to the mdt
2144 * only flags is missing, so we use struct mdc_swap_layouts
2145 * through the md_op_data->op_data */
2146 /* flags from user space have to be converted before they are send to
2147 * server, no flag is sent today, they are only used on the client */
2150 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2151 0, LUSTRE_OPC_ANY, &msl);
2152 if (IS_ERR(op_data))
2153 GOTO(free, rc = PTR_ERR(op_data));
2155 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2156 sizeof(*op_data), op_data, NULL);
2157 ll_finish_md_op_data(op_data);
2161 ll_put_grouplock(llss->inode2, file2, gid);
2162 ll_put_grouplock(llss->inode1, file1, gid);
2165 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2169 /* clear useless flags */
2170 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2171 llss->ia1.ia_valid &= ~ATTR_MTIME;
2172 llss->ia2.ia_valid &= ~ATTR_MTIME;
2175 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2176 llss->ia1.ia_valid &= ~ATTR_ATIME;
2177 llss->ia2.ia_valid &= ~ATTR_ATIME;
2180 /* update time if requested */
2182 if (llss->ia2.ia_valid != 0) {
2183 mutex_lock(&llss->inode1->i_mutex);
2184 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2185 mutex_unlock(&llss->inode1->i_mutex);
2188 if (llss->ia1.ia_valid != 0) {
2191 mutex_lock(&llss->inode2->i_mutex);
2192 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2193 mutex_unlock(&llss->inode2->i_mutex);
2205 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2207 struct md_op_data *op_data;
2210 /* Non-root users are forbidden to set or clear flags which are
2211 * NOT defined in HSM_USER_MASK. */
2212 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2213 !cfs_capable(CFS_CAP_SYS_ADMIN))
2216 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2217 LUSTRE_OPC_ANY, hss);
2218 if (IS_ERR(op_data))
2219 RETURN(PTR_ERR(op_data));
2221 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2222 sizeof(*op_data), op_data, NULL);
2224 ll_finish_md_op_data(op_data);
2229 static int ll_hsm_import(struct inode *inode, struct file *file,
2230 struct hsm_user_import *hui)
2232 struct hsm_state_set *hss = NULL;
2233 struct iattr *attr = NULL;
2237 if (!S_ISREG(inode->i_mode))
2243 GOTO(out, rc = -ENOMEM);
2245 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2246 hss->hss_archive_id = hui->hui_archive_id;
2247 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2248 rc = ll_hsm_state_set(inode, hss);
2252 OBD_ALLOC_PTR(attr);
2254 GOTO(out, rc = -ENOMEM);
2256 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2257 attr->ia_mode |= S_IFREG;
2258 attr->ia_uid = hui->hui_uid;
2259 attr->ia_gid = hui->hui_gid;
2260 attr->ia_size = hui->hui_size;
2261 attr->ia_mtime.tv_sec = hui->hui_mtime;
2262 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2263 attr->ia_atime.tv_sec = hui->hui_atime;
2264 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2266 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2267 ATTR_UID | ATTR_GID |
2268 ATTR_MTIME | ATTR_MTIME_SET |
2269 ATTR_ATIME | ATTR_ATIME_SET;
2271 rc = ll_setattr_raw(file->f_dentry, attr, true);
2285 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2287 struct inode *inode = file->f_dentry->d_inode;
2288 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2292 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2293 PFID(ll_inode2fid(inode)), inode, cmd);
2294 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2296 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2297 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2301 case LL_IOC_GETFLAGS:
2302 /* Get the current value of the file flags */
2303 return put_user(fd->fd_flags, (int *)arg);
2304 case LL_IOC_SETFLAGS:
2305 case LL_IOC_CLRFLAGS:
2306 /* Set or clear specific file flags */
2307 /* XXX This probably needs checks to ensure the flags are
2308 * not abused, and to handle any flag side effects.
2310 if (get_user(flags, (int *) arg))
2313 if (cmd == LL_IOC_SETFLAGS) {
2314 if ((flags & LL_FILE_IGNORE_LOCK) &&
2315 !(file->f_flags & O_DIRECT)) {
2316 CERROR("%s: unable to disable locking on "
2317 "non-O_DIRECT file\n", current->comm);
2321 fd->fd_flags |= flags;
2323 fd->fd_flags &= ~flags;
2326 case LL_IOC_LOV_SETSTRIPE:
2327 RETURN(ll_lov_setstripe(inode, file, arg));
2328 case LL_IOC_LOV_SETEA:
2329 RETURN(ll_lov_setea(inode, file, arg));
2330 case LL_IOC_LOV_SWAP_LAYOUTS: {
2332 struct lustre_swap_layouts lsl;
2334 if (copy_from_user(&lsl, (char *)arg,
2335 sizeof(struct lustre_swap_layouts)))
2338 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2341 file2 = fget(lsl.sl_fd);
2346 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2347 rc = ll_swap_layouts(file, file2, &lsl);
2351 case LL_IOC_LOV_GETSTRIPE:
2352 RETURN(ll_lov_getstripe(inode, arg));
2353 case LL_IOC_RECREATE_OBJ:
2354 RETURN(ll_lov_recreate_obj(inode, arg));
2355 case LL_IOC_RECREATE_FID:
2356 RETURN(ll_lov_recreate_fid(inode, arg));
2357 case FSFILT_IOC_FIEMAP:
2358 RETURN(ll_ioctl_fiemap(inode, arg));
2359 case FSFILT_IOC_GETFLAGS:
2360 case FSFILT_IOC_SETFLAGS:
2361 RETURN(ll_iocontrol(inode, file, cmd, arg));
2362 case FSFILT_IOC_GETVERSION_OLD:
2363 case FSFILT_IOC_GETVERSION:
2364 RETURN(put_user(inode->i_generation, (int *)arg));
2365 case LL_IOC_GROUP_LOCK:
2366 RETURN(ll_get_grouplock(inode, file, arg));
2367 case LL_IOC_GROUP_UNLOCK:
2368 RETURN(ll_put_grouplock(inode, file, arg));
2369 case IOC_OBD_STATFS:
2370 RETURN(ll_obd_statfs(inode, (void *)arg));
2372 /* We need to special case any other ioctls we want to handle,
2373 * to send them to the MDS/OST as appropriate and to properly
2374 * network encode the arg field.
2375 case FSFILT_IOC_SETVERSION_OLD:
2376 case FSFILT_IOC_SETVERSION:
2378 case LL_IOC_FLUSHCTX:
2379 RETURN(ll_flush_ctx(inode));
2380 case LL_IOC_PATH2FID: {
2381 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2382 sizeof(struct lu_fid)))
2387 case OBD_IOC_FID2PATH:
2388 RETURN(ll_fid2path(inode, (void *)arg));
2389 case LL_IOC_DATA_VERSION: {
2390 struct ioc_data_version idv;
2393 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
2396 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2397 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2399 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2405 case LL_IOC_GET_MDTIDX: {
2408 mdtidx = ll_get_mdt_idx(inode);
2412 if (put_user((int)mdtidx, (int*)arg))
2417 case OBD_IOC_GETDTNAME:
2418 case OBD_IOC_GETMDNAME:
2419 RETURN(ll_get_obd_name(inode, cmd, arg));
2420 case LL_IOC_HSM_STATE_GET: {
2421 struct md_op_data *op_data;
2422 struct hsm_user_state *hus;
2429 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2430 LUSTRE_OPC_ANY, hus);
2431 if (IS_ERR(op_data)) {
2433 RETURN(PTR_ERR(op_data));
2436 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2439 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2442 ll_finish_md_op_data(op_data);
2446 case LL_IOC_HSM_STATE_SET: {
2447 struct hsm_state_set *hss;
2454 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2459 rc = ll_hsm_state_set(inode, hss);
2464 case LL_IOC_HSM_ACTION: {
2465 struct md_op_data *op_data;
2466 struct hsm_current_action *hca;
2473 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2474 LUSTRE_OPC_ANY, hca);
2475 if (IS_ERR(op_data)) {
2477 RETURN(PTR_ERR(op_data));
2480 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2483 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2486 ll_finish_md_op_data(op_data);
2490 case LL_IOC_SET_LEASE: {
2491 struct ll_inode_info *lli = ll_i2info(inode);
2492 struct obd_client_handle *och = NULL;
2498 if (!(file->f_mode & FMODE_WRITE))
2503 if (!(file->f_mode & FMODE_READ))
2508 mutex_lock(&lli->lli_och_mutex);
2509 if (fd->fd_lease_och != NULL) {
2510 och = fd->fd_lease_och;
2511 fd->fd_lease_och = NULL;
2513 mutex_unlock(&lli->lli_och_mutex);
2516 mode = och->och_flags &(FMODE_READ|FMODE_WRITE);
2517 rc = ll_lease_close(och, inode, &lease_broken);
2518 if (rc == 0 && lease_broken)
2524 /* return the type of lease or error */
2525 RETURN(rc < 0 ? rc : (int)mode);
2530 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2532 /* apply for lease */
2533 och = ll_lease_open(inode, file, mode, 0);
2535 RETURN(PTR_ERR(och));
2538 mutex_lock(&lli->lli_och_mutex);
2539 if (fd->fd_lease_och == NULL) {
2540 fd->fd_lease_och = och;
2543 mutex_unlock(&lli->lli_och_mutex);
2545 /* impossible now that only excl is supported for now */
2546 ll_lease_close(och, inode, &lease_broken);
2551 case LL_IOC_GET_LEASE: {
2552 struct ll_inode_info *lli = ll_i2info(inode);
2553 struct ldlm_lock *lock = NULL;
2556 mutex_lock(&lli->lli_och_mutex);
2557 if (fd->fd_lease_och != NULL) {
2558 struct obd_client_handle *och = fd->fd_lease_och;
2560 lock = ldlm_handle2lock(&och->och_lease_handle);
2562 lock_res_and_lock(lock);
2563 if (!ldlm_is_cancel(lock))
2564 rc = och->och_flags &
2565 (FMODE_READ | FMODE_WRITE);
2566 unlock_res_and_lock(lock);
2567 LDLM_LOCK_PUT(lock);
2570 mutex_unlock(&lli->lli_och_mutex);
2573 case LL_IOC_HSM_IMPORT: {
2574 struct hsm_user_import *hui;
2580 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2585 rc = ll_hsm_import(inode, file, hui);
2594 ll_iocontrol_call(inode, file, cmd, arg, &err))
2597 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2603 #ifndef HAVE_FILE_LLSEEK_SIZE
2604 static inline loff_t
2605 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2607 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2609 if (offset > maxsize)
2612 if (offset != file->f_pos) {
2613 file->f_pos = offset;
2614 file->f_version = 0;
2620 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2621 loff_t maxsize, loff_t eof)
2623 struct inode *inode = file->f_dentry->d_inode;
2631 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2632 * position-querying operation. Avoid rewriting the "same"
2633 * f_pos value back to the file because a concurrent read(),
2634 * write() or lseek() might have altered it
2639 * f_lock protects against read/modify/write race with other
2640 * SEEK_CURs. Note that parallel writes and reads behave
2643 mutex_lock(&inode->i_mutex);
2644 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2645 mutex_unlock(&inode->i_mutex);
2649 * In the generic case the entire file is data, so as long as
2650 * offset isn't at the end of the file then the offset is data.
2657 * There is a virtual hole at the end of the file, so as long as
2658 * offset isn't i_size or larger, return i_size.
2666 return llseek_execute(file, offset, maxsize);
/*
 * Lustre ->llseek handler.  For SEEK_END/SEEK_HOLE/SEEK_DATA the current
 * file size must be fetched from the OSTs first (ll_glimpse_size), since
 * the client-cached i_size may be stale.  The actual seek math is done by
 * ll_generic_file_llseek_size() with maxbytes from the filesystem limits.
 */
2670 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2672 struct inode *inode = file->f_dentry->d_inode;
2673 loff_t retval, eof = 0;
2676 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2677 (origin == SEEK_CUR) ? file->f_pos : 0);
2678 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2679 PFID(ll_inode2fid(inode)), inode, retval, retval,
2681 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
/* size-dependent origins need an up-to-date size from the OSTs */
2683 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2684 retval = ll_glimpse_size(inode);
2687 eof = i_size_read(inode);
2690 retval = ll_generic_file_llseek_size(file, offset, origin,
2691 ll_file_maxbytes(inode), eof);
/*
 * ->flush handler, called on every close of a file descriptor.  Does not
 * write anything out itself; it only harvests async write errors recorded
 * in the inode/object state so close() can report -EIO, unless the error
 * was already reported to this descriptor (fd_write_failed).
 */
2695 int ll_flush(struct file *file, fl_owner_t id)
2697 struct inode *inode = file->f_dentry->d_inode;
2698 struct ll_inode_info *lli = ll_i2info(inode);
2699 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2702 LASSERT(!S_ISDIR(inode->i_mode));
2704 /* catch async errors that were recorded back when async writeback
2705 * failed for pages in this mapping. */
2706 rc = lli->lli_async_rc;
2707 lli->lli_async_rc = 0;
2708 err = lov_read_and_clear_async_rc(lli->lli_clob);
2712 /* The application has been told write failure already.
2713 * Do not report failure again. */
2714 if (fd->fd_write_failed)
/* any recorded async error collapses to -EIO for the caller */
2716 return rc ? -EIO : 0;
2720 * Called to make sure a portion of file has been written out.
2721 * if @mode is not CL_FSYNC_NONE, it will send OST_SYNC RPCs to ost.
2723 * Return how many pages have been written.
/*
 * Run a CIT_FSYNC cl_io over [start, end] of @inode with the given fsync
 * @mode (NONE/LOCAL/DISCARD/ALL).  @ignore_layout lets the sync proceed
 * even during a layout change.  On success the number of pages written
 * (fio->fi_nr_written) is returned as the result.
 */
2725 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2726 enum cl_fsync_mode mode, int ignore_layout)
2728 struct cl_env_nest nest;
2731 struct obd_capa *capa = NULL;
2732 struct cl_fsync_io *fio;
/* reject any mode outside the defined cl_fsync_mode set */
2736 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2737 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2740 env = cl_env_nested_get(&nest);
2742 RETURN(PTR_ERR(env));
/* OSS write capability accompanies the sync RPCs */
2744 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2746 io = ccc_env_thread_io(env);
2747 io->ci_obj = cl_i2info(inode)->lli_clob;
2748 io->ci_ignore_layout = ignore_layout;
2750 /* initialize parameters for sync */
2751 fio = &io->u.ci_fsync;
2752 fio->fi_capa = capa;
2753 fio->fi_start = start;
2755 fio->fi_fid = ll_inode2fid(inode);
2756 fio->fi_mode = mode;
2757 fio->fi_nr_written = 0;
2759 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2760 result = cl_io_loop(env, io);
2762 result = io->ci_result;
2764 result = fio->fi_nr_written;
2765 cl_io_fini(env, io);
2766 cl_env_nested_put(&nest, env);
2774 * When dentry is provided (the 'else' case), *file->f_dentry may be
2775 * null and dentry must be used directly rather than pulled from
2776 * *file->f_dentry as is done otherwise.
/*
 * ->fsync handler; three prototypes to match different kernel APIs
 * (4-arg range fsync, 2-arg, and the old dentry-based 3-arg form).
 * Flushes dirty pages, harvests recorded async write errors, syncs
 * metadata via md_fsync(), and for datasync on regular files also
 * forces data out to the OSTs with cl_sync_file_range(CL_FSYNC_ALL).
 */
#ifdef HAVE_FILE_FSYNC_4ARGS
2780 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2782 struct dentry *dentry = file->f_dentry;
2783 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2784 int ll_fsync(struct file *file, int datasync)
2786 struct dentry *dentry = file->f_dentry;
2788 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2791 struct inode *inode = dentry->d_inode;
2792 struct ll_inode_info *lli = ll_i2info(inode);
2793 struct ptlrpc_request *req;
2794 struct obd_capa *oc;
2798 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2799 PFID(ll_inode2fid(inode)), inode);
2800 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2802 #ifdef HAVE_FILE_FSYNC_4ARGS
2803 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2804 mutex_lock(&inode->i_mutex);
2806 /* fsync's caller has already called _fdata{sync,write}, we want
2807 * that IO to finish before calling the osc and mdc sync methods */
2808 rc = filemap_fdatawait(inode->i_mapping);
2811 /* catch async errors that were recorded back when async writeback
2812 * failed for pages in this mapping. */
2813 if (!S_ISDIR(inode->i_mode)) {
2814 err = lli->lli_async_rc;
2815 lli->lli_async_rc = 0;
2818 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* MDS capability for the metadata sync RPC */
2823 oc = ll_mdscapa_get(inode);
2824 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2830 ptlrpc_req_finished(req);
2832 if (datasync && S_ISREG(inode->i_mode)) {
2833 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2835 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
/* remember the outcome so ll_flush() doesn't report it twice */
2837 if (rc == 0 && err < 0)
2840 fd->fd_write_failed = true;
2842 fd->fd_write_failed = false;
2845 #ifdef HAVE_FILE_FSYNC_4ARGS
2846 mutex_unlock(&inode->i_mutex);
/*
 * flock()/fcntl() byte-range and whole-file lock handler.  Translates the
 * VFS file_lock into an LDLM_FLOCK enqueue against the MDT, then mirrors
 * the result into the local lock tables (flock_lock_file_wait /
 * posix_lock_file_wait) so the VFS bookkeeping stays consistent.
 */
2851 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2853 struct inode *inode = file->f_dentry->d_inode;
2854 struct ll_sb_info *sbi = ll_i2sbi(inode);
2855 struct ldlm_enqueue_info einfo = {
2856 .ei_type = LDLM_FLOCK,
2857 .ei_cb_cp = ldlm_flock_completion_ast,
2858 .ei_cbdata = file_lock,
2860 struct md_op_data *op_data;
2861 struct lustre_handle lockh = {0};
2862 ldlm_policy_data_t flock = {{0}};
2868 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2869 PFID(ll_inode2fid(inode)), file_lock);
2871 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2873 if (file_lock->fl_flags & FL_FLOCK) {
2874 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2875 /* flocks are whole-file locks */
2876 flock.l_flock.end = OFFSET_MAX;
2877 /* For flocks owner is determined by the local file descriptor */
2878 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2879 } else if (file_lock->fl_flags & FL_POSIX) {
2880 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2881 flock.l_flock.start = file_lock->fl_start;
2882 flock.l_flock.end = file_lock->fl_end;
2886 flock.l_flock.pid = file_lock->fl_pid;
2888 /* Somewhat ugly workaround for svc lockd.
2889 * lockd installs custom fl_lmops->lm_compare_owner that checks
2890 * for the fl_owner to be the same (which it always is on local node
2891 * I guess between lockd processes) and then compares pid.
2892 * As such we assign pid to the owner field to make it all work,
2893 * conflict with normal locks is unlikely since pid space and
2894 * pointer space for current->files are not intersecting */
2895 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2896 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* map POSIX lock type to an LDLM lock mode */
2898 switch (file_lock->fl_type) {
2900 einfo.ei_mode = LCK_PR;
2903 /* An unlock request may or may not have any relation to
2904 * existing locks so we may not be able to pass a lock handle
2905 * via a normal ldlm_lock_cancel() request. The request may even
2906 * unlock a byte range in the middle of an existing lock. In
2907 * order to process an unlock request we need all of the same
2908 * information that is given with a normal read or write record
2909 * lock request. To avoid creating another ldlm unlock (cancel)
2910 * message we'll treat a LCK_NL flock request as an unlock. */
2911 einfo.ei_mode = LCK_NL;
2914 einfo.ei_mode = LCK_PW;
2917 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2918 file_lock->fl_type);
2933 flags = LDLM_FL_BLOCK_NOWAIT;
2939 flags = LDLM_FL_TEST_LOCK;
2940 /* Save the old mode so that if the mode in the lock changes we
2941 * can decrement the appropriate reader or writer refcount. */
2942 file_lock->fl_type = einfo.ei_mode;
2945 CERROR("unknown fcntl lock command: %d\n", cmd);
2949 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2950 LUSTRE_OPC_ANY, NULL);
2951 if (IS_ERR(op_data))
2952 RETURN(PTR_ERR(op_data));
2954 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2955 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2956 flock.l_flock.pid, flags, einfo.ei_mode,
2957 flock.l_flock.start, flock.l_flock.end);
/* server-side enqueue; flock is managed by the MDT */
2959 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2960 op_data, &lockh, &flock, 0, NULL /* req */, flags);
/* mirror granted/released locks into the kernel's local lock lists */
2962 if ((file_lock->fl_flags & FL_FLOCK) &&
2963 (rc == 0 || file_lock->fl_type == F_UNLCK))
2964 rc2 = flock_lock_file_wait(file, file_lock);
2965 if ((file_lock->fl_flags & FL_POSIX) &&
2966 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2967 !(flags & LDLM_FL_TEST_LOCK))
2968 rc2 = posix_lock_file_wait(file, file_lock);
/* local bookkeeping failed: undo the server lock with an NL enqueue */
2970 if (rc2 && file_lock->fl_type != F_UNLCK) {
2971 einfo.ei_mode = LCK_NL;
2972 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2973 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2977 ll_finish_md_op_data(op_data);
/* flock/lock entry point for "-o noflock" mounts (wired into
 * ll_file_operations_noflock); body not visible in this excerpt —
 * presumably returns an error so flock appears unsupported. TODO confirm. */
2982 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2990 * test if some locks matching bits and l_req_mode are acquired
2991 * - bits can be in different locks
2992 * - if found clear the common lock bits in *bits
2993 * - the bits not found, are kept in *bits
2995 * \param bits [IN] searched lock bits
2996 * \param l_req_mode [IN] searched lock mode
2997 * \retval boolean, true iff all bits are found
/*
 * Test whether MD (IBITS) locks covering all of *bits are already cached,
 * one bit at a time, using LDLM_FL_TEST_LOCK matches (no references taken).
 * Bits found are cleared from *bits; bits not found remain set.
 */
2999 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3001 struct lustre_handle lockh;
3002 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any of the four normal modes" */
3003 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3004 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3013 fid = &ll_i2info(inode)->lli_fid;
3014 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3015 ldlm_lockname[mode]);
3017 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
3018 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3019 policy.l_inodebits.bits = *bits & (1 << i);
3020 if (policy.l_inodebits.bits == 0)
3023 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3024 &policy, mode, &lockh)) {
3025 struct ldlm_lock *lock;
3027 lock = ldlm_handle2lock(&lockh);
3030 ~(lock->l_policy_data.l_inodebits.bits);
3031 LDLM_LOCK_PUT(lock);
3033 *bits &= ~policy.l_inodebits.bits;
/*
 * Match and take a reference on a cached MD IBITS lock covering @bits.
 * Returns the matched lock mode (0 on no match) with the handle in @lockh;
 * unlike ll_have_md_lock() this is not a TEST_LOCK match, so the caller
 * owns a reference and must decref when done.
 */
3040 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3041 struct lustre_handle *lockh, __u64 flags,
3044 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3049 fid = &ll_i2info(inode)->lli_fid;
3050 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3052 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3053 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Post-process the result of an inode revalidation RPC: -ENOENT on a
 * non-regular, non-directory inode is treated as an already-unlinked
 * object (success); other errors are logged and propagated.
 */
3058 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3060 /* Already unlinked. Just update nlink and return success */
3061 if (rc == -ENOENT) {
3063 /* This path cannot be hit for regular files unless in
3064 * case of obscure races, so no need to validate
3066 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3068 } else if (rc != 0) {
3069 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
3070 ll_get_fsname(inode->i_sb, NULL, 0),
3071 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate the dentry's inode attributes against the MDT.  Two paths:
 * with OBD_CONNECT_ATTRFID, an intent getattr-by-FID is issued (which also
 * refreshes dentry state and can invalidate unlinked dentries); otherwise,
 * if no suitable MD lock is cached, a plain md_getattr() fetches attrs
 * (and EA for regular files) and applies them via ll_prep_inode().
 */
3077 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3080 struct inode *inode = dentry->d_inode;
3081 struct ptlrpc_request *req = NULL;
3082 struct obd_export *exp;
3086 LASSERT(inode != NULL);
3088 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3089 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3091 exp = ll_i2mdexp(inode);
3093 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3094 * But under CMD case, it caused some lock issues, should be fixed
3095 * with new CMD ibits lock. See bug 12718 */
3096 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3097 struct lookup_intent oit = { .it_op = IT_GETATTR };
3098 struct md_op_data *op_data;
/* a pure LOOKUP-bit revalidation downgrades the intent */
3100 if (ibits == MDS_INODELOCK_LOOKUP)
3101 oit.it_op = IT_LOOKUP;
3103 /* Call getattr by fid, so do not provide name at all. */
3104 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3105 dentry->d_inode, NULL, 0, 0,
3106 LUSTRE_OPC_ANY, NULL);
3107 if (IS_ERR(op_data))
3108 RETURN(PTR_ERR(op_data));
3110 oit.it_create_mode |= M_CHECK_STALE;
3111 rc = md_intent_lock(exp, op_data, NULL, 0,
3112 /* we are not interested in name
3115 ll_md_blocking_ast, 0);
3116 ll_finish_md_op_data(op_data);
3117 oit.it_create_mode &= ~M_CHECK_STALE;
3119 rc = ll_inode_revalidate_fini(inode, rc);
3123 rc = ll_revalidate_it_finish(req, &oit, dentry);
3125 ll_intent_release(&oit);
3129 /* Unlinked? Unhash dentry, so it is not picked up later by
3130 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3131 here to preserve get_cwd functionality on 2.6.
3133 if (!dentry->d_inode->i_nlink)
3134 d_lustre_invalidate(dentry, 0);
3136 ll_lookup_finish_locks(&oit, dentry);
3137 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3138 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3139 obd_valid valid = OBD_MD_FLGETATTR;
3140 struct md_op_data *op_data;
/* regular files also need striping EA, sized from the MDS max */
3143 if (S_ISREG(inode->i_mode)) {
3144 rc = ll_get_max_mdsize(sbi, &ealen);
3147 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3150 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3151 0, ealen, LUSTRE_OPC_ANY,
3153 if (IS_ERR(op_data))
3154 RETURN(PTR_ERR(op_data));
3156 op_data->op_valid = valid;
3157 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3158 * capa for this inode. Because we only keep capas of dirs
3160 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3161 ll_finish_md_op_data(op_data);
3163 rc = ll_inode_revalidate_fini(inode, rc);
3167 rc = ll_prep_inode(&inode, req, NULL, NULL);
3170 ptlrpc_req_finished(req);
/*
 * Full revalidation: refresh MD attributes via __ll_inode_revalidate_it(),
 * then refresh size.  For non-regular files, times come straight from the
 * cached LVB; for regular files a glimpse to the OSTs fetches the size,
 * except while an HSM restore is running (MDT already supplied the size
 * and holds the layout lock, so a glimpse would block until restore ends).
 */
3174 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3177 struct inode *inode = dentry->d_inode;
3181 rc = __ll_inode_revalidate_it(dentry, it, ibits);
3185 /* if object isn't regular file, don't validate size */
3186 if (!S_ISREG(inode->i_mode)) {
3187 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
3188 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
3189 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
3191 /* In case of restore, the MDT has the right size and has
3192 * already send it back without granting the layout lock,
3193 * inode is up-to-date so glimpse is useless.
3194 * Also to glimpse we need the layout, in case of a running
3195 * restore the MDT holds the layout lock so the glimpse will
3196 * block up to the end of restore (getattr will block)
3198 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3199 rc = ll_glimpse_size(inode);
/*
 * Intent-aware getattr: revalidate UPDATE|LOOKUP bits, then fill *stat
 * from the (now fresh) inode fields.  The inode number comes from the FID
 * when the 32-bit API is required for this caller.
 */
3204 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
3205 struct lookup_intent *it, struct kstat *stat)
3207 struct inode *inode = de->d_inode;
3208 struct ll_sb_info *sbi = ll_i2sbi(inode);
3209 struct ll_inode_info *lli = ll_i2info(inode);
3212 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3213 MDS_INODELOCK_LOOKUP);
3214 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3219 stat->dev = inode->i_sb->s_dev;
/* 32-bit userspace gets a FID-derived ino that fits in 32 bits */
3220 if (ll_need_32bit_api(sbi))
3221 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3223 stat->ino = inode->i_ino;
3224 stat->mode = inode->i_mode;
3225 stat->nlink = inode->i_nlink;
3226 stat->uid = inode->i_uid;
3227 stat->gid = inode->i_gid;
3228 stat->rdev = inode->i_rdev;
3229 stat->atime = inode->i_atime;
3230 stat->mtime = inode->i_mtime;
3231 stat->ctime = inode->i_ctime;
3232 stat->blksize = 1 << inode->i_blkbits;
3234 stat->size = i_size_read(inode);
3235 stat->blocks = inode->i_blocks;
/* VFS ->getattr entry point: wrap ll_getattr_it() with an IT_GETATTR intent. */
3239 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3241 struct lookup_intent it = { .it_op = IT_GETATTR };
3243 return ll_getattr_it(mnt, de, &it, stat);
/*
 * VFS ->fiemap handler: marshal the kernel fiemap_extent_info into a
 * Lustre ll_user_fiemap buffer, run ll_do_fiemap(), and copy the mapped
 * extents back to the caller's extent array.
 */
3246 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3247 __u64 start, __u64 len)
3251 struct ll_user_fiemap *fiemap;
3252 unsigned int extent_count = fieinfo->fi_extents_max;
3254 num_bytes = sizeof(*fiemap) + (extent_count *
3255 sizeof(struct ll_fiemap_extent));
3256 OBD_ALLOC_LARGE(fiemap, num_bytes);
3261 fiemap->fm_flags = fieinfo->fi_flags;
3262 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3263 fiemap->fm_start = start;
3264 fiemap->fm_length = len;
/* copies exactly one extent in — presumably to seed continuation state
 * for ll_do_fiemap(); NOTE(review): with fi_extents_max == 0 this reads
 * fi_extents_start anyway — confirm callers guarantee it is valid */
3265 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3266 sizeof(struct ll_fiemap_extent));
3268 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3270 fieinfo->fi_flags = fiemap->fm_flags;
3271 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3272 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3273 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3275 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * Return a referenced copy of the cached POSIX ACL for @inode.
 * lli_lock guards the cached lli_posix_acl pointer while duplicating it.
 */
3279 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3281 struct ll_inode_info *lli = ll_i2info(inode);
3282 struct posix_acl *acl = NULL;
3285 spin_lock(&lli->lli_lock);
3286 /* VFS' acl_permission_check->check_acl will release the refcount */
3287 acl = posix_acl_dup(lli->lli_posix_acl);
3288 spin_unlock(&lli->lli_lock);
/*
 * ACL permission callback for older generic_permission() APIs (only built
 * when the 2-arg form is absent).  Under RCU walk it bails out early;
 * otherwise it checks @mask against the cached ACL.  Without
 * CONFIG_FS_POSIX_ACL the function body is effectively a stub.
 */
#ifndef HAVE_GENERIC_PERMISSION_2ARGS
3295 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3296 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3298 ll_check_acl(struct inode *inode, int mask)
3301 # ifdef CONFIG_FS_POSIX_ACL
3302 struct posix_acl *acl;
3306 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* cannot take locks or block under RCU-walk */
3307 if (flags & IPERM_FLAG_RCU)
3310 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3315 rc = posix_acl_permission(inode, acl, mask);
3316 posix_acl_release(acl);
3319 # else /* !CONFIG_FS_POSIX_ACL */
3321 # endif /* CONFIG_FS_POSIX_ACL */
3323 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * VFS ->permission handler (three prototypes for kernel API variants).
 * Bails out of RCU-walk, revalidates the root inode on first use, defers
 * to the remote-permission path for RMT_CLIENT mounts, and otherwise runs
 * the generic permission check with ll_check_acl.
 */
#ifdef HAVE_GENERIC_PERMISSION_4ARGS
3326 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3328 # ifdef HAVE_INODE_PERMISION_2ARGS
3329 int ll_inode_permission(struct inode *inode, int mask)
3331 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
/* cannot block in RCU path-walk; ask VFS to retry in ref-walk mode */
3338 #ifdef MAY_NOT_BLOCK
3339 if (mask & MAY_NOT_BLOCK)
3341 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3342 if (flags & IPERM_FLAG_RCU)
3346 /* as root inode are NOT getting validated in lookup operation,
3347 * need to do it before permission check. */
3349 if (inode == inode->i_sb->s_root->d_inode) {
3350 struct lookup_intent it = { .it_op = IT_LOOKUP };
3352 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3353 MDS_INODELOCK_LOOKUP);
3358 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3359 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
/* remote-client mounts enforce permissions on the server side */
3361 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3362 return lustre_check_remote_perm(inode, mask);
3364 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3365 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
/* Default file ops: no .flock/.lock handlers, so "-o localflock" mounts
 * get only locally consistent (kernel-internal) flock locks. */
3371 struct file_operations ll_file_operations = {
3372 .read = ll_file_read,
3373 .aio_read = ll_file_aio_read,
3374 .write = ll_file_write,
3375 .aio_write = ll_file_aio_write,
3376 .unlocked_ioctl = ll_file_ioctl,
3377 .open = ll_file_open,
3378 .release = ll_file_release,
3379 .mmap = ll_file_mmap,
3380 .llseek = ll_file_seek,
3381 .splice_read = ll_file_splice_read,
/* File ops for "-o flock" mounts: cluster-coherent flock/POSIX locks via
 * ll_file_flock (MDT-managed LDLM_FLOCK locks). */
3386 struct file_operations ll_file_operations_flock = {
3387 .read = ll_file_read,
3388 .aio_read = ll_file_aio_read,
3389 .write = ll_file_write,
3390 .aio_write = ll_file_aio_write,
3391 .unlocked_ioctl = ll_file_ioctl,
3392 .open = ll_file_open,
3393 .release = ll_file_release,
3394 .mmap = ll_file_mmap,
3395 .llseek = ll_file_seek,
3396 .splice_read = ll_file_splice_read,
3399 .flock = ll_file_flock,
3400 .lock = ll_file_flock
/* These are for -o noflock - to return ENOSYS on flock calls */
3404 struct file_operations ll_file_operations_noflock = {
3405 .read = ll_file_read,
3406 .aio_read = ll_file_aio_read,
3407 .write = ll_file_write,
3408 .aio_write = ll_file_aio_write,
3409 .unlocked_ioctl = ll_file_ioctl,
3410 .open = ll_file_open,
3411 .release = ll_file_release,
3412 .mmap = ll_file_mmap,
3413 .llseek = ll_file_seek,
3414 .splice_read = ll_file_splice_read,
3417 .flock = ll_file_noflock,
3418 .lock = ll_file_noflock
/* Inode operations for regular files; .get_acl only where the kernel
 * inode_operations provides the hook. */
3421 struct inode_operations ll_file_inode_operations = {
3422 .setattr = ll_setattr,
3423 .getattr = ll_getattr,
3424 .permission = ll_inode_permission,
3425 .setxattr = ll_setxattr,
3426 .getxattr = ll_getxattr,
3427 .listxattr = ll_listxattr,
3428 .removexattr = ll_removexattr,
3429 .fiemap = ll_fiemap,
3430 #ifdef HAVE_IOP_GET_ACL
3431 .get_acl = ll_get_acl,
/* dynamic ioctl number support routines */
/* Global registry of dynamically registered ioctl handlers, protected by
 * an rwsem (readers: dispatch; writers: register/unregister). */
3436 static struct llioc_ctl_data {
3437 struct rw_semaphore ioc_sem;
3438 cfs_list_t ioc_head;
3440 __RWSEM_INITIALIZER(llioc.ioc_sem),
3441 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registration: callback plus its list of handled ioctl numbers
 * (iocd_cmd is a C89-style flexible array, allocated with the struct). */
3446 cfs_list_t iocd_list;
3447 unsigned int iocd_size;
3448 llioc_callback_t iocd_cb;
3449 unsigned int iocd_count;
3450 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler @cb for @count ioctl numbers in @cmd.
 * Returns an opaque cookie (the allocation itself) for later unregister,
 * or NULL on bad arguments / allocation failure.
 */
3453 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3456 struct llioc_data *in_data = NULL;
3459 if (cb == NULL || cmd == NULL ||
3460 count > LLIOC_MAX_CMD || count < 0)
3463 size = sizeof(*in_data) + count * sizeof(unsigned int);
3464 OBD_ALLOC(in_data, size);
3465 if (in_data == NULL)
3468 memset(in_data, 0, sizeof(*in_data));
3469 in_data->iocd_size = size;
3470 in_data->iocd_cb = cb;
3471 in_data->iocd_count = count;
3472 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* publish under the write lock */
3474 down_write(&llioc.ioc_sem);
3475 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3476 up_write(&llioc.ioc_sem);
/*
 * Remove and free the registration identified by @magic (the cookie from
 * ll_iocontrol_register).  Warns if the cookie is not found.
 */
3481 void ll_iocontrol_unregister(void *magic)
3483 struct llioc_data *tmp;
3488 down_write(&llioc.ioc_sem);
3489 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* stash size before unlink: tmp is freed after dropping the lock */
3491 unsigned int size = tmp->iocd_size;
3493 cfs_list_del(&tmp->iocd_list);
3494 up_write(&llioc.ioc_sem);
3496 OBD_FREE(tmp, size);
3500 up_write(&llioc.ioc_sem);
3502 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3505 EXPORT_SYMBOL(ll_iocontrol_register);
3506 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch @cmd to registered dynamic ioctl handlers.  Each registration
 * whose command list contains @cmd is invoked until one returns
 * LLIOC_STOP; the handler's rc is passed back through *rcp.
 */
3508 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3509 unsigned int cmd, unsigned long arg, int *rcp)
3511 enum llioc_iter ret = LLIOC_CONT;
3512 struct llioc_data *data;
3513 int rc = -EINVAL, i;
3515 down_read(&llioc.ioc_sem);
3516 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3517 for (i = 0; i < data->iocd_count; i++) {
3518 if (cmd != data->iocd_cmd[i])
3521 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3525 if (ret == LLIOC_STOP)
3528 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration change down to the cl_object stack for
 * @inode.  For OBJECT_CONF_SET the associated layout DLM lock is only
 * allowed to match after the layout has been applied, so concurrent
 * matchers never observe a stale layout.
 */
3535 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3537 struct ll_inode_info *lli = ll_i2info(inode);
3538 struct cl_env_nest nest;
3543 if (lli->lli_clob == NULL)
3546 env = cl_env_nested_get(&nest);
3548 RETURN(PTR_ERR(env));
3550 result = cl_conf_set(env, lli->lli_clob, conf);
3551 cl_env_nested_put(&nest, env);
3553 if (conf->coc_opc == OBJECT_CONF_SET) {
3554 struct ldlm_lock *lock = conf->coc_lock;
3556 LASSERT(lock != NULL);
3557 LASSERT(ldlm_has_layout(lock));
3559 /* it can only be allowed to match after layout is
3560 * applied to inode otherwise false layout would be
3561 * seen. Applying layout should happen before dropping
3562 * the intent lock. */
3563 ldlm_lock_allow_match(lock);
/* Fetch layout from MDT with getxattr request, if it's not ready yet */
/*
 * If the layout lock's LVB is not populated (lock granted via completion
 * AST rather than directly), fetch the LOV EA from the MDT with a
 * getxattr RPC and install it as the lock's lvb_data.
 */
3570 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3573 struct ll_sb_info *sbi = ll_i2sbi(inode);
3574 struct obd_capa *oc;
3575 struct ptlrpc_request *req;
3576 struct mdt_body *body;
3583 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3584 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3585 lock->l_lvb_data, lock->l_lvb_len);
/* already populated and ready: nothing to fetch */
3587 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3590 /* if layout lock was granted right away, the layout is returned
3591 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3592 * blocked and then granted via completion ast, we have to fetch
3593 * layout here. Please note that we can't use the LVB buffer in
3594 * completion AST because it doesn't have a large enough buffer */
3595 oc = ll_mdscapa_get(inode);
3596 rc = ll_get_max_mdsize(sbi, &lmmsize);
3598 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3599 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3605 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3606 if (body == NULL || body->eadatasize > lmmsize)
3607 GOTO(out, rc = -EPROTO);
3609 lmmsize = body->eadatasize;
3610 if (lmmsize == 0) /* empty layout */
3613 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3615 GOTO(out, rc = -EFAULT);
/* copy into a standalone buffer whose lifetime matches the lock */
3617 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3618 if (lvbdata == NULL)
3619 GOTO(out, rc = -ENOMEM);
3621 memcpy(lvbdata, lmm, lmmsize);
3622 lock_res_and_lock(lock);
3623 if (lock->l_lvb_data != NULL)
3624 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3626 lock->l_lvb_data = lvbdata;
3627 lock->l_lvb_len = lmmsize;
3628 unlock_res_and_lock(lock);
3633 ptlrpc_req_finished(req);
/*
 * Apply the layout to the inode. Layout lock is held and will be released
 * before this function returns (via ldlm_lock_decref below).
 *
 * Fetches the layout LVB if needed, unpacks it into an lsm, configures the
 * cl_object with OBJECT_CONF_SET, and reports the resulting layout
 * generation via *gen.  If reconfiguration hits -EBUSY (IO in flight),
 * waits for in-flight IO with OBJECT_CONF_WAIT after dropping the lock.
 */
3641 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3642 struct inode *inode, __u32 *gen, bool reconf)
3644 struct ll_inode_info *lli = ll_i2info(inode);
3645 struct ll_sb_info *sbi = ll_i2sbi(inode);
3646 struct ldlm_lock *lock;
3647 struct lustre_md md = { NULL };
3648 struct cl_object_conf conf;
3651 bool wait_layout = false;
3654 LASSERT(lustre_handle_is_used(lockh));
3656 lock = ldlm_handle2lock(lockh);
3657 LASSERT(lock != NULL);
3658 LASSERT(ldlm_has_layout(lock));
3660 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d\n",
3661 PFID(&lli->lli_fid), inode, reconf);
3663 /* in case this is a caching lock and reinstate with new inode */
3664 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3666 lock_res_and_lock(lock);
3667 lvb_ready = ldlm_is_lvb_ready(lock);
3668 unlock_res_and_lock(lock);
3669 /* checking lvb_ready is racy but this is okay. The worst case is
3670 * that multi processes may configure the file on the same time. */
3672 if (lvb_ready || !reconf) {
3675 /* layout_gen must be valid if layout lock is not
3676 * cancelled and stripe has already set */
3677 *gen = lli->lli_layout_gen;
3683 rc = ll_layout_fetch(inode, lock);
3687 /* for layout lock, lmm is returned in lock's lvb.
3688 * lvb_data is immutable if the lock is held so it's safe to access it
3689 * without res lock. See the description in ldlm_lock_decref_internal()
3690 * for the condition to free lvb_data of layout lock */
3691 if (lock->l_lvb_data != NULL) {
3692 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3693 lock->l_lvb_data, lock->l_lvb_len);
3695 *gen = LL_LAYOUT_GEN_EMPTY;
3697 *gen = md.lsm->lsm_layout_gen;
3700 CERROR("%s: file "DFID" unpackmd error: %d\n",
3701 ll_get_fsname(inode->i_sb, NULL, 0),
3702 PFID(&lli->lli_fid), rc);
3708 /* set layout to file. Unlikely this will fail as old layout was
3709 * surely eliminated */
3710 memset(&conf, 0, sizeof conf);
3711 conf.coc_opc = OBJECT_CONF_SET;
3712 conf.coc_inode = inode;
3713 conf.coc_lock = lock;
3714 conf.u.coc_md = &md;
3715 rc = ll_layout_conf(inode, &conf);
3718 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3720 /* refresh layout failed, need to wait */
3721 wait_layout = rc == -EBUSY;
3725 LDLM_LOCK_PUT(lock);
3726 ldlm_lock_decref(lockh, mode);
3728 /* wait for IO to complete if it's still being used. */
3730 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3731 ll_get_fsname(inode->i_sb, NULL, 0),
3732 PFID(&lli->lli_fid), inode);
3734 memset(&conf, 0, sizeof conf);
3735 conf.coc_opc = OBJECT_CONF_WAIT;
3736 conf.coc_inode = inode;
3737 rc = ll_layout_conf(inode, &conf);
3741 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3742 ll_get_fsname(inode->i_sb, NULL, 0),
3743 PFID(&lli->lli_fid), rc);
3749 * This function checks if there exists a LAYOUT lock on the client side,
3750 * or enqueues it if it doesn't have one in cache.
3752 * This function will not hold layout lock so it may be revoked any time after
3753 * this function returns. Any operations depend on layout should be redone
3756 * This function should be called before lov_io_init() to get an uptodate
3757 * layout version, the caller should save the version number and after IO
3758 * is finished, this function should be called again to verify that layout
3759 * is not changed during IO time.
/*
 * Ensure a layout lock is cached for @inode and return the current layout
 * generation in *gen.  Fast path: match an already-cached lock.  Slow
 * path: serialize on lli_layout_mutex, re-check the cache, then enqueue
 * an IT_LAYOUT intent lock from the MDT and apply the returned layout.
 */
3763 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3764 struct ll_inode_info *lli = ll_i2info(inode);
3765 struct ll_sb_info *sbi = ll_i2sbi(inode);
3766 struct md_op_data *op_data;
3767 struct lookup_intent it;
3768 struct lustre_handle lockh;
3770 struct ldlm_enqueue_info einfo = {
3771 .ei_type = LDLM_IBITS,
3772 .ei_cb_bl = ll_md_blocking_ast,
3773 .ei_cb_cp = ldlm_completion_ast,
/* layout locks disabled: just report the cached generation */
3778 *gen = lli->lli_layout_gen;
3779 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3783 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3784 LASSERT(S_ISREG(inode->i_mode));
3786 /* mostly layout lock is caching on the local side, so try to match
3787 * it before grabbing layout lock mutex. */
3788 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3789 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3790 if (mode != 0) { /* hit cached lock */
3791 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3795 /* better hold lli_layout_mutex to try again otherwise
3796 * it will have starvation problem. */
3799 /* take layout lock mutex to enqueue layout lock exclusively. */
3800 mutex_lock(&lli->lli_layout_mutex);
3803 /* try again. Maybe somebody else has done this. */
3804 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3805 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3806 if (mode != 0) { /* hit cached lock */
3807 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3811 mutex_unlock(&lli->lli_layout_mutex);
3815 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3816 0, 0, LUSTRE_OPC_ANY, NULL);
3817 if (IS_ERR(op_data)) {
3818 mutex_unlock(&lli->lli_layout_mutex);
3819 RETURN(PTR_ERR(op_data));
3822 /* have to enqueue one */
3823 memset(&it, 0, sizeof(it));
3824 it.it_op = IT_LAYOUT;
3825 lockh.cookie = 0ULL;
3827 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)\n",
3828 ll_get_fsname(inode->i_sb, NULL, 0),
3829 PFID(&lli->lli_fid), inode);
3831 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
/* the intent's request is no longer needed once the lock is held */
3833 if (it.d.lustre.it_data != NULL)
3834 ptlrpc_req_finished(it.d.lustre.it_data);
3835 it.d.lustre.it_data = NULL;
3837 ll_finish_md_op_data(op_data);
/* transfer the lock reference out of the intent before releasing it */
3839 mode = it.d.lustre.it_lock_mode;
3840 it.d.lustre.it_lock_mode = 0;
3841 ll_intent_drop_lock(&it);
3844 /* set lock data in case this is a new lock */
3845 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3846 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3850 mutex_unlock(&lli->lli_layout_mutex);
3856 * This function send a restore request to the MDT
3858 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3860 struct hsm_user_request *hur;
3864 len = sizeof(struct hsm_user_request) +
3865 sizeof(struct hsm_user_item);
3866 OBD_ALLOC(hur, len);
3870 hur->hur_request.hr_action = HUA_RESTORE;
3871 hur->hur_request.hr_archive_id = 0;
3872 hur->hur_request.hr_flags = 0;
3873 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3874 sizeof(hur->hur_user_item[0].hui_fid));
3875 hur->hur_user_item[0].hui_extent.offset = offset;
3876 hur->hur_user_item[0].hui_extent.length = length;
3877 hur->hur_request.hr_itemcount = 1;
3878 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,