4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/*
 * Allocate a per-open-file ll_file_data from the dedicated slab cache.
 * NOTE(review): the NULL check after allocation, remaining field
 * initialization and the return statement are elided in this chunk —
 * do not assume fd is valid at the visible lines below.
 */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
61 fd->fd_write_failed = false;
/* Release an ll_file_data previously obtained from ll_file_data_get(). */
66 static void ll_file_data_put(struct ll_file_data *fd)
69 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Copy the inode's current attributes (mode, times, size, blocks, flags),
 * IO epoch, MDS capability and the open handle @fh into @op_data so they
 * can be sent to the MDS (e.g. on close / SOM update).
 */
72 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
73 struct lustre_handle *fh)
75 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
76 op_data->op_attr.ia_mode = inode->i_mode;
77 op_data->op_attr.ia_atime = inode->i_atime;
78 op_data->op_attr.ia_mtime = inode->i_mtime;
79 op_data->op_attr.ia_ctime = inode->i_ctime;
80 op_data->op_attr.ia_size = i_size_read(inode);
81 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the Lustre-extended iattr, hence the cast. */
82 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
83 ll_inode_to_ext_flags(inode->i_flags);
84 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
86 op_data->op_handle = *fh;
87 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Tell the MDS the data was modified so it refreshes its view on close. */
89 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
90 op_data->op_bias |= MDS_DATA_MODIFIED;
/*
 * Closes the IO epoch and packs all the attributes into @op_data for
 * the CLOSE rpc.  Size/blocks are only sent when the handle was opened
 * for write and either SOM is unsupported by the MDS or the inode is
 * not a regular file.
 */
97 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
98 struct obd_client_handle *och)
102 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
103 ATTR_MTIME | ATTR_MTIME_SET |
104 ATTR_CTIME | ATTR_CTIME_SET;
106 if (!(och->och_flags & FMODE_WRITE))
109 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
110 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* NOTE(review): control flow between the branches above is elided in
 * this chunk; presumably a goto/label joins them — confirm in full file. */
112 ll_ioepoch_close(inode, op_data, &och, 0);
115 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
116 ll_prep_md_op_data(op_data, inode, NULL, NULL,
117 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send the CLOSE rpc for open handle @och to the MDS and tear down the
 * client-side state.  When @data_version is non-NULL this close is an
 * HSM release (MDS_HSM_RELEASE) carrying that data version.
 * Also performs the Size-on-MDS update when the epoch is closed, and
 * clears LLIF_DATA_MODIFIED once MDS_DATA_MODIFIED was delivered.
 */
121 static int ll_close_inode_openhandle(struct obd_export *md_exp,
123 struct obd_client_handle *och,
124 const __u64 *data_version)
126 struct obd_export *exp = ll_i2mdexp(inode);
127 struct md_op_data *op_data;
128 struct ptlrpc_request *req = NULL;
129 struct obd_device *obd = class_exp2obd(exp);
136 * XXX: in case of LMV, is this correct to access
139 CERROR("Invalid MDC connection handle "LPX64"\n",
140 ll_i2mdexp(inode)->exp_handle.h_cookie);
144 OBD_ALLOC_PTR(op_data);
146 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
148 ll_prepare_close(inode, op_data, och);
149 if (data_version != NULL) {
150 /* Pass in data_version implies release. */
151 op_data->op_bias |= MDS_HSM_RELEASE;
152 op_data->op_data_version = *data_version;
153 op_data->op_lease_handle = och->och_lease_handle;
154 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
156 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
157 rc = md_close(md_exp, op_data, och->och_mod, &req);
159 /* This close must have the epoch closed. */
160 LASSERT(epoch_close);
161 /* MDS has instructed us to obtain Size-on-MDS attribute from
162 * OSTs and send setattr back to MDS. */
163 rc = ll_som_update(inode, op_data);
165 CERROR("%s: inode "DFID" mdc Size-on-MDS update"
166 " failed: rc = %d\n",
167 ll_i2mdexp(inode)->exp_obd->obd_name,
168 PFID(ll_inode2fid(inode)), rc);
172 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
173 ll_i2mdexp(inode)->exp_obd->obd_name,
174 PFID(ll_inode2fid(inode)), rc);
177 /* DATA_MODIFIED flag was successfully sent on close, cancel data
178 * modification flag. */
179 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
180 struct ll_inode_info *lli = ll_i2info(inode);
/* lli_flags is shared; protect the read-modify-write. */
182 spin_lock(&lli->lli_lock);
183 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
184 spin_unlock(&lli->lli_lock);
188 rc = ll_objects_destroy(req, inode);
190 CERROR("%s: inode "DFID
191 " ll_objects destroy: rc = %d\n",
192 ll_i2mdexp(inode)->exp_obd->obd_name,
193 PFID(ll_inode2fid(inode)), rc);
/* For HSM release, verify the server actually released the file. */
196 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
197 struct mdt_body *body;
198 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
199 if (!(body->valid & OBD_MD_FLRELEASED))
203 ll_finish_md_op_data(op_data);
/* SOM + write handle with the epoch still open: defer DONE_WRITING. */
207 if (exp_connect_som(exp) && !epoch_close &&
208 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
209 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
211 md_clear_open_replay_data(md_exp, och);
212 /* Free @och if it is not waiting for DONE_WRITING. */
213 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
216 if (req) /* This is close request */
217 ptlrpc_req_finished(req);
/*
 * Drop the MDS open handle of the given mode (write/exec/read) for
 * @inode, but only when no other local users of that handle remain
 * (och_usecount == 0).  Called with the last matching file descriptor
 * going away.
 */
221 int ll_md_real_close(struct inode *inode, fmode_t fmode)
223 struct ll_inode_info *lli = ll_i2info(inode);
224 struct obd_client_handle **och_p;
225 struct obd_client_handle *och;
/* Pick the per-mode handle slot and its use counter. */
230 if (fmode & FMODE_WRITE) {
231 och_p = &lli->lli_mds_write_och;
232 och_usecount = &lli->lli_open_fd_write_count;
233 } else if (fmode & FMODE_EXEC) {
234 och_p = &lli->lli_mds_exec_och;
235 och_usecount = &lli->lli_open_fd_exec_count;
237 LASSERT(fmode & FMODE_READ);
238 och_p = &lli->lli_mds_read_och;
239 och_usecount = &lli->lli_open_fd_read_count;
242 mutex_lock(&lli->lli_och_mutex);
243 if (*och_usecount > 0) {
244 /* There are still users of this handle, so skip
246 mutex_unlock(&lli->lli_och_mutex);
252 mutex_unlock(&lli->lli_och_mutex);
255 /* There might be a race and this handle may already
257 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close: drop group lock and any lease/och still
 * attached to the fd, decrement the per-mode open counters, and talk
 * to the MDS (via ll_md_real_close) only when we do not hold a
 * matching OPEN dlm lock that lets us skip the rpc.
 */
264 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
267 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
268 struct ll_inode_info *lli = ll_i2info(inode);
272 /* clear group lock, if present */
273 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
274 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
276 if (fd->fd_lease_och != NULL) {
279 /* Usually the lease is not released when the
280 * application crashed, we need to release here. */
281 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
282 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
283 PFID(&lli->lli_fid), rc, lease_broken);
285 fd->fd_lease_och = NULL;
288 if (fd->fd_och != NULL) {
289 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
294 /* Let's see if we have good enough OPEN lock on the file and if
295 we can skip talking to MDS */
296 if (file->f_dentry->d_inode) { /* Can this ever be false? */
298 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
299 struct lustre_handle lockh;
300 struct inode *inode = file->f_dentry->d_inode;
301 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
303 mutex_lock(&lli->lli_och_mutex);
304 if (fd->fd_omode & FMODE_WRITE) {
306 LASSERT(lli->lli_open_fd_write_count);
307 lli->lli_open_fd_write_count--;
308 } else if (fd->fd_omode & FMODE_EXEC) {
310 LASSERT(lli->lli_open_fd_exec_count);
311 lli->lli_open_fd_exec_count--;
314 LASSERT(lli->lli_open_fd_read_count);
315 lli->lli_open_fd_read_count--;
317 mutex_unlock(&lli->lli_och_mutex);
/* No cached OPEN lock matched: must do the real close rpc. */
319 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
320 LDLM_IBITS, &policy, lockmode,
322 rc = ll_md_real_close(file->f_dentry->d_inode,
326 CERROR("Releasing a file %p with negative dentry %p. Name %s",
327 file, file->f_dentry, file->f_dentry->d_name.name);
331 LUSTRE_FPRIVATE(file) = NULL;
332 ll_file_data_put(fd);
333 ll_capa_close(inode);
/* While this returns an error code, fput() the caller does not, so we need
 * to make every effort to clean up all of our state here. Also, applications
 * rarely check close errors and even if an error is returned they will not
 * re-try the close call.
 */
343 int ll_file_release(struct inode *inode, struct file *file)
345 struct ll_file_data *fd;
346 struct ll_sb_info *sbi = ll_i2sbi(inode);
347 struct ll_inode_info *lli = ll_i2info(inode);
351 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
352 PFID(ll_inode2fid(inode)), inode);
/* Remote-client ACL bookkeeping only applies to the fs root. */
354 #ifdef CONFIG_FS_POSIX_ACL
355 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
356 inode == inode->i_sb->s_root->d_inode) {
357 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
360 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
361 fd->fd_flags &= ~LL_FILE_RMTACL;
362 rct_del(&sbi->ll_rct, current_pid());
363 et_search_free(&sbi->ll_et, current_pid());
368 if (inode->i_sb->s_root != file->f_dentry)
369 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
370 fd = LUSTRE_FPRIVATE(file);
373 /* The last ref on @file, maybe not the owner pid of statahead.
374 * Different processes can open the same dir, "ll_opendir_key" means:
375 * it is me that should stop the statahead thread. */
376 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
377 lli->lli_opendir_pid != 0)
378 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root needs no MDS close; just drop the private data. */
380 if (inode->i_sb->s_root == file->f_dentry) {
381 LUSTRE_FPRIVATE(file) = NULL;
382 ll_file_data_put(fd);
/* Pick up any async write errors recorded against the clio object. */
386 if (!S_ISDIR(inode->i_mode)) {
387 lov_read_and_clear_async_rc(lli->lli_clob);
388 lli->lli_async_rc = 0;
391 rc = ll_md_close(sbi->ll_md_exp, inode, file);
393 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
394 libcfs_debug_dumplog();
/*
 * Enqueue an open intent to the MDS for @file (used when no cached open
 * disposition exists, e.g. NFSD opens).  Requests an OPEN lock unless
 * stripe info (@lmm/@lmmsize) is being set, and opens by FID.
 * On success fills the inode from the reply and takes the lock data.
 */
399 static int ll_intent_file_open(struct file *file, void *lmm,
400 int lmmsize, struct lookup_intent *itp)
402 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
403 struct dentry *parent = file->f_dentry->d_parent;
404 const char *name = file->f_dentry->d_name.name;
405 const int len = file->f_dentry->d_name.len;
406 struct md_op_data *op_data;
407 struct ptlrpc_request *req;
408 __u32 opc = LUSTRE_OPC_ANY;
415 /* Usually we come here only for NFSD, and we want open lock.
416 But we can also get here with pre 2.6.15 patchless kernels, and in
417 that case that lock is also ok */
418 /* We can also get here if there was cached open handle in revalidate_it
419 * but it disappeared while we were getting from there to ll_file_open.
420 * But this means this file was closed and immediately opened which
421 * makes a good candidate for using OPEN lock */
422 /* If lmmsize & lmm are not 0, we are just setting stripe info
423 * parameters. No need for the open lock */
424 if (lmm == NULL && lmmsize == 0) {
425 itp->it_flags |= MDS_OPEN_LOCK;
426 if (itp->it_flags & FMODE_WRITE)
427 opc = LUSTRE_OPC_CREATE;
430 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
431 file->f_dentry->d_inode, name, len,
434 RETURN(PTR_ERR(op_data));
436 itp->it_flags |= MDS_OPEN_BY_FID;
437 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
438 0 /*unused */, &req, ll_md_blocking_ast, 0);
439 ll_finish_md_op_data(op_data);
441 /* reason to keep own exit path - don't flood log
442 * with messages with -ESTALE errors.
444 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
445 it_open_error(DISP_OPEN_OPEN, itp))
447 ll_release_openhandle(file->f_dentry, itp);
451 if (it_disposition(itp, DISP_LOOKUP_NEG))
452 GOTO(out, rc = -ENOENT);
454 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
455 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
456 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
460 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
461 if (!rc && itp->d.lustre.it_lock_mode)
462 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
466 ptlrpc_req_finished(req);
467 ll_intent_drop_lock(itp);
/*
 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
 * not believe attributes if a few ioepoch holders exist. Attributes for
 * previous ioepoch if new one is opened are also skipped by MDS.
 */
477 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a real, changed epoch; 0 means "no epoch supplied". */
479 if (ioepoch && lli->lli_ioepoch != ioepoch) {
480 lli->lli_ioepoch = ioepoch;
481 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
482 ioepoch, PFID(&lli->lli_fid));
/*
 * Populate @och (fh, fid, lease handle, magic, flags) from the MDT reply
 * carried by @it, then register the handle for open replay.
 */
486 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
487 struct obd_client_handle *och)
489 struct ptlrpc_request *req = it->d.lustre.it_data;
490 struct mdt_body *body;
492 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
493 och->och_fh = body->handle;
494 och->och_fid = body->fid1;
/* Lease opens reuse the intent's lock handle as the lease handle. */
495 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
496 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
497 och->och_flags = it->it_flags;
499 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish the client-side part of an open: optionally fill @och from the
 * intent reply (and pick up the io epoch), then attach @fd as the file's
 * private data and initialize readahead state and the open mode.
 */
502 int ll_local_open(struct file *file, struct lookup_intent *it,
503 struct ll_file_data *fd, struct obd_client_handle *och)
505 struct inode *inode = file->f_dentry->d_inode;
506 struct ll_inode_info *lli = ll_i2info(inode);
509 LASSERT(!LUSTRE_FPRIVATE(file));
514 struct ptlrpc_request *req = it->d.lustre.it_data;
515 struct mdt_body *body;
518 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
522 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
523 ll_ioepoch_open(lli, body->ioepoch);
526 LUSTRE_FPRIVATE(file) = fd;
527 ll_readahead_init(inode, &fd->fd_ras);
528 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
/* Open a file, and (for the very first open) create objects on the OSTs at
 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
 * creation or open until ll_lov_setstripe() ioctl is called.
 *
 * If we already have the stripe MD locally then we don't request it in
 * md_open(), by passing a lmm_size = 0.
 *
 * It is up to the application to ensure no other processes open this file
 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
 * used. We might be able to avoid races of that sort by getting lli_open_sem
 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
 */
546 int ll_file_open(struct inode *inode, struct file *file)
548 struct ll_inode_info *lli = ll_i2info(inode);
549 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
550 .it_flags = file->f_flags };
551 struct obd_client_handle **och_p = NULL;
552 __u64 *och_usecount = NULL;
553 struct ll_file_data *fd;
554 int rc = 0, opendir_set = 0;
557 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
558 PFID(ll_inode2fid(inode)), inode, file->f_flags);
560 it = file->private_data; /* XXX: compat macro */
561 file->private_data = NULL; /* prevent ll_local_open assertion */
563 fd = ll_file_data_get();
565 GOTO(out_openerr, rc = -ENOMEM);
/* For directories, claim statahead ownership if nobody else has it. */
568 if (S_ISDIR(inode->i_mode)) {
569 spin_lock(&lli->lli_sa_lock);
570 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
571 lli->lli_opendir_pid == 0) {
572 lli->lli_opendir_key = fd;
573 lli->lli_opendir_pid = current_pid();
576 spin_unlock(&lli->lli_sa_lock);
/* The fs root never needs an MDS open handle. */
579 if (inode->i_sb->s_root == file->f_dentry) {
580 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own open intent (oit). */
584 if (!it || !it->d.lustre.it_disposition) {
585 /* Convert f_flags into access mode. We cannot use file->f_mode,
586 * because everything but O_ACCMODE mask was stripped from
588 if ((oit.it_flags + 1) & O_ACCMODE)
590 if (file->f_flags & O_TRUNC)
591 oit.it_flags |= FMODE_WRITE;
593 /* kernel only call f_op->open in dentry_open. filp_open calls
594 * dentry_open after call to open_namei that checks permissions.
595 * Only nfsd_open call dentry_open directly without checking
596 * permissions and because of that this code below is safe. */
597 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
598 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
600 /* We do not want O_EXCL here, presumably we opened the file
601 * already? XXX - NFS implications? */
602 oit.it_flags &= ~O_EXCL;
604 /* bug20584, if "it_flags" contains O_CREAT, the file will be
605 * created if necessary, then "IT_CREAT" should be set to keep
606 * consistent with it */
607 if (oit.it_flags & O_CREAT)
608 oit.it_op |= IT_CREAT;
614 /* Let's see if we have file open on MDS already. */
615 if (it->it_flags & FMODE_WRITE) {
616 och_p = &lli->lli_mds_write_och;
617 och_usecount = &lli->lli_open_fd_write_count;
618 } else if (it->it_flags & FMODE_EXEC) {
619 och_p = &lli->lli_mds_exec_och;
620 och_usecount = &lli->lli_open_fd_exec_count;
622 och_p = &lli->lli_mds_read_och;
623 och_usecount = &lli->lli_open_fd_read_count;
626 mutex_lock(&lli->lli_och_mutex);
627 if (*och_p) { /* Open handle is present */
628 if (it_disposition(it, DISP_OPEN_OPEN)) {
629 /* Well, there's extra open request that we do not need,
630 let's close it somehow. This will decref request. */
631 rc = it_open_error(DISP_OPEN_OPEN, it);
633 mutex_unlock(&lli->lli_och_mutex);
634 GOTO(out_openerr, rc);
637 ll_release_openhandle(file->f_dentry, it);
641 rc = ll_local_open(file, it, fd, NULL);
644 mutex_unlock(&lli->lli_och_mutex);
645 GOTO(out_openerr, rc);
648 LASSERT(*och_usecount == 0);
649 if (!it->d.lustre.it_disposition) {
650 /* We cannot just request lock handle now, new ELC code
651 means that one of other OPEN locks for this file
652 could be cancelled, and since blocking ast handler
653 would attempt to grab och_mutex as well, that would
654 result in a deadlock */
655 mutex_unlock(&lli->lli_och_mutex);
656 it->it_create_mode |= M_CHECK_STALE;
657 rc = ll_intent_file_open(file, NULL, 0, it);
658 it->it_create_mode &= ~M_CHECK_STALE;
660 GOTO(out_openerr, rc);
664 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
666 GOTO(out_och_free, rc = -ENOMEM);
670 /* md_intent_lock() didn't get a request ref if there was an
671 * open error, so don't do cleanup on the request here
673 /* XXX (green): Should not we bail out on any error here, not
674 * just open error? */
675 rc = it_open_error(DISP_OPEN_OPEN, it);
677 GOTO(out_och_free, rc);
679 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
680 "inode %p: disposition %x, status %d\n", inode,
681 it_disposition(it, ~0), it->d.lustre.it_status);
683 rc = ll_local_open(file, it, fd, *och_p);
685 GOTO(out_och_free, rc);
687 mutex_unlock(&lli->lli_och_mutex);
690 /* Must do this outside lli_och_mutex lock to prevent deadlock where
691 different kind of OPEN lock for this same inode gets cancelled
692 by ldlm_cancel_lru */
693 if (!S_ISREG(inode->i_mode))
694 GOTO(out_och_free, rc);
/* Delay OST object creation for O_LOV_DELAY_CREATE / read-only opens. */
698 if (!lli->lli_has_smd &&
699 (cl_is_lov_delay_create(file->f_flags) ||
700 (file->f_mode & FMODE_WRITE) == 0)) {
701 CDEBUG(D_INODE, "object creation was delayed\n");
702 GOTO(out_och_free, rc);
704 cl_lov_delay_create_clear(&file->f_flags);
705 GOTO(out_och_free, rc);
/* Error path: free any half-constructed open handle. */
709 if (och_p && *och_p) {
710 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
711 *och_p = NULL; /* OBD_FREE writes some magic there */
714 mutex_unlock(&lli->lli_och_mutex);
717 if (opendir_set != 0)
718 ll_stop_statahead(inode, lli->lli_opendir_key);
720 ll_file_data_put(fd);
722 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* Drop the intent's request reference taken during enqueue. */
725 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
726 ptlrpc_req_finished(it->d.lustre.it_data);
727 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Blocking AST for lease locks: on LDLM_CB_BLOCKING cancel the lease
 * lock asynchronously; the CANCELING branch (elided here) presumably
 * only acknowledges — lease opens carry no openhandle cleanup in this AST.
 */
733 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
734 struct ldlm_lock_desc *desc, void *data, int flag)
737 struct lustre_handle lockh;
741 case LDLM_CB_BLOCKING:
742 ldlm_lock2handle(lock, &lockh);
743 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
745 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
749 case LDLM_CB_CANCELING:
/*
 * Acquire a lease and open the file.
 * Only plain FMODE_READ or FMODE_WRITE leases are supported.  When @file
 * is given, the existing open handle is reused (only if this fd is the
 * sole opener) so the MDT can match ownership via op_handle.
 * Returns the new obd_client_handle, or an ERR_PTR on failure.
 */
759 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
760 fmode_t fmode, __u64 open_flags)
762 struct lookup_intent it = { .it_op = IT_OPEN };
763 struct ll_sb_info *sbi = ll_i2sbi(inode);
764 struct md_op_data *op_data;
765 struct ptlrpc_request *req;
766 struct lustre_handle old_handle = { 0 };
767 struct obd_client_handle *och = NULL;
772 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
773 RETURN(ERR_PTR(-EINVAL));
776 struct ll_inode_info *lli = ll_i2info(inode);
777 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
778 struct obd_client_handle **och_p;
/* The lease mode must be compatible with how the file was opened. */
781 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
782 RETURN(ERR_PTR(-EPERM));
784 /* Get the openhandle of the file */
786 mutex_lock(&lli->lli_och_mutex);
787 if (fd->fd_lease_och != NULL) {
788 mutex_unlock(&lli->lli_och_mutex);
792 if (fd->fd_och == NULL) {
793 if (file->f_mode & FMODE_WRITE) {
794 LASSERT(lli->lli_mds_write_och != NULL);
795 och_p = &lli->lli_mds_write_och;
796 och_usecount = &lli->lli_open_fd_write_count;
798 LASSERT(lli->lli_mds_read_och != NULL);
799 och_p = &lli->lli_mds_read_och;
800 och_usecount = &lli->lli_open_fd_read_count;
802 if (*och_usecount == 1) {
809 mutex_unlock(&lli->lli_och_mutex);
810 if (rc < 0) /* more than 1 opener */
813 LASSERT(fd->fd_och != NULL);
814 old_handle = fd->fd_och->och_fh;
819 RETURN(ERR_PTR(-ENOMEM));
821 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
822 LUSTRE_OPC_ANY, NULL);
824 GOTO(out, rc = PTR_ERR(op_data));
826 /* To tell the MDT this openhandle is from the same owner */
827 op_data->op_handle = old_handle;
829 it.it_flags = fmode | open_flags;
830 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
831 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
832 ll_md_blocking_lease_ast,
833 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
834 * it can be cancelled which may mislead applications that the lease is
836 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
837 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
838 * doesn't deal with openhandle, so normal openhandle will be leaked. */
839 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
840 ll_finish_md_op_data(op_data);
841 ptlrpc_req_finished(req);
843 GOTO(out_release_it, rc);
845 if (it_disposition(&it, DISP_LOOKUP_NEG))
846 GOTO(out_release_it, rc = -ENOENT);
848 rc = it_open_error(DISP_OPEN_OPEN, &it);
850 GOTO(out_release_it, rc);
852 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
853 ll_och_fill(sbi->ll_md_exp, &it, och);
855 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
856 GOTO(out_close, rc = -EOPNOTSUPP);
858 /* already get lease, handle lease lock */
859 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
860 if (it.d.lustre.it_lock_mode == 0 ||
861 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
862 /* open lock must return for lease */
863 CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
864 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
865 it.d.lustre.it_lock_bits);
866 GOTO(out_close, rc = -EPROTO);
869 ll_intent_release(&it);
873 /* Cancel open lock */
874 if (it.d.lustre.it_lock_mode != 0) {
875 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
876 it.d.lustre.it_lock_mode);
877 it.d.lustre.it_lock_mode = 0;
878 och->och_lease_handle.cookie = 0ULL;
880 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
882 CERROR("%s: error closing file "DFID": %d\n",
883 ll_get_fsname(inode->i_sb, NULL, 0),
884 PFID(&ll_i2info(inode)->lli_fid), rc2);
885 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
887 ll_intent_release(&it);
/*
 * Release lease and close the file.
 * It will check if the lease has ever broken.
 * If the lease lock was not already cancelled (lease intact), cancel it
 * here; @lease_broken, when non-NULL, reports whether it had been broken.
 */
899 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
902 struct ldlm_lock *lock;
903 bool cancelled = true;
907 lock = ldlm_handle2lock(&och->och_lease_handle);
909 lock_res_and_lock(lock);
910 cancelled = ldlm_is_cancel(lock);
911 unlock_res_and_lock(lock);
915 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
916 PFID(&ll_i2info(inode)->lli_fid), cancelled);
919 ldlm_cli_cancel(&och->och_lease_handle, 0);
920 if (lease_broken != NULL)
921 *lease_broken = cancelled;
923 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
/* Fills the obdo with the attributes for the lsm.
 * Issues an async getattr to the OSTs and waits for completion; when
 * LL_DV_WR_FLUSH/LL_DV_RD_FLUSH are set, requests a server-side lock
 * (and a dirty flush for WR) so the data version is stable. */
930 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
931 struct obd_capa *capa, struct obdo *obdo,
932 __u64 ioepoch, int dv_flags)
934 struct ptlrpc_request_set *set;
935 struct obd_info oinfo = { { { 0 } } };
940 LASSERT(lsm != NULL);
944 oinfo.oi_oa->o_oi = lsm->lsm_oi;
945 oinfo.oi_oa->o_mode = S_IFREG;
946 oinfo.oi_oa->o_ioepoch = ioepoch;
947 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
948 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
949 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
950 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
951 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
952 OBD_MD_FLDATAVERSION;
953 oinfo.oi_capa = capa;
954 if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
955 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
956 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
957 if (dv_flags & LL_DV_WR_FLUSH)
958 oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
961 set = ptlrpc_prep_set();
963 CERROR("can't allocate ptlrpc set\n");
966 rc = obd_getattr_async(exp, &oinfo, set);
968 rc = ptlrpc_set_wait(set);
969 ptlrpc_set_destroy(set);
/* Keep only the attribute bits the caller may trust from the reply. */
972 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
973 OBD_MD_FLATIME | OBD_MD_FLMTIME |
974 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
975 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
/* WR flush was requested but server did not confirm the flush. */
976 if (dv_flags & LL_DV_WR_FLUSH &&
977 !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
978 oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
/*
 * Performs the getattr on the inode and updates its fields.
 * If @sync != 0, perform the getattr under the server-side lock.
 */
988 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
989 __u64 ioepoch, int sync)
991 struct obd_capa *capa = ll_mdscapa_get(inode);
992 struct lov_stripe_md *lsm;
996 lsm = ccc_inode_lsm_get(inode);
997 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
998 capa, obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
1001 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
/* Merge the OST attributes into the in-memory inode. */
1003 obdo_refresh_inode(inode, obdo, obdo->o_valid);
1004 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
1005 " blksize %lu\n", POSTID(oi), i_size_read(inode),
1006 (unsigned long long)inode->i_blocks,
1007 (unsigned long)ll_inode_blksize(inode));
1009 ccc_inode_lsm_put(inode, lsm);
/*
 * Merge MDS-provided timestamps (cached in lli_lvb) with the attributes
 * obtained from the OSTs via the cl object, taking the newest of each
 * time and updating i_size/i_blocks under the inode size lock.
 */
1013 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
1015 struct ll_inode_info *lli = ll_i2info(inode);
1016 struct cl_object *obj = lli->lli_clob;
1017 struct cl_attr *attr = ccc_env_thread_attr(env);
1023 ll_inode_size_lock(inode);
1024 /* merge timestamps the most recently obtained from mds with
1025 timestamps obtained from osts */
1026 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
1027 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
1028 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
1029 inode_init_lvb(inode, &lvb);
1031 cl_object_attr_lock(obj);
1032 rc = cl_object_attr_get(env, obj, attr);
1033 cl_object_attr_unlock(obj);
/* Take the most recent of the MDS and OST timestamps. */
1036 if (lvb.lvb_atime < attr->cat_atime)
1037 lvb.lvb_atime = attr->cat_atime;
1038 if (lvb.lvb_ctime < attr->cat_ctime)
1039 lvb.lvb_ctime = attr->cat_ctime;
1040 if (lvb.lvb_mtime < attr->cat_mtime)
1041 lvb.lvb_mtime = attr->cat_mtime;
1043 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1044 PFID(&lli->lli_fid), attr->cat_size);
1045 cl_isize_write_nolock(inode, attr->cat_size);
1047 inode->i_blocks = attr->cat_blocks;
1049 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1050 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1051 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1053 ll_inode_size_unlock(inode);
/*
 * Glimpse the OST attributes for @lsm and copy size/blocks/times into
 * the caller-supplied stat structure (no server-side lock requested).
 */
1058 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1061 struct obdo obdo = { 0 };
1064 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1066 st->st_size = obdo.o_size;
1067 st->st_blocks = obdo.o_blocks;
1068 st->st_mtime = obdo.o_mtime;
1069 st->st_atime = obdo.o_atime;
1070 st->st_ctime = obdo.o_ctime;
/*
 * Return true when atime updates should be suppressed for @file,
 * mirroring the kernel's file_accessed()/touch_atime() checks
 * (O_NOATIME, S_NOATIME, mount and superblock noatime flags).
 */
1075 static bool file_is_noatime(const struct file *file)
1077 const struct vfsmount *mnt = file->f_path.mnt;
1078 const struct inode *inode = file->f_path.dentry->d_inode;
1080 /* Adapted from file_accessed() and touch_atime().*/
1081 if (file->f_flags & O_NOATIME)
1084 if (inode->i_flags & S_NOATIME)
1087 if (IS_NOATIME(inode))
1090 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1093 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1096 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: propagate O_NONBLOCK/O_APPEND/O_SYNC/O_DIRECT, pick the lock
 * requirement (never for nolock files, mandatory for append) and the
 * noatime setting.
 */
1102 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1104 struct inode *inode = file->f_dentry->d_inode;
1106 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1108 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1109 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1110 file->f_flags & O_DIRECT ||
1113 io->ci_obj = ll_i2info(inode)->lli_clob;
1114 io->ci_lockreq = CILR_MAYBE;
1115 if (ll_file_nolock(file)) {
1116 io->ci_lockreq = CILR_NEVER;
1117 io->ci_no_srvlock = 1;
1118 } else if (file->f_flags & O_APPEND) {
1119 io->ci_lockreq = CILR_MANDATORY;
1122 io->ci_noatime = file_is_noatime(file);
/*
 * Common read/write engine: set up a cl_io for @iot (CIT_READ/CIT_WRITE)
 * at *@ppos for @count bytes, dispatch by IO subtype (normal iovec,
 * sendfile, splice), run cl_io_loop(), and account the bytes moved in
 * the per-sb stats.  For normal writes (no group lock) lli_write_mutex
 * serializes writers and lli_trunc_sem excludes truncate.
 */
1126 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1127 struct file *file, enum cl_io_type iot,
1128 loff_t *ppos, size_t count)
1130 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1131 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1136 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zd\n",
1137 file->f_dentry->d_name.name, iot, *ppos, count);
1140 io = ccc_env_thread_io(env);
1141 ll_io_init(io, file, iot == CIT_WRITE);
1143 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1144 struct vvp_io *vio = vvp_env_io(env);
1145 struct ccc_io *cio = ccc_env_io(env);
1146 int write_mutex_locked = 0;
1148 cio->cui_fd = LUSTRE_FPRIVATE(file);
1149 vio->cui_io_subtype = args->via_io_subtype;
1151 switch (vio->cui_io_subtype) {
1153 cio->cui_iov = args->u.normal.via_iov;
1154 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1155 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1156 cio->cui_iocb = args->u.normal.via_iocb;
1157 if ((iot == CIT_WRITE) &&
1158 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1159 if (mutex_lock_interruptible(&lli->
1161 GOTO(out, result = -ERESTARTSYS);
1162 write_mutex_locked = 1;
1164 down_read(&lli->lli_trunc_sem);
1167 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1168 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
1171 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1172 vio->u.splice.cui_flags = args->u.splice.via_flags;
1175 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
1178 result = cl_io_loop(env, io);
1179 if (args->via_io_subtype == IO_NORMAL)
1180 up_read(&lli->lli_trunc_sem);
1181 if (write_mutex_locked)
1182 mutex_unlock(&lli->lli_write_mutex);
1184 /* cl_io_rw_init() handled IO */
1185 result = io->ci_result;
/* ci_nob is the byte count actually transferred; advance *ppos. */
1188 if (io->ci_nob > 0) {
1189 result = io->ci_nob;
1190 *ppos = io->u.ci_wr.wr.crw_pos;
1194 cl_io_fini(env, io);
1195 /* If any bit been read/written (result != 0), we just return
1196 * short read/write instead of restart io. */
1197 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1198 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1199 iot == CIT_READ ? "read" : "write",
1200 file->f_dentry->d_name.name, *ppos, count);
1201 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1205 if (iot == CIT_READ) {
1207 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1208 LPROC_LL_READ_BYTES, result);
1209 } else if (iot == CIT_WRITE) {
1211 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1212 LPROC_LL_WRITE_BYTES, result);
1213 fd->fd_write_failed = false;
1214 } else if (result != -ERESTARTSYS) {
1215 fd->fd_write_failed = true;
1218 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1225 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate a userspace iovec array and total up its byte count.
 *
 * Mirrors the kernel's __generic_file_aio_write_nolock() iov validation
 * (see the XXX note above): rejects any segment whose length, or whose
 * cumulative length, would wrap negative, and verifies read access to
 * each segment's buffer with access_ok().
 *
 * NOTE(review): several lines of this function (declaration of the
 * accumulator, *nr_segs truncation, return paths) are not visible in
 * this view of the file — confirm against the full source.
 */
1227 static int ll_file_get_iov_count(const struct iovec *iov,
1228 unsigned long *nr_segs, size_t *count)
1233 for (seg = 0; seg < *nr_segs; seg++) {
1234 const struct iovec *iv = &iov[seg];
1237 * If any segment has a negative length, or the cumulative
1238 * length ever wraps negative then return -EINVAL.
1241 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1243 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1248 cnt -= iv->iov_len; /* This segment is no good */
/*
 * AIO read entry point (.aio_read).
 *
 * Validates/tallies the user iovec, obtains a cl_env reference, packs the
 * iovec and kiocb into the per-env vvp_io_args (IO_NORMAL subtype), and
 * runs the generic client IO path with CIT_READ. The file position is
 * taken from and written back through iocb->ki_pos.
 */
1255 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1256 unsigned long nr_segs, loff_t pos)
1259 struct vvp_io_args *args;
1265 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1269 env = cl_env_get(&refcheck);
1271 RETURN(PTR_ERR(env));
1273 args = vvp_env_args(env, IO_NORMAL);
/* const is cast away here; ll_file_io_generic presumably does not
 * modify the segments themselves — TODO confirm */
1274 args->u.normal.via_iov = (struct iovec *)iov;
1275 args->u.normal.via_nrsegs = nr_segs;
1276 args->u.normal.via_iocb = iocb;
1278 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1279 &iocb->ki_pos, count);
1280 cl_env_put(env, &refcheck);
/*
 * Synchronous read entry point (.read).
 *
 * Builds a single-segment iovec and a synchronous kiocb in per-env
 * scratch storage (vti_local_iov / vti_kiocb), then delegates to
 * ll_file_aio_read(). *ppos is updated from the kiocb afterwards.
 * HAVE_KIOCB_KI_LEFT selects between the ki_left / ki_nbytes field
 * names across kernel versions.
 */
1284 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1288 struct iovec *local_iov;
1289 struct kiocb *kiocb;
1294 env = cl_env_get(&refcheck);
1296 RETURN(PTR_ERR(env));
1298 local_iov = &vvp_env_info(env)->vti_local_iov;
1299 kiocb = &vvp_env_info(env)->vti_kiocb;
1300 local_iov->iov_base = (void __user *)buf;
1301 local_iov->iov_len = count;
1302 init_sync_kiocb(kiocb, file);
1303 kiocb->ki_pos = *ppos;
1304 #ifdef HAVE_KIOCB_KI_LEFT
1305 kiocb->ki_left = count;
1307 kiocb->ki_nbytes = count;
1310 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1311 *ppos = kiocb->ki_pos;
1313 cl_env_put(env, &refcheck);
1318 * Write to a file (through the page cache).
/*
 * AIO write entry point (.aio_write).
 *
 * Symmetric with ll_file_aio_read(): validates the iovec, packs it into
 * vvp_io_args (IO_NORMAL) and runs the generic client IO path with
 * CIT_WRITE, advancing iocb->ki_pos.
 */
1321 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1322 unsigned long nr_segs, loff_t pos)
1325 struct vvp_io_args *args;
1331 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1335 env = cl_env_get(&refcheck);
1337 RETURN(PTR_ERR(env));
1339 args = vvp_env_args(env, IO_NORMAL);
1340 args->u.normal.via_iov = (struct iovec *)iov;
1341 args->u.normal.via_nrsegs = nr_segs;
1342 args->u.normal.via_iocb = iocb;
1344 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1345 &iocb->ki_pos, count);
1346 cl_env_put(env, &refcheck);
/*
 * Synchronous write entry point (.write).
 *
 * Symmetric with ll_file_read(): wraps the user buffer in a one-segment
 * iovec plus a sync kiocb from per-env scratch space and delegates to
 * ll_file_aio_write(), then propagates the new position to *ppos.
 */
1350 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1354 struct iovec *local_iov;
1355 struct kiocb *kiocb;
1360 env = cl_env_get(&refcheck);
1362 RETURN(PTR_ERR(env));
1364 local_iov = &vvp_env_info(env)->vti_local_iov;
1365 kiocb = &vvp_env_info(env)->vti_kiocb;
1366 local_iov->iov_base = (void __user *)buf;
1367 local_iov->iov_len = count;
1368 init_sync_kiocb(kiocb, file);
1369 kiocb->ki_pos = *ppos;
1370 #ifdef HAVE_KIOCB_KI_LEFT
1371 kiocb->ki_left = count;
1373 kiocb->ki_nbytes = count;
1376 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1377 *ppos = kiocb->ki_pos;
1379 cl_env_put(env, &refcheck);
1384 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read entry point: feed file data (through the page cache) into
 * a pipe. Uses the IO_SPLICE vvp_io_args subtype and runs the generic
 * client IO path with CIT_READ.
 */
1386 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1387 struct pipe_inode_info *pipe, size_t count,
1391 struct vvp_io_args *args;
1396 env = cl_env_get(&refcheck);
1398 RETURN(PTR_ERR(env));
1400 args = vvp_env_args(env, IO_SPLICE);
1401 args->u.splice.via_pipe = pipe;
1402 args->u.splice.via_flags = flags;
1404 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1405 cl_env_put(env, &refcheck);
/*
 * Ask the data (LOV/OST) stack to recreate the object(s) backing this
 * inode, identified by object id @oi on OST index @ost_idx.
 *
 * Works on a private copy of the inode's stripe metadata (lsm2) so the
 * cached lsm is not modified; the obdo carries the recreate request via
 * OBD_FL_RECREATE_OBJS, with the OST index smuggled in o_nlink and the
 * parent FID attached for the server side. obd_create() runs under the
 * inode size lock.
 */
1409 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
1412 struct obd_export *exp = ll_i2dtexp(inode);
1413 struct obd_trans_info oti = { 0 };
1414 struct obdo *oa = NULL;
1417 struct lov_stripe_md *lsm = NULL, *lsm2;
1424 lsm = ccc_inode_lsm_get(inode);
1425 if (!lsm_has_objects(lsm))
1426 GOTO(out, rc = -ENOENT);
1428 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1429 (lsm->lsm_stripe_count));
1431 OBD_ALLOC_LARGE(lsm2, lsm_size);
1433 GOTO(out, rc = -ENOMEM);
/* o_nlink is reused to carry the target OST index for the recreate */
1436 oa->o_nlink = ost_idx;
1437 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1438 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1439 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1440 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1441 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1442 memcpy(lsm2, lsm, lsm_size);
1443 ll_inode_size_lock(inode);
1444 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1445 ll_inode_size_unlock(inode);
1447 OBD_FREE_LARGE(lsm2, lsm_size);
1450 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_RECREATE_OBJ handler: recreate an OST object named by a
 * userspace ll_recreate_obj (MDT0 sequence + object id + OST index).
 * Root-only (CFS_CAP_SYS_ADMIN).
 */
1455 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1457 struct ll_recreate_obj ucreat;
1461 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1464 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1468 ostid_set_seq_mdt0(&oi);
1469 ostid_set_id(&oi, ucreat.lrc_id);
1470 RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID handler: recreate an OST object named by a FID
 * copied from userspace. The OST index is recovered from bits 16..31 of
 * the FID sequence. Root-only (CFS_CAP_SYS_ADMIN).
 */
1473 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1480 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1483 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1486 fid_to_ostid(&fid, &oi);
1487 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1488 RETURN(ll_lov_recreate(inode, &oi, ost_idx));
/*
 * Apply a user-supplied striping EA (@lum, @lum_size) to @inode by
 * re-opening the file with an IT_OPEN intent that carries the layout.
 *
 * Fails with -EEXIST if the inode already has a stripe (layout cannot
 * be changed once objects exist). On success the transient open handle
 * obtained for the intent is released again via ll_release_openhandle().
 * Runs under the inode size lock; the delay-create flag is cleared on
 * the way out so subsequent opens behave normally.
 */
1491 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1492 __u64 flags, struct lov_user_md *lum,
1495 struct lov_stripe_md *lsm = NULL;
1496 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1500 lsm = ccc_inode_lsm_get(inode);
1502 ccc_inode_lsm_put(inode, lsm);
1503 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1504 PFID(ll_inode2fid(inode)));
1505 GOTO(out, rc = -EEXIST);
1508 ll_inode_size_lock(inode);
1509 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1511 GOTO(out_unlock, rc);
1512 rc = oit.d.lustre.it_status;
1514 GOTO(out_req_free, rc);
1516 ll_release_openhandle(file->f_dentry, &oit);
1519 ll_inode_size_unlock(inode);
1520 ll_intent_release(&oit);
1521 ccc_inode_lsm_put(inode, lsm);
1523 cl_lov_delay_create_clear(&file->f_flags);
1526 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (striping metadata) of @filename, a child of
 * directory @inode, via an MDS getattr-by-name RPC.
 *
 * On success *lmmp points into the reply buffer (the caller must keep
 * *request alive and eventually call ptlrpc_req_finished on it) and
 * *lmm_size is the EA size. Replies with unknown LOV magics are
 * rejected with -EPROTO; a missing/empty EA yields -ENODATA.
 *
 * The EA arrives in little-endian wire order; on big-endian hosts it is
 * byte-swapped in place before being handed to the caller/userspace.
 */
1530 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1531 struct lov_mds_md **lmmp, int *lmm_size,
1532 struct ptlrpc_request **request)
1534 struct ll_sb_info *sbi = ll_i2sbi(inode);
1535 struct mdt_body *body;
1536 struct lov_mds_md *lmm = NULL;
1537 struct ptlrpc_request *req = NULL;
1538 struct md_op_data *op_data;
1541 rc = ll_get_max_mdsize(sbi, &lmmsize);
1545 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1546 strlen(filename), lmmsize,
1547 LUSTRE_OPC_ANY, NULL);
1548 if (IS_ERR(op_data))
1549 RETURN(PTR_ERR(op_data));
1551 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1552 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1553 ll_finish_md_op_data(op_data);
1555 CDEBUG(D_INFO, "md_getattr_name failed "
1556 "on %s: rc %d\n", filename, rc);
1560 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1561 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1563 lmmsize = body->eadatasize;
1565 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1567 GOTO(out, rc = -ENODATA);
1570 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1571 LASSERT(lmm != NULL);
1573 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1574 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1575 GOTO(out, rc = -EPROTO);
1579 * This is coming from the MDS, so is probably in
1580 * little endian. We convert it to host endian before
1581 * passing it to userspace.
/* LOV_MAGIC != le32(LOV_MAGIC) is only true on big-endian hosts, i.e.
 * the swab below is compiled in but skipped on little-endian. */
1583 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1586 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1587 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1590 /* if function is called for a directory, avoid swabbing
1591 * non-existent lsm objects (only regular files have them) */
1592 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1593 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1594 if (S_ISREG(body->mode))
1595 lustre_swab_lov_user_md_objects(
1596 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1598 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1599 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1600 if (S_ISREG(body->mode))
1601 lustre_swab_lov_user_md_objects(
1602 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1609 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: set a raw striping EA supplied by a
 * privileged user (CFS_CAP_SYS_ADMIN), including pre-existing object
 * info (MDS_OPEN_HAS_OBJS). Copies one lov_user_md plus one
 * lov_user_ost_data from userspace and delegates to
 * ll_lov_setstripe_ea_info(); the temporary buffer is freed on all
 * paths.
 */
1614 static int ll_lov_setea(struct inode *inode, struct file *file,
1617 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1618 struct lov_user_md *lump;
1619 int lum_size = sizeof(struct lov_user_md) +
1620 sizeof(struct lov_user_ost_data);
1624 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1627 OBD_ALLOC_LARGE(lump, lum_size);
1631 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1632 OBD_FREE_LARGE(lump, lum_size);
1636 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1638 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: read a lov_user_md (v1 first — it is a
 * prefix of v3 — then the full v3 if the magic says so) from userspace
 * and apply it via ll_lov_setstripe_ea_info().
 *
 * On success the stripe count in the user's buffer is zeroed and the
 * freshly-applied layout is read back through the LL_IOC_LOV_GETSTRIPE
 * obd_iocontrol path, after refreshing the layout generation.
 */
1642 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1645 struct lov_user_md_v3 lumv3;
1646 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1647 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1648 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1650 __u64 flags = FMODE_WRITE;
1653 /* first try with v1 which is smaller than v3 */
1654 lum_size = sizeof(struct lov_user_md_v1);
1655 if (copy_from_user(lumv1, lumv1p, lum_size))
1658 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1659 lum_size = sizeof(struct lov_user_md_v3);
1660 if (copy_from_user(&lumv3, lumv3p, lum_size))
1664 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1666 struct lov_stripe_md *lsm;
/* return value of put_user is ignored here — presumably deliberate
 * best-effort, the layout itself was already applied */
1669 put_user(0, &lumv1p->lmm_stripe_count);
1671 ll_layout_refresh(inode, &gen);
1672 lsm = ccc_inode_lsm_get(inode);
1673 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1674 0, lsm, (void *)arg);
1675 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_LOV_GETSTRIPE handler: pass the inode's cached stripe metadata
 * to the LOV layer, which formats it into the user buffer at @arg.
 */
1680 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1682 struct lov_stripe_md *lsm;
1686 lsm = ccc_inode_lsm_get(inode);
1688 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1690 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_GROUP_LOCK: take a cluster-wide group lock with gid @arg on
 * behalf of this file descriptor.
 *
 * At most one group lock per fd: holding LL_FILE_GROUP_LOCKED already
 * is an error. The lock is acquired outside lli_lock (it can block, or
 * fail with O_NONBLOCK), so after acquisition we re-check under
 * lli_lock whether another thread raced us in and, if so, drop ours.
 */
1694 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1696 struct ll_inode_info *lli = ll_i2info(inode);
1697 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1698 struct ccc_grouplock grouplock;
1702 if (ll_file_nolock(file))
1703 RETURN(-EOPNOTSUPP);
1705 spin_lock(&lli->lli_lock);
1706 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1707 CWARN("group lock already existed with gid %lu\n",
1708 fd->fd_grouplock.cg_gid);
1709 spin_unlock(&lli->lli_lock);
1712 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1713 spin_unlock(&lli->lli_lock);
/* may block unless O_NONBLOCK was set on the file */
1715 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1716 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1720 spin_lock(&lli->lli_lock);
1721 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1722 spin_unlock(&lli->lli_lock);
1723 CERROR("another thread just won the race\n");
1724 cl_put_grouplock(&grouplock);
1728 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1729 fd->fd_grouplock = grouplock;
1730 spin_unlock(&lli->lli_lock);
1732 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK: release the group lock with gid @arg held by
 * this file descriptor.
 *
 * Verifies under lli_lock that a group lock is held and that its gid
 * matches @arg, detaches it from the fd while still locked, then drops
 * the cl-layer lock outside the spinlock.
 */
1736 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1738 struct ll_inode_info *lli = ll_i2info(inode);
1739 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1740 struct ccc_grouplock grouplock;
1743 spin_lock(&lli->lli_lock);
1744 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1745 spin_unlock(&lli->lli_lock);
1746 CWARN("no group lock held\n");
1749 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1751 if (fd->fd_grouplock.cg_gid != arg) {
1752 CWARN("group lock %lu doesn't match current id %lu\n",
1753 arg, fd->fd_grouplock.cg_gid);
1754 spin_unlock(&lli->lli_lock);
/* take a local copy so cl_put_grouplock can run without lli_lock */
1758 grouplock = fd->fd_grouplock;
1759 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1760 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1761 spin_unlock(&lli->lli_lock);
1763 cl_put_grouplock(&grouplock);
1764 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1769 * Close inode open handle
1771 * \param dentry [in] dentry which contains the inode
1772 * \param it [in,out] intent which contains open info and result
1775 * \retval <0 failure
/*
 * Close the MDS open handle carried by intent @it for @dentry's inode
 * (see the kernel-doc block above for parameter details).
 *
 * No-ops for the filesystem root and for intents that never completed
 * an open. Otherwise fills a transient obd_client_handle from the
 * intent and closes it on the MDS, then drops the enqueue open
 * reference (request) if the intent still holds one.
 */
1777 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1779 struct inode *inode = dentry->d_inode;
1780 struct obd_client_handle *och;
1786 /* Root ? Do nothing. */
1787 if (dentry->d_inode->i_sb->s_root == dentry)
1790 /* No open handle to close? Move away */
1791 if (!it_disposition(it, DISP_OPEN_OPEN))
1794 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1796 OBD_ALLOC(och, sizeof(*och));
1798 GOTO(out, rc = -ENOMEM);
1800 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1802 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1805 /* this one is in place of ll_file_open */
1806 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1807 ptlrpc_req_finished(it->d.lustre.it_data);
1808 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1814 * Get size for inode for which FIEMAP mapping is requested.
1815  * Make the FIEMAP get_info call and return the result.
/*
 * Execute a FIEMAP request for @inode (see the comment block above).
 *
 * Rejects unsupported flags (reporting the supported set back to the
 * caller), honours FIEMAP_FLAG_SYNC by flushing dirty pages first, and
 * requires FIEMAP_FLAG_DEVICE_ORDER for multi-stripe files since
 * extents cannot be presented in file-logical order otherwise. The
 * request/reply travels through obd_get_info(KEY_FIEMAP) with the
 * object id, size and parent FID packed into an ll_fiemap_info_key.
 */
1817 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1820 struct obd_export *exp = ll_i2dtexp(inode);
1821 struct lov_stripe_md *lsm = NULL;
1822 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1823 int vallen = num_bytes;
1827 /* Checks for fiemap flags */
1828 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* tell the caller which flags we do support, then bail */
1829 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1833 /* Check for FIEMAP_FLAG_SYNC */
1834 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1835 rc = filemap_fdatawrite(inode->i_mapping);
1840 lsm = ccc_inode_lsm_get(inode);
1844 /* If the stripe_count > 1 and the application does not understand
1845 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1847 if (lsm->lsm_stripe_count > 1 &&
1848 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1849 GOTO(out, rc = -EOPNOTSUPP);
1851 fm_key.oa.o_oi = lsm->lsm_oi;
1852 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1854 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1855 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1856 /* If filesize is 0, then there would be no objects for mapping */
1857 if (fm_key.oa.o_size == 0) {
1858 fiemap->fm_mapped_extents = 0;
1862 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1864 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1867 CERROR("obd_get_info failed: rc = %d\n", rc);
1870 ccc_inode_lsm_put(inode, lsm);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.
 *
 * Permitted for CFS_CAP_DAC_READ_SEARCH holders, or for everyone when
 * the filesystem was mounted with LL_SBI_USER_FID2PATH. The fixed-size
 * header is copied in first to learn gf_pathlen, then a reply buffer of
 * header + pathlen is allocated, filled by obd_iocontrol, and copied
 * back out in full.
 */
1874 int ll_fid2path(struct inode *inode, void *arg)
1876 struct obd_export *exp = ll_i2mdexp(inode);
1877 struct getinfo_fid2path *gfout, *gfin;
1881 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1882 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1885 /* Need to get the buflen */
1886 OBD_ALLOC_PTR(gfin);
1889 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* gf_pathlen is user-controlled; presumably bounded by the allocator
 * and/or validated server-side — TODO confirm */
1894 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1895 OBD_ALLOC(gfout, outsize);
1896 if (gfout == NULL) {
1900 memcpy(gfout, gfin, sizeof(*gfout));
1903 /* Call mdc_iocontrol */
1904 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1908 if (copy_to_user(arg, gfout, outsize))
1912 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size and marshal the user's fiemap buffer
 * around ll_do_fiemap().
 *
 * Reads fm_extent_count first to size the kernel buffer (header +
 * extent array), copies the request in, runs the mapping, and copies
 * back the header plus however many extents were actually mapped.
 *
 * NOTE(review): num_bytes = sizeof(*fiemap_s) + extent_count *
 * sizeof(extent) uses a user-controlled count; overflow/limit checking
 * is not visible in this view — confirm it exists in the full source.
 */
1916 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1918 struct ll_user_fiemap *fiemap_s;
1919 size_t num_bytes, ret_bytes;
1920 unsigned int extent_count;
1923 /* Get the extent count so we can calculate the size of
1924 * required fiemap buffer */
1925 if (get_user(extent_count,
1926 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1928 num_bytes = sizeof(*fiemap_s) + (extent_count *
1929 sizeof(struct ll_fiemap_extent));
1931 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1932 if (fiemap_s == NULL)
1935 /* get the fiemap value */
1936 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1938 GOTO(error, rc = -EFAULT);
1940 /* If fm_extent_count is non-zero, read the first extent since
1941 * it is used to calculate end_offset and device from previous
1944 if (copy_from_user(&fiemap_s->fm_extents[0],
1945 (char __user *)arg + sizeof(*fiemap_s),
1946 sizeof(struct ll_fiemap_extent)))
1947 GOTO(error, rc = -EFAULT);
1950 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1954 ret_bytes = sizeof(struct ll_user_fiemap);
1956 if (extent_count != 0)
1957 ret_bytes += (fiemap_s->fm_mapped_extents *
1958 sizeof(struct ll_fiemap_extent));
1960 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1964 OBD_FREE_LARGE(fiemap_s, num_bytes);
1969 * Read the data_version for inode.
1971 * This value is computed using stripe object version on OST.
1972 * Version is computed using server side locking.
1974 * @param sync if do sync on the OST side;
1976 * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1977 * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * Read the data version for @inode (see the comment block above for the
 * @flags / sync semantics: LL_DV_RD_FLUSH, LL_DV_WR_FLUSH).
 *
 * A stripe-less inode is treated as version 0. Otherwise the version
 * is obtained via an OST getattr on the stripe objects and returned in
 * *data_version when the reply marks OBD_MD_FLDATAVERSION valid.
 */
1979 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1981 struct lov_stripe_md *lsm = NULL;
1982 struct ll_sb_info *sbi = ll_i2sbi(inode);
1983 struct obdo *obdo = NULL;
1987 /* If no stripe, we consider version is 0. */
1988 lsm = ccc_inode_lsm_get(inode);
1989 if (!lsm_has_objects(lsm)) {
1991 CDEBUG(D_INODE, "No object for inode\n");
1995 OBD_ALLOC_PTR(obdo);
1997 GOTO(out, rc = -ENOMEM);
1999 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, flags);
2001 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
2004 *data_version = obdo->o_data_version;
2010 ccc_inode_lsm_put(inode, lsm);
2015 * Trigger a HSM release request for the provided inode.
/*
 * Trigger an HSM release for @inode: hand the file's data back to the
 * archive and free the OST objects.
 *
 * Takes a write lease with MDS_OPEN_RELEASE, flushes and captures the
 * current data version (LL_DV_WR_FLUSH drops cached pages), merges
 * size/time attributes from the cl layer, then closes the open handle
 * — the close RPC carries the release. On the error path a still-valid
 * lease handle is closed explicitly.
 */
2017 int ll_hsm_release(struct inode *inode)
2019 struct cl_env_nest nest;
2021 struct obd_client_handle *och = NULL;
2022 __u64 data_version = 0;
2026 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
2027 ll_get_fsname(inode->i_sb, NULL, 0),
2028 PFID(&ll_i2info(inode)->lli_fid));
2030 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
2032 GOTO(out, rc = PTR_ERR(och));
2034 /* Grab latest data_version and [am]time values */
2035 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
2039 env = cl_env_nested_get(&nest);
2041 GOTO(out, rc = PTR_ERR(env));
2043 ll_merge_lvb(env, inode);
2044 cl_env_nested_put(&nest, env);
2046 /* Release the file.
2047 * NB: lease lock handle is released in mdc_hsm_release_pack() because
2048 * we still need it to pack l_remote_handle to MDT. */
2049 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
2055 if (och != NULL && !IS_ERR(och)) /* close the file */
2056 ll_lease_close(och, inode, NULL);
/*
 * Scratch state for ll_swap_layouts(): the two inodes being swapped,
 * saved [am]time attributes to restore afterwards, and the per-inode
 * data-version check requests (booleans so they can be swap()ed along
 * with the inodes when the pair is reordered).
 */
2061 struct ll_swap_stack {
2062 struct iattr ia1, ia2;
2064 struct inode *inode1, *inode2;
2065 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS backend: atomically swap the layouts of the
 * files behind @file1 and @file2 on the MDT.
 *
 * Preconditions enforced here: both are regular files, writable by the
 * caller, and on the same filesystem. The pair is put in canonical FID
 * order ("sequentialize") so concurrent swaps cannot deadlock; the
 * per-file state travels in an ll_swap_stack so it can be swapped
 * along with the inodes. Optionally takes group locks to flush dirty
 * caches, verifies requested data versions (-EAGAIN on mismatch),
 * performs the swap through obd_iocontrol on the MDC, and finally
 * restores mtime/atime on request. Note ia2 is applied to inode1 and
 * vice versa: after the swap each file should keep the times that
 * belonged to its new content.
 */
2068 static int ll_swap_layouts(struct file *file1, struct file *file2,
2069 struct lustre_swap_layouts *lsl)
2071 struct mdc_swap_layouts msl;
2072 struct md_op_data *op_data;
2075 struct ll_swap_stack *llss = NULL;
2078 OBD_ALLOC_PTR(llss);
2082 llss->inode1 = file1->f_dentry->d_inode;
2083 llss->inode2 = file2->f_dentry->d_inode;
2085 if (!S_ISREG(llss->inode2->i_mode))
2086 GOTO(free, rc = -EINVAL);
2088 if (inode_permission(llss->inode1, MAY_WRITE) ||
2089 inode_permission(llss->inode2, MAY_WRITE))
2090 GOTO(free, rc = -EPERM);
2092 if (llss->inode2->i_sb != llss->inode1->i_sb)
2093 GOTO(free, rc = -EXDEV);
2095 /* we use two bools because they are easier to swap than two bits */
2096 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2097 llss->check_dv1 = true;
2099 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2100 llss->check_dv2 = true;
2102 /* we cannot use lsl->sl_dvX directly because we may swap them */
2103 llss->dv1 = lsl->sl_dv1;
2104 llss->dv2 = lsl->sl_dv2;
2106 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2107 if (rc == 0) /* same file, done! */
2110 if (rc < 0) { /* sequentialize it */
2111 swap(llss->inode1, llss->inode2);
2113 swap(llss->dv1, llss->dv2);
2114 swap(llss->check_dv1, llss->check_dv2);
2118 if (gid != 0) { /* application asks to flush dirty cache */
2119 rc = ll_get_grouplock(llss->inode1, file1, gid);
2123 rc = ll_get_grouplock(llss->inode2, file2, gid);
2125 ll_put_grouplock(llss->inode1, file1, gid);
2130 /* to be able to restore mtime and atime after swap
2131 * we need to first save them */
2133 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2134 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2135 llss->ia1.ia_atime = llss->inode1->i_atime;
2136 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2137 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2138 llss->ia2.ia_atime = llss->inode2->i_atime;
2139 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2142 /* ultimate check: before swapping the layouts, verify the
2143 * dataversion has not changed (if the caller requested it) */
2144 if (llss->check_dv1) {
2145 rc = ll_data_version(llss->inode1, &dv, 0);
2148 if (dv != llss->dv1)
2149 GOTO(putgl, rc = -EAGAIN);
2152 if (llss->check_dv2) {
2153 rc = ll_data_version(llss->inode2, &dv, 0);
2156 if (dv != llss->dv2)
2157 GOTO(putgl, rc = -EAGAIN);
2160 /* struct md_op_data is used to send the swap args to the mdt
2161 * only flags is missing, so we use struct mdc_swap_layouts
2162 * through the md_op_data->op_data */
2163 /* flags from user space have to be converted before they are send to
2164 * server, no flag is sent today, they are only used on the client */
2167 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2168 0, LUSTRE_OPC_ANY, &msl);
2169 if (IS_ERR(op_data))
2170 GOTO(free, rc = PTR_ERR(op_data));
2172 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2173 sizeof(*op_data), op_data, NULL);
2174 ll_finish_md_op_data(op_data);
2178 ll_put_grouplock(llss->inode2, file2, gid);
2179 ll_put_grouplock(llss->inode1, file1, gid);
2182 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2186 /* clear useless flags */
2187 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2188 llss->ia1.ia_valid &= ~ATTR_MTIME;
2189 llss->ia2.ia_valid &= ~ATTR_MTIME;
2192 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2193 llss->ia1.ia_valid &= ~ATTR_ATIME;
2194 llss->ia2.ia_valid &= ~ATTR_ATIME;
2197 /* update time if requested */
2199 if (llss->ia2.ia_valid != 0) {
2200 mutex_lock(&llss->inode1->i_mutex);
2201 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2202 mutex_unlock(&llss->inode1->i_mutex);
2205 if (llss->ia1.ia_valid != 0) {
2208 mutex_lock(&llss->inode2->i_mutex);
2209 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2210 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Set/clear HSM state flags on @inode via the MDC.
 *
 * Unprivileged callers may only touch flags inside HSM_USER_MASK;
 * anything outside it requires CFS_CAP_SYS_ADMIN. The hsm_state_set is
 * shipped through md_op_data->op_data to the MDT.
 */
2222 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2224 struct md_op_data *op_data;
2227 /* Non-root users are forbidden to set or clear flags which are
2228 * NOT defined in HSM_USER_MASK. */
2229 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2230 !cfs_capable(CFS_CAP_SYS_ADMIN))
2233 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2234 LUSTRE_OPC_ANY, hss);
2235 if (IS_ERR(op_data))
2236 RETURN(PTR_ERR(op_data));
2238 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2239 sizeof(*op_data), op_data, NULL);
2241 ll_finish_md_op_data(op_data);
/*
 * Import a pre-archived file into HSM: mark @inode as archived, existing
 * and released in the given archive, then restore the original
 * attributes (mode/uid/gid/size/times) recorded in @hui.
 *
 * Only regular files can be imported. ATTR_FORCE plus the *_SET time
 * flags make ll_setattr_raw() apply the saved values verbatim rather
 * than "now".
 */
2246 static int ll_hsm_import(struct inode *inode, struct file *file,
2247 struct hsm_user_import *hui)
2249 struct hsm_state_set *hss = NULL;
2250 struct iattr *attr = NULL;
2254 if (!S_ISREG(inode->i_mode))
2260 GOTO(out, rc = -ENOMEM);
2262 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2263 hss->hss_archive_id = hui->hui_archive_id;
2264 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2265 rc = ll_hsm_state_set(inode, hss);
2269 OBD_ALLOC_PTR(attr);
2271 GOTO(out, rc = -ENOMEM);
2273 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2274 attr->ia_mode |= S_IFREG;
2275 attr->ia_uid = hui->hui_uid;
2276 attr->ia_gid = hui->hui_gid;
2277 attr->ia_size = hui->hui_size;
2278 attr->ia_mtime.tv_sec = hui->hui_mtime;
2279 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2280 attr->ia_atime.tv_sec = hui->hui_atime;
2281 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2283 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2284 ATTR_UID | ATTR_GID |
2285 ATTR_MTIME | ATTR_MTIME_SET |
2286 ATTR_ATIME | ATTR_ATIME_SET;
2288 rc = ll_setattr_raw(file->f_dentry, attr, true);
/*
 * unlocked_ioctl entry point for regular files: dispatch Lustre-specific
 * ioctls (striping, group locks, FID/path translation, data version,
 * HSM, leases, ...) and fall through to ll_iocontrol_call() /
 * obd_iocontrol() for anything not handled here. tty ioctls are
 * rejected up front since asm-ppc aliases their type code.
 */
2302 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2304 struct inode *inode = file->f_dentry->d_inode;
2305 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2309 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2310 PFID(ll_inode2fid(inode)), inode, cmd);
2311 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2313 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2314 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2318 case LL_IOC_GETFLAGS:
2319 /* Get the current value of the file flags */
2320 return put_user(fd->fd_flags, (int *)arg);
2321 case LL_IOC_SETFLAGS:
2322 case LL_IOC_CLRFLAGS:
2323 /* Set or clear specific file flags */
2324 /* XXX This probably needs checks to ensure the flags are
2325 * not abused, and to handle any flag side effects.
2327 if (get_user(flags, (int *) arg))
2330 if (cmd == LL_IOC_SETFLAGS) {
/* IGNORE_LOCK only makes sense for O_DIRECT IO */
2331 if ((flags & LL_FILE_IGNORE_LOCK) &&
2332 !(file->f_flags & O_DIRECT)) {
2333 CERROR("%s: unable to disable locking on "
2334 "non-O_DIRECT file\n", current->comm);
2338 fd->fd_flags |= flags;
2340 fd->fd_flags &= ~flags;
2343 case LL_IOC_LOV_SETSTRIPE:
2344 RETURN(ll_lov_setstripe(inode, file, arg));
2345 case LL_IOC_LOV_SETEA:
2346 RETURN(ll_lov_setea(inode, file, arg));
2347 case LL_IOC_LOV_SWAP_LAYOUTS: {
2349 struct lustre_swap_layouts lsl;
2351 if (copy_from_user(&lsl, (char *)arg,
2352 sizeof(struct lustre_swap_layouts)))
/* both fds must be writable for a layout swap */
2355 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2358 file2 = fget(lsl.sl_fd);
2363 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2364 rc = ll_swap_layouts(file, file2, &lsl);
2368 case LL_IOC_LOV_GETSTRIPE:
2369 RETURN(ll_lov_getstripe(inode, arg));
2370 case LL_IOC_RECREATE_OBJ:
2371 RETURN(ll_lov_recreate_obj(inode, arg));
2372 case LL_IOC_RECREATE_FID:
2373 RETURN(ll_lov_recreate_fid(inode, arg));
2374 case FSFILT_IOC_FIEMAP:
2375 RETURN(ll_ioctl_fiemap(inode, arg));
2376 case FSFILT_IOC_GETFLAGS:
2377 case FSFILT_IOC_SETFLAGS:
2378 RETURN(ll_iocontrol(inode, file, cmd, arg));
2379 case FSFILT_IOC_GETVERSION_OLD:
2380 case FSFILT_IOC_GETVERSION:
2381 RETURN(put_user(inode->i_generation, (int *)arg));
2382 case LL_IOC_GROUP_LOCK:
2383 RETURN(ll_get_grouplock(inode, file, arg));
2384 case LL_IOC_GROUP_UNLOCK:
2385 RETURN(ll_put_grouplock(inode, file, arg));
2386 case IOC_OBD_STATFS:
2387 RETURN(ll_obd_statfs(inode, (void *)arg));
2389 /* We need to special case any other ioctls we want to handle,
2390 * to send them to the MDS/OST as appropriate and to properly
2391 * network encode the arg field.
2392 case FSFILT_IOC_SETVERSION_OLD:
2393 case FSFILT_IOC_SETVERSION:
2395 case LL_IOC_FLUSHCTX:
2396 RETURN(ll_flush_ctx(inode));
2397 case LL_IOC_PATH2FID: {
2398 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2399 sizeof(struct lu_fid)))
2404 case OBD_IOC_FID2PATH:
2405 RETURN(ll_fid2path(inode, (void *)arg));
2406 case LL_IOC_DATA_VERSION: {
2407 struct ioc_data_version idv;
2410 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
/* mask to the two supported flush flags before using user input */
2413 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2414 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2416 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2422 case LL_IOC_GET_MDTIDX: {
2425 mdtidx = ll_get_mdt_idx(inode);
2429 if (put_user((int)mdtidx, (int*)arg))
2434 case OBD_IOC_GETDTNAME:
2435 case OBD_IOC_GETMDNAME:
2436 RETURN(ll_get_obd_name(inode, cmd, arg));
2437 case LL_IOC_HSM_STATE_GET: {
2438 struct md_op_data *op_data;
2439 struct hsm_user_state *hus;
2446 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2447 LUSTRE_OPC_ANY, hus);
2448 if (IS_ERR(op_data)) {
2450 RETURN(PTR_ERR(op_data));
2453 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2456 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2459 ll_finish_md_op_data(op_data);
2463 case LL_IOC_HSM_STATE_SET: {
2464 struct hsm_state_set *hss;
2471 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2476 rc = ll_hsm_state_set(inode, hss);
2481 case LL_IOC_HSM_ACTION: {
2482 struct md_op_data *op_data;
2483 struct hsm_current_action *hca;
2490 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2491 LUSTRE_OPC_ANY, hca);
2492 if (IS_ERR(op_data)) {
2494 RETURN(PTR_ERR(op_data));
2497 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2500 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2503 ll_finish_md_op_data(op_data);
2507 case LL_IOC_SET_LEASE: {
2508 struct ll_inode_info *lli = ll_i2info(inode);
2509 struct obd_client_handle *och = NULL;
/* requested lease mode must be covered by the fd's open mode */
2515 if (!(file->f_mode & FMODE_WRITE))
2520 if (!(file->f_mode & FMODE_READ))
2525 mutex_lock(&lli->lli_och_mutex);
2526 if (fd->fd_lease_och != NULL) {
2527 och = fd->fd_lease_och;
2528 fd->fd_lease_och = NULL;
2530 mutex_unlock(&lli->lli_och_mutex);
2533 mode = och->och_flags &(FMODE_READ|FMODE_WRITE);
2534 rc = ll_lease_close(och, inode, &lease_broken);
2535 if (rc == 0 && lease_broken)
2541 /* return the type of lease or error */
2542 RETURN(rc < 0 ? rc : (int)mode);
2547 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2549 /* apply for lease */
2550 och = ll_lease_open(inode, file, mode, 0);
2552 RETURN(PTR_ERR(och));
2555 mutex_lock(&lli->lli_och_mutex);
2556 if (fd->fd_lease_och == NULL) {
2557 fd->fd_lease_och = och;
2560 mutex_unlock(&lli->lli_och_mutex);
2562 /* impossible now that only excl is supported for now */
2563 ll_lease_close(och, inode, &lease_broken);
2568 case LL_IOC_GET_LEASE: {
2569 struct ll_inode_info *lli = ll_i2info(inode);
2570 struct ldlm_lock *lock = NULL;
2573 mutex_lock(&lli->lli_och_mutex);
2574 if (fd->fd_lease_och != NULL) {
2575 struct obd_client_handle *och = fd->fd_lease_och;
2577 lock = ldlm_handle2lock(&och->och_lease_handle);
2579 lock_res_and_lock(lock);
/* lease is only reported as held while its lock isn't cancelled */
2580 if (!ldlm_is_cancel(lock))
2581 rc = och->och_flags &
2582 (FMODE_READ | FMODE_WRITE);
2583 unlock_res_and_lock(lock);
2584 LDLM_LOCK_PUT(lock);
2587 mutex_unlock(&lli->lli_och_mutex);
2590 case LL_IOC_HSM_IMPORT: {
2591 struct hsm_user_import *hui;
2597 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2602 rc = ll_hsm_import(inode, file, hui);
2611 ll_iocontrol_call(inode, file, cmd, arg, &err))
2614 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2620 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Commit a seek: validate @offset against sign and @maxsize, and update
 * f_pos (resetting f_version) only when it actually changes. Compat
 * helper used when the kernel lacks generic_file_llseek_size().
 */
2621 static inline loff_t
2622 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2624 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2626 if (offset > maxsize)
2629 if (offset != file->f_pos) {
2630 file->f_pos = offset;
2631 file->f_version = 0;
/*
 * Compat copy of the kernel's generic_file_llseek_size() for kernels
 * without it: resolve @offset relative to @origin (SEEK_SET/CUR/END and
 * SEEK_DATA/SEEK_HOLE, with @eof supplying the file size) and commit it
 * via llseek_execute(). i_mutex serializes the SEEK_CUR
 * read-modify-write of f_pos against concurrent seeks.
 */
2637 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2638 loff_t maxsize, loff_t eof)
2640 struct inode *inode = file->f_dentry->d_inode;
2648 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2649 * position-querying operation. Avoid rewriting the "same"
2650 * f_pos value back to the file because a concurrent read(),
2651 * write() or lseek() might have altered it
2656 * f_lock protects against read/modify/write race with other
2657 * SEEK_CURs. Note that parallel writes and reads behave
2660 mutex_lock(&inode->i_mutex);
2661 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2662 mutex_unlock(&inode->i_mutex);
2666 * In the generic case the entire file is data, so as long as
2667 * offset isn't at the end of the file then the offset is data.
2674 * There is a virtual hole at the end of the file, so as long as
2675 * offset isn't i_size or larger, return i_size.
2683 return llseek_execute(file, offset, maxsize);
/*
 * llseek entry point for Lustre files.
 *
 * For SEEK_END/SEEK_HOLE/SEEK_DATA the file size must be current, so a
 * glimpse RPC (ll_glimpse_size) refreshes i_size from the OSTs before
 * delegating to ll_generic_file_llseek_size() bounded by
 * ll_file_maxbytes().
 */
2687 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2689 struct inode *inode = file->f_dentry->d_inode;
2690 loff_t retval, eof = 0;
/* retval here is only the target position used for the trace message
 * below; the real result comes from ll_generic_file_llseek_size(). */
2693 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2694 (origin == SEEK_CUR) ? file->f_pos : 0);
2695 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2696 PFID(ll_inode2fid(inode)), inode, retval, retval,
2698 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2700 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2701 retval = ll_glimpse_size(inode);
2704 eof = i_size_read(inode);
2707 retval = ll_generic_file_llseek_size(file, offset, origin,
2708 ll_file_maxbytes(inode), eof);
/*
 * .flush handler (called on every close of a file descriptor).
 *
 * Does not write anything itself: it only harvests asynchronous write
 * errors recorded against the inode/object and reports them as -EIO,
 * unless the error was already reported to this descriptor
 * (fd->fd_write_failed).
 */
2712 int ll_flush(struct file *file, fl_owner_t id)
2714 struct inode *inode = file->f_dentry->d_inode;
2715 struct ll_inode_info *lli = ll_i2info(inode);
2716 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2719 LASSERT(!S_ISDIR(inode->i_mode));
2721 /* catch async errors that were recorded back when async writeback
2722 * failed for pages in this mapping. */
/* read-and-clear: the error is consumed by the first flusher */
2723 rc = lli->lli_async_rc;
2724 lli->lli_async_rc = 0;
2725 err = lov_read_and_clear_async_rc(lli->lli_clob);
2729 /* The application has been told write failure already.
2730 * Do not report failure again. */
2731 if (fd->fd_write_failed)
2733 return rc ? -EIO : 0;
2737 * Called to make sure a portion of file has been written out.
2738 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2740 * Return how many pages have been written.
2742 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2743 enum cl_fsync_mode mode, int ignore_layout)
2745 struct cl_env_nest nest;
2748 struct obd_capa *capa = NULL;
2749 struct cl_fsync_io *fio;
/* reject any mode outside the known cl_fsync_mode set */
2753 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2754 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2757 env = cl_env_nested_get(&nest);
2759 RETURN(PTR_ERR(env));
/* OSS write capability travels with the fsync IO (fio->fi_capa) */
2761 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2763 io = ccc_env_thread_io(env);
2764 io->ci_obj = cl_i2info(inode)->lli_clob;
2765 io->ci_ignore_layout = ignore_layout;
2767 /* initialize parameters for sync */
2768 fio = &io->u.ci_fsync;
2769 fio->fi_capa = capa;
2770 fio->fi_start = start;
2772 fio->fi_fid = ll_inode2fid(inode);
2773 fio->fi_mode = mode;
2774 fio->fi_nr_written = 0;
/* run the CIT_FSYNC io loop; on success the result is the page count
 * accumulated in fi_nr_written, otherwise the io error */
2776 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2777 result = cl_io_loop(env, io);
2779 result = io->ci_result;
2781 result = fio->fi_nr_written;
2782 cl_io_fini(env, io);
2783 cl_env_nested_put(&nest, env);
2791 * When dentry is provided (the 'else' case), *file->f_dentry may be
2792 * null and dentry must be used directly rather than pulled from
2793 * *file->f_dentry as is done otherwise.
/*
 * fsync entry point; the three prototypes below cover the kernel API
 * variants selected at configure time (4-arg range fsync, 2-arg, and
 * the old 3-arg form that passes the dentry explicitly).
 */
2796 #ifdef HAVE_FILE_FSYNC_4ARGS
2797 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2799 struct dentry *dentry = file->f_dentry;
2800 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2801 int ll_fsync(struct file *file, int datasync)
2803 struct dentry *dentry = file->f_dentry;
2805 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2808 struct inode *inode = dentry->d_inode;
2809 struct ll_inode_info *lli = ll_i2info(inode);
2810 struct ptlrpc_request *req;
2811 struct obd_capa *oc;
2815 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2816 PFID(ll_inode2fid(inode)), inode);
2817 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2819 #ifdef HAVE_FILE_FSYNC_4ARGS
/* on new kernels the caller no longer flushes for us, so write and
 * wait on the range here, then take i_mutex as the old path had it */
2820 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2821 mutex_lock(&inode->i_mutex);
2823 /* fsync's caller has already called _fdata{sync,write}, we want
2824 * that IO to finish before calling the osc and mdc sync methods */
2825 rc = filemap_fdatawait(inode->i_mapping);
2828 /* catch async errors that were recorded back when async writeback
2829 * failed for pages in this mapping. */
2830 if (!S_ISDIR(inode->i_mode)) {
2831 err = lli->lli_async_rc;
2832 lli->lli_async_rc = 0;
2835 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* sync the metadata on the MDT (md_fsync), then for regular files sync
 * the data objects via cl_sync_file_range() and track per-descriptor
 * write failure state */
2840 oc = ll_mdscapa_get(inode);
2841 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2847 ptlrpc_req_finished(req);
2849 if (datasync && S_ISREG(inode->i_mode)) {
2850 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2852 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2854 if (rc == 0 && err < 0)
2857 fd->fd_write_failed = true;
2859 fd->fd_write_failed = false;
2862 #ifdef HAVE_FILE_FSYNC_4ARGS
2863 mutex_unlock(&inode->i_mutex);
/*
 * flock/fcntl lock handler: translate the VFS file_lock into an LDLM
 * flock enqueue on the MDT, then mirror the result into the local VFS
 * lock tables (flock_lock_file_wait / posix_lock_file_wait) so the
 * kernel's bookkeeping stays consistent with the cluster-wide lock.
 */
2868 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2870 struct inode *inode = file->f_dentry->d_inode;
2871 struct ll_sb_info *sbi = ll_i2sbi(inode);
2872 struct ldlm_enqueue_info einfo = {
2873 .ei_type = LDLM_FLOCK,
2874 .ei_cb_cp = ldlm_flock_completion_ast,
2875 .ei_cbdata = file_lock,
2877 struct md_op_data *op_data;
2878 struct lustre_handle lockh = {0};
2879 ldlm_policy_data_t flock = {{0}};
2885 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2886 PFID(ll_inode2fid(inode)), file_lock);
2888 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
/* map the VFS lock description into LDLM flock policy data:
 * BSD flock() locks cover the whole file and are owned by the struct
 * file; POSIX locks carry an explicit byte range and fl_owner */
2890 if (file_lock->fl_flags & FL_FLOCK) {
2891 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2892 /* flocks are whole-file locks */
2893 flock.l_flock.end = OFFSET_MAX;
2894 /* For flocks owner is determined by the local file desctiptor*/
2895 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2896 } else if (file_lock->fl_flags & FL_POSIX) {
2897 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2898 flock.l_flock.start = file_lock->fl_start;
2899 flock.l_flock.end = file_lock->fl_end;
2903 flock.l_flock.pid = file_lock->fl_pid;
2905 /* Somewhat ugly workaround for svc lockd.
2906 * lockd installs custom fl_lmops->lm_compare_owner that checks
2907 * for the fl_owner to be the same (which it always is on local node
2908 * I guess between lockd processes) and then compares pid.
2909 * As such we assign pid to the owner field to make it all work,
2910 * conflict with normal locks is unlikely since pid space and
2911 * pointer space for current->files are not intersecting */
2912 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2913 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* F_RDLCK -> PR, F_WRLCK -> PW, F_UNLCK -> NL (unlock-as-enqueue) */
2915 switch (file_lock->fl_type) {
2917 einfo.ei_mode = LCK_PR;
2920 /* An unlock request may or may not have any relation to
2921 * existing locks so we may not be able to pass a lock handle
2922 * via a normal ldlm_lock_cancel() request. The request may even
2923 * unlock a byte range in the middle of an existing lock. In
2924 * order to process an unlock request we need all of the same
2925 * information that is given with a normal read or write record
2926 * lock request. To avoid creating another ldlm unlock (cancel)
2927 * message we'll treat a LCK_NL flock request as an unlock. */
2928 einfo.ei_mode = LCK_NL;
2931 einfo.ei_mode = LCK_PW;
2934 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2935 file_lock->fl_type);
/* cmd handling (cases elided in this view): non-blocking requests get
 * LDLM_FL_BLOCK_NOWAIT, F_GETLK-style queries get LDLM_FL_TEST_LOCK */
2950 flags = LDLM_FL_BLOCK_NOWAIT;
2956 flags = LDLM_FL_TEST_LOCK;
2957 /* Save the old mode so that if the mode in the lock changes we
2958 * can decrement the appropriate reader or writer refcount. */
2959 file_lock->fl_type = einfo.ei_mode;
2962 CERROR("unknown fcntl lock command: %d\n", cmd);
2966 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2967 LUSTRE_OPC_ANY, NULL);
2968 if (IS_ERR(op_data))
2969 RETURN(PTR_ERR(op_data));
2971 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2972 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2973 flock.l_flock.pid, flags, einfo.ei_mode,
2974 flock.l_flock.start, flock.l_flock.end);
2976 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2977 op_data, &lockh, &flock, 0, NULL /* req */, flags);
/* mirror a successful (or unlock) result into the local VFS lock
 * tables; if that fails, roll back the server lock with an NL enqueue */
2979 if ((file_lock->fl_flags & FL_FLOCK) &&
2980 (rc == 0 || file_lock->fl_type == F_UNLCK))
2981 rc2 = flock_lock_file_wait(file, file_lock);
2982 if ((file_lock->fl_flags & FL_POSIX) &&
2983 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2984 !(flags & LDLM_FL_TEST_LOCK))
2985 rc2 = posix_lock_file_wait(file, file_lock);
2987 if (rc2 && file_lock->fl_type != F_UNLCK) {
2988 einfo.ei_mode = LCK_NL;
2989 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2990 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2994 ll_finish_md_op_data(op_data);
/* Stub lock handler for -o noflock mounts; per the comment above
 * ll_file_operations_noflock below, flock calls return ENOSYS
 * (body elided in this view). */
2999 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3007 * test if some locks matching bits and l_req_mode are acquired
3008 * - bits can be in different locks
3009 * - if found clear the common lock bits in *bits
3010 * - the bits not found, are kept in *bits
3012 * \param bits [IN] searched lock bits [IN]
3013 * \param l_req_mode [IN] searched lock mode
3014 * \retval boolean, true iff all bits are found
3016 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3018 struct lustre_handle lockh;
3019 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against the union of CR/CW/PR/PW */
3020 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3021 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3030 fid = &ll_i2info(inode)->lli_fid;
3031 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3032 ldlm_lockname[mode]);
/* TEST_LOCK: match without taking a reference on the found lock */
3034 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* probe each requested inodebit individually; stop early once every
 * requested bit has been found (i.e. *bits drained to 0) */
3035 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3036 policy.l_inodebits.bits = *bits & (1 << i);
3037 if (policy.l_inodebits.bits == 0)
3040 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3041 &policy, mode, &lockh)) {
3042 struct ldlm_lock *lock;
3044 lock = ldlm_handle2lock(&lockh);
3047 ~(lock->l_policy_data.l_inodebits.bits);
3048 LDLM_LOCK_PUT(lock);
3050 *bits &= ~policy.l_inodebits.bits;
/*
 * Try to match (and take a reference on) an already-granted MDS ibits
 * lock covering @bits on @inode's resource.  On success @lockh holds
 * the matched lock; the caller owns the resulting reference.
 * Returns the matched ldlm mode, or 0 if nothing matched.
 */
3057 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3058 struct lustre_handle *lockh, __u64 flags,
3061 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3066 fid = &ll_i2info(inode)->lli_fid;
3067 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3069 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3070 fid, LDLM_IBITS, &policy, mode, lockh);
/*
 * Common tail for revalidation: -ENOENT on a non-regular, non-directory
 * inode is treated as "already unlinked" and absorbed; any other error
 * is logged with the FID and propagated.
 */
3075 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3077 /* Already unlinked. Just update nlink and return success */
3078 if (rc == -ENOENT) {
3080 /* This path cannot be hit for regular files unless in
3081 * case of obscure races, so no need to to validate
3083 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3085 } else if (rc != 0) {
3086 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
3087 ll_get_fsname(inode->i_sb, NULL, 0),
3088 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate @dentry's inode attributes against the MDS, taking ibits
 * locks as needed.
 *
 * Two paths:
 *  - OBD_CONNECT_ATTRFID servers: a by-FID intent getattr/lookup
 *    (md_intent_lock), which also refreshes the dentry state;
 *  - otherwise: only if no matching MDS ibits lock is already cached,
 *    a plain md_getattr (with EA size for regular files) followed by
 *    ll_prep_inode() to apply the reply.
 */
3094 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3097 struct inode *inode = dentry->d_inode;
3098 struct ptlrpc_request *req = NULL;
3099 struct obd_export *exp;
3103 LASSERT(inode != NULL);
3105 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3106 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3108 exp = ll_i2mdexp(inode);
3110 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3111 * But under CMD case, it caused some lock issues, should be fixed
3112 * with new CMD ibits lock. See bug 12718 */
3113 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3114 struct lookup_intent oit = { .it_op = IT_GETATTR };
3115 struct md_op_data *op_data;
/* LOOKUP-only revalidation can use the cheaper IT_LOOKUP intent */
3117 if (ibits == MDS_INODELOCK_LOOKUP)
3118 oit.it_op = IT_LOOKUP;
3120 /* Call getattr by fid, so do not provide name at all. */
3121 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3122 dentry->d_inode, NULL, 0, 0,
3123 LUSTRE_OPC_ANY, NULL);
3124 if (IS_ERR(op_data))
3125 RETURN(PTR_ERR(op_data));
/* M_CHECK_STALE flags the intent as a staleness check for the MDS */
3127 oit.it_create_mode |= M_CHECK_STALE;
3128 rc = md_intent_lock(exp, op_data, NULL, 0,
3129 /* we are not interested in name
3132 ll_md_blocking_ast, 0);
3133 ll_finish_md_op_data(op_data);
3134 oit.it_create_mode &= ~M_CHECK_STALE;
3136 rc = ll_inode_revalidate_fini(inode, rc);
3140 rc = ll_revalidate_it_finish(req, &oit, dentry);
3142 ll_intent_release(&oit);
3146 /* Unlinked? Unhash dentry, so it is not picked up later by
3147 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3148 here to preserve get_cwd functionality on 2.6.
3150 if (!dentry->d_inode->i_nlink)
3151 d_lustre_invalidate(dentry, 0);
3153 ll_lookup_finish_locks(&oit, dentry);
3154 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3155 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3156 obd_valid valid = OBD_MD_FLGETATTR;
3157 struct md_op_data *op_data;
/* regular files: also request the striping EA, sized to the largest
 * layout the MDS may return */
3160 if (S_ISREG(inode->i_mode)) {
3161 rc = ll_get_max_mdsize(sbi, &ealen);
3164 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3167 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3168 0, ealen, LUSTRE_OPC_ANY,
3170 if (IS_ERR(op_data))
3171 RETURN(PTR_ERR(op_data));
3173 op_data->op_valid = valid;
3174 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3175 * capa for this inode. Because we only keep capas of dirs
3177 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3178 ll_finish_md_op_data(op_data);
3180 rc = ll_inode_revalidate_fini(inode, rc);
3184 rc = ll_prep_inode(&inode, req, NULL, NULL);
3187 ptlrpc_req_finished(req);
/*
 * Revalidate attributes and, for regular files, refresh the size via a
 * glimpse; for non-regular inodes the cached LVB timestamps are copied
 * into the inode instead.
 */
3191 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3194 struct inode *inode = dentry->d_inode;
3198 rc = __ll_inode_revalidate_it(dentry, it, ibits);
3202 /* if object isn't regular file, don't validate size */
3203 if (!S_ISREG(inode->i_mode)) {
3204 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
3205 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
3206 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
3208 /* In case of restore, the MDT has the right size and has
3209 * already send it back without granting the layout lock,
3210 * inode is up-to-date so glimpse is useless.
3211 * Also to glimpse we need the layout, in case of a running
3212 * restore the MDT holds the layout lock so the glimpse will
3213 * block up to the end of restore (getattr will block)
3215 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3216 rc = ll_glimpse_size(inode);
/*
 * getattr worker: revalidate UPDATE|LOOKUP ibits, then fill *stat from
 * the (now fresh) inode.  With a 32-bit-API client the inode number is
 * derived from the FID via cl_fid_build_ino().
 */
3221 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
3222 struct lookup_intent *it, struct kstat *stat)
3224 struct inode *inode = de->d_inode;
3225 struct ll_sb_info *sbi = ll_i2sbi(inode);
3226 struct ll_inode_info *lli = ll_i2info(inode);
3229 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3230 MDS_INODELOCK_LOOKUP);
3231 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3236 stat->dev = inode->i_sb->s_dev;
3237 if (ll_need_32bit_api(sbi))
3238 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3240 stat->ino = inode->i_ino;
3241 stat->mode = inode->i_mode;
3242 stat->nlink = inode->i_nlink;
3243 stat->uid = inode->i_uid;
3244 stat->gid = inode->i_gid;
3245 stat->rdev = inode->i_rdev;
3246 stat->atime = inode->i_atime;
3247 stat->mtime = inode->i_mtime;
3248 stat->ctime = inode->i_ctime;
3249 stat->blksize = 1 << inode->i_blkbits;
3251 stat->size = i_size_read(inode);
3252 stat->blocks = inode->i_blocks;
/* VFS .getattr entry point: delegate to ll_getattr_it() with a fresh
 * IT_GETATTR intent. */
3256 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3258 struct lookup_intent it = { .it_op = IT_GETATTR };
3260 return ll_getattr_it(mnt, de, &it, stat);
/*
 * VFS fiemap handler: marshal struct fiemap_extent_info into the
 * Lustre ll_user_fiemap layout, run ll_do_fiemap(), and copy the
 * mapped extents back.
 * NOTE(review): the allocation is sized for fi_extents_max extents,
 * but only one extent's worth is copied in before the call — presumably
 * to seed fm_extents[0]; confirm against the full source.
 */
3263 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3264 __u64 start, __u64 len)
3268 struct ll_user_fiemap *fiemap;
3269 unsigned int extent_count = fieinfo->fi_extents_max;
3271 num_bytes = sizeof(*fiemap) + (extent_count *
3272 sizeof(struct ll_fiemap_extent));
3273 OBD_ALLOC_LARGE(fiemap, num_bytes);
3278 fiemap->fm_flags = fieinfo->fi_flags;
3279 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3280 fiemap->fm_start = start;
3281 fiemap->fm_length = len;
3282 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3283 sizeof(struct ll_fiemap_extent));
3285 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* propagate results (flags may be updated by the server side) */
3287 fieinfo->fi_flags = fiemap->fm_flags;
3288 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3289 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3290 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3292 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * Return a referenced copy of the inode's cached POSIX ACL (may be
 * NULL).  The lli_lock guards the cached pointer; the duplicated
 * reference is released by the VFS caller.
 */
3296 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3298 struct ll_inode_info *lli = ll_i2info(inode);
3299 struct posix_acl *acl = NULL;
3302 spin_lock(&lli->lli_lock);
3303 /* VFS' acl_permission_check->check_acl will release the refcount */
3304 acl = posix_acl_dup(lli->lli_posix_acl);
3305 spin_unlock(&lli->lli_lock);
/*
 * ACL check callback used with older generic_permission() variants
 * (4-arg and legacy forms).  With CONFIG_FS_POSIX_ACL it fetches the
 * cached ACL and runs posix_acl_permission(); under RCU walk
 * (IPERM_FLAG_RCU) it bails out — presumably with -ECHILD, the elided
 * return; confirm against the full source.
 */
3310 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3312 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3313 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3315 ll_check_acl(struct inode *inode, int mask)
3318 # ifdef CONFIG_FS_POSIX_ACL
3319 struct posix_acl *acl;
3323 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3324 if (flags & IPERM_FLAG_RCU)
3327 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3332 rc = posix_acl_permission(inode, acl, mask);
3333 posix_acl_release(acl);
3336 # else /* !CONFIG_FS_POSIX_ACL */
3338 # endif /* CONFIG_FS_POSIX_ACL */
3340 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * VFS .permission handler (prototype varies with kernel API).  The
 * root inode is revalidated first since lookup never revalidates it;
 * remote-client mounts delegate to lustre_check_remote_perm(),
 * everything else goes through ll_generic_permission() with
 * ll_check_acl as the ACL callback.
 */
3342 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3343 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3345 # ifdef HAVE_INODE_PERMISION_2ARGS
3346 int ll_inode_permission(struct inode *inode, int mask)
3348 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
/* cannot block during RCU-walk: the elided return is presumably
 * -ECHILD so the VFS retries in ref-walk mode — confirm */
3355 #ifdef MAY_NOT_BLOCK
3356 if (mask & MAY_NOT_BLOCK)
3358 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3359 if (flags & IPERM_FLAG_RCU)
3363 /* as root inode are NOT getting validated in lookup operation,
3364 * need to do it before permission check. */
3366 if (inode == inode->i_sb->s_root->d_inode) {
3367 struct lookup_intent it = { .it_op = IT_LOOKUP };
3369 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3370 MDS_INODELOCK_LOOKUP);
3375 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3376 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3378 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3379 return lustre_check_remote_perm(inode, mask);
3381 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3382 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3387 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations table: no .flock/.lock entries, so the
 * kernel's local (single-node) flock semantics apply. */
3388 struct file_operations ll_file_operations = {
3389 .read = ll_file_read,
3390 .aio_read = ll_file_aio_read,
3391 .write = ll_file_write,
3392 .aio_write = ll_file_aio_write,
3393 .unlocked_ioctl = ll_file_ioctl,
3394 .open = ll_file_open,
3395 .release = ll_file_release,
3396 .mmap = ll_file_mmap,
3397 .llseek = ll_file_seek,
3398 .splice_read = ll_file_splice_read,
/* file_operations variant used with -o flock: cluster-coherent locking
 * via ll_file_flock for both flock() and POSIX fcntl locks. */
3403 struct file_operations ll_file_operations_flock = {
3404 .read = ll_file_read,
3405 .aio_read = ll_file_aio_read,
3406 .write = ll_file_write,
3407 .aio_write = ll_file_aio_write,
3408 .unlocked_ioctl = ll_file_ioctl,
3409 .open = ll_file_open,
3410 .release = ll_file_release,
3411 .mmap = ll_file_mmap,
3412 .llseek = ll_file_seek,
3413 .splice_read = ll_file_splice_read,
3416 .flock = ll_file_flock,
3417 .lock = ll_file_flock
3420 /* These are for -o noflock - to return ENOSYS on flock calls */
3421 struct file_operations ll_file_operations_noflock = {
3422 .read = ll_file_read,
3423 .aio_read = ll_file_aio_read,
3424 .write = ll_file_write,
3425 .aio_write = ll_file_aio_write,
3426 .unlocked_ioctl = ll_file_ioctl,
3427 .open = ll_file_open,
3428 .release = ll_file_release,
3429 .mmap = ll_file_mmap,
3430 .llseek = ll_file_seek,
3431 .splice_read = ll_file_splice_read,
3434 .flock = ll_file_noflock,
3435 .lock = ll_file_noflock
/* inode_operations for regular files; .get_acl is only wired up on
 * kernels that support it (HAVE_IOP_GET_ACL). */
3438 struct inode_operations ll_file_inode_operations = {
3439 .setattr = ll_setattr,
3440 .getattr = ll_getattr,
3441 .permission = ll_inode_permission,
3442 .setxattr = ll_setxattr,
3443 .getxattr = ll_getxattr,
3444 .listxattr = ll_listxattr,
3445 .removexattr = ll_removexattr,
3446 .fiemap = ll_fiemap,
3447 #ifdef HAVE_IOP_GET_ACL
3448 .get_acl = ll_get_acl,
3452 /* dynamic ioctl number support routins */
/* Global registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries protected by a rw_semaphore. */
3453 static struct llioc_ctl_data {
3454 struct rw_semaphore ioc_sem;
3455 cfs_list_t ioc_head;
3457 __RWSEM_INITIALIZER(llioc.ioc_sem),
3458 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registration: callback plus the variable-length array of ioctl
 * command numbers it handles (iocd_cmd is a flexible trailing array,
 * iocd_size is the full allocation size for freeing). */
3463 cfs_list_t iocd_list;
3464 unsigned int iocd_size;
3465 llioc_callback_t iocd_cb;
3466 unsigned int iocd_count;
3467 unsigned int iocd_cmd[0];
/*
 * Register callback @cb for @count dynamic ioctl numbers in @cmd.
 * Returns an opaque cookie (the llioc_data pointer) for use with
 * ll_iocontrol_unregister(), or NULL on bad arguments / allocation
 * failure.
 */
3470 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3473 struct llioc_data *in_data = NULL;
3476 if (cb == NULL || cmd == NULL ||
3477 count > LLIOC_MAX_CMD || count < 0)
3480 size = sizeof(*in_data) + count * sizeof(unsigned int);
3481 OBD_ALLOC(in_data, size);
3482 if (in_data == NULL)
3485 memset(in_data, 0, sizeof(*in_data));
3486 in_data->iocd_size = size;
3487 in_data->iocd_cb = cb;
3488 in_data->iocd_count = count;
3489 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* publish under the write lock */
3491 down_write(&llioc.ioc_sem);
3492 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3493 up_write(&llioc.ioc_sem);
/*
 * Remove and free the registration identified by @magic (the cookie
 * returned by ll_iocontrol_register).  Unknown cookies are only
 * warned about.
 */
3498 void ll_iocontrol_unregister(void *magic)
3500 struct llioc_data *tmp;
3505 down_write(&llioc.ioc_sem);
3506 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3508 unsigned int size = tmp->iocd_size;
3510 cfs_list_del(&tmp->iocd_list);
/* drop the lock before freeing; the early return for the found
 * entry is elided in this view */
3511 up_write(&llioc.ioc_sem);
3513 OBD_FREE(tmp, size);
3517 up_write(&llioc.ioc_sem);
3519 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3522 EXPORT_SYMBOL(ll_iocontrol_register);
3523 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch @cmd to the dynamically registered handlers.  Walks every
 * registration (under the read lock) whose iocd_cmd array contains
 * @cmd and invokes its callback, stopping when one returns LLIOC_STOP;
 * the handler's rc is passed back through *rcp.
 */
3525 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3526 unsigned int cmd, unsigned long arg, int *rcp)
3528 enum llioc_iter ret = LLIOC_CONT;
3529 struct llioc_data *data;
3530 int rc = -EINVAL, i;
3532 down_read(&llioc.ioc_sem);
3533 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3534 for (i = 0; i < data->iocd_count; i++) {
3535 if (cmd != data->iocd_cmd[i])
3538 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3542 if (ret == LLIOC_STOP)
3545 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration into the cl_object stack via
 * cl_conf_set().  For OBJECT_CONF_SET the layout lock is only allowed
 * to be matched (ldlm_lock_allow_match) after the layout has been
 * applied, so no reader can see a stale layout through the lock.
 */
3552 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3554 struct ll_inode_info *lli = ll_i2info(inode);
3555 struct cl_env_nest nest;
3560 if (lli->lli_clob == NULL)
3563 env = cl_env_nested_get(&nest);
3565 RETURN(PTR_ERR(env));
3567 result = cl_conf_set(env, lli->lli_clob, conf);
3568 cl_env_nested_put(&nest, env);
3570 if (conf->coc_opc == OBJECT_CONF_SET) {
3571 struct ldlm_lock *lock = conf->coc_lock;
3573 LASSERT(lock != NULL);
3574 LASSERT(ldlm_has_layout(lock));
3576 /* it can only be allowed to match after layout is
3577 * applied to inode otherwise false layout would be
3578 * seen. Applying layout shoud happen before dropping
3579 * the intent lock. */
3580 ldlm_lock_allow_match(lock);
3586 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
3587 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3590 struct ll_sb_info *sbi = ll_i2sbi(inode);
3591 struct obd_capa *oc;
3592 struct ptlrpc_request *req;
3593 struct mdt_body *body;
3600 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3601 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3602 lock->l_lvb_data, lock->l_lvb_len);
/* already have a ready LVB on the lock: nothing to fetch */
3604 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3607 /* if layout lock was granted right away, the layout is returned
3608 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3609 * blocked and then granted via completion ast, we have to fetch
3610 * layout here. Please note that we can't use the LVB buffer in
3611 * completion AST because it doesn't have a large enough buffer */
3612 oc = ll_mdscapa_get(inode);
3613 rc = ll_get_max_mdsize(sbi, &lmmsize);
3615 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3616 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3622 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
/* sanity: reply EA must fit in the buffer we asked for */
3623 if (body == NULL || body->eadatasize > lmmsize)
3624 GOTO(out, rc = -EPROTO);
3626 lmmsize = body->eadatasize;
3627 if (lmmsize == 0) /* empty layout */
3630 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3632 GOTO(out, rc = -EFAULT);
3634 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3635 if (lvbdata == NULL)
3636 GOTO(out, rc = -ENOMEM);
/* install the fetched layout as the lock's LVB, replacing any old
 * buffer, under the resource lock */
3638 memcpy(lvbdata, lmm, lmmsize);
3639 lock_res_and_lock(lock);
3640 if (lock->l_lvb_data != NULL)
3641 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3643 lock->l_lvb_data = lvbdata;
3644 lock->l_lvb_len = lmmsize;
3645 unlock_res_and_lock(lock);
3650 ptlrpc_req_finished(req);
3655 * Apply the layout to the inode. Layout lock is held and will be released
/*
 * @lockh/@mode: the held layout lock (reference dropped before return).
 * @gen [OUT]: resulting layout generation.
 * @reconf: if false, only report the current generation when LVB is
 * ready; if true, (re)configure the cl_object from the lock's LVB,
 * fetching the layout from the MDT first if necessary.
 */
3658 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3659 struct inode *inode, __u32 *gen, bool reconf)
3661 struct ll_inode_info *lli = ll_i2info(inode);
3662 struct ll_sb_info *sbi = ll_i2sbi(inode);
3663 struct ldlm_lock *lock;
3664 struct lustre_md md = { NULL };
3665 struct cl_object_conf conf;
3668 bool wait_layout = false;
3671 LASSERT(lustre_handle_is_used(lockh));
3673 lock = ldlm_handle2lock(lockh);
3674 LASSERT(lock != NULL);
3675 LASSERT(ldlm_has_layout(lock));
3677 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d\n",
3678 PFID(&lli->lli_fid), inode, reconf);
3680 /* in case this is a caching lock and reinstate with new inode */
3681 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3683 lock_res_and_lock(lock);
3684 lvb_ready = ldlm_is_lvb_ready(lock);
3685 unlock_res_and_lock(lock);
3686 /* checking lvb_ready is racy but this is okay. The worst case is
3687 * that multi processes may configure the file on the same time. */
3689 if (lvb_ready || !reconf) {
3692 /* layout_gen must be valid if layout lock is not
3693 * cancelled and stripe has already set */
3694 *gen = lli->lli_layout_gen;
3700 rc = ll_layout_fetch(inode, lock);
3704 /* for layout lock, lmm is returned in lock's lvb.
3705 * lvb_data is immutable if the lock is held so it's safe to access it
3706 * without res lock. See the description in ldlm_lock_decref_internal()
3707 * for the condition to free lvb_data of layout lock */
3708 if (lock->l_lvb_data != NULL) {
3709 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3710 lock->l_lvb_data, lock->l_lvb_len);
/* no stripe data -> empty layout generation */
3712 *gen = LL_LAYOUT_GEN_EMPTY;
3714 *gen = md.lsm->lsm_layout_gen;
3717 CERROR("%s: file "DFID" unpackmd error: %d\n",
3718 ll_get_fsname(inode->i_sb, NULL, 0),
3719 PFID(&lli->lli_fid), rc);
3725 /* set layout to file. Unlikely this will fail as old layout was
3726 * surely eliminated */
3727 memset(&conf, 0, sizeof conf);
3728 conf.coc_opc = OBJECT_CONF_SET;
3729 conf.coc_inode = inode;
3730 conf.coc_lock = lock;
3731 conf.u.coc_md = &md;
3732 rc = ll_layout_conf(inode, &conf);
3735 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3737 /* refresh layout failed, need to wait */
3738 wait_layout = rc == -EBUSY;
/* drop the layout lock (LDLM_LOCK_PUT balances ldlm_handle2lock;
 * decref releases the caller's mode reference) */
3742 LDLM_LOCK_PUT(lock);
3743 ldlm_lock_decref(lockh, mode);
3745 /* wait for IO to complete if it's still being used. */
3747 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3748 ll_get_fsname(inode->i_sb, NULL, 0),
3749 PFID(&lli->lli_fid), inode);
3751 memset(&conf, 0, sizeof conf);
3752 conf.coc_opc = OBJECT_CONF_WAIT;
3753 conf.coc_inode = inode;
3754 rc = ll_layout_conf(inode, &conf);
3758 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3759 ll_get_fsname(inode->i_sb, NULL, 0),
3760 PFID(&lli->lli_fid), rc);
3766 * This function checks if there exists a LAYOUT lock on the client side,
3767 * or enqueues it if it doesn't have one in cache.
3769 * This function will not hold layout lock so it may be revoked any time after
3770 * this function returns. Any operations depend on layout should be redone
3773 * This function should be called before lov_io_init() to get an uptodate
3774 * layout version, the caller should save the version number and after IO
3775 * is finished, this function should be called again to verify that layout
3776 * is not changed during IO time.
3778 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3780 struct ll_inode_info *lli = ll_i2info(inode);
3781 struct ll_sb_info *sbi = ll_i2sbi(inode);
3782 struct md_op_data *op_data;
3783 struct lookup_intent it;
3784 struct lustre_handle lockh;
3786 struct ldlm_enqueue_info einfo = {
3787 .ei_type = LDLM_IBITS,
3789 .ei_cb_bl = ll_md_blocking_ast,
3790 .ei_cb_cp = ldlm_completion_ast,
/* with layout locks disabled on this mount just report the cached
 * generation */
3795 *gen = lli->lli_layout_gen;
3796 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3800 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3801 LASSERT(S_ISREG(inode->i_mode));
3803 /* mostly layout lock is caching on the local side, so try to match
3804 * it before grabbing layout lock mutex. */
3805 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3806 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3807 if (mode != 0) { /* hit cached lock */
3808 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3812 /* better hold lli_layout_mutex to try again otherwise
3813 * it will have starvation problem. */
3816 /* take layout lock mutex to enqueue layout lock exclusively. */
3817 mutex_lock(&lli->lli_layout_mutex);
3820 /* try again. Maybe somebody else has done this. */
3821 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3822 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3823 if (mode != 0) { /* hit cached lock */
3824 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3828 mutex_unlock(&lli->lli_layout_mutex);
3832 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3833 0, 0, LUSTRE_OPC_ANY, NULL);
3834 if (IS_ERR(op_data)) {
3835 mutex_unlock(&lli->lli_layout_mutex);
3836 RETURN(PTR_ERR(op_data));
3839 /* have to enqueue one */
3840 memset(&it, 0, sizeof(it));
3841 it.it_op = IT_LAYOUT;
3842 lockh.cookie = 0ULL;
3844 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)\n",
3845 ll_get_fsname(inode->i_sb, NULL, 0),
3846 PFID(&lli->lli_fid), inode);
3848 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
/* the intent request body is no longer needed once enqueue returns */
3850 if (it.d.lustre.it_data != NULL)
3851 ptlrpc_req_finished(it.d.lustre.it_data);
3852 it.d.lustre.it_data = NULL;
3854 ll_finish_md_op_data(op_data);
/* transfer lock ownership out of the intent, then apply the layout */
3856 mode = it.d.lustre.it_lock_mode;
3857 it.d.lustre.it_lock_mode = 0;
3858 ll_intent_drop_lock(&it);
3861 /* set lock data in case this is a new lock */
3862 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3863 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3867 mutex_unlock(&lli->lli_layout_mutex);
3873 * This function send a restore request to the MDT
3875 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3877 struct hsm_user_request *hur;
3881 len = sizeof(struct hsm_user_request) +
3882 sizeof(struct hsm_user_item);
3883 OBD_ALLOC(hur, len);
3887 hur->hur_request.hr_action = HUA_RESTORE;
3888 hur->hur_request.hr_archive_id = 0;
3889 hur->hur_request.hr_flags = 0;
3890 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3891 sizeof(hur->hur_user_item[0].hui_fid));
3892 hur->hur_user_item[0].hui_extent.offset = offset;
3893 hur->hur_user_item[0].hui_extent.length = length;
3894 hur->hur_request.hr_itemcount = 1;
3895 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,