4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/*
 * Allocate per-open file state (struct ll_file_data) from ll_file_data_slab.
 * NOTE(review): this excerpt is sampled — the allocation-failure check and
 * the return statement are not visible here; do not edit without full source.
 */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, __GFP_IO);
61 fd->fd_write_failed = false;
/* Release a struct ll_file_data back to its slab cache (pairs with
 * ll_file_data_get()).  NOTE(review): excerpt incomplete — a NULL guard
 * may exist in the unseen lines. */
66 static void ll_file_data_put(struct ll_file_data *fd)
69 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Copy the inode's current attributes (mode, times, size, blocks, flags,
 * ioepoch) plus the open handle @fh and an MDS capability into @op_data,
 * for sending to the MDS on close/setattr.  If the inode carries the
 * LLIF_DATA_MODIFIED flag, advertise MDS_DATA_MODIFIED in op_bias so the
 * server knows data changed under this epoch.
 */
72 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
73 struct lustre_handle *fh)
75 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
76 op_data->op_attr.ia_mode = inode->i_mode;
77 op_data->op_attr.ia_atime = inode->i_atime;
78 op_data->op_attr.ia_mtime = inode->i_mtime;
79 op_data->op_attr.ia_ctime = inode->i_ctime;
80 op_data->op_attr.ia_size = i_size_read(inode);
81 op_data->op_attr_blocks = inode->i_blocks;
/* ll_iattr wraps struct iattr to add ia_attr_flags; cast is intentional. */
82 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
83 ll_inode_to_ext_flags(inode->i_flags);
84 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
86 op_data->op_handle = *fh;
87 op_data->op_capa1 = ll_mdscapa_get(inode);
89 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
90 op_data->op_bias |= MDS_DATA_MODIFIED;
94 * Closes the IO epoch and packs all the attributes into @op_data for
/* NOTE(review): excerpt is sampled — early-return paths between these
 * statements are not visible.  Size/blocks are only sent when SOM
 * (size-on-MDS) is unsupported or the file is not a regular file. */
97 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
98 struct obd_client_handle *och)
102 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET |
103 ATTR_MTIME | ATTR_MTIME_SET |
104 ATTR_CTIME | ATTR_CTIME_SET;
106 if (!(och->och_flags & FMODE_WRITE))
109 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
110 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* presumably closes the epoch and may clear och on DONE_WRITING — confirm
 * against full source; note &och passes the pointer's address. */
112 ll_ioepoch_close(inode, op_data, &och, 0);
115 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
116 ll_prep_md_op_data(op_data, inode, NULL, NULL,
117 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send an MDS_CLOSE for open handle @och on @inode via @md_exp.
 * A non-NULL @data_version turns this close into an HSM release.
 * On MDS request for Size-on-MDS, performs the SOM update; on success
 * clears LLIF_DATA_MODIFIED if it was advertised.  Frees/invalidates
 * @och (unless DONE_WRITING is pending) and releases the close request.
 * NOTE(review): excerpt is sampled — variable declarations (rc,
 * epoch_close), several branch/return lines and braces are missing here.
 */
121 static int ll_close_inode_openhandle(struct obd_export *md_exp,
123 struct obd_client_handle *och,
124 const __u64 *data_version)
126 struct obd_export *exp = ll_i2mdexp(inode);
127 struct md_op_data *op_data;
128 struct ptlrpc_request *req = NULL;
129 struct obd_device *obd = class_exp2obd(exp);
136 * XXX: in case of LMV, is this correct to access
139 CERROR("Invalid MDC connection handle "LPX64"\n",
140 ll_i2mdexp(inode)->exp_handle.h_cookie);
144 OBD_ALLOC_PTR(op_data);
146 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
148 ll_prepare_close(inode, op_data, och);
149 if (data_version != NULL) {
150 /* Pass in data_version implies release. */
151 op_data->op_bias |= MDS_HSM_RELEASE;
152 op_data->op_data_version = *data_version;
153 op_data->op_lease_handle = och->och_lease_handle;
154 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
156 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
157 rc = md_close(md_exp, op_data, och->och_mod, &req);
159 /* This close must have the epoch closed. */
160 LASSERT(epoch_close);
161 /* MDS has instructed us to obtain Size-on-MDS attribute from
162 * OSTs and send setattr to back to MDS. */
163 rc = ll_som_update(inode, op_data);
165 CERROR("%s: inode "DFID" mdc Size-on-MDS update"
166 " failed: rc = %d\n",
167 ll_i2mdexp(inode)->exp_obd->obd_name,
168 PFID(ll_inode2fid(inode)), rc);
172 CERROR("%s: inode "DFID" mdc close failed: rc = %d\n",
173 ll_i2mdexp(inode)->exp_obd->obd_name,
174 PFID(ll_inode2fid(inode)), rc);
177 /* DATA_MODIFIED flag was successfully sent on close, cancel data
178 * modification flag. */
179 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
180 struct ll_inode_info *lli = ll_i2info(inode);
/* lli_flags is protected by lli_lock. */
182 spin_lock(&lli->lli_lock);
183 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
184 spin_unlock(&lli->lli_lock);
188 rc = ll_objects_destroy(req, inode);
190 CERROR("%s: inode "DFID
191 " ll_objects destroy: rc = %d\n",
192 ll_i2mdexp(inode)->exp_obd->obd_name,
193 PFID(ll_inode2fid(inode)), rc);
196 if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
197 struct mdt_body *body;
198 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
/* Server did not actually release the file — error path follows
 * in unseen lines. */
199 if (!(body->valid & OBD_MD_FLRELEASED))
203 ll_finish_md_op_data(op_data);
207 if (exp_connect_som(exp) && !epoch_close &&
208 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
209 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
211 md_clear_open_replay_data(md_exp, och);
212 /* Free @och if it is not waiting for DONE_WRITING. */
213 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
216 if (req) /* This is close request */
217 ptlrpc_req_finished(req);
/*
 * Close the MDS open handle of the given mode (write/exec/read) if this
 * caller was the last user.  Picks the per-mode handle slot and usecount
 * under lli_och_mutex; if other users remain, the close is skipped.
 * NOTE(review): excerpt is sampled — the handle-swap under the mutex and
 * the final return are in lines not visible here.
 */
221 int ll_md_real_close(struct inode *inode, fmode_t fmode)
223 struct ll_inode_info *lli = ll_i2info(inode);
224 struct obd_client_handle **och_p;
225 struct obd_client_handle *och;
230 if (fmode & FMODE_WRITE) {
231 och_p = &lli->lli_mds_write_och;
232 och_usecount = &lli->lli_open_fd_write_count;
233 } else if (fmode & FMODE_EXEC) {
234 och_p = &lli->lli_mds_exec_och;
235 och_usecount = &lli->lli_open_fd_exec_count;
237 LASSERT(fmode & FMODE_READ);
238 och_p = &lli->lli_mds_read_och;
239 och_usecount = &lli->lli_open_fd_read_count;
242 mutex_lock(&lli->lli_och_mutex);
243 if (*och_usecount > 0) {
244 /* There are still users of this handle, so skip
246 mutex_unlock(&lli->lli_och_mutex);
252 mutex_unlock(&lli->lli_och_mutex);
255 /* There might be a race and this handle may already
257 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-struct-file close: drop group lock and lease if held, close the
 * private och if any, decrement the per-mode open counts, and only talk
 * to the MDS (ll_md_real_close) when no matching OPEN lock lets us skip
 * it.  Finally detaches and frees the ll_file_data.
 * NOTE(review): excerpt is sampled — lockmode setup, several braces and
 * the return are in unseen lines.
 */
264 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
267 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
268 struct ll_inode_info *lli = ll_i2info(inode);
272 /* clear group lock, if present */
273 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
274 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
276 if (fd->fd_lease_och != NULL) {
279 /* Usually the lease is not released when the
280 * application crashed, we need to release here. */
281 rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
282 CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
283 PFID(&lli->lli_fid), rc, lease_broken);
285 fd->fd_lease_och = NULL;
288 if (fd->fd_och != NULL) {
289 rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
294 /* Let's see if we have good enough OPEN lock on the file and if
295 we can skip talking to MDS */
296 if (file->f_dentry->d_inode) { /* Can this ever be false? */
298 __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
299 struct lustre_handle lockh;
/* Shadows the outer @inode parameter intentionally (same object). */
300 struct inode *inode = file->f_dentry->d_inode;
301 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
303 mutex_lock(&lli->lli_och_mutex);
304 if (fd->fd_omode & FMODE_WRITE) {
306 LASSERT(lli->lli_open_fd_write_count);
307 lli->lli_open_fd_write_count--;
308 } else if (fd->fd_omode & FMODE_EXEC) {
310 LASSERT(lli->lli_open_fd_exec_count);
311 lli->lli_open_fd_exec_count--;
314 LASSERT(lli->lli_open_fd_read_count);
315 lli->lli_open_fd_read_count--;
317 mutex_unlock(&lli->lli_och_mutex);
/* No cached OPEN lock of the right mode -> must do a real MDS close. */
319 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
320 LDLM_IBITS, &policy, lockmode,
322 rc = ll_md_real_close(file->f_dentry->d_inode,
326 CERROR("Releasing a file %p with negative dentry %p. Name %s",
327 file, file->f_dentry, file->f_dentry->d_name.name);
331 LUSTRE_FPRIVATE(file) = NULL;
332 ll_file_data_put(fd);
333 ll_capa_close(inode);
338 /* While this returns an error code, fput() the caller does not, so we need
339 * to make every effort to clean up all of our state here. Also, applications
340 * rarely check close errors and even if an error is returned they will not
341 * re-try the close call.
/* VFS ->release() entry point.  Handles remote-client ACL cleanup, stops
 * a statahead thread this fd started, short-circuits for the root dentry,
 * clears async write errors for regular files, then performs the MDS
 * close via ll_md_close().
 * NOTE(review): excerpt is sampled — early returns and the final RETURN
 * are in unseen lines. */
343 int ll_file_release(struct inode *inode, struct file *file)
345 struct ll_file_data *fd;
346 struct ll_sb_info *sbi = ll_i2sbi(inode);
347 struct ll_inode_info *lli = ll_i2info(inode);
351 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
352 PFID(ll_inode2fid(inode)), inode);
354 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping applies only to the fs root. */
355 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
356 inode == inode->i_sb->s_root->d_inode) {
357 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
360 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
361 fd->fd_flags &= ~LL_FILE_RMTACL;
362 rct_del(&sbi->ll_rct, current_pid());
363 et_search_free(&sbi->ll_et, current_pid());
368 if (inode->i_sb->s_root != file->f_dentry)
369 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
370 fd = LUSTRE_FPRIVATE(file);
373 /* The last ref on @file, maybe not the the owner pid of statahead.
374 * Different processes can open the same dir, "ll_opendir_key" means:
375 * it is me that should stop the statahead thread. */
376 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
377 lli->lli_opendir_pid != 0)
378 ll_stop_statahead(inode, lli->lli_opendir_key);
380 if (inode->i_sb->s_root == file->f_dentry) {
381 LUSTRE_FPRIVATE(file) = NULL;
382 ll_file_data_put(fd);
386 if (!S_ISDIR(inode->i_mode)) {
387 lov_read_and_clear_async_rc(lli->lli_clob);
388 lli->lli_async_rc = 0;
391 rc = ll_md_close(sbi->ll_md_exp, inode, file);
393 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
394 libcfs_debug_dumplog();
/*
 * Send an IT_OPEN intent to the MDS for @file (used for NFSD-style opens
 * and for setting stripe info when @lmm/@lmmsize are supplied).  On
 * success updates the inode from the reply and records lock data.
 * NOTE(review): excerpt is sampled — error checks after md_intent_lock()
 * and several GOTO targets are in unseen lines.
 */
399 static int ll_intent_file_open(struct file *file, void *lmm,
400 int lmmsize, struct lookup_intent *itp)
402 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
403 struct dentry *parent = file->f_dentry->d_parent;
404 const char *name = file->f_dentry->d_name.name;
405 const int len = file->f_dentry->d_name.len;
406 struct md_op_data *op_data;
407 struct ptlrpc_request *req;
408 __u32 opc = LUSTRE_OPC_ANY;
415 /* Usually we come here only for NFSD, and we want open lock.
416 But we can also get here with pre 2.6.15 patchless kernels, and in
417 that case that lock is also ok */
418 /* We can also get here if there was cached open handle in revalidate_it
419 * but it disappeared while we were getting from there to ll_file_open.
420 * But this means this file was closed and immediately opened which
421 * makes a good candidate for using OPEN lock */
422 /* If lmmsize & lmm are not 0, we are just setting stripe info
423 * parameters. No need for the open lock */
424 if (lmm == NULL && lmmsize == 0) {
425 itp->it_flags |= MDS_OPEN_LOCK;
426 if (itp->it_flags & FMODE_WRITE)
427 opc = LUSTRE_OPC_CREATE;
430 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
431 file->f_dentry->d_inode, name, len,
434 RETURN(PTR_ERR(op_data));
436 itp->it_flags |= MDS_OPEN_BY_FID;
437 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
438 0 /*unused */, &req, ll_md_blocking_ast, 0);
439 ll_finish_md_op_data(op_data);
441 /* reason for keep own exit path - don`t flood log
442 * with messages with -ESTALE errors.
444 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
445 it_open_error(DISP_OPEN_OPEN, itp))
/* Drop the server-side open handle we won't be using. */
447 ll_release_openhandle(file->f_dentry, itp);
451 if (it_disposition(itp, DISP_LOOKUP_NEG))
452 GOTO(out, rc = -ENOENT);
454 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
455 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
456 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
460 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
461 if (!rc && itp->d.lustre.it_lock_mode)
462 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
466 ptlrpc_req_finished(req);
467 ll_intent_drop_lock(itp);
473 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
474 * not believe attributes if a few ioepoch holders exist. Attributes for
475 * previous ioepoch if new one is opened are also skipped by MDS.
477 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a change; epoch 0 means "no epoch assigned". */
479 if (ioepoch && lli->lli_ioepoch != ioepoch) {
480 lli->lli_ioepoch = ioepoch;
481 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
482 ioepoch, PFID(&lli->lli_fid));
/*
 * Populate an obd_client_handle from the MDS open reply carried by @it
 * (file handle, fid, lease-lock cookie, flags) and register it for open
 * replay.  Returns the md_set_open_replay_data() result.
 */
486 static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
487 struct obd_client_handle *och)
489 struct ptlrpc_request *req = it->d.lustre.it_data;
490 struct mdt_body *body;
492 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
493 och->och_fh = body->handle;
494 och->och_fid = body->fid1;
495 och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
496 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
497 och->och_flags = it->it_flags;
499 return md_set_open_replay_data(md_exp, och, it);
/*
 * Finish an open locally: optionally fill @och from the intent reply and
 * open the returned ioepoch, then attach @fd as the file's private data
 * and record the effective open mode.
 * NOTE(review): excerpt is sampled — the och!=NULL guard, error handling
 * after ll_och_fill() and the return are in unseen lines.
 */
502 int ll_local_open(struct file *file, struct lookup_intent *it,
503 struct ll_file_data *fd, struct obd_client_handle *och)
505 struct inode *inode = file->f_dentry->d_inode;
506 struct ll_inode_info *lli = ll_i2info(inode);
509 LASSERT(!LUSTRE_FPRIVATE(file));
514 struct ptlrpc_request *req = it->d.lustre.it_data;
515 struct mdt_body *body;
518 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
522 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
523 ll_ioepoch_open(lli, body->ioepoch);
526 LUSTRE_FPRIVATE(file) = fd;
527 ll_readahead_init(inode, &fd->fd_ras);
528 fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
533 /* Open a file, and (for the very first open) create objects on the OSTs at
534 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
535 * creation or open until ll_lov_setstripe() ioctl is called.
537 * If we already have the stripe MD locally then we don't request it in
538 * md_open(), by passing a lmm_size = 0.
540 * It is up to the application to ensure no other processes open this file
541 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
542 * used. We might be able to avoid races of that sort by getting lli_open_sem
543 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
544 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* NOTE(review): this excerpt is sampled — numerous branch bodies, braces,
 * GOTO labels (out_och_free/out_openerr) and returns are not visible.
 * Read the full source before modifying anything below. */
546 int ll_file_open(struct inode *inode, struct file *file)
548 struct ll_inode_info *lli = ll_i2info(inode);
549 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
550 .it_flags = file->f_flags };
551 struct obd_client_handle **och_p = NULL;
552 __u64 *och_usecount = NULL;
553 struct ll_file_data *fd;
554 int rc = 0, opendir_set = 0;
557 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
558 PFID(ll_inode2fid(inode)), inode, file->f_flags);
560 it = file->private_data; /* XXX: compat macro */
561 file->private_data = NULL; /* prevent ll_local_open assertion */
563 fd = ll_file_data_get();
565 GOTO(out_openerr, rc = -ENOMEM);
/* Directory open: claim statahead ownership if nobody else has. */
568 if (S_ISDIR(inode->i_mode)) {
569 spin_lock(&lli->lli_sa_lock);
570 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
571 lli->lli_opendir_pid == 0) {
572 lli->lli_opendir_key = fd;
573 lli->lli_opendir_pid = current_pid();
576 spin_unlock(&lli->lli_sa_lock);
579 if (inode->i_sb->s_root == file->f_dentry) {
580 LUSTRE_FPRIVATE(file) = fd;
584 if (!it || !it->d.lustre.it_disposition) {
585 /* Convert f_flags into access mode. We cannot use file->f_mode,
586 * because everything but O_ACCMODE mask was stripped from
588 if ((oit.it_flags + 1) & O_ACCMODE)
590 if (file->f_flags & O_TRUNC)
591 oit.it_flags |= FMODE_WRITE;
593 /* kernel only call f_op->open in dentry_open. filp_open calls
594 * dentry_open after call to open_namei that checks permissions.
595 * Only nfsd_open call dentry_open directly without checking
596 * permissions and because of that this code below is safe. */
597 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
598 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
600 /* We do not want O_EXCL here, presumably we opened the file
601 * already? XXX - NFS implications? */
602 oit.it_flags &= ~O_EXCL;
604 /* bug20584, if "it_flags" contains O_CREAT, the file will be
605 * created if necessary, then "IT_CREAT" should be set to keep
606 * consistent with it */
607 if (oit.it_flags & O_CREAT)
608 oit.it_op |= IT_CREAT;
614 /* Let's see if we have file open on MDS already. */
615 if (it->it_flags & FMODE_WRITE) {
616 och_p = &lli->lli_mds_write_och;
617 och_usecount = &lli->lli_open_fd_write_count;
618 } else if (it->it_flags & FMODE_EXEC) {
619 och_p = &lli->lli_mds_exec_och;
620 och_usecount = &lli->lli_open_fd_exec_count;
622 och_p = &lli->lli_mds_read_och;
623 och_usecount = &lli->lli_open_fd_read_count;
626 mutex_lock(&lli->lli_och_mutex);
627 if (*och_p) { /* Open handle is present */
628 if (it_disposition(it, DISP_OPEN_OPEN)) {
629 /* Well, there's extra open request that we do not need,
630 let's close it somehow. This will decref request. */
631 rc = it_open_error(DISP_OPEN_OPEN, it);
633 mutex_unlock(&lli->lli_och_mutex);
634 GOTO(out_openerr, rc);
637 ll_release_openhandle(file->f_dentry, it);
641 rc = ll_local_open(file, it, fd, NULL);
644 mutex_unlock(&lli->lli_och_mutex);
645 GOTO(out_openerr, rc);
648 LASSERT(*och_usecount == 0);
649 if (!it->d.lustre.it_disposition) {
650 /* We cannot just request lock handle now, new ELC code
651 means that one of other OPEN locks for this file
652 could be cancelled, and since blocking ast handler
653 would attempt to grab och_mutex as well, that would
654 result in a deadlock */
655 mutex_unlock(&lli->lli_och_mutex);
656 it->it_create_mode |= M_CHECK_STALE;
657 rc = ll_intent_file_open(file, NULL, 0, it);
658 it->it_create_mode &= ~M_CHECK_STALE;
660 GOTO(out_openerr, rc);
664 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
666 GOTO(out_och_free, rc = -ENOMEM);
670 /* md_intent_lock() didn't get a request ref if there was an
671 * open error, so don't do cleanup on the request here
673 /* XXX (green): Should not we bail out on any error here, not
674 * just open error? */
675 rc = it_open_error(DISP_OPEN_OPEN, it);
677 GOTO(out_och_free, rc);
679 LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF),
680 "inode %p: disposition %x, status %d\n", inode,
681 it_disposition(it, ~0), it->d.lustre.it_status);
683 rc = ll_local_open(file, it, fd, *och_p);
685 GOTO(out_och_free, rc);
687 mutex_unlock(&lli->lli_och_mutex);
690 /* Must do this outside lli_och_mutex lock to prevent deadlock where
691 different kind of OPEN lock for this same inode gets cancelled
692 by ldlm_cancel_lru */
693 if (!S_ISREG(inode->i_mode))
694 GOTO(out_och_free, rc);
/* Regular file without stripe metadata yet: object creation may be
 * deferred (O_LOV_DELAY_CREATE or read-only open). */
698 if (!lli->lli_has_smd) {
699 if (file->f_flags & O_LOV_DELAY_CREATE ||
700 !(file->f_mode & FMODE_WRITE)) {
701 CDEBUG(D_INODE, "object creation was delayed\n");
702 GOTO(out_och_free, rc);
705 file->f_flags &= ~O_LOV_DELAY_CREATE;
706 GOTO(out_och_free, rc);
/* Error-path cleanup (out_och_free / out_openerr labels are in the
 * unseen lines around here). */
710 if (och_p && *och_p) {
711 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
712 *och_p = NULL; /* OBD_FREE writes some magic there */
715 mutex_unlock(&lli->lli_och_mutex);
718 if (opendir_set != 0)
719 ll_stop_statahead(inode, lli->lli_opendir_key);
721 ll_file_data_put(fd);
723 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
726 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
727 ptlrpc_req_finished(it->d.lustre.it_data);
728 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * LDLM blocking AST for lease locks: on a blocking callback, cancel the
 * lock asynchronously (the lease is considered broken).  Unlike
 * ll_md_blocking_ast this does not manage an open handle.
 * NOTE(review): excerpt incomplete — the CANCELING branch body and return
 * are in unseen lines.
 */
734 static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
735 struct ldlm_lock_desc *desc, void *data, int flag)
738 struct lustre_handle lockh;
742 case LDLM_CB_BLOCKING:
743 ldlm_lock2handle(lock, &lockh);
744 rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
746 CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
750 case LDLM_CB_CANCELING:
758 * Acquire a lease and open the file.
/* Sends an IT_OPEN intent with MDS_OPEN_LEASE; when @file is non-NULL the
 * request reuses the caller's existing open handle (old_handle) so the MDT
 * sees the same owner.  The lease lock is kept out of the LRU and marked
 * exclusive so it cannot be matched/cancelled as a normal open lock.
 * Returns the filled obd_client_handle or an ERR_PTR.
 * NOTE(review): excerpt is sampled — och allocation, rc declarations and
 * several error-path lines are not visible here. */
760 struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
761 fmode_t fmode, __u64 open_flags)
763 struct lookup_intent it = { .it_op = IT_OPEN };
764 struct ll_sb_info *sbi = ll_i2sbi(inode);
765 struct md_op_data *op_data;
766 struct ptlrpc_request *req;
767 struct lustre_handle old_handle = { 0 };
768 struct obd_client_handle *och = NULL;
/* Lease must be exactly read or exactly write, never both/neither. */
773 if (fmode != FMODE_WRITE && fmode != FMODE_READ)
774 RETURN(ERR_PTR(-EINVAL));
777 struct ll_inode_info *lli = ll_i2info(inode);
778 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
779 struct obd_client_handle **och_p;
782 if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
783 RETURN(ERR_PTR(-EPERM));
785 /* Get the openhandle of the file */
787 mutex_lock(&lli->lli_och_mutex);
/* Only one lease per file descriptor. */
788 if (fd->fd_lease_och != NULL) {
789 mutex_unlock(&lli->lli_och_mutex);
793 if (fd->fd_och == NULL) {
794 if (file->f_mode & FMODE_WRITE) {
795 LASSERT(lli->lli_mds_write_och != NULL);
796 och_p = &lli->lli_mds_write_och;
797 och_usecount = &lli->lli_open_fd_write_count;
799 LASSERT(lli->lli_mds_read_och != NULL);
800 och_p = &lli->lli_mds_read_och;
801 och_usecount = &lli->lli_open_fd_read_count;
803 if (*och_usecount == 1) {
810 mutex_unlock(&lli->lli_och_mutex);
811 if (rc < 0) /* more than 1 opener */
814 LASSERT(fd->fd_och != NULL);
815 old_handle = fd->fd_och->och_fh;
820 RETURN(ERR_PTR(-ENOMEM));
822 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
823 LUSTRE_OPC_ANY, NULL);
825 GOTO(out, rc = PTR_ERR(op_data));
827 /* To tell the MDT this openhandle is from the same owner */
828 op_data->op_handle = old_handle;
830 it.it_flags = fmode | open_flags;
831 it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
832 rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
833 ll_md_blocking_lease_ast,
834 /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
835 * it can be cancelled which may mislead applications that the lease is
837 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
838 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
839 * doesn't deal with openhandle, so normal openhandle will be leaked. */
840 LDLM_FL_NO_LRU | LDLM_FL_EXCL);
841 ll_finish_md_op_data(op_data);
842 ptlrpc_req_finished(req);
844 GOTO(out_release_it, rc);
846 if (it_disposition(&it, DISP_LOOKUP_NEG))
847 GOTO(out_release_it, rc = -ENOENT);
849 rc = it_open_error(DISP_OPEN_OPEN, &it);
851 GOTO(out_release_it, rc);
853 LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
854 ll_och_fill(sbi->ll_md_exp, &it, och);
856 if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
857 GOTO(out_close, rc = -EOPNOTSUPP);
859 /* already get lease, handle lease lock */
860 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
861 if (it.d.lustre.it_lock_mode == 0 ||
862 it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
863 /* open lock must return for lease */
864 CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
865 PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
866 it.d.lustre.it_lock_bits);
867 GOTO(out_close, rc = -EPROTO);
870 ll_intent_release(&it);
/* out_close / out_release_it error paths follow. */
874 /* Cancel open lock */
875 if (it.d.lustre.it_lock_mode != 0) {
876 ldlm_lock_decref_and_cancel(&och->och_lease_handle,
877 it.d.lustre.it_lock_mode);
878 it.d.lustre.it_lock_mode = 0;
879 och->och_lease_handle.cookie = 0ULL;
881 rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
883 CERROR("%s: error closing file "DFID": %d\n",
884 ll_get_fsname(inode->i_sb, NULL, 0),
885 PFID(&ll_i2info(inode)->lli_fid), rc2);
886 och = NULL; /* och has been freed in ll_close_inode_openhandle() */
888 ll_intent_release(&it);
894 EXPORT_SYMBOL(ll_lease_open);
897 * Release lease and close the file.
898 * It will check if the lease has ever broken.
/* If the lease lock was not already cancelled (i.e. lease unbroken), it
 * is cancelled here before closing the open handle.  @lease_broken, if
 * non-NULL, reports whether the lease had been broken. */
900 int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
903 struct ldlm_lock *lock;
904 bool cancelled = true;
908 lock = ldlm_handle2lock(&och->och_lease_handle);
910 lock_res_and_lock(lock);
911 cancelled = ldlm_is_cancel(lock);
912 unlock_res_and_lock(lock);
916 CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
917 PFID(&ll_i2info(inode)->lli_fid), cancelled);
920 ldlm_cli_cancel(&och->och_lease_handle, 0);
921 if (lease_broken != NULL)
922 *lease_broken = cancelled;
924 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
928 EXPORT_SYMBOL(ll_lease_close);
930 /* Fills the obdo with the attributes for the lsm */
/* Issues an async OST getattr for the stripe object(s) and waits for the
 * result set.  @dv_flags may request server-side flush locking
 * (LL_DV_RD_FLUSH / LL_DV_WR_FLUSH) for data-version stability.
 * NOTE(review): excerpt is sampled — oinfo.oi_oa assignment/allocation,
 * error returns and the trailing data-version re-check branch body are in
 * unseen lines. */
931 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
932 struct obd_capa *capa, struct obdo *obdo,
933 __u64 ioepoch, int dv_flags)
935 struct ptlrpc_request_set *set;
936 struct obd_info oinfo = { { { 0 } } };
941 LASSERT(lsm != NULL);
945 oinfo.oi_oa->o_oi = lsm->lsm_oi;
946 oinfo.oi_oa->o_mode = S_IFREG;
947 oinfo.oi_oa->o_ioepoch = ioepoch;
948 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
949 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
950 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
951 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
952 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
953 OBD_MD_FLDATAVERSION;
954 oinfo.oi_capa = capa;
955 if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) {
956 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
957 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
958 if (dv_flags & LL_DV_WR_FLUSH)
959 oinfo.oi_oa->o_flags |= OBD_FL_FLUSH;
962 set = ptlrpc_prep_set();
964 CERROR("can't allocate ptlrpc set\n");
967 rc = obd_getattr_async(exp, &oinfo, set);
969 rc = ptlrpc_set_wait(set);
970 ptlrpc_set_destroy(set);
/* Keep only the attribute bits the OSTs actually reported. */
973 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
974 OBD_MD_FLATIME | OBD_MD_FLMTIME |
975 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
976 OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS);
/* Write-flush was requested but the server did not confirm flushing. */
977 if (dv_flags & LL_DV_WR_FLUSH &&
978 !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
979 oinfo.oi_oa->o_flags & OBD_FL_FLUSH))
986 * Performs the getattr on the inode and updates its fields.
987 * If @sync != 0, perform the getattr under the server-side lock.
989 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
990 __u64 ioepoch, int sync)
992 struct obd_capa *capa = ll_mdscapa_get(inode);
993 struct lov_stripe_md *lsm;
997 lsm = ccc_inode_lsm_get(inode);
998 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
999 capa, obdo, ioepoch, sync ? LL_DV_RD_FLUSH : 0);
/* On success (branch condition in unseen lines): refresh the VFS inode
 * from the returned obdo and log the result. */
1002 struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi;
1004 obdo_refresh_inode(inode, obdo, obdo->o_valid);
1005 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu,"
1006 " blksize %lu\n", POSTID(oi), i_size_read(inode),
1007 (unsigned long long)inode->i_blocks,
1008 (unsigned long)ll_inode_blksize(inode));
1010 ccc_inode_lsm_put(inode, lsm);
/*
 * Merge attributes cached from the MDS (lli_lvb) with those obtained
 * from the OSTs via the cl_object layer, taking the newest timestamps
 * and the OST-authoritative size/blocks.  Runs under the inode size lock.
 * NOTE(review): excerpt is sampled — lvb declaration, the rc==0 guard
 * around the merge and the return are in unseen lines.
 */
1014 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
1016 struct ll_inode_info *lli = ll_i2info(inode);
1017 struct cl_object *obj = lli->lli_clob;
1018 struct cl_attr *attr = ccc_env_thread_attr(env);
1024 ll_inode_size_lock(inode);
1025 /* merge timestamps the most recently obtained from mds with
1026 timestamps obtained from osts */
1027 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
1028 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
1029 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
1030 inode_init_lvb(inode, &lvb);
1032 cl_object_attr_lock(obj);
1033 rc = cl_object_attr_get(env, obj, attr);
1034 cl_object_attr_unlock(obj);
/* Keep the most recent of MDS vs OST timestamps. */
1037 if (lvb.lvb_atime < attr->cat_atime)
1038 lvb.lvb_atime = attr->cat_atime;
1039 if (lvb.lvb_ctime < attr->cat_ctime)
1040 lvb.lvb_ctime = attr->cat_ctime;
1041 if (lvb.lvb_mtime < attr->cat_mtime)
1042 lvb.lvb_mtime = attr->cat_mtime;
1044 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
1045 PFID(&lli->lli_fid), attr->cat_size);
1046 cl_isize_write_nolock(inode, attr->cat_size);
1048 inode->i_blocks = attr->cat_blocks;
1050 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
1051 LTIME_S(inode->i_atime) = lvb.lvb_atime;
1052 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
1054 ll_inode_size_unlock(inode);
/*
 * ioctl helper: fetch current OST attributes for @lsm and copy the
 * size/blocks/timestamps into the caller-supplied stat structure.
 * NOTE(review): excerpt incomplete — the rc==0 guard and return are in
 * unseen lines.
 */
1059 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
1062 struct obdo obdo = { 0 };
1065 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
1067 st->st_size = obdo.o_size;
1068 st->st_blocks = obdo.o_blocks;
1069 st->st_mtime = obdo.o_mtime;
1070 st->st_atime = obdo.o_atime;
1071 st->st_ctime = obdo.o_ctime;
/*
 * Return true when atime updates are suppressed for this open file —
 * mirrors the kernel's file_accessed()/touch_atime() checks (O_NOATIME,
 * inode/superblock NOATIME flags, mount options, read-only mounts,
 * nodiratime for directories).  NOTE(review): the "return true/false"
 * lines between these conditions are in unseen source lines.
 */
1076 static bool file_is_noatime(const struct file *file)
1078 const struct vfsmount *mnt = file->f_path.mnt;
1079 const struct inode *inode = file->f_path.dentry->d_inode;
1081 /* Adapted from file_accessed() and touch_atime().*/
1082 if (file->f_flags & O_NOATIME)
1085 if (inode->i_flags & S_NOATIME)
1088 if (IS_NOATIME(inode))
1091 if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY))
1094 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1097 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
/*
 * Initialize a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: non-blocking/append/sync flags, the target cl_object, the lock
 * requirement (never for nolock files, mandatory for O_APPEND writes,
 * maybe otherwise) and the noatime setting.
 */
1103 void ll_io_init(struct cl_io *io, const struct file *file, int write)
1105 struct inode *inode = file->f_dentry->d_inode;
1107 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
1109 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
1110 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
1111 file->f_flags & O_DIRECT ||
1114 io->ci_obj = ll_i2info(inode)->lli_clob;
1115 io->ci_lockreq = CILR_MAYBE;
1116 if (ll_file_nolock(file)) {
1117 io->ci_lockreq = CILR_NEVER;
1118 io->ci_no_srvlock = 1;
1119 } else if (file->f_flags & O_APPEND) {
1120 io->ci_lockreq = CILR_MANDATORY;
1123 io->ci_noatime = file_is_noatime(file);
/*
 * Common engine for all read/write paths (normal iov, sendfile, splice):
 * set up a cl_io, dispatch per IO subtype, run cl_io_loop(), handle the
 * short-read/short-write restart case, and account bytes to /proc stats.
 * For normal writes (non group-locked) it serializes on lli_write_mutex
 * and holds lli_trunc_sem across the IO.
 * NOTE(review): excerpt is sampled — the return type line, result/env
 * declarations, case labels, restart `goto` and the final return are in
 * unseen lines; locking order here is delicate, do not restructure.
 */
1127 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
1128 struct file *file, enum cl_io_type iot,
1129 loff_t *ppos, size_t count)
1131 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
1132 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1137 CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: "LPU64", count: %zd\n",
1138 file->f_dentry->d_name.name, iot, *ppos, count);
1141 io = ccc_env_thread_io(env);
1142 ll_io_init(io, file, iot == CIT_WRITE);
1144 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
1145 struct vvp_io *vio = vvp_env_io(env);
1146 struct ccc_io *cio = ccc_env_io(env);
1147 int write_mutex_locked = 0;
1149 cio->cui_fd = LUSTRE_FPRIVATE(file);
1150 vio->cui_io_subtype = args->via_io_subtype;
1152 switch (vio->cui_io_subtype) {
/* IO_NORMAL case (label in unseen line): */
1154 cio->cui_iov = args->u.normal.via_iov;
1155 cio->cui_nrsegs = args->u.normal.via_nrsegs;
1156 cio->cui_tot_nrsegs = cio->cui_nrsegs;
1157 cio->cui_iocb = args->u.normal.via_iocb;
1158 if ((iot == CIT_WRITE) &&
1159 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1160 if (mutex_lock_interruptible(&lli->
1162 GOTO(out, result = -ERESTARTSYS);
1163 write_mutex_locked = 1;
1165 down_read(&lli->lli_trunc_sem);
/* IO_SENDFILE case: */
1168 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
1169 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
/* IO_SPLICE case: */
1172 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
1173 vio->u.splice.cui_flags = args->u.splice.via_flags;
1176 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
1179 result = cl_io_loop(env, io);
1180 if (args->via_io_subtype == IO_NORMAL)
1181 up_read(&lli->lli_trunc_sem);
1182 if (write_mutex_locked)
1183 mutex_unlock(&lli->lli_write_mutex);
1185 /* cl_io_rw_init() handled IO */
1186 result = io->ci_result;
1189 if (io->ci_nob > 0) {
1190 result = io->ci_nob;
1191 *ppos = io->u.ci_wr.wr.crw_pos;
1195 cl_io_fini(env, io);
1196 /* If any bit been read/written (result != 0), we just return
1197 * short read/write instead of restart io. */
1198 if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
1199 CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
1200 iot == CIT_READ ? "read" : "write",
1201 file->f_dentry->d_name.name, *ppos, count);
1202 LASSERTF(io->ci_nob == 0, "%zd", io->ci_nob);
1206 if (iot == CIT_READ) {
1208 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1209 LPROC_LL_READ_BYTES, result);
1210 } else if (iot == CIT_WRITE) {
1212 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
1213 LPROC_LL_WRITE_BYTES, result);
1214 fd->fd_write_failed = false;
1215 } else if (result != -ERESTARTSYS) {
1216 fd->fd_write_failed = true;
1219 CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);
1226 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate a user iovec array before issuing I/O: reject any segment whose
 * length is negative or whose cumulative length wraps, and verify the user
 * buffer is readable via access_ok().
 * NOTE(review): chunk is elided (non-contiguous line numbers); code kept
 * byte-identical. The visible tail truncates cnt at the first bad segment.
 */
1228 static int ll_file_get_iov_count(const struct iovec *iov,
1229 unsigned long *nr_segs, size_t *count)
1234 for (seg = 0; seg < *nr_segs; seg++) {
1235 const struct iovec *iv = &iov[seg];
1238 * If any segment has a negative length, or the cumulative
1239 * length ever wraps negative then return -EINVAL.
/* (cnt|iv->iov_len) < 0 catches both a negative segment and a wrapped sum */
1242 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
1244 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
1249 cnt -= iv->iov_len; /* This segment is no good */
/*
 * AIO read entry point: validate the iovec, obtain a cl_env, fill the
 * per-thread vvp_io_args (IO_NORMAL) and delegate to ll_file_io_generic()
 * with CIT_READ.  The iocb's ki_pos is both input position and output.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1256 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1257 unsigned long nr_segs, loff_t pos)
1260 struct vvp_io_args *args;
1266 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1270 env = cl_env_get(&refcheck);
1272 RETURN(PTR_ERR(env));
1274 args = vvp_env_args(env, IO_NORMAL);
/* const is dropped here; presumably the lower layers do not modify iov */
1275 args->u.normal.via_iov = (struct iovec *)iov;
1276 args->u.normal.via_nrsegs = nr_segs;
1277 args->u.normal.via_iocb = iocb;
1279 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1280 &iocb->ki_pos, count);
1281 cl_env_put(env, &refcheck);
/*
 * Synchronous read(2) path: build a single-segment iovec and a sync kiocb
 * from per-env scratch storage (vti_local_iov/vti_kiocb), then reuse the
 * AIO path.  *ppos is updated from the kiocb afterwards.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1285 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1289 struct iovec *local_iov;
1290 struct kiocb *kiocb;
1295 env = cl_env_get(&refcheck);
1297 RETURN(PTR_ERR(env));
1299 local_iov = &vvp_env_info(env)->vti_local_iov;
1300 kiocb = &vvp_env_info(env)->vti_kiocb;
1301 local_iov->iov_base = (void __user *)buf;
1302 local_iov->iov_len = count;
1303 init_sync_kiocb(kiocb, file);
1304 kiocb->ki_pos = *ppos;
/* kernel-version compat: the "bytes remaining" field was renamed */
1305 #ifdef HAVE_KIOCB_KI_LEFT
1306 kiocb->ki_left = count;
1308 kiocb->ki_nbytes = count;
1311 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1312 *ppos = kiocb->ki_pos;
1314 cl_env_put(env, &refcheck);
1319  * Write to a file (through the page cache).
/*
 * AIO write entry point: mirror of ll_file_aio_read() but with CIT_WRITE.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1322 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1323 unsigned long nr_segs, loff_t pos)
1326 struct vvp_io_args *args;
1332 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1336 env = cl_env_get(&refcheck);
1338 RETURN(PTR_ERR(env));
1340 args = vvp_env_args(env, IO_NORMAL);
1341 args->u.normal.via_iov = (struct iovec *)iov;
1342 args->u.normal.via_nrsegs = nr_segs;
1343 args->u.normal.via_iocb = iocb;
1345 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1346 &iocb->ki_pos, count);
1347 cl_env_put(env, &refcheck);
/*
 * Synchronous write(2) path: same single-iovec + sync-kiocb shim as
 * ll_file_read(), delegating to ll_file_aio_write().
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1351 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1355 struct iovec *local_iov;
1356 struct kiocb *kiocb;
1361 env = cl_env_get(&refcheck);
1363 RETURN(PTR_ERR(env));
1365 local_iov = &vvp_env_info(env)->vti_local_iov;
1366 kiocb = &vvp_env_info(env)->vti_kiocb;
1367 local_iov->iov_base = (void __user *)buf;
1368 local_iov->iov_len = count;
1369 init_sync_kiocb(kiocb, file);
1370 kiocb->ki_pos = *ppos;
/* kernel-version compat: the "bytes remaining" field was renamed */
1371 #ifdef HAVE_KIOCB_KI_LEFT
1372 kiocb->ki_left = count;
1374 kiocb->ki_nbytes = count;
1377 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1378 *ppos = kiocb->ki_pos;
1380 cl_env_put(env, &refcheck);
1385  * Send file content (through pagecache) somewhere with helper
/*
 * splice_read entry point: set up IO_SPLICE args (pipe + flags) and run the
 * generic read path; position is advanced through *ppos directly.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1387 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1388 struct pipe_inode_info *pipe, size_t count,
1392 struct vvp_io_args *args;
1397 env = cl_env_get(&refcheck);
1399 RETURN(PTR_ERR(env));
1401 args = vvp_env_args(env, IO_SPLICE);
1402 args->u.splice.via_pipe = pipe;
1403 args->u.splice.via_flags = flags;
1405 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1406 cl_env_put(env, &refcheck);
/*
 * Ask the OST (via obd_create with OBD_FL_RECREATE_OBJS) to recreate a lost
 * stripe object of this inode.  Works on a copy of the layout (lsm2) under
 * the inode size lock.  Returns -ENOENT when the file has no objects.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1410 static int ll_lov_recreate(struct inode *inode, struct ost_id *oi,
1413 struct obd_export *exp = ll_i2dtexp(inode);
1414 struct obd_trans_info oti = { 0 };
1415 struct obdo *oa = NULL;
1418 struct lov_stripe_md *lsm = NULL, *lsm2;
1425 lsm = ccc_inode_lsm_get(inode);
1426 if (!lsm_has_objects(lsm))
1427 GOTO(out, rc = -ENOENT);
/* layout copy must include the per-stripe lov_oinfo array */
1429 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1430 (lsm->lsm_stripe_count));
1432 OBD_ALLOC_LARGE(lsm2, lsm_size);
1434 GOTO(out, rc = -ENOMEM);
/* o_nlink is (ab)used to carry the target OST index to the LOV layer */
1437 oa->o_nlink = ost_idx;
1438 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1439 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1440 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1441 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1442 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1443 memcpy(lsm2, lsm, lsm_size);
1444 ll_inode_size_lock(inode);
1445 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1446 ll_inode_size_unlock(inode);
1448 OBD_FREE_LARGE(lsm2, lsm_size);
1451 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_RECREATE_OBJ handler: root-only; copies a ll_recreate_obj request
 * from userspace, builds an MDT0 ost_id from lrc_id and recreates the
 * object on lrc_ost_idx.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1456 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1458 struct ll_recreate_obj ucreat;
1462 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1465 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1469 ostid_set_seq_mdt0(&oi);
1470 ostid_set_id(&oi, ucreat.lrc_id);
1471 RETURN(ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID handler: root-only; converts a user-supplied FID to an
 * ost_id and derives the OST index from bits 16..31 of the FID sequence.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1474 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1481 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1484 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
1487 fid_to_ostid(&fid, &oi);
1488 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1489 RETURN(ll_lov_recreate(inode, &oi, ost_idx));
/*
 * Apply a user-requested striping layout by opening the file with an IT_OPEN
 * intent carrying the lov_user_md.  Fails early if the inode already has a
 * layout (striping can only be set once).  The open handle obtained as a
 * side effect is released immediately via ll_release_openhandle().
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1492 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1493 __u64 flags, struct lov_user_md *lum,
1496 struct lov_stripe_md *lsm = NULL;
1497 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1501 lsm = ccc_inode_lsm_get(inode);
1503 ccc_inode_lsm_put(inode, lsm);
1504 CDEBUG(D_IOCTL, "stripe already exists for inode "DFID"\n",
1505 PFID(ll_inode2fid(inode)));
1509 ll_inode_size_lock(inode);
1510 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1513 rc = oit.d.lustre.it_status;
1515 GOTO(out_req_free, rc);
1517 ll_release_openhandle(file->f_dentry, &oit);
1520 ll_inode_size_unlock(inode);
1521 ll_intent_release(&oit);
1522 ccc_inode_lsm_put(inode, lsm);
/* error path: drop the request pinned in the intent before returning */
1525 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (striping descriptor) of @filename from the MDS via
 * md_getattr_name() and return a pointer into the reply buffer through
 * *lmmp.  On little-endian-mismatch hosts the EA is byte-swapped in place
 * to host order before being handed to the caller/userspace.
 * The caller owns *request and must ptlrpc_req_finished() it.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1529 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1530 struct lov_mds_md **lmmp, int *lmm_size,
1531 struct ptlrpc_request **request)
1533 struct ll_sb_info *sbi = ll_i2sbi(inode);
1534 struct mdt_body *body;
1535 struct lov_mds_md *lmm = NULL;
1536 struct ptlrpc_request *req = NULL;
1537 struct md_op_data *op_data;
1540 rc = ll_get_max_mdsize(sbi, &lmmsize);
1544 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1545 strlen(filename), lmmsize,
1546 LUSTRE_OPC_ANY, NULL);
1547 if (IS_ERR(op_data))
1548 RETURN(PTR_ERR(op_data));
1550 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1551 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1552 ll_finish_md_op_data(op_data);
1554 CDEBUG(D_INFO, "md_getattr_name failed "
1555 "on %s: rc %d\n", filename, rc);
1559 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1560 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1562 lmmsize = body->eadatasize;
/* no EA present (or zero-sized) => no striping to report */
1564 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1566 GOTO(out, rc = -ENODATA);
1569 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1570 LASSERT(lmm != NULL);
/* only plain V1/V3 layouts are understood here */
1572 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1573 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1574 GOTO(out, rc = -EPROTO);
1578 * This is coming from the MDS, so is probably in
1579 * little endian. We convert it to host endian before
1580 * passing it to userspace.
/* swab only needed on big-endian hosts (LOV_MAGIC differs from its LE form) */
1582 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1585 stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
1586 if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED)
1589 /* if function called for directory - we should
1590 * avoid swab not existent lsm objects */
1591 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1592 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1593 if (S_ISREG(body->mode))
1594 lustre_swab_lov_user_md_objects(
1595 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1597 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1598 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1599 if (S_ISREG(body->mode))
1600 lustre_swab_lov_user_md_objects(
1601 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1608 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: root-only path that sets the striping EA
 * verbatim (MDS_OPEN_HAS_OBJS means the objects already exist).  Copies a
 * lov_user_md plus one ost_data entry from userspace into a temp buffer.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1613 static int ll_lov_setea(struct inode *inode, struct file *file,
1616 __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1617 struct lov_user_md *lump;
1618 int lum_size = sizeof(struct lov_user_md) +
1619 sizeof(struct lov_user_ost_data);
1623 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1626 OBD_ALLOC_LARGE(lump, lum_size);
1630 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1631 OBD_FREE_LARGE(lump, lum_size);
1635 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1637 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy in a v1 lov_user_md, upgrade the copy
 * to v3 size when the magic says so, set the stripe EA, then (on the
 * visible success path) refresh the layout and echo the resulting striping
 * back to userspace via LL_IOC_LOV_GETSTRIPE.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1641 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1644 struct lov_user_md_v3 lumv3;
/* lumv1 aliases the head of lumv3 — v1 is a prefix of v3 */
1645 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1646 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1647 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1649 __u64 flags = FMODE_WRITE;
1652 /* first try with v1 which is smaller than v3 */
1653 lum_size = sizeof(struct lov_user_md_v1);
1654 if (copy_from_user(lumv1, lumv1p, lum_size))
1657 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1658 lum_size = sizeof(struct lov_user_md_v3);
1659 if (copy_from_user(&lumv3, lumv3p, lum_size))
1663 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1665 struct lov_stripe_md *lsm;
/* return value of put_user is ignored here — best-effort echo */
1668 put_user(0, &lumv1p->lmm_stripe_count);
1670 ll_layout_refresh(inode, &gen);
1671 lsm = ccc_inode_lsm_get(inode);
1672 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1673 0, lsm, (void *)arg);
1674 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_LOV_GETSTRIPE handler: pin the inode's layout and let the LOV
 * obd_iocontrol copy the striping description out to userspace.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1679 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1681 struct lov_stripe_md *lsm;
1685 lsm = ccc_inode_lsm_get(inode);
1687 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1689 ccc_inode_lsm_put(inode, lsm);
/*
 * Take a Lustre group lock (gid = @arg) on behalf of this file descriptor.
 * The fd's flag/handle pair is protected by lli_lock; the lock itself is
 * acquired outside the spinlock, so a second check afterwards handles the
 * race where another thread won in between.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1693 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1695 struct ll_inode_info *lli = ll_i2info(inode);
1696 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1697 struct ccc_grouplock grouplock;
1701 if (ll_file_nolock(file))
1702 RETURN(-EOPNOTSUPP);
1704 spin_lock(&lli->lli_lock);
/* only one group lock per file descriptor */
1705 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1706 CWARN("group lock already existed with gid %lu\n",
1707 fd->fd_grouplock.cg_gid);
1708 spin_unlock(&lli->lli_lock);
1711 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1712 spin_unlock(&lli->lli_lock);
/* may block unless the file was opened O_NONBLOCK */
1714 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1715 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1719 spin_lock(&lli->lli_lock);
1720 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1721 spin_unlock(&lli->lli_lock);
1722 CERROR("another thread just won the race\n");
1723 cl_put_grouplock(&grouplock);
1727 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1728 fd->fd_grouplock = grouplock;
1729 spin_unlock(&lli->lli_lock);
1731 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * Release the group lock held by this file descriptor, verifying the caller
 * supplied the matching gid.  State is cleared under lli_lock; the actual
 * lock release happens after the spinlock is dropped.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1735 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1737 struct ll_inode_info *lli = ll_i2info(inode);
1738 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1739 struct ccc_grouplock grouplock;
1742 spin_lock(&lli->lli_lock);
1743 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1744 spin_unlock(&lli->lli_lock);
1745 CWARN("no group lock held\n");
1748 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1750 if (fd->fd_grouplock.cg_gid != arg) {
1751 CWARN("group lock %lu doesn't match current id %lu\n",
1752 arg, fd->fd_grouplock.cg_gid);
1753 spin_unlock(&lli->lli_lock);
/* take a local copy so cl_put_grouplock() can run without lli_lock */
1757 grouplock = fd->fd_grouplock;
1758 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1759 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1760 spin_unlock(&lli->lli_lock);
1762 cl_put_grouplock(&grouplock);
1763 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1768  * Close inode open handle
1770  * \param dentry [in] dentry which contains the inode
1771  * \param it [in,out] intent which contains open info and result
1774  * \retval <0 failure
/*
 * Close the MDS open handle that an IT_OPEN intent created as a side effect
 * (e.g. during setstripe), without going through ll_file_open/release.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1776 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1778 struct inode *inode = dentry->d_inode;
1779 struct obd_client_handle *och;
1785 /* Root ? Do nothing. */
1786 if (dentry->d_inode->i_sb->s_root == dentry)
1789 /* No open handle to close? Move away */
1790 if (!it_disposition(it, DISP_OPEN_OPEN))
1793 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1795 OBD_ALLOC(och, sizeof(*och));
1797 GOTO(out, rc = -ENOMEM);
1799 ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
1801 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1804 /* this one is in place of ll_file_open */
1805 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1806 ptlrpc_req_finished(it->d.lustre.it_data);
1807 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1813  * Get size for inode for which FIEMAP mapping is requested.
1814  * Make the FIEMAP get_info call and returns the result.
/*
 * Execute a FIEMAP request against the data (OST) export: validate flags,
 * optionally flush dirty pages (FIEMAP_FLAG_SYNC), then pass the request
 * down via obd_get_info(KEY_FIEMAP).  Zero-size files short-circuit with no
 * mapped extents.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1816 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1819 struct obd_export *exp = ll_i2dtexp(inode);
1820 struct lov_stripe_md *lsm = NULL;
1821 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1822 int vallen = num_bytes;
1826 /* Checks for fiemap flags */
/* unsupported flags are stripped and reported back to the caller */
1827 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1828 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1832 /* Check for FIEMAP_FLAG_SYNC */
1833 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1834 rc = filemap_fdatawrite(inode->i_mapping);
1839 lsm = ccc_inode_lsm_get(inode);
1843 /* If the stripe_count > 1 and the application does not understand
1844 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1846 if (lsm->lsm_stripe_count > 1 &&
1847 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1848 GOTO(out, rc = -EOPNOTSUPP);
1850 fm_key.oa.o_oi = lsm->lsm_oi;
1851 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1853 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1854 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1855 /* If filesize is 0, then there would be no objects for mapping */
1856 if (fm_key.oa.o_size == 0) {
1857 fiemap->fm_mapped_extents = 0;
1861 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1863 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1866 CERROR("obd_get_info failed: rc = %d\n", rc);
1869 ccc_inode_lsm_put(inode, lsm);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a pathname via the MD export.
 * Access requires CAP_DAC_READ_SEARCH unless the mount allows user fid2path.
 * A small header is copied in first to learn gf_pathlen, then a full-size
 * output buffer is allocated and copied back after the MDC call.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1873 int ll_fid2path(struct inode *inode, void *arg)
1875 struct obd_export *exp = ll_i2mdexp(inode);
1876 struct getinfo_fid2path *gfout, *gfin;
1880 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1881 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1884 /* Need to get the buflen */
1885 OBD_ALLOC_PTR(gfin);
1888 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* user-controlled gf_pathlen sizes the reply buffer — presumably bounded
 * by an elided check; TODO confirm against the full source */
1893 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1894 OBD_ALLOC(gfout, outsize);
1895 if (gfout == NULL) {
1899 memcpy(gfout, gfin, sizeof(*gfout));
1902 /* Call mdc_iocontrol */
1903 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1907 if (copy_to_user(arg, gfout, outsize))
1911 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size the kernel fiemap buffer from the user's
 * fm_extent_count, copy the request (and, when present, the first extent,
 * used to continue a previous mapping), run ll_do_fiemap(), and copy the
 * header plus mapped extents back out.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1915 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1917 struct ll_user_fiemap *fiemap_s;
1918 size_t num_bytes, ret_bytes;
1919 unsigned int extent_count;
1922 /* Get the extent count so we can calculate the size of
1923 * required fiemap buffer */
1924 if (get_user(extent_count,
1925 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
/* NOTE(review): extent_count * sizeof(extent) overflow check is in an
 * elided line — TODO confirm against the full source */
1927 num_bytes = sizeof(*fiemap_s) + (extent_count *
1928 sizeof(struct ll_fiemap_extent));
1930 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1931 if (fiemap_s == NULL)
1934 /* get the fiemap value */
1935 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1937 GOTO(error, rc = -EFAULT);
1939 /* If fm_extent_count is non-zero, read the first extent since
1940 * it is used to calculate end_offset and device from previous
1943 if (copy_from_user(&fiemap_s->fm_extents[0],
1944 (char __user *)arg + sizeof(*fiemap_s),
1945 sizeof(struct ll_fiemap_extent)))
1946 GOTO(error, rc = -EFAULT);
1949 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1953 ret_bytes = sizeof(struct ll_user_fiemap);
1955 if (extent_count != 0)
1956 ret_bytes += (fiemap_s->fm_mapped_extents *
1957 sizeof(struct ll_fiemap_extent));
1959 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1963 OBD_FREE_LARGE(fiemap_s, num_bytes);
1968  * Read the data_version for inode.
1970  * This value is computed using stripe object version on OST.
1971  * Version is computed using server side locking.
1973  * @param sync if do sync on the OST side;
1975  * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs
1976  * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs
/*
 * Files without stripe objects report version 0; otherwise the version is
 * obtained through ll_lsm_getattr() and must carry OBD_MD_FLDATAVERSION.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
1978 int ll_data_version(struct inode *inode, __u64 *data_version, int flags)
1980 struct lov_stripe_md *lsm = NULL;
1981 struct ll_sb_info *sbi = ll_i2sbi(inode);
1982 struct obdo *obdo = NULL;
1986 /* If no stripe, we consider version is 0. */
1987 lsm = ccc_inode_lsm_get(inode);
1988 if (!lsm_has_objects(lsm)) {
1990 CDEBUG(D_INODE, "No object for inode\n");
1994 OBD_ALLOC_PTR(obdo);
1996 GOTO(out, rc = -ENOMEM);
1998 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, flags);
2000 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
2003 *data_version = obdo->o_data_version;
2009 ccc_inode_lsm_put(inode, lsm);
2014  * Trigger a HSM release request for the provided inode.
/*
 * Sequence: take a write lease with MDS_OPEN_RELEASE, flush and record the
 * data version, merge size/time attributes (ll_merge_lvb), then close the
 * handle — the close RPC carries the release.  The lease is closed on the
 * error path if it was successfully opened.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
2016 int ll_hsm_release(struct inode *inode)
2018 struct cl_env_nest nest;
2020 struct obd_client_handle *och = NULL;
2021 __u64 data_version = 0;
2025 CDEBUG(D_INODE, "%s: Releasing file "DFID".\n",
2026 ll_get_fsname(inode->i_sb, NULL, 0),
2027 PFID(&ll_i2info(inode)->lli_fid));
2029 och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE);
2031 GOTO(out, rc = PTR_ERR(och));
2033 /* Grab latest data_version and [am]time values */
2034 rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH);
2038 env = cl_env_nested_get(&nest);
2040 GOTO(out, rc = PTR_ERR(env));
2042 ll_merge_lvb(env, inode);
2043 cl_env_nested_put(&nest, env);
2045 /* Release the file.
2046 * NB: lease lock handle is released in mdc_hsm_release_pack() because
2047 * we still need it to pack l_remote_handle to MDT. */
2048 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
2054 if (och != NULL && !IS_ERR(och)) /* close the file */
2055 ll_lease_close(och, inode, NULL);
/*
 * Scratch state for ll_swap_layouts(): the two inodes involved, saved
 * timestamps (ia1/ia2) for optional restore, and per-inode data-version
 * check flags.  Heap-allocated to keep it off the kernel stack.
 */
2060 struct ll_swap_stack {
2061 struct iattr ia1, ia2;
2063 struct inode *inode1, *inode2;
2064 bool check_dv1, check_dv2;
/*
 * LL_IOC_LOV_SWAP_LAYOUTS implementation: atomically exchange the layouts
 * of two regular files on the same filesystem.  Steps: permission and
 * same-fs checks, canonical ordering by FID (so concurrent swaps cannot
 * deadlock), optional group-lock flush, optional data-version verification,
 * then the MDT swap RPC, and finally optional mtime/atime restore.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
2067 static int ll_swap_layouts(struct file *file1, struct file *file2,
2068 struct lustre_swap_layouts *lsl)
2070 struct mdc_swap_layouts msl;
2071 struct md_op_data *op_data;
2074 struct ll_swap_stack *llss = NULL;
2077 OBD_ALLOC_PTR(llss);
2081 llss->inode1 = file1->f_dentry->d_inode;
2082 llss->inode2 = file2->f_dentry->d_inode;
2084 if (!S_ISREG(llss->inode2->i_mode))
2085 GOTO(free, rc = -EINVAL);
2087 if (inode_permission(llss->inode1, MAY_WRITE) ||
2088 inode_permission(llss->inode2, MAY_WRITE))
2089 GOTO(free, rc = -EPERM);
2091 if (llss->inode2->i_sb != llss->inode1->i_sb)
2092 GOTO(free, rc = -EXDEV);
2094 /* we use 2 bool because it is easier to swap than 2 bits */
2095 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
2096 llss->check_dv1 = true;
2098 if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2)
2099 llss->check_dv2 = true;
2101 /* we cannot use lsl->sl_dvX directly because we may swap them */
2102 llss->dv1 = lsl->sl_dv1;
2103 llss->dv2 = lsl->sl_dv2;
2105 rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
2106 if (rc == 0) /* same file, done! */
/* order the pair by FID so all swappers lock in the same order */
2109 if (rc < 0) { /* sequentialize it */
2110 swap(llss->inode1, llss->inode2);
2112 swap(llss->dv1, llss->dv2);
2113 swap(llss->check_dv1, llss->check_dv2);
2117 if (gid != 0) { /* application asks to flush dirty cache */
2118 rc = ll_get_grouplock(llss->inode1, file1, gid);
2122 rc = ll_get_grouplock(llss->inode2, file2, gid);
2124 ll_put_grouplock(llss->inode1, file1, gid);
2129 /* to be able to restore mtime and atime after swap
2130 * we need to first save them */
2132 (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
2133 llss->ia1.ia_mtime = llss->inode1->i_mtime;
2134 llss->ia1.ia_atime = llss->inode1->i_atime;
2135 llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
2136 llss->ia2.ia_mtime = llss->inode2->i_mtime;
2137 llss->ia2.ia_atime = llss->inode2->i_atime;
2138 llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
2141 /* ultimate check, before swaping the layouts we check if
2142 * dataversion has changed (if requested) */
2143 if (llss->check_dv1) {
2144 rc = ll_data_version(llss->inode1, &dv, 0);
2147 if (dv != llss->dv1)
2148 GOTO(putgl, rc = -EAGAIN);
2151 if (llss->check_dv2) {
2152 rc = ll_data_version(llss->inode2, &dv, 0);
2155 if (dv != llss->dv2)
2156 GOTO(putgl, rc = -EAGAIN);
2159 /* struct md_op_data is used to send the swap args to the mdt
2160 * only flags is missing, so we use struct mdc_swap_layouts
2161 * through the md_op_data->op_data */
2162 /* flags from user space have to be converted before they are send to
2163 * server, no flag is sent today, they are only used on the client */
2166 op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0,
2167 0, LUSTRE_OPC_ANY, &msl);
2168 if (IS_ERR(op_data))
2169 GOTO(free, rc = PTR_ERR(op_data));
2171 rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1),
2172 sizeof(*op_data), op_data, NULL);
2173 ll_finish_md_op_data(op_data);
2177 ll_put_grouplock(llss->inode2, file2, gid);
2178 ll_put_grouplock(llss->inode1, file1, gid);
2181 /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
2185 /* clear useless flags */
2186 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
2187 llss->ia1.ia_valid &= ~ATTR_MTIME;
2188 llss->ia2.ia_valid &= ~ATTR_MTIME;
2191 if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
2192 llss->ia1.ia_valid &= ~ATTR_ATIME;
2193 llss->ia2.ia_valid &= ~ATTR_ATIME;
2196 /* update time if requested */
/* ia2 holds inode2's saved times; after the swap they are applied to
 * file1 (and ia1 to file2) because the file contents were exchanged */
2198 if (llss->ia2.ia_valid != 0) {
2199 mutex_lock(&llss->inode1->i_mutex);
2200 rc = ll_setattr(file1->f_dentry, &llss->ia2);
2201 mutex_unlock(&llss->inode1->i_mutex);
2204 if (llss->ia1.ia_valid != 0) {
2207 mutex_lock(&llss->inode2->i_mutex);
2208 rc1 = ll_setattr(file2->f_dentry, &llss->ia1);
2209 mutex_unlock(&llss->inode2->i_mutex);
/*
 * Send an HSM state-set request to the MDT.  Unprivileged callers may only
 * touch flags within HSM_USER_MASK; anything else needs CAP_SYS_ADMIN.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
2221 static int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss)
2223 struct md_op_data *op_data;
2226 /* Non-root users are forbidden to set or clear flags which are
2227 * NOT defined in HSM_USER_MASK. */
2228 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) &&
2229 !cfs_capable(CFS_CAP_SYS_ADMIN))
2232 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2233 LUSTRE_OPC_ANY, hss);
2234 if (IS_ERR(op_data))
2235 RETURN(PTR_ERR(op_data));
2237 rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode),
2238 sizeof(*op_data), op_data, NULL);
2240 ll_finish_md_op_data(op_data);
/*
 * Import a file that already exists in the HSM archive: mark it
 * ARCHIVED|EXISTS|RELEASED on the MDT, then restore the user-supplied
 * ownership, mode, size and timestamps with a forced setattr.  Regular
 * files only.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
2245 static int ll_hsm_import(struct inode *inode, struct file *file,
2246 struct hsm_user_import *hui)
2248 struct hsm_state_set *hss = NULL;
2249 struct iattr *attr = NULL;
2253 if (!S_ISREG(inode->i_mode))
2259 GOTO(out, rc = -ENOMEM);
2261 hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID;
2262 hss->hss_archive_id = hui->hui_archive_id;
2263 hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED;
2264 rc = ll_hsm_state_set(inode, hss);
2268 OBD_ALLOC_PTR(attr);
2270 GOTO(out, rc = -ENOMEM);
/* keep only permission bits from userspace; force regular-file type */
2272 attr->ia_mode = hui->hui_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
2273 attr->ia_mode |= S_IFREG;
2274 attr->ia_uid = hui->hui_uid;
2275 attr->ia_gid = hui->hui_gid;
2276 attr->ia_size = hui->hui_size;
2277 attr->ia_mtime.tv_sec = hui->hui_mtime;
2278 attr->ia_mtime.tv_nsec = hui->hui_mtime_ns;
2279 attr->ia_atime.tv_sec = hui->hui_atime;
2280 attr->ia_atime.tv_nsec = hui->hui_atime_ns;
2282 attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE |
2283 ATTR_UID | ATTR_GID |
2284 ATTR_MTIME | ATTR_MTIME_SET |
2285 ATTR_ATIME | ATTR_ATIME_SET;
2287 rc = ll_setattr_raw(file->f_dentry, attr, true);
/*
 * Main ioctl dispatcher for regular Lustre files.  Handles fd flag
 * manipulation, striping (setstripe/getstripe/setea), layout swap, FIEMAP,
 * group locks, fid2path, data version, MDT index, HSM state/action/import,
 * and lease get/set; anything unrecognized falls through to
 * ll_iocontrol_call() and finally the data export's obd_iocontrol.
 * NOTE(review): heavily elided chunk — code kept byte-identical; several
 * case labels and error branches are not visible here.
 */
2301 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2303 struct inode *inode = file->f_dentry->d_inode;
2304 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2308 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), cmd=%x\n",
2309 PFID(ll_inode2fid(inode)), inode, cmd);
2310 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
2312 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
2313 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
2317 case LL_IOC_GETFLAGS:
2318 /* Get the current value of the file flags */
2319 return put_user(fd->fd_flags, (int *)arg);
2320 case LL_IOC_SETFLAGS:
2321 case LL_IOC_CLRFLAGS:
2322 /* Set or clear specific file flags */
2323 /* XXX This probably needs checks to ensure the flags are
2324 * not abused, and to handle any flag side effects.
2326 if (get_user(flags, (int *) arg))
2329 if (cmd == LL_IOC_SETFLAGS) {
/* IGNORE_LOCK is only allowed with O_DIRECT I/O */
2330 if ((flags & LL_FILE_IGNORE_LOCK) &&
2331 !(file->f_flags & O_DIRECT)) {
2332 CERROR("%s: unable to disable locking on "
2333 "non-O_DIRECT file\n", current->comm);
2337 fd->fd_flags |= flags;
2339 fd->fd_flags &= ~flags;
2342 case LL_IOC_LOV_SETSTRIPE:
2343 RETURN(ll_lov_setstripe(inode, file, arg));
2344 case LL_IOC_LOV_SETEA:
2345 RETURN(ll_lov_setea(inode, file, arg));
2346 case LL_IOC_LOV_SWAP_LAYOUTS: {
2348 struct lustre_swap_layouts lsl;
2350 if (copy_from_user(&lsl, (char *)arg,
2351 sizeof(struct lustre_swap_layouts)))
/* both files must be open for writing to allow a layout swap */
2354 if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
2357 file2 = fget(lsl.sl_fd);
2362 if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
2363 rc = ll_swap_layouts(file, file2, &lsl);
2367 case LL_IOC_LOV_GETSTRIPE:
2368 RETURN(ll_lov_getstripe(inode, arg));
2369 case LL_IOC_RECREATE_OBJ:
2370 RETURN(ll_lov_recreate_obj(inode, arg));
2371 case LL_IOC_RECREATE_FID:
2372 RETURN(ll_lov_recreate_fid(inode, arg));
2373 case FSFILT_IOC_FIEMAP:
2374 RETURN(ll_ioctl_fiemap(inode, arg));
2375 case FSFILT_IOC_GETFLAGS:
2376 case FSFILT_IOC_SETFLAGS:
2377 RETURN(ll_iocontrol(inode, file, cmd, arg));
2378 case FSFILT_IOC_GETVERSION_OLD:
2379 case FSFILT_IOC_GETVERSION:
2380 RETURN(put_user(inode->i_generation, (int *)arg));
2381 case LL_IOC_GROUP_LOCK:
2382 RETURN(ll_get_grouplock(inode, file, arg));
2383 case LL_IOC_GROUP_UNLOCK:
2384 RETURN(ll_put_grouplock(inode, file, arg));
2385 case IOC_OBD_STATFS:
2386 RETURN(ll_obd_statfs(inode, (void *)arg));
2388 /* We need to special case any other ioctls we want to handle,
2389 * to send them to the MDS/OST as appropriate and to properly
2390 * network encode the arg field.
2391 case FSFILT_IOC_SETVERSION_OLD:
2392 case FSFILT_IOC_SETVERSION:
2394 case LL_IOC_FLUSHCTX:
2395 RETURN(ll_flush_ctx(inode));
2396 case LL_IOC_PATH2FID: {
2397 if (copy_to_user((void *)arg, ll_inode2fid(inode),
2398 sizeof(struct lu_fid)))
2403 case OBD_IOC_FID2PATH:
2404 RETURN(ll_fid2path(inode, (void *)arg));
2405 case LL_IOC_DATA_VERSION: {
2406 struct ioc_data_version idv;
2409 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
/* only the two defined flush flags may reach ll_data_version() */
2412 idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH;
2413 rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags);
2415 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
2421 case LL_IOC_GET_MDTIDX: {
2424 mdtidx = ll_get_mdt_idx(inode);
2428 if (put_user((int)mdtidx, (int*)arg))
2433 case OBD_IOC_GETDTNAME:
2434 case OBD_IOC_GETMDNAME:
2435 RETURN(ll_get_obd_name(inode, cmd, arg));
2436 case LL_IOC_HSM_STATE_GET: {
2437 struct md_op_data *op_data;
2438 struct hsm_user_state *hus;
2445 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2446 LUSTRE_OPC_ANY, hus);
2447 if (IS_ERR(op_data)) {
2449 RETURN(PTR_ERR(op_data));
2452 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2455 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
2458 ll_finish_md_op_data(op_data);
2462 case LL_IOC_HSM_STATE_SET: {
2463 struct hsm_state_set *hss;
2470 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2475 rc = ll_hsm_state_set(inode, hss);
2480 case LL_IOC_HSM_ACTION: {
2481 struct md_op_data *op_data;
2482 struct hsm_current_action *hca;
2489 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2490 LUSTRE_OPC_ANY, hca);
2491 if (IS_ERR(op_data)) {
2493 RETURN(PTR_ERR(op_data));
2496 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2499 if (copy_to_user((char *)arg, hca, sizeof(*hca)))
2502 ll_finish_md_op_data(op_data);
2506 case LL_IOC_SET_LEASE: {
2507 struct ll_inode_info *lli = ll_i2info(inode);
2508 struct obd_client_handle *och = NULL;
/* requested lease mode must be compatible with the open mode */
2514 if (!(file->f_mode & FMODE_WRITE))
2519 if (!(file->f_mode & FMODE_READ))
2524 mutex_lock(&lli->lli_och_mutex);
/* detach any lease currently owned by this fd before closing it */
2525 if (fd->fd_lease_och != NULL) {
2526 och = fd->fd_lease_och;
2527 fd->fd_lease_och = NULL;
2529 mutex_unlock(&lli->lli_och_mutex);
2532 mode = och->och_flags &(FMODE_READ|FMODE_WRITE);
2533 rc = ll_lease_close(och, inode, &lease_broken);
2534 if (rc == 0 && lease_broken)
2540 /* return the type of lease or error */
2541 RETURN(rc < 0 ? rc : (int)mode);
2546 CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
2548 /* apply for lease */
2549 och = ll_lease_open(inode, file, mode, 0);
2551 RETURN(PTR_ERR(och));
2554 mutex_lock(&lli->lli_och_mutex);
2555 if (fd->fd_lease_och == NULL) {
2556 fd->fd_lease_och = och;
2559 mutex_unlock(&lli->lli_och_mutex);
2561 /* impossible now that only excl is supported for now */
2562 ll_lease_close(och, inode, &lease_broken);
2567 case LL_IOC_GET_LEASE: {
2568 struct ll_inode_info *lli = ll_i2info(inode);
2569 struct ldlm_lock *lock = NULL;
2572 mutex_lock(&lli->lli_och_mutex);
2573 if (fd->fd_lease_och != NULL) {
2574 struct obd_client_handle *och = fd->fd_lease_och;
/* report the lease mode only while the DLM lock is not cancelled */
2576 lock = ldlm_handle2lock(&och->och_lease_handle);
2578 lock_res_and_lock(lock);
2579 if (!ldlm_is_cancel(lock))
2580 rc = och->och_flags &
2581 (FMODE_READ | FMODE_WRITE);
2582 unlock_res_and_lock(lock);
2583 LDLM_LOCK_PUT(lock);
2586 mutex_unlock(&lli->lli_och_mutex);
2589 case LL_IOC_HSM_IMPORT: {
2590 struct hsm_user_import *hui;
2596 if (copy_from_user(hui, (void *)arg, sizeof(*hui))) {
2601 rc = ll_hsm_import(inode, file, hui);
/* fall back: registered ioctl handlers, then the data export */
2610 ll_iocontrol_call(inode, file, cmd, arg, &err))
2613 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * Compat copy (for kernels without generic_file_llseek_size): validate the
 * target offset against sign/maxsize rules and commit it to f_pos,
 * resetting f_version when the position actually changes.
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
2619 #ifndef HAVE_FILE_LLSEEK_SIZE
2620 static inline loff_t
2621 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2623 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2625 if (offset > maxsize)
2628 if (offset != file->f_pos) {
2629 file->f_pos = offset;
2630 file->f_version = 0;
/*
 * Compat copy of the kernel's generic_file_llseek_size(): handles
 * SEEK_CUR relative motion under i_mutex (the visible locking here) and
 * the SEEK_DATA/SEEK_HOLE "whole file is data, hole at EOF" convention,
 * delegating the final clamp/commit to llseek_execute().
 * NOTE(review): elided chunk — code byte-identical, comments only.
 */
2636 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2637 loff_t maxsize, loff_t eof)
2639 struct inode *inode = file->f_dentry->d_inode;
2647 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2648 * position-querying operation. Avoid rewriting the "same"
2649 * f_pos value back to the file because a concurrent read(),
2650 * write() or lseek() might have altered it
2655 * f_lock protects against read/modify/write race with other
2656 * SEEK_CURs. Note that parallel writes and reads behave
2659 mutex_lock(&inode->i_mutex);
2660 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2661 mutex_unlock(&inode->i_mutex);
2665 * In the generic case the entire file is data, so as long as
2666 * offset isn't at the end of the file then the offset is data.
2673 * There is a virtual hole at the end of the file, so as long as
2674 * offset isn't i_size or larger, return i_size.
2682 return llseek_execute(file, offset, maxsize);
/* llseek entry point for Lustre regular files.
 * For SEEK_END/SEEK_HOLE/SEEK_DATA the file size must be current, so the
 * OSTs are glimpsed first; then the generic size-aware llseek is applied
 * with Lustre's per-file maximum byte limit. */
2686 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2688 struct inode *inode = file->f_dentry->d_inode;
2689 loff_t retval, eof = 0;
2692 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2693 (origin == SEEK_CUR) ? file->f_pos : 0);
2694 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), to=%llu=%#llx(%d)\n",
2695 PFID(ll_inode2fid(inode)), inode, retval, retval,
2697 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
/* Size-dependent whence values need an up-to-date i_size from the OSTs. */
2699 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2700 retval = ll_glimpse_size(inode);
2703 eof = i_size_read(inode);
2706 retval = ll_generic_file_llseek_size(file, offset, origin,
2707 ll_file_maxbytes(inode), eof);
/* flush() file operation: report (and clear) any async writeback error
 * recorded for this inode.  Returns -EIO exactly once per failure; if the
 * application was already told via fd_write_failed, success is returned
 * so the error is not reported twice. */
2711 int ll_flush(struct file *file, fl_owner_t id)
2713 struct inode *inode = file->f_dentry->d_inode;
2714 struct ll_inode_info *lli = ll_i2info(inode);
2715 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2718 LASSERT(!S_ISDIR(inode->i_mode));
2720 /* catch async errors that were recorded back when async writeback
2721 * failed for pages in this mapping. */
2722 rc = lli->lli_async_rc;
2723 lli->lli_async_rc = 0;
2724 err = lov_read_and_clear_async_rc(lli->lli_clob);
2728 /* The application has been told write failure already.
2729 * Do not report failure again. */
2730 if (fd->fd_write_failed)
2732 return rc ? -EIO : 0;
2736 * Called to make sure a portion of file has been written out.
2737 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2739 * Return how many pages have been written.
2741 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2742 enum cl_fsync_mode mode, int ignore_layout)
2744 struct cl_env_nest nest;
2747 struct obd_capa *capa = NULL;
2748 struct cl_fsync_io *fio;
/* Reject any mode outside the known fsync mode set. */
2752 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2753 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2756 env = cl_env_nested_get(&nest);
2758 RETURN(PTR_ERR(env));
/* OSS write capability covers the sync RPCs sent to the OSTs. */
2760 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2762 io = ccc_env_thread_io(env);
2763 io->ci_obj = cl_i2info(inode)->lli_clob;
2764 io->ci_ignore_layout = ignore_layout;
2766 /* initialize parameters for sync */
2767 fio = &io->u.ci_fsync;
2768 fio->fi_capa = capa;
2769 fio->fi_start = start;
2771 fio->fi_fid = ll_inode2fid(inode);
2772 fio->fi_mode = mode;
2773 fio->fi_nr_written = 0;
/* Run the CIT_FSYNC io; on success the result is the page count written. */
2775 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2776 result = cl_io_loop(env, io);
2778 result = io->ci_result;
2780 result = fio->fi_nr_written;
2781 cl_io_fini(env, io);
2782 cl_env_nested_put(&nest, env);
2790 * When dentry is provided (the 'else' case), *file->f_dentry may be
2791 * null and dentry must be used directly rather than pulled from
2792 * *file->f_dentry as is done otherwise.
/* fsync() entry point; three signatures depending on kernel version.
 * Flushes dirty pages, clears recorded async errors, syncs metadata via
 * md_fsync(), and for datasync on regular files forces an OST-side sync. */
2795 #ifdef HAVE_FILE_FSYNC_4ARGS
2796 int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2798 struct dentry *dentry = file->f_dentry;
2799 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2800 int ll_fsync(struct file *file, int datasync)
2802 struct dentry *dentry = file->f_dentry;
2804 int ll_fsync(struct file *file, struct dentry *dentry, int datasync)
2807 struct inode *inode = dentry->d_inode;
2808 struct ll_inode_info *lli = ll_i2info(inode);
2809 struct ptlrpc_request *req;
2810 struct obd_capa *oc;
2814 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2815 PFID(ll_inode2fid(inode)), inode);
2816 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2818 #ifdef HAVE_FILE_FSYNC_4ARGS
/* 4-arg fsync is called without i_mutex held; take it ourselves. */
2819 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2820 mutex_lock(&inode->i_mutex);
2822 /* fsync's caller has already called _fdata{sync,write}, we want
2823 * that IO to finish before calling the osc and mdc sync methods */
2824 rc = filemap_fdatawait(inode->i_mapping);
2827 /* catch async errors that were recorded back when async writeback
2828 * failed for pages in this mapping. */
2829 if (!S_ISDIR(inode->i_mode)) {
2830 err = lli->lli_async_rc;
2831 lli->lli_async_rc = 0;
2834 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* Sync metadata on the MDT. */
2839 oc = ll_mdscapa_get(inode);
2840 err = md_fsync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2846 ptlrpc_req_finished(req);
/* For datasync on a regular file also force data out to the OSTs and
 * record/clear the per-fd write-failure state accordingly. */
2848 if (datasync && S_ISREG(inode->i_mode)) {
2849 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2851 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2853 if (rc == 0 && err < 0)
2856 fd->fd_write_failed = true;
2858 fd->fd_write_failed = false;
2861 #ifdef HAVE_FILE_FSYNC_4ARGS
2862 mutex_unlock(&inode->i_mutex);
/* flock()/fcntl() lock handler.  Translates the VFS file_lock into an
 * LDLM flock enqueue on the MDT, then mirrors the result into the local
 * lock lists via flock_lock_file_wait()/posix_lock_file_wait().  If the
 * local step fails the server lock is torn down again (LCK_NL enqueue). */
2867 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2869 struct inode *inode = file->f_dentry->d_inode;
2870 struct ll_sb_info *sbi = ll_i2sbi(inode);
2871 struct ldlm_enqueue_info einfo = {
2872 .ei_type = LDLM_FLOCK,
2873 .ei_cb_cp = ldlm_flock_completion_ast,
2874 .ei_cbdata = file_lock,
2876 struct md_op_data *op_data;
2877 struct lustre_handle lockh = {0};
2878 ldlm_policy_data_t flock = {{0}};
2884 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID" file_lock=%p\n",
2885 PFID(ll_inode2fid(inode)), file_lock);
2887 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2889 if (file_lock->fl_flags & FL_FLOCK) {
2890 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2891 /* flocks are whole-file locks */
2892 flock.l_flock.end = OFFSET_MAX;
2893 /* For flocks owner is determined by the local file desctiptor*/
2894 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2895 } else if (file_lock->fl_flags & FL_POSIX) {
2896 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2897 flock.l_flock.start = file_lock->fl_start;
2898 flock.l_flock.end = file_lock->fl_end;
2902 flock.l_flock.pid = file_lock->fl_pid;
2904 /* Somewhat ugly workaround for svc lockd.
2905 * lockd installs custom fl_lmops->lm_compare_owner that checks
2906 * for the fl_owner to be the same (which it always is on local node
2907 * I guess between lockd processes) and then compares pid.
2908 * As such we assign pid to the owner field to make it all work,
2909 * conflict with normal locks is unlikely since pid space and
2910 * pointer space for current->files are not intersecting */
2911 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2912 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Map the fcntl lock type onto an LDLM mode: read->PR, write->PW,
 * unlock->NL (see comment below). */
2914 switch (file_lock->fl_type) {
2916 einfo.ei_mode = LCK_PR;
2919 /* An unlock request may or may not have any relation to
2920 * existing locks so we may not be able to pass a lock handle
2921 * via a normal ldlm_lock_cancel() request. The request may even
2922 * unlock a byte range in the middle of an existing lock. In
2923 * order to process an unlock request we need all of the same
2924 * information that is given with a normal read or write record
2925 * lock request. To avoid creating another ldlm unlock (cancel)
2926 * message we'll treat a LCK_NL flock request as an unlock. */
2927 einfo.ei_mode = LCK_NL;
2930 einfo.ei_mode = LCK_PW;
2933 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2934 file_lock->fl_type);
/* Non-blocking set requests map to BLOCK_NOWAIT; test requests to
 * TEST_LOCK (elided switch on cmd). */
2949 flags = LDLM_FL_BLOCK_NOWAIT;
2955 flags = LDLM_FL_TEST_LOCK;
2956 /* Save the old mode so that if the mode in the lock changes we
2957 * can decrement the appropriate reader or writer refcount. */
2958 file_lock->fl_type = einfo.ei_mode;
2961 CERROR("unknown fcntl lock command: %d\n", cmd);
2965 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2966 LUSTRE_OPC_ANY, NULL);
2967 if (IS_ERR(op_data))
2968 RETURN(PTR_ERR(op_data));
2970 CDEBUG(D_DLMTRACE, "inode="DFID", pid=%u, flags="LPX64", mode=%u, "
2971 "start="LPU64", end="LPU64"\n", PFID(ll_inode2fid(inode)),
2972 flock.l_flock.pid, flags, einfo.ei_mode,
2973 flock.l_flock.start, flock.l_flock.end);
2975 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2976 op_data, &lockh, &flock, 0, NULL /* req */, flags);
/* Mirror a successful server lock (or any unlock) into the kernel's
 * local lock bookkeeping. */
2978 if ((file_lock->fl_flags & FL_FLOCK) &&
2979 (rc == 0 || file_lock->fl_type == F_UNLCK))
2980 rc2 = flock_lock_file_wait(file, file_lock);
2981 if ((file_lock->fl_flags & FL_POSIX) &&
2982 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2983 !(flags & LDLM_FL_TEST_LOCK))
2984 rc2 = posix_lock_file_wait(file, file_lock);
2986 if (rc2 && file_lock->fl_type != F_UNLCK) {
/* Local bookkeeping failed: release the server-side lock so client
 * and server state stay consistent. */
2987 einfo.ei_mode = LCK_NL;
2988 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2989 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2993 ll_finish_md_op_data(op_data);
/* Lock handler installed for -o noflock mounts; body elided here —
 * presumably returns -ENOSYS (see the ll_file_operations_noflock
 * comment below) — TODO confirm. */
2998 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
3006 * test if some locks matching bits and l_req_mode are acquired
3007 * - bits can be in different locks
3008 * - if found clear the common lock bits in *bits
3009 * - the bits not found, are kept in *bits
3011 * \param bits [IN] searched lock bits [IN]
3012 * \param l_req_mode [IN] searched lock mode
3013 * \retval boolean, true iff all bits are found
/* See the header comment above: tests whether MDS inodebits locks covering
 * *bits are already cached locally (LDLM_FL_TEST_LOCK, no new lock taken).
 * Bits that are found are cleared from *bits; unfound bits remain. */
3015 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
3017 struct lustre_handle lockh;
3018 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four normal modes. */
3019 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
3020 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
3029 fid = &ll_i2info(inode)->lli_fid;
3030 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
3031 ldlm_lockname[mode]);
3033 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe one inodebit at a time; a single cached lock may cover several
 * requested bits at once. */
3034 for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
3035 policy.l_inodebits.bits = *bits & (1 << i);
3036 if (policy.l_inodebits.bits == 0)
3039 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
3040 &policy, mode, &lockh)) {
3041 struct ldlm_lock *lock;
3043 lock = ldlm_handle2lock(&lockh);
3046 ~(lock->l_policy_data.l_inodebits.bits);
3047 LDLM_LOCK_PUT(lock);
3049 *bits &= ~policy.l_inodebits.bits;
/* Match (and take a reference on) a cached MDS inodebits lock covering
 * @bits in one of the modes in @mode; returns the matched mode (0 if
 * none) with the handle stored in *lockh. */
3056 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
3057 struct lustre_handle *lockh, __u64 flags,
3060 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
3065 fid = &ll_i2info(inode)->lli_fid;
3066 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
3068 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
3069 fid, LDLM_IBITS, &policy, mode, lockh);
/* Post-process a revalidation RPC result: -ENOENT on an already-unlinked
 * inode is translated to success (nlink updated elsewhere); other errors
 * are logged and propagated. */
3074 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
3076 /* Already unlinked. Just update nlink and return success */
3077 if (rc == -ENOENT) {
3079 /* This path cannot be hit for regular files unless in
3080 * case of obscure races, so no need to to validate
3082 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
3084 } else if (rc != 0) {
3085 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
3086 ll_get_fsname(inode->i_sb, NULL, 0),
3087 PFID(ll_inode2fid(inode)), rc);
/* Revalidate a dentry's inode attributes against the MDT.
 * Two paths: with OBD_CONNECT_ATTRFID an intent getattr-by-FID is used
 * (also refreshing DLM locks); otherwise a plain md_getattr() is issued
 * only when no suitable ibits lock is cached locally. */
3093 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3096 struct inode *inode = dentry->d_inode;
3097 struct ptlrpc_request *req = NULL;
3098 struct obd_export *exp;
3102 LASSERT(inode != NULL);
3104 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p),name=%s\n",
3105 PFID(ll_inode2fid(inode)), inode, dentry->d_name.name);
3107 exp = ll_i2mdexp(inode);
3109 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
3110 * But under CMD case, it caused some lock issues, should be fixed
3111 * with new CMD ibits lock. See bug 12718 */
3112 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
3113 struct lookup_intent oit = { .it_op = IT_GETATTR };
3114 struct md_op_data *op_data;
3116 if (ibits == MDS_INODELOCK_LOOKUP)
3117 oit.it_op = IT_LOOKUP;
3119 /* Call getattr by fid, so do not provide name at all. */
3120 op_data = ll_prep_md_op_data(NULL, dentry->d_inode,
3121 dentry->d_inode, NULL, 0, 0,
3122 LUSTRE_OPC_ANY, NULL);
3123 if (IS_ERR(op_data))
3124 RETURN(PTR_ERR(op_data));
3126 oit.it_create_mode |= M_CHECK_STALE;
3127 rc = md_intent_lock(exp, op_data, NULL, 0,
3128 /* we are not interested in name
3131 ll_md_blocking_ast, 0);
3132 ll_finish_md_op_data(op_data);
3133 oit.it_create_mode &= ~M_CHECK_STALE;
3135 rc = ll_inode_revalidate_fini(inode, rc);
3139 rc = ll_revalidate_it_finish(req, &oit, dentry);
3141 ll_intent_release(&oit);
3145 /* Unlinked? Unhash dentry, so it is not picked up later by
3146 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
3147 here to preserve get_cwd functionality on 2.6.
3149 if (!dentry->d_inode->i_nlink)
3150 d_lustre_invalidate(dentry, 0);
3152 ll_lookup_finish_locks(&oit, dentry);
/* No ATTRFID support: only go to the MDT if the needed ibits lock is
 * not already cached. */
3153 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
3154 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
3155 obd_valid valid = OBD_MD_FLGETATTR;
3156 struct md_op_data *op_data;
/* Regular files may carry striping EAs; size the reply buffer. */
3159 if (S_ISREG(inode->i_mode)) {
3160 rc = ll_get_max_mdsize(sbi, &ealen);
3163 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
3166 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
3167 0, ealen, LUSTRE_OPC_ANY,
3169 if (IS_ERR(op_data))
3170 RETURN(PTR_ERR(op_data));
3172 op_data->op_valid = valid;
3173 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
3174 * capa for this inode. Because we only keep capas of dirs
3176 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
3177 ll_finish_md_op_data(op_data);
3179 rc = ll_inode_revalidate_fini(inode, rc);
3183 rc = ll_prep_inode(&inode, req, NULL, NULL);
3186 ptlrpc_req_finished(req);
/* Full revalidation: refresh MDT attributes, then for non-regular files
 * copy timestamps from the cached lvb; for regular files glimpse the OSTs
 * for size unless an HSM restore is in progress (MDT already supplied
 * the right size and glimpse would block on the layout lock). */
3190 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
3193 struct inode *inode = dentry->d_inode;
3197 rc = __ll_inode_revalidate_it(dentry, it, ibits);
3201 /* if object isn't regular file, don't validate size */
3202 if (!S_ISREG(inode->i_mode)) {
3203 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
3204 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
3205 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
3207 /* In case of restore, the MDT has the right size and has
3208 * already send it back without granting the layout lock,
3209 * inode is up-to-date so glimpse is useless.
3210 * Also to glimpse we need the layout, in case of a running
3211 * restore the MDT holds the layout lock so the glimpse will
3212 * block up to the end of restore (getattr will block)
3214 if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
3215 rc = ll_glimpse_size(inode);
/* getattr with an explicit lookup intent: revalidate UPDATE|LOOKUP ibits,
 * then fill *stat from the (now fresh) inode fields.  32-bit API clients
 * get a FID-derived inode number instead of i_ino. */
3220 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
3221 struct lookup_intent *it, struct kstat *stat)
3223 struct inode *inode = de->d_inode;
3224 struct ll_sb_info *sbi = ll_i2sbi(inode);
3225 struct ll_inode_info *lli = ll_i2info(inode);
3228 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
3229 MDS_INODELOCK_LOOKUP);
3230 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
3235 stat->dev = inode->i_sb->s_dev;
3236 if (ll_need_32bit_api(sbi))
3237 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
3239 stat->ino = inode->i_ino;
3240 stat->mode = inode->i_mode;
3241 stat->nlink = inode->i_nlink;
3242 stat->uid = inode->i_uid;
3243 stat->gid = inode->i_gid;
3244 stat->rdev = inode->i_rdev;
3245 stat->atime = inode->i_atime;
3246 stat->mtime = inode->i_mtime;
3247 stat->ctime = inode->i_ctime;
3248 stat->blksize = 1 << inode->i_blkbits;
3250 stat->size = i_size_read(inode);
3251 stat->blocks = inode->i_blocks;
/* VFS getattr entry point: wraps ll_getattr_it() with an IT_GETATTR
 * intent. */
3255 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
3257 struct lookup_intent it = { .it_op = IT_GETATTR };
3259 return ll_getattr_it(mnt, de, &it, stat);
/* FIEMAP inode operation: marshal fieinfo into a ll_user_fiemap buffer
 * (header + fi_extents_max extents), run ll_do_fiemap(), and copy the
 * mapped extents back to the caller's buffer. */
3262 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3263 __u64 start, __u64 len)
3267 struct ll_user_fiemap *fiemap;
3268 unsigned int extent_count = fieinfo->fi_extents_max;
3270 num_bytes = sizeof(*fiemap) + (extent_count *
3271 sizeof(struct ll_fiemap_extent));
3272 OBD_ALLOC_LARGE(fiemap, num_bytes);
3277 fiemap->fm_flags = fieinfo->fi_flags;
3278 fiemap->fm_extent_count = fieinfo->fi_extents_max;
3279 fiemap->fm_start = start;
3280 fiemap->fm_length = len;
/* Only the first extent is copied in: it may carry the continuation
 * cookie from a previous FIEMAP call — TODO confirm. */
3281 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
3282 sizeof(struct ll_fiemap_extent));
3284 rc = ll_do_fiemap(inode, fiemap, num_bytes);
3286 fieinfo->fi_flags = fiemap->fm_flags;
3287 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
3288 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
3289 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
3291 OBD_FREE_LARGE(fiemap, num_bytes);
/* Return a referenced copy of the cached POSIX ACL under lli_lock; the
 * VFS releases the reference after permission checking. */
3295 struct posix_acl * ll_get_acl(struct inode *inode, int type)
3297 struct ll_inode_info *lli = ll_i2info(inode);
3298 struct posix_acl *acl = NULL;
3301 spin_lock(&lli->lli_lock);
3302 /* VFS' acl_permission_check->check_acl will release the refcount */
3303 acl = posix_acl_dup(lli->lli_posix_acl);
3304 spin_unlock(&lli->lli_lock);
/* check_acl callback for ll_generic_permission(); compiled only when the
 * kernel's generic_permission() takes a check_acl function.  Without
 * CONFIG_FS_POSIX_ACL the elided branch presumably returns -EAGAIN —
 * TODO confirm. */
3309 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
3311 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
3312 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
3314 ll_check_acl(struct inode *inode, int mask)
3317 # ifdef CONFIG_FS_POSIX_ACL
3318 struct posix_acl *acl;
3322 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* Cannot take locks / block in RCU walk mode. */
3323 if (flags & IPERM_FLAG_RCU)
3326 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
3331 rc = posix_acl_permission(inode, acl, mask);
3332 posix_acl_release(acl);
3335 # else /* !CONFIG_FS_POSIX_ACL */
3337 # endif /* CONFIG_FS_POSIX_ACL */
3339 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/* permission() inode operation (three kernel-version signatures).
 * Bails out of RCU-walk mode, revalidates the root inode before checking
 * (root is never revalidated by lookup), delegates to the remote-perm
 * path for remote-client mounts, otherwise uses generic permission with
 * the ACL callback. */
3341 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
3342 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
3344 # ifdef HAVE_INODE_PERMISION_2ARGS
3345 int ll_inode_permission(struct inode *inode, int mask)
3347 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
/* May block (RPCs); refuse RCU-walk and let the VFS retry in ref-walk. */
3354 #ifdef MAY_NOT_BLOCK
3355 if (mask & MAY_NOT_BLOCK)
3357 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
3358 if (flags & IPERM_FLAG_RCU)
3362 /* as root inode are NOT getting validated in lookup operation,
3363 * need to do it before permission check. */
3365 if (inode == inode->i_sb->s_root->d_inode) {
3366 struct lookup_intent it = { .it_op = IT_LOOKUP };
3368 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
3369 MDS_INODELOCK_LOOKUP);
3374 CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
3375 PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
3377 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
3378 return lustre_check_remote_perm(inode, mask);
3380 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
3381 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
3386 /* -o localflock - only provides locally consistent flock locks */
/* Default file operations (-o localflock): no .flock/.lock entries, so
 * flock locks are only locally consistent. */
3387 struct file_operations ll_file_operations = {
3388 .read = ll_file_read,
3389 .aio_read = ll_file_aio_read,
3390 .write = ll_file_write,
3391 .aio_write = ll_file_aio_write,
3392 .unlocked_ioctl = ll_file_ioctl,
3393 .open = ll_file_open,
3394 .release = ll_file_release,
3395 .mmap = ll_file_mmap,
3396 .llseek = ll_file_seek,
3397 .splice_read = ll_file_splice_read,
/* File operations for -o flock mounts: identical to the default table
 * plus cluster-coherent .flock/.lock handlers (ll_file_flock). */
3402 struct file_operations ll_file_operations_flock = {
3403 .read = ll_file_read,
3404 .aio_read = ll_file_aio_read,
3405 .write = ll_file_write,
3406 .aio_write = ll_file_aio_write,
3407 .unlocked_ioctl = ll_file_ioctl,
3408 .open = ll_file_open,
3409 .release = ll_file_release,
3410 .mmap = ll_file_mmap,
3411 .llseek = ll_file_seek,
3412 .splice_read = ll_file_splice_read,
3415 .flock = ll_file_flock,
3416 .lock = ll_file_flock
3419 /* These are for -o noflock - to return ENOSYS on flock calls */
/* File operations for -o noflock mounts: .flock/.lock point at the
 * ll_file_noflock stub so lock calls fail rather than give a false sense
 * of coherency. */
3420 struct file_operations ll_file_operations_noflock = {
3421 .read = ll_file_read,
3422 .aio_read = ll_file_aio_read,
3423 .write = ll_file_write,
3424 .aio_write = ll_file_aio_write,
3425 .unlocked_ioctl = ll_file_ioctl,
3426 .open = ll_file_open,
3427 .release = ll_file_release,
3428 .mmap = ll_file_mmap,
3429 .llseek = ll_file_seek,
3430 .splice_read = ll_file_splice_read,
3433 .flock = ll_file_noflock,
3434 .lock = ll_file_noflock
/* Inode operations for regular Lustre files; .get_acl is only wired up
 * on kernels that support the iop (HAVE_IOP_GET_ACL). */
3437 struct inode_operations ll_file_inode_operations = {
3438 .setattr = ll_setattr,
3439 .getattr = ll_getattr,
3440 .permission = ll_inode_permission,
3441 .setxattr = ll_setxattr,
3442 .getxattr = ll_getxattr,
3443 .listxattr = ll_listxattr,
3444 .removexattr = ll_removexattr,
3445 .fiemap = ll_fiemap,
3446 #ifdef HAVE_IOP_GET_ACL
3447 .get_acl = ll_get_acl,
3451 /* dynamic ioctl number support routins */
/* Registry of dynamically registered ioctl handlers: a rw_semaphore
 * protecting a list of llioc_data entries, each carrying its callback
 * and the ioctl command numbers it handles (flexible trailing array). */
3452 static struct llioc_ctl_data {
3453 struct rw_semaphore ioc_sem;
3454 cfs_list_t ioc_head;
3456 __RWSEM_INITIALIZER(llioc.ioc_sem),
3457 CFS_LIST_HEAD_INIT(llioc.ioc_head)
3462 cfs_list_t iocd_list;
3463 unsigned int iocd_size;
3464 llioc_callback_t iocd_cb;
3465 unsigned int iocd_count;
3466 unsigned int iocd_cmd[0];
/* Register a dynamic ioctl handler for @count commands in @cmd; returns
 * an opaque cookie (the allocated record) used by
 * ll_iocontrol_unregister(), or NULL on bad args / allocation failure. */
3469 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
3472 struct llioc_data *in_data = NULL;
3475 if (cb == NULL || cmd == NULL ||
3476 count > LLIOC_MAX_CMD || count < 0)
3479 size = sizeof(*in_data) + count * sizeof(unsigned int);
3480 OBD_ALLOC(in_data, size);
3481 if (in_data == NULL)
3484 memset(in_data, 0, sizeof(*in_data));
3485 in_data->iocd_size = size;
3486 in_data->iocd_cb = cb;
3487 in_data->iocd_count = count;
3488 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
3490 down_write(&llioc.ioc_sem);
3491 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
3492 up_write(&llioc.ioc_sem);
/* Remove and free the handler record identified by the @magic cookie
 * returned from ll_iocontrol_register(); warns if not found. */
3497 void ll_iocontrol_unregister(void *magic)
3499 struct llioc_data *tmp;
3504 down_write(&llioc.ioc_sem);
3505 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
3507 unsigned int size = tmp->iocd_size;
3509 cfs_list_del(&tmp->iocd_list);
/* Drop the semaphore before freeing; early return ends the walk. */
3510 up_write(&llioc.ioc_sem);
3512 OBD_FREE(tmp, size);
3516 up_write(&llioc.ioc_sem);
3518 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
3521 EXPORT_SYMBOL(ll_iocontrol_register);
3522 EXPORT_SYMBOL(ll_iocontrol_unregister);
/* Dispatch an unhandled ioctl to registered dynamic handlers; iterates
 * under the read lock until a handler returns LLIOC_STOP.  The handler's
 * status is passed back through *rcp (elided here — TODO confirm). */
3524 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
3525 unsigned int cmd, unsigned long arg, int *rcp)
3527 enum llioc_iter ret = LLIOC_CONT;
3528 struct llioc_data *data;
3529 int rc = -EINVAL, i;
3531 down_read(&llioc.ioc_sem);
3532 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
3533 for (i = 0; i < data->iocd_count; i++) {
3534 if (cmd != data->iocd_cmd[i])
3537 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
3541 if (ret == LLIOC_STOP)
3544 up_read(&llioc.ioc_sem);
/* Push a layout configuration into the cl_object for this inode.
 * For OBJECT_CONF_SET the layout lock is only allowed to be matched after
 * the layout has been applied, so other threads never observe a stale
 * layout under a matchable lock. */
3551 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3553 struct ll_inode_info *lli = ll_i2info(inode);
3554 struct cl_env_nest nest;
3559 if (lli->lli_clob == NULL)
3562 env = cl_env_nested_get(&nest);
3564 RETURN(PTR_ERR(env));
3566 result = cl_conf_set(env, lli->lli_clob, conf);
3567 cl_env_nested_put(&nest, env);
3569 if (conf->coc_opc == OBJECT_CONF_SET) {
3570 struct ldlm_lock *lock = conf->coc_lock;
3572 LASSERT(lock != NULL);
3573 LASSERT(ldlm_has_layout(lock));
3575 /* it can only be allowed to match after layout is
3576 * applied to inode otherwise false layout would be
3577 * seen. Applying layout shoud happen before dropping
3578 * the intent lock. */
3579 ldlm_lock_allow_match(lock);
3585 /* Fetch layout from MDT with getxattr request, if it's not ready yet */
/* If the layout lock's LVB is not populated (lock was granted via
 * completion AST rather than immediately), fetch the LOV EA with a
 * getxattr RPC and install it as the lock's lvb_data. */
3586 static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
3589 struct ll_sb_info *sbi = ll_i2sbi(inode);
3590 struct obd_capa *oc;
3591 struct ptlrpc_request *req;
3592 struct mdt_body *body;
3599 CDEBUG(D_INODE, DFID" LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n",
3600 PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock),
3601 lock->l_lvb_data, lock->l_lvb_len);
/* Already populated: nothing to fetch. */
3603 if ((lock->l_lvb_data != NULL) && ldlm_is_lvb_ready(lock))
3606 /* if layout lock was granted right away, the layout is returned
3607 * within DLM_LVB of dlm reply; otherwise if the lock was ever
3608 * blocked and then granted via completion ast, we have to fetch
3609 * layout here. Please note that we can't use the LVB buffer in
3610 * completion AST because it doesn't have a large enough buffer */
3611 oc = ll_mdscapa_get(inode);
3612 rc = ll_get_max_mdsize(sbi, &lmmsize);
3614 rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
3615 OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0,
3621 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
3622 if (body == NULL || body->eadatasize > lmmsize)
3623 GOTO(out, rc = -EPROTO);
3625 lmmsize = body->eadatasize;
3626 if (lmmsize == 0) /* empty layout */
3629 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize);
3631 GOTO(out, rc = -EFAULT);
3633 OBD_ALLOC_LARGE(lvbdata, lmmsize);
3634 if (lvbdata == NULL)
3635 GOTO(out, rc = -ENOMEM);
/* Swap the fetched EA into the lock under the resource lock, freeing
 * any previous lvb buffer. */
3637 memcpy(lvbdata, lmm, lmmsize);
3638 lock_res_and_lock(lock);
3639 if (lock->l_lvb_data != NULL)
3640 OBD_FREE_LARGE(lock->l_lvb_data, lock->l_lvb_len);
3642 lock->l_lvb_data = lvbdata;
3643 lock->l_lvb_len = lmmsize;
3644 unlock_res_and_lock(lock);
3649 ptlrpc_req_finished(req);
3654 * Apply the layout to the inode. Layout lock is held and will be released
3657 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3658 struct inode *inode, __u32 *gen, bool reconf)
3660 struct ll_inode_info *lli = ll_i2info(inode);
3661 struct ll_sb_info *sbi = ll_i2sbi(inode);
3662 struct ldlm_lock *lock;
3663 struct lustre_md md = { NULL };
3664 struct cl_object_conf conf;
3667 bool wait_layout = false;
3670 LASSERT(lustre_handle_is_used(lockh));
3672 lock = ldlm_handle2lock(lockh);
3673 LASSERT(lock != NULL);
3674 LASSERT(ldlm_has_layout(lock));
3676 LDLM_DEBUG(lock, "file "DFID"(%p) being reconfigured: %d\n",
3677 PFID(&lli->lli_fid), inode, reconf);
3679 /* in case this is a caching lock and reinstate with new inode */
3680 md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
3682 lock_res_and_lock(lock);
3683 lvb_ready = ldlm_is_lvb_ready(lock);
3684 unlock_res_and_lock(lock);
3685 /* checking lvb_ready is racy but this is okay. The worst case is
3686 * that multi processes may configure the file on the same time. */
/* Fast path: layout already applied (or caller didn't ask to reconf);
 * just report the current generation. */
3688 if (lvb_ready || !reconf) {
3691 /* layout_gen must be valid if layout lock is not
3692 * cancelled and stripe has already set */
3693 *gen = lli->lli_layout_gen;
/* Make sure the lock's LVB carries the layout before unpacking. */
3699 rc = ll_layout_fetch(inode, lock);
3703 /* for layout lock, lmm is returned in lock's lvb.
3704 * lvb_data is immutable if the lock is held so it's safe to access it
3705 * without res lock. See the description in ldlm_lock_decref_internal()
3706 * for the condition to free lvb_data of layout lock */
3707 if (lock->l_lvb_data != NULL) {
3708 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3709 lock->l_lvb_data, lock->l_lvb_len);
3711 *gen = LL_LAYOUT_GEN_EMPTY;
3713 *gen = md.lsm->lsm_layout_gen;
3716 CERROR("%s: file "DFID" unpackmd error: %d\n",
3717 ll_get_fsname(inode->i_sb, NULL, 0),
3718 PFID(&lli->lli_fid), rc);
3724 /* set layout to file. Unlikely this will fail as old layout was
3725 * surely eliminated */
3726 memset(&conf, 0, sizeof conf);
3727 conf.coc_opc = OBJECT_CONF_SET;
3728 conf.coc_inode = inode;
3729 conf.coc_lock = lock;
3730 conf.u.coc_md = &md;
3731 rc = ll_layout_conf(inode, &conf);
3734 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3736 /* refresh layout failed, need to wait */
3737 wait_layout = rc == -EBUSY;
3741 LDLM_LOCK_PUT(lock);
3742 ldlm_lock_decref(lockh, mode);
3744 /* wait for IO to complete if it's still being used. */
3746 CDEBUG(D_INODE, "%s: "DFID"(%p) wait for layout reconf\n",
3747 ll_get_fsname(inode->i_sb, NULL, 0),
3748 PFID(&lli->lli_fid), inode);
/* OBJECT_CONF_WAIT blocks until in-flight IO drops the old layout. */
3750 memset(&conf, 0, sizeof conf);
3751 conf.coc_opc = OBJECT_CONF_WAIT;
3752 conf.coc_inode = inode;
3753 rc = ll_layout_conf(inode, &conf);
3757 CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
3758 ll_get_fsname(inode->i_sb, NULL, 0),
3759 PFID(&lli->lli_fid), rc);
3765 * This function checks if there exists a LAYOUT lock on the client side,
3766 * or enqueues it if it doesn't have one in cache.
3768 * This function will not hold layout lock so it may be revoked any time after
3769 * this function returns. Any operations depend on layout should be redone
3772 * This function should be called before lov_io_init() to get an uptodate
3773 * layout version, the caller should save the version number and after IO
3774 * is finished, this function should be called again to verify that layout
3775 * is not changed during IO time.
3777 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3779 struct ll_inode_info *lli = ll_i2info(inode);
3780 struct ll_sb_info *sbi = ll_i2sbi(inode);
3781 struct md_op_data *op_data;
3782 struct lookup_intent it;
3783 struct lustre_handle lockh;
3785 struct ldlm_enqueue_info einfo = {
3786 .ei_type = LDLM_IBITS,
3788 .ei_cb_bl = ll_md_blocking_ast,
3789 .ei_cb_cp = ldlm_completion_ast,
/* Layout locking disabled on this mount: report the cached generation. */
3794 *gen = lli->lli_layout_gen;
3795 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3799 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3800 LASSERT(S_ISREG(inode->i_mode));
3802 /* mostly layout lock is caching on the local side, so try to match
3803 * it before grabbing layout lock mutex. */
3804 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3805 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3806 if (mode != 0) { /* hit cached lock */
3807 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3811 /* better hold lli_layout_mutex to try again otherwise
3812 * it will have starvation problem. */
3815 /* take layout lock mutex to enqueue layout lock exclusively. */
3816 mutex_lock(&lli->lli_layout_mutex);
3819 /* try again. Maybe somebody else has done this. */
3820 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0,
3821 LCK_CR | LCK_CW | LCK_PR | LCK_PW);
3822 if (mode != 0) { /* hit cached lock */
3823 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3827 mutex_unlock(&lli->lli_layout_mutex);
3831 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3832 0, 0, LUSTRE_OPC_ANY, NULL);
3833 if (IS_ERR(op_data)) {
3834 mutex_unlock(&lli->lli_layout_mutex);
3835 RETURN(PTR_ERR(op_data));
3838 /* have to enqueue one */
3839 memset(&it, 0, sizeof(it));
3840 it.it_op = IT_LAYOUT;
3841 lockh.cookie = 0ULL;
3843 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file "DFID"(%p)\n",
3844 ll_get_fsname(inode->i_sb, NULL, 0),
3845 PFID(&lli->lli_fid), inode);
3847 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
/* The intent request body is not needed past this point; release it. */
3849 if (it.d.lustre.it_data != NULL)
3850 ptlrpc_req_finished(it.d.lustre.it_data);
3851 it.d.lustre.it_data = NULL;
3853 ll_finish_md_op_data(op_data);
/* Transfer lock ownership out of the intent before dropping it. */
3855 mode = it.d.lustre.it_lock_mode;
3856 it.d.lustre.it_lock_mode = 0;
3857 ll_intent_drop_lock(&it);
3860 /* set lock data in case this is a new lock */
3861 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3862 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3866 mutex_unlock(&lli->lli_layout_mutex);
3872 * This function send a restore request to the MDT
3874 int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length)
3876 struct hsm_user_request *hur;
3880 len = sizeof(struct hsm_user_request) +
3881 sizeof(struct hsm_user_item);
3882 OBD_ALLOC(hur, len);
3886 hur->hur_request.hr_action = HUA_RESTORE;
3887 hur->hur_request.hr_archive_id = 0;
3888 hur->hur_request.hr_flags = 0;
3889 memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
3890 sizeof(hur->hur_user_item[0].hui_fid));
3891 hur->hur_user_item[0].hui_extent.offset = offset;
3892 hur->hur_user_item[0].hui_extent.length = length;
3893 hur->hur_request.hr_itemcount = 1;
3894 rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,