4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/*
 * Allocate a per-open-file ll_file_data from the dedicated slab and
 * clear its write-failure flag.
 * NOTE(review): this extract is missing lines from the original file
 * (NULL check on allocation, return statement, braces) — confirm
 * against the full source before relying on the visible flow.
 */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
58 fd->fd_write_failed = false;
/* Return a ll_file_data to its slab cache (counterpart of
 * ll_file_data_get()).  Extract appears truncated (braces missing). */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the inode's current attributes (fid, mode, a/m/ctime, size,
 * blocks, ext flags, ioepoch), the open file handle @fh and the MDS
 * capability into @op_data for a subsequent MDS RPC (e.g. close).
 * Also sets MDS_DATA_MODIFIED in op_bias when the inode carries the
 * LLIF_DATA_MODIFIED flag, so the MDS learns data was dirtied.
 * NOTE(review): source extract has gaps; code left untouched.
 */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the Lustre-extended iattr wrapper, hence the cast */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
79 ll_inode_to_ext_flags(inode->i_flags);
80 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
82 op_data->op_handle = *fh;
83 op_data->op_capa1 = ll_mdscapa_get(inode);
85 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
86 op_data->op_bias |= MDS_DATA_MODIFIED;
/*
 * Close the IO epoch and pack all attributes into @op_data before
 * sending the MDS close for open handle @och.  Size/blocks are only
 * declared valid for write opens on regular files when Size-on-MDS is
 * not negotiated with the MDS export (see the exp_connect_som() test).
 * NOTE(review): extract is missing lines (braces, early-exit bodies);
 * in particular the bare "if (!(och->och_flags & FMODE_WRITE))" has its
 * controlled statement(s) cut — confirm against the full source.
 */
90 * Closes the IO epoch and packs all the attributes into @op_data for
93 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
94 struct obd_client_handle *och)
98 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
99 ATTR_MTIME_SET | ATTR_CTIME_SET;
101 if (!(och->och_flags & FMODE_WRITE))
104 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
105 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* &och: ll_ioepoch_close() may clear the caller's handle pointer */
107 ll_ioepoch_close(inode, op_data, &och, 0);
110 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
111 ll_prep_md_op_data(op_data, inode, NULL, NULL,
112 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send the MDS close RPC for an open handle, performing the
 * Size-on-MDS update if the MDS requests it, clearing the
 * LLIF_DATA_MODIFIED flag on success, destroying orphan OST objects
 * named in the close reply, and finally invalidating @och.
 * NOTE(review): extract is missing lines (declarations of rc/inode/
 * epoch_close, branch bodies, labels, RETURN) — code left byte-identical.
 */
116 static int ll_close_inode_openhandle(struct obd_export *md_exp,
118 struct obd_client_handle *och)
120 struct obd_export *exp = ll_i2mdexp(inode);
121 struct md_op_data *op_data;
122 struct ptlrpc_request *req = NULL;
123 struct obd_device *obd = class_exp2obd(exp);
130 * XXX: in case of LMV, is this correct to access
133 CERROR("Invalid MDC connection handle "LPX64"\n",
134 ll_i2mdexp(inode)->exp_handle.h_cookie);
138 OBD_ALLOC_PTR(op_data);
140 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
142 ll_prepare_close(inode, op_data, och);
143 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
144 rc = md_close(md_exp, op_data, och->och_mod, &req);
146 /* This close must have the epoch closed. */
147 LASSERT(epoch_close);
148 /* MDS has instructed us to obtain Size-on-MDS attribute from
149 * OSTs and send setattr to back to MDS. */
150 rc = ll_som_update(inode, op_data);
152 CERROR("inode %lu mdc Size-on-MDS update failed: "
153 "rc = %d\n", inode->i_ino, rc);
157 CERROR("inode %lu mdc close failed: rc = %d\n",
161 /* DATA_MODIFIED flag was successfully sent on close, cancel data
162 * modification flag. */
163 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
164 struct ll_inode_info *lli = ll_i2info(inode);
/* lli_lock serializes lli_flags updates with other flag writers */
166 spin_lock(&lli->lli_lock);
167 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
168 spin_unlock(&lli->lli_lock);
171 ll_finish_md_op_data(op_data);
174 rc = ll_objects_destroy(req, inode);
176 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* SOM negotiated but epoch not closed on a written regular file:
 * defer the final attribute update to the DONE_WRITING path */
183 if (exp_connect_som(exp) && !epoch_close &&
184 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
185 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
187 md_clear_open_replay_data(md_exp, och);
188 /* Free @och if it is not waiting for DONE_WRITING. */
189 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
192 if (req) /* This is close request */
193 ptlrpc_req_finished(req);
/*
 * Really close the MDS open handle of the given open mode (write/exec/
 * read) for @inode, but only when no other local users of that handle
 * remain (och_usecount == 0).  The handle pointer is detached under
 * lli_och_mutex, then the close RPC is sent outside the mutex.
 * NOTE(review): extract is missing lines (och detach/NULL assignment,
 * usecount check body, RETURN) — code left byte-identical.
 */
197 int ll_md_real_close(struct inode *inode, int flags)
199 struct ll_inode_info *lli = ll_i2info(inode);
200 struct obd_client_handle **och_p;
201 struct obd_client_handle *och;
/* Select the per-mode handle slot and its use count */
206 if (flags & FMODE_WRITE) {
207 och_p = &lli->lli_mds_write_och;
208 och_usecount = &lli->lli_open_fd_write_count;
209 } else if (flags & FMODE_EXEC) {
210 och_p = &lli->lli_mds_exec_och;
211 och_usecount = &lli->lli_open_fd_exec_count;
213 LASSERT(flags & FMODE_READ);
214 och_p = &lli->lli_mds_read_och;
215 och_usecount = &lli->lli_open_fd_read_count;
218 mutex_lock(&lli->lli_och_mutex);
219 if (*och_usecount) { /* There are still users of this handle, so
221 mutex_unlock(&lli->lli_och_mutex);
226 mutex_unlock(&lli->lli_och_mutex);
228 if (och) { /* There might be a race and somebody have freed this och
230 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-struct-file close: drop any group lock, decrement the per-mode
 * open count under lli_och_mutex, and skip the MDS close RPC when we
 * still hold a usable OPEN DLM lock (md_lock_match with TEST_LOCK).
 * Finally detach and free the ll_file_data and close the capability.
 * NOTE(review): extract is missing lines (rc/lockmode declarations,
 * md_lock_match trailing args, else-branch braces, RETURN) — code
 * left byte-identical.
 */
237 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
240 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
241 struct ll_inode_info *lli = ll_i2info(inode);
245 /* clear group lock, if present */
246 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
247 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
249 /* Let's see if we have good enough OPEN lock on the file and if
250 we can skip talking to MDS */
251 if (file->f_dentry->d_inode) { /* Can this ever be false? */
253 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
254 struct lustre_handle lockh;
255 struct inode *inode = file->f_dentry->d_inode;
256 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
258 mutex_lock(&lli->lli_och_mutex);
259 if (fd->fd_omode & FMODE_WRITE) {
261 LASSERT(lli->lli_open_fd_write_count);
262 lli->lli_open_fd_write_count--;
263 } else if (fd->fd_omode & FMODE_EXEC) {
265 LASSERT(lli->lli_open_fd_exec_count);
266 lli->lli_open_fd_exec_count--;
269 LASSERT(lli->lli_open_fd_read_count);
270 lli->lli_open_fd_read_count--;
272 mutex_unlock(&lli->lli_och_mutex);
/* No matching OPEN lock cached locally: must do the real MDS close */
274 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
275 LDLM_IBITS, &policy, lockmode,
277 rc = ll_md_real_close(file->f_dentry->d_inode,
281 CERROR("Releasing a file %p with negative dentry %p. Name %s",
282 file, file->f_dentry, file->f_dentry->d_name.name);
285 LUSTRE_FPRIVATE(file) = NULL;
286 ll_file_data_put(fd);
287 ll_capa_close(inode);
/*
 * VFS ->release() hook.  Cleans up remote-ACL state for the root inode,
 * stops the statahead thread if this fd owns it, clears async write
 * errors on regular files, and performs the MDS close via ll_md_close().
 * The root dentry case only detaches/frees the fd (no MDS close).
 * NOTE(review): extract is missing lines (rc declaration, RETURN,
 * braces) — code left byte-identical.
 */
292 /* While this returns an error code, fput() the caller does not, so we need
293 * to make every effort to clean up all of our state here. Also, applications
294 * rarely check close errors and even if an error is returned they will not
295 * re-try the close call.
297 int ll_file_release(struct inode *inode, struct file *file)
299 struct ll_file_data *fd;
300 struct ll_sb_info *sbi = ll_i2sbi(inode);
301 struct ll_inode_info *lli = ll_i2info(inode);
305 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
306 inode->i_generation, inode);
308 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping is only attached to the root inode */
309 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
310 inode == inode->i_sb->s_root->d_inode) {
311 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
314 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
315 fd->fd_flags &= ~LL_FILE_RMTACL;
316 rct_del(&sbi->ll_rct, cfs_curproc_pid());
317 et_search_free(&sbi->ll_et, cfs_curproc_pid());
322 if (inode->i_sb->s_root != file->f_dentry)
323 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
324 fd = LUSTRE_FPRIVATE(file);
327 /* The last ref on @file, maybe not the the owner pid of statahead.
328 * Different processes can open the same dir, "ll_opendir_key" means:
329 * it is me that should stop the statahead thread. */
330 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
331 lli->lli_opendir_pid != 0)
332 ll_stop_statahead(inode, lli->lli_opendir_key);
334 if (inode->i_sb->s_root == file->f_dentry) {
335 LUSTRE_FPRIVATE(file) = NULL;
336 ll_file_data_put(fd);
/* Regular files: fold async OST write errors into this close's rc */
340 if (!S_ISDIR(inode->i_mode)) {
341 lov_read_and_clear_async_rc(lli->lli_clob);
342 lli->lli_async_rc = 0;
345 rc = ll_md_close(sbi->ll_md_exp, inode, file);
347 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
348 libcfs_debug_dumplog();
/*
 * Issue an MDS intent-open (md_intent_lock) for @file, optionally
 * carrying striping info @lmm/@lmmsize (used by setstripe).  Requests
 * an OPEN DLM lock only when no striping parameters are supplied.
 * On success, refreshes the inode from the reply and installs lock
 * data.  Owns a special -ESTALE exit path to avoid log flooding.
 * NOTE(review): extract is missing lines (rc declaration, GOTO/RETURN
 * statements, IS_ERR check, labels) — code left byte-identical.
 */
353 static int ll_intent_file_open(struct file *file, void *lmm,
354 int lmmsize, struct lookup_intent *itp)
356 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
357 struct dentry *parent = file->f_dentry->d_parent;
358 const char *name = file->f_dentry->d_name.name;
359 const int len = file->f_dentry->d_name.len;
360 struct md_op_data *op_data;
361 struct ptlrpc_request *req;
362 __u32 opc = LUSTRE_OPC_ANY;
369 /* Usually we come here only for NFSD, and we want open lock.
370 But we can also get here with pre 2.6.15 patchless kernels, and in
371 that case that lock is also ok */
372 /* We can also get here if there was cached open handle in revalidate_it
373 * but it disappeared while we were getting from there to ll_file_open.
374 * But this means this file was closed and immediatelly opened which
375 * makes a good candidate for using OPEN lock */
376 /* If lmmsize & lmm are not 0, we are just setting stripe info
377 * parameters. No need for the open lock */
378 if (lmm == NULL && lmmsize == 0) {
379 itp->it_flags |= MDS_OPEN_LOCK;
380 if (itp->it_flags & FMODE_WRITE)
381 opc = LUSTRE_OPC_CREATE;
384 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
385 file->f_dentry->d_inode, name, len,
388 RETURN(PTR_ERR(op_data));
390 itp->it_flags |= MDS_OPEN_BY_FID;
391 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
392 0 /*unused */, &req, ll_md_blocking_ast, 0);
393 ll_finish_md_op_data(op_data);
395 /* reason for keep own exit path - don`t flood log
396 * with messages with -ESTALE errors.
398 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
399 it_open_error(DISP_OPEN_OPEN, itp))
401 ll_release_openhandle(file->f_dentry, itp);
405 if (it_disposition(itp, DISP_LOOKUP_NEG))
406 GOTO(out, rc = -ENOENT);
408 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
409 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
410 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
414 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
415 if (!rc && itp->d.lustre.it_lock_mode)
416 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
/* cleanup: drop the intent's request ref and any enqueued lock */
420 ptlrpc_req_finished(itp->d.lustre.it_data);
421 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
422 ll_intent_drop_lock(itp);
/*
 * Record a newly obtained @ioepoch on the inode, if it differs from the
 * current one.  A zero ioepoch is ignored.  Per the original comment,
 * no locking is needed because the MDS distrusts attributes while
 * multiple ioepoch holders exist.
 * NOTE(review): extract appears truncated (closing braces missing).
 */
428 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
429 * not believe attributes if a few ioepoch holders exist. Attributes for
430 * previous ioepoch if new one is opened are also skipped by MDS.
432 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
434 if (ioepoch && lli->lli_ioepoch != ioepoch) {
435 lli->lli_ioepoch = ioepoch;
436 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
437 ioepoch, PFID(&lli->lli_fid));
/*
 * Fill @och from the intent-open reply: copy the MDS file handle from
 * the reply body, stamp magic/fid/flags, record the ioepoch, and
 * register the open for replay.  Returns md_set_open_replay_data()'s rc.
 * NOTE(review): extract appears truncated (braces/ENTRY missing).
 */
441 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
442 struct lookup_intent *it, struct obd_client_handle *och)
444 struct ptlrpc_request *req = it->d.lustre.it_data;
445 struct mdt_body *body;
449 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
450 LASSERT(body != NULL); /* reply already checked out */
452 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
453 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
454 och->och_fid = lli->lli_fid;
455 och->och_flags = it->it_flags;
456 ll_ioepoch_open(lli, body->ioepoch);
458 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish the client-local part of an open: optionally fill @och from
 * the intent reply (when och != NULL, presumably — the guarding
 * condition is cut from this extract), attach @fd as the file's
 * private data, init readahead state, and remember the open mode.
 * NOTE(review): extract is missing lines (the if around the och-fill
 * branch, error handling, RETURN) — code left byte-identical.
 */
461 int ll_local_open(struct file *file, struct lookup_intent *it,
462 struct ll_file_data *fd, struct obd_client_handle *och)
464 struct inode *inode = file->f_dentry->d_inode;
465 struct ll_inode_info *lli = ll_i2info(inode);
468 LASSERT(!LUSTRE_FPRIVATE(file));
473 struct ptlrpc_request *req = it->d.lustre.it_data;
474 struct mdt_body *body;
477 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
481 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
482 if ((it->it_flags & FMODE_WRITE) &&
483 (body->valid & OBD_MD_FLSIZE))
484 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
485 lli->lli_ioepoch, PFID(&lli->lli_fid));
488 LUSTRE_FPRIVATE(file) = fd;
489 ll_readahead_init(inode, &fd->fd_ras);
490 fd->fd_omode = it->it_flags;
/*
 * VFS ->open() hook.  Either reuses an intent prepared by lookup
 * (file->private_data) or builds one from f_flags, reuses an existing
 * per-mode MDS open handle when present (releasing any redundant open
 * reply), or performs a fresh intent open and records the new handle.
 * Also claims statahead ownership for directory opens and handles the
 * O_LOV_DELAY_CREATE delayed-object-creation path.
 * NOTE(review): extract is missing many lines (restart label, och==NULL
 * checks, out_openerr/out_och_free label placement, RETURN) — code left
 * byte-identical; control flow below cannot be fully read from this
 * extract alone.
 */
494 /* Open a file, and (for the very first open) create objects on the OSTs at
495 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
496 * creation or open until ll_lov_setstripe() ioctl is called.
498 * If we already have the stripe MD locally then we don't request it in
499 * md_open(), by passing a lmm_size = 0.
501 * It is up to the application to ensure no other processes open this file
502 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
503 * used. We might be able to avoid races of that sort by getting lli_open_sem
504 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
505 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
507 int ll_file_open(struct inode *inode, struct file *file)
509 struct ll_inode_info *lli = ll_i2info(inode);
510 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
511 .it_flags = file->f_flags };
512 struct obd_client_handle **och_p = NULL;
513 __u64 *och_usecount = NULL;
514 struct ll_file_data *fd;
515 int rc = 0, opendir_set = 0;
518 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
519 inode->i_generation, inode, file->f_flags);
521 it = file->private_data; /* XXX: compat macro */
522 file->private_data = NULL; /* prevent ll_local_open assertion */
524 fd = ll_file_data_get();
526 GOTO(out_och_free, rc = -ENOMEM);
/* Directory open: try to become the statahead owner for this dir */
529 if (S_ISDIR(inode->i_mode)) {
530 spin_lock(&lli->lli_sa_lock);
531 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
532 lli->lli_opendir_pid == 0) {
533 lli->lli_opendir_key = fd;
534 lli->lli_opendir_pid = cfs_curproc_pid();
537 spin_unlock(&lli->lli_sa_lock);
/* Root dentry: no MDS open needed, just attach the fd */
540 if (inode->i_sb->s_root == file->f_dentry) {
541 LUSTRE_FPRIVATE(file) = fd;
/* No prepared intent from lookup: synthesize one from f_flags */
545 if (!it || !it->d.lustre.it_disposition) {
546 /* Convert f_flags into access mode. We cannot use file->f_mode,
547 * because everything but O_ACCMODE mask was stripped from
549 if ((oit.it_flags + 1) & O_ACCMODE)
551 if (file->f_flags & O_TRUNC)
552 oit.it_flags |= FMODE_WRITE;
554 /* kernel only call f_op->open in dentry_open. filp_open calls
555 * dentry_open after call to open_namei that checks permissions.
556 * Only nfsd_open call dentry_open directly without checking
557 * permissions and because of that this code below is safe. */
558 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
559 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
561 /* We do not want O_EXCL here, presumably we opened the file
562 * already? XXX - NFS implications? */
563 oit.it_flags &= ~O_EXCL;
565 /* bug20584, if "it_flags" contains O_CREAT, the file will be
566 * created if necessary, then "IT_CREAT" should be set to keep
567 * consistent with it */
568 if (oit.it_flags & O_CREAT)
569 oit.it_op |= IT_CREAT;
575 /* Let's see if we have file open on MDS already. */
576 if (it->it_flags & FMODE_WRITE) {
577 och_p = &lli->lli_mds_write_och;
578 och_usecount = &lli->lli_open_fd_write_count;
579 } else if (it->it_flags & FMODE_EXEC) {
580 och_p = &lli->lli_mds_exec_och;
581 och_usecount = &lli->lli_open_fd_exec_count;
583 och_p = &lli->lli_mds_read_och;
584 och_usecount = &lli->lli_open_fd_read_count;
587 mutex_lock(&lli->lli_och_mutex);
588 if (*och_p) { /* Open handle is present */
589 if (it_disposition(it, DISP_OPEN_OPEN)) {
590 /* Well, there's extra open request that we do not need,
591 let's close it somehow. This will decref request. */
592 rc = it_open_error(DISP_OPEN_OPEN, it);
594 mutex_unlock(&lli->lli_och_mutex);
595 GOTO(out_openerr, rc);
598 ll_release_openhandle(file->f_dentry, it);
/* Reuse the existing handle (och == NULL: no fill needed) */
602 rc = ll_local_open(file, it, fd, NULL);
605 mutex_unlock(&lli->lli_och_mutex);
606 GOTO(out_openerr, rc);
609 LASSERT(*och_usecount == 0);
610 if (!it->d.lustre.it_disposition) {
611 /* We cannot just request lock handle now, new ELC code
612 means that one of other OPEN locks for this file
613 could be cancelled, and since blocking ast handler
614 would attempt to grab och_mutex as well, that would
615 result in a deadlock */
616 mutex_unlock(&lli->lli_och_mutex);
617 it->it_create_mode |= M_CHECK_STALE;
618 rc = ll_intent_file_open(file, NULL, 0, it);
619 it->it_create_mode &= ~M_CHECK_STALE;
621 GOTO(out_openerr, rc);
625 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
627 GOTO(out_och_free, rc = -ENOMEM);
631 /* md_intent_lock() didn't get a request ref if there was an
632 * open error, so don't do cleanup on the request here
634 /* XXX (green): Should not we bail out on any error here, not
635 * just open error? */
636 rc = it_open_error(DISP_OPEN_OPEN, it);
638 GOTO(out_och_free, rc);
640 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
642 rc = ll_local_open(file, it, fd, *och_p);
644 GOTO(out_och_free, rc);
646 mutex_unlock(&lli->lli_och_mutex);
649 /* Must do this outside lli_och_mutex lock to prevent deadlock where
650 different kind of OPEN lock for this same inode gets cancelled
651 by ldlm_cancel_lru */
652 if (!S_ISREG(inode->i_mode))
653 GOTO(out_och_free, rc);
/* No stripe metadata yet: object creation may have been delayed */
657 if (!lli->lli_has_smd) {
658 if (file->f_flags & O_LOV_DELAY_CREATE ||
659 !(file->f_mode & FMODE_WRITE)) {
660 CDEBUG(D_INODE, "object creation was delayed\n");
661 GOTO(out_och_free, rc);
664 file->f_flags &= ~O_LOV_DELAY_CREATE;
665 GOTO(out_och_free, rc);
/* error/common cleanup: free an unused handle slot */
669 if (och_p && *och_p) {
670 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
671 *och_p = NULL; /* OBD_FREE writes some magic there */
674 mutex_unlock(&lli->lli_och_mutex);
677 if (opendir_set != 0)
678 ll_stop_statahead(inode, lli->lli_opendir_key);
680 ll_file_data_put(fd);
682 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* drop the intent's request reference if we still hold it */
685 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
686 ptlrpc_req_finished(it->d.lustre.it_data);
687 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Fetch OST attributes for @lsm into @obdo via an async getattr set,
 * then mask o_valid down to the fields callers consume (blocks, blksz,
 * times, size, data version).  When @sync is set, OBD_FL_SRVLOCK asks
 * the server to take the lock server-side.
 * NOTE(review): extract is missing lines (rc declaration, oi_oa setup,
 * set == NULL branch, if (sync) guard, error/return paths) — code left
 * byte-identical.
 */
693 /* Fills the obdo with the attributes for the lsm */
694 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
695 struct obd_capa *capa, struct obdo *obdo,
696 __u64 ioepoch, int sync)
698 struct ptlrpc_request_set *set;
699 struct obd_info oinfo = { { { 0 } } };
704 LASSERT(lsm != NULL);
708 oinfo.oi_oa->o_id = lsm->lsm_object_id;
709 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
710 oinfo.oi_oa->o_mode = S_IFREG;
711 oinfo.oi_oa->o_ioepoch = ioepoch;
712 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
713 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
714 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
715 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
716 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
717 OBD_MD_FLDATAVERSION;
718 oinfo.oi_capa = capa;
/* presumably guarded by if (sync) in the full source — TODO confirm */
720 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
721 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
724 set = ptlrpc_prep_set();
726 CERROR("can't allocate ptlrpc set\n");
729 rc = obd_getattr_async(exp, &oinfo, set);
731 rc = ptlrpc_set_wait(set);
732 ptlrpc_set_destroy(set);
735 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
736 OBD_MD_FLATIME | OBD_MD_FLMTIME |
737 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
738 OBD_MD_FLDATAVERSION);
/*
 * Perform the OST getattr for @inode's stripe md and refresh the inode
 * fields from the returned obdo.  @sync selects the server-side-lock
 * variant (see ll_lsm_getattr()).
 * NOTE(review): extract is missing lines (rc declaration, capa release,
 * CDEBUG opening line, RETURN) — code left byte-identical.
 */
743 * Performs the getattr on the inode and updates its fields.
744 * If @sync != 0, perform the getattr under the server-side lock.
746 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
747 __u64 ioepoch, int sync)
749 struct obd_capa *capa = ll_mdscapa_get(inode);
750 struct lov_stripe_md *lsm;
754 lsm = ccc_inode_lsm_get(inode);
755 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
756 capa, obdo, ioepoch, sync);
759 obdo_refresh_inode(inode, obdo, obdo->o_valid);
761 "objid "LPX64" size %llu, blocks %llu, blksize %lu\n",
762 lsm ? lsm->lsm_object_id : 0, i_size_read(inode),
763 (unsigned long long)inode->i_blocks,
764 (unsigned long)ll_inode_blksize(inode));
766 ccc_inode_lsm_put(inode, lsm);
/*
 * Merge MDS-provided timestamps (cached in lli_lvb) with OST attributes
 * obtained through the cl_object layer, keeping the most recent of each
 * timestamp, then update the inode's size/blocks/times under the inode
 * size lock.
 * NOTE(review): extract is missing lines (rc/lvb declarations, the
 * rc == 0 guard around the merge, RETURN) — code left byte-identical.
 */
770 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
772 struct ll_inode_info *lli = ll_i2info(inode);
773 struct cl_object *obj = lli->lli_clob;
774 struct cl_attr *attr = ccc_env_thread_attr(env);
780 ll_inode_size_lock(inode);
781 /* merge timestamps the most recently obtained from mds with
782 timestamps obtained from osts */
783 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
784 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
785 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
786 inode_init_lvb(inode, &lvb);
788 cl_object_attr_lock(obj);
789 rc = cl_object_attr_get(env, obj, attr);
790 cl_object_attr_unlock(obj);
/* keep the newest timestamp from either source */
793 if (lvb.lvb_atime < attr->cat_atime)
794 lvb.lvb_atime = attr->cat_atime;
795 if (lvb.lvb_ctime < attr->cat_ctime)
796 lvb.lvb_ctime = attr->cat_ctime;
797 if (lvb.lvb_mtime < attr->cat_mtime)
798 lvb.lvb_mtime = attr->cat_mtime;
800 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
801 PFID(&lli->lli_fid), attr->cat_size);
802 cl_isize_write_nolock(inode, attr->cat_size);
804 inode->i_blocks = attr->cat_blocks;
806 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
807 LTIME_S(inode->i_atime) = lvb.lvb_atime;
808 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
810 ll_inode_size_unlock(inode);
/*
 * Glimpse helper for ioctl: fetch OST attributes for @lsm and copy
 * size/blocks/times into the user-visible stat structure @st.
 * NOTE(review): extract is missing lines (st parameter in the visible
 * signature, rc declaration/guard, RETURN) — code left byte-identical.
 */
815 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
818 struct obdo obdo = { 0 };
821 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
823 st->st_size = obdo.o_size;
824 st->st_blocks = obdo.o_blocks;
825 st->st_mtime = obdo.o_mtime;
826 st->st_atime = obdo.o_atime;
827 st->st_ctime = obdo.o_ctime;
/*
 * Initialize a cl_io from the file's open flags: non-blocking, append,
 * sync-write behavior, target cl_object, and the lock-request policy
 * (never for nolock files, mandatory for O_APPEND, maybe otherwise).
 * NOTE(review): extract is missing lines (the write-only guard around
 * wr_append/wr_sync, braces) — code left byte-identical.
 */
832 void ll_io_init(struct cl_io *io, const struct file *file, int write)
834 struct inode *inode = file->f_dentry->d_inode;
836 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
838 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
839 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC || IS_SYNC(inode);
841 io->ci_obj = ll_i2info(inode)->lli_clob;
842 io->ci_lockreq = CILR_MAYBE;
843 if (ll_file_nolock(file)) {
844 io->ci_lockreq = CILR_NEVER;
845 io->ci_no_srvlock = 1;
846 } else if (file->f_flags & O_APPEND) {
847 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common driver for all read/write entry points (readv/aio/sendfile/
 * splice).  Sets up the cl_io, dispatches per-subtype parameters,
 * serializes writes via lli_write_mutex (unless group-locked) and
 * protects normal reads with lli_trunc_sem, runs cl_io_loop(), advances
 * *ppos, tallies stats, and tracks fd_write_failed.
 * NOTE(review): extract is missing many lines (return type/result
 * declarations, case labels, cl_io_fini, out label, RETURN) — code
 * left byte-identical; do not infer the full flow from this extract.
 */
852 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
853 struct file *file, enum cl_io_type iot,
854 loff_t *ppos, size_t count)
856 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
857 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
862 io = ccc_env_thread_io(env);
863 ll_io_init(io, file, iot == CIT_WRITE);
865 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
866 struct vvp_io *vio = vvp_env_io(env);
867 struct ccc_io *cio = ccc_env_io(env);
868 int write_mutex_locked = 0;
870 cio->cui_fd = LUSTRE_FPRIVATE(file);
871 vio->cui_io_subtype = args->via_io_subtype;
873 switch (vio->cui_io_subtype) {
875 cio->cui_iov = args->u.normal.via_iov;
876 cio->cui_nrsegs = args->u.normal.via_nrsegs;
877 cio->cui_tot_nrsegs = cio->cui_nrsegs;
878 #ifndef HAVE_FILE_WRITEV
879 cio->cui_iocb = args->u.normal.via_iocb;
/* normal writes serialize on lli_write_mutex; group-locked
 * writers and reads use lli_trunc_sem instead */
881 if ((iot == CIT_WRITE) &&
882 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
883 if (mutex_lock_interruptible(&lli->
885 GOTO(out, result = -ERESTARTSYS);
886 write_mutex_locked = 1;
887 } else if (iot == CIT_READ) {
888 down_read(&lli->lli_trunc_sem);
892 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
893 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
896 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
897 vio->u.splice.cui_flags = args->u.splice.via_flags;
900 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
903 result = cl_io_loop(env, io);
904 if (write_mutex_locked)
905 mutex_unlock(&lli->lli_write_mutex);
906 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
907 up_read(&lli->lli_trunc_sem);
909 /* cl_io_rw_init() handled IO */
910 result = io->ci_result;
913 if (io->ci_nob > 0) {
915 *ppos = io->u.ci_wr.wr.crw_pos;
921 if (iot == CIT_READ) {
923 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
924 LPROC_LL_READ_BYTES, result);
925 } else if (iot == CIT_WRITE) {
927 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
928 LPROC_LL_WRITE_BYTES, result);
929 fd->fd_write_failed = false;
931 fd->fd_write_failed = true;
/*
 * Validate an iovec array and compute the total byte count, trimming
 * *nr_segs at the first inaccessible segment — a copy of the kernel's
 * __generic_file_aio_write_nolock() logic.
 * NOTE(review): extract is missing lines (cnt/seg declarations, the
 * accumulation of cnt, *count/*nr_segs stores, returns) — code left
 * byte-identical.
 */
940 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
942 static int ll_file_get_iov_count(const struct iovec *iov,
943 unsigned long *nr_segs, size_t *count)
948 for (seg = 0; seg < *nr_segs; seg++) {
949 const struct iovec *iv = &iov[seg];
952 * If any segment has a negative length, or the cumulative
953 * length ever wraps negative then return -EINVAL.
956 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
958 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
963 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Vectored read entry (pre-AIO kernels, HAVE_FILE_READV build): validate
 * the iovec, set up IO_NORMAL vvp args in a cl env, and delegate to
 * ll_file_io_generic() with CIT_READ.
 * NOTE(review): extract is missing lines (env/result declarations,
 * error returns, RETURN) — code left byte-identical.
 */
970 #ifdef HAVE_FILE_READV
971 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
972 unsigned long nr_segs, loff_t *ppos)
975 struct vvp_io_args *args;
981 result = ll_file_get_iov_count(iov, &nr_segs, &count);
985 env = cl_env_get(&refcheck);
987 RETURN(PTR_ERR(env));
989 args = vvp_env_args(env, IO_NORMAL);
990 args->u.normal.via_iov = (struct iovec *)iov;
991 args->u.normal.via_nrsegs = nr_segs;
993 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
994 cl_env_put(env, &refcheck);
/*
 * Single-buffer read (HAVE_FILE_READV build): wrap @buf in the
 * env-local iovec and forward to ll_file_readv().
 * NOTE(review): extract appears truncated (result declaration/RETURN
 * missing) — code left byte-identical.
 */
998 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1002 struct iovec *local_iov;
1007 env = cl_env_get(&refcheck);
1009 RETURN(PTR_ERR(env));
1011 local_iov = &vvp_env_info(env)->vti_local_iov;
1012 local_iov->iov_base = (void __user *)buf;
1013 local_iov->iov_len = count;
1014 result = ll_file_readv(file, local_iov, 1, ppos);
1015 cl_env_put(env, &refcheck);
/*
 * AIO read entry (non-HAVE_FILE_READV build): validate the iovec, set
 * up IO_NORMAL vvp args (including the kiocb), and run the generic IO
 * path with CIT_READ against iocb->ki_pos.
 * NOTE(review): extract appears truncated (declarations/returns
 * missing) — code left byte-identical.
 */
1020 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1021 unsigned long nr_segs, loff_t pos)
1024 struct vvp_io_args *args;
1030 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1034 env = cl_env_get(&refcheck);
1036 RETURN(PTR_ERR(env));
1038 args = vvp_env_args(env, IO_NORMAL);
1039 args->u.normal.via_iov = (struct iovec *)iov;
1040 args->u.normal.via_nrsegs = nr_segs;
1041 args->u.normal.via_iocb = iocb;
1043 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1044 &iocb->ki_pos, count);
1045 cl_env_put(env, &refcheck);
/*
 * Single-buffer read (AIO build): build a synchronous kiocb plus a
 * one-segment iovec in the env, call ll_file_aio_read(), and propagate
 * the updated position back through *ppos.
 * NOTE(review): extract appears truncated (declarations/guard on *ppos
 * update/RETURN missing) — code left byte-identical.
 */
1049 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1053 struct iovec *local_iov;
1054 struct kiocb *kiocb;
1059 env = cl_env_get(&refcheck);
1061 RETURN(PTR_ERR(env));
1063 local_iov = &vvp_env_info(env)->vti_local_iov;
1064 kiocb = &vvp_env_info(env)->vti_kiocb;
1065 local_iov->iov_base = (void __user *)buf;
1066 local_iov->iov_len = count;
1067 init_sync_kiocb(kiocb, file);
1068 kiocb->ki_pos = *ppos;
1069 kiocb->ki_left = count;
1071 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1072 *ppos = kiocb->ki_pos;
1074 cl_env_put(env, &refcheck);
/*
 * Vectored write entry (HAVE_FILE_WRITEV build): mirror of
 * ll_file_readv() with CIT_WRITE.
 * NOTE(review): extract appears truncated (declarations/returns
 * missing) — code left byte-identical.
 */
1080 * Write to a file (through the page cache).
1082 #ifdef HAVE_FILE_WRITEV
1083 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1084 unsigned long nr_segs, loff_t *ppos)
1087 struct vvp_io_args *args;
1093 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1097 env = cl_env_get(&refcheck);
1099 RETURN(PTR_ERR(env));
1101 args = vvp_env_args(env, IO_NORMAL);
1102 args->u.normal.via_iov = (struct iovec *)iov;
1103 args->u.normal.via_nrsegs = nr_segs;
1105 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1106 cl_env_put(env, &refcheck);
/*
 * Single-buffer write (HAVE_FILE_WRITEV build): wrap @buf in the
 * env-local iovec and forward to ll_file_writev().
 * NOTE(review): extract appears truncated — code left byte-identical.
 */
1110 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1114 struct iovec *local_iov;
1119 env = cl_env_get(&refcheck);
1121 RETURN(PTR_ERR(env));
1123 local_iov = &vvp_env_info(env)->vti_local_iov;
1124 local_iov->iov_base = (void __user *)buf;
1125 local_iov->iov_len = count;
1127 result = ll_file_writev(file, local_iov, 1, ppos);
1128 cl_env_put(env, &refcheck);
/*
 * AIO write entry (non-HAVE_FILE_WRITEV build): mirror of
 * ll_file_aio_read() with CIT_WRITE against iocb->ki_pos.
 * NOTE(review): extract appears truncated (declarations/returns
 * missing) — code left byte-identical.
 */
1132 #else /* AIO stuff */
1133 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1134 unsigned long nr_segs, loff_t pos)
1137 struct vvp_io_args *args;
1143 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1147 env = cl_env_get(&refcheck);
1149 RETURN(PTR_ERR(env));
1151 args = vvp_env_args(env, IO_NORMAL);
1152 args->u.normal.via_iov = (struct iovec *)iov;
1153 args->u.normal.via_nrsegs = nr_segs;
1154 args->u.normal.via_iocb = iocb;
1156 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1157 &iocb->ki_pos, count);
1158 cl_env_put(env, &refcheck);
/*
 * Single-buffer write (AIO build): build a synchronous kiocb plus a
 * one-segment iovec, call ll_file_aio_write(), and propagate the
 * updated position back through *ppos.
 * NOTE(review): extract appears truncated — code left byte-identical.
 */
1162 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1166 struct iovec *local_iov;
1167 struct kiocb *kiocb;
1172 env = cl_env_get(&refcheck);
1174 RETURN(PTR_ERR(env));
1176 local_iov = &vvp_env_info(env)->vti_local_iov;
1177 kiocb = &vvp_env_info(env)->vti_kiocb;
1178 local_iov->iov_base = (void __user *)buf;
1179 local_iov->iov_len = count;
1180 init_sync_kiocb(kiocb, file);
1181 kiocb->ki_pos = *ppos;
1182 kiocb->ki_left = count;
1184 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1185 *ppos = kiocb->ki_pos;
1187 cl_env_put(env, &refcheck);
/*
 * sendfile() backend (HAVE_KERNEL_SENDFILE kernels): set up IO_SENDFILE
 * vvp args (actor + target) and run the generic read path.
 * NOTE(review): extract appears truncated — code left byte-identical.
 */
1193 #ifdef HAVE_KERNEL_SENDFILE
1195 * Send file content (through pagecache) somewhere with helper
1197 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1198 read_actor_t actor, void *target)
1201 struct vvp_io_args *args;
1206 env = cl_env_get(&refcheck);
1208 RETURN(PTR_ERR(env));
1210 args = vvp_env_args(env, IO_SENDFILE);
1211 args->u.sendfile.via_target = target;
1212 args->u.sendfile.via_actor = actor;
1214 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1215 cl_env_put(env, &refcheck);
/*
 * splice_read() backend (HAVE_KERNEL_SPLICE_READ kernels): set up
 * IO_SPLICE vvp args (pipe + flags) and run the generic read path.
 * NOTE(review): extract appears truncated — code left byte-identical.
 */
1220 #ifdef HAVE_KERNEL_SPLICE_READ
1222 * Send file content (through pagecache) somewhere with helper
1224 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1225 struct pipe_inode_info *pipe, size_t count,
1229 struct vvp_io_args *args;
1234 env = cl_env_get(&refcheck);
1236 RETURN(PTR_ERR(env));
1238 args = vvp_env_args(env, IO_SPLICE);
1239 args->u.splice.via_pipe = pipe;
1240 args->u.splice.via_flags = flags;
1242 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1243 cl_env_put(env, &refcheck);
/*
 * Recreate missing OST objects for @inode: clone the stripe md, set
 * OBD_FL_RECREATE_OBJS on an obdo describing the target object/OST
 * index, and call obd_create() under the inode size lock.
 * NOTE(review): extract is missing lines (rc/lsm_size declarations,
 * oa allocation and o_id/o_seq assignment, out label, RETURN) — code
 * left byte-identical.
 */
1248 static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
1251 struct obd_export *exp = ll_i2dtexp(inode);
1252 struct obd_trans_info oti = { 0 };
1253 struct obdo *oa = NULL;
1256 struct lov_stripe_md *lsm = NULL, *lsm2;
1263 lsm = ccc_inode_lsm_get(inode);
1265 GOTO(out, rc = -ENOENT);
1267 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1268 (lsm->lsm_stripe_count));
1270 OBD_ALLOC_LARGE(lsm2, lsm_size);
1272 GOTO(out, rc = -ENOMEM);
/* o_nlink carries the target OST index for object recreation */
1276 oa->o_nlink = ost_idx;
1277 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1278 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1279 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1280 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1281 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1282 memcpy(lsm2, lsm, lsm_size);
1283 ll_inode_size_lock(inode);
1284 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1285 ll_inode_size_unlock(inode);
1287 OBD_FREE_LARGE(lsm2, lsm_size);
1290 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_RECREATE_OBJ ioctl handler: admin-only; copy the request from
 * userspace and delegate to ll_lov_recreate().
 * NOTE(review): extract is missing lines (permission/copy error
 * returns) — code left byte-identical.
 */
1295 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1297 struct ll_recreate_obj ucreat;
1300 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1303 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1307 RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
1308 ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID handler: decode an object id and OST index from a
 * user-supplied FID and recreate that object.  Root only.
 */
1311 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1318 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1321 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
/* pack: low 32 bits of oid plus low 16 bits of seq shifted into id;
 * next 16 bits of seq carry the OST index */
1324 id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
1325 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1326 RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
/*
 * Set striping (LOV EA) on @inode by replaying an IT_OPEN intent carrying
 * the user-supplied lov_user_md.  Fails if a stripe already exists.
 * NOTE(review): elided lines include the error-return for the existing-lsm
 * case, ll_release_openhandle()'s rc handling and the final RETURN.
 */
1329 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1330 int flags, struct lov_user_md *lum, int lum_size)
1332 struct lov_stripe_md *lsm = NULL;
1333 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1337 lsm = ccc_inode_lsm_get(inode);
/* striping is write-once: refuse if the file already has a layout */
1339 ccc_inode_lsm_put(inode, lsm);
1340 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1345 ll_inode_size_lock(inode);
1346 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1349 rc = oit.d.lustre.it_status;
1351 GOTO(out_req_free, rc);
/* close the transient open handle created by the intent */
1353 ll_release_openhandle(file->f_dentry, &oit);
1356 ll_inode_size_unlock(inode);
1357 ll_intent_release(&oit);
1358 ccc_inode_lsm_put(inode, lsm);
/* error path: drop the request pinned in the intent */
1361 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (striping metadata) of @filename from the MDS.
 * On success *lmmp/*lmm_size point into the reply buffer of *request;
 * the caller owns *request and must ptlrpc_req_finished() it, which also
 * invalidates *lmmp.
 * NOTE(review): elided lines include early rc checks and the out:/RETURN
 * tail.
 */
1365 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1366 struct lov_mds_md **lmmp, int *lmm_size,
1367 struct ptlrpc_request **request)
1369 struct ll_sb_info *sbi = ll_i2sbi(inode);
1370 struct mdt_body *body;
1371 struct lov_mds_md *lmm = NULL;
1372 struct ptlrpc_request *req = NULL;
1373 struct md_op_data *op_data;
/* size the getattr reply buffer for the largest possible EA */
1376 rc = ll_get_max_mdsize(sbi, &lmmsize);
1380 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1381 strlen(filename), lmmsize,
1382 LUSTRE_OPC_ANY, NULL);
1383 if (IS_ERR(op_data))
1384 RETURN(PTR_ERR(op_data));
1386 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1387 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1388 ll_finish_md_op_data(op_data);
1390 CDEBUG(D_INFO, "md_getattr_name failed "
1391 "on %s: rc %d\n", filename, rc);
1395 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1396 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1398 lmmsize = body->eadatasize;
/* no EA bits in the reply or zero-sized EA -> no striping stored */
1400 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1402 GOTO(out, rc = -ENODATA);
1405 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1406 LASSERT(lmm != NULL);
1408 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1409 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1410 GOTO(out, rc = -EPROTO);
1414 * This is coming from the MDS, so is probably in
1415 * little endian. We convert it to host endian before
1416 * passing it to userspace.
/* only swab on big-endian hosts (LOV_MAGIC differs from its LE form) */
1418 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1419 /* if function called for directory - we should
1420 * avoid swab not existent lsm objects */
1421 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1422 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
/* directories carry defaults only: no per-object array to swab */
1423 if (S_ISREG(body->mode))
1424 lustre_swab_lov_user_md_objects(
1425 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1426 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1427 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1428 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1429 if (S_ISREG(body->mode))
1430 lustre_swab_lov_user_md_objects(
1431 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1432 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1438 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: install a raw LOV EA (including object list)
 * supplied by a root-privileged user, via the setstripe intent path.
 * lum_size covers one lov_user_md plus a single lov_user_ost_data entry.
 */
1443 static int ll_lov_setea(struct inode *inode, struct file *file,
1446 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1447 struct lov_user_md *lump;
1448 int lum_size = sizeof(struct lov_user_md) +
1449 sizeof(struct lov_user_ost_data);
1453 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1456 OBD_ALLOC_LARGE(lump, lum_size);
1460 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1461 OBD_FREE_LARGE(lump, lum_size);
1465 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1467 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy a v1 (or, if the magic says so, v3)
 * lov_user_md from userspace, apply it, then echo the resulting layout
 * back into the user's buffer via LL_IOC_LOV_GETSTRIPE.
 */
1471 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1474 struct lov_user_md_v3 lumv3;
/* lumv3 doubles as storage for the smaller v1 layout */
1475 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1476 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1477 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1479 int flags = FMODE_WRITE;
1482 /* first try with v1 which is smaller than v3 */
1483 lum_size = sizeof(struct lov_user_md_v1);
1484 if (copy_from_user(lumv1, lumv1p, lum_size))
/* v3 magic: re-copy the full v3 structure (includes pool name) */
1487 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1488 lum_size = sizeof(struct lov_user_md_v3);
1489 if (copy_from_user(&lumv3, lumv3p, lum_size))
1493 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1495 struct lov_stripe_md *lsm;
/* clear stripe count so GETSTRIPE below fills in the real layout */
1498 put_user(0, &lumv1p->lmm_stripe_count);
1500 ll_layout_refresh(inode, &gen);
1501 lsm = ccc_inode_lsm_get(inode);
1502 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1503 0, lsm, (void *)arg);
1504 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_LOV_GETSTRIPE handler: copy the inode's current striping layout
 * into the user buffer at @arg via the LOV iocontrol path.
 */
1509 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1511 struct lov_stripe_md *lsm;
1515 lsm = ccc_inode_lsm_get(inode);
1517 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1519 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_GROUP_LOCK handler: take a cluster-wide group lock with gid @arg
 * on the file's data object and record it in the per-open file data.
 * The lock itself is acquired outside lli_lock (it may block), so a
 * second check after acquisition catches a concurrent winner.
 */
1523 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1525 struct ll_inode_info *lli = ll_i2info(inode);
1526 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1527 struct ccc_grouplock grouplock;
/* group locks are meaningless on nolock mounts */
1531 if (ll_file_nolock(file))
1532 RETURN(-EOPNOTSUPP);
1534 spin_lock(&lli->lli_lock);
/* only one group lock per file descriptor */
1535 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1536 CWARN("group lock already existed with gid %lu\n",
1537 fd->fd_grouplock.cg_gid);
1538 spin_unlock(&lli->lli_lock);
1541 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1542 spin_unlock(&lli->lli_lock);
/* may block unless O_NONBLOCK; hence done without lli_lock held */
1544 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1545 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1549 spin_lock(&lli->lli_lock);
1550 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1551 spin_unlock(&lli->lli_lock);
1552 CERROR("another thread just won the race\n");
/* lost the race: release our freshly acquired lock */
1553 cl_put_grouplock(&grouplock);
1557 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1558 fd->fd_grouplock = grouplock;
1559 spin_unlock(&lli->lli_lock);
1561 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock with gid @arg held
 * on this file descriptor.  State is detached under lli_lock; the actual
 * lock release happens afterwards since it may communicate with servers.
 */
1565 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1567 struct ll_inode_info *lli = ll_i2info(inode);
1568 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1569 struct ccc_grouplock grouplock;
1572 spin_lock(&lli->lli_lock);
1573 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1574 spin_unlock(&lli->lli_lock);
1575 CWARN("no group lock held\n");
1578 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* refuse to release a lock with a different gid than requested */
1580 if (fd->fd_grouplock.cg_gid != arg) {
1581 CWARN("group lock %lu doesn't match current id %lu\n",
1582 arg, fd->fd_grouplock.cg_gid);
1583 spin_unlock(&lli->lli_lock);
/* detach under the spinlock, release outside it */
1587 grouplock = fd->fd_grouplock;
1588 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1589 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1590 spin_unlock(&lli->lli_lock);
1592 cl_put_grouplock(&grouplock);
1593 CDEBUG(D_INFO, "group lock %lu released\n", arg);
/*
 * Close the MDS open handle captured in an intent (used when an intent
 * open produced a handle the caller does not intend to keep).
 * NOTE(review): elided lines include the rc declaration and out:/RETURN
 * tail.
 */
1598 * Close inode open handle
1600 * \param dentry [in] dentry which contains the inode
1601 * \param it [in,out] intent which contains open info and result
1604 * \retval <0 failure
1606 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1608 struct inode *inode = dentry->d_inode;
1609 struct obd_client_handle *och;
1615 /* Root ? Do nothing. */
1616 if (dentry->d_inode->i_sb->s_root == dentry)
1619 /* No open handle to close? Move away */
1620 if (!it_disposition(it, DISP_OPEN_OPEN))
1623 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1625 OBD_ALLOC(och, sizeof(*och));
1627 GOTO(out, rc = -ENOMEM);
/* fill the client handle from the intent's open reply */
1629 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1630 ll_i2info(inode), it, och);
1632 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1635 /* this one is in place of ll_file_open */
1636 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1637 ptlrpc_req_finished(it->d.lustre.it_data);
1638 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Core FIEMAP implementation: build a fiemap key from the inode's stripe
 * metadata and current size, then ask the OSC/LOV layer via obd_get_info
 * to fill in the extent mapping.
 * NOTE(review): elided lines include the fm_key KEY_FIEMAP query call's
 * result copy-back and the out:/RETURN tail.
 */
1644 * Get size for inode for which FIEMAP mapping is requested.
1645 * Make the FIEMAP get_info call and returns the result.
1647 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1650 struct obd_export *exp = ll_i2dtexp(inode);
1651 struct lov_stripe_md *lsm = NULL;
1652 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1653 int vallen = num_bytes;
1657 /* Checks for fiemap flags */
1658 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* report the unsupported flags back to the caller, per fiemap ABI */
1659 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1663 /* Check for FIEMAP_FLAG_SYNC */
1664 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1665 rc = filemap_fdatawrite(inode->i_mapping);
1670 lsm = ccc_inode_lsm_get(inode);
1674 /* If the stripe_count > 1 and the application does not understand
1675 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1677 if (lsm->lsm_stripe_count > 1 &&
1678 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1679 GOTO(out, rc = -EOPNOTSUPP);
1681 fm_key.oa.o_id = lsm->lsm_object_id;
1682 fm_key.oa.o_seq = lsm->lsm_object_seq;
1683 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1685 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1686 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1687 /* If filesize is 0, then there would be no objects for mapping */
1688 if (fm_key.oa.o_size == 0) {
1689 fiemap->fm_mapped_extents = 0;
/* hand the user's fiemap request (incl. first extent) to the key */
1693 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1695 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1698 CERROR("obd_get_info failed: rc = %d\n", rc);
1701 ccc_inode_lsm_put(inode, lsm);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path via the MDC.
 * Copies the fixed-size input, allocates an output buffer large enough
 * for the user's requested pathlen, and copies the result back.
 * Permitted to CAP_DAC_READ_SEARCH holders, or anyone when the fs was
 * mounted with user_fid2path.
 */
1705 int ll_fid2path(struct inode *inode, void *arg)
1707 struct obd_export *exp = ll_i2mdexp(inode);
1708 struct getinfo_fid2path *gfout, *gfin;
1712 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1713 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1716 /* Need to get the buflen */
1717 OBD_ALLOC_PTR(gfin);
1720 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* output carries the path inline after the header */
1725 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1726 OBD_ALLOC(gfout, outsize);
1727 if (gfout == NULL) {
1731 memcpy(gfout, gfin, sizeof(*gfout));
1734 /* Call mdc_iocontrol */
1735 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1738 if (copy_to_user(arg, gfout, outsize))
1742 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size a kernel fiemap buffer from the user's
 * fm_extent_count, run ll_do_fiemap(), and copy back the header plus as
 * many extents as were mapped.
 * NOTE(review): no visible bound check on extent_count before the
 * num_bytes multiplication -- presumably validated elsewhere; confirm
 * against upstream (LU-xxxx hardening added a limit in later versions).
 */
1746 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1748 struct ll_user_fiemap *fiemap_s;
1749 size_t num_bytes, ret_bytes;
1750 unsigned int extent_count;
1753 /* Get the extent count so we can calculate the size of
1754 * required fiemap buffer */
1755 if (get_user(extent_count,
1756 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1758 num_bytes = sizeof(*fiemap_s) + (extent_count *
1759 sizeof(struct ll_fiemap_extent));
1761 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1762 if (fiemap_s == NULL)
1765 /* get the fiemap value */
1766 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1768 GOTO(error, rc = -EFAULT);
1770 /* If fm_extent_count is non-zero, read the first extent since
1771 * it is used to calculate end_offset and device from previous
1774 if (copy_from_user(&fiemap_s->fm_extents[0],
1775 (char __user *)arg + sizeof(*fiemap_s),
1776 sizeof(struct ll_fiemap_extent)))
1777 GOTO(error, rc = -EFAULT);
1780 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
/* copy back header always; extents only if the caller asked for any */
1784 ret_bytes = sizeof(struct ll_user_fiemap);
1786 if (extent_count != 0)
1787 ret_bytes += (fiemap_s->fm_mapped_extents *
1788 sizeof(struct ll_fiemap_extent));
1790 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1794 OBD_FREE_LARGE(fiemap_s, num_bytes);
/*
 * Obtain the inode's data_version (server-computed from stripe object
 * versions).  Files without striping report version 0.
 * NOTE(review): elided lines include the rc checks after ll_lsm_getattr()
 * and the final RETURN.
 */
1799 * Read the data_version for inode.
1801 * This value is computed using stripe object version on OST.
1802 * Version is computed using server side locking.
1804 * @param extent_lock Take extent lock. Not needed if a process is already
1805 * holding the OST object group locks.
1807 int ll_data_version(struct inode *inode, __u64 *data_version,
1810 struct lov_stripe_md *lsm = NULL;
1811 struct ll_sb_info *sbi = ll_i2sbi(inode);
1812 struct obdo *obdo = NULL;
1816 /* If no stripe, we consider version is 0. */
1817 lsm = ccc_inode_lsm_get(inode);
1820 CDEBUG(D_INODE, "No object for inode\n");
1824 OBD_ALLOC_PTR(obdo);
/* allocation failed: drop the lsm reference before returning */
1826 ccc_inode_lsm_put(inode, lsm);
1830 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
/* server did not return a data version -> treat as unsupported */
1832 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1835 *data_version = obdo->o_data_version;
1839 ccc_inode_lsm_put(inode, lsm);
/*
 * Main ioctl dispatcher for regular files: per-fd flag management,
 * striping get/set, object recreation, fiemap, group locks, FID/path
 * queries, data version, HSM state, and a passthrough default to the
 * OSC/obd iocontrol layer.
 * NOTE(review): many case bodies are elided in this excerpt (returns,
 * allocations, copy_to_user rc handling); comments below describe only
 * what the visible lines establish.
 */
1844 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1846 struct inode *inode = file->f_dentry->d_inode;
1847 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1852 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1853 inode->i_generation, inode, cmd);
1854 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1856 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1857 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1861 case LL_IOC_GETFLAGS:
1862 /* Get the current value of the file flags */
1863 return put_user(fd->fd_flags, (int *)arg);
1864 case LL_IOC_SETFLAGS:
1865 case LL_IOC_CLRFLAGS:
1866 /* Set or clear specific file flags */
1867 /* XXX This probably needs checks to ensure the flags are
1868 * not abused, and to handle any flag side effects.
1870 if (get_user(flags, (int *) arg))
1873 if (cmd == LL_IOC_SETFLAGS) {
/* LL_FILE_IGNORE_LOCK only makes sense with O_DIRECT I/O */
1874 if ((flags & LL_FILE_IGNORE_LOCK) &&
1875 !(file->f_flags & O_DIRECT)) {
1876 CERROR("%s: unable to disable locking on "
1877 "non-O_DIRECT file\n", current->comm);
1881 fd->fd_flags |= flags;
1883 fd->fd_flags &= ~flags;
1886 case LL_IOC_LOV_SETSTRIPE:
1887 RETURN(ll_lov_setstripe(inode, file, arg));
1888 case LL_IOC_LOV_SETEA:
1889 RETURN(ll_lov_setea(inode, file, arg));
1890 case LL_IOC_LOV_GETSTRIPE:
1891 RETURN(ll_lov_getstripe(inode, arg));
1892 case LL_IOC_RECREATE_OBJ:
1893 RETURN(ll_lov_recreate_obj(inode, arg));
1894 case LL_IOC_RECREATE_FID:
1895 RETURN(ll_lov_recreate_fid(inode, arg));
1896 case FSFILT_IOC_FIEMAP:
1897 RETURN(ll_ioctl_fiemap(inode, arg));
1898 case FSFILT_IOC_GETFLAGS:
1899 case FSFILT_IOC_SETFLAGS:
1900 RETURN(ll_iocontrol(inode, file, cmd, arg));
1901 case FSFILT_IOC_GETVERSION_OLD:
1902 case FSFILT_IOC_GETVERSION:
1903 RETURN(put_user(inode->i_generation, (int *)arg));
1904 case LL_IOC_GROUP_LOCK:
1905 RETURN(ll_get_grouplock(inode, file, arg));
1906 case LL_IOC_GROUP_UNLOCK:
1907 RETURN(ll_put_grouplock(inode, file, arg));
1908 case IOC_OBD_STATFS:
1909 RETURN(ll_obd_statfs(inode, (void *)arg));
1911 /* We need to special case any other ioctls we want to handle,
1912 * to send them to the MDS/OST as appropriate and to properly
1913 * network encode the arg field.
1914 case FSFILT_IOC_SETVERSION_OLD:
1915 case FSFILT_IOC_SETVERSION:
1917 case LL_IOC_FLUSHCTX:
1918 RETURN(ll_flush_ctx(inode));
1919 case LL_IOC_PATH2FID: {
1920 if (copy_to_user((void *)arg, ll_inode2fid(inode),
1921 sizeof(struct lu_fid)))
1926 case OBD_IOC_FID2PATH:
1927 RETURN(ll_fid2path(inode, (void *)arg));
1928 case LL_IOC_DATA_VERSION: {
1929 struct ioc_data_version idv;
1932 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
/* LL_DV_NOFLUSH lets a group-lock holder skip the extent lock */
1935 rc = ll_data_version(inode, &idv.idv_version,
1936 !(idv.idv_flags & LL_DV_NOFLUSH));
1938 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
1944 case LL_IOC_GET_MDTIDX: {
1947 mdtidx = ll_get_mdt_idx(inode);
1951 if (put_user((int)mdtidx, (int*)arg))
1956 case OBD_IOC_GETDTNAME:
1957 case OBD_IOC_GETMDNAME:
1958 RETURN(ll_get_obd_name(inode, cmd, arg));
1959 case LL_IOC_HSM_STATE_GET: {
1960 struct md_op_data *op_data;
1961 struct hsm_user_state *hus;
/* hus is passed as op_data payload; the MDC fills it in */
1968 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1969 LUSTRE_OPC_ANY, hus);
1970 if (op_data == NULL) {
1975 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
1978 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
1981 ll_finish_md_op_data(op_data);
1985 case LL_IOC_HSM_STATE_SET: {
1986 struct md_op_data *op_data;
1987 struct hsm_state_set *hss;
1993 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
1998 /* Non-root users are forbidden to set or clear flags which are
1999 * NOT defined in HSM_USER_MASK. */
2000 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK)
2001 && !cfs_capable(CFS_CAP_SYS_ADMIN)) {
2006 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2007 LUSTRE_OPC_ANY, hss);
2008 if (op_data == NULL) {
2013 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2016 ll_finish_md_op_data(op_data);
2021 case LL_IOC_HSM_ACTION: {
2022 struct md_op_data *op_data;
2023 struct hsm_current_action *hca;
2030 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2031 LUSTRE_OPC_ANY, hca);
2032 if (op_data == NULL) {
2037 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2040 if (cfs_copy_to_user((char *)arg, hca, sizeof(*hca)))
2043 ll_finish_md_op_data(op_data);
/* default: let registered iocontrol handlers or the OSC have it */
2051 ll_iocontrol_call(inode, file, cmd, arg, &err))
2054 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2060 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Local backport of the kernel's lseek finalizer (only compiled when the
 * kernel lacks generic_file_llseek_size): validate @offset against
 * [0, maxsize] and commit it to f_pos, resetting f_version on change.
 */
2061 static inline loff_t
2062 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2064 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2066 if (offset > maxsize)
/* only dirty f_pos/f_version when the position actually moves */
2069 if (offset != file->f_pos) {
2070 file->f_pos = offset;
2071 file->f_version = 0;
/*
 * Backported generic_file_llseek_size(): llseek with a caller-supplied
 * max offset and end-of-file, supporting SEEK_SET/CUR/END and (in the
 * elided portions, presumably) SEEK_DATA/SEEK_HOLE -- confirm against the
 * upstream kernel implementation this was copied from.
 */
2077 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2078 loff_t maxsize, loff_t eof)
2080 struct inode *inode = file->f_dentry->d_inode;
2088 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2089 * position-querying operation. Avoid rewriting the "same"
2090 * f_pos value back to the file because a concurrent read(),
2091 * write() or lseek() might have altered it
2096 * f_lock protects against read/modify/write race with other
2097 * SEEK_CURs. Note that parallel writes and reads behave
/* SEEK_CUR: serialize the read-modify-write of f_pos via i_mutex */
2100 mutex_lock(&inode->i_mutex);
2101 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2102 mutex_unlock(&inode->i_mutex);
2106 * In the generic case the entire file is data, so as long as
2107 * offset isn't at the end of the file then the offset is data.
2114 * There is a virtual hole at the end of the file, so as long as
2115 * offset isn't i_size or larger, return i_size.
2123 return llseek_execute(file, offset, maxsize);
/*
 * llseek entry point.  For origins that depend on the file size
 * (SEEK_END/SEEK_HOLE/SEEK_DATA) first glimpse the size from the OSTs so
 * eof is current, then delegate to generic_file_llseek_size().
 */
2127 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2129 struct inode *inode = file->f_dentry->d_inode;
2130 loff_t retval, eof = 0;
/* retval here is only the would-be target, computed for the trace log */
2133 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2134 (origin == SEEK_CUR) ? file->f_pos : 0);
2135 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2136 inode->i_ino, inode->i_generation, inode, retval, retval,
2138 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2140 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
/* refresh i_size from the OSTs before using it as eof */
2141 retval = ll_glimpse_size(inode);
2144 eof = i_size_read(inode);
2147 retval = generic_file_llseek_size(file, offset, origin,
2148 ll_file_maxbytes(inode), eof);
/*
 * ->flush (close(2) time): report any async writeback errors recorded
 * against this inode, but only once per file descriptor -- if this fd
 * already saw a write failure, do not report it again.
 */
2152 int ll_flush(struct file *file, fl_owner_t id)
2154 struct inode *inode = file->f_dentry->d_inode;
2155 struct ll_inode_info *lli = ll_i2info(inode);
2156 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2159 LASSERT(!S_ISDIR(inode->i_mode));
2161 /* catch async errors that were recorded back when async writeback
2162 * failed for pages in this mapping. */
/* consume the recorded error so it is reported only once */
2163 rc = lli->lli_async_rc;
2164 lli->lli_async_rc = 0;
2165 err = lov_read_and_clear_async_rc(lli->lli_clob);
2169 /* The application has been told write failure already.
2170 * Do not report failure again. */
2171 if (fd->fd_write_failed)
/* any recorded error maps to -EIO at the flush boundary */
2173 return rc ? -EIO : 0;
/*
 * Push dirty pages in [start, end] through a CIT_FSYNC cl_io.  @mode
 * selects local flush vs. OST sync vs. discard.  On success returns the
 * number of pages written (fi_nr_written).
 * NOTE(review): fio->fi_end assignment and the capa_put/RETURN tail are
 * elided in this view.
 */
2177 * Called to make sure a portion of file has been written out.
2178 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2180 * Return how many pages have been written.
2182 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2183 enum cl_fsync_mode mode)
2185 struct cl_env_nest nest;
2188 struct obd_capa *capa = NULL;
2189 struct cl_fsync_io *fio;
/* reject any mode outside the known fsync mode set */
2193 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2194 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
2197 env = cl_env_nested_get(&nest);
2199 RETURN(PTR_ERR(env));
2201 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2203 io = ccc_env_thread_io(env);
2204 io->ci_obj = cl_i2info(inode)->lli_clob;
/* fsync must proceed even during layout changes */
2205 io->ci_ignore_layout = 1;
2207 /* initialize parameters for sync */
2208 fio = &io->u.ci_fsync;
2209 fio->fi_capa = capa;
2210 fio->fi_start = start;
2212 fio->fi_fid = ll_inode2fid(inode);
2213 fio->fi_mode = mode;
2214 fio->fi_nr_written = 0;
2216 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2217 result = cl_io_loop(env, io);
2219 result = io->ci_result;
/* success: report pages written rather than 0 */
2221 result = fio->fi_nr_written;
2222 cl_io_fini(env, io);
2223 cl_env_nested_put(&nest, env);
2230 #ifdef HAVE_FILE_FSYNC_4ARGS
/*
 * fsync(2)/fdatasync(2): flush page cache, report recorded async write
 * errors, sync metadata via md_sync() to the MDS, and sync data via
 * cl_sync_file_range(CL_FSYNC_ALL, presumably -- mode argument elided).
 * Signature varies with kernel version (4-arg range fsync, 2-arg, or
 * legacy dentry form).
 */
2231 int ll_fsync(struct file *file, loff_t start, loff_t end, int data)
2232 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2233 int ll_fsync(struct file *file, int data)
2235 int ll_fsync(struct file *file, struct dentry *dentry, int data)
2238 struct inode *inode = file->f_dentry->d_inode;
2239 struct ll_inode_info *lli = ll_i2info(inode);
2240 struct ptlrpc_request *req;
2241 struct obd_capa *oc;
2245 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2246 inode->i_generation, inode);
2247 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2249 #ifdef HAVE_FILE_FSYNC_4ARGS
/* on range-fsync kernels we must write+wait and hold i_mutex ourselves */
2250 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2251 mutex_lock(&inode->i_mutex);
2253 /* fsync's caller has already called _fdata{sync,write}, we want
2254 * that IO to finish before calling the osc and mdc sync methods */
2255 rc = filemap_fdatawait(inode->i_mapping);
2258 /* catch async errors that were recorded back when async writeback
2259 * failed for pages in this mapping. */
2260 if (!S_ISDIR(inode->i_mode)) {
2261 err = lli->lli_async_rc;
2262 lli->lli_async_rc = 0;
2265 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* metadata sync to the MDS first, with an MDS capability if any */
2270 oc = ll_mdscapa_get(inode);
2271 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2277 ptlrpc_req_finished(req);
2280 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
/* data sync over the whole object; track per-fd write failure state */
2282 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2284 if (rc == 0 && err < 0)
2287 fd->fd_write_failed = true;
2289 fd->fd_write_failed = false;
2292 #ifdef HAVE_FILE_FSYNC_4ARGS
2293 mutex_unlock(&inode->i_mutex);
/*
 * flock(2)/fcntl(2) byte-range and whole-file locking: translate the VFS
 * file_lock into an LDLM flock enqueue on the MDS, then mirror the
 * result into the local lock tables (flock_lock_file_wait /
 * posix_lock_file_wait).  If the local step fails, the remote lock is
 * rolled back with an LCK_NL (unlock) enqueue.
 * NOTE(review): several case labels (F_RDLCK/F_UNLCK/F_WRLCK, the
 * F_SETLK/F_SETLKW/F_GETLK cmd switch) are elided in this view.
 */
2298 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2300 struct inode *inode = file->f_dentry->d_inode;
2301 struct ll_sb_info *sbi = ll_i2sbi(inode);
2302 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2303 .ei_cb_cp =ldlm_flock_completion_ast,
2304 .ei_cbdata = file_lock };
2305 struct md_op_data *op_data;
2306 struct lustre_handle lockh = {0};
2307 ldlm_policy_data_t flock = {{0}};
2313 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2314 inode->i_ino, file_lock);
2316 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2318 if (file_lock->fl_flags & FL_FLOCK) {
2319 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2320 /* flocks are whole-file locks */
2321 flock.l_flock.end = OFFSET_MAX;
2322 /* For flocks owner is determined by the local file desctiptor*/
2323 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2324 } else if (file_lock->fl_flags & FL_POSIX) {
2325 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2326 flock.l_flock.start = file_lock->fl_start;
2327 flock.l_flock.end = file_lock->fl_end;
2331 flock.l_flock.pid = file_lock->fl_pid;
2333 /* Somewhat ugly workaround for svc lockd.
2334 * lockd installs custom fl_lmops->lm_compare_owner that checks
2335 * for the fl_owner to be the same (which it always is on local node
2336 * I guess between lockd processes) and then compares pid.
2337 * As such we assign pid to the owner field to make it all work,
2338 * conflict with normal locks is unlikely since pid space and
2339 * pointer space for current->files are not intersecting */
2340 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2341 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
2343 switch (file_lock->fl_type) {
/* read lock -> protected-read LDLM mode */
2345 einfo.ei_mode = LCK_PR;
2348 /* An unlock request may or may not have any relation to
2349 * existing locks so we may not be able to pass a lock handle
2350 * via a normal ldlm_lock_cancel() request. The request may even
2351 * unlock a byte range in the middle of an existing lock. In
2352 * order to process an unlock request we need all of the same
2353 * information that is given with a normal read or write record
2354 * lock request. To avoid creating another ldlm unlock (cancel)
2355 * message we'll treat a LCK_NL flock request as an unlock. */
2356 einfo.ei_mode = LCK_NL;
/* write lock -> protected-write LDLM mode */
2359 einfo.ei_mode = LCK_PW;
2362 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2363 file_lock->fl_type);
/* non-blocking set request */
2378 flags = LDLM_FL_BLOCK_NOWAIT;
/* F_GETLK-style query */
2384 flags = LDLM_FL_TEST_LOCK;
2385 /* Save the old mode so that if the mode in the lock changes we
2386 * can decrement the appropriate reader or writer refcount. */
2387 file_lock->fl_type = einfo.ei_mode;
2390 CERROR("unknown fcntl lock command: %d\n", cmd);
2394 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2395 LUSTRE_OPC_ANY, NULL);
2396 if (IS_ERR(op_data))
2397 RETURN(PTR_ERR(op_data));
2399 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2400 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2401 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2403 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2404 op_data, &lockh, &flock, 0, NULL /* req */, flags);
/* mirror a successful (or unlock) result into the local VFS tables */
2406 if ((file_lock->fl_flags & FL_FLOCK) &&
2407 (rc == 0 || file_lock->fl_type == F_UNLCK))
2408 rc2 = flock_lock_file_wait(file, file_lock);
2409 if ((file_lock->fl_flags & FL_POSIX) &&
2410 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2411 !(flags & LDLM_FL_TEST_LOCK))
2412 rc2 = posix_lock_file_wait(file, file_lock);
2414 if (rc2 && file_lock->fl_type != F_UNLCK) {
/* local bookkeeping failed: undo the server-side lock */
2415 einfo.ei_mode = LCK_NL;
2416 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2417 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2421 ll_finish_md_op_data(op_data);
/* Lock handler installed when the client is mounted with -o noflock;
 * body not visible in this excerpt -- presumably logs and returns
 * -ENOSYS.  TODO(review): confirm against the full source. */
2426 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Probe the local LDLM cache (LDLM_FL_TEST_LOCK -- no new locks taken)
 * for IBITS metadata locks covering *bits in mode @l_req_mode; found
 * bits are cleared from *bits, unmatched bits remain.
 */
2434 * test if some locks matching bits and l_req_mode are acquired
2435 * - bits can be in different locks
2436 * - if found clear the common lock bits in *bits
2437 * - the bits not found, are kept in *bits
2439 * \param bits [IN] searched lock bits [IN]
2440 * \param l_req_mode [IN] searched lock mode
2441 * \retval boolean, true iff all bits are found
2443 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2445 struct lustre_handle lockh;
2446 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": search all four conventional modes */
2447 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2448 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2457 fid = &ll_i2info(inode)->lli_fid;
2458 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2459 ldlm_lockname[mode]);
2461 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* test one inodebit per iteration; stop early once all bits matched */
2462 for (i = 0; i < MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2463 policy.l_inodebits.bits = *bits & (1 << i);
2464 if (policy.l_inodebits.bits == 0)
2467 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2468 &policy, mode, &lockh)) {
2469 struct ldlm_lock *lock;
2471 lock = ldlm_handle2lock(&lockh);
/* clear every bit the matched lock covers, not just the probe bit */
2474 ~(lock->l_policy_data.l_inodebits.bits);
2475 LDLM_LOCK_PUT(lock);
2477 *bits &= ~policy.l_inodebits.bits;
/*
 * Match (and reference, unless @flags requests otherwise) a granted
 * metadata lock on @inode covering @bits in any conventional mode;
 * returns the matched mode and fills *lockh.
 */
2484 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2485 struct lustre_handle *lockh, __u64 flags)
2487 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2492 fid = &ll_i2info(inode)->lli_fid;
2493 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2495 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
2496 fid, LDLM_IBITS, &policy,
2497 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process a revalidation RPC result: -ENOENT on a non-regular,
 * non-directory inode is tolerated (object already unlinked); other
 * errors are logged with the inode's FID.
 */
2501 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
2503 /* Already unlinked. Just update nlink and return success */
2504 if (rc == -ENOENT) {
2506 /* This path cannot be hit for regular files unless in
2507 * case of obscure races, so no need to to validate
2509 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
2511 } else if (rc != 0) {
2512 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
2513 ll_get_fsname(inode->i_sb, NULL, 0),
2514 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate @dentry's inode attributes against the MDS.  Two paths:
 * with OBD_CONNECT_ATTRFID, an intent getattr/lookup by FID (which also
 * refreshes dcache validity); otherwise, a plain md_getattr -- skipped
 * entirely if a cached MD lock already covers the requested @ibits.
 * NOTE(review): rc checks after md_intent_lock()/md_getattr() and the
 * RETURN tail are elided in this view.
 */
2520 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2523 struct inode *inode = dentry->d_inode;
2524 struct ptlrpc_request *req = NULL;
2525 struct obd_export *exp;
2529 LASSERT(inode != NULL);
2531 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2532 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2534 exp = ll_i2mdexp(inode);
2536 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2537 * But under CMD case, it caused some lock issues, should be fixed
2538 * with new CMD ibits lock. See bug 12718 */
2539 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
2540 struct lookup_intent oit = { .it_op = IT_GETATTR };
2541 struct md_op_data *op_data;
/* lookup-only revalidation needs only the LOOKUP bit intent */
2543 if (ibits == MDS_INODELOCK_LOOKUP)
2544 oit.it_op = IT_LOOKUP;
2546 /* Call getattr by fid, so do not provide name at all. */
2547 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2548 dentry->d_inode, NULL, 0, 0,
2549 LUSTRE_OPC_ANY, NULL);
2550 if (IS_ERR(op_data))
2551 RETURN(PTR_ERR(op_data));
2553 oit.it_create_mode |= M_CHECK_STALE;
2554 rc = md_intent_lock(exp, op_data, NULL, 0,
2555 /* we are not interested in name
2558 ll_md_blocking_ast, 0);
2559 ll_finish_md_op_data(op_data);
2560 oit.it_create_mode &= ~M_CHECK_STALE;
2562 rc = ll_inode_revalidate_fini(inode, rc);
2566 rc = ll_revalidate_it_finish(req, &oit, dentry);
2568 ll_intent_release(&oit);
2572 /* Unlinked? Unhash dentry, so it is not picked up later by
2573 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2574 here to preserve get_cwd functionality on 2.6.
2576 if (!dentry->d_inode->i_nlink)
2577 d_lustre_invalidate(dentry);
2579 ll_lookup_finish_locks(&oit, dentry);
/* no ATTRFID: issue a getattr RPC only if no lock covers the bits */
2580 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
2581 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2582 obd_valid valid = OBD_MD_FLGETATTR;
2583 struct md_op_data *op_data;
/* regular files also need the EA (layout) sized into the reply */
2586 if (S_ISREG(inode->i_mode)) {
2587 rc = ll_get_max_mdsize(sbi, &ealen);
2590 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2593 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2594 0, ealen, LUSTRE_OPC_ANY,
2596 if (IS_ERR(op_data))
2597 RETURN(PTR_ERR(op_data));
2599 op_data->op_valid = valid;
2600 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2601 * capa for this inode. Because we only keep capas of dirs
2603 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2604 ll_finish_md_op_data(op_data);
2606 rc = ll_inode_revalidate_fini(inode, rc);
2610 rc = ll_prep_inode(&inode, req, NULL, NULL);
2613 ptlrpc_req_finished(req);
/*
 * Revalidate metadata, then refresh size/times: non-regular inodes take
 * times from the cached lvb, regular files glimpse size from the OSTs.
 */
2617 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2620 struct inode *inode = dentry->d_inode;
2624 rc = __ll_inode_revalidate_it(dentry, it, ibits);
2628 /* if object isn't regular file, don't validate size */
2629 if (!S_ISREG(inode->i_mode)) {
2630 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2631 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2632 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
/* regular file: fetch authoritative size/blocks from the OSTs */
2634 rc = ll_glimpse_size(inode);
/*
 * getattr with an explicit intent: revalidate UPDATE|LOOKUP bits, then
 * copy the (now fresh) inode attributes into *stat.  Inode numbers are
 * squashed to 32 bits for 32-bit API consumers.
 */
2639 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2640 struct lookup_intent *it, struct kstat *stat)
2642 struct inode *inode = de->d_inode;
2643 struct ll_sb_info *sbi = ll_i2sbi(inode);
2644 struct ll_inode_info *lli = ll_i2info(inode);
2647 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
2648 MDS_INODELOCK_LOOKUP);
2649 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
2654 stat->dev = inode->i_sb->s_dev;
/* 32-bit userspace needs an ino that fits in 32 bits */
2655 if (ll_need_32bit_api(sbi))
2656 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
2658 stat->ino = inode->i_ino;
2659 stat->mode = inode->i_mode;
2660 stat->nlink = inode->i_nlink;
2661 stat->uid = inode->i_uid;
2662 stat->gid = inode->i_gid;
2663 stat->rdev = inode->i_rdev;
2664 stat->atime = inode->i_atime;
2665 stat->mtime = inode->i_mtime;
2666 stat->ctime = inode->i_ctime;
2667 stat->blksize = 1 << inode->i_blkbits;
2669 stat->size = i_size_read(inode);
2670 stat->blocks = inode->i_blocks;
/*
 * VFS ->getattr entry point: wrap ll_getattr_it() with a plain
 * IT_GETATTR intent.
 */
2674 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2676 struct lookup_intent it = { .it_op = IT_GETATTR };
2678 return ll_getattr_it(mnt, de, &it, stat);
2681 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * VFS ->fiemap handler: translate the kernel's fiemap_extent_info into a
 * Lustre ll_user_fiemap request, run it via ll_do_fiemap(), and copy the
 * mapped extents back to the user-visible extent array.
 */
2682 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2683 __u64 start, __u64 len)
2687 struct ll_user_fiemap *fiemap;
2688 unsigned int extent_count = fieinfo->fi_extents_max;
/* Header plus room for the caller's maximum number of extents. */
2690 num_bytes = sizeof(*fiemap) + (extent_count *
2691 sizeof(struct ll_fiemap_extent));
2692 OBD_ALLOC_LARGE(fiemap, num_bytes);
2697 fiemap->fm_flags = fieinfo->fi_flags;
2698 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2699 fiemap->fm_start = start;
2700 fiemap->fm_length = len;
/* Seed only the first extent from userspace; presumably it carries
 * continuation state for a resumed mapping — TODO confirm against
 * ll_do_fiemap(). */
2701 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2702 sizeof(struct ll_fiemap_extent));
2704 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* Propagate result flags and the extents actually mapped. */
2706 fieinfo->fi_flags = fiemap->fm_flags;
2707 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2708 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2709 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2711 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * Return a referenced copy of the inode's cached POSIX ACL of the given
 * type.  lli_lock guards the cached lli_posix_acl pointer.
 */
2716 struct posix_acl * ll_get_acl(struct inode *inode, int type)
2718 struct ll_inode_info *lli = ll_i2info(inode);
2719 struct posix_acl *acl = NULL;
2722 spin_lock(&lli->lli_lock);
2723 /* VFS' acl_permission_check->check_acl will release the refcount */
2724 acl = posix_acl_dup(lli->lli_posix_acl);
2725 spin_unlock(&lli->lli_lock);
2730 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
/*
 * ACL permission check helper plugged into ll_generic_permission().
 * Signature differs per kernel version (4-arg generic_permission passes
 * flags).  Under RCU walk (IPERM_FLAG_RCU) it bails out rather than
 * block.  Compiled out when generic_permission takes only 2 args.
 */
2732 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
2733 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
2735 ll_check_acl(struct inode *inode, int mask)
2738 # ifdef CONFIG_FS_POSIX_ACL
2739 struct posix_acl *acl;
2743 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* Cannot take locks/refs in RCU-walk mode; caller will retry ref-walk. */
2744 if (flags & IPERM_FLAG_RCU)
2747 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
2752 rc = posix_acl_permission(inode, acl, mask);
2753 posix_acl_release(acl);
2756 # else /* !CONFIG_FS_POSIX_ACL */
2758 # endif /* CONFIG_FS_POSIX_ACL */
2760 #endif /* HAVE_GENERIC_PERMISSION_2ARGS */
/*
 * VFS ->permission entry point (signature varies per kernel version).
 * Revalidates the root inode on demand, defers to the MDS for remote
 * clients, and otherwise runs the generic permission check with
 * ll_check_acl as the ACL callback.
 */
2762 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
2763 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
2765 # ifdef HAVE_INODE_PERMISION_2ARGS
2766 int ll_inode_permission(struct inode *inode, int mask)
2768 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
/* RCU-walk mode: cannot block, punt back to the VFS for ref-walk. */
2775 #ifdef MAY_NOT_BLOCK
2776 if (mask & MAY_NOT_BLOCK)
2778 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
2779 if (flags & IPERM_FLAG_RCU)
2783 /* as root inode are NOT getting validated in lookup operation,
2784 * need to do it before permission check. */
2786 if (inode == inode->i_sb->s_root->d_inode) {
2787 struct lookup_intent it = { .it_op = IT_LOOKUP };
2789 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2790 MDS_INODELOCK_LOOKUP);
2795 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2796 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
/* Remote-client setups delegate the permission decision to the MDS. */
2798 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2799 return lustre_check_remote_perm(inode, mask);
2801 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2802 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
/*
 * Pick the vectored-I/O file_operations member names and implementations
 * for this kernel: old kernels use ->readv/->writev, newer ones
 * ->aio_read/->aio_write.  Used by the file_operations tables below.
 */
2807 #ifdef HAVE_FILE_READV
2808 #define READ_METHOD readv
2809 #define READ_FUNCTION ll_file_readv
2810 #define WRITE_METHOD writev
2811 #define WRITE_FUNCTION ll_file_writev
2813 #define READ_METHOD aio_read
2814 #define READ_FUNCTION ll_file_aio_read
2815 #define WRITE_METHOD aio_write
2816 #define WRITE_FUNCTION ll_file_aio_write
2819 /* -o localflock - only provides locally consistent flock locks */
/* Default regular-file operations: no ->flock/->lock members, so flock
 * falls back to the kernel's local (single-node) implementation. */
2820 struct file_operations ll_file_operations = {
2821 .read = ll_file_read,
2822 .READ_METHOD = READ_FUNCTION,
2823 .write = ll_file_write,
2824 .WRITE_METHOD = WRITE_FUNCTION,
2825 .unlocked_ioctl = ll_file_ioctl,
2826 .open = ll_file_open,
2827 .release = ll_file_release,
2828 .mmap = ll_file_mmap,
2829 .llseek = ll_file_seek,
2830 #ifdef HAVE_KERNEL_SENDFILE
2831 .sendfile = ll_file_sendfile,
2833 #ifdef HAVE_KERNEL_SPLICE_READ
2834 .splice_read = ll_file_splice_read,
/* -o flock variant: identical to ll_file_operations but routes
 * flock()/fcntl() locks through ll_file_flock for cluster-wide
 * consistency. */
2840 struct file_operations ll_file_operations_flock = {
2841 .read = ll_file_read,
2842 .READ_METHOD = READ_FUNCTION,
2843 .write = ll_file_write,
2844 .WRITE_METHOD = WRITE_FUNCTION,
2845 .unlocked_ioctl = ll_file_ioctl,
2846 .open = ll_file_open,
2847 .release = ll_file_release,
2848 .mmap = ll_file_mmap,
2849 .llseek = ll_file_seek,
2850 #ifdef HAVE_KERNEL_SENDFILE
2851 .sendfile = ll_file_sendfile,
2853 #ifdef HAVE_KERNEL_SPLICE_READ
2854 .splice_read = ll_file_splice_read,
2858 .flock = ll_file_flock,
2859 .lock = ll_file_flock
2862 /* These are for -o noflock - to return ENOSYS on flock calls */
2863 struct file_operations ll_file_operations_noflock = {
2864 .read = ll_file_read,
2865 .READ_METHOD = READ_FUNCTION,
2866 .write = ll_file_write,
2867 .WRITE_METHOD = WRITE_FUNCTION,
2868 .unlocked_ioctl = ll_file_ioctl,
2869 .open = ll_file_open,
2870 .release = ll_file_release,
2871 .mmap = ll_file_mmap,
2872 .llseek = ll_file_seek,
2873 #ifdef HAVE_KERNEL_SENDFILE
2874 .sendfile = ll_file_sendfile,
2876 #ifdef HAVE_KERNEL_SPLICE_READ
2877 .splice_read = ll_file_splice_read,
/* ll_file_noflock rejects the request, making flock/lock explicit
 * failures rather than silently local. */
2881 .flock = ll_file_noflock,
2882 .lock = ll_file_noflock
/* Inode operations for regular files: attribute get/set, permission
 * checking, xattrs, and (when the kernel supports them) fiemap and
 * get_acl hooks. */
2885 struct inode_operations ll_file_inode_operations = {
2886 .setattr = ll_setattr,
2887 .getattr = ll_getattr,
2888 .permission = ll_inode_permission,
2889 .setxattr = ll_setxattr,
2890 .getxattr = ll_getxattr,
2891 .listxattr = ll_listxattr,
2892 .removexattr = ll_removexattr,
2893 #ifdef HAVE_LINUX_FIEMAP_H
2894 .fiemap = ll_fiemap,
2896 #ifdef HAVE_IOP_GET_ACL
2897 .get_acl = ll_get_acl,
2901 /* dynamic ioctl number support routines */
/* Registry of externally-registered ioctl handlers: a list of
 * llioc_data entries protected by a reader/writer semaphore. */
2902 static struct llioc_ctl_data {
2903 struct rw_semaphore ioc_sem;
2904 cfs_list_t ioc_head;
2906 __RWSEM_INITIALIZER(llioc.ioc_sem),
2907 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registration record: callback plus the ioctl commands it serves.
 * iocd_cmd is a flexible-style trailing array of iocd_count commands. */
2912 cfs_list_t iocd_list;
2913 unsigned int iocd_size;
2914 llioc_callback_t iocd_cb;
2915 unsigned int iocd_count;
2916 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: cb is invoked for any of the count
 * commands in cmd[].  Returns an opaque cookie ("magic") used later by
 * ll_iocontrol_unregister(), or NULL on bad arguments / allocation
 * failure.
 */
2919 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2922 struct llioc_data *in_data = NULL;
2925 if (cb == NULL || cmd == NULL ||
2926 count > LLIOC_MAX_CMD || count < 0)
2929 size = sizeof(*in_data) + count * sizeof(unsigned int);
2930 OBD_ALLOC(in_data, size);
2931 if (in_data == NULL)
/* NOTE(review): memset may be redundant if OBD_ALLOC zeroes — confirm. */
2934 memset(in_data, 0, sizeof(*in_data));
2935 in_data->iocd_size = size;
2936 in_data->iocd_cb = cb;
2937 in_data->iocd_count = count;
2938 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* Publish under the write lock. */
2940 down_write(&llioc.ioc_sem);
2941 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2942 up_write(&llioc.ioc_sem);
/*
 * Undo ll_iocontrol_register(): find the registration matching the
 * opaque cookie, unlink it under the write lock, and free it.  Warns if
 * the cookie is unknown.
 */
2947 void ll_iocontrol_unregister(void *magic)
2949 struct llioc_data *tmp;
2954 down_write(&llioc.ioc_sem);
2955 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* Size is cached before unlinking since tmp is freed below. */
2957 unsigned int size = tmp->iocd_size;
2959 cfs_list_del(&tmp->iocd_list);
2960 up_write(&llioc.ioc_sem);
2962 OBD_FREE(tmp, size);
2966 up_write(&llioc.ioc_sem);
2968 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2971 EXPORT_SYMBOL(ll_iocontrol_register);
2972 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch an ioctl to registered dynamic handlers.  Scans every
 * registration under the read lock; a callback returning LLIOC_STOP ends
 * the search, and *rcp receives the handler's result (defaults to
 * -EINVAL when no handler claims the command).
 */
2974 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2975 unsigned int cmd, unsigned long arg, int *rcp)
2977 enum llioc_iter ret = LLIOC_CONT;
2978 struct llioc_data *data;
2979 int rc = -EINVAL, i;
2981 down_read(&llioc.ioc_sem);
2982 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2983 for (i = 0; i < data->iocd_count; i++) {
2984 if (cmd != data->iocd_cmd[i])
2987 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2991 if (ret == LLIOC_STOP)
2994 up_read(&llioc.ioc_sem);
/*
 * Push a layout configuration (set/wait) down to the cl_object behind
 * this inode via cl_conf_set().  For OBJECT_CONF_SET the layout lock is
 * only allowed to match after the new layout has been applied, so stale
 * layouts are never observed through lock matching.
 */
3001 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3003 struct ll_inode_info *lli = ll_i2info(inode);
3004 struct cl_env_nest nest;
/* No cl_object yet: nothing to configure. */
3009 if (lli->lli_clob == NULL)
3012 env = cl_env_nested_get(&nest);
3014 RETURN(PTR_ERR(env));
3016 result = cl_conf_set(env, lli->lli_clob, conf);
3017 cl_env_nested_put(&nest, env);
3019 if (conf->coc_opc == OBJECT_CONF_SET) {
3020 struct ldlm_lock *lock = conf->coc_lock;
3022 LASSERT(lock != NULL);
3023 LASSERT(ldlm_has_layout(lock));
3025 /* it can only be allowed to match after layout is
3026 * applied to inode otherwise false layout would be
3027 * seen. Applying layout should happen before dropping
3028 * the intent lock. */
3029 ldlm_lock_allow_match(lock);
3036 * Apply the layout to the inode. Layout lock is held and will be released
/*
 * Take the layout carried in the lock's LVB, unpack it, install it on the
 * inode via ll_layout_conf(), and report the resulting layout generation
 * through *gen.  With reconf the routine also waits for in-flight I/O
 * using the old layout (OBJECT_CONF_WAIT).  The lock reference taken by
 * the caller is dropped on every path.
 */
3039 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3040 struct inode *inode, __u32 *gen, bool reconf)
3042 struct ll_inode_info *lli = ll_i2info(inode);
3043 struct ll_sb_info *sbi = ll_i2sbi(inode);
3044 struct ldlm_lock *lock;
3045 struct lustre_md md = { NULL };
3046 struct cl_object_conf conf;
3051 LASSERT(lustre_handle_is_used(lockh));
3053 lock = ldlm_handle2lock(lockh);
3054 LASSERT(lock != NULL);
3055 LASSERT(ldlm_has_layout(lock));
3057 LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
3058 inode, PFID(&lli->lli_fid), reconf);
3060 lock_res_and_lock(lock);
3061 lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
3062 unlock_res_and_lock(lock);
3063 /* checking lvb_ready is racy but this is okay. The worst case is
3064 * that multi processes may configure the file on the same time. */
3065 if (lvb_ready || !reconf) {
3066 LDLM_LOCK_PUT(lock);
3070 /* layout_gen must be valid if layout lock is not
3071 * cancelled and stripe has already set */
3072 *gen = lli->lli_layout_gen;
3075 ldlm_lock_decref(lockh, mode);
3079 /* for layout lock, lmm is returned in lock's lvb.
3080 * lvb_data is immutable if the lock is held so it's safe to access it
3081 * without res lock. See the description in ldlm_lock_decref_internal()
3082 * for the condition to free lvb_data of layout lock */
3083 if (lock->l_lvb_data != NULL) {
3084 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3085 lock->l_lvb_data, lock->l_lvb_len);
3088 *gen = md.lsm->lsm_layout_gen;
3091 CERROR("%s: file "DFID" unpackmd error: %d\n",
3092 ll_get_fsname(inode->i_sb, NULL, 0),
3093 PFID(&lli->lli_fid), rc);
3097 LDLM_LOCK_PUT(lock);
3098 ldlm_lock_decref(lockh, mode);
3102 /* set layout to file. Unlikely this will fail as old layout was
3103 * surely eliminated */
3104 memset(&conf, 0, sizeof conf);
3105 conf.coc_opc = OBJECT_CONF_SET;
3106 conf.coc_inode = inode;
3107 conf.coc_lock = lock;
3108 conf.u.coc_md = &md;
3109 rc = ll_layout_conf(inode, &conf);
3110 LDLM_LOCK_PUT(lock);
3112 ldlm_lock_decref(lockh, mode);
/* The unpacked stripe md is owned locally; release it. */
3115 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3117 /* wait for IO to complete if it's still being used. */
3119 CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
3120 ll_get_fsname(inode->i_sb, NULL, 0),
3121 inode, PFID(&lli->lli_fid));
3123 memset(&conf, 0, sizeof conf);
3124 conf.coc_opc = OBJECT_CONF_WAIT;
3125 conf.coc_inode = inode;
3126 rc = ll_layout_conf(inode, &conf);
3130 CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
3131 PFID(&lli->lli_fid), rc);
3138 * This function checks if there exists a LAYOUT lock on the client side,
3139 * or enqueues it if it doesn't have one in cache.
3141 * This function does not hold the layout lock, so the lock may be revoked
3142 * any time after this function returns. Any operation that depends on the layout should be redone
3145 * This function should be called before lov_io_init() to get an uptodate
3146 * layout version, the caller should save the version number and after IO
3147 * is finished, this function should be called again to verify that layout
3148 * is not changed during IO time.
3150 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3152 struct ll_inode_info *lli = ll_i2info(inode);
3153 struct ll_sb_info *sbi = ll_i2sbi(inode);
3154 struct md_op_data *op_data;
3155 struct lookup_intent it;
3156 struct lustre_handle lockh;
3158 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS,
3160 .ei_cb_bl = ll_md_blocking_ast,
3161 .ei_cb_cp = ldlm_completion_ast,
3162 .ei_cbdata = inode };
3166 *gen = LL_LAYOUT_GEN_ZERO;
3167 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3171 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3172 LASSERT(S_ISREG(inode->i_mode));
3174 /* mostly layout lock is caching on the local side, so try to match
3175 * it before grabbing layout lock mutex. */
3176 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0);
3177 if (mode != 0) { /* hit cached lock */
3178 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3182 /* better hold lli_layout_mutex to try again otherwise
3183 * it will have starvation problem. */
3186 /* take layout lock mutex to enqueue layout lock exclusively. */
3187 mutex_lock(&lli->lli_layout_mutex);
3190 /* try again. Maybe somebody else has done this. */
3191 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0);
3192 if (mode != 0) { /* hit cached lock */
3193 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3197 mutex_unlock(&lli->lli_layout_mutex);
3201 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3202 0, 0, LUSTRE_OPC_ANY, NULL);
3203 if (IS_ERR(op_data)) {
3204 mutex_unlock(&lli->lli_layout_mutex);
3205 RETURN(PTR_ERR(op_data));
3208 /* have to enqueue one */
3209 memset(&it, 0, sizeof(it));
3210 it.it_op = IT_LAYOUT;
3211 lockh.cookie = 0ULL;
3213 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
3214 ll_get_fsname(inode->i_sb, NULL, 0), inode,
3215 PFID(&lli->lli_fid));
3217 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
3219 if (it.d.lustre.it_data != NULL)
3220 ptlrpc_req_finished(it.d.lustre.it_data);
3221 it.d.lustre.it_data = NULL;
3223 ll_finish_md_op_data(op_data);
3225 mode = it.d.lustre.it_lock_mode;
3226 it.d.lustre.it_lock_mode = 0;
3227 ll_intent_drop_lock(&it);
3230 /* set lock data in case this is a new lock */
3231 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3232 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3236 mutex_unlock(&lli->lli_layout_mutex);