4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/*
 * Allocate a per-file-descriptor ll_file_data object from the slab cache
 * and initialize its write-failure flag.
 * NOTE(review): source view is sampled — the allocation-failure check and
 * RETURN of @fd are not visible here; fd is written right after allocation.
 */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Start each open with a clean write-error state. */
58 fd->fd_write_failed = false;
/* Release a per-file-descriptor ll_file_data back to its slab cache. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the current inode attributes (mode, times, size, blocks, flags,
 * ioepoch) plus the open handle @fh and the MDS capability into @op_data,
 * for transmission to the MDS (typically on close).
 */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* Translate kernel inode flags to the on-wire ext-style flag encoding. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
79 ll_inode_to_ext_flags(inode->i_flags);
80 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
82 op_data->op_handle = *fh;
/* Takes a reference on the MDS capability; released by op_data teardown. */
83 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Tell the MDS the data was modified so it can update SOM attributes. */
85 if (LLIF_DATA_MODIFIED & ll_i2info(inode)->lli_flags)
86 op_data->op_bias |= MDS_DATA_MODIFIED;
90 * Closes the IO epoch and packs all the attributes into @op_data for
93 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
94 struct obd_client_handle *och)
/* Always send mode and timestamps on close. */
98 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
99 ATTR_MTIME_SET | ATTR_CTIME_SET;
101 if (!(och->och_flags & FMODE_WRITE))
/*
 * Size/blocks are only trusted from the client when Size-on-MDS is not
 * negotiated, or for non-regular files.
 */
104 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
105 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* NOTE(review): &och passed here vs och below — epoch-close helper takes
 * the handle pointer's address; intervening lines are not visible. */
107 ll_ioepoch_close(inode, op_data, &och, 0);
110 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
111 ll_prep_md_op_data(op_data, inode, NULL, NULL,
112 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send an MDS_CLOSE for open handle @och on @inode through @md_exp.
 * Performs the Size-on-MDS update if the MDS requests it, clears the
 * DATA_MODIFIED flag on success, destroys orphan OST objects named in the
 * close reply, and invalidates the handle cookie.
 */
116 static int ll_close_inode_openhandle(struct obd_export *md_exp,
118 struct obd_client_handle *och)
120 struct obd_export *exp = ll_i2mdexp(inode);
121 struct md_op_data *op_data;
122 struct ptlrpc_request *req = NULL;
123 struct obd_device *obd = class_exp2obd(exp);
130 * XXX: in case of LMV, is this correct to access
133 CERROR("Invalid MDC connection handle "LPX64"\n",
134 ll_i2mdexp(inode)->exp_handle.h_cookie);
138 OBD_ALLOC_PTR(op_data);
/* Pre-existing known leak on allocation failure (see XXX). */
140 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
142 ll_prepare_close(inode, op_data, och);
/* Remember whether this close also closed the IO epoch. */
143 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
144 rc = md_close(md_exp, op_data, och->och_mod, &req);
146 /* This close must have the epoch closed. */
147 LASSERT(epoch_close);
148 /* MDS has instructed us to obtain Size-on-MDS attribute from
149 * OSTs and send setattr to back to MDS. */
150 rc = ll_som_update(inode, op_data);
152 CERROR("inode %lu mdc Size-on-MDS update failed: "
153 "rc = %d\n", inode->i_ino, rc);
157 CERROR("inode %lu mdc close failed: rc = %d\n",
161 /* DATA_MODIFIED flag was successfully sent on close, cancel data
162 * modification flag. */
163 if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
164 struct ll_inode_info *lli = ll_i2info(inode);
166 spin_lock(&lli->lli_lock);
167 lli->lli_flags &= ~LLIF_DATA_MODIFIED;
168 spin_unlock(&lli->lli_lock);
171 ll_finish_md_op_data(op_data);
/* Destroy OST objects the MDS told us to clean up in the close reply. */
174 rc = ll_objects_destroy(req, inode);
176 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/*
 * If SOM is in use and the epoch was not closed by this close, a
 * DONE_WRITING must still be sent for writable handles.
 */
183 if (exp_connect_som(exp) && !epoch_close &&
184 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
185 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
187 md_clear_open_replay_data(md_exp, och);
188 /* Free @och if it is not waiting for DONE_WRITING. */
189 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
192 if (req) /* This is close request */
193 ptlrpc_req_finished(req);
/*
 * Really close the MDS open handle of the given mode (write/exec/read)
 * for @inode, but only when no other local users of that handle remain.
 * The handle pointer and its use count are selected by @flags.
 */
197 int ll_md_real_close(struct inode *inode, int flags)
199 struct ll_inode_info *lli = ll_i2info(inode);
200 struct obd_client_handle **och_p;
201 struct obd_client_handle *och;
/* Pick the per-mode open handle slot and its reference counter. */
206 if (flags & FMODE_WRITE) {
207 och_p = &lli->lli_mds_write_och;
208 och_usecount = &lli->lli_open_fd_write_count;
209 } else if (flags & FMODE_EXEC) {
210 och_p = &lli->lli_mds_exec_och;
211 och_usecount = &lli->lli_open_fd_exec_count;
213 LASSERT(flags & FMODE_READ);
214 och_p = &lli->lli_mds_read_och;
215 och_usecount = &lli->lli_open_fd_read_count;
218 mutex_lock(&lli->lli_och_mutex);
219 if (*och_usecount) { /* There are still users of this handle, so
221 mutex_unlock(&lli->lli_och_mutex);
226 mutex_unlock(&lli->lli_och_mutex);
228 if (och) { /* There might be a race and somebody have freed this och
230 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close: drop any group lock, decrement the per-mode
 * open count, and — unless a matching cached OPEN DLM lock lets us skip
 * the RPC — perform the real MDS close. Frees the file's ll_file_data
 * and releases the OSS capability.
 */
237 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
240 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
241 struct ll_inode_info *lli = ll_i2info(inode);
245 /* clear group lock, if present */
246 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
247 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
249 /* Let's see if we have good enough OPEN lock on the file and if
250 we can skip talking to MDS */
251 if (file->f_dentry->d_inode) { /* Can this ever be false? */
/* TEST_LOCK: only probe for a granted lock, do not take a reference. */
253 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
254 struct lustre_handle lockh;
255 struct inode *inode = file->f_dentry->d_inode;
256 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
258 mutex_lock(&lli->lli_och_mutex);
/* Drop this descriptor's contribution to the per-mode open count. */
259 if (fd->fd_omode & FMODE_WRITE) {
261 LASSERT(lli->lli_open_fd_write_count);
262 lli->lli_open_fd_write_count--;
263 } else if (fd->fd_omode & FMODE_EXEC) {
265 LASSERT(lli->lli_open_fd_exec_count);
266 lli->lli_open_fd_exec_count--;
269 LASSERT(lli->lli_open_fd_read_count);
270 lli->lli_open_fd_read_count--;
272 mutex_unlock(&lli->lli_och_mutex);
/* No cached OPEN lock of the right mode -> must close on the MDS. */
274 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
275 LDLM_IBITS, &policy, lockmode,
277 rc = ll_md_real_close(file->f_dentry->d_inode,
281 CERROR("Releasing a file %p with negative dentry %p. Name %s",
282 file, file->f_dentry, file->f_dentry->d_name.name);
285 LUSTRE_FPRIVATE(file) = NULL;
286 ll_file_data_put(fd);
287 ll_capa_close(inode);
292 /* While this returns an error code, fput() the caller does not, so we need
293 * to make every effort to clean up all of our state here. Also, applications
294 * rarely check close errors and even if an error is returned they will not
295 * re-try the close call.
297 int ll_file_release(struct inode *inode, struct file *file)
299 struct ll_file_data *fd;
300 struct ll_sb_info *sbi = ll_i2sbi(inode);
301 struct ll_inode_info *lli = ll_i2info(inode);
305 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
306 inode->i_generation, inode);
/* Remote-client ACL bookkeeping applies only to the filesystem root. */
308 #ifdef CONFIG_FS_POSIX_ACL
309 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
310 inode == inode->i_sb->s_root->d_inode) {
311 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
314 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
315 fd->fd_flags &= ~LL_FILE_RMTACL;
316 rct_del(&sbi->ll_rct, cfs_curproc_pid());
317 et_search_free(&sbi->ll_et, cfs_curproc_pid());
/* Don't count releases of the root dentry in the stats. */
322 if (inode->i_sb->s_root != file->f_dentry)
323 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
324 fd = LUSTRE_FPRIVATE(file);
327 /* The last ref on @file, maybe not the the owner pid of statahead.
328 * Different processes can open the same dir, "ll_opendir_key" means:
329 * it is me that should stop the statahead thread. */
330 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
331 lli->lli_opendir_pid != 0)
332 ll_stop_statahead(inode, lli->lli_opendir_key);
/* The root dentry has no MDS open handle; just drop the fd data. */
334 if (inode->i_sb->s_root == file->f_dentry) {
335 LUSTRE_FPRIVATE(file) = NULL;
336 ll_file_data_put(fd);
/* Fold any deferred async write errors into this close's return code. */
340 if (!S_ISDIR(inode->i_mode)) {
341 lov_read_and_clear_async_rc(lli->lli_clob);
342 lli->lli_async_rc = 0;
345 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/* Fault-injection hook: optionally dump the debug log on close. */
347 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
348 libcfs_debug_dumplog();
/*
 * Perform an intent-based open RPC to the MDS for @file. With a NULL
 * @lmm/zero @lmmsize this is a plain open (requesting an OPEN lock);
 * with striping data it is used by setstripe to create the layout.
 * On success the inode is refreshed from the reply and lock data is
 * attached; on failure the openhandle is released.
 */
353 static int ll_intent_file_open(struct file *file, void *lmm,
354 int lmmsize, struct lookup_intent *itp)
356 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
357 struct dentry *parent = file->f_dentry->d_parent;
358 const char *name = file->f_dentry->d_name.name;
359 const int len = file->f_dentry->d_name.len;
360 struct md_op_data *op_data;
361 struct ptlrpc_request *req;
362 __u32 opc = LUSTRE_OPC_ANY;
369 /* Usually we come here only for NFSD, and we want open lock.
370 But we can also get here with pre 2.6.15 patchless kernels, and in
371 that case that lock is also ok */
372 /* We can also get here if there was cached open handle in revalidate_it
373 * but it disappeared while we were getting from there to ll_file_open.
374 * But this means this file was closed and immediatelly opened which
375 * makes a good candidate for using OPEN lock */
376 /* If lmmsize & lmm are not 0, we are just setting stripe info
377 * parameters. No need for the open lock */
378 if (lmm == NULL && lmmsize == 0) {
379 itp->it_flags |= MDS_OPEN_LOCK;
380 if (itp->it_flags & FMODE_WRITE)
381 opc = LUSTRE_OPC_CREATE;
384 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
385 file->f_dentry->d_inode, name, len,
388 RETURN(PTR_ERR(op_data));
/* The inode is known, so let the MDS open it by FID. */
390 itp->it_flags |= MDS_OPEN_BY_FID;
391 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
392 0 /*unused */, &req, ll_md_blocking_ast, 0);
393 ll_finish_md_op_data(op_data);
395 /* reason for keep own exit path - don`t flood log
396 * with messages with -ESTALE errors.
/* -ESTALE with a usable open: drop the handle the MDS gave us. */
398 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
399 it_open_error(DISP_OPEN_OPEN, itp))
401 ll_release_openhandle(file->f_dentry, itp);
405 if (it_disposition(itp, DISP_LOOKUP_NEG))
406 GOTO(out, rc = -ENOENT);
408 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
409 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
410 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the intent reply and bind the DLM lock. */
414 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL, itp);
415 if (!rc && itp->d.lustre.it_lock_mode)
416 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
420 ptlrpc_req_finished(itp->d.lustre.it_data);
421 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
422 ll_intent_drop_lock(itp);
428 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
429 * not believe attributes if a few ioepoch holders exist. Attributes for
430 * previous ioepoch if new one is opened are also skipped by MDS.
432 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a real (non-zero) epoch, and only when it changes. */
434 if (ioepoch && lli->lli_ioepoch != ioepoch) {
435 lli->lli_ioepoch = ioepoch;
436 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
437 ioepoch, PFID(&lli->lli_fid));
/*
 * Fill an obd_client_handle from the MDS open reply carried by @it:
 * copy the open file handle, record FID/flags, adopt the ioepoch, and
 * register the handle for open replay.
 */
441 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
442 struct lookup_intent *it, struct obd_client_handle *och)
444 struct ptlrpc_request *req = it->d.lustre.it_data;
445 struct mdt_body *body;
449 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
450 LASSERT(body != NULL); /* reply already checked out */
452 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
453 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
454 och->och_fid = lli->lli_fid;
455 och->och_flags = it->it_flags;
456 ll_ioepoch_open(lli, body->ioepoch);
/* Arrange for this open to be replayed after MDS recovery. */
458 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish an open locally: optionally fill @och from the intent reply,
 * then attach @fd as the file's private data and initialize readahead
 * state. Records the open mode in fd->fd_omode.
 */
461 int ll_local_open(struct file *file, struct lookup_intent *it,
462 struct ll_file_data *fd, struct obd_client_handle *och)
464 struct inode *inode = file->f_dentry->d_inode;
465 struct ll_inode_info *lli = ll_i2info(inode);
/* Private data must not already be set (see ll_file_open). */
468 LASSERT(!LUSTRE_FPRIVATE(file));
473 struct ptlrpc_request *req = it->d.lustre.it_data;
474 struct mdt_body *body;
477 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
481 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
482 if ((it->it_flags & FMODE_WRITE) &&
483 (body->valid & OBD_MD_FLSIZE))
484 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
485 lli->lli_ioepoch, PFID(&lli->lli_fid));
488 LUSTRE_FPRIVATE(file) = fd;
489 ll_readahead_init(inode, &fd->fd_ras);
490 fd->fd_omode = it->it_flags;
494 /* Open a file, and (for the very first open) create objects on the OSTs at
495 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
496 * creation or open until ll_lov_setstripe() ioctl is called.
498 * If we already have the stripe MD locally then we don't request it in
499 * md_open(), by passing a lmm_size = 0.
501 * It is up to the application to ensure no other processes open this file
502 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
503 * used. We might be able to avoid races of that sort by getting lli_open_sem
504 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
505 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
507 int ll_file_open(struct inode *inode, struct file *file)
509 struct ll_inode_info *lli = ll_i2info(inode);
510 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
511 .it_flags = file->f_flags };
512 struct obd_client_handle **och_p = NULL;
513 __u64 *och_usecount = NULL;
514 struct ll_file_data *fd;
515 int rc = 0, opendir_set = 0;
518 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
519 inode->i_generation, inode, file->f_flags);
/* An intent may have been stashed by lookup/revalidate; claim it. */
521 it = file->private_data; /* XXX: compat macro */
522 file->private_data = NULL; /* prevent ll_local_open assertion */
524 fd = ll_file_data_get();
526 GOTO(out_och_free, rc = -ENOMEM);
/* First opener of a directory becomes the statahead owner. */
529 if (S_ISDIR(inode->i_mode)) {
530 spin_lock(&lli->lli_sa_lock);
531 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
532 lli->lli_opendir_pid == 0) {
533 lli->lli_opendir_key = fd;
534 lli->lli_opendir_pid = cfs_curproc_pid();
537 spin_unlock(&lli->lli_sa_lock);
/* Root dentry: no MDS open handle needed, just attach the fd data. */
540 if (inode->i_sb->s_root == file->f_dentry) {
541 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent — build our own open intent from f_flags. */
545 if (!it || !it->d.lustre.it_disposition) {
546 /* Convert f_flags into access mode. We cannot use file->f_mode,
547 * because everything but O_ACCMODE mask was stripped from
/* O_ACCMODE+1 trick maps O_RDONLY/O_WRONLY/O_RDWR to FMODE bits. */
549 if ((oit.it_flags + 1) & O_ACCMODE)
551 if (file->f_flags & O_TRUNC)
552 oit.it_flags |= FMODE_WRITE;
554 /* kernel only call f_op->open in dentry_open. filp_open calls
555 * dentry_open after call to open_namei that checks permissions.
556 * Only nfsd_open call dentry_open directly without checking
557 * permissions and because of that this code below is safe. */
558 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
559 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
561 /* We do not want O_EXCL here, presumably we opened the file
562 * already? XXX - NFS implications? */
563 oit.it_flags &= ~O_EXCL;
565 /* bug20584, if "it_flags" contains O_CREAT, the file will be
566 * created if necessary, then "IT_CREAT" should be set to keep
567 * consistent with it */
568 if (oit.it_flags & O_CREAT)
569 oit.it_op |= IT_CREAT;
575 /* Let's see if we have file open on MDS already. */
576 if (it->it_flags & FMODE_WRITE) {
577 och_p = &lli->lli_mds_write_och;
578 och_usecount = &lli->lli_open_fd_write_count;
579 } else if (it->it_flags & FMODE_EXEC) {
580 och_p = &lli->lli_mds_exec_och;
581 och_usecount = &lli->lli_open_fd_exec_count;
583 och_p = &lli->lli_mds_read_och;
584 och_usecount = &lli->lli_open_fd_read_count;
587 mutex_lock(&lli->lli_och_mutex);
588 if (*och_p) { /* Open handle is present */
589 if (it_disposition(it, DISP_OPEN_OPEN)) {
590 /* Well, there's extra open request that we do not need,
591 let's close it somehow. This will decref request. */
592 rc = it_open_error(DISP_OPEN_OPEN, it);
594 mutex_unlock(&lli->lli_och_mutex);
595 GOTO(out_openerr, rc);
598 ll_release_openhandle(file->f_dentry, it);
/* Reuse the existing MDS handle: NULL och means "shared". */
602 rc = ll_local_open(file, it, fd, NULL);
605 mutex_unlock(&lli->lli_och_mutex);
606 GOTO(out_openerr, rc);
609 LASSERT(*och_usecount == 0);
610 if (!it->d.lustre.it_disposition) {
611 /* We cannot just request lock handle now, new ELC code
612 means that one of other OPEN locks for this file
613 could be cancelled, and since blocking ast handler
614 would attempt to grab och_mutex as well, that would
615 result in a deadlock */
616 mutex_unlock(&lli->lli_och_mutex);
617 it->it_create_mode |= M_CHECK_STALE;
618 rc = ll_intent_file_open(file, NULL, 0, it);
619 it->it_create_mode &= ~M_CHECK_STALE;
621 GOTO(out_openerr, rc);
625 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
627 GOTO(out_och_free, rc = -ENOMEM);
631 /* md_intent_lock() didn't get a request ref if there was an
632 * open error, so don't do cleanup on the request here
634 /* XXX (green): Should not we bail out on any error here, not
635 * just open error? */
636 rc = it_open_error(DISP_OPEN_OPEN, it);
638 GOTO(out_och_free, rc);
640 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
642 rc = ll_local_open(file, it, fd, *och_p);
644 GOTO(out_och_free, rc);
646 mutex_unlock(&lli->lli_och_mutex);
649 /* Must do this outside lli_och_mutex lock to prevent deadlock where
650 different kind of OPEN lock for this same inode gets cancelled
651 by ldlm_cancel_lru */
652 if (!S_ISREG(inode->i_mode))
653 GOTO(out_och_free, rc);
/* No stripe metadata yet: defer object creation if allowed. */
657 if (!lli->lli_has_smd) {
658 if (file->f_flags & O_LOV_DELAY_CREATE ||
659 !(file->f_mode & FMODE_WRITE)) {
660 CDEBUG(D_INODE, "object creation was delayed\n");
661 GOTO(out_och_free, rc);
664 file->f_flags &= ~O_LOV_DELAY_CREATE;
665 GOTO(out_och_free, rc);
/* Error path: free a handle allocated above but never installed. */
669 if (och_p && *och_p) {
670 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
671 *och_p = NULL; /* OBD_FREE writes some magic there */
674 mutex_unlock(&lli->lli_och_mutex);
/* If we became statahead owner above, undo it on failure. */
677 if (opendir_set != 0)
678 ll_stop_statahead(inode, lli->lli_opendir_key);
680 ll_file_data_put(fd);
682 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
/* Drop the intent's request reference in every exit path. */
685 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
686 ptlrpc_req_finished(it->d.lustre.it_data);
687 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
693 /* Fills the obdo with the attributes for the lsm */
694 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
695 struct obd_capa *capa, struct obdo *obdo,
696 __u64 ioepoch, int sync)
698 struct ptlrpc_request_set *set;
699 struct obd_info oinfo = { { { 0 } } };
704 LASSERT(lsm != NULL);
/* Identify the object and request the full attribute set from the OSTs. */
708 oinfo.oi_oa->o_id = lsm->lsm_object_id;
709 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
710 oinfo.oi_oa->o_mode = S_IFREG;
711 oinfo.oi_oa->o_ioepoch = ioepoch;
712 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
713 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
714 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
715 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
716 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
717 OBD_MD_FLDATAVERSION;
718 oinfo.oi_capa = capa;
/* @sync: take a server-side lock so the returned attrs are coherent. */
720 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
721 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
/* Fan the getattr out to every stripe via a request set and wait. */
724 set = ptlrpc_prep_set();
726 CERROR("can't allocate ptlrpc set\n");
729 rc = obd_getattr_async(exp, &oinfo, set);
731 rc = ptlrpc_set_wait(set);
732 ptlrpc_set_destroy(set);
/* Only these merged attributes are meaningful to the caller. */
735 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
736 OBD_MD_FLATIME | OBD_MD_FLMTIME |
737 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
738 OBD_MD_FLDATAVERSION);
743 * Performs the getattr on the inode and updates its fields.
744 * If @sync != 0, perform the getattr under the server-side lock.
746 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
747 __u64 ioepoch, int sync)
749 struct obd_capa *capa = ll_mdscapa_get(inode);
750 struct lov_stripe_md *lsm;
754 lsm = ccc_inode_lsm_get(inode);
755 rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode),
756 capa, obdo, ioepoch, sync);
/* On success, fold the OST attributes back into the VFS inode. */
759 obdo_refresh_inode(inode, obdo, obdo->o_valid);
761 "objid "LPX64" size %llu, blocks %llu, blksize %lu\n",
762 lsm ? lsm->lsm_object_id : 0, i_size_read(inode),
763 (unsigned long long)inode->i_blocks,
764 (unsigned long)ll_inode_blksize(inode));
766 ccc_inode_lsm_put(inode, lsm);
/*
 * Merge MDS-provided timestamps (cached in lli_lvb) with the attributes
 * obtained from the OSTs via the cl_object layer, keeping the most recent
 * of each, and update i_size/i_blocks — all under the inode size lock.
 */
770 int ll_merge_lvb(const struct lu_env *env, struct inode *inode)
772 struct ll_inode_info *lli = ll_i2info(inode);
773 struct cl_object *obj = lli->lli_clob;
774 struct cl_attr *attr = ccc_env_thread_attr(env);
780 ll_inode_size_lock(inode);
781 /* merge timestamps the most recently obtained from mds with
782 timestamps obtained from osts */
783 LTIME_S(inode->i_atime) = lli->lli_lvb.lvb_atime;
784 LTIME_S(inode->i_mtime) = lli->lli_lvb.lvb_mtime;
785 LTIME_S(inode->i_ctime) = lli->lli_lvb.lvb_ctime;
786 inode_init_lvb(inode, &lvb);
788 cl_object_attr_lock(obj);
789 rc = cl_object_attr_get(env, obj, attr);
790 cl_object_attr_unlock(obj);
/* Keep whichever timestamp source is newer, per field. */
793 if (lvb.lvb_atime < attr->cat_atime)
794 lvb.lvb_atime = attr->cat_atime;
795 if (lvb.lvb_ctime < attr->cat_ctime)
796 lvb.lvb_ctime = attr->cat_ctime;
797 if (lvb.lvb_mtime < attr->cat_mtime)
798 lvb.lvb_mtime = attr->cat_mtime;
800 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
801 PFID(&lli->lli_fid), attr->cat_size);
802 cl_isize_write_nolock(inode, attr->cat_size);
804 inode->i_blocks = attr->cat_blocks;
806 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
807 LTIME_S(inode->i_atime) = lvb.lvb_atime;
808 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
810 ll_inode_size_unlock(inode);
/*
 * Glimpse helper for ioctl paths: fetch current OST attributes for @lsm
 * (without a server lock) and copy size/blocks/times into @st.
 */
815 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
818 struct obdo obdo = { 0 };
821 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
823 st->st_size = obdo.o_size;
824 st->st_blocks = obdo.o_blocks;
825 st->st_mtime = obdo.o_mtime;
826 st->st_atime = obdo.o_atime;
827 st->st_ctime = obdo.o_ctime;
/*
 * Initialize a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: propagate O_NONBLOCK/O_APPEND/O_SYNC/O_DIRECT into the io
 * descriptor and choose the DLM locking policy.
 */
832 void ll_io_init(struct cl_io *io, const struct file *file, int write)
834 struct inode *inode = file->f_dentry->d_inode;
836 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
838 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
839 io->u.ci_wr.wr_sync = file->f_flags & O_SYNC ||
840 file->f_flags & O_DIRECT ||
843 io->ci_obj = ll_i2info(inode)->lli_clob;
844 io->ci_lockreq = CILR_MAYBE;
/* nolock mounts delegate locking to the server; O_APPEND must lock. */
845 if (ll_file_nolock(file)) {
846 io->ci_lockreq = CILR_NEVER;
847 io->ci_no_srvlock = 1;
848 } else if (file->f_flags & O_APPEND) {
849 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common engine for all read/write entry points (readv/aio/sendfile/
 * splice). Builds a cl_io for @iot at @ppos/@count, dispatches on the
 * vvp io subtype, runs the cl_io loop under the appropriate write-mutex
 * or truncate-semaphore, advances *ppos, tallies stats, and tracks
 * write failure in the ll_file_data.
 */
854 ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
855 struct file *file, enum cl_io_type iot,
856 loff_t *ppos, size_t count)
858 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
859 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
864 io = ccc_env_thread_io(env);
865 ll_io_init(io, file, iot == CIT_WRITE);
867 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
868 struct vvp_io *vio = vvp_env_io(env);
869 struct ccc_io *cio = ccc_env_io(env);
870 int write_mutex_locked = 0;
872 cio->cui_fd = LUSTRE_FPRIVATE(file);
873 vio->cui_io_subtype = args->via_io_subtype;
875 switch (vio->cui_io_subtype) {
/* Normal iovec-based IO. */
877 cio->cui_iov = args->u.normal.via_iov;
878 cio->cui_nrsegs = args->u.normal.via_nrsegs;
879 cio->cui_tot_nrsegs = cio->cui_nrsegs;
880 #ifndef HAVE_FILE_WRITEV
881 cio->cui_iocb = args->u.normal.via_iocb;
/* Serialize writers unless a group lock already covers the range. */
883 if ((iot == CIT_WRITE) &&
884 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
885 if (mutex_lock_interruptible(&lli->
887 GOTO(out, result = -ERESTARTSYS);
888 write_mutex_locked = 1;
889 } else if (iot == CIT_READ) {
/* Readers only need to exclude concurrent truncate. */
890 down_read(&lli->lli_trunc_sem);
894 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
895 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
898 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
899 vio->u.splice.cui_flags = args->u.splice.via_flags;
902 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
905 result = cl_io_loop(env, io);
906 if (write_mutex_locked)
907 mutex_unlock(&lli->lli_write_mutex);
908 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
909 up_read(&lli->lli_trunc_sem);
911 /* cl_io_rw_init() handled IO */
912 result = io->ci_result;
/* Advance the file position by the bytes actually transferred. */
915 if (io->ci_nob > 0) {
917 *ppos = io->u.ci_wr.wr.crw_pos;
923 if (iot == CIT_READ) {
925 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
926 LPROC_LL_READ_BYTES, result);
927 } else if (iot == CIT_WRITE) {
929 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
930 LPROC_LL_WRITE_BYTES, result);
/* Remember write outcome for fsync/close error reporting. */
931 fd->fd_write_failed = false;
933 fd->fd_write_failed = true;
942 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array and compute the total byte count into *count,
 * possibly shortening *nr_segs at the first inaccessible segment.
 */
944 static int ll_file_get_iov_count(const struct iovec *iov,
945 unsigned long *nr_segs, size_t *count)
950 for (seg = 0; seg < *nr_segs; seg++) {
951 const struct iovec *iv = &iov[seg];
954 * If any segment has a negative length, or the cumulative
955 * length ever wraps negative then return -EINVAL.
958 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
960 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
965 cnt -= iv->iov_len; /* This segment is no good */
972 #ifdef HAVE_FILE_READV
/*
 * Vectored read entry point (old kernels with f_op->readv): validate the
 * iovec, set up IO_NORMAL args in the cl environment, and run the common
 * IO engine for CIT_READ.
 */
973 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
974 unsigned long nr_segs, loff_t *ppos)
977 struct vvp_io_args *args;
983 result = ll_file_get_iov_count(iov, &nr_segs, &count);
987 env = cl_env_get(&refcheck);
989 RETURN(PTR_ERR(env));
991 args = vvp_env_args(env, IO_NORMAL);
992 args->u.normal.via_iov = (struct iovec *)iov;
993 args->u.normal.via_nrsegs = nr_segs;
995 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
996 cl_env_put(env, &refcheck);
/*
 * Single-buffer read (readv-era kernels): wrap @buf in a one-element
 * iovec from the env's scratch space and delegate to ll_file_readv().
 */
1000 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1004 struct iovec *local_iov;
1009 env = cl_env_get(&refcheck);
1011 RETURN(PTR_ERR(env));
1013 local_iov = &vvp_env_info(env)->vti_local_iov;
1014 local_iov->iov_base = (void __user *)buf;
1015 local_iov->iov_len = count;
1016 result = ll_file_readv(file, local_iov, 1, ppos);
1017 cl_env_put(env, &refcheck);
/*
 * AIO read entry point: validate the iovec, record the iocb in the
 * IO_NORMAL args, and run the common IO engine for CIT_READ using the
 * iocb's position.
 */
1022 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1023 unsigned long nr_segs, loff_t pos)
1026 struct vvp_io_args *args;
1032 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1036 env = cl_env_get(&refcheck);
1038 RETURN(PTR_ERR(env));
1040 args = vvp_env_args(env, IO_NORMAL);
1041 args->u.normal.via_iov = (struct iovec *)iov;
1042 args->u.normal.via_nrsegs = nr_segs;
1043 args->u.normal.via_iocb = iocb;
1045 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1046 &iocb->ki_pos, count);
1047 cl_env_put(env, &refcheck);
/*
 * Single-buffer read (aio-era kernels): build a synchronous kiocb plus a
 * one-element iovec in the env scratch space, delegate to
 * ll_file_aio_read(), and copy the final position back to *ppos.
 */
1051 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1055 struct iovec *local_iov;
1056 struct kiocb *kiocb;
1061 env = cl_env_get(&refcheck);
1063 RETURN(PTR_ERR(env));
1065 local_iov = &vvp_env_info(env)->vti_local_iov;
1066 kiocb = &vvp_env_info(env)->vti_kiocb;
1067 local_iov->iov_base = (void __user *)buf;
1068 local_iov->iov_len = count;
1069 init_sync_kiocb(kiocb, file);
1070 kiocb->ki_pos = *ppos;
1071 kiocb->ki_left = count;
1073 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1074 *ppos = kiocb->ki_pos;
1076 cl_env_put(env, &refcheck);
1082 * Write to a file (through the page cache).
1084 #ifdef HAVE_FILE_WRITEV
/*
 * Vectored write entry point (old kernels with f_op->writev): mirror of
 * ll_file_readv() for CIT_WRITE.
 */
1085 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1086 unsigned long nr_segs, loff_t *ppos)
1089 struct vvp_io_args *args;
1095 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1099 env = cl_env_get(&refcheck);
1101 RETURN(PTR_ERR(env));
1103 args = vvp_env_args(env, IO_NORMAL);
1104 args->u.normal.via_iov = (struct iovec *)iov;
1105 args->u.normal.via_nrsegs = nr_segs;
1107 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1108 cl_env_put(env, &refcheck);
/*
 * Single-buffer write (writev-era kernels): wrap @buf in a one-element
 * iovec and delegate to ll_file_writev().
 */
1112 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1116 struct iovec *local_iov;
1121 env = cl_env_get(&refcheck);
1123 RETURN(PTR_ERR(env));
1125 local_iov = &vvp_env_info(env)->vti_local_iov;
1126 local_iov->iov_base = (void __user *)buf;
1127 local_iov->iov_len = count;
1129 result = ll_file_writev(file, local_iov, 1, ppos);
1130 cl_env_put(env, &refcheck);
1134 #else /* AIO stuff */
/*
 * AIO write entry point: mirror of ll_file_aio_read() for CIT_WRITE,
 * using the iocb's file and position.
 */
1135 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1136 unsigned long nr_segs, loff_t pos)
1139 struct vvp_io_args *args;
1145 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1149 env = cl_env_get(&refcheck);
1151 RETURN(PTR_ERR(env));
1153 args = vvp_env_args(env, IO_NORMAL);
1154 args->u.normal.via_iov = (struct iovec *)iov;
1155 args->u.normal.via_nrsegs = nr_segs;
1156 args->u.normal.via_iocb = iocb;
1158 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1159 &iocb->ki_pos, count);
1160 cl_env_put(env, &refcheck);
/*
 * Single-buffer write (aio-era kernels): build a synchronous kiocb plus
 * a one-element iovec, delegate to ll_file_aio_write(), and copy the
 * final position back to *ppos.
 */
1164 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1168 struct iovec *local_iov;
1169 struct kiocb *kiocb;
1174 env = cl_env_get(&refcheck);
1176 RETURN(PTR_ERR(env));
1178 local_iov = &vvp_env_info(env)->vti_local_iov;
1179 kiocb = &vvp_env_info(env)->vti_kiocb;
1180 local_iov->iov_base = (void __user *)buf;
1181 local_iov->iov_len = count;
1182 init_sync_kiocb(kiocb, file);
1183 kiocb->ki_pos = *ppos;
1184 kiocb->ki_left = count;
1186 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1187 *ppos = kiocb->ki_pos;
1189 cl_env_put(env, &refcheck);
1195 #ifdef HAVE_KERNEL_SENDFILE
1197 * Send file content (through pagecache) somewhere with helper
/*
 * sendfile entry point: run the common IO engine with IO_SENDFILE args
 * carrying the caller's read actor and target.
 */
1199 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1200 read_actor_t actor, void *target)
1203 struct vvp_io_args *args;
1208 env = cl_env_get(&refcheck);
1210 RETURN(PTR_ERR(env));
1212 args = vvp_env_args(env, IO_SENDFILE);
1213 args->u.sendfile.via_target = target;
1214 args->u.sendfile.via_actor = actor;
1216 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1217 cl_env_put(env, &refcheck);
1222 #ifdef HAVE_KERNEL_SPLICE_READ
1224 * Send file content (through pagecache) somewhere with helper
/*
 * splice_read entry point: run the common IO engine with IO_SPLICE args
 * carrying the destination pipe and splice flags.
 */
1226 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1227 struct pipe_inode_info *pipe, size_t count,
1231 struct vvp_io_args *args;
1236 env = cl_env_get(&refcheck);
1238 RETURN(PTR_ERR(env));
1240 args = vvp_env_args(env, IO_SPLICE);
1241 args->u.splice.via_pipe = pipe;
1242 args->u.splice.via_flags = flags;
1244 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1245 cl_env_put(env, &refcheck);
/*
 * Recreate a lost OST object @id (on @ost_idx) for @inode: clone the
 * current stripe metadata, mark the obdo with OBD_FL_RECREATE_OBJS, and
 * ask the OSC/LOV layer to re-create the object under the inode size
 * lock.
 */
1250 static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
1253 struct obd_export *exp = ll_i2dtexp(inode);
1254 struct obd_trans_info oti = { 0 };
1255 struct obdo *oa = NULL;
1258 struct lov_stripe_md *lsm = NULL, *lsm2;
1265 lsm = ccc_inode_lsm_get(inode);
1267 GOTO(out, rc = -ENOENT);
/* Size of the stripe-md clone: header plus one lov_oinfo per stripe. */
1269 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1270 (lsm->lsm_stripe_count));
1272 OBD_ALLOC_LARGE(lsm2, lsm_size);
1274 GOTO(out, rc = -ENOMEM);
/* o_nlink carries the target OST index for the recreate request. */
1278 oa->o_nlink = ost_idx;
1279 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1280 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1281 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1282 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1283 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1284 memcpy(lsm2, lsm, lsm_size);
1285 ll_inode_size_lock(inode);
1286 rc = obd_create(NULL, exp, oa, &lsm2, &oti);
1287 ll_inode_size_unlock(inode);
1289 OBD_FREE_LARGE(lsm2, lsm_size);
1292 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_RECREATE_OBJ handler: copy an ll_recreate_obj request from
 * userspace and recreate the object by id/ost index.  Root-only.
 */
1297 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1299 struct ll_recreate_obj ucreat;
/* Privileged operation: requires CAP_SYS_ADMIN. */
1302 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1305 if (copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
/* seq is passed as 0 here; only id + ost index come from userspace. */
1309 RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
1310 ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID handler: decode a lu_fid from userspace into an
 * (object id, OST index) pair and recreate the object.  Root-only.
 */
1313 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1320 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1323 if (copy_from_user(&fid, (struct lu_fid *)arg, sizeof(fid)))
/* Pack: low 16 bits of seq become bits 32..47 of id; next 16 bits of
 * seq encode the OST index.  (IGIF-style fid decoding.) */
1326 id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
1327 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1328 RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
/*
 * Apply user-supplied striping (@lum) to @inode by re-driving an IT_OPEN
 * intent with the layout attached.  Fails if a layout already exists.
 * NOTE(review): fragmentary view -- RETURN paths and labels are elided.
 */
1331 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1332 int flags, struct lov_user_md *lum, int lum_size)
1334 struct lov_stripe_md *lsm = NULL;
1335 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
/* Striping can only be set once: bail out if an lsm already exists. */
1339 lsm = ccc_inode_lsm_get(inode);
1341 ccc_inode_lsm_put(inode, lsm);
1342 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
/* Size lock held across the intent open that installs the layout. */
1347 ll_inode_size_lock(inode);
1348 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1351 rc = oit.d.lustre.it_status;
1353 GOTO(out_req_free, rc);
/* Close the MDS open handle obtained purely for the setstripe. */
1355 ll_release_openhandle(file->f_dentry, &oit);
1358 ll_inode_size_unlock(inode);
1359 ll_intent_release(&oit);
1360 ccc_inode_lsm_put(inode, lsm);
/* out_req_free: drop the intent's request on the error path. */
1363 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (layout) for @filename from the MDS via
 * md_getattr_name(), byte-swap it to host endianness if needed, and
 * return the lmm buffer (owned by *request) plus its size to the caller.
 * Caller is responsible for finishing *request.
 * NOTE(review): fragmentary view -- some checks/returns are elided.
 */
1367 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1368 struct lov_mds_md **lmmp, int *lmm_size,
1369 struct ptlrpc_request **request)
1371 struct ll_sb_info *sbi = ll_i2sbi(inode);
1372 struct mdt_body *body;
1373 struct lov_mds_md *lmm = NULL;
1374 struct ptlrpc_request *req = NULL;
1375 struct md_op_data *op_data;
/* Ask for a reply buffer big enough for the largest possible EA. */
1378 rc = ll_get_max_mdsize(sbi, &lmmsize);
1382 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1383 strlen(filename), lmmsize,
1384 LUSTRE_OPC_ANY, NULL);
1385 if (IS_ERR(op_data))
1386 RETURN(PTR_ERR(op_data));
1388 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1389 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1390 ll_finish_md_op_data(op_data);
1392 CDEBUG(D_INFO, "md_getattr_name failed "
1393 "on %s: rc %d\n", filename, rc);
1397 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1398 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1400 lmmsize = body->eadatasize;
/* No EA in the reply -> the file has no striping data. */
1402 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1404 GOTO(out, rc = -ENODATA);
1407 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1408 LASSERT(lmm != NULL);
/* Only V1/V3 layouts are understood here. */
1410 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1411 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1412 GOTO(out, rc = -EPROTO);
1416 * This is coming from the MDS, so is probably in
1417 * little endian. We convert it to host endian before
1418 * passing it to userspace.
/* Swab only on big-endian hosts (LOV_MAGIC differs from its LE form). */
1420 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1421 /* if function called for directory - we should
1422 * avoid swab not existent lsm objects */
1423 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1424 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1425 if (S_ISREG(body->mode))
1426 lustre_swab_lov_user_md_objects(
1427 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1428 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1429 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1430 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1431 if (S_ISREG(body->mode))
1432 lustre_swab_lov_user_md_objects(
1433 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1434 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1440 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (with one ost_data entry)
 * from userspace and install it as the file's layout.  Root-only; the
 * MDS_OPEN_HAS_OBJS flag signals pre-created objects.
 */
1445 static int ll_lov_setea(struct inode *inode, struct file *file,
1448 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1449 struct lov_user_md *lump;
/* Buffer sized for the header plus a single ost_data entry.
 * NOTE(review): presumably the stripe list follows -- elided here. */
1450 int lum_size = sizeof(struct lov_user_md) +
1451 sizeof(struct lov_user_ost_data);
1455 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1458 OBD_ALLOC_LARGE(lump, lum_size);
1462 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1463 OBD_FREE_LARGE(lump, lum_size);
1467 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1469 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: read a V1 or V3 lov_user_md from
 * userspace, apply it, then refresh the layout and echo the resulting
 * striping back to the caller's buffer.
 * NOTE(review): fragmentary view -- some error returns are elided.
 */
1473 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1476 struct lov_user_md_v3 lumv3;
/* lumv1 aliases the start of lumv3 so the V1 prefix can be read first. */
1477 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1478 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1479 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1481 int flags = FMODE_WRITE;
1484 /* first try with v1 which is smaller than v3 */
1485 lum_size = sizeof(struct lov_user_md_v1);
1486 if (copy_from_user(lumv1, lumv1p, lum_size))
/* Magic says V3: re-copy the full V3 structure. */
1489 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1490 lum_size = sizeof(struct lov_user_md_v3);
1491 if (copy_from_user(&lumv3, lumv3p, lum_size))
1495 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1497 struct lov_stripe_md *lsm;
/* Tell GETSTRIPE below not to expect pre-sized user objects array. */
1500 put_user(0, &lumv1p->lmm_stripe_count);
/* Pick up the new layout generation, then report it to userspace. */
1502 ll_layout_refresh(inode, &gen);
1503 lsm = ccc_inode_lsm_get(inode);
1504 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1505 0, lsm, (void *)arg);
1506 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_LOV_GETSTRIPE handler: hand the inode's stripe metadata to the
 * LOV layer, which copies the layout out to the user buffer at @arg.
 */
1511 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1513 struct lov_stripe_md *lsm;
1517 lsm = ccc_inode_lsm_get(inode);
1519 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
1521 ccc_inode_lsm_put(inode, lsm);
/*
 * LL_IOC_GROUP_LOCK handler: take a cl-layer group lock with gid @arg and
 * record it in the per-open-file data.  Only one group lock per fd; a
 * race between two takers on the same fd is detected after the enqueue.
 */
1525 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1527 struct ll_inode_info *lli = ll_i2info(inode);
1528 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1529 struct ccc_grouplock grouplock;
/* Group locks make no sense when locking is disabled for this file. */
1533 if (ll_file_nolock(file))
1534 RETURN(-EOPNOTSUPP);
/* Check under lli_lock that this fd does not already hold a group lock. */
1536 spin_lock(&lli->lli_lock);
1537 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1538 CWARN("group lock already existed with gid %lu\n",
1539 fd->fd_grouplock.cg_gid);
1540 spin_unlock(&lli->lli_lock);
1543 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1544 spin_unlock(&lli->lli_lock);
/* Enqueue outside the spinlock; may block unless O_NONBLOCK is set. */
1546 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1547 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check: another thread may have installed a lock meanwhile. */
1551 spin_lock(&lli->lli_lock);
1552 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1553 spin_unlock(&lli->lli_lock);
1554 CERROR("another thread just won the race\n");
1555 cl_put_grouplock(&grouplock);
/* Publish the won lock on this fd. */
1559 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1560 fd->fd_grouplock = grouplock;
1561 spin_unlock(&lli->lli_lock);
1563 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock with gid @arg held
 * on this fd.  Warns (without releasing) if no lock is held or the gid
 * does not match.  The cl-layer put happens outside lli_lock.
 */
1567 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1569 struct ll_inode_info *lli = ll_i2info(inode);
1570 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1571 struct ccc_grouplock grouplock;
1574 spin_lock(&lli->lli_lock);
1575 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1576 spin_unlock(&lli->lli_lock);
1577 CWARN("no group lock held\n");
1580 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* The caller must name the gid it actually holds. */
1582 if (fd->fd_grouplock.cg_gid != arg) {
1583 CWARN("group lock %lu doesn't match current id %lu\n",
1584 arg, fd->fd_grouplock.cg_gid);
1585 spin_unlock(&lli->lli_lock);
/* Detach the lock from the fd under the spinlock ... */
1589 grouplock = fd->fd_grouplock;
1590 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1591 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1592 spin_unlock(&lli->lli_lock);
/* ... and drop the cl-layer reference after unlocking. */
1594 cl_put_grouplock(&grouplock);
1595 CDEBUG(D_INFO, "group lock %lu released\n", arg);
/*
 * Close the MDS open handle carried by a lookup intent (e.g. one created
 * only to perform a setstripe).  No-op for the filesystem root or when
 * the intent carries no open disposition.
 * NOTE(review): fragmentary view -- some returns/labels are elided.
 */
1600 * Close inode open handle
1602 * \param dentry [in] dentry which contains the inode
1603 * \param it [in,out] intent which contains open info and result
1606 * \retval <0 failure
1608 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1610 struct inode *inode = dentry->d_inode;
1611 struct obd_client_handle *och;
1617 /* Root ? Do nothing. */
1618 if (dentry->d_inode->i_sb->s_root == dentry)
1621 /* No open handle to close? Move away */
1622 if (!it_disposition(it, DISP_OPEN_OPEN))
1625 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1627 OBD_ALLOC(och, sizeof(*och));
1629 GOTO(out, rc = -ENOMEM);
/* Build a client handle from the intent, then close it on the MDS. */
1631 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1632 ll_i2info(inode), it, och);
1634 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1637 /* this one is in place of ll_file_open */
1638 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1639 ptlrpc_req_finished(it->d.lustre.it_data);
1640 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/*
 * Core FIEMAP implementation: validate flags, optionally flush dirty
 * pages, then forward the request to the OSC/LOV layer via
 * obd_get_info(KEY_FIEMAP).  @fiemap is both input and output.
 * NOTE(review): fragmentary view -- several early returns are elided.
 */
1646 * Get size for inode for which FIEMAP mapping is requested.
1647 * Make the FIEMAP get_info call and returns the result.
1649 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1652 struct obd_export *exp = ll_i2dtexp(inode);
1653 struct lov_stripe_md *lsm = NULL;
1654 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1655 int vallen = num_bytes;
1659 /* Checks for fiemap flags */
/* Reject flags we do not support; report the supported set back. */
1660 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1661 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1665 /* Check for FIEMAP_FLAG_SYNC */
1666 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1667 rc = filemap_fdatawrite(inode->i_mapping);
1672 lsm = ccc_inode_lsm_get(inode);
1676 /* If the stripe_count > 1 and the application does not understand
1677 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1679 if (lsm->lsm_stripe_count > 1 &&
1680 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1681 GOTO(out, rc = -EOPNOTSUPP);
/* Identify the object and current size for the OST-side mapping. */
1683 fm_key.oa.o_id = lsm->lsm_object_id;
1684 fm_key.oa.o_seq = lsm->lsm_object_seq;
1685 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1687 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1688 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1689 /* If filesize is 0, then there would be no objects for mapping */
1690 if (fm_key.oa.o_size == 0) {
1691 fiemap->fm_mapped_extents = 0;
1695 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1697 rc = obd_get_info(NULL, exp, sizeof(fm_key), &fm_key, &vallen,
1700 CERROR("obd_get_info failed: rc = %d\n", rc);
1703 ccc_inode_lsm_put(inode, lsm);
/*
 * OBD_IOC_FID2PATH handler: translate a FID into a path via the MDC.
 * Reads a getinfo_fid2path header from userspace to learn the caller's
 * path buffer length, then round-trips the full structure.
 * NOTE(review): fragmentary view -- some error paths/frees are elided.
 */
1707 int ll_fid2path(struct inode *inode, void *arg)
1709 struct obd_export *exp = ll_i2mdexp(inode);
1710 struct getinfo_fid2path *gfout, *gfin;
/* Allowed for CAP_DAC_READ_SEARCH or when the mount opts permit users. */
1714 if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
1715 !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
1718 /* Need to get the buflen */
1719 OBD_ALLOC_PTR(gfin);
1722 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* Output buffer: header plus the user-requested path length. */
1727 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1728 OBD_ALLOC(gfout, outsize);
1729 if (gfout == NULL) {
1733 memcpy(gfout, gfin, sizeof(*gfout));
1736 /* Call mdc_iocontrol */
1737 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1740 if (copy_to_user(arg, gfout, outsize))
1744 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size a kernel fiemap buffer from the user's
 * fm_extent_count, copy the request (and, when present, the first extent
 * used for continuation) in, run ll_do_fiemap(), and copy the header plus
 * mapped extents back out.
 * NOTE(review): fragmentary view -- some returns are elided; the
 * num_bytes multiplication is presumably bounded elsewhere -- confirm.
 */
1748 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1750 struct ll_user_fiemap *fiemap_s;
1751 size_t num_bytes, ret_bytes;
1752 unsigned int extent_count;
1755 /* Get the extent count so we can calculate the size of
1756 * required fiemap buffer */
1757 if (get_user(extent_count,
1758 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1760 num_bytes = sizeof(*fiemap_s) + (extent_count *
1761 sizeof(struct ll_fiemap_extent));
1763 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1764 if (fiemap_s == NULL)
1767 /* get the fiemap value */
1768 if (copy_from_user(fiemap_s, (struct ll_user_fiemap __user *)arg,
1770 GOTO(error, rc = -EFAULT);
1772 /* If fm_extent_count is non-zero, read the first extent since
1773 * it is used to calculate end_offset and device from previous
1776 if (copy_from_user(&fiemap_s->fm_extents[0],
1777 (char __user *)arg + sizeof(*fiemap_s),
1778 sizeof(struct ll_fiemap_extent)))
1779 GOTO(error, rc = -EFAULT);
1782 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
/* Copy back only the header plus however many extents were mapped. */
1786 ret_bytes = sizeof(struct ll_user_fiemap);
1788 if (extent_count != 0)
1789 ret_bytes += (fiemap_s->fm_mapped_extents *
1790 sizeof(struct ll_fiemap_extent));
1792 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1796 OBD_FREE_LARGE(fiemap_s, num_bytes);
/*
 * Return the file's data version (computed from OST stripe object
 * versions via ll_lsm_getattr).  A file with no striping reports
 * version 0.
 * NOTE(review): fragmentary view -- returns and obdo free are elided.
 */
1801 * Read the data_version for inode.
1803 * This value is computed using stripe object version on OST.
1804 * Version is computed using server side locking.
1806 * @param extent_lock Take extent lock. Not needed if a process is already
1807 * holding the OST object group locks.
1809 int ll_data_version(struct inode *inode, __u64 *data_version,
1812 struct lov_stripe_md *lsm = NULL;
1813 struct ll_sb_info *sbi = ll_i2sbi(inode);
1814 struct obdo *obdo = NULL;
1818 /* If no stripe, we consider version is 0. */
1819 lsm = ccc_inode_lsm_get(inode);
1822 CDEBUG(D_INODE, "No object for inode\n");
1826 OBD_ALLOC_PTR(obdo);
1828 ccc_inode_lsm_put(inode, lsm);
/* Query OST attributes; extent_lock selects server-side locking. */
1832 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
/* The OSTs must actually have returned a data version. */
1834 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1837 *data_version = obdo->o_data_version;
1841 ccc_inode_lsm_put(inode, lsm);
/*
 * Main per-file ioctl dispatcher for llite.  Handles file-flag get/set,
 * striping (setstripe/getstripe/setea), object recreation, fiemap, group
 * locks, fid2path, data version, MDT index and HSM state queries, and
 * finally falls through to registered ioctl handlers / obd_iocontrol.
 * NOTE(review): fragmentary view -- many RETURNs, allocations (hus/hss/
 * hca) and break statements are elided; comments cover visible lines.
 */
1846 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1848 struct inode *inode = file->f_dentry->d_inode;
1849 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1854 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1855 inode->i_generation, inode, cmd);
1856 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1858 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1859 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1863 case LL_IOC_GETFLAGS:
1864 /* Get the current value of the file flags */
1865 return put_user(fd->fd_flags, (int *)arg);
1866 case LL_IOC_SETFLAGS:
1867 case LL_IOC_CLRFLAGS:
1868 /* Set or clear specific file flags */
1869 /* XXX This probably needs checks to ensure the flags are
1870 * not abused, and to handle any flag side effects.
1872 if (get_user(flags, (int *) arg))
1875 if (cmd == LL_IOC_SETFLAGS) {
/* Disabling locking is only allowed for O_DIRECT files. */
1876 if ((flags & LL_FILE_IGNORE_LOCK) &&
1877 !(file->f_flags & O_DIRECT)) {
1878 CERROR("%s: unable to disable locking on "
1879 "non-O_DIRECT file\n", current->comm);
1883 fd->fd_flags |= flags;
1885 fd->fd_flags &= ~flags;
/* Striping / layout operations delegate to the helpers above. */
1888 case LL_IOC_LOV_SETSTRIPE:
1889 RETURN(ll_lov_setstripe(inode, file, arg));
1890 case LL_IOC_LOV_SETEA:
1891 RETURN(ll_lov_setea(inode, file, arg));
1892 case LL_IOC_LOV_GETSTRIPE:
1893 RETURN(ll_lov_getstripe(inode, arg));
1894 case LL_IOC_RECREATE_OBJ:
1895 RETURN(ll_lov_recreate_obj(inode, arg));
1896 case LL_IOC_RECREATE_FID:
1897 RETURN(ll_lov_recreate_fid(inode, arg));
1898 case FSFILT_IOC_FIEMAP:
1899 RETURN(ll_ioctl_fiemap(inode, arg));
1900 case FSFILT_IOC_GETFLAGS:
1901 case FSFILT_IOC_SETFLAGS:
1902 RETURN(ll_iocontrol(inode, file, cmd, arg));
1903 case FSFILT_IOC_GETVERSION_OLD:
1904 case FSFILT_IOC_GETVERSION:
1905 RETURN(put_user(inode->i_generation, (int *)arg));
1906 case LL_IOC_GROUP_LOCK:
1907 RETURN(ll_get_grouplock(inode, file, arg));
1908 case LL_IOC_GROUP_UNLOCK:
1909 RETURN(ll_put_grouplock(inode, file, arg));
1910 case IOC_OBD_STATFS:
1911 RETURN(ll_obd_statfs(inode, (void *)arg));
1913 /* We need to special case any other ioctls we want to handle,
1914 * to send them to the MDS/OST as appropriate and to properly
1915 * network encode the arg field.
1916 case FSFILT_IOC_SETVERSION_OLD:
1917 case FSFILT_IOC_SETVERSION:
1919 case LL_IOC_FLUSHCTX:
1920 RETURN(ll_flush_ctx(inode));
1921 case LL_IOC_PATH2FID: {
1922 if (copy_to_user((void *)arg, ll_inode2fid(inode),
1923 sizeof(struct lu_fid)))
1928 case OBD_IOC_FID2PATH:
1929 RETURN(ll_fid2path(inode, (void *)arg));
1930 case LL_IOC_DATA_VERSION: {
1931 struct ioc_data_version idv;
1934 if (copy_from_user(&idv, (char *)arg, sizeof(idv)))
/* LL_DV_NOFLUSH skips the extent lock (caller already serialized). */
1937 rc = ll_data_version(inode, &idv.idv_version,
1938 !(idv.idv_flags & LL_DV_NOFLUSH));
1940 if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv)))
1946 case LL_IOC_GET_MDTIDX: {
1949 mdtidx = ll_get_mdt_idx(inode);
1953 if (put_user((int)mdtidx, (int*)arg))
1958 case OBD_IOC_GETDTNAME:
1959 case OBD_IOC_GETMDNAME:
1960 RETURN(ll_get_obd_name(inode, cmd, arg));
1961 case LL_IOC_HSM_STATE_GET: {
1962 struct md_op_data *op_data;
1963 struct hsm_user_state *hus;
/* HSM state query: round-trip a hsm_user_state through the MDC. */
1970 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1971 LUSTRE_OPC_ANY, hus);
1972 if (op_data == NULL) {
1977 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
1980 if (copy_to_user((void *)arg, hus, sizeof(*hus)))
1983 ll_finish_md_op_data(op_data);
1987 case LL_IOC_HSM_STATE_SET: {
1988 struct md_op_data *op_data;
1989 struct hsm_state_set *hss;
1995 if (copy_from_user(hss, (char *)arg, sizeof(*hss))) {
2000 /* Non-root users are forbidden to set or clear flags which are
2001 * NOT defined in HSM_USER_MASK. */
2002 if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK)
2003 && !cfs_capable(CFS_CAP_SYS_ADMIN)) {
2008 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2009 LUSTRE_OPC_ANY, hss);
2010 if (op_data == NULL) {
2015 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2018 ll_finish_md_op_data(op_data);
2023 case LL_IOC_HSM_ACTION: {
2024 struct md_op_data *op_data;
2025 struct hsm_current_action *hca;
2032 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2033 LUSTRE_OPC_ANY, hca);
2034 if (op_data == NULL) {
2039 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data),
2042 if (cfs_copy_to_user((char *)arg, hca, sizeof(*hca)))
2045 ll_finish_md_op_data(op_data);
/* default: try registered external handlers, else pass to the OSC. */
2053 ll_iocontrol_call(inode, file, cmd, arg, &err))
2056 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
2062 #ifndef HAVE_FILE_LLSEEK_SIZE
/*
 * Compat helper (kernels without generic_file_llseek_size): validate
 * @offset against [0, maxsize] and install it as the file position,
 * resetting f_version on change.
 */
2063 static inline loff_t
2064 llseek_execute(struct file *file, loff_t offset, loff_t maxsize)
2066 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
2068 if (offset > maxsize)
2071 if (offset != file->f_pos) {
2072 file->f_pos = offset;
2073 file->f_version = 0;
/*
 * Compat copy of the upstream generic_file_llseek_size(): llseek with an
 * explicit max size and eof, supporting SEEK_CUR fast-path plus (per the
 * comments below) SEEK_DATA/SEEK_HOLE semantics.
 * NOTE(review): fragmentary view -- the switch statement and several
 * returns are elided.
 */
2079 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
2080 loff_t maxsize, loff_t eof)
2082 struct inode *inode = file->f_dentry->d_inode;
2090 * Here we special-case the lseek(fd, 0, SEEK_CUR)
2091 * position-querying operation. Avoid rewriting the "same"
2092 * f_pos value back to the file because a concurrent read(),
2093 * write() or lseek() might have altered it
2098 * f_lock protects against read/modify/write race with other
2099 * SEEK_CURs. Note that parallel writes and reads behave
/* SEEK_CUR update is done under i_mutex to serialize with other seeks. */
2102 mutex_lock(&inode->i_mutex);
2103 offset = llseek_execute(file, file->f_pos + offset, maxsize);
2104 mutex_unlock(&inode->i_mutex);
2108 * In the generic case the entire file is data, so as long as
2109 * offset isn't at the end of the file then the offset is data.
2116 * There is a virtual hole at the end of the file, so as long as
2117 * offset isn't i_size or larger, return i_size.
2125 return llseek_execute(file, offset, maxsize);
/*
 * llseek() entry point: for SEEK_END/SEEK_HOLE/SEEK_DATA first glimpse
 * the up-to-date size from the OSTs, then defer to
 * generic_file_llseek_size() bounded by the filesystem max byte offset.
 */
2129 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2131 struct inode *inode = file->f_dentry->d_inode;
2132 loff_t retval, eof = 0;
/* Pre-compute the target offset purely for the trace message below. */
2135 retval = offset + ((origin == SEEK_END) ? i_size_read(inode) :
2136 (origin == SEEK_CUR) ? file->f_pos : 0);
2137 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%d)\n",
2138 inode->i_ino, inode->i_generation, inode, retval, retval,
2140 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
/* These origins depend on the true size: refresh it via glimpse. */
2142 if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) {
2143 retval = ll_glimpse_size(inode);
2146 eof = i_size_read(inode);
2149 retval = generic_file_llseek_size(file, offset, origin,
2150 ll_file_maxbytes(inode), eof);
/*
 * flush() (close(2) time) handler: report -EIO if async writeback errors
 * were recorded for this inode, unless this fd already saw the failure
 * (fd_write_failed), in which case the error is not reported twice.
 */
2154 int ll_flush(struct file *file, fl_owner_t id)
2156 struct inode *inode = file->f_dentry->d_inode;
2157 struct ll_inode_info *lli = ll_i2info(inode);
2158 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
2161 LASSERT(!S_ISDIR(inode->i_mode));
2163 /* catch async errors that were recorded back when async writeback
2164 * failed for pages in this mapping. */
2165 rc = lli->lli_async_rc;
2166 lli->lli_async_rc = 0;
/* Also collect (and clear) async errors recorded at the LOV layer. */
2167 err = lov_read_and_clear_async_rc(lli->lli_clob);
2171 /* The application has been told write failure already.
2172 * Do not report failure again. */
2173 if (fd->fd_write_failed)
2175 return rc ? -EIO : 0;
/*
 * Run a CIT_FSYNC cl_io over [start, end] of @inode with the given fsync
 * mode (NONE/LOCAL/DISCARD/ALL); on success returns the number of pages
 * written (fi_nr_written).
 */
2179 * Called to make sure a portion of file has been written out.
2180 * if @local_only is not true, it will send OST_SYNC RPCs to ost.
2182 * Return how many pages have been written.
2184 int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
2185 enum cl_fsync_mode mode)
2187 struct cl_env_nest nest;
2190 struct obd_capa *capa = NULL;
2191 struct cl_fsync_io *fio;
/* Only the four defined fsync modes are accepted. */
2195 if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
2196 mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
/* Nested env: this may be called from within another cl_io context. */
2199 env = cl_env_nested_get(&nest);
2201 RETURN(PTR_ERR(env));
2203 capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2205 io = ccc_env_thread_io(env);
2206 io->ci_obj = cl_i2info(inode)->lli_clob;
/* fsync must proceed even during layout changes. */
2207 io->ci_ignore_layout = 1;
2209 /* initialize parameters for sync */
2210 fio = &io->u.ci_fsync;
2211 fio->fi_capa = capa;
2212 fio->fi_start = start;
2214 fio->fi_fid = ll_inode2fid(inode);
2215 fio->fi_mode = mode;
2216 fio->fi_nr_written = 0;
2218 if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0)
2219 result = cl_io_loop(env, io);
2221 result = io->ci_result;
/* Success -> report the page count accumulated by the sync io. */
2223 result = fio->fi_nr_written;
2224 cl_io_fini(env, io);
2225 cl_env_nested_put(&nest, env);
/*
 * fsync() handler (signature varies by kernel version): wait for cached
 * writes, collect recorded async writeback errors, sync metadata via
 * md_sync(), then sync file data via cl_sync_file_range(CL_FSYNC_ALL is
 * implied by the OBD_OBJECT_EOF range below) and track per-fd write
 * failure state.
 * NOTE(review): fragmentary view -- rc/err merging lines are elided.
 */
2232 #ifdef HAVE_FILE_FSYNC_4ARGS
2233 int ll_fsync(struct file *file, loff_t start, loff_t end, int data)
2234 #elif defined(HAVE_FILE_FSYNC_2ARGS)
2235 int ll_fsync(struct file *file, int data)
2237 int ll_fsync(struct file *file, struct dentry *dentry, int data)
2240 struct inode *inode = file->f_dentry->d_inode;
2241 struct ll_inode_info *lli = ll_i2info(inode);
2242 struct ptlrpc_request *req;
2243 struct obd_capa *oc;
2247 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2248 inode->i_generation, inode);
2249 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2251 #ifdef HAVE_FILE_FSYNC_4ARGS
/* 4-arg variant: flush the range ourselves and hold i_mutex. */
2252 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2253 mutex_lock(&inode->i_mutex);
2255 /* fsync's caller has already called _fdata{sync,write}, we want
2256 * that IO to finish before calling the osc and mdc sync methods */
2257 rc = filemap_fdatawait(inode->i_mapping);
2260 /* catch async errors that were recorded back when async writeback
2261 * failed for pages in this mapping. */
2262 if (!S_ISDIR(inode->i_mode)) {
2263 err = lli->lli_async_rc;
2264 lli->lli_async_rc = 0;
2267 err = lov_read_and_clear_async_rc(lli->lli_clob);
/* Metadata sync to the MDS (with capability if enabled). */
2272 oc = ll_mdscapa_get(inode);
2273 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2279 ptlrpc_req_finished(req);
2282 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
/* Data sync over the whole file; remember failure on this fd so
 * ll_flush() does not report the same error twice. */
2284 err = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
2286 if (rc == 0 && err < 0)
2289 fd->fd_write_failed = true;
2291 fd->fd_write_failed = false;
2294 #ifdef HAVE_FILE_FSYNC_4ARGS
2295 mutex_unlock(&inode->i_mutex);
/*
 * flock()/fcntl() byte-range lock handler: translate the VFS file_lock
 * into an LDLM_FLOCK enqueue against the MDS, then update the local VFS
 * lock state; on local failure the remote lock is rolled back with an
 * LCK_NL (unlock) enqueue.
 * NOTE(review): fragmentary view -- the cmd switch arms and several
 * break/RETURN lines are elided.
 */
2300 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2302 struct inode *inode = file->f_dentry->d_inode;
2303 struct ll_sb_info *sbi = ll_i2sbi(inode);
2304 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2305 .ei_cb_cp =ldlm_flock_completion_ast,
2306 .ei_cbdata = file_lock };
2307 struct md_op_data *op_data;
2308 struct lustre_handle lockh = {0};
2309 ldlm_policy_data_t flock = {{0}};
2315 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2316 inode->i_ino, file_lock);
2318 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
/* Map the VFS lock kind (BSD flock vs POSIX) to LDLM policy data. */
2320 if (file_lock->fl_flags & FL_FLOCK) {
2321 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2322 /* flocks are whole-file locks */
2323 flock.l_flock.end = OFFSET_MAX;
2324 /* For flocks owner is determined by the local file desctiptor*/
2325 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2326 } else if (file_lock->fl_flags & FL_POSIX) {
2327 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2328 flock.l_flock.start = file_lock->fl_start;
2329 flock.l_flock.end = file_lock->fl_end;
2333 flock.l_flock.pid = file_lock->fl_pid;
2335 /* Somewhat ugly workaround for svc lockd.
2336 * lockd installs custom fl_lmops->lm_compare_owner that checks
2337 * for the fl_owner to be the same (which it always is on local node
2338 * I guess between lockd processes) and then compares pid.
2339 * As such we assign pid to the owner field to make it all work,
2340 * conflict with normal locks is unlikely since pid space and
2341 * pointer space for current->files are not intersecting */
2342 if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
2343 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Lock type -> LDLM mode: read -> PR, write -> PW, unlock -> NL. */
2345 switch (file_lock->fl_type) {
2347 einfo.ei_mode = LCK_PR;
2350 /* An unlock request may or may not have any relation to
2351 * existing locks so we may not be able to pass a lock handle
2352 * via a normal ldlm_lock_cancel() request. The request may even
2353 * unlock a byte range in the middle of an existing lock. In
2354 * order to process an unlock request we need all of the same
2355 * information that is given with a normal read or write record
2356 * lock request. To avoid creating another ldlm unlock (cancel)
2357 * message we'll treat a LCK_NL flock request as an unlock. */
2358 einfo.ei_mode = LCK_NL;
2361 einfo.ei_mode = LCK_PW;
2364 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2365 file_lock->fl_type);
/* Non-blocking set: tell LDLM not to wait for a conflicting lock. */
2380 flags = LDLM_FL_BLOCK_NOWAIT;
/* Test-only (F_GETLK family): do not actually take the lock. */
2386 flags = LDLM_FL_TEST_LOCK;
2387 /* Save the old mode so that if the mode in the lock changes we
2388 * can decrement the appropriate reader or writer refcount. */
2389 file_lock->fl_type = einfo.ei_mode;
2392 CERROR("unknown fcntl lock command: %d\n", cmd);
2396 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2397 LUSTRE_OPC_ANY, NULL);
2398 if (IS_ERR(op_data))
2399 RETURN(PTR_ERR(op_data));
2401 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2402 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2403 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2405 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2406 op_data, &lockh, &flock, 0, NULL /* req */, flags);
/* Mirror the server-side result into the local VFS lock tables. */
2408 if ((file_lock->fl_flags & FL_FLOCK) &&
2409 (rc == 0 || file_lock->fl_type == F_UNLCK))
2410 rc2 = flock_lock_file_wait(file, file_lock);
2411 if ((file_lock->fl_flags & FL_POSIX) &&
2412 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2413 !(flags & LDLM_FL_TEST_LOCK))
2414 rc2 = posix_lock_file_wait(file, file_lock);
/* Local bookkeeping failed: give the remote lock back (enqueue NL). */
2416 if (rc2 && file_lock->fl_type != F_UNLCK) {
2417 einfo.ei_mode = LCK_NL;
2418 md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2419 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2423 ll_finish_md_op_data(op_data);
/* flock handler for nolock mounts; body not visible in this chunk --
 * presumably returns -ENOSYS or similar, confirm against full source. */
2428 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test whether MD locks covering the requested inodebits are already
 * held (in any of the requested modes); found bits are cleared from
 * *bits, the remainder is left in place.  Uses LDLM_FL_TEST_LOCK so no
 * references are kept.
 */
2436 * test if some locks matching bits and l_req_mode are acquired
2437 * - bits can be in different locks
2438 * - if found clear the common lock bits in *bits
2439 * - the bits not found, are kept in *bits
2441 * \param bits [IN] searched lock bits [IN]
2442 * \param l_req_mode [IN] searched lock mode
2443 * \retval boolean, true iff all bits are found
2445 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2447 struct lustre_handle lockh;
2448 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": search all four standard modes. */
2449 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2450 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2459 fid = &ll_i2info(inode)->lli_fid;
2460 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2461 ldlm_lockname[mode]);
2463 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
/* Probe each requested bit individually; stop early once all found. */
2464 for (i = 0; i < MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2465 policy.l_inodebits.bits = *bits & (1 << i);
2466 if (policy.l_inodebits.bits == 0)
2469 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2470 &policy, mode, &lockh)) {
2471 struct ldlm_lock *lock;
2473 lock = ldlm_handle2lock(&lockh);
/* Clear every bit the matched lock covers, not just the probe bit. */
2476 ~(lock->l_policy_data.l_inodebits.bits);
2477 LDLM_LOCK_PUT(lock);
2479 *bits &= ~policy.l_inodebits.bits;
/*
 * Match (and reference) a granted MD lock on @inode covering @bits in
 * any mode; returns the matched mode (0 if none) with the handle in
 * *lockh.  Unlike ll_have_md_lock() this takes a real reference.
 */
2486 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2487 struct lustre_handle *lockh, __u64 flags)
2489 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2494 fid = &ll_i2info(inode)->lli_fid;
2495 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2497 rc = md_lock_match(ll_i2mdexp(inode), LDLM_FL_BLOCK_GRANTED|flags,
2498 fid, LDLM_IBITS, &policy,
2499 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process a revalidation result: -ENOENT on a non-regular,
 * non-directory inode is tolerated (already unlinked); other errors are
 * logged with the inode's FID.
 */
2503 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
2505 /* Already unlinked. Just update nlink and return success */
2506 if (rc == -ENOENT) {
2508 /* This path cannot be hit for regular files unless in
2509 * case of obscure races, so no need to to validate
2511 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
2513 } else if (rc != 0) {
2514 CERROR("%s: revalidate FID "DFID" error: rc = %d\n",
2515 ll_get_fsname(inode->i_sb, NULL, 0),
2516 PFID(ll_inode2fid(inode)), rc);
/*
 * Revalidate @dentry's inode attributes for the lock bits in @ibits.
 * With OBD_CONNECT_ATTRFID, drive a getattr-by-fid intent lock;
 * otherwise, if the bits are not already cached locally, do a plain
 * md_getattr() and refresh the inode from the reply.
 * NOTE(review): fragmentary view -- several returns and intermediate
 * checks are elided.
 */
2522 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2525 struct inode *inode = dentry->d_inode;
2526 struct ptlrpc_request *req = NULL;
2527 struct obd_export *exp;
2531 LASSERT(inode != NULL);
2533 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2534 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2536 exp = ll_i2mdexp(inode);
2538 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2539 * But under CMD case, it caused some lock issues, should be fixed
2540 * with new CMD ibits lock. See bug 12718 */
2541 if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) {
2542 struct lookup_intent oit = { .it_op = IT_GETATTR };
2543 struct md_op_data *op_data;
/* Only LOOKUP requested -> the lighter IT_LOOKUP intent suffices. */
2545 if (ibits == MDS_INODELOCK_LOOKUP)
2546 oit.it_op = IT_LOOKUP;
2548 /* Call getattr by fid, so do not provide name at all. */
2549 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2550 dentry->d_inode, NULL, 0, 0,
2551 LUSTRE_OPC_ANY, NULL);
2552 if (IS_ERR(op_data))
2553 RETURN(PTR_ERR(op_data));
2555 oit.it_create_mode |= M_CHECK_STALE;
2556 rc = md_intent_lock(exp, op_data, NULL, 0,
2557 /* we are not interested in name
2560 ll_md_blocking_ast, 0);
2561 ll_finish_md_op_data(op_data);
2562 oit.it_create_mode &= ~M_CHECK_STALE;
2564 rc = ll_inode_revalidate_fini(inode, rc);
2568 rc = ll_revalidate_it_finish(req, &oit, dentry);
2570 ll_intent_release(&oit);
2574 /* Unlinked? Unhash dentry, so it is not picked up later by
2575 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2576 here to preserve get_cwd functionality on 2.6.
2578 if (!dentry->d_inode->i_nlink)
2579 d_lustre_invalidate(dentry);
2581 ll_lookup_finish_locks(&oit, dentry);
/* No ATTRFID: only fetch from the MDS if the bits are not cached. */
2582 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
2583 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2584 obd_valid valid = OBD_MD_FLGETATTR;
2585 struct md_op_data *op_data;
/* Regular files also need the layout EA in the reply. */
2588 if (S_ISREG(inode->i_mode)) {
2589 rc = ll_get_max_mdsize(sbi, &ealen);
2592 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2595 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2596 0, ealen, LUSTRE_OPC_ANY,
2598 if (IS_ERR(op_data))
2599 RETURN(PTR_ERR(op_data));
2601 op_data->op_valid = valid;
2602 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2603 * capa for this inode. Because we only keep capas of dirs
2605 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2606 ll_finish_md_op_data(op_data);
2608 rc = ll_inode_revalidate_fini(inode, rc);
2612 rc = ll_prep_inode(&inode, req, NULL, NULL);
2615 ptlrpc_req_finished(req);
/*
 * Revalidate attributes and then the size: non-regular inodes take
 * timestamps from the cached lvb, regular files glimpse the size from
 * the OSTs.
 */
2619 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2622 struct inode *inode = dentry->d_inode;
2626 rc = __ll_inode_revalidate_it(dentry, it, ibits);
2630 /* if object isn't regular file, don't validate size */
2631 if (!S_ISREG(inode->i_mode)) {
2632 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2633 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2634 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2636 rc = ll_glimpse_size(inode);
/*
 * getattr with a caller-supplied intent: revalidate UPDATE|LOOKUP bits,
 * then fill *stat from the (now fresh) inode fields.
 */
2641 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2642 struct lookup_intent *it, struct kstat *stat)
2644 struct inode *inode = de->d_inode;
2645 struct ll_sb_info *sbi = ll_i2sbi(inode);
2646 struct ll_inode_info *lli = ll_i2info(inode);
2649 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
2650 MDS_INODELOCK_LOOKUP);
2651 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
2656 stat->dev = inode->i_sb->s_dev;
/* 32-bit userspace gets an ino squeezed from the FID. */
2657 if (ll_need_32bit_api(sbi))
2658 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
2660 stat->ino = inode->i_ino;
2661 stat->mode = inode->i_mode;
2662 stat->nlink = inode->i_nlink;
2663 stat->uid = inode->i_uid;
2664 stat->gid = inode->i_gid;
2665 stat->rdev = inode->i_rdev;
2666 stat->atime = inode->i_atime;
2667 stat->mtime = inode->i_mtime;
2668 stat->ctime = inode->i_ctime;
2669 stat->blksize = 1 << inode->i_blkbits;
2671 stat->size = i_size_read(inode);
2672 stat->blocks = inode->i_blocks;
/* VFS ->getattr entry point: wraps ll_getattr_it() with a plain
 * IT_GETATTR intent. */
2676 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2678 struct lookup_intent it = { .it_op = IT_GETATTR };
2680 return ll_getattr_it(mnt, de, &it, stat);
2683 #ifdef HAVE_LINUX_FIEMAP_H
/* ->fiemap handler: translate the kernel fiemap_extent_info request into
 * a Lustre ll_user_fiemap, run ll_do_fiemap(), and copy the mapped
 * extents back to userspace's extent array.
 * NOTE(review): allocation-failure and rc-check lines are elided in this
 * excerpt — confirm error paths in the full source. */
2684 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2685 __u64 start, __u64 len)
2689 struct ll_user_fiemap *fiemap;
2690 unsigned int extent_count = fieinfo->fi_extents_max;
/* Buffer holds the header plus room for all requested extents. */
2692 num_bytes = sizeof(*fiemap) + (extent_count *
2693 sizeof(struct ll_fiemap_extent));
2694 OBD_ALLOC_LARGE(fiemap, num_bytes);
2699 fiemap->fm_flags = fieinfo->fi_flags;
2700 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2701 fiemap->fm_start = start;
2702 fiemap->fm_length = len;
/* Only the first extent is copied in (seed/continuation data). */
2703 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2704 sizeof(struct ll_fiemap_extent));
2706 rc = ll_do_fiemap(inode, fiemap, num_bytes);
/* Propagate result flags and mapped extents back to the caller. */
2708 fieinfo->fi_flags = fiemap->fm_flags;
2709 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2710 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2711 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2713 OBD_FREE_LARGE(fiemap, num_bytes);
/* Return a referenced copy of the inode's cached POSIX ACL.
 * The lli_lock guards the cached ACL pointer while we take a reference. */
2718 struct posix_acl * ll_get_acl(struct inode *inode, int type)
2720 struct ll_inode_info *lli = ll_i2info(inode);
2721 struct posix_acl *acl = NULL;
2724 spin_lock(&lli->lli_lock);
2725 /* VFS' acl_permission_check->check_acl will release the refcount */
2726 acl = posix_acl_dup(lli->lli_posix_acl);
2727 spin_unlock(&lli->lli_lock);
2732 #ifndef HAVE_GENERIC_PERMISSION_2ARGS
/* ACL check callback used by generic_permission() on kernels where it
 * takes an explicit check_acl function; the two prototypes below match
 * the 4-arg and older calling conventions respectively. */
2734 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
2735 ll_check_acl(struct inode *inode, int mask, unsigned int flags)
2737 ll_check_acl(struct inode *inode, int mask)
2740 # ifdef CONFIG_FS_POSIX_ACL
2741 struct posix_acl *acl;
2745 # ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* In RCU walk mode we may not block; bail out (elided return line). */
2746 if (flags & IPERM_FLAG_RCU)
2749 acl = ll_get_acl(inode, ACL_TYPE_ACCESS);
/* posix_acl_permission() evaluates @mask against the ACL entries;
 * ll_get_acl() took a reference, released here. */
2754 rc = posix_acl_permission(inode, acl, mask);
2755 posix_acl_release(acl);
2758 # else /* !CONFIG_FS_POSIX_ACL */
2760 # endif /* CONFIG_FS_POSIX_ACL */
2764 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* VFS ->permission entry point; signature varies with kernel version.
 * NOTE(review): several interior lines (returns, #endif) are elided in
 * this excerpt. */
2765 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
2767 # ifdef HAVE_INODE_PERMISION_2ARGS
2768 int ll_inode_permission(struct inode *inode, int mask)
2770 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
/* RCU-walk lookups may not block; the (elided) body presumably returns
 * -ECHILD here — confirm against full source. */
2777 #ifdef MAY_NOT_BLOCK
2778 if (mask & MAY_NOT_BLOCK)
2780 #elif defined(HAVE_GENERIC_PERMISSION_4ARGS)
2781 if (flags & IPERM_FLAG_RCU)
2785 /* as root inode are NOT getting validated in lookup operation,
2786 * need to do it before permission check. */
2788 if (inode == inode->i_sb->s_root->d_inode) {
2789 struct lookup_intent it = { .it_op = IT_LOOKUP };
2791 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2792 MDS_INODELOCK_LOOKUP);
2797 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2798 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
/* Remote clients delegate the permission decision to the MDS. */
2800 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2801 return lustre_check_remote_perm(inode, mask);
2803 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* Fall back to the generic mode-bits + ACL check. */
2804 rc = ll_generic_permission(inode, mask, flags, ll_check_acl);
/* Kernel-version compatibility: pick readv/writev vs aio_read/aio_write
 * as the vectored-I/O file_operations members used in the tables below. */
2809 #ifdef HAVE_FILE_READV
2810 #define READ_METHOD readv
2811 #define READ_FUNCTION ll_file_readv
2812 #define WRITE_METHOD writev
2813 #define WRITE_FUNCTION ll_file_writev
2815 #define READ_METHOD aio_read
2816 #define READ_FUNCTION ll_file_aio_read
2817 #define WRITE_METHOD aio_write
2818 #define WRITE_FUNCTION ll_file_aio_write
2821 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations table: no .flock/.lock members, so the VFS
 * falls back to local (client-only) flock semantics. */
2822 struct file_operations ll_file_operations = {
2823 .read = ll_file_read,
2824 .READ_METHOD = READ_FUNCTION,
2825 .write = ll_file_write,
2826 .WRITE_METHOD = WRITE_FUNCTION,
2827 .unlocked_ioctl = ll_file_ioctl,
2828 .open = ll_file_open,
2829 .release = ll_file_release,
2830 .mmap = ll_file_mmap,
2831 .llseek = ll_file_seek,
2832 #ifdef HAVE_KERNEL_SENDFILE
2833 .sendfile = ll_file_sendfile,
2835 #ifdef HAVE_KERNEL_SPLICE_READ
2836 .splice_read = ll_file_splice_read,
/* file_operations used with -o flock: identical to the default table
 * plus .flock/.lock wired to ll_file_flock for cluster-wide locking. */
2842 struct file_operations ll_file_operations_flock = {
2843 .read = ll_file_read,
2844 .READ_METHOD = READ_FUNCTION,
2845 .write = ll_file_write,
2846 .WRITE_METHOD = WRITE_FUNCTION,
2847 .unlocked_ioctl = ll_file_ioctl,
2848 .open = ll_file_open,
2849 .release = ll_file_release,
2850 .mmap = ll_file_mmap,
2851 .llseek = ll_file_seek,
2852 #ifdef HAVE_KERNEL_SENDFILE
2853 .sendfile = ll_file_sendfile,
2855 #ifdef HAVE_KERNEL_SPLICE_READ
2856 .splice_read = ll_file_splice_read,
2860 .flock = ll_file_flock,
2861 .lock = ll_file_flock
2864 /* These are for -o noflock - to return ENOSYS on flock calls */
/* Same table again, but .flock/.lock point at ll_file_noflock so lock
 * requests fail explicitly instead of being silently local. */
2865 struct file_operations ll_file_operations_noflock = {
2866 .read = ll_file_read,
2867 .READ_METHOD = READ_FUNCTION,
2868 .write = ll_file_write,
2869 .WRITE_METHOD = WRITE_FUNCTION,
2870 .unlocked_ioctl = ll_file_ioctl,
2871 .open = ll_file_open,
2872 .release = ll_file_release,
2873 .mmap = ll_file_mmap,
2874 .llseek = ll_file_seek,
2875 #ifdef HAVE_KERNEL_SENDFILE
2876 .sendfile = ll_file_sendfile,
2878 #ifdef HAVE_KERNEL_SPLICE_READ
2879 .splice_read = ll_file_splice_read,
2883 .flock = ll_file_noflock,
2884 .lock = ll_file_noflock
/* inode_operations for regular files: attribute, xattr, permission, and
 * (when available) fiemap/get_acl handlers defined above in this file. */
2887 struct inode_operations ll_file_inode_operations = {
2888 .setattr = ll_setattr,
2889 .getattr = ll_getattr,
2890 .permission = ll_inode_permission,
2891 .setxattr = ll_setxattr,
2892 .getxattr = ll_getxattr,
2893 .listxattr = ll_listxattr,
2894 .removexattr = ll_removexattr,
2895 #ifdef HAVE_LINUX_FIEMAP_H
2896 .fiemap = ll_fiemap,
2898 #ifdef HAVE_IOP_GET_ACL
2899 .get_acl = ll_get_acl,
2903 /* dynamic ioctl number support routins */
/* Registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries protected by a rw_semaphore (readers iterate in
 * ll_iocontrol_call, writers register/unregister). */
2904 static struct llioc_ctl_data {
2905 struct rw_semaphore ioc_sem;
2906 cfs_list_t ioc_head;
2908 __RWSEM_INITIALIZER(llioc.ioc_sem),
2909 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registration record: callback plus its list of ioctl commands
 * (iocd_cmd is a flexible-style trailing array sized by iocd_count). */
2914 cfs_list_t iocd_list;
2915 unsigned int iocd_size;
2916 llioc_callback_t iocd_cb;
2917 unsigned int iocd_count;
2918 unsigned int iocd_cmd[0];
/* Register a callback for @count dynamic ioctl numbers in @cmd.
 * Returns an opaque handle (the llioc_data pointer) used later by
 * ll_iocontrol_unregister(); NULL-return lines are elided here. */
2921 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2924 struct llioc_data *in_data = NULL;
/* Reject bad callers: missing cb/cmd or count outside [0, LLIOC_MAX_CMD]. */
2927 if (cb == NULL || cmd == NULL ||
2928 count > LLIOC_MAX_CMD || count < 0)
2931 size = sizeof(*in_data) + count * sizeof(unsigned int);
2932 OBD_ALLOC(in_data, size);
2933 if (in_data == NULL)
2936 memset(in_data, 0, sizeof(*in_data));
2937 in_data->iocd_size = size;
2938 in_data->iocd_cb = cb;
2939 in_data->iocd_count = count;
2940 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* Publish under the write lock so ll_iocontrol_call readers see a
 * fully initialized record. */
2942 down_write(&llioc.ioc_sem);
2943 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2944 up_write(&llioc.ioc_sem);
/* Remove and free the registration identified by @magic (the pointer
 * returned by ll_iocontrol_register). Warns if no match is found.
 * NOTE(review): the magic-comparison and return lines are elided in
 * this excerpt. */
2949 void ll_iocontrol_unregister(void *magic)
2951 struct llioc_data *tmp;
2956 down_write(&llioc.ioc_sem);
2957 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* Remember the allocation size before unlinking so we can free after
 * dropping the semaphore. */
2959 unsigned int size = tmp->iocd_size;
2961 cfs_list_del(&tmp->iocd_list);
2962 up_write(&llioc.ioc_sem);
2964 OBD_FREE(tmp, size);
2968 up_write(&llioc.ioc_sem);
2970 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2973 EXPORT_SYMBOL(ll_iocontrol_register);
2974 EXPORT_SYMBOL(ll_iocontrol_unregister);
/* Dispatch a dynamic ioctl: scan every registered command table under
 * the read lock; the first callback that returns LLIOC_STOP ends the
 * search, and its rc is reported through *rcp (assignment line elided). */
2976 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2977 unsigned int cmd, unsigned long arg, int *rcp)
2979 enum llioc_iter ret = LLIOC_CONT;
2980 struct llioc_data *data;
2981 int rc = -EINVAL, i;
2983 down_read(&llioc.ioc_sem);
2984 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2985 for (i = 0; i < data->iocd_count; i++) {
2986 if (cmd != data->iocd_cmd[i])
/* Command matched: invoke the registered handler. */
2989 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2993 if (ret == LLIOC_STOP)
2996 up_read(&llioc.ioc_sem);
/* Push a layout configuration into the cl_object behind the inode.
 * For OBJECT_CONF_SET, the layout must be applied before the layout
 * lock may be matched by others, so matching is enabled only afterward. */
3003 int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf)
3005 struct ll_inode_info *lli = ll_i2info(inode);
3006 struct cl_env_nest nest;
/* No cl_object yet — nothing to configure (elided early return). */
3011 if (lli->lli_clob == NULL)
3014 env = cl_env_nested_get(&nest);
3016 RETURN(PTR_ERR(env));
3018 result = cl_conf_set(env, lli->lli_clob, conf);
3019 cl_env_nested_put(&nest, env);
3021 if (conf->coc_opc == OBJECT_CONF_SET) {
3022 struct ldlm_lock *lock = conf->coc_lock;
3024 LASSERT(lock != NULL);
3025 LASSERT(ldlm_has_layout(lock));
3027 /* it can only be allowed to match after layout is
3028 * applied to inode otherwise false layout would be
3029 * seen. Applying layout shoud happen before dropping
3030 * the intent lock. */
3031 ldlm_lock_allow_match(lock);
3038 * Apply the layout to the inode. Layout lock is held and will be released
/* Unpack the layout (lsm) from the lock's LVB, install it on the inode
 * via ll_layout_conf(), and report the layout generation through *gen.
 * @reconf selects whether an already-LVB-ready lock is reconfigured.
 * NOTE(review): several interior lines (declarations of rc/lvb_ready,
 * RETURN paths, closing braces) are elided in this excerpt. */
3041 static int ll_layout_lock_set(struct lustre_handle *lockh, ldlm_mode_t mode,
3042 struct inode *inode, __u32 *gen, bool reconf)
3044 struct ll_inode_info *lli = ll_i2info(inode);
3045 struct ll_sb_info *sbi = ll_i2sbi(inode);
3046 struct ldlm_lock *lock;
3047 struct lustre_md md = { NULL };
3048 struct cl_object_conf conf;
3053 LASSERT(lustre_handle_is_used(lockh));
3055 lock = ldlm_handle2lock(lockh);
3056 LASSERT(lock != NULL);
3057 LASSERT(ldlm_has_layout(lock));
3059 LDLM_DEBUG(lock, "File %p/"DFID" being reconfigured: %d.\n",
3060 inode, PFID(&lli->lli_fid), reconf);
3062 lock_res_and_lock(lock);
3063 lvb_ready = !!(lock->l_flags & LDLM_FL_LVB_READY);
3064 unlock_res_and_lock(lock);
3065 /* checking lvb_ready is racy but this is okay. The worst case is
3066 * that multi processes may configure the file on the same time. */
3067 if (lvb_ready || !reconf) {
3068 LDLM_LOCK_PUT(lock);
3072 /* layout_gen must be valid if layout lock is not
3073 * cancelled and stripe has already set */
3074 *gen = lli->lli_layout_gen;
3077 ldlm_lock_decref(lockh, mode);
3081 /* for layout lock, lmm is returned in lock's lvb.
3082 * lvb_data is immutable if the lock is held so it's safe to access it
3083 * without res lock. See the description in ldlm_lock_decref_internal()
3084 * for the condition to free lvb_data of layout lock */
3085 if (lock->l_lvb_data != NULL) {
3086 rc = obd_unpackmd(sbi->ll_dt_exp, &md.lsm,
3087 lock->l_lvb_data, lock->l_lvb_len);
/* Successful unpack: report the generation carried by the layout. */
3090 *gen = md.lsm->lsm_layout_gen;
3093 CERROR("%s: file "DFID" unpackmd error: %d\n",
3094 ll_get_fsname(inode->i_sb, NULL, 0),
3095 PFID(&lli->lli_fid), rc);
/* Unpack failed: drop the lock reference and bail (elided GOTO/RETURN). */
3099 LDLM_LOCK_PUT(lock);
3100 ldlm_lock_decref(lockh, mode);
3104 /* set layout to file. Unlikely this will fail as old layout was
3105 * surely eliminated */
3106 memset(&conf, 0, sizeof conf);
3107 conf.coc_opc = OBJECT_CONF_SET;
3108 conf.coc_inode = inode;
3109 conf.coc_lock = lock;
3110 conf.u.coc_md = &md;
3111 rc = ll_layout_conf(inode, &conf);
3112 LDLM_LOCK_PUT(lock);
3114 ldlm_lock_decref(lockh, mode);
/* The unpacked lsm was only needed for cl_conf_set; free it now. */
3117 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
3119 /* wait for IO to complete if it's still being used. */
3121 CDEBUG(D_INODE, "%s: %p/"DFID" wait for layout reconf.\n",
3122 ll_get_fsname(inode->i_sb, NULL, 0),
3123 inode, PFID(&lli->lli_fid));
/* OBJECT_CONF_WAIT blocks until outstanding users of the old layout
 * are done before the new layout takes effect. */
3125 memset(&conf, 0, sizeof conf);
3126 conf.coc_opc = OBJECT_CONF_WAIT;
3127 conf.coc_inode = inode;
3128 rc = ll_layout_conf(inode, &conf);
3132 CDEBUG(D_INODE, "file: "DFID" waiting layout return: %d.\n",
3133 PFID(&lli->lli_fid), rc);
3140 * This function checks if there exists a LAYOUT lock on the client side,
3141 * or enqueues it if it doesn't have one in cache.
3143 * This function will not hold layout lock so it may be revoked any time after
3144 * this function returns. Any operations depend on layout should be redone
3147 * This function should be called before lov_io_init() to get an uptodate
3148 * layout version, the caller should save the version number and after IO
3149 * is finished, this function should be called again to verify that layout
3150 * is not changed during IO time.
3152 int ll_layout_refresh(struct inode *inode, __u32 *gen)
3154 struct ll_inode_info *lli = ll_i2info(inode);
3155 struct ll_sb_info *sbi = ll_i2sbi(inode);
3156 struct md_op_data *op_data;
3157 struct lookup_intent it;
3158 struct lustre_handle lockh;
3160 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS,
3162 .ei_cb_bl = ll_md_blocking_ast,
3163 .ei_cb_cp = ldlm_completion_ast,
3164 .ei_cbdata = inode };
3168 *gen = LL_LAYOUT_GEN_ZERO;
3169 if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
3173 LASSERT(fid_is_sane(ll_inode2fid(inode)));
3174 LASSERT(S_ISREG(inode->i_mode));
3176 /* mostly layout lock is caching on the local side, so try to match
3177 * it before grabbing layout lock mutex. */
3178 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0);
3179 if (mode != 0) { /* hit cached lock */
3180 rc = ll_layout_lock_set(&lockh, mode, inode, gen, false);
3184 /* better hold lli_layout_mutex to try again otherwise
3185 * it will have starvation problem. */
3188 /* take layout lock mutex to enqueue layout lock exclusively. */
3189 mutex_lock(&lli->lli_layout_mutex);
3192 /* try again. Maybe somebody else has done this. */
3193 mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0);
3194 if (mode != 0) { /* hit cached lock */
3195 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3199 mutex_unlock(&lli->lli_layout_mutex);
3203 op_data = ll_prep_md_op_data(NULL, inode, inode, NULL,
3204 0, 0, LUSTRE_OPC_ANY, NULL);
3205 if (IS_ERR(op_data)) {
3206 mutex_unlock(&lli->lli_layout_mutex);
3207 RETURN(PTR_ERR(op_data));
3210 /* have to enqueue one */
3211 memset(&it, 0, sizeof(it));
3212 it.it_op = IT_LAYOUT;
3213 lockh.cookie = 0ULL;
3215 LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file %p/"DFID".\n",
3216 ll_get_fsname(inode->i_sb, NULL, 0), inode,
3217 PFID(&lli->lli_fid));
3219 rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
3221 if (it.d.lustre.it_data != NULL)
3222 ptlrpc_req_finished(it.d.lustre.it_data);
3223 it.d.lustre.it_data = NULL;
3225 ll_finish_md_op_data(op_data);
3227 mode = it.d.lustre.it_lock_mode;
3228 it.d.lustre.it_lock_mode = 0;
3229 ll_intent_drop_lock(&it);
3232 /* set lock data in case this is a new lock */
3233 ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
3234 rc = ll_layout_lock_set(&lockh, mode, inode, gen, true);
3238 mutex_unlock(&lli->lli_layout_mutex);