4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <linux/pagemap.h>
47 #include <linux/file.h>
48 #include "llite_internal.h"
49 #include <lustre/ll_fiemap.h>
51 #include "cl_object.h"
/* Allocate one per-open ll_file_data object from its dedicated slab cache.
 * CFS_ALLOC_IO permits allocation from the I/O path.
 * NOTE(review): intermediate source lines are elided in this view -- the
 * NULL check and "return fd" are not visible here. */
53 struct ll_file_data *ll_file_data_get(void)
55 struct ll_file_data *fd;
57 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Release a per-open ll_file_data back to the slab cache it came from. */
61 static void ll_file_data_put(struct ll_file_data *fd)
64 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Pack the in-core inode attributes (fid, mode, a/m/ctime, size, blocks,
 * flags, ioepoch), the open handle @fh and an MDS capability into @op_data
 * so they can be shipped to the MDS (used on the close path). */
67 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
68 struct lustre_handle *fh)
70 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
71 op_data->op_attr.ia_mode = inode->i_mode;
72 op_data->op_attr.ia_atime = inode->i_atime;
73 op_data->op_attr.ia_mtime = inode->i_mtime;
74 op_data->op_attr.ia_ctime = inode->i_ctime;
75 op_data->op_attr.ia_size = i_size_read(inode);
76 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the ll_iattr wrapper around struct iattr; convert
 * the in-core inode flags to their on-wire (ext) representation. */
77 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
78 ll_inode_to_ext_flags(inode->i_flags);
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
81 op_data->op_handle = *fh;
/* ll_mdscapa_get() presumably takes a capability reference; the matching
 * release happens elsewhere -- TODO confirm against op_data teardown. */
82 op_data->op_capa1 = ll_mdscapa_get(inode);
86 * Closes the IO epoch and packs all the attributes into @op_data for
89 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
90 struct obd_client_handle *och)
/* Always send mode and the explicit SET variants of the timestamps. */
94 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
95 ATTR_MTIME_SET | ATTR_CTIME_SET;
/* NOTE(review): the body of this conditional is elided in this view. */
97 if (!(och->och_flags & FMODE_WRITE))
/* Client-side size/blocks are only sent when Size-on-MDS is not in effect
 * (no SOM support on the connection, or not a regular file). */
100 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
101 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
103 ll_ioepoch_close(inode, op_data, &och, 0);
106 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
107 ll_prep_md_op_data(op_data, inode, NULL, NULL,
108 0, 0, LUSTRE_OPC_ANY, NULL);
/* Send an MDS_CLOSE for the open handle @och: prepare close attributes,
 * issue md_close(), perform the Size-on-MDS update if the MDS asks for it,
 * destroy OST objects named in the close reply, and tear down the open
 * replay data.  NOTE(review): several lines (branch conditions, rc
 * declarations, RETURN) are elided from this view. */
112 static int ll_close_inode_openhandle(struct obd_export *md_exp,
114 struct obd_client_handle *och)
116 struct obd_export *exp = ll_i2mdexp(inode);
117 struct md_op_data *op_data;
118 struct ptlrpc_request *req = NULL;
119 struct obd_device *obd = class_exp2obd(exp);
126 * XXX: in case of LMV, is this correct to access
129 CERROR("Invalid MDC connection handle "LPX64"\n",
130 ll_i2mdexp(inode)->exp_handle.h_cookie);
134 OBD_ALLOC_PTR(op_data);
136 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
138 ll_prepare_close(inode, op_data, och);
139 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
140 rc = md_close(md_exp, op_data, och->och_mod, &req);
142 /* This close must have the epoch closed. */
143 LASSERT(epoch_close);
144 /* MDS has instructed us to obtain Size-on-MDS attribute from
145 * OSTs and send setattr to back to MDS. */
146 rc = ll_som_update(inode, op_data);
148 CERROR("inode %lu mdc Size-on-MDS update failed: "
149 "rc = %d\n", inode->i_ino, rc);
153 CERROR("inode %lu mdc close failed: rc = %d\n",
156 ll_finish_md_op_data(op_data);
159 rc = ll_objects_destroy(req, inode);
161 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* If SOM is active and the epoch stayed open on a write handle, queue the
 * deferred DONE_WRITING work instead of finishing here. */
168 if (exp_connect_som(exp) && !epoch_close &&
169 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
170 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
172 md_clear_open_replay_data(md_exp, och);
173 /* Free @och if it is not waiting for DONE_WRITING. */
174 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
177 if (req) /* This is close request */
178 ptlrpc_req_finished(req);
/* Drop one reference on the MDS open handle matching @flags
 * (write/exec/read) and, if this was the last user, detach the handle
 * under lli_och_mutex and close it on the MDS via
 * ll_close_inode_openhandle().  NOTE(review): lines that grab *och_p and
 * the early-return path are elided from this view. */
182 int ll_md_real_close(struct inode *inode, int flags)
184 struct ll_inode_info *lli = ll_i2info(inode);
185 struct obd_client_handle **och_p;
186 struct obd_client_handle *och;
/* Select which of the three cached open handles @flags refers to. */
191 if (flags & FMODE_WRITE) {
192 och_p = &lli->lli_mds_write_och;
193 och_usecount = &lli->lli_open_fd_write_count;
194 } else if (flags & FMODE_EXEC) {
195 och_p = &lli->lli_mds_exec_och;
196 och_usecount = &lli->lli_open_fd_exec_count;
198 LASSERT(flags & FMODE_READ);
199 och_p = &lli->lli_mds_read_och;
200 och_usecount = &lli->lli_open_fd_read_count;
203 cfs_mutex_lock(&lli->lli_och_mutex);
204 if (*och_usecount) { /* There are still users of this handle, so
206 cfs_mutex_unlock(&lli->lli_och_mutex);
211 cfs_mutex_unlock(&lli->lli_och_mutex);
213 if (och) { /* There might be a race and somebody have freed this och
215 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file close: drop a group lock if held, decrement the matching
 * open-handle use count, and close the MDS open handle unless we still
 * hold a cached OPEN DLM lock that lets us skip talking to the MDS.
 * Finally detach and free the ll_file_data.  NOTE(review): lockmode
 * selection and some branch bodies are elided from this view. */
222 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
225 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
226 struct ll_inode_info *lli = ll_i2info(inode);
230 /* clear group lock, if present */
231 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
232 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
234 /* Let's see if we have good enough OPEN lock on the file and if
235 we can skip talking to MDS */
236 if (file->f_dentry->d_inode) { /* Can this ever be false? */
238 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
239 struct lustre_handle lockh;
240 struct inode *inode = file->f_dentry->d_inode;
241 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Drop our use count on whichever handle this fd was opened against. */
243 cfs_mutex_lock(&lli->lli_och_mutex);
244 if (fd->fd_omode & FMODE_WRITE) {
246 LASSERT(lli->lli_open_fd_write_count);
247 lli->lli_open_fd_write_count--;
248 } else if (fd->fd_omode & FMODE_EXEC) {
250 LASSERT(lli->lli_open_fd_exec_count);
251 lli->lli_open_fd_exec_count--;
254 LASSERT(lli->lli_open_fd_read_count);
255 lli->lli_open_fd_read_count--;
257 cfs_mutex_unlock(&lli->lli_och_mutex);
/* No matching cached OPEN ibits lock -> must do the real MDS close. */
259 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
260 LDLM_IBITS, &policy, lockmode,
262 rc = ll_md_real_close(file->f_dentry->d_inode,
266 CERROR("Releasing a file %p with negative dentry %p. Name %s",
267 file, file->f_dentry, file->f_dentry->d_name.name);
270 LUSTRE_FPRIVATE(file) = NULL;
271 ll_file_data_put(fd);
272 ll_capa_close(inode);
277 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
279 /* While this returns an error code, fput() the caller does not, so we need
280 * to make every effort to clean up all of our state here. Also, applications
281 * rarely check close errors and even if an error is returned they will not
282 * re-try the close call.
284 int ll_file_release(struct inode *inode, struct file *file)
286 struct ll_file_data *fd;
287 struct ll_sb_info *sbi = ll_i2sbi(inode);
288 struct ll_inode_info *lli = ll_i2info(inode);
289 struct lov_stripe_md *lsm = lli->lli_smd;
293 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
294 inode->i_generation, inode);
/* Remote-client ACL bookkeeping is only torn down when releasing the
 * filesystem root. */
296 #ifdef CONFIG_FS_POSIX_ACL
297 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
298 inode == inode->i_sb->s_root->d_inode) {
299 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
302 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
303 fd->fd_flags &= ~LL_FILE_RMTACL;
304 rct_del(&sbi->ll_rct, cfs_curproc_pid());
305 et_search_free(&sbi->ll_et, cfs_curproc_pid());
310 if (inode->i_sb->s_root != file->f_dentry)
311 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
312 fd = LUSTRE_FPRIVATE(file);
315 /* The last ref on @file, maybe not the the owner pid of statahead.
316 * Different processes can open the same dir, "ll_opendir_key" means:
317 * it is me that should stop the statahead thread. */
318 if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
319 lli->lli_opendir_pid != 0)
320 ll_stop_statahead(inode, lli->lli_opendir_key);
/* The root dentry never had an MDS open handle; just free the fd. */
322 if (inode->i_sb->s_root == file->f_dentry) {
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
/* For regular files, fold any async write error into lli_async_rc.
 * NOTE(review): surrounding condition lines are elided in this view. */
328 if (!S_ISDIR(inode->i_mode)) {
330 lov_test_and_clear_async_rc(lsm);
331 lli->lli_async_rc = 0;
334 rc = ll_md_close(sbi->ll_md_exp, inode, file);
336 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
337 libcfs_debug_dumplog();
/* Perform the MDS intent-open for @file: build op_data for the parent +
 * name, enqueue an IT_OPEN intent lock (requesting MDS_OPEN_LOCK when we
 * are not just setting stripe info), and on success prep the inode from
 * the reply and record the lock.  @lmm/@lmmsize non-zero means the caller
 * is setting striping via this open.  NOTE(review): rc declaration and
 * several error-path lines are elided from this view. */
342 static int ll_intent_file_open(struct file *file, void *lmm,
343 int lmmsize, struct lookup_intent *itp)
345 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
346 struct dentry *parent = file->f_dentry->d_parent;
347 const char *name = file->f_dentry->d_name.name;
348 const int len = file->f_dentry->d_name.len;
349 struct md_op_data *op_data;
350 struct ptlrpc_request *req;
351 __u32 opc = LUSTRE_OPC_ANY;
358 /* Usually we come here only for NFSD, and we want open lock.
359 But we can also get here with pre 2.6.15 patchless kernels, and in
360 that case that lock is also ok */
361 /* We can also get here if there was cached open handle in revalidate_it
362 * but it disappeared while we were getting from there to ll_file_open.
363 * But this means this file was closed and immediatelly opened which
364 * makes a good candidate for using OPEN lock */
365 /* If lmmsize & lmm are not 0, we are just setting stripe info
366 * parameters. No need for the open lock */
367 if (lmm == NULL && lmmsize == 0) {
368 itp->it_flags |= MDS_OPEN_LOCK;
369 if (itp->it_flags & FMODE_WRITE)
370 opc = LUSTRE_OPC_CREATE;
373 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
374 file->f_dentry->d_inode, name, len,
377 RETURN(PTR_ERR(op_data));
379 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
380 0 /*unused */, &req, ll_md_blocking_ast, 0);
381 ll_finish_md_op_data(op_data);
383 /* reason for keep own exit path - don`t flood log
384 * with messages with -ESTALE errors.
/* On -ESTALE (presumably -- condition elided), release any granted open
 * handle before bailing out. */
386 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
387 it_open_error(DISP_OPEN_OPEN, itp))
389 ll_release_openhandle(file->f_dentry, itp);
393 if (it_disposition(itp, DISP_LOOKUP_NEG))
394 GOTO(out, rc = -ENOENT);
396 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
397 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
398 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
402 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
403 if (!rc && itp->d.lustre.it_lock_mode)
404 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
/* Common exit: drop the intent's request ref and any intent lock. */
408 ptlrpc_req_finished(itp->d.lustre.it_data);
409 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
410 ll_intent_drop_lock(itp);
416 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
417 * not believe attributes if a few ioepoch holders exist. Attributes for
418 * previous ioepoch if new one is opened are also skipped by MDS.
420 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a non-zero epoch that differs from the current one. */
422 if (ioepoch && lli->lli_ioepoch != ioepoch) {
423 lli->lli_ioepoch = ioepoch;
424 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
425 ioepoch, PFID(&lli->lli_fid));
/* Populate an obd_client_handle from the MDT_BODY in the intent's reply:
 * copy the server file handle, fid, open flags and ioepoch, then register
 * the request for open replay.  Returns md_set_open_replay_data()'s rc. */
429 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
430 struct lookup_intent *it, struct obd_client_handle *och)
432 struct ptlrpc_request *req = it->d.lustre.it_data;
433 struct mdt_body *body;
437 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
438 LASSERT(body != NULL); /* reply already checked out */
440 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
441 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
442 och->och_fid = lli->lli_fid;
443 och->och_flags = it->it_flags;
444 ll_ioepoch_open(lli, body->ioepoch);
446 return md_set_open_replay_data(md_exp, och, req);
/* Finish the client-local part of an open: fill @och from the intent reply
 * (when a new MDS handle was granted), then attach @fd to the file, init
 * readahead state and record the open mode.  NOTE(review): the condition
 * guarding the och-fill branch is elided from this view. */
449 int ll_local_open(struct file *file, struct lookup_intent *it,
450 struct ll_file_data *fd, struct obd_client_handle *och)
452 struct inode *inode = file->f_dentry->d_inode;
453 struct ll_inode_info *lli = ll_i2info(inode);
456 LASSERT(!LUSTRE_FPRIVATE(file));
461 struct ptlrpc_request *req = it->d.lustre.it_data;
462 struct mdt_body *body;
465 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
469 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
470 if ((it->it_flags & FMODE_WRITE) &&
471 (body->valid & OBD_MD_FLSIZE))
472 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
473 lli->lli_ioepoch, PFID(&lli->lli_fid));
476 LUSTRE_FPRIVATE(file) = fd;
477 ll_readahead_init(inode, &fd->fd_ras);
478 fd->fd_omode = it->it_flags;
482 /* Open a file, and (for the very first open) create objects on the OSTs at
483 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
484 * creation or open until ll_lov_setstripe() ioctl is called.
486 * If we already have the stripe MD locally then we don't request it in
487 * md_open(), by passing a lmm_size = 0.
489 * It is up to the application to ensure no other processes open this file
490 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
491 * used. We might be able to avoid races of that sort by getting lli_open_sem
492 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
493 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
495 int ll_file_open(struct inode *inode, struct file *file)
497 struct ll_inode_info *lli = ll_i2info(inode);
498 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
499 .it_flags = file->f_flags };
500 struct lov_stripe_md *lsm;
501 struct obd_client_handle **och_p = NULL;
502 __u64 *och_usecount = NULL;
503 struct ll_file_data *fd;
504 int rc = 0, opendir_set = 0;
507 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
508 inode->i_generation, inode, file->f_flags);
/* The lookup intent (if any) was stashed in private_data by lookup. */
510 it = file->private_data; /* XXX: compat macro */
511 file->private_data = NULL; /* prevent ll_local_open assertion */
513 fd = ll_file_data_get();
515 GOTO(out_och_free, rc = -ENOMEM);
/* First opener of a directory claims the statahead "opendir key". */
518 if (S_ISDIR(inode->i_mode)) {
519 cfs_spin_lock(&lli->lli_sa_lock);
520 if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
521 lli->lli_opendir_pid == 0) {
522 lli->lli_opendir_key = fd;
523 lli->lli_opendir_pid = cfs_curproc_pid();
526 cfs_spin_unlock(&lli->lli_sa_lock);
529 if (inode->i_sb->s_root == file->f_dentry) {
530 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: synthesize our own IT_OPEN intent. */
534 if (!it || !it->d.lustre.it_disposition) {
535 /* Convert f_flags into access mode. We cannot use file->f_mode,
536 * because everything but O_ACCMODE mask was stripped from
538 if ((oit.it_flags + 1) & O_ACCMODE)
540 if (file->f_flags & O_TRUNC)
541 oit.it_flags |= FMODE_WRITE;
543 /* kernel only call f_op->open in dentry_open. filp_open calls
544 * dentry_open after call to open_namei that checks permissions.
545 * Only nfsd_open call dentry_open directly without checking
546 * permissions and because of that this code below is safe. */
547 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
548 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
550 /* We do not want O_EXCL here, presumably we opened the file
551 * already? XXX - NFS implications? */
552 oit.it_flags &= ~O_EXCL;
554 /* bug20584, if "it_flags" contains O_CREAT, the file will be
555 * created if necessary, then "IT_CREAT" should be set to keep
556 * consistent with it */
557 if (oit.it_flags & O_CREAT)
558 oit.it_op |= IT_CREAT;
564 /* Let's see if we have file open on MDS already. */
565 if (it->it_flags & FMODE_WRITE) {
566 och_p = &lli->lli_mds_write_och;
567 och_usecount = &lli->lli_open_fd_write_count;
568 } else if (it->it_flags & FMODE_EXEC) {
569 och_p = &lli->lli_mds_exec_och;
570 och_usecount = &lli->lli_open_fd_exec_count;
572 och_p = &lli->lli_mds_read_och;
573 och_usecount = &lli->lli_open_fd_read_count;
576 cfs_mutex_lock(&lli->lli_och_mutex);
577 if (*och_p) { /* Open handle is present */
578 if (it_disposition(it, DISP_OPEN_OPEN)) {
579 /* Well, there's extra open request that we do not need,
580 let's close it somehow. This will decref request. */
581 rc = it_open_error(DISP_OPEN_OPEN, it);
583 cfs_mutex_unlock(&lli->lli_och_mutex);
584 GOTO(out_openerr, rc);
587 ll_release_openhandle(file->f_dentry, it);
/* Reuse the cached handle: local-only open, no och needed. */
591 rc = ll_local_open(file, it, fd, NULL);
594 cfs_mutex_unlock(&lli->lli_och_mutex);
595 GOTO(out_openerr, rc);
598 LASSERT(*och_usecount == 0);
599 if (!it->d.lustre.it_disposition) {
600 /* We cannot just request lock handle now, new ELC code
601 means that one of other OPEN locks for this file
602 could be cancelled, and since blocking ast handler
603 would attempt to grab och_mutex as well, that would
604 result in a deadlock */
605 cfs_mutex_unlock(&lli->lli_och_mutex);
606 it->it_create_mode |= M_CHECK_STALE;
607 rc = ll_intent_file_open(file, NULL, 0, it);
608 it->it_create_mode &= ~M_CHECK_STALE;
610 GOTO(out_openerr, rc);
614 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
616 GOTO(out_och_free, rc = -ENOMEM);
620 /* md_intent_lock() didn't get a request ref if there was an
621 * open error, so don't do cleanup on the request here
623 /* XXX (green): Should not we bail out on any error here, not
624 * just open error? */
625 rc = it_open_error(DISP_OPEN_OPEN, it);
627 GOTO(out_och_free, rc);
629 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
631 rc = ll_local_open(file, it, fd, *och_p);
633 GOTO(out_och_free, rc);
635 cfs_mutex_unlock(&lli->lli_och_mutex);
638 /* Must do this outside lli_och_mutex lock to prevent deadlock where
639 different kind of OPEN lock for this same inode gets cancelled
640 by ldlm_cancel_lru */
641 if (!S_ISREG(inode->i_mode))
642 GOTO(out_och_free, rc);
/* Object creation is deferred for O_LOV_DELAY_CREATE / read-only opens. */
648 if (file->f_flags & O_LOV_DELAY_CREATE ||
649 !(file->f_mode & FMODE_WRITE)) {
650 CDEBUG(D_INODE, "object creation was delayed\n");
651 GOTO(out_och_free, rc);
654 file->f_flags &= ~O_LOV_DELAY_CREATE;
655 GOTO(out_och_free, rc);
/* Cleanup labels (out_och_free / out_openerr presumably elided here):
 * drop the intent's request ref, free an unused och, undo the opendir
 * key, free fd on error, and tally the OPEN stat on success. */
658 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
659 ptlrpc_req_finished(it->d.lustre.it_data);
660 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
664 if (och_p && *och_p) {
665 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
666 *och_p = NULL; /* OBD_FREE writes some magic there */
669 cfs_mutex_unlock(&lli->lli_och_mutex);
672 if (opendir_set != 0)
673 ll_stop_statahead(inode, lli->lli_opendir_key);
675 ll_file_data_put(fd);
677 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
683 /* Fills the obdo with the attributes for the lsm */
684 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
685 struct obd_capa *capa, struct obdo *obdo,
686 __u64 ioepoch, int sync)
688 struct ptlrpc_request_set *set;
689 struct obd_info oinfo = { { { 0 } } };
694 LASSERT(lsm != NULL);
/* Ask the OSTs for size/blocks/times etc. of the object set. */
698 oinfo.oi_oa->o_id = lsm->lsm_object_id;
699 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
700 oinfo.oi_oa->o_mode = S_IFREG;
701 oinfo.oi_oa->o_ioepoch = ioepoch;
702 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
703 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
704 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
705 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
706 OBD_MD_FLGROUP | OBD_MD_FLEPOCH |
707 OBD_MD_FLDATAVERSION;
708 oinfo.oi_capa = capa;
/* @sync != 0 (condition elided): request the server-side lock. */
710 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
711 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
714 set = ptlrpc_prep_set();
716 CERROR("can't allocate ptlrpc set\n");
719 rc = obd_getattr_async(exp, &oinfo, set);
721 rc = ptlrpc_set_wait(set);
722 ptlrpc_set_destroy(set);
/* Keep only the attribute bits the caller may trust from the OSTs. */
725 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
726 OBD_MD_FLATIME | OBD_MD_FLMTIME |
727 OBD_MD_FLCTIME | OBD_MD_FLSIZE |
728 OBD_MD_FLDATAVERSION);
733 * Performs the getattr on the inode and updates its fields.
734 * If @sync != 0, perform the getattr under the server-side lock.
736 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
737 __u64 ioepoch, int sync)
739 struct ll_inode_info *lli = ll_i2info(inode);
740 struct obd_capa *capa = ll_mdscapa_get(inode);
744 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
745 capa, obdo, ioepoch, sync);
/* On success (check elided), merge returned attrs into the inode. */
748 obdo_refresh_inode(inode, obdo, obdo->o_valid);
750 "objid "LPX64" size %llu, blocks %llu, blksize %lu\n",
751 lli->lli_smd->lsm_object_id, i_size_read(inode),
752 (unsigned long long)inode->i_blocks,
753 (unsigned long)ll_inode_blksize(inode));
/* Merge the MDS-supplied timestamps cached in lli_lvb with the per-OST
 * lock value blocks, then publish the merged size/blocks/times into the
 * VFS inode under the inode size lock. */
758 int ll_merge_lvb(struct inode *inode)
760 struct ll_inode_info *lli = ll_i2info(inode);
761 struct ll_sb_info *sbi = ll_i2sbi(inode);
767 ll_inode_size_lock(inode, 1);
768 inode_init_lvb(inode, &lvb);
770 /* merge timestamps the most resently obtained from mds with
771 timestamps obtained from osts */
772 lvb.lvb_atime = lli->lli_lvb.lvb_atime;
773 lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
774 lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
775 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
776 cl_isize_write_nolock(inode, lvb.lvb_size);
778 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
779 PFID(&lli->lli_fid), lvb.lvb_size);
780 inode->i_blocks = lvb.lvb_blocks;
782 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
783 LTIME_S(inode->i_atime) = lvb.lvb_atime;
784 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
785 ll_inode_size_unlock(inode, 1);
/* Glimpse helper for ioctls: fetch size/blocks/times for @lsm from the
 * OSTs (no capability, no ioepoch, non-sync) and copy them into the
 * caller's stat structure. */
790 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
793 struct obdo obdo = { 0 };
796 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
798 st->st_size = obdo.o_size;
799 st->st_blocks = obdo.o_blocks;
800 st->st_mtime = obdo.o_mtime;
801 st->st_atime = obdo.o_atime;
802 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: propagate O_NONBLOCK/O_APPEND, attach the cl_object, and choose
 * the DLM locking policy (never for nolock mounts, mandatory for append,
 * otherwise "maybe"). */
807 void ll_io_init(struct cl_io *io, const struct file *file, int write)
809 struct inode *inode = file->f_dentry->d_inode;
811 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
813 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
814 io->ci_obj = ll_i2info(inode)->lli_clob;
815 io->ci_lockreq = CILR_MAYBE;
816 if (ll_file_nolock(file)) {
817 io->ci_lockreq = CILR_NEVER;
818 io->ci_no_srvlock = 1;
819 } else if (file->f_flags & O_APPEND) {
820 io->ci_lockreq = CILR_MANDATORY;
/* Common driver for all read/write entry points (normal, sendfile,
 * splice): set up the cl_io, copy the per-subtype arguments into the
 * vvp/ccc io state, take lli_write_mutex or lli_trunc_sem as needed,
 * run cl_io_loop(), update *ppos, and tally stats.  NOTE(review): io
 * declaration, cl_io_fini and several case labels are elided here. */
824 static ssize_t ll_file_io_generic(const struct lu_env *env,
825 struct vvp_io_args *args, struct file *file,
826 enum cl_io_type iot, loff_t *ppos, size_t count)
828 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
833 io = ccc_env_thread_io(env);
834 ll_io_init(io, file, iot == CIT_WRITE);
836 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
837 struct vvp_io *vio = vvp_env_io(env);
838 struct ccc_io *cio = ccc_env_io(env);
839 int write_mutex_locked = 0;
841 cio->cui_fd = LUSTRE_FPRIVATE(file);
842 vio->cui_io_subtype = args->via_io_subtype;
844 switch (vio->cui_io_subtype) {
/* IO_NORMAL (case label elided): plain iovec-based read/write. */
846 cio->cui_iov = args->u.normal.via_iov;
847 cio->cui_nrsegs = args->u.normal.via_nrsegs;
848 cio->cui_tot_nrsegs = cio->cui_nrsegs;
849 #ifndef HAVE_FILE_WRITEV
850 cio->cui_iocb = args->u.normal.via_iocb;
/* Serialize non-group-lock writes; readers only block truncation. */
852 if ((iot == CIT_WRITE) &&
853 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
854 if (cfs_mutex_lock_interruptible(&lli->
856 GOTO(out, result = -ERESTARTSYS);
857 write_mutex_locked = 1;
858 } else if (iot == CIT_READ) {
859 cfs_down_read(&lli->lli_trunc_sem);
863 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
864 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
867 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
868 vio->u.splice.cui_flags = args->u.splice.via_flags;
871 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
874 result = cl_io_loop(env, io);
875 if (write_mutex_locked)
876 cfs_mutex_unlock(&lli->lli_write_mutex);
877 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
878 cfs_up_read(&lli->lli_trunc_sem);
880 /* cl_io_rw_init() handled IO */
881 result = io->ci_result;
884 if (io->ci_nob > 0) {
886 *ppos = io->u.ci_wr.wr.crw_pos;
/* Stats: bytes moved per direction; record write rc for close(). */
892 if (iot == CIT_READ) {
894 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
895 LPROC_LL_READ_BYTES, result);
896 } else if (iot == CIT_WRITE) {
898 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
899 LPROC_LL_WRITE_BYTES, result);
900 lli->lli_write_rc = 0;
902 lli->lli_write_rc = result;
911 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
913 static int ll_file_get_iov_count(const struct iovec *iov,
914 unsigned long *nr_segs, size_t *count)
919 for (seg = 0; seg < *nr_segs; seg++) {
920 const struct iovec *iv = &iov[seg];
923 * If any segment has a negative length, or the cumulative
924 * length ever wraps negative then return -EINVAL.
927 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
/* Segment readable by the user: accumulate and continue (body elided). */
929 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
/* Unreadable segment: truncate the count at this point. */
934 cnt -= iv->iov_len; /* This segment is no good */
/* Read entry points.  Two variants selected at configure time:
 * HAVE_FILE_READV kernels get readv()-style handlers; newer kernels get
 * aio_read()-style handlers.  Both funnel into ll_file_io_generic()
 * with IO_NORMAL args. */
941 #ifdef HAVE_FILE_READV
942 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
943 unsigned long nr_segs, loff_t *ppos)
946 struct vvp_io_args *args;
952 result = ll_file_get_iov_count(iov, &nr_segs, &count);
956 env = cl_env_get(&refcheck);
958 RETURN(PTR_ERR(env));
960 args = vvp_env_args(env, IO_NORMAL);
961 args->u.normal.via_iov = (struct iovec *)iov;
962 args->u.normal.via_nrsegs = nr_segs;
964 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
965 cl_env_put(env, &refcheck);
/* read(2) wrapper: build a single-element iovec and reuse readv. */
969 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
973 struct iovec *local_iov;
978 env = cl_env_get(&refcheck);
980 RETURN(PTR_ERR(env));
982 local_iov = &vvp_env_info(env)->vti_local_iov;
983 local_iov->iov_base = (void __user *)buf;
984 local_iov->iov_len = count;
985 result = ll_file_readv(file, local_iov, 1, ppos);
986 cl_env_put(env, &refcheck);
/* aio_read handler (non-HAVE_FILE_READV kernels). */
991 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
992 unsigned long nr_segs, loff_t pos)
995 struct vvp_io_args *args;
1001 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1005 env = cl_env_get(&refcheck);
1007 RETURN(PTR_ERR(env));
1009 args = vvp_env_args(env, IO_NORMAL);
1010 args->u.normal.via_iov = (struct iovec *)iov;
1011 args->u.normal.via_nrsegs = nr_segs;
1012 args->u.normal.via_iocb = iocb;
1014 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1015 &iocb->ki_pos, count);
1016 cl_env_put(env, &refcheck);
/* read(2) wrapper for the aio variant: synthesize a sync kiocb. */
1020 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1024 struct iovec *local_iov;
1025 struct kiocb *kiocb;
1030 env = cl_env_get(&refcheck);
1032 RETURN(PTR_ERR(env));
1034 local_iov = &vvp_env_info(env)->vti_local_iov;
1035 kiocb = &vvp_env_info(env)->vti_kiocb;
1036 local_iov->iov_base = (void __user *)buf;
1037 local_iov->iov_len = count;
1038 init_sync_kiocb(kiocb, file);
1039 kiocb->ki_pos = *ppos;
1040 kiocb->ki_left = count;
1042 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1043 *ppos = kiocb->ki_pos;
1045 cl_env_put(env, &refcheck);
1051 * Write to a file (through the page cache).
/* Write entry points, mirroring the read side: writev()-style handlers
 * under HAVE_FILE_WRITEV, aio_write()-style otherwise; all funnel into
 * ll_file_io_generic() with CIT_WRITE. */
1053 #ifdef HAVE_FILE_WRITEV
1054 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1055 unsigned long nr_segs, loff_t *ppos)
1058 struct vvp_io_args *args;
1064 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1068 env = cl_env_get(&refcheck);
1070 RETURN(PTR_ERR(env));
1072 args = vvp_env_args(env, IO_NORMAL);
1073 args->u.normal.via_iov = (struct iovec *)iov;
1074 args->u.normal.via_nrsegs = nr_segs;
1076 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1077 cl_env_put(env, &refcheck);
/* write(2) wrapper: single-element iovec over the user buffer. */
1081 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1085 struct iovec *local_iov;
1090 env = cl_env_get(&refcheck);
1092 RETURN(PTR_ERR(env));
1094 local_iov = &vvp_env_info(env)->vti_local_iov;
1095 local_iov->iov_base = (void __user *)buf;
1096 local_iov->iov_len = count;
1098 result = ll_file_writev(file, local_iov, 1, ppos);
1099 cl_env_put(env, &refcheck);
1103 #else /* AIO stuff */
1104 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1105 unsigned long nr_segs, loff_t pos)
1108 struct vvp_io_args *args;
1114 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1118 env = cl_env_get(&refcheck);
1120 RETURN(PTR_ERR(env));
1122 args = vvp_env_args(env, IO_NORMAL);
1123 args->u.normal.via_iov = (struct iovec *)iov;
1124 args->u.normal.via_nrsegs = nr_segs;
1125 args->u.normal.via_iocb = iocb;
1127 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1128 &iocb->ki_pos, count);
1129 cl_env_put(env, &refcheck);
/* write(2) wrapper for the aio variant: synthesize a sync kiocb. */
1133 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1137 struct iovec *local_iov;
1138 struct kiocb *kiocb;
1143 env = cl_env_get(&refcheck);
1145 RETURN(PTR_ERR(env));
1147 local_iov = &vvp_env_info(env)->vti_local_iov;
1148 kiocb = &vvp_env_info(env)->vti_kiocb;
1149 local_iov->iov_base = (void __user *)buf;
1150 local_iov->iov_len = count;
1151 init_sync_kiocb(kiocb, file);
1152 kiocb->ki_pos = *ppos;
1153 kiocb->ki_left = count;
1155 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1156 *ppos = kiocb->ki_pos;
1158 cl_env_put(env, &refcheck);
/* sendfile/splice_read entry points (kernel-feature gated): package the
 * actor/target or pipe/flags into IO_SENDFILE / IO_SPLICE args and run
 * the generic CIT_READ path. */
1164 #ifdef HAVE_KERNEL_SENDFILE
1166 * Send file content (through pagecache) somewhere with helper
1168 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1169 read_actor_t actor, void *target)
1172 struct vvp_io_args *args;
1177 env = cl_env_get(&refcheck);
1179 RETURN(PTR_ERR(env));
1181 args = vvp_env_args(env, IO_SENDFILE);
1182 args->u.sendfile.via_target = target;
1183 args->u.sendfile.via_actor = actor;
1185 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1186 cl_env_put(env, &refcheck);
1191 #ifdef HAVE_KERNEL_SPLICE_READ
1193 * Send file content (through pagecache) somewhere with helper
1195 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1196 struct pipe_inode_info *pipe, size_t count,
1200 struct vvp_io_args *args;
1205 env = cl_env_get(&refcheck);
1207 RETURN(PTR_ERR(env));
1209 args = vvp_env_args(env, IO_SPLICE);
1210 args->u.splice.via_pipe = pipe;
1211 args->u.splice.via_flags = flags;
1213 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1214 cl_env_put(env, &refcheck);
/* Re-create a lost OST object for this inode: clone the stripe metadata,
 * build an obdo flagged OBD_FL_RECREATE_OBJS (o_nlink carries the target
 * OST index), and call obd_create() under the inode size lock.
 * NOTE(review): the obdo allocation and id/seq assignment lines are
 * elided from this view. */
1219 static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
1222 struct obd_export *exp = ll_i2dtexp(inode);
1223 struct obd_trans_info oti = { 0 };
1224 struct obdo *oa = NULL;
1227 struct lov_stripe_md *lsm, *lsm2;
1234 ll_inode_size_lock(inode, 0);
1235 lsm = ll_i2info(inode)->lli_smd;
1237 GOTO(out, rc = -ENOENT);
1238 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1239 (lsm->lsm_stripe_count));
1241 OBD_ALLOC_LARGE(lsm2, lsm_size);
1243 GOTO(out, rc = -ENOMEM);
/* o_nlink is (ab)used here to carry the target OST index. */
1247 oa->o_nlink = ost_idx;
1248 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1249 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1250 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1251 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1252 obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
1253 memcpy(lsm2, lsm, lsm_size);
1254 rc = obd_create(exp, oa, &lsm2, &oti);
1256 OBD_FREE_LARGE(lsm2, lsm_size);
1259 ll_inode_size_unlock(inode, 0);
/* LL_IOC_RECREATE_OBJ handler: admin-only; copy the ll_recreate_obj
 * request from userspace and recreate the object by id/ost index. */
1264 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1266 struct ll_recreate_obj ucreat;
1269 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1272 if (cfs_copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1273 sizeof(struct ll_recreate_obj)))
1276 RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
1277 ucreat.lrc_ost_idx));
/* LL_IOC_RECREATE_FID handler: admin-only; copy a lu_fid from userspace
 * and decode the legacy IDIF packing (oid + low 16 seq bits -> object id,
 * next 16 seq bits -> OST index) before recreating the object. */
1280 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1287 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1290 if (cfs_copy_from_user(&fid, (struct lu_fid *)arg,
1291 sizeof(struct lu_fid)))
1294 id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
1295 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1296 RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
/* Set striping on @inode via an intent open carrying the lov_user_md:
 * fails if stripe metadata already exists; otherwise re-opens the file
 * through ll_intent_file_open() with @lum attached.  NOTE(review): the
 * lines between the open call and the cleanup (including the GOTO
 * targets) are elided from this view. */
1299 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1300 int flags, struct lov_user_md *lum, int lum_size)
1302 struct lov_stripe_md *lsm;
1303 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1307 ll_inode_size_lock(inode, 0);
1308 lsm = ll_i2info(inode)->lli_smd;
/* Stripes can only be set once per file. */
1310 ll_inode_size_unlock(inode, 0);
1311 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1316 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1319 rc = oit.d.lustre.it_status;
1321 GOTO(out_req_free, rc);
1323 ll_release_openhandle(file->f_dentry, &oit);
1326 ll_inode_size_unlock(inode, 0);
1327 ll_intent_release(&oit);
1330 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (striping metadata) for @filename from the MDS via
 * md_getattr_name().  On success *lmmp points into the reply buffer
 * (caller must keep *request alive and eventually finish it) and
 * *lmm_size is the EA size.  Returns 0 or negative errno.
 */
1334 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1335 struct lov_mds_md **lmmp, int *lmm_size,
1336 struct ptlrpc_request **request)
1338 struct ll_sb_info *sbi = ll_i2sbi(inode);
1339 struct mdt_body *body;
1340 struct lov_mds_md *lmm = NULL;
1341 struct ptlrpc_request *req = NULL;
1342 struct md_op_data *op_data;
/* Size the reply buffer for the largest possible MDS EA. */
1345 rc = ll_get_max_mdsize(sbi, &lmmsize);
1349 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1350 strlen(filename), lmmsize,
1351 LUSTRE_OPC_ANY, NULL);
1352 if (IS_ERR(op_data))
1353 RETURN(PTR_ERR(op_data));
1355 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1356 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1357 ll_finish_md_op_data(op_data);
1359 CDEBUG(D_INFO, "md_getattr_name failed "
1360 "on %s: rc %d\n", filename, rc);
1364 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1365 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1367 lmmsize = body->eadatasize;
/* No striping EA (or zero-sized one) on this object. */
1369 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1371 GOTO(out, rc = -ENODATA);
1374 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1375 LASSERT(lmm != NULL);
/* Only v1/v3 LOV EAs are understood here (wire format, little endian). */
1377 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1378 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1379 GOTO(out, rc = -EPROTO);
1383 * This is coming from the MDS, so is probably in
1384 * little endian. We convert it to host endian before
1385 * passing it to userspace.
/* On big-endian hosts LOV_MAGIC != its LE encoding, so swab is needed. */
1387 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1388 /* if function called for directory - we should
1389 * avoid swab not existent lsm objects */
1390 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1391 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1392 if (S_ISREG(body->mode))
1393 lustre_swab_lov_user_md_objects(
1394 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1395 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1396 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1397 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1398 if (S_ISREG(body->mode))
1399 lustre_swab_lov_user_md_objects(
1400 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1401 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1407 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA ioctl handler: set a raw striping EA that already
 * names existing objects (MDS_OPEN_HAS_OBJS).  Root-only; copies the
 * lov_user_md (plus one ost_data entry) from user space and hands it to
 * ll_lov_setstripe_ea_info().
 */
1412 static int ll_lov_setea(struct inode *inode, struct file *file,
1415 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1416 struct lov_user_md *lump;
1417 int lum_size = sizeof(struct lov_user_md) +
1418 sizeof(struct lov_user_ost_data);
1422 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1425 OBD_ALLOC_LARGE(lump, lum_size);
1429 if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1430 OBD_FREE_LARGE(lump, lum_size);
1434 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1436 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE ioctl handler: read the user's lov_user_md (v1
 * first, then the larger v3 if the magic says so), set the stripe EA,
 * then echo back the resulting striping via LL_IOC_LOV_GETSTRIPE.
 */
1440 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1443 struct lov_user_md_v3 lumv3;
1444 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1445 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1446 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1449 int flags = FMODE_WRITE;
1452 /* first try with v1 which is smaller than v3 */
1453 lum_size = sizeof(struct lov_user_md_v1);
1454 if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
/* v3 magic: re-copy the full (larger) structure. */
1457 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1458 lum_size = sizeof(struct lov_user_md_v3);
1459 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1463 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* On success report the actual striping back into the user's buffer. */
1465 put_user(0, &lumv1p->lmm_stripe_count);
1466 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1467 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE ioctl handler: forward the inode's stripe MD to
 * the data export, which packs it into the user's buffer at @arg.
 * NOTE(review): sparse extraction — the no-stripe error path is missing
 * from this view.
 */
1473 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1475 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1480 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
/*
 * LL_IOC_GROUP_LOCK handler: take a cl-layer group lock with gid @arg and
 * record it in the per-open file data.  The lli_lock spinlock is dropped
 * around the (potentially blocking) cl_get_grouplock() call, so the
 * LL_FILE_GROUP_LOCKED flag is re-checked afterwards to handle races.
 */
1485 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1487 struct ll_inode_info *lli = ll_i2info(inode);
1488 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1489 struct ccc_grouplock grouplock;
/* Group locks make no sense on nolock mounts. */
1493 if (ll_file_nolock(file))
1494 RETURN(-EOPNOTSUPP);
1496 cfs_spin_lock(&lli->lli_lock);
1497 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1498 CWARN("group lock already existed with gid %lu\n",
1499 fd->fd_grouplock.cg_gid);
1500 cfs_spin_unlock(&lli->lli_lock);
1503 LASSERT(fd->fd_grouplock.cg_lock == NULL);
/* Drop the spinlock: cl_get_grouplock() may block. */
1504 cfs_spin_unlock(&lli->lli_lock);
1506 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1507 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1511 cfs_spin_lock(&lli->lli_lock);
/* Another thread may have installed its lock while we were blocked. */
1512 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1513 cfs_spin_unlock(&lli->lli_lock);
1514 CERROR("another thread just won the race\n");
1515 cl_put_grouplock(&grouplock);
1519 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1520 fd->fd_grouplock = grouplock;
1521 cfs_spin_unlock(&lli->lli_lock);
1523 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock recorded in the
 * file data, provided one is held and its gid matches @arg.  The lock is
 * copied out and cleared under lli_lock, then released outside it.
 */
1527 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1529 struct ll_inode_info *lli = ll_i2info(inode);
1530 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1531 struct ccc_grouplock grouplock;
1534 cfs_spin_lock(&lli->lli_lock);
1535 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1536 cfs_spin_unlock(&lli->lli_lock);
1537 CWARN("no group lock held\n");
1540 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* Only the lock with a matching group id may be released. */
1542 if (fd->fd_grouplock.cg_gid != arg) {
1543 CWARN("group lock %lu doesn't match current id %lu\n",
1544 arg, fd->fd_grouplock.cg_gid);
1545 cfs_spin_unlock(&lli->lli_lock);
/* Detach the lock from fd state before releasing it unlocked. */
1549 grouplock = fd->fd_grouplock;
1550 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1551 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1552 cfs_spin_unlock(&lli->lli_lock);
1554 cl_put_grouplock(&grouplock);
1555 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1560 * Close inode open handle
1562 * \param dentry [in]     dentry which contains the inode
1563 * \param it     [in,out] intent which contains open info and result
/* \retval 0     success */
1566 * \retval <0 failure
1568 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1570 struct inode *inode = dentry->d_inode;
1571 struct obd_client_handle *och;
1577 /* Root ? Do nothing. */
1578 if (dentry->d_inode->i_sb->s_root == dentry)
1581 /* No open handle to close? Move away */
1582 if (!it_disposition(it, DISP_OPEN_OPEN))
1585 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1587 OBD_ALLOC(och, sizeof(*och));
1589 GOTO(out, rc = -ENOMEM);
/* Fill the client handle from the intent, then send the MDS close. */
1591 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1592 ll_i2info(inode), it, och);
1594 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1597 /* this one is in place of ll_file_open */
1598 if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1599 ptlrpc_req_finished(it->d.lustre.it_data);
1600 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1606 * Get size for inode for which FIEMAP mapping is requested.
1607 * Make the FIEMAP get_info call and returns the result.
/* @fiemap is both input (request) and output (mapped extents);
 * @num_bytes is the total buffer size including the extent array. */
1609 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1612 struct obd_export *exp = ll_i2dtexp(inode);
1613 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1614 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1615 int vallen = num_bytes;
1619 /* Checks for fiemap flags */
1620 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* Report the unsupported flags back to the caller (fiemap contract). */
1621 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1625 /* Check for FIEMAP_FLAG_SYNC */
1626 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1627 rc = filemap_fdatawrite(inode->i_mapping);
1632 /* If the stripe_count > 1 and the application does not understand
1633 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1635 if (lsm->lsm_stripe_count > 1 &&
1636 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
/* Identify the object set for the OST-side get_info call. */
1639 fm_key.oa.o_id = lsm->lsm_object_id;
1640 fm_key.oa.o_seq = lsm->lsm_object_seq;
1641 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1643 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLSIZE);
1644 obdo_set_parent_fid(&fm_key.oa, &ll_i2info(inode)->lli_fid);
1645 /* If filesize is 0, then there would be no objects for mapping */
1646 if (fm_key.oa.o_size == 0) {
1647 fiemap->fm_mapped_extents = 0;
1651 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1653 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1655 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH handler: translate a FID into a path name.  The input
 * getinfo_fid2path tells us the user's path buffer length; a response
 * buffer of that size is allocated, filled via the MDC iocontrol, and
 * copied back to user space.
 * NOTE(review): sparse extraction — the gfin free and several error
 * RETURNs are not visible in this view.
 */
1660 int ll_fid2path(struct obd_export *exp, void *arg)
1662 struct getinfo_fid2path *gfout, *gfin;
1666 /* Need to get the buflen */
1667 OBD_ALLOC_PTR(gfin);
1670 if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
/* Output = fixed header + user-declared path buffer length. */
1675 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1676 OBD_ALLOC(gfout, outsize);
1677 if (gfout == NULL) {
1681 memcpy(gfout, gfin, sizeof(*gfout));
1684 /* Call mdc_iocontrol */
1685 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1688 if (cfs_copy_to_user(arg, gfout, outsize))
1692 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP ioctl path: size a kernel fiemap buffer from the
 * user's fm_extent_count, copy the request (and, when extents were
 * requested, the first extent used as the continuation cursor) in, run
 * ll_do_fiemap(), and copy header plus mapped extents back out.
 */
1696 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1698 struct ll_user_fiemap *fiemap_s;
1699 size_t num_bytes, ret_bytes;
1700 unsigned int extent_count;
1703 /* Get the extent count so we can calculate the size of
1704 * required fiemap buffer */
1705 if (get_user(extent_count,
1706 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
/* NOTE(review): extent_count comes from user space; no visible overflow
 * check on this multiplication in this view — confirm upstream fix. */
1708 num_bytes = sizeof(*fiemap_s) + (extent_count *
1709 sizeof(struct ll_fiemap_extent));
1711 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1712 if (fiemap_s == NULL)
1715 /* get the fiemap value */
1716 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1718 GOTO(error, rc = -EFAULT);
1720 /* If fm_extent_count is non-zero, read the first extent since
1721 * it is used to calculate end_offset and device from previous
/* fiemap call. */
1724 if (copy_from_user(&fiemap_s->fm_extents[0],
1725 (char __user *)arg + sizeof(*fiemap_s),
1726 sizeof(struct ll_fiemap_extent)))
1727 GOTO(error, rc = -EFAULT);
1730 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
/* Copy back the header plus however many extents were mapped. */
1734 ret_bytes = sizeof(struct ll_user_fiemap);
1736 if (extent_count != 0)
1737 ret_bytes += (fiemap_s->fm_mapped_extents *
1738 sizeof(struct ll_fiemap_extent));
1740 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1744 OBD_FREE_LARGE(fiemap_s, num_bytes);
1749 * Read the data_version for inode.
1751 * This value is computed using stripe object version on OST.
1752 * Version is computed using server side locking.
1754 * @param extent_lock  Take extent lock. Not needed if a process is already
1755 *                     holding the OST object group locks.
1757 static int ll_data_version(struct inode *inode, __u64 *data_version,
1760 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1761 struct ll_sb_info *sbi = ll_i2sbi(inode);
1762 struct obdo *obdo = NULL;
1766 /* If no stripe, we consider version is 0. */
1769 CDEBUG(D_INODE, "No object for inode\n");
1773 OBD_ALLOC_PTR(obdo);
/* Per-stripe getattr on the OSTs fills o_data_version. */
1777 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock);
1779 if (!(obdo->o_valid & OBD_MD_FLDATAVERSION))
1782 *data_version = obdo->o_data_version;
/*
 * Main file ioctl dispatcher for Lustre files.  The two prototypes select
 * between the unlocked_ioctl (no BKL) and legacy ioctl kernel APIs; the
 * body is shared.  Most cases delegate to the specific ll_* helpers
 * above, and unknown commands fall through to the dynamic ioctl table
 * (ll_iocontrol_call) and finally to the data-device obd_iocontrol().
 */
1790 #ifdef HAVE_UNLOCKED_IOCTL
1791 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1793 struct inode *inode = file->f_dentry->d_inode;
1795 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1799 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1804 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1805 inode->i_generation, inode, cmd);
1806 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1808 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1809 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1813 case LL_IOC_GETFLAGS:
1814 /* Get the current value of the file flags */
1815 return put_user(fd->fd_flags, (int *)arg);
1816 case LL_IOC_SETFLAGS:
1817 case LL_IOC_CLRFLAGS:
1818 /* Set or clear specific file flags */
1819 /* XXX This probably needs checks to ensure the flags are
1820 * not abused, and to handle any flag side effects.
1822 if (get_user(flags, (int *) arg))
/* LL_FILE_IGNORE_LOCK is only safe for O_DIRECT I/O. */
1825 if (cmd == LL_IOC_SETFLAGS) {
1826 if ((flags & LL_FILE_IGNORE_LOCK) &&
1827 !(file->f_flags & O_DIRECT)) {
1828 CERROR("%s: unable to disable locking on "
1829 "non-O_DIRECT file\n", current->comm);
1833 fd->fd_flags |= flags;
1835 fd->fd_flags &= ~flags;
1838 case LL_IOC_LOV_SETSTRIPE:
1839 RETURN(ll_lov_setstripe(inode, file, arg));
1840 case LL_IOC_LOV_SETEA:
1841 RETURN(ll_lov_setea(inode, file, arg));
1842 case LL_IOC_LOV_GETSTRIPE:
1843 RETURN(ll_lov_getstripe(inode, arg));
1844 case LL_IOC_RECREATE_OBJ:
1845 RETURN(ll_lov_recreate_obj(inode, arg));
1846 case LL_IOC_RECREATE_FID:
1847 RETURN(ll_lov_recreate_fid(inode, arg));
1848 case FSFILT_IOC_FIEMAP:
1849 RETURN(ll_ioctl_fiemap(inode, arg));
1850 case FSFILT_IOC_GETFLAGS:
1851 case FSFILT_IOC_SETFLAGS:
1852 RETURN(ll_iocontrol(inode, file, cmd, arg));
1853 case FSFILT_IOC_GETVERSION_OLD:
1854 case FSFILT_IOC_GETVERSION:
1855 RETURN(put_user(inode->i_generation, (int *)arg));
1856 case LL_IOC_GROUP_LOCK:
1857 RETURN(ll_get_grouplock(inode, file, arg));
1858 case LL_IOC_GROUP_UNLOCK:
1859 RETURN(ll_put_grouplock(inode, file, arg));
1860 case IOC_OBD_STATFS:
1861 RETURN(ll_obd_statfs(inode, (void *)arg));
1863 /* We need to special case any other ioctls we want to handle,
1864 * to send them to the MDS/OST as appropriate and to properly
1865 * network encode the arg field.
1866 case FSFILT_IOC_SETVERSION_OLD:
1867 case FSFILT_IOC_SETVERSION:
1869 case LL_IOC_FLUSHCTX:
1870 RETURN(ll_flush_ctx(inode));
1871 case LL_IOC_PATH2FID: {
1872 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1873 sizeof(struct lu_fid)))
1878 case OBD_IOC_FID2PATH:
1879 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1880 case LL_IOC_DATA_VERSION: {
1881 struct ioc_data_version idv;
1884 if (cfs_copy_from_user(&idv, (char *)arg, sizeof(idv)))
/* LL_DV_NOFLUSH lets callers skip the extent lock/flush step. */
1887 rc = ll_data_version(inode, &idv.idv_version,
1888 !(idv.idv_flags & LL_DV_NOFLUSH));
1891 cfs_copy_to_user((char *) arg, &idv, sizeof(idv)))
1897 case LL_IOC_GET_MDTIDX: {
1900 mdtidx = ll_get_mdt_idx(inode);
1904 if (put_user((int)mdtidx, (int*)arg))
1909 case OBD_IOC_GETDTNAME:
1910 case OBD_IOC_GETMDNAME:
1911 RETURN(ll_get_obd_name(inode, cmd, arg));
/* Unknown command: try the dynamically registered ioctl handlers... */
1916 ll_iocontrol_call(inode, file, cmd, arg, &err))
/* ...and finally forward to the data device. */
1919 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.  For SEEK_END the current size must first be
 * fetched from the OSTs via a glimpse; the result is range-checked
 * against the filesystem's max byte limit before f_pos is updated.
 */
1925 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1927 struct inode *inode = file->f_dentry->d_inode;
/* Provisional value used only for the trace message below. */
1930 retval = offset + ((origin == 2) ? i_size_read(inode) :
1931 (origin == 1) ? file->f_pos : 0);
1932 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%llu=%#llx(%s)\n",
1933 inode->i_ino, inode->i_generation, inode, retval, retval,
1934 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1935 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1937 if (origin == 2) { /* SEEK_END */
/* Glimpse refreshes i_size from the OSTs before we use it. */
1940 rc = ll_glimpse_size(inode);
1944 offset += i_size_read(inode);
1945 } else if (origin == 1) { /* SEEK_CUR */
1946 offset += file->f_pos;
1950 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1951 if (offset != file->f_pos) {
1952 file->f_pos = offset;
/*
 * ->flush() handler (called on every close of a file descriptor).  Does
 * no I/O itself: it only reports errors already recorded by earlier
 * writes or async writeback, collapsing any of them to -EIO.  The two
 * prototypes track the kernel's flush-owner-id API change.
 */
1960 #ifdef HAVE_FLUSH_OWNER_ID
1961 int ll_flush(struct file *file, fl_owner_t id)
1963 int ll_flush(struct file *file)
1966 struct inode *inode = file->f_dentry->d_inode;
1967 struct ll_inode_info *lli = ll_i2info(inode);
1968 struct lov_stripe_md *lsm = lli->lli_smd;
1971 LASSERT(!S_ISDIR(inode->i_mode));
1973 /* the application should know write failure already. */
1974 if (lli->lli_write_rc)
1977 /* catch async errors that were recorded back when async writeback
1978 * failed for pages in this mapping. */
/* Read-and-clear: each error is reported to one closer only. */
1979 rc = lli->lli_async_rc;
1980 lli->lli_async_rc = 0;
1982 err = lov_test_and_clear_async_rc(lsm);
1987 return rc ? -EIO : 0;
/*
 * fsync handler (prototype varies with the kernel's fsync API: 4-arg
 * range fsync, 2-arg, or legacy dentry form).  Waits for in-flight page
 * I/O, collects recorded async writeback errors, syncs the MDS inode via
 * md_sync(), then syncs OST data with obd_sync_rqset().
 */
1990 #ifdef HAVE_FILE_FSYNC_4ARGS
1991 int ll_fsync(struct file *file, loff_t start, loff_t end, int data)
1992 #elif defined(HAVE_FILE_FSYNC_2ARGS)
1993 int ll_fsync(struct file *file, int data)
1995 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1998 struct inode *inode = file->f_dentry->d_inode;
1999 struct ll_inode_info *lli = ll_i2info(inode);
2000 struct lov_stripe_md *lsm = lli->lli_smd;
2001 struct ptlrpc_request *req;
2002 struct obd_capa *oc;
2005 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2006 inode->i_generation, inode);
2007 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2009 /* fsync's caller has already called _fdata{sync,write}, we want
2010 * that IO to finish before calling the osc and mdc sync methods */
2011 rc = filemap_fdatawait(inode->i_mapping);
2013 /* catch async errors that were recorded back when async writeback
2014 * failed for pages in this mapping. */
2015 if (!S_ISDIR(inode->i_mode)) {
2016 err = lli->lli_async_rc;
2017 lli->lli_async_rc = 0;
2021 err = lov_test_and_clear_async_rc(lsm);
/* Sync metadata on the MDS (capability-protected). */
2027 oc = ll_mdscapa_get(inode);
2028 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2034 ptlrpc_req_finished(req);
/* Sync file data on the OSTs when a stripe MD exists. */
2037 struct obd_info *oinfo;
2039 OBD_ALLOC_PTR(oinfo);
2041 RETURN(rc ? rc : -ENOMEM);
2042 OBDO_ALLOC(oinfo->oi_oa);
2043 if (!oinfo->oi_oa) {
2044 OBD_FREE_PTR(oinfo);
2045 RETURN(rc ? rc : -ENOMEM);
2047 oinfo->oi_oa->o_id = lsm->lsm_object_id;
2048 oinfo->oi_oa->o_seq = lsm->lsm_object_seq;
2049 oinfo->oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2050 obdo_from_inode(oinfo->oi_oa, inode,
2051 OBD_MD_FLTYPE | OBD_MD_FLATIME |
2052 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
2054 obdo_set_parent_fid(oinfo->oi_oa, &ll_i2info(inode)->lli_fid);
2056 oinfo->oi_capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2057 err = obd_sync_rqset(ll_i2sbi(inode)->ll_dt_exp, oinfo, 0,
2059 capa_put(oinfo->oi_capa);
2062 OBDO_FREE(oinfo->oi_oa);
2063 OBD_FREE_PTR(oinfo);
/* Remember a write failure so later close()/flush() reports it. */
2064 lli->lli_write_rc = rc < 0 ? rc : 0;
/*
 * flock()/fcntl() lock handler: translate the VFS file_lock into an
 * LDLM_FLOCK enqueue against the MDS, then mirror granted locks into the
 * local VFS lock lists so the kernel's deadlock/cleanup logic sees them.
 */
2070 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2072 struct inode *inode = file->f_dentry->d_inode;
2073 struct ll_sb_info *sbi = ll_i2sbi(inode);
2074 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2075 .ei_cb_cp =ldlm_flock_completion_ast,
2076 .ei_cbdata = file_lock };
2077 struct md_op_data *op_data;
2078 struct lustre_handle lockh = {0};
2079 ldlm_policy_data_t flock = {{0}};
2084 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2085 inode->i_ino, file_lock);
2087 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2089 if (file_lock->fl_flags & FL_FLOCK) {
2090 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2091 /* flocks are whole-file locks */
2092 flock.l_flock.end = OFFSET_MAX;
2093 /* For flocks owner is determined by the local file desctiptor*/
2094 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2095 } else if (file_lock->fl_flags & FL_POSIX) {
2096 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2097 flock.l_flock.start = file_lock->fl_start;
2098 flock.l_flock.end = file_lock->fl_end;
2102 flock.l_flock.pid = file_lock->fl_pid;
2104 /* Somewhat ugly workaround for svc lockd.
2105 * lockd installs custom fl_lmops->fl_compare_owner that checks
2106 * for the fl_owner to be the same (which it always is on local node
2107 * I guess between lockd processes) and then compares pid.
2108 * As such we assign pid to the owner field to make it all work,
2109 * conflict with normal locks is unlikely since pid space and
2110 * pointer space for current->files are not intersecting */
2111 if (file_lock->fl_lmops && file_lock->fl_lmops->fl_compare_owner)
2112 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* Map POSIX lock type to LDLM mode: PR=read, PW=write, NL=unlock. */
2114 switch (file_lock->fl_type) {
2116 einfo.ei_mode = LCK_PR;
2119 /* An unlock request may or may not have any relation to
2120 * existing locks so we may not be able to pass a lock handle
2121 * via a normal ldlm_lock_cancel() request. The request may even
2122 * unlock a byte range in the middle of an existing lock. In
2123 * order to process an unlock request we need all of the same
2124 * information that is given with a normal read or write record
2125 * lock request. To avoid creating another ldlm unlock (cancel)
2126 * message we'll treat a LCK_NL flock request as an unlock. */
2127 einfo.ei_mode = LCK_NL;
2130 einfo.ei_mode = LCK_PW;
2133 CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
2134 file_lock->fl_type);
/* Non-blocking set: tell the server not to wait on conflicts. */
2149 flags = LDLM_FL_BLOCK_NOWAIT;
/* F_GETLK family: test only, never take the lock. */
2155 flags = LDLM_FL_TEST_LOCK;
2156 /* Save the old mode so that if the mode in the lock changes we
2157 * can decrement the appropriate reader or writer refcount. */
2158 file_lock->fl_type = einfo.ei_mode;
2161 CERROR("unknown fcntl lock command: %d\n", cmd);
2165 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2166 LUSTRE_OPC_ANY, NULL);
2167 if (IS_ERR(op_data))
2168 RETURN(PTR_ERR(op_data));
2170 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2171 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2172 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2174 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2175 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2177 ll_finish_md_op_data(op_data);
/* Mirror granted (or unlocked) state into the local VFS lock lists. */
2179 if ((file_lock->fl_flags & FL_FLOCK) &&
2180 (rc == 0 || file_lock->fl_type == F_UNLCK))
2181 flock_lock_file_wait(file, file_lock);
2182 if ((file_lock->fl_flags & FL_POSIX) &&
2183 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2184 !(flags & LDLM_FL_TEST_LOCK))
2185 posix_lock_file_wait(file, file_lock);
/* Lock handler installed for -o noflock mounts; body not visible in this
 * view — presumably returns -ENOSYS (see ll_file_operations_noflock
 * comment below in the original file). */
2190 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2198 * test if some locks matching bits and l_req_mode are acquired
2199 * - bits can be in different locks
2200 * - if found clear the common lock bits in *bits
2201 * - the bits not found, are kept in *bits
2203 * \param bits [IN] searched lock bits [IN]
2204 * \param l_req_mode [IN] searched lock mode
2205 * \retval boolean, true iff all bits are found
2207 int ll_have_md_lock(struct inode *inode, __u64 *bits, ldlm_mode_t l_req_mode)
2209 struct lustre_handle lockh;
2210 ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four read/write modes. */
2211 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2212 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2221 fid = &ll_i2info(inode)->lli_fid;
2222 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2223 ldlm_lockname[mode]);
/* TEST_LOCK: match without taking a reference on the found lock. */
2225 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2226 for (i = 0; i < MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2227 policy.l_inodebits.bits = *bits & (1 << i);
2228 if (policy.l_inodebits.bits == 0)
2231 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2232 &policy, mode, &lockh)) {
2233 struct ldlm_lock *lock;
2235 lock = ldlm_handle2lock(&lockh);
/* Clear every bit the matched lock covers, not just the probed one. */
2238 ~(lock->l_policy_data.l_inodebits.bits);
2239 LDLM_LOCK_PUT(lock);
2241 *bits &= ~policy.l_inodebits.bits;
/*
 * Find a granted MDS ibits lock covering @bits in any read/write mode and
 * return its mode with a reference held in @lockh (no LDLM_FL_TEST_LOCK
 * here, unlike ll_have_md_lock); returns 0 when no lock matches.
 */
2248 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2249 struct lustre_handle *lockh)
2251 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2257 fid = &ll_i2info(inode)->lli_fid;
2258 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2260 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2261 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2262 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Common tail for revalidation: -ENOENT on an already-unlinked inode is
 * treated as success (the dentry will be dropped by the caller); any
 * other error is logged.  Returns the (possibly remapped) rc.
 */
2266 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2267 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2268 * and return success */
2270 /* This path cannot be hit for regular files unless in
2271 * case of obscure races, so no need to to validate
/* size. */
2273 if (!S_ISREG(inode->i_mode) &&
2274 !S_ISDIR(inode->i_mode))
2279 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate an inode's metadata against the MDS.  Two strategies:
 *  - with OBD_CONNECT_ATTRFID: a by-FID intent getattr/lookup, followed
 *    by dentry/lock bookkeeping;
 *  - otherwise: a plain md_getattr() when no matching ibits lock is
 *    already cached locally.
 * @ibits selects which MDS_INODELOCK_* bits must be valid afterwards.
 */
2287 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2290 struct inode *inode = dentry->d_inode;
2291 struct ptlrpc_request *req = NULL;
2292 struct obd_export *exp;
/* NULL inode here should be impossible; loud canary if it ever happens. */
2297 CERROR("REPORT THIS LINE TO PETER\n");
2301 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2302 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2304 exp = ll_i2mdexp(inode);
2306 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2307 * But under CMD case, it caused some lock issues, should be fixed
2308 * with new CMD ibits lock. See bug 12718 */
2309 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2310 struct lookup_intent oit = { .it_op = IT_GETATTR };
2311 struct md_op_data *op_data;
2313 if (ibits == MDS_INODELOCK_LOOKUP)
2314 oit.it_op = IT_LOOKUP;
2316 /* Call getattr by fid, so do not provide name at all. */
2317 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2318 dentry->d_inode, NULL, 0, 0,
2319 LUSTRE_OPC_ANY, NULL);
2320 if (IS_ERR(op_data))
2321 RETURN(PTR_ERR(op_data));
2323 oit.it_create_mode |= M_CHECK_STALE;
2324 rc = md_intent_lock(exp, op_data, NULL, 0,
2325 /* we are not interested in name
/* based lookup */
2328 ll_md_blocking_ast, 0);
2329 ll_finish_md_op_data(op_data);
2330 oit.it_create_mode &= ~M_CHECK_STALE;
2332 rc = ll_inode_revalidate_fini(inode, rc);
2336 rc = ll_revalidate_it_finish(req, &oit, dentry);
2338 ll_intent_release(&oit);
2342 /* Unlinked? Unhash dentry, so it is not picked up later by
2343 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2344 here to preserve get_cwd functionality on 2.6.
/* Bug 10868 */
2346 if (!dentry->d_inode->i_nlink) {
2347 cfs_spin_lock(&ll_lookup_lock);
2348 spin_lock(&dcache_lock);
2349 ll_drop_dentry(dentry);
2350 spin_unlock(&dcache_lock);
2351 cfs_spin_unlock(&ll_lookup_lock);
2354 ll_lookup_finish_locks(&oit, dentry);
/* No ATTRFID: only go to the MDS when no covering lock is cached. */
2355 } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
2356 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2357 obd_valid valid = OBD_MD_FLGETATTR;
2358 struct md_op_data *op_data;
/* Regular files also need the (size-bounded) striping EA. */
2361 if (S_ISREG(inode->i_mode)) {
2362 rc = ll_get_max_mdsize(sbi, &ealen);
2365 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2368 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2369 0, ealen, LUSTRE_OPC_ANY,
2371 if (IS_ERR(op_data))
2372 RETURN(PTR_ERR(op_data));
2374 op_data->op_valid = valid;
2375 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2376 * capa for this inode. Because we only keep capas of dirs
/* opened. */
2378 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2379 ll_finish_md_op_data(op_data);
2381 rc = ll_inode_revalidate_fini(inode, rc);
2385 rc = ll_prep_inode(&inode, req, NULL);
2388 ptlrpc_req_finished(req);
/*
 * Full revalidation: refresh MDS attributes via __ll_inode_revalidate_it,
 * then refresh size/blocks from the OSTs with a glimpse.  When no objects
 * are allocated yet, times come straight from the cached lvb instead.
 */
2392 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2395 struct inode *inode = dentry->d_inode;
2399 rc = __ll_inode_revalidate_it(dentry, it, ibits);
2401 /* if object not yet allocated, don't validate size */
2402 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
2403 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2404 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2405 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2409 /* ll_glimpse_size will prefer locally cached writes if they extend
/* the file. */
2413 rc = ll_glimpse_size(inode);
/*
 * getattr worker: revalidate UPDATE|LOOKUP ibits, then fill the kstat
 * from the (now fresh) inode fields.  For 32-bit API clients the ino is
 * synthesized from the FID instead of using the native i_ino.
 */
2418 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2419 struct lookup_intent *it, struct kstat *stat)
2421 struct inode *inode = de->d_inode;
2422 struct ll_sb_info *sbi = ll_i2sbi(inode);
2423 struct ll_inode_info *lli = ll_i2info(inode);
2426 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
2427 MDS_INODELOCK_LOOKUP);
2428 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
2433 stat->dev = inode->i_sb->s_dev;
2434 if (ll_need_32bit_api(sbi))
2435 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
2437 stat->ino = inode->i_ino;
2438 stat->mode = inode->i_mode;
2439 stat->nlink = inode->i_nlink;
2440 stat->uid = inode->i_uid;
2441 stat->gid = inode->i_gid;
2442 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2443 stat->atime = inode->i_atime;
2444 stat->mtime = inode->i_mtime;
2445 stat->ctime = inode->i_ctime;
2446 #ifdef HAVE_INODE_BLKSIZE
2447 stat->blksize = inode->i_blksize;
2449 stat->blksize = 1 << inode->i_blkbits;
2452 stat->size = i_size_read(inode);
2453 stat->blocks = inode->i_blocks;
/* VFS ->getattr entry point: delegate to ll_getattr_it with a fresh
 * IT_GETATTR intent. */
2457 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2459 struct lookup_intent it = { .it_op = IT_GETATTR };
2461 return ll_getattr_it(mnt, de, &it, stat);
/*
 * VFS ->fiemap entry point: marshal the kernel's fiemap_extent_info into
 * a ll_user_fiemap buffer, run ll_do_fiemap(), and copy flags/extents
 * back.  Only built when the kernel provides linux/fiemap.h.
 */
2464 #ifdef HAVE_LINUX_FIEMAP_H
2465 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2466 __u64 start, __u64 len)
2470 struct ll_user_fiemap *fiemap;
2471 unsigned int extent_count = fieinfo->fi_extents_max;
2473 num_bytes = sizeof(*fiemap) + (extent_count *
2474 sizeof(struct ll_fiemap_extent));
2475 OBD_ALLOC_LARGE(fiemap, num_bytes);
2480 fiemap->fm_flags = fieinfo->fi_flags;
2481 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2482 fiemap->fm_start = start;
2483 fiemap->fm_length = len;
/* Seed with the first caller-provided extent (continuation cursor). */
2484 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2485 sizeof(struct ll_fiemap_extent));
2487 rc = ll_do_fiemap(inode, fiemap, num_bytes);
2489 fieinfo->fi_flags = fiemap->fm_flags;
2490 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2491 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2492 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2494 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * POSIX ACL check callback for generic_permission(): duplicates the
 * cached ACL under lli_lock and evaluates it.  Compiled away to a no-op
 * check when CONFIG_FS_POSIX_ACL is off; the extra flags parameter
 * tracks the kernel's generic_permission() API change (RCU walk must
 * bail out since we take a spinlock here).
 */
2501 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
2502 lustre_check_acl(struct inode *inode, int mask, unsigned int flags)
2504 lustre_check_acl(struct inode *inode, int mask)
2507 #ifdef CONFIG_FS_POSIX_ACL
2508 struct ll_inode_info *lli = ll_i2info(inode);
2509 struct posix_acl *acl;
2513 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
/* Cannot take lli_lock in RCU-walk mode; ask VFS to retry ref-walk. */
2514 if (flags & IPERM_FLAG_RCU)
2517 cfs_spin_lock(&lli->lli_lock);
2518 acl = posix_acl_dup(lli->lli_posix_acl);
2519 cfs_spin_unlock(&lli->lli_lock);
2524 rc = posix_acl_permission(inode, acl, mask);
2525 posix_acl_release(acl);
/*
 * VFS ->permission entry point (three prototype variants for different
 * kernel APIs).  Revalidates the root inode before checking, delegates
 * remote-client setups to lustre_check_remote_perm(), otherwise runs the
 * generic permission check with our ACL callback.
 */
2533 #ifdef HAVE_GENERIC_PERMISSION_4ARGS
2534 int ll_inode_permission(struct inode *inode, int mask, unsigned int flags)
2536 # ifdef HAVE_INODE_PERMISION_2ARGS
2537 int ll_inode_permission(struct inode *inode, int mask)
2539 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2546 /* as root inode are NOT getting validated in lookup operation,
2547 * need to do it before permission check. */
2549 if (inode == inode->i_sb->s_root->d_inode) {
2550 struct lookup_intent it = { .it_op = IT_LOOKUP };
2552 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2553 MDS_INODELOCK_LOOKUP);
2558 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2559 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
/* Remote clients check permission on the MDS, not locally. */
2561 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2562 return lustre_check_remote_perm(inode, mask);
2564 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2565 rc = ll_generic_permission(inode, mask, flags, lustre_check_acl);
/* Select the vectored-I/O file_operations member names and handlers:
 * old kernels expose .readv/.writev, newer ones .aio_read/.aio_write.
 * The tables below use .READ_METHOD/.WRITE_METHOD so one initializer
 * works for both. */
2570 #ifdef HAVE_FILE_READV
2571 #define READ_METHOD readv
2572 #define READ_FUNCTION ll_file_readv
2573 #define WRITE_METHOD writev
2574 #define WRITE_FUNCTION ll_file_writev
2576 #define READ_METHOD aio_read
2577 #define READ_FUNCTION ll_file_aio_read
2578 #define WRITE_METHOD aio_write
2579 #define WRITE_FUNCTION ll_file_aio_write
2582 /* -o localflock - only provides locally consistent flock locks */
/* Default file operations: no .flock/.lock members, so the kernel's
 * local (single-node) lock implementation is used. */
2583 struct file_operations ll_file_operations = {
2584         .read           = ll_file_read,
2585         .READ_METHOD    = READ_FUNCTION,
2586         .write          = ll_file_write,
2587         .WRITE_METHOD   = WRITE_FUNCTION,
2588 #ifdef HAVE_UNLOCKED_IOCTL
2589         .unlocked_ioctl = ll_file_ioctl,
2591         .ioctl          = ll_file_ioctl,
2593         .open           = ll_file_open,
2594         .release        = ll_file_release,
2595         .mmap           = ll_file_mmap,
2596         .llseek         = ll_file_seek,
2597 #ifdef HAVE_KERNEL_SENDFILE
2598         .sendfile       = ll_file_sendfile,
2600 #ifdef HAVE_KERNEL_SPLICE_READ
2601         .splice_read    = ll_file_splice_read,
/* File operations used with the (default) -o flock mount option:
 * identical to ll_file_operations plus cluster-wide .flock/.lock
 * handlers backed by MDS LDLM_FLOCK locks (ll_file_flock). */
2607 struct file_operations ll_file_operations_flock = {
2608         .read           = ll_file_read,
2609         .READ_METHOD    = READ_FUNCTION,
2610         .write          = ll_file_write,
2611         .WRITE_METHOD   = WRITE_FUNCTION,
2612 #ifdef HAVE_UNLOCKED_IOCTL
2613         .unlocked_ioctl = ll_file_ioctl,
2615         .ioctl          = ll_file_ioctl,
2617         .open           = ll_file_open,
2618         .release        = ll_file_release,
2619         .mmap           = ll_file_mmap,
2620         .llseek         = ll_file_seek,
2621 #ifdef HAVE_KERNEL_SENDFILE
2622         .sendfile       = ll_file_sendfile,
2624 #ifdef HAVE_KERNEL_SPLICE_READ
2625         .splice_read    = ll_file_splice_read,
2629         .flock          = ll_file_flock,
2630         .lock           = ll_file_flock
2633 /* These are for -o noflock - to return ENOSYS on flock calls */
2634 struct file_operations ll_file_operations_noflock = {
2635 .read = ll_file_read,
2636 .READ_METHOD = READ_FUNCTION,
2637 .write = ll_file_write,
2638 .WRITE_METHOD = WRITE_FUNCTION,
2639 #ifdef HAVE_UNLOCKED_IOCTL
2640 .unlocked_ioctl = ll_file_ioctl,
     /* NOTE(review): #else/#endif lines and the closing "};" appear to be
      * missing from this extract. */
2642 .ioctl = ll_file_ioctl,
2644 .open = ll_file_open,
2645 .release = ll_file_release,
2646 .mmap = ll_file_mmap,
2647 .llseek = ll_file_seek,
2648 #ifdef HAVE_KERNEL_SENDFILE
2649 .sendfile = ll_file_sendfile,
2651 #ifdef HAVE_KERNEL_SPLICE_READ
2652 .splice_read = ll_file_splice_read,
     /* Both lock methods route to ll_file_noflock (per the comment above,
      * presumably returning -ENOSYS -- its body is not visible here). */
2656 .flock = ll_file_noflock,
2657 .lock = ll_file_noflock
/*
 * inode_operations for regular Lustre files: attribute get/set, xattrs,
 * permission checking, and (when the kernel supports it) FIEMAP extent
 * mapping.
 * NOTE(review): the #endif for HAVE_LINUX_FIEMAP_H and the closing "};"
 * appear to be missing from this extract.
 */
2660 struct inode_operations ll_file_inode_operations = {
2661 .setattr = ll_setattr,
2662 .truncate = ll_truncate,
2663 .getattr = ll_getattr,
2664 .permission = ll_inode_permission,
2665 .setxattr = ll_setxattr,
2666 .getxattr = ll_getxattr,
2667 .listxattr = ll_listxattr,
2668 .removexattr = ll_removexattr,
2669 #ifdef HAVE_LINUX_FIEMAP_H
2670 .fiemap = ll_fiemap,
2674 /* dynamic ioctl number support routines */
/*
 * llioc: global registry of dynamically registered ioctl handlers,
 * protected by a read/write semaphore.
 * NOTE(review): the lines closing llioc_ctl_data, naming the llioc
 * variable, and opening struct llioc_data appear to be missing from this
 * extract -- the fields from iocd_list down belong to struct llioc_data.
 */
2675 static struct llioc_ctl_data {
2676 cfs_rw_semaphore_t ioc_sem;
2677 cfs_list_t ioc_head;
2679 __RWSEM_INITIALIZER(llioc.ioc_sem),
2680 CFS_LIST_HEAD_INIT(llioc.ioc_head)
2685 cfs_list_t iocd_list;
2686 unsigned int iocd_size;
2687 llioc_callback_t iocd_cb;
2688 unsigned int iocd_count;
     /* Trailing variable-length command array (pre-C99 [0] idiom; a C99
      * flexible array member "iocd_cmd[]" would be the modern equivalent). */
2689 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: allocate an llioc_data carrying the
 * callback and its command-number list, and append it to the global
 * llioc registry under the write lock.
 * Returns an opaque handle ("magic") used later by ll_iocontrol_unregister;
 * NOTE(review): the return statements, error-path lines and function braces
 * appear to be missing from this extract.
 */
2692 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2695 struct llioc_data *in_data = NULL;
     /* Reject a NULL callback/command list or an out-of-range count. */
2698 if (cb == NULL || cmd == NULL ||
2699 count > LLIOC_MAX_CMD || count < 0)
     /* Header plus one slot per registered command number. */
2702 size = sizeof(*in_data) + count * sizeof(unsigned int);
2703 OBD_ALLOC(in_data, size);
2704 if (in_data == NULL)
2707 memset(in_data, 0, sizeof(*in_data));
2708 in_data->iocd_size = size;
2709 in_data->iocd_cb = cb;
2710 in_data->iocd_count = count;
2711 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2713 cfs_down_write(&llioc.ioc_sem);
2714 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2715 cfs_up_write(&llioc.ioc_sem);
/*
 * Unregister a dynamic ioctl handler previously returned by
 * ll_iocontrol_register: walk the registry under the write lock, unlink
 * and free the matching entry.
 * NOTE(review): the line comparing each entry against 'magic' (and the
 * early-return after freeing) appears to be missing from this extract --
 * as shown, the loop body would free the first entry unconditionally;
 * confirm against the original file.
 */
2720 void ll_iocontrol_unregister(void *magic)
2722 struct llioc_data *tmp;
2727 cfs_down_write(&llioc.ioc_sem);
2728 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2730 unsigned int size = tmp->iocd_size;
2732 cfs_list_del(&tmp->iocd_list);
         /* Drop the lock before freeing; the entry is already unlinked. */
2733 cfs_up_write(&llioc.ioc_sem);
2735 OBD_FREE(tmp, size);
2739 cfs_up_write(&llioc.ioc_sem);
     /* Fell through the loop: no registry entry matched this handle. */
2741 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2744 EXPORT_SYMBOL(ll_iocontrol_register);
2745 EXPORT_SYMBOL(ll_iocontrol_unregister);
2747 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2748 unsigned int cmd, unsigned long arg, int *rcp)
2750 enum llioc_iter ret = LLIOC_CONT;
2751 struct llioc_data *data;
2752 int rc = -EINVAL, i;
2754 cfs_down_read(&llioc.ioc_sem);
2755 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2756 for (i = 0; i < data->iocd_count; i++) {
2757 if (cmd != data->iocd_cmd[i])
2760 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2764 if (ret == LLIOC_STOP)
2767 cfs_up_read(&llioc.ioc_sem);