1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-open ll_file_data descriptor from its dedicated slab
 * cache, using IO-safe allocation flags.
 * NOTE(review): interior lines are elided in this listing -- the
 * NULL-check and RETURN(fd) tail are not visible here. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Return a ll_file_data to the slab cache; counterpart of
 * ll_file_data_get(). */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Pack the inode's current attributes (fid, mode, times, size, blocks,
 * flags), its IO epoch and the open file handle @fh into @op_data for
 * an MDS request (close / DONE_WRITING paths). */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the lustre-extended iattr; cast to reach it. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
/* ll_mdscapa_get() presumably takes a capability reference that the
 * consumer of op_data must release -- TODO confirm. */
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Prepare @op_data for closing MDS open handle @och: always send
 * mode/atime/mtime/ctime; size/blocks handling depends on SOM support
 * and regular-file-ness.
 * NOTE(review): lines are elided in this listing -- the body of the
 * FMODE_WRITE early path and the exact branch structure between the
 * two if-statements are not fully visible. */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
92 if (!(och->och_flags & FMODE_WRITE))
/* No Size-on-MDS support (or not a regular file): send size/blocks
 * directly with the close instead of via a SOM epoch. */
95 if (!(exp_connect_som(ll_i2mdexp(inode))) || !S_ISREG(inode->i_mode))
96 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* ll_epoch_close() may set MF_EPOCH_CLOSE in op_data->op_flags, which
 * the caller checks -- TODO confirm against its definition. */
98 ll_epoch_close(inode, op_data, &och, 0);
101 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/* Send an MDS close for open handle @och and clean up local state:
 * prepare close attributes, call md_close(), on epoch-close success
 * push Size-on-MDS attributes back via ll_sizeonmds_update(), destroy
 * OST objects if the close said so, and invalidate the handle.
 * NOTE(review): interior lines (declarations of rc/epoch_close, several
 * if/else and brace lines, ENTRY/RETURN) are elided in this listing. */
105 static int ll_close_inode_openhandle(struct obd_export *md_exp,
107 struct obd_client_handle *och)
109 struct obd_export *exp = ll_i2mdexp(inode);
110 struct md_op_data *op_data;
111 struct ptlrpc_request *req = NULL;
112 struct obd_device *obd = class_exp2obd(exp);
119 * XXX: in case of LMV, is this correct to access
/* Defensive check: the MDC export/connection may already be gone. */
122 CERROR("Invalid MDC connection handle "LPX64"\n",
123 ll_i2mdexp(inode)->exp_handle.h_cookie);
128 * here we check if this is forced umount. If so this is called on
129 * canceling "open lock" and we do not call md_close() in this case, as
130 * it will not be successful, as import is already deactivated.
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr to back to MDS. */
147 rc = ll_sizeonmds_update(inode, &och->och_fh,
148 op_data->op_ioepoch);
150 CERROR("inode %lu mdc Size-on-MDS update failed: "
151 "rc = %d\n", inode->i_ino, rc);
155 CERROR("inode %lu mdc close failed: rc = %d\n",
158 ll_finish_md_op_data(op_data);
/* Destroy OST objects if the close reply asked for it (e.g. on
 * last unlink) -- TODO confirm semantics of ll_objects_destroy(). */
161 rc = ll_objects_destroy(req, inode);
163 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* SOM connection with an unfinished epoch on a written regular file:
 * queue DONE_WRITING instead of freeing state here. */
170 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
171 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
172 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
174 md_clear_open_replay_data(md_exp, och);
175 /* Free @och if it is not waiting for DONE_WRITING. */
176 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
179 if (req) /* This is close request */
180 ptlrpc_req_finished(req);
/* Really close the MDS open handle of the given open mode (@flags is
 * FMODE_WRITE/FMODE_EXEC/FMODE_READ): pick the matching per-inode
 * handle slot and use-count, and if no users remain, detach the handle
 * under lli_och_sem and close it via ll_close_inode_openhandle().
 * NOTE(review): elided lines hide where *och_p is sampled/cleared
 * between the use-count check and the up(); ordering is assumed. */
184 int ll_md_real_close(struct inode *inode, int flags)
186 struct ll_inode_info *lli = ll_i2info(inode);
187 struct obd_client_handle **och_p;
188 struct obd_client_handle *och;
/* Select the handle slot corresponding to the open mode. */
193 if (flags & FMODE_WRITE) {
194 och_p = &lli->lli_mds_write_och;
195 och_usecount = &lli->lli_open_fd_write_count;
196 } else if (flags & FMODE_EXEC) {
197 och_p = &lli->lli_mds_exec_och;
198 och_usecount = &lli->lli_open_fd_exec_count;
200 LASSERT(flags & FMODE_READ);
201 och_p = &lli->lli_mds_read_och;
202 och_usecount = &lli->lli_open_fd_read_count;
205 down(&lli->lli_och_sem);
206 if (*och_usecount) { /* There are still users of this handle, so
208 up(&lli->lli_och_sem);
213 up(&lli->lli_och_sem);
215 if (och) { /* There might be a race and somebody have freed this och
217 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-struct-file close: drop a group lock if held, decrement the
 * per-mode open counter under lli_och_sem, and only talk to the MDS
 * (ll_md_real_close) if we do not hold a cached OPEN DLM lock that
 * lets us skip it. Finally releases the ll_file_data and capa.
 * NOTE(review): elided lines hide lockmode initialization and some
 * brace/else structure; comments below are best-effort. */
224 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
227 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
228 struct ll_inode_info *lli = ll_i2info(inode);
232 /* clear group lock, if present */
233 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
234 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
236 /* Let's see if we have good enough OPEN lock on the file and if
237 we can skip talking to MDS */
238 if (file->f_dentry->d_inode) { /* Can this ever be false? */
240 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
241 struct lustre_handle lockh;
/* Shadows the outer @inode parameter intentionally (same object
 * via the dentry) -- pre-existing style, left as-is. */
242 struct inode *inode = file->f_dentry->d_inode;
243 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
245 down(&lli->lli_och_sem);
246 if (fd->fd_omode & FMODE_WRITE) {
248 LASSERT(lli->lli_open_fd_write_count);
249 lli->lli_open_fd_write_count--;
250 } else if (fd->fd_omode & FMODE_EXEC) {
252 LASSERT(lli->lli_open_fd_exec_count);
253 lli->lli_open_fd_exec_count--;
256 LASSERT(lli->lli_open_fd_read_count);
257 lli->lli_open_fd_read_count--;
259 up(&lli->lli_och_sem);
/* LDLM_FL_TEST_LOCK: only probe for a matching OPEN ibits lock,
 * do not take a reference. If none, the MDS must see the close. */
261 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
262 LDLM_IBITS, &policy, lockmode,
264 rc = ll_md_real_close(file->f_dentry->d_inode,
268 CERROR("Releasing a file %p with negative dentry %p. Name %s",
269 file, file->f_dentry, file->f_dentry->d_name.name);
272 LUSTRE_FPRIVATE(file) = NULL;
273 ll_file_data_put(fd);
274 ll_capa_close(inode);
/* Forward declaration (defined in the LOV layer): fetch-and-clear the
 * stripe-wide async write error for @lsm. */
279 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
281 /* While this returns an error code, fput() the caller does not, so we need
282 * to make every effort to clean up all of our state here. Also, applications
283 * rarely check close errors and even if an error is returned they will not
284 * re-try the close call.
/* VFS ->release() handler: tear down per-open state. Handles the
 * remote-ACL root special case, stops statahead if this fd owns it,
 * short-circuits for the root dentry, folds async write errors into
 * lli_async_rc, then performs the MDS close via ll_md_close().
 * NOTE(review): elided lines hide the 'rc' declaration, some braces
 * and the final RETURN; control flow is partially inferred. */
286 int ll_file_release(struct inode *inode, struct file *file)
288 struct ll_file_data *fd;
289 struct ll_sb_info *sbi = ll_i2sbi(inode);
290 struct ll_inode_info *lli = ll_i2info(inode);
291 struct lov_stripe_md *lsm = lli->lli_smd;
295 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
296 inode->i_generation, inode);
298 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping is tied to the fs root inode only. */
299 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
300 inode == inode->i_sb->s_root->d_inode) {
301 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
304 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
305 fd->fd_flags &= ~LL_FILE_RMTACL;
306 rct_del(&sbi->ll_rct, cfs_curproc_pid());
307 et_search_free(&sbi->ll_et, cfs_curproc_pid());
312 if (inode->i_sb->s_root != file->f_dentry)
313 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
314 fd = LUSTRE_FPRIVATE(file);
317 /* The last ref on @file, maybe not the the owner pid of statahead.
318 * Different processes can open the same dir, "ll_opendir_key" means:
319 * it is me that should stop the statahead thread. */
320 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
321 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root dentry: no MDS open handle to close, just free fd state. */
323 if (inode->i_sb->s_root == file->f_dentry) {
324 LUSTRE_FPRIVATE(file) = NULL;
325 ll_file_data_put(fd);
/* Surface deferred async write errors on close -- TODO confirm how
 * lli_async_rc is combined with the return value in elided lines. */
330 lov_test_and_clear_async_rc(lsm);
331 lli->lli_async_rc = 0;
333 rc = ll_md_close(sbi->ll_md_exp, inode, file);
335 if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
336 libcfs_debug_dumplog();
/* Issue an IT_OPEN intent to the MDS for @file (used by NFS-style
 * opens and by setstripe): builds md_op_data from the dentry, calls
 * md_intent_lock(), instantiates/refreshes the inode from the reply,
 * and binds the returned DLM lock to the inode.
 * NOTE(review): elided lines hide rc declaration, GOTO targets and the
 * exit-path labels referenced below. */
341 static int ll_intent_file_open(struct file *file, void *lmm,
342 int lmmsize, struct lookup_intent *itp)
344 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
345 struct dentry *parent = file->f_dentry->d_parent;
346 const char *name = file->f_dentry->d_name.name;
347 const int len = file->f_dentry->d_name.len;
348 struct md_op_data *op_data;
349 struct ptlrpc_request *req;
356 /* Usually we come here only for NFSD, and we want open lock.
357 But we can also get here with pre 2.6.15 patchless kernels, and in
358 that case that lock is also ok */
359 /* We can also get here if there was cached open handle in revalidate_it
360 * but it disappeared while we were getting from there to ll_file_open.
361 * But this means this file was closed and immediatelly opened which
362 * makes a good candidate for using OPEN lock */
363 /* If lmmsize & lmm are not 0, we are just setting stripe info
364 * parameters. No need for the open lock */
365 if (!lmm && !lmmsize)
366 itp->it_flags |= MDS_OPEN_LOCK;
368 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
369 file->f_dentry->d_inode, name, len,
370 O_RDWR, LUSTRE_OPC_ANY, NULL);
372 RETURN(PTR_ERR(op_data));
374 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
375 0 /*unused */, &req, ll_md_blocking_ast, 0);
376 ll_finish_md_op_data(op_data);
378 /* reason for keep own exit path - don`t flood log
379 * with messages with -ESTALE errors.
/* Release an MDS open handle we obtained but cannot use. */
381 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
382 it_open_error(DISP_OPEN_OPEN, itp))
384 ll_release_openhandle(file->f_dentry, itp);
388 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
389 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
390 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
394 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
/* Attach the granted lock to the inode so later lock matches work. */
395 if (!rc && itp->d.lustre.it_lock_mode)
396 md_set_lock_data(sbi->ll_md_exp,
397 &itp->d.lustre.it_lock_handle,
398 file->f_dentry->d_inode, NULL);
/* Cleanup path: drop the intent request/disposition and the lock. */
401 ptlrpc_req_finished(itp->d.lustre.it_data);
402 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
403 ll_intent_drop_lock(itp);
/* Record a newly-opened IO epoch on the inode (only when @ioepoch is
 * non-zero and differs from the currently recorded one). */
408 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
410 if (ioepoch && lli->lli_ioepoch != ioepoch) {
411 lli->lli_ioepoch = ioepoch;
412 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
413 ioepoch, PFID(&lli->lli_fid));
/* Fill an obd_client_handle from the MDS open reply carried in @it:
 * copy the open file handle and fid, record the open flags and IO
 * epoch, then register the request for open replay. Returns the
 * md_set_open_replay_data() result. */
417 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
418 struct lookup_intent *it, struct obd_client_handle *och)
420 struct ptlrpc_request *req = it->d.lustre.it_data;
421 struct mdt_body *body;
425 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
426 LASSERT(body != NULL); /* reply already checked out */
428 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
429 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
430 och->och_fid = lli->lli_fid;
431 och->och_flags = it->it_flags;
432 ll_ioepoch_open(lli, body->ioepoch);
434 return md_set_open_replay_data(md_exp, och, req);
/* Complete the client-local part of an open: optionally fill @och from
 * the intent reply (when @och is non-NULL), then attach @fd to the
 * struct file, initialize readahead state and remember the open mode.
 * NOTE(review): elided lines hide the rc declaration, the condition
 * guarding the och-fill branch, and the RETURN. */
437 int ll_local_open(struct file *file, struct lookup_intent *it,
438 struct ll_file_data *fd, struct obd_client_handle *och)
440 struct inode *inode = file->f_dentry->d_inode;
441 struct ll_inode_info *lli = ll_i2info(inode);
444 LASSERT(!LUSTRE_FPRIVATE(file));
449 struct ptlrpc_request *req = it->d.lustre.it_data;
450 struct mdt_body *body;
453 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
457 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
458 if ((it->it_flags & FMODE_WRITE) &&
459 (body->valid & OBD_MD_FLSIZE))
460 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
461 lli->lli_ioepoch, PFID(&lli->lli_fid));
464 LUSTRE_FPRIVATE(file) = fd;
465 ll_readahead_init(inode, &fd->fd_ras);
/* fd_omode caches the intent open flags; consulted at close time. */
466 fd->fd_omode = it->it_flags;
470 /* Open a file, and (for the very first open) create objects on the OSTs at
471 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
472 * creation or open until ll_lov_setstripe() ioctl is called. We grab
473 * lli_open_sem to ensure no other process will create objects, send the
474 * stripe MD to the MDS, or try to destroy the objects if that fails.
476 * If we already have the stripe MD locally then we don't request it in
477 * md_open(), by passing a lmm_size = 0.
479 * It is up to the application to ensure no other processes open this file
480 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
481 * used. We might be able to avoid races of that sort by getting lli_open_sem
482 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
483 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* VFS ->open() handler.
 * NOTE(review): a significant number of interior lines (och_usecount
 * declaration, braces, GOTO labels, restart logic) are elided in this
 * listing; the annotations below describe only the visible code. */
485 int ll_file_open(struct inode *inode, struct file *file)
487 struct ll_inode_info *lli = ll_i2info(inode);
488 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
489 .it_flags = file->f_flags };
490 struct lov_stripe_md *lsm;
491 struct ptlrpc_request *req = NULL;
492 struct obd_client_handle **och_p;
494 struct ll_file_data *fd;
495 int rc = 0, opendir_set = 0;
498 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
499 inode->i_generation, inode, file->f_flags);
501 #ifdef HAVE_VFS_INTENT_PATCHES
504 it = file->private_data; /* XXX: compat macro */
505 file->private_data = NULL; /* prevent ll_local_open assertion */
508 fd = ll_file_data_get();
/* Directory open: claim statahead ownership for this fd if nobody
 * else has it yet. */
513 if (S_ISDIR(inode->i_mode)) {
514 spin_lock(&lli->lli_lock);
515 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
516 LASSERT(lli->lli_sai == NULL);
517 lli->lli_opendir_key = fd;
518 lli->lli_opendir_pid = cfs_curproc_pid();
521 spin_unlock(&lli->lli_lock);
/* Root dentry: no MDS open needed, just stash the fd. */
524 if (inode->i_sb->s_root == file->f_dentry) {
525 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from the VFS: build our own open intent. */
529 if (!it || !it->d.lustre.it_disposition) {
530 /* Convert f_flags into access mode. We cannot use file->f_mode,
531 * because everything but O_ACCMODE mask was stripped from
533 if ((oit.it_flags + 1) & O_ACCMODE)
535 if (file->f_flags & O_TRUNC)
536 oit.it_flags |= FMODE_WRITE;
538 /* kernel only call f_op->open in dentry_open. filp_open calls
539 * dentry_open after call to open_namei that checks permissions.
540 * Only nfsd_open call dentry_open directly without checking
541 * permissions and because of that this code below is safe. */
542 if (oit.it_flags & FMODE_WRITE)
543 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
545 /* We do not want O_EXCL here, presumably we opened the file
546 * already? XXX - NFS implications? */
547 oit.it_flags &= ~O_EXCL;
549 /* bug20584, if "it_flags" contains O_CREAT, the file will be
550 * created if necessary, then "IT_CREAT" should be set to keep
551 * consistent with it */
552 if (oit.it_flags & O_CREAT)
553 oit.it_op |= IT_CREAT;
559 /* Let's see if we have file open on MDS already. */
560 if (it->it_flags & FMODE_WRITE) {
561 och_p = &lli->lli_mds_write_och;
562 och_usecount = &lli->lli_open_fd_write_count;
563 } else if (it->it_flags & FMODE_EXEC) {
564 och_p = &lli->lli_mds_exec_och;
565 och_usecount = &lli->lli_open_fd_exec_count;
567 och_p = &lli->lli_mds_read_och;
568 och_usecount = &lli->lli_open_fd_read_count;
571 down(&lli->lli_och_sem);
572 if (*och_p) { /* Open handle is present */
573 if (it_disposition(it, DISP_OPEN_OPEN)) {
574 /* Well, there's extra open request that we do not need,
575 let's close it somehow. This will decref request. */
576 rc = it_open_error(DISP_OPEN_OPEN, it);
578 up(&lli->lli_och_sem);
579 ll_file_data_put(fd);
580 GOTO(out_openerr, rc);
582 ll_release_openhandle(file->f_dentry, it);
583 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse the existing MDS handle for this local open. */
588 rc = ll_local_open(file, it, fd, NULL);
591 up(&lli->lli_och_sem);
592 ll_file_data_put(fd);
593 GOTO(out_openerr, rc);
596 LASSERT(*och_usecount == 0);
597 if (!it->d.lustre.it_disposition) {
598 /* We cannot just request lock handle now, new ELC code
599 means that one of other OPEN locks for this file
600 could be cancelled, and since blocking ast handler
601 would attempt to grab och_sem as well, that would
602 result in a deadlock */
603 up(&lli->lli_och_sem);
604 it->it_create_mode |= M_CHECK_STALE;
605 rc = ll_intent_file_open(file, NULL, 0, it);
606 it->it_create_mode &= ~M_CHECK_STALE;
608 ll_file_data_put(fd);
609 GOTO(out_openerr, rc);
612 /* Got some error? Release the request */
613 if (it->d.lustre.it_status < 0) {
614 req = it->d.lustre.it_data;
615 ptlrpc_req_finished(req);
619 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
621 ll_file_data_put(fd);
622 GOTO(out_och_free, rc = -ENOMEM);
625 req = it->d.lustre.it_data;
627 /* md_intent_lock() didn't get a request ref if there was an
628 * open error, so don't do cleanup on the request here
630 /* XXX (green): Should not we bail out on any error here, not
631 * just open error? */
632 rc = it_open_error(DISP_OPEN_OPEN, it);
634 ll_file_data_put(fd);
635 GOTO(out_och_free, rc);
638 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
639 rc = ll_local_open(file, it, fd, *och_p);
641 ll_file_data_put(fd);
642 GOTO(out_och_free, rc);
645 up(&lli->lli_och_sem);
647 /* Must do this outside lli_och_sem lock to prevent deadlock where
648 different kind of OPEN lock for this same inode gets cancelled
649 by ldlm_cancel_lru */
650 if (!S_ISREG(inode->i_mode))
/* Delayed object creation: skip OST object setup until the
 * LOV setstripe ioctl (or first write) happens. */
657 if (file->f_flags & O_LOV_DELAY_CREATE ||
658 !(file->f_mode & FMODE_WRITE)) {
659 CDEBUG(D_INODE, "object creation was delayed\n");
663 file->f_flags &= ~O_LOV_DELAY_CREATE;
/* Error/cleanup paths (labels elided in this listing). */
666 ptlrpc_req_finished(req);
668 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
672 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
673 *och_p = NULL; /* OBD_FREE writes some magic there */
676 up(&lli->lli_och_sem);
678 if (opendir_set != 0)
679 ll_stop_statahead(inode, lli->lli_opendir_key);
685 /* Fills the obdo with the attributes for the lsm */
/* Issue an async OST getattr for @lsm via a ptlrpc set and wait for
 * the result; on success masks o_valid down to the attributes the
 * OSTs are authoritative for (size/blocks/blksize/times).
 * NOTE(review): elided lines hide oinfo.oi_md / oi_oa setup, the rc
 * declaration and RETURNs. */
686 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
687 struct obd_capa *capa, struct obdo *obdo)
689 struct ptlrpc_request_set *set;
690 struct obd_info oinfo = { { { 0 } } };
695 LASSERT(lsm != NULL);
699 oinfo.oi_oa->o_id = lsm->lsm_object_id;
700 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
701 oinfo.oi_oa->o_mode = S_IFREG;
702 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
703 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
704 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
705 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
707 oinfo.oi_capa = capa;
709 set = ptlrpc_prep_set();
711 CERROR("can't allocate ptlrpc set\n");
714 rc = obd_getattr_async(exp, &oinfo, set);
716 rc = ptlrpc_set_wait(set);
717 ptlrpc_set_destroy(set);
/* Keep only OST-authoritative attribute bits in the result. */
720 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
721 OBD_MD_FLATIME | OBD_MD_FLMTIME |
722 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
726 /* Fills the obdo with the attributes for the inode defined by lsm */
/* Convenience wrapper: get OST attributes for @inode's stripe MD
 * (with an MDS capability) and refresh the inode from the obdo. */
727 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
729 struct ll_inode_info *lli = ll_i2info(inode);
730 struct obd_capa *capa = ll_mdscapa_get(inode);
734 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
737 obdo_refresh_inode(inode, obdo, obdo->o_valid);
739 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
740 lli->lli_smd->lsm_object_id, i_size_read(inode),
741 (unsigned long long)inode->i_blocks,
742 (unsigned long)ll_inode_blksize(inode));
/* Merge the cached per-stripe lock-value-blocks into the inode under
 * the inode size lock: updates i_size, i_blocks and m/a/ctime from the
 * merged LVB. */
747 int ll_merge_lvb(struct inode *inode)
749 struct ll_inode_info *lli = ll_i2info(inode);
750 struct ll_sb_info *sbi = ll_i2sbi(inode);
756 ll_inode_size_lock(inode, 1);
757 inode_init_lvb(inode, &lvb);
758 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
759 i_size_write(inode, lvb.lvb_size);
760 inode->i_blocks = lvb.lvb_blocks;
762 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
763 LTIME_S(inode->i_atime) = lvb.lvb_atime;
764 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
765 ll_inode_size_unlock(inode, 1);
/* Glimpse helper for ioctls: fetch current OST attributes for @lsm
 * and copy size/blocks/times into the caller-supplied stat buffer. */
770 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
773 struct obdo obdo = { 0 };
776 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
778 st->st_size = obdo.o_size;
779 st->st_blocks = obdo.o_blocks;
780 st->st_mtime = obdo.o_mtime;
781 st->st_atime = obdo.o_atime;
782 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for @file: propagate O_NONBLOCK/O_APPEND flags
 * and choose the lock requirement -- no server locks for nolock
 * mounts, mandatory locking for append writes, otherwise "maybe". */
787 void ll_io_init(struct cl_io *io, const struct file *file, int write)
789 struct inode *inode = file->f_dentry->d_inode;
791 memset(io, 0, sizeof *io);
792 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
794 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
795 io->ci_obj = ll_i2info(inode)->lli_clob;
796 io->ci_lockreq = CILR_MAYBE;
797 if (ll_file_nolock(file)) {
798 io->ci_lockreq = CILR_NEVER;
799 io->ci_no_srvlock = 1;
800 } else if (file->f_flags & O_APPEND) {
801 io->ci_lockreq = CILR_MANDATORY;
/* Common read/write driver for all entry points (readv/aio/sendfile/
 * splice): set up the cl_io, copy per-subtype arguments into the vvp
 * layer, serialize non-group-locked writes with lli_write_sem, run the
 * cl_io loop and report position/result.
 * NOTE(review): elided lines hide io/result declarations, switch case
 * labels and the final RETURN; subtype cases are partially visible. */
805 static ssize_t ll_file_io_generic(const struct lu_env *env,
806 struct vvp_io_args *args, struct file *file,
807 enum cl_io_type iot, loff_t *ppos, size_t count)
813 io = &ccc_env_info(env)->cti_io;
814 ll_io_init(io, file, iot == CIT_WRITE);
816 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
817 struct vvp_io *vio = vvp_env_io(env);
818 struct ccc_io *cio = ccc_env_io(env);
819 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
820 int write_sem_locked = 0;
822 cio->cui_fd = LUSTRE_FPRIVATE(file);
823 vio->cui_io_subtype = args->via_io_subtype;
825 switch (vio->cui_io_subtype) {
827 cio->cui_iov = args->u.normal.via_iov;
828 cio->cui_nrsegs = args->u.normal.via_nrsegs;
829 #ifndef HAVE_FILE_WRITEV
830 cio->cui_iocb = args->u.normal.via_iocb;
/* Serialize plain writes; group-locked writers coordinate
 * through the group lock instead. */
832 if ((iot == CIT_WRITE) &&
833 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
834 down(&lli->lli_write_sem);
835 write_sem_locked = 1;
839 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
840 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
843 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
844 vio->u.splice.cui_flags = args->u.splice.via_flags;
847 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
850 result = cl_io_loop(env, io);
851 if (write_sem_locked)
852 up(&lli->lli_write_sem);
854 /* cl_io_rw_init() handled IO */
855 result = io->ci_result;
/* Report bytes moved and the updated file position. */
858 if (io->ci_nob > 0) {
860 *ppos = io->u.ci_wr.wr.crw_pos;
868 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute the total byte count into
 * *count; may shorten *nr_segs when a segment fails access_ok.
 * NOTE(review): elided lines hide cnt initialization, the EINVAL/
 * EFAULT returns and the truncation of *nr_segs. */
870 static int ll_file_get_iov_count(const struct iovec *iov,
871 unsigned long *nr_segs, size_t *count)
876 for (seg = 0; seg < *nr_segs; seg++) {
877 const struct iovec *iv = &iov[seg];
880 * If any segment has a negative length, or the cumulative
881 * length ever wraps negative then return -EINVAL.
884 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
886 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
891 cnt -= iv->iov_len; /* This segment is no good */
898 #ifdef HAVE_FILE_READV
/* Vector read entry point (pre-aio kernels): validate the iovec,
 * grab a cl environment and run the generic IO path as CIT_READ. */
899 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
900 unsigned long nr_segs, loff_t *ppos)
903 struct vvp_io_args *args;
909 result = ll_file_get_iov_count(iov, &nr_segs, &count);
913 env = cl_env_get(&refcheck);
915 RETURN(PTR_ERR(env));
917 args = vvp_env_args(env, IO_NORMAL);
918 args->u.normal.via_iov = (struct iovec *)iov;
919 args->u.normal.via_nrsegs = nr_segs;
921 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
922 cl_env_put(env, &refcheck);
/* Single-buffer read (readv build): wrap the user buffer in a
 * per-env iovec and delegate to ll_file_readv(). */
926 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
930 struct iovec *local_iov;
935 env = cl_env_get(&refcheck);
937 RETURN(PTR_ERR(env));
939 local_iov = &vvp_env_info(env)->vti_local_iov;
940 local_iov->iov_base = (void __user *)buf;
941 local_iov->iov_len = count;
942 result = ll_file_readv(file, local_iov, 1, ppos);
943 cl_env_put(env, &refcheck);
/* AIO read entry point: validate the iovec, record the kiocb in the
 * vvp args and run the generic IO path as CIT_READ against
 * iocb->ki_pos. */
948 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
949 unsigned long nr_segs, loff_t pos)
952 struct vvp_io_args *args;
958 result = ll_file_get_iov_count(iov, &nr_segs, &count);
962 env = cl_env_get(&refcheck);
964 RETURN(PTR_ERR(env));
966 args = vvp_env_args(env, IO_NORMAL);
967 args->u.normal.via_iov = (struct iovec *)iov;
968 args->u.normal.via_nrsegs = nr_segs;
969 args->u.normal.via_iocb = iocb;
971 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
972 &iocb->ki_pos, count);
973 cl_env_put(env, &refcheck);
/* Single-buffer read (AIO build): build a synchronous kiocb plus a
 * one-segment iovec from the per-env scratch space, call
 * ll_file_aio_read() and propagate the updated position. */
977 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
981 struct iovec *local_iov;
987 env = cl_env_get(&refcheck);
989 RETURN(PTR_ERR(env));
991 local_iov = &vvp_env_info(env)->vti_local_iov;
992 kiocb = &vvp_env_info(env)->vti_kiocb;
993 local_iov->iov_base = (void __user *)buf;
994 local_iov->iov_len = count;
995 init_sync_kiocb(kiocb, file);
996 kiocb->ki_pos = *ppos;
997 kiocb->ki_left = count;
999 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1000 *ppos = kiocb->ki_pos;
1002 cl_env_put(env, &refcheck);
1008 * Write to a file (through the page cache).
1010 #ifdef HAVE_FILE_WRITEV
/* Vector write entry point (pre-aio kernels): mirror of
 * ll_file_readv() with CIT_WRITE. */
1011 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1012 unsigned long nr_segs, loff_t *ppos)
1015 struct vvp_io_args *args;
1021 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1025 env = cl_env_get(&refcheck);
1027 RETURN(PTR_ERR(env));
1029 args = vvp_env_args(env, IO_NORMAL);
1030 args->u.normal.via_iov = (struct iovec *)iov;
1031 args->u.normal.via_nrsegs = nr_segs;
1033 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1034 cl_env_put(env, &refcheck);
/* Single-buffer write (writev build): wrap the user buffer in a
 * per-env iovec and delegate to ll_file_writev(). */
1038 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1042 struct iovec *local_iov;
1047 env = cl_env_get(&refcheck);
1049 RETURN(PTR_ERR(env));
1051 local_iov = &vvp_env_info(env)->vti_local_iov;
1052 local_iov->iov_base = (void __user *)buf;
1053 local_iov->iov_len = count;
1055 result = ll_file_writev(file, local_iov, 1, ppos);
1056 cl_env_put(env, &refcheck);
1060 #else /* AIO stuff */
/* AIO write entry point: mirror of ll_file_aio_read() with CIT_WRITE,
 * operating on iocb->ki_pos. */
1061 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1062 unsigned long nr_segs, loff_t pos)
1065 struct vvp_io_args *args;
1071 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1075 env = cl_env_get(&refcheck);
1077 RETURN(PTR_ERR(env));
1079 args = vvp_env_args(env, IO_NORMAL);
1080 args->u.normal.via_iov = (struct iovec *)iov;
1081 args->u.normal.via_nrsegs = nr_segs;
1082 args->u.normal.via_iocb = iocb;
1084 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1085 &iocb->ki_pos, count);
1086 cl_env_put(env, &refcheck);
/* Single-buffer write (AIO build): build a synchronous kiocb plus a
 * one-segment iovec, call ll_file_aio_write() and propagate the
 * updated position. */
1090 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1094 struct iovec *local_iov;
1095 struct kiocb *kiocb;
1100 env = cl_env_get(&refcheck);
1102 RETURN(PTR_ERR(env));
1104 local_iov = &vvp_env_info(env)->vti_local_iov;
1105 kiocb = &vvp_env_info(env)->vti_kiocb;
1106 local_iov->iov_base = (void __user *)buf;
1107 local_iov->iov_len = count;
1108 init_sync_kiocb(kiocb, file);
1109 kiocb->ki_pos = *ppos;
1110 kiocb->ki_left = count;
1112 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1113 *ppos = kiocb->ki_pos;
1115 cl_env_put(env, &refcheck);
1121 #ifdef HAVE_KERNEL_SENDFILE
1123 * Send file content (through pagecache) somewhere with helper
/* sendfile() backend: run the generic IO path with the IO_SENDFILE
 * subtype, passing the caller's actor/target through vvp args. */
1125 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1126 read_actor_t actor, void *target)
1129 struct vvp_io_args *args;
1134 env = cl_env_get(&refcheck);
1136 RETURN(PTR_ERR(env));
1138 args = vvp_env_args(env, IO_SENDFILE);
1139 args->u.sendfile.via_target = target;
1140 args->u.sendfile.via_actor = actor;
1142 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1143 cl_env_put(env, &refcheck);
1148 #ifdef HAVE_KERNEL_SPLICE_READ
1150 * Send file content (through pagecache) somewhere with helper
/* splice_read() backend: run the generic IO path with the IO_SPLICE
 * subtype, passing the pipe and splice flags through vvp args. */
1152 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1153 struct pipe_inode_info *pipe, size_t count,
1157 struct vvp_io_args *args;
1162 env = cl_env_get(&refcheck);
1164 RETURN(PTR_ERR(env));
1166 args = vvp_env_args(env, IO_SPLICE);
1167 args->u.splice.via_pipe = pipe;
1168 args->u.splice.via_flags = flags;
1170 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1171 cl_env_put(env, &refcheck);
/* LL_IOC_RECREATE_OBJ handler: re-create a lost OST object for this
 * file. Requires CAP_SYS_ADMIN; copies the request from userspace,
 * clones the stripe MD under the inode size lock, fills an obdo with
 * the target id/group/ost-index plus OBD_FL_RECREATE_OBJS and calls
 * obd_create().
 * NOTE(review): elided lines hide oa allocation, rc/lsm_size
 * declarations and several error paths. */
1176 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1179 struct obd_export *exp = ll_i2dtexp(inode);
1180 struct ll_recreate_obj ucreatp;
1181 struct obd_trans_info oti = { 0 };
1182 struct obdo *oa = NULL;
1185 struct lov_stripe_md *lsm, *lsm2;
1188 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1191 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1192 sizeof(struct ll_recreate_obj)))
1199 ll_inode_size_lock(inode, 0);
1200 lsm = ll_i2info(inode)->lli_smd;
1202 GOTO(out, rc = -ENOENT);
1203 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1204 (lsm->lsm_stripe_count));
1206 OBD_ALLOC(lsm2, lsm_size);
1208 GOTO(out, rc = -ENOMEM);
1210 oa->o_id = ucreatp.lrc_id;
1211 oa->o_gr = ucreatp.lrc_group;
/* o_nlink is (re)used here to carry the target OST index. */
1212 oa->o_nlink = ucreatp.lrc_ost_idx;
1213 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1214 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1215 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1216 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1218 memcpy(lsm2, lsm, lsm_size);
1219 rc = obd_create(exp, oa, &lsm2, &oti);
1221 OBD_FREE(lsm2, lsm_size);
1224 ll_inode_size_unlock(inode, 0);
/* Set striping for a file by issuing an IT_OPEN intent carrying the
 * user's lov EA (@lum/@lum_size). Fails if stripe MD already exists;
 * on success releases the extra open handle obtained by the intent.
 * NOTE(review): elided lines hide rc declaration, GOTO labels and the
 * -EEXIST path after the "stripe already exists" message. */
1229 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1230 int flags, struct lov_user_md *lum, int lum_size)
1232 struct lov_stripe_md *lsm;
1233 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1237 ll_inode_size_lock(inode, 0);
1238 lsm = ll_i2info(inode)->lli_smd;
1240 ll_inode_size_unlock(inode, 0);
1241 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1246 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1249 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1250 GOTO(out_req_free, rc = -ENOENT);
1251 rc = oit.d.lustre.it_status;
1253 GOTO(out_req_free, rc);
/* The intent opened an MDS handle we don't need; release it. */
1255 ll_release_openhandle(file->f_dentry, &oit);
1258 ll_inode_size_unlock(inode, 0);
1259 ll_intent_release(&oit);
1262 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the lov EA (striping info) for @filename from the MDS via
 * md_getattr_name(), validate its magic, byte-swap it to host endian
 * for userspace if needed, and return the lmm buffer, its size and
 * the request (caller releases the request).
 * NOTE(review): elided lines hide rc/lmmsize declarations and a few
 * braces/returns; swab logic is only applied on little-endian
 * mismatch as the visible check shows. */
1266 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1267 struct lov_mds_md **lmmp, int *lmm_size,
1268 struct ptlrpc_request **request)
1270 struct ll_sb_info *sbi = ll_i2sbi(inode);
1271 struct mdt_body *body;
1272 struct lov_mds_md *lmm = NULL;
1273 struct ptlrpc_request *req = NULL;
1274 struct obd_capa *oc;
1277 rc = ll_get_max_mdsize(sbi, &lmmsize);
1281 oc = ll_mdscapa_get(inode);
1282 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1283 oc, filename, strlen(filename) + 1,
1284 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1285 ll_i2suppgid(inode), &req);
1288 CDEBUG(D_INFO, "md_getattr_name failed "
1289 "on %s: rc %d\n", filename, rc);
1293 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1294 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1296 lmmsize = body->eadatasize;
/* No EA present (or empty): nothing to return. */
1298 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1300 GOTO(out, rc = -ENODATA);
1303 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1304 LASSERT(lmm != NULL);
1306 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1307 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1308 GOTO(out, rc = -EPROTO);
1312 * This is coming from the MDS, so is probably in
1313 * little endian. We convert it to host endian before
1314 * passing it to userspace.
1316 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1317 /* if function called for directory - we should
1318 * avoid swab not existent lsm objects */
1319 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1320 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1321 if (S_ISREG(body->mode))
1322 lustre_swab_lov_user_md_objects(
1323 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1324 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1325 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1326 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1327 if (S_ISREG(body->mode))
1328 lustre_swab_lov_user_md_objects(
1329 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1330 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1336 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (plus one OST data entry)
 * from userspace and apply it as the file's striping EA with
 * MDS_OPEN_HAS_OBJS.  Requires CAP_SYS_ADMIN.  The temporary lump buffer
 * is freed on both the copy-failure and the normal path.
 */
1341 static int ll_lov_setea(struct inode *inode, struct file *file,
1344 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1345 struct lov_user_md *lump;
1346 int lum_size = sizeof(struct lov_user_md) +
1347 sizeof(struct lov_user_ost_data);
/* admin-only: setting explicit object EAs bypasses normal allocation */
1351 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1354 OBD_ALLOC(lump, lum_size);
1358 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1359 OBD_FREE(lump, lum_size);
1363 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1365 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: read the user's lov_user_md, first as the
 * smaller V1 layout and again as V3 if the magic says so, then set the
 * striping EA.  Afterwards the user's lmm_stripe_count is refreshed via a
 * GETSTRIPE iocontrol so the caller sees the actual layout chosen.
 */
1369 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1372 struct lov_user_md_v3 lumv3;
1373 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1374 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1375 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1378 int flags = FMODE_WRITE;
1381 /* first try with v1 which is smaller than v3 */
1382 lum_size = sizeof(struct lov_user_md_v1);
1383 if (copy_from_user(lumv1, lumv1p, lum_size))
/* V3 magic: re-copy the full, larger structure */
1386 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1387 lum_size = sizeof(struct lov_user_md_v3);
1388 if (copy_from_user(&lumv3, lumv3p, lum_size))
1392 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* clear the user's count, then let GETSTRIPE fill in the real layout */
1394 put_user(0, &lumv1p->lmm_stripe_count);
1395 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1396 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE handler: hand the inode's cached stripe MD to the
 * LOV layer, which copies the layout out to the userspace buffer in @arg.
 * NOTE(review): the NULL-lsm check and ENTRY/RETURN lines are not visible
 * in this view.
 */
1402 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1404 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1409 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * LL_IOC_GROUP_LOCK handler: take a cl-layer group lock with gid @arg and
 * record it in the per-open-file data.  fd_flags/fd_grouplock are guarded
 * by lli->lli_lock; the flag is re-checked after the (blocking) enqueue
 * because another thread may have won the race in the meantime.
 */
1413 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1415 struct ll_inode_info *lli = ll_i2info(inode);
1416 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1417 struct ccc_grouplock grouplock;
/* group locks and nolock mounts are mutually exclusive */
1421 if (ll_file_nolock(file))
1422 RETURN(-EOPNOTSUPP);
1424 spin_lock(&lli->lli_lock);
1425 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1426 CWARN("group lock already existed with gid %lu\n",
1427 fd->fd_grouplock.cg_gid);
1428 spin_unlock(&lli->lli_lock);
1431 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1432 spin_unlock(&lli->lli_lock);
/* enqueue outside the spinlock; may block unless O_NONBLOCK */
1434 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1435 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* re-check: a concurrent caller may have installed a lock already */
1439 spin_lock(&lli->lli_lock);
1440 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1441 spin_unlock(&lli->lli_lock);
1442 CERROR("another thread just won the race\n");
1443 cl_put_grouplock(&grouplock);
1447 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1448 fd->fd_grouplock = grouplock;
1449 spin_unlock(&lli->lli_lock);
1451 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock recorded in the
 * per-open-file data, but only if one is held and its gid matches @arg.
 * The fd state is cleared under lli_lock; the cl-layer put happens after
 * the spinlock is dropped.
 */
1455 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1457 struct ll_inode_info *lli = ll_i2info(inode);
1458 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1459 struct ccc_grouplock grouplock;
1462 spin_lock(&lli->lli_lock);
1463 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1464 spin_unlock(&lli->lli_lock);
1465 CWARN("no group lock held\n");
1468 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* refuse to drop a lock with a different gid than requested */
1470 if (fd->fd_grouplock.cg_gid != arg) {
1471 CWARN("group lock %lu doesn't match current id %lu\n",
1472 arg, fd->fd_grouplock.cg_gid);
1473 spin_unlock(&lli->lli_lock);
/* detach the lock from fd state before releasing it */
1477 grouplock = fd->fd_grouplock;
1478 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1479 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1480 spin_unlock(&lli->lli_lock);
1482 cl_put_grouplock(&grouplock);
1483 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1488 * Close inode open handle
1490 * \param dentry [in] dentry which contains the inode
1491 * \param it [in,out] intent which contains open info and result
1494 * \retval <0 failure
/*
 * Close the MDS open handle carried by intent @it for @dentry's inode.
 * No-op for the filesystem root and for intents without a completed open.
 * Also drops the open-request reference if DISP_ENQ_OPEN_REF was set.
 */
1496 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1498 struct inode *inode = dentry->d_inode;
1499 struct obd_client_handle *och;
1505 /* Root ? Do nothing. */
1506 if (dentry->d_inode->i_sb->s_root == dentry)
1509 /* No open handle to close? Move away */
1510 if (!it_disposition(it, DISP_OPEN_OPEN))
1513 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1515 OBD_ALLOC(och, sizeof(*och));
1517 GOTO(out, rc = -ENOMEM);
/* fill the client handle from the intent, then close it on the MDS */
1519 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1520 ll_i2info(inode), it, och);
1522 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1525 /* this one is in place of ll_file_open */
1526 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1527 ptlrpc_req_finished(it->d.lustre.it_data);
1528 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1533 * Get size for inode for which FIEMAP mapping is requested.
1534 * Make the FIEMAP get_info call and returns the result.
/*
 * Perform the FIEMAP mapping for @inode: build a ll_fiemap_info_key from
 * the stripe MD and inode attributes and ask the data export via
 * obd_get_info(KEY_FIEMAP).  @fiemap is both input (request) and output
 * (mapped extents); @num_bytes is the total buffer size.
 */
1536 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1539 struct obd_export *exp = ll_i2dtexp(inode);
1540 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1541 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1542 int vallen = num_bytes;
1546 /* If the stripe_count > 1 and the application does not understand
1547 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1549 if (lsm->lsm_stripe_count > 1 &&
1550 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1553 fm_key.oa.o_id = lsm->lsm_object_id;
1554 fm_key.oa.o_gr = lsm->lsm_object_gr;
1555 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1557 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1560 /* If filesize is 0, then there would be no objects for mapping */
1561 if (fm_key.oa.o_size == 0) {
1562 fiemap->fm_mapped_extents = 0;
1566 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1568 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1570 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH handler: copy the user's getinfo_fid2path header in,
 * allocate an output buffer sized by gf_pathlen, resolve the path through
 * the MDC iocontrol, and copy the result back to userspace.
 * NOTE(review): gfin's free and several error branches are outside this
 * view — confirm cleanup paths in the full file.
 */
1575 int ll_fid2path(struct obd_export *exp, void *arg)
1577 struct getinfo_fid2path *gfout, *gfin;
1581 /* Need to get the buflen */
1582 OBD_ALLOC_PTR(gfin);
1585 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* output = header + caller-provided path buffer length */
1590 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1591 OBD_ALLOC(gfout, outsize);
1592 if (gfout == NULL) {
1596 memcpy(gfout, gfin, sizeof(*gfout));
1599 /* Call mdc_iocontrol */
1600 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1603 if (copy_to_user(arg, gfout, outsize))
1607 OBD_FREE(gfout, outsize);
/*
 * Main ioctl dispatcher for regular Lustre files.  Handles file-flag
 * get/set, LOV stripe EA ioctls, FIEMAP, group locks, statfs, fid/path
 * translation, and falls through to dynamically registered handlers
 * (ll_iocontrol_call) and finally the data export's obd_iocontrol.
 * NOTE(review): many break/RETURN lines between cases are not visible in
 * this view; case boundaries below are as the full file defines them.
 */
1611 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1614 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1618 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1619 inode->i_generation, inode, cmd);
1620 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1622 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1623 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1627 case LL_IOC_GETFLAGS:
1628 /* Get the current value of the file flags */
1629 return put_user(fd->fd_flags, (int *)arg);
1630 case LL_IOC_SETFLAGS:
1631 case LL_IOC_CLRFLAGS:
1632 /* Set or clear specific file flags */
1633 /* XXX This probably needs checks to ensure the flags are
1634 * not abused, and to handle any flag side effects.
1636 if (get_user(flags, (int *) arg))
1639 if (cmd == LL_IOC_SETFLAGS) {
/* IGNORE_LOCK is only safe when I/O bypasses the page cache */
1640 if ((flags & LL_FILE_IGNORE_LOCK) &&
1641 !(file->f_flags & O_DIRECT)) {
1642 CERROR("%s: unable to disable locking on "
1643 "non-O_DIRECT file\n", current->comm);
1647 fd->fd_flags |= flags;
1649 fd->fd_flags &= ~flags;
1652 case LL_IOC_LOV_SETSTRIPE:
1653 RETURN(ll_lov_setstripe(inode, file, arg));
1654 case LL_IOC_LOV_SETEA:
1655 RETURN(ll_lov_setea(inode, file, arg));
1656 case LL_IOC_LOV_GETSTRIPE:
1657 RETURN(ll_lov_getstripe(inode, arg));
1658 case LL_IOC_RECREATE_OBJ:
1659 RETURN(ll_lov_recreate_obj(inode, file, arg));
1660 case FSFILT_IOC_FIEMAP: {
1661 struct ll_user_fiemap *fiemap_s;
1662 size_t num_bytes, ret_bytes;
1663 unsigned int extent_count;
1666 /* Get the extent count so we can calculate the size of
1667 * required fiemap buffer */
1668 if (get_user(extent_count,
1669 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1671 num_bytes = sizeof(*fiemap_s) + (extent_count *
1672 sizeof(struct ll_fiemap_extent));
1673 OBD_VMALLOC(fiemap_s, num_bytes);
1674 if (fiemap_s == NULL)
/* copy only the fixed header first; extents follow separately */
1677 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1679 GOTO(error, rc = -EFAULT);
/* unknown flags: echo back the supported subset, then fail */
1681 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1682 fiemap_s->fm_flags = fiemap_s->fm_flags &
1683 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1684 if (copy_to_user((char *)arg, fiemap_s,
1686 GOTO(error, rc = -EFAULT);
1688 GOTO(error, rc = -EBADR);
1691 /* If fm_extent_count is non-zero, read the first extent since
1692 * it is used to calculate end_offset and device from previous
1695 if (copy_from_user(&fiemap_s->fm_extents[0],
1696 (char __user *)arg + sizeof(*fiemap_s),
1697 sizeof(struct ll_fiemap_extent)))
1698 GOTO(error, rc = -EFAULT);
1701 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
/* flush dirty pages so the mapping reflects stable data */
1704 rc = filemap_fdatawrite(inode->i_mapping);
1709 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1713 ret_bytes = sizeof(struct ll_user_fiemap);
1715 if (extent_count != 0)
1716 ret_bytes += (fiemap_s->fm_mapped_extents *
1717 sizeof(struct ll_fiemap_extent));
1719 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1723 OBD_VFREE(fiemap_s, num_bytes);
1726 case FSFILT_IOC_GETFLAGS:
1727 case FSFILT_IOC_SETFLAGS:
1728 RETURN(ll_iocontrol(inode, file, cmd, arg));
1729 case FSFILT_IOC_GETVERSION_OLD:
1730 case FSFILT_IOC_GETVERSION:
1731 RETURN(put_user(inode->i_generation, (int *)arg));
1732 case LL_IOC_GROUP_LOCK:
1733 RETURN(ll_get_grouplock(inode, file, arg));
1734 case LL_IOC_GROUP_UNLOCK:
1735 RETURN(ll_put_grouplock(inode, file, arg));
1736 case IOC_OBD_STATFS:
1737 RETURN(ll_obd_statfs(inode, (void *)arg));
1739 /* We need to special case any other ioctls we want to handle,
1740 * to send them to the MDS/OST as appropriate and to properly
1741 * network encode the arg field.
1742 case FSFILT_IOC_SETVERSION_OLD:
1743 case FSFILT_IOC_SETVERSION:
1745 case LL_IOC_FLUSHCTX:
1746 RETURN(ll_flush_ctx(inode));
1747 case LL_IOC_PATH2FID: {
1748 if (copy_to_user((void *)arg, ll_inode2fid(inode),
1749 sizeof(struct lu_fid)))
1754 case OBD_IOC_FID2PATH:
1755 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
/* unknown cmd: try dynamically registered handlers, then the OSC */
1761 ll_iocontrol_call(inode, file, cmd, arg, &err))
1764 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.  For SEEK_END the file size is glimpsed from the
 * OSTs (cl_glimpse_size) before being read under the inode size lock, so
 * the offset reflects the cluster-wide size.  The result is range-checked
 * against ll_file_maxbytes before f_pos is updated.
 */
1770 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1772 struct inode *inode = file->f_dentry->d_inode;
1775 retval = offset + ((origin == 2) ? i_size_read(inode) :
1776 (origin == 1) ? file->f_pos : 0);
1777 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1778 inode->i_ino, inode->i_generation, inode, retval, retval,
1779 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1780 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1782 if (origin == 2) { /* SEEK_END */
1783 int nonblock = 0, rc;
1785 if (file->f_flags & O_NONBLOCK)
1786 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* refresh i_size from the OSTs before using it */
1788 rc = cl_glimpse_size(inode);
1792 ll_inode_size_lock(inode, 0);
1793 offset += i_size_read(inode);
1794 ll_inode_size_unlock(inode, 0);
1795 } else if (origin == 1) { /* SEEK_CUR */
1796 offset += file->f_pos;
1800 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1801 if (offset != file->f_pos) {
1802 file->f_pos = offset;
/*
 * fsync implementation: wait for in-flight page I/O, harvest async write
 * errors recorded on the inode and the stripe MD, sync metadata via the
 * MDS (md_sync), and then sync data objects on the OSTs (obd_sync) over
 * the whole object range [0, OBD_OBJECT_EOF).
 * NOTE(review): the obdo allocation and several rc/err merge lines are
 * outside this view.
 */
1810 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1812 struct inode *inode = dentry->d_inode;
1813 struct ll_inode_info *lli = ll_i2info(inode);
1814 struct lov_stripe_md *lsm = lli->lli_smd;
1815 struct ptlrpc_request *req;
1816 struct obd_capa *oc;
1819 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1820 inode->i_generation, inode);
1821 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1823 /* fsync's caller has already called _fdata{sync,write}, we want
1824 * that IO to finish before calling the osc and mdc sync methods */
1825 rc = filemap_fdatawait(inode->i_mapping);
1827 /* catch async errors that were recorded back when async writeback
1828 * failed for pages in this mapping. */
1829 err = lli->lli_async_rc;
1830 lli->lli_async_rc = 0;
1834 err = lov_test_and_clear_async_rc(lsm);
/* metadata sync through the MDS */
1839 oc = ll_mdscapa_get(inode);
1840 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1846 ptlrpc_req_finished(req);
1853 RETURN(rc ? rc : -ENOMEM);
1855 oa->o_id = lsm->lsm_object_id;
1856 oa->o_gr = lsm->lsm_object_gr;
1857 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1858 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1859 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
/* data sync on the OSTs over the full object range */
1862 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1863 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1864 0, OBD_OBJECT_EOF, oc);
/*
 * flock/fcntl lock implementation: translate the kernel file_lock into an
 * LDLM flock enqueue (LCK_PR/PW for read/write, LCK_NL meaning unlock)
 * and send it to the MDS.  On success the lock is also registered with
 * the local VFS lock lists so userspace sees consistent state.
 * NOTE(review): the switch cases for lock types/commands are partially
 * elided in this view; case labels are in the full file.
 */
1874 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1876 struct inode *inode = file->f_dentry->d_inode;
1877 struct ll_sb_info *sbi = ll_i2sbi(inode);
1878 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1879 .ei_cb_cp =ldlm_flock_completion_ast,
1880 .ei_cbdata = file_lock };
1881 struct md_op_data *op_data;
1882 struct lustre_handle lockh = {0};
1883 ldlm_policy_data_t flock;
1888 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1889 inode->i_ino, file_lock);
1891 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1893 if (file_lock->fl_flags & FL_FLOCK) {
1894 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1895 /* set missing params for flock() calls */
1896 file_lock->fl_end = OFFSET_MAX;
1897 file_lock->fl_pid = current->tgid;
1899 flock.l_flock.pid = file_lock->fl_pid;
1900 flock.l_flock.start = file_lock->fl_start;
1901 flock.l_flock.end = file_lock->fl_end;
1903 switch (file_lock->fl_type) {
1905 einfo.ei_mode = LCK_PR;
1908 /* An unlock request may or may not have any relation to
1909 * existing locks so we may not be able to pass a lock handle
1910 * via a normal ldlm_lock_cancel() request. The request may even
1911 * unlock a byte range in the middle of an existing lock. In
1912 * order to process an unlock request we need all of the same
1913 * information that is given with a normal read or write record
1914 * lock request. To avoid creating another ldlm unlock (cancel)
1915 * message we'll treat a LCK_NL flock request as an unlock. */
1916 einfo.ei_mode = LCK_NL;
1919 einfo.ei_mode = LCK_PW;
1922 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
1937 flags = LDLM_FL_BLOCK_NOWAIT;
1943 flags = LDLM_FL_TEST_LOCK;
1944 /* Save the old mode so that if the mode in the lock changes we
1945 * can decrement the appropriate reader or writer refcount. */
1946 file_lock->fl_type = einfo.ei_mode;
1949 CERROR("unknown fcntl lock command: %d\n", cmd);
1953 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1954 LUSTRE_OPC_ANY, NULL);
1955 if (IS_ERR(op_data))
1956 RETURN(PTR_ERR(op_data));
1958 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
1959 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
1960 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
/* send the flock enqueue to the MDS */
1962 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
1963 op_data, &lockh, &flock, 0, NULL /* req */, flags);
1965 ll_finish_md_op_data(op_data);
/* mirror the result into the local VFS lock bookkeeping */
1967 if ((file_lock->fl_flags & FL_FLOCK) &&
1968 (rc == 0 || file_lock->fl_type == F_UNLCK))
1969 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
1970 #ifdef HAVE_F_OP_FLOCK
1971 if ((file_lock->fl_flags & FL_POSIX) &&
1972 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
1973 !(flags & LDLM_FL_TEST_LOCK))
1974 posix_lock_file_wait(file, file_lock);
/* Lock handler for -o noflock mounts; body not visible in this view —
 * presumably rejects the request (see ll_file_operations_noflock). */
1980 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking a reference) whether this client already holds a
 * granted MDS inodebits lock covering @bits on @inode, in any of the
 * CR/CW/PR/PW modes.  Uses LDLM_FL_TEST_LOCK so no lock state changes.
 */
1987 int ll_have_md_lock(struct inode *inode, __u64 bits)
1989 struct lustre_handle lockh;
1990 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
1998 fid = &ll_i2info(inode)->lli_fid;
1999 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2001 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2002 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2003 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Like ll_have_md_lock() but actually takes a reference on the matched
 * inodebits lock (no LDLM_FL_TEST_LOCK), returning the matched mode and
 * filling *lockh for the caller to release later.
 */
2009 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2010 struct lustre_handle *lockh)
2012 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2018 fid = &ll_i2info(inode)->lli_fid;
2019 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2021 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2022 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2023 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process a revalidation RPC result: -ENOENT means the object was
 * already unlinked, which is tolerated (treated as success after updating
 * link state) except for obscure races on regular files/dirs; any other
 * error is logged.
 */
2027 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2028 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2029 * and return success */
2031 /* This path cannot be hit for regular files unless in
2032 * case of obscure races, so no need to to validate
2034 if (!S_ISREG(inode->i_mode) &&
2035 !S_ISDIR(inode->i_mode))
2040 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate @dentry's inode attributes against the MDS.  If the server
 * supports getattr-by-FID (OBD_CONNECT_ATTRFID) an IT_GETATTR intent lock
 * is used (which may also unhash an unlinked dentry); otherwise, when no
 * covering MD lock is cached locally, a plain md_getattr refreshes the
 * inode via ll_prep_inode.
 * NOTE(review): several error branches and RETURN points fall outside
 * this view.
 */
2048 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2051 struct inode *inode = dentry->d_inode;
2052 struct ptlrpc_request *req = NULL;
2053 struct ll_sb_info *sbi;
2054 struct obd_export *exp;
2059 CERROR("REPORT THIS LINE TO PETER\n");
2062 sbi = ll_i2sbi(inode);
2064 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2065 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2067 exp = ll_i2mdexp(inode);
2069 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2070 struct lookup_intent oit = { .it_op = IT_GETATTR };
2071 struct md_op_data *op_data;
2073 /* Call getattr by fid, so do not provide name at all. */
2074 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2075 dentry->d_inode, NULL, 0, 0,
2076 LUSTRE_OPC_ANY, NULL);
2077 if (IS_ERR(op_data))
2078 RETURN(PTR_ERR(op_data));
2080 oit.it_create_mode |= M_CHECK_STALE;
2081 rc = md_intent_lock(exp, op_data, NULL, 0,
2082 /* we are not interested in name
2085 ll_md_blocking_ast, 0);
2086 ll_finish_md_op_data(op_data);
2087 oit.it_create_mode &= ~M_CHECK_STALE;
2089 rc = ll_inode_revalidate_fini(inode, rc);
2093 rc = ll_revalidate_it_finish(req, &oit, dentry);
2095 ll_intent_release(&oit);
2099 /* Unlinked? Unhash dentry, so it is not picked up later by
2100 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2101 here to preserve get_cwd functionality on 2.6.
2103 if (!dentry->d_inode->i_nlink) {
2104 spin_lock(&ll_lookup_lock);
2105 spin_lock(&dcache_lock);
2106 ll_drop_dentry(dentry);
2107 spin_unlock(&dcache_lock);
2108 spin_unlock(&ll_lookup_lock);
2111 ll_lookup_finish_locks(&oit, dentry);
2112 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2114 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2115 obd_valid valid = OBD_MD_FLGETATTR;
2116 struct obd_capa *oc;
/* for regular files also fetch the (max-sized) EA */
2119 if (S_ISREG(inode->i_mode)) {
2120 rc = ll_get_max_mdsize(sbi, &ealen);
2123 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2125 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2126 * capa for this inode. Because we only keep capas of dirs
2128 oc = ll_mdscapa_get(inode);
2129 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2133 rc = ll_inode_revalidate_fini(inode, rc);
2137 rc = ll_prep_inode(&inode, req, NULL);
2140 ptlrpc_req_finished(req);
/*
 * Revalidate UPDATE|LOOKUP inodebits, then refresh the file size from the
 * OSTs via cl_glimpse_size — skipped when no objects are allocated yet
 * (lli_smd == NULL).
 */
2144 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2149 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2150 MDS_INODELOCK_LOOKUP);
2152 /* if object not yet allocated, don't validate size */
2153 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2156 /* cl_glimpse_size will prefer locally cached writes if they extend
2160 rc = cl_glimpse_size(dentry->d_inode);
/*
 * getattr with an explicit intent: revalidate the inode first, then copy
 * its attributes into @stat.  size/blocks are read under the inode size
 * lock for a consistent pair.
 */
2165 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2166 struct lookup_intent *it, struct kstat *stat)
2168 struct inode *inode = de->d_inode;
2171 res = ll_inode_revalidate_it(de, it);
2172 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2177 stat->dev = inode->i_sb->s_dev;
2178 stat->ino = inode->i_ino;
2179 stat->mode = inode->i_mode;
2180 stat->nlink = inode->i_nlink;
2181 stat->uid = inode->i_uid;
2182 stat->gid = inode->i_gid;
2183 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2184 stat->atime = inode->i_atime;
2185 stat->mtime = inode->i_mtime;
2186 stat->ctime = inode->i_ctime;
2187 #ifdef HAVE_INODE_BLKSIZE
2188 stat->blksize = inode->i_blksize;
2190 stat->blksize = 1 << inode->i_blkbits;
/* read size and blocks atomically w.r.t. size updates */
2193 ll_inode_size_lock(inode, 0);
2194 stat->size = i_size_read(inode);
2195 stat->blocks = inode->i_blocks;
2196 ll_inode_size_unlock(inode, 0);
/* VFS ->getattr entry point: wraps ll_getattr_it with an IT_GETATTR intent. */
2200 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2202 struct lookup_intent it = { .it_op = IT_GETATTR };
2204 return ll_getattr_it(mnt, de, &it, stat);
2207 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * Kernel FIEMAP entry point: recovers the ll_user_fiemap request that was
 * laid out immediately before fi_extents_start, runs ll_do_fiemap, and
 * copies flags/extent count back into the fiemap_extent_info.
 * NOTE(review): this relies on the caller having placed the header just
 * before the extents buffer — confirm against the full file's setup code.
 */
2208 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2209 __u64 start, __u64 len)
2212 struct ll_user_fiemap *fiemap = (struct ll_user_fiemap*)(
2213 fieinfo->fi_extents_start - sizeof(ll_user_fiemap));
2215 rc = ll_do_fiemap(inode, fiemap, sizeof(*fiemap) +
2216 fiemap->fm_extent_count *
2217 sizeof(struct ll_fiemap_extent));
2219 fieinfo->fi_flags = fiemap->fm_flags;
2220 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
/*
 * POSIX ACL permission check callback: duplicate the cached ACL under
 * lli_lock (so the check runs without holding the spinlock), evaluate it
 * with posix_acl_permission, and release the duplicate.  Compiled out
 * when CONFIG_FS_POSIX_ACL is disabled.
 */
2228 int lustre_check_acl(struct inode *inode, int mask)
2230 #ifdef CONFIG_FS_POSIX_ACL
2231 struct ll_inode_info *lli = ll_i2info(inode);
2232 struct posix_acl *acl;
2236 spin_lock(&lli->lli_lock);
2237 acl = posix_acl_dup(lli->lli_posix_acl);
2238 spin_unlock(&lli->lli_lock);
2243 rc = posix_acl_permission(inode, acl, mask);
2244 posix_acl_release(acl);
/*
 * Permission check, in two kernel-version variants.  2.6.10+: revalidate
 * the root inode if needed, delegate remote-client checks, then use
 * generic_permission with lustre_check_acl.  Older kernels: an open-coded
 * owner/group/other check with ACL fallback and capability overrides.
 */
2252 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2253 #ifndef HAVE_INODE_PERMISION_2ARGS
2254 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2256 int ll_inode_permission(struct inode *inode, int mask)
2262 /* as root inode are NOT getting validated in lookup operation,
2263 * need to do it before permission check. */
2265 if (inode == inode->i_sb->s_root->d_inode) {
2266 struct lookup_intent it = { .it_op = IT_LOOKUP };
2268 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2269 MDS_INODELOCK_LOOKUP);
2274 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2275 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2277 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2278 return lustre_check_remote_perm(inode, mask);
2280 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2281 rc = generic_permission(inode, mask, lustre_check_acl);
/* pre-2.6.10 variant: open-coded UNIX mode-bit checks */
2286 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2288 int mode = inode->i_mode;
2291 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2292 inode->i_ino, inode->i_generation, inode, mask);
2294 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2295 return lustre_check_remote_perm(inode, mask);
2297 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2299 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2300 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2302 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2304 if (current->fsuid == inode->i_uid) {
2307 if (((mode >> 3) & mask & S_IRWXO) != mask)
2309 rc = lustre_check_acl(inode, mask);
2313 goto check_capabilities;
2317 if (in_group_p(inode->i_gid))
2320 if ((mode & mask & S_IRWXO) == mask)
2324 if (!(mask & MAY_EXEC) ||
2325 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2326 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2329 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2330 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select vectored-I/O method names for the file_operations tables below:
 * old kernels expose readv/writev, newer ones aio_read/aio_write. */
2337 #ifdef HAVE_FILE_READV
2338 #define READ_METHOD readv
2339 #define READ_FUNCTION ll_file_readv
2340 #define WRITE_METHOD writev
2341 #define WRITE_FUNCTION ll_file_writev
2343 #define READ_METHOD aio_read
2344 #define READ_FUNCTION ll_file_aio_read
2345 #define WRITE_METHOD aio_write
2346 #define WRITE_FUNCTION ll_file_aio_write
2349 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no .flock/.lock entries, so the kernel's local
 * (node-only) flock handling applies. */
2350 struct file_operations ll_file_operations = {
2351 .read = ll_file_read,
2352 .READ_METHOD = READ_FUNCTION,
2353 .write = ll_file_write,
2354 .WRITE_METHOD = WRITE_FUNCTION,
2355 .ioctl = ll_file_ioctl,
2356 .open = ll_file_open,
2357 .release = ll_file_release,
2358 .mmap = ll_file_mmap,
2359 .llseek = ll_file_seek,
2360 #ifdef HAVE_KERNEL_SENDFILE
2361 .sendfile = ll_file_sendfile,
2363 #ifdef HAVE_KERNEL_SPLICE_READ
2364 .splice_read = ll_file_splice_read,
/* file_operations for -o flock mounts: identical to the default table but
 * routes flock/fcntl locks through ll_file_flock (cluster-wide locking). */
2369 struct file_operations ll_file_operations_flock = {
2370 .read = ll_file_read,
2371 .READ_METHOD = READ_FUNCTION,
2372 .write = ll_file_write,
2373 .WRITE_METHOD = WRITE_FUNCTION,
2374 .ioctl = ll_file_ioctl,
2375 .open = ll_file_open,
2376 .release = ll_file_release,
2377 .mmap = ll_file_mmap,
2378 .llseek = ll_file_seek,
2379 #ifdef HAVE_KERNEL_SENDFILE
2380 .sendfile = ll_file_sendfile,
2382 #ifdef HAVE_KERNEL_SPLICE_READ
2383 .splice_read = ll_file_splice_read,
2386 #ifdef HAVE_F_OP_FLOCK
2387 .flock = ll_file_flock,
2389 .lock = ll_file_flock
2392 /* These are for -o noflock - to return ENOSYS on flock calls */
2393 struct file_operations ll_file_operations_noflock = {
2394 .read = ll_file_read,
2395 .READ_METHOD = READ_FUNCTION,
2396 .write = ll_file_write,
2397 .WRITE_METHOD = WRITE_FUNCTION,
2398 .ioctl = ll_file_ioctl,
2399 .open = ll_file_open,
2400 .release = ll_file_release,
2401 .mmap = ll_file_mmap,
2402 .llseek = ll_file_seek,
2403 #ifdef HAVE_KERNEL_SENDFILE
2404 .sendfile = ll_file_sendfile,
2406 #ifdef HAVE_KERNEL_SPLICE_READ
2407 .splice_read = ll_file_splice_read,
2410 #ifdef HAVE_F_OP_FLOCK
2411 .flock = ll_file_noflock,
2413 .lock = ll_file_noflock
/* inode_operations for regular Lustre files: attr get/set, xattrs, and
 * (when the kernel provides linux/fiemap.h) the fiemap hook. */
2416 struct inode_operations ll_file_inode_operations = {
2417 #ifdef HAVE_VFS_INTENT_PATCHES
2418 .setattr_raw = ll_setattr_raw,
2420 .setattr = ll_setattr,
2421 .truncate = ll_truncate,
2422 .getattr = ll_getattr,
2423 .permission = ll_inode_permission,
2424 .setxattr = ll_setxattr,
2425 .getxattr = ll_getxattr,
2426 .listxattr = ll_listxattr,
2427 .removexattr = ll_removexattr,
2428 #ifdef HAVE_LINUX_FIEMAP_H
2429 .fiemap = ll_fiemap,
2433 /* dynamic ioctl number support routins */
/* Registry of dynamically registered ioctl handlers: a list of llioc_data
 * entries protected by a read/write semaphore. */
2434 static struct llioc_ctl_data {
2435 struct rw_semaphore ioc_sem;
2436 struct list_head ioc_head;
2438 __RWSEM_INITIALIZER(llioc.ioc_sem),
2439 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the array of ioctl numbers it
 * serves (iocd_cmd is a trailing variable-length array). */
2444 struct list_head iocd_list;
2445 unsigned int iocd_size;
2446 llioc_callback_t iocd_cb;
2447 unsigned int iocd_count;
2448 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler @cb for @count command numbers in
 * @cmd.  Returns an opaque cookie (the allocation) used later by
 * ll_iocontrol_unregister, or NULL on bad arguments / allocation failure
 * (return lines are outside this view).
 */
2451 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2454 struct llioc_data *in_data = NULL;
2457 if (cb == NULL || cmd == NULL ||
2458 count > LLIOC_MAX_CMD || count < 0)
2461 size = sizeof(*in_data) + count * sizeof(unsigned int);
2462 OBD_ALLOC(in_data, size);
2463 if (in_data == NULL)
2466 memset(in_data, 0, sizeof(*in_data));
2467 in_data->iocd_size = size;
2468 in_data->iocd_cb = cb;
2469 in_data->iocd_count = count;
2470 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* publish on the registry under the write lock */
2472 down_write(&llioc.ioc_sem);
2473 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2474 up_write(&llioc.ioc_sem);
/*
 * Unregister a handler previously returned by ll_iocontrol_register:
 * find the entry matching @magic under the write lock, unlink and free
 * it.  Warns if the cookie is unknown.
 */
2479 void ll_iocontrol_unregister(void *magic)
2481 struct llioc_data *tmp;
2486 down_write(&llioc.ioc_sem);
2487 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2489 unsigned int size = tmp->iocd_size;
2491 list_del(&tmp->iocd_list);
2492 up_write(&llioc.ioc_sem);
2494 OBD_FREE(tmp, size);
2498 up_write(&llioc.ioc_sem);
2500 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2503 EXPORT_SYMBOL(ll_iocontrol_register);
2504 EXPORT_SYMBOL(ll_iocontrol_unregister);
2506 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2507 unsigned int cmd, unsigned long arg, int *rcp)
2509 enum llioc_iter ret = LLIOC_CONT;
2510 struct llioc_data *data;
2511 int rc = -EINVAL, i;
2513 down_read(&llioc.ioc_sem);
2514 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2515 for (i = 0; i < data->iocd_count; i++) {
2516 if (cmd != data->iocd_cmd[i])
2519 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2523 if (ret == LLIOC_STOP)
2526 up_read(&llioc.ioc_sem);