1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-file-descriptor ll_file_data from the dedicated slab
 * cache with GFP-for-I/O allocation flags.
 * NOTE(review): this excerpt elides lines (no braces / return visible);
 * presumably returns @fd, NULL on allocation failure — confirm upstream. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Return a ll_file_data previously obtained from ll_file_data_get()
 * back to the ll_file_data_slab cache. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Pack the inode's current state into @op_data for transmission to the
 * MDS: FID, mode/times/size/blocks, inode flags, I/O epoch, the open
 * handle @fh, and an MDS capability reference (ll_mdscapa_get takes a
 * ref — presumably released by the caller; confirm upstream). */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the Lustre-extended iattr wrapper, hence the cast. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
81 op_data->op_handle = *fh;
82 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Prepare @op_data for an MDS close of handle @och: mark which attributes
 * are valid, add size/blocks when Size-on-MDS is not in effect (or the
 * object is not a regular file), close the I/O epoch, and pack the inode
 * attributes.  NOTE(review): excerpt elides lines between the FMODE_WRITE
 * check and the SOM check — control flow here is partial. */
85 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
86 struct obd_client_handle *och)
90 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
91 ATTR_MTIME_SET | ATTR_CTIME_SET;
93 if (!(och->och_flags & FMODE_WRITE))
96 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
97 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
99 ll_ioepoch_close(inode, op_data, &och, 0);
102 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/* Close an MDS open handle: send md_close(), perform the Size-on-MDS
 * update if the MDS requests it, destroy OST objects for an unlinked
 * file, clear open replay data and poison the handle cookie.
 * NOTE(review): excerpt elides lines (error branches, labels, RETURN);
 * the flow annotated below is partial. */
106 static int ll_close_inode_openhandle(struct obd_export *md_exp,
108 struct obd_client_handle *och)
110 struct obd_export *exp = ll_i2mdexp(inode);
111 struct md_op_data *op_data;
112 struct ptlrpc_request *req = NULL;
113 struct obd_device *obd = class_exp2obd(exp);
120 * XXX: in case of LMV, is this correct to access
123 CERROR("Invalid MDC connection handle "LPX64"\n",
124 ll_i2mdexp(inode)->exp_handle.h_cookie);
128 OBD_ALLOC_PTR(op_data);
130 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
132 ll_prepare_close(inode, op_data, och);
133 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
134 rc = md_close(md_exp, op_data, och->och_mod, &req);
136 /* This close must have the epoch closed. */
137 LASSERT(epoch_close);
138 /* MDS has instructed us to obtain Size-on-MDS attribute from
139 * OSTs and send setattr to back to MDS. */
140 rc = ll_sizeonmds_update(inode, &op_data->op_handle,
141 op_data->op_ioepoch);
143 CERROR("inode %lu mdc Size-on-MDS update failed: "
144 "rc = %d\n", inode->i_ino, rc);
148 CERROR("inode %lu mdc close failed: rc = %d\n",
151 ll_finish_md_op_data(op_data);
/* If the file was unlinked while open, destroy its OST objects now. */
154 rc = ll_objects_destroy(req, inode);
156 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* SOM epoch still open on a written regular file: defer via DONE_WRITING. */
163 if (exp_connect_som(exp) && !epoch_close &&
164 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
165 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
167 md_clear_open_replay_data(md_exp, och);
168 /* Free @och if it is not waiting for DONE_WRITING. */
169 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
172 if (req) /* This is close request */
173 ptlrpc_req_finished(req);
/* Really close the MDS open handle of the kind selected by @flags
 * (write / exec / read).  The handle is only released when its use
 * count drops to zero; och_sem serializes against concurrent opens.
 * NOTE(review): excerpt elides lines (decrement of *och_usecount and the
 * *och_p fetch/clear are not visible) — confirm against full source. */
177 int ll_md_real_close(struct inode *inode, int flags)
179 struct ll_inode_info *lli = ll_i2info(inode);
180 struct obd_client_handle **och_p;
181 struct obd_client_handle *och;
/* Select the per-mode handle slot and its use counter. */
186 if (flags & FMODE_WRITE) {
187 och_p = &lli->lli_mds_write_och;
188 och_usecount = &lli->lli_open_fd_write_count;
189 } else if (flags & FMODE_EXEC) {
190 och_p = &lli->lli_mds_exec_och;
191 och_usecount = &lli->lli_open_fd_exec_count;
193 LASSERT(flags & FMODE_READ);
194 och_p = &lli->lli_mds_read_och;
195 och_usecount = &lli->lli_open_fd_read_count;
198 down(&lli->lli_och_sem);
199 if (*och_usecount) { /* There are still users of this handle, so
201 up(&lli->lli_och_sem);
206 up(&lli->lli_och_sem);
208 if (och) { /* There might be a race and somebody have freed this och
210 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-descriptor close: drop any group lock, decrement the per-mode open
 * count, and call ll_md_real_close() unless we hold a cached OPEN DLM
 * lock that lets us skip talking to the MDS.  Finally detach and free
 * the ll_file_data and release the OSS capability.
 * NOTE(review): excerpt elides lines (lockmode setup, md_lock_match()
 * result handling) — annotated flow is partial. */
217 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
220 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
221 struct ll_inode_info *lli = ll_i2info(inode);
225 /* clear group lock, if present */
226 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
227 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
229 /* Let's see if we have good enough OPEN lock on the file and if
230 we can skip talking to MDS */
231 if (file->f_dentry->d_inode) { /* Can this ever be false? */
233 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
234 struct lustre_handle lockh;
235 struct inode *inode = file->f_dentry->d_inode;
236 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Decrement the open count for the mode this descriptor used. */
238 down(&lli->lli_och_sem);
239 if (fd->fd_omode & FMODE_WRITE) {
241 LASSERT(lli->lli_open_fd_write_count);
242 lli->lli_open_fd_write_count--;
243 } else if (fd->fd_omode & FMODE_EXEC) {
245 LASSERT(lli->lli_open_fd_exec_count);
246 lli->lli_open_fd_exec_count--;
249 LASSERT(lli->lli_open_fd_read_count);
250 lli->lli_open_fd_read_count--;
252 up(&lli->lli_och_sem);
/* No matching cached OPEN ibits lock: must do the real MDS close. */
254 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
255 LDLM_IBITS, &policy, lockmode,
257 rc = ll_md_real_close(file->f_dentry->d_inode,
261 CERROR("Releasing a file %p with negative dentry %p. Name %s",
262 file, file->f_dentry, file->f_dentry->d_name.name);
265 LUSTRE_FPRIVATE(file) = NULL;
266 ll_file_data_put(fd);
267 ll_capa_close(inode);
272 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
274 /* While this returns an error code, fput() the caller does not, so we need
275 * to make every effort to clean up all of our state here. Also, applications
276 * rarely check close errors and even if an error is returned they will not
277 * re-try the close call.
/* VFS ->release() for Lustre files.  Cleans up remote-ACL session state,
 * stops statahead if this descriptor owns it, flushes any stashed async
 * write error from the LOV layer into the return code, and performs the
 * MDS close via ll_md_close().  The root inode is special-cased: it has
 * no MDS open handle to close.
 * NOTE(review): excerpt elides lines (RETURN paths, the lsm NULL check
 * around lov_test_and_clear_async_rc) — flow is partial. */
279 int ll_file_release(struct inode *inode, struct file *file)
281 struct ll_file_data *fd;
282 struct ll_sb_info *sbi = ll_i2sbi(inode);
283 struct ll_inode_info *lli = ll_i2info(inode);
284 struct lov_stripe_md *lsm = lli->lli_smd;
288 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
289 inode->i_generation, inode);
291 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL: tear down the per-process remote ACL session when
 * the root directory descriptor that carried it is released. */
292 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
293 inode == inode->i_sb->s_root->d_inode) {
294 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
297 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
298 fd->fd_flags &= ~LL_FILE_RMTACL;
299 rct_del(&sbi->ll_rct, cfs_curproc_pid());
300 et_search_free(&sbi->ll_et, cfs_curproc_pid());
305 if (inode->i_sb->s_root != file->f_dentry)
306 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
307 fd = LUSTRE_FPRIVATE(file);
310 /* The last ref on @file, maybe not the the owner pid of statahead.
311 * Different processes can open the same dir, "ll_opendir_key" means:
312 * it is me that should stop the statahead thread. */
313 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
314 ll_stop_statahead(inode, lli->lli_opendir_key);
316 if (inode->i_sb->s_root == file->f_dentry) {
317 LUSTRE_FPRIVATE(file) = NULL;
318 ll_file_data_put(fd);
/* Surface any asynchronous OST write error recorded since the last close. */
323 lov_test_and_clear_async_rc(lsm);
324 lli->lli_async_rc = 0;
326 rc = ll_md_close(sbi->ll_md_exp, inode, file);
328 if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
329 libcfs_debug_dumplog();
/* Send an open intent to the MDS for @file (used for NFS-style opens and
 * for setting stripe info via lmm/lmmsize).  On success, refreshes the
 * inode from the reply and attaches the DLM lock data.  Keeps its own
 * exit path to avoid log flooding on -ESTALE.
 * NOTE(review): excerpt elides lines (rc checks, labels, RETURN) —
 * annotated flow is partial. */
334 static int ll_intent_file_open(struct file *file, void *lmm,
335 int lmmsize, struct lookup_intent *itp)
337 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
338 struct dentry *parent = file->f_dentry->d_parent;
339 const char *name = file->f_dentry->d_name.name;
340 const int len = file->f_dentry->d_name.len;
341 struct md_op_data *op_data;
342 struct ptlrpc_request *req;
349 /* Usually we come here only for NFSD, and we want open lock.
350 But we can also get here with pre 2.6.15 patchless kernels, and in
351 that case that lock is also ok */
352 /* We can also get here if there was cached open handle in revalidate_it
353 * but it disappeared while we were getting from there to ll_file_open.
354 * But this means this file was closed and immediatelly opened which
355 * makes a good candidate for using OPEN lock */
356 /* If lmmsize & lmm are not 0, we are just setting stripe info
357 * parameters. No need for the open lock */
358 if (!lmm && !lmmsize)
359 itp->it_flags |= MDS_OPEN_LOCK;
361 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
362 file->f_dentry->d_inode, name, len,
363 O_RDWR, LUSTRE_OPC_ANY, NULL);
365 RETURN(PTR_ERR(op_data));
367 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
368 0 /*unused */, &req, ll_md_blocking_ast, 0);
369 ll_finish_md_op_data(op_data);
371 /* reason for keep own exit path - don`t flood log
372 * with messages with -ESTALE errors.
/* An open actually happened on the MDS: release the server-side handle
 * before bailing out on the error path. */
374 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
375 it_open_error(DISP_OPEN_OPEN, itp))
377 ll_release_openhandle(file->f_dentry, itp);
381 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
382 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
383 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
387 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
388 if (!rc && itp->d.lustre.it_lock_mode)
389 md_set_lock_data(sbi->ll_md_exp,
390 &itp->d.lustre.it_lock_handle,
391 file->f_dentry->d_inode, NULL);
/* Common cleanup: drop the intent's request ref and its DLM lock. */
394 ptlrpc_req_finished(itp->d.lustre.it_data);
395 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
396 ll_intent_drop_lock(itp);
/* Record a newly opened I/O epoch on the inode.  A zero @ioepoch or a
 * repeat of the current epoch is ignored. */
401 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
403 if (ioepoch && lli->lli_ioepoch != ioepoch) {
404 lli->lli_ioepoch = ioepoch;
405 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
406 ioepoch, PFID(&lli->lli_fid));
/* Fill @och from the MDS open reply carried by @it: copy the server file
 * handle, tag the magic, record FID and open flags, open the I/O epoch,
 * then register the request for open replay.  Returns the result of
 * md_set_open_replay_data(). */
410 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
411 struct lookup_intent *it, struct obd_client_handle *och)
413 struct ptlrpc_request *req = it->d.lustre.it_data;
414 struct mdt_body *body;
418 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
419 LASSERT(body != NULL); /* reply already checked out */
421 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
422 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
423 och->och_fid = lli->lli_fid;
424 och->och_flags = it->it_flags;
425 ll_ioepoch_open(lli, body->ioepoch);
427 return md_set_open_replay_data(md_exp, och, req);
/* Complete the client-local part of an open: fill @och from the intent
 * reply (when a new handle was obtained), attach @fd to the file, and
 * initialize readahead state and the descriptor's open mode.
 * NOTE(review): excerpt elides lines (the och NULL check and rc handling)
 * — annotated flow is partial. */
430 int ll_local_open(struct file *file, struct lookup_intent *it,
431 struct ll_file_data *fd, struct obd_client_handle *och)
433 struct inode *inode = file->f_dentry->d_inode;
434 struct ll_inode_info *lli = ll_i2info(inode);
437 LASSERT(!LUSTRE_FPRIVATE(file));
442 struct ptlrpc_request *req = it->d.lustre.it_data;
443 struct mdt_body *body;
446 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
450 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
451 if ((it->it_flags & FMODE_WRITE) &&
452 (body->valid & OBD_MD_FLSIZE))
453 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
454 lli->lli_ioepoch, PFID(&lli->lli_fid));
457 LUSTRE_FPRIVATE(file) = fd;
458 ll_readahead_init(inode, &fd->fd_ras);
459 fd->fd_omode = it->it_flags;
463 /* Open a file, and (for the very first open) create objects on the OSTs at
464 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
465 * creation or open until ll_lov_setstripe() ioctl is called. We grab
466 * lli_open_sem to ensure no other process will create objects, send the
467 * stripe MD to the MDS, or try to destroy the objects if that fails.
469 * If we already have the stripe MD locally then we don't request it in
470 * md_open(), by passing a lmm_size = 0.
472 * It is up to the application to ensure no other processes open this file
473 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
474 * used. We might be able to avoid races of that sort by getting lli_open_sem
475 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
476 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* VFS ->open() for Lustre.  Obtains (or reuses) a per-mode MDS open
 * handle for the inode, attaches a freshly allocated ll_file_data to the
 * descriptor, and handles directory/statahead and root special cases.
 * See the comment block above (O_LOV_DELAY_CREATE) for object-creation
 * semantics.
 * NOTE(review): this excerpt elides many lines (braces, GOTO labels,
 * RETURN paths, *och_usecount updates); the annotated flow is partial. */
478 int ll_file_open(struct inode *inode, struct file *file)
480 struct ll_inode_info *lli = ll_i2info(inode);
481 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
482 .it_flags = file->f_flags };
483 struct lov_stripe_md *lsm;
484 struct ptlrpc_request *req = NULL;
485 struct obd_client_handle **och_p;
487 struct ll_file_data *fd;
488 int rc = 0, opendir_set = 0;
491 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
492 inode->i_generation, inode, file->f_flags);
494 #ifdef HAVE_VFS_INTENT_PATCHES
497 it = file->private_data; /* XXX: compat macro */
498 file->private_data = NULL; /* prevent ll_local_open assertion */
501 fd = ll_file_data_get();
/* Directory open: record which descriptor/pid owns statahead control. */
506 if (S_ISDIR(inode->i_mode)) {
507 spin_lock(&lli->lli_lock);
508 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
509 LASSERT(lli->lli_sai == NULL);
510 lli->lli_opendir_key = fd;
511 lli->lli_opendir_pid = cfs_curproc_pid();
514 spin_unlock(&lli->lli_lock);
/* Filesystem root: no MDS open handle needed, just attach fd. */
517 if (inode->i_sb->s_root == file->f_dentry) {
518 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent disposition: build our own IT_OPEN intent from f_flags. */
522 if (!it || !it->d.lustre.it_disposition) {
523 /* Convert f_flags into access mode. We cannot use file->f_mode,
524 * because everything but O_ACCMODE mask was stripped from
526 if ((oit.it_flags + 1) & O_ACCMODE)
528 if (file->f_flags & O_TRUNC)
529 oit.it_flags |= FMODE_WRITE;
531 /* kernel only call f_op->open in dentry_open. filp_open calls
532 * dentry_open after call to open_namei that checks permissions.
533 * Only nfsd_open call dentry_open directly without checking
534 * permissions and because of that this code below is safe. */
535 if (oit.it_flags & FMODE_WRITE)
536 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
538 /* We do not want O_EXCL here, presumably we opened the file
539 * already? XXX - NFS implications? */
540 oit.it_flags &= ~O_EXCL;
542 /* bug20584, if "it_flags" contains O_CREAT, the file will be
543 * created if necessary, then "IT_CREAT" should be set to keep
544 * consistent with it */
545 if (oit.it_flags & O_CREAT)
546 oit.it_op |= IT_CREAT;
552 /* Let's see if we have file open on MDS already. */
553 if (it->it_flags & FMODE_WRITE) {
554 och_p = &lli->lli_mds_write_och;
555 och_usecount = &lli->lli_open_fd_write_count;
556 } else if (it->it_flags & FMODE_EXEC) {
557 och_p = &lli->lli_mds_exec_och;
558 och_usecount = &lli->lli_open_fd_exec_count;
560 och_p = &lli->lli_mds_read_och;
561 och_usecount = &lli->lli_open_fd_read_count;
564 down(&lli->lli_och_sem);
565 if (*och_p) { /* Open handle is present */
566 if (it_disposition(it, DISP_OPEN_OPEN)) {
567 /* Well, there's extra open request that we do not need,
568 let's close it somehow. This will decref request. */
569 rc = it_open_error(DISP_OPEN_OPEN, it);
571 up(&lli->lli_och_sem);
572 ll_file_data_put(fd);
573 GOTO(out_openerr, rc);
575 ll_release_openhandle(file->f_dentry, it);
576 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse the cached handle: NULL och means "existing handle". */
581 rc = ll_local_open(file, it, fd, NULL);
584 up(&lli->lli_och_sem);
585 ll_file_data_put(fd);
586 GOTO(out_openerr, rc);
589 LASSERT(*och_usecount == 0);
590 if (!it->d.lustre.it_disposition) {
591 /* We cannot just request lock handle now, new ELC code
592 means that one of other OPEN locks for this file
593 could be cancelled, and since blocking ast handler
594 would attempt to grab och_sem as well, that would
595 result in a deadlock */
596 up(&lli->lli_och_sem);
597 it->it_create_mode |= M_CHECK_STALE;
598 rc = ll_intent_file_open(file, NULL, 0, it);
599 it->it_create_mode &= ~M_CHECK_STALE;
601 ll_file_data_put(fd);
602 GOTO(out_openerr, rc);
605 /* Got some error? Release the request */
606 if (it->d.lustre.it_status < 0) {
607 req = it->d.lustre.it_data;
608 ptlrpc_req_finished(req);
612 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
614 ll_file_data_put(fd);
615 GOTO(out_och_free, rc = -ENOMEM);
618 req = it->d.lustre.it_data;
620 /* md_intent_lock() didn't get a request ref if there was an
621 * open error, so don't do cleanup on the request here
623 /* XXX (green): Should not we bail out on any error here, not
624 * just open error? */
625 rc = it_open_error(DISP_OPEN_OPEN, it);
627 ll_file_data_put(fd);
628 GOTO(out_och_free, rc);
631 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
632 rc = ll_local_open(file, it, fd, *och_p);
634 ll_file_data_put(fd);
635 GOTO(out_och_free, rc);
638 up(&lli->lli_och_sem);
640 /* Must do this outside lli_och_sem lock to prevent deadlock where
641 different kind of OPEN lock for this same inode gets cancelled
642 by ldlm_cancel_lru */
643 if (!S_ISREG(inode->i_mode))
/* O_LOV_DELAY_CREATE or read-only open: defer OST object creation. */
650 if (file->f_flags & O_LOV_DELAY_CREATE ||
651 !(file->f_mode & FMODE_WRITE)) {
652 CDEBUG(D_INODE, "object creation was delayed\n");
656 file->f_flags &= ~O_LOV_DELAY_CREATE;
659 ptlrpc_req_finished(req);
661 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
665 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
666 *och_p = NULL; /* OBD_FREE writes some magic there */
669 up(&lli->lli_och_sem);
671 if (opendir_set != 0)
672 ll_stop_statahead(inode, lli->lli_opendir_key);
678 /* Fills the obdo with the attributes for the lsm */
679 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
680 struct obd_capa *capa, struct obdo *obdo)
682 struct ptlrpc_request_set *set;
683 struct obd_info oinfo = { { { 0 } } };
688 LASSERT(lsm != NULL);
692 oinfo.oi_oa->o_id = lsm->lsm_object_id;
693 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
694 oinfo.oi_oa->o_mode = S_IFREG;
695 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
696 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
697 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
698 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
700 oinfo.oi_capa = capa;
702 set = ptlrpc_prep_set();
704 CERROR("can't allocate ptlrpc set\n");
707 rc = obd_getattr_async(exp, &oinfo, set);
709 rc = ptlrpc_set_wait(set);
710 ptlrpc_set_destroy(set);
713 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
714 OBD_MD_FLATIME | OBD_MD_FLMTIME |
715 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
719 /* Fills the obdo with the attributes for the inode defined by lsm */
720 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
722 struct ll_inode_info *lli = ll_i2info(inode);
723 struct obd_capa *capa = ll_mdscapa_get(inode);
727 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
730 obdo_refresh_inode(inode, obdo, obdo->o_valid);
732 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
733 lli->lli_smd->lsm_object_id, i_size_read(inode),
734 (unsigned long long)inode->i_blocks,
735 (unsigned long)ll_inode_blksize(inode));
/* Merge the cached per-stripe lock value blocks (LVBs) into the in-core
 * inode: size, blocks and mtime/atime/ctime, all under the inode size
 * lock.  NOTE(review): excerpt elides lines (lvb declaration, RETURN);
 * the mtime lines at 755-757 presumably sit under a condition elided
 * here — confirm upstream. */
740 int ll_merge_lvb(struct inode *inode)
742 struct ll_inode_info *lli = ll_i2info(inode);
743 struct ll_sb_info *sbi = ll_i2sbi(inode);
749 ll_inode_size_lock(inode, 1);
750 inode_init_lvb(inode, &lvb);
751 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
752 i_size_write(inode, lvb.lvb_size);
753 inode->i_blocks = lvb.lvb_blocks;
755 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
756 LTIME_S(inode->i_atime) = lvb.lvb_atime;
757 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
758 ll_inode_size_unlock(inode, 1);
/* Glimpse ioctl helper: fetch current OST attributes for @lsm via
 * ll_lsm_getattr() and copy size/blocks/times into the stat structure
 * @st.  NOTE(review): excerpt elides the rc check between the getattr
 * call and the copies. */
763 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
766 struct obdo obdo = { 0 };
769 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
771 st->st_size = obdo.o_size;
772 st->st_blocks = obdo.o_blocks;
773 st->st_mtime = obdo.o_mtime;
774 st->st_atime = obdo.o_atime;
775 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: zero the structure, propagate O_NONBLOCK/O_APPEND, attach the
 * cl_object, and choose the lock requirement mode (never for nolock
 * files, mandatory for O_APPEND, otherwise "maybe"). */
780 void ll_io_init(struct cl_io *io, const struct file *file, int write)
782 struct inode *inode = file->f_dentry->d_inode;
784 memset(io, 0, sizeof *io);
785 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
787 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
788 io->ci_obj = ll_i2info(inode)->lli_clob;
789 io->ci_lockreq = CILR_MAYBE;
790 if (ll_file_nolock(file)) {
791 io->ci_lockreq = CILR_NEVER;
792 io->ci_no_srvlock = 1;
793 } else if (file->f_flags & O_APPEND) {
794 io->ci_lockreq = CILR_MANDATORY;
/* Common driver for all read/write entry points (read/write, readv/
 * writev, aio, sendfile, splice): set up the cl_io, dispatch per
 * I/O subtype, run the client I/O loop, and propagate the position.
 * Takes lli_write_sem for normal (non-group-locked) writes.
 * NOTE(review): excerpt elides lines (case labels in the switch, RETURN,
 * result accounting after ci_nob check) — flow is partial. */
798 static ssize_t ll_file_io_generic(const struct lu_env *env,
799 struct vvp_io_args *args, struct file *file,
800 enum cl_io_type iot, loff_t *ppos, size_t count)
806 io = &ccc_env_info(env)->cti_io;
807 ll_io_init(io, file, iot == CIT_WRITE);
809 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
810 struct vvp_io *vio = vvp_env_io(env);
811 struct ccc_io *cio = ccc_env_io(env);
812 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
813 int write_sem_locked = 0;
815 cio->cui_fd = LUSTRE_FPRIVATE(file);
816 vio->cui_io_subtype = args->via_io_subtype;
818 switch (vio->cui_io_subtype) {
820 cio->cui_iov = args->u.normal.via_iov;
821 cio->cui_nrsegs = args->u.normal.via_nrsegs;
822 #ifndef HAVE_FILE_WRITEV
823 cio->cui_iocb = args->u.normal.via_iocb;
/* Serialize plain writes; group-locked writers bypass lli_write_sem. */
825 if ((iot == CIT_WRITE) &&
826 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
827 down(&lli->lli_write_sem);
828 write_sem_locked = 1;
832 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
833 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
836 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
837 vio->u.splice.cui_flags = args->u.splice.via_flags;
840 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
843 result = cl_io_loop(env, io);
844 if (write_sem_locked)
845 up(&lli->lli_write_sem);
847 /* cl_io_rw_init() handled IO */
848 result = io->ci_result;
851 if (io->ci_nob > 0) {
853 *ppos = io->u.ci_wr.wr.crw_pos;
861 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute the total byte count, mirroring
 * the kernel's __generic_file_aio_write_nolock checks: reject negative
 * segment lengths / cumulative overflow, and truncate the segment list
 * at the first inaccessible user buffer.
 * NOTE(review): excerpt elides lines (cnt accumulation, *count/*nr_segs
 * stores, return) — flow is partial. */
863 static int ll_file_get_iov_count(const struct iovec *iov,
864 unsigned long *nr_segs, size_t *count)
869 for (seg = 0; seg < *nr_segs; seg++) {
870 const struct iovec *iv = &iov[seg];
873 * If any segment has a negative length, or the cumulative
874 * length ever wraps negative then return -EINVAL.
877 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
879 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
884 cnt -= iv->iov_len; /* This segment is no good */
891 #ifdef HAVE_FILE_READV
/* Vectored read entry point (pre-aio kernels, HAVE_FILE_READV): validate
 * the iovec, set up normal-I/O args in the client environment, and
 * delegate to ll_file_io_generic() with CIT_READ. */
892 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
893 unsigned long nr_segs, loff_t *ppos)
896 struct vvp_io_args *args;
902 result = ll_file_get_iov_count(iov, &nr_segs, &count);
906 env = cl_env_get(&refcheck);
908 RETURN(PTR_ERR(env));
910 args = vvp_env_args(env, IO_NORMAL);
911 args->u.normal.via_iov = (struct iovec *)iov;
912 args->u.normal.via_nrsegs = nr_segs;
914 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
915 cl_env_put(env, &refcheck);
/* Scalar read (readv-era variant): wrap the user buffer in a single
 * iovec from the client environment and forward to ll_file_readv(). */
919 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
923 struct iovec *local_iov;
928 env = cl_env_get(&refcheck);
930 RETURN(PTR_ERR(env));
932 local_iov = &vvp_env_info(env)->vti_local_iov;
933 local_iov->iov_base = (void __user *)buf;
934 local_iov->iov_len = count;
935 result = ll_file_readv(file, local_iov, 1, ppos);
936 cl_env_put(env, &refcheck);
/* Async/vectored read entry point (aio-era kernels): validate the iovec,
 * attach the kiocb to the normal-I/O args, and run ll_file_io_generic()
 * with CIT_READ against iocb->ki_pos. */
941 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
942 unsigned long nr_segs, loff_t pos)
945 struct vvp_io_args *args;
951 result = ll_file_get_iov_count(iov, &nr_segs, &count);
955 env = cl_env_get(&refcheck);
957 RETURN(PTR_ERR(env));
959 args = vvp_env_args(env, IO_NORMAL);
960 args->u.normal.via_iov = (struct iovec *)iov;
961 args->u.normal.via_nrsegs = nr_segs;
962 args->u.normal.via_iocb = iocb;
964 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
965 &iocb->ki_pos, count);
966 cl_env_put(env, &refcheck);
/* Scalar read (aio-era variant): build a synchronous kiocb and a single
 * iovec in the client environment, call ll_file_aio_read(), then copy
 * the advanced position back to *ppos. */
970 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
974 struct iovec *local_iov;
980 env = cl_env_get(&refcheck);
982 RETURN(PTR_ERR(env));
984 local_iov = &vvp_env_info(env)->vti_local_iov;
985 kiocb = &vvp_env_info(env)->vti_kiocb;
986 local_iov->iov_base = (void __user *)buf;
987 local_iov->iov_len = count;
988 init_sync_kiocb(kiocb, file);
989 kiocb->ki_pos = *ppos;
990 kiocb->ki_left = count;
992 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
993 *ppos = kiocb->ki_pos;
995 cl_env_put(env, &refcheck);
1001 * Write to a file (through the page cache).
1003 #ifdef HAVE_FILE_WRITEV
/* Vectored write entry point (pre-aio kernels, HAVE_FILE_WRITEV):
 * validate the iovec, set up normal-I/O args, and delegate to
 * ll_file_io_generic() with CIT_WRITE. */
1004 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1005 unsigned long nr_segs, loff_t *ppos)
1008 struct vvp_io_args *args;
1014 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1018 env = cl_env_get(&refcheck);
1020 RETURN(PTR_ERR(env));
1022 args = vvp_env_args(env, IO_NORMAL);
1023 args->u.normal.via_iov = (struct iovec *)iov;
1024 args->u.normal.via_nrsegs = nr_segs;
1026 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1027 cl_env_put(env, &refcheck);
/* Scalar write (writev-era variant): wrap the user buffer in a single
 * iovec and forward to ll_file_writev(). */
1031 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1035 struct iovec *local_iov;
1040 env = cl_env_get(&refcheck);
1042 RETURN(PTR_ERR(env));
1044 local_iov = &vvp_env_info(env)->vti_local_iov;
1045 local_iov->iov_base = (void __user *)buf;
1046 local_iov->iov_len = count;
1048 result = ll_file_writev(file, local_iov, 1, ppos);
1049 cl_env_put(env, &refcheck);
1053 #else /* AIO stuff */
/* Async/vectored write entry point (aio-era kernels): validate the
 * iovec, attach the kiocb, and run ll_file_io_generic() with CIT_WRITE
 * against iocb->ki_pos. */
1054 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1055 unsigned long nr_segs, loff_t pos)
1058 struct vvp_io_args *args;
1064 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1068 env = cl_env_get(&refcheck);
1070 RETURN(PTR_ERR(env));
1072 args = vvp_env_args(env, IO_NORMAL);
1073 args->u.normal.via_iov = (struct iovec *)iov;
1074 args->u.normal.via_nrsegs = nr_segs;
1075 args->u.normal.via_iocb = iocb;
1077 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1078 &iocb->ki_pos, count);
1079 cl_env_put(env, &refcheck);
/* Scalar write (aio-era variant): build a synchronous kiocb and a single
 * iovec, call ll_file_aio_write(), then copy the advanced position back
 * to *ppos. */
1083 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1087 struct iovec *local_iov;
1088 struct kiocb *kiocb;
1093 env = cl_env_get(&refcheck);
1095 RETURN(PTR_ERR(env));
1097 local_iov = &vvp_env_info(env)->vti_local_iov;
1098 kiocb = &vvp_env_info(env)->vti_kiocb;
1099 local_iov->iov_base = (void __user *)buf;
1100 local_iov->iov_len = count;
1101 init_sync_kiocb(kiocb, file);
1102 kiocb->ki_pos = *ppos;
1103 kiocb->ki_left = count;
1105 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1106 *ppos = kiocb->ki_pos;
1108 cl_env_put(env, &refcheck);
1114 #ifdef HAVE_KERNEL_SENDFILE
1116 * Send file content (through pagecache) somewhere with helper
/* sendfile() backend (HAVE_KERNEL_SENDFILE): route the actor/target pair
 * through IO_SENDFILE args into ll_file_io_generic() as a CIT_READ. */
1118 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1119 read_actor_t actor, void *target)
1122 struct vvp_io_args *args;
1127 env = cl_env_get(&refcheck);
1129 RETURN(PTR_ERR(env));
1131 args = vvp_env_args(env, IO_SENDFILE);
1132 args->u.sendfile.via_target = target;
1133 args->u.sendfile.via_actor = actor;
1135 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1136 cl_env_put(env, &refcheck);
1141 #ifdef HAVE_KERNEL_SPLICE_READ
1143 * Send file content (through pagecache) somewhere with helper
/* splice_read() backend (HAVE_KERNEL_SPLICE_READ): route the pipe and
 * flags through IO_SPLICE args into ll_file_io_generic() as CIT_READ. */
1145 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1146 struct pipe_inode_info *pipe, size_t count,
1150 struct vvp_io_args *args;
1155 env = cl_env_get(&refcheck);
1157 RETURN(PTR_ERR(env));
1159 args = vvp_env_args(env, IO_SPLICE);
1160 args->u.splice.via_pipe = pipe;
1161 args->u.splice.via_flags = flags;
1163 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1164 cl_env_put(env, &refcheck);
/* LL_IOC_RECREATE_OBJ ioctl: recreate a lost OST object for this file.
 * Requires CAP_SYS_ADMIN.  Copies the request from userspace, clones the
 * in-core stripe MD, marks the obdo with OBD_FL_RECREATE_OBJS and asks
 * the data export to (re)create the object.
 * NOTE(review): excerpt elides lines (oa allocation, rc checks, RETURN)
 * — flow is partial. */
1169 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1172 struct obd_export *exp = ll_i2dtexp(inode);
1173 struct ll_recreate_obj ucreatp;
1174 struct obd_trans_info oti = { 0 };
1175 struct obdo *oa = NULL;
1178 struct lov_stripe_md *lsm, *lsm2;
1181 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1184 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1185 sizeof(struct ll_recreate_obj)))
/* Size lock held across the clone so the stripe MD cannot change. */
1192 ll_inode_size_lock(inode, 0);
1193 lsm = ll_i2info(inode)->lli_smd;
1195 GOTO(out, rc = -ENOENT);
1196 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1197 (lsm->lsm_stripe_count));
1199 OBD_ALLOC(lsm2, lsm_size);
1201 GOTO(out, rc = -ENOMEM);
1203 oa->o_id = ucreatp.lrc_id;
1204 oa->o_gr = ucreatp.lrc_group;
1205 oa->o_nlink = ucreatp.lrc_ost_idx;
1206 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1207 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1208 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1209 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1211 memcpy(lsm2, lsm, lsm_size);
1212 rc = obd_create(exp, oa, &lsm2, &oti);
1214 OBD_FREE(lsm2, lsm_size);
1217 ll_inode_size_unlock(inode, 0);
/* Set striping info on a not-yet-striped file by sending an open intent
 * carrying the lov_user_md.  Fails (visible via CDEBUG path) if the file
 * already has a stripe MD; releases the MDS open handle obtained as a
 * side effect of the intent.
 * NOTE(review): excerpt elides lines (error returns, labels between the
 * lsm check and the intent open) — flow is partial. */
1222 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1223 int flags, struct lov_user_md *lum, int lum_size)
1225 struct lov_stripe_md *lsm;
1226 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1230 ll_inode_size_lock(inode, 0);
1231 lsm = ll_i2info(inode)->lli_smd;
1233 ll_inode_size_unlock(inode, 0);
1234 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1239 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1242 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1243 GOTO(out_req_free, rc = -ENOENT);
1244 rc = oit.d.lustre.it_status;
1246 GOTO(out_req_free, rc);
1248 ll_release_openhandle(file->f_dentry, &oit);
1251 ll_inode_size_unlock(inode, 0);
1252 ll_intent_release(&oit);
1255 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping metadata) for @filename from the MDS via
 * md_getattr_name(), validate its magic, and byte-swap it to host order
 * on big-endian clients before handing it to userspace.  On success
 * *lmmp/*lmm_size point into the reply, which the caller releases via
 * *request.  NOTE(review): excerpt elides lines (rc checks, out label,
 * *lmmp assignment) — flow is partial. */
1259 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1260 struct lov_mds_md **lmmp, int *lmm_size,
1261 struct ptlrpc_request **request)
1263 struct ll_sb_info *sbi = ll_i2sbi(inode);
1264 struct mdt_body *body;
1265 struct lov_mds_md *lmm = NULL;
1266 struct ptlrpc_request *req = NULL;
1267 struct obd_capa *oc;
1270 rc = ll_get_max_mdsize(sbi, &lmmsize);
1274 oc = ll_mdscapa_get(inode);
1275 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1276 oc, filename, strlen(filename) + 1,
1277 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1278 ll_i2suppgid(inode), &req);
1281 CDEBUG(D_INFO, "md_getattr_name failed "
1282 "on %s: rc %d\n", filename, rc);
1286 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1287 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1289 lmmsize = body->eadatasize;
1291 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1293 GOTO(out, rc = -ENODATA);
1296 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1297 LASSERT(lmm != NULL);
1299 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1300 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1301 GOTO(out, rc = -EPROTO);
1305 * This is coming from the MDS, so is probably in
1306 * little endian. We convert it to host endian before
1307 * passing it to userspace.
/* Condition is true only on big-endian hosts (LE hosts need no swab). */
1309 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1310 /* if function called for directory - we should
1311 * avoid swab not existent lsm objects */
1312 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1313 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1314 if (S_ISREG(body->mode))
1315 lustre_swab_lov_user_md_objects(
1316 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1317 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1318 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1319 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1320 if (S_ISREG(body->mode))
1321 lustre_swab_lov_user_md_objects(
1322 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1323 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1329 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: set a raw striping EA (including object
 * ids) on @inode.  Requires CAP_SYS_ADMIN since it names specific
 * pre-existing objects (MDS_OPEN_HAS_OBJS).
 */
1334 static int ll_lov_setea(struct inode *inode, struct file *file,
1337 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1338 struct lov_user_md *lump;
/* Room for the header plus exactly one ost_data entry — presumably the
 * wire format expected by ll_lov_setstripe_ea_info; TODO confirm. */
1339 int lum_size = sizeof(struct lov_user_md) +
1340 sizeof(struct lov_user_ost_data);
1344 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1347 OBD_ALLOC(lump, lum_size);
1351 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1352 OBD_FREE(lump, lum_size);
1356 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
/* Buffer copied by the callee (or no longer needed) — free it here. */
1358 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: set striping for @inode from a
 * user-space lov_user_md (V1 or V3), then read the resulting stripe
 * info back to the user buffer.
 */
1362 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1365 struct lov_user_md_v3 lumv3;
1366 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1367 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1368 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1371 int flags = FMODE_WRITE;
1374 /* first try with v1 which is smaller than v3; the magic field
1375  * tells us whether we must re-copy the larger v3 layout */
1375 lum_size = sizeof(struct lov_user_md_v1);
1376 if (copy_from_user(lumv1, lumv1p, lum_size))
1379 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1380 lum_size = sizeof(struct lov_user_md_v3);
1381 if (copy_from_user(&lumv3, lumv3p, lum_size))
1385 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* Zero the user's stripe_count first so a failed GETSTRIPE below
 * doesn't leave stale data; then echo back the real layout. */
1387 put_user(0, &lumv1p->lmm_stripe_count);
1388 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1389 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE handler: copy @inode's current striping info
 * out to user space via the data-device obd_iocontrol path.
 */
1395 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1397 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1402 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * LL_IOC_GROUP_LOCK handler: take a cl-layer group lock with group id
 * @arg on behalf of this open file.  Only one group lock per file
 * descriptor; the lock is stashed in fd->fd_grouplock and the
 * LL_FILE_GROUP_LOCKED flag marks ownership.
 */
1406 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1408 struct ll_inode_info *lli = ll_i2info(inode);
1409 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1410 struct ccc_grouplock grouplock;
1414 if (ll_file_nolock(file))
1415 RETURN(-EOPNOTSUPP);
/* fd_flags/fd_grouplock are protected by lli_lock. */
1417 spin_lock(&lli->lli_lock);
1418 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1419 CWARN("group lock already existed with gid %lu\n",
1420 fd->fd_grouplock.cg_gid);
1421 spin_unlock(&lli->lli_lock);
1424 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1425 spin_unlock(&lli->lli_lock);
/* The enqueue may block, so it is done outside the spinlock;
 * O_NONBLOCK callers get a non-blocking request. */
1427 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1428 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check under the lock: another thread may have raced us while we
 * were enqueueing; if so, drop the lock we just obtained. */
1432 spin_lock(&lli->lli_lock);
1433 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1434 spin_unlock(&lli->lli_lock);
1435 CERROR("another thread just won the race\n");
1436 cl_put_grouplock(&grouplock);
1440 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1441 fd->fd_grouplock = grouplock;
1442 spin_unlock(&lli->lli_lock);
1444 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock with id @arg
 * previously taken on this file by ll_get_grouplock().  The fd state
 * is cleared under lli_lock; the actual lock release happens after
 * dropping the spinlock since it may block.
 */
1448 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1450 struct ll_inode_info *lli = ll_i2info(inode);
1451 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1452 struct ccc_grouplock grouplock;
1455 spin_lock(&lli->lli_lock);
1456 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1457 spin_unlock(&lli->lli_lock);
1458 CWARN("no group lock held\n");
1461 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* The caller must pass the same gid it locked with. */
1463 if (fd->fd_grouplock.cg_gid != arg) {
1464 CWARN("group lock %lu doesn't match current id %lu\n",
1465 arg, fd->fd_grouplock.cg_gid);
1466 spin_unlock(&lli->lli_lock);
/* Detach the lock from the fd before releasing it. */
1470 grouplock = fd->fd_grouplock;
1471 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1472 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1473 spin_unlock(&lli->lli_lock);
1475 cl_put_grouplock(&grouplock);
1476 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1481 * Close inode open handle
1483 * \param dentry [in] dentry which contains the inode
1484 * \param it [in,out] intent which contains open info and result
1487 * \retval <0 failure
/*
 * Close the MDS open handle carried in @it for @dentry's inode.
 * Used when an intent open succeeded but the resulting handle is not
 * going to be kept (e.g. lookup-only paths).  No-op for the root
 * dentry or when the intent carries no open disposition.
 */
1489 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1491 struct inode *inode = dentry->d_inode;
1492 struct obd_client_handle *och;
1498 /* Root ? Do nothing. */
1499 if (dentry->d_inode->i_sb->s_root == dentry)
1502 /* No open handle to close? Move away */
1503 if (!it_disposition(it, DISP_OPEN_OPEN))
1506 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
/* Build a transient client handle from the intent so the normal
 * close path can be used. */
1508 OBD_ALLOC(och, sizeof(*och));
1510 GOTO(out, rc = -ENOMEM);
1512 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1513 ll_i2info(inode), it, och);
1515 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1518 /* this one is in place of ll_file_open: drop the open-request
1519  * reference the intent still holds */
1519 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1520 ptlrpc_req_finished(it->d.lustre.it_data);
1521 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1526 * Get size for inode for which FIEMAP mapping is requested.
1527 * Make the FIEMAP get_info call and returns the result.
/*
 * Get size for inode for which FIEMAP mapping is requested, then make
 * the FIEMAP get_info call through the data export and return the
 * extent mapping in @fiemap (buffer of @num_bytes total).
 */
1529 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1532 struct obd_export *exp = ll_i2dtexp(inode);
1533 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1534 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1535 int vallen = num_bytes;
1539 /* If the stripe_count > 1 and the application does not understand
1540  * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1542 if (lsm->lsm_stripe_count > 1 &&
1543 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
/* Identify the object whose extents we want. */
1546 fm_key.oa.o_id = lsm->lsm_object_id;
1547 fm_key.oa.o_gr = lsm->lsm_object_gr;
1548 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1550 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1553 /* If filesize is 0, then there would be no objects for mapping */
1554 if (fm_key.oa.o_size == 0) {
1555 fiemap->fm_mapped_extents = 0;
/* Pass the user's request (flags, range, extent count) down in the key. */
1559 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1561 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1563 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path name via the MDC.
 * Copies the user's getinfo_fid2path header in, allocates an output
 * buffer big enough for the requested path length, calls down through
 * obd_iocontrol, and copies the result back out.
 */
1568 int ll_fid2path(struct obd_export *exp, void *arg)
1570 struct getinfo_fid2path *gfout, *gfin;
1574 /* Need to get the buflen */
1575 OBD_ALLOC_PTR(gfin);
1578 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* Output = fixed header + caller-specified path buffer.
 * NOTE(review): gfin->gf_pathlen is user-controlled and appears to be
 * used unvalidated in this allocation — confirm bounds elsewhere. */
1583 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1584 OBD_ALLOC(gfout, outsize);
1585 if (gfout == NULL) {
1589 memcpy(gfout, gfin, sizeof(*gfout));
1592 /* Call mdc_iocontrol */
1593 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1596 if (copy_to_user(arg, gfout, outsize))
1600 OBD_FREE(gfout, outsize);
/*
 * Main ioctl dispatcher for regular Lustre files.  Handles the llite
 * per-fd flag ioctls, striping get/set, FIEMAP, group locks, statfs,
 * FID operations, and finally falls through to dynamically registered
 * ioctl handlers and the generic obd_iocontrol path.
 */
1604 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1607 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1611 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1612 inode->i_generation, inode, cmd);
1613 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1615 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1616 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1620 case LL_IOC_GETFLAGS:
1621 /* Get the current value of the file flags */
1622 return put_user(fd->fd_flags, (int *)arg);
1623 case LL_IOC_SETFLAGS:
1624 case LL_IOC_CLRFLAGS:
1625 /* Set or clear specific file flags */
1626 /* XXX This probably needs checks to ensure the flags are
1627  * not abused, and to handle any flag side effects.
1629 if (get_user(flags, (int *) arg))
1632 if (cmd == LL_IOC_SETFLAGS) {
/* Disabling locking only makes sense for O_DIRECT I/O; refuse
 * LL_FILE_IGNORE_LOCK on buffered files. */
1633 if ((flags & LL_FILE_IGNORE_LOCK) &&
1634 !(file->f_flags & O_DIRECT)) {
1635 CERROR("%s: unable to disable locking on "
1636 "non-O_DIRECT file\n", current->comm);
1640 fd->fd_flags |= flags;
1642 fd->fd_flags &= ~flags;
1645 case LL_IOC_LOV_SETSTRIPE:
1646 RETURN(ll_lov_setstripe(inode, file, arg));
1647 case LL_IOC_LOV_SETEA:
1648 RETURN(ll_lov_setea(inode, file, arg));
1649 case LL_IOC_LOV_GETSTRIPE:
1650 RETURN(ll_lov_getstripe(inode, arg));
1651 case LL_IOC_RECREATE_OBJ:
1652 RETURN(ll_lov_recreate_obj(inode, file, arg));
1653 case FSFILT_IOC_FIEMAP: {
1654 struct ll_user_fiemap *fiemap_s;
1655 size_t num_bytes, ret_bytes;
1656 unsigned int extent_count;
1659 /* Get the extent count so we can calculate the size of
1660  * required fiemap buffer.
1661  * NOTE(review): extent_count is user-controlled; the
1662  * num_bytes multiplication below looks overflow-prone on
1663  * 32-bit — confirm bounds checking elsewhere. */
1661 if (get_user(extent_count,
1662 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1664 num_bytes = sizeof(*fiemap_s) + (extent_count *
1665 sizeof(struct ll_fiemap_extent));
1666 OBD_VMALLOC(fiemap_s, num_bytes);
1667 if (fiemap_s == NULL)
1670 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1672 GOTO(error, rc = -EFAULT);
/* Unknown flags: report back which flags we do not support and
 * fail with -EBADR, per the FIEMAP flag-negotiation protocol. */
1674 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1675 fiemap_s->fm_flags = fiemap_s->fm_flags &
1676 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1677 if (copy_to_user((char *)arg, fiemap_s,
1679 GOTO(error, rc = -EFAULT);
1681 GOTO(error, rc = -EBADR);
1684 /* If fm_extent_count is non-zero, read the first extent since
1685  * it is used to calculate end_offset and device from previous
1688 if (copy_from_user(&fiemap_s->fm_extents[0],
1689 (char __user *)arg + sizeof(*fiemap_s),
1690 sizeof(struct ll_fiemap_extent)))
1691 GOTO(error, rc = -EFAULT);
/* FIEMAP_FLAG_SYNC: flush dirty pages before mapping so the
 * returned extents reflect the on-disk state. */
1694 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1697 rc = filemap_fdatawrite(inode->i_mapping);
1702 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
/* Copy back only the header plus extents actually mapped. */
1706 ret_bytes = sizeof(struct ll_user_fiemap);
1708 if (extent_count != 0)
1709 ret_bytes += (fiemap_s->fm_mapped_extents *
1710 sizeof(struct ll_fiemap_extent));
1712 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1716 OBD_VFREE(fiemap_s, num_bytes);
1719 case FSFILT_IOC_GETFLAGS:
1720 case FSFILT_IOC_SETFLAGS:
1721 RETURN(ll_iocontrol(inode, file, cmd, arg));
1722 case FSFILT_IOC_GETVERSION_OLD:
1723 case FSFILT_IOC_GETVERSION:
1724 RETURN(put_user(inode->i_generation, (int *)arg));
1725 case LL_IOC_GROUP_LOCK:
1726 RETURN(ll_get_grouplock(inode, file, arg));
1727 case LL_IOC_GROUP_UNLOCK:
1728 RETURN(ll_put_grouplock(inode, file, arg));
1729 case IOC_OBD_STATFS:
1730 RETURN(ll_obd_statfs(inode, (void *)arg));
1732 /* We need to special case any other ioctls we want to handle,
1733  * to send them to the MDS/OST as appropriate and to properly
1734  * network encode the arg field.
1735 case FSFILT_IOC_SETVERSION_OLD:
1736 case FSFILT_IOC_SETVERSION:
1738 case LL_IOC_FLUSHCTX:
1739 RETURN(ll_flush_ctx(inode));
1740 case LL_IOC_PATH2FID: {
1741 if (copy_to_user((void *)arg, ll_inode2fid(inode),
1742 sizeof(struct lu_fid)))
1747 case OBD_IOC_FID2PATH:
1748 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
/* Default: try dynamically registered handlers first, then hand the
 * command to the data device. */
1754 ll_iocontrol_call(inode, file, cmd, arg, &err))
1757 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.  For SEEK_END the cluster-wide file size must
 * first be refreshed with a glimpse (other clients may have extended
 * the file), then read under the inode size lock.
 */
1763 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1765 struct inode *inode = file->f_dentry->d_inode;
/* Provisional target, for the trace message only; the authoritative
 * offset is recomputed below after the glimpse. */
1768 retval = offset + ((origin == 2) ? i_size_read(inode) :
1769 (origin == 1) ? file->f_pos : 0);
1770 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1771 inode->i_ino, inode->i_generation, inode, retval, retval,
1772 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1773 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1775 if (origin == 2) { /* SEEK_END */
1776 int nonblock = 0, rc;
1778 if (file->f_flags & O_NONBLOCK)
1779 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* Fetch an up-to-date size from the OSTs before using it. */
1781 rc = cl_glimpse_size(inode);
1785 ll_inode_size_lock(inode, 0);
1786 offset += i_size_read(inode);
1787 ll_inode_size_unlock(inode, 0);
1788 } else if (origin == 1) { /* SEEK_CUR */
1789 offset += file->f_pos;
/* Reject offsets outside [0, maxbytes]; only update f_pos on change. */
1793 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1794 if (offset != file->f_pos) {
1795 file->f_pos = offset;
/*
 * fsync implementation: wait for in-flight page I/O, surface any
 * recorded async write errors, sync metadata through the MDS, and
 * (below) sync data objects through the OSTs.
 */
1803 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1805 struct inode *inode = dentry->d_inode;
1806 struct ll_inode_info *lli = ll_i2info(inode);
1807 struct lov_stripe_md *lsm = lli->lli_smd;
1808 struct ptlrpc_request *req;
1809 struct obd_capa *oc;
1812 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1813 inode->i_generation, inode);
1814 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1816 /* fsync's caller has already called _fdata{sync,write}, we want
1817  * that IO to finish before calling the osc and mdc sync methods */
1818 rc = filemap_fdatawait(inode->i_mapping);
1820 /* catch async errors that were recorded back when async writeback
1821  * failed for pages in this mapping. */
1822 err = lli->lli_async_rc;
1823 lli->lli_async_rc = 0;
1827 err = lov_test_and_clear_async_rc(lsm);
/* Sync metadata on the MDS for this inode's FID. */
1832 oc = ll_mdscapa_get(inode);
1833 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1839 ptlrpc_req_finished(req);
1846 RETURN(rc ? rc : -ENOMEM);
/* Build an obdo describing the data objects and sync them on the
 * OSTs over the whole object range. */
1848 oa->o_id = lsm->lsm_object_id;
1849 oa->o_gr = lsm->lsm_object_gr;
1850 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1851 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1852 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1855 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1856 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1857 0, OBD_OBJECT_EOF, oc);
/*
 * POSIX fcntl / BSD flock lock handler.  Translates the kernel
 * file_lock into an LDLM flock enqueue against the MDS, then mirrors
 * the granted state into the local lock lists so the VFS bookkeeping
 * (posix_lock_file_wait / flock wait) stays consistent.
 */
1867 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1869 struct inode *inode = file->f_dentry->d_inode;
1870 struct ll_sb_info *sbi = ll_i2sbi(inode);
1871 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1872 .ei_cb_cp =ldlm_flock_completion_ast,
1873 .ei_cbdata = file_lock };
1874 struct md_op_data *op_data;
1875 struct lustre_handle lockh = {0};
1876 ldlm_policy_data_t flock;
1881 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1882 inode->i_ino, file_lock);
1884 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1886 if (file_lock->fl_flags & FL_FLOCK) {
1887 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1888 /* set missing params for flock() calls: whole-file range,
1889  * owned by this thread group */
1889 file_lock->fl_end = OFFSET_MAX;
1890 file_lock->fl_pid = current->tgid;
/* Map the byte range / owner into the LDLM flock policy. */
1892 flock.l_flock.pid = file_lock->fl_pid;
1893 flock.l_flock.start = file_lock->fl_start;
1894 flock.l_flock.end = file_lock->fl_end;
/* Map fcntl lock type onto an LDLM lock mode. */
1896 switch (file_lock->fl_type) {
1898 einfo.ei_mode = LCK_PR;
1901 /* An unlock request may or may not have any relation to
1902  * existing locks so we may not be able to pass a lock handle
1903  * via a normal ldlm_lock_cancel() request. The request may even
1904  * unlock a byte range in the middle of an existing lock. In
1905  * order to process an unlock request we need all of the same
1906  * information that is given with a normal read or write record
1907  * lock request. To avoid creating another ldlm unlock (cancel)
1908  * message we'll treat a LCK_NL flock request as an unlock. */
1909 einfo.ei_mode = LCK_NL;
1912 einfo.ei_mode = LCK_PW;
1915 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* Non-blocking set request. */
1930 flags = LDLM_FL_BLOCK_NOWAIT;
/* F_GETLK-style probe: test only, do not actually take the lock. */
1936 flags = LDLM_FL_TEST_LOCK;
1937 /* Save the old mode so that if the mode in the lock changes we
1938  * can decrement the appropriate reader or writer refcount. */
1939 file_lock->fl_type = einfo.ei_mode;
1942 CERROR("unknown fcntl lock command: %d\n", cmd);
1946 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1947 LUSTRE_OPC_ANY, NULL);
1948 if (IS_ERR(op_data))
1949 RETURN(PTR_ERR(op_data));
1951 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
1952 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
1953 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
1955 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
1956 op_data, &lockh, &flock, 0, NULL /* req */, flags);
1958 ll_finish_md_op_data(op_data);
/* Mirror a successful grant/unlock into the local VFS lock lists. */
1960 if ((file_lock->fl_flags & FL_FLOCK) &&
1961 (rc == 0 || file_lock->fl_type == F_UNLCK))
1962 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
1963 #ifdef HAVE_F_OP_FLOCK
1964 if ((file_lock->fl_flags & FL_POSIX) &&
1965 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
1966 !(flags & LDLM_FL_TEST_LOCK))
1967 posix_lock_file_wait(file, file_lock);
1973 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking a reference) whether this client already holds
 * an MDS inodebits lock covering @bits on @inode, in any of the
 * CR/CW/PR/PW modes.  LDLM_FL_TEST_LOCK makes this a pure probe.
 */
1980 int ll_have_md_lock(struct inode *inode, __u64 bits)
1982 struct lustre_handle lockh;
1983 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
1991 fid = &ll_i2info(inode)->lli_fid;
1992 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
1994 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
1995 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
1996 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Like ll_have_md_lock(), but actually matches (and references) a
 * granted MDS inodebits lock covering @bits, returning the matched
 * mode and filling *lockh.  The caller owns the returned reference.
 */
2002 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2003 struct lustre_handle *lockh)
2005 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2011 fid = &ll_i2info(inode)->lli_fid;
2012 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
/* No TEST_LOCK here: a real reference is taken on the matched lock. */
2014 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2015 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2016 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Common tail for revalidation: -ENOENT from the MDS means the inode
 * was unlinked remotely, which is treated as success after adjusting
 * local state; any other error is logged and propagated.
 */
2020 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2021 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2022  * and return success */
2024 /* This path cannot be hit for regular files unless in
2025  * case of obscure races, so no need to to validate
2027 if (!S_ISREG(inode->i_mode) &&
2028 !S_ISDIR(inode->i_mode))
2033 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate @dentry's inode attributes against the MDS.
 *
 * Two strategies: if the server supports OBD_CONNECT_ATTRFID, issue an
 * IT_GETATTR intent lock by FID (no name needed); otherwise, if we do
 * not already hold a covering MD lock for @ibits, do a plain
 * md_getattr and refresh the inode from the reply.
 */
2041 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2044 struct inode *inode = dentry->d_inode;
2045 struct ptlrpc_request *req = NULL;
2046 struct ll_sb_info *sbi;
2047 struct obd_export *exp;
/* Debugging aid left in by the author — hit only on an unexpected
 * state (NULL inode, presumably); TODO confirm and remove. */
2052 CERROR("REPORT THIS LINE TO PETER\n");
2055 sbi = ll_i2sbi(inode);
2057 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2058 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2060 exp = ll_i2mdexp(inode);
2062 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2063 struct lookup_intent oit = { .it_op = IT_GETATTR };
2064 struct md_op_data *op_data;
2066 /* Call getattr by fid, so do not provide name at all. */
2067 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2068 dentry->d_inode, NULL, 0, 0,
2069 LUSTRE_OPC_ANY, NULL);
2070 if (IS_ERR(op_data))
2071 RETURN(PTR_ERR(op_data));
/* M_CHECK_STALE makes the server report a stale inode instead of
 * silently re-looking it up. */
2073 oit.it_create_mode |= M_CHECK_STALE;
2074 rc = md_intent_lock(exp, op_data, NULL, 0,
2075 /* we are not interested in name
2078 ll_md_blocking_ast, 0);
2079 ll_finish_md_op_data(op_data);
2080 oit.it_create_mode &= ~M_CHECK_STALE;
2082 rc = ll_inode_revalidate_fini(inode, rc);
2086 rc = ll_revalidate_it_finish(req, &oit, dentry);
2088 ll_intent_release(&oit);
2092 /* Unlinked? Unhash dentry, so it is not picked up later by
2093  do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2094  here to preserve get_cwd functionality on 2.6.
2096 if (!dentry->d_inode->i_nlink) {
2097 spin_lock(&dcache_lock);
2098 ll_drop_dentry(dentry);
2099 spin_unlock(&dcache_lock);
2102 ll_finish_locks(&oit, dentry);
2103 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2105 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2106 obd_valid valid = OBD_MD_FLGETATTR;
2107 struct obd_capa *oc;
/* Regular files also need EA size info in the getattr reply. */
2110 if (S_ISREG(inode->i_mode)) {
2111 rc = ll_get_max_mdsize(sbi, &ealen);
2114 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2116 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2117  * capa for this inode. Because we only keep capas of dirs
2119 oc = ll_mdscapa_get(inode);
2120 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2124 rc = ll_inode_revalidate_fini(inode, rc);
2128 rc = ll_prep_inode(&inode, req, NULL);
2131 ptlrpc_req_finished(req);
/*
 * Full revalidation: refresh MDS attributes (UPDATE|LOOKUP bits),
 * then glimpse the OSTs for the current file size — unless the file
 * has no data objects yet, in which case size is meaningless.
 */
2135 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2140 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2141 MDS_INODELOCK_LOOKUP);
2143 /* if object not yet allocated, don't validate size */
2144 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2147 /* cl_glimpse_size will prefer locally cached writes if they extend
2151 rc = cl_glimpse_size(dentry->d_inode);
/*
 * getattr with an explicit lookup intent: revalidate the inode first,
 * then fill *stat from the (now fresh) inode fields.  Size and block
 * count are read under the inode size lock for consistency.
 */
2156 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2157 struct lookup_intent *it, struct kstat *stat)
2159 struct inode *inode = de->d_inode;
2162 res = ll_inode_revalidate_it(de, it);
2163 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2168 stat->dev = inode->i_sb->s_dev;
2169 stat->ino = inode->i_ino;
2170 stat->mode = inode->i_mode;
2171 stat->nlink = inode->i_nlink;
2172 stat->uid = inode->i_uid;
2173 stat->gid = inode->i_gid;
2174 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2175 stat->atime = inode->i_atime;
2176 stat->mtime = inode->i_mtime;
2177 stat->ctime = inode->i_ctime;
2178 #ifdef HAVE_INODE_BLKSIZE
2179 stat->blksize = inode->i_blksize;
2181 stat->blksize = 1 << inode->i_blkbits;
2184 ll_inode_size_lock(inode, 0);
2185 stat->size = i_size_read(inode);
2186 stat->blocks = inode->i_blocks;
2187 ll_inode_size_unlock(inode, 0);
/* Standard getattr entry point: wraps ll_getattr_it() with a plain
 * IT_GETATTR intent. */
2191 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2193 struct lookup_intent it = { .it_op = IT_GETATTR };
2195 return ll_getattr_it(mnt, de, &it, stat);
2198 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * VFS ->fiemap hook.  Recovers the ll_user_fiemap header that sits
 * immediately before fi_extents_start in the caller's buffer, runs the
 * mapping, and copies flags/extent count back into fieinfo.
 * NOTE(review): the pointer arithmetic assumes the extent array was
 * allocated directly after an ll_user_fiemap header — confirm against
 * the caller in this tree.
 */
2199 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2200 __u64 start, __u64 len)
2203 struct ll_user_fiemap *fiemap = (struct ll_user_fiemap*)(
2204 fieinfo->fi_extents_start - sizeof(ll_user_fiemap));
2206 rc = ll_do_fiemap(inode, fiemap, sizeof(*fiemap) +
2207 fiemap->fm_extent_count *
2208 sizeof(struct ll_fiemap_extent));
2210 fieinfo->fi_flags = fiemap->fm_flags;
2211 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
/*
 * POSIX ACL check callback for generic_permission().  Takes a private
 * reference to the cached ACL under lli_lock so the check itself can
 * run without holding the spinlock.  Compiled out without
 * CONFIG_FS_POSIX_ACL.
 */
2219 int lustre_check_acl(struct inode *inode, int mask)
2221 #ifdef CONFIG_FS_POSIX_ACL
2222 struct ll_inode_info *lli = ll_i2info(inode);
2223 struct posix_acl *acl;
2227 spin_lock(&lli->lli_lock);
2228 acl = posix_acl_dup(lli->lli_posix_acl);
2229 spin_unlock(&lli->lli_lock);
2234 rc = posix_acl_permission(inode, acl, mask);
2235 posix_acl_release(acl);
2243 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
/*
 * Permission check, >= 2.6.10 kernels: revalidate the root inode if
 * needed, delegate remote-client setups to the remote-perm path, and
 * otherwise use generic_permission() with the Lustre ACL callback.
 * The 2-arg vs 3-arg signature is selected by kernel feature test.
 */
2244 #ifndef HAVE_INODE_PERMISION_2ARGS
2245 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2247 int ll_inode_permission(struct inode *inode, int mask)
2253 /* as root inode are NOT getting validated in lookup operation,
2254  * need to do it before permission check. */
2256 if (inode == inode->i_sb->s_root->d_inode) {
2257 struct lookup_intent it = { .it_op = IT_LOOKUP };
2259 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2260 MDS_INODELOCK_LOOKUP);
2265 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2266 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2268 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2269 return lustre_check_remote_perm(inode, mask);
2271 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2272 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * Pre-2.6.10 fallback: open-coded Unix permission logic (owner /
 * group / other bits, ACL fallback, then capability overrides) since
 * generic_permission() with an ACL callback is not available.
 */
2277 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2279 int mode = inode->i_mode;
2282 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2283 inode->i_ino, inode->i_generation, inode, mask);
2285 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2286 return lustre_check_remote_perm(inode, mask);
2288 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* Write access denied on read-only or immutable inodes. */
2290 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2291 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2293 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2295 if (current->fsuid == inode->i_uid) {
2298 if (((mode >> 3) & mask & S_IRWXO) != mask)
2300 rc = lustre_check_acl(inode, mask);
2304 goto check_capabilities;
2308 if (in_group_p(inode->i_gid))
2311 if ((mode & mask & S_IRWXO) == mask)
/* Capability overrides: DAC_OVERRIDE for most access,
 * DAC_READ_SEARCH for read / directory search. */
2315 if (!(mask & MAY_EXEC) ||
2316 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2317 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2320 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2321 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select the vectored-I/O file_operations member names and handlers:
 * old kernels expose ->readv/->writev, newer ones ->aio_read/->aio_write.
 * The macros let the three operation tables below stay identical. */
2328 #ifdef HAVE_FILE_READV
2329 #define READ_METHOD readv
2330 #define READ_FUNCTION ll_file_readv
2331 #define WRITE_METHOD writev
2332 #define WRITE_FUNCTION ll_file_writev
2334 #define READ_METHOD aio_read
2335 #define READ_FUNCTION ll_file_aio_read
2336 #define WRITE_METHOD aio_write
2337 #define WRITE_FUNCTION ll_file_aio_write
2340 /* Default file_operations: no ->flock/->lock members, so with
2341  * -o localflock only locally consistent flock locks are provided. */
2341 struct file_operations ll_file_operations = {
2342 .read = ll_file_read,
2343 .READ_METHOD = READ_FUNCTION,
2344 .write = ll_file_write,
2345 .WRITE_METHOD = WRITE_FUNCTION,
2346 .ioctl = ll_file_ioctl,
2347 .open = ll_file_open,
2348 .release = ll_file_release,
2349 .mmap = ll_file_mmap,
2350 .llseek = ll_file_seek,
2351 #ifdef HAVE_KERNEL_SENDFILE
2352 .sendfile = ll_file_sendfile,
2354 #ifdef HAVE_KERNEL_SPLICE_READ
2355 .splice_read = ll_file_splice_read,
/* file_operations used with -o flock: identical to the default table
 * plus cluster-coherent ->flock/->lock via ll_file_flock(). */
2360 struct file_operations ll_file_operations_flock = {
2361 .read = ll_file_read,
2362 .READ_METHOD = READ_FUNCTION,
2363 .write = ll_file_write,
2364 .WRITE_METHOD = WRITE_FUNCTION,
2365 .ioctl = ll_file_ioctl,
2366 .open = ll_file_open,
2367 .release = ll_file_release,
2368 .mmap = ll_file_mmap,
2369 .llseek = ll_file_seek,
2370 #ifdef HAVE_KERNEL_SENDFILE
2371 .sendfile = ll_file_sendfile,
2373 #ifdef HAVE_KERNEL_SPLICE_READ
2374 .splice_read = ll_file_splice_read,
2377 #ifdef HAVE_F_OP_FLOCK
2378 .flock = ll_file_flock,
2380 .lock = ll_file_flock
2383 /* These are for -o noflock - to return ENOSYS on flock calls */
2384 struct file_operations ll_file_operations_noflock = {
2385 .read = ll_file_read,
2386 .READ_METHOD = READ_FUNCTION,
2387 .write = ll_file_write,
2388 .WRITE_METHOD = WRITE_FUNCTION,
2389 .ioctl = ll_file_ioctl,
2390 .open = ll_file_open,
2391 .release = ll_file_release,
2392 .mmap = ll_file_mmap,
2393 .llseek = ll_file_seek,
2394 #ifdef HAVE_KERNEL_SENDFILE
2395 .sendfile = ll_file_sendfile,
2397 #ifdef HAVE_KERNEL_SPLICE_READ
2398 .splice_read = ll_file_splice_read,
2401 #ifdef HAVE_F_OP_FLOCK
2402 .flock = ll_file_noflock,
2404 .lock = ll_file_noflock
/* inode_operations for regular Lustre files: setattr path depends on
 * whether the kernel carries the VFS intent patches. */
2407 struct inode_operations ll_file_inode_operations = {
2408 #ifdef HAVE_VFS_INTENT_PATCHES
2409 .setattr_raw = ll_setattr_raw,
2411 .setattr = ll_setattr,
2412 .truncate = ll_truncate,
2413 .getattr = ll_getattr,
2414 .permission = ll_inode_permission,
2415 .setxattr = ll_setxattr,
2416 .getxattr = ll_getxattr,
2417 .listxattr = ll_listxattr,
2418 .removexattr = ll_removexattr,
2419 #ifdef HAVE_LINUX_FIEMAP_H
2420 .fiemap = ll_fiemap,
2424 /* Dynamic ioctl number support routines: external modules can register
 * extra ioctl handlers at runtime (see ll_iocontrol_register below). */
2425 static struct llioc_ctl_data {
/* ioc_sem protects ioc_head; readers traverse, (un)register writes. */
2426 struct rw_semaphore ioc_sem;
2427 struct list_head ioc_head;
2429 __RWSEM_INITIALIZER(llioc.ioc_sem),
2430 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registration record: callback plus the ioctl commands it claims.
 * iocd_cmd is a flexible-array-style trailing member sized at alloc. */
2435 struct list_head iocd_list;
2436 unsigned int iocd_size;
2437 llioc_callback_t iocd_cb;
2438 unsigned int iocd_count;
2439 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: @cb will be offered each of the
 * @count commands in @cmd by ll_iocontrol_call().  Returns an opaque
 * handle (the allocation itself) to pass to ll_iocontrol_unregister(),
 * or NULL on bad arguments / allocation failure.
 */
2442 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2445 struct llioc_data *in_data = NULL;
2448 if (cb == NULL || cmd == NULL ||
2449 count > LLIOC_MAX_CMD || count < 0)
2452 size = sizeof(*in_data) + count * sizeof(unsigned int);
2453 OBD_ALLOC(in_data, size);
2454 if (in_data == NULL)
2457 memset(in_data, 0, sizeof(*in_data));
2458 in_data->iocd_size = size;
2459 in_data->iocd_cb = cb;
2460 in_data->iocd_count = count;
2461 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* Publish under the write lock so concurrent dispatch never sees a
 * half-initialized entry. */
2463 down_write(&llioc.ioc_sem);
2464 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2465 up_write(&llioc.ioc_sem);
/*
 * Remove a dynamic ioctl handler previously returned by
 * ll_iocontrol_register().  @magic is the opaque handle; an unknown
 * handle is only warned about.
 */
2470 void ll_iocontrol_unregister(void *magic)
2472 struct llioc_data *tmp;
2477 down_write(&llioc.ioc_sem);
2478 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* iocd_size is cached before the free since it lives in the
 * allocation being released. */
2480 unsigned int size = tmp->iocd_size;
2482 list_del(&tmp->iocd_list);
2483 up_write(&llioc.ioc_sem);
2485 OBD_FREE(tmp, size);
2489 up_write(&llioc.ioc_sem);
2491 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2494 EXPORT_SYMBOL(ll_iocontrol_register);
2495 EXPORT_SYMBOL(ll_iocontrol_unregister);
2497 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2498 unsigned int cmd, unsigned long arg, int *rcp)
2500 enum llioc_iter ret = LLIOC_CONT;
2501 struct llioc_data *data;
2502 int rc = -EINVAL, i;
2504 down_read(&llioc.ioc_sem);
2505 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2506 for (i = 0; i < data->iocd_count; i++) {
2507 if (cmd != data->iocd_cmd[i])
2510 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2514 if (ret == LLIOC_STOP)
2517 up_read(&llioc.ioc_sem);