1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-open ll_file_data structure from its slab cache.
 * NOTE(review): this excerpt is elided (original line numbers jump
 * 54 -> 56 -> 58); the braces and return statement are not visible. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
/* Slab allocation with CFS_ALLOC_IO flags; fd may be NULL on failure. */
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Return a ll_file_data to the slab cache.
 * NOTE(review): surrounding braces elided in this excerpt; presumably
 * there is a NULL check before the free — confirm against full source. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Copy inode attributes (mode, times, size, blocks, flags), the current
 * I/O epoch, the open file handle @fh, and an MDS capability into
 * @op_data, in preparation for an MDS operation (e.g. close). */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* Cast smuggles i_flags through the ll_iattr wrapper around iattr. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
/* ll_mdscapa_get() takes a capability reference; released by the caller
 * path — TODO confirm where op_capa1 is dropped in the full source. */
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Prepare @op_data for an MDS close of handle @och: select which
 * attributes are valid, and (per the elided logic around line 92)
 * apparently adjust behaviour for non-write opens.
 * NOTE(review): lines 86-88, 91, 93-94, 97, 99-100 and the closing
 * brace are elided here — control flow cannot be fully confirmed. */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
92 if (!(och->och_flags & FMODE_WRITE))
/* Size/blocks are only sent when SOM is unsupported or not a regular
 * file; with SOM the MDS derives size from the OSTs separately. */
95 if (!(exp_connect_som(ll_i2mdexp(inode))) || !S_ISREG(inode->i_mode))
96 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
98 ll_epoch_close(inode, op_data, &och, 0);
101 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/* Send an MDS close for open handle @och, optionally performing a
 * Size-on-MDS (SOM) update first, then destroy OST objects if the
 * close reply requests it, and finally tear down replay data.
 * NOTE(review): many lines are elided from this excerpt (error labels,
 * several if-conditions, RETURN); annotations below are limited to
 * what the visible lines establish. */
105 static int ll_close_inode_openhandle(struct obd_export *md_exp,
107 struct obd_client_handle *och)
109 struct obd_export *exp = ll_i2mdexp(inode);
110 struct md_op_data *op_data;
111 struct ptlrpc_request *req = NULL;
112 struct obd_device *obd = class_exp2obd(exp);
119 * XXX: in case of LMV, is this correct to access
122 CERROR("Invalid MDC connection handle "LPX64"\n",
123 ll_i2mdexp(inode)->exp_handle.h_cookie);
/* Allocation failure leaks openhandle/request — acknowledged by the
 * original XXX comment below; pre-existing issue, not fixed here. */
127 OBD_ALLOC_PTR(op_data);
129 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
131 ll_prepare_close(inode, op_data, och);
132 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
133 rc = md_close(md_exp, op_data, och->och_mod, &req);
135 /* This close must have the epoch closed. */
136 LASSERT(epoch_close);
137 /* MDS has instructed us to obtain Size-on-MDS attribute from
138 * OSTs and send setattr to back to MDS. */
139 rc = ll_sizeonmds_update(inode, &och->och_fh,
140 op_data->op_ioepoch);
142 CERROR("inode %lu mdc Size-on-MDS update failed: "
143 "rc = %d\n", inode->i_ino, rc);
147 CERROR("inode %lu mdc close failed: rc = %d\n",
150 ll_finish_md_op_data(op_data);
/* Objects referenced by the close reply are destroyed client-side. */
153 rc = ll_objects_destroy(req, inode);
155 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* SOM connection + unfinished epoch on a write-opened regular file:
 * defer DONE_WRITING to the dedicated queue. */
162 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
163 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
164 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
166 md_clear_open_replay_data(md_exp, och);
167 /* Free @och if it is not waiting for DONE_WRITING. */
168 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
171 if (req) /* This is close request */
172 ptlrpc_req_finished(req);
/* Close the cached MDS open handle matching the open @flags
 * (write / exec / read) if no other local opens still use it.
 * Uses lli_och_sem to serialize against concurrent open/close.
 * NOTE(review): several lines (och assignment, RETURN paths, closing
 * braces) are elided from this excerpt. */
176 int ll_md_real_close(struct inode *inode, int flags)
178 struct ll_inode_info *lli = ll_i2info(inode);
179 struct obd_client_handle **och_p;
180 struct obd_client_handle *och;
/* Pick the handle slot and its use count by open mode. */
185 if (flags & FMODE_WRITE) {
186 och_p = &lli->lli_mds_write_och;
187 och_usecount = &lli->lli_open_fd_write_count;
188 } else if (flags & FMODE_EXEC) {
189 och_p = &lli->lli_mds_exec_och;
190 och_usecount = &lli->lli_open_fd_exec_count;
192 LASSERT(flags & FMODE_READ);
193 och_p = &lli->lli_mds_read_och;
194 och_usecount = &lli->lli_open_fd_read_count;
197 down(&lli->lli_och_sem);
198 if (*och_usecount) { /* There are still users of this handle, so
200 up(&lli->lli_och_sem);
205 up(&lli->lli_och_sem);
207 if (och) { /* There might be a race and somebody have freed this och
209 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file-descriptor close: drop any group lock, decrement the
 * per-mode open count, and skip the MDS round-trip when a matching
 * OPEN DLM lock is still cached (md_lock_match); otherwise fall
 * through to ll_md_real_close(). Frees the ll_file_data last.
 * NOTE(review): elided lines include the lockmode setup and several
 * closing braces; exact flow around md_lock_match is unconfirmed. */
216 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
219 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
220 struct ll_inode_info *lli = ll_i2info(inode);
224 /* clear group lock, if present */
225 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
226 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
228 /* Let's see if we have good enough OPEN lock on the file and if
229 we can skip talking to MDS */
230 if (file->f_dentry->d_inode) { /* Can this ever be false? */
232 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
233 struct lustre_handle lockh;
/* Shadows the @inode parameter — same object via the dentry. */
234 struct inode *inode = file->f_dentry->d_inode;
235 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
237 down(&lli->lli_och_sem);
238 if (fd->fd_omode & FMODE_WRITE) {
240 LASSERT(lli->lli_open_fd_write_count);
241 lli->lli_open_fd_write_count--;
242 } else if (fd->fd_omode & FMODE_EXEC) {
244 LASSERT(lli->lli_open_fd_exec_count);
245 lli->lli_open_fd_exec_count--;
248 LASSERT(lli->lli_open_fd_read_count);
249 lli->lli_open_fd_read_count--;
251 up(&lli->lli_och_sem);
/* TEST_LOCK match: no cached OPEN lock means we must close on MDS. */
253 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
254 LDLM_IBITS, &policy, lockmode,
256 rc = ll_md_real_close(file->f_dentry->d_inode,
260 CERROR("Releasing a file %p with negative dentry %p. Name %s",
261 file, file->f_dentry, file->f_dentry->d_name.name);
264 LUSTRE_FPRIVATE(file) = NULL;
265 ll_file_data_put(fd);
266 ll_capa_close(inode);
271 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
273 /* While this returns an error code, fput() the caller does not, so we need
274 * to make every effort to clean up all of our state here. Also, applications
275 * rarely check close errors and even if an error is returned they will not
276 * re-try the close call.
/* VFS ->release() handler: tear down remote-ACL state (remote client
 * on the root inode), stop a statahead thread this fd started, clear
 * cached async write errors on the stripe MD, and close via
 * ll_md_close(). The root dentry is special-cased: its fd is freed
 * directly without an MDS close.
 * NOTE(review): several lines (RETURN paths, braces, a null-fd check)
 * are elided from this excerpt. */
278 int ll_file_release(struct inode *inode, struct file *file)
280 struct ll_file_data *fd;
281 struct ll_sb_info *sbi = ll_i2sbi(inode);
282 struct ll_inode_info *lli = ll_i2info(inode);
283 struct lov_stripe_md *lsm = lli->lli_smd;
287 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
288 inode->i_generation, inode);
290 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL cleanup applies only to the filesystem root. */
291 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
292 inode == inode->i_sb->s_root->d_inode) {
293 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
296 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
297 fd->fd_flags &= ~LL_FILE_RMTACL;
298 rct_del(&sbi->ll_rct, cfs_curproc_pid());
299 et_search_free(&sbi->ll_et, cfs_curproc_pid());
304 if (inode->i_sb->s_root != file->f_dentry)
305 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
306 fd = LUSTRE_FPRIVATE(file);
309 /* The last ref on @file, maybe not the the owner pid of statahead.
310 * Different processes can open the same dir, "ll_opendir_key" means:
311 * it is me that should stop the statahead thread. */
312 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
313 ll_stop_statahead(inode, lli->lli_opendir_key);
315 if (inode->i_sb->s_root == file->f_dentry) {
316 LUSTRE_FPRIVATE(file) = NULL;
317 ll_file_data_put(fd);
/* Pending async write errors on the stripe are consumed and reset. */
322 lov_test_and_clear_async_rc(lsm);
323 lli->lli_async_rc = 0;
325 rc = ll_md_close(sbi->ll_md_exp, inode, file);
327 if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
328 libcfs_debug_dumplog();
/* Issue an MDS intent-open for @file (used by NFSD-style opens and
 * for setting stripe info when @lmm/@lmmsize are supplied). Requests
 * an OPEN DLM lock unless stripe parameters are being set. On -ESTALE
 * from a successful open disposition, releases the handle via a
 * dedicated exit path to avoid log flooding.
 * NOTE(review): elided lines include the rc checks around lines
 * 363-385 and the out/RETURN labels; flow is partially inferred. */
333 static int ll_intent_file_open(struct file *file, void *lmm,
334 int lmmsize, struct lookup_intent *itp)
336 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
337 struct dentry *parent = file->f_dentry->d_parent;
338 const char *name = file->f_dentry->d_name.name;
339 const int len = file->f_dentry->d_name.len;
340 struct md_op_data *op_data;
341 struct ptlrpc_request *req;
348 /* Usually we come here only for NFSD, and we want open lock.
349 But we can also get here with pre 2.6.15 patchless kernels, and in
350 that case that lock is also ok */
351 /* We can also get here if there was cached open handle in revalidate_it
352 * but it disappeared while we were getting from there to ll_file_open.
353 * But this means this file was closed and immediatelly opened which
354 * makes a good candidate for using OPEN lock */
355 /* If lmmsize & lmm are not 0, we are just setting stripe info
356 * parameters. No need for the open lock */
357 if (!lmm && !lmmsize)
358 itp->it_flags |= MDS_OPEN_LOCK;
360 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
361 file->f_dentry->d_inode, name, len,
362 O_RDWR, LUSTRE_OPC_ANY, NULL);
364 RETURN(PTR_ERR(op_data));
366 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
367 0 /*unused */, &req, ll_md_blocking_ast, 0);
368 ll_finish_md_op_data(op_data);
370 /* reason for keep own exit path - don`t flood log
371 * with messages with -ESTALE errors.
373 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
374 it_open_error(DISP_OPEN_OPEN, itp))
376 ll_release_openhandle(file->f_dentry, itp);
380 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
381 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
382 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Update the inode from the intent reply, then bind the granted DLM
 * lock (if any) to the inode so cancellation can find it. */
386 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
387 if (!rc && itp->d.lustre.it_lock_mode)
388 md_set_lock_data(sbi->ll_md_exp,
389 &itp->d.lustre.it_lock_handle,
390 file->f_dentry->d_inode, NULL);
393 ptlrpc_req_finished(itp->d.lustre.it_data);
394 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
395 ll_intent_drop_lock(itp);
/* Record a newly-opened I/O epoch on the inode if it differs from the
 * current one (ioepoch == 0 means "no epoch", left unchanged).
 * NOTE(review): closing braces elided in this excerpt. */
400 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
402 if (ioepoch && lli->lli_ioepoch != ioepoch) {
403 lli->lli_ioepoch = ioepoch;
404 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
405 ioepoch, PFID(&lli->lli_fid));
/* Populate an obd_client_handle from the MDS open reply carried in
 * @it: copy the server file handle, FID, and open flags, record the
 * I/O epoch, and register the request for open replay.
 * Returns the md_set_open_replay_data() result. */
409 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
410 struct lookup_intent *it, struct obd_client_handle *och)
412 struct ptlrpc_request *req = it->d.lustre.it_data;
413 struct mdt_body *body;
417 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
418 LASSERT(body != NULL); /* reply already checked out */
420 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
421 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
422 och->och_fid = lli->lli_fid;
423 och->och_flags = it->it_flags;
424 ll_ioepoch_open(lli, body->ioepoch);
426 return md_set_open_replay_data(md_exp, och, req);
/* Finish a local open: optionally fill @och from the intent reply,
 * attach @fd as the file's private data, initialize readahead state,
 * and record the open mode.
 * NOTE(review): the condition guarding the och-fill branch (original
 * lines ~437-440) and several rc checks are elided here. */
429 int ll_local_open(struct file *file, struct lookup_intent *it,
430 struct ll_file_data *fd, struct obd_client_handle *och)
432 struct inode *inode = file->f_dentry->d_inode;
433 struct ll_inode_info *lli = ll_i2info(inode);
436 LASSERT(!LUSTRE_FPRIVATE(file));
441 struct ptlrpc_request *req = it->d.lustre.it_data;
442 struct mdt_body *body;
445 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
449 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
450 if ((it->it_flags & FMODE_WRITE) &&
451 (body->valid & OBD_MD_FLSIZE))
452 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
453 lli->lli_ioepoch, PFID(&lli->lli_fid));
456 LUSTRE_FPRIVATE(file) = fd;
457 ll_readahead_init(inode, &fd->fd_ras);
/* fd_omode caches the intent flags for the later ll_md_close(). */
458 fd->fd_omode = it->it_flags;
462 /* Open a file, and (for the very first open) create objects on the OSTs at
463 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
464 * creation or open until ll_lov_setstripe() ioctl is called. We grab
465 * lli_open_sem to ensure no other process will create objects, send the
466 * stripe MD to the MDS, or try to destroy the objects if that fails.
468 * If we already have the stripe MD locally then we don't request it in
469 * md_open(), by passing a lmm_size = 0.
471 * It is up to the application to ensure no other processes open this file
472 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
473 * used. We might be able to avoid races of that sort by getting lli_open_sem
474 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
475 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* VFS ->open() handler. Allocates the per-open ll_file_data, sets up
 * statahead ownership for directories, and either reuses an existing
 * cached MDS open handle for this open mode or performs a fresh
 * intent open (ll_intent_file_open) and records the new handle.
 * Object creation on OSTs is deferred under O_LOV_DELAY_CREATE.
 * NOTE(review): this excerpt elides many lines (ENTRY/RETURN, error
 * labels, the !HAVE_VFS_INTENT_PATCHES branch, several rc checks);
 * comments below stick to what the visible lines show. */
477 int ll_file_open(struct inode *inode, struct file *file)
479 struct ll_inode_info *lli = ll_i2info(inode);
480 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
481 .it_flags = file->f_flags };
482 struct lov_stripe_md *lsm;
483 struct ptlrpc_request *req = NULL;
484 struct obd_client_handle **och_p;
486 struct ll_file_data *fd;
487 int rc = 0, opendir_set = 0;
490 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
491 inode->i_generation, inode, file->f_flags);
493 #ifdef HAVE_VFS_INTENT_PATCHES
496 it = file->private_data; /* XXX: compat macro */
497 file->private_data = NULL; /* prevent ll_local_open assertion */
500 fd = ll_file_data_get();
/* First opener of a directory claims statahead ownership under
 * lli_lock; lli_opendir_key identifies the owning fd. */
505 if (S_ISDIR(inode->i_mode)) {
506 spin_lock(&lli->lli_lock);
507 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
508 LASSERT(lli->lli_sai == NULL);
509 lli->lli_opendir_key = fd;
510 lli->lli_opendir_pid = cfs_curproc_pid();
513 spin_unlock(&lli->lli_lock);
/* Root dentry: no MDS open needed, just attach the fd. */
516 if (inode->i_sb->s_root == file->f_dentry) {
517 LUSTRE_FPRIVATE(file) = fd;
521 if (!it || !it->d.lustre.it_disposition) {
522 /* Convert f_flags into access mode. We cannot use file->f_mode,
523 * because everything but O_ACCMODE mask was stripped from
525 if ((oit.it_flags + 1) & O_ACCMODE)
527 if (file->f_flags & O_TRUNC)
528 oit.it_flags |= FMODE_WRITE;
530 /* kernel only call f_op->open in dentry_open. filp_open calls
531 * dentry_open after call to open_namei that checks permissions.
532 * Only nfsd_open call dentry_open directly without checking
533 * permissions and because of that this code below is safe. */
534 if (oit.it_flags & FMODE_WRITE)
535 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
537 /* We do not want O_EXCL here, presumably we opened the file
538 * already? XXX - NFS implications? */
539 oit.it_flags &= ~O_EXCL;
541 /* bug20584, if "it_flags" contains O_CREAT, the file will be
542 * created if necessary, then "IT_CREAT" should be set to keep
543 * consistent with it */
544 if (oit.it_flags & O_CREAT)
545 oit.it_op |= IT_CREAT;
551 /* Let's see if we have file open on MDS already. */
552 if (it->it_flags & FMODE_WRITE) {
553 och_p = &lli->lli_mds_write_och;
554 och_usecount = &lli->lli_open_fd_write_count;
555 } else if (it->it_flags & FMODE_EXEC) {
556 och_p = &lli->lli_mds_exec_och;
557 och_usecount = &lli->lli_open_fd_exec_count;
559 och_p = &lli->lli_mds_read_och;
560 och_usecount = &lli->lli_open_fd_read_count;
563 down(&lli->lli_och_sem);
564 if (*och_p) { /* Open handle is present */
565 if (it_disposition(it, DISP_OPEN_OPEN)) {
566 /* Well, there's extra open request that we do not need,
567 let's close it somehow. This will decref request. */
568 rc = it_open_error(DISP_OPEN_OPEN, it);
570 up(&lli->lli_och_sem);
571 ll_file_data_put(fd);
572 GOTO(out_openerr, rc);
574 ll_release_openhandle(file->f_dentry, it);
575 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse the cached handle: och argument is NULL. */
580 rc = ll_local_open(file, it, fd, NULL);
583 up(&lli->lli_och_sem);
584 ll_file_data_put(fd);
585 GOTO(out_openerr, rc);
588 LASSERT(*och_usecount == 0);
589 if (!it->d.lustre.it_disposition) {
590 /* We cannot just request lock handle now, new ELC code
591 means that one of other OPEN locks for this file
592 could be cancelled, and since blocking ast handler
593 would attempt to grab och_sem as well, that would
594 result in a deadlock */
595 up(&lli->lli_och_sem);
596 it->it_create_mode |= M_CHECK_STALE;
597 rc = ll_intent_file_open(file, NULL, 0, it);
598 it->it_create_mode &= ~M_CHECK_STALE;
600 ll_file_data_put(fd);
601 GOTO(out_openerr, rc);
604 /* Got some error? Release the request */
605 if (it->d.lustre.it_status < 0) {
606 req = it->d.lustre.it_data;
607 ptlrpc_req_finished(req);
611 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
613 ll_file_data_put(fd);
614 GOTO(out_och_free, rc = -ENOMEM);
617 req = it->d.lustre.it_data;
619 /* md_intent_lock() didn't get a request ref if there was an
620 * open error, so don't do cleanup on the request here
622 /* XXX (green): Should not we bail out on any error here, not
623 * just open error? */
624 rc = it_open_error(DISP_OPEN_OPEN, it);
626 ll_file_data_put(fd);
627 GOTO(out_och_free, rc);
630 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
631 rc = ll_local_open(file, it, fd, *och_p);
633 ll_file_data_put(fd);
634 GOTO(out_och_free, rc);
637 up(&lli->lli_och_sem);
639 /* Must do this outside lli_och_sem lock to prevent deadlock where
640 different kind of OPEN lock for this same inode gets cancelled
641 by ldlm_cancel_lru */
642 if (!S_ISREG(inode->i_mode))
/* Deferred object creation: O_LOV_DELAY_CREATE or read-only open. */
649 if (file->f_flags & O_LOV_DELAY_CREATE ||
650 !(file->f_mode & FMODE_WRITE)) {
651 CDEBUG(D_INODE, "object creation was delayed\n");
655 file->f_flags &= ~O_LOV_DELAY_CREATE;
658 ptlrpc_req_finished(req);
660 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
664 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
665 *och_p = NULL; /* OBD_FREE writes some magic there */
668 up(&lli->lli_och_sem);
670 if (opendir_set != 0)
671 ll_stop_statahead(inode, lli->lli_opendir_key);
677 /* Fills the obdo with the attributes for the lsm */
678 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
679 struct obd_capa *capa, struct obdo *obdo)
681 struct ptlrpc_request_set *set;
682 struct obd_info oinfo = { { { 0 } } };
687 LASSERT(lsm != NULL);
691 oinfo.oi_oa->o_id = lsm->lsm_object_id;
692 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
693 oinfo.oi_oa->o_mode = S_IFREG;
694 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
695 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
696 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
697 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
699 oinfo.oi_capa = capa;
701 set = ptlrpc_prep_set();
703 CERROR("can't allocate ptlrpc set\n");
706 rc = obd_getattr_async(exp, &oinfo, set);
708 rc = ptlrpc_set_wait(set);
709 ptlrpc_set_destroy(set);
712 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
713 OBD_MD_FLATIME | OBD_MD_FLMTIME |
714 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
718 /* Fills the obdo with the attributes for the inode defined by lsm */
719 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
721 struct ll_inode_info *lli = ll_i2info(inode);
722 struct obd_capa *capa = ll_mdscapa_get(inode);
726 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
729 obdo_refresh_inode(inode, obdo, obdo->o_valid);
731 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
732 lli->lli_smd->lsm_object_id, i_size_read(inode),
733 (unsigned long long)inode->i_blocks,
734 (unsigned long)ll_inode_blksize(inode));
/* Merge the lock value block (size, blocks, [mac]times) from the data
 * export into the VFS inode, under the inode size lock. */
739 int ll_merge_lvb(struct inode *inode)
741 struct ll_inode_info *lli = ll_i2info(inode);
742 struct ll_sb_info *sbi = ll_i2sbi(inode);
748 ll_inode_size_lock(inode, 1);
749 inode_init_lvb(inode, &lvb);
750 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
751 i_size_write(inode, lvb.lvb_size);
752 inode->i_blocks = lvb.lvb_blocks;
754 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
755 LTIME_S(inode->i_atime) = lvb.lvb_atime;
756 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
757 ll_inode_size_unlock(inode, 1);
/* Glimpse helper for ioctl paths: fetch OST attributes for @lsm
 * (no capability) and copy size/blocks/times into the caller's stat.
 * NOTE(review): the stat parameter declaration and rc check are
 * elided from this excerpt. */
762 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
765 struct obdo obdo = { 0 };
768 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
770 st->st_size = obdo.o_size;
771 st->st_blocks = obdo.o_blocks;
772 st->st_mtime = obdo.o_mtime;
773 st->st_atime = obdo.o_atime;
774 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read or write on @file: zero the struct,
 * propagate O_NONBLOCK/O_APPEND, bind the cl_object, and choose the
 * lock requirement (never for nolock files, mandatory for append,
 * otherwise "maybe"). */
779 void ll_io_init(struct cl_io *io, const struct file *file, int write)
781 struct inode *inode = file->f_dentry->d_inode;
783 memset(io, 0, sizeof *io);
784 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
786 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
787 io->ci_obj = ll_i2info(inode)->lli_clob;
788 io->ci_lockreq = CILR_MAYBE;
789 if (ll_file_nolock(file)) {
790 io->ci_lockreq = CILR_NEVER;
791 io->ci_no_srvlock = 1;
792 } else if (file->f_flags & O_APPEND) {
793 io->ci_lockreq = CILR_MANDATORY;
/* Common driver for all read/write entry points: initializes a cl_io,
 * copies the per-subtype arguments (normal iovec, sendfile, splice)
 * into the vvp/ccc io state, serializes non-group-locked writes with
 * lli_write_sem, runs cl_io_loop(), and advances *ppos by ci_nob.
 * NOTE(review): case labels for the subtype switch, cl_io_fini, and
 * RETURN are elided from this excerpt. */
797 static ssize_t ll_file_io_generic(const struct lu_env *env,
798 struct vvp_io_args *args, struct file *file,
799 enum cl_io_type iot, loff_t *ppos, size_t count)
805 io = &ccc_env_info(env)->cti_io;
806 ll_io_init(io, file, iot == CIT_WRITE);
808 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
809 struct vvp_io *vio = vvp_env_io(env);
810 struct ccc_io *cio = ccc_env_io(env);
811 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
812 int write_sem_locked = 0;
814 cio->cui_fd = LUSTRE_FPRIVATE(file);
815 vio->cui_io_subtype = args->via_io_subtype;
817 switch (vio->cui_io_subtype) {
819 cio->cui_iov = args->u.normal.via_iov;
820 cio->cui_nrsegs = args->u.normal.via_nrsegs;
821 #ifndef HAVE_FILE_WRITEV
822 cio->cui_iocb = args->u.normal.via_iocb;
/* Writes outside a group lock are serialized per-inode. */
824 if ((iot == CIT_WRITE) &&
825 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
826 down(&lli->lli_write_sem);
827 write_sem_locked = 1;
831 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
832 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
835 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
836 vio->u.splice.cui_flags = args->u.splice.via_flags;
839 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
842 result = cl_io_loop(env, io);
843 if (write_sem_locked)
844 up(&lli->lli_write_sem);
846 /* cl_io_rw_init() handled IO */
847 result = io->ci_result;
850 if (io->ci_nob > 0) {
852 *ppos = io->u.ci_wr.wr.crw_pos;
860 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute the total byte count, clamping
 * *nr_segs at the first invalid/inaccessible segment (kernel-style
 * segment checking, as the XXX above notes).
 * NOTE(review): the cnt accumulation line, the -EFAULT path, and the
 * final *count/*nr_segs stores are elided from this excerpt. */
862 static int ll_file_get_iov_count(const struct iovec *iov,
863 unsigned long *nr_segs, size_t *count)
868 for (seg = 0; seg < *nr_segs; seg++) {
869 const struct iovec *iv = &iov[seg];
872 * If any segment has a negative length, or the cumulative
873 * length ever wraps negative then return -EINVAL.
876 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
878 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
883 cnt -= iv->iov_len; /* This segment is no good */
890 #ifdef HAVE_FILE_READV
/* readv entry point (HAVE_FILE_READV kernels): validate the iovec,
 * obtain a cl environment, and delegate to ll_file_io_generic with
 * CIT_READ. */
891 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
892 unsigned long nr_segs, loff_t *ppos)
895 struct vvp_io_args *args;
901 result = ll_file_get_iov_count(iov, &nr_segs, &count);
905 env = cl_env_get(&refcheck);
907 RETURN(PTR_ERR(env));
909 args = vvp_env_args(env, IO_NORMAL);
/* Cast drops const; cui_iov is declared non-const downstream. */
910 args->u.normal.via_iov = (struct iovec *)iov;
911 args->u.normal.via_nrsegs = nr_segs;
913 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
914 cl_env_put(env, &refcheck);
/* read(2) entry point (readv variant): wrap the user buffer in a
 * single-segment iovec held in the cl environment and forward to
 * ll_file_readv(). */
918 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
922 struct iovec *local_iov;
927 env = cl_env_get(&refcheck);
929 RETURN(PTR_ERR(env));
931 local_iov = &vvp_env_info(env)->vti_local_iov;
932 local_iov->iov_base = (void __user *)buf;
933 local_iov->iov_len = count;
934 result = ll_file_readv(file, local_iov, 1, ppos);
935 cl_env_put(env, &refcheck);
/* aio_read entry point: validate the iovec, then run a normal-subtype
 * CIT_READ through ll_file_io_generic, tracking position in the
 * kiocb's ki_pos. */
940 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
941 unsigned long nr_segs, loff_t pos)
944 struct vvp_io_args *args;
950 result = ll_file_get_iov_count(iov, &nr_segs, &count);
954 env = cl_env_get(&refcheck);
956 RETURN(PTR_ERR(env));
958 args = vvp_env_args(env, IO_NORMAL);
959 args->u.normal.via_iov = (struct iovec *)iov;
960 args->u.normal.via_nrsegs = nr_segs;
961 args->u.normal.via_iocb = iocb;
963 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
964 &iocb->ki_pos, count);
965 cl_env_put(env, &refcheck);
/* read(2) entry point (aio variant): build a synchronous kiocb plus a
 * single-segment iovec in the cl environment and forward to
 * ll_file_aio_read(), then propagate the updated position. */
969 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
973 struct iovec *local_iov;
979 env = cl_env_get(&refcheck);
981 RETURN(PTR_ERR(env));
983 local_iov = &vvp_env_info(env)->vti_local_iov;
984 kiocb = &vvp_env_info(env)->vti_kiocb;
985 local_iov->iov_base = (void __user *)buf;
986 local_iov->iov_len = count;
987 init_sync_kiocb(kiocb, file);
988 kiocb->ki_pos = *ppos;
989 kiocb->ki_left = count;
991 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
992 *ppos = kiocb->ki_pos;
994 cl_env_put(env, &refcheck);
1000 * Write to a file (through the page cache).
1002 #ifdef HAVE_FILE_WRITEV
/* writev entry point (HAVE_FILE_WRITEV kernels): mirror of
 * ll_file_readv() with CIT_WRITE. */
1003 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1004 unsigned long nr_segs, loff_t *ppos)
1007 struct vvp_io_args *args;
1013 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1017 env = cl_env_get(&refcheck);
1019 RETURN(PTR_ERR(env));
1021 args = vvp_env_args(env, IO_NORMAL);
1022 args->u.normal.via_iov = (struct iovec *)iov;
1023 args->u.normal.via_nrsegs = nr_segs;
1025 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1026 cl_env_put(env, &refcheck);
/* write(2) entry point (writev variant): wrap the user buffer in a
 * single-segment iovec and forward to ll_file_writev(). */
1030 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1034 struct iovec *local_iov;
1039 env = cl_env_get(&refcheck);
1041 RETURN(PTR_ERR(env));
1043 local_iov = &vvp_env_info(env)->vti_local_iov;
1044 local_iov->iov_base = (void __user *)buf;
1045 local_iov->iov_len = count;
1047 result = ll_file_writev(file, local_iov, 1, ppos);
1048 cl_env_put(env, &refcheck);
1052 #else /* AIO stuff */
/* aio_write entry point: mirror of ll_file_aio_read() with CIT_WRITE,
 * tracking position in the kiocb's ki_pos. */
1053 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1054 unsigned long nr_segs, loff_t pos)
1057 struct vvp_io_args *args;
1063 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1067 env = cl_env_get(&refcheck);
1069 RETURN(PTR_ERR(env));
1071 args = vvp_env_args(env, IO_NORMAL);
1072 args->u.normal.via_iov = (struct iovec *)iov;
1073 args->u.normal.via_nrsegs = nr_segs;
1074 args->u.normal.via_iocb = iocb;
1076 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1077 &iocb->ki_pos, count);
1078 cl_env_put(env, &refcheck);
/* write(2) entry point (aio variant): build a synchronous kiocb and a
 * single-segment iovec and forward to ll_file_aio_write(), then
 * propagate the updated position. */
1082 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1086 struct iovec *local_iov;
1087 struct kiocb *kiocb;
1092 env = cl_env_get(&refcheck);
1094 RETURN(PTR_ERR(env));
1096 local_iov = &vvp_env_info(env)->vti_local_iov;
1097 kiocb = &vvp_env_info(env)->vti_kiocb;
1098 local_iov->iov_base = (void __user *)buf;
1099 local_iov->iov_len = count;
1100 init_sync_kiocb(kiocb, file);
1101 kiocb->ki_pos = *ppos;
1102 kiocb->ki_left = count;
1104 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1105 *ppos = kiocb->ki_pos;
1107 cl_env_put(env, &refcheck);
1113 #ifdef HAVE_KERNEL_SENDFILE
1115 * Send file content (through pagecache) somewhere with helper
1117 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1118 read_actor_t actor, void *target)
1121 struct vvp_io_args *args;
1126 env = cl_env_get(&refcheck);
1128 RETURN(PTR_ERR(env));
1130 args = vvp_env_args(env, IO_SENDFILE);
1131 args->u.sendfile.via_target = target;
1132 args->u.sendfile.via_actor = actor;
1134 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1135 cl_env_put(env, &refcheck);
1140 #ifdef HAVE_KERNEL_SPLICE_READ
1142 * Send file content (through pagecache) somewhere with helper
/* splice_read entry point: run a CIT_READ with the IO_SPLICE subtype,
 * handing the pipe/flags pair to ll_file_io_generic(). */
1144 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1145 struct pipe_inode_info *pipe, size_t count,
1149 struct vvp_io_args *args;
1154 env = cl_env_get(&refcheck);
1156 RETURN(PTR_ERR(env));
1158 args = vvp_env_args(env, IO_SPLICE);
1159 args->u.splice.via_pipe = pipe;
1160 args->u.splice.via_flags = flags;
1162 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1163 cl_env_put(env, &refcheck);
/* LL_IOC_RECREATE_OBJ handler (admin only): copy the request from
 * userspace, duplicate the inode's stripe MD, and ask the data export
 * to recreate the named OST object under the inode size lock.
 * NOTE(review): OBDO allocation, several error checks/RETURNs, and
 * the oa free are elided from this excerpt. */
1168 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1171 struct obd_export *exp = ll_i2dtexp(inode);
1172 struct ll_recreate_obj ucreatp;
1173 struct obd_trans_info oti = { 0 };
1174 struct obdo *oa = NULL;
1177 struct lov_stripe_md *lsm, *lsm2;
/* Object recreation is restricted to CAP_SYS_ADMIN. */
1180 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1183 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1184 sizeof(struct ll_recreate_obj)))
1191 ll_inode_size_lock(inode, 0);
1192 lsm = ll_i2info(inode)->lli_smd;
1194 GOTO(out, rc = -ENOENT);
1195 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1196 (lsm->lsm_stripe_count));
1198 OBD_ALLOC(lsm2, lsm_size);
1200 GOTO(out, rc = -ENOMEM);
1202 oa->o_id = ucreatp.lrc_id;
1203 oa->o_gr = ucreatp.lrc_group;
/* o_nlink is reused to carry the OST index for recreation. */
1204 oa->o_nlink = ucreatp.lrc_ost_idx;
1205 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1206 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1207 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1208 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1210 memcpy(lsm2, lsm, lsm_size);
1211 rc = obd_create(exp, oa, &lsm2, &oti);
1213 OBD_FREE(lsm2, lsm_size);
1216 ll_inode_size_unlock(inode, 0);
/* Set striping on a file by performing an intent open that carries
 * the lov_user_md. Fails if stripe MD already exists on the inode.
 * The open handle obtained purely for striping is released again.
 * NOTE(review): rc checks after ll_intent_file_open and the
 * out/out_req_free label lines are elided from this excerpt. */
1221 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1222 int flags, struct lov_user_md *lum, int lum_size)
1224 struct lov_stripe_md *lsm;
1225 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1229 ll_inode_size_lock(inode, 0);
1230 lsm = ll_i2info(inode)->lli_smd;
/* Existing stripe MD: cannot restripe an already-striped file. */
1232 ll_inode_size_unlock(inode, 0);
1233 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1238 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1241 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1242 GOTO(out_req_free, rc = -ENOENT);
1243 rc = oit.d.lustre.it_status;
1245 GOTO(out_req_free, rc);
1247 ll_release_openhandle(file->f_dentry, &oit);
1250 ll_inode_size_unlock(inode, 0);
1251 ll_intent_release(&oit);
1254 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping metadata) for @filename from the MDS via
 * md_getattr_name, validate the LOV magic, and byte-swap the user MD
 * (and per-object entries for regular files) to host endianness on
 * big-endian clients. Returns the lmm pointer/size and the request
 * (caller releases the request).
 * NOTE(review): rc checks, capa_put, *lmmp assignment and out label
 * are elided from this excerpt. */
1258 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1259 struct lov_mds_md **lmmp, int *lmm_size,
1260 struct ptlrpc_request **request)
1262 struct ll_sb_info *sbi = ll_i2sbi(inode);
1263 struct mdt_body *body;
1264 struct lov_mds_md *lmm = NULL;
1265 struct ptlrpc_request *req = NULL;
1266 struct obd_capa *oc;
1269 rc = ll_get_max_mdsize(sbi, &lmmsize);
1273 oc = ll_mdscapa_get(inode);
1274 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1275 oc, filename, strlen(filename) + 1,
1276 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1277 ll_i2suppgid(inode), &req);
1280 CDEBUG(D_INFO, "md_getattr_name failed "
1281 "on %s: rc %d\n", filename, rc);
1285 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1286 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1288 lmmsize = body->eadatasize;
1290 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1292 GOTO(out, rc = -ENODATA);
1295 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1296 LASSERT(lmm != NULL);
/* Only V1/V3 user MD magics are understood here. */
1298 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1299 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1300 GOTO(out, rc = -EPROTO);
1304 * This is coming from the MDS, so is probably in
1305 * little endian. We convert it to host endian before
1306 * passing it to userspace.
/* True only on big-endian hosts: swab needed. */
1308 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1309 /* if function called for directory - we should
1310 * avoid swab not existent lsm objects */
1311 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1312 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1313 if (S_ISREG(body->mode))
1314 lustre_swab_lov_user_md_objects(
1315 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1316 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1317 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1318 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1319 if (S_ISREG(body->mode))
1320 lustre_swab_lov_user_md_objects(
1321 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1322 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1328 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA: set a raw striping EA (including pre-created object
 * ids) on @inode.  Root-only (CFS_CAP_SYS_ADMIN) since the caller
 * supplies object ids directly.  Copies the lov_user_md from userspace,
 * forwards it to ll_lov_setstripe_ea_info(), then frees the kernel copy.
 *
 * NOTE(review): elided excerpt -- the RETURN statements and the arg
 * parameter declaration are not visible here.
 */
1333 static int ll_lov_setea(struct inode *inode, struct file *file,
/* MDS_OPEN_HAS_OBJS tells the MDS the EA already names its objects. */
1336 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1337 struct lov_user_md *lump;
/* One OST entry follows the header in this fixed-size request. */
1338 int lum_size = sizeof(struct lov_user_md) +
1339 sizeof(struct lov_user_ost_data);
1343 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1346 OBD_ALLOC(lump, lum_size);
1350 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1351 OBD_FREE(lump, lum_size);
1355 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1357 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE: set striping parameters for a file from a
 * userspace lov_user_md (v1 or v3).  Reads the v1-sized prefix first to
 * discover the magic, re-reads the full v3 structure when needed, then
 * applies the stripe info and echoes the resulting striping back to the
 * caller's buffer via obd_iocontrol(LL_IOC_LOV_GETSTRIPE).
 *
 * NOTE(review): elided excerpt -- the arg parameter, rc/lum_size
 * declarations, -EFAULT returns and the trailing obd_iocontrol
 * arguments are not visible here.
 */
1361 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1364 struct lov_user_md_v3 lumv3;
/* v1 is a prefix of v3, so one buffer serves both layouts. */
1365 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1366 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1367 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1370 int flags = FMODE_WRITE;
1373 /* first try with v1 which is smaller than v3 */
1374 lum_size = sizeof(struct lov_user_md_v1);
1375 if (copy_from_user(lumv1, lumv1p, lum_size))
/* Magic says v3: re-copy the larger structure from userspace. */
1378 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1379 lum_size = sizeof(struct lov_user_md_v3);
1380 if (copy_from_user(&lumv3, lumv3p, lum_size))
1384 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* Tell GETSTRIPE below to fill in the real stripe count, then report
 * the striping actually applied back to the user's buffer. */
1386 put_user(0, &lumv1p->lmm_stripe_count);
1387 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1388 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE: return the file's current striping to userspace
 * by delegating to the data (LOV) layer's iocontrol handler.
 * NOTE(review): elided excerpt -- the "no stripe md" check and the
 * final obd_iocontrol argument are not visible here.
 */
1394 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1396 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1401 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * LL_IOC_GROUP_LOCK: take a cluster-wide group lock with group id @arg
 * on behalf of this open file.  Only one group lock may be associated
 * with a file descriptor; fd_flags/fd_grouplock record it.  lli_lock
 * guards the check-then-set, and the lock is acquired outside the
 * spinlock with a re-check afterwards to handle racing threads.
 *
 * NOTE(review): elided excerpt -- rc declaration, some RETURNs and the
 * error paths between the visible lines are not shown.
 */
1405 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1407 struct ll_inode_info *lli = ll_i2info(inode);
1408 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1409 struct ccc_grouplock grouplock;
/* Mounted with -o noflock (or equivalent): group locks unsupported. */
1413 if (ll_file_nolock(file))
1414 RETURN(-EOPNOTSUPP);
1416 spin_lock(&lli->lli_lock);
/* Already holding a group lock on this fd -> refuse a second one. */
1417 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1418 CWARN("group lock already existed with gid %lu\n",
1419 fd->fd_grouplock.cg_gid);
1420 spin_unlock(&lli->lli_lock);
1423 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1424 spin_unlock(&lli->lli_lock);
/* Acquire the group lock without holding the spinlock; may block
 * unless the file was opened O_NONBLOCK. */
1426 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1427 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1431 spin_lock(&lli->lli_lock);
/* Re-check under the lock: another thread may have won the race while
 * we were blocked in cl_get_grouplock(); drop our lock if so. */
1432 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1433 spin_unlock(&lli->lli_lock);
1434 CERROR("another thread just won the race\n");
1435 cl_put_grouplock(&grouplock);
1439 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1440 fd->fd_grouplock = grouplock;
1441 spin_unlock(&lli->lli_lock);
1443 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK: release the group lock with id @arg previously
 * taken by ll_get_grouplock() on this file descriptor.  The fd state is
 * cleared under lli_lock; the actual lock release happens outside the
 * spinlock on a local copy.
 *
 * NOTE(review): elided excerpt -- some RETURN statements between the
 * visible lines are not shown.
 */
1447 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1449 struct ll_inode_info *lli = ll_i2info(inode);
1450 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1451 struct ccc_grouplock grouplock;
1454 spin_lock(&lli->lli_lock);
/* Nothing to release on this fd. */
1455 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1456 spin_unlock(&lli->lli_lock);
1457 CWARN("no group lock held\n");
1460 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* The caller must pass the same gid the lock was taken with. */
1462 if (fd->fd_grouplock.cg_gid != arg) {
1463 CWARN("group lock %lu doesn't match current id %lu\n",
1464 arg, fd->fd_grouplock.cg_gid);
1465 spin_unlock(&lli->lli_lock);
/* Detach the lock from the fd under the spinlock, release it after. */
1469 grouplock = fd->fd_grouplock;
1470 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1471 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1472 spin_unlock(&lli->lli_lock);
1474 cl_put_grouplock(&grouplock);
1475 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1480 * Close inode open handle
1482 * \param dentry [in] dentry which contains the inode
1483 * \param it [in,out] intent which contains open info and result
1486 * \retval <0 failure
/*
 * Used after an intent open that will not result in a long-lived file
 * handle (e.g. ll_lov_setstripe_ea_info): builds a temporary
 * obd_client_handle from the intent and immediately closes it on the
 * MDS, then drops the open request reference held by the intent.
 * NOTE(review): elided excerpt -- rc declaration, early RETURNs and the
 * out: label are not visible here.
 */
1488 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1490 struct inode *inode = dentry->d_inode;
1491 struct obd_client_handle *och;
1497 /* Root ? Do nothing. */
1498 if (dentry->d_inode->i_sb->s_root == dentry)
1501 /* No open handle to close? Move away */
1502 if (!it_disposition(it, DISP_OPEN_OPEN))
1505 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1507 OBD_ALLOC(och, sizeof(*och));
1509 GOTO(out, rc = -ENOMEM);
/* Populate the handle from the intent's open reply, then close it. */
1511 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1512 ll_i2info(inode), it, och);
1514 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1517 /* this one is in place of ll_file_open */
1518 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1519 ptlrpc_req_finished(it->d.lustre.it_data);
1520 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1525 * Get size for inode for which FIEMAP mapping is requested.
1526 * Make the FIEMAP get_info call and returns the result.
/*
 * @fiemap holds the request on entry and is overwritten with the extent
 * mapping on success; @num_bytes is the total size of that buffer.
 * The request travels to the LOV/OSC layers via obd_get_info(KEY_FIEMAP).
 * NOTE(review): elided excerpt -- rc declaration and some RETURNs are
 * not visible here.
 */
1528 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1531 struct obd_export *exp = ll_i2dtexp(inode);
1532 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1533 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1534 int vallen = num_bytes;
1538 /* If the stripe_count > 1 and the application does not understand
1539 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1541 if (lsm->lsm_stripe_count > 1 &&
1542 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
/* Identify the object the OSTs should map. */
1545 fm_key.oa.o_id = lsm->lsm_object_id;
1546 fm_key.oa.o_gr = lsm->lsm_object_gr;
1547 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1549 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1552 /* If filesize is 0, then there would be no objects for mapping */
1553 if (fm_key.oa.o_size == 0) {
1554 fiemap->fm_mapped_extents = 0;
/* Ship the user's fiemap request inside the key; the reply overwrites
 * @fiemap in place. */
1558 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1560 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1562 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a pathname via the MDC.  Copies the
 * fixed-size input from userspace to learn gf_pathlen, allocates an
 * output buffer with that much room for the path, asks the MDC, and
 * copies the result back.
 * NOTE(review): elided excerpt -- rc/outsize declarations, the -EFAULT
 * assignments, gfin's free and the RETURN are not visible here.
 */
1567 int ll_fid2path(struct obd_export *exp, void *arg)
1569 struct getinfo_fid2path *gfout, *gfin;
1573 /* Need to get the buflen */
1574 OBD_ALLOC_PTR(gfin);
1577 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* Output = header + caller-specified path buffer length. */
1582 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1583 OBD_ALLOC(gfout, outsize);
1584 if (gfout == NULL) {
1588 memcpy(gfout, gfin, sizeof(*gfout));
1591 /* Call mdc_iocontrol */
1592 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1595 if (copy_to_user(arg, gfout, outsize))
1599 OBD_FREE(gfout, outsize);
/*
 * Main ioctl dispatcher for regular Lustre files.  Handles the LL_IOC_*
 * family (flags, striping, group locks, fid/path translation), the
 * FSFILT_IOC_* compatibility ioctls, FIEMAP, and finally falls through
 * to dynamically-registered handlers (ll_iocontrol_call) and the data
 * export's generic obd_iocontrol.
 *
 * NOTE(review): heavily elided excerpt -- the switch statement itself,
 * several RETURN/GOTO lines, local declarations (flags, rc, err) and
 * parts of the FIEMAP error path are not visible here.
 */
1603 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1606 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1610 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1611 inode->i_generation, inode, cmd);
1612 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1614 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1615 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1619 case LL_IOC_GETFLAGS:
1620 /* Get the current value of the file flags */
1621 return put_user(fd->fd_flags, (int *)arg);
1622 case LL_IOC_SETFLAGS:
1623 case LL_IOC_CLRFLAGS:
1624 /* Set or clear specific file flags */
1625 /* XXX This probably needs checks to ensure the flags are
1626 * not abused, and to handle any flag side effects.
1628 if (get_user(flags, (int *) arg))
/* LL_FILE_IGNORE_LOCK is only meaningful for O_DIRECT files. */
1631 if (cmd == LL_IOC_SETFLAGS) {
1632 if ((flags & LL_FILE_IGNORE_LOCK) &&
1633 !(file->f_flags & O_DIRECT)) {
1634 CERROR("%s: unable to disable locking on "
1635 "non-O_DIRECT file\n", current->comm);
1639 fd->fd_flags |= flags;
1641 fd->fd_flags &= ~flags;
1644 case LL_IOC_LOV_SETSTRIPE:
1645 RETURN(ll_lov_setstripe(inode, file, arg));
1646 case LL_IOC_LOV_SETEA:
1647 RETURN(ll_lov_setea(inode, file, arg));
1648 case LL_IOC_LOV_GETSTRIPE:
1649 RETURN(ll_lov_getstripe(inode, arg));
1650 case LL_IOC_RECREATE_OBJ:
1651 RETURN(ll_lov_recreate_obj(inode, file, arg));
1652 case FSFILT_IOC_FIEMAP: {
1653 struct ll_user_fiemap *fiemap_s;
1654 size_t num_bytes, ret_bytes;
1655 unsigned int extent_count;
1658 /* Get the extent count so we can calculate the size of
1659 * required fiemap buffer */
1660 if (get_user(extent_count,
1661 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
/* NOTE(review): extent_count comes from userspace; presumably the
 * elided lines bound it before this multiplication -- verify, since
 * an unchecked value could overflow num_bytes. */
1663 num_bytes = sizeof(*fiemap_s) + (extent_count *
1664 sizeof(struct ll_fiemap_extent));
1665 OBD_VMALLOC(fiemap_s, num_bytes);
1666 if (fiemap_s == NULL)
1669 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1671 GOTO(error, rc = -EFAULT);
/* Unsupported flags: report back which ones we'd accept, then fail
 * with -EBADR per the fiemap flag-negotiation convention. */
1673 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1674 fiemap_s->fm_flags = fiemap_s->fm_flags &
1675 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1676 if (copy_to_user((char *)arg, fiemap_s,
1678 GOTO(error, rc = -EFAULT);
1680 GOTO(error, rc = -EBADR);
1683 /* If fm_extent_count is non-zero, read the first extent since
1684 * it is used to calculate end_offset and device from previous
1687 if (copy_from_user(&fiemap_s->fm_extents[0],
1688 (char __user *)arg + sizeof(*fiemap_s),
1689 sizeof(struct ll_fiemap_extent)))
1690 GOTO(error, rc = -EFAULT);
/* FIEMAP_FLAG_SYNC: flush dirty pages before mapping extents. */
1693 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1696 rc = filemap_fdatawrite(inode->i_mapping);
1701 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
/* Copy back the header plus however many extents were mapped. */
1705 ret_bytes = sizeof(struct ll_user_fiemap);
1707 if (extent_count != 0)
1708 ret_bytes += (fiemap_s->fm_mapped_extents *
1709 sizeof(struct ll_fiemap_extent));
1711 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1715 OBD_VFREE(fiemap_s, num_bytes);
1718 case FSFILT_IOC_GETFLAGS:
1719 case FSFILT_IOC_SETFLAGS:
1720 RETURN(ll_iocontrol(inode, file, cmd, arg));
1721 case FSFILT_IOC_GETVERSION_OLD:
1722 case FSFILT_IOC_GETVERSION:
1723 RETURN(put_user(inode->i_generation, (int *)arg));
1724 case LL_IOC_GROUP_LOCK:
1725 RETURN(ll_get_grouplock(inode, file, arg));
1726 case LL_IOC_GROUP_UNLOCK:
1727 RETURN(ll_put_grouplock(inode, file, arg));
1728 case IOC_OBD_STATFS:
1729 RETURN(ll_obd_statfs(inode, (void *)arg));
1731 /* We need to special case any other ioctls we want to handle,
1732 * to send them to the MDS/OST as appropriate and to properly
1733 * network encode the arg field.
1734 case FSFILT_IOC_SETVERSION_OLD:
1735 case FSFILT_IOC_SETVERSION:
1737 case LL_IOC_FLUSHCTX:
1738 RETURN(ll_flush_ctx(inode));
1739 case LL_IOC_PATH2FID: {
1740 if (copy_to_user((void *)arg, ll_inode2fid(inode),
1741 sizeof(struct lu_fid)))
1746 case OBD_IOC_FID2PATH:
1747 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
/* Unknown cmd: try dynamically-registered handlers, then punt to the
 * data export's generic iocontrol. */
1753 ll_iocontrol_call(inode, file, cmd, arg, &err))
1756 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.  For SEEK_END the cluster-wide file size must
 * be fetched first (cl_glimpse_size) so i_size is current; the size is
 * then read under the inode size lock.  Offsets are validated against
 * the filesystem's maximum file size before f_pos is updated.
 * NOTE(review): elided excerpt -- retval declaration, the nonblock
 * usage, -EINVAL paths and the RETURN are not visible here.
 */
1762 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1764 struct inode *inode = file->f_dentry->d_inode;
1767 retval = offset + ((origin == 2) ? i_size_read(inode) :
1768 (origin == 1) ? file->f_pos : 0);
1769 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1770 inode->i_ino, inode->i_generation, inode, retval, retval,
1771 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1772 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1774 if (origin == 2) { /* SEEK_END */
1775 int nonblock = 0, rc;
1777 if (file->f_flags & O_NONBLOCK)
1778 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* Refresh i_size from the OSTs before seeking relative to EOF. */
1780 rc = cl_glimpse_size(inode);
1784 ll_inode_size_lock(inode, 0);
1785 offset += i_size_read(inode);
1786 ll_inode_size_unlock(inode, 0);
1787 } else if (origin == 1) { /* SEEK_CUR */
1788 offset += file->f_pos;
1792 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1793 if (offset != file->f_pos) {
1794 file->f_pos = offset;
/*
 * fsync: wait for in-flight page I/O, surface any asynchronous write
 * errors recorded on the inode/stripes, sync metadata via the MDS
 * (md_sync) and, for striped files, sync data on the OSTs (obd_sync)
 * over the full object range [0, OBD_OBJECT_EOF].
 * NOTE(review): elided excerpt -- rc/err declarations, oa allocation,
 * capa releases and the RETURN are not visible here.
 */
1802 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1804 struct inode *inode = dentry->d_inode;
1805 struct ll_inode_info *lli = ll_i2info(inode);
1806 struct lov_stripe_md *lsm = lli->lli_smd;
1807 struct ptlrpc_request *req;
1808 struct obd_capa *oc;
1811 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1812 inode->i_generation, inode);
1813 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1815 /* fsync's caller has already called _fdata{sync,write}, we want
1816 * that IO to finish before calling the osc and mdc sync methods */
1817 rc = filemap_fdatawait(inode->i_mapping);
1819 /* catch async errors that were recorded back when async writeback
1820 * failed for pages in this mapping. */
/* Consume the sticky async error so it is reported exactly once. */
1821 err = lli->lli_async_rc;
1822 lli->lli_async_rc = 0;
1826 err = lov_test_and_clear_async_rc(lsm);
/* Metadata sync through the MDS. */
1831 oc = ll_mdscapa_get(inode);
1832 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1838 ptlrpc_req_finished(req);
1845 RETURN(rc ? rc : -ENOMEM);
/* Data sync: describe the object and flush it on the OSTs. */
1847 oa->o_id = lsm->lsm_object_id;
1848 oa->o_gr = lsm->lsm_object_gr;
1849 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1850 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1851 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1854 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1855 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1856 0, OBD_OBJECT_EOF, oc);
/*
 * POSIX fcntl/flock lock handler: translates a struct file_lock into an
 * LDLM flock enqueue against the MDS.  LCK_NL is used to encode unlock
 * requests; after a successful server-side operation the corresponding
 * local lock state is updated (ll_flock_lock_file_wait /
 * posix_lock_file_wait).
 * NOTE(review): elided excerpt -- flags/rc declarations, the cmd switch
 * cases (F_GETLK/F_SETLK/F_SETLKW labels), some break statements and
 * RETURNs are not visible here.
 */
1866 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1868 struct inode *inode = file->f_dentry->d_inode;
1869 struct ll_sb_info *sbi = ll_i2sbi(inode);
1870 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1871 .ei_cb_cp =ldlm_flock_completion_ast,
1872 .ei_cbdata = file_lock };
1873 struct md_op_data *op_data;
1874 struct lustre_handle lockh = {0};
1875 ldlm_policy_data_t flock;
1880 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1881 inode->i_ino, file_lock);
1883 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1885 if (file_lock->fl_flags & FL_FLOCK) {
1886 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1887 /* set missing params for flock() calls */
1888 file_lock->fl_end = OFFSET_MAX;
1889 file_lock->fl_pid = current->tgid;
/* Translate the VFS lock description into LDLM policy data. */
1891 flock.l_flock.pid = file_lock->fl_pid;
1892 flock.l_flock.start = file_lock->fl_start;
1893 flock.l_flock.end = file_lock->fl_end;
/* Map lock type to an LDLM mode: read -> PR, write -> PW. */
1895 switch (file_lock->fl_type) {
1897 einfo.ei_mode = LCK_PR;
1900 /* An unlock request may or may not have any relation to
1901 * existing locks so we may not be able to pass a lock handle
1902 * via a normal ldlm_lock_cancel() request. The request may even
1903 * unlock a byte range in the middle of an existing lock. In
1904 * order to process an unlock request we need all of the same
1905 * information that is given with a normal read or write record
1906 * lock request. To avoid creating another ldlm unlock (cancel)
1907 * message we'll treat a LCK_NL flock request as an unlock. */
1908 einfo.ei_mode = LCK_NL;
1911 einfo.ei_mode = LCK_PW;
1914 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* Non-blocking set request: fail rather than wait. */
1929 flags = LDLM_FL_BLOCK_NOWAIT;
/* F_GETLK-style query: test only, do not acquire. */
1935 flags = LDLM_FL_TEST_LOCK;
1936 /* Save the old mode so that if the mode in the lock changes we
1937 * can decrement the appropriate reader or writer refcount. */
1938 file_lock->fl_type = einfo.ei_mode;
1941 CERROR("unknown fcntl lock command: %d\n", cmd);
1945 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1946 LUSTRE_OPC_ANY, NULL);
1947 if (IS_ERR(op_data))
1948 RETURN(PTR_ERR(op_data));
1950 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
1951 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
1952 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
/* Enqueue the flock lock on the MDS. */
1954 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
1955 op_data, &lockh, &flock, 0, NULL /* req */, flags);
1957 ll_finish_md_op_data(op_data);
/* Mirror the server result into the local VFS lock bookkeeping. */
1959 if ((file_lock->fl_flags & FL_FLOCK) &&
1960 (rc == 0 || file_lock->fl_type == F_UNLCK))
1961 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
1962 #ifdef HAVE_F_OP_FLOCK
1963 if ((file_lock->fl_flags & FL_POSIX) &&
1964 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
1965 !(flags & LDLM_FL_TEST_LOCK))
1966 posix_lock_file_wait(file, file_lock);
/*
 * Lock handler installed for -o noflock mounts (see
 * ll_file_operations_noflock below); the body is elided in this
 * excerpt -- per the table's comment it returns ENOSYS for flock calls.
 */
1972 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test whether this client already holds an MDS inodebits lock covering
 * @bits on @inode, in any of CR/CW/PR/PW modes.  LDLM_FL_TEST_LOCK
 * means the match does not take a reference on the lock.
 * NOTE(review): elided excerpt -- flags/fid declarations and the
 * RETURN values are not visible here.
 */
1979 int ll_have_md_lock(struct inode *inode, __u64 bits)
1981 struct lustre_handle lockh;
1982 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
1990 fid = &ll_i2info(inode)->lli_fid;
1991 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
1993 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
1994 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
1995 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Like ll_have_md_lock() but without LDLM_FL_TEST_LOCK: on success the
 * matched lock is referenced and its handle returned via @lockh, so the
 * caller owns a reference it must later release.  Returns the granted
 * ldlm mode (presumably 0/LCK_MINMODE when nothing matches -- the
 * return path is elided here).
 */
2001 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2002 struct lustre_handle *lockh)
2004 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2010 fid = &ll_i2info(inode)->lli_fid;
2011 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2013 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2014 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2015 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Common epilogue for revalidation: -ENOENT means the inode was
 * unlinked on the MDS while we still hold it -- treated as success for
 * regular files/dirs; any other error is logged.
 * NOTE(review): elided excerpt -- the nlink update and return values
 * are not visible here.
 */
2019 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2020 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2021 * and return success */
2023 /* This path cannot be hit for regular files unless in
2024 * case of obscure races, so no need to to validate
2026 if (!S_ISREG(inode->i_mode) &&
2027 !S_ISDIR(inode->i_mode))
2032 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate @dentry's inode attributes against the MDS.  Two paths:
 *  - servers supporting OBD_CONNECT_ATTRFID: a fid-based IT_GETATTR
 *    intent lock (md_intent_lock), after which the dentry may be
 *    dropped if the file was unlinked;
 *  - otherwise, a plain md_getattr, but only when no covering MDS
 *    inodebits lock (@ibits) is already cached locally.
 * NOTE(review): heavily elided excerpt -- rc/req handling, several
 * GOTO/RETURN lines and the out label are not visible here.
 */
2040 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2043 struct inode *inode = dentry->d_inode;
2044 struct ptlrpc_request *req = NULL;
2045 struct ll_sb_info *sbi;
2046 struct obd_export *exp;
/* Negative dentry here is unexpected; loudly ask for a report. */
2051 CERROR("REPORT THIS LINE TO PETER\n");
2054 sbi = ll_i2sbi(inode);
2056 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2057 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2059 exp = ll_i2mdexp(inode);
2061 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2062 struct lookup_intent oit = { .it_op = IT_GETATTR };
2063 struct md_op_data *op_data;
2065 /* Call getattr by fid, so do not provide name at all. */
2066 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2067 dentry->d_inode, NULL, 0, 0,
2068 LUSTRE_OPC_ANY, NULL);
2069 if (IS_ERR(op_data))
2070 RETURN(PTR_ERR(op_data));
/* M_CHECK_STALE makes the intent detect a stale inode generation. */
2072 oit.it_create_mode |= M_CHECK_STALE;
2073 rc = md_intent_lock(exp, op_data, NULL, 0,
2074 /* we are not interested in name
2077 ll_md_blocking_ast, 0);
2078 ll_finish_md_op_data(op_data);
2079 oit.it_create_mode &= ~M_CHECK_STALE;
2081 rc = ll_inode_revalidate_fini(inode, rc);
2085 rc = ll_revalidate_it_finish(req, &oit, dentry);
2087 ll_intent_release(&oit);
2091 /* Unlinked? Unhash dentry, so it is not picked up later by
2092 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2093 here to preserve get_cwd functionality on 2.6.
2095 if (!dentry->d_inode->i_nlink) {
2096 spin_lock(&dcache_lock);
2097 ll_drop_dentry(dentry);
2098 spin_unlock(&dcache_lock);
2101 ll_finish_locks(&oit, dentry);
/* Legacy path: only hit the wire when no cached MDS lock covers the
 * requested inodebits. */
2102 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2104 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2105 obd_valid valid = OBD_MD_FLGETATTR;
2106 struct obd_capa *oc;
/* Regular files: also request the striping EA so size info from the
 * OSTs can be interpreted. */
2109 if (S_ISREG(inode->i_mode)) {
2110 rc = ll_get_max_mdsize(sbi, &ealen);
2113 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2115 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2116 * capa for this inode. Because we only keep capas of dirs
2118 oc = ll_mdscapa_get(inode);
2119 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2123 rc = ll_inode_revalidate_fini(inode, rc);
2127 rc = ll_prep_inode(&inode, req, NULL);
2130 ptlrpc_req_finished(req);
/*
 * Full revalidation: refresh MDS attributes (UPDATE|LOOKUP bits), then
 * glimpse the file size from the OSTs -- unless no objects have been
 * allocated yet (lli_smd == NULL), in which case size is skipped.
 * NOTE(review): elided excerpt -- rc declaration and RETURNs are not
 * visible here.
 */
2134 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2139 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2140 MDS_INODELOCK_LOOKUP);
2142 /* if object not yet allocated, don't validate size */
2143 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2146 /* cl_glimpse_size will prefer locally cached writes if they extend
2150 rc = cl_glimpse_size(dentry->d_inode);
/*
 * getattr with an explicit lookup intent: revalidate the inode first,
 * then fill *stat from the (now fresh) inode fields.  i_size/i_blocks
 * are read under the inode size lock for a consistent pair.
 * NOTE(review): elided excerpt -- the early-return on revalidation
 * failure and the final RETURN are not visible here.
 */
2155 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2156 struct lookup_intent *it, struct kstat *stat)
2158 struct inode *inode = de->d_inode;
2161 res = ll_inode_revalidate_it(de, it);
2162 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2167 stat->dev = inode->i_sb->s_dev;
2168 stat->ino = inode->i_ino;
2169 stat->mode = inode->i_mode;
2170 stat->nlink = inode->i_nlink;
2171 stat->uid = inode->i_uid;
2172 stat->gid = inode->i_gid;
2173 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2174 stat->atime = inode->i_atime;
2175 stat->mtime = inode->i_mtime;
2176 stat->ctime = inode->i_ctime;
2177 #ifdef HAVE_INODE_BLKSIZE
2178 stat->blksize = inode->i_blksize;
2180 stat->blksize = 1 << inode->i_blkbits;
/* size/blocks must be read together under the size lock. */
2183 ll_inode_size_lock(inode, 0);
2184 stat->size = i_size_read(inode);
2185 stat->blocks = inode->i_blocks;
2186 ll_inode_size_unlock(inode, 0);
/* VFS ->getattr entry point: wraps ll_getattr_it() with a default
 * IT_GETATTR intent. */
2190 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2192 struct lookup_intent it = { .it_op = IT_GETATTR };
2194 return ll_getattr_it(mnt, de, &it, stat);
2197 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * VFS ->fiemap entry point (kernels with linux/fiemap.h).  Relies on
 * the ll_user_fiemap header being laid out immediately before the
 * extents array pointed to by fi_extents_start, recovers it with
 * pointer arithmetic, and delegates to ll_do_fiemap().
 * NOTE(review): elided excerpt -- rc declaration, start/len usage and
 * the RETURN are not visible; the pointer reconstruction assumes the
 * caller allocated the buffer that way -- verify against the fiemap
 * ioctl path.
 */
2198 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2199 __u64 start, __u64 len)
2202 struct ll_user_fiemap *fiemap = (struct ll_user_fiemap*)(
2203 fieinfo->fi_extents_start - sizeof(ll_user_fiemap));
2205 rc = ll_do_fiemap(inode, fiemap, sizeof(*fiemap) +
2206 fiemap->fm_extent_count *
2207 sizeof(struct ll_fiemap_extent));
/* Propagate the mapping results back into the VFS fiemap info. */
2209 fieinfo->fi_flags = fiemap->fm_flags;
2210 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
/*
 * ACL permission callback for generic_permission(): takes a reference
 * on the cached POSIX ACL under lli_lock, evaluates @mask against it,
 * and releases it.  Compiled out (the #else branch is elided here) when
 * CONFIG_FS_POSIX_ACL is off.
 */
2218 int lustre_check_acl(struct inode *inode, int mask)
2220 #ifdef CONFIG_FS_POSIX_ACL
2221 struct ll_inode_info *lli = ll_i2info(inode);
2222 struct posix_acl *acl;
/* Dup under the spinlock so the ACL can't be swapped mid-check. */
2226 spin_lock(&lli->lli_lock);
2227 acl = posix_acl_dup(lli->lli_posix_acl);
2228 spin_unlock(&lli->lli_lock);
2233 rc = posix_acl_permission(inode, acl, mask);
2234 posix_acl_release(acl);
2242 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2243 #ifndef HAVE_INODE_PERMISION_2ARGS
/*
 * ->permission for 2.6.10+ kernels (2- or 3-arg prototype chosen by
 * configure).  Revalidates the root inode first (it is never looked
 * up), defers to remote-permission checking for RMT_CLIENT mounts, and
 * otherwise uses generic_permission() with lustre_check_acl as the ACL
 * callback.
 */
2244 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2246 int ll_inode_permission(struct inode *inode, int mask)
2252 /* as root inode are NOT getting validated in lookup operation,
2253 * need to do it before permission check. */
2255 if (inode == inode->i_sb->s_root->d_inode) {
2256 struct lookup_intent it = { .it_op = IT_LOOKUP };
2258 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2259 MDS_INODELOCK_LOOKUP);
2264 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2265 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2267 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2268 return lustre_check_remote_perm(inode, mask);
2270 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2271 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * ->permission for pre-2.6.10 kernels: open-coded equivalent of
 * generic_permission() -- owner/group/other mode bits, ACL fallback for
 * the group class, and capability overrides (DAC_OVERRIDE /
 * DAC_READ_SEARCH) at the end.
 * NOTE(review): elided excerpt -- several returns, the mode-shift for
 * owner/group classes and some labels are not visible here.
 */
2276 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2278 int mode = inode->i_mode;
2281 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2282 inode->i_ino, inode->i_generation, inode, mask);
2284 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2285 return lustre_check_remote_perm(inode, mask);
2287 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* Writes denied on read-only mounts and immutable inodes. */
2289 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2290 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2292 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2294 if (current->fsuid == inode->i_uid) {
2297 if (((mode >> 3) & mask & S_IRWXO) != mask)
/* Group bits didn't grant access: consult the ACL. */
2299 rc = lustre_check_acl(inode, mask);
2303 goto check_capabilities;
2307 if (in_group_p(inode->i_gid))
2310 if ((mode & mask & S_IRWXO) == mask)
/* Capability overrides: DAC_OVERRIDE except for exec on non-exec
 * regular files; DAC_READ_SEARCH for reads and directory search. */
2314 if (!(mask & MAY_EXEC) ||
2315 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2316 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2319 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2320 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select the vectored-I/O file_operations member names for this kernel:
 * old kernels expose ->readv/->writev, newer ones ->aio_read/->aio_write.
 * The tables below use .READ_METHOD/.WRITE_METHOD so one definition
 * works for both. */
2327 #ifdef HAVE_FILE_READV
2328 #define READ_METHOD readv
2329 #define READ_FUNCTION ll_file_readv
2330 #define WRITE_METHOD writev
2331 #define WRITE_FUNCTION ll_file_writev
2333 #define READ_METHOD aio_read
2334 #define READ_FUNCTION ll_file_aio_read
2335 #define WRITE_METHOD aio_write
2336 #define WRITE_FUNCTION ll_file_aio_write
2339 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no ->flock/->lock members, so the kernel's
 * local (single-node) lock handling applies. */
2340 struct file_operations ll_file_operations = {
2341 .read = ll_file_read,
2342 .READ_METHOD = READ_FUNCTION,
2343 .write = ll_file_write,
2344 .WRITE_METHOD = WRITE_FUNCTION,
2345 .ioctl = ll_file_ioctl,
2346 .open = ll_file_open,
2347 .release = ll_file_release,
2348 .mmap = ll_file_mmap,
2349 .llseek = ll_file_seek,
2350 #ifdef HAVE_KERNEL_SENDFILE
2351 .sendfile = ll_file_sendfile,
2353 #ifdef HAVE_KERNEL_SPLICE_READ
2354 .splice_read = ll_file_splice_read,
/* file_operations for the default (cluster-coherent flock) mount mode:
 * identical to ll_file_operations plus ->flock/->lock wired to
 * ll_file_flock, which enqueues flock locks on the MDS. */
2359 struct file_operations ll_file_operations_flock = {
2360 .read = ll_file_read,
2361 .READ_METHOD = READ_FUNCTION,
2362 .write = ll_file_write,
2363 .WRITE_METHOD = WRITE_FUNCTION,
2364 .ioctl = ll_file_ioctl,
2365 .open = ll_file_open,
2366 .release = ll_file_release,
2367 .mmap = ll_file_mmap,
2368 .llseek = ll_file_seek,
2369 #ifdef HAVE_KERNEL_SENDFILE
2370 .sendfile = ll_file_sendfile,
2372 #ifdef HAVE_KERNEL_SPLICE_READ
2373 .splice_read = ll_file_splice_read,
2376 #ifdef HAVE_F_OP_FLOCK
2377 .flock = ll_file_flock,
2379 .lock = ll_file_flock
2382 /* These are for -o noflock - to return ENOSYS on flock calls */
2383 struct file_operations ll_file_operations_noflock = {
2384 .read = ll_file_read,
2385 .READ_METHOD = READ_FUNCTION,
2386 .write = ll_file_write,
2387 .WRITE_METHOD = WRITE_FUNCTION,
2388 .ioctl = ll_file_ioctl,
2389 .open = ll_file_open,
2390 .release = ll_file_release,
2391 .mmap = ll_file_mmap,
2392 .llseek = ll_file_seek,
2393 #ifdef HAVE_KERNEL_SENDFILE
2394 .sendfile = ll_file_sendfile,
2396 #ifdef HAVE_KERNEL_SPLICE_READ
2397 .splice_read = ll_file_splice_read,
2400 #ifdef HAVE_F_OP_FLOCK
/* Both lock entry points route to the ENOSYS stub. */
2401 .flock = ll_file_noflock,
2403 .lock = ll_file_noflock
/* inode_operations shared by all regular-file mount modes.  setattr vs
 * setattr_raw is chosen by whether the kernel carries the VFS intent
 * patches. */
2406 struct inode_operations ll_file_inode_operations = {
2407 #ifdef HAVE_VFS_INTENT_PATCHES
2408 .setattr_raw = ll_setattr_raw,
2410 .setattr = ll_setattr,
2411 .truncate = ll_truncate,
2412 .getattr = ll_getattr,
2413 .permission = ll_inode_permission,
2414 .setxattr = ll_setxattr,
2415 .getxattr = ll_getxattr,
2416 .listxattr = ll_listxattr,
2417 .removexattr = ll_removexattr,
2418 #ifdef HAVE_LINUX_FIEMAP_H
2419 .fiemap = ll_fiemap,
2423 /* dynamic ioctl number support routins */
/* Global registry of dynamically-registered ioctl handlers.  ioc_sem
 * protects ioc_head: writers (register/unregister) take it exclusively,
 * dispatch (ll_iocontrol_call) takes it shared. */
2424 static struct llioc_ctl_data {
2425 struct rw_semaphore ioc_sem;
2426 struct list_head ioc_head;
2428 __RWSEM_INITIALIZER(llioc.ioc_sem),
2429 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the ioctl numbers it claims
 * (flexible trailing array iocd_cmd of iocd_count entries). */
2434 struct list_head iocd_list;
2435 unsigned int iocd_size;
2436 llioc_callback_t iocd_cb;
2437 unsigned int iocd_count;
2438 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: @cb will be offered the @count
 * ioctl numbers in @cmd[] by ll_file_ioctl's fallback path.  Returns an
 * opaque cookie (presumably the llioc_data pointer -- the RETURN is
 * elided here) to pass to ll_iocontrol_unregister().
 */
2441 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2444 struct llioc_data *in_data = NULL;
2447 if (cb == NULL || cmd == NULL ||
2448 count > LLIOC_MAX_CMD || count < 0)
/* Header plus the trailing iocd_cmd[] array, sized in one allocation;
 * iocd_size remembers the total for the matching OBD_FREE. */
2451 size = sizeof(*in_data) + count * sizeof(unsigned int);
2452 OBD_ALLOC(in_data, size);
2453 if (in_data == NULL)
2456 memset(in_data, 0, sizeof(*in_data));
2457 in_data->iocd_size = size;
2458 in_data->iocd_cb = cb;
2459 in_data->iocd_count = count;
2460 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2462 down_write(&llioc.ioc_sem);
2463 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2464 up_write(&llioc.ioc_sem);
/*
 * Remove and free a handler previously returned by
 * ll_iocontrol_register().  Walks the registry under the write
 * semaphore; the match test against @magic is in an elided line.  Warns
 * if the cookie is not found.
 */
2469 void ll_iocontrol_unregister(void *magic)
2471 struct llioc_data *tmp;
2476 down_write(&llioc.ioc_sem);
2477 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* Capture the allocation size before unlinking/freeing the entry. */
2479 unsigned int size = tmp->iocd_size;
2481 list_del(&tmp->iocd_list);
2482 up_write(&llioc.ioc_sem);
2484 OBD_FREE(tmp, size);
2488 up_write(&llioc.ioc_sem);
2490 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2493 EXPORT_SYMBOL(ll_iocontrol_register);
2494 EXPORT_SYMBOL(ll_iocontrol_unregister);
2496 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2497 unsigned int cmd, unsigned long arg, int *rcp)
2499 enum llioc_iter ret = LLIOC_CONT;
2500 struct llioc_data *data;
2501 int rc = -EINVAL, i;
2503 down_read(&llioc.ioc_sem);
2504 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2505 for (i = 0; i < data->iocd_count; i++) {
2506 if (cmd != data->iocd_cmd[i])
2509 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2513 if (ret == LLIOC_STOP)
2516 up_read(&llioc.ioc_sem);