1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-open-file ll_file_data from its slab cache.
 * CFS_ALLOC_IO permits allocation from the I/O path.
 * NOTE(review): the return of @fd is elided in this view of the file. */
54 struct ll_file_data *ll_file_data_get(void)
56         struct ll_file_data *fd;
58         OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Release a ll_file_data back to its slab cache; counterpart of
 * ll_file_data_get(). */
62 static void ll_file_data_put(struct ll_file_data *fd)
65         OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Copy the inode's current attributes (mode, times, size, blocks, flags,
 * ioepoch) plus the open handle @fh and an MDS capability into @op_data,
 * ready to be sent to the MDS. */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* flags need the ll_iattr wrapper to reach ia_attr_flags */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
79 ll_inode_to_ext_flags(inode->i_flags);
80 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
82 op_data->op_handle = *fh;
83 op_data->op_capa1 = ll_mdscapa_get(inode);
87 * Closes the IO epoch and packs all the attributes into @op_data for
/* the close RPC. Size/blocks are only trusted when SOM is supported and
 * the handle was opened for write on a regular file. */
90 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
91 struct obd_client_handle *och)
95 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
96 ATTR_MTIME_SET | ATTR_CTIME_SET;
98 if (!(och->och_flags & FMODE_WRITE))
/* No SOM support or not a regular file: let the MDS keep size/blocks */
101 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
102 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
104 ll_ioepoch_close(inode, op_data, &och, 0);
107 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
108 ll_prep_md_op_data(op_data, inode, NULL, NULL,
109 0, 0, LUSTRE_OPC_ANY, NULL);
/* Send the MDC close RPC for open handle @och, performing a Size-on-MDS
 * update when the server requests it, then tear down replay data and
 * poison the handle cookie. NOTE(review): several error-path lines are
 * elided in this view. */
113 static int ll_close_inode_openhandle(struct obd_export *md_exp,
115 struct obd_client_handle *och)
117 struct obd_export *exp = ll_i2mdexp(inode);
118 struct md_op_data *op_data;
119 struct ptlrpc_request *req = NULL;
120 struct obd_device *obd = class_exp2obd(exp);
127 * XXX: in case of LMV, is this correct to access
130 CERROR("Invalid MDC connection handle "LPX64"\n",
131 ll_i2mdexp(inode)->exp_handle.h_cookie);
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr back to MDS. */
147 rc = ll_som_update(inode, op_data);
149 CERROR("inode %lu mdc Size-on-MDS update failed: "
150 "rc = %d\n", inode->i_ino, rc);
154 CERROR("inode %lu mdc close failed: rc = %d\n",
157 ll_finish_md_op_data(op_data);
160 rc = ll_objects_destroy(req, inode);
162 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* SOM epoch still open on a write handle: defer via DONE_WRITING */
169 if (exp_connect_som(exp) && !epoch_close &&
170 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
171 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
173 md_clear_open_replay_data(md_exp, och);
174 /* Free @och if it is not waiting for DONE_WRITING. */
175 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
178 if (req) /* This is close request */
179 ptlrpc_req_finished(req);
/* Close the MDS open handle of the kind selected by @flags
 * (write/exec/read) if no other local users remain. The per-mode handle
 * pointer and use count are picked under lli_och_sem. */
183 int ll_md_real_close(struct inode *inode, int flags)
185 struct ll_inode_info *lli = ll_i2info(inode);
186 struct obd_client_handle **och_p;
187 struct obd_client_handle *och;
192 if (flags & FMODE_WRITE) {
193 och_p = &lli->lli_mds_write_och;
194 och_usecount = &lli->lli_open_fd_write_count;
195 } else if (flags & FMODE_EXEC) {
196 och_p = &lli->lli_mds_exec_och;
197 och_usecount = &lli->lli_open_fd_exec_count;
199 LASSERT(flags & FMODE_READ);
200 och_p = &lli->lli_mds_read_och;
201 och_usecount = &lli->lli_open_fd_read_count;
204 cfs_down(&lli->lli_och_sem);
205 if (*och_usecount) { /* There are still users of this handle, so
207 cfs_up(&lli->lli_och_sem);
212 cfs_up(&lli->lli_och_sem);
214 if (och) { /* There might be a race and somebody have freed this och
216 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file-descriptor close: drop any group lock, decrement the per-mode
 * open count under lli_och_sem, and close the MDS handle via
 * ll_md_real_close() unless an OPEN DLM lock still covers the file
 * (md_lock_match with LDLM_FL_TEST_LOCK). Finally free the fd and close
 * the capability. */
223 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
226 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
227 struct ll_inode_info *lli = ll_i2info(inode);
231 /* clear group lock, if present */
232 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
233 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
235 /* Let's see if we have good enough OPEN lock on the file and if
236 we can skip talking to MDS */
237 if (file->f_dentry->d_inode) { /* Can this ever be false? */
239 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
240 struct lustre_handle lockh;
241 struct inode *inode = file->f_dentry->d_inode;
242 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
244 cfs_down(&lli->lli_och_sem);
245 if (fd->fd_omode & FMODE_WRITE) {
247 LASSERT(lli->lli_open_fd_write_count);
248 lli->lli_open_fd_write_count--;
249 } else if (fd->fd_omode & FMODE_EXEC) {
251 LASSERT(lli->lli_open_fd_exec_count);
252 lli->lli_open_fd_exec_count--;
255 LASSERT(lli->lli_open_fd_read_count);
256 lli->lli_open_fd_read_count--;
258 cfs_up(&lli->lli_och_sem);
/* no matching OPEN lock cached locally -> must talk to the MDS */
260 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
261 LDLM_IBITS, &policy, lockmode,
263 rc = ll_md_real_close(file->f_dentry->d_inode,
267 CERROR("Releasing a file %p with negative dentry %p. Name %s",
268 file, file->f_dentry, file->f_dentry->d_name.name);
271 LUSTRE_FPRIVATE(file) = NULL;
272 ll_file_data_put(fd);
273 ll_capa_close(inode);
278 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
280 /* While this returns an error code, fput() the caller does not, so we need
281 * to make every effort to clean up all of our state here. Also, applications
282 * rarely check close errors and even if an error is returned they will not
283 * re-try the close call.
/* VFS ->release() hook: remote-ACL cleanup for the root inode, statahead
 * shutdown, async-rc collection from the LOV stripe, then ll_md_close().
 * The root dentry gets only the local fd teardown. */
285 int ll_file_release(struct inode *inode, struct file *file)
287 struct ll_file_data *fd;
288 struct ll_sb_info *sbi = ll_i2sbi(inode);
289 struct ll_inode_info *lli = ll_i2info(inode);
290 struct lov_stripe_md *lsm = lli->lli_smd;
294 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
295 inode->i_generation, inode);
297 #ifdef CONFIG_FS_POSIX_ACL
/* remote-client ACL state is keyed on the root inode and current pid */
298 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
299 inode == inode->i_sb->s_root->d_inode) {
300 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
303 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
304 fd->fd_flags &= ~LL_FILE_RMTACL;
305 rct_del(&sbi->ll_rct, cfs_curproc_pid());
306 et_search_free(&sbi->ll_et, cfs_curproc_pid());
311 if (inode->i_sb->s_root != file->f_dentry)
312 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
313 fd = LUSTRE_FPRIVATE(file);
316 /* The last ref on @file, maybe not the owner pid of statahead.
317 * Different processes can open the same dir, "ll_opendir_key" means:
318 * it is me that should stop the statahead thread. */
319 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
320 ll_stop_statahead(inode, lli->lli_opendir_key);
322 if (inode->i_sb->s_root == file->f_dentry) {
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
329 lov_test_and_clear_async_rc(lsm);
330 lli->lli_async_rc = 0;
332 rc = ll_md_close(sbi->ll_md_exp, inode, file);
334 if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
335 libcfs_debug_dumplog();
/* Enqueue an IT_OPEN intent on the MDS for @file. @lmm/@lmmsize non-NULL
 * means we are setting stripe info rather than requesting an OPEN lock.
 * On success the new inode state is applied via ll_prep_inode() and the
 * obtained lock is attached to the inode with md_set_lock_data(). */
340 static int ll_intent_file_open(struct file *file, void *lmm,
341 int lmmsize, struct lookup_intent *itp)
343 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
344 struct dentry *parent = file->f_dentry->d_parent;
345 const char *name = file->f_dentry->d_name.name;
346 const int len = file->f_dentry->d_name.len;
347 struct md_op_data *op_data;
348 struct ptlrpc_request *req;
355 /* Usually we come here only for NFSD, and we want open lock.
356 But we can also get here with pre 2.6.15 patchless kernels, and in
357 that case that lock is also ok */
358 /* We can also get here if there was cached open handle in revalidate_it
359 * but it disappeared while we were getting from there to ll_file_open.
360 * But this means this file was closed and immediately opened which
361 * makes a good candidate for using OPEN lock */
362 /* If lmmsize & lmm are not 0, we are just setting stripe info
363 * parameters. No need for the open lock */
364 if (!lmm && !lmmsize)
365 itp->it_flags |= MDS_OPEN_LOCK;
367 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
368 file->f_dentry->d_inode, name, len,
369 O_RDWR, LUSTRE_OPC_ANY, NULL);
371 RETURN(PTR_ERR(op_data));
373 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
374 0 /*unused */, &req, ll_md_blocking_ast, 0);
375 ll_finish_md_op_data(op_data);
377 /* reason to keep our own exit path - don't flood the log
378 * with messages with -ESTALE errors.
380 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
381 it_open_error(DISP_OPEN_OPEN, itp))
383 ll_release_openhandle(file->f_dentry, itp);
387 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
388 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
389 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
393 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
394 if (!rc && itp->d.lustre.it_lock_mode)
395 md_set_lock_data(sbi->ll_md_exp,
396 &itp->d.lustre.it_lock_handle,
397 file->f_dentry->d_inode, NULL);
/* drop the intent's request reference and any lock it pinned */
400 ptlrpc_req_finished(itp->d.lustre.it_data);
401 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
402 ll_intent_drop_lock(itp);
408 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
409 * not believe attributes if a few ioepoch holders exist. Attributes for
410 * previous ioepoch if new one is opened are also skipped by MDS.
412 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* only record a non-zero epoch that differs from the current one */
414 if (ioepoch && lli->lli_ioepoch != ioepoch) {
415 lli->lli_ioepoch = ioepoch;
416 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
417 ioepoch, PFID(&lli->lli_fid));
/* Fill an obd_client_handle from the MDT reply body carried by intent
 * @it: open handle, fid, flags and io epoch; then register the request
 * for open replay. Returns md_set_open_replay_data()'s result. */
421 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
422 struct lookup_intent *it, struct obd_client_handle *och)
424 struct ptlrpc_request *req = it->d.lustre.it_data;
425 struct mdt_body *body;
429 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
430 LASSERT(body != NULL); /* reply already checked out */
432 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
433 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
434 och->och_fid = lli->lli_fid;
435 och->och_flags = it->it_flags;
436 ll_ioepoch_open(lli, body->ioepoch);
438 return md_set_open_replay_data(md_exp, och, req);
/* Finish the client-side part of an open: optionally fill @och from the
 * intent reply, attach @fd as the file's private data, and initialize
 * readahead state. @fd becomes owned by @file on success. */
441 int ll_local_open(struct file *file, struct lookup_intent *it,
442 struct ll_file_data *fd, struct obd_client_handle *och)
444 struct inode *inode = file->f_dentry->d_inode;
445 struct ll_inode_info *lli = ll_i2info(inode);
448 LASSERT(!LUSTRE_FPRIVATE(file));
453 struct ptlrpc_request *req = it->d.lustre.it_data;
454 struct mdt_body *body;
457 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
461 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
462 if ((it->it_flags & FMODE_WRITE) &&
463 (body->valid & OBD_MD_FLSIZE))
464 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
465 lli->lli_ioepoch, PFID(&lli->lli_fid));
468 LUSTRE_FPRIVATE(file) = fd;
469 ll_readahead_init(inode, &fd->fd_ras);
/* remember the open mode for the matching close */
470 fd->fd_omode = it->it_flags;
474 /* Open a file, and (for the very first open) create objects on the OSTs at
475 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
476 * creation or open until ll_lov_setstripe() ioctl is called. We grab
477 * lli_open_sem to ensure no other process will create objects, send the
478 * stripe MD to the MDS, or try to destroy the objects if that fails.
480 * If we already have the stripe MD locally then we don't request it in
481 * md_open(), by passing a lmm_size = 0.
483 * It is up to the application to ensure no other processes open this file
484 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
485 * used. We might be able to avoid races of that sort by getting lli_open_sem
486 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
487 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
489 int ll_file_open(struct inode *inode, struct file *file)
491 struct ll_inode_info *lli = ll_i2info(inode);
492 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
493 .it_flags = file->f_flags };
494 struct lov_stripe_md *lsm;
495 struct ptlrpc_request *req = NULL;
496 struct obd_client_handle **och_p;
498 struct ll_file_data *fd;
499 int rc = 0, opendir_set = 0;
502 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
503 inode->i_generation, inode, file->f_flags);
505 #ifdef HAVE_VFS_INTENT_PATCHES
508 it = file->private_data; /* XXX: compat macro */
509 file->private_data = NULL; /* prevent ll_local_open assertion */
512 fd = ll_file_data_get();
/* Directory open: claim the statahead "owner" slot if it is free. */
517 if (S_ISDIR(inode->i_mode)) {
518 cfs_spin_lock(&lli->lli_lock);
519 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
520 LASSERT(lli->lli_sai == NULL);
521 lli->lli_opendir_key = fd;
522 lli->lli_opendir_pid = cfs_curproc_pid();
525 cfs_spin_unlock(&lli->lli_lock);
/* Root dentry: no MDS open needed, just attach the fd. */
528 if (inode->i_sb->s_root == file->f_dentry) {
529 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from the VFS: build our own from f_flags. */
533 if (!it || !it->d.lustre.it_disposition) {
534 /* Convert f_flags into access mode. We cannot use file->f_mode,
535 * because everything but O_ACCMODE mask was stripped from
537 if ((oit.it_flags + 1) & O_ACCMODE)
539 if (file->f_flags & O_TRUNC)
540 oit.it_flags |= FMODE_WRITE;
542 /* kernel only call f_op->open in dentry_open. filp_open calls
543 * dentry_open after call to open_namei that checks permissions.
544 * Only nfsd_open call dentry_open directly without checking
545 * permissions and because of that this code below is safe. */
546 if (oit.it_flags & FMODE_WRITE)
547 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
549 /* We do not want O_EXCL here, presumably we opened the file
550 * already? XXX - NFS implications? */
551 oit.it_flags &= ~O_EXCL;
553 /* bug20584, if "it_flags" contains O_CREAT, the file will be
554 * created if necessary, then "IT_CREAT" should be set to keep
555 * consistent with it */
556 if (oit.it_flags & O_CREAT)
557 oit.it_op |= IT_CREAT;
563 /* Let's see if we have file open on MDS already. */
564 if (it->it_flags & FMODE_WRITE) {
565 och_p = &lli->lli_mds_write_och;
566 och_usecount = &lli->lli_open_fd_write_count;
567 } else if (it->it_flags & FMODE_EXEC) {
568 och_p = &lli->lli_mds_exec_och;
569 och_usecount = &lli->lli_open_fd_exec_count;
571 och_p = &lli->lli_mds_read_och;
572 och_usecount = &lli->lli_open_fd_read_count;
575 cfs_down(&lli->lli_och_sem);
576 if (*och_p) { /* Open handle is present */
577 if (it_disposition(it, DISP_OPEN_OPEN)) {
578 /* Well, there's extra open request that we do not need,
579 let's close it somehow. This will decref request. */
580 rc = it_open_error(DISP_OPEN_OPEN, it);
582 cfs_up(&lli->lli_och_sem);
583 ll_file_data_put(fd);
584 GOTO(out_openerr, rc);
586 ll_release_openhandle(file->f_dentry, it);
587 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* reuse the existing MDS handle for this local open */
592 rc = ll_local_open(file, it, fd, NULL);
595 cfs_up(&lli->lli_och_sem);
596 ll_file_data_put(fd);
597 GOTO(out_openerr, rc);
600 LASSERT(*och_usecount == 0);
601 if (!it->d.lustre.it_disposition) {
602 /* We cannot just request lock handle now, new ELC code
603 means that one of other OPEN locks for this file
604 could be cancelled, and since blocking ast handler
605 would attempt to grab och_sem as well, that would
606 result in a deadlock */
607 cfs_up(&lli->lli_och_sem);
608 it->it_create_mode |= M_CHECK_STALE;
609 rc = ll_intent_file_open(file, NULL, 0, it);
610 it->it_create_mode &= ~M_CHECK_STALE;
612 ll_file_data_put(fd);
613 GOTO(out_openerr, rc);
616 /* Got some error? Release the request */
617 if (it->d.lustre.it_status < 0) {
618 req = it->d.lustre.it_data;
619 ptlrpc_req_finished(req);
623 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
625 ll_file_data_put(fd);
626 GOTO(out_och_free, rc = -ENOMEM);
629 req = it->d.lustre.it_data;
631 /* md_intent_lock() didn't get a request ref if there was an
632 * open error, so don't do cleanup on the request here
634 /* XXX (green): Should not we bail out on any error here, not
635 * just open error? */
636 rc = it_open_error(DISP_OPEN_OPEN, it);
638 ll_file_data_put(fd);
639 GOTO(out_och_free, rc);
642 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
643 rc = ll_local_open(file, it, fd, *och_p);
645 ll_file_data_put(fd);
646 GOTO(out_och_free, rc);
649 cfs_up(&lli->lli_och_sem);
651 /* Must do this outside lli_och_sem lock to prevent deadlock where
652 different kind of OPEN lock for this same inode gets cancelled
653 by ldlm_cancel_lru */
654 if (!S_ISREG(inode->i_mode))
661 if (file->f_flags & O_LOV_DELAY_CREATE ||
662 !(file->f_mode & FMODE_WRITE)) {
663 CDEBUG(D_INODE, "object creation was delayed\n");
667 file->f_flags &= ~O_LOV_DELAY_CREATE;
670 ptlrpc_req_finished(req);
672 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
676 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
677 *och_p = NULL; /* OBD_FREE writes some magic there */
680 cfs_up(&lli->lli_och_sem);
/* undo the statahead ownership claimed above on error */
682 if (opendir_set != 0)
683 ll_stop_statahead(inode, lli->lli_opendir_key);
689 /* Fills the obdo with the attributes for the lsm */
/* Issue an async OST getattr for @lsm through a ptlrpc request set and
 * wait for it; @sync requests the server-side lock (OBD_FL_SRVLOCK).
 * On return o_valid is masked down to the refreshed time/size fields. */
690 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
691 struct obd_capa *capa, struct obdo *obdo,
692 __u64 ioepoch, int sync)
694 struct ptlrpc_request_set *set;
695 struct obd_info oinfo = { { { 0 } } };
700 LASSERT(lsm != NULL);
704 oinfo.oi_oa->o_id = lsm->lsm_object_id;
705 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
706 oinfo.oi_oa->o_mode = S_IFREG;
707 oinfo.oi_oa->o_ioepoch = ioepoch;
708 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
709 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
710 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
711 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
712 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
713 oinfo.oi_capa = capa;
715 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
716 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
719 set = ptlrpc_prep_set();
721 CERROR("can't allocate ptlrpc set\n");
724 rc = obd_getattr_async(exp, &oinfo, set);
726 rc = ptlrpc_set_wait(set);
727 ptlrpc_set_destroy(set);
730 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
731 OBD_MD_FLATIME | OBD_MD_FLMTIME |
732 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
737 * Performs the getattr on the inode and updates its fields.
738 * If @sync != 0, perform the getattr under the server-side lock.
740 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
741 __u64 ioepoch, int sync)
743 struct ll_inode_info *lli = ll_i2info(inode);
744 struct obd_capa *capa = ll_mdscapa_get(inode);
/* delegate to ll_lsm_getattr() against the data export, then fold the
 * returned attributes back into the inode */
748 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
749 capa, obdo, ioepoch, sync);
752 obdo_refresh_inode(inode, obdo, obdo->o_valid);
754 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
755 lli->lli_smd->lsm_object_id, i_size_read(inode),
756 (unsigned long long)inode->i_blocks,
757 (unsigned long)ll_inode_blksize(inode));
/* Merge the per-stripe lock value blocks (size, blocks, a/m/ctime) into
 * the inode under the inode size lock. */
762 int ll_merge_lvb(struct inode *inode)
764 struct ll_inode_info *lli = ll_i2info(inode);
765 struct ll_sb_info *sbi = ll_i2sbi(inode);
771 ll_inode_size_lock(inode, 1);
772 inode_init_lvb(inode, &lvb);
773 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
774 cl_isize_write_nolock(inode, lvb.lvb_size);
775 inode->i_blocks = lvb.lvb_blocks;
777 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
778 LTIME_S(inode->i_atime) = lvb.lvb_atime;
779 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
780 ll_inode_size_unlock(inode, 1);
/* Glimpse helper for ioctls: fetch OST attributes for @lsm into a local
 * obdo and copy size/blocks/times into the caller's stat buffer @st. */
785 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
788 struct obdo obdo = { 0 };
791 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
793 st->st_size = obdo.o_size;
794 st->st_blocks = obdo.o_blocks;
795 st->st_mtime = obdo.o_mtime;
796 st->st_atime = obdo.o_atime;
797 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read or write on @file: zero it, record
 * nonblock/append flags, attach the cl_object, and pick the lock
 * requirement (never for nolock files + no server lock; mandatory for
 * O_APPEND; otherwise maybe). */
802 void ll_io_init(struct cl_io *io, const struct file *file, int write)
804 struct inode *inode = file->f_dentry->d_inode;
806 memset(io, 0, sizeof *io);
807 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
809 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
810 io->ci_obj = ll_i2info(inode)->lli_clob;
811 io->ci_lockreq = CILR_MAYBE;
812 if (ll_file_nolock(file)) {
813 io->ci_lockreq = CILR_NEVER;
814 io->ci_no_srvlock = 1;
815 } else if (file->f_flags & O_APPEND) {
816 io->ci_lockreq = CILR_MANDATORY;
/* Common engine for all read/write entry points: set up the cl_io for
 * @iot, copy the per-subtype arguments (normal iov/iocb, sendfile,
 * splice) into the vvp/ccc io state, serialize non-group-locked writes
 * on lli_write_sem, run cl_io_loop(), and propagate the resulting file
 * position back through @ppos. */
820 static ssize_t ll_file_io_generic(const struct lu_env *env,
821 struct vvp_io_args *args, struct file *file,
822 enum cl_io_type iot, loff_t *ppos, size_t count)
828 io = &ccc_env_info(env)->cti_io;
829 ll_io_init(io, file, iot == CIT_WRITE);
831 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
832 struct vvp_io *vio = vvp_env_io(env);
833 struct ccc_io *cio = ccc_env_io(env);
834 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
835 int write_sem_locked = 0;
837 cio->cui_fd = LUSTRE_FPRIVATE(file);
838 vio->cui_io_subtype = args->via_io_subtype;
840 switch (vio->cui_io_subtype) {
842 cio->cui_iov = args->u.normal.via_iov;
843 cio->cui_nrsegs = args->u.normal.via_nrsegs;
844 #ifndef HAVE_FILE_WRITEV
845 cio->cui_iocb = args->u.normal.via_iocb;
/* writes not covered by a group lock are serialized per inode */
847 if ((iot == CIT_WRITE) &&
848 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
849 cfs_down(&lli->lli_write_sem);
850 write_sem_locked = 1;
854 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
855 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
858 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
859 vio->u.splice.cui_flags = args->u.splice.via_flags;
862 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
865 result = cl_io_loop(env, io);
866 if (write_sem_locked)
867 cfs_up(&lli->lli_write_sem);
869 /* cl_io_rw_init() handled IO */
870 result = io->ci_result;
873 if (io->ci_nob > 0) {
875 *ppos = io->u.ci_wr.wr.crw_pos;
883 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute the total byte count; segments
 * failing access_ok() truncate the transfer at that point. */
885 static int ll_file_get_iov_count(const struct iovec *iov,
886 unsigned long *nr_segs, size_t *count)
891 for (seg = 0; seg < *nr_segs; seg++) {
892 const struct iovec *iv = &iov[seg];
895 * If any segment has a negative length, or the cumulative
896 * length ever wraps negative then return -EINVAL.
899 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
901 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
906 cnt -= iv->iov_len; /* This segment is no good */
913 #ifdef HAVE_FILE_READV
/* ->readv() entry point (pre-AIO kernels): validate the iovec, set up
 * normal-IO args in the cl environment and run the generic engine. */
914 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
915 unsigned long nr_segs, loff_t *ppos)
918 struct vvp_io_args *args;
924 result = ll_file_get_iov_count(iov, &nr_segs, &count);
928 env = cl_env_get(&refcheck);
930 RETURN(PTR_ERR(env));
932 args = vvp_env_args(env, IO_NORMAL);
933 args->u.normal.via_iov = (struct iovec *)iov;
934 args->u.normal.via_nrsegs = nr_segs;
936 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
937 cl_env_put(env, &refcheck);
/* ->read() entry point (readv-based variant): wrap the user buffer in a
 * single-segment iovec and forward to ll_file_readv(). */
941 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
945 struct iovec *local_iov;
950 env = cl_env_get(&refcheck);
952 RETURN(PTR_ERR(env));
954 local_iov = &vvp_env_info(env)->vti_local_iov;
955 local_iov->iov_base = (void __user *)buf;
956 local_iov->iov_len = count;
957 result = ll_file_readv(file, local_iov, 1, ppos);
958 cl_env_put(env, &refcheck);
/* ->aio_read() entry point: validate the iovec, record the iocb in the
 * normal-IO args and run the generic engine at iocb->ki_pos. */
963 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
964 unsigned long nr_segs, loff_t pos)
967 struct vvp_io_args *args;
973 result = ll_file_get_iov_count(iov, &nr_segs, &count);
977 env = cl_env_get(&refcheck);
979 RETURN(PTR_ERR(env));
981 args = vvp_env_args(env, IO_NORMAL);
982 args->u.normal.via_iov = (struct iovec *)iov;
983 args->u.normal.via_nrsegs = nr_segs;
984 args->u.normal.via_iocb = iocb;
986 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
987 &iocb->ki_pos, count);
988 cl_env_put(env, &refcheck);
/* ->read() entry point (AIO-based variant): build a synchronous kiocb
 * plus single-segment iovec and forward to ll_file_aio_read(), copying
 * the updated position back to @ppos. */
992 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
996 struct iovec *local_iov;
1002 env = cl_env_get(&refcheck);
1004 RETURN(PTR_ERR(env));
1006 local_iov = &vvp_env_info(env)->vti_local_iov;
1007 kiocb = &vvp_env_info(env)->vti_kiocb;
1008 local_iov->iov_base = (void __user *)buf;
1009 local_iov->iov_len = count;
1010 init_sync_kiocb(kiocb, file);
1011 kiocb->ki_pos = *ppos;
1012 kiocb->ki_left = count;
1014 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1015 *ppos = kiocb->ki_pos;
1017 cl_env_put(env, &refcheck);
1023 * Write to a file (through the page cache).
1025 #ifdef HAVE_FILE_WRITEV
/* ->writev() entry point (pre-AIO kernels): mirror of ll_file_readv()
 * with CIT_WRITE. */
1026 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1027 unsigned long nr_segs, loff_t *ppos)
1030 struct vvp_io_args *args;
1036 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1040 env = cl_env_get(&refcheck);
1042 RETURN(PTR_ERR(env));
1044 args = vvp_env_args(env, IO_NORMAL);
1045 args->u.normal.via_iov = (struct iovec *)iov;
1046 args->u.normal.via_nrsegs = nr_segs;
1048 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1049 cl_env_put(env, &refcheck);
/* ->write() entry point (writev-based variant): wrap the user buffer in
 * a single-segment iovec and forward to ll_file_writev(). */
1053 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1057 struct iovec *local_iov;
1062 env = cl_env_get(&refcheck);
1064 RETURN(PTR_ERR(env));
1066 local_iov = &vvp_env_info(env)->vti_local_iov;
1067 local_iov->iov_base = (void __user *)buf;
1068 local_iov->iov_len = count;
1070 result = ll_file_writev(file, local_iov, 1, ppos);
1071 cl_env_put(env, &refcheck);
1075 #else /* AIO stuff */
/* ->aio_write() entry point: mirror of ll_file_aio_read() with
 * CIT_WRITE at iocb->ki_pos. */
1076 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1077 unsigned long nr_segs, loff_t pos)
1080 struct vvp_io_args *args;
1086 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1090 env = cl_env_get(&refcheck);
1092 RETURN(PTR_ERR(env));
1094 args = vvp_env_args(env, IO_NORMAL);
1095 args->u.normal.via_iov = (struct iovec *)iov;
1096 args->u.normal.via_nrsegs = nr_segs;
1097 args->u.normal.via_iocb = iocb;
1099 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1100 &iocb->ki_pos, count);
1101 cl_env_put(env, &refcheck);
/* ->write() entry point (AIO-based variant): build a synchronous kiocb
 * plus single-segment iovec and forward to ll_file_aio_write(), copying
 * the updated position back to @ppos. */
1105 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1109 struct iovec *local_iov;
1110 struct kiocb *kiocb;
1115 env = cl_env_get(&refcheck);
1117 RETURN(PTR_ERR(env));
1119 local_iov = &vvp_env_info(env)->vti_local_iov;
1120 kiocb = &vvp_env_info(env)->vti_kiocb;
1121 local_iov->iov_base = (void __user *)buf;
1122 local_iov->iov_len = count;
1123 init_sync_kiocb(kiocb, file);
1124 kiocb->ki_pos = *ppos;
1125 kiocb->ki_left = count;
1127 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1128 *ppos = kiocb->ki_pos;
1130 cl_env_put(env, &refcheck);
1136 #ifdef HAVE_KERNEL_SENDFILE
1138 * Send file content (through pagecache) somewhere with helper
/* ->sendfile() entry point: run the generic engine with sendfile-subtype
 * args (actor callback + target). */
1140 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1141 read_actor_t actor, void *target)
1144 struct vvp_io_args *args;
1149 env = cl_env_get(&refcheck);
1151 RETURN(PTR_ERR(env));
1153 args = vvp_env_args(env, IO_SENDFILE);
1154 args->u.sendfile.via_target = target;
1155 args->u.sendfile.via_actor = actor;
1157 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1158 cl_env_put(env, &refcheck);
1163 #ifdef HAVE_KERNEL_SPLICE_READ
1165 * Send file content (through pagecache) somewhere with helper
/* ->splice_read() entry point: run the generic engine with
 * splice-subtype args (pipe + flags). */
1167 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1168 struct pipe_inode_info *pipe, size_t count,
1172 struct vvp_io_args *args;
1177 env = cl_env_get(&refcheck);
1179 RETURN(PTR_ERR(env));
1181 args = vvp_env_args(env, IO_SPLICE);
1182 args->u.splice.via_pipe = pipe;
1183 args->u.splice.via_flags = flags;
1185 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1186 cl_env_put(env, &refcheck);
/* LL_IOC_RECREATE_OBJ handler (root only): copy the user's request,
 * clone the current stripe MD under the inode size lock and ask the OSC
 * layer to recreate the named object via obd_create() with
 * OBD_FL_RECREATE_OBJS. */
1191 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1194 struct obd_export *exp = ll_i2dtexp(inode);
1195 struct ll_recreate_obj ucreatp;
1196 struct obd_trans_info oti = { 0 };
1197 struct obdo *oa = NULL;
1200 struct lov_stripe_md *lsm, *lsm2;
1203 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1206 if (cfs_copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1207 sizeof(struct ll_recreate_obj)))
1214 ll_inode_size_lock(inode, 0);
1215 lsm = ll_i2info(inode)->lli_smd;
1217 GOTO(out, rc = -ENOENT);
1218 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1219 (lsm->lsm_stripe_count));
1221 OBD_ALLOC(lsm2, lsm_size);
1223 GOTO(out, rc = -ENOMEM);
1225 oa->o_id = ucreatp.lrc_id;
1226 oa->o_gr = ucreatp.lrc_group;
/* o_nlink carries the target OST index for recreation */
1227 oa->o_nlink = ucreatp.lrc_ost_idx;
1228 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1229 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1230 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1231 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1233 memcpy(lsm2, lsm, lsm_size);
1234 rc = obd_create(exp, oa, &lsm2, &oti);
1236 OBD_FREE(lsm2, lsm_size);
1239 ll_inode_size_unlock(inode, 0);
/* Set the file's striping EA by performing an intent open that carries
 * @lum to the MDS. Fails early if a stripe MD already exists on the
 * inode; the temporary open handle is released before returning. */
1244 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1245 int flags, struct lov_user_md *lum, int lum_size)
1247 struct lov_stripe_md *lsm;
1248 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1252 ll_inode_size_lock(inode, 0);
1253 lsm = ll_i2info(inode)->lli_smd;
1255 ll_inode_size_unlock(inode, 0);
1256 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1261 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1264 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1265 GOTO(out_req_free, rc = -ENOENT);
1266 rc = oit.d.lustre.it_status;
1268 GOTO(out_req_free, rc);
1270 ll_release_openhandle(file->f_dentry, &oit);
1273 ll_inode_size_unlock(inode, 0);
1274 ll_intent_release(&oit);
1277 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA of @filename (child of @inode) via md_getattr_name().
 * Validates the magic (V1/V3 only) and byte-swaps the lum and, for
 * regular files, its per-object array when the host is big-endian.
 * On success *lmmp/*lmm_size point into @request's reply buffer, which
 * the caller must eventually release. */
1281 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1282 struct lov_mds_md **lmmp, int *lmm_size,
1283 struct ptlrpc_request **request)
1285 struct ll_sb_info *sbi = ll_i2sbi(inode);
1286 struct mdt_body *body;
1287 struct lov_mds_md *lmm = NULL;
1288 struct ptlrpc_request *req = NULL;
1289 struct md_op_data *op_data;
1292 rc = ll_get_max_mdsize(sbi, &lmmsize);
1296 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1297 strlen(filename), lmmsize,
1298 LUSTRE_OPC_ANY, NULL);
1299 if (op_data == NULL)
1302 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1303 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1304 ll_finish_md_op_data(op_data);
1306 CDEBUG(D_INFO, "md_getattr_name failed "
1307 "on %s: rc %d\n", filename, rc);
1311 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1312 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1314 lmmsize = body->eadatasize;
1316 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1318 GOTO(out, rc = -ENODATA);
1321 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1322 LASSERT(lmm != NULL);
1324 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1325 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1326 GOTO(out, rc = -EPROTO);
1330 * This is coming from the MDS, so is probably in
1331 * little endian. We convert it to host endian before
1332 * passing it to userspace.
/* LOV_MAGIC != le32 form of itself only on big-endian hosts */
1334 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1335 /* if function called for directory - we should
1336 * avoid swabbing non-existent lsm objects */
1337 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1338 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1339 if (S_ISREG(body->mode))
1340 lustre_swab_lov_user_md_objects(
1341 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1342 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1343 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1344 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1345 if (S_ISREG(body->mode))
1346 lustre_swab_lov_user_md_objects(
1347 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1348 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1354 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (with one trailing OST
 * object entry) from userspace and apply it via ll_lov_setstripe_ea_info().
 * Restricted to CAP_SYS_ADMIN since MDS_OPEN_HAS_OBJS trusts the objects.
 *
 * NOTE(review): interior lines are elided in this view.
 */
1359 static int ll_lov_setea(struct inode *inode, struct file *file,
1362 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1363 struct lov_user_md *lump;
1364 int lum_size = sizeof(struct lov_user_md) +
1365 sizeof(struct lov_user_ost_data);
1369 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1372 OBD_ALLOC(lump, lum_size);
/* Free the buffer on the copy-from-user failure path. */
1376 if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1377 OBD_FREE(lump, lum_size);
1381 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1383 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: read a V1 or V3 lov_user_md from userspace
 * (trying the smaller V1 first, then re-reading as V3 if the magic says so),
 * set the striping, and echo the resulting stripe info back to userspace.
 *
 * NOTE(review): interior lines are elided in this view.
 */
1387 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1390 struct lov_user_md_v3 lumv3;
1391 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1392 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1393 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1396 int flags = FMODE_WRITE;
1399 /* first try with v1 which is smaller than v3 */
1400 lum_size = sizeof(struct lov_user_md_v1);
1401 if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
/* V3 magic: re-copy the full (larger) V3 structure. */
1404 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1405 lum_size = sizeof(struct lov_user_md_v3);
1406 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1410 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* On success, report the effective striping back to the caller. */
1412 put_user(0, &lumv1p->lmm_stripe_count);
1413 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1414 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE handler: hand the inode's stripe metadata to the
 * LOV layer, which copies it out to userspace.
 * NOTE(review): interior lines are elided in this view.
 */
1420 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1422 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1427 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * LL_IOC_GROUP_LOCK handler: take a group (GID-based) extent lock for this
 * open file.  fd_flags/fd_grouplock are protected by lli->lli_lock; the
 * lock is dropped around cl_get_grouplock() since that call can block,
 * hence the re-check for a racing locker afterwards.
 *
 * NOTE(review): interior lines are elided in this view.
 */
1431 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1433 struct ll_inode_info *lli = ll_i2info(inode);
1434 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1435 struct ccc_grouplock grouplock;
/* Group locks make no sense when locking is disabled for this file. */
1439 if (ll_file_nolock(file))
1440 RETURN(-EOPNOTSUPP);
1442 cfs_spin_lock(&lli->lli_lock);
1443 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1444 CWARN("group lock already existed with gid %lu\n",
1445 fd->fd_grouplock.cg_gid);
1446 cfs_spin_unlock(&lli->lli_lock);
1449 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1450 cfs_spin_unlock(&lli->lli_lock);
/* May block waiting for the DLM lock unless O_NONBLOCK was set. */
1452 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1453 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check under the spinlock: another thread may have raced us. */
1457 cfs_spin_lock(&lli->lli_lock);
1458 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1459 cfs_spin_unlock(&lli->lli_lock);
1460 CERROR("another thread just won the race\n");
1461 cl_put_grouplock(&grouplock);
1465 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1466 fd->fd_grouplock = grouplock;
1467 cfs_spin_unlock(&lli->lli_lock);
1469 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock previously taken on
 * this open file, after validating that one is held and that its GID
 * matches @arg.  State changes happen under lli->lli_lock; the actual DLM
 * release (cl_put_grouplock) is done after dropping the spinlock.
 *
 * NOTE(review): interior lines are elided in this view.
 */
1473 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1475 struct ll_inode_info *lli = ll_i2info(inode);
1476 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1477 struct ccc_grouplock grouplock;
1480 cfs_spin_lock(&lli->lli_lock);
1481 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1482 cfs_spin_unlock(&lli->lli_lock);
1483 CWARN("no group lock held\n");
1486 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* The caller must release with the same GID it locked with. */
1488 if (fd->fd_grouplock.cg_gid != arg) {
1489 CWARN("group lock %lu doesn't match current id %lu\n",
1490 arg, fd->fd_grouplock.cg_gid);
1491 cfs_spin_unlock(&lli->lli_lock);
/* Detach the grouplock from the fd before releasing it. */
1495 grouplock = fd->fd_grouplock;
1496 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1497 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1498 cfs_spin_unlock(&lli->lli_lock);
1500 cl_put_grouplock(&grouplock);
1501 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1506 * Close inode open handle
1508 * \param dentry [in] dentry which contains the inode
1509 * \param it [in,out] intent which contains open info and result
1512 * \retval <0 failure
/*
 * Close the MDS open handle carried in @it (if any) for @dentry's inode.
 * Used when an open intent succeeded but the caller does not want to keep
 * the file open (e.g. after setstripe-by-open).
 *
 * NOTE(review): interior lines are elided in this view.
 */
1514 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1516 struct inode *inode = dentry->d_inode;
1517 struct obd_client_handle *och;
1523 /* Root ? Do nothing. */
1524 if (dentry->d_inode->i_sb->s_root == dentry)
1527 /* No open handle to close? Move away */
1528 if (!it_disposition(it, DISP_OPEN_OPEN))
1531 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1533 OBD_ALLOC(och, sizeof(*och));
1535 GOTO(out, rc = -ENOMEM);
/* Fill the client handle from the intent, then close it on the MDS. */
1537 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1538 ll_i2info(inode), it, och);
1540 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1543 /* this one is in place of ll_file_open */
1544 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1545 ptlrpc_req_finished(it->d.lustre.it_data);
1546 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1551 * Get size for inode for which FIEMAP mapping is requested.
1552 * Make the FIEMAP get_info call and returns the result.
/*
 * Perform the FIEMAP extent-mapping call for @inode: validate the flags,
 * optionally flush dirty pages (FIEMAP_FLAG_SYNC), then ask the data
 * export via obd_get_info(KEY_FIEMAP) to fill @fiemap (@num_bytes total).
 *
 * NOTE(review): interior lines are elided in this view.
 */
1554 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1557 struct obd_export *exp = ll_i2dtexp(inode);
1558 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1559 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1560 int vallen = num_bytes;
1564 /* Checks for fiemap flags */
1565 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* Report back which flags we do support before failing. */
1566 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1570 /* Check for FIEMAP_FLAG_SYNC */
1571 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1572 rc = filemap_fdatawrite(inode->i_mapping);
1577 /* If the stripe_count > 1 and the application does not understand
1578 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1580 if (lsm->lsm_stripe_count > 1 &&
1581 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1584 fm_key.oa.o_id = lsm->lsm_object_id;
1585 fm_key.oa.o_gr = lsm->lsm_object_gr;
1586 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1588 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1591 /* If filesize is 0, then there would be no objects for mapping */
1592 if (fm_key.oa.o_size == 0) {
1593 fiemap->fm_mapped_extents = 0;
1597 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1599 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1601 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH handler: copy the fixed-size request header from
 * userspace to learn gf_pathlen, allocate an output buffer large enough
 * for the path, resolve via obd_iocontrol on the MDC export, and copy the
 * result back.
 *
 * NOTE(review): interior lines are elided in this view.
 */
1606 int ll_fid2path(struct obd_export *exp, void *arg)
1608 struct getinfo_fid2path *gfout, *gfin;
1612 /* Need to get the buflen */
1613 OBD_ALLOC_PTR(gfin);
1616 if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
/* Output buffer = header + user-requested path length. */
1621 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1622 OBD_ALLOC(gfout, outsize);
1623 if (gfout == NULL) {
1627 memcpy(gfout, gfin, sizeof(*gfout));
1630 /* Call mdc_iocontrol */
1631 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1634 if (cfs_copy_to_user(arg, gfout, outsize))
1638 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP ioctl path: size a kernel fiemap buffer from the
 * user-supplied fm_extent_count, copy the request (and, when extents are
 * requested, the first extent used as a continuation cookie) in, run
 * ll_do_fiemap(), and copy the header plus mapped extents back out.
 *
 * NOTE(review): interior lines are elided in this view.
 */
1642 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1644 struct ll_user_fiemap *fiemap_s;
1645 size_t num_bytes, ret_bytes;
1646 unsigned int extent_count;
1649 /* Get the extent count so we can calculate the size of
1650 * required fiemap buffer */
1651 if (get_user(extent_count,
1652 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1654 num_bytes = sizeof(*fiemap_s) + (extent_count *
1655 sizeof(struct ll_fiemap_extent));
/* vmalloc: the buffer can be large for big extent counts. */
1657 OBD_VMALLOC(fiemap_s, num_bytes);
1658 if (fiemap_s == NULL)
1661 /* get the fiemap value */
1662 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1664 GOTO(error, rc = -EFAULT);
1666 /* If fm_extent_count is non-zero, read the first extent since
1667 * it is used to calculate end_offset and device from previous
1670 if (copy_from_user(&fiemap_s->fm_extents[0],
1671 (char __user *)arg + sizeof(*fiemap_s),
1672 sizeof(struct ll_fiemap_extent)))
1673 GOTO(error, rc = -EFAULT);
1676 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1680 ret_bytes = sizeof(struct ll_user_fiemap);
/* Only copy back as many extents as were actually mapped. */
1682 if (extent_count != 0)
1683 ret_bytes += (fiemap_s->fm_mapped_extents *
1684 sizeof(struct ll_fiemap_extent));
1686 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1690 OBD_VFREE(fiemap_s, num_bytes);
/*
 * Main ioctl dispatcher for regular files: handles llite-private flag
 * get/set, striping, fiemap, group locks, statfs, fid/path translation,
 * MDT index queries, and finally dynamically registered ioctls
 * (ll_iocontrol_call) before falling through to obd_iocontrol().
 *
 * NOTE(review): interior lines are elided in this view; several case
 * bodies and RETURN statements are not visible.
 */
1694 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1697 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1701 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1702 inode->i_generation, inode, cmd);
1703 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1705 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1706 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1710 case LL_IOC_GETFLAGS:
1711 /* Get the current value of the file flags */
1712 return put_user(fd->fd_flags, (int *)arg);
1713 case LL_IOC_SETFLAGS:
1714 case LL_IOC_CLRFLAGS:
1715 /* Set or clear specific file flags */
1716 /* XXX This probably needs checks to ensure the flags are
1717 * not abused, and to handle any flag side effects.
1719 if (get_user(flags, (int *) arg))
1722 if (cmd == LL_IOC_SETFLAGS) {
/* LL_FILE_IGNORE_LOCK is only safe with O_DIRECT I/O. */
1723 if ((flags & LL_FILE_IGNORE_LOCK) &&
1724 !(file->f_flags & O_DIRECT)) {
1725 CERROR("%s: unable to disable locking on "
1726 "non-O_DIRECT file\n", current->comm);
1730 fd->fd_flags |= flags;
1732 fd->fd_flags &= ~flags;
1735 case LL_IOC_LOV_SETSTRIPE:
1736 RETURN(ll_lov_setstripe(inode, file, arg));
1737 case LL_IOC_LOV_SETEA:
1738 RETURN(ll_lov_setea(inode, file, arg));
1739 case LL_IOC_LOV_GETSTRIPE:
1740 RETURN(ll_lov_getstripe(inode, arg));
1741 case LL_IOC_RECREATE_OBJ:
1742 RETURN(ll_lov_recreate_obj(inode, file, arg));
1743 case FSFILT_IOC_FIEMAP:
1744 RETURN(ll_ioctl_fiemap(inode, arg));
1745 case FSFILT_IOC_GETFLAGS:
1746 case FSFILT_IOC_SETFLAGS:
1747 RETURN(ll_iocontrol(inode, file, cmd, arg));
1748 case FSFILT_IOC_GETVERSION_OLD:
1749 case FSFILT_IOC_GETVERSION:
1750 RETURN(put_user(inode->i_generation, (int *)arg));
1751 case LL_IOC_GROUP_LOCK:
1752 RETURN(ll_get_grouplock(inode, file, arg));
1753 case LL_IOC_GROUP_UNLOCK:
1754 RETURN(ll_put_grouplock(inode, file, arg));
1755 case IOC_OBD_STATFS:
1756 RETURN(ll_obd_statfs(inode, (void *)arg));
1758 /* We need to special case any other ioctls we want to handle,
1759 * to send them to the MDS/OST as appropriate and to properly
1760 * network encode the arg field.
1761 case FSFILT_IOC_SETVERSION_OLD:
1762 case FSFILT_IOC_SETVERSION:
1764 case LL_IOC_FLUSHCTX:
1765 RETURN(ll_flush_ctx(inode));
1766 case LL_IOC_PATH2FID: {
1767 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1768 sizeof(struct lu_fid)))
1773 case OBD_IOC_FID2PATH:
1774 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1776 case LL_IOC_GET_MDTIDX: {
1779 mdtidx = ll_get_mdt_idx(inode);
1783 if (put_user((int)mdtidx, (int*)arg))
/* Unknown command: try dynamically registered handlers first. */
1793 ll_iocontrol_call(inode, file, cmd, arg, &err))
1796 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek for Lustre files.  SEEK_END must glimpse the current file size
 * from the OSTs (cl_glimpse_size) before applying the offset, since the
 * cached i_size may be stale on a distributed filesystem.
 *
 * NOTE(review): interior lines are elided in this view; the `nonblock`
 * value computed below is not visibly consumed in the visible lines.
 */
1802 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1804 struct inode *inode = file->f_dentry->d_inode;
1807 retval = offset + ((origin == 2) ? i_size_read(inode) :
1808 (origin == 1) ? file->f_pos : 0);
1809 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1810 inode->i_ino, inode->i_generation, inode, retval, retval,
1811 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1812 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1814 if (origin == 2) { /* SEEK_END */
1815 int nonblock = 0, rc;
1817 if (file->f_flags & O_NONBLOCK)
1818 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* Refresh i_size from the OSTs before computing the end offset. */
1820 rc = cl_glimpse_size(inode);
1824 offset += i_size_read(inode);
1825 } else if (origin == 1) { /* SEEK_CUR */
1826 offset += file->f_pos;
/* Accept only offsets within the filesystem's maximum file size. */
1830 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1831 if (offset != file->f_pos) {
1832 file->f_pos = offset;
/*
 * fsync: wait for in-flight page writeback, collect any async write
 * errors recorded on the inode/lsm, sync metadata through the MDS
 * (md_sync) and, for striped files, data through the OSTs (obd_sync),
 * using capabilities where configured.
 *
 * NOTE(review): interior lines are elided in this view.
 */
1840 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1842 struct inode *inode = dentry->d_inode;
1843 struct ll_inode_info *lli = ll_i2info(inode);
1844 struct lov_stripe_md *lsm = lli->lli_smd;
1845 struct ptlrpc_request *req;
1846 struct obd_capa *oc;
1849 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1850 inode->i_generation, inode);
1851 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1853 /* fsync's caller has already called _fdata{sync,write}, we want
1854 * that IO to finish before calling the osc and mdc sync methods */
1855 rc = filemap_fdatawait(inode->i_mapping);
1857 /* catch async errors that were recorded back when async writeback
1858 * failed for pages in this mapping. */
1859 err = lli->lli_async_rc;
1860 lli->lli_async_rc = 0;
1864 err = lov_test_and_clear_async_rc(lsm);
/* Metadata sync via the MDS, under an MDS capability if present. */
1869 oc = ll_mdscapa_get(inode);
1870 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1876 ptlrpc_req_finished(req);
1883 RETURN(rc ? rc : -ENOMEM);
/* Data sync: describe the object(s) and flush 0..EOF on the OSTs. */
1885 oa->o_id = lsm->lsm_object_id;
1886 oa->o_gr = lsm->lsm_object_gr;
1887 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1888 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1889 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1892 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1893 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1894 0, OBD_OBJECT_EOF, oc);
/*
 * fcntl/flock lock handler: translate the kernel file_lock into an LDLM
 * flock enqueue on the MDS.  Lock type maps to an LDLM mode (PR/PW, with
 * LCK_NL standing in for unlock); F_GETLK-style commands add
 * LDLM_FL_TEST_LOCK.  After a successful server-side operation the local
 * (BKL/posix) lock state is updated to match.
 *
 * NOTE(review): interior lines are elided in this view, including the
 * switch labels for the lock types and commands.
 */
1904 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1906 struct inode *inode = file->f_dentry->d_inode;
1907 struct ll_sb_info *sbi = ll_i2sbi(inode);
1908 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1909 .ei_cb_cp =ldlm_flock_completion_ast,
1910 .ei_cbdata = file_lock };
1911 struct md_op_data *op_data;
1912 struct lustre_handle lockh = {0};
1913 ldlm_policy_data_t flock;
1918 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1919 inode->i_ino, file_lock);
1921 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1923 if (file_lock->fl_flags & FL_FLOCK) {
1924 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1925 /* set missing params for flock() calls */
1926 file_lock->fl_end = OFFSET_MAX;
1927 file_lock->fl_pid = current->tgid;
1929 flock.l_flock.pid = file_lock->fl_pid;
1930 flock.l_flock.start = file_lock->fl_start;
1931 flock.l_flock.end = file_lock->fl_end;
1933 switch (file_lock->fl_type) {
1935 einfo.ei_mode = LCK_PR;
1938 /* An unlock request may or may not have any relation to
1939 * existing locks so we may not be able to pass a lock handle
1940 * via a normal ldlm_lock_cancel() request. The request may even
1941 * unlock a byte range in the middle of an existing lock. In
1942 * order to process an unlock request we need all of the same
1943 * information that is given with a normal read or write record
1944 * lock request. To avoid creating another ldlm unlock (cancel)
1945 * message we'll treat a LCK_NL flock request as an unlock. */
1946 einfo.ei_mode = LCK_NL;
1949 einfo.ei_mode = LCK_PW;
1952 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* Non-blocking set: tell the server not to wait for the lock. */
1967 flags = LDLM_FL_BLOCK_NOWAIT;
/* Test-only (F_GETLK family): probe without granting. */
1973 flags = LDLM_FL_TEST_LOCK;
1974 /* Save the old mode so that if the mode in the lock changes we
1975 * can decrement the appropriate reader or writer refcount. */
1976 file_lock->fl_type = einfo.ei_mode;
1979 CERROR("unknown fcntl lock command: %d\n", cmd);
1983 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1984 LUSTRE_OPC_ANY, NULL);
1985 if (IS_ERR(op_data))
1986 RETURN(PTR_ERR(op_data));
1988 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
1989 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
1990 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
1992 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
1993 op_data, &lockh, &flock, 0, NULL /* req */, flags);
1995 ll_finish_md_op_data(op_data);
/* Mirror the server result into the local lock bookkeeping. */
1997 if ((file_lock->fl_flags & FL_FLOCK) &&
1998 (rc == 0 || file_lock->fl_type == F_UNLCK))
1999 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2000 #ifdef HAVE_F_OP_FLOCK
2001 if ((file_lock->fl_flags & FL_POSIX) &&
2002 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2003 !(flags & LDLM_FL_TEST_LOCK))
2004 posix_lock_file_wait(file, file_lock);
/* Stub lock handler for "-o noflock" mounts; body elided in this view. */
2010 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking a reference) whether this client already holds a
 * granted MDS inodebits lock covering @bits on @inode, in any of the
 * CR/CW/PR/PW modes.  LDLM_FL_TEST_LOCK makes the match non-acquiring.
 * NOTE(review): interior lines are elided in this view.
 */
2017 int ll_have_md_lock(struct inode *inode, __u64 bits)
2019 struct lustre_handle lockh;
2020 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2028 fid = &ll_i2info(inode)->lli_fid;
2029 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2031 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2032 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2033 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Like ll_have_md_lock() but actually takes a reference on a matching MDS
 * inodebits lock (no LDLM_FL_TEST_LOCK), returning the matched mode and
 * filling *lockh for the caller to release later.
 * NOTE(review): interior lines are elided in this view.
 */
2039 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2040 struct lustre_handle *lockh)
2042 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2048 fid = &ll_i2info(inode)->lli_fid;
2049 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2051 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2052 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2053 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Common tail for revalidation: -ENOENT means the object was unlinked
 * while we held a reference, which is tolerated (treated as success after
 * updating link state); any other error is logged.
 * NOTE(review): interior lines are elided in this view.
 */
2057 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2058         if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2059                               * and return success */
2061                 /* This path cannot be hit for regular files unless in
2062                  * case of obscure races, so no need to to validate
2064                 if (!S_ISREG(inode->i_mode) &&
2065                     !S_ISDIR(inode->i_mode))
2070                 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate @dentry's inode attributes against the MDS.  Two strategies:
 * with OBD_CONNECT_ATTRFID the server supports getattr-by-FID via an
 * IT_GETATTR intent lock; otherwise, if we do not already hold an ibits
 * lock covering @ibits, do a plain md_getattr and refresh the inode from
 * the reply.
 *
 * NOTE(review): interior lines are elided in this view.
 */
2078 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2081 struct inode *inode = dentry->d_inode;
2082 struct ptlrpc_request *req = NULL;
2083 struct ll_sb_info *sbi;
2084 struct obd_export *exp;
/* Debugging aid left in place: should not be reachable. */
2089 CERROR("REPORT THIS LINE TO PETER\n");
2092 sbi = ll_i2sbi(inode);
2094 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2095 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2097 exp = ll_i2mdexp(inode);
2099 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2100 struct lookup_intent oit = { .it_op = IT_GETATTR };
2101 struct md_op_data *op_data;
2103 /* Call getattr by fid, so do not provide name at all. */
2104 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2105 dentry->d_inode, NULL, 0, 0,
2106 LUSTRE_OPC_ANY, NULL);
2107 if (IS_ERR(op_data))
2108 RETURN(PTR_ERR(op_data));
2110 oit.it_create_mode |= M_CHECK_STALE;
2111 rc = md_intent_lock(exp, op_data, NULL, 0,
2112 /* we are not interested in name
2115 ll_md_blocking_ast, 0);
2116 ll_finish_md_op_data(op_data);
2117 oit.it_create_mode &= ~M_CHECK_STALE;
2119 rc = ll_inode_revalidate_fini(inode, rc);
2123 rc = ll_revalidate_it_finish(req, &oit, dentry);
2125 ll_intent_release(&oit);
2129 /* Unlinked? Unhash dentry, so it is not picked up later by
2130 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2131 here to preserve get_cwd functionality on 2.6.
2133 if (!dentry->d_inode->i_nlink) {
2134 cfs_spin_lock(&ll_lookup_lock);
2135 spin_lock(&dcache_lock);
2136 ll_drop_dentry(dentry);
2137 spin_unlock(&dcache_lock);
2138 cfs_spin_unlock(&ll_lookup_lock);
2141 ll_lookup_finish_locks(&oit, dentry);
2142 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2143 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2144 obd_valid valid = OBD_MD_FLGETATTR;
2145 struct md_op_data *op_data;
/* Regular files: also fetch the (bounded-size) striping EA. */
2148 if (S_ISREG(inode->i_mode)) {
2149 rc = ll_get_max_mdsize(sbi, &ealen);
2152 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2155 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2156 0, ealen, LUSTRE_OPC_ANY,
2158 if (op_data == NULL)
2161 op_data->op_valid = valid;
2162 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2163 * capa for this inode. Because we only keep capas of dirs
2165 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2166 ll_finish_md_op_data(op_data);
2168 rc = ll_inode_revalidate_fini(inode, rc);
2172 rc = ll_prep_inode(&inode, req, NULL);
2175 ptlrpc_req_finished(req);
/*
 * Revalidate both UPDATE and LOOKUP ibits, then refresh the file size via
 * a glimpse unless the file has no objects allocated yet (lli_smd == NULL).
 * NOTE(review): interior lines are elided in this view.
 */
2179 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2184 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2185 MDS_INODELOCK_LOOKUP);
2187 /* if object not yet allocated, don't validate size */
2188 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2191 /* cl_glimpse_size will prefer locally cached writes if they extend
2195 rc = cl_glimpse_size(dentry->d_inode);
/*
 * getattr implementation: revalidate the inode first, then copy the
 * (now fresh) inode attributes into @stat.
 * NOTE(review): interior lines are elided in this view.
 */
2200 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2201 struct lookup_intent *it, struct kstat *stat)
2203 struct inode *inode = de->d_inode;
2206 res = ll_inode_revalidate_it(de, it);
2207 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2212 stat->dev = inode->i_sb->s_dev;
2213 stat->ino = inode->i_ino;
2214 stat->mode = inode->i_mode;
2215 stat->nlink = inode->i_nlink;
2216 stat->uid = inode->i_uid;
2217 stat->gid = inode->i_gid;
2218 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2219 stat->atime = inode->i_atime;
2220 stat->mtime = inode->i_mtime;
2221 stat->ctime = inode->i_ctime;
2222 #ifdef HAVE_INODE_BLKSIZE
2223 stat->blksize = inode->i_blksize;
2225 stat->blksize = 1 << inode->i_blkbits;
2228 stat->size = i_size_read(inode);
2229 stat->blocks = inode->i_blocks;
/*
 * VFS ->getattr entry point: wraps ll_getattr_it() with a fresh
 * IT_GETATTR intent.  NOTE(review): braces elided in this view.
 */
2233 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2235 struct lookup_intent it = { .it_op = IT_GETATTR };
2237 return ll_getattr_it(mnt, de, &it, stat);
2240 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * ->fiemap inode operation: marshal the kernel's fiemap_extent_info into
 * an ll_user_fiemap buffer, run ll_do_fiemap(), and copy flags/extent
 * results back.  NOTE(review): interior lines are elided in this view;
 * the extent copy-in/copy-out here targets fieinfo->fi_extents_start,
 * which is a user pointer in some kernels — confirm against the kernel
 * version this tree supports.
 */
2241 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2242 __u64 start, __u64 len)
2246 struct ll_user_fiemap *fiemap;
2247 unsigned int extent_count = fieinfo->fi_extents_max;
2249 num_bytes = sizeof(*fiemap) + (extent_count *
2250 sizeof(struct ll_fiemap_extent));
2251 OBD_VMALLOC(fiemap, num_bytes);
2256 fiemap->fm_flags = fieinfo->fi_flags;
2257 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2258 fiemap->fm_start = start;
2259 fiemap->fm_length = len;
2260 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2261 sizeof(struct ll_fiemap_extent));
2263 rc = ll_do_fiemap(inode, fiemap, num_bytes);
2265 fieinfo->fi_flags = fiemap->fm_flags;
2266 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2267 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2268 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2270 OBD_VFREE(fiemap, num_bytes);
/*
 * POSIX ACL permission check callback for generic_permission(): duplicate
 * the cached ACL under lli_lock, evaluate it, release the reference.
 * Compiled out (behavior elided here) without CONFIG_FS_POSIX_ACL.
 * NOTE(review): interior lines are elided in this view.
 */
2277 int lustre_check_acl(struct inode *inode, int mask)
2279 #ifdef CONFIG_FS_POSIX_ACL
2280 struct ll_inode_info *lli = ll_i2info(inode);
2281 struct posix_acl *acl;
2285 cfs_spin_lock(&lli->lli_lock);
2286 acl = posix_acl_dup(lli->lli_posix_acl);
2287 cfs_spin_unlock(&lli->lli_lock);
2292 rc = posix_acl_permission(inode, acl, mask);
2293 posix_acl_release(acl);
2301 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2302 #ifndef HAVE_INODE_PERMISION_2ARGS
/*
 * ->permission for 2.6.10+ kernels: revalidate the root inode if needed,
 * defer to remote-client permission checking when mounted with
 * LL_SBI_RMT_CLIENT, otherwise use generic_permission() with the ACL
 * callback above.  NOTE(review): interior lines are elided in this view.
 */
2303 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2305 int ll_inode_permission(struct inode *inode, int mask)
2311 /* as root inode are NOT getting validated in lookup operation,
2312 * need to do it before permission check. */
2314 if (inode == inode->i_sb->s_root->d_inode) {
2315 struct lookup_intent it = { .it_op = IT_LOOKUP };
2317 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2318 MDS_INODELOCK_LOOKUP);
2323 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2324 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2326 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2327 return lustre_check_remote_perm(inode, mask);
2329 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2330 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * Pre-2.6.10 variant: open-coded owner/group/other mode checks plus ACL
 * and capability fallbacks, mirroring the kernel's historic permission
 * logic.  NOTE(review): several branch lines are elided in this view.
 */
2335 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2337 int mode = inode->i_mode;
2340 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2341 inode->i_ino, inode->i_generation, inode, mask);
2343 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2344 return lustre_check_remote_perm(inode, mask);
2346 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2348 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2349 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2351 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2353 if (current->fsuid == inode->i_uid) {
2356 if (((mode >> 3) & mask & S_IRWXO) != mask)
2358 rc = lustre_check_acl(inode, mask);
2362 goto check_capabilities;
2366 if (cfs_curproc_is_in_groups(inode->i_gid))
2369 if ((mode & mask & S_IRWXO) == mask)
2373 if (!(mask & MAY_EXEC) ||
2374 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2375 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2378 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2379 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select vectored-I/O method names for the file_operations tables below:
 * older kernels use readv/writev, newer ones aio_read/aio_write. */
2386 #ifdef HAVE_FILE_READV
2387 #define READ_METHOD readv
2388 #define READ_FUNCTION ll_file_readv
2389 #define WRITE_METHOD writev
2390 #define WRITE_FUNCTION ll_file_writev
2392 #define READ_METHOD aio_read
2393 #define READ_FUNCTION ll_file_aio_read
2394 #define WRITE_METHOD aio_write
2395 #define WRITE_FUNCTION ll_file_aio_write
2398 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no .flock/.lock entries, so file locking falls
 * back to the kernel's local (single-node) semantics. */
2399 struct file_operations ll_file_operations = {
2400         .read           = ll_file_read,
2401         .READ_METHOD    = READ_FUNCTION,
2402         .write          = ll_file_write,
2403         .WRITE_METHOD   = WRITE_FUNCTION,
2404         .ioctl          = ll_file_ioctl,
2405         .open           = ll_file_open,
2406         .release        = ll_file_release,
2407         .mmap           = ll_file_mmap,
2408         .llseek         = ll_file_seek,
2409 #ifdef HAVE_KERNEL_SENDFILE
2410         .sendfile       = ll_file_sendfile,
2412 #ifdef HAVE_KERNEL_SPLICE_READ
2413         .splice_read    = ll_file_splice_read,
/* Variant used with "-o flock": cluster-coherent locking via
 * ll_file_flock for both flock() and POSIX fcntl locks. */
2418 struct file_operations ll_file_operations_flock = {
2419         .read           = ll_file_read,
2420         .READ_METHOD    = READ_FUNCTION,
2421         .write          = ll_file_write,
2422         .WRITE_METHOD   = WRITE_FUNCTION,
2423         .ioctl          = ll_file_ioctl,
2424         .open           = ll_file_open,
2425         .release        = ll_file_release,
2426         .mmap           = ll_file_mmap,
2427         .llseek         = ll_file_seek,
2428 #ifdef HAVE_KERNEL_SENDFILE
2429         .sendfile       = ll_file_sendfile,
2431 #ifdef HAVE_KERNEL_SPLICE_READ
2432         .splice_read    = ll_file_splice_read,
2435 #ifdef HAVE_F_OP_FLOCK
2436         .flock          = ll_file_flock,
2438         .lock           = ll_file_flock
2441 /* These are for -o noflock - to return ENOSYS on flock calls */
2442 struct file_operations ll_file_operations_noflock = {
2443         .read           = ll_file_read,
2444         .READ_METHOD    = READ_FUNCTION,
2445         .write          = ll_file_write,
2446         .WRITE_METHOD   = WRITE_FUNCTION,
2447         .ioctl          = ll_file_ioctl,
2448         .open           = ll_file_open,
2449         .release        = ll_file_release,
2450         .mmap           = ll_file_mmap,
2451         .llseek         = ll_file_seek,
2452 #ifdef HAVE_KERNEL_SENDFILE
2453         .sendfile       = ll_file_sendfile,
2455 #ifdef HAVE_KERNEL_SPLICE_READ
2456         .splice_read    = ll_file_splice_read,
2459 #ifdef HAVE_F_OP_FLOCK
2460         .flock          = ll_file_noflock,
2462         .lock           = ll_file_noflock
/* inode_operations for regular files: attribute, xattr, permission and
 * (when available) fiemap handlers. */
2465 struct inode_operations ll_file_inode_operations = {
2466 #ifdef HAVE_VFS_INTENT_PATCHES
2467         .setattr_raw    = ll_setattr_raw,
2469         .setattr        = ll_setattr,
2470         .truncate       = ll_truncate,
2471         .getattr        = ll_getattr,
2472         .permission     = ll_inode_permission,
2473         .setxattr       = ll_setxattr,
2474         .getxattr       = ll_getxattr,
2475         .listxattr      = ll_listxattr,
2476         .removexattr    = ll_removexattr,
2477 #ifdef HAVE_LINUX_FIEMAP_H
2478         .fiemap         = ll_fiemap,
2482 /* dynamic ioctl number support routins */
/* Global registry of dynamically registered ioctl handlers, protected by
 * ioc_sem (readers walk ioc_head; writers add/remove entries). */
2483 static struct llioc_ctl_data {
2484         cfs_rw_semaphore_t      ioc_sem;
2485         cfs_list_t              ioc_head;
2487         __RWSEM_INITIALIZER(llioc.ioc_sem),
2488         CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the list of ioctl numbers it
 * accepts (iocd_cmd is a flexible trailing array of iocd_count entries). */
2493         cfs_list_t              iocd_list;
2494         unsigned int            iocd_size;
2495         llioc_callback_t        iocd_cb;
2496         unsigned int            iocd_count;
2497         unsigned int            iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: @cb is invoked for any of the @count
 * ioctl numbers in @cmd.  Returns an opaque cookie (the allocation) to
 * pass to ll_iocontrol_unregister(), or NULL-ish on bad args/ENOMEM
 * (failure returns elided in this view).
 */
2500 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2503 struct llioc_data *in_data = NULL;
2506 if (cb == NULL || cmd == NULL ||
2507 count > LLIOC_MAX_CMD || count < 0)
2510 size = sizeof(*in_data) + count * sizeof(unsigned int);
2511 OBD_ALLOC(in_data, size);
2512 if (in_data == NULL)
2515 memset(in_data, 0, sizeof(*in_data));
2516 in_data->iocd_size = size;
2517 in_data->iocd_cb = cb;
2518 in_data->iocd_count = count;
2519 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* Publish under the write lock so ll_iocontrol_call readers see a
 * fully initialized entry. */
2521 cfs_down_write(&llioc.ioc_sem);
2522 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2523 cfs_up_write(&llioc.ioc_sem);
/*
 * Unregister a handler previously returned by ll_iocontrol_register():
 * find it by cookie under the write lock, unlink, drop the lock, free.
 * Warns if the cookie is unknown.
 * NOTE(review): the cookie-match condition line is elided in this view.
 */
2528 void ll_iocontrol_unregister(void *magic)
2530 struct llioc_data *tmp;
2535 cfs_down_write(&llioc.ioc_sem);
2536 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2538 unsigned int size = tmp->iocd_size;
2540 cfs_list_del(&tmp->iocd_list);
2541 cfs_up_write(&llioc.ioc_sem);
2543 OBD_FREE(tmp, size);
2547 cfs_up_write(&llioc.ioc_sem);
2549 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2552 EXPORT_SYMBOL(ll_iocontrol_register);
2553 EXPORT_SYMBOL(ll_iocontrol_unregister);
2555 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2556 unsigned int cmd, unsigned long arg, int *rcp)
2558 enum llioc_iter ret = LLIOC_CONT;
2559 struct llioc_data *data;
2560 int rc = -EINVAL, i;
2562 cfs_down_read(&llioc.ioc_sem);
2563 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2564 for (i = 0; i < data->iocd_count; i++) {
2565 if (cmd != data->iocd_cmd[i])
2568 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2572 if (ret == LLIOC_STOP)
2575 cfs_up_read(&llioc.ioc_sem);