1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-open ll_file_data from the ll_file_data_slab cache.
 * NOTE(review): extract is decimated — the return path and braces are
 * not visible here. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Free a per-open ll_file_data back to the ll_file_data_slab cache. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Pack the inode's current attributes (fid, mode, a/m/ctime, size,
 * blocks, flags), the current IO epoch, the open handle @fh and an MDS
 * capability into @op_data for an MDS request. */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* inode flags are translated to on-wire (ext) flag representation */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
79 ll_inode_to_ext_flags(inode->i_flags);
80 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
82 op_data->op_handle = *fh;
83 op_data->op_capa1 = ll_mdscapa_get(inode);
87 * Closes the IO epoch and packs all the attributes into @op_data for
/* Prepare @op_data for an MDS close of handle @och: select the valid
 * attribute bits, close the IO epoch for write handles, and pack the
 * inode attributes together with the open handle. */
90 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
91 struct obd_client_handle *och)
95 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
96 ATTR_MTIME_SET | ATTR_CTIME_SET;
98 if (!(och->och_flags & FMODE_WRITE))
/* Without Size-on-MDS support (or for non-regular files) the MDS
 * also needs the client's size/blocks on close. */
101 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
102 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
104 ll_ioepoch_close(inode, op_data, &och, 0);
107 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
108 ll_prep_md_op_data(op_data, inode, NULL, NULL,
109 0, 0, LUSTRE_OPC_ANY, NULL);
/* Send an MDS close RPC for open handle @och: prepare the close
 * attributes, call md_close(), perform a Size-on-MDS update when the
 * MDS requests one, destroy OST objects listed in the close reply and
 * clear the open replay data.  For a SOM write handle whose epoch is
 * still open, a DONE_WRITING pass is queued instead. */
113 static int ll_close_inode_openhandle(struct obd_export *md_exp,
115 struct obd_client_handle *och)
117 struct obd_export *exp = ll_i2mdexp(inode);
118 struct md_op_data *op_data;
119 struct ptlrpc_request *req = NULL;
120 struct obd_device *obd = class_exp2obd(exp);
127 * XXX: in case of LMV, is this correct to access
130 CERROR("Invalid MDC connection handle "LPX64"\n",
131 ll_i2mdexp(inode)->exp_handle.h_cookie);
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr to back to MDS. */
147 rc = ll_som_update(inode, op_data);
149 CERROR("inode %lu mdc Size-on-MDS update failed: "
150 "rc = %d\n", inode->i_ino, rc);
154 CERROR("inode %lu mdc close failed: rc = %d\n",
157 ll_finish_md_op_data(op_data);
160 rc = ll_objects_destroy(req, inode);
162 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* Epoch still open for a SOM write handle: defer to DONE_WRITING. */
169 if (exp_connect_som(exp) && !epoch_close &&
170 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
171 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
173 md_clear_open_replay_data(md_exp, och);
174 /* Free @och if it is not waiting for DONE_WRITING. */
175 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
178 if (req) /* This is close request */
179 ptlrpc_req_finished(req);
/* Really close the MDS open handle for @inode that matches @flags
 * (write / exec / read).  The handle is released only when its use
 * count drops to zero; the och pointer is sampled under lli_och_sem to
 * avoid racing with other closers. */
183 int ll_md_real_close(struct inode *inode, int flags)
185 struct ll_inode_info *lli = ll_i2info(inode);
186 struct obd_client_handle **och_p;
187 struct obd_client_handle *och;
/* Pick the och slot and use counter matching the open mode. */
192 if (flags & FMODE_WRITE) {
193 och_p = &lli->lli_mds_write_och;
194 och_usecount = &lli->lli_open_fd_write_count;
195 } else if (flags & FMODE_EXEC) {
196 och_p = &lli->lli_mds_exec_och;
197 och_usecount = &lli->lli_open_fd_exec_count;
199 LASSERT(flags & FMODE_READ);
200 och_p = &lli->lli_mds_read_och;
201 och_usecount = &lli->lli_open_fd_read_count;
204 cfs_down(&lli->lli_och_sem);
205 if (*och_usecount) { /* There are still users of this handle, so
207 cfs_up(&lli->lli_och_sem);
212 cfs_up(&lli->lli_och_sem);
214 if (och) { /* There might be a race and somebody have freed this och
216 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file-descriptor close: drop a possible group lock, decrement the
 * open-mode use count under lli_och_sem, and skip the MDS close RPC
 * when a matching OPEN DLM lock is still cached (md_lock_match);
 * otherwise fall through to ll_md_real_close().  Finally detach and
 * free the ll_file_data and close the capability. */
223 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
226 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
227 struct ll_inode_info *lli = ll_i2info(inode);
231 /* clear group lock, if present */
232 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
233 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
235 /* Let's see if we have good enough OPEN lock on the file and if
236 we can skip talking to MDS */
237 if (file->f_dentry->d_inode) { /* Can this ever be false? */
239 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
240 struct lustre_handle lockh;
241 struct inode *inode = file->f_dentry->d_inode;
242 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
244 cfs_down(&lli->lli_och_sem);
245 if (fd->fd_omode & FMODE_WRITE) {
247 LASSERT(lli->lli_open_fd_write_count);
248 lli->lli_open_fd_write_count--;
249 } else if (fd->fd_omode & FMODE_EXEC) {
251 LASSERT(lli->lli_open_fd_exec_count);
252 lli->lli_open_fd_exec_count--;
255 LASSERT(lli->lli_open_fd_read_count);
256 lli->lli_open_fd_read_count--;
258 cfs_up(&lli->lli_och_sem);
/* No cached OPEN ibits lock -> must do the real MDS close. */
260 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
261 LDLM_IBITS, &policy, lockmode,
263 rc = ll_md_real_close(file->f_dentry->d_inode,
267 CERROR("Releasing a file %p with negative dentry %p. Name %s",
268 file, file->f_dentry, file->f_dentry->d_name.name);
271 LUSTRE_FPRIVATE(file) = NULL;
272 ll_file_data_put(fd);
273 ll_capa_close(inode);
278 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
280 /* While this returns an error code, fput() the caller does not, so we need
281 * to make every effort to clean up all of our state here. Also, applications
282 * rarely check close errors and even if an error is returned they will not
283 * re-try the close call.
/* VFS ->release() entry: tear down per-fd state.  Handles remote-ACL
 * cleanup on the root inode, stops a statahead thread this fd owns,
 * clears async write errors recorded on the lsm, and ends with the MDS
 * close via ll_md_close().  The root dentry is special-cased and never
 * talks to the MDS. */
285 int ll_file_release(struct inode *inode, struct file *file)
287 struct ll_file_data *fd;
288 struct ll_sb_info *sbi = ll_i2sbi(inode);
289 struct ll_inode_info *lli = ll_i2info(inode);
290 struct lov_stripe_md *lsm = lli->lli_smd;
294 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
295 inode->i_generation, inode);
297 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL state is keyed on the root inode; drop it here. */
298 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
299 inode == inode->i_sb->s_root->d_inode) {
300 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
303 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
304 fd->fd_flags &= ~LL_FILE_RMTACL;
305 rct_del(&sbi->ll_rct, cfs_curproc_pid());
306 et_search_free(&sbi->ll_et, cfs_curproc_pid());
311 if (inode->i_sb->s_root != file->f_dentry)
312 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
313 fd = LUSTRE_FPRIVATE(file);
316 /* The last ref on @file, maybe not the the owner pid of statahead.
317 * Different processes can open the same dir, "ll_opendir_key" means:
318 * it is me that should stop the statahead thread. */
319 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
320 ll_stop_statahead(inode, lli->lli_opendir_key);
322 if (inode->i_sb->s_root == file->f_dentry) {
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
329 lov_test_and_clear_async_rc(lsm);
330 lli->lli_async_rc = 0;
332 rc = ll_md_close(sbi->ll_md_exp, inode, file);
334 if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
335 libcfs_debug_dumplog();
/* Issue an IT_OPEN intent lock to the MDS for @file (used by NFSD-style
 * opens and by setstripe).  Requests an OPEN lock unless @lmm/@lmmsize
 * indicate a stripe-setting call, then updates the inode from the reply
 * and attaches the lock handle.  Errors from the enqueue keep their own
 * exit path so -ESTALE does not flood the log. */
340 static int ll_intent_file_open(struct file *file, void *lmm,
341 int lmmsize, struct lookup_intent *itp)
343 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
344 struct dentry *parent = file->f_dentry->d_parent;
345 const char *name = file->f_dentry->d_name.name;
346 const int len = file->f_dentry->d_name.len;
347 struct md_op_data *op_data;
348 struct ptlrpc_request *req;
355 /* Usually we come here only for NFSD, and we want open lock.
356 But we can also get here with pre 2.6.15 patchless kernels, and in
357 that case that lock is also ok */
358 /* We can also get here if there was cached open handle in revalidate_it
359 * but it disappeared while we were getting from there to ll_file_open.
360 * But this means this file was closed and immediatelly opened which
361 * makes a good candidate for using OPEN lock */
362 /* If lmmsize & lmm are not 0, we are just setting stripe info
363 * parameters. No need for the open lock */
364 if (!lmm && !lmmsize)
365 itp->it_flags |= MDS_OPEN_LOCK;
367 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
368 file->f_dentry->d_inode, name, len,
369 O_RDWR, LUSTRE_OPC_ANY, NULL);
371 RETURN(PTR_ERR(op_data));
373 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
374 0 /*unused */, &req, ll_md_blocking_ast, 0);
375 ll_finish_md_op_data(op_data);
377 /* reason for keep own exit path - don`t flood log
378 * with messages with -ESTALE errors.
380 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
381 it_open_error(DISP_OPEN_OPEN, itp))
383 ll_release_openhandle(file->f_dentry, itp);
387 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
388 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
389 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the reply, then bind the granted lock
 * to the inode so later matches can find it. */
393 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
394 if (!rc && itp->d.lustre.it_lock_mode)
395 md_set_lock_data(sbi->ll_md_exp,
396 &itp->d.lustre.it_lock_handle,
397 file->f_dentry->d_inode, NULL);
400 ptlrpc_req_finished(itp->d.lustre.it_data);
401 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
402 ll_intent_drop_lock(itp);
408 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
409 * not believe attributes if a few ioepoch holders exist. Attributes for
410 * previous ioepoch if new one is opened are also skipped by MDS.
412 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Record the new epoch only when it actually changed (0 = no epoch). */
414 if (ioepoch && lli->lli_ioepoch != ioepoch) {
415 lli->lli_ioepoch = ioepoch;
416 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
417 ioepoch, PFID(&lli->lli_fid));
/* Fill an obd_client_handle from the MDS open reply carried in @it:
 * copy the server file handle, fid and open flags, record the new IO
 * epoch, and register the request for open replay. */
421 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
422 struct lookup_intent *it, struct obd_client_handle *och)
424 struct ptlrpc_request *req = it->d.lustre.it_data;
425 struct mdt_body *body;
429 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
430 LASSERT(body != NULL); /* reply already checked out */
432 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
433 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
434 och->och_fid = lli->lli_fid;
435 och->och_flags = it->it_flags;
436 ll_ioepoch_open(lli, body->ioepoch);
438 return md_set_open_replay_data(md_exp, och, req);
/* Complete the client-local part of an open: fill @och from the intent
 * reply when one is supplied, attach @fd as the file's private data,
 * initialize readahead state and remember the open mode. */
441 int ll_local_open(struct file *file, struct lookup_intent *it,
442 struct ll_file_data *fd, struct obd_client_handle *och)
444 struct inode *inode = file->f_dentry->d_inode;
445 struct ll_inode_info *lli = ll_i2info(inode);
448 LASSERT(!LUSTRE_FPRIVATE(file));
453 struct ptlrpc_request *req = it->d.lustre.it_data;
454 struct mdt_body *body;
457 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
461 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
462 if ((it->it_flags & FMODE_WRITE) &&
463 (body->valid & OBD_MD_FLSIZE))
464 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
465 lli->lli_ioepoch, PFID(&lli->lli_fid));
468 LUSTRE_FPRIVATE(file) = fd;
469 ll_readahead_init(inode, &fd->fd_ras);
470 fd->fd_omode = it->it_flags;
474 /* Open a file, and (for the very first open) create objects on the OSTs at
475 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
476 * creation or open until ll_lov_setstripe() ioctl is called. We grab
477 * lli_open_sem to ensure no other process will create objects, send the
478 * stripe MD to the MDS, or try to destroy the objects if that fails.
480 * If we already have the stripe MD locally then we don't request it in
481 * md_open(), by passing a lmm_size = 0.
483 * It is up to the application to ensure no other processes open this file
484 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
485 * used. We might be able to avoid races of that sort by getting lli_open_sem
486 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
487 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
489 int ll_file_open(struct inode *inode, struct file *file)
491 struct ll_inode_info *lli = ll_i2info(inode);
492 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
493 .it_flags = file->f_flags };
494 struct lov_stripe_md *lsm;
495 struct ptlrpc_request *req = NULL;
496 struct obd_client_handle **och_p;
498 struct ll_file_data *fd;
499 int rc = 0, opendir_set = 0;
502 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
503 inode->i_generation, inode, file->f_flags);
505 #ifdef HAVE_VFS_INTENT_PATCHES
508 it = file->private_data; /* XXX: compat macro */
509 file->private_data = NULL; /* prevent ll_local_open assertion */
512 fd = ll_file_data_get();
/* First opener of a directory becomes the statahead owner. */
517 if (S_ISDIR(inode->i_mode)) {
518 cfs_spin_lock(&lli->lli_lock);
519 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
520 LASSERT(lli->lli_sai == NULL);
521 lli->lli_opendir_key = fd;
522 lli->lli_opendir_pid = cfs_curproc_pid();
525 cfs_spin_unlock(&lli->lli_lock);
/* Root dentry never needs an MDS open; just attach the fd. */
528 if (inode->i_sb->s_root == file->f_dentry) {
529 LUSTRE_FPRIVATE(file) = fd;
533 if (!it || !it->d.lustre.it_disposition) {
534 /* Convert f_flags into access mode. We cannot use file->f_mode,
535 * because everything but O_ACCMODE mask was stripped from
537 if ((oit.it_flags + 1) & O_ACCMODE)
539 if (file->f_flags & O_TRUNC)
540 oit.it_flags |= FMODE_WRITE;
542 /* kernel only call f_op->open in dentry_open. filp_open calls
543 * dentry_open after call to open_namei that checks permissions.
544 * Only nfsd_open call dentry_open directly without checking
545 * permissions and because of that this code below is safe. */
546 if (oit.it_flags & FMODE_WRITE)
547 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
549 /* We do not want O_EXCL here, presumably we opened the file
550 * already? XXX - NFS implications? */
551 oit.it_flags &= ~O_EXCL;
553 /* bug20584, if "it_flags" contains O_CREAT, the file will be
554 * created if necessary, then "IT_CREAT" should be set to keep
555 * consistent with it */
556 if (oit.it_flags & O_CREAT)
557 oit.it_op |= IT_CREAT;
563 /* Let's see if we have file open on MDS already. */
564 if (it->it_flags & FMODE_WRITE) {
565 och_p = &lli->lli_mds_write_och;
566 och_usecount = &lli->lli_open_fd_write_count;
567 } else if (it->it_flags & FMODE_EXEC) {
568 och_p = &lli->lli_mds_exec_och;
569 och_usecount = &lli->lli_open_fd_exec_count;
571 och_p = &lli->lli_mds_read_och;
572 och_usecount = &lli->lli_open_fd_read_count;
575 cfs_down(&lli->lli_och_sem);
576 if (*och_p) { /* Open handle is present */
577 if (it_disposition(it, DISP_OPEN_OPEN)) {
578 /* Well, there's extra open request that we do not need,
579 let's close it somehow. This will decref request. */
580 rc = it_open_error(DISP_OPEN_OPEN, it);
582 cfs_up(&lli->lli_och_sem);
583 ll_file_data_put(fd);
584 GOTO(out_openerr, rc);
586 ll_release_openhandle(file->f_dentry, it);
587 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
592 rc = ll_local_open(file, it, fd, NULL);
595 cfs_up(&lli->lli_och_sem);
596 ll_file_data_put(fd);
597 GOTO(out_openerr, rc);
600 LASSERT(*och_usecount == 0);
601 if (!it->d.lustre.it_disposition) {
602 /* We cannot just request lock handle now, new ELC code
603 means that one of other OPEN locks for this file
604 could be cancelled, and since blocking ast handler
605 would attempt to grab och_sem as well, that would
606 result in a deadlock */
607 cfs_up(&lli->lli_och_sem);
608 it->it_create_mode |= M_CHECK_STALE;
609 rc = ll_intent_file_open(file, NULL, 0, it);
610 it->it_create_mode &= ~M_CHECK_STALE;
612 ll_file_data_put(fd);
613 GOTO(out_openerr, rc);
616 /* Got some error? Release the request */
617 if (it->d.lustre.it_status < 0) {
618 req = it->d.lustre.it_data;
619 ptlrpc_req_finished(req);
623 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
625 ll_file_data_put(fd);
626 GOTO(out_och_free, rc = -ENOMEM);
629 req = it->d.lustre.it_data;
631 /* md_intent_lock() didn't get a request ref if there was an
632 * open error, so don't do cleanup on the request here
634 /* XXX (green): Should not we bail out on any error here, not
635 * just open error? */
636 rc = it_open_error(DISP_OPEN_OPEN, it);
638 ll_file_data_put(fd);
639 GOTO(out_och_free, rc);
642 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
643 rc = ll_local_open(file, it, fd, *och_p);
645 ll_file_data_put(fd);
646 GOTO(out_och_free, rc);
649 cfs_up(&lli->lli_och_sem);
651 /* Must do this outside lli_och_sem lock to prevent deadlock where
652 different kind of OPEN lock for this same inode gets cancelled
653 by ldlm_cancel_lru */
654 if (!S_ISREG(inode->i_mode))
/* Delayed object creation: nothing more to do until setstripe. */
661 if (file->f_flags & O_LOV_DELAY_CREATE ||
662 !(file->f_mode & FMODE_WRITE)) {
663 CDEBUG(D_INODE, "object creation was delayed\n");
667 file->f_flags &= ~O_LOV_DELAY_CREATE;
670 ptlrpc_req_finished(req);
672 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
676 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
677 *och_p = NULL; /* OBD_FREE writes some magic there */
680 cfs_up(&lli->lli_och_sem);
682 if (opendir_set != 0)
683 ll_stop_statahead(inode, lli->lli_opendir_key);
689 /* Fills the obdo with the attributes for the lsm */
/* Issue an async OST getattr for @lsm and wait for completion; @sync
 * requests the attributes under a server-side lock (OBD_FL_SRVLOCK).
 * On success only size/blocks/time bits remain set in o_valid. */
690 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
691 struct obd_capa *capa, struct obdo *obdo,
692 __u64 ioepoch, int sync)
694 struct ptlrpc_request_set *set;
695 struct obd_info oinfo = { { { 0 } } };
700 LASSERT(lsm != NULL);
704 oinfo.oi_oa->o_id = lsm->lsm_object_id;
705 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
706 oinfo.oi_oa->o_mode = S_IFREG;
707 oinfo.oi_oa->o_ioepoch = ioepoch;
708 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
709 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
710 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
711 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
712 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
713 oinfo.oi_capa = capa;
715 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
716 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
719 set = ptlrpc_prep_set();
721 CERROR("can't allocate ptlrpc set\n");
724 rc = obd_getattr_async(exp, &oinfo, set);
726 rc = ptlrpc_set_wait(set);
727 ptlrpc_set_destroy(set);
/* Mask out everything callers should not trust from this getattr. */
730 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
731 OBD_MD_FLATIME | OBD_MD_FLMTIME |
732 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
737 * Performs the getattr on the inode and updates its fields.
738 * If @sync != 0, perform the getattr under the server-side lock.
740 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
741 __u64 ioepoch, int sync)
743 struct ll_inode_info *lli = ll_i2info(inode);
744 struct obd_capa *capa = ll_mdscapa_get(inode);
748 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
749 capa, obdo, ioepoch, sync);
/* On success refresh the VFS inode from the returned obdo. */
752 obdo_refresh_inode(inode, obdo, obdo->o_valid);
754 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
755 lli->lli_smd->lsm_object_id, i_size_read(inode),
756 (unsigned long long)inode->i_blocks,
757 (unsigned long)ll_inode_blksize(inode));
/* Merge the per-stripe lock value blocks into the inode: under the
 * inode size lock, obtain the merged size/blocks/times via
 * obd_merge_lvb() and write them into the VFS inode. */
762 int ll_merge_lvb(struct inode *inode)
764 struct ll_inode_info *lli = ll_i2info(inode);
765 struct ll_sb_info *sbi = ll_i2sbi(inode);
771 ll_inode_size_lock(inode, 1);
772 inode_init_lvb(inode, &lvb);
773 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
774 cl_isize_write_nolock(inode, lvb.lvb_size);
775 inode->i_blocks = lvb.lvb_blocks;
777 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
778 LTIME_S(inode->i_atime) = lvb.lvb_atime;
779 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
780 ll_inode_size_unlock(inode, 1);
/* Glimpse ioctl helper: fetch current OST attributes for @lsm and copy
 * size/blocks/times into the caller-supplied stat structure. */
785 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
788 struct obdo obdo = { 0 };
791 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
793 st->st_size = obdo.o_size;
794 st->st_blocks = obdo.o_blocks;
795 st->st_mtime = obdo.o_mtime;
796 st->st_atime = obdo.o_atime;
797 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read or write on @file: set non-blocking and
 * append flags from f_flags and choose the lock requirement mode
 * (never for nolock files, mandatory for O_APPEND, maybe otherwise). */
802 void ll_io_init(struct cl_io *io, const struct file *file, int write)
804 struct inode *inode = file->f_dentry->d_inode;
806 memset(io, 0, sizeof *io);
807 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
809 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
810 io->ci_obj = ll_i2info(inode)->lli_clob;
811 io->ci_lockreq = CILR_MAYBE;
812 if (ll_file_nolock(file)) {
813 io->ci_lockreq = CILR_NEVER;
814 io->ci_no_srvlock = 1;
815 } else if (file->f_flags & O_APPEND) {
816 io->ci_lockreq = CILR_MANDATORY;
/* Common engine for all read/write entry points (normal, sendfile,
 * splice): set up the cl_io, copy the iovec/actor/pipe arguments from
 * @args into the vvp/ccc IO state, serialize non-group-lock writes on
 * lli_write_sem, run cl_io_loop() and propagate the new file position
 * for writes. */
820 static ssize_t ll_file_io_generic(const struct lu_env *env,
821 struct vvp_io_args *args, struct file *file,
822 enum cl_io_type iot, loff_t *ppos, size_t count)
828 io = &ccc_env_info(env)->cti_io;
829 ll_io_init(io, file, iot == CIT_WRITE);
831 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
832 struct vvp_io *vio = vvp_env_io(env);
833 struct ccc_io *cio = ccc_env_io(env);
834 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
835 int write_sem_locked = 0;
837 cio->cui_fd = LUSTRE_FPRIVATE(file);
838 vio->cui_io_subtype = args->via_io_subtype;
840 switch (vio->cui_io_subtype) {
842 cio->cui_iov = args->u.normal.via_iov;
843 cio->cui_nrsegs = args->u.normal.via_nrsegs;
844 cio->cui_tot_nrsegs = cio->cui_nrsegs;
845 #ifndef HAVE_FILE_WRITEV
846 cio->cui_iocb = args->u.normal.via_iocb;
/* Writes without a group lock are serialized per inode. */
848 if ((iot == CIT_WRITE) &&
849 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
850 cfs_down(&lli->lli_write_sem);
851 write_sem_locked = 1;
855 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
856 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
859 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
860 vio->u.splice.cui_flags = args->u.splice.via_flags;
863 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
866 result = cl_io_loop(env, io);
867 if (write_sem_locked)
868 cfs_up(&lli->lli_write_sem);
870 /* cl_io_rw_init() handled IO */
871 result = io->ci_result;
874 if (io->ci_nob > 0) {
876 *ppos = io->u.ci_wr.wr.crw_pos;
884 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute its total byte count: rejects
 * negative segment lengths and cumulative overflow, and truncates at
 * the first segment that fails access_ok(). */
886 static int ll_file_get_iov_count(const struct iovec *iov,
887 unsigned long *nr_segs, size_t *count)
892 for (seg = 0; seg < *nr_segs; seg++) {
893 const struct iovec *iv = &iov[seg];
896 * If any segment has a negative length, or the cumulative
897 * length ever wraps negative then return -EINVAL.
900 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
902 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
907 cnt -= iv->iov_len; /* This segment is no good */
914 #ifdef HAVE_FILE_READV
/* Vectored read entry (pre-AIO kernels): validate the iovec, then run
 * the common CIT_READ path through ll_file_io_generic(). */
915 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
916 unsigned long nr_segs, loff_t *ppos)
919 struct vvp_io_args *args;
925 result = ll_file_get_iov_count(iov, &nr_segs, &count);
929 env = cl_env_get(&refcheck);
931 RETURN(PTR_ERR(env));
933 args = vvp_env_args(env, IO_NORMAL);
934 args->u.normal.via_iov = (struct iovec *)iov;
935 args->u.normal.via_nrsegs = nr_segs;
937 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
938 cl_env_put(env, &refcheck);
/* Single-buffer read (HAVE_FILE_READV variant): wrap @buf/@count into a
 * one-segment iovec and delegate to ll_file_readv(). */
942 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
946 struct iovec *local_iov;
951 env = cl_env_get(&refcheck);
953 RETURN(PTR_ERR(env));
955 local_iov = &vvp_env_info(env)->vti_local_iov;
956 local_iov->iov_base = (void __user *)buf;
957 local_iov->iov_len = count;
958 result = ll_file_readv(file, local_iov, 1, ppos);
959 cl_env_put(env, &refcheck);
/* AIO read entry: validate the iovec, record the kiocb in the IO args
 * and run the common CIT_READ path, advancing iocb->ki_pos. */
964 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
965 unsigned long nr_segs, loff_t pos)
968 struct vvp_io_args *args;
974 result = ll_file_get_iov_count(iov, &nr_segs, &count);
978 env = cl_env_get(&refcheck);
980 RETURN(PTR_ERR(env));
982 args = vvp_env_args(env, IO_NORMAL);
983 args->u.normal.via_iov = (struct iovec *)iov;
984 args->u.normal.via_nrsegs = nr_segs;
985 args->u.normal.via_iocb = iocb;
987 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
988 &iocb->ki_pos, count);
989 cl_env_put(env, &refcheck);
/* Single-buffer read (AIO variant): build a synchronous kiocb and a
 * one-segment iovec, call ll_file_aio_read() and report the updated
 * position back through @ppos. */
993 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
997 struct iovec *local_iov;
1003 env = cl_env_get(&refcheck);
1005 RETURN(PTR_ERR(env));
1007 local_iov = &vvp_env_info(env)->vti_local_iov;
1008 kiocb = &vvp_env_info(env)->vti_kiocb;
1009 local_iov->iov_base = (void __user *)buf;
1010 local_iov->iov_len = count;
1011 init_sync_kiocb(kiocb, file);
1012 kiocb->ki_pos = *ppos;
1013 kiocb->ki_left = count;
1015 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1016 *ppos = kiocb->ki_pos;
1018 cl_env_put(env, &refcheck);
1024 * Write to a file (through the page cache).
1026 #ifdef HAVE_FILE_WRITEV
/* Vectored write entry (pre-AIO kernels): validate the iovec, then run
 * the common CIT_WRITE path through ll_file_io_generic(). */
1027 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1028 unsigned long nr_segs, loff_t *ppos)
1031 struct vvp_io_args *args;
1037 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1041 env = cl_env_get(&refcheck);
1043 RETURN(PTR_ERR(env));
1045 args = vvp_env_args(env, IO_NORMAL);
1046 args->u.normal.via_iov = (struct iovec *)iov;
1047 args->u.normal.via_nrsegs = nr_segs;
1049 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1050 cl_env_put(env, &refcheck);
/* Single-buffer write (HAVE_FILE_WRITEV variant): wrap @buf/@count into
 * a one-segment iovec and delegate to ll_file_writev(). */
1054 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1058 struct iovec *local_iov;
1063 env = cl_env_get(&refcheck);
1065 RETURN(PTR_ERR(env));
1067 local_iov = &vvp_env_info(env)->vti_local_iov;
1068 local_iov->iov_base = (void __user *)buf;
1069 local_iov->iov_len = count;
1071 result = ll_file_writev(file, local_iov, 1, ppos);
1072 cl_env_put(env, &refcheck);
1076 #else /* AIO stuff */
/* AIO write entry: validate the iovec, record the kiocb in the IO args
 * and run the common CIT_WRITE path, advancing iocb->ki_pos. */
1077 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1078 unsigned long nr_segs, loff_t pos)
1081 struct vvp_io_args *args;
1087 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1091 env = cl_env_get(&refcheck);
1093 RETURN(PTR_ERR(env));
1095 args = vvp_env_args(env, IO_NORMAL);
1096 args->u.normal.via_iov = (struct iovec *)iov;
1097 args->u.normal.via_nrsegs = nr_segs;
1098 args->u.normal.via_iocb = iocb;
1100 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1101 &iocb->ki_pos, count);
1102 cl_env_put(env, &refcheck);
/* Single-buffer write (AIO variant): build a synchronous kiocb and a
 * one-segment iovec, call ll_file_aio_write() and report the updated
 * position back through @ppos. */
1106 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1110 struct iovec *local_iov;
1111 struct kiocb *kiocb;
1116 env = cl_env_get(&refcheck);
1118 RETURN(PTR_ERR(env));
1120 local_iov = &vvp_env_info(env)->vti_local_iov;
1121 kiocb = &vvp_env_info(env)->vti_kiocb;
1122 local_iov->iov_base = (void __user *)buf;
1123 local_iov->iov_len = count;
1124 init_sync_kiocb(kiocb, file);
1125 kiocb->ki_pos = *ppos;
1126 kiocb->ki_left = count;
1128 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1129 *ppos = kiocb->ki_pos;
1131 cl_env_put(env, &refcheck);
1137 #ifdef HAVE_KERNEL_SENDFILE
1139 * Send file content (through pagecache) somewhere with helper
/* sendfile entry: pass the caller's actor/target through the IO_SENDFILE
 * args and run the common CIT_READ path. */
1141 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1142 read_actor_t actor, void *target)
1145 struct vvp_io_args *args;
1150 env = cl_env_get(&refcheck);
1152 RETURN(PTR_ERR(env));
1154 args = vvp_env_args(env, IO_SENDFILE);
1155 args->u.sendfile.via_target = target;
1156 args->u.sendfile.via_actor = actor;
1158 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1159 cl_env_put(env, &refcheck);
1164 #ifdef HAVE_KERNEL_SPLICE_READ
1166 * Send file content (through pagecache) somewhere with helper
/* splice_read entry: pass the pipe and flags through the IO_SPLICE args
 * and run the common CIT_READ path. */
1168 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1169 struct pipe_inode_info *pipe, size_t count,
1173 struct vvp_io_args *args;
1178 env = cl_env_get(&refcheck);
1180 RETURN(PTR_ERR(env));
1182 args = vvp_env_args(env, IO_SPLICE);
1183 args->u.splice.via_pipe = pipe;
1184 args->u.splice.via_flags = flags;
1186 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1187 cl_env_put(env, &refcheck);
/* Recreate lost OST objects for a file (admin-only ioctl): copy the
 * user's ll_recreate_obj request, duplicate the inode's lsm under the
 * size lock, mark the obdo with OBD_FL_RECREATE_OBJS and call
 * obd_create() to regenerate the objects. */
1192 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1195 struct obd_export *exp = ll_i2dtexp(inode);
1196 struct ll_recreate_obj ucreatp;
1197 struct obd_trans_info oti = { 0 };
1198 struct obdo *oa = NULL;
1201 struct lov_stripe_md *lsm, *lsm2;
1204 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1207 if (cfs_copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1208 sizeof(struct ll_recreate_obj)))
1215 ll_inode_size_lock(inode, 0);
1216 lsm = ll_i2info(inode)->lli_smd;
1218 GOTO(out, rc = -ENOENT);
1219 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1220 (lsm->lsm_stripe_count));
1222 OBD_ALLOC(lsm2, lsm_size);
1224 GOTO(out, rc = -ENOMEM);
1226 oa->o_id = ucreatp.lrc_id;
1227 oa->o_gr = ucreatp.lrc_group;
1228 oa->o_nlink = ucreatp.lrc_ost_idx;
1229 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1230 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1231 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1232 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1234 memcpy(lsm2, lsm, lsm_size);
1235 rc = obd_create(exp, oa, &lsm2, &oti);
1237 OBD_FREE(lsm2, lsm_size);
1240 ll_inode_size_unlock(inode, 0);
/* Set the striping EA for a file via an IT_OPEN intent carrying @lum.
 * Fails with -EEXIST semantics if the inode already has an lsm (stripes
 * cannot be changed once objects exist); otherwise opens with the
 * stripe data, releases the handle, and drops the intent/request. */
1245 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1246 int flags, struct lov_user_md *lum, int lum_size)
1248 struct lov_stripe_md *lsm;
1249 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1253 ll_inode_size_lock(inode, 0);
1254 lsm = ll_i2info(inode)->lli_smd;
1256 ll_inode_size_unlock(inode, 0);
1257 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1262 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1265 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1266 GOTO(out_req_free, rc = -ENOENT);
1267 rc = oit.d.lustre.it_status;
1269 GOTO(out_req_free, rc);
1271 ll_release_openhandle(file->f_dentry, &oit);
1274 ll_inode_size_unlock(inode, 0);
1275 ll_intent_release(&oit);
1278 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping metadata) for @filename from the MDS via
 * md_getattr_name().  Validates the lmm magic (V1/V3 only) and, on a
 * little-endian wire / big-endian host mismatch, swabs the lmm and —
 * for regular files only — its object array before returning it with
 * its size; the caller owns the reply request. */
1282 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1283 struct lov_mds_md **lmmp, int *lmm_size,
1284 struct ptlrpc_request **request)
1286 struct ll_sb_info *sbi = ll_i2sbi(inode);
1287 struct mdt_body *body;
1288 struct lov_mds_md *lmm = NULL;
1289 struct ptlrpc_request *req = NULL;
1290 struct md_op_data *op_data;
1293 rc = ll_get_max_mdsize(sbi, &lmmsize);
1297 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1298 strlen(filename), lmmsize,
1299 LUSTRE_OPC_ANY, NULL);
1300 if (op_data == NULL)
1303 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1304 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1305 ll_finish_md_op_data(op_data);
1307 CDEBUG(D_INFO, "md_getattr_name failed "
1308 "on %s: rc %d\n", filename, rc);
1312 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1313 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1315 lmmsize = body->eadatasize;
1317 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1319 GOTO(out, rc = -ENODATA);
1322 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1323 LASSERT(lmm != NULL);
1325 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1326 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1327 GOTO(out, rc = -EPROTO);
1331 * This is coming from the MDS, so is probably in
1332 * little endian. We convert it to host endian before
1333 * passing it to userspace.
1335 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1336 /* if function called for directory - we should
1337 * avoid swab not existent lsm objects */
1338 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1339 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1340 if (S_ISREG(body->mode))
1341 lustre_swab_lov_user_md_objects(
1342 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1343 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1344 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1345 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1346 if (S_ISREG(body->mode))
1347 lustre_swab_lov_user_md_objects(
1348 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1349 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1355 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (with one OST object
 * descriptor) from userspace and apply it with MDS_OPEN_HAS_OBJS.
 * Requires CAP_SYS_ADMIN since it references pre-existing objects.
 */
1360 static int ll_lov_setea(struct inode *inode, struct file *file,
1363 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1364 struct lov_user_md *lump;
1365 int lum_size = sizeof(struct lov_user_md) +
1366 sizeof(struct lov_user_ost_data);
1370 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1373 OBD_ALLOC(lump, lum_size);
1377 if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1378 OBD_FREE(lump, lum_size);
1382 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1384 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user's layout request (V1 first,
 * then the larger V3 if the magic says so), set the stripe EA, and echo
 * the resulting layout back to userspace via LL_IOC_LOV_GETSTRIPE.
 */
1388 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1391 struct lov_user_md_v3 lumv3;
1392 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1393 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1394 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1397 int flags = FMODE_WRITE;
1400 /* first try with v1 which is smaller than v3 */
1401 lum_size = sizeof(struct lov_user_md_v1);
1402 if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
/* V3 magic: re-copy the full V3 structure over the V1 prefix. */
1405 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1406 lum_size = sizeof(struct lov_user_md_v3);
1407 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1411 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* Report the server-chosen layout back into the user's buffer. */
1413 put_user(0, &lumv1p->lmm_stripe_count);
1414 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1415 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE handler: hand the inode's stripe metadata to the
 * data export's iocontrol, which copies it out to userspace.
 */
1421 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1423 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1428 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * Acquire a group lock (gid = @arg) on @file.  The per-fd flag and the
 * grouplock handle are updated under lli_lock; the cl_get_grouplock()
 * call itself is made outside the spinlock, so a second check afterwards
 * resolves the race with a concurrent locker.
 */
1432 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1434 struct ll_inode_info *lli = ll_i2info(inode);
1435 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1436 struct ccc_grouplock grouplock;
1440 if (ll_file_nolock(file))
1441 RETURN(-EOPNOTSUPP);
1443 cfs_spin_lock(&lli->lli_lock);
/* Only one group lock per file descriptor. */
1444 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1445 CWARN("group lock already existed with gid %lu\n",
1446 fd->fd_grouplock.cg_gid);
1447 cfs_spin_unlock(&lli->lli_lock);
1450 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1451 cfs_spin_unlock(&lli->lli_lock);
/* May block unless O_NONBLOCK was given on the file. */
1453 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1454 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1458 cfs_spin_lock(&lli->lli_lock);
/* Re-check: another thread may have locked while we were enqueuing. */
1459 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1460 cfs_spin_unlock(&lli->lli_lock);
1461 CERROR("another thread just won the race\n");
1462 cl_put_grouplock(&grouplock);
1466 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1467 fd->fd_grouplock = grouplock;
1468 cfs_spin_unlock(&lli->lli_lock);
1470 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * Release the group lock with gid @arg previously taken on @file.
 * Validates that a group lock is held and that the gid matches before
 * clearing the per-fd state (under lli_lock) and dropping the cl lock.
 */
1474 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1476 struct ll_inode_info *lli = ll_i2info(inode);
1477 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1478 struct ccc_grouplock grouplock;
1481 cfs_spin_lock(&lli->lli_lock);
1482 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1483 cfs_spin_unlock(&lli->lli_lock);
1484 CWARN("no group lock held\n");
1487 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* The caller must release with the same gid it locked with. */
1489 if (fd->fd_grouplock.cg_gid != arg) {
1490 CWARN("group lock %lu doesn't match current id %lu\n",
1491 arg, fd->fd_grouplock.cg_gid);
1492 cfs_spin_unlock(&lli->lli_lock);
/* Snapshot the handle, clear fd state, then drop the lock outside the
 * spinlock. */
1496 grouplock = fd->fd_grouplock;
1497 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1498 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1499 cfs_spin_unlock(&lli->lli_lock);
1501 cl_put_grouplock(&grouplock);
1502 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1507 * Close inode open handle
1509 * \param dentry [in] dentry which contains the inode
1510 * \param it [in,out] intent which contains open info and result
1513 * \retval <0 failure
1515 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1517 struct inode *inode = dentry->d_inode;
1518 struct obd_client_handle *och;
1524 /* Root ? Do nothing. */
1525 if (dentry->d_inode->i_sb->s_root == dentry)
1528 /* No open handle to close? Move away */
1529 if (!it_disposition(it, DISP_OPEN_OPEN))
1532 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1534 OBD_ALLOC(och, sizeof(*och));
1536 GOTO(out, rc = -ENOMEM);
/* Fill a client handle from the intent, then send the MDS close. */
1538 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1539 ll_i2info(inode), it, och);
1541 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1544 /* this one is in place of ll_file_open */
1545 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1546 ptlrpc_req_finished(it->d.lustre.it_data);
1547 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1552 * Get size for inode for which FIEMAP mapping is requested.
1553 * Make the FIEMAP get_info call and returns the result.
1555 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1558 struct obd_export *exp = ll_i2dtexp(inode);
1559 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1560 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1561 int vallen = num_bytes;
1565 /* Checks for fiemap flags */
/* Unsupported flags: strip them so they can be reported back to the
 * caller (standard FIEMAP error convention). */
1566 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1567 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1571 /* Check for FIEMAP_FLAG_SYNC */
1572 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1573 rc = filemap_fdatawrite(inode->i_mapping);
1578 /* If the stripe_count > 1 and the application does not understand
1579 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1581 if (lsm->lsm_stripe_count > 1 &&
1582 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
/* Identify the object for the OST-side get_info call. */
1585 fm_key.oa.o_id = lsm->lsm_object_id;
1586 fm_key.oa.o_gr = lsm->lsm_object_gr;
1587 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1589 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1592 /* If filesize is 0, then there would be no objects for mapping */
1593 if (fm_key.oa.o_size == 0) {
1594 fiemap->fm_mapped_extents = 0;
1598 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
/* obd_get_info fills @fiemap (up to num_bytes) with the mapping. */
1600 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1602 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH handler: copy in the user's getinfo_fid2path header
 * to learn gf_pathlen, allocate an output buffer of that size, ask the
 * MDC to resolve the FID to a path, and copy the result back out.
 */
1607 int ll_fid2path(struct obd_export *exp, void *arg)
1609 struct getinfo_fid2path *gfout, *gfin;
1613 /* Need to get the buflen */
1614 OBD_ALLOC_PTR(gfin);
1617 if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
/* Output is header plus the caller-specified path buffer. */
1622 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1623 OBD_ALLOC(gfout, outsize);
1624 if (gfout == NULL) {
1628 memcpy(gfout, gfin, sizeof(*gfout));
1631 /* Call mdc_iocontrol */
1632 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1635 if (cfs_copy_to_user(arg, gfout, outsize))
1639 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size a kernel fiemap buffer from the user's
 * fm_extent_count, copy the request (and, when present, the first extent,
 * used as the continuation point) in, run ll_do_fiemap(), and copy the
 * header plus mapped extents back out.
 * NOTE(review): num_bytes is derived from a user-supplied extent_count —
 * overflow/clamping of that product is not visible in this elided chunk;
 * verify against the full source.
 */
1643 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1645 struct ll_user_fiemap *fiemap_s;
1646 size_t num_bytes, ret_bytes;
1647 unsigned int extent_count;
1650 /* Get the extent count so we can calculate the size of
1651 * required fiemap buffer */
1652 if (get_user(extent_count,
1653 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1655 num_bytes = sizeof(*fiemap_s) + (extent_count *
1656 sizeof(struct ll_fiemap_extent));
/* vmalloc: the buffer can be large for big extent counts. */
1658 OBD_VMALLOC(fiemap_s, num_bytes);
1659 if (fiemap_s == NULL)
1662 /* get the fiemap value */
1663 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1665 GOTO(error, rc = -EFAULT);
1667 /* If fm_extent_count is non-zero, read the first extent since
1668 * it is used to calculate end_offset and device from previous
1671 if (copy_from_user(&fiemap_s->fm_extents[0],
1672 (char __user *)arg + sizeof(*fiemap_s),
1673 sizeof(struct ll_fiemap_extent)))
1674 GOTO(error, rc = -EFAULT);
1677 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1681 ret_bytes = sizeof(struct ll_user_fiemap);
1683 if (extent_count != 0)
1684 ret_bytes += (fiemap_s->fm_mapped_extents *
1685 sizeof(struct ll_fiemap_extent));
1687 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1691 OBD_VFREE(fiemap_s, num_bytes);
/*
 * Main ioctl dispatcher for Lustre regular files.  Decodes @cmd and
 * routes to the per-feature handlers (striping, fiemap, group locks,
 * fid/path translation, statfs, ...); unknown commands fall through to
 * the dynamically registered ioctl handlers and finally to the data
 * export's obd_iocontrol.
 */
1695 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1698 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1702 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1703 inode->i_generation, inode, cmd);
1704 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1706 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1707 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1711 case LL_IOC_GETFLAGS:
1712 /* Get the current value of the file flags */
1713 return put_user(fd->fd_flags, (int *)arg);
1714 case LL_IOC_SETFLAGS:
1715 case LL_IOC_CLRFLAGS:
1716 /* Set or clear specific file flags */
1717 /* XXX This probably needs checks to ensure the flags are
1718 * not abused, and to handle any flag side effects.
1720 if (get_user(flags, (int *) arg))
1723 if (cmd == LL_IOC_SETFLAGS) {
/* Disabling locking is only sane for O_DIRECT I/O. */
1724 if ((flags & LL_FILE_IGNORE_LOCK) &&
1725 !(file->f_flags & O_DIRECT)) {
1726 CERROR("%s: unable to disable locking on "
1727 "non-O_DIRECT file\n", current->comm);
1731 fd->fd_flags |= flags;
1733 fd->fd_flags &= ~flags;
1736 case LL_IOC_LOV_SETSTRIPE:
1737 RETURN(ll_lov_setstripe(inode, file, arg));
1738 case LL_IOC_LOV_SETEA:
1739 RETURN(ll_lov_setea(inode, file, arg));
1740 case LL_IOC_LOV_GETSTRIPE:
1741 RETURN(ll_lov_getstripe(inode, arg));
1742 case LL_IOC_RECREATE_OBJ:
1743 RETURN(ll_lov_recreate_obj(inode, file, arg));
1744 case FSFILT_IOC_FIEMAP:
1745 RETURN(ll_ioctl_fiemap(inode, arg));
1746 case FSFILT_IOC_GETFLAGS:
1747 case FSFILT_IOC_SETFLAGS:
1748 RETURN(ll_iocontrol(inode, file, cmd, arg));
1749 case FSFILT_IOC_GETVERSION_OLD:
1750 case FSFILT_IOC_GETVERSION:
1751 RETURN(put_user(inode->i_generation, (int *)arg));
1752 case LL_IOC_GROUP_LOCK:
1753 RETURN(ll_get_grouplock(inode, file, arg));
1754 case LL_IOC_GROUP_UNLOCK:
1755 RETURN(ll_put_grouplock(inode, file, arg));
1756 case IOC_OBD_STATFS:
1757 RETURN(ll_obd_statfs(inode, (void *)arg));
1759 /* We need to special case any other ioctls we want to handle,
1760 * to send them to the MDS/OST as appropriate and to properly
1761 * network encode the arg field.
1762 case FSFILT_IOC_SETVERSION_OLD:
1763 case FSFILT_IOC_SETVERSION:
1765 case LL_IOC_FLUSHCTX:
1766 RETURN(ll_flush_ctx(inode));
1767 case LL_IOC_PATH2FID: {
1768 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1769 sizeof(struct lu_fid)))
1774 case OBD_IOC_FID2PATH:
1775 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1777 case LL_IOC_GET_MDTIDX: {
1780 mdtidx = ll_get_mdt_idx(inode);
1784 if (put_user((int)mdtidx, (int*)arg))
/* Default: try dynamically registered handlers, then the OSC. */
1794 ll_iocontrol_call(inode, file, cmd, arg, &err))
1797 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.  For SEEK_END the current file size must be
 * fetched from the servers (glimpse) before the offset can be computed;
 * SEEK_CUR/SEEK_SET are resolved locally.  The result is bounded by
 * ll_file_maxbytes().
 */
1803 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1805 struct inode *inode = file->f_dentry->d_inode;
1808 retval = offset + ((origin == 2) ? i_size_read(inode) :
1809 (origin == 1) ? file->f_pos : 0);
1810 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1811 inode->i_ino, inode->i_generation, inode, retval, retval,
1812 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1813 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1815 if (origin == 2) { /* SEEK_END */
1816 int nonblock = 0, rc;
1818 if (file->f_flags & O_NONBLOCK)
1819 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* Refresh i_size from the OSTs before using it. */
1821 rc = cl_glimpse_size(inode);
1825 offset += i_size_read(inode);
1826 } else if (origin == 1) { /* SEEK_CUR */
1827 offset += file->f_pos;
1831 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1832 if (offset != file->f_pos) {
1833 file->f_pos = offset;
/*
 * fsync: wait for in-flight page I/O, surface any async write errors
 * recorded on the inode/lsm, sync metadata through the MDC (md_sync),
 * and finally sync data objects on the OSTs (obd_sync) over the whole
 * file range [0, OBD_OBJECT_EOF].
 */
1841 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1843 struct inode *inode = dentry->d_inode;
1844 struct ll_inode_info *lli = ll_i2info(inode);
1845 struct lov_stripe_md *lsm = lli->lli_smd;
1846 struct ptlrpc_request *req;
1847 struct obd_capa *oc;
1850 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1851 inode->i_generation, inode);
1852 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1854 /* fsync's caller has already called _fdata{sync,write}, we want
1855 * that IO to finish before calling the osc and mdc sync methods */
1856 rc = filemap_fdatawait(inode->i_mapping);
1858 /* catch async errors that were recorded back when async writeback
1859 * failed for pages in this mapping. */
1860 err = lli->lli_async_rc;
1861 lli->lli_async_rc = 0;
1865 err = lov_test_and_clear_async_rc(lsm);
/* Metadata sync via the MDS, guarded by a capability if enabled. */
1870 oc = ll_mdscapa_get(inode);
1871 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1877 ptlrpc_req_finished(req);
1884 RETURN(rc ? rc : -ENOMEM);
/* Data sync: identify the object set and flush it on the OSTs. */
1886 oa->o_id = lsm->lsm_object_id;
1887 oa->o_gr = lsm->lsm_object_gr;
1888 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1889 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1890 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1893 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1894 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1895 0, OBD_OBJECT_EOF, oc);
/*
 * POSIX fcntl / BSD flock handler.  Translates the kernel file_lock into
 * an LDLM flock enqueue (LDLM_FLOCK type) sent to the MDS, then mirrors
 * the result into the local lock tables so the VFS sees the lock state.
 * F_UNLCK is expressed as an LCK_NL enqueue (see comment below).
 */
1905 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1907 struct inode *inode = file->f_dentry->d_inode;
1908 struct ll_sb_info *sbi = ll_i2sbi(inode);
1909 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1910 .ei_cb_cp =ldlm_flock_completion_ast,
1911 .ei_cbdata = file_lock };
1912 struct md_op_data *op_data;
1913 struct lustre_handle lockh = {0};
1914 ldlm_policy_data_t flock;
1919 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1920 inode->i_ino, file_lock);
1922 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1924 if (file_lock->fl_flags & FL_FLOCK) {
1925 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1926 /* set missing params for flock() calls */
1927 file_lock->fl_end = OFFSET_MAX;
1928 file_lock->fl_pid = current->tgid;
1930 flock.l_flock.pid = file_lock->fl_pid;
1931 flock.l_flock.start = file_lock->fl_start;
1932 flock.l_flock.end = file_lock->fl_end;
/* Map fcntl lock types onto LDLM lock modes. */
1934 switch (file_lock->fl_type) {
1936 einfo.ei_mode = LCK_PR;
1939 /* An unlock request may or may not have any relation to
1940 * existing locks so we may not be able to pass a lock handle
1941 * via a normal ldlm_lock_cancel() request. The request may even
1942 * unlock a byte range in the middle of an existing lock. In
1943 * order to process an unlock request we need all of the same
1944 * information that is given with a normal read or write record
1945 * lock request. To avoid creating another ldlm unlock (cancel)
1946 * message we'll treat a LCK_NL flock request as an unlock. */
1947 einfo.ei_mode = LCK_NL;
1950 einfo.ei_mode = LCK_PW;
1953 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* Non-blocking set requests must not wait for a conflicting lock. */
1968 flags = LDLM_FL_BLOCK_NOWAIT;
1974 flags = LDLM_FL_TEST_LOCK;
1975 /* Save the old mode so that if the mode in the lock changes we
1976 * can decrement the appropriate reader or writer refcount. */
1977 file_lock->fl_type = einfo.ei_mode;
1980 CERROR("unknown fcntl lock command: %d\n", cmd);
1984 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1985 LUSTRE_OPC_ANY, NULL);
1986 if (IS_ERR(op_data))
1987 RETURN(PTR_ERR(op_data));
1989 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
1990 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
1991 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
1993 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
1994 op_data, &lockh, &flock, 0, NULL /* req */, flags);
1996 ll_finish_md_op_data(op_data);
/* Mirror successful (or unlock) results into the local VFS tables. */
1998 if ((file_lock->fl_flags & FL_FLOCK) &&
1999 (rc == 0 || file_lock->fl_type == F_UNLCK))
2000 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2001 #ifdef HAVE_F_OP_FLOCK
2002 if ((file_lock->fl_flags & FL_POSIX) &&
2003 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2004 !(flags & LDLM_FL_TEST_LOCK))
2005 posix_lock_file_wait(file, file_lock);
2011 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking a reference) whether this client already holds an
 * MDS inodebits lock covering @bits on @inode, in any of CR/CW/PR/PW.
 * LDLM_FL_TEST_LOCK makes the match side-effect free.
 */
2018 int ll_have_md_lock(struct inode *inode, __u64 bits)
2020 struct lustre_handle lockh;
2021 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2029 fid = &ll_i2info(inode)->lli_fid;
2030 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2032 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2033 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2034 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Like ll_have_md_lock() but actually takes a reference on a matching
 * inodebits lock (no LDLM_FL_TEST_LOCK), returning the matched mode and
 * filling @lockh for the caller to release later.
 */
2040 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2041 struct lustre_handle *lockh)
2043 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2049 fid = &ll_i2info(inode)->lli_fid;
2050 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2052 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2053 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2054 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Common completion for revalidate: -ENOENT (already unlinked) is
 * treated as success for regular files/dirs; other failures are logged.
 */
2058 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2059 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2060 * and return success */
2062 /* This path cannot be hit for regular files unless in
2063 * case of obscure races, so no need to to validate
2065 if (!S_ISREG(inode->i_mode) &&
2066 !S_ISDIR(inode->i_mode))
2071 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate @dentry's inode attributes under the inodebits @ibits.
 * Two strategies: if the server supports OBD_CONNECT_ATTRFID, do an
 * IT_GETATTR intent lock by FID (no name); otherwise, if no matching MD
 * lock is cached locally, issue a plain md_getattr and refresh the inode
 * from the reply.
 */
2079 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2082 struct inode *inode = dentry->d_inode;
2083 struct ptlrpc_request *req = NULL;
2084 struct ll_sb_info *sbi;
2085 struct obd_export *exp;
2090 CERROR("REPORT THIS LINE TO PETER\n");
2093 sbi = ll_i2sbi(inode);
2095 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2096 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2098 exp = ll_i2mdexp(inode);
2100 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2101 struct lookup_intent oit = { .it_op = IT_GETATTR };
2102 struct md_op_data *op_data;
2104 /* Call getattr by fid, so do not provide name at all. */
2105 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2106 dentry->d_inode, NULL, 0, 0,
2107 LUSTRE_OPC_ANY, NULL);
2108 if (IS_ERR(op_data))
2109 RETURN(PTR_ERR(op_data));
2111 oit.it_create_mode |= M_CHECK_STALE;
2112 rc = md_intent_lock(exp, op_data, NULL, 0,
2113 /* we are not interested in name
2116 ll_md_blocking_ast, 0);
2117 ll_finish_md_op_data(op_data);
2118 oit.it_create_mode &= ~M_CHECK_STALE;
2120 rc = ll_inode_revalidate_fini(inode, rc);
2124 rc = ll_revalidate_it_finish(req, &oit, dentry);
2126 ll_intent_release(&oit);
2130 /* Unlinked? Unhash dentry, so it is not picked up later by
2131 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2132 here to preserve get_cwd functionality on 2.6.
2134 if (!dentry->d_inode->i_nlink) {
2135 cfs_spin_lock(&ll_lookup_lock);
2136 spin_lock(&dcache_lock);
2137 ll_drop_dentry(dentry);
2138 spin_unlock(&dcache_lock);
2139 cfs_spin_unlock(&ll_lookup_lock);
2142 ll_lookup_finish_locks(&oit, dentry);
2143 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2144 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2145 obd_valid valid = OBD_MD_FLGETATTR;
2146 struct md_op_data *op_data;
/* Regular files also need their EA, so reserve space for it. */
2149 if (S_ISREG(inode->i_mode)) {
2150 rc = ll_get_max_mdsize(sbi, &ealen);
2153 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2156 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2157 0, ealen, LUSTRE_OPC_ANY,
2159 if (op_data == NULL)
2162 op_data->op_valid = valid;
2163 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2164 * capa for this inode. Because we only keep capas of dirs
2166 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2167 ll_finish_md_op_data(op_data);
2169 rc = ll_inode_revalidate_fini(inode, rc);
2173 rc = ll_prep_inode(&inode, req, NULL);
2176 ptlrpc_req_finished(req);
/*
 * Full revalidate: refresh UPDATE|LOOKUP bits from the MDS, then (if the
 * file has objects allocated) refresh the size via a glimpse.
 */
2180 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2185 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2186 MDS_INODELOCK_LOOKUP);
2188 /* if object not yet allocated, don't validate size */
2189 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2192 /* cl_glimpse_size will prefer locally cached writes if they extend
2196 rc = cl_glimpse_size(dentry->d_inode);
/*
 * getattr with an explicit intent: revalidate first, then copy the
 * (now fresh) in-core inode attributes into @stat.
 */
2201 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2202 struct lookup_intent *it, struct kstat *stat)
2204 struct inode *inode = de->d_inode;
2207 res = ll_inode_revalidate_it(de, it);
2208 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2213 stat->dev = inode->i_sb->s_dev;
2214 stat->ino = inode->i_ino;
2215 stat->mode = inode->i_mode;
2216 stat->nlink = inode->i_nlink;
2217 stat->uid = inode->i_uid;
2218 stat->gid = inode->i_gid;
2219 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2220 stat->atime = inode->i_atime;
2221 stat->mtime = inode->i_mtime;
2222 stat->ctime = inode->i_ctime;
2223 #ifdef HAVE_INODE_BLKSIZE
2224 stat->blksize = inode->i_blksize;
2226 stat->blksize = 1 << inode->i_blkbits;
2229 stat->size = i_size_read(inode);
2230 stat->blocks = inode->i_blocks;
/* Plain VFS getattr: wrap ll_getattr_it with a default IT_GETATTR
 * intent. */
2234 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2236 struct lookup_intent it = { .it_op = IT_GETATTR };
2238 return ll_getattr_it(mnt, de, &it, stat);
2241 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * Kernel fiemap entry point (->fiemap): translate fiemap_extent_info
 * into an ll_user_fiemap buffer, run ll_do_fiemap(), and copy results
 * back into the caller's extent array.
 */
2242 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2243 __u64 start, __u64 len)
2247 struct ll_user_fiemap *fiemap;
2248 unsigned int extent_count = fieinfo->fi_extents_max;
2250 num_bytes = sizeof(*fiemap) + (extent_count *
2251 sizeof(struct ll_fiemap_extent));
2252 OBD_VMALLOC(fiemap, num_bytes);
2257 fiemap->fm_flags = fieinfo->fi_flags;
2258 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2259 fiemap->fm_start = start;
2260 fiemap->fm_length = len;
/* Seed with the first extent (continuation point) from the caller. */
2261 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2262 sizeof(struct ll_fiemap_extent));
2264 rc = ll_do_fiemap(inode, fiemap, num_bytes);
2266 fieinfo->fi_flags = fiemap->fm_flags;
2267 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2268 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2269 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2271 OBD_VFREE(fiemap, num_bytes);
/*
 * POSIX ACL check callback for generic_permission(): duplicate the
 * cached ACL under lli_lock and evaluate it against @mask.  Compiled out
 * when CONFIG_FS_POSIX_ACL is disabled.
 */
2278 int lustre_check_acl(struct inode *inode, int mask)
2280 #ifdef CONFIG_FS_POSIX_ACL
2281 struct ll_inode_info *lli = ll_i2info(inode);
2282 struct posix_acl *acl;
2286 cfs_spin_lock(&lli->lli_lock);
2287 acl = posix_acl_dup(lli->lli_posix_acl);
2288 cfs_spin_unlock(&lli->lli_lock);
2293 rc = posix_acl_permission(inode, acl, mask);
2294 posix_acl_release(acl);
2302 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
/*
 * permission() for >= 2.6.10 kernels: revalidate the root inode on
 * first access, defer to remote-permission checks for RMT clients, and
 * otherwise use generic_permission() with the ACL callback.
 */
2303 #ifndef HAVE_INODE_PERMISION_2ARGS
2304 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2306 int ll_inode_permission(struct inode *inode, int mask)
2312 /* as root inode are NOT getting validated in lookup operation,
2313 * need to do it before permission check. */
2315 if (inode == inode->i_sb->s_root->d_inode) {
2316 struct lookup_intent it = { .it_op = IT_LOOKUP };
2318 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2319 MDS_INODELOCK_LOOKUP);
2324 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2325 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2327 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2328 return lustre_check_remote_perm(inode, mask);
2330 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2331 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * Pre-2.6.10 variant: open-coded owner/group/other + ACL + capability
 * checks (no generic_permission with callback on those kernels).
 */
2336 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2338 int mode = inode->i_mode;
2341 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2342 inode->i_ino, inode->i_generation, inode, mask);
2344 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2345 return lustre_check_remote_perm(inode, mask);
2347 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2349 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2350 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2352 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2354 if (current->fsuid == inode->i_uid) {
2357 if (((mode >> 3) & mask & S_IRWXO) != mask)
2359 rc = lustre_check_acl(inode, mask);
2363 goto check_capabilities;
2367 if (cfs_curproc_is_in_groups(inode->i_gid))
2370 if ((mode & mask & S_IRWXO) == mask)
/* Capability overrides: DAC_OVERRIDE / DAC_READ_SEARCH. */
2374 if (!(mask & MAY_EXEC) ||
2375 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2376 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2379 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2380 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Pick the vectored read/write entry points by kernel API era:
 * readv/writev (older kernels) vs aio_read/aio_write. */
2387 #ifdef HAVE_FILE_READV
2388 #define READ_METHOD readv
2389 #define READ_FUNCTION ll_file_readv
2390 #define WRITE_METHOD writev
2391 #define WRITE_FUNCTION ll_file_writev
2393 #define READ_METHOD aio_read
2394 #define READ_FUNCTION ll_file_aio_read
2395 #define WRITE_METHOD aio_write
2396 #define WRITE_FUNCTION ll_file_aio_write
2399 /* -o localflock - only provides locally consistent flock locks */
2400 struct file_operations ll_file_operations = {
2401 .read = ll_file_read,
2402 .READ_METHOD = READ_FUNCTION,
2403 .write = ll_file_write,
2404 .WRITE_METHOD = WRITE_FUNCTION,
2405 .ioctl = ll_file_ioctl,
2406 .open = ll_file_open,
2407 .release = ll_file_release,
2408 .mmap = ll_file_mmap,
2409 .llseek = ll_file_seek,
2410 #ifdef HAVE_KERNEL_SENDFILE
2411 .sendfile = ll_file_sendfile,
2413 #ifdef HAVE_KERNEL_SPLICE_READ
2414 .splice_read = ll_file_splice_read,
/* Default (cluster-coherent flock via the MDS). */
2419 struct file_operations ll_file_operations_flock = {
2420 .read = ll_file_read,
2421 .READ_METHOD = READ_FUNCTION,
2422 .write = ll_file_write,
2423 .WRITE_METHOD = WRITE_FUNCTION,
2424 .ioctl = ll_file_ioctl,
2425 .open = ll_file_open,
2426 .release = ll_file_release,
2427 .mmap = ll_file_mmap,
2428 .llseek = ll_file_seek,
2429 #ifdef HAVE_KERNEL_SENDFILE
2430 .sendfile = ll_file_sendfile,
2432 #ifdef HAVE_KERNEL_SPLICE_READ
2433 .splice_read = ll_file_splice_read,
2436 #ifdef HAVE_F_OP_FLOCK
2437 .flock = ll_file_flock,
2439 .lock = ll_file_flock
2442 /* These are for -o noflock - to return ENOSYS on flock calls */
2443 struct file_operations ll_file_operations_noflock = {
2444 .read = ll_file_read,
2445 .READ_METHOD = READ_FUNCTION,
2446 .write = ll_file_write,
2447 .WRITE_METHOD = WRITE_FUNCTION,
2448 .ioctl = ll_file_ioctl,
2449 .open = ll_file_open,
2450 .release = ll_file_release,
2451 .mmap = ll_file_mmap,
2452 .llseek = ll_file_seek,
2453 #ifdef HAVE_KERNEL_SENDFILE
2454 .sendfile = ll_file_sendfile,
2456 #ifdef HAVE_KERNEL_SPLICE_READ
2457 .splice_read = ll_file_splice_read,
2460 #ifdef HAVE_F_OP_FLOCK
2461 .flock = ll_file_noflock,
2463 .lock = ll_file_noflock
/* inode_operations for regular Lustre files. */
2466 struct inode_operations ll_file_inode_operations = {
2467 #ifdef HAVE_VFS_INTENT_PATCHES
2468 .setattr_raw = ll_setattr_raw,
2470 .setattr = ll_setattr,
2471 .truncate = ll_truncate,
2472 .getattr = ll_getattr,
2473 .permission = ll_inode_permission,
2474 .setxattr = ll_setxattr,
2475 .getxattr = ll_getxattr,
2476 .listxattr = ll_listxattr,
2477 .removexattr = ll_removexattr,
2478 #ifdef HAVE_LINUX_FIEMAP_H
2479 .fiemap = ll_fiemap,
2483 /* dynamic ioctl number support routins */
/* Registry of dynamically registered ioctl handlers: an rwsem-protected
 * list of llioc_data entries. */
2484 static struct llioc_ctl_data {
2485 cfs_rw_semaphore_t ioc_sem;
2486 cfs_list_t ioc_head;
2488 __RWSEM_INITIALIZER(llioc.ioc_sem),
2489 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the array of ioctl commands it
 * serves (flexible trailing array iocd_cmd). */
2494 cfs_list_t iocd_list;
2495 unsigned int iocd_size;
2496 llioc_callback_t iocd_cb;
2497 unsigned int iocd_count;
2498 unsigned int iocd_cmd[0];
/*
 * Register callback @cb for @count ioctl commands in @cmd.  Returns an
 * opaque magic cookie (used to unregister) or NULL on bad args / OOM.
 */
2501 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2504 struct llioc_data *in_data = NULL;
2507 if (cb == NULL || cmd == NULL ||
2508 count > LLIOC_MAX_CMD || count < 0)
2511 size = sizeof(*in_data) + count * sizeof(unsigned int);
2512 OBD_ALLOC(in_data, size);
2513 if (in_data == NULL)
2516 memset(in_data, 0, sizeof(*in_data));
2517 in_data->iocd_size = size;
2518 in_data->iocd_cb = cb;
2519 in_data->iocd_count = count;
2520 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* Publish under the write lock. */
2522 cfs_down_write(&llioc.ioc_sem);
2523 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2524 cfs_up_write(&llioc.ioc_sem);
/*
 * Unregister a handler by the magic cookie returned from
 * ll_iocontrol_register(); warns if the cookie is not found.
 */
2529 void ll_iocontrol_unregister(void *magic)
2531 struct llioc_data *tmp;
2536 cfs_down_write(&llioc.ioc_sem);
2537 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2539 unsigned int size = tmp->iocd_size;
2541 cfs_list_del(&tmp->iocd_list);
2542 cfs_up_write(&llioc.ioc_sem);
2544 OBD_FREE(tmp, size);
2548 cfs_up_write(&llioc.ioc_sem);
2550 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2553 EXPORT_SYMBOL(ll_iocontrol_register);
2554 EXPORT_SYMBOL(ll_iocontrol_unregister);
2556 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2557 unsigned int cmd, unsigned long arg, int *rcp)
2559 enum llioc_iter ret = LLIOC_CONT;
2560 struct llioc_data *data;
2561 int rc = -EINVAL, i;
2563 cfs_down_read(&llioc.ioc_sem);
2564 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2565 for (i = 0; i < data->iocd_count; i++) {
2566 if (cmd != data->iocd_cmd[i])
2569 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2573 if (ret == LLIOC_STOP)
2576 cfs_up_read(&llioc.ioc_sem);