1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* NOTE(review): the jumps in the embedded line numbers show this extract is
 * missing source lines (braces, ENTRY/RETURN, NULL checks); it is not
 * compilable as-is. Comments below describe only what the visible lines do. */

/* Allocate a per-open-file descriptor from the ll_file_data slab cache. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
/* GFP-style slab allocation with CFS_ALLOC_IO to be safe under memory
 * pressure on the I/O path. Result (and NULL handling) is in elided lines. */
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Release a descriptor previously obtained from ll_file_data_get(). */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Copy the inode's current attributes (mode, times, size, blocks, flags,
 * ioepoch) plus the open handle @fh into @op_data for an MDS request. */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* Cast to ll_iattr to reach the Lustre-specific ia_attr_flags field. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
/* op_handle carries the client open handle; op_capa1 a capability ref
 * (released elsewhere — owner not visible in this extract). */
81 op_data->op_handle = *fh;
82 op_data->op_capa1 = ll_mdscapa_get(inode);
86 * Closes the IO epoch and packs all the attributes into @op_data for
/* Prepare @op_data for an MDS close RPC on open handle @och. */
89 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
90 struct obd_client_handle *och)
94 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
95 ATTR_MTIME_SET | ATTR_CTIME_SET;
/* Size/blocks are only sent when the handle was writable; the body of
 * this condition is in elided lines. */
97 if (!(och->och_flags & FMODE_WRITE))
/* Without Size-on-MDS support (or for non-regular files) the client is
 * authoritative for size, so always include ATTR_SIZE | ATTR_BLOCKS. */
100 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
101 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
103 ll_ioepoch_close(inode, op_data, &och, 0);
106 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
107 ll_prep_md_op_data(op_data, inode, NULL, NULL,
108 0, 0, LUSTRE_OPC_ANY, NULL);
/* Send an MDS close for open handle @och and clean up its replay data.
 * On epoch-close replies that request it, performs the Size-on-MDS update.
 * NOTE(review): several lines (ENTRY, rc declarations, braces, RETURN) are
 * elided in this extract. */
112 static int ll_close_inode_openhandle(struct obd_export *md_exp,
114 struct obd_client_handle *och)
116 struct obd_export *exp = ll_i2mdexp(inode);
117 struct md_op_data *op_data;
118 struct ptlrpc_request *req = NULL;
119 struct obd_device *obd = class_exp2obd(exp);
126 * XXX: in case of LMV, is this correct to access
129 CERROR("Invalid MDC connection handle "LPX64"\n",
130 ll_i2mdexp(inode)->exp_handle.h_cookie);
134 OBD_ALLOC_PTR(op_data);
/* Pre-existing XXX: on allocation failure the open handle and any pending
 * request are leaked — acknowledged in the original comment. */
136 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
138 ll_prepare_close(inode, op_data, och);
139 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
140 rc = md_close(md_exp, op_data, och->och_mod, &req);
142 /* This close must have the epoch closed. */
143 LASSERT(epoch_close);
144 /* MDS has instructed us to obtain Size-on-MDS attribute from
145 * OSTs and send setattr to back to MDS. */
146 rc = ll_som_update(inode, op_data);
148 CERROR("inode %lu mdc Size-on-MDS update failed: "
149 "rc = %d\n", inode->i_ino, rc);
153 CERROR("inode %lu mdc close failed: rc = %d\n",
156 ll_finish_md_op_data(op_data);
/* Destroy OST objects named in the close reply (unlinked-on-last-close). */
159 rc = ll_objects_destroy(req, inode);
161 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* If the epoch stayed open on a SOM-capable writable handle, queue a
 * DONE_WRITING to finish it asynchronously. */
168 if (exp_connect_som(exp) && !epoch_close &&
169 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
170 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
172 md_clear_open_replay_data(md_exp, och);
173 /* Free @och if it is not waiting for DONE_WRITING. */
174 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
177 if (req) /* This is close request */
178 ptlrpc_req_finished(req);
/* Really close the MDS open handle of the given mode (write/exec/read) if
 * no users remain. Selects the per-mode handle slot under lli_och_sem. */
182 int ll_md_real_close(struct inode *inode, int flags)
184 struct ll_inode_info *lli = ll_i2info(inode);
185 struct obd_client_handle **och_p;
186 struct obd_client_handle *och;
/* Exactly one of FMODE_WRITE / FMODE_EXEC / FMODE_READ picks the slot. */
191 if (flags & FMODE_WRITE) {
192 och_p = &lli->lli_mds_write_och;
193 och_usecount = &lli->lli_open_fd_write_count;
194 } else if (flags & FMODE_EXEC) {
195 och_p = &lli->lli_mds_exec_och;
196 och_usecount = &lli->lli_open_fd_exec_count;
198 LASSERT(flags & FMODE_READ);
199 och_p = &lli->lli_mds_read_och;
200 och_usecount = &lli->lli_open_fd_read_count;
203 cfs_down(&lli->lli_och_sem);
204 if (*och_usecount) { /* There are still users of this handle, so
206 cfs_up(&lli->lli_och_sem);
/* Handle is detached under the semaphore (elided lines) before closing. */
211 cfs_up(&lli->lli_och_sem);
213 if (och) { /* There might be a race and somebody have freed this och
215 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file-descriptor close: drop group lock if held, decrement the
 * per-mode open count, and only talk to the MDS (ll_md_real_close) when we
 * do not hold a matching OPEN DLM lock that lets us skip the RPC. */
222 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
225 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
226 struct ll_inode_info *lli = ll_i2info(inode);
230 /* clear group lock, if present */
231 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
232 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
234 /* Let's see if we have good enough OPEN lock on the file and if
235 we can skip talking to MDS */
236 if (file->f_dentry->d_inode) { /* Can this ever be false? */
/* TEST_LOCK: only probe for the lock, do not take a reference. */
238 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
239 struct lustre_handle lockh;
/* Shadows the @inode parameter — same value via the dentry. */
240 struct inode *inode = file->f_dentry->d_inode;
241 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
243 cfs_down(&lli->lli_och_sem);
244 if (fd->fd_omode & FMODE_WRITE) {
246 LASSERT(lli->lli_open_fd_write_count);
247 lli->lli_open_fd_write_count--;
248 } else if (fd->fd_omode & FMODE_EXEC) {
250 LASSERT(lli->lli_open_fd_exec_count);
251 lli->lli_open_fd_exec_count--;
254 LASSERT(lli->lli_open_fd_read_count);
255 lli->lli_open_fd_read_count--;
257 cfs_up(&lli->lli_och_sem);
/* No matching OPEN ibits lock -> must send the real close to the MDS. */
259 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
260 LDLM_IBITS, &policy, lockmode,
262 rc = ll_md_real_close(file->f_dentry->d_inode,
266 CERROR("Releasing a file %p with negative dentry %p. Name %s",
267 file, file->f_dentry, file->f_dentry->d_name.name);
270 LUSTRE_FPRIVATE(file) = NULL;
271 ll_file_data_put(fd);
272 ll_capa_close(inode);
/* Forward declaration; defined in the LOV layer. */
277 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
279 /* While this returns an error code, fput() the caller does not, so we need
280 * to make every effort to clean up all of our state here. Also, applications
281 * rarely check close errors and even if an error is returned they will not
282 * re-try the close call.
/* VFS ->release() entry point: tears down remote-ACL state for the root
 * inode, stops any statahead this fd started, and hands off to
 * ll_md_close() for the MDS-visible close. */
284 int ll_file_release(struct inode *inode, struct file *file)
286 struct ll_file_data *fd;
287 struct ll_sb_info *sbi = ll_i2sbi(inode);
288 struct ll_inode_info *lli = ll_i2info(inode);
289 struct lov_stripe_md *lsm = lli->lli_smd;
293 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
294 inode->i_generation, inode);
296 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping only applies to the filesystem root. */
297 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
298 inode == inode->i_sb->s_root->d_inode) {
299 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
302 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
303 fd->fd_flags &= ~LL_FILE_RMTACL;
304 rct_del(&sbi->ll_rct, cfs_curproc_pid());
305 et_search_free(&sbi->ll_et, cfs_curproc_pid());
310 if (inode->i_sb->s_root != file->f_dentry)
311 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
312 fd = LUSTRE_FPRIVATE(file);
315 /* The last ref on @file, maybe not the the owner pid of statahead.
316 * Different processes can open the same dir, "ll_opendir_key" means:
317 * it is me that should stop the statahead thread. */
318 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
319 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root inode has no MDS open handle: just free the fd and return. */
321 if (inode->i_sb->s_root == file->f_dentry) {
322 LUSTRE_FPRIVATE(file) = NULL;
323 ll_file_data_put(fd);
/* Harvest any deferred async write error so close() can report it. */
328 lov_test_and_clear_async_rc(lsm);
329 lli->lli_async_rc = 0;
331 rc = ll_md_close(sbi->ll_md_exp, inode, file);
333 if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
334 libcfs_debug_dumplog();
/* Perform an IT_OPEN intent lock against the MDS for @file, optionally
 * carrying striping info (@lmm/@lmmsize) when called from setstripe. On
 * success, updates the inode from the reply and binds the DLM lock to it. */
339 static int ll_intent_file_open(struct file *file, void *lmm,
340 int lmmsize, struct lookup_intent *itp)
342 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
343 struct dentry *parent = file->f_dentry->d_parent;
344 const char *name = file->f_dentry->d_name.name;
345 const int len = file->f_dentry->d_name.len;
346 struct md_op_data *op_data;
347 struct ptlrpc_request *req;
354 /* Usually we come here only for NFSD, and we want open lock.
355 But we can also get here with pre 2.6.15 patchless kernels, and in
356 that case that lock is also ok */
357 /* We can also get here if there was cached open handle in revalidate_it
358 * but it disappeared while we were getting from there to ll_file_open.
359 * But this means this file was closed and immediatelly opened which
360 * makes a good candidate for using OPEN lock */
361 /* If lmmsize & lmm are not 0, we are just setting stripe info
362 * parameters. No need for the open lock */
363 if (!lmm && !lmmsize)
364 itp->it_flags |= MDS_OPEN_LOCK;
366 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
367 file->f_dentry->d_inode, name, len,
368 O_RDWR, LUSTRE_OPC_ANY, NULL);
370 RETURN(PTR_ERR(op_data));
372 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
373 0 /*unused */, &req, ll_md_blocking_ast, 0);
374 ll_finish_md_op_data(op_data);
376 /* reason for keep own exit path - don`t flood log
377 * with messages with -ESTALE errors.
/* -ESTALE path (elided guard): close any open handle the intent produced
 * and bail out quietly. */
379 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
380 it_open_error(DISP_OPEN_OPEN, itp))
382 ll_release_openhandle(file->f_dentry, itp);
386 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
387 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
388 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
392 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
393 if (!rc && itp->d.lustre.it_lock_mode)
394 md_set_lock_data(sbi->ll_md_exp,
395 &itp->d.lustre.it_lock_handle,
396 file->f_dentry->d_inode, NULL);
/* Common exit: drop the reply request and release the intent lock. */
399 ptlrpc_req_finished(itp->d.lustre.it_data);
400 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
401 ll_intent_drop_lock(itp);
407 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
408 * not believe attributes if a few ioepoch holders exist. Attributes for
409 * previous ioepoch if new one is opened are also skipped by MDS.
411 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
413 if (ioepoch && lli->lli_ioepoch != ioepoch) {
414 lli->lli_ioepoch = ioepoch;
415 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
416 ioepoch, PFID(&lli->lli_fid));
/* Fill @och from the intent's open reply (fh, fid, flags, ioepoch) and
 * register it for open replay on MDS recovery. */
420 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
421 struct lookup_intent *it, struct obd_client_handle *och)
423 struct ptlrpc_request *req = it->d.lustre.it_data;
424 struct mdt_body *body;
428 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
429 LASSERT(body != NULL); /* reply already checked out */
431 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
432 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
433 och->och_fid = lli->lli_fid;
434 och->och_flags = it->it_flags;
435 ll_ioepoch_open(lli, body->ioepoch);
437 return md_set_open_replay_data(md_exp, och, req);
/* Finish the client-side part of an open: optionally fill @och from the
 * intent reply, then attach @fd to the file and init readahead state. */
440 int ll_local_open(struct file *file, struct lookup_intent *it,
441 struct ll_file_data *fd, struct obd_client_handle *och)
443 struct inode *inode = file->f_dentry->d_inode;
444 struct ll_inode_info *lli = ll_i2info(inode);
447 LASSERT(!LUSTRE_FPRIVATE(file));
/* Only when @och != NULL (guard elided): new MDS open handle to fill. */
452 struct ptlrpc_request *req = it->d.lustre.it_data;
453 struct mdt_body *body;
456 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
460 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
461 if ((it->it_flags & FMODE_WRITE) &&
462 (body->valid & OBD_MD_FLSIZE))
463 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
464 lli->lli_ioepoch, PFID(&lli->lli_fid));
467 LUSTRE_FPRIVATE(file) = fd;
468 ll_readahead_init(inode, &fd->fd_ras);
/* Remember the open mode for close-time per-mode refcounting. */
469 fd->fd_omode = it->it_flags;
473 /* Open a file, and (for the very first open) create objects on the OSTs at
474 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
475 * creation or open until ll_lov_setstripe() ioctl is called. We grab
476 * lli_open_sem to ensure no other process will create objects, send the
477 * stripe MD to the MDS, or try to destroy the objects if that fails.
479 * If we already have the stripe MD locally then we don't request it in
480 * md_open(), by passing a lmm_size = 0.
482 * It is up to the application to ensure no other processes open this file
483 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
484 * used. We might be able to avoid races of that sort by getting lli_open_sem
485 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
486 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* VFS ->open() entry point. Reuses an existing per-mode MDS open handle
 * when possible; otherwise issues an intent open RPC. NOTE(review): many
 * lines (braces, gotos, declarations) are elided in this extract. */
488 int ll_file_open(struct inode *inode, struct file *file)
490 struct ll_inode_info *lli = ll_i2info(inode);
491 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
492 .it_flags = file->f_flags };
493 struct lov_stripe_md *lsm;
494 struct ptlrpc_request *req = NULL;
495 struct obd_client_handle **och_p;
497 struct ll_file_data *fd;
498 int rc = 0, opendir_set = 0;
501 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
502 inode->i_generation, inode, file->f_flags);
504 #ifdef HAVE_VFS_INTENT_PATCHES
507 it = file->private_data; /* XXX: compat macro */
508 file->private_data = NULL; /* prevent ll_local_open assertion */
511 fd = ll_file_data_get();
/* For directories: claim statahead ownership if nobody holds it yet. */
516 if (S_ISDIR(inode->i_mode)) {
517 cfs_spin_lock(&lli->lli_lock);
518 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
519 LASSERT(lli->lli_sai == NULL);
520 lli->lli_opendir_key = fd;
521 lli->lli_opendir_pid = cfs_curproc_pid();
524 cfs_spin_unlock(&lli->lli_lock);
/* Root inode: no MDS open needed, just attach the fd. */
527 if (inode->i_sb->s_root == file->f_dentry) {
528 LUSTRE_FPRIVATE(file) = fd;
/* No kernel intent available: build our own IT_OPEN intent in @oit. */
532 if (!it || !it->d.lustre.it_disposition) {
533 /* Convert f_flags into access mode. We cannot use file->f_mode,
534 * because everything but O_ACCMODE mask was stripped from
/* (oit.it_flags + 1) maps O_RDONLY/O_WRONLY/O_RDWR onto FMODE bits —
 * the classic open-flags-to-fmode trick. */
536 if ((oit.it_flags + 1) & O_ACCMODE)
538 if (file->f_flags & O_TRUNC)
539 oit.it_flags |= FMODE_WRITE;
541 /* kernel only call f_op->open in dentry_open. filp_open calls
542 * dentry_open after call to open_namei that checks permissions.
543 * Only nfsd_open call dentry_open directly without checking
544 * permissions and because of that this code below is safe. */
545 if (oit.it_flags & FMODE_WRITE)
546 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
548 /* We do not want O_EXCL here, presumably we opened the file
549 * already? XXX - NFS implications? */
550 oit.it_flags &= ~O_EXCL;
552 /* bug20584, if "it_flags" contains O_CREAT, the file will be
553 * created if necessary, then "IT_CREAT" should be set to keep
554 * consistent with it */
555 if (oit.it_flags & O_CREAT)
556 oit.it_op |= IT_CREAT;
562 /* Let's see if we have file open on MDS already. */
563 if (it->it_flags & FMODE_WRITE) {
564 och_p = &lli->lli_mds_write_och;
565 och_usecount = &lli->lli_open_fd_write_count;
566 } else if (it->it_flags & FMODE_EXEC) {
567 och_p = &lli->lli_mds_exec_och;
568 och_usecount = &lli->lli_open_fd_exec_count;
570 och_p = &lli->lli_mds_read_och;
571 och_usecount = &lli->lli_open_fd_read_count;
574 cfs_down(&lli->lli_och_sem);
575 if (*och_p) { /* Open handle is present */
576 if (it_disposition(it, DISP_OPEN_OPEN)) {
577 /* Well, there's extra open request that we do not need,
578 let's close it somehow. This will decref request. */
579 rc = it_open_error(DISP_OPEN_OPEN, it);
581 cfs_up(&lli->lli_och_sem);
582 ll_file_data_put(fd);
583 GOTO(out_openerr, rc);
585 ll_release_openhandle(file->f_dentry, it);
586 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse path: och == NULL means attach to the existing handle. */
591 rc = ll_local_open(file, it, fd, NULL);
594 cfs_up(&lli->lli_och_sem);
595 ll_file_data_put(fd);
596 GOTO(out_openerr, rc);
599 LASSERT(*och_usecount == 0);
600 if (!it->d.lustre.it_disposition) {
601 /* We cannot just request lock handle now, new ELC code
602 means that one of other OPEN locks for this file
603 could be cancelled, and since blocking ast handler
604 would attempt to grab och_sem as well, that would
605 result in a deadlock */
606 cfs_up(&lli->lli_och_sem);
607 it->it_create_mode |= M_CHECK_STALE;
608 rc = ll_intent_file_open(file, NULL, 0, it);
609 it->it_create_mode &= ~M_CHECK_STALE;
611 ll_file_data_put(fd);
612 GOTO(out_openerr, rc);
615 /* Got some error? Release the request */
616 if (it->d.lustre.it_status < 0) {
617 req = it->d.lustre.it_data;
618 ptlrpc_req_finished(req);
622 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
624 ll_file_data_put(fd);
625 GOTO(out_och_free, rc = -ENOMEM);
628 req = it->d.lustre.it_data;
630 /* md_intent_lock() didn't get a request ref if there was an
631 * open error, so don't do cleanup on the request here
633 /* XXX (green): Should not we bail out on any error here, not
634 * just open error? */
635 rc = it_open_error(DISP_OPEN_OPEN, it);
637 ll_file_data_put(fd);
638 GOTO(out_och_free, rc);
641 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
642 rc = ll_local_open(file, it, fd, *och_p);
644 ll_file_data_put(fd);
645 GOTO(out_och_free, rc);
648 cfs_up(&lli->lli_och_sem);
650 /* Must do this outside lli_och_sem lock to prevent deadlock where
651 different kind of OPEN lock for this same inode gets cancelled
652 by ldlm_cancel_lru */
653 if (!S_ISREG(inode->i_mode))
660 if (file->f_flags & O_LOV_DELAY_CREATE ||
661 !(file->f_mode & FMODE_WRITE)) {
662 CDEBUG(D_INODE, "object creation was delayed\n");
666 file->f_flags &= ~O_LOV_DELAY_CREATE;
/* Error-unwind labels (elided) free the och slot and stop statahead. */
669 ptlrpc_req_finished(req);
671 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
675 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
676 *och_p = NULL; /* OBD_FREE writes some magic there */
679 cfs_up(&lli->lli_och_sem);
681 if (opendir_set != 0)
682 ll_stop_statahead(inode, lli->lli_opendir_key);
688 /* Fills the obdo with the attributes for the lsm */
/* Async getattr against the OSTs for @lsm; results land in @obdo.
 * @sync requests a server-side lock (OBD_FL_SRVLOCK, guard elided). */
689 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
690 struct obd_capa *capa, struct obdo *obdo,
691 __u64 ioepoch, int sync)
693 struct ptlrpc_request_set *set;
694 struct obd_info oinfo = { { { 0 } } };
699 LASSERT(lsm != NULL);
703 oinfo.oi_oa->o_id = lsm->lsm_object_id;
704 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
705 oinfo.oi_oa->o_mode = S_IFREG;
706 oinfo.oi_oa->o_ioepoch = ioepoch;
707 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
708 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
709 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
710 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
711 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
712 oinfo.oi_capa = capa;
/* Only on the sync path (guard elided): take the lock server-side. */
714 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
715 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
718 set = ptlrpc_prep_set();
720 CERROR("can't allocate ptlrpc set\n");
723 rc = obd_getattr_async(exp, &oinfo, set);
725 rc = ptlrpc_set_wait(set);
726 ptlrpc_set_destroy(set);
/* Mask the result down to the attributes callers may trust. */
729 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
730 OBD_MD_FLATIME | OBD_MD_FLMTIME |
731 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
736 * Performs the getattr on the inode and updates its fields.
737 * If @sync != 0, perform the getattr under the server-side lock.
739 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
740 __u64 ioepoch, int sync)
742 struct ll_inode_info *lli = ll_i2info(inode);
743 struct obd_capa *capa = ll_mdscapa_get(inode);
747 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
748 capa, obdo, ioepoch, sync);
/* On success (guard elided): push the OST attributes into the inode. */
751 obdo_refresh_inode(inode, obdo, obdo->o_valid);
753 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
754 lli->lli_smd->lsm_object_id, i_size_read(inode),
755 (unsigned long long)inode->i_blocks,
756 (unsigned long)ll_inode_blksize(inode));
/* Merge the cached per-stripe lock value blocks (LVB) into the inode's
 * size, blocks and timestamps, under the inode size lock. */
761 int ll_merge_lvb(struct inode *inode)
763 struct ll_inode_info *lli = ll_i2info(inode);
764 struct ll_sb_info *sbi = ll_i2sbi(inode);
770 ll_inode_size_lock(inode, 1);
771 inode_init_lvb(inode, &lvb);
772 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
773 i_size_write(inode, lvb.lvb_size);
774 inode->i_blocks = lvb.lvb_blocks;
776 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
777 LTIME_S(inode->i_atime) = lvb.lvb_atime;
778 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
779 ll_inode_size_unlock(inode, 1);
/* Fetch OST attributes for @lsm and copy size/blocks/times into @st
 * (glimpse for the lfs getstripe-style ioctl path). */
784 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
787 struct obdo obdo = { 0 };
790 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
/* On success (guard elided): copy over the stat-visible fields. */
792 st->st_size = obdo.o_size;
793 st->st_blocks = obdo.o_blocks;
794 st->st_mtime = obdo.o_mtime;
795 st->st_atime = obdo.o_atime;
796 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read (@write == 0) or write on @file:
 * nonblock/append flags, target cl_object, and lock-request policy. */
801 void ll_io_init(struct cl_io *io, const struct file *file, int write)
803 struct inode *inode = file->f_dentry->d_inode;
805 memset(io, 0, sizeof *io);
806 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
808 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
809 io->ci_obj = ll_i2info(inode)->lli_clob;
810 io->ci_lockreq = CILR_MAYBE;
811 if (ll_file_nolock(file)) {
812 io->ci_lockreq = CILR_NEVER;
813 io->ci_no_srvlock = 1;
814 } else if (file->f_flags & O_APPEND) {
/* Appends must see a consistent EOF, so the DLM lock is mandatory. */
815 io->ci_lockreq = CILR_MANDATORY;
/* Common driver for all read/write entry points: set up the cl_io from
 * @args (normal iovec / sendfile / splice subtype), run the cl_io loop,
 * and return bytes moved or an error. */
819 static ssize_t ll_file_io_generic(const struct lu_env *env,
820 struct vvp_io_args *args, struct file *file,
821 enum cl_io_type iot, loff_t *ppos, size_t count)
827 io = &ccc_env_info(env)->cti_io;
828 ll_io_init(io, file, iot == CIT_WRITE);
830 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
831 struct vvp_io *vio = vvp_env_io(env);
832 struct ccc_io *cio = ccc_env_io(env);
833 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
834 int write_sem_locked = 0;
836 cio->cui_fd = LUSTRE_FPRIVATE(file);
837 vio->cui_io_subtype = args->via_io_subtype;
839 switch (vio->cui_io_subtype) {
841 cio->cui_iov = args->u.normal.via_iov;
842 cio->cui_nrsegs = args->u.normal.via_nrsegs;
843 #ifndef HAVE_FILE_WRITEV
844 cio->cui_iocb = args->u.normal.via_iocb;
/* Serialize non-group-locked writes on lli_write_sem. */
846 if ((iot == CIT_WRITE) &&
847 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
848 cfs_down(&lli->lli_write_sem);
849 write_sem_locked = 1;
853 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
854 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
857 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
858 vio->u.splice.cui_flags = args->u.splice.via_flags;
/* Pre-existing typo "Unknow" kept: runtime string, behavior-affecting. */
861 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
864 result = cl_io_loop(env, io);
865 if (write_sem_locked)
866 cfs_up(&lli->lli_write_sem);
868 /* cl_io_rw_init() handled IO */
869 result = io->ci_result;
/* Short or full transfer: advance the file position by ci_nob. */
872 if (io->ci_nob > 0) {
874 *ppos = io->u.ci_wr.wr.crw_pos;
882 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array: total byte count in *count, possibly trimming
 * *nr_segs at the first inaccessible segment, -EINVAL on negative/overflow
 * lengths. Mirrors the kernel's generic iov checking. */
884 static int ll_file_get_iov_count(const struct iovec *iov,
885 unsigned long *nr_segs, size_t *count)
890 for (seg = 0; seg < *nr_segs; seg++) {
891 const struct iovec *iv = &iov[seg];
894 * If any segment has a negative length, or the cumulative
895 * length ever wraps negative then return -EINVAL.
898 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
900 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
/* Unreadable segment: back out its length and stop here (kernel
 * convention — partial vector is allowed). */
905 cnt -= iv->iov_len; /* This segment is no good */
912 #ifdef HAVE_FILE_READV
/* Vectored read entry point (pre-AIO kernels): validate the iovec, then
 * run a CIT_READ through ll_file_io_generic. */
913 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
914 unsigned long nr_segs, loff_t *ppos)
917 struct vvp_io_args *args;
923 result = ll_file_get_iov_count(iov, &nr_segs, &count);
927 env = cl_env_get(&refcheck);
929 RETURN(PTR_ERR(env));
931 args = vvp_env_args(env, IO_NORMAL);
932 args->u.normal.via_iov = (struct iovec *)iov;
933 args->u.normal.via_nrsegs = nr_segs;
935 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
936 cl_env_put(env, &refcheck);
/* Plain read(): wrap the user buffer in a single-segment iovec. */
940 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
944 struct iovec *local_iov;
949 env = cl_env_get(&refcheck);
951 RETURN(PTR_ERR(env));
953 local_iov = &vvp_env_info(env)->vti_local_iov;
954 local_iov->iov_base = (void __user *)buf;
955 local_iov->iov_len = count;
956 result = ll_file_readv(file, local_iov, 1, ppos);
957 cl_env_put(env, &refcheck);
/* AIO read entry point (HAVE_FILE_READV not defined): same flow but the
 * position lives in the kiocb. */
962 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
963 unsigned long nr_segs, loff_t pos)
966 struct vvp_io_args *args;
972 result = ll_file_get_iov_count(iov, &nr_segs, &count);
976 env = cl_env_get(&refcheck);
978 RETURN(PTR_ERR(env));
980 args = vvp_env_args(env, IO_NORMAL);
981 args->u.normal.via_iov = (struct iovec *)iov;
982 args->u.normal.via_nrsegs = nr_segs;
983 args->u.normal.via_iocb = iocb;
985 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
986 &iocb->ki_pos, count);
987 cl_env_put(env, &refcheck);
/* Plain read() built on the AIO path via a synchronous kiocb. */
991 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
995 struct iovec *local_iov;
1001 env = cl_env_get(&refcheck);
1003 RETURN(PTR_ERR(env));
1005 local_iov = &vvp_env_info(env)->vti_local_iov;
1006 kiocb = &vvp_env_info(env)->vti_kiocb;
1007 local_iov->iov_base = (void __user *)buf;
1008 local_iov->iov_len = count;
1009 init_sync_kiocb(kiocb, file);
1010 kiocb->ki_pos = *ppos;
1011 kiocb->ki_left = count;
1013 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
/* Propagate the position the AIO path advanced back to the caller. */
1014 *ppos = kiocb->ki_pos;
1016 cl_env_put(env, &refcheck);
1022 * Write to a file (through the page cache).
1024 #ifdef HAVE_FILE_WRITEV
/* Vectored write entry point (pre-AIO kernels): mirror of ll_file_readv
 * with CIT_WRITE. */
1025 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1026 unsigned long nr_segs, loff_t *ppos)
1029 struct vvp_io_args *args;
1035 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1039 env = cl_env_get(&refcheck);
1041 RETURN(PTR_ERR(env));
1043 args = vvp_env_args(env, IO_NORMAL);
1044 args->u.normal.via_iov = (struct iovec *)iov;
1045 args->u.normal.via_nrsegs = nr_segs;
1047 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1048 cl_env_put(env, &refcheck);
/* Plain write(): single-segment iovec over the user buffer. */
1052 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1056 struct iovec *local_iov;
1061 env = cl_env_get(&refcheck);
1063 RETURN(PTR_ERR(env));
1065 local_iov = &vvp_env_info(env)->vti_local_iov;
1066 local_iov->iov_base = (void __user *)buf;
1067 local_iov->iov_len = count;
1069 result = ll_file_writev(file, local_iov, 1, ppos);
1070 cl_env_put(env, &refcheck);
1074 #else /* AIO stuff */
/* AIO write entry point: mirror of ll_file_aio_read with CIT_WRITE. */
1075 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1076 unsigned long nr_segs, loff_t pos)
1079 struct vvp_io_args *args;
1085 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1089 env = cl_env_get(&refcheck);
1091 RETURN(PTR_ERR(env));
1093 args = vvp_env_args(env, IO_NORMAL);
1094 args->u.normal.via_iov = (struct iovec *)iov;
1095 args->u.normal.via_nrsegs = nr_segs;
1096 args->u.normal.via_iocb = iocb;
1098 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1099 &iocb->ki_pos, count);
1100 cl_env_put(env, &refcheck);
/* Plain write() built on the AIO path via a synchronous kiocb. */
1104 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1108 struct iovec *local_iov;
1109 struct kiocb *kiocb;
1114 env = cl_env_get(&refcheck);
1116 RETURN(PTR_ERR(env));
1118 local_iov = &vvp_env_info(env)->vti_local_iov;
1119 kiocb = &vvp_env_info(env)->vti_kiocb;
1120 local_iov->iov_base = (void __user *)buf;
1121 local_iov->iov_len = count;
1122 init_sync_kiocb(kiocb, file);
1123 kiocb->ki_pos = *ppos;
1124 kiocb->ki_left = count;
1126 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1127 *ppos = kiocb->ki_pos;
1129 cl_env_put(env, &refcheck);
1135 #ifdef HAVE_KERNEL_SENDFILE
1137 * Send file content (through pagecache) somewhere with helper
/* sendfile(2) backend: run a CIT_READ with the IO_SENDFILE subtype so
 * pages go to @actor/@target instead of a user iovec. */
1139 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1140 read_actor_t actor, void *target)
1143 struct vvp_io_args *args;
1148 env = cl_env_get(&refcheck);
1150 RETURN(PTR_ERR(env));
1152 args = vvp_env_args(env, IO_SENDFILE);
1153 args->u.sendfile.via_target = target;
1154 args->u.sendfile.via_actor = actor;
1156 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1157 cl_env_put(env, &refcheck);
1162 #ifdef HAVE_KERNEL_SPLICE_READ
1164 * Send file content (through pagecache) somewhere with helper
/* splice_read(2) backend: CIT_READ with the IO_SPLICE subtype feeding
 * the destination pipe. */
1166 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1167 struct pipe_inode_info *pipe, size_t count,
1171 struct vvp_io_args *args;
1176 env = cl_env_get(&refcheck);
1178 RETURN(PTR_ERR(env));
1180 args = vvp_env_args(env, IO_SPLICE);
1181 args->u.splice.via_pipe = pipe;
1182 args->u.splice.via_flags = flags;
1184 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1185 cl_env_put(env, &refcheck);
/* ioctl handler: re-create lost OST objects for this file from a
 * user-supplied ll_recreate_obj (id/group/ost index). Root-only. */
1190 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1193 struct obd_export *exp = ll_i2dtexp(inode);
1194 struct ll_recreate_obj ucreatp;
1195 struct obd_trans_info oti = { 0 };
1196 struct obdo *oa = NULL;
1199 struct lov_stripe_md *lsm, *lsm2;
1202 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1205 if (cfs_copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1206 sizeof(struct ll_recreate_obj)))
/* Size lock held across the lsm copy so striping cannot change under us. */
1213 ll_inode_size_lock(inode, 0);
1214 lsm = ll_i2info(inode)->lli_smd;
1216 GOTO(out, rc = -ENOENT);
1217 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1218 (lsm->lsm_stripe_count));
1220 OBD_ALLOC(lsm2, lsm_size);
1222 GOTO(out, rc = -ENOMEM);
/* o_nlink is reused to carry the target OST index on this path. */
1224 oa->o_id = ucreatp.lrc_id;
1225 oa->o_gr = ucreatp.lrc_group;
1226 oa->o_nlink = ucreatp.lrc_ost_idx;
1227 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1228 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1229 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1230 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1232 memcpy(lsm2, lsm, lsm_size);
1233 rc = obd_create(exp, oa, &lsm2, &oti);
1235 OBD_FREE(lsm2, lsm_size);
1238 ll_inode_size_unlock(inode, 0);
/* Set striping on first write: refuses if the file is already striped,
 * otherwise performs an intent open carrying @lum to create the objects. */
1243 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1244 int flags, struct lov_user_md *lum, int lum_size)
1246 struct lov_stripe_md *lsm;
1247 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1251 ll_inode_size_lock(inode, 0);
1252 lsm = ll_i2info(inode)->lli_smd;
1254 ll_inode_size_unlock(inode, 0);
1255 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1260 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1263 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1264 GOTO(out_req_free, rc = -ENOENT);
1265 rc = oit.d.lustre.it_status;
1267 GOTO(out_req_free, rc);
1269 ll_release_openhandle(file->f_dentry, &oit);
1272 ll_inode_size_unlock(inode, 0);
1273 ll_intent_release(&oit);
/* out_req_free label (elided): drop the intent's reply request. */
1276 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping metadata) for @filename from the MDS.
 * On success *lmmp points INTO the reply buffer *request — the caller
 * owns and must eventually free the request, not the lmm. */
1280 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1281 struct lov_mds_md **lmmp, int *lmm_size,
1282 struct ptlrpc_request **request)
1284 struct ll_sb_info *sbi = ll_i2sbi(inode);
1285 struct mdt_body *body;
1286 struct lov_mds_md *lmm = NULL;
1287 struct ptlrpc_request *req = NULL;
1288 struct obd_capa *oc;
1291 rc = ll_get_max_mdsize(sbi, &lmmsize);
1295 oc = ll_mdscapa_get(inode);
1296 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1297 oc, filename, strlen(filename) + 1,
1298 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1299 ll_i2suppgid(inode), &req);
1302 CDEBUG(D_INFO, "md_getattr_name failed "
1303 "on %s: rc %d\n", filename, rc);
1307 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1308 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1310 lmmsize = body->eadatasize;
/* No striping EA present (file or dir) -> -ENODATA. */
1312 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1314 GOTO(out, rc = -ENODATA);
1317 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1318 LASSERT(lmm != NULL);
1320 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1321 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1322 GOTO(out, rc = -EPROTO);
1326 * This is coming from the MDS, so is probably in
1327 * little endian. We convert it to host endian before
1328 * passing it to userspace.
/* Only swab on big-endian hosts (LOV_MAGIC differs from its LE form). */
1330 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1331 /* if function called for directory - we should
1332 * avoid swab not existent lsm objects */
1333 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1334 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1335 if (S_ISREG(body->mode))
1336 lustre_swab_lov_user_md_objects(
1337 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1338 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1339 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1340 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1341 if (S_ISREG(body->mode))
1342 lustre_swab_lov_user_md_objects(
1343 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1344 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1350 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (plus one OST object entry)
 * from userspace and apply it with MDS_OPEN_HAS_OBJS, i.e. the caller
 * supplies pre-existing objects.  Restricted to CAP_SYS_ADMIN.
 */
1355 static int ll_lov_setea(struct inode *inode, struct file *file,
1358 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1359 struct lov_user_md *lump;
1360 int lum_size = sizeof(struct lov_user_md) +
1361 sizeof(struct lov_user_ost_data);
1365 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1368 OBD_ALLOC(lump, lum_size);
/* copy_from_user failure: free the staging buffer before erroring out */
1372 if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1373 OBD_FREE(lump, lum_size);
1377 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1379 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: read a lov_user_md from userspace (first
 * as the smaller V1 layout, re-reading as V3 if the magic says so) and set
 * the stripe EA.  On success the in-kernel LSM is echoed back to the user
 * buffer via the GETSTRIPE iocontrol path.
 */
1383 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1386 struct lov_user_md_v3 lumv3;
1387 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1388 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1389 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1392 int flags = FMODE_WRITE;
1395 /* first try with v1 which is smaller than v3 */
1396 lum_size = sizeof(struct lov_user_md_v1);
1397 if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
/* V3 magic: re-copy the full V3 structure from the same user pointer */
1400 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1401 lum_size = sizeof(struct lov_user_md_v3);
1402 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1406 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* clear the user's stripe_count, then report back the real layout */
1408 put_user(0, &lumv1p->lmm_stripe_count);
1409 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1410 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE handler: hand the inode's stripe metadata to the
 * LOV iocontrol, which copies it out to the user buffer.
 */
1416 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1418 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1423 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * LL_IOC_GROUP_LOCK handler: take a cl-layer group lock with gid @arg and
 * record it in the per-open file data.  lli_lock guards fd_flags and
 * fd_grouplock; the lock must be dropped around the (blocking)
 * cl_get_grouplock() call, so the "already locked" check is repeated
 * afterwards to handle a racing thread.
 */
1427 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1429 struct ll_inode_info *lli = ll_i2info(inode);
1430 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1431 struct ccc_grouplock grouplock;
1435 if (ll_file_nolock(file))
1436 RETURN(-EOPNOTSUPP);
1438 cfs_spin_lock(&lli->lli_lock);
/* only one group lock per file descriptor */
1439 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1440 CWARN("group lock already existed with gid %lu\n",
1441 fd->fd_grouplock.cg_gid);
1442 cfs_spin_unlock(&lli->lli_lock);
1445 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1446 cfs_spin_unlock(&lli->lli_lock);
/* may block unless O_NONBLOCK was requested on the file */
1448 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1449 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1453 cfs_spin_lock(&lli->lli_lock);
/* re-check: another thread may have raced us while unlocked */
1454 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1455 cfs_spin_unlock(&lli->lli_lock);
1456 CERROR("another thread just won the race\n");
1457 cl_put_grouplock(&grouplock);
1461 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1462 fd->fd_grouplock = grouplock;
1463 cfs_spin_unlock(&lli->lli_lock);
1465 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock recorded in the file
 * data, verifying the caller's gid (@arg) matches the one held.  The fd
 * state is cleared under lli_lock; the actual cl_put_grouplock() happens
 * after dropping the spinlock.
 */
1469 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1471 struct ll_inode_info *lli = ll_i2info(inode);
1472 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1473 struct ccc_grouplock grouplock;
1476 cfs_spin_lock(&lli->lli_lock);
1477 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1478 cfs_spin_unlock(&lli->lli_lock);
1479 CWARN("no group lock held\n");
1482 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* gid mismatch: caller is trying to drop a lock it does not own */
1484 if (fd->fd_grouplock.cg_gid != arg) {
1485 CWARN("group lock %lu doesn't match current id %lu\n",
1486 arg, fd->fd_grouplock.cg_gid);
1487 cfs_spin_unlock(&lli->lli_lock);
/* take a local copy so the put can run outside the spinlock */
1491 grouplock = fd->fd_grouplock;
1492 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1493 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1494 cfs_spin_unlock(&lli->lli_lock);
1496 cl_put_grouplock(&grouplock);
1497 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1502 * Close inode open handle
1504 * \param dentry [in] dentry which contains the inode
1505 * \param it [in,out] intent which contains open info and result
1508 * \retval <0 failure
1510 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1512 struct inode *inode = dentry->d_inode;
1513 struct obd_client_handle *och;
1519 /* Root ? Do nothing. */
1520 if (dentry->d_inode->i_sb->s_root == dentry)
1523 /* No open handle to close? Move away */
1524 if (!it_disposition(it, DISP_OPEN_OPEN))
1527 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
/* build a temporary client handle from the intent, then close it */
1529 OBD_ALLOC(och, sizeof(*och));
1531 GOTO(out, rc = -ENOMEM);
1533 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1534 ll_i2info(inode), it, och);
1536 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1539 /* this one is in place of ll_file_open */
1540 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1541 ptlrpc_req_finished(it->d.lustre.it_data);
1542 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1547 * Get size for inode for which FIEMAP mapping is requested.
1548 * Make the FIEMAP get_info call and returns the result.
/* @fiemap is the in/out mapping buffer of @num_bytes total bytes;
 * results are written back into it by obd_get_info(KEY_FIEMAP). */
1550 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1553 struct obd_export *exp = ll_i2dtexp(inode);
1554 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1555 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1556 int vallen = num_bytes;
1560 /* Checks for fiemap flags */
1561 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* report back which flags we do not support, then fail */
1562 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1566 /* Check for FIEMAP_FLAG_SYNC */
1567 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1568 rc = filemap_fdatawrite(inode->i_mapping);
1573 /* If the stripe_count > 1 and the application does not understand
1574 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1576 if (lsm->lsm_stripe_count > 1 &&
1577 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
/* identify the object for the OST-side fiemap request */
1580 fm_key.oa.o_id = lsm->lsm_object_id;
1581 fm_key.oa.o_gr = lsm->lsm_object_gr;
1582 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1584 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1587 /* If filesize is 0, then there would be no objects for mapping */
1588 if (fm_key.oa.o_size == 0) {
1589 fiemap->fm_mapped_extents = 0;
1593 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1595 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1597 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH handler: copy the user's getinfo_fid2path header in,
 * allocate an output buffer sized by the requested gf_pathlen, ask the MDC
 * to resolve the FID to a path, and copy the result back to userspace.
 */
1602 int ll_fid2path(struct obd_export *exp, void *arg)
1604 struct getinfo_fid2path *gfout, *gfin;
1608 /* Need to get the buflen */
1609 OBD_ALLOC_PTR(gfin);
1612 if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
/* output = fixed header + caller-requested path buffer */
1617 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1618 OBD_ALLOC(gfout, outsize);
1619 if (gfout == NULL) {
1623 memcpy(gfout, gfin, sizeof(*gfout));
1626 /* Call mdc_iocontrol */
1627 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1630 if (cfs_copy_to_user(arg, gfout, outsize))
1634 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size a kernel fiemap buffer from the user's
 * fm_extent_count, copy the request (and first extent, used to continue a
 * previous mapping) in, run ll_do_fiemap(), and copy the mapped extents
 * back out.  Uses vmalloc since extent_count can make the buffer large.
 */
1638 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1640 struct ll_user_fiemap *fiemap_s;
1641 size_t num_bytes, ret_bytes;
1642 unsigned int extent_count;
1645 /* Get the extent count so we can calculate the size of
1646 * required fiemap buffer */
1647 if (get_user(extent_count,
1648 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1650 num_bytes = sizeof(*fiemap_s) + (extent_count *
1651 sizeof(struct ll_fiemap_extent));
1653 OBD_VMALLOC(fiemap_s, num_bytes);
1654 if (fiemap_s == NULL)
1657 /* get the fiemap value */
1658 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1660 GOTO(error, rc = -EFAULT);
1662 /* If fm_extent_count is non-zero, read the first extent since
1663 * it is used to calculate end_offset and device from previous
1666 if (copy_from_user(&fiemap_s->fm_extents[0],
1667 (char __user *)arg + sizeof(*fiemap_s),
1668 sizeof(struct ll_fiemap_extent)))
1669 GOTO(error, rc = -EFAULT);
1672 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
/* copy back the header plus however many extents were mapped */
1676 ret_bytes = sizeof(struct ll_user_fiemap);
1678 if (extent_count != 0)
1679 ret_bytes += (fiemap_s->fm_mapped_extents *
1680 sizeof(struct ll_fiemap_extent));
1682 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1686 OBD_VFREE(fiemap_s, num_bytes);
/*
 * Main ioctl dispatcher for regular files: handles llite-private flag
 * manipulation, stripe get/set, fiemap, group locks, statfs, FID helpers,
 * dynamically registered ioctls, and finally falls through to the data
 * export's obd_iocontrol for anything unrecognized.
 */
1690 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1693 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1697 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1698 inode->i_generation, inode, cmd);
1699 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1701 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1702 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1706 case LL_IOC_GETFLAGS:
1707 /* Get the current value of the file flags */
1708 return put_user(fd->fd_flags, (int *)arg);
1709 case LL_IOC_SETFLAGS:
1710 case LL_IOC_CLRFLAGS:
1711 /* Set or clear specific file flags */
1712 /* XXX This probably needs checks to ensure the flags are
1713 * not abused, and to handle any flag side effects.
1715 if (get_user(flags, (int *) arg))
1718 if (cmd == LL_IOC_SETFLAGS) {
/* disabling locking is only safe for O_DIRECT I/O */
1719 if ((flags & LL_FILE_IGNORE_LOCK) &&
1720 !(file->f_flags & O_DIRECT)) {
1721 CERROR("%s: unable to disable locking on "
1722 "non-O_DIRECT file\n", current->comm);
1726 fd->fd_flags |= flags;
1728 fd->fd_flags &= ~flags;
1731 case LL_IOC_LOV_SETSTRIPE:
1732 RETURN(ll_lov_setstripe(inode, file, arg));
1733 case LL_IOC_LOV_SETEA:
1734 RETURN(ll_lov_setea(inode, file, arg));
1735 case LL_IOC_LOV_GETSTRIPE:
1736 RETURN(ll_lov_getstripe(inode, arg));
1737 case LL_IOC_RECREATE_OBJ:
1738 RETURN(ll_lov_recreate_obj(inode, file, arg));
1739 case FSFILT_IOC_FIEMAP:
1740 RETURN(ll_ioctl_fiemap(inode, arg));
1741 case FSFILT_IOC_GETFLAGS:
1742 case FSFILT_IOC_SETFLAGS:
1743 RETURN(ll_iocontrol(inode, file, cmd, arg));
1744 case FSFILT_IOC_GETVERSION_OLD:
1745 case FSFILT_IOC_GETVERSION:
1746 RETURN(put_user(inode->i_generation, (int *)arg));
1747 case LL_IOC_GROUP_LOCK:
1748 RETURN(ll_get_grouplock(inode, file, arg));
1749 case LL_IOC_GROUP_UNLOCK:
1750 RETURN(ll_put_grouplock(inode, file, arg));
1751 case IOC_OBD_STATFS:
1752 RETURN(ll_obd_statfs(inode, (void *)arg));
1754 /* We need to special case any other ioctls we want to handle,
1755 * to send them to the MDS/OST as appropriate and to properly
1756 * network encode the arg field.
1757 case FSFILT_IOC_SETVERSION_OLD:
1758 case FSFILT_IOC_SETVERSION:
1760 case LL_IOC_FLUSHCTX:
1761 RETURN(ll_flush_ctx(inode));
1762 case LL_IOC_PATH2FID: {
1763 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1764 sizeof(struct lu_fid)))
1769 case OBD_IOC_FID2PATH:
1770 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
/* dynamically registered ioctl handlers get a chance first */
1776 ll_iocontrol_call(inode, file, cmd, arg, &err))
/* otherwise pass the raw ioctl down to the data export */
1779 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.  For SEEK_END the cluster-wide file size must be
 * current, so cl_glimpse_size() is called before reading i_size under the
 * inode size lock.  The resulting offset is range-checked against the
 * filesystem's maximum file size before updating f_pos.
 */
1785 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1787 struct inode *inode = file->f_dentry->d_inode;
/* compute the tentative target purely for the trace message below */
1790 retval = offset + ((origin == 2) ? i_size_read(inode) :
1791 (origin == 1) ? file->f_pos : 0);
1792 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1793 inode->i_ino, inode->i_generation, inode, retval, retval,
1794 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1795 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1797 if (origin == 2) { /* SEEK_END */
1798 int nonblock = 0, rc;
1800 if (file->f_flags & O_NONBLOCK)
1801 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* refresh i_size from the OSTs before using it */
1803 rc = cl_glimpse_size(inode);
1807 ll_inode_size_lock(inode, 0);
1808 offset += i_size_read(inode);
1809 ll_inode_size_unlock(inode, 0);
1810 } else if (origin == 1) { /* SEEK_CUR */
1811 offset += file->f_pos;
1815 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1816 if (offset != file->f_pos) {
1817 file->f_pos = offset;
/*
 * fsync implementation: wait for in-flight page I/O, collect async write
 * errors recorded on the inode and LSM, sync metadata via md_sync() and,
 * for striped files, sync data to the OSTs via obd_sync() over the whole
 * object range [0, OBD_OBJECT_EOF).
 */
1825 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1827 struct inode *inode = dentry->d_inode;
1828 struct ll_inode_info *lli = ll_i2info(inode);
1829 struct lov_stripe_md *lsm = lli->lli_smd;
1830 struct ptlrpc_request *req;
1831 struct obd_capa *oc;
1834 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1835 inode->i_generation, inode);
1836 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1838 /* fsync's caller has already called _fdata{sync,write}, we want
1839 * that IO to finish before calling the osc and mdc sync methods */
1840 rc = filemap_fdatawait(inode->i_mapping);
1842 /* catch async errors that were recorded back when async writeback
1843 * failed for pages in this mapping. */
1844 err = lli->lli_async_rc;
1845 lli->lli_async_rc = 0;
1849 err = lov_test_and_clear_async_rc(lsm);
/* metadata sync against the MDS */
1854 oc = ll_mdscapa_get(inode);
1855 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1861 ptlrpc_req_finished(req);
1868 RETURN(rc ? rc : -ENOMEM);
/* data sync: describe the object, then flush it on the OSTs */
1870 oa->o_id = lsm->lsm_object_id;
1871 oa->o_gr = lsm->lsm_object_gr;
1872 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1873 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1874 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1877 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1878 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1879 0, OBD_OBJECT_EOF, oc);
/*
 * POSIX fcntl()/flock() lock handler: translate the kernel file_lock into
 * an LDLM flock enqueue against the MDS (type LDLM_FLOCK), then mirror the
 * result into the local lock bookkeeping (flock_lock_file_wait /
 * posix_lock_file_wait) so the VFS state matches the cluster state.
 */
1889 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1891 struct inode *inode = file->f_dentry->d_inode;
1892 struct ll_sb_info *sbi = ll_i2sbi(inode);
1893 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1894 .ei_cb_cp =ldlm_flock_completion_ast,
1895 .ei_cbdata = file_lock };
1896 struct md_op_data *op_data;
1897 struct lustre_handle lockh = {0};
1898 ldlm_policy_data_t flock;
1903 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1904 inode->i_ino, file_lock);
1906 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1908 if (file_lock->fl_flags & FL_FLOCK) {
1909 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1910 /* set missing params for flock() calls */
1911 file_lock->fl_end = OFFSET_MAX;
1912 file_lock->fl_pid = current->tgid;
/* encode owner pid and byte range into the LDLM policy */
1914 flock.l_flock.pid = file_lock->fl_pid;
1915 flock.l_flock.start = file_lock->fl_start;
1916 flock.l_flock.end = file_lock->fl_end;
/* map the fcntl lock type onto an LDLM lock mode */
1918 switch (file_lock->fl_type) {
1920 einfo.ei_mode = LCK_PR;
1923 /* An unlock request may or may not have any relation to
1924 * existing locks so we may not be able to pass a lock handle
1925 * via a normal ldlm_lock_cancel() request. The request may even
1926 * unlock a byte range in the middle of an existing lock. In
1927 * order to process an unlock request we need all of the same
1928 * information that is given with a normal read or write record
1929 * lock request. To avoid creating another ldlm unlock (cancel)
1930 * message we'll treat a LCK_NL flock request as an unlock. */
1931 einfo.ei_mode = LCK_NL;
1934 einfo.ei_mode = LCK_PW;
1937 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* non-blocking set request */
1952 flags = LDLM_FL_BLOCK_NOWAIT;
/* F_GETLK-style query: test only, do not take the lock */
1958 flags = LDLM_FL_TEST_LOCK;
1959 /* Save the old mode so that if the mode in the lock changes we
1960 * can decrement the appropriate reader or writer refcount. */
1961 file_lock->fl_type = einfo.ei_mode;
1964 CERROR("unknown fcntl lock command: %d\n", cmd);
1968 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1969 LUSTRE_OPC_ANY, NULL);
1970 if (IS_ERR(op_data))
1971 RETURN(PTR_ERR(op_data));
1973 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
1974 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
1975 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
1977 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
1978 op_data, &lockh, &flock, 0, NULL /* req */, flags);
1980 ll_finish_md_op_data(op_data);
/* reflect a successful enqueue/unlock into the local VFS lock lists */
1982 if ((file_lock->fl_flags & FL_FLOCK) &&
1983 (rc == 0 || file_lock->fl_type == F_UNLCK))
1984 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
1985 #ifdef HAVE_F_OP_FLOCK
1986 if ((file_lock->fl_flags & FL_POSIX) &&
1987 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
1988 !(flags & LDLM_FL_TEST_LOCK))
1989 posix_lock_file_wait(file, file_lock);
/* Stub lock handler installed for -o noflock mounts (see
 * ll_file_operations_noflock below); rejects all lock requests. */
1995 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking a reference) whether this client already holds a
 * granted MDS inodebits lock covering @bits on @inode, in any of the
 * CR/CW/PR/PW modes.  LDLM_FL_TEST_LOCK keeps this a pure query.
 */
2002 int ll_have_md_lock(struct inode *inode, __u64 bits)
2004 struct lustre_handle lockh;
2005 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2013 fid = &ll_i2info(inode)->lli_fid;
2014 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2016 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2017 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2018 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Like ll_have_md_lock() but actually matches (and so references) an
 * existing MDS inodebits lock covering @bits, returning its mode and
 * filling *lockh for the caller to release later.
 */
2024 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2025 struct lustre_handle *lockh)
2027 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2033 fid = &ll_i2info(inode)->lli_fid;
2034 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
/* no LDLM_FL_TEST_LOCK here: the matched lock is really taken */
2036 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2037 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2038 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/* Post-process a revalidate getattr result: -ENOENT on an already-unlinked
 * inode is treated as success (nlink is updated instead); any other error
 * is logged.  Returns the (possibly rewritten) rc. */
2042 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2043 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2044 * and return success */
2046 /* This path cannot be hit for regular files unless in
2047 * case of obscure races, so no need to to validate
2049 if (!S_ISREG(inode->i_mode) &&
2050 !S_ISDIR(inode->i_mode))
2055 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate @dentry's inode attributes against the MDS.  If the server
 * supports getattr-by-FID (OBD_CONNECT_ATTRFID), an IT_GETATTR intent lock
 * is used (also refreshing the dcache state); otherwise, when no covering
 * MD lock is held locally, a plain md_getattr() fetches the attributes and
 * ll_prep_inode() applies them.
 */
2063 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2066 struct inode *inode = dentry->d_inode;
2067 struct ptlrpc_request *req = NULL;
2068 struct ll_sb_info *sbi;
2069 struct obd_export *exp;
/* NOTE(review): debugging aid left in place — fires when inode is NULL */
2074 CERROR("REPORT THIS LINE TO PETER\n");
2077 sbi = ll_i2sbi(inode);
2079 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2080 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2082 exp = ll_i2mdexp(inode);
2084 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2085 struct lookup_intent oit = { .it_op = IT_GETATTR };
2086 struct md_op_data *op_data;
2088 /* Call getattr by fid, so do not provide name at all. */
2089 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2090 dentry->d_inode, NULL, 0, 0,
2091 LUSTRE_OPC_ANY, NULL);
2092 if (IS_ERR(op_data))
2093 RETURN(PTR_ERR(op_data));
2095 oit.it_create_mode |= M_CHECK_STALE;
2096 rc = md_intent_lock(exp, op_data, NULL, 0,
2097 /* we are not interested in name
2100 ll_md_blocking_ast, 0);
2101 ll_finish_md_op_data(op_data);
2102 oit.it_create_mode &= ~M_CHECK_STALE;
2104 rc = ll_inode_revalidate_fini(inode, rc);
2108 rc = ll_revalidate_it_finish(req, &oit, dentry);
2110 ll_intent_release(&oit);
2114 /* Unlinked? Unhash dentry, so it is not picked up later by
2115 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2116 here to preserve get_cwd functionality on 2.6.
2118 if (!dentry->d_inode->i_nlink) {
2119 cfs_spin_lock(&ll_lookup_lock);
2120 spin_lock(&dcache_lock);
2121 ll_drop_dentry(dentry);
2122 spin_unlock(&dcache_lock);
2123 cfs_spin_unlock(&ll_lookup_lock);
2126 ll_lookup_finish_locks(&oit, dentry);
2127 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2129 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2130 obd_valid valid = OBD_MD_FLGETATTR;
2131 struct obd_capa *oc;
/* regular files also need their (possibly large) EA fetched */
2134 if (S_ISREG(inode->i_mode)) {
2135 rc = ll_get_max_mdsize(sbi, &ealen);
2138 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2140 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2141 * capa for this inode. Because we only keep capas of dirs
2143 oc = ll_mdscapa_get(inode);
2144 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2148 rc = ll_inode_revalidate_fini(inode, rc);
2152 rc = ll_prep_inode(&inode, req, NULL);
2155 ptlrpc_req_finished(req);
/*
 * Revalidate UPDATE|LOOKUP inodebits for @dentry, then refresh the file
 * size via a cl glimpse — skipped when no stripe objects exist yet
 * (lli_smd == NULL), since there is no size to fetch.
 */
2159 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2164 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2165 MDS_INODELOCK_LOOKUP);
2167 /* if object not yet allocated, don't validate size */
2168 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2171 /* cl_glimpse_size will prefer locally cached writes if they extend
2175 rc = cl_glimpse_size(dentry->d_inode);
/*
 * Fill @stat from the (revalidated) inode.  Attributes are refreshed via
 * ll_inode_revalidate_it() first; size/blocks are read under the inode
 * size lock to get a consistent pair.
 */
2180 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2181 struct lookup_intent *it, struct kstat *stat)
2183 struct inode *inode = de->d_inode;
2186 res = ll_inode_revalidate_it(de, it);
2187 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2192 stat->dev = inode->i_sb->s_dev;
2193 stat->ino = inode->i_ino;
2194 stat->mode = inode->i_mode;
2195 stat->nlink = inode->i_nlink;
2196 stat->uid = inode->i_uid;
2197 stat->gid = inode->i_gid;
2198 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2199 stat->atime = inode->i_atime;
2200 stat->mtime = inode->i_mtime;
2201 stat->ctime = inode->i_ctime;
2202 #ifdef HAVE_INODE_BLKSIZE
2203 stat->blksize = inode->i_blksize;
2205 stat->blksize = 1 << inode->i_blkbits;
/* size and blocks must be sampled together under the size lock */
2208 ll_inode_size_lock(inode, 0);
2209 stat->size = i_size_read(inode);
2210 stat->blocks = inode->i_blocks;
2211 ll_inode_size_unlock(inode, 0);
/* VFS ->getattr entry point: wraps ll_getattr_it() with a fresh
 * IT_GETATTR intent. */
2215 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2217 struct lookup_intent it = { .it_op = IT_GETATTR };
2219 return ll_getattr_it(mnt, de, &it, stat);
2222 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * VFS ->fiemap entry point (kernels with fiemap support): bridge the
 * kernel's fiemap_extent_info to the Lustre ll_user_fiemap buffer, run
 * ll_do_fiemap(), and copy flags/extents back.
 */
2223 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2224 __u64 start, __u64 len)
2228 struct ll_user_fiemap *fiemap;
2229 unsigned int extent_count = fieinfo->fi_extents_max;
2231 num_bytes = sizeof(*fiemap) + (extent_count *
2232 sizeof(struct ll_fiemap_extent));
2233 OBD_VMALLOC(fiemap, num_bytes);
2238 fiemap->fm_flags = fieinfo->fi_flags;
2239 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2240 fiemap->fm_start = start;
2241 fiemap->fm_length = len;
/* seed with the first extent, used to continue a previous mapping */
2242 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2243 sizeof(struct ll_fiemap_extent));
2245 rc = ll_do_fiemap(inode, fiemap, num_bytes);
2247 fieinfo->fi_flags = fiemap->fm_flags;
2248 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2249 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2250 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2252 OBD_VFREE(fiemap, num_bytes);
/*
 * ACL permission check callback: test @mask against the cached POSIX ACL
 * on the inode (duplicated under lli_lock so the check runs unlocked).
 * Compiled out when CONFIG_FS_POSIX_ACL is disabled.
 */
2259 int lustre_check_acl(struct inode *inode, int mask)
2261 #ifdef CONFIG_FS_POSIX_ACL
2262 struct ll_inode_info *lli = ll_i2info(inode);
2263 struct posix_acl *acl;
2267 cfs_spin_lock(&lli->lli_lock);
2268 acl = posix_acl_dup(lli->lli_posix_acl);
2269 cfs_spin_unlock(&lli->lli_lock);
2274 rc = posix_acl_permission(inode, acl, mask);
2275 posix_acl_release(acl);
2283 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2284 #ifndef HAVE_INODE_PERMISION_2ARGS
/*
 * ->permission for kernels >= 2.6.10: revalidate the root inode when
 * needed, delegate remote-client mounts to the remote-perm check, and
 * otherwise use generic_permission() with the Lustre ACL callback.
 */
2285 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2287 int ll_inode_permission(struct inode *inode, int mask)
2293 /* as root inode are NOT getting validated in lookup operation,
2294 * need to do it before permission check. */
2296 if (inode == inode->i_sb->s_root->d_inode) {
2297 struct lookup_intent it = { .it_op = IT_LOOKUP };
2299 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2300 MDS_INODELOCK_LOOKUP);
2305 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2306 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2308 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2309 return lustre_check_remote_perm(inode, mask);
2311 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2312 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * ->permission for older kernels: open-coded generic_permission()
 * equivalent — owner/group/other mode bits, ACL fallback, then
 * DAC_OVERRIDE / DAC_READ_SEARCH capability checks.  Remote-client mounts
 * still short-circuit to lustre_check_remote_perm().
 */
2317 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2319 int mode = inode->i_mode;
2322 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2323 inode->i_ino, inode->i_generation, inode, mask);
2325 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2326 return lustre_check_remote_perm(inode, mask);
2328 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* deny writes to read-only or immutable inodes */
2330 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2331 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2333 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2335 if (current->fsuid == inode->i_uid) {
2338 if (((mode >> 3) & mask & S_IRWXO) != mask)
2340 rc = lustre_check_acl(inode, mask);
2344 goto check_capabilities;
2348 if (cfs_curproc_is_in_groups(inode->i_gid))
2351 if ((mode & mask & S_IRWXO) == mask)
/* CAP_DAC_OVERRIDE: anything except executing a non-executable file */
2355 if (!(mask & MAY_EXEC) ||
2356 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2357 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2360 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2361 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select the vectored-I/O file_operations member names and handlers
 * depending on whether this kernel uses the old readv/writev API or the
 * newer aio_read/aio_write API; the tables below expand these macros. */
2368 #ifdef HAVE_FILE_READV
2369 #define READ_METHOD readv
2370 #define READ_FUNCTION ll_file_readv
2371 #define WRITE_METHOD writev
2372 #define WRITE_FUNCTION ll_file_writev
2374 #define READ_METHOD aio_read
2375 #define READ_FUNCTION ll_file_aio_read
2376 #define WRITE_METHOD aio_write
2377 #define WRITE_FUNCTION ll_file_aio_write
2380 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations table: no .flock/.lock members, so flock falls
 * back to the kernel's local (single-node) implementation. */
2381 struct file_operations ll_file_operations = {
2382 .read = ll_file_read,
2383 .READ_METHOD = READ_FUNCTION,
2384 .write = ll_file_write,
2385 .WRITE_METHOD = WRITE_FUNCTION,
2386 .ioctl = ll_file_ioctl,
2387 .open = ll_file_open,
2388 .release = ll_file_release,
2389 .mmap = ll_file_mmap,
2390 .llseek = ll_file_seek,
2391 #ifdef HAVE_KERNEL_SENDFILE
2392 .sendfile = ll_file_sendfile,
2394 #ifdef HAVE_KERNEL_SPLICE_READ
2395 .splice_read = ll_file_splice_read,
/* file_operations used for -o flock mounts: identical to the default
 * table but routes flock()/fcntl() locks through ll_file_flock for
 * cluster-wide coherency. */
2400 struct file_operations ll_file_operations_flock = {
2401 .read = ll_file_read,
2402 .READ_METHOD = READ_FUNCTION,
2403 .write = ll_file_write,
2404 .WRITE_METHOD = WRITE_FUNCTION,
2405 .ioctl = ll_file_ioctl,
2406 .open = ll_file_open,
2407 .release = ll_file_release,
2408 .mmap = ll_file_mmap,
2409 .llseek = ll_file_seek,
2410 #ifdef HAVE_KERNEL_SENDFILE
2411 .sendfile = ll_file_sendfile,
2413 #ifdef HAVE_KERNEL_SPLICE_READ
2414 .splice_read = ll_file_splice_read,
2417 #ifdef HAVE_F_OP_FLOCK
2418 .flock = ll_file_flock,
2420 .lock = ll_file_flock
2423 /* These are for -o noflock - to return ENOSYS on flock calls */
2424 struct file_operations ll_file_operations_noflock = {
2425 .read = ll_file_read,
2426 .READ_METHOD = READ_FUNCTION,
2427 .write = ll_file_write,
2428 .WRITE_METHOD = WRITE_FUNCTION,
2429 .ioctl = ll_file_ioctl,
2430 .open = ll_file_open,
2431 .release = ll_file_release,
2432 .mmap = ll_file_mmap,
2433 .llseek = ll_file_seek,
2434 #ifdef HAVE_KERNEL_SENDFILE
2435 .sendfile = ll_file_sendfile,
2437 #ifdef HAVE_KERNEL_SPLICE_READ
2438 .splice_read = ll_file_splice_read,
2441 #ifdef HAVE_F_OP_FLOCK
/* ll_file_noflock rejects all lock requests for this mount option */
2442 .flock = ll_file_noflock,
2444 .lock = ll_file_noflock
/* inode_operations for regular files: attribute get/set, permission
 * checks, xattrs, and (when available) the fiemap extent-mapping hook. */
2447 struct inode_operations ll_file_inode_operations = {
2448 #ifdef HAVE_VFS_INTENT_PATCHES
2449 .setattr_raw = ll_setattr_raw,
2451 .setattr = ll_setattr,
2452 .truncate = ll_truncate,
2453 .getattr = ll_getattr,
2454 .permission = ll_inode_permission,
2455 .setxattr = ll_setxattr,
2456 .getxattr = ll_getxattr,
2457 .listxattr = ll_listxattr,
2458 .removexattr = ll_removexattr,
2459 #ifdef HAVE_LINUX_FIEMAP_H
2460 .fiemap = ll_fiemap,
2464 /* dynamic ioctl number support routins */
/* Global registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries protected by a reader/writer semaphore. */
2465 static struct llioc_ctl_data {
2466 cfs_rw_semaphore_t ioc_sem;
2467 cfs_list_t ioc_head;
2469 __RWSEM_INITIALIZER(llioc.ioc_sem),
2470 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the array of ioctl numbers it
 * serves (iocd_cmd is a trailing variable-length array). */
2475 cfs_list_t iocd_list;
2476 unsigned int iocd_size;
2477 llioc_callback_t iocd_cb;
2478 unsigned int iocd_count;
2479 unsigned int iocd_cmd[0];
/*
 * Register callback @cb for @count ioctl numbers listed in @cmd.  Returns
 * an opaque handle (the allocation itself) used later to unregister, or
 * NULL on bad arguments / allocation failure.
 */
2482 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2485 struct llioc_data *in_data = NULL;
2488 if (cb == NULL || cmd == NULL ||
2489 count > LLIOC_MAX_CMD || count < 0)
/* allocation covers the struct plus the trailing iocd_cmd[] array */
2492 size = sizeof(*in_data) + count * sizeof(unsigned int);
2493 OBD_ALLOC(in_data, size);
2494 if (in_data == NULL)
2497 memset(in_data, 0, sizeof(*in_data));
2498 in_data->iocd_size = size;
2499 in_data->iocd_cb = cb;
2500 in_data->iocd_count = count;
2501 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2503 cfs_down_write(&llioc.ioc_sem);
2504 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2505 cfs_up_write(&llioc.ioc_sem);
/*
 * Remove and free the handler registration identified by @magic (the
 * pointer returned from ll_iocontrol_register).  Logs a warning if the
 * handle is not found in the registry.
 */
2510 void ll_iocontrol_unregister(void *magic)
2512 struct llioc_data *tmp;
2517 cfs_down_write(&llioc.ioc_sem);
2518 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2520 unsigned int size = tmp->iocd_size;
2522 cfs_list_del(&tmp->iocd_list);
2523 cfs_up_write(&llioc.ioc_sem);
2525 OBD_FREE(tmp, size);
2529 cfs_up_write(&llioc.ioc_sem);
2531 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2534 EXPORT_SYMBOL(ll_iocontrol_register);
2535 EXPORT_SYMBOL(ll_iocontrol_unregister);
2537 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2538 unsigned int cmd, unsigned long arg, int *rcp)
2540 enum llioc_iter ret = LLIOC_CONT;
2541 struct llioc_data *data;
2542 int rc = -EINVAL, i;
2544 cfs_down_read(&llioc.ioc_sem);
2545 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2546 for (i = 0; i < data->iocd_count; i++) {
2547 if (cmd != data->iocd_cmd[i])
2550 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2554 if (ret == LLIOC_STOP)
2557 cfs_up_read(&llioc.ioc_sem);