1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/*
 * Allocate a struct ll_file_data from ll_file_data_slab using the
 * CFS_ALLOC_IO allocation flags.
 *
 * NOTE(review): the braces and the return statement are not visible in this
 * excerpt; callers in this file assign the result to a struct ll_file_data
 * pointer, so presumably the allocated object is returned -- confirm.
 */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/*
 * Release @fd back to ll_file_data_slab; counterpart of ll_file_data_get().
 *
 * NOTE(review): any guard around the free (e.g. a NULL check) is not visible
 * in this excerpt.
 */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Fill @op_data with the attributes of @inode and the open handle @fh.
 *
 * Copies the inode's FID, mode, atime/mtime/ctime, size (read with
 * i_size_read()) and block count, the inode flags converted by
 * ll_inode_to_ext_flags(), and the inode's current lli_ioepoch. A copy of
 * *@fh is stored in op_handle and the result of ll_mdscapa_get() in op_capa1.
 */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* op_attr is viewed as a struct ll_iattr here to reach ia_attr_flags. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
79 ll_inode_to_ext_flags(inode->i_flags);
80 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
82 op_data->op_handle = *fh;
83 op_data->op_capa1 = ll_mdscapa_get(inode);
/*
 * Prepare @op_data for closing the open handle @och on @inode.
 *
 * Marks the mode and the a/m/ctime "SET" attributes as valid. When the MD
 * export is not connected with SOM (exp_connect_som()) or the inode is not a
 * regular file, size and blocks are marked valid as well. ll_ioepoch_close()
 * is invoked next (presumably as the alternative branch -- confirm). The
 * inode attributes and the handle are then packed with
 * ll_pack_inode2opdata() and ll_prep_md_op_data().
 *
 * NOTE(review): braces, the statement guarded by the FMODE_WRITE test and any
 * else/label lines are missing from this excerpt, so the exact branch
 * structure cannot be confirmed from here.
 */
87 * Closes the IO epoch and packs all the attributes into @op_data for
90 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
91 struct obd_client_handle *och)
95 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
96 ATTR_MTIME_SET | ATTR_CTIME_SET;
98 if (!(och->och_flags & FMODE_WRITE))
101 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
102 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
104 ll_ioepoch_close(inode, op_data, &och, 0);
107 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
108 ll_prep_md_op_data(op_data, inode, NULL, NULL,
109 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Close the MDS open handle @och of an inode through @md_exp.
 *
 * Visible steps: validate the MDC obd device (logging the export cookie on
 * failure), allocate an md_op_data, fill it with ll_prepare_close(), send
 * the close with md_close(), and on one result path run ll_som_update() to
 * push Size-on-MDS attributes. Afterwards op_data is released,
 * ll_objects_destroy() is called on the reply, and either DONE_WRITING is
 * queued (SOM export, epoch not closed, regular file, write handle) or the
 * open replay data is cleared and the handle cookie is poisoned with
 * DEAD_HANDLE_MAGIC. The close request, if any, is finally released.
 *
 * NOTE(review): the inode parameter line, several conditions (the rc tests
 * selecting each branch), labels and the return are missing from this
 * excerpt; the mapping of branches to return codes must be confirmed
 * against the full file.
 */
113 static int ll_close_inode_openhandle(struct obd_export *md_exp,
115 struct obd_client_handle *och)
117 struct obd_export *exp = ll_i2mdexp(inode);
118 struct md_op_data *op_data;
119 struct ptlrpc_request *req = NULL;
120 struct obd_device *obd = class_exp2obd(exp);
127 * XXX: in case of LMV, is this correct to access
130 CERROR("Invalid MDC connection handle "LPX64"\n",
131 ll_i2mdexp(inode)->exp_handle.h_cookie);
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
/* Record whether op_data carries MF_EPOCH_CLOSE before sending the close. */
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr back to MDS. */
147 rc = ll_som_update(inode, op_data);
149 CERROR("inode %lu mdc Size-on-MDS update failed: "
150 "rc = %d\n", inode->i_ino, rc);
154 CERROR("inode %lu mdc close failed: rc = %d\n",
157 ll_finish_md_op_data(op_data);
160 rc = ll_objects_destroy(req, inode);
162 CERROR("inode %lu ll_objects destroy: rc = %d\n",
169 if (exp_connect_som(exp) && !epoch_close &&
170 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
171 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
173 md_clear_open_replay_data(md_exp, och);
174 /* Free @och if it is not waiting for DONE_WRITING. */
175 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
178 if (req) /* This is close request */
179 ptlrpc_req_finished(req);
/*
 * Close the per-mode MDS open handle of @inode if nobody uses it any more.
 *
 * @flags selects which handle slot and open-fd counter are examined:
 * FMODE_WRITE first, then FMODE_EXEC, otherwise FMODE_READ (asserted).
 * Under lli_och_sem the use counter is checked; when it is non-zero the
 * semaphore is released without closing. Otherwise the handle is taken and,
 * if one is present, closed with ll_close_inode_openhandle().
 *
 * NOTE(review): the lines that read and clear *och_p, the early return and
 * the final return are missing from this excerpt -- confirm.
 */
183 int ll_md_real_close(struct inode *inode, int flags)
185 struct ll_inode_info *lli = ll_i2info(inode);
186 struct obd_client_handle **och_p;
187 struct obd_client_handle *och;
/* Pick the per-mode MDS open handle slot and its open-fd counter. */
192 if (flags & FMODE_WRITE) {
193 och_p = &lli->lli_mds_write_och;
194 och_usecount = &lli->lli_open_fd_write_count;
195 } else if (flags & FMODE_EXEC) {
196 och_p = &lli->lli_mds_exec_och;
197 och_usecount = &lli->lli_open_fd_exec_count;
199 LASSERT(flags & FMODE_READ);
200 och_p = &lli->lli_mds_read_och;
201 och_usecount = &lli->lli_open_fd_read_count;
204 cfs_down(&lli->lli_och_sem);
205 if (*och_usecount) { /* There are still users of this handle, so
207 cfs_up(&lli->lli_och_sem);
212 cfs_up(&lli->lli_och_sem);
214 if (och) { /* There might be a race and somebody have freed this och
216 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close against the metadata export @md_exp.
 *
 * Drops a group lock still held by this descriptor, then, if the dentry has
 * an inode, decrements the open-fd counter matching fd_omode (write, exec
 * or read) under lli_och_sem and probes with md_lock_match() for an
 * MDS_INODELOCK_OPEN ibits lock. When no such lock matches,
 * ll_md_real_close() is called. A dentry without an inode is only reported
 * with CERROR. In all visible paths the file's private data is cleared, the
 * ll_file_data is freed and ll_capa_close() is called.
 *
 * NOTE(review): the file parameter line, the lockmode variable and the
 * remaining md_lock_match()/ll_md_real_close() arguments are missing from
 * this excerpt.
 */
223 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
226 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
227 struct ll_inode_info *lli = ll_i2info(inode);
231 /* clear group lock, if present */
232 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
233 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
235 /* Let's see if we have good enough OPEN lock on the file and if
236 we can skip talking to MDS */
237 if (file->f_dentry->d_inode) { /* Can this ever be false? */
239 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
240 struct lustre_handle lockh;
241 struct inode *inode = file->f_dentry->d_inode;
242 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Drop this descriptor's reference from the per-mode open counter. */
244 cfs_down(&lli->lli_och_sem);
245 if (fd->fd_omode & FMODE_WRITE) {
247 LASSERT(lli->lli_open_fd_write_count);
248 lli->lli_open_fd_write_count--;
249 } else if (fd->fd_omode & FMODE_EXEC) {
251 LASSERT(lli->lli_open_fd_exec_count);
252 lli->lli_open_fd_exec_count--;
255 LASSERT(lli->lli_open_fd_read_count);
256 lli->lli_open_fd_read_count--;
258 cfs_up(&lli->lli_och_sem);
260 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
261 LDLM_IBITS, &policy, lockmode,
263 rc = ll_md_real_close(file->f_dentry->d_inode,
267 CERROR("Releasing a file %p with negative dentry %p. Name %s",
268 file, file->f_dentry, file->f_dentry->d_name.name);
271 LUSTRE_FPRIVATE(file) = NULL;
272 ll_file_data_put(fd);
273 ll_capa_close(inode);
/* Forward declaration; the definition is not part of this excerpt. It is
 * called from ll_file_release() with the inode's stripe MD. */
278 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
/*
 * VFS release handler: tear down the per-open state of @file on @inode.
 *
 * Visible steps: under CONFIG_FS_POSIX_ACL, for a remote client releasing
 * the root inode, clear LL_FILE_RMTACL and drop the rct/et entries of the
 * current pid; account the release in the stats for non-root dentries; stop
 * the statahead thread when this fd is the recorded opendir key; for the
 * root dentry just free the private data; otherwise reset the async error
 * state and call ll_md_close().
 *
 * NOTE(review): the matching #endif, some conditions and the return
 * statements are missing from this excerpt.
 */
280 /* While this returns an error code, fput() the caller does not, so we need
281 * to make every effort to clean up all of our state here. Also, applications
282 * rarely check close errors and even if an error is returned they will not
283 * re-try the close call.
285 int ll_file_release(struct inode *inode, struct file *file)
287 struct ll_file_data *fd;
288 struct ll_sb_info *sbi = ll_i2sbi(inode);
289 struct ll_inode_info *lli = ll_i2info(inode);
290 struct lov_stripe_md *lsm = lli->lli_smd;
294 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
295 inode->i_generation, inode);
297 #ifdef CONFIG_FS_POSIX_ACL
298 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
299 inode == inode->i_sb->s_root->d_inode) {
300 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
303 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
304 fd->fd_flags &= ~LL_FILE_RMTACL;
305 rct_del(&sbi->ll_rct, cfs_curproc_pid());
306 et_search_free(&sbi->ll_et, cfs_curproc_pid());
311 if (inode->i_sb->s_root != file->f_dentry)
312 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
313 fd = LUSTRE_FPRIVATE(file);
316 /* The last ref on @file, maybe not the owner pid of statahead.
317 * Different processes can open the same dir, "ll_opendir_key" means:
318 * it is me that should stop the statahead thread. */
319 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
320 ll_stop_statahead(inode, lli->lli_opendir_key);
/* The filesystem root dentry only needs its private data dropped. */
322 if (inode->i_sb->s_root == file->f_dentry) {
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
/* Reset the recorded async error state before closing on the MDS. */
329 lov_test_and_clear_async_rc(lsm);
330 lli->lli_async_rc = 0;
332 rc = ll_md_close(sbi->ll_md_exp, inode, file);
334 if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
335 libcfs_debug_dumplog();
/*
 * Open @file on the MDS by enqueueing the intent @itp.
 *
 * @lmm/@lmmsize optionally carry striping information; when both are zero
 * the open lock (MDS_OPEN_LOCK) is requested as well. An md_op_data is
 * built for the file's name under its parent directory and passed to
 * md_intent_lock() with ll_md_blocking_ast as the blocking callback. On one
 * error path an open that did succeed is released again with
 * ll_release_openhandle(). On success the inode is refreshed from the reply
 * with ll_prep_inode() and, if a lock mode was granted, the inode is
 * attached to the lock with md_set_lock_data(). The visible tail finishes
 * the request stored in the intent, clears DISP_ENQ_COMPLETE and drops the
 * intent lock.
 *
 * NOTE(review): the tests selecting the error paths (including the one
 * referred to by the -ESTALE comment), the labels and the return are
 * missing from this excerpt.
 */
340 static int ll_intent_file_open(struct file *file, void *lmm,
341 int lmmsize, struct lookup_intent *itp)
343 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
344 struct dentry *parent = file->f_dentry->d_parent;
345 const char *name = file->f_dentry->d_name.name;
346 const int len = file->f_dentry->d_name.len;
347 struct md_op_data *op_data;
348 struct ptlrpc_request *req;
355 /* Usually we come here only for NFSD, and we want open lock.
356 But we can also get here with pre 2.6.15 patchless kernels, and in
357 that case that lock is also ok */
358 /* We can also get here if there was cached open handle in revalidate_it
359 * but it disappeared while we were getting from there to ll_file_open.
360 * But this means this file was closed and immediately opened which
361 * makes a good candidate for using OPEN lock */
362 /* If lmmsize & lmm are not 0, we are just setting stripe info
363 * parameters. No need for the open lock */
364 if (!lmm && !lmmsize)
365 itp->it_flags |= MDS_OPEN_LOCK;
367 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
368 file->f_dentry->d_inode, name, len,
369 O_RDWR, LUSTRE_OPC_ANY, NULL);
371 RETURN(PTR_ERR(op_data));
373 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
374 0 /*unused */, &req, ll_md_blocking_ast, 0);
375 ll_finish_md_op_data(op_data);
377 /* reason for keep own exit path - don`t flood log
378 * with messages with -ESTALE errors.
380 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
381 it_open_error(DISP_OPEN_OPEN, itp))
383 ll_release_openhandle(file->f_dentry, itp);
387 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
388 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
389 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
393 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
394 if (!rc && itp->d.lustre.it_lock_mode)
395 md_set_lock_data(sbi->ll_md_exp,
396 &itp->d.lustre.it_lock_handle,
397 file->f_dentry->d_inode, NULL);
400 ptlrpc_req_finished(itp->d.lustre.it_data);
401 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
402 ll_intent_drop_lock(itp);
/*
 * Record @ioepoch in @lli.
 *
 * Only a non-zero @ioepoch that differs from the stored lli_ioepoch is
 * written; the change is logged at D_INODE together with the inode's FID.
 */
408 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
409 * not believe attributes if a few ioepoch holders exist. Attributes for
410 * previous ioepoch if new one is opened are also skipped by MDS.
412 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
414 if (ioepoch && lli->lli_ioepoch != ioepoch) {
415 lli->lli_ioepoch = ioepoch;
416 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
417 ioepoch, PFID(&lli->lli_fid));
/*
 * Initialise the client open handle @och from the open reply held in @it.
 *
 * Reads the mdt_body from the reply of the request stored in the intent,
 * copies the server file handle into och_fh, sets the handle magic, FID and
 * open flags, and records the reply's ioepoch through ll_ioepoch_open().
 * Returns the result of md_set_open_replay_data() for @och and the request.
 */
421 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
422 struct lookup_intent *it, struct obd_client_handle *och)
424 struct ptlrpc_request *req = it->d.lustre.it_data;
425 struct mdt_body *body;
429 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
430 LASSERT(body != NULL); /* reply already checked out */
432 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
433 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
434 och->och_fid = lli->lli_fid;
435 och->och_flags = it->it_flags;
436 ll_ioepoch_open(lli, body->ioepoch);
438 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish an open locally: attach @fd to @file as its private data.
 *
 * Asserts that @file has no private data yet. In a block whose guard is not
 * visible here (presumably "och is non-NULL" -- confirm), the handle is
 * filled with ll_och_fill() and the epoch is logged for write opens whose
 * reply body has OBD_MD_FLSIZE set. Then @fd becomes the file's private
 * data, its readahead state is initialised and fd_omode is set from the
 * intent flags.
 *
 * NOTE(review): the guard condition, the rc check after ll_och_fill() and
 * the return are missing from this excerpt.
 */
441 int ll_local_open(struct file *file, struct lookup_intent *it,
442 struct ll_file_data *fd, struct obd_client_handle *och)
444 struct inode *inode = file->f_dentry->d_inode;
445 struct ll_inode_info *lli = ll_i2info(inode);
448 LASSERT(!LUSTRE_FPRIVATE(file));
453 struct ptlrpc_request *req = it->d.lustre.it_data;
454 struct mdt_body *body;
457 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
461 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
462 if ((it->it_flags & FMODE_WRITE) &&
463 (body->valid & OBD_MD_FLSIZE))
464 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
465 lli->lli_ioepoch, PFID(&lli->lli_fid));
468 LUSTRE_FPRIVATE(file) = fd;
469 ll_readahead_init(inode, &fd->fd_ras);
470 fd->fd_omode = it->it_flags;
/*
 * VFS open handler for Lustre files and directories.
 *
 * Flow visible in this excerpt:
 *  - allocate the per-open ll_file_data with ll_file_data_get();
 *  - for directories, record this fd and the current pid as the opendir
 *    key/pid under lli_sa_lock when none is recorded yet;
 *  - for the filesystem root dentry, attach fd as the file's private data;
 *  - when no intent with a disposition was passed in (the intent is taken
 *    from file->private_data under HAVE_VFS_INTENT_PATCHES), build the
 *    local intent oit from f_flags: O_TRUNC implies FMODE_WRITE, write
 *    opens get MDS_OPEN_OWNEROVERRIDE, O_EXCL is cleared and O_CREAT adds
 *    IT_CREAT;
 *  - select the per-mode MDS open handle slot (write, exec or read) and,
 *    under lli_och_sem, either reuse an existing handle (releasing an extra
 *    open carried by the intent) or obtain one through
 *    ll_intent_file_open() and ll_local_open() with a newly allocated
 *    obd_client_handle;
 *  - on the error paths shown, free fd and/or the new handle, and stop
 *    statahead when opendir_set is non-zero.
 *
 * NOTE(review): many lines (braces, labels, several conditions, the
 * assignment of opendir_set and the return statements) are missing from
 * this excerpt, so the exact control flow between these steps must be
 * confirmed against the full file.
 */
474 /* Open a file, and (for the very first open) create objects on the OSTs at
475 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
476 * creation or open until ll_lov_setstripe() ioctl is called. We grab
477 * lli_open_sem to ensure no other process will create objects, send the
478 * stripe MD to the MDS, or try to destroy the objects if that fails.
480 * If we already have the stripe MD locally then we don't request it in
481 * md_open(), by passing a lmm_size = 0.
483 * It is up to the application to ensure no other processes open this file
484 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
485 * used. We might be able to avoid races of that sort by getting lli_open_sem
486 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
487 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
489 int ll_file_open(struct inode *inode, struct file *file)
491 struct ll_inode_info *lli = ll_i2info(inode);
492 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
493 .it_flags = file->f_flags };
494 struct lov_stripe_md *lsm;
495 struct ptlrpc_request *req = NULL;
496 struct obd_client_handle **och_p;
498 struct ll_file_data *fd;
499 int rc = 0, opendir_set = 0;
502 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
503 inode->i_generation, inode, file->f_flags);
505 #ifdef HAVE_VFS_INTENT_PATCHES
508 it = file->private_data; /* XXX: compat macro */
509 file->private_data = NULL; /* prevent ll_local_open assertion */
512 fd = ll_file_data_get();
/* For directories, claim the opendir key/pid if nobody holds them yet. */
517 if (S_ISDIR(inode->i_mode)) {
518 cfs_spin_lock(&lli->lli_sa_lock);
519 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
520 LASSERT(lli->lli_sai == NULL);
521 lli->lli_opendir_key = fd;
522 lli->lli_opendir_pid = cfs_curproc_pid();
525 cfs_spin_unlock(&lli->lli_sa_lock);
/* Opening the root dentry only attaches the private data. */
528 if (inode->i_sb->s_root == file->f_dentry) {
529 LUSTRE_FPRIVATE(file) = fd;
533 if (!it || !it->d.lustre.it_disposition) {
534 /* Convert f_flags into access mode. We cannot use file->f_mode,
535 * because everything but O_ACCMODE mask was stripped from
537 if ((oit.it_flags + 1) & O_ACCMODE)
539 if (file->f_flags & O_TRUNC)
540 oit.it_flags |= FMODE_WRITE;
542 /* kernel only call f_op->open in dentry_open. filp_open calls
543 * dentry_open after call to open_namei that checks permissions.
544 * Only nfsd_open call dentry_open directly without checking
545 * permissions and because of that this code below is safe. */
546 if (oit.it_flags & FMODE_WRITE)
547 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
549 /* We do not want O_EXCL here, presumably we opened the file
550 * already? XXX - NFS implications? */
551 oit.it_flags &= ~O_EXCL;
553 /* bug20584, if "it_flags" contains O_CREAT, the file will be
554 * created if necessary, then "IT_CREAT" should be set to keep
555 * consistent with it */
556 if (oit.it_flags & O_CREAT)
557 oit.it_op |= IT_CREAT;
563 /* Let's see if we have file open on MDS already. */
564 if (it->it_flags & FMODE_WRITE) {
565 och_p = &lli->lli_mds_write_och;
566 och_usecount = &lli->lli_open_fd_write_count;
567 } else if (it->it_flags & FMODE_EXEC) {
568 och_p = &lli->lli_mds_exec_och;
569 och_usecount = &lli->lli_open_fd_exec_count;
571 och_p = &lli->lli_mds_read_och;
572 och_usecount = &lli->lli_open_fd_read_count;
575 cfs_down(&lli->lli_och_sem);
576 if (*och_p) { /* Open handle is present */
577 if (it_disposition(it, DISP_OPEN_OPEN)) {
578 /* Well, there's extra open request that we do not need,
579 let's close it somehow. This will decref request. */
580 rc = it_open_error(DISP_OPEN_OPEN, it);
582 cfs_up(&lli->lli_och_sem);
583 ll_file_data_put(fd);
584 GOTO(out_openerr, rc);
586 ll_release_openhandle(file->f_dentry, it);
587 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
592 rc = ll_local_open(file, it, fd, NULL);
595 cfs_up(&lli->lli_och_sem);
596 ll_file_data_put(fd);
597 GOTO(out_openerr, rc);
600 LASSERT(*och_usecount == 0);
601 if (!it->d.lustre.it_disposition) {
602 /* We cannot just request lock handle now, new ELC code
603 means that one of other OPEN locks for this file
604 could be cancelled, and since blocking ast handler
605 would attempt to grab och_sem as well, that would
606 result in a deadlock */
607 cfs_up(&lli->lli_och_sem);
608 it->it_create_mode |= M_CHECK_STALE;
609 rc = ll_intent_file_open(file, NULL, 0, it);
610 it->it_create_mode &= ~M_CHECK_STALE;
612 ll_file_data_put(fd);
613 GOTO(out_openerr, rc);
616 /* Got some error? Release the request */
617 if (it->d.lustre.it_status < 0) {
618 req = it->d.lustre.it_data;
619 ptlrpc_req_finished(req);
623 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
625 ll_file_data_put(fd);
626 GOTO(out_och_free, rc = -ENOMEM);
629 req = it->d.lustre.it_data;
631 /* md_intent_lock() didn't get a request ref if there was an
632 * open error, so don't do cleanup on the request here
634 /* XXX (green): Should not we bail out on any error here, not
635 * just open error? */
636 rc = it_open_error(DISP_OPEN_OPEN, it);
638 ll_file_data_put(fd);
639 GOTO(out_och_free, rc);
642 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
643 rc = ll_local_open(file, it, fd, *och_p);
645 ll_file_data_put(fd);
646 GOTO(out_och_free, rc);
649 cfs_up(&lli->lli_och_sem);
651 /* Must do this outside lli_och_sem lock to prevent deadlock where
652 different kind of OPEN lock for this same inode gets cancelled
653 by ldlm_cancel_lru */
654 if (!S_ISREG(inode->i_mode))
661 if (file->f_flags & O_LOV_DELAY_CREATE ||
662 !(file->f_mode & FMODE_WRITE)) {
663 CDEBUG(D_INODE, "object creation was delayed\n");
667 file->f_flags &= ~O_LOV_DELAY_CREATE;
670 ptlrpc_req_finished(req);
672 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
676 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
677 *och_p = NULL; /* OBD_FREE writes some magic there */
680 cfs_up(&lli->lli_och_sem);
682 if (opendir_set != 0)
683 ll_stop_statahead(inode, lli->lli_opendir_key);
/*
 * Fetch object attributes for the stripe MD @lsm through @exp.
 *
 * Sets up an obd_info whose obdo carries the object id/seq from @lsm, a
 * regular-file mode, @ioepoch and the list of requested attribute bits
 * (size, blocks, block size, times, group, epoch), plus @capa. On a path
 * whose guard is not visible (presumably "@sync is non-zero" -- confirm),
 * OBD_MD_FLFLAGS and OBD_FL_SRVLOCK are added. The request is sent with
 * obd_getattr_async() on a newly prepared request set, waited for with
 * ptlrpc_set_wait(), and the set is destroyed. Finally o_valid is masked
 * down to blocks, block size, times and size.
 *
 * NOTE(review): the lines attaching @lsm and @obdo to oinfo, the rc checks
 * and the return are missing from this excerpt.
 */
689 /* Fills the obdo with the attributes for the lsm */
690 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
691 struct obd_capa *capa, struct obdo *obdo,
692 __u64 ioepoch, int sync)
694 struct ptlrpc_request_set *set;
695 struct obd_info oinfo = { { { 0 } } };
700 LASSERT(lsm != NULL);
704 oinfo.oi_oa->o_id = lsm->lsm_object_id;
705 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
706 oinfo.oi_oa->o_mode = S_IFREG;
707 oinfo.oi_oa->o_ioepoch = ioepoch;
708 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
709 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
710 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
711 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
712 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
713 oinfo.oi_capa = capa;
715 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
716 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
719 set = ptlrpc_prep_set();
721 CERROR("can't allocate ptlrpc set\n");
724 rc = obd_getattr_async(exp, &oinfo, set);
726 rc = ptlrpc_set_wait(set);
727 ptlrpc_set_destroy(set);
/* Keep only the attribute bits that callers consume from the reply. */
730 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
731 OBD_MD_FLATIME | OBD_MD_FLMTIME |
732 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
/*
 * Refresh @inode from the attributes of its objects.
 *
 * Obtains an MDS capability with ll_mdscapa_get(), calls ll_lsm_getattr()
 * on the inode's stripe MD through the data export with @ioepoch and @sync,
 * and applies the returned @obdo to the inode with obdo_refresh_inode(),
 * logging the resulting object id, size, blocks and block size.
 *
 * NOTE(review): the rc check guarding the refresh, the release of the
 * capability and the return are missing from this excerpt.
 */
737 * Performs the getattr on the inode and updates its fields.
738 * If @sync != 0, perform the getattr under the server-side lock.
740 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
741 __u64 ioepoch, int sync)
743 struct ll_inode_info *lli = ll_i2info(inode);
744 struct obd_capa *capa = ll_mdscapa_get(inode);
748 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
749 capa, obdo, ioepoch, sync);
752 obdo_refresh_inode(inode, obdo, obdo->o_valid);
754 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
755 lli->lli_smd->lsm_object_id, i_size_read(inode),
756 (unsigned long long)inode->i_blocks,
757 (unsigned long)ll_inode_blksize(inode));
/*
 * Merge the lock value block information of @inode's objects into the inode.
 *
 * With the inode size lock held, an lvb is initialised from the inode,
 * seeded with the timestamps cached in lli_lvb, and merged across the
 * stripes by obd_merge_lvb(). The merged size, block count and
 * mtime/atime/ctime are then written back to the inode.
 *
 * NOTE(review): the declarations of lvb/rc and the return are missing from
 * this excerpt; the inode fields are updated regardless of rc in the lines
 * that are visible.
 */
762 int ll_merge_lvb(struct inode *inode)
764 struct ll_inode_info *lli = ll_i2info(inode);
765 struct ll_sb_info *sbi = ll_i2sbi(inode);
771 ll_inode_size_lock(inode, 1);
772 inode_init_lvb(inode, &lvb);
774 /* merge timestamps the most recently obtained from mds with
775 timestamps obtained from osts */
776 lvb.lvb_atime = lli->lli_lvb.lvb_atime;
777 lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
778 lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
779 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
780 cl_isize_write_nolock(inode, lvb.lvb_size);
781 inode->i_blocks = lvb.lvb_blocks;
783 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
784 LTIME_S(inode->i_atime) = lvb.lvb_atime;
785 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
786 ll_inode_size_unlock(inode, 1);
/*
 * Fill the stat buffer st from the object attributes of @lsm.
 *
 * Calls ll_lsm_getattr() on the data export of @sbi with no capability,
 * ioepoch 0 and sync 0, then copies size, blocks and mtime/atime/ctime from
 * the returned obdo into st.
 *
 * NOTE(review): the st parameter line, the rc check guarding the copy and
 * the return are missing from this excerpt.
 */
791 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
794 struct obdo obdo = { 0 };
797 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
799 st->st_size = obdo.o_size;
800 st->st_blocks = obdo.o_blocks;
801 st->st_mtime = obdo.o_mtime;
802 st->st_atime = obdo.o_atime;
803 st->st_ctime = obdo.o_ctime;
/*
 * Initialise @io for a read or write on @file.
 *
 * Zeroes the cl_io, copies the O_NONBLOCK and O_APPEND state from f_flags,
 * points ci_obj at the inode's cl_object and chooses the lock requirement:
 * CILR_MAYBE by default, CILR_NEVER (with ci_no_srvlock set) when
 * ll_file_nolock() is true, and CILR_MANDATORY for O_APPEND files.
 *
 * NOTE(review): the guard before the wr_append assignment (presumably
 * "if (write)" -- confirm) is missing from this excerpt.
 */
808 void ll_io_init(struct cl_io *io, const struct file *file, int write)
810 struct inode *inode = file->f_dentry->d_inode;
812 memset(io, 0, sizeof *io);
813 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
815 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
816 io->ci_obj = ll_i2info(inode)->lli_clob;
817 io->ci_lockreq = CILR_MAYBE;
818 if (ll_file_nolock(file)) {
819 io->ci_lockreq = CILR_NEVER;
820 io->ci_no_srvlock = 1;
821 } else if (file->f_flags & O_APPEND) {
822 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common read/write engine shared by the file I/O entry points.
 *
 * Initialises the per-environment cl_io with ll_io_init() and
 * cl_io_rw_init() for @iot at *@ppos/@count. When cl_io_rw_init() returns 0
 * the I/O subtype specific state is copied from @args (iovec and iocb for
 * the normal case, actor/target for sendfile, pipe/flags for splice) and
 * the I/O is driven by cl_io_loop(); otherwise the result is taken from
 * io->ci_result. For normal writes without a group lock, lli_write_sem is
 * held around cl_io_loop().
 *
 * NOTE(review): the case labels, the cl_io_fini() call (if any), the
 * non-write update of *ppos and the return are missing from this excerpt.
 * The log message below reads "Unknow"; it is a runtime string and is left
 * untouched here.
 */
826 static ssize_t ll_file_io_generic(const struct lu_env *env,
827 struct vvp_io_args *args, struct file *file,
828 enum cl_io_type iot, loff_t *ppos, size_t count)
834 io = &ccc_env_info(env)->cti_io;
835 ll_io_init(io, file, iot == CIT_WRITE);
837 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
838 struct vvp_io *vio = vvp_env_io(env);
839 struct ccc_io *cio = ccc_env_io(env);
840 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
841 int write_sem_locked = 0;
843 cio->cui_fd = LUSTRE_FPRIVATE(file);
844 vio->cui_io_subtype = args->via_io_subtype;
846 switch (vio->cui_io_subtype) {
848 cio->cui_iov = args->u.normal.via_iov;
849 cio->cui_nrsegs = args->u.normal.via_nrsegs;
850 cio->cui_tot_nrsegs = cio->cui_nrsegs;
851 #ifndef HAVE_FILE_WRITEV
852 cio->cui_iocb = args->u.normal.via_iocb;
/* Writes without a group lock take lli_write_sem around the I/O loop. */
854 if ((iot == CIT_WRITE) &&
855 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
856 cfs_down(&lli->lli_write_sem);
857 write_sem_locked = 1;
861 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
862 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
865 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
866 vio->u.splice.cui_flags = args->u.splice.via_flags;
869 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
872 result = cl_io_loop(env, io);
873 if (write_sem_locked)
874 cfs_up(&lli->lli_write_sem);
876 /* cl_io_rw_init() handled IO */
877 result = io->ci_result;
880 if (io->ci_nob > 0) {
882 *ppos = io->u.ci_wr.wr.crw_pos;
/*
 * Walk the @iov array of *@nr_segs segments and accumulate their lengths.
 *
 * For each segment the running total combined with the segment length is
 * checked for a negative value when viewed as ssize_t, and the user buffer
 * is checked with access_ok(VERIFY_READ, ...). On a path following a failed
 * access check, the segment length is subtracted from the total again.
 *
 * NOTE(review): the accumulation statement, the updates of *nr_segs and
 * *count, and all return statements are missing from this excerpt; the
 * -EINVAL result is taken from the original comment fragment below.
 */
890 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
892 static int ll_file_get_iov_count(const struct iovec *iov,
893 unsigned long *nr_segs, size_t *count)
898 for (seg = 0; seg < *nr_segs; seg++) {
899 const struct iovec *iv = &iov[seg];
902 * If any segment has a negative length, or the cumulative
903 * length ever wraps negative then return -EINVAL.
906 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
908 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
913 cnt -= iv->iov_len; /* This segment is no good */
/*
 * Vectored read entry point, built when HAVE_FILE_READV is defined.
 *
 * Validates the iovec with ll_file_get_iov_count(), obtains a cl
 * environment, stores the iovec and segment count in the IO_NORMAL
 * arguments and runs ll_file_io_generic() with CIT_READ at *@ppos. The
 * environment is released before returning.
 *
 * NOTE(review): local declarations, the rc/IS_ERR checks and the final
 * return are missing from this excerpt.
 */
920 #ifdef HAVE_FILE_READV
921 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
922 unsigned long nr_segs, loff_t *ppos)
925 struct vvp_io_args *args;
931 result = ll_file_get_iov_count(iov, &nr_segs, &count);
935 env = cl_env_get(&refcheck);
937 RETURN(PTR_ERR(env));
939 args = vvp_env_args(env, IO_NORMAL);
940 args->u.normal.via_iov = (struct iovec *)iov;
941 args->u.normal.via_nrsegs = nr_segs;
943 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
944 cl_env_put(env, &refcheck);
/*
 * Plain read entry point for the readv-based build: wraps @buf/@count in
 * the per-environment single iovec and forwards to ll_file_readv().
 *
 * NOTE(review): the ppos parameter line, local declarations, the IS_ERR
 * check and the return are missing from this excerpt.
 */
948 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
952 struct iovec *local_iov;
957 env = cl_env_get(&refcheck);
959 RETURN(PTR_ERR(env));
961 local_iov = &vvp_env_info(env)->vti_local_iov;
962 local_iov->iov_base = (void __user *)buf;
963 local_iov->iov_len = count;
964 result = ll_file_readv(file, local_iov, 1, ppos);
965 cl_env_put(env, &refcheck);
/*
 * AIO read entry point.
 *
 * Validates the iovec with ll_file_get_iov_count(), obtains a cl
 * environment, stores the iovec, segment count and @iocb in the IO_NORMAL
 * arguments and runs ll_file_io_generic() with CIT_READ on iocb->ki_filp,
 * using &iocb->ki_pos as the file position (the @pos argument is not used
 * in the visible lines). The environment is released before returning.
 *
 * NOTE(review): local declarations, the rc/IS_ERR checks and the final
 * return are missing from this excerpt.
 */
970 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
971 unsigned long nr_segs, loff_t pos)
974 struct vvp_io_args *args;
980 result = ll_file_get_iov_count(iov, &nr_segs, &count);
984 env = cl_env_get(&refcheck);
986 RETURN(PTR_ERR(env));
988 args = vvp_env_args(env, IO_NORMAL);
989 args->u.normal.via_iov = (struct iovec *)iov;
990 args->u.normal.via_nrsegs = nr_segs;
991 args->u.normal.via_iocb = iocb;
993 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
994 &iocb->ki_pos, count);
995 cl_env_put(env, &refcheck);
/*
 * Plain read entry point for the AIO-based build.
 *
 * Wraps @buf/@count in the per-environment single iovec, initialises the
 * per-environment kiocb as a synchronous iocb positioned at *ppos with
 * ki_left = @count, forwards to ll_file_aio_read() and copies the resulting
 * ki_pos back to *ppos.
 *
 * NOTE(review): the ppos parameter line, local declarations, the IS_ERR
 * check and the return are missing from this excerpt.
 */
999 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1003 struct iovec *local_iov;
1004 struct kiocb *kiocb;
1009 env = cl_env_get(&refcheck);
1011 RETURN(PTR_ERR(env));
1013 local_iov = &vvp_env_info(env)->vti_local_iov;
1014 kiocb = &vvp_env_info(env)->vti_kiocb;
1015 local_iov->iov_base = (void __user *)buf;
1016 local_iov->iov_len = count;
1017 init_sync_kiocb(kiocb, file);
1018 kiocb->ki_pos = *ppos;
1019 kiocb->ki_left = count;
1021 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1022 *ppos = kiocb->ki_pos;
1024 cl_env_put(env, &refcheck);
/*
 * Vectored write entry point, built when HAVE_FILE_WRITEV is defined.
 *
 * Validates the iovec with ll_file_get_iov_count(), obtains a cl
 * environment, stores the iovec and segment count in the IO_NORMAL
 * arguments and runs ll_file_io_generic() with CIT_WRITE at *@ppos. The
 * environment is released before returning.
 *
 * NOTE(review): local declarations, the rc/IS_ERR checks and the final
 * return are missing from this excerpt.
 */
1030 * Write to a file (through the page cache).
1032 #ifdef HAVE_FILE_WRITEV
1033 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1034 unsigned long nr_segs, loff_t *ppos)
1037 struct vvp_io_args *args;
1043 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1047 env = cl_env_get(&refcheck);
1049 RETURN(PTR_ERR(env));
1051 args = vvp_env_args(env, IO_NORMAL);
1052 args->u.normal.via_iov = (struct iovec *)iov;
1053 args->u.normal.via_nrsegs = nr_segs;
1055 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1056 cl_env_put(env, &refcheck);
/*
 * Plain write entry point for the writev-based build: wraps @buf/@count in
 * the per-environment single iovec and forwards to ll_file_writev().
 *
 * NOTE(review): the ppos parameter line, local declarations, the IS_ERR
 * check and the return are missing from this excerpt.
 */
1060 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1064 struct iovec *local_iov;
1069 env = cl_env_get(&refcheck);
1071 RETURN(PTR_ERR(env));
1073 local_iov = &vvp_env_info(env)->vti_local_iov;
1074 local_iov->iov_base = (void __user *)buf;
1075 local_iov->iov_len = count;
1077 result = ll_file_writev(file, local_iov, 1, ppos);
1078 cl_env_put(env, &refcheck);
/*
 * AIO write entry point (the #else branch of HAVE_FILE_WRITEV).
 *
 * Validates the iovec with ll_file_get_iov_count(), obtains a cl
 * environment, stores the iovec, segment count and @iocb in the IO_NORMAL
 * arguments and runs ll_file_io_generic() with CIT_WRITE on iocb->ki_filp,
 * using &iocb->ki_pos as the file position (the @pos argument is not used
 * in the visible lines). The environment is released before returning.
 *
 * NOTE(review): local declarations, the rc/IS_ERR checks and the final
 * return are missing from this excerpt.
 */
1082 #else /* AIO stuff */
1083 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1084 unsigned long nr_segs, loff_t pos)
1087 struct vvp_io_args *args;
1093 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1097 env = cl_env_get(&refcheck);
1099 RETURN(PTR_ERR(env));
1101 args = vvp_env_args(env, IO_NORMAL);
1102 args->u.normal.via_iov = (struct iovec *)iov;
1103 args->u.normal.via_nrsegs = nr_segs;
1104 args->u.normal.via_iocb = iocb;
1106 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1107 &iocb->ki_pos, count);
1108 cl_env_put(env, &refcheck);
/*
 * Plain write entry point for the AIO-based build.
 *
 * Wraps @buf/@count in the per-environment single iovec, initialises the
 * per-environment kiocb as a synchronous iocb positioned at *ppos with
 * ki_left = @count, forwards to ll_file_aio_write() and copies the
 * resulting ki_pos back to *ppos.
 *
 * NOTE(review): the ppos parameter line, local declarations, the IS_ERR
 * check and the return are missing from this excerpt.
 */
1112 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1116 struct iovec *local_iov;
1117 struct kiocb *kiocb;
1122 env = cl_env_get(&refcheck);
1124 RETURN(PTR_ERR(env));
1126 local_iov = &vvp_env_info(env)->vti_local_iov;
1127 kiocb = &vvp_env_info(env)->vti_kiocb;
1128 local_iov->iov_base = (void __user *)buf;
1129 local_iov->iov_len = count;
1130 init_sync_kiocb(kiocb, file);
1131 kiocb->ki_pos = *ppos;
1132 kiocb->ki_left = count;
1134 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1135 *ppos = kiocb->ki_pos;
1137 cl_env_put(env, &refcheck);
/*
 * sendfile entry point, built when HAVE_KERNEL_SENDFILE is defined.
 *
 * Obtains a cl environment, stores @target and @actor in the IO_SENDFILE
 * arguments and runs ll_file_io_generic() with CIT_READ on @in_file at
 * *@ppos for @count bytes. The environment is released before returning.
 *
 * NOTE(review): local declarations, the IS_ERR check, the return and the
 * matching #endif are missing from this excerpt.
 */
1143 #ifdef HAVE_KERNEL_SENDFILE
1145 * Send file content (through pagecache) somewhere with helper
1147 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1148 read_actor_t actor, void *target)
1151 struct vvp_io_args *args;
1156 env = cl_env_get(&refcheck);
1158 RETURN(PTR_ERR(env));
1160 args = vvp_env_args(env, IO_SENDFILE);
1161 args->u.sendfile.via_target = target;
1162 args->u.sendfile.via_actor = actor;
1164 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1165 cl_env_put(env, &refcheck);
/*
 * splice_read entry point, built when HAVE_KERNEL_SPLICE_READ is defined.
 *
 * Obtains a cl environment, stores @pipe and the splice flags in the
 * IO_SPLICE arguments and runs ll_file_io_generic() with CIT_READ on
 * @in_file at *@ppos for @count bytes. The environment is released before
 * returning.
 *
 * NOTE(review): the flags parameter line, local declarations, the IS_ERR
 * check, the return and the matching #endif are missing from this excerpt.
 */
1170 #ifdef HAVE_KERNEL_SPLICE_READ
1172 * Send file content (through pagecache) somewhere with helper
1174 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1175 struct pipe_inode_info *pipe, size_t count,
1179 struct vvp_io_args *args;
1184 env = cl_env_get(&refcheck);
1186 RETURN(PTR_ERR(env));
1188 args = vvp_env_args(env, IO_SPLICE);
1189 args->u.splice.via_pipe = pipe;
1190 args->u.splice.via_flags = flags;
1192 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1193 cl_env_put(env, &refcheck);
/*
 * ioctl helper: recreate an object described by a user-supplied
 * struct ll_recreate_obj.
 *
 * Requires CFS_CAP_SYS_ADMIN. The request is copied from user space, and
 * under the inode size lock the inode's stripe MD is copied into a
 * temporary buffer sized for its stripe count. An obdo is filled with the
 * requested id/seq, the OST index (carried in o_nlink), the
 * OBD_FL_RECREATE_OBJS flag and inode attributes, then passed to
 * obd_create() on the data export. The temporary stripe MD is freed and the
 * size lock dropped afterwards.
 *
 * NOTE(review): the arg parameter line, the allocation and release of oa,
 * the failure returns for the capability and copy checks, the NULL tests
 * and the final return are missing from this excerpt.
 */
1198 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1201 struct obd_export *exp = ll_i2dtexp(inode);
1202 struct ll_recreate_obj ucreatp;
1203 struct obd_trans_info oti = { 0 };
1204 struct obdo *oa = NULL;
1207 struct lov_stripe_md *lsm, *lsm2;
1210 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1213 if (cfs_copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1214 sizeof(struct ll_recreate_obj)))
1221 ll_inode_size_lock(inode, 0);
1222 lsm = ll_i2info(inode)->lli_smd;
1224 GOTO(out, rc = -ENOENT);
/* Size of the stripe MD including one lov_oinfo per stripe. */
1225 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1226 (lsm->lsm_stripe_count));
1228 OBD_ALLOC(lsm2, lsm_size);
1230 GOTO(out, rc = -ENOMEM);
1232 oa->o_id = ucreatp.lrc_id;
1233 oa->o_seq = ucreatp.lrc_seq;
1234 oa->o_nlink = ucreatp.lrc_ost_idx;
1235 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1236 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1237 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid, OBD_MD_FLTYPE |
1238 OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1239 memcpy(lsm2, lsm, lsm_size);
1240 rc = obd_create(exp, oa, &lsm2, &oti);
1242 OBD_FREE(lsm2, lsm_size);
1245 ll_inode_size_unlock(inode, 0);
/*
 * Set the striping of @inode from the user layout @lum of @lum_size bytes.
 *
 * Under the inode size lock, a path that drops the lock and logs "stripe
 * already exists" is taken when a stripe MD is already present (the test
 * itself is not visible -- confirm). Otherwise the layout is sent to the
 * MDS by ll_intent_file_open() with an IT_OPEN intent carrying @flags. A
 * negative lookup yields -ENOENT and a non-zero intent status is returned
 * as the error; on success the open handle obtained for the operation is
 * released with ll_release_openhandle(). The visible tail drops the size
 * lock, releases the intent and finishes the intent's request.
 *
 * NOTE(review): the rc tests, labels and returns are missing from this
 * excerpt.
 */
1250 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1251 int flags, struct lov_user_md *lum, int lum_size)
1253 struct lov_stripe_md *lsm;
1254 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1258 ll_inode_size_lock(inode, 0);
1259 lsm = ll_i2info(inode)->lli_smd;
1261 ll_inode_size_unlock(inode, 0);
1262 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1267 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1270 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1271 GOTO(out_req_free, rc = -ENOENT);
1272 rc = oit.d.lustre.it_status;
1274 GOTO(out_req_free, rc);
1276 ll_release_openhandle(file->f_dentry, &oit);
1279 ll_inode_size_unlock(inode, 0);
1280 ll_intent_release(&oit);
1283 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the striping EA of @filename (looked up under @inode) from the MDS.
 *
 * Sizes the request with ll_get_max_mdsize(), issues md_getattr_name() with
 * OBD_MD_FLEASIZE | OBD_MD_FLDIREA, and reads the EA of body->eadatasize
 * bytes from the reply. Replies without EA data yield -ENODATA and an EA
 * whose magic is neither LOV_MAGIC_V1 nor LOV_MAGIC_V3 yields -EPROTO. When
 * the host byte order differs from little endian, the V1/V3 header is
 * byte-swapped, and for regular files the per-object entries as well. The
 * EA size is stored in *@lmm_size.
 *
 * NOTE(review): the rc checks, the stores to *lmmp and *request, the "out"
 * label and the return are missing from this excerpt, so who must release
 * the request cannot be confirmed from here.
 */
1287 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1288 struct lov_mds_md **lmmp, int *lmm_size,
1289 struct ptlrpc_request **request)
1291 struct ll_sb_info *sbi = ll_i2sbi(inode);
1292 struct mdt_body *body;
1293 struct lov_mds_md *lmm = NULL;
1294 struct ptlrpc_request *req = NULL;
1295 struct md_op_data *op_data;
1298 rc = ll_get_max_mdsize(sbi, &lmmsize);
1302 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1303 strlen(filename), lmmsize,
1304 LUSTRE_OPC_ANY, NULL);
1305 if (op_data == NULL)
1308 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1309 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1310 ll_finish_md_op_data(op_data);
1312 CDEBUG(D_INFO, "md_getattr_name failed "
1313 "on %s: rc %d\n", filename, rc);
1317 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1318 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1320 lmmsize = body->eadatasize;
1322 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1324 GOTO(out, rc = -ENODATA);
1327 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1328 LASSERT(lmm != NULL);
/* Only the V1 and V3 layout magics are accepted. */
1330 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1331 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1332 GOTO(out, rc = -EPROTO);
1336 * This is coming from the MDS, so is probably in
1337 * little endian. We convert it to host endian before
1338 * passing it to userspace.
1340 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1341 /* if function called for directory - we should
1342 * avoid swab not existent lsm objects */
1343 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1344 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1345 if (S_ISREG(body->mode))
1346 lustre_swab_lov_user_md_objects(
1347 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1348 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1349 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1350 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1351 if (S_ISREG(body->mode))
1352 lustre_swab_lov_user_md_objects(
1353 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1354 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1360 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: set striping from a user descriptor that
 * already names its objects (MDS_OPEN_HAS_OBJS).
 *
 * The caller must hold CFS_CAP_SYS_ADMIN.  A buffer large enough for one
 * struct lov_user_md plus one struct lov_user_ost_data is allocated and
 * filled from the user pointer in 'arg', then passed to
 * ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS | FMODE_WRITE.  The
 * buffer is freed on the copy-failure path and after the call.
 */
1365 static int ll_lov_setea(struct inode *inode, struct file *file,
1368 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1369 struct lov_user_md *lump;
1370 int lum_size = sizeof(struct lov_user_md) +
1371 sizeof(struct lov_user_ost_data);
1375 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1378 OBD_ALLOC(lump, lum_size);
1382 if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1383 OBD_FREE(lump, lum_size);
1387 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1389 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: set striping from a v1 or v3 user
 * descriptor located at the user address 'arg'.
 *
 * A v3-sized buffer on the stack receives the descriptor.  The v1-sized
 * prefix is copied first; if its magic is LOV_USER_MAGIC_V3 the full v3
 * structure is copied again.  The descriptor is applied with
 * ll_lov_setstripe_ea_info() using FMODE_WRITE.  Afterwards the user's
 * lmm_stripe_count is zeroed and the resulting layout is reported through
 * the LL_IOC_LOV_GETSTRIPE obd_iocontrol call.
 *
 * NOTE(review): the put_user() result is not checked in the visible code.
 */
1393 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1396 struct lov_user_md_v3 lumv3;
1397 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1398 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1399 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1402 int flags = FMODE_WRITE;
1405 /* first try with v1 which is smaller than v3 */
1406 lum_size = sizeof(struct lov_user_md_v1);
1407 if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
1410 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1411 lum_size = sizeof(struct lov_user_md_v3);
1412 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1416 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1418 put_user(0, &lumv1p->lmm_stripe_count);
1419 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1420 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE handler: forward the request, together with the
 * inode's cached stripe metadata (lli_smd), to the data export through
 * obd_iocontrol() and return its result.
 */
1426 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1428 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1433 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * LL_IOC_GROUP_LOCK handler: take a group lock with group id 'arg' and
 * record it in the per-open-file data.
 *
 * Returns -EOPNOTSUPP when ll_file_nolock() is true for the file.  If the
 * file descriptor already has LL_FILE_GROUP_LOCKED set, a warning is
 * logged and the request is refused.  On success the lock is stored in
 * fd->fd_grouplock and LL_FILE_GROUP_LOCKED is set, both under lli_lock.
 */
1437 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1439 struct ll_inode_info *lli = ll_i2info(inode);
1440 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1441 struct ccc_grouplock grouplock;
1445 if (ll_file_nolock(file))
1446 RETURN(-EOPNOTSUPP);
1448 cfs_spin_lock(&lli->lli_lock);
1449 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1450 CWARN("group lock already existed with gid %lu\n",
1451 fd->fd_grouplock.cg_gid);
1452 cfs_spin_unlock(&lli->lli_lock);
1455 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1456 cfs_spin_unlock(&lli->lli_lock);
/* lli_lock is dropped around cl_get_grouplock(), so the flag has to be
 * re-checked once the spinlock is re-taken below. */
1458 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1459 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1463 cfs_spin_lock(&lli->lli_lock);
1464 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1465 cfs_spin_unlock(&lli->lli_lock);
1466 CERROR("another thread just won the race\n");
1467 cl_put_grouplock(&grouplock);
1471 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1472 fd->fd_grouplock = grouplock;
1473 cfs_spin_unlock(&lli->lli_lock);
1475 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock recorded for this
 * open file.
 *
 * The request is refused, with a warning, when no group lock is held or
 * when 'arg' does not match the recorded group id.  Otherwise the lock
 * descriptor is copied out and fd->fd_grouplock and the
 * LL_FILE_GROUP_LOCKED flag are cleared under lli_lock.  The copy is then
 * released with cl_put_grouplock() after the spinlock has been dropped.
 */
1479 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1481 struct ll_inode_info *lli = ll_i2info(inode);
1482 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1483 struct ccc_grouplock grouplock;
1486 cfs_spin_lock(&lli->lli_lock);
1487 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1488 cfs_spin_unlock(&lli->lli_lock);
1489 CWARN("no group lock held\n");
1492 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1494 if (fd->fd_grouplock.cg_gid != arg) {
1495 CWARN("group lock %lu doesn't match current id %lu\n",
1496 arg, fd->fd_grouplock.cg_gid);
1497 cfs_spin_unlock(&lli->lli_lock);
1501 grouplock = fd->fd_grouplock;
1502 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1503 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1504 cfs_spin_unlock(&lli->lli_lock);
1506 cl_put_grouplock(&grouplock);
1507 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1512 * Close inode open handle
1514 * \param dentry [in] dentry which contains the inode
1515 * \param it [in,out] intent which contains open info and result
1518 * \retval <0 failure
/*
 * Nothing is done for the filesystem root dentry or when the intent has
 * no DISP_OPEN_OPEN disposition.  Otherwise a temporary obd_client_handle
 * is allocated (-ENOMEM on failure), filled from the intent with
 * ll_och_fill() and closed through ll_close_inode_openhandle().  Finally
 * the request reference flagged by DISP_ENQ_OPEN_REF is dropped if it is
 * set, and that disposition bit is cleared in either case.
 */
1520 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1522 struct inode *inode = dentry->d_inode;
1523 struct obd_client_handle *och;
1529 /* Root ? Do nothing. */
1530 if (dentry->d_inode->i_sb->s_root == dentry)
1533 /* No open handle to close? Move away */
1534 if (!it_disposition(it, DISP_OPEN_OPEN))
1537 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1539 OBD_ALLOC(och, sizeof(*och));
1541 GOTO(out, rc = -ENOMEM);
1543 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1544 ll_i2info(inode), it, och);
1546 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1549 /* this one is in place of ll_file_open */
1550 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1551 ptlrpc_req_finished(it->d.lustre.it_data);
1552 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1557 * Gets the size of the inode for which a FIEMAP mapping is requested,
1558 * makes the FIEMAP get_info call and returns the result.
/*
 * inode  - file being mapped
 * fiemap - request header on input, and the buffer handed to
 *          obd_get_info() for the result
 *
 * Flags outside LUSTRE_FIEMAP_FLAGS_COMPAT are rejected; fm_flags is
 * first reduced to just the unsupported bits.  With FIEMAP_FLAG_SYNC the
 * mapping's dirty pages are written out first.  Files with more than one
 * stripe require FIEMAP_FLAG_DEVICE_ORDER.  The object id/seq from the
 * stripe metadata and attributes from the inode are packed into the
 * get_info key.  A zero o_size short-circuits with fm_mapped_extents = 0.
 * Otherwise the request is copied into the key and sent with
 * obd_get_info().
 */
1560 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1563 struct obd_export *exp = ll_i2dtexp(inode);
1564 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1565 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1566 int vallen = num_bytes;
1570 /* Checks for fiemap flags */
1571 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1572 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1576 /* Check for FIEMAP_FLAG_SYNC */
1577 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1578 rc = filemap_fdatawrite(inode->i_mapping);
1583 /* If the stripe_count > 1 and the application does not understand
1584 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1586 if (lsm->lsm_stripe_count > 1 &&
1587 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1590 fm_key.oa.o_id = lsm->lsm_object_id;
1591 fm_key.oa.o_seq = lsm->lsm_object_seq;
1592 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1594 obdo_from_inode(&fm_key.oa, inode, &ll_i2info(inode)->lli_fid,
1596 /* If filesize is 0, then there would be no objects for mapping */
1597 if (fm_key.oa.o_size == 0) {
1598 fiemap->fm_mapped_extents = 0;
1602 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1604 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1606 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH helper: resolve a FID to a path through the given
 * export.
 *
 * exp - export the OBD_IOC_FID2PATH iocontrol is sent to
 * arg - user pointer to a struct getinfo_fid2path; it is also where the
 *       result (header plus gf_pathlen bytes) is copied back
 *
 * The fixed-size header is copied in first so that gf_pathlen is known.
 * A second buffer with room for the header and the path is then allocated
 * and seeded with the header before the iocontrol call.
 */
1611 int ll_fid2path(struct obd_export *exp, void *arg)
1613 struct getinfo_fid2path *gfout, *gfin;
1617 /* Need to get the buflen */
1618 OBD_ALLOC_PTR(gfin);
1621 if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
/* NOTE(review): gf_pathlen comes straight from user space and no upper
 * bound is visible before it is used to size the allocation - confirm
 * that it is limited somewhere. */
1626 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1627 OBD_ALLOC(gfout, outsize);
1628 if (gfout == NULL) {
1632 memcpy(gfout, gfin, sizeof(*gfout));
1635 /* Call mdc_iocontrol */
1636 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1639 if (cfs_copy_to_user(arg, gfout, outsize))
1643 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: run a FIEMAP request whose header and extent
 * array live at the user address 'arg'.
 *
 * fm_extent_count is read from user space first to size a vmalloc'ed
 * buffer holding the header plus that many extents.  The header is then
 * copied in and, as the inline comment explains, the first extent as well
 * when fm_extent_count is non-zero.  After ll_do_fiemap() the header is
 * copied back to user space, followed by fm_mapped_extents extents when
 * the caller asked for any.  The buffer is released with OBD_VFREE.
 */
1647 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1649 struct ll_user_fiemap *fiemap_s;
1650 size_t num_bytes, ret_bytes;
1651 unsigned int extent_count;
1654 /* Get the extent count so we can calculate the size of
1655 * required fiemap buffer */
1656 if (get_user(extent_count,
1657 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
/* NOTE(review): extent_count is user controlled and no bound is visible
 * before this multiplication - confirm the size cannot wrap or become
 * unreasonably large. */
1659 num_bytes = sizeof(*fiemap_s) + (extent_count *
1660 sizeof(struct ll_fiemap_extent));
1662 OBD_VMALLOC(fiemap_s, num_bytes);
1663 if (fiemap_s == NULL)
1666 /* get the fiemap value */
1667 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1669 GOTO(error, rc = -EFAULT);
1671 /* If fm_extent_count is non-zero, read the first extent since
1672 * it is used to calculate end_offset and device from previous
1675 if (copy_from_user(&fiemap_s->fm_extents[0],
1676 (char __user *)arg + sizeof(*fiemap_s),
1677 sizeof(struct ll_fiemap_extent)))
1678 GOTO(error, rc = -EFAULT);
1681 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1685 ret_bytes = sizeof(struct ll_user_fiemap);
1687 if (extent_count != 0)
1688 ret_bytes += (fiemap_s->fm_mapped_extents *
1689 sizeof(struct ll_fiemap_extent));
1691 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1695 OBD_VFREE(fiemap_s, num_bytes);
/*
 * ioctl entry point for regular Lustre files (installed as .ioctl in the
 * file_operations tables in this file).
 *
 * The call is traced and tallied under LPROC_LL_IOCTL.  Commands whose
 * ioctl type is 'T' or 't' (tty ioctls) are filtered out first.  The
 * remaining commands are dispatched by 'cmd':
 *  - LL_IOC_GETFLAGS / SETFLAGS / CLRFLAGS read or modify fd->fd_flags;
 *    setting LL_FILE_IGNORE_LOCK is refused on files opened without
 *    O_DIRECT.
 *  - striping, object recreation, FIEMAP, group lock, statfs, flush
 *    context and FID/path commands are forwarded to their helpers.
 *  - FSFILT_IOC_GETVERSION{,_OLD} return inode->i_generation.
 *  - LL_IOC_PATH2FID copies the inode's FID to user space.
 *  - LL_IOC_GET_MDTIDX returns ll_get_mdt_idx().
 * Commands not matched above are offered to dynamically registered
 * handlers through ll_iocontrol_call(), and finally passed to the data
 * export with obd_iocontrol().
 */
1699 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1702 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1706 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1707 inode->i_generation, inode, cmd);
1708 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1710 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1711 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1715 case LL_IOC_GETFLAGS:
1716 /* Get the current value of the file flags */
1717 return put_user(fd->fd_flags, (int *)arg);
1718 case LL_IOC_SETFLAGS:
1719 case LL_IOC_CLRFLAGS:
1720 /* Set or clear specific file flags */
1721 /* XXX This probably needs checks to ensure the flags are
1722 * not abused, and to handle any flag side effects.
1724 if (get_user(flags, (int *) arg))
1727 if (cmd == LL_IOC_SETFLAGS) {
1728 if ((flags & LL_FILE_IGNORE_LOCK) &&
1729 !(file->f_flags & O_DIRECT)) {
1730 CERROR("%s: unable to disable locking on "
1731 "non-O_DIRECT file\n", current->comm);
1735 fd->fd_flags |= flags;
1737 fd->fd_flags &= ~flags;
1740 case LL_IOC_LOV_SETSTRIPE:
1741 RETURN(ll_lov_setstripe(inode, file, arg));
1742 case LL_IOC_LOV_SETEA:
1743 RETURN(ll_lov_setea(inode, file, arg));
1744 case LL_IOC_LOV_GETSTRIPE:
1745 RETURN(ll_lov_getstripe(inode, arg));
1746 case LL_IOC_RECREATE_OBJ:
1747 RETURN(ll_lov_recreate_obj(inode, file, arg));
1748 case FSFILT_IOC_FIEMAP:
1749 RETURN(ll_ioctl_fiemap(inode, arg));
1750 case FSFILT_IOC_GETFLAGS:
1751 case FSFILT_IOC_SETFLAGS:
1752 RETURN(ll_iocontrol(inode, file, cmd, arg));
1753 case FSFILT_IOC_GETVERSION_OLD:
1754 case FSFILT_IOC_GETVERSION:
1755 RETURN(put_user(inode->i_generation, (int *)arg));
1756 case LL_IOC_GROUP_LOCK:
1757 RETURN(ll_get_grouplock(inode, file, arg));
1758 case LL_IOC_GROUP_UNLOCK:
1759 RETURN(ll_put_grouplock(inode, file, arg));
1760 case IOC_OBD_STATFS:
1761 RETURN(ll_obd_statfs(inode, (void *)arg));
1763 /* We need to special case any other ioctls we want to handle,
1764 * to send them to the MDS/OST as appropriate and to properly
1765 * network encode the arg field.
1766 case FSFILT_IOC_SETVERSION_OLD:
1767 case FSFILT_IOC_SETVERSION:
1769 case LL_IOC_FLUSHCTX:
1770 RETURN(ll_flush_ctx(inode));
1771 case LL_IOC_PATH2FID: {
1772 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1773 sizeof(struct lu_fid)))
1778 case OBD_IOC_FID2PATH:
1779 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1781 case LL_IOC_GET_MDTIDX: {
1784 mdtidx = ll_get_mdt_idx(inode);
1788 if (put_user((int)mdtidx, (int*)arg))
1798 ll_iocontrol_call(inode, file, cmd, arg, &err))
1801 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation (installed as .llseek in the file_operations
 * tables in this file).
 *
 * 'origin' uses the numeric whence values: 2 is SEEK_END, 1 is SEEK_CUR,
 * anything else is treated as SEEK_SET.  A preliminary target is computed
 * for the trace message.  For SEEK_END the size is refreshed with
 * cl_glimpse_size() before i_size is added to the offset.  The new
 * position is stored in f_pos only when it lies within
 * [0, ll_file_maxbytes(inode)].
 *
 * NOTE(review): 'nonblock' is assigned from O_NONBLOCK but no use of it
 * is visible; confirm whether it is dead.
 */
1807 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1809 struct inode *inode = file->f_dentry->d_inode;
1812 retval = offset + ((origin == 2) ? i_size_read(inode) :
1813 (origin == 1) ? file->f_pos : 0);
1814 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1815 inode->i_ino, inode->i_generation, inode, retval, retval,
1816 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1817 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1819 if (origin == 2) { /* SEEK_END */
1820 int nonblock = 0, rc;
1822 if (file->f_flags & O_NONBLOCK)
1823 nonblock = LDLM_FL_BLOCK_NOWAIT;
1825 rc = cl_glimpse_size(inode);
1829 offset += i_size_read(inode);
1830 } else if (origin == 1) { /* SEEK_CUR */
1831 offset += file->f_pos;
1835 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1836 if (offset != file->f_pos) {
1837 file->f_pos = offset;
/*
 * fsync implementation for Lustre files.
 *
 * The steps visible here are, in order:
 *  1. wait for in-flight page writeback (filemap_fdatawait);
 *  2. collect and reset the asynchronous write error stored in
 *     lli_async_rc, then the one kept by the LOV layer
 *     (lov_test_and_clear_async_rc);
 *  3. sync the metadata server with md_sync();
 *  4. build an obdo for the file's object and sync the whole range
 *     [0, OBD_OBJECT_EOF] on the data export with obd_sync(), using an
 *     OSS write capability.
 * How the individual error codes are merged into the return value is not
 * visible in the lines present here.
 */
1845 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1847 struct inode *inode = dentry->d_inode;
1848 struct ll_inode_info *lli = ll_i2info(inode);
1849 struct lov_stripe_md *lsm = lli->lli_smd;
1850 struct ptlrpc_request *req;
1851 struct obd_capa *oc;
1854 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1855 inode->i_generation, inode);
1856 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1858 /* fsync's caller has already called _fdata{sync,write}, we want
1859 * that IO to finish before calling the osc and mdc sync methods */
1860 rc = filemap_fdatawait(inode->i_mapping);
1862 /* catch async errors that were recorded back when async writeback
1863 * failed for pages in this mapping. */
1864 err = lli->lli_async_rc;
1865 lli->lli_async_rc = 0;
1869 err = lov_test_and_clear_async_rc(lsm);
1874 oc = ll_mdscapa_get(inode);
1875 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1881 ptlrpc_req_finished(req);
1888 RETURN(rc ? rc : -ENOMEM);
1890 oa->o_id = lsm->lsm_object_id;
1891 oa->o_seq = lsm->lsm_object_seq;
1892 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1893 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid,
1894 OBD_MD_FLTYPE | OBD_MD_FLATIME |
1895 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1898 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1899 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1900 0, OBD_OBJECT_EOF, oc);
/*
 * flock()/fcntl() lock handler that enqueues the lock on the metadata
 * server as an LDLM_FLOCK lock (installed as .flock and/or .lock in
 * ll_file_operations_flock).
 *
 * For FL_FLOCK requests the missing range and owner fields are filled in
 * (fl_end = OFFSET_MAX, fl_pid = current->tgid).  The owner pid and byte
 * range are copied into the LDLM policy data.  fl_type selects the LDLM
 * mode: LCK_PR, LCK_PW, or LCK_NL, which per the inline comment is how an
 * unlock is expressed.  'cmd' selects the enqueue flags: either
 * LDLM_FL_BLOCK_NOWAIT or LDLM_FL_TEST_LOCK is set for the non-default
 * cases.  Unknown lock types and commands are logged with CERROR.
 *
 * After md_enqueue(), when it succeeded or the request was an unlock, the
 * local lock bookkeeping is updated.  FL_FLOCK requests use
 * ll_flock_lock_file_wait().  FL_POSIX requests that are not a test use
 * posix_lock_file_wait(), and only when HAVE_F_OP_FLOCK is defined.
 */
1910 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1912 struct inode *inode = file->f_dentry->d_inode;
1913 struct ll_sb_info *sbi = ll_i2sbi(inode);
1914 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1915 .ei_cb_cp =ldlm_flock_completion_ast,
1916 .ei_cbdata = file_lock };
1917 struct md_op_data *op_data;
1918 struct lustre_handle lockh = {0};
1919 ldlm_policy_data_t flock;
1924 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1925 inode->i_ino, file_lock);
1927 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1929 if (file_lock->fl_flags & FL_FLOCK) {
1930 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1931 /* set missing params for flock() calls */
1932 file_lock->fl_end = OFFSET_MAX;
1933 file_lock->fl_pid = current->tgid;
1935 flock.l_flock.pid = file_lock->fl_pid;
1936 flock.l_flock.start = file_lock->fl_start;
1937 flock.l_flock.end = file_lock->fl_end;
1939 switch (file_lock->fl_type) {
1941 einfo.ei_mode = LCK_PR;
1944 /* An unlock request may or may not have any relation to
1945 * existing locks so we may not be able to pass a lock handle
1946 * via a normal ldlm_lock_cancel() request. The request may even
1947 * unlock a byte range in the middle of an existing lock. In
1948 * order to process an unlock request we need all of the same
1949 * information that is given with a normal read or write record
1950 * lock request. To avoid creating another ldlm unlock (cancel)
1951 * message we'll treat a LCK_NL flock request as an unlock. */
1952 einfo.ei_mode = LCK_NL;
1955 einfo.ei_mode = LCK_PW;
1958 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
1973 flags = LDLM_FL_BLOCK_NOWAIT;
1979 flags = LDLM_FL_TEST_LOCK;
1980 /* Save the old mode so that if the mode in the lock changes we
1981 * can decrement the appropriate reader or writer refcount. */
1982 file_lock->fl_type = einfo.ei_mode;
1985 CERROR("unknown fcntl lock command: %d\n", cmd);
1989 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1990 LUSTRE_OPC_ANY, NULL);
1991 if (IS_ERR(op_data))
1992 RETURN(PTR_ERR(op_data));
1994 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
1995 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
1996 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
1998 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
1999 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2001 ll_finish_md_op_data(op_data);
2003 if ((file_lock->fl_flags & FL_FLOCK) &&
2004 (rc == 0 || file_lock->fl_type == F_UNLCK))
2005 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2006 #ifdef HAVE_F_OP_FLOCK
2007 if ((file_lock->fl_flags & FL_POSIX) &&
2008 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2009 !(flags & LDLM_FL_TEST_LOCK))
2010 posix_lock_file_wait(file, file_lock);
/*
 * Lock handler installed as .flock/.lock in ll_file_operations_noflock.
 * NOTE(review): per the comment on that table it is meant to fail flock
 * calls with ENOSYS; the body is not visible here - confirm.
 */
2016 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test whether an inodebits metadata lock covering 'bits' is already
 * held on this inode's FID.
 *
 * The match is done through md_lock_match() with LDLM_FL_TEST_LOCK, which
 * is combined with LDLM_FL_BLOCK_GRANTED and LDLM_FL_CBPENDING.  Any of
 * the CR, CW, PR or PW modes is accepted.
 */
2023 int ll_have_md_lock(struct inode *inode, __u64 bits)
2025 struct lustre_handle lockh;
2026 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2034 fid = &ll_i2info(inode)->lli_fid;
2035 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2037 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2038 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2039 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Match an inodebits metadata lock covering 'bits' on this inode's FID
 * and hand its handle back through 'lockh'.
 *
 * It differs from ll_have_md_lock() in that LDLM_FL_TEST_LOCK is not
 * passed and the caller supplies the lock handle.  The result of
 * md_lock_match() is kept in rc.
 */
2045 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2046 struct lustre_handle *lockh)
2048 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2054 fid = &ll_i2info(inode)->lli_fid;
2055 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2057 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2058 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2059 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process the result code of a revalidation RPC.
 *
 * -ENOENT is handled specially: the inline comment describes it as
 * "already unlinked", with a distinct branch for inodes that are neither
 * regular files nor directories.  Another error path logs the failure
 * with CERROR.  The exact return values are not visible in the lines
 * present here.
 */
2063 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2064 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2065 * and return success */
2067 /* This path cannot be hit for regular files unless in
2068 * case of obscure races, so no need to to validate
2070 if (!S_ISREG(inode->i_mode) &&
2071 !S_ISDIR(inode->i_mode))
2076 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Refresh the attributes of dentry->d_inode from the metadata server.
 *
 * Two strategies are visible:
 *  - If the MD export advertises OBD_CONNECT_ATTRFID, an IT_GETATTR
 *    intent lock is taken by FID (no name is supplied) with
 *    M_CHECK_STALE set for the duration of the call.  The reply is
 *    applied with ll_revalidate_it_finish().  A dentry whose inode has a
 *    zero link count is dropped with ll_drop_dentry() under
 *    ll_lookup_lock and dcache_lock.  Lookup locks are then finalised.
 *  - Otherwise, if no metadata lock covering the requested bits is held
 *    (ll_have_md_lock() on 'ibits'), a plain md_getattr() is issued.  For
 *    regular files it also requests the EA size.  The reply is applied
 *    with ll_prep_inode().
 * Errors from either RPC are passed through ll_inode_revalidate_fini().
 *
 * NOTE(review): the first path checks ll_prep_md_op_data() with IS_ERR()
 * while the second checks for NULL - confirm which is correct.
 */
2084 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2087 struct inode *inode = dentry->d_inode;
2088 struct ptlrpc_request *req = NULL;
2089 struct ll_sb_info *sbi;
2090 struct obd_export *exp;
2095 CERROR("REPORT THIS LINE TO PETER\n");
2098 sbi = ll_i2sbi(inode);
2100 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2101 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2103 exp = ll_i2mdexp(inode);
2105 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2106 struct lookup_intent oit = { .it_op = IT_GETATTR };
2107 struct md_op_data *op_data;
2109 /* Call getattr by fid, so do not provide name at all. */
2110 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2111 dentry->d_inode, NULL, 0, 0,
2112 LUSTRE_OPC_ANY, NULL);
2113 if (IS_ERR(op_data))
2114 RETURN(PTR_ERR(op_data));
2116 oit.it_create_mode |= M_CHECK_STALE;
2117 rc = md_intent_lock(exp, op_data, NULL, 0,
2118 /* we are not interested in name
2121 ll_md_blocking_ast, 0);
2122 ll_finish_md_op_data(op_data);
2123 oit.it_create_mode &= ~M_CHECK_STALE;
2125 rc = ll_inode_revalidate_fini(inode, rc);
2129 rc = ll_revalidate_it_finish(req, &oit, dentry);
2131 ll_intent_release(&oit);
2135 /* Unlinked? Unhash dentry, so it is not picked up later by
2136 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2137 here to preserve get_cwd functionality on 2.6.
2139 if (!dentry->d_inode->i_nlink) {
2140 cfs_spin_lock(&ll_lookup_lock);
2141 spin_lock(&dcache_lock);
2142 ll_drop_dentry(dentry);
2143 spin_unlock(&dcache_lock);
2144 cfs_spin_unlock(&ll_lookup_lock);
2147 ll_lookup_finish_locks(&oit, dentry);
2148 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2149 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2150 obd_valid valid = OBD_MD_FLGETATTR;
2151 struct md_op_data *op_data;
2154 if (S_ISREG(inode->i_mode)) {
2155 rc = ll_get_max_mdsize(sbi, &ealen);
2158 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2161 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2162 0, ealen, LUSTRE_OPC_ANY,
2164 if (op_data == NULL)
2167 op_data->op_valid = valid;
2168 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2169 * capa for this inode. Because we only keep capas of dirs
2171 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2172 ll_finish_md_op_data(op_data);
2174 rc = ll_inode_revalidate_fini(inode, rc);
2178 rc = ll_prep_inode(&inode, req, NULL);
2181 ptlrpc_req_finished(req);
/*
 * Revalidate an inode's metadata and then its size.
 *
 * Metadata is refreshed with __ll_inode_revalidate_it() under the UPDATE
 * and LOOKUP inodebits.  If that succeeds and the file has no stripe
 * metadata yet (lli_smd == NULL), the a/m/ctime values cached in lli_lvb
 * are copied into the inode and, per the inline comment, size validation
 * is skipped.  cl_glimpse_size() is the size-refresh call; the condition
 * guarding it is not visible in the lines present here.
 */
2185 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2187 struct inode *inode = dentry->d_inode;
2191 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2192 MDS_INODELOCK_LOOKUP);
2194 /* if object not yet allocated, don't validate size */
2195 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
2196 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2197 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2198 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2202 /* cl_glimpse_size will prefer locally cached writes if they extend
2206 rc = cl_glimpse_size(inode);
/*
 * Revalidate the inode behind 'de' with the supplied intent and fill
 * 'stat' from it.
 *
 * The call is tallied under LPROC_LL_GETATTR.  For 32-bit callers
 * (cfs_curproc_is_32bit()) the inode number is derived from the FID with
 * cl_fid_build_ino32(); otherwise inode->i_ino is used.  The block size
 * comes from i_blksize when HAVE_INODE_BLKSIZE is defined, otherwise from
 * 1 << i_blkbits.  The remaining fields are copied straight from the
 * inode.
 */
2211 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2212 struct lookup_intent *it, struct kstat *stat)
2214 struct inode *inode = de->d_inode;
2215 struct ll_inode_info *lli = ll_i2info(inode);
2218 res = ll_inode_revalidate_it(de, it);
2219 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2224 stat->dev = inode->i_sb->s_dev;
2225 if (cfs_curproc_is_32bit())
2226 stat->ino = cl_fid_build_ino32(&lli->lli_fid);
2228 stat->ino = inode->i_ino;
2230 stat->mode = inode->i_mode;
2231 stat->nlink = inode->i_nlink;
2232 stat->uid = inode->i_uid;
2233 stat->gid = inode->i_gid;
2234 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2235 stat->atime = inode->i_atime;
2236 stat->mtime = inode->i_mtime;
2237 stat->ctime = inode->i_ctime;
2238 #ifdef HAVE_INODE_BLKSIZE
2239 stat->blksize = inode->i_blksize;
2241 stat->blksize = 1 << inode->i_blkbits;
2244 stat->size = i_size_read(inode);
2245 stat->blocks = inode->i_blocks;
/*
 * getattr inode operation: wraps ll_getattr_it() with a fresh IT_GETATTR
 * intent and returns its result.
 */
2249 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2251 struct lookup_intent it = { .it_op = IT_GETATTR };
2253 return ll_getattr_it(mnt, de, &it, stat);
2256 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * fiemap inode operation: adapts the VFS fiemap_extent_info interface to
 * ll_do_fiemap().
 *
 * A temporary ll_user_fiemap with room for fi_extents_max extents is
 * vmalloc'ed and populated from 'fieinfo', 'start' and 'len', and the
 * first extent slot is copied in from fi_extents_start.  After
 * ll_do_fiemap() the flags, the mapped-extent count and that many extents
 * are copied back into 'fieinfo', and the buffer is freed.
 *
 * NOTE(review): fi_extents_start is read and written with plain memcpy();
 * confirm it refers to kernel memory in every caller.
 */
2257 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2258 __u64 start, __u64 len)
2262 struct ll_user_fiemap *fiemap;
2263 unsigned int extent_count = fieinfo->fi_extents_max;
2265 num_bytes = sizeof(*fiemap) + (extent_count *
2266 sizeof(struct ll_fiemap_extent));
2267 OBD_VMALLOC(fiemap, num_bytes);
2272 fiemap->fm_flags = fieinfo->fi_flags;
2273 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2274 fiemap->fm_start = start;
2275 fiemap->fm_length = len;
2276 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2277 sizeof(struct ll_fiemap_extent));
2279 rc = ll_do_fiemap(inode, fiemap, num_bytes);
2281 fieinfo->fi_flags = fiemap->fm_flags;
2282 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2283 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2284 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2286 OBD_VFREE(fiemap, num_bytes);
/*
 * ACL permission callback, passed to generic_permission() by
 * ll_inode_permission().
 *
 * When CONFIG_FS_POSIX_ACL is enabled, a reference to the inode's cached
 * POSIX ACL is taken under lli_lock, 'mask' is evaluated against it with
 * posix_acl_permission(), and the reference is dropped again.
 */
2293 int lustre_check_acl(struct inode *inode, int mask)
2295 #ifdef CONFIG_FS_POSIX_ACL
2296 struct ll_inode_info *lli = ll_i2info(inode);
2297 struct posix_acl *acl;
2301 cfs_spin_lock(&lli->lli_lock);
2302 acl = posix_acl_dup(lli->lli_posix_acl);
2303 cfs_spin_unlock(&lli->lli_lock);
2308 rc = posix_acl_permission(inode, acl, mask);
2309 posix_acl_release(acl);
/*
 * permission inode operation.  Two implementations are selected at
 * compile time.
 *
 * Kernels >= 2.6.10: the root inode is revalidated first, under the
 * LOOKUP inodebit, because (per the inline comment) it is not validated
 * during lookup.  Remote-client mounts (LL_SBI_RMT_CLIENT) defer to
 * lustre_check_remote_perm().  Everything else goes through
 * generic_permission() with lustre_check_acl as the ACL callback.  The
 * signature takes a nameidata argument unless HAVE_INODE_PERMISION_2ARGS
 * is defined.
 *
 * Older kernels: the mode-bit, ACL and capability checks are open-coded.
 * Writes to read-only or immutable inodes are screened, then the owner,
 * ACL/group and other permission bits are tested in turn, with
 * CFS_CAP_DAC_OVERRIDE and CFS_CAP_DAC_READ_SEARCH as the final fallback.
 */
2317 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2318 #ifndef HAVE_INODE_PERMISION_2ARGS
2319 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2321 int ll_inode_permission(struct inode *inode, int mask)
2327 /* as root inode are NOT getting validated in lookup operation,
2328 * need to do it before permission check. */
2330 if (inode == inode->i_sb->s_root->d_inode) {
2331 struct lookup_intent it = { .it_op = IT_LOOKUP };
2333 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2334 MDS_INODELOCK_LOOKUP);
2339 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2340 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2342 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2343 return lustre_check_remote_perm(inode, mask);
2345 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2346 rc = generic_permission(inode, mask, lustre_check_acl);
2351 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2353 int mode = inode->i_mode;
2356 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2357 inode->i_ino, inode->i_generation, inode, mask);
2359 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2360 return lustre_check_remote_perm(inode, mask);
2362 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2364 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2365 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2367 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2369 if (current->fsuid == inode->i_uid) {
2372 if (((mode >> 3) & mask & S_IRWXO) != mask)
2374 rc = lustre_check_acl(inode, mask);
2378 goto check_capabilities;
2382 if (cfs_curproc_is_in_groups(inode->i_gid))
2385 if ((mode & mask & S_IRWXO) == mask)
2389 if (!(mask & MAY_EXEC) ||
2390 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2391 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2394 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2395 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/*
 * Select the vectored I/O members used in the file_operations tables
 * that follow.  With HAVE_FILE_READV the readv/writev members and the
 * ll_file_readv/ll_file_writev handlers are used; the other set names the
 * aio_read/aio_write members and their handlers.
 */
2402 #ifdef HAVE_FILE_READV
2403 #define READ_METHOD readv
2404 #define READ_FUNCTION ll_file_readv
2405 #define WRITE_METHOD writev
2406 #define WRITE_FUNCTION ll_file_writev
2408 #define READ_METHOD aio_read
2409 #define READ_FUNCTION ll_file_aio_read
2410 #define WRITE_METHOD aio_write
2411 #define WRITE_FUNCTION ll_file_aio_write
2414 /* -o localflock - only provides locally consistent flock locks */
/*
 * File operations table without any flock/lock handler among the visible
 * members.  sendfile and splice_read are included only when the kernel
 * provides them (HAVE_KERNEL_SENDFILE / HAVE_KERNEL_SPLICE_READ).
 */
2415 struct file_operations ll_file_operations = {
2416 .read = ll_file_read,
2417 .READ_METHOD = READ_FUNCTION,
2418 .write = ll_file_write,
2419 .WRITE_METHOD = WRITE_FUNCTION,
2420 .ioctl = ll_file_ioctl,
2421 .open = ll_file_open,
2422 .release = ll_file_release,
2423 .mmap = ll_file_mmap,
2424 .llseek = ll_file_seek,
2425 #ifdef HAVE_KERNEL_SENDFILE
2426 .sendfile = ll_file_sendfile,
2428 #ifdef HAVE_KERNEL_SPLICE_READ
2429 .splice_read = ll_file_splice_read,
/*
 * File operations table with server-backed locking.  It has the same
 * members as ll_file_operations and adds ll_file_flock as the .lock
 * handler, and as .flock as well when HAVE_F_OP_FLOCK is defined.
 */
2434 struct file_operations ll_file_operations_flock = {
2435 .read = ll_file_read,
2436 .READ_METHOD = READ_FUNCTION,
2437 .write = ll_file_write,
2438 .WRITE_METHOD = WRITE_FUNCTION,
2439 .ioctl = ll_file_ioctl,
2440 .open = ll_file_open,
2441 .release = ll_file_release,
2442 .mmap = ll_file_mmap,
2443 .llseek = ll_file_seek,
2444 #ifdef HAVE_KERNEL_SENDFILE
2445 .sendfile = ll_file_sendfile,
2447 #ifdef HAVE_KERNEL_SPLICE_READ
2448 .splice_read = ll_file_splice_read,
2451 #ifdef HAVE_F_OP_FLOCK
2452 .flock = ll_file_flock,
2454 .lock = ll_file_flock
2457 /* These are for -o noflock - to return ENOSYS on flock calls */
/*
 * File operations table with locking disabled.  It has the same members
 * as ll_file_operations and adds ll_file_noflock as the .lock handler,
 * and as .flock as well when HAVE_F_OP_FLOCK is defined.
 */
2458 struct file_operations ll_file_operations_noflock = {
2459 .read = ll_file_read,
2460 .READ_METHOD = READ_FUNCTION,
2461 .write = ll_file_write,
2462 .WRITE_METHOD = WRITE_FUNCTION,
2463 .ioctl = ll_file_ioctl,
2464 .open = ll_file_open,
2465 .release = ll_file_release,
2466 .mmap = ll_file_mmap,
2467 .llseek = ll_file_seek,
2468 #ifdef HAVE_KERNEL_SENDFILE
2469 .sendfile = ll_file_sendfile,
2471 #ifdef HAVE_KERNEL_SPLICE_READ
2472 .splice_read = ll_file_splice_read,
2475 #ifdef HAVE_F_OP_FLOCK
2476 .flock = ll_file_noflock,
2478 .lock = ll_file_noflock
/*
 * Inode operations for regular Lustre files.  setattr_raw is provided
 * when the VFS intent patches are present (HAVE_VFS_INTENT_PATCHES), and
 * fiemap is provided when HAVE_LINUX_FIEMAP_H is defined.
 */
2481 struct inode_operations ll_file_inode_operations = {
2482 #ifdef HAVE_VFS_INTENT_PATCHES
2483 .setattr_raw = ll_setattr_raw,
2485 .setattr = ll_setattr,
2486 .truncate = ll_truncate,
2487 .getattr = ll_getattr,
2488 .permission = ll_inode_permission,
2489 .setxattr = ll_setxattr,
2490 .getxattr = ll_getxattr,
2491 .listxattr = ll_listxattr,
2492 .removexattr = ll_removexattr,
2493 #ifdef HAVE_LINUX_FIEMAP_H
2494 .fiemap = ll_fiemap,
2498 /* dynamic ioctl number support routines */
/*
 * Registry of dynamically registered ioctl handlers: a list head
 * (ioc_head) protected by a read/write semaphore (ioc_sem).  Both are
 * statically initialised for the single instance 'llioc'.
 */
2499 static struct llioc_ctl_data {
2500 cfs_rw_semaphore_t ioc_sem;
2501 cfs_list_t ioc_head;
2503 __RWSEM_INITIALIZER(llioc.ioc_sem),
2504 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/*
 * Fields of one registered handler record (used as struct llioc_data by
 * the register/unregister/call routines in this file):
 *   iocd_list  - linkage on llioc.ioc_head
 *   iocd_size  - total allocation size, used when the record is freed
 *   iocd_cb    - callback invoked for a matching command
 *   iocd_count - number of entries in iocd_cmd[]
 *   iocd_cmd   - trailing array of ioctl command numbers handled
 */
2509 cfs_list_t iocd_list;
2510 unsigned int iocd_size;
2511 llioc_callback_t iocd_cb;
2512 unsigned int iocd_count;
2513 unsigned int iocd_cmd[0];
/*
 * Register a callback for a set of ioctl command numbers.
 *
 * cb    - handler to invoke; must not be NULL
 * count - number of entries in 'cmd'; must be within [0, LLIOC_MAX_CMD]
 * cmd   - array of command numbers; must not be NULL
 *
 * A record large enough for the header and 'count' command numbers is
 * allocated, filled in, and appended to the global handler list under the
 * write side of llioc.ioc_sem.
 */
2516 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2519 struct llioc_data *in_data = NULL;
2522 if (cb == NULL || cmd == NULL ||
2523 count > LLIOC_MAX_CMD || count < 0)
2526 size = sizeof(*in_data) + count * sizeof(unsigned int);
2527 OBD_ALLOC(in_data, size);
2528 if (in_data == NULL)
2531 memset(in_data, 0, sizeof(*in_data));
2532 in_data->iocd_size = size;
2533 in_data->iocd_cb = cb;
2534 in_data->iocd_count = count;
2535 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2537 cfs_down_write(&llioc.ioc_sem);
2538 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2539 cfs_up_write(&llioc.ioc_sem);
/*
 * Remove a previously registered ioctl handler record.
 *
 * The handler list is walked under the write side of llioc.ioc_sem.  The
 * selected record is unlinked, the semaphore is released, and the record
 * is freed using the size saved in iocd_size.  If the walk completes
 * without that happening, a warning naming 'magic' is logged.
 */
2544 void ll_iocontrol_unregister(void *magic)
2546 struct llioc_data *tmp;
2551 cfs_down_write(&llioc.ioc_sem);
2552 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* the size is saved first because it is needed to free the record */
2554 unsigned int size = tmp->iocd_size;
2556 cfs_list_del(&tmp->iocd_list);
2557 cfs_up_write(&llioc.ioc_sem);
2559 OBD_FREE(tmp, size);
2563 cfs_up_write(&llioc.ioc_sem);
2565 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
/* Export the dynamic-ioctl registration entry points to other modules. */
2568 EXPORT_SYMBOL(ll_iocontrol_register);
2569 EXPORT_SYMBOL(ll_iocontrol_unregister);
2571 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2572 unsigned int cmd, unsigned long arg, int *rcp)
2574 enum llioc_iter ret = LLIOC_CONT;
2575 struct llioc_data *data;
2576 int rc = -EINVAL, i;
2578 cfs_down_read(&llioc.ioc_sem);
2579 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2580 for (i = 0; i < data->iocd_count; i++) {
2581 if (cmd != data->iocd_cmd[i])
2584 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2588 if (ret == LLIOC_STOP)
2591 cfs_up_read(&llioc.ioc_sem);