1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a struct ll_file_data (per-open-file private state stored in
 * file->private_data via LUSTRE_FPRIVATE) from the dedicated slab cache,
 * using I/O-safe allocation flags so it may be called on the I/O path. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Return a struct ll_file_data to the ll_file_data_slab cache.
 * Counterpart of ll_file_data_get(). */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Pack the inode's cached attributes into @op_data for an MD operation:
 * FID, mode, a/m/ctime, size, block count, inode flags (converted to the
 * on-wire ext flag representation), the current I/O epoch, the open file
 * handle @fh, and an MDS capability reference. */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the Lustre-extended iattr, hence the cast;
 * inode flags are translated to the ext/on-disk flag encoding. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
79 ll_inode_to_ext_flags(inode->i_flags);
80 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
82 op_data->op_handle = *fh;
/* NOTE(review): takes an MDS capability reference; the matching put is
 * presumably done by the caller/consumer of op_data — confirm. */
83 op_data->op_capa1 = ll_mdscapa_get(inode);
87 * Closes the IO epoch and packs all the attributes into @op_data for
/* Prepare @op_data for the MDS close RPC: select which attributes are
 * valid for this close, close the I/O epoch, and pack the inode state. */
90 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
91 struct obd_client_handle *och)
95 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
96 ATTR_MTIME_SET | ATTR_CTIME_SET;
/* Size/blocks handling below applies to write opens; read-only opens
 * take the early path here (body elided in this view). */
98 if (!(och->och_flags & FMODE_WRITE))
/* Without Size-on-MDS support (or for non-regular files) the client
 * sends size/blocks to the MDS directly on close. */
101 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
102 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
104 ll_ioepoch_close(inode, op_data, &och, 0);
107 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
108 ll_prep_md_op_data(op_data, inode, NULL, NULL,
109 0, 0, LUSTRE_OPC_ANY, NULL);
/* Send the close RPC for an open handle @och on @inode to the MDS,
 * performing the Size-on-MDS update afterwards when the MDS requests it,
 * then destroy any llog object records carried in the close reply and
 * release the open-handle replay data. */
113 static int ll_close_inode_openhandle(struct obd_export *md_exp,
115 struct obd_client_handle *och)
117 struct obd_export *exp = ll_i2mdexp(inode);
118 struct md_op_data *op_data;
119 struct ptlrpc_request *req = NULL;
120 struct obd_device *obd = class_exp2obd(exp);
127 * XXX: in case of LMV, is this correct to access
130 CERROR("Invalid MDC connection handle "LPX64"\n",
131 ll_i2mdexp(inode)->exp_handle.h_cookie);
135 OBD_ALLOC_PTR(op_data);
/* NOTE(review): pre-existing leak acknowledged by the original author —
 * openhandle/request are not cleaned up on this allocation failure. */
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
/* Remember whether this close also closed the I/O epoch; checked below. */
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr to back to MDS. */
147 rc = ll_som_update(inode, op_data);
149 CERROR("inode %lu mdc Size-on-MDS update failed: "
150 "rc = %d\n", inode->i_ino, rc);
154 CERROR("inode %lu mdc close failed: rc = %d\n",
157 ll_finish_md_op_data(op_data);
/* Destroy OST objects listed in the close reply's unlink llog cookies. */
160 rc = ll_objects_destroy(req, inode);
162 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* If SOM is in use and the epoch is still open on a written regular
 * file, DONE_WRITING must be sent later; queue it. */
169 if (exp_connect_som(exp) && !epoch_close &&
170 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
171 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
173 md_clear_open_replay_data(md_exp, och);
174 /* Free @och if it is not waiting for DONE_WRITING. */
175 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
178 if (req) /* This is close request */
179 ptlrpc_req_finished(req);
/* Really close the MDS open handle of the given mode (write/exec/read)
 * for @inode, but only once the last local user of that handle is gone.
 * Returns 0 or a negative errno from the close RPC. */
183 int ll_md_real_close(struct inode *inode, int flags)
185 struct ll_inode_info *lli = ll_i2info(inode);
186 struct obd_client_handle **och_p;
187 struct obd_client_handle *och;
/* Pick the per-inode handle slot and use count for this open mode. */
192 if (flags & FMODE_WRITE) {
193 och_p = &lli->lli_mds_write_och;
194 och_usecount = &lli->lli_open_fd_write_count;
195 } else if (flags & FMODE_EXEC) {
196 och_p = &lli->lli_mds_exec_och;
197 och_usecount = &lli->lli_open_fd_exec_count;
199 LASSERT(flags & FMODE_READ);
200 och_p = &lli->lli_mds_read_och;
201 och_usecount = &lli->lli_open_fd_read_count;
/* lli_och_sem serializes open-handle setup/teardown for this inode. */
204 cfs_down(&lli->lli_och_sem);
205 if (*och_usecount) { /* There are still users of this handle, so
207 cfs_up(&lli->lli_och_sem);
212 cfs_up(&lli->lli_och_sem);
214 if (och) { /* There might be a race and somebody have freed this och
216 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file-descriptor close: drop any group lock, decrement the open
 * count for this fd's open mode, and — unless a cached OPEN DLM lock
 * lets us skip the RPC — call ll_md_real_close() to close the MDS
 * handle. Finally release the fd private data and the OSS capability. */
223 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
226 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
227 struct ll_inode_info *lli = ll_i2info(inode);
231 /* clear group lock, if present */
232 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
233 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
235 /* Let's see if we have good enough OPEN lock on the file and if
236 we can skip talking to MDS */
237 if (file->f_dentry->d_inode) { /* Can this ever be false? */
239 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
240 struct lustre_handle lockh;
/* Shadows the outer @inode parameter with the dentry's inode. */
241 struct inode *inode = file->f_dentry->d_inode;
242 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
244 cfs_down(&lli->lli_och_sem);
/* Drop this fd's reference on the per-mode open count. */
245 if (fd->fd_omode & FMODE_WRITE) {
247 LASSERT(lli->lli_open_fd_write_count);
248 lli->lli_open_fd_write_count--;
249 } else if (fd->fd_omode & FMODE_EXEC) {
251 LASSERT(lli->lli_open_fd_exec_count);
252 lli->lli_open_fd_exec_count--;
255 LASSERT(lli->lli_open_fd_read_count);
256 lli->lli_open_fd_read_count--;
258 cfs_up(&lli->lli_och_sem);
/* No matching cached OPEN ibits lock -> must do the real MDS close. */
260 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
261 LDLM_IBITS, &policy, lockmode,
263 rc = ll_md_real_close(file->f_dentry->d_inode,
267 CERROR("Releasing a file %p with negative dentry %p. Name %s",
268 file, file->f_dentry, file->f_dentry->d_name.name);
271 LUSTRE_FPRIVATE(file) = NULL;
272 ll_file_data_put(fd);
273 ll_capa_close(inode);
278 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
280 /* While this returns an error code, fput() the caller does not, so we need
281 * to make every effort to clean up all of our state here. Also, applications
282 * rarely check close errors and even if an error is returned they will not
283 * re-try the close call.
/* VFS ->release() entry point for regular files and the root directory:
 * tears down remote-ACL session state, stops the statahead thread when
 * this fd owns it, clears pending async write errors on the stripe MD,
 * and closes the MDS open handle via ll_md_close(). */
285 int ll_file_release(struct inode *inode, struct file *file)
287 struct ll_file_data *fd;
288 struct ll_sb_info *sbi = ll_i2sbi(inode);
289 struct ll_inode_info *lli = ll_i2info(inode);
290 struct lov_stripe_md *lsm = lli->lli_smd;
294 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
295 inode->i_generation, inode);
297 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping only applies when releasing the root
 * inode of an RMT_CLIENT mount. */
298 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
299 inode == inode->i_sb->s_root->d_inode) {
300 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
303 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
304 fd->fd_flags &= ~LL_FILE_RMTACL;
305 rct_del(&sbi->ll_rct, cfs_curproc_pid());
306 et_search_free(&sbi->ll_et, cfs_curproc_pid());
/* Don't count releases of the root dentry in the stats. */
311 if (inode->i_sb->s_root != file->f_dentry)
312 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
313 fd = LUSTRE_FPRIVATE(file);
316 /* The last ref on @file, maybe not the the owner pid of statahead.
317 * Different processes can open the same dir, "ll_opendir_key" means:
318 * it is me that should stop the statahead thread. */
319 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
320 ll_stop_statahead(inode, lli->lli_opendir_key);
/* The root dentry has no MDS open handle to close; just free fd. */
322 if (inode->i_sb->s_root == file->f_dentry) {
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
/* Surface any deferred async write error from the OSC layer. */
329 lov_test_and_clear_async_rc(lsm);
330 lli->lli_async_rc = 0;
332 rc = ll_md_close(sbi->ll_md_exp, inode, file);
334 if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
335 libcfs_debug_dumplog();
/* Issue an IT_OPEN intent lock request to the MDS for @file, optionally
 * carrying striping info (@lmm/@lmmsize) when called from setstripe.
 * On success, updates the inode from the reply and attaches the granted
 * lock; on -ESTALE-style failures releases the open handle quietly. */
340 static int ll_intent_file_open(struct file *file, void *lmm,
341 int lmmsize, struct lookup_intent *itp)
343 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
344 struct dentry *parent = file->f_dentry->d_parent;
345 const char *name = file->f_dentry->d_name.name;
346 const int len = file->f_dentry->d_name.len;
347 struct md_op_data *op_data;
348 struct ptlrpc_request *req;
355 /* Usually we come here only for NFSD, and we want open lock.
356 But we can also get here with pre 2.6.15 patchless kernels, and in
357 that case that lock is also ok */
358 /* We can also get here if there was cached open handle in revalidate_it
359 * but it disappeared while we were getting from there to ll_file_open.
360 * But this means this file was closed and immediatelly opened which
361 * makes a good candidate for using OPEN lock */
362 /* If lmmsize & lmm are not 0, we are just setting stripe info
363 * parameters. No need for the open lock */
364 if (!lmm && !lmmsize)
365 itp->it_flags |= MDS_OPEN_LOCK;
367 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
368 file->f_dentry->d_inode, name, len,
369 O_RDWR, LUSTRE_OPC_ANY, NULL);
371 RETURN(PTR_ERR(op_data));
373 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
374 0 /*unused */, &req, ll_md_blocking_ast, 0);
375 ll_finish_md_op_data(op_data);
377 /* reason for keep own exit path - don`t flood log
378 * with messages with -ESTALE errors.
/* Open succeeded at the MDS but cannot be used here: release it. */
380 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
381 it_open_error(DISP_OPEN_OPEN, itp))
383 ll_release_openhandle(file->f_dentry, itp);
387 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
388 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
389 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the intent reply and bind the DLM lock to it. */
393 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
394 if (!rc && itp->d.lustre.it_lock_mode)
395 md_set_lock_data(sbi->ll_md_exp,
396 &itp->d.lustre.it_lock_handle,
397 file->f_dentry->d_inode, NULL);
400 ptlrpc_req_finished(itp->d.lustre.it_data);
401 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
402 ll_intent_drop_lock(itp);
408 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
409 * not believe attributes if a few ioepoch holders exist. Attributes for
410 * previous ioepoch if new one is opened are also skipped by MDS.
412 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a non-zero epoch, and only when it actually changes. */
414 if (ioepoch && lli->lli_ioepoch != ioepoch) {
415 lli->lli_ioepoch = ioepoch;
416 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
417 ioepoch, PFID(&lli->lli_fid));
/* Fill an obd_client_handle @och from the MDS open reply carried in the
 * intent @it: copy the open file handle and FID, record the open flags,
 * note the new I/O epoch, and register the request for open replay.
 * Returns the result of md_set_open_replay_data(). */
421 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
422 struct lookup_intent *it, struct obd_client_handle *och)
424 struct ptlrpc_request *req = it->d.lustre.it_data;
425 struct mdt_body *body;
429 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
430 LASSERT(body != NULL); /* reply already checked out */
432 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
433 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
434 och->och_fid = lli->lli_fid;
435 och->och_flags = it->it_flags;
436 ll_ioepoch_open(lli, body->ioepoch);
438 return md_set_open_replay_data(md_exp, och, req);
/* Complete the client-local part of an open: optionally fill @och from
 * the intent reply, then install @fd as the file's private data,
 * initialize its readahead state and record the open mode. */
441 int ll_local_open(struct file *file, struct lookup_intent *it,
442 struct ll_file_data *fd, struct obd_client_handle *och)
444 struct inode *inode = file->f_dentry->d_inode;
445 struct ll_inode_info *lli = ll_i2info(inode);
/* Private data must not already be set for this file. */
448 LASSERT(!LUSTRE_FPRIVATE(file));
453 struct ptlrpc_request *req = it->d.lustre.it_data;
454 struct mdt_body *body;
457 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
461 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
/* NOTE(review): debug-only check that a write open did not come back
 * with a server-trusted size — message text mirrors ll_ioepoch_open. */
462 if ((it->it_flags & FMODE_WRITE) &&
463 (body->valid & OBD_MD_FLSIZE))
464 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
465 lli->lli_ioepoch, PFID(&lli->lli_fid));
468 LUSTRE_FPRIVATE(file) = fd;
469 ll_readahead_init(inode, &fd->fd_ras);
470 fd->fd_omode = it->it_flags;
474 /* Open a file, and (for the very first open) create objects on the OSTs at
475 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
476 * creation or open until ll_lov_setstripe() ioctl is called. We grab
477 * lli_open_sem to ensure no other process will create objects, send the
478 * stripe MD to the MDS, or try to destroy the objects if that fails.
480 * If we already have the stripe MD locally then we don't request it in
481 * md_open(), by passing a lmm_size = 0.
483 * It is up to the application to ensure no other processes open this file
484 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
485 * used. We might be able to avoid races of that sort by getting lli_open_sem
486 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
487 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
489 int ll_file_open(struct inode *inode, struct file *file)
491 struct ll_inode_info *lli = ll_i2info(inode);
492 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
493 .it_flags = file->f_flags };
494 struct lov_stripe_md *lsm;
495 struct ptlrpc_request *req = NULL;
496 struct obd_client_handle **och_p;
498 struct ll_file_data *fd;
499 int rc = 0, opendir_set = 0;
502 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
503 inode->i_generation, inode, file->f_flags);
/* On intent-patched kernels the VFS passed the lookup intent down in
 * file->private_data; claim it here. */
505 #ifdef HAVE_VFS_INTENT_PATCHES
508 it = file->private_data; /* XXX: compat macro */
509 file->private_data = NULL; /* prevent ll_local_open assertion */
512 fd = ll_file_data_get();
/* For directories, try to become the statahead owner for this open. */
517 if (S_ISDIR(inode->i_mode)) {
518 cfs_spin_lock(&lli->lli_sa_lock);
519 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
520 LASSERT(lli->lli_sai == NULL);
521 lli->lli_opendir_key = fd;
522 lli->lli_opendir_pid = cfs_curproc_pid();
525 cfs_spin_unlock(&lli->lli_sa_lock);
/* The root dentry needs no MDS open handle; just attach fd. */
528 if (inode->i_sb->s_root == file->f_dentry) {
529 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from the VFS: build our own IT_OPEN intent (oit). */
533 if (!it || !it->d.lustre.it_disposition) {
534 /* Convert f_flags into access mode. We cannot use file->f_mode,
535 * because everything but O_ACCMODE mask was stripped from
/* O_ACCMODE trick: (flags + 1) maps O_RDONLY/O_WRONLY/O_RDWR onto
 * FMODE_READ/FMODE_WRITE bits. */
537 if ((oit.it_flags + 1) & O_ACCMODE)
539 if (file->f_flags & O_TRUNC)
540 oit.it_flags |= FMODE_WRITE;
542 /* kernel only call f_op->open in dentry_open. filp_open calls
543 * dentry_open after call to open_namei that checks permissions.
544 * Only nfsd_open call dentry_open directly without checking
545 * permissions and because of that this code below is safe. */
546 if (oit.it_flags & FMODE_WRITE)
547 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
549 /* We do not want O_EXCL here, presumably we opened the file
550 * already? XXX - NFS implications? */
551 oit.it_flags &= ~O_EXCL;
553 /* bug20584, if "it_flags" contains O_CREAT, the file will be
554 * created if necessary, then "IT_CREAT" should be set to keep
555 * consistent with it */
556 if (oit.it_flags & O_CREAT)
557 oit.it_op |= IT_CREAT;
563 /* Let's see if we have file open on MDS already. */
564 if (it->it_flags & FMODE_WRITE) {
565 och_p = &lli->lli_mds_write_och;
566 och_usecount = &lli->lli_open_fd_write_count;
567 } else if (it->it_flags & FMODE_EXEC) {
568 och_p = &lli->lli_mds_exec_och;
569 och_usecount = &lli->lli_open_fd_exec_count;
571 och_p = &lli->lli_mds_read_och;
572 och_usecount = &lli->lli_open_fd_read_count;
575 cfs_down(&lli->lli_och_sem);
576 if (*och_p) { /* Open handle is present */
577 if (it_disposition(it, DISP_OPEN_OPEN)) {
578 /* Well, there's extra open request that we do not need,
579 let's close it somehow. This will decref request. */
580 rc = it_open_error(DISP_OPEN_OPEN, it);
582 cfs_up(&lli->lli_och_sem);
583 ll_file_data_put(fd);
584 GOTO(out_openerr, rc);
586 ll_release_openhandle(file->f_dentry, it);
587 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse the existing MDS handle; no och needed for this open. */
592 rc = ll_local_open(file, it, fd, NULL);
595 cfs_up(&lli->lli_och_sem);
596 ll_file_data_put(fd);
597 GOTO(out_openerr, rc);
600 LASSERT(*och_usecount == 0);
601 if (!it->d.lustre.it_disposition) {
602 /* We cannot just request lock handle now, new ELC code
603 means that one of other OPEN locks for this file
604 could be cancelled, and since blocking ast handler
605 would attempt to grab och_sem as well, that would
606 result in a deadlock */
607 cfs_up(&lli->lli_och_sem);
608 it->it_create_mode |= M_CHECK_STALE;
609 rc = ll_intent_file_open(file, NULL, 0, it);
610 it->it_create_mode &= ~M_CHECK_STALE;
612 ll_file_data_put(fd);
613 GOTO(out_openerr, rc);
616 /* Got some error? Release the request */
617 if (it->d.lustre.it_status < 0) {
618 req = it->d.lustre.it_data;
619 ptlrpc_req_finished(req);
/* First open of this mode: allocate the shared open handle. */
623 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
625 ll_file_data_put(fd);
626 GOTO(out_och_free, rc = -ENOMEM);
629 req = it->d.lustre.it_data;
631 /* md_intent_lock() didn't get a request ref if there was an
632 * open error, so don't do cleanup on the request here
634 /* XXX (green): Should not we bail out on any error here, not
635 * just open error? */
636 rc = it_open_error(DISP_OPEN_OPEN, it);
638 ll_file_data_put(fd);
639 GOTO(out_och_free, rc);
642 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
643 rc = ll_local_open(file, it, fd, *och_p);
645 ll_file_data_put(fd);
646 GOTO(out_och_free, rc);
649 cfs_up(&lli->lli_och_sem);
651 /* Must do this outside lli_och_sem lock to prevent deadlock where
652 different kind of OPEN lock for this same inode gets cancelled
653 by ldlm_cancel_lru */
654 if (!S_ISREG(inode->i_mode))
/* O_LOV_DELAY_CREATE or read-only open: defer OST object creation. */
661 if (file->f_flags & O_LOV_DELAY_CREATE ||
662 !(file->f_mode & FMODE_WRITE)) {
663 CDEBUG(D_INODE, "object creation was delayed\n");
667 file->f_flags &= ~O_LOV_DELAY_CREATE;
670 ptlrpc_req_finished(req);
672 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/* Error path: free the never-used open handle slot. */
676 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
677 *och_p = NULL; /* OBD_FREE writes some magic there */
680 cfs_up(&lli->lli_och_sem);
/* If we became the statahead owner above, undo it on failure. */
682 if (opendir_set != 0)
683 ll_stop_statahead(inode, lli->lli_opendir_key);
689 /* Fills the obdo with the attributes for the lsm */
/* Issue an async getattr to the OSTs for all stripes of @lsm (waiting on
 * a private ptlrpc set), filling @obdo with merged object attributes.
 * @sync != 0 requests the attributes under a server-side lock. */
690 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
691 struct obd_capa *capa, struct obdo *obdo,
692 __u64 ioepoch, int sync)
694 struct ptlrpc_request_set *set;
695 struct obd_info oinfo = { { { 0 } } };
700 LASSERT(lsm != NULL);
/* Identify the object and list which attributes we want back. */
704 oinfo.oi_oa->o_id = lsm->lsm_object_id;
705 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
706 oinfo.oi_oa->o_mode = S_IFREG;
707 oinfo.oi_oa->o_ioepoch = ioepoch;
708 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
709 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
710 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
711 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
712 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
713 oinfo.oi_capa = capa;
/* Sync mode: ask the OST to take its own lock for the getattr. */
715 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
716 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
719 set = ptlrpc_prep_set();
721 CERROR("can't allocate ptlrpc set\n");
724 rc = obd_getattr_async(exp, &oinfo, set);
726 rc = ptlrpc_set_wait(set);
727 ptlrpc_set_destroy(set);
/* Keep only the attribute bits callers may trust from OSTs. */
730 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
731 OBD_MD_FLATIME | OBD_MD_FLMTIME |
732 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
737 * Performs the getattr on the inode and updates its fields.
738 * If @sync != 0, perform the getattr under the server-side lock.
740 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
741 __u64 ioepoch, int sync)
743 struct ll_inode_info *lli = ll_i2info(inode);
/* Holds a capability reference for the data export; released on the
 * (elided) exit path — see ll_mdscapa_get()/capa_put pairing. */
744 struct obd_capa *capa = ll_mdscapa_get(inode);
748 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
749 capa, obdo, ioepoch, sync);
/* On success, fold the OST attributes back into the VFS inode. */
752 obdo_refresh_inode(inode, obdo, obdo->o_valid);
754 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
755 lli->lli_smd->lsm_object_id, i_size_read(inode),
756 (unsigned long long)inode->i_blocks,
757 (unsigned long)ll_inode_blksize(inode));
/* Merge the lock value block (size, blocks, a/m/ctime) cached from the
 * MDS with per-stripe state from the OSTs via obd_merge_lvb(), and write
 * the result into the VFS inode under the inode size lock. */
762 int ll_merge_lvb(struct inode *inode)
764 struct ll_inode_info *lli = ll_i2info(inode);
765 struct ll_sb_info *sbi = ll_i2sbi(inode);
771 ll_inode_size_lock(inode, 1);
772 inode_init_lvb(inode, &lvb);
774 /* merge timestamps the most resently obtained from mds with
775 timestamps obtained from osts */
776 lvb.lvb_atime = lli->lli_lvb.lvb_atime;
777 lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
778 lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
779 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
/* Size lock is already held, so use the nolock i_size writer. */
780 cl_isize_write_nolock(inode, lvb.lvb_size);
782 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
783 PFID(&lli->lli_fid), lvb.lvb_size);
784 inode->i_blocks = lvb.lvb_blocks;
786 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
787 LTIME_S(inode->i_atime) = lvb.lvb_atime;
788 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
789 ll_inode_size_unlock(inode, 1);
/* Glimpse helper for ioctls: fetch current object attributes for @lsm
 * from the OSTs (no capability, epoch 0, non-sync) and copy size,
 * blocks and timestamps into the caller-supplied stat structure. */
794 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
797 struct obdo obdo = { 0 };
800 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
802 st->st_size = obdo.o_size;
803 st->st_blocks = obdo.o_blocks;
804 st->st_mtime = obdo.o_mtime;
805 st->st_atime = obdo.o_atime;
806 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: zero the structure, propagate O_NONBLOCK/O_APPEND, bind the
 * cl_object, and choose the DLM locking policy for this I/O. */
811 void ll_io_init(struct cl_io *io, const struct file *file, int write)
813 struct inode *inode = file->f_dentry->d_inode;
815 memset(io, 0, sizeof *io);
816 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
818 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
819 io->ci_obj = ll_i2info(inode)->lli_clob;
/* Default: let the cl_io layer decide whether to take DLM locks. */
820 io->ci_lockreq = CILR_MAYBE;
821 if (ll_file_nolock(file)) {
822 io->ci_lockreq = CILR_NEVER;
823 io->ci_no_srvlock = 1;
824 } else if (file->f_flags & O_APPEND) {
/* Appends need a mandatory lock to serialize the EOF position. */
825 io->ci_lockreq = CILR_MANDATORY;
/* Common engine behind all read/write entry points: set up a cl_io for
 * @iot (CIT_READ/CIT_WRITE) over [*ppos, *ppos + count), copy the
 * per-subtype arguments (iovec, sendfile actor, or splice pipe) into the
 * vvp/ccc I/O contexts, run the cl_io loop, and advance *ppos by the
 * number of bytes transferred. */
829 static ssize_t ll_file_io_generic(const struct lu_env *env,
830 struct vvp_io_args *args, struct file *file,
831 enum cl_io_type iot, loff_t *ppos, size_t count)
837 io = &ccc_env_info(env)->cti_io;
838 ll_io_init(io, file, iot == CIT_WRITE);
840 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
841 struct vvp_io *vio = vvp_env_io(env);
842 struct ccc_io *cio = ccc_env_io(env);
843 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
844 int write_sem_locked = 0;
846 cio->cui_fd = LUSTRE_FPRIVATE(file);
847 vio->cui_io_subtype = args->via_io_subtype;
849 switch (vio->cui_io_subtype) {
851 cio->cui_iov = args->u.normal.via_iov;
852 cio->cui_nrsegs = args->u.normal.via_nrsegs;
853 cio->cui_tot_nrsegs = cio->cui_nrsegs;
854 #ifndef HAVE_FILE_WRITEV
855 cio->cui_iocb = args->u.normal.via_iocb;
/* Serialize normal writes per inode unless a group lock is held. */
857 if ((iot == CIT_WRITE) &&
858 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
859 cfs_down(&lli->lli_write_sem);
860 write_sem_locked = 1;
864 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
865 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
868 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
869 vio->u.splice.cui_flags = args->u.splice.via_flags;
/* Note: original message has a typo ("Unknow"); kept as-is since this
 * is a runtime string. */
872 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
875 result = cl_io_loop(env, io);
876 if (write_sem_locked)
877 cfs_up(&lli->lli_write_sem);
879 /* cl_io_rw_init() handled IO */
880 result = io->ci_result;
/* ci_nob is the number of bytes actually transferred. */
883 if (io->ci_nob > 0) {
885 *ppos = io->u.ci_wr.wr.crw_pos;
893 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute the total byte count: rejects
 * negative/overflowing segment lengths and truncates *nr_segs at the
 * first segment failing access_ok(). */
895 static int ll_file_get_iov_count(const struct iovec *iov,
896 unsigned long *nr_segs, size_t *count)
901 for (seg = 0; seg < *nr_segs; seg++) {
902 const struct iovec *iv = &iov[seg];
905 * If any segment has a negative length, or the cumulative
906 * length ever wraps negative then return -EINVAL.
909 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
911 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
916 cnt -= iv->iov_len; /* This segment is no good */
923 #ifdef HAVE_FILE_READV
/* Vectored read entry point (kernels with f_op->readv): validate the
 * iovec, set up IO_NORMAL vvp args, and run the generic CIT_READ path. */
924 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
925 unsigned long nr_segs, loff_t *ppos)
928 struct vvp_io_args *args;
934 result = ll_file_get_iov_count(iov, &nr_segs, &count);
938 env = cl_env_get(&refcheck);
940 RETURN(PTR_ERR(env));
942 args = vvp_env_args(env, IO_NORMAL);
943 args->u.normal.via_iov = (struct iovec *)iov;
944 args->u.normal.via_nrsegs = nr_segs;
946 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
947 cl_env_put(env, &refcheck);
/* Single-buffer read: wrap (buf, count) in a one-element iovec held in
 * the per-env scratch area and delegate to ll_file_readv(). */
951 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
955 struct iovec *local_iov;
960 env = cl_env_get(&refcheck);
962 RETURN(PTR_ERR(env));
964 local_iov = &vvp_env_info(env)->vti_local_iov;
965 local_iov->iov_base = (void __user *)buf;
966 local_iov->iov_len = count;
967 result = ll_file_readv(file, local_iov, 1, ppos);
968 cl_env_put(env, &refcheck);
/* AIO read entry point (kernels without f_op->readv): validate the
 * iovec, stash the kiocb in the IO_NORMAL args, and run the generic
 * CIT_READ path against iocb->ki_pos. */
973 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
974 unsigned long nr_segs, loff_t pos)
977 struct vvp_io_args *args;
983 result = ll_file_get_iov_count(iov, &nr_segs, &count);
987 env = cl_env_get(&refcheck);
989 RETURN(PTR_ERR(env));
991 args = vvp_env_args(env, IO_NORMAL);
992 args->u.normal.via_iov = (struct iovec *)iov;
993 args->u.normal.via_nrsegs = nr_segs;
994 args->u.normal.via_iocb = iocb;
996 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
997 &iocb->ki_pos, count);
998 cl_env_put(env, &refcheck);
/* Synchronous read wrapper: build a one-element iovec plus a sync kiocb
 * in per-env scratch space, call the AIO path, and copy the updated
 * position back to *ppos. */
1002 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1006 struct iovec *local_iov;
1007 struct kiocb *kiocb;
1012 env = cl_env_get(&refcheck);
1014 RETURN(PTR_ERR(env));
1016 local_iov = &vvp_env_info(env)->vti_local_iov;
1017 kiocb = &vvp_env_info(env)->vti_kiocb;
1018 local_iov->iov_base = (void __user *)buf;
1019 local_iov->iov_len = count;
1020 init_sync_kiocb(kiocb, file);
1021 kiocb->ki_pos = *ppos;
1022 kiocb->ki_left = count;
1024 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1025 *ppos = kiocb->ki_pos;
1027 cl_env_put(env, &refcheck);
1033 * Write to a file (through the page cache).
1035 #ifdef HAVE_FILE_WRITEV
/* Vectored write entry point (kernels with f_op->writev): validate the
 * iovec, set up IO_NORMAL vvp args, and run the generic CIT_WRITE path.
 * Mirrors ll_file_readv(). */
1036 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1037 unsigned long nr_segs, loff_t *ppos)
1040 struct vvp_io_args *args;
1046 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1050 env = cl_env_get(&refcheck);
1052 RETURN(PTR_ERR(env));
1054 args = vvp_env_args(env, IO_NORMAL);
1055 args->u.normal.via_iov = (struct iovec *)iov;
1056 args->u.normal.via_nrsegs = nr_segs;
1058 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1059 cl_env_put(env, &refcheck);
/* Single-buffer write: wrap (buf, count) in a one-element iovec and
 * delegate to ll_file_writev(). */
1063 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1067 struct iovec *local_iov;
1072 env = cl_env_get(&refcheck);
1074 RETURN(PTR_ERR(env));
1076 local_iov = &vvp_env_info(env)->vti_local_iov;
1077 local_iov->iov_base = (void __user *)buf;
1078 local_iov->iov_len = count;
1080 result = ll_file_writev(file, local_iov, 1, ppos);
1081 cl_env_put(env, &refcheck);
1085 #else /* AIO stuff */
/* AIO write entry point (kernels without f_op->writev): validate the
 * iovec, stash the kiocb, and run the generic CIT_WRITE path against
 * iocb->ki_pos. Mirrors ll_file_aio_read(). */
1086 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1087 unsigned long nr_segs, loff_t pos)
1090 struct vvp_io_args *args;
1096 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1100 env = cl_env_get(&refcheck);
1102 RETURN(PTR_ERR(env));
1104 args = vvp_env_args(env, IO_NORMAL);
1105 args->u.normal.via_iov = (struct iovec *)iov;
1106 args->u.normal.via_nrsegs = nr_segs;
1107 args->u.normal.via_iocb = iocb;
1109 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1110 &iocb->ki_pos, count);
1111 cl_env_put(env, &refcheck);
/* Synchronous write wrapper: one-element iovec plus a sync kiocb in
 * per-env scratch space, then copy the updated position back to *ppos. */
1115 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1119 struct iovec *local_iov;
1120 struct kiocb *kiocb;
1125 env = cl_env_get(&refcheck);
1127 RETURN(PTR_ERR(env));
1129 local_iov = &vvp_env_info(env)->vti_local_iov;
1130 kiocb = &vvp_env_info(env)->vti_kiocb;
1131 local_iov->iov_base = (void __user *)buf;
1132 local_iov->iov_len = count;
1133 init_sync_kiocb(kiocb, file);
1134 kiocb->ki_pos = *ppos;
1135 kiocb->ki_left = count;
1137 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1138 *ppos = kiocb->ki_pos;
1140 cl_env_put(env, &refcheck);
1146 #ifdef HAVE_KERNEL_SENDFILE
1148 * Send file content (through pagecache) somewhere with helper
/* sendfile() backend: run the generic CIT_READ path with IO_SENDFILE
 * args carrying the read actor and its target. */
1150 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1151 read_actor_t actor, void *target)
1154 struct vvp_io_args *args;
1159 env = cl_env_get(&refcheck);
1161 RETURN(PTR_ERR(env));
1163 args = vvp_env_args(env, IO_SENDFILE);
1164 args->u.sendfile.via_target = target;
1165 args->u.sendfile.via_actor = actor;
1167 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1168 cl_env_put(env, &refcheck);
1173 #ifdef HAVE_KERNEL_SPLICE_READ
1175 * Send file content (through pagecache) somewhere with helper
/* splice_read() backend: run the generic CIT_READ path with IO_SPLICE
 * args carrying the destination pipe and splice flags. */
1177 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1178 struct pipe_inode_info *pipe, size_t count,
1182 struct vvp_io_args *args;
1187 env = cl_env_get(&refcheck);
1189 RETURN(PTR_ERR(env));
1191 args = vvp_env_args(env, IO_SPLICE);
1192 args->u.splice.via_pipe = pipe;
1193 args->u.splice.via_flags = flags;
1195 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1196 cl_env_put(env, &refcheck);
/* LL_IOC_RECREATE_OBJ ioctl handler: re-create a lost OST object for
 * @inode on the OST index given by userspace. Requires CAP_SYS_ADMIN.
 * Works on a copy of the stripe MD so the in-memory lsm is untouched. */
1201 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1204 struct obd_export *exp = ll_i2dtexp(inode);
1205 struct ll_recreate_obj ucreatp;
1206 struct obd_trans_info oti = { 0 };
1207 struct obdo *oa = NULL;
1210 struct lov_stripe_md *lsm, *lsm2;
1213 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1216 if (cfs_copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1217 sizeof(struct ll_recreate_obj)))
1224 ll_inode_size_lock(inode, 0);
1225 lsm = ll_i2info(inode)->lli_smd;
1227 GOTO(out, rc = -ENOENT);
/* Full lsm size includes one lov_oinfo per stripe. */
1228 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1229 (lsm->lsm_stripe_count));
1231 OBD_ALLOC(lsm2, lsm_size);
1233 GOTO(out, rc = -ENOMEM);
/* Target object identity and OST index come from userspace; o_nlink is
 * (ab)used to carry the OST index for OBD_FL_RECREATE_OBJS. */
1235 oa->o_id = ucreatp.lrc_id;
1236 oa->o_seq = ucreatp.lrc_seq;
1237 oa->o_nlink = ucreatp.lrc_ost_idx;
1238 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1239 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1240 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid, OBD_MD_FLTYPE |
1241 OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1242 memcpy(lsm2, lsm, lsm_size);
1243 rc = obd_create(exp, oa, &lsm2, &oti);
1245 OBD_FREE(lsm2, lsm_size);
1248 ll_inode_size_unlock(inode, 0);
/* Set striping info for @inode by performing an IT_OPEN intent that
 * carries the user-supplied lov_user_md (@lum, @lum_size). Fails if a
 * stripe MD already exists; on success the transient open handle is
 * released again. */
1253 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1254 int flags, struct lov_user_md *lum, int lum_size)
1256 struct lov_stripe_md *lsm;
1257 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
/* Size lock guards lli_smd against concurrent stripe creation. */
1261 ll_inode_size_lock(inode, 0);
1262 lsm = ll_i2info(inode)->lli_smd;
1264 ll_inode_size_unlock(inode, 0);
1265 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1270 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1273 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1274 GOTO(out_req_free, rc = -ENOENT);
1275 rc = oit.d.lustre.it_status;
1277 GOTO(out_req_free, rc);
/* The open was only a vehicle for the setstripe; close it again. */
1279 ll_release_openhandle(file->f_dentry, &oit);
1282 ll_inode_size_unlock(inode, 0);
1283 ll_intent_release(&oit);
1286 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping info) of @filename (a child of directory
 * @inode) via md_getattr_name. On success *lmmp points into the reply
 * buffer (caller keeps *request alive and must finish it), *lmm_size is
 * its length. The EA is byte-swapped to host endianness for userspace
 * on little/big-endian mismatch. */
1290 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1291 struct lov_mds_md **lmmp, int *lmm_size,
1292 struct ptlrpc_request **request)
1294 struct ll_sb_info *sbi = ll_i2sbi(inode);
1295 struct mdt_body *body;
1296 struct lov_mds_md *lmm = NULL;
1297 struct ptlrpc_request *req = NULL;
1298 struct md_op_data *op_data;
/* Reserve reply space for the largest EA the MDS may return. */
1301 rc = ll_get_max_mdsize(sbi, &lmmsize);
1305 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1306 strlen(filename), lmmsize,
1307 LUSTRE_OPC_ANY, NULL);
1308 if (op_data == NULL)
1311 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1312 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1313 ll_finish_md_op_data(op_data);
1315 CDEBUG(D_INFO, "md_getattr_name failed "
1316 "on %s: rc %d\n", filename, rc);
1320 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1321 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1323 lmmsize = body->eadatasize;
/* No striping EA present -> nothing to return. */
1325 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1327 GOTO(out, rc = -ENODATA);
1330 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1331 LASSERT(lmm != NULL);
/* Only LOV v1/v3 magics are understood here. */
1333 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1334 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1335 GOTO(out, rc = -EPROTO);
1339 * This is coming from the MDS, so is probably in
1340 * little endian. We convert it to host endian before
1341 * passing it to userspace.
/* True only on big-endian hosts, where the LE wire format must be
 * swabbed before being handed to userspace. */
1343 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1344 /* if function called for directory - we should
1345 * avoid swab not existent lsm objects */
1346 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1347 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1348 if (S_ISREG(body->mode))
1349 lustre_swab_lov_user_md_objects(
1350 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1351 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1352 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1353 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1354 if (S_ISREG(body->mode))
1355 lustre_swab_lov_user_md_objects(
1356 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1357 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1363 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (with one ost_data entry)
 * from userspace and apply it via ll_lov_setstripe_ea_info().
 * Requires CAP_SYS_ADMIN since MDS_OPEN_HAS_OBJS trusts the given objects.
 */
1368 static int ll_lov_setea(struct inode *inode, struct file *file,
1371 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1372 struct lov_user_md *lump;
1373 int lum_size = sizeof(struct lov_user_md) +
1374 sizeof(struct lov_user_ost_data);
1378 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1381 OBD_ALLOC(lump, lum_size);
1385 if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
/* copy_from_user failed: free and (elided) return -EFAULT. */
1386 OBD_FREE(lump, lum_size);
1390 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1392 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user's lov_user_md (first as the
 * smaller V1, re-copied as V3 if the magic says so), set the stripe EA,
 * then echo the resulting striping back to userspace via a GETSTRIPE
 * iocontrol so the caller sees what was actually allocated.
 */
1396 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1399 struct lov_user_md_v3 lumv3;
1400 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1401 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1402 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1405 int flags = FMODE_WRITE;
1408 /* first try with v1 which is smaller than v3 */
1409 lum_size = sizeof(struct lov_user_md_v1);
1410 if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
1413 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1414 lum_size = sizeof(struct lov_user_md_v3);
1415 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1419 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* On success (elided check), zero the user's stripe_count then refill
 * the user buffer with the striping the LOV actually created. */
1421 put_user(0, &lumv1p->lmm_stripe_count);
1422 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1423 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE handler: hand the inode's stripe metadata to the
 * LOV layer, which copies it out to the userspace buffer at @arg.
 * NOTE(review): the NULL-lsm check between these lines is elided here.
 */
1429 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1431 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1436 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * LL_IOC_GROUP_LOCK handler: acquire a group lock with group id @arg on
 * behalf of this file descriptor.  The grouplock is taken outside
 * lli_lock (cl_get_grouplock may block), so a second check under the
 * lock handles the race where another thread won in the meantime.
 */
1440 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1442 struct ll_inode_info *lli = ll_i2info(inode);
1443 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1444 struct ccc_grouplock grouplock;
1448 if (ll_file_nolock(file))
1449 RETURN(-EOPNOTSUPP);
1451 cfs_spin_lock(&lli->lli_lock);
1452 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1453 CWARN("group lock already existed with gid %lu\n",
1454 fd->fd_grouplock.cg_gid);
1455 cfs_spin_unlock(&lli->lli_lock);
1458 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1459 cfs_spin_unlock(&lli->lli_lock);
/* May block unless O_NONBLOCK was requested on the file. */
1461 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1462 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1466 cfs_spin_lock(&lli->lli_lock);
1467 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1468 cfs_spin_unlock(&lli->lli_lock);
/* Lost the race: another thread installed a grouplock while we slept;
 * drop ours (elided line presumably returns -EINVAL — confirm). */
1469 CERROR("another thread just won the race\n");
1470 cl_put_grouplock(&grouplock);
1474 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1475 fd->fd_grouplock = grouplock;
1476 cfs_spin_unlock(&lli->lli_lock);
1478 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock with gid @arg held
 * by this file descriptor.  State is detached under lli_lock; the actual
 * (possibly blocking) release happens after the lock is dropped.
 */
1482 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1484 struct ll_inode_info *lli = ll_i2info(inode);
1485 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1486 struct ccc_grouplock grouplock;
1489 cfs_spin_lock(&lli->lli_lock);
1490 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1491 cfs_spin_unlock(&lli->lli_lock);
1492 CWARN("no group lock held\n");
1495 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* The supplied gid must match the one this fd actually holds. */
1497 if (fd->fd_grouplock.cg_gid != arg) {
1498 CWARN("group lock %lu doesn't match current id %lu\n",
1499 arg, fd->fd_grouplock.cg_gid);
1500 cfs_spin_unlock(&lli->lli_lock);
/* Take a local copy and clear the fd state before unlocking, so the
 * release below runs without lli_lock held. */
1504 grouplock = fd->fd_grouplock;
1505 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1506 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1507 cfs_spin_unlock(&lli->lli_lock);
1509 cl_put_grouplock(&grouplock);
1510 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1515 * Close inode open handle
1517 * \param dentry [in] dentry which contains the inode
1518 * \param it [in,out] intent which contains open info and result
1521 * \retval <0 failure
1523 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1525 struct inode *inode = dentry->d_inode;
1526 struct obd_client_handle *och;
1532 /* Root ? Do nothing. */
1533 if (dentry->d_inode->i_sb->s_root == dentry)
1536 /* No open handle to close? Move away */
1537 if (!it_disposition(it, DISP_OPEN_OPEN))
1540 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
/* Build a temporary client handle from the intent so the generic
 * close path can be reused. */
1542 OBD_ALLOC(och, sizeof(*och));
1544 GOTO(out, rc = -ENOMEM);
1546 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1547 ll_i2info(inode), it, och);
1549 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1552 /* this one is in place of ll_file_open */
1553 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1554 ptlrpc_req_finished(it->d.lustre.it_data);
1555 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1560 * Get size for inode for which FIEMAP mapping is requested.
1561 * Make the FIEMAP get_info call and returns the result.
1563 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1566 struct obd_export *exp = ll_i2dtexp(inode);
1567 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1568 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1569 int vallen = num_bytes;
1573 /* Checks for fiemap flags */
1574 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* Report back which flags we do NOT support (elided line presumably
 * returns -EBADR with the unsupported bits set — confirm). */
1575 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1579 /* Check for FIEMAP_FLAG_SYNC */
1580 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1581 rc = filemap_fdatawrite(inode->i_mapping);
1586 /* If the stripe_count > 1 and the application does not understand
1587 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1589 if (lsm->lsm_stripe_count > 1 &&
1590 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
/* Build the key identifying the object whose mapping is requested. */
1593 fm_key.oa.o_id = lsm->lsm_object_id;
1594 fm_key.oa.o_seq = lsm->lsm_object_seq;
1595 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1597 obdo_from_inode(&fm_key.oa, inode, &ll_i2info(inode)->lli_fid,
1599 /* If filesize is 0, then there would be no objects for mapping */
1600 if (fm_key.oa.o_size == 0) {
1601 fiemap->fm_mapped_extents = 0;
1605 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
/* Delegate the actual extent mapping to the OBD layer; the result is
 * written back into @fiemap. */
1607 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1609 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH handler: resolve a FID to a path.  Copies in a
 * getinfo_fid2path header to learn gf_pathlen, allocates an output
 * buffer sized for the path, calls into the MDC, and copies the
 * result back to userspace.
 */
1614 int ll_fid2path(struct obd_export *exp, void *arg)
1616 struct getinfo_fid2path *gfout, *gfin;
1620 /* Need to get the buflen */
1621 OBD_ALLOC_PTR(gfin);
1624 if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
/* copy_from_user failure path (free/return lines elided). */
1629 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1630 OBD_ALLOC(gfout, outsize);
1631 if (gfout == NULL) {
1635 memcpy(gfout, gfin, sizeof(*gfout));
1638 /* Call mdc_iocontrol */
1639 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1642 if (cfs_copy_to_user(arg, gfout, outsize))
1646 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size a kernel fiemap buffer from the user's
 * fm_extent_count, copy the request in, run ll_do_fiemap(), and copy the
 * header plus mapped extents back out.
 * NOTE(review): extent_count comes from userspace and feeds the
 * num_bytes multiplication; overflow checking is not visible in this
 * extract — verify against the full source.
 */
1650 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1652 struct ll_user_fiemap *fiemap_s;
1653 size_t num_bytes, ret_bytes;
1654 unsigned int extent_count;
1657 /* Get the extent count so we can calculate the size of
1658 * required fiemap buffer */
1659 if (get_user(extent_count,
1660 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1662 num_bytes = sizeof(*fiemap_s) + (extent_count *
1663 sizeof(struct ll_fiemap_extent));
/* vmalloc because extent_count may make this larger than a page. */
1665 OBD_VMALLOC(fiemap_s, num_bytes);
1666 if (fiemap_s == NULL)
1669 /* get the fiemap value */
1670 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1672 GOTO(error, rc = -EFAULT);
1674 /* If fm_extent_count is non-zero, read the first extent since
1675 * it is used to calculate end_offset and device from previous
1678 if (copy_from_user(&fiemap_s->fm_extents[0],
1679 (char __user *)arg + sizeof(*fiemap_s),
1680 sizeof(struct ll_fiemap_extent)))
1681 GOTO(error, rc = -EFAULT);
1684 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
/* Copy back the header, plus the mapped extents if any were requested. */
1688 ret_bytes = sizeof(struct ll_user_fiemap);
1690 if (extent_count != 0)
1691 ret_bytes += (fiemap_s->fm_mapped_extents *
1692 sizeof(struct ll_fiemap_extent));
1694 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1698 OBD_VFREE(fiemap_s, num_bytes);
/*
 * Main ioctl dispatcher for regular Lustre files.  Handles Lustre-specific
 * commands (striping, group locks, fiemap, fid/path translation, statfs)
 * directly, consults dynamically registered handlers (ll_iocontrol_call),
 * and finally falls through to obd_iocontrol() for anything else.
 */
1702 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1705 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1709 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1710 inode->i_generation, inode, cmd);
1711 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1713 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1714 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1718 case LL_IOC_GETFLAGS:
1719 /* Get the current value of the file flags */
1720 return put_user(fd->fd_flags, (int *)arg);
1721 case LL_IOC_SETFLAGS:
1722 case LL_IOC_CLRFLAGS:
1723 /* Set or clear specific file flags */
1724 /* XXX This probably needs checks to ensure the flags are
1725 * not abused, and to handle any flag side effects.
1727 if (get_user(flags, (int *) arg))
1730 if (cmd == LL_IOC_SETFLAGS) {
/* LL_FILE_IGNORE_LOCK is only safe for O_DIRECT I/O. */
1731 if ((flags & LL_FILE_IGNORE_LOCK) &&
1732 !(file->f_flags & O_DIRECT)) {
1733 CERROR("%s: unable to disable locking on "
1734 "non-O_DIRECT file\n", current->comm);
1738 fd->fd_flags |= flags;
1740 fd->fd_flags &= ~flags;
1743 case LL_IOC_LOV_SETSTRIPE:
1744 RETURN(ll_lov_setstripe(inode, file, arg));
1745 case LL_IOC_LOV_SETEA:
1746 RETURN(ll_lov_setea(inode, file, arg));
1747 case LL_IOC_LOV_GETSTRIPE:
1748 RETURN(ll_lov_getstripe(inode, arg));
1749 case LL_IOC_RECREATE_OBJ:
1750 RETURN(ll_lov_recreate_obj(inode, file, arg));
1751 case FSFILT_IOC_FIEMAP:
1752 RETURN(ll_ioctl_fiemap(inode, arg));
1753 case FSFILT_IOC_GETFLAGS:
1754 case FSFILT_IOC_SETFLAGS:
1755 RETURN(ll_iocontrol(inode, file, cmd, arg));
1756 case FSFILT_IOC_GETVERSION_OLD:
1757 case FSFILT_IOC_GETVERSION:
1758 RETURN(put_user(inode->i_generation, (int *)arg));
1759 case LL_IOC_GROUP_LOCK:
1760 RETURN(ll_get_grouplock(inode, file, arg));
1761 case LL_IOC_GROUP_UNLOCK:
1762 RETURN(ll_put_grouplock(inode, file, arg));
1763 case IOC_OBD_STATFS:
1764 RETURN(ll_obd_statfs(inode, (void *)arg));
1766 /* We need to special case any other ioctls we want to handle,
1767 * to send them to the MDS/OST as appropriate and to properly
1768 * network encode the arg field.
1769 case FSFILT_IOC_SETVERSION_OLD:
1770 case FSFILT_IOC_SETVERSION:
1772 case LL_IOC_FLUSHCTX:
1773 RETURN(ll_flush_ctx(inode));
1774 case LL_IOC_PATH2FID: {
1775 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1776 sizeof(struct lu_fid)))
1781 case OBD_IOC_FID2PATH:
1782 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1784 case LL_IOC_GET_MDTIDX: {
1787 mdtidx = ll_get_mdt_idx(inode);
1791 if (put_user((int)mdtidx, (int*)arg))
/* default: give dynamically registered ioctl handlers a chance first,
 * then pass the command down to the data export. */
1801 ll_iocontrol_call(inode, file, cmd, arg, &err))
1804 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.  For SEEK_END the file size must be current, so
 * a glimpse (cl_glimpse_size) fetches the authoritative size from the
 * OSTs before the offset is computed.  Rejects offsets beyond
 * ll_file_maxbytes().
 */
1810 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1812 struct inode *inode = file->f_dentry->d_inode;
1815 retval = offset + ((origin == 2) ? i_size_read(inode) :
1816 (origin == 1) ? file->f_pos : 0);
1817 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1818 inode->i_ino, inode->i_generation, inode, retval, retval,
1819 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1820 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1822 if (origin == 2) { /* SEEK_END */
1823 int nonblock = 0, rc;
1825 if (file->f_flags & O_NONBLOCK)
1826 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* Refresh i_size from the OSTs before using it below. */
1828 rc = cl_glimpse_size(inode);
1832 offset += i_size_read(inode);
1833 } else if (origin == 1) { /* SEEK_CUR */
1834 offset += file->f_pos;
1838 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1839 if (offset != file->f_pos) {
1840 file->f_pos = offset;
/*
 * fsync(2) implementation: wait for in-flight writeback, collect async
 * write errors recorded on the inode/lsm, sync metadata through the MDC
 * (md_sync) and, for striped files, data through the OSTs (obd_sync over
 * the whole object, 0..OBD_OBJECT_EOF).  Capabilities are taken around
 * both RPCs.
 */
1848 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1850 struct inode *inode = dentry->d_inode;
1851 struct ll_inode_info *lli = ll_i2info(inode);
1852 struct lov_stripe_md *lsm = lli->lli_smd;
1853 struct ptlrpc_request *req;
1854 struct obd_capa *oc;
1857 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1858 inode->i_generation, inode);
1859 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1861 /* fsync's caller has already called _fdata{sync,write}, we want
1862 * that IO to finish before calling the osc and mdc sync methods */
1863 rc = filemap_fdatawait(inode->i_mapping);
1865 /* catch async errors that were recorded back when async writeback
1866 * failed for pages in this mapping. */
1867 err = lli->lli_async_rc;
1868 lli->lli_async_rc = 0;
1872 err = lov_test_and_clear_async_rc(lsm);
1877 oc = ll_mdscapa_get(inode);
1878 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1884 ptlrpc_req_finished(req);
/* Data sync path for striped files; obdo allocation check (elided
 * lines) falls back to -ENOMEM. */
1891 RETURN(rc ? rc : -ENOMEM);
1893 oa->o_id = lsm->lsm_object_id;
1894 oa->o_seq = lsm->lsm_object_seq;
1895 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1896 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid,
1897 OBD_MD_FLTYPE | OBD_MD_FLATIME |
1898 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1901 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1902 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1903 0, OBD_OBJECT_EOF, oc);
/*
 * flock/fcntl locking: translate the kernel's file_lock into an LDLM
 * flock enqueue on the MDS.  Lock type maps to LDLM mode (F_RDLCK->PR,
 * F_WRLCK->PW, F_UNLCK->NL as a cancel substitute); the command selects
 * blocking behaviour (F_SETLK -> BLOCK_NOWAIT, F_GETLK -> TEST_LOCK).
 * After the server reply, the local lock lists are updated via
 * ll_flock_lock_file_wait / posix_lock_file_wait.
 */
1913 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1915 struct inode *inode = file->f_dentry->d_inode;
1916 struct ll_sb_info *sbi = ll_i2sbi(inode);
1917 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1918 .ei_cb_cp =ldlm_flock_completion_ast,
1919 .ei_cbdata = file_lock };
1920 struct md_op_data *op_data;
1921 struct lustre_handle lockh = {0};
1922 ldlm_policy_data_t flock;
1927 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1928 inode->i_ino, file_lock);
1930 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1932 if (file_lock->fl_flags & FL_FLOCK) {
1933 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1934 /* set missing params for flock() calls */
1935 file_lock->fl_end = OFFSET_MAX;
1936 file_lock->fl_pid = current->tgid;
1938 flock.l_flock.pid = file_lock->fl_pid;
1939 flock.l_flock.start = file_lock->fl_start;
1940 flock.l_flock.end = file_lock->fl_end;
1942 switch (file_lock->fl_type) {
1944 einfo.ei_mode = LCK_PR;
1947 /* An unlock request may or may not have any relation to
1948 * existing locks so we may not be able to pass a lock handle
1949 * via a normal ldlm_lock_cancel() request. The request may even
1950 * unlock a byte range in the middle of an existing lock. In
1951 * order to process an unlock request we need all of the same
1952 * information that is given with a normal read or write record
1953 * lock request. To avoid creating another ldlm unlock (cancel)
1954 * message we'll treat a LCK_NL flock request as an unlock. */
1955 einfo.ei_mode = LCK_NL;
1958 einfo.ei_mode = LCK_PW;
1961 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* Command -> enqueue flags (non-blocking for F_SETLK, test-only for
 * F_GETLK); intermediate cases elided in this extract. */
1976 flags = LDLM_FL_BLOCK_NOWAIT;
1982 flags = LDLM_FL_TEST_LOCK;
1983 /* Save the old mode so that if the mode in the lock changes we
1984 * can decrement the appropriate reader or writer refcount. */
1985 file_lock->fl_type = einfo.ei_mode;
1988 CERROR("unknown fcntl lock command: %d\n", cmd);
1992 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1993 LUSTRE_OPC_ANY, NULL);
1994 if (IS_ERR(op_data))
1995 RETURN(PTR_ERR(op_data));
1997 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
1998 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
1999 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2001 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2002 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2004 ll_finish_md_op_data(op_data);
2006 if ((file_lock->fl_flags & FL_FLOCK) &&
2007 (rc == 0 || file_lock->fl_type == F_UNLCK))
2008 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2009 #ifdef HAVE_F_OP_FLOCK
2010 if ((file_lock->fl_flags & FL_POSIX) &&
2011 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2012 !(flags & LDLM_FL_TEST_LOCK))
2013 posix_lock_file_wait(file, file_lock);
/* Stub used for -o noflock mounts; body elided in this extract —
 * presumably rejects all lock requests (see ll_file_operations_noflock). */
2019 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking a reference — LDLM_FL_TEST_LOCK) whether this
 * client already holds an MDS inodebits lock covering @bits on @inode,
 * in any of the CR/CW/PR/PW modes.
 */
2026 int ll_have_md_lock(struct inode *inode, __u64 bits)
2028 struct lustre_handle lockh;
2029 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2037 fid = &ll_i2info(inode)->lli_fid;
2038 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2040 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2041 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2042 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Like ll_have_md_lock() but actually takes a reference on a matching
 * MDS inodebits lock (no LDLM_FL_TEST_LOCK) and returns the matched
 * handle in @lockh along with the granted mode.
 */
2048 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2049 struct lustre_handle *lockh)
2051 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2057 fid = &ll_i2info(inode)->lli_fid;
2058 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2060 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2061 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2062 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process a revalidation RPC result: -ENOENT on an already-unlinked
 * inode is converted to success (the nlink update lines are elided here);
 * other errors are logged and passed through.
 */
2066 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2067 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2068 * and return success */
2070 /* This path cannot be hit for regular files unless in
2071 * case of obscure races, so no need to to validate
2073 if (!S_ISREG(inode->i_mode) &&
2074 !S_ISDIR(inode->i_mode))
2079 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate a dentry/inode against the MDS.  Two strategies:
 *  - If the server supports getattr-by-FID (OBD_CONNECT_ATTRFID), issue
 *    an IT_GETATTR intent lock (md_intent_lock) and finish it via
 *    ll_revalidate_it_finish(); an unlinked inode gets its dentry
 *    dropped from the dcache.
 *  - Otherwise, if no matching MD lock for @ibits is cached, do a plain
 *    md_getattr() and refresh the inode with ll_prep_inode().
 */
2087 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2090 struct inode *inode = dentry->d_inode;
2091 struct ptlrpc_request *req = NULL;
2092 struct ll_sb_info *sbi;
2093 struct obd_export *exp;
2098 CERROR("REPORT THIS LINE TO PETER\n");
2101 sbi = ll_i2sbi(inode);
2103 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2104 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2106 exp = ll_i2mdexp(inode);
2108 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2109 struct lookup_intent oit = { .it_op = IT_GETATTR };
2110 struct md_op_data *op_data;
2112 /* Call getattr by fid, so do not provide name at all. */
2113 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2114 dentry->d_inode, NULL, 0, 0,
2115 LUSTRE_OPC_ANY, NULL);
2116 if (IS_ERR(op_data))
2117 RETURN(PTR_ERR(op_data));
2119 oit.it_create_mode |= M_CHECK_STALE;
2120 rc = md_intent_lock(exp, op_data, NULL, 0,
2121 /* we are not interested in name
2124 ll_md_blocking_ast, 0);
2125 ll_finish_md_op_data(op_data);
2126 oit.it_create_mode &= ~M_CHECK_STALE;
2128 rc = ll_inode_revalidate_fini(inode, rc);
2132 rc = ll_revalidate_it_finish(req, &oit, dentry);
2134 ll_intent_release(&oit);
2138 /* Unlinked? Unhash dentry, so it is not picked up later by
2139 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2140 here to preserve get_cwd functionality on 2.6.
2142 if (!dentry->d_inode->i_nlink) {
2143 cfs_spin_lock(&ll_lookup_lock);
2144 spin_lock(&dcache_lock);
2145 ll_drop_dentry(dentry);
2146 spin_unlock(&dcache_lock);
2147 cfs_spin_unlock(&ll_lookup_lock);
2150 ll_lookup_finish_locks(&oit, dentry);
2151 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2152 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2153 obd_valid valid = OBD_MD_FLGETATTR;
2154 struct md_op_data *op_data;
/* Regular files also need the EA, sized to the MDS maximum. */
2157 if (S_ISREG(inode->i_mode)) {
2158 rc = ll_get_max_mdsize(sbi, &ealen);
2161 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2164 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2165 0, ealen, LUSTRE_OPC_ANY,
2167 if (op_data == NULL)
2170 op_data->op_valid = valid;
2171 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2172 * capa for this inode. Because we only keep capas of dirs
2174 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2175 ll_finish_md_op_data(op_data);
2177 rc = ll_inode_revalidate_fini(inode, rc);
2181 rc = ll_prep_inode(&inode, req, NULL);
2184 ptlrpc_req_finished(req);
/*
 * Full revalidation: refresh MDS attributes (UPDATE|LOOKUP bits), then
 * either copy the cached LVB times when no objects are allocated yet,
 * or glimpse the OSTs for the authoritative size/times.
 */
2188 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2190 struct inode *inode = dentry->d_inode;
2194 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2195 MDS_INODELOCK_LOOKUP);
2197 /* if object not yet allocated, don't validate size */
2198 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
2199 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2200 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2201 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2205 /* cl_glimpse_size will prefer locally cached writes if they extend
2209 rc = cl_glimpse_size(inode);
/*
 * getattr with an intent: revalidate the inode then fill the kstat from
 * the (now fresh) inode fields.  32-bit callers get a squashed ino
 * built from the FID so it fits in 32 bits.
 */
2214 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2215 struct lookup_intent *it, struct kstat *stat)
2217 struct inode *inode = de->d_inode;
2218 struct ll_inode_info *lli = ll_i2info(inode);
2221 res = ll_inode_revalidate_it(de, it);
2222 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2227 stat->dev = inode->i_sb->s_dev;
2228 if (cfs_curproc_is_32bit())
2229 stat->ino = cl_fid_build_ino32(&lli->lli_fid);
2231 stat->ino = inode->i_ino;
2233 stat->mode = inode->i_mode;
2234 stat->nlink = inode->i_nlink;
2235 stat->uid = inode->i_uid;
2236 stat->gid = inode->i_gid;
2237 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2238 stat->atime = inode->i_atime;
2239 stat->mtime = inode->i_mtime;
2240 stat->ctime = inode->i_ctime;
2241 #ifdef HAVE_INODE_BLKSIZE
2242 stat->blksize = inode->i_blksize;
2244 stat->blksize = 1 << inode->i_blkbits;
2247 stat->size = i_size_read(inode);
2248 stat->blocks = inode->i_blocks;
/* VFS ->getattr entry point: wraps ll_getattr_it() with a fresh
 * IT_GETATTR intent. */
2252 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2254 struct lookup_intent it = { .it_op = IT_GETATTR };
2256 return ll_getattr_it(mnt, de, &it, stat);
2259 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * VFS ->fiemap entry point (kernels with <linux/fiemap.h>): bridge the
 * kernel's fiemap_extent_info to the Lustre ll_user_fiemap layout,
 * run ll_do_fiemap(), and copy flags/extents back.
 * NOTE(review): fieinfo->fi_extents_start is a user pointer on most
 * kernels; the memcpy here assumes it is directly addressable — verify
 * against the full source / kernel version this targets.
 */
2260 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2261 __u64 start, __u64 len)
2265 struct ll_user_fiemap *fiemap;
2266 unsigned int extent_count = fieinfo->fi_extents_max;
2268 num_bytes = sizeof(*fiemap) + (extent_count *
2269 sizeof(struct ll_fiemap_extent));
2270 OBD_VMALLOC(fiemap, num_bytes);
2275 fiemap->fm_flags = fieinfo->fi_flags;
2276 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2277 fiemap->fm_start = start;
2278 fiemap->fm_length = len;
2279 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2280 sizeof(struct ll_fiemap_extent));
2282 rc = ll_do_fiemap(inode, fiemap, num_bytes);
2284 fieinfo->fi_flags = fiemap->fm_flags;
2285 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2286 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2287 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2289 OBD_VFREE(fiemap, num_bytes);
/*
 * POSIX ACL permission check against the ACL cached on the inode
 * (lli_posix_acl), duplicated under lli_lock so the check itself runs
 * unlocked.  Compiled out (behaviour elided) without CONFIG_FS_POSIX_ACL.
 */
2296 int lustre_check_acl(struct inode *inode, int mask)
2298 #ifdef CONFIG_FS_POSIX_ACL
2299 struct ll_inode_info *lli = ll_i2info(inode);
2300 struct posix_acl *acl;
2304 cfs_spin_lock(&lli->lli_lock);
2305 acl = posix_acl_dup(lli->lli_posix_acl);
2306 cfs_spin_unlock(&lli->lli_lock);
2311 rc = posix_acl_permission(inode, acl, mask);
2312 posix_acl_release(acl);
2320 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2321 #ifndef HAVE_INODE_PERMISION_2ARGS
/*
 * ->permission for kernels >= 2.6.10: revalidate the root inode first
 * (it skips lookup-time validation), defer to the remote-permission path
 * for rmt_client mounts, otherwise use generic_permission() with our
 * ACL checker.  Signature varies with HAVE_INODE_PERMISION_2ARGS.
 */
2322 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2324 int ll_inode_permission(struct inode *inode, int mask)
2330 /* as root inode are NOT getting validated in lookup operation,
2331 * need to do it before permission check. */
2333 if (inode == inode->i_sb->s_root->d_inode) {
2334 struct lookup_intent it = { .it_op = IT_LOOKUP };
2336 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2337 MDS_INODELOCK_LOOKUP);
2342 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2343 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2345 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2346 return lustre_check_remote_perm(inode, mask);
2348 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2349 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * Pre-2.6.10 variant: open-coded owner/group/other permission logic
 * (the generic_permission equivalent of its era), ending with the
 * DAC_OVERRIDE / DAC_READ_SEARCH capability escapes.
 */
2354 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2356 int mode = inode->i_mode;
2359 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2360 inode->i_ino, inode->i_generation, inode, mask);
2362 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2363 return lustre_check_remote_perm(inode, mask);
2365 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2367 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2368 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2370 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2372 if (current->fsuid == inode->i_uid) {
/* Owner didn't match via mode bits alone: consult the ACL. */
2375 if (((mode >> 3) & mask & S_IRWXO) != mask)
2377 rc = lustre_check_acl(inode, mask);
2381 goto check_capabilities;
2385 if (cfs_curproc_is_in_groups(inode->i_gid))
2388 if ((mode & mask & S_IRWXO) == mask)
/* Capability escapes: CAP_DAC_OVERRIDE for everything except exec of
 * non-executable regular files; CAP_DAC_READ_SEARCH for reads and
 * directory non-write access. */
2392 if (!(mask & MAY_EXEC) ||
2393 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2394 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2397 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2398 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select the vectored-I/O file_operations member names and handlers for
 * this kernel: readv/writev on older kernels, aio_read/aio_write on
 * newer ones.  Used via .READ_METHOD/.WRITE_METHOD in the tables below. */
2405 #ifdef HAVE_FILE_READV
2406 #define READ_METHOD readv
2407 #define READ_FUNCTION ll_file_readv
2408 #define WRITE_METHOD writev
2409 #define WRITE_FUNCTION ll_file_writev
2411 #define READ_METHOD aio_read
2412 #define READ_FUNCTION ll_file_aio_read
2413 #define WRITE_METHOD aio_write
2414 #define WRITE_FUNCTION ll_file_aio_write
2417 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations table: no .flock/.lock members, so the kernel
 * falls back to its local (single-node) lock handling. */
2418 struct file_operations ll_file_operations = {
2419 .read = ll_file_read,
2420 .READ_METHOD = READ_FUNCTION,
2421 .write = ll_file_write,
2422 .WRITE_METHOD = WRITE_FUNCTION,
2423 .ioctl = ll_file_ioctl,
2424 .open = ll_file_open,
2425 .release = ll_file_release,
2426 .mmap = ll_file_mmap,
2427 .llseek = ll_file_seek,
2428 #ifdef HAVE_KERNEL_SENDFILE
2429 .sendfile = ll_file_sendfile,
2431 #ifdef HAVE_KERNEL_SPLICE_READ
2432 .splice_read = ll_file_splice_read,
/* file_operations used with -o flock: identical to the default table
 * but wires .flock/.lock to ll_file_flock for cluster-wide locking. */
2437 struct file_operations ll_file_operations_flock = {
2438 .read = ll_file_read,
2439 .READ_METHOD = READ_FUNCTION,
2440 .write = ll_file_write,
2441 .WRITE_METHOD = WRITE_FUNCTION,
2442 .ioctl = ll_file_ioctl,
2443 .open = ll_file_open,
2444 .release = ll_file_release,
2445 .mmap = ll_file_mmap,
2446 .llseek = ll_file_seek,
2447 #ifdef HAVE_KERNEL_SENDFILE
2448 .sendfile = ll_file_sendfile,
2450 #ifdef HAVE_KERNEL_SPLICE_READ
2451 .splice_read = ll_file_splice_read,
2454 #ifdef HAVE_F_OP_FLOCK
2455 .flock = ll_file_flock,
2457 .lock = ll_file_flock
2460 /* These are for -o noflock - to return ENOSYS on flock calls */
2461 struct file_operations ll_file_operations_noflock = {
2462 .read = ll_file_read,
2463 .READ_METHOD = READ_FUNCTION,
2464 .write = ll_file_write,
2465 .WRITE_METHOD = WRITE_FUNCTION,
2466 .ioctl = ll_file_ioctl,
2467 .open = ll_file_open,
2468 .release = ll_file_release,
2469 .mmap = ll_file_mmap,
2470 .llseek = ll_file_seek,
2471 #ifdef HAVE_KERNEL_SENDFILE
2472 .sendfile = ll_file_sendfile,
2474 #ifdef HAVE_KERNEL_SPLICE_READ
2475 .splice_read = ll_file_splice_read,
2478 #ifdef HAVE_F_OP_FLOCK
/* ll_file_noflock rejects lock requests (see its definition above). */
2479 .flock = ll_file_noflock,
2481 .lock = ll_file_noflock
/* inode_operations for regular Lustre files; .setattr_raw is only used
 * on kernels carrying the VFS intent patches. */
2484 struct inode_operations ll_file_inode_operations = {
2485 #ifdef HAVE_VFS_INTENT_PATCHES
2486 .setattr_raw = ll_setattr_raw,
2488 .setattr = ll_setattr,
2489 .truncate = ll_truncate,
2490 .getattr = ll_getattr,
2491 .permission = ll_inode_permission,
2492 .setxattr = ll_setxattr,
2493 .getxattr = ll_getxattr,
2494 .listxattr = ll_listxattr,
2495 .removexattr = ll_removexattr,
2496 #ifdef HAVE_LINUX_FIEMAP_H
2497 .fiemap = ll_fiemap,
2501 /* dynamic ioctl number support routins */
/* Registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries protected by a read/write semaphore. */
2502 static struct llioc_ctl_data {
2503 cfs_rw_semaphore_t ioc_sem;
2504 cfs_list_t ioc_head;
2506 __RWSEM_INITIALIZER(llioc.ioc_sem),
2507 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: a callback plus the array of ioctl command
 * numbers it services (iocd_cmd is a trailing variable-length array). */
2512 cfs_list_t iocd_list;
2513 unsigned int iocd_size;
2514 llioc_callback_t iocd_cb;
2515 unsigned int iocd_count;
2516 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: callback @cb services the @count
 * command numbers in @cmd.  Returns an opaque cookie (the allocated
 * llioc_data, used as the magic for unregistering) or NULL on bad
 * arguments / allocation failure (failure returns elided here).
 */
2519 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2522 struct llioc_data *in_data = NULL;
2525 if (cb == NULL || cmd == NULL ||
2526 count > LLIOC_MAX_CMD || count < 0)
2529 size = sizeof(*in_data) + count * sizeof(unsigned int);
2530 OBD_ALLOC(in_data, size);
2531 if (in_data == NULL)
2534 memset(in_data, 0, sizeof(*in_data));
2535 in_data->iocd_size = size;
2536 in_data->iocd_cb = cb;
2537 in_data->iocd_count = count;
2538 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2540 cfs_down_write(&llioc.ioc_sem);
2541 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2542 cfs_up_write(&llioc.ioc_sem);
/*
 * Unregister a dynamic ioctl handler previously returned by
 * ll_iocontrol_register().  @magic is matched against the list entries
 * (comparison line elided); a match is unlinked and freed, otherwise a
 * warning is logged.
 */
2547 void ll_iocontrol_unregister(void *magic)
2549 struct llioc_data *tmp;
2554 cfs_down_write(&llioc.ioc_sem);
2555 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2557 unsigned int size = tmp->iocd_size;
2559 cfs_list_del(&tmp->iocd_list);
2560 cfs_up_write(&llioc.ioc_sem);
2562 OBD_FREE(tmp, size);
2566 cfs_up_write(&llioc.ioc_sem);
2568 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2571 EXPORT_SYMBOL(ll_iocontrol_register);
2572 EXPORT_SYMBOL(ll_iocontrol_unregister);
2574 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2575 unsigned int cmd, unsigned long arg, int *rcp)
2577 enum llioc_iter ret = LLIOC_CONT;
2578 struct llioc_data *data;
2579 int rc = -EINVAL, i;
2581 cfs_down_read(&llioc.ioc_sem);
2582 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2583 for (i = 0; i < data->iocd_count; i++) {
2584 if (cmd != data->iocd_cmd[i])
2587 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2591 if (ret == LLIOC_STOP)
2594 cfs_up_read(&llioc.ioc_sem);