1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/*
 * Allocate a struct ll_file_data from the ll_file_data_slab cache using
 * IO-safe GFP flags. Presumably returns @fd (NULL on allocation failure);
 * the function tail is missing from this extract — TODO confirm.
 * NOTE(review): extraction dropped braces/RETURN lines; code kept verbatim.
 */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Return a per-open ll_file_data to the ll_file_data_slab cache. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the current VFS inode attributes into @op_data for an MDS request:
 * fid, mode, a/m/ctime, size, block count, ext-style flags, the current
 * IO epoch, the open handle @fh, and an MDS capability reference.
 * NOTE(review): ll_mdscapa_get() presumably takes a capa reference that the
 * request path releases later — TODO confirm against callers.
 */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the wrapper struct ll_iattr, hence the cast. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
79 ll_inode_to_ext_flags(inode->i_flags);
80 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
82 op_data->op_handle = *fh;
83 op_data->op_capa1 = ll_mdscapa_get(inode);
/*
 * Prepare @op_data for an MDS close of open handle @och.
 * NOTE(review): intervening lines (braces, early-out bodies) were lost in
 * extraction; code kept verbatim.
 */
87 * Closes the IO epoch and packs all the attributes into @op_data for
90 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
91 struct obd_client_handle *och)
/* Always send mode and explicit a/m/ctime on close. */
95 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
96 ATTR_MTIME_SET | ATTR_CTIME_SET;
/* Read-only opens presumably skip the size/epoch handling below — the
 * guarded statement is missing from this extract; TODO confirm. */
98 if (!(och->och_flags & FMODE_WRITE))
/* Without Size-on-MDS support (or for non-regular files) the client also
 * sends size/blocks on close. */
101 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
102 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
104 ll_ioepoch_close(inode, op_data, &och, 0);
107 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
108 ll_prep_md_op_data(op_data, inode, NULL, NULL,
109 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send an MDS close for open handle @och on @inode, perform a
 * Size-on-MDS update if the server requested one, destroy orphan OST
 * objects named in the close reply, and tear down open-replay state.
 * NOTE(review): many control-flow lines (braces, ENTRY/RETURN, some
 * conditions) are missing from this extract; code kept verbatim.
 */
113 static int ll_close_inode_openhandle(struct obd_export *md_exp,
115 struct obd_client_handle *och)
117 struct obd_export *exp = ll_i2mdexp(inode);
118 struct md_op_data *op_data;
119 struct ptlrpc_request *req = NULL;
120 struct obd_device *obd = class_exp2obd(exp);
127 * XXX: in case of LMV, is this correct to access
130 CERROR("Invalid MDC connection handle "LPX64"\n",
131 ll_i2mdexp(inode)->exp_handle.h_cookie);
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
/* Remember whether this close ends the IO epoch before md_close()
 * consumes op_data. */
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr to back to MDS. */
147 rc = ll_som_update(inode, op_data);
149 CERROR("inode %lu mdc Size-on-MDS update failed: "
150 "rc = %d\n", inode->i_ino, rc);
154 CERROR("inode %lu mdc close failed: rc = %d\n",
157 ll_finish_md_op_data(op_data);
/* Destroy OST objects the MDS listed in the close reply (unlinked file). */
160 rc = ll_objects_destroy(req, inode);
162 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* Epoch still open on a SOM-capable export: defer attribute flush to the
 * DONE_WRITING path instead of finishing here. */
169 if (exp_connect_som(exp) && !epoch_close &&
170 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
171 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
173 md_clear_open_replay_data(md_exp, och);
174 /* Free @och if it is not waiting for DONE_WRITING. */
175 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
178 if (req) /* This is close request */
179 ptlrpc_req_finished(req);
/*
 * Drop the per-mode (write/exec/read) MDS open handle on @inode when its
 * last local user goes away, closing it on the MDS via
 * ll_close_inode_openhandle(). Serialized by lli_och_sem.
 * NOTE(review): lines that take the och pointer and reset *och_p are
 * missing from this extract; code kept verbatim.
 */
183 int ll_md_real_close(struct inode *inode, int flags)
185 struct ll_inode_info *lli = ll_i2info(inode);
186 struct obd_client_handle **och_p;
187 struct obd_client_handle *och;
/* Select the handle slot and use count matching the open mode. */
192 if (flags & FMODE_WRITE) {
193 och_p = &lli->lli_mds_write_och;
194 och_usecount = &lli->lli_open_fd_write_count;
195 } else if (flags & FMODE_EXEC) {
196 och_p = &lli->lli_mds_exec_och;
197 och_usecount = &lli->lli_open_fd_exec_count;
199 LASSERT(flags & FMODE_READ);
200 och_p = &lli->lli_mds_read_och;
201 och_usecount = &lli->lli_open_fd_read_count;
204 cfs_down(&lli->lli_och_sem);
205 if (*och_usecount) { /* There are still users of this handle, so
207 cfs_up(&lli->lli_och_sem);
212 cfs_up(&lli->lli_och_sem);
214 if (och) { /* There might be a race and somebody have freed this och
216 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close: drop any group lock, decrement the per-mode
 * open count under lli_och_sem, and only talk to the MDS
 * (ll_md_real_close) when no cached OPEN lock lets us skip it.
 * Finally detach and free the ll_file_data and close the OSS capability.
 * NOTE(review): extraction dropped braces/else lines; code kept verbatim.
 */
223 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
226 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
227 struct ll_inode_info *lli = ll_i2info(inode);
231 /* clear group lock, if present */
232 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
233 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
235 /* Let's see if we have good enough OPEN lock on the file and if
236 we can skip talking to MDS */
237 if (file->f_dentry->d_inode) { /* Can this ever be false? */
239 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
240 struct lustre_handle lockh;
241 struct inode *inode = file->f_dentry->d_inode;
242 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
244 cfs_down(&lli->lli_och_sem);
245 if (fd->fd_omode & FMODE_WRITE) {
247 LASSERT(lli->lli_open_fd_write_count);
248 lli->lli_open_fd_write_count--;
249 } else if (fd->fd_omode & FMODE_EXEC) {
251 LASSERT(lli->lli_open_fd_exec_count);
252 lli->lli_open_fd_exec_count--;
255 LASSERT(lli->lli_open_fd_read_count);
256 lli->lli_open_fd_read_count--;
258 cfs_up(&lli->lli_och_sem);
/* TEST_LOCK match: probe for a granted OPEN ibits lock without taking a
 * reference; on a miss, do the real MDS close. */
260 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
261 LDLM_IBITS, &policy, lockmode,
263 rc = ll_md_real_close(file->f_dentry->d_inode,
267 CERROR("Releasing a file %p with negative dentry %p. Name %s",
268 file, file->f_dentry, file->f_dentry->d_name.name);
271 LUSTRE_FPRIVATE(file) = NULL;
272 ll_file_data_put(fd);
273 ll_capa_close(inode);
/* Forward declaration — defined in the LOV layer. */
278 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
280 /* While this returns an error code, fput() the caller does not, so we need
281 * to make every effort to clean up all of our state here. Also, applications
282 * rarely check close errors and even if an error is returned they will not
283 * re-try the close call.
/*
 * f_op->release handler: stop any statahead thread keyed to this fd,
 * short-circuit the root inode (no MDS close needed), fold async OST
 * write errors into the return code, and do the MDS close via
 * ll_md_close(). The remote-ACL block tears down per-process ACL state
 * when the root inode of an RMT_CLIENT mount is released.
 * NOTE(review): extraction dropped braces/RETURN lines; code kept verbatim.
 */
285 int ll_file_release(struct inode *inode, struct file *file)
287 struct ll_file_data *fd;
288 struct ll_sb_info *sbi = ll_i2sbi(inode);
289 struct ll_inode_info *lli = ll_i2info(inode);
290 struct lov_stripe_md *lsm = lli->lli_smd;
294 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
295 inode->i_generation, inode);
297 #ifdef CONFIG_FS_POSIX_ACL
298 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
299 inode == inode->i_sb->s_root->d_inode) {
300 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
303 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
304 fd->fd_flags &= ~LL_FILE_RMTACL;
305 rct_del(&sbi->ll_rct, cfs_curproc_pid());
306 et_search_free(&sbi->ll_et, cfs_curproc_pid());
311 if (inode->i_sb->s_root != file->f_dentry)
312 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
313 fd = LUSTRE_FPRIVATE(file);
316 /* The last ref on @file, maybe not the the owner pid of statahead.
317 * Different processes can open the same dir, "ll_opendir_key" means:
318 * it is me that should stop the statahead thread. */
319 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
320 ll_stop_statahead(inode, lli->lli_opendir_key);
322 if (inode->i_sb->s_root == file->f_dentry) {
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
/* Surface any deferred async write error from the OSTs on this close. */
329 lov_test_and_clear_async_rc(lsm);
330 lli->lli_async_rc = 0;
332 rc = ll_md_close(sbi->ll_md_exp, inode, file);
334 if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
335 libcfs_debug_dumplog();
/*
 * Issue an IT_OPEN intent lock request to the MDS for @file (used by NFS
 * export and by setstripe). On success updates the inode from the reply
 * and attaches the lock handle; on failure paths releases the open
 * handle/request so nothing leaks.
 * NOTE(review): extraction dropped braces/labels; code kept verbatim.
 */
340 static int ll_intent_file_open(struct file *file, void *lmm,
341 int lmmsize, struct lookup_intent *itp)
343 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
344 struct dentry *parent = file->f_dentry->d_parent;
345 const char *name = file->f_dentry->d_name.name;
346 const int len = file->f_dentry->d_name.len;
347 struct md_op_data *op_data;
348 struct ptlrpc_request *req;
355 /* Usually we come here only for NFSD, and we want open lock.
356 But we can also get here with pre 2.6.15 patchless kernels, and in
357 that case that lock is also ok */
358 /* We can also get here if there was cached open handle in revalidate_it
359 * but it disappeared while we were getting from there to ll_file_open.
360 * But this means this file was closed and immediatelly opened which
361 * makes a good candidate for using OPEN lock */
362 /* If lmmsize & lmm are not 0, we are just setting stripe info
363 * parameters. No need for the open lock */
364 if (!lmm && !lmmsize)
365 itp->it_flags |= MDS_OPEN_LOCK;
367 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
368 file->f_dentry->d_inode, name, len,
369 O_RDWR, LUSTRE_OPC_ANY, NULL);
371 RETURN(PTR_ERR(op_data));
373 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
374 0 /*unused */, &req, ll_md_blocking_ast, 0);
375 ll_finish_md_op_data(op_data);
377 /* reason for keep own exit path - don`t flood log
378 * with messages with -ESTALE errors.
/* Got an open handle but the open itself failed: close it again. */
380 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
381 it_open_error(DISP_OPEN_OPEN, itp))
383 ll_release_openhandle(file->f_dentry, itp);
387 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
388 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
389 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the intent reply and bind the DLM lock to it. */
393 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
394 if (!rc && itp->d.lustre.it_lock_mode)
395 md_set_lock_data(sbi->ll_md_exp,
396 &itp->d.lustre.it_lock_handle,
397 file->f_dentry->d_inode, NULL);
/* Cleanup path: drop the intent's request ref and lock. */
400 ptlrpc_req_finished(itp->d.lustre.it_data);
401 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
402 ll_intent_drop_lock(itp);
408 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
409 * not believe attributes if a few ioepoch holders exist. Attributes for
410 * previous ioepoch if new one is opened are also skipped by MDS.
412 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a non-zero epoch that differs from the current one. */
414 if (ioepoch && lli->lli_ioepoch != ioepoch) {
415 lli->lli_ioepoch = ioepoch;
416 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
417 ioepoch, PFID(&lli->lli_fid));
/*
 * Fill an obd_client_handle from the MDT open reply carried by @it:
 * copy the server file handle, record fid/flags, adopt the new ioepoch,
 * and register the request for open replay. Returns the
 * md_set_open_replay_data() result.
 */
421 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
422 struct lookup_intent *it, struct obd_client_handle *och)
424 struct ptlrpc_request *req = it->d.lustre.it_data;
425 struct mdt_body *body;
429 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
430 LASSERT(body != NULL); /* reply already checked out */
432 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
433 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
434 och->och_fid = lli->lli_fid;
435 och->och_flags = it->it_flags;
436 ll_ioepoch_open(lli, body->ioepoch);
438 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish the client-local part of an open: optionally fill @och from the
 * intent reply, then attach @fd as the file's private data, initialize
 * readahead state, and record the open mode.
 * NOTE(review): the condition guarding the och-fill branch is missing
 * from this extract; code kept verbatim.
 */
441 int ll_local_open(struct file *file, struct lookup_intent *it,
442 struct ll_file_data *fd, struct obd_client_handle *och)
444 struct inode *inode = file->f_dentry->d_inode;
445 struct ll_inode_info *lli = ll_i2info(inode);
448 LASSERT(!LUSTRE_FPRIVATE(file));
453 struct ptlrpc_request *req = it->d.lustre.it_data;
454 struct mdt_body *body;
457 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
461 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
462 if ((it->it_flags & FMODE_WRITE) &&
463 (body->valid & OBD_MD_FLSIZE))
464 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
465 lli->lli_ioepoch, PFID(&lli->lli_fid));
468 LUSTRE_FPRIVATE(file) = fd;
469 ll_readahead_init(inode, &fd->fd_ras);
470 fd->fd_omode = it->it_flags;
474 /* Open a file, and (for the very first open) create objects on the OSTs at
475 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
476 * creation or open until ll_lov_setstripe() ioctl is called. We grab
477 * lli_open_sem to ensure no other process will create objects, send the
478 * stripe MD to the MDS, or try to destroy the objects if that fails.
480 * If we already have the stripe MD locally then we don't request it in
481 * md_open(), by passing a lmm_size = 0.
483 * It is up to the application to ensure no other processes open this file
484 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
485 * used. We might be able to avoid races of that sort by getting lli_open_sem
486 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
487 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * f_op->open handler. Reuses an existing per-mode MDS open handle when
 * one is cached (and closes the surplus server open), otherwise performs
 * an intent open via ll_intent_file_open() and records the new handle.
 * Also arms directory statahead and handles O_LOV_DELAY_CREATE.
 * NOTE(review): extraction dropped many braces/labels/else arms;
 * code kept verbatim.
 */
489 int ll_file_open(struct inode *inode, struct file *file)
491 struct ll_inode_info *lli = ll_i2info(inode);
492 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
493 .it_flags = file->f_flags };
494 struct lov_stripe_md *lsm;
495 struct ptlrpc_request *req = NULL;
496 struct obd_client_handle **och_p;
498 struct ll_file_data *fd;
499 int rc = 0, opendir_set = 0;
502 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
503 inode->i_generation, inode, file->f_flags);
505 #ifdef HAVE_VFS_INTENT_PATCHES
508 it = file->private_data; /* XXX: compat macro */
509 file->private_data = NULL; /* prevent ll_local_open assertion */
512 fd = ll_file_data_get();
/* First opener of a directory becomes the statahead owner. */
517 if (S_ISDIR(inode->i_mode)) {
518 cfs_spin_lock(&lli->lli_sa_lock);
519 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
520 LASSERT(lli->lli_sai == NULL);
521 lli->lli_opendir_key = fd;
522 lli->lli_opendir_pid = cfs_curproc_pid();
525 cfs_spin_unlock(&lli->lli_sa_lock);
/* Root inode: no MDS open needed, just attach the fd. */
528 if (inode->i_sb->s_root == file->f_dentry) {
529 LUSTRE_FPRIVATE(file) = fd;
/* No intent came from the VFS — build one from f_flags. */
533 if (!it || !it->d.lustre.it_disposition) {
534 /* Convert f_flags into access mode. We cannot use file->f_mode,
535 * because everything but O_ACCMODE mask was stripped from
537 if ((oit.it_flags + 1) & O_ACCMODE)
539 if (file->f_flags & O_TRUNC)
540 oit.it_flags |= FMODE_WRITE;
542 /* kernel only call f_op->open in dentry_open. filp_open calls
543 * dentry_open after call to open_namei that checks permissions.
544 * Only nfsd_open call dentry_open directly without checking
545 * permissions and because of that this code below is safe. */
546 if (oit.it_flags & FMODE_WRITE)
547 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
549 /* We do not want O_EXCL here, presumably we opened the file
550 * already? XXX - NFS implications? */
551 oit.it_flags &= ~O_EXCL;
553 /* bug20584, if "it_flags" contains O_CREAT, the file will be
554 * created if necessary, then "IT_CREAT" should be set to keep
555 * consistent with it */
556 if (oit.it_flags & O_CREAT)
557 oit.it_op |= IT_CREAT;
563 /* Let's see if we have file open on MDS already. */
564 if (it->it_flags & FMODE_WRITE) {
565 och_p = &lli->lli_mds_write_och;
566 och_usecount = &lli->lli_open_fd_write_count;
567 } else if (it->it_flags & FMODE_EXEC) {
568 och_p = &lli->lli_mds_exec_och;
569 och_usecount = &lli->lli_open_fd_exec_count;
571 och_p = &lli->lli_mds_read_och;
572 och_usecount = &lli->lli_open_fd_read_count;
575 cfs_down(&lli->lli_och_sem);
576 if (*och_p) { /* Open handle is present */
577 if (it_disposition(it, DISP_OPEN_OPEN)) {
578 /* Well, there's extra open request that we do not need,
579 let's close it somehow. This will decref request. */
580 rc = it_open_error(DISP_OPEN_OPEN, it);
582 cfs_up(&lli->lli_och_sem);
583 ll_file_data_put(fd);
584 GOTO(out_openerr, rc);
586 ll_release_openhandle(file->f_dentry, it);
587 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
592 rc = ll_local_open(file, it, fd, NULL);
595 cfs_up(&lli->lli_och_sem);
596 ll_file_data_put(fd);
597 GOTO(out_openerr, rc);
600 LASSERT(*och_usecount == 0);
601 if (!it->d.lustre.it_disposition) {
602 /* We cannot just request lock handle now, new ELC code
603 means that one of other OPEN locks for this file
604 could be cancelled, and since blocking ast handler
605 would attempt to grab och_sem as well, that would
606 result in a deadlock */
607 cfs_up(&lli->lli_och_sem);
608 it->it_create_mode |= M_CHECK_STALE;
609 rc = ll_intent_file_open(file, NULL, 0, it);
610 it->it_create_mode &= ~M_CHECK_STALE;
612 ll_file_data_put(fd);
613 GOTO(out_openerr, rc);
616 /* Got some error? Release the request */
617 if (it->d.lustre.it_status < 0) {
618 req = it->d.lustre.it_data;
619 ptlrpc_req_finished(req);
623 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
625 ll_file_data_put(fd);
626 GOTO(out_och_free, rc = -ENOMEM);
629 req = it->d.lustre.it_data;
631 /* md_intent_lock() didn't get a request ref if there was an
632 * open error, so don't do cleanup on the request here
634 /* XXX (green): Should not we bail out on any error here, not
635 * just open error? */
636 rc = it_open_error(DISP_OPEN_OPEN, it);
638 ll_file_data_put(fd);
639 GOTO(out_och_free, rc);
642 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
643 rc = ll_local_open(file, it, fd, *och_p);
645 ll_file_data_put(fd);
646 GOTO(out_och_free, rc);
649 cfs_up(&lli->lli_och_sem);
651 /* Must do this outside lli_och_sem lock to prevent deadlock where
652 different kind of OPEN lock for this same inode gets cancelled
653 by ldlm_cancel_lru */
654 if (!S_ISREG(inode->i_mode))
/* O_LOV_DELAY_CREATE or read-only open: defer OST object creation. */
661 if (file->f_flags & O_LOV_DELAY_CREATE ||
662 !(file->f_mode & FMODE_WRITE)) {
663 CDEBUG(D_INODE, "object creation was delayed\n");
667 file->f_flags &= ~O_LOV_DELAY_CREATE;
/* Error/exit paths: release intent request, free unused och, drop
 * statahead ownership taken above. */
670 ptlrpc_req_finished(req);
672 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
676 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
677 *och_p = NULL; /* OBD_FREE writes some magic there */
680 cfs_up(&lli->lli_och_sem);
682 if (opendir_set != 0)
683 ll_stop_statahead(inode, lli->lli_opendir_key);
689 /* Fills the obdo with the attributes for the lsm */
/*
 * Issue an async getattr to the OSTs for stripe metadata @lsm and wait
 * for completion. @ioepoch is stamped into the request; @sync requests
 * the getattr under a server-side lock (OBD_FL_SRVLOCK). On return the
 * valid mask is narrowed to the fields callers merge into the inode.
 * NOTE(review): lines allocating oinfo.oi_oa and the error/return tail
 * are missing from this extract; code kept verbatim.
 */
690 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
691 struct obd_capa *capa, struct obdo *obdo,
692 __u64 ioepoch, int sync)
694 struct ptlrpc_request_set *set;
695 struct obd_info oinfo = { { { 0 } } };
700 LASSERT(lsm != NULL);
704 oinfo.oi_oa->o_id = lsm->lsm_object_id;
705 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
706 oinfo.oi_oa->o_mode = S_IFREG;
707 oinfo.oi_oa->o_ioepoch = ioepoch;
708 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
709 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
710 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
711 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
712 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
713 oinfo.oi_capa = capa;
/* @sync path: ask the OST to take the lock server-side. */
715 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
716 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
719 set = ptlrpc_prep_set();
721 CERROR("can't allocate ptlrpc set\n");
724 rc = obd_getattr_async(exp, &oinfo, set);
726 rc = ptlrpc_set_wait(set);
727 ptlrpc_set_destroy(set);
/* Keep only the attributes the caller should merge into the inode. */
730 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
731 OBD_MD_FLATIME | OBD_MD_FLMTIME |
732 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
737 * Performs the getattr on the inode and updates its fields.
738 * If @sync != 0, perform the getattr under the server-side lock.
740 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
741 __u64 ioepoch, int sync)
743 struct ll_inode_info *lli = ll_i2info(inode);
/* Capability reference — presumably released on a path missing from this
 * extract; TODO confirm. */
744 struct obd_capa *capa = ll_mdscapa_get(inode);
748 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
749 capa, obdo, ioepoch, sync);
/* On success, fold the OST attributes into the VFS inode. */
752 obdo_refresh_inode(inode, obdo, obdo->o_valid);
754 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
755 lli->lli_smd->lsm_object_id, i_size_read(inode),
756 (unsigned long long)inode->i_blocks,
757 (unsigned long)ll_inode_blksize(inode));
/*
 * Merge MDS-provided timestamps with OST lock-value-block state and
 * apply the result (size, blocks, a/m/ctime) to the inode, under the
 * inode size lock.
 */
762 int ll_merge_lvb(struct inode *inode)
764 struct ll_inode_info *lli = ll_i2info(inode);
765 struct ll_sb_info *sbi = ll_i2sbi(inode);
771 ll_inode_size_lock(inode, 1);
772 inode_init_lvb(inode, &lvb);
774 /* merge timestamps the most resently obtained from mds with
775 timestamps obtained from osts */
776 lvb.lvb_atime = lli->lli_lvb.lvb_atime;
777 lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
778 lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
779 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
780 cl_isize_write_nolock(inode, lvb.lvb_size);
782 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
783 PFID(&lli->lli_fid), lvb.lvb_size);
784 inode->i_blocks = lvb.lvb_blocks;
786 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
787 LTIME_S(inode->i_atime) = lvb.lvb_atime;
788 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
789 ll_inode_size_unlock(inode, 1);
/*
 * Glimpse ioctl helper: fetch current OST attributes for @lsm and copy
 * size/blocks/times into the caller's stat buffer @st.
 */
794 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
797 struct obdo obdo = { 0 };
800 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
802 st->st_size = obdo.o_size;
803 st->st_blocks = obdo.o_blocks;
804 st->st_mtime = obdo.o_mtime;
805 st->st_atime = obdo.o_atime;
806 st->st_ctime = obdo.o_ctime;
/*
 * Initialize a cl_io for @file: zero it, propagate O_NONBLOCK/O_APPEND,
 * bind the cl_object, and pick the DLM locking policy — never for
 * no-lock files (server-side locking instead), mandatory for O_APPEND
 * writes, "maybe" otherwise.
 */
811 void ll_io_init(struct cl_io *io, const struct file *file, int write)
813 struct inode *inode = file->f_dentry->d_inode;
815 memset(io, 0, sizeof *io);
816 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
818 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
819 io->ci_obj = ll_i2info(inode)->lli_clob;
820 io->ci_lockreq = CILR_MAYBE;
821 if (ll_file_nolock(file)) {
822 io->ci_lockreq = CILR_NEVER;
823 io->ci_no_srvlock = 1;
824 } else if (file->f_flags & O_APPEND) {
825 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common engine for all read/write entry points: build a cl_io for
 * @iot (CIT_READ/CIT_WRITE) at *@ppos for @count bytes, plumb the
 * subtype-specific arguments (normal iovec, sendfile, splice) into the
 * vvp/ccc io structures, run cl_io_loop(), and advance *@ppos.
 * Writes (outside group lock) serialize on lli_write_sem; normal reads
 * take lli_trunc_sem shared to exclude concurrent truncate.
 * NOTE(review): case labels, cl_io_fini and the return tail are missing
 * from this extract; code kept verbatim.
 */
829 static ssize_t ll_file_io_generic(const struct lu_env *env,
830 struct vvp_io_args *args, struct file *file,
831 enum cl_io_type iot, loff_t *ppos, size_t count)
837 io = &ccc_env_info(env)->cti_io;
838 ll_io_init(io, file, iot == CIT_WRITE);
840 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
841 struct vvp_io *vio = vvp_env_io(env);
842 struct ccc_io *cio = ccc_env_io(env);
843 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
844 int write_sem_locked = 0;
846 cio->cui_fd = LUSTRE_FPRIVATE(file);
847 vio->cui_io_subtype = args->via_io_subtype;
849 switch (vio->cui_io_subtype) {
851 cio->cui_iov = args->u.normal.via_iov;
852 cio->cui_nrsegs = args->u.normal.via_nrsegs;
853 cio->cui_tot_nrsegs = cio->cui_nrsegs;
854 #ifndef HAVE_FILE_WRITEV
855 cio->cui_iocb = args->u.normal.via_iocb;
857 if ((iot == CIT_WRITE) &&
858 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
859 if(cfs_down_interruptible(&lli->lli_write_sem))
860 GOTO(out, result = -ERESTARTSYS);
861 write_sem_locked = 1;
862 } else if (iot == CIT_READ) {
863 cfs_down_read(&lli->lli_trunc_sem);
867 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
868 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
871 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
872 vio->u.splice.cui_flags = args->u.splice.via_flags;
875 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
878 result = cl_io_loop(env, io);
879 if (write_sem_locked)
880 cfs_up(&lli->lli_write_sem);
881 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
882 cfs_up_read(&lli->lli_trunc_sem);
884 /* cl_io_rw_init() handled IO */
885 result = io->ci_result;
/* io->ci_nob bytes were transferred; advance the file position. */
888 if (io->ci_nob > 0) {
890 *ppos = io->u.ci_wr.wr.crw_pos;
900 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array: accumulate total byte count into *@count,
 * reject negative/overflowing lengths, and shrink *@nr_segs at the first
 * segment whose base fails access_ok().
 * NOTE(review): loop-body lines (continue/break, cnt accumulation) are
 * missing from this extract; code kept verbatim.
 */
902 static int ll_file_get_iov_count(const struct iovec *iov,
903 unsigned long *nr_segs, size_t *count)
908 for (seg = 0; seg < *nr_segs; seg++) {
909 const struct iovec *iv = &iov[seg];
912 * If any segment has a negative length, or the cumulative
913 * length ever wraps negative then return -EINVAL.
916 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
918 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
923 cnt -= iv->iov_len; /* This segment is no good */
930 #ifdef HAVE_FILE_READV
/*
 * f_op->readv (pre-aio kernels): validate the iovec, set up IO_NORMAL
 * vvp args in a cl environment, and run the read through
 * ll_file_io_generic().
 */
931 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
932 unsigned long nr_segs, loff_t *ppos)
935 struct vvp_io_args *args;
941 result = ll_file_get_iov_count(iov, &nr_segs, &count);
945 env = cl_env_get(&refcheck);
947 RETURN(PTR_ERR(env));
949 args = vvp_env_args(env, IO_NORMAL);
950 args->u.normal.via_iov = (struct iovec *)iov;
951 args->u.normal.via_nrsegs = nr_segs;
953 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
954 cl_env_put(env, &refcheck);
/*
 * f_op->read (readv-based variant): wrap the user buffer in a single
 * per-env iovec and delegate to ll_file_readv().
 */
958 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
962 struct iovec *local_iov;
967 env = cl_env_get(&refcheck);
969 RETURN(PTR_ERR(env));
971 local_iov = &vvp_env_info(env)->vti_local_iov;
972 local_iov->iov_base = (void __user *)buf;
973 local_iov->iov_len = count;
974 result = ll_file_readv(file, local_iov, 1, ppos);
975 cl_env_put(env, &refcheck);
/*
 * f_op->aio_read: validate the iovec, populate IO_NORMAL vvp args
 * (including the kiocb) and run the read through ll_file_io_generic(),
 * advancing iocb->ki_pos.
 */
980 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
981 unsigned long nr_segs, loff_t pos)
984 struct vvp_io_args *args;
990 result = ll_file_get_iov_count(iov, &nr_segs, &count);
994 env = cl_env_get(&refcheck);
996 RETURN(PTR_ERR(env));
998 args = vvp_env_args(env, IO_NORMAL);
999 args->u.normal.via_iov = (struct iovec *)iov;
1000 args->u.normal.via_nrsegs = nr_segs;
1001 args->u.normal.via_iocb = iocb;
1003 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1004 &iocb->ki_pos, count);
1005 cl_env_put(env, &refcheck);
/*
 * f_op->read (aio-based variant): build a synchronous kiocb plus a
 * single iovec around the user buffer, call ll_file_aio_read(), and
 * copy the advanced position back to *@ppos.
 */
1009 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1013 struct iovec *local_iov;
1014 struct kiocb *kiocb;
1019 env = cl_env_get(&refcheck);
1021 RETURN(PTR_ERR(env));
1023 local_iov = &vvp_env_info(env)->vti_local_iov;
1024 kiocb = &vvp_env_info(env)->vti_kiocb;
1025 local_iov->iov_base = (void __user *)buf;
1026 local_iov->iov_len = count;
1027 init_sync_kiocb(kiocb, file);
1028 kiocb->ki_pos = *ppos;
1029 kiocb->ki_left = count;
1031 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1032 *ppos = kiocb->ki_pos;
1034 cl_env_put(env, &refcheck);
1040 * Write to a file (through the page cache).
1042 #ifdef HAVE_FILE_WRITEV
/*
 * f_op->writev (pre-aio kernels): validate the iovec, set up IO_NORMAL
 * vvp args, and run the write through ll_file_io_generic().
 */
1043 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1044 unsigned long nr_segs, loff_t *ppos)
1047 struct vvp_io_args *args;
1053 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1057 env = cl_env_get(&refcheck);
1059 RETURN(PTR_ERR(env));
1061 args = vvp_env_args(env, IO_NORMAL);
1062 args->u.normal.via_iov = (struct iovec *)iov;
1063 args->u.normal.via_nrsegs = nr_segs;
1065 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1066 cl_env_put(env, &refcheck);
/*
 * f_op->write (writev-based variant): wrap the user buffer in a single
 * per-env iovec and delegate to ll_file_writev().
 */
1070 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1074 struct iovec *local_iov;
1079 env = cl_env_get(&refcheck);
1081 RETURN(PTR_ERR(env));
1083 local_iov = &vvp_env_info(env)->vti_local_iov;
1084 local_iov->iov_base = (void __user *)buf;
1085 local_iov->iov_len = count;
1087 result = ll_file_writev(file, local_iov, 1, ppos);
1088 cl_env_put(env, &refcheck);
1092 #else /* AIO stuff */
/*
 * f_op->aio_write: validate the iovec, populate IO_NORMAL vvp args
 * (including the kiocb) and run the write through ll_file_io_generic(),
 * advancing iocb->ki_pos.
 */
1093 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1094 unsigned long nr_segs, loff_t pos)
1097 struct vvp_io_args *args;
1103 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1107 env = cl_env_get(&refcheck);
1109 RETURN(PTR_ERR(env));
1111 args = vvp_env_args(env, IO_NORMAL);
1112 args->u.normal.via_iov = (struct iovec *)iov;
1113 args->u.normal.via_nrsegs = nr_segs;
1114 args->u.normal.via_iocb = iocb;
1116 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1117 &iocb->ki_pos, count);
1118 cl_env_put(env, &refcheck);
/*
 * f_op->write (aio-based variant): build a synchronous kiocb plus a
 * single iovec around the user buffer, call ll_file_aio_write(), and
 * copy the advanced position back to *@ppos.
 */
1122 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1126 struct iovec *local_iov;
1127 struct kiocb *kiocb;
1132 env = cl_env_get(&refcheck);
1134 RETURN(PTR_ERR(env));
1136 local_iov = &vvp_env_info(env)->vti_local_iov;
1137 kiocb = &vvp_env_info(env)->vti_kiocb;
1138 local_iov->iov_base = (void __user *)buf;
1139 local_iov->iov_len = count;
1140 init_sync_kiocb(kiocb, file);
1141 kiocb->ki_pos = *ppos;
1142 kiocb->ki_left = count;
1144 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1145 *ppos = kiocb->ki_pos;
1147 cl_env_put(env, &refcheck);
1153 #ifdef HAVE_KERNEL_SENDFILE
1155 * Send file content (through pagecache) somewhere with helper
/*
 * f_op->sendfile: run a CIT_READ through ll_file_io_generic() using the
 * IO_SENDFILE subtype, handing the actor/target pair to the vvp layer.
 */
1157 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1158 read_actor_t actor, void *target)
1161 struct vvp_io_args *args;
1166 env = cl_env_get(&refcheck);
1168 RETURN(PTR_ERR(env));
1170 args = vvp_env_args(env, IO_SENDFILE);
1171 args->u.sendfile.via_target = target;
1172 args->u.sendfile.via_actor = actor;
1174 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1175 cl_env_put(env, &refcheck);
1180 #ifdef HAVE_KERNEL_SPLICE_READ
1182 * Send file content (through pagecache) somewhere with helper
/*
 * f_op->splice_read: run a CIT_READ through ll_file_io_generic() using
 * the IO_SPLICE subtype, handing the pipe and flags to the vvp layer.
 */
1184 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1185 struct pipe_inode_info *pipe, size_t count,
1189 struct vvp_io_args *args;
1194 env = cl_env_get(&refcheck);
1196 RETURN(PTR_ERR(env));
1198 args = vvp_env_args(env, IO_SPLICE);
1199 args->u.splice.via_pipe = pipe;
1200 args->u.splice.via_flags = flags;
1202 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1203 cl_env_put(env, &refcheck);
/*
 * Re-create lost OST objects for @inode: clone the striping metadata,
 * mark the obdo with OBD_FL_RECREATE_OBJS (target OST index carried in
 * o_nlink), and call obd_create() under the inode size lock.
 * NOTE(review): obdo allocation and id/seq assignment lines are missing
 * from this extract; code kept verbatim.
 */
1208 static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
1211 struct obd_export *exp = ll_i2dtexp(inode);
1212 struct obd_trans_info oti = { 0 };
1213 struct obdo *oa = NULL;
1216 struct lov_stripe_md *lsm, *lsm2;
1223 ll_inode_size_lock(inode, 0);
1224 lsm = ll_i2info(inode)->lli_smd;
1226 GOTO(out, rc = -ENOENT);
1227 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1228 (lsm->lsm_stripe_count));
1230 OBD_ALLOC(lsm2, lsm_size);
1232 GOTO(out, rc = -ENOMEM);
/* o_nlink is repurposed here to carry the target OST index. */
1236 oa->o_nlink = ost_idx;
1237 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1238 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1239 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid, OBD_MD_FLTYPE |
1240 OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1241 memcpy(lsm2, lsm, lsm_size);
1242 rc = obd_create(exp, oa, &lsm2, &oti);
1244 OBD_FREE(lsm2, lsm_size);
1247 ll_inode_size_unlock(inode, 0);
/*
 * LL_IOC_RECREATE_OBJ handler: copy a ll_recreate_obj request from
 * userspace (CAP_SYS_ADMIN only) and recreate the object by id/ost index.
 */
1252 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1254 struct ll_recreate_obj ucreat;
1257 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1260 if (cfs_copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1261 sizeof(struct ll_recreate_obj)))
1264 RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
1265 ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID handler: copy a lu_fid from userspace
 * (CAP_SYS_ADMIN only), unpack the object id and OST index encoded in
 * its oid/seq fields, and recreate the object.
 */
1268 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1275 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1278 if (cfs_copy_from_user(&fid, (struct lu_fid *)arg,
1279 sizeof(struct lu_fid)))
/* id = oid | low 16 bits of seq << 32; ost index = seq bits 16..31. */
1282 id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
1283 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1284 RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
/*
 * Set striping information on @inode by replaying an IT_OPEN intent that
 * carries the lov_user_md @lum. Fails if a stripe already exists. The
 * openhandle obtained as a side effect is released immediately.
 * NOTE(review): exit labels/returns between the visible lines are
 * missing from this extract; code kept verbatim.
 */
1287 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1288 int flags, struct lov_user_md *lum, int lum_size)
1290 struct lov_stripe_md *lsm;
1291 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1295 ll_inode_size_lock(inode, 0);
1296 lsm = ll_i2info(inode)->lli_smd;
/* Striping is write-once: refuse if the file already has a stripe. */
1298 ll_inode_size_unlock(inode, 0);
1299 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1304 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1307 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1308 GOTO(out_req_free, rc = -ENOENT);
1309 rc = oit.d.lustre.it_status;
1311 GOTO(out_req_free, rc);
1313 ll_release_openhandle(file->f_dentry, &oit);
1316 ll_inode_size_unlock(inode, 0);
1317 ll_intent_release(&oit);
1320 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (striping information) for @filename from the MDS.
 *
 * On success *lmmp points into the reply buffer of *request (caller
 * must keep the request until done with the lmm and then release it),
 * and *lmm_size is the EA size.  The EA is byte-swapped to host endian
 * on big-endian hosts since the MDS stores it little-endian.
 */
1324 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1325 struct lov_mds_md **lmmp, int *lmm_size,
1326 struct ptlrpc_request **request)
1328 struct ll_sb_info *sbi = ll_i2sbi(inode);
1329 struct mdt_body *body;
1330 struct lov_mds_md *lmm = NULL;
1331 struct ptlrpc_request *req = NULL;
1332 struct md_op_data *op_data;
1335 rc = ll_get_max_mdsize(sbi, &lmmsize);
1339 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1340 strlen(filename), lmmsize,
1341 LUSTRE_OPC_ANY, NULL);
1342 if (op_data == NULL)
1345 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1346 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1347 ll_finish_md_op_data(op_data);
1349 CDEBUG(D_INFO, "md_getattr_name failed "
1350 "on %s: rc %d\n", filename, rc);
1354 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1355 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1357 lmmsize = body->eadatasize;
/* no EA present (or zero-size EA in the elided condition) */
1359 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1361 GOTO(out, rc = -ENODATA);
1364 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1365 LASSERT(lmm != NULL);
/* only v1/v3 LOV magics are understood here */
1367 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1368 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1369 GOTO(out, rc = -EPROTO);
1373 * This is coming from the MDS, so is probably in
1374 * little endian. We convert it to host endian before
1375 * passing it to userspace.
/* compile-time check: only swab when host is big-endian */
1377 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1378 /* if function called for directory - we should
1379 * avoid swab not existent lsm objects */
1380 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1381 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1382 if (S_ISREG(body->mode))
1383 lustre_swab_lov_user_md_objects(
1384 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1385 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1386 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1387 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1388 if (S_ISREG(body->mode))
1389 lustre_swab_lov_user_md_objects(
1390 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1391 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1397 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA ioctl handler: set a pre-existing-objects striping
 * EA (MDS_OPEN_HAS_OBJS) from a userspace lov_user_md with one
 * lov_user_ost_data entry.  Requires CAP_SYS_ADMIN.  The temporary
 * kernel copy is freed on both the -EFAULT path and after use.
 */
1402 static int ll_lov_setea(struct inode *inode, struct file *file,
1405 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1406 struct lov_user_md *lump;
1407 int lum_size = sizeof(struct lov_user_md) +
1408 sizeof(struct lov_user_ost_data);
1412 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1415 OBD_ALLOC(lump, lum_size);
1419 if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1420 OBD_FREE(lump, lum_size);
1424 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1426 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE ioctl handler.  Copies the lov_user_md from
 * userspace -- first as the smaller v1 layout, then re-copies as v3 if
 * the magic says so -- applies it via ll_lov_setstripe_ea_info(), and
 * on success (elided condition) echoes the resulting stripe info back
 * to userspace through the GETSTRIPE obd_iocontrol path.
 */
1430 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1433 struct lov_user_md_v3 lumv3;
1434 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1435 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1436 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1439 int flags = FMODE_WRITE;
1442 /* first try with v1 which is smaller than v3 */
1443 lum_size = sizeof(struct lov_user_md_v1);
1444 if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
1447 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1448 lum_size = sizeof(struct lov_user_md_v3);
1449 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1453 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* zero the user's stripe_count so GETSTRIPE fills in the real value */
1455 put_user(0, &lumv1p->lmm_stripe_count);
1456 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1457 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE ioctl handler: hand the inode's stripe md to the
 * LOV obd_iocontrol, which copies the striping info out to userspace.
 * (The no-stripe error return is in the elided lines.)
 */
1463 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1465 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1470 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * LL_IOC_GROUP_LOCK ioctl: take a cl-layer group lock with gid @arg on
 * behalf of this open file.  Only one group lock per file descriptor is
 * allowed (LL_FILE_GROUP_LOCKED in fd_flags).  lli_lock guards the
 * fd_flags/fd_grouplock pair; it is dropped around the (blocking)
 * cl_get_grouplock() call and re-taken, so a lost race with another
 * thread is detected afterwards and the duplicate lock is released.
 */
1474 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1476 struct ll_inode_info *lli = ll_i2info(inode);
1477 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1478 struct ccc_grouplock grouplock;
1482 if (ll_file_nolock(file))
1483 RETURN(-EOPNOTSUPP);
1485 cfs_spin_lock(&lli->lli_lock);
1486 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1487 CWARN("group lock already existed with gid %lu\n",
1488 fd->fd_grouplock.cg_gid);
1489 cfs_spin_unlock(&lli->lli_lock);
1492 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1493 cfs_spin_unlock(&lli->lli_lock);
/* may block if O_NONBLOCK is not set on the file */
1495 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1496 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1500 cfs_spin_lock(&lli->lli_lock);
1501 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1502 cfs_spin_unlock(&lli->lli_lock);
1503 CERROR("another thread just won the race\n");
1504 cl_put_grouplock(&grouplock);
1508 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1509 fd->fd_grouplock = grouplock;
1510 cfs_spin_unlock(&lli->lli_lock);
1512 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK ioctl: release the group lock with gid @arg held
 * by this file descriptor.  Errors if no group lock is held or if the
 * gid does not match.  State is cleared under lli_lock; the actual
 * cl_put_grouplock() is done on a local copy after dropping the lock.
 */
1516 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1518 struct ll_inode_info *lli = ll_i2info(inode);
1519 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1520 struct ccc_grouplock grouplock;
1523 cfs_spin_lock(&lli->lli_lock);
1524 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1525 cfs_spin_unlock(&lli->lli_lock);
1526 CWARN("no group lock held\n");
1529 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1531 if (fd->fd_grouplock.cg_gid != arg) {
1532 CWARN("group lock %lu doesn't match current id %lu\n",
1533 arg, fd->fd_grouplock.cg_gid);
1534 cfs_spin_unlock(&lli->lli_lock);
1538 grouplock = fd->fd_grouplock;
1539 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1540 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1541 cfs_spin_unlock(&lli->lli_lock);
1543 cl_put_grouplock(&grouplock);
1544 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1549 * Close inode open handle
1551 * \param dentry [in] dentry which contains the inode
1552 * \param it [in,out] intent which contains open info and result
1555 * \retval <0 failure
1557 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1559 struct inode *inode = dentry->d_inode;
1560 struct obd_client_handle *och;
1566 /* Root ? Do nothing. */
1567 if (dentry->d_inode->i_sb->s_root == dentry)
1570 /* No open handle to close? Move away */
1571 if (!it_disposition(it, DISP_OPEN_OPEN))
1574 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1576 OBD_ALLOC(och, sizeof(*och));
1578 GOTO(out, rc = -ENOMEM)
/* fill a client handle from the intent's open reply, then send the
 * close to the MDS */
1580 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1581 ll_i2info(inode), it, och);
1583 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1586 /* this one is in place of ll_file_open */
1587 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1588 ptlrpc_req_finished(it->d.lustre.it_data);
1589 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1594 * Get size for inode for which FIEMAP mapping is requested.
1595 * Make the FIEMAP get_info call and returns the result.
1597 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1600 struct obd_export *exp = ll_i2dtexp(inode);
1601 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1602 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1603 int vallen = num_bytes;
1607 /* Checks for fiemap flags */
1608 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* report back which flags we do NOT support, then fail (elided) */
1609 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1613 /* Check for FIEMAP_FLAG_SYNC */
1614 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1615 rc = filemap_fdatawrite(inode->i_mapping);
1620 /* If the stripe_count > 1 and the application does not understand
1621 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1623 if (lsm->lsm_stripe_count > 1 &&
1624 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1627 fm_key.oa.o_id = lsm->lsm_object_id;
1628 fm_key.oa.o_seq = lsm->lsm_object_seq;
1629 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1631 obdo_from_inode(&fm_key.oa, inode, &ll_i2info(inode)->lli_fid,
1633 /* If filesize is 0, then there would be no objects for mapping */
1634 if (fm_key.oa.o_size == 0) {
1635 fiemap->fm_mapped_extents = 0;
1639 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
/* result is written back into the caller-supplied fiemap buffer */
1641 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1643 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH ioctl: resolve a fid to a path via the MDC.
 * Copies the fixed-size header (getinfo_fid2path) in first to learn the
 * caller's gf_pathlen, allocates an output buffer of header+pathlen,
 * calls down via obd_iocontrol, and copies the result back.
 */
1648 int ll_fid2path(struct obd_export *exp, void *arg)
1650 struct getinfo_fid2path *gfout, *gfin;
1654 /* Need to get the buflen */
1655 OBD_ALLOC_PTR(gfin);
1658 if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
1663 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1664 OBD_ALLOC(gfout, outsize);
1665 if (gfout == NULL) {
1669 memcpy(gfout, gfin, sizeof(*gfout));
1672 /* Call mdc_iocontrol */
1673 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1676 if (cfs_copy_to_user(arg, gfout, outsize))
1680 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP ioctl handler.  Reads fm_extent_count from the user
 * fiemap header to size a kernel buffer (vmalloc'd since extent arrays
 * can be large), copies the request in, runs ll_do_fiemap(), and copies
 * the header plus mapped extents back out.
 * NOTE(review): num_bytes = sizeof(*fiemap_s) + extent_count *
 * sizeof(extent) with no overflow check on extent_count -- the guard,
 * if any, is in the elided lines; verify against the full source.
 */
1684 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1686 struct ll_user_fiemap *fiemap_s;
1687 size_t num_bytes, ret_bytes;
1688 unsigned int extent_count;
1691 /* Get the extent count so we can calculate the size of
1692 * required fiemap buffer */
1693 if (get_user(extent_count,
1694 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1696 num_bytes = sizeof(*fiemap_s) + (extent_count *
1697 sizeof(struct ll_fiemap_extent));
1699 OBD_VMALLOC(fiemap_s, num_bytes);
1700 if (fiemap_s == NULL)
1703 /* get the fiemap value */
1704 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1706 GOTO(error, rc = -EFAULT);
1708 /* If fm_extent_count is non-zero, read the first extent since
1709 * it is used to calculate end_offset and device from previous
1712 if (copy_from_user(&fiemap_s->fm_extents[0],
1713 (char __user *)arg + sizeof(*fiemap_s),
1714 sizeof(struct ll_fiemap_extent)))
1715 GOTO(error, rc = -EFAULT);
1718 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1722 ret_bytes = sizeof(struct ll_user_fiemap);
1724 if (extent_count != 0)
1725 ret_bytes += (fiemap_s->fm_mapped_extents *
1726 sizeof(struct ll_fiemap_extent));
1728 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1732 OBD_VFREE(fiemap_s, num_bytes);
/*
 * Main file ioctl dispatcher for the llite layer.  Known commands are
 * routed to their dedicated handlers; tty ioctls are rejected up front;
 * anything unrecognized is first offered to dynamically registered
 * handlers (ll_iocontrol_call) and finally passed down to the data
 * export via obd_iocontrol.
 */
1736 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1739 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1743 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1744 inode->i_generation, inode, cmd);
1745 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1747 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1748 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1752 case LL_IOC_GETFLAGS:
1753 /* Get the current value of the file flags */
1754 return put_user(fd->fd_flags, (int *)arg);
1755 case LL_IOC_SETFLAGS:
1756 case LL_IOC_CLRFLAGS:
1757 /* Set or clear specific file flags */
1758 /* XXX This probably needs checks to ensure the flags are
1759 * not abused, and to handle any flag side effects.
1761 if (get_user(flags, (int *) arg))
1764 if (cmd == LL_IOC_SETFLAGS) {
/* LL_FILE_IGNORE_LOCK only makes sense for O_DIRECT files */
1765 if ((flags & LL_FILE_IGNORE_LOCK) &&
1766 !(file->f_flags & O_DIRECT)) {
1767 CERROR("%s: unable to disable locking on "
1768 "non-O_DIRECT file\n", current->comm);
1772 fd->fd_flags |= flags;
1774 fd->fd_flags &= ~flags;
1777 case LL_IOC_LOV_SETSTRIPE:
1778 RETURN(ll_lov_setstripe(inode, file, arg));
1779 case LL_IOC_LOV_SETEA:
1780 RETURN(ll_lov_setea(inode, file, arg));
1781 case LL_IOC_LOV_GETSTRIPE:
1782 RETURN(ll_lov_getstripe(inode, arg));
1783 case LL_IOC_RECREATE_OBJ:
1784 RETURN(ll_lov_recreate_obj(inode, arg));
1785 case LL_IOC_RECREATE_FID:
1786 RETURN(ll_lov_recreate_fid(inode, arg));
1787 case FSFILT_IOC_FIEMAP:
1788 RETURN(ll_ioctl_fiemap(inode, arg));
1789 case FSFILT_IOC_GETFLAGS:
1790 case FSFILT_IOC_SETFLAGS:
1791 RETURN(ll_iocontrol(inode, file, cmd, arg));
1792 case FSFILT_IOC_GETVERSION_OLD:
1793 case FSFILT_IOC_GETVERSION:
1794 RETURN(put_user(inode->i_generation, (int *)arg));
1795 case LL_IOC_GROUP_LOCK:
1796 RETURN(ll_get_grouplock(inode, file, arg));
1797 case LL_IOC_GROUP_UNLOCK:
1798 RETURN(ll_put_grouplock(inode, file, arg));
1799 case IOC_OBD_STATFS:
1800 RETURN(ll_obd_statfs(inode, (void *)arg));
1802 /* We need to special case any other ioctls we want to handle,
1803 * to send them to the MDS/OST as appropriate and to properly
1804 * network encode the arg field.
1805 case FSFILT_IOC_SETVERSION_OLD:
1806 case FSFILT_IOC_SETVERSION:
1808 case LL_IOC_FLUSHCTX:
1809 RETURN(ll_flush_ctx(inode));
1810 case LL_IOC_PATH2FID: {
1811 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1812 sizeof(struct lu_fid)))
1817 case OBD_IOC_FID2PATH:
1818 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1820 case LL_IOC_GET_MDTIDX: {
1823 mdtidx = ll_get_mdt_idx(inode);
1827 if (put_user((int)mdtidx, (int*)arg))
/* default: try dynamically-registered ioctl handlers first */
1837 ll_iocontrol_call(inode, file, cmd, arg, &err))
1840 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.  For SEEK_END the file size must be current,
 * so a glimpse (cl_glimpse_size) is issued to fetch the size from the
 * OSTs before applying the offset.  Offsets beyond ll_file_maxbytes or
 * negative are rejected (error return in elided lines).
 */
1846 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1848 struct inode *inode = file->f_dentry->d_inode;
1851 retval = offset + ((origin == 2) ? i_size_read(inode) :
1852 (origin == 1) ? file->f_pos : 0);
1853 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1854 inode->i_ino, inode->i_generation, inode, retval, retval,
1855 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1856 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1858 if (origin == 2) { /* SEEK_END */
1859 int nonblock = 0, rc;
1861 if (file->f_flags & O_NONBLOCK)
1862 nonblock = LDLM_FL_BLOCK_NOWAIT;
1864 rc = cl_glimpse_size(inode);
1868 offset += i_size_read(inode);
1869 } else if (origin == 1) { /* SEEK_CUR */
1870 offset += file->f_pos;
1874 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1875 if (offset != file->f_pos) {
1876 file->f_pos = offset;
/*
 * ->flush() handler (called on every close of a file descriptor).
 * Does not force out dirty pages; it only reports asynchronous
 * writeback errors recorded earlier on this inode/stripe md, clearing
 * them as it reads them, and maps any error to -EIO.
 * The two prototypes track a kernel API change (fl_owner_t id arg).
 */
1884 #ifdef HAVE_FLUSH_OWNER_ID
1885 int ll_flush(struct file *file, fl_owner_t id)
1887 int ll_flush(struct file *file)
1890 struct inode *inode = file->f_dentry->d_inode;
1891 struct ll_inode_info *lli = ll_i2info(inode);
1892 struct lov_stripe_md *lsm = lli->lli_smd;
1895 /* catch async errors that were recorded back when async writeback
1896 * failed for pages in this mapping. */
1897 rc = lli->lli_async_rc;
1898 lli->lli_async_rc = 0;
1900 err = lov_test_and_clear_async_rc(lsm);
1905 return rc ? -EIO : 0;
/*
 * fsync handler.  Waits for in-flight writeback, collects recorded
 * async errors, syncs the metadata via md_sync() to the MDS, and (for
 * striped files, in the partially elided tail) allocates an obdo and
 * syncs the data objects 0..OBD_OBJECT_EOF via obd_sync() to the OSTs.
 * Capabilities (mds/oss) are obtained around each sync call.
 */
1908 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1910 struct inode *inode = dentry->d_inode;
1911 struct ll_inode_info *lli = ll_i2info(inode);
1912 struct lov_stripe_md *lsm = lli->lli_smd;
1913 struct ptlrpc_request *req;
1914 struct obd_capa *oc;
1917 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1918 inode->i_generation, inode);
1919 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1921 /* fsync's caller has already called _fdata{sync,write}, we want
1922 * that IO to finish before calling the osc and mdc sync methods */
1923 rc = filemap_fdatawait(inode->i_mapping);
1925 /* catch async errors that were recorded back when async writeback
1926 * failed for pages in this mapping. */
1927 err = lli->lli_async_rc;
1928 lli->lli_async_rc = 0;
1932 err = lov_test_and_clear_async_rc(lsm);
1937 oc = ll_mdscapa_get(inode);
1938 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1944 ptlrpc_req_finished(req);
/* data sync path: obdo allocation failure maps to -ENOMEM (elided) */
1951 RETURN(rc ? rc : -ENOMEM);
1953 oa->o_id = lsm->lsm_object_id;
1954 oa->o_seq = lsm->lsm_object_seq;
1955 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1956 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid,
1957 OBD_MD_FLTYPE | OBD_MD_FLATIME |
1958 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1961 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1962 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1963 0, OBD_OBJECT_EOF, oc);
/*
 * flock()/fcntl() lock handler: translates a kernel file_lock into an
 * LDLM_FLOCK enqueue to the MDS.  The lock type maps to an ldlm mode
 * (PR for read, PW for write, NL for unlock), the fcntl command maps
 * to enqueue flags (NOWAIT for non-blocking, TEST_LOCK for F_GETLK),
 * and after a successful server enqueue the lock is also recorded in
 * the local kernel lock tables so the VFS sees consistent state.
 */
1973 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1975 struct inode *inode = file->f_dentry->d_inode;
1976 struct ll_sb_info *sbi = ll_i2sbi(inode);
1977 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1978 .ei_cb_cp =ldlm_flock_completion_ast,
1979 .ei_cbdata = file_lock };
1980 struct md_op_data *op_data;
1981 struct lustre_handle lockh = {0};
1982 ldlm_policy_data_t flock;
1987 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1988 inode->i_ino, file_lock);
1990 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1992 if (file_lock->fl_flags & FL_FLOCK) {
1993 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1994 /* set missing params for flock() calls */
1995 file_lock->fl_end = OFFSET_MAX;
1996 file_lock->fl_pid = current->tgid;
1998 flock.l_flock.pid = file_lock->fl_pid;
1999 flock.l_flock.start = file_lock->fl_start;
2000 flock.l_flock.end = file_lock->fl_end;
2002 switch (file_lock->fl_type) {
2004 einfo.ei_mode = LCK_PR;
2007 /* An unlock request may or may not have any relation to
2008 * existing locks so we may not be able to pass a lock handle
2009 * via a normal ldlm_lock_cancel() request. The request may even
2010 * unlock a byte range in the middle of an existing lock. In
2011 * order to process an unlock request we need all of the same
2012 * information that is given with a normal read or write record
2013 * lock request. To avoid creating another ldlm unlock (cancel)
2014 * message we'll treat a LCK_NL flock request as an unlock. */
2015 einfo.ei_mode = LCK_NL;
2018 einfo.ei_mode = LCK_PW;
2021 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
2036 flags = LDLM_FL_BLOCK_NOWAIT;
2042 flags = LDLM_FL_TEST_LOCK;
2043 /* Save the old mode so that if the mode in the lock changes we
2044 * can decrement the appropriate reader or writer refcount. */
2045 file_lock->fl_type = einfo.ei_mode;
2048 CERROR("unknown fcntl lock command: %d\n", cmd);
2052 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2053 LUSTRE_OPC_ANY, NULL);
2054 if (IS_ERR(op_data))
2055 RETURN(PTR_ERR(op_data));
2057 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2058 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2059 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2061 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2062 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2064 ll_finish_md_op_data(op_data);
/* mirror the server result into the local kernel lock state */
2066 if ((file_lock->fl_flags & FL_FLOCK) &&
2067 (rc == 0 || file_lock->fl_type == F_UNLCK))
2068 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2069 #ifdef HAVE_F_OP_FLOCK
2070 if ((file_lock->fl_flags & FL_POSIX) &&
2071 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2072 !(flags & LDLM_FL_TEST_LOCK))
2073 posix_lock_file_wait(file, file_lock);
/* Lock handler for the "-o noflock" mount option: body (elided here)
 * rejects all flock/posix lock requests.  See ll_file_operations_noflock. */
2079 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking a reference) whether we already hold an MDS
 * inodebits lock covering @bits on @inode, in mode @l_req_mode.
 * LCK_MINMODE means "any of CR/CW/PR/PW".  Uses LDLM_FL_TEST_LOCK so
 * no lock reference is acquired.
 */
2086 int ll_have_md_lock(struct inode *inode, __u64 bits, ldlm_mode_t l_req_mode)
2088 struct lustre_handle lockh;
2089 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2090 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2091 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2099 fid = &ll_i2info(inode)->lli_fid;
2100 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2101 ldlm_lockname[mode]);
2103 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2104 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
/*
 * Match-and-take an MDS inodebits lock covering @bits on @inode in any
 * of CR/CW/PR/PW.  Unlike ll_have_md_lock() this does NOT pass
 * LDLM_FL_TEST_LOCK, so on success a reference is held via @lockh and
 * the matched mode is returned; the caller must release it.
 */
2111 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2112 struct lustre_handle *lockh)
2114 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2120 fid = &ll_i2info(inode)->lli_fid;
2121 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2123 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2124 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2125 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/* Post-process a revalidation RPC result: -ENOENT on an already
 * unlinked inode is converted to success (the inode's nlink is updated
 * in elided code); other errors are logged and propagated. */
2129 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2130 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2131 * and return success */
2133 /* This path cannot be hit for regular files unless in
2134 * case of obscure races, so no need to to validate
2136 if (!S_ISREG(inode->i_mode) &&
2137 !S_ISDIR(inode->i_mode))
2142 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate an inode's metadata against the MDS.
 *
 * Two strategies:
 *  - if the server supports OBD_CONNECT_ATTRFID, do an IT_GETATTR
 *    intent lock by fid (no name needed) and finish the revalidation
 *    from the intent result, unhashing the dentry if the file was
 *    unlinked remotely;
 *  - otherwise, if we do not already hold a covering MDS lock for
 *    @ibits, do a plain md_getattr and rebuild the inode from the
 *    reply (ll_prep_inode).
 */
2150 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2153 struct inode *inode = dentry->d_inode;
2154 struct ptlrpc_request *req = NULL;
2155 struct ll_sb_info *sbi;
2156 struct obd_export *exp;
/* NULL inode here is unexpected -- debugging aid (guard is elided) */
2161 CERROR("REPORT THIS LINE TO PETER\n");
2164 sbi = ll_i2sbi(inode);
2166 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2167 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2169 exp = ll_i2mdexp(inode);
2171 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2172 struct lookup_intent oit = { .it_op = IT_GETATTR };
2173 struct md_op_data *op_data;
2175 /* Call getattr by fid, so do not provide name at all. */
2176 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2177 dentry->d_inode, NULL, 0, 0,
2178 LUSTRE_OPC_ANY, NULL);
2179 if (IS_ERR(op_data))
2180 RETURN(PTR_ERR(op_data));
2182 oit.it_create_mode |= M_CHECK_STALE;
2183 rc = md_intent_lock(exp, op_data, NULL, 0,
2184 /* we are not interested in name
2187 ll_md_blocking_ast, 0);
2188 ll_finish_md_op_data(op_data);
2189 oit.it_create_mode &= ~M_CHECK_STALE;
2191 rc = ll_inode_revalidate_fini(inode, rc);
2195 rc = ll_revalidate_it_finish(req, &oit, dentry);
2197 ll_intent_release(&oit);
2201 /* Unlinked? Unhash dentry, so it is not picked up later by
2202 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2203 here to preserve get_cwd functionality on 2.6.
2205 if (!dentry->d_inode->i_nlink) {
2206 cfs_spin_lock(&ll_lookup_lock);
2207 spin_lock(&dcache_lock);
2208 ll_drop_dentry(dentry);
2209 spin_unlock(&dcache_lock);
2210 cfs_spin_unlock(&ll_lookup_lock);
2213 ll_lookup_finish_locks(&oit, dentry);
2214 } else if (!ll_have_md_lock(dentry->d_inode, ibits, LCK_MINMODE)) {
2215 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2216 obd_valid valid = OBD_MD_FLGETATTR;
2217 struct md_op_data *op_data;
/* regular files: also ask for the EA so the layout stays fresh */
2220 if (S_ISREG(inode->i_mode)) {
2221 rc = ll_get_max_mdsize(sbi, &ealen);
2224 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2227 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2228 0, ealen, LUSTRE_OPC_ANY,
2230 if (op_data == NULL)
2233 op_data->op_valid = valid;
2234 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2235 * capa for this inode. Because we only keep capas of dirs
2237 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2238 ll_finish_md_op_data(op_data);
2240 rc = ll_inode_revalidate_fini(inode, rc);
2244 rc = ll_prep_inode(&inode, req, NULL);
2247 ptlrpc_req_finished(req);
/*
 * Revalidate metadata, then the size.  If no data objects exist yet
 * (lli_smd == NULL) the times are taken from the locally cached lvb
 * instead of glimpsing; otherwise cl_glimpse_size() fetches the
 * authoritative size/times from the OSTs.
 */
2251 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2253 struct inode *inode = dentry->d_inode;
2257 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2258 MDS_INODELOCK_LOOKUP);
2260 /* if object not yet allocated, don't validate size */
2261 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
2262 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2263 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2264 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2268 /* cl_glimpse_size will prefer locally cached writes if they extend
2272 rc = cl_glimpse_size(inode);
/*
 * getattr with an explicit lookup intent: revalidate the inode and
 * then fill the kstat from the (now fresh) inode fields.  With a
 * 32-bit userspace API the ino is compressed from the fid.
 */
2277 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2278 struct lookup_intent *it, struct kstat *stat)
2280 struct inode *inode = de->d_inode;
2281 struct ll_inode_info *lli = ll_i2info(inode);
2284 res = ll_inode_revalidate_it(de, it);
2285 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2290 stat->dev = inode->i_sb->s_dev;
2291 if (ll_need_32bit_api(ll_i2sbi(inode)))
2292 stat->ino = cl_fid_build_ino32(&lli->lli_fid);
2294 stat->ino = inode->i_ino;
2296 stat->mode = inode->i_mode;
2297 stat->nlink = inode->i_nlink;
2298 stat->uid = inode->i_uid;
2299 stat->gid = inode->i_gid;
2300 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2301 stat->atime = inode->i_atime;
2302 stat->mtime = inode->i_mtime;
2303 stat->ctime = inode->i_ctime;
2304 #ifdef HAVE_INODE_BLKSIZE
2305 stat->blksize = inode->i_blksize;
2307 stat->blksize = 1 << inode->i_blkbits;
2310 stat->size = i_size_read(inode);
2311 stat->blocks = inode->i_blocks;
/* VFS ->getattr entry point: wraps ll_getattr_it() with a fresh
 * IT_GETATTR intent. */
2315 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2317 struct lookup_intent it = { .it_op = IT_GETATTR };
2319 return ll_getattr_it(mnt, de, &it, stat);
/*
 * VFS ->fiemap entry point (kernels with linux/fiemap.h).  Bridges the
 * kernel's fiemap_extent_info to Lustre's ll_user_fiemap: builds a
 * vmalloc'd request sized for fi_extents_max extents, runs
 * ll_do_fiemap(), and copies flags/extent results back into fieinfo.
 * NOTE(review): fieinfo->fi_extents_start is a __user pointer in
 * mainline kernels; the direct memcpy here relies on this kernel
 * version's calling convention -- verify against the full source.
 */
2322 #ifdef HAVE_LINUX_FIEMAP_H
2323 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2324 __u64 start, __u64 len)
2328 struct ll_user_fiemap *fiemap;
2329 unsigned int extent_count = fieinfo->fi_extents_max;
2331 num_bytes = sizeof(*fiemap) + (extent_count *
2332 sizeof(struct ll_fiemap_extent));
2333 OBD_VMALLOC(fiemap, num_bytes);
2338 fiemap->fm_flags = fieinfo->fi_flags;
2339 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2340 fiemap->fm_start = start;
2341 fiemap->fm_length = len;
2342 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2343 sizeof(struct ll_fiemap_extent));
2345 rc = ll_do_fiemap(inode, fiemap, num_bytes);
2347 fieinfo->fi_flags = fiemap->fm_flags;
2348 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2349 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2350 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2352 OBD_VFREE(fiemap, num_bytes);
/*
 * ACL check callback for generic_permission().  Takes a reference to
 * the cached POSIX ACL under lli_lock and evaluates it with
 * posix_acl_permission().  Compiled out without CONFIG_FS_POSIX_ACL
 * (the fallback return is in elided lines).
 */
2359 int lustre_check_acl(struct inode *inode, int mask)
2361 #ifdef CONFIG_FS_POSIX_ACL
2362 struct ll_inode_info *lli = ll_i2info(inode);
2363 struct posix_acl *acl;
2367 cfs_spin_lock(&lli->lli_lock);
2368 acl = posix_acl_dup(lli->lli_posix_acl);
2369 cfs_spin_unlock(&lli->lli_lock);
2374 rc = posix_acl_permission(inode, acl, mask);
2375 posix_acl_release(acl);
/*
 * ->permission for 2.6.10+ kernels (2- or 3-arg prototype depending on
 * the kernel).  The root inode is revalidated first since it is never
 * validated by lookup; remote-client mounts defer to the remote
 * permission check; everyone else goes through generic_permission()
 * with lustre_check_acl as the ACL callback.
 */
2383 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2384 #ifndef HAVE_INODE_PERMISION_2ARGS
2385 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2387 int ll_inode_permission(struct inode *inode, int mask)
2393 /* as root inode are NOT getting validated in lookup operation,
2394 * need to do it before permission check. */
2396 if (inode == inode->i_sb->s_root->d_inode) {
2397 struct lookup_intent it = { .it_op = IT_LOOKUP };
2399 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2400 MDS_INODELOCK_LOOKUP);
2405 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2406 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2408 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2409 return lustre_check_remote_perm(inode, mask);
2411 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2412 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * ->permission for older kernels without generic_permission()'s ACL
 * callback: an open-coded owner/group/other check in the classic UNIX
 * order, falling back to the ACL for the owner class and finally to
 * CAP_DAC_OVERRIDE / CAP_DAC_READ_SEARCH capability overrides.
 */
2417 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2419 int mode = inode->i_mode;
2422 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2423 inode->i_ino, inode->i_generation, inode, mask);
2425 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2426 return lustre_check_remote_perm(inode, mask);
2428 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* writes denied on read-only fs (for reg/dir/symlink) and immutable */
2430 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2431 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2433 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2435 if (cfs_curproc_fsuid() == inode->i_uid) {
/* owner class: elided line shifts mode for the owner bits */
2438 if (((mode >> 3) & mask & S_IRWXO) != mask)
2440 rc = lustre_check_acl(inode, mask);
2444 goto check_capabilities;
2448 if (cfs_curproc_is_in_groups(inode->i_gid))
2451 if ((mode & mask & S_IRWXO) == mask)
/* capability overrides, mirroring the kernel's DAC semantics */
2455 if (!(mask & MAY_EXEC) ||
2456 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2457 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2460 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2461 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select the vectored read/write member names and implementations for
 * the file_operations tables below: older kernels use .readv/.writev,
 * newer ones use .aio_read/.aio_write. */
2468 #ifdef HAVE_FILE_READV
2469 #define READ_METHOD readv
2470 #define READ_FUNCTION ll_file_readv
2471 #define WRITE_METHOD writev
2472 #define WRITE_FUNCTION ll_file_writev
2474 #define READ_METHOD aio_read
2475 #define READ_FUNCTION ll_file_aio_read
2476 #define WRITE_METHOD aio_write
2477 #define WRITE_FUNCTION ll_file_aio_write
2480 /* -o localflock - only provides locally consistent flock locks */
2481 struct file_operations ll_file_operations = {
2482 .read = ll_file_read,
2483 .READ_METHOD = READ_FUNCTION,
2484 .write = ll_file_write,
2485 .WRITE_METHOD = WRITE_FUNCTION,
2486 .ioctl = ll_file_ioctl,
2487 .open = ll_file_open,
2488 .release = ll_file_release,
2489 .mmap = ll_file_mmap,
2490 .llseek = ll_file_seek,
2491 #ifdef HAVE_KERNEL_SENDFILE
2492 .sendfile = ll_file_sendfile,
2494 #ifdef HAVE_KERNEL_SPLICE_READ
2495 .splice_read = ll_file_splice_read,
/* Default file_operations: same as ll_file_operations plus cluster-wide
 * flock/posix locking through ll_file_flock. */
2501 struct file_operations ll_file_operations_flock = {
2502 .read = ll_file_read,
2503 .READ_METHOD = READ_FUNCTION,
2504 .write = ll_file_write,
2505 .WRITE_METHOD = WRITE_FUNCTION,
2506 .ioctl = ll_file_ioctl,
2507 .open = ll_file_open,
2508 .release = ll_file_release,
2509 .mmap = ll_file_mmap,
2510 .llseek = ll_file_seek,
2511 #ifdef HAVE_KERNEL_SENDFILE
2512 .sendfile = ll_file_sendfile,
2514 #ifdef HAVE_KERNEL_SPLICE_READ
2515 .splice_read = ll_file_splice_read,
2519 #ifdef HAVE_F_OP_FLOCK
2520 .flock = ll_file_flock,
2522 .lock = ll_file_flock
2525 /* These are for -o noflock - to return ENOSYS on flock calls */
2526 struct file_operations ll_file_operations_noflock = {
2527 .read = ll_file_read,
2528 .READ_METHOD = READ_FUNCTION,
2529 .write = ll_file_write,
2530 .WRITE_METHOD = WRITE_FUNCTION,
2531 .ioctl = ll_file_ioctl,
2532 .open = ll_file_open,
2533 .release = ll_file_release,
2534 .mmap = ll_file_mmap,
2535 .llseek = ll_file_seek,
2536 #ifdef HAVE_KERNEL_SENDFILE
2537 .sendfile = ll_file_sendfile,
2539 #ifdef HAVE_KERNEL_SPLICE_READ
2540 .splice_read = ll_file_splice_read,
2544 #ifdef HAVE_F_OP_FLOCK
2545 .flock = ll_file_noflock,
2547 .lock = ll_file_noflock
/* inode_operations for regular files; setattr vs setattr_raw depends
 * on whether the kernel carries the VFS intent patches. */
2550 struct inode_operations ll_file_inode_operations = {
2551 #ifdef HAVE_VFS_INTENT_PATCHES
2552 .setattr_raw = ll_setattr_raw,
2554 .setattr = ll_setattr,
2555 .truncate = ll_truncate,
2556 .getattr = ll_getattr,
2557 .permission = ll_inode_permission,
2558 .setxattr = ll_setxattr,
2559 .getxattr = ll_getxattr,
2560 .listxattr = ll_listxattr,
2561 .removexattr = ll_removexattr,
2562 #ifdef HAVE_LINUX_FIEMAP_H
2563 .fiemap = ll_fiemap,
2567 /* dynamic ioctl number support routins */
/* Registry of dynamically-registered ioctl handlers: an rwsem-guarded
 * list of llioc_data entries, each carrying a callback and the ioctl
 * command numbers it handles (iocd_cmd is a trailing flexible array). */
2568 static struct llioc_ctl_data {
2569 cfs_rw_semaphore_t ioc_sem;
2570 cfs_list_t ioc_head;
2572 __RWSEM_INITIALIZER(llioc.ioc_sem),
2573 CFS_LIST_HEAD_INIT(llioc.ioc_head)
2578 cfs_list_t iocd_list;
2579 unsigned int iocd_size;
2580 llioc_callback_t iocd_cb;
2581 unsigned int iocd_count;
2582 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: @cb is invoked for any of the
 * @count commands in @cmd.  Returns an opaque cookie (the allocation
 * itself) to pass to ll_iocontrol_unregister(), or NULL (in elided
 * code) on bad arguments or allocation failure.
 */
2585 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2588 struct llioc_data *in_data = NULL;
2591 if (cb == NULL || cmd == NULL ||
2592 count > LLIOC_MAX_CMD || count < 0)
2595 size = sizeof(*in_data) + count * sizeof(unsigned int);
2596 OBD_ALLOC(in_data, size);
2597 if (in_data == NULL)
2600 memset(in_data, 0, sizeof(*in_data));
2601 in_data->iocd_size = size;
2602 in_data->iocd_cb = cb;
2603 in_data->iocd_count = count;
2604 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2606 cfs_down_write(&llioc.ioc_sem);
2607 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2608 cfs_up_write(&llioc.ioc_sem);
/*
 * Unregister a dynamic ioctl handler by the cookie returned from
 * ll_iocontrol_register(); logs a warning if the cookie is unknown.
 * (The match test against @magic is in the elided lines.)
 */
2613 void ll_iocontrol_unregister(void *magic)
2615 struct llioc_data *tmp;
2620 cfs_down_write(&llioc.ioc_sem);
2621 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2623 unsigned int size = tmp->iocd_size;
2625 cfs_list_del(&tmp->iocd_list);
2626 cfs_up_write(&llioc.ioc_sem);
2628 OBD_FREE(tmp, size);
2632 cfs_up_write(&llioc.ioc_sem);
2634 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2637 EXPORT_SYMBOL(ll_iocontrol_register);
2638 EXPORT_SYMBOL(ll_iocontrol_unregister);
2640 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2641 unsigned int cmd, unsigned long arg, int *rcp)
2643 enum llioc_iter ret = LLIOC_CONT;
2644 struct llioc_data *data;
2645 int rc = -EINVAL, i;
2647 cfs_down_read(&llioc.ioc_sem);
2648 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2649 for (i = 0; i < data->iocd_count; i++) {
2650 if (cmd != data->iocd_cmd[i])
2653 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2657 if (ret == LLIOC_STOP)
2660 cfs_up_read(&llioc.ioc_sem);