1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * Copyright (c) 2011 Whamcloud, Inc.
36 * This file is part of Lustre, http://www.lustre.org/
37 * Lustre is a trademark of Sun Microsystems, Inc.
41 * Author: Peter Braam <braam@clusterfs.com>
42 * Author: Phil Schwan <phil@clusterfs.com>
43 * Author: Andreas Dilger <adilger@clusterfs.com>
46 #define DEBUG_SUBSYSTEM S_LLITE
47 #include <lustre_dlm.h>
48 #include <lustre_lite.h>
49 #include <linux/pagemap.h>
50 #include <linux/file.h>
51 #include "llite_internal.h"
52 #include <lustre/ll_fiemap.h>
54 #include "cl_object.h"
/* Allocate a per-open ll_file_data from the ll_file_data_slab cache,
 * using an IO-safe GFP mask (CFS_ALLOC_IO) so it is safe on IO paths.
 * NOTE(review): return of fd (and NULL handling) is in lines not
 * visible here — confirm against the full source. */
56 struct ll_file_data *ll_file_data_get(void)
58 struct ll_file_data *fd;
60 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Release a per-open ll_file_data back to the ll_file_data_slab cache.
 * Counterpart of ll_file_data_get(). */
64 static void ll_file_data_put(struct ll_file_data *fd)
67 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Pack the inode's current attributes (fid, mode, a/m/ctime, size,
 * blocks, flags), the IO epoch, the open file handle @fh, and an MDS
 * capability into @op_data for an MD operation (used on close paths). */
70 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
71 struct lustre_handle *fh)
73 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
74 op_data->op_attr.ia_mode = inode->i_mode;
75 op_data->op_attr.ia_atime = inode->i_atime;
76 op_data->op_attr.ia_mtime = inode->i_mtime;
77 op_data->op_attr.ia_ctime = inode->i_ctime;
78 op_data->op_attr.ia_size = i_size_read(inode);
79 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the extended ll_iattr wrapper around iattr. */
80 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
81 ll_inode_to_ext_flags(inode->i_flags);
82 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
84 op_data->op_handle = *fh;
/* Capability reference — released elsewhere; confirm the put site. */
85 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Closes the IO epoch and packs all the attributes into @op_data for
 * the close RPC.  Size/blocks are only claimed valid when SOM is not
 * in effect (or the file is not regular), since otherwise the MDS
 * obtains them from the OSTs. */
92 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
93 struct obd_client_handle *och)
97 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
98 ATTR_MTIME_SET | ATTR_CTIME_SET;
100 if (!(och->och_flags & FMODE_WRITE))
103 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
104 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
106 ll_ioepoch_close(inode, op_data, &och, 0);
109 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
110 ll_prep_md_op_data(op_data, inode, NULL, NULL,
111 0, 0, LUSTRE_OPC_ANY, NULL);
/* Close an MDS open handle: prepare the close op_data, send md_close(),
 * run the Size-on-MDS update if the MDS requests it, destroy OST
 * objects on last unlink, and clear open replay data.  NOTE(review):
 * several error/return paths are in lines not visible here. */
115 static int ll_close_inode_openhandle(struct obd_export *md_exp,
117 struct obd_client_handle *och)
119 struct obd_export *exp = ll_i2mdexp(inode);
120 struct md_op_data *op_data;
121 struct ptlrpc_request *req = NULL;
122 struct obd_device *obd = class_exp2obd(exp);
129 * XXX: in case of LMV, is this correct to access
132 CERROR("Invalid MDC connection handle "LPX64"\n",
133 ll_i2mdexp(inode)->exp_handle.h_cookie);
137 OBD_ALLOC_PTR(op_data);
139 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
141 ll_prepare_close(inode, op_data, och);
/* Remember whether this close ends the IO epoch before md_close(). */
142 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
143 rc = md_close(md_exp, op_data, och->och_mod, &req);
145 /* This close must have the epoch closed. */
146 LASSERT(epoch_close);
147 /* MDS has instructed us to obtain Size-on-MDS attribute from
148 * OSTs and send setattr back to MDS. */
149 rc = ll_som_update(inode, op_data);
151 CERROR("inode %lu mdc Size-on-MDS update failed: "
152 "rc = %d\n", inode->i_ino, rc);
156 CERROR("inode %lu mdc close failed: rc = %d\n",
159 ll_finish_md_op_data(op_data);
162 rc = ll_objects_destroy(req, inode);
164 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* SOM enabled, epoch still open and file written: defer the final
 * attribute update to the DONE_WRITING path. */
171 if (exp_connect_som(exp) && !epoch_close &&
172 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
173 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
175 md_clear_open_replay_data(md_exp, och);
176 /* Free @och if it is not waiting for DONE_WRITING. */
177 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
180 if (req) /* This is close request */
181 ptlrpc_req_finished(req);
/* Really close the MDS open handle of the given mode (write/exec/read)
 * once the last user is gone.  Selects the per-mode handle pointer and
 * use count under lli_och_sem; if users remain, the handle is kept. */
185 int ll_md_real_close(struct inode *inode, int flags)
187 struct ll_inode_info *lli = ll_i2info(inode);
188 struct obd_client_handle **och_p;
189 struct obd_client_handle *och;
194 if (flags & FMODE_WRITE) {
195 och_p = &lli->lli_mds_write_och;
196 och_usecount = &lli->lli_open_fd_write_count;
197 } else if (flags & FMODE_EXEC) {
198 och_p = &lli->lli_mds_exec_och;
199 och_usecount = &lli->lli_open_fd_exec_count;
201 LASSERT(flags & FMODE_READ);
202 och_p = &lli->lli_mds_read_och;
203 och_usecount = &lli->lli_open_fd_read_count;
206 cfs_down(&lli->lli_och_sem);
207 if (*och_usecount) { /* There are still users of this handle, so
209 cfs_up(&lli->lli_och_sem);
214 cfs_up(&lli->lli_och_sem);
216 if (och) { /* There might be a race and somebody have freed this och
218 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file close: drop any group lock, decrement the per-mode open
 * count under lli_och_sem, and only talk to the MDS (ll_md_real_close)
 * when no matching cached OPEN lock lets us skip the RPC.  Finally
 * detaches and frees the ll_file_data and closes the capability. */
225 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
228 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
229 struct ll_inode_info *lli = ll_i2info(inode);
233 /* clear group lock, if present */
234 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
235 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
237 /* Let's see if we have good enough OPEN lock on the file and if
238 we can skip talking to MDS */
239 if (file->f_dentry->d_inode) { /* Can this ever be false? */
241 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
242 struct lustre_handle lockh;
243 struct inode *inode = file->f_dentry->d_inode;
244 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
246 cfs_down(&lli->lli_och_sem);
247 if (fd->fd_omode & FMODE_WRITE) {
249 LASSERT(lli->lli_open_fd_write_count);
250 lli->lli_open_fd_write_count--;
251 } else if (fd->fd_omode & FMODE_EXEC) {
253 LASSERT(lli->lli_open_fd_exec_count);
254 lli->lli_open_fd_exec_count--;
257 LASSERT(lli->lli_open_fd_read_count);
258 lli->lli_open_fd_read_count--;
260 cfs_up(&lli->lli_och_sem);
/* TEST_LOCK match only — no reference is taken on the lock. */
262 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
263 LDLM_IBITS, &policy, lockmode,
265 rc = ll_md_real_close(file->f_dentry->d_inode,
269 CERROR("Releasing a file %p with negative dentry %p. Name %s",
270 file, file->f_dentry, file->f_dentry->d_name.name);
273 LUSTRE_FPRIVATE(file) = NULL;
274 ll_file_data_put(fd);
275 ll_capa_close(inode);
280 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
282 /* While this returns an error code, fput() the caller does not, so we need
283 * to make every effort to clean up all of our state here. Also, applications
284 * rarely check close errors and even if an error is returned they will not
285 * re-try the close call.
287 int ll_file_release(struct inode *inode, struct file *file)
289 struct ll_file_data *fd;
290 struct ll_sb_info *sbi = ll_i2sbi(inode);
291 struct ll_inode_info *lli = ll_i2info(inode);
292 struct lov_stripe_md *lsm = lli->lli_smd;
296 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
297 inode->i_generation, inode);
299 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL state is only tracked on the root inode. */
300 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
301 inode == inode->i_sb->s_root->d_inode) {
302 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
305 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
306 fd->fd_flags &= ~LL_FILE_RMTACL;
307 rct_del(&sbi->ll_rct, cfs_curproc_pid());
308 et_search_free(&sbi->ll_et, cfs_curproc_pid());
/* Don't count releases of the root dentry in the stats. */
313 if (inode->i_sb->s_root != file->f_dentry)
314 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
315 fd = LUSTRE_FPRIVATE(file);
318 /* The last ref on @file, maybe not the owner pid of statahead.
319 * Different processes can open the same dir, "ll_opendir_key" means:
320 * it is me that should stop the statahead thread. */
321 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
322 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root dentry: no MDS close needed, just free the fd. */
324 if (inode->i_sb->s_root == file->f_dentry) {
325 LUSTRE_FPRIVATE(file) = NULL;
326 ll_file_data_put(fd);
331 lov_test_and_clear_async_rc(lsm);
332 lli->lli_async_rc = 0;
334 rc = ll_md_close(sbi->ll_md_exp, inode, file);
336 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
337 libcfs_debug_dumplog();
/* Issue an MDS intent-open for @file (used by NFSD paths and by
 * setstripe).  Requests an OPEN lock unless lmm/lmmsize indicate a
 * stripe-setting open; on success refreshes the inode from the reply
 * and stores the lock data. */
342 static int ll_intent_file_open(struct file *file, void *lmm,
343 int lmmsize, struct lookup_intent *itp)
345 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
346 struct dentry *parent = file->f_dentry->d_parent;
347 const char *name = file->f_dentry->d_name.name;
348 const int len = file->f_dentry->d_name.len;
349 struct md_op_data *op_data;
350 struct ptlrpc_request *req;
351 __u32 opc = LUSTRE_OPC_ANY;
358 /* Usually we come here only for NFSD, and we want open lock.
359 But we can also get here with pre 2.6.15 patchless kernels, and in
360 that case that lock is also ok */
361 /* We can also get here if there was cached open handle in revalidate_it
362 * but it disappeared while we were getting from there to ll_file_open.
363 * But this means this file was closed and immediately opened which
364 * makes a good candidate for using OPEN lock */
365 /* If lmmsize & lmm are not 0, we are just setting stripe info
366 * parameters. No need for the open lock */
367 if (lmm == NULL && lmmsize == 0) {
368 itp->it_flags |= MDS_OPEN_LOCK;
369 if (itp->it_flags & FMODE_WRITE)
370 opc = LUSTRE_OPC_CREATE;
373 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
374 file->f_dentry->d_inode, name, len,
377 RETURN(PTR_ERR(op_data));
379 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
380 0 /*unused */, &req, ll_md_blocking_ast, 0);
381 ll_finish_md_op_data(op_data);
383 /* reason for keeping own exit path - don't flood log
384 * with messages with -ESTALE errors.
386 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
387 it_open_error(DISP_OPEN_OPEN, itp))
389 ll_release_openhandle(file->f_dentry, itp);
393 if (it_disposition(itp, DISP_LOOKUP_NEG))
394 GOTO(out, rc = -ENOENT);
396 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
397 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
398 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
402 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
403 if (!rc && itp->d.lustre.it_lock_mode)
404 ll_set_lock_data(sbi->ll_md_exp, file->f_dentry->d_inode,
/* Cleanup: drop the enqueue reply, disposition bit, and intent lock. */
408 ptlrpc_req_finished(itp->d.lustre.it_data);
409 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
410 ll_intent_drop_lock(itp);
416 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
417 * not believe attributes if a few ioepoch holders exist. Attributes for
418 * previous ioepoch if new one is opened are also skipped by MDS.
420 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only update when a non-zero epoch differs from the current one. */
422 if (ioepoch && lli->lli_ioepoch != ioepoch) {
423 lli->lli_ioepoch = ioepoch;
424 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
425 ioepoch, PFID(&lli->lli_fid));
/* Fill an obd_client_handle from the intent's open reply: copy the MDS
 * file handle, set magic/fid/flags, adopt the reply's IO epoch, and
 * register the open for replay.  Returns md_set_open_replay_data() rc. */
429 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
430 struct lookup_intent *it, struct obd_client_handle *och)
432 struct ptlrpc_request *req = it->d.lustre.it_data;
433 struct mdt_body *body;
437 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
438 LASSERT(body != NULL); /* reply already checked out */
440 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
441 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
442 och->och_fid = lli->lli_fid;
443 och->och_flags = it->it_flags;
444 ll_ioepoch_open(lli, body->ioepoch);
446 return md_set_open_replay_data(md_exp, och, req);
/* Complete a local open: optionally fill @och from the intent reply,
 * attach @fd as the file's private data, initialize readahead state
 * and record the open mode.  Asserts no fd is already attached. */
449 int ll_local_open(struct file *file, struct lookup_intent *it,
450 struct ll_file_data *fd, struct obd_client_handle *och)
452 struct inode *inode = file->f_dentry->d_inode;
453 struct ll_inode_info *lli = ll_i2info(inode);
456 LASSERT(!LUSTRE_FPRIVATE(file));
461 struct ptlrpc_request *req = it->d.lustre.it_data;
462 struct mdt_body *body;
465 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
469 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
/* Debug-only trace for write opens whose reply carries a size. */
470 if ((it->it_flags & FMODE_WRITE) &&
471 (body->valid & OBD_MD_FLSIZE))
472 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
473 lli->lli_ioepoch, PFID(&lli->lli_fid));
476 LUSTRE_FPRIVATE(file) = fd;
477 ll_readahead_init(inode, &fd->fd_ras);
478 fd->fd_omode = it->it_flags;
482 /* Open a file, and (for the very first open) create objects on the OSTs at
483 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
484 * creation or open until ll_lov_setstripe() ioctl is called. We grab
485 * lli_open_sem to ensure no other process will create objects, send the
486 * stripe MD to the MDS, or try to destroy the objects if that fails.
488 * If we already have the stripe MD locally then we don't request it in
489 * md_open(), by passing a lmm_size = 0.
491 * It is up to the application to ensure no other processes open this file
492 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
493 * used. We might be able to avoid races of that sort by getting lli_open_sem
494 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
495 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
497 int ll_file_open(struct inode *inode, struct file *file)
499 struct ll_inode_info *lli = ll_i2info(inode);
500 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
501 .it_flags = file->f_flags };
502 struct lov_stripe_md *lsm;
503 struct obd_client_handle **och_p = NULL;
504 __u64 *och_usecount = NULL;
505 struct ll_file_data *fd;
506 int rc = 0, opendir_set = 0;
509 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
510 inode->i_generation, inode, file->f_flags);
512 it = file->private_data; /* XXX: compat macro */
513 file->private_data = NULL; /* prevent ll_local_open assertion */
515 fd = ll_file_data_get();
517 GOTO(out_och_free, rc = -ENOMEM);
/* Directory opens may claim ownership of the statahead thread. */
520 if (S_ISDIR(inode->i_mode)) {
521 cfs_spin_lock(&lli->lli_sa_lock);
522 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0 &&
523 lli->lli_sai == NULL) {
524 lli->lli_opendir_key = fd;
525 lli->lli_opendir_pid = cfs_curproc_pid();
528 cfs_spin_unlock(&lli->lli_sa_lock);
/* Opening the root dentry needs no MDS open; just attach fd. */
531 if (inode->i_sb->s_root == file->f_dentry) {
532 LUSTRE_FPRIVATE(file) = fd;
/* No intent from the caller: build our own open intent from f_flags. */
536 if (!it || !it->d.lustre.it_disposition) {
537 /* Convert f_flags into access mode. We cannot use file->f_mode,
538 * because everything but O_ACCMODE mask was stripped from
540 if ((oit.it_flags + 1) & O_ACCMODE)
542 if (file->f_flags & O_TRUNC)
543 oit.it_flags |= FMODE_WRITE;
545 /* kernel only call f_op->open in dentry_open. filp_open calls
546 * dentry_open after call to open_namei that checks permissions.
547 * Only nfsd_open call dentry_open directly without checking
548 * permissions and because of that this code below is safe. */
549 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
550 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
552 /* We do not want O_EXCL here, presumably we opened the file
553 * already? XXX - NFS implications? */
554 oit.it_flags &= ~O_EXCL;
556 /* bug20584, if "it_flags" contains O_CREAT, the file will be
557 * created if necessary, then "IT_CREAT" should be set to keep
558 * consistent with it */
559 if (oit.it_flags & O_CREAT)
560 oit.it_op |= IT_CREAT;
566 /* Let's see if we have file open on MDS already. */
567 if (it->it_flags & FMODE_WRITE) {
568 och_p = &lli->lli_mds_write_och;
569 och_usecount = &lli->lli_open_fd_write_count;
570 } else if (it->it_flags & FMODE_EXEC) {
571 och_p = &lli->lli_mds_exec_och;
572 och_usecount = &lli->lli_open_fd_exec_count;
574 och_p = &lli->lli_mds_read_och;
575 och_usecount = &lli->lli_open_fd_read_count;
578 cfs_down(&lli->lli_och_sem);
579 if (*och_p) { /* Open handle is present */
580 if (it_disposition(it, DISP_OPEN_OPEN)) {
581 /* Well, there's extra open request that we do not need,
582 let's close it somehow. This will decref request. */
583 rc = it_open_error(DISP_OPEN_OPEN, it);
585 cfs_up(&lli->lli_och_sem);
586 GOTO(out_openerr, rc);
589 ll_release_openhandle(file->f_dentry, it);
590 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse the existing MDS handle for this local open. */
595 rc = ll_local_open(file, it, fd, NULL);
598 cfs_up(&lli->lli_och_sem);
599 GOTO(out_openerr, rc);
602 LASSERT(*och_usecount == 0);
603 if (!it->d.lustre.it_disposition) {
604 /* We cannot just request lock handle now, new ELC code
605 means that one of other OPEN locks for this file
606 could be cancelled, and since blocking ast handler
607 would attempt to grab och_sem as well, that would
608 result in a deadlock */
609 cfs_up(&lli->lli_och_sem);
610 it->it_create_mode |= M_CHECK_STALE;
611 rc = ll_intent_file_open(file, NULL, 0, it);
612 it->it_create_mode &= ~M_CHECK_STALE;
614 GOTO(out_openerr, rc);
618 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
620 GOTO(out_och_free, rc = -ENOMEM);
624 /* md_intent_lock() didn't get a request ref if there was an
625 * open error, so don't do cleanup on the request here
627 /* XXX (green): Should not we bail out on any error here, not
628 * just open error? */
629 rc = it_open_error(DISP_OPEN_OPEN, it);
631 GOTO(out_och_free, rc);
633 LASSERT(it_disposition(it, DISP_ENQ_OPEN_REF));
635 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
636 rc = ll_local_open(file, it, fd, *och_p);
638 GOTO(out_och_free, rc);
640 cfs_up(&lli->lli_och_sem);
643 /* Must do this outside lli_och_sem lock to prevent deadlock where
644 different kind of OPEN lock for this same inode gets cancelled
645 by ldlm_cancel_lru */
646 if (!S_ISREG(inode->i_mode))
647 GOTO(out_och_free, rc);
/* Delayed object creation: nothing more to do until setstripe. */
653 if (file->f_flags & O_LOV_DELAY_CREATE ||
654 !(file->f_mode & FMODE_WRITE)) {
655 CDEBUG(D_INODE, "object creation was delayed\n");
656 GOTO(out_och_free, rc);
659 file->f_flags &= ~O_LOV_DELAY_CREATE;
660 GOTO(out_och_free, rc);
663 if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) {
664 ptlrpc_req_finished(it->d.lustre.it_data);
665 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
669 if (och_p && *och_p) {
670 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
671 *och_p = NULL; /* OBD_FREE writes some magic there */
674 cfs_up(&lli->lli_och_sem);
677 if (opendir_set != 0)
678 ll_stop_statahead(inode, lli->lli_opendir_key);
680 ll_file_data_put(fd);
686 /* Fills the obdo with the attributes for the lsm */
/* Asynchronous OST getattr gathered through a ptlrpc set; when @sync
 * is set, OBD_FL_SRVLOCK requests the getattr under a server lock.
 * On return o_valid is masked to the block/time/size fields obtained. */
687 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
688 struct obd_capa *capa, struct obdo *obdo,
689 __u64 ioepoch, int sync)
691 struct ptlrpc_request_set *set;
692 struct obd_info oinfo = { { { 0 } } };
697 LASSERT(lsm != NULL);
701 oinfo.oi_oa->o_id = lsm->lsm_object_id;
702 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
703 oinfo.oi_oa->o_mode = S_IFREG;
704 oinfo.oi_oa->o_ioepoch = ioepoch;
705 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
706 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
707 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
708 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
709 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
710 oinfo.oi_capa = capa;
712 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
713 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
716 set = ptlrpc_prep_set();
718 CERROR("can't allocate ptlrpc set\n");
721 rc = obd_getattr_async(exp, &oinfo, set);
723 rc = ptlrpc_set_wait(set);
724 ptlrpc_set_destroy(set);
727 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
728 OBD_MD_FLATIME | OBD_MD_FLMTIME |
729 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
734 * Performs the getattr on the inode and updates its fields.
735 * If @sync != 0, perform the getattr under the server-side lock.
737 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
738 __u64 ioepoch, int sync)
740 struct ll_inode_info *lli = ll_i2info(inode);
/* Capability taken here — confirm it is released on all paths (the
 * put is in lines not visible in this view). */
741 struct obd_capa *capa = ll_mdscapa_get(inode);
745 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
746 capa, obdo, ioepoch, sync);
/* Refresh the VFS inode from the fields the OSTs reported valid. */
749 obdo_refresh_inode(inode, obdo, obdo->o_valid);
751 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
752 lli->lli_smd->lsm_object_id, i_size_read(inode),
753 (unsigned long long)inode->i_blocks,
754 (unsigned long)ll_inode_blksize(inode));
/* Merge MDS-obtained timestamps with OST lock-value-block data and
 * write the result (size, blocks, a/m/ctime) back into the inode,
 * under the inode size lock. */
759 int ll_merge_lvb(struct inode *inode)
761 struct ll_inode_info *lli = ll_i2info(inode);
762 struct ll_sb_info *sbi = ll_i2sbi(inode);
768 ll_inode_size_lock(inode, 1);
769 inode_init_lvb(inode, &lvb);
771 /* merge timestamps most recently obtained from mds with
772 timestamps obtained from osts */
773 lvb.lvb_atime = lli->lli_lvb.lvb_atime;
774 lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
775 lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
776 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
777 cl_isize_write_nolock(inode, lvb.lvb_size);
779 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
780 PFID(&lli->lli_fid), lvb.lvb_size);
781 inode->i_blocks = lvb.lvb_blocks;
783 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
784 LTIME_S(inode->i_atime) = lvb.lvb_atime;
785 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
786 ll_inode_size_unlock(inode, 1);
/* Glimpse ioctl helper: fetch OST attributes for @lsm (no capability,
 * no server lock) and copy size/blocks/times into the stat buffer. */
791 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
794 struct obdo obdo = { 0 };
797 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
799 st->st_size = obdo.o_size;
800 st->st_blocks = obdo.o_blocks;
801 st->st_mtime = obdo.o_mtime;
802 st->st_atime = obdo.o_atime;
803 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io from the file's open flags: non-blocking and
 * append modes, the cl object, and the lock requirement — never for
 * nolock files (with no_srvlock set), mandatory for O_APPEND, MAYBE
 * otherwise. */
808 void ll_io_init(struct cl_io *io, const struct file *file, int write)
810 struct inode *inode = file->f_dentry->d_inode;
812 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
814 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
815 io->ci_obj = ll_i2info(inode)->lli_clob;
816 io->ci_lockreq = CILR_MAYBE;
817 if (ll_file_nolock(file)) {
818 io->ci_lockreq = CILR_NEVER;
819 io->ci_no_srvlock = 1;
820 } else if (file->f_flags & O_APPEND) {
821 io->ci_lockreq = CILR_MANDATORY;
/* Common engine for all read/write entry points.  Sets up the cl_io,
 * copies the per-subtype arguments (normal iov / sendfile / splice)
 * into the vvp/ccc io state, serializes non-group-locked writes via
 * lli_write_sem and reads via lli_trunc_sem, runs cl_io_loop(), then
 * advances *ppos and tallies per-sb read/write statistics. */
825 static ssize_t ll_file_io_generic(const struct lu_env *env,
826 struct vvp_io_args *args, struct file *file,
827 enum cl_io_type iot, loff_t *ppos, size_t count)
829 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
834 io = ccc_env_thread_io(env);
835 ll_io_init(io, file, iot == CIT_WRITE);
837 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
838 struct vvp_io *vio = vvp_env_io(env);
839 struct ccc_io *cio = ccc_env_io(env);
840 int write_sem_locked = 0;
842 cio->cui_fd = LUSTRE_FPRIVATE(file);
843 vio->cui_io_subtype = args->via_io_subtype;
845 switch (vio->cui_io_subtype) {
847 cio->cui_iov = args->u.normal.via_iov;
848 cio->cui_nrsegs = args->u.normal.via_nrsegs;
849 cio->cui_tot_nrsegs = cio->cui_nrsegs;
850 #ifndef HAVE_FILE_WRITEV
851 cio->cui_iocb = args->u.normal.via_iocb;
/* Writes without a group lock take lli_write_sem (interruptibly). */
853 if ((iot == CIT_WRITE) &&
854 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
855 if(cfs_down_interruptible(&lli->lli_write_sem))
856 GOTO(out, result = -ERESTARTSYS);
857 write_sem_locked = 1;
858 } else if (iot == CIT_READ) {
859 cfs_down_read(&lli->lli_trunc_sem);
863 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
864 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
867 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
868 vio->u.splice.cui_flags = args->u.splice.via_flags;
871 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
874 result = cl_io_loop(env, io);
875 if (write_sem_locked)
876 cfs_up(&lli->lli_write_sem);
877 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
878 cfs_up_read(&lli->lli_trunc_sem);
880 /* cl_io_rw_init() handled IO */
881 result = io->ci_result;
884 if (io->ci_nob > 0) {
886 *ppos = io->u.ci_wr.wr.crw_pos;
892 if (iot == CIT_READ) {
894 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
895 LPROC_LL_READ_BYTES, result);
896 } else if (iot == CIT_WRITE) {
898 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
899 LPROC_LL_WRITE_BYTES, result);
900 lli->lli_write_rc = 0;
902 lli->lli_write_rc = result;
911 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute the total byte count: rejects
 * negative/wrapping lengths and truncates at the first segment that
 * fails access_ok(). */
913 static int ll_file_get_iov_count(const struct iovec *iov,
914 unsigned long *nr_segs, size_t *count)
919 for (seg = 0; seg < *nr_segs; seg++) {
920 const struct iovec *iv = &iov[seg];
923 * If any segment has a negative length, or the cumulative
924 * length ever wraps negative then return -EINVAL.
927 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
929 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
934 cnt -= iv->iov_len; /* This segment is no good */
941 #ifdef HAVE_FILE_READV
/* readv entry point (old kernels with file->f_op->readv): validate the
 * iovec, set up IO_NORMAL args in the cl env, and delegate to
 * ll_file_io_generic() as CIT_READ. */
942 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
943 unsigned long nr_segs, loff_t *ppos)
946 struct vvp_io_args *args;
952 result = ll_file_get_iov_count(iov, &nr_segs, &count);
956 env = cl_env_get(&refcheck);
958 RETURN(PTR_ERR(env));
960 args = vvp_env_args(env, IO_NORMAL);
961 args->u.normal.via_iov = (struct iovec *)iov;
962 args->u.normal.via_nrsegs = nr_segs;
964 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
965 cl_env_put(env, &refcheck);
/* read(2) entry point (readv-capable kernels): wrap the user buffer in
 * a single-segment iovec from the env scratch area and forward to
 * ll_file_readv(). */
969 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
973 struct iovec *local_iov;
978 env = cl_env_get(&refcheck);
980 RETURN(PTR_ERR(env));
982 local_iov = &vvp_env_info(env)->vti_local_iov;
983 local_iov->iov_base = (void __user *)buf;
984 local_iov->iov_len = count;
985 result = ll_file_readv(file, local_iov, 1, ppos);
986 cl_env_put(env, &refcheck);
/* aio_read entry point: validate the iovec, set up IO_NORMAL args
 * (including the kiocb) and run CIT_READ through ll_file_io_generic(),
 * advancing iocb->ki_pos. */
991 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
992 unsigned long nr_segs, loff_t pos)
995 struct vvp_io_args *args;
1001 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1005 env = cl_env_get(&refcheck);
1007 RETURN(PTR_ERR(env));
1009 args = vvp_env_args(env, IO_NORMAL);
1010 args->u.normal.via_iov = (struct iovec *)iov;
1011 args->u.normal.via_nrsegs = nr_segs;
1012 args->u.normal.via_iocb = iocb;
1014 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1015 &iocb->ki_pos, count);
1016 cl_env_put(env, &refcheck);
/* read(2) entry point (aio-based kernels): build a synchronous kiocb
 * plus a single-segment iovec in the env scratch area, run
 * ll_file_aio_read(), then propagate the updated position to *ppos. */
1020 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1024 struct iovec *local_iov;
1025 struct kiocb *kiocb;
1030 env = cl_env_get(&refcheck);
1032 RETURN(PTR_ERR(env));
1034 local_iov = &vvp_env_info(env)->vti_local_iov;
1035 kiocb = &vvp_env_info(env)->vti_kiocb;
1036 local_iov->iov_base = (void __user *)buf;
1037 local_iov->iov_len = count;
1038 init_sync_kiocb(kiocb, file);
1039 kiocb->ki_pos = *ppos;
1040 kiocb->ki_left = count;
1042 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1043 *ppos = kiocb->ki_pos;
1045 cl_env_put(env, &refcheck);
1051 * Write to a file (through the page cache).
1053 #ifdef HAVE_FILE_WRITEV
/* writev entry point (old kernels): validate the iovec, set up
 * IO_NORMAL args, and run CIT_WRITE through ll_file_io_generic(). */
1054 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1055 unsigned long nr_segs, loff_t *ppos)
1058 struct vvp_io_args *args;
1064 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1068 env = cl_env_get(&refcheck);
1070 RETURN(PTR_ERR(env));
1072 args = vvp_env_args(env, IO_NORMAL);
1073 args->u.normal.via_iov = (struct iovec *)iov;
1074 args->u.normal.via_nrsegs = nr_segs;
1076 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1077 cl_env_put(env, &refcheck);
/* write(2) entry point (writev-capable kernels): wrap the user buffer
 * in a single-segment iovec and forward to ll_file_writev(). */
1081 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1085 struct iovec *local_iov;
1090 env = cl_env_get(&refcheck);
1092 RETURN(PTR_ERR(env));
1094 local_iov = &vvp_env_info(env)->vti_local_iov;
1095 local_iov->iov_base = (void __user *)buf;
1096 local_iov->iov_len = count;
1098 result = ll_file_writev(file, local_iov, 1, ppos);
1099 cl_env_put(env, &refcheck);
1103 #else /* AIO stuff */
/* aio_write entry point: validate the iovec, set up IO_NORMAL args
 * (including the kiocb) and run CIT_WRITE through ll_file_io_generic(),
 * advancing iocb->ki_pos. */
1104 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1105 unsigned long nr_segs, loff_t pos)
1108 struct vvp_io_args *args;
1114 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1118 env = cl_env_get(&refcheck);
1120 RETURN(PTR_ERR(env));
1122 args = vvp_env_args(env, IO_NORMAL);
1123 args->u.normal.via_iov = (struct iovec *)iov;
1124 args->u.normal.via_nrsegs = nr_segs;
1125 args->u.normal.via_iocb = iocb;
1127 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1128 &iocb->ki_pos, count);
1129 cl_env_put(env, &refcheck);
/* write(2) entry point (aio-based kernels): build a synchronous kiocb
 * plus a single-segment iovec, run ll_file_aio_write(), then propagate
 * the updated position to *ppos. */
1133 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1137 struct iovec *local_iov;
1138 struct kiocb *kiocb;
1143 env = cl_env_get(&refcheck);
1145 RETURN(PTR_ERR(env));
1147 local_iov = &vvp_env_info(env)->vti_local_iov;
1148 kiocb = &vvp_env_info(env)->vti_kiocb;
1149 local_iov->iov_base = (void __user *)buf;
1150 local_iov->iov_len = count;
1151 init_sync_kiocb(kiocb, file);
1152 kiocb->ki_pos = *ppos;
1153 kiocb->ki_left = count;
1155 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1156 *ppos = kiocb->ki_pos;
1158 cl_env_put(env, &refcheck);
1164 #ifdef HAVE_KERNEL_SENDFILE
1166 * Send file content (through pagecache) somewhere with helper
/* sendfile entry point: set up IO_SENDFILE args (actor + target) and
 * run CIT_READ through ll_file_io_generic(). */
1168 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1169 read_actor_t actor, void *target)
1172 struct vvp_io_args *args;
1177 env = cl_env_get(&refcheck);
1179 RETURN(PTR_ERR(env));
1181 args = vvp_env_args(env, IO_SENDFILE);
1182 args->u.sendfile.via_target = target;
1183 args->u.sendfile.via_actor = actor;
1185 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1186 cl_env_put(env, &refcheck);
1191 #ifdef HAVE_KERNEL_SPLICE_READ
1193 * Send file content (through pagecache) somewhere with helper
/* splice_read entry point: set up IO_SPLICE args (pipe + flags) and
 * run CIT_READ through ll_file_io_generic(). */
1195 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1196 struct pipe_inode_info *pipe, size_t count,
1200 struct vvp_io_args *args;
1205 env = cl_env_get(&refcheck);
1207 RETURN(PTR_ERR(env));
1209 args = vvp_env_args(env, IO_SPLICE);
1210 args->u.splice.via_pipe = pipe;
1211 args->u.splice.via_flags = flags;
1213 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1214 cl_env_put(env, &refcheck);
/* Recreate the OST object @id on @ost_idx for this inode: clone the
 * current stripe MD, mark the obdo with OBD_FL_RECREATE_OBJS, and call
 * obd_create().  Runs under the inode size lock. */
1219 static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
1222 struct obd_export *exp = ll_i2dtexp(inode);
1223 struct obd_trans_info oti = { 0 };
1224 struct obdo *oa = NULL;
1227 struct lov_stripe_md *lsm, *lsm2;
1234 ll_inode_size_lock(inode, 0);
1235 lsm = ll_i2info(inode)->lli_smd;
1237 GOTO(out, rc = -ENOENT);
1238 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1239 (lsm->lsm_stripe_count));
1241 OBD_ALLOC_LARGE(lsm2, lsm_size);
1243 GOTO(out, rc = -ENOMEM);
/* o_nlink carries the target OST index on the recreate path. */
1247 oa->o_nlink = ost_idx;
1248 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1249 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1250 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid, OBD_MD_FLTYPE |
1251 OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1252 memcpy(lsm2, lsm, lsm_size);
1253 rc = obd_create(exp, oa, &lsm2, &oti);
1255 OBD_FREE_LARGE(lsm2, lsm_size);
1258 ll_inode_size_unlock(inode, 0);
/* LL_IOC_RECREATE_OBJ handler: admin-only; copy the ll_recreate_obj
 * request from userspace and recreate the object via ll_lov_recreate(). */
1263 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1265 struct ll_recreate_obj ucreat;
1268 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1271 if (cfs_copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1272 sizeof(struct ll_recreate_obj)))
1275 RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
1276 ucreat.lrc_ost_idx));
/* LL_IOC_RECREATE_FID handler: admin-only; copy a lu_fid from
 * userspace, unpack the object id and OST index from its oid/seq
 * fields, and recreate via ll_lov_recreate(). */
1279 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1286 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1289 if (cfs_copy_from_user(&fid, (struct lu_fid *)arg,
1290 sizeof(struct lu_fid)))
/* id = oid | low 16 bits of seq << 32; ost_idx = seq bits 16..31. */
1293 id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
1294 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1295 RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
/* Set stripe info for @inode via an intent open carrying the user's
 * lov_user_md.  Fails fast (EEXIST path) if a stripe MD already exists,
 * since striping can only be set once; otherwise performs the open and
 * releases the open handle and intent afterwards. */
1298 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1299 int flags, struct lov_user_md *lum, int lum_size)
1301 struct lov_stripe_md *lsm;
1302 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1306 ll_inode_size_lock(inode, 0);
1307 lsm = ll_i2info(inode)->lli_smd;
1309 ll_inode_size_unlock(inode, 0);
1310 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1315 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1318 rc = oit.d.lustre.it_status;
1320 GOTO(out_req_free, rc);
1322 ll_release_openhandle(file->f_dentry, &oit);
1325 ll_inode_size_unlock(inode, 0);
1326 ll_intent_release(&oit);
/* out_req_free path: drop the enqueue request on intent error. */
1329 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping info) of @filename from the MDS via
 * md_getattr_name() and return it in *lmmp / *lmm_size.  The reply
 * buffer stays pinned through *request; the caller must finish it.
 * The EA is byte-swapped to host order on big-endian machines. */
1333 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1334                              struct lov_mds_md **lmmp, int *lmm_size,
1335                              struct ptlrpc_request **request)
1337         struct ll_sb_info *sbi = ll_i2sbi(inode);
1338         struct mdt_body *body;
1339         struct lov_mds_md *lmm = NULL;
1340         struct ptlrpc_request *req = NULL;
1341         struct md_op_data *op_data;
/* size the getattr reply buffer for the largest possible MDS EA */
1344         rc = ll_get_max_mdsize(sbi, &lmmsize);
1348         op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1349                                      strlen(filename), lmmsize,
1350                                      LUSTRE_OPC_ANY, NULL);
1351         if (op_data == NULL)
1354         op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1355         rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1356         ll_finish_md_op_data(op_data);
1358                 CDEBUG(D_INFO, "md_getattr_name failed "
1359                        "on %s: rc %d\n", filename, rc);
1363         body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1364         LASSERT(body != NULL); /* checked by mdc_getattr_name */
1366         lmmsize = body->eadatasize;
/* no EA data in the reply means the file/dir has no striping */
1368         if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1370                 GOTO(out, rc = -ENODATA);
1373         lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1374         LASSERT(lmm != NULL);
1376         if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1377             (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1378                 GOTO(out, rc = -EPROTO);
/*
1382          * This is coming from the MDS, so is probably in
1383          * little endian. We convert it to host endian before
1384          * passing it to userspace.
 */
/* no-op on little-endian hosts: the constant equals its LE form there */
1386         if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1387                 /* if function called for directory - we should
1388                  * avoid swab not existent lsm objects */
1389                 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1390                         lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1391                         if (S_ISREG(body->mode))
1392                                 lustre_swab_lov_user_md_objects(
1393                                  ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1394                                  ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1395                 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1396                         lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1397                         if (S_ISREG(body->mode))
1398                                 lustre_swab_lov_user_md_objects(
1399                                  ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1400                                  ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1406         *lmm_size = lmmsize;
/* LL_IOC_LOV_SETEA handler: admin-only raw setstripe that also carries
 * pre-existing object data (MDS_OPEN_HAS_OBJS).  Copies a lov_user_md
 * plus one ost_data entry from userspace and forwards it to
 * ll_lov_setstripe_ea_info(). */
1411 static int ll_lov_setea(struct inode *inode, struct file *file,
1414         int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1415         struct lov_user_md  *lump;
1416         int lum_size = sizeof(struct lov_user_md) +
1417                        sizeof(struct lov_user_ost_data);
1421         if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1424         OBD_ALLOC_LARGE(lump, lum_size);
1428         if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1429                 OBD_FREE_LARGE(lump, lum_size);
1433         rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1435         OBD_FREE_LARGE(lump, lum_size);
/* LL_IOC_LOV_SETSTRIPE handler: copy a v1 or v3 lov_user_md from
 * userspace (probing with v1 first, re-copying as v3 if the magic says
 * so) and apply it.  On success the user's lmm_stripe_count is zeroed
 * and the resulting striping is read back via LL_IOC_LOV_GETSTRIPE. */
1439 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1442         struct lov_user_md_v3 lumv3;
1443         struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1444         struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1445         struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1448         int flags = FMODE_WRITE;
1451         /* first try with v1 which is smaller than v3 */
1452         lum_size = sizeof(struct lov_user_md_v1);
1453         if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
1456         if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1457                 lum_size = sizeof(struct lov_user_md_v3);
1458                 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1462         rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* report the actual striping back into the user's buffer */
1464                 put_user(0, &lumv1p->lmm_stripe_count);
1465                 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1466                                    0, ll_i2info(inode)->lli_smd,
/* LL_IOC_LOV_GETSTRIPE handler: hand the inode's stripe md to the LOV
 * layer, which copies it out to the userspace buffer at @arg. */
1472 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1474         struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1479         rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
/* LL_IOC_GROUP_LOCK handler: take a cluster-wide group lock with group
 * id @arg and record it in the per-open-file data.  Only one group lock
 * may be held per file descriptor; a lost race with another thread is
 * detected after the (sleeping) cl_get_grouplock() call and undone. */
1484 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1486         struct ll_inode_info   *lli = ll_i2info(inode);
1487         struct ll_file_data    *fd = LUSTRE_FPRIVATE(file);
1488         struct ccc_grouplock    grouplock;
1492         if (ll_file_nolock(file))
1493                 RETURN(-EOPNOTSUPP);
1495         cfs_spin_lock(&lli->lli_lock);
/* already holding a group lock on this fd: refuse a second one */
1496         if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1497                 CWARN("group lock already existed with gid %lu\n",
1498                       fd->fd_grouplock.cg_gid);
1499                 cfs_spin_unlock(&lli->lli_lock);
1502         LASSERT(fd->fd_grouplock.cg_lock == NULL);
/* drop the spinlock: cl_get_grouplock() may block */
1503         cfs_spin_unlock(&lli->lli_lock);
1505         rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1506                               arg, (file->f_flags & O_NONBLOCK), &grouplock);
1510         cfs_spin_lock(&lli->lli_lock);
/* re-check under the lock: another thread may have won meanwhile */
1511         if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1512                 cfs_spin_unlock(&lli->lli_lock);
1513                 CERROR("another thread just won the race\n");
1514                 cl_put_grouplock(&grouplock);
1518         fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1519         fd->fd_grouplock = grouplock;
1520         cfs_spin_unlock(&lli->lli_lock);
1522         CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/* LL_IOC_GROUP_UNLOCK handler: release the group lock with group id
 * @arg previously taken by ll_get_grouplock().  The fd state is cleared
 * under lli_lock; the actual lock release happens after dropping it. */
1526 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1528         struct ll_inode_info   *lli = ll_i2info(inode);
1529         struct ll_file_data    *fd = LUSTRE_FPRIVATE(file);
1530         struct ccc_grouplock    grouplock;
1533         cfs_spin_lock(&lli->lli_lock);
1534         if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1535                 cfs_spin_unlock(&lli->lli_lock);
1536                 CWARN("no group lock held\n");
1539         LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* the caller must name the same gid it locked with */
1541         if (fd->fd_grouplock.cg_gid != arg) {
1542                 CWARN("group lock %lu doesn't match current id %lu\n",
1543                       arg, fd->fd_grouplock.cg_gid);
1544                 cfs_spin_unlock(&lli->lli_lock);
/* detach the lock from the fd before releasing it outside the spinlock */
1548         grouplock = fd->fd_grouplock;
1549         memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1550         fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1551         cfs_spin_unlock(&lli->lli_lock);
1553         cl_put_grouplock(&grouplock);
1554         CDEBUG(D_INFO, "group lock %lu released\n", arg);
/**
1559  * Close inode open handle
 *
1561  * \param dentry [in]     dentry which contains the inode
1562  * \param it     [in,out] intent which contains open info and result
 *
 * \retval 0     success
1565  * \retval <0   failure
 */
1567 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1569         struct inode *inode = dentry->d_inode;
1570         struct obd_client_handle *och;
1576         /* Root ? Do nothing. */
1577         if (dentry->d_inode->i_sb->s_root == dentry)
1580         /* No open handle to close? Move away */
1581         if (!it_disposition(it, DISP_OPEN_OPEN))
1584         LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1586         OBD_ALLOC(och, sizeof(*och));
1588                 GOTO(out, rc = -ENOMEM);
/* build an obd client handle from the intent, then close it on the MDS */
1590         ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1591                     ll_i2info(inode), it, och);
1593         rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1596         /* this one is in place of ll_file_open */
1597         if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
1598                 ptlrpc_req_finished(it->d.lustre.it_data);
1599                 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/**
1605  * Get size for inode for which FIEMAP mapping is requested.
1606  * Make the FIEMAP get_info call and returns the result.
 * @fiemap holds the user request on entry and the mapped extents on
 * return; @num_bytes is the total size of that buffer. */
1608 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1611         struct obd_export *exp = ll_i2dtexp(inode);
1612         struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1613         struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1614         int vallen = num_bytes;
1618         /* Checks for fiemap flags */
1619         if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* report back which flags we do not support */
1620                 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1624         /* Check for FIEMAP_FLAG_SYNC */
1625         if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1626                 rc = filemap_fdatawrite(inode->i_mapping);
1631         /* If the stripe_count > 1 and the application does not understand
1632          * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
         */
1634         if (lsm->lsm_stripe_count > 1 &&
1635             !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1638         fm_key.oa.o_id = lsm->lsm_object_id;
1639         fm_key.oa.o_seq = lsm->lsm_object_seq;
1640         fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1642         obdo_from_inode(&fm_key.oa, inode, &ll_i2info(inode)->lli_fid,
1644         /* If filesize is 0, then there would be no objects for mapping */
1645         if (fm_key.oa.o_size == 0) {
1646                 fiemap->fm_mapped_extents = 0;
1650         memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
/* the LOV layer fills @fiemap in place via the get_info key */
1652         rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1654                 CERROR("obd_get_info failed: rc = %d\n", rc);
/* OBD_IOC_FID2PATH handler: translate a FID into a path name.  Reads a
 * getinfo_fid2path header from userspace to learn gf_pathlen, allocates
 * an output buffer that large, lets the MDC fill it via obd_iocontrol()
 * and copies the result back to userspace. */
1659 int ll_fid2path(struct obd_export *exp, void *arg)
1661         struct getinfo_fid2path *gfout, *gfin;
1665         /* Need to get the buflen */
1666         OBD_ALLOC_PTR(gfin);
1669         if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
/* user tells us how much path space to reserve */
1674         outsize = sizeof(*gfout) + gfin->gf_pathlen;
1675         OBD_ALLOC(gfout, outsize);
1676         if (gfout == NULL) {
1680         memcpy(gfout, gfin, sizeof(*gfout));
1683         /* Call mdc_iocontrol */
1684         rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1687         if (cfs_copy_to_user(arg, gfout, outsize))
1691         OBD_FREE(gfout, outsize);
/* FSFILT_IOC_FIEMAP ioctl handler: size a kernel fiemap buffer from the
 * user's fm_extent_count, copy the request (and, when continuing a
 * previous mapping, the first extent) in, run ll_do_fiemap() and copy
 * the header plus mapped extents back out. */
1695 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1697         struct ll_user_fiemap *fiemap_s;
1698         size_t num_bytes, ret_bytes;
1699         unsigned int extent_count;
1702         /* Get the extent count so we can calculate the size of
1703          * required fiemap buffer */
1704         if (get_user(extent_count,
1705                      &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
/* NOTE(review): no visible overflow check on extent_count here —
 * a huge user value could overflow num_bytes; confirm upstream fix */
1707         num_bytes = sizeof(*fiemap_s) + (extent_count *
1708                                          sizeof(struct ll_fiemap_extent));
1710         OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1711         if (fiemap_s == NULL)
1714         /* get the fiemap value */
1715         if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1717                 GOTO(error, rc = -EFAULT);
1719         /* If fm_extent_count is non-zero, read the first extent since
1720          * it is used to calculate end_offset and device from previous
1721          * fiemap call. */
1723         if (copy_from_user(&fiemap_s->fm_extents[0],
1724                            (char __user *)arg + sizeof(*fiemap_s),
1725                            sizeof(struct ll_fiemap_extent)))
1726                 GOTO(error, rc = -EFAULT);
1729         rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1733         ret_bytes = sizeof(struct ll_user_fiemap);
1735         if (extent_count != 0)
1736                 ret_bytes += (fiemap_s->fm_mapped_extents *
1737                                  sizeof(struct ll_fiemap_extent));
1739         if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1743         OBD_FREE_LARGE(fiemap_s, num_bytes);
/* Main ioctl dispatcher for regular files.  Two signatures are compiled
 * depending on whether the kernel provides unlocked_ioctl (file-only)
 * or the legacy ioctl (inode + file).  Unknown commands fall through to
 * the dynamic ioctl table and finally to the data-device obd_iocontrol. */
1747 #ifdef HAVE_UNLOCKED_IOCTL
1748 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1750         struct inode *inode = file->f_dentry->d_inode;
1752 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1756         struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1760         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1761                inode->i_generation, inode, cmd);
1762         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1764         /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1765         if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1769         case LL_IOC_GETFLAGS:
1770                 /* Get the current value of the file flags */
1771                 return put_user(fd->fd_flags, (int *)arg);
1772         case LL_IOC_SETFLAGS:
1773         case LL_IOC_CLRFLAGS:
1774                 /* Set or clear specific file flags */
1775                 /* XXX This probably needs checks to ensure the flags are
1776                  * not abused, and to handle any flag side effects.
                  */
1778                 if (get_user(flags, (int *) arg))
1781                 if (cmd == LL_IOC_SETFLAGS) {
/* LL_FILE_IGNORE_LOCK is only safe for O_DIRECT I/O */
1782                         if ((flags & LL_FILE_IGNORE_LOCK) &&
1783                             !(file->f_flags & O_DIRECT)) {
1784                                 CERROR("%s: unable to disable locking on "
1785                                        "non-O_DIRECT file\n", current->comm);
1789                         fd->fd_flags |= flags;
1791                         fd->fd_flags &= ~flags;
1794         case LL_IOC_LOV_SETSTRIPE:
1795                 RETURN(ll_lov_setstripe(inode, file, arg));
1796         case LL_IOC_LOV_SETEA:
1797                 RETURN(ll_lov_setea(inode, file, arg));
1798         case LL_IOC_LOV_GETSTRIPE:
1799                 RETURN(ll_lov_getstripe(inode, arg));
1800         case LL_IOC_RECREATE_OBJ:
1801                 RETURN(ll_lov_recreate_obj(inode, arg));
1802         case LL_IOC_RECREATE_FID:
1803                 RETURN(ll_lov_recreate_fid(inode, arg));
1804         case FSFILT_IOC_FIEMAP:
1805                 RETURN(ll_ioctl_fiemap(inode, arg));
1806         case FSFILT_IOC_GETFLAGS:
1807         case FSFILT_IOC_SETFLAGS:
1808                 RETURN(ll_iocontrol(inode, file, cmd, arg));
1809         case FSFILT_IOC_GETVERSION_OLD:
1810         case FSFILT_IOC_GETVERSION:
1811                 RETURN(put_user(inode->i_generation, (int *)arg));
1812         case LL_IOC_GROUP_LOCK:
1813                 RETURN(ll_get_grouplock(inode, file, arg));
1814         case LL_IOC_GROUP_UNLOCK:
1815                 RETURN(ll_put_grouplock(inode, file, arg));
1816         case IOC_OBD_STATFS:
1817                 RETURN(ll_obd_statfs(inode, (void *)arg));
1819         /* We need to special case any other ioctls we want to handle,
1820          * to send them to the MDS/OST as appropriate and to properly
1821          * network encode the arg field.
1822         case FSFILT_IOC_SETVERSION_OLD:
1823         case FSFILT_IOC_SETVERSION:
         */
1825         case LL_IOC_FLUSHCTX:
1826                 RETURN(ll_flush_ctx(inode));
1827         case LL_IOC_PATH2FID: {
1828                 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1829                                      sizeof(struct lu_fid)))
1834         case OBD_IOC_FID2PATH:
1835                 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1837         case LL_IOC_GET_MDTIDX: {
1840                 mdtidx = ll_get_mdt_idx(inode);
1844                 if (put_user((int)mdtidx, (int*)arg))
/* unknown commands: try dynamically-registered ioctl handlers first */
1854                     ll_iocontrol_call(inode, file, cmd, arg, &err))
1857                 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/* llseek implementation.  For SEEK_END the current file size must be
 * fetched from the OSTs (glimpse) before the offset can be computed;
 * the final offset is validated against the client max file size. */
1863 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1865         struct inode *inode = file->f_dentry->d_inode;
1868         retval = offset + ((origin == 2) ? i_size_read(inode) :
1869                            (origin == 1) ? file->f_pos : 0);
1870         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1871                inode->i_ino, inode->i_generation, inode, retval, retval,
1872                origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1873         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1875         if (origin == 2) { /* SEEK_END */
1876                 int nonblock = 0, rc;
1878                 if (file->f_flags & O_NONBLOCK)
1879                         nonblock = LDLM_FL_BLOCK_NOWAIT;
/* refresh i_size from the OSTs before using it */
1881                 rc = cl_glimpse_size(inode);
1885                 offset += i_size_read(inode);
1886         } else if (origin == 1) { /* SEEK_CUR */
1887                 offset += file->f_pos;
1891         if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1892                 if (offset != file->f_pos) {
1893                         file->f_pos = offset;
/* flush() file op (called on every close of a descriptor): report any
 * write or async-writeback error recorded on the inode as -EIO so the
 * application sees it at close time.  Signature varies with kernel
 * version (fl_owner_t id parameter). */
1901 #ifdef HAVE_FLUSH_OWNER_ID
1902 int ll_flush(struct file *file, fl_owner_t id)
1904 int ll_flush(struct file *file)
1907         struct inode *inode = file->f_dentry->d_inode;
1908         struct ll_inode_info *lli = ll_i2info(inode);
1909         struct lov_stripe_md *lsm = lli->lli_smd;
1912         /* the application should know write failure already. */
1913         if (lli->lli_write_rc)
1916         /* catch async errors that were recorded back when async writeback
1917          * failed for pages in this mapping. */
/* read-and-clear: the error is reported exactly once */
1918         rc = lli->lli_async_rc;
1919         lli->lli_async_rc = 0;
1921                 err = lov_test_and_clear_async_rc(lsm);
1926         return rc ? -EIO : 0;
/* fsync() file op: wait for in-flight page I/O, surface recorded async
 * writeback errors, sync the metadata via md_sync() to the MDS and, for
 * striped files, the data via obd_sync_rqset() to the OSTs. */
1929 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1931         struct inode *inode = dentry->d_inode;
1932         struct ll_inode_info *lli = ll_i2info(inode);
1933         struct lov_stripe_md *lsm = lli->lli_smd;
1934         struct ptlrpc_request *req;
1935         struct obd_capa *oc;
1938         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1939                inode->i_generation, inode);
1940         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1942         /* fsync's caller has already called _fdata{sync,write}, we want
1943          * that IO to finish before calling the osc and mdc sync methods */
1944         rc = filemap_fdatawait(inode->i_mapping);
1946         /* catch async errors that were recorded back when async writeback
1947          * failed for pages in this mapping. */
1948         err = lli->lli_async_rc;
1949         lli->lli_async_rc = 0;
1953                 err = lov_test_and_clear_async_rc(lsm);
/* metadata sync to the MDS, authorized by the inode's MDS capability */
1958         oc = ll_mdscapa_get(inode);
1959         err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1965                 ptlrpc_req_finished(req);
/* data sync to the OSTs (only reached for striped files) */
1968                 struct obd_info *oinfo;
1970                 OBD_ALLOC_PTR(oinfo);
1972                         RETURN(rc ? rc : -ENOMEM);
1973                 OBDO_ALLOC(oinfo->oi_oa);
1974                 if (!oinfo->oi_oa) {
1975                         OBD_FREE_PTR(oinfo);
1976                         RETURN(rc ? rc : -ENOMEM);
1978                 oinfo->oi_oa->o_id = lsm->lsm_object_id;
1979                 oinfo->oi_oa->o_seq = lsm->lsm_object_seq;
1980                 oinfo->oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1981                 obdo_from_inode(oinfo->oi_oa, inode, &ll_i2info(inode)->lli_fid,
1982                                 OBD_MD_FLTYPE | OBD_MD_FLATIME |
1983                                 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1986                 oinfo->oi_capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1987                 err = obd_sync_rqset(ll_i2sbi(inode)->ll_dt_exp, oinfo, 0,
1989                 capa_put(oinfo->oi_capa);
1992                 OBDO_FREE(oinfo->oi_oa);
1993                 OBD_FREE_PTR(oinfo);
/* remember a data-sync failure so later flush/close reports it */
1994                 lli->lli_write_rc = err < 0 ? : 0;
/* flock()/fcntl() lock file op: translate the kernel file_lock into an
 * LDLM flock enqueue against the MDS, then mirror granted/released
 * locks into the local kernel lock lists so the VFS stays consistent. */
2000 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2002         struct inode *inode = file->f_dentry->d_inode;
2003         struct ll_sb_info *sbi = ll_i2sbi(inode);
2004         struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2005                                            .ei_cb_cp =ldlm_flock_completion_ast,
2006                                            .ei_cbdata = file_lock };
2007         struct md_op_data *op_data;
2008         struct lustre_handle lockh = {0};
2009         ldlm_policy_data_t flock = {{0}};
2014         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2015                inode->i_ino, file_lock);
2017         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2019         if (file_lock->fl_flags & FL_FLOCK) {
2020                 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2021                 /* flocks are whole-file locks */
2022                 flock.l_flock.end = OFFSET_MAX;
2023                 /* For flocks owner is determined by the local file desctiptor*/
2024                 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2025         } else if (file_lock->fl_flags & FL_POSIX) {
2026                 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2027                 flock.l_flock.start = file_lock->fl_start;
2028                 flock.l_flock.end = file_lock->fl_end;
2032         flock.l_flock.pid = file_lock->fl_pid;
2034         /* Somewhat ugly workaround for svc lockd.
2035          * lockd installs custom fl_lmops->fl_compare_owner that checks
2036          * for the fl_owner to be the same (which it always is on local node
2037          * I guess between lockd processes) and then compares pid.
2038          * As such we assign pid to the owner field to make it all work,
2039          * conflict with normal locks is unlikely since pid space and
2040          * pointer space for current->files are not intersecting */
2041         if (file_lock->fl_lmops && file_lock->fl_lmops->fl_compare_owner)
2042                 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
/* map POSIX lock types onto LDLM lock modes: RD->PR, WR->PW, UNLCK->NL */
2044         switch (file_lock->fl_type) {
2046                 einfo.ei_mode = LCK_PR;
2049                 /* An unlock request may or may not have any relation to
2050                  * existing locks so we may not be able to pass a lock handle
2051                  * via a normal ldlm_lock_cancel() request. The request may even
2052                  * unlock a byte range in the middle of an existing lock. In
2053                  * order to process an unlock request we need all of the same
2054                  * information that is given with a normal read or write record
2055                  * lock request. To avoid creating another ldlm unlock (cancel)
2056                  * message we'll treat a LCK_NL flock request as an unlock. */
2057                 einfo.ei_mode = LCK_NL;
2060                 einfo.ei_mode = LCK_PW;
2063                 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* non-blocking set request */
2078                 flags = LDLM_FL_BLOCK_NOWAIT;
/* F_GETLK-style test request: enqueue with TEST_LOCK, no grant */
2084                 flags = LDLM_FL_TEST_LOCK;
2085                 /* Save the old mode so that if the mode in the lock changes we
2086                  * can decrement the appropriate reader or writer refcount. */
2087                 file_lock->fl_type = einfo.ei_mode;
2090                 CERROR("unknown fcntl lock command: %d\n", cmd);
2094         op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2095                                      LUSTRE_OPC_ANY, NULL);
2096         if (IS_ERR(op_data))
2097                 RETURN(PTR_ERR(op_data));
2099         CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2100                "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2101                flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2103         rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2104                         op_data, &lockh, &flock, 0, NULL /* req */, flags);
2106         ll_finish_md_op_data(op_data);
/* keep the kernel's local lock bookkeeping in sync with the DLM result */
2108         if ((file_lock->fl_flags & FL_FLOCK) &&
2109             (rc == 0 || file_lock->fl_type == F_UNLCK))
2110                 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2111 #ifdef HAVE_F_OP_FLOCK
2112         if ((file_lock->fl_flags & FL_POSIX) &&
2113             (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2114             !(flags & LDLM_FL_TEST_LOCK))
2115                 posix_lock_file_wait(file, file_lock);
2121 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/**
2129  * test if some locks matching bits and l_req_mode are acquired
2130  * - bits can be in different locks
2131  * - if found clear the common lock bits in *bits
2132  * - the bits not found, are kept in *bits
 *
2134  * \param bits [IN] searched lock bits [IN]
2135  * \param l_req_mode [IN] searched lock mode
2136  * \retval boolean, true iff all bits are found
 */
2138 int ll_have_md_lock(struct inode *inode, __u64 *bits,  ldlm_mode_t l_req_mode)
2140         struct lustre_handle lockh;
2141         ldlm_policy_data_t policy;
/* LCK_MINMODE means "any mode": match against all four standard modes */
2142         ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2143                                 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2152         fid = &ll_i2info(inode)->lli_fid;
2153         CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2154                ldlm_lockname[mode]);
/* TEST_LOCK: match without taking a reference on the found lock */
2156         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2157         for (i = 0; i < MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) {
2158                 policy.l_inodebits.bits = *bits & (1 << i);
2159                 if (policy.l_inodebits.bits == 0)
2162                 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS,
2163                                   &policy, mode, &lockh)) {
2164                         struct ldlm_lock *lock;
2166                         lock = ldlm_handle2lock(&lockh);
2169                                         ~(lock->l_policy_data.l_inodebits.bits);
2170                                 LDLM_LOCK_PUT(lock);
2172                                 *bits &= ~policy.l_inodebits.bits;
/* Find a granted MDS ibits lock covering @bits in any standard mode and
 * take a reference on it; returns the matched mode (0 if none) with the
 * lock handle stored in *lockh for the caller to release. */
2179 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2180                             struct lustre_handle *lockh)
2182         ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2188         fid = &ll_i2info(inode)->lli_fid;
2189         CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2191         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2192         rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2193                            LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/* Common tail for revalidation: turn -ENOENT (file unlinked under us)
 * into success after dropping the link count; log any other failure. */
2197 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2198         if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2199                               * and return success */
2201                 /* This path cannot be hit for regular files unless in
2202                  * case of obscure races, so no need to to validate
                  * size */
2204                 if (!S_ISREG(inode->i_mode) &&
2205                     !S_ISDIR(inode->i_mode))
2210                 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/* Revalidate the attributes of @dentry's inode against the MDS.  Two
 * strategies: with OBD_CONNECT_ATTRFID, replay a getattr-by-fid intent
 * lock (which also refreshes the dcache state); otherwise, if no
 * matching MDS ibits lock is cached, issue a plain md_getattr() and
 * refresh the inode from the reply. */
2218 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2221         struct inode *inode = dentry->d_inode;
2222         struct ptlrpc_request *req = NULL;
2223         struct ll_sb_info *sbi;
2224         struct obd_export *exp;
2229                 CERROR("REPORT THIS LINE TO PETER\n");
2232         sbi = ll_i2sbi(inode);
2234         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2235                inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2237         exp = ll_i2mdexp(inode);
2239         /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2240          *      But under CMD case, it caused some lock issues, should be fixed
2241          *      with new CMD ibits lock. See bug 12718 */
2242         if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2243                 struct lookup_intent oit = { .it_op = IT_GETATTR };
2244                 struct md_op_data *op_data;
2246                 if (ibits == MDS_INODELOCK_LOOKUP)
2247                         oit.it_op = IT_LOOKUP;
2249                 /* Call getattr by fid, so do not provide name at all. */
2250                 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2251                                              dentry->d_inode, NULL, 0, 0,
2252                                              LUSTRE_OPC_ANY, NULL);
2253                 if (IS_ERR(op_data))
2254                         RETURN(PTR_ERR(op_data));
/* M_CHECK_STALE marks this as a revalidation, not a fresh lookup */
2256                 oit.it_create_mode |= M_CHECK_STALE;
2257                 rc = md_intent_lock(exp, op_data, NULL, 0,
2258                                     /* we are not interested in name
                                        based lookup */
2261                                     ll_md_blocking_ast, 0);
2262                 ll_finish_md_op_data(op_data);
2263                 oit.it_create_mode &= ~M_CHECK_STALE;
2265                         rc = ll_inode_revalidate_fini(inode, rc);
2269                 rc = ll_revalidate_it_finish(req, &oit, dentry);
2271                         ll_intent_release(&oit);
2275                 /* Unlinked? Unhash dentry, so it is not picked up later by
2276                    do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2277                    here to preserve get_cwd functionality on 2.6.
                 */
2279                 if (!dentry->d_inode->i_nlink) {
2280                         cfs_spin_lock(&ll_lookup_lock);
2281                         spin_lock(&dcache_lock);
2282                         ll_drop_dentry(dentry);
2283                         spin_unlock(&dcache_lock);
2284                         cfs_spin_unlock(&ll_lookup_lock);
2287                 ll_lookup_finish_locks(&oit, dentry);
/* no intent path: only go to the MDS if we lack a covering ibits lock */
2288         } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
2289                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2290                 obd_valid valid = OBD_MD_FLGETATTR;
2291                 struct md_op_data *op_data;
2294                 if (S_ISREG(inode->i_mode)) {
2295                         rc = ll_get_max_mdsize(sbi, &ealen);
2298                         valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2301                 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2302                                              0, ealen, LUSTRE_OPC_ANY,
2304                 if (op_data == NULL)
2307                 op_data->op_valid = valid;
2308                 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2309                  * capa for this inode. Because we only keep capas of dirs
                  * open */
2311                 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2312                 ll_finish_md_op_data(op_data);
2314                         rc = ll_inode_revalidate_fini(inode, rc);
2318                 rc = ll_prep_inode(&inode, req, NULL);
2321         ptlrpc_req_finished(req);
/* Revalidate MDS attributes, then refresh the size/time attributes: if
 * no OST objects exist yet, copy times from the cached lvb; otherwise
 * glimpse the size from the OSTs. */
2325 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2328         struct inode *inode = dentry->d_inode;
2332         rc = __ll_inode_revalidate_it(dentry, it, ibits);
2334         /* if object not yet allocated, don't validate size */
2335         if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
2336                 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2337                 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2338                 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2342         /* cl_glimpse_size will prefer locally cached writes if they extend
         * the file */
2346                 rc = cl_glimpse_size(inode);
/* getattr with an explicit intent: revalidate UPDATE|LOOKUP state from
 * the MDS, then fill the kstat from the (now fresh) inode fields. */
2351 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2352                   struct lookup_intent *it, struct kstat *stat)
2354         struct inode *inode = de->d_inode;
2355         struct ll_sb_info *sbi = ll_i2sbi(inode);
2356         struct ll_inode_info *lli = ll_i2info(inode);
2359         res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
2360                                              MDS_INODELOCK_LOOKUP);
2361         ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
2366         stat->dev = inode->i_sb->s_dev;
/* 32-bit userspace cannot take a 64-bit inode number: hash the fid */
2367         if (ll_need_32bit_api(sbi))
2368                 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
2370                 stat->ino = inode->i_ino;
2371         stat->mode = inode->i_mode;
2372         stat->nlink = inode->i_nlink;
2373         stat->uid = inode->i_uid;
2374         stat->gid = inode->i_gid;
2375         stat->rdev = kdev_t_to_nr(inode->i_rdev);
2376         stat->atime = inode->i_atime;
2377         stat->mtime = inode->i_mtime;
2378         stat->ctime = inode->i_ctime;
2379 #ifdef HAVE_INODE_BLKSIZE
2380         stat->blksize = inode->i_blksize;
2382         stat->blksize = 1 << inode->i_blkbits;
2385         stat->size = i_size_read(inode);
2386         stat->blocks = inode->i_blocks;
/* VFS getattr entry point: delegate with a plain IT_GETATTR intent. */
2390 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2392         struct lookup_intent it = { .it_op = IT_GETATTR };
2394         return ll_getattr_it(mnt, de, &it, stat);
/* In-kernel fiemap entry point (inode_operations->fiemap, kernels with
 * linux/fiemap.h): marshal fiemap_extent_info into a ll_user_fiemap
 * buffer, run ll_do_fiemap() and copy the mapped extents back. */
2397 #ifdef HAVE_LINUX_FIEMAP_H
2398 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2399               __u64 start, __u64 len)
2403         struct ll_user_fiemap *fiemap;
2404         unsigned int extent_count = fieinfo->fi_extents_max;
2406         num_bytes = sizeof(*fiemap) + (extent_count *
2407                                        sizeof(struct ll_fiemap_extent));
2408         OBD_ALLOC_LARGE(fiemap, num_bytes);
2413         fiemap->fm_flags = fieinfo->fi_flags;
2414         fiemap->fm_extent_count = fieinfo->fi_extents_max;
2415         fiemap->fm_start = start;
2416         fiemap->fm_length = len;
/* seed with the first extent, used to continue a previous mapping */
2417         memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2418                sizeof(struct ll_fiemap_extent));
2420         rc = ll_do_fiemap(inode, fiemap, num_bytes);
2422         fieinfo->fi_flags = fiemap->fm_flags;
2423         fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2424         memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2425                fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2427         OBD_FREE_LARGE(fiemap, num_bytes);
/* Permission check against the cached POSIX ACL of the inode (used as
 * the check_acl callback of generic_permission()).  Duplicates the ACL
 * under lli_lock so the check runs without holding the spinlock. */
2434 int lustre_check_acl(struct inode *inode, int mask)
2436 #ifdef CONFIG_FS_POSIX_ACL
2437         struct ll_inode_info *lli = ll_i2info(inode);
2438         struct posix_acl *acl;
2442         cfs_spin_lock(&lli->lli_lock);
2443         acl = posix_acl_dup(lli->lli_posix_acl);
2444         cfs_spin_unlock(&lli->lli_lock);
2449         rc = posix_acl_permission(inode, acl, mask);
2450         posix_acl_release(acl);
/* VFS permission() entry point.  On >= 2.6.10 kernels we revalidate the
 * root inode if needed and defer to generic_permission() (with our ACL
 * callback); remote clients use lustre_check_remote_perm().  On older
 * kernels the second variant open-codes the classic UNIX owner/group/
 * other + ACL + capability check. */
2458 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2459 #ifndef HAVE_INODE_PERMISION_2ARGS
2460 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2462 int ll_inode_permission(struct inode *inode, int mask)
2468        /* as root inode are NOT getting validated in lookup operation,
2469         * need to do it before permission check. */
2471         if (inode == inode->i_sb->s_root->d_inode) {
2472                 struct lookup_intent it = { .it_op = IT_LOOKUP };
2474                 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2475                                               MDS_INODELOCK_LOOKUP);
2480         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2481                inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2483         if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2484                 return lustre_check_remote_perm(inode, mask);
2486         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2487         rc = generic_permission(inode, mask, lustre_check_acl);
/* legacy (< 2.6.10) variant: manual mode-bit evaluation */
2492 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2494         int mode = inode->i_mode;
2497         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2498                inode->i_ino, inode->i_generation, inode, mask);
2500         if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2501                 return lustre_check_remote_perm(inode, mask);
2503         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* writes refused on read-only or immutable inodes */
2505         if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2506             (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2508         if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2510         if (cfs_curproc_fsuid() == inode->i_uid) {
2513                 if (((mode >> 3) & mask & S_IRWXO) != mask)
2515                         rc = lustre_check_acl(inode, mask);
2519                                 goto check_capabilities;
2523                 if (cfs_curproc_is_in_groups(inode->i_gid))
2526         if ((mode & mask & S_IRWXO) == mask)
/* capability overrides: DAC_OVERRIDE / DAC_READ_SEARCH */
2530         if (!(mask & MAY_EXEC) ||
2531             (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2532                 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2535         if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2536             (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Kernel-version compatibility: pick the vectored read/write method
 * names (readv/writev on old kernels, aio_read/aio_write on newer ones)
 * used to populate the file_operations tables below. */
2543 #ifdef HAVE_FILE_READV
2544 #define READ_METHOD readv
2545 #define READ_FUNCTION ll_file_readv
2546 #define WRITE_METHOD writev
2547 #define WRITE_FUNCTION ll_file_writev
2549 #define READ_METHOD aio_read
2550 #define READ_FUNCTION ll_file_aio_read
2551 #define WRITE_METHOD aio_write
2552 #define WRITE_FUNCTION ll_file_aio_write
/* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no .flock/.lock entries, so flock falls back
 * to the kernel's node-local implementation. */
2556 struct file_operations ll_file_operations = {
2557         .read           = ll_file_read,
2558         .READ_METHOD    = READ_FUNCTION,
2559         .write          = ll_file_write,
2560         .WRITE_METHOD   = WRITE_FUNCTION,
2561 #ifdef HAVE_UNLOCKED_IOCTL
2562         .unlocked_ioctl = ll_file_ioctl,
2564         .ioctl          = ll_file_ioctl,
2566         .open           = ll_file_open,
2567         .release        = ll_file_release,
2568         .mmap           = ll_file_mmap,
2569         .llseek         = ll_file_seek,
2570 #ifdef HAVE_KERNEL_SENDFILE
2571         .sendfile       = ll_file_sendfile,
2573 #ifdef HAVE_KERNEL_SPLICE_READ
2574         .splice_read    = ll_file_splice_read,
/* file_operations used with -o flock: identical to the default table
 * but routes flock/fcntl locks through ll_file_flock() for cluster-wide
 * consistency. */
2580 struct file_operations ll_file_operations_flock = {
2581         .read           = ll_file_read,
2582         .READ_METHOD    = READ_FUNCTION,
2583         .write          = ll_file_write,
2584         .WRITE_METHOD   = WRITE_FUNCTION,
2585 #ifdef HAVE_UNLOCKED_IOCTL
2586         .unlocked_ioctl = ll_file_ioctl,
2588         .ioctl          = ll_file_ioctl,
2590         .open           = ll_file_open,
2591         .release        = ll_file_release,
2592         .mmap           = ll_file_mmap,
2593         .llseek         = ll_file_seek,
2594 #ifdef HAVE_KERNEL_SENDFILE
2595         .sendfile       = ll_file_sendfile,
2597 #ifdef HAVE_KERNEL_SPLICE_READ
2598         .splice_read    = ll_file_splice_read,
2602 #ifdef HAVE_F_OP_FLOCK
2603         .flock          = ll_file_flock,
2605         .lock           = ll_file_flock
/* These are for -o noflock - to return ENOSYS on flock calls */
2609 struct file_operations ll_file_operations_noflock = {
2610         .read           = ll_file_read,
2611         .READ_METHOD    = READ_FUNCTION,
2612         .write          = ll_file_write,
2613         .WRITE_METHOD   = WRITE_FUNCTION,
2614 #ifdef HAVE_UNLOCKED_IOCTL
2615         .unlocked_ioctl = ll_file_ioctl,
2617         .ioctl          = ll_file_ioctl,
2619         .open           = ll_file_open,
2620         .release        = ll_file_release,
2621         .mmap           = ll_file_mmap,
2622         .llseek         = ll_file_seek,
2623 #ifdef HAVE_KERNEL_SENDFILE
2624         .sendfile       = ll_file_sendfile,
2626 #ifdef HAVE_KERNEL_SPLICE_READ
2627         .splice_read    = ll_file_splice_read,
2631 #ifdef HAVE_F_OP_FLOCK
2632         .flock          = ll_file_noflock,
2634         .lock           = ll_file_noflock
2637 struct inode_operations ll_file_inode_operations = {
2638 .setattr = ll_setattr,
2639 .truncate = ll_truncate,
2640 .getattr = ll_getattr,
2641 .permission = ll_inode_permission,
2642 .setxattr = ll_setxattr,
2643 .getxattr = ll_getxattr,
2644 .listxattr = ll_listxattr,
2645 .removexattr = ll_removexattr,
2646 #ifdef HAVE_LINUX_FIEMAP_H
2647 .fiemap = ll_fiemap,
2651 /* dynamic ioctl number support routins */
2652 static struct llioc_ctl_data {
2653 cfs_rw_semaphore_t ioc_sem;
2654 cfs_list_t ioc_head;
2656 __RWSEM_INITIALIZER(llioc.ioc_sem),
2657 CFS_LIST_HEAD_INIT(llioc.ioc_head)
2662 cfs_list_t iocd_list;
2663 unsigned int iocd_size;
2664 llioc_callback_t iocd_cb;
2665 unsigned int iocd_count;
2666 unsigned int iocd_cmd[0];
2669 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2672 struct llioc_data *in_data = NULL;
2675 if (cb == NULL || cmd == NULL ||
2676 count > LLIOC_MAX_CMD || count < 0)
2679 size = sizeof(*in_data) + count * sizeof(unsigned int);
2680 OBD_ALLOC(in_data, size);
2681 if (in_data == NULL)
2684 memset(in_data, 0, sizeof(*in_data));
2685 in_data->iocd_size = size;
2686 in_data->iocd_cb = cb;
2687 in_data->iocd_count = count;
2688 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2690 cfs_down_write(&llioc.ioc_sem);
2691 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2692 cfs_up_write(&llioc.ioc_sem);
2697 void ll_iocontrol_unregister(void *magic)
2699 struct llioc_data *tmp;
2704 cfs_down_write(&llioc.ioc_sem);
2705 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2707 unsigned int size = tmp->iocd_size;
2709 cfs_list_del(&tmp->iocd_list);
2710 cfs_up_write(&llioc.ioc_sem);
2712 OBD_FREE(tmp, size);
2716 cfs_up_write(&llioc.ioc_sem);
2718 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2721 EXPORT_SYMBOL(ll_iocontrol_register);
2722 EXPORT_SYMBOL(ll_iocontrol_unregister);
2724 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2725 unsigned int cmd, unsigned long arg, int *rcp)
2727 enum llioc_iter ret = LLIOC_CONT;
2728 struct llioc_data *data;
2729 int rc = -EINVAL, i;
2731 cfs_down_read(&llioc.ioc_sem);
2732 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2733 for (i = 0; i < data->iocd_count; i++) {
2734 if (cmd != data->iocd_cmd[i])
2737 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2741 if (ret == LLIOC_STOP)
2744 cfs_up_read(&llioc.ioc_sem);