1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/*
 * Allocate an ll_file_data (per-open-file private state) from the
 * ll_file_data_slab cache, using CFS_ALLOC_IO allocation flags.
 * NOTE(review): this extract omits some original lines (numbering gaps),
 * e.g. braces and the return statement are not visible here.
 */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Release @fd back to the ll_file_data_slab cache. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Copy the inode's current attributes (fid, mode, a/m/ctime, size, blocks,
 * flags, IO epoch) plus the open handle @fh and an MDS capability into
 * @op_data, in preparation for an MDC request.
 */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* Inode flags are converted to ext-style on-wire flags. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
79 ll_inode_to_ext_flags(inode->i_flags);
80 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
82 op_data->op_handle = *fh;
83 op_data->op_capa1 = ll_mdscapa_get(inode);
/*
 * Closes the IO epoch and packs all the attributes into @op_data for
 * the close RPC.  Size/blocks are only sent when SOM is not supported
 * by the MDS export or the inode is not a regular file.
 */
87 * Closes the IO epoch and packs all the attributes into @op_data for
90 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
91 struct obd_client_handle *och)
95 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
96 ATTR_MTIME_SET | ATTR_CTIME_SET;
98 if (!(och->och_flags & FMODE_WRITE))
/* No SOM support, or not a regular file: the MDS holds authoritative
 * size, so include size/blocks in the close attributes. */
101 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
102 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
104 ll_ioepoch_close(inode, op_data, &och, 0);
107 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
108 ll_prep_md_op_data(op_data, inode, NULL, NULL,
109 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send an MDS close RPC for open handle @och and clean up the associated
 * replay data.  On an epoch-close reply the MDS may request a Size-on-MDS
 * update, which is fetched from the OSTs via ll_som_update().
 * NOTE(review): this extract omits some original lines (numbering gaps).
 */
113 static int ll_close_inode_openhandle(struct obd_export *md_exp,
115 struct obd_client_handle *och)
117 struct obd_export *exp = ll_i2mdexp(inode);
118 struct md_op_data *op_data;
119 struct ptlrpc_request *req = NULL;
120 struct obd_device *obd = class_exp2obd(exp);
127 * XXX: in case of LMV, is this correct to access
130 CERROR("Invalid MDC connection handle "LPX64"\n",
131 ll_i2mdexp(inode)->exp_handle.h_cookie);
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr to back to MDS. */
147 rc = ll_som_update(inode, op_data);
149 CERROR("inode %lu mdc Size-on-MDS update failed: "
150 "rc = %d\n", inode->i_ino, rc);
154 CERROR("inode %lu mdc close failed: rc = %d\n",
157 ll_finish_md_op_data(op_data);
160 rc = ll_objects_destroy(req, inode);
162 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* SOM export, epoch not closed yet, writable regular file: defer the
 * DONE_WRITING notification to the dedicated queue. */
169 if (exp_connect_som(exp) && !epoch_close &&
170 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
171 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
173 md_clear_open_replay_data(md_exp, och);
174 /* Free @och if it is not waiting for DONE_WRITING. */
175 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
178 if (req) /* This is close request */
179 ptlrpc_req_finished(req);
/*
 * Really close the MDS open handle of @inode for the open mode in @flags
 * (write / exec / read), if no other users of the handle remain.
 * The matching handle pointer and its use count are selected per mode
 * under lli_och_sem.
 */
183 int ll_md_real_close(struct inode *inode, int flags)
185 struct ll_inode_info *lli = ll_i2info(inode);
186 struct obd_client_handle **och_p;
187 struct obd_client_handle *och;
192 if (flags & FMODE_WRITE) {
193 och_p = &lli->lli_mds_write_och;
194 och_usecount = &lli->lli_open_fd_write_count;
195 } else if (flags & FMODE_EXEC) {
196 och_p = &lli->lli_mds_exec_och;
197 och_usecount = &lli->lli_open_fd_exec_count;
199 LASSERT(flags & FMODE_READ);
200 och_p = &lli->lli_mds_read_och;
201 och_usecount = &lli->lli_open_fd_read_count;
204 cfs_down(&lli->lli_och_sem);
205 if (*och_usecount) { /* There are still users of this handle, so
207 cfs_up(&lli->lli_och_sem);
212 cfs_up(&lli->lli_och_sem);
214 if (och) { /* There might be a race and somebody have freed this och
216 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close: drop any group lock, decrement the open
 * count for this fd's open mode, and if no cached OPEN DLM lock matches
 * (md_lock_match with LDLM_FL_TEST_LOCK), do the real MDS close via
 * ll_md_real_close().  Finally free the ll_file_data and close capas.
 */
223 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
226 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
227 struct ll_inode_info *lli = ll_i2info(inode);
231 /* clear group lock, if present */
232 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
233 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
235 /* Let's see if we have good enough OPEN lock on the file and if
236 we can skip talking to MDS */
237 if (file->f_dentry->d_inode) { /* Can this ever be false? */
239 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
240 struct lustre_handle lockh;
241 struct inode *inode = file->f_dentry->d_inode;
242 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
244 cfs_down(&lli->lli_och_sem);
245 if (fd->fd_omode & FMODE_WRITE) {
247 LASSERT(lli->lli_open_fd_write_count);
248 lli->lli_open_fd_write_count--;
249 } else if (fd->fd_omode & FMODE_EXEC) {
251 LASSERT(lli->lli_open_fd_exec_count);
252 lli->lli_open_fd_exec_count--;
255 LASSERT(lli->lli_open_fd_read_count);
256 lli->lli_open_fd_read_count--;
258 cfs_up(&lli->lli_och_sem);
/* No matching cached OPEN lock: must tell the MDS about the close. */
260 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
261 LDLM_IBITS, &policy, lockmode,
263 rc = ll_md_real_close(file->f_dentry->d_inode,
267 CERROR("Releasing a file %p with negative dentry %p. Name %s",
268 file, file->f_dentry, file->f_dentry->d_name.name);
271 LUSTRE_FPRIVATE(file) = NULL;
272 ll_file_data_put(fd);
273 ll_capa_close(inode);
278 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
280 /* While this returns an error code, fput() the caller does not, so we need
281 * to make every effort to clean up all of our state here. Also, applications
282 * rarely check close errors and even if an error is returned they will not
283 * re-try the close call.
285 int ll_file_release(struct inode *inode, struct file *file)
287 struct ll_file_data *fd;
288 struct ll_sb_info *sbi = ll_i2sbi(inode);
289 struct ll_inode_info *lli = ll_i2info(inode);
290 struct lov_stripe_md *lsm = lli->lli_smd;
294 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
295 inode->i_generation, inode);
/* Remote-client ACL state is only kept on the root inode. */
297 #ifdef CONFIG_FS_POSIX_ACL
298 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
299 inode == inode->i_sb->s_root->d_inode) {
300 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
303 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
304 fd->fd_flags &= ~LL_FILE_RMTACL;
305 rct_del(&sbi->ll_rct, cfs_curproc_pid());
306 et_search_free(&sbi->ll_et, cfs_curproc_pid());
311 if (inode->i_sb->s_root != file->f_dentry)
312 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
313 fd = LUSTRE_FPRIVATE(file);
316 /* The last ref on @file, maybe not the owner pid of statahead.
317 * Different processes can open the same dir, "ll_opendir_key" means:
318 * it is me that should stop the statahead thread. */
319 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
320 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root dentry: no MDS close needed, just free the fd. */
322 if (inode->i_sb->s_root == file->f_dentry) {
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
329 lov_test_and_clear_async_rc(lsm);
330 lli->lli_async_rc = 0;
332 rc = ll_md_close(sbi->ll_md_exp, inode, file);
334 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
335 libcfs_debug_dumplog();
/*
 * Perform an intent-based open against the MDS for @file, optionally
 * carrying striping info (@lmm/@lmmsize) when called from setstripe.
 * On success, refreshes the inode from the reply and attaches the DLM
 * lock data.  The open handle is released early on -ESTALE-style paths
 * to avoid flooding the log.
 */
340 static int ll_intent_file_open(struct file *file, void *lmm,
341 int lmmsize, struct lookup_intent *itp)
343 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
344 struct dentry *parent = file->f_dentry->d_parent;
345 const char *name = file->f_dentry->d_name.name;
346 const int len = file->f_dentry->d_name.len;
347 struct md_op_data *op_data;
348 struct ptlrpc_request *req;
355 /* Usually we come here only for NFSD, and we want open lock.
356 But we can also get here with pre 2.6.15 patchless kernels, and in
357 that case that lock is also ok */
358 /* We can also get here if there was cached open handle in revalidate_it
359 * but it disappeared while we were getting from there to ll_file_open.
360 * But this means this file was closed and immediately opened which
361 * makes a good candidate for using OPEN lock */
362 /* If lmmsize & lmm are not 0, we are just setting stripe info
363 * parameters. No need for the open lock */
364 if (!lmm && !lmmsize)
365 itp->it_flags |= MDS_OPEN_LOCK;
367 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
368 file->f_dentry->d_inode, name, len,
369 O_RDWR, LUSTRE_OPC_ANY, NULL);
371 RETURN(PTR_ERR(op_data));
373 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
374 0 /*unused */, &req, ll_md_blocking_ast, 0);
375 ll_finish_md_op_data(op_data);
377 /* reason for keep own exit path - don't flood log
378 * with messages with -ESTALE errors.
380 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
381 it_open_error(DISP_OPEN_OPEN, itp))
383 ll_release_openhandle(file->f_dentry, itp);
387 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
388 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
389 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
393 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
394 if (!rc && itp->d.lustre.it_lock_mode)
395 md_set_lock_data(sbi->ll_md_exp,
396 &itp->d.lustre.it_lock_handle,
397 file->f_dentry->d_inode, NULL);
400 ptlrpc_req_finished(itp->d.lustre.it_data);
401 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
402 ll_intent_drop_lock(itp);
/*
 * Assign an obtained @ioepoch to client's inode.  Only updates when the
 * epoch is non-zero and differs from the currently cached one.
 */
408 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
409 * not believe attributes if a few ioepoch holders exist. Attributes for
410 * previous ioepoch if new one is opened are also skipped by MDS.
412 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
414 if (ioepoch && lli->lli_ioepoch != ioepoch) {
415 lli->lli_ioepoch = ioepoch;
416 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
417 ioepoch, PFID(&lli->lli_fid));
/*
 * Fill @och from the mdt_body of the open reply carried by @it:
 * copy the server file handle, fid, open flags and IO epoch, then
 * register the open for replay via md_set_open_replay_data().
 */
421 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
422 struct lookup_intent *it, struct obd_client_handle *och)
424 struct ptlrpc_request *req = it->d.lustre.it_data;
425 struct mdt_body *body;
429 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
430 LASSERT(body != NULL); /* reply already checked out */
432 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
433 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
434 och->och_fid = lli->lli_fid;
435 och->och_flags = it->it_flags;
436 ll_ioepoch_open(lli, body->ioepoch);
438 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish the client-side part of an open: optionally fill @och from the
 * intent reply, attach @fd as the file's private data, initialize
 * readahead state and record the open mode.
 */
441 int ll_local_open(struct file *file, struct lookup_intent *it,
442 struct ll_file_data *fd, struct obd_client_handle *och)
444 struct inode *inode = file->f_dentry->d_inode;
445 struct ll_inode_info *lli = ll_i2info(inode);
448 LASSERT(!LUSTRE_FPRIVATE(file));
453 struct ptlrpc_request *req = it->d.lustre.it_data;
454 struct mdt_body *body;
457 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
461 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
462 if ((it->it_flags & FMODE_WRITE) &&
463 (body->valid & OBD_MD_FLSIZE))
464 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
465 lli->lli_ioepoch, PFID(&lli->lli_fid));
468 LUSTRE_FPRIVATE(file) = fd;
469 ll_readahead_init(inode, &fd->fd_ras);
470 fd->fd_omode = it->it_flags;
474 /* Open a file, and (for the very first open) create objects on the OSTs at
475 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
476 * creation or open until ll_lov_setstripe() ioctl is called. We grab
477 * lli_open_sem to ensure no other process will create objects, send the
478 * stripe MD to the MDS, or try to destroy the objects if that fails.
480 * If we already have the stripe MD locally then we don't request it in
481 * md_open(), by passing a lmm_size = 0.
483 * It is up to the application to ensure no other processes open this file
484 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
485 * used. We might be able to avoid races of that sort by getting lli_open_sem
486 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
487 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
489 int ll_file_open(struct inode *inode, struct file *file)
491 struct ll_inode_info *lli = ll_i2info(inode);
492 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
493 .it_flags = file->f_flags };
494 struct lov_stripe_md *lsm;
495 struct ptlrpc_request *req = NULL;
496 struct obd_client_handle **och_p;
498 struct ll_file_data *fd;
499 int rc = 0, opendir_set = 0;
502 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
503 inode->i_generation, inode, file->f_flags);
505 it = file->private_data; /* XXX: compat macro */
506 file->private_data = NULL; /* prevent ll_local_open assertion */
508 fd = ll_file_data_get();
/* First opener of a directory becomes the statahead owner. */
513 if (S_ISDIR(inode->i_mode)) {
514 cfs_spin_lock(&lli->lli_sa_lock);
515 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
516 LASSERT(lli->lli_sai == NULL);
517 lli->lli_opendir_key = fd;
518 lli->lli_opendir_pid = cfs_curproc_pid();
521 cfs_spin_unlock(&lli->lli_sa_lock);
/* Root dentry opens need no MDS open handle. */
524 if (inode->i_sb->s_root == file->f_dentry) {
525 LUSTRE_FPRIVATE(file) = fd;
/* No intent (or no disposition) from lookup: build our own IT_OPEN. */
529 if (!it || !it->d.lustre.it_disposition) {
530 /* Convert f_flags into access mode. We cannot use file->f_mode,
531 * because everything but O_ACCMODE mask was stripped from
533 if ((oit.it_flags + 1) & O_ACCMODE)
535 if (file->f_flags & O_TRUNC)
536 oit.it_flags |= FMODE_WRITE;
538 /* kernel only call f_op->open in dentry_open. filp_open calls
539 * dentry_open after call to open_namei that checks permissions.
540 * Only nfsd_open call dentry_open directly without checking
541 * permissions and because of that this code below is safe. */
542 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
543 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
545 /* We do not want O_EXCL here, presumably we opened the file
546 * already? XXX - NFS implications? */
547 oit.it_flags &= ~O_EXCL;
549 /* bug20584, if "it_flags" contains O_CREAT, the file will be
550 * created if necessary, then "IT_CREAT" should be set to keep
551 * consistent with it */
552 if (oit.it_flags & O_CREAT)
553 oit.it_op |= IT_CREAT;
559 /* Let's see if we have file open on MDS already. */
560 if (it->it_flags & FMODE_WRITE) {
561 och_p = &lli->lli_mds_write_och;
562 och_usecount = &lli->lli_open_fd_write_count;
563 } else if (it->it_flags & FMODE_EXEC) {
564 och_p = &lli->lli_mds_exec_och;
565 och_usecount = &lli->lli_open_fd_exec_count;
567 och_p = &lli->lli_mds_read_och;
568 och_usecount = &lli->lli_open_fd_read_count;
571 cfs_down(&lli->lli_och_sem);
572 if (*och_p) { /* Open handle is present */
573 if (it_disposition(it, DISP_OPEN_OPEN)) {
574 /* Well, there's extra open request that we do not need,
575 let's close it somehow. This will decref request. */
576 rc = it_open_error(DISP_OPEN_OPEN, it);
578 cfs_up(&lli->lli_och_sem);
579 ll_file_data_put(fd);
580 GOTO(out_openerr, rc);
582 ll_release_openhandle(file->f_dentry, it);
583 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
588 rc = ll_local_open(file, it, fd, NULL);
591 cfs_up(&lli->lli_och_sem);
592 ll_file_data_put(fd);
593 GOTO(out_openerr, rc);
596 LASSERT(*och_usecount == 0);
597 if (!it->d.lustre.it_disposition) {
598 /* We cannot just request lock handle now, new ELC code
599 means that one of other OPEN locks for this file
600 could be cancelled, and since blocking ast handler
601 would attempt to grab och_sem as well, that would
602 result in a deadlock */
603 cfs_up(&lli->lli_och_sem);
604 it->it_create_mode |= M_CHECK_STALE;
605 rc = ll_intent_file_open(file, NULL, 0, it);
606 it->it_create_mode &= ~M_CHECK_STALE;
608 ll_file_data_put(fd);
609 GOTO(out_openerr, rc);
612 /* Got some error? Release the request */
613 if (it->d.lustre.it_status < 0) {
614 req = it->d.lustre.it_data;
615 ptlrpc_req_finished(req);
619 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
621 ll_file_data_put(fd);
622 GOTO(out_och_free, rc = -ENOMEM);
625 req = it->d.lustre.it_data;
627 /* md_intent_lock() didn't get a request ref if there was an
628 * open error, so don't do cleanup on the request here
630 /* XXX (green): Should not we bail out on any error here, not
631 * just open error? */
632 rc = it_open_error(DISP_OPEN_OPEN, it);
634 ll_file_data_put(fd);
635 GOTO(out_och_free, rc);
638 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
639 rc = ll_local_open(file, it, fd, *och_p);
641 ll_file_data_put(fd);
642 GOTO(out_och_free, rc);
645 cfs_up(&lli->lli_och_sem);
647 /* Must do this outside lli_och_sem lock to prevent deadlock where
648 different kind of OPEN lock for this same inode gets cancelled
649 by ldlm_cancel_lru */
650 if (!S_ISREG(inode->i_mode))
657 if (file->f_flags & O_LOV_DELAY_CREATE ||
658 !(file->f_mode & FMODE_WRITE)) {
659 CDEBUG(D_INODE, "object creation was delayed\n");
663 file->f_flags &= ~O_LOV_DELAY_CREATE;
666 ptlrpc_req_finished(req);
668 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
672 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
673 *och_p = NULL; /* OBD_FREE writes some magic there */
676 cfs_up(&lli->lli_och_sem);
678 if (opendir_set != 0)
679 ll_stop_statahead(inode, lli->lli_opendir_key);
/*
 * Fill @obdo with OST attributes for @lsm by issuing an async getattr
 * through a ptlrpc request set and waiting for completion.  When @sync
 * is set, OBD_FL_SRVLOCK requests the getattr under a server-side lock.
 */
685 /* Fills the obdo with the attributes for the lsm */
686 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
687 struct obd_capa *capa, struct obdo *obdo,
688 __u64 ioepoch, int sync)
690 struct ptlrpc_request_set *set;
691 struct obd_info oinfo = { { { 0 } } };
696 LASSERT(lsm != NULL);
700 oinfo.oi_oa->o_id = lsm->lsm_object_id;
701 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
702 oinfo.oi_oa->o_mode = S_IFREG;
703 oinfo.oi_oa->o_ioepoch = ioepoch;
704 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
705 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
706 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
707 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
708 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
709 oinfo.oi_capa = capa;
711 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
712 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
715 set = ptlrpc_prep_set();
717 CERROR("can't allocate ptlrpc set\n");
720 rc = obd_getattr_async(exp, &oinfo, set);
722 rc = ptlrpc_set_wait(set);
723 ptlrpc_set_destroy(set);
/* Mask the valid bits down to those the caller may trust from OSTs. */
726 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
727 OBD_MD_FLATIME | OBD_MD_FLMTIME |
728 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
/*
 * Performs the getattr on the inode's stripe objects and refreshes the
 * inode fields from the returned obdo.
 * If @sync != 0, perform the getattr under the server-side lock.
 */
733 * Performs the getattr on the inode and updates its fields.
734 * If @sync != 0, perform the getattr under the server-side lock.
736 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
737 __u64 ioepoch, int sync)
739 struct ll_inode_info *lli = ll_i2info(inode);
740 struct obd_capa *capa = ll_mdscapa_get(inode);
744 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
745 capa, obdo, ioepoch, sync);
748 obdo_refresh_inode(inode, obdo, obdo->o_valid);
750 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
751 lli->lli_smd->lsm_object_id, i_size_read(inode),
752 (unsigned long long)inode->i_blocks,
753 (unsigned long)ll_inode_blksize(inode));
/*
 * Merge the MDS-cached timestamps with OST lock value blocks (lvb) and
 * write the resulting size/blocks/times back into the inode, all under
 * the inode size lock.
 */
758 int ll_merge_lvb(struct inode *inode)
760 struct ll_inode_info *lli = ll_i2info(inode);
761 struct ll_sb_info *sbi = ll_i2sbi(inode);
767 ll_inode_size_lock(inode, 1);
768 inode_init_lvb(inode, &lvb);
770 /* merge timestamps the most recently obtained from mds with
771 timestamps obtained from osts */
772 lvb.lvb_atime = lli->lli_lvb.lvb_atime;
773 lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
774 lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
775 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
776 cl_isize_write_nolock(inode, lvb.lvb_size);
778 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
779 PFID(&lli->lli_fid), lvb.lvb_size);
780 inode->i_blocks = lvb.lvb_blocks;
782 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
783 LTIME_S(inode->i_atime) = lvb.lvb_atime;
784 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
785 ll_inode_size_unlock(inode, 1);
/*
 * Glimpse helper for ioctl paths: getattr the stripe objects of @lsm
 * and copy size/blocks/times into the caller-supplied stat structure.
 */
790 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
793 struct obdo obdo = { 0 };
796 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
798 st->st_size = obdo.o_size;
799 st->st_blocks = obdo.o_blocks;
800 st->st_mtime = obdo.o_mtime;
801 st->st_atime = obdo.o_atime;
802 st->st_ctime = obdo.o_ctime;
/*
 * Initialize a cl_io from the file's open flags: non-blocking and append
 * modes, the cl object, and the lock-request policy (never lock for
 * nolock files, mandatory lock for O_APPEND, otherwise maybe).
 */
807 void ll_io_init(struct cl_io *io, const struct file *file, int write)
809 struct inode *inode = file->f_dentry->d_inode;
811 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
813 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
814 io->ci_obj = ll_i2info(inode)->lli_clob;
815 io->ci_lockreq = CILR_MAYBE;
816 if (ll_file_nolock(file)) {
817 io->ci_lockreq = CILR_NEVER;
818 io->ci_no_srvlock = 1;
819 } else if (file->f_flags & O_APPEND) {
820 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common driver for all read/write entry points (normal iov, sendfile,
 * splice): set up the cl_io per IO subtype, take lli_write_sem for
 * non-grouplock writes (or lli_trunc_sem for reads), run cl_io_loop(),
 * then update *ppos and the write result code.
 * NOTE(review): this extract omits some original lines (case labels,
 * braces, cleanup) — numbering gaps are visible.
 */
824 static ssize_t ll_file_io_generic(const struct lu_env *env,
825 struct vvp_io_args *args, struct file *file,
826 enum cl_io_type iot, loff_t *ppos, size_t count)
828 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
833 io = ccc_env_thread_io(env);
834 ll_io_init(io, file, iot == CIT_WRITE);
836 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
837 struct vvp_io *vio = vvp_env_io(env);
838 struct ccc_io *cio = ccc_env_io(env);
839 int write_sem_locked = 0;
841 cio->cui_fd = LUSTRE_FPRIVATE(file);
842 vio->cui_io_subtype = args->via_io_subtype;
844 switch (vio->cui_io_subtype) {
846 cio->cui_iov = args->u.normal.via_iov;
847 cio->cui_nrsegs = args->u.normal.via_nrsegs;
848 cio->cui_tot_nrsegs = cio->cui_nrsegs;
849 #ifndef HAVE_FILE_WRITEV
850 cio->cui_iocb = args->u.normal.via_iocb;
/* Writes serialize on lli_write_sem unless a group lock is held. */
852 if ((iot == CIT_WRITE) &&
853 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
854 if(cfs_down_interruptible(&lli->lli_write_sem))
855 GOTO(out, result = -ERESTARTSYS);
856 write_sem_locked = 1;
857 } else if (iot == CIT_READ) {
858 cfs_down_read(&lli->lli_trunc_sem);
862 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
863 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
866 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
867 vio->u.splice.cui_flags = args->u.splice.via_flags;
870 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
873 result = cl_io_loop(env, io);
874 if (write_sem_locked)
875 cfs_up(&lli->lli_write_sem);
876 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
877 cfs_up_read(&lli->lli_trunc_sem);
879 /* cl_io_rw_init() handled IO */
880 result = io->ci_result;
883 if (io->ci_nob > 0) {
885 *ppos = io->u.ci_wr.wr.crw_pos;
890 if (iot == CIT_WRITE)
891 lli->lli_write_rc = result < 0 ? : 0;
/*
 * Validate an iovec array and compute the total byte count into *count,
 * possibly shortening *nr_segs at the first inaccessible segment.
 */
897 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
899 static int ll_file_get_iov_count(const struct iovec *iov,
900 unsigned long *nr_segs, size_t *count)
905 for (seg = 0; seg < *nr_segs; seg++) {
906 const struct iovec *iv = &iov[seg];
909 * If any segment has a negative length, or the cumulative
910 * length ever wraps negative then return -EINVAL.
913 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
915 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
920 cnt -= iv->iov_len; /* This segment is no good */
927 #ifdef HAVE_FILE_READV
/*
 * Vectored read entry point (pre-AIO kernels): validate the iovec,
 * grab a cl environment and dispatch to ll_file_io_generic(CIT_READ).
 */
928 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
929 unsigned long nr_segs, loff_t *ppos)
932 struct vvp_io_args *args;
938 result = ll_file_get_iov_count(iov, &nr_segs, &count);
942 env = cl_env_get(&refcheck);
944 RETURN(PTR_ERR(env));
946 args = vvp_env_args(env, IO_NORMAL);
947 args->u.normal.via_iov = (struct iovec *)iov;
948 args->u.normal.via_nrsegs = nr_segs;
950 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
951 cl_env_put(env, &refcheck);
/*
 * Single-buffer read (readv-based variant): wrap @buf/@count in a
 * one-element iovec and forward to ll_file_readv().
 */
955 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
959 struct iovec *local_iov;
964 env = cl_env_get(&refcheck);
966 RETURN(PTR_ERR(env));
968 local_iov = &vvp_env_info(env)->vti_local_iov;
969 local_iov->iov_base = (void __user *)buf;
970 local_iov->iov_len = count;
971 result = ll_file_readv(file, local_iov, 1, ppos);
972 cl_env_put(env, &refcheck);
/*
 * AIO read entry point: validate the iovec, set up IO_NORMAL args
 * (including the kiocb) and dispatch to ll_file_io_generic(CIT_READ)
 * at iocb->ki_pos.
 */
977 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
978 unsigned long nr_segs, loff_t pos)
981 struct vvp_io_args *args;
987 result = ll_file_get_iov_count(iov, &nr_segs, &count);
991 env = cl_env_get(&refcheck);
993 RETURN(PTR_ERR(env));
995 args = vvp_env_args(env, IO_NORMAL);
996 args->u.normal.via_iov = (struct iovec *)iov;
997 args->u.normal.via_nrsegs = nr_segs;
998 args->u.normal.via_iocb = iocb;
1000 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1001 &iocb->ki_pos, count);
1002 cl_env_put(env, &refcheck);
/*
 * Single-buffer read (AIO-based variant): build a synchronous kiocb and
 * one-element iovec, call ll_file_aio_read(), then propagate the final
 * position back to *ppos.
 */
1006 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1010 struct iovec *local_iov;
1011 struct kiocb *kiocb;
1016 env = cl_env_get(&refcheck);
1018 RETURN(PTR_ERR(env));
1020 local_iov = &vvp_env_info(env)->vti_local_iov;
1021 kiocb = &vvp_env_info(env)->vti_kiocb;
1022 local_iov->iov_base = (void __user *)buf;
1023 local_iov->iov_len = count;
1024 init_sync_kiocb(kiocb, file);
1025 kiocb->ki_pos = *ppos;
1026 kiocb->ki_left = count;
1028 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1029 *ppos = kiocb->ki_pos;
1031 cl_env_put(env, &refcheck);
1037 * Write to a file (through the page cache).
1039 #ifdef HAVE_FILE_WRITEV
/*
 * Vectored write entry point (pre-AIO kernels): validate the iovec and
 * dispatch to ll_file_io_generic(CIT_WRITE).
 */
1040 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1041 unsigned long nr_segs, loff_t *ppos)
1044 struct vvp_io_args *args;
1050 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1054 env = cl_env_get(&refcheck);
1056 RETURN(PTR_ERR(env));
1058 args = vvp_env_args(env, IO_NORMAL);
1059 args->u.normal.via_iov = (struct iovec *)iov;
1060 args->u.normal.via_nrsegs = nr_segs;
1062 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1063 cl_env_put(env, &refcheck);
/*
 * Single-buffer write (writev-based variant): wrap @buf/@count in a
 * one-element iovec and forward to ll_file_writev().
 */
1067 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1071 struct iovec *local_iov;
1076 env = cl_env_get(&refcheck);
1078 RETURN(PTR_ERR(env));
1080 local_iov = &vvp_env_info(env)->vti_local_iov;
1081 local_iov->iov_base = (void __user *)buf;
1082 local_iov->iov_len = count;
1084 result = ll_file_writev(file, local_iov, 1, ppos);
1085 cl_env_put(env, &refcheck);
1089 #else /* AIO stuff */
/*
 * AIO write entry point: validate the iovec, set up IO_NORMAL args
 * (including the kiocb) and dispatch to ll_file_io_generic(CIT_WRITE)
 * at iocb->ki_pos.
 */
1090 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1091 unsigned long nr_segs, loff_t pos)
1094 struct vvp_io_args *args;
1100 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1104 env = cl_env_get(&refcheck);
1106 RETURN(PTR_ERR(env));
1108 args = vvp_env_args(env, IO_NORMAL);
1109 args->u.normal.via_iov = (struct iovec *)iov;
1110 args->u.normal.via_nrsegs = nr_segs;
1111 args->u.normal.via_iocb = iocb;
1113 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1114 &iocb->ki_pos, count);
1115 cl_env_put(env, &refcheck);
/*
 * Single-buffer write (AIO-based variant): build a synchronous kiocb and
 * one-element iovec, call ll_file_aio_write(), then propagate the final
 * position back to *ppos.
 */
1119 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1123 struct iovec *local_iov;
1124 struct kiocb *kiocb;
1129 env = cl_env_get(&refcheck);
1131 RETURN(PTR_ERR(env));
1133 local_iov = &vvp_env_info(env)->vti_local_iov;
1134 kiocb = &vvp_env_info(env)->vti_kiocb;
1135 local_iov->iov_base = (void __user *)buf;
1136 local_iov->iov_len = count;
1137 init_sync_kiocb(kiocb, file);
1138 kiocb->ki_pos = *ppos;
1139 kiocb->ki_left = count;
1141 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1142 *ppos = kiocb->ki_pos;
1144 cl_env_put(env, &refcheck);
1150 #ifdef HAVE_KERNEL_SENDFILE
/*
 * sendfile entry point: run a CIT_READ through ll_file_io_generic()
 * with the IO_SENDFILE subtype, handing @actor/@target to the vvp layer.
 */
1152 * Send file content (through pagecache) somewhere with helper
1154 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1155 read_actor_t actor, void *target)
1158 struct vvp_io_args *args;
1163 env = cl_env_get(&refcheck);
1165 RETURN(PTR_ERR(env));
1167 args = vvp_env_args(env, IO_SENDFILE);
1168 args->u.sendfile.via_target = target;
1169 args->u.sendfile.via_actor = actor;
1171 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1172 cl_env_put(env, &refcheck);
1177 #ifdef HAVE_KERNEL_SPLICE_READ
/*
 * splice_read entry point: run a CIT_READ through ll_file_io_generic()
 * with the IO_SPLICE subtype, handing @pipe/@flags to the vvp layer.
 */
1179 * Send file content (through pagecache) somewhere with helper
1181 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1182 struct pipe_inode_info *pipe, size_t count,
1186 struct vvp_io_args *args;
1191 env = cl_env_get(&refcheck);
1193 RETURN(PTR_ERR(env));
1195 args = vvp_env_args(env, IO_SPLICE);
1196 args->u.splice.via_pipe = pipe;
1197 args->u.splice.via_flags = flags;
1199 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1200 cl_env_put(env, &refcheck);
/*
 * Recreate missing OST objects for @inode: clone the stripe metadata,
 * mark the obdo with OBD_FL_RECREATE_OBJS (target OST index carried in
 * o_nlink) and call obd_create() under the inode size lock.
 * NOTE(review): this extract omits some original lines (obdo allocation,
 * cleanup labels) — numbering gaps are visible.
 */
1205 static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
1208 struct obd_export *exp = ll_i2dtexp(inode);
1209 struct obd_trans_info oti = { 0 };
1210 struct obdo *oa = NULL;
1213 struct lov_stripe_md *lsm, *lsm2;
1220 ll_inode_size_lock(inode, 0);
1221 lsm = ll_i2info(inode)->lli_smd;
1223 GOTO(out, rc = -ENOENT);
1224 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1225 (lsm->lsm_stripe_count));
1227 OBD_ALLOC_LARGE(lsm2, lsm_size);
1229 GOTO(out, rc = -ENOMEM);
1233 oa->o_nlink = ost_idx;
1234 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1235 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1236 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid, OBD_MD_FLTYPE |
1237 OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1238 memcpy(lsm2, lsm, lsm_size);
1239 rc = obd_create(exp, oa, &lsm2, &oti);
1241 OBD_FREE_LARGE(lsm2, lsm_size);
1244 ll_inode_size_unlock(inode, 0);
/*
 * LL_IOC_RECREATE_OBJ ioctl handler: copy the ll_recreate_obj request
 * from userspace (admin-only, CFS_CAP_SYS_ADMIN) and recreate the object
 * by object id / OST index.
 */
1249 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1251 struct ll_recreate_obj ucreat;
1254 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1257 if (cfs_copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1258 sizeof(struct ll_recreate_obj)))
1261 RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
1262 ucreat.lrc_ost_idx));
/*
 * LL_IOC_RECREATE_FID ioctl handler: copy a lu_fid from userspace
 * (admin-only), unpack object id and OST index from the fid fields,
 * then recreate the object.
 */
1265 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1272 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1275 if (cfs_copy_from_user(&fid, (struct lu_fid *)arg,
1276 sizeof(struct lu_fid)))
/* Object id = oid plus low 16 bits of seq shifted high; OST index is
 * carried in bits 16..31 of the sequence number. */
1279 id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
1280 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1281 RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
/*
 * Set striping info (@lum/@lum_size) on @inode via an intent open:
 * fails if stripe metadata already exists; otherwise performs
 * ll_intent_file_open() with the user md attached and releases the
 * resulting open handle.
 */
1284 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1285 int flags, struct lov_user_md *lum, int lum_size)
1287 struct lov_stripe_md *lsm;
1288 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1292 ll_inode_size_lock(inode, 0);
1293 lsm = ll_i2info(inode)->lli_smd;
1295 ll_inode_size_unlock(inode, 0);
1296 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1301 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1304 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1305 GOTO(out_req_free, rc = -ENOENT);
1306 rc = oit.d.lustre.it_status;
1308 GOTO(out_req_free, rc);
1310 ll_release_openhandle(file->f_dentry, &oit);
1313 ll_inode_size_unlock(inode, 0);
1314 ll_intent_release(&oit);
1317 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping metadata) of @filename from the MDS.
 * On success *lmmp points into the reply buffer of *request (caller must
 * finish the request to release it) and *lmm_size is the EA size.
 * Rejects EAs whose magic is neither LOV_MAGIC_V1 nor V3, and byte-swaps
 * the EA to host endianness on big-endian clients. */
1321 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1322 struct lov_mds_md **lmmp, int *lmm_size,
1323 struct ptlrpc_request **request)
1325 struct ll_sb_info *sbi = ll_i2sbi(inode);
1326 struct mdt_body *body;
1327 struct lov_mds_md *lmm = NULL;
1328 struct ptlrpc_request *req = NULL;
1329 struct md_op_data *op_data;
1332 rc = ll_get_max_mdsize(sbi, &lmmsize);
1336 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1337 strlen(filename), lmmsize,
1338 LUSTRE_OPC_ANY, NULL);
1339 if (op_data == NULL)
1342 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1343 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1344 ll_finish_md_op_data(op_data);
1346 CDEBUG(D_INFO, "md_getattr_name failed "
1347 "on %s: rc %d\n", filename, rc);
1351 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1352 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1354 lmmsize = body->eadatasize;
/* No EA bits valid (or, presumably, zero size — condition partly elided)
 * means the file has no striping data. */
1356 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1358 GOTO(out, rc = -ENODATA);
1361 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1362 LASSERT(lmm != NULL);
1364 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1365 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1366 GOTO(out, rc = -EPROTO);
1370 * This is coming from the MDS, so is probably in
1371 * little endian. We convert it to host endian before
1372 * passing it to userspace.
/* LOV_MAGIC != le32(LOV_MAGIC) only on big-endian hosts: swab needed. */
1374 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1375 /* if function called for directory - we should
1376 * avoid swab not existent lsm objects */
1377 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1378 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1379 if (S_ISREG(body->mode))
1380 lustre_swab_lov_user_md_objects(
1381 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1382 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1383 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1384 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1385 if (S_ISREG(body->mode))
1386 lustre_swab_lov_user_md_objects(
1387 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1388 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1394 *lmm_size = lmmsize;
1399 static int ll_lov_setea(struct inode *inode, struct file *file,
1402 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1403 struct lov_user_md *lump;
1404 int lum_size = sizeof(struct lov_user_md) +
1405 sizeof(struct lov_user_ost_data);
1409 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1412 OBD_ALLOC_LARGE(lump, lum_size);
1416 if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1417 OBD_FREE_LARGE(lump, lum_size);
1421 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1423 OBD_FREE_LARGE(lump, lum_size);
1427 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1430 struct lov_user_md_v3 lumv3;
1431 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1432 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1433 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1436 int flags = FMODE_WRITE;
1439 /* first try with v1 which is smaller than v3 */
1440 lum_size = sizeof(struct lov_user_md_v1);
1441 if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
1444 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1445 lum_size = sizeof(struct lov_user_md_v3);
1446 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1450 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1452 put_user(0, &lumv1p->lmm_stripe_count);
1453 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1454 0, ll_i2info(inode)->lli_smd,
/* LL_IOC_LOV_GETSTRIPE handler: forward the request to the data (LOV)
 * export with the inode's stripe md.  NOTE(review): the listing elides
 * the no-lsm error path between the declaration and the call. */
1460 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1462 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1467 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/* Take a Lustre group lock with group id @arg on behalf of this open
 * file.  Refuses when locking is disabled for the file, when this fd
 * already holds a group lock, or (racing) when another thread set one
 * between the unlocked cl_get_grouplock() call and re-taking lli_lock.
 * On success the grouplock handle is stashed in the fd and
 * LL_FILE_GROUP_LOCKED is set. */
1471 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1473 struct ll_inode_info *lli = ll_i2info(inode);
1474 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1475 struct ccc_grouplock grouplock;
1479 if (ll_file_nolock(file))
1480 RETURN(-EOPNOTSUPP);
1482 cfs_spin_lock(&lli->lli_lock);
1483 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1484 CWARN("group lock already existed with gid %lu\n",
1485 fd->fd_grouplock.cg_gid);
1486 cfs_spin_unlock(&lli->lli_lock);
1489 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1490 cfs_spin_unlock(&lli->lli_lock);
/* The cl-layer enqueue may block, so it runs outside the spinlock;
 * O_NONBLOCK makes it a non-blocking attempt. */
1492 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1493 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1497 cfs_spin_lock(&lli->lli_lock);
1498 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1499 cfs_spin_unlock(&lli->lli_lock);
1500 CERROR("another thread just won the race\n");
1501 cl_put_grouplock(&grouplock);
1505 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1506 fd->fd_grouplock = grouplock;
1507 cfs_spin_unlock(&lli->lli_lock);
1509 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
1513 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1515 struct ll_inode_info *lli = ll_i2info(inode);
1516 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1517 struct ccc_grouplock grouplock;
1520 cfs_spin_lock(&lli->lli_lock);
1521 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1522 cfs_spin_unlock(&lli->lli_lock);
1523 CWARN("no group lock held\n");
1526 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1528 if (fd->fd_grouplock.cg_gid != arg) {
1529 CWARN("group lock %lu doesn't match current id %lu\n",
1530 arg, fd->fd_grouplock.cg_gid);
1531 cfs_spin_unlock(&lli->lli_lock);
1535 grouplock = fd->fd_grouplock;
1536 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1537 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1538 cfs_spin_unlock(&lli->lli_lock);
1540 cl_put_grouplock(&grouplock);
1541 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1546 * Close inode open handle
1548 * \param dentry [in] dentry which contains the inode
1549 * \param it [in,out] intent which contains open info and result
1552 * \retval <0 failure
/* Close the MDS open handle carried by intent @it for @dentry's inode.
 * No-ops for the filesystem root and for intents with no DISP_OPEN_OPEN
 * disposition.  Allocates a temporary obd_client_handle, fills it from
 * the intent, closes it via ll_close_inode_openhandle(), and drops the
 * extra request reference held by DISP_ENQ_OPEN_REF. */
1554 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1556 struct inode *inode = dentry->d_inode;
1557 struct obd_client_handle *och;
1563 /* Root ? Do nothing. */
1564 if (dentry->d_inode->i_sb->s_root == dentry)
1567 /* No open handle to close? Move away */
1568 if (!it_disposition(it, DISP_OPEN_OPEN))
1571 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1573 OBD_ALLOC(och, sizeof(*och));
1575 GOTO(out, rc = -ENOMEM);
1577 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1578 ll_i2info(inode), it, och);
1580 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1583 /* this one is in place of ll_file_open */
1584 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1585 ptlrpc_req_finished(it->d.lustre.it_data);
1586 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1591 * Get size for inode for which FIEMAP mapping is requested.
1592 * Make the FIEMAP get_info call and returns the result.
/* Core FIEMAP implementation: validate the requested flags, optionally
 * flush dirty pages (FIEMAP_FLAG_SYNC), then ask the data export for the
 * extent mapping via obd_get_info(KEY_FIEMAP).  Striped files with more
 * than one stripe require the caller to accept DEVICE_ORDER extents.
 * @fiemap is used for both input (request) and output (mapping);
 * @num_bytes bounds the reply. */
1594 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1597 struct obd_export *exp = ll_i2dtexp(inode);
1598 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1599 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1600 int vallen = num_bytes;
1604 /* Checks for fiemap flags */
1605 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* Report back which flags we do support before failing. */
1606 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1610 /* Check for FIEMAP_FLAG_SYNC */
1611 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1612 rc = filemap_fdatawrite(inode->i_mapping);
1617 /* If the stripe_count > 1 and the application does not understand
1618 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1620 if (lsm->lsm_stripe_count > 1 &&
1621 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1624 fm_key.oa.o_id = lsm->lsm_object_id;
1625 fm_key.oa.o_seq = lsm->lsm_object_seq;
1626 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1628 obdo_from_inode(&fm_key.oa, inode, &ll_i2info(inode)->lli_fid,
1630 /* If filesize is 0, then there would be no objects for mapping */
1631 if (fm_key.oa.o_size == 0) {
1632 fiemap->fm_mapped_extents = 0;
1636 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1638 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1640 CERROR("obd_get_info failed: rc = %d\n", rc);
1645 int ll_fid2path(struct obd_export *exp, void *arg)
1647 struct getinfo_fid2path *gfout, *gfin;
1651 /* Need to get the buflen */
1652 OBD_ALLOC_PTR(gfin);
1655 if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
1660 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1661 OBD_ALLOC(gfout, outsize);
1662 if (gfout == NULL) {
1666 memcpy(gfout, gfin, sizeof(*gfout));
1669 /* Call mdc_iocontrol */
1670 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1673 if (cfs_copy_to_user(arg, gfout, outsize))
1677 OBD_FREE(gfout, outsize);
1681 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1683 struct ll_user_fiemap *fiemap_s;
1684 size_t num_bytes, ret_bytes;
1685 unsigned int extent_count;
1688 /* Get the extent count so we can calculate the size of
1689 * required fiemap buffer */
1690 if (get_user(extent_count,
1691 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1693 num_bytes = sizeof(*fiemap_s) + (extent_count *
1694 sizeof(struct ll_fiemap_extent));
1696 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1697 if (fiemap_s == NULL)
1700 /* get the fiemap value */
1701 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1703 GOTO(error, rc = -EFAULT);
1705 /* If fm_extent_count is non-zero, read the first extent since
1706 * it is used to calculate end_offset and device from previous
1709 if (copy_from_user(&fiemap_s->fm_extents[0],
1710 (char __user *)arg + sizeof(*fiemap_s),
1711 sizeof(struct ll_fiemap_extent)))
1712 GOTO(error, rc = -EFAULT);
1715 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1719 ret_bytes = sizeof(struct ll_user_fiemap);
1721 if (extent_count != 0)
1722 ret_bytes += (fiemap_s->fm_mapped_extents *
1723 sizeof(struct ll_fiemap_extent));
1725 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1729 OBD_FREE_LARGE(fiemap_s, num_bytes);
/* File ioctl dispatcher for the llite layer.  Built either with the
 * unlocked_ioctl signature (HAVE_UNLOCKED_IOCTL) or the legacy one that
 * receives the inode explicitly.  Dispatches LL_IOC_*, FSFILT_IOC_* and
 * OBD_IOC_* commands to the helpers above; anything unrecognized falls
 * through to the dynamically-registered handlers (ll_iocontrol_call)
 * and finally to obd_iocontrol() on the data export.  NOTE(review):
 * switch braces and several RETURNs are in elided lines. */
1733 #ifdef HAVE_UNLOCKED_IOCTL
1734 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1736 struct inode *inode = file->f_dentry->d_inode;
1738 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1742 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1746 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1747 inode->i_generation, inode, cmd);
1748 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1750 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1751 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1755 case LL_IOC_GETFLAGS:
1756 /* Get the current value of the file flags */
1757 return put_user(fd->fd_flags, (int *)arg);
1758 case LL_IOC_SETFLAGS:
1759 case LL_IOC_CLRFLAGS:
1760 /* Set or clear specific file flags */
1761 /* XXX This probably needs checks to ensure the flags are
1762 * not abused, and to handle any flag side effects.
1764 if (get_user(flags, (int *) arg))
1767 if (cmd == LL_IOC_SETFLAGS) {
/* LL_FILE_IGNORE_LOCK only makes sense with O_DIRECT I/O. */
1768 if ((flags & LL_FILE_IGNORE_LOCK) &&
1769 !(file->f_flags & O_DIRECT)) {
1770 CERROR("%s: unable to disable locking on "
1771 "non-O_DIRECT file\n", current->comm);
1775 fd->fd_flags |= flags;
1777 fd->fd_flags &= ~flags;
1780 case LL_IOC_LOV_SETSTRIPE:
1781 RETURN(ll_lov_setstripe(inode, file, arg));
1782 case LL_IOC_LOV_SETEA:
1783 RETURN(ll_lov_setea(inode, file, arg));
1784 case LL_IOC_LOV_GETSTRIPE:
1785 RETURN(ll_lov_getstripe(inode, arg));
1786 case LL_IOC_RECREATE_OBJ:
1787 RETURN(ll_lov_recreate_obj(inode, arg));
1788 case LL_IOC_RECREATE_FID:
1789 RETURN(ll_lov_recreate_fid(inode, arg));
1790 case FSFILT_IOC_FIEMAP:
1791 RETURN(ll_ioctl_fiemap(inode, arg));
1792 case FSFILT_IOC_GETFLAGS:
1793 case FSFILT_IOC_SETFLAGS:
1794 RETURN(ll_iocontrol(inode, file, cmd, arg));
1795 case FSFILT_IOC_GETVERSION_OLD:
1796 case FSFILT_IOC_GETVERSION:
1797 RETURN(put_user(inode->i_generation, (int *)arg));
1798 case LL_IOC_GROUP_LOCK:
1799 RETURN(ll_get_grouplock(inode, file, arg));
1800 case LL_IOC_GROUP_UNLOCK:
1801 RETURN(ll_put_grouplock(inode, file, arg));
1802 case IOC_OBD_STATFS:
1803 RETURN(ll_obd_statfs(inode, (void *)arg));
1805 /* We need to special case any other ioctls we want to handle,
1806 * to send them to the MDS/OST as appropriate and to properly
1807 * network encode the arg field.
1808 case FSFILT_IOC_SETVERSION_OLD:
1809 case FSFILT_IOC_SETVERSION:
1811 case LL_IOC_FLUSHCTX:
1812 RETURN(ll_flush_ctx(inode));
1813 case LL_IOC_PATH2FID: {
1814 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1815 sizeof(struct lu_fid)))
1820 case OBD_IOC_FID2PATH:
1821 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1823 case LL_IOC_GET_MDTIDX: {
1826 mdtidx = ll_get_mdt_idx(inode);
1830 if (put_user((int)mdtidx, (int*)arg))
/* Unknown command: try dynamically registered handlers, then forward
 * to the data export as a generic obd iocontrol. */
1840 ll_iocontrol_call(inode, file, cmd, arg, &err))
1843 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/* llseek for Lustre files.  For SEEK_END the current file size must be
 * fetched from the OSTs first (cl_glimpse_size), honouring O_NONBLOCK
 * via LDLM_FL_BLOCK_NOWAIT; SEEK_CUR is relative to f_pos.  The final
 * offset is validated against [0, ll_file_maxbytes] before updating
 * f_pos.  NOTE(review): the failure returns between the visible lines
 * are elided in this listing. */
1849 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1851 struct inode *inode = file->f_dentry->d_inode;
1854 retval = offset + ((origin == 2) ? i_size_read(inode) :
1855 (origin == 1) ? file->f_pos : 0);
1856 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1857 inode->i_ino, inode->i_generation, inode, retval, retval,
1858 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1859 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1861 if (origin == 2) { /* SEEK_END */
1862 int nonblock = 0, rc;
1864 if (file->f_flags & O_NONBLOCK)
1865 nonblock = LDLM_FL_BLOCK_NOWAIT;
1867 rc = cl_glimpse_size(inode);
1871 offset += i_size_read(inode);
1872 } else if (origin == 1) { /* SEEK_CUR */
1873 offset += file->f_pos;
1877 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1878 if (offset != file->f_pos) {
1879 file->f_pos = offset;
1887 #ifdef HAVE_FLUSH_OWNER_ID
1888 int ll_flush(struct file *file, fl_owner_t id)
1890 int ll_flush(struct file *file)
1893 struct inode *inode = file->f_dentry->d_inode;
1894 struct ll_inode_info *lli = ll_i2info(inode);
1895 struct lov_stripe_md *lsm = lli->lli_smd;
1898 /* the application should know write failure already. */
1899 if (lli->lli_write_rc)
1902 /* catch async errors that were recorded back when async writeback
1903 * failed for pages in this mapping. */
1904 rc = lli->lli_async_rc;
1905 lli->lli_async_rc = 0;
1907 err = lov_test_and_clear_async_rc(lsm);
1912 return rc ? -EIO : 0;
/* fsync: wait for in-flight page I/O, pick up recorded async errors,
 * sync the metadata via md_sync() (with an MDS capability), and — on the
 * path shown after line 1951, presumably when data sync is requested and
 * a stripe md exists (condition elided) — build an obdo and sync the
 * data objects via obd_sync_rqset() with an OSS write capability.  The
 * per-inode lli_write_rc is updated from the data-sync result. */
1915 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1917 struct inode *inode = dentry->d_inode;
1918 struct ll_inode_info *lli = ll_i2info(inode);
1919 struct lov_stripe_md *lsm = lli->lli_smd;
1920 struct ptlrpc_request *req;
1921 struct obd_capa *oc;
1924 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1925 inode->i_generation, inode);
1926 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1928 /* fsync's caller has already called _fdata{sync,write}, we want
1929 * that IO to finish before calling the osc and mdc sync methods */
1930 rc = filemap_fdatawait(inode->i_mapping);
1932 /* catch async errors that were recorded back when async writeback
1933 * failed for pages in this mapping. */
1934 err = lli->lli_async_rc;
1935 lli->lli_async_rc = 0;
1939 err = lov_test_and_clear_async_rc(lsm);
1944 oc = ll_mdscapa_get(inode);
1945 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1951 ptlrpc_req_finished(req);
1954 struct obd_info *oinfo;
1956 OBD_ALLOC_PTR(oinfo);
1958 RETURN(rc ? rc : -ENOMEM);
1959 OBDO_ALLOC(oinfo->oi_oa);
1960 if (!oinfo->oi_oa) {
1961 OBD_FREE_PTR(oinfo);
1962 RETURN(rc ? rc : -ENOMEM);
1964 oinfo->oi_oa->o_id = lsm->lsm_object_id;
1965 oinfo->oi_oa->o_seq = lsm->lsm_object_seq;
1966 oinfo->oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1967 obdo_from_inode(oinfo->oi_oa, inode, &ll_i2info(inode)->lli_fid,
1968 OBD_MD_FLTYPE | OBD_MD_FLATIME |
1969 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1972 oinfo->oi_capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1973 err = obd_sync_rqset(ll_i2sbi(inode)->ll_dt_exp, oinfo, 0,
1975 capa_put(oinfo->oi_capa);
1978 OBDO_FREE(oinfo->oi_oa);
1979 OBD_FREE_PTR(oinfo);
1980 lli->lli_write_rc = err < 0 ? : 0;
1986 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1988 struct inode *inode = file->f_dentry->d_inode;
1989 struct ll_sb_info *sbi = ll_i2sbi(inode);
1990 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1991 .ei_cb_cp =ldlm_flock_completion_ast,
1992 .ei_cbdata = file_lock };
1993 struct md_op_data *op_data;
1994 struct lustre_handle lockh = {0};
1995 ldlm_policy_data_t flock = {{0}};
2000 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2001 inode->i_ino, file_lock);
2003 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2005 if (file_lock->fl_flags & FL_FLOCK) {
2006 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2007 /* flocks are whole-file locks */
2008 flock.l_flock.end = OFFSET_MAX;
2009 /* For flocks owner is determined by the local file desctiptor*/
2010 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2011 } else if (file_lock->fl_flags & FL_POSIX) {
2012 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2013 flock.l_flock.start = file_lock->fl_start;
2014 flock.l_flock.end = file_lock->fl_end;
2018 flock.l_flock.pid = file_lock->fl_pid;
2020 switch (file_lock->fl_type) {
2022 einfo.ei_mode = LCK_PR;
2025 /* An unlock request may or may not have any relation to
2026 * existing locks so we may not be able to pass a lock handle
2027 * via a normal ldlm_lock_cancel() request. The request may even
2028 * unlock a byte range in the middle of an existing lock. In
2029 * order to process an unlock request we need all of the same
2030 * information that is given with a normal read or write record
2031 * lock request. To avoid creating another ldlm unlock (cancel)
2032 * message we'll treat a LCK_NL flock request as an unlock. */
2033 einfo.ei_mode = LCK_NL;
2036 einfo.ei_mode = LCK_PW;
2039 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
2054 flags = LDLM_FL_BLOCK_NOWAIT;
2060 flags = LDLM_FL_TEST_LOCK;
2061 /* Save the old mode so that if the mode in the lock changes we
2062 * can decrement the appropriate reader or writer refcount. */
2063 file_lock->fl_type = einfo.ei_mode;
2066 CERROR("unknown fcntl lock command: %d\n", cmd);
2070 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2071 LUSTRE_OPC_ANY, NULL);
2072 if (IS_ERR(op_data))
2073 RETURN(PTR_ERR(op_data));
2075 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2076 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2077 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2079 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2080 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2082 ll_finish_md_op_data(op_data);
2084 if ((file_lock->fl_flags & FL_FLOCK) &&
2085 (rc == 0 || file_lock->fl_type == F_UNLCK))
2086 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2087 #ifdef HAVE_F_OP_FLOCK
2088 if ((file_lock->fl_flags & FL_POSIX) &&
2089 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2090 !(flags & LDLM_FL_TEST_LOCK))
2091 posix_lock_file_wait(file, file_lock);
/* Lock handler for -o noflock mounts; body elided in this listing
 * (presumably returns an error such as -ENOSYS — see the comment at the
 * ll_file_operations_noflock table below; TODO confirm against full
 * source). */
2097 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/* Test (without taking a reference) whether this client already holds an
 * MDS inodebits lock covering @bits on @inode.  LCK_MINMODE means "any
 * mode".  Uses LDLM_FL_TEST_LOCK so granted state is only probed. */
2104 int ll_have_md_lock(struct inode *inode, __u64 bits, ldlm_mode_t l_req_mode)
2106 struct lustre_handle lockh;
2107 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2108 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2109 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2117 fid = &ll_i2info(inode)->lli_fid;
2118 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2119 ldlm_lockname[mode]);
2121 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2122 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
/* Like ll_have_md_lock(), but actually matches (and so references) an
 * existing MDS inodebits lock in any mode, returning its handle in
 * @lockh.  No LDLM_FL_TEST_LOCK here — the caller owns the match. */
2129 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2130 struct lustre_handle *lockh)
2132 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2138 fid = &ll_i2info(inode)->lli_fid;
2139 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2141 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2142 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2143 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/* Post-process a revalidate RPC result: -ENOENT means the file was
 * already unlinked, which is treated as success (with nlink updated in
 * elided lines) for regular files and directories; other errors are
 * logged.  NOTE(review): the returns between the visible lines are
 * elided in this listing. */
2147 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2148 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2149 * and return success */
2151 /* This path cannot be hit for regular files unless in
2152 * case of obscure races, so no need to to validate
2154 if (!S_ISREG(inode->i_mode) &&
2155 !S_ISDIR(inode->i_mode))
2160 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/* Revalidate @dentry's inode attributes against the MDS.  Two paths:
 * with OBD_CONNECT_ATTRFID the server supports getattr-by-fid, so an
 * IT_GETATTR/IT_LOOKUP intent lock is taken (M_CHECK_STALE marks it as
 * a staleness probe) and the dentry is dropped if the file turned out
 * unlinked; otherwise, when no suitable MD lock is cached, a plain
 * md_getattr RPC refreshes the inode (including EA sizing for regular
 * files).  NOTE(review): line-sampled; several error returns and the
 * request-cleanup tail are elided. */
2168 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2171 struct inode *inode = dentry->d_inode;
2172 struct ptlrpc_request *req = NULL;
2173 struct ll_sb_info *sbi;
2174 struct obd_export *exp;
2179 CERROR("REPORT THIS LINE TO PETER\n");
2182 sbi = ll_i2sbi(inode);
2184 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2185 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2187 exp = ll_i2mdexp(inode);
2189 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2190 * But under CMD case, it caused some lock issues, should be fixed
2191 * with new CMD ibits lock. See bug 12718 */
2192 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2193 struct lookup_intent oit = { .it_op = IT_GETATTR };
2194 struct md_op_data *op_data;
2196 if (ibits == MDS_INODELOCK_LOOKUP)
2197 oit.it_op = IT_LOOKUP;
2199 /* Call getattr by fid, so do not provide name at all. */
2200 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2201 dentry->d_inode, NULL, 0, 0,
2202 LUSTRE_OPC_ANY, NULL);
2203 if (IS_ERR(op_data))
2204 RETURN(PTR_ERR(op_data));
2206 oit.it_create_mode |= M_CHECK_STALE;
2207 rc = md_intent_lock(exp, op_data, NULL, 0,
2208 /* we are not interested in name
2211 ll_md_blocking_ast, 0);
2212 ll_finish_md_op_data(op_data);
2213 oit.it_create_mode &= ~M_CHECK_STALE;
2215 rc = ll_inode_revalidate_fini(inode, rc);
2219 rc = ll_revalidate_it_finish(req, &oit, dentry);
2221 ll_intent_release(&oit);
2225 /* Unlinked? Unhash dentry, so it is not picked up later by
2226 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2227 here to preserve get_cwd functionality on 2.6.
2229 if (!dentry->d_inode->i_nlink) {
2230 cfs_spin_lock(&ll_lookup_lock);
2231 spin_lock(&dcache_lock);
2232 ll_drop_dentry(dentry);
2233 spin_unlock(&dcache_lock);
2234 cfs_spin_unlock(&ll_lookup_lock);
2237 ll_lookup_finish_locks(&oit, dentry);
2238 } else if (!ll_have_md_lock(dentry->d_inode, ibits, LCK_MINMODE)) {
2239 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2240 obd_valid valid = OBD_MD_FLGETATTR;
2241 struct md_op_data *op_data;
/* Regular files also need the EA (striping) fetched, sized by the
 * current max MD size. */
2244 if (S_ISREG(inode->i_mode)) {
2245 rc = ll_get_max_mdsize(sbi, &ealen);
2248 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2251 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2252 0, ealen, LUSTRE_OPC_ANY,
2254 if (op_data == NULL)
2257 op_data->op_valid = valid;
2258 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2259 * capa for this inode. Because we only keep capas of dirs
2261 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2262 ll_finish_md_op_data(op_data);
2264 rc = ll_inode_revalidate_fini(inode, rc);
2268 rc = ll_prep_inode(&inode, req, NULL);
2271 ptlrpc_req_finished(req);
2275 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2278 struct inode *inode = dentry->d_inode;
2282 rc = __ll_inode_revalidate_it(dentry, it, ibits);
2284 /* if object not yet allocated, don't validate size */
2285 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
2286 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2287 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2288 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2292 /* cl_glimpse_size will prefer locally cached writes if they extend
2296 rc = cl_glimpse_size(inode);
/* getattr with an explicit lookup intent: revalidate UPDATE|LOOKUP bits,
 * then fill the kstat from the (now fresh) inode fields.  32-bit-API
 * clients get an ino synthesized from the FID instead of i_ino. */
2301 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2302 struct lookup_intent *it, struct kstat *stat)
2304 struct inode *inode = de->d_inode;
2305 struct ll_sb_info *sbi = ll_i2sbi(inode);
2306 struct ll_inode_info *lli = ll_i2info(inode);
2309 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
2310 MDS_INODELOCK_LOOKUP);
2311 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
2316 stat->dev = inode->i_sb->s_dev;
2317 if (ll_need_32bit_api(sbi))
2318 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
2320 stat->ino = inode->i_ino;
2321 stat->mode = inode->i_mode;
2322 stat->nlink = inode->i_nlink;
2323 stat->uid = inode->i_uid;
2324 stat->gid = inode->i_gid;
2325 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2326 stat->atime = inode->i_atime;
2327 stat->mtime = inode->i_mtime;
2328 stat->ctime = inode->i_ctime;
2329 #ifdef HAVE_INODE_BLKSIZE
2330 stat->blksize = inode->i_blksize;
2332 stat->blksize = 1 << inode->i_blkbits;
2335 stat->size = i_size_read(inode);
2336 stat->blocks = inode->i_blocks;
/* VFS ->getattr entry point: wrap ll_getattr_it() with a fresh
 * IT_GETATTR intent. */
2340 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2342 struct lookup_intent it = { .it_op = IT_GETATTR };
2344 return ll_getattr_it(mnt, de, &it, stat);
/* VFS ->fiemap entry point (when the kernel provides linux/fiemap.h):
 * translate the kernel's fiemap_extent_info into a ll_user_fiemap
 * buffer, run ll_do_fiemap(), and copy flags plus mapped extents back.
 * NOTE(review): only the first extent is copied in; whether this assumes
 * fi_extents_start is valid for extent_count == 0 cannot be confirmed
 * from this sampled listing. */
2347 #ifdef HAVE_LINUX_FIEMAP_H
2348 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2349 __u64 start, __u64 len)
2353 struct ll_user_fiemap *fiemap;
2354 unsigned int extent_count = fieinfo->fi_extents_max;
2356 num_bytes = sizeof(*fiemap) + (extent_count *
2357 sizeof(struct ll_fiemap_extent));
2358 OBD_ALLOC_LARGE(fiemap, num_bytes);
2363 fiemap->fm_flags = fieinfo->fi_flags;
2364 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2365 fiemap->fm_start = start;
2366 fiemap->fm_length = len;
2367 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2368 sizeof(struct ll_fiemap_extent));
2370 rc = ll_do_fiemap(inode, fiemap, num_bytes);
2372 fieinfo->fi_flags = fiemap->fm_flags;
2373 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2374 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2375 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2377 OBD_FREE_LARGE(fiemap, num_bytes);
/* POSIX ACL check callback: duplicate the cached ACL under lli_lock and
 * delegate to posix_acl_permission(); compiled out without
 * CONFIG_FS_POSIX_ACL.  NOTE(review): the NULL-acl path between lines
 * 2394 and 2399 is elided in this listing. */
2384 int lustre_check_acl(struct inode *inode, int mask)
2386 #ifdef CONFIG_FS_POSIX_ACL
2387 struct ll_inode_info *lli = ll_i2info(inode);
2388 struct posix_acl *acl;
2392 cfs_spin_lock(&lli->lli_lock);
2393 acl = posix_acl_dup(lli->lli_posix_acl);
2394 cfs_spin_unlock(&lli->lli_lock);
2399 rc = posix_acl_permission(inode, acl, mask);
2400 posix_acl_release(acl);
/* Permission check, in two kernel-version variants.  The >= 2.6.10
 * variant revalidates the root inode first (not validated by lookup),
 * defers to the remote-permission path for RMT_CLIENT mounts, and then
 * uses generic_permission() with lustre_check_acl.  The legacy variant
 * open-codes the classic owner/group/other + capability checks.
 * NOTE(review): the rc assignments/returns between several visible
 * lines are elided in this sampled listing. */
2408 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2409 #ifndef HAVE_INODE_PERMISION_2ARGS
2410 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2412 int ll_inode_permission(struct inode *inode, int mask)
2418 /* as root inode are NOT getting validated in lookup operation,
2419 * need to do it before permission check. */
2421 if (inode == inode->i_sb->s_root->d_inode) {
2422 struct lookup_intent it = { .it_op = IT_LOOKUP };
2424 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2425 MDS_INODELOCK_LOOKUP);
2430 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2431 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2433 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2434 return lustre_check_remote_perm(inode, mask);
2436 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2437 rc = generic_permission(inode, mask, lustre_check_acl);
/* Pre-2.6.10 fallback: hand-rolled UNIX permission logic. */
2442 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2444 int mode = inode->i_mode;
2447 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2448 inode->i_ino, inode->i_generation, inode, mask);
2450 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2451 return lustre_check_remote_perm(inode, mask);
2453 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2455 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2456 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2458 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2460 if (cfs_curproc_fsuid() == inode->i_uid) {
2463 if (((mode >> 3) & mask & S_IRWXO) != mask)
2465 rc = lustre_check_acl(inode, mask);
2469 goto check_capabilities;
2473 if (cfs_curproc_is_in_groups(inode->i_gid))
2476 if ((mode & mask & S_IRWXO) == mask)
2480 if (!(mask & MAY_EXEC) ||
2481 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2482 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2485 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2486 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Pick the vectored-I/O method names for the file_operations tables
 * below: older kernels use readv/writev, newer ones aio_read/aio_write. */
2493 #ifdef HAVE_FILE_READV
2494 #define READ_METHOD readv
2495 #define READ_FUNCTION ll_file_readv
2496 #define WRITE_METHOD writev
2497 #define WRITE_FUNCTION ll_file_writev
2499 #define READ_METHOD aio_read
2500 #define READ_FUNCTION ll_file_aio_read
2501 #define WRITE_METHOD aio_write
2502 #define WRITE_FUNCTION ll_file_aio_write
2505 /* -o localflock - only provides locally consistent flock locks */
/* Default file operations: no .flock/.lock entries, so flock falls back
 * to the kernel's local (node-only) semantics. */
2506 struct file_operations ll_file_operations = {
2507 .read = ll_file_read,
2508 .READ_METHOD = READ_FUNCTION,
2509 .write = ll_file_write,
2510 .WRITE_METHOD = WRITE_FUNCTION,
2511 #ifdef HAVE_UNLOCKED_IOCTL
2512 .unlocked_ioctl = ll_file_ioctl,
2514 .ioctl = ll_file_ioctl,
2516 .open = ll_file_open,
2517 .release = ll_file_release,
2518 .mmap = ll_file_mmap,
2519 .llseek = ll_file_seek,
2520 #ifdef HAVE_KERNEL_SENDFILE
2521 .sendfile = ll_file_sendfile,
2523 #ifdef HAVE_KERNEL_SPLICE_READ
2524 .splice_read = ll_file_splice_read,
/* File operations for -o flock mounts: same as the default table plus
 * cluster-coherent .flock/.lock handlers backed by ll_file_flock(). */
2530 struct file_operations ll_file_operations_flock = {
2531 .read = ll_file_read,
2532 .READ_METHOD = READ_FUNCTION,
2533 .write = ll_file_write,
2534 .WRITE_METHOD = WRITE_FUNCTION,
2535 #ifdef HAVE_UNLOCKED_IOCTL
2536 .unlocked_ioctl = ll_file_ioctl,
2538 .ioctl = ll_file_ioctl,
2540 .open = ll_file_open,
2541 .release = ll_file_release,
2542 .mmap = ll_file_mmap,
2543 .llseek = ll_file_seek,
2544 #ifdef HAVE_KERNEL_SENDFILE
2545 .sendfile = ll_file_sendfile,
2547 #ifdef HAVE_KERNEL_SPLICE_READ
2548 .splice_read = ll_file_splice_read,
2552 #ifdef HAVE_F_OP_FLOCK
2553 .flock = ll_file_flock,
2555 .lock = ll_file_flock
2558 /* These are for -o noflock - to return ENOSYS on flock calls */
2559 struct file_operations ll_file_operations_noflock = {
2560 .read = ll_file_read,
2561 .READ_METHOD = READ_FUNCTION,
2562 .write = ll_file_write,
2563 .WRITE_METHOD = WRITE_FUNCTION,
2564 #ifdef HAVE_UNLOCKED_IOCTL
2565 .unlocked_ioctl = ll_file_ioctl,
2567 .ioctl = ll_file_ioctl,
2569 .open = ll_file_open,
2570 .release = ll_file_release,
2571 .mmap = ll_file_mmap,
2572 .llseek = ll_file_seek,
2573 #ifdef HAVE_KERNEL_SENDFILE
2574 .sendfile = ll_file_sendfile,
2576 #ifdef HAVE_KERNEL_SPLICE_READ
2577 .splice_read = ll_file_splice_read,
2581 #ifdef HAVE_F_OP_FLOCK
2582 .flock = ll_file_noflock,
2584 .lock = ll_file_noflock
/* Inode operations shared by all regular Lustre files: attribute
 * get/set, permission, the xattr family, and fiemap when the kernel
 * supports it. */
2587 struct inode_operations ll_file_inode_operations = {
2588 .setattr = ll_setattr,
2589 .truncate = ll_truncate,
2590 .getattr = ll_getattr,
2591 .permission = ll_inode_permission,
2592 .setxattr = ll_setxattr,
2593 .getxattr = ll_getxattr,
2594 .listxattr = ll_listxattr,
2595 .removexattr = ll_removexattr,
2596 #ifdef HAVE_LINUX_FIEMAP_H
2597 .fiemap = ll_fiemap,
2601 /* dynamic ioctl number support routins */
/* Registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries protected by a rw semaphore.  Each entry carries
 * its callback, its allocation size, and a flexible array of the ioctl
 * command numbers it handles. */
2602 static struct llioc_ctl_data {
2603 cfs_rw_semaphore_t ioc_sem;
2604 cfs_list_t ioc_head;
2606 __RWSEM_INITIALIZER(llioc.ioc_sem),
2607 CFS_LIST_HEAD_INIT(llioc.ioc_head)
2612 cfs_list_t iocd_list;
2613 unsigned int iocd_size;
2614 llioc_callback_t iocd_cb;
2615 unsigned int iocd_count;
2616 unsigned int iocd_cmd[0];
/* Register a dynamic ioctl handler: validate the arguments, allocate a
 * llioc_data sized for @count command numbers, fill it, and append it
 * to the llioc registry under the write semaphore.  The returned pointer
 * (presumably in_data — the final RETURN is elided in this listing)
 * serves as the magic cookie for ll_iocontrol_unregister(). */
2619 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2622 struct llioc_data *in_data = NULL;
2625 if (cb == NULL || cmd == NULL ||
2626 count > LLIOC_MAX_CMD || count < 0)
2629 size = sizeof(*in_data) + count * sizeof(unsigned int);
2630 OBD_ALLOC(in_data, size);
2631 if (in_data == NULL)
2634 memset(in_data, 0, sizeof(*in_data));
2635 in_data->iocd_size = size;
2636 in_data->iocd_cb = cb;
2637 in_data->iocd_count = count;
2638 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2640 cfs_down_write(&llioc.ioc_sem);
2641 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2642 cfs_up_write(&llioc.ioc_sem);
2647 void ll_iocontrol_unregister(void *magic)
2649 struct llioc_data *tmp;
2654 cfs_down_write(&llioc.ioc_sem);
2655 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2657 unsigned int size = tmp->iocd_size;
2659 cfs_list_del(&tmp->iocd_list);
2660 cfs_up_write(&llioc.ioc_sem);
2662 OBD_FREE(tmp, size);
2666 cfs_up_write(&llioc.ioc_sem);
2668 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
/* Export the dynamic-ioctl registration API to other kernel modules. */
2671 EXPORT_SYMBOL(ll_iocontrol_register);
2672 EXPORT_SYMBOL(ll_iocontrol_unregister);
2674 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2675 unsigned int cmd, unsigned long arg, int *rcp)
2677 enum llioc_iter ret = LLIOC_CONT;
2678 struct llioc_data *data;
2679 int rc = -EINVAL, i;
2681 cfs_down_read(&llioc.ioc_sem);
2682 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2683 for (i = 0; i < data->iocd_count; i++) {
2684 if (cmd != data->iocd_cmd[i])
2687 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2691 if (ret == LLIOC_STOP)
2694 cfs_up_read(&llioc.ioc_sem);