1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/*
 * Allocate a per-open-file ll_file_data from the ll_file_data_slab cache
 * using CFS_ALLOC_IO allocation flags.
 * NOTE(review): this dump elides source lines; the return statement and
 * braces are not visible here.
 */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Return a ll_file_data previously obtained from ll_file_data_get() to
 * the ll_file_data_slab cache. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack inode attributes into @op_data for an MDS request: fid, mode,
 * atime/mtime/ctime, size, block count, external flags, the current
 * I/O epoch, the open file handle @fh, and an MDS capability.
 * NOTE(review): ll_mdscapa_get() presumably takes a capa reference that
 * the caller must release — confirm against llite_internal.h.
 */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
79 ll_inode_to_ext_flags(inode->i_flags);
80 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
82 op_data->op_handle = *fh;
83 op_data->op_capa1 = ll_mdscapa_get(inode);
87 * Closes the IO epoch and packs all the attributes into @op_data for
/*
 * Prepare @op_data for the MDS close RPC: mark mode and timestamps valid,
 * include size/blocks unless Size-on-MDS is in effect for a regular file,
 * close the I/O epoch, and pack the inode plus open handle into @op_data.
 * NOTE(review): source lines are elided in this dump; the FMODE_WRITE
 * early-out body is not visible.
 */
90 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
91 struct obd_client_handle *och)
95 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
96 ATTR_MTIME_SET | ATTR_CTIME_SET;
98 if (!(och->och_flags & FMODE_WRITE))
/* Without SOM (or for non-regular files) size/blocks come from the MDS,
 * so send them along with the close. */
101 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
102 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
104 ll_ioepoch_close(inode, op_data, &och, 0);
107 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
108 ll_prep_md_op_data(op_data, inode, NULL, NULL,
109 0, 0, LUSTRE_OPC_ANY, NULL);
/*
 * Send the MDS close RPC for open handle @och on @inode, perform the
 * Size-on-MDS update if the server requested it, destroy OST objects
 * attached to the close request, and invalidate the handle cookie.
 * NOTE(review): this dump elides lines, so several branches/returns are
 * not visible here; comments only, code untouched.
 */
113 static int ll_close_inode_openhandle(struct obd_export *md_exp,
115 struct obd_client_handle *och)
117 struct obd_export *exp = ll_i2mdexp(inode);
118 struct md_op_data *op_data;
119 struct ptlrpc_request *req = NULL;
120 struct obd_device *obd = class_exp2obd(exp);
127 * XXX: in case of LMV, is this correct to access
130 CERROR("Invalid MDC connection handle "LPX64"\n",
131 ll_i2mdexp(inode)->exp_handle.h_cookie);
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr back to MDS. */
147 rc = ll_som_update(inode, op_data);
149 CERROR("inode %lu mdc Size-on-MDS update failed: "
150 "rc = %d\n", inode->i_ino, rc);
154 CERROR("inode %lu mdc close failed: rc = %d\n",
157 ll_finish_md_op_data(op_data);
160 rc = ll_objects_destroy(req, inode);
162 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* With SOM enabled, a write handle whose epoch was not closed must wait
 * for DONE_WRITING before the och can be freed. */
169 if (exp_connect_som(exp) && !epoch_close &&
170 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
171 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
173 md_clear_open_replay_data(md_exp, och);
174 /* Free @och if it is not waiting for DONE_WRITING. */
175 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
178 if (req) /* This is close request */
179 ptlrpc_req_finished(req);
/*
 * Really close the MDS open handle matching @flags (write/exec/read) if
 * it has no remaining users. The handle pointer and its use count are
 * selected per open mode and manipulated under lli_och_sem.
 * NOTE(review): lines are elided in this dump — the och swap/NULLing
 * between the semaphore sections is not visible.
 */
183 int ll_md_real_close(struct inode *inode, int flags)
185 struct ll_inode_info *lli = ll_i2info(inode);
186 struct obd_client_handle **och_p;
187 struct obd_client_handle *och;
192 if (flags & FMODE_WRITE) {
193 och_p = &lli->lli_mds_write_och;
194 och_usecount = &lli->lli_open_fd_write_count;
195 } else if (flags & FMODE_EXEC) {
196 och_p = &lli->lli_mds_exec_och;
197 och_usecount = &lli->lli_open_fd_exec_count;
199 LASSERT(flags & FMODE_READ);
200 och_p = &lli->lli_mds_read_och;
201 och_usecount = &lli->lli_open_fd_read_count;
204 cfs_down(&lli->lli_och_sem);
205 if (*och_usecount) { /* There are still users of this handle, so
207 cfs_up(&lli->lli_och_sem);
212 cfs_up(&lli->lli_och_sem);
214 if (och) { /* There might be a race and somebody have freed this och
216 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close: drop a group lock if held, decrement the
 * open-mode use count under lli_och_sem, and — unless a cached OPEN DLM
 * lock still covers the file — do the real MDS close. Finally detach and
 * free the ll_file_data and close the capability.
 * NOTE(review): lines are elided in this dump (lockmode setup and several
 * closing braces are not visible).
 */
223 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
226 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
227 struct ll_inode_info *lli = ll_i2info(inode);
231 /* clear group lock, if present */
232 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
233 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
235 /* Let's see if we have good enough OPEN lock on the file and if
236 we can skip talking to MDS */
237 if (file->f_dentry->d_inode) { /* Can this ever be false? */
239 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
240 struct lustre_handle lockh;
241 struct inode *inode = file->f_dentry->d_inode;
242 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
244 cfs_down(&lli->lli_och_sem);
245 if (fd->fd_omode & FMODE_WRITE) {
247 LASSERT(lli->lli_open_fd_write_count);
248 lli->lli_open_fd_write_count--;
249 } else if (fd->fd_omode & FMODE_EXEC) {
251 LASSERT(lli->lli_open_fd_exec_count);
252 lli->lli_open_fd_exec_count--;
255 LASSERT(lli->lli_open_fd_read_count);
256 lli->lli_open_fd_read_count--;
258 cfs_up(&lli->lli_och_sem);
/* No cached OPEN lock (LDLM_FL_TEST_LOCK: only probe, don't take a
 * reference) — must tell the MDS about the close. */
260 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
261 LDLM_IBITS, &policy, lockmode,
263 rc = ll_md_real_close(file->f_dentry->d_inode,
267 CERROR("Releasing a file %p with negative dentry %p. Name %s",
268 file, file->f_dentry, file->f_dentry->d_name.name);
271 LUSTRE_FPRIVATE(file) = NULL;
272 ll_file_data_put(fd);
273 ll_capa_close(inode);
278 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
280 /* While this returns an error code, the caller (fput()) ignores it, so we need
281 * to make every effort to clean up all of our state here. Also, applications
282 * rarely check close errors and even if an error is returned they will not
283 * re-try the close call.
/*
 * VFS ->release() hook for Lustre files. Cleans up remote-ACL session
 * state for the root inode, stops a statahead thread this fd started,
 * handles the root-dentry fast path, clears async write errors recorded
 * on the stripe MD, and delegates the rest to ll_md_close().
 * NOTE(review): lines are elided in this dump; some branch bodies and
 * the final return are not visible.
 */
285 int ll_file_release(struct inode *inode, struct file *file)
287 struct ll_file_data *fd;
288 struct ll_sb_info *sbi = ll_i2sbi(inode);
289 struct ll_inode_info *lli = ll_i2info(inode);
290 struct lov_stripe_md *lsm = lli->lli_smd;
294 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
295 inode->i_generation, inode);
297 #ifdef CONFIG_FS_POSIX_ACL
298 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
299 inode == inode->i_sb->s_root->d_inode) {
300 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
303 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
304 fd->fd_flags &= ~LL_FILE_RMTACL;
305 rct_del(&sbi->ll_rct, cfs_curproc_pid());
306 et_search_free(&sbi->ll_et, cfs_curproc_pid());
311 if (inode->i_sb->s_root != file->f_dentry)
312 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
313 fd = LUSTRE_FPRIVATE(file);
316 /* The last ref on @file, maybe not the owner pid of statahead.
317 * Different processes can open the same dir, "ll_opendir_key" means:
318 * it is me that should stop the statahead thread. */
319 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
320 ll_stop_statahead(inode, lli->lli_opendir_key);
322 if (inode->i_sb->s_root == file->f_dentry) {
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
329 lov_test_and_clear_async_rc(lsm);
330 lli->lli_async_rc = 0;
332 rc = ll_md_close(sbi->ll_md_exp, inode, file);
334 if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
335 libcfs_debug_dumplog();
/*
 * Perform an intent-based open RPC to the MDS for @file, optionally
 * carrying striping info (@lmm/@lmmsize) when called from setstripe.
 * On success the inode is refreshed from the reply and the granted DLM
 * lock is attached to the inode.
 * NOTE(review): lines are elided in this dump; error-path labels and the
 * final return are not visible.
 */
340 static int ll_intent_file_open(struct file *file, void *lmm,
341 int lmmsize, struct lookup_intent *itp)
343 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
344 struct dentry *parent = file->f_dentry->d_parent;
345 const char *name = file->f_dentry->d_name.name;
346 const int len = file->f_dentry->d_name.len;
347 struct md_op_data *op_data;
348 struct ptlrpc_request *req;
355 /* Usually we come here only for NFSD, and we want open lock.
356 But we can also get here with pre 2.6.15 patchless kernels, and in
357 that case that lock is also ok */
358 /* We can also get here if there was cached open handle in revalidate_it
359 * but it disappeared while we were getting from there to ll_file_open.
360 * But this means this file was closed and immediately opened which
361 * makes a good candidate for using OPEN lock */
362 /* If lmmsize & lmm are not 0, we are just setting stripe info
363 * parameters. No need for the open lock */
364 if (!lmm && !lmmsize)
365 itp->it_flags |= MDS_OPEN_LOCK;
367 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
368 file->f_dentry->d_inode, name, len,
369 O_RDWR, LUSTRE_OPC_ANY, NULL);
371 RETURN(PTR_ERR(op_data));
373 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
374 0 /*unused */, &req, ll_md_blocking_ast, 0);
375 ll_finish_md_op_data(op_data);
377 /* reason to keep our own exit path - don't flood the log
378 * with messages with -ESTALE errors.
380 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
381 it_open_error(DISP_OPEN_OPEN, itp))
383 ll_release_openhandle(file->f_dentry, itp);
387 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
388 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
389 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
393 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
394 if (!rc && itp->d.lustre.it_lock_mode)
395 md_set_lock_data(sbi->ll_md_exp,
396 &itp->d.lustre.it_lock_handle,
397 file->f_dentry->d_inode, NULL);
400 ptlrpc_req_finished(itp->d.lustre.it_data);
401 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
402 ll_intent_drop_lock(itp);
408 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
409 * not believe attributes if a few ioepoch holders exist. Attributes for
410 * previous ioepoch if new one is opened are also skipped by MDS.
/* Record a newly granted, non-zero I/O epoch on the inode (no-op if it
 * matches the current one). */
412 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
414 if (ioepoch && lli->lli_ioepoch != ioepoch) {
415 lli->lli_ioepoch = ioepoch;
416 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
417 ioepoch, PFID(&lli->lli_fid));
/*
 * Fill an obd_client_handle from the MDS open reply carried in @it:
 * copy the remote file handle, magic, fid and open flags, record the
 * granted I/O epoch, and register the open for replay.
 * Returns the result of md_set_open_replay_data().
 */
421 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
422 struct lookup_intent *it, struct obd_client_handle *och)
424 struct ptlrpc_request *req = it->d.lustre.it_data;
425 struct mdt_body *body;
429 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
430 LASSERT(body != NULL); /* reply already checked out */
432 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
433 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
434 och->och_fid = lli->lli_fid;
435 och->och_flags = it->it_flags;
436 ll_ioepoch_open(lli, body->ioepoch);
438 return md_set_open_replay_data(md_exp, och, req);
/*
 * Complete the client-local part of an open: optionally fill @och from
 * the intent reply, then attach @fd to the file, initialize readahead
 * state, and remember the open mode.
 * NOTE(review): lines are elided in this dump; the conditional guarding
 * the och-fill section and the final return are not visible.
 */
441 int ll_local_open(struct file *file, struct lookup_intent *it,
442 struct ll_file_data *fd, struct obd_client_handle *och)
444 struct inode *inode = file->f_dentry->d_inode;
445 struct ll_inode_info *lli = ll_i2info(inode);
448 LASSERT(!LUSTRE_FPRIVATE(file));
453 struct ptlrpc_request *req = it->d.lustre.it_data;
454 struct mdt_body *body;
457 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
461 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
462 if ((it->it_flags & FMODE_WRITE) &&
463 (body->valid & OBD_MD_FLSIZE))
464 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
465 lli->lli_ioepoch, PFID(&lli->lli_fid));
468 LUSTRE_FPRIVATE(file) = fd;
469 ll_readahead_init(inode, &fd->fd_ras);
470 fd->fd_omode = it->it_flags;
474 /* Open a file, and (for the very first open) create objects on the OSTs at
475 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
476 * creation or open until ll_lov_setstripe() ioctl is called. We grab
477 * lli_open_sem to ensure no other process will create objects, send the
478 * stripe MD to the MDS, or try to destroy the objects if that fails.
480 * If we already have the stripe MD locally then we don't request it in
481 * md_open(), by passing a lmm_size = 0.
483 * It is up to the application to ensure no other processes open this file
484 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
485 * used. We might be able to avoid races of that sort by getting lli_open_sem
486 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
487 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * VFS ->open() hook. Allocates the per-open ll_file_data, registers a
 * statahead key for directories, handles the root-dentry fast path, and
 * either reuses an existing MDS open handle for the requested mode or
 * performs a fresh intent open (possibly retrying after a race on
 * lli_och_sem). Object creation on OSTs may be delayed when
 * O_LOV_DELAY_CREATE is set or the file is not opened for write.
 * NOTE(review): this dump elides many lines (retry label, error labels,
 * several closing braces); comments only, code untouched.
 */
489 int ll_file_open(struct inode *inode, struct file *file)
491 struct ll_inode_info *lli = ll_i2info(inode);
492 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
493 .it_flags = file->f_flags };
494 struct lov_stripe_md *lsm;
495 struct ptlrpc_request *req = NULL;
496 struct obd_client_handle **och_p;
498 struct ll_file_data *fd;
499 int rc = 0, opendir_set = 0;
502 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
503 inode->i_generation, inode, file->f_flags);
505 it = file->private_data; /* XXX: compat macro */
506 file->private_data = NULL; /* prevent ll_local_open assertion */
508 fd = ll_file_data_get();
/* For a directory open, claim the statahead key if nobody owns it yet. */
513 if (S_ISDIR(inode->i_mode)) {
514 cfs_spin_lock(&lli->lli_sa_lock);
515 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
516 LASSERT(lli->lli_sai == NULL);
517 lli->lli_opendir_key = fd;
518 lli->lli_opendir_pid = cfs_curproc_pid();
521 cfs_spin_unlock(&lli->lli_sa_lock);
524 if (inode->i_sb->s_root == file->f_dentry) {
525 LUSTRE_FPRIVATE(file) = fd;
529 if (!it || !it->d.lustre.it_disposition) {
530 /* Convert f_flags into access mode. We cannot use file->f_mode,
531 * because everything but O_ACCMODE mask was stripped from
533 if ((oit.it_flags + 1) & O_ACCMODE)
535 if (file->f_flags & O_TRUNC)
536 oit.it_flags |= FMODE_WRITE;
538 /* kernel only calls f_op->open in dentry_open. filp_open calls
539 * dentry_open after call to open_namei that checks permissions.
540 * Only nfsd_open calls dentry_open directly without checking
541 * permissions and because of that this code below is safe. */
542 if (oit.it_flags & FMODE_WRITE)
543 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
545 /* We do not want O_EXCL here, presumably we opened the file
546 * already? XXX - NFS implications? */
547 oit.it_flags &= ~O_EXCL;
549 /* bug20584, if "it_flags" contains O_CREAT, the file will be
550 * created if necessary, then "IT_CREAT" should be set to keep
551 * consistent with it */
552 if (oit.it_flags & O_CREAT)
553 oit.it_op |= IT_CREAT;
559 /* Let's see if we have file open on MDS already. */
560 if (it->it_flags & FMODE_WRITE) {
561 och_p = &lli->lli_mds_write_och;
562 och_usecount = &lli->lli_open_fd_write_count;
563 } else if (it->it_flags & FMODE_EXEC) {
564 och_p = &lli->lli_mds_exec_och;
565 och_usecount = &lli->lli_open_fd_exec_count;
567 och_p = &lli->lli_mds_read_och;
568 och_usecount = &lli->lli_open_fd_read_count;
571 cfs_down(&lli->lli_och_sem);
572 if (*och_p) { /* Open handle is present */
573 if (it_disposition(it, DISP_OPEN_OPEN)) {
574 /* Well, there's extra open request that we do not need,
575 let's close it somehow. This will decref request. */
576 rc = it_open_error(DISP_OPEN_OPEN, it);
578 cfs_up(&lli->lli_och_sem);
579 ll_file_data_put(fd);
580 GOTO(out_openerr, rc);
582 ll_release_openhandle(file->f_dentry, it);
583 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
588 rc = ll_local_open(file, it, fd, NULL);
591 cfs_up(&lli->lli_och_sem);
592 ll_file_data_put(fd);
593 GOTO(out_openerr, rc);
596 LASSERT(*och_usecount == 0);
597 if (!it->d.lustre.it_disposition) {
598 /* We cannot just request lock handle now, new ELC code
599 means that one of other OPEN locks for this file
600 could be cancelled, and since blocking ast handler
601 would attempt to grab och_sem as well, that would
602 result in a deadlock */
603 cfs_up(&lli->lli_och_sem);
604 it->it_create_mode |= M_CHECK_STALE;
605 rc = ll_intent_file_open(file, NULL, 0, it);
606 it->it_create_mode &= ~M_CHECK_STALE;
608 ll_file_data_put(fd);
609 GOTO(out_openerr, rc);
612 /* Got some error? Release the request */
613 if (it->d.lustre.it_status < 0) {
614 req = it->d.lustre.it_data;
615 ptlrpc_req_finished(req);
619 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
621 ll_file_data_put(fd);
622 GOTO(out_och_free, rc = -ENOMEM);
625 req = it->d.lustre.it_data;
627 /* md_intent_lock() didn't get a request ref if there was an
628 * open error, so don't do cleanup on the request here
630 /* XXX (green): Should not we bail out on any error here, not
631 * just open error? */
632 rc = it_open_error(DISP_OPEN_OPEN, it);
634 ll_file_data_put(fd);
635 GOTO(out_och_free, rc);
638 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
639 rc = ll_local_open(file, it, fd, *och_p);
641 ll_file_data_put(fd);
642 GOTO(out_och_free, rc);
645 cfs_up(&lli->lli_och_sem);
647 /* Must do this outside lli_och_sem lock to prevent deadlock where
648 different kind of OPEN lock for this same inode gets cancelled
649 by ldlm_cancel_lru */
650 if (!S_ISREG(inode->i_mode))
657 if (file->f_flags & O_LOV_DELAY_CREATE ||
658 !(file->f_mode & FMODE_WRITE)) {
659 CDEBUG(D_INODE, "object creation was delayed\n");
663 file->f_flags &= ~O_LOV_DELAY_CREATE;
666 ptlrpc_req_finished(req);
668 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
672 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
673 *och_p = NULL; /* OBD_FREE writes some magic there */
676 cfs_up(&lli->lli_och_sem);
678 if (opendir_set != 0)
679 ll_stop_statahead(inode, lli->lli_opendir_key);
685 /* Fills the obdo with the attributes for the lsm */
/*
 * Issue an asynchronous OST getattr for @lsm, wait for it, and leave the
 * result in @obdo with o_valid masked down to the attributes actually
 * obtained from the OSTs (blocks, blksize, times, size).
 * @ioepoch and @sync (server-side lock / OBD_FL_SRVLOCK) shape the request.
 * NOTE(review): lines are elided in this dump; oi_oa setup and returns
 * are partially missing.
 */
686 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
687 struct obd_capa *capa, struct obdo *obdo,
688 __u64 ioepoch, int sync)
690 struct ptlrpc_request_set *set;
691 struct obd_info oinfo = { { { 0 } } };
696 LASSERT(lsm != NULL);
700 oinfo.oi_oa->o_id = lsm->lsm_object_id;
701 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
702 oinfo.oi_oa->o_mode = S_IFREG;
703 oinfo.oi_oa->o_ioepoch = ioepoch;
704 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
705 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
706 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
707 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
708 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
709 oinfo.oi_capa = capa;
711 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
712 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
715 set = ptlrpc_prep_set();
717 CERROR("can't allocate ptlrpc set\n");
720 rc = obd_getattr_async(exp, &oinfo, set);
722 rc = ptlrpc_set_wait(set);
723 ptlrpc_set_destroy(set);
726 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
727 OBD_MD_FLATIME | OBD_MD_FLMTIME |
728 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
733 * Performs the getattr on the inode and updates its fields.
734 * If @sync != 0, perform the getattr under the server-side lock.
/* Wrapper around ll_lsm_getattr() for this inode's stripe MD; on success
 * refreshes the VFS inode from the returned obdo. */
736 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
737 __u64 ioepoch, int sync)
739 struct ll_inode_info *lli = ll_i2info(inode);
740 struct obd_capa *capa = ll_mdscapa_get(inode);
744 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
745 capa, obdo, ioepoch, sync);
748 obdo_refresh_inode(inode, obdo, obdo->o_valid);
750 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
751 lli->lli_smd->lsm_object_id, i_size_read(inode),
752 (unsigned long long)inode->i_blocks,
753 (unsigned long)ll_inode_blksize(inode));
/*
 * Merge MDS-provided timestamps with OST lock-value-block attributes and
 * apply the result (size, blocks, a/m/ctime) to the inode. Runs under the
 * level-1 inode size lock.
 */
758 int ll_merge_lvb(struct inode *inode)
760 struct ll_inode_info *lli = ll_i2info(inode);
761 struct ll_sb_info *sbi = ll_i2sbi(inode);
767 ll_inode_size_lock(inode, 1);
768 inode_init_lvb(inode, &lvb);
770 /* merge timestamps the most recently obtained from mds with
771 timestamps obtained from osts */
772 lvb.lvb_atime = lli->lli_lvb.lvb_atime;
773 lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
774 lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
775 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
776 cl_isize_write_nolock(inode, lvb.lvb_size);
778 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
779 PFID(&lli->lli_fid), lvb.lvb_size);
780 inode->i_blocks = lvb.lvb_blocks;
782 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
783 LTIME_S(inode->i_atime) = lvb.lvb_atime;
784 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
785 ll_inode_size_unlock(inode, 1);
/*
 * Glimpse helper for ioctls: fetch OST attributes for @lsm without a
 * capability and copy size/blocks/times into the caller's stat structure.
 * NOTE(review): the @st parameter and return are elided in this dump.
 */
790 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
793 struct obdo obdo = { 0 };
796 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
798 st->st_size = obdo.o_size;
799 st->st_blocks = obdo.o_blocks;
800 st->st_mtime = obdo.o_mtime;
801 st->st_atime = obdo.o_atime;
802 st->st_ctime = obdo.o_ctime;
/*
 * Initialize a cl_io for a read or write on @file: zero the structure,
 * propagate O_NONBLOCK and O_APPEND, attach the cl object, and pick the
 * lock requirement (never for nolock files, mandatory for O_APPEND,
 * otherwise "maybe").
 */
807 void ll_io_init(struct cl_io *io, const struct file *file, int write)
809 struct inode *inode = file->f_dentry->d_inode;
811 memset(io, 0, sizeof *io);
812 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
814 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
815 io->ci_obj = ll_i2info(inode)->lli_clob;
816 io->ci_lockreq = CILR_MAYBE;
817 if (ll_file_nolock(file)) {
818 io->ci_lockreq = CILR_NEVER;
819 io->ci_no_srvlock = 1;
820 } else if (file->f_flags & O_APPEND) {
821 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common engine for all read/write entry points: set up the cl_io from
 * @args (normal iovec, sendfile, or splice subtype), take lli_write_sem
 * for non-grouplock writes or lli_trunc_sem (read side) for reads, run
 * the cl_io loop, and update *ppos from the I/O result.
 * NOTE(review): lines are elided in this dump — case labels, RETURN and
 * several braces are not visible; comments only, code untouched.
 */
825 static ssize_t ll_file_io_generic(const struct lu_env *env,
826 struct vvp_io_args *args, struct file *file,
827 enum cl_io_type iot, loff_t *ppos, size_t count)
829 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
834 io = &ccc_env_info(env)->cti_io;
835 ll_io_init(io, file, iot == CIT_WRITE);
837 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
838 struct vvp_io *vio = vvp_env_io(env);
839 struct ccc_io *cio = ccc_env_io(env);
840 int write_sem_locked = 0;
842 cio->cui_fd = LUSTRE_FPRIVATE(file);
843 vio->cui_io_subtype = args->via_io_subtype;
845 switch (vio->cui_io_subtype) {
847 cio->cui_iov = args->u.normal.via_iov;
848 cio->cui_nrsegs = args->u.normal.via_nrsegs;
849 cio->cui_tot_nrsegs = cio->cui_nrsegs;
850 #ifndef HAVE_FILE_WRITEV
851 cio->cui_iocb = args->u.normal.via_iocb;
853 if ((iot == CIT_WRITE) &&
854 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
855 if(cfs_down_interruptible(&lli->lli_write_sem))
856 GOTO(out, result = -ERESTARTSYS);
857 write_sem_locked = 1;
858 } else if (iot == CIT_READ) {
859 cfs_down_read(&lli->lli_trunc_sem);
863 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
864 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
867 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
868 vio->u.splice.cui_flags = args->u.splice.via_flags;
871 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
874 result = cl_io_loop(env, io);
875 if (write_sem_locked)
876 cfs_up(&lli->lli_write_sem);
877 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
878 cfs_up_read(&lli->lli_trunc_sem);
880 /* cl_io_rw_init() handled IO */
881 result = io->ci_result;
884 if (io->ci_nob > 0) {
886 *ppos = io->u.ci_wr.wr.crw_pos;
891 if (iot == CIT_WRITE)
892 lli->lli_write_rc = result < 0 ? : 0;
898 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array: reject negative/overflowing lengths, verify
 * user-space access for each segment, and accumulate the total byte
 * count into *count (trimming *nr_segs at the first bad segment, per
 * the kernel original).
 * NOTE(review): accumulation lines are elided in this dump. */
900 static int ll_file_get_iov_count(const struct iovec *iov,
901 unsigned long *nr_segs, size_t *count)
906 for (seg = 0; seg < *nr_segs; seg++) {
907 const struct iovec *iv = &iov[seg];
910 * If any segment has a negative length, or the cumulative
911 * length ever wraps negative then return -EINVAL.
914 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
916 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
921 cnt -= iv->iov_len; /* This segment is no good */
928 #ifdef HAVE_FILE_READV
/* ->readv() entry (pre-AIO kernels): validate the iovec, obtain a cl
 * environment, fill IO_NORMAL args, and run the generic read path. */
929 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
930 unsigned long nr_segs, loff_t *ppos)
933 struct vvp_io_args *args;
939 result = ll_file_get_iov_count(iov, &nr_segs, &count);
943 env = cl_env_get(&refcheck);
945 RETURN(PTR_ERR(env));
947 args = vvp_env_args(env, IO_NORMAL);
948 args->u.normal.via_iov = (struct iovec *)iov;
949 args->u.normal.via_nrsegs = nr_segs;
951 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
952 cl_env_put(env, &refcheck);
/* ->read() entry (readv variant): wrap the user buffer in a single-segment
 * iovec from the per-env scratch area and delegate to ll_file_readv(). */
956 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
960 struct iovec *local_iov;
965 env = cl_env_get(&refcheck);
967 RETURN(PTR_ERR(env));
969 local_iov = &vvp_env_info(env)->vti_local_iov;
970 local_iov->iov_base = (void __user *)buf;
971 local_iov->iov_len = count;
972 result = ll_file_readv(file, local_iov, 1, ppos);
973 cl_env_put(env, &refcheck);
/* ->aio_read() entry: validate the iovec, fill IO_NORMAL args including
 * the kiocb, and run the generic read path against iocb->ki_pos. */
978 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
979 unsigned long nr_segs, loff_t pos)
982 struct vvp_io_args *args;
988 result = ll_file_get_iov_count(iov, &nr_segs, &count);
992 env = cl_env_get(&refcheck);
994 RETURN(PTR_ERR(env));
996 args = vvp_env_args(env, IO_NORMAL);
997 args->u.normal.via_iov = (struct iovec *)iov;
998 args->u.normal.via_nrsegs = nr_segs;
999 args->u.normal.via_iocb = iocb;
1001 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1002 &iocb->ki_pos, count);
1003 cl_env_put(env, &refcheck);
/* ->read() entry (AIO variant): build a synchronous kiocb plus a
 * single-segment iovec from per-env scratch space, run ll_file_aio_read(),
 * and propagate the updated position back to *ppos. */
1007 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1011 struct iovec *local_iov;
1012 struct kiocb *kiocb;
1017 env = cl_env_get(&refcheck);
1019 RETURN(PTR_ERR(env));
1021 local_iov = &vvp_env_info(env)->vti_local_iov;
1022 kiocb = &vvp_env_info(env)->vti_kiocb;
1023 local_iov->iov_base = (void __user *)buf;
1024 local_iov->iov_len = count;
1025 init_sync_kiocb(kiocb, file);
1026 kiocb->ki_pos = *ppos;
1027 kiocb->ki_left = count;
1029 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1030 *ppos = kiocb->ki_pos;
1032 cl_env_put(env, &refcheck);
1038 * Write to a file (through the page cache).
1040 #ifdef HAVE_FILE_WRITEV
/* ->writev() entry (pre-AIO kernels): validate the iovec, fill IO_NORMAL
 * args, and run the generic write path. */
1041 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1042 unsigned long nr_segs, loff_t *ppos)
1045 struct vvp_io_args *args;
1051 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1055 env = cl_env_get(&refcheck);
1057 RETURN(PTR_ERR(env));
1059 args = vvp_env_args(env, IO_NORMAL);
1060 args->u.normal.via_iov = (struct iovec *)iov;
1061 args->u.normal.via_nrsegs = nr_segs;
1063 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1064 cl_env_put(env, &refcheck);
/* ->write() entry (writev variant): wrap the user buffer in a
 * single-segment iovec and delegate to ll_file_writev(). */
1068 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1072 struct iovec *local_iov;
1077 env = cl_env_get(&refcheck);
1079 RETURN(PTR_ERR(env));
1081 local_iov = &vvp_env_info(env)->vti_local_iov;
1082 local_iov->iov_base = (void __user *)buf;
1083 local_iov->iov_len = count;
1085 result = ll_file_writev(file, local_iov, 1, ppos);
1086 cl_env_put(env, &refcheck);
1090 #else /* AIO stuff */
/* ->aio_write() entry: validate the iovec, fill IO_NORMAL args including
 * the kiocb, and run the generic write path against iocb->ki_pos. */
1091 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1092 unsigned long nr_segs, loff_t pos)
1095 struct vvp_io_args *args;
1101 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1105 env = cl_env_get(&refcheck);
1107 RETURN(PTR_ERR(env));
1109 args = vvp_env_args(env, IO_NORMAL);
1110 args->u.normal.via_iov = (struct iovec *)iov;
1111 args->u.normal.via_nrsegs = nr_segs;
1112 args->u.normal.via_iocb = iocb;
1114 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1115 &iocb->ki_pos, count);
1116 cl_env_put(env, &refcheck);
/* ->write() entry (AIO variant): build a synchronous kiocb plus a
 * single-segment iovec, run ll_file_aio_write(), and propagate the
 * updated position back to *ppos. */
1120 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1124 struct iovec *local_iov;
1125 struct kiocb *kiocb;
1130 env = cl_env_get(&refcheck);
1132 RETURN(PTR_ERR(env));
1134 local_iov = &vvp_env_info(env)->vti_local_iov;
1135 kiocb = &vvp_env_info(env)->vti_kiocb;
1136 local_iov->iov_base = (void __user *)buf;
1137 local_iov->iov_len = count;
1138 init_sync_kiocb(kiocb, file);
1139 kiocb->ki_pos = *ppos;
1140 kiocb->ki_left = count;
1142 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1143 *ppos = kiocb->ki_pos;
1145 cl_env_put(env, &refcheck);
1151 #ifdef HAVE_KERNEL_SENDFILE
1153 * Send file content (through pagecache) somewhere with helper
/* ->sendfile() entry: run the generic read path with IO_SENDFILE args
 * carrying the read actor and its target. */
1155 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1156 read_actor_t actor, void *target)
1159 struct vvp_io_args *args;
1164 env = cl_env_get(&refcheck);
1166 RETURN(PTR_ERR(env));
1168 args = vvp_env_args(env, IO_SENDFILE);
1169 args->u.sendfile.via_target = target;
1170 args->u.sendfile.via_actor = actor;
1172 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1173 cl_env_put(env, &refcheck);
1178 #ifdef HAVE_KERNEL_SPLICE_READ
1180 * Send file content (through pagecache) somewhere with helper
/* ->splice_read() entry: run the generic read path with IO_SPLICE args
 * carrying the target pipe and splice flags. */
1182 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1183 struct pipe_inode_info *pipe, size_t count,
1187 struct vvp_io_args *args;
1192 env = cl_env_get(&refcheck);
1194 RETURN(PTR_ERR(env));
1196 args = vvp_env_args(env, IO_SPLICE);
1197 args->u.splice.via_pipe = pipe;
1198 args->u.splice.via_flags = flags;
1200 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1201 cl_env_put(env, &refcheck);
/*
 * Recreate the OST object for this inode's stripe MD on OST @ost_idx
 * (carried in oa->o_nlink) by cloning the lsm and calling obd_create()
 * with OBD_FL_RECREATE_OBJS. Runs under the level-0 inode size lock.
 * NOTE(review): lines are elided in this dump — oa allocation, id/seq
 * assignment and out labels are not visible.
 */
1206 static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
1209 struct obd_export *exp = ll_i2dtexp(inode);
1210 struct obd_trans_info oti = { 0 };
1211 struct obdo *oa = NULL;
1214 struct lov_stripe_md *lsm, *lsm2;
1221 ll_inode_size_lock(inode, 0);
1222 lsm = ll_i2info(inode)->lli_smd;
1224 GOTO(out, rc = -ENOENT);
1225 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1226 (lsm->lsm_stripe_count));
1228 OBD_ALLOC_LARGE(lsm2, lsm_size);
1230 GOTO(out, rc = -ENOMEM);
1234 oa->o_nlink = ost_idx;
1235 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1236 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1237 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid, OBD_MD_FLTYPE |
1238 OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1239 memcpy(lsm2, lsm, lsm_size);
1240 rc = obd_create(exp, oa, &lsm2, &oti);
1242 OBD_FREE_LARGE(lsm2, lsm_size);
1245 ll_inode_size_unlock(inode, 0);
/* LL_IOC_RECREATE_OBJ handler: copy a ll_recreate_obj request from user
 * space (CAP_SYS_ADMIN required) and recreate the object by id/ost index. */
1250 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1252 struct ll_recreate_obj ucreat;
1255 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1258 if (cfs_copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1259 sizeof(struct ll_recreate_obj)))
1262 RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
1263 ucreat.lrc_ost_idx));
/* LL_IOC_RECREATE_FID handler: copy a lu_fid from user space
 * (CAP_SYS_ADMIN required), unpack the object id and OST index encoded
 * in the fid (low 16 bits of seq = ost_idx, next 16 bits fold into the
 * id high word), and recreate the object. */
1266 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1273 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1276 if (cfs_copy_from_user(&fid, (struct lu_fid *)arg,
1277 sizeof(struct lu_fid)))
1280 id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
1281 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1282 RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
/*
 * Set striping for @inode by performing an intent open that carries the
 * user-supplied lov_user_md. Fails if a stripe MD already exists; on
 * success the transient open handle is released again.
 * NOTE(review): lines are elided in this dump — intermediate returns,
 * the out label and the req-free path are partially missing.
 */
1285 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1286 int flags, struct lov_user_md *lum, int lum_size)
1288 struct lov_stripe_md *lsm;
1289 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1293 ll_inode_size_lock(inode, 0);
1294 lsm = ll_i2info(inode)->lli_smd;
1296 ll_inode_size_unlock(inode, 0);
1297 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1302 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1305 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1306 GOTO(out_req_free, rc = -ENOENT);
1307 rc = oit.d.lustre.it_status;
1309 GOTO(out_req_free, rc);
1311 ll_release_openhandle(file->f_dentry, &oit);
1314 ll_inode_size_unlock(inode, 0);
1315 ll_intent_release(&oit);
1318 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (striping descriptor) of @filename (a child of
 * @inode) via an MDS getattr-by-name RPC.
 *
 * On success *lmmp points into the reply buffer of *request; the caller
 * owns *request and must ptlrpc_req_finished() it after consuming the
 * EA.  Returns -ENODATA when no striping EA is present and -EPROTO on
 * an unrecognized LOV magic.
 */
1322 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1323 struct lov_mds_md **lmmp, int *lmm_size,
1324 struct ptlrpc_request **request)
1326 struct ll_sb_info *sbi = ll_i2sbi(inode);
1327 struct mdt_body *body;
1328 struct lov_mds_md *lmm = NULL;
1329 struct ptlrpc_request *req = NULL;
1330 struct md_op_data *op_data;
/* Size the reply buffer for the largest EA this MDS can return. */
1333 rc = ll_get_max_mdsize(sbi, &lmmsize);
1337 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1338 strlen(filename), lmmsize,
1339 LUSTRE_OPC_ANY, NULL);
1340 if (op_data == NULL)
1343 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1344 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1345 ll_finish_md_op_data(op_data);
1347 CDEBUG(D_INFO, "md_getattr_name failed "
1348 "on %s: rc %d\n", filename, rc);
1352 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1353 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1355 lmmsize = body->eadatasize;
1357 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1359 GOTO(out, rc = -ENODATA);
1362 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1363 LASSERT(lmm != NULL);
1365 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1366 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1367 GOTO(out, rc = -EPROTO);
1371 * This is coming from the MDS, so is probably in
1372 * little endian. We convert it to host endian before
1373 * passing it to userspace.
/* Only swab on big-endian hosts (the LOV_MAGIC test is an endianness
 * probe), and only swab the per-object array for regular files -- a
 * directory's default-striping EA carries no lmm_objects. */
1375 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1376 /* if function called for directory - we should
1377 * avoid swab not existent lsm objects */
1378 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1379 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1380 if (S_ISREG(body->mode))
1381 lustre_swab_lov_user_md_objects(
1382 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1383 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1384 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1385 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1386 if (S_ISREG(body->mode))
1387 lustre_swab_lov_user_md_objects(
1388 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1389 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1395 *lmm_size = lmmsize;
/*
 * ioctl(LL_IOC_LOV_SETEA): administrative (CAP_SYS_ADMIN) variant of
 * setstripe that accepts a lov_user_md with one explicit object entry
 * (MDS_OPEN_HAS_OBJS).  The buffer is copied from userspace into a
 * kernel allocation and handed to ll_lov_setstripe_ea_info().
 */
1400 static int ll_lov_setea(struct inode *inode, struct file *file,
1403 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1404 struct lov_user_md *lump;
/* Room for the header plus a single trailing ost_data entry. */
1405 int lum_size = sizeof(struct lov_user_md) +
1406 sizeof(struct lov_user_ost_data);
1410 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1413 OBD_ALLOC_LARGE(lump, lum_size);
1417 if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1418 OBD_FREE_LARGE(lump, lum_size);
1422 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1424 OBD_FREE_LARGE(lump, lum_size);
/*
 * ioctl(LL_IOC_LOV_SETSTRIPE): set striping for a file.  Reads a v1
 * lov_user_md first (a prefix of the v3 layout) and, when the magic
 * identifies a v3 request, re-reads the full v3 structure.  After a
 * successful EA install, the striping seen by userspace is refreshed
 * by issuing a GETSTRIPE back through the LOV.
 */
1428 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1431 struct lov_user_md_v3 lumv3;
1432 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1433 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1434 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1437 int flags = FMODE_WRITE;
1440 /* first try with v1 which is smaller than v3 */
1441 lum_size = sizeof(struct lov_user_md_v1);
1442 if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
1445 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1446 lum_size = sizeof(struct lov_user_md_v3);
1447 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1451 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* Tell userspace the authoritative layout: zero the caller's stripe
 * count, then have the LOV fill the user buffer back in. */
1453 put_user(0, &lumv1p->lmm_stripe_count);
1454 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1455 0, ll_i2info(inode)->lli_smd,
/*
 * ioctl(LL_IOC_LOV_GETSTRIPE): return this file's striping information
 * to userspace via the LOV obd_iocontrol path.
 */
1461 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1463 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1468 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * ioctl(LL_IOC_GROUP_LOCK): acquire a Lustre group lock with group id
 * @arg on behalf of this file descriptor.  At most one group lock may
 * be held per fd.  lli_lock guards the fd flag/handle update; because
 * cl_get_grouplock() may block, the "already locked" condition is
 * re-checked after it returns to close the acquisition race.
 */
1472 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1474 struct ll_inode_info *lli = ll_i2info(inode);
1475 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1476 struct ccc_grouplock grouplock;
1480 if (ll_file_nolock(file))
1481 RETURN(-EOPNOTSUPP);
1483 cfs_spin_lock(&lli->lli_lock);
1484 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1485 CWARN("group lock already existed with gid %lu\n",
1486 fd->fd_grouplock.cg_gid);
1487 cfs_spin_unlock(&lli->lli_lock);
1490 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1491 cfs_spin_unlock(&lli->lli_lock);
/* May block unless the caller opened with O_NONBLOCK. */
1493 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1494 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check under the lock: another thread may have won meanwhile. */
1498 cfs_spin_lock(&lli->lli_lock);
1499 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1500 cfs_spin_unlock(&lli->lli_lock);
1501 CERROR("another thread just won the race\n");
1502 cl_put_grouplock(&grouplock);
1506 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1507 fd->fd_grouplock = grouplock;
1508 cfs_spin_unlock(&lli->lli_lock);
1510 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * ioctl(LL_IOC_GROUP_UNLOCK): drop the group lock held by this file
 * descriptor, provided one is held and its group id matches @arg.  The
 * fd state is cleared under lli_lock; the lock itself is released via
 * cl_put_grouplock() after the spinlock is dropped.
 */
1514 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1516 struct ll_inode_info *lli = ll_i2info(inode);
1517 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1518 struct ccc_grouplock grouplock;
1521 cfs_spin_lock(&lli->lli_lock);
1522 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1523 cfs_spin_unlock(&lli->lli_lock);
1524 CWARN("no group lock held\n");
1527 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1529 if (fd->fd_grouplock.cg_gid != arg) {
1530 CWARN("group lock %lu doesn't match current id %lu\n",
1531 arg, fd->fd_grouplock.cg_gid);
1532 cfs_spin_unlock(&lli->lli_lock);
/* Detach the handle from the fd before releasing it. */
1536 grouplock = fd->fd_grouplock;
1537 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1538 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1539 cfs_spin_unlock(&lli->lli_lock);
1541 cl_put_grouplock(&grouplock);
1542 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1547 * Close inode open handle
1549 * \param dentry [in] dentry which contains the inode
1550 * \param it [in,out] intent which contains open info and result
1553 * \retval <0 failure
1555 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1557 struct inode *inode = dentry->d_inode;
1558 struct obd_client_handle *och;
1564 /* Root ? Do nothing. */
1565 if (dentry->d_inode->i_sb->s_root == dentry)
1568 /* No open handle to close? Move away */
1569 if (!it_disposition(it, DISP_OPEN_OPEN))
1572 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1574 OBD_ALLOC(och, sizeof(*och));
1576 GOTO(out, rc = -ENOMEM);
/* Build a client handle from the intent so the MDS-side open state
 * can be closed even though no struct file was ever created. */
1578 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1579 ll_i2info(inode), it, och);
1581 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1584 /* this one is in place of ll_file_open */
1585 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1586 ptlrpc_req_finished(it->d.lustre.it_data);
1587 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1592 * Get size for inode for which FIEMAP mapping is requested.
1593 * Make the FIEMAP get_info call and returns the result.
/*
 * Issue the FIEMAP request for @inode to the data stack via
 * obd_get_info(KEY_FIEMAP).  @num_bytes is the size of the caller's
 * whole fiemap buffer (header plus extent slots).  Flags outside
 * LUSTRE_FIEMAP_FLAGS_COMPAT are rejected, with the supported set
 * reported back through fiemap->fm_flags; FIEMAP_FLAG_SYNC forces a
 * writeback of dirty pages first so the mapping reflects cached writes.
 */
1595 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1598 struct obd_export *exp = ll_i2dtexp(inode);
1599 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1600 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1601 int vallen = num_bytes;
1605 /* Checks for fiemap flags */
1606 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1607 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1611 /* Check for FIEMAP_FLAG_SYNC */
1612 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1613 rc = filemap_fdatawrite(inode->i_mapping);
1618 /* If the stripe_count > 1 and the application does not understand
1619 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1621 if (lsm->lsm_stripe_count > 1 &&
1622 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1625 fm_key.oa.o_id = lsm->lsm_object_id;
1626 fm_key.oa.o_seq = lsm->lsm_object_seq;
1627 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1629 obdo_from_inode(&fm_key.oa, inode, &ll_i2info(inode)->lli_fid,
1631 /* If filesize is 0, then there would be no objects for mapping */
1632 if (fm_key.oa.o_size == 0) {
1633 fiemap->fm_mapped_extents = 0;
1637 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1639 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1641 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * ioctl(OBD_IOC_FID2PATH): translate a fid into a filesystem path.
 * The getinfo_fid2path header is copied in first to learn the caller's
 * path buffer length, a reply of that size is allocated, the request
 * is forwarded to the MDC via obd_iocontrol(), and the filled result
 * is copied back to userspace.
 */
1646 int ll_fid2path(struct obd_export *exp, void *arg)
1648 struct getinfo_fid2path *gfout, *gfin;
1652 /* Need to get the buflen */
1653 OBD_ALLOC_PTR(gfin);
1656 if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
/* Output = fixed header + caller-specified path length. */
1661 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1662 OBD_ALLOC(gfout, outsize);
1663 if (gfout == NULL) {
1667 memcpy(gfout, gfin, sizeof(*gfout));
1670 /* Call mdc_iocontrol */
1671 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1674 if (cfs_copy_to_user(arg, gfout, outsize))
1678 OBD_FREE(gfout, outsize);
/*
 * ioctl(FSFILT_IOC_FIEMAP): userspace front end for ll_do_fiemap().
 * Reads fm_extent_count to size a kernel fiemap buffer, copies in the
 * request (and, when extents are requested, the first extent, which
 * seeds the continuation offset/device), runs the mapping, and copies
 * the header plus mapped extents back out.
 */
1682 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1684 struct ll_user_fiemap *fiemap_s;
1685 size_t num_bytes, ret_bytes;
1686 unsigned int extent_count;
1689 /* Get the extent count so we can calculate the size of
1690 * required fiemap buffer */
1691 if (get_user(extent_count,
1692 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
/* NOTE(review): extent_count is user-controlled; the multiplication
 * below has no visible overflow check here -- confirm upstream. */
1694 num_bytes = sizeof(*fiemap_s) + (extent_count *
1695 sizeof(struct ll_fiemap_extent));
1697 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1698 if (fiemap_s == NULL)
1701 /* get the fiemap value */
1702 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1704 GOTO(error, rc = -EFAULT);
1706 /* If fm_extent_count is non-zero, read the first extent since
1707 * it is used to calculate end_offset and device from previous
1710 if (copy_from_user(&fiemap_s->fm_extents[0],
1711 (char __user *)arg + sizeof(*fiemap_s),
1712 sizeof(struct ll_fiemap_extent)))
1713 GOTO(error, rc = -EFAULT);
1716 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1720 ret_bytes = sizeof(struct ll_user_fiemap);
1722 if (extent_count != 0)
1723 ret_bytes += (fiemap_s->fm_mapped_extents *
1724 sizeof(struct ll_fiemap_extent));
1726 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1730 OBD_FREE_LARGE(fiemap_s, num_bytes);
1734 #ifdef HAVE_UNLOCKED_IOCTL
/*
 * Main ioctl entry point for regular Lustre files.  With
 * HAVE_UNLOCKED_IOCTL the inode is derived from the file; the legacy
 * ->ioctl prototype receives it explicitly.  Lustre-private commands
 * are dispatched here; anything unrecognized falls through to the
 * dynamically registered handlers (ll_iocontrol_call) and finally to
 * the data-stack obd_iocontrol() path.
 */
1735 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1737 struct inode *inode = file->f_dentry->d_inode;
1739 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1743 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1747 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1748 inode->i_generation, inode, cmd);
1749 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1751 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1752 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1756 case LL_IOC_GETFLAGS:
1757 /* Get the current value of the file flags */
1758 return put_user(fd->fd_flags, (int *)arg);
1759 case LL_IOC_SETFLAGS:
1760 case LL_IOC_CLRFLAGS:
1761 /* Set or clear specific file flags */
1762 /* XXX This probably needs checks to ensure the flags are
1763 * not abused, and to handle any flag side effects.
1765 if (get_user(flags, (int *) arg))
1768 if (cmd == LL_IOC_SETFLAGS) {
/* LL_FILE_IGNORE_LOCK is only safe for O_DIRECT I/O, which
 * bypasses the (lock-protected) page cache. */
1769 if ((flags & LL_FILE_IGNORE_LOCK) &&
1770 !(file->f_flags & O_DIRECT)) {
1771 CERROR("%s: unable to disable locking on "
1772 "non-O_DIRECT file\n", current->comm);
1776 fd->fd_flags |= flags;
1778 fd->fd_flags &= ~flags;
1781 case LL_IOC_LOV_SETSTRIPE:
1782 RETURN(ll_lov_setstripe(inode, file, arg));
1783 case LL_IOC_LOV_SETEA:
1784 RETURN(ll_lov_setea(inode, file, arg));
1785 case LL_IOC_LOV_GETSTRIPE:
1786 RETURN(ll_lov_getstripe(inode, arg));
1787 case LL_IOC_RECREATE_OBJ:
1788 RETURN(ll_lov_recreate_obj(inode, arg));
1789 case LL_IOC_RECREATE_FID:
1790 RETURN(ll_lov_recreate_fid(inode, arg));
1791 case FSFILT_IOC_FIEMAP:
1792 RETURN(ll_ioctl_fiemap(inode, arg));
1793 case FSFILT_IOC_GETFLAGS:
1794 case FSFILT_IOC_SETFLAGS:
1795 RETURN(ll_iocontrol(inode, file, cmd, arg));
1796 case FSFILT_IOC_GETVERSION_OLD:
1797 case FSFILT_IOC_GETVERSION:
1798 RETURN(put_user(inode->i_generation, (int *)arg));
1799 case LL_IOC_GROUP_LOCK:
1800 RETURN(ll_get_grouplock(inode, file, arg));
1801 case LL_IOC_GROUP_UNLOCK:
1802 RETURN(ll_put_grouplock(inode, file, arg));
1803 case IOC_OBD_STATFS:
1804 RETURN(ll_obd_statfs(inode, (void *)arg));
1806 /* We need to special case any other ioctls we want to handle,
1807 * to send them to the MDS/OST as appropriate and to properly
1808 * network encode the arg field.
1809 case FSFILT_IOC_SETVERSION_OLD:
1810 case FSFILT_IOC_SETVERSION:
1812 case LL_IOC_FLUSHCTX:
1813 RETURN(ll_flush_ctx(inode));
1814 case LL_IOC_PATH2FID: {
1815 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1816 sizeof(struct lu_fid)))
1821 case OBD_IOC_FID2PATH:
1822 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1824 case LL_IOC_GET_MDTIDX: {
1827 mdtidx = ll_get_mdt_idx(inode);
1831 if (put_user((int)mdtidx, (int*)arg))
/* Unknown command: try dynamically registered handlers first,
 * then hand it to the OSC/LOV stack. */
1841 ll_iocontrol_call(inode, file, cmd, arg, &err))
1844 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek for Lustre files.  SEEK_END must fetch the authoritative file
 * size from the OSTs (cl_glimpse_size) before computing the target
 * offset; O_NONBLOCK callers prepare a non-blocking DLM match flag.
 * The final offset must lie within [0, ll_file_maxbytes(inode)].
 */
1850 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1852 struct inode *inode = file->f_dentry->d_inode;
1855 retval = offset + ((origin == 2) ? i_size_read(inode) :
1856 (origin == 1) ? file->f_pos : 0);
1857 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1858 inode->i_ino, inode->i_generation, inode, retval, retval,
1859 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1860 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1862 if (origin == 2) { /* SEEK_END */
1863 int nonblock = 0, rc;
1865 if (file->f_flags & O_NONBLOCK)
1866 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* Refresh i_size from the OSTs before using it below. */
1868 rc = cl_glimpse_size(inode);
1872 offset += i_size_read(inode);
1873 } else if (origin == 1) { /* SEEK_CUR */
1874 offset += file->f_pos;
1878 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1879 if (offset != file->f_pos) {
1880 file->f_pos = offset;
1888 #ifdef HAVE_FLUSH_OWNER_ID
/*
 * ->flush() hook (called on every close of an fd): report any write
 * error recorded against this inode.  Sticky async writeback errors
 * are collected from the inode (lli_async_rc) and the per-stripe OSCs
 * and cleared; any recorded failure is reported as -EIO.  No data is
 * written out here.
 */
1889 int ll_flush(struct file *file, fl_owner_t id)
1891 int ll_flush(struct file *file)
1894 struct inode *inode = file->f_dentry->d_inode;
1895 struct ll_inode_info *lli = ll_i2info(inode);
1896 struct lov_stripe_md *lsm = lli->lli_smd;
1899 /* the application should know write failure already. */
1900 if (lli->lli_write_rc)
1903 /* catch async errors that were recorded back when async writeback
1904 * failed for pages in this mapping. */
1905 rc = lli->lli_async_rc;
1906 lli->lli_async_rc = 0;
1908 err = lov_test_and_clear_async_rc(lsm);
1913 return rc ? -EIO : 0;
/*
 * fsync() for Lustre files: wait for in-flight page writeback, collect
 * recorded async writeback errors, sync the metadata via an MDS
 * md_sync() RPC, and then sync the file data to the OSTs with
 * obd_sync_rqset().  The first error encountered is preserved; an OST
 * sync failure is also latched into lli_write_rc so a later flush/close
 * reports it.
 */
1916 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1918 struct inode *inode = dentry->d_inode;
1919 struct ll_inode_info *lli = ll_i2info(inode);
1920 struct lov_stripe_md *lsm = lli->lli_smd;
1921 struct ptlrpc_request *req;
1922 struct obd_capa *oc;
1925 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1926 inode->i_generation, inode);
1927 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1929 /* fsync's caller has already called _fdata{sync,write}, we want
1930 * that IO to finish before calling the osc and mdc sync methods */
1931 rc = filemap_fdatawait(inode->i_mapping);
1933 /* catch async errors that were recorded back when async writeback
1934 * failed for pages in this mapping. */
1935 err = lli->lli_async_rc;
1936 lli->lli_async_rc = 0;
1940 err = lov_test_and_clear_async_rc(lsm);
/* Metadata sync to the MDS (capability-protected if enabled). */
1945 oc = ll_mdscapa_get(inode);
1946 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1952 ptlrpc_req_finished(req);
/* Data sync to the OSTs across all stripes. */
1955 struct obd_info *oinfo;
1957 OBD_ALLOC_PTR(oinfo);
1959 RETURN(rc ? rc : -ENOMEM);
1960 OBDO_ALLOC(oinfo->oi_oa);
1961 if (!oinfo->oi_oa) {
1962 OBD_FREE_PTR(oinfo);
1963 RETURN(rc ? rc : -ENOMEM);
1965 oinfo->oi_oa->o_id = lsm->lsm_object_id;
1966 oinfo->oi_oa->o_seq = lsm->lsm_object_seq;
1967 oinfo->oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1968 obdo_from_inode(oinfo->oi_oa, inode, &ll_i2info(inode)->lli_fid,
1969 OBD_MD_FLTYPE | OBD_MD_FLATIME |
1970 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1973 oinfo->oi_capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1974 err = obd_sync_rqset(ll_i2sbi(inode)->ll_dt_exp, oinfo, 0,
1976 capa_put(oinfo->oi_capa);
1979 OBDO_FREE(oinfo->oi_oa);
1980 OBD_FREE_PTR(oinfo);
/* NOTE(review): "err < 0 ? : 0" stores the *truth value* of the test
 * (GNU ?: elision), not err itself -- confirm this is intentional. */
1981 lli->lli_write_rc = err < 0 ? : 0;
/*
 * fcntl/flock handler: translate a kernel struct file_lock into an
 * LDLM_FLOCK enqueue against the MDS.  BSD flocks are whole-file and
 * keyed by the struct file pointer; POSIX locks carry their byte range
 * and are keyed by fl_owner.  Lock type maps to LCK_PR (read),
 * LCK_PW (write) or LCK_NL (unlock); F_GETLK-style commands use
 * LDLM_FL_TEST_LOCK.  On success the result is mirrored into the local
 * kernel lock tables so the VFS state stays consistent.
 */
1987 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1989 struct inode *inode = file->f_dentry->d_inode;
1990 struct ll_sb_info *sbi = ll_i2sbi(inode);
1991 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1992 .ei_cb_cp =ldlm_flock_completion_ast,
1993 .ei_cbdata = file_lock };
1994 struct md_op_data *op_data;
1995 struct lustre_handle lockh = {0};
1996 ldlm_policy_data_t flock = {{0}};
2001 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2002 inode->i_ino, file_lock);
2004 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2006 if (file_lock->fl_flags & FL_FLOCK) {
2007 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2008 /* flocks are whole-file locks */
2009 flock.l_flock.end = OFFSET_MAX;
2010 /* For flocks owner is determined by the local file desctiptor*/
2011 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2012 } else if (file_lock->fl_flags & FL_POSIX) {
2013 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2014 flock.l_flock.start = file_lock->fl_start;
2015 flock.l_flock.end = file_lock->fl_end;
2019 flock.l_flock.pid = file_lock->fl_pid;
2021 switch (file_lock->fl_type) {
2023 einfo.ei_mode = LCK_PR;
2026 /* An unlock request may or may not have any relation to
2027 * existing locks so we may not be able to pass a lock handle
2028 * via a normal ldlm_lock_cancel() request. The request may even
2029 * unlock a byte range in the middle of an existing lock. In
2030 * order to process an unlock request we need all of the same
2031 * information that is given with a normal read or write record
2032 * lock request. To avoid creating another ldlm unlock (cancel)
2033 * message we'll treat a LCK_NL flock request as an unlock. */
2034 einfo.ei_mode = LCK_NL;
2037 einfo.ei_mode = LCK_PW;
2040 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
2055 flags = LDLM_FL_BLOCK_NOWAIT;
2061 flags = LDLM_FL_TEST_LOCK;
2062 /* Save the old mode so that if the mode in the lock changes we
2063 * can decrement the appropriate reader or writer refcount. */
2064 file_lock->fl_type = einfo.ei_mode;
2067 CERROR("unknown fcntl lock command: %d\n", cmd);
2071 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2072 LUSTRE_OPC_ANY, NULL);
2073 if (IS_ERR(op_data))
2074 RETURN(PTR_ERR(op_data));
2076 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2077 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2078 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2080 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2081 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2083 ll_finish_md_op_data(op_data);
/* Mirror the server's decision into the local VFS lock tables so
 * fcntl/flock bookkeeping matches the cluster-wide state. */
2085 if ((file_lock->fl_flags & FL_FLOCK) &&
2086 (rc == 0 || file_lock->fl_type == F_UNLCK))
2087 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2088 #ifdef HAVE_F_OP_FLOCK
2089 if ((file_lock->fl_flags & FL_POSIX) &&
2090 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2091 !(flags & LDLM_FL_TEST_LOCK))
2092 posix_lock_file_wait(file, file_lock);
/*
 * Lock stub installed for "-o noflock" mounts: file lock requests are
 * refused (wired into ll_file_operations_noflock below).
 */
2098 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Non-intrusive check (LDLM_FL_TEST_LOCK) whether this client already
 * caches an MDS inodebits lock covering @bits on @inode in mode
 * @l_req_mode; LCK_MINMODE matches any of CR/CW/PR/PW.  Does not take
 * a reference on a matched lock.
 */
2105 int ll_have_md_lock(struct inode *inode, __u64 bits, ldlm_mode_t l_req_mode)
2107 struct lustre_handle lockh;
2108 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2109 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2110 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2118 fid = &ll_i2info(inode)->lli_fid;
2119 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2120 ldlm_lockname[mode]);
2122 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2123 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
/*
 * Match a cached MDS inodebits lock covering @bits on @inode in any of
 * CR/CW/PR/PW.  Unlike ll_have_md_lock() this does not pass
 * LDLM_FL_TEST_LOCK, so a successful match returns the handle in
 * @lockh for the caller to use (and eventually release).
 */
2130 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2131 struct lustre_handle *lockh)
2133 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2139 fid = &ll_i2info(inode)->lli_fid;
2140 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2142 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2143 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2144 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Common tail for revalidation: -ENOENT means the object was unlinked
 * while we held a stale dentry, which is tolerated (treated as success
 * after updating nlink); any other error is logged against the inode.
 */
2148 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2149 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2150 * and return success */
2152 /* This path cannot be hit for regular files unless in
2153 * case of obscure races, so no need to to validate
2155 if (!S_ISREG(inode->i_mode) &&
2156 !S_ISDIR(inode->i_mode))
2161 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate @dentry's inode attributes against the MDS for the lock
 * bits in @ibits.  Two strategies, chosen by the server's connect
 * flags:
 *  - OBD_CONNECT_ATTRFID: getattr-by-fid through an IT_GETATTR (or
 *    IT_LOOKUP) intent lock, which also refreshes the dentry state and
 *    unhashes it if the file was unlinked;
 *  - otherwise: plain md_getattr() RPC, but only when no matching
 *    inodebits lock is already cached locally.
 */
2169 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2172 struct inode *inode = dentry->d_inode;
2173 struct ptlrpc_request *req = NULL;
2174 struct ll_sb_info *sbi;
2175 struct obd_export *exp;
2180 CERROR("REPORT THIS LINE TO PETER\n");
2183 sbi = ll_i2sbi(inode);
2185 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2186 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2188 exp = ll_i2mdexp(inode);
2190 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2191 * But under CMD case, it caused some lock issues, should be fixed
2192 * with new CMD ibits lock. See bug 12718 */
2193 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2194 struct lookup_intent oit = { .it_op = IT_GETATTR };
2195 struct md_op_data *op_data;
2197 if (ibits == MDS_INODELOCK_LOOKUP)
2198 oit.it_op = IT_LOOKUP;
2200 /* Call getattr by fid, so do not provide name at all. */
2201 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2202 dentry->d_inode, NULL, 0, 0,
2203 LUSTRE_OPC_ANY, NULL);
2204 if (IS_ERR(op_data))
2205 RETURN(PTR_ERR(op_data));
2207 oit.it_create_mode |= M_CHECK_STALE;
2208 rc = md_intent_lock(exp, op_data, NULL, 0,
2209 /* we are not interested in name
2212 ll_md_blocking_ast, 0);
2213 ll_finish_md_op_data(op_data);
2214 oit.it_create_mode &= ~M_CHECK_STALE;
2216 rc = ll_inode_revalidate_fini(inode, rc);
2220 rc = ll_revalidate_it_finish(req, &oit, dentry);
2222 ll_intent_release(&oit);
2226 /* Unlinked? Unhash dentry, so it is not picked up later by
2227 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2228 here to preserve get_cwd functionality on 2.6.
2230 if (!dentry->d_inode->i_nlink) {
2231 cfs_spin_lock(&ll_lookup_lock);
2232 spin_lock(&dcache_lock);
2233 ll_drop_dentry(dentry);
2234 spin_unlock(&dcache_lock);
2235 cfs_spin_unlock(&ll_lookup_lock);
2238 ll_lookup_finish_locks(&oit, dentry);
/* No ATTRFID support: skip the RPC entirely if a cached inodebits
 * lock already guarantees our attributes are current. */
2239 } else if (!ll_have_md_lock(dentry->d_inode, ibits, LCK_MINMODE)) {
2240 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2241 obd_valid valid = OBD_MD_FLGETATTR;
2242 struct md_op_data *op_data;
/* Regular files need EA room in the reply for striping info. */
2245 if (S_ISREG(inode->i_mode)) {
2246 rc = ll_get_max_mdsize(sbi, &ealen);
2249 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2252 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2253 0, ealen, LUSTRE_OPC_ANY,
2255 if (op_data == NULL)
2258 op_data->op_valid = valid;
2259 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2260 * capa for this inode. Because we only keep capas of dirs
2262 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2263 ll_finish_md_op_data(op_data);
2265 rc = ll_inode_revalidate_fini(inode, rc);
2269 rc = ll_prep_inode(&inode, req, NULL);
2272 ptlrpc_req_finished(req);
/*
 * Revalidate MDS attributes and then the file size: if no objects have
 * been allocated yet (no stripe md), timestamps come straight from the
 * MDS lvb; otherwise a glimpse fetches the current size from the OSTs,
 * preferring locally cached writes where they extend the file.
 */
2276 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2279 struct inode *inode = dentry->d_inode;
2283 rc = __ll_inode_revalidate_it(dentry, it, ibits);
2285 /* if object not yet allocated, don't validate size */
2286 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
2287 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2288 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2289 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2293 /* cl_glimpse_size will prefer locally cached writes if they extend
2297 rc = cl_glimpse_size(inode);
/*
 * Fill struct kstat for getattr after revalidating UPDATE|LOOKUP bits.
 * 32-bit-API callers get an inode number squeezed from the fid
 * (cl_fid_build_ino32); everything else is copied from the (now fresh)
 * inode fields.
 */
2302 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2303 struct lookup_intent *it, struct kstat *stat)
2305 struct inode *inode = de->d_inode;
2306 struct ll_inode_info *lli = ll_i2info(inode);
2309 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
2310 MDS_INODELOCK_LOOKUP);
2311 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2316 stat->dev = inode->i_sb->s_dev;
2317 if (ll_need_32bit_api(ll_i2sbi(inode)))
2318 stat->ino = cl_fid_build_ino32(&lli->lli_fid);
2320 stat->ino = inode->i_ino;
2322 stat->mode = inode->i_mode;
2323 stat->nlink = inode->i_nlink;
2324 stat->uid = inode->i_uid;
2325 stat->gid = inode->i_gid;
2326 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2327 stat->atime = inode->i_atime;
2328 stat->mtime = inode->i_mtime;
2329 stat->ctime = inode->i_ctime;
2330 #ifdef HAVE_INODE_BLKSIZE
2331 stat->blksize = inode->i_blksize;
2333 stat->blksize = 1 << inode->i_blkbits;
2336 stat->size = i_size_read(inode);
2337 stat->blocks = inode->i_blocks;
/* VFS ->getattr: thin wrapper that revalidates with an IT_GETATTR
 * intent via ll_getattr_it(). */
2341 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2343 struct lookup_intent it = { .it_op = IT_GETATTR };
2345 return ll_getattr_it(mnt, de, &it, stat);
2348 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * Kernel ->fiemap inode operation: adapt the VFS fiemap_extent_info
 * interface to Lustre's ll_user_fiemap buffer, run ll_do_fiemap(), and
 * copy flags and mapped extents back into the caller's structures.
 */
2349 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2350 __u64 start, __u64 len)
2354 struct ll_user_fiemap *fiemap;
2355 unsigned int extent_count = fieinfo->fi_extents_max;
2357 num_bytes = sizeof(*fiemap) + (extent_count *
2358 sizeof(struct ll_fiemap_extent));
2359 OBD_ALLOC_LARGE(fiemap, num_bytes);
2364 fiemap->fm_flags = fieinfo->fi_flags;
2365 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2366 fiemap->fm_start = start;
2367 fiemap->fm_length = len;
/* Seed the first extent slot from the caller's buffer (continuation
 * state for multi-call mappings). */
2368 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2369 sizeof(struct ll_fiemap_extent));
2371 rc = ll_do_fiemap(inode, fiemap, num_bytes);
2373 fieinfo->fi_flags = fiemap->fm_flags;
2374 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2375 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2376 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2378 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * ACL permission callback: check @mask against the POSIX ACL cached on
 * the inode (lli_posix_acl), duplicated under lli_lock so the check
 * runs on a stable copy.  Compiled out without CONFIG_FS_POSIX_ACL.
 */
2385 int lustre_check_acl(struct inode *inode, int mask)
2387 #ifdef CONFIG_FS_POSIX_ACL
2388 struct ll_inode_info *lli = ll_i2info(inode);
2389 struct posix_acl *acl;
2393 cfs_spin_lock(&lli->lli_lock);
2394 acl = posix_acl_dup(lli->lli_posix_acl);
2395 cfs_spin_unlock(&lli->lli_lock);
2400 rc = posix_acl_permission(inode, acl, mask);
2401 posix_acl_release(acl);
2409 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2410 #ifndef HAVE_INODE_PERMISION_2ARGS
/*
 * VFS ->permission, kernel >= 2.6.10 variant: the root inode is
 * revalidated first (it is never revalidated by lookup), remote-client
 * mounts defer to server-side permission checks, and everything else
 * goes through generic_permission() with lustre_check_acl as the ACL
 * callback.
 */
2411 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2413 int ll_inode_permission(struct inode *inode, int mask)
2419 /* as root inode are NOT getting validated in lookup operation,
2420 * need to do it before permission check. */
2422 if (inode == inode->i_sb->s_root->d_inode) {
2423 struct lookup_intent it = { .it_op = IT_LOOKUP };
2425 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2426 MDS_INODELOCK_LOOKUP);
2431 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2432 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2434 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2435 return lustre_check_remote_perm(inode, mask);
2437 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2438 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * Pre-2.6.10 variant: open-coded Unix owner/group/other permission
 * logic (mirroring the old generic_permission) with ACL and capability
 * fallbacks, since no ACL-callback generic helper exists there.
 */
2443 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2445 int mode = inode->i_mode;
2448 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2449 inode->i_ino, inode->i_generation, inode, mask);
2451 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2452 return lustre_check_remote_perm(inode, mask);
2454 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2456 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2457 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2459 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2461 if (cfs_curproc_fsuid() == inode->i_uid) {
2464 if (((mode >> 3) & mask & S_IRWXO) != mask)
2466 rc = lustre_check_acl(inode, mask);
2470 goto check_capabilities;
2474 if (cfs_curproc_is_in_groups(inode->i_gid))
2477 if ((mode & mask & S_IRWXO) == mask)
/* Capability overrides: DAC_OVERRIDE grants everything except
 * executing a file with no x bits; DAC_READ_SEARCH grants read and
 * directory search. */
2481 if (!(mask & MAY_EXEC) ||
2482 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2483 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2486 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2487 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select the vectored-I/O member names for this kernel generation:
 * older kernels expose ->readv/->writev, newer ones ->aio_read /
 * ->aio_write.  Used by the file_operations tables below. */
2494 #ifdef HAVE_FILE_READV
2495 #define READ_METHOD readv
2496 #define READ_FUNCTION ll_file_readv
2497 #define WRITE_METHOD writev
2498 #define WRITE_FUNCTION ll_file_writev
2500 #define READ_METHOD aio_read
2501 #define READ_FUNCTION ll_file_aio_read
2502 #define WRITE_METHOD aio_write
2503 #define WRITE_FUNCTION ll_file_aio_write
2506 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no ->flock/->lock members, so file locks
 * are handled locally by the VFS only (not cluster-coherent). */
2507 struct file_operations ll_file_operations = {
2508 .read = ll_file_read,
2509 .READ_METHOD = READ_FUNCTION,
2510 .write = ll_file_write,
2511 .WRITE_METHOD = WRITE_FUNCTION,
2512 #ifdef HAVE_UNLOCKED_IOCTL
2513 .unlocked_ioctl = ll_file_ioctl,
2515 .ioctl = ll_file_ioctl,
2517 .open = ll_file_open,
2518 .release = ll_file_release,
2519 .mmap = ll_file_mmap,
2520 .llseek = ll_file_seek,
2521 #ifdef HAVE_KERNEL_SENDFILE
2522 .sendfile = ll_file_sendfile,
2524 #ifdef HAVE_KERNEL_SPLICE_READ
2525 .splice_read = ll_file_splice_read,
/* file_operations used with "-o flock": cluster-coherent file locking
 * via ll_file_flock (wired to ->flock where available, ->lock
 * otherwise). */
2531 struct file_operations ll_file_operations_flock = {
2532 .read = ll_file_read,
2533 .READ_METHOD = READ_FUNCTION,
2534 .write = ll_file_write,
2535 .WRITE_METHOD = WRITE_FUNCTION,
2536 #ifdef HAVE_UNLOCKED_IOCTL
2537 .unlocked_ioctl = ll_file_ioctl,
2539 .ioctl = ll_file_ioctl,
2541 .open = ll_file_open,
2542 .release = ll_file_release,
2543 .mmap = ll_file_mmap,
2544 .llseek = ll_file_seek,
2545 #ifdef HAVE_KERNEL_SENDFILE
2546 .sendfile = ll_file_sendfile,
2548 #ifdef HAVE_KERNEL_SPLICE_READ
2549 .splice_read = ll_file_splice_read,
2553 #ifdef HAVE_F_OP_FLOCK
2554 .flock = ll_file_flock,
2556 .lock = ll_file_flock
2559 /* These are for -o noflock - to return ENOSYS on flock calls */
2560 struct file_operations ll_file_operations_noflock = {
2561 .read = ll_file_read,
2562 .READ_METHOD = READ_FUNCTION,
2563 .write = ll_file_write,
2564 .WRITE_METHOD = WRITE_FUNCTION,
2565 #ifdef HAVE_UNLOCKED_IOCTL
2566 .unlocked_ioctl = ll_file_ioctl,
2568 .ioctl = ll_file_ioctl,
2570 .open = ll_file_open,
2571 .release = ll_file_release,
2572 .mmap = ll_file_mmap,
2573 .llseek = ll_file_seek,
2574 #ifdef HAVE_KERNEL_SENDFILE
2575 .sendfile = ll_file_sendfile,
2577 #ifdef HAVE_KERNEL_SPLICE_READ
2578 .splice_read = ll_file_splice_read,
/* Lock entry points route to the refusing stub ll_file_noflock. */
2582 #ifdef HAVE_F_OP_FLOCK
2583 .flock = ll_file_noflock,
2585 .lock = ll_file_noflock
/* inode_operations for regular Lustre files, including xattr handlers
 * and (when the kernel provides the hook) fiemap. */
2588 struct inode_operations ll_file_inode_operations = {
2589 .setattr = ll_setattr,
2590 .truncate = ll_truncate,
2591 .getattr = ll_getattr,
2592 .permission = ll_inode_permission,
2593 .setxattr = ll_setxattr,
2594 .getxattr = ll_getxattr,
2595 .listxattr = ll_listxattr,
2596 .removexattr = ll_removexattr,
2597 #ifdef HAVE_LINUX_FIEMAP_H
2598 .fiemap = ll_fiemap,
2602 /* dynamic ioctl number support routins */
/* Registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries (each carrying a callback and the ioctl numbers
 * it serves) protected by ioc_sem (readers iterate, register /
 * unregister take it for write). */
2603 static struct llioc_ctl_data {
2604 cfs_rw_semaphore_t ioc_sem;
2605 cfs_list_t ioc_head;
2607 __RWSEM_INITIALIZER(llioc.ioc_sem),
2608 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registration: callback, its command list, and allocation size
 * (iocd_cmd is a flexible trailing array of iocd_count entries). */
2613 cfs_list_t iocd_list;
2614 unsigned int iocd_size;
2615 llioc_callback_t iocd_cb;
2616 unsigned int iocd_count;
2617 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: @cb will be offered the @count
 * ioctl numbers in @cmd[] from ll_file_ioctl's fallback path.  Returns
 * an opaque cookie (the allocation itself) to pass to
 * ll_iocontrol_unregister(), or NULL on bad arguments / allocation
 * failure.
 */
2620 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2623 struct llioc_data *in_data = NULL;
2626 if (cb == NULL || cmd == NULL ||
2627 count > LLIOC_MAX_CMD || count < 0)
2630 size = sizeof(*in_data) + count * sizeof(unsigned int);
2631 OBD_ALLOC(in_data, size);
2632 if (in_data == NULL)
2635 memset(in_data, 0, sizeof(*in_data));
2636 in_data->iocd_size = size;
2637 in_data->iocd_cb = cb;
2638 in_data->iocd_count = count;
2639 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2641 cfs_down_write(&llioc.ioc_sem);
2642 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2643 cfs_up_write(&llioc.ioc_sem);
/*
 * Remove a handler previously registered with ll_iocontrol_register().
 * @magic is the cookie that function returned; an unknown cookie is
 * only warned about.  The semaphore is dropped before freeing the
 * entry.
 */
2648 void ll_iocontrol_unregister(void *magic)
2650 struct llioc_data *tmp;
2655 cfs_down_write(&llioc.ioc_sem);
2656 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2658 unsigned int size = tmp->iocd_size;
2660 cfs_list_del(&tmp->iocd_list);
2661 cfs_up_write(&llioc.ioc_sem);
2663 OBD_FREE(tmp, size);
2667 cfs_up_write(&llioc.ioc_sem);
2669 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2672 EXPORT_SYMBOL(ll_iocontrol_register);
2673 EXPORT_SYMBOL(ll_iocontrol_unregister);
2675 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2676 unsigned int cmd, unsigned long arg, int *rcp)
2678 enum llioc_iter ret = LLIOC_CONT;
2679 struct llioc_data *data;
2680 int rc = -EINVAL, i;
2682 cfs_down_read(&llioc.ioc_sem);
2683 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2684 for (i = 0; i < data->iocd_count; i++) {
2685 if (cmd != data->iocd_cmd[i])
2688 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2692 if (ret == LLIOC_STOP)
2695 cfs_up_read(&llioc.ioc_sem);