1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-open-file ll_file_data from its dedicated slab cache.
 * NOTE(review): this listing is elided (embedded line numbers jump), so
 * braces/RETURN statements of each function are not fully visible here. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Return a ll_file_data to the slab cache it was allocated from. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Pack the inode's cached attributes (FID, mode, a/m/ctime, size, blocks,
 * flags), the current IO epoch, the open handle @fh, and an MDS capability
 * into @op_data for transmission to the MDS. */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* Inode flags travel in the ia_attr_flags overlay of struct ll_iattr. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
79 ll_inode_to_ext_flags(inode->i_flags);
80 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
82 op_data->op_handle = *fh;
83 op_data->op_capa1 = ll_mdscapa_get(inode);
87 * Closes the IO epoch and packs all the attributes into @op_data for
90 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
91 struct obd_client_handle *och)
/* Always send mode and explicit timestamps with the close. */
95 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
96 ATTR_MTIME_SET | ATTR_CTIME_SET;
98 if (!(och->och_flags & FMODE_WRITE))
/* Size/blocks are only trusted from the client when SOM is not in use
 * (or the object is not a regular file). */
101 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
102 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
104 ll_ioepoch_close(inode, op_data, &och, 0);
107 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
108 ll_prep_md_op_data(op_data, inode, NULL, NULL,
109 0, 0, LUSTRE_OPC_ANY, NULL);
/* Send an MDS_CLOSE for the open handle @och of @inode via @md_exp,
 * perform the Size-on-MDS update when the MDS requests one, destroy any
 * OST objects the close reply tells us to, and tear down the open-replay
 * state of the handle. */
113 static int ll_close_inode_openhandle(struct obd_export *md_exp,
115 struct obd_client_handle *och)
117 struct obd_export *exp = ll_i2mdexp(inode);
118 struct md_op_data *op_data;
119 struct ptlrpc_request *req = NULL;
120 struct obd_device *obd = class_exp2obd(exp);
127 * XXX: in case of LMV, is this correct to access
130 CERROR("Invalid MDC connection handle "LPX64"\n",
131 ll_i2mdexp(inode)->exp_handle.h_cookie);
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr to back to MDS. */
147 rc = ll_som_update(inode, op_data);
149 CERROR("inode %lu mdc Size-on-MDS update failed: "
150 "rc = %d\n", inode->i_ino, rc);
154 CERROR("inode %lu mdc close failed: rc = %d\n",
157 ll_finish_md_op_data(op_data);
160 rc = ll_objects_destroy(req, inode);
162 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* With SOM, a write handle whose epoch was not closed here must still
 * produce a DONE_WRITING later; queue that work now. */
169 if (exp_connect_som(exp) && !epoch_close &&
170 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
171 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
173 md_clear_open_replay_data(md_exp, och);
174 /* Free @och if it is not waiting for DONE_WRITING. */
175 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
178 if (req) /* This is close request */
179 ptlrpc_req_finished(req);
/* Drop the cached MDS open handle of @inode that matches @flags
 * (write/exec/read).  If other users still reference the handle we bail
 * out early; otherwise the handle is detached under lli_och_sem and
 * closed via ll_close_inode_openhandle(). */
183 int ll_md_real_close(struct inode *inode, int flags)
185 struct ll_inode_info *lli = ll_i2info(inode);
186 struct obd_client_handle **och_p;
187 struct obd_client_handle *och;
/* Pick the handle slot and use count for the requested open mode. */
192 if (flags & FMODE_WRITE) {
193 och_p = &lli->lli_mds_write_och;
194 och_usecount = &lli->lli_open_fd_write_count;
195 } else if (flags & FMODE_EXEC) {
196 och_p = &lli->lli_mds_exec_och;
197 och_usecount = &lli->lli_open_fd_exec_count;
199 LASSERT(flags & FMODE_READ);
200 och_p = &lli->lli_mds_read_och;
201 och_usecount = &lli->lli_open_fd_read_count;
204 cfs_down(&lli->lli_och_sem);
205 if (*och_usecount) { /* There are still users of this handle, so
207 cfs_up(&lli->lli_och_sem);
212 cfs_up(&lli->lli_och_sem);
214 if (och) { /* There might be a race and somebody have freed this och
216 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file-descriptor close: drop any group lock, decrement the open
 * count for this descriptor's mode, and only talk to the MDS (via
 * ll_md_real_close()) when no cached OPEN DLM lock can stand in for the
 * close.  Finally releases the ll_file_data and the OSS capability. */
223 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
226 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
227 struct ll_inode_info *lli = ll_i2info(inode);
231 /* clear group lock, if present */
232 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
233 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
235 /* Let's see if we have good enough OPEN lock on the file and if
236 we can skip talking to MDS */
237 if (file->f_dentry->d_inode) { /* Can this ever be false? */
239 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
240 struct lustre_handle lockh;
241 struct inode *inode = file->f_dentry->d_inode;
242 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Account this descriptor against the per-mode open count. */
244 cfs_down(&lli->lli_och_sem);
245 if (fd->fd_omode & FMODE_WRITE) {
247 LASSERT(lli->lli_open_fd_write_count);
248 lli->lli_open_fd_write_count--;
249 } else if (fd->fd_omode & FMODE_EXEC) {
251 LASSERT(lli->lli_open_fd_exec_count);
252 lli->lli_open_fd_exec_count--;
255 LASSERT(lli->lli_open_fd_read_count);
256 lli->lli_open_fd_read_count--;
258 cfs_up(&lli->lli_och_sem);
/* No matching cached OPEN lock — must do the real MDS close. */
260 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
261 LDLM_IBITS, &policy, lockmode,
263 rc = ll_md_real_close(file->f_dentry->d_inode,
267 CERROR("Releasing a file %p with negative dentry %p. Name %s",
268 file, file->f_dentry, file->f_dentry->d_name.name);
271 LUSTRE_FPRIVATE(file) = NULL;
272 ll_file_data_put(fd);
273 ll_capa_close(inode);
278 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
280 /* While this returns an error code, fput() the caller does not, so we need
281 * to make every effort to clean up all of our state here. Also, applications
282 * rarely check close errors and even if an error is returned they will not
283 * re-try the close call.
/* f_op->release() entry: stop statahead if this fd owns it, tear down
 * remote-ACL state for the root inode, clear pending async write errors,
 * then do the MDS close through ll_md_close(). */
285 int ll_file_release(struct inode *inode, struct file *file)
287 struct ll_file_data *fd;
288 struct ll_sb_info *sbi = ll_i2sbi(inode);
289 struct ll_inode_info *lli = ll_i2info(inode);
290 struct lov_stripe_md *lsm = lli->lli_smd;
294 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
295 inode->i_generation, inode);
297 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping applies only to the root inode. */
298 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
299 inode == inode->i_sb->s_root->d_inode) {
300 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
303 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
304 fd->fd_flags &= ~LL_FILE_RMTACL;
305 rct_del(&sbi->ll_rct, cfs_curproc_pid());
306 et_search_free(&sbi->ll_et, cfs_curproc_pid());
311 if (inode->i_sb->s_root != file->f_dentry)
312 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
313 fd = LUSTRE_FPRIVATE(file);
316 /* The last ref on @file, maybe not the owner pid of statahead.
317 * Different processes can open the same dir, "ll_opendir_key" means:
318 * it is me that should stop the statahead thread. */
319 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
320 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root inode needs no MDS close — just drop the file data. */
322 if (inode->i_sb->s_root == file->f_dentry) {
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
329 lov_test_and_clear_async_rc(lsm);
330 lli->lli_async_rc = 0;
332 rc = ll_md_close(sbi->ll_md_exp, inode, file);
334 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
335 libcfs_debug_dumplog();
/* Issue an MDS intent-open (md_intent_lock) for @file.  With NULL/0
 * @lmm/@lmmsize an OPEN DLM lock is requested; with a striping buffer the
 * call is being used by ll_lov_setstripe_ea_info() to set stripe info. */
340 static int ll_intent_file_open(struct file *file, void *lmm,
341 int lmmsize, struct lookup_intent *itp)
343 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
344 struct dentry *parent = file->f_dentry->d_parent;
345 const char *name = file->f_dentry->d_name.name;
346 const int len = file->f_dentry->d_name.len;
347 struct md_op_data *op_data;
348 struct ptlrpc_request *req;
349 __u32 opc = LUSTRE_OPC_ANY;
356 /* Usually we come here only for NFSD, and we want open lock.
357 But we can also get here with pre 2.6.15 patchless kernels, and in
358 that case that lock is also ok */
359 /* We can also get here if there was cached open handle in revalidate_it
360 * but it disappeared while we were getting from there to ll_file_open.
361 * But this means this file was closed and immediately opened which
362 * makes a good candidate for using OPEN lock */
363 /* If lmmsize & lmm are not 0, we are just setting stripe info
364 * parameters. No need for the open lock */
365 if (lmm == NULL && lmmsize == 0) {
366 itp->it_flags |= MDS_OPEN_LOCK;
367 if (itp->it_flags & FMODE_WRITE)
368 opc = LUSTRE_OPC_CREATE;
371 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
372 file->f_dentry->d_inode, name, len,
375 RETURN(PTR_ERR(op_data));
377 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
378 0 /*unused */, &req, ll_md_blocking_ast, 0);
379 ll_finish_md_op_data(op_data);
381 /* reason for keep own exit path - don't flood log
382 * with messages with -ESTALE errors.
/* -ESTALE (or similar): release any server open handle we got and
 * take the quiet exit path. */
384 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
385 it_open_error(DISP_OPEN_OPEN, itp))
387 ll_release_openhandle(file->f_dentry, itp);
391 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
392 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
393 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the reply and bind the obtained DLM lock
 * to it so later matches find it. */
397 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
398 if (!rc && itp->d.lustre.it_lock_mode)
399 md_set_lock_data(sbi->ll_md_exp,
400 &itp->d.lustre.it_lock_handle,
401 file->f_dentry->d_inode, NULL);
404 ptlrpc_req_finished(itp->d.lustre.it_data);
405 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
406 ll_intent_drop_lock(itp);
412 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
413 * not believe attributes if a few ioepoch holders exist. Attributes for
414 * previous ioepoch if new one is opened are also skipped by MDS.
416 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a non-zero epoch that differs from the current one. */
418 if (ioepoch && lli->lli_ioepoch != ioepoch) {
419 lli->lli_ioepoch = ioepoch;
420 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
421 ioepoch, PFID(&lli->lli_fid));
/* Populate @och (file handle, magic, FID, open flags) from the intent's
 * MDS reply, record the reply's IO epoch on the inode, and register the
 * open for replay with the MD layer. */
425 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
426 struct lookup_intent *it, struct obd_client_handle *och)
428 struct ptlrpc_request *req = it->d.lustre.it_data;
429 struct mdt_body *body;
433 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
434 LASSERT(body != NULL); /* reply already checked out */
436 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
437 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
438 och->och_fid = lli->lli_fid;
439 och->och_flags = it->it_flags;
440 ll_ioepoch_open(lli, body->ioepoch);
442 return md_set_open_replay_data(md_exp, och, req);
/* Finish the client-local part of an open: fill @och from the intent
 * reply (when one is supplied), attach @fd as the file's private data,
 * initialize readahead state and remember the open mode. */
445 int ll_local_open(struct file *file, struct lookup_intent *it,
446 struct ll_file_data *fd, struct obd_client_handle *och)
448 struct inode *inode = file->f_dentry->d_inode;
449 struct ll_inode_info *lli = ll_i2info(inode);
/* ll_file_open() clears private_data beforehand; it must be unset. */
452 LASSERT(!LUSTRE_FPRIVATE(file));
457 struct ptlrpc_request *req = it->d.lustre.it_data;
458 struct mdt_body *body;
461 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
465 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
466 if ((it->it_flags & FMODE_WRITE) &&
467 (body->valid & OBD_MD_FLSIZE))
468 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
469 lli->lli_ioepoch, PFID(&lli->lli_fid));
472 LUSTRE_FPRIVATE(file) = fd;
473 ll_readahead_init(inode, &fd->fd_ras);
474 fd->fd_omode = it->it_flags;
478 /* Open a file, and (for the very first open) create objects on the OSTs at
479 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
480 * creation or open until ll_lov_setstripe() ioctl is called. We grab
481 * lli_open_sem to ensure no other process will create objects, send the
482 * stripe MD to the MDS, or try to destroy the objects if that fails.
484 * If we already have the stripe MD locally then we don't request it in
485 * md_open(), by passing a lmm_size = 0.
487 * It is up to the application to ensure no other processes open this file
488 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
489 * used. We might be able to avoid races of that sort by getting lli_open_sem
490 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
491 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
493 int ll_file_open(struct inode *inode, struct file *file)
495 struct ll_inode_info *lli = ll_i2info(inode);
496 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
497 .it_flags = file->f_flags };
498 struct lov_stripe_md *lsm;
499 struct ptlrpc_request *req = NULL;
500 struct obd_client_handle **och_p;
502 struct ll_file_data *fd;
503 int rc = 0, opendir_set = 0;
506 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
507 inode->i_generation, inode, file->f_flags);
509 it = file->private_data; /* XXX: compat macro */
510 file->private_data = NULL; /* prevent ll_local_open assertion */
512 fd = ll_file_data_get();
/* For directories, claim statahead ownership if nobody holds it yet. */
517 if (S_ISDIR(inode->i_mode)) {
518 cfs_spin_lock(&lli->lli_sa_lock);
519 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
520 LASSERT(lli->lli_sai == NULL);
521 lli->lli_opendir_key = fd;
522 lli->lli_opendir_pid = cfs_curproc_pid();
525 cfs_spin_unlock(&lli->lli_sa_lock);
/* The root dentry never needs an MDS open. */
528 if (inode->i_sb->s_root == file->f_dentry) {
529 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent supplied by lookup — build our own (oit). */
533 if (!it || !it->d.lustre.it_disposition) {
534 /* Convert f_flags into access mode. We cannot use file->f_mode,
535 * because everything but O_ACCMODE mask was stripped from
537 if ((oit.it_flags + 1) & O_ACCMODE)
539 if (file->f_flags & O_TRUNC)
540 oit.it_flags |= FMODE_WRITE;
542 /* kernel only call f_op->open in dentry_open. filp_open calls
543 * dentry_open after call to open_namei that checks permissions.
544 * Only nfsd_open call dentry_open directly without checking
545 * permissions and because of that this code below is safe. */
546 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
547 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
549 /* We do not want O_EXCL here, presumably we opened the file
550 * already? XXX - NFS implications? */
551 oit.it_flags &= ~O_EXCL;
553 /* bug20584, if "it_flags" contains O_CREAT, the file will be
554 * created if necessary, then "IT_CREAT" should be set to keep
555 * consistent with it */
556 if (oit.it_flags & O_CREAT)
557 oit.it_op |= IT_CREAT;
563 /* Let's see if we have file open on MDS already. */
564 if (it->it_flags & FMODE_WRITE) {
565 och_p = &lli->lli_mds_write_och;
566 och_usecount = &lli->lli_open_fd_write_count;
567 } else if (it->it_flags & FMODE_EXEC) {
568 och_p = &lli->lli_mds_exec_och;
569 och_usecount = &lli->lli_open_fd_exec_count;
571 och_p = &lli->lli_mds_read_och;
572 och_usecount = &lli->lli_open_fd_read_count;
575 cfs_down(&lli->lli_och_sem);
576 if (*och_p) { /* Open handle is present */
577 if (it_disposition(it, DISP_OPEN_OPEN)) {
578 /* Well, there's extra open request that we do not need,
579 let's close it somehow. This will decref request. */
580 rc = it_open_error(DISP_OPEN_OPEN, it);
582 cfs_up(&lli->lli_och_sem);
583 ll_file_data_put(fd);
584 GOTO(out_openerr, rc);
586 ll_release_openhandle(file->f_dentry, it);
587 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse the cached handle for this open. */
592 rc = ll_local_open(file, it, fd, NULL);
595 cfs_up(&lli->lli_och_sem);
596 ll_file_data_put(fd);
597 GOTO(out_openerr, rc);
600 LASSERT(*och_usecount == 0);
601 if (!it->d.lustre.it_disposition) {
602 /* We cannot just request lock handle now, new ELC code
603 means that one of other OPEN locks for this file
604 could be cancelled, and since blocking ast handler
605 would attempt to grab och_sem as well, that would
606 result in a deadlock */
607 cfs_up(&lli->lli_och_sem);
608 it->it_create_mode |= M_CHECK_STALE;
609 rc = ll_intent_file_open(file, NULL, 0, it);
610 it->it_create_mode &= ~M_CHECK_STALE;
612 ll_file_data_put(fd);
613 GOTO(out_openerr, rc);
616 /* Got some error? Release the request */
617 if (it->d.lustre.it_status < 0) {
618 req = it->d.lustre.it_data;
619 ptlrpc_req_finished(req);
623 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
625 ll_file_data_put(fd);
626 GOTO(out_och_free, rc = -ENOMEM);
629 req = it->d.lustre.it_data;
631 /* md_intent_lock() didn't get a request ref if there was an
632 * open error, so don't do cleanup on the request here
634 /* XXX (green): Should not we bail out on any error here, not
635 * just open error? */
636 rc = it_open_error(DISP_OPEN_OPEN, it);
638 ll_file_data_put(fd);
639 GOTO(out_och_free, rc);
642 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
643 rc = ll_local_open(file, it, fd, *och_p);
645 ll_file_data_put(fd);
646 GOTO(out_och_free, rc);
649 cfs_up(&lli->lli_och_sem);
651 /* Must do this outside lli_och_sem lock to prevent deadlock where
652 different kind of OPEN lock for this same inode gets cancelled
653 by ldlm_cancel_lru */
654 if (!S_ISREG(inode->i_mode))
661 if (file->f_flags & O_LOV_DELAY_CREATE ||
662 !(file->f_mode & FMODE_WRITE)) {
663 CDEBUG(D_INODE, "object creation was delayed\n");
667 file->f_flags &= ~O_LOV_DELAY_CREATE;
670 ptlrpc_req_finished(req);
672 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
676 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
677 *och_p = NULL; /* OBD_FREE writes some magic there */
680 cfs_up(&lli->lli_och_sem);
682 if (opendir_set != 0)
683 ll_stop_statahead(inode, lli->lli_opendir_key);
689 /* Fills the obdo with the attributes for the lsm */
/* Issue an async OST getattr for @lsm (waiting on the request set) and
 * mask the returned o_valid down to the attribute bits this caller
 * consumes.  @sync requests the getattr under a server-side lock. */
690 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
691 struct obd_capa *capa, struct obdo *obdo,
692 __u64 ioepoch, int sync)
694 struct ptlrpc_request_set *set;
695 struct obd_info oinfo = { { { 0 } } };
700 LASSERT(lsm != NULL);
704 oinfo.oi_oa->o_id = lsm->lsm_object_id;
705 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
706 oinfo.oi_oa->o_mode = S_IFREG;
707 oinfo.oi_oa->o_ioepoch = ioepoch;
708 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
709 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
710 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
711 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
712 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
713 oinfo.oi_capa = capa;
/* @sync path: ask the OST to take the lock server-side. */
715 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
716 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
719 set = ptlrpc_prep_set();
721 CERROR("can't allocate ptlrpc set\n");
724 rc = obd_getattr_async(exp, &oinfo, set);
726 rc = ptlrpc_set_wait(set);
727 ptlrpc_set_destroy(set);
/* Keep only the attribute bits callers of this helper rely on. */
730 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
731 OBD_MD_FLATIME | OBD_MD_FLMTIME |
732 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
737 * Performs the getattr on the inode and updates its fields.
738 * If @sync != 0, perform the getattr under the server-side lock.
740 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
741 __u64 ioepoch, int sync)
743 struct ll_inode_info *lli = ll_i2info(inode);
744 struct obd_capa *capa = ll_mdscapa_get(inode);
/* Fetch OST attributes for the inode's stripes, then fold the valid
 * ones back into the in-core inode. */
748 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
749 capa, obdo, ioepoch, sync);
752 obdo_refresh_inode(inode, obdo, obdo->o_valid);
754 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
755 lli->lli_smd->lsm_object_id, i_size_read(inode),
756 (unsigned long long)inode->i_blocks,
757 (unsigned long)ll_inode_blksize(inode));
/* Merge MDS-provided timestamps with OST lock-value-block data and write
 * the resulting size, blocks and timestamps into the in-core inode,
 * under the inode size lock. */
762 int ll_merge_lvb(struct inode *inode)
764 struct ll_inode_info *lli = ll_i2info(inode);
765 struct ll_sb_info *sbi = ll_i2sbi(inode);
771 ll_inode_size_lock(inode, 1);
772 inode_init_lvb(inode, &lvb);
774 /* merge timestamps the most recently obtained from mds with
775 timestamps obtained from osts */
776 lvb.lvb_atime = lli->lli_lvb.lvb_atime;
777 lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
778 lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
779 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
780 cl_isize_write_nolock(inode, lvb.lvb_size);
782 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
783 PFID(&lli->lli_fid), lvb.lvb_size);
784 inode->i_blocks = lvb.lvb_blocks;
786 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
787 LTIME_S(inode->i_atime) = lvb.lvb_atime;
788 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
789 ll_inode_size_unlock(inode, 1);
/* Glimpse helper for ioctls: fetch current OST attributes for @lsm and
 * copy size/blocks/times into the caller-supplied stat buffer @st. */
794 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
797 struct obdo obdo = { 0 };
800 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
802 st->st_size = obdo.o_size;
803 st->st_blocks = obdo.o_blocks;
804 st->st_mtime = obdo.o_mtime;
805 st->st_atime = obdo.o_atime;
806 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for @file: non-blocking and append flags from
 * f_flags, the cl object, and the lock-request policy (never lock for
 * nolock files, mandatory lock for O_APPEND, otherwise "maybe"). */
811 void ll_io_init(struct cl_io *io, const struct file *file, int write)
813 struct inode *inode = file->f_dentry->d_inode;
815 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
817 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
818 io->ci_obj = ll_i2info(inode)->lli_clob;
819 io->ci_lockreq = CILR_MAYBE;
820 if (ll_file_nolock(file)) {
821 io->ci_lockreq = CILR_NEVER;
822 io->ci_no_srvlock = 1;
823 } else if (file->f_flags & O_APPEND) {
824 io->ci_lockreq = CILR_MANDATORY;
/* Common engine for all read/write entry points: set up a cl_io for
 * @iot at *@ppos/@count, copy the per-subtype arguments (normal iovec,
 * sendfile actor, or splice pipe) into the vvp/ccc io state, serialize
 * writes on lli_write_sem (unless group-locked), and run cl_io_loop().
 * Updates *@ppos from the io on success. */
828 static ssize_t ll_file_io_generic(const struct lu_env *env,
829 struct vvp_io_args *args, struct file *file,
830 enum cl_io_type iot, loff_t *ppos, size_t count)
832 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
837 io = ccc_env_thread_io(env);
838 ll_io_init(io, file, iot == CIT_WRITE);
840 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
841 struct vvp_io *vio = vvp_env_io(env);
842 struct ccc_io *cio = ccc_env_io(env);
843 int write_sem_locked = 0;
845 cio->cui_fd = LUSTRE_FPRIVATE(file);
846 vio->cui_io_subtype = args->via_io_subtype;
848 switch (vio->cui_io_subtype) {
850 cio->cui_iov = args->u.normal.via_iov;
851 cio->cui_nrsegs = args->u.normal.via_nrsegs;
852 cio->cui_tot_nrsegs = cio->cui_nrsegs;
853 #ifndef HAVE_FILE_WRITEV
854 cio->cui_iocb = args->u.normal.via_iocb;
/* Writers (outside a group lock) are serialized per-inode;
 * readers instead take the truncate semaphore shared. */
856 if ((iot == CIT_WRITE) &&
857 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
858 if(cfs_down_interruptible(&lli->lli_write_sem))
859 GOTO(out, result = -ERESTARTSYS);
860 write_sem_locked = 1;
861 } else if (iot == CIT_READ) {
862 cfs_down_read(&lli->lli_trunc_sem);
866 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
867 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
870 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
871 vio->u.splice.cui_flags = args->u.splice.via_flags;
874 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
877 result = cl_io_loop(env, io);
878 if (write_sem_locked)
879 cfs_up(&lli->lli_write_sem);
880 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
881 cfs_up_read(&lli->lli_trunc_sem);
883 /* cl_io_rw_init() handled IO */
884 result = io->ci_result;
887 if (io->ci_nob > 0) {
889 *ppos = io->u.ci_wr.wr.crw_pos;
894 if (iot == CIT_WRITE)
895 lli->lli_write_rc = result < 0 ? : 0;
901 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array: compute the total byte count into *@count,
 * rejecting negative lengths / wrapping totals, and shrink *@nr_segs to
 * the accessible prefix when a segment fails access_ok(). */
903 static int ll_file_get_iov_count(const struct iovec *iov,
904 unsigned long *nr_segs, size_t *count)
909 for (seg = 0; seg < *nr_segs; seg++) {
910 const struct iovec *iv = &iov[seg];
913 * If any segment has a negative length, or the cumulative
914 * length ever wraps negative then return -EINVAL.
917 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
919 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
924 cnt -= iv->iov_len; /* This segment is no good */
931 #ifdef HAVE_FILE_READV
/* readv entry point (pre-AIO kernels): validate the iovec, stash it in
 * the env's vvp args and run the generic read path. */
932 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
933 unsigned long nr_segs, loff_t *ppos)
936 struct vvp_io_args *args;
942 result = ll_file_get_iov_count(iov, &nr_segs, &count);
946 env = cl_env_get(&refcheck);
948 RETURN(PTR_ERR(env));
950 args = vvp_env_args(env, IO_NORMAL);
951 args->u.normal.via_iov = (struct iovec *)iov;
952 args->u.normal.via_nrsegs = nr_segs;
954 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
955 cl_env_put(env, &refcheck);
/* read(2) entry point (readv variant): wrap the user buffer in a
 * single-segment iovec from the env and delegate to ll_file_readv(). */
959 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
963 struct iovec *local_iov;
968 env = cl_env_get(&refcheck);
970 RETURN(PTR_ERR(env));
972 local_iov = &vvp_env_info(env)->vti_local_iov;
973 local_iov->iov_base = (void __user *)buf;
974 local_iov->iov_len = count;
975 result = ll_file_readv(file, local_iov, 1, ppos);
976 cl_env_put(env, &refcheck);
/* aio_read entry point: validate the iovec, record it plus the kiocb in
 * the env's vvp args and run the generic read path at iocb->ki_pos. */
981 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
982 unsigned long nr_segs, loff_t pos)
985 struct vvp_io_args *args;
991 result = ll_file_get_iov_count(iov, &nr_segs, &count);
995 env = cl_env_get(&refcheck);
997 RETURN(PTR_ERR(env));
999 args = vvp_env_args(env, IO_NORMAL);
1000 args->u.normal.via_iov = (struct iovec *)iov;
1001 args->u.normal.via_nrsegs = nr_segs;
1002 args->u.normal.via_iocb = iocb;
1004 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1005 &iocb->ki_pos, count);
1006 cl_env_put(env, &refcheck);
/* read(2) entry point (AIO variant): build a synchronous kiocb plus a
 * one-segment iovec from the env, call ll_file_aio_read() and propagate
 * the updated position back to *@ppos. */
1010 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1014 struct iovec *local_iov;
1015 struct kiocb *kiocb;
1020 env = cl_env_get(&refcheck);
1022 RETURN(PTR_ERR(env));
1024 local_iov = &vvp_env_info(env)->vti_local_iov;
1025 kiocb = &vvp_env_info(env)->vti_kiocb;
1026 local_iov->iov_base = (void __user *)buf;
1027 local_iov->iov_len = count;
1028 init_sync_kiocb(kiocb, file);
1029 kiocb->ki_pos = *ppos;
1030 kiocb->ki_left = count;
1032 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1033 *ppos = kiocb->ki_pos;
1035 cl_env_put(env, &refcheck);
1041 * Write to a file (through the page cache).
1043 #ifdef HAVE_FILE_WRITEV
/* writev entry point (pre-AIO kernels): validate the iovec, stash it in
 * the env's vvp args and run the generic write path. */
1044 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1045 unsigned long nr_segs, loff_t *ppos)
1048 struct vvp_io_args *args;
1054 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1058 env = cl_env_get(&refcheck);
1060 RETURN(PTR_ERR(env));
1062 args = vvp_env_args(env, IO_NORMAL);
1063 args->u.normal.via_iov = (struct iovec *)iov;
1064 args->u.normal.via_nrsegs = nr_segs;
1066 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1067 cl_env_put(env, &refcheck);
/* write(2) entry point (writev variant): wrap the user buffer in a
 * single-segment iovec from the env and delegate to ll_file_writev(). */
1071 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1075 struct iovec *local_iov;
1080 env = cl_env_get(&refcheck);
1082 RETURN(PTR_ERR(env));
1084 local_iov = &vvp_env_info(env)->vti_local_iov;
1085 local_iov->iov_base = (void __user *)buf;
1086 local_iov->iov_len = count;
1088 result = ll_file_writev(file, local_iov, 1, ppos);
1089 cl_env_put(env, &refcheck);
1093 #else /* AIO stuff */
/* aio_write entry point: validate the iovec, record it plus the kiocb in
 * the env's vvp args and run the generic write path at iocb->ki_pos. */
1094 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1095 unsigned long nr_segs, loff_t pos)
1098 struct vvp_io_args *args;
1104 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1108 env = cl_env_get(&refcheck);
1110 RETURN(PTR_ERR(env));
1112 args = vvp_env_args(env, IO_NORMAL);
1113 args->u.normal.via_iov = (struct iovec *)iov;
1114 args->u.normal.via_nrsegs = nr_segs;
1115 args->u.normal.via_iocb = iocb;
1117 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1118 &iocb->ki_pos, count);
1119 cl_env_put(env, &refcheck);
/* write(2) entry point (AIO variant): build a synchronous kiocb plus a
 * one-segment iovec from the env, call ll_file_aio_write() and propagate
 * the updated position back to *@ppos. */
1123 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1127 struct iovec *local_iov;
1128 struct kiocb *kiocb;
1133 env = cl_env_get(&refcheck);
1135 RETURN(PTR_ERR(env));
1137 local_iov = &vvp_env_info(env)->vti_local_iov;
1138 kiocb = &vvp_env_info(env)->vti_kiocb;
1139 local_iov->iov_base = (void __user *)buf;
1140 local_iov->iov_len = count;
1141 init_sync_kiocb(kiocb, file);
1142 kiocb->ki_pos = *ppos;
1143 kiocb->ki_left = count;
1145 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1146 *ppos = kiocb->ki_pos;
1148 cl_env_put(env, &refcheck);
1154 #ifdef HAVE_KERNEL_SENDFILE
1156 * Send file content (through pagecache) somewhere with helper
/* sendfile entry point: record the actor/target in the env's vvp args
 * (IO_SENDFILE subtype) and run the generic read path. */
1158 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1159 read_actor_t actor, void *target)
1162 struct vvp_io_args *args;
1167 env = cl_env_get(&refcheck);
1169 RETURN(PTR_ERR(env));
1171 args = vvp_env_args(env, IO_SENDFILE);
1172 args->u.sendfile.via_target = target;
1173 args->u.sendfile.via_actor = actor;
1175 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1176 cl_env_put(env, &refcheck);
1181 #ifdef HAVE_KERNEL_SPLICE_READ
1183 * Send file content (through pagecache) somewhere with helper
/* splice_read entry point: record the pipe/flags in the env's vvp args
 * (IO_SPLICE subtype) and run the generic read path. */
1185 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1186 struct pipe_inode_info *pipe, size_t count,
1190 struct vvp_io_args *args;
1195 env = cl_env_get(&refcheck);
1197 RETURN(PTR_ERR(env));
1199 args = vvp_env_args(env, IO_SPLICE);
1200 args->u.splice.via_pipe = pipe;
1201 args->u.splice.via_flags = flags;
1203 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1204 cl_env_put(env, &refcheck);
/* Re-create lost OST objects for @inode: duplicate the stripe metadata,
 * mark the obdo with OBD_FL_RECREATE_OBJS (o_nlink carries the target
 * OST index) and call obd_create() under the inode size lock. */
1209 static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
1212 struct obd_export *exp = ll_i2dtexp(inode);
1213 struct obd_trans_info oti = { 0 };
1214 struct obdo *oa = NULL;
1217 struct lov_stripe_md *lsm, *lsm2;
1224 ll_inode_size_lock(inode, 0);
1225 lsm = ll_i2info(inode)->lli_smd;
1227 GOTO(out, rc = -ENOENT);
1228 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1229 (lsm->lsm_stripe_count));
1231 OBD_ALLOC_LARGE(lsm2, lsm_size);
1233 GOTO(out, rc = -ENOMEM);
/* o_nlink is overloaded here to carry the target OST index. */
1237 oa->o_nlink = ost_idx;
1238 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1239 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1240 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid, OBD_MD_FLTYPE |
1241 OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1242 memcpy(lsm2, lsm, lsm_size);
1243 rc = obd_create(exp, oa, &lsm2, &oti);
1245 OBD_FREE_LARGE(lsm2, lsm_size);
1248 ll_inode_size_unlock(inode, 0);
/* ioctl helper: copy a ll_recreate_obj request from userspace (root
 * only) and re-create the named object via ll_lov_recreate(). */
1253 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1255 struct ll_recreate_obj ucreat;
1258 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1261 if (cfs_copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1262 sizeof(struct ll_recreate_obj)))
1265 RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
1266 ucreat.lrc_ost_idx));
/* ioctl helper: copy a FID from userspace (root only), derive the
 * object id and OST index from its oid/seq fields, and re-create the
 * object via ll_lov_recreate(). */
1269 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1276 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1279 if (cfs_copy_from_user(&fid, (struct lu_fid *)arg,
1280 sizeof(struct lu_fid)))
/* Object id = oid plus low 16 bits of seq shifted up; the next 16
 * bits of seq select the OST index. */
1283 id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
1284 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1285 RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
/* Set striping information for @inode by performing an intent open that
 * carries the lov_user_md @lum.  Fails early if stripes already exist;
 * on success releases the open handle obtained for the operation. */
1288 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1289 int flags, struct lov_user_md *lum, int lum_size)
1291 struct lov_stripe_md *lsm;
1292 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1296 ll_inode_size_lock(inode, 0);
1297 lsm = ll_i2info(inode)->lli_smd;
/* Striping is write-once: refuse if the file already has stripes. */
1299 ll_inode_size_unlock(inode, 0);
1300 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1305 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1308 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1309 GOTO(out_req_free, rc = -ENOENT);
1310 rc = oit.d.lustre.it_status;
1312 GOTO(out_req_free, rc);
1314 ll_release_openhandle(file->f_dentry, &oit);
1317 ll_inode_size_unlock(inode, 0);
1318 ll_intent_release(&oit);
1321 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV striping EA for @filename (child of @inode) from the MDS.
 * Issues md_getattr_name() asking for the EA, validates the LOV magic
 * (v1 or v3 only), and byte-swaps the on-wire little-endian layout into
 * host order on big-endian machines.  On success *lmmp/*lmm_size point
 * into the reply buffer of *request, which the caller must eventually
 * release with ptlrpc_req_finished().
 */
1325 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1326 struct lov_mds_md **lmmp, int *lmm_size,
1327 struct ptlrpc_request **request)
1329 struct ll_sb_info *sbi = ll_i2sbi(inode);
1330 struct mdt_body *body;
1331 struct lov_mds_md *lmm = NULL;
1332 struct ptlrpc_request *req = NULL;
1333 struct md_op_data *op_data;
1336 rc = ll_get_max_mdsize(sbi, &lmmsize);
1340 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1341 strlen(filename), lmmsize,
1342 LUSTRE_OPC_ANY, NULL);
1343 if (op_data == NULL)
1346 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1347 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1348 ll_finish_md_op_data(op_data);
1350 CDEBUG(D_INFO, "md_getattr_name failed "
1351 "on %s: rc %d\n", filename, rc);
1355 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1356 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1358 lmmsize = body->eadatasize;
1360 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1362 GOTO(out, rc = -ENODATA);
1365 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1366 LASSERT(lmm != NULL);
1368 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1369 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1370 GOTO(out, rc = -EPROTO);
1374 * This is coming from the MDS, so is probably in
1375 * little endian. We convert it to host endian before
1376 * passing it to userspace.
/* cpu_to_le32 is the identity on little-endian hosts, so this branch
 * only runs (and swabs) on big-endian machines */
1378 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1379 /* if function called for directory - we should
1380 * avoid swab not existent lsm objects */
1381 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1382 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1383 if (S_ISREG(body->mode))
1384 lustre_swab_lov_user_md_objects(
1385 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1386 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1387 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1388 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1389 if (S_ISREG(body->mode))
1390 lustre_swab_lov_user_md_objects(
1391 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1392 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1398 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA ioctl: set a pre-existing striping EA on a file that
 * already has objects (MDS_OPEN_HAS_OBJS).  Copies one lov_user_md plus
 * one lov_user_ost_data from userspace into a temporary buffer and hands
 * it to ll_lov_setstripe_ea_info().  Requires CAP_SYS_ADMIN.
 */
1403 static int ll_lov_setea(struct inode *inode, struct file *file,
1406 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1407 struct lov_user_md *lump;
1408 int lum_size = sizeof(struct lov_user_md) +
1409 sizeof(struct lov_user_ost_data);
1413 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1416 OBD_ALLOC_LARGE(lump, lum_size);
1420 if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1421 OBD_FREE_LARGE(lump, lum_size);
1425 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1427 OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE ioctl: set the striping layout from a userspace
 * lov_user_md.  Copies the smaller v1 header first to read the magic,
 * then re-copies the full v3 structure if the magic says V3.  On success
 * it zeroes the user's lmm_stripe_count and re-reads the resulting
 * layout back through obd_iocontrol(LL_IOC_LOV_GETSTRIPE) so userspace
 * sees what was actually allocated.
 */
1431 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1434 struct lov_user_md_v3 lumv3;
1435 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1436 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1437 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1440 int flags = FMODE_WRITE;
1443 /* first try with v1 which is smaller than v3 */
1444 lum_size = sizeof(struct lov_user_md_v1);
1445 if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
1448 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1449 lum_size = sizeof(struct lov_user_md_v3);
1450 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1454 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1456 put_user(0, &lumv1p->lmm_stripe_count);
1457 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1458 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE ioctl: return the inode's current striping
 * (lli_smd) to userspace via the data export's obd_iocontrol handler.
 */
1464 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1466 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1471 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
/*
 * LL_IOC_GROUP_LOCK ioctl: take a cluster-wide group lock with group id
 * @arg on behalf of this file descriptor.  Only one group lock may be
 * held per fd (tracked by LL_FILE_GROUP_LOCKED in fd_flags).  The flag
 * check / set is done under lli_lock, and re-checked after the blocking
 * cl_get_grouplock() call to handle a racing second thread.
 */
1476 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1478 struct ll_inode_info *lli = ll_i2info(inode);
1479 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1480 struct ccc_grouplock grouplock;
1484 if (ll_file_nolock(file))
1485 RETURN(-EOPNOTSUPP);
1487 cfs_spin_lock(&lli->lli_lock);
1488 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1489 CWARN("group lock already existed with gid %lu\n",
1490 fd->fd_grouplock.cg_gid);
1491 cfs_spin_unlock(&lli->lli_lock);
1494 LASSERT(fd->fd_grouplock.cg_lock == NULL);
/* must drop the spinlock: cl_get_grouplock() can block */
1495 cfs_spin_unlock(&lli->lli_lock);
1497 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1498 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1502 cfs_spin_lock(&lli->lli_lock);
1503 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1504 cfs_spin_unlock(&lli->lli_lock);
1505 CERROR("another thread just won the race\n");
1506 cl_put_grouplock(&grouplock);
1510 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1511 fd->fd_grouplock = grouplock;
1512 cfs_spin_unlock(&lli->lli_lock);
1514 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK ioctl: release the group lock held by this fd.
 * Verifies under lli_lock that a group lock is held and that its gid
 * matches @arg, then clears the fd state before dropping the lock via
 * cl_put_grouplock() outside the spinlock.
 */
1518 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1520 struct ll_inode_info *lli = ll_i2info(inode);
1521 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1522 struct ccc_grouplock grouplock;
1525 cfs_spin_lock(&lli->lli_lock);
1526 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1527 cfs_spin_unlock(&lli->lli_lock);
1528 CWARN("no group lock held\n");
1531 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1533 if (fd->fd_grouplock.cg_gid != arg) {
1534 CWARN("group lock %lu doesn't match current id %lu\n",
1535 arg, fd->fd_grouplock.cg_gid);
1536 cfs_spin_unlock(&lli->lli_lock);
/* copy out and clear fd state while still under lli_lock */
1540 grouplock = fd->fd_grouplock;
1541 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1542 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1543 cfs_spin_unlock(&lli->lli_lock);
1545 cl_put_grouplock(&grouplock);
1546 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1551 * Close inode open handle
1553 * \param dentry [in] dentry which contains the inode
1554 * \param it [in,out] intent which contains open info and result
1557 * \retval <0 failure
1559 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1561 struct inode *inode = dentry->d_inode;
1562 struct obd_client_handle *och;
1568 /* Root ? Do nothing. */
1569 if (dentry->d_inode->i_sb->s_root == dentry)
1572 /* No open handle to close? Move away */
1573 if (!it_disposition(it, DISP_OPEN_OPEN))
1576 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
/* build a temporary client handle from the intent, then close it on
 * the MDS so the server-side open state is released */
1578 OBD_ALLOC(och, sizeof(*och));
1580 GOTO(out, rc = -ENOMEM);
1582 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1583 ll_i2info(inode), it, och);
1585 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1588 /* this one is in place of ll_file_open */
1589 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1590 ptlrpc_req_finished(it->d.lustre.it_data);
1591 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1596 * Get size for inode for which FIEMAP mapping is requested.
1597 * Make the FIEMAP get_info call and returns the result.
1599 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1602 struct obd_export *exp = ll_i2dtexp(inode);
1603 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1604 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1605 int vallen = num_bytes;
1609 /* Checks for fiemap flags */
1610 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* report the unsupported flags back to the caller in fm_flags */
1611 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1615 /* Check for FIEMAP_FLAG_SYNC */
1616 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1617 rc = filemap_fdatawrite(inode->i_mapping);
1622 /* If the stripe_count > 1 and the application does not understand
1623 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1625 if (lsm->lsm_stripe_count > 1 &&
1626 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1629 fm_key.oa.o_id = lsm->lsm_object_id;
1630 fm_key.oa.o_seq = lsm->lsm_object_seq;
1631 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1633 obdo_from_inode(&fm_key.oa, inode, &ll_i2info(inode)->lli_fid,
1635 /* If filesize is 0, then there would be no objects for mapping */
1636 if (fm_key.oa.o_size == 0) {
1637 fiemap->fm_mapped_extents = 0;
1641 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
/* the actual extent mapping is delegated to the LOV/OSC layers */
1643 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1645 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH ioctl: resolve a FID to a path name.  Copies a
 * getinfo_fid2path header from userspace to learn gf_pathlen, allocates
 * an output buffer large enough for the path, forwards the request to
 * the MDC via obd_iocontrol(), and copies the result back to userspace.
 */
1650 int ll_fid2path(struct obd_export *exp, void *arg)
1652 struct getinfo_fid2path *gfout, *gfin;
1656 /* Need to get the buflen */
1657 OBD_ALLOC_PTR(gfin);
1660 if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
1665 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1666 OBD_ALLOC(gfout, outsize);
1667 if (gfout == NULL) {
1671 memcpy(gfout, gfin, sizeof(*gfout));
1674 /* Call mdc_iocontrol */
1675 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1678 if (cfs_copy_to_user(arg, gfout, outsize))
1682 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP ioctl: userspace FIEMAP entry point.  Reads
 * fm_extent_count first to size the kernel buffer (header plus that
 * many ll_fiemap_extent slots), copies the request in, runs the mapping
 * via ll_do_fiemap(), and copies back the header plus the mapped
 * extents.
 */
1686 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1688 struct ll_user_fiemap *fiemap_s;
1689 size_t num_bytes, ret_bytes;
1690 unsigned int extent_count;
1693 /* Get the extent count so we can calculate the size of
1694 * required fiemap buffer */
1695 if (get_user(extent_count,
1696 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1698 num_bytes = sizeof(*fiemap_s) + (extent_count *
1699 sizeof(struct ll_fiemap_extent));
1701 OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1702 if (fiemap_s == NULL)
1705 /* get the fiemap value */
1706 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1708 GOTO(error, rc = -EFAULT);
1710 /* If fm_extent_count is non-zero, read the first extent since
1711 * it is used to calculate end_offset and device from previous
1714 if (copy_from_user(&fiemap_s->fm_extents[0],
1715 (char __user *)arg + sizeof(*fiemap_s),
1716 sizeof(struct ll_fiemap_extent)))
1717 GOTO(error, rc = -EFAULT);
1720 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1724 ret_bytes = sizeof(struct ll_user_fiemap);
1726 if (extent_count != 0)
1727 ret_bytes += (fiemap_s->fm_mapped_extents *
1728 sizeof(struct ll_fiemap_extent));
1730 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1734 OBD_FREE_LARGE(fiemap_s, num_bytes);
/*
 * Main ioctl dispatcher for Lustre regular files.  Compiled with either
 * the unlocked_ioctl signature (file only) or the legacy ioctl
 * signature (inode + file), selected by HAVE_UNLOCKED_IOCTL.  Known
 * Lustre commands are dispatched to their handlers; tty ioctls are
 * rejected early; unknown commands fall through to the dynamically
 * registered handlers (ll_iocontrol_call) and finally to
 * obd_iocontrol() on the data export.
 */
1738 #ifdef HAVE_UNLOCKED_IOCTL
1739 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1741 struct inode *inode = file->f_dentry->d_inode;
1743 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1747 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1751 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1752 inode->i_generation, inode, cmd);
1753 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1755 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1756 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1760 case LL_IOC_GETFLAGS:
1761 /* Get the current value of the file flags */
1762 return put_user(fd->fd_flags, (int *)arg);
1763 case LL_IOC_SETFLAGS:
1764 case LL_IOC_CLRFLAGS:
1765 /* Set or clear specific file flags */
1766 /* XXX This probably needs checks to ensure the flags are
1767 * not abused, and to handle any flag side effects.
1769 if (get_user(flags, (int *) arg))
1772 if (cmd == LL_IOC_SETFLAGS) {
1773 if ((flags & LL_FILE_IGNORE_LOCK) &&
1774 !(file->f_flags & O_DIRECT)) {
1775 CERROR("%s: unable to disable locking on "
1776 "non-O_DIRECT file\n", current->comm);
1780 fd->fd_flags |= flags;
1782 fd->fd_flags &= ~flags;
1785 case LL_IOC_LOV_SETSTRIPE:
1786 RETURN(ll_lov_setstripe(inode, file, arg));
1787 case LL_IOC_LOV_SETEA:
1788 RETURN(ll_lov_setea(inode, file, arg));
1789 case LL_IOC_LOV_GETSTRIPE:
1790 RETURN(ll_lov_getstripe(inode, arg));
1791 case LL_IOC_RECREATE_OBJ:
1792 RETURN(ll_lov_recreate_obj(inode, arg));
1793 case LL_IOC_RECREATE_FID:
1794 RETURN(ll_lov_recreate_fid(inode, arg));
1795 case FSFILT_IOC_FIEMAP:
1796 RETURN(ll_ioctl_fiemap(inode, arg));
1797 case FSFILT_IOC_GETFLAGS:
1798 case FSFILT_IOC_SETFLAGS:
1799 RETURN(ll_iocontrol(inode, file, cmd, arg));
1800 case FSFILT_IOC_GETVERSION_OLD:
1801 case FSFILT_IOC_GETVERSION:
1802 RETURN(put_user(inode->i_generation, (int *)arg));
1803 case LL_IOC_GROUP_LOCK:
1804 RETURN(ll_get_grouplock(inode, file, arg));
1805 case LL_IOC_GROUP_UNLOCK:
1806 RETURN(ll_put_grouplock(inode, file, arg));
1807 case IOC_OBD_STATFS:
1808 RETURN(ll_obd_statfs(inode, (void *)arg));
1810 /* We need to special case any other ioctls we want to handle,
1811 * to send them to the MDS/OST as appropriate and to properly
1812 * network encode the arg field.
1813 case FSFILT_IOC_SETVERSION_OLD:
1814 case FSFILT_IOC_SETVERSION:
1816 case LL_IOC_FLUSHCTX:
1817 RETURN(ll_flush_ctx(inode));
1818 case LL_IOC_PATH2FID: {
1819 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1820 sizeof(struct lu_fid)))
1825 case OBD_IOC_FID2PATH:
1826 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1828 case LL_IOC_GET_MDTIDX: {
1831 mdtidx = ll_get_mdt_idx(inode);
1835 if (put_user((int)mdtidx, (int*)arg))
/* default: try dynamically registered handlers, then the OBD layer */
1845 ll_iocontrol_call(inode, file, cmd, arg, &err))
1848 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.  For SEEK_END (origin == 2) the file size must
 * be glimpsed from the OSTs first (cl_glimpse_size) so i_size_read()
 * reflects the cluster-wide size; SEEK_CUR adds the current f_pos.  The
 * result is validated against ll_file_maxbytes() before updating f_pos.
 */
1854 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1856 struct inode *inode = file->f_dentry->d_inode;
1859 retval = offset + ((origin == 2) ? i_size_read(inode) :
1860 (origin == 1) ? file->f_pos : 0);
1861 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1862 inode->i_ino, inode->i_generation, inode, retval, retval,
1863 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1864 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1866 if (origin == 2) { /* SEEK_END */
1867 int nonblock = 0, rc;
1869 if (file->f_flags & O_NONBLOCK)
1870 nonblock = LDLM_FL_BLOCK_NOWAIT;
1872 rc = cl_glimpse_size(inode);
1876 offset += i_size_read(inode);
1877 } else if (origin == 1) { /* SEEK_CUR */
1878 offset += file->f_pos;
1882 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1883 if (offset != file->f_pos) {
1884 file->f_pos = offset;
/*
 * flush() file operation (called on close(2)).  Does no I/O itself; it
 * only reports back errors already recorded for this inode: a sticky
 * write error (lli_write_rc) or async-writeback errors collected in
 * lli_async_rc and the per-stripe async rc.  Returns -EIO if any error
 * was pending, 0 otherwise.  The fl_owner_t parameter is present only
 * on kernels with HAVE_FLUSH_OWNER_ID.
 */
1892 #ifdef HAVE_FLUSH_OWNER_ID
1893 int ll_flush(struct file *file, fl_owner_t id)
1895 int ll_flush(struct file *file)
1898 struct inode *inode = file->f_dentry->d_inode;
1899 struct ll_inode_info *lli = ll_i2info(inode);
1900 struct lov_stripe_md *lsm = lli->lli_smd;
1903 /* the application should know write failure already. */
1904 if (lli->lli_write_rc)
1907 /* catch async errors that were recorded back when async writeback
1908 * failed for pages in this mapping. */
/* read-and-clear: the error is reported once */
1909 rc = lli->lli_async_rc;
1910 lli->lli_async_rc = 0;
1912 err = lov_test_and_clear_async_rc(lsm);
1917 return rc ? -EIO : 0;
/*
 * fsync() file operation.  Waits for in-flight page I/O
 * (filemap_fdatawait), folds in recorded async writeback errors, syncs
 * the metadata via md_sync() on the MDS (with a capability if enabled),
 * then syncs the data objects via obd_sync_rqset() on the OSTs using a
 * temporary obd_info/obdo pair describing this inode's objects.  The
 * final OST sync result is latched into lli_write_rc.
 */
1920 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1922 struct inode *inode = dentry->d_inode;
1923 struct ll_inode_info *lli = ll_i2info(inode);
1924 struct lov_stripe_md *lsm = lli->lli_smd;
1925 struct ptlrpc_request *req;
1926 struct obd_capa *oc;
1929 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1930 inode->i_generation, inode);
1931 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1933 /* fsync's caller has already called _fdata{sync,write}, we want
1934 * that IO to finish before calling the osc and mdc sync methods */
1935 rc = filemap_fdatawait(inode->i_mapping);
1937 /* catch async errors that were recorded back when async writeback
1938 * failed for pages in this mapping. */
1939 err = lli->lli_async_rc;
1940 lli->lli_async_rc = 0;
1944 err = lov_test_and_clear_async_rc(lsm);
1949 oc = ll_mdscapa_get(inode);
1950 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1956 ptlrpc_req_finished(req);
1959 struct obd_info *oinfo;
1961 OBD_ALLOC_PTR(oinfo);
/* on allocation failure preserve any earlier error over -ENOMEM */
1963 RETURN(rc ? rc : -ENOMEM);
1964 OBDO_ALLOC(oinfo->oi_oa);
1965 if (!oinfo->oi_oa) {
1966 OBD_FREE_PTR(oinfo);
1967 RETURN(rc ? rc : -ENOMEM);
1969 oinfo->oi_oa->o_id = lsm->lsm_object_id;
1970 oinfo->oi_oa->o_seq = lsm->lsm_object_seq;
1971 oinfo->oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1972 obdo_from_inode(oinfo->oi_oa, inode, &ll_i2info(inode)->lli_fid,
1973 OBD_MD_FLTYPE | OBD_MD_FLATIME |
1974 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1977 oinfo->oi_capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1978 err = obd_sync_rqset(ll_i2sbi(inode)->ll_dt_exp, oinfo, 0,
1980 capa_put(oinfo->oi_capa);
1983 OBDO_FREE(oinfo->oi_oa);
1984 OBD_FREE_PTR(oinfo);
1985 lli->lli_write_rc = err < 0 ? : 0;
/*
 * flock()/fcntl() lock operation.  Translates the kernel's file_lock
 * (FL_FLOCK whole-file locks or FL_POSIX byte-range locks) into an
 * LDLM_FLOCK enqueue on the MDS: lock type maps to LCK_PR (read),
 * LCK_PW (write), or LCK_NL (unlock), and the command maps to blocking,
 * non-blocking (LDLM_FL_BLOCK_NOWAIT) or test (LDLM_FL_TEST_LOCK)
 * flags.  On success the lock is mirrored into the local kernel lock
 * tables so the VFS bookkeeping stays consistent.
 */
1991 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1993 struct inode *inode = file->f_dentry->d_inode;
1994 struct ll_sb_info *sbi = ll_i2sbi(inode);
1995 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1996 .ei_cb_cp =ldlm_flock_completion_ast,
1997 .ei_cbdata = file_lock };
1998 struct md_op_data *op_data;
1999 struct lustre_handle lockh = {0};
2000 ldlm_policy_data_t flock = {{0}};
2005 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2006 inode->i_ino, file_lock);
2008 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2010 if (file_lock->fl_flags & FL_FLOCK) {
2011 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2012 /* flocks are whole-file locks */
2013 flock.l_flock.end = OFFSET_MAX;
2014 /* For flocks owner is determined by the local file desctiptor*/
2015 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2016 } else if (file_lock->fl_flags & FL_POSIX) {
2017 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2018 flock.l_flock.start = file_lock->fl_start;
2019 flock.l_flock.end = file_lock->fl_end;
2023 flock.l_flock.pid = file_lock->fl_pid;
2025 switch (file_lock->fl_type) {
2027 einfo.ei_mode = LCK_PR;
2030 /* An unlock request may or may not have any relation to
2031 * existing locks so we may not be able to pass a lock handle
2032 * via a normal ldlm_lock_cancel() request. The request may even
2033 * unlock a byte range in the middle of an existing lock. In
2034 * order to process an unlock request we need all of the same
2035 * information that is given with a normal read or write record
2036 * lock request. To avoid creating another ldlm unlock (cancel)
2037 * message we'll treat a LCK_NL flock request as an unlock. */
2038 einfo.ei_mode = LCK_NL;
2041 einfo.ei_mode = LCK_PW;
2044 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
2059 flags = LDLM_FL_BLOCK_NOWAIT;
2065 flags = LDLM_FL_TEST_LOCK;
2066 /* Save the old mode so that if the mode in the lock changes we
2067 * can decrement the appropriate reader or writer refcount. */
2068 file_lock->fl_type = einfo.ei_mode;
2071 CERROR("unknown fcntl lock command: %d\n", cmd);
2075 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2076 LUSTRE_OPC_ANY, NULL);
2077 if (IS_ERR(op_data))
2078 RETURN(PTR_ERR(op_data));
2080 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2081 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2082 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2084 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2085 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2087 ll_finish_md_op_data(op_data);
/* keep the local kernel lock state in sync with the cluster lock */
2089 if ((file_lock->fl_flags & FL_FLOCK) &&
2090 (rc == 0 || file_lock->fl_type == F_UNLCK))
2091 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2092 #ifdef HAVE_F_OP_FLOCK
2093 if ((file_lock->fl_flags & FL_POSIX) &&
2094 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2095 !(flags & LDLM_FL_TEST_LOCK))
2096 posix_lock_file_wait(file, file_lock);
/* lock/flock handler for the -o noflock mount option; the body (elided
 * here) rejects the request rather than taking any lock. */
2102 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking a reference) whether this client already holds an
 * MDS inodebits lock covering @bits on @inode.  LCK_MINMODE means "any
 * mode"; the match uses LDLM_FL_TEST_LOCK so nothing is pinned, and
 * LDLM_FL_CBPENDING so locks already scheduled for cancellation still
 * count.
 */
2109 int ll_have_md_lock(struct inode *inode, __u64 bits, ldlm_mode_t l_req_mode)
2111 struct lustre_handle lockh;
2112 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2113 ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2114 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2122 fid = &ll_i2info(inode)->lli_fid;
2123 CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2124 ldlm_lockname[mode]);
2126 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2127 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
/*
 * Like ll_have_md_lock() but actually takes a reference: matches an MDS
 * inodebits lock covering @bits in any mode and returns its handle in
 * @lockh.  No LDLM_FL_TEST_LOCK here, so the caller owns a reference on
 * the matched lock and must release it.
 */
2134 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2135 struct lustre_handle *lockh)
2137 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2143 fid = &ll_i2info(inode)->lli_fid;
2144 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2146 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2147 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2148 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process the result of an inode revalidation RPC.  -ENOENT means
 * the file was unlinked on the server while we held it: for special
 * files this is tolerated (success is returned after updating nlink);
 * any other error is logged and propagated.
 */
2152 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2153 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2154 * and return success */
2156 /* This path cannot be hit for regular files unless in
2157 * case of obscure races, so no need to to validate
2159 if (!S_ISREG(inode->i_mode) &&
2160 !S_ISDIR(inode->i_mode))
2165 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate a dentry/inode against the MDS.  Two strategies:
 *  - if the export supports OBD_CONNECT_ATTRFID, do an intent
 *    getattr/lookup by FID (md_intent_lock), finishing with
 *    ll_revalidate_it_finish() and unhashing the dentry if the file
 *    turned out to be unlinked;
 *  - otherwise, if no matching MD lock is cached locally, do a plain
 *    md_getattr() (requesting the EA for regular files) and refresh the
 *    inode via ll_prep_inode().
 * @ibits selects which inodebits must be valid (LOOKUP vs UPDATE).
 */
2173 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2176 struct inode *inode = dentry->d_inode;
2177 struct ptlrpc_request *req = NULL;
2178 struct ll_sb_info *sbi;
2179 struct obd_export *exp;
2184 CERROR("REPORT THIS LINE TO PETER\n");
2187 sbi = ll_i2sbi(inode);
2189 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2190 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2192 exp = ll_i2mdexp(inode);
2194 /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2195 * But under CMD case, it caused some lock issues, should be fixed
2196 * with new CMD ibits lock. See bug 12718 */
2197 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2198 struct lookup_intent oit = { .it_op = IT_GETATTR };
2199 struct md_op_data *op_data;
2201 if (ibits == MDS_INODELOCK_LOOKUP)
2202 oit.it_op = IT_LOOKUP;
2204 /* Call getattr by fid, so do not provide name at all. */
2205 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2206 dentry->d_inode, NULL, 0, 0,
2207 LUSTRE_OPC_ANY, NULL);
2208 if (IS_ERR(op_data))
2209 RETURN(PTR_ERR(op_data));
2211 oit.it_create_mode |= M_CHECK_STALE;
2212 rc = md_intent_lock(exp, op_data, NULL, 0,
2213 /* we are not interested in name
2216 ll_md_blocking_ast, 0);
2217 ll_finish_md_op_data(op_data);
2218 oit.it_create_mode &= ~M_CHECK_STALE;
2220 rc = ll_inode_revalidate_fini(inode, rc);
2224 rc = ll_revalidate_it_finish(req, &oit, dentry);
2226 ll_intent_release(&oit);
2230 /* Unlinked? Unhash dentry, so it is not picked up later by
2231 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2232 here to preserve get_cwd functionality on 2.6.
2234 if (!dentry->d_inode->i_nlink) {
2235 cfs_spin_lock(&ll_lookup_lock);
2236 spin_lock(&dcache_lock);
2237 ll_drop_dentry(dentry);
2238 spin_unlock(&dcache_lock);
2239 cfs_spin_unlock(&ll_lookup_lock);
2242 ll_lookup_finish_locks(&oit, dentry);
2243 } else if (!ll_have_md_lock(dentry->d_inode, ibits, LCK_MINMODE)) {
2244 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2245 obd_valid valid = OBD_MD_FLGETATTR;
2246 struct md_op_data *op_data;
/* regular files also need the striping EA so size can be resolved */
2249 if (S_ISREG(inode->i_mode)) {
2250 rc = ll_get_max_mdsize(sbi, &ealen);
2253 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2256 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2257 0, ealen, LUSTRE_OPC_ANY,
2259 if (op_data == NULL)
2262 op_data->op_valid = valid;
2263 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2264 * capa for this inode. Because we only keep capas of dirs
2266 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2267 ll_finish_md_op_data(op_data);
2269 rc = ll_inode_revalidate_fini(inode, rc);
2273 rc = ll_prep_inode(&inode, req, NULL);
2276 ptlrpc_req_finished(req);
/*
 * Revalidate metadata and then size: after __ll_inode_revalidate_it(),
 * if the file has no objects yet (lli_smd == NULL) take the cached
 * timestamps from the lock value block; otherwise glimpse the size from
 * the OSTs (which prefers locally cached writes).
 */
2280 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2283 struct inode *inode = dentry->d_inode;
2287 rc = __ll_inode_revalidate_it(dentry, it, ibits);
2289 /* if object not yet allocated, don't validate size */
2290 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
2291 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2292 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2293 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2297 /* cl_glimpse_size will prefer locally cached writes if they extend
2301 rc = cl_glimpse_size(inode);
/*
 * getattr with an explicit lookup intent: revalidate both UPDATE and
 * LOOKUP inodebits, then fill the kstat from the (now fresh) inode
 * fields.  With a 32-bit userspace API the ino is rebuilt from the FID
 * so it fits in 32 bits.
 */
2306 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2307 struct lookup_intent *it, struct kstat *stat)
2309 struct inode *inode = de->d_inode;
2310 struct ll_sb_info *sbi = ll_i2sbi(inode);
2311 struct ll_inode_info *lli = ll_i2info(inode);
2314 res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
2315 MDS_INODELOCK_LOOKUP);
2316 ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
2321 stat->dev = inode->i_sb->s_dev;
2322 if (ll_need_32bit_api(sbi))
2323 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
2325 stat->ino = inode->i_ino;
2326 stat->mode = inode->i_mode;
2327 stat->nlink = inode->i_nlink;
2328 stat->uid = inode->i_uid;
2329 stat->gid = inode->i_gid;
2330 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2331 stat->atime = inode->i_atime;
2332 stat->mtime = inode->i_mtime;
2333 stat->ctime = inode->i_ctime;
2334 #ifdef HAVE_INODE_BLKSIZE
2335 stat->blksize = inode->i_blksize;
2337 stat->blksize = 1 << inode->i_blkbits;
2340 stat->size = i_size_read(inode);
2341 stat->blocks = inode->i_blocks;
/* VFS getattr entry point: wraps ll_getattr_it() with a fresh
 * IT_GETATTR intent. */
2345 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2347 struct lookup_intent it = { .it_op = IT_GETATTR };
2349 return ll_getattr_it(mnt, de, &it, stat);
/*
 * In-kernel fiemap (inode_operations->fiemap) entry point, available
 * when the kernel provides linux/fiemap.h.  Translates the kernel's
 * fiemap_extent_info into a ll_user_fiemap buffer, runs ll_do_fiemap(),
 * and copies flags / extent count / extents back into fieinfo.
 */
2352 #ifdef HAVE_LINUX_FIEMAP_H
2353 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2354 __u64 start, __u64 len)
2358 struct ll_user_fiemap *fiemap;
2359 unsigned int extent_count = fieinfo->fi_extents_max;
2361 num_bytes = sizeof(*fiemap) + (extent_count *
2362 sizeof(struct ll_fiemap_extent));
2363 OBD_ALLOC_LARGE(fiemap, num_bytes);
2368 fiemap->fm_flags = fieinfo->fi_flags;
2369 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2370 fiemap->fm_start = start;
2371 fiemap->fm_length = len;
/* seed the first extent slot from the caller's buffer, mirroring the
 * ioctl path which uses it for continuation state */
2372 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2373 sizeof(struct ll_fiemap_extent));
2375 rc = ll_do_fiemap(inode, fiemap, num_bytes);
2377 fieinfo->fi_flags = fiemap->fm_flags;
2378 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2379 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2380 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2382 OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * POSIX ACL permission callback.  Takes a reference-counted duplicate
 * of the cached ACL under lli_lock, evaluates it with
 * posix_acl_permission(), and releases it.  Compiled out (body elided)
 * without CONFIG_FS_POSIX_ACL.
 */
2389 int lustre_check_acl(struct inode *inode, int mask)
2391 #ifdef CONFIG_FS_POSIX_ACL
2392 struct ll_inode_info *lli = ll_i2info(inode);
2393 struct posix_acl *acl;
2397 cfs_spin_lock(&lli->lli_lock);
2398 acl = posix_acl_dup(lli->lli_posix_acl);
2399 cfs_spin_unlock(&lli->lli_lock);
2404 rc = posix_acl_permission(inode, acl, mask);
2405 posix_acl_release(acl);
/*
 * permission() for kernels >= 2.6.10 (with or without the nameidata
 * argument, per HAVE_INODE_PERMISION_2ARGS).  The root inode is
 * revalidated first because lookup never validates it; remote clients
 * delegate to lustre_check_remote_perm(); everyone else goes through
 * generic_permission() with lustre_check_acl as the ACL callback.
 */
2413 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2414 #ifndef HAVE_INODE_PERMISION_2ARGS
2415 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2417 int ll_inode_permission(struct inode *inode, int mask)
2423 /* as root inode are NOT getting validated in lookup operation,
2424 * need to do it before permission check. */
2426 if (inode == inode->i_sb->s_root->d_inode) {
2427 struct lookup_intent it = { .it_op = IT_LOOKUP };
2429 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2430 MDS_INODELOCK_LOOKUP);
2435 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2436 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2438 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2439 return lustre_check_remote_perm(inode, mask);
2441 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2442 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * permission() for older kernels: an open-coded equivalent of
 * generic_permission().  Checks read-only/immutable restrictions, then
 * owner bits, ACLs, group bits, other bits, and finally the DAC
 * capabilities (CAP_DAC_OVERRIDE / CAP_DAC_READ_SEARCH).  Remote
 * clients still delegate to lustre_check_remote_perm().
 * NOTE(review): several branch bodies are elided in this excerpt.
 */
2447 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2449 int mode = inode->i_mode;
2452 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2453 inode->i_ino, inode->i_generation, inode, mask);
2455 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2456 return lustre_check_remote_perm(inode, mask);
2458 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2460 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2461 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2463 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2465 if (cfs_curproc_fsuid() == inode->i_uid) {
2468 if (((mode >> 3) & mask & S_IRWXO) != mask)
2470 rc = lustre_check_acl(inode, mask);
2474 goto check_capabilities;
2478 if (cfs_curproc_is_in_groups(inode->i_gid))
2481 if ((mode & mask & S_IRWXO) == mask)
2485 if (!(mask & MAY_EXEC) ||
2486 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2487 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2490 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2491 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select the vectored-I/O file_operations member names and handlers for
 * this kernel: old kernels expose readv/writev, newer ones aio_read/
 * aio_write.  Used below when building the file_operations tables. */
2498 #ifdef HAVE_FILE_READV
2499 #define READ_METHOD readv
2500 #define READ_FUNCTION ll_file_readv
2501 #define WRITE_METHOD writev
2502 #define WRITE_FUNCTION ll_file_writev
2504 #define READ_METHOD aio_read
2505 #define READ_FUNCTION ll_file_aio_read
2506 #define WRITE_METHOD aio_write
2507 #define WRITE_FUNCTION ll_file_aio_write
2510 /* -o localflock - only provides locally consistent flock locks */
/* default file_operations table: no .flock/.lock entries, so flock
 * falls back to the kernel's local (single-node) implementation */
2511 struct file_operations ll_file_operations = {
2512 .read = ll_file_read,
2513 .READ_METHOD = READ_FUNCTION,
2514 .write = ll_file_write,
2515 .WRITE_METHOD = WRITE_FUNCTION,
2516 #ifdef HAVE_UNLOCKED_IOCTL
2517 .unlocked_ioctl = ll_file_ioctl,
2519 .ioctl = ll_file_ioctl,
2521 .open = ll_file_open,
2522 .release = ll_file_release,
2523 .mmap = ll_file_mmap,
2524 .llseek = ll_file_seek,
2525 #ifdef HAVE_KERNEL_SENDFILE
2526 .sendfile = ll_file_sendfile,
2528 #ifdef HAVE_KERNEL_SPLICE_READ
2529 .splice_read = ll_file_splice_read,
/* file_operations for -o flock mounts: identical to the default table
 * but wires .flock (or .lock on kernels without f_op->flock) to
 * ll_file_flock for cluster-coherent locking */
2535 struct file_operations ll_file_operations_flock = {
2536 .read = ll_file_read,
2537 .READ_METHOD = READ_FUNCTION,
2538 .write = ll_file_write,
2539 .WRITE_METHOD = WRITE_FUNCTION,
2540 #ifdef HAVE_UNLOCKED_IOCTL
2541 .unlocked_ioctl = ll_file_ioctl,
2543 .ioctl = ll_file_ioctl,
2545 .open = ll_file_open,
2546 .release = ll_file_release,
2547 .mmap = ll_file_mmap,
2548 .llseek = ll_file_seek,
2549 #ifdef HAVE_KERNEL_SENDFILE
2550 .sendfile = ll_file_sendfile,
2552 #ifdef HAVE_KERNEL_SPLICE_READ
2553 .splice_read = ll_file_splice_read,
2557 #ifdef HAVE_F_OP_FLOCK
2558 .flock = ll_file_flock,
2560 .lock = ll_file_flock
2563 /* These are for -o noflock - to return ENOSYS on flock calls */
2564 struct file_operations ll_file_operations_noflock = {
2565 .read = ll_file_read,
2566 .READ_METHOD = READ_FUNCTION,
2567 .write = ll_file_write,
2568 .WRITE_METHOD = WRITE_FUNCTION,
2569 #ifdef HAVE_UNLOCKED_IOCTL
2570 .unlocked_ioctl = ll_file_ioctl,
2572 .ioctl = ll_file_ioctl,
2574 .open = ll_file_open,
2575 .release = ll_file_release,
2576 .mmap = ll_file_mmap,
2577 .llseek = ll_file_seek,
2578 #ifdef HAVE_KERNEL_SENDFILE
2579 .sendfile = ll_file_sendfile,
2581 #ifdef HAVE_KERNEL_SPLICE_READ
2582 .splice_read = ll_file_splice_read,
2586 #ifdef HAVE_F_OP_FLOCK
2587 .flock = ll_file_noflock,
2589 .lock = ll_file_noflock
/* inode_operations for regular Lustre files; .fiemap only when the
 * kernel provides linux/fiemap.h */
2592 struct inode_operations ll_file_inode_operations = {
2593 .setattr = ll_setattr,
2594 .truncate = ll_truncate,
2595 .getattr = ll_getattr,
2596 .permission = ll_inode_permission,
2597 .setxattr = ll_setxattr,
2598 .getxattr = ll_getxattr,
2599 .listxattr = ll_listxattr,
2600 .removexattr = ll_removexattr,
2601 #ifdef HAVE_LINUX_FIEMAP_H
2602 .fiemap = ll_fiemap,
2606 /* dynamic ioctl number support routins */
/* global registry of dynamically registered ioctl handlers, protected
 * by the ioc_sem rw-semaphore */
2607 static struct llioc_ctl_data {
2608 cfs_rw_semaphore_t ioc_sem;
2609 cfs_list_t ioc_head;
2611 __RWSEM_INITIALIZER(llioc.ioc_sem),
2612 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* one registration record: callback plus the list of ioctl command
 * numbers it handles (flexible trailing array iocd_cmd) */
2617 cfs_list_t iocd_list;
2618 unsigned int iocd_size;
2619 llioc_callback_t iocd_cb;
2620 unsigned int iocd_count;
2621 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: callback @cb for the @count command
 * numbers in @cmd.  Allocates an llioc_data record (header plus command
 * array), copies the commands in, and appends it to the global list
 * under the write semaphore.  The returned pointer is the opaque
 * "magic" later passed to ll_iocontrol_unregister().
 */
2624 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2627 struct llioc_data *in_data = NULL;
2630 if (cb == NULL || cmd == NULL ||
2631 count > LLIOC_MAX_CMD || count < 0)
2634 size = sizeof(*in_data) + count * sizeof(unsigned int);
2635 OBD_ALLOC(in_data, size);
2636 if (in_data == NULL)
2639 memset(in_data, 0, sizeof(*in_data));
2640 in_data->iocd_size = size;
2641 in_data->iocd_cb = cb;
2642 in_data->iocd_count = count;
2643 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2645 cfs_down_write(&llioc.ioc_sem);
2646 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2647 cfs_up_write(&llioc.ioc_sem);
/*
 * Unregister a dynamic ioctl handler previously returned by
 * ll_iocontrol_register().  Walks the registry under the write
 * semaphore, unlinks and frees the matching record; warns if @magic is
 * not found.
 */
2652 void ll_iocontrol_unregister(void *magic)
2654 struct llioc_data *tmp;
2659 cfs_down_write(&llioc.ioc_sem);
2660 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2662 unsigned int size = tmp->iocd_size;
2664 cfs_list_del(&tmp->iocd_list);
2665 cfs_up_write(&llioc.ioc_sem);
2667 OBD_FREE(tmp, size);
2671 cfs_up_write(&llioc.ioc_sem);
2673 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2676 EXPORT_SYMBOL(ll_iocontrol_register);
2677 EXPORT_SYMBOL(ll_iocontrol_unregister);
2679 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2680 unsigned int cmd, unsigned long arg, int *rcp)
2682 enum llioc_iter ret = LLIOC_CONT;
2683 struct llioc_data *data;
2684 int rc = -EINVAL, i;
2686 cfs_down_read(&llioc.ioc_sem);
2687 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2688 for (i = 0; i < data->iocd_count; i++) {
2689 if (cmd != data->iocd_cmd[i])
2692 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2696 if (ret == LLIOC_STOP)
2699 cfs_up_read(&llioc.ioc_sem);