1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a struct ll_file_data from the ll_file_data_slab cache with the
 * CFS_ALLOC_IO gfp mask.
 * NOTE(review): interior lines are elided in this view; the NULL-check and
 * return of @fd are not visible here. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Return a struct ll_file_data to its slab cache. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Copy the inode's current attributes (mode, times, size, blocks, flags,
 * ioepoch) into @op_data, record the open handle @fh and take an MDS
 * capability reference for the operation.
 * NOTE(review): callers are expected to release op_capa1; not visible here. */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the Lustre-extended iattr wrapper, hence the cast. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
79 ll_inode_to_ext_flags(inode->i_flags);
80 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
82 op_data->op_handle = *fh;
83 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Closes the IO epoch and packs all the attributes into @op_data for
 * an MDS close RPC.  Size/blocks are only sent when the MDS will believe
 * them (no SOM support, or not a regular file). */
87 * Closes the IO epoch and packs all the attributes into @op_data for
90 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
91 struct obd_client_handle *och)
95 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
96 ATTR_MTIME_SET | ATTR_CTIME_SET;
98 if (!(och->och_flags & FMODE_WRITE))
101 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
102 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* NOTE(review): &och (obd_client_handle **) is intentional here — the
 * epoch-close path may consume the handle; confirm against ll_ioepoch_close. */
104 ll_ioepoch_close(inode, op_data, &och, 0);
107 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
108 ll_prep_md_op_data(op_data, inode, NULL, NULL,
109 0, 0, LUSTRE_OPC_ANY, NULL);
/* Send a close RPC to the MDS for the open handle @och.  If the MDS
 * requests a Size-on-MDS update, fetch attributes from the OSTs and send
 * the setattr back (ll_som_update).  On a DONE_WRITING-pending handle the
 * och is kept alive and queued instead of freed.
 * NOTE(review): many interior lines (error branches, epoch handling) are
 * elided in this view. */
113 static int ll_close_inode_openhandle(struct obd_export *md_exp,
115 struct obd_client_handle *och)
117 struct obd_export *exp = ll_i2mdexp(inode);
118 struct md_op_data *op_data;
119 struct ptlrpc_request *req = NULL;
120 struct obd_device *obd = class_exp2obd(exp);
127 * XXX: in case of LMV, is this correct to access
130 CERROR("Invalid MDC connection handle "LPX64"\n",
131 ll_i2mdexp(inode)->exp_handle.h_cookie);
135 OBD_ALLOC_PTR(op_data);
/* Pre-existing known leak on allocation failure, kept as-is. */
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr to back to MDS. */
147 rc = ll_som_update(inode, op_data);
149 CERROR("inode %lu mdc Size-on-MDS update failed: "
150 "rc = %d\n", inode->i_ino, rc);
154 CERROR("inode %lu mdc close failed: rc = %d\n",
157 ll_finish_md_op_data(op_data);
160 rc = ll_objects_destroy(req, inode);
162 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* Writable regular file whose epoch is still open: defer DONE_WRITING. */
169 if (exp_connect_som(exp) && !epoch_close &&
170 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
171 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
173 md_clear_open_replay_data(md_exp, och);
174 /* Free @och if it is not waiting for DONE_WRITING. */
175 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
178 if (req) /* This is close request */
179 ptlrpc_req_finished(req);
/* Close the MDS open handle of the given open mode (@flags is one of
 * FMODE_WRITE / FMODE_EXEC / FMODE_READ) for @inode, but only when no
 * other users of that handle remain (och_usecount).  Serialized by
 * lli_och_sem. */
183 int ll_md_real_close(struct inode *inode, int flags)
185 struct ll_inode_info *lli = ll_i2info(inode);
186 struct obd_client_handle **och_p;
187 struct obd_client_handle *och;
/* Select the per-mode handle slot and its use counter. */
192 if (flags & FMODE_WRITE) {
193 och_p = &lli->lli_mds_write_och;
194 och_usecount = &lli->lli_open_fd_write_count;
195 } else if (flags & FMODE_EXEC) {
196 och_p = &lli->lli_mds_exec_och;
197 och_usecount = &lli->lli_open_fd_exec_count;
199 LASSERT(flags & FMODE_READ);
200 och_p = &lli->lli_mds_read_och;
201 och_usecount = &lli->lli_open_fd_read_count;
204 cfs_down(&lli->lli_och_sem);
205 if (*och_usecount) { /* There are still users of this handle, so
207 cfs_up(&lli->lli_och_sem);
212 cfs_up(&lli->lli_och_sem);
214 if (och) { /* There might be a race and somebody have freed this och
216 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-struct-file close: drop a group lock if held, decrement the open
 * count for this file's open mode, and — when no matching cached OPEN DLM
 * lock remains — do the real MDS close via ll_md_real_close().  Finally
 * detach and free the ll_file_data. */
223 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
226 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
227 struct ll_inode_info *lli = ll_i2info(inode);
231 /* clear group lock, if present */
232 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
233 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
235 /* Let's see if we have good enough OPEN lock on the file and if
236 we can skip talking to MDS */
237 if (file->f_dentry->d_inode) { /* Can this ever be false? */
239 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
240 struct lustre_handle lockh;
241 struct inode *inode = file->f_dentry->d_inode;
242 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Decrement the per-mode open count under lli_och_sem. */
244 cfs_down(&lli->lli_och_sem);
245 if (fd->fd_omode & FMODE_WRITE) {
247 LASSERT(lli->lli_open_fd_write_count);
248 lli->lli_open_fd_write_count--;
249 } else if (fd->fd_omode & FMODE_EXEC) {
251 LASSERT(lli->lli_open_fd_exec_count);
252 lli->lli_open_fd_exec_count--;
255 LASSERT(lli->lli_open_fd_read_count);
256 lli->lli_open_fd_read_count--;
258 cfs_up(&lli->lli_och_sem);
/* No cached OPEN ibits lock -> must close on the MDS now. */
260 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
261 LDLM_IBITS, &policy, lockmode,
263 rc = ll_md_real_close(file->f_dentry->d_inode,
267 CERROR("Releasing a file %p with negative dentry %p. Name %s",
268 file, file->f_dentry, file->f_dentry->d_name.name);
271 LUSTRE_FPRIVATE(file) = NULL;
272 ll_file_data_put(fd);
273 ll_capa_close(inode);
278 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
280 /* While this returns an error code, fput() the caller does not, so we need
281 * to make every effort to clean up all of our state here. Also, applications
282 * rarely check close errors and even if an error is returned they will not
283 * re-try the close call.
285 int ll_file_release(struct inode *inode, struct file *file)
287 struct ll_file_data *fd;
288 struct ll_sb_info *sbi = ll_i2sbi(inode);
289 struct ll_inode_info *lli = ll_i2info(inode);
290 struct lov_stripe_md *lsm = lli->lli_smd;
294 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
295 inode->i_generation, inode);
297 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping is torn down when the root inode closes. */
298 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
299 inode == inode->i_sb->s_root->d_inode) {
300 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
303 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
304 fd->fd_flags &= ~LL_FILE_RMTACL;
305 rct_del(&sbi->ll_rct, cfs_curproc_pid());
306 et_search_free(&sbi->ll_et, cfs_curproc_pid());
311 if (inode->i_sb->s_root != file->f_dentry)
312 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
313 fd = LUSTRE_FPRIVATE(file);
316 /* The last ref on @file, maybe not the the owner pid of statahead.
317 * Different processes can open the same dir, "ll_opendir_key" means:
318 * it is me that should stop the statahead thread. */
319 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
320 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root dentry has no MDS open handle to close; just free the fd. */
322 if (inode->i_sb->s_root == file->f_dentry) {
323 LUSTRE_FPRIVATE(file) = NULL;
324 ll_file_data_put(fd);
329 lov_test_and_clear_async_rc(lsm);
330 lli->lli_async_rc = 0;
332 rc = ll_md_close(sbi->ll_md_exp, inode, file);
334 if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val))
335 libcfs_debug_dumplog();
/* Issue an IT_OPEN intent lock RPC to the MDS for @file.  With no lmm
 * supplied (plain open) an OPEN DLM lock is requested as well; with @lmm
 * set this is the setstripe path and no open lock is needed.  On success
 * the inode is refreshed from the reply and the lock handle is attached.
 * NOTE(review): several error-path lines are elided in this view. */
340 static int ll_intent_file_open(struct file *file, void *lmm,
341 int lmmsize, struct lookup_intent *itp)
343 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
344 struct dentry *parent = file->f_dentry->d_parent;
345 const char *name = file->f_dentry->d_name.name;
346 const int len = file->f_dentry->d_name.len;
347 struct md_op_data *op_data;
348 struct ptlrpc_request *req;
349 __u32 opc = LUSTRE_OPC_ANY;
356 /* Usually we come here only for NFSD, and we want open lock.
357 But we can also get here with pre 2.6.15 patchless kernels, and in
358 that case that lock is also ok */
359 /* We can also get here if there was cached open handle in revalidate_it
360 * but it disappeared while we were getting from there to ll_file_open.
361 * But this means this file was closed and immediatelly opened which
362 * makes a good candidate for using OPEN lock */
363 /* If lmmsize & lmm are not 0, we are just setting stripe info
364 * parameters. No need for the open lock */
365 if (lmm == NULL && lmmsize == 0) {
366 itp->it_flags |= MDS_OPEN_LOCK;
367 if (itp->it_flags & FMODE_WRITE)
368 opc = LUSTRE_OPC_CREATE;
371 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
372 file->f_dentry->d_inode, name, len,
375 RETURN(PTR_ERR(op_data));
377 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
378 0 /*unused */, &req, ll_md_blocking_ast, 0);
379 ll_finish_md_op_data(op_data);
381 /* reason for keep own exit path - don`t flood log
382 * with messages with -ESTALE errors.
/* An open that was granted but then errored must release its handle. */
384 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
385 it_open_error(DISP_OPEN_OPEN, itp))
387 ll_release_openhandle(file->f_dentry, itp);
391 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
392 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
393 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
397 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
398 if (!rc && itp->d.lustre.it_lock_mode)
399 md_set_lock_data(sbi->ll_md_exp,
400 &itp->d.lustre.it_lock_handle,
401 file->f_dentry->d_inode, NULL);
404 ptlrpc_req_finished(itp->d.lustre.it_data);
405 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
406 ll_intent_drop_lock(itp);
/*
412 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
413 * not believe attributes if a few ioepoch holders exist. Attributes for
414 * previous ioepoch if new one is opened are also skipped by MDS.
 */
416 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a non-zero epoch that differs from the current one. */
418 if (ioepoch && lli->lli_ioepoch != ioepoch) {
419 lli->lli_ioepoch = ioepoch;
420 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
421 ioepoch, PFID(&lli->lli_fid));
/* Fill an obd_client_handle from the MDT reply attached to @it: copy the
 * server's open handle, record fid/flags, open the new IO epoch, and
 * register the request for open replay. */
425 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
426 struct lookup_intent *it, struct obd_client_handle *och)
428 struct ptlrpc_request *req = it->d.lustre.it_data;
429 struct mdt_body *body;
433 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
434 LASSERT(body != NULL); /* reply already checked out */
436 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
437 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
438 och->och_fid = lli->lli_fid;
439 och->och_flags = it->it_flags;
440 ll_ioepoch_open(lli, body->ioepoch);
442 return md_set_open_replay_data(md_exp, och, req);
/* Finish the client-local part of an open: optionally fill @och from the
 * intent reply, then attach @fd as the file's private data, initialize
 * readahead state and remember the open mode. */
445 int ll_local_open(struct file *file, struct lookup_intent *it,
446 struct ll_file_data *fd, struct obd_client_handle *och)
448 struct inode *inode = file->f_dentry->d_inode;
449 struct ll_inode_info *lli = ll_i2info(inode);
/* Caller must not have private data set yet (see ll_file_open). */
452 LASSERT(!LUSTRE_FPRIVATE(file));
457 struct ptlrpc_request *req = it->d.lustre.it_data;
458 struct mdt_body *body;
461 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
465 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
466 if ((it->it_flags & FMODE_WRITE) &&
467 (body->valid & OBD_MD_FLSIZE))
468 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
469 lli->lli_ioepoch, PFID(&lli->lli_fid));
472 LUSTRE_FPRIVATE(file) = fd;
473 ll_readahead_init(inode, &fd->fd_ras);
474 fd->fd_omode = it->it_flags;
478 /* Open a file, and (for the very first open) create objects on the OSTs at
479 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
480 * creation or open until ll_lov_setstripe() ioctl is called. We grab
481 * lli_open_sem to ensure no other process will create objects, send the
482 * stripe MD to the MDS, or try to destroy the objects if that fails.
484 * If we already have the stripe MD locally then we don't request it in
485 * md_open(), by passing a lmm_size = 0.
487 * It is up to the application to ensure no other processes open this file
488 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
489 * used. We might be able to avoid races of that sort by getting lli_open_sem
490 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
491 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
493 int ll_file_open(struct inode *inode, struct file *file)
495 struct ll_inode_info *lli = ll_i2info(inode);
496 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
497 .it_flags = file->f_flags };
498 struct lov_stripe_md *lsm;
499 struct ptlrpc_request *req = NULL;
500 struct obd_client_handle **och_p;
502 struct ll_file_data *fd;
503 int rc = 0, opendir_set = 0;
506 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
507 inode->i_generation, inode, file->f_flags);
/* An intent may have been stashed by lookup/revalidate; consume it. */
509 it = file->private_data; /* XXX: compat macro */
510 file->private_data = NULL; /* prevent ll_local_open assertion */
512 fd = ll_file_data_get();
/* Directory open: try to claim the statahead "opendir key" for this pid. */
517 if (S_ISDIR(inode->i_mode)) {
518 cfs_spin_lock(&lli->lli_sa_lock);
519 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
520 LASSERT(lli->lli_sai == NULL);
521 lli->lli_opendir_key = fd;
522 lli->lli_opendir_pid = cfs_curproc_pid();
525 cfs_spin_unlock(&lli->lli_sa_lock);
/* Opening the root: no MDS open handle needed. */
528 if (inode->i_sb->s_root == file->f_dentry) {
529 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent: build our own IT_OPEN (NFSD / patchless paths). */
533 if (!it || !it->d.lustre.it_disposition) {
534 /* Convert f_flags into access mode. We cannot use file->f_mode,
535 * because everything but O_ACCMODE mask was stripped from
537 if ((oit.it_flags + 1) & O_ACCMODE)
539 if (file->f_flags & O_TRUNC)
540 oit.it_flags |= FMODE_WRITE;
542 /* kernel only call f_op->open in dentry_open. filp_open calls
543 * dentry_open after call to open_namei that checks permissions.
544 * Only nfsd_open call dentry_open directly without checking
545 * permissions and because of that this code below is safe. */
546 if (oit.it_flags & (FMODE_WRITE | FMODE_READ))
547 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
549 /* We do not want O_EXCL here, presumably we opened the file
550 * already? XXX - NFS implications? */
551 oit.it_flags &= ~O_EXCL;
553 /* bug20584, if "it_flags" contains O_CREAT, the file will be
554 * created if necessary, then "IT_CREAT" should be set to keep
555 * consistent with it */
556 if (oit.it_flags & O_CREAT)
557 oit.it_op |= IT_CREAT;
563 /* Let's see if we have file open on MDS already. */
564 if (it->it_flags & FMODE_WRITE) {
565 och_p = &lli->lli_mds_write_och;
566 och_usecount = &lli->lli_open_fd_write_count;
567 } else if (it->it_flags & FMODE_EXEC) {
568 och_p = &lli->lli_mds_exec_och;
569 och_usecount = &lli->lli_open_fd_exec_count;
571 och_p = &lli->lli_mds_read_och;
572 och_usecount = &lli->lli_open_fd_read_count;
575 cfs_down(&lli->lli_och_sem);
576 if (*och_p) { /* Open handle is present */
577 if (it_disposition(it, DISP_OPEN_OPEN)) {
578 /* Well, there's extra open request that we do not need,
579 let's close it somehow. This will decref request. */
580 rc = it_open_error(DISP_OPEN_OPEN, it);
582 cfs_up(&lli->lli_och_sem);
583 ll_file_data_put(fd);
584 GOTO(out_openerr, rc);
586 ll_release_openhandle(file->f_dentry, it);
587 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse the existing MDS handle; no och is passed to ll_local_open. */
592 rc = ll_local_open(file, it, fd, NULL);
595 cfs_up(&lli->lli_och_sem);
596 ll_file_data_put(fd);
597 GOTO(out_openerr, rc);
600 LASSERT(*och_usecount == 0);
601 if (!it->d.lustre.it_disposition) {
602 /* We cannot just request lock handle now, new ELC code
603 means that one of other OPEN locks for this file
604 could be cancelled, and since blocking ast handler
605 would attempt to grab och_sem as well, that would
606 result in a deadlock */
607 cfs_up(&lli->lli_och_sem);
608 it->it_create_mode |= M_CHECK_STALE;
609 rc = ll_intent_file_open(file, NULL, 0, it);
610 it->it_create_mode &= ~M_CHECK_STALE;
612 ll_file_data_put(fd);
613 GOTO(out_openerr, rc);
616 /* Got some error? Release the request */
617 if (it->d.lustre.it_status < 0) {
618 req = it->d.lustre.it_data;
619 ptlrpc_req_finished(req);
623 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
625 ll_file_data_put(fd);
626 GOTO(out_och_free, rc = -ENOMEM);
629 req = it->d.lustre.it_data;
631 /* md_intent_lock() didn't get a request ref if there was an
632 * open error, so don't do cleanup on the request here
634 /* XXX (green): Should not we bail out on any error here, not
635 * just open error? */
636 rc = it_open_error(DISP_OPEN_OPEN, it);
638 ll_file_data_put(fd);
639 GOTO(out_och_free, rc);
642 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
643 rc = ll_local_open(file, it, fd, *och_p);
645 ll_file_data_put(fd);
646 GOTO(out_och_free, rc);
649 cfs_up(&lli->lli_och_sem);
651 /* Must do this outside lli_och_sem lock to prevent deadlock where
652 different kind of OPEN lock for this same inode gets cancelled
653 by ldlm_cancel_lru */
654 if (!S_ISREG(inode->i_mode))
/* Delayed object creation: skip OST object creation for now. */
661 if (file->f_flags & O_LOV_DELAY_CREATE ||
662 !(file->f_mode & FMODE_WRITE)) {
663 CDEBUG(D_INODE, "object creation was delayed\n");
667 file->f_flags &= ~O_LOV_DELAY_CREATE;
670 ptlrpc_req_finished(req);
672 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
676 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
677 *och_p = NULL; /* OBD_FREE writes some magic there */
680 cfs_up(&lli->lli_och_sem);
682 if (opendir_set != 0)
683 ll_stop_statahead(inode, lli->lli_opendir_key);
689 /* Fills the obdo with the attributes for the lsm */
/* Async OST getattr for @lsm into @obdo via a ptlrpc set; with @sync set,
 * OBD_FL_SRVLOCK requests the getattr under a server-side lock.  On return
 * o_valid is masked down to the OST-authoritative fields. */
690 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
691 struct obd_capa *capa, struct obdo *obdo,
692 __u64 ioepoch, int sync)
694 struct ptlrpc_request_set *set;
695 struct obd_info oinfo = { { { 0 } } };
700 LASSERT(lsm != NULL);
704 oinfo.oi_oa->o_id = lsm->lsm_object_id;
705 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
706 oinfo.oi_oa->o_mode = S_IFREG;
707 oinfo.oi_oa->o_ioepoch = ioepoch;
708 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
709 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
710 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
711 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
712 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
713 oinfo.oi_capa = capa;
715 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
716 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
719 set = ptlrpc_prep_set();
721 CERROR("can't allocate ptlrpc set\n");
724 rc = obd_getattr_async(exp, &oinfo, set);
726 rc = ptlrpc_set_wait(set);
727 ptlrpc_set_destroy(set);
/* Keep only the fields the OSTs are authoritative for. */
730 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
731 OBD_MD_FLATIME | OBD_MD_FLMTIME |
732 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
/*
737 * Performs the getattr on the inode and updates its fields.
738 * If @sync != 0, perform the getattr under the server-side lock.
 */
740 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
741 __u64 ioepoch, int sync)
743 struct ll_inode_info *lli = ll_i2info(inode);
744 struct obd_capa *capa = ll_mdscapa_get(inode);
748 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
749 capa, obdo, ioepoch, sync);
/* Refresh the in-core inode from whatever fields came back valid. */
752 obdo_refresh_inode(inode, obdo, obdo->o_valid);
754 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
755 lli->lli_smd->lsm_object_id, i_size_read(inode),
756 (unsigned long long)inode->i_blocks,
757 (unsigned long)ll_inode_blksize(inode));
/* Merge MDS-provided timestamps (lli_lvb) with per-stripe OST attributes
 * into a single lock value block, then update the inode's size, blocks and
 * times under the inode size lock. */
762 int ll_merge_lvb(struct inode *inode)
764 struct ll_inode_info *lli = ll_i2info(inode);
765 struct ll_sb_info *sbi = ll_i2sbi(inode);
771 ll_inode_size_lock(inode, 1);
772 inode_init_lvb(inode, &lvb);
774 /* merge timestamps the most resently obtained from mds with
775 timestamps obtained from osts */
776 lvb.lvb_atime = lli->lli_lvb.lvb_atime;
777 lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
778 lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
779 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
780 cl_isize_write_nolock(inode, lvb.lvb_size);
782 CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
783 PFID(&lli->lli_fid), lvb.lvb_size);
784 inode->i_blocks = lvb.lvb_blocks;
786 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
787 LTIME_S(inode->i_atime) = lvb.lvb_atime;
788 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
789 ll_inode_size_unlock(inode, 1);
/* Glimpse ioctl helper: fetch current OST attributes for @lsm and copy
 * size/blocks/times into the caller's stat structure. */
794 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
797 struct obdo obdo = { 0 };
800 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
802 st->st_size = obdo.o_size;
803 st->st_blocks = obdo.o_blocks;
804 st->st_mtime = obdo.o_mtime;
805 st->st_atime = obdo.o_atime;
806 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: propagate O_NONBLOCK/O_APPEND and pick the DLM lock requirement
 * (never for nolock files, mandatory for append, otherwise maybe). */
811 void ll_io_init(struct cl_io *io, const struct file *file, int write)
813 struct inode *inode = file->f_dentry->d_inode;
815 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
817 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
818 io->ci_obj = ll_i2info(inode)->lli_clob;
819 io->ci_lockreq = CILR_MAYBE;
820 if (ll_file_nolock(file)) {
821 io->ci_lockreq = CILR_NEVER;
822 io->ci_no_srvlock = 1;
823 } else if (file->f_flags & O_APPEND) {
824 io->ci_lockreq = CILR_MANDATORY;
/* Common engine for all read/write entry points (normal, sendfile,
 * splice).  Sets up the cl_io, copies the per-subtype arguments into the
 * vvp/ccc io state, takes lli_write_sem for non-grouplock writes (or
 * lli_trunc_sem read-side for reads), runs the cl_io loop, then advances
 * *ppos and updates the per-sb read/write statistics.
 * NOTE(review): heavily elided here — switch labels, out: label and the
 * final return are not visible in this view. */
828 static ssize_t ll_file_io_generic(const struct lu_env *env,
829 struct vvp_io_args *args, struct file *file,
830 enum cl_io_type iot, loff_t *ppos, size_t count)
832 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
837 io = ccc_env_thread_io(env);
838 ll_io_init(io, file, iot == CIT_WRITE);
840 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
841 struct vvp_io *vio = vvp_env_io(env);
842 struct ccc_io *cio = ccc_env_io(env);
843 int write_sem_locked = 0;
845 cio->cui_fd = LUSTRE_FPRIVATE(file);
846 vio->cui_io_subtype = args->via_io_subtype;
848 switch (vio->cui_io_subtype) {
850 cio->cui_iov = args->u.normal.via_iov;
851 cio->cui_nrsegs = args->u.normal.via_nrsegs;
852 cio->cui_tot_nrsegs = cio->cui_nrsegs;
853 #ifndef HAVE_FILE_WRITEV
854 cio->cui_iocb = args->u.normal.via_iocb;
/* Serialize non-grouplock writes on lli_write_sem (interruptible). */
856 if ((iot == CIT_WRITE) &&
857 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
858 if(cfs_down_interruptible(&lli->lli_write_sem))
859 GOTO(out, result = -ERESTARTSYS);
860 write_sem_locked = 1;
861 } else if (iot == CIT_READ) {
862 cfs_down_read(&lli->lli_trunc_sem);
866 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
867 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
870 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
871 vio->u.splice.cui_flags = args->u.splice.via_flags;
874 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
877 result = cl_io_loop(env, io);
878 if (write_sem_locked)
879 cfs_up(&lli->lli_write_sem);
880 else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
881 cfs_up_read(&lli->lli_trunc_sem);
883 /* cl_io_rw_init() handled IO */
884 result = io->ci_result;
887 if (io->ci_nob > 0) {
889 *ppos = io->u.ci_wr.wr.crw_pos;
895 if (iot == CIT_READ) {
897 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
898 LPROC_LL_READ_BYTES, result);
899 } else if (iot == CIT_WRITE) {
901 ll_stats_ops_tally(ll_i2sbi(file->f_dentry->d_inode),
902 LPROC_LL_WRITE_BYTES, result);
903 lli->lli_write_rc = 0;
905 lli->lli_write_rc = result;
/*
914 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
 *
 * Validate an iovec array: compute the total byte count into *count,
 * rejecting negative lengths / wrap-around, and truncate *nr_segs at the
 * first inaccessible segment.
 */
916 static int ll_file_get_iov_count(const struct iovec *iov,
917 unsigned long *nr_segs, size_t *count)
922 for (seg = 0; seg < *nr_segs; seg++) {
923 const struct iovec *iv = &iov[seg];
926 * If any segment has a negative length, or the cumulative
927 * length ever wraps negative then return -EINVAL.
930 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
932 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
937 cnt -= iv->iov_len; /* This segment is no good */
944 #ifdef HAVE_FILE_READV
/* Vectored read entry point (pre-AIO kernels): validate the iovec, build
 * IO_NORMAL args in the cl environment and run the generic read path. */
945 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
946 unsigned long nr_segs, loff_t *ppos)
949 struct vvp_io_args *args;
955 result = ll_file_get_iov_count(iov, &nr_segs, &count);
959 env = cl_env_get(&refcheck);
961 RETURN(PTR_ERR(env));
963 args = vvp_env_args(env, IO_NORMAL);
964 args->u.normal.via_iov = (struct iovec *)iov;
965 args->u.normal.via_nrsegs = nr_segs;
967 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
968 cl_env_put(env, &refcheck);
/* Plain read(2) entry point (readv variant): wrap the user buffer in a
 * single-segment iovec from the env scratch area and call ll_file_readv. */
972 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
976 struct iovec *local_iov;
981 env = cl_env_get(&refcheck);
983 RETURN(PTR_ERR(env));
985 local_iov = &vvp_env_info(env)->vti_local_iov;
986 local_iov->iov_base = (void __user *)buf;
987 local_iov->iov_len = count;
988 result = ll_file_readv(file, local_iov, 1, ppos);
989 cl_env_put(env, &refcheck);
/* AIO read entry point: validate the iovec, fill IO_NORMAL args (including
 * the kiocb) and run the generic read path against iocb->ki_pos. */
994 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
995 unsigned long nr_segs, loff_t pos)
998 struct vvp_io_args *args;
1004 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1008 env = cl_env_get(&refcheck);
1010 RETURN(PTR_ERR(env));
1012 args = vvp_env_args(env, IO_NORMAL);
1013 args->u.normal.via_iov = (struct iovec *)iov;
1014 args->u.normal.via_nrsegs = nr_segs;
1015 args->u.normal.via_iocb = iocb;
1017 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1018 &iocb->ki_pos, count);
1019 cl_env_put(env, &refcheck);
/* Plain read(2) entry point (AIO variant): build a synchronous kiocb plus
 * a one-segment iovec in the env scratch area, delegate to
 * ll_file_aio_read, then propagate the updated position back to *ppos. */
1023 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1027 struct iovec *local_iov;
1028 struct kiocb *kiocb;
1033 env = cl_env_get(&refcheck);
1035 RETURN(PTR_ERR(env));
1037 local_iov = &vvp_env_info(env)->vti_local_iov;
1038 kiocb = &vvp_env_info(env)->vti_kiocb;
1039 local_iov->iov_base = (void __user *)buf;
1040 local_iov->iov_len = count;
1041 init_sync_kiocb(kiocb, file);
1042 kiocb->ki_pos = *ppos;
1043 kiocb->ki_left = count;
1045 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1046 *ppos = kiocb->ki_pos;
1048 cl_env_put(env, &refcheck);
/*
1054 * Write to a file (through the page cache).
 */
1056 #ifdef HAVE_FILE_WRITEV
/* Vectored write entry point (pre-AIO kernels): validate the iovec, build
 * IO_NORMAL args and run the generic write path. */
1057 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1058 unsigned long nr_segs, loff_t *ppos)
1061 struct vvp_io_args *args;
1067 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1071 env = cl_env_get(&refcheck);
1073 RETURN(PTR_ERR(env));
1075 args = vvp_env_args(env, IO_NORMAL);
1076 args->u.normal.via_iov = (struct iovec *)iov;
1077 args->u.normal.via_nrsegs = nr_segs;
1079 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1080 cl_env_put(env, &refcheck);
/* Plain write(2) entry point (writev variant): wrap the user buffer in a
 * single-segment iovec and call ll_file_writev. */
1084 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1088 struct iovec *local_iov;
1093 env = cl_env_get(&refcheck);
1095 RETURN(PTR_ERR(env));
1097 local_iov = &vvp_env_info(env)->vti_local_iov;
1098 local_iov->iov_base = (void __user *)buf;
1099 local_iov->iov_len = count;
1101 result = ll_file_writev(file, local_iov, 1, ppos);
1102 cl_env_put(env, &refcheck);
1106 #else /* AIO stuff */
/* AIO write entry point: validate the iovec, fill IO_NORMAL args
 * (including the kiocb) and run the generic write path against
 * iocb->ki_pos. */
1107 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1108 unsigned long nr_segs, loff_t pos)
1111 struct vvp_io_args *args;
1117 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1121 env = cl_env_get(&refcheck);
1123 RETURN(PTR_ERR(env));
1125 args = vvp_env_args(env, IO_NORMAL);
1126 args->u.normal.via_iov = (struct iovec *)iov;
1127 args->u.normal.via_nrsegs = nr_segs;
1128 args->u.normal.via_iocb = iocb;
1130 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1131 &iocb->ki_pos, count);
1132 cl_env_put(env, &refcheck);
/* Plain write(2) entry point (AIO variant): build a synchronous kiocb plus
 * a one-segment iovec, delegate to ll_file_aio_write, then propagate the
 * updated position back to *ppos. */
1136 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1140 struct iovec *local_iov;
1141 struct kiocb *kiocb;
1146 env = cl_env_get(&refcheck);
1148 RETURN(PTR_ERR(env));
1150 local_iov = &vvp_env_info(env)->vti_local_iov;
1151 kiocb = &vvp_env_info(env)->vti_kiocb;
1152 local_iov->iov_base = (void __user *)buf;
1153 local_iov->iov_len = count;
1154 init_sync_kiocb(kiocb, file);
1155 kiocb->ki_pos = *ppos;
1156 kiocb->ki_left = count;
1158 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1159 *ppos = kiocb->ki_pos;
1161 cl_env_put(env, &refcheck);
1167 #ifdef HAVE_KERNEL_SENDFILE
/*
1169 * Send file content (through pagecache) somewhere with helper
 * (sendfile(2) actor/target pair); runs the generic read path with
 * IO_SENDFILE arguments.
 */
1171 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1172 read_actor_t actor, void *target)
1175 struct vvp_io_args *args;
1180 env = cl_env_get(&refcheck);
1182 RETURN(PTR_ERR(env));
1184 args = vvp_env_args(env, IO_SENDFILE);
1185 args->u.sendfile.via_target = target;
1186 args->u.sendfile.via_actor = actor;
1188 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1189 cl_env_put(env, &refcheck);
1194 #ifdef HAVE_KERNEL_SPLICE_READ
/*
1196 * Send file content (through pagecache) somewhere with helper
 * (splice_read into a pipe); runs the generic read path with IO_SPLICE
 * arguments.
 */
1198 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1199 struct pipe_inode_info *pipe, size_t count,
1203 struct vvp_io_args *args;
1208 env = cl_env_get(&refcheck);
1210 RETURN(PTR_ERR(env));
1212 args = vvp_env_args(env, IO_SPLICE);
1213 args->u.splice.via_pipe = pipe;
1214 args->u.splice.via_flags = flags;
1216 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1217 cl_env_put(env, &refcheck);
/* Recreate a lost OST object for this inode: clone the current stripe MD,
 * mark the obdo with OBD_FL_RECREATE_OBJS (o_nlink carries the target OST
 * index) and call obd_create under the inode size lock.
 * NOTE(review): obdo allocation and some error paths are elided here. */
1222 static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
1225 struct obd_export *exp = ll_i2dtexp(inode);
1226 struct obd_trans_info oti = { 0 };
1227 struct obdo *oa = NULL;
1230 struct lov_stripe_md *lsm, *lsm2;
1237 ll_inode_size_lock(inode, 0);
1238 lsm = ll_i2info(inode)->lli_smd;
1240 GOTO(out, rc = -ENOENT);
1241 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1242 (lsm->lsm_stripe_count));
1244 OBD_ALLOC_LARGE(lsm2, lsm_size);
1246 GOTO(out, rc = -ENOMEM);
/* o_nlink is reused to carry the OST index on the recreate path. */
1250 oa->o_nlink = ost_idx;
1251 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1252 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1253 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid, OBD_MD_FLTYPE |
1254 OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1255 memcpy(lsm2, lsm, lsm_size);
1256 rc = obd_create(exp, oa, &lsm2, &oti);
1258 OBD_FREE_LARGE(lsm2, lsm_size);
1261 ll_inode_size_unlock(inode, 0);
/* LL_IOC_RECREATE_OBJ handler: copy the ll_recreate_obj request from user
 * space (CAP_SYS_ADMIN only) and recreate the object by id/ost index. */
1266 static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
1268 struct ll_recreate_obj ucreat;
1271 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1274 if (cfs_copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
1275 sizeof(struct ll_recreate_obj)))
1278 RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
1279 ucreat.lrc_ost_idx));
/* LL_IOC_RECREATE_FID handler: copy a lu_fid from user space
 * (CAP_SYS_ADMIN only), unpack object id and OST index from its oid/seq
 * fields, and recreate the object. */
1282 static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
1289 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1292 if (cfs_copy_from_user(&fid, (struct lu_fid *)arg,
1293 sizeof(struct lu_fid)))
/* id = oid plus low 16 bits of seq; ost index = next 16 bits of seq. */
1296 id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
1297 ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
1298 RETURN(ll_lov_recreate(inode, id, 0, ost_idx));
/* Set striping info for @inode by performing an intent open that carries
 * the lov_user_md (@lum).  Fails if a stripe MD already exists; on success
 * the temporary open handle is released and the intent/request cleaned up. */
1301 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1302 int flags, struct lov_user_md *lum, int lum_size)
1304 struct lov_stripe_md *lsm;
1305 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1309 ll_inode_size_lock(inode, 0);
1310 lsm = ll_i2info(inode)->lli_smd;
/* Striping is write-once: refuse if it is already set. */
1312 ll_inode_size_unlock(inode, 0);
1313 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1318 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1321 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1322 GOTO(out_req_free, rc = -ENOENT);
1323 rc = oit.d.lustre.it_status;
1325 GOTO(out_req_free, rc);
1327 ll_release_openhandle(file->f_dentry, &oit);
1330 ll_inode_size_unlock(inode, 0);
1331 ll_intent_release(&oit);
1334 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA for @filename (a child of @inode) from the MDS via
 * md_getattr_name().  On success *lmmp/*lmm_size point into the reply
 * buffer and *request holds the request the caller must finish.
 * Validates the LOV magic (V1/V3 only) and byte-swaps the user md on
 * big-endian hosts, swabbing per-object entries only for regular files
 * (directories have no lsm objects).
 */
1338 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1339                              struct lov_mds_md **lmmp, int *lmm_size,
1340                              struct ptlrpc_request **request)
1342         struct ll_sb_info *sbi = ll_i2sbi(inode);
1343         struct mdt_body *body;
1344         struct lov_mds_md *lmm = NULL;
1345         struct ptlrpc_request *req = NULL;
1346         struct md_op_data *op_data;
/* size the getattr reply buffer for the largest possible EA */
1349         rc = ll_get_max_mdsize(sbi, &lmmsize);
1353         op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1354                                      strlen(filename), lmmsize,
1355                                      LUSTRE_OPC_ANY, NULL);
1356         if (op_data == NULL)
1359         op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1360         rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1361         ll_finish_md_op_data(op_data);
1363                 CDEBUG(D_INFO, "md_getattr_name failed "
1364                        "on %s: rc %d\n", filename, rc);
1368         body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1369         LASSERT(body != NULL); /* checked by mdc_getattr_name */
1371         lmmsize = body->eadatasize;
1373         if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1375                 GOTO(out, rc = -ENODATA);
1378         lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1379         LASSERT(lmm != NULL);
1381         if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1382             (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1383                 GOTO(out, rc = -EPROTO);
1387          * This is coming from the MDS, so is probably in
1388          * little endian. We convert it to host endian before
1389          * passing it to userspace.
/* no-op on little-endian hosts: LOV_MAGIC equals its LE encoding there */
1391         if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1392                 /* if function called for directory - we should
1393                  * avoid swab not existent lsm objects */
1394                 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1395                         lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1396                         if (S_ISREG(body->mode))
1397                                 lustre_swab_lov_user_md_objects(
1398                                  ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1399                                  ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1400                 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1401                         lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1402                         if (S_ISREG(body->mode))
1403                                 lustre_swab_lov_user_md_objects(
1404                                  ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1405                                  ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1411         *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (plus one ost_data entry)
 * from userspace and apply it via ll_lov_setstripe_ea_info() with
 * MDS_OPEN_HAS_OBJS | FMODE_WRITE.  Requires CAP_SYS_ADMIN.
 */
1416 static int ll_lov_setea(struct inode *inode, struct file *file,
1419         int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1420         struct lov_user_md  *lump;
1421         int lum_size = sizeof(struct lov_user_md) +
1422                        sizeof(struct lov_user_ost_data);
1426         if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1429         OBD_ALLOC_LARGE(lump, lum_size);
1433         if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1434                 OBD_FREE_LARGE(lump, lum_size);
1438         rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1440         OBD_FREE_LARGE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: copy the user's lov_user_md (first as V1,
 * then re-copy as V3 if the magic says so), set the stripe EA, and on
 * success refresh the user's buffer through an LL_IOC_LOV_GETSTRIPE
 * obd_iocontrol on the fresh in-core lsm.
 */
1444 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1447         struct lov_user_md_v3 lumv3;
1448         struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1449         struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1450         struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1453         int flags = FMODE_WRITE;
1456         /* first try with v1 which is smaller than v3 */
1457         lum_size = sizeof(struct lov_user_md_v1);
1458         if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
1461         if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1462                 lum_size = sizeof(struct lov_user_md_v3);
1463                 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1467         rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* NOTE(review): put_user return value is ignored here — a fault in the
 * user buffer would go unreported; confirm whether that is intentional */
1469                 put_user(0, &lumv1p->lmm_stripe_count);
1470                 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1471                                    0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE handler: hand the in-core stripe md to the data
 * export's iocontrol, which copies it out to the user buffer at @arg.
 */
1477 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1479         struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1484         rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
/*
 * LL_IOC_GROUP_LOCK handler: take a cl-layer group lock with gid @arg on
 * behalf of this file descriptor.  The fd flag/grouplock pair is guarded
 * by lli_lock; the lock is acquired outside the spinlock and the flag is
 * re-checked afterwards to catch a racing thread, in which case the
 * just-acquired lock is dropped again.
 */
1489 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1491         struct ll_inode_info   *lli = ll_i2info(inode);
1492         struct ll_file_data    *fd = LUSTRE_FPRIVATE(file);
1493         struct ccc_grouplock    grouplock;
/* group locks are meaningless when locking is disabled for this file */
1497         if (ll_file_nolock(file))
1498                 RETURN(-EOPNOTSUPP);
1500         cfs_spin_lock(&lli->lli_lock);
1501         if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1502                 CWARN("group lock already existed with gid %lu\n",
1503                       fd->fd_grouplock.cg_gid);
1504                 cfs_spin_unlock(&lli->lli_lock);
1507         LASSERT(fd->fd_grouplock.cg_lock == NULL);
1508         cfs_spin_unlock(&lli->lli_lock);
/* may block unless O_NONBLOCK was given; done outside lli_lock */
1510         rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1511                               arg, (file->f_flags & O_NONBLOCK), &grouplock);
1515         cfs_spin_lock(&lli->lli_lock);
1516         if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1517                 cfs_spin_unlock(&lli->lli_lock);
1518                 CERROR("another thread just won the race\n");
1519                 cl_put_grouplock(&grouplock);
1523         fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1524         fd->fd_grouplock = grouplock;
1525         cfs_spin_unlock(&lli->lli_lock);
1527         CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK handler: release the group lock held by this file
 * descriptor, after verifying one is held and its gid matches @arg.
 * State is detached under lli_lock; the cl-layer release happens after
 * the spinlock is dropped.
 */
1531 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1533         struct ll_inode_info   *lli = ll_i2info(inode);
1534         struct ll_file_data    *fd = LUSTRE_FPRIVATE(file);
1535         struct ccc_grouplock    grouplock;
1538         cfs_spin_lock(&lli->lli_lock);
1539         if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1540                 cfs_spin_unlock(&lli->lli_lock);
1541                 CWARN("no group lock held\n");
1544         LASSERT(fd->fd_grouplock.cg_lock != NULL);
1546         if (fd->fd_grouplock.cg_gid != arg) {
1547                 CWARN("group lock %lu doesn't match current id %lu\n",
1548                       arg, fd->fd_grouplock.cg_gid);
1549                 cfs_spin_unlock(&lli->lli_lock);
/* detach under the spinlock, release outside it */
1553         grouplock = fd->fd_grouplock;
1554         memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1555         fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1556         cfs_spin_unlock(&lli->lli_lock);
1558         cl_put_grouplock(&grouplock);
1559         CDEBUG(D_INFO, "group lock %lu released\n", arg);
1564 * Close inode open handle
1566 * \param dentry [in] dentry which contains the inode
1567 * \param it [in,out] intent which contains open info and result
1570 * \retval <0 failure
/*
 * Close the MDS open handle carried by @it for @dentry's inode: no-op for
 * the root dentry or when the intent holds no DISP_OPEN_OPEN disposition;
 * otherwise fills an obd_client_handle from the intent and closes it via
 * ll_close_inode_openhandle().  Also drops the enqueue-open request
 * reference that would otherwise have been consumed by ll_file_open().
 */
1572 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1574         struct inode *inode = dentry->d_inode;
1575         struct obd_client_handle *och;
1581         /* Root ? Do nothing. */
1582         if (dentry->d_inode->i_sb->s_root == dentry)
1585         /* No open handle to close? Move away */
1586         if (!it_disposition(it, DISP_OPEN_OPEN))
1589         LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1591         OBD_ALLOC(och, sizeof(*och));
1593                 GOTO(out, rc = -ENOMEM);
1595         ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1596                     ll_i2info(inode), it, och);
1598         rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1601         /* this one is in place of ll_file_open */
1602         if (it_disposition(it, DISP_ENQ_OPEN_REF))
1603                 ptlrpc_req_finished(it->d.lustre.it_data);
1604         it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1609 * Get size for inode for which FIEMAP mapping is requested.
1610 * Make the FIEMAP get_info call and returns the result.
/*
 * Core FIEMAP implementation: validate the flags, flush dirty pages when
 * FIEMAP_FLAG_SYNC is set, and fetch the extent mapping from the data
 * export via obd_get_info(KEY_FIEMAP).  Striped files (stripe_count > 1)
 * are rejected unless the caller understands FIEMAP_FLAG_DEVICE_ORDER,
 * since extents would otherwise be uninterpretable.
 */
1612 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1615         struct obd_export *exp = ll_i2dtexp(inode);
1616         struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1617         struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1618         int vallen = num_bytes;
1622         /* Checks for fiemap flags */
1623         if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* report back which flags we do support, then fail the call */
1624                 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1628         /* Check for FIEMAP_FLAG_SYNC */
1629         if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1630                 rc = filemap_fdatawrite(inode->i_mapping);
1635         /* If the stripe_count > 1 and the application does not understand
1636          * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1638         if (lsm->lsm_stripe_count > 1 &&
1639             !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1642         fm_key.oa.o_id = lsm->lsm_object_id;
1643         fm_key.oa.o_seq = lsm->lsm_object_seq;
1644         fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1646         obdo_from_inode(&fm_key.oa, inode, &ll_i2info(inode)->lli_fid,
1648         /* If filesize is 0, then there would be no objects for mapping */
1649         if (fm_key.oa.o_size == 0) {
1650                 fiemap->fm_mapped_extents = 0;
1654         memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1656         rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1658                 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH handler: copy a getinfo_fid2path header in from
 * userspace to learn gf_pathlen, allocate an output buffer large enough
 * for the path, run the fid-to-path iocontrol on @exp, and copy the
 * result back to the user buffer at @arg.
 */
1663 int ll_fid2path(struct obd_export *exp, void *arg)
1665         struct getinfo_fid2path *gfout, *gfin;
1669         /* Need to get the buflen */
1670         OBD_ALLOC_PTR(gfin);
1673         if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
/* output = fixed header + caller-specified path buffer */
1678         outsize = sizeof(*gfout) + gfin->gf_pathlen;
1679         OBD_ALLOC(gfout, outsize);
1680         if (gfout == NULL) {
1684         memcpy(gfout, gfin, sizeof(*gfout));
1687         /* Call mdc_iocontrol */
1688         rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1691         if (cfs_copy_to_user(arg, gfout, outsize))
1695         OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: read fm_extent_count from the user's fiemap
 * header to size a kernel buffer, copy the header (and, when extents are
 * requested, the first extent, which seeds end_offset/device for
 * continuation calls), run ll_do_fiemap(), and copy the header plus any
 * mapped extents back out.
 */
1699 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1701         struct ll_user_fiemap *fiemap_s;
1702         size_t num_bytes, ret_bytes;
1703         unsigned int extent_count;
1706         /* Get the extent count so we can calculate the size of
1707          * required fiemap buffer */
1708         if (get_user(extent_count,
1709             &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
/* NOTE(review): extent_count comes straight from userspace; the visible
 * lines show no bound check before this multiplication — confirm the
 * elided lines (or OBD_ALLOC_LARGE) cap it against overflow */
1711         num_bytes = sizeof(*fiemap_s) + (extent_count *
1712                     sizeof(struct ll_fiemap_extent));
1714         OBD_ALLOC_LARGE(fiemap_s, num_bytes);
1715         if (fiemap_s == NULL)
1718         /* get the fiemap value */
1719         if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1721                 GOTO(error, rc = -EFAULT);
1723         /* If fm_extent_count is non-zero, read the first extent since
1724          * it is used to calculate end_offset and device from previous
1727                 if (copy_from_user(&fiemap_s->fm_extents[0],
1728                     (char __user *)arg + sizeof(*fiemap_s),
1729                     sizeof(struct ll_fiemap_extent)))
1730                         GOTO(error, rc = -EFAULT);
1733         rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1737         ret_bytes = sizeof(struct ll_user_fiemap);
1739         if (extent_count != 0)
1740                 ret_bytes += (fiemap_s->fm_mapped_extents *
1741                                  sizeof(struct ll_fiemap_extent));
1743         if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1747         OBD_FREE_LARGE(fiemap_s, num_bytes);
/*
 * Main ioctl dispatcher for regular files.  Two prototypes are compiled
 * depending on HAVE_UNLOCKED_IOCTL (modern unlocked_ioctl vs. the old
 * BKL-protected ioctl).  tty ioctls are punted; unknown commands fall
 * through to the dynamic ll_iocontrol_call() registry and finally to the
 * data export's obd_iocontrol().
 */
1751 #ifdef HAVE_UNLOCKED_IOCTL
1752 long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1754         struct inode *inode = file->f_dentry->d_inode;
1756 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1760         struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1764         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1765                inode->i_generation, inode, cmd);
1766         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1768         /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1769         if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1773         case LL_IOC_GETFLAGS:
1774                 /* Get the current value of the file flags */
1775                 return put_user(fd->fd_flags, (int *)arg);
1776         case LL_IOC_SETFLAGS:
1777         case LL_IOC_CLRFLAGS:
1778                 /* Set or clear specific file flags */
1779                 /* XXX This probably needs checks to ensure the flags are
1780                  * not abused, and to handle any flag side effects.
1782                 if (get_user(flags, (int *) arg))
/* LL_FILE_IGNORE_LOCK is only safe for O_DIRECT I/O */
1785                 if (cmd == LL_IOC_SETFLAGS) {
1786                         if ((flags & LL_FILE_IGNORE_LOCK) &&
1787                             !(file->f_flags & O_DIRECT)) {
1788                                 CERROR("%s: unable to disable locking on "
1789                                        "non-O_DIRECT file\n", current->comm);
1793                         fd->fd_flags |= flags;
1795                         fd->fd_flags &= ~flags;
1798         case LL_IOC_LOV_SETSTRIPE:
1799                 RETURN(ll_lov_setstripe(inode, file, arg));
1800         case LL_IOC_LOV_SETEA:
1801                 RETURN(ll_lov_setea(inode, file, arg));
1802         case LL_IOC_LOV_GETSTRIPE:
1803                 RETURN(ll_lov_getstripe(inode, arg));
1804         case LL_IOC_RECREATE_OBJ:
1805                 RETURN(ll_lov_recreate_obj(inode, arg));
1806         case LL_IOC_RECREATE_FID:
1807                 RETURN(ll_lov_recreate_fid(inode, arg));
1808         case FSFILT_IOC_FIEMAP:
1809                 RETURN(ll_ioctl_fiemap(inode, arg));
1810         case FSFILT_IOC_GETFLAGS:
1811         case FSFILT_IOC_SETFLAGS:
1812                 RETURN(ll_iocontrol(inode, file, cmd, arg));
1813         case FSFILT_IOC_GETVERSION_OLD:
1814         case FSFILT_IOC_GETVERSION:
1815                 RETURN(put_user(inode->i_generation, (int *)arg));
1816         case LL_IOC_GROUP_LOCK:
1817                 RETURN(ll_get_grouplock(inode, file, arg));
1818         case LL_IOC_GROUP_UNLOCK:
1819                 RETURN(ll_put_grouplock(inode, file, arg));
1820         case IOC_OBD_STATFS:
1821                 RETURN(ll_obd_statfs(inode, (void *)arg));
1823         /* We need to special case any other ioctls we want to handle,
1824          * to send them to the MDS/OST as appropriate and to properly
1825          * network encode the arg field.
1826         case FSFILT_IOC_SETVERSION_OLD:
1827         case FSFILT_IOC_SETVERSION:
1829         case LL_IOC_FLUSHCTX:
1830                 RETURN(ll_flush_ctx(inode));
1831         case LL_IOC_PATH2FID: {
1832                 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1833                                      sizeof(struct lu_fid)))
1838         case OBD_IOC_FID2PATH:
1839                 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1841         case LL_IOC_GET_MDTIDX: {
1844                 mdtidx = ll_get_mdt_idx(inode);
1848                 if (put_user((int)mdtidx, (int*)arg))
/* unknown command: try dynamically registered handlers, then the OSC */
1858                     ll_iocontrol_call(inode, file, cmd, arg, &err))
1861                 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.  For SEEK_END the file size must be current, so
 * a glimpse (cl_glimpse_size) is issued first — non-blocking when the
 * file was opened O_NONBLOCK.  The new offset is validated against
 * ll_file_maxbytes() before f_pos is updated.
 */
1867 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1869         struct inode *inode = file->f_dentry->d_inode;
1872         retval = offset + ((origin == 2) ? i_size_read(inode) :
1873                            (origin == 1) ? file->f_pos : 0);
1874         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1875                inode->i_ino, inode->i_generation, inode, retval, retval,
1876                origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1877         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1879         if (origin == 2) { /* SEEK_END */
1880                 int nonblock = 0, rc;
1882                 if (file->f_flags & O_NONBLOCK)
1883                         nonblock = LDLM_FL_BLOCK_NOWAIT;
/* refresh i_size from the OSTs before trusting it */
1885                 rc = cl_glimpse_size(inode);
1889                 offset += i_size_read(inode);
1890         } else if (origin == 1) { /* SEEK_CUR */
1891                 offset += file->f_pos;
1895         if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1896                 if (offset != file->f_pos) {
1897                         file->f_pos = offset;
/*
 * ->flush handler (called on every close of a file descriptor): report
 * any previously recorded write or async-writeback error for this inode
 * as -EIO so the application sees the failure.  Two prototypes depending
 * on whether the kernel passes an fl_owner_t.
 */
1905 #ifdef HAVE_FLUSH_OWNER_ID
1906 int ll_flush(struct file *file, fl_owner_t id)
1908 int ll_flush(struct file *file)
1911         struct inode *inode = file->f_dentry->d_inode;
1912         struct ll_inode_info *lli = ll_i2info(inode);
1913         struct lov_stripe_md *lsm = lli->lli_smd;
1916         /* the application should know write failure already. */
1917         if (lli->lli_write_rc)
1920         /* catch async errors that were recorded back when async writeback
1921          * failed for pages in this mapping. */
/* consume the sticky async error so it is reported exactly once */
1922         rc = lli->lli_async_rc;
1923         lli->lli_async_rc = 0;
1925                 err = lov_test_and_clear_async_rc(lsm);
1930         return rc ? -EIO : 0;
/*
 * fsync implementation: wait for in-flight writeback, collect any sticky
 * async errors, sync the metadata via md_sync() (with an MDS capability),
 * then sync file data through obd_sync_rqset() on the data export using a
 * freshly built obdo/obd_info.
 */
1933 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1935         struct inode *inode = dentry->d_inode;
1936         struct ll_inode_info *lli = ll_i2info(inode);
1937         struct lov_stripe_md *lsm = lli->lli_smd;
1938         struct ptlrpc_request *req;
1939         struct obd_capa *oc;
1942         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1943                inode->i_generation, inode);
1944         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1946         /* fsync's caller has already called _fdata{sync,write}, we want
1947          * that IO to finish before calling the osc and mdc sync methods */
1948         rc = filemap_fdatawait(inode->i_mapping);
1950         /* catch async errors that were recorded back when async writeback
1951          * failed for pages in this mapping. */
1952         err = lli->lli_async_rc;
1953         lli->lli_async_rc = 0;
1957                 err = lov_test_and_clear_async_rc(lsm);
1962         oc = ll_mdscapa_get(inode);
1963         err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1969                 ptlrpc_req_finished(req);
1972                 struct obd_info *oinfo;
1974                 OBD_ALLOC_PTR(oinfo);
1976                         RETURN(rc ? rc : -ENOMEM);
1977                 OBDO_ALLOC(oinfo->oi_oa);
1978                 if (!oinfo->oi_oa) {
1979                         OBD_FREE_PTR(oinfo);
1980                         RETURN(rc ? rc : -ENOMEM);
1982                 oinfo->oi_oa->o_id = lsm->lsm_object_id;
1983                 oinfo->oi_oa->o_seq = lsm->lsm_object_seq;
1984                 oinfo->oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1985                 obdo_from_inode(oinfo->oi_oa, inode, &ll_i2info(inode)->lli_fid,
1986                                 OBD_MD_FLTYPE | OBD_MD_FLATIME |
1987                                 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1990                 oinfo->oi_capa = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1991                 err = obd_sync_rqset(ll_i2sbi(inode)->ll_dt_exp, oinfo, 0,
1993                 capa_put(oinfo->oi_capa);
1996                 OBDO_FREE(oinfo->oi_oa);
1997                 OBD_FREE_PTR(oinfo);
/* NOTE(review): `err < 0 ?:0` is the GNU elvis operator — when err is
 * negative this stores the boolean 1, not err itself; confirm whether
 * `err < 0 ? err : 0` was intended (ll_flush only tests non-zero, so the
 * observable effect may be the same, but the stored value is odd) */
1998                 lli->lli_write_rc = err < 0 ? : 0;
/*
 * flock/fcntl lock handler backed by LDLM_FLOCK locks on the MDS.
 * Translates the kernel file_lock into an ldlm flock policy (owner, pid,
 * byte range), picks the ldlm mode from fl_type (F_UNLCK is expressed as
 * LCK_NL), and enqueues via md_enqueue().  On success the lock is also
 * mirrored into the local kernel lock tables so the VFS bookkeeping
 * (ll_flock_lock_file_wait / posix_lock_file_wait) stays consistent.
 */
2004 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2006         struct inode *inode = file->f_dentry->d_inode;
2007         struct ll_sb_info *sbi = ll_i2sbi(inode);
2008         struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2009                                            .ei_cb_cp =ldlm_flock_completion_ast,
2010                                            .ei_cbdata = file_lock };
2011         struct md_op_data *op_data;
2012         struct lustre_handle lockh = {0};
2013         ldlm_policy_data_t flock = {{0}};
2018         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2019                inode->i_ino, file_lock);
2021         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2023         if (file_lock->fl_flags & FL_FLOCK) {
2024                 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2025                 /* flocks are whole-file locks */
2026                 flock.l_flock.end = OFFSET_MAX;
2027                 /* For flocks owner is determined by the local file desctiptor*/
2028                 flock.l_flock.owner = (unsigned long)file_lock->fl_file;
2029         } else if (file_lock->fl_flags & FL_POSIX) {
2030                 flock.l_flock.owner = (unsigned long)file_lock->fl_owner;
2031                 flock.l_flock.start = file_lock->fl_start;
2032                 flock.l_flock.end = file_lock->fl_end;
2036         flock.l_flock.pid = file_lock->fl_pid;
2038         /* Somewhat ugly workaround for svc lockd.
2039          * lockd installs custom fl_lmops->fl_compare_owner that checks
2040          * for the fl_owner to be the same (which it always is on local node
2041          * I guess between lockd processes) and then compares pid.
2042          * As such we assign pid to the owner field to make it all work,
2043          * conflict with normal locks is unlikely since pid space and
2044          * pointer space for current->files are not intersecting */
2045         if (file_lock->fl_lmops && file_lock->fl_lmops->fl_compare_owner)
2046                 flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
2048         switch (file_lock->fl_type) {
2050                 einfo.ei_mode = LCK_PR;
2053                 /* An unlock request may or may not have any relation to
2054                  * existing locks so we may not be able to pass a lock handle
2055                  * via a normal ldlm_lock_cancel() request. The request may even
2056                  * unlock a byte range in the middle of an existing lock. In
2057                  * order to process an unlock request we need all of the same
2058                  * information that is given with a normal read or write record
2059                  * lock request. To avoid creating another ldlm unlock (cancel)
2060                  * message we'll treat a LCK_NL flock request as an unlock. */
2061                 einfo.ei_mode = LCK_NL;
2064                 einfo.ei_mode = LCK_PW;
2067                 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* non-blocking set request */
2082                 flags = LDLM_FL_BLOCK_NOWAIT;
/* F_GETLK-style probe: test only, never grant */
2088                 flags = LDLM_FL_TEST_LOCK;
2089                 /* Save the old mode so that if the mode in the lock changes we
2090                  * can decrement the appropriate reader or writer refcount. */
2091                 file_lock->fl_type = einfo.ei_mode;
2094                 CERROR("unknown fcntl lock command: %d\n", cmd);
2098         op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2099                                      LUSTRE_OPC_ANY, NULL);
2100         if (IS_ERR(op_data))
2101                 RETURN(PTR_ERR(op_data));
2103         CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2104                "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2105                flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2107         rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2108                         op_data, &lockh, &flock, 0, NULL /* req */, flags);
2110         ll_finish_md_op_data(op_data);
2112         if ((file_lock->fl_flags & FL_FLOCK) &&
2113             (rc == 0 || file_lock->fl_type == F_UNLCK))
2114                 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2115 #ifdef HAVE_F_OP_FLOCK
2116         if ((file_lock->fl_flags & FL_POSIX) &&
2117             (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2118             !(flags & LDLM_FL_TEST_LOCK))
2119                 posix_lock_file_wait(file, file_lock);
/* Lock handler for -o noflock mounts; body elided in this excerpt
 * (presumably rejects all lock requests — confirm in the full file). */
2125 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Non-blocking test: do we already hold a cached MDS inodebits lock on
 * @inode covering @bits in mode @l_req_mode?  LCK_MINMODE means "any
 * mode".  Uses LDLM_FL_TEST_LOCK so no lock reference is taken.
 */
2132 int ll_have_md_lock(struct inode *inode, __u64 bits,  ldlm_mode_t l_req_mode)
2134         struct lustre_handle lockh;
2135         ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2136         ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
2137                                 (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
2145         fid = &ll_i2info(inode)->lli_fid;
2146         CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
2147                ldlm_lockname[mode]);
2149         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2150         if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
/*
 * Like ll_have_md_lock() but actually takes a reference: match any cached
 * MDS inodebits lock covering @bits (any of CR/CW/PR/PW) and return its
 * mode with the handle in @lockh.  Caller is responsible for dropping the
 * matched lock reference.
 */
2157 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2158                             struct lustre_handle *lockh)
2160         ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2166         fid = &ll_i2info(inode)->lli_fid;
2167         CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2169         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2170         rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2171                            LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process a revalidation result: -ENOENT on a non-regular,
 * non-directory inode means the object was unlinked underneath us and is
 * treated as success (nlink is updated in the elided lines); other
 * errors are logged and propagated.
 */
2175 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2176         if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2177                               * and return success */
2179                 /* This path cannot be hit for regular files unless in
2180                  * case of obscure races, so no need to to validate
2182                 if (!S_ISREG(inode->i_mode) &&
2183                     !S_ISDIR(inode->i_mode))
2188                 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate @dentry's inode attributes against the MDS for the lock bits
 * in @ibits.  Two strategies:
 *  - ATTRFID-capable server: re-run a GETATTR/LOOKUP intent by fid
 *    (md_intent_lock), unhashing the dentry if the object turned out to
 *    be unlinked, then finish locks;
 *  - otherwise: only if no matching MDS lock is cached, do a plain
 *    md_getattr (requesting EA sizes for regular files) and refresh the
 *    inode via ll_prep_inode().
 */
2196 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2199         struct inode *inode = dentry->d_inode;
2200         struct ptlrpc_request *req = NULL;
2201         struct ll_sb_info *sbi;
2202         struct obd_export *exp;
2207                 CERROR("REPORT THIS LINE TO PETER\n");
2210         sbi = ll_i2sbi(inode);
2212         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2213                inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2215         exp = ll_i2mdexp(inode);
2217         /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC.
2218          *      But under CMD case, it caused some lock issues, should be fixed
2219          *      with new CMD ibits lock. See bug 12718 */
2220         if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2221                 struct lookup_intent oit = { .it_op = IT_GETATTR };
2222                 struct md_op_data *op_data;
2224                 if (ibits == MDS_INODELOCK_LOOKUP)
2225                         oit.it_op = IT_LOOKUP;
2227                 /* Call getattr by fid, so do not provide name at all. */
2228                 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2229                                              dentry->d_inode, NULL, 0, 0,
2230                                              LUSTRE_OPC_ANY, NULL);
2231                 if (IS_ERR(op_data))
2232                         RETURN(PTR_ERR(op_data));
2234                 oit.it_create_mode |= M_CHECK_STALE;
2235                 rc = md_intent_lock(exp, op_data, NULL, 0,
2236                                     /* we are not interested in name
2239                                     ll_md_blocking_ast, 0);
2240                 ll_finish_md_op_data(op_data);
2241                 oit.it_create_mode &= ~M_CHECK_STALE;
2243                         rc = ll_inode_revalidate_fini(inode, rc);
2247                 rc = ll_revalidate_it_finish(req, &oit, dentry);
2249                         ll_intent_release(&oit);
2253                 /* Unlinked? Unhash dentry, so it is not picked up later by
2254                    do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2255                    here to preserve get_cwd functionality on 2.6.
2257                 if (!dentry->d_inode->i_nlink) {
2258                         cfs_spin_lock(&ll_lookup_lock);
2259                         spin_lock(&dcache_lock);
2260                         ll_drop_dentry(dentry);
2261                         spin_unlock(&dcache_lock);
2262                         cfs_spin_unlock(&ll_lookup_lock);
2265                 ll_lookup_finish_locks(&oit, dentry);
2266         } else if (!ll_have_md_lock(dentry->d_inode, ibits, LCK_MINMODE)) {
2267                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2268                 obd_valid valid = OBD_MD_FLGETATTR;
2269                 struct md_op_data *op_data;
/* regular files also need the striping EA; size the reply for it */
2272                 if (S_ISREG(inode->i_mode)) {
2273                         rc = ll_get_max_mdsize(sbi, &ealen);
2276                         valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2279                 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2280                                              0, ealen, LUSTRE_OPC_ANY,
2282                 if (op_data == NULL)
2285                 op_data->op_valid = valid;
2286                 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2287                  * capa for this inode. Because we only keep capas of dirs
2289                 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2290                 ll_finish_md_op_data(op_data);
2292                         rc = ll_inode_revalidate_fini(inode, rc);
2296                 rc = ll_prep_inode(&inode, req, NULL);
2299         ptlrpc_req_finished(req);
/*
 * Full revalidation: refresh MDS attributes via __ll_inode_revalidate_it()
 * and then the size/times.  If no OST objects exist yet (lli_smd == NULL)
 * the times come straight from the cached lock value block; otherwise a
 * glimpse fetches size/times from the OSTs.
 */
2303 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2306         struct inode *inode = dentry->d_inode;
2310         rc = __ll_inode_revalidate_it(dentry, it, ibits);
2312         /* if object not yet allocated, don't validate size */
2313         if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
2314                 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2315                 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2316                 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2320         /* cl_glimpse_size will prefer locally cached writes if they extend
2324         rc = cl_glimpse_size(inode);
/*
 * getattr with an explicit intent: revalidate UPDATE|LOOKUP bits, then
 * fill the kstat from the (now fresh) inode fields.  For 32-bit API
 * consumers the ino is squashed via cl_fid_build_ino().
 */
2329 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2330                   struct lookup_intent *it, struct kstat *stat)
2332         struct inode *inode = de->d_inode;
2333         struct ll_sb_info *sbi = ll_i2sbi(inode);
2334         struct ll_inode_info *lli = ll_i2info(inode);
2337         res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE |
2338                                              MDS_INODELOCK_LOOKUP);
2339         ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
2344         stat->dev = inode->i_sb->s_dev;
2345         if (ll_need_32bit_api(sbi))
2346                 stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
2348                 stat->ino = inode->i_ino;
2349         stat->mode = inode->i_mode;
2350         stat->nlink = inode->i_nlink;
2351         stat->uid = inode->i_uid;
2352         stat->gid = inode->i_gid;
2353         stat->rdev = kdev_t_to_nr(inode->i_rdev);
2354         stat->atime = inode->i_atime;
2355         stat->mtime = inode->i_mtime;
2356         stat->ctime = inode->i_ctime;
2357 #ifdef HAVE_INODE_BLKSIZE
2358         stat->blksize = inode->i_blksize;
2360         stat->blksize = 1 << inode->i_blkbits;
2363         stat->size = i_size_read(inode);
2364         stat->blocks = inode->i_blocks;
/* VFS ->getattr entry point: delegate with a fresh IT_GETATTR intent. */
2368 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2370         struct lookup_intent it = { .it_op = IT_GETATTR };
2372         return ll_getattr_it(mnt, de, &it, stat);
/*
 * VFS ->fiemap entry point: translate the kernel fiemap_extent_info
 * into a ll_user_fiemap buffer (seeded with the first extent from the
 * caller for continuation), run ll_do_fiemap(), and copy the results
 * back into fieinfo.
 */
2375 #ifdef HAVE_LINUX_FIEMAP_H
2376 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2377               __u64 start, __u64 len)
2381         struct ll_user_fiemap *fiemap;
2382         unsigned int extent_count = fieinfo->fi_extents_max;
2384         num_bytes = sizeof(*fiemap) + (extent_count *
2385                                        sizeof(struct ll_fiemap_extent));
2386         OBD_ALLOC_LARGE(fiemap, num_bytes);
2391         fiemap->fm_flags = fieinfo->fi_flags;
2392         fiemap->fm_extent_count = fieinfo->fi_extents_max;
2393         fiemap->fm_start = start;
2394         fiemap->fm_length = len;
2395         memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2396                sizeof(struct ll_fiemap_extent));
2398         rc = ll_do_fiemap(inode, fiemap, num_bytes);
2400         fieinfo->fi_flags = fiemap->fm_flags;
2401         fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2402         memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2403                fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2405         OBD_FREE_LARGE(fiemap, num_bytes);
/*
 * POSIX ACL check callback for generic_permission(): duplicate the cached
 * ACL under lli_lock and evaluate it with posix_acl_permission().
 * Compiled out (elided fallback) when CONFIG_FS_POSIX_ACL is unset.
 */
2412 int lustre_check_acl(struct inode *inode, int mask)
2414 #ifdef CONFIG_FS_POSIX_ACL
2415         struct ll_inode_info *lli = ll_i2info(inode);
2416         struct posix_acl *acl;
/* take a private reference so the check runs outside the spinlock */
2420         cfs_spin_lock(&lli->lli_lock);
2421         acl = posix_acl_dup(lli->lli_posix_acl);
2422         cfs_spin_unlock(&lli->lli_lock);
2427         rc = posix_acl_permission(inode, acl, mask);
2428         posix_acl_release(acl);
/*
 * VFS ->permission implementation, two variants by kernel version.
 * Modern (>= 2.6.10): revalidate the root inode first (it skips lookup),
 * defer to the remote-permission path on RMT_CLIENT mounts, otherwise use
 * generic_permission() with lustre_check_acl.
 * Legacy: an open-coded owner/group/other + ACL + capability check,
 * mirroring the old kernel's vfs_permission logic.
 */
2436 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2437 #ifndef HAVE_INODE_PERMISION_2ARGS
2438 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2440 int ll_inode_permission(struct inode *inode, int mask)
2446        /* as root inode are NOT getting validated in lookup operation,
2447         * need to do it before permission check. */
2449         if (inode == inode->i_sb->s_root->d_inode) {
2450                 struct lookup_intent it = { .it_op = IT_LOOKUP };
2452                 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2453                                               MDS_INODELOCK_LOOKUP);
2458         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2459                inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2461         if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2462                 return lustre_check_remote_perm(inode, mask);
2464         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2465         rc = generic_permission(inode, mask, lustre_check_acl);
2470 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2472         int mode = inode->i_mode;
2475         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2476                inode->i_ino, inode->i_generation, inode, mask);
2478         if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2479                 return lustre_check_remote_perm(inode, mask);
2481         ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2483         if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2484             (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2486         if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2488         if (cfs_curproc_fsuid() == inode->i_uid) {
/* owner class: shift mode so the owner bits line up with S_IRWXO */
2491                 if (((mode >> 3) & mask & S_IRWXO) != mask)
2493                         rc = lustre_check_acl(inode, mask);
2497                                 goto check_capabilities;
2501                 if (cfs_curproc_is_in_groups(inode->i_gid))
2504         if ((mode & mask & S_IRWXO) == mask)
2508         if (!(mask & MAY_EXEC) ||
2509             (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2510                 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2513         if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2514             (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Pick the vectored-I/O member names/functions for the file_operations
 * tables below: old kernels use readv/writev, newer ones aio_read/aio_write. */
2521 #ifdef HAVE_FILE_READV
2522 #define READ_METHOD readv
2523 #define READ_FUNCTION ll_file_readv
2524 #define WRITE_METHOD writev
2525 #define WRITE_FUNCTION ll_file_writev
2527 #define READ_METHOD aio_read
2528 #define READ_FUNCTION ll_file_aio_read
2529 #define WRITE_METHOD aio_write
2530 #define WRITE_FUNCTION ll_file_aio_write
2533 /* -o localflock - only provides locally consistent flock locks */
/* Default file operations table: no .flock/.lock members, so the kernel's
 * in-memory (node-local) lock handling applies. */
2534 struct file_operations ll_file_operations = {
2535         .read           = ll_file_read,
2536         .READ_METHOD    = READ_FUNCTION,
2537         .write          = ll_file_write,
2538         .WRITE_METHOD   = WRITE_FUNCTION,
2539 #ifdef HAVE_UNLOCKED_IOCTL
2540         .unlocked_ioctl = ll_file_ioctl,
2542         .ioctl          = ll_file_ioctl,
2544         .open           = ll_file_open,
2545         .release        = ll_file_release,
2546         .mmap           = ll_file_mmap,
2547         .llseek         = ll_file_seek,
2548 #ifdef HAVE_KERNEL_SENDFILE
2549         .sendfile       = ll_file_sendfile,
2551 #ifdef HAVE_KERNEL_SPLICE_READ
2552         .splice_read    = ll_file_splice_read,
/* File operations for cluster-coherent locking: identical to the default
 * table plus .flock/.lock wired to ll_file_flock (LDLM-backed). */
2558 struct file_operations ll_file_operations_flock = {
2559         .read           = ll_file_read,
2560         .READ_METHOD    = READ_FUNCTION,
2561         .write          = ll_file_write,
2562         .WRITE_METHOD   = WRITE_FUNCTION,
2563 #ifdef HAVE_UNLOCKED_IOCTL
2564         .unlocked_ioctl = ll_file_ioctl,
2566         .ioctl          = ll_file_ioctl,
2568         .open           = ll_file_open,
2569         .release        = ll_file_release,
2570         .mmap           = ll_file_mmap,
2571         .llseek         = ll_file_seek,
2572 #ifdef HAVE_KERNEL_SENDFILE
2573         .sendfile       = ll_file_sendfile,
2575 #ifdef HAVE_KERNEL_SPLICE_READ
2576         .splice_read    = ll_file_splice_read,
2580 #ifdef HAVE_F_OP_FLOCK
2581         .flock          = ll_file_flock,
2583         .lock           = ll_file_flock
2586 /* These are for -o noflock - to return ENOSYS on flock calls */
/* Same table again, but lock entry points go to ll_file_noflock so all
 * locking requests are refused on -o noflock mounts. */
2587 struct file_operations ll_file_operations_noflock = {
2588         .read           = ll_file_read,
2589         .READ_METHOD    = READ_FUNCTION,
2590         .write          = ll_file_write,
2591         .WRITE_METHOD   = WRITE_FUNCTION,
2592 #ifdef HAVE_UNLOCKED_IOCTL
2593         .unlocked_ioctl = ll_file_ioctl,
2595         .ioctl          = ll_file_ioctl,
2597         .open           = ll_file_open,
2598         .release        = ll_file_release,
2599         .mmap           = ll_file_mmap,
2600         .llseek         = ll_file_seek,
2601 #ifdef HAVE_KERNEL_SENDFILE
2602         .sendfile       = ll_file_sendfile,
2604 #ifdef HAVE_KERNEL_SPLICE_READ
2605         .splice_read    = ll_file_splice_read,
2609 #ifdef HAVE_F_OP_FLOCK
2610         .flock          = ll_file_noflock,
2612         .lock           = ll_file_noflock
/* Inode operations for regular files; fiemap is only available when the
 * kernel ships linux/fiemap.h. */
2615 struct inode_operations ll_file_inode_operations = {
2616         .setattr        = ll_setattr,
2617         .truncate       = ll_truncate,
2618         .getattr        = ll_getattr,
2619         .permission     = ll_inode_permission,
2620         .setxattr       = ll_setxattr,
2621         .getxattr       = ll_getxattr,
2622         .listxattr      = ll_listxattr,
2623         .removexattr    = ll_removexattr,
2624 #ifdef HAVE_LINUX_FIEMAP_H
2625         .fiemap         = ll_fiemap,
2629 /* dynamic ioctl number support routines */
/*
 * llioc: global registry of dynamically registered ioctl handlers.
 * ioc_head is a list of struct llioc_data records, protected by the
 * reader/writer semaphore ioc_sem (readers: dispatch; writers:
 * register/unregister).  Both are statically initialized.
 */
2630 static struct llioc_ctl_data {
2631 cfs_rw_semaphore_t ioc_sem;
2632 cfs_list_t ioc_head;
2634 __RWSEM_INITIALIZER(llioc.ioc_sem),
2635 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/*
 * One registration record: linkage into llioc.ioc_head, the total
 * allocation size (for freeing), the handler callback, and a trailing
 * array of iocd_count ioctl command numbers the callback serves.
 * NOTE(review): iocd_cmd[0] is the pre-C99 flexible-array idiom.
 */
2640 cfs_list_t iocd_list;
2641 unsigned int iocd_size;
2642 llioc_callback_t iocd_cb;
2643 unsigned int iocd_count;
2644 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler.
 *
 * \param cb    callback invoked for each matching ioctl command
 * \param count number of entries in \a cmd (0..LLIOC_MAX_CMD)
 * \param cmd   array of ioctl command numbers served by \a cb
 *
 * Allocates a registration record sized for \a count trailing command
 * numbers, copies the command list into it, and appends it to the
 * global llioc list under the write semaphore.
 *
 * \return opaque cookie to pass to ll_iocontrol_unregister();
 *         presumably NULL on bad arguments or allocation failure --
 *         the early-return statements are elsewhere, confirm.
 */
2647 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2650 struct llioc_data *in_data = NULL;
/* reject missing callback/commands or an out-of-range count */
2653 if (cb == NULL || cmd == NULL ||
2654 count > LLIOC_MAX_CMD || count < 0)
/* record header plus trailing iocd_cmd[] array */
2657 size = sizeof(*in_data) + count * sizeof(unsigned int);
2658 OBD_ALLOC(in_data, size);
2659 if (in_data == NULL)
2662 memset(in_data, 0, sizeof(*in_data));
2663 in_data->iocd_size = size;
2664 in_data->iocd_cb = cb;
2665 in_data->iocd_count = count;
2666 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* publish the record: writers exclude dispatchers holding the read lock */
2668 cfs_down_write(&llioc.ioc_sem);
2669 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2670 cfs_up_write(&llioc.ioc_sem);
/*
 * Remove a handler previously added with ll_iocontrol_register().
 *
 * \param magic cookie returned by ll_iocontrol_register()
 *
 * Walks the registry under the write semaphore; on a match the record
 * is unlinked, the lock dropped, and the record freed (presumably the
 * function returns right after the free -- confirm against full file).
 * If no record matches, the lock is released and a warning is logged.
 */
2675 void ll_iocontrol_unregister(void *magic)
2677 struct llioc_data *tmp;
2682 cfs_down_write(&llioc.ioc_sem);
2683 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* cache the size before unlinking; needed for OBD_FREE below */
2685 unsigned int size = tmp->iocd_size;
2687 cfs_list_del(&tmp->iocd_list);
/* drop the lock before freeing -- no further list access follows */
2688 cfs_up_write(&llioc.ioc_sem);
2690 OBD_FREE(tmp, size);
2694 cfs_up_write(&llioc.ioc_sem);
2696 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
/* export the dynamic-ioctl registration API to other kernel modules */
2699 EXPORT_SYMBOL(ll_iocontrol_register);
2700 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Dispatch an ioctl to the dynamically registered handlers.
 *
 * \param inode target inode
 * \param file  open file the ioctl arrived on
 * \param cmd   ioctl command number
 * \param arg   ioctl argument
 * \param rcp   out: handler return code (seeded with -EINVAL; presumably
 *              stored through on exit -- the tail of the function is
 *              elsewhere, confirm)
 *
 * Under the read semaphore, scans each registration's iocd_cmd[] for
 * \a cmd and invokes its callback; iteration stops once a callback
 * returns LLIOC_STOP.
 *
 * NOTE(review): 'i' is a signed int compared against the unsigned
 * iocd_count -- harmless at realistic counts, but worth tidying.
 */
2702 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2703 unsigned int cmd, unsigned long arg, int *rcp)
2705 enum llioc_iter ret = LLIOC_CONT;
2706 struct llioc_data *data;
2707 int rc = -EINVAL, i;
2709 cfs_down_read(&llioc.ioc_sem);
2710 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2711 for (i = 0; i < data->iocd_count; i++) {
2712 if (cmd != data->iocd_cmd[i])
/* command matches this registration: run its callback */
2715 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2719 if (ret == LLIOC_STOP)
2722 cfs_up_read(&llioc.ioc_sem);