1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-open ll_file_data from the slab and, apparently for the
 * directory case, an lfd_name buffer (used by statahead, per ll_file_open).
 * On lfd_name allocation failure the fd itself is freed back to the slab.
 * NOTE(review): this dump elides lines (error paths, returns, braces) —
 * the text below is not the complete function. */
54 static struct ll_file_data *ll_file_data_get(int is_dir)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
60 OBD_ALLOC(fd->fd_dir.lfd_name, LLITE_NAME_LEN);
61 if (unlikely(fd->fd_dir.lfd_name == NULL)) {
62 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Release a ll_file_data: free the optional lfd_name buffer (allocated in
 * ll_file_data_get for directories), then return fd to the slab cache. */
69 static void ll_file_data_put(struct ll_file_data *fd)
72 if (fd->fd_dir.lfd_name)
73 OBD_FREE(fd->fd_dir.lfd_name, LLITE_NAME_LEN);
74 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Pack the inode's current attributes (mode/times/size/blocks/flags), its
 * fid, ioepoch, the open file handle @fh and an MDS capability into
 * @op_data for a subsequent MDS request (e.g. close or SOM update).
 * The capability from ll_mdscapa_get() is a reference the caller's request
 * path is responsible for releasing. */
78 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
79 struct lustre_handle *fh)
81 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
82 op_data->op_attr.ia_mode = inode->i_mode;
83 op_data->op_attr.ia_atime = inode->i_atime;
84 op_data->op_attr.ia_mtime = inode->i_mtime;
85 op_data->op_attr.ia_ctime = inode->i_ctime;
86 op_data->op_attr.ia_size = i_size_read(inode);
87 op_data->op_attr_blocks = inode->i_blocks;
88 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags =
89 ll_inode_to_ext_flags(inode->i_flags);
90 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
92 op_data->op_handle = *fh;
93 op_data->op_capa1 = ll_mdscapa_get(inode);
97 * Closes the IO epoch and packs all the attributes into @op_data for
/* the close RPC. Size/blocks are only sent when SOM is disabled or the
 * file is not regular (otherwise the MDS obtains them via SOM). */
100 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
101 struct obd_client_handle *och)
105 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
106 ATTR_MTIME_SET | ATTR_CTIME_SET;
108 if (!(och->och_flags & FMODE_WRITE))
111 if (!exp_connect_som(ll_i2mdexp(inode)) || !S_ISREG(inode->i_mode))
112 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* Close the I/O epoch for this och before packing attributes. */
114 ll_ioepoch_close(inode, op_data, &och, 0);
117 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
118 ll_prep_md_op_data(op_data, inode, NULL, NULL,
119 0, 0, LUSTRE_OPC_ANY, NULL);
/* Send the MDS close RPC for open handle @och. Prepares close op_data,
 * issues md_close(), performs a Size-on-MDS update if the MDS asked for
 * one, destroys OST objects on last unlink, and clears the open replay
 * data. On SOM setups where the epoch was not closed, the DONE_WRITING
 * work is queued instead of freeing state synchronously. */
123 static int ll_close_inode_openhandle(struct obd_export *md_exp,
125 struct obd_client_handle *och)
127 struct obd_export *exp = ll_i2mdexp(inode);
128 struct md_op_data *op_data;
129 struct ptlrpc_request *req = NULL;
130 struct obd_device *obd = class_exp2obd(exp);
137 * XXX: in case of LMV, is this correct to access
140 CERROR("Invalid MDC connection handle "LPX64"\n",
141 ll_i2mdexp(inode)->exp_handle.h_cookie);
145 OBD_ALLOC_PTR(op_data);
147 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
149 ll_prepare_close(inode, op_data, och);
150 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
151 rc = md_close(md_exp, op_data, och->och_mod, &req);
153 /* This close must have the epoch closed. */
154 LASSERT(epoch_close);
155 /* MDS has instructed us to obtain Size-on-MDS attribute from
156 * OSTs and send setattr to back to MDS. */
157 rc = ll_som_update(inode, op_data);
159 CERROR("inode %lu mdc Size-on-MDS update failed: "
160 "rc = %d\n", inode->i_ino, rc);
164 CERROR("inode %lu mdc close failed: rc = %d\n",
167 ll_finish_md_op_data(op_data);
/* On last unlink the close reply carries object destroy cookies. */
170 rc = ll_objects_destroy(req, inode);
172 CERROR("inode %lu ll_objects destroy: rc = %d\n",
179 if (exp_connect_som(exp) && !epoch_close &&
180 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
181 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
183 md_clear_open_replay_data(md_exp, och);
184 /* Free @och if it is not waiting for DONE_WRITING. */
185 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
188 if (req) /* This is close request */
189 ptlrpc_req_finished(req);
/* Really close the MDS open handle of the given mode (write/exec/read)
 * for @inode, but only when no other local users of that handle remain
 * (och_usecount == 0). Selection of och/usecount mirrors ll_md_close()
 * and ll_file_open(). lli_och_sem serializes handle teardown. */
193 int ll_md_real_close(struct inode *inode, int flags)
195 struct ll_inode_info *lli = ll_i2info(inode);
196 struct obd_client_handle **och_p;
197 struct obd_client_handle *och;
202 if (flags & FMODE_WRITE) {
203 och_p = &lli->lli_mds_write_och;
204 och_usecount = &lli->lli_open_fd_write_count;
205 } else if (flags & FMODE_EXEC) {
206 och_p = &lli->lli_mds_exec_och;
207 och_usecount = &lli->lli_open_fd_exec_count;
209 LASSERT(flags & FMODE_READ);
210 och_p = &lli->lli_mds_read_och;
211 och_usecount = &lli->lli_open_fd_read_count;
214 cfs_down(&lli->lli_och_sem);
215 if (*och_usecount) { /* There are still users of this handle, so
217 cfs_up(&lli->lli_och_sem);
222 cfs_up(&lli->lli_och_sem);
224 if (och) { /* There might be a race and somebody have freed this och
226 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-struct-file close: drop any group lock, decrement the per-mode
 * open count under lli_och_sem, and — unless a matching cached OPEN DLM
 * lock lets us skip the RPC — call ll_md_real_close() to close the MDS
 * handle. Finally detach and free the ll_file_data and close the capa. */
233 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
236 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
237 struct ll_inode_info *lli = ll_i2info(inode);
241 /* clear group lock, if present */
242 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
243 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
245 /* Let's see if we have good enough OPEN lock on the file and if
246 we can skip talking to MDS */
247 if (file->f_dentry->d_inode) { /* Can this ever be false? */
249 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
250 struct lustre_handle lockh;
251 struct inode *inode = file->f_dentry->d_inode;
252 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
254 cfs_down(&lli->lli_och_sem);
255 if (fd->fd_omode & FMODE_WRITE) {
257 LASSERT(lli->lli_open_fd_write_count);
258 lli->lli_open_fd_write_count--;
259 } else if (fd->fd_omode & FMODE_EXEC) {
261 LASSERT(lli->lli_open_fd_exec_count);
262 lli->lli_open_fd_exec_count--;
265 LASSERT(lli->lli_open_fd_read_count);
266 lli->lli_open_fd_read_count--;
268 cfs_up(&lli->lli_och_sem);
/* LDLM_FL_TEST_LOCK: only probe for the lock, don't take a ref. */
270 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
271 LDLM_IBITS, &policy, lockmode,
273 rc = ll_md_real_close(file->f_dentry->d_inode,
277 CERROR("Releasing a file %p with negative dentry %p. Name %s",
278 file, file->f_dentry, file->f_dentry->d_name.name);
281 LUSTRE_FPRIVATE(file) = NULL;
282 ll_file_data_put(fd);
283 ll_capa_close(inode);
288 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
290 /* While this returns an error code, fput() the caller does not, so we need
291 * to make every effort to clean up all of our state here. Also, applications
292 * rarely check close errors and even if an error is returned they will not
293 * re-try the close call.
 */
/* VFS ->release() handler: stop statahead if this fd started it, clear
 * any pending async write rc on the lsm, and close via ll_md_close().
 * The root directory and remote-ACL bookkeeping are special-cased. */
295 int ll_file_release(struct inode *inode, struct file *file)
297 struct ll_file_data *fd;
298 struct ll_sb_info *sbi = ll_i2sbi(inode);
299 struct ll_inode_info *lli = ll_i2info(inode);
300 struct lov_stripe_md *lsm = lli->lli_smd;
304 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
305 inode->i_generation, inode);
307 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL: releasing the root dir tears down the per-process
 * remote ACL translation state. */
308 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
309 inode == inode->i_sb->s_root->d_inode) {
310 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
313 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
314 fd->fd_flags &= ~LL_FILE_RMTACL;
315 rct_del(&sbi->ll_rct, cfs_curproc_pid());
316 et_search_free(&sbi->ll_et, cfs_curproc_pid());
321 if (inode->i_sb->s_root != file->f_dentry)
322 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
323 fd = LUSTRE_FPRIVATE(file);
326 /* The last ref on @file, maybe not the the owner pid of statahead.
327 * Different processes can open the same dir, "lli_opendir_key" means:
328 * it is me that should stop the statahead thread. */
329 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
330 ll_stop_statahead(inode, lli->lli_opendir_key);
/* The root dentry never went through ll_md_close()-style open
 * accounting; just drop the fd here. */
332 if (inode->i_sb->s_root == file->f_dentry) {
333 LUSTRE_FPRIVATE(file) = NULL;
334 ll_file_data_put(fd);
339 lov_test_and_clear_async_rc(lsm);
340 lli->lli_async_rc = 0;
342 rc = ll_md_close(sbi->ll_md_exp, inode, file);
344 if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
345 libcfs_debug_dumplog();
/* Issue an intent-based open RPC to the MDS for @file, optionally with
 * striping data @lmm/@lmmsize (setstripe path). Requests an OPEN lock
 * unless stripe info is being set. On success, updates the inode from
 * the reply and binds the granted lock to it; the intent's request ref
 * is dropped on the exit path. */
350 static int ll_intent_file_open(struct file *file, void *lmm,
351 int lmmsize, struct lookup_intent *itp)
353 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
354 struct dentry *parent = file->f_dentry->d_parent;
355 const char *name = file->f_dentry->d_name.name;
356 const int len = file->f_dentry->d_name.len;
357 struct md_op_data *op_data;
358 struct ptlrpc_request *req;
365 /* Usually we come here only for NFSD, and we want open lock.
366 But we can also get here with pre 2.6.15 patchless kernels, and in
367 that case that lock is also ok */
368 /* We can also get here if there was cached open handle in revalidate_it
369 * but it disappeared while we were getting from there to ll_file_open.
370 * But this means this file was closed and immediatelly opened which
371 * makes a good candidate for using OPEN lock */
372 /* If lmmsize & lmm are not 0, we are just setting stripe info
373 * parameters. No need for the open lock */
374 if (!lmm && !lmmsize)
375 itp->it_flags |= MDS_OPEN_LOCK;
377 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
378 file->f_dentry->d_inode, name, len,
379 O_RDWR, LUSTRE_OPC_ANY, NULL);
381 RETURN(PTR_ERR(op_data));
383 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
384 0 /*unused */, &req, ll_md_blocking_ast, 0);
385 ll_finish_md_op_data(op_data);
387 /* reason for keep own exit path - don`t flood log
388 * with messages with -ESTALE errors.
 */
/* ESTALE with a completed open: release the server-side handle we
 * won't be using. */
390 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
391 it_open_error(DISP_OPEN_OPEN, itp))
393 ll_release_openhandle(file->f_dentry, itp);
397 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
398 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
399 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
403 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
404 if (!rc && itp->d.lustre.it_lock_mode)
405 md_set_lock_data(sbi->ll_md_exp,
406 &itp->d.lustre.it_lock_handle,
407 file->f_dentry->d_inode, NULL);
410 ptlrpc_req_finished(itp->d.lustre.it_data);
411 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
412 ll_intent_drop_lock(itp);
418 * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does
419 * not believe attributes if a few ioepoch holders exist. Attributes for
420 * previous ioepoch if new one is opened are also skipped by MDS.
 */
422 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
/* Only record a non-zero epoch that differs from the current one. */
424 if (ioepoch && lli->lli_ioepoch != ioepoch) {
425 lli->lli_ioepoch = ioepoch;
426 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
427 ioepoch, PFID(&lli->lli_fid));
/* Fill @och from the intent's open reply: copy the MDS file handle,
 * record fid/flags, open the returned ioepoch on the inode, and register
 * the open for replay on MDS recovery. Returns md_set_open_replay_data()
 * result. */
431 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
432 struct lookup_intent *it, struct obd_client_handle *och)
434 struct ptlrpc_request *req = it->d.lustre.it_data;
435 struct mdt_body *body;
439 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
440 LASSERT(body != NULL); /* reply already checked out */
442 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
443 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
444 och->och_fid = lli->lli_fid;
445 och->och_flags = it->it_flags;
446 ll_ioepoch_open(lli, body->ioepoch);
448 return md_set_open_replay_data(md_exp, och, req);
/* Complete the client-local part of an open: optionally fill @och from
 * the intent reply (when och != NULL), then attach @fd to the struct
 * file, initialize readahead state and record the open mode. */
451 int ll_local_open(struct file *file, struct lookup_intent *it,
452 struct ll_file_data *fd, struct obd_client_handle *och)
454 struct inode *inode = file->f_dentry->d_inode;
455 struct ll_inode_info *lli = ll_i2info(inode);
458 LASSERT(!LUSTRE_FPRIVATE(file));
463 struct ptlrpc_request *req = it->d.lustre.it_data;
464 struct mdt_body *body;
467 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
471 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
472 if ((it->it_flags & FMODE_WRITE) &&
473 (body->valid & OBD_MD_FLSIZE))
474 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
475 lli->lli_ioepoch, PFID(&lli->lli_fid));
478 LUSTRE_FPRIVATE(file) = fd;
479 ll_readahead_init(inode, &fd->fd_ras);
/* fd_omode is consulted at close time to pick the right och. */
480 fd->fd_omode = it->it_flags;
484 /* Open a file, and (for the very first open) create objects on the OSTs at
485 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
486 * creation or open until ll_lov_setstripe() ioctl is called. We grab
487 * lli_open_sem to ensure no other process will create objects, send the
488 * stripe MD to the MDS, or try to destroy the objects if that fails.
 *
490 * If we already have the stripe MD locally then we don't request it in
491 * md_open(), by passing a lmm_size = 0.
 *
493 * It is up to the application to ensure no other processes open this file
494 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
495 * used. We might be able to avoid races of that sort by getting lli_open_sem
496 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
497 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
 */
/* VFS ->open() handler. Reuses an existing per-mode MDS open handle when
 * one is cached on the inode; otherwise issues an intent open RPC under
 * lli_och_sem (dropped around the RPC to avoid deadlock with the
 * blocking AST). Also arms statahead on first directory open. */
499 int ll_file_open(struct inode *inode, struct file *file)
501 struct ll_inode_info *lli = ll_i2info(inode);
502 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
503 .it_flags = file->f_flags };
504 struct lov_stripe_md *lsm;
505 struct ptlrpc_request *req = NULL;
506 struct obd_client_handle **och_p;
508 struct ll_file_data *fd;
509 int rc = 0, opendir_set = 0;
512 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
513 inode->i_generation, inode, file->f_flags);
515 #ifdef HAVE_VFS_INTENT_PATCHES
518 it = file->private_data; /* XXX: compat macro */
519 file->private_data = NULL; /* prevent ll_local_open assertion */
522 fd = ll_file_data_get(S_ISDIR(inode->i_mode));
/* First opener of a directory becomes the statahead owner. */
527 if (S_ISDIR(inode->i_mode)) {
528 cfs_spin_lock(&lli->lli_sa_lock);
529 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
530 LASSERT(lli->lli_sai == NULL);
531 lli->lli_opendir_key = fd;
532 lli->lli_opendir_pid = cfs_curproc_pid();
535 cfs_spin_unlock(&lli->lli_sa_lock);
538 if (inode->i_sb->s_root == file->f_dentry) {
539 LUSTRE_FPRIVATE(file) = fd;
/* No cached intent from the VFS: build our own open intent. */
543 if (!it || !it->d.lustre.it_disposition) {
544 /* Convert f_flags into access mode. We cannot use file->f_mode,
545 * because everything but O_ACCMODE mask was stripped from
 * it. */
547 if ((oit.it_flags + 1) & O_ACCMODE)
549 if (file->f_flags & O_TRUNC)
550 oit.it_flags |= FMODE_WRITE;
552 /* kernel only call f_op->open in dentry_open. filp_open calls
553 * dentry_open after call to open_namei that checks permissions.
554 * Only nfsd_open call dentry_open directly without checking
555 * permissions and because of that this code below is safe. */
556 if (oit.it_flags & FMODE_WRITE)
557 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
559 /* We do not want O_EXCL here, presumably we opened the file
560 * already? XXX - NFS implications? */
561 oit.it_flags &= ~O_EXCL;
563 /* bug20584, if "it_flags" contains O_CREAT, the file will be
564 * created if necessary, then "IT_CREAT" should be set to keep
565 * consistent with it */
566 if (oit.it_flags & O_CREAT)
567 oit.it_op |= IT_CREAT;
573 /* Let's see if we have file open on MDS already. */
574 if (it->it_flags & FMODE_WRITE) {
575 och_p = &lli->lli_mds_write_och;
576 och_usecount = &lli->lli_open_fd_write_count;
577 } else if (it->it_flags & FMODE_EXEC) {
578 och_p = &lli->lli_mds_exec_och;
579 och_usecount = &lli->lli_open_fd_exec_count;
581 och_p = &lli->lli_mds_read_och;
582 och_usecount = &lli->lli_open_fd_read_count;
585 cfs_down(&lli->lli_och_sem);
586 if (*och_p) { /* Open handle is present */
587 if (it_disposition(it, DISP_OPEN_OPEN)) {
588 /* Well, there's extra open request that we do not need,
589 let's close it somehow. This will decref request. */
590 rc = it_open_error(DISP_OPEN_OPEN, it);
592 cfs_up(&lli->lli_och_sem);
593 ll_file_data_put(fd);
594 GOTO(out_openerr, rc);
596 ll_release_openhandle(file->f_dentry, it);
597 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse the cached handle; och == NULL means "don't refill". */
602 rc = ll_local_open(file, it, fd, NULL);
605 cfs_up(&lli->lli_och_sem);
606 ll_file_data_put(fd);
607 GOTO(out_openerr, rc);
610 LASSERT(*och_usecount == 0);
611 if (!it->d.lustre.it_disposition) {
612 /* We cannot just request lock handle now, new ELC code
613 means that one of other OPEN locks for this file
614 could be cancelled, and since blocking ast handler
615 would attempt to grab och_sem as well, that would
616 result in a deadlock */
617 cfs_up(&lli->lli_och_sem);
618 it->it_create_mode |= M_CHECK_STALE;
619 rc = ll_intent_file_open(file, NULL, 0, it);
620 it->it_create_mode &= ~M_CHECK_STALE;
622 ll_file_data_put(fd);
623 GOTO(out_openerr, rc);
626 /* Got some error? Release the request */
627 if (it->d.lustre.it_status < 0) {
628 req = it->d.lustre.it_data;
629 ptlrpc_req_finished(req);
633 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
635 ll_file_data_put(fd);
636 GOTO(out_och_free, rc = -ENOMEM);
639 req = it->d.lustre.it_data;
641 /* md_intent_lock() didn't get a request ref if there was an
642 * open error, so don't do cleanup on the request here
 */
644 /* XXX (green): Should not we bail out on any error here, not
645 * just open error? */
646 rc = it_open_error(DISP_OPEN_OPEN, it);
648 ll_file_data_put(fd);
649 GOTO(out_och_free, rc);
652 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
653 rc = ll_local_open(file, it, fd, *och_p);
655 ll_file_data_put(fd);
656 GOTO(out_och_free, rc);
659 cfs_up(&lli->lli_och_sem);
661 /* Must do this outside lli_och_sem lock to prevent deadlock where
662 different kind of OPEN lock for this same inode gets cancelled
663 by ldlm_cancel_lru */
664 if (!S_ISREG(inode->i_mode))
671 if (file->f_flags & O_LOV_DELAY_CREATE ||
672 !(file->f_mode & FMODE_WRITE)) {
673 CDEBUG(D_INODE, "object creation was delayed\n");
677 file->f_flags &= ~O_LOV_DELAY_CREATE;
680 ptlrpc_req_finished(req);
682 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
686 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
687 *och_p = NULL; /* OBD_FREE writes some magic there */
690 cfs_up(&lli->lli_och_sem);
/* Undo the statahead-owner claim made earlier on error. */
692 if (opendir_set != 0)
693 ll_stop_statahead(inode, lli->lli_opendir_key);
699 /* Fills the obdo with the attributes for the lsm */
/* Performs an async OST getattr for @lsm via a ptlrpc set and waits for
 * it. @ioepoch is passed through in the obdo; @sync requests the getattr
 * under a server-side lock (OBD_FL_SRVLOCK). On success, o_valid is
 * masked down to the size/blocks/time fields the caller may trust. */
700 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
701 struct obd_capa *capa, struct obdo *obdo,
702 __u64 ioepoch, int sync)
704 struct ptlrpc_request_set *set;
705 struct obd_info oinfo = { { { 0 } } };
710 LASSERT(lsm != NULL);
714 oinfo.oi_oa->o_id = lsm->lsm_object_id;
715 oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
716 oinfo.oi_oa->o_mode = S_IFREG;
717 oinfo.oi_oa->o_ioepoch = ioepoch;
718 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
719 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
720 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
721 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
722 OBD_MD_FLGROUP | OBD_MD_FLEPOCH;
723 oinfo.oi_capa = capa;
725 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
726 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
729 set = ptlrpc_prep_set();
731 CERROR("can't allocate ptlrpc set\n");
734 rc = obd_getattr_async(exp, &oinfo, set);
736 rc = ptlrpc_set_wait(set);
737 ptlrpc_set_destroy(set);
740 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
741 OBD_MD_FLATIME | OBD_MD_FLMTIME |
742 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
747 * Performs the getattr on the inode and updates its fields.
748 * If @sync != 0, perform the getattr under the server-side lock.
 */
750 int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
751 __u64 ioepoch, int sync)
753 struct ll_inode_info *lli = ll_i2info(inode);
754 struct obd_capa *capa = ll_mdscapa_get(inode);
758 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode),
759 capa, obdo, ioepoch, sync);
/* Push the OST-reported attributes back into the VFS inode. */
762 obdo_refresh_inode(inode, obdo, obdo->o_valid);
764 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
765 lli->lli_smd->lsm_object_id, i_size_read(inode),
766 (unsigned long long)inode->i_blocks,
767 (unsigned long)ll_inode_blksize(inode));
/* Merge the MDS-provided timestamps cached in lli_lvb with the lock
 * value blocks from the OSTs (obd_merge_lvb), then install the merged
 * size/blocks/times into the inode under the inode size lock. */
772 int ll_merge_lvb(struct inode *inode)
774 struct ll_inode_info *lli = ll_i2info(inode);
775 struct ll_sb_info *sbi = ll_i2sbi(inode);
781 ll_inode_size_lock(inode, 1);
782 inode_init_lvb(inode, &lvb);
784 /* merge timestamps the most resently obtained from mds with
785 timestamps obtained from osts */
786 lvb.lvb_atime = lli->lli_lvb.lvb_atime;
787 lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
788 lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
789 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
790 cl_isize_write_nolock(inode, lvb.lvb_size);
791 inode->i_blocks = lvb.lvb_blocks;
793 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
794 LTIME_S(inode->i_atime) = lvb.lvb_atime;
795 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
796 ll_inode_size_unlock(inode, 1);
/* ioctl helper: getattr on @lsm's OST objects (no capa, epoch 0, no
 * server-side lock) and copy size/blocks/times into the stat buffer. */
801 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
804 struct obdo obdo = { 0 };
807 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo, 0, 0);
809 st->st_size = obdo.o_size;
810 st->st_blocks = obdo.o_blocks;
811 st->st_mtime = obdo.o_mtime;
812 st->st_atime = obdo.o_atime;
813 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read or write on @file: zero the structure,
 * carry over O_NONBLOCK/O_APPEND semantics, bind the cl_object, and pick
 * the lock requirement — never for nolock files (with no_srvlock set),
 * mandatory for O_APPEND, otherwise "maybe". */
818 void ll_io_init(struct cl_io *io, const struct file *file, int write)
820 struct inode *inode = file->f_dentry->d_inode;
822 memset(io, 0, sizeof *io);
823 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
825 io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND);
826 io->ci_obj = ll_i2info(inode)->lli_clob;
827 io->ci_lockreq = CILR_MAYBE;
828 if (ll_file_nolock(file)) {
829 io->ci_lockreq = CILR_NEVER;
830 io->ci_no_srvlock = 1;
831 } else if (file->f_flags & O_APPEND) {
832 io->ci_lockreq = CILR_MANDATORY;
/* Common engine behind all read/write entry points. Initializes a cl_io,
 * dispatches on the io subtype (normal iovec, sendfile, splice) to fill
 * in the vvp/ccc io state, runs cl_io_loop(), and advances *ppos by the
 * bytes transferred. Non-grouplock normal writes are serialized via
 * lli_write_sem. Returns bytes transferred or negative errno from
 * ci_result. */
836 static ssize_t ll_file_io_generic(const struct lu_env *env,
837 struct vvp_io_args *args, struct file *file,
838 enum cl_io_type iot, loff_t *ppos, size_t count)
844 io = &ccc_env_info(env)->cti_io;
845 ll_io_init(io, file, iot == CIT_WRITE);
847 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
848 struct vvp_io *vio = vvp_env_io(env);
849 struct ccc_io *cio = ccc_env_io(env);
850 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
851 int write_sem_locked = 0;
853 cio->cui_fd = LUSTRE_FPRIVATE(file);
854 vio->cui_io_subtype = args->via_io_subtype;
856 switch (vio->cui_io_subtype) {
858 cio->cui_iov = args->u.normal.via_iov;
859 cio->cui_nrsegs = args->u.normal.via_nrsegs;
860 cio->cui_tot_nrsegs = cio->cui_nrsegs;
861 #ifndef HAVE_FILE_WRITEV
862 cio->cui_iocb = args->u.normal.via_iocb;
 #endif
864 if ((iot == CIT_WRITE) &&
865 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
866 cfs_down(&lli->lli_write_sem);
867 write_sem_locked = 1;
871 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
872 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
875 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
876 vio->u.splice.cui_flags = args->u.splice.via_flags;
879 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
882 result = cl_io_loop(env, io);
883 if (write_sem_locked)
884 cfs_up(&lli->lli_write_sem);
886 /* cl_io_rw_init() handled IO */
887 result = io->ci_result;
890 if (io->ci_nob > 0) {
892 *ppos = io->u.ci_wr.wr.crw_pos;
900 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
 */
/* Validate an iovec array and compute the total byte count into *count.
 * Rejects negative segment lengths / cumulative overflow with -EINVAL;
 * on an inaccessible segment the count is truncated at that point and
 * *nr_segs adjusted, matching the kernel's generic behavior. */
902 static int ll_file_get_iov_count(const struct iovec *iov,
903 unsigned long *nr_segs, size_t *count)
908 for (seg = 0; seg < *nr_segs; seg++) {
909 const struct iovec *iv = &iov[seg];
912 * If any segment has a negative length, or the cumulative
913 * length ever wraps negative then return -EINVAL.
 */
916 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
918 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
923 cnt -= iv->iov_len; /* This segment is no good */
930 #ifdef HAVE_FILE_READV
/* Vectored read entry point (pre-AIO kernels): validate the iovec,
 * obtain a cl env, and run the generic io path with CIT_READ. */
931 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
932 unsigned long nr_segs, loff_t *ppos)
935 struct vvp_io_args *args;
941 result = ll_file_get_iov_count(iov, &nr_segs, &count);
945 env = cl_env_get(&refcheck);
947 RETURN(PTR_ERR(env));
949 args = vvp_env_args(env, IO_NORMAL);
950 args->u.normal.via_iov = (struct iovec *)iov;
951 args->u.normal.via_nrsegs = nr_segs;
953 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
954 cl_env_put(env, &refcheck);
/* Single-buffer read (readv-kernel variant): wrap the user buffer in the
 * per-env local iovec and delegate to ll_file_readv(). */
958 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
962 struct iovec *local_iov;
967 env = cl_env_get(&refcheck);
969 RETURN(PTR_ERR(env));
971 local_iov = &vvp_env_info(env)->vti_local_iov;
972 local_iov->iov_base = (void __user *)buf;
973 local_iov->iov_len = count;
974 result = ll_file_readv(file, local_iov, 1, ppos);
975 cl_env_put(env, &refcheck);
/* AIO read entry point: validate the iovec, then run the generic io path
 * with CIT_READ using the kiocb's position (updated in place). */
980 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
981 unsigned long nr_segs, loff_t pos)
984 struct vvp_io_args *args;
990 result = ll_file_get_iov_count(iov, &nr_segs, &count);
994 env = cl_env_get(&refcheck);
996 RETURN(PTR_ERR(env));
998 args = vvp_env_args(env, IO_NORMAL);
999 args->u.normal.via_iov = (struct iovec *)iov;
1000 args->u.normal.via_nrsegs = nr_segs;
1001 args->u.normal.via_iocb = iocb;
1003 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
1004 &iocb->ki_pos, count);
1005 cl_env_put(env, &refcheck);
/* Single-buffer read (AIO-kernel variant): build a synchronous kiocb and
 * local iovec in the env, call ll_file_aio_read(), and propagate the
 * updated position back to *ppos. */
1009 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
1013 struct iovec *local_iov;
1014 struct kiocb *kiocb;
1019 env = cl_env_get(&refcheck);
1021 RETURN(PTR_ERR(env));
1023 local_iov = &vvp_env_info(env)->vti_local_iov;
1024 kiocb = &vvp_env_info(env)->vti_kiocb;
1025 local_iov->iov_base = (void __user *)buf;
1026 local_iov->iov_len = count;
1027 init_sync_kiocb(kiocb, file);
1028 kiocb->ki_pos = *ppos;
1029 kiocb->ki_left = count;
1031 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
1032 *ppos = kiocb->ki_pos;
1034 cl_env_put(env, &refcheck);
1040 * Write to a file (through the page cache).
 */
1042 #ifdef HAVE_FILE_WRITEV
/* Vectored write entry point (pre-AIO kernels): validate the iovec and
 * run the generic io path with CIT_WRITE. */
1043 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1044 unsigned long nr_segs, loff_t *ppos)
1047 struct vvp_io_args *args;
1053 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1057 env = cl_env_get(&refcheck);
1059 RETURN(PTR_ERR(env));
1061 args = vvp_env_args(env, IO_NORMAL);
1062 args->u.normal.via_iov = (struct iovec *)iov;
1063 args->u.normal.via_nrsegs = nr_segs;
1065 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1066 cl_env_put(env, &refcheck);
/* Single-buffer write (writev-kernel variant): wrap the user buffer in
 * the per-env local iovec and delegate to ll_file_writev(). */
1070 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1074 struct iovec *local_iov;
1079 env = cl_env_get(&refcheck);
1081 RETURN(PTR_ERR(env));
1083 local_iov = &vvp_env_info(env)->vti_local_iov;
1084 local_iov->iov_base = (void __user *)buf;
1085 local_iov->iov_len = count;
1087 result = ll_file_writev(file, local_iov, 1, ppos);
1088 cl_env_put(env, &refcheck);
1092 #else /* AIO stuff */
/* AIO write entry point: validate the iovec, then run the generic io
 * path with CIT_WRITE using the kiocb's position (updated in place). */
1093 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1094 unsigned long nr_segs, loff_t pos)
1097 struct vvp_io_args *args;
1103 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1107 env = cl_env_get(&refcheck);
1109 RETURN(PTR_ERR(env));
1111 args = vvp_env_args(env, IO_NORMAL);
1112 args->u.normal.via_iov = (struct iovec *)iov;
1113 args->u.normal.via_nrsegs = nr_segs;
1114 args->u.normal.via_iocb = iocb;
1116 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1117 &iocb->ki_pos, count);
1118 cl_env_put(env, &refcheck);
/* Single-buffer write (AIO-kernel variant): build a synchronous kiocb
 * and local iovec in the env, call ll_file_aio_write(), and propagate
 * the updated position back to *ppos. */
1122 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1126 struct iovec *local_iov;
1127 struct kiocb *kiocb;
1132 env = cl_env_get(&refcheck);
1134 RETURN(PTR_ERR(env));
1136 local_iov = &vvp_env_info(env)->vti_local_iov;
1137 kiocb = &vvp_env_info(env)->vti_kiocb;
1138 local_iov->iov_base = (void __user *)buf;
1139 local_iov->iov_len = count;
1140 init_sync_kiocb(kiocb, file);
1141 kiocb->ki_pos = *ppos;
1142 kiocb->ki_left = count;
1144 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1145 *ppos = kiocb->ki_pos;
1147 cl_env_put(env, &refcheck);
1153 #ifdef HAVE_KERNEL_SENDFILE
1155 * Send file content (through pagecache) somewhere with helper
 */
/* sendfile path: run the generic io engine with the IO_SENDFILE subtype,
 * passing the actor callback and its target through vvp_io_args. */
1157 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1158 read_actor_t actor, void *target)
1161 struct vvp_io_args *args;
1166 env = cl_env_get(&refcheck);
1168 RETURN(PTR_ERR(env));
1170 args = vvp_env_args(env, IO_SENDFILE);
1171 args->u.sendfile.via_target = target;
1172 args->u.sendfile.via_actor = actor;
1174 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1175 cl_env_put(env, &refcheck);
1180 #ifdef HAVE_KERNEL_SPLICE_READ
1182 * Send file content (through pagecache) somewhere with helper
 */
/* splice_read path: run the generic io engine with the IO_SPLICE
 * subtype, passing the pipe and splice flags through vvp_io_args. */
1184 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1185 struct pipe_inode_info *pipe, size_t count,
1189 struct vvp_io_args *args;
1194 env = cl_env_get(&refcheck);
1196 RETURN(PTR_ERR(env));
1198 args = vvp_env_args(env, IO_SPLICE);
1199 args->u.splice.via_pipe = pipe;
1200 args->u.splice.via_flags = flags;
1202 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1203 cl_env_put(env, &refcheck);
/* LL_IOC_RECREATE_OBJ ioctl handler: recreate a lost OST object for this
 * file. Requires CAP_SYS_ADMIN. Copies the user's ll_recreate_obj
 * request, duplicates the inode's lsm under the inode size lock, fills
 * an obdo with the target id/seq/ost index plus OBD_FL_RECREATE_OBJS,
 * and calls obd_create() to re-instantiate the object. */
1208 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1211 struct obd_export *exp = ll_i2dtexp(inode);
1212 struct ll_recreate_obj ucreatp;
1213 struct obd_trans_info oti = { 0 };
1214 struct obdo *oa = NULL;
1217 struct lov_stripe_md *lsm, *lsm2;
1220 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1223 if (cfs_copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1224 sizeof(struct ll_recreate_obj)))
1231 ll_inode_size_lock(inode, 0);
1232 lsm = ll_i2info(inode)->lli_smd;
1234 GOTO(out, rc = -ENOENT);
1235 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1236 (lsm->lsm_stripe_count));
1238 OBD_ALLOC(lsm2, lsm_size);
1240 GOTO(out, rc = -ENOMEM);
1242 oa->o_id = ucreatp.lrc_id;
1243 oa->o_seq = ucreatp.lrc_seq;
1244 oa->o_nlink = ucreatp.lrc_ost_idx;
1245 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1246 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1247 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid, OBD_MD_FLTYPE |
1248 OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
/* Work on a copy of the lsm so obd_create() can't corrupt the live one. */
1249 memcpy(lsm2, lsm, lsm_size);
1250 rc = obd_create(exp, oa, &lsm2, &oti);
1252 OBD_FREE(lsm2, lsm_size);
1255 ll_inode_size_unlock(inode, 0);
/* Set striping info for @inode by replaying an intent open that carries
 * the lov_user_md (@lum). Fails early (EEXIST path implied) if a stripe
 * md already exists. On success the server open handle is released and
 * the intent dropped; the RPC request is freed on the error path. */
1260 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1261 int flags, struct lov_user_md *lum, int lum_size)
1263 struct lov_stripe_md *lsm;
1264 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1268 ll_inode_size_lock(inode, 0);
1269 lsm = ll_i2info(inode)->lli_smd;
1271 ll_inode_size_unlock(inode, 0);
1272 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1277 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1280 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1281 GOTO(out_req_free, rc = -ENOENT);
1282 rc = oit.d.lustre.it_status;
1284 GOTO(out_req_free, rc);
1286 ll_release_openhandle(file->f_dentry, &oit);
1289 ll_inode_size_unlock(inode, 0);
1290 ll_intent_release(&oit);
1293 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping metadata) for @filename under @inode via
 * md_getattr_name(). Returns the lmm buffer, its size, and the request
 * holding it (caller keeps the request alive while using *lmmp).
 * Replies arrive in little endian; on big-endian hosts the lmm is
 * byte-swapped in place, including per-object entries for regular files
 * (directories carry no objects, hence the S_ISREG guard). */
1297 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1298 struct lov_mds_md **lmmp, int *lmm_size,
1299 struct ptlrpc_request **request)
1301 struct ll_sb_info *sbi = ll_i2sbi(inode);
1302 struct mdt_body *body;
1303 struct lov_mds_md *lmm = NULL;
1304 struct ptlrpc_request *req = NULL;
1305 struct md_op_data *op_data;
1308 rc = ll_get_max_mdsize(sbi, &lmmsize);
1312 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename,
1313 strlen(filename), lmmsize,
1314 LUSTRE_OPC_ANY, NULL);
1315 if (op_data == NULL)
1318 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
1319 rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
1320 ll_finish_md_op_data(op_data);
1322 CDEBUG(D_INFO, "md_getattr_name failed "
1323 "on %s: rc %d\n", filename, rc);
1327 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1328 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1330 lmmsize = body->eadatasize;
1332 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1334 GOTO(out, rc = -ENODATA);
1337 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1338 LASSERT(lmm != NULL);
1340 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1341 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1342 GOTO(out, rc = -EPROTO);
1346 * This is coming from the MDS, so is probably in
1347 * little endian. We convert it to host endian before
1348 * passing it to userspace.
 */
/* cpu_to_le32 is a no-op on little endian, so this branch only runs
 * on big-endian hosts, where a swab is actually needed. */
1350 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1351 /* if function called for directory - we should
1352 * avoid swab not existent lsm objects */
1353 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1354 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1355 if (S_ISREG(body->mode))
1356 lustre_swab_lov_user_md_objects(
1357 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1358 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1359 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1360 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1361 if (S_ISREG(body->mode))
1362 lustre_swab_lov_user_md_objects(
1363 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1364 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1370 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a lov_user_md (with one trailing OST data
 * entry) from userspace and apply it via ll_lov_setstripe_ea_info() with
 * MDS_OPEN_HAS_OBJS.  Requires CAP_SYS_ADMIN.  The kernel copy of the EA is
 * freed on all visible paths.
 */
1375 static int ll_lov_setea(struct inode *inode, struct file *file,
1378 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1379 struct lov_user_md *lump;
1380 int lum_size = sizeof(struct lov_user_md) +
1381 sizeof(struct lov_user_ost_data);
1385 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1388 OBD_ALLOC(lump, lum_size);
1392 if (cfs_copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1393 OBD_FREE(lump, lum_size);
1397 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1399 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: read the user's lov_user_md (first as the
 * smaller v1 layout, then re-read as v3 if the magic says so), set the
 * stripe EA, and refresh the user's view of the layout via an in-kernel
 * LL_IOC_LOV_GETSTRIPE obd_iocontrol.
 * NOTE(review): the put_user(0, &lumv1p->lmm_stripe_count) return value is
 * not visibly checked in this fragment.
 */
1403 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1406 struct lov_user_md_v3 lumv3;
1407 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1408 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1409 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1412 int flags = FMODE_WRITE;
1415 /* first try with v1 which is smaller than v3 */
1416 lum_size = sizeof(struct lov_user_md_v1);
1417 if (cfs_copy_from_user(lumv1, lumv1p, lum_size))
1420 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1421 lum_size = sizeof(struct lov_user_md_v3);
1422 if (cfs_copy_from_user(&lumv3, lumv3p, lum_size))
1426 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* Zero the user's stripe count, then let GETSTRIPE fill in the real layout. */
1428 put_user(0, &lumv1p->lmm_stripe_count);
1429 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1430 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE handler: forward the inode's cached stripe MD to the
 * data export's iocontrol, which copies the layout back to userspace.
 * NOTE(review): the "no lsm" early-return path is elided in this fragment.
 */
1436 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1438 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1443 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * LL_IOC_GROUP_LOCK: take a cl-layer group lock with group id @arg on behalf
 * of this open file.  Only one group lock per file descriptor; the check is
 * done under lli_lock, dropped across the (potentially blocking)
 * cl_get_grouplock() call, and re-taken to detect a racing winner.
 */
1447 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1449 struct ll_inode_info *lli = ll_i2info(inode);
1450 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1451 struct ccc_grouplock grouplock;
1455 if (ll_file_nolock(file))
1456 RETURN(-EOPNOTSUPP);
1458 cfs_spin_lock(&lli->lli_lock);
1459 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1460 CWARN("group lock already existed with gid %lu\n",
1461 fd->fd_grouplock.cg_gid);
1462 cfs_spin_unlock(&lli->lli_lock);
1465 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1466 cfs_spin_unlock(&lli->lli_lock);
/* May block unless O_NONBLOCK was given; runs without lli_lock held. */
1468 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1469 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1473 cfs_spin_lock(&lli->lli_lock);
/* Re-check: another thread may have installed its lock while we blocked. */
1474 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1475 cfs_spin_unlock(&lli->lli_lock);
1476 CERROR("another thread just won the race\n");
1477 cl_put_grouplock(&grouplock);
1481 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1482 fd->fd_grouplock = grouplock;
1483 cfs_spin_unlock(&lli->lli_lock);
1485 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK: release the group lock held on this file descriptor,
 * verifying under lli_lock that one is held and that its gid matches @arg.
 * The grouplock is copied out and cleared before dropping the spinlock so
 * cl_put_grouplock() runs without it.
 */
1489 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1491 struct ll_inode_info *lli = ll_i2info(inode);
1492 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1493 struct ccc_grouplock grouplock;
1496 cfs_spin_lock(&lli->lli_lock);
1497 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1498 cfs_spin_unlock(&lli->lli_lock);
1499 CWARN("no group lock held\n");
1502 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1504 if (fd->fd_grouplock.cg_gid != arg) {
1505 CWARN("group lock %lu doesn't match current id %lu\n",
1506 arg, fd->fd_grouplock.cg_gid);
1507 cfs_spin_unlock(&lli->lli_lock);
1511 grouplock = fd->fd_grouplock;
1512 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1513 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1514 cfs_spin_unlock(&lli->lli_lock);
1516 cl_put_grouplock(&grouplock);
1517 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1522 * Close inode open handle
1524 * \param dentry [in] dentry which contains the inode
1525 * \param it [in,out] intent which contains open info and result
1528 * \retval <0 failure
/*
 * Close the MDS open handle carried by @it for @dentry's inode.
 * No-ops for the filesystem root or when the intent holds no open
 * disposition.  Also drops the enqueue-open request reference
 * (DISP_ENQ_OPEN_REF) that ll_file_open would otherwise have consumed.
 */
1530 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1532 struct inode *inode = dentry->d_inode;
1533 struct obd_client_handle *och;
1539 /* Root ? Do nothing. */
1540 if (dentry->d_inode->i_sb->s_root == dentry)
1543 /* No open handle to close? Move away */
1544 if (!it_disposition(it, DISP_OPEN_OPEN))
1547 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1549 OBD_ALLOC(och, sizeof(*och));
1551 GOTO(out, rc = -ENOMEM);
1553 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1554 ll_i2info(inode), it, och);
1556 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1559 /* this one is in place of ll_file_open */
1560 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1561 ptlrpc_req_finished(it->d.lustre.it_data);
1562 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1567 * Get size for inode for which FIEMAP mapping is requested.
1568 * Make the FIEMAP get_info call and returns the result.
/*
 * Core FIEMAP implementation: validate the request flags, optionally flush
 * dirty pages (FIEMAP_FLAG_SYNC), build an ll_fiemap_info_key describing the
 * object, and ask the data export for the extent mapping via
 * obd_get_info(KEY_FIEMAP).  The reply is written back into @fiemap
 * (@num_bytes is the size of the caller's buffer, passed through vallen).
 */
1570 int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1573 struct obd_export *exp = ll_i2dtexp(inode);
1574 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1575 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1576 int vallen = num_bytes;
1580 /* Checks for fiemap flags */
1581 if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
/* Report back which flags we do NOT support (FIEMAP convention). */
1582 fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1586 /* Check for FIEMAP_FLAG_SYNC */
1587 if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) {
1588 rc = filemap_fdatawrite(inode->i_mapping);
1593 /* If the stripe_count > 1 and the application does not understand
1594 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1596 if (lsm->lsm_stripe_count > 1 &&
1597 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1600 fm_key.oa.o_id = lsm->lsm_object_id;
1601 fm_key.oa.o_seq = lsm->lsm_object_seq;
1602 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1604 obdo_from_inode(&fm_key.oa, inode, &ll_i2info(inode)->lli_fid,
1606 /* If filesize is 0, then there would be no objects for mapping */
1607 if (fm_key.oa.o_size == 0) {
1608 fiemap->fm_mapped_extents = 0;
1612 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1614 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1616 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a path.  Copies the fixed-size input
 * header from userspace to learn gf_pathlen, allocates an output buffer
 * large enough for the path, forwards the request to the MD export, and
 * copies the whole result back to the user.
 */
1621 int ll_fid2path(struct obd_export *exp, void *arg)
1623 struct getinfo_fid2path *gfout, *gfin;
1627 /* Need to get the buflen */
1628 OBD_ALLOC_PTR(gfin);
1631 if (cfs_copy_from_user(gfin, arg, sizeof(*gfin))) {
1636 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1637 OBD_ALLOC(gfout, outsize);
1638 if (gfout == NULL) {
1642 memcpy(gfout, gfin, sizeof(*gfout));
1645 /* Call mdc_iocontrol */
1646 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1649 if (cfs_copy_to_user(arg, gfout, outsize))
1653 OBD_FREE(gfout, outsize);
/*
 * FSFILT_IOC_FIEMAP handler: size and vmalloc a kernel fiemap buffer from
 * the user's fm_extent_count, copy the request (and first extent, used for
 * continuation) in, run ll_do_fiemap(), and copy the header plus mapped
 * extents back out.
 * NOTE(review): num_bytes = header + extent_count * extent_size with a
 * user-controlled extent_count — an overflow/DoS bound check is not visible
 * in this fragment; confirm the elided lines clamp it.
 */
1657 static int ll_ioctl_fiemap(struct inode *inode, unsigned long arg)
1659 struct ll_user_fiemap *fiemap_s;
1660 size_t num_bytes, ret_bytes;
1661 unsigned int extent_count;
1664 /* Get the extent count so we can calculate the size of
1665 * required fiemap buffer */
1666 if (get_user(extent_count,
1667 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1669 num_bytes = sizeof(*fiemap_s) + (extent_count *
1670 sizeof(struct ll_fiemap_extent));
1672 OBD_VMALLOC(fiemap_s, num_bytes);
1673 if (fiemap_s == NULL)
1676 /* get the fiemap value */
1677 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1679 GOTO(error, rc = -EFAULT);
1681 /* If fm_extent_count is non-zero, read the first extent since
1682 * it is used to calculate end_offset and device from previous
1685 if (copy_from_user(&fiemap_s->fm_extents[0],
1686 (char __user *)arg + sizeof(*fiemap_s),
1687 sizeof(struct ll_fiemap_extent)))
1688 GOTO(error, rc = -EFAULT);
1691 rc = ll_do_fiemap(inode, fiemap_s, num_bytes);
1695 ret_bytes = sizeof(struct ll_user_fiemap);
1697 if (extent_count != 0)
1698 ret_bytes += (fiemap_s->fm_mapped_extents *
1699 sizeof(struct ll_fiemap_extent));
1701 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1705 OBD_VFREE(fiemap_s, num_bytes);
/*
 * Main ioctl dispatcher for regular files: handles llite-specific commands
 * (file flags, stripe get/set, group locks, fid/path translation, fiemap,
 * statfs, flush-context) and falls through to dynamically registered
 * handlers (ll_iocontrol_call) and finally to the data export's
 * obd_iocontrol for everything else.
 */
1709 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1712 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1716 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1717 inode->i_generation, inode, cmd);
1718 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1720 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1721 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1725 case LL_IOC_GETFLAGS:
1726 /* Get the current value of the file flags */
1727 return put_user(fd->fd_flags, (int *)arg);
1728 case LL_IOC_SETFLAGS:
1729 case LL_IOC_CLRFLAGS:
1730 /* Set or clear specific file flags */
1731 /* XXX This probably needs checks to ensure the flags are
1732 * not abused, and to handle any flag side effects.
1734 if (get_user(flags, (int *) arg))
/* LL_FILE_IGNORE_LOCK only makes sense for O_DIRECT I/O. */
1737 if (cmd == LL_IOC_SETFLAGS) {
1738 if ((flags & LL_FILE_IGNORE_LOCK) &&
1739 !(file->f_flags & O_DIRECT)) {
1740 CERROR("%s: unable to disable locking on "
1741 "non-O_DIRECT file\n", current->comm);
1745 fd->fd_flags |= flags;
1747 fd->fd_flags &= ~flags;
1750 case LL_IOC_LOV_SETSTRIPE:
1751 RETURN(ll_lov_setstripe(inode, file, arg));
1752 case LL_IOC_LOV_SETEA:
1753 RETURN(ll_lov_setea(inode, file, arg));
1754 case LL_IOC_LOV_GETSTRIPE:
1755 RETURN(ll_lov_getstripe(inode, arg));
1756 case LL_IOC_RECREATE_OBJ:
1757 RETURN(ll_lov_recreate_obj(inode, file, arg));
1758 case FSFILT_IOC_FIEMAP:
1759 RETURN(ll_ioctl_fiemap(inode, arg));
1760 case FSFILT_IOC_GETFLAGS:
1761 case FSFILT_IOC_SETFLAGS:
1762 RETURN(ll_iocontrol(inode, file, cmd, arg));
1763 case FSFILT_IOC_GETVERSION_OLD:
1764 case FSFILT_IOC_GETVERSION:
1765 RETURN(put_user(inode->i_generation, (int *)arg));
1766 case LL_IOC_GROUP_LOCK:
1767 RETURN(ll_get_grouplock(inode, file, arg));
1768 case LL_IOC_GROUP_UNLOCK:
1769 RETURN(ll_put_grouplock(inode, file, arg));
1770 case IOC_OBD_STATFS:
1771 RETURN(ll_obd_statfs(inode, (void *)arg));
1773 /* We need to special case any other ioctls we want to handle,
1774 * to send them to the MDS/OST as appropriate and to properly
1775 * network encode the arg field.
1776 case FSFILT_IOC_SETVERSION_OLD:
1777 case FSFILT_IOC_SETVERSION:
1779 case LL_IOC_FLUSHCTX:
1780 RETURN(ll_flush_ctx(inode));
1781 case LL_IOC_PATH2FID: {
1782 if (cfs_copy_to_user((void *)arg, ll_inode2fid(inode),
1783 sizeof(struct lu_fid)))
1788 case OBD_IOC_FID2PATH:
1789 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1791 case LL_IOC_GET_MDTIDX: {
1794 mdtidx = ll_get_mdt_idx(inode);
1798 if (put_user((int)mdtidx, (int*)arg))
/* Fall back to dynamically registered handlers, then to the OBD layer. */
1808 ll_iocontrol_call(inode, file, cmd, arg, &err))
1811 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.  For SEEK_END the file size is refreshed from the
 * cluster with cl_glimpse_size() before the offset is applied; the result
 * is validated against [0, ll_file_maxbytes] before updating f_pos.
 * NOTE(review): `nonblock` is computed from O_NONBLOCK but is not visibly
 * passed to cl_glimpse_size() in this fragment — confirm against the full
 * source.
 */
1817 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1819 struct inode *inode = file->f_dentry->d_inode;
1822 retval = offset + ((origin == 2) ? i_size_read(inode) :
1823 (origin == 1) ? file->f_pos : 0);
1824 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1825 inode->i_ino, inode->i_generation, inode, retval, retval,
1826 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1827 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1829 if (origin == 2) { /* SEEK_END */
1830 int nonblock = 0, rc;
1832 if (file->f_flags & O_NONBLOCK)
1833 nonblock = LDLM_FL_BLOCK_NOWAIT;
1835 rc = cl_glimpse_size(inode);
1839 offset += i_size_read(inode);
1840 } else if (origin == 1) { /* SEEK_CUR */
1841 offset += file->f_pos;
1845 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1846 if (offset != file->f_pos) {
1847 file->f_pos = offset;
/*
 * fsync(2) implementation: wait for in-flight page I/O, harvest recorded
 * async writeback errors (per-inode and per-stripe), sync metadata through
 * the MDS (md_sync), then sync data on all objects (obd_sync over
 * [0, OBD_OBJECT_EOF]) using an obdo built from the inode.
 * Capabilities are taken for both the MDS and OSS phases.
 */
1855 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1857 struct inode *inode = dentry->d_inode;
1858 struct ll_inode_info *lli = ll_i2info(inode);
1859 struct lov_stripe_md *lsm = lli->lli_smd;
1860 struct ptlrpc_request *req;
1861 struct obd_capa *oc;
1864 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1865 inode->i_generation, inode);
1866 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1868 /* fsync's caller has already called _fdata{sync,write}, we want
1869 * that IO to finish before calling the osc and mdc sync methods */
1870 rc = filemap_fdatawait(inode->i_mapping);
1872 /* catch async errors that were recorded back when async writeback
1873 * failed for pages in this mapping. */
1874 err = lli->lli_async_rc;
1875 lli->lli_async_rc = 0;
1879 err = lov_test_and_clear_async_rc(lsm);
1884 oc = ll_mdscapa_get(inode);
1885 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1891 ptlrpc_req_finished(req);
/* obdo allocation failed (elided above): report earlier rc or -ENOMEM. */
1898 RETURN(rc ? rc : -ENOMEM);
1900 oa->o_id = lsm->lsm_object_id;
1901 oa->o_seq = lsm->lsm_object_seq;
1902 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1903 obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid,
1904 OBD_MD_FLTYPE | OBD_MD_FLATIME |
1905 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1908 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1909 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1910 0, OBD_OBJECT_EOF, oc);
/*
 * flock()/fcntl() lock handler: translate the kernel's file_lock into an
 * LDLM flock enqueue against the MDS.  Lock type maps to LDLM mode
 * (F_RDLCK->PR, F_WRLCK->PW, F_UNLCK->NL as a cancel substitute); the
 * command selects blocking/non-blocking/test flags.  On success the result
 * is mirrored into the local kernel lock state (flock_lock_file_wait /
 * posix_lock_file_wait).
 */
1920 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1922 struct inode *inode = file->f_dentry->d_inode;
1923 struct ll_sb_info *sbi = ll_i2sbi(inode);
1924 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1925 .ei_cb_cp =ldlm_flock_completion_ast,
1926 .ei_cbdata = file_lock };
1927 struct md_op_data *op_data;
1928 struct lustre_handle lockh = {0};
1929 ldlm_policy_data_t flock;
1934 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1935 inode->i_ino, file_lock);
1937 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1939 if (file_lock->fl_flags & FL_FLOCK) {
1940 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1941 /* set missing params for flock() calls */
1942 file_lock->fl_end = OFFSET_MAX;
1943 file_lock->fl_pid = current->tgid;
1945 flock.l_flock.pid = file_lock->fl_pid;
1946 flock.l_flock.start = file_lock->fl_start;
1947 flock.l_flock.end = file_lock->fl_end;
1949 switch (file_lock->fl_type) {
1951 einfo.ei_mode = LCK_PR;
1954 /* An unlock request may or may not have any relation to
1955 * existing locks so we may not be able to pass a lock handle
1956 * via a normal ldlm_lock_cancel() request. The request may even
1957 * unlock a byte range in the middle of an existing lock. In
1958 * order to process an unlock request we need all of the same
1959 * information that is given with a normal read or write record
1960 * lock request. To avoid creating another ldlm unlock (cancel)
1961 * message we'll treat a LCK_NL flock request as an unlock. */
1962 einfo.ei_mode = LCK_NL;
1965 einfo.ei_mode = LCK_PW;
1968 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
1983 flags = LDLM_FL_BLOCK_NOWAIT;
1989 flags = LDLM_FL_TEST_LOCK;
1990 /* Save the old mode so that if the mode in the lock changes we
1991 * can decrement the appropriate reader or writer refcount. */
1992 file_lock->fl_type = einfo.ei_mode;
1995 CERROR("unknown fcntl lock command: %d\n", cmd);
1999 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2000 LUSTRE_OPC_ANY, NULL);
2001 if (IS_ERR(op_data))
2002 RETURN(PTR_ERR(op_data));
2004 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2005 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2006 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2008 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2009 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2011 ll_finish_md_op_data(op_data);
2013 if ((file_lock->fl_flags & FL_FLOCK) &&
2014 (rc == 0 || file_lock->fl_type == F_UNLCK))
2015 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2016 #ifdef HAVE_F_OP_FLOCK
2017 if ((file_lock->fl_flags & FL_POSIX) &&
2018 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2019 !(flags & LDLM_FL_TEST_LOCK))
2020 posix_lock_file_wait(file, file_lock);
/*
 * Lock handler used by the -o noflock file_operations tables; body elided
 * in this fragment (the table comment says these return ENOSYS).
 */
2026 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking a reference) whether this client already holds an
 * LDLM inodebits lock covering @bits on @inode, in any of CR/CW/PR/PW
 * modes.  Uses LDLM_FL_TEST_LOCK so no lock state is changed.
 */
2033 int ll_have_md_lock(struct inode *inode, __u64 bits)
2035 struct lustre_handle lockh;
2036 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2044 fid = &ll_i2info(inode)->lli_fid;
2045 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2047 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2048 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2049 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Like ll_have_md_lock(), but actually matches (and so references) a
 * granted inodebits lock covering @bits, returning its mode and handle via
 * @lockh.  No LDLM_FL_TEST_LOCK here — the caller owns the reference.
 */
2055 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2056 struct lustre_handle *lockh)
2058 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2064 fid = &ll_i2info(inode)->lli_fid;
2065 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2067 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2068 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2069 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process a revalidation RPC result: -ENOENT on an already-unlinked
 * inode is converted to success (nlink updated by elided code); any other
 * error is logged.
 */
2073 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2074 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2075 * and return success */
2077 /* This path cannot be hit for regular files unless in
2078 * case of obscure races, so no need to to validate
2080 if (!S_ISREG(inode->i_mode) &&
2081 !S_ISDIR(inode->i_mode))
2086 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate @dentry's inode attributes against the MDS.
 * Two strategies: with OBD_CONNECT_ATTRFID the server supports getattr by
 * FID, so an IT_GETATTR intent lock is taken (and an unlinked dentry is
 * unhashed); otherwise, if no covering MD lock is cached for @ibits, a
 * plain md_getattr fetches the attributes (plus EA size for regular files)
 * and ll_prep_inode refreshes the inode.
 */
2094 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2097 struct inode *inode = dentry->d_inode;
2098 struct ptlrpc_request *req = NULL;
2099 struct ll_sb_info *sbi;
2100 struct obd_export *exp;
2105 CERROR("REPORT THIS LINE TO PETER\n");
2108 sbi = ll_i2sbi(inode);
2110 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2111 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2113 exp = ll_i2mdexp(inode);
2115 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2116 struct lookup_intent oit = { .it_op = IT_GETATTR };
2117 struct md_op_data *op_data;
2119 /* Call getattr by fid, so do not provide name at all. */
2120 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2121 dentry->d_inode, NULL, 0, 0,
2122 LUSTRE_OPC_ANY, NULL);
2123 if (IS_ERR(op_data))
2124 RETURN(PTR_ERR(op_data));
2126 oit.it_create_mode |= M_CHECK_STALE;
2127 rc = md_intent_lock(exp, op_data, NULL, 0,
2128 /* we are not interested in name
2131 ll_md_blocking_ast, 0);
2132 ll_finish_md_op_data(op_data);
2133 oit.it_create_mode &= ~M_CHECK_STALE;
2135 rc = ll_inode_revalidate_fini(inode, rc);
2139 rc = ll_revalidate_it_finish(req, &oit, dentry);
2141 ll_intent_release(&oit);
2145 /* Unlinked? Unhash dentry, so it is not picked up later by
2146 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2147 here to preserve get_cwd functionality on 2.6.
2149 if (!dentry->d_inode->i_nlink) {
2150 cfs_spin_lock(&ll_lookup_lock);
2151 spin_lock(&dcache_lock);
2152 ll_drop_dentry(dentry);
2153 spin_unlock(&dcache_lock);
2154 cfs_spin_unlock(&ll_lookup_lock);
2157 ll_lookup_finish_locks(&oit, dentry);
2158 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2159 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2160 obd_valid valid = OBD_MD_FLGETATTR;
2161 struct md_op_data *op_data;
/* Regular files need the EA too, so size the reply buffer for it. */
2164 if (S_ISREG(inode->i_mode)) {
2165 rc = ll_get_max_mdsize(sbi, &ealen);
2168 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2171 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2172 0, ealen, LUSTRE_OPC_ANY,
2174 if (op_data == NULL)
2177 op_data->op_valid = valid;
2178 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2179 * capa for this inode. Because we only keep capas of dirs
2181 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2182 ll_finish_md_op_data(op_data);
2184 rc = ll_inode_revalidate_fini(inode, rc);
2188 rc = ll_prep_inode(&inode, req, NULL);
2191 ptlrpc_req_finished(req);
/*
 * Revalidate UPDATE|LOOKUP inodebits, then refresh size/times: if the file
 * has no objects yet (lli_smd == NULL) the times come straight from the
 * cached lvb; otherwise cl_glimpse_size() asks the OSTs.
 */
2195 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2197 struct inode *inode = dentry->d_inode;
2201 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2202 MDS_INODELOCK_LOOKUP);
2204 /* if object not yet allocated, don't validate size */
2205 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
2206 LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
2207 LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
2208 LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
2212 /* cl_glimpse_size will prefer locally cached writes if they extend
2216 rc = cl_glimpse_size(inode);
/*
 * getattr with an explicit intent: revalidate the inode, then fill *stat
 * from the (now fresh) inode fields.  32-bit tasks get a squashed inode
 * number derived from the FID so it fits in 32 bits.
 */
2221 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2222 struct lookup_intent *it, struct kstat *stat)
2224 struct inode *inode = de->d_inode;
2225 struct ll_inode_info *lli = ll_i2info(inode);
2228 res = ll_inode_revalidate_it(de, it);
2229 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2234 stat->dev = inode->i_sb->s_dev;
2235 if (cfs_curproc_is_32bit())
2236 stat->ino = cl_fid_build_ino32(&lli->lli_fid);
2238 stat->ino = inode->i_ino;
2240 stat->mode = inode->i_mode;
2241 stat->nlink = inode->i_nlink;
2242 stat->uid = inode->i_uid;
2243 stat->gid = inode->i_gid;
2244 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2245 stat->atime = inode->i_atime;
2246 stat->mtime = inode->i_mtime;
2247 stat->ctime = inode->i_ctime;
2248 #ifdef HAVE_INODE_BLKSIZE
2249 stat->blksize = inode->i_blksize;
2251 stat->blksize = 1 << inode->i_blkbits;
2254 stat->size = i_size_read(inode);
2255 stat->blocks = inode->i_blocks;
/* VFS ->getattr entry point: delegates with a default IT_GETATTR intent. */
2259 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2261 struct lookup_intent it = { .it_op = IT_GETATTR };
2263 return ll_getattr_it(mnt, de, &it, stat);
2266 #ifdef HAVE_LINUX_FIEMAP_H
/*
 * VFS ->fiemap entry point: marshal the kernel's fiemap_extent_info into a
 * vmalloc'd ll_user_fiemap, run ll_do_fiemap(), and copy flags and mapped
 * extents back into @fieinfo.
 * NOTE(review): the initial memcpy of one extent from fi_extents_start is
 * done unconditionally in this fragment — confirm the elided lines guard
 * the extent_count == 0 case.
 */
2267 int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2268 __u64 start, __u64 len)
2272 struct ll_user_fiemap *fiemap;
2273 unsigned int extent_count = fieinfo->fi_extents_max;
2275 num_bytes = sizeof(*fiemap) + (extent_count *
2276 sizeof(struct ll_fiemap_extent));
2277 OBD_VMALLOC(fiemap, num_bytes);
2282 fiemap->fm_flags = fieinfo->fi_flags;
2283 fiemap->fm_extent_count = fieinfo->fi_extents_max;
2284 fiemap->fm_start = start;
2285 fiemap->fm_length = len;
2286 memcpy(&fiemap->fm_extents[0], fieinfo->fi_extents_start,
2287 sizeof(struct ll_fiemap_extent));
2289 rc = ll_do_fiemap(inode, fiemap, num_bytes);
2291 fieinfo->fi_flags = fiemap->fm_flags;
2292 fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents;
2293 memcpy(fieinfo->fi_extents_start, &fiemap->fm_extents[0],
2294 fiemap->fm_mapped_extents * sizeof(struct ll_fiemap_extent));
2296 OBD_VFREE(fiemap, num_bytes);
/*
 * Check @mask against the POSIX ACL cached on the inode (duplicated under
 * lli_lock so the check itself runs unlocked).  Compiled out when
 * CONFIG_FS_POSIX_ACL is not set.
 */
2303 int lustre_check_acl(struct inode *inode, int mask)
2305 #ifdef CONFIG_FS_POSIX_ACL
2306 struct ll_inode_info *lli = ll_i2info(inode);
2307 struct posix_acl *acl;
2311 cfs_spin_lock(&lli->lli_lock);
2312 acl = posix_acl_dup(lli->lli_posix_acl);
2313 cfs_spin_unlock(&lli->lli_lock);
2318 rc = posix_acl_permission(inode, acl, mask);
2319 posix_acl_release(acl);
/*
 * VFS ->permission.  Two variants selected by kernel version:
 *  - >= 2.6.10: revalidate the root inode on first access, honour remote
 *    clients (lustre_check_remote_perm), then defer to
 *    generic_permission() with lustre_check_acl as the ACL callback.
 *  - older kernels: an open-coded UNIX permission check (owner, ACL,
 *    group, other, then capability overrides).
 */
2327 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2328 #ifndef HAVE_INODE_PERMISION_2ARGS
2329 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2331 int ll_inode_permission(struct inode *inode, int mask)
2337 /* as root inode are NOT getting validated in lookup operation,
2338 * need to do it before permission check. */
2340 if (inode == inode->i_sb->s_root->d_inode) {
2341 struct lookup_intent it = { .it_op = IT_LOOKUP };
2343 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2344 MDS_INODELOCK_LOOKUP);
2349 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2350 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2352 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2353 return lustre_check_remote_perm(inode, mask);
2355 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2356 rc = generic_permission(inode, mask, lustre_check_acl);
/* Pre-2.6.10 fallback: classic owner/group/other check with ACL hook. */
2361 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2363 int mode = inode->i_mode;
2366 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2367 inode->i_ino, inode->i_generation, inode, mask);
2369 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2370 return lustre_check_remote_perm(inode, mask);
2372 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2374 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2375 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2377 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2379 if (current->fsuid == inode->i_uid) {
2382 if (((mode >> 3) & mask & S_IRWXO) != mask)
2384 rc = lustre_check_acl(inode, mask);
2388 goto check_capabilities;
2392 if (cfs_curproc_is_in_groups(inode->i_gid))
2395 if ((mode & mask & S_IRWXO) == mask)
2399 if (!(mask & MAY_EXEC) ||
2400 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2401 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2404 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2405 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/*
 * Map the vectored-I/O method names to whichever API this kernel provides:
 * legacy readv/writev or the newer aio_read/aio_write.
 */
2412 #ifdef HAVE_FILE_READV
2413 #define READ_METHOD readv
2414 #define READ_FUNCTION ll_file_readv
2415 #define WRITE_METHOD writev
2416 #define WRITE_FUNCTION ll_file_writev
2418 #define READ_METHOD aio_read
2419 #define READ_FUNCTION ll_file_aio_read
2420 #define WRITE_METHOD aio_write
2421 #define WRITE_FUNCTION ll_file_aio_write
2424 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no ->flock/->lock, so flock falls back to the
 * kernel's local-only implementation. */
2425 struct file_operations ll_file_operations = {
2426 .read = ll_file_read,
2427 .READ_METHOD = READ_FUNCTION,
2428 .write = ll_file_write,
2429 .WRITE_METHOD = WRITE_FUNCTION,
2430 .ioctl = ll_file_ioctl,
2431 .open = ll_file_open,
2432 .release = ll_file_release,
2433 .mmap = ll_file_mmap,
2434 .llseek = ll_file_seek,
2435 #ifdef HAVE_KERNEL_SENDFILE
2436 .sendfile = ll_file_sendfile,
2438 #ifdef HAVE_KERNEL_SPLICE_READ
2439 .splice_read = ll_file_splice_read,
/* -o flock variant: cluster-coherent locking via ll_file_flock. */
2444 struct file_operations ll_file_operations_flock = {
2445 .read = ll_file_read,
2446 .READ_METHOD = READ_FUNCTION,
2447 .write = ll_file_write,
2448 .WRITE_METHOD = WRITE_FUNCTION,
2449 .ioctl = ll_file_ioctl,
2450 .open = ll_file_open,
2451 .release = ll_file_release,
2452 .mmap = ll_file_mmap,
2453 .llseek = ll_file_seek,
2454 #ifdef HAVE_KERNEL_SENDFILE
2455 .sendfile = ll_file_sendfile,
2457 #ifdef HAVE_KERNEL_SPLICE_READ
2458 .splice_read = ll_file_splice_read,
2461 #ifdef HAVE_F_OP_FLOCK
2462 .flock = ll_file_flock,
2464 .lock = ll_file_flock
2467 /* These are for -o noflock - to return ENOSYS on flock calls */
2468 struct file_operations ll_file_operations_noflock = {
2469 .read = ll_file_read,
2470 .READ_METHOD = READ_FUNCTION,
2471 .write = ll_file_write,
2472 .WRITE_METHOD = WRITE_FUNCTION,
2473 .ioctl = ll_file_ioctl,
2474 .open = ll_file_open,
2475 .release = ll_file_release,
2476 .mmap = ll_file_mmap,
2477 .llseek = ll_file_seek,
2478 #ifdef HAVE_KERNEL_SENDFILE
2479 .sendfile = ll_file_sendfile,
2481 #ifdef HAVE_KERNEL_SPLICE_READ
2482 .splice_read = ll_file_splice_read,
2485 #ifdef HAVE_F_OP_FLOCK
2486 .flock = ll_file_noflock,
2488 .lock = ll_file_noflock
/* inode_operations for regular files (setattr, getattr, xattrs, fiemap). */
2491 struct inode_operations ll_file_inode_operations = {
2492 #ifdef HAVE_VFS_INTENT_PATCHES
2493 .setattr_raw = ll_setattr_raw,
2495 .setattr = ll_setattr,
2496 .truncate = ll_truncate,
2497 .getattr = ll_getattr,
2498 .permission = ll_inode_permission,
2499 .setxattr = ll_setxattr,
2500 .getxattr = ll_getxattr,
2501 .listxattr = ll_listxattr,
2502 .removexattr = ll_removexattr,
2503 #ifdef HAVE_LINUX_FIEMAP_H
2504 .fiemap = ll_fiemap,
2508 /* dynamic ioctl number support routines */
/* Registry of externally registered ioctl handlers, protected by a rwsem. */
2509 static struct llioc_ctl_data {
2510 cfs_rw_semaphore_t ioc_sem;
2511 cfs_list_t ioc_head;
2513 __RWSEM_INITIALIZER(llioc.ioc_sem),
2514 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registration: a callback plus the array of ioctl numbers it serves. */
2519 cfs_list_t iocd_list;
2520 unsigned int iocd_size;
2521 llioc_callback_t iocd_cb;
2522 unsigned int iocd_count;
2523 unsigned int iocd_cmd[0];
/*
 * Register @cb as the handler for the @count ioctl numbers in @cmd.
 * Returns an opaque cookie (the allocated registration record) used later
 * by ll_iocontrol_unregister(), or NULL on bad arguments / allocation
 * failure (elided path).
 */
2526 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2529 struct llioc_data *in_data = NULL;
2532 if (cb == NULL || cmd == NULL ||
2533 count > LLIOC_MAX_CMD || count < 0)
2536 size = sizeof(*in_data) + count * sizeof(unsigned int);
2537 OBD_ALLOC(in_data, size);
2538 if (in_data == NULL)
2541 memset(in_data, 0, sizeof(*in_data));
2542 in_data->iocd_size = size;
2543 in_data->iocd_cb = cb;
2544 in_data->iocd_count = count;
2545 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2547 cfs_down_write(&llioc.ioc_sem);
2548 cfs_list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2549 cfs_up_write(&llioc.ioc_sem);
/*
 * Remove and free the registration identified by @magic (the cookie from
 * ll_iocontrol_register).  Warns if no matching record is found.
 */
2554 void ll_iocontrol_unregister(void *magic)
2556 struct llioc_data *tmp;
2561 cfs_down_write(&llioc.ioc_sem);
2562 cfs_list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2564 unsigned int size = tmp->iocd_size;
2566 cfs_list_del(&tmp->iocd_list);
2567 cfs_up_write(&llioc.ioc_sem);
2569 OBD_FREE(tmp, size);
2573 cfs_up_write(&llioc.ioc_sem);
2575 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2578 EXPORT_SYMBOL(ll_iocontrol_register);
2579 EXPORT_SYMBOL(ll_iocontrol_unregister);
2581 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2582 unsigned int cmd, unsigned long arg, int *rcp)
2584 enum llioc_iter ret = LLIOC_CONT;
2585 struct llioc_data *data;
2586 int rc = -EINVAL, i;
2588 cfs_down_read(&llioc.ioc_sem);
2589 cfs_list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2590 for (i = 0; i < data->iocd_count; i++) {
2591 if (cmd != data->iocd_cmd[i])
2594 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2598 if (ret == LLIOC_STOP)
2601 cfs_up_read(&llioc.ioc_sem);