1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/*
 * Allocate a per-open-file ll_file_data from its slab cache, using an
 * IO-safe allocation context (CFS_ALLOC_IO).
 * NOTE(review): this listing is missing lines; the return of @fd is not
 * visible here but is presumably the function's result — confirm.
 */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Return an ll_file_data obtained from ll_file_data_get() to the slab. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the client-side view of @inode into @op_data for an MDS request:
 * fid, mode/times/size/blocks/flags, the current IO epoch, the open file
 * handle @fh, and an MDS capability reference.
 */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* NOTE(review): cast assumes struct ll_iattr extends struct iattr with an
 * ia_attr_flags field — confirm against the ll_iattr definition. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
/* Takes a capability reference; the request path is expected to drop it. */
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/*
 * Prepare @op_data for an MDS close of open handle @och: select which
 * attributes to send (size/blocks only when Size-on-MDS is unsupported or
 * the file is not regular), close the IO epoch, and pack inode state.
 * NOTE(review): listing is incomplete — the body of the FMODE_WRITE early
 * branch is not visible here.
 */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
92 if (!(och->och_flags & FMODE_WRITE))
95 if (!(exp_connect_som(ll_i2mdexp(inode))) || !S_ISREG(inode->i_mode))
96 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
98 ll_epoch_close(inode, op_data, &och, 0);
101 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/*
 * Send an MDS close for open handle @och on @inode, then destroy any OST
 * objects the close reply tells us to, clear the open replay data and
 * poison the handle cookie.  If the epoch did not close and Size-on-MDS
 * is in effect for a written regular file, queue DONE_WRITING instead.
 * NOTE(review): this excerpt is missing lines (conditions, braces, RETURN);
 * comments below describe only the visible statements.
 */
105 static int ll_close_inode_openhandle(struct obd_export *md_exp,
107 struct obd_client_handle *och)
109 struct obd_export *exp = ll_i2mdexp(inode);
110 struct md_op_data *op_data;
111 struct ptlrpc_request *req = NULL;
112 struct obd_device *obd = class_exp2obd(exp);
119 * XXX: in case of LMV, is this correct to access
122 CERROR("Invalid MDC connection handle "LPX64"\n",
123 ll_i2mdexp(inode)->exp_handle.h_cookie);
128 * here we check if this is forced umount. If so this is called on
129 * canceling "open lock" and we do not call md_close() in this case, as
130 * it will not be successful, as import is already deactivated.
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
/* Remember whether this close ends the IO epoch; checked below. */
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr back to MDS. */
147 rc = ll_sizeonmds_update(inode, &och->och_fh,
148 op_data->op_ioepoch);
150 CERROR("inode %lu mdc Size-on-MDS update failed: "
151 "rc = %d\n", inode->i_ino, rc);
155 CERROR("inode %lu mdc close failed: rc = %d\n",
158 ll_finish_md_op_data(op_data);
161 rc = ll_objects_destroy(req, inode);
163 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* Epoch still open on a written regular file with SOM: defer to
 * DONE_WRITING instead of finishing here. */
170 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
171 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
172 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
174 md_clear_open_replay_data(md_exp, och);
175 /* Free @och if it is not waiting for DONE_WRITING. */
176 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
179 if (req) /* This is close request */
180 ptlrpc_req_finished(req);
/*
 * Really close the MDS open handle of the given mode (write/exec/read)
 * for @inode, but only when no other local users of that handle remain
 * (per-mode use count under lli_och_sem).
 * NOTE(review): excerpt is missing lines; the och swap-out between the
 * two up() calls is not visible here.
 */
184 int ll_md_real_close(struct inode *inode, int flags)
186 struct ll_inode_info *lli = ll_i2info(inode);
187 struct obd_client_handle **och_p;
188 struct obd_client_handle *och;
/* Select the per-mode handle slot and its use counter. */
193 if (flags & FMODE_WRITE) {
194 och_p = &lli->lli_mds_write_och;
195 och_usecount = &lli->lli_open_fd_write_count;
196 } else if (flags & FMODE_EXEC) {
197 och_p = &lli->lli_mds_exec_och;
198 och_usecount = &lli->lli_open_fd_exec_count;
200 LASSERT(flags & FMODE_READ);
201 och_p = &lli->lli_mds_read_och;
202 och_usecount = &lli->lli_open_fd_read_count;
205 down(&lli->lli_och_sem);
206 if (*och_usecount) { /* There are still users of this handle, so
208 up(&lli->lli_och_sem);
213 up(&lli->lli_och_sem);
215 if (och) { /* There might be a race and somebody have freed this och
217 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-fd close: drop any group lock, decrement the per-mode open count,
 * and close the MDS open handle unless a matching cached OPEN DLM lock
 * (tested with LDLM_FL_TEST_LOCK, i.e. without taking a reference) lets
 * us skip talking to the MDS.  Finally detach and free the fd's private
 * data and release the OSS capability.
 * NOTE(review): excerpt is missing lines; lockmode selection and several
 * closing braces are not visible here.
 */
224 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
227 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
228 struct ll_inode_info *lli = ll_i2info(inode);
232 /* clear group lock, if present */
233 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
234 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
236 /* Let's see if we have good enough OPEN lock on the file and if
237 we can skip talking to MDS */
238 if (file->f_dentry->d_inode) { /* Can this ever be false? */
240 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
241 struct lustre_handle lockh;
242 struct inode *inode = file->f_dentry->d_inode;
243 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Drop this fd's contribution to the per-mode open count. */
245 down(&lli->lli_och_sem);
246 if (fd->fd_omode & FMODE_WRITE) {
248 LASSERT(lli->lli_open_fd_write_count);
249 lli->lli_open_fd_write_count--;
250 } else if (fd->fd_omode & FMODE_EXEC) {
252 LASSERT(lli->lli_open_fd_exec_count);
253 lli->lli_open_fd_exec_count--;
256 LASSERT(lli->lli_open_fd_read_count);
257 lli->lli_open_fd_read_count--;
259 up(&lli->lli_och_sem);
/* No cached OPEN lock: must do the real close against the MDS. */
261 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
262 LDLM_IBITS, &policy, lockmode,
264 rc = ll_md_real_close(file->f_dentry->d_inode,
268 CERROR("Releasing a file %p with negative dentry %p. Name %s",
269 file, file->f_dentry, file->f_dentry->d_name.name);
272 LUSTRE_FPRIVATE(file) = NULL;
273 ll_file_data_put(fd);
274 ll_capa_close(inode);
279 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
281 /* While this returns an error code, fput() the caller does not, so we need
282 * to make every effort to clean up all of our state here. Also, applications
283 * rarely check close errors and even if an error is returned they will not
284 * re-try the close call.
/*
 * ->release() for Lustre files: tear down remote-ACL session state for the
 * root inode, stop the statahead thread if this fd owns it, short-circuit
 * for the root dentry, propagate any pending async OST write error, then
 * do the MD close.
 * NOTE(review): excerpt is missing lines (lli_async_rc handling context,
 * closing braces); comments describe only visible statements.
 */
286 int ll_file_release(struct inode *inode, struct file *file)
288 struct ll_file_data *fd;
289 struct ll_sb_info *sbi = ll_i2sbi(inode);
290 struct ll_inode_info *lli = ll_i2info(inode);
291 struct lov_stripe_md *lsm = lli->lli_smd;
295 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
296 inode->i_generation, inode);
298 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping is tied to the root inode only. */
299 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
300 inode == inode->i_sb->s_root->d_inode) {
301 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
304 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
305 fd->fd_flags &= ~LL_FILE_RMTACL;
306 rct_del(&sbi->ll_rct, cfs_curproc_pid());
307 et_search_free(&sbi->ll_et, cfs_curproc_pid());
312 if (inode->i_sb->s_root != file->f_dentry)
313 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
314 fd = LUSTRE_FPRIVATE(file);
317 /* The last ref on @file, maybe not the owner pid of statahead.
318 * Different processes can open the same dir, "ll_opendir_key" means:
319 * it is me that should stop the statahead thread. */
320 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
321 ll_stop_statahead(inode, lli->lli_opendir_key);
/* The root dentry was never opened on the MDS; just free local state. */
323 if (inode->i_sb->s_root == file->f_dentry) {
324 LUSTRE_FPRIVATE(file) = NULL;
325 ll_file_data_put(fd);
330 lov_test_and_clear_async_rc(lsm);
331 lli->lli_async_rc = 0;
333 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/*
 * Perform an IT_OPEN intent against the MDS for @file (used when no open
 * handle/intent came down from lookup, e.g. NFSD).  Requests an OPEN DLM
 * lock unless the caller is only setting stripe info (@lmm/@lmmsize set).
 * On success, refreshes the inode from the reply and attaches the lock.
 * NOTE(review): excerpt is missing lines (error labels, RETURN paths).
 */
337 static int ll_intent_file_open(struct file *file, void *lmm,
338 int lmmsize, struct lookup_intent *itp)
340 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
341 struct dentry *parent = file->f_dentry->d_parent;
342 const char *name = file->f_dentry->d_name.name;
343 const int len = file->f_dentry->d_name.len;
344 struct md_op_data *op_data;
345 struct ptlrpc_request *req;
352 /* Usually we come here only for NFSD, and we want open lock.
353 But we can also get here with pre 2.6.15 patchless kernels, and in
354 that case that lock is also ok */
355 /* We can also get here if there was cached open handle in revalidate_it
356 * but it disappeared while we were getting from there to ll_file_open.
357 * But this means this file was closed and immediately opened which
358 * makes a good candidate for using OPEN lock */
359 /* If lmmsize & lmm are not 0, we are just setting stripe info
360 * parameters. No need for the open lock */
361 if (!lmm && !lmmsize)
362 itp->it_flags |= MDS_OPEN_LOCK;
364 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
365 file->f_dentry->d_inode, name, len,
366 O_RDWR, LUSTRE_OPC_ANY, NULL);
368 RETURN(PTR_ERR(op_data));
370 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
371 0 /*unused */, &req, ll_md_blocking_ast, 0);
372 ll_finish_md_op_data(op_data);
374 /* reason to keep our own exit path - don't flood the log
375 * with -ESTALE error messages.
/* Open succeeded but the open disposition carries an error: release the
 * server-side open handle we won't be using. */
377 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
378 it_open_error(DISP_OPEN_OPEN, itp))
380 ll_release_openhandle(file->f_dentry, itp);
384 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
385 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
386 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
390 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
391 if (!rc && itp->d.lustre.it_lock_mode)
392 md_set_lock_data(sbi->ll_md_exp,
393 &itp->d.lustre.it_lock_handle,
394 file->f_dentry->d_inode, NULL);
/* Common exit: drop the request reference and the intent's lock state. */
397 ptlrpc_req_finished(itp->d.lustre.it_data);
398 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
399 ll_intent_drop_lock(itp);
/*
 * Record a newly-opened IO epoch on the inode.  A zero @ioepoch or a
 * repeat of the current epoch is ignored.
 */
404 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
406 if (ioepoch && lli->lli_ioepoch != ioepoch) {
407 lli->lli_ioepoch = ioepoch;
408 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
409 ioepoch, PFID(&lli->lli_fid));
/*
 * Populate @och from the MDS open reply carried by intent @it: copy the
 * server's open file handle, fid and open flags, record the IO epoch,
 * and register the request for open replay.  Returns the result of
 * md_set_open_replay_data().
 */
413 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
414 struct lookup_intent *it, struct obd_client_handle *och)
416 struct ptlrpc_request *req = it->d.lustre.it_data;
417 struct mdt_body *body;
421 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
422 LASSERT(body != NULL); /* reply already checked out */
424 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
425 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
426 och->och_fid = lli->lli_fid;
427 och->och_flags = it->it_flags;
428 ll_ioepoch_open(lli, body->ioepoch);
430 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish the client-local part of an open: optionally fill @och from the
 * intent reply, then attach @fd as the file's private data, initialize
 * read-ahead state and remember the open mode.
 * NOTE(review): excerpt is missing lines (the och != NULL guard and
 * error/RETURN paths are not visible here).
 */
433 int ll_local_open(struct file *file, struct lookup_intent *it,
434 struct ll_file_data *fd, struct obd_client_handle *och)
436 struct inode *inode = file->f_dentry->d_inode;
437 struct ll_inode_info *lli = ll_i2info(inode);
440 LASSERT(!LUSTRE_FPRIVATE(file));
445 struct ptlrpc_request *req = it->d.lustre.it_data;
446 struct mdt_body *body;
449 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
453 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
454 if ((it->it_flags & FMODE_WRITE) &&
455 (body->valid & OBD_MD_FLSIZE))
456 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
457 lli->lli_ioepoch, PFID(&lli->lli_fid));
460 LUSTRE_FPRIVATE(file) = fd;
461 ll_readahead_init(inode, &fd->fd_ras);
462 fd->fd_omode = it->it_flags;
466 /* Open a file, and (for the very first open) create objects on the OSTs at
467 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
468 * creation or open until ll_lov_setstripe() ioctl is called. We grab
469 * lli_open_sem to ensure no other process will create objects, send the
470 * stripe MD to the MDS, or try to destroy the objects if that fails.
472 * If we already have the stripe MD locally then we don't request it in
473 * md_open(), by passing a lmm_size = 0.
475 * It is up to the application to ensure no other processes open this file
476 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
477 * used. We might be able to avoid races of that sort by getting lli_open_sem
478 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
479 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * ->open() for Lustre files.  Either consumes an intent produced by
 * lookup (HAVE_VFS_INTENT_PATCHES) or builds a fresh IT_OPEN intent from
 * f_flags; reuses an existing per-mode MDS open handle when one exists,
 * otherwise enqueues a new open against the MDS and records the handle.
 * Also claims statahead ownership for directory opens.
 * NOTE(review): this excerpt is missing many lines (else branches, GOTO
 * labels, lockmode/och bookkeeping); comments annotate visible code only.
 */
481 int ll_file_open(struct inode *inode, struct file *file)
483 struct ll_inode_info *lli = ll_i2info(inode);
484 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
485 .it_flags = file->f_flags };
486 struct lov_stripe_md *lsm;
487 struct ptlrpc_request *req = NULL;
488 struct obd_client_handle **och_p;
490 struct ll_file_data *fd;
491 int rc = 0, opendir_set = 0;
494 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
495 inode->i_generation, inode, file->f_flags);
497 #ifdef HAVE_VFS_INTENT_PATCHES
500 it = file->private_data; /* XXX: compat macro */
501 file->private_data = NULL; /* prevent ll_local_open assertion */
504 fd = ll_file_data_get();
/* First opener of a directory claims statahead ownership via @fd. */
509 if (S_ISDIR(inode->i_mode)) {
510 spin_lock(&lli->lli_lock);
511 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
512 LASSERT(lli->lli_sai == NULL);
513 lli->lli_opendir_key = fd;
514 lli->lli_opendir_pid = cfs_curproc_pid();
517 spin_unlock(&lli->lli_lock);
/* Root dentry: no MDS open needed, just attach the fd data. */
520 if (inode->i_sb->s_root == file->f_dentry) {
521 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own open intent. */
525 if (!it || !it->d.lustre.it_disposition) {
526 /* Convert f_flags into access mode. We cannot use file->f_mode,
527 * because everything but O_ACCMODE mask was stripped from
529 if ((oit.it_flags + 1) & O_ACCMODE)
531 if (file->f_flags & O_TRUNC)
532 oit.it_flags |= FMODE_WRITE;
534 /* kernel only call f_op->open in dentry_open. filp_open calls
535 * dentry_open after call to open_namei that checks permissions.
536 * Only nfsd_open call dentry_open directly without checking
537 * permissions and because of that this code below is safe. */
538 if (oit.it_flags & FMODE_WRITE)
539 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
541 /* We do not want O_EXCL here, presumably we opened the file
542 * already? XXX - NFS implications? */
543 oit.it_flags &= ~O_EXCL;
545 /* bug20584, if "it_flags" contains O_CREAT, the file will be
546 * created if necessary, then "IT_CREAT" should be set to keep
547 * consistent with it */
548 if (oit.it_flags & O_CREAT)
549 oit.it_op |= IT_CREAT;
555 /* Let's see if we have file open on MDS already. */
556 if (it->it_flags & FMODE_WRITE) {
557 och_p = &lli->lli_mds_write_och;
558 och_usecount = &lli->lli_open_fd_write_count;
559 } else if (it->it_flags & FMODE_EXEC) {
560 och_p = &lli->lli_mds_exec_och;
561 och_usecount = &lli->lli_open_fd_exec_count;
563 och_p = &lli->lli_mds_read_och;
564 och_usecount = &lli->lli_open_fd_read_count;
567 down(&lli->lli_och_sem);
568 if (*och_p) { /* Open handle is present */
569 if (it_disposition(it, DISP_OPEN_OPEN)) {
570 /* Well, there's extra open request that we do not need,
571 let's close it somehow. This will decref request. */
572 rc = it_open_error(DISP_OPEN_OPEN, it);
574 up(&lli->lli_och_sem);
575 ll_file_data_put(fd);
576 GOTO(out_openerr, rc);
578 ll_release_openhandle(file->f_dentry, it);
579 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse the cached handle: no och needed for this fd. */
584 rc = ll_local_open(file, it, fd, NULL);
587 up(&lli->lli_och_sem);
588 ll_file_data_put(fd);
589 GOTO(out_openerr, rc);
592 LASSERT(*och_usecount == 0);
593 if (!it->d.lustre.it_disposition) {
594 /* We cannot just request lock handle now, new ELC code
595 means that one of other OPEN locks for this file
596 could be cancelled, and since blocking ast handler
597 would attempt to grab och_sem as well, that would
598 result in a deadlock */
599 up(&lli->lli_och_sem);
600 it->it_create_mode |= M_CHECK_STALE;
601 rc = ll_intent_file_open(file, NULL, 0, it);
602 it->it_create_mode &= ~M_CHECK_STALE;
604 ll_file_data_put(fd);
605 GOTO(out_openerr, rc);
608 /* Got some error? Release the request */
609 if (it->d.lustre.it_status < 0) {
610 req = it->d.lustre.it_data;
611 ptlrpc_req_finished(req);
615 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
617 ll_file_data_put(fd);
618 GOTO(out_och_free, rc = -ENOMEM);
621 req = it->d.lustre.it_data;
623 /* md_intent_lock() didn't get a request ref if there was an
624 * open error, so don't do cleanup on the request here
626 /* XXX (green): Should not we bail out on any error here, not
627 * just open error? */
628 rc = it_open_error(DISP_OPEN_OPEN, it);
630 ll_file_data_put(fd);
631 GOTO(out_och_free, rc);
634 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
635 rc = ll_local_open(file, it, fd, *och_p);
637 ll_file_data_put(fd);
638 GOTO(out_och_free, rc);
641 up(&lli->lli_och_sem);
643 /* Must do this outside lli_och_sem lock to prevent deadlock where
644 different kind of OPEN lock for this same inode gets cancelled
645 by ldlm_cancel_lru */
646 if (!S_ISREG(inode->i_mode))
/* Delayed object creation (O_LOV_DELAY_CREATE) or read-only open:
 * objects will be created later via the setstripe ioctl. */
653 if (file->f_flags & O_LOV_DELAY_CREATE ||
654 !(file->f_mode & FMODE_WRITE)) {
655 CDEBUG(D_INODE, "object creation was delayed\n");
659 file->f_flags &= ~O_LOV_DELAY_CREATE;
662 ptlrpc_req_finished(req);
664 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
668 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
669 *och_p = NULL; /* OBD_FREE writes some magic there */
672 up(&lli->lli_och_sem);
/* Undo the statahead ownership claimed above on error paths. */
674 if (opendir_set != 0)
675 ll_stop_statahead(inode, lli->lli_opendir_key);
681 /* Fills the obdo with the attributes for the lsm */
/*
 * Fetch object attributes for @lsm from the OSTs via an async getattr
 * folded through a ptlrpc request set, then mask o_valid down to the
 * fields the OSTs authoritatively provide (size/blocks/blksize/times).
 * NOTE(review): excerpt is missing lines (oinfo.oi_oa / oi_md setup and
 * RETURN paths are not fully visible).
 */
682 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
683 struct obd_capa *capa, struct obdo *obdo)
685 struct ptlrpc_request_set *set;
686 struct obd_info oinfo = { { { 0 } } };
691 LASSERT(lsm != NULL);
695 oinfo.oi_oa->o_id = lsm->lsm_object_id;
696 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
697 oinfo.oi_oa->o_mode = S_IFREG;
698 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
699 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
700 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
701 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
703 oinfo.oi_capa = capa;
705 set = ptlrpc_prep_set();
707 CERROR("can't allocate ptlrpc set\n");
710 rc = obd_getattr_async(exp, &oinfo, set);
712 rc = ptlrpc_set_wait(set);
713 ptlrpc_set_destroy(set);
/* Keep only OST-authoritative attributes for the caller. */
716 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
717 OBD_MD_FLATIME | OBD_MD_FLMTIME |
718 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
722 /* Fills the obdo with the attributes for the inode defined by lsm */
/*
 * Refresh @inode from its OST objects: get attributes through
 * ll_lsm_getattr() under an MDS capability, then fold the valid fields
 * back into the inode.
 */
723 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
725 struct ll_inode_info *lli = ll_i2info(inode);
726 struct obd_capa *capa = ll_mdscapa_get(inode);
730 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
733 obdo_refresh_inode(inode, obdo, obdo->o_valid);
735 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
736 lli->lli_smd->lsm_object_id, i_size_read(inode),
737 (unsigned long long)inode->i_blocks,
738 (unsigned long)ll_inode_blksize(inode));
/*
 * Merge the per-stripe lock value blocks (LVBs) into a single view and
 * apply the result (size, blocks, m/a/ctime) to @inode, all under the
 * inode size lock.
 */
743 int ll_merge_lvb(struct inode *inode)
745 struct ll_inode_info *lli = ll_i2info(inode);
746 struct ll_sb_info *sbi = ll_i2sbi(inode);
752 ll_inode_size_lock(inode, 1);
753 inode_init_lvb(inode, &lvb);
754 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
755 i_size_write(inode, lvb.lvb_size);
756 inode->i_blocks = lvb.lvb_blocks;
758 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
759 LTIME_S(inode->i_atime) = lvb.lvb_atime;
760 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
761 ll_inode_size_unlock(inode, 1);
/*
 * Glimpse helper for ioctls: fetch OST attributes for @lsm (no capability)
 * and copy size/blocks/times into the caller-supplied stat structure.
 */
766 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
769 struct obdo obdo = { 0 };
772 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
774 st->st_size = obdo.o_size;
775 st->st_blocks = obdo.o_blocks;
776 st->st_mtime = obdo.o_mtime;
777 st->st_atime = obdo.o_atime;
778 st->st_ctime = obdo.o_ctime;
/*
 * Initialize a cl_io for @file: carry over O_NONBLOCK/O_APPEND, attach
 * the cl_object, and pick the DLM locking policy — never lock when the
 * file is flagged no-lock, mandatory locking for O_APPEND writes,
 * otherwise let the IO layer decide (CILR_MAYBE).
 */
783 void ll_io_init(struct cl_io *io, const struct file *file, int write)
785 struct inode *inode = file->f_dentry->d_inode;
787 memset(io, 0, sizeof *io);
788 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
790 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
791 io->ci_obj = ll_i2info(inode)->lli_clob;
792 io->ci_lockreq = CILR_MAYBE;
793 if (ll_file_nolock(file)) {
794 io->ci_lockreq = CILR_NEVER;
795 io->ci_no_srvlock = 1;
796 } else if (file->f_flags & O_APPEND) {
797 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common engine for all read/write entry points (readv/aio/sendfile/
 * splice): set up a cl_io, copy the per-subtype arguments into the vvp
 * layer, take lli_write_sem for non-group-locked writes, run the IO loop,
 * and update *ppos from the IO result.
 * NOTE(review): excerpt is missing lines (case labels of the switch and
 * the result/RETURN handling are not fully visible).
 */
801 static ssize_t ll_file_io_generic(const struct lu_env *env,
802 struct vvp_io_args *args, struct file *file,
803 enum cl_io_type iot, loff_t *ppos, size_t count)
809 io = &ccc_env_info(env)->cti_io;
810 ll_io_init(io, file, iot == CIT_WRITE);
812 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
813 struct vvp_io *vio = vvp_env_io(env);
814 struct ccc_io *cio = ccc_env_io(env);
815 struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
816 int write_sem_locked = 0;
818 cio->cui_fd = LUSTRE_FPRIVATE(file);
819 vio->cui_io_subtype = args->via_io_subtype;
/* Hand the subtype-specific arguments down to the vvp/ccc layers. */
821 switch (vio->cui_io_subtype) {
823 cio->cui_iov = args->u.normal.via_iov;
824 cio->cui_nrsegs = args->u.normal.via_nrsegs;
825 #ifndef HAVE_FILE_WRITEV
826 cio->cui_iocb = args->u.normal.via_iocb;
/* Serialize plain writes per inode; group-locked writes skip this. */
828 if ((iot == CIT_WRITE) &&
829 !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
830 down(&lli->lli_write_sem);
831 write_sem_locked = 1;
835 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
836 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
839 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
840 vio->u.splice.cui_flags = args->u.splice.via_flags;
843 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
846 result = cl_io_loop(env, io);
847 if (write_sem_locked)
848 up(&lli->lli_write_sem);
850 /* cl_io_rw_init() handled IO */
851 result = io->ci_result;
854 if (io->ci_nob > 0) {
856 *ppos = io->u.ci_wr.wr.crw_pos;
864 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array and compute the total byte count into *count;
 * may shrink *nr_segs when a segment fails access_ok().
 * NOTE(review): excerpt is missing lines (the accumulation into cnt, the
 * continue/break on access_ok and the final *count store are not visible).
 */
866 static int ll_file_get_iov_count(const struct iovec *iov,
867 unsigned long *nr_segs, size_t *count)
872 for (seg = 0; seg < *nr_segs; seg++) {
873 const struct iovec *iv = &iov[seg];
876 * If any segment has a negative length, or the cumulative
877 * length ever wraps negative then return -EINVAL.
880 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
882 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
887 cnt -= iv->iov_len; /* This segment is no good */
894 #ifdef HAVE_FILE_READV
/*
 * ->readv() entry point (HAVE_FILE_READV kernels): validate the iovec,
 * grab a cl environment and run the generic IO engine as CIT_READ.
 */
895 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
896 unsigned long nr_segs, loff_t *ppos)
899 struct vvp_io_args *args;
905 result = ll_file_get_iov_count(iov, &nr_segs, &count);
909 env = cl_env_get(&refcheck);
911 RETURN(PTR_ERR(env));
913 args = vvp_env_args(env, IO_NORMAL);
914 args->u.normal.via_iov = (struct iovec *)iov;
915 args->u.normal.via_nrsegs = nr_segs;
917 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
918 cl_env_put(env, &refcheck);
/*
 * ->read() wrapper (HAVE_FILE_READV kernels): wrap the user buffer in a
 * single-segment iovec from the cl env scratch area and delegate to
 * ll_file_readv().
 */
922 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
926 struct iovec *local_iov;
931 env = cl_env_get(&refcheck);
933 RETURN(PTR_ERR(env));
935 local_iov = &vvp_env_info(env)->vti_local_iov;
936 local_iov->iov_base = (void __user *)buf;
937 local_iov->iov_len = count;
938 result = ll_file_readv(file, local_iov, 1, ppos);
939 cl_env_put(env, &refcheck);
/*
 * ->aio_read() entry point: validate the iovec, pass iovec + kiocb to
 * the generic IO engine as CIT_READ, advancing iocb->ki_pos.
 */
944 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
945 unsigned long nr_segs, loff_t pos)
948 struct vvp_io_args *args;
954 result = ll_file_get_iov_count(iov, &nr_segs, &count);
958 env = cl_env_get(&refcheck);
960 RETURN(PTR_ERR(env));
962 args = vvp_env_args(env, IO_NORMAL);
963 args->u.normal.via_iov = (struct iovec *)iov;
964 args->u.normal.via_nrsegs = nr_segs;
965 args->u.normal.via_iocb = iocb;
967 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
968 &iocb->ki_pos, count);
969 cl_env_put(env, &refcheck);
/*
 * ->read() wrapper (aio-based kernels): build a synchronous kiocb and a
 * one-segment iovec from the cl env scratch area, call ll_file_aio_read()
 * and propagate the final position back to *ppos.
 */
973 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
977 struct iovec *local_iov;
983 env = cl_env_get(&refcheck);
985 RETURN(PTR_ERR(env));
987 local_iov = &vvp_env_info(env)->vti_local_iov;
988 kiocb = &vvp_env_info(env)->vti_kiocb;
989 local_iov->iov_base = (void __user *)buf;
990 local_iov->iov_len = count;
991 init_sync_kiocb(kiocb, file);
992 kiocb->ki_pos = *ppos;
993 kiocb->ki_left = count;
995 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
996 *ppos = kiocb->ki_pos;
998 cl_env_put(env, &refcheck);
1004 * Write to a file (through the page cache).
1006 #ifdef HAVE_FILE_WRITEV
/*
 * ->writev() entry point (HAVE_FILE_WRITEV kernels): validate the iovec,
 * grab a cl environment and run the generic IO engine as CIT_WRITE.
 */
1007 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1008 unsigned long nr_segs, loff_t *ppos)
1011 struct vvp_io_args *args;
1017 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1021 env = cl_env_get(&refcheck);
1023 RETURN(PTR_ERR(env));
1025 args = vvp_env_args(env, IO_NORMAL);
1026 args->u.normal.via_iov = (struct iovec *)iov;
1027 args->u.normal.via_nrsegs = nr_segs;
1029 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1030 cl_env_put(env, &refcheck);
/*
 * ->write() wrapper (HAVE_FILE_WRITEV kernels): wrap the user buffer in a
 * single-segment iovec and delegate to ll_file_writev().
 */
1034 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1038 struct iovec *local_iov;
1043 env = cl_env_get(&refcheck);
1045 RETURN(PTR_ERR(env));
1047 local_iov = &vvp_env_info(env)->vti_local_iov;
1048 local_iov->iov_base = (void __user *)buf;
1049 local_iov->iov_len = count;
1051 result = ll_file_writev(file, local_iov, 1, ppos);
1052 cl_env_put(env, &refcheck);
1056 #else /* AIO stuff */
/*
 * ->aio_write() entry point: validate the iovec, pass iovec + kiocb to
 * the generic IO engine as CIT_WRITE, advancing iocb->ki_pos.
 */
1057 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1058 unsigned long nr_segs, loff_t pos)
1061 struct vvp_io_args *args;
1067 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1071 env = cl_env_get(&refcheck);
1073 RETURN(PTR_ERR(env));
1075 args = vvp_env_args(env, IO_NORMAL);
1076 args->u.normal.via_iov = (struct iovec *)iov;
1077 args->u.normal.via_nrsegs = nr_segs;
1078 args->u.normal.via_iocb = iocb;
1080 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1081 &iocb->ki_pos, count);
1082 cl_env_put(env, &refcheck);
/*
 * ->write() wrapper (aio-based kernels): build a synchronous kiocb and a
 * one-segment iovec, call ll_file_aio_write() and propagate the final
 * position back to *ppos.
 */
1086 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1090 struct iovec *local_iov;
1091 struct kiocb *kiocb;
1096 env = cl_env_get(&refcheck);
1098 RETURN(PTR_ERR(env));
1100 local_iov = &vvp_env_info(env)->vti_local_iov;
1101 kiocb = &vvp_env_info(env)->vti_kiocb;
1102 local_iov->iov_base = (void __user *)buf;
1103 local_iov->iov_len = count;
1104 init_sync_kiocb(kiocb, file);
1105 kiocb->ki_pos = *ppos;
1106 kiocb->ki_left = count;
1108 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1109 *ppos = kiocb->ki_pos;
1111 cl_env_put(env, &refcheck);
1117 #ifdef HAVE_KERNEL_SENDFILE
1119 * Send file content (through pagecache) somewhere with helper
/*
 * ->sendfile() entry point (HAVE_KERNEL_SENDFILE): route the read through
 * the generic IO engine with IO_SENDFILE subtype arguments
 * (actor callback + target).
 */
1121 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1122 read_actor_t actor, void *target)
1125 struct vvp_io_args *args;
1130 env = cl_env_get(&refcheck);
1132 RETURN(PTR_ERR(env));
1134 args = vvp_env_args(env, IO_SENDFILE);
1135 args->u.sendfile.via_target = target;
1136 args->u.sendfile.via_actor = actor;
1138 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1139 cl_env_put(env, &refcheck);
1144 #ifdef HAVE_KERNEL_SPLICE_READ
1146 * Send file content (through pagecache) somewhere with helper
/*
 * ->splice_read() entry point (HAVE_KERNEL_SPLICE_READ): route the read
 * through the generic IO engine with IO_SPLICE subtype arguments
 * (pipe + flags).
 */
1148 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1149 struct pipe_inode_info *pipe, size_t count,
1153 struct vvp_io_args *args;
1158 env = cl_env_get(&refcheck);
1160 RETURN(PTR_ERR(env));
1162 args = vvp_env_args(env, IO_SPLICE);
1163 args->u.splice.via_pipe = pipe;
1164 args->u.splice.via_flags = flags;
1166 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1167 cl_env_put(env, &refcheck);
/*
 * LL_IOC_RECREATE_OBJ ioctl handler: recreate a lost OST object for this
 * file.  Admin-only (CFS_CAP_SYS_ADMIN); copies the request from user
 * space, clones the file's stripe MD under the inode size lock and calls
 * obd_create() with OBD_FL_RECREATE_OBJS.
 * NOTE(review): excerpt is missing lines (oa allocation, error labels);
 * comments annotate visible statements only.
 */
1172 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1175 struct obd_export *exp = ll_i2dtexp(inode);
1176 struct ll_recreate_obj ucreatp;
1177 struct obd_trans_info oti = { 0 };
1178 struct obdo *oa = NULL;
1181 struct lov_stripe_md *lsm, *lsm2;
1184 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1187 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1188 sizeof(struct ll_recreate_obj)))
1195 ll_inode_size_lock(inode, 0);
1196 lsm = ll_i2info(inode)->lli_smd;
1198 GOTO(out, rc = -ENOENT);
/* Stripe MD is variable-sized: header plus one lov_oinfo per stripe. */
1199 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1200 (lsm->lsm_stripe_count));
1202 OBD_ALLOC(lsm2, lsm_size);
1204 GOTO(out, rc = -ENOMEM);
/* NOTE(review): o_nlink carrying the OST index looks like a field reuse
 * for OBD_FL_RECREATE_OBJS — confirm against obd_create() consumers. */
1206 oa->o_id = ucreatp.lrc_id;
1207 oa->o_gr = ucreatp.lrc_group;
1208 oa->o_nlink = ucreatp.lrc_ost_idx;
1209 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1210 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1211 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1212 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1214 memcpy(lsm2, lsm, lsm_size);
1215 rc = obd_create(exp, oa, &lsm2, &oti);
1217 OBD_FREE(lsm2, lsm_size);
1220 ll_inode_size_unlock(inode, 0);
/*
 * Set striping information on @inode by performing an open intent that
 * carries the user's lov_user_md.  Fails early (visible via the CDEBUG)
 * if a stripe MD already exists; on success the transient open handle is
 * released again.
 * NOTE(review): excerpt is missing lines (the lsm != NULL guard and the
 * exit labels out/out_req_free are not fully visible).
 */
1225 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1226 int flags, struct lov_user_md *lum, int lum_size)
1228 struct lov_stripe_md *lsm;
1229 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1233 ll_inode_size_lock(inode, 0);
1234 lsm = ll_i2info(inode)->lli_smd;
1236 ll_inode_size_unlock(inode, 0);
1237 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1242 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1245 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1246 GOTO(out_req_free, rc = -ENOENT);
1247 rc = oit.d.lustre.it_status;
1249 GOTO(out_req_free, rc);
/* The open was only a vehicle for the setstripe; close it again. */
1251 ll_release_openhandle(file->f_dentry, &oit);
1254 ll_inode_size_unlock(inode, 0);
1255 ll_intent_release(&oit);
1258 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (striping metadata) for @filename from the MDS.
 * Returns the lmm buffer (owned by the request; caller keeps *request
 * alive while using it), its size, and the request itself.  Validates the
 * LOV magic and byte-swaps the EA to host endianness for userspace when
 * the host is big-endian; lmm_objects are only swabbed for regular files
 * (directories have no stripe objects).
 * NOTE(review): excerpt is missing lines (eadatasize==0 check, *lmmp
 * assignment, out label) — comments annotate visible statements only.
 */
1262 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1263 struct lov_mds_md **lmmp, int *lmm_size,
1264 struct ptlrpc_request **request)
1266 struct ll_sb_info *sbi = ll_i2sbi(inode);
1267 struct mdt_body *body;
1268 struct lov_mds_md *lmm = NULL;
1269 struct ptlrpc_request *req = NULL;
1270 struct obd_capa *oc;
1273 rc = ll_get_max_mdsize(sbi, &lmmsize);
1277 oc = ll_mdscapa_get(inode);
1278 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1279 oc, filename, strlen(filename) + 1,
1280 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1281 ll_i2suppgid(inode), &req);
1284 CDEBUG(D_INFO, "md_getattr_name failed "
1285 "on %s: rc %d\n", filename, rc);
1289 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1290 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1292 lmmsize = body->eadatasize;
1294 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1296 GOTO(out, rc = -ENODATA);
1299 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1300 LASSERT(lmm != NULL);
/* Only V1/V3 LOV EAs are understood here. */
1302 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1303 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) {
1304 GOTO(out, rc = -EPROTO);
1308 * This is coming from the MDS, so is probably in
1309 * little endian. We convert it to host endian before
1310 * passing it to userspace.
/* True only on big-endian hosts, where a swab is required. */
1312 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1313 /* if function called for directory - we should
1314 * avoid swab not existent lsm objects */
1315 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1316 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1317 if (S_ISREG(body->mode))
1318 lustre_swab_lov_user_md_objects(
1319 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1320 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1321 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1322 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1323 if (S_ISREG(body->mode))
1324 lustre_swab_lov_user_md_objects(
1325 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1326 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1332 *lmm_size = lmmsize;
/*
 * ll_lov_setea(): LL_IOC_LOV_SETEA handler - copy a raw LOV EA (one OST
 * object descriptor) from userspace and apply it through
 * ll_lov_setstripe_ea_info().  Gated on CFS_CAP_SYS_ADMIN below.
 * NOTE(review): listing has gaps (embedded line numbers jump); some error
 * returns/braces are not visible here.
 */
1337 static int ll_lov_setea(struct inode *inode, struct file *file,
1340 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1341 struct lov_user_md *lump;
1342 int lum_size = sizeof(struct lov_user_md) +
1343 sizeof(struct lov_user_ost_data);
1347 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1350 OBD_ALLOC(lump, lum_size);
1354 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1355 OBD_FREE(lump, lum_size);
1359 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1361 OBD_FREE(lump, lum_size);
/*
 * ll_lov_setstripe(): LL_IOC_LOV_SETSTRIPE handler - read a v1 lum first
 * (it is smaller), re-read as v3 when the magic says so, apply the
 * striping, then write the resulting stripe information back to the
 * caller's buffer via a GETSTRIPE obd_iocontrol().
 */
1365 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1368 struct lov_user_md_v3 lumv3;
1369 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1370 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1371 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1374 int flags = FMODE_WRITE;
1377 /* first try with v1 which is smaller than v3 */
1378 lum_size = sizeof(struct lov_user_md_v1);
1379 if (copy_from_user(lumv1, lumv1p, lum_size))
1382 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1383 lum_size = sizeof(struct lov_user_md_v3);
1384 if (copy_from_user(&lumv3, lumv3p, lum_size))
1388 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* clear the user's stripe_count, then let GETSTRIPE fill in the result */
1390 put_user(0, &lumv1p->lmm_stripe_count);
1391 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1392 0, ll_i2info(inode)->lli_smd,
/*
 * ll_lov_getstripe(): LL_IOC_LOV_GETSTRIPE handler - dump the in-memory
 * stripe descriptor (lli_smd) to the user buffer via obd_iocontrol().
 */
1398 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1400 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1405 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * ll_get_grouplock(): LL_IOC_GROUP_LOCK handler - acquire a Lustre group
 * lock with group id 'arg' on behalf of this open file.  fd_flags and
 * fd_grouplock are updated under lli->lli_lock; the (possibly blocking)
 * cl_get_grouplock() call runs outside the spinlock, so a lost race with
 * another thread is detected afterwards and the extra lock is dropped.
 * NOTE(review): listing has gaps (embedded line numbers jump); RETURNs
 * and some closing braces are not visible here.
 */
1409 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1411 struct ll_inode_info *lli = ll_i2info(inode);
1412 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1413 struct ccc_grouplock grouplock;
1417 if (ll_file_nolock(file))
1418 RETURN(-EOPNOTSUPP);
1420 spin_lock(&lli->lli_lock);
1421 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1422 CWARN("group lock already existed with gid %lu\n",
1423 fd->fd_grouplock.cg_gid);
1424 spin_unlock(&lli->lli_lock);
1427 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1428 spin_unlock(&lli->lli_lock);
/* enqueue outside the spinlock; honours O_NONBLOCK */
1430 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1431 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1435 spin_lock(&lli->lli_lock);
1436 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1437 spin_unlock(&lli->lli_lock);
1438 CERROR("another thread just won the race\n");
1439 cl_put_grouplock(&grouplock);
1443 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1444 fd->fd_grouplock = grouplock;
1445 spin_unlock(&lli->lli_lock);
1447 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * ll_put_grouplock(): LL_IOC_GROUP_UNLOCK handler - release the group
 * lock held on this file descriptor, after verifying that one is held
 * and that its gid matches 'arg'.
 */
1451 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1453 struct ll_inode_info *lli = ll_i2info(inode);
1454 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1455 struct ccc_grouplock grouplock;
1458 spin_lock(&lli->lli_lock);
1459 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1460 spin_unlock(&lli->lli_lock);
1461 CWARN("no group lock held\n");
1464 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1466 if (fd->fd_grouplock.cg_gid != arg) {
1467 CWARN("group lock %lu doesn't match current id %lu\n",
1468 arg, fd->fd_grouplock.cg_gid);
1469 spin_unlock(&lli->lli_lock);
/* copy out the handle so cl_put_grouplock() runs outside the spinlock */
1473 grouplock = fd->fd_grouplock;
1474 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1475 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1476 spin_unlock(&lli->lli_lock);
1478 cl_put_grouplock(&grouplock);
1479 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1484 * Close inode open handle
1486 * \param dentry [in] dentry which contains the inode
1487 * \param it [in,out] intent which contains open info and result
1490 * \retval <0 failure
/*
 * NOTE(review): listing has gaps (embedded line numbers jump); the
 * out: label and final RETURN are not visible here.
 */
1492 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1494 struct inode *inode = dentry->d_inode;
1495 struct obd_client_handle *och;
1501 /* Root ? Do nothing. */
1502 if (dentry->d_inode->i_sb->s_root == dentry)
1505 /* No open handle to close? Move away */
1506 if (!it_disposition(it, DISP_OPEN_OPEN))
1509 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1511 OBD_ALLOC(och, sizeof(*och));
1513 GOTO(out, rc = -ENOMEM);
/* populate the client handle from the intent, then close it on the MDS */
1515 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1516 ll_i2info(inode), it, och);
1518 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1521 /* this one is in place of ll_file_open */
1522 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1523 ptlrpc_req_finished(it->d.lustre.it_data);
1524 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1529 * Get size for inode for which FIEMAP mapping is requested.
1530 * Make the FIEMAP get_info call and returns the result.
/*
 * NOTE(review): listing has gaps (embedded line numbers jump); some
 * declarations (rc, num_bytes parameter line) and returns are missing.
 */
1532 int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1535 struct obd_export *exp = ll_i2dtexp(inode);
1536 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1537 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1538 int vallen = num_bytes;
1542 /* If the stripe_count > 1 and the application does not understand
1543 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1545 if (lsm->lsm_stripe_count > 1 &&
1546 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
/* identify the object and pull current inode attributes into the key */
1549 fm_key.oa.o_id = lsm->lsm_object_id;
1550 fm_key.oa.o_gr = lsm->lsm_object_gr;
1551 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1553 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1556 /* If filesize is 0, then there would be no objects for mapping */
1557 if (fm_key.oa.o_size == 0) {
1558 fiemap->fm_mapped_extents = 0;
/* the request fiemap is embedded in the key; obd_get_info() fills the
 * caller's fiemap buffer with the mapped extents */
1562 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1564 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1566 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * ll_fid2path(): OBD_IOC_FID2PATH handler - copy the user's request header
 * in, allocate an output buffer sized by the requested gf_pathlen, forward
 * the query to the MDC via obd_iocontrol(), and copy the result back.
 * NOTE(review): listing has gaps (embedded line numbers jump); gfin's free,
 * rc/outsize declarations and error returns are not visible here.
 */
1571 int ll_fid2path(struct obd_export *exp, void *arg)
1573 struct getinfo_fid2path *gfout, *gfin;
1577 /* Need to get the buflen */
1578 OBD_ALLOC_PTR(gfin);
1581 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1586 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1587 OBD_ALLOC(gfout, outsize);
1588 if (gfout == NULL) {
/* copy the fixed-size header; the path area follows in gfout */
1592 memcpy(gfout, gfin, sizeof(*gfout));
1595 /* Call mdc_iocontrol */
1596 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1599 if (copy_to_user(arg, gfout, outsize))
1603 OBD_FREE(gfout, outsize);
/*
 * ll_file_ioctl(): the llite file ioctl dispatcher.  Handles Lustre-private
 * commands (flags, striping, group locks, fiemap, fid<->path) locally or by
 * forwarding to the MDC/OSC, falls through to dynamically registered
 * handlers (ll_iocontrol_call) and finally to obd_iocontrol().
 * NOTE(review): listing has gaps (embedded line numbers jump); the switch
 * statement itself, several RETURNs and break/closing braces are missing
 * from this view.
 */
1607 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1610 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1614 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1615 inode->i_generation, inode, cmd);
1616 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1618 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1619 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1623 case LL_IOC_GETFLAGS:
1624 /* Get the current value of the file flags */
1625 return put_user(fd->fd_flags, (int *)arg);
1626 case LL_IOC_SETFLAGS:
1627 case LL_IOC_CLRFLAGS:
1628 /* Set or clear specific file flags */
1629 /* XXX This probably needs checks to ensure the flags are
1630 * not abused, and to handle any flag side effects.
1632 if (get_user(flags, (int *) arg))
1635 if (cmd == LL_IOC_SETFLAGS) {
/* IGNORE_LOCK is only meaningful for O_DIRECT files */
1636 if ((flags & LL_FILE_IGNORE_LOCK) &&
1637 !(file->f_flags & O_DIRECT)) {
1638 CERROR("%s: unable to disable locking on "
1639 "non-O_DIRECT file\n", current->comm);
1643 fd->fd_flags |= flags;
1645 fd->fd_flags &= ~flags;
1648 case LL_IOC_LOV_SETSTRIPE:
1649 RETURN(ll_lov_setstripe(inode, file, arg));
1650 case LL_IOC_LOV_SETEA:
1651 RETURN(ll_lov_setea(inode, file, arg));
1652 case LL_IOC_LOV_GETSTRIPE:
1653 RETURN(ll_lov_getstripe(inode, arg));
1654 case LL_IOC_RECREATE_OBJ:
1655 RETURN(ll_lov_recreate_obj(inode, file, arg));
1656 case FSFILT_IOC_FIEMAP: {
1657 struct ll_user_fiemap *fiemap_s;
1658 size_t num_bytes, ret_bytes;
1659 unsigned int extent_count;
1662 /* Get the extent count so we can calculate the size of
1663 * required fiemap buffer */
1664 if (get_user(extent_count,
1665 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1667 num_bytes = sizeof(*fiemap_s) + (extent_count *
1668 sizeof(struct ll_fiemap_extent));
1669 OBD_VMALLOC(fiemap_s, num_bytes);
1670 if (fiemap_s == NULL)
1673 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1675 GOTO(error, rc = -EFAULT);
/* reject unsupported flags, but tell userspace which ones we would
 * accept by writing back the masked flag set before failing */
1677 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1678 fiemap_s->fm_flags = fiemap_s->fm_flags &
1679 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1680 if (copy_to_user((char *)arg, fiemap_s,
1682 GOTO(error, rc = -EFAULT);
1684 GOTO(error, rc = -EBADR);
1687 /* If fm_extent_count is non-zero, read the first extent since
1688 * it is used to calculate end_offset and device from previous
1691 if (copy_from_user(&fiemap_s->fm_extents[0],
1692 (char __user *)arg + sizeof(*fiemap_s),
1693 sizeof(struct ll_fiemap_extent)))
1694 GOTO(error, rc = -EFAULT);
/* FIEMAP_FLAG_SYNC: flush dirty pages before mapping */
1697 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1700 rc = filemap_fdatawrite(inode->i_mapping);
1705 rc = ll_fiemap(inode, fiemap_s, num_bytes);
/* copy back the header plus only the extents actually mapped */
1709 ret_bytes = sizeof(struct ll_user_fiemap);
1711 if (extent_count != 0)
1712 ret_bytes += (fiemap_s->fm_mapped_extents *
1713 sizeof(struct ll_fiemap_extent));
1715 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1719 OBD_VFREE(fiemap_s, num_bytes);
1722 case FSFILT_IOC_GETFLAGS:
1723 case FSFILT_IOC_SETFLAGS:
1724 RETURN(ll_iocontrol(inode, file, cmd, arg));
1725 case FSFILT_IOC_GETVERSION_OLD:
1726 case FSFILT_IOC_GETVERSION:
1727 RETURN(put_user(inode->i_generation, (int *)arg));
1728 case LL_IOC_GROUP_LOCK:
1729 RETURN(ll_get_grouplock(inode, file, arg));
1730 case LL_IOC_GROUP_UNLOCK:
1731 RETURN(ll_put_grouplock(inode, file, arg));
1732 case IOC_OBD_STATFS:
1733 RETURN(ll_obd_statfs(inode, (void *)arg));
1735 /* We need to special case any other ioctls we want to handle,
1736 * to send them to the MDS/OST as appropriate and to properly
1737 * network encode the arg field.
1738 case FSFILT_IOC_SETVERSION_OLD:
1739 case FSFILT_IOC_SETVERSION:
1741 case LL_IOC_FLUSHCTX:
1742 RETURN(ll_flush_ctx(inode));
1743 case LL_IOC_PATH2FID: {
1744 if (copy_to_user((void *)arg, ll_inode2fid(inode),
1745 sizeof(struct lu_fid)))
1750 case OBD_IOC_FID2PATH:
1751 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
/* unknown command: try dynamically registered handlers first, then
 * fall back to the generic obd_iocontrol() path */
1757 ll_iocontrol_call(inode, file, cmd, arg, &err))
1760 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * ll_file_seek(): llseek for Lustre files.  For SEEK_END the cluster-wide
 * size must be refreshed first (cl_glimpse_size) before i_size is read
 * under the inode size lock.
 * NOTE(review): listing has gaps (embedded line numbers jump); the retval
 * declaration, error handling after cl_glimpse_size and the final returns
 * are not visible here.  'nonblock' is computed but its use is not visible
 * in this view - TODO confirm against the full source.
 */
1766 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1768 struct inode *inode = file->f_dentry->d_inode;
/* provisional target, logged below (origin: 2=END, 1=CUR, 0=SET) */
1771 retval = offset + ((origin == 2) ? i_size_read(inode) :
1772 (origin == 1) ? file->f_pos : 0);
1773 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1774 inode->i_ino, inode->i_generation, inode, retval, retval,
1775 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1776 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1778 if (origin == 2) { /* SEEK_END */
1779 int nonblock = 0, rc;
1781 if (file->f_flags & O_NONBLOCK)
1782 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* refresh i_size from the OSTs before using it */
1784 rc = cl_glimpse_size(inode);
1788 ll_inode_size_lock(inode, 0);
1789 offset += i_size_read(inode);
1790 ll_inode_size_unlock(inode, 0);
1791 } else if (origin == 1) { /* SEEK_CUR */
1792 offset += file->f_pos;
1796 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1797 if (offset != file->f_pos) {
1798 file->f_pos = offset;
/*
 * ll_fsync(): fsync for Lustre files - wait for in-flight page writes,
 * collect previously recorded async write errors, sync metadata via
 * md_sync() on the MDS, then sync data objects via obd_sync() on the OSTs.
 * NOTE(review): listing has gaps (embedded line numbers jump); rc/err
 * declarations, several error branches, the obdo allocation and the final
 * RETURN are not visible here.
 */
1806 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1808 struct inode *inode = dentry->d_inode;
1809 struct ll_inode_info *lli = ll_i2info(inode);
1810 struct lov_stripe_md *lsm = lli->lli_smd;
1811 struct ptlrpc_request *req;
1812 struct obd_capa *oc;
1815 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1816 inode->i_generation, inode);
1817 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1819 /* fsync's caller has already called _fdata{sync,write}, we want
1820 * that IO to finish before calling the osc and mdc sync methods */
1821 rc = filemap_fdatawait(inode->i_mapping);
1823 /* catch async errors that were recorded back when async writeback
1824 * failed for pages in this mapping. */
1825 err = lli->lli_async_rc;
1826 lli->lli_async_rc = 0;
1830 err = lov_test_and_clear_async_rc(lsm);
/* metadata sync on the MDS */
1835 oc = ll_mdscapa_get(inode);
1836 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1842 ptlrpc_req_finished(req);
1849 RETURN(rc ? rc : -ENOMEM);
/* data sync: build an obdo for the object and sync the whole range */
1851 oa->o_id = lsm->lsm_object_id;
1852 oa->o_gr = lsm->lsm_object_gr;
1853 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1854 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1855 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1858 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1859 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1860 0, OBD_OBJECT_EOF, oc);
/*
 * ll_file_flock(): implement POSIX fcntl locks and BSD flock on Lustre by
 * translating them into an LDLM_FLOCK enqueue on the MDS.  The fl_type
 * maps to an LDLM mode (PR/PW/NL) and the cmd selects blocking, test-only
 * or non-blocking enqueue flags; on success the local kernel lock state is
 * updated to match.
 * NOTE(review): listing has gaps (embedded line numbers jump); several
 * case labels, flag declarations and returns are not visible here.
 */
1870 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1872 struct inode *inode = file->f_dentry->d_inode;
1873 struct ll_sb_info *sbi = ll_i2sbi(inode);
1874 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1875 .ei_cb_cp =ldlm_flock_completion_ast,
1876 .ei_cbdata = file_lock };
1877 struct md_op_data *op_data;
1878 struct lustre_handle lockh = {0};
1879 ldlm_policy_data_t flock;
1884 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1885 inode->i_ino, file_lock);
1887 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1889 if (file_lock->fl_flags & FL_FLOCK) {
1890 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1891 /* set missing params for flock() calls */
1892 file_lock->fl_end = OFFSET_MAX;
1893 file_lock->fl_pid = current->tgid;
1895 flock.l_flock.pid = file_lock->fl_pid;
1896 flock.l_flock.start = file_lock->fl_start;
1897 flock.l_flock.end = file_lock->fl_end;
/* map the fcntl lock type onto an LDLM lock mode */
1899 switch (file_lock->fl_type) {
1901 einfo.ei_mode = LCK_PR;
1904 /* An unlock request may or may not have any relation to
1905 * existing locks so we may not be able to pass a lock handle
1906 * via a normal ldlm_lock_cancel() request. The request may even
1907 * unlock a byte range in the middle of an existing lock. In
1908 * order to process an unlock request we need all of the same
1909 * information that is given with a normal read or write record
1910 * lock request. To avoid creating another ldlm unlock (cancel)
1911 * message we'll treat a LCK_NL flock request as an unlock. */
1912 einfo.ei_mode = LCK_NL;
1915 einfo.ei_mode = LCK_PW;
1918 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* map the fcntl command onto LDLM enqueue flags */
1933 flags = LDLM_FL_BLOCK_NOWAIT;
1939 flags = LDLM_FL_TEST_LOCK;
1940 /* Save the old mode so that if the mode in the lock changes we
1941 * can decrement the appropriate reader or writer refcount. */
1942 file_lock->fl_type = einfo.ei_mode;
1945 CERROR("unknown fcntl lock command: %d\n", cmd);
1949 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
1950 LUSTRE_OPC_ANY, NULL);
1951 if (IS_ERR(op_data))
1952 RETURN(PTR_ERR(op_data));
1954 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
1955 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
1956 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
1958 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
1959 op_data, &lockh, &flock, 0, NULL /* req */, flags);
1961 ll_finish_md_op_data(op_data);
/* mirror the server-side result into the local kernel lock tables */
1963 if ((file_lock->fl_flags & FL_FLOCK) &&
1964 (rc == 0 || file_lock->fl_type == F_UNLCK))
1965 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
1966 #ifdef HAVE_F_OP_FLOCK
1967 if ((file_lock->fl_flags & FL_POSIX) &&
1968 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
1969 !(flags & LDLM_FL_TEST_LOCK))
1970 posix_lock_file_wait(file, file_lock);
/*
 * ll_file_noflock(): lock/flock entry point used by the -o noflock mount
 * flavour (wired into ll_file_operations_noflock below).
 * NOTE(review): only the signature is visible in this listing - the body
 * is hidden by gaps; presumably it rejects the request, but confirm
 * against the full source.
 */
1976 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * ll_have_md_lock(): non-intrusive test (LDLM_FL_TEST_LOCK) whether this
 * client already holds an MDS inodebits lock covering 'bits' on the inode,
 * in any of modes CR/CW/PR/PW.
 * NOTE(review): listing has gaps (embedded line numbers jump); fid/flags
 * declarations and returns are not visible here.
 */
1983 int ll_have_md_lock(struct inode *inode, __u64 bits)
1985 struct lustre_handle lockh;
1986 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
1994 fid = &ll_i2info(inode)->lli_fid;
1995 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
1997 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
1998 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
1999 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * ll_take_md_lock(): like ll_have_md_lock() but without TEST_LOCK - a
 * matching lock is actually referenced and its handle returned in *lockh.
 */
2005 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2006 struct lustre_handle *lockh)
2008 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2014 fid = &ll_i2info(inode)->lli_fid;
2015 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2017 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2018 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2019 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * ll_inode_revalidate_fini(): translate the getattr result - -ENOENT on an
 * already-unlinked but still-open inode is downgraded (except for stray
 * regular-file/dir races), other failures are logged.
 */
2023 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2024 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2025 * and return success */
2027 /* This path cannot be hit for regular files unless in
2028 * case of obscure races, so no need to to validate
2030 if (!S_ISREG(inode->i_mode) &&
2031 !S_ISDIR(inode->i_mode))
2036 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * __ll_inode_revalidate_it(): refresh this dentry's inode attributes from
 * the MDS.  When the server supports getattr-by-fid (OBD_CONNECT_ATTRFID)
 * an IT_GETATTR intent lock is taken; otherwise a plain md_getattr() is
 * issued unless a covering inodebits lock (ibits) is already held.
 * NOTE(review): listing has gaps (embedded line numbers jump); the ibits
 * parameter line, rc declaration, several braces and GOTOs are not
 * visible here.
 */
2044 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2047 struct inode *inode = dentry->d_inode;
2048 struct ptlrpc_request *req = NULL;
2049 struct ll_sb_info *sbi;
2050 struct obd_export *exp;
2055 CERROR("REPORT THIS LINE TO PETER\n");
2058 sbi = ll_i2sbi(inode);
2060 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2061 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2063 exp = ll_i2mdexp(inode);
2065 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2066 struct lookup_intent oit = { .it_op = IT_GETATTR };
2067 struct md_op_data *op_data;
2069 /* Call getattr by fid, so do not provide name at all. */
2070 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2071 dentry->d_inode, NULL, 0, 0,
2072 LUSTRE_OPC_ANY, NULL);
2073 if (IS_ERR(op_data))
2074 RETURN(PTR_ERR(op_data));
2076 oit.it_create_mode |= M_CHECK_STALE;
2077 rc = md_intent_lock(exp, op_data, NULL, 0,
2078 /* we are not interested in name
2081 ll_md_blocking_ast, 0);
2082 ll_finish_md_op_data(op_data);
2083 oit.it_create_mode &= ~M_CHECK_STALE;
2085 rc = ll_inode_revalidate_fini(inode, rc);
2089 rc = ll_revalidate_it_finish(req, &oit, dentry);
2091 ll_intent_release(&oit);
2095 /* Unlinked? Unhash dentry, so it is not picked up later by
2096 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2097 here to preserve get_cwd functionality on 2.6.
2099 if (!dentry->d_inode->i_nlink) {
2100 spin_lock(&ll_lookup_lock);
2101 spin_lock(&dcache_lock);
2102 ll_drop_dentry(dentry);
2103 spin_unlock(&dcache_lock);
2104 spin_unlock(&ll_lookup_lock);
2107 ll_lookup_finish_locks(&oit, dentry);
/* no ATTRFID support: only go to the wire if no covering lock is held */
2108 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2110 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2111 obd_valid valid = OBD_MD_FLGETATTR;
2112 struct obd_capa *oc;
/* regular files also need the EA sized into the reply buffer */
2115 if (S_ISREG(inode->i_mode)) {
2116 rc = ll_get_max_mdsize(sbi, &ealen);
2119 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2121 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2122 * capa for this inode. Because we only keep capas of dirs
2124 oc = ll_mdscapa_get(inode);
2125 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2129 rc = ll_inode_revalidate_fini(inode, rc);
2133 rc = ll_prep_inode(&inode, req, NULL);
2136 ptlrpc_req_finished(req);
/*
 * ll_inode_revalidate_it(): revalidate attributes (UPDATE|LOOKUP bits) and
 * then refresh the file size via cl_glimpse_size(), unless no objects have
 * been allocated yet (lli_smd == NULL).
 * NOTE(review): listing has gaps; rc declaration and RETURNs are missing
 * from this view.
 */
2140 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2145 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2146 MDS_INODELOCK_LOOKUP);
2148 /* if object not yet allocated, don't validate size */
2149 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2152 /* cl_glimpse_size will prefer locally cached writes if they extend
2156 rc = cl_glimpse_size(dentry->d_inode);
/*
 * ll_getattr_it(): getattr with an explicit intent - revalidate the inode
 * from the MDS, then fill *stat from the (now fresh) inode fields; size
 * and blocks are read under the inode size lock.
 * NOTE(review): listing has gaps (embedded line numbers jump); the error
 * return after revalidation and the final RETURN are not visible here.
 */
2161 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2162 struct lookup_intent *it, struct kstat *stat)
2164 struct inode *inode = de->d_inode;
2167 res = ll_inode_revalidate_it(de, it);
2168 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2173 stat->dev = inode->i_sb->s_dev;
2174 stat->ino = inode->i_ino;
2175 stat->mode = inode->i_mode;
2176 stat->nlink = inode->i_nlink;
2177 stat->uid = inode->i_uid;
2178 stat->gid = inode->i_gid;
2179 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2180 stat->atime = inode->i_atime;
2181 stat->mtime = inode->i_mtime;
2182 stat->ctime = inode->i_ctime;
2183 #ifdef HAVE_INODE_BLKSIZE
2184 stat->blksize = inode->i_blksize;
2186 stat->blksize = 1 << inode->i_blkbits;
/* size/blocks must be read consistently under the size lock */
2189 ll_inode_size_lock(inode, 0);
2190 stat->size = i_size_read(inode);
2191 stat->blocks = inode->i_blocks;
2192 ll_inode_size_unlock(inode, 0);
/*
 * ll_getattr(): VFS getattr entry point - wraps ll_getattr_it() with a
 * default IT_GETATTR intent.
 */
2196 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2198 struct lookup_intent it = { .it_op = IT_GETATTR };
2200 return ll_getattr_it(mnt, de, &it, stat);
/*
 * lustre_check_acl(): POSIX ACL permission callback - duplicate the cached
 * ACL under lli_lock, evaluate it with posix_acl_permission(), and release
 * the copy.  Compiled out when CONFIG_FS_POSIX_ACL is disabled.
 * NOTE(review): listing has gaps; the NULL-acl branch and returns are not
 * visible here.
 */
2204 int lustre_check_acl(struct inode *inode, int mask)
2206 #ifdef CONFIG_FS_POSIX_ACL
2207 struct ll_inode_info *lli = ll_i2info(inode);
2208 struct posix_acl *acl;
/* take a reference under the spinlock so the ACL can't vanish under us */
2212 spin_lock(&lli->lli_lock);
2213 acl = posix_acl_dup(lli->lli_posix_acl);
2214 spin_unlock(&lli->lli_lock);
2219 rc = posix_acl_permission(inode, acl, mask);
2220 posix_acl_release(acl);
/*
 * ll_inode_permission(): VFS permission check.  Two variants are compiled
 * depending on kernel version: the >= 2.6.10 one defers to
 * generic_permission() with lustre_check_acl as the ACL callback, the older
 * one open-codes the classic owner/group/other + capability checks.
 * Both route remote-client mounts through lustre_check_remote_perm().
 * NOTE(review): listing has gaps (embedded line numbers jump); #else/#endif
 * lines, returns and several braces are not visible here.
 */
2228 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2229 #ifndef HAVE_INODE_PERMISION_2ARGS
2230 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2232 int ll_inode_permission(struct inode *inode, int mask)
2238 /* as root inode are NOT getting validated in lookup operation,
2239 * need to do it before permission check. */
2241 if (inode == inode->i_sb->s_root->d_inode) {
2242 struct lookup_intent it = { .it_op = IT_LOOKUP };
2244 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2245 MDS_INODELOCK_LOOKUP);
2250 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2251 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2253 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2254 return lustre_check_remote_perm(inode, mask);
2256 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2257 rc = generic_permission(inode, mask, lustre_check_acl);
/* pre-2.6.10 variant: manual mode-bit evaluation */
2262 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2264 int mode = inode->i_mode;
2267 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2268 inode->i_ino, inode->i_generation, inode, mask);
2270 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2271 return lustre_check_remote_perm(inode, mask);
2273 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* writes denied on read-only or immutable inodes */
2275 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2276 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2278 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2280 if (current->fsuid == inode->i_uid) {
2283 if (((mode >> 3) & mask & S_IRWXO) != mask)
2285 rc = lustre_check_acl(inode, mask);
2289 goto check_capabilities;
2293 if (in_group_p(inode->i_gid))
2296 if ((mode & mask & S_IRWXO) == mask)
/* capability overrides: DAC_OVERRIDE, and DAC_READ_SEARCH for reads */
2300 if (!(mask & MAY_EXEC) ||
2301 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2302 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2305 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2306 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select the vectored-I/O method names by kernel API generation:
 * old kernels expose readv/writev, newer ones aio_read/aio_write. */
2313 #ifdef HAVE_FILE_READV
2314 #define READ_METHOD readv
2315 #define READ_FUNCTION ll_file_readv
2316 #define WRITE_METHOD writev
2317 #define WRITE_FUNCTION ll_file_writev
2319 #define READ_METHOD aio_read
2320 #define READ_FUNCTION ll_file_aio_read
2321 #define WRITE_METHOD aio_write
2322 #define WRITE_FUNCTION ll_file_aio_write
2325 /* -o localflock - only provides locally consistent flock locks */
2326 struct file_operations ll_file_operations = {
2327 .read = ll_file_read,
2328 .READ_METHOD = READ_FUNCTION,
2329 .write = ll_file_write,
2330 .WRITE_METHOD = WRITE_FUNCTION,
2331 .ioctl = ll_file_ioctl,
2332 .open = ll_file_open,
2333 .release = ll_file_release,
2334 .mmap = ll_file_mmap,
2335 .llseek = ll_file_seek,
2336 #ifdef HAVE_KERNEL_SENDFILE
2337 .sendfile = ll_file_sendfile,
2339 #ifdef HAVE_KERNEL_SPLICE_READ
2340 .splice_read = ll_file_splice_read,
/* default table: cluster-coherent flock/posix locks via ll_file_flock */
2345 struct file_operations ll_file_operations_flock = {
2346 .read = ll_file_read,
2347 .READ_METHOD = READ_FUNCTION,
2348 .write = ll_file_write,
2349 .WRITE_METHOD = WRITE_FUNCTION,
2350 .ioctl = ll_file_ioctl,
2351 .open = ll_file_open,
2352 .release = ll_file_release,
2353 .mmap = ll_file_mmap,
2354 .llseek = ll_file_seek,
2355 #ifdef HAVE_KERNEL_SENDFILE
2356 .sendfile = ll_file_sendfile,
2358 #ifdef HAVE_KERNEL_SPLICE_READ
2359 .splice_read = ll_file_splice_read,
2362 #ifdef HAVE_F_OP_FLOCK
2363 .flock = ll_file_flock,
2365 .lock = ll_file_flock
2368 /* These are for -o noflock - to return ENOSYS on flock calls */
2369 struct file_operations ll_file_operations_noflock = {
2370 .read = ll_file_read,
2371 .READ_METHOD = READ_FUNCTION,
2372 .write = ll_file_write,
2373 .WRITE_METHOD = WRITE_FUNCTION,
2374 .ioctl = ll_file_ioctl,
2375 .open = ll_file_open,
2376 .release = ll_file_release,
2377 .mmap = ll_file_mmap,
2378 .llseek = ll_file_seek,
2379 #ifdef HAVE_KERNEL_SENDFILE
2380 .sendfile = ll_file_sendfile,
2382 #ifdef HAVE_KERNEL_SPLICE_READ
2383 .splice_read = ll_file_splice_read,
2386 #ifdef HAVE_F_OP_FLOCK
2387 .flock = ll_file_noflock,
2389 .lock = ll_file_noflock
/* inode operations shared by all three file-operations flavours */
2392 struct inode_operations ll_file_inode_operations = {
2393 #ifdef HAVE_VFS_INTENT_PATCHES
2394 .setattr_raw = ll_setattr_raw,
2396 .setattr = ll_setattr,
2397 .truncate = ll_truncate,
2398 .getattr = ll_getattr,
2399 .permission = ll_inode_permission,
2400 .setxattr = ll_setxattr,
2401 .getxattr = ll_getxattr,
2402 .listxattr = ll_listxattr,
2403 .removexattr = ll_removexattr,
2406 /* dynamic ioctl number support routins */
/* global registry of dynamically registered ioctl handlers, protected by
 * an rwsem: readers dispatch, writers register/unregister */
2407 static struct llioc_ctl_data {
2408 struct rw_semaphore ioc_sem;
2409 struct list_head ioc_head;
2411 __RWSEM_INITIALIZER(llioc.ioc_sem),
2412 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* one registration: callback + the list of ioctl numbers it serves
 * (iocd_cmd is a flexible trailing array, iocd_count entries) */
2417 struct list_head iocd_list;
2418 unsigned int iocd_size;
2419 llioc_callback_t iocd_cb;
2420 unsigned int iocd_count;
2421 unsigned int iocd_cmd[0];
/*
 * ll_iocontrol_register(): register callback 'cb' for 'count' ioctl
 * numbers in 'cmd'.  Returns an opaque cookie (the allocation itself)
 * for later ll_iocontrol_unregister().
 * NOTE(review): listing has gaps; NULL returns and the final return of
 * in_data are not visible here.
 */
2424 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2427 struct llioc_data *in_data = NULL;
2430 if (cb == NULL || cmd == NULL ||
2431 count > LLIOC_MAX_CMD || count < 0)
2434 size = sizeof(*in_data) + count * sizeof(unsigned int);
2435 OBD_ALLOC(in_data, size);
2436 if (in_data == NULL)
2439 memset(in_data, 0, sizeof(*in_data));
2440 in_data->iocd_size = size;
2441 in_data->iocd_cb = cb;
2442 in_data->iocd_count = count;
2443 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2445 down_write(&llioc.ioc_sem);
2446 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2447 up_write(&llioc.ioc_sem);
/*
 * ll_iocontrol_unregister(): remove and free the registration identified
 * by 'magic' (the cookie returned from ll_iocontrol_register); warns when
 * the cookie is unknown.
 */
2452 void ll_iocontrol_unregister(void *magic)
2454 struct llioc_data *tmp;
2459 down_write(&llioc.ioc_sem);
2460 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2462 unsigned int size = tmp->iocd_size;
2464 list_del(&tmp->iocd_list);
2465 up_write(&llioc.ioc_sem);
2467 OBD_FREE(tmp, size);
2471 up_write(&llioc.ioc_sem);
2473 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2476 EXPORT_SYMBOL(ll_iocontrol_register);
2477 EXPORT_SYMBOL(ll_iocontrol_unregister);
2479 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2480 unsigned int cmd, unsigned long arg, int *rcp)
2482 enum llioc_iter ret = LLIOC_CONT;
2483 struct llioc_data *data;
2484 int rc = -EINVAL, i;
2486 down_read(&llioc.ioc_sem);
2487 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2488 for (i = 0; i < data->iocd_count; i++) {
2489 if (cmd != data->iocd_cmd[i])
2492 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2496 if (ret == LLIOC_STOP)
2499 up_read(&llioc.ioc_sem);