1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a struct ll_file_data from ll_file_data_slab.
 * NOTE(review): presumably the new object is returned to the caller (NULL on
 * allocation failure) -- confirm against the complete function body. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR(fd, ll_file_data_slab);
/* Give @fd back to ll_file_data_slab; counterpart of ll_file_data_get(). */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Copy the current state of @inode into @op_data for an MD operation.
 *
 * Fills in the fid, mode, a/m/ctime, size, block count, inode flags and the
 * IO epoch taken from the inode, copies the open file handle @fh into
 * op_data->op_handle, and stores the result of ll_mdscapa_get() in
 * op_data->op_capa1. */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Prepare @op_data for closing the open handle @och of @inode.
 *
 * Always marks mode and a/m/ctime as valid attributes.  The handle's
 * FMODE_WRITE bit is tested; when the MD export lacks OBD_CONNECT_SOM or the
 * inode is not a regular file, size and blocks are marked valid as well.
 * ll_epoch_close() and ll_pack_inode2opdata() complete the request data.
 * NOTE(review): the exact branch structure around ll_epoch_close() should be
 * confirmed against the complete function body. */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
92 if (!(och->och_flags & FMODE_WRITE))
95 if (!(ll_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM) ||
96 !S_ISREG(inode->i_mode))
97 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
99 ll_epoch_close(inode, op_data, &och, 0);
102 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/* Close the MDS open handle @och of the inode through @md_exp.
 *
 * Allocates an md_op_data, fills it with ll_prepare_close() and sends the
 * close with md_close().  Depending on the outcome it may run a Size-on-MDS
 * update (ll_sizeonmds_update()), destroy objects (ll_objects_destroy()), or
 * queue DONE_WRITING for a regular file that was open for write on a SOM
 * export whose epoch was not closed.  Afterwards the open replay data is
 * cleared, the handle cookie is poisoned with DEAD_HANDLE_MAGIC and the close
 * request, if any, is released.
 *
 * Returns an int status; -ENOMEM when the md_op_data cannot be allocated
 * (see the XXX note on that path about the leaked handle and request). */
106 static int ll_close_inode_openhandle(struct obd_export *md_exp,
108 struct obd_client_handle *och)
110 struct obd_export *exp = ll_i2mdexp(inode);
111 struct md_op_data *op_data;
112 struct ptlrpc_request *req = NULL;
113 struct obd_device *obd = class_exp2obd(exp);
120 * XXX: in case of LMV, is this correct to access
123 CERROR("Invalid MDC connection handle "LPX64"\n",
124 ll_i2mdexp(inode)->exp_handle.h_cookie);
129 * here we check if this is forced umount. If so this is called on
130 * canceling "open lock" and we do not call md_close() in this case, as
131 * it will not be successful, as import is already deactivated.
136 OBD_ALLOC_PTR(op_data);
138 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
140 ll_prepare_close(inode, op_data, och);
141 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
142 rc = md_close(md_exp, op_data, och->och_mod, &req);
147 /* This close must have the epoch closed. */
148 LASSERT(exp->exp_connect_flags & OBD_CONNECT_SOM);
149 LASSERT(epoch_close);
150 /* MDS has instructed us to obtain Size-on-MDS attribute from
151 * OSTs and send setattr back to MDS. */
152 rc = ll_sizeonmds_update(inode, och->och_mod,
153 &och->och_fh, op_data->op_ioepoch);
155 CERROR("inode %lu mdc Size-on-MDS update failed: "
156 "rc = %d\n", inode->i_ino, rc);
160 CERROR("inode %lu mdc close failed: rc = %d\n",
163 ll_finish_md_op_data(op_data);
166 rc = ll_objects_destroy(req, inode);
168 CERROR("inode %lu ll_objects destroy: rc = %d\n",
175 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
176 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
177 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
180 ptlrpc_close_replay_seq(req);
181 md_clear_open_replay_data(md_exp, och);
182 /* Free @och if it is not waiting for DONE_WRITING. */
183 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
186 if (req) /* This is close request */
187 ptlrpc_req_finished(req);
/* Close the per-inode MDS open handle that matches the open mode in @flags.
 *
 * @flags selects the handle slot and its use counter: write
 * (lli_mds_write_och), exec (lli_mds_exec_och) or, otherwise, read
 * (lli_mds_read_och; FMODE_READ is asserted).  The use counter is examined
 * under lli_och_sem; a handle that is still in use is left alone.  When a
 * handle is obtained it is closed with ll_close_inode_openhandle(). */
191 int ll_md_real_close(struct inode *inode, int flags)
193 struct ll_inode_info *lli = ll_i2info(inode);
194 struct obd_client_handle **och_p;
195 struct obd_client_handle *och;
200 if (flags & FMODE_WRITE) {
201 och_p = &lli->lli_mds_write_och;
202 och_usecount = &lli->lli_open_fd_write_count;
203 } else if (flags & FMODE_EXEC) {
204 och_p = &lli->lli_mds_exec_och;
205 och_usecount = &lli->lli_open_fd_exec_count;
207 LASSERT(flags & FMODE_READ);
208 och_p = &lli->lli_mds_read_och;
209 och_usecount = &lli->lli_open_fd_read_count;
212 down(&lli->lli_och_sem);
213 if (*och_usecount) { /* There are still users of this handle, so
215 up(&lli->lli_och_sem);
220 up(&lli->lli_och_sem);
222 if (och) { /* There might be a race and somebody have freed this och
224 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file-descriptor close of @file on @inode.
 *
 * Steps visible here:
 *  - if the descriptor holds a group lock, clear the group-lock flags and
 *    release it with ll_extent_unlock(..., LCK_GROUP, ...);
 *  - under lli_och_sem, decrement the open counter that matches the open
 *    mode recorded in fd->fd_omode (write, exec, otherwise read);
 *  - test for a granted OPEN inodebits lock with md_lock_match(); when none
 *    matches, close on the MDS through ll_md_real_close();
 *  - a file with a negative dentry is only reported with CERROR();
 *  - finally detach and free the ll_file_data and call ll_capa_close(). */
231 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
234 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
235 struct ll_inode_info *lli = ll_i2info(inode);
239 /* clear group lock, if present */
240 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
242 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
243 fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
244 rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
249 /* Let's see if we have good enough OPEN lock on the file and if
250 we can skip talking to MDS */
251 if (file->f_dentry->d_inode) { /* Can this ever be false? */
253 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
254 struct lustre_handle lockh;
255 struct inode *inode = file->f_dentry->d_inode;
256 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
258 down(&lli->lli_och_sem);
259 if (fd->fd_omode & FMODE_WRITE) {
261 LASSERT(lli->lli_open_fd_write_count);
262 lli->lli_open_fd_write_count--;
263 } else if (fd->fd_omode & FMODE_EXEC) {
265 LASSERT(lli->lli_open_fd_exec_count);
266 lli->lli_open_fd_exec_count--;
269 LASSERT(lli->lli_open_fd_read_count);
270 lli->lli_open_fd_read_count--;
272 up(&lli->lli_och_sem);
274 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
275 LDLM_IBITS, &policy, lockmode,
277 rc = ll_md_real_close(file->f_dentry->d_inode,
281 CERROR("Releasing a file %p with negative dentry %p. Name %s",
282 file, file->f_dentry, file->f_dentry->d_name.name);
285 LUSTRE_FPRIVATE(file) = NULL;
286 ll_file_data_put(fd);
287 ll_capa_close(inode);
/* Prototype only: called from ll_file_release() with the inode's stripe MD.
 * NOTE(review): the definition is not in this part of the file -- presumably
 * it lives in the LOV layer; confirm. */
292 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
294 /* While this returns an error code, its caller fput() does not, so we need
295 * to make every effort to clean up all of our state here. Also, applications
296 * rarely check close errors and even if an error is returned they will not
297 * re-try the close call.
/* Release method for @file on @inode.
 *
 * Steps visible here:
 *  - with CONFIG_FS_POSIX_ACL, on a remote client (LL_SBI_RMT_CLIENT) and
 *    for the filesystem root inode, clear LL_FILE_RMTACL on the descriptor
 *    and drop the current pid's entries from sbi->ll_rct and sbi->ll_et;
 *  - count the release in the LPROC_LL_RELEASE statistic unless @file is the
 *    root dentry;
 *  - stop statahead when this descriptor is the recorded opendir key;
 *  - for the root dentry only detach and free the ll_file_data;
 *  - otherwise reset the async rc state (lov_test_and_clear_async_rc(),
 *    lli_async_rc) and finish with ll_md_close(). */
299 int ll_file_release(struct inode *inode, struct file *file)
301 struct ll_file_data *fd;
302 struct ll_sb_info *sbi = ll_i2sbi(inode);
303 struct ll_inode_info *lli = ll_i2info(inode);
304 struct lov_stripe_md *lsm = lli->lli_smd;
308 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
309 inode->i_generation, inode);
311 #ifdef CONFIG_FS_POSIX_ACL
312 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
313 inode == inode->i_sb->s_root->d_inode) {
314 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
317 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
318 fd->fd_flags &= ~LL_FILE_RMTACL;
319 rct_del(&sbi->ll_rct, cfs_curproc_pid());
320 et_search_free(&sbi->ll_et, cfs_curproc_pid());
325 if (inode->i_sb->s_root != file->f_dentry)
326 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
327 fd = LUSTRE_FPRIVATE(file);
330 /* The last ref on @file, maybe not the owner pid of statahead.
331 * Different processes can open the same dir, "ll_opendir_key" means:
332 * it is me that should stop the statahead thread. */
333 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
334 ll_stop_statahead(inode, lli->lli_opendir_key);
336 if (inode->i_sb->s_root == file->f_dentry) {
337 LUSTRE_FPRIVATE(file) = NULL;
338 ll_file_data_put(fd);
343 lov_test_and_clear_async_rc(lsm);
344 lli->lli_async_rc = 0;
346 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/* Send the open intent @itp for @file to the MDS with md_intent_lock().
 *
 * @lmm/@lmmsize optionally carry striping data; when both are zero the
 * MDS_OPEN_LOCK flag is added to the intent so that an open lock is asked
 * for.  The request is built from the parent directory inode, the file's
 * inode and its dentry name.  On success the lock data is bound to the inode
 * (md_set_lock_data()) when a lock mode was granted, and the inode is
 * refreshed with ll_prep_inode().  On the way out the intent's request is
 * released, DISP_ENQ_COMPLETE is cleared and the intent lock is dropped.
 *
 * Returns an int status; PTR_ERR() of the op_data when it cannot be
 * prepared, otherwise the enqueue/open error if there was one. */
350 static int ll_intent_file_open(struct file *file, void *lmm,
351 int lmmsize, struct lookup_intent *itp)
353 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
354 struct dentry *parent = file->f_dentry->d_parent;
355 const char *name = file->f_dentry->d_name.name;
356 const int len = file->f_dentry->d_name.len;
357 struct md_op_data *op_data;
358 struct ptlrpc_request *req;
365 /* Usually we come here only for NFSD, and we want open lock.
366 But we can also get here with pre 2.6.15 patchless kernels, and in
367 that case that lock is also ok */
368 /* We can also get here if there was cached open handle in revalidate_it
369 * but it disappeared while we were getting from there to ll_file_open.
370 * But this means this file was closed and immediately opened which
371 * makes a good candidate for using OPEN lock */
372 /* If lmmsize & lmm are not 0, we are just setting stripe info
373 * parameters. No need for the open lock */
374 if (!lmm && !lmmsize)
375 itp->it_flags |= MDS_OPEN_LOCK;
377 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
378 file->f_dentry->d_inode, name, len,
379 O_RDWR, LUSTRE_OPC_ANY, NULL);
381 RETURN(PTR_ERR(op_data));
383 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
384 0 /*unused */, &req, ll_md_blocking_ast, 0);
385 ll_finish_md_op_data(op_data);
387 /* reason for keep own exit path - don`t flood log
388 * with messages with -ESTALE errors.
390 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
391 it_open_error(DISP_OPEN_OPEN, itp))
393 ll_release_openhandle(file->f_dentry, itp);
397 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
398 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
399 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
403 if (itp->d.lustre.it_lock_mode)
404 md_set_lock_data(sbi->ll_md_exp,
405 &itp->d.lustre.it_lock_handle,
406 file->f_dentry->d_inode);
408 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
410 ptlrpc_req_finished(itp->d.lustre.it_data);
411 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
412 ll_intent_drop_lock(itp);
/* Initialise the client open handle @och from the open reply held in @it.
 *
 * Takes the mdt_body from the intent's request, copies its file handle into
 * och->och_fh, sets the handle magic, fid (from @lli) and flags (from the
 * intent), and records the reply's IO epoch in lli->lli_ioepoch.
 *
 * Returns the result of md_set_open_replay_data(). */
417 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
418 struct lookup_intent *it, struct obd_client_handle *och)
420 struct ptlrpc_request *req = it->d.lustre.it_data;
421 struct mdt_body *body;
425 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
426 LASSERT(body != NULL); /* reply already checked out */
428 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
429 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
430 och->och_fid = lli->lli_fid;
431 och->och_flags = it->it_flags;
432 lli->lli_ioepoch = body->ioepoch;
434 return md_set_open_replay_data(md_exp, och, req);
/* Finish an open locally: bind @fd to @file.
 *
 * Asserts that @file has no private data yet.  The open handle @och is
 * filled from the intent reply with ll_och_fill() and the opened epoch is
 * logged for write opens whose reply carries a valid size.  Then @fd becomes
 * the file's private data, its readahead state is initialised and the open
 * mode from the intent flags is remembered in fd->fd_omode.
 * NOTE(review): callers in this file pass @och == NULL when reusing an
 * existing handle, so the ll_och_fill() part is presumably conditional on
 * @och -- confirm against the complete function body. */
437 int ll_local_open(struct file *file, struct lookup_intent *it,
438 struct ll_file_data *fd, struct obd_client_handle *och)
440 struct inode *inode = file->f_dentry->d_inode;
441 struct ll_inode_info *lli = ll_i2info(inode);
444 LASSERT(!LUSTRE_FPRIVATE(file));
449 struct ptlrpc_request *req = it->d.lustre.it_data;
450 struct mdt_body *body;
453 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
457 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
458 if ((it->it_flags & FMODE_WRITE) &&
459 (body->valid & OBD_MD_FLSIZE))
460 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
461 lli->lli_ioepoch, PFID(&lli->lli_fid));
464 LUSTRE_FPRIVATE(file) = fd;
465 ll_readahead_init(inode, &fd->fd_ras);
466 fd->fd_omode = it->it_flags;
470 /* Open a file, and (for the very first open) create objects on the OSTs at
471 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
472 * creation or open until ll_lov_setstripe() ioctl is called. We grab
473 * lli_open_sem to ensure no other process will create objects, send the
474 * stripe MD to the MDS, or try to destroy the objects if that fails.
476 * If we already have the stripe MD locally then we don't request it in
477 * md_open(), by passing a lmm_size = 0.
479 * It is up to the application to ensure no other processes open this file
480 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
481 * used. We might be able to avoid races of that sort by getting lli_open_sem
482 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
483 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* Open method for @file on @inode.
 *
 * Outline of the steps visible here:
 *  1. Allocate the per-descriptor ll_file_data.
 *  2. For a directory, under lli_lock: claim statahead ownership (opendir
 *     key = fd, opendir pid = current pid) when nobody owns it; when the
 *     current pid already owns it, stop the running statahead instead.
 *  3. For the filesystem root dentry just attach the ll_file_data.
 *  4. Without a usable intent, build one from file->f_flags: derive the
 *     access mode, treat O_TRUNC as a write open, add
 *     MDS_OPEN_OWNEROVERRIDE for write opens and drop O_EXCL.
 *  5. Select the handle slot and use counter for the mode (write, exec,
 *     otherwise read) and take lli_och_sem.
 *  6. If an MDS handle already exists, release any extra open handle the
 *     intent carries and reuse the existing one via
 *     ll_local_open(..., NULL).
 *  7. Otherwise, when the intent has no disposition, drop lli_och_sem (the
 *     blocking AST also takes it) and enqueue with ll_intent_file_open()
 *     under O_CHECK_STALE; then allocate the handle and complete with
 *     ll_local_open().
 *  8. Error paths free the ll_file_data, free and reset the handle slot,
 *     release lli_och_sem and stop statahead if it was claimed in step 2.
 *
 * Returns an int status (-ENOMEM when the handle cannot be allocated). */
485 int ll_file_open(struct inode *inode, struct file *file)
487 struct ll_inode_info *lli = ll_i2info(inode);
488 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
489 .it_flags = file->f_flags };
490 struct lov_stripe_md *lsm;
491 struct ptlrpc_request *req = NULL;
492 struct obd_client_handle **och_p;
494 struct ll_file_data *fd;
495 int rc = 0, opendir_set = 0;
498 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
499 inode->i_generation, inode, file->f_flags);
501 #ifdef HAVE_VFS_INTENT_PATCHES
504 it = file->private_data; /* XXX: compat macro */
505 file->private_data = NULL; /* prevent ll_local_open assertion */
508 fd = ll_file_data_get();
513 if (S_ISDIR(inode->i_mode)) {
515 spin_lock(&lli->lli_lock);
516 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
517 LASSERT(lli->lli_sai == NULL);
518 lli->lli_opendir_key = fd;
519 lli->lli_opendir_pid = cfs_curproc_pid();
521 } else if (unlikely(lli->lli_opendir_pid == cfs_curproc_pid() &&
522 lli->lli_opendir_key != NULL)) {
523 /* Two cases for this:
524 * (1) The same process open such directory many times.
525 * (2) The old process opened the directory, and exited
526 * before its children processes. Then new process
527 * with the same pid opens such directory before the
528 * old process's children processes exit.
529 * reset stat ahead for such cases. */
530 spin_unlock(&lli->lli_lock);
531 CDEBUG(D_INFO, "Conflict statahead for %.*s "DFID
532 " reset it.\n", file->f_dentry->d_name.len,
533 file->f_dentry->d_name.name,
534 PFID(&lli->lli_fid));
535 ll_stop_statahead(inode, lli->lli_opendir_key);
538 spin_unlock(&lli->lli_lock);
541 if (inode->i_sb->s_root == file->f_dentry) {
542 LUSTRE_FPRIVATE(file) = fd;
546 if (!it || !it->d.lustre.it_disposition) {
547 /* Convert f_flags into access mode. We cannot use file->f_mode,
548 * because everything but O_ACCMODE mask was stripped from
550 if ((oit.it_flags + 1) & O_ACCMODE)
552 if (file->f_flags & O_TRUNC)
553 oit.it_flags |= FMODE_WRITE;
555 /* kernel only call f_op->open in dentry_open. filp_open calls
556 * dentry_open after call to open_namei that checks permissions.
557 * Only nfsd_open call dentry_open directly without checking
558 * permissions and because of that this code below is safe. */
559 if (oit.it_flags & FMODE_WRITE)
560 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
562 /* We do not want O_EXCL here, presumably we opened the file
563 * already? XXX - NFS implications? */
564 oit.it_flags &= ~O_EXCL;
570 /* Let's see if we have file open on MDS already. */
571 if (it->it_flags & FMODE_WRITE) {
572 och_p = &lli->lli_mds_write_och;
573 och_usecount = &lli->lli_open_fd_write_count;
574 } else if (it->it_flags & FMODE_EXEC) {
575 och_p = &lli->lli_mds_exec_och;
576 och_usecount = &lli->lli_open_fd_exec_count;
578 och_p = &lli->lli_mds_read_och;
579 och_usecount = &lli->lli_open_fd_read_count;
582 down(&lli->lli_och_sem);
583 if (*och_p) { /* Open handle is present */
584 if (it_disposition(it, DISP_OPEN_OPEN)) {
585 /* Well, there's extra open request that we do not need,
586 let's close it somehow. This will decref request. */
587 rc = it_open_error(DISP_OPEN_OPEN, it);
589 up(&lli->lli_och_sem);
590 ll_file_data_put(fd);
591 GOTO(out_openerr, rc);
593 ll_release_openhandle(file->f_dentry, it);
594 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
599 rc = ll_local_open(file, it, fd, NULL);
602 up(&lli->lli_och_sem);
603 ll_file_data_put(fd);
604 GOTO(out_openerr, rc);
607 LASSERT(*och_usecount == 0);
608 if (!it->d.lustre.it_disposition) {
609 /* We cannot just request lock handle now, new ELC code
610 means that one of other OPEN locks for this file
611 could be cancelled, and since blocking ast handler
612 would attempt to grab och_sem as well, that would
613 result in a deadlock */
614 up(&lli->lli_och_sem);
615 it->it_flags |= O_CHECK_STALE;
616 rc = ll_intent_file_open(file, NULL, 0, it);
617 it->it_flags &= ~O_CHECK_STALE;
619 ll_file_data_put(fd);
620 GOTO(out_openerr, rc);
623 /* Got some error? Release the request */
624 if (it->d.lustre.it_status < 0) {
625 req = it->d.lustre.it_data;
626 ptlrpc_req_finished(req);
628 md_set_lock_data(ll_i2sbi(inode)->ll_md_exp,
629 &it->d.lustre.it_lock_handle,
630 file->f_dentry->d_inode);
633 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
635 ll_file_data_put(fd);
636 GOTO(out_och_free, rc = -ENOMEM);
639 req = it->d.lustre.it_data;
641 /* md_intent_lock() didn't get a request ref if there was an
642 * open error, so don't do cleanup on the request here
644 /* XXX (green): Should not we bail out on any error here, not
645 * just open error? */
646 rc = it_open_error(DISP_OPEN_OPEN, it);
648 ll_file_data_put(fd);
649 GOTO(out_och_free, rc);
652 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
653 rc = ll_local_open(file, it, fd, *och_p);
655 ll_file_data_put(fd);
656 GOTO(out_och_free, rc);
659 up(&lli->lli_och_sem);
661 /* Must do this outside lli_och_sem lock to prevent deadlock where
662 different kind of OPEN lock for this same inode gets cancelled
663 by ldlm_cancel_lru */
664 if (!S_ISREG(inode->i_mode))
671 if (file->f_flags & O_LOV_DELAY_CREATE ||
672 !(file->f_mode & FMODE_WRITE)) {
673 CDEBUG(D_INODE, "object creation was delayed\n");
677 file->f_flags &= ~O_LOV_DELAY_CREATE;
680 ptlrpc_req_finished(req);
682 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
686 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
687 *och_p = NULL; /* OBD_FREE writes some magic there */
690 up(&lli->lli_och_sem);
692 if (opendir_set != 0)
693 ll_stop_statahead(inode, lli->lli_opendir_key);
699 /* Fills the obdo with the attributes for the inode defined by lsm */
/* Requires the inode to have a stripe MD (asserted).  The request names the
 * object by lsm_object_id/lsm_object_gr and asks for size, blocks, block
 * size and a/m/ctime.  It is sent with obd_getattr_async() on a freshly
 * prepared ptlrpc set and waited for with ptlrpc_set_wait(); the capability
 * taken with ll_mdscapa_get() is dropped afterwards.  The returned valid
 * mask is limited to blocks, block size, times and size before the inode is
 * refreshed with obdo_refresh_inode(). */
700 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
702 struct ptlrpc_request_set *set;
703 struct ll_inode_info *lli = ll_i2info(inode);
704 struct lov_stripe_md *lsm = lli->lli_smd;
706 struct obd_info oinfo = { { { 0 } } };
710 LASSERT(lsm != NULL);
714 oinfo.oi_oa->o_id = lsm->lsm_object_id;
715 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
716 oinfo.oi_oa->o_mode = S_IFREG;
717 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
718 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
719 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
720 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
722 oinfo.oi_capa = ll_mdscapa_get(inode);
724 set = ptlrpc_prep_set();
726 CERROR("can't allocate ptlrpc set\n");
729 rc = obd_getattr_async(ll_i2dtexp(inode), &oinfo, set);
731 rc = ptlrpc_set_wait(set);
732 ptlrpc_set_destroy(set);
734 capa_put(oinfo.oi_capa);
738 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
739 OBD_MD_FLATIME | OBD_MD_FLMTIME |
740 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
742 obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid);
743 CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
744 lli->lli_smd->lsm_object_id, i_size_read(inode),
745 (unsigned long long)inode->i_blocks,
746 (unsigned long)ll_inode_blksize(inode));
/* Merge the lock value block of @inode's stripes into the inode.
 *
 * With the inode size lock held: seed an lvb from the inode
 * (inode_init_lvb()), merge it over the stripe MD with obd_merge_lvb(), then
 * write the resulting size, block count and m/a/ctime back to the inode. */
750 int ll_merge_lvb(struct inode *inode)
752 struct ll_inode_info *lli = ll_i2info(inode);
753 struct ll_sb_info *sbi = ll_i2sbi(inode);
759 ll_inode_size_lock(inode, 1);
760 inode_init_lvb(inode, &lvb);
761 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
762 i_size_write(inode, lvb.lvb_size);
763 inode->i_blocks = lvb.lvb_blocks;
765 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
766 LTIME_S(inode->i_atime) = lvb.lvb_atime;
767 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
768 ll_inode_size_unlock(inode, 1);
773 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
/* Reset @io and seed it from the state of @file.
 *
 * Zeroes the whole cl_io, then records O_NONBLOCK and O_APPEND from
 * file->f_flags and the inode's cl_object.  The lock requirement defaults to
 * CILR_MAYBE; it becomes CILR_NEVER when the descriptor has
 * LL_FILE_IGNORE_LOCK or the superblock has LL_SBI_NOLCK, and
 * CILR_MANDATORY for O_APPEND files otherwise.
 * NOTE(review): @write presumably gates the wr_append assignment -- confirm
 * against the complete function body. */
780 void ll_io_init(struct cl_io *io, const struct file *file, int write)
782 struct inode *inode = file->f_dentry->d_inode;
783 struct ll_sb_info *sbi = ll_i2sbi(inode);
784 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
787 memset(io, 0, sizeof *io);
788 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
790 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
791 io->ci_obj = ll_i2info(inode)->lli_clob;
792 io->ci_lockreq = CILR_MAYBE;
793 if (fd->fd_flags & LL_FILE_IGNORE_LOCK || sbi->ll_flags & LL_SBI_NOLCK)
794 io->ci_lockreq = CILR_NEVER;
795 else if (file->f_flags & O_APPEND)
796 io->ci_lockreq = CILR_MANDATORY;
/* Common read/write/sendfile path shared by the file I/O entry points below.
 *
 * Uses the cl_io embedded in the environment (ccc_env_info(env)->cti_io),
 * initialises it with ll_io_init() and cl_io_rw_init() for @iot at *@ppos
 * for @count bytes.  When cl_io_rw_init() returns 0 the per-environment
 * vvp/ccc io state is loaded from @args (actor and target for sendfile,
 * otherwise the iovec, segment count and, without HAVE_FILE_WRITEV, the
 * kiocb) and the I/O runs in cl_io_loop(); otherwise the result is taken
 * from io->ci_result.  When bytes were transferred (ci_nob > 0) *@ppos is
 * updated from the io's position. */
799 static ssize_t ll_file_io_generic(const struct lu_env *env,
800 struct ccc_io_args *args, struct file *file,
801 enum cl_io_type iot, loff_t *ppos, size_t count)
807 io = &ccc_env_info(env)->cti_io;
808 ll_io_init(io, file, iot == CIT_WRITE);
811 io->u.ci_rd.rd_is_sendfile = args->cia_is_sendfile;
813 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
814 struct vvp_io *vio = vvp_env_io(env);
815 struct ccc_io *cio = ccc_env_io(env);
816 if (cl_io_is_sendfile(io)) {
817 vio->u.read.cui_actor = args->cia_actor;
818 vio->u.read.cui_target = args->cia_target;
820 cio->cui_iov = args->cia_iov;
821 cio->cui_nrsegs = args->cia_nrsegs;
822 #ifndef HAVE_FILE_WRITEV
823 cio->cui_iocb = args->cia_iocb;
826 cio->cui_fd = LUSTRE_FPRIVATE(file);
827 result = cl_io_loop(env, io);
829 /* cl_io_rw_init() handled IO */
830 result = io->ci_result;
831 if (io->ci_nob > 0) {
833 *ppos = io->u.ci_wr.wr.crw_pos;
841 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate the first *@nr_segs entries of @iov and total their lengths.
 *
 * Each segment is checked for a negative length / negative running total
 * and with access_ok(VERIFY_READ, ...); the length of a segment that fails
 * the access check is taken back out of the running total.
 * NOTE(review): presumably *@count receives the total and *@nr_segs is
 * trimmed to the usable segments, as in the kernel routine this was copied
 * from -- confirm against the complete function body. */
843 static int ll_file_get_iov_count(const struct iovec *iov,
844 unsigned long *nr_segs, size_t *count)
849 for (seg = 0; seg < *nr_segs; seg++) {
850 const struct iovec *iv = &iov[seg];
853 * If any segment has a negative length, or the cumulative
854 * length ever wraps negative then return -EINVAL.
857 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
859 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
864 cnt -= iv->iov_len; /* This segment is no good */
871 #ifdef HAVE_FILE_READV
/* Vectored read entry point (HAVE_FILE_READV builds).
 *
 * Validates @iov with ll_file_get_iov_count(), takes a cl environment,
 * fills the per-environment ccc_io_args (not sendfile, iovec, segment
 * count) and runs ll_file_io_generic() as CIT_READ at *@ppos.  The
 * environment is released before returning; PTR_ERR(env) is returned when
 * no environment can be obtained. */
872 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
873 unsigned long nr_segs, loff_t *ppos)
876 struct ccc_io_args *args;
882 result = ll_file_get_iov_count(iov, &nr_segs, &count);
886 env = cl_env_get(&refcheck);
888 RETURN(PTR_ERR(env));
890 args = &vvp_env_info(env)->vti_args;
891 args->cia_is_sendfile = 0;
892 args->cia_iov = (struct iovec *)iov;
893 args->cia_nrsegs = nr_segs;
894 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
895 cl_env_put(env, &refcheck);
/* Plain read entry point built on ll_file_readv().
 *
 * Wraps the user buffer @buf/@count in the single iovec kept in the cl
 * environment (vti_local_iov) and forwards it as a one-segment vectored
 * read at *@ppos. */
899 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
903 struct iovec *local_iov;
908 env = cl_env_get(&refcheck);
910 RETURN(PTR_ERR(env));
912 local_iov = &vvp_env_info(env)->vti_local_iov;
913 local_iov->iov_base = (void __user *)buf;
914 local_iov->iov_len = count;
915 result = ll_file_readv(file, local_iov, 1, ppos);
916 cl_env_put(env, &refcheck);
/* AIO read entry point.
 *
 * Validates @iov with ll_file_get_iov_count(), takes a cl environment,
 * fills the per-environment ccc_io_args (not sendfile, iovec, segment count
 * and @iocb) and runs ll_file_io_generic() as CIT_READ on iocb->ki_filp.
 * The position is taken from and updated in iocb->ki_pos. */
921 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
922 unsigned long nr_segs, loff_t pos)
925 struct ccc_io_args *args;
931 result = ll_file_get_iov_count(iov, &nr_segs, &count);
935 env = cl_env_get(&refcheck);
937 RETURN(PTR_ERR(env));
939 args = &vvp_env_info(env)->vti_args;
940 args->cia_is_sendfile = 0;
941 args->cia_iov = (struct iovec *)iov;
942 args->cia_nrsegs = nr_segs;
943 args->cia_iocb = iocb;
944 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
945 &iocb->ki_pos, count);
946 cl_env_put(env, &refcheck);
/* Plain read entry point built on ll_file_aio_read().
 *
 * Uses the iovec and kiocb kept in the cl environment: the iovec wraps
 * @buf/@count, the kiocb is initialised as a synchronous one for @file with
 * ki_pos = *@ppos and ki_left = @count.  After the read, *@ppos is updated
 * from the kiocb. */
950 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
954 struct iovec *local_iov;
960 env = cl_env_get(&refcheck);
962 RETURN(PTR_ERR(env));
964 local_iov = &vvp_env_info(env)->vti_local_iov;
965 kiocb = &vvp_env_info(env)->vti_kiocb;
966 local_iov->iov_base = (void __user *)buf;
967 local_iov->iov_len = count;
968 init_sync_kiocb(kiocb, file);
969 kiocb->ki_pos = *ppos;
970 kiocb->ki_left = count;
972 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
973 *ppos = kiocb->ki_pos;
975 cl_env_put(env, &refcheck);
981 * Write to a file (through the page cache).
983 #ifdef HAVE_FILE_WRITEV
/* Vectored write entry point (HAVE_FILE_WRITEV builds).
 *
 * Validates @iov with ll_file_get_iov_count(), takes a cl environment,
 * fills the per-environment ccc_io_args (iovec, segment count) and runs
 * ll_file_io_generic() as CIT_WRITE at *@ppos. */
984 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
985 unsigned long nr_segs, loff_t *ppos)
988 struct ccc_io_args *args;
994 result = ll_file_get_iov_count(iov, &nr_segs, &count);
998 env = cl_env_get(&refcheck);
1000 RETURN(PTR_ERR(env));
1002 args = &vvp_env_info(env)->vti_args;
1003 args->cia_iov = (struct iovec *)iov;
1004 args->cia_nrsegs = nr_segs;
1005 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1006 cl_env_put(env, &refcheck);
/* Plain write entry point built on ll_file_writev().
 *
 * Wraps the user buffer @buf/@count in the single iovec kept in the cl
 * environment (vti_local_iov) and forwards it as a one-segment vectored
 * write at *@ppos. */
1010 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1014 struct iovec *local_iov;
1019 env = cl_env_get(&refcheck);
1021 RETURN(PTR_ERR(env));
1023 local_iov = &vvp_env_info(env)->vti_local_iov;
1024 local_iov->iov_base = (void __user *)buf;
1025 local_iov->iov_len = count;
1027 result = ll_file_writev(file, local_iov, 1, ppos);
1028 cl_env_put(env, &refcheck);
1032 #else /* AIO stuff */
/* AIO write entry point.
 *
 * Validates @iov with ll_file_get_iov_count(), takes a cl environment,
 * fills the per-environment ccc_io_args (iovec, segment count and @iocb)
 * and runs ll_file_io_generic() as CIT_WRITE on iocb->ki_filp.  The
 * position is taken from and updated in iocb->ki_pos. */
1033 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1034 unsigned long nr_segs, loff_t pos)
1037 struct ccc_io_args *args;
1043 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1047 env = cl_env_get(&refcheck);
1049 RETURN(PTR_ERR(env));
1051 args = &vvp_env_info(env)->vti_args;
1052 args->cia_iov = (struct iovec *)iov;
1053 args->cia_nrsegs = nr_segs;
1054 args->cia_iocb = iocb;
1055 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1056 &iocb->ki_pos, count);
1057 cl_env_put(env, &refcheck);
/* Plain write entry point built on ll_file_aio_write().
 *
 * Uses the iovec and kiocb kept in the cl environment: the iovec wraps
 * @buf/@count, the kiocb is initialised as a synchronous one for @file with
 * ki_pos = *@ppos and ki_left = @count.  After the write, *@ppos is updated
 * from the kiocb. */
1061 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1065 struct iovec *local_iov;
1066 struct kiocb *kiocb;
1071 env = cl_env_get(&refcheck);
1073 RETURN(PTR_ERR(env));
1075 local_iov = &vvp_env_info(env)->vti_local_iov;
1076 kiocb = &vvp_env_info(env)->vti_kiocb;
1077 local_iov->iov_base = (void __user *)buf;
1078 local_iov->iov_len = count;
1079 init_sync_kiocb(kiocb, file);
1080 kiocb->ki_pos = *ppos;
1081 kiocb->ki_left = count;
1083 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1084 *ppos = kiocb->ki_pos;
1086 cl_env_put(env, &refcheck);
1093 * Send file content (through pagecache) somewhere with helper
/* sendfile entry point.
 *
 * Takes a cl environment, marks the per-environment ccc_io_args as a
 * sendfile request carrying @actor and @target, and runs
 * ll_file_io_generic() as CIT_READ on @in_file at *@ppos for @count bytes.
 * PTR_ERR(env) is returned when no environment can be obtained. */
1095 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1096 read_actor_t actor, void *target)
1099 struct ccc_io_args *args;
1104 env = cl_env_get(&refcheck);
1106 RETURN(PTR_ERR(env));
1108 args = &vvp_env_info(env)->vti_args;
1109 args->cia_is_sendfile = 1;
1110 args->cia_target = target;
1111 args->cia_actor = actor;
1112 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1113 cl_env_put(env, &refcheck);
/* Recreate one object of @inode's file; the request is a struct
 * ll_recreate_obj read from user space.
 *
 * CFS_CAP_SYS_ADMIN is checked first.  With the inode size lock held, the
 * inode's stripe MD is duplicated into a temporary copy (-ENOENT without a
 * stripe MD, -ENOMEM when the copy cannot be allocated), an obdo is filled
 * with the requested object id and group, the OST index (carried in
 * o_nlink) and OBD_FL_RECREATE_OBJS plus type and times from the inode, and
 * obd_create() is called on the data export.  The temporary copy is freed
 * and the lock released afterwards. */
1117 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1120 struct obd_export *exp = ll_i2dtexp(inode);
1121 struct ll_recreate_obj ucreatp;
1122 struct obd_trans_info oti = { 0 };
1123 struct obdo *oa = NULL;
1126 struct lov_stripe_md *lsm, *lsm2;
1129 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1132 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1133 sizeof(struct ll_recreate_obj)))
1140 ll_inode_size_lock(inode, 0);
1141 lsm = ll_i2info(inode)->lli_smd;
1143 GOTO(out, rc = -ENOENT);
1144 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1145 (lsm->lsm_stripe_count));
1147 OBD_ALLOC(lsm2, lsm_size);
1149 GOTO(out, rc = -ENOMEM);
1151 oa->o_id = ucreatp.lrc_id;
1152 oa->o_gr = ucreatp.lrc_group;
1153 oa->o_nlink = ucreatp.lrc_ost_idx;
1154 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1155 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1156 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1157 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1159 memcpy(lsm2, lsm, lsm_size);
1160 rc = obd_create(exp, oa, &lsm2, &oti);
1162 OBD_FREE(lsm2, lsm_size);
1165 ll_inode_size_unlock(inode, 0);
/* Set the striping of @inode from the user-supplied @lum (@lum_size bytes).
 *
 * Works under the inode size lock.  An inode that already has a stripe MD
 * is only reported ("stripe already exists").  Otherwise an IT_OPEN intent
 * with @flags is sent through ll_intent_file_open() carrying @lum; a
 * negative lookup yields -ENOENT and a failing intent status is returned
 * as is.  On success the open handle obtained by the intent is released
 * again with ll_release_openhandle().  The intent and its request are
 * released on the way out. */
1170 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1171 int flags, struct lov_user_md *lum, int lum_size)
1173 struct lov_stripe_md *lsm;
1174 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1178 ll_inode_size_lock(inode, 0);
1179 lsm = ll_i2info(inode)->lli_smd;
1181 ll_inode_size_unlock(inode, 0);
1182 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1187 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1190 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1191 GOTO(out_req_free, rc = -ENOENT);
1192 rc = oit.d.lustre.it_status;
1194 GOTO(out_req_free, rc);
1196 ll_release_openhandle(file->f_dentry, &oit);
1199 ll_inode_size_unlock(inode, 0);
1200 ll_intent_release(&oit);
1203 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the striping EA of @filename (looked up relative to @inode) from
 * the MDS.
 *
 * Sends md_getattr_name() asking for OBD_MD_FLEASIZE | OBD_MD_FLDIREA with
 * the maximum MD size as buffer size.  -ENODATA is returned when the reply
 * carries no EA and -EPROTO when the EA magic is none of LOV_MAGIC_V1,
 * LOV_MAGIC_V3 or LOV_MAGIC_JOIN.  On hosts where LOV_MAGIC differs from
 * its little-endian form the EA is byte-swapped to host order; the
 * per-object array is swabbed only for regular files.  A LOV_MAGIC_JOIN EA
 * is unpacked and checked, then converted into a newly allocated
 * lov_user_md_join with one extent entry per stripe, and the stripe MD is
 * freed again.  The EA size is stored in *@lmm_size.
 * NOTE(review): presumably *@lmmp and *@request are set on the exit path as
 * well -- confirm against the complete function body. */
1207 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1208 struct lov_mds_md **lmmp, int *lmm_size,
1209 struct ptlrpc_request **request)
1211 struct ll_sb_info *sbi = ll_i2sbi(inode);
1212 struct mdt_body *body;
1213 struct lov_mds_md *lmm = NULL;
1214 struct ptlrpc_request *req = NULL;
1215 struct obd_capa *oc;
1218 rc = ll_get_max_mdsize(sbi, &lmmsize);
1222 oc = ll_mdscapa_get(inode);
1223 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1224 oc, filename, strlen(filename) + 1,
1225 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1226 ll_i2suppgid(inode), &req);
1229 CDEBUG(D_INFO, "md_getattr_name failed "
1230 "on %s: rc %d\n", filename, rc);
1234 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1235 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1237 lmmsize = body->eadatasize;
1239 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1241 GOTO(out, rc = -ENODATA);
1244 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1245 LASSERT(lmm != NULL);
1247 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1248 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
1249 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
1250 GOTO(out, rc = -EPROTO);
1254 * This is coming from the MDS, so is probably in
1255 * little endian. We convert it to host endian before
1256 * passing it to userspace.
1258 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1259 /* if function called for directory - we should
1260 * avoid swab not existent lsm objects */
1261 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1262 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1263 if (S_ISREG(body->mode))
1264 lustre_swab_lov_user_md_objects(
1265 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1266 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1267 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1268 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1269 if (S_ISREG(body->mode))
1270 lustre_swab_lov_user_md_objects(
1271 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1272 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1273 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
1274 lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
1278 if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
1279 struct lov_stripe_md *lsm;
1280 struct lov_user_md_join *lmj;
1281 int lmj_size, i, aindex = 0;
1283 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
1285 GOTO(out, rc = -ENOMEM);
1286 rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm);
1288 GOTO(out_free_memmd, rc);
1290 lmj_size = sizeof(struct lov_user_md_join) +
1291 lsm->lsm_stripe_count *
1292 sizeof(struct lov_user_ost_data_join);
1293 OBD_ALLOC(lmj, lmj_size);
1295 GOTO(out_free_memmd, rc = -ENOMEM);
1297 memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
1298 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1299 struct lov_extent *lex =
1300 &lsm->lsm_array->lai_ext_array[aindex];
1302 if (lex->le_loi_idx + lex->le_stripe_count <= i)
1304 CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
1305 LPU64" len %d\n", aindex, i,
1306 lex->le_start, (int)lex->le_len);
1307 lmj->lmm_objects[i].l_extent_start =
1310 if ((int)lex->le_len == -1)
1311 lmj->lmm_objects[i].l_extent_end = -1;
1313 lmj->lmm_objects[i].l_extent_end =
1314 lex->le_start + lex->le_len;
1315 lmj->lmm_objects[i].l_object_id =
1316 lsm->lsm_oinfo[i]->loi_id;
1317 lmj->lmm_objects[i].l_object_gr =
1318 lsm->lsm_oinfo[i]->loi_gr;
1319 lmj->lmm_objects[i].l_ost_gen =
1320 lsm->lsm_oinfo[i]->loi_ost_gen;
1321 lmj->lmm_objects[i].l_ost_idx =
1322 lsm->lsm_oinfo[i]->loi_ost_idx;
1324 lmm = (struct lov_mds_md *)lmj;
1327 obd_free_memmd(sbi->ll_dt_exp, &lsm);
1331 *lmm_size = lmmsize;
// ll_lov_setea: apply a caller-supplied striping descriptor to a file
// (called from ll_file_ioctl for LL_IOC_LOV_SETEA).
//
// Visible steps: a CFS_CAP_SYS_ADMIN capability test, allocation of a buffer
// of lum_size bytes, a copy of lum_size bytes from the user pointer in arg,
// and a call to ll_lov_setstripe_ea_info() with
// MDS_OPEN_HAS_OBJS | FMODE_WRITE.  The buffer is released both on the
// copy-failure path and after the call.
//
// NOTE(review): this listing omits several short lines (braces, returns), so
// the error codes returned and any check of the allocation result are not
// shown here.
1336 static int ll_lov_setea(struct inode *inode, struct file *file,
1339 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1340 struct lov_user_md *lump;
// Room for the lov_user_md header plus exactly one lov_user_ost_data entry.
1341 int lum_size = sizeof(struct lov_user_md) +
1342 sizeof(struct lov_user_ost_data);
1346 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1349 OBD_ALLOC(lump, lum_size);
1353 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1354 OBD_FREE(lump, lum_size);
1358 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1360 OBD_FREE(lump, lum_size);
// ll_lov_setstripe: set the striping of a file from a user lov_user_md
// (called from ll_file_ioctl for LL_IOC_LOV_SETSTRIPE).
//
// The user structure may use the v1 or the larger v3 layout.  lumv1 points at
// the on-stack lumv3, so the v1-sized prefix is copied in first; when its
// lmm_magic equals LOV_USER_MAGIC_V3 the whole v3 structure is copied again
// with the larger size.  The descriptor is then passed to
// ll_lov_setstripe_ea_info() with FMODE_WRITE.  Afterwards the user's
// lmm_stripe_count is written as 0 with put_user() and LL_IOC_LOV_GETSTRIPE is
// issued against the inode's lli_smd.
//
// NOTE(review): the condition guarding the put_user()/obd_iocontrol() pair,
// the last obd_iocontrol() argument and the error returns fall on lines this
// listing omits.  The put_user() result is not checked on the visible line.
1364 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1367 struct lov_user_md_v3 lumv3;
1368 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1369 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1370 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1373 int flags = FMODE_WRITE;
1376 /* first try with v1 which is smaller than v3 */
1377 lum_size = sizeof(struct lov_user_md_v1);
1378 if (copy_from_user(lumv1, lumv1p, lum_size))
1381 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1382 lum_size = sizeof(struct lov_user_md_v3);
1383 if (copy_from_user(&lumv3, lumv3p, lum_size))
1387 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1389 put_user(0, &lumv1p->lmm_stripe_count);
1390 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1391 0, ll_i2info(inode)->lli_smd,
// ll_lov_getstripe: report the striping of a file (called from ll_file_ioctl
// for LL_IOC_LOV_GETSTRIPE).  Reads the inode's lli_smd and forwards it to
// obd_iocontrol() on the data export.
// NOTE(review): the lines between the declaration and the return, and the
// final obd_iocontrol() argument, are omitted from this listing.
1397 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1399 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1404 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
// ll_get_grouplock: handler that ll_file_ioctl calls for LL_IOC_GROUP_LOCK.
// NOTE(review): only the first line of the signature is present in this
// listing; the remaining parameter(s) and the body are omitted.
1408 static int ll_get_grouplock(struct inode *inode, struct file *file,
// ll_put_grouplock: handler that ll_file_ioctl calls for LL_IOC_GROUP_UNLOCK.
// NOTE(review): only the first line of the signature is present in this
// listing; the remaining parameter(s) and the body are omitted.
1415 static int ll_put_grouplock(struct inode *inode, struct file *file,
1422 #if LUSTRE_FIX >= 50
// join_sanity_check: validate a head/tail pair before a file join.
//
// Four conditions are tested and each logs a CERROR when violated:
//   1. the head's superblock info has LL_SBI_JOIN set in ll_flags;
//   2. both inodes are regular files;
//   3. head and tail are not the same inode number;
//   4. the head's size is a multiple of JOIN_FILE_ALIGN (the message text says
//      64K; presumably that is the value of JOIN_FILE_ALIGN - confirm).
//
// NOTE(review): the return statements are on lines omitted from this listing.
1423 static int join_sanity_check(struct inode *head, struct inode *tail)
1426 if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
1427 CERROR("server do not support join \n");
1430 if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
// NOTE(review): the format string names the tail inode first and the head
// second, but the arguments are passed as head, tail - the two numbers are
// printed under the wrong labels.
1431 CERROR("tail ino %lu and ino head %lu must be regular\n",
1432 head->i_ino, tail->i_ino);
1435 if (head->i_ino == tail->i_ino) {
1436 CERROR("file %lu can not be joined to itself \n", head->i_ino);
1439 if (i_size_read(head) % JOIN_FILE_ALIGN) {
1440 CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
// join_file: send the join request for head/tail to the metadata server.
//
// An IT_OPEN intent carrying the head file's f_flags plus O_JOIN_FILE is
// enqueued with md_enqueue() on the head inode's MD export, using an
// LDLM_IBITS / LCK_CW enqueue descriptor.  The op_data is built from the head
// inode, the tail's parent directory inode and the tail's name; the head's
// current size is passed through the local variable data.
//
// After the enqueue the visible code:
//   - releases op_data and takes rc from oit.d.lustre.it_status;
//   - on a negative rc or an open error, finishes the request held in
//     it_data;
//   - drops the lock reference if it_lock_mode is set;
//   - finishes the request, clears DISP_ENQ_COMPLETE, closes the open handle
//     with ll_release_openhandle() and releases the intent.
//
// NOTE(review): braces, gotos/labels and the return are on lines omitted from
// this listing, so which of the statements above share a path is not shown.
1446 static int join_file(struct inode *head_inode, struct file *head_filp,
1447 struct file *tail_filp)
1449 struct dentry *tail_dentry = tail_filp->f_dentry;
1450 struct lookup_intent oit = {.it_op = IT_OPEN,
1451 .it_flags = head_filp->f_flags|O_JOIN_FILE};
1452 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CW,
1453 ll_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL };
1455 struct lustre_handle lockh;
1456 struct md_op_data *op_data;
// NOTE(review): tail_dentry already holds this value from its initializer.
1461 tail_dentry = tail_filp->f_dentry;
1463 data = i_size_read(head_inode);
1464 op_data = ll_prep_md_op_data(NULL, head_inode,
1465 tail_dentry->d_parent->d_inode,
1466 tail_dentry->d_name.name,
1467 tail_dentry->d_name.len, 0,
1468 LUSTRE_OPC_ANY, &data);
1469 if (IS_ERR(op_data))
1470 RETURN(PTR_ERR(op_data));
1472 rc = md_enqueue(ll_i2mdexp(head_inode), &einfo, &oit,
1473 op_data, &lockh, NULL, 0, NULL, 0);
1475 ll_finish_md_op_data(op_data);
1479 rc = oit.d.lustre.it_status;
1481 if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
1482 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
1483 ptlrpc_req_finished((struct ptlrpc_request *)
1484 oit.d.lustre.it_data);
1488 if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
1490 ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
1491 oit.d.lustre.it_lock_mode = 0;
1493 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1494 it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
1495 ll_release_openhandle(head_filp->f_dentry, &oit);
1497 ll_intent_release(&oit);
// ll_file_join: join the file named filename_tail onto the open file filp
// (whose inode is head).
//
// Visible sequence:
//   1. open the tail with filp_open(O_WRONLY) and take an inode reference
//      with igrab();
//   2. order the two inodes by inode number (the larger i_ino becomes
//      "first") so that both are always locked in the same order;
//   3. take an LCK_EX tree lock over [0, OBD_OBJECT_EOF] on first, then on
//      second;
//   4. run join_sanity_check() and then join_file();
//   5. unwind in a switch on cleanup_phase: unlock each tree and call
//      obd_cancel_unused() for it, close the tail file, and - when rc is 0 -
//      free the head's striping metadata and set hlli->lli_smd to NULL.
//
// NOTE(review): the case labels, the updates of cleanup_phase, the error
// gotos and any release of the igrab() reference are on lines omitted from
// this listing.
1501 static int ll_file_join(struct inode *head, struct file *filp,
1502 char *filename_tail)
1504 struct inode *tail = NULL, *first = NULL, *second = NULL;
1505 struct dentry *tail_dentry;
1506 struct file *tail_filp, *first_filp, *second_filp;
1507 struct ll_lock_tree first_tree, second_tree;
1508 struct ll_lock_tree_node *first_node, *second_node;
1509 struct ll_inode_info *hlli = ll_i2info(head), *tlli;
1510 int rc = 0, cleanup_phase = 0;
1513 CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
1514 head->i_ino, head->i_generation, head, filename_tail);
1516 tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
1517 if (IS_ERR(tail_filp)) {
// NOTE(review): unlike the other CERROR calls in this function, this message
// has no trailing newline.
1518 CERROR("Can not open tail file %s", filename_tail);
1519 rc = PTR_ERR(tail_filp);
1522 tail = igrab(tail_filp->f_dentry->d_inode);
1524 tlli = ll_i2info(tail);
1525 tail_dentry = tail_filp->f_dentry;
1526 LASSERT(tail_dentry);
1529 /*reorder the inode for lock sequence*/
1530 first = head->i_ino > tail->i_ino ? head : tail;
1531 second = head->i_ino > tail->i_ino ? tail : head;
1532 first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
1533 second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
1535 CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
1536 head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
1537 first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
1538 if (IS_ERR(first_node)){
1539 rc = PTR_ERR(first_node);
1542 first_tree.lt_fd = first_filp->private_data;
1543 rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
1548 second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
1549 if (IS_ERR(second_node)){
1550 rc = PTR_ERR(second_node);
1553 second_tree.lt_fd = second_filp->private_data;
1554 rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
1559 rc = join_sanity_check(head, tail);
1563 rc = join_file(head, filp, tail_filp);
1567 switch (cleanup_phase) {
1569 ll_tree_unlock(&second_tree);
1570 obd_cancel_unused(ll_i2dtexp(second),
1571 ll_i2info(second)->lli_smd, 0, NULL);
1573 ll_tree_unlock(&first_tree);
1574 obd_cancel_unused(ll_i2dtexp(first),
1575 ll_i2info(first)->lli_smd, 0, NULL);
1577 filp_close(tail_filp, 0);
1580 if (head && rc == 0) {
1581 obd_free_memmd(ll_i2sbi(head)->ll_dt_exp,
1583 hlli->lli_smd = NULL;
1588 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
1593 #endif /* LUSTRE_FIX >= 50 */
1596 * Close inode open handle
1598 * \param dentry [in] dentry which contains the inode
1599 * \param it [in,out] intent which contains open info and result
1602 * \retval <0 failure
// Implementation notes for ll_release_openhandle:
//   - nothing is closed when dentry is the superblock root, or when the
//     intent does not carry DISP_OPEN_OPEN;
//   - otherwise a temporary obd_client_handle is allocated, filled from the
//     intent with ll_och_fill() and closed on the MD export with
//     ll_close_inode_openhandle();
//   - if the intent holds DISP_ENQ_OPEN_REF the request in it_data is
//     finished, and that disposition bit is then cleared unconditionally.
// NOTE(review): the early-return values, the remaining arguments of
// ll_close_inode_openhandle() and the exit label are on lines omitted from
// this listing.
1604 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1606 struct inode *inode = dentry->d_inode;
1607 struct obd_client_handle *och;
1613 /* Root ? Do nothing. */
1614 if (dentry->d_inode->i_sb->s_root == dentry)
1617 /* No open handle to close? Move away */
1618 if (!it_disposition(it, DISP_OPEN_OPEN))
1621 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1623 OBD_ALLOC(och, sizeof(*och));
1625 GOTO(out, rc = -ENOMEM);
1627 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1628 ll_i2info(inode), it, och);
1630 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1633 /* this one is in place of ll_file_open */
1634 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1635 ptlrpc_req_finished(it->d.lustre.it_data);
1636 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1641 * Get size for inode for which FIEMAP mapping is requested.
1642 * Make the FIEMAP get_info call and return the result.
// Implementation notes for ll_fiemap:
//   - fiemap is both the request and the buffer that obd_get_info() fills;
//     num_bytes is its total size and is passed on as vallen;
//   - a file with more than one stripe is only handled when the caller set
//     FIEMAP_FLAG_DEVICE_ORDER in fm_flags;
//   - the lookup key carries KEY_FIEMAP, an obdo identifying the object
//     (lsm_object_id / lsm_object_gr plus attributes copied by
//     obdo_from_inode()) and a copy of the fiemap header;
//   - when the obdo size is 0, fm_mapped_extents is set to 0.
// NOTE(review): lsm comes straight from lli_smd and is dereferenced below; no
// NULL check is visible in this listing (some lines are omitted) - confirm
// that a file without striping metadata cannot reach this point.
1644 int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1647 struct obd_export *exp = ll_i2dtexp(inode);
1648 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1649 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1650 int vallen = num_bytes;
1654 /* If the stripe_count > 1 and the application does not understand
1655 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1657 if (lsm->lsm_stripe_count > 1 &&
1658 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1661 fm_key.oa.o_id = lsm->lsm_object_id;
1662 fm_key.oa.o_gr = lsm->lsm_object_gr;
1663 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1665 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1668 /* If filesize is 0, then there would be no objects for mapping */
1669 if (fm_key.oa.o_size == 0) {
1670 fiemap->fm_mapped_extents = 0;
1674 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1676 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1678 CERROR("obd_get_info failed: rc = %d\n", rc);
// ll_file_ioctl: ioctl entry point for Lustre files.
//
// After the trace message and the LPROC_LL_IOCTL counter update, the command
// is first tested for the tty ioctl types 'T' / 't' (the action taken is on
// an omitted line) and then dispatched:
//   - LL_IOC_GETFLAGS returns fd->fd_flags; LL_IOC_SETFLAGS / LL_IOC_CLRFLAGS
//     OR in or mask out the user-supplied bits (setting LL_FILE_IGNORE_LOCK
//     is refused with an error message unless the file is O_DIRECT);
//   - LL_IOC_LOV_SETSTRIPE / SETEA / GETSTRIPE and LL_IOC_RECREATE_OBJ call
//     the matching ll_lov_* helper;
//   - EXT3_IOC_FIEMAP copies the request in, starts writeback when
//     FIEMAP_FLAG_SYNC is set, calls ll_fiemap() and copies the header plus
//     the mapped extents back to the user;
//   - EXT3_IOC_GETFLAGS / SETFLAGS go to ll_iocontrol();
//   - EXT3_IOC_GETVERSION(_OLD) return inode->i_generation;
//   - file join (only under LUSTRE_FIX >= 50), group lock / unlock,
//     IOC_OBD_STATFS, LL_IOC_FLUSHCTX and LL_IOC_PATH2FID call their helpers
//     (PATH2FID copies the inode's lli_fid to the user);
//   - the last visible statements offer the command to ll_iocontrol_call()
//     and then to obd_iocontrol() on the data export.
//
// NOTE(review): the switch statement, several case labels, braces and error
// returns are on lines omitted from this listing.
1683 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1686 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1690 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1691 inode->i_generation, inode, cmd);
1692 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1694 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1695 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1699 case LL_IOC_GETFLAGS:
1700 /* Get the current value of the file flags */
1701 return put_user(fd->fd_flags, (int *)arg);
1702 case LL_IOC_SETFLAGS:
1703 case LL_IOC_CLRFLAGS:
1704 /* Set or clear specific file flags */
1705 /* XXX This probably needs checks to ensure the flags are
1706 * not abused, and to handle any flag side effects.
1708 if (get_user(flags, (int *) arg))
1711 if (cmd == LL_IOC_SETFLAGS) {
1712 if ((flags & LL_FILE_IGNORE_LOCK) &&
1713 !(file->f_flags & O_DIRECT)) {
1714 CERROR("%s: unable to disable locking on "
1715 "non-O_DIRECT file\n", current->comm);
1719 fd->fd_flags |= flags;
1721 fd->fd_flags &= ~flags;
1724 case LL_IOC_LOV_SETSTRIPE:
1725 RETURN(ll_lov_setstripe(inode, file, arg));
1726 case LL_IOC_LOV_SETEA:
1727 RETURN(ll_lov_setea(inode, file, arg));
1728 case LL_IOC_LOV_GETSTRIPE:
1729 RETURN(ll_lov_getstripe(inode, arg));
1730 case LL_IOC_RECREATE_OBJ:
1731 RETURN(ll_lov_recreate_obj(inode, file, arg));
1732 case EXT3_IOC_FIEMAP: {
1733 struct ll_user_fiemap *fiemap_s;
1734 size_t num_bytes, ret_bytes;
1735 unsigned int extent_count;
1738 /* Get the extent count so we can calculate the size of
1739 * required fiemap buffer */
1740 if (get_user(extent_count,
1741 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
// NOTE(review): extent_count comes straight from user space and no upper
// bound is visible before it sizes the allocation.  The product can wrap
// where size_t is 32 bits, and num_bytes is later handed to ll_fiemap(),
// which stores it in an int (vallen) - confirm whether a limit is needed.
1743 num_bytes = sizeof(*fiemap_s) + (extent_count *
1744 sizeof(struct ll_fiemap_extent));
1745 OBD_VMALLOC(fiemap_s, num_bytes);
1746 if (fiemap_s == NULL)
1749 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1751 GOTO(error, rc = -EFAULT);
// Unsupported flag bits: report them back to the caller in fm_flags and
// fail with -EBADR.
1753 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1754 fiemap_s->fm_flags = fiemap_s->fm_flags &
1755 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1756 if (copy_to_user((char *)arg, fiemap_s,
1758 GOTO(error, rc = -EFAULT);
1760 GOTO(error, rc = -EBADR);
1763 /* If fm_extent_count is non-zero, read the first extent since
1764 * it is used to calculate end_offset and device from previous
1767 if (copy_from_user(&fiemap_s->fm_extents[0],
1768 (char __user *)arg + sizeof(*fiemap_s),
1769 sizeof(struct ll_fiemap_extent)))
1770 GOTO(error, rc = -EFAULT);
1773 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1776 rc = filemap_fdatawrite(inode->i_mapping);
1781 rc = ll_fiemap(inode, fiemap_s, num_bytes);
// Only the header is returned when the caller asked for zero extents.
1785 ret_bytes = sizeof(struct ll_user_fiemap);
1787 if (extent_count != 0)
1788 ret_bytes += (fiemap_s->fm_mapped_extents *
1789 sizeof(struct ll_fiemap_extent));
1791 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1795 OBD_VFREE(fiemap_s, num_bytes);
1798 case EXT3_IOC_GETFLAGS:
1799 case EXT3_IOC_SETFLAGS:
1800 RETURN(ll_iocontrol(inode, file, cmd, arg));
1801 case EXT3_IOC_GETVERSION_OLD:
1802 case EXT3_IOC_GETVERSION:
1803 RETURN(put_user(inode->i_generation, (int *)arg));
1805 #if LUSTRE_FIX >= 50
1806 /* Allow file join in beta builds to allow debuggging */
1810 ftail = getname((const char *)arg);
1812 RETURN(PTR_ERR(ftail));
1813 rc = ll_file_join(inode, file, ftail);
1817 CWARN("file join is not supported in this version of Lustre\n");
1821 case LL_IOC_GROUP_LOCK:
1822 RETURN(ll_get_grouplock(inode, file, arg));
1823 case LL_IOC_GROUP_UNLOCK:
1824 RETURN(ll_put_grouplock(inode, file, arg));
1825 case IOC_OBD_STATFS:
1826 RETURN(ll_obd_statfs(inode, (void *)arg));
1828 /* We need to special case any other ioctls we want to handle,
1829 * to send them to the MDS/OST as appropriate and to properly
1830 * network encode the arg field.
1831 case EXT3_IOC_SETVERSION_OLD:
1832 case EXT3_IOC_SETVERSION:
1834 case LL_IOC_FLUSHCTX:
1835 RETURN(ll_flush_ctx(inode));
1836 case LL_IOC_PATH2FID: {
1837 if (copy_to_user((void *)arg, &ll_i2info(inode)->lli_fid,
1838 sizeof(struct lu_fid)))
1847 ll_iocontrol_call(inode, file, cmd, arg, &err))
1850 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
// ll_file_seek: llseek implementation for Lustre files.
//
// origin 2 is SEEK_END, origin 1 is SEEK_CUR, anything else is treated as
// SEEK_SET.  retval is computed up front only for the trace message.  For
// SEEK_END the file size is refreshed with cl_glimpse_size() and then read
// between ll_inode_size_lock() and ll_inode_size_unlock() before being added
// to offset; for SEEK_CUR the current f_pos is added.  The result is accepted
// only when it lies in [0, ll_file_maxbytes(inode)], and f_pos is written
// only when it actually changes.
//
// NOTE(review): the handling of a failed glimpse and the return statements
// are on lines omitted from this listing.
1856 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1858 struct inode *inode = file->f_dentry->d_inode;
1861 retval = offset + ((origin == 2) ? i_size_read(inode) :
1862 (origin == 1) ? file->f_pos : 0);
1863 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1864 inode->i_ino, inode->i_generation, inode, retval, retval,
1865 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1866 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1868 if (origin == 2) { /* SEEK_END */
1869 int nonblock = 0, rc;
// NOTE(review): nonblock is set here but is not passed to any call visible
// in this listing; it looks like a leftover - confirm.
1871 if (file->f_flags & O_NONBLOCK)
1872 nonblock = LDLM_FL_BLOCK_NOWAIT;
1874 rc = cl_glimpse_size(inode);
1878 ll_inode_size_lock(inode, 0);
1879 offset += i_size_read(inode);
1880 ll_inode_size_unlock(inode, 0);
1881 } else if (origin == 1) { /* SEEK_CUR */
1882 offset += file->f_pos;
1886 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1887 if (offset != file->f_pos) {
1888 file->f_pos = offset;
// ll_fsync: fsync implementation for Lustre files.
//
// Visible sequence:
//   1. wait for page writeback on the inode's mapping with
//      filemap_fdatawait();
//   2. pick up and reset the asynchronous write error stored in
//      lli->lli_async_rc, and the one reported by
//      lov_test_and_clear_async_rc(lsm);
//   3. sync the metadata with md_sync() on the MD export, using a capability
//      from ll_mdscapa_get(), and finish the returned request;
//   4. build an obdo from the object id/group in lsm plus inode attributes
//      and sync the whole object range [0, OBD_OBJECT_EOF] with obd_sync() on
//      the data export, using a CAPA_OPC_OSS_WRITE capability.
//
// NOTE(review): the conditions around each step (how rc and err are merged,
// whether step 4 runs only when lsm is set), the allocation of oa and the
// release of the capabilities are on lines omitted from this listing.
1896 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1898 struct inode *inode = dentry->d_inode;
1899 struct ll_inode_info *lli = ll_i2info(inode);
1900 struct lov_stripe_md *lsm = lli->lli_smd;
1901 struct ptlrpc_request *req;
1902 struct obd_capa *oc;
1905 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1906 inode->i_generation, inode);
1907 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1909 /* fsync's caller has already called _fdata{sync,write}, we want
1910 * that IO to finish before calling the osc and mdc sync methods */
1911 rc = filemap_fdatawait(inode->i_mapping);
1913 /* catch async errors that were recorded back when async writeback
1914 * failed for pages in this mapping. */
1915 err = lli->lli_async_rc;
1916 lli->lli_async_rc = 0;
1920 err = lov_test_and_clear_async_rc(lsm);
1925 oc = ll_mdscapa_get(inode);
1926 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1932 ptlrpc_req_finished(req);
// An earlier error in rc takes precedence over -ENOMEM here (presumably the
// path taken when the obdo could not be allocated - confirm).
1939 RETURN(rc ? rc : -ENOMEM);
1941 oa->o_id = lsm->lsm_object_id;
1942 oa->o_gr = lsm->lsm_object_gr;
1943 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1944 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1945 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1948 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1949 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1950 0, OBD_OBJECT_EOF, oc);
// ll_file_flock: cluster-wide flock()/fcntl() locking, implemented as an
// LDLM_FLOCK enqueue on the metadata export.
//
// Visible sequence:
//   1. for FL_FLOCK requests, fill in the fields flock() does not supply
//      (fl_end = OFFSET_MAX, fl_pid = current->tgid);
//   2. copy pid / start / end into the ldlm flock policy;
//   3. map fl_type onto a lock mode: LCK_PR, LCK_NL (an unlock is sent as an
//      LCK_NL request, see the comment below) or LCK_PW; unknown types are
//      logged;
//   4. map cmd onto enqueue flags (LDLM_FL_BLOCK_NOWAIT or
//      LDLM_FL_TEST_LOCK); for the test case fl_type is overwritten with the
//      lock mode; unknown commands are logged;
//   5. enqueue with md_enqueue(), passing file_lock as callback data and
//      ldlm_flock_completion_ast as the completion callback;
//   6. when the enqueue succeeded, or the request was an unlock, mirror the
//      result in the local VFS lock lists: ll_flock_lock_file_wait() for
//      FL_FLOCK, and posix_lock_file_wait() for FL_POSIX (not for test
//      requests).
//
// NOTE(review): the case labels of both switches, the error returns and the
// preprocessor branch closing HAVE_F_OP_FLOCK are on lines omitted from this
// listing.  The results of the two local lock calls in step 6 are ignored on
// the visible lines.
1960 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1962 struct inode *inode = file->f_dentry->d_inode;
1963 struct ll_sb_info *sbi = ll_i2sbi(inode);
1964 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1965 .ei_cb_cp =ldlm_flock_completion_ast,
1966 .ei_cbdata = file_lock };
1967 struct md_op_data *op_data;
1968 struct lustre_handle lockh = {0};
1969 ldlm_policy_data_t flock;
1974 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1975 inode->i_ino, file_lock);
1977 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1979 if (file_lock->fl_flags & FL_FLOCK) {
1980 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1981 /* set missing params for flock() calls */
1982 file_lock->fl_end = OFFSET_MAX;
1983 file_lock->fl_pid = current->tgid;
1985 flock.l_flock.pid = file_lock->fl_pid;
1986 flock.l_flock.start = file_lock->fl_start;
1987 flock.l_flock.end = file_lock->fl_end;
1989 switch (file_lock->fl_type) {
1991 einfo.ei_mode = LCK_PR;
1994 /* An unlock request may or may not have any relation to
1995 * existing locks so we may not be able to pass a lock handle
1996 * via a normal ldlm_lock_cancel() request. The request may even
1997 * unlock a byte range in the middle of an existing lock. In
1998 * order to process an unlock request we need all of the same
1999 * information that is given with a normal read or write record
2000 * lock request. To avoid creating another ldlm unlock (cancel)
2001 * message we'll treat a LCK_NL flock request as an unlock. */
2002 einfo.ei_mode = LCK_NL;
2005 einfo.ei_mode = LCK_PW;
2008 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
2023 flags = LDLM_FL_BLOCK_NOWAIT;
2029 flags = LDLM_FL_TEST_LOCK;
2030 /* Save the old mode so that if the mode in the lock changes we
2031 * can decrement the appropriate reader or writer refcount. */
2032 file_lock->fl_type = einfo.ei_mode;
2035 CERROR("unknown fcntl lock command: %d\n", cmd);
2039 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2040 LUSTRE_OPC_ANY, NULL);
2041 if (IS_ERR(op_data))
2042 RETURN(PTR_ERR(op_data));
2044 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2045 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2046 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2048 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2049 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2051 ll_finish_md_op_data(op_data);
2053 if ((file_lock->fl_flags & FL_FLOCK) &&
2054 (rc == 0 || file_lock->fl_type == F_UNLCK))
2055 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2056 #ifdef HAVE_F_OP_FLOCK
2057 if ((file_lock->fl_flags & FL_POSIX) &&
2058 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2059 !(flags & LDLM_FL_TEST_LOCK))
2060 posix_lock_file_wait(file, file_lock);
// ll_file_noflock: lock handler installed in ll_file_operations_noflock,
// whose comment says it exists to return ENOSYS on flock calls.
// NOTE(review): the body is omitted from this listing.
2066 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
// ll_have_md_lock: test whether the client holds a metadata inodebits lock
// covering bits on this inode.
//
// The inode's FID is matched with md_lock_match() against LDLM_IBITS locks in
// any of the modes CR, CW, PR or PW.  LDLM_FL_TEST_LOCK is included in the
// flags, unlike in ll_take_md_lock() (presumably so that the match is a pure
// test and leaves no reference on the lock - confirm against
// md_lock_match()).
// NOTE(review): the early checks and both return statements are on lines
// omitted from this listing.
2073 int ll_have_md_lock(struct inode *inode, __u64 bits)
2075 struct lustre_handle lockh;
2076 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2084 fid = &ll_i2info(inode)->lli_fid;
2085 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2087 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2088 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2089 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
// ll_take_md_lock: look up a metadata inodebits lock covering bits on this
// inode and hand its handle back through lockh.
//
// The match is the same as in ll_have_md_lock() (LDLM_IBITS, modes CR, CW, PR
// or PW, on the inode's FID) but without LDLM_FL_TEST_LOCK; the
// md_lock_match() result is kept in rc.
// NOTE(review): the return statement is on a line omitted from this listing;
// presumably rc, the matched mode, is returned - confirm.
2095 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2096 struct lustre_handle *lockh)
2098 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2104 fid = &ll_i2info(inode)->lli_fid;
2105 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2107 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2108 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2109 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
// ll_inode_revalidate_fini: post-process the result code of a revalidation
// getattr.
//
// -ENOENT is treated as "inode already unlinked" (per the comment below, the
// link count is updated and success is returned), with a separate test for
// inodes that are neither regular files nor directories.  Other failures are
// logged with the inode number.
// NOTE(review): the statements that change nlink and rc, the condition in
// front of the CERROR and the return are on lines omitted from this listing.
2113 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2114 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2115 * and return success */
2117 /* This path cannot be hit for regular files unless in
2118 * case of obscure races, so no need to to validate
2120 if (!S_ISREG(inode->i_mode) &&
2121 !S_ISDIR(inode->i_mode))
2126 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
// ll_inode_revalidate_it: refresh the attributes of dentry's inode from the
// metadata server, then refresh its size.
//
// Two paths are visible:
//   - when the MD export advertises OBD_CONNECT_ATTRFID, an IT_GETATTR intent
//     is issued by FID (no name) with md_intent_lock(), with O_CHECK_STALE
//     set in it_flags only for the duration of the call.  The result goes
//     through ll_inode_revalidate_fini() and ll_revalidate_it_finish(); a
//     dentry whose inode has i_nlink == 0 is unhashed with ll_drop_dentry()
//     under ll_lookup_lock and dcache_lock;
//   - otherwise, when no UPDATE|LOOKUP inodebits lock is held on the inode,
//     a plain md_getattr() is sent (asking for EA sizes too on regular files)
//     and the reply is applied with ll_prep_inode().
// Finally, if the inode has striping metadata (lli_smd), the size is
// refreshed with cl_glimpse_size(), and the request is finished.
//
// NOTE(review): error checks, gotos and labels, and the return are on lines
// omitted from this listing.
2134 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2136 struct inode *inode = dentry->d_inode;
2137 struct ptlrpc_request *req = NULL;
2138 struct ll_sb_info *sbi;
2139 struct obd_export *exp;
2144 CERROR("REPORT THIS LINE TO PETER\n");
2147 sbi = ll_i2sbi(inode);
2149 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2150 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2152 exp = ll_i2mdexp(inode);
2154 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2155 struct lookup_intent oit = { .it_op = IT_GETATTR };
2156 struct md_op_data *op_data;
2158 /* Call getattr by fid, so do not provide name at all. */
2159 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2160 dentry->d_inode, NULL, 0, 0,
2161 LUSTRE_OPC_ANY, NULL);
2162 if (IS_ERR(op_data))
2163 RETURN(PTR_ERR(op_data));
2165 oit.it_flags |= O_CHECK_STALE;
2166 rc = md_intent_lock(exp, op_data, NULL, 0,
2167 /* we are not interested in name
2170 ll_md_blocking_ast, 0);
2171 ll_finish_md_op_data(op_data);
2172 oit.it_flags &= ~O_CHECK_STALE;
2174 rc = ll_inode_revalidate_fini(inode, rc);
2178 rc = ll_revalidate_it_finish(req, &oit, dentry);
2180 ll_intent_release(&oit);
2184 /* Unlinked? Unhash dentry, so it is not picked up later by
2185 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2186 here to preserve get_cwd functionality on 2.6.
2188 if (!dentry->d_inode->i_nlink) {
2189 spin_lock(&ll_lookup_lock);
2190 spin_lock(&dcache_lock);
2191 ll_drop_dentry(dentry);
2192 spin_unlock(&dcache_lock);
2193 spin_unlock(&ll_lookup_lock);
2196 ll_lookup_finish_locks(&oit, dentry);
2197 } else if (!ll_have_md_lock(dentry->d_inode, MDS_INODELOCK_UPDATE |
2198 MDS_INODELOCK_LOOKUP)) {
// NOTE(review): this declaration shadows the function-level sbi assigned
// above; both are obtained from the same inode.
2199 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2200 obd_valid valid = OBD_MD_FLGETATTR;
2201 struct obd_capa *oc;
2204 if (S_ISREG(inode->i_mode)) {
2205 rc = ll_get_max_mdsize(sbi, &ealen);
2208 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2210 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2211 * capa for this inode. Because we only keep capas of dirs
2213 oc = ll_mdscapa_get(inode);
2214 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2218 rc = ll_inode_revalidate_fini(inode, rc);
2222 rc = ll_prep_inode(&inode, req, NULL);
2227 /* if object not yet allocated, don't validate size */
2228 if (ll_i2info(inode)->lli_smd == NULL)
2231 /* cl_glimpse_size will prefer locally cached writes if they extend
2233 rc = cl_glimpse_size(inode);
2236 ptlrpc_req_finished(req);
// ll_getattr_it: revalidate the inode behind de with ll_inode_revalidate_it()
// and copy its attributes into stat.
//
// dev, ino, mode, nlink, uid, gid, rdev and the three timestamps are copied
// directly from the inode.  blksize comes from i_blksize when
// HAVE_INODE_BLKSIZE is defined; the other visible assignment derives it from
// i_blkbits.  size and blocks are read between ll_inode_size_lock() and
// ll_inode_size_unlock().
// NOTE(review): the check of the revalidation result, the preprocessor branch
// lines closing HAVE_INODE_BLKSIZE and the return are omitted from this
// listing.
2240 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2241 struct lookup_intent *it, struct kstat *stat)
2243 struct inode *inode = de->d_inode;
2246 res = ll_inode_revalidate_it(de, it);
2247 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2252 stat->dev = inode->i_sb->s_dev;
2253 stat->ino = inode->i_ino;
2254 stat->mode = inode->i_mode;
2255 stat->nlink = inode->i_nlink;
2256 stat->uid = inode->i_uid;
2257 stat->gid = inode->i_gid;
2258 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2259 stat->atime = inode->i_atime;
2260 stat->mtime = inode->i_mtime;
2261 stat->ctime = inode->i_ctime;
2262 #ifdef HAVE_INODE_BLKSIZE
2263 stat->blksize = inode->i_blksize;
2265 stat->blksize = 1 << inode->i_blkbits;
2268 ll_inode_size_lock(inode, 0);
2269 stat->size = i_size_read(inode);
2270 stat->blocks = inode->i_blocks;
2271 ll_inode_size_unlock(inode, 0);
// ll_getattr: getattr inode operation.  Builds a local IT_GETATTR intent and
// delegates to ll_getattr_it().
2275 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2277 struct lookup_intent it = { .it_op = IT_GETATTR };
2279 return ll_getattr_it(mnt, de, &it, stat);
// lustre_check_acl: ACL permission callback (passed to generic_permission()
// by ll_inode_permission()).
//
// With CONFIG_FS_POSIX_ACL, a reference to the inode's cached ACL
// (lli_posix_acl) is taken under lli_lock, evaluated against mask with
// posix_acl_permission() and then released.
// NOTE(review): the handling of a missing ACL, the branch for kernels without
// CONFIG_FS_POSIX_ACL and the returns are on lines omitted from this listing.
2283 int lustre_check_acl(struct inode *inode, int mask)
2285 #ifdef CONFIG_FS_POSIX_ACL
2286 struct ll_inode_info *lli = ll_i2info(inode);
2287 struct posix_acl *acl;
2291 spin_lock(&lli->lli_lock);
2292 acl = posix_acl_dup(lli->lli_posix_acl);
2293 spin_unlock(&lli->lli_lock);
2298 rc = posix_acl_permission(inode, acl, mask);
2299 posix_acl_release(acl);
2307 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
// ll_inode_permission (variant following the LINUX_VERSION_CODE >= 2.6.10
// test): permission inode operation.
//
// Superblocks flagged LL_SBI_RMT_CLIENT use lustre_check_remote_perm();
// everything else goes through the kernel's generic_permission() with
// lustre_check_acl() as the ACL callback.  The LPROC_LL_INODE_PERM counter is
// bumped after the remote-client return, so remote checks are not counted.
2308 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2310 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2311 inode->i_ino, inode->i_generation, inode, mask);
2312 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2313 return lustre_check_remote_perm(inode, mask);
2315 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2316 return generic_permission(inode, mask, lustre_check_acl);
// ll_inode_permission (second variant; presumably the branch for kernels
// older than 2.6.10 - the preprocessor line separating the two variants is
// omitted from this listing): hand-written permission check.
//
// Visible checks, in order:
//   - LL_SBI_RMT_CLIENT superblocks use lustre_check_remote_perm();
//   - a write request is tested against a read-only filesystem (for regular
//     files, directories and symlinks) and against immutable inodes;
//   - the caller's fsuid is compared with the owner; otherwise the group
//     bits are compared with mask and lustre_check_acl() is consulted, then
//     group membership is tested, then the "other" bits;
//   - capability overrides: CFS_CAP_DAC_OVERRIDE (for exec only when some
//     execute bit is set or the inode is a directory), and
//     CFS_CAP_DAC_READ_SEARCH for pure reads or non-writing directory access.
//
// NOTE(review): the mode shifts, the labels targeted by the gotos and every
// return value are on lines omitted from this listing.
2319 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2321 int mode = inode->i_mode;
2324 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2325 inode->i_ino, inode->i_generation, inode, mask);
2327 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2328 return lustre_check_remote_perm(inode, mask);
2330 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2332 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2333 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2335 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2337 if (current->fsuid == inode->i_uid) {
2340 if (((mode >> 3) & mask & S_IRWXO) != mask)
2342 rc = lustre_check_acl(inode, mask);
2346 goto check_capabilities;
2350 if (in_group_p(inode->i_gid))
2353 if ((mode & mask & S_IRWXO) == mask)
2357 if (!(mask & MAY_EXEC) ||
2358 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2359 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2362 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2363 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
// Name the vectored / asynchronous I/O slots used in the file_operations
// tables below.  With HAVE_FILE_READV the readv / writev members are filled
// with ll_file_readv / ll_file_writev; the other group of definitions selects
// aio_read / aio_write with ll_file_aio_read / ll_file_aio_write.
// NOTE(review): the preprocessor lines that separate and close the two groups
// are omitted from this listing.
2370 #ifdef HAVE_FILE_READV
2371 #define READ_METHOD readv
2372 #define READ_FUNCTION ll_file_readv
2373 #define WRITE_METHOD writev
2374 #define WRITE_FUNCTION ll_file_writev
2376 #define READ_METHOD aio_read
2377 #define READ_FUNCTION ll_file_aio_read
2378 #define WRITE_METHOD aio_write
2379 #define WRITE_FUNCTION ll_file_aio_write
// File operations table without Lustre-specific flock / lock handlers in the
// visible entries; per the comment below it is the one used for
// "-o localflock" mounts.
// NOTE(review): some short entries and the closing brace are omitted from
// this listing.
2382 /* -o localflock - only provides locally consistent flock locks */
2383 struct file_operations ll_file_operations = {
2384 .read = ll_file_read,
2385 .READ_METHOD = READ_FUNCTION,
2386 .write = ll_file_write,
2387 .WRITE_METHOD = WRITE_FUNCTION,
2388 .ioctl = ll_file_ioctl,
2389 .open = ll_file_open,
2390 .release = ll_file_release,
2391 .mmap = ll_file_mmap,
2392 .llseek = ll_file_seek,
2393 .sendfile = ll_file_sendfile,
// File operations table with cluster-wide locking: same entries as
// ll_file_operations, plus ll_file_flock installed as the .flock handler
// (when HAVE_F_OP_FLOCK is defined) and as the .lock handler.
// NOTE(review): some short entries, the line closing the HAVE_F_OP_FLOCK
// conditional and the closing brace are omitted from this listing.
2397 struct file_operations ll_file_operations_flock = {
2398 .read = ll_file_read,
2399 .READ_METHOD = READ_FUNCTION,
2400 .write = ll_file_write,
2401 .WRITE_METHOD = WRITE_FUNCTION,
2402 .ioctl = ll_file_ioctl,
2403 .open = ll_file_open,
2404 .release = ll_file_release,
2405 .mmap = ll_file_mmap,
2406 .llseek = ll_file_seek,
2407 .sendfile = ll_file_sendfile,
2409 #ifdef HAVE_F_OP_FLOCK
2410 .flock = ll_file_flock,
2412 .lock = ll_file_flock
// File operations table for "-o noflock" mounts: same entries as
// ll_file_operations, plus ll_file_noflock installed as the .flock handler
// (when HAVE_F_OP_FLOCK is defined) and as the .lock handler.
// NOTE(review): some short entries, the line closing the HAVE_F_OP_FLOCK
// conditional and the closing brace are omitted from this listing.
2415 /* These are for -o noflock - to return ENOSYS on flock calls */
2416 struct file_operations ll_file_operations_noflock = {
2417 .read = ll_file_read,
2418 .READ_METHOD = READ_FUNCTION,
2419 .write = ll_file_write,
2420 .WRITE_METHOD = WRITE_FUNCTION,
2421 .ioctl = ll_file_ioctl,
2422 .open = ll_file_open,
2423 .release = ll_file_release,
2424 .mmap = ll_file_mmap,
2425 .llseek = ll_file_seek,
2426 .sendfile = ll_file_sendfile,
2428 #ifdef HAVE_F_OP_FLOCK
2429 .flock = ll_file_noflock,
2431 .lock = ll_file_noflock
// Inode operations table for Lustre files: attribute changes (setattr_raw
// when HAVE_VFS_INTENT_PATCHES is defined, and setattr), truncate, getattr,
// permission checks and the four extended-attribute operations.
// NOTE(review): the line closing the HAVE_VFS_INTENT_PATCHES conditional and
// the closing brace are omitted from this listing.
2434 struct inode_operations ll_file_inode_operations = {
2435 #ifdef HAVE_VFS_INTENT_PATCHES
2436 .setattr_raw = ll_setattr_raw,
2438 .setattr = ll_setattr,
2439 .truncate = ll_truncate,
2440 .getattr = ll_getattr,
2441 .permission = ll_inode_permission,
2442 .setxattr = ll_setxattr,
2443 .getxattr = ll_getxattr,
2444 .listxattr = ll_listxattr,
2445 .removexattr = ll_removexattr,
2448 /* dynamic ioctl number support routines */
// Registry of dynamically registered ioctl handlers: a list head (ioc_head)
// protected by a read/write semaphore (ioc_sem).  The single instance, llioc,
// is statically initialised with an unlocked semaphore and an empty list.
// Registration and unregistration take the semaphore for writing;
// ll_iocontrol_call() takes it for reading.
2449 static struct llioc_ctl_data {
2450 struct rw_semaphore ioc_sem;
2451 struct list_head ioc_head;
2453 __RWSEM_INITIALIZER(llioc.ioc_sem),
2454 CFS_LIST_HEAD_INIT(llioc.ioc_head)
// Members of one registration record, as filled in by ll_iocontrol_register()
// (the line opening the structure is omitted from this listing):
//   iocd_list  - linkage on llioc.ioc_head;
//   iocd_size  - allocated size of the record, used again when it is freed;
//   iocd_cb    - handler invoked for a matching command;
//   iocd_count - number of entries in iocd_cmd;
//   iocd_cmd   - trailing zero-length array holding the ioctl numbers.
2459 struct list_head iocd_list;
2460 unsigned int iocd_size;
2461 llioc_callback_t iocd_cb;
2462 unsigned int iocd_count;
2463 unsigned int iocd_cmd[0];
// ll_iocontrol_register: register a callback for a set of ioctl numbers.
//
// cb    - handler to invoke for the listed commands; must not be NULL.
// count - number of entries in cmd; must be within [0, LLIOC_MAX_CMD].
// cmd   - array of ioctl numbers; must not be NULL.
//
// A record large enough for the header plus count command numbers is
// allocated, filled in and appended to llioc.ioc_head while holding ioc_sem
// for writing.
// NOTE(review): the return statements are on lines omitted from this listing;
// presumably the record pointer is returned as the "magic" later passed to
// ll_iocontrol_unregister() - confirm.  The memset() is presumably redundant
// if OBD_ALLOC already zero-fills - confirm.
2466 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2469 struct llioc_data *in_data = NULL;
2472 if (cb == NULL || cmd == NULL ||
2473 count > LLIOC_MAX_CMD || count < 0)
2476 size = sizeof(*in_data) + count * sizeof(unsigned int);
2477 OBD_ALLOC(in_data, size);
2478 if (in_data == NULL)
2481 memset(in_data, 0, sizeof(*in_data));
2482 in_data->iocd_size = size;
2483 in_data->iocd_cb = cb;
2484 in_data->iocd_count = count;
2485 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2487 down_write(&llioc.ioc_sem);
2488 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2489 up_write(&llioc.ioc_sem);
// ll_iocontrol_unregister: remove a registration made with
// ll_iocontrol_register().
//
// The list is walked with ioc_sem held for writing.  For the selected record
// the size is saved, the record is unlinked, the semaphore is dropped and the
// record is freed.  If the walk finishes without a hit, the semaphore is
// dropped and a warning naming magic is logged.
// NOTE(review): the test that selects the record (presumably a comparison
// with magic) and the return after the free are on lines omitted from this
// listing.
2494 void ll_iocontrol_unregister(void *magic)
2496 struct llioc_data *tmp;
2501 down_write(&llioc.ioc_sem);
2502 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2504 unsigned int size = tmp->iocd_size;
2506 list_del(&tmp->iocd_list);
2507 up_write(&llioc.ioc_sem);
2509 OBD_FREE(tmp, size);
2513 up_write(&llioc.ioc_sem);
2515 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2518 EXPORT_SYMBOL(ll_iocontrol_register);
2519 EXPORT_SYMBOL(ll_iocontrol_unregister);
2521 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2522 unsigned int cmd, unsigned long arg, int *rcp)
2524 enum llioc_iter ret = LLIOC_CONT;
2525 struct llioc_data *data;
2526 int rc = -EINVAL, i;
2528 down_read(&llioc.ioc_sem);
2529 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2530 for (i = 0; i < data->iocd_count; i++) {
2531 if (cmd != data->iocd_cmd[i])
2534 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2538 if (ret == LLIOC_STOP)
2541 up_read(&llioc.ioc_sem);