1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/*
 * Allocate a struct ll_file_data from ll_file_data_slab using the
 * CFS_ALLOC_IO allocation flags.
 */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/*
 * Release @fd back to ll_file_data_slab; counterpart of ll_file_data_get().
 */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the current state of @inode into @op_data.
 *
 * Copies the inode FID, mode, atime/mtime/ctime, size (via i_size_read()),
 * block count, inode flags and the cached I/O epoch, copies the file handle
 * @fh into op_data->op_handle, and stores the result of
 * ll_mdscapa_get(inode) in op_data->op_capa1.
 */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/*
 * Prepare @op_data for closing open handle @och of @inode.
 *
 * Marks mode and atime/mtime/ctime as valid attributes.  When the MD export
 * lacks OBD_CONNECT_SOM or the inode is not a regular file, size and blocks
 * are marked valid as well; ll_epoch_close() is called on the other path.
 * Finally the inode state and the handle's file handle are packed with
 * ll_pack_inode2opdata().
 *
 * NOTE(review): the statement controlled by the FMODE_WRITE test and the
 * `else` pairing around ll_epoch_close() are not visible in this excerpt;
 * confirm the exact branch structure against the full file.
 */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
92 if (!(och->och_flags & FMODE_WRITE))
95 if (!(ll_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM) ||
96 !S_ISREG(inode->i_mode))
97 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
99 ll_epoch_close(inode, op_data, &och, 0);
102 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/*
 * Close the MDS open handle @och of an inode through @md_exp.
 *
 * Visible steps: allocate an md_op_data, fill it with ll_prepare_close(),
 * remember whether the close also closes the I/O epoch (MF_EPOCH_CLOSE),
 * and send md_close().  Depending on the result the function may run a
 * Size-on-MDS update (ll_sizeonmds_update()), log failures, and destroy
 * objects with ll_objects_destroy().  If SOM is enabled, the epoch was not
 * closed and the handle was a write handle on a regular file, DONE_WRITING
 * is queued instead.  Otherwise the replay sequence and open replay data
 * are cleared and the handle cookie is poisoned with DEAD_HANDLE_MAGIC.
 * A close request, if any, is released with ptlrpc_req_finished().
 *
 * NOTE(review): several declarations, branch conditions and labels of this
 * function are missing from this excerpt; the conditions guarding each step
 * above must be confirmed against the full file.
 */
106 static int ll_close_inode_openhandle(struct obd_export *md_exp,
108 struct obd_client_handle *och)
110 struct obd_export *exp = ll_i2mdexp(inode);
111 struct md_op_data *op_data;
112 struct ptlrpc_request *req = NULL;
113 struct obd_device *obd = class_exp2obd(exp);
120 * XXX: in case of LMV, is this correct to access
123 CERROR("Invalid MDC connection handle "LPX64"\n",
124 ll_i2mdexp(inode)->exp_handle.h_cookie);
129 * here we check if this is forced umount. If so this is called on
130 * canceling "open lock" and we do not call md_close() in this case, as
131 * it will not be successful, as import is already deactivated.
136 OBD_ALLOC_PTR(op_data);
138 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
140 ll_prepare_close(inode, op_data, och);
141 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
142 rc = md_close(md_exp, op_data, och->och_mod, &req);
147 /* This close must have the epoch closed. */
148 LASSERT(exp->exp_connect_flags & OBD_CONNECT_SOM);
149 LASSERT(epoch_close);
150 /* MDS has instructed us to obtain Size-on-MDS attribute from
151 * OSTs and send setattr back to MDS. */
152 rc = ll_sizeonmds_update(inode, och->och_mod,
153 &och->och_fh, op_data->op_ioepoch);
155 CERROR("inode %lu mdc Size-on-MDS update failed: "
156 "rc = %d\n", inode->i_ino, rc);
160 CERROR("inode %lu mdc close failed: rc = %d\n",
163 ll_finish_md_op_data(op_data);
166 rc = ll_objects_destroy(req, inode);
168 CERROR("inode %lu ll_objects destroy: rc = %d\n",
175 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
176 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
177 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
180 ptlrpc_close_replay_seq(req);
181 md_clear_open_replay_data(md_exp, och);
182 /* Free @och if it is not waiting for DONE_WRITING. */
183 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
186 if (req) /* This is close request */
187 ptlrpc_req_finished(req);
/*
 * Really close the MDS open handle of @inode that corresponds to @flags.
 *
 * The handle slot and its fd use counter are chosen by open mode:
 * FMODE_WRITE first, then FMODE_EXEC, otherwise read (FMODE_READ is
 * asserted).  The use counter is examined under lli_och_sem; if the handle
 * obtained is non-NULL it is closed with ll_close_inode_openhandle().
 *
 * NOTE(review): the lines that return early while the handle is still in
 * use and that detach the handle from the inode are not visible in this
 * excerpt.
 */
191 int ll_md_real_close(struct inode *inode, int flags)
193 struct ll_inode_info *lli = ll_i2info(inode);
194 struct obd_client_handle **och_p;
195 struct obd_client_handle *och;
200 if (flags & FMODE_WRITE) {
201 och_p = &lli->lli_mds_write_och;
202 och_usecount = &lli->lli_open_fd_write_count;
203 } else if (flags & FMODE_EXEC) {
204 och_p = &lli->lli_mds_exec_och;
205 och_usecount = &lli->lli_open_fd_exec_count;
207 LASSERT(flags & FMODE_READ);
208 och_p = &lli->lli_mds_read_och;
209 och_usecount = &lli->lli_open_fd_read_count;
212 down(&lli->lli_och_sem);
213 if (*och_usecount) { /* There are still users of this handle, so
215 up(&lli->lli_och_sem);
220 up(&lli->lli_och_sem);
222 if (och) { /* There might be a race and somebody have freed this och
224 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close: drop this fd's use of the inode's MDS open
 * handle and free the ll_file_data attached to @file.
 *
 * - If the fd holds a group lock, the group-lock flags are cleared and the
 *   extent lock is released with ll_extent_unlock().
 * - For a positive dentry, the open counter matching fd->fd_omode
 *   (write / exec / read) is decremented under lli_och_sem.  If
 *   md_lock_match() does not find a matching MDS_INODELOCK_OPEN ibits lock,
 *   ll_md_real_close() is called; a matching lock means the MDS close can
 *   be skipped for now.
 * - For a negative dentry an error is logged.
 * - Finally the private data pointer is cleared, @fd is freed and
 *   ll_capa_close() is called.
 *
 * NOTE(review): the assignments of the lock mode / open-mode variables
 * inside the counter branches are not visible in this excerpt.
 */
231 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
234 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
235 struct ll_inode_info *lli = ll_i2info(inode);
239 /* clear group lock, if present */
240 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
242 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
243 fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
244 rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
249 /* Let's see if we have good enough OPEN lock on the file and if
250 we can skip talking to MDS */
251 if (file->f_dentry->d_inode) { /* Can this ever be false? */
253 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
254 struct lustre_handle lockh;
255 struct inode *inode = file->f_dentry->d_inode;
256 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
258 down(&lli->lli_och_sem);
259 if (fd->fd_omode & FMODE_WRITE) {
261 LASSERT(lli->lli_open_fd_write_count);
262 lli->lli_open_fd_write_count--;
263 } else if (fd->fd_omode & FMODE_EXEC) {
265 LASSERT(lli->lli_open_fd_exec_count);
266 lli->lli_open_fd_exec_count--;
269 LASSERT(lli->lli_open_fd_read_count);
270 lli->lli_open_fd_read_count--;
272 up(&lli->lli_och_sem);
274 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
275 LDLM_IBITS, &policy, lockmode,
277 rc = ll_md_real_close(file->f_dentry->d_inode,
281 CERROR("Releasing a file %p with negative dentry %p. Name %s",
282 file, file->f_dentry, file->f_dentry->d_name.name);
285 LUSTRE_FPRIVATE(file) = NULL;
286 ll_file_data_put(fd);
287 ll_capa_close(inode);
/*
 * Forward declaration; the definition is not part of this excerpt.
 * NOTE(review): presumably provided by the LOV layer - confirm, and prefer
 * pulling the prototype from a shared header.
 */
292 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
294 /* While this returns an error code, fput() the caller does not, so we need
295 * to make every effort to clean up all of our state here. Also, applications
296 * rarely check close errors and even if an error is returned they will not
297 * re-try the close call.
/*
 * Release handler for @file on @inode.
 *
 * - With CONFIG_FS_POSIX_ACL, on a remote client releasing the filesystem
 *   root: if the fd carries LL_FILE_RMTACL the flag is cleared and the
 *   per-pid entries in sbi->ll_rct and sbi->ll_et are removed.
 * - The release counter is tallied for every file except the root dentry.
 * - If this fd is the directory's statahead owner (lli_opendir_key),
 *   statahead is stopped.
 * - For the root dentry the private data is simply detached and freed.
 * - Otherwise pending async return codes are cleared and the close is
 *   completed by ll_md_close().
 *
 * NOTE(review): guards around lov_test_and_clear_async_rc() and the return
 * statements are not visible in this excerpt.
 */
299 int ll_file_release(struct inode *inode, struct file *file)
301 struct ll_file_data *fd;
302 struct ll_sb_info *sbi = ll_i2sbi(inode);
303 struct ll_inode_info *lli = ll_i2info(inode);
304 struct lov_stripe_md *lsm = lli->lli_smd;
308 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
309 inode->i_generation, inode);
311 #ifdef CONFIG_FS_POSIX_ACL
312 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
313 inode == inode->i_sb->s_root->d_inode) {
314 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
317 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
318 fd->fd_flags &= ~LL_FILE_RMTACL;
319 rct_del(&sbi->ll_rct, cfs_curproc_pid());
320 et_search_free(&sbi->ll_et, cfs_curproc_pid());
325 if (inode->i_sb->s_root != file->f_dentry)
326 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
327 fd = LUSTRE_FPRIVATE(file);
330 /* The last ref on @file, maybe not the owner pid of statahead.
331 * Different processes can open the same dir, "ll_opendir_key" means:
332 * it is me that should stop the statahead thread. */
333 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
334 ll_stop_statahead(inode, lli->lli_opendir_key);
336 if (inode->i_sb->s_root == file->f_dentry) {
337 LUSTRE_FPRIVATE(file) = NULL;
338 ll_file_data_put(fd);
343 lov_test_and_clear_async_rc(lsm);
344 lli->lli_async_rc = 0;
346 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/*
 * Open @file on the MDS by enqueueing the intent @itp.
 *
 * @lmm/@lmmsize optionally carry striping information to set; when both
 * are zero the open lock is requested as well (MDS_OPEN_LOCK).
 *
 * Visible steps: build the md_op_data for the parent and the file's inode,
 * call md_intent_lock(), release the op_data, handle enqueue/open errors
 * (releasing a stray open handle with ll_release_openhandle() on one of the
 * error paths), attach the inode to a granted lock with md_set_lock_data(),
 * refresh the inode from the reply with ll_prep_inode(), and finally drop
 * the request reference, clear DISP_ENQ_COMPLETE and drop the intent lock.
 *
 * NOTE(review): the condition selecting the -ESTALE exit path and the
 * labels are not visible in this excerpt.
 */
350 static int ll_intent_file_open(struct file *file, void *lmm,
351 int lmmsize, struct lookup_intent *itp)
353 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
354 struct dentry *parent = file->f_dentry->d_parent;
355 const char *name = file->f_dentry->d_name.name;
356 const int len = file->f_dentry->d_name.len;
357 struct md_op_data *op_data;
358 struct ptlrpc_request *req;
365 /* Usually we come here only for NFSD, and we want open lock.
366 But we can also get here with pre 2.6.15 patchless kernels, and in
367 that case that lock is also ok */
368 /* We can also get here if there was cached open handle in revalidate_it
369 * but it disappeared while we were getting from there to ll_file_open.
370 * But this means this file was closed and immediately opened which
371 * makes a good candidate for using OPEN lock */
372 /* If lmmsize & lmm are not 0, we are just setting stripe info
373 * parameters. No need for the open lock */
374 if (!lmm && !lmmsize)
375 itp->it_flags |= MDS_OPEN_LOCK;
377 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
378 file->f_dentry->d_inode, name, len,
379 O_RDWR, LUSTRE_OPC_ANY, NULL);
381 RETURN(PTR_ERR(op_data));
383 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
384 0 /*unused */, &req, ll_md_blocking_ast, 0);
385 ll_finish_md_op_data(op_data);
387 /* reason for keep own exit path - don`t flood log
388 * with messages with -ESTALE errors.
390 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
391 it_open_error(DISP_OPEN_OPEN, itp))
393 ll_release_openhandle(file->f_dentry, itp);
397 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
398 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
399 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
403 if (itp->d.lustre.it_lock_mode)
404 md_set_lock_data(sbi->ll_md_exp,
405 &itp->d.lustre.it_lock_handle,
406 file->f_dentry->d_inode);
408 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
410 ptlrpc_req_finished(itp->d.lustre.it_data);
411 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
412 ll_intent_drop_lock(itp);
/*
 * Initialise client open handle @och from the open reply attached to @it.
 *
 * Copies the MDS file handle out of the reply's mdt_body, sets the handle
 * magic, FID and open flags, records the reply's I/O epoch in @lli, and
 * registers the open for replay.
 *
 * Returns the result of md_set_open_replay_data().
 */
417 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
418 struct lookup_intent *it, struct obd_client_handle *och)
420 struct ptlrpc_request *req = it->d.lustre.it_data;
421 struct mdt_body *body;
425 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
426 LASSERT(body != NULL); /* reply already checked out */
428 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
429 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
430 och->och_fid = lli->lli_fid;
431 och->och_flags = it->it_flags;
432 lli->lli_ioepoch = body->ioepoch;
434 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish an open locally: attach @fd to @file and, when a new MDS open
 * handle is involved, fill @och from the intent reply.
 *
 * Asserts that @file has no private data yet.  The handle is filled via
 * ll_och_fill(); for write opens whose reply carries OBD_MD_FLSIZE the
 * opened epoch is logged.  Then @fd becomes the file's private data, its
 * readahead state is initialised and the open mode is recorded in
 * fd->fd_omode.
 *
 * NOTE(review): the condition guarding the @och block (callers in this file
 * pass NULL when reusing an existing handle) and the error return after
 * ll_och_fill() are not visible in this excerpt.
 */
437 int ll_local_open(struct file *file, struct lookup_intent *it,
438 struct ll_file_data *fd, struct obd_client_handle *och)
440 struct inode *inode = file->f_dentry->d_inode;
441 struct ll_inode_info *lli = ll_i2info(inode);
444 LASSERT(!LUSTRE_FPRIVATE(file));
449 struct ptlrpc_request *req = it->d.lustre.it_data;
450 struct mdt_body *body;
453 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
457 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
458 if ((it->it_flags & FMODE_WRITE) &&
459 (body->valid & OBD_MD_FLSIZE))
460 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
461 lli->lli_ioepoch, PFID(&lli->lli_fid));
464 LUSTRE_FPRIVATE(file) = fd;
465 ll_readahead_init(inode, &fd->fd_ras);
466 fd->fd_omode = it->it_flags;
470 /* Open a file, and (for the very first open) create objects on the OSTs at
471 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
472 * creation or open until ll_lov_setstripe() ioctl is called. We grab
473 * lli_open_sem to ensure no other process will create objects, send the
474 * stripe MD to the MDS, or try to destroy the objects if that fails.
476 * If we already have the stripe MD locally then we don't request it in
477 * md_open(), by passing a lmm_size = 0.
479 * It is up to the application to ensure no other processes open this file
480 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
481 * used. We might be able to avoid races of that sort by getting lli_open_sem
482 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
483 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * Open handler for @file on @inode.
 *
 * Outline of the visible logic:
 *  1. Allocate the per-file ll_file_data.
 *  2. For directories, claim or reset statahead ownership
 *     (lli_opendir_key / lli_opendir_pid) under lli_lock.
 *  3. The filesystem root only gets @fd attached as private data.
 *  4. If no usable intent was passed in, build one locally (oit) from
 *     f_flags: derive the access mode, treat O_TRUNC as a write open, add
 *     MDS_OPEN_OWNEROVERRIDE for write opens and strip O_EXCL.
 *  5. Pick the MDS open-handle slot and use counter by mode
 *     (write / exec / read) and, under lli_och_sem, either reuse an
 *     existing handle (releasing a redundant open carried by the intent)
 *     or obtain a new one: enqueue via ll_intent_file_open() with
 *     lli_och_sem dropped, allocate the obd_client_handle and complete
 *     with ll_local_open().
 *  6. Error paths free @fd, free a newly allocated handle, release
 *     lli_och_sem and stop statahead if it was set up by this call.
 *
 * NOTE(review): many lines (labels, several conditions, counter updates and
 * returns) are missing from this excerpt; the outline above names only what
 * the visible statements do and must be confirmed against the full file.
 */
485 int ll_file_open(struct inode *inode, struct file *file)
487 struct ll_inode_info *lli = ll_i2info(inode);
488 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
489 .it_flags = file->f_flags };
490 struct lov_stripe_md *lsm;
491 struct ptlrpc_request *req = NULL;
492 struct obd_client_handle **och_p;
494 struct ll_file_data *fd;
495 int rc = 0, opendir_set = 0;
498 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
499 inode->i_generation, inode, file->f_flags);
501 #ifdef HAVE_VFS_INTENT_PATCHES
504 it = file->private_data; /* XXX: compat macro */
505 file->private_data = NULL; /* prevent ll_local_open assertion */
508 fd = ll_file_data_get();
513 if (S_ISDIR(inode->i_mode)) {
515 spin_lock(&lli->lli_lock);
516 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
517 LASSERT(lli->lli_sai == NULL);
518 lli->lli_opendir_key = fd;
519 lli->lli_opendir_pid = cfs_curproc_pid();
521 } else if (unlikely(lli->lli_opendir_pid == cfs_curproc_pid() &&
522 lli->lli_opendir_key != NULL)) {
523 /* Two cases for this:
524 * (1) The same process open such directory many times.
525 * (2) The old process opened the directory, and exited
526 * before its children processes. Then new process
527 * with the same pid opens such directory before the
528 * old process's children processes exit.
529 * reset stat ahead for such cases. */
530 spin_unlock(&lli->lli_lock);
531 CDEBUG(D_INFO, "Conflict statahead for %.*s "DFID
532 " reset it.\n", file->f_dentry->d_name.len,
533 file->f_dentry->d_name.name,
534 PFID(&lli->lli_fid));
535 ll_stop_statahead(inode, lli->lli_opendir_key);
538 spin_unlock(&lli->lli_lock);
541 if (inode->i_sb->s_root == file->f_dentry) {
542 LUSTRE_FPRIVATE(file) = fd;
546 if (!it || !it->d.lustre.it_disposition) {
547 /* Convert f_flags into access mode. We cannot use file->f_mode,
548 * because everything but O_ACCMODE mask was stripped from
550 if ((oit.it_flags + 1) & O_ACCMODE)
552 if (file->f_flags & O_TRUNC)
553 oit.it_flags |= FMODE_WRITE;
555 /* kernel only call f_op->open in dentry_open. filp_open calls
556 * dentry_open after call to open_namei that checks permissions.
557 * Only nfsd_open call dentry_open directly without checking
558 * permissions and because of that this code below is safe. */
559 if (oit.it_flags & FMODE_WRITE)
560 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
562 /* We do not want O_EXCL here, presumably we opened the file
563 * already? XXX - NFS implications? */
564 oit.it_flags &= ~O_EXCL;
570 /* Let's see if we have file open on MDS already. */
571 if (it->it_flags & FMODE_WRITE) {
572 och_p = &lli->lli_mds_write_och;
573 och_usecount = &lli->lli_open_fd_write_count;
574 } else if (it->it_flags & FMODE_EXEC) {
575 och_p = &lli->lli_mds_exec_och;
576 och_usecount = &lli->lli_open_fd_exec_count;
578 och_p = &lli->lli_mds_read_och;
579 och_usecount = &lli->lli_open_fd_read_count;
582 down(&lli->lli_och_sem);
583 if (*och_p) { /* Open handle is present */
584 if (it_disposition(it, DISP_OPEN_OPEN)) {
585 /* Well, there's extra open request that we do not need,
586 let's close it somehow. This will decref request. */
587 rc = it_open_error(DISP_OPEN_OPEN, it);
589 up(&lli->lli_och_sem);
590 ll_file_data_put(fd);
591 GOTO(out_openerr, rc);
593 ll_release_openhandle(file->f_dentry, it);
594 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
599 rc = ll_local_open(file, it, fd, NULL);
602 up(&lli->lli_och_sem);
603 ll_file_data_put(fd);
604 GOTO(out_openerr, rc);
607 LASSERT(*och_usecount == 0);
608 if (!it->d.lustre.it_disposition) {
609 /* We cannot just request lock handle now, new ELC code
610 means that one of other OPEN locks for this file
611 could be cancelled, and since blocking ast handler
612 would attempt to grab och_sem as well, that would
613 result in a deadlock */
614 up(&lli->lli_och_sem);
615 it->it_flags |= O_CHECK_STALE;
616 rc = ll_intent_file_open(file, NULL, 0, it);
617 it->it_flags &= ~O_CHECK_STALE;
619 ll_file_data_put(fd);
620 GOTO(out_openerr, rc);
623 /* Got some error? Release the request */
624 if (it->d.lustre.it_status < 0) {
625 req = it->d.lustre.it_data;
626 ptlrpc_req_finished(req);
628 md_set_lock_data(ll_i2sbi(inode)->ll_md_exp,
629 &it->d.lustre.it_lock_handle,
630 file->f_dentry->d_inode);
633 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
635 ll_file_data_put(fd);
636 GOTO(out_och_free, rc = -ENOMEM);
639 req = it->d.lustre.it_data;
641 /* md_intent_lock() didn't get a request ref if there was an
642 * open error, so don't do cleanup on the request here
644 /* XXX (green): Should not we bail out on any error here, not
645 * just open error? */
646 rc = it_open_error(DISP_OPEN_OPEN, it);
648 ll_file_data_put(fd);
649 GOTO(out_och_free, rc);
652 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
653 rc = ll_local_open(file, it, fd, *och_p);
655 ll_file_data_put(fd);
656 GOTO(out_och_free, rc);
659 up(&lli->lli_och_sem);
661 /* Must do this outside lli_och_sem lock to prevent deadlock where
662 different kind of OPEN lock for this same inode gets cancelled
663 by ldlm_cancel_lru */
664 if (!S_ISREG(inode->i_mode))
671 if (file->f_flags & O_LOV_DELAY_CREATE ||
672 !(file->f_mode & FMODE_WRITE)) {
673 CDEBUG(D_INODE, "object creation was delayed\n");
677 file->f_flags &= ~O_LOV_DELAY_CREATE;
680 ptlrpc_req_finished(req);
682 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
686 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
687 *och_p = NULL; /* OBD_FREE writes some magic there */
690 up(&lli->lli_och_sem);
692 if (opendir_set != 0)
693 ll_stop_statahead(inode, lli->lli_opendir_key);
699 /* Fills the obdo with the attributes for the lsm */
/*
 * Fetch object attributes for stripe descriptor @lsm through export @exp.
 *
 * Sets the object id, group, S_IFREG mode and the requested-attribute mask
 * (id, type, size, blocks, block size, a/m/ctime, ...) on oinfo.oi_oa,
 * attaches @capa, issues obd_getattr_async() on a freshly prepared request
 * set, waits for it with ptlrpc_set_wait() and destroys the set.  Afterwards
 * o_valid is narrowed to blocks, block size, times and size.
 *
 * NOTE(review): the line binding oinfo.oi_oa to @obdo, and the error checks
 * between the calls, are not visible in this excerpt.
 */
700 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
701 struct obd_capa *capa, struct obdo *obdo)
703 struct ptlrpc_request_set *set;
704 struct obd_info oinfo = { { { 0 } } };
709 LASSERT(lsm != NULL);
713 oinfo.oi_oa->o_id = lsm->lsm_object_id;
714 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
715 oinfo.oi_oa->o_mode = S_IFREG;
716 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
717 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
718 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
719 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
721 oinfo.oi_capa = capa;
723 set = ptlrpc_prep_set();
725 CERROR("can't allocate ptlrpc set\n");
728 rc = obd_getattr_async(exp, &oinfo, set);
730 rc = ptlrpc_set_wait(set);
731 ptlrpc_set_destroy(set);
734 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
735 OBD_MD_FLATIME | OBD_MD_FLMTIME |
736 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
740 /* Fills the obdo with the attributes for the inode defined by lsm */
/*
 * Refresh @inode from its objects' attributes.
 *
 * Obtains the MDS capability, calls ll_lsm_getattr() for the inode's stripe
 * descriptor on the data export, applies the returned fields to the inode
 * with obdo_refresh_inode() and logs object id, size, blocks and block size.
 *
 * NOTE(review): the capability release and the rc check are not visible in
 * this excerpt.
 */
741 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
743 struct ll_inode_info *lli = ll_i2info(inode);
744 struct obd_capa *capa = ll_mdscapa_get(inode);
748 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
751 obdo_refresh_inode(inode, obdo, obdo->o_valid);
753 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
754 lli->lli_smd->lsm_object_id, i_size_read(inode),
755 (unsigned long long)inode->i_blocks,
756 (unsigned long)ll_inode_blksize(inode));
/*
 * Merge the lock value block of @inode's stripes into the inode.
 *
 * Under the inode size lock: seed an lvb from the inode, let
 * obd_merge_lvb() merge it with the stripe descriptor, then write the
 * resulting size, block count and m/a/ctime back to the inode.
 */
761 int ll_merge_lvb(struct inode *inode)
763 struct ll_inode_info *lli = ll_i2info(inode);
764 struct ll_sb_info *sbi = ll_i2sbi(inode);
770 ll_inode_size_lock(inode, 1);
771 inode_init_lvb(inode, &lvb);
772 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
773 i_size_write(inode, lvb.lvb_size);
774 inode->i_blocks = lvb.lvb_blocks;
776 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
777 LTIME_S(inode->i_atime) = lvb.lvb_atime;
778 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
779 ll_inode_size_unlock(inode, 1);
/*
 * Query size, blocks and timestamps for stripe descriptor @lsm and store
 * them in the caller's stat buffer.
 *
 * Uses ll_lsm_getattr() on the data export with no capability and a
 * zero-initialised obdo, then copies o_size, o_blocks, o_mtime, o_atime and
 * o_ctime into st.
 *
 * NOTE(review): the declaration of the `st` parameter and the rc check
 * before the copy are not visible in this excerpt.
 */
784 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
787 struct obdo obdo = { 0 };
790 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
792 st->st_size = obdo.o_size;
793 st->st_blocks = obdo.o_blocks;
794 st->st_mtime = obdo.o_mtime;
795 st->st_atime = obdo.o_atime;
796 st->st_ctime = obdo.o_ctime;
/*
 * Initialise cl_io @io for an operation on @file.
 *
 * Zeroes @io, copies O_NONBLOCK and O_APPEND from f_flags, binds the
 * inode's cl_object and chooses the lock requirement:
 *   - CILR_NEVER if the fd ignores locks or the superblock has
 *     LL_SBI_NOLCK set,
 *   - CILR_MANDATORY for O_APPEND,
 *   - CILR_MAYBE otherwise.
 *
 * NOTE(review): the wr_append assignment is presumably guarded by @write on
 * a line missing from this excerpt - confirm.
 */
801 void ll_io_init(struct cl_io *io, const struct file *file, int write)
803 struct inode *inode = file->f_dentry->d_inode;
804 struct ll_sb_info *sbi = ll_i2sbi(inode);
805 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
808 memset(io, 0, sizeof *io);
809 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
811 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
812 io->ci_obj = ll_i2info(inode)->lli_clob;
813 io->ci_lockreq = CILR_MAYBE;
814 if (fd->fd_flags & LL_FILE_IGNORE_LOCK || sbi->ll_flags & LL_SBI_NOLCK)
815 io->ci_lockreq = CILR_NEVER;
816 else if (file->f_flags & O_APPEND)
817 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common read/write/sendfile path built on the cl_io machinery.
 *
 * Takes the per-environment cl_io, initialises it with ll_io_init() and
 * cl_io_rw_init() for @iot at *@ppos / @count.  When cl_io_rw_init()
 * returns 0 the per-layer I/O state is filled from @args (sendfile actor
 * and target, or iovec / segment count / kiocb) and cl_io_loop() runs the
 * I/O; otherwise cl_io_rw_init() already handled the I/O and io->ci_result
 * is used.  If any bytes were transferred, *@ppos is updated from the io.
 *
 * NOTE(review): local declarations, the else branches, cl_io_fini() and the
 * return value computation are not visible in this excerpt.
 */
820 static ssize_t ll_file_io_generic(const struct lu_env *env,
821 struct ccc_io_args *args, struct file *file,
822 enum cl_io_type iot, loff_t *ppos, size_t count)
828 io = &ccc_env_info(env)->cti_io;
829 ll_io_init(io, file, iot == CIT_WRITE);
832 io->u.ci_rd.rd_is_sendfile = args->cia_is_sendfile;
834 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
835 struct vvp_io *vio = vvp_env_io(env);
836 struct ccc_io *cio = ccc_env_io(env);
837 if (cl_io_is_sendfile(io)) {
838 vio->u.read.cui_actor = args->cia_actor;
839 vio->u.read.cui_target = args->cia_target;
841 cio->cui_iov = args->cia_iov;
842 cio->cui_nrsegs = args->cia_nrsegs;
843 #ifndef HAVE_FILE_WRITEV
844 cio->cui_iocb = args->cia_iocb;
847 cio->cui_fd = LUSTRE_FPRIVATE(file);
848 result = cl_io_loop(env, io);
850 /* cl_io_rw_init() handled IO */
851 result = io->ci_result;
852 if (io->ci_nob > 0) {
854 *ppos = io->u.ci_wr.wr.crw_pos;
862 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate a user iovec array and compute its total byte count.
 *
 * Walks the *@nr_segs segments of @iov, rejecting negative or wrapping
 * lengths and checking each segment with access_ok(VERIFY_READ, ...).  A
 * segment that fails the access check has its length backed out of the
 * running total.
 *
 * NOTE(review): the accumulation into the running total, the truncation of
 * *@nr_segs, the store to *@count and the return statements are not visible
 * in this excerpt.
 */
864 static int ll_file_get_iov_count(const struct iovec *iov,
865 unsigned long *nr_segs, size_t *count)
870 for (seg = 0; seg < *nr_segs; seg++) {
871 const struct iovec *iv = &iov[seg];
874 * If any segment has a negative length, or the cumulative
875 * length ever wraps negative then return -EINVAL.
878 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
880 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
885 cnt -= iv->iov_len; /* This segment is no good */
892 #ifdef HAVE_FILE_READV
/*
 * Vectored read entry point (built when HAVE_FILE_READV is defined).
 *
 * Validates @iov with ll_file_get_iov_count(), takes a cl environment,
 * fills the per-environment ccc_io_args (not sendfile, iovec, segment
 * count) and performs the read through ll_file_io_generic(); the
 * environment is released afterwards.
 */
893 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
894 unsigned long nr_segs, loff_t *ppos)
897 struct ccc_io_args *args;
903 result = ll_file_get_iov_count(iov, &nr_segs, &count);
907 env = cl_env_get(&refcheck);
909 RETURN(PTR_ERR(env));
911 args = &vvp_env_info(env)->vti_args;
912 args->cia_is_sendfile = 0;
913 args->cia_iov = (struct iovec *)iov;
914 args->cia_nrsegs = nr_segs;
915 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
916 cl_env_put(env, &refcheck);
/*
 * Plain read entry point, readv-based variant: wraps the user buffer in the
 * per-environment single-element iovec and forwards to ll_file_readv().
 */
920 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
924 struct iovec *local_iov;
929 env = cl_env_get(&refcheck);
931 RETURN(PTR_ERR(env));
933 local_iov = &vvp_env_info(env)->vti_local_iov;
934 local_iov->iov_base = (void __user *)buf;
935 local_iov->iov_len = count;
936 result = ll_file_readv(file, local_iov, 1, ppos);
937 cl_env_put(env, &refcheck);
/*
 * AIO read entry point.
 *
 * Validates @iov with ll_file_get_iov_count(), takes a cl environment,
 * fills the per-environment ccc_io_args (not sendfile, iovec, segment
 * count, kiocb) and performs the read through ll_file_io_generic() on
 * iocb->ki_filp, using and updating iocb->ki_pos as the file position.
 */
942 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
943 unsigned long nr_segs, loff_t pos)
946 struct ccc_io_args *args;
952 result = ll_file_get_iov_count(iov, &nr_segs, &count);
956 env = cl_env_get(&refcheck);
958 RETURN(PTR_ERR(env));
960 args = &vvp_env_info(env)->vti_args;
961 args->cia_is_sendfile = 0;
962 args->cia_iov = (struct iovec *)iov;
963 args->cia_nrsegs = nr_segs;
964 args->cia_iocb = iocb;
965 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
966 &iocb->ki_pos, count);
967 cl_env_put(env, &refcheck);
/*
 * Plain read entry point, AIO-based variant: wraps the user buffer in the
 * per-environment iovec, sets up a synchronous kiocb positioned at *ppos,
 * forwards to ll_file_aio_read() and copies the resulting position back to
 * *ppos.
 */
971 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
975 struct iovec *local_iov;
981 env = cl_env_get(&refcheck);
983 RETURN(PTR_ERR(env));
985 local_iov = &vvp_env_info(env)->vti_local_iov;
986 kiocb = &vvp_env_info(env)->vti_kiocb;
987 local_iov->iov_base = (void __user *)buf;
988 local_iov->iov_len = count;
989 init_sync_kiocb(kiocb, file);
990 kiocb->ki_pos = *ppos;
991 kiocb->ki_left = count;
993 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
994 *ppos = kiocb->ki_pos;
996 cl_env_put(env, &refcheck);
1002 * Write to a file (through the page cache).
1004 #ifdef HAVE_FILE_WRITEV
/*
 * Vectored write entry point (built when HAVE_FILE_WRITEV is defined).
 *
 * Validates @iov with ll_file_get_iov_count(), takes a cl environment,
 * fills the per-environment ccc_io_args (iovec, segment count) and performs
 * the write through ll_file_io_generic(); the environment is released
 * afterwards.
 */
1005 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1006 unsigned long nr_segs, loff_t *ppos)
1009 struct ccc_io_args *args;
1015 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1019 env = cl_env_get(&refcheck);
1021 RETURN(PTR_ERR(env));
1023 args = &vvp_env_info(env)->vti_args;
1024 args->cia_iov = (struct iovec *)iov;
1025 args->cia_nrsegs = nr_segs;
1026 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1027 cl_env_put(env, &refcheck);
/*
 * Plain write entry point, writev-based variant: wraps the user buffer in
 * the per-environment single-element iovec and forwards to
 * ll_file_writev().
 */
1031 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1035 struct iovec *local_iov;
1040 env = cl_env_get(&refcheck);
1042 RETURN(PTR_ERR(env));
1044 local_iov = &vvp_env_info(env)->vti_local_iov;
1045 local_iov->iov_base = (void __user *)buf;
1046 local_iov->iov_len = count;
1048 result = ll_file_writev(file, local_iov, 1, ppos);
1049 cl_env_put(env, &refcheck);
1053 #else /* AIO stuff */
/*
 * AIO write entry point.
 *
 * Validates @iov with ll_file_get_iov_count(), takes a cl environment,
 * fills the per-environment ccc_io_args (iovec, segment count, kiocb) and
 * performs the write through ll_file_io_generic() on iocb->ki_filp, using
 * and updating iocb->ki_pos as the file position.
 */
1054 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1055 unsigned long nr_segs, loff_t pos)
1058 struct ccc_io_args *args;
1064 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1068 env = cl_env_get(&refcheck);
1070 RETURN(PTR_ERR(env));
1072 args = &vvp_env_info(env)->vti_args;
1073 args->cia_iov = (struct iovec *)iov;
1074 args->cia_nrsegs = nr_segs;
1075 args->cia_iocb = iocb;
1076 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1077 &iocb->ki_pos, count);
1078 cl_env_put(env, &refcheck);
/*
 * Plain write entry point, AIO-based variant: wraps the user buffer in the
 * per-environment iovec, sets up a synchronous kiocb positioned at *ppos,
 * forwards to ll_file_aio_write() and copies the resulting position back to
 * *ppos.
 */
1082 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1086 struct iovec *local_iov;
1087 struct kiocb *kiocb;
1092 env = cl_env_get(&refcheck);
1094 RETURN(PTR_ERR(env));
1096 local_iov = &vvp_env_info(env)->vti_local_iov;
1097 kiocb = &vvp_env_info(env)->vti_kiocb;
1098 local_iov->iov_base = (void __user *)buf;
1099 local_iov->iov_len = count;
1100 init_sync_kiocb(kiocb, file);
1101 kiocb->ki_pos = *ppos;
1102 kiocb->ki_left = count;
1104 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1105 *ppos = kiocb->ki_pos;
1107 cl_env_put(env, &refcheck);
1114 * Send file content (through pagecache) somewhere with helper
/*
 * sendfile entry point: read @count bytes from @in_file at *@ppos and hand
 * the data to @actor / @target.
 *
 * Marks the per-environment ccc_io_args as a sendfile request, records the
 * actor and target, and runs the transfer as a CIT_READ through
 * ll_file_io_generic().
 */
1116 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1117 read_actor_t actor, void *target)
1120 struct ccc_io_args *args;
1125 env = cl_env_get(&refcheck);
1127 RETURN(PTR_ERR(env));
1129 args = &vvp_env_info(env)->vti_args;
1130 args->cia_is_sendfile = 1;
1131 args->cia_target = target;
1132 args->cia_actor = actor;
1133 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1134 cl_env_put(env, &refcheck);
/*
 * Recreate a lost object for @inode as described by a user-supplied
 * struct ll_recreate_obj (administrative ioctl).
 *
 * Requires CFS_CAP_SYS_ADMIN.  The request is copied in from user space;
 * under the inode size lock a copy of the inode's stripe descriptor is
 * made, an obdo is filled with the requested object id, group and OST
 * index (carried in o_nlink) plus OBD_FL_RECREATE_OBJS and the inode's
 * type and timestamps, and obd_create() is called on the data export.  The
 * stripe-descriptor copy is freed and the size lock released on the way
 * out.
 *
 * NOTE(review): the obdo allocation/free, the NULL checks and the error
 * returns are not visible in this excerpt.
 */
1138 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1141 struct obd_export *exp = ll_i2dtexp(inode);
1142 struct ll_recreate_obj ucreatp;
1143 struct obd_trans_info oti = { 0 };
1144 struct obdo *oa = NULL;
1147 struct lov_stripe_md *lsm, *lsm2;
1150 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1153 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1154 sizeof(struct ll_recreate_obj)))
1161 ll_inode_size_lock(inode, 0);
1162 lsm = ll_i2info(inode)->lli_smd;
1164 GOTO(out, rc = -ENOENT);
1165 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1166 (lsm->lsm_stripe_count));
1168 OBD_ALLOC(lsm2, lsm_size);
1170 GOTO(out, rc = -ENOMEM);
1172 oa->o_id = ucreatp.lrc_id;
1173 oa->o_gr = ucreatp.lrc_group;
1174 oa->o_nlink = ucreatp.lrc_ost_idx;
1175 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1176 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1177 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1178 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1180 memcpy(lsm2, lsm, lsm_size);
1181 rc = obd_create(exp, oa, &lsm2, &oti);
1183 OBD_FREE(lsm2, lsm_size);
1186 ll_inode_size_unlock(inode, 0);
/*
 * Set striping for @inode by re-opening @file on the MDS with the striping
 * request @lum (of @lum_size bytes) attached.
 *
 * Under the inode size lock: if a stripe descriptor already exists the
 * request is refused (logged as "stripe already exists"); otherwise an
 * IT_OPEN intent with @flags is sent through ll_intent_file_open().  A
 * negative lookup maps to -ENOENT and a non-zero intent status is returned
 * as the error; on success the open handle obtained only for this purpose
 * is released with ll_release_openhandle().  The size lock and the intent
 * are released on exit, and the error path drops the intent's request
 * reference.
 *
 * NOTE(review): the return code used when striping already exists, and the
 * labels, are not visible in this excerpt.
 */
1191 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1192 int flags, struct lov_user_md *lum, int lum_size)
1194 struct lov_stripe_md *lsm;
1195 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1199 ll_inode_size_lock(inode, 0);
1200 lsm = ll_i2info(inode)->lli_smd;
1202 ll_inode_size_unlock(inode, 0);
1203 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1208 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1211 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1212 GOTO(out_req_free, rc = -ENOENT);
1213 rc = oit.d.lustre.it_status;
1215 GOTO(out_req_free, rc);
1217 ll_release_openhandle(file->f_dentry, &oit);
1220 ll_inode_size_unlock(inode, 0);
1221 ll_intent_release(&oit);
1224 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the striping EA of @filename (looked up relative to @inode) from
 * the MDS.
 *
 * Visible steps:
 *  - size the reply buffer with ll_get_max_mdsize();
 *  - issue md_getattr_name() asking for OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
 *  - fail with -ENODATA when the reply carries no EA, and with -EPROTO when
 *    the EA magic is not V1, V3 or JOIN;
 *  - on hosts where LOV_MAGIC differs from its little-endian encoding,
 *    byte-swap the EA header in place, and the per-object array only for
 *    regular files;
 *  - for LOV_MAGIC_JOIN, unpack and check the stripe descriptor and build a
 *    newly allocated lov_user_md_join whose per-stripe entries carry extent
 *    start/end, object id/group and OST generation/index; that buffer then
 *    replaces lmm;
 *  - report the EA size through *@lmm_size.
 *
 * NOTE(review): the stores to *@lmmp and *@request, the labels, and several
 * error checks are not visible in this excerpt; who frees the JOIN buffer
 * cannot be determined from here.
 */
1228 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1229 struct lov_mds_md **lmmp, int *lmm_size,
1230 struct ptlrpc_request **request)
1232 struct ll_sb_info *sbi = ll_i2sbi(inode);
1233 struct mdt_body *body;
1234 struct lov_mds_md *lmm = NULL;
1235 struct ptlrpc_request *req = NULL;
1236 struct obd_capa *oc;
1239 rc = ll_get_max_mdsize(sbi, &lmmsize);
1243 oc = ll_mdscapa_get(inode);
1244 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1245 oc, filename, strlen(filename) + 1,
1246 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1247 ll_i2suppgid(inode), &req);
1250 CDEBUG(D_INFO, "md_getattr_name failed "
1251 "on %s: rc %d\n", filename, rc);
1255 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1256 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1258 lmmsize = body->eadatasize;
1260 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1262 GOTO(out, rc = -ENODATA);
1265 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1266 LASSERT(lmm != NULL);
1268 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1269 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
1270 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
1271 GOTO(out, rc = -EPROTO);
1275 * This is coming from the MDS, so is probably in
1276 * little endian. We convert it to host endian before
1277 * passing it to userspace.
1279 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1280 /* if function called for directory - we should
1281 * avoid swab not existent lsm objects */
1282 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1283 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1284 if (S_ISREG(body->mode))
1285 lustre_swab_lov_user_md_objects(
1286 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1287 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1288 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1289 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1290 if (S_ISREG(body->mode))
1291 lustre_swab_lov_user_md_objects(
1292 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1293 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1294 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
1295 lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
1299 if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
1300 struct lov_stripe_md *lsm;
1301 struct lov_user_md_join *lmj;
1302 int lmj_size, i, aindex = 0;
1304 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
1306 GOTO(out, rc = -ENOMEM);
1307 rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm);
1309 GOTO(out_free_memmd, rc);
1311 lmj_size = sizeof(struct lov_user_md_join) +
1312 lsm->lsm_stripe_count *
1313 sizeof(struct lov_user_ost_data_join);
1314 OBD_ALLOC(lmj, lmj_size);
1316 GOTO(out_free_memmd, rc = -ENOMEM);
1318 memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
1319 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1320 struct lov_extent *lex =
1321 &lsm->lsm_array->lai_ext_array[aindex];
1323 if (lex->le_loi_idx + lex->le_stripe_count <= i)
1325 CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
1326 LPU64" len %d\n", aindex, i,
1327 lex->le_start, (int)lex->le_len);
1328 lmj->lmm_objects[i].l_extent_start =
1331 if ((int)lex->le_len == -1)
1332 lmj->lmm_objects[i].l_extent_end = -1;
1334 lmj->lmm_objects[i].l_extent_end =
1335 lex->le_start + lex->le_len;
1336 lmj->lmm_objects[i].l_object_id =
1337 lsm->lsm_oinfo[i]->loi_id;
1338 lmj->lmm_objects[i].l_object_gr =
1339 lsm->lsm_oinfo[i]->loi_gr;
1340 lmj->lmm_objects[i].l_ost_gen =
1341 lsm->lsm_oinfo[i]->loi_ost_gen;
1342 lmj->lmm_objects[i].l_ost_idx =
1343 lsm->lsm_oinfo[i]->loi_ost_idx;
1345 lmm = (struct lov_mds_md *)lmj;
1348 obd_free_memmd(sbi->ll_dt_exp, &lsm);
1352 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA helper: set striping from a user-supplied lov_user_md
 * that already names its object.
 *
 * The caller must hold CFS_CAP_SYS_ADMIN.  A buffer of
 * sizeof(struct lov_user_md) + sizeof(struct lov_user_ost_data) bytes is
 * allocated, filled from the user pointer carried in arg, handed to
 * ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS | FMODE_WRITE, and
 * freed again on both the copy-failure path and the normal path.
 *
 * NOTE(review): the return statements are not visible in this block.
 */
1357 static int ll_lov_setea(struct inode *inode, struct file *file,
1360 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1361 struct lov_user_md *lump;
1362 int lum_size = sizeof(struct lov_user_md) +
1363 sizeof(struct lov_user_ost_data);
1367 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1370 OBD_ALLOC(lump, lum_size);
1374 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1375 OBD_FREE(lump, lum_size);
1379 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1381 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE helper: set striping from a user lov_user_md that
 * may be in v1 or v3 layout.
 *
 * A v3-sized buffer lives on the stack and is also viewed as v1.  The
 * v1-sized prefix is copied first; if its magic is LOV_USER_MAGIC_V3 the
 * full v3 structure is copied again.  The result goes to
 * ll_lov_setstripe_ea_info() with FMODE_WRITE.  Afterwards the user's
 * lmm_stripe_count is zeroed and the layout is reported back through
 * obd_iocontrol(LL_IOC_LOV_GETSTRIPE).
 *
 * NOTE(review): the condition guarding the read-back and the return
 * statements are not visible here, and the put_user() result is ignored.
 */
1385 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1388 struct lov_user_md_v3 lumv3;
1389 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1390 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1391 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1394 int flags = FMODE_WRITE;
1397 /* first try with v1 which is smaller than v3 */
1398 lum_size = sizeof(struct lov_user_md_v1);
1399 if (copy_from_user(lumv1, lumv1p, lum_size))
1402 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1403 lum_size = sizeof(struct lov_user_md_v3);
1404 if (copy_from_user(&lumv3, lumv3p, lum_size))
1408 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1410 put_user(0, &lumv1p->lmm_stripe_count);
1411 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1412 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE helper: forward the request, together with the
 * inode's cached stripe MD (lli_smd), to the data export through
 * obd_iocontrol().
 *
 * NOTE(review): any handling of a missing stripe MD is not visible here.
 */
1418 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1420 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1425 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * Handler that ll_file_ioctl() calls for LL_IOC_GROUP_LOCK.
 * NOTE(review): only the first signature line is visible here, so the
 * behavior of this function is not documented.
 */
1429 static int ll_get_grouplock(struct inode *inode, struct file *file,
/*
 * Handler that ll_file_ioctl() calls for LL_IOC_GROUP_UNLOCK.
 * NOTE(review): only the first signature line is visible here, so the
 * behavior of this function is not documented.
 */
1436 static int ll_put_grouplock(struct inode *inode, struct file *file,
1443 #if LUSTRE_FIX >= 50
1444 static int join_sanity_check(struct inode *head, struct inode *tail)
1447 if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
1448 CERROR("server do not support join \n");
1451 if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
1452 CERROR("tail ino %lu and ino head %lu must be regular\n",
1453 head->i_ino, tail->i_ino);
1456 if (head->i_ino == tail->i_ino) {
1457 CERROR("file %lu can not be joined to itself \n", head->i_ino);
1460 if (i_size_read(head) % JOIN_FILE_ALIGN) {
1461 CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
/*
 * Ask the MDS to join the file behind \a tail_filp onto \a head_inode.
 *
 * Steps visible in this block:
 *  - build an IT_OPEN intent whose flags are head_filp->f_flags plus
 *    O_JOIN_FILE;
 *  - prepare md_op_data for head_inode, the tail's parent directory and the
 *    tail's name, passing the address of a local that holds
 *    i_size_read(head_inode);
 *  - md_enqueue() an LDLM_IBITS lock in LCK_CW mode with that intent, then
 *    release the op_data;
 *  - on a negative status or an open error, drop the request reference;
 *  - otherwise drop the lock reference if a lock mode was granted, drop the
 *    request reference, clear DISP_ENQ_COMPLETE, close the open handle with
 *    ll_release_openhandle() on the head dentry and release the intent.
 *
 * tail_dentry is assigned twice (initializer and a later statement); the
 * second assignment is redundant.
 *
 * NOTE(review): the jumps between the error and success paths and the final
 * return are not visible in this block.
 */
1467 static int join_file(struct inode *head_inode, struct file *head_filp,
1468 struct file *tail_filp)
1470 struct dentry *tail_dentry = tail_filp->f_dentry;
1471 struct lookup_intent oit = {.it_op = IT_OPEN,
1472 .it_flags = head_filp->f_flags|O_JOIN_FILE};
1473 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CW,
1474 ll_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL };
1476 struct lustre_handle lockh;
1477 struct md_op_data *op_data;
1482 tail_dentry = tail_filp->f_dentry;
1484 data = i_size_read(head_inode);
1485 op_data = ll_prep_md_op_data(NULL, head_inode,
1486 tail_dentry->d_parent->d_inode,
1487 tail_dentry->d_name.name,
1488 tail_dentry->d_name.len, 0,
1489 LUSTRE_OPC_ANY, &data);
1490 if (IS_ERR(op_data))
1491 RETURN(PTR_ERR(op_data));
1493 rc = md_enqueue(ll_i2mdexp(head_inode), &einfo, &oit,
1494 op_data, &lockh, NULL, 0, NULL, 0);
1496 ll_finish_md_op_data(op_data);
1500 rc = oit.d.lustre.it_status;
1502 if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
1503 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
1504 ptlrpc_req_finished((struct ptlrpc_request *)
1505 oit.d.lustre.it_data);
1509 if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
1511 ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
1512 oit.d.lustre.it_lock_mode = 0;
1514 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1515 it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
1516 ll_release_openhandle(head_filp->f_dentry, &oit);
1518 ll_intent_release(&oit);
/*
 * Join the file named \a filename_tail onto \a head (opened as \a filp).
 *
 * Steps visible in this block:
 *  - open the tail with filp_open(O_WRONLY) and take an inode reference
 *    with igrab();
 *  - order the two inodes by inode number (the larger i_ino is locked
 *    first) so both are always locked in the same sequence;
 *  - take an LCK_EX tree lock over [0, OBD_OBJECT_EOF] on each inode;
 *  - run join_sanity_check() and then join_file();
 *  - unwind through a switch on cleanup_phase: unlock each tree and cancel
 *    unused locks on it, close the tail file, and, when the join succeeded,
 *    free the head's cached stripe MD and reset hlli->lli_smd to NULL.
 *
 * NOTE(review): the "Can not open tail file %s" message has no trailing
 * newline.  The case labels, the cleanup_phase updates and the matching
 * release of the igrab() reference are not visible in this block.
 */
1522 static int ll_file_join(struct inode *head, struct file *filp,
1523 char *filename_tail)
1525 struct inode *tail = NULL, *first = NULL, *second = NULL;
1526 struct dentry *tail_dentry;
1527 struct file *tail_filp, *first_filp, *second_filp;
1528 struct ll_lock_tree first_tree, second_tree;
1529 struct ll_lock_tree_node *first_node, *second_node;
1530 struct ll_inode_info *hlli = ll_i2info(head), *tlli;
1531 int rc = 0, cleanup_phase = 0;
1534 CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
1535 head->i_ino, head->i_generation, head, filename_tail);
1537 tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
1538 if (IS_ERR(tail_filp)) {
1539 CERROR("Can not open tail file %s", filename_tail);
1540 rc = PTR_ERR(tail_filp);
1543 tail = igrab(tail_filp->f_dentry->d_inode);
1545 tlli = ll_i2info(tail);
1546 tail_dentry = tail_filp->f_dentry;
1547 LASSERT(tail_dentry);
1550 /*reorder the inode for lock sequence*/
1551 first = head->i_ino > tail->i_ino ? head : tail;
1552 second = head->i_ino > tail->i_ino ? tail : head;
1553 first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
1554 second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
1556 CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
1557 head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
1558 first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
1559 if (IS_ERR(first_node)){
1560 rc = PTR_ERR(first_node);
1563 first_tree.lt_fd = first_filp->private_data;
1564 rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
1569 second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
1570 if (IS_ERR(second_node)){
1571 rc = PTR_ERR(second_node);
1574 second_tree.lt_fd = second_filp->private_data;
1575 rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
1580 rc = join_sanity_check(head, tail);
1584 rc = join_file(head, filp, tail_filp);
1588 switch (cleanup_phase) {
1590 ll_tree_unlock(&second_tree);
1591 obd_cancel_unused(ll_i2dtexp(second),
1592 ll_i2info(second)->lli_smd, 0, NULL);
1594 ll_tree_unlock(&first_tree);
1595 obd_cancel_unused(ll_i2dtexp(first),
1596 ll_i2info(first)->lli_smd, 0, NULL);
1598 filp_close(tail_filp, 0);
1601 if (head && rc == 0) {
1602 obd_free_memmd(ll_i2sbi(head)->ll_dt_exp,
1604 hlli->lli_smd = NULL;
1609 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
1614 #endif /* LUSTRE_FIX >= 50 */
1617 * Close inode open handle
1619 * \param dentry [in] dentry which contains the inode
1620 * \param it [in,out] intent which contains open info and result
1623 * \retval <0 failure
/*
 * Implementation notes for ll_release_openhandle():
 *  - nothing is done for the filesystem root dentry, nor when the intent
 *    does not carry DISP_OPEN_OPEN;
 *  - otherwise an obd_client_handle is allocated, filled from the intent
 *    with ll_och_fill() and closed on the MD export with
 *    ll_close_inode_openhandle();
 *  - finally, if the intent carries DISP_ENQ_OPEN_REF, the request
 *    reference is dropped, and the disposition bit is cleared.
 *
 * NOTE(review): the early-return values and the final return are not
 * visible in this block.
 */
1625 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1627 struct inode *inode = dentry->d_inode;
1628 struct obd_client_handle *och;
1634 /* Root ? Do nothing. */
1635 if (dentry->d_inode->i_sb->s_root == dentry)
1638 /* No open handle to close? Move away */
1639 if (!it_disposition(it, DISP_OPEN_OPEN))
1642 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1644 OBD_ALLOC(och, sizeof(*och));
1646 GOTO(out, rc = -ENOMEM);
1648 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1649 ll_i2info(inode), it, och);
1651 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1654 /* this one is in place of ll_file_open */
1655 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1656 ptlrpc_req_finished(it->d.lustre.it_data);
1657 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1662 * Get size for inode for which FIEMAP mapping is requested.
1663 * Make the FIEMAP get_info call and returns the result.
/*
 * Implementation notes for ll_fiemap():
 *  - \a fiemap is both the request and the buffer that obd_get_info()
 *    fills; num_bytes is passed on as the value length;
 *  - a file with more than one stripe is only mapped when the caller set
 *    FIEMAP_FLAG_DEVICE_ORDER;
 *  - the key carries the object id/group from the stripe MD plus attributes
 *    copied by obdo_from_inode(); when the resulting o_size is 0,
 *    fm_mapped_extents is set to 0 and no request is sent;
 *  - a failing obd_get_info() is reported with CERROR.
 *
 * NOTE(review): lsm is dereferenced with no NULL check visible in this
 * block - confirm that callers only reach here for files that have a
 * stripe MD.  The return statements are not visible either.
 */
1665 int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1668 struct obd_export *exp = ll_i2dtexp(inode);
1669 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1670 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1671 int vallen = num_bytes;
1675 /* If the stripe_count > 1 and the application does not understand
1676 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1678 if (lsm->lsm_stripe_count > 1 &&
1679 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1682 fm_key.oa.o_id = lsm->lsm_object_id;
1683 fm_key.oa.o_gr = lsm->lsm_object_gr;
1684 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1686 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1689 /* If filesize is 0, then there would be no objects for mapping */
1690 if (fm_key.oa.o_size == 0) {
1691 fiemap->fm_mapped_extents = 0;
1695 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1697 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1699 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * ioctl entry point for regular files (installed as .ioctl in the
 * ll_file_operations tables).
 *
 * Commands dispatched in this block:
 *  - tty ioctls (type 'T' or 't') are recognised up front;
 *  - LL_IOC_GETFLAGS returns fd->fd_flags; LL_IOC_SETFLAGS/CLRFLAGS set or
 *    clear bits in it, refusing LL_FILE_IGNORE_LOCK on a file that was not
 *    opened with O_DIRECT;
 *  - LL_IOC_LOV_SETSTRIPE, LL_IOC_LOV_SETEA, LL_IOC_LOV_GETSTRIPE and
 *    LL_IOC_RECREATE_OBJ go to their ll_lov_*() helpers;
 *  - EXT3_IOC_FIEMAP reads fm_extent_count from user space, allocates a
 *    buffer for the header plus that many extents, copies the request in,
 *    rejects flags outside LUSTRE_FIEMAP_FLAGS_COMPAT with -EBADR (after
 *    writing the offending flags back), optionally starts writeback for
 *    FIEMAP_FLAG_SYNC, calls ll_fiemap() and copies the header plus the
 *    mapped extents back to the user;
 *  - EXT3_IOC_GETFLAGS/SETFLAGS go to ll_iocontrol();
 *    EXT3_IOC_GETVERSION(_OLD) returns inode->i_generation;
 *  - the file-join command calls ll_file_join() when LUSTRE_FIX >= 50 and
 *    otherwise only logs that join is unsupported;
 *  - LL_IOC_GROUP_LOCK/UNLOCK, IOC_OBD_STATFS and LL_IOC_FLUSHCTX go to
 *    their helpers; LL_IOC_PATH2FID copies the inode's lli_fid to the user;
 *  - anything else is offered to the dynamically registered handlers via
 *    ll_iocontrol_call() and then forwarded with obd_iocontrol().
 *
 * NOTE(review): the FIEMAP buffer size is computed from the user-supplied
 * extent_count with no upper bound or overflow check visible in this block.
 * Several case labels and return statements are not visible here.
 */
1704 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1707 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1711 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1712 inode->i_generation, inode, cmd);
1713 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1715 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1716 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1720 case LL_IOC_GETFLAGS:
1721 /* Get the current value of the file flags */
1722 return put_user(fd->fd_flags, (int *)arg);
1723 case LL_IOC_SETFLAGS:
1724 case LL_IOC_CLRFLAGS:
1725 /* Set or clear specific file flags */
1726 /* XXX This probably needs checks to ensure the flags are
1727 * not abused, and to handle any flag side effects.
1729 if (get_user(flags, (int *) arg))
1732 if (cmd == LL_IOC_SETFLAGS) {
1733 if ((flags & LL_FILE_IGNORE_LOCK) &&
1734 !(file->f_flags & O_DIRECT)) {
1735 CERROR("%s: unable to disable locking on "
1736 "non-O_DIRECT file\n", current->comm);
1740 fd->fd_flags |= flags;
1742 fd->fd_flags &= ~flags;
1745 case LL_IOC_LOV_SETSTRIPE:
1746 RETURN(ll_lov_setstripe(inode, file, arg));
1747 case LL_IOC_LOV_SETEA:
1748 RETURN(ll_lov_setea(inode, file, arg));
1749 case LL_IOC_LOV_GETSTRIPE:
1750 RETURN(ll_lov_getstripe(inode, arg));
1751 case LL_IOC_RECREATE_OBJ:
1752 RETURN(ll_lov_recreate_obj(inode, file, arg));
1753 case EXT3_IOC_FIEMAP: {
1754 struct ll_user_fiemap *fiemap_s;
1755 size_t num_bytes, ret_bytes;
1756 unsigned int extent_count;
1759 /* Get the extent count so we can calculate the size of
1760 * required fiemap buffer */
1761 if (get_user(extent_count,
1762 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1764 num_bytes = sizeof(*fiemap_s) + (extent_count *
1765 sizeof(struct ll_fiemap_extent));
1766 OBD_VMALLOC(fiemap_s, num_bytes);
1767 if (fiemap_s == NULL)
1770 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1772 GOTO(error, rc = -EFAULT);
1774 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1775 fiemap_s->fm_flags = fiemap_s->fm_flags &
1776 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1777 if (copy_to_user((char *)arg, fiemap_s,
1779 GOTO(error, rc = -EFAULT);
1781 GOTO(error, rc = -EBADR);
1784 /* If fm_extent_count is non-zero, read the first extent since
1785 * it is used to calculate end_offset and device from previous
1788 if (copy_from_user(&fiemap_s->fm_extents[0],
1789 (char __user *)arg + sizeof(*fiemap_s),
1790 sizeof(struct ll_fiemap_extent)))
1791 GOTO(error, rc = -EFAULT);
1794 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1797 rc = filemap_fdatawrite(inode->i_mapping);
1802 rc = ll_fiemap(inode, fiemap_s, num_bytes);
1806 ret_bytes = sizeof(struct ll_user_fiemap);
1808 if (extent_count != 0)
1809 ret_bytes += (fiemap_s->fm_mapped_extents *
1810 sizeof(struct ll_fiemap_extent));
1812 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1816 OBD_VFREE(fiemap_s, num_bytes);
1819 case EXT3_IOC_GETFLAGS:
1820 case EXT3_IOC_SETFLAGS:
1821 RETURN(ll_iocontrol(inode, file, cmd, arg));
1822 case EXT3_IOC_GETVERSION_OLD:
1823 case EXT3_IOC_GETVERSION:
1824 RETURN(put_user(inode->i_generation, (int *)arg));
1826 #if LUSTRE_FIX >= 50
1827 /* Allow file join in beta builds to allow debugging */
1831 ftail = getname((const char *)arg);
1833 RETURN(PTR_ERR(ftail));
1834 rc = ll_file_join(inode, file, ftail);
1838 CWARN("file join is not supported in this version of Lustre\n");
1842 case LL_IOC_GROUP_LOCK:
1843 RETURN(ll_get_grouplock(inode, file, arg));
1844 case LL_IOC_GROUP_UNLOCK:
1845 RETURN(ll_put_grouplock(inode, file, arg));
1846 case IOC_OBD_STATFS:
1847 RETURN(ll_obd_statfs(inode, (void *)arg));
1849 /* We need to special case any other ioctls we want to handle,
1850 * to send them to the MDS/OST as appropriate and to properly
1851 * network encode the arg field.
1852 case EXT3_IOC_SETVERSION_OLD:
1853 case EXT3_IOC_SETVERSION:
1855 case LL_IOC_FLUSHCTX:
1856 RETURN(ll_flush_ctx(inode));
1857 case LL_IOC_PATH2FID: {
1858 if (copy_to_user((void *)arg, &ll_i2info(inode)->lli_fid,
1859 sizeof(struct lu_fid)))
1868 ll_iocontrol_call(inode, file, cmd, arg, &err))
1871 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek entry point (installed as .llseek in the ll_file_operations
 * tables).
 *
 * \a origin is interpreted numerically: 2 is SEEK_END, 1 is SEEK_CUR and
 * anything else is treated as SEEK_SET.  For SEEK_END the file size is
 * first refreshed with cl_glimpse_size() and then read under
 * ll_inode_size_lock().  The new offset is stored in file->f_pos only when
 * it lies within [0, ll_file_maxbytes(inode)].
 *
 * The first retval computation only feeds the CDEBUG trace.
 *
 * NOTE(review): nonblock is set from O_NONBLOCK but is not used by any
 * statement visible in this block.  The return statements are not visible
 * either.
 */
1877 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1879 struct inode *inode = file->f_dentry->d_inode;
1882 retval = offset + ((origin == 2) ? i_size_read(inode) :
1883 (origin == 1) ? file->f_pos : 0);
1884 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1885 inode->i_ino, inode->i_generation, inode, retval, retval,
1886 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1887 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1889 if (origin == 2) { /* SEEK_END */
1890 int nonblock = 0, rc;
1892 if (file->f_flags & O_NONBLOCK)
1893 nonblock = LDLM_FL_BLOCK_NOWAIT;
1895 rc = cl_glimpse_size(inode);
1899 ll_inode_size_lock(inode, 0);
1900 offset += i_size_read(inode);
1901 ll_inode_size_unlock(inode, 0);
1902 } else if (origin == 1) { /* SEEK_CUR */
1903 offset += file->f_pos;
1907 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1908 if (offset != file->f_pos) {
1909 file->f_pos = offset;
/*
 * fsync entry point: make the data and metadata of \a dentry's inode
 * stable on the servers.
 *
 * Steps visible in this block:
 *  - wait for already-started page writeback with filemap_fdatawait();
 *  - pick up asynchronous write errors recorded earlier, from
 *    lli->lli_async_rc (which is reset to 0) and from
 *    lov_test_and_clear_async_rc();
 *  - sync the metadata with md_sync() on the MD export and drop the
 *    returned request;
 *  - build an obdo carrying the object id/group and inode attributes and
 *    call obd_sync() on the data export over [0, OBD_OBJECT_EOF] with an
 *    OSS write capability.
 *
 * NOTE(review): how the individual error codes are merged into rc, the
 * conditions guarding the data sync (including the use of \a data) and the
 * allocation that can yield -ENOMEM are not visible in this block.
 */
1917 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1919 struct inode *inode = dentry->d_inode;
1920 struct ll_inode_info *lli = ll_i2info(inode);
1921 struct lov_stripe_md *lsm = lli->lli_smd;
1922 struct ptlrpc_request *req;
1923 struct obd_capa *oc;
1926 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1927 inode->i_generation, inode);
1928 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1930 /* fsync's caller has already called _fdata{sync,write}, we want
1931 * that IO to finish before calling the osc and mdc sync methods */
1932 rc = filemap_fdatawait(inode->i_mapping);
1934 /* catch async errors that were recorded back when async writeback
1935 * failed for pages in this mapping. */
1936 err = lli->lli_async_rc;
1937 lli->lli_async_rc = 0;
1941 err = lov_test_and_clear_async_rc(lsm);
1946 oc = ll_mdscapa_get(inode);
1947 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1953 ptlrpc_req_finished(req);
1960 RETURN(rc ? rc : -ENOMEM);
1962 oa->o_id = lsm->lsm_object_id;
1963 oa->o_gr = lsm->lsm_object_gr;
1964 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1965 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1966 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1969 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1970 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1971 0, OBD_OBJECT_EOF, oc);
/*
 * flock()/fcntl() lock entry point for mounts with cluster-wide file
 * locking (installed as .flock and/or .lock in ll_file_operations_flock).
 *
 * Steps visible in this block:
 *  - for FL_FLOCK requests fill in the fields flock() leaves unset: the
 *    range end becomes OFFSET_MAX and the owner becomes current->tgid;
 *  - describe the range and owner in an LDLM flock policy;
 *  - translate fl_type into an LDLM mode (LCK_PR, LCK_PW, or LCK_NL for an
 *    unlock, which is sent as an ordinary enqueue);
 *  - translate \a cmd into enqueue flags (LDLM_FL_BLOCK_NOWAIT or
 *    LDLM_FL_TEST_LOCK appear here); for a test request fl_type is
 *    overwritten with the LDLM mode;
 *  - enqueue an LDLM_FLOCK lock on the MD export with
 *    ldlm_flock_completion_ast as completion callback;
 *  - on success, or for any unlock, mirror the result into the local lock
 *    tables with ll_flock_lock_file_wait() for FL_FLOCK and, under
 *    HAVE_F_OP_FLOCK, posix_lock_file_wait() for FL_POSIX requests that are
 *    not test requests.
 *
 * NOTE(review): the case labels of both switches and the return statements
 * are not visible in this block.
 */
1981 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1983 struct inode *inode = file->f_dentry->d_inode;
1984 struct ll_sb_info *sbi = ll_i2sbi(inode);
1985 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1986 .ei_cb_cp =ldlm_flock_completion_ast,
1987 .ei_cbdata = file_lock };
1988 struct md_op_data *op_data;
1989 struct lustre_handle lockh = {0};
1990 ldlm_policy_data_t flock;
1995 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1996 inode->i_ino, file_lock);
1998 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2000 if (file_lock->fl_flags & FL_FLOCK) {
2001 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2002 /* set missing params for flock() calls */
2003 file_lock->fl_end = OFFSET_MAX;
2004 file_lock->fl_pid = current->tgid;
2006 flock.l_flock.pid = file_lock->fl_pid;
2007 flock.l_flock.start = file_lock->fl_start;
2008 flock.l_flock.end = file_lock->fl_end;
2010 switch (file_lock->fl_type) {
2012 einfo.ei_mode = LCK_PR;
2015 /* An unlock request may or may not have any relation to
2016 * existing locks so we may not be able to pass a lock handle
2017 * via a normal ldlm_lock_cancel() request. The request may even
2018 * unlock a byte range in the middle of an existing lock. In
2019 * order to process an unlock request we need all of the same
2020 * information that is given with a normal read or write record
2021 * lock request. To avoid creating another ldlm unlock (cancel)
2022 * message we'll treat a LCK_NL flock request as an unlock. */
2023 einfo.ei_mode = LCK_NL;
2026 einfo.ei_mode = LCK_PW;
2029 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
2044 flags = LDLM_FL_BLOCK_NOWAIT;
2050 flags = LDLM_FL_TEST_LOCK;
2051 /* Save the old mode so that if the mode in the lock changes we
2052 * can decrement the appropriate reader or writer refcount. */
2053 file_lock->fl_type = einfo.ei_mode;
2056 CERROR("unknown fcntl lock command: %d\n", cmd);
2060 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2061 LUSTRE_OPC_ANY, NULL);
2062 if (IS_ERR(op_data))
2063 RETURN(PTR_ERR(op_data));
2065 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2066 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2067 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2069 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2070 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2072 ll_finish_md_op_data(op_data);
2074 if ((file_lock->fl_flags & FL_FLOCK) &&
2075 (rc == 0 || file_lock->fl_type == F_UNLCK))
2076 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2077 #ifdef HAVE_F_OP_FLOCK
2078 if ((file_lock->fl_flags & FL_POSIX) &&
2079 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2080 !(flags & LDLM_FL_TEST_LOCK))
2081 posix_lock_file_wait(file, file_lock);
/*
 * Lock handler installed as .flock/.lock in ll_file_operations_noflock.
 * NOTE(review): only the signature is visible here; the comment on that
 * table says noflock mounts answer flock calls with ENOSYS - confirm
 * against the body.
 */
2087 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test whether this client holds an MD inodebits lock on \a inode that
 * covers \a bits.
 *
 * md_lock_match() is called on the inode's FID with LDLM_FL_TEST_LOCK, so
 * the lookup is only a probe, and with LDLM_FL_BLOCK_GRANTED |
 * LDLM_FL_CBPENDING; any of the modes CR, CW, PR or PW is accepted.
 *
 * NOTE(review): the return statements are not visible in this block;
 * the caller in ll_inode_revalidate_it() treats a zero result as "no lock".
 */
2094 int ll_have_md_lock(struct inode *inode, __u64 bits)
2096 struct lustre_handle lockh;
2097 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2105 fid = &ll_i2info(inode)->lli_fid;
2106 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2108 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2109 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2110 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Look for an MD inodebits lock on \a inode covering \a bits and hand its
 * handle back in \a lockh.
 *
 * Same match as ll_have_md_lock() but without LDLM_FL_TEST_LOCK; the
 * result of md_lock_match() is kept in rc.
 *
 * NOTE(review): the return statement is not visible in this block;
 * presumably rc (the matched mode, per the ldlm_mode_t return type) is
 * returned and the caller must drop the reference - confirm.
 */
2116 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2117 struct lustre_handle *lockh)
2119 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2125 fid = &ll_i2info(inode)->lli_fid;
2126 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2128 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2129 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2130 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process the result \a rc of a revalidation request for \a inode.
 *
 * -ENOENT means the inode was already unlinked on the server and gets
 * special treatment that depends on whether the inode is a regular file or
 * directory; any other failure is logged with CERROR.
 *
 * NOTE(review): the statements that update nlink and choose the return
 * value are not visible in this block.
 */
2134 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2135 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2136 * and return success */
2138 /* This path cannot be hit for regular files unless in
2139 * case of obscure races, so no need to to validate
2141 if (!S_ISREG(inode->i_mode) &&
2142 !S_ISDIR(inode->i_mode))
2147 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Refresh the attributes of \a dentry's inode from the MDS, then refresh
 * its size.
 *
 * Two paths are visible in this block:
 *  - when the MD export advertises OBD_CONNECT_ATTRFID, an IT_GETATTR
 *    intent is sent by FID (no name) through md_intent_lock() with
 *    O_CHECK_STALE set for the duration of the call; the reply is applied
 *    with ll_revalidate_it_finish(), a dentry whose inode has no links left
 *    is dropped under ll_lookup_lock and dcache_lock, and the intent's
 *    locks are finished with ll_lookup_finish_locks();
 *  - otherwise, when no UPDATE|LOOKUP inodebits lock is held, a plain
 *    md_getattr() is sent (asking for EA size as well for regular files)
 *    and the reply is applied with ll_prep_inode().
 * Request failures on either path go through ll_inode_revalidate_fini().
 *
 * Afterwards, if the inode has a stripe MD, cl_glimpse_size() refreshes
 * the size; the request reference is dropped on the way out.
 *
 * NOTE(review): the error checks, labels and return statements are not
 * visible in this block, nor is the \a it parameter used by any visible
 * statement.
 */
2155 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2157 struct inode *inode = dentry->d_inode;
2158 struct ptlrpc_request *req = NULL;
2159 struct ll_sb_info *sbi;
2160 struct obd_export *exp;
2165 CERROR("REPORT THIS LINE TO PETER\n");
2168 sbi = ll_i2sbi(inode);
2170 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2171 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2173 exp = ll_i2mdexp(inode);
2175 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2176 struct lookup_intent oit = { .it_op = IT_GETATTR };
2177 struct md_op_data *op_data;
2179 /* Call getattr by fid, so do not provide name at all. */
2180 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2181 dentry->d_inode, NULL, 0, 0,
2182 LUSTRE_OPC_ANY, NULL);
2183 if (IS_ERR(op_data))
2184 RETURN(PTR_ERR(op_data));
2186 oit.it_flags |= O_CHECK_STALE;
2187 rc = md_intent_lock(exp, op_data, NULL, 0,
2188 /* we are not interested in name
2191 ll_md_blocking_ast, 0);
2192 ll_finish_md_op_data(op_data);
2193 oit.it_flags &= ~O_CHECK_STALE;
2195 rc = ll_inode_revalidate_fini(inode, rc);
2199 rc = ll_revalidate_it_finish(req, &oit, dentry);
2201 ll_intent_release(&oit);
2205 /* Unlinked? Unhash dentry, so it is not picked up later by
2206 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2207 here to preserve get_cwd functionality on 2.6.
2209 if (!dentry->d_inode->i_nlink) {
2210 spin_lock(&ll_lookup_lock);
2211 spin_lock(&dcache_lock);
2212 ll_drop_dentry(dentry);
2213 spin_unlock(&dcache_lock);
2214 spin_unlock(&ll_lookup_lock);
2217 ll_lookup_finish_locks(&oit, dentry);
2218 } else if (!ll_have_md_lock(dentry->d_inode, MDS_INODELOCK_UPDATE |
2219 MDS_INODELOCK_LOOKUP)) {
2220 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2221 obd_valid valid = OBD_MD_FLGETATTR;
2222 struct obd_capa *oc;
2225 if (S_ISREG(inode->i_mode)) {
2226 rc = ll_get_max_mdsize(sbi, &ealen);
2229 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2231 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2232 * capa for this inode. Because we only keep capas of dirs
2234 oc = ll_mdscapa_get(inode);
2235 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2239 rc = ll_inode_revalidate_fini(inode, rc);
2243 rc = ll_prep_inode(&inode, req, NULL);
2248 /* if object not yet allocated, don't validate size */
2249 if (ll_i2info(inode)->lli_smd == NULL)
2252 /* cl_glimpse_size will prefer locally cached writes if they extend
2254 rc = cl_glimpse_size(inode);
2257 ptlrpc_req_finished(req);
/*
 * Revalidate \a de's inode with ll_inode_revalidate_it() and copy its
 * attributes into \a stat.
 *
 * dev, ino, mode, nlink, uid, gid, rdev and the three timestamps are
 * copied directly; blksize comes from i_blksize when HAVE_INODE_BLKSIZE is
 * defined and from 1 << i_blkbits otherwise.  size and blocks are read
 * while holding ll_inode_size_lock() so the pair is consistent.
 *
 * NOTE(review): the check of the revalidation result and the return
 * statements are not visible in this block; \a mnt is not used by any
 * visible statement.
 */
2261 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2262 struct lookup_intent *it, struct kstat *stat)
2264 struct inode *inode = de->d_inode;
2267 res = ll_inode_revalidate_it(de, it);
2268 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2273 stat->dev = inode->i_sb->s_dev;
2274 stat->ino = inode->i_ino;
2275 stat->mode = inode->i_mode;
2276 stat->nlink = inode->i_nlink;
2277 stat->uid = inode->i_uid;
2278 stat->gid = inode->i_gid;
2279 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2280 stat->atime = inode->i_atime;
2281 stat->mtime = inode->i_mtime;
2282 stat->ctime = inode->i_ctime;
2283 #ifdef HAVE_INODE_BLKSIZE
2284 stat->blksize = inode->i_blksize;
2286 stat->blksize = 1 << inode->i_blkbits;
2289 ll_inode_size_lock(inode, 0);
2290 stat->size = i_size_read(inode);
2291 stat->blocks = inode->i_blocks;
2292 ll_inode_size_unlock(inode, 0);
2296 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2298 struct lookup_intent it = { .it_op = IT_GETATTR };
2300 return ll_getattr_it(mnt, de, &it, stat);
/*
 * ACL check callback (passed to generic_permission() by
 * ll_inode_permission()).
 *
 * With CONFIG_FS_POSIX_ACL, a reference to the inode's cached POSIX ACL is
 * taken under lli_lock, evaluated with posix_acl_permission() against
 * \a mask and released again.
 *
 * NOTE(review): the handling of a missing ACL, the non-ACL build variant
 * and the return statements are not visible in this block.
 */
2304 int lustre_check_acl(struct inode *inode, int mask)
2306 #ifdef CONFIG_FS_POSIX_ACL
2307 struct ll_inode_info *lli = ll_i2info(inode);
2308 struct posix_acl *acl;
2312 spin_lock(&lli->lli_lock);
2313 acl = posix_acl_dup(lli->lli_posix_acl);
2314 spin_unlock(&lli->lli_lock);
2319 rc = posix_acl_permission(inode, acl, mask);
2320 posix_acl_release(acl);
/*
 * Permission check (installed as .permission in
 * ll_file_inode_operations), in two build variants.
 *
 * Both variants first hand remote clients (LL_SBI_RMT_CLIENT) to
 * lustre_check_remote_perm() and count the call in the
 * LPROC_LL_INODE_PERM statistic.
 *
 * Kernels >= 2.6.10 then delegate to generic_permission() with
 * lustre_check_acl() as the ACL callback.
 *
 * Older kernels use an open-coded check: writes are refused on read-only
 * or immutable inodes, then owner, ACL, group and other mode bits are
 * consulted, and finally CFS_CAP_DAC_OVERRIDE and
 * CFS_CAP_DAC_READ_SEARCH may override a denial.
 *
 * NOTE(review): \a nd is not used by any visible statement, and the return
 * values of the open-coded variant are not visible in this block.
 */
2328 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2329 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2331 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2332 inode->i_ino, inode->i_generation, inode, mask);
2333 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2334 return lustre_check_remote_perm(inode, mask);
2336 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2337 return generic_permission(inode, mask, lustre_check_acl);
2340 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2342 int mode = inode->i_mode;
2345 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2346 inode->i_ino, inode->i_generation, inode, mask);
2348 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2349 return lustre_check_remote_perm(inode, mask);
2351 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2353 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2354 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2356 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2358 if (current->fsuid == inode->i_uid) {
2361 if (((mode >> 3) & mask & S_IRWXO) != mask)
2363 rc = lustre_check_acl(inode, mask);
2367 goto check_capabilities;
2371 if (in_group_p(inode->i_gid))
2374 if ((mode & mask & S_IRWXO) == mask)
2378 if (!(mask & MAY_EXEC) ||
2379 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2380 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2383 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2384 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/*
 * Select the vectored I/O slots used in the file_operations tables below:
 * with HAVE_FILE_READV the readv/writev members are filled with
 * ll_file_readv/ll_file_writev, otherwise the aio_read/aio_write members
 * are filled with ll_file_aio_read/ll_file_aio_write.
 */
2391 #ifdef HAVE_FILE_READV
2392 #define READ_METHOD readv
2393 #define READ_FUNCTION ll_file_readv
2394 #define WRITE_METHOD writev
2395 #define WRITE_FUNCTION ll_file_writev
2397 #define READ_METHOD aio_read
2398 #define READ_FUNCTION ll_file_aio_read
2399 #define WRITE_METHOD aio_write
2400 #define WRITE_FUNCTION ll_file_aio_write
2403 /* -o localflock - only provides locally consistent flock locks */
/*
 * File operations table without any .flock/.lock member among the visible
 * entries.  READ_METHOD/WRITE_METHOD expand to readv/writev or
 * aio_read/aio_write depending on HAVE_FILE_READV.
 */
2404 struct file_operations ll_file_operations = {
2405 .read = ll_file_read,
2406 .READ_METHOD = READ_FUNCTION,
2407 .write = ll_file_write,
2408 .WRITE_METHOD = WRITE_FUNCTION,
2409 .ioctl = ll_file_ioctl,
2410 .open = ll_file_open,
2411 .release = ll_file_release,
2412 .mmap = ll_file_mmap,
2413 .llseek = ll_file_seek,
2414 .sendfile = ll_file_sendfile,
/*
 * File operations table that routes lock requests to ll_file_flock():
 * through .flock when HAVE_F_OP_FLOCK is defined, and through .lock.
 * The remaining visible entries match ll_file_operations.
 */
2418 struct file_operations ll_file_operations_flock = {
2419 .read = ll_file_read,
2420 .READ_METHOD = READ_FUNCTION,
2421 .write = ll_file_write,
2422 .WRITE_METHOD = WRITE_FUNCTION,
2423 .ioctl = ll_file_ioctl,
2424 .open = ll_file_open,
2425 .release = ll_file_release,
2426 .mmap = ll_file_mmap,
2427 .llseek = ll_file_seek,
2428 .sendfile = ll_file_sendfile,
2430 #ifdef HAVE_F_OP_FLOCK
2431 .flock = ll_file_flock,
2433 .lock = ll_file_flock
2436 /* These are for -o noflock - to return ENOSYS on flock calls */
/*
 * File operations table that routes lock requests to ll_file_noflock():
 * through .flock when HAVE_F_OP_FLOCK is defined, and through .lock.
 * The remaining visible entries match ll_file_operations.
 */
2437 struct file_operations ll_file_operations_noflock = {
2438 .read = ll_file_read,
2439 .READ_METHOD = READ_FUNCTION,
2440 .write = ll_file_write,
2441 .WRITE_METHOD = WRITE_FUNCTION,
2442 .ioctl = ll_file_ioctl,
2443 .open = ll_file_open,
2444 .release = ll_file_release,
2445 .mmap = ll_file_mmap,
2446 .llseek = ll_file_seek,
2447 .sendfile = ll_file_sendfile,
2449 #ifdef HAVE_F_OP_FLOCK
2450 .flock = ll_file_noflock,
2452 .lock = ll_file_noflock
/*
 * Inode operations for regular files: attribute get/set, truncate,
 * permission checking and the extended-attribute calls.  .setattr_raw is
 * only present when HAVE_VFS_INTENT_PATCHES is defined.
 */
2455 struct inode_operations ll_file_inode_operations = {
2456 #ifdef HAVE_VFS_INTENT_PATCHES
2457 .setattr_raw = ll_setattr_raw,
2459 .setattr = ll_setattr,
2460 .truncate = ll_truncate,
2461 .getattr = ll_getattr,
2462 .permission = ll_inode_permission,
2463 .setxattr = ll_setxattr,
2464 .getxattr = ll_getxattr,
2465 .listxattr = ll_listxattr,
2466 .removexattr = ll_removexattr,
2469 /* dynamic ioctl number support routines */
/*
 * Registry of dynamically registered ioctl handlers: ioc_head is the list
 * of llioc_data entries and ioc_sem is the read/write semaphore that
 * guards it (taken for write by register/unregister and for read by
 * ll_iocontrol_call()).  The single instance is named llioc and is
 * statically initialized here.
 */
2470 static struct llioc_ctl_data {
2471 struct rw_semaphore ioc_sem;
2472 struct list_head ioc_head;
2474 __RWSEM_INITIALIZER(llioc.ioc_sem),
2475 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/*
 * Fields of one registered ioctl handler (struct llioc_data, as used by
 * ll_iocontrol_register()):
 *   iocd_list  - linkage into llioc.ioc_head
 *   iocd_size  - allocation size of this entry, needed to free it
 *   iocd_cb    - callback invoked for a matching command
 *   iocd_count - number of entries in iocd_cmd
 *   iocd_cmd   - trailing array of the ioctl numbers handled
 */
2480 struct list_head iocd_list;
2481 unsigned int iocd_size;
2482 llioc_callback_t iocd_cb;
2483 unsigned int iocd_count;
2484 unsigned int iocd_cmd[0];
/*
 * Register \a cb as the handler for the \a count ioctl numbers in \a cmd.
 *
 * The arguments are rejected when cb or cmd is NULL or when count is
 * negative or larger than LLIOC_MAX_CMD.  Otherwise an llioc_data entry
 * with room for count command numbers is allocated, filled in and appended
 * to the llioc list under the write semaphore.
 *
 * NOTE(review): the return statements are not visible in this block;
 * presumably NULL is returned on failure and the new entry on success,
 * which is then the "magic" expected by ll_iocontrol_unregister() -
 * confirm.
 */
2487 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2490 struct llioc_data *in_data = NULL;
2493 if (cb == NULL || cmd == NULL ||
2494 count > LLIOC_MAX_CMD || count < 0)
2497 size = sizeof(*in_data) + count * sizeof(unsigned int);
2498 OBD_ALLOC(in_data, size);
2499 if (in_data == NULL)
2502 memset(in_data, 0, sizeof(*in_data));
2503 in_data->iocd_size = size;
2504 in_data->iocd_cb = cb;
2505 in_data->iocd_count = count;
2506 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2508 down_write(&llioc.ioc_sem);
2509 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2510 up_write(&llioc.ioc_sem);
/*
 * Remove a handler registered with ll_iocontrol_register().
 *
 * The llioc list is walked under the write semaphore.  When an entry is
 * removed, the semaphore is released before the entry is freed, using the
 * size saved from iocd_size.  If the walk ends without a removal, a
 * warning naming \a magic is logged.
 *
 * NOTE(review): the comparison that selects the entry and the early
 * returns are not visible in this block; presumably the entry whose
 * address equals \a magic is the one removed - confirm.
 */
2515 void ll_iocontrol_unregister(void *magic)
2517 struct llioc_data *tmp;
2522 down_write(&llioc.ioc_sem);
2523 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2525 unsigned int size = tmp->iocd_size;
2527 list_del(&tmp->iocd_list);
2528 up_write(&llioc.ioc_sem);
2530 OBD_FREE(tmp, size);
2534 up_write(&llioc.ioc_sem);
2536 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2539 EXPORT_SYMBOL(ll_iocontrol_register);
2540 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Offer ioctl \a cmd to the dynamically registered handlers.
 *
 * The llioc list is walked under the read semaphore; for each entry whose
 * iocd_cmd array contains \a cmd, the entry's callback is invoked with the
 * entry itself and the address of rc (initially -EINVAL).  A callback
 * result of LLIOC_STOP is tested after the call.
 *
 * NOTE(review): the loop exits, the use of \a rcp and the return statement
 * are not visible in this block; presumably the walk stops on LLIOC_STOP
 * and rc is reported through \a rcp - confirm.
 */
2542 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2543 unsigned int cmd, unsigned long arg, int *rcp)
2545 enum llioc_iter ret = LLIOC_CONT;
2546 struct llioc_data *data;
2547 int rc = -EINVAL, i;
2549 down_read(&llioc.ioc_sem);
2550 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2551 for (i = 0; i < data->iocd_count; i++) {
2552 if (cmd != data->iocd_cmd[i])
2555 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2559 if (ret == LLIOC_STOP)
2562 up_read(&llioc.ioc_sem);