1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/*
 * Allocate a struct ll_file_data object from ll_file_data_slab, using the
 * CFS_ALLOC_IO allocation flags.
 */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/*
 * Give @fd back to ll_file_data_slab (counterpart of ll_file_data_get()).
 */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Fill @op_data from the current state of @inode.
 *
 * Copies the FID, mode, atime/mtime/ctime, size (via i_size_read()), block
 * count, inode flags and the I/O epoch kept in the ll_inode_info.  The open
 * handle @fh is copied into op_data->op_handle, and op_capa1 is set from
 * ll_mdscapa_get().
 *
 * \param inode    inode whose attributes are packed
 * \param op_data  operation data to fill in
 * \param fh       open file handle copied into @op_data
 */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/*
 * Prepare @op_data for closing the open handle @och of @inode.
 *
 * ia_valid starts as ATTR_MODE plus the atime/mtime/ctime "SET" flags.
 * ATTR_SIZE | ATTR_BLOCKS is added when the MD export was not connected with
 * OBD_CONNECT_SOM or when @inode is not a regular file.  The handle's
 * FMODE_WRITE flag is tested and ll_epoch_close() is called as part of this
 * preparation; finally the inode attributes and the handle are packed with
 * ll_pack_inode2opdata().
 */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
92 if (!(och->och_flags & FMODE_WRITE))
95 if (!(ll_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM) ||
96 !S_ISREG(inode->i_mode))
97 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
99 ll_epoch_close(inode, op_data, &och, 0);
102 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/*
 * Close the MDS open handle @och by sending md_close() through @md_exp.
 *
 * The close attributes are built by ll_prepare_close() in a newly allocated
 * md_op_data; the error code is set to -ENOMEM when that allocation fails.
 * Whether this close also closes the I/O epoch is read from MF_EPOCH_CLOSE
 * in op_data->op_flags.
 *
 * Other steps performed here:
 *  - ll_sizeonmds_update() is used to send Size-on-MDS attributes, and
 *    ll_objects_destroy() is run on the close reply; failures of these and
 *    of md_close() itself are reported with CERROR;
 *  - ll_queue_done_writing() is called when the export supports
 *    OBD_CONNECT_SOM, the epoch was not closed, the inode is a regular file
 *    and the handle was opened with FMODE_WRITE;
 *  - the open replay data is cleared, the handle cookie is overwritten with
 *    DEAD_HANDLE_MAGIC, and the close request (if any) is released.
 */
106 static int ll_close_inode_openhandle(struct obd_export *md_exp,
108 struct obd_client_handle *och)
110 struct obd_export *exp = ll_i2mdexp(inode);
111 struct md_op_data *op_data;
112 struct ptlrpc_request *req = NULL;
113 struct obd_device *obd = class_exp2obd(exp);
120 * XXX: in case of LMV, is this correct to access
123 CERROR("Invalid MDC connection handle "LPX64"\n",
124 ll_i2mdexp(inode)->exp_handle.h_cookie);
129 * here we check if this is forced umount. If so this is called on
130 * canceling "open lock" and we do not call md_close() in this case, as
131 * it will not be successful, as import is already deactivated.
136 OBD_ALLOC_PTR(op_data);
138 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
140 ll_prepare_close(inode, op_data, och);
141 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
142 rc = md_close(md_exp, op_data, och->och_mod, &req);
144 /* This close must have the epoch closed. */
145 LASSERT(exp->exp_connect_flags & OBD_CONNECT_SOM);
146 LASSERT(epoch_close);
147 /* MDS has instructed us to obtain Size-on-MDS attribute from
148 * OSTs and send setattr to back to MDS. */
149 rc = ll_sizeonmds_update(inode, &och->och_fh,
150 op_data->op_ioepoch);
152 CERROR("inode %lu mdc Size-on-MDS update failed: "
153 "rc = %d\n", inode->i_ino, rc);
157 CERROR("inode %lu mdc close failed: rc = %d\n",
160 ll_finish_md_op_data(op_data);
163 rc = ll_objects_destroy(req, inode);
165 CERROR("inode %lu ll_objects destroy: rc = %d\n",
172 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
173 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
174 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
176 md_clear_open_replay_data(md_exp, och);
177 /* Free @och if it is not waiting for DONE_WRITING. */
178 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
181 if (req) /* This is close request */
182 ptlrpc_req_finished(req);
/*
 * Close the per-inode MDS open handle that corresponds to the open mode in
 * @flags.
 *
 * The handle slot and its use counter are chosen by mode: FMODE_WRITE takes
 * precedence, then FMODE_EXEC, otherwise FMODE_READ is asserted.  The use
 * counter is checked under lli_och_sem; a non-zero count means the handle
 * still has users.  If a handle is obtained, it is closed with
 * ll_close_inode_openhandle() on the superblock's MD export.
 */
186 int ll_md_real_close(struct inode *inode, int flags)
188 struct ll_inode_info *lli = ll_i2info(inode);
189 struct obd_client_handle **och_p;
190 struct obd_client_handle *och;
195 if (flags & FMODE_WRITE) {
196 och_p = &lli->lli_mds_write_och;
197 och_usecount = &lli->lli_open_fd_write_count;
198 } else if (flags & FMODE_EXEC) {
199 och_p = &lli->lli_mds_exec_och;
200 och_usecount = &lli->lli_open_fd_exec_count;
202 LASSERT(flags & FMODE_READ);
203 och_p = &lli->lli_mds_read_och;
204 och_usecount = &lli->lli_open_fd_read_count;
207 down(&lli->lli_och_sem);
208 if (*och_usecount) { /* There are still users of this handle, so
210 up(&lli->lli_och_sem);
215 up(&lli->lli_och_sem);
217 if (och) { /* There might be a race and somebody have freed this och
219 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file close: drop this file descriptor's share of the inode's MDS open
 * state and free its ll_file_data.
 *
 * Steps visible here:
 *  - if the descriptor holds a group lock (LL_FILE_GROUP_LOCKED), the flag is
 *    cleared together with LL_FILE_IGNORE_LOCK and the LCK_GROUP extent lock
 *    is dropped with ll_extent_unlock();
 *  - when the dentry has an inode, the open counter matching fd_omode
 *    (write, exec or read) is decremented under lli_och_sem, and
 *    md_lock_match() tests (LDLM_FL_TEST_LOCK) for an MDS_INODELOCK_OPEN
 *    lock; if no such lock matches, ll_md_real_close() is called;
 *  - a file with a negative dentry is only reported through CERROR;
 *  - the private data pointer of @file is cleared, @fd is released with
 *    ll_file_data_put(), and ll_capa_close() is called for the inode.
 */
226 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
229 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
230 struct ll_inode_info *lli = ll_i2info(inode);
234 /* clear group lock, if present */
235 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
237 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
238 fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
239 rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
244 /* Let's see if we have good enough OPEN lock on the file and if
245 we can skip talking to MDS */
246 if (file->f_dentry->d_inode) { /* Can this ever be false? */
248 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
249 struct lustre_handle lockh;
250 struct inode *inode = file->f_dentry->d_inode;
251 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
253 down(&lli->lli_och_sem);
254 if (fd->fd_omode & FMODE_WRITE) {
256 LASSERT(lli->lli_open_fd_write_count);
257 lli->lli_open_fd_write_count--;
258 } else if (fd->fd_omode & FMODE_EXEC) {
260 LASSERT(lli->lli_open_fd_exec_count);
261 lli->lli_open_fd_exec_count--;
264 LASSERT(lli->lli_open_fd_read_count);
265 lli->lli_open_fd_read_count--;
267 up(&lli->lli_och_sem);
269 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
270 LDLM_IBITS, &policy, lockmode,
272 rc = ll_md_real_close(file->f_dentry->d_inode,
276 CERROR("Releasing a file %p with negative dentry %p. Name %s",
277 file, file->f_dentry, file->f_dentry->d_name.name);
280 LUSTRE_FPRIVATE(file) = NULL;
281 ll_file_data_put(fd);
282 ll_capa_close(inode);
/*
 * Prototype only; ll_file_release() calls this on the inode's stripe MD.
 * NOTE(review): the definition is not in this file - presumably it lives in
 * the LOV layer; confirm and consider moving the prototype to a header.
 */
287 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
/*
 * Release the Lustre state attached to @file.
 *
 * Steps visible here:
 *  - with CONFIG_FS_POSIX_ACL, on a remote client (LL_SBI_RMT_CLIENT) and for
 *    the filesystem root inode, a pending LL_FILE_RMTACL flag is cleared and
 *    the current pid is removed from the ll_rct and ll_et tables;
 *  - LPROC_LL_RELEASE is tallied unless @file is the root dentry;
 *  - statahead is stopped when this descriptor is the inode's
 *    lli_opendir_key and an owner pid is recorded;
 *  - for the root dentry the ll_file_data is detached from @file and freed
 *    with ll_file_data_put();
 *  - the stripe MD's async error state is tested and cleared,
 *    lli_async_rc is reset to 0, and ll_md_close() performs the close.
 */
289 /* While this returns an error code, fput() the caller does not, so we need
290 * to make every effort to clean up all of our state here. Also, applications
291 * rarely check close errors and even if an error is returned they will not
292 * re-try the close call.
294 int ll_file_release(struct inode *inode, struct file *file)
296 struct ll_file_data *fd;
297 struct ll_sb_info *sbi = ll_i2sbi(inode);
298 struct ll_inode_info *lli = ll_i2info(inode);
299 struct lov_stripe_md *lsm = lli->lli_smd;
303 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
304 inode->i_generation, inode);
306 #ifdef CONFIG_FS_POSIX_ACL
307 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
308 inode == inode->i_sb->s_root->d_inode) {
309 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
312 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
313 fd->fd_flags &= ~LL_FILE_RMTACL;
314 rct_del(&sbi->ll_rct, cfs_curproc_pid());
315 et_search_free(&sbi->ll_et, cfs_curproc_pid());
320 if (inode->i_sb->s_root != file->f_dentry)
321 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
322 fd = LUSTRE_FPRIVATE(file);
325 /* The last ref on @file, maybe not the owner pid of statahead.
326 * Different processes can open the same dir, "ll_opendir_key" means:
327 * it is me that should stop the statahead thread. */
328 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
329 ll_stop_statahead(inode, lli->lli_opendir_key);
331 if (inode->i_sb->s_root == file->f_dentry) {
332 LUSTRE_FPRIVATE(file) = NULL;
333 ll_file_data_put(fd);
338 lov_test_and_clear_async_rc(lsm);
339 lli->lli_async_rc = 0;
341 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/*
 * Perform an intent open of @file on the MDS through md_intent_lock().
 *
 * \param file     file being opened; its dentry supplies parent, name, inode
 * \param lmm      optional striping data passed to md_intent_lock()
 * \param lmmsize  size of @lmm
 * \param itp      lookup intent to execute
 *
 * MDS_OPEN_LOCK is requested only when both @lmm and @lmmsize are zero (the
 * other case just sets stripe parameters).  The md_op_data is built with
 * ll_prep_md_op_data() and released right after md_intent_lock() returns.
 * An open error (from @rc or it_open_error()) is logged with CDEBUG.
 * When a lock mode was granted the lock data is set to the inode; the inode
 * is then updated from the reply with ll_prep_inode().  On the way out the
 * intent's request is released, DISP_ENQ_COMPLETE is cleared and the intent
 * lock is dropped.
 */
345 static int ll_intent_file_open(struct file *file, void *lmm,
346 int lmmsize, struct lookup_intent *itp)
348 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
349 struct dentry *parent = file->f_dentry->d_parent;
350 const char *name = file->f_dentry->d_name.name;
351 const int len = file->f_dentry->d_name.len;
352 struct md_op_data *op_data;
353 struct ptlrpc_request *req;
360 /* Usually we come here only for NFSD, and we want open lock.
361 But we can also get here with pre 2.6.15 patchless kernels, and in
362 that case that lock is also ok */
363 /* We can also get here if there was cached open handle in revalidate_it
364 * but it disappeared while we were getting from there to ll_file_open.
365 * But this means this file was closed and immediately opened which
366 * makes a good candidate for using OPEN lock */
367 /* If lmmsize & lmm are not 0, we are just setting stripe info
368 * parameters. No need for the open lock */
369 if (!lmm && !lmmsize)
370 itp->it_flags |= MDS_OPEN_LOCK;
372 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
373 file->f_dentry->d_inode, name, len,
374 O_RDWR, LUSTRE_OPC_ANY, NULL);
376 RETURN(PTR_ERR(op_data));
378 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
379 0 /*unused */, &req, ll_md_blocking_ast, 0);
380 ll_finish_md_op_data(op_data);
382 /* reason for keep own exit path - don`t flood log
383 * with messages with -ESTALE errors.
385 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
386 it_open_error(DISP_OPEN_OPEN, itp))
388 ll_release_openhandle(file->f_dentry, itp);
392 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
393 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
394 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
398 if (itp->d.lustre.it_lock_mode)
399 md_set_lock_data(sbi->ll_md_exp,
400 &itp->d.lustre.it_lock_handle,
401 file->f_dentry->d_inode);
403 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
405 ptlrpc_req_finished(itp->d.lustre.it_data);
406 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
407 ll_intent_drop_lock(itp);
/*
 * Initialise the client open handle @och from the open reply stored in the
 * intent @it.
 *
 * The MDT body of the reply supplies the file handle (copied into och_fh)
 * and the I/O epoch (stored in lli->lli_ioepoch).  The handle also records
 * OBD_CLIENT_HANDLE_MAGIC, the inode FID and the intent flags.
 *
 * \return the result of md_set_open_replay_data() for @och and the request
 */
412 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
413 struct lookup_intent *it, struct obd_client_handle *och)
415 struct ptlrpc_request *req = it->d.lustre.it_data;
416 struct mdt_body *body;
420 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
421 LASSERT(body != NULL); /* reply already checked out */
423 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
424 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
425 och->och_fid = lli->lli_fid;
426 och->och_flags = it->it_flags;
427 lli->lli_ioepoch = body->ioepoch;
429 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish an open on the client side by attaching @fd to @file.
 *
 * @file must not have private data yet (asserted).  ll_och_fill() is used to
 * fill @och from the intent's reply, and a debug message reports the opened
 * epoch for write opens whose reply carries OBD_MD_FLSIZE.  Afterwards @fd
 * becomes the file's private data, its readahead state is initialised and
 * fd_omode records the intent flags.
 *
 * NOTE(review): ll_file_open() passes och == NULL when reusing an existing
 * handle, so the ll_och_fill() part is presumably conditional on @och.
 */
432 int ll_local_open(struct file *file, struct lookup_intent *it,
433 struct ll_file_data *fd, struct obd_client_handle *och)
435 struct inode *inode = file->f_dentry->d_inode;
436 struct ll_inode_info *lli = ll_i2info(inode);
439 LASSERT(!LUSTRE_FPRIVATE(file));
444 struct ptlrpc_request *req = it->d.lustre.it_data;
445 struct mdt_body *body;
448 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
452 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
453 if ((it->it_flags & FMODE_WRITE) &&
454 (body->valid & OBD_MD_FLSIZE))
455 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
456 lli->lli_ioepoch, PFID(&lli->lli_fid));
459 LUSTRE_FPRIVATE(file) = fd;
460 ll_readahead_init(inode, &fd->fd_ras);
461 fd->fd_omode = it->it_flags;
/*
 * Outline of the steps visible in ll_file_open():
 *  - a struct ll_file_data is obtained with ll_file_data_get();
 *  - for directories, under lli_lock, the statahead owner
 *    (lli_opendir_key / lli_opendir_pid) is claimed if unset, or an existing
 *    statahead owned by the same pid is stopped;
 *  - opening the filesystem root dentry attaches @fd to @file directly;
 *  - when there is no intent, or it has no disposition, the local IT_OPEN
 *    intent "oit" is adjusted from f_flags: O_TRUNC implies FMODE_WRITE,
 *    write opens get MDS_OPEN_OWNEROVERRIDE, and O_EXCL is dropped;
 *  - the MDS open handle slot and use counter are selected by mode (write,
 *    exec, else read) and examined under lli_och_sem;
 *  - if a handle already exists, a redundant open carried by the intent is
 *    released and ll_local_open() is called without a new handle;
 *  - otherwise, if the intent has no disposition, lli_och_sem is dropped and
 *    ll_intent_file_open() is called with M_CHECK_STALE set; a new
 *    obd_client_handle is allocated and passed to ll_local_open();
 *  - error paths release @fd with ll_file_data_put(); the tail of the
 *    function also frees *och_p and stops statahead when opendir_set is set.
 */
465 /* Open a file, and (for the very first open) create objects on the OSTs at
466 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
467 * creation or open until ll_lov_setstripe() ioctl is called. We grab
468 * lli_open_sem to ensure no other process will create objects, send the
469 * stripe MD to the MDS, or try to destroy the objects if that fails.
471 * If we already have the stripe MD locally then we don't request it in
472 * md_open(), by passing a lmm_size = 0.
474 * It is up to the application to ensure no other processes open this file
475 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
476 * used. We might be able to avoid races of that sort by getting lli_open_sem
477 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
478 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
480 int ll_file_open(struct inode *inode, struct file *file)
482 struct ll_inode_info *lli = ll_i2info(inode);
483 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
484 .it_flags = file->f_flags };
485 struct lov_stripe_md *lsm;
486 struct ptlrpc_request *req = NULL;
487 struct obd_client_handle **och_p;
489 struct ll_file_data *fd;
490 int rc = 0, opendir_set = 0;
493 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
494 inode->i_generation, inode, file->f_flags);
496 #ifdef HAVE_VFS_INTENT_PATCHES
499 it = file->private_data; /* XXX: compat macro */
500 file->private_data = NULL; /* prevent ll_local_open assertion */
503 fd = ll_file_data_get();
508 if (S_ISDIR(inode->i_mode)) {
510 spin_lock(&lli->lli_lock);
511 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
512 LASSERT(lli->lli_sai == NULL);
513 lli->lli_opendir_key = fd;
514 lli->lli_opendir_pid = cfs_curproc_pid();
516 } else if (unlikely(lli->lli_opendir_pid == cfs_curproc_pid() &&
517 lli->lli_opendir_key != NULL)) {
518 /* Two cases for this:
519 * (1) The same process open such directory many times.
520 * (2) The old process opened the directory, and exited
521 * before its children processes. Then new process
522 * with the same pid opens such directory before the
523 * old process's children processes exit.
524 * reset stat ahead for such cases. */
525 spin_unlock(&lli->lli_lock);
526 CDEBUG(D_INFO, "Conflict statahead for %.*s "DFID
527 " reset it.\n", file->f_dentry->d_name.len,
528 file->f_dentry->d_name.name,
529 PFID(&lli->lli_fid));
530 ll_stop_statahead(inode, lli->lli_opendir_key);
533 spin_unlock(&lli->lli_lock);
536 if (inode->i_sb->s_root == file->f_dentry) {
537 LUSTRE_FPRIVATE(file) = fd;
541 if (!it || !it->d.lustre.it_disposition) {
542 /* Convert f_flags into access mode. We cannot use file->f_mode,
543 * because everything but O_ACCMODE mask was stripped from
545 if ((oit.it_flags + 1) & O_ACCMODE)
547 if (file->f_flags & O_TRUNC)
548 oit.it_flags |= FMODE_WRITE;
550 /* kernel only call f_op->open in dentry_open. filp_open calls
551 * dentry_open after call to open_namei that checks permissions.
552 * Only nfsd_open call dentry_open directly without checking
553 * permissions and because of that this code below is safe. */
554 if (oit.it_flags & FMODE_WRITE)
555 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
557 /* We do not want O_EXCL here, presumably we opened the file
558 * already? XXX - NFS implications? */
559 oit.it_flags &= ~O_EXCL;
565 /* Let's see if we have file open on MDS already. */
566 if (it->it_flags & FMODE_WRITE) {
567 och_p = &lli->lli_mds_write_och;
568 och_usecount = &lli->lli_open_fd_write_count;
569 } else if (it->it_flags & FMODE_EXEC) {
570 och_p = &lli->lli_mds_exec_och;
571 och_usecount = &lli->lli_open_fd_exec_count;
573 och_p = &lli->lli_mds_read_och;
574 och_usecount = &lli->lli_open_fd_read_count;
577 down(&lli->lli_och_sem);
578 if (*och_p) { /* Open handle is present */
579 if (it_disposition(it, DISP_OPEN_OPEN)) {
580 /* Well, there's extra open request that we do not need,
581 let's close it somehow. This will decref request. */
582 rc = it_open_error(DISP_OPEN_OPEN, it);
584 up(&lli->lli_och_sem);
585 ll_file_data_put(fd);
586 GOTO(out_openerr, rc);
588 ll_release_openhandle(file->f_dentry, it);
589 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
594 rc = ll_local_open(file, it, fd, NULL);
597 up(&lli->lli_och_sem);
598 ll_file_data_put(fd);
599 GOTO(out_openerr, rc);
602 LASSERT(*och_usecount == 0);
603 if (!it->d.lustre.it_disposition) {
604 /* We cannot just request lock handle now, new ELC code
605 means that one of other OPEN locks for this file
606 could be cancelled, and since blocking ast handler
607 would attempt to grab och_sem as well, that would
608 result in a deadlock */
609 up(&lli->lli_och_sem);
610 it->it_create_mode |= M_CHECK_STALE;
611 rc = ll_intent_file_open(file, NULL, 0, it);
612 it->it_create_mode &= ~M_CHECK_STALE;
614 ll_file_data_put(fd);
615 GOTO(out_openerr, rc);
618 /* Got some error? Release the request */
619 if (it->d.lustre.it_status < 0) {
620 req = it->d.lustre.it_data;
621 ptlrpc_req_finished(req);
623 md_set_lock_data(ll_i2sbi(inode)->ll_md_exp,
624 &it->d.lustre.it_lock_handle,
625 file->f_dentry->d_inode);
628 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
630 ll_file_data_put(fd);
631 GOTO(out_och_free, rc = -ENOMEM);
634 req = it->d.lustre.it_data;
636 /* md_intent_lock() didn't get a request ref if there was an
637 * open error, so don't do cleanup on the request here
639 /* XXX (green): Should not we bail out on any error here, not
640 * just open error? */
641 rc = it_open_error(DISP_OPEN_OPEN, it);
643 ll_file_data_put(fd);
644 GOTO(out_och_free, rc);
647 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
648 rc = ll_local_open(file, it, fd, *och_p);
650 ll_file_data_put(fd);
651 GOTO(out_och_free, rc);
654 up(&lli->lli_och_sem);
656 /* Must do this outside lli_och_sem lock to prevent deadlock where
657 different kind of OPEN lock for this same inode gets cancelled
658 by ldlm_cancel_lru */
659 if (!S_ISREG(inode->i_mode))
666 if (file->f_flags & O_LOV_DELAY_CREATE ||
667 !(file->f_mode & FMODE_WRITE)) {
668 CDEBUG(D_INODE, "object creation was delayed\n");
672 file->f_flags &= ~O_LOV_DELAY_CREATE;
675 ptlrpc_req_finished(req);
677 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
681 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
682 *och_p = NULL; /* OBD_FREE writes some magic there */
685 up(&lli->lli_och_sem);
687 if (opendir_set != 0)
688 ll_stop_statahead(inode, lli->lli_opendir_key);
/*
 * Fetch object attributes for the stripe MD @lsm through @exp.
 *
 * The request obdo is keyed by lsm_object_id / lsm_object_gr with mode
 * S_IFREG, and asks for id, type, size, blocks, block size and the
 * atime/mtime/ctime fields.  The request is issued with obd_getattr_async()
 * on a ptlrpc request set which is then waited on and destroyed.  Afterwards
 * o_valid is masked down to the blocks, block size, time and size bits.
 *
 * \param lsm   stripe metadata; must not be NULL (asserted)
 * \param exp   export used for the getattr
 * \param capa  capability stored in the request info
 * \param obdo  attribute buffer
 */
694 /* Fills the obdo with the attributes for the lsm */
695 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
696 struct obd_capa *capa, struct obdo *obdo)
698 struct ptlrpc_request_set *set;
699 struct obd_info oinfo = { { { 0 } } };
704 LASSERT(lsm != NULL);
708 oinfo.oi_oa->o_id = lsm->lsm_object_id;
709 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
710 oinfo.oi_oa->o_mode = S_IFREG;
711 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
712 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
713 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
714 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
716 oinfo.oi_capa = capa;
718 set = ptlrpc_prep_set();
720 CERROR("can't allocate ptlrpc set\n");
723 rc = obd_getattr_async(exp, &oinfo, set);
725 rc = ptlrpc_set_wait(set);
726 ptlrpc_set_destroy(set);
729 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
730 OBD_MD_FLATIME | OBD_MD_FLMTIME |
731 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
/*
 * Wrapper around ll_lsm_getattr() for an inode: uses the inode's stripe MD,
 * its data export and the capability from ll_mdscapa_get(), then refreshes
 * the inode from the returned obdo with obdo_refresh_inode() (limited to
 * the fields flagged in o_valid) and logs object id, size, blocks and block
 * size.
 */
735 /* Fills the obdo with the attributes for the inode defined by lsm */
736 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
738 struct ll_inode_info *lli = ll_i2info(inode);
739 struct obd_capa *capa = ll_mdscapa_get(inode);
743 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
746 obdo_refresh_inode(inode, obdo, obdo->o_valid);
748 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
749 lli->lli_smd->lsm_object_id, i_size_read(inode),
750 (unsigned long long)inode->i_blocks,
751 (unsigned long)ll_inode_blksize(inode));
/*
 * Merge the lock value block of the inode's stripes into the inode.
 *
 * Under the inode size lock, an lvb is initialised from the inode with
 * inode_init_lvb(), merged through obd_merge_lvb() on the data export, and
 * the resulting size, block count and mtime/atime/ctime are written back to
 * the inode.
 */
756 int ll_merge_lvb(struct inode *inode)
758 struct ll_inode_info *lli = ll_i2info(inode);
759 struct ll_sb_info *sbi = ll_i2sbi(inode);
765 ll_inode_size_lock(inode, 1);
766 inode_init_lvb(inode, &lvb);
767 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
768 i_size_write(inode, lvb.lvb_size);
769 inode->i_blocks = lvb.lvb_blocks;
771 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
772 LTIME_S(inode->i_atime) = lvb.lvb_atime;
773 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
774 ll_inode_size_unlock(inode, 1);
/*
 * Query the attributes of @lsm via ll_lsm_getattr() (no capability is
 * passed) and copy size, blocks and mtime/atime/ctime from the resulting
 * obdo into the caller's stat buffer @st.
 */
779 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
782 struct obdo obdo = { 0 };
785 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
787 st->st_size = obdo.o_size;
788 st->st_blocks = obdo.o_blocks;
789 st->st_mtime = obdo.o_mtime;
790 st->st_atime = obdo.o_atime;
791 st->st_ctime = obdo.o_ctime;
/*
 * Zero @io and set it up for I/O on @file.
 *
 * The non-blocking flag comes from O_NONBLOCK, the write append flag from
 * O_APPEND, and ci_obj is the cl_object of the file's inode.  The lock
 * requirement defaults to CILR_MAYBE; it becomes CILR_NEVER when the
 * descriptor has LL_FILE_IGNORE_LOCK or the superblock has LL_SBI_NOLCK,
 * and CILR_MANDATORY otherwise for O_APPEND files.
 *
 * \param write  non-zero for write I/O (callers pass iot == CIT_WRITE)
 */
796 void ll_io_init(struct cl_io *io, const struct file *file, int write)
798 struct inode *inode = file->f_dentry->d_inode;
799 struct ll_sb_info *sbi = ll_i2sbi(inode);
800 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
803 memset(io, 0, sizeof *io);
804 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
806 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
807 io->ci_obj = ll_i2info(inode)->lli_clob;
808 io->ci_lockreq = CILR_MAYBE;
809 if (fd->fd_flags & LL_FILE_IGNORE_LOCK || sbi->ll_flags & LL_SBI_NOLCK)
810 io->ci_lockreq = CILR_NEVER;
811 else if (file->f_flags & O_APPEND)
812 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common read/write/sendfile path built on the cl_io framework.
 *
 * The cl_io embedded in the environment (ccc_env_info) is initialised with
 * ll_io_init() and cl_io_rw_init().  When cl_io_rw_init() returns 0, the
 * per-environment vvp/ccc I/O state is filled from @args (sendfile actor and
 * target, or the iovec, segment count and - without HAVE_FILE_WRITEV - the
 * kiocb), the file descriptor data is attached, and cl_io_loop() runs the
 * I/O.  Otherwise cl_io_rw_init() already handled the I/O and ci_result is
 * used.  If any bytes were transferred (ci_nob > 0), *ppos is updated from
 * the I/O position.
 *
 * \param env    cl environment
 * \param args   I/O arguments prepared by the caller
 * \param file   file to operate on
 * \param iot    I/O type (CIT_READ or CIT_WRITE in the callers shown here)
 * \param ppos   in/out file position
 * \param count  number of bytes requested
 */
815 static ssize_t ll_file_io_generic(const struct lu_env *env,
816 struct ccc_io_args *args, struct file *file,
817 enum cl_io_type iot, loff_t *ppos, size_t count)
823 io = &ccc_env_info(env)->cti_io;
824 ll_io_init(io, file, iot == CIT_WRITE);
827 io->u.ci_rd.rd_is_sendfile = args->cia_is_sendfile;
829 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
830 struct vvp_io *vio = vvp_env_io(env);
831 struct ccc_io *cio = ccc_env_io(env);
832 if (cl_io_is_sendfile(io)) {
833 vio->u.read.cui_actor = args->cia_actor;
834 vio->u.read.cui_target = args->cia_target;
836 cio->cui_iov = args->cia_iov;
837 cio->cui_nrsegs = args->cia_nrsegs;
838 #ifndef HAVE_FILE_WRITEV
839 cio->cui_iocb = args->cia_iocb;
842 cio->cui_fd = LUSTRE_FPRIVATE(file);
843 result = cl_io_loop(env, io);
845 /* cl_io_rw_init() handled IO */
846 result = io->ci_result;
847 if (io->ci_nob > 0) {
849 *ppos = io->u.ci_wr.wr.crw_pos;
/*
 * Validate the iovec array @iov and accumulate its total length.
 *
 * Walks the *nr_segs segments; a segment length or running total that is
 * negative when viewed as ssize_t is treated as invalid, and each segment is
 * checked with access_ok(VERIFY_READ).  The length of a segment that fails
 * the access check is subtracted from the running total again.
 *
 * \param iov      iovec array to check
 * \param nr_segs  in/out number of segments
 * \param count    out: total byte count (NOTE(review): the store to *count
 *                 is presumed - confirm)
 */
857 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
859 static int ll_file_get_iov_count(const struct iovec *iov,
860 unsigned long *nr_segs, size_t *count)
865 for (seg = 0; seg < *nr_segs; seg++) {
866 const struct iovec *iv = &iov[seg];
869 * If any segment has a negative length, or the cumulative
870 * length ever wraps negative then return -EINVAL.
873 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
875 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
880 cnt -= iv->iov_len; /* This segment is no good */
887 #ifdef HAVE_FILE_READV
/*
 * Vectored read (HAVE_FILE_READV build).
 *
 * Validates the iovec with ll_file_get_iov_count(), takes a cl environment
 * with cl_env_get(), fills the per-environment I/O arguments (not sendfile,
 * iovec and segment count) and runs ll_file_io_generic() with CIT_READ.
 * The environment is released with cl_env_put() afterwards.
 */
888 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
889 unsigned long nr_segs, loff_t *ppos)
892 struct ccc_io_args *args;
898 result = ll_file_get_iov_count(iov, &nr_segs, &count);
902 env = cl_env_get(&refcheck);
904 RETURN(PTR_ERR(env));
906 args = &vvp_env_info(env)->vti_args;
907 args->cia_is_sendfile = 0;
908 args->cia_iov = (struct iovec *)iov;
909 args->cia_nrsegs = nr_segs;
910 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
911 cl_env_put(env, &refcheck);
/*
 * Plain read (HAVE_FILE_READV build): wraps the user buffer @buf / @count
 * in the single iovec kept in the cl environment and forwards to
 * ll_file_readv() with one segment.
 */
915 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
919 struct iovec *local_iov;
924 env = cl_env_get(&refcheck);
926 RETURN(PTR_ERR(env));
928 local_iov = &vvp_env_info(env)->vti_local_iov;
929 local_iov->iov_base = (void __user *)buf;
930 local_iov->iov_len = count;
931 result = ll_file_readv(file, local_iov, 1, ppos);
932 cl_env_put(env, &refcheck);
/*
 * AIO read entry point.
 *
 * Validates the iovec with ll_file_get_iov_count(), takes a cl environment,
 * fills the per-environment I/O arguments (not sendfile, iovec, segment
 * count, kiocb) and runs ll_file_io_generic() with CIT_READ on
 * iocb->ki_filp, using iocb->ki_pos as the file position.  Note that the
 * @pos argument itself is not passed on in the code shown here.
 */
937 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
938 unsigned long nr_segs, loff_t pos)
941 struct ccc_io_args *args;
947 result = ll_file_get_iov_count(iov, &nr_segs, &count);
951 env = cl_env_get(&refcheck);
953 RETURN(PTR_ERR(env));
955 args = &vvp_env_info(env)->vti_args;
956 args->cia_is_sendfile = 0;
957 args->cia_iov = (struct iovec *)iov;
958 args->cia_nrsegs = nr_segs;
959 args->cia_iocb = iocb;
960 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
961 &iocb->ki_pos, count);
962 cl_env_put(env, &refcheck);
/*
 * Plain read implemented on top of ll_file_aio_read().
 *
 * Uses the iovec and kiocb stored in the cl environment: the iovec wraps
 * @buf / @count, the kiocb is initialised as a synchronous kiocb for @file
 * with ki_pos = *ppos and ki_left = @count.  After the AIO call, *ppos is
 * updated from kiocb->ki_pos.
 */
966 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
970 struct iovec *local_iov;
976 env = cl_env_get(&refcheck);
978 RETURN(PTR_ERR(env));
980 local_iov = &vvp_env_info(env)->vti_local_iov;
981 kiocb = &vvp_env_info(env)->vti_kiocb;
982 local_iov->iov_base = (void __user *)buf;
983 local_iov->iov_len = count;
984 init_sync_kiocb(kiocb, file);
985 kiocb->ki_pos = *ppos;
986 kiocb->ki_left = count;
988 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
989 *ppos = kiocb->ki_pos;
991 cl_env_put(env, &refcheck);
997 * Write to a file (through the page cache).
999 #ifdef HAVE_FILE_WRITEV
/*
 * Vectored write (HAVE_FILE_WRITEV build).
 *
 * Validates the iovec with ll_file_get_iov_count(), takes a cl environment,
 * stores the iovec and segment count in the per-environment I/O arguments
 * and runs ll_file_io_generic() with CIT_WRITE.
 * NOTE(review): unlike the read paths, cia_is_sendfile is not reset here.
 */
1000 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1001 unsigned long nr_segs, loff_t *ppos)
1004 struct ccc_io_args *args;
1010 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1014 env = cl_env_get(&refcheck);
1016 RETURN(PTR_ERR(env));
1018 args = &vvp_env_info(env)->vti_args;
1019 args->cia_iov = (struct iovec *)iov;
1020 args->cia_nrsegs = nr_segs;
1021 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1022 cl_env_put(env, &refcheck);
/*
 * Plain write (HAVE_FILE_WRITEV build): wraps the user buffer @buf / @count
 * in the single iovec kept in the cl environment and forwards to
 * ll_file_writev() with one segment.
 */
1026 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1030 struct iovec *local_iov;
1035 env = cl_env_get(&refcheck);
1037 RETURN(PTR_ERR(env));
1039 local_iov = &vvp_env_info(env)->vti_local_iov;
1040 local_iov->iov_base = (void __user *)buf;
1041 local_iov->iov_len = count;
1043 result = ll_file_writev(file, local_iov, 1, ppos);
1044 cl_env_put(env, &refcheck);
1048 #else /* AIO stuff */
/*
 * AIO write entry point.
 *
 * Validates the iovec with ll_file_get_iov_count(), takes a cl environment,
 * stores the iovec, segment count and kiocb in the per-environment I/O
 * arguments and runs ll_file_io_generic() with CIT_WRITE on iocb->ki_filp,
 * using iocb->ki_pos as the file position.  The @pos argument itself is not
 * passed on in the code shown here.
 */
1049 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1050 unsigned long nr_segs, loff_t pos)
1053 struct ccc_io_args *args;
1059 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1063 env = cl_env_get(&refcheck);
1065 RETURN(PTR_ERR(env));
1067 args = &vvp_env_info(env)->vti_args;
1068 args->cia_iov = (struct iovec *)iov;
1069 args->cia_nrsegs = nr_segs;
1070 args->cia_iocb = iocb;
1071 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1072 &iocb->ki_pos, count);
1073 cl_env_put(env, &refcheck);
/*
 * Plain write implemented on top of ll_file_aio_write().
 *
 * Uses the iovec and kiocb stored in the cl environment: the iovec wraps
 * @buf / @count, the kiocb is initialised as a synchronous kiocb for @file
 * with ki_pos = *ppos and ki_left = @count.  After the AIO call, *ppos is
 * updated from kiocb->ki_pos.
 */
1077 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1081 struct iovec *local_iov;
1082 struct kiocb *kiocb;
1087 env = cl_env_get(&refcheck);
1089 RETURN(PTR_ERR(env));
1091 local_iov = &vvp_env_info(env)->vti_local_iov;
1092 kiocb = &vvp_env_info(env)->vti_kiocb;
1093 local_iov->iov_base = (void __user *)buf;
1094 local_iov->iov_len = count;
1095 init_sync_kiocb(kiocb, file);
1096 kiocb->ki_pos = *ppos;
1097 kiocb->ki_left = count;
1099 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1100 *ppos = kiocb->ki_pos;
1102 cl_env_put(env, &refcheck);
1109 * Send file content (through pagecache) somewhere with helper
/*
 * sendfile entry point: marks the per-environment I/O arguments as a
 * sendfile request, records the read @actor and its @target, and runs
 * ll_file_io_generic() with CIT_READ on @in_file.  The cl environment is
 * taken with cl_env_get() and released with cl_env_put().
 */
1111 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1112 read_actor_t actor, void *target)
1115 struct ccc_io_args *args;
1120 env = cl_env_get(&refcheck);
1122 RETURN(PTR_ERR(env));
1124 args = &vvp_env_info(env)->vti_args;
1125 args->cia_is_sendfile = 1;
1126 args->cia_target = target;
1127 args->cia_actor = actor;
1128 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1129 cl_env_put(env, &refcheck);
/*
 * Recreate an OST object described by a struct ll_recreate_obj read from
 * user space.
 *
 * CFS_CAP_SYS_ADMIN is checked first, then the request is copied in with
 * copy_from_user().  With the inode size lock held, the inode's stripe MD is
 * looked up (-ENOENT on the failure path) and a same-sized scratch copy is
 * allocated (-ENOMEM on failure).  The obdo carries the requested object id
 * and group, the OST index in o_nlink, and the OBD_FL_RECREATE_OBJS flag;
 * type and timestamps are added from the inode.  obd_create() is called on
 * the data export with the scratch copy, which is freed afterwards.
 */
1133 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1136 struct obd_export *exp = ll_i2dtexp(inode);
1137 struct ll_recreate_obj ucreatp;
1138 struct obd_trans_info oti = { 0 };
1139 struct obdo *oa = NULL;
1142 struct lov_stripe_md *lsm, *lsm2;
1145 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1148 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1149 sizeof(struct ll_recreate_obj)))
1156 ll_inode_size_lock(inode, 0);
1157 lsm = ll_i2info(inode)->lli_smd;
1159 GOTO(out, rc = -ENOENT);
1160 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1161 (lsm->lsm_stripe_count));
1163 OBD_ALLOC(lsm2, lsm_size);
1165 GOTO(out, rc = -ENOMEM);
1167 oa->o_id = ucreatp.lrc_id;
1168 oa->o_gr = ucreatp.lrc_group;
1169 oa->o_nlink = ucreatp.lrc_ost_idx;
1170 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1171 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1172 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1173 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1175 memcpy(lsm2, lsm, lsm_size);
1176 rc = obd_create(exp, oa, &lsm2, &oti);
1178 OBD_FREE(lsm2, lsm_size);
1181 ll_inode_size_unlock(inode, 0);
/*
 * Set striping information for @inode by performing an intent open that
 * carries the user striping descriptor @lum.
 *
 * With the inode size lock held, the current stripe MD is examined; the
 * "stripe already exists" case is logged and the lock released.  Otherwise
 * ll_intent_file_open() is called with @lum / @lum_size.  A negative lookup
 * disposition yields -ENOENT, and the intent status is used as the result.
 * The open handle obtained by the intent is released again with
 * ll_release_openhandle(); the size lock is dropped, the intent released
 * and the intent's request is finished on the exit path.
 *
 * \param flags     flags placed in the IT_OPEN intent
 * \param lum       striping descriptor passed through to the intent open
 * \param lum_size  size of @lum
 */
1186 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1187 int flags, struct lov_user_md *lum, int lum_size)
1189 struct lov_stripe_md *lsm;
1190 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1194 ll_inode_size_lock(inode, 0);
1195 lsm = ll_i2info(inode)->lli_smd;
1197 ll_inode_size_unlock(inode, 0);
1198 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1203 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1206 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1207 GOTO(out_req_free, rc = -ENOENT);
1208 rc = oit.d.lustre.it_status;
1210 GOTO(out_req_free, rc);
1212 ll_release_openhandle(file->f_dentry, &oit);
1215 ll_inode_size_unlock(inode, 0);
1216 ll_intent_release(&oit);
1219 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the striping EA of @filename (looked up relative to @inode) from
 * the MDS.
 *
 * The maximum EA size comes from ll_get_max_mdsize(); md_getattr_name() is
 * then issued asking for OBD_MD_FLEASIZE | OBD_MD_FLDIREA.  The EA size is
 * taken from the reply body; a reply without either EA flag yields -ENODATA.
 * The EA magic must be LOV_MAGIC_V1, LOV_MAGIC_V3 or LOV_MAGIC_JOIN
 * (compared in little-endian form), otherwise -EPROTO.
 *
 * On hosts where LOV_MAGIC differs from its little-endian encoding, the EA
 * header is byte-swapped to host order; for V1/V3 the per-object entries are
 * swabbed only for regular files.
 *
 * For LOV_MAGIC_JOIN, the EA is unpacked into a stripe MD, checked with
 * obd_checkmd(), and a newly allocated lov_user_md_join is filled with each
 * stripe's extent start/end and object id, group, OST generation and OST
 * index; this buffer replaces @lmm and the stripe MD is freed.
 *
 * \param lmmp      out: the EA (NOTE(review): the store is presumed - confirm)
 * \param lmm_size  out: EA size as reported in the reply body
 * \param request   out: the getattr request (NOTE(review): presumably the
 *                  caller must release it - confirm)
 */
1223 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1224 struct lov_mds_md **lmmp, int *lmm_size,
1225 struct ptlrpc_request **request)
1227 struct ll_sb_info *sbi = ll_i2sbi(inode);
1228 struct mdt_body *body;
1229 struct lov_mds_md *lmm = NULL;
1230 struct ptlrpc_request *req = NULL;
1231 struct obd_capa *oc;
1234 rc = ll_get_max_mdsize(sbi, &lmmsize);
1238 oc = ll_mdscapa_get(inode);
1239 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1240 oc, filename, strlen(filename) + 1,
1241 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1242 ll_i2suppgid(inode), &req);
1245 CDEBUG(D_INFO, "md_getattr_name failed "
1246 "on %s: rc %d\n", filename, rc);
1250 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1251 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1253 lmmsize = body->eadatasize;
1255 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1257 GOTO(out, rc = -ENODATA);
1260 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1261 LASSERT(lmm != NULL);
1263 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1264 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
1265 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
1266 GOTO(out, rc = -EPROTO);
1270 * This is coming from the MDS, so is probably in
1271 * little endian. We convert it to host endian before
1272 * passing it to userspace.
1274 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1275 /* if function called for directory - we should
1276 * avoid swab not existent lsm objects */
1277 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1278 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1279 if (S_ISREG(body->mode))
1280 lustre_swab_lov_user_md_objects(
1281 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1282 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1283 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1284 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1285 if (S_ISREG(body->mode))
1286 lustre_swab_lov_user_md_objects(
1287 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1288 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1289 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
1290 lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
1294 if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
1295 struct lov_stripe_md *lsm;
1296 struct lov_user_md_join *lmj;
1297 int lmj_size, i, aindex = 0;
1299 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
1301 GOTO(out, rc = -ENOMEM);
1302 rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm);
1304 GOTO(out_free_memmd, rc);
1306 lmj_size = sizeof(struct lov_user_md_join) +
1307 lsm->lsm_stripe_count *
1308 sizeof(struct lov_user_ost_data_join);
1309 OBD_ALLOC(lmj, lmj_size);
1311 GOTO(out_free_memmd, rc = -ENOMEM);
1313 memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
1314 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1315 struct lov_extent *lex =
1316 &lsm->lsm_array->lai_ext_array[aindex];
1318 if (lex->le_loi_idx + lex->le_stripe_count <= i)
1320 CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
1321 LPU64" len %d\n", aindex, i,
1322 lex->le_start, (int)lex->le_len);
1323 lmj->lmm_objects[i].l_extent_start =
1326 if ((int)lex->le_len == -1)
1327 lmj->lmm_objects[i].l_extent_end = -1;
1329 lmj->lmm_objects[i].l_extent_end =
1330 lex->le_start + lex->le_len;
1331 lmj->lmm_objects[i].l_object_id =
1332 lsm->lsm_oinfo[i]->loi_id;
1333 lmj->lmm_objects[i].l_object_gr =
1334 lsm->lsm_oinfo[i]->loi_gr;
1335 lmj->lmm_objects[i].l_ost_gen =
1336 lsm->lsm_oinfo[i]->loi_ost_gen;
1337 lmj->lmm_objects[i].l_ost_idx =
1338 lsm->lsm_oinfo[i]->loi_ost_idx;
1340 lmm = (struct lov_mds_md *)lmj;
1343 obd_free_memmd(sbi->ll_dt_exp, &lsm);
1347 *lmm_size = lmmsize;
/*
 * Handler for LL_IOC_LOV_SETEA.
 *
 * Restricted to callers holding CFS_CAP_SYS_ADMIN.  A kernel buffer large
 * enough for one lov_user_md followed by one lov_user_ost_data is allocated,
 * filled from the user pointer passed as the ioctl argument, and handed to
 * ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS | FMODE_WRITE.  The
 * buffer is released both when the user copy fails and after the call.
 *
 * NOTE(review): the error codes returned for the capability, allocation and
 * copy failures are not visible in this excerpt.
 */
1352 static int ll_lov_setea(struct inode *inode, struct file *file,
1355 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1356 struct lov_user_md *lump;
1357 int lum_size = sizeof(struct lov_user_md) +
1358 sizeof(struct lov_user_ost_data);
1362 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1365 OBD_ALLOC(lump, lum_size);
1369 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1370 OBD_FREE(lump, lum_size);
1374 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1376 OBD_FREE(lump, lum_size);
/*
 * Handler for LL_IOC_LOV_SETSTRIPE.
 *
 * The user layout is read in two steps: first only sizeof(lov_user_md_v1)
 * bytes into the on-stack v3 structure (v1 is the smaller layout); if the
 * magic turns out to be LOV_USER_MAGIC_V3 the full v3 structure is copied
 * again.  The layout is applied with ll_lov_setstripe_ea_info() using
 * FMODE_WRITE, after which the user's lmm_stripe_count is zeroed and the
 * resulting striping is reported back through
 * obd_iocontrol(LL_IOC_LOV_GETSTRIPE).
 *
 * NOTE(review): the condition guarding the put_user()/obd_iocontrol() pair
 * is not visible in this excerpt (presumably rc == 0 - confirm).
 * NOTE(review): the return value of put_user() is ignored on the visible
 * line.
 */
1380 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1383 struct lov_user_md_v3 lumv3;
1384 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1385 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1386 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1389 int flags = FMODE_WRITE;
1392 /* first try with v1 which is smaller than v3 */
1393 lum_size = sizeof(struct lov_user_md_v1);
1394 if (copy_from_user(lumv1, lumv1p, lum_size))
1397 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1398 lum_size = sizeof(struct lov_user_md_v3);
1399 if (copy_from_user(&lumv3, lumv3p, lum_size))
1403 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1405 put_user(0, &lumv1p->lmm_stripe_count);
1406 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1407 0, ll_i2info(inode)->lli_smd,
/*
 * Handler for LL_IOC_LOV_GETSTRIPE: forwards the inode's cached stripe
 * metadata (lli_smd) to the data export via
 * obd_iocontrol(LL_IOC_LOV_GETSTRIPE).
 *
 * NOTE(review): any handling of a NULL lsm is not visible in this excerpt.
 */
1413 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1415 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1420 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * Handler invoked by ll_file_ioctl() for LL_IOC_GROUP_LOCK.
 * NOTE(review): the function body is not visible in this excerpt.
 */
1424 static int ll_get_grouplock(struct inode *inode, struct file *file,
/*
 * Handler invoked by ll_file_ioctl() for LL_IOC_GROUP_UNLOCK.
 * NOTE(review): the function body is not visible in this excerpt.
 */
1431 static int ll_put_grouplock(struct inode *inode, struct file *file,
1438 #if LUSTRE_FIX >= 50
/*
 * Validate that @tail may be joined onto @head.
 *
 * Each of the following conditions is logged with CERROR when violated:
 *   - the superblock of @head has LL_SBI_JOIN set (server supports join);
 *   - both inodes are regular files;
 *   - @head and @tail have different inode numbers;
 *   - the size of @head is a multiple of JOIN_FILE_ALIGN.
 *
 * NOTE(review): the values returned for each failure and for success are
 * not visible in this excerpt.
 * NOTE(review): the "must be regular" message names the tail inode first,
 * but the arguments pass head->i_ino first - the two look swapped.
 */
1439 static int join_sanity_check(struct inode *head, struct inode *tail)
1442 if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
1443 CERROR("server do not support join \n");
1446 if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
1447 CERROR("tail ino %lu and ino head %lu must be regular\n",
1448 head->i_ino, tail->i_ino);
1451 if (head->i_ino == tail->i_ino) {
1452 CERROR("file %lu can not be joined to itself \n", head->i_ino);
1455 if (i_size_read(head) % JOIN_FILE_ALIGN) {
1456 CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
/*
 * Ask the MDS to join the file behind @tail_filp onto @head_inode.
 *
 * An IT_OPEN intent carrying M_JOIN_FILE in it_create_mode (and the open
 * flags of @head_filp) is enqueued with md_enqueue() as an LDLM_IBITS /
 * LCK_CW request.  The op_data names the tail file through its parent
 * directory inode and dentry name, and passes the current size of
 * @head_inode as the extra data word.  The op_data is released right after
 * the enqueue.
 *
 * On an enqueue or open error the reply request is dropped.  Otherwise any
 * lock that was granted is released immediately with ldlm_lock_decref(),
 * the request is dropped, DISP_ENQ_COMPLETE is cleared and the open handle
 * created by the intent is closed through ll_release_openhandle().  The
 * intent is released with ll_intent_release().
 *
 * NOTE(review): tail_dentry is assigned both in its initializer and again
 * before use; the second assignment looks redundant.
 * NOTE(review): labels and return statements are not visible in this
 * excerpt.
 */
1462 static int join_file(struct inode *head_inode, struct file *head_filp,
1463 struct file *tail_filp)
1465 struct dentry *tail_dentry = tail_filp->f_dentry;
1466 struct lookup_intent oit = {.it_op = IT_OPEN,
1467 .it_flags = head_filp->f_flags,
1468 .it_create_mode = M_JOIN_FILE};
1469 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CW,
1470 ll_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL };
1472 struct lustre_handle lockh;
1473 struct md_op_data *op_data;
1478 tail_dentry = tail_filp->f_dentry;
1480 data = i_size_read(head_inode);
1481 op_data = ll_prep_md_op_data(NULL, head_inode,
1482 tail_dentry->d_parent->d_inode,
1483 tail_dentry->d_name.name,
1484 tail_dentry->d_name.len, 0,
1485 LUSTRE_OPC_ANY, &data);
1486 if (IS_ERR(op_data))
1487 RETURN(PTR_ERR(op_data));
1489 rc = md_enqueue(ll_i2mdexp(head_inode), &einfo, &oit,
1490 op_data, &lockh, NULL, 0, NULL, 0);
1492 ll_finish_md_op_data(op_data);
1496 rc = oit.d.lustre.it_status;
1498 if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
1499 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
1500 ptlrpc_req_finished((struct ptlrpc_request *)
1501 oit.d.lustre.it_data);
1505 if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
1507 ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
1508 oit.d.lustre.it_lock_mode = 0;
1510 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1511 it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
1512 ll_release_openhandle(head_filp->f_dentry, &oit);
1514 ll_intent_release(&oit);
/*
 * Join the file named @filename_tail onto the open file @filp / @head.
 *
 * Steps visible here:
 *   1. open the tail file O_WRONLY with filp_open() and take an inode
 *      reference with igrab();
 *   2. order the two inodes by inode number (larger i_ino first) so that
 *      the extent locks are always taken in the same sequence;
 *   3. take an LCK_EX extent lock over [0, OBD_OBJECT_EOF] on each inode
 *      via ll_node_from_inode() + ll_tree_lock();
 *   4. run join_sanity_check() and then join_file();
 *   5. unwind according to cleanup_phase: unlock the second and first
 *      trees (each followed by obd_cancel_unused() on that inode's lsm),
 *      close the tail file and, when the join succeeded, free the stripe
 *      metadata of @head and clear lli_smd.
 *
 * NOTE(review): the CERROR for a failed open has no trailing newline.
 * NOTE(review): the case labels of the cleanup switch, the cleanup_phase
 * updates, any check of the igrab() result and the matching iput() are not
 * visible in this excerpt.
 */
1518 static int ll_file_join(struct inode *head, struct file *filp,
1519 char *filename_tail)
1521 struct inode *tail = NULL, *first = NULL, *second = NULL;
1522 struct dentry *tail_dentry;
1523 struct file *tail_filp, *first_filp, *second_filp;
1524 struct ll_lock_tree first_tree, second_tree;
1525 struct ll_lock_tree_node *first_node, *second_node;
1526 struct ll_inode_info *hlli = ll_i2info(head);
1527 int rc = 0, cleanup_phase = 0;
1530 CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
1531 head->i_ino, head->i_generation, head, filename_tail);
1533 tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
1534 if (IS_ERR(tail_filp)) {
1535 CERROR("Can not open tail file %s", filename_tail);
1536 rc = PTR_ERR(tail_filp);
1539 tail = igrab(tail_filp->f_dentry->d_inode);
1541 tail_dentry = tail_filp->f_dentry;
1542 LASSERT(tail_dentry);
1545 /*reorder the inode for lock sequence*/
1546 first = head->i_ino > tail->i_ino ? head : tail;
1547 second = head->i_ino > tail->i_ino ? tail : head;
1548 first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
1549 second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
1551 CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
1552 head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
1553 first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
1554 if (IS_ERR(first_node)){
1555 rc = PTR_ERR(first_node);
1558 first_tree.lt_fd = first_filp->private_data;
1559 rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
1564 second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
1565 if (IS_ERR(second_node)){
1566 rc = PTR_ERR(second_node);
1569 second_tree.lt_fd = second_filp->private_data;
1570 rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
1575 rc = join_sanity_check(head, tail);
1579 rc = join_file(head, filp, tail_filp);
1583 switch (cleanup_phase) {
1585 ll_tree_unlock(&second_tree);
1586 obd_cancel_unused(ll_i2dtexp(second),
1587 ll_i2info(second)->lli_smd, 0, NULL);
1589 ll_tree_unlock(&first_tree);
1590 obd_cancel_unused(ll_i2dtexp(first),
1591 ll_i2info(first)->lli_smd, 0, NULL);
1593 filp_close(tail_filp, 0);
1596 if (head && rc == 0) {
1597 obd_free_memmd(ll_i2sbi(head)->ll_dt_exp,
1599 hlli->lli_smd = NULL;
1604 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
1609 #endif /* LUSTRE_FIX >= 50 */
1612 * Close inode open handle
1614 * \param dentry [in] dentry which contains the inode
1615 * \param it [in,out] intent which contains open info and result
1618 * \retval <0 failure
/*
 * Implementation notes for ll_release_openhandle():
 *   - nothing is done for the filesystem root dentry, nor when the intent
 *     does not carry DISP_OPEN_OPEN (no open handle was created);
 *   - otherwise an obd_client_handle is allocated, populated from the
 *     intent with ll_och_fill() and closed on the MDS through
 *     ll_close_inode_openhandle();
 *   - finally the request reference held for DISP_ENQ_OPEN_REF is dropped
 *     and that disposition bit is cleared, mirroring what ll_file_open
 *     would otherwise have done.
 * An allocation failure yields -ENOMEM.
 */
1620 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1622 struct inode *inode = dentry->d_inode;
1623 struct obd_client_handle *och;
1629 /* Root ? Do nothing. */
1630 if (dentry->d_inode->i_sb->s_root == dentry)
1633 /* No open handle to close? Move away */
1634 if (!it_disposition(it, DISP_OPEN_OPEN))
1637 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1639 OBD_ALLOC(och, sizeof(*och));
1641 GOTO(out, rc = -ENOMEM);
1643 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1644 ll_i2info(inode), it, och);
1646 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1649 /* this one is in place of ll_file_open */
1650 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1651 ptlrpc_req_finished(it->d.lustre.it_data);
1652 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1657 * Get size for inode for which FIEMAP mapping is requested.
1658 * Make the FIEMAP get_info call and returns the result.
/*
 * Implementation notes for ll_fiemap():
 *   - a file striped over more than one object is only mapped when the
 *     caller set FIEMAP_FLAG_DEVICE_ORDER, since the extents cannot be
 *     interpreted otherwise (the error returned in that case is not
 *     visible in this excerpt);
 *   - the lookup key is a ll_fiemap_info_key named KEY_FIEMAP whose obdo
 *     is filled from the stripe metadata (object id/group) and from the
 *     inode via obdo_from_inode();
 *   - a zero o_size short-circuits with fm_mapped_extents = 0;
 *   - otherwise the request header is copied into the key and
 *     obd_get_info() writes the result back into @fiemap, with the value
 *     length initialised from @num_bytes; failures are logged with CERROR.
 *
 * NOTE(review): lsm is dereferenced for lsm_stripe_count; a NULL check is
 * not visible in this excerpt.
 */
1660 int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1663 struct obd_export *exp = ll_i2dtexp(inode);
1664 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1665 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1666 int vallen = num_bytes;
1670 /* If the stripe_count > 1 and the application does not understand
1671 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1673 if (lsm->lsm_stripe_count > 1 &&
1674 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1677 fm_key.oa.o_id = lsm->lsm_object_id;
1678 fm_key.oa.o_gr = lsm->lsm_object_gr;
1679 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1681 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1684 /* If filesize is 0, then there would be no objects for mapping */
1685 if (fm_key.oa.o_size == 0) {
1686 fiemap->fm_mapped_extents = 0;
1690 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1692 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1694 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * ioctl entry point for regular Lustre files.
 *
 * tty ioctls (type 'T' or 't') are filtered out first.  The remaining
 * commands visible here are dispatched as follows:
 *   LL_IOC_GETFLAGS                   - copy fd->fd_flags to user space;
 *   LL_IOC_SETFLAGS / LL_IOC_CLRFLAGS - OR / AND-NOT the user-supplied flag
 *                                       word into fd->fd_flags; setting
 *                                       LL_FILE_IGNORE_LOCK is refused on a
 *                                       file not opened with O_DIRECT;
 *   LL_IOC_LOV_SETSTRIPE / _SETEA / _GETSTRIPE, LL_IOC_RECREATE_OBJ
 *                                     - striping helpers;
 *   EXT3_IOC_FIEMAP                   - see below;
 *   EXT3_IOC_GETFLAGS / _SETFLAGS     - ll_iocontrol();
 *   EXT3_IOC_GETVERSION{,_OLD}        - return inode->i_generation;
 *   file join (LUSTRE_FIX >= 50 only) - ll_file_join() with the user path;
 *   LL_IOC_GROUP_LOCK / _UNLOCK       - group lock helpers;
 *   IOC_OBD_STATFS, LL_IOC_FLUSHCTX   - ll_obd_statfs(), ll_flush_ctx();
 *   LL_IOC_PATH2FID                   - copy the inode's lu_fid to user;
 *   anything else                     - offered to dynamically registered
 *                                       handlers via ll_iocontrol_call(),
 *                                       then passed to obd_iocontrol() on
 *                                       the data export.
 *
 * EXT3_IOC_FIEMAP: fm_extent_count is read from user space to size a
 * vmalloc'ed buffer (header plus that many extents).  Flags outside
 * LUSTRE_FIEMAP_FLAGS_COMPAT are reported back to the caller and rejected
 * with -EBADR.  The first extent is copied in as well because it carries
 * continuation state from a previous call.  FIEMAP_FLAG_SYNC triggers
 * filemap_fdatawrite() before ll_fiemap() is called, and the header plus
 * fm_mapped_extents extents are copied back out.
 *
 * NOTE(review): num_bytes is derived from a user-controlled extent_count
 * and is later passed to ll_fiemap(), which stores it in an int; confirm
 * the count is bounded and that the multiplication cannot wrap where
 * size_t is 32 bits.
 * NOTE(review): several case labels, conditions and return statements are
 * not visible in this excerpt.
 */
1699 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1702 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1706 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1707 inode->i_generation, inode, cmd);
1708 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1710 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1711 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1715 case LL_IOC_GETFLAGS:
1716 /* Get the current value of the file flags */
1717 return put_user(fd->fd_flags, (int *)arg);
1718 case LL_IOC_SETFLAGS:
1719 case LL_IOC_CLRFLAGS:
1720 /* Set or clear specific file flags */
1721 /* XXX This probably needs checks to ensure the flags are
1722 * not abused, and to handle any flag side effects.
1724 if (get_user(flags, (int *) arg))
1727 if (cmd == LL_IOC_SETFLAGS) {
1728 if ((flags & LL_FILE_IGNORE_LOCK) &&
1729 !(file->f_flags & O_DIRECT)) {
1730 CERROR("%s: unable to disable locking on "
1731 "non-O_DIRECT file\n", current->comm);
1735 fd->fd_flags |= flags;
1737 fd->fd_flags &= ~flags;
1740 case LL_IOC_LOV_SETSTRIPE:
1741 RETURN(ll_lov_setstripe(inode, file, arg));
1742 case LL_IOC_LOV_SETEA:
1743 RETURN(ll_lov_setea(inode, file, arg));
1744 case LL_IOC_LOV_GETSTRIPE:
1745 RETURN(ll_lov_getstripe(inode, arg));
1746 case LL_IOC_RECREATE_OBJ:
1747 RETURN(ll_lov_recreate_obj(inode, file, arg));
1748 case EXT3_IOC_FIEMAP: {
1749 struct ll_user_fiemap *fiemap_s;
1750 size_t num_bytes, ret_bytes;
1751 unsigned int extent_count;
1754 /* Get the extent count so we can calculate the size of
1755 * required fiemap buffer */
1756 if (get_user(extent_count,
1757 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1759 num_bytes = sizeof(*fiemap_s) + (extent_count *
1760 sizeof(struct ll_fiemap_extent));
1761 OBD_VMALLOC(fiemap_s, num_bytes);
1762 if (fiemap_s == NULL)
1765 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1767 GOTO(error, rc = -EFAULT);
1769 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1770 fiemap_s->fm_flags = fiemap_s->fm_flags &
1771 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1772 if (copy_to_user((char *)arg, fiemap_s,
1774 GOTO(error, rc = -EFAULT);
1776 GOTO(error, rc = -EBADR);
1779 /* If fm_extent_count is non-zero, read the first extent since
1780 * it is used to calculate end_offset and device from previous
1783 if (copy_from_user(&fiemap_s->fm_extents[0],
1784 (char __user *)arg + sizeof(*fiemap_s),
1785 sizeof(struct ll_fiemap_extent)))
1786 GOTO(error, rc = -EFAULT);
1789 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1792 rc = filemap_fdatawrite(inode->i_mapping);
1797 rc = ll_fiemap(inode, fiemap_s, num_bytes);
1801 ret_bytes = sizeof(struct ll_user_fiemap);
1803 if (extent_count != 0)
1804 ret_bytes += (fiemap_s->fm_mapped_extents *
1805 sizeof(struct ll_fiemap_extent));
1807 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1811 OBD_VFREE(fiemap_s, num_bytes);
1814 case EXT3_IOC_GETFLAGS:
1815 case EXT3_IOC_SETFLAGS:
1816 RETURN(ll_iocontrol(inode, file, cmd, arg));
1817 case EXT3_IOC_GETVERSION_OLD:
1818 case EXT3_IOC_GETVERSION:
1819 RETURN(put_user(inode->i_generation, (int *)arg));
1821 #if LUSTRE_FIX >= 50
1822 /* Allow file join in beta builds to allow debugging */
1826 ftail = getname((const char *)arg);
1828 RETURN(PTR_ERR(ftail));
1829 rc = ll_file_join(inode, file, ftail);
1833 CWARN("file join is not supported in this version of Lustre\n");
1837 case LL_IOC_GROUP_LOCK:
1838 RETURN(ll_get_grouplock(inode, file, arg));
1839 case LL_IOC_GROUP_UNLOCK:
1840 RETURN(ll_put_grouplock(inode, file, arg));
1841 case IOC_OBD_STATFS:
1842 RETURN(ll_obd_statfs(inode, (void *)arg));
1844 /* We need to special case any other ioctls we want to handle,
1845 * to send them to the MDS/OST as appropriate and to properly
1846 * network encode the arg field.
1847 case EXT3_IOC_SETVERSION_OLD:
1848 case EXT3_IOC_SETVERSION:
1850 case LL_IOC_FLUSHCTX:
1851 RETURN(ll_flush_ctx(inode));
1852 case LL_IOC_PATH2FID: {
1853 if (copy_to_user((void *)arg, &ll_i2info(inode)->lli_fid,
1854 sizeof(struct lu_fid)))
1863 ll_iocontrol_call(inode, file, cmd, arg, &err))
1866 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek implementation.
 *
 * @origin follows the classic numbering used in the trace message below:
 * 0 = SEEK_SET, 1 = SEEK_CUR, 2 = SEEK_END.  A provisional target is
 * computed only for that trace message.  For SEEK_END the file size is
 * first refreshed with cl_glimpse_size() and then read under
 * ll_inode_size_lock(); for SEEK_CUR the current f_pos is added.  The new
 * position is applied only when it lies within [0, ll_file_maxbytes()].
 *
 * NOTE(review): the local 'nonblock' is set from O_NONBLOCK but is not
 * used on any line visible in this excerpt.
 * NOTE(review): the return value and error handling are not visible here.
 */
1872 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1874 struct inode *inode = file->f_dentry->d_inode;
1877 retval = offset + ((origin == 2) ? i_size_read(inode) :
1878 (origin == 1) ? file->f_pos : 0);
1879 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1880 inode->i_ino, inode->i_generation, inode, retval, retval,
1881 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1882 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1884 if (origin == 2) { /* SEEK_END */
1885 int nonblock = 0, rc;
1887 if (file->f_flags & O_NONBLOCK)
1888 nonblock = LDLM_FL_BLOCK_NOWAIT;
1890 rc = cl_glimpse_size(inode);
1894 ll_inode_size_lock(inode, 0);
1895 offset += i_size_read(inode);
1896 ll_inode_size_unlock(inode, 0);
1897 } else if (origin == 1) { /* SEEK_CUR */
1898 offset += file->f_pos;
1902 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1903 if (offset != file->f_pos) {
1904 file->f_pos = offset;
/*
 * fsync implementation.
 *
 * Sequence visible here:
 *   1. wait for page writeback already started by the caller
 *      (filemap_fdatawait);
 *   2. collect and reset asynchronous writeback errors recorded in
 *      lli_async_rc and, through lov_test_and_clear_async_rc(), in the
 *      stripe metadata;
 *   3. sync the metadata on the MDS with md_sync() and drop the reply;
 *   4. build an obdo from the stripe metadata and the inode and sync the
 *      whole object range [0, OBD_OBJECT_EOF] with obd_sync(), using an
 *      OSS write capability.
 *
 * NOTE(review): how the individual error codes are merged into the return
 * value, and the conditions guarding steps 2 and 4, are not visible in
 * this excerpt.
 */
1912 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1914 struct inode *inode = dentry->d_inode;
1915 struct ll_inode_info *lli = ll_i2info(inode);
1916 struct lov_stripe_md *lsm = lli->lli_smd;
1917 struct ptlrpc_request *req;
1918 struct obd_capa *oc;
1921 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
1922 inode->i_generation, inode);
1923 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
1925 /* fsync's caller has already called _fdata{sync,write}, we want
1926 * that IO to finish before calling the osc and mdc sync methods */
1927 rc = filemap_fdatawait(inode->i_mapping);
1929 /* catch async errors that were recorded back when async writeback
1930 * failed for pages in this mapping. */
1931 err = lli->lli_async_rc;
1932 lli->lli_async_rc = 0;
1936 err = lov_test_and_clear_async_rc(lsm);
1941 oc = ll_mdscapa_get(inode);
1942 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
1948 ptlrpc_req_finished(req);
1955 RETURN(rc ? rc : -ENOMEM);
1957 oa->o_id = lsm->lsm_object_id;
1958 oa->o_gr = lsm->lsm_object_gr;
1959 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1960 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1961 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1964 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
1965 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
1966 0, OBD_OBJECT_EOF, oc);
/*
 * Cluster-coherent flock()/fcntl() locking through the MDS.
 *
 * The VFS lock description is translated into an LDLM_FLOCK enqueue:
 *   - for FL_FLOCK requests the range end and owner pid are filled in,
 *     since flock() locks always cover the whole file;
 *   - the lock type selects the LDLM mode; the modes assigned on the
 *     visible lines are LCK_PR, LCK_PW and LCK_NL, the last one being
 *     used to express an unlock (see the comment in the body);
 *   - the command selects the enqueue flags; LDLM_FL_BLOCK_NOWAIT and
 *     LDLM_FL_TEST_LOCK are the values assigned on the visible lines.
 * The request is sent with md_enqueue().  Afterwards the local VFS lock
 * tables are updated: ll_flock_lock_file_wait() for FL_FLOCK, and
 * posix_lock_file_wait() for FL_POSIX (only when HAVE_F_OP_FLOCK is
 * defined and the request was not a test), in both cases when the enqueue
 * succeeded or the request was an unlock.
 *
 * NOTE(review): the case labels pairing lock types / commands with the
 * modes and flags above, and the return statements, are not visible in
 * this excerpt.
 */
1976 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
1978 struct inode *inode = file->f_dentry->d_inode;
1979 struct ll_sb_info *sbi = ll_i2sbi(inode);
1980 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
1981 .ei_cb_cp =ldlm_flock_completion_ast,
1982 .ei_cbdata = file_lock };
1983 struct md_op_data *op_data;
1984 struct lustre_handle lockh = {0};
1985 ldlm_policy_data_t flock;
1990 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
1991 inode->i_ino, file_lock);
1993 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
1995 if (file_lock->fl_flags & FL_FLOCK) {
1996 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
1997 /* set missing params for flock() calls */
1998 file_lock->fl_end = OFFSET_MAX;
1999 file_lock->fl_pid = current->tgid;
2001 flock.l_flock.pid = file_lock->fl_pid;
2002 flock.l_flock.start = file_lock->fl_start;
2003 flock.l_flock.end = file_lock->fl_end;
2005 switch (file_lock->fl_type) {
2007 einfo.ei_mode = LCK_PR;
2010 /* An unlock request may or may not have any relation to
2011 * existing locks so we may not be able to pass a lock handle
2012 * via a normal ldlm_lock_cancel() request. The request may even
2013 * unlock a byte range in the middle of an existing lock. In
2014 * order to process an unlock request we need all of the same
2015 * information that is given with a normal read or write record
2016 * lock request. To avoid creating another ldlm unlock (cancel)
2017 * message we'll treat a LCK_NL flock request as an unlock. */
2018 einfo.ei_mode = LCK_NL;
2021 einfo.ei_mode = LCK_PW;
2024 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
2039 flags = LDLM_FL_BLOCK_NOWAIT;
2045 flags = LDLM_FL_TEST_LOCK;
2046 /* Save the old mode so that if the mode in the lock changes we
2047 * can decrement the appropriate reader or writer refcount. */
2048 file_lock->fl_type = einfo.ei_mode;
2051 CERROR("unknown fcntl lock command: %d\n", cmd);
2055 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2056 LUSTRE_OPC_ANY, NULL);
2057 if (IS_ERR(op_data))
2058 RETURN(PTR_ERR(op_data));
2060 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2061 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2062 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2064 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2065 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2067 ll_finish_md_op_data(op_data);
2069 if ((file_lock->fl_flags & FL_FLOCK) &&
2070 (rc == 0 || file_lock->fl_type == F_UNLCK))
2071 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2072 #ifdef HAVE_F_OP_FLOCK
2073 if ((file_lock->fl_flags & FL_POSIX) &&
2074 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2075 !(flags & LDLM_FL_TEST_LOCK))
2076 posix_lock_file_wait(file, file_lock);
/*
 * Lock method installed in ll_file_operations_noflock (the "-o noflock"
 * table).
 * NOTE(review): the body is not visible in this excerpt; per the comment
 * on that table it is expected to fail flock calls with ENOSYS - confirm.
 */
2082 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test whether a metadata (inodebits) lock covering @bits is already held
 * for @inode.
 *
 * The lookup goes through md_lock_match() on the inode's FID for any of
 * the modes CR, CW, PR or PW.  LDLM_FL_TEST_LOCK is passed, so this is a
 * pure query.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
2089 int ll_have_md_lock(struct inode *inode, __u64 bits)
2091 struct lustre_handle lockh;
2092 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2100 fid = &ll_i2info(inode)->lli_fid;
2101 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2103 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2104 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2105 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Match a metadata (inodebits) lock covering @bits on @inode and hand it
 * to the caller through @lockh.
 *
 * Same lookup as ll_have_md_lock() (modes CR, CW, PR or PW on the inode's
 * FID) but without LDLM_FL_TEST_LOCK.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the value of md_lock_match() is returned - confirm.
 */
2111 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2112 struct lustre_handle *lockh)
2114 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2120 fid = &ll_i2info(inode)->lli_fid;
2121 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2123 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2124 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2125 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process the result @rc of a failed attribute fetch for @inode.
 *
 * -ENOENT means the object was already unlinked on the server; the
 * existing comment says nlink is updated and success is returned in that
 * case, with special treatment for inodes that are neither regular files
 * nor directories.  Other failures are logged with CERROR.
 *
 * NOTE(review): the nlink update, the branch bodies and the return
 * statements are not visible in this excerpt.
 */
2129 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2130 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2131 * and return success */
2133 /* This path cannot be hit for regular files unless in
2134 * case of obscure races, so no need to to validate
2136 if (!S_ISREG(inode->i_mode) &&
2137 !S_ISDIR(inode->i_mode))
2142 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Refresh the attributes of dentry->d_inode from the MDS.
 *
 * Two strategies are used, depending on the server:
 *
 *   - If the export advertises OBD_CONNECT_ATTRFID, an IT_GETATTR intent
 *     is sent by FID (no name) through md_intent_lock(), with
 *     M_CHECK_STALE set in it_create_mode for the duration of the call.
 *     A failure is mapped through ll_inode_revalidate_fini(); the reply is
 *     applied with ll_revalidate_it_finish().  A dentry whose inode has
 *     no links left is dropped with ll_drop_dentry() under
 *     ll_lookup_lock + dcache_lock (rather than d_drop(), to keep get_cwd
 *     working), and ll_lookup_finish_locks() completes the intent.
 *
 *   - Otherwise, when no UPDATE|LOOKUP inodebits lock is cached, a plain
 *     md_getattr() is issued (requesting EA size information as well for
 *     regular files) and the reply is applied with ll_prep_inode().
 *
 * Unless the inode has no stripe metadata yet, the size is then refreshed
 * with cl_glimpse_size().  The request is released before returning.
 *
 * NOTE(review): the condition guarding the "REPORT THIS LINE" message, the
 * error branches and the return statements are not visible in this
 * excerpt.
 */
2150 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2152 struct inode *inode = dentry->d_inode;
2153 struct ptlrpc_request *req = NULL;
2154 struct ll_sb_info *sbi;
2155 struct obd_export *exp;
2160 CERROR("REPORT THIS LINE TO PETER\n");
2163 sbi = ll_i2sbi(inode);
2165 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2166 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2168 exp = ll_i2mdexp(inode);
2170 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2171 struct lookup_intent oit = { .it_op = IT_GETATTR };
2172 struct md_op_data *op_data;
2174 /* Call getattr by fid, so do not provide name at all. */
2175 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2176 dentry->d_inode, NULL, 0, 0,
2177 LUSTRE_OPC_ANY, NULL);
2178 if (IS_ERR(op_data))
2179 RETURN(PTR_ERR(op_data));
2181 oit.it_create_mode |= M_CHECK_STALE;
2182 rc = md_intent_lock(exp, op_data, NULL, 0,
2183 /* we are not interested in name
2186 ll_md_blocking_ast, 0);
2187 ll_finish_md_op_data(op_data);
2188 oit.it_create_mode &= ~M_CHECK_STALE;
2190 rc = ll_inode_revalidate_fini(inode, rc);
2194 rc = ll_revalidate_it_finish(req, &oit, dentry);
2196 ll_intent_release(&oit);
2200 /* Unlinked? Unhash dentry, so it is not picked up later by
2201 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2202 here to preserve get_cwd functionality on 2.6.
2204 if (!dentry->d_inode->i_nlink) {
2205 spin_lock(&ll_lookup_lock);
2206 spin_lock(&dcache_lock);
2207 ll_drop_dentry(dentry);
2208 spin_unlock(&dcache_lock);
2209 spin_unlock(&ll_lookup_lock);
2212 ll_lookup_finish_locks(&oit, dentry);
2213 } else if (!ll_have_md_lock(dentry->d_inode, MDS_INODELOCK_UPDATE |
2214 MDS_INODELOCK_LOOKUP)) {
2215 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2216 obd_valid valid = OBD_MD_FLGETATTR;
2217 struct obd_capa *oc;
2220 if (S_ISREG(inode->i_mode)) {
2221 rc = ll_get_max_mdsize(sbi, &ealen);
2224 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2226 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2227 * capa for this inode. Because we only keep capas of dirs
2229 oc = ll_mdscapa_get(inode);
2230 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2234 rc = ll_inode_revalidate_fini(inode, rc);
2238 rc = ll_prep_inode(&inode, req, NULL);
2243 /* if object not yet allocated, don't validate size */
2244 if (ll_i2info(inode)->lli_smd == NULL)
2247 /* cl_glimpse_size will prefer locally cached writes if they extend
2249 rc = cl_glimpse_size(inode);
2252 ptlrpc_req_finished(req);
/*
 * getattr with an explicit lookup intent.
 *
 * The inode is revalidated with ll_inode_revalidate_it() and the kstat is
 * then filled from the in-core inode.  blksize comes from i_blksize when
 * HAVE_INODE_BLKSIZE is defined and from i_blkbits otherwise.  size and
 * blocks are sampled together under ll_inode_size_lock() so that they
 * describe the same state.
 *
 * NOTE(review): the handling of a revalidation failure and the return
 * statement are not visible in this excerpt.
 */
2256 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2257 struct lookup_intent *it, struct kstat *stat)
2259 struct inode *inode = de->d_inode;
2262 res = ll_inode_revalidate_it(de, it);
2263 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2268 stat->dev = inode->i_sb->s_dev;
2269 stat->ino = inode->i_ino;
2270 stat->mode = inode->i_mode;
2271 stat->nlink = inode->i_nlink;
2272 stat->uid = inode->i_uid;
2273 stat->gid = inode->i_gid;
2274 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2275 stat->atime = inode->i_atime;
2276 stat->mtime = inode->i_mtime;
2277 stat->ctime = inode->i_ctime;
2278 #ifdef HAVE_INODE_BLKSIZE
2279 stat->blksize = inode->i_blksize;
2281 stat->blksize = 1 << inode->i_blkbits;
2284 ll_inode_size_lock(inode, 0);
2285 stat->size = i_size_read(inode);
2286 stat->blocks = inode->i_blocks;
2287 ll_inode_size_unlock(inode, 0);
/*
 * inode_operations getattr method: builds an IT_GETATTR lookup intent on
 * the stack and delegates to ll_getattr_it().
 */
2291 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2293 struct lookup_intent it = { .it_op = IT_GETATTR };
2295 return ll_getattr_it(mnt, de, &it, stat);
/*
 * POSIX ACL permission check for @inode against the access @mask.
 *
 * With CONFIG_FS_POSIX_ACL the cached ACL is duplicated under lli_lock, so
 * that the evaluation by posix_acl_permission() runs outside the spinlock
 * on a private reference, which is released afterwards.
 *
 * NOTE(review): the handling of a missing ACL, the branch used without
 * CONFIG_FS_POSIX_ACL and the return statements are not visible in this
 * excerpt.
 */
2299 int lustre_check_acl(struct inode *inode, int mask)
2301 #ifdef CONFIG_FS_POSIX_ACL
2302 struct ll_inode_info *lli = ll_i2info(inode);
2303 struct posix_acl *acl;
2307 spin_lock(&lli->lli_lock);
2308 acl = posix_acl_dup(lli->lli_posix_acl);
2309 spin_unlock(&lli->lli_lock);
2314 rc = posix_acl_permission(inode, acl, mask);
2315 posix_acl_release(acl);
2323 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
/*
 * inode_operations permission method for kernels >= 2.6.10.
 *
 * Remote clients (LL_SBI_RMT_CLIENT) are checked with
 * lustre_check_remote_perm(); everyone else goes through the kernel's
 * generic_permission() with lustre_check_acl() as the ACL callback.  The
 * LPROC_LL_INODE_PERM counter is bumped only on the non-remote path.
 */
2324 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2326 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2327 inode->i_ino, inode->i_generation, inode, mask);
2328 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2329 return lustre_check_remote_perm(inode, mask);
2331 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2332 return generic_permission(inode, mask, lustre_check_acl);
/*
 * inode_operations permission method for older kernels: an open-coded
 * permission check.
 *
 * Remote clients (LL_SBI_RMT_CLIENT) are checked with
 * lustre_check_remote_perm().  Otherwise the visible checks are, in order:
 * write access to a read-only filesystem object (regular file, directory
 * or symlink) and to an immutable inode; the owner, ACL
 * (lustre_check_acl), group and "other" mode bits; and finally the
 * CFS_CAP_DAC_OVERRIDE and CFS_CAP_DAC_READ_SEARCH capability overrides.
 *
 * NOTE(review): the values returned by each check and the labels used for
 * the jumps are not visible in this excerpt.
 */
2335 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2337 int mode = inode->i_mode;
2340 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2341 inode->i_ino, inode->i_generation, inode, mask);
2343 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2344 return lustre_check_remote_perm(inode, mask);
2346 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2348 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2349 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2351 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2353 if (current->fsuid == inode->i_uid) {
2356 if (((mode >> 3) & mask & S_IRWXO) != mask)
2358 rc = lustre_check_acl(inode, mask);
2362 goto check_capabilities;
2366 if (in_group_p(inode->i_gid))
2369 if ((mode & mask & S_IRWXO) == mask)
2373 if (!(mask & MAY_EXEC) ||
2374 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2375 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2378 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2379 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/*
 * Select the file_operations member names and the matching llite functions
 * for vectored I/O: readv/writev when HAVE_FILE_READV is defined,
 * aio_read/aio_write otherwise.  The tables below are written in terms of
 * READ_METHOD / WRITE_METHOD so that they build either way.
 */
2386 #ifdef HAVE_FILE_READV
2387 #define READ_METHOD readv
2388 #define READ_FUNCTION ll_file_readv
2389 #define WRITE_METHOD writev
2390 #define WRITE_FUNCTION ll_file_writev
2392 #define READ_METHOD aio_read
2393 #define READ_FUNCTION ll_file_aio_read
2394 #define WRITE_METHOD aio_write
2395 #define WRITE_FUNCTION ll_file_aio_write
2398 /* -o localflock - only provides locally consistent flock locks */
/*
 * File operations table that installs no Lustre lock/flock method on the
 * lines visible here (the "-o localflock" table according to the comment
 * preceding it).
 */
2399 struct file_operations ll_file_operations = {
2400 .read = ll_file_read,
2401 .READ_METHOD = READ_FUNCTION,
2402 .write = ll_file_write,
2403 .WRITE_METHOD = WRITE_FUNCTION,
2404 .ioctl = ll_file_ioctl,
2405 .open = ll_file_open,
2406 .release = ll_file_release,
2407 .mmap = ll_file_mmap,
2408 .llseek = ll_file_seek,
2409 .sendfile = ll_file_sendfile,
/*
 * File operations table with cluster-wide locking: identical to
 * ll_file_operations except that ll_file_flock() is installed as the
 * .lock method and, when HAVE_F_OP_FLOCK is defined, as the .flock method
 * too.
 */
2413 struct file_operations ll_file_operations_flock = {
2414 .read = ll_file_read,
2415 .READ_METHOD = READ_FUNCTION,
2416 .write = ll_file_write,
2417 .WRITE_METHOD = WRITE_FUNCTION,
2418 .ioctl = ll_file_ioctl,
2419 .open = ll_file_open,
2420 .release = ll_file_release,
2421 .mmap = ll_file_mmap,
2422 .llseek = ll_file_seek,
2423 .sendfile = ll_file_sendfile,
2425 #ifdef HAVE_F_OP_FLOCK
2426 .flock = ll_file_flock,
2428 .lock = ll_file_flock
2431 /* These are for -o noflock - to return ENOSYS on flock calls */
/*
 * File operations table with locking disabled: identical to
 * ll_file_operations_flock except that ll_file_noflock() takes the place
 * of ll_file_flock() in the .lock (and, with HAVE_F_OP_FLOCK, .flock)
 * slots.
 */
2432 struct file_operations ll_file_operations_noflock = {
2433 .read = ll_file_read,
2434 .READ_METHOD = READ_FUNCTION,
2435 .write = ll_file_write,
2436 .WRITE_METHOD = WRITE_FUNCTION,
2437 .ioctl = ll_file_ioctl,
2438 .open = ll_file_open,
2439 .release = ll_file_release,
2440 .mmap = ll_file_mmap,
2441 .llseek = ll_file_seek,
2442 .sendfile = ll_file_sendfile,
2444 #ifdef HAVE_F_OP_FLOCK
2445 .flock = ll_file_noflock,
2447 .lock = ll_file_noflock
/*
 * Inode operations for regular files.  With HAVE_VFS_INTENT_PATCHES the
 * raw setattr hook (ll_setattr_raw) is installed in addition to the
 * attribute, truncate, permission and extended-attribute methods.
 */
2450 struct inode_operations ll_file_inode_operations = {
2451 #ifdef HAVE_VFS_INTENT_PATCHES
2452 .setattr_raw = ll_setattr_raw,
2454 .setattr = ll_setattr,
2455 .truncate = ll_truncate,
2456 .getattr = ll_getattr,
2457 .permission = ll_inode_permission,
2458 .setxattr = ll_setxattr,
2459 .getxattr = ll_getxattr,
2460 .listxattr = ll_listxattr,
2461 .removexattr = ll_removexattr,
2464 /* dynamic ioctl number support routines */
/*
 * Registry of dynamically registered ioctl handlers.
 *
 * llioc_ctl_data is the registry head: ioc_head lists the registered
 * blocks and ioc_sem is the rw-semaphore taken for writing by
 * register/unregister and for reading by ll_iocontrol_call().
 *
 * The iocd_* fields that follow describe one registered block: its list
 * linkage, the allocation size recorded for the later OBD_FREE, the
 * callback, and the number of command codes stored in the trailing
 * iocd_cmd[] array.
 */
2465 static struct llioc_ctl_data {
2466 struct rw_semaphore ioc_sem;
2467 struct list_head ioc_head;
2469 __RWSEM_INITIALIZER(llioc.ioc_sem),
2470 CFS_LIST_HEAD_INIT(llioc.ioc_head)
2475 struct list_head iocd_list;
2476 unsigned int iocd_size;
2477 llioc_callback_t iocd_cb;
2478 unsigned int iocd_count;
2479 unsigned int iocd_cmd[0];
/*
 * Register callback @cb for the @count ioctl command codes in @cmd.
 *
 * The arguments are rejected when @cb or @cmd is NULL or when @count is
 * negative or larger than LLIOC_MAX_CMD.  A llioc_data block with room for
 * the command array is allocated, filled in (including its own size, used
 * later when it is freed) and appended to the registry under the write
 * side of llioc.ioc_sem.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the block pointer is returned as the opaque "magic" expected
 * by ll_iocontrol_unregister() and NULL on failure - confirm.
 * NOTE(review): the memset() after OBD_ALLOC may be redundant if OBD_ALLOC
 * already returns zeroed memory - confirm.
 */
2482 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2485 struct llioc_data *in_data = NULL;
2488 if (cb == NULL || cmd == NULL ||
2489 count > LLIOC_MAX_CMD || count < 0)
2492 size = sizeof(*in_data) + count * sizeof(unsigned int);
2493 OBD_ALLOC(in_data, size);
2494 if (in_data == NULL)
2497 memset(in_data, 0, sizeof(*in_data));
2498 in_data->iocd_size = size;
2499 in_data->iocd_cb = cb;
2500 in_data->iocd_count = count;
2501 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2503 down_write(&llioc.ioc_sem);
2504 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2505 up_write(&llioc.ioc_sem);
/*
 * Remove a previously registered ioctl handler block identified by @magic.
 *
 * The registry is walked under the write side of llioc.ioc_sem.  The
 * matching block is unlinked, the semaphore is released, and the block is
 * freed using the size recorded at registration time.  If the walk ends
 * without a match the semaphore is released and a warning is logged.
 *
 * NOTE(review): the comparison that selects the matching entry and the
 * early-return lines are not visible in this excerpt.
 */
2510 void ll_iocontrol_unregister(void *magic)
2512 struct llioc_data *tmp;
2517 down_write(&llioc.ioc_sem);
2518 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2520 unsigned int size = tmp->iocd_size;
2522 list_del(&tmp->iocd_list);
2523 up_write(&llioc.ioc_sem);
2525 OBD_FREE(tmp, size);
2529 up_write(&llioc.ioc_sem);
2531 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
/* Make the dynamic ioctl registration API available to other modules. */
2534 EXPORT_SYMBOL(ll_iocontrol_register);
2535 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * Offer ioctl @cmd to the dynamically registered handlers.
 *
 * Under the read side of llioc.ioc_sem every registered block is scanned
 * for @cmd; a block listing the command has its callback invoked with the
 * ioctl arguments, the block itself and a pointer to the local rc (which
 * starts out as -EINVAL).  Scanning stops once a callback returns
 * LLIOC_STOP; the iterator result starts out as LLIOC_CONT.
 *
 * NOTE(review): the lines that store rc through @rcp and return the
 * iterator value are not visible in this excerpt.
 */
2537 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2538 unsigned int cmd, unsigned long arg, int *rcp)
2540 enum llioc_iter ret = LLIOC_CONT;
2541 struct llioc_data *data;
2542 int rc = -EINVAL, i;
2544 down_read(&llioc.ioc_sem);
2545 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2546 for (i = 0; i < data->iocd_count; i++) {
2547 if (cmd != data->iocd_cmd[i])
2550 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2554 if (ret == LLIOC_STOP)
2557 up_read(&llioc.ioc_sem);