1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-open-file-descriptor state object from the
 * ll_file_data_slab cache; CFS_ALLOC_IO permits blocking on I/O during
 * allocation. NOTE(review): the listing is truncated here -- the NULL
 * check / return of @fd is not visible. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Release a ll_file_data object back to its slab cache.
 * Counterpart of ll_file_data_get(). */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Fill @op_data with the current attributes of @inode (fid, mode,
 * times, size, blocks, flags, ioepoch) and the open file handle @fh,
 * in preparation for an MD operation such as close.  Also takes an
 * MDS capability reference via ll_mdscapa_get() -- presumably released
 * by the caller; not visible here. */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the ll_iattr wrapper, hence the cast. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Prepare @op_data for closing open handle @och on @inode: select which
 * attributes are sent to the MDS, do epoch-close bookkeeping, then pack
 * the inode attributes and file handle via ll_pack_inode2opdata(). */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
92 if (!(och->och_flags & FMODE_WRITE))
/* Size/blocks are only sent from here when Size-on-MDS is not in
 * use (or for non-regular files); otherwise SOM supplies them. */
95 if (!(exp_connect_som(ll_i2mdexp(inode))) || !S_ISREG(inode->i_mode))
96 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
98 ll_epoch_close(inode, op_data, &och, 0);
101 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/* Send an MDS close for open handle @och on @inode: allocate op_data,
 * prepare the close (possibly closing the IO epoch), call md_close(),
 * handle the Size-on-MDS update path, destroy OST objects recorded in
 * the close reply, and finally clear open replay data and poison the
 * handle cookie.  Returns 0 or a negative errno from the close path. */
105 static int ll_close_inode_openhandle(struct obd_export *md_exp,
107 struct obd_client_handle *och)
109 struct obd_export *exp = ll_i2mdexp(inode);
110 struct md_op_data *op_data;
111 struct ptlrpc_request *req = NULL;
112 struct obd_device *obd = class_exp2obd(exp);
119 * XXX: in case of LMV, is this correct to access
122 CERROR("Invalid MDC connection handle "LPX64"\n",
123 ll_i2mdexp(inode)->exp_handle.h_cookie);
128 * here we check if this is forced umount. If so this is called on
129 * canceling "open lock" and we do not call md_close() in this case, as
130 * it will not be successful, as import is already deactivated.
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
/* NOTE(review): truncated -- the branch below presumably handles the
 * md_close() return code that requests a Size-on-MDS update. */
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr to back to MDS. */
147 rc = ll_sizeonmds_update(inode, &och->och_fh,
148 op_data->op_ioepoch);
150 CERROR("inode %lu mdc Size-on-MDS update failed: "
151 "rc = %d\n", inode->i_ino, rc);
155 CERROR("inode %lu mdc close failed: rc = %d\n",
158 ll_finish_md_op_data(op_data);
/* Destroy OST objects if the close reply says the file is unlinked. */
161 rc = ll_objects_destroy(req, inode);
163 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* With SOM connected and the epoch still open on a written regular
 * file, defer the epoch close to the DONE_WRITING path. */
170 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
171 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
172 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
174 md_clear_open_replay_data(md_exp, och);
175 /* Free @och if it is not waiting for DONE_WRITING. */
176 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
179 if (req) /* This is close request */
180 ptlrpc_req_finished(req);
/* Really close the MDS open handle for @inode matching access @flags
 * (FMODE_WRITE / FMODE_EXEC / FMODE_READ).  If other users still hold
 * the shared handle (och_usecount) the close is skipped; otherwise the
 * handle is detached under lli_och_sem and closed via
 * ll_close_inode_openhandle(). */
184 int ll_md_real_close(struct inode *inode, int flags)
186 struct ll_inode_info *lli = ll_i2info(inode);
187 struct obd_client_handle **och_p;
188 struct obd_client_handle *och;
/* Pick the per-mode handle slot and its use count. */
193 if (flags & FMODE_WRITE) {
194 och_p = &lli->lli_mds_write_och;
195 och_usecount = &lli->lli_open_fd_write_count;
196 } else if (flags & FMODE_EXEC) {
197 och_p = &lli->lli_mds_exec_och;
198 och_usecount = &lli->lli_open_fd_exec_count;
200 LASSERT(flags & FMODE_READ);
201 och_p = &lli->lli_mds_read_och;
202 och_usecount = &lli->lli_open_fd_read_count;
205 down(&lli->lli_och_sem);
206 if (*och_usecount) { /* There are still users of this handle, so
208 up(&lli->lli_och_sem);
213 up(&lli->lli_och_sem);
215 if (och) { /* There might be a race and somebody have freed this och
217 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-struct-file close: drop any group lock, decrement the per-mode
 * open count under lli_och_sem, and -- unless a cached OPEN DLM lock
 * still covers this mode (md_lock_match) -- do the real MDS close via
 * ll_md_real_close().  Finally detach and free the file's private
 * ll_file_data and release the OSS capability. */
224 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
227 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
228 struct ll_inode_info *lli = ll_i2info(inode);
232 /* clear group lock, if present */
233 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
234 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
236 /* Let's see if we have good enough OPEN lock on the file and if
237 we can skip talking to MDS */
238 if (file->f_dentry->d_inode) { /* Can this ever be false? */
/* TEST_LOCK: only probe for a granted lock, do not take a ref. */
240 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
241 struct lustre_handle lockh;
242 struct inode *inode = file->f_dentry->d_inode;
243 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
245 down(&lli->lli_och_sem);
246 if (fd->fd_omode & FMODE_WRITE) {
248 LASSERT(lli->lli_open_fd_write_count);
249 lli->lli_open_fd_write_count--;
250 } else if (fd->fd_omode & FMODE_EXEC) {
252 LASSERT(lli->lli_open_fd_exec_count);
253 lli->lli_open_fd_exec_count--;
256 LASSERT(lli->lli_open_fd_read_count);
257 lli->lli_open_fd_read_count--;
259 up(&lli->lli_och_sem);
/* No matching cached OPEN lock -> must close the handle on the MDS. */
261 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
262 LDLM_IBITS, &policy, lockmode,
264 rc = ll_md_real_close(file->f_dentry->d_inode,
268 CERROR("Releasing a file %p with negative dentry %p. Name %s",
269 file, file->f_dentry, file->f_dentry->d_name.name);
272 LUSTRE_FPRIVATE(file) = NULL;
273 ll_file_data_put(fd);
274 ll_capa_close(inode);
279 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
281 /* While this returns an error code, fput() the caller does not, so we need
282 * to make every effort to clean up all of our state here. Also, applications
283 * rarely check close errors and even if an error is returned they will not
284 * re-try the close call.
/* VFS ->release() for Lustre files: tear down remote-ACL session state
 * (remote client on the root inode), update stats, stop the statahead
 * thread if this fd started it, flush pending async write errors from
 * the stripe MD, and finish with ll_md_close().  The root dentry is
 * special-cased: it only frees the ll_file_data. */
286 int ll_file_release(struct inode *inode, struct file *file)
288 struct ll_file_data *fd;
289 struct ll_sb_info *sbi = ll_i2sbi(inode);
290 struct ll_inode_info *lli = ll_i2info(inode);
291 struct lov_stripe_md *lsm = lli->lli_smd;
295 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
296 inode->i_generation, inode);
298 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL cleanup is keyed off the filesystem root inode. */
299 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
300 inode == inode->i_sb->s_root->d_inode) {
301 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
304 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
305 fd->fd_flags &= ~LL_FILE_RMTACL;
306 rct_del(&sbi->ll_rct, cfs_curproc_pid());
307 et_search_free(&sbi->ll_et, cfs_curproc_pid());
312 if (inode->i_sb->s_root != file->f_dentry)
313 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
314 fd = LUSTRE_FPRIVATE(file);
317 /* The last ref on @file, maybe not the the owner pid of statahead.
318 * Different processes can open the same dir, "ll_opendir_key" means:
319 * it is me that should stop the statahead thread. */
320 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
321 ll_stop_statahead(inode, lli->lli_opendir_key)
323 if (inode->i_sb->s_root == file->f_dentry) {
324 LUSTRE_FPRIVATE(file) = NULL;
325 ll_file_data_put(fd);
/* Surface any deferred async write error recorded on the lsm. */
330 lov_test_and_clear_async_rc(lsm);
331 lli->lli_async_rc = 0;
333 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/* Perform the MDS open intent for @file: build op_data for the parent
 * directory + name, request an OPEN lock unless stripe info (@lmm) is
 * being set, call md_intent_lock(), then update the inode from the
 * reply and stash the lock handle.  Used on the NFSD/revalidate paths
 * where the open did not come through lookup. */
337 static int ll_intent_file_open(struct file *file, void *lmm,
338 int lmmsize, struct lookup_intent *itp)
340 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
341 struct dentry *parent = file->f_dentry->d_parent;
342 const char *name = file->f_dentry->d_name.name;
343 const int len = file->f_dentry->d_name.len;
344 struct md_op_data *op_data;
345 struct ptlrpc_request *req;
352 /* Usually we come here only for NFSD, and we want open lock.
353 But we can also get here with pre 2.6.15 patchless kernels, and in
354 that case that lock is also ok */
355 /* We can also get here if there was cached open handle in revalidate_it
356 * but it disappeared while we were getting from there to ll_file_open.
357 * But this means this file was closed and immediatelly opened which
358 * makes a good candidate for using OPEN lock */
359 /* If lmmsize & lmm are not 0, we are just setting stripe info
360 * parameters. No need for the open lock */
361 if (!lmm && !lmmsize)
362 itp->it_flags |= MDS_OPEN_LOCK;
364 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
365 file->f_dentry->d_inode, name, len,
366 O_RDWR, LUSTRE_OPC_ANY, NULL);
368 RETURN(PTR_ERR(op_data));
370 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
371 0 /*unused */, &req, ll_md_blocking_ast, 0);
372 ll_finish_md_op_data(op_data);
/* NOTE(review): truncated -- an rc == -ESTALE branch presumably sits
 * around here, matching the comment below. */
374 /* reason for keep own exit path - don`t flood log
375 * with messages with -ESTALE errors.
377 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
378 it_open_error(DISP_OPEN_OPEN, itp))
380 ll_release_openhandle(file->f_dentry, itp);
384 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
385 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
386 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the reply and bind the granted lock to it. */
390 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
391 if (!rc && itp->d.lustre.it_lock_mode)
392 md_set_lock_data(sbi->ll_md_exp,
393 &itp->d.lustre.it_lock_handle,
394 file->f_dentry->d_inode, NULL);
397 ptlrpc_req_finished(itp->d.lustre.it_data);
398 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
399 ll_intent_drop_lock(itp);
/* Record a newly-opened IO epoch on @lli.  A zero @ioepoch or an
 * unchanged value is a no-op. */
404 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
406 if (ioepoch && lli->lli_ioepoch != ioepoch) {
407 lli->lli_ioepoch = ioepoch;
408 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
409 ioepoch, PFID(&lli->lli_fid));
/* Populate the client open handle @och from the intent's MDT reply:
 * copy the server file handle, set magic/fid/flags, open the IO epoch,
 * and register the open for replay with the MDC. */
413 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
414 struct lookup_intent *it, struct obd_client_handle *och)
416 struct ptlrpc_request *req = it->d.lustre.it_data;
417 struct mdt_body *body;
421 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
422 LASSERT(body != NULL); /* reply already checked out */
424 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
425 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
426 och->och_fid = lli->lli_fid;
427 och->och_flags = it->it_flags;
428 ll_ioepoch_open(lli, body->ioepoch);
430 return md_set_open_replay_data(md_exp, och, req);
/* Finish the client-side part of an open: optionally fill @och from the
 * intent reply (when a new MDS handle was obtained), then attach @fd as
 * the file's private data, init readahead state, and remember the open
 * mode.  @och may be NULL when an existing shared handle is reused. */
433 int ll_local_open(struct file *file, struct lookup_intent *it,
434 struct ll_file_data *fd, struct obd_client_handle *och)
436 struct inode *inode = file->f_dentry->d_inode;
437 struct ll_inode_info *lli = ll_i2info(inode);
440 LASSERT(!LUSTRE_FPRIVATE(file));
445 struct ptlrpc_request *req = it->d.lustre.it_data;
446 struct mdt_body *body;
449 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
453 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
454 if ((it->it_flags & FMODE_WRITE) &&
455 (body->valid & OBD_MD_FLSIZE))
456 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
457 lli->lli_ioepoch, PFID(&lli->lli_fid));
460 LUSTRE_FPRIVATE(file) = fd;
461 ll_readahead_init(inode, &fd->fd_ras);
/* fd_omode later drives which open count ll_md_close() decrements. */
462 fd->fd_omode = it->it_flags;
466 /* Open a file, and (for the very first open) create objects on the OSTs at
467 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
468 * creation or open until ll_lov_setstripe() ioctl is called. We grab
469 * lli_open_sem to ensure no other process will create objects, send the
470 * stripe MD to the MDS, or try to destroy the objects if that fails.
472 * If we already have the stripe MD locally then we don't request it in
473 * md_open(), by passing a lmm_size = 0.
475 * It is up to the application to ensure no other processes open this file
476 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
477 * used. We might be able to avoid races of that sort by getting lli_open_sem
478 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
479 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* VFS ->open() entry point.  Allocates the per-fd ll_file_data, sets up
 * statahead ownership for directories, and either reuses an existing
 * shared MDS open handle for this mode or performs a new intent open
 * (ll_intent_file_open) and records the handle.  The root dentry is
 * special-cased.  See the block comment above for O_LOV_DELAY_CREATE
 * semantics. */
483 struct ll_inode_info *lli = ll_i2info(inode);
484 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
485 .it_flags = file->f_flags };
486 struct lov_stripe_md *lsm;
487 struct ptlrpc_request *req = NULL;
488 struct obd_client_handle **och_p;
490 struct ll_file_data *fd;
491 int rc = 0, opendir_set = 0;
494 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
495 inode->i_generation, inode, file->f_flags);
497 #ifdef HAVE_VFS_INTENT_PATCHES
500 it = file->private_data; /* XXX: compat macro */
501 file->private_data = NULL; /* prevent ll_local_open assertion */
504 fd = ll_file_data_get();
/* For directories, claim statahead ownership if nobody has it yet;
 * the fd pointer doubles as the "opendir key". */
509 if (S_ISDIR(inode->i_mode)) {
510 spin_lock(&lli->lli_lock);
511 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
512 LASSERT(lli->lli_sai == NULL);
513 lli->lli_opendir_key = fd;
514 lli->lli_opendir_pid = cfs_curproc_pid();
517 spin_unlock(&lli->lli_lock);
520 if (inode->i_sb->s_root == file->f_dentry) {
521 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from lookup: build our own open intent (oit). */
525 if (!it || !it->d.lustre.it_disposition) {
526 /* Convert f_flags into access mode. We cannot use file->f_mode,
527 * because everything but O_ACCMODE mask was stripped from
529 if ((oit.it_flags + 1) & O_ACCMODE)
531 if (file->f_flags & O_TRUNC)
532 oit.it_flags |= FMODE_WRITE;
534 /* kernel only call f_op->open in dentry_open. filp_open calls
535 * dentry_open after call to open_namei that checks permissions.
536 * Only nfsd_open call dentry_open directly without checking
537 * permissions and because of that this code below is safe. */
538 if (oit.it_flags & FMODE_WRITE)
539 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
541 /* We do not want O_EXCL here, presumably we opened the file
542 * already? XXX - NFS implications? */
543 oit.it_flags &= ~O_EXCL;
545 /* bug20584, if "it_flags" contains O_CREAT, the file will be
546 * created if necessary, then "IT_CREAT" should be set to keep
547 * consistent with it */
548 if (oit.it_flags & O_CREAT)
549 oit.it_op |= IT_CREAT;
555 /* Let's see if we have file open on MDS already. */
556 if (it->it_flags & FMODE_WRITE) {
557 och_p = &lli->lli_mds_write_och;
558 och_usecount = &lli->lli_open_fd_write_count;
559 } else if (it->it_flags & FMODE_EXEC) {
560 och_p = &lli->lli_mds_exec_och;
561 och_usecount = &lli->lli_open_fd_exec_count;
563 och_p = &lli->lli_mds_read_och;
564 och_usecount = &lli->lli_open_fd_read_count;
567 down(&lli->lli_och_sem);
568 if (*och_p) { /* Open handle is present */
569 if (it_disposition(it, DISP_OPEN_OPEN)) {
570 /* Well, there's extra open request that we do not need,
571 let's close it somehow. This will decref request. */
572 rc = it_open_error(DISP_OPEN_OPEN, it);
574 up(&lli->lli_och_sem);
575 ll_file_data_put(fd);
576 GOTO(out_openerr, rc);
578 ll_release_openhandle(file->f_dentry, it);
579 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse the shared handle: no och passed to ll_local_open. */
584 rc = ll_local_open(file, it, fd, NULL);
587 up(&lli->lli_och_sem);
588 ll_file_data_put(fd);
589 GOTO(out_openerr, rc);
592 LASSERT(*och_usecount == 0);
593 if (!it->d.lustre.it_disposition) {
594 /* We cannot just request lock handle now, new ELC code
595 means that one of other OPEN locks for this file
596 could be cancelled, and since blocking ast handler
597 would attempt to grab och_sem as well, that would
598 result in a deadlock */
599 up(&lli->lli_och_sem);
600 it->it_create_mode |= M_CHECK_STALE;
601 rc = ll_intent_file_open(file, NULL, 0, it);
602 it->it_create_mode &= ~M_CHECK_STALE;
604 ll_file_data_put(fd);
605 GOTO(out_openerr, rc);
608 /* Got some error? Release the request */
609 if (it->d.lustre.it_status < 0) {
610 req = it->d.lustre.it_data;
611 ptlrpc_req_finished(req);
615 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
617 ll_file_data_put(fd);
618 GOTO(out_och_free, rc = -ENOMEM);
621 req = it->d.lustre.it_data;
623 /* md_intent_lock() didn't get a request ref if there was an
624 * open error, so don't do cleanup on the request here
626 /* XXX (green): Should not we bail out on any error here, not
627 * just open error? */
628 rc = it_open_error(DISP_OPEN_OPEN, it);
630 ll_file_data_put(fd);
631 GOTO(out_och_free, rc);
634 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
635 rc = ll_local_open(file, it, fd, *och_p);
637 ll_file_data_put(fd);
638 GOTO(out_och_free, rc);
641 up(&lli->lli_och_sem);
643 /* Must do this outside lli_och_sem lock to prevent deadlock where
644 different kind of OPEN lock for this same inode gets cancelled
645 by ldlm_cancel_lru */
646 if (!S_ISREG(inode->i_mode))
653 if (file->f_flags & O_LOV_DELAY_CREATE ||
654 !(file->f_mode & FMODE_WRITE)) {
655 CDEBUG(D_INODE, "object creation was delayed\n");
659 file->f_flags &= ~O_LOV_DELAY_CREATE;
662 ptlrpc_req_finished(req);
664 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/* Error unwind: free the handle slot (OBD_FREE poisons it). */
668 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
669 *och_p = NULL; /* OBD_FREE writes some magic there */
672 up(&lli->lli_och_sem);
674 if (opendir_set != 0)
675 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Fills the obdo with the attributes for the lsm.
 * Asks the OSTs (via an async getattr on a ptlrpc set) for the merged
 * size/blocks/times of the stripes described by @lsm and leaves them in
 * @obdo.  @capa is an optional OSS capability. */
682 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
683 struct obd_capa *capa, struct obdo *obdo)
685 struct ptlrpc_request_set *set;
686 struct obd_info oinfo = { { { 0 } } };
691 LASSERT(lsm != NULL);
695 oinfo.oi_oa->o_id = lsm->lsm_object_id;
696 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
697 oinfo.oi_oa->o_mode = S_IFREG;
698 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
699 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
700 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
701 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
703 oinfo.oi_capa = capa;
705 set = ptlrpc_prep_set();
707 CERROR("can't allocate ptlrpc set\n");
710 rc = obd_getattr_async(exp, &oinfo, set);
712 rc = ptlrpc_set_wait(set);
713 ptlrpc_set_destroy(set);
/* Restrict o_valid to the attributes the OSTs are authoritative for. */
716 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
717 OBD_MD_FLATIME | OBD_MD_FLMTIME |
718 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
/* Fills the obdo with the attributes for the inode defined by lsm.
 * Wrapper over ll_lsm_getattr() that also refreshes @inode from the
 * returned obdo.  Takes (and presumably releases -- not visible here)
 * an MDS capability. */
723 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
725 struct ll_inode_info *lli = ll_i2info(inode);
726 struct obd_capa *capa = ll_mdscapa_get(inode);
730 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
733 obdo_refresh_inode(inode, obdo, obdo->o_valid);
735 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
736 lli->lli_smd->lsm_object_id, i_size_read(inode),
737 (unsigned long long)inode->i_blocks,
738 (unsigned long)ll_inode_blksize(inode));
/* Merge the per-stripe lock value blocks into @inode's cached
 * attributes (size, blocks, a/m/ctime) under the inode size lock. */
743 int ll_merge_lvb(struct inode *inode)
745 struct ll_inode_info *lli = ll_i2info(inode);
746 struct ll_sb_info *sbi = ll_i2sbi(inode);
752 ll_inode_size_lock(inode, 1);
753 inode_init_lvb(inode, &lvb);
754 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
755 i_size_write(inode, lvb.lvb_size);
756 inode->i_blocks = lvb.lvb_blocks;
758 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
759 LTIME_S(inode->i_atime) = lvb.lvb_atime;
760 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
761 ll_inode_size_unlock(inode, 1);
/* Glimpse helper for ioctl paths: fetch current OST attributes for
 * @lsm via ll_lsm_getattr() and copy size/blocks/times into the
 * caller-supplied stat structure @st. */
766 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
769 struct obdo obdo = { 0 };
772 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
774 st->st_size = obdo.o_size;
775 st->st_blocks = obdo.o_blocks;
776 st->st_mtime = obdo.o_mtime;
777 st->st_atime = obdo.o_atime;
778 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read (@write == 0) or write (@write != 0)
 * on @file: zero the structure, propagate O_NONBLOCK/O_APPEND, bind the
 * cl_object, and choose the lock requirement mode (never for nolock
 * files, mandatory for append, otherwise "maybe"). */
783 void ll_io_init(struct cl_io *io, const struct file *file, int write)
785 struct inode *inode = file->f_dentry->d_inode;
787 memset(io, 0, sizeof *io);
788 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
790 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
791 io->ci_obj = ll_i2info(inode)->lli_clob;
792 io->ci_lockreq = CILR_MAYBE;
793 if (ll_file_nolock(file)) {
794 io->ci_lockreq = CILR_NEVER;
795 io->ci_no_srvlock = 1;
796 } else if (file->f_flags & O_APPEND) {
797 io->ci_lockreq = CILR_MANDATORY;
/* Common engine for all read/write/sendfile entry points: set up the
 * cl_io from @args, run the generic cl_io loop, and advance *@ppos to
 * the position recorded in the io descriptor.  Returns bytes moved or
 * a negative errno (exact return handling is partially truncated in
 * this listing). */
801 static ssize_t ll_file_io_generic(const struct lu_env *env,
802 struct ccc_io_args *args, struct file *file,
803 enum cl_io_type iot, loff_t *ppos, size_t count)
809 io = &ccc_env_info(env)->cti_io;
810 ll_io_init(io, file, iot == CIT_WRITE);
813 io->u.ci_rd.rd_is_sendfile = args->cia_is_sendfile;
815 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
816 struct vvp_io *vio = vvp_env_io(env);
817 struct ccc_io *cio = ccc_env_io(env);
/* sendfile supplies an actor/target pair instead of iovecs. */
818 if (cl_io_is_sendfile(io)) {
819 vio->u.read.cui_actor = args->cia_actor;
820 vio->u.read.cui_target = args->cia_target;
822 cio->cui_iov = args->cia_iov;
823 cio->cui_nrsegs = args->cia_nrsegs;
824 #ifndef HAVE_FILE_WRITEV
825 cio->cui_iocb = args->cia_iocb;
828 cio->cui_fd = LUSTRE_FPRIVATE(file);
829 result = cl_io_loop(env, io);
831 /* cl_io_rw_init() handled IO */
832 result = io->ci_result;
833 if (io->ci_nob > 0) {
835 *ppos = io->u.ci_wr.wr.crw_pos;
/*
 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
 *
 * Validate an iovec array: reject negative segment lengths or a total
 * that wraps, truncating *nr_segs/*count at the first bad segment the
 * same way the kernel helper does.
 */
845 static int ll_file_get_iov_count(const struct iovec *iov,
846 unsigned long *nr_segs, size_t *count)
851 for (seg = 0; seg < *nr_segs; seg++) {
852 const struct iovec *iv = &iov[seg];
855 * If any segment has a negative length, or the cumulative
856 * length ever wraps negative then return -EINVAL.
859 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
861 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
866 cnt -= iv->iov_len; /* This segment is no good */
873 #ifdef HAVE_FILE_READV
/* ->readv() entry (pre-AIO kernels): validate the iovec, get a cl env,
 * fill ccc_io_args and delegate to ll_file_io_generic(CIT_READ). */
874 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
875 unsigned long nr_segs, loff_t *ppos)
878 struct ccc_io_args *args;
884 result = ll_file_get_iov_count(iov, &nr_segs, &count);
888 env = cl_env_get(&refcheck);
890 RETURN(PTR_ERR(env));
892 args = &vvp_env_info(env)->vti_args;
893 args->cia_is_sendfile = 0;
894 args->cia_iov = (struct iovec *)iov;
895 args->cia_nrsegs = nr_segs;
896 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
897 cl_env_put(env, &refcheck);
/* ->read() entry (readv flavour): wrap the user buffer in a single
 * env-local iovec and forward to ll_file_readv(). */
901 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
905 struct iovec *local_iov;
910 env = cl_env_get(&refcheck);
912 RETURN(PTR_ERR(env));
914 local_iov = &vvp_env_info(env)->vti_local_iov;
915 local_iov->iov_base = (void __user *)buf;
916 local_iov->iov_len = count;
917 result = ll_file_readv(file, local_iov, 1, ppos);
918 cl_env_put(env, &refcheck);
/* ->aio_read() entry: validate the iovec, get a cl env, fill
 * ccc_io_args (including the kiocb) and run the generic read path
 * against iocb->ki_pos. */
923 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
924 unsigned long nr_segs, loff_t pos)
927 struct ccc_io_args *args;
933 result = ll_file_get_iov_count(iov, &nr_segs, &count);
937 env = cl_env_get(&refcheck);
939 RETURN(PTR_ERR(env));
941 args = &vvp_env_info(env)->vti_args;
942 args->cia_is_sendfile = 0;
943 args->cia_iov = (struct iovec *)iov;
944 args->cia_nrsegs = nr_segs;
945 args->cia_iocb = iocb;
946 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
947 &iocb->ki_pos, count);
948 cl_env_put(env, &refcheck);
/* ->read() entry (AIO flavour): build a synchronous kiocb plus a
 * single env-local iovec and drive ll_file_aio_read(), then copy the
 * final position back to *ppos. */
952 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
956 struct iovec *local_iov;
962 env = cl_env_get(&refcheck);
964 RETURN(PTR_ERR(env));
966 local_iov = &vvp_env_info(env)->vti_local_iov;
967 kiocb = &vvp_env_info(env)->vti_kiocb;
968 local_iov->iov_base = (void __user *)buf;
969 local_iov->iov_len = count;
970 init_sync_kiocb(kiocb, file);
971 kiocb->ki_pos = *ppos;
972 kiocb->ki_left = count;
974 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
975 *ppos = kiocb->ki_pos;
977 cl_env_put(env, &refcheck);
983 * Write to a file (through the page cache).
985 #ifdef HAVE_FILE_WRITEV
/* ->writev() entry (pre-AIO kernels): validate the iovec, get a cl
 * env, fill ccc_io_args and delegate to ll_file_io_generic(CIT_WRITE). */
986 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
987 unsigned long nr_segs, loff_t *ppos)
990 struct ccc_io_args *args;
996 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1000 env = cl_env_get(&refcheck);
1002 RETURN(PTR_ERR(env));
1004 args = &vvp_env_info(env)->vti_args;
1005 args->cia_iov = (struct iovec *)iov;
1006 args->cia_nrsegs = nr_segs;
1007 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1008 cl_env_put(env, &refcheck);
/* ->write() entry (writev flavour): wrap the user buffer in a single
 * env-local iovec and forward to ll_file_writev(). */
1012 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1016 struct iovec *local_iov;
1021 env = cl_env_get(&refcheck);
1023 RETURN(PTR_ERR(env));
1025 local_iov = &vvp_env_info(env)->vti_local_iov;
1026 local_iov->iov_base = (void __user *)buf;
1027 local_iov->iov_len = count;
1029 result = ll_file_writev(file, local_iov, 1, ppos);
1030 cl_env_put(env, &refcheck);
1034 #else /* AIO stuff */
/* ->aio_write() entry: validate the iovec, get a cl env, fill
 * ccc_io_args (including the kiocb) and run the generic write path
 * against iocb->ki_pos. */
1035 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1036 unsigned long nr_segs, loff_t pos)
1039 struct ccc_io_args *args;
1045 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1049 env = cl_env_get(&refcheck);
1051 RETURN(PTR_ERR(env));
1053 args = &vvp_env_info(env)->vti_args;
1054 args->cia_iov = (struct iovec *)iov;
1055 args->cia_nrsegs = nr_segs;
1056 args->cia_iocb = iocb;
1057 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1058 &iocb->ki_pos, count);
1059 cl_env_put(env, &refcheck);
/* ->write() entry (AIO flavour): build a synchronous kiocb plus a
 * single env-local iovec and drive ll_file_aio_write(), then copy the
 * final position back to *ppos. */
1063 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1067 struct iovec *local_iov;
1068 struct kiocb *kiocb;
1073 env = cl_env_get(&refcheck);
1075 RETURN(PTR_ERR(env));
1077 local_iov = &vvp_env_info(env)->vti_local_iov;
1078 kiocb = &vvp_env_info(env)->vti_kiocb;
1079 local_iov->iov_base = (void __user *)buf;
1080 local_iov->iov_len = count;
1081 init_sync_kiocb(kiocb, file);
1082 kiocb->ki_pos = *ppos;
1083 kiocb->ki_left = count;
1085 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1086 *ppos = kiocb->ki_pos;
1088 cl_env_put(env, &refcheck);
1095 * Send file content (through pagecache) somewhere with helper
/* ->sendfile() entry: pass the read actor/target through ccc_io_args
 * with cia_is_sendfile set, reusing the generic CIT_READ path. */
1097 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1098 read_actor_t actor, void *target)
1101 struct ccc_io_args *args;
1106 env = cl_env_get(&refcheck);
1108 RETURN(PTR_ERR(env));
1110 args = &vvp_env_info(env)->vti_args;
1111 args->cia_is_sendfile = 1;
1112 args->cia_target = target;
1113 args->cia_actor = actor;
1114 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1115 cl_env_put(env, &refcheck);
/* LL_IOC_RECREATE_OBJ handler (admin-only): copy the request from
 * userspace, duplicate the inode's stripe MD, mark the obdo with
 * OBD_FL_RECREATE_OBJS and ask the OSC/LOV layer to recreate the lost
 * OST object described by (id, group, ost_idx). */
1119 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1122 struct obd_export *exp = ll_i2dtexp(inode);
1123 struct ll_recreate_obj ucreatp;
1124 struct obd_trans_info oti = { 0 };
1125 struct obdo *oa = NULL;
1128 struct lov_stripe_md *lsm, *lsm2;
1131 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1134 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1135 sizeof(struct ll_recreate_obj)))
/* Size lock keeps the stripe MD stable while we copy it. */
1142 ll_inode_size_lock(inode, 0);
1143 lsm = ll_i2info(inode)->lli_smd;
1145 GOTO(out, rc = -ENOENT);
1146 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1147 (lsm->lsm_stripe_count));
1149 OBD_ALLOC(lsm2, lsm_size);
1151 GOTO(out, rc = -ENOMEM);
1153 oa->o_id = ucreatp.lrc_id;
1154 oa->o_gr = ucreatp.lrc_group;
/* o_nlink is reused here to carry the target OST index. */
1155 oa->o_nlink = ucreatp.lrc_ost_idx;
1156 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1157 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1158 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1159 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1161 memcpy(lsm2, lsm, lsm_size);
1162 rc = obd_create(exp, oa, &lsm2, &oti);
1164 OBD_FREE(lsm2, lsm_size);
1167 ll_inode_size_unlock(inode, 0);
/* Set striping information for @inode by performing an MDS open intent
 * carrying the lov_user_md (@lum).  Fails if a stripe MD already
 * exists.  On success releases the transient open handle and the
 * intent; the error path also drops the intent request. */
1172 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1173 int flags, struct lov_user_md *lum, int lum_size)
1175 struct lov_stripe_md *lsm;
1176 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1180 ll_inode_size_lock(inode, 0);
1181 lsm = ll_i2info(inode)->lli_smd;
/* Striping is write-once: refuse if it is already set. */
1183 ll_inode_size_unlock(inode, 0);
1184 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1189 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1192 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1193 GOTO(out_req_free, rc = -ENOENT);
1194 rc = oit.d.lustre.it_status;
1196 GOTO(out_req_free, rc);
1198 ll_release_openhandle(file->f_dentry, &oit);
1201 ll_inode_size_unlock(inode, 0);
1202 ll_intent_release(&oit);
1205 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping) for @filename relative to directory
 * @inode via md_getattr_name(), validate the magic, byte-swap from the
 * MDS's little-endian wire format on big-endian hosts, and expand
 * LOV_MAGIC_JOIN metadata into a flat lov_user_md_join for userspace.
 * On success *lmmp/*lmm_size point into (or replace) the reply buffer
 * and *request carries the reply for the caller to release. */
1209 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1210 struct lov_mds_md **lmmp, int *lmm_size,
1211 struct ptlrpc_request **request)
1213 struct ll_sb_info *sbi = ll_i2sbi(inode);
1214 struct mdt_body *body;
1215 struct lov_mds_md *lmm = NULL;
1216 struct ptlrpc_request *req = NULL;
1217 struct obd_capa *oc;
1220 rc = ll_get_max_mdsize(sbi, &lmmsize);
1224 oc = ll_mdscapa_get(inode);
1225 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1226 oc, filename, strlen(filename) + 1,
1227 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1228 ll_i2suppgid(inode), &req);
1231 CDEBUG(D_INFO, "md_getattr_name failed "
1232 "on %s: rc %d\n", filename, rc);
1236 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1237 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1239 lmmsize = body->eadatasize;
1241 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1243 GOTO(out, rc = -ENODATA);
1246 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1247 LASSERT(lmm != NULL);
1249 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1250 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
1251 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
1252 GOTO(out, rc = -EPROTO);
1256 * This is coming from the MDS, so is probably in
1257 * little endian. We convert it to host endian before
1258 * passing it to userspace.
1260 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1261 /* if function called for directory - we should
1262 * avoid swab not existent lsm objects */
1263 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1264 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1265 if (S_ISREG(body->mode))
1266 lustre_swab_lov_user_md_objects(
1267 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1268 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1269 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1270 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1271 if (S_ISREG(body->mode))
1272 lustre_swab_lov_user_md_objects(
1273 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1274 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1275 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
1276 lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
/* JOIN files: unpack the lsm to walk its extent array and build a
 * flat per-stripe lov_user_md_join image for userspace. */
1280 if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
1281 struct lov_stripe_md *lsm;
1282 struct lov_user_md_join *lmj;
1283 int lmj_size, i, aindex = 0;
1285 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
1287 GOTO(out, rc = -ENOMEM);
1288 rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm);
1290 GOTO(out_free_memmd, rc);
1292 lmj_size = sizeof(struct lov_user_md_join) +
1293 lsm->lsm_stripe_count *
1294 sizeof(struct lov_user_ost_data_join);
1295 OBD_ALLOC(lmj, lmj_size);
1297 GOTO(out_free_memmd, rc = -ENOMEM);
1299 memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
1300 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1301 struct lov_extent *lex =
1302 &lsm->lsm_array->lai_ext_array[aindex];
/* Advance to the extent that covers stripe @i. */
1304 if (lex->le_loi_idx + lex->le_stripe_count <= i)
1306 CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
1307 LPU64" len %d\n", aindex, i,
1308 lex->le_start, (int)lex->le_len);
1309 lmj->lmm_objects[i].l_extent_start =
/* le_len == -1 marks an extent open to EOF. */
1312 if ((int)lex->le_len == -1)
1313 lmj->lmm_objects[i].l_extent_end = -1;
1315 lmj->lmm_objects[i].l_extent_end =
1316 lex->le_start + lex->le_len;
1317 lmj->lmm_objects[i].l_object_id =
1318 lsm->lsm_oinfo[i]->loi_id;
1319 lmj->lmm_objects[i].l_object_gr =
1320 lsm->lsm_oinfo[i]->loi_gr;
1321 lmj->lmm_objects[i].l_ost_gen =
1322 lsm->lsm_oinfo[i]->loi_ost_gen;
1323 lmj->lmm_objects[i].l_ost_idx =
1324 lsm->lsm_oinfo[i]->loi_ost_idx;
1326 lmm = (struct lov_mds_md *)lmj;
1329 obd_free_memmd(sbi->ll_dt_exp, &lsm);
1333 *lmm_size = lmmsize;
/*
 * ll_lov_setea() - handler for the LL_IOC_LOV_SETEA ioctl: install a raw
 * striping EA (one that may already reference created objects, hence the
 * MDS_OPEN_HAS_OBJS open flag) on @inode.  Restricted to CFS_CAP_SYS_ADMIN.
 * NOTE(review): error-return lines (EPERM/ENOMEM/EFAULT paths) and the
 * closing brace are elided in this excerpt.
 */
1338 static int ll_lov_setea(struct inode *inode, struct file *file,
1341 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1342 struct lov_user_md *lump;
/* kernel buffer sized for a lov_user_md header plus one OST entry */
1343 int lum_size = sizeof(struct lov_user_md) +
1344 sizeof(struct lov_user_ost_data);
/* privileged operation: raw EAs can point at arbitrary objects */
1348 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1351 OBD_ALLOC(lump, lum_size);
/* copy the user-supplied EA; free the buffer on fault */
1355 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1356 OBD_FREE(lump, lum_size);
1360 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1362 OBD_FREE(lump, lum_size);
/*
 * ll_lov_setstripe() - handler for LL_IOC_LOV_SETSTRIPE: copy a v1 or v3
 * lov_user_md from userspace, apply it as the file's striping, then echo
 * the resulting stripe info back to the caller via LL_IOC_LOV_GETSTRIPE.
 * NOTE(review): intermediate lines (EFAULT returns, closing braces) are
 * elided in this excerpt.
 */
1366 static int ll_lov_setstripe(struct inode *inode, struct file *file,
/* lumv3 is the on-stack buffer; lumv1 aliases its v1-compatible prefix */
1369 struct lov_user_md_v3 lumv3;
1370 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1371 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1372 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1375 int flags = FMODE_WRITE;
1378 /* first try with v1 which is smaller than v3 */
1379 lum_size = sizeof(struct lov_user_md_v1);
1380 if (copy_from_user(lumv1, lumv1p, lum_size))
/* magic says v3: re-copy the full, larger v3 structure */
1383 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1384 lum_size = sizeof(struct lov_user_md_v3);
1385 if (copy_from_user(&lumv3, lumv3p, lum_size))
1389 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* zero the user's stripe_count first so GETSTRIPE fills real values */
1391 put_user(0, &lumv1p->lmm_stripe_count);
1392 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1393 0, ll_i2info(inode)->lli_smd,
/*
 * ll_lov_getstripe() - handler for LL_IOC_LOV_GETSTRIPE: forward the
 * inode's stripe metadata (lli_smd) to the LOV via obd_iocontrol so it
 * can be packed into the user buffer at @arg.
 */
1399 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1401 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1406 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * ll_get_grouplock() - acquire a Lustre group lock (gid in @arg) for this
 * open file.  Only one group lock may be held per file descriptor; the
 * lli_lock spinlock serializes the check against concurrent acquirers.
 */
1410 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1412 struct ll_inode_info *lli = ll_i2info(inode);
1413 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1414 struct ccc_grouplock grouplock;
/* group locks make no sense on a nolock (localflock/noflock) mount */
1418 if (ll_file_nolock(file))
1419 RETURN(-EOPNOTSUPP);
1421 spin_lock(&lli->lli_lock);
/* refuse a second group lock on the same fd */
1422 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1423 CERROR("group lock already existed with gid %lu\n",
1424 fd->fd_grouplock.cg_gid);
1425 spin_unlock(&lli->lli_lock);
1428 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1429 spin_unlock(&lli->lli_lock);
/* take the cl-layer group lock outside the spinlock (it may block) */
1431 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1432 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1436 spin_lock(&lli->lli_lock);
/* re-check under the lock: another thread may have raced us here */
1437 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1438 spin_unlock(&lli->lli_lock);
1439 CERROR("another thread just won the race\n");
1440 cl_put_grouplock(&grouplock);
1444 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1445 fd->fd_grouplock = grouplock;
1446 spin_unlock(&lli->lli_lock);
1448 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * ll_put_grouplock() - release the group lock (gid in @arg) previously
 * taken by ll_get_grouplock() on this file descriptor.  The fd state is
 * cleared under lli_lock; the actual cl_put_grouplock happens after the
 * spinlock is dropped since it may block.
 */
1452 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1454 struct ll_inode_info *lli = ll_i2info(inode);
1455 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1456 struct ccc_grouplock grouplock;
1459 spin_lock(&lli->lli_lock);
/* nothing to release if no group lock is recorded on this fd */
1460 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1461 spin_unlock(&lli->lli_lock);
1462 CERROR("no group lock held\n");
1465 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* caller must pass the same gid that was used to acquire the lock */
1467 if (fd->fd_grouplock.cg_gid != arg) {
1468 CERROR("group lock %lu doesn't match current id %lu\n",
1469 arg, fd->fd_grouplock.cg_gid);
1470 spin_unlock(&lli->lli_lock);
/* detach the grouplock from the fd before dropping the spinlock */
1474 grouplock = fd->fd_grouplock;
1475 fd->fd_grouplock.cg_env = NULL;
1476 fd->fd_grouplock.cg_lock = NULL;
1477 fd->fd_grouplock.cg_gid = 0;
1478 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1479 spin_unlock(&lli->lli_lock);
1481 cl_put_grouplock(&grouplock);
1482 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1486 #if LUSTRE_FIX >= 50
/*
 * join_sanity_check() - validate that @tail may be joined onto @head:
 * the server must advertise join support, both inodes must be regular
 * files, they must be distinct, and head's size must be a multiple of
 * JOIN_FILE_ALIGN (64K).  NOTE(review): the error RETURN lines after each
 * CERROR are elided in this excerpt.
 */
1487 static int join_sanity_check(struct inode *head, struct inode *tail)
1490 if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
1491 CERROR("server do not support join \n");
1494 if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
1495 CERROR("tail ino %lu and ino head %lu must be regular\n",
1496 head->i_ino, tail->i_ino);
1499 if (head->i_ino == tail->i_ino) {
1500 CERROR("file %lu can not be joined to itself \n", head->i_ino);
1503 if (i_size_read(head) % JOIN_FILE_ALIGN) {
1504 CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
/*
 * join_file() - perform the MDS side of a file join: enqueue an IT_OPEN
 * intent with M_JOIN_FILE create mode against the head inode, carrying the
 * tail file's name and head's size as the opaque data.  Any lock or open
 * handle obtained as a side effect of the enqueue is released immediately.
 */
1510 static int join_file(struct inode *head_inode, struct file *head_filp,
1511 struct file *tail_filp)
1513 struct dentry *tail_dentry = tail_filp->f_dentry;
1514 struct lookup_intent oit = {.it_op = IT_OPEN,
1515 .it_flags = head_filp->f_flags,
1516 .it_create_mode = M_JOIN_FILE};
1517 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CW,
1518 ll_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL };
1520 struct lustre_handle lockh;
1521 struct md_op_data *op_data;
1526 tail_dentry = tail_filp->f_dentry;
/* head's current size tells the MDS where the tail is appended */
1528 data = i_size_read(head_inode);
1529 op_data = ll_prep_md_op_data(NULL, head_inode,
1530 tail_dentry->d_parent->d_inode,
1531 tail_dentry->d_name.name,
1532 tail_dentry->d_name.len, 0,
1533 LUSTRE_OPC_ANY, &data);
1534 if (IS_ERR(op_data))
1535 RETURN(PTR_ERR(op_data));
1537 rc = md_enqueue(ll_i2mdexp(head_inode), &einfo, &oit,
1538 op_data, &lockh, NULL, 0, NULL, 0);
1540 ll_finish_md_op_data(op_data);
1544 rc = oit.d.lustre.it_status;
/* propagate either the enqueue status or the open error from the intent */
1546 if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
1547 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
1548 ptlrpc_req_finished((struct ptlrpc_request *)
1549 oit.d.lustre.it_data);
/* we only needed the join side effect, not the lock itself */
1553 if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
1555 ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
1556 oit.d.lustre.it_lock_mode = 0;
1558 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1559 it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
/* close the open handle the intent created on the MDS */
1560 ll_release_openhandle(head_filp->f_dentry, &oit);
1562 ll_intent_release(&oit);
/*
 * ll_file_join() - join the file named @filename_tail onto the end of
 * @head.  Opens the tail, takes extent locks on both inodes in a canonical
 * (descending-ino) order to avoid deadlock, sanity-checks the pair, asks
 * the MDS to perform the join, then unwinds via the cleanup_phase switch.
 * On success head's cached stripe md is dropped so it will be refetched.
 * NOTE(review): some cleanup-phase case labels and RETURN lines are elided
 * in this excerpt.
 */
1566 static int ll_file_join(struct inode *head, struct file *filp,
1567 char *filename_tail)
1569 struct inode *tail = NULL, *first = NULL, *second = NULL;
1570 struct dentry *tail_dentry;
1571 struct file *tail_filp, *first_filp, *second_filp;
1572 struct ll_lock_tree first_tree, second_tree;
1573 struct ll_lock_tree_node *first_node, *second_node;
1574 struct ll_inode_info *hlli = ll_i2info(head);
1575 int rc = 0, cleanup_phase = 0;
1578 CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
1579 head->i_ino, head->i_generation, head, filename_tail);
1581 tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
1582 if (IS_ERR(tail_filp)) {
1583 CERROR("Can not open tail file %s", filename_tail);
1584 rc = PTR_ERR(tail_filp);
/* hold an extra inode reference for the duration of the join */
1587 tail = igrab(tail_filp->f_dentry->d_inode);
1589 tail_dentry = tail_filp->f_dentry;
1590 LASSERT(tail_dentry);
1593 /*reorder the inode for lock sequence*/
/* lock ordering: always lock the higher-ino inode ("first") before the
 * lower one, regardless of which is head or tail */
1594 first = head->i_ino > tail->i_ino ? head : tail;
1595 second = head->i_ino > tail->i_ino ? tail : head;
1596 first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
1597 second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
1599 CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
1600 head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
/* full-range (0..EOF) exclusive extent lock on each inode */
1601 first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
1602 if (IS_ERR(first_node)){
1603 rc = PTR_ERR(first_node);
1606 first_tree.lt_fd = first_filp->private_data;
1607 rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
1612 second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
1613 if (IS_ERR(second_node)){
1614 rc = PTR_ERR(second_node);
1617 second_tree.lt_fd = second_filp->private_data;
1618 rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
1623 rc = join_sanity_check(head, tail);
1627 rc = join_file(head, filp, tail_filp);
/* staged unwind: each phase falls through to undo earlier phases */
1631 switch (cleanup_phase) {
1633 ll_tree_unlock(&second_tree);
1634 obd_cancel_unused(ll_i2dtexp(second),
1635 ll_i2info(second)->lli_smd, 0, NULL);
1637 ll_tree_unlock(&first_tree);
1638 obd_cancel_unused(ll_i2dtexp(first),
1639 ll_i2info(first)->lli_smd, 0, NULL);
1641 filp_close(tail_filp, 0);
/* on success discard head's stale stripe md; the layout changed */
1644 if (head && rc == 0) {
1645 obd_free_memmd(ll_i2sbi(head)->ll_dt_exp,
1647 hlli->lli_smd = NULL;
1652 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
1657 #endif /* LUSTRE_FIX >= 50 */
1660 * Close inode open handle
1662 * \param dentry [in] dentry which contains the inode
1663 * \param it [in,out] intent which contains open info and result
1666 * \retval <0 failure
1668 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1670 struct inode *inode = dentry->d_inode;
1671 struct obd_client_handle *och;
1677 /* Root ? Do nothing. */
1678 if (dentry->d_inode->i_sb->s_root == dentry)
1681 /* No open handle to close? Move away */
1682 if (!it_disposition(it, DISP_OPEN_OPEN))
1685 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
/* build a temporary client handle from the intent, then close it */
1687 OBD_ALLOC(och, sizeof(*och));
1689 GOTO(out, rc = -ENOMEM);
1691 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1692 ll_i2info(inode), it, och);
1694 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1697 /* this one is in place of ll_file_open */
1698 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1699 ptlrpc_req_finished(it->d.lustre.it_data);
1700 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1705 * Get size for inode for which FIEMAP mapping is requested.
1706 * Make the FIEMAP get_info call and returns the result.
1708 int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1711 struct obd_export *exp = ll_i2dtexp(inode);
1712 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1713 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1714 int vallen = num_bytes;
1718 /* If the stripe_count > 1 and the application does not understand
1719 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1721 if (lsm->lsm_stripe_count > 1 &&
1722 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
/* identify the object for the OST-side get_info call */
1725 fm_key.oa.o_id = lsm->lsm_object_id;
1726 fm_key.oa.o_gr = lsm->lsm_object_gr;
1727 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1729 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1732 /* If filesize is 0, then there would be no objects for mapping */
1733 if (fm_key.oa.o_size == 0) {
1734 fiemap->fm_mapped_extents = 0;
1738 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
/* the LOV/OSC layers fill @fiemap with the mapped extents */
1740 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1742 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * ll_fid2path() - handler for OBD_IOC_FID2PATH: translate a FID into a
 * path name.  Copies the request header in to learn gf_pathlen, allocates
 * an output buffer large enough for the path, forwards the call to the MDC
 * via obd_iocontrol, and copies the result back to userspace.
 * NOTE(review): ENOMEM/EFAULT return lines are elided in this excerpt.
 */
1747 int ll_fid2path(struct obd_export *exp, void *arg)
1749 struct getinfo_fid2path *gfout, *gfin;
1753 /* Need to get the buflen */
1754 OBD_ALLOC_PTR(gfin);
1757 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* output = fixed header + user-requested path buffer length */
1762 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1763 OBD_ALLOC(gfout, outsize);
1764 if (gfout == NULL) {
1768 memcpy(gfout, gfin, sizeof(*gfout));
1771 /* Call mdc_iocontrol */
1772 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1775 if (copy_to_user(arg, gfout, outsize))
1779 OBD_FREE(gfout, outsize);
/*
 * ll_file_ioctl() - top-level ioctl dispatcher for regular Lustre files.
 * Handles Lustre-specific commands (striping, group locks, FIEMAP, FID
 * operations, file join) directly, passes tty ioctls back to the kernel,
 * consults dynamically registered handlers (ll_iocontrol_call), and
 * finally falls through to obd_iocontrol for anything else.
 * NOTE(review): many RETURN lines, case labels and closing braces are
 * elided in this excerpt.
 */
1783 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1786 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1790 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1791 inode->i_generation, inode, cmd);
1792 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1794 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1795 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1799 case LL_IOC_GETFLAGS:
1800 /* Get the current value of the file flags */
1801 return put_user(fd->fd_flags, (int *)arg);
1802 case LL_IOC_SETFLAGS:
1803 case LL_IOC_CLRFLAGS:
1804 /* Set or clear specific file flags */
1805 /* XXX This probably needs checks to ensure the flags are
1806 * not abused, and to handle any flag side effects.
1808 if (get_user(flags, (int *) arg))
1811 if (cmd == LL_IOC_SETFLAGS) {
/* IGNORE_LOCK only makes sense for O_DIRECT I/O */
1812 if ((flags & LL_FILE_IGNORE_LOCK) &&
1813 !(file->f_flags & O_DIRECT)) {
1814 CERROR("%s: unable to disable locking on "
1815 "non-O_DIRECT file\n", current->comm);
1819 fd->fd_flags |= flags;
1821 fd->fd_flags &= ~flags;
1824 case LL_IOC_LOV_SETSTRIPE:
1825 RETURN(ll_lov_setstripe(inode, file, arg));
1826 case LL_IOC_LOV_SETEA:
1827 RETURN(ll_lov_setea(inode, file, arg));
1828 case LL_IOC_LOV_GETSTRIPE:
1829 RETURN(ll_lov_getstripe(inode, arg));
1830 case LL_IOC_RECREATE_OBJ:
1831 RETURN(ll_lov_recreate_obj(inode, file, arg));
1832 case FSFILT_IOC_FIEMAP: {
1833 struct ll_user_fiemap *fiemap_s;
1834 size_t num_bytes, ret_bytes;
1835 unsigned int extent_count;
1838 /* Get the extent count so we can calculate the size of
1839 * required fiemap buffer */
1840 if (get_user(extent_count,
1841 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
/* header plus one ll_fiemap_extent per requested extent */
1843 num_bytes = sizeof(*fiemap_s) + (extent_count *
1844 sizeof(struct ll_fiemap_extent));
1845 OBD_VMALLOC(fiemap_s, num_bytes);
1846 if (fiemap_s == NULL)
1849 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1851 GOTO(error, rc = -EFAULT);
/* strip unsupported flags and report them back to the caller */
1853 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1854 fiemap_s->fm_flags = fiemap_s->fm_flags &
1855 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1856 if (copy_to_user((char *)arg, fiemap_s,
1858 GOTO(error, rc = -EFAULT);
1860 GOTO(error, rc = -EBADR);
1863 /* If fm_extent_count is non-zero, read the first extent since
1864 * it is used to calculate end_offset and device from previous
1867 if (copy_from_user(&fiemap_s->fm_extents[0],
1868 (char __user *)arg + sizeof(*fiemap_s),
1869 sizeof(struct ll_fiemap_extent)))
1870 GOTO(error, rc = -EFAULT);
/* SYNC flag: flush dirty pages so the mapping reflects them */
1873 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1876 rc = filemap_fdatawrite(inode->i_mapping);
1881 rc = ll_fiemap(inode, fiemap_s, num_bytes);
1885 ret_bytes = sizeof(struct ll_user_fiemap);
1887 if (extent_count != 0)
1888 ret_bytes += (fiemap_s->fm_mapped_extents *
1889 sizeof(struct ll_fiemap_extent));
1891 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1895 OBD_VFREE(fiemap_s, num_bytes);
1898 case FSFILT_IOC_GETFLAGS:
1899 case FSFILT_IOC_SETFLAGS:
1900 RETURN(ll_iocontrol(inode, file, cmd, arg));
1901 case FSFILT_IOC_GETVERSION_OLD:
1902 case FSFILT_IOC_GETVERSION:
1903 RETURN(put_user(inode->i_generation, (int *)arg));
1905 #if LUSTRE_FIX >= 50
1906 /* Allow file join in beta builds to allow debuggging */
1910 ftail = getname((const char *)arg);
1912 RETURN(PTR_ERR(ftail));
1913 rc = ll_file_join(inode, file, ftail);
1917 CWARN("file join is not supported in this version of Lustre\n");
1921 case LL_IOC_GROUP_LOCK:
1922 RETURN(ll_get_grouplock(inode, file, arg));
1923 case LL_IOC_GROUP_UNLOCK:
1924 RETURN(ll_put_grouplock(inode, file, arg));
1925 case IOC_OBD_STATFS:
1926 RETURN(ll_obd_statfs(inode, (void *)arg));
1928 /* We need to special case any other ioctls we want to handle,
1929 * to send them to the MDS/OST as appropriate and to properly
1930 * network encode the arg field.
1931 case FSFILT_IOC_SETVERSION_OLD:
1932 case FSFILT_IOC_SETVERSION:
1934 case LL_IOC_FLUSHCTX:
1935 RETURN(ll_flush_ctx(inode));
1936 case LL_IOC_PATH2FID: {
1937 if (copy_to_user((void *)arg, ll_inode2fid(inode),
1938 sizeof(struct lu_fid)))
1943 case OBD_IOC_FID2PATH:
1944 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
/* dynamically registered handlers get a chance before the default */
1950 ll_iocontrol_call(inode, file, cmd, arg, &err))
1953 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * ll_file_seek() - llseek for Lustre files.  SEEK_END requires a glimpse
 * RPC (cl_glimpse_size) so the cached i_size is current before it is read
 * under the inode size lock.  The new offset is validated against the
 * filesystem's maximum file size before being stored in f_pos.
 * NOTE(review): some lines (error returns, final RETURN) are elided.
 */
1959 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1961 struct inode *inode = file->f_dentry->d_inode;
1964 retval = offset + ((origin == 2) ? i_size_read(inode) :
1965 (origin == 1) ? file->f_pos : 0);
1966 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1967 inode->i_ino, inode->i_generation, inode, retval, retval,
1968 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1969 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1971 if (origin == 2) { /* SEEK_END */
1972 int nonblock = 0, rc;
1974 if (file->f_flags & O_NONBLOCK)
1975 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* refresh i_size from the OSTs before using it */
1977 rc = cl_glimpse_size(inode);
1981 ll_inode_size_lock(inode, 0);
1982 offset += i_size_read(inode);
1983 ll_inode_size_unlock(inode, 0);
1984 } else if (origin == 1) { /* SEEK_CUR */
1985 offset += file->f_pos;
1989 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1990 if (offset != file->f_pos) {
1991 file->f_pos = offset;
/*
 * ll_fsync() - fsync for Lustre files: wait for in-flight page I/O,
 * surface any asynchronous write errors recorded on the inode/lsm, sync
 * metadata through the MDC (md_sync), then sync the full data range
 * (0..OBD_OBJECT_EOF) through the OSC (obd_sync).
 * NOTE(review): obdo allocation and several return lines are elided in
 * this excerpt.
 */
1999 int ll_fsync(struct file *file, struct dentry *dentry, int data)
2001 struct inode *inode = dentry->d_inode;
2002 struct ll_inode_info *lli = ll_i2info(inode);
2003 struct lov_stripe_md *lsm = lli->lli_smd;
2004 struct ptlrpc_request *req;
2005 struct obd_capa *oc;
2008 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2009 inode->i_generation, inode);
2010 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2012 /* fsync's caller has already called _fdata{sync,write}, we want
2013 * that IO to finish before calling the osc and mdc sync methods */
2014 rc = filemap_fdatawait(inode->i_mapping);
2016 /* catch async errors that were recorded back when async writeback
2017 * failed for pages in this mapping. */
2018 err = lli->lli_async_rc;
2019 lli->lli_async_rc = 0;
2023 err = lov_test_and_clear_async_rc(lsm);
/* metadata sync through the MDS, using the inode's capability */
2028 oc = ll_mdscapa_get(inode);
2029 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2035 ptlrpc_req_finished(req);
2042 RETURN(rc ? rc : -ENOMEM);
/* fill an obdo describing the object(s) for the OST-side sync */
2044 oa->o_id = lsm->lsm_object_id;
2045 oa->o_gr = lsm->lsm_object_gr;
2046 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2047 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
2048 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
2051 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2052 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
2053 0, OBD_OBJECT_EOF, oc);
/*
 * ll_file_flock() - POSIX (fcntl) and BSD (flock) lock handler.  Translates
 * the kernel file_lock into an LDLM flock enqueue against the MDS, mapping
 * F_RDLCK/F_WRLCK/F_UNLCK to LCK_PR/LCK_PW/LCK_NL, then mirrors the result
 * into the local kernel lock lists so the VFS bookkeeping stays correct.
 * NOTE(review): several case labels, brace and RETURN lines are elided in
 * this excerpt.
 */
2063 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2065 struct inode *inode = file->f_dentry->d_inode;
2066 struct ll_sb_info *sbi = ll_i2sbi(inode);
2067 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2068 .ei_cb_cp =ldlm_flock_completion_ast,
2069 .ei_cbdata = file_lock };
2070 struct md_op_data *op_data;
2071 struct lustre_handle lockh = {0};
2072 ldlm_policy_data_t flock;
2077 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2078 inode->i_ino, file_lock);
2080 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2082 if (file_lock->fl_flags & FL_FLOCK) {
2083 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2084 /* set missing params for flock() calls */
2085 file_lock->fl_end = OFFSET_MAX;
2086 file_lock->fl_pid = current->tgid;
2088 flock.l_flock.pid = file_lock->fl_pid;
2089 flock.l_flock.start = file_lock->fl_start;
2090 flock.l_flock.end = file_lock->fl_end;
/* map the kernel lock type onto an LDLM lock mode */
2092 switch (file_lock->fl_type) {
2094 einfo.ei_mode = LCK_PR;
2097 /* An unlock request may or may not have any relation to
2098 * existing locks so we may not be able to pass a lock handle
2099 * via a normal ldlm_lock_cancel() request. The request may even
2100 * unlock a byte range in the middle of an existing lock. In
2101 * order to process an unlock request we need all of the same
2102 * information that is given with a normal read or write record
2103 * lock request. To avoid creating another ldlm unlock (cancel)
2104 * message we'll treat a LCK_NL flock request as an unlock. */
2105 einfo.ei_mode = LCK_NL;
2108 einfo.ei_mode = LCK_PW;
2111 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* non-blocking set: tell LDLM not to wait for a conflicting lock */
2126 flags = LDLM_FL_BLOCK_NOWAIT;
2132 flags = LDLM_FL_TEST_LOCK;
2133 /* Save the old mode so that if the mode in the lock changes we
2134 * can decrement the appropriate reader or writer refcount. */
2135 file_lock->fl_type = einfo.ei_mode;
2138 CERROR("unknown fcntl lock command: %d\n", cmd);
2142 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2143 LUSTRE_OPC_ANY, NULL);
2144 if (IS_ERR(op_data))
2145 RETURN(PTR_ERR(op_data));
2147 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2148 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2149 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2151 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2152 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2154 ll_finish_md_op_data(op_data);
/* keep the kernel's local lock state in step with the DLM result */
2156 if ((file_lock->fl_flags & FL_FLOCK) &&
2157 (rc == 0 || file_lock->fl_type == F_UNLCK))
2158 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2159 #ifdef HAVE_F_OP_FLOCK
2160 if ((file_lock->fl_flags & FL_POSIX) &&
2161 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2162 !(flags & LDLM_FL_TEST_LOCK))
2163 posix_lock_file_wait(file, file_lock);
2169 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * ll_have_md_lock() - test (without taking a reference, LDLM_FL_TEST_LOCK)
 * whether this client already holds an MDS inodebits lock covering @bits
 * on @inode, in any of the CR/CW/PR/PW modes.
 */
2176 int ll_have_md_lock(struct inode *inode, __u64 bits)
2178 struct lustre_handle lockh;
2179 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2187 fid = &ll_i2info(inode)->lli_fid;
2188 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2190 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2191 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2192 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * ll_take_md_lock() - like ll_have_md_lock() but actually takes a
 * reference on a matching MDS inodebits lock (no LDLM_FL_TEST_LOCK),
 * returning the matched mode and filling @lockh for later release.
 */
2198 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2199 struct lustre_handle *lockh)
2201 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2207 fid = &ll_i2info(inode)->lli_fid;
2208 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2210 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2211 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2212 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * ll_inode_revalidate_fini() - post-process a revalidation result:
 * -ENOENT means the object was unlinked while we held it, which is
 * treated as success (except for unexpected file types); any other
 * non-zero rc is logged as a real failure.
 */
2216 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2217 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2218 * and return success */
2220 /* This path cannot be hit for regular files unless in
2221 * case of obscure races, so no need to to validate
2223 if (!S_ISREG(inode->i_mode) &&
2224 !S_ISDIR(inode->i_mode))
2229 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * __ll_inode_revalidate_it() - refresh @dentry's inode attributes from the
 * MDS.  If the server supports getattr-by-FID (OBD_CONNECT_ATTRFID) an
 * IT_GETATTR intent lock is used (also revalidating the dentry itself);
 * otherwise a plain md_getattr is issued, but only when no suitable MDS
 * inodebits lock (@ibits) is already cached locally.
 * NOTE(review): several brace/RETURN lines are elided in this excerpt.
 */
2237 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2240 struct inode *inode = dentry->d_inode;
2241 struct ptlrpc_request *req = NULL;
2242 struct ll_sb_info *sbi;
2243 struct obd_export *exp;
2248 CERROR("REPORT THIS LINE TO PETER\n");
2251 sbi = ll_i2sbi(inode);
2253 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2254 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2256 exp = ll_i2mdexp(inode);
2258 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2259 struct lookup_intent oit = { .it_op = IT_GETATTR };
2260 struct md_op_data *op_data;
2262 /* Call getattr by fid, so do not provide name at all. */
2263 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2264 dentry->d_inode, NULL, 0, 0,
2265 LUSTRE_OPC_ANY, NULL);
2266 if (IS_ERR(op_data))
2267 RETURN(PTR_ERR(op_data));
/* M_CHECK_STALE tells the MDS to verify the inode is still live */
2269 oit.it_create_mode |= M_CHECK_STALE;
2270 rc = md_intent_lock(exp, op_data, NULL, 0,
2271 /* we are not interested in name
2274 ll_md_blocking_ast, 0);
2275 ll_finish_md_op_data(op_data);
2276 oit.it_create_mode &= ~M_CHECK_STALE;
2278 rc = ll_inode_revalidate_fini(inode, rc);
2282 rc = ll_revalidate_it_finish(req, &oit, dentry);
2284 ll_intent_release(&oit);
2288 /* Unlinked? Unhash dentry, so it is not picked up later by
2289 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2290 here to preserve get_cwd functionality on 2.6.
2292 if (!dentry->d_inode->i_nlink) {
2293 spin_lock(&ll_lookup_lock);
2294 spin_lock(&dcache_lock);
2295 ll_drop_dentry(dentry);
2296 spin_unlock(&dcache_lock);
2297 spin_unlock(&ll_lookup_lock);
2300 ll_lookup_finish_locks(&oit, dentry);
/* no ATTRFID: only go to the MDS if we lack a cached ibits lock */
2301 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2303 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2304 obd_valid valid = OBD_MD_FLGETATTR;
2305 struct obd_capa *oc;
/* regular files: also fetch the striping EA (sized to the max) */
2308 if (S_ISREG(inode->i_mode)) {
2309 rc = ll_get_max_mdsize(sbi, &ealen);
2312 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2314 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2315 * capa for this inode. Because we only keep capas of dirs
2317 oc = ll_mdscapa_get(inode);
2318 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2322 rc = ll_inode_revalidate_fini(inode, rc);
2326 rc = ll_prep_inode(&inode, req, NULL);
2329 ptlrpc_req_finished(req);
/*
 * ll_inode_revalidate_it() - full revalidation: refresh MDS attributes
 * (UPDATE|LOOKUP ibits) via __ll_inode_revalidate_it(), then glimpse the
 * OSTs for the current file size — skipped when no objects are allocated
 * yet (lli_smd == NULL).
 */
2333 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2338 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2339 MDS_INODELOCK_LOOKUP);
2341 /* if object not yet allocated, don't validate size */
2342 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2345 /* cl_glimpse_size will prefer locally cached writes if they extend
2349 rc = cl_glimpse_size(dentry->d_inode);
/*
 * ll_getattr_it() - getattr with an explicit lookup intent: revalidate
 * the inode, then populate @stat from the (now fresh) in-core inode;
 * size/blocks are read under the inode size lock for consistency.
 */
2354 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2355 struct lookup_intent *it, struct kstat *stat)
2357 struct inode *inode = de->d_inode;
2360 res = ll_inode_revalidate_it(de, it);
2361 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2366 stat->dev = inode->i_sb->s_dev;
2367 stat->ino = inode->i_ino;
2368 stat->mode = inode->i_mode;
2369 stat->nlink = inode->i_nlink;
2370 stat->uid = inode->i_uid;
2371 stat->gid = inode->i_gid;
2372 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2373 stat->atime = inode->i_atime;
2374 stat->mtime = inode->i_mtime;
2375 stat->ctime = inode->i_ctime;
2376 #ifdef HAVE_INODE_BLKSIZE
2377 stat->blksize = inode->i_blksize;
2379 stat->blksize = 1 << inode->i_blkbits;
/* size and blocks must be read atomically w.r.t. concurrent updates */
2382 ll_inode_size_lock(inode, 0);
2383 stat->size = i_size_read(inode);
2384 stat->blocks = inode->i_blocks;
2385 ll_inode_size_unlock(inode, 0);
/*
 * ll_getattr() - standard VFS getattr entry point; wraps ll_getattr_it()
 * with a fresh IT_GETATTR intent.
 */
2389 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2391 struct lookup_intent it = { .it_op = IT_GETATTR };
2393 return ll_getattr_it(mnt, de, &it, stat);
/*
 * lustre_check_acl() - POSIX-ACL permission callback: duplicate the
 * cached ACL under lli_lock (so a concurrent update can't free it out
 * from under us) and evaluate @mask against it.  Compiled out when
 * CONFIG_FS_POSIX_ACL is disabled.
 */
2397 int lustre_check_acl(struct inode *inode, int mask)
2399 #ifdef CONFIG_FS_POSIX_ACL
2400 struct ll_inode_info *lli = ll_i2info(inode);
2401 struct posix_acl *acl;
2405 spin_lock(&lli->lli_lock);
2406 acl = posix_acl_dup(lli->lli_posix_acl);
2407 spin_unlock(&lli->lli_lock);
2412 rc = posix_acl_permission(inode, acl, mask);
2413 posix_acl_release(acl);
/*
 * ll_inode_permission() (kernels >= 2.6.10): permission check that first
 * revalidates the root inode (which lookup never revalidates), defers to
 * the remote-permission path on RMT_CLIENT mounts, and otherwise relies
 * on generic_permission() with lustre_check_acl as the ACL callback.
 */
2421 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2422 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2427 /* as root inode are NOT getting validated in lookup operation,
2428 * need to do it before permission check. */
2430 if (inode == inode->i_sb->s_root->d_inode) {
2431 struct lookup_intent it = { .it_op = IT_LOOKUP };
2433 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2434 MDS_INODELOCK_LOOKUP);
2439 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2440 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
/* remote clients check permission on the MDS, not locally */
2442 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2443 return lustre_check_remote_perm(inode, mask);
2445 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2446 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * ll_inode_permission() (older kernels): open-coded Unix permission check
 * — owner, then ACL, then group, then other bits — finishing with the
 * capability overrides (DAC_OVERRIDE / DAC_READ_SEARCH).  Mirrors the
 * classic kernel permission() logic.  NOTE(review): several return and
 * brace lines are elided in this excerpt.
 */
2451 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2453 int mode = inode->i_mode;
2456 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2457 inode->i_ino, inode->i_generation, inode, mask);
2459 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2460 return lustre_check_remote_perm(inode, mask);
2462 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* writes are denied on read-only and immutable inodes */
2464 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2465 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2467 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2469 if (current->fsuid == inode->i_uid) {
2472 if (((mode >> 3) & mask & S_IRWXO) != mask)
/* group bits were insufficient; let the ACL decide */
2474 rc = lustre_check_acl(inode, mask);
2478 goto check_capabilities;
2482 if (in_group_p(inode->i_gid))
2485 if ((mode & mask & S_IRWXO) == mask)
2489 if (!(mask & MAY_EXEC) ||
2490 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2491 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2494 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2495 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select the vector-I/O entry points for the file_operations tables below:
 * older kernels expose readv/writev, newer ones aio_read/aio_write. */
2502 #ifdef HAVE_FILE_READV
2503 #define READ_METHOD readv
2504 #define READ_FUNCTION ll_file_readv
2505 #define WRITE_METHOD writev
2506 #define WRITE_FUNCTION ll_file_writev
2508 #define READ_METHOD aio_read
2509 #define READ_FUNCTION ll_file_aio_read
2510 #define WRITE_METHOD aio_write
2511 #define WRITE_FUNCTION ll_file_aio_write
2514 /* -o localflock - only provides locally consistent flock locks */
/* default file_operations: no .flock/.lock, so the kernel's local
 * (single-node) lock handling applies */
2515 struct file_operations ll_file_operations = {
2516 .read = ll_file_read,
2517 .READ_METHOD = READ_FUNCTION,
2518 .write = ll_file_write,
2519 .WRITE_METHOD = WRITE_FUNCTION,
2520 .ioctl = ll_file_ioctl,
2521 .open = ll_file_open,
2522 .release = ll_file_release,
2523 .mmap = ll_file_mmap,
2524 .llseek = ll_file_seek,
2525 .sendfile = ll_file_sendfile,
/* "-o flock" variant: same as ll_file_operations but routes flock/posix
 * locks through ll_file_flock for cluster-wide consistency */
2529 struct file_operations ll_file_operations_flock = {
2530 .read = ll_file_read,
2531 .READ_METHOD = READ_FUNCTION,
2532 .write = ll_file_write,
2533 .WRITE_METHOD = WRITE_FUNCTION,
2534 .ioctl = ll_file_ioctl,
2535 .open = ll_file_open,
2536 .release = ll_file_release,
2537 .mmap = ll_file_mmap,
2538 .llseek = ll_file_seek,
2539 .sendfile = ll_file_sendfile,
2541 #ifdef HAVE_F_OP_FLOCK
2542 .flock = ll_file_flock,
2544 .lock = ll_file_flock
2547 /* These are for -o noflock - to return ENOSYS on flock calls */
2548 struct file_operations ll_file_operations_noflock = {
2549 .read = ll_file_read,
2550 .READ_METHOD = READ_FUNCTION,
2551 .write = ll_file_write,
2552 .WRITE_METHOD = WRITE_FUNCTION,
2553 .ioctl = ll_file_ioctl,
2554 .open = ll_file_open,
2555 .release = ll_file_release,
2556 .mmap = ll_file_mmap,
2557 .llseek = ll_file_seek,
2558 .sendfile = ll_file_sendfile,
2560 #ifdef HAVE_F_OP_FLOCK
/* ll_file_noflock rejects every lock request on this mount */
2561 .flock = ll_file_noflock,
2563 .lock = ll_file_noflock
/* inode_operations for regular Lustre files: attribute get/set, xattrs,
 * permission checking; setattr entry point depends on whether the kernel
 * carries the VFS intent patches */
2566 struct inode_operations ll_file_inode_operations = {
2567 #ifdef HAVE_VFS_INTENT_PATCHES
2568 .setattr_raw = ll_setattr_raw,
2570 .setattr = ll_setattr,
2571 .truncate = ll_truncate,
2572 .getattr = ll_getattr,
2573 .permission = ll_inode_permission,
2574 .setxattr = ll_setxattr,
2575 .getxattr = ll_getxattr,
2576 .listxattr = ll_listxattr,
2577 .removexattr = ll_removexattr,
2580 /* dynamic ioctl number support routins */
/* llioc: registry of dynamically registered ioctl handlers.  ioc_sem
 * guards ioc_head, a list of llioc_data records. */
2581 static struct llioc_ctl_data {
2582 struct rw_semaphore ioc_sem;
2583 struct list_head ioc_head;
2585 __RWSEM_INITIALIZER(llioc.ioc_sem),
2586 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* one registration: callback plus the iocd_count ioctl numbers it
 * handles, stored in the flexible trailing array iocd_cmd[] */
2591 struct list_head iocd_list;
2592 unsigned int iocd_size;
2593 llioc_callback_t iocd_cb;
2594 unsigned int iocd_count;
2595 unsigned int iocd_cmd[0];
/*
 * ll_iocontrol_register() - register callback @cb for @count dynamic ioctl
 * numbers listed in @cmd.  Returns an opaque handle (the allocation) used
 * later by ll_iocontrol_unregister(); NULL-return paths are elided in this
 * excerpt.
 */
2598 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2601 struct llioc_data *in_data = NULL;
2604 if (cb == NULL || cmd == NULL ||
2605 count > LLIOC_MAX_CMD || count < 0)
/* record plus trailing array of ioctl numbers */
2608 size = sizeof(*in_data) + count * sizeof(unsigned int);
2609 OBD_ALLOC(in_data, size);
2610 if (in_data == NULL)
2613 memset(in_data, 0, sizeof(*in_data));
2614 in_data->iocd_size = size;
2615 in_data->iocd_cb = cb;
2616 in_data->iocd_count = count;
2617 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2619 down_write(&llioc.ioc_sem);
2620 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2621 up_write(&llioc.ioc_sem);
/*
 * ll_iocontrol_unregister() - remove and free the registration identified
 * by @magic (the pointer returned from ll_iocontrol_register()).  Warns if
 * no matching registration is found.
 */
2626 void ll_iocontrol_unregister(void *magic)
2628 struct llioc_data *tmp;
2633 down_write(&llioc.ioc_sem);
2634 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* iocd_size is cached before the entry is freed */
2636 unsigned int size = tmp->iocd_size;
2638 list_del(&tmp->iocd_list);
2639 up_write(&llioc.ioc_sem);
2641 OBD_FREE(tmp, size);
2645 up_write(&llioc.ioc_sem);
2647 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2650 EXPORT_SYMBOL(ll_iocontrol_register);
2651 EXPORT_SYMBOL(ll_iocontrol_unregister);
2653 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2654 unsigned int cmd, unsigned long arg, int *rcp)
2656 enum llioc_iter ret = LLIOC_CONT;
2657 struct llioc_data *data;
2658 int rc = -EINVAL, i;
2660 down_read(&llioc.ioc_sem);
2661 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2662 for (i = 0; i < data->iocd_count; i++) {
2663 if (cmd != data->iocd_cmd[i])
2666 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2670 if (ret == LLIOC_STOP)
2673 up_read(&llioc.ioc_sem);