1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-open ll_file_data from the ll_file_data_slab cache.
 * NOTE(review): this extraction appears to have dropped lines (braces,
 * ENTRY/RETURN, the return of @fd) — verify against pristine source. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
/* CFS_ALLOC_IO: allocation made from an I/O path — presumably avoids
 * recursing into filesystem reclaim; confirm against libcfs docs. */
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Release a ll_file_data back to its slab cache (counterpart of
 * ll_file_data_get()). */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Copy the current VFS inode attributes (fid, mode, times, size, blocks,
 * flags, ioepoch) plus the open file handle @fh into @op_data so they can
 * be sent to the MDS, e.g. as part of a close request. */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* Cast smuggles i_flags through the ll_iattr wrapper's extra field. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
/* op_capa1 takes a reference on the MDS capability; the RPC path
 * presumably releases it — confirm in the md_close()/caller path. */
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Prepare @op_data for an MDS close: select which attributes are valid
 * for the close RPC, run epoch-close processing for write opens, and pack
 * the inode attributes plus the open handle into @op_data. */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
/* Size/blocks and epoch-close handling only matter for write opens. */
92 if (!(och->och_flags & FMODE_WRITE))
/* Without Size-on-MDS support (or for non-regular files) the client is
 * authoritative for size, so send it with the close. */
95 if (!(exp_connect_som(ll_i2mdexp(inode))) || !S_ISREG(inode->i_mode))
96 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* &och is intentional: ll_epoch_close() takes a handle-pointer-pointer —
 * presumably so it can clear the caller's pointer; verify its prototype. */
98 ll_epoch_close(inode, op_data, &och, 0);
101 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/* Send a close RPC to the MDS for the open handle @och, perform
 * Size-on-MDS update if the MDS requests it, destroy OST objects attached
 * to the close reply if any, and tear down the open replay data.
 * Returns 0 or a negative errno from the close/SOM/destroy steps. */
105 static int ll_close_inode_openhandle(struct obd_export *md_exp,
107 struct obd_client_handle *och)
109 struct obd_export *exp = ll_i2mdexp(inode);
110 struct md_op_data *op_data;
111 struct ptlrpc_request *req = NULL;
112 struct obd_device *obd = class_exp2obd(exp);
119 * XXX: in case of LMV, is this correct to access
122 CERROR("Invalid MDC connection handle "LPX64"\n",
123 ll_i2mdexp(inode)->exp_handle.h_cookie);
128 * here we check if this is forced umount. If so this is called on
129 * canceling "open lock" and we do not call md_close() in this case, as
130 * it will not be successful, as import is already deactivated.
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
/* Remember whether this close also closes the I/O epoch; checked below
 * when deciding whether DONE_WRITING must still be sent. */
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr to back to MDS. */
147 rc = ll_sizeonmds_update(inode, &och->och_fh,
148 op_data->op_ioepoch);
150 CERROR("inode %lu mdc Size-on-MDS update failed: "
151 "rc = %d\n", inode->i_ino, rc);
155 CERROR("inode %lu mdc close failed: rc = %d\n",
158 ll_finish_md_op_data(op_data);
/* The close reply may carry unlink cookies for OST objects that must
 * now be destroyed by the client. */
161 rc = ll_objects_destroy(req, inode);
163 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* SOM enabled, epoch still open on a written regular file: queue a
 * deferred DONE_WRITING instead of finishing here. */
170 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
171 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
172 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
174 md_clear_open_replay_data(md_exp, och);
175 /* Free @och if it is not waiting for DONE_WRITING. */
176 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
179 if (req) /* This is close request */
180 ptlrpc_req_finished(req);
/* Really close the MDS open handle of the given kind (write/exec/read)
 * for @inode, but only when no other local opens still use it.
 * @flags selects which of the per-inode handle slots to close. */
184 int ll_md_real_close(struct inode *inode, int flags)
186 struct ll_inode_info *lli = ll_i2info(inode);
187 struct obd_client_handle **och_p;
188 struct obd_client_handle *och;
/* Pick the handle slot and its use count matching the open mode. */
193 if (flags & FMODE_WRITE) {
194 och_p = &lli->lli_mds_write_och;
195 och_usecount = &lli->lli_open_fd_write_count;
196 } else if (flags & FMODE_EXEC) {
197 och_p = &lli->lli_mds_exec_och;
198 och_usecount = &lli->lli_open_fd_exec_count;
200 LASSERT(flags & FMODE_READ);
201 och_p = &lli->lli_mds_read_och;
202 och_usecount = &lli->lli_open_fd_read_count;
/* lli_och_sem serializes handle install/teardown against ll_file_open. */
205 down(&lli->lli_och_sem);
206 if (*och_usecount) { /* There are still users of this handle, so
208 up(&lli->lli_och_sem);
213 up(&lli->lli_och_sem);
215 if (och) { /* There might be a race and somebody have freed this och
217 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-fd close path: drop a group lock if held, decrement the open count
 * for this fd's mode, and — unless we still hold a cached OPEN DLM lock
 * that lets us skip the RPC — call ll_md_real_close() to close the MDS
 * handle. Finally free the ll_file_data and close capabilities. */
224 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
227 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
228 struct ll_inode_info *lli = ll_i2info(inode);
232 /* clear group lock, if present */
233 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
234 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
236 /* Let's see if we have good enough OPEN lock on the file and if
237 we can skip talking to MDS */
238 if (file->f_dentry->d_inode) { /* Can this ever be false? */
/* TEST_LOCK: probe for a granted OPEN ibits lock without taking a ref. */
240 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
241 struct lustre_handle lockh;
242 struct inode *inode = file->f_dentry->d_inode;
243 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
245 down(&lli->lli_och_sem);
/* Drop this fd's contribution to the matching open-mode use count. */
246 if (fd->fd_omode & FMODE_WRITE) {
248 LASSERT(lli->lli_open_fd_write_count);
249 lli->lli_open_fd_write_count--;
250 } else if (fd->fd_omode & FMODE_EXEC) {
252 LASSERT(lli->lli_open_fd_exec_count);
253 lli->lli_open_fd_exec_count--;
256 LASSERT(lli->lli_open_fd_read_count);
257 lli->lli_open_fd_read_count--;
259 up(&lli->lli_och_sem);
/* No cached OPEN lock: must do the real close RPC now. */
261 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
262 LDLM_IBITS, &policy, lockmode,
264 rc = ll_md_real_close(file->f_dentry->d_inode,
268 CERROR("Releasing a file %p with negative dentry %p. Name %s",
269 file, file->f_dentry, file->f_dentry->d_name.name);
272 LUSTRE_FPRIVATE(file) = NULL;
273 ll_file_data_put(fd);
274 ll_capa_close(inode);
279 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
281 /* While this returns an error code, fput() the caller does not, so we need
282 * to make every effort to clean up all of our state here. Also, applications
283 * rarely check close errors and even if an error is returned they will not
284 * re-try the close call.
/* VFS ->release() handler: tear down per-open state. Handles remote-client
 * ACL bookkeeping on the root inode, stops the statahead thread if this fd
 * started it, clears pending async OST errors, and calls ll_md_close().
 * The root dentry is special-cased: it has no MDS open handle to close. */
286 int ll_file_release(struct inode *inode, struct file *file)
288 struct ll_file_data *fd;
289 struct ll_sb_info *sbi = ll_i2sbi(inode);
290 struct ll_inode_info *lli = ll_i2info(inode);
291 struct lov_stripe_md *lsm = lli->lli_smd;
295 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
296 inode->i_generation, inode);
298 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL cleanup applies only on the filesystem root. */
299 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
300 inode == inode->i_sb->s_root->d_inode) {
301 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
304 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
305 fd->fd_flags &= ~LL_FILE_RMTACL;
306 rct_del(&sbi->ll_rct, cfs_curproc_pid());
307 et_search_free(&sbi->ll_et, cfs_curproc_pid());
312 if (inode->i_sb->s_root != file->f_dentry)
313 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
314 fd = LUSTRE_FPRIVATE(file);
317 /* The last ref on @file, maybe not the the owner pid of statahead.
318 * Different processes can open the same dir, "ll_opendir_key" means:
319 * it is me that should stop the statahead thread. */
320 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
321 ll_stop_statahead(inode, lli->lli_opendir_key);
/* Root has no MDS open handle; just drop the fd and return. */
323 if (inode->i_sb->s_root == file->f_dentry) {
324 LUSTRE_FPRIVATE(file) = NULL;
325 ll_file_data_put(fd);
/* Pick up any asynchronous write error recorded against the stripes so
 * close can report it (applications rarely check, per comment above). */
330 lov_test_and_clear_async_rc(lsm);
331 lli->lli_async_rc = 0;
333 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/* Perform an intent-based open RPC to the MDS for @file, optionally
 * carrying striping info (@lmm/@lmmsize) when called from setstripe.
 * Fills @itp with the open disposition and lock data; on success the
 * inode is refreshed from the reply and the MDS lock data is attached. */
337 static int ll_intent_file_open(struct file *file, void *lmm,
338 int lmmsize, struct lookup_intent *itp)
340 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
341 struct dentry *parent = file->f_dentry->d_parent;
342 const char *name = file->f_dentry->d_name.name;
343 const int len = file->f_dentry->d_name.len;
344 struct md_op_data *op_data;
345 struct ptlrpc_request *req;
352 /* Usually we come here only for NFSD, and we want open lock.
353 But we can also get here with pre 2.6.15 patchless kernels, and in
354 that case that lock is also ok */
355 /* We can also get here if there was cached open handle in revalidate_it
356 * but it disappeared while we were getting from there to ll_file_open.
357 * But this means this file was closed and immediatelly opened which
358 * makes a good candidate for using OPEN lock */
359 /* If lmmsize & lmm are not 0, we are just setting stripe info
360 * parameters. No need for the open lock */
361 if (!lmm && !lmmsize)
362 itp->it_flags |= MDS_OPEN_LOCK;
364 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
365 file->f_dentry->d_inode, name, len,
366 O_RDWR, LUSTRE_OPC_ANY, NULL);
368 RETURN(PTR_ERR(op_data));
370 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
371 0 /*unused */, &req, ll_md_blocking_ast, 0);
372 ll_finish_md_op_data(op_data);
374 /* reason for keep own exit path - don`t flood log
375 * with messages with -ESTALE errors.
/* If the open succeeded server-side but we are bailing out, release the
 * server open handle so it is not leaked. */
377 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
378 it_open_error(DISP_OPEN_OPEN, itp))
380 ll_release_openhandle(file->f_dentry, itp);
384 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
385 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
386 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* Refresh the inode from the open reply, then bind the DLM lock to it. */
390 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
391 if (!rc && itp->d.lustre.it_lock_mode)
392 md_set_lock_data(sbi->ll_md_exp,
393 &itp->d.lustre.it_lock_handle,
394 file->f_dentry->d_inode, NULL);
397 ptlrpc_req_finished(itp->d.lustre.it_data);
398 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
399 ll_intent_drop_lock(itp);
/* Record a newly-opened I/O epoch on the inode. A zero @ioepoch or an
 * unchanged value is a no-op. */
404 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
406 if (ioepoch && lli->lli_ioepoch != ioepoch) {
407 lli->lli_ioepoch = ioepoch;
408 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
409 ioepoch, PFID(&lli->lli_fid));
/* Populate an obd_client_handle from a successful open reply: copy the
 * server file handle, stamp magic/fid/flags, open the I/O epoch from the
 * reply body, and register the request for open replay.
 * Returns the result of md_set_open_replay_data(). */
413 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
414 struct lookup_intent *it, struct obd_client_handle *och)
416 struct ptlrpc_request *req = it->d.lustre.it_data;
417 struct mdt_body *body;
421 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
422 LASSERT(body != NULL); /* reply already checked out */
424 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
425 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
426 och->och_fid = lli->lli_fid;
427 och->och_flags = it->it_flags;
428 ll_ioepoch_open(lli, body->ioepoch);
430 return md_set_open_replay_data(md_exp, och, req);
/* Complete the client-local part of an open: fill @och from the intent
 * reply (when a new MDS handle was obtained), then attach @fd to the file,
 * initialize readahead state and record the open mode. */
433 int ll_local_open(struct file *file, struct lookup_intent *it,
434 struct ll_file_data *fd, struct obd_client_handle *och)
436 struct inode *inode = file->f_dentry->d_inode;
437 struct ll_inode_info *lli = ll_i2info(inode);
/* The file must not already carry private data; set below. */
440 LASSERT(!LUSTRE_FPRIVATE(file));
445 struct ptlrpc_request *req = it->d.lustre.it_data;
446 struct mdt_body *body;
449 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
453 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
454 if ((it->it_flags & FMODE_WRITE) &&
455 (body->valid & OBD_MD_FLSIZE))
456 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
457 lli->lli_ioepoch, PFID(&lli->lli_fid));
460 LUSTRE_FPRIVATE(file) = fd;
461 ll_readahead_init(inode, &fd->fd_ras);
462 fd->fd_omode = it->it_flags;
466 /* Open a file, and (for the very first open) create objects on the OSTs at
467 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
468 * creation or open until ll_lov_setstripe() ioctl is called. We grab
469 * lli_open_sem to ensure no other process will create objects, send the
470 * stripe MD to the MDS, or try to destroy the objects if that fails.
472 * If we already have the stripe MD locally then we don't request it in
473 * md_open(), by passing a lmm_size = 0.
475 * It is up to the application to ensure no other processes open this file
476 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
477 * used. We might be able to avoid races of that sort by getting lli_open_sem
478 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
479 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* VFS ->open() handler. Either reuses an intent produced by the lookup
 * path (HAVE_VFS_INTENT_PATCHES) or builds its own IT_OPEN intent, then
 * installs/reuses the per-mode MDS open handle under lli_och_sem and
 * completes the local open. Also starts statahead bookkeeping for
 * directories and special-cases the filesystem root.
 * NOTE(review): many lines (braces, GOTO targets, RETURNs) are missing
 * from this extraction — locking pairs cannot be fully audited here. */
481 int ll_file_open(struct inode *inode, struct file *file)
483 struct ll_inode_info *lli = ll_i2info(inode);
484 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
485 .it_flags = file->f_flags };
486 struct lov_stripe_md *lsm;
487 struct ptlrpc_request *req = NULL;
488 struct obd_client_handle **och_p;
490 struct ll_file_data *fd;
491 int rc = 0, opendir_set = 0;
494 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
495 inode->i_generation, inode, file->f_flags);
497 #ifdef HAVE_VFS_INTENT_PATCHES
500 it = file->private_data; /* XXX: compat macro */
501 file->private_data = NULL; /* prevent ll_local_open assertion */
504 fd = ll_file_data_get();
/* First opener of a directory claims statahead ownership via fd. */
509 if (S_ISDIR(inode->i_mode)) {
510 spin_lock(&lli->lli_lock);
511 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
512 LASSERT(lli->lli_sai == NULL);
513 lli->lli_opendir_key = fd;
514 lli->lli_opendir_pid = cfs_curproc_pid();
517 spin_unlock(&lli->lli_lock);
/* Root needs no MDS open handle; just attach fd and return. */
520 if (inode->i_sb->s_root == file->f_dentry) {
521 LUSTRE_FPRIVATE(file) = fd;
525 if (!it || !it->d.lustre.it_disposition) {
526 /* Convert f_flags into access mode. We cannot use file->f_mode,
527 * because everything but O_ACCMODE mask was stripped from
/* O_RDONLY=0, O_WRONLY=1, O_RDWR=2; +1 maps them onto FMODE_ bits. */
529 if ((oit.it_flags + 1) & O_ACCMODE)
531 if (file->f_flags & O_TRUNC)
532 oit.it_flags |= FMODE_WRITE;
534 /* kernel only call f_op->open in dentry_open. filp_open calls
535 * dentry_open after call to open_namei that checks permissions.
536 * Only nfsd_open call dentry_open directly without checking
537 * permissions and because of that this code below is safe. */
538 if (oit.it_flags & FMODE_WRITE)
539 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
541 /* We do not want O_EXCL here, presumably we opened the file
542 * already? XXX - NFS implications? */
543 oit.it_flags &= ~O_EXCL;
549 /* Let's see if we have file open on MDS already. */
550 if (it->it_flags & FMODE_WRITE) {
551 och_p = &lli->lli_mds_write_och;
552 och_usecount = &lli->lli_open_fd_write_count;
553 } else if (it->it_flags & FMODE_EXEC) {
554 och_p = &lli->lli_mds_exec_och;
555 och_usecount = &lli->lli_open_fd_exec_count;
557 och_p = &lli->lli_mds_read_och;
558 och_usecount = &lli->lli_open_fd_read_count;
561 down(&lli->lli_och_sem);
562 if (*och_p) { /* Open handle is present */
563 if (it_disposition(it, DISP_OPEN_OPEN)) {
564 /* Well, there's extra open request that we do not need,
565 let's close it somehow. This will decref request. */
566 rc = it_open_error(DISP_OPEN_OPEN, it);
568 up(&lli->lli_och_sem);
569 ll_file_data_put(fd);
570 GOTO(out_openerr, rc);
572 ll_release_openhandle(file->f_dentry, it);
573 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse the cached handle: NULL och means "no new handle to fill". */
578 rc = ll_local_open(file, it, fd, NULL);
581 up(&lli->lli_och_sem);
582 ll_file_data_put(fd);
583 GOTO(out_openerr, rc);
586 LASSERT(*och_usecount == 0);
587 if (!it->d.lustre.it_disposition) {
588 /* We cannot just request lock handle now, new ELC code
589 means that one of other OPEN locks for this file
590 could be cancelled, and since blocking ast handler
591 would attempt to grab och_sem as well, that would
592 result in a deadlock */
593 up(&lli->lli_och_sem);
594 it->it_create_mode |= M_CHECK_STALE;
595 rc = ll_intent_file_open(file, NULL, 0, it);
596 it->it_create_mode &= ~M_CHECK_STALE;
598 ll_file_data_put(fd);
599 GOTO(out_openerr, rc);
602 /* Got some error? Release the request */
603 if (it->d.lustre.it_status < 0) {
604 req = it->d.lustre.it_data;
605 ptlrpc_req_finished(req);
609 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
611 ll_file_data_put(fd);
612 GOTO(out_och_free, rc = -ENOMEM);
615 req = it->d.lustre.it_data;
617 /* md_intent_lock() didn't get a request ref if there was an
618 * open error, so don't do cleanup on the request here
620 /* XXX (green): Should not we bail out on any error here, not
621 * just open error? */
622 rc = it_open_error(DISP_OPEN_OPEN, it);
624 ll_file_data_put(fd);
625 GOTO(out_och_free, rc);
628 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
629 rc = ll_local_open(file, it, fd, *och_p);
631 ll_file_data_put(fd);
632 GOTO(out_och_free, rc);
635 up(&lli->lli_och_sem);
637 /* Must do this outside lli_och_sem lock to prevent deadlock where
638 different kind of OPEN lock for this same inode gets cancelled
639 by ldlm_cancel_lru */
640 if (!S_ISREG(inode->i_mode))
/* O_LOV_DELAY_CREATE (or read-only open): postpone OST object creation
 * until ll_lov_setstripe() — see the comment block above this function. */
647 if (file->f_flags & O_LOV_DELAY_CREATE ||
648 !(file->f_mode & FMODE_WRITE)) {
649 CDEBUG(D_INODE, "object creation was delayed\n");
653 file->f_flags &= ~O_LOV_DELAY_CREATE;
656 ptlrpc_req_finished(req);
658 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
662 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
663 *och_p = NULL; /* OBD_FREE writes some magic there */
666 up(&lli->lli_och_sem);
/* On error, release the statahead ownership taken above. */
668 if (opendir_set != 0)
669 ll_stop_statahead(inode, lli->lli_opendir_key);
675 /* Fills the obdo with the attributes for the lsm */
/* Fetch merged attributes for @lsm from the OSTs via an async getattr on
 * a ptlrpc set, storing them into @obdo. On success only the OST-
 * authoritative fields (size/blocks/blksize/times) remain marked valid. */
676 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
677 struct obd_capa *capa, struct obdo *obdo)
679 struct ptlrpc_request_set *set;
680 struct obd_info oinfo = { { { 0 } } };
685 LASSERT(lsm != NULL);
689 oinfo.oi_oa->o_id = lsm->lsm_object_id;
690 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
691 oinfo.oi_oa->o_mode = S_IFREG;
692 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
693 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
694 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
695 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
697 oinfo.oi_capa = capa;
699 set = ptlrpc_prep_set();
701 CERROR("can't allocate ptlrpc set\n");
704 rc = obd_getattr_async(exp, &oinfo, set);
/* Wait for all per-stripe getattr replies before destroying the set. */
706 rc = ptlrpc_set_wait(set);
707 ptlrpc_set_destroy(set);
/* Trim o_valid to fields the OSTs are authoritative for. */
710 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
711 OBD_MD_FLATIME | OBD_MD_FLMTIME |
712 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
716 /* Fills the obdo with the attributes for the inode defined by lsm */
/* Fetch OST attributes for @inode's stripes into @obdo and refresh the
 * in-core inode from them. Takes/uses the MDS capability for the RPC. */
717 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
719 struct ll_inode_info *lli = ll_i2info(inode);
720 struct obd_capa *capa = ll_mdscapa_get(inode);
724 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
727 obdo_refresh_inode(inode, obdo, obdo->o_valid);
729 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
730 lli->lli_smd->lsm_object_id, i_size_read(inode),
731 (unsigned long long)inode->i_blocks,
732 (unsigned long)ll_inode_blksize(inode));
/* Merge the lock-value-block (size/blocks/times) across the inode's
 * stripes and write the result into the in-core inode, under the
 * size lock to keep size updates atomic. */
737 int ll_merge_lvb(struct inode *inode)
739 struct ll_inode_info *lli = ll_i2info(inode);
740 struct ll_sb_info *sbi = ll_i2sbi(inode);
746 ll_inode_size_lock(inode, 1);
747 inode_init_lvb(inode, &lvb);
748 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
749 i_size_write(inode, lvb.lvb_size);
750 inode->i_blocks = lvb.lvb_blocks;
752 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
753 LTIME_S(inode->i_atime) = lvb.lvb_atime;
754 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
755 ll_inode_size_unlock(inode, 1);
/* Glimpse helper for ioctl: fetch current OST attributes for @lsm and
 * copy size/blocks/times into the caller-supplied stat structure. */
760 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
763 struct obdo obdo = { 0 };
766 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
768 st->st_size = obdo.o_size;
769 st->st_blocks = obdo.o_blocks;
770 st->st_mtime = obdo.o_mtime;
771 st->st_atime = obdo.o_atime;
772 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read (@write == 0) or write on @file:
 * zero the structure, propagate O_NONBLOCK/O_APPEND, attach the cl
 * object, and choose the DLM locking policy (never for no-lock mounts
 * or LL_FILE_IGNORE_LOCK fds, mandatory for append writes). */
777 void ll_io_init(struct cl_io *io, const struct file *file, int write)
779 struct inode *inode = file->f_dentry->d_inode;
780 struct ll_sb_info *sbi = ll_i2sbi(inode);
781 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
784 memset(io, 0, sizeof *io);
785 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
787 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
788 io->ci_obj = ll_i2info(inode)->lli_clob;
789 io->ci_lockreq = CILR_MAYBE;
790 if (fd->fd_flags & LL_FILE_IGNORE_LOCK ||
791 sbi->ll_flags & LL_SBI_NOLCK) {
792 io->ci_lockreq = CILR_NEVER;
793 io->ci_no_srvlock = 1;
794 } else if (file->f_flags & O_APPEND) {
795 io->ci_lockreq = CILR_MANDATORY;
/* Common driver for all read/write/sendfile entry points: set up the
 * cl_io from @args, run the cl_io loop, and advance *ppos by the bytes
 * actually transferred. Returns bytes done or a negative errno. */
799 static ssize_t ll_file_io_generic(const struct lu_env *env,
800 struct ccc_io_args *args, struct file *file,
801 enum cl_io_type iot, loff_t *ppos, size_t count)
807 io = &ccc_env_info(env)->cti_io;
808 ll_io_init(io, file, iot == CIT_WRITE);
811 io->u.ci_rd.rd_is_sendfile = args->cia_is_sendfile;
/* cl_io_rw_init() == 0: layers accepted the IO; run the full loop. */
813 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
814 struct vvp_io *vio = vvp_env_io(env);
815 struct ccc_io *cio = ccc_env_io(env);
816 if (cl_io_is_sendfile(io)) {
817 vio->u.read.cui_actor = args->cia_actor;
818 vio->u.read.cui_target = args->cia_target;
820 cio->cui_iov = args->cia_iov;
821 cio->cui_nrsegs = args->cia_nrsegs;
822 #ifndef HAVE_FILE_WRITEV
823 cio->cui_iocb = args->cia_iocb;
826 cio->cui_fd = LUSTRE_FPRIVATE(file);
827 result = cl_io_loop(env, io);
829 /* cl_io_rw_init() handled IO */
830 result = io->ci_result;
/* Propagate the final position back to the caller's *ppos. */
831 if (io->ci_nob > 0) {
833 *ppos = io->u.ci_wr.wr.crw_pos;
841 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute the total byte count into *count,
 * possibly shrinking *nr_segs at the first inaccessible segment —
 * mirrors the kernel's __generic_file_aio_write_nolock checks. */
843 static int ll_file_get_iov_count(const struct iovec *iov,
844 unsigned long *nr_segs, size_t *count)
849 for (seg = 0; seg < *nr_segs; seg++) {
850 const struct iovec *iv = &iov[seg];
853 * If any segment has a negative length, or the cumulative
854 * length ever wraps negative then return -EINVAL.
857 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
859 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
864 cnt -= iv->iov_len; /* This segment is no good */
871 #ifdef HAVE_FILE_READV
/* Vectored read entry point (HAVE_FILE_READV kernels): validate the
 * iovec, obtain a cl environment, and run a CIT_READ through
 * ll_file_io_generic(). */
872 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
873 unsigned long nr_segs, loff_t *ppos)
876 struct ccc_io_args *args;
882 result = ll_file_get_iov_count(iov, &nr_segs, &count);
886 env = cl_env_get(&refcheck);
888 RETURN(PTR_ERR(env));
890 args = &vvp_env_info(env)->vti_args;
891 args->cia_is_sendfile = 0;
892 args->cia_iov = (struct iovec *)iov;
893 args->cia_nrsegs = nr_segs;
894 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
895 cl_env_put(env, &refcheck);
/* Plain read(): wrap the user buffer in a single per-env iovec and
 * delegate to ll_file_readv(). */
899 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
903 struct iovec *local_iov;
908 env = cl_env_get(&refcheck);
910 RETURN(PTR_ERR(env));
912 local_iov = &vvp_env_info(env)->vti_local_iov;
913 local_iov->iov_base = (void __user *)buf;
914 local_iov->iov_len = count;
915 result = ll_file_readv(file, local_iov, 1, ppos);
916 cl_env_put(env, &refcheck);
/* AIO read entry point: validate the iovec, then run a CIT_READ keyed
 * to the kiocb's file and position. */
921 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
922 unsigned long nr_segs, loff_t pos)
925 struct ccc_io_args *args;
931 result = ll_file_get_iov_count(iov, &nr_segs, &count);
935 env = cl_env_get(&refcheck);
937 RETURN(PTR_ERR(env));
939 args = &vvp_env_info(env)->vti_args;
940 args->cia_is_sendfile = 0;
941 args->cia_iov = (struct iovec *)iov;
942 args->cia_nrsegs = nr_segs;
943 args->cia_iocb = iocb;
944 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
945 &iocb->ki_pos, count);
946 cl_env_put(env, &refcheck);
/* Plain read() on AIO-only kernels: build a synchronous kiocb plus a
 * single iovec and delegate to ll_file_aio_read(), then copy the updated
 * position back to *ppos. */
950 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
954 struct iovec *local_iov;
960 env = cl_env_get(&refcheck);
962 RETURN(PTR_ERR(env));
964 local_iov = &vvp_env_info(env)->vti_local_iov;
965 kiocb = &vvp_env_info(env)->vti_kiocb;
966 local_iov->iov_base = (void __user *)buf;
967 local_iov->iov_len = count;
968 init_sync_kiocb(kiocb, file);
969 kiocb->ki_pos = *ppos;
970 kiocb->ki_left = count;
972 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
973 *ppos = kiocb->ki_pos;
975 cl_env_put(env, &refcheck);
981 * Write to a file (through the page cache).
983 #ifdef HAVE_FILE_WRITEV
/* Vectored write entry point (HAVE_FILE_WRITEV kernels): validate the
 * iovec and run a CIT_WRITE through ll_file_io_generic(). */
984 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
985 unsigned long nr_segs, loff_t *ppos)
988 struct ccc_io_args *args;
994 result = ll_file_get_iov_count(iov, &nr_segs, &count);
998 env = cl_env_get(&refcheck);
1000 RETURN(PTR_ERR(env));
1002 args = &vvp_env_info(env)->vti_args;
1003 args->cia_iov = (struct iovec *)iov;
1004 args->cia_nrsegs = nr_segs;
1005 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1006 cl_env_put(env, &refcheck);
/* Plain write(): wrap the user buffer in a single per-env iovec and
 * delegate to ll_file_writev(). */
1010 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1014 struct iovec *local_iov;
1019 env = cl_env_get(&refcheck);
1021 RETURN(PTR_ERR(env));
1023 local_iov = &vvp_env_info(env)->vti_local_iov;
1024 local_iov->iov_base = (void __user *)buf;
1025 local_iov->iov_len = count;
1027 result = ll_file_writev(file, local_iov, 1, ppos);
1028 cl_env_put(env, &refcheck);
1032 #else /* AIO stuff */
/* AIO write entry point: validate the iovec, then run a CIT_WRITE keyed
 * to the kiocb's file and position. */
1033 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1034 unsigned long nr_segs, loff_t pos)
1037 struct ccc_io_args *args;
1043 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1047 env = cl_env_get(&refcheck);
1049 RETURN(PTR_ERR(env));
1051 args = &vvp_env_info(env)->vti_args;
1052 args->cia_iov = (struct iovec *)iov;
1053 args->cia_nrsegs = nr_segs;
1054 args->cia_iocb = iocb;
1055 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1056 &iocb->ki_pos, count);
1057 cl_env_put(env, &refcheck);
/* Plain write() on AIO-only kernels: build a synchronous kiocb plus a
 * single iovec and delegate to ll_file_aio_write(), then copy the updated
 * position back to *ppos. */
1061 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1065 struct iovec *local_iov;
1066 struct kiocb *kiocb;
1071 env = cl_env_get(&refcheck);
1073 RETURN(PTR_ERR(env));
1075 local_iov = &vvp_env_info(env)->vti_local_iov;
1076 kiocb = &vvp_env_info(env)->vti_kiocb;
1077 local_iov->iov_base = (void __user *)buf;
1078 local_iov->iov_len = count;
1079 init_sync_kiocb(kiocb, file);
1080 kiocb->ki_pos = *ppos;
1081 kiocb->ki_left = count;
1083 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1084 *ppos = kiocb->ki_pos;
1086 cl_env_put(env, &refcheck);
1093 * Send file content (through pagecache) somewhere with helper
/* sendfile(): run a CIT_READ with the sendfile actor/target instead of
 * an iovec, pushing page-cache data to @target via @actor. */
1095 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1096 read_actor_t actor, void *target)
1099 struct ccc_io_args *args;
1104 env = cl_env_get(&refcheck);
1106 RETURN(PTR_ERR(env));
1108 args = &vvp_env_info(env)->vti_args;
1109 args->cia_is_sendfile = 1;
1110 args->cia_target = target;
1111 args->cia_actor = actor;
1112 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1113 cl_env_put(env, &refcheck);
/* ioctl helper (admin-only): recreate a lost OST object for this file.
 * Copies a ll_recreate_obj request from userspace, clones the inode's
 * stripe MD, and calls obd_create() with OBD_FL_RECREATE_OBJS set so the
 * target OST re-creates the object with the given id/group/index. */
1117 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1120 struct obd_export *exp = ll_i2dtexp(inode);
1121 struct ll_recreate_obj ucreatp;
1122 struct obd_trans_info oti = { 0 };
1123 struct obdo *oa = NULL;
1126 struct lov_stripe_md *lsm, *lsm2;
/* Object recreation is destructive-capable; require CAP_SYS_ADMIN. */
1129 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1132 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1133 sizeof(struct ll_recreate_obj)))
/* Size lock pins the stripe MD while we copy it. */
1140 ll_inode_size_lock(inode, 0);
1141 lsm = ll_i2info(inode)->lli_smd;
1143 GOTO(out, rc = -ENOENT);
1144 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1145 (lsm->lsm_stripe_count));
1147 OBD_ALLOC(lsm2, lsm_size);
1149 GOTO(out, rc = -ENOMEM);
1151 oa->o_id = ucreatp.lrc_id;
1152 oa->o_gr = ucreatp.lrc_group;
/* o_nlink is overloaded to carry the target OST index for recreate. */
1153 oa->o_nlink = ucreatp.lrc_ost_idx;
1154 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1155 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1156 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1157 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1159 memcpy(lsm2, lsm, lsm_size);
1160 rc = obd_create(exp, oa, &lsm2, &oti);
1162 OBD_FREE(lsm2, lsm_size);
1165 ll_inode_size_unlock(inode, 0);
/* Set striping info on a file that does not yet have stripes: performs
 * an intent open carrying @lum so the MDS creates the layout, then
 * immediately releases the open handle. Fails if stripes already exist. */
1170 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1171 int flags, struct lov_user_md *lum, int lum_size)
1173 struct lov_stripe_md *lsm;
1174 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
/* Size lock guards the lli_smd check against concurrent layout setup. */
1178 ll_inode_size_lock(inode, 0);
1179 lsm = ll_i2info(inode)->lli_smd;
1181 ll_inode_size_unlock(inode, 0);
1182 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1187 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1190 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1191 GOTO(out_req_free, rc = -ENOENT);
1192 rc = oit.d.lustre.it_status;
1194 GOTO(out_req_free, rc);
/* Only the layout was wanted; close the server open handle again. */
1196 ll_release_openhandle(file->f_dentry, &oit);
1199 ll_inode_size_unlock(inode, 0);
1200 ll_intent_release(&oit);
1203 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping metadata) for @filename under @inode's
 * directory via md_getattr_name(). Validates the LOV magic, byte-swaps
 * the EA to host endianness when needed, and expands LOV_MAGIC_JOIN
 * layouts into a lov_user_md_join the caller can consume. On success
 * *lmmp/*lmm_size point into (or replace) the reply buffer and *request
 * holds the reply reference the caller must release. */
1207 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1208 struct lov_mds_md **lmmp, int *lmm_size,
1209 struct ptlrpc_request **request)
1211 struct ll_sb_info *sbi = ll_i2sbi(inode);
1212 struct mdt_body *body;
1213 struct lov_mds_md *lmm = NULL;
1214 struct ptlrpc_request *req = NULL;
1215 struct obd_capa *oc;
/* Ask the MDS how large an EA buffer we must request. */
1218 rc = ll_get_max_mdsize(sbi, &lmmsize);
1222 oc = ll_mdscapa_get(inode);
1223 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1224 oc, filename, strlen(filename) + 1,
1225 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1226 ll_i2suppgid(inode), &req);
1229 CDEBUG(D_INFO, "md_getattr_name failed "
1230 "on %s: rc %d\n", filename, rc);
1234 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1235 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1237 lmmsize = body->eadatasize;
1239 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1241 GOTO(out, rc = -ENODATA);
1244 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1245 LASSERT(lmm != NULL);
1247 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1248 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
1249 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
1250 GOTO(out, rc = -EPROTO);
1254 * This is coming from the MDS, so is probably in
1255 * little endian. We convert it to host endian before
1256 * passing it to userspace.
/* Only swab on big-endian hosts (LOV_MAGIC differs from its LE form). */
1258 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1259 /* if function called for directory - we should
1260 * avoid swab not existent lsm objects */
1261 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1262 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1263 if (S_ISREG(body->mode))
1264 lustre_swab_lov_user_md_objects(
1265 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1266 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1267 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1268 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1269 if (S_ISREG(body->mode))
1270 lustre_swab_lov_user_md_objects(
1271 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1272 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1273 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
1274 lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
/* JOIN layout: unpack to an lsm, then flatten the per-extent stripe
 * data into a lov_user_md_join array for userspace. */
1278 if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
1279 struct lov_stripe_md *lsm;
1280 struct lov_user_md_join *lmj;
1281 int lmj_size, i, aindex = 0;
1283 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
1285 GOTO(out, rc = -ENOMEM);
1286 rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm);
1288 GOTO(out_free_memmd, rc);
1290 lmj_size = sizeof(struct lov_user_md_join) +
1291 lsm->lsm_stripe_count *
1292 sizeof(struct lov_user_ost_data_join);
1293 OBD_ALLOC(lmj, lmj_size);
1295 GOTO(out_free_memmd, rc = -ENOMEM);
1297 memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
1298 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1299 struct lov_extent *lex =
1300 &lsm->lsm_array->lai_ext_array[aindex];
/* Advance to the extent covering stripe @i. */
1302 if (lex->le_loi_idx + lex->le_stripe_count <= i)
1304 CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
1305 LPU64" len %d\n", aindex, i,
1306 lex->le_start, (int)lex->le_len);
1307 lmj->lmm_objects[i].l_extent_start =
/* le_len == -1 marks an extent that runs to EOF. */
1310 if ((int)lex->le_len == -1)
1311 lmj->lmm_objects[i].l_extent_end = -1;
1313 lmj->lmm_objects[i].l_extent_end =
1314 lex->le_start + lex->le_len;
1315 lmj->lmm_objects[i].l_object_id =
1316 lsm->lsm_oinfo[i]->loi_id;
1317 lmj->lmm_objects[i].l_object_gr =
1318 lsm->lsm_oinfo[i]->loi_gr;
1319 lmj->lmm_objects[i].l_ost_gen =
1320 lsm->lsm_oinfo[i]->loi_ost_gen;
1321 lmj->lmm_objects[i].l_ost_idx =
1322 lsm->lsm_oinfo[i]->loi_ost_idx;
/* Hand the freshly-built join EA back in place of the reply buffer;
 * NOTE(review): ownership of @lmj passes to the caller via *lmmp —
 * confirm callers free it with the matching size. */
1324 lmm = (struct lov_mds_md *)lmj;
1327 obd_free_memmd(sbi->ll_dt_exp, &lsm);
1331 *lmm_size = lmmsize;
/*
 * ll_lov_setea (LL_IOC_LOV_SETEA handler): copy a lov_user_md plus one
 * lov_user_ost_data record from userspace and apply it through
 * ll_lov_setstripe_ea_info().  Restricted to CFS_CAP_SYS_ADMIN.
 * NOTE(review): interior lines (RETURNs, braces) are elided in this excerpt.
 */
1336 static int ll_lov_setea(struct inode *inode, struct file *file,
1339 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1340 struct lov_user_md *lump;
/* buffer sized for exactly one OST entry after the header */
1341 int lum_size = sizeof(struct lov_user_md) +
1342 sizeof(struct lov_user_ost_data);
1346 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1349 OBD_ALLOC(lump, lum_size);
1353 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
/* free on copy-in failure before the (elided) error return */
1354 OBD_FREE(lump, lum_size);
1358 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1360 OBD_FREE(lump, lum_size);
/*
 * ll_lov_setstripe (LL_IOC_LOV_SETSTRIPE handler): read a v1 or v3
 * lov_user_md from userspace, set the stripe EA, then echo the resulting
 * striping back to the caller via an LL_IOC_LOV_GETSTRIPE iocontrol.
 */
1364 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1367 struct lov_user_md_v3 lumv3;
/* lumv1 aliases the head of lumv3: v1 is a prefix of v3 */
1368 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1369 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1370 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1373 int flags = FMODE_WRITE;
1376 /* first try with v1 which is smaller than v3 */
1377 lum_size = sizeof(struct lov_user_md_v1);
1378 if (copy_from_user(lumv1, lumv1p, lum_size))
/* magic says v3: re-copy the full v3 structure */
1381 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1382 lum_size = sizeof(struct lov_user_md_v3);
1383 if (copy_from_user(&lumv3, lumv3p, lum_size))
1387 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* on success, report striping back to userspace (count zeroed first) */
1389 put_user(0, &lumv1p->lmm_stripe_count);
1390 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1391 0, ll_i2info(inode)->lli_smd,
/*
 * ll_lov_getstripe (LL_IOC_LOV_GETSTRIPE handler): return the inode's
 * striping metadata (lli_smd) to userspace via obd_iocontrol.
 */
1397 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1399 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1404 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * ll_get_grouplock (LL_IOC_GROUP_LOCK handler): acquire a cl-layer group
 * lock with group id 'arg' for this file descriptor.  fd_flags and
 * fd_grouplock are protected by lli->lli_lock; the lock is dropped around
 * the (potentially blocking) cl_get_grouplock() call, so a second check
 * afterwards handles the race where another thread won in the meantime.
 */
1408 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1410 struct ll_inode_info *lli = ll_i2info(inode);
1411 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1412 struct ccc_grouplock grouplock;
1416 spin_lock(&lli->lli_lock);
/* already holding a group lock on this fd: refuse a second one */
1417 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1418 CERROR("group lock already existed with gid %lu\n",
1419 fd->fd_grouplock.cg_gid);
1420 spin_unlock(&lli->lli_lock);
1423 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1424 spin_unlock(&lli->lli_lock);
/* may block unless O_NONBLOCK was given on the file */
1426 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1427 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1431 spin_lock(&lli->lli_lock);
/* re-check under the lock: another thread may have raced us */
1432 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1433 spin_unlock(&lli->lli_lock);
1434 CERROR("another thread just won the race\n");
1435 cl_put_grouplock(&grouplock);
/* record the grouplock; IGNORE_LOCK suppresses normal extent locking */
1439 fd->fd_flags |= (LL_FILE_GROUP_LOCKED | LL_FILE_IGNORE_LOCK);
1440 fd->fd_grouplock = grouplock;
1441 spin_unlock(&lli->lli_lock);
1443 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * ll_put_grouplock (LL_IOC_GROUP_UNLOCK handler): release the group lock
 * held on this file descriptor, verifying that one is held and that its
 * gid matches 'arg'.  State is cleared under lli->lli_lock before the
 * cl-layer release so no other thread can see a half-released lock.
 */
1447 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1449 struct ll_inode_info *lli = ll_i2info(inode);
1450 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1451 struct ccc_grouplock grouplock;
1454 spin_lock(&lli->lli_lock);
1455 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1456 spin_unlock(&lli->lli_lock);
1457 CERROR("no group lock held\n");
1460 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* caller must pass the same gid the lock was taken with */
1462 if (fd->fd_grouplock.cg_gid != arg) {
1463 CERROR("group lock %lu doesn't match current id %lu\n",
1464 arg, fd->fd_grouplock.cg_gid);
1465 spin_unlock(&lli->lli_lock);
/* take a local copy, then clear the per-fd state under the spinlock */
1469 grouplock = fd->fd_grouplock;
1470 fd->fd_grouplock.cg_env = NULL;
1471 fd->fd_grouplock.cg_lock = NULL;
1472 fd->fd_grouplock.cg_gid = 0;
1473 fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED | LL_FILE_IGNORE_LOCK);
1474 spin_unlock(&lli->lli_lock);
/* release outside the spinlock */
1476 cl_put_grouplock(&grouplock);
1477 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1481 #if LUSTRE_FIX >= 50
/*
 * join_sanity_check: validate a file-join request.  Rejects when the
 * server lacks join support, when either inode is not a regular file,
 * when head and tail are the same inode, or when the head size is not a
 * multiple of JOIN_FILE_ALIGN (64K per the message below).
 */
1482 static int join_sanity_check(struct inode *head, struct inode *tail)
1485 if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
1486 CERROR("server do not support join \n");
1489 if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
1490 CERROR("tail ino %lu and ino head %lu must be regular\n",
1491 head->i_ino, tail->i_ino);
1494 if (head->i_ino == tail->i_ino) {
1495 CERROR("file %lu can not be joined to itself \n", head->i_ino);
1498 if (i_size_read(head) % JOIN_FILE_ALIGN) {
1499 CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
/*
 * join_file: issue the MDS join operation by enqueueing an IT_OPEN intent
 * with M_JOIN_FILE create mode against the head inode, naming the tail
 * via its dentry.  Any lock granted with the open is dropped immediately,
 * and the open handle obtained as a side effect is closed again through
 * ll_release_openhandle().
 */
1505 static int join_file(struct inode *head_inode, struct file *head_filp,
1506 struct file *tail_filp)
1508 struct dentry *tail_dentry = tail_filp->f_dentry;
1509 struct lookup_intent oit = {.it_op = IT_OPEN,
1510 .it_flags = head_filp->f_flags,
1511 .it_create_mode = M_JOIN_FILE};
1512 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CW,
1513 ll_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL };
1515 struct lustre_handle lockh;
1516 struct md_op_data *op_data;
1521 tail_dentry = tail_filp->f_dentry;
/* 'data' carries the head file size to the MDS op */
1523 data = i_size_read(head_inode);
1524 op_data = ll_prep_md_op_data(NULL, head_inode,
1525 tail_dentry->d_parent->d_inode,
1526 tail_dentry->d_name.name,
1527 tail_dentry->d_name.len, 0,
1528 LUSTRE_OPC_ANY, &data);
1529 if (IS_ERR(op_data))
1530 RETURN(PTR_ERR(op_data));
1532 rc = md_enqueue(ll_i2mdexp(head_inode), &einfo, &oit,
1533 op_data, &lockh, NULL, 0, NULL, 0);
1535 ll_finish_md_op_data(op_data);
1539 rc = oit.d.lustre.it_status;
/* intent-level open error overrides a zero rc */
1541 if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
1542 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
1543 ptlrpc_req_finished((struct ptlrpc_request *)
1544 oit.d.lustre.it_data);
1548 if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
1550 ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
1551 oit.d.lustre.it_lock_mode = 0;
1553 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1554 it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
/* close the open handle the join intent created */
1555 ll_release_openhandle(head_filp->f_dentry, &oit);
1557 ll_intent_release(&oit);
/*
 * ll_file_join: join the file named by 'filename_tail' onto 'head'.
 * Opens the tail, takes EX extent locks on both inodes in ascending-ino
 * order to avoid ABBA deadlock, sanity-checks the pair, performs the MDS
 * join, and on success invalidates the head's cached striping (lli_smd)
 * since the layout changed.  'cleanup_phase' drives the fall-through
 * unwind switch at the end (case labels are elided in this excerpt).
 */
1561 static int ll_file_join(struct inode *head, struct file *filp,
1562 char *filename_tail)
1564 struct inode *tail = NULL, *first = NULL, *second = NULL;
1565 struct dentry *tail_dentry;
1566 struct file *tail_filp, *first_filp, *second_filp;
1567 struct ll_lock_tree first_tree, second_tree;
1568 struct ll_lock_tree_node *first_node, *second_node;
1569 struct ll_inode_info *hlli = ll_i2info(head);
1570 int rc = 0, cleanup_phase = 0;
1573 CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
1574 head->i_ino, head->i_generation, head, filename_tail);
1576 tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
1577 if (IS_ERR(tail_filp)) {
1578 CERROR("Can not open tail file %s", filename_tail);
1579 rc = PTR_ERR(tail_filp);
1582 tail = igrab(tail_filp->f_dentry->d_inode);
1584 tail_dentry = tail_filp->f_dentry;
1585 LASSERT(tail_dentry);
/* order the two inodes by ino so locks are always taken in one order */
1588 /* reorder the inodes for a consistent lock sequence */
1589 first = head->i_ino > tail->i_ino ? head : tail;
1590 second = head->i_ino > tail->i_ino ? tail : head;
1591 first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
1592 second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
1594 CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
1595 head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
/* EX lock over the whole first object */
1596 first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
1597 if (IS_ERR(first_node)){
1598 rc = PTR_ERR(first_node);
1601 first_tree.lt_fd = first_filp->private_data;
1602 rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
/* EX lock over the whole second object */
1607 second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
1608 if (IS_ERR(second_node)){
1609 rc = PTR_ERR(second_node);
1612 second_tree.lt_fd = second_filp->private_data;
1613 rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
1618 rc = join_sanity_check(head, tail);
1622 rc = join_file(head, filp, tail_filp);
/* unwind in reverse order of acquisition; cases fall through */
1626 switch (cleanup_phase) {
1628 ll_tree_unlock(&second_tree);
1629 obd_cancel_unused(ll_i2dtexp(second),
1630 ll_i2info(second)->lli_smd, 0, NULL);
1632 ll_tree_unlock(&first_tree);
1633 obd_cancel_unused(ll_i2dtexp(first),
1634 ll_i2info(first)->lli_smd, 0, NULL);
1636 filp_close(tail_filp, 0);
/* join succeeded: drop the stale striping so it is refetched */
1639 if (head && rc == 0) {
1640 obd_free_memmd(ll_i2sbi(head)->ll_dt_exp,
1642 hlli->lli_smd = NULL;
1647 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
1652 #endif /* LUSTRE_FIX >= 50 */
1655 * Close inode open handle
1657 * \param dentry [in] dentry which contains the inode
1658 * \param it [in,out] intent which contains open info and result
1661 * \retval <0 failure
/*
 * Closes the MDS open handle recorded in the intent: no-op for the root
 * dentry or when the intent carries no DISP_OPEN_OPEN disposition;
 * otherwise fills an obd_client_handle from the intent and closes it via
 * ll_close_inode_openhandle(), then drops the open request reference.
 */
1663 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1665 struct inode *inode = dentry->d_inode;
1666 struct obd_client_handle *och;
1672 /* Root ? Do nothing. */
1673 if (dentry->d_inode->i_sb->s_root == dentry)
1676 /* No open handle to close? Move away */
1677 if (!it_disposition(it, DISP_OPEN_OPEN))
1680 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1682 OBD_ALLOC(och, sizeof(*och));
1684 GOTO(out, rc = -ENOMEM);
1686 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1687 ll_i2info(inode), it, och);
1689 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1692 /* this one is in place of ll_file_open */
1693 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1694 ptlrpc_req_finished(it->d.lustre.it_data);
1695 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1700 * Get size for inode for which FIEMAP mapping is requested.
1701 * Make the FIEMAP get_info call and returns the result.
/*
 * Builds a ll_fiemap_info_key (KEY_FIEMAP) from the inode's striping and
 * obdo attributes and asks the data export for the extent mapping via
 * obd_get_info(), writing the result back into 'fiemap'.
 */
1703 int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1706 struct obd_export *exp = ll_i2dtexp(inode);
1707 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1708 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1709 int vallen = num_bytes;
1713 /* If the stripe_count > 1 and the application does not understand
1714 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1716 if (lsm->lsm_stripe_count > 1 &&
1717 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1720 fm_key.oa.o_id = lsm->lsm_object_id;
1721 fm_key.oa.o_gr = lsm->lsm_object_gr;
1722 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1724 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1727 /* If filesize is 0, then there would be no objects for mapping */
1728 if (fm_key.oa.o_size == 0) {
1729 fiemap->fm_mapped_extents = 0;
1733 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1735 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1737 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * ll_fid2path (OBD_IOC_FID2PATH handler): copy the user's fid2path
 * request in, allocate an output buffer sized by the requested
 * gf_pathlen, forward the request to the MD export via obd_iocontrol,
 * and copy the resolved path back to userspace.
 */
1742 int ll_fid2path(struct obd_export *exp, void *arg)
1744 struct getinfo_fid2path *gfout, *gfin;
1748 /* Need to get the buflen */
1749 OBD_ALLOC_PTR(gfin);
1752 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* output = fixed header + caller-specified path buffer */
1757 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1758 OBD_ALLOC(gfout, outsize);
1759 if (gfout == NULL) {
1763 memcpy(gfout, gfin, sizeof(*gfout));
1766 /* Call mdc_iocontrol */
1767 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1770 if (copy_to_user(arg, gfout, outsize))
1774 OBD_FREE(gfout, outsize);
/*
 * ll_file_ioctl: file-level ioctl dispatcher for llite.  Handles the
 * per-fd flag ioctls, striping get/set, FIEMAP, file join (beta builds),
 * group locks, statfs, fid/path translation, and finally falls through to
 * dynamically registered handlers (ll_iocontrol_call) and the data-device
 * obd_iocontrol.
 */
1778 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1781 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1785 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1786 inode->i_generation, inode, cmd);
1787 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1789 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1790 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1794 case LL_IOC_GETFLAGS:
1795 /* Get the current value of the file flags */
1796 return put_user(fd->fd_flags, (int *)arg);
1797 case LL_IOC_SETFLAGS:
1798 case LL_IOC_CLRFLAGS:
1799 /* Set or clear specific file flags */
1800 /* XXX This probably needs checks to ensure the flags are
1801 * not abused, and to handle any flag side effects.
1803 if (get_user(flags, (int *) arg))
1806 if (cmd == LL_IOC_SETFLAGS) {
/* IGNORE_LOCK is only safe for O_DIRECT I/O */
1807 if ((flags & LL_FILE_IGNORE_LOCK) &&
1808 !(file->f_flags & O_DIRECT)) {
1809 CERROR("%s: unable to disable locking on "
1810 "non-O_DIRECT file\n", current->comm);
1814 fd->fd_flags |= flags;
1816 fd->fd_flags &= ~flags;
1819 case LL_IOC_LOV_SETSTRIPE:
1820 RETURN(ll_lov_setstripe(inode, file, arg));
1821 case LL_IOC_LOV_SETEA:
1822 RETURN(ll_lov_setea(inode, file, arg));
1823 case LL_IOC_LOV_GETSTRIPE:
1824 RETURN(ll_lov_getstripe(inode, arg));
1825 case LL_IOC_RECREATE_OBJ:
1826 RETURN(ll_lov_recreate_obj(inode, file, arg));
1827 case FSFILT_IOC_FIEMAP: {
1828 struct ll_user_fiemap *fiemap_s;
1829 size_t num_bytes, ret_bytes;
1830 unsigned int extent_count;
1833 /* Get the extent count so we can calculate the size of
1834 * required fiemap buffer */
1835 if (get_user(extent_count,
1836 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1838 num_bytes = sizeof(*fiemap_s) + (extent_count *
1839 sizeof(struct ll_fiemap_extent));
/* vmalloc: buffer can be large for many extents */
1840 OBD_VMALLOC(fiemap_s, num_bytes);
1841 if (fiemap_s == NULL)
1844 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1846 GOTO(error, rc = -EFAULT);
/* unsupported flags: echo back the unsupported set, then -EBADR */
1848 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1849 fiemap_s->fm_flags = fiemap_s->fm_flags &
1850 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1851 if (copy_to_user((char *)arg, fiemap_s,
1853 GOTO(error, rc = -EFAULT);
1855 GOTO(error, rc = -EBADR);
1858 /* If fm_extent_count is non-zero, read the first extent since
1859 * it is used to calculate end_offset and device from previous
1862 if (copy_from_user(&fiemap_s->fm_extents[0],
1863 (char __user *)arg + sizeof(*fiemap_s),
1864 sizeof(struct ll_fiemap_extent)))
1865 GOTO(error, rc = -EFAULT);
1868 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1871 rc = filemap_fdatawrite(inode->i_mapping);
1876 rc = ll_fiemap(inode, fiemap_s, num_bytes);
1880 ret_bytes = sizeof(struct ll_user_fiemap);
1882 if (extent_count != 0)
1883 ret_bytes += (fiemap_s->fm_mapped_extents *
1884 sizeof(struct ll_fiemap_extent));
1886 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1890 OBD_VFREE(fiemap_s, num_bytes);
1893 case FSFILT_IOC_GETFLAGS:
1894 case FSFILT_IOC_SETFLAGS:
1895 RETURN(ll_iocontrol(inode, file, cmd, arg));
1896 case FSFILT_IOC_GETVERSION_OLD:
1897 case FSFILT_IOC_GETVERSION:
1898 RETURN(put_user(inode->i_generation, (int *)arg));
1900 #if LUSTRE_FIX >= 50
1901 /* Allow file join in beta builds to allow debugging */
1905 ftail = getname((const char *)arg);
1907 RETURN(PTR_ERR(ftail));
1908 rc = ll_file_join(inode, file, ftail);
1912 CWARN("file join is not supported in this version of Lustre\n");
1916 case LL_IOC_GROUP_LOCK:
1917 RETURN(ll_get_grouplock(inode, file, arg));
1918 case LL_IOC_GROUP_UNLOCK:
1919 RETURN(ll_put_grouplock(inode, file, arg));
1920 case IOC_OBD_STATFS:
1921 RETURN(ll_obd_statfs(inode, (void *)arg));
1923 /* We need to special case any other ioctls we want to handle,
1924 * to send them to the MDS/OST as appropriate and to properly
1925 * network encode the arg field.
1926 case FSFILT_IOC_SETVERSION_OLD:
1927 case FSFILT_IOC_SETVERSION:
1929 case LL_IOC_FLUSHCTX:
1930 RETURN(ll_flush_ctx(inode));
1931 case LL_IOC_PATH2FID: {
1932 if (copy_to_user((void *)arg, ll_inode2fid(inode),
1933 sizeof(struct lu_fid)))
1938 case OBD_IOC_FID2PATH:
1939 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
/* unknown cmd: try dynamically-registered handlers, then the OBD */
1945 ll_iocontrol_call(inode, file, cmd, arg, &err))
1948 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * ll_file_seek: llseek implementation.  For SEEK_END (origin == 2) the
 * cluster-wide size is refreshed with cl_glimpse_size() before reading
 * i_size under the inode size lock; the new position is validated against
 * ll_file_maxbytes() before updating f_pos.
 */
1954 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1956 struct inode *inode = file->f_dentry->d_inode;
/* retval computed here only for the trace message below */
1959 retval = offset + ((origin == 2) ? i_size_read(inode) :
1960 (origin == 1) ? file->f_pos : 0);
1961 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1962 inode->i_ino, inode->i_generation, inode, retval, retval,
1963 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1964 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1966 if (origin == 2) { /* SEEK_END */
1967 int nonblock = 0, rc;
1969 if (file->f_flags & O_NONBLOCK)
1970 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* refresh i_size from the OSTs before using it */
1972 rc = cl_glimpse_size(inode);
1976 ll_inode_size_lock(inode, 0);
1977 offset += i_size_read(inode);
1978 ll_inode_size_unlock(inode, 0);
1979 } else if (origin == 1) { /* SEEK_CUR */
1980 offset += file->f_pos;
1984 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1985 if (offset != file->f_pos) {
1986 file->f_pos = offset;
/*
 * ll_fsync: flush an inode to stable storage.  Waits for in-flight page
 * I/O, harvests any recorded async write errors (per-inode and per-lsm),
 * syncs the metadata via md_sync(), and for striped files syncs the data
 * objects via obd_sync() over the whole object range.
 */
1994 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1996 struct inode *inode = dentry->d_inode;
1997 struct ll_inode_info *lli = ll_i2info(inode);
1998 struct lov_stripe_md *lsm = lli->lli_smd;
1999 struct ptlrpc_request *req;
2000 struct obd_capa *oc;
2003 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2004 inode->i_generation, inode);
2005 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2007 /* fsync's caller has already called _fdata{sync,write}, we want
2008 * that IO to finish before calling the osc and mdc sync methods */
2009 rc = filemap_fdatawait(inode->i_mapping);
2011 /* catch async errors that were recorded back when async writeback
2012 * failed for pages in this mapping. */
2013 err = lli->lli_async_rc;
2014 lli->lli_async_rc = 0;
2018 err = lov_test_and_clear_async_rc(lsm);
/* sync the MDS inode (capability-protected) */
2023 oc = ll_mdscapa_get(inode);
2024 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2030 ptlrpc_req_finished(req);
2037 RETURN(rc ? rc : -ENOMEM);
/* build an obdo describing the data objects to sync */
2039 oa->o_id = lsm->lsm_object_id;
2040 oa->o_gr = lsm->lsm_object_gr;
2041 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2042 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
2043 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
2046 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
/* sync the full object range on the OSTs */
2047 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
2048 0, OBD_OBJECT_EOF, oc);
/*
 * ll_file_flock: implement flock()/fcntl() byte-range locks by enqueueing
 * an LDLM_FLOCK lock on the MDS.  The fcntl lock type maps to an ldlm
 * mode (F_RDLCK->PR, F_WRLCK->PW, F_UNLCK->NL); the command selects
 * enqueue flags (non-blocking, test-only).  On success the result is
 * mirrored into the local kernel lock lists so the VFS sees it.
 */
2058 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2060 struct inode *inode = file->f_dentry->d_inode;
2061 struct ll_sb_info *sbi = ll_i2sbi(inode);
2062 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2063 .ei_cb_cp =ldlm_flock_completion_ast,
2064 .ei_cbdata = file_lock };
2065 struct md_op_data *op_data;
2066 struct lustre_handle lockh = {0};
2067 ldlm_policy_data_t flock;
2072 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2073 inode->i_ino, file_lock);
2075 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2077 if (file_lock->fl_flags & FL_FLOCK) {
2078 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2079 /* set missing params for flock() calls */
2080 file_lock->fl_end = OFFSET_MAX;
2081 file_lock->fl_pid = current->tgid;
2083 flock.l_flock.pid = file_lock->fl_pid;
2084 flock.l_flock.start = file_lock->fl_start;
2085 flock.l_flock.end = file_lock->fl_end;
/* map the fcntl lock type to an ldlm lock mode */
2087 switch (file_lock->fl_type) {
2089 einfo.ei_mode = LCK_PR;
2092 /* An unlock request may or may not have any relation to
2093 * existing locks so we may not be able to pass a lock handle
2094 * via a normal ldlm_lock_cancel() request. The request may even
2095 * unlock a byte range in the middle of an existing lock. In
2096 * order to process an unlock request we need all of the same
2097 * information that is given with a normal read or write record
2098 * lock request. To avoid creating another ldlm unlock (cancel)
2099 * message we'll treat a LCK_NL flock request as an unlock. */
2100 einfo.ei_mode = LCK_NL;
2103 einfo.ei_mode = LCK_PW;
2106 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* non-blocking variant */
2121 flags = LDLM_FL_BLOCK_NOWAIT;
/* F_GETLK-style query: test only, never grant */
2127 flags = LDLM_FL_TEST_LOCK;
2128 /* Save the old mode so that if the mode in the lock changes we
2129 * can decrement the appropriate reader or writer refcount. */
2130 file_lock->fl_type = einfo.ei_mode;
2133 CERROR("unknown fcntl lock command: %d\n", cmd);
2137 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2138 LUSTRE_OPC_ANY, NULL);
2139 if (IS_ERR(op_data))
2140 RETURN(PTR_ERR(op_data));
2142 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2143 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2144 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2146 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2147 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2149 ll_finish_md_op_data(op_data);
/* mirror the granted/released lock into the local VFS lock lists */
2151 if ((file_lock->fl_flags & FL_FLOCK) &&
2152 (rc == 0 || file_lock->fl_type == F_UNLCK))
2153 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2154 #ifdef HAVE_F_OP_FLOCK
2155 if ((file_lock->fl_flags & FL_POSIX) &&
2156 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2157 !(flags & LDLM_FL_TEST_LOCK))
2158 posix_lock_file_wait(file, file_lock);
/*
 * ll_file_noflock: lock handler installed for -o noflock mounts; per the
 * comment at the ll_file_operations_noflock table it returns ENOSYS for
 * flock calls.  (Body elided in this excerpt.)
 */
2164 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * ll_have_md_lock: non-destructively test (LDLM_FL_TEST_LOCK) whether
 * this client already holds a granted MDS inodebits lock covering 'bits'
 * on the inode, in any of CR/CW/PR/PW modes.
 */
2171 int ll_have_md_lock(struct inode *inode, __u64 bits)
2173 struct lustre_handle lockh;
2174 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2182 fid = &ll_i2info(inode)->lli_fid;
2183 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2185 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2186 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2187 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * ll_take_md_lock: like ll_have_md_lock but without LDLM_FL_TEST_LOCK --
 * on a match a reference is taken and the handle is returned to the
 * caller through 'lockh' (caller must release it).
 */
2193 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2194 struct lustre_handle *lockh)
2196 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2202 fid = &ll_i2info(inode)->lli_fid;
2203 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2205 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2206 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2207 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * ll_inode_revalidate_fini: common tail for revalidation.  -ENOENT on an
 * already-unlinked inode is converted to success (nlink updated); other
 * errors are logged.
 */
2211 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2212 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2213 * and return success */
2215 /* This path cannot be hit for regular files unless in
2216 * case of obscure races, so no need to validate
2218 if (!S_ISREG(inode->i_mode) &&
2219 !S_ISDIR(inode->i_mode))
2224 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * __ll_inode_revalidate_it: refresh cached attributes for a dentry.
 * When the MDS supports OBD_CONNECT_ATTRFID, performs a by-fid
 * IT_GETATTR intent lock (no name), dropping the dentry if the object
 * was unlinked; otherwise, if no covering inodebits lock is already held
 * ('ibits'), does a plain md_getattr and updates the inode from the
 * reply.
 */
2232 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2235 struct inode *inode = dentry->d_inode;
2236 struct ptlrpc_request *req = NULL;
2237 struct ll_sb_info *sbi;
2238 struct obd_export *exp;
2243 CERROR("REPORT THIS LINE TO PETER\n");
2246 sbi = ll_i2sbi(inode);
2248 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2249 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2251 exp = ll_i2mdexp(inode);
2253 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2254 struct lookup_intent oit = { .it_op = IT_GETATTR };
2255 struct md_op_data *op_data;
2257 /* Call getattr by fid, so do not provide name at all. */
2258 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2259 dentry->d_inode, NULL, 0, 0,
2260 LUSTRE_OPC_ANY, NULL);
2261 if (IS_ERR(op_data))
2262 RETURN(PTR_ERR(op_data));
2264 oit.it_create_mode |= M_CHECK_STALE;
2265 rc = md_intent_lock(exp, op_data, NULL, 0,
2266 /* we are not interested in name
2269 ll_md_blocking_ast, 0);
2270 ll_finish_md_op_data(op_data);
2271 oit.it_create_mode &= ~M_CHECK_STALE;
2273 rc = ll_inode_revalidate_fini(inode, rc);
2277 rc = ll_revalidate_it_finish(req, &oit, dentry);
2279 ll_intent_release(&oit);
2283 /* Unlinked? Unhash dentry, so it is not picked up later by
2284 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2285 here to preserve get_cwd functionality on 2.6.
2287 if (!dentry->d_inode->i_nlink) {
2288 spin_lock(&ll_lookup_lock);
2289 spin_lock(&dcache_lock);
2290 ll_drop_dentry(dentry);
2291 spin_unlock(&dcache_lock);
2292 spin_unlock(&ll_lookup_lock);
2295 ll_lookup_finish_locks(&oit, dentry);
/* no ATTRFID: fall back to md_getattr unless a lock already covers us */
2296 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2298 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2299 obd_valid valid = OBD_MD_FLGETATTR;
2300 struct obd_capa *oc;
/* regular files: also fetch the striping EA (sized to the max) */
2303 if (S_ISREG(inode->i_mode)) {
2304 rc = ll_get_max_mdsize(sbi, &ealen);
2307 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2309 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2310 * capa for this inode. Because we only keep capas of dirs
2312 oc = ll_mdscapa_get(inode);
2313 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2317 rc = ll_inode_revalidate_fini(inode, rc);
2321 rc = ll_prep_inode(&inode, req, NULL);
2324 ptlrpc_req_finished(req);
/*
 * ll_inode_revalidate_it: revalidate UPDATE|LOOKUP inodebits, then
 * refresh the file size via cl_glimpse_size() -- skipped when no data
 * objects have been allocated yet (lli_smd == NULL).
 */
2328 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2333 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2334 MDS_INODELOCK_LOOKUP);
2336 /* if object not yet allocated, don't validate size */
2337 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2340 /* cl_glimpse_size will prefer locally cached writes if they extend
2344 rc = cl_glimpse_size(dentry->d_inode);
/*
 * ll_getattr_it: getattr with an explicit lookup intent.  Revalidates
 * the inode first, then fills the kstat from the (now fresh) inode
 * fields; size/blocks are read under the inode size lock.
 */
2349 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2350 struct lookup_intent *it, struct kstat *stat)
2352 struct inode *inode = de->d_inode;
2355 res = ll_inode_revalidate_it(de, it);
2356 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2361 stat->dev = inode->i_sb->s_dev;
2362 stat->ino = inode->i_ino;
2363 stat->mode = inode->i_mode;
2364 stat->nlink = inode->i_nlink;
2365 stat->uid = inode->i_uid;
2366 stat->gid = inode->i_gid;
2367 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2368 stat->atime = inode->i_atime;
2369 stat->mtime = inode->i_mtime;
2370 stat->ctime = inode->i_ctime;
2371 #ifdef HAVE_INODE_BLKSIZE
2372 stat->blksize = inode->i_blksize;
2374 stat->blksize = 1 << inode->i_blkbits;
/* size and blocks must be read consistently under the size lock */
2377 ll_inode_size_lock(inode, 0);
2378 stat->size = i_size_read(inode);
2379 stat->blocks = inode->i_blocks;
2380 ll_inode_size_unlock(inode, 0);
/* ll_getattr: VFS getattr entry point; wraps ll_getattr_it with a fresh
 * IT_GETATTR intent. */
2384 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2386 struct lookup_intent it = { .it_op = IT_GETATTR };
2388 return ll_getattr_it(mnt, de, &it, stat);
/*
 * lustre_check_acl: check 'mask' against the POSIX ACL cached on the
 * inode (lli_posix_acl).  The ACL is duplicated under lli_lock so the
 * permission check runs on a stable copy.  Compiled out without
 * CONFIG_FS_POSIX_ACL.
 */
2392 int lustre_check_acl(struct inode *inode, int mask)
2394 #ifdef CONFIG_FS_POSIX_ACL
2395 struct ll_inode_info *lli = ll_i2info(inode);
2396 struct posix_acl *acl;
2400 spin_lock(&lli->lli_lock);
2401 acl = posix_acl_dup(lli->lli_posix_acl);
2402 spin_unlock(&lli->lli_lock);
2407 rc = posix_acl_permission(inode, acl, mask);
2408 posix_acl_release(acl);
2416 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
/*
 * ll_inode_permission (kernels >= 2.6.10): revalidate the root inode
 * first (it is never validated during lookup), delegate to the remote
 * permission check for remote-client mounts, and otherwise use
 * generic_permission() with lustre_check_acl as the ACL callback.
 */
2417 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2422 /* as root inode are NOT getting validated in lookup operation,
2423 * need to do it before permission check. */
2425 if (inode == inode->i_sb->s_root->d_inode) {
2426 struct lookup_intent it = { .it_op = IT_LOOKUP };
2428 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2429 MDS_INODELOCK_LOOKUP);
2434 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2435 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2437 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2438 return lustre_check_remote_perm(inode, mask);
2440 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2441 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * ll_inode_permission (older kernels): open-coded equivalent of
 * generic_permission() -- owner/group/other mode-bit checks, ACL check
 * for the owner-mismatch path, then capability overrides
 * (CFS_CAP_DAC_OVERRIDE / CFS_CAP_DAC_READ_SEARCH).
 */
2446 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2448 int mode = inode->i_mode;
2451 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2452 inode->i_ino, inode->i_generation, inode, mask);
2454 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2455 return lustre_check_remote_perm(inode, mask);
2457 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* read-only filesystem refuses writes to reg files, dirs, symlinks */
2459 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2460 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2462 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2464 if (current->fsuid == inode->i_uid) {
2467 if (((mode >> 3) & mask & S_IRWXO) != mask)
2469 rc = lustre_check_acl(inode, mask);
2473 goto check_capabilities;
2477 if (in_group_p(inode->i_gid))
2480 if ((mode & mask & S_IRWXO) == mask)
/* capability overrides: DAC_OVERRIDE except exec on non-exec files */
2484 if (!(mask & MAY_EXEC) ||
2485 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2486 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2489 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2490 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select the vectored-I/O file_operations slot names and handlers based
 * on whether this kernel still has .readv/.writev (older) or uses
 * .aio_read/.aio_write (newer). */
2497 #ifdef HAVE_FILE_READV
2498 #define READ_METHOD readv
2499 #define READ_FUNCTION ll_file_readv
2500 #define WRITE_METHOD writev
2501 #define WRITE_FUNCTION ll_file_writev
2503 #define READ_METHOD aio_read
2504 #define READ_FUNCTION ll_file_aio_read
2505 #define WRITE_METHOD aio_write
2506 #define WRITE_FUNCTION ll_file_aio_write
/* -o localflock - only provides locally consistent flock locks */
/* Default file_operations table: no .flock/.lock entries, so flock
 * semantics are purely node-local. */
2510 struct file_operations ll_file_operations = {
2511 .read = ll_file_read,
2512 .READ_METHOD = READ_FUNCTION,
2513 .write = ll_file_write,
2514 .WRITE_METHOD = WRITE_FUNCTION,
2515 .ioctl = ll_file_ioctl,
2516 .open = ll_file_open,
2517 .release = ll_file_release,
2518 .mmap = ll_file_mmap,
2519 .llseek = ll_file_seek,
2520 .sendfile = ll_file_sendfile,
/* file_operations for -o flock mounts: cluster-coherent locking routed
 * through ll_file_flock (slot name depends on HAVE_F_OP_FLOCK). */
2524 struct file_operations ll_file_operations_flock = {
2525 .read = ll_file_read,
2526 .READ_METHOD = READ_FUNCTION,
2527 .write = ll_file_write,
2528 .WRITE_METHOD = WRITE_FUNCTION,
2529 .ioctl = ll_file_ioctl,
2530 .open = ll_file_open,
2531 .release = ll_file_release,
2532 .mmap = ll_file_mmap,
2533 .llseek = ll_file_seek,
2534 .sendfile = ll_file_sendfile,
2536 #ifdef HAVE_F_OP_FLOCK
2537 .flock = ll_file_flock,
2539 .lock = ll_file_flock
2542 /* These are for -o noflock - to return ENOSYS on flock calls */
2543 struct file_operations ll_file_operations_noflock = {
2544 .read = ll_file_read,
2545 .READ_METHOD = READ_FUNCTION,
2546 .write = ll_file_write,
2547 .WRITE_METHOD = WRITE_FUNCTION,
2548 .ioctl = ll_file_ioctl,
2549 .open = ll_file_open,
2550 .release = ll_file_release,
2551 .mmap = ll_file_mmap,
2552 .llseek = ll_file_seek,
2553 .sendfile = ll_file_sendfile,
2555 #ifdef HAVE_F_OP_FLOCK
2556 .flock = ll_file_noflock,
2558 .lock = ll_file_noflock
/* inode_operations for regular files; setattr slot depends on whether
 * the kernel carries the VFS intent patches. */
2561 struct inode_operations ll_file_inode_operations = {
2562 #ifdef HAVE_VFS_INTENT_PATCHES
2563 .setattr_raw = ll_setattr_raw,
2565 .setattr = ll_setattr,
2566 .truncate = ll_truncate,
2567 .getattr = ll_getattr,
2568 .permission = ll_inode_permission,
2569 .setxattr = ll_setxattr,
2570 .getxattr = ll_getxattr,
2571 .listxattr = ll_listxattr,
2572 .removexattr = ll_removexattr,
2575 /* dynamic ioctl number support routines */
/* Registry of dynamically-registered ioctl handlers: a list of
 * llioc_data entries protected by an rw_semaphore (readers dispatch,
 * writers register/unregister). */
2576 static struct llioc_ctl_data {
2577 struct rw_semaphore ioc_sem;
2578 struct list_head ioc_head;
2580 __RWSEM_INITIALIZER(llioc.ioc_sem),
2581 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registration: callback plus its array of handled cmd numbers
 * (iocd_cmd is a trailing variable-length array). */
2586 struct list_head iocd_list;
2587 unsigned int iocd_size;
2588 llioc_callback_t iocd_cb;
2589 unsigned int iocd_count;
2590 unsigned int iocd_cmd[0];
/*
 * ll_iocontrol_register: register a callback for 'count' ioctl command
 * numbers.  Allocates an llioc_data with the cmd array inline, links it
 * onto the global list under the write semaphore, and returns the entry
 * pointer as an opaque handle for ll_iocontrol_unregister().
 */
2593 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2596 struct llioc_data *in_data = NULL;
2599 if (cb == NULL || cmd == NULL ||
2600 count > LLIOC_MAX_CMD || count < 0)
2603 size = sizeof(*in_data) + count * sizeof(unsigned int);
2604 OBD_ALLOC(in_data, size);
2605 if (in_data == NULL)
2608 memset(in_data, 0, sizeof(*in_data));
2609 in_data->iocd_size = size;
2610 in_data->iocd_cb = cb;
2611 in_data->iocd_count = count;
2612 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2614 down_write(&llioc.ioc_sem);
2615 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2616 up_write(&llioc.ioc_sem);
/*
 * ll_iocontrol_unregister: remove and free the registration identified
 * by the opaque handle returned from ll_iocontrol_register.  Warns when
 * no matching entry is found.
 */
2621 void ll_iocontrol_unregister(void *magic)
2623 struct llioc_data *tmp;
2628 down_write(&llioc.ioc_sem);
2629 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2631 unsigned int size = tmp->iocd_size;
2633 list_del(&tmp->iocd_list);
/* drop the semaphore before freeing; the entry is already unlinked */
2634 up_write(&llioc.ioc_sem);
2636 OBD_FREE(tmp, size);
2640 up_write(&llioc.ioc_sem);
2642 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2645 EXPORT_SYMBOL(ll_iocontrol_register);
2646 EXPORT_SYMBOL(ll_iocontrol_unregister);
2648 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2649 unsigned int cmd, unsigned long arg, int *rcp)
2651 enum llioc_iter ret = LLIOC_CONT;
2652 struct llioc_data *data;
2653 int rc = -EINVAL, i;
2655 down_read(&llioc.ioc_sem);
2656 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2657 for (i = 0; i < data->iocd_count; i++) {
2658 if (cmd != data->iocd_cmd[i])
2661 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2665 if (ret == LLIOC_STOP)
2668 up_read(&llioc.ioc_sem);