1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-open ll_file_data from the dedicated slab cache.
 * CFS_ALLOC_IO: allocation may be made from an I/O context.
 * NOTE(review): excerpt elides lines here (NULL-check/RETURN) — see full file. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Release a ll_file_data back to its slab cache. Counterpart of
 * ll_file_data_get(). */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Pack inode attributes (mode/times/size/blocks/flags), the current I/O
 * epoch, the open file handle @fh and an MDS capability into @op_data for
 * an MDC RPC. Caller owns @op_data; op_capa1 takes a capa reference via
 * ll_mdscapa_get(). */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives past the generic iattr; cast to the Lustre view. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Prepare @op_data for an MDS close of open handle @och: select which
 * attributes are valid, close the I/O epoch and pack inode state.
 * Size/blocks are sent only for non-SOM mounts or non-regular files
 * (with SOM the MDS obtains size from OSTs separately). */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
/* Read-only opens never dirtied size — excerpt elides the taken branch. */
92 if (!(och->och_flags & FMODE_WRITE))
95 if (!(exp_connect_som(ll_i2mdexp(inode))) || !S_ISREG(inode->i_mode))
96 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* Note: &och (handle pointer's address) — ll_epoch_close may clear it. */
98 ll_epoch_close(inode, op_data, &och, 0);
101 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/* Send the MDS close RPC for open handle @och and tear down local state:
 * prepare close op_data, md_close(), optionally push a Size-on-MDS update,
 * destroy OST objects named in the close reply, queue DONE_WRITING if the
 * epoch stayed open, and poison the handle cookie. Returns 0 or negative
 * errno. NOTE(review): excerpt elides several lines (ENTRY/RETURN, branch
 * braces, obd NULL-check) — read against the full file. */
105 static int ll_close_inode_openhandle(struct obd_export *md_exp,
107 struct obd_client_handle *och)
109 struct obd_export *exp = ll_i2mdexp(inode);
110 struct md_op_data *op_data;
111 struct ptlrpc_request *req = NULL;
112 struct obd_device *obd = class_exp2obd(exp);
119 * XXX: in case of LMV, is this correct to access
122 CERROR("Invalid MDC connection handle "LPX64"\n",
123 ll_i2mdexp(inode)->exp_handle.h_cookie);
128 * here we check if this is forced umount. If so this is called on
129 * canceling "open lock" and we do not call md_close() in this case, as
130 * it will not be successful, as import is already deactivated.
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
/* Presumably this branch handles rc == -EAGAIN from md_close — TODO confirm. */
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr to back to MDS. */
147 rc = ll_sizeonmds_update(inode, &och->och_fh,
148 op_data->op_ioepoch);
150 CERROR("inode %lu mdc Size-on-MDS update failed: "
151 "rc = %d\n", inode->i_ino, rc);
155 CERROR("inode %lu mdc close failed: rc = %d\n",
158 ll_finish_md_op_data(op_data);
/* Destroy OST objects if the close reply says the file is unlinked. */
161 rc = ll_objects_destroy(req, inode);
163 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* SOM + epoch still open on a written regular file: defer DONE_WRITING. */
170 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
171 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
172 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
174 md_clear_open_replay_data(md_exp, och);
175 /* Free @och if it is not waiting for DONE_WRITING. */
176 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
179 if (req) /* This is close request */
180 ptlrpc_req_finished(req);
/* Really close the MDS open handle for @inode matching open mode @flags
 * (write/exec/read), but only when the per-mode use count has dropped to
 * zero. Handle selection and detach are done under lli_och_sem; the RPC
 * itself is sent outside the semaphore via ll_close_inode_openhandle().
 * NOTE(review): excerpt elides the lines that snatch *och_p under the
 * semaphore — och's assignment is not visible here. */
184 int ll_md_real_close(struct inode *inode, int flags)
186 struct ll_inode_info *lli = ll_i2info(inode);
187 struct obd_client_handle **och_p;
188 struct obd_client_handle *och;
193 if (flags & FMODE_WRITE) {
194 och_p = &lli->lli_mds_write_och;
195 och_usecount = &lli->lli_open_fd_write_count;
196 } else if (flags & FMODE_EXEC) {
197 och_p = &lli->lli_mds_exec_och;
198 och_usecount = &lli->lli_open_fd_exec_count;
200 LASSERT(flags & FMODE_READ);
201 och_p = &lli->lli_mds_read_och;
202 och_usecount = &lli->lli_open_fd_read_count;
205 down(&lli->lli_och_sem);
206 if (*och_usecount) { /* There are still users of this handle, so
208 up(&lli->lli_och_sem);
213 up(&lli->lli_och_sem);
215 if (och) { /* There might be a race and somebody have freed this och
217 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-struct-file close: drop a group lock if held, decrement the per-mode
 * open count under lli_och_sem, and — unless a cached OPEN DLM lock still
 * covers the file (md_lock_match with TEST_LOCK) — do the real MDS close
 * via ll_md_real_close(). Finally detach and free the file's private
 * ll_file_data and close its capa. */
224 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
227 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
228 struct ll_inode_info *lli = ll_i2info(inode);
232 /* clear group lock, if present */
233 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
234 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
236 /* Let's see if we have good enough OPEN lock on the file and if
237 we can skip talking to MDS */
238 if (file->f_dentry->d_inode) { /* Can this ever be false? */
240 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
241 struct lustre_handle lockh;
/* Shadows the outer @inode parameter intentionally (same object). */
242 struct inode *inode = file->f_dentry->d_inode;
243 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
245 down(&lli->lli_och_sem);
246 if (fd->fd_omode & FMODE_WRITE) {
248 LASSERT(lli->lli_open_fd_write_count);
249 lli->lli_open_fd_write_count--;
250 } else if (fd->fd_omode & FMODE_EXEC) {
252 LASSERT(lli->lli_open_fd_exec_count);
253 lli->lli_open_fd_exec_count--;
256 LASSERT(lli->lli_open_fd_read_count);
257 lli->lli_open_fd_read_count--;
259 up(&lli->lli_och_sem);
/* No matching cached OPEN lock -> must talk to the MDS now. */
261 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
262 LDLM_IBITS, &policy, lockmode,
264 rc = ll_md_real_close(file->f_dentry->d_inode,
268 CERROR("Releasing a file %p with negative dentry %p. Name %s",
269 file, file->f_dentry, file->f_dentry->d_name.name);
272 LUSTRE_FPRIVATE(file) = NULL;
273 ll_file_data_put(fd);
274 ll_capa_close(inode);
/* Forward declaration: defined in the LOV layer. */
279 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
281 /* While this returns an error code, fput() the caller does not, so we need
282 * to make every effort to clean up all of our state here. Also, applications
283 * rarely check close errors and even if an error is returned they will not
284 * re-try the close call.
/* VFS ->release() handler: stop any statahead we own, handle the
 * root-inode/remote-ACL special cases, fold pending async write errors
 * into lli_async_rc, then do the MDS close via ll_md_close(). */
286 int ll_file_release(struct inode *inode, struct file *file)
288 struct ll_file_data *fd;
289 struct ll_sb_info *sbi = ll_i2sbi(inode);
290 struct ll_inode_info *lli = ll_i2info(inode);
291 struct lov_stripe_md *lsm = lli->lli_smd;
295 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
296 inode->i_generation, inode);
298 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping applies only to the root inode. */
299 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
300 inode == inode->i_sb->s_root->d_inode) {
301 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
304 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
305 fd->fd_flags &= ~LL_FILE_RMTACL;
306 rct_del(&sbi->ll_rct, cfs_curproc_pid());
307 et_search_free(&sbi->ll_et, cfs_curproc_pid());
312 if (inode->i_sb->s_root != file->f_dentry)
313 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
314 fd = LUSTRE_FPRIVATE(file);
317 /* The last ref on @file, maybe not the the owner pid of statahead.
318 * Different processes can open the same dir, "ll_opendir_key" means:
319 * it is me that should stop the statahead thread. */
320 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
321 ll_stop_statahead(inode, lli->lli_opendir_key)；
323 if (inode->i_sb->s_root == file->f_dentry) {
324 LUSTRE_FPRIVATE(file) = NULL;
325 ll_file_data_put(fd);
/* Pick up any asynchronous write error recorded against the stripes. */
330 lov_test_and_clear_async_rc(lsm);
331 lli->lli_async_rc = 0;
333 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/* Issue an IT_OPEN intent lock to the MDS for @file, optionally carrying
 * stripe metadata @lmm/@lmmsize (setstripe path). Requests an OPEN DLM
 * lock unless stripe data is being set. On success, refreshes the inode
 * from the reply and binds the obtained lock to it. Returns 0 or negative
 * errno. NOTE(review): excerpt elides the error-path labels/returns. */
337 static int ll_intent_file_open(struct file *file, void *lmm,
338 int lmmsize, struct lookup_intent *itp)
340 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
341 struct dentry *parent = file->f_dentry->d_parent;
342 const char *name = file->f_dentry->d_name.name;
343 const int len = file->f_dentry->d_name.len;
344 struct md_op_data *op_data;
345 struct ptlrpc_request *req;
352 /* Usually we come here only for NFSD, and we want open lock.
353 But we can also get here with pre 2.6.15 patchless kernels, and in
354 that case that lock is also ok */
355 /* We can also get here if there was cached open handle in revalidate_it
356 * but it disappeared while we were getting from there to ll_file_open.
357 * But this means this file was closed and immediatelly opened which
358 * makes a good candidate for using OPEN lock */
359 /* If lmmsize & lmm are not 0, we are just setting stripe info
360 * parameters. No need for the open lock */
361 if (!lmm && !lmmsize)
362 itp->it_flags |= MDS_OPEN_LOCK;
364 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
365 file->f_dentry->d_inode, name, len,
366 O_RDWR, LUSTRE_OPC_ANY, NULL);
368 RETURN(PTR_ERR(op_data));
370 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
371 0 /*unused */, &req, ll_md_blocking_ast, 0);
372 ll_finish_md_op_data(op_data);
374 /* reason for keep own exit path - don`t flood log
375 * with messages with -ESTALE errors.
/* On -ESTALE: drop any granted open handle before bailing out. */
377 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
378 it_open_error(DISP_OPEN_OPEN, itp))
380 ll_release_openhandle(file->f_dentry, itp);
384 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
385 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
386 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
390 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
391 if (!rc && itp->d.lustre.it_lock_mode)
392 md_set_lock_data(sbi->ll_md_exp,
393 &itp->d.lustre.it_lock_handle,
394 file->f_dentry->d_inode, NULL);
/* Cleanup path: drop the intent's request ref and its lock. */
397 ptlrpc_req_finished(itp->d.lustre.it_data);
398 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
399 ll_intent_drop_lock(itp);
/* Record a newly-opened I/O epoch on the inode. A zero @ioepoch or an
 * unchanged value is a no-op. */
404 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
406 if (ioepoch && lli->lli_ioepoch != ioepoch) {
407 lli->lli_ioepoch = ioepoch;
408 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
409 ioepoch, PFID(&lli->lli_fid));
/* Fill @och from the open reply carried by intent @it: copy the MDS file
 * handle, fid and open flags, open the reply's I/O epoch, and register
 * the request for open replay. Returns md_set_open_replay_data() result. */
413 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
414 struct lookup_intent *it, struct obd_client_handle *och)
416 struct ptlrpc_request *req = it->d.lustre.it_data;
417 struct mdt_body *body;
421 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
422 LASSERT(body != NULL); /* reply already checked out */
424 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
425 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
426 och->och_fid = lli->lli_fid;
427 och->och_flags = it->it_flags;
428 ll_ioepoch_open(lli, body->ioepoch);
430 return md_set_open_replay_data(md_exp, och, req);
/* Complete the client-local part of an open: optionally fill @och from the
 * intent reply, then attach @fd as the file's private data, init readahead
 * state and record the open mode. @och may be NULL when reusing an
 * existing MDS handle. */
433 int ll_local_open(struct file *file, struct lookup_intent *it,
434 struct ll_file_data *fd, struct obd_client_handle *och)
436 struct inode *inode = file->f_dentry->d_inode;
437 struct ll_inode_info *lli = ll_i2info(inode);
440 LASSERT(!LUSTRE_FPRIVATE(file));
445 struct ptlrpc_request *req = it->d.lustre.it_data;
446 struct mdt_body *body;
449 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
453 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
454 if ((it->it_flags & FMODE_WRITE) &&
455 (body->valid & OBD_MD_FLSIZE))
456 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
457 lli->lli_ioepoch, PFID(&lli->lli_fid));
460 LUSTRE_FPRIVATE(file) = fd;
461 ll_readahead_init(inode, &fd->fd_ras);
/* fd_omode remembers the FMODE_* bits for the matching close. */
462 fd->fd_omode = it->it_flags;
466 /* Open a file, and (for the very first open) create objects on the OSTs at
467 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
468 * creation or open until ll_lov_setstripe() ioctl is called. We grab
469 * lli_open_sem to ensure no other process will create objects, send the
470 * stripe MD to the MDS, or try to destroy the objects if that fails.
472 * If we already have the stripe MD locally then we don't request it in
473 * md_open(), by passing a lmm_size = 0.
475 * It is up to the application to ensure no other processes open this file
476 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
477 * used. We might be able to avoid races of that sort by getting lli_open_sem
478 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
479 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* VFS ->open() handler. Registers statahead ownership for directories,
 * short-circuits the root inode, then either reuses an existing per-mode
 * MDS open handle or performs a fresh intent open (ll_intent_file_open)
 * and fills a new handle, finishing with ll_local_open(). Serialised on
 * lli_och_sem; the intent RPC is issued with the semaphore dropped to
 * avoid deadlock with the blocking AST. NOTE(review): excerpt elides many
 * lines (braces, GOTO labels, och_usecount updates) — read with care. */
481 int ll_file_open(struct inode *inode, struct file *file)
483 struct ll_inode_info *lli = ll_i2info(inode);
484 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
485 .it_flags = file->f_flags };
486 struct lov_stripe_md *lsm;
487 struct ptlrpc_request *req = NULL;
488 struct obd_client_handle **och_p;
490 struct ll_file_data *fd;
491 int rc = 0, opendir_set = 0;
494 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
495 inode->i_generation, inode, file->f_flags);
497 #ifdef HAVE_VFS_INTENT_PATCHES
500 it = file->private_data; /* XXX: compat macro */
501 file->private_data = NULL; /* prevent ll_local_open assertion */
504 fd = ll_file_data_get();
/* Directory open: claim statahead ownership if nobody holds it yet. */
509 if (S_ISDIR(inode->i_mode)) {
510 spin_lock(&lli->lli_lock);
511 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
512 LASSERT(lli->lli_sai == NULL);
513 lli->lli_opendir_key = fd;
514 lli->lli_opendir_pid = cfs_curproc_pid();
517 spin_unlock(&lli->lli_lock);
/* Root inode: no MDS open needed, just attach the file data. */
520 if (inode->i_sb->s_root == file->f_dentry) {
521 LUSTRE_FPRIVATE(file) = fd;
525 if (!it || !it->d.lustre.it_disposition) {
526 /* Convert f_flags into access mode. We cannot use file->f_mode,
527 * because everything but O_ACCMODE mask was stripped from
/* O_RDONLY/O_WRONLY/O_RDWR +1 maps onto FMODE_READ|FMODE_WRITE bits. */
529 if ((oit.it_flags + 1) & O_ACCMODE)
531 if (file->f_flags & O_TRUNC)
532 oit.it_flags |= FMODE_WRITE;
534 /* kernel only call f_op->open in dentry_open. filp_open calls
535 * dentry_open after call to open_namei that checks permissions.
536 * Only nfsd_open call dentry_open directly without checking
537 * permissions and because of that this code below is safe. */
538 if (oit.it_flags & FMODE_WRITE)
539 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
541 /* We do not want O_EXCL here, presumably we opened the file
542 * already? XXX - NFS implications? */
543 oit.it_flags &= ~O_EXCL;
549 /* Let's see if we have file open on MDS already. */
550 if (it->it_flags & FMODE_WRITE) {
551 och_p = &lli->lli_mds_write_och;
552 och_usecount = &lli->lli_open_fd_write_count;
553 } else if (it->it_flags & FMODE_EXEC) {
554 och_p = &lli->lli_mds_exec_och;
555 och_usecount = &lli->lli_open_fd_exec_count;
557 och_p = &lli->lli_mds_read_och;
558 och_usecount = &lli->lli_open_fd_read_count;
561 down(&lli->lli_och_sem);
562 if (*och_p) { /* Open handle is present */
563 if (it_disposition(it, DISP_OPEN_OPEN)) {
564 /* Well, there's extra open request that we do not need,
565 let's close it somehow. This will decref request. */
566 rc = it_open_error(DISP_OPEN_OPEN, it);
568 up(&lli->lli_och_sem);
569 ll_file_data_put(fd);
570 GOTO(out_openerr, rc);
572 ll_release_openhandle(file->f_dentry, it);
573 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reuse existing handle: och == NULL tells ll_local_open to skip fill. */
578 rc = ll_local_open(file, it, fd, NULL);
581 up(&lli->lli_och_sem);
582 ll_file_data_put(fd);
583 GOTO(out_openerr, rc);
586 LASSERT(*och_usecount == 0);
587 if (!it->d.lustre.it_disposition) {
588 /* We cannot just request lock handle now, new ELC code
589 means that one of other OPEN locks for this file
590 could be cancelled, and since blocking ast handler
591 would attempt to grab och_sem as well, that would
592 result in a deadlock */
593 up(&lli->lli_och_sem);
594 it->it_create_mode |= M_CHECK_STALE;
595 rc = ll_intent_file_open(file, NULL, 0, it);
596 it->it_create_mode &= ~M_CHECK_STALE;
598 ll_file_data_put(fd);
599 GOTO(out_openerr, rc);
602 /* Got some error? Release the request */
603 if (it->d.lustre.it_status < 0) {
604 req = it->d.lustre.it_data;
605 ptlrpc_req_finished(req);
609 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
611 ll_file_data_put(fd);
612 GOTO(out_och_free, rc = -ENOMEM);
615 req = it->d.lustre.it_data;
617 /* md_intent_lock() didn't get a request ref if there was an
618 * open error, so don't do cleanup on the request here
620 /* XXX (green): Should not we bail out on any error here, not
621 * just open error? */
622 rc = it_open_error(DISP_OPEN_OPEN, it);
624 ll_file_data_put(fd);
625 GOTO(out_och_free, rc);
628 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
629 rc = ll_local_open(file, it, fd, *och_p);
631 ll_file_data_put(fd);
632 GOTO(out_och_free, rc);
635 up(&lli->lli_och_sem);
637 /* Must do this outside lli_och_sem lock to prevent deadlock where
638 different kind of OPEN lock for this same inode gets cancelled
639 by ldlm_cancel_lru */
640 if (!S_ISREG(inode->i_mode))
647 if (file->f_flags & O_LOV_DELAY_CREATE ||
648 !(file->f_mode & FMODE_WRITE)) {
649 CDEBUG(D_INODE, "object creation was delayed\n");
653 file->f_flags &= ~O_LOV_DELAY_CREATE;
656 ptlrpc_req_finished(req);
658 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/* out_och_free error path: release the half-initialised handle. */
662 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
663 *och_p = NULL; /* OBD_FREE writes some magic there */
666 up(&lli->lli_och_sem);
/* out_openerr: give back statahead ownership taken above. */
668 if (opendir_set != 0)
669 ll_stop_statahead(inode, lli->lli_opendir_key);
675 /* Fills the obdo with the attributes for the lsm */
/* Fetch merged OST attributes for stripe set @lsm into @obdo via an async
 * getattr folded through a ptlrpc set. On return, o_valid is masked down
 * to the blocks/blksize/times/size bits actually obtained. Returns 0 or
 * negative errno (excerpt elides the rc checks/RETURN). */
676 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
677 struct obd_capa *capa, struct obdo *obdo)
679 struct ptlrpc_request_set *set;
680 struct obd_info oinfo = { { { 0 } } };
685 LASSERT(lsm != NULL);
689 oinfo.oi_oa->o_id = lsm->lsm_object_id;
690 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
691 oinfo.oi_oa->o_mode = S_IFREG;
692 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
693 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
694 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
695 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
697 oinfo.oi_capa = capa;
699 set = ptlrpc_prep_set();
701 CERROR("can't allocate ptlrpc set\n");
704 rc = obd_getattr_async(exp, &oinfo, set);
706 rc = ptlrpc_set_wait(set);
707 ptlrpc_set_destroy(set);
710 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
711 OBD_MD_FLATIME | OBD_MD_FLMTIME |
712 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
716 /* Fills the obdo with the attributes for the inode defined by lsm */
717 /* Fills the obdo with the attributes for the inode defined by lsm */
/* Wrapper around ll_lsm_getattr() using the inode's own stripe md and an
 * MDS capability; on success refreshes the inode from the obdo. */
719 struct ll_inode_info *lli = ll_i2info(inode);
720 struct obd_capa *capa = ll_mdscapa_get(inode);
724 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
727 obdo_refresh_inode(inode, obdo, obdo->o_valid);
729 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
730 lli->lli_smd->lsm_object_id, i_size_read(inode),
731 (unsigned long long)inode->i_blocks,
732 (unsigned long)ll_inode_blksize(inode));
/* Merge the per-stripe lock value blocks into the inode under the size
 * lock: updates i_size, i_blocks and m/a/ctime from the merged LVB.
 * Returns the obd_merge_lvb() result. */
737 int ll_merge_lvb(struct inode *inode)
739 struct ll_inode_info *lli = ll_i2info(inode);
740 struct ll_sb_info *sbi = ll_i2sbi(inode);
746 ll_inode_size_lock(inode, 1);
747 inode_init_lvb(inode, &lvb);
748 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
749 i_size_write(inode, lvb.lvb_size);
750 inode->i_blocks = lvb.lvb_blocks;
752 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
753 LTIME_S(inode->i_atime) = lvb.lvb_atime;
754 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
755 ll_inode_size_unlock(inode, 1);
/* Glimpse ioctl helper: fetch OST attributes for @lsm and copy
 * size/blocks/times into the caller-supplied stat buffer @st. */
760 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
763 struct obdo obdo = { 0 };
766 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
768 st->st_size = obdo.o_size;
769 st->st_blocks = obdo.o_blocks;
770 st->st_mtime = obdo.o_mtime;
771 st->st_atime = obdo.o_atime;
772 st->st_ctime = obdo.o_ctime;
/* Initialise a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: nonblock/append flags, the cl object, and the lock-request mode
 * (never for nolock files, mandatory for O_APPEND, maybe otherwise). */
777 void ll_io_init(struct cl_io *io, const struct file *file, int write)
779 struct inode *inode = file->f_dentry->d_inode;
781 memset(io, 0, sizeof *io);
782 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
784 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
785 io->ci_obj = ll_i2info(inode)->lli_clob;
786 io->ci_lockreq = CILR_MAYBE;
787 if (ll_file_nolock(file)) {
788 io->ci_lockreq = CILR_NEVER;
789 io->ci_no_srvlock = 1;
790 } else if (file->f_flags & O_APPEND) {
791 io->ci_lockreq = CILR_MANDATORY;
/* Common engine for read/write/sendfile paths: set up a cl_io of type
 * @iot at *@ppos for @count bytes, copy the iovec/actor arguments from
 * @args into the vvp/ccc io state, run cl_io_loop(), and advance *@ppos
 * by the bytes transferred. Returns byte count or negative errno
 * (excerpt elides cl_io_fini and the final result handling). */
795 static ssize_t ll_file_io_generic(const struct lu_env *env,
796 struct ccc_io_args *args, struct file *file,
797 enum cl_io_type iot, loff_t *ppos, size_t count)
803 io = &ccc_env_info(env)->cti_io;
804 ll_io_init(io, file, iot == CIT_WRITE);
807 io->u.ci_rd.rd_is_sendfile = args->cia_is_sendfile;
809 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
810 struct vvp_io *vio = vvp_env_io(env);
811 struct ccc_io *cio = ccc_env_io(env);
812 if (cl_io_is_sendfile(io)) {
813 vio->u.read.cui_actor = args->cia_actor;
814 vio->u.read.cui_target = args->cia_target;
816 cio->cui_iov = args->cia_iov;
817 cio->cui_nrsegs = args->cia_nrsegs;
818 #ifndef HAVE_FILE_WRITEV
819 cio->cui_iocb = args->cia_iocb;
822 cio->cui_fd = LUSTRE_FPRIVATE(file);
823 result = cl_io_loop(env, io);
825 /* cl_io_rw_init() handled IO */
826 result = io->ci_result;
827 if (io->ci_nob > 0) {
829 *ppos = io->u.ci_wr.wr.crw_pos;
837 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute the total byte count into *@count,
 * possibly shortening *@nr_segs at the first inaccessible segment.
 * Mirrors the kernel's __generic_file_aio_write_nolock() prologue. */
839 static int ll_file_get_iov_count(const struct iovec *iov,
840 unsigned long *nr_segs, size_t *count)
845 for (seg = 0; seg < *nr_segs; seg++) {
846 const struct iovec *iv = &iov[seg];
849 * If any segment has a negative length, or the cumulative
850 * length ever wraps negative then return -EINVAL.
853 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
855 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
860 cnt -= iv->iov_len; /* This segment is no good */
867 #ifdef HAVE_FILE_READV
/* Vectored read entry (pre-AIO kernels, HAVE_FILE_READV): validate the
 * iovec, grab a cl environment and run the generic CIT_READ path. */
868 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
869 unsigned long nr_segs, loff_t *ppos)
872 struct ccc_io_args *args;
878 result = ll_file_get_iov_count(iov, &nr_segs, &count);
882 env = cl_env_get(&refcheck);
884 RETURN(PTR_ERR(env));
886 args = &vvp_env_info(env)->vti_args;
887 args->cia_is_sendfile = 0;
888 args->cia_iov = (struct iovec *)iov;
889 args->cia_nrsegs = nr_segs;
890 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
891 cl_env_put(env, &refcheck);
/* Single-buffer read: wrap @buf/@count in the per-env local iovec and
 * delegate to ll_file_readv(). */
895 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
899 struct iovec *local_iov;
904 env = cl_env_get(&refcheck);
906 RETURN(PTR_ERR(env));
908 local_iov = &vvp_env_info(env)->vti_local_iov;
909 local_iov->iov_base = (void __user *)buf;
910 local_iov->iov_len = count;
911 result = ll_file_readv(file, local_iov, 1, ppos);
912 cl_env_put(env, &refcheck);
/* AIO read entry: validate the iovec, then run the generic CIT_READ path
 * against the kiocb's file and position. */
917 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
918 unsigned long nr_segs, loff_t pos)
921 struct ccc_io_args *args;
927 result = ll_file_get_iov_count(iov, &nr_segs, &count);
931 env = cl_env_get(&refcheck);
933 RETURN(PTR_ERR(env));
935 args = &vvp_env_info(env)->vti_args;
936 args->cia_is_sendfile = 0;
937 args->cia_iov = (struct iovec *)iov;
938 args->cia_nrsegs = nr_segs;
939 args->cia_iocb = iocb;
940 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
941 &iocb->ki_pos, count);
942 cl_env_put(env, &refcheck);
/* Single-buffer read (AIO kernels): build a synchronous kiocb plus local
 * iovec and delegate to ll_file_aio_read(), propagating the position. */
946 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
950 struct iovec *local_iov;
956 env = cl_env_get(&refcheck);
958 RETURN(PTR_ERR(env));
960 local_iov = &vvp_env_info(env)->vti_local_iov;
961 kiocb = &vvp_env_info(env)->vti_kiocb;
962 local_iov->iov_base = (void __user *)buf;
963 local_iov->iov_len = count;
964 init_sync_kiocb(kiocb, file);
965 kiocb->ki_pos = *ppos;
966 kiocb->ki_left = count;
968 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
969 *ppos = kiocb->ki_pos;
971 cl_env_put(env, &refcheck);
977 * Write to a file (through the page cache).
979 #ifdef HAVE_FILE_WRITEV
/* Vectored write entry (HAVE_FILE_WRITEV kernels): validate the iovec and
 * run the generic CIT_WRITE path. */
980 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
981 unsigned long nr_segs, loff_t *ppos)
984 struct ccc_io_args *args;
990 result = ll_file_get_iov_count(iov, &nr_segs, &count);
994 env = cl_env_get(&refcheck);
996 RETURN(PTR_ERR(env));
998 args = &vvp_env_info(env)->vti_args;
999 args->cia_iov = (struct iovec *)iov;
1000 args->cia_nrsegs = nr_segs;
1001 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1002 cl_env_put(env, &refcheck);
/* Single-buffer write: wrap @buf/@count in the per-env local iovec and
 * delegate to ll_file_writev(). */
1006 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1010 struct iovec *local_iov;
1015 env = cl_env_get(&refcheck);
1017 RETURN(PTR_ERR(env));
1019 local_iov = &vvp_env_info(env)->vti_local_iov;
1020 local_iov->iov_base = (void __user *)buf;
1021 local_iov->iov_len = count;
1023 result = ll_file_writev(file, local_iov, 1, ppos);
1024 cl_env_put(env, &refcheck);
1028 #else /* AIO stuff */
/* AIO write entry: validate the iovec, then run the generic CIT_WRITE
 * path against the kiocb's file and position. */
1029 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1030 unsigned long nr_segs, loff_t pos)
1033 struct ccc_io_args *args;
1039 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1043 env = cl_env_get(&refcheck);
1045 RETURN(PTR_ERR(env));
1047 args = &vvp_env_info(env)->vti_args;
1048 args->cia_iov = (struct iovec *)iov;
1049 args->cia_nrsegs = nr_segs;
1050 args->cia_iocb = iocb;
1051 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1052 &iocb->ki_pos, count);
1053 cl_env_put(env, &refcheck);
/* Single-buffer write (AIO kernels): build a synchronous kiocb plus local
 * iovec and delegate to ll_file_aio_write(), propagating the position. */
1057 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1061 struct iovec *local_iov;
1062 struct kiocb *kiocb;
1067 env = cl_env_get(&refcheck);
1069 RETURN(PTR_ERR(env));
1071 local_iov = &vvp_env_info(env)->vti_local_iov;
1072 kiocb = &vvp_env_info(env)->vti_kiocb;
1073 local_iov->iov_base = (void __user *)buf;
1074 local_iov->iov_len = count;
1075 init_sync_kiocb(kiocb, file);
1076 kiocb->ki_pos = *ppos;
1077 kiocb->ki_left = count;
1079 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1080 *ppos = kiocb->ki_pos;
1082 cl_env_put(env, &refcheck);
1089 * Send file content (through pagecache) somewhere with helper
/* sendfile entry: run the generic CIT_READ path in sendfile mode with the
 * caller-supplied read actor and target. */
1091 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1092 read_actor_t actor, void *target)
1095 struct ccc_io_args *args;
1100 env = cl_env_get(&refcheck);
1102 RETURN(PTR_ERR(env));
1104 args = &vvp_env_info(env)->vti_args;
1105 args->cia_is_sendfile = 1;
1106 args->cia_target = target;
1107 args->cia_actor = actor;
1108 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1109 cl_env_put(env, &refcheck);
/* LL_IOC_RECREATE_OBJ handler (admin only): recreate an OST object with
 * the id/group/ost index given by the user's ll_recreate_obj, using a
 * private copy of the inode's stripe md under the size lock. Returns 0 or
 * negative errno. NOTE(review): excerpt elides the oa allocation and some
 * checks — lsm2 is populated via memcpy before obd_create(). */
1113 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1116 struct obd_export *exp = ll_i2dtexp(inode);
1117 struct ll_recreate_obj ucreatp;
1118 struct obd_trans_info oti = { 0 };
1119 struct obdo *oa = NULL;
1122 struct lov_stripe_md *lsm, *lsm2;
1125 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1128 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1129 sizeof(struct ll_recreate_obj)))
1136 ll_inode_size_lock(inode, 0);
1137 lsm = ll_i2info(inode)->lli_smd;
1139 GOTO(out, rc = -ENOENT);
1140 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1141 (lsm->lsm_stripe_count));
1143 OBD_ALLOC(lsm2, lsm_size);
1145 GOTO(out, rc = -ENOMEM);
1147 oa->o_id = ucreatp.lrc_id;
1148 oa->o_gr = ucreatp.lrc_group;
1149 oa->o_nlink = ucreatp.lrc_ost_idx;
1150 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1151 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1152 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1153 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1155 memcpy(lsm2, lsm, lsm_size);
1156 rc = obd_create(exp, oa, &lsm2, &oti);
1158 OBD_FREE(lsm2, lsm_size);
1161 ll_inode_size_unlock(inode, 0);
/* Set striping on @inode by performing an intent open carrying the user's
 * lov_user_md @lum. Fails with -EEXIST-style path if striping already
 * exists (excerpt elides that return), -ENOENT if the lookup was
 * negative. Releases the resulting open handle before returning. */
1166 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1167 int flags, struct lov_user_md *lum, int lum_size)
1169 struct lov_stripe_md *lsm;
1170 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1174 ll_inode_size_lock(inode, 0);
1175 lsm = ll_i2info(inode)->lli_smd;
1177 ll_inode_size_unlock(inode, 0);
1178 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1183 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1186 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1187 GOTO(out_req_free, rc = -ENOENT);
1188 rc = oit.d.lustre.it_status;
1190 GOTO(out_req_free, rc);
1192 ll_release_openhandle(file->f_dentry, &oit);
1195 ll_inode_size_unlock(inode, 0);
1196 ll_intent_release(&oit);
/* out_req_free: error path drops the intent's request reference. */
1199 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping metadata) of @filename from the MDS via
 * md_getattr_name(), byte-swap it to host endianness for userspace, and —
 * for LOV_MAGIC_JOIN files — expand the join md into a lov_user_md_join
 * with per-stripe extents. On success *@lmmp/*@lmm_size point into the
 * reply, and *@request must be released by the caller. */
1203 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1204 struct lov_mds_md **lmmp, int *lmm_size,
1205 struct ptlrpc_request **request)
1207 struct ll_sb_info *sbi = ll_i2sbi(inode);
1208 struct mdt_body *body;
1209 struct lov_mds_md *lmm = NULL;
1210 struct ptlrpc_request *req = NULL;
1211 struct obd_capa *oc;
1214 rc = ll_get_max_mdsize(sbi, &lmmsize);
1218 oc = ll_mdscapa_get(inode);
1219 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1220 oc, filename, strlen(filename) + 1,
1221 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1222 ll_i2suppgid(inode), &req);
1225 CDEBUG(D_INFO, "md_getattr_name failed "
1226 "on %s: rc %d\n", filename, rc);
1230 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1231 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1233 lmmsize = body->eadatasize;
1235 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1237 GOTO(out, rc = -ENODATA);
1240 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1241 LASSERT(lmm != NULL);
1243 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1244 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
1245 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
1246 GOTO(out, rc = -EPROTO);
1250 * This is coming from the MDS, so is probably in
1251 * little endian. We convert it to host endian before
1252 * passing it to userspace.
/* Only swab on big-endian hosts (LOV_MAGIC != its LE encoding). */
1254 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1255 /* if function called for directory - we should
1256 * avoid swab not existent lsm objects */
1257 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1258 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1259 if (S_ISREG(body->mode))
1260 lustre_swab_lov_user_md_objects(
1261 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1262 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1263 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1264 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1265 if (S_ISREG(body->mode))
1266 lustre_swab_lov_user_md_objects(
1267 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1268 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1269 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
1270 lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
/* JOIN layout: unpack to an lsm, then rebuild a user-visible join md
 * with explicit per-object extents. */
1274 if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
1275 struct lov_stripe_md *lsm;
1276 struct lov_user_md_join *lmj;
1277 int lmj_size, i, aindex = 0;
1279 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
1281 GOTO(out, rc = -ENOMEM);
1282 rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm);
1284 GOTO(out_free_memmd, rc);
1286 lmj_size = sizeof(struct lov_user_md_join) +
1287 lsm->lsm_stripe_count *
1288 sizeof(struct lov_user_ost_data_join);
1289 OBD_ALLOC(lmj, lmj_size);
1291 GOTO(out_free_memmd, rc = -ENOMEM);
1293 memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
1294 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1295 struct lov_extent *lex =
1296 &lsm->lsm_array->lai_ext_array[aindex];
/* Advance to the extent covering stripe i (excerpt elides aindex++). */
1298 if (lex->le_loi_idx + lex->le_stripe_count <= i)
1300 CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
1301 LPU64" len %d\n", aindex, i,
1302 lex->le_start, (int)lex->le_len);
1303 lmj->lmm_objects[i].l_extent_start =
/* le_len == -1 marks an extent reaching to EOF. */
1306 if ((int)lex->le_len == -1)
1307 lmj->lmm_objects[i].l_extent_end = -1;
1309 lmj->lmm_objects[i].l_extent_end =
1310 lex->le_start + lex->le_len;
1311 lmj->lmm_objects[i].l_object_id =
1312 lsm->lsm_oinfo[i]->loi_id;
1313 lmj->lmm_objects[i].l_object_gr =
1314 lsm->lsm_oinfo[i]->loi_gr;
1315 lmj->lmm_objects[i].l_ost_gen =
1316 lsm->lsm_oinfo[i]->loi_ost_gen;
1317 lmj->lmm_objects[i].l_ost_idx =
1318 lsm->lsm_oinfo[i]->loi_ost_idx;
1320 lmm = (struct lov_mds_md *)lmj;
/* out_free_memmd: */
1323 obd_free_memmd(sbi->ll_dt_exp, &lsm);
1327 *lmm_size = lmmsize;
1332 static int ll_lov_setea(struct inode *inode, struct file *file,
1335 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1336 struct lov_user_md *lump;
1337 int lum_size = sizeof(struct lov_user_md) +
1338 sizeof(struct lov_user_ost_data);
1342 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1345 OBD_ALLOC(lump, lum_size);
1349 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1350 OBD_FREE(lump, lum_size);
1354 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1356 OBD_FREE(lump, lum_size);
1360 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1363 struct lov_user_md_v3 lumv3;
1364 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1365 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1366 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1369 int flags = FMODE_WRITE;
1372 /* first try with v1 which is smaller than v3 */
1373 lum_size = sizeof(struct lov_user_md_v1);
1374 if (copy_from_user(lumv1, lumv1p, lum_size))
1377 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1378 lum_size = sizeof(struct lov_user_md_v3);
1379 if (copy_from_user(&lumv3, lumv3p, lum_size))
1383 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1385 put_user(0, &lumv1p->lmm_stripe_count);
1386 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1387 0, ll_i2info(inode)->lli_smd,
1393 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1395 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1400 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
1404 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1406 struct ll_inode_info *lli = ll_i2info(inode);
1407 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1408 struct ccc_grouplock grouplock;
1412 if (ll_file_nolock(file))
1413 RETURN(-EOPNOTSUPP);
1415 spin_lock(&lli->lli_lock);
1416 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1417 CERROR("group lock already existed with gid %lu\n",
1418 fd->fd_grouplock.cg_gid);
1419 spin_unlock(&lli->lli_lock);
1422 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1423 spin_unlock(&lli->lli_lock);
1425 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1426 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1430 spin_lock(&lli->lli_lock);
1431 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1432 spin_unlock(&lli->lli_lock);
1433 CERROR("another thread just won the race\n");
1434 cl_put_grouplock(&grouplock);
1438 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1439 fd->fd_grouplock = grouplock;
1440 spin_unlock(&lli->lli_lock);
1442 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
1446 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1448 struct ll_inode_info *lli = ll_i2info(inode);
1449 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1450 struct ccc_grouplock grouplock;
1453 spin_lock(&lli->lli_lock);
1454 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1455 spin_unlock(&lli->lli_lock);
1456 CERROR("no group lock held\n");
1459 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1461 if (fd->fd_grouplock.cg_gid != arg) {
1462 CERROR("group lock %lu doesn't match current id %lu\n",
1463 arg, fd->fd_grouplock.cg_gid);
1464 spin_unlock(&lli->lli_lock);
1468 grouplock = fd->fd_grouplock;
1469 fd->fd_grouplock.cg_env = NULL;
1470 fd->fd_grouplock.cg_lock = NULL;
1471 fd->fd_grouplock.cg_gid = 0;
1472 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1473 spin_unlock(&lli->lli_lock);
1475 cl_put_grouplock(&grouplock);
1476 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1480 #if LUSTRE_FIX >= 50
1481 static int join_sanity_check(struct inode *head, struct inode *tail)
1484 if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
1485 CERROR("server do not support join \n");
1488 if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
1489 CERROR("tail ino %lu and ino head %lu must be regular\n",
1490 head->i_ino, tail->i_ino);
1493 if (head->i_ino == tail->i_ino) {
1494 CERROR("file %lu can not be joined to itself \n", head->i_ino);
1497 if (i_size_read(head) % JOIN_FILE_ALIGN) {
1498 CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
1504 static int join_file(struct inode *head_inode, struct file *head_filp,
1505 struct file *tail_filp)
1507 struct dentry *tail_dentry = tail_filp->f_dentry;
1508 struct lookup_intent oit = {.it_op = IT_OPEN,
1509 .it_flags = head_filp->f_flags,
1510 .it_create_mode = M_JOIN_FILE};
1511 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CW,
1512 ll_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL };
1514 struct lustre_handle lockh;
1515 struct md_op_data *op_data;
1520 tail_dentry = tail_filp->f_dentry;
1522 data = i_size_read(head_inode);
1523 op_data = ll_prep_md_op_data(NULL, head_inode,
1524 tail_dentry->d_parent->d_inode,
1525 tail_dentry->d_name.name,
1526 tail_dentry->d_name.len, 0,
1527 LUSTRE_OPC_ANY, &data);
1528 if (IS_ERR(op_data))
1529 RETURN(PTR_ERR(op_data));
1531 rc = md_enqueue(ll_i2mdexp(head_inode), &einfo, &oit,
1532 op_data, &lockh, NULL, 0, NULL, 0);
1534 ll_finish_md_op_data(op_data);
1538 rc = oit.d.lustre.it_status;
1540 if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
1541 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
1542 ptlrpc_req_finished((struct ptlrpc_request *)
1543 oit.d.lustre.it_data);
1547 if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
1549 ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
1550 oit.d.lustre.it_lock_mode = 0;
1552 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1553 it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
1554 ll_release_openhandle(head_filp->f_dentry, &oit);
1556 ll_intent_release(&oit);
1560 static int ll_file_join(struct inode *head, struct file *filp,
1561 char *filename_tail)
1563 struct inode *tail = NULL, *first = NULL, *second = NULL;
1564 struct dentry *tail_dentry;
1565 struct file *tail_filp, *first_filp, *second_filp;
1566 struct ll_lock_tree first_tree, second_tree;
1567 struct ll_lock_tree_node *first_node, *second_node;
1568 struct ll_inode_info *hlli = ll_i2info(head);
1569 int rc = 0, cleanup_phase = 0;
1572 CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
1573 head->i_ino, head->i_generation, head, filename_tail);
1575 tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
1576 if (IS_ERR(tail_filp)) {
1577 CERROR("Can not open tail file %s", filename_tail);
1578 rc = PTR_ERR(tail_filp);
1581 tail = igrab(tail_filp->f_dentry->d_inode);
1583 tail_dentry = tail_filp->f_dentry;
1584 LASSERT(tail_dentry);
1587 /*reorder the inode for lock sequence*/
1588 first = head->i_ino > tail->i_ino ? head : tail;
1589 second = head->i_ino > tail->i_ino ? tail : head;
1590 first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
1591 second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
1593 CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
1594 head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
1595 first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
1596 if (IS_ERR(first_node)){
1597 rc = PTR_ERR(first_node);
1600 first_tree.lt_fd = first_filp->private_data;
1601 rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
1606 second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
1607 if (IS_ERR(second_node)){
1608 rc = PTR_ERR(second_node);
1611 second_tree.lt_fd = second_filp->private_data;
1612 rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
1617 rc = join_sanity_check(head, tail);
1621 rc = join_file(head, filp, tail_filp);
1625 switch (cleanup_phase) {
1627 ll_tree_unlock(&second_tree);
1628 obd_cancel_unused(ll_i2dtexp(second),
1629 ll_i2info(second)->lli_smd, 0, NULL);
1631 ll_tree_unlock(&first_tree);
1632 obd_cancel_unused(ll_i2dtexp(first),
1633 ll_i2info(first)->lli_smd, 0, NULL);
1635 filp_close(tail_filp, 0);
1638 if (head && rc == 0) {
1639 obd_free_memmd(ll_i2sbi(head)->ll_dt_exp,
1641 hlli->lli_smd = NULL;
1646 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
1651 #endif /* LUSTRE_FIX >= 50 */
1654 * Close inode open handle
1656 * \param dentry [in] dentry which contains the inode
1657 * \param it [in,out] intent which contains open info and result
1660 * \retval <0 failure
1662 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1664 struct inode *inode = dentry->d_inode;
1665 struct obd_client_handle *och;
1671 /* Root ? Do nothing. */
1672 if (dentry->d_inode->i_sb->s_root == dentry)
1675 /* No open handle to close? Move away */
1676 if (!it_disposition(it, DISP_OPEN_OPEN))
1679 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1681 OBD_ALLOC(och, sizeof(*och));
1683 GOTO(out, rc = -ENOMEM);
1685 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1686 ll_i2info(inode), it, och);
1688 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1691 /* this one is in place of ll_file_open */
1692 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1693 ptlrpc_req_finished(it->d.lustre.it_data);
1694 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1699 * Get size for inode for which FIEMAP mapping is requested.
1700 * Make the FIEMAP get_info call and returns the result.
1702 int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1705 struct obd_export *exp = ll_i2dtexp(inode);
1706 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1707 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1708 int vallen = num_bytes;
1712 /* If the stripe_count > 1 and the application does not understand
1713 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1715 if (lsm->lsm_stripe_count > 1 &&
1716 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1719 fm_key.oa.o_id = lsm->lsm_object_id;
1720 fm_key.oa.o_gr = lsm->lsm_object_gr;
1721 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1723 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1726 /* If filesize is 0, then there would be no objects for mapping */
1727 if (fm_key.oa.o_size == 0) {
1728 fiemap->fm_mapped_extents = 0;
1732 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1734 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1736 CERROR("obd_get_info failed: rc = %d\n", rc);
1741 int ll_fid2path(struct obd_export *exp, void *arg)
1743 struct getinfo_fid2path *gfout, *gfin;
1747 /* Need to get the buflen */
1748 OBD_ALLOC_PTR(gfin);
1751 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1756 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1757 OBD_ALLOC(gfout, outsize);
1758 if (gfout == NULL) {
1762 memcpy(gfout, gfin, sizeof(*gfout));
1765 /* Call mdc_iocontrol */
1766 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1769 if (copy_to_user(arg, gfout, outsize))
1773 OBD_FREE(gfout, outsize);
1777 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1780 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1784 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1785 inode->i_generation, inode, cmd);
1786 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1788 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1789 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1793 case LL_IOC_GETFLAGS:
1794 /* Get the current value of the file flags */
1795 return put_user(fd->fd_flags, (int *)arg);
1796 case LL_IOC_SETFLAGS:
1797 case LL_IOC_CLRFLAGS:
1798 /* Set or clear specific file flags */
1799 /* XXX This probably needs checks to ensure the flags are
1800 * not abused, and to handle any flag side effects.
1802 if (get_user(flags, (int *) arg))
1805 if (cmd == LL_IOC_SETFLAGS) {
1806 if ((flags & LL_FILE_IGNORE_LOCK) &&
1807 !(file->f_flags & O_DIRECT)) {
1808 CERROR("%s: unable to disable locking on "
1809 "non-O_DIRECT file\n", current->comm);
1813 fd->fd_flags |= flags;
1815 fd->fd_flags &= ~flags;
1818 case LL_IOC_LOV_SETSTRIPE:
1819 RETURN(ll_lov_setstripe(inode, file, arg));
1820 case LL_IOC_LOV_SETEA:
1821 RETURN(ll_lov_setea(inode, file, arg));
1822 case LL_IOC_LOV_GETSTRIPE:
1823 RETURN(ll_lov_getstripe(inode, arg));
1824 case LL_IOC_RECREATE_OBJ:
1825 RETURN(ll_lov_recreate_obj(inode, file, arg));
1826 case FSFILT_IOC_FIEMAP: {
1827 struct ll_user_fiemap *fiemap_s;
1828 size_t num_bytes, ret_bytes;
1829 unsigned int extent_count;
1832 /* Get the extent count so we can calculate the size of
1833 * required fiemap buffer */
1834 if (get_user(extent_count,
1835 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1837 num_bytes = sizeof(*fiemap_s) + (extent_count *
1838 sizeof(struct ll_fiemap_extent));
1839 OBD_VMALLOC(fiemap_s, num_bytes);
1840 if (fiemap_s == NULL)
1843 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1845 GOTO(error, rc = -EFAULT);
1847 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1848 fiemap_s->fm_flags = fiemap_s->fm_flags &
1849 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1850 if (copy_to_user((char *)arg, fiemap_s,
1852 GOTO(error, rc = -EFAULT);
1854 GOTO(error, rc = -EBADR);
1857 /* If fm_extent_count is non-zero, read the first extent since
1858 * it is used to calculate end_offset and device from previous
1861 if (copy_from_user(&fiemap_s->fm_extents[0],
1862 (char __user *)arg + sizeof(*fiemap_s),
1863 sizeof(struct ll_fiemap_extent)))
1864 GOTO(error, rc = -EFAULT);
1867 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1870 rc = filemap_fdatawrite(inode->i_mapping);
1875 rc = ll_fiemap(inode, fiemap_s, num_bytes);
1879 ret_bytes = sizeof(struct ll_user_fiemap);
1881 if (extent_count != 0)
1882 ret_bytes += (fiemap_s->fm_mapped_extents *
1883 sizeof(struct ll_fiemap_extent));
1885 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1889 OBD_VFREE(fiemap_s, num_bytes);
1892 case FSFILT_IOC_GETFLAGS:
1893 case FSFILT_IOC_SETFLAGS:
1894 RETURN(ll_iocontrol(inode, file, cmd, arg));
1895 case FSFILT_IOC_GETVERSION_OLD:
1896 case FSFILT_IOC_GETVERSION:
1897 RETURN(put_user(inode->i_generation, (int *)arg));
1899 #if LUSTRE_FIX >= 50
1900 /* Allow file join in beta builds to allow debuggging */
1904 ftail = getname((const char *)arg);
1906 RETURN(PTR_ERR(ftail));
1907 rc = ll_file_join(inode, file, ftail);
1911 CWARN("file join is not supported in this version of Lustre\n");
1915 case LL_IOC_GROUP_LOCK:
1916 RETURN(ll_get_grouplock(inode, file, arg));
1917 case LL_IOC_GROUP_UNLOCK:
1918 RETURN(ll_put_grouplock(inode, file, arg));
1919 case IOC_OBD_STATFS:
1920 RETURN(ll_obd_statfs(inode, (void *)arg));
1922 /* We need to special case any other ioctls we want to handle,
1923 * to send them to the MDS/OST as appropriate and to properly
1924 * network encode the arg field.
1925 case FSFILT_IOC_SETVERSION_OLD:
1926 case FSFILT_IOC_SETVERSION:
1928 case LL_IOC_FLUSHCTX:
1929 RETURN(ll_flush_ctx(inode));
1930 case LL_IOC_PATH2FID: {
1931 if (copy_to_user((void *)arg, ll_inode2fid(inode),
1932 sizeof(struct lu_fid)))
1937 case OBD_IOC_FID2PATH:
1938 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1944 ll_iocontrol_call(inode, file, cmd, arg, &err))
1947 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
1953 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1955 struct inode *inode = file->f_dentry->d_inode;
1958 retval = offset + ((origin == 2) ? i_size_read(inode) :
1959 (origin == 1) ? file->f_pos : 0);
1960 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1961 inode->i_ino, inode->i_generation, inode, retval, retval,
1962 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1963 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1965 if (origin == 2) { /* SEEK_END */
1966 int nonblock = 0, rc;
1968 if (file->f_flags & O_NONBLOCK)
1969 nonblock = LDLM_FL_BLOCK_NOWAIT;
1971 rc = cl_glimpse_size(inode);
1975 ll_inode_size_lock(inode, 0);
1976 offset += i_size_read(inode);
1977 ll_inode_size_unlock(inode, 0);
1978 } else if (origin == 1) { /* SEEK_CUR */
1979 offset += file->f_pos;
1983 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1984 if (offset != file->f_pos) {
1985 file->f_pos = offset;
1993 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1995 struct inode *inode = dentry->d_inode;
1996 struct ll_inode_info *lli = ll_i2info(inode);
1997 struct lov_stripe_md *lsm = lli->lli_smd;
1998 struct ptlrpc_request *req;
1999 struct obd_capa *oc;
2002 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2003 inode->i_generation, inode);
2004 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2006 /* fsync's caller has already called _fdata{sync,write}, we want
2007 * that IO to finish before calling the osc and mdc sync methods */
2008 rc = filemap_fdatawait(inode->i_mapping);
2010 /* catch async errors that were recorded back when async writeback
2011 * failed for pages in this mapping. */
2012 err = lli->lli_async_rc;
2013 lli->lli_async_rc = 0;
2017 err = lov_test_and_clear_async_rc(lsm);
2022 oc = ll_mdscapa_get(inode);
2023 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2029 ptlrpc_req_finished(req);
2036 RETURN(rc ? rc : -ENOMEM);
2038 oa->o_id = lsm->lsm_object_id;
2039 oa->o_gr = lsm->lsm_object_gr;
2040 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2041 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
2042 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
2045 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2046 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
2047 0, OBD_OBJECT_EOF, oc);
2057 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2059 struct inode *inode = file->f_dentry->d_inode;
2060 struct ll_sb_info *sbi = ll_i2sbi(inode);
2061 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2062 .ei_cb_cp =ldlm_flock_completion_ast,
2063 .ei_cbdata = file_lock };
2064 struct md_op_data *op_data;
2065 struct lustre_handle lockh = {0};
2066 ldlm_policy_data_t flock;
2071 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2072 inode->i_ino, file_lock);
2074 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2076 if (file_lock->fl_flags & FL_FLOCK) {
2077 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2078 /* set missing params for flock() calls */
2079 file_lock->fl_end = OFFSET_MAX;
2080 file_lock->fl_pid = current->tgid;
2082 flock.l_flock.pid = file_lock->fl_pid;
2083 flock.l_flock.start = file_lock->fl_start;
2084 flock.l_flock.end = file_lock->fl_end;
2086 switch (file_lock->fl_type) {
2088 einfo.ei_mode = LCK_PR;
2091 /* An unlock request may or may not have any relation to
2092 * existing locks so we may not be able to pass a lock handle
2093 * via a normal ldlm_lock_cancel() request. The request may even
2094 * unlock a byte range in the middle of an existing lock. In
2095 * order to process an unlock request we need all of the same
2096 * information that is given with a normal read or write record
2097 * lock request. To avoid creating another ldlm unlock (cancel)
2098 * message we'll treat a LCK_NL flock request as an unlock. */
2099 einfo.ei_mode = LCK_NL;
2102 einfo.ei_mode = LCK_PW;
2105 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
2120 flags = LDLM_FL_BLOCK_NOWAIT;
2126 flags = LDLM_FL_TEST_LOCK;
2127 /* Save the old mode so that if the mode in the lock changes we
2128 * can decrement the appropriate reader or writer refcount. */
2129 file_lock->fl_type = einfo.ei_mode;
2132 CERROR("unknown fcntl lock command: %d\n", cmd);
2136 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2137 LUSTRE_OPC_ANY, NULL);
2138 if (IS_ERR(op_data))
2139 RETURN(PTR_ERR(op_data));
2141 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2142 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2143 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2145 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2146 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2148 ll_finish_md_op_data(op_data);
2150 if ((file_lock->fl_flags & FL_FLOCK) &&
2151 (rc == 0 || file_lock->fl_type == F_UNLCK))
2152 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2153 #ifdef HAVE_F_OP_FLOCK
2154 if ((file_lock->fl_flags & FL_POSIX) &&
2155 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2156 !(flags & LDLM_FL_TEST_LOCK))
2157 posix_lock_file_wait(file, file_lock);
2163 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
2170 int ll_have_md_lock(struct inode *inode, __u64 bits)
2172 struct lustre_handle lockh;
2173 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2181 fid = &ll_i2info(inode)->lli_fid;
2182 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2184 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2185 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2186 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
2192 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2193 struct lustre_handle *lockh)
2195 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2201 fid = &ll_i2info(inode)->lli_fid;
2202 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2204 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2205 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2206 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
2210 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2211 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2212 * and return success */
2214 /* This path cannot be hit for regular files unless in
2215 * case of obscure races, so no need to to validate
2217 if (!S_ISREG(inode->i_mode) &&
2218 !S_ISDIR(inode->i_mode))
2223 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
2231 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2234 struct inode *inode = dentry->d_inode;
2235 struct ptlrpc_request *req = NULL;
2236 struct ll_sb_info *sbi;
2237 struct obd_export *exp;
2242 CERROR("REPORT THIS LINE TO PETER\n");
2245 sbi = ll_i2sbi(inode);
2247 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2248 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2250 exp = ll_i2mdexp(inode);
2252 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2253 struct lookup_intent oit = { .it_op = IT_GETATTR };
2254 struct md_op_data *op_data;
2256 /* Call getattr by fid, so do not provide name at all. */
2257 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2258 dentry->d_inode, NULL, 0, 0,
2259 LUSTRE_OPC_ANY, NULL);
2260 if (IS_ERR(op_data))
2261 RETURN(PTR_ERR(op_data));
2263 oit.it_create_mode |= M_CHECK_STALE;
2264 rc = md_intent_lock(exp, op_data, NULL, 0,
2265 /* we are not interested in name
2268 ll_md_blocking_ast, 0);
2269 ll_finish_md_op_data(op_data);
2270 oit.it_create_mode &= ~M_CHECK_STALE;
2272 rc = ll_inode_revalidate_fini(inode, rc);
2276 rc = ll_revalidate_it_finish(req, &oit, dentry);
2278 ll_intent_release(&oit);
2282 /* Unlinked? Unhash dentry, so it is not picked up later by
2283 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2284 here to preserve get_cwd functionality on 2.6.
2286 if (!dentry->d_inode->i_nlink) {
2287 spin_lock(&ll_lookup_lock);
2288 spin_lock(&dcache_lock);
2289 ll_drop_dentry(dentry);
2290 spin_unlock(&dcache_lock);
2291 spin_unlock(&ll_lookup_lock);
2294 ll_lookup_finish_locks(&oit, dentry);
2295 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2297 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2298 obd_valid valid = OBD_MD_FLGETATTR;
2299 struct obd_capa *oc;
2302 if (S_ISREG(inode->i_mode)) {
2303 rc = ll_get_max_mdsize(sbi, &ealen);
2306 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2308 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2309 * capa for this inode. Because we only keep capas of dirs
2311 oc = ll_mdscapa_get(inode);
2312 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2316 rc = ll_inode_revalidate_fini(inode, rc);
2320 rc = ll_prep_inode(&inode, req, NULL);
2323 ptlrpc_req_finished(req);
2327 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2332 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2333 MDS_INODELOCK_LOOKUP);
2335 /* if object not yet allocated, don't validate size */
2336 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2339 /* cl_glimpse_size will prefer locally cached writes if they extend
2343 rc = cl_glimpse_size(dentry->d_inode);
2348 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2349 struct lookup_intent *it, struct kstat *stat)
2351 struct inode *inode = de->d_inode;
2354 res = ll_inode_revalidate_it(de, it);
2355 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2360 stat->dev = inode->i_sb->s_dev;
2361 stat->ino = inode->i_ino;
2362 stat->mode = inode->i_mode;
2363 stat->nlink = inode->i_nlink;
2364 stat->uid = inode->i_uid;
2365 stat->gid = inode->i_gid;
2366 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2367 stat->atime = inode->i_atime;
2368 stat->mtime = inode->i_mtime;
2369 stat->ctime = inode->i_ctime;
2370 #ifdef HAVE_INODE_BLKSIZE
2371 stat->blksize = inode->i_blksize;
2373 stat->blksize = 1 << inode->i_blkbits;
2376 ll_inode_size_lock(inode, 0);
2377 stat->size = i_size_read(inode);
2378 stat->blocks = inode->i_blocks;
2379 ll_inode_size_unlock(inode, 0);
2383 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2385 struct lookup_intent it = { .it_op = IT_GETATTR };
2387 return ll_getattr_it(mnt, de, &it, stat);
2391 int lustre_check_acl(struct inode *inode, int mask)
2393 #ifdef CONFIG_FS_POSIX_ACL
2394 struct ll_inode_info *lli = ll_i2info(inode);
2395 struct posix_acl *acl;
2399 spin_lock(&lli->lli_lock);
2400 acl = posix_acl_dup(lli->lli_posix_acl);
2401 spin_unlock(&lli->lli_lock);
2406 rc = posix_acl_permission(inode, acl, mask);
2407 posix_acl_release(acl);
2415 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2416 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2421 /* as root inode are NOT getting validated in lookup operation,
2422 * need to do it before permission check. */
2424 if (inode == inode->i_sb->s_root->d_inode) {
2425 struct lookup_intent it = { .it_op = IT_LOOKUP };
2427 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2428 MDS_INODELOCK_LOOKUP);
2433 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2434 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2436 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2437 return lustre_check_remote_perm(inode, mask);
2439 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2440 rc = generic_permission(inode, mask, lustre_check_acl);
2445 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2447 int mode = inode->i_mode;
2450 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2451 inode->i_ino, inode->i_generation, inode, mask);
2453 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2454 return lustre_check_remote_perm(inode, mask);
2456 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2458 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2459 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2461 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2463 if (current->fsuid == inode->i_uid) {
2466 if (((mode >> 3) & mask & S_IRWXO) != mask)
2468 rc = lustre_check_acl(inode, mask);
2472 goto check_capabilities;
2476 if (in_group_p(inode->i_gid))
2479 if ((mode & mask & S_IRWXO) == mask)
2483 if (!(mask & MAY_EXEC) ||
2484 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2485 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2488 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2489 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
2496 #ifdef HAVE_FILE_READV
2497 #define READ_METHOD readv
2498 #define READ_FUNCTION ll_file_readv
2499 #define WRITE_METHOD writev
2500 #define WRITE_FUNCTION ll_file_writev
2502 #define READ_METHOD aio_read
2503 #define READ_FUNCTION ll_file_aio_read
2504 #define WRITE_METHOD aio_write
2505 #define WRITE_FUNCTION ll_file_aio_write
2508 /* -o localflock - only provides locally consistent flock locks */
2509 struct file_operations ll_file_operations = {
2510 .read = ll_file_read,
2511 .READ_METHOD = READ_FUNCTION,
2512 .write = ll_file_write,
2513 .WRITE_METHOD = WRITE_FUNCTION,
2514 .ioctl = ll_file_ioctl,
2515 .open = ll_file_open,
2516 .release = ll_file_release,
2517 .mmap = ll_file_mmap,
2518 .llseek = ll_file_seek,
2519 .sendfile = ll_file_sendfile,
2523 struct file_operations ll_file_operations_flock = {
2524 .read = ll_file_read,
2525 .READ_METHOD = READ_FUNCTION,
2526 .write = ll_file_write,
2527 .WRITE_METHOD = WRITE_FUNCTION,
2528 .ioctl = ll_file_ioctl,
2529 .open = ll_file_open,
2530 .release = ll_file_release,
2531 .mmap = ll_file_mmap,
2532 .llseek = ll_file_seek,
2533 .sendfile = ll_file_sendfile,
2535 #ifdef HAVE_F_OP_FLOCK
2536 .flock = ll_file_flock,
2538 .lock = ll_file_flock
2541 /* These are for -o noflock - to return ENOSYS on flock calls */
2542 struct file_operations ll_file_operations_noflock = {
2543 .read = ll_file_read,
2544 .READ_METHOD = READ_FUNCTION,
2545 .write = ll_file_write,
2546 .WRITE_METHOD = WRITE_FUNCTION,
2547 .ioctl = ll_file_ioctl,
2548 .open = ll_file_open,
2549 .release = ll_file_release,
2550 .mmap = ll_file_mmap,
2551 .llseek = ll_file_seek,
2552 .sendfile = ll_file_sendfile,
2554 #ifdef HAVE_F_OP_FLOCK
2555 .flock = ll_file_noflock,
2557 .lock = ll_file_noflock
2560 struct inode_operations ll_file_inode_operations = {
2561 #ifdef HAVE_VFS_INTENT_PATCHES
2562 .setattr_raw = ll_setattr_raw,
2564 .setattr = ll_setattr,
2565 .truncate = ll_truncate,
2566 .getattr = ll_getattr,
2567 .permission = ll_inode_permission,
2568 .setxattr = ll_setxattr,
2569 .getxattr = ll_getxattr,
2570 .listxattr = ll_listxattr,
2571 .removexattr = ll_removexattr,
2574 /* dynamic ioctl number support routins */
2575 static struct llioc_ctl_data {
2576 struct rw_semaphore ioc_sem;
2577 struct list_head ioc_head;
2579 __RWSEM_INITIALIZER(llioc.ioc_sem),
2580 CFS_LIST_HEAD_INIT(llioc.ioc_head)
2585 struct list_head iocd_list;
2586 unsigned int iocd_size;
2587 llioc_callback_t iocd_cb;
2588 unsigned int iocd_count;
2589 unsigned int iocd_cmd[0];
2592 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2595 struct llioc_data *in_data = NULL;
2598 if (cb == NULL || cmd == NULL ||
2599 count > LLIOC_MAX_CMD || count < 0)
2602 size = sizeof(*in_data) + count * sizeof(unsigned int);
2603 OBD_ALLOC(in_data, size);
2604 if (in_data == NULL)
2607 memset(in_data, 0, sizeof(*in_data));
2608 in_data->iocd_size = size;
2609 in_data->iocd_cb = cb;
2610 in_data->iocd_count = count;
2611 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2613 down_write(&llioc.ioc_sem);
2614 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2615 up_write(&llioc.ioc_sem);
2620 void ll_iocontrol_unregister(void *magic)
2622 struct llioc_data *tmp;
2627 down_write(&llioc.ioc_sem);
2628 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2630 unsigned int size = tmp->iocd_size;
2632 list_del(&tmp->iocd_list);
2633 up_write(&llioc.ioc_sem);
2635 OBD_FREE(tmp, size);
2639 up_write(&llioc.ioc_sem);
2641 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2644 EXPORT_SYMBOL(ll_iocontrol_register);
2645 EXPORT_SYMBOL(ll_iocontrol_unregister);
2647 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2648 unsigned int cmd, unsigned long arg, int *rcp)
2650 enum llioc_iter ret = LLIOC_CONT;
2651 struct llioc_data *data;
2652 int rc = -EINVAL, i;
2654 down_read(&llioc.ioc_sem);
2655 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2656 for (i = 0; i < data->iocd_count; i++) {
2657 if (cmd != data->iocd_cmd[i])
2660 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2664 if (ret == LLIOC_STOP)
2667 up_read(&llioc.ioc_sem);