1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/*
 * Allocate a per-open-file ll_file_data from the ll_file_data_slab
 * cache, using an I/O-safe GFP mask (CFS_ALLOC_IO).
 * NOTE(review): the allocated @fd is presumably returned to the caller
 * below — the return statement is not visible in this chunk.
 */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Return a ll_file_data allocated by ll_file_data_get() to its slab cache. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Pack the inode's current attributes into @op_data for an RPC to the
 * MDS: fid, mode, a/m/ctime, size, blocks, inode flags, the current IO
 * epoch, the open file handle @fh, and the MDS capability.
 */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the llite-extended iattr wrapper, hence the cast. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
/* Takes a reference on the MDS capability; caller/RPC path releases it. */
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/*
 * Prepare @op_data for an MDS_CLOSE of @och: always send mode and
 * timestamps; size/blocks are presumably only added for write opens
 * when SOM is unsupported or the file is not regular (intermediate
 * control flow is not fully visible here — confirm against full file).
 * Also runs the epoch-close protocol and packs inode state + handle.
 */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
92 if (!(och->och_flags & FMODE_WRITE))
95 if (!(exp_connect_som(ll_i2mdexp(inode))) || !S_ISREG(inode->i_mode))
96 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* May set MF_EPOCH_CLOSE in op_data->op_flags; checked by the caller. */
98 ll_epoch_close(inode, op_data, &och, 0);
101 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/*
 * Close an MDS open handle: send MDS_CLOSE for @och via @md_exp.
 * On forced umount md_close() is skipped (import already deactivated).
 * If the MDS requests a Size-on-MDS update, glimpse the OSTs and send
 * the size back via ll_sizeonmds_update().  Afterwards destroy any OST
 * objects listed in the close reply and clear open replay data.
 * NOTE(review): the exact branch structure around the SOM update and
 * error paths is not fully visible in this chunk.
 */
105 static int ll_close_inode_openhandle(struct obd_export *md_exp,
107 struct obd_client_handle *och)
109 struct obd_export *exp = ll_i2mdexp(inode);
110 struct md_op_data *op_data;
111 struct ptlrpc_request *req = NULL;
112 struct obd_device *obd = class_exp2obd(exp);
119 * XXX: in case of LMV, is this correct to access
122 CERROR("Invalid MDC connection handle "LPX64"\n",
123 ll_i2mdexp(inode)->exp_handle.h_cookie);
128 * here we check if this is forced umount. If so this is called on
129 * canceling "open lock" and we do not call md_close() in this case, as
130 * it will not be successful, as import is already deactivated.
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr back to MDS. */
147 rc = ll_sizeonmds_update(inode, &och->och_fh,
148 op_data->op_ioepoch);
150 CERROR("inode %lu mdc Size-on-MDS update failed: "
151 "rc = %d\n", inode->i_ino, rc);
155 CERROR("inode %lu mdc close failed: rc = %d\n",
158 ll_finish_md_op_data(op_data);
161 rc = ll_objects_destroy(req, inode);
163 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* SOM-enabled regular file whose epoch is still open after a write
 * close: queue DONE_WRITING processing instead of freeing och now. */
170 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
171 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
172 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
174 md_clear_open_replay_data(md_exp, och);
175 /* Free @och if it is not waiting for DONE_WRITING. */
176 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
179 if (req) /* This is close request */
180 ptlrpc_req_finished(req);
/*
 * Drop one reference on the cached MDS open handle matching the open
 * @flags type (write/exec/read).  Under lli_och_sem: if other users of
 * the handle remain, just return; otherwise detach the handle and
 * really close it on the MDS via ll_close_inode_openhandle().
 */
184 int ll_md_real_close(struct inode *inode, int flags)
186 struct ll_inode_info *lli = ll_i2info(inode);
187 struct obd_client_handle **och_p;
188 struct obd_client_handle *och;
193 if (flags & FMODE_WRITE) {
194 och_p = &lli->lli_mds_write_och;
195 och_usecount = &lli->lli_open_fd_write_count;
196 } else if (flags & FMODE_EXEC) {
197 och_p = &lli->lli_mds_exec_och;
198 och_usecount = &lli->lli_open_fd_exec_count;
200 LASSERT(flags & FMODE_READ);
201 och_p = &lli->lli_mds_read_och;
202 och_usecount = &lli->lli_open_fd_read_count;
205 down(&lli->lli_och_sem);
206 if (*och_usecount) { /* There are still users of this handle, so
208 up(&lli->lli_och_sem);
213 up(&lli->lli_och_sem);
215 if (och) { /* There might be a race and somebody have freed this och
217 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file-descriptor close.  Drops any group lock held by this fd,
 * decrements the matching lli open-fd counter under lli_och_sem, and
 * performs a real MDS close (ll_md_real_close) only when no cached
 * OPEN DLM lock matches — otherwise the MDS open handle is kept for
 * reuse by later opens.  Finally detaches and frees the ll_file_data
 * and closes the OSS capability.
 */
224 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
227 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
228 struct ll_inode_info *lli = ll_i2info(inode);
232 /* clear group lock, if present */
233 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
234 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
236 /* Let's see if we have good enough OPEN lock on the file and if
237 we can skip talking to MDS */
238 if (file->f_dentry->d_inode) { /* Can this ever be false? */
240 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
241 struct lustre_handle lockh;
242 struct inode *inode = file->f_dentry->d_inode;
243 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
245 down(&lli->lli_och_sem);
246 if (fd->fd_omode & FMODE_WRITE) {
248 LASSERT(lli->lli_open_fd_write_count);
249 lli->lli_open_fd_write_count--;
250 } else if (fd->fd_omode & FMODE_EXEC) {
252 LASSERT(lli->lli_open_fd_exec_count);
253 lli->lli_open_fd_exec_count--;
256 LASSERT(lli->lli_open_fd_read_count);
257 lli->lli_open_fd_read_count--;
259 up(&lli->lli_och_sem);
/* LDLM_FL_TEST_LOCK: only probe for a matching OPEN lock, don't take a ref. */
261 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
262 LDLM_IBITS, &policy, lockmode,
264 rc = ll_md_real_close(file->f_dentry->d_inode,
268 CERROR("Releasing a file %p with negative dentry %p. Name %s",
269 file, file->f_dentry, file->f_dentry->d_name.name);
272 LUSTRE_FPRIVATE(file) = NULL;
273 ll_file_data_put(fd);
274 ll_capa_close(inode);
/* Forward declaration: clears any pending async write error on the lsm. */
279 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
281 /* While this returns an error code, fput() the caller does not, so we need
282 * to make every effort to clean up all of our state here. Also, applications
283 * rarely check close errors and even if an error is returned they will not
284 * re-try the close call.
/*
 * .release handler for Lustre files: tears down remote-ACL session
 * state for the root inode, stops the statahead thread if this fd
 * started it, clears async write errors, and closes the fd on the MDS
 * via ll_md_close().  The root dentry takes a short-cut path that only
 * frees the ll_file_data.
 */
286 int ll_file_release(struct inode *inode, struct file *file)
288 struct ll_file_data *fd;
289 struct ll_sb_info *sbi = ll_i2sbi(inode);
290 struct ll_inode_info *lli = ll_i2info(inode);
291 struct lov_stripe_md *lsm = lli->lli_smd;
295 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
296 inode->i_generation, inode);
298 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL: drop this process's rmtacl session on root close. */
299 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
300 inode == inode->i_sb->s_root->d_inode) {
301 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
304 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
305 fd->fd_flags &= ~LL_FILE_RMTACL;
306 rct_del(&sbi->ll_rct, cfs_curproc_pid());
307 et_search_free(&sbi->ll_et, cfs_curproc_pid());
312 if (inode->i_sb->s_root != file->f_dentry)
313 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
314 fd = LUSTRE_FPRIVATE(file);
317 /* The last ref on @file, maybe not the owner pid of statahead.
318 * Different processes can open the same dir, "ll_opendir_key" means:
319 * it is me that should stop the statahead thread. */
320 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
321 ll_stop_statahead(inode, lli->lli_opendir_key);
323 if (inode->i_sb->s_root == file->f_dentry) {
324 LUSTRE_FPRIVATE(file) = NULL;
325 ll_file_data_put(fd);
330 lov_test_and_clear_async_rc(lsm);
331 lli->lli_async_rc = 0;
333 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/*
 * Send an OPEN intent to the MDS for @file (used by the NFS export
 * path and by setstripe).  Requests an OPEN DLM lock unless @lmm /
 * @lmmsize are given (pure stripe-setting open).  On success, updates
 * the inode from the reply and binds the granted lock to it; on
 * failure, the open handle is released without flooding the log with
 * -ESTALE messages.
 */
337 static int ll_intent_file_open(struct file *file, void *lmm,
338 int lmmsize, struct lookup_intent *itp)
340 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
341 struct dentry *parent = file->f_dentry->d_parent;
342 const char *name = file->f_dentry->d_name.name;
343 const int len = file->f_dentry->d_name.len;
344 struct md_op_data *op_data;
345 struct ptlrpc_request *req;
352 /* Usually we come here only for NFSD, and we want open lock.
353 But we can also get here with pre 2.6.15 patchless kernels, and in
354 that case that lock is also ok */
355 /* We can also get here if there was cached open handle in revalidate_it
356 * but it disappeared while we were getting from there to ll_file_open.
357 * But this means this file was closed and immediately opened which
358 * makes a good candidate for using OPEN lock */
359 /* If lmmsize & lmm are not 0, we are just setting stripe info
360 * parameters. No need for the open lock */
361 if (!lmm && !lmmsize)
362 itp->it_flags |= MDS_OPEN_LOCK;
364 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
365 file->f_dentry->d_inode, name, len,
366 O_RDWR, LUSTRE_OPC_ANY, NULL);
368 RETURN(PTR_ERR(op_data));
370 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
371 0 /*unused */, &req, ll_md_blocking_ast, 0);
372 ll_finish_md_op_data(op_data);
374 /* reason for keep own exit path - don't flood log
375 * with messages with -ESTALE errors.
377 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
378 it_open_error(DISP_OPEN_OPEN, itp))
380 ll_release_openhandle(file->f_dentry, itp);
384 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
385 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
386 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
390 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
/* Attach the granted DLM lock to the (possibly re-created) inode. */
391 if (!rc && itp->d.lustre.it_lock_mode)
392 md_set_lock_data(sbi->ll_md_exp,
393 &itp->d.lustre.it_lock_handle,
394 file->f_dentry->d_inode, NULL);
397 ptlrpc_req_finished(itp->d.lustre.it_data);
398 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
399 ll_intent_drop_lock(itp);
/* Record a newly opened IO epoch on the inode, if it differs from the
 * current one (zero @ioepoch is ignored). */
404 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
406 if (ioepoch && lli->lli_ioepoch != ioepoch) {
407 lli->lli_ioepoch = ioepoch;
408 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
409 ioepoch, PFID(&lli->lli_fid));
/*
 * Initialize @och from the MDS open reply carried in intent @it: copy
 * the open file handle, fid and open flags, note the IO epoch, and
 * register the open for replay with the MDC.
 */
413 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
414 struct lookup_intent *it, struct obd_client_handle *och)
416 struct ptlrpc_request *req = it->d.lustre.it_data;
417 struct mdt_body *body;
421 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
422 LASSERT(body != NULL); /* reply already checked out */
424 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
425 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
426 och->och_fid = lli->lli_fid;
427 och->och_flags = it->it_flags;
428 ll_ioepoch_open(lli, body->ioepoch);
430 return md_set_open_replay_data(md_exp, och, req);
/*
 * Complete an open locally: optionally fill @och from the intent reply
 * (when a new MDS handle was obtained), attach @fd as the file's
 * private data, initialize readahead state and remember the open mode.
 */
433 int ll_local_open(struct file *file, struct lookup_intent *it,
434 struct ll_file_data *fd, struct obd_client_handle *och)
436 struct inode *inode = file->f_dentry->d_inode;
437 struct ll_inode_info *lli = ll_i2info(inode);
440 LASSERT(!LUSTRE_FPRIVATE(file));
445 struct ptlrpc_request *req = it->d.lustre.it_data;
446 struct mdt_body *body;
449 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
453 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
454 if ((it->it_flags & FMODE_WRITE) &&
455 (body->valid & OBD_MD_FLSIZE))
456 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
457 lli->lli_ioepoch, PFID(&lli->lli_fid));
460 LUSTRE_FPRIVATE(file) = fd;
461 ll_readahead_init(inode, &fd->fd_ras);
462 fd->fd_omode = it->it_flags;
466 /* Open a file, and (for the very first open) create objects on the OSTs at
467 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
468 * creation or open until ll_lov_setstripe() ioctl is called. We grab
469 * lli_open_sem to ensure no other process will create objects, send the
470 * stripe MD to the MDS, or try to destroy the objects if that fails.
472 * If we already have the stripe MD locally then we don't request it in
473 * md_open(), by passing a lmm_size = 0.
475 * It is up to the application to ensure no other processes open this file
476 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
477 * used. We might be able to avoid races of that sort by getting lli_open_sem
478 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
479 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
481 int ll_file_open(struct inode *inode, struct file *file)
483 struct ll_inode_info *lli = ll_i2info(inode);
484 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
485 .it_flags = file->f_flags };
486 struct lov_stripe_md *lsm;
487 struct ptlrpc_request *req = NULL;
488 struct obd_client_handle **och_p;
490 struct ll_file_data *fd;
491 int rc = 0, opendir_set = 0;
494 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
495 inode->i_generation, inode, file->f_flags);
497 #ifdef HAVE_VFS_INTENT_PATCHES
500 it = file->private_data; /* XXX: compat macro */
501 file->private_data = NULL; /* prevent ll_local_open assertion */
504 fd = ll_file_data_get();
/* First opener of a directory registers itself as the statahead owner. */
509 if (S_ISDIR(inode->i_mode)) {
510 spin_lock(&lli->lli_lock);
511 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
512 LASSERT(lli->lli_sai == NULL);
513 lli->lli_opendir_key = fd;
514 lli->lli_opendir_pid = cfs_curproc_pid();
517 spin_unlock(&lli->lli_lock);
/* Root dentry: no MDS open needed, just attach the fd. */
520 if (inode->i_sb->s_root == file->f_dentry) {
521 LUSTRE_FPRIVATE(file) = fd;
/* No usable intent from the VFS: build our own OPEN intent in oit. */
525 if (!it || !it->d.lustre.it_disposition) {
526 /* Convert f_flags into access mode. We cannot use file->f_mode,
527 * because everything but O_ACCMODE mask was stripped from
529 if ((oit.it_flags + 1) & O_ACCMODE)
531 if (file->f_flags & O_TRUNC)
532 oit.it_flags |= FMODE_WRITE;
534 /* kernel only calls f_op->open in dentry_open. filp_open calls
535 * dentry_open after call to open_namei that checks permissions.
536 * Only nfsd_open calls dentry_open directly without checking
537 * permissions and because of that this code below is safe. */
538 if (oit.it_flags & FMODE_WRITE)
539 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
541 /* We do not want O_EXCL here, presumably we opened the file
542 * already? XXX - NFS implications? */
543 oit.it_flags &= ~O_EXCL;
545 /* bug20584, if "it_flags" contains O_CREAT, the file will be
546 * created if necessary, then "IT_CREAT" should be set to keep
547 * consistent with it */
548 if (oit.it_flags & O_CREAT)
549 oit.it_op |= IT_CREAT;
555 /* Let's see if we have file open on MDS already. */
556 if (it->it_flags & FMODE_WRITE) {
557 och_p = &lli->lli_mds_write_och;
558 och_usecount = &lli->lli_open_fd_write_count;
559 } else if (it->it_flags & FMODE_EXEC) {
560 och_p = &lli->lli_mds_exec_och;
561 och_usecount = &lli->lli_open_fd_exec_count;
563 och_p = &lli->lli_mds_read_och;
564 och_usecount = &lli->lli_open_fd_read_count;
567 down(&lli->lli_och_sem);
568 if (*och_p) { /* Open handle is present */
569 if (it_disposition(it, DISP_OPEN_OPEN)) {
570 /* Well, there's extra open request that we do not need,
571 let's close it somehow. This will decref request. */
572 rc = it_open_error(DISP_OPEN_OPEN, it);
574 up(&lli->lli_och_sem);
575 ll_file_data_put(fd);
576 GOTO(out_openerr, rc);
578 ll_release_openhandle(file->f_dentry, it);
579 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
584 rc = ll_local_open(file, it, fd, NULL);
587 up(&lli->lli_och_sem);
588 ll_file_data_put(fd);
589 GOTO(out_openerr, rc);
592 LASSERT(*och_usecount == 0);
593 if (!it->d.lustre.it_disposition) {
594 /* We cannot just request lock handle now, new ELC code
595 means that one of other OPEN locks for this file
596 could be cancelled, and since blocking ast handler
597 would attempt to grab och_sem as well, that would
598 result in a deadlock */
599 up(&lli->lli_och_sem);
600 it->it_create_mode |= M_CHECK_STALE;
601 rc = ll_intent_file_open(file, NULL, 0, it);
602 it->it_create_mode &= ~M_CHECK_STALE;
604 ll_file_data_put(fd);
605 GOTO(out_openerr, rc);
608 /* Got some error? Release the request */
609 if (it->d.lustre.it_status < 0) {
610 req = it->d.lustre.it_data;
611 ptlrpc_req_finished(req);
615 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
617 ll_file_data_put(fd);
618 GOTO(out_och_free, rc = -ENOMEM);
621 req = it->d.lustre.it_data;
623 /* md_intent_lock() didn't get a request ref if there was an
624 * open error, so don't do cleanup on the request here
626 /* XXX (green): Should not we bail out on any error here, not
627 * just open error? */
628 rc = it_open_error(DISP_OPEN_OPEN, it);
630 ll_file_data_put(fd);
631 GOTO(out_och_free, rc);
634 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
635 rc = ll_local_open(file, it, fd, *och_p);
637 ll_file_data_put(fd);
638 GOTO(out_och_free, rc);
641 up(&lli->lli_och_sem);
643 /* Must do this outside lli_och_sem lock to prevent deadlock where
644 different kind of OPEN lock for this same inode gets cancelled
645 by ldlm_cancel_lru */
646 if (!S_ISREG(inode->i_mode))
653 if (file->f_flags & O_LOV_DELAY_CREATE ||
654 !(file->f_mode & FMODE_WRITE)) {
655 CDEBUG(D_INODE, "object creation was delayed\n");
659 file->f_flags &= ~O_LOV_DELAY_CREATE;
662 ptlrpc_req_finished(req);
664 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
668 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
669 *och_p = NULL; /* OBD_FREE writes some magic there */
672 up(&lli->lli_och_sem);
674 if (opendir_set != 0)
675 ll_stop_statahead(inode, lli->lli_opendir_key);
681 /* Fills the obdo with the attributes for the lsm */
/*
 * Query the OSTs for size/blocks/times of the object(s) described by
 * @lsm via an async getattr collected in a ptlrpc request set, then
 * mask o_valid down to the fields glimpsed from the OSTs.
 */
682 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
683 struct obd_capa *capa, struct obdo *obdo)
685 struct ptlrpc_request_set *set;
686 struct obd_info oinfo = { { { 0 } } };
691 LASSERT(lsm != NULL);
695 oinfo.oi_oa->o_id = lsm->lsm_object_id;
696 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
697 oinfo.oi_oa->o_mode = S_IFREG;
698 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
699 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
700 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
701 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
703 oinfo.oi_capa = capa;
705 set = ptlrpc_prep_set();
707 CERROR("can't allocate ptlrpc set\n");
710 rc = obd_getattr_async(exp, &oinfo, set);
712 rc = ptlrpc_set_wait(set);
713 ptlrpc_set_destroy(set);
/* Only these attributes are authoritative from the OSTs. */
716 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
717 OBD_MD_FLATIME | OBD_MD_FLMTIME |
718 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
722 /* Fills the obdo with the attributes for the inode defined by lsm */
/* Glimpse OST attributes for @inode's stripes and refresh the VFS
 * inode (size, blocks, times) from the result. */
723 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
725 struct ll_inode_info *lli = ll_i2info(inode);
726 struct obd_capa *capa = ll_mdscapa_get(inode);
730 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
733 obdo_refresh_inode(inode, obdo, obdo->o_valid);
735 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
736 lli->lli_smd->lsm_object_id, i_size_read(inode),
737 (unsigned long long)inode->i_blocks,
738 (unsigned long)ll_inode_blksize(inode));
/*
 * Merge the OST-side lock value blocks (size, blocks, a/m/ctime) into
 * the VFS inode, under the llite inode size lock.
 */
743 int ll_merge_lvb(struct inode *inode)
745 struct ll_inode_info *lli = ll_i2info(inode);
746 struct ll_sb_info *sbi = ll_i2sbi(inode);
752 ll_inode_size_lock(inode, 1);
753 inode_init_lvb(inode, &lvb);
754 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
755 i_size_write(inode, lvb.lvb_size);
756 inode->i_blocks = lvb.lvb_blocks;
758 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
759 LTIME_S(inode->i_atime) = lvb.lvb_atime;
760 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
761 ll_inode_size_unlock(inode, 1);
/* Glimpse OST attributes for @lsm and copy them into a stat-like
 * structure for an ioctl caller (no capability is passed). */
766 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
769 struct obdo obdo = { 0 };
772 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
774 st->st_size = obdo.o_size;
775 st->st_blocks = obdo.o_blocks;
776 st->st_mtime = obdo.o_mtime;
777 st->st_atime = obdo.o_atime;
778 st->st_ctime = obdo.o_ctime;
/*
 * Initialize a cl_io for a read or write on @file: pick up O_NONBLOCK
 * and O_APPEND from f_flags, attach the file's cl_object, and set the
 * DLM lock policy — never lock for nolock mounts (server-side locking
 * instead), mandatory lock for O_APPEND, otherwise "maybe".
 */
783 void ll_io_init(struct cl_io *io, const struct file *file, int write)
785 struct inode *inode = file->f_dentry->d_inode;
787 memset(io, 0, sizeof *io);
788 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
790 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
791 io->ci_obj = ll_i2info(inode)->lli_clob;
792 io->ci_lockreq = CILR_MAYBE;
793 if (ll_file_nolock(file)) {
794 io->ci_lockreq = CILR_NEVER;
795 io->ci_no_srvlock = 1;
796 } else if (file->f_flags & O_APPEND) {
797 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common engine behind every read/write entry point: initialize the
 * cl_io, copy the per-call arguments (normal iovec / sendfile /
 * splice) into the vvp/ccc io slots according to the IO subtype, run
 * the cl_io loop, and propagate the resulting file position back to
 * *ppos.  Returns bytes transferred or a negative errno.
 * NOTE(review): case labels of the subtype switch are not visible in
 * this chunk; the groupings below follow the assigned fields.
 */
801 static ssize_t ll_file_io_generic(const struct lu_env *env,
802 struct vvp_io_args *args, struct file *file,
803 enum cl_io_type iot, loff_t *ppos, size_t count)
809 io = &ccc_env_info(env)->cti_io;
810 ll_io_init(io, file, iot == CIT_WRITE);
812 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
813 struct vvp_io *vio = vvp_env_io(env);
814 struct ccc_io *cio = ccc_env_io(env);
816 vio->cui_io_subtype = args->via_io_subtype;
818 switch (vio->cui_io_subtype) {
820 cio->cui_iov = args->u.normal.via_iov;
821 cio->cui_nrsegs = args->u.normal.via_nrsegs;
822 #ifndef HAVE_FILE_WRITEV
823 cio->cui_iocb = args->u.normal.via_iocb;
827 vio->u.sendfile.cui_actor = args->u.sendfile.via_actor;
828 vio->u.sendfile.cui_target = args->u.sendfile.via_target;
831 vio->u.splice.cui_pipe = args->u.splice.via_pipe;
832 vio->u.splice.cui_flags = args->u.splice.via_flags;
835 CERROR("Unknow IO type - %u\n", vio->cui_io_subtype);
838 cio->cui_fd = LUSTRE_FPRIVATE(file);
839 result = cl_io_loop(env, io);
841 /* cl_io_rw_init() handled IO */
842 result = io->ci_result;
845 if (io->ci_nob > 0) {
847 *ppos = io->u.ci_wr.wr.crw_pos;
855 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate an iovec array and compute the total byte count into
 * *count, possibly shortening *nr_segs at the first inaccessible
 * segment (kernel semantics: a partial transfer up to the bad segment
 * is allowed).
 */
857 static int ll_file_get_iov_count(const struct iovec *iov,
858 unsigned long *nr_segs, size_t *count)
863 for (seg = 0; seg < *nr_segs; seg++) {
864 const struct iovec *iv = &iov[seg];
867 * If any segment has a negative length, or the cumulative
868 * length ever wraps negative then return -EINVAL.
871 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
873 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
878 cnt -= iv->iov_len; /* This segment is no good */
885 #ifdef HAVE_FILE_READV
/* Vector read entry point (pre-AIO kernels): validate the iovec, set
 * up IO_NORMAL args in the cl environment and run the generic IO. */
886 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
887 unsigned long nr_segs, loff_t *ppos)
890 struct vvp_io_args *args;
896 result = ll_file_get_iov_count(iov, &nr_segs, &count);
900 env = cl_env_get(&refcheck);
902 RETURN(PTR_ERR(env));
904 args = vvp_env_args(env, IO_NORMAL);
905 args->u.normal.via_iov = (struct iovec *)iov;
906 args->u.normal.via_nrsegs = nr_segs;
908 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
909 cl_env_put(env, &refcheck);
/* Single-buffer read: wrap @buf in the env-local iovec and delegate to
 * ll_file_readv(). */
913 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
917 struct iovec *local_iov;
922 env = cl_env_get(&refcheck);
924 RETURN(PTR_ERR(env));
926 local_iov = &vvp_env_info(env)->vti_local_iov;
927 local_iov->iov_base = (void __user *)buf;
928 local_iov->iov_len = count;
929 result = ll_file_readv(file, local_iov, 1, ppos);
930 cl_env_put(env, &refcheck);
/* AIO read entry point: validate the iovec, stash iov + iocb into
 * IO_NORMAL args and run the generic IO against iocb->ki_pos. */
935 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
936 unsigned long nr_segs, loff_t pos)
939 struct vvp_io_args *args;
945 result = ll_file_get_iov_count(iov, &nr_segs, &count);
949 env = cl_env_get(&refcheck);
951 RETURN(PTR_ERR(env));
953 args = vvp_env_args(env, IO_NORMAL);
954 args->u.normal.via_iov = (struct iovec *)iov;
955 args->u.normal.via_nrsegs = nr_segs;
956 args->u.normal.via_iocb = iocb;
958 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
959 &iocb->ki_pos, count);
960 cl_env_put(env, &refcheck);
/* Single-buffer read (AIO kernels): build a synchronous kiocb plus an
 * env-local iovec and delegate to ll_file_aio_read(), then copy the
 * advanced position back to *ppos. */
964 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
968 struct iovec *local_iov;
974 env = cl_env_get(&refcheck);
976 RETURN(PTR_ERR(env));
978 local_iov = &vvp_env_info(env)->vti_local_iov;
979 kiocb = &vvp_env_info(env)->vti_kiocb;
980 local_iov->iov_base = (void __user *)buf;
981 local_iov->iov_len = count;
982 init_sync_kiocb(kiocb, file);
983 kiocb->ki_pos = *ppos;
984 kiocb->ki_left = count;
986 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
987 *ppos = kiocb->ki_pos;
989 cl_env_put(env, &refcheck);
995 * Write to a file (through the page cache).
997 #ifdef HAVE_FILE_WRITEV
/* Vector write entry point (pre-AIO kernels): validate the iovec, set
 * up IO_NORMAL args and run the generic IO as a CIT_WRITE. */
998 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
999 unsigned long nr_segs, loff_t *ppos)
1002 struct vvp_io_args *args;
1008 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1012 env = cl_env_get(&refcheck);
1014 RETURN(PTR_ERR(env));
1016 args = vvp_env_args(env, IO_NORMAL);
1017 args->u.normal.via_iov = (struct iovec *)iov;
1018 args->u.normal.via_nrsegs = nr_segs;
1020 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1021 cl_env_put(env, &refcheck);
/* Single-buffer write: wrap @buf in the env-local iovec and delegate
 * to ll_file_writev(). */
1025 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1029 struct iovec *local_iov;
1034 env = cl_env_get(&refcheck);
1036 RETURN(PTR_ERR(env));
1038 local_iov = &vvp_env_info(env)->vti_local_iov;
1039 local_iov->iov_base = (void __user *)buf;
1040 local_iov->iov_len = count;
1042 result = ll_file_writev(file, local_iov, 1, ppos);
1043 cl_env_put(env, &refcheck);
1047 #else /* AIO stuff */
/* AIO write entry point: validate the iovec, stash iov + iocb into
 * IO_NORMAL args and run the generic IO against iocb->ki_pos. */
1048 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1049 unsigned long nr_segs, loff_t pos)
1052 struct vvp_io_args *args;
1058 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1062 env = cl_env_get(&refcheck);
1064 RETURN(PTR_ERR(env));
1066 args = vvp_env_args(env, IO_NORMAL);
1067 args->u.normal.via_iov = (struct iovec *)iov;
1068 args->u.normal.via_nrsegs = nr_segs;
1069 args->u.normal.via_iocb = iocb;
1071 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1072 &iocb->ki_pos, count);
1073 cl_env_put(env, &refcheck);
/* Single-buffer write (AIO kernels): build a synchronous kiocb plus an
 * env-local iovec and delegate to ll_file_aio_write(), then copy the
 * advanced position back to *ppos. */
1077 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1081 struct iovec *local_iov;
1082 struct kiocb *kiocb;
1087 env = cl_env_get(&refcheck);
1089 RETURN(PTR_ERR(env));
1091 local_iov = &vvp_env_info(env)->vti_local_iov;
1092 kiocb = &vvp_env_info(env)->vti_kiocb;
1093 local_iov->iov_base = (void __user *)buf;
1094 local_iov->iov_len = count;
1095 init_sync_kiocb(kiocb, file);
1096 kiocb->ki_pos = *ppos;
1097 kiocb->ki_left = count;
1099 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1100 *ppos = kiocb->ki_pos;
1102 cl_env_put(env, &refcheck);
1108 #ifdef HAVE_KERNEL_SENDFILE
1110 * Send file content (through pagecache) somewhere with helper
/* sendfile(2) read path: pack the actor callback and target into
 * IO_SENDFILE args and run the generic IO as a CIT_READ. */
1112 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1113 read_actor_t actor, void *target)
1116 struct vvp_io_args *args;
1121 env = cl_env_get(&refcheck);
1123 RETURN(PTR_ERR(env));
1125 args = vvp_env_args(env, IO_SENDFILE);
1126 args->u.sendfile.via_target = target;
1127 args->u.sendfile.via_actor = actor;
1129 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1130 cl_env_put(env, &refcheck);
1135 #ifdef HAVE_KERNEL_SPLICE_READ
1137 * Send file content (through pagecache) somewhere with helper
/* splice_read path: pack the destination pipe and splice flags into
 * IO_SPLICE args and run the generic IO as a CIT_READ. */
1139 static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
1140 struct pipe_inode_info *pipe, size_t count,
1144 struct vvp_io_args *args;
1149 env = cl_env_get(&refcheck);
1151 RETURN(PTR_ERR(env));
1153 args = vvp_env_args(env, IO_SPLICE);
1154 args->u.splice.via_pipe = pipe;
1155 args->u.splice.via_flags = flags;
1157 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1158 cl_env_put(env, &refcheck);
/*
 * LL_IOC_RECREATE_OBJ ioctl: re-create a lost OST object for this
 * file's stripe.  Admin-only (CFS_CAP_SYS_ADMIN).  Copies the request
 * from userspace, clones the file's lsm, marks the obdo with
 * OBD_FL_RECREATE_OBJS and calls obd_create() under the inode size
 * lock so the striping cannot change underneath us.
 */
1163 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1166 struct obd_export *exp = ll_i2dtexp(inode);
1167 struct ll_recreate_obj ucreatp;
1168 struct obd_trans_info oti = { 0 };
1169 struct obdo *oa = NULL;
1172 struct lov_stripe_md *lsm, *lsm2;
1175 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1178 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1179 sizeof(struct ll_recreate_obj)))
1186 ll_inode_size_lock(inode, 0);
1187 lsm = ll_i2info(inode)->lli_smd;
1189 GOTO(out, rc = -ENOENT);
1190 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1191 (lsm->lsm_stripe_count));
1193 OBD_ALLOC(lsm2, lsm_size);
1195 GOTO(out, rc = -ENOMEM);
/* o_nlink carries the target OST index for the recreate request. */
1197 oa->o_id = ucreatp.lrc_id;
1198 oa->o_gr = ucreatp.lrc_group;
1199 oa->o_nlink = ucreatp.lrc_ost_idx;
1200 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1201 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1202 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1203 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1205 memcpy(lsm2, lsm, lsm_size);
1206 rc = obd_create(exp, oa, &lsm2, &oti);
1208 OBD_FREE(lsm2, lsm_size);
1211 ll_inode_size_unlock(inode, 0);
/*
 * Set striping for @inode by sending an MDS open intent carrying the
 * user's lov_user_md (@lum).  Fails with EEXIST-style handling if the
 * file already has a stripe (checked under the inode size lock).  The
 * open handle, intent and intent request are released before return.
 */
1216 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1217 int flags, struct lov_user_md *lum, int lum_size)
1219 struct lov_stripe_md *lsm;
1220 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1224 ll_inode_size_lock(inode, 0);
1225 lsm = ll_i2info(inode)->lli_smd;
1227 ll_inode_size_unlock(inode, 0);
1228 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1233 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1236 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1237 GOTO(out_req_free, rc = -ENOENT);
1238 rc = oit.d.lustre.it_status;
1240 GOTO(out_req_free, rc);
1242 ll_release_openhandle(file->f_dentry, &oit);
1245 ll_inode_size_unlock(inode, 0);
1246 ll_intent_release(&oit);
1249 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the LOV EA (striping descriptor) of @filename from the MDS.
 * Validates the magic (V1/V3/JOIN), byte-swaps the EA to host
 * endianness on big-endian hosts, and for JOIN files flattens the
 * extent array into a lov_user_md_join suitable for userspace.  On
 * success *lmmp/*lmm_size describe the EA inside the reply buffer and
 * *request holds the reply the caller must eventually release.
 */
1253 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1254 struct lov_mds_md **lmmp, int *lmm_size,
1255 struct ptlrpc_request **request)
1257 struct ll_sb_info *sbi = ll_i2sbi(inode);
1258 struct mdt_body *body;
1259 struct lov_mds_md *lmm = NULL;
1260 struct ptlrpc_request *req = NULL;
1261 struct obd_capa *oc;
1264 rc = ll_get_max_mdsize(sbi, &lmmsize);
1268 oc = ll_mdscapa_get(inode);
1269 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1270 oc, filename, strlen(filename) + 1,
1271 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1272 ll_i2suppgid(inode), &req);
1275 CDEBUG(D_INFO, "md_getattr_name failed "
1276 "on %s: rc %d\n", filename, rc);
1280 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1281 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1283 lmmsize = body->eadatasize;
1285 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1287 GOTO(out, rc = -ENODATA);
1290 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1291 LASSERT(lmm != NULL);
1293 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1294 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
1295 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
1296 GOTO(out, rc = -EPROTO);
1300 * This is coming from the MDS, so is probably in
1301 * little endian. We convert it to host endian before
1302 * passing it to userspace.
/* The condition below is true only on big-endian hosts. */
1304 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1305 /* if function called for directory - we should
1306 * avoid swabbing non-existent lsm objects */
1307 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1308 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1309 if (S_ISREG(body->mode))
1310 lustre_swab_lov_user_md_objects(
1311 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1312 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1313 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1314 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1315 if (S_ISREG(body->mode))
1316 lustre_swab_lov_user_md_objects(
1317 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1318 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1319 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
1320 lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
/* JOIN file: unpack the lsm and rebuild a flat per-stripe extent map
 * (lov_user_md_join) for the userspace caller. */
1324 if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
1325 struct lov_stripe_md *lsm;
1326 struct lov_user_md_join *lmj;
1327 int lmj_size, i, aindex = 0;
1329 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
1331 GOTO(out, rc = -ENOMEM);
1332 rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm);
1334 GOTO(out_free_memmd, rc);
1336 lmj_size = sizeof(struct lov_user_md_join) +
1337 lsm->lsm_stripe_count *
1338 sizeof(struct lov_user_ost_data_join);
1339 OBD_ALLOC(lmj, lmj_size);
1341 GOTO(out_free_memmd, rc = -ENOMEM);
1343 memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
1344 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1345 struct lov_extent *lex =
1346 &lsm->lsm_array->lai_ext_array[aindex];
1348 if (lex->le_loi_idx + lex->le_stripe_count <= i)
1350 CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
1351 LPU64" len %d\n", aindex, i,
1352 lex->le_start, (int)lex->le_len);
1353 lmj->lmm_objects[i].l_extent_start =
/* le_len == -1 marks an extent open to EOF. */
1356 if ((int)lex->le_len == -1)
1357 lmj->lmm_objects[i].l_extent_end = -1;
1359 lmj->lmm_objects[i].l_extent_end =
1360 lex->le_start + lex->le_len;
1361 lmj->lmm_objects[i].l_object_id =
1362 lsm->lsm_oinfo[i]->loi_id;
1363 lmj->lmm_objects[i].l_object_gr =
1364 lsm->lsm_oinfo[i]->loi_gr;
1365 lmj->lmm_objects[i].l_ost_gen =
1366 lsm->lsm_oinfo[i]->loi_ost_gen;
1367 lmj->lmm_objects[i].l_ost_idx =
1368 lsm->lsm_oinfo[i]->loi_ost_idx;
1370 lmm = (struct lov_mds_md *)lmj;
1373 obd_free_memmd(sbi->ll_dt_exp, &lsm);
1377 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: copy a raw LOV EA (with explicit objects,
 * hence MDS_OPEN_HAS_OBJS) from userspace and apply it to the file.
 * Requires CAP_SYS_ADMIN. The temporary buffer is freed on both the
 * copy-failure path and after use.
 */
1382 static int ll_lov_setea(struct inode *inode, struct file *file,
1385 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1386 struct lov_user_md *lump;
/* One lov_user_md header plus a single lov_user_ost_data entry. */
1387 int lum_size = sizeof(struct lov_user_md) +
1388 sizeof(struct lov_user_ost_data);
1392 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1395 OBD_ALLOC(lump, lum_size);
1399 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1400 OBD_FREE(lump, lum_size);
1404 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1406 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: read a v1 lov_user_md first (smaller),
 * re-read as v3 if the magic says so, apply the striping, then report
 * the resulting stripe info back to userspace via LL_IOC_LOV_GETSTRIPE.
 */
1410 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1413 struct lov_user_md_v3 lumv3;
/* lumv1 aliases the v3 buffer; safe because v1 is a prefix of v3. */
1414 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1415 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1416 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1419 int flags = FMODE_WRITE;
1422 /* first try with v1 which is smaller than v3 */
1423 lum_size = sizeof(struct lov_user_md_v1);
1424 if (copy_from_user(lumv1, lumv1p, lum_size))
1427 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1428 lum_size = sizeof(struct lov_user_md_v3);
1429 if (copy_from_user(&lumv3, lumv3p, lum_size))
1433 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* Zero the user's stripe_count before echoing back the real layout. */
1435 put_user(0, &lumv1p->lmm_stripe_count);
1436 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1437 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE handler: delegate to the data export's iocontrol
 * with the inode's cached stripe metadata (lli_smd).
 */
1443 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1445 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1450 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * Take a group lock (gid = @arg) on the file. Only one group lock per
 * file descriptor is allowed; fd_flags/fd_grouplock are protected by
 * lli_lock, and the lock is dropped outside the spinlock if another
 * thread raced us to it.
 */
1454 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1456 struct ll_inode_info *lli = ll_i2info(inode);
1457 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1458 struct ccc_grouplock grouplock;
/* Group locks make no sense with -o noflock / nolock mounts. */
1462 if (ll_file_nolock(file))
1463 RETURN(-EOPNOTSUPP);
1465 spin_lock(&lli->lli_lock);
1466 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1467 CERROR("group lock already existed with gid %lu\n",
1468 fd->fd_grouplock.cg_gid);
1469 spin_unlock(&lli->lli_lock);
1472 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1473 spin_unlock(&lli->lli_lock);
/* Acquire outside the spinlock: cl_get_grouplock may block. */
1475 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1476 arg, (file->f_flags & O_NONBLOCK), &grouplock);
1480 spin_lock(&lli->lli_lock);
1481 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1482 spin_unlock(&lli->lli_lock);
1483 CERROR("another thread just won the race\n");
1484 cl_put_grouplock(&grouplock);
1488 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1489 fd->fd_grouplock = grouplock;
1490 spin_unlock(&lli->lli_lock);
1492 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * Release the group lock with gid == @arg held on this file descriptor.
 * State is cleared under lli_lock; the actual cl_put_grouplock happens
 * after dropping the spinlock (it may block).
 */
1496 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1498 struct ll_inode_info *lli = ll_i2info(inode);
1499 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1500 struct ccc_grouplock grouplock;
1503 spin_lock(&lli->lli_lock);
1504 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1505 spin_unlock(&lli->lli_lock);
1506 CERROR("no group lock held\n");
1509 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* Refuse to drop a lock with a different gid than requested. */
1511 if (fd->fd_grouplock.cg_gid != arg) {
1512 CERROR("group lock %lu doesn't match current id %lu\n",
1513 arg, fd->fd_grouplock.cg_gid);
1514 spin_unlock(&lli->lli_lock);
/* Snapshot then clear fd state before unlocking. */
1518 grouplock = fd->fd_grouplock;
1519 fd->fd_grouplock.cg_env = NULL;
1520 fd->fd_grouplock.cg_lock = NULL;
1521 fd->fd_grouplock.cg_gid = 0;
1522 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1523 spin_unlock(&lli->lli_lock);
1525 cl_put_grouplock(&grouplock);
1526 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1530 #if LUSTRE_FIX >= 50
/*
 * Validate a file-join request: server must support join, both inodes
 * must be regular files, distinct, and the head size must be a multiple
 * of JOIN_FILE_ALIGN (64K per the message below).
 */
1531 static int join_sanity_check(struct inode *head, struct inode *tail)
1534 if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
1535 CERROR("server do not support join \n");
1538 if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
1539 CERROR("tail ino %lu and ino head %lu must be regular\n",
1540 head->i_ino, tail->i_ino);
1543 if (head->i_ino == tail->i_ino) {
1544 CERROR("file %lu can not be joined to itself \n", head->i_ino);
1547 if (i_size_read(head) % JOIN_FILE_ALIGN) {
1548 CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
/*
 * Perform the MDS-side join of tail file into head file: enqueue an
 * M_JOIN_FILE open intent (LDLM_IBITS/LCK_CW) with the head size passed
 * as intent data, then release any granted lock and the open handle.
 */
1554 static int join_file(struct inode *head_inode, struct file *head_filp,
1555 struct file *tail_filp)
1557 struct dentry *tail_dentry = tail_filp->f_dentry;
1558 struct lookup_intent oit = {.it_op = IT_OPEN,
1559 .it_flags = head_filp->f_flags,
1560 .it_create_mode = M_JOIN_FILE};
1561 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CW,
1562 ll_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL };
1564 struct lustre_handle lockh;
1565 struct md_op_data *op_data;
/* NOTE(review): duplicate assignment; tail_dentry already initialized above. */
1570 tail_dentry = tail_filp->f_dentry;
/* The head's current size tells the MDS where the tail is appended. */
1572 data = i_size_read(head_inode);
1573 op_data = ll_prep_md_op_data(NULL, head_inode,
1574 tail_dentry->d_parent->d_inode,
1575 tail_dentry->d_name.name,
1576 tail_dentry->d_name.len, 0,
1577 LUSTRE_OPC_ANY, &data);
1578 if (IS_ERR(op_data))
1579 RETURN(PTR_ERR(op_data));
1581 rc = md_enqueue(ll_i2mdexp(head_inode), &einfo, &oit,
1582 op_data, &lockh, NULL, 0, NULL, 0);
1584 ll_finish_md_op_data(op_data);
1588 rc = oit.d.lustre.it_status;
1590 if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
1591 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
1592 ptlrpc_req_finished((struct ptlrpc_request *)
1593 oit.d.lustre.it_data);
1597 if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
1599 ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
1600 oit.d.lustre.it_lock_mode = 0;
1602 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1603 it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
/* Close the open handle the intent created; we only wanted the join. */
1604 ll_release_openhandle(head_filp->f_dentry, &oit);
1606 ll_intent_release(&oit);
/*
 * LL_IOC_JOIN entry point: open the tail by name, EX-lock both files in
 * ascending-ino order (deadlock avoidance), sanity-check, perform the
 * join, then unwind via the cleanup_phase switch. On success the head's
 * cached stripe md is freed so it is re-fetched with the joined layout.
 */
1610 static int ll_file_join(struct inode *head, struct file *filp,
1611 char *filename_tail)
1613 struct inode *tail = NULL, *first = NULL, *second = NULL;
1614 struct dentry *tail_dentry;
1615 struct file *tail_filp, *first_filp, *second_filp;
1616 struct ll_lock_tree first_tree, second_tree;
1617 struct ll_lock_tree_node *first_node, *second_node;
1618 struct ll_inode_info *hlli = ll_i2info(head);
1619 int rc = 0, cleanup_phase = 0;
1622 CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
1623 head->i_ino, head->i_generation, head, filename_tail);
1625 tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
1626 if (IS_ERR(tail_filp)) {
/* NOTE(review): error message lacks a trailing newline. */
1627 CERROR("Can not open tail file %s", filename_tail);
1628 rc = PTR_ERR(tail_filp);
1631 tail = igrab(tail_filp->f_dentry->d_inode);
1633 tail_dentry = tail_filp->f_dentry;
1634 LASSERT(tail_dentry);
1637 /*reorder the inode for lock sequence*/
1638 first = head->i_ino > tail->i_ino ? head : tail;
1639 second = head->i_ino > tail->i_ino ? tail : head;
1640 first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
1641 second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
1643 CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
1644 head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
/* EX-lock the whole range [0, EOF] on each inode, in order. */
1645 first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
1646 if (IS_ERR(first_node)){
1647 rc = PTR_ERR(first_node);
1650 first_tree.lt_fd = first_filp->private_data;
1651 rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
1656 second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
1657 if (IS_ERR(second_node)){
1658 rc = PTR_ERR(second_node);
1661 second_tree.lt_fd = second_filp->private_data;
1662 rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
1667 rc = join_sanity_check(head, tail);
1671 rc = join_file(head, filp, tail_filp);
/* Fall-through cleanup: each phase undoes one acquired resource. */
1675 switch (cleanup_phase) {
1677 ll_tree_unlock(&second_tree);
1678 obd_cancel_unused(ll_i2dtexp(second),
1679 ll_i2info(second)->lli_smd, 0, NULL);
1681 ll_tree_unlock(&first_tree);
1682 obd_cancel_unused(ll_i2dtexp(first),
1683 ll_i2info(first)->lli_smd, 0, NULL);
1685 filp_close(tail_filp, 0);
/* On success, drop the stale layout so the next access refetches it. */
1688 if (head && rc == 0) {
1689 obd_free_memmd(ll_i2sbi(head)->ll_dt_exp,
1691 hlli->lli_smd = NULL;
1696 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
1701 #endif /* LUSTRE_FIX >= 50 */
1704 * Close inode open handle
1706 * \param dentry [in] dentry which contains the inode
1707 * \param it [in,out] intent which contains open info and result
1710 * \retval <0 failure
1712 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1714 struct inode *inode = dentry->d_inode;
1715 struct obd_client_handle *och;
1721 /* Root ? Do nothing. */
1722 if (dentry->d_inode->i_sb->s_root == dentry)
1725 /* No open handle to close? Move away */
1726 if (!it_disposition(it, DISP_OPEN_OPEN))
1729 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
/* Build a transient client handle just to close the MDS open. */
1731 OBD_ALLOC(och, sizeof(*och));
1733 GOTO(out, rc = -ENOMEM);
1735 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1736 ll_i2info(inode), it, och);
1738 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1741 /* this one is in place of ll_file_open */
1742 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1743 ptlrpc_req_finished(it->d.lustre.it_data);
1744 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1749 * Get size for inode for which FIEMAP mapping is requested.
1750 * Make the FIEMAP get_info call and returns the result.
1752 int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1755 struct obd_export *exp = ll_i2dtexp(inode);
1756 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1757 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1758 int vallen = num_bytes;
1762 /* If the stripe_count > 1 and the application does not understand
1763 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1765 if (lsm->lsm_stripe_count > 1 &&
1766 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
/* Identify the object for the OST-side get_info query. */
1769 fm_key.oa.o_id = lsm->lsm_object_id;
1770 fm_key.oa.o_gr = lsm->lsm_object_gr;
1771 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1773 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1776 /* If filesize is 0, then there would be no objects for mapping */
1777 if (fm_key.oa.o_size == 0) {
1778 fiemap->fm_mapped_extents = 0;
1782 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
/* The result is written in place into @fiemap by the get_info call. */
1784 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1786 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH handler: copy the user's request header to learn the
 * path buffer length, allocate an output buffer of that size, forward to
 * the MDC via obd_iocontrol, and copy the resolved path back to @arg.
 */
1791 int ll_fid2path(struct obd_export *exp, void *arg)
1793 struct getinfo_fid2path *gfout, *gfin;
1797 /* Need to get the buflen */
1798 OBD_ALLOC_PTR(gfin);
1801 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* gf_pathlen comes from userspace; it sizes the variable tail. */
1806 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1807 OBD_ALLOC(gfout, outsize);
1808 if (gfout == NULL) {
1812 memcpy(gfout, gfin, sizeof(*gfout));
1815 /* Call mdc_iocontrol */
1816 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1819 if (copy_to_user(arg, gfout, outsize))
1823 OBD_FREE(gfout, outsize);
/*
 * Main llite file ioctl dispatcher. Handles flag get/set, LOV striping
 * ioctls, FIEMAP, file join (beta builds), group locks, statfs, fid/path
 * translation, then dynamically registered handlers, and finally falls
 * through to the data export's obd_iocontrol.
 */
1827 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1830 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1834 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1835 inode->i_generation, inode, cmd);
1836 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1838 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1839 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1843 case LL_IOC_GETFLAGS:
1844 /* Get the current value of the file flags */
1845 return put_user(fd->fd_flags, (int *)arg);
1846 case LL_IOC_SETFLAGS:
1847 case LL_IOC_CLRFLAGS:
1848 /* Set or clear specific file flags */
1849 /* XXX This probably needs checks to ensure the flags are
1850 * not abused, and to handle any flag side effects.
1852 if (get_user(flags, (int *) arg))
1855 if (cmd == LL_IOC_SETFLAGS) {
/* Disabling locking is only safe for O_DIRECT I/O. */
1856 if ((flags & LL_FILE_IGNORE_LOCK) &&
1857 !(file->f_flags & O_DIRECT)) {
1858 CERROR("%s: unable to disable locking on "
1859 "non-O_DIRECT file\n", current->comm);
1863 fd->fd_flags |= flags;
1865 fd->fd_flags &= ~flags;
1868 case LL_IOC_LOV_SETSTRIPE:
1869 RETURN(ll_lov_setstripe(inode, file, arg));
1870 case LL_IOC_LOV_SETEA:
1871 RETURN(ll_lov_setea(inode, file, arg));
1872 case LL_IOC_LOV_GETSTRIPE:
1873 RETURN(ll_lov_getstripe(inode, arg));
1874 case LL_IOC_RECREATE_OBJ:
1875 RETURN(ll_lov_recreate_obj(inode, file, arg));
1876 case FSFILT_IOC_FIEMAP: {
1877 struct ll_user_fiemap *fiemap_s;
1878 size_t num_bytes, ret_bytes;
1879 unsigned int extent_count;
1882 /* Get the extent count so we can calculate the size of
1883 * required fiemap buffer */
1884 if (get_user(extent_count,
1885 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
/* NOTE(review): extent_count is user-controlled; the multiply below
 * could overflow on 32-bit size_t — confirm a range check exists in
 * elided lines. */
1887 num_bytes = sizeof(*fiemap_s) + (extent_count *
1888 sizeof(struct ll_fiemap_extent));
1889 OBD_VMALLOC(fiemap_s, num_bytes);
1890 if (fiemap_s == NULL)
1893 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1895 GOTO(error, rc = -EFAULT);
/* Reject flags we don't understand, echoing the supported set back. */
1897 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1898 fiemap_s->fm_flags = fiemap_s->fm_flags &
1899 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1900 if (copy_to_user((char *)arg, fiemap_s,
1902 GOTO(error, rc = -EFAULT);
1904 GOTO(error, rc = -EBADR);
1907 /* If fm_extent_count is non-zero, read the first extent since
1908 * it is used to calculate end_offset and device from previous
1911 if (copy_from_user(&fiemap_s->fm_extents[0],
1912 (char __user *)arg + sizeof(*fiemap_s),
1913 sizeof(struct ll_fiemap_extent)))
1914 GOTO(error, rc = -EFAULT);
/* FIEMAP_FLAG_SYNC: flush dirty pages before mapping extents. */
1917 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1920 rc = filemap_fdatawrite(inode->i_mapping);
1925 rc = ll_fiemap(inode, fiemap_s, num_bytes);
1929 ret_bytes = sizeof(struct ll_user_fiemap);
1931 if (extent_count != 0)
1932 ret_bytes += (fiemap_s->fm_mapped_extents *
1933 sizeof(struct ll_fiemap_extent));
1935 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1939 OBD_VFREE(fiemap_s, num_bytes);
1942 case FSFILT_IOC_GETFLAGS:
1943 case FSFILT_IOC_SETFLAGS:
1944 RETURN(ll_iocontrol(inode, file, cmd, arg));
1945 case FSFILT_IOC_GETVERSION_OLD:
1946 case FSFILT_IOC_GETVERSION:
1947 RETURN(put_user(inode->i_generation, (int *)arg));
1949 #if LUSTRE_FIX >= 50
1950 /* Allow file join in beta builds to allow debuggging */
1954 ftail = getname((const char *)arg);
1956 RETURN(PTR_ERR(ftail));
1957 rc = ll_file_join(inode, file, ftail);
1961 CWARN("file join is not supported in this version of Lustre\n");
1965 case LL_IOC_GROUP_LOCK:
1966 RETURN(ll_get_grouplock(inode, file, arg));
1967 case LL_IOC_GROUP_UNLOCK:
1968 RETURN(ll_put_grouplock(inode, file, arg));
1969 case IOC_OBD_STATFS:
1970 RETURN(ll_obd_statfs(inode, (void *)arg));
1972 /* We need to special case any other ioctls we want to handle,
1973 * to send them to the MDS/OST as appropriate and to properly
1974 * network encode the arg field.
1975 case FSFILT_IOC_SETVERSION_OLD:
1976 case FSFILT_IOC_SETVERSION:
1978 case LL_IOC_FLUSHCTX:
1979 RETURN(ll_flush_ctx(inode));
1980 case LL_IOC_PATH2FID: {
1981 if (copy_to_user((void *)arg, ll_inode2fid(inode),
1982 sizeof(struct lu_fid)))
1987 case OBD_IOC_FID2PATH:
1988 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
/* Unknown cmd: try dynamically registered handlers, then the OSC. */
1994 ll_iocontrol_call(inode, file, cmd, arg, &err))
1997 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek for Lustre files. For SEEK_END the cluster-wide size must be
 * obtained first (cl_glimpse_size) since other clients may have extended
 * the file; the size is then read under the inode size lock.
 */
2003 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
2005 struct inode *inode = file->f_dentry->d_inode;
/* Tentative target used only for the trace message below. */
2008 retval = offset + ((origin == 2) ? i_size_read(inode) :
2009 (origin == 1) ? file->f_pos : 0);
2010 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
2011 inode->i_ino, inode->i_generation, inode, retval, retval,
2012 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
2013 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
2015 if (origin == 2) { /* SEEK_END */
/* NOTE(review): nonblock is computed but its use is not visible here. */
2016 int nonblock = 0, rc;
2018 if (file->f_flags & O_NONBLOCK)
2019 nonblock = LDLM_FL_BLOCK_NOWAIT;
2021 rc = cl_glimpse_size(inode);
2025 ll_inode_size_lock(inode, 0);
2026 offset += i_size_read(inode);
2027 ll_inode_size_unlock(inode, 0);
2028 } else if (origin == 1) { /* SEEK_CUR */
2029 offset += file->f_pos;
/* Valid range is [0, maxbytes]; only update f_pos when it changes. */
2033 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
2034 if (offset != file->f_pos) {
2035 file->f_pos = offset;
/*
 * fsync: wait for in-flight page I/O, surface any recorded async write
 * errors, sync metadata via the MDC (md_sync), then sync data objects
 * via obd_sync over [0, EOF] on the data export.
 */
2043 int ll_fsync(struct file *file, struct dentry *dentry, int data)
2045 struct inode *inode = dentry->d_inode;
2046 struct ll_inode_info *lli = ll_i2info(inode);
2047 struct lov_stripe_md *lsm = lli->lli_smd;
2048 struct ptlrpc_request *req;
2049 struct obd_capa *oc;
2052 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2053 inode->i_generation, inode);
2054 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2056 /* fsync's caller has already called _fdata{sync,write}, we want
2057 * that IO to finish before calling the osc and mdc sync methods */
2058 rc = filemap_fdatawait(inode->i_mapping);
2060 /* catch async errors that were recorded back when async writeback
2061 * failed for pages in this mapping. */
2062 err = lli->lli_async_rc;
2063 lli->lli_async_rc = 0;
2067 err = lov_test_and_clear_async_rc(lsm);
2072 oc = ll_mdscapa_get(inode);
2073 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2079 ptlrpc_req_finished(req);
2086 RETURN(rc ? rc : -ENOMEM);
/* Fill an obdo identifying the data object(s) to sync. */
2088 oa->o_id = lsm->lsm_object_id;
2089 oa->o_gr = lsm->lsm_object_gr;
2090 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2091 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
2092 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
2095 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2096 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
2097 0, OBD_OBJECT_EOF, oc);
/*
 * flock()/fcntl() lock handler: translate the VFS file_lock into an
 * LDLM_FLOCK enqueue against the MDS, then mirror the result into the
 * local lock tables (flock and, under HAVE_F_OP_FLOCK, POSIX).
 */
2107 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2109 struct inode *inode = file->f_dentry->d_inode;
2110 struct ll_sb_info *sbi = ll_i2sbi(inode);
2111 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2112 .ei_cb_cp =ldlm_flock_completion_ast,
2113 .ei_cbdata = file_lock };
2114 struct md_op_data *op_data;
2115 struct lustre_handle lockh = {0};
2116 ldlm_policy_data_t flock;
2121 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2122 inode->i_ino, file_lock);
2124 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2126 if (file_lock->fl_flags & FL_FLOCK) {
2127 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2128 /* set missing params for flock() calls */
2129 file_lock->fl_end = OFFSET_MAX;
2130 file_lock->fl_pid = current->tgid;
2132 flock.l_flock.pid = file_lock->fl_pid;
2133 flock.l_flock.start = file_lock->fl_start;
2134 flock.l_flock.end = file_lock->fl_end;
/* Map fcntl lock types onto LDLM modes: RD->PR, WR->PW, UNLCK->NL. */
2136 switch (file_lock->fl_type) {
2138 einfo.ei_mode = LCK_PR;
2141 /* An unlock request may or may not have any relation to
2142 * existing locks so we may not be able to pass a lock handle
2143 * via a normal ldlm_lock_cancel() request. The request may even
2144 * unlock a byte range in the middle of an existing lock. In
2145 * order to process an unlock request we need all of the same
2146 * information that is given with a normal read or write record
2147 * lock request. To avoid creating another ldlm unlock (cancel)
2148 * message we'll treat a LCK_NL flock request as an unlock. */
2149 einfo.ei_mode = LCK_NL;
2152 einfo.ei_mode = LCK_PW;
2155 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* Non-blocking set -> BLOCK_NOWAIT; GETLK -> TEST_LOCK. */
2170 flags = LDLM_FL_BLOCK_NOWAIT;
2176 flags = LDLM_FL_TEST_LOCK;
2177 /* Save the old mode so that if the mode in the lock changes we
2178 * can decrement the appropriate reader or writer refcount. */
2179 file_lock->fl_type = einfo.ei_mode;
2182 CERROR("unknown fcntl lock command: %d\n", cmd);
2186 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2187 LUSTRE_OPC_ANY, NULL);
2188 if (IS_ERR(op_data))
2189 RETURN(PTR_ERR(op_data));
2191 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2192 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2193 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2195 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2196 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2198 ll_finish_md_op_data(op_data);
/* Keep the kernel's local lock state consistent with the DLM result. */
2200 if ((file_lock->fl_flags & FL_FLOCK) &&
2201 (rc == 0 || file_lock->fl_type == F_UNLCK))
2202 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2203 #ifdef HAVE_F_OP_FLOCK
2204 if ((file_lock->fl_flags & FL_POSIX) &&
2205 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2206 !(flags & LDLM_FL_TEST_LOCK))
2207 posix_lock_file_wait(file, file_lock);
/* Stub used by the -o noflock file_operations tables (body not shown
 * in this excerpt; per the table comment it returns ENOSYS). */
2213 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking) whether this client already holds an MDS inodebits
 * lock covering @bits, in any of CR/CW/PR/PW modes. TEST_LOCK keeps
 * md_lock_match from acquiring a reference.
 */
2220 int ll_have_md_lock(struct inode *inode, __u64 bits)
2222 struct lustre_handle lockh;
2223 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2231 fid = &ll_i2info(inode)->lli_fid;
2232 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2234 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2235 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2236 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Like ll_have_md_lock() but without TEST_LOCK: on a match a reference
 * is taken and the handle is returned in *lockh; the matched mode is
 * the return value.
 */
2242 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2243 struct lustre_handle *lockh)
2245 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2251 fid = &ll_i2info(inode)->lli_fid;
2252 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2254 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2255 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2256 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/* Post-process a revalidation result: -ENOENT on an already-unlinked
 * inode is converted to success (nlink updated); other errors are logged. */
2260 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2261 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2262 * and return success */
2264 /* This path cannot be hit for regular files unless in
2265 * case of obscure races, so no need to to validate
2267 if (!S_ISREG(inode->i_mode) &&
2268 !S_ISDIR(inode->i_mode))
2273 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate a dentry/inode against the MDS. With OBD_CONNECT_ATTRFID
 * a by-fid IT_GETATTR intent is used (dropping the dentry if the file
 * was unlinked); otherwise, if no covering MD lock is held, a plain
 * md_getattr refreshes the inode from the reply.
 */
2281 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2284 struct inode *inode = dentry->d_inode;
2285 struct ptlrpc_request *req = NULL;
2286 struct ll_sb_info *sbi;
2287 struct obd_export *exp;
/* Guard for a NULL inode (should not happen; see elided check above). */
2292 CERROR("REPORT THIS LINE TO PETER\n");
2295 sbi = ll_i2sbi(inode);
2297 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2298 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2300 exp = ll_i2mdexp(inode);
2302 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2303 struct lookup_intent oit = { .it_op = IT_GETATTR };
2304 struct md_op_data *op_data;
2306 /* Call getattr by fid, so do not provide name at all. */
2307 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2308 dentry->d_inode, NULL, 0, 0,
2309 LUSTRE_OPC_ANY, NULL);
2310 if (IS_ERR(op_data))
2311 RETURN(PTR_ERR(op_data));
/* M_CHECK_STALE asks the MDS to verify the inode is not stale. */
2313 oit.it_create_mode |= M_CHECK_STALE;
2314 rc = md_intent_lock(exp, op_data, NULL, 0,
2315 /* we are not interested in name
2318 ll_md_blocking_ast, 0);
2319 ll_finish_md_op_data(op_data);
2320 oit.it_create_mode &= ~M_CHECK_STALE;
2322 rc = ll_inode_revalidate_fini(inode, rc);
2326 rc = ll_revalidate_it_finish(req, &oit, dentry);
2328 ll_intent_release(&oit);
2332 /* Unlinked? Unhash dentry, so it is not picked up later by
2333 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2334 here to preserve get_cwd functionality on 2.6.
2336 if (!dentry->d_inode->i_nlink) {
2337 spin_lock(&ll_lookup_lock);
2338 spin_lock(&dcache_lock);
2339 ll_drop_dentry(dentry);
2340 spin_unlock(&dcache_lock);
2341 spin_unlock(&ll_lookup_lock);
2344 ll_lookup_finish_locks(&oit, dentry);
/* No ATTRFID: only refetch attrs if no MD lock already covers ibits. */
2345 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2347 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2348 obd_valid valid = OBD_MD_FLGETATTR;
2349 struct obd_capa *oc;
2352 if (S_ISREG(inode->i_mode)) {
2353 rc = ll_get_max_mdsize(sbi, &ealen);
2356 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2358 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2359 * capa for this inode. Because we only keep capas of dirs
2361 oc = ll_mdscapa_get(inode);
2362 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2366 rc = ll_inode_revalidate_fini(inode, rc);
2370 rc = ll_prep_inode(&inode, req, NULL);
2373 ptlrpc_req_finished(req);
/*
 * Revalidate UPDATE|LOOKUP ibits, then glimpse the file size from the
 * OSTs — unless no objects are allocated yet (lli_smd == NULL), in
 * which case the size is trivially valid.
 */
2377 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2382 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2383 MDS_INODELOCK_LOOKUP);
2385 /* if object not yet allocated, don't validate size */
2386 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2389 /* cl_glimpse_size will prefer locally cached writes if they extend
2393 rc = cl_glimpse_size(dentry->d_inode);
/*
 * getattr with a caller-provided intent: revalidate first, then fill
 * *stat from the (now fresh) inode; size/blocks are read under the
 * inode size lock for consistency with concurrent size updates.
 */
2398 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2399 struct lookup_intent *it, struct kstat *stat)
2401 struct inode *inode = de->d_inode;
2404 res = ll_inode_revalidate_it(de, it);
2405 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2410 stat->dev = inode->i_sb->s_dev;
2411 stat->ino = inode->i_ino;
2412 stat->mode = inode->i_mode;
2413 stat->nlink = inode->i_nlink;
2414 stat->uid = inode->i_uid;
2415 stat->gid = inode->i_gid;
2416 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2417 stat->atime = inode->i_atime;
2418 stat->mtime = inode->i_mtime;
2419 stat->ctime = inode->i_ctime;
2420 #ifdef HAVE_INODE_BLKSIZE
2421 stat->blksize = inode->i_blksize;
2423 stat->blksize = 1 << inode->i_blkbits;
2426 ll_inode_size_lock(inode, 0);
2427 stat->size = i_size_read(inode);
2428 stat->blocks = inode->i_blocks;
2429 ll_inode_size_unlock(inode, 0);
/* VFS ->getattr entry point: wrap ll_getattr_it with a fresh IT_GETATTR. */
2433 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2435 struct lookup_intent it = { .it_op = IT_GETATTR };
2437 return ll_getattr_it(mnt, de, &it, stat);
/*
 * ACL callback for generic_permission(): duplicate the cached POSIX ACL
 * under lli_lock (it may be replaced concurrently), evaluate @mask
 * against it, and release the duplicate. Compiled out without
 * CONFIG_FS_POSIX_ACL.
 */
2441 int lustre_check_acl(struct inode *inode, int mask)
2443 #ifdef CONFIG_FS_POSIX_ACL
2444 struct ll_inode_info *lli = ll_i2info(inode);
2445 struct posix_acl *acl;
2449 spin_lock(&lli->lli_lock);
2450 acl = posix_acl_dup(lli->lli_posix_acl);
2451 spin_unlock(&lli->lli_lock);
2456 rc = posix_acl_permission(inode, acl, mask);
2457 posix_acl_release(acl);
2465 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2466 #ifndef HAVE_INODE_PERMISION_2ARGS
/* >= 2.6.10 variant: revalidate the root inode if needed, defer remote-
 * client checks to the MDS, otherwise use generic_permission() with the
 * lustre_check_acl callback. */
2467 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2469 int ll_inode_permission(struct inode *inode, int mask)
2475 /* as root inode are NOT getting validated in lookup operation,
2476 * need to do it before permission check. */
2478 if (inode == inode->i_sb->s_root->d_inode) {
2479 struct lookup_intent it = { .it_op = IT_LOOKUP };
2481 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2482 MDS_INODELOCK_LOOKUP);
2487 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2488 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2490 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2491 return lustre_check_remote_perm(inode, mask);
2493 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2494 rc = generic_permission(inode, mask, lustre_check_acl);
/* Pre-2.6.10 variant: open-coded owner/group/other + capability checks
 * mirroring the kernel's vfs_permission of that era. */
2499 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2501 int mode = inode->i_mode;
2504 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2505 inode->i_ino, inode->i_generation, inode, mask);
2507 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2508 return lustre_check_remote_perm(inode, mask);
2510 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* Writes on read-only or immutable inodes are rejected up front. */
2512 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2513 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2515 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2517 if (current->fsuid == inode->i_uid) {
2520 if (((mode >> 3) & mask & S_IRWXO) != mask)
2522 rc = lustre_check_acl(inode, mask);
2526 goto check_capabilities;
2530 if (in_group_p(inode->i_gid))
2533 if ((mode & mask & S_IRWXO) == mask)
/* Capability overrides: DAC_OVERRIDE / DAC_READ_SEARCH. */
2537 if (!(mask & MAY_EXEC) ||
2538 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2539 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2542 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2543 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Pick the vectored-I/O entry names for the file_operations tables
 * below: readv/writev on older kernels, aio_read/aio_write on newer. */
2550 #ifdef HAVE_FILE_READV
2551 #define READ_METHOD readv
2552 #define READ_FUNCTION ll_file_readv
2553 #define WRITE_METHOD writev
2554 #define WRITE_FUNCTION ll_file_writev
2556 #define READ_METHOD aio_read
2557 #define READ_FUNCTION ll_file_aio_read
2558 #define WRITE_METHOD aio_write
2559 #define WRITE_FUNCTION ll_file_aio_write
2562 /* -o localflock - only provides locally consistent flock locks */
/* Default file_operations table: no .flock/.lock, so the kernel's
 * local flock implementation applies (client-local consistency only). */
2563 struct file_operations ll_file_operations = {
2564 .read = ll_file_read,
2565 .READ_METHOD = READ_FUNCTION,
2566 .write = ll_file_write,
2567 .WRITE_METHOD = WRITE_FUNCTION,
2568 .ioctl = ll_file_ioctl,
2569 .open = ll_file_open,
2570 .release = ll_file_release,
2571 .mmap = ll_file_mmap,
2572 .llseek = ll_file_seek,
2573 #ifdef HAVE_KERNEL_SENDFILE
2574 .sendfile = ll_file_sendfile,
2576 #ifdef HAVE_KERNEL_SPLICE_READ
2577 .splice_read = ll_file_splice_read,
/* -o flock table: same as the default plus cluster-coherent .flock
 * (or .lock on kernels without f_op->flock) via ll_file_flock. */
2582 struct file_operations ll_file_operations_flock = {
2583 .read = ll_file_read,
2584 .READ_METHOD = READ_FUNCTION,
2585 .write = ll_file_write,
2586 .WRITE_METHOD = WRITE_FUNCTION,
2587 .ioctl = ll_file_ioctl,
2588 .open = ll_file_open,
2589 .release = ll_file_release,
2590 .mmap = ll_file_mmap,
2591 .llseek = ll_file_seek,
2592 #ifdef HAVE_KERNEL_SENDFILE
2593 .sendfile = ll_file_sendfile,
2595 #ifdef HAVE_KERNEL_SPLICE_READ
2596 .splice_read = ll_file_splice_read,
2599 #ifdef HAVE_F_OP_FLOCK
2600 .flock = ll_file_flock,
2602 .lock = ll_file_flock
2605 /* These are for -o noflock - to return ENOSYS on flock calls */
2606 struct file_operations ll_file_operations_noflock = {
2607 .read = ll_file_read,
2608 .READ_METHOD = READ_FUNCTION,
2609 .write = ll_file_write,
2610 .WRITE_METHOD = WRITE_FUNCTION,
2611 .ioctl = ll_file_ioctl,
2612 .open = ll_file_open,
2613 .release = ll_file_release,
2614 .mmap = ll_file_mmap,
2615 .llseek = ll_file_seek,
2616 #ifdef HAVE_KERNEL_SENDFILE
2617 .sendfile = ll_file_sendfile,
2619 #ifdef HAVE_KERNEL_SPLICE_READ
2620 .splice_read = ll_file_splice_read,
2623 #ifdef HAVE_F_OP_FLOCK
/* ll_file_noflock rejects locking; see its definition above. */
2624 .flock = ll_file_noflock,
2626 .lock = ll_file_noflock
/* inode_operations for regular Lustre files: attribute and xattr ops. */
2629 struct inode_operations ll_file_inode_operations = {
2630 #ifdef HAVE_VFS_INTENT_PATCHES
2631 .setattr_raw = ll_setattr_raw,
2633 .setattr = ll_setattr,
2634 .truncate = ll_truncate,
2635 .getattr = ll_getattr,
2636 .permission = ll_inode_permission,
2637 .setxattr = ll_setxattr,
2638 .getxattr = ll_getxattr,
2639 .listxattr = ll_listxattr,
2640 .removexattr = ll_removexattr,
2643 /* dynamic ioctl number support routins */
/* Registry of dynamically added ioctl handlers: a list of llioc_data
 * entries guarded by an rwsem (readers dispatch, writers register). */
2644 static struct llioc_ctl_data {
2645 struct rw_semaphore ioc_sem;
2646 struct list_head ioc_head;
2648 __RWSEM_INITIALIZER(llioc.ioc_sem),
2649 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registration: callback plus a flexible array of handled cmds. */
2654 struct list_head iocd_list;
2655 unsigned int iocd_size;
2656 llioc_callback_t iocd_cb;
2657 unsigned int iocd_count;
2658 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler for @count commands in @cmd.
 * Returns an opaque cookie (the allocation) used for unregistration,
 * or NULL on bad arguments / allocation failure.
 */
2661 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2664 struct llioc_data *in_data = NULL;
2667 if (cb == NULL || cmd == NULL ||
2668 count > LLIOC_MAX_CMD || count < 0)
2671 size = sizeof(*in_data) + count * sizeof(unsigned int);
2672 OBD_ALLOC(in_data, size);
2673 if (in_data == NULL)
2676 memset(in_data, 0, sizeof(*in_data));
2677 in_data->iocd_size = size;
2678 in_data->iocd_cb = cb;
2679 in_data->iocd_count = count;
2680 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2682 down_write(&llioc.ioc_sem);
2683 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2684 up_write(&llioc.ioc_sem);
/*
 * Remove and free the registration identified by @magic (the cookie
 * returned from ll_iocontrol_register); warn if it is not found.
 */
2689 void ll_iocontrol_unregister(void *magic)
2691 struct llioc_data *tmp;
2696 down_write(&llioc.ioc_sem);
2697 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2699 unsigned int size = tmp->iocd_size;
2701 list_del(&tmp->iocd_list);
2702 up_write(&llioc.ioc_sem);
2704 OBD_FREE(tmp, size);
2708 up_write(&llioc.ioc_sem);
2710 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2713 EXPORT_SYMBOL(ll_iocontrol_register);
2714 EXPORT_SYMBOL(ll_iocontrol_unregister);
2716 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2717 unsigned int cmd, unsigned long arg, int *rcp)
2719 enum llioc_iter ret = LLIOC_CONT;
2720 struct llioc_data *data;
2721 int rc = -EINVAL, i;
2723 down_read(&llioc.ioc_sem);
2724 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2725 for (i = 0; i < data->iocd_count; i++) {
2726 if (cmd != data->iocd_cmd[i])
2729 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2733 if (ret == LLIOC_STOP)
2736 up_read(&llioc.ioc_sem);