1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/*
 * Allocate one struct ll_file_data from ll_file_data_slab with the
 * CFS_ALLOC_IO allocation flags.
 * NOTE(review): the return statement is not visible here; presumably the
 * freshly allocated pointer is returned -- confirm.
 */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Give a struct ll_file_data back to ll_file_data_slab. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/*
 * Copy the current state of @inode into @op_data: FID, mode, a/m/ctime,
 * size, block count, inode flags and IO epoch, together with the open
 * handle @fh and the MDS capability returned by ll_mdscapa_get().
 */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* op_attr is viewed as a struct ll_iattr to reach ia_attr_flags. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
/* The open handle is copied by value into the request data. */
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/*
 * Prepare @op_data for closing the open handle @och of @inode.
 *
 * The attribute-valid mask always carries mode and the three time stamps;
 * ATTR_SIZE | ATTR_BLOCKS are added when the MD export has no Size-on-MDS
 * support or the inode is not a regular file.  The inode attributes and
 * the handle are packed with ll_pack_inode2opdata().
 * NOTE(review): the statement controlled by the FMODE_WRITE test and the
 * branch that leads to ll_epoch_close() are not visible here -- confirm.
 */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
92 if (!(och->och_flags & FMODE_WRITE))
95 if (!(exp_connect_som(ll_i2mdexp(inode))) || !S_ISREG(inode->i_mode))
96 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
98 ll_epoch_close(inode, op_data, &och, 0);
101 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/*
 * Close the MDS open handle @och: build the close request data with
 * ll_prepare_close(), send md_close() on @md_exp, and then perform the
 * follow-up work visible below (Size-on-MDS update, object destroy,
 * queueing DONE_WRITING, clearing open replay data, marking the handle
 * dead and finishing the close request).
 * NOTE(review): part of the parameter list and most of the conditions
 * that guard the individual calls are not visible here; the comments
 * describe only the visible statements -- confirm ordering and guards.
 */
105 static int ll_close_inode_openhandle(struct obd_export *md_exp,
107 struct obd_client_handle *och)
109 struct obd_export *exp = ll_i2mdexp(inode);
110 struct md_op_data *op_data;
111 struct ptlrpc_request *req = NULL;
112 struct obd_device *obd = class_exp2obd(exp);
119 * XXX: in case of LMV, is this correct to access
122 CERROR("Invalid MDC connection handle "LPX64"\n",
123 ll_i2mdexp(inode)->exp_handle.h_cookie);
128 * here we check if this is forced umount. If so this is called on
129 * canceling "open lock" and we do not call md_close() in this case, as
130 * it will not be successful, as import is already deactivated.
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
/* Record whether op_data carries MF_EPOCH_CLOSE before sending the close. */
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr back to MDS. */
147 rc = ll_sizeonmds_update(inode, &och->och_fh,
148 op_data->op_ioepoch);
150 CERROR("inode %lu mdc Size-on-MDS update failed: "
151 "rc = %d\n", inode->i_ino, rc);
155 CERROR("inode %lu mdc close failed: rc = %d\n",
158 ll_finish_md_op_data(op_data);
161 rc = ll_objects_destroy(req, inode);
163 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/*
 * A regular file opened for write on a Size-on-MDS capable export whose
 * epoch was not closed by this request is queued for DONE_WRITING.
 */
170 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
171 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
172 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
174 md_clear_open_replay_data(md_exp, och);
175 /* Free @och if it is not waiting for DONE_WRITING. */
/* Overwrite the handle cookie with DEAD_HANDLE_MAGIC. */
176 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
179 if (req) /* This is close request */
180 ptlrpc_req_finished(req);
/*
 * Close the per-mode MDS open handle of @inode selected by @flags.
 *
 * FMODE_WRITE selects the write handle, FMODE_EXEC the exec handle, and
 * otherwise (FMODE_READ is asserted) the read handle, each paired with
 * its own open-fd counter.  The counter is examined under lli_och_sem;
 * a non-NULL handle is then passed to ll_close_inode_openhandle() on the
 * superblock's MD export.
 * NOTE(review): how the handle is taken out of the slot and what is
 * returned when users remain is not visible here -- confirm.
 */
184 int ll_md_real_close(struct inode *inode, int flags)
186 struct ll_inode_info *lli = ll_i2info(inode);
187 struct obd_client_handle **och_p;
188 struct obd_client_handle *och;
/* Pick the open handle slot and use counter that match the open mode. */
193 if (flags & FMODE_WRITE) {
194 och_p = &lli->lli_mds_write_och;
195 och_usecount = &lli->lli_open_fd_write_count;
196 } else if (flags & FMODE_EXEC) {
197 och_p = &lli->lli_mds_exec_och;
198 och_usecount = &lli->lli_open_fd_exec_count;
200 LASSERT(flags & FMODE_READ);
201 och_p = &lli->lli_mds_read_och;
202 och_usecount = &lli->lli_open_fd_read_count;
205 down(&lli->lli_och_sem);
206 if (*och_usecount) { /* There are still users of this handle, so
208 up(&lli->lli_och_sem);
213 up(&lli->lli_och_sem);
215 if (och) { /* There might be a race and somebody have freed this och
217 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/*
 * Per-file part of close for @file on @inode.
 *
 * Drops a group lock if the file holds one, decrements the open-fd
 * counter that matches fd->fd_omode under lli_och_sem, and calls
 * ll_md_real_close() when md_lock_match() finds no granted
 * MDS_INODELOCK_OPEN lock.  A file with a negative dentry is only
 * reported via CERROR.  In all visible paths the ll_file_data is detached
 * from the file and freed, and ll_capa_close() is called.
 * NOTE(review): part of the parameter list, the lock mode selection and
 * the return statement are not visible here -- confirm.
 */
224 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
227 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
228 struct ll_inode_info *lli = ll_i2info(inode);
232 /* clear group lock, if present */
233 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
234 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
236 /* Let's see if we have good enough OPEN lock on the file and if
237 we can skip talking to MDS */
238 if (file->f_dentry->d_inode) { /* Can this ever be false? */
240 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
241 struct lustre_handle lockh;
242 struct inode *inode = file->f_dentry->d_inode;
243 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
/* Drop this file's reference on the matching per-mode open counter. */
245 down(&lli->lli_och_sem);
246 if (fd->fd_omode & FMODE_WRITE) {
248 LASSERT(lli->lli_open_fd_write_count);
249 lli->lli_open_fd_write_count--;
250 } else if (fd->fd_omode & FMODE_EXEC) {
252 LASSERT(lli->lli_open_fd_exec_count);
253 lli->lli_open_fd_exec_count--;
256 LASSERT(lli->lli_open_fd_read_count);
257 lli->lli_open_fd_read_count--;
259 up(&lli->lli_och_sem);
/* No matching OPEN lock found: go on to ll_md_real_close(). */
261 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
262 LDLM_IBITS, &policy, lockmode,
264 rc = ll_md_real_close(file->f_dentry->d_inode,
268 CERROR("Releasing a file %p with negative dentry %p. Name %s",
269 file, file->f_dentry, file->f_dentry->d_name.name);
/* Detach the private data from the file before freeing it. */
272 LUSTRE_FPRIVATE(file) = NULL;
273 ll_file_data_put(fd);
274 ll_capa_close(inode);
/* Prototype only; the function is called from ll_file_release() in this file. */
279 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
281 /* While this returns an error code, its caller fput() does not, so we need
282 * to make every effort to clean up all of our state here. Also, applications
283 * rarely check close errors, and even if an error is returned they will not
284 * retry the close call.
/*
 * Release handler for @file on @inode (logged as a VFS op).
 *
 * Visible steps:
 *  - with CONFIG_FS_POSIX_ACL, on a remote client and the root inode, a
 *    file flagged LL_FILE_RMTACL has the flag cleared and the entries for
 *    the current pid removed from ll_rct and ll_et;
 *  - the release counter is tallied for anything but the root dentry;
 *  - statahead is stopped when this fd is the directory's opendir key;
 *  - for the root dentry the ll_file_data is just detached and freed;
 *  - otherwise the async error state is cleared and ll_md_close() is
 *    called on the superblock's MD export.
 * NOTE(review): the guards around the async-rc clearing and the return
 * statements are not visible here -- confirm.
 */
286 int ll_file_release(struct inode *inode, struct file *file)
288 struct ll_file_data *fd;
289 struct ll_sb_info *sbi = ll_i2sbi(inode);
290 struct ll_inode_info *lli = ll_i2info(inode);
291 struct lov_stripe_md *lsm = lli->lli_smd;
295 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
296 inode->i_generation, inode);
298 #ifdef CONFIG_FS_POSIX_ACL
299 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
300 inode == inode->i_sb->s_root->d_inode) {
301 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
304 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
305 fd->fd_flags &= ~LL_FILE_RMTACL;
306 rct_del(&sbi->ll_rct, cfs_curproc_pid());
307 et_search_free(&sbi->ll_et, cfs_curproc_pid());
312 if (inode->i_sb->s_root != file->f_dentry)
313 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
314 fd = LUSTRE_FPRIVATE(file);
317 /* The last ref on @file, maybe not the owner pid of statahead.
318 * Different processes can open the same dir, "ll_opendir_key" means:
319 * it is me that should stop the statahead thread. */
320 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
321 ll_stop_statahead(inode, lli->lli_opendir_key);
/* The root dentry only needs its private data dropped. */
323 if (inode->i_sb->s_root == file->f_dentry) {
324 LUSTRE_FPRIVATE(file) = NULL;
325 ll_file_data_put(fd);
330 lov_test_and_clear_async_rc(lsm);
331 lli->lli_async_rc = 0;
333 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/*
 * Send an open intent for @file to the MDS via md_intent_lock().
 *
 * @lmm / @lmmsize carry optional striping data; when both are zero the
 * request additionally asks for an open lock (MDS_OPEN_LOCK).  The
 * operation data is built from the parent directory inode, the file's
 * inode and the dentry name.  After the request the visible code handles
 * open errors, attaches the inode to the returned lock, refreshes the
 * inode with ll_prep_inode(), and finally drops the request reference and
 * the intent lock.
 * NOTE(review): the conditions separating the error exit from the normal
 * path, and the return statement, are not visible here -- confirm.
 */
337 static int ll_intent_file_open(struct file *file, void *lmm,
338 int lmmsize, struct lookup_intent *itp)
340 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
341 struct dentry *parent = file->f_dentry->d_parent;
342 const char *name = file->f_dentry->d_name.name;
343 const int len = file->f_dentry->d_name.len;
344 struct md_op_data *op_data;
345 struct ptlrpc_request *req;
352 /* Usually we come here only for NFSD, and we want open lock.
353 But we can also get here with pre 2.6.15 patchless kernels, and in
354 that case that lock is also ok */
355 /* We can also get here if there was cached open handle in revalidate_it
356 * but it disappeared while we were getting from there to ll_file_open.
357 * But this means this file was closed and immediately opened which
358 * makes a good candidate for using OPEN lock */
359 /* If lmmsize & lmm are not 0, we are just setting stripe info
360 * parameters. No need for the open lock */
361 if (!lmm && !lmmsize)
362 itp->it_flags |= MDS_OPEN_LOCK;
364 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
365 file->f_dentry->d_inode, name, len,
366 O_RDWR, LUSTRE_OPC_ANY, NULL);
368 RETURN(PTR_ERR(op_data));
370 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
371 0 /*unused */, &req, ll_md_blocking_ast, 0);
/* op_data is no longer needed once the intent request has been issued. */
372 ll_finish_md_op_data(op_data);
374 /* reason for keep own exit path - don`t flood log
375 * with messages with -ESTALE errors.
377 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
378 it_open_error(DISP_OPEN_OPEN, itp))
380 ll_release_openhandle(file->f_dentry, itp);
384 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
385 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
386 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
390 if (itp->d.lustre.it_lock_mode)
391 md_set_lock_data(sbi->ll_md_exp,
392 &itp->d.lustre.it_lock_handle,
393 file->f_dentry->d_inode, NULL);
395 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
397 ptlrpc_req_finished(itp->d.lustre.it_data);
398 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
399 ll_intent_drop_lock(itp);
/*
 * Store @ioepoch in @lli when it is non-zero and differs from the epoch
 * already recorded, and log the newly opened epoch together with the FID.
 */
404 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
406 if (ioepoch && lli->lli_ioepoch != ioepoch) {
407 lli->lli_ioepoch = ioepoch;
408 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
409 ioepoch, PFID(&lli->lli_fid));
/*
 * Initialise the client open handle @och from the MDT reply body of the
 * open request held in @it: copy the server file handle, set the handle
 * magic, FID and open flags, and record the IO epoch from the reply in
 * @lli.
 *
 * Returns the result of md_set_open_replay_data() on @md_exp.
 */
413 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
414 struct lookup_intent *it, struct obd_client_handle *och)
416 struct ptlrpc_request *req = it->d.lustre.it_data;
417 struct mdt_body *body;
421 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
422 LASSERT(body != NULL); /* reply already checked out */
424 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
425 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
426 och->och_fid = lli->lli_fid;
427 och->och_flags = it->it_flags;
428 ll_ioepoch_open(lli, body->ioepoch);
430 return md_set_open_replay_data(md_exp, och, req);
/*
 * Finish an open locally: attach @fd to @file as its private data,
 * initialise the readahead state and remember the open mode taken from
 * @it->it_flags.  The visible code also fills @och from the open reply
 * via ll_och_fill() and logs the IO epoch when the file was opened for
 * write and the reply body has OBD_MD_FLSIZE set.
 * NOTE(review): the condition under which the @och branch runs (callers
 * in this file pass NULL as well as a real handle) and the return
 * statements are not visible here -- confirm.
 */
433 int ll_local_open(struct file *file, struct lookup_intent *it,
434 struct ll_file_data *fd, struct obd_client_handle *och)
436 struct inode *inode = file->f_dentry->d_inode;
437 struct ll_inode_info *lli = ll_i2info(inode);
/* The file must not already carry private data. */
440 LASSERT(!LUSTRE_FPRIVATE(file));
445 struct ptlrpc_request *req = it->d.lustre.it_data;
446 struct mdt_body *body;
449 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
453 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
454 if ((it->it_flags & FMODE_WRITE) &&
455 (body->valid & OBD_MD_FLSIZE))
456 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
457 lli->lli_ioepoch, PFID(&lli->lli_fid));
460 LUSTRE_FPRIVATE(file) = fd;
461 ll_readahead_init(inode, &fd->fd_ras);
462 fd->fd_omode = it->it_flags;
466 /* Open a file, and (for the very first open) create objects on the OSTs at
467 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
468 * creation or open until ll_lov_setstripe() ioctl is called. We grab
469 * lli_open_sem to ensure no other process will create objects, send the
470 * stripe MD to the MDS, or try to destroy the objects if that fails.
472 * If we already have the stripe MD locally then we don't request it in
473 * md_open(), by passing a lmm_size = 0.
475 * It is up to the application to ensure no other processes open this file
476 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
477 * used. We might be able to avoid races of that sort by getting lli_open_sem
478 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
479 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/*
 * Open handler for @file on @inode (logged as a VFS op).
 *
 * Visible steps: allocate the per-file ll_file_data; for a directory,
 * claim the opendir key/pid under lli_lock when nobody holds it; for the
 * filesystem root dentry just attach the fd; otherwise use the intent
 * passed in by the VFS or build a local one from f_flags, pick the
 * per-mode MDS open handle slot (write, exec or read) and, under
 * lli_och_sem, either reuse the handle that is already present or obtain
 * a new one through ll_intent_file_open(), finishing in both cases with
 * ll_local_open().  The error paths free the fd, free a freshly
 * allocated open handle, release lli_och_sem and stop statahead when
 * opendir_set is non-zero.
 * NOTE(review): many conditions, labels and return statements between
 * these steps are not visible here -- confirm against the full function.
 */
481 int ll_file_open(struct inode *inode, struct file *file)
483 struct ll_inode_info *lli = ll_i2info(inode);
484 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
485 .it_flags = file->f_flags };
486 struct lov_stripe_md *lsm;
487 struct ptlrpc_request *req = NULL;
488 struct obd_client_handle **och_p;
490 struct ll_file_data *fd;
491 int rc = 0, opendir_set = 0;
494 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
495 inode->i_generation, inode, file->f_flags);
497 #ifdef HAVE_VFS_INTENT_PATCHES
500 it = file->private_data; /* XXX: compat macro */
501 file->private_data = NULL; /* prevent ll_local_open assertion */
504 fd = ll_file_data_get();
/* The first opener of a directory records itself as the opendir owner. */
509 if (S_ISDIR(inode->i_mode)) {
510 spin_lock(&lli->lli_lock);
511 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
512 LASSERT(lli->lli_sai == NULL);
513 lli->lli_opendir_key = fd;
514 lli->lli_opendir_pid = cfs_curproc_pid();
517 spin_unlock(&lli->lli_lock);
/* The root dentry only gets its private data attached. */
520 if (inode->i_sb->s_root == file->f_dentry) {
521 LUSTRE_FPRIVATE(file) = fd;
525 if (!it || !it->d.lustre.it_disposition) {
526 /* Convert f_flags into access mode. We cannot use file->f_mode,
527 * because everything but O_ACCMODE mask was stripped from
529 if ((oit.it_flags + 1) & O_ACCMODE)
531 if (file->f_flags & O_TRUNC)
532 oit.it_flags |= FMODE_WRITE;
534 /* kernel only call f_op->open in dentry_open. filp_open calls
535 * dentry_open after call to open_namei that checks permissions.
536 * Only nfsd_open call dentry_open directly without checking
537 * permissions and because of that this code below is safe. */
538 if (oit.it_flags & FMODE_WRITE)
539 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
541 /* We do not want O_EXCL here, presumably we opened the file
542 * already? XXX - NFS implications? */
543 oit.it_flags &= ~O_EXCL;
549 /* Let's see if we have file open on MDS already. */
550 if (it->it_flags & FMODE_WRITE) {
551 och_p = &lli->lli_mds_write_och;
552 och_usecount = &lli->lli_open_fd_write_count;
553 } else if (it->it_flags & FMODE_EXEC) {
554 och_p = &lli->lli_mds_exec_och;
555 och_usecount = &lli->lli_open_fd_exec_count;
557 och_p = &lli->lli_mds_read_och;
558 och_usecount = &lli->lli_open_fd_read_count;
561 down(&lli->lli_och_sem);
562 if (*och_p) { /* Open handle is present */
563 if (it_disposition(it, DISP_OPEN_OPEN)) {
564 /* Well, there's extra open request that we do not need,
565 let's close it somehow. This will decref request. */
566 rc = it_open_error(DISP_OPEN_OPEN, it);
568 up(&lli->lli_och_sem);
569 ll_file_data_put(fd);
570 GOTO(out_openerr, rc);
572 ll_release_openhandle(file->f_dentry, it);
573 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* Reusing the existing MDS handle: no new obd_client_handle is passed. */
578 rc = ll_local_open(file, it, fd, NULL);
581 up(&lli->lli_och_sem);
582 ll_file_data_put(fd);
583 GOTO(out_openerr, rc);
586 LASSERT(*och_usecount == 0);
587 if (!it->d.lustre.it_disposition) {
588 /* We cannot just request lock handle now, new ELC code
589 means that one of other OPEN locks for this file
590 could be cancelled, and since blocking ast handler
591 would attempt to grab och_sem as well, that would
592 result in a deadlock */
593 up(&lli->lli_och_sem);
/* M_CHECK_STALE is set only for the duration of the intent open. */
594 it->it_create_mode |= M_CHECK_STALE;
595 rc = ll_intent_file_open(file, NULL, 0, it);
596 it->it_create_mode &= ~M_CHECK_STALE;
598 ll_file_data_put(fd);
599 GOTO(out_openerr, rc);
602 /* Got some error? Release the request */
603 if (it->d.lustre.it_status < 0) {
604 req = it->d.lustre.it_data;
605 ptlrpc_req_finished(req);
607 md_set_lock_data(ll_i2sbi(inode)->ll_md_exp,
608 &it->d.lustre.it_lock_handle,
609 file->f_dentry->d_inode, NULL);
612 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
614 ll_file_data_put(fd);
615 GOTO(out_och_free, rc = -ENOMEM);
618 req = it->d.lustre.it_data;
620 /* md_intent_lock() didn't get a request ref if there was an
621 * open error, so don't do cleanup on the request here
623 /* XXX (green): Should not we bail out on any error here, not
624 * just open error? */
625 rc = it_open_error(DISP_OPEN_OPEN, it);
627 ll_file_data_put(fd);
628 GOTO(out_och_free, rc);
631 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
632 rc = ll_local_open(file, it, fd, *och_p);
634 ll_file_data_put(fd);
635 GOTO(out_och_free, rc);
638 up(&lli->lli_och_sem);
640 /* Must do this outside lli_och_sem lock to prevent deadlock where
641 different kind of OPEN lock for this same inode gets cancelled
642 by ldlm_cancel_lru */
643 if (!S_ISREG(inode->i_mode))
650 if (file->f_flags & O_LOV_DELAY_CREATE ||
651 !(file->f_mode & FMODE_WRITE)) {
652 CDEBUG(D_INODE, "object creation was delayed\n");
656 file->f_flags &= ~O_LOV_DELAY_CREATE;
659 ptlrpc_req_finished(req);
661 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
/* Error cleanup: release the open handle allocated above. */
665 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
666 *och_p = NULL; /* OBD_FREE writes some magic there */
669 up(&lli->lli_och_sem);
671 if (opendir_set != 0)
672 ll_stop_statahead(inode, lli->lli_opendir_key);
678 /* Fills the obdo with the attributes for the lsm */
/*
 * Issues an asynchronous getattr on @exp for the object described by
 * @lsm (object id and group, type S_IFREG), using capability @capa, and
 * waits for the request set to complete.  Afterwards o_valid is masked
 * down to blocks, block size, a/m/ctime and size.
 * NOTE(review): the assignment that ties oinfo.oi_oa to @obdo, the error
 * handling after each call and the return value are not visible here --
 * confirm.
 */
679 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
680 struct obd_capa *capa, struct obdo *obdo)
682 struct ptlrpc_request_set *set;
683 struct obd_info oinfo = { { { 0 } } };
688 LASSERT(lsm != NULL);
692 oinfo.oi_oa->o_id = lsm->lsm_object_id;
693 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
694 oinfo.oi_oa->o_mode = S_IFREG;
695 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
696 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
697 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
698 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
700 oinfo.oi_capa = capa;
702 set = ptlrpc_prep_set();
704 CERROR("can't allocate ptlrpc set\n");
707 rc = obd_getattr_async(exp, &oinfo, set);
709 rc = ptlrpc_set_wait(set);
710 ptlrpc_set_destroy(set);
/* Keep only the attribute bits callers are meant to consume. */
713 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
714 OBD_MD_FLATIME | OBD_MD_FLMTIME |
715 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
719 /* Fills the obdo with the attributes for the inode defined by lsm */
/*
 * Fetches the attributes through ll_lsm_getattr() on the inode's data
 * export with the MDS capability from ll_mdscapa_get(), refreshes @inode
 * from the returned @obdo and logs object id, size, blocks and block
 * size.
 * NOTE(review): the success check and capability release are not
 * visible here -- confirm.
 */
720 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
722 struct ll_inode_info *lli = ll_i2info(inode);
723 struct obd_capa *capa = ll_mdscapa_get(inode);
727 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
730 obdo_refresh_inode(inode, obdo, obdo->o_valid);
732 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
733 lli->lli_smd->lsm_object_id, i_size_read(inode),
734 (unsigned long long)inode->i_blocks,
735 (unsigned long)ll_inode_blksize(inode));
/*
 * Merge the lock value block of the inode's stripes into @inode.
 *
 * With the inode size lock held, an lvb is initialised from the inode,
 * merged through obd_merge_lvb() on the data export, and its size,
 * block count and m/a/ctime are written back to the inode.
 * NOTE(review): the declaration of lvb and the return statement are not
 * visible here; presumably rc from obd_merge_lvb() is returned -- confirm.
 */
740 int ll_merge_lvb(struct inode *inode)
742 struct ll_inode_info *lli = ll_i2info(inode);
743 struct ll_sb_info *sbi = ll_i2sbi(inode);
749 ll_inode_size_lock(inode, 1);
750 inode_init_lvb(inode, &lvb);
751 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
752 i_size_write(inode, lvb.lvb_size);
753 inode->i_blocks = lvb.lvb_blocks;
755 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
756 LTIME_S(inode->i_atime) = lvb.lvb_atime;
757 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
758 ll_inode_size_unlock(inode, 1);
/*
 * Fetch size, blocks and m/a/ctime for @lsm through ll_lsm_getattr() on
 * the data export (no capability is passed) and copy them into the
 * caller's stat structure.
 * NOTE(review): the last parameter (the stat structure written through
 * "st") and the rc check are not visible here -- confirm.
 */
763 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
766 struct obdo obdo = { 0 };
769 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
771 st->st_size = obdo.o_size;
772 st->st_blocks = obdo.o_blocks;
773 st->st_mtime = obdo.o_mtime;
774 st->st_atime = obdo.o_atime;
775 st->st_ctime = obdo.o_ctime;
/*
 * Zero @io and initialise it for an IO on @file: non-blocking and append
 * flags are taken from f_flags, the cl_object from the inode, and the
 * lock requirement defaults to CILR_MAYBE.  Locking is disabled
 * (CILR_NEVER, no server lock) when the file has LL_FILE_IGNORE_LOCK or
 * the superblock has LL_SBI_NOLCK; otherwise O_APPEND makes the lock
 * mandatory.
 * NOTE(review): the use of @write (presumably guarding the wr_append
 * assignment) is not visible here -- confirm.
 */
780 void ll_io_init(struct cl_io *io, const struct file *file, int write)
782 struct inode *inode = file->f_dentry->d_inode;
783 struct ll_sb_info *sbi = ll_i2sbi(inode);
784 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
787 memset(io, 0, sizeof *io);
788 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
790 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
791 io->ci_obj = ll_i2info(inode)->lli_clob;
792 io->ci_lockreq = CILR_MAYBE;
793 if (fd->fd_flags & LL_FILE_IGNORE_LOCK ||
794 sbi->ll_flags & LL_SBI_NOLCK) {
795 io->ci_lockreq = CILR_NEVER;
796 io->ci_no_srvlock = 1;
797 } else if (file->f_flags & O_APPEND) {
798 io->ci_lockreq = CILR_MANDATORY;
/*
 * Common back end for the read, write and sendfile entry points.
 *
 * Uses the cl_io embedded in the environment, initialises it with
 * ll_io_init() and cl_io_rw_init(), copies the caller's arguments from
 * @args into the vvp/ccc IO state (actor/target for sendfile, otherwise
 * the iovec and, without HAVE_FILE_WRITEV, the kiocb) and runs
 * cl_io_loop().  When cl_io_rw_init() returns non-zero the result is
 * taken from io->ci_result.  If any bytes were transferred, *@ppos is
 * updated from the IO position.
 * NOTE(review): the final return value and the cl_io_fini() call are not
 * visible here -- confirm.
 */
802 static ssize_t ll_file_io_generic(const struct lu_env *env,
803 struct ccc_io_args *args, struct file *file,
804 enum cl_io_type iot, loff_t *ppos, size_t count)
810 io = &ccc_env_info(env)->cti_io;
811 ll_io_init(io, file, iot == CIT_WRITE);
814 io->u.ci_rd.rd_is_sendfile = args->cia_is_sendfile;
816 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
817 struct vvp_io *vio = vvp_env_io(env);
818 struct ccc_io *cio = ccc_env_io(env);
819 if (cl_io_is_sendfile(io)) {
820 vio->u.read.cui_actor = args->cia_actor;
821 vio->u.read.cui_target = args->cia_target;
823 cio->cui_iov = args->cia_iov;
824 cio->cui_nrsegs = args->cia_nrsegs;
825 #ifndef HAVE_FILE_WRITEV
826 cio->cui_iocb = args->cia_iocb;
829 cio->cui_fd = LUSTRE_FPRIVATE(file);
830 result = cl_io_loop(env, io);
832 /* cl_io_rw_init() handled IO */
833 result = io->ci_result;
834 if (io->ci_nob > 0) {
836 *ppos = io->u.ci_wr.wr.crw_pos;
844 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/*
 * Validate the user iovec array @iov of *@nr_segs segments.
 *
 * Each segment length is checked so that neither it nor the running
 * total becomes negative when viewed as ssize_t, and each segment is
 * checked with access_ok(VERIFY_READ, ...).  A segment that fails the
 * access check has its length subtracted from the running total again.
 * NOTE(review): the accumulation into cnt, the truncation of *@nr_segs,
 * the store to *@count and the return values are not visible here --
 * confirm.
 */
846 static int ll_file_get_iov_count(const struct iovec *iov,
847 unsigned long *nr_segs, size_t *count)
852 for (seg = 0; seg < *nr_segs; seg++) {
853 const struct iovec *iv = &iov[seg];
856 * If any segment has a negative length, or the cumulative
857 * length ever wraps negative then return -EINVAL.
860 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
862 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
867 cnt -= iv->iov_len; /* This segment is no good */
874 #ifdef HAVE_FILE_READV
/*
 * Vectored read entry point (built when HAVE_FILE_READV is defined).
 * Validates @iov with ll_file_get_iov_count(), takes a cl environment,
 * fills the per-environment IO arguments (not sendfile, iovec and
 * segment count) and performs a CIT_READ through ll_file_io_generic()
 * at *@ppos.  The environment is released before returning.
 */
875 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
876 unsigned long nr_segs, loff_t *ppos)
879 struct ccc_io_args *args;
885 result = ll_file_get_iov_count(iov, &nr_segs, &count);
889 env = cl_env_get(&refcheck);
891 RETURN(PTR_ERR(env));
893 args = &vvp_env_info(env)->vti_args;
894 args->cia_is_sendfile = 0;
895 args->cia_iov = (struct iovec *)iov;
896 args->cia_nrsegs = nr_segs;
897 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
898 cl_env_put(env, &refcheck);
/*
 * Plain read entry point (readv variant): wraps the user buffer @buf of
 * @count bytes in the per-environment single-segment iovec and forwards
 * to ll_file_readv().
 */
902 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
906 struct iovec *local_iov;
911 env = cl_env_get(&refcheck);
913 RETURN(PTR_ERR(env));
915 local_iov = &vvp_env_info(env)->vti_local_iov;
916 local_iov->iov_base = (void __user *)buf;
917 local_iov->iov_len = count;
918 result = ll_file_readv(file, local_iov, 1, ppos);
919 cl_env_put(env, &refcheck);
/*
 * AIO read entry point.  Validates @iov with ll_file_get_iov_count(),
 * takes a cl environment, fills the per-environment IO arguments (not
 * sendfile, iovec, segment count and @iocb) and performs a CIT_READ
 * through ll_file_io_generic() on iocb->ki_filp at iocb->ki_pos.
 * The @pos argument is not referenced by the visible code.
 */
924 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
925 unsigned long nr_segs, loff_t pos)
928 struct ccc_io_args *args;
934 result = ll_file_get_iov_count(iov, &nr_segs, &count);
938 env = cl_env_get(&refcheck);
940 RETURN(PTR_ERR(env));
942 args = &vvp_env_info(env)->vti_args;
943 args->cia_is_sendfile = 0;
944 args->cia_iov = (struct iovec *)iov;
945 args->cia_nrsegs = nr_segs;
946 args->cia_iocb = iocb;
947 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
948 &iocb->ki_pos, count);
949 cl_env_put(env, &refcheck);
/*
 * Plain read entry point (AIO variant): builds a single-segment iovec
 * and a synchronous kiocb from the per-environment scratch space,
 * positions the kiocb at *@ppos, calls ll_file_aio_read() and copies the
 * resulting position back to *@ppos.
 */
953 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
957 struct iovec *local_iov;
963 env = cl_env_get(&refcheck);
965 RETURN(PTR_ERR(env));
967 local_iov = &vvp_env_info(env)->vti_local_iov;
968 kiocb = &vvp_env_info(env)->vti_kiocb;
969 local_iov->iov_base = (void __user *)buf;
970 local_iov->iov_len = count;
971 init_sync_kiocb(kiocb, file);
972 kiocb->ki_pos = *ppos;
973 kiocb->ki_left = count;
975 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
976 *ppos = kiocb->ki_pos;
978 cl_env_put(env, &refcheck);
984 * Write to a file (through the page cache).
986 #ifdef HAVE_FILE_WRITEV
/*
 * Vectored write entry point (built when HAVE_FILE_WRITEV is defined).
 * Validates @iov with ll_file_get_iov_count(), takes a cl environment,
 * stores the iovec and segment count in the per-environment IO arguments
 * and performs a CIT_WRITE through ll_file_io_generic() at *@ppos.
 */
987 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
988 unsigned long nr_segs, loff_t *ppos)
991 struct ccc_io_args *args;
997 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1001 env = cl_env_get(&refcheck);
1003 RETURN(PTR_ERR(env));
1005 args = &vvp_env_info(env)->vti_args;
1006 args->cia_iov = (struct iovec *)iov;
1007 args->cia_nrsegs = nr_segs;
1008 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1009 cl_env_put(env, &refcheck);
/*
 * Plain write entry point (writev variant): wraps the user buffer @buf
 * of @count bytes in the per-environment single-segment iovec and
 * forwards to ll_file_writev().
 */
1013 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1017 struct iovec *local_iov;
1022 env = cl_env_get(&refcheck);
1024 RETURN(PTR_ERR(env));
1026 local_iov = &vvp_env_info(env)->vti_local_iov;
1027 local_iov->iov_base = (void __user *)buf;
1028 local_iov->iov_len = count;
1030 result = ll_file_writev(file, local_iov, 1, ppos);
1031 cl_env_put(env, &refcheck);
1035 #else /* AIO stuff */
/*
 * AIO write entry point.  Validates @iov with ll_file_get_iov_count(),
 * takes a cl environment, stores the iovec, segment count and @iocb in
 * the per-environment IO arguments and performs a CIT_WRITE through
 * ll_file_io_generic() on iocb->ki_filp at iocb->ki_pos.
 * The @pos argument is not referenced by the visible code.
 */
1036 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1037 unsigned long nr_segs, loff_t pos)
1040 struct ccc_io_args *args;
1046 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1050 env = cl_env_get(&refcheck);
1052 RETURN(PTR_ERR(env));
1054 args = &vvp_env_info(env)->vti_args;
1055 args->cia_iov = (struct iovec *)iov;
1056 args->cia_nrsegs = nr_segs;
1057 args->cia_iocb = iocb;
1058 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1059 &iocb->ki_pos, count);
1060 cl_env_put(env, &refcheck);
/*
 * Plain write entry point (AIO variant): builds a single-segment iovec
 * and a synchronous kiocb from the per-environment scratch space,
 * positions the kiocb at *@ppos, calls ll_file_aio_write() and copies
 * the resulting position back to *@ppos.
 */
1064 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1068 struct iovec *local_iov;
1069 struct kiocb *kiocb;
1074 env = cl_env_get(&refcheck);
1076 RETURN(PTR_ERR(env));
1078 local_iov = &vvp_env_info(env)->vti_local_iov;
1079 kiocb = &vvp_env_info(env)->vti_kiocb;
1080 local_iov->iov_base = (void __user *)buf;
1081 local_iov->iov_len = count;
1082 init_sync_kiocb(kiocb, file);
1083 kiocb->ki_pos = *ppos;
1084 kiocb->ki_left = count;
1086 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1087 *ppos = kiocb->ki_pos;
1089 cl_env_put(env, &refcheck);
1096 * Send file content (through pagecache) somewhere with helper
/*
 * sendfile entry point: takes a cl environment, marks the IO arguments
 * as sendfile with the caller's @actor and @target, and performs a
 * CIT_READ of @count bytes from @in_file at *@ppos through
 * ll_file_io_generic().
 */
1098 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1099 read_actor_t actor, void *target)
1102 struct ccc_io_args *args;
1107 env = cl_env_get(&refcheck);
1109 RETURN(PTR_ERR(env));
1111 args = &vvp_env_info(env)->vti_args;
1112 args->cia_is_sendfile = 1;
1113 args->cia_target = target;
1114 args->cia_actor = actor;
1115 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1116 cl_env_put(env, &refcheck);
/*
 * Recreate an OST object for @inode from a user-supplied
 * struct ll_recreate_obj (requires CFS_CAP_SYS_ADMIN).
 *
 * With the inode size lock held, a copy of the inode's stripe metadata
 * is made and obd_create() is called on the data export with an obdo
 * carrying the requested object id and group, OBD_FL_RECREATE_OBJS, and
 * the target OST index stored in o_nlink.  The copy is freed afterwards.
 * NOTE(review): the last parameter (read through "arg"), the allocation
 * of the obdo, the error returns and the final cleanup are not visible
 * here -- confirm.
 */
1120 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1123 struct obd_export *exp = ll_i2dtexp(inode);
1124 struct ll_recreate_obj ucreatp;
1125 struct obd_trans_info oti = { 0 };
1126 struct obdo *oa = NULL;
1129 struct lov_stripe_md *lsm, *lsm2;
1132 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1135 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1136 sizeof(struct ll_recreate_obj)))
1143 ll_inode_size_lock(inode, 0);
1144 lsm = ll_i2info(inode)->lli_smd;
1146 GOTO(out, rc = -ENOENT);
/* Size of the stripe MD header plus one lov_oinfo per stripe. */
1147 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1148 (lsm->lsm_stripe_count));
1150 OBD_ALLOC(lsm2, lsm_size);
1152 GOTO(out, rc = -ENOMEM);
1154 oa->o_id = ucreatp.lrc_id;
1155 oa->o_gr = ucreatp.lrc_group;
/* The target OST index is passed in the o_nlink field. */
1156 oa->o_nlink = ucreatp.lrc_ost_idx;
1157 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1158 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1159 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1160 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1162 memcpy(lsm2, lsm, lsm_size);
1163 rc = obd_create(exp, oa, &lsm2, &oti);
1165 OBD_FREE(lsm2, lsm_size);
1168 ll_inode_size_unlock(inode, 0);
/*
 * Set the striping of @inode from the user layout @lum of @lum_size
 * bytes by sending an open intent with @flags through
 * ll_intent_file_open().
 *
 * Runs under the inode size lock.  The visible code logs "stripe already
 * exists" on one early exit, maps a negative lookup to -ENOENT, picks up
 * the intent status, releases the open handle obtained by the intent and
 * finally releases the intent and its request.
 * NOTE(review): the conditions guarding each exit and the return
 * statements are not visible here -- confirm.
 */
1173 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1174 int flags, struct lov_user_md *lum, int lum_size)
1176 struct lov_stripe_md *lsm;
1177 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1181 ll_inode_size_lock(inode, 0);
1182 lsm = ll_i2info(inode)->lli_smd;
1184 ll_inode_size_unlock(inode, 0);
1185 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1190 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1193 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1194 GOTO(out_req_free, rc = -ENOENT);
1195 rc = oit.d.lustre.it_status;
1197 GOTO(out_req_free, rc);
1199 ll_release_openhandle(file->f_dentry, &oit);
1202 ll_inode_size_unlock(inode, 0);
1203 ll_intent_release(&oit);
1206 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/*
 * Fetch the striping EA of @filename (looked up relative to @inode) from
 * the MDS with md_getattr_name().
 *
 * The reply must carry an EA (OBD_MD_FLEASIZE or OBD_MD_FLDIREA),
 * otherwise -ENODATA is returned; an unknown magic gives -EPROTO.  On
 * hosts where LOV_MAGIC differs from its little-endian encoding the EA
 * is byte-swapped in place; object arrays are swapped only for regular
 * files.  A LOV_MAGIC_JOIN layout is unpacked and converted into a newly
 * allocated struct lov_user_md_join with per-object extents.
 * The EA size is stored in *@lmm_size.
 * NOTE(review): the stores to *@lmmp and *@request, ownership of the
 * returned buffers and the return statement are not visible here --
 * confirm.
 */
1210 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1211 struct lov_mds_md **lmmp, int *lmm_size,
1212 struct ptlrpc_request **request)
1214 struct ll_sb_info *sbi = ll_i2sbi(inode);
1215 struct mdt_body *body;
1216 struct lov_mds_md *lmm = NULL;
1217 struct ptlrpc_request *req = NULL;
1218 struct obd_capa *oc;
/* The reply buffer is sized for the largest possible MDS EA. */
1221 rc = ll_get_max_mdsize(sbi, &lmmsize);
1225 oc = ll_mdscapa_get(inode);
1226 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1227 oc, filename, strlen(filename) + 1,
1228 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1229 ll_i2suppgid(inode), &req);
1232 CDEBUG(D_INFO, "md_getattr_name failed "
1233 "on %s: rc %d\n", filename, rc);
1237 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1238 LASSERT(body != NULL); /* checked by mdc_getattr_name */
/* From here on lmmsize is the actual EA size reported by the MDS. */
1240 lmmsize = body->eadatasize;
1242 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1244 GOTO(out, rc = -ENODATA);
1247 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1248 LASSERT(lmm != NULL);
1250 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1251 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
1252 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
1253 GOTO(out, rc = -EPROTO);
1257 * This is coming from the MDS, so is probably in
1258 * little endian. We convert it to host endian before
1259 * passing it to userspace.
1261 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1262 /* if function called for directory - we should
1263 * avoid swab not existent lsm objects */
1264 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1265 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1266 if (S_ISREG(body->mode))
1267 lustre_swab_lov_user_md_objects(
1268 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1269 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1270 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1271 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1272 if (S_ISREG(body->mode))
1273 lustre_swab_lov_user_md_objects(
1274 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1275 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1276 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
1277 lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
/* Joined files: rebuild the user-visible layout with per-object extents. */
1281 if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
1282 struct lov_stripe_md *lsm;
1283 struct lov_user_md_join *lmj;
1284 int lmj_size, i, aindex = 0;
1286 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
1288 GOTO(out, rc = -ENOMEM);
1289 rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm);
1291 GOTO(out_free_memmd, rc);
1293 lmj_size = sizeof(struct lov_user_md_join) +
1294 lsm->lsm_stripe_count *
1295 sizeof(struct lov_user_ost_data_join);
1296 OBD_ALLOC(lmj, lmj_size);
1298 GOTO(out_free_memmd, rc = -ENOMEM);
1300 memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
1301 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1302 struct lov_extent *lex =
1303 &lsm->lsm_array->lai_ext_array[aindex];
1305 if (lex->le_loi_idx + lex->le_stripe_count <= i)
1307 CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
1308 LPU64" len %d\n", aindex, i,
1309 lex->le_start, (int)lex->le_len);
1310 lmj->lmm_objects[i].l_extent_start =
/* A length of -1 is passed through as an extent end of -1. */
1313 if ((int)lex->le_len == -1)
1314 lmj->lmm_objects[i].l_extent_end = -1;
1316 lmj->lmm_objects[i].l_extent_end =
1317 lex->le_start + lex->le_len;
1318 lmj->lmm_objects[i].l_object_id =
1319 lsm->lsm_oinfo[i]->loi_id;
1320 lmj->lmm_objects[i].l_object_gr =
1321 lsm->lsm_oinfo[i]->loi_gr;
1322 lmj->lmm_objects[i].l_ost_gen =
1323 lsm->lsm_oinfo[i]->loi_ost_gen;
1324 lmj->lmm_objects[i].l_ost_idx =
1325 lsm->lsm_oinfo[i]->loi_ost_idx;
1327 lmm = (struct lov_mds_md *)lmj;
1330 obd_free_memmd(sbi->ll_dt_exp, &lsm);
1334 *lmm_size = lmmsize;
/*
 * ll_lov_setea - ioctl helper that installs a striping EA supplied by
 * user space (dispatched from ll_file_ioctl() for LL_IOC_LOV_SETEA).
 *
 * The caller must hold CFS_CAP_SYS_ADMIN.  A kernel buffer sized for one
 * struct lov_user_md plus one struct lov_user_ost_data is allocated,
 * filled from the user pointer 'arg', and passed to
 * ll_lov_setstripe_ea_info() with MDS_OPEN_HAS_OBJS | FMODE_WRITE as the
 * open flags.  The buffer is released both when the copy from user space
 * fails and after ll_lov_setstripe_ea_info() has returned.
 *
 * NOTE(review): the declaration of 'arg'/'rc' and every return statement
 * are not visible in this listing; confirm the error codes returned for
 * the capability, allocation and copy failures.
 */
1339 static int ll_lov_setea(struct inode *inode, struct file *file,
1342 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1343 struct lov_user_md *lump;
1344 int lum_size = sizeof(struct lov_user_md) +
1345 sizeof(struct lov_user_ost_data);
1349 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1352 OBD_ALLOC(lump, lum_size);
1356 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1357 OBD_FREE(lump, lum_size);
1361 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1363 OBD_FREE(lump, lum_size);
/*
 * ll_lov_setstripe - ioctl helper that sets the striping of a file from a
 * user-supplied lov_user_md (dispatched for LL_IOC_LOV_SETSTRIPE).
 *
 * The user buffer may hold either a v1 or a v3 layout.  A v3-sized stack
 * object (lumv3) backs both views: the v1-sized prefix is copied first,
 * and only if its magic is LOV_USER_MAGIC_V3 is the full v3 structure
 * copied again.  The result is handed to ll_lov_setstripe_ea_info() with
 * FMODE_WRITE; afterwards the user's lmm_stripe_count is zeroed and the
 * resulting layout is fetched with an LL_IOC_LOV_GETSTRIPE obd_iocontrol()
 * on the inode's data export.
 *
 * NOTE(review): the return value of put_user() is not checked here, and
 * the condition guarding the put_user()/obd_iocontrol() pair is on a line
 * not shown in this listing - confirm it only runs on success.
 */
1367 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1370 struct lov_user_md_v3 lumv3;
1371 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1372 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1373 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1376 int flags = FMODE_WRITE;
1379 /* first try with v1 which is smaller than v3 */
1380 lum_size = sizeof(struct lov_user_md_v1);
1381 if (copy_from_user(lumv1, lumv1p, lum_size))
1384 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1385 lum_size = sizeof(struct lov_user_md_v3);
1386 if (copy_from_user(&lumv3, lumv3p, lum_size))
1390 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
1392 put_user(0, &lumv1p->lmm_stripe_count);
1393 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1394 0, ll_i2info(inode)->lli_smd,
/*
 * ll_lov_getstripe - ioctl helper for LL_IOC_LOV_GETSTRIPE.
 *
 * Forwards the request to obd_iocontrol() on the inode's data export,
 * passing the stripe metadata cached in the inode info (lli_smd).
 *
 * NOTE(review): the final obd_iocontrol() argument and any check for a
 * NULL lsm are on lines not shown in this listing; presumably the user
 * pointer 'arg' is passed through - confirm.
 */
1400 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1402 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1407 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * ll_get_grouplock - take a group lock with group id 'arg' on behalf of
 * the open file 'file' (dispatched for LL_IOC_GROUP_LOCK).
 *
 * \param inode  inode whose cl_object is locked
 * \param file   open file; its ll_file_data records the lock
 * \param arg    group id requested by the caller
 *
 * The per-file state (fd_flags, fd_grouplock) is examined and updated
 * under lli_lock, but the lock itself is acquired by cl_get_grouplock()
 * with the spinlock dropped.  O_NONBLOCK on the file is passed through
 * to cl_get_grouplock().  On success the file is marked
 * LL_FILE_GROUP_LOCKED | LL_FILE_IGNORE_LOCK.
 *
 * NOTE(review): return statements are not visible in this listing.
 */
1411 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1413 struct ll_inode_info *lli = ll_i2info(inode);
1414 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1415 struct ccc_grouplock grouplock;
1419 spin_lock(&lli->lli_lock);
1420 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1421 CERROR("group lock already existed with gid %lu\n",
1422 fd->fd_grouplock.cg_gid);
1423 spin_unlock(&lli->lli_lock);
1426 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1427 spin_unlock(&lli->lli_lock);
1429 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1430 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check under the spinlock: another thread may have installed a group
 * lock on this file while lli_lock was dropped; if so, give ours back. */
1434 spin_lock(&lli->lli_lock);
1435 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1436 spin_unlock(&lli->lli_lock);
1437 CERROR("another thread just won the race\n");
1438 cl_put_grouplock(&grouplock);
1442 fd->fd_flags |= (LL_FILE_GROUP_LOCKED | LL_FILE_IGNORE_LOCK);
1443 fd->fd_grouplock = grouplock;
1444 spin_unlock(&lli->lli_lock);
1446 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * ll_put_grouplock - release the group lock held through 'file'
 * (dispatched for LL_IOC_GROUP_UNLOCK).
 *
 * \param inode  inode the lock was taken on
 * \param file   open file that holds the group lock
 * \param arg    group id; must equal the id recorded in fd_grouplock
 *
 * Under lli_lock the function verifies that the file is marked
 * LL_FILE_GROUP_LOCKED and that the recorded gid matches 'arg', then
 * moves the lock description into a local copy, clears the per-file
 * fields and drops the LL_FILE_GROUP_LOCKED | LL_FILE_IGNORE_LOCK flags.
 *
 * NOTE(review): return statements are not visible in this listing.
 */
1450 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1452 struct ll_inode_info *lli = ll_i2info(inode);
1453 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1454 struct ccc_grouplock grouplock;
1457 spin_lock(&lli->lli_lock);
1458 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1459 spin_unlock(&lli->lli_lock);
1460 CERROR("no group lock held\n");
1463 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1465 if (fd->fd_grouplock.cg_gid != arg) {
1466 CERROR("group lock %lu doesn't match current id %lu\n",
1467 arg, fd->fd_grouplock.cg_gid);
1468 spin_unlock(&lli->lli_lock);
1472 grouplock = fd->fd_grouplock;
1473 fd->fd_grouplock.cg_env = NULL;
1474 fd->fd_grouplock.cg_lock = NULL;
1475 fd->fd_grouplock.cg_gid = 0;
1476 fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED | LL_FILE_IGNORE_LOCK);
1477 spin_unlock(&lli->lli_lock);
/* The lock is released from the local copy, after the spinlock has been
 * dropped. */
1479 cl_put_grouplock(&grouplock);
1480 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1484 #if LUSTRE_FIX >= 50
/*
 * join_sanity_check - validate a (head, tail) pair before a file join.
 *
 * \param head  inode the tail will be appended to
 * \param tail  inode being joined onto head
 *
 * Checks performed, each logging a CERROR on failure:
 *  - the superblock advertises LL_SBI_JOIN;
 *  - both inodes are regular files;
 *  - head and tail are different inodes;
 *  - the size of head is a multiple of JOIN_FILE_ALIGN.
 *
 * The "must be regular" message prints the tail inode number first and
 * the head inode number second, so the arguments are passed in that
 * order.
 *
 * NOTE(review): return statements are not visible in this listing.
 */
1485 static int join_sanity_check(struct inode *head, struct inode *tail)
1488 if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
1489 CERROR("server do not support join \n");
1492 if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
1493 CERROR("tail ino %lu and ino head %lu must be regular\n",
1494 tail->i_ino, head->i_ino);
1497 if (head->i_ino == tail->i_ino) {
1498 CERROR("file %lu can not be joined to itself \n", head->i_ino);
1501 if (i_size_read(head) % JOIN_FILE_ALIGN) {
1502 CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
/*
 * join_file - issue the metadata request that joins tail_filp onto the
 * head file.
 *
 * \param head_inode  inode of the head file
 * \param head_filp   open file of the head; its f_flags seed the intent
 * \param tail_filp   open file of the tail
 *
 * An IT_OPEN intent carrying M_JOIN_FILE in it_create_mode is enqueued
 * with md_enqueue() as an LDLM_IBITS / LCK_CW request.  The op_data is
 * built from the head inode, the tail's parent directory inode and the
 * tail's name; the current size of the head (i_size_read) is passed
 * along through 'data'.  After the enqueue the result is taken from
 * it_status; any lock that was granted is dropped immediately, the
 * request is finished, and the open handle created by the intent is
 * closed through ll_release_openhandle() before the intent is released.
 *
 * NOTE(review): tail_dentry is assigned from tail_filp->f_dentry twice
 * (initializer and again before use); the second assignment looks
 * redundant.  Labels and return statements are not visible in this
 * listing.
 */
1508 static int join_file(struct inode *head_inode, struct file *head_filp,
1509 struct file *tail_filp)
1511 struct dentry *tail_dentry = tail_filp->f_dentry;
1512 struct lookup_intent oit = {.it_op = IT_OPEN,
1513 .it_flags = head_filp->f_flags,
1514 .it_create_mode = M_JOIN_FILE};
1515 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CW,
1516 ll_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL };
1518 struct lustre_handle lockh;
1519 struct md_op_data *op_data;
1524 tail_dentry = tail_filp->f_dentry;
1526 data = i_size_read(head_inode);
1527 op_data = ll_prep_md_op_data(NULL, head_inode,
1528 tail_dentry->d_parent->d_inode,
1529 tail_dentry->d_name.name,
1530 tail_dentry->d_name.len, 0,
1531 LUSTRE_OPC_ANY, &data);
1532 if (IS_ERR(op_data))
1533 RETURN(PTR_ERR(op_data));
1535 rc = md_enqueue(ll_i2mdexp(head_inode), &einfo, &oit,
1536 op_data, &lockh, NULL, 0, NULL, 0);
1538 ll_finish_md_op_data(op_data);
1542 rc = oit.d.lustre.it_status;
1544 if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
1545 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
1546 ptlrpc_req_finished((struct ptlrpc_request *)
1547 oit.d.lustre.it_data);
1551 if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
1553 ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
1554 oit.d.lustre.it_lock_mode = 0;
1556 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1557 it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
1558 ll_release_openhandle(head_filp->f_dentry, &oit);
1560 ll_intent_release(&oit);
/*
 * ll_file_join - join the file named by filename_tail onto 'head'.
 *
 * \param head           inode of the head file
 * \param filp           open file of the head
 * \param filename_tail  path of the tail file (kernel string)
 *
 * Steps visible in this function:
 *  1. open the tail with filp_open(O_WRONLY) and grab its inode;
 *  2. order the two inodes by inode number (larger i_ino first) so the
 *     extent locks are always taken in the same sequence;
 *  3. take an LCK_EX tree lock covering [0, OBD_OBJECT_EOF] on each;
 *  4. run join_sanity_check() and then join_file();
 *  5. unwind according to cleanup_phase: unlock both trees, cancel
 *     unused data locks, close the tail, and on success free the head's
 *     cached stripe metadata (lli_smd) and reset it to NULL.
 *
 * NOTE(review): the cleanup_phase assignments, case labels and return
 * statements are not visible in this listing.
 */
1564 static int ll_file_join(struct inode *head, struct file *filp,
1565 char *filename_tail)
1567 struct inode *tail = NULL, *first = NULL, *second = NULL;
1568 struct dentry *tail_dentry;
1569 struct file *tail_filp, *first_filp, *second_filp;
1570 struct ll_lock_tree first_tree, second_tree;
1571 struct ll_lock_tree_node *first_node, *second_node;
1572 struct ll_inode_info *hlli = ll_i2info(head);
1573 int rc = 0, cleanup_phase = 0;
1576 CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
1577 head->i_ino, head->i_generation, head, filename_tail);
1579 tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
1580 if (IS_ERR(tail_filp)) {
1581 CERROR("Can not open tail file %s", filename_tail);
1582 rc = PTR_ERR(tail_filp);
1585 tail = igrab(tail_filp->f_dentry->d_inode);
1587 tail_dentry = tail_filp->f_dentry;
1588 LASSERT(tail_dentry);
1591 /*reorder the inode for lock sequence*/
1592 first = head->i_ino > tail->i_ino ? head : tail;
1593 second = head->i_ino > tail->i_ino ? tail : head;
1594 first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
1595 second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
1597 CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
1598 head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
1599 first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
1600 if (IS_ERR(first_node)){
1601 rc = PTR_ERR(first_node);
1604 first_tree.lt_fd = first_filp->private_data;
1605 rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
1610 second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
1611 if (IS_ERR(second_node)){
1612 rc = PTR_ERR(second_node);
1615 second_tree.lt_fd = second_filp->private_data;
1616 rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
1621 rc = join_sanity_check(head, tail);
1625 rc = join_file(head, filp, tail_filp);
1629 switch (cleanup_phase) {
1631 ll_tree_unlock(&second_tree);
1632 obd_cancel_unused(ll_i2dtexp(second),
1633 ll_i2info(second)->lli_smd, 0, NULL);
1635 ll_tree_unlock(&first_tree);
1636 obd_cancel_unused(ll_i2dtexp(first),
1637 ll_i2info(first)->lli_smd, 0, NULL);
1639 filp_close(tail_filp, 0);
1642 if (head && rc == 0) {
1643 obd_free_memmd(ll_i2sbi(head)->ll_dt_exp,
1645 hlli->lli_smd = NULL;
1650 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
1655 #endif /* LUSTRE_FIX >= 50 */
1658 * Close inode open handle
1660 * \param dentry [in] dentry which contains the inode
1661 * \param it [in,out] intent which contains open info and result
1664 * \retval <0 failure
/*
 * ll_release_openhandle - close the MDS open handle that an intent
 * obtained, when no struct file is going to own it.
 *
 * Nothing is done for the filesystem root dentry or when the intent does
 * not carry DISP_OPEN_OPEN.  Otherwise an obd_client_handle is allocated,
 * filled from the intent with ll_och_fill() and closed through
 * ll_close_inode_openhandle().  If the intent still holds the extra
 * request reference (DISP_ENQ_OPEN_REF) it is dropped here, because
 * ll_file_open() will not run for this intent.
 *
 * NOTE(review): the early-return statements and the 'out' label are not
 * visible in this listing.
 */
1666 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1668 struct inode *inode = dentry->d_inode;
1669 struct obd_client_handle *och;
1675 /* Root ? Do nothing. */
1676 if (dentry->d_inode->i_sb->s_root == dentry)
1679 /* No open handle to close? Move away */
1680 if (!it_disposition(it, DISP_OPEN_OPEN))
1683 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1685 OBD_ALLOC(och, sizeof(*och));
1687 GOTO(out, rc = -ENOMEM);
1689 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1690 ll_i2info(inode), it, och);
1692 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1695 /* this one is in place of ll_file_open */
1696 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1697 ptlrpc_req_finished(it->d.lustre.it_data);
1698 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1703 * Get size for inode for which FIEMAP mapping is requested.
1704 * Make the FIEMAP get_info call and returns the result.
/*
 * ll_fiemap - perform a FIEMAP query for 'inode' via obd_get_info().
 *
 * \param inode   file being mapped
 * \param fiemap  in/out request buffer (header plus extent array)
 *
 * The third parameter is declared on a line not shown here; the body
 * uses it as 'num_bytes', the size of the fiemap buffer, which becomes
 * the value length handed to obd_get_info().
 *
 * A file striped over more than one object is only served when the
 * caller set FIEMAP_FLAG_DEVICE_ORDER.  The request key (KEY_FIEMAP) is
 * populated with the object id/group from the stripe metadata and with
 * attributes from the inode; if the resulting size is zero the reply is
 * short-circuited with fm_mapped_extents = 0.  Otherwise the caller's
 * fiemap header is copied into the key and obd_get_info() fills 'fiemap'.
 *
 * NOTE(review): lsm is dereferenced without a NULL check in the visible
 * lines - confirm callers guarantee striping exists.
 */
1706 int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1709 struct obd_export *exp = ll_i2dtexp(inode);
1710 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1711 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1712 int vallen = num_bytes;
1716 /* If the stripe_count > 1 and the application does not understand
1717 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1719 if (lsm->lsm_stripe_count > 1 &&
1720 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1723 fm_key.oa.o_id = lsm->lsm_object_id;
1724 fm_key.oa.o_gr = lsm->lsm_object_gr;
1725 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1727 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1730 /* If filesize is 0, then there would be no objects for mapping */
1731 if (fm_key.oa.o_size == 0) {
1732 fiemap->fm_mapped_extents = 0;
1736 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1738 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1740 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * ll_fid2path - resolve a FID to a path on behalf of user space
 * (dispatched for OBD_IOC_FID2PATH).
 *
 * \param exp  metadata export the request is sent through
 * \param arg  user pointer to a struct getinfo_fid2path
 *
 * The fixed-size header is first copied in (gfin) only to learn
 * gf_pathlen.  A second buffer (gfout) of sizeof(*gfout) + gf_pathlen
 * bytes is then allocated, seeded with the header, passed to
 * obd_iocontrol(OBD_IOC_FID2PATH) and copied back to user space in full.
 *
 * NOTE(review): gf_pathlen comes straight from user space and no upper
 * bound is visible before it is used to size the allocation - confirm a
 * limit is enforced.  The release of gfin and the return statements are
 * not visible in this listing.
 */
1745 int ll_fid2path(struct obd_export *exp, void *arg)
1747 struct getinfo_fid2path *gfout, *gfin;
1751 /* Need to get the buflen */
1752 OBD_ALLOC_PTR(gfin);
1755 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1760 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1761 OBD_ALLOC(gfout, outsize);
1762 if (gfout == NULL) {
1766 memcpy(gfout, gfin, sizeof(*gfout));
1769 /* Call mdc_iocontrol */
1770 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1773 if (copy_to_user(arg, gfout, outsize))
1777 OBD_FREE(gfout, outsize);
/*
 * ll_file_ioctl - ioctl entry point for regular Lustre files (installed
 * as .ioctl in the ll_file_operations tables).
 *
 * tty-class ioctls (type 'T' or 't') are filtered out up front.  The
 * commands handled in the visible cases are:
 *  - LL_IOC_GETFLAGS / LL_IOC_SETFLAGS / LL_IOC_CLRFLAGS: read or modify
 *    fd_flags; LL_FILE_IGNORE_LOCK may only be set on O_DIRECT files;
 *  - LL_IOC_LOV_SETSTRIPE / LL_IOC_LOV_SETEA / LL_IOC_LOV_GETSTRIPE /
 *    LL_IOC_RECREATE_OBJ: striping helpers;
 *  - FSFILT_IOC_FIEMAP: copies the request in, rejects flags outside
 *    LUSTRE_FIEMAP_FLAGS_COMPAT (writing the offending flags back),
 *    optionally starts writeback for FIEMAP_FLAG_SYNC, calls ll_fiemap()
 *    and copies the header plus mapped extents back out;
 *  - FSFILT_IOC_GETFLAGS / FSFILT_IOC_SETFLAGS: ll_iocontrol();
 *  - FSFILT_IOC_GETVERSION{,_OLD}: returns i_generation;
 *  - file join (only when LUSTRE_FIX >= 50);
 *  - LL_IOC_GROUP_LOCK / LL_IOC_GROUP_UNLOCK, IOC_OBD_STATFS,
 *    LL_IOC_FLUSHCTX, LL_IOC_PATH2FID, OBD_IOC_FID2PATH;
 *  - anything else: offered to dynamically registered handlers through
 *    ll_iocontrol_call(), then to obd_iocontrol() on the data export.
 *
 * NOTE(review): in the FIEMAP case num_bytes is computed from the
 * user-supplied extent_count with no visible overflow or upper-bound
 * check - confirm.  Several case labels, the switch statement itself
 * and the return statements are not visible in this listing.
 */
1781 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1784 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1788 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1789 inode->i_generation, inode, cmd);
1790 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1792 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1793 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1797 case LL_IOC_GETFLAGS:
1798 /* Get the current value of the file flags */
1799 return put_user(fd->fd_flags, (int *)arg);
1800 case LL_IOC_SETFLAGS:
1801 case LL_IOC_CLRFLAGS:
1802 /* Set or clear specific file flags */
1803 /* XXX This probably needs checks to ensure the flags are
1804 * not abused, and to handle any flag side effects.
1806 if (get_user(flags, (int *) arg))
1809 if (cmd == LL_IOC_SETFLAGS) {
1810 if ((flags & LL_FILE_IGNORE_LOCK) &&
1811 !(file->f_flags & O_DIRECT)) {
1812 CERROR("%s: unable to disable locking on "
1813 "non-O_DIRECT file\n", current->comm);
1817 fd->fd_flags |= flags;
1819 fd->fd_flags &= ~flags;
1822 case LL_IOC_LOV_SETSTRIPE:
1823 RETURN(ll_lov_setstripe(inode, file, arg));
1824 case LL_IOC_LOV_SETEA:
1825 RETURN(ll_lov_setea(inode, file, arg));
1826 case LL_IOC_LOV_GETSTRIPE:
1827 RETURN(ll_lov_getstripe(inode, arg));
1828 case LL_IOC_RECREATE_OBJ:
1829 RETURN(ll_lov_recreate_obj(inode, file, arg));
1830 case FSFILT_IOC_FIEMAP: {
1831 struct ll_user_fiemap *fiemap_s;
1832 size_t num_bytes, ret_bytes;
1833 unsigned int extent_count;
1836 /* Get the extent count so we can calculate the size of
1837 * required fiemap buffer */
1838 if (get_user(extent_count,
1839 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1841 num_bytes = sizeof(*fiemap_s) + (extent_count *
1842 sizeof(struct ll_fiemap_extent));
1843 OBD_VMALLOC(fiemap_s, num_bytes);
1844 if (fiemap_s == NULL)
1847 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1849 GOTO(error, rc = -EFAULT);
1851 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1852 fiemap_s->fm_flags = fiemap_s->fm_flags &
1853 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1854 if (copy_to_user((char *)arg, fiemap_s,
1856 GOTO(error, rc = -EFAULT);
1858 GOTO(error, rc = -EBADR);
1861 /* If fm_extent_count is non-zero, read the first extent since
1862 * it is used to calculate end_offset and device from previous
1865 if (copy_from_user(&fiemap_s->fm_extents[0],
1866 (char __user *)arg + sizeof(*fiemap_s),
1867 sizeof(struct ll_fiemap_extent)))
1868 GOTO(error, rc = -EFAULT);
1871 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1874 rc = filemap_fdatawrite(inode->i_mapping);
1879 rc = ll_fiemap(inode, fiemap_s, num_bytes);
1883 ret_bytes = sizeof(struct ll_user_fiemap);
1885 if (extent_count != 0)
1886 ret_bytes += (fiemap_s->fm_mapped_extents *
1887 sizeof(struct ll_fiemap_extent));
1889 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1893 OBD_VFREE(fiemap_s, num_bytes);
1896 case FSFILT_IOC_GETFLAGS:
1897 case FSFILT_IOC_SETFLAGS:
1898 RETURN(ll_iocontrol(inode, file, cmd, arg));
1899 case FSFILT_IOC_GETVERSION_OLD:
1900 case FSFILT_IOC_GETVERSION:
1901 RETURN(put_user(inode->i_generation, (int *)arg));
1903 #if LUSTRE_FIX >= 50
1904 /* Allow file join in beta builds to allow debugging */
1908 ftail = getname((const char *)arg);
1910 RETURN(PTR_ERR(ftail));
1911 rc = ll_file_join(inode, file, ftail);
1915 CWARN("file join is not supported in this version of Lustre\n");
1919 case LL_IOC_GROUP_LOCK:
1920 RETURN(ll_get_grouplock(inode, file, arg));
1921 case LL_IOC_GROUP_UNLOCK:
1922 RETURN(ll_put_grouplock(inode, file, arg));
1923 case IOC_OBD_STATFS:
1924 RETURN(ll_obd_statfs(inode, (void *)arg));
1926 /* We need to special case any other ioctls we want to handle,
1927 * to send them to the MDS/OST as appropriate and to properly
1928 * network encode the arg field.
1929 case FSFILT_IOC_SETVERSION_OLD:
1930 case FSFILT_IOC_SETVERSION:
1932 case LL_IOC_FLUSHCTX:
1933 RETURN(ll_flush_ctx(inode));
1934 case LL_IOC_PATH2FID: {
1935 if (copy_to_user((void *)arg, ll_inode2fid(inode),
1936 sizeof(struct lu_fid)))
1941 case OBD_IOC_FID2PATH:
1942 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
1948 ll_iocontrol_call(inode, file, cmd, arg, &err))
1951 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * ll_file_seek - llseek implementation for Lustre files.
 *
 * \param file    open file whose position is changed
 * \param offset  requested offset, interpreted according to 'origin'
 * \param origin  0 = SEEK_SET, 1 = SEEK_CUR, 2 = SEEK_END
 *
 * A provisional target is computed first purely for the trace message.
 * For SEEK_END the file size is refreshed with cl_glimpse_size() and then
 * read under ll_inode_size_lock(); for SEEK_CUR the current f_pos is
 * added.  The new position is stored only when it lies within
 * [0, ll_file_maxbytes(inode)] and differs from the current f_pos.
 *
 * NOTE(review): 'nonblock' is set from O_NONBLOCK but is not passed to
 * any call in the visible lines - it looks unused.  Return statements
 * are not visible in this listing.
 */
1957 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1959 struct inode *inode = file->f_dentry->d_inode;
1962 retval = offset + ((origin == 2) ? i_size_read(inode) :
1963 (origin == 1) ? file->f_pos : 0);
1964 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1965 inode->i_ino, inode->i_generation, inode, retval, retval,
1966 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1967 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1969 if (origin == 2) { /* SEEK_END */
1970 int nonblock = 0, rc;
1972 if (file->f_flags & O_NONBLOCK)
1973 nonblock = LDLM_FL_BLOCK_NOWAIT;
1975 rc = cl_glimpse_size(inode);
1979 ll_inode_size_lock(inode, 0);
1980 offset += i_size_read(inode);
1981 ll_inode_size_unlock(inode, 0);
1982 } else if (origin == 1) { /* SEEK_CUR */
1983 offset += file->f_pos;
1987 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1988 if (offset != file->f_pos) {
1989 file->f_pos = offset;
/*
 * ll_fsync - fsync implementation for Lustre files.
 *
 * \param file    open file (not used in the visible lines)
 * \param dentry  dentry of the file being synced
 * \param data    datasync flag
 *
 * Sequence visible in this function:
 *  1. wait for page writeback already started by the caller
 *     (filemap_fdatawait);
 *  2. collect and clear asynchronous write errors recorded on the inode
 *     (lli_async_rc) and on the stripe metadata
 *     (lov_test_and_clear_async_rc);
 *  3. sync metadata on the MDS with md_sync() and release the reply;
 *  4. build an obdo from the stripe metadata and inode attributes and
 *     sync the whole object range [0, OBD_OBJECT_EOF] with obd_sync().
 *
 * NOTE(review): the conditions that combine 'err' into 'rc', the guard
 * around the OST sync and the obdo allocation/free are on lines not shown
 * in this listing.
 */
1997 int ll_fsync(struct file *file, struct dentry *dentry, int data)
1999 struct inode *inode = dentry->d_inode;
2000 struct ll_inode_info *lli = ll_i2info(inode);
2001 struct lov_stripe_md *lsm = lli->lli_smd;
2002 struct ptlrpc_request *req;
2003 struct obd_capa *oc;
2006 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2007 inode->i_generation, inode);
2008 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2010 /* fsync's caller has already called _fdata{sync,write}, we want
2011 * that IO to finish before calling the osc and mdc sync methods */
2012 rc = filemap_fdatawait(inode->i_mapping);
2014 /* catch async errors that were recorded back when async writeback
2015 * failed for pages in this mapping. */
2016 err = lli->lli_async_rc;
2017 lli->lli_async_rc = 0;
2021 err = lov_test_and_clear_async_rc(lsm);
2026 oc = ll_mdscapa_get(inode);
2027 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2033 ptlrpc_req_finished(req);
2040 RETURN(rc ? rc : -ENOMEM);
2042 oa->o_id = lsm->lsm_object_id;
2043 oa->o_gr = lsm->lsm_object_gr;
2044 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2045 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
2046 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
2049 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2050 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
2051 0, OBD_OBJECT_EOF, oc);
/*
 * ll_file_flock - cluster-wide flock()/fcntl() locking, implemented as an
 * LDLM_FLOCK enqueue on the metadata export.
 *
 * \param file       file being locked
 * \param cmd        fcntl lock command
 * \param file_lock  VFS lock description; also passed as callback data
 *
 * For flock()-style requests (FL_FLOCK) the missing range end and owner
 * pid are filled in first.  The VFS lock type is mapped onto an LDLM
 * mode (LCK_PR, LCK_PW, or LCK_NL for an unlock - see the comment in the
 * body) and the command onto enqueue flags (LDLM_FL_BLOCK_NOWAIT,
 * LDLM_FL_TEST_LOCK).  After md_enqueue() the local VFS lock tables are
 * updated through ll_flock_lock_file_wait() or posix_lock_file_wait()
 * when the enqueue succeeded or the request was an unlock.
 *
 * NOTE(review): the case labels of both switch statements and the return
 * statements are not visible in this listing.
 */
2061 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2063 struct inode *inode = file->f_dentry->d_inode;
2064 struct ll_sb_info *sbi = ll_i2sbi(inode);
2065 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2066 .ei_cb_cp =ldlm_flock_completion_ast,
2067 .ei_cbdata = file_lock };
2068 struct md_op_data *op_data;
2069 struct lustre_handle lockh = {0};
2070 ldlm_policy_data_t flock;
2075 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2076 inode->i_ino, file_lock);
2078 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2080 if (file_lock->fl_flags & FL_FLOCK) {
2081 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2082 /* set missing params for flock() calls */
2083 file_lock->fl_end = OFFSET_MAX;
2084 file_lock->fl_pid = current->tgid;
2086 flock.l_flock.pid = file_lock->fl_pid;
2087 flock.l_flock.start = file_lock->fl_start;
2088 flock.l_flock.end = file_lock->fl_end;
2090 switch (file_lock->fl_type) {
2092 einfo.ei_mode = LCK_PR;
2095 /* An unlock request may or may not have any relation to
2096 * existing locks so we may not be able to pass a lock handle
2097 * via a normal ldlm_lock_cancel() request. The request may even
2098 * unlock a byte range in the middle of an existing lock. In
2099 * order to process an unlock request we need all of the same
2100 * information that is given with a normal read or write record
2101 * lock request. To avoid creating another ldlm unlock (cancel)
2102 * message we'll treat a LCK_NL flock request as an unlock. */
2103 einfo.ei_mode = LCK_NL;
2106 einfo.ei_mode = LCK_PW;
2109 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
2124 flags = LDLM_FL_BLOCK_NOWAIT;
2130 flags = LDLM_FL_TEST_LOCK;
2131 /* Save the old mode so that if the mode in the lock changes we
2132 * can decrement the appropriate reader or writer refcount. */
2133 file_lock->fl_type = einfo.ei_mode;
2136 CERROR("unknown fcntl lock command: %d\n", cmd);
2140 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2141 LUSTRE_OPC_ANY, NULL);
2142 if (IS_ERR(op_data))
2143 RETURN(PTR_ERR(op_data));
2145 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2146 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2147 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2149 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2150 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2152 ll_finish_md_op_data(op_data);
2154 if ((file_lock->fl_flags & FL_FLOCK) &&
2155 (rc == 0 || file_lock->fl_type == F_UNLCK))
2156 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2157 #ifdef HAVE_F_OP_FLOCK
2158 if ((file_lock->fl_flags & FL_POSIX) &&
2159 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2160 !(flags & LDLM_FL_TEST_LOCK))
2161 posix_lock_file_wait(file, file_lock);
/*
 * ll_file_noflock - lock handler installed in ll_file_operations_noflock.
 *
 * NOTE(review): the body is not visible in this listing.  The comment on
 * ll_file_operations_noflock says the "-o noflock" tables exist "to return
 * ENOSYS on flock calls", so this presumably just fails with -ENOSYS -
 * confirm against the full source.
 */
2167 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * ll_have_md_lock - test whether this client already holds a metadata
 * lock on 'inode' covering the inodebits in 'bits'.
 *
 * The match is attempted with md_lock_match() against the inode's FID
 * for any of the modes CR/CW/PR/PW.  LDLM_FL_TEST_LOCK is part of the
 * match flags, together with LDLM_FL_BLOCK_GRANTED and
 * LDLM_FL_CBPENDING.
 *
 * NOTE(review): the NULL-inode guard (if any) and the return statements
 * are not visible in this listing; presumably returns non-zero on a
 * match - confirm.
 */
2174 int ll_have_md_lock(struct inode *inode, __u64 bits)
2176 struct lustre_handle lockh;
2177 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2185 fid = &ll_i2info(inode)->lli_fid;
2186 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2188 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2189 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2190 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * ll_take_md_lock - look up an already-granted metadata lock on 'inode'
 * covering 'bits'.
 *
 * \param inode  inode whose FID names the lock resource
 * \param bits   inodebits the lock must cover
 * \param lockh  out: handle of the matched lock
 *
 * Same match as ll_have_md_lock() (modes CR/CW/PR/PW on LDLM_IBITS), but
 * without LDLM_FL_TEST_LOCK in the flags, and the handle is written to
 * the caller's 'lockh' instead of a local.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the md_lock_match() result (matched mode) is returned.
 */
2196 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2197 struct lustre_handle *lockh)
2199 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2205 fid = &ll_i2info(inode)->lli_fid;
2206 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2208 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2209 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2210 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * ll_inode_revalidate_fini - post-process the result code of a getattr
 * issued while revalidating 'inode'.
 *
 * -ENOENT is treated specially: the object is already unlinked on the
 * server (see the comments in the body).  Other failures are logged with
 * CERROR.
 *
 * NOTE(review): the nlink update, the exact conditions and the return
 * statements are on lines not shown in this listing.
 */
2214 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2215 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2216 * and return success */
2218 /* This path cannot be hit for regular files unless in
2219 * case of obscure races, so no need to validate
2221 if (!S_ISREG(inode->i_mode) &&
2222 !S_ISDIR(inode->i_mode))
2227 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * __ll_inode_revalidate_it - refresh the attributes of dentry->d_inode
 * from the MDS unless a suitable metadata lock is already held.
 *
 * \param dentry  dentry whose inode is revalidated
 * \param it      lookup intent supplied by the caller
 *
 * The last parameter is declared on a line not shown here; the body uses
 * it as 'ibits', the inodebits passed to ll_have_md_lock().
 *
 * Two strategies, selected by the export's connect flags:
 *  - OBD_CONNECT_ATTRFID set: an IT_GETATTR intent lock is taken by FID
 *    (no name is sent), with M_CHECK_STALE set for the duration of the
 *    call.  If the inode turns out to have no links left, the dentry is
 *    dropped under ll_lookup_lock and dcache_lock.
 *  - otherwise, and only when no lock covering 'ibits' is cached: a plain
 *    md_getattr() is issued (asking for EA size information on regular
 *    files) and the reply is applied with ll_prep_inode().
 *
 * NOTE(review): error checks, labels and return statements are not
 * visible in this listing.
 */
2235 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2238 struct inode *inode = dentry->d_inode;
2239 struct ptlrpc_request *req = NULL;
2240 struct ll_sb_info *sbi;
2241 struct obd_export *exp;
2246 CERROR("REPORT THIS LINE TO PETER\n");
2249 sbi = ll_i2sbi(inode);
2251 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2252 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2254 exp = ll_i2mdexp(inode);
2256 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2257 struct lookup_intent oit = { .it_op = IT_GETATTR };
2258 struct md_op_data *op_data;
2260 /* Call getattr by fid, so do not provide name at all. */
2261 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2262 dentry->d_inode, NULL, 0, 0,
2263 LUSTRE_OPC_ANY, NULL);
2264 if (IS_ERR(op_data))
2265 RETURN(PTR_ERR(op_data));
2267 oit.it_create_mode |= M_CHECK_STALE;
2268 rc = md_intent_lock(exp, op_data, NULL, 0,
2269 /* we are not interested in name
2272 ll_md_blocking_ast, 0);
2273 ll_finish_md_op_data(op_data);
2274 oit.it_create_mode &= ~M_CHECK_STALE;
2276 rc = ll_inode_revalidate_fini(inode, rc);
2280 rc = ll_revalidate_it_finish(req, &oit, dentry);
2282 ll_intent_release(&oit);
2286 /* Unlinked? Unhash dentry, so it is not picked up later by
2287 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2288 here to preserve get_cwd functionality on 2.6.
2290 if (!dentry->d_inode->i_nlink) {
2291 spin_lock(&ll_lookup_lock);
2292 spin_lock(&dcache_lock);
2293 ll_drop_dentry(dentry);
2294 spin_unlock(&dcache_lock);
2295 spin_unlock(&ll_lookup_lock);
2298 ll_lookup_finish_locks(&oit, dentry);
2299 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2301 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2302 obd_valid valid = OBD_MD_FLGETATTR;
2303 struct obd_capa *oc;
2306 if (S_ISREG(inode->i_mode)) {
2307 rc = ll_get_max_mdsize(sbi, &ealen);
2310 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2312 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2313 * capa for this inode. Because we only keep capas of dirs
2315 oc = ll_mdscapa_get(inode);
2316 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2320 rc = ll_inode_revalidate_fini(inode, rc);
2324 rc = ll_prep_inode(&inode, req, NULL);
2327 ptlrpc_req_finished(req);
/*
 * ll_inode_revalidate_it - revalidate both the metadata attributes and
 * the size of dentry->d_inode.
 *
 * Metadata is refreshed through __ll_inode_revalidate_it() with the
 * MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP bits.  The size is then
 * refreshed with cl_glimpse_size(), except when the inode has no stripe
 * metadata yet (lli_smd == NULL), in which case there are no objects to
 * ask.
 *
 * NOTE(review): return statements are not visible in this listing.
 */
2331 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2336 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2337 MDS_INODELOCK_LOOKUP);
2339 /* if object not yet allocated, don't validate size */
2340 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2343 /* cl_glimpse_size will prefer locally cached writes if they extend
2347 rc = cl_glimpse_size(dentry->d_inode);
/*
 * ll_getattr_it - fill a kstat for 'de' after revalidating its inode with
 * the supplied intent.
 *
 * \param mnt   mount the dentry belongs to (not used in the visible lines)
 * \param de    dentry being stat'ed
 * \param it    lookup intent passed to ll_inode_revalidate_it()
 * \param stat  out: attributes copied from the inode
 *
 * Most fields are copied straight from the in-core inode.  blksize comes
 * from i_blksize when HAVE_INODE_BLKSIZE is defined, otherwise it is
 * derived from i_blkbits.  size and blocks are read together under
 * ll_inode_size_lock().
 *
 * NOTE(review): the check of 'res' and the return statements are not
 * visible in this listing.
 */
2352 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2353 struct lookup_intent *it, struct kstat *stat)
2355 struct inode *inode = de->d_inode;
2358 res = ll_inode_revalidate_it(de, it);
2359 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2364 stat->dev = inode->i_sb->s_dev;
2365 stat->ino = inode->i_ino;
2366 stat->mode = inode->i_mode;
2367 stat->nlink = inode->i_nlink;
2368 stat->uid = inode->i_uid;
2369 stat->gid = inode->i_gid;
2370 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2371 stat->atime = inode->i_atime;
2372 stat->mtime = inode->i_mtime;
2373 stat->ctime = inode->i_ctime;
2374 #ifdef HAVE_INODE_BLKSIZE
2375 stat->blksize = inode->i_blksize;
2377 stat->blksize = 1 << inode->i_blkbits;
2380 ll_inode_size_lock(inode, 0);
2381 stat->size = i_size_read(inode);
2382 stat->blocks = inode->i_blocks;
2383 ll_inode_size_unlock(inode, 0);
/*
 * ll_getattr - getattr inode operation (installed as .getattr in
 * ll_file_inode_operations).
 *
 * Thin wrapper: builds a local IT_GETATTR intent and delegates to
 * ll_getattr_it(), returning its result.
 */
2387 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2389 struct lookup_intent it = { .it_op = IT_GETATTR };
2391 return ll_getattr_it(mnt, de, &it, stat);
/*
 * lustre_check_acl - ACL permission callback (passed to
 * generic_permission() by ll_inode_permission()).
 *
 * \param inode  inode being accessed
 * \param mask   requested access mask
 *
 * With CONFIG_FS_POSIX_ACL the cached ACL (lli_posix_acl) is duplicated
 * under lli_lock, evaluated with posix_acl_permission() outside the lock,
 * and the duplicate is released afterwards.
 *
 * NOTE(review): the handling of a missing ACL, the non-ACL build branch
 * and the return statements are not visible in this listing.
 */
2395 int lustre_check_acl(struct inode *inode, int mask)
2397 #ifdef CONFIG_FS_POSIX_ACL
2398 struct ll_inode_info *lli = ll_i2info(inode);
2399 struct posix_acl *acl;
2403 spin_lock(&lli->lli_lock);
2404 acl = posix_acl_dup(lli->lli_posix_acl);
2405 spin_unlock(&lli->lli_lock);
2410 rc = posix_acl_permission(inode, acl, mask);
2411 posix_acl_release(acl);
2419 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
/*
 * ll_inode_permission - permission inode operation, variant built when
 * LINUX_VERSION_CODE >= 2.6.10.
 *
 * \param inode  inode being accessed
 * \param mask   requested access mask
 * \param nd     nameidata from the VFS (not used in the visible lines)
 *
 * The filesystem root is revalidated first with MDS_INODELOCK_LOOKUP,
 * because lookup never validates it (see the comment in the body).
 * Remote clients (LL_SBI_RMT_CLIENT) are checked with
 * lustre_check_remote_perm(); everyone else goes through
 * generic_permission() with lustre_check_acl() as the ACL callback.
 *
 * NOTE(review): the check of 'rc' after revalidation and the final
 * return are not visible in this listing.
 */
2420 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2425 /* as root inode are NOT getting validated in lookup operation,
2426 * need to do it before permission check. */
2428 if (inode == inode->i_sb->s_root->d_inode) {
2429 struct lookup_intent it = { .it_op = IT_LOOKUP };
2431 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2432 MDS_INODELOCK_LOOKUP);
2437 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2438 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2440 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2441 return lustre_check_remote_perm(inode, mask);
2443 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2444 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * ll_inode_permission - permission inode operation, alternative variant
 * (presumably the #else branch for kernels older than 2.6.10 - the
 * preprocessor line is not visible in this listing).
 *
 * Remote clients (LL_SBI_RMT_CLIENT) are checked with
 * lustre_check_remote_perm().  Otherwise the check is open-coded:
 *  - write access is tested against IS_RDONLY (for regular files,
 *    directories and symlinks) and IS_IMMUTABLE;
 *  - the owner, group (optionally via lustre_check_acl()) and other
 *    permission bits are compared with 'mask';
 *  - finally CFS_CAP_DAC_OVERRIDE and CFS_CAP_DAC_READ_SEARCH may grant
 *    access that the mode bits deny.
 *
 * NOTE(review): mode shifts, labels and return statements are largely on
 * lines not shown in this listing; the summary above reflects only the
 * visible conditions.
 */
2449 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2451 int mode = inode->i_mode;
2454 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2455 inode->i_ino, inode->i_generation, inode, mask);
2457 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2458 return lustre_check_remote_perm(inode, mask);
2460 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2462 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2463 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2465 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2467 if (current->fsuid == inode->i_uid) {
2470 if (((mode >> 3) & mask & S_IRWXO) != mask)
2472 rc = lustre_check_acl(inode, mask);
2476 goto check_capabilities;
2480 if (in_group_p(inode->i_gid))
2483 if ((mode & mask & S_IRWXO) == mask)
2487 if (!(mask & MAY_EXEC) ||
2488 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2489 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2492 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2493 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/*
 * Select the vectored I/O entry points used in the file_operations
 * tables that follow.  With HAVE_FILE_READV the tables are filled through
 * .readv/.writev (ll_file_readv / ll_file_writev); the second group of
 * definitions maps the same macro names onto .aio_read/.aio_write
 * (ll_file_aio_read / ll_file_aio_write).
 *
 * NOTE(review): the #else/#endif separating the two groups are not
 * visible in this listing.
 */
2500 #ifdef HAVE_FILE_READV
2501 #define READ_METHOD readv
2502 #define READ_FUNCTION ll_file_readv
2503 #define WRITE_METHOD writev
2504 #define WRITE_FUNCTION ll_file_writev
2506 #define READ_METHOD aio_read
2507 #define READ_FUNCTION ll_file_aio_read
2508 #define WRITE_METHOD aio_write
2509 #define WRITE_FUNCTION ll_file_aio_write
/*
 * File operations table for regular files in the "localflock" case.  No
 * .flock/.lock entry is visible in this table (compare
 * ll_file_operations_flock and ll_file_operations_noflock), so lock
 * requests are presumably left to the kernel's local implementation.
 * .READ_METHOD/.WRITE_METHOD expand to readv/writev or
 * aio_read/aio_write depending on HAVE_FILE_READV.
 */
2512 /* -o localflock - only provides locally consistent flock locks */
2513 struct file_operations ll_file_operations = {
2514 .read = ll_file_read,
2515 .READ_METHOD = READ_FUNCTION,
2516 .write = ll_file_write,
2517 .WRITE_METHOD = WRITE_FUNCTION,
2518 .ioctl = ll_file_ioctl,
2519 .open = ll_file_open,
2520 .release = ll_file_release,
2521 .mmap = ll_file_mmap,
2522 .llseek = ll_file_seek,
2523 .sendfile = ll_file_sendfile,
/*
 * File operations table for regular files with cluster-wide locking: the
 * same entries as ll_file_operations, plus ll_file_flock() installed as
 * .flock (when HAVE_F_OP_FLOCK is defined) and as .lock.
 */
2527 struct file_operations ll_file_operations_flock = {
2528 .read = ll_file_read,
2529 .READ_METHOD = READ_FUNCTION,
2530 .write = ll_file_write,
2531 .WRITE_METHOD = WRITE_FUNCTION,
2532 .ioctl = ll_file_ioctl,
2533 .open = ll_file_open,
2534 .release = ll_file_release,
2535 .mmap = ll_file_mmap,
2536 .llseek = ll_file_seek,
2537 .sendfile = ll_file_sendfile,
2539 #ifdef HAVE_F_OP_FLOCK
2540 .flock = ll_file_flock,
2542 .lock = ll_file_flock
/*
 * File operations table for regular files when locking is disabled: the
 * same entries as ll_file_operations, plus ll_file_noflock() installed as
 * .flock (when HAVE_F_OP_FLOCK is defined) and as .lock.
 */
2545 /* These are for -o noflock - to return ENOSYS on flock calls */
2546 struct file_operations ll_file_operations_noflock = {
2547 .read = ll_file_read,
2548 .READ_METHOD = READ_FUNCTION,
2549 .write = ll_file_write,
2550 .WRITE_METHOD = WRITE_FUNCTION,
2551 .ioctl = ll_file_ioctl,
2552 .open = ll_file_open,
2553 .release = ll_file_release,
2554 .mmap = ll_file_mmap,
2555 .llseek = ll_file_seek,
2556 .sendfile = ll_file_sendfile,
2558 #ifdef HAVE_F_OP_FLOCK
2559 .flock = ll_file_noflock,
2561 .lock = ll_file_noflock
/*
 * Inode operations table for regular files: attribute get/set, truncate,
 * permission checking and the extended-attribute calls.  .setattr_raw is
 * only provided when the kernel carries the VFS intent patches
 * (HAVE_VFS_INTENT_PATCHES).
 */
2564 struct inode_operations ll_file_inode_operations = {
2565 #ifdef HAVE_VFS_INTENT_PATCHES
2566 .setattr_raw = ll_setattr_raw,
2568 .setattr = ll_setattr,
2569 .truncate = ll_truncate,
2570 .getattr = ll_getattr,
2571 .permission = ll_inode_permission,
2572 .setxattr = ll_setxattr,
2573 .getxattr = ll_getxattr,
2574 .listxattr = ll_listxattr,
2575 .removexattr = ll_removexattr,
/*
 * Registry of dynamically registered ioctl handlers: a list of
 * llioc_data blocks (ioc_head) protected by a read/write semaphore
 * (ioc_sem).  The single instance 'llioc' is statically initialized with
 * an unlocked semaphore and an empty list.
 */
2578 /* dynamic ioctl number support routines */
2579 static struct llioc_ctl_data {
2580 struct rw_semaphore ioc_sem;
2581 struct list_head ioc_head;
2583 __RWSEM_INITIALIZER(llioc.ioc_sem),
2584 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/*
 * Fields of one registration block (used as struct llioc_data by the
 * ll_iocontrol_* functions; the struct header line is not visible in
 * this listing):
 *   iocd_list  - linkage on llioc.ioc_head
 *   iocd_size  - allocation size of this block, used when freeing it
 *   iocd_cb    - callback invoked for a matching command
 *   iocd_count - number of entries in iocd_cmd[]
 *   iocd_cmd   - trailing array of ioctl command numbers handled
 */
2589 struct list_head iocd_list;
2590 unsigned int iocd_size;
2591 llioc_callback_t iocd_cb;
2592 unsigned int iocd_count;
2593 unsigned int iocd_cmd[0];
/*
 * ll_iocontrol_register - register a callback for a set of ioctl command
 * numbers.
 *
 * \param cb     callback to invoke; must not be NULL
 * \param count  number of entries in 'cmd'; must be within
 *               [0, LLIOC_MAX_CMD]
 * \param cmd    array of command numbers; must not be NULL
 *
 * A llioc_data block with room for 'count' commands is allocated, the
 * commands are copied into it, and the block is appended to the global
 * list under the write semaphore.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the block pointer is returned as the opaque "magic" that
 * ll_iocontrol_unregister() expects, and NULL on failure - confirm.
 */
2596 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2599 struct llioc_data *in_data = NULL;
2602 if (cb == NULL || cmd == NULL ||
2603 count > LLIOC_MAX_CMD || count < 0)
2606 size = sizeof(*in_data) + count * sizeof(unsigned int);
2607 OBD_ALLOC(in_data, size);
2608 if (in_data == NULL)
2611 memset(in_data, 0, sizeof(*in_data));
2612 in_data->iocd_size = size;
2613 in_data->iocd_cb = cb;
2614 in_data->iocd_count = count;
2615 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2617 down_write(&llioc.ioc_sem);
2618 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2619 up_write(&llioc.ioc_sem);
/*
 * ll_iocontrol_unregister - remove a registration made with
 * ll_iocontrol_register().
 *
 * \param magic  opaque value identifying the registration
 *
 * The global list is walked under the write semaphore.  The matching
 * block is unlinked, the semaphore is released, and the block is freed
 * using the size recorded in iocd_size (read before the block is freed).
 * If the walk completes, the semaphore is released and a warning naming
 * 'magic' is logged.
 *
 * NOTE(review): the comparison that selects the matching entry and the
 * return after the free are on lines not shown in this listing.
 */
2624 void ll_iocontrol_unregister(void *magic)
2626 struct llioc_data *tmp;
2631 down_write(&llioc.ioc_sem);
2632 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2634 unsigned int size = tmp->iocd_size;
2636 list_del(&tmp->iocd_list);
2637 up_write(&llioc.ioc_sem);
2639 OBD_FREE(tmp, size);
2643 up_write(&llioc.ioc_sem);
2645 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2648 EXPORT_SYMBOL(ll_iocontrol_register);
2649 EXPORT_SYMBOL(ll_iocontrol_unregister);
/*
 * ll_iocontrol_call - offer an ioctl to the dynamically registered
 * handlers.
 *
 * \param inode  inode the ioctl was issued on
 * \param file   open file the ioctl was issued on
 * \param cmd    ioctl command number
 * \param arg    ioctl argument
 * \param rcp    out: result code for the caller
 *
 * Under the read semaphore every registered block is scanned for an
 * entry equal to 'cmd'; the block's callback is invoked for a match and
 * the scan stops once a callback answers LLIOC_STOP.  'rc' starts as
 * -EINVAL and 'ret' as LLIOC_CONT, which are therefore the values seen
 * when no handler matches.
 *
 * NOTE(review): the end of this function (storing rc through 'rcp' and
 * returning 'ret') is not visible in this listing.
 */
2651 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2652 unsigned int cmd, unsigned long arg, int *rcp)
2654 enum llioc_iter ret = LLIOC_CONT;
2655 struct llioc_data *data;
2656 int rc = -EINVAL, i;
2658 down_read(&llioc.ioc_sem);
2659 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2660 for (i = 0; i < data->iocd_count; i++) {
2661 if (cmd != data->iocd_cmd[i])
2664 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2668 if (ret == LLIOC_STOP)
2671 up_read(&llioc.ioc_sem);