1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-open ll_file_data structure from the dedicated slab cache.
 * NOTE(review): this listing is elided (source line numbers jump); the
 * return of @fd and the braces are in the missing lines — confirm against
 * the full source. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Release a ll_file_data back to its slab cache (counterpart of
 * ll_file_data_get()). NOTE(review): elided listing — a NULL guard may
 * exist in the missing lines. */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Pack the inode's current attributes (fid, mode, a/m/ctime, size, blocks,
 * flags), its I/O epoch, the open file handle @fh and an MDS capability
 * into @op_data for a subsequent MDS request. */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* ia_attr_flags lives in the ll_iattr wrapper, hence the cast. */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Prepare @op_data for an MDS close of open handle @och: mark mode/times
 * valid, add size/blocks unless Size-on-MDS is in effect for a regular
 * file, close the I/O epoch, and pack the inode attributes.
 * NOTE(review): elided listing — control flow between the visible
 * statements (e.g. the body after the FMODE_WRITE test) is missing. */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
92 if (!(och->och_flags & FMODE_WRITE))
/* Size is sent to MDS unless the server does SOM for regular files. */
95 if (!(exp_connect_som(ll_i2mdexp(inode))) || !S_ISREG(inode->i_mode))
96 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
98 ll_epoch_close(inode, op_data, &och, 0);
101 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/* Send an MDS close for open handle @och: build close op_data, call
 * md_close(), handle the Size-on-MDS update path, destroy OST objects on
 * last unlink, and clear open replay data. On a SOM-enabled export with an
 * unfinished epoch the inode is queued for DONE_WRITING instead.
 * NOTE(review): elided listing — error branches and RETURN are in missing
 * lines. */
105 static int ll_close_inode_openhandle(struct obd_export *md_exp,
107 struct obd_client_handle *och)
109 struct obd_export *exp = ll_i2mdexp(inode);
110 struct md_op_data *op_data;
111 struct ptlrpc_request *req = NULL;
112 struct obd_device *obd = class_exp2obd(exp);
119 * XXX: in case of LMV, is this correct to access
122 CERROR("Invalid MDC connection handle "LPX64"\n",
123 ll_i2mdexp(inode)->exp_handle.h_cookie);
128 * here we check if this is forced umount. If so this is called on
129 * canceling "open lock" and we do not call md_close() in this case, as
130 * it will not be successful, as import is already deactivated.
135 OBD_ALLOC_PTR(op_data);
137 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
139 ll_prepare_close(inode, op_data, och);
140 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
141 rc = md_close(md_exp, op_data, och->och_mod, &req);
143 /* This close must have the epoch closed. */
144 LASSERT(epoch_close);
145 /* MDS has instructed us to obtain Size-on-MDS attribute from
146 * OSTs and send setattr back to MDS. */
147 rc = ll_sizeonmds_update(inode, &och->och_fh,
148 op_data->op_ioepoch);
150 CERROR("inode %lu mdc Size-on-MDS update failed: "
151 "rc = %d\n", inode->i_ino, rc);
155 CERROR("inode %lu mdc close failed: rc = %d\n",
158 ll_finish_md_op_data(op_data);
161 rc = ll_objects_destroy(req, inode);
163 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* SOM still pending for a written regular file: defer DONE_WRITING. */
170 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
171 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
172 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
174 md_clear_open_replay_data(md_exp, och);
175 /* Free @och if it is not waiting for DONE_WRITING. */
176 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
179 if (req) /* This is close request */
180 ptlrpc_req_finished(req);
/* Close the MDS open handle of the given kind (@flags: write/exec/read)
 * if no other local opens of that kind remain. Selects the per-inode
 * handle slot and its use count under lli_och_sem; only the last user
 * performs ll_close_inode_openhandle().
 * NOTE(review): elided listing — the handle swap/NULLing between the two
 * up() calls is in missing lines. */
184 int ll_md_real_close(struct inode *inode, int flags)
186 struct ll_inode_info *lli = ll_i2info(inode);
187 struct obd_client_handle **och_p;
188 struct obd_client_handle *och;
193 if (flags & FMODE_WRITE) {
194 och_p = &lli->lli_mds_write_och;
195 och_usecount = &lli->lli_open_fd_write_count;
196 } else if (flags & FMODE_EXEC) {
197 och_p = &lli->lli_mds_exec_och;
198 och_usecount = &lli->lli_open_fd_exec_count;
200 LASSERT(flags & FMODE_READ);
201 och_p = &lli->lli_mds_read_och;
202 och_usecount = &lli->lli_open_fd_read_count;
205 down(&lli->lli_och_sem);
206 if (*och_usecount) { /* There are still users of this handle, so
208 up(&lli->lli_och_sem);
213 up(&lli->lli_och_sem);
215 if (och) { /* There might be a race and somebody have freed this och
217 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-fd close: drop any group lock, decrement the open count of the fd's
 * open mode under lli_och_sem, and, if no matching OPEN DLM lock is still
 * held, do a real MDS close via ll_md_real_close(). Finally detaches and
 * frees the ll_file_data and closes the capability. */
224 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
227 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
228 struct ll_inode_info *lli = ll_i2info(inode);
232 /* clear group lock, if present */
233 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
234 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
236 /* Let's see if we have good enough OPEN lock on the file and if
237 we can skip talking to MDS */
238 if (file->f_dentry->d_inode) { /* Can this ever be false? */
/* LDLM_FL_TEST_LOCK: only probe for the lock, do not take a reference. */
240 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
241 struct lustre_handle lockh;
242 struct inode *inode = file->f_dentry->d_inode;
243 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
245 down(&lli->lli_och_sem);
246 if (fd->fd_omode & FMODE_WRITE) {
248 LASSERT(lli->lli_open_fd_write_count);
249 lli->lli_open_fd_write_count--;
250 } else if (fd->fd_omode & FMODE_EXEC) {
252 LASSERT(lli->lli_open_fd_exec_count);
253 lli->lli_open_fd_exec_count--;
256 LASSERT(lli->lli_open_fd_read_count);
257 lli->lli_open_fd_read_count--;
259 up(&lli->lli_och_sem);
/* No cached OPEN lock of this mode -> must tell the MDS now. */
261 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
262 LDLM_IBITS, &policy, lockmode,
264 rc = ll_md_real_close(file->f_dentry->d_inode,
268 CERROR("Releasing a file %p with negative dentry %p. Name %s",
269 file, file->f_dentry, file->f_dentry->d_name.name);
272 LUSTRE_FPRIVATE(file) = NULL;
273 ll_file_data_put(fd);
274 ll_capa_close(inode);
279 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
281 /* While this returns an error code, fput() the caller does not, so we need
282 * to make every effort to clean up all of our state here. Also, applications
283 * rarely check close errors and even if an error is returned they will not
284 * re-try the close call.
/* VFS ->release() hook. Tears down per-open state: remote-ACL session
 * entries for the root inode, the statahead thread this fd owns, async
 * write errors recorded on the lsm, and finally the MDS close via
 * ll_md_close(). The root dentry is special-cased: it only drops its
 * ll_file_data. */
286 int ll_file_release(struct inode *inode, struct file *file)
288 struct ll_file_data *fd;
289 struct ll_sb_info *sbi = ll_i2sbi(inode);
290 struct ll_inode_info *lli = ll_i2info(inode);
291 struct lov_stripe_md *lsm = lli->lli_smd;
295 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
296 inode->i_generation, inode);
298 #ifdef CONFIG_FS_POSIX_ACL
/* Remote-client ACL bookkeeping is only attached to the root inode. */
299 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
300 inode == inode->i_sb->s_root->d_inode) {
301 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
304 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
305 fd->fd_flags &= ~LL_FILE_RMTACL;
306 rct_del(&sbi->ll_rct, cfs_curproc_pid());
307 et_search_free(&sbi->ll_et, cfs_curproc_pid());
312 if (inode->i_sb->s_root != file->f_dentry)
313 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
314 fd = LUSTRE_FPRIVATE(file);
317 /* The last ref on @file, maybe not the owner pid of statahead.
318 * Different processes can open the same dir, "ll_opendir_key" means:
319 * it is me that should stop the statahead thread. */
320 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
321 ll_stop_statahead(inode, lli->lli_opendir_key);
323 if (inode->i_sb->s_root == file->f_dentry) {
324 LUSTRE_FPRIVATE(file) = NULL;
325 ll_file_data_put(fd);
/* Propagate any async write error recorded against the stripe md. */
330 lov_test_and_clear_async_rc(lsm);
331 lli->lli_async_rc = 0;
333 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/* Re-open a file on the MDS with an open intent (NFSD / patchless-kernel
 * paths, and the setstripe path when @lmm/@lmmsize are given). Builds
 * op_data for the parent+child pair, enqueues the intent lock, attaches
 * the resulting lock to the inode and refreshes the inode from the reply.
 * NOTE(review): elided listing — ENTRY/RETURN, rc checks between calls
 * and the out: label are in missing lines. */
337 static int ll_intent_file_open(struct file *file, void *lmm,
338 int lmmsize, struct lookup_intent *itp)
340 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
341 struct dentry *parent = file->f_dentry->d_parent;
342 const char *name = file->f_dentry->d_name.name;
343 const int len = file->f_dentry->d_name.len;
344 struct md_op_data *op_data;
345 struct ptlrpc_request *req;
352 /* Usually we come here only for NFSD, and we want open lock.
353 But we can also get here with pre 2.6.15 patchless kernels, and in
354 that case that lock is also ok */
355 /* We can also get here if there was cached open handle in revalidate_it
356 * but it disappeared while we were getting from there to ll_file_open.
357 * But this means this file was closed and immediately opened which
358 * makes a good candidate for using OPEN lock */
359 /* If lmmsize & lmm are not 0, we are just setting stripe info
360 * parameters. No need for the open lock */
361 if (!lmm && !lmmsize)
362 itp->it_flags |= MDS_OPEN_LOCK;
364 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
365 file->f_dentry->d_inode, name, len,
366 O_RDWR, LUSTRE_OPC_ANY, NULL);
368 RETURN(PTR_ERR(op_data));
370 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
371 0 /*unused */, &req, ll_md_blocking_ast, 0);
372 ll_finish_md_op_data(op_data);
374 /* reason to keep own exit path - don't flood log
375 * with messages with -ESTALE errors.
377 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
378 it_open_error(DISP_OPEN_OPEN, itp))
380 ll_release_openhandle(file->f_dentry, itp);
384 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
385 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
386 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
390 if (itp->d.lustre.it_lock_mode)
391 md_set_lock_data(sbi->ll_md_exp,
392 &itp->d.lustre.it_lock_handle,
393 file->f_dentry->d_inode);
395 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
397 ptlrpc_req_finished(itp->d.lustre.it_data);
398 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
399 ll_intent_drop_lock(itp);
/* Record a newly opened I/O epoch on the inode: update lli_ioepoch only
 * when the server supplied a non-zero epoch that differs from the cached
 * one. */
404 void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch)
406 if (ioepoch && lli->lli_ioepoch != ioepoch) {
407 lli->lli_ioepoch = ioepoch;
408 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
409 ioepoch, PFID(&lli->lli_fid));
/* Fill an obd_client_handle from the MDS open reply carried by @it:
 * copy the server file handle, stamp magic/fid/flags, open the I/O epoch
 * and register the request for open replay.
 * Returns the result of md_set_open_replay_data(). */
413 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
414 struct lookup_intent *it, struct obd_client_handle *och)
416 struct ptlrpc_request *req = it->d.lustre.it_data;
417 struct mdt_body *body;
421 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
422 LASSERT(body != NULL); /* reply already checked out */
424 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
425 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
426 och->och_fid = lli->lli_fid;
427 och->och_flags = it->it_flags;
428 ll_ioepoch_open(lli, body->ioepoch);
430 return md_set_open_replay_data(md_exp, och, req);
/* Complete a local open: optionally fill @och from the intent reply (when
 * @och is non-NULL), then attach @fd as the file's private data, init
 * readahead state and remember the open mode.
 * NOTE(review): elided listing — the `if (och)` guard and rc handling are
 * in missing lines. */
433 int ll_local_open(struct file *file, struct lookup_intent *it,
434 struct ll_file_data *fd, struct obd_client_handle *och)
436 struct inode *inode = file->f_dentry->d_inode;
437 struct ll_inode_info *lli = ll_i2info(inode);
440 LASSERT(!LUSTRE_FPRIVATE(file));
445 struct ptlrpc_request *req = it->d.lustre.it_data;
446 struct mdt_body *body;
449 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
453 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
454 if ((it->it_flags & FMODE_WRITE) &&
455 (body->valid & OBD_MD_FLSIZE))
456 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
457 lli->lli_ioepoch, PFID(&lli->lli_fid));
460 LUSTRE_FPRIVATE(file) = fd;
461 ll_readahead_init(inode, &fd->fd_ras);
462 fd->fd_omode = it->it_flags;
466 /* Open a file, and (for the very first open) create objects on the OSTs at
467 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
468 * creation or open until ll_lov_setstripe() ioctl is called. We grab
469 * lli_open_sem to ensure no other process will create objects, send the
470 * stripe MD to the MDS, or try to destroy the objects if that fails.
472 * If we already have the stripe MD locally then we don't request it in
473 * md_open(), by passing a lmm_size = 0.
475 * It is up to the application to ensure no other processes open this file
476 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
477 * used. We might be able to avoid races of that sort by getting lli_open_sem
478 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
479 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
/* VFS ->open() hook. Allocates the per-open ll_file_data, manages the
 * directory statahead owner key, and obtains or reuses an MDS open handle
 * of the appropriate kind (write/exec/read) under lli_och_sem. When no
 * handle exists, an open intent is enqueued via ll_intent_file_open().
 * The delayed-object-creation path (O_LOV_DELAY_CREATE) is honored for
 * regular files.
 * NOTE(review): elided listing — several rc checks, labels and RETURNs
 * fall in missing lines; locking pairs below are as visible here. */
481 int ll_file_open(struct inode *inode, struct file *file)
483 struct ll_inode_info *lli = ll_i2info(inode);
484 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
485 .it_flags = file->f_flags };
486 struct lov_stripe_md *lsm;
487 struct ptlrpc_request *req = NULL;
488 struct obd_client_handle **och_p;
490 struct ll_file_data *fd;
491 int rc = 0, opendir_set = 0;
494 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
495 inode->i_generation, inode, file->f_flags);
497 #ifdef HAVE_VFS_INTENT_PATCHES
500 it = file->private_data; /* XXX: compat macro */
501 file->private_data = NULL; /* prevent ll_local_open assertion */
504 fd = ll_file_data_get();
/* Directory opens: claim or reset statahead ownership under lli_lock. */
509 if (S_ISDIR(inode->i_mode)) {
511 spin_lock(&lli->lli_lock);
512 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
513 LASSERT(lli->lli_sai == NULL);
514 lli->lli_opendir_key = fd;
515 lli->lli_opendir_pid = cfs_curproc_pid();
517 } else if (unlikely(lli->lli_opendir_pid == cfs_curproc_pid() &&
518 lli->lli_opendir_key != NULL)) {
519 /* Two cases for this:
520 * (1) The same process opens such directory many times.
521 * (2) The old process opened the directory, and exited
522 * before its children processes. Then a new process
523 * with the same pid opens such directory before the
524 * old process's children processes exit.
525 * Reset statahead for such cases. */
526 spin_unlock(&lli->lli_lock);
527 CDEBUG(D_INFO, "Conflict statahead for %.*s "DFID
528 " reset it.\n", file->f_dentry->d_name.len,
529 file->f_dentry->d_name.name,
530 PFID(&lli->lli_fid));
531 ll_stop_statahead(inode, lli->lli_opendir_key);
534 spin_unlock(&lli->lli_lock);
537 if (inode->i_sb->s_root == file->f_dentry) {
538 LUSTRE_FPRIVATE(file) = fd;
542 if (!it || !it->d.lustre.it_disposition) {
543 /* Convert f_flags into access mode. We cannot use file->f_mode,
544 * because everything but O_ACCMODE mask was stripped from
546 if ((oit.it_flags + 1) & O_ACCMODE)
548 if (file->f_flags & O_TRUNC)
549 oit.it_flags |= FMODE_WRITE;
551 /* kernel only calls f_op->open in dentry_open. filp_open calls
552 * dentry_open after call to open_namei that checks permissions.
553 * Only nfsd_open calls dentry_open directly without checking
554 * permissions and because of that this code below is safe. */
555 if (oit.it_flags & FMODE_WRITE)
556 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
558 /* We do not want O_EXCL here, presumably we opened the file
559 * already? XXX - NFS implications? */
560 oit.it_flags &= ~O_EXCL;
566 /* Let's see if we have file open on MDS already. */
567 if (it->it_flags & FMODE_WRITE) {
568 och_p = &lli->lli_mds_write_och;
569 och_usecount = &lli->lli_open_fd_write_count;
570 } else if (it->it_flags & FMODE_EXEC) {
571 och_p = &lli->lli_mds_exec_och;
572 och_usecount = &lli->lli_open_fd_exec_count;
574 och_p = &lli->lli_mds_read_och;
575 och_usecount = &lli->lli_open_fd_read_count;
578 down(&lli->lli_och_sem);
579 if (*och_p) { /* Open handle is present */
580 if (it_disposition(it, DISP_OPEN_OPEN)) {
581 /* Well, there's extra open request that we do not need,
582 let's close it somehow. This will decref request. */
583 rc = it_open_error(DISP_OPEN_OPEN, it);
585 up(&lli->lli_och_sem);
586 ll_file_data_put(fd);
587 GOTO(out_openerr, rc);
589 ll_release_openhandle(file->f_dentry, it);
590 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
595 rc = ll_local_open(file, it, fd, NULL);
598 up(&lli->lli_och_sem);
599 ll_file_data_put(fd);
600 GOTO(out_openerr, rc);
603 LASSERT(*och_usecount == 0);
604 if (!it->d.lustre.it_disposition) {
605 /* We cannot just request lock handle now, new ELC code
606 means that one of other OPEN locks for this file
607 could be cancelled, and since blocking ast handler
608 would attempt to grab och_sem as well, that would
609 result in a deadlock */
610 up(&lli->lli_och_sem);
611 it->it_create_mode |= M_CHECK_STALE;
612 rc = ll_intent_file_open(file, NULL, 0, it);
613 it->it_create_mode &= ~M_CHECK_STALE;
615 ll_file_data_put(fd);
616 GOTO(out_openerr, rc);
619 /* Got some error? Release the request */
620 if (it->d.lustre.it_status < 0) {
621 req = it->d.lustre.it_data;
622 ptlrpc_req_finished(req);
624 md_set_lock_data(ll_i2sbi(inode)->ll_md_exp,
625 &it->d.lustre.it_lock_handle,
626 file->f_dentry->d_inode);
629 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
631 ll_file_data_put(fd);
632 GOTO(out_och_free, rc = -ENOMEM);
635 req = it->d.lustre.it_data;
637 /* md_intent_lock() didn't get a request ref if there was an
638 * open error, so don't do cleanup on the request here
640 /* XXX (green): Should not we bail out on any error here, not
641 * just open error? */
642 rc = it_open_error(DISP_OPEN_OPEN, it);
644 ll_file_data_put(fd);
645 GOTO(out_och_free, rc);
648 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
649 rc = ll_local_open(file, it, fd, *och_p);
651 ll_file_data_put(fd);
652 GOTO(out_och_free, rc);
655 up(&lli->lli_och_sem);
657 /* Must do this outside lli_och_sem lock to prevent deadlock where
658 different kind of OPEN lock for this same inode gets cancelled
659 by ldlm_cancel_lru */
660 if (!S_ISREG(inode->i_mode))
667 if (file->f_flags & O_LOV_DELAY_CREATE ||
668 !(file->f_mode & FMODE_WRITE)) {
669 CDEBUG(D_INODE, "object creation was delayed\n");
673 file->f_flags &= ~O_LOV_DELAY_CREATE;
676 ptlrpc_req_finished(req);
678 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
682 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
683 *och_p = NULL; /* OBD_FREE writes some magic there */
686 up(&lli->lli_och_sem);
688 if (opendir_set != 0)
689 ll_stop_statahead(inode, lli->lli_opendir_key);
695 /* Fills the obdo with the attributes for the lsm */
/* Fetch merged object attributes for @lsm from the OSTs into @obdo:
 * issues an async getattr on a ptlrpc set, waits for completion, and on
 * success masks o_valid down to the size/blocks/time fields actually
 * merged from the stripes.
 * NOTE(review): elided listing — oinfo.oi_oa/oi_md setup and rc checks
 * are partly in missing lines. */
696 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
697 struct obd_capa *capa, struct obdo *obdo)
699 struct ptlrpc_request_set *set;
700 struct obd_info oinfo = { { { 0 } } };
705 LASSERT(lsm != NULL);
709 oinfo.oi_oa->o_id = lsm->lsm_object_id;
710 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
711 oinfo.oi_oa->o_mode = S_IFREG;
712 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
713 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
714 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
715 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
717 oinfo.oi_capa = capa;
719 set = ptlrpc_prep_set();
721 CERROR("can't allocate ptlrpc set\n");
724 rc = obd_getattr_async(exp, &oinfo, set);
726 rc = ptlrpc_set_wait(set);
727 ptlrpc_set_destroy(set);
/* Keep only the fields the OSTs can authoritatively report. */
730 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
731 OBD_MD_FLATIME | OBD_MD_FLMTIME |
732 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
736 /* Fills the obdo with the attributes for the inode defined by lsm */
/* Refresh @inode from OST attributes of its stripe md: get an MDS
 * capability, run ll_lsm_getattr() against the data export, and fold the
 * returned valid fields back into the inode. */
737 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
739 struct ll_inode_info *lli = ll_i2info(inode);
740 struct obd_capa *capa = ll_mdscapa_get(inode);
744 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
747 obdo_refresh_inode(inode, obdo, obdo->o_valid);
749 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
750 lli->lli_smd->lsm_object_id, i_size_read(inode),
751 (unsigned long long)inode->i_blocks,
752 (unsigned long)ll_inode_blksize(inode));
/* Merge the per-stripe lock value blocks into the inode under the inode
 * size lock: updates size, blocks and a/m/ctime from the combined lvb. */
757 int ll_merge_lvb(struct inode *inode)
759 struct ll_inode_info *lli = ll_i2info(inode);
760 struct ll_sb_info *sbi = ll_i2sbi(inode);
766 ll_inode_size_lock(inode, 1);
767 inode_init_lvb(inode, &lvb);
768 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
769 i_size_write(inode, lvb.lvb_size);
770 inode->i_blocks = lvb.lvb_blocks;
772 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
773 LTIME_S(inode->i_atime) = lvb.lvb_atime;
774 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
775 ll_inode_size_unlock(inode, 1);
/* Glimpse ioctl helper: fetch OST attributes for @lsm (no capability) and
 * copy size/blocks/times into the user-visible stat structure @st. */
780 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
783 struct obdo obdo = { 0 };
786 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
788 st->st_size = obdo.o_size;
789 st->st_blocks = obdo.o_blocks;
790 st->st_mtime = obdo.o_mtime;
791 st->st_atime = obdo.o_atime;
792 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read or write on @file: zero the structure,
 * propagate O_NONBLOCK/O_APPEND, bind the cl_object, and choose the lock
 * requirement (never for no-lock mounts/fds, mandatory for O_APPEND,
 * otherwise maybe). */
797 void ll_io_init(struct cl_io *io, const struct file *file, int write)
799 struct inode *inode = file->f_dentry->d_inode;
800 struct ll_sb_info *sbi = ll_i2sbi(inode);
801 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
804 memset(io, 0, sizeof *io);
805 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
807 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
808 io->ci_obj = ll_i2info(inode)->lli_clob;
809 io->ci_lockreq = CILR_MAYBE;
810 if (fd->fd_flags & LL_FILE_IGNORE_LOCK ||
811 sbi->ll_flags & LL_SBI_NOLCK) {
812 io->ci_lockreq = CILR_NEVER;
813 io->ci_no_srvlock = 1;
814 } else if (file->f_flags & O_APPEND) {
815 io->ci_lockreq = CILR_MANDATORY;
/* Common read/write/sendfile engine: set up a cl_io from @args, run
 * cl_io_rw_init()/cl_io_loop(), and advance *ppos from the io position on
 * success. If cl_io_rw_init() short-circuits (returns non-zero), its
 * ci_result is the answer.
 * NOTE(review): elided listing — cl_io_fini() and the final RETURN are in
 * missing lines. */
819 static ssize_t ll_file_io_generic(const struct lu_env *env,
820 struct ccc_io_args *args, struct file *file,
821 enum cl_io_type iot, loff_t *ppos, size_t count)
827 io = &ccc_env_info(env)->cti_io;
828 ll_io_init(io, file, iot == CIT_WRITE);
831 io->u.ci_rd.rd_is_sendfile = args->cia_is_sendfile;
833 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
834 struct vvp_io *vio = vvp_env_io(env);
835 struct ccc_io *cio = ccc_env_io(env);
/* Sendfile uses actor/target; ordinary I/O uses the iovec (and, on
 * aio-capable kernels, the kiocb). */
836 if (cl_io_is_sendfile(io)) {
837 vio->u.read.cui_actor = args->cia_actor;
838 vio->u.read.cui_target = args->cia_target;
840 cio->cui_iov = args->cia_iov;
841 cio->cui_nrsegs = args->cia_nrsegs;
842 #ifndef HAVE_FILE_WRITEV
843 cio->cui_iocb = args->cia_iocb;
846 cio->cui_fd = LUSTRE_FPRIVATE(file);
847 result = cl_io_loop(env, io);
849 /* cl_io_rw_init() handled IO */
850 result = io->ci_result;
851 if (io->ci_nob > 0) {
853 *ppos = io->u.ci_wr.wr.crw_pos;
861 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock)
/* Validate an iovec array and compute the total byte count into *count,
 * possibly shrinking *nr_segs at the first inaccessible segment (copied
 * from the kernel's __generic_file_aio_write_nolock). */
863 static int ll_file_get_iov_count(const struct iovec *iov,
864 unsigned long *nr_segs, size_t *count)
869 for (seg = 0; seg < *nr_segs; seg++) {
870 const struct iovec *iv = &iov[seg];
873 * If any segment has a negative length, or the cumulative
874 * length ever wraps negative then return -EINVAL.
877 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
879 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
884 cnt -= iv->iov_len; /* This segment is no good */
891 #ifdef HAVE_FILE_READV
/* readv entry (HAVE_FILE_READV kernels): validate the iovec, grab a
 * cl_env, fill ccc_io_args and delegate to ll_file_io_generic(CIT_READ). */
892 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
893 unsigned long nr_segs, loff_t *ppos)
896 struct ccc_io_args *args;
902 result = ll_file_get_iov_count(iov, &nr_segs, &count);
906 env = cl_env_get(&refcheck);
908 RETURN(PTR_ERR(env));
910 args = &vvp_env_info(env)->vti_args;
911 args->cia_is_sendfile = 0;
912 args->cia_iov = (struct iovec *)iov;
913 args->cia_nrsegs = nr_segs;
914 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
915 cl_env_put(env, &refcheck);
/* read entry (readv-based kernels): wrap the user buffer in a one-segment
 * iovec from the env-local scratch area and forward to ll_file_readv(). */
919 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
923 struct iovec *local_iov;
928 env = cl_env_get(&refcheck);
930 RETURN(PTR_ERR(env));
932 local_iov = &vvp_env_info(env)->vti_local_iov;
933 local_iov->iov_base = (void __user *)buf;
934 local_iov->iov_len = count;
935 result = ll_file_readv(file, local_iov, 1, ppos);
936 cl_env_put(env, &refcheck);
/* aio_read entry (non-readv kernels): validate the iovec, grab a cl_env,
 * fill ccc_io_args (including the kiocb) and delegate to
 * ll_file_io_generic(CIT_READ) using iocb->ki_pos as the position. */
941 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
942 unsigned long nr_segs, loff_t pos)
945 struct ccc_io_args *args;
951 result = ll_file_get_iov_count(iov, &nr_segs, &count);
955 env = cl_env_get(&refcheck);
957 RETURN(PTR_ERR(env));
959 args = &vvp_env_info(env)->vti_args;
960 args->cia_is_sendfile = 0;
961 args->cia_iov = (struct iovec *)iov;
962 args->cia_nrsegs = nr_segs;
963 args->cia_iocb = iocb;
964 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
965 &iocb->ki_pos, count);
966 cl_env_put(env, &refcheck);
/* read entry (aio-based kernels): build a synchronous kiocb plus a
 * one-segment iovec in env-local scratch space, call ll_file_aio_read()
 * and copy the advanced position back to *ppos. */
970 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
974 struct iovec *local_iov;
980 env = cl_env_get(&refcheck);
982 RETURN(PTR_ERR(env));
984 local_iov = &vvp_env_info(env)->vti_local_iov;
985 kiocb = &vvp_env_info(env)->vti_kiocb;
986 local_iov->iov_base = (void __user *)buf;
987 local_iov->iov_len = count;
988 init_sync_kiocb(kiocb, file);
989 kiocb->ki_pos = *ppos;
990 kiocb->ki_left = count;
992 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
993 *ppos = kiocb->ki_pos;
995 cl_env_put(env, &refcheck);
1001 * Write to a file (through the page cache).
1003 #ifdef HAVE_FILE_WRITEV
/* writev entry (HAVE_FILE_WRITEV kernels): validate the iovec, grab a
 * cl_env, fill ccc_io_args and delegate to ll_file_io_generic(CIT_WRITE). */
1004 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
1005 unsigned long nr_segs, loff_t *ppos)
1008 struct ccc_io_args *args;
1014 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1018 env = cl_env_get(&refcheck);
1020 RETURN(PTR_ERR(env));
1022 args = &vvp_env_info(env)->vti_args;
1023 args->cia_iov = (struct iovec *)iov;
1024 args->cia_nrsegs = nr_segs;
1025 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1026 cl_env_put(env, &refcheck);
/* write entry (writev-based kernels): wrap the user buffer in a
 * one-segment iovec from env-local scratch space and forward to
 * ll_file_writev(). */
1030 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1034 struct iovec *local_iov;
1039 env = cl_env_get(&refcheck);
1041 RETURN(PTR_ERR(env));
1043 local_iov = &vvp_env_info(env)->vti_local_iov;
1044 local_iov->iov_base = (void __user *)buf;
1045 local_iov->iov_len = count;
1047 result = ll_file_writev(file, local_iov, 1, ppos);
1048 cl_env_put(env, &refcheck);
1052 #else /* AIO stuff */
/* aio_write entry (non-writev kernels): validate the iovec, grab a
 * cl_env, fill ccc_io_args (including the kiocb) and delegate to
 * ll_file_io_generic(CIT_WRITE) using iocb->ki_pos as the position. */
1053 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1054 unsigned long nr_segs, loff_t pos)
1057 struct ccc_io_args *args;
1063 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1067 env = cl_env_get(&refcheck);
1069 RETURN(PTR_ERR(env));
1071 args = &vvp_env_info(env)->vti_args;
1072 args->cia_iov = (struct iovec *)iov;
1073 args->cia_nrsegs = nr_segs;
1074 args->cia_iocb = iocb;
1075 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1076 &iocb->ki_pos, count);
1077 cl_env_put(env, &refcheck);
/* write entry (aio-based kernels): build a synchronous kiocb plus a
 * one-segment iovec in env-local scratch space, call ll_file_aio_write()
 * and copy the advanced position back to *ppos. */
1081 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1085 struct iovec *local_iov;
1086 struct kiocb *kiocb;
1091 env = cl_env_get(&refcheck);
1093 RETURN(PTR_ERR(env));
1095 local_iov = &vvp_env_info(env)->vti_local_iov;
1096 kiocb = &vvp_env_info(env)->vti_kiocb;
1097 local_iov->iov_base = (void __user *)buf;
1098 local_iov->iov_len = count;
1099 init_sync_kiocb(kiocb, file);
1100 kiocb->ki_pos = *ppos;
1101 kiocb->ki_left = count;
1103 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1104 *ppos = kiocb->ki_pos;
1106 cl_env_put(env, &refcheck);
1113 * Send file content (through pagecache) somewhere with helper
/* sendfile entry: route pagecache data of @in_file through @actor/@target
 * via a CIT_READ cl_io with cia_is_sendfile set. */
1115 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1116 read_actor_t actor, void *target)
1119 struct ccc_io_args *args;
1124 env = cl_env_get(&refcheck);
1126 RETURN(PTR_ERR(env));
1128 args = &vvp_env_info(env)->vti_args;
1129 args->cia_is_sendfile = 1;
1130 args->cia_target = target;
1131 args->cia_actor = actor;
1132 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1133 cl_env_put(env, &refcheck);
/* ioctl handler (admin-only): recreate a lost OST object for this file.
 * Copies the ll_recreate_obj request from userspace, duplicates the
 * stripe md under the inode size lock, marks the obdo with
 * OBD_FL_RECREATE_OBJS and calls obd_create() on the data export.
 * NOTE(review): elided listing — oa allocation and several rc checks are
 * in missing lines. */
1137 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1140 struct obd_export *exp = ll_i2dtexp(inode);
1141 struct ll_recreate_obj ucreatp;
1142 struct obd_trans_info oti = { 0 };
1143 struct obdo *oa = NULL;
1146 struct lov_stripe_md *lsm, *lsm2;
1149 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1152 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1153 sizeof(struct ll_recreate_obj)))
1160 ll_inode_size_lock(inode, 0);
1161 lsm = ll_i2info(inode)->lli_smd;
1163 GOTO(out, rc = -ENOENT);
1164 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1165 (lsm->lsm_stripe_count));
1167 OBD_ALLOC(lsm2, lsm_size);
1169 GOTO(out, rc = -ENOMEM);
1171 oa->o_id = ucreatp.lrc_id;
1172 oa->o_gr = ucreatp.lrc_group;
/* o_nlink carries the target OST index for recreation requests. */
1173 oa->o_nlink = ucreatp.lrc_ost_idx;
1174 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1175 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1176 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1177 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1179 memcpy(lsm2, lsm, lsm_size);
1180 rc = obd_create(exp, oa, &lsm2, &oti);
1182 OBD_FREE(lsm2, lsm_size);
1185 ll_inode_size_unlock(inode, 0);
/* Set striping EA on a file by re-opening it on the MDS with @lum
 * attached to the open intent. Fails early if a stripe md already exists;
 * on success releases the extra open handle obtained for the operation.
 * NOTE(review): elided listing — labels/RETURNs between the visible
 * GOTO targets are in missing lines. */
1190 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1191 int flags, struct lov_user_md *lum, int lum_size)
1193 struct lov_stripe_md *lsm;
1194 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1198 ll_inode_size_lock(inode, 0);
1199 lsm = ll_i2info(inode)->lli_smd;
1201 ll_inode_size_unlock(inode, 0);
1202 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1207 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1210 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1211 GOTO(out_req_free, rc = -ENOENT);
1212 rc = oit.d.lustre.it_status;
1214 GOTO(out_req_free, rc);
1216 ll_release_openhandle(file->f_dentry, &oit);
1219 ll_inode_size_unlock(inode, 0);
1220 ll_intent_release(&oit);
1223 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the striping EA of @filename (child of @inode) from the MDS.
 * Validates the LOV magic, byte-swaps the EA to host endianness on
 * little-endian-incompatible hosts, and expands LOV_MAGIC_JOIN metadata
 * into a lov_user_md_join with per-stripe extent/object information.
 * On success *lmmp/*lmm_size reference data inside *request, which the
 * caller must eventually release.
 * NOTE(review): elided listing — rc checks and the out:/out_free_memmd:
 * label placement are partly in missing lines. */
1227 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1228 struct lov_mds_md **lmmp, int *lmm_size,
1229 struct ptlrpc_request **request)
1231 struct ll_sb_info *sbi = ll_i2sbi(inode);
1232 struct mdt_body *body;
1233 struct lov_mds_md *lmm = NULL;
1234 struct ptlrpc_request *req = NULL;
1235 struct obd_capa *oc;
1238 rc = ll_get_max_mdsize(sbi, &lmmsize);
1242 oc = ll_mdscapa_get(inode);
1243 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1244 oc, filename, strlen(filename) + 1,
1245 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1246 ll_i2suppgid(inode), &req);
1249 CDEBUG(D_INFO, "md_getattr_name failed "
1250 "on %s: rc %d\n", filename, rc);
1254 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1255 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1257 lmmsize = body->eadatasize;
1259 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1261 GOTO(out, rc = -ENODATA);
1264 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1265 LASSERT(lmm != NULL);
1267 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1268 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
1269 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
1270 GOTO(out, rc = -EPROTO);
1274 * This is coming from the MDS, so is probably in
1275 * little endian. We convert it to host endian before
1276 * passing it to userspace.
/* Swab only on big-endian hosts (LOV_MAGIC differs from its LE form). */
1278 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
1279 /* if function called for directory - we should
1280 * avoid swab not existent lsm objects */
1281 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1282 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1283 if (S_ISREG(body->mode))
1284 lustre_swab_lov_user_md_objects(
1285 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1286 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1287 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1288 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1289 if (S_ISREG(body->mode))
1290 lustre_swab_lov_user_md_objects(
1291 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1292 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1293 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
1294 lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
/* JOIN files: unpack the md and rebuild a user-visible lov_user_md_join
 * with per-stripe extents mapped from the lsm extent array. */
1298 if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
1299 struct lov_stripe_md *lsm;
1300 struct lov_user_md_join *lmj;
1301 int lmj_size, i, aindex = 0;
1303 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
1305 GOTO(out, rc = -ENOMEM);
1306 rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm);
1308 GOTO(out_free_memmd, rc);
1310 lmj_size = sizeof(struct lov_user_md_join) +
1311 lsm->lsm_stripe_count *
1312 sizeof(struct lov_user_ost_data_join);
1313 OBD_ALLOC(lmj, lmj_size);
1315 GOTO(out_free_memmd, rc = -ENOMEM);
1317 memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
1318 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1319 struct lov_extent *lex =
1320 &lsm->lsm_array->lai_ext_array[aindex];
1322 if (lex->le_loi_idx + lex->le_stripe_count <= i)
1324 CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
1325 LPU64" len %d\n", aindex, i,
1326 lex->le_start, (int)lex->le_len);
1327 lmj->lmm_objects[i].l_extent_start =
/* le_len == -1 marks an extent that runs to EOF. */
1330 if ((int)lex->le_len == -1)
1331 lmj->lmm_objects[i].l_extent_end = -1;
1333 lmj->lmm_objects[i].l_extent_end =
1334 lex->le_start + lex->le_len;
1335 lmj->lmm_objects[i].l_object_id =
1336 lsm->lsm_oinfo[i]->loi_id;
1337 lmj->lmm_objects[i].l_object_gr =
1338 lsm->lsm_oinfo[i]->loi_gr;
1339 lmj->lmm_objects[i].l_ost_gen =
1340 lsm->lsm_oinfo[i]->loi_ost_gen;
1341 lmj->lmm_objects[i].l_ost_idx =
1342 lsm->lsm_oinfo[i]->loi_ost_idx;
1344 lmm = (struct lov_mds_md *)lmj;
1347 obd_free_memmd(sbi->ll_dt_exp, &lsm);
1351 *lmm_size = lmmsize;
/*
 * LL_IOC_LOV_SETEA handler: set a fully-specified striping EA (including
 * pre-created object ids, hence MDS_OPEN_HAS_OBJS) on @inode.
 * Requires CAP_SYS_ADMIN.  Copies the lov_user_md from userspace into a
 * kernel buffer and delegates to ll_lov_setstripe_ea_info().
 * NOTE(review): elided listing — the `arg` parameter line, error RETURNs
 * and closing brace are not visible here.
 */
1356 static int ll_lov_setea(struct inode *inode, struct file *file,
1359 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1360 struct lov_user_md *lump;
/* One lov_user_md plus exactly one trailing OST object entry. */
1361 int lum_size = sizeof(struct lov_user_md) +
1362 sizeof(struct lov_user_ost_data);
1366 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1369 OBD_ALLOC(lump, lum_size);
1373 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1374 OBD_FREE(lump, lum_size);
1378 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
1380 OBD_FREE(lump, lum_size);
/*
 * LL_IOC_LOV_SETSTRIPE handler: read a lov_user_md (v1 or v3) from
 * userspace, apply it via ll_lov_setstripe_ea_info(), then echo the
 * resulting layout back to the caller through LL_IOC_LOV_GETSTRIPE.
 * NOTE(review): elided listing — the `arg` parameter, EFAULT RETURNs and
 * the tail of the obd_iocontrol() call are not visible here.
 */
1384 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1387 struct lov_user_md_v3 lumv3;
/* lumv1 aliases the head of lumv3: v1 is a prefix of v3 on the wire. */
1388 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1389 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1390 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1393 int flags = FMODE_WRITE;
1396 /* first try with v1 which is smaller than v3 */
1397 lum_size = sizeof(struct lov_user_md_v1);
1398 if (copy_from_user(lumv1, lumv1p, lum_size))
/* Magic says v3: re-copy the larger structure in full. */
1401 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1402 lum_size = sizeof(struct lov_user_md_v3);
1403 if (copy_from_user(&lumv3, lumv3p, lum_size))
1407 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* On success report the server-chosen layout back to userspace. */
1409 put_user(0, &lumv1p->lmm_stripe_count);
1410 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1411 0, ll_i2info(inode)->lli_smd,
/*
 * LL_IOC_LOV_GETSTRIPE handler: pack the inode's in-memory stripe MD
 * into the user buffer via the LOV iocontrol path.
 * NOTE(review): elided listing — the no-layout (lsm == NULL) check and
 * the final argument of obd_iocontrol() are not visible here.
 */
1417 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1419 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1424 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * LL_IOC_GROUP_LOCK: take a cl-layer group lock with group id @arg on
 * behalf of this open file.  At most one group lock per file descriptor;
 * the grouplock handle is stashed in the ll_file_data.  lli_lock guards
 * fd_flags/fd_grouplock against a racing second locker.
 */
1428 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1430 struct ll_inode_info *lli = ll_i2info(inode);
1431 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1432 struct ccc_grouplock grouplock;
1436 spin_lock(&lli->lli_lock);
1437 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1438 CERROR("group lock already existed with gid %lu\n",
1439 fd->fd_grouplock.cg_gid);
1440 spin_unlock(&lli->lli_lock);
1443 LASSERT(fd->fd_grouplock.cg_lock == NULL);
/* Drop the spinlock across the (blocking) enqueue below. */
1444 spin_unlock(&lli->lli_lock);
1446 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1447 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check under the lock: another thread may have won meanwhile. */
1451 spin_lock(&lli->lli_lock);
1452 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1453 spin_unlock(&lli->lli_lock);
1454 CERROR("another thread just won the race\n");
1455 cl_put_grouplock(&grouplock);
1459 fd->fd_flags |= (LL_FILE_GROUP_LOCKED | LL_FILE_IGNORE_LOCK);
1460 fd->fd_grouplock = grouplock;
1461 spin_unlock(&lli->lli_lock);
1463 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * LL_IOC_GROUP_UNLOCK: release the group lock with id @arg previously
 * taken by ll_get_grouplock() on this file descriptor.  Fails if no
 * group lock is held or if @arg does not match the held gid.
 */
1467 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1469 struct ll_inode_info *lli = ll_i2info(inode);
1470 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1471 struct ccc_grouplock grouplock;
1474 spin_lock(&lli->lli_lock);
1475 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1476 spin_unlock(&lli->lli_lock);
1477 CERROR("no group lock held\n");
1480 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1482 if (fd->fd_grouplock.cg_gid != arg) {
1483 CERROR("group lock %lu doesn't match current id %lu\n",
1484 arg, fd->fd_grouplock.cg_gid);
1485 spin_unlock(&lli->lli_lock);
/* Detach the handle under the lock, then release it outside. */
1489 grouplock = fd->fd_grouplock;
1490 fd->fd_grouplock.cg_env = NULL;
1491 fd->fd_grouplock.cg_lock = NULL;
1492 fd->fd_grouplock.cg_gid = 0;
1493 fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED | LL_FILE_IGNORE_LOCK);
1494 spin_unlock(&lli->lli_lock);
1496 cl_put_grouplock(&grouplock);
1497 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1501 #if LUSTRE_FIX >= 50
/*
 * Validate that @tail may be joined onto @head: the server must support
 * join, both inodes must be regular files, distinct, and head's size
 * must be a multiple of JOIN_FILE_ALIGN (64K).
 * NOTE(review): elided listing — the RETURN statements for each failure
 * case are not visible here.
 */
1502 static int join_sanity_check(struct inode *head, struct inode *tail)
1505 if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
1506 CERROR("server do not support join \n");
1509 if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
1510 CERROR("tail ino %lu and ino head %lu must be regular\n",
1511 head->i_ino, tail->i_ino);
1514 if (head->i_ino == tail->i_ino) {
1515 CERROR("file %lu can not be joined to itself \n", head->i_ino);
1518 if (i_size_read(head) % JOIN_FILE_ALIGN) {
1519 CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
/*
 * Issue the MDS join request: an IT_OPEN intent with mode M_JOIN_FILE,
 * carrying head's current size as intent data, naming the tail file.
 * Any lock granted as a side effect is dropped immediately, and the
 * open handle obtained by the intent is released before returning.
 */
1525 static int join_file(struct inode *head_inode, struct file *head_filp,
1526 struct file *tail_filp)
1528 struct dentry *tail_dentry = tail_filp->f_dentry;
1529 struct lookup_intent oit = {.it_op = IT_OPEN,
1530 .it_flags = head_filp->f_flags,
1531 .it_create_mode = M_JOIN_FILE};
1532 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CW,
1533 ll_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL };
1535 struct lustre_handle lockh;
1536 struct md_op_data *op_data;
1541 tail_dentry = tail_filp->f_dentry;
/* Head's size is passed as opaque intent data for the join. */
1543 data = i_size_read(head_inode);
1544 op_data = ll_prep_md_op_data(NULL, head_inode,
1545 tail_dentry->d_parent->d_inode,
1546 tail_dentry->d_name.name,
1547 tail_dentry->d_name.len, 0,
1548 LUSTRE_OPC_ANY, &data);
1549 if (IS_ERR(op_data))
1550 RETURN(PTR_ERR(op_data));
1552 rc = md_enqueue(ll_i2mdexp(head_inode), &einfo, &oit,
1553 op_data, &lockh, NULL, 0, NULL, 0);
1555 ll_finish_md_op_data(op_data);
1559 rc = oit.d.lustre.it_status;
/* Server-side open failure: free the reply and bail out. */
1561 if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
1562 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
1563 ptlrpc_req_finished((struct ptlrpc_request *)
1564 oit.d.lustre.it_data);
1568 if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
1570 ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
1571 oit.d.lustre.it_lock_mode = 0;
1573 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Clear ENQ_COMPLETE so ll_release_openhandle() will do real work. */
1574 it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
1575 ll_release_openhandle(head_filp->f_dentry, &oit);
1577 ll_intent_release(&oit);
/*
 * LL_IOC_JOIN entry point: join the file named @filename_tail onto
 * @head.  Opens the tail, orders the two inodes by ino to take their
 * extent tree locks in a deadlock-free order, sanity-checks the pair,
 * performs the join, then unwinds via the cleanup_phase switch
 * (falls through phases to undo in reverse acquisition order).
 * On success the head's cached stripe MD is freed so the new joined
 * layout is refetched.
 */
1581 static int ll_file_join(struct inode *head, struct file *filp,
1582 char *filename_tail)
1584 struct inode *tail = NULL, *first = NULL, *second = NULL;
1585 struct dentry *tail_dentry;
1586 struct file *tail_filp, *first_filp, *second_filp;
1587 struct ll_lock_tree first_tree, second_tree;
1588 struct ll_lock_tree_node *first_node, *second_node;
1589 struct ll_inode_info *hlli = ll_i2info(head);
1590 int rc = 0, cleanup_phase = 0;
1593 CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
1594 head->i_ino, head->i_generation, head, filename_tail);
1596 tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
1597 if (IS_ERR(tail_filp)) {
1598 CERROR("Can not open tail file %s", filename_tail);
1599 rc = PTR_ERR(tail_filp);
1602 tail = igrab(tail_filp->f_dentry->d_inode);
1604 tail_dentry = tail_filp->f_dentry;
1605 LASSERT(tail_dentry);
1608 /*reorder the inode for lock sequence*/
/* Lock the higher-ino inode first so concurrent joins can't deadlock. */
1609 first = head->i_ino > tail->i_ino ? head : tail;
1610 second = head->i_ino > tail->i_ino ? tail : head;
1611 first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
1612 second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
1614 CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
1615 head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
/* EX lock over the whole file [0, EOF] on each inode. */
1616 first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
1617 if (IS_ERR(first_node)){
1618 rc = PTR_ERR(first_node);
1621 first_tree.lt_fd = first_filp->private_data;
1622 rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
1627 second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
1628 if (IS_ERR(second_node)){
1629 rc = PTR_ERR(second_node);
1632 second_tree.lt_fd = second_filp->private_data;
1633 rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
1638 rc = join_sanity_check(head, tail);
1642 rc = join_file(head, filp, tail_filp);
/* Unwind: each phase releases what later phases acquired on top of it
 * (case labels are elided in this listing; phases fall through). */
1646 switch (cleanup_phase) {
1648 ll_tree_unlock(&second_tree);
1649 obd_cancel_unused(ll_i2dtexp(second),
1650 ll_i2info(second)->lli_smd, 0, NULL);
1652 ll_tree_unlock(&first_tree);
1653 obd_cancel_unused(ll_i2dtexp(first),
1654 ll_i2info(first)->lli_smd, 0, NULL);
1656 filp_close(tail_filp, 0);
/* Successful join invalidates head's cached layout. */
1659 if (head && rc == 0) {
1660 obd_free_memmd(ll_i2sbi(head)->ll_dt_exp,
1662 hlli->lli_smd = NULL;
1667 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
1672 #endif /* LUSTRE_FIX >= 50 */
1675 * Close inode open handle
1677 * \param dentry [in] dentry which contains the inode
1678 * \param it [in,out] intent which contains open info and result
1681 * \retval <0 failure
/*
 * Close the MDS open handle carried by intent @it for @dentry's inode.
 * No-op for the filesystem root and for intents that carry no open
 * disposition.  Fills a temporary obd_client_handle from the intent and
 * closes it, then drops the intent's open-reference request.
 */
1683 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1685 struct inode *inode = dentry->d_inode;
1686 struct obd_client_handle *och;
1692 /* Root ? Do nothing. */
1693 if (dentry->d_inode->i_sb->s_root == dentry)
1696 /* No open handle to close? Move away */
1697 if (!it_disposition(it, DISP_OPEN_OPEN))
1700 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1702 OBD_ALLOC(och, sizeof(*och));
1704 GOTO(out, rc = -ENOMEM);
1706 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1707 ll_i2info(inode), it, och);
1709 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1712 /* this one is in place of ll_file_open */
1713 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1714 ptlrpc_req_finished(it->d.lustre.it_data);
1715 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1720 * Get size for inode for which FIEMAP mapping is requested.
1721 * Make the FIEMAP get_info call and returns the result.
/*
 * Build a fiemap key (object id/group + inode attributes + the user's
 * fiemap request) and ask the data export for the extent mapping via
 * obd_get_info(KEY_FIEMAP).  Striped files require the caller to accept
 * FIEMAP_FLAG_DEVICE_ORDER, since extents are returned per-device.
 */
1723 int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1726 struct obd_export *exp = ll_i2dtexp(inode);
1727 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1728 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1729 int vallen = num_bytes;
1733 /* If the stripe_count > 1 and the application does not understand
1734 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1736 if (lsm->lsm_stripe_count > 1 &&
1737 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
1740 fm_key.oa.o_id = lsm->lsm_object_id;
1741 fm_key.oa.o_gr = lsm->lsm_object_gr;
1742 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1744 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1747 /* If filesize is 0, then there would be no objects for mapping */
1748 if (fm_key.oa.o_size == 0) {
1749 fiemap->fm_mapped_extents = 0;
1753 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1755 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1757 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a path via the MD export.  Copies
 * the fixed-size input header from userspace to learn gf_pathlen, then
 * allocates an output buffer large enough for the path, runs the
 * iocontrol, and copies the result back.
 * NOTE(review): elided listing — allocation-failure and copy-failure
 * RETURN/GOTO lines, and the free of gfin, are not visible here.
 */
1762 int ll_fid2path(struct obd_export *exp, void *arg)
1764 struct getinfo_fid2path *gfout, *gfin;
1768 /* Need to get the buflen */
1769 OBD_ALLOC_PTR(gfin);
1772 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
1777 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1778 OBD_ALLOC(gfout, outsize);
1779 if (gfout == NULL) {
1783 memcpy(gfout, gfin, sizeof(*gfout));
1786 /* Call mdc_iocontrol */
1787 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1790 if (copy_to_user(arg, gfout, outsize))
1794 OBD_FREE(gfout, outsize);
/*
 * Main per-file ioctl dispatcher for the llite client.  Handles the
 * LL_IOC_* family (flags, striping, group locks, join, fid/path
 * translation), a subset of EXT3_IOC_* for compatibility (FIEMAP,
 * GET/SETFLAGS, GETVERSION), and finally falls through to dynamically
 * registered handlers (ll_iocontrol_call) and the generic
 * obd_iocontrol() path.
 * NOTE(review): elided listing — several case labels, RETURN statements
 * and closing braces are missing from this extract; comments describe
 * only the visible code.
 */
1798 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1801 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1805 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1806 inode->i_generation, inode, cmd);
1807 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1809 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1810 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1814 case LL_IOC_GETFLAGS:
1815 /* Get the current value of the file flags */
1816 return put_user(fd->fd_flags, (int *)arg);
1817 case LL_IOC_SETFLAGS:
1818 case LL_IOC_CLRFLAGS:
1819 /* Set or clear specific file flags */
1820 /* XXX This probably needs checks to ensure the flags are
1821 * not abused, and to handle any flag side effects.
1823 if (get_user(flags, (int *) arg))
1826 if (cmd == LL_IOC_SETFLAGS) {
/* Disabling locking is only safe for O_DIRECT I/O. */
1827 if ((flags & LL_FILE_IGNORE_LOCK) &&
1828 !(file->f_flags & O_DIRECT)) {
1829 CERROR("%s: unable to disable locking on "
1830 "non-O_DIRECT file\n", current->comm);
1834 fd->fd_flags |= flags;
1836 fd->fd_flags &= ~flags;
1839 case LL_IOC_LOV_SETSTRIPE:
1840 RETURN(ll_lov_setstripe(inode, file, arg));
1841 case LL_IOC_LOV_SETEA:
1842 RETURN(ll_lov_setea(inode, file, arg));
1843 case LL_IOC_LOV_GETSTRIPE:
1844 RETURN(ll_lov_getstripe(inode, arg));
1845 case LL_IOC_RECREATE_OBJ:
1846 RETURN(ll_lov_recreate_obj(inode, file, arg));
1847 case EXT3_IOC_FIEMAP: {
1848 struct ll_user_fiemap *fiemap_s;
1849 size_t num_bytes, ret_bytes;
1850 unsigned int extent_count;
1853 /* Get the extent count so we can calculate the size of
1854 * required fiemap buffer */
1855 if (get_user(extent_count,
1856 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1858 num_bytes = sizeof(*fiemap_s) + (extent_count *
1859 sizeof(struct ll_fiemap_extent));
/* vmalloc: extent_count is user-controlled and can be large. */
1860 OBD_VMALLOC(fiemap_s, num_bytes);
1861 if (fiemap_s == NULL)
1864 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1866 GOTO(error, rc = -EFAULT);
/* Reject flags we don't understand, but first tell userspace
 * which ones those were (fiemap flag-negotiation protocol). */
1868 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1869 fiemap_s->fm_flags = fiemap_s->fm_flags &
1870 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1871 if (copy_to_user((char *)arg, fiemap_s,
1873 GOTO(error, rc = -EFAULT);
1875 GOTO(error, rc = -EBADR);
1878 /* If fm_extent_count is non-zero, read the first extent since
1879 * it is used to calculate end_offset and device from previous
1882 if (copy_from_user(&fiemap_s->fm_extents[0],
1883 (char __user *)arg + sizeof(*fiemap_s),
1884 sizeof(struct ll_fiemap_extent)))
1885 GOTO(error, rc = -EFAULT);
1888 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1891 rc = filemap_fdatawrite(inode->i_mapping);
1896 rc = ll_fiemap(inode, fiemap_s, num_bytes);
1900 ret_bytes = sizeof(struct ll_user_fiemap);
1902 if (extent_count != 0)
1903 ret_bytes += (fiemap_s->fm_mapped_extents *
1904 sizeof(struct ll_fiemap_extent));
1906 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1910 OBD_VFREE(fiemap_s, num_bytes);
1913 case EXT3_IOC_GETFLAGS:
1914 case EXT3_IOC_SETFLAGS:
1915 RETURN(ll_iocontrol(inode, file, cmd, arg));
1916 case EXT3_IOC_GETVERSION_OLD:
1917 case EXT3_IOC_GETVERSION:
1918 RETURN(put_user(inode->i_generation, (int *)arg));
1920 #if LUSTRE_FIX >= 50
1921 /* Allow file join in beta builds to allow debuggging */
1925 ftail = getname((const char *)arg);
1927 RETURN(PTR_ERR(ftail));
1928 rc = ll_file_join(inode, file, ftail);
1932 CWARN("file join is not supported in this version of Lustre\n");
1936 case LL_IOC_GROUP_LOCK:
1937 RETURN(ll_get_grouplock(inode, file, arg));
1938 case LL_IOC_GROUP_UNLOCK:
1939 RETURN(ll_put_grouplock(inode, file, arg));
1940 case IOC_OBD_STATFS:
1941 RETURN(ll_obd_statfs(inode, (void *)arg));
1943 /* We need to special case any other ioctls we want to handle,
1944 * to send them to the MDS/OST as appropriate and to properly
1945 * network encode the arg field.
1946 case EXT3_IOC_SETVERSION_OLD:
1947 case EXT3_IOC_SETVERSION:
1949 case LL_IOC_FLUSHCTX:
1950 RETURN(ll_flush_ctx(inode));
1951 case LL_IOC_PATH2FID: {
1952 if (copy_to_user((void *)arg, &ll_i2info(inode)->lli_fid,
1953 sizeof(struct lu_fid)))
1958 case OBD_IOC_FID2PATH:
1959 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
/* Unknown command: try dynamically registered handlers first,
 * then punt to the generic obd_iocontrol path. */
1965 ll_iocontrol_call(inode, file, cmd, arg, &err))
1968 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek for Lustre files.  SEEK_END must glimpse the file size from
 * the OSTs first (cl_glimpse_size) so i_size is current, then reads it
 * under the inode size lock.  The final offset is validated against the
 * per-filesystem maximum before updating f_pos.
 */
1974 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1976 struct inode *inode = file->f_dentry->d_inode;
/* Pre-computed target, used only for the trace message below. */
1979 retval = offset + ((origin == 2) ? i_size_read(inode) :
1980 (origin == 1) ? file->f_pos : 0);
1981 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1982 inode->i_ino, inode->i_generation, inode, retval, retval,
1983 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1984 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1986 if (origin == 2) { /* SEEK_END */
/* NOTE(review): nonblock is computed but its use (passing it to the
 * glimpse) is not visible in this elided listing — confirm upstream. */
1987 int nonblock = 0, rc;
1989 if (file->f_flags & O_NONBLOCK)
1990 nonblock = LDLM_FL_BLOCK_NOWAIT;
1992 rc = cl_glimpse_size(inode);
1996 ll_inode_size_lock(inode, 0);
1997 offset += i_size_read(inode);
1998 ll_inode_size_unlock(inode, 0);
1999 } else if (origin == 1) { /* SEEK_CUR */
2000 offset += file->f_pos;
2004 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
2005 if (offset != file->f_pos) {
2006 file->f_pos = offset;
/*
 * fsync: wait for outstanding page I/O, surface any recorded async
 * write errors (inode-level and per-stripe), sync metadata through the
 * MDC (md_sync), and — for striped files — sync data [0, EOF] through
 * the OSC (obd_sync) with an obdo describing the inode.
 * NOTE(review): elided listing — the OBD_ALLOC of `oa`, several error
 * merges into rc, and the final RETURN are not visible here.
 */
2016 int ll_fsync(struct file *file, struct dentry *dentry, int data)
2017 struct inode *inode = dentry->d_inode;
2018 struct ll_inode_info *lli = ll_i2info(inode);
2019 struct lov_stripe_md *lsm = lli->lli_smd;
2020 struct ptlrpc_request *req;
2021 struct obd_capa *oc;
2023 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2024 inode->i_generation, inode);
2025 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2027 /* fsync's caller has already called _fdata{sync,write}, we want
2028 * that IO to finish before calling the osc and mdc sync methods */
2029 rc = filemap_fdatawait(inode->i_mapping);
2031 /* catch async errors that were recorded back when async writeback
2032 * failed for pages in this mapping. */
/* Consume (and clear) the saved async error so it isn't reported twice. */
2033 err = lli->lli_async_rc;
2034 lli->lli_async_rc = 0;
2038 err = lov_test_and_clear_async_rc(lsm);
2043 oc = ll_mdscapa_get(inode);
2044 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2050 ptlrpc_req_finished(req);
2057 RETURN(rc ? rc : -ENOMEM);
/* Describe the data object for the OST-side sync. */
2059 oa->o_id = lsm->lsm_object_id;
2060 oa->o_gr = lsm->lsm_object_gr;
2061 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2062 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
2063 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
2066 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2067 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
2068 0, OBD_OBJECT_EOF, oc);
/*
 * flock()/fcntl() lock handler: translates a kernel file_lock into an
 * LDLM flock enqueue on the MDS.  Lock type maps to LDLM mode
 * (F_RDLCK->PR, F_WRLCK->PW, F_UNLCK->NL as a cancel surrogate);
 * the fcntl command selects enqueue flags (non-blocking / test-lock).
 * On success the lock is also recorded in the local VFS lock lists so
 * the kernel's bookkeeping stays consistent.
 */
2080 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2081 struct inode *inode = file->f_dentry->d_inode;
2082 struct ll_sb_info *sbi = ll_i2sbi(inode);
2083 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2084 .ei_cb_cp =ldlm_flock_completion_ast,
2085 .ei_cbdata = file_lock };
2086 struct md_op_data *op_data;
2087 struct lustre_handle lockh = {0};
2088 ldlm_policy_data_t flock;
2092 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2093 inode->i_ino, file_lock);
2095 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2097 if (file_lock->fl_flags & FL_FLOCK) {
2098 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2099 /* set missing params for flock() calls */
2100 file_lock->fl_end = OFFSET_MAX;
2101 file_lock->fl_pid = current->tgid;
2103 flock.l_flock.pid = file_lock->fl_pid;
2104 flock.l_flock.start = file_lock->fl_start;
2105 flock.l_flock.end = file_lock->fl_end;
/* Map the VFS lock type to an LDLM mode (case labels elided here). */
2107 switch (file_lock->fl_type) {
2109 einfo.ei_mode = LCK_PR;
2112 /* An unlock request may or may not have any relation to
2113 * existing locks so we may not be able to pass a lock handle
2114 * via a normal ldlm_lock_cancel() request. The request may even
2115 * unlock a byte range in the middle of an existing lock. In
2116 * order to process an unlock request we need all of the same
2117 * information that is given with a normal read or write record
2118 * lock request. To avoid creating another ldlm unlock (cancel)
2119 * message we'll treat a LCK_NL flock request as an unlock. */
2120 einfo.ei_mode = LCK_NL;
2123 einfo.ei_mode = LCK_PW;
2126 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* Map the fcntl command to enqueue flags (case labels elided). */
2141 flags = LDLM_FL_BLOCK_NOWAIT;
2147 flags = LDLM_FL_TEST_LOCK;
2148 /* Save the old mode so that if the mode in the lock changes we
2149 * can decrement the appropriate reader or writer refcount. */
2150 file_lock->fl_type = einfo.ei_mode;
2153 CERROR("unknown fcntl lock command: %d\n", cmd);
2157 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2158 LUSTRE_OPC_ANY, NULL);
2159 if (IS_ERR(op_data))
2160 RETURN(PTR_ERR(op_data));
2162 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2163 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2164 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2166 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2167 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2169 ll_finish_md_op_data(op_data);
/* Mirror the server result into the kernel's local lock lists. */
2171 if ((file_lock->fl_flags & FL_FLOCK) &&
2172 (rc == 0 || file_lock->fl_type == F_UNLCK))
2173 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2174 #ifdef HAVE_F_OP_FLOCK
2175 if ((file_lock->fl_flags & FL_POSIX) &&
2176 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2177 !(flags & LDLM_FL_TEST_LOCK))
2178 posix_lock_file_wait(file, file_lock);
2184 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking a reference) whether this client already holds a
 * granted MDS inodebits lock covering @bits on @inode, in any of the
 * CR/CW/PR/PW modes.  LDLM_FL_TEST_LOCK makes the match side-effect
 * free.
 */
2191 int ll_have_md_lock(struct inode *inode, __u64 bits)
2193 struct lustre_handle lockh;
2194 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2202 fid = &ll_i2info(inode)->lli_fid;
2203 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2205 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2206 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2207 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Like ll_have_md_lock(), but actually takes a reference on a matching
 * granted MDS inodebits lock: on a match, *lockh holds the referenced
 * lock handle and the matched mode is returned (no TEST_LOCK flag here).
 */
2213 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2214 struct lustre_handle *lockh)
2216 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2222 fid = &ll_i2info(inode)->lli_fid;
2223 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2225 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2226 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2227 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process a revalidation result: -ENOENT on a special file (not a
 * regular file or directory) means it was unlinked behind our back and
 * is treated as success after the nlink fixup (fixup line elided in
 * this listing); any other error is logged and propagated.
 */
2231 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2232 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2233 * and return success */
2235 /* This path cannot be hit for regular files unless in
2236 * case of obscure races, so no need to to validate
2238 if (!S_ISREG(inode->i_mode) &&
2239 !S_ISDIR(inode->i_mode))
2244 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate @dentry's inode attributes against the MDS.
 * Two strategies:
 *  - if the server supports getattr-by-FID (OBD_CONNECT_ATTRFID):
 *    issue an IT_GETATTR intent lock (nameless, M_CHECK_STALE) and
 *    finish the revalidation from the intent reply, unhashing the
 *    dentry if the file was unlinked;
 *  - otherwise, if no covering MD lock is cached for @ibits: do a
 *    plain md_getattr (requesting EA sizes for regular files) and
 *    refresh the inode from the reply via ll_prep_inode().
 */
2252 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2255 struct inode *inode = dentry->d_inode;
2256 struct ptlrpc_request *req = NULL;
2257 struct ll_sb_info *sbi;
2258 struct obd_export *exp;
/* Guard for a NULL inode (condition elided in this listing). */
2263 CERROR("REPORT THIS LINE TO PETER\n");
2266 sbi = ll_i2sbi(inode);
2268 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2269 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2271 exp = ll_i2mdexp(inode);
2273 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2274 struct lookup_intent oit = { .it_op = IT_GETATTR };
2275 struct md_op_data *op_data;
2277 /* Call getattr by fid, so do not provide name at all. */
2278 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2279 dentry->d_inode, NULL, 0, 0,
2280 LUSTRE_OPC_ANY, NULL);
2281 if (IS_ERR(op_data))
2282 RETURN(PTR_ERR(op_data));
2284 oit.it_create_mode |= M_CHECK_STALE;
2285 rc = md_intent_lock(exp, op_data, NULL, 0,
2286 /* we are not interested in name
2289 ll_md_blocking_ast, 0);
2290 ll_finish_md_op_data(op_data);
2291 oit.it_create_mode &= ~M_CHECK_STALE;
2293 rc = ll_inode_revalidate_fini(inode, rc);
2297 rc = ll_revalidate_it_finish(req, &oit, dentry);
2299 ll_intent_release(&oit);
2303 /* Unlinked? Unhash dentry, so it is not picked up later by
2304 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2305 here to preserve get_cwd functionality on 2.6.
2307 if (!dentry->d_inode->i_nlink) {
2308 spin_lock(&ll_lookup_lock);
2309 spin_lock(&dcache_lock);
2310 ll_drop_dentry(dentry);
2311 spin_unlock(&dcache_lock);
2312 spin_unlock(&ll_lookup_lock);
2315 ll_lookup_finish_locks(&oit, dentry);
2316 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
2318 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2319 obd_valid valid = OBD_MD_FLGETATTR;
2320 struct obd_capa *oc;
/* Regular files also need the EA sizes to size the reply. */
2323 if (S_ISREG(inode->i_mode)) {
2324 rc = ll_get_max_mdsize(sbi, &ealen);
2327 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2329 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2330 * capa for this inode. Because we only keep capas of dirs
2332 oc = ll_mdscapa_get(inode);
2333 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2337 rc = ll_inode_revalidate_fini(inode, rc);
2341 rc = ll_prep_inode(&inode, req, NULL);
2344 ptlrpc_req_finished(req);
/*
 * Full revalidation: refresh MDS attributes (UPDATE|LOOKUP bits), then
 * — if the file has allocated objects — glimpse the current size from
 * the OSTs.  A NULL lli_smd means no objects yet, so size is skipped.
 */
2348 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2353 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2354 MDS_INODELOCK_LOOKUP);
2356 /* if object not yet allocated, don't validate size */
2357 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2360 /* cl_glimpse_size will prefer locally cached writes if they extend
2364 rc = cl_glimpse_size(dentry->d_inode);
/*
 * getattr with an explicit lookup intent: revalidate the inode first,
 * then fill *stat from the (now current) inode fields.  Size and block
 * count are read under the inode size lock for consistency.
 */
2369 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2370 struct lookup_intent *it, struct kstat *stat)
2372 struct inode *inode = de->d_inode;
2375 res = ll_inode_revalidate_it(de, it);
2376 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2381 stat->dev = inode->i_sb->s_dev;
2382 stat->ino = inode->i_ino;
2383 stat->mode = inode->i_mode;
2384 stat->nlink = inode->i_nlink;
2385 stat->uid = inode->i_uid;
2386 stat->gid = inode->i_gid;
2387 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2388 stat->atime = inode->i_atime;
2389 stat->mtime = inode->i_mtime;
2390 stat->ctime = inode->i_ctime;
2391 #ifdef HAVE_INODE_BLKSIZE
2392 stat->blksize = inode->i_blksize;
2394 stat->blksize = 1 << inode->i_blkbits;
2397 ll_inode_size_lock(inode, 0);
2398 stat->size = i_size_read(inode);
2399 stat->blocks = inode->i_blocks;
2400 ll_inode_size_unlock(inode, 0);
/* Standard VFS getattr entry point: wraps ll_getattr_it() with a plain
 * IT_GETATTR intent. */
2404 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2406 struct lookup_intent it = { .it_op = IT_GETATTR };
2408 return ll_getattr_it(mnt, de, &it, stat);
/*
 * ACL permission callback for generic_permission(): duplicate the
 * cached POSIX ACL under lli_lock (it may be replaced concurrently),
 * evaluate it against @mask, and release the copy.  Compiled out when
 * CONFIG_FS_POSIX_ACL is disabled.
 */
2412 int lustre_check_acl(struct inode *inode, int mask)
2414 #ifdef CONFIG_FS_POSIX_ACL
2415 struct ll_inode_info *lli = ll_i2info(inode);
2416 struct posix_acl *acl;
2420 spin_lock(&lli->lli_lock);
2421 acl = posix_acl_dup(lli->lli_posix_acl);
2422 spin_unlock(&lli->lli_lock);
2427 rc = posix_acl_permission(inode, acl, mask);
2428 posix_acl_release(acl);
/*
 * Permission check, two kernel-version variants:
 *  - >= 2.6.10: revalidate the root inode if needed (it skips lookup
 *    revalidation), delegate remote-client setups to
 *    lustre_check_remote_perm(), else use generic_permission() with
 *    lustre_check_acl as the ACL callback;
 *  - older kernels: an open-coded generic_permission() clone — owner /
 *    group / other mode bits, ACL fallback for the owner-class miss,
 *    then capability overrides (DAC_OVERRIDE / DAC_READ_SEARCH).
 * NOTE(review): elided listing — several RETURNs, the #else, and
 * check_capabilities label lines are not visible here.
 */
2436 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
2437 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2442 /* as root inode are NOT getting validated in lookup operation,
2443 * need to do it before permission check. */
2445 if (inode == inode->i_sb->s_root->d_inode) {
2446 struct lookup_intent it = { .it_op = IT_LOOKUP };
2448 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2449 MDS_INODELOCK_LOOKUP);
2454 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2455 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2457 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2458 return lustre_check_remote_perm(inode, mask);
2460 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2461 rc = generic_permission(inode, mask, lustre_check_acl);
2466 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2468 int mode = inode->i_mode;
2471 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2472 inode->i_ino, inode->i_generation, inode, mask);
2474 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2475 return lustre_check_remote_perm(inode, mask);
2477 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* Write access denied on read-only mounts and immutable inodes. */
2479 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2480 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2482 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2484 if (current->fsuid == inode->i_uid) {
/* Owner mode bits don't grant everything requested => try the ACL. */
2487 if (((mode >> 3) & mask & S_IRWXO) != mask)
2489 rc = lustre_check_acl(inode, mask);
2493 goto check_capabilities;
2497 if (in_group_p(inode->i_gid))
2500 if ((mode & mask & S_IRWXO) == mask)
/* Capability overrides: DAC_OVERRIDE for everything except executing
 * a file with no exec bit; DAC_READ_SEARCH for reads and dir search. */
2504 if (!(mask & MAY_EXEC) ||
2505 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2506 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2509 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2510 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select the vectored-I/O entry points for the file_operations tables
 * below: older kernels expose .readv/.writev, newer ones .aio_read/
 * .aio_write.  The macros let one table definition serve both. */
2517 #ifdef HAVE_FILE_READV
2518 #define READ_METHOD readv
2519 #define READ_FUNCTION ll_file_readv
2520 #define WRITE_METHOD writev
2521 #define WRITE_FUNCTION ll_file_writev
2523 #define READ_METHOD aio_read
2524 #define READ_FUNCTION ll_file_aio_read
2525 #define WRITE_METHOD aio_write
2526 #define WRITE_FUNCTION ll_file_aio_write
/* Three file_operations variants selected at mount time by flock mode:
 *  - ll_file_operations:         no .flock/.lock (-o localflock gives
 *                                locally-consistent flock only);
 *  - ll_file_operations_flock:   cluster-coherent locking via
 *                                ll_file_flock (-o flock);
 *  - ll_file_operations_noflock: ll_file_noflock returns an error for
 *                                all lock calls (-o noflock).
 * All other entry points are identical across the three tables. */
2529 /* -o localflock - only provides locally consistent flock locks */
2530 struct file_operations ll_file_operations = {
2531 .read = ll_file_read,
2532 .READ_METHOD = READ_FUNCTION,
2533 .write = ll_file_write,
2534 .WRITE_METHOD = WRITE_FUNCTION,
2535 .ioctl = ll_file_ioctl,
2536 .open = ll_file_open,
2537 .release = ll_file_release,
2538 .mmap = ll_file_mmap,
2539 .llseek = ll_file_seek,
2540 .sendfile = ll_file_sendfile,
2544 struct file_operations ll_file_operations_flock = {
2545 .read = ll_file_read,
2546 .READ_METHOD = READ_FUNCTION,
2547 .write = ll_file_write,
2548 .WRITE_METHOD = WRITE_FUNCTION,
2549 .ioctl = ll_file_ioctl,
2550 .open = ll_file_open,
2551 .release = ll_file_release,
2552 .mmap = ll_file_mmap,
2553 .llseek = ll_file_seek,
2554 .sendfile = ll_file_sendfile,
2556 #ifdef HAVE_F_OP_FLOCK
2557 .flock = ll_file_flock,
2559 .lock = ll_file_flock
2562 /* These are for -o noflock - to return ENOSYS on flock calls */
2563 struct file_operations ll_file_operations_noflock = {
2564 .read = ll_file_read,
2565 .READ_METHOD = READ_FUNCTION,
2566 .write = ll_file_write,
2567 .WRITE_METHOD = WRITE_FUNCTION,
2568 .ioctl = ll_file_ioctl,
2569 .open = ll_file_open,
2570 .release = ll_file_release,
2571 .mmap = ll_file_mmap,
2572 .llseek = ll_file_seek,
2573 .sendfile = ll_file_sendfile,
2575 #ifdef HAVE_F_OP_FLOCK
2576 .flock = ll_file_noflock,
2578 .lock = ll_file_noflock
/* Inode operations for regular files. */
2581 struct inode_operations ll_file_inode_operations = {
2582 #ifdef HAVE_VFS_INTENT_PATCHES
2583 .setattr_raw = ll_setattr_raw,
2585 .setattr = ll_setattr,
2586 .truncate = ll_truncate,
2587 .getattr = ll_getattr,
2588 .permission = ll_inode_permission,
2589 .setxattr = ll_setxattr,
2590 .getxattr = ll_getxattr,
2591 .listxattr = ll_listxattr,
2592 .removexattr = ll_removexattr,
2595 /* dynamic ioctl number support routins */
/* Global registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries protected by a read-write semaphore (readers are
 * ioctl dispatch, writers are register/unregister). */
2596 static struct llioc_ctl_data {
2597 struct rw_semaphore ioc_sem;
2598 struct list_head ioc_head;
2600 __RWSEM_INITIALIZER(llioc.ioc_sem),
2601 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registration: callback + the flexible array of ioctl numbers it
 * handles (iocd_count entries in iocd_cmd). */
2606 struct list_head iocd_list;
2607 unsigned int iocd_size;
2608 llioc_callback_t iocd_cb;
2609 unsigned int iocd_count;
2610 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: callback @cb for the @count ioctl
 * numbers in @cmd.  Returns an opaque cookie (the allocation itself)
 * used later by ll_iocontrol_unregister(), or NULL on bad arguments /
 * allocation failure (RETURN lines elided in this listing).
 */
2613 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2616 struct llioc_data *in_data = NULL;
2619 if (cb == NULL || cmd == NULL ||
2620 count > LLIOC_MAX_CMD || count < 0)
2623 size = sizeof(*in_data) + count * sizeof(unsigned int);
2624 OBD_ALLOC(in_data, size);
2625 if (in_data == NULL)
2628 memset(in_data, 0, sizeof(*in_data));
2629 in_data->iocd_size = size;
2630 in_data->iocd_cb = cb;
2631 in_data->iocd_count = count;
2632 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
2634 down_write(&llioc.ioc_sem);
2635 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2636 up_write(&llioc.ioc_sem);
/*
 * Unregister a dynamic ioctl handler by the cookie returned from
 * ll_iocontrol_register(): find the matching entry under the write
 * semaphore, unlink and free it.  Warns if the cookie is unknown.
 * (The cookie comparison line is elided in this listing.)
 */
2641 void ll_iocontrol_unregister(void *magic)
2643 struct llioc_data *tmp;
2648 down_write(&llioc.ioc_sem);
2649 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
/* Remember size before freeing; drop the sem before OBD_FREE. */
2651 unsigned int size = tmp->iocd_size;
2653 list_del(&tmp->iocd_list);
2654 up_write(&llioc.ioc_sem);
2656 OBD_FREE(tmp, size);
2660 up_write(&llioc.ioc_sem);
2662 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
2665 EXPORT_SYMBOL(ll_iocontrol_register);
2666 EXPORT_SYMBOL(ll_iocontrol_unregister);
2668 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2669 unsigned int cmd, unsigned long arg, int *rcp)
2671 enum llioc_iter ret = LLIOC_CONT;
2672 struct llioc_data *data;
2673 int rc = -EINVAL, i;
2675 down_read(&llioc.ioc_sem);
2676 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2677 for (i = 0; i < data->iocd_count; i++) {
2678 if (cmd != data->iocd_cmd[i])
2681 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2685 if (ret == LLIOC_STOP)
2688 up_read(&llioc.ioc_sem);