1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 * Author: Peter Braam <braam@clusterfs.com>
39 * Author: Phil Schwan <phil@clusterfs.com>
40 * Author: Andreas Dilger <adilger@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <lustre_dlm.h>
45 #include <lustre_lite.h>
46 #include <lustre_mdc.h>
47 #include <linux/pagemap.h>
48 #include <linux/file.h>
49 #include "llite_internal.h"
50 #include <lustre/ll_fiemap.h>
52 #include "cl_object.h"
/* Allocate a per-open-file ll_file_data from the dedicated slab cache,
 * using an I/O-safe allocation context.
 * NOTE(review): surrounding brace/return lines are elided in this view. */
54 struct ll_file_data *ll_file_data_get(void)
56 struct ll_file_data *fd;
58 OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
/* Release a ll_file_data back to its slab cache; counterpart of
 * ll_file_data_get(). */
62 static void ll_file_data_put(struct ll_file_data *fd)
65 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
/* Copy the inode's current attributes (mode, times, size, blocks, flags),
 * its fid, I/O epoch and the given open file handle @fh into @op_data
 * for an MDS request.  Also takes a reference on the MDS capability
 * (op_capa1) — the caller is responsible for releasing it. */
68 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
69 struct lustre_handle *fh)
71 op_data->op_fid1 = ll_i2info(inode)->lli_fid;
72 op_data->op_attr.ia_mode = inode->i_mode;
73 op_data->op_attr.ia_atime = inode->i_atime;
74 op_data->op_attr.ia_mtime = inode->i_mtime;
75 op_data->op_attr.ia_ctime = inode->i_ctime;
76 op_data->op_attr.ia_size = i_size_read(inode);
77 op_data->op_attr_blocks = inode->i_blocks;
/* ll_iattr wraps struct iattr to carry ia_attr_flags; cast is intentional */
78 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = inode->i_flags;
79 op_data->op_ioepoch = ll_i2info(inode)->lli_ioepoch;
80 memcpy(&op_data->op_handle, fh, sizeof(op_data->op_handle));
81 op_data->op_capa1 = ll_mdscapa_get(inode);
/* Prepare @op_data for an MDS close RPC on @inode.  Always marks mode and
 * times valid; size/blocks are included only when the MDS does not do
 * Size-on-MDS (no OBD_CONNECT_SOM) or the file is not a regular file,
 * since otherwise size is recovered via the SOM epoch mechanism.
 * NOTE(review): the early-return for non-write handles is partly elided. */
84 static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
85 struct obd_client_handle *och)
89 op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
90 ATTR_MTIME_SET | ATTR_CTIME_SET;
92 if (!(och->och_flags & FMODE_WRITE))
95 if (!(ll_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM) ||
96 !S_ISREG(inode->i_mode))
97 op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
/* may update op_flags (MF_EPOCH_CLOSE) and can clear och on epoch close */
99 ll_epoch_close(inode, op_data, &och, 0);
102 ll_pack_inode2opdata(inode, op_data, &och->och_fh);
/* Send the MDS close RPC for an open handle @och on @inode, handle the
 * Size-on-MDS (SOM) epoch protocol, destroy OST objects if the close
 * reply says the file was unlinked, and release replay data.
 * NOTE(review): forced-umount short-circuit and error-path lines are
 * elided in this view. */
106 static int ll_close_inode_openhandle(struct obd_export *md_exp,
108 struct obd_client_handle *och)
110 struct obd_export *exp = ll_i2mdexp(inode);
111 struct md_op_data *op_data;
112 struct ptlrpc_request *req = NULL;
113 struct obd_device *obd = class_exp2obd(exp);
120 * XXX: in case of LMV, is this correct to access
123 CERROR("Invalid MDC connection handle "LPX64"\n",
124 ll_i2mdexp(inode)->exp_handle.h_cookie);
129 * here we check if this is forced umount. If so this is called on
130 * canceling "open lock" and we do not call md_close() in this case, as
131 * it will not be successful, as import is already deactivated.
136 OBD_ALLOC_PTR(op_data);
138 GOTO(out, rc = -ENOMEM); // XXX We leak openhandle and request here.
140 ll_prepare_close(inode, op_data, och);
141 epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
142 rc = md_close(md_exp, op_data, och->och_mod, &req);
144 /* This close must have the epoch closed. */
145 LASSERT(exp->exp_connect_flags & OBD_CONNECT_SOM);
146 LASSERT(epoch_close);
/* MDS has instructed us to obtain Size-on-MDS attribute from
 * OSTs and send setattr back to MDS. */
149 rc = ll_sizeonmds_update(inode, &och->och_fh,
150 op_data->op_ioepoch);
152 CERROR("inode %lu mdc Size-on-MDS update failed: "
153 "rc = %d\n", inode->i_ino, rc);
157 CERROR("inode %lu mdc close failed: rc = %d\n",
160 ll_finish_md_op_data(op_data);
/* destroy OST objects if MDS reply indicates the last unlink happened */
163 rc = ll_objects_destroy(req, inode);
165 CERROR("inode %lu ll_objects destroy: rc = %d\n",
/* epoch still open on a SOM write handle: defer DONE_WRITING to a thread */
172 if ((exp->exp_connect_flags & OBD_CONNECT_SOM) && !epoch_close &&
173 S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
174 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
176 md_clear_open_replay_data(md_exp, och);
/* Free @och if it is not waiting for DONE_WRITING. */
178 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
181 if (req) /* This is close request */
182 ptlrpc_req_finished(req);
/* Really close the MDS open handle of the given kind (write/exec/read)
 * for @inode, but only when no other local users of that handle remain.
 * Selects the per-mode handle slot and usecount under lli_och_sem. */
186 int ll_md_real_close(struct inode *inode, int flags)
188 struct ll_inode_info *lli = ll_i2info(inode);
189 struct obd_client_handle **och_p;
190 struct obd_client_handle *och;
195 if (flags & FMODE_WRITE) {
196 och_p = &lli->lli_mds_write_och;
197 och_usecount = &lli->lli_open_fd_write_count;
198 } else if (flags & FMODE_EXEC) {
199 och_p = &lli->lli_mds_exec_och;
200 och_usecount = &lli->lli_open_fd_exec_count;
202 LASSERT(flags & FMODE_READ);
203 och_p = &lli->lli_mds_read_och;
204 och_usecount = &lli->lli_open_fd_read_count;
207 down(&lli->lli_och_sem);
208 if (*och_usecount) { /* There are still users of this handle, so
210 up(&lli->lli_och_sem);
215 up(&lli->lli_och_sem);
217 if (och) { /* There might be a race and somebody have freed this och
219 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
/* Per-file-descriptor close: drop the group lock if held, decrement the
 * per-mode open count, and call ll_md_real_close() only when no matching
 * cached OPEN DLM lock lets us skip the MDS round-trip.  Finally detach
 * and free the ll_file_data and release the OSS capability. */
226 int ll_md_close(struct obd_export *md_exp, struct inode *inode,
229 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
230 struct ll_inode_info *lli = ll_i2info(inode);
/* clear group lock, if present */
235 if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
236 ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
/* Let's see if we have good enough OPEN lock on the file and if
 * we can skip talking to MDS */
240 if (file->f_dentry->d_inode) { /* Can this ever be false? */
/* LDLM_FL_TEST_LOCK: only probe for the lock, do not take a reference */
242 int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
243 struct lustre_handle lockh;
244 struct inode *inode = file->f_dentry->d_inode;
245 ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
247 down(&lli->lli_och_sem);
248 if (fd->fd_omode & FMODE_WRITE) {
250 LASSERT(lli->lli_open_fd_write_count);
251 lli->lli_open_fd_write_count--;
252 } else if (fd->fd_omode & FMODE_EXEC) {
254 LASSERT(lli->lli_open_fd_exec_count);
255 lli->lli_open_fd_exec_count--;
258 LASSERT(lli->lli_open_fd_read_count);
259 lli->lli_open_fd_read_count--;
261 up(&lli->lli_och_sem);
/* no cached OPEN lock matched: must close the MDS handle for real */
263 if (!md_lock_match(md_exp, flags, ll_inode2fid(inode),
264 LDLM_IBITS, &policy, lockmode,
266 rc = ll_md_real_close(file->f_dentry->d_inode,
270 CERROR("Releasing a file %p with negative dentry %p. Name %s",
271 file, file->f_dentry, file->f_dentry->d_name.name);
274 LUSTRE_FPRIVATE(file) = NULL;
275 ll_file_data_put(fd);
276 ll_capa_close(inode);
281 int lov_test_and_clear_async_rc(struct lov_stripe_md *lsm);
283 /* While this returns an error code, fput() the caller does not, so we need
284 * to make every effort to clean up all of our state here. Also, applications
285 * rarely check close errors and even if an error is returned they will not
286 * re-try the close call.
/* VFS ->release() for Lustre files: tear down remote-ACL state for the
 * root inode, stop the statahead thread we own, and close the MDS handle
 * via ll_md_close().  The root dentry takes a short-cut path that only
 * frees the ll_file_data. */
288 int ll_file_release(struct inode *inode, struct file *file)
290 struct ll_file_data *fd;
291 struct ll_sb_info *sbi = ll_i2sbi(inode);
292 struct ll_inode_info *lli = ll_i2info(inode);
293 struct lov_stripe_md *lsm = lli->lli_smd;
297 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
298 inode->i_generation, inode);
300 #ifdef CONFIG_FS_POSIX_ACL
/* remote-client ACL bookkeeping is kept only on the filesystem root */
301 if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
302 inode == inode->i_sb->s_root->d_inode) {
303 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
306 if (unlikely(fd->fd_flags & LL_FILE_RMTACL)) {
307 fd->fd_flags &= ~LL_FILE_RMTACL;
308 rct_del(&sbi->ll_rct, cfs_curproc_pid());
309 et_search_free(&sbi->ll_et, cfs_curproc_pid());
314 if (inode->i_sb->s_root != file->f_dentry)
315 ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1);
316 fd = LUSTRE_FPRIVATE(file);
/* The last ref on @file, maybe not the owner pid of statahead.
 * Different processes can open the same dir, "ll_opendir_key" means:
 * it is me that should stop the statahead thread. */
322 if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
323 ll_stop_statahead(inode, lli->lli_opendir_key);
325 if (inode->i_sb->s_root == file->f_dentry) {
326 LUSTRE_FPRIVATE(file) = NULL;
327 ll_file_data_put(fd);
/* fold any deferred async write error into lli_async_rc before close */
332 lov_test_and_clear_async_rc(lsm);
333 lli->lli_async_rc = 0;
335 rc = ll_md_close(sbi->ll_md_exp, inode, file);
/* Enqueue an OPEN intent lock on the MDS for @file, optionally carrying
 * striping info (@lmm/@lmmsize) when called from setstripe.  On success
 * updates the inode from the reply and attaches DLM lock data.
 * NOTE(review): the ESTALE retry/exit lines are partly elided here. */
339 static int ll_intent_file_open(struct file *file, void *lmm,
340 int lmmsize, struct lookup_intent *itp)
342 struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
343 struct dentry *parent = file->f_dentry->d_parent;
344 const char *name = file->f_dentry->d_name.name;
345 const int len = file->f_dentry->d_name.len;
346 struct md_op_data *op_data;
347 struct ptlrpc_request *req;
/* Usually we come here only for NFSD, and we want open lock.
 * But we can also get here with pre 2.6.15 patchless kernels, and in
 * that case that lock is also ok */
/* We can also get here if there was cached open handle in revalidate_it
 * but it disappeared while we were getting from there to ll_file_open.
 * But this means this file was closed and immediately opened which
 * makes a good candidate for using OPEN lock */
/* If lmmsize & lmm are not 0, we are just setting stripe info
 * parameters. No need for the open lock */
363 if (!lmm && !lmmsize)
364 itp->it_flags |= MDS_OPEN_LOCK;
366 op_data = ll_prep_md_op_data(NULL, parent->d_inode,
367 file->f_dentry->d_inode, name, len,
368 O_RDWR, LUSTRE_OPC_ANY, NULL);
370 RETURN(PTR_ERR(op_data));
372 rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
373 0 /*unused */, &req, ll_md_blocking_ast, 0);
374 ll_finish_md_op_data(op_data);
/* reason to keep our own exit path - don't flood log
 * with messages with -ESTALE errors.
 */
379 if (!it_disposition(itp, DISP_OPEN_OPEN) ||
380 it_open_error(DISP_OPEN_OPEN, itp))
382 ll_release_openhandle(file->f_dentry, itp);
386 if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) {
387 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, itp);
388 CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc);
/* bind the granted DLM lock to the inode so cancel finds it */
392 if (itp->d.lustre.it_lock_mode)
393 md_set_lock_data(sbi->ll_md_exp,
394 &itp->d.lustre.it_lock_handle,
395 file->f_dentry->d_inode);
397 rc = ll_prep_inode(&file->f_dentry->d_inode, req, NULL);
399 ptlrpc_req_finished(itp->d.lustre.it_data);
400 it_clear_disposition(itp, DISP_ENQ_COMPLETE);
401 ll_intent_drop_lock(itp);
/* Fill an obd_client_handle from the MDS open reply attached to the
 * intent: copy the server file handle, fid and open flags, record the
 * I/O epoch on the inode, and register the request for open replay. */
406 static int ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli,
407 struct lookup_intent *it, struct obd_client_handle *och)
409 struct ptlrpc_request *req = it->d.lustre.it_data;
410 struct mdt_body *body;
414 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
415 LASSERT(body != NULL); /* reply already checked out */
417 memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
418 och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
419 och->och_fid = lli->lli_fid;
420 och->och_flags = it->it_flags;
421 lli->lli_ioepoch = body->ioepoch;
423 return md_set_open_replay_data(md_exp, och, req);
/* Finish a local open: when @och is supplied, fill it from the intent
 * reply (new MDS handle); then attach @fd as the file's private data,
 * initialize readahead state, and remember the open mode.
 * NOTE(review): error/return lines are elided in this view. */
426 int ll_local_open(struct file *file, struct lookup_intent *it,
427 struct ll_file_data *fd, struct obd_client_handle *och)
429 struct inode *inode = file->f_dentry->d_inode;
430 struct ll_inode_info *lli = ll_i2info(inode);
433 LASSERT(!LUSTRE_FPRIVATE(file));
438 struct ptlrpc_request *req = it->d.lustre.it_data;
439 struct mdt_body *body;
442 rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
446 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
447 if ((it->it_flags & FMODE_WRITE) &&
448 (body->valid & OBD_MD_FLSIZE))
449 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
450 lli->lli_ioepoch, PFID(&lli->lli_fid));
453 LUSTRE_FPRIVATE(file) = fd;
454 ll_readahead_init(inode, &fd->fd_ras);
455 fd->fd_omode = it->it_flags;
459 /* Open a file, and (for the very first open) create objects on the OSTs at
460 * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
461 * creation or open until ll_lov_setstripe() ioctl is called. We grab
462 * lli_open_sem to ensure no other process will create objects, send the
463 * stripe MD to the MDS, or try to destroy the objects if that fails.
465 * If we already have the stripe MD locally then we don't request it in
466 * md_open(), by passing a lmm_size = 0.
468 * It is up to the application to ensure no other processes open this file
469 * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
470 * used. We might be able to avoid races of that sort by getting lli_open_sem
471 * before returning in the O_LOV_DELAY_CREATE case and dropping it here
472 * or in ll_file_release(), but I'm not sure that is desirable/necessary.
*/
474 int ll_file_open(struct inode *inode, struct file *file)
476 struct ll_inode_info *lli = ll_i2info(inode);
477 struct lookup_intent *it, oit = { .it_op = IT_OPEN,
478 .it_flags = file->f_flags };
479 struct lov_stripe_md *lsm;
480 struct ptlrpc_request *req = NULL;
481 struct obd_client_handle **och_p;
483 struct ll_file_data *fd;
484 int rc = 0, opendir_set = 0;
487 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
488 inode->i_generation, inode, file->f_flags);
490 #ifdef HAVE_VFS_INTENT_PATCHES
493 it = file->private_data; /* XXX: compat macro */
494 file->private_data = NULL; /* prevent ll_local_open assertion */
497 fd = ll_file_data_get();
/* directory open: claim or reset statahead ownership for this fd */
502 if (S_ISDIR(inode->i_mode)) {
504 spin_lock(&lli->lli_lock);
505 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
506 LASSERT(lli->lli_sai == NULL);
507 lli->lli_opendir_key = fd;
508 lli->lli_opendir_pid = cfs_curproc_pid();
510 } else if (unlikely(lli->lli_opendir_pid == cfs_curproc_pid() &&
511 lli->lli_opendir_key != NULL)) {
/* Two cases for this:
 * (1) The same process opens such directory many times.
 * (2) The old process opened the directory, and exited
 *     before its children processes. Then new process
 *     with the same pid opens such directory before the
 *     old process's children processes exit.
 * Reset statahead for such cases. */
519 spin_unlock(&lli->lli_lock);
520 CDEBUG(D_INFO, "Conflict statahead for %.*s "DFID
521 " reset it.\n", file->f_dentry->d_name.len,
522 file->f_dentry->d_name.name,
523 PFID(&lli->lli_fid));
524 ll_stop_statahead(inode, lli->lli_opendir_key);
527 spin_unlock(&lli->lli_lock);
/* root dentry: nothing to open on the MDS, just attach the fd */
530 if (inode->i_sb->s_root == file->f_dentry) {
531 LUSTRE_FPRIVATE(file) = fd;
535 if (!it || !it->d.lustre.it_disposition) {
/* Convert f_flags into access mode. We cannot use file->f_mode,
 * because everything but O_ACCMODE mask was stripped from
 * it (the "+1" maps O_RDONLY/O_WRONLY/O_RDWR to FMODE_READ|FMODE_WRITE
 * bits — continuation lines are elided here). */
539 if ((oit.it_flags + 1) & O_ACCMODE)
541 if (file->f_flags & O_TRUNC)
542 oit.it_flags |= FMODE_WRITE;
/* kernel only call f_op->open in dentry_open. filp_open calls
 * dentry_open after call to open_namei that checks permissions.
 * Only nfsd_open call dentry_open directly without checking
 * permissions and because of that this code below is safe. */
548 if (oit.it_flags & FMODE_WRITE)
549 oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
/* We do not want O_EXCL here, presumably we opened the file
 * already? XXX - NFS implications? */
553 oit.it_flags &= ~O_EXCL;
/* Let's see if we have file open on MDS already. */
560 if (it->it_flags & FMODE_WRITE) {
561 och_p = &lli->lli_mds_write_och;
562 och_usecount = &lli->lli_open_fd_write_count;
563 } else if (it->it_flags & FMODE_EXEC) {
564 och_p = &lli->lli_mds_exec_och;
565 och_usecount = &lli->lli_open_fd_exec_count;
567 och_p = &lli->lli_mds_read_och;
568 och_usecount = &lli->lli_open_fd_read_count;
571 down(&lli->lli_och_sem);
572 if (*och_p) { /* Open handle is present */
573 if (it_disposition(it, DISP_OPEN_OPEN)) {
/* Well, there's extra open request that we do not need,
 * let's close it somehow. This will decref request. */
576 rc = it_open_error(DISP_OPEN_OPEN, it);
578 up(&lli->lli_och_sem);
579 ll_file_data_put(fd);
580 GOTO(out_openerr, rc);
582 ll_release_openhandle(file->f_dentry, it);
583 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
/* reuse the cached MDS handle; no och needed for this fd */
588 rc = ll_local_open(file, it, fd, NULL);
591 up(&lli->lli_och_sem);
592 ll_file_data_put(fd);
593 GOTO(out_openerr, rc);
596 LASSERT(*och_usecount == 0);
597 if (!it->d.lustre.it_disposition) {
/* We cannot just request lock handle now, new ELC code
 * means that one of other OPEN locks for this file
 * could be cancelled, and since blocking ast handler
 * would attempt to grab och_sem as well, that would
 * result in a deadlock */
603 up(&lli->lli_och_sem);
604 it->it_create_mode |= M_CHECK_STALE;
605 rc = ll_intent_file_open(file, NULL, 0, it);
606 it->it_create_mode &= ~M_CHECK_STALE;
608 ll_file_data_put(fd);
609 GOTO(out_openerr, rc);
/* Got some error? Release the request */
613 if (it->d.lustre.it_status < 0) {
614 req = it->d.lustre.it_data;
615 ptlrpc_req_finished(req);
617 md_set_lock_data(ll_i2sbi(inode)->ll_md_exp,
618 &it->d.lustre.it_lock_handle,
619 file->f_dentry->d_inode);
622 OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
624 ll_file_data_put(fd);
625 GOTO(out_och_free, rc = -ENOMEM);
628 req = it->d.lustre.it_data;
/* md_intent_lock() didn't get a request ref if there was an
 * open error, so don't do cleanup on the request here
 */
/* XXX (green): Should not we bail out on any error here, not
 * just open error? */
635 rc = it_open_error(DISP_OPEN_OPEN, it);
637 ll_file_data_put(fd);
638 GOTO(out_och_free, rc);
641 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
642 rc = ll_local_open(file, it, fd, *och_p);
644 ll_file_data_put(fd);
645 GOTO(out_och_free, rc);
648 up(&lli->lli_och_sem);
/* Must do this outside lli_och_sem lock to prevent deadlock where
 * different kind of OPEN lock for this same inode gets cancelled
 * by ldlm_cancel_lru */
653 if (!S_ISREG(inode->i_mode))
/* delayed OST object creation: O_LOV_DELAY_CREATE or read-only open */
660 if (file->f_flags & O_LOV_DELAY_CREATE ||
661 !(file->f_mode & FMODE_WRITE)) {
662 CDEBUG(D_INODE, "object creation was delayed\n");
666 file->f_flags &= ~O_LOV_DELAY_CREATE;
669 ptlrpc_req_finished(req);
671 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
675 OBD_FREE(*och_p, sizeof (struct obd_client_handle));
676 *och_p = NULL; /* OBD_FREE writes some magic there */
679 up(&lli->lli_och_sem);
/* error path: give back statahead ownership claimed above */
681 if (opendir_set != 0)
682 ll_stop_statahead(inode, lli->lli_opendir_key);
688 /* Fills the obdo with the attributes for the lsm */
/* Issue an async getattr to the OSTs for every stripe in @lsm and wait
 * for completion; on success only the OST-owned attribute bits (size,
 * blocks, blksize, times) are left valid in the obdo. */
689 static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
690 struct obd_capa *capa, struct obdo *obdo)
692 struct ptlrpc_request_set *set;
693 struct obd_info oinfo = { { { 0 } } };
698 LASSERT(lsm != NULL);
702 oinfo.oi_oa->o_id = lsm->lsm_object_id;
703 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
704 oinfo.oi_oa->o_mode = S_IFREG;
705 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
706 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
707 OBD_MD_FLBLKSZ | OBD_MD_FLATIME |
708 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
710 oinfo.oi_capa = capa;
712 set = ptlrpc_prep_set();
714 CERROR("can't allocate ptlrpc set\n");
717 rc = obd_getattr_async(exp, &oinfo, set);
719 rc = ptlrpc_set_wait(set);
720 ptlrpc_set_destroy(set);
/* keep only the attributes the OSTs are authoritative for */
723 oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
724 OBD_MD_FLATIME | OBD_MD_FLMTIME |
725 OBD_MD_FLCTIME | OBD_MD_FLSIZE);
729 /* Fills the obdo with the attributes for the inode defined by lsm */
/* Fetch OST attributes for @inode's stripes and refresh the in-core
 * inode from the resulting obdo; takes and releases an MDS capability. */
730 int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
732 struct ll_inode_info *lli = ll_i2info(inode);
733 struct obd_capa *capa = ll_mdscapa_get(inode);
737 rc = ll_lsm_getattr(lli->lli_smd, ll_i2dtexp(inode), capa, obdo);
740 obdo_refresh_inode(inode, obdo, obdo->o_valid);
742 "objid "LPX64" size %Lu, blocks %llu, blksize %lu\n",
743 lli->lli_smd->lsm_object_id, i_size_read(inode),
744 (unsigned long long)inode->i_blocks,
745 (unsigned long)ll_inode_blksize(inode));
/* Merge the per-stripe lock value blocks (LVBs) into one and apply the
 * result (size, blocks, m/a/ctime) to the in-core inode, under the
 * inode size lock. */
750 int ll_merge_lvb(struct inode *inode)
752 struct ll_inode_info *lli = ll_i2info(inode);
753 struct ll_sb_info *sbi = ll_i2sbi(inode);
759 ll_inode_size_lock(inode, 1);
760 inode_init_lvb(inode, &lvb);
761 rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
762 i_size_write(inode, lvb.lvb_size);
763 inode->i_blocks = lvb.lvb_blocks;
765 LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
766 LTIME_S(inode->i_atime) = lvb.lvb_atime;
767 LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
768 ll_inode_size_unlock(inode, 1);
/* ioctl helper: glimpse current OST attributes for @lsm and copy size,
 * blocks and times into the caller-supplied stat structure. */
773 int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
776 struct obdo obdo = { 0 };
779 rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, &obdo);
781 st->st_size = obdo.o_size;
782 st->st_blocks = obdo.o_blocks;
783 st->st_mtime = obdo.o_mtime;
784 st->st_atime = obdo.o_atime;
785 st->st_ctime = obdo.o_ctime;
/* Initialize a cl_io for a read (@write == 0) or write (@write != 0) on
 * @file: zero the structure, propagate O_NONBLOCK/O_APPEND, and pick the
 * DLM locking policy (never for nolock mounts / LL_FILE_IGNORE_LOCK,
 * mandatory for append, otherwise "maybe"). */
790 void ll_io_init(struct cl_io *io, const struct file *file, int write)
792 struct inode *inode = file->f_dentry->d_inode;
793 struct ll_sb_info *sbi = ll_i2sbi(inode);
794 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
797 memset(io, 0, sizeof *io);
798 io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK;
800 io->u.ci_wr.wr_append = file->f_flags & O_APPEND;
801 io->ci_obj = ll_i2info(inode)->lli_clob;
802 io->ci_lockreq = CILR_MAYBE;
803 if (fd->fd_flags & LL_FILE_IGNORE_LOCK ||
804 sbi->ll_flags & LL_SBI_NOLCK) {
805 io->ci_lockreq = CILR_NEVER;
806 io->ci_no_srvlock = 1;
807 } else if (file->f_flags & O_APPEND) {
808 io->ci_lockreq = CILR_MANDATORY;
/* Common driver for all read/write/sendfile entry points: set up the
 * cl_io from @args, run the client I/O loop, and on success advance
 * *ppos to the final position.  Returns bytes transferred or -errno. */
812 static ssize_t ll_file_io_generic(const struct lu_env *env,
813 struct ccc_io_args *args, struct file *file,
814 enum cl_io_type iot, loff_t *ppos, size_t count)
820 io = &ccc_env_info(env)->cti_io;
821 ll_io_init(io, file, iot == CIT_WRITE);
824 io->u.ci_rd.rd_is_sendfile = args->cia_is_sendfile;
826 if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
827 struct vvp_io *vio = vvp_env_io(env);
828 struct ccc_io *cio = ccc_env_io(env);
/* sendfile supplies an actor callback instead of an iovec */
829 if (cl_io_is_sendfile(io)) {
830 vio->u.read.cui_actor = args->cia_actor;
831 vio->u.read.cui_target = args->cia_target;
833 cio->cui_iov = args->cia_iov;
834 cio->cui_nrsegs = args->cia_nrsegs;
835 #ifndef HAVE_FILE_WRITEV
836 cio->cui_iocb = args->cia_iocb;
839 cio->cui_fd = LUSTRE_FPRIVATE(file);
840 result = cl_io_loop(env, io);
/* cl_io_rw_init() handled IO */
843 result = io->ci_result;
844 if (io->ci_nob > 0) {
846 *ppos = io->u.ci_wr.wr.crw_pos;
/* Validate an iovec array and compute the total byte count.
 * XXX: exact copy from kernel code (__generic_file_aio_write_nolock) —
 * trims *nr_segs at the first invalid/unreadable segment. */
856 static int ll_file_get_iov_count(const struct iovec *iov,
857 unsigned long *nr_segs, size_t *count)
862 for (seg = 0; seg < *nr_segs; seg++) {
863 const struct iovec *iv = &iov[seg];
/*
 * If any segment has a negative length, or the cumulative
 * length ever wraps negative then return -EINVAL.
 */
870 if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
872 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
877 cnt -= iv->iov_len; /* This segment is no good */
884 #ifdef HAVE_FILE_READV
/* Vectored read entry point (old kernels with f_op->readv): validate the
 * iovec, then run the generic client I/O path for CIT_READ. */
885 static ssize_t ll_file_readv(struct file *file, const struct iovec *iov,
886 unsigned long nr_segs, loff_t *ppos)
889 struct ccc_io_args *args;
895 result = ll_file_get_iov_count(iov, &nr_segs, &count);
899 env = cl_env_get(&refcheck);
901 RETURN(PTR_ERR(env));
903 args = &vvp_env_info(env)->vti_args;
904 args->cia_is_sendfile = 0;
905 args->cia_iov = (struct iovec *)iov;
906 args->cia_nrsegs = nr_segs;
907 result = ll_file_io_generic(env, args, file, CIT_READ, ppos, count);
908 cl_env_put(env, &refcheck);
/* Plain read(): wrap the user buffer in a single-segment iovec kept in
 * the per-env scratch area and delegate to ll_file_readv(). */
912 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
916 struct iovec *local_iov;
921 env = cl_env_get(&refcheck);
923 RETURN(PTR_ERR(env));
925 local_iov = &vvp_env_info(env)->vti_local_iov;
926 local_iov->iov_base = (void __user *)buf;
927 local_iov->iov_len = count;
928 result = ll_file_readv(file, local_iov, 1, ppos);
929 cl_env_put(env, &refcheck);
/* AIO read entry point (f_op->aio_read): validate the iovec, carry the
 * kiocb through ccc_io_args, and run the generic CIT_READ path using
 * iocb->ki_pos as the file position. */
934 static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
935 unsigned long nr_segs, loff_t pos)
938 struct ccc_io_args *args;
944 result = ll_file_get_iov_count(iov, &nr_segs, &count);
948 env = cl_env_get(&refcheck);
950 RETURN(PTR_ERR(env));
952 args = &vvp_env_info(env)->vti_args;
953 args->cia_is_sendfile = 0;
954 args->cia_iov = (struct iovec *)iov;
955 args->cia_nrsegs = nr_segs;
956 args->cia_iocb = iocb;
957 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
958 &iocb->ki_pos, count);
959 cl_env_put(env, &refcheck);
/* Plain read() for AIO-based kernels: build a synchronous kiocb and a
 * one-segment iovec in the env scratch area, call ll_file_aio_read(),
 * then copy the advanced position back to *ppos. */
963 static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
967 struct iovec *local_iov;
973 env = cl_env_get(&refcheck);
975 RETURN(PTR_ERR(env));
977 local_iov = &vvp_env_info(env)->vti_local_iov;
978 kiocb = &vvp_env_info(env)->vti_kiocb;
979 local_iov->iov_base = (void __user *)buf;
980 local_iov->iov_len = count;
981 init_sync_kiocb(kiocb, file);
982 kiocb->ki_pos = *ppos;
983 kiocb->ki_left = count;
985 result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
986 *ppos = kiocb->ki_pos;
988 cl_env_put(env, &refcheck);
/*
 * Write to a file (through the page cache).
 */
996 #ifdef HAVE_FILE_WRITEV
/* Vectored write entry point (old kernels with f_op->writev): validate
 * the iovec, then run the generic client I/O path for CIT_WRITE. */
997 static ssize_t ll_file_writev(struct file *file, const struct iovec *iov,
998 unsigned long nr_segs, loff_t *ppos)
1001 struct ccc_io_args *args;
1007 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1011 env = cl_env_get(&refcheck);
1013 RETURN(PTR_ERR(env));
1015 args = &vvp_env_info(env)->vti_args;
1016 args->cia_iov = (struct iovec *)iov;
1017 args->cia_nrsegs = nr_segs;
1018 result = ll_file_io_generic(env, args, file, CIT_WRITE, ppos, count);
1019 cl_env_put(env, &refcheck);
/* Plain write(): wrap the user buffer in a single-segment iovec kept in
 * the per-env scratch area and delegate to ll_file_writev(). */
1023 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1027 struct iovec *local_iov;
1032 env = cl_env_get(&refcheck);
1034 RETURN(PTR_ERR(env));
1036 local_iov = &vvp_env_info(env)->vti_local_iov;
1037 local_iov->iov_base = (void __user *)buf;
1038 local_iov->iov_len = count;
1040 result = ll_file_writev(file, local_iov, 1, ppos);
1041 cl_env_put(env, &refcheck);
1045 #else /* AIO stuff */
/* AIO write entry point (f_op->aio_write): validate the iovec, carry the
 * kiocb through ccc_io_args, and run the generic CIT_WRITE path using
 * iocb->ki_pos as the file position. */
1046 static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1047 unsigned long nr_segs, loff_t pos)
1050 struct ccc_io_args *args;
1056 result = ll_file_get_iov_count(iov, &nr_segs, &count);
1060 env = cl_env_get(&refcheck);
1062 RETURN(PTR_ERR(env));
1064 args = &vvp_env_info(env)->vti_args;
1065 args->cia_iov = (struct iovec *)iov;
1066 args->cia_nrsegs = nr_segs;
1067 args->cia_iocb = iocb;
1068 result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
1069 &iocb->ki_pos, count);
1070 cl_env_put(env, &refcheck);
/* Plain write() for AIO-based kernels: build a synchronous kiocb and a
 * one-segment iovec in the env scratch area, call ll_file_aio_write(),
 * then copy the advanced position back to *ppos. */
1074 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
1078 struct iovec *local_iov;
1079 struct kiocb *kiocb;
1084 env = cl_env_get(&refcheck);
1086 RETURN(PTR_ERR(env));
1088 local_iov = &vvp_env_info(env)->vti_local_iov;
1089 kiocb = &vvp_env_info(env)->vti_kiocb;
1090 local_iov->iov_base = (void __user *)buf;
1091 local_iov->iov_len = count;
1092 init_sync_kiocb(kiocb, file);
1093 kiocb->ki_pos = *ppos;
1094 kiocb->ki_left = count;
1096 result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
1097 *ppos = kiocb->ki_pos;
1099 cl_env_put(env, &refcheck);
/*
 * Send file content (through pagecache) somewhere with helper:
 * runs the generic CIT_READ path in sendfile mode, where @actor is
 * called with @target for each chunk instead of filling an iovec.
 */
1108 static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
1109 read_actor_t actor, void *target)
1112 struct ccc_io_args *args;
1117 env = cl_env_get(&refcheck);
1119 RETURN(PTR_ERR(env));
1121 args = &vvp_env_info(env)->vti_args;
1122 args->cia_is_sendfile = 1;
1123 args->cia_target = target;
1124 args->cia_actor = actor;
1125 result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
1126 cl_env_put(env, &refcheck);
/* LL_IOC_RECREATE_OBJ handler (admin-only): recreate a lost OST object
 * for @inode.  Copies the request from userspace, clones the current
 * stripe MD under the inode size lock, fills an obdo with the target
 * object id/group/OST index, and asks the OSC to recreate the object.
 * NOTE(review): oa allocation and several error-path lines are elided. */
1130 static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
1133 struct obd_export *exp = ll_i2dtexp(inode);
1134 struct ll_recreate_obj ucreatp;
1135 struct obd_trans_info oti = { 0 };
1136 struct obdo *oa = NULL;
1139 struct lov_stripe_md *lsm, *lsm2;
1142 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1145 if (copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
1146 sizeof(struct ll_recreate_obj)))
1153 ll_inode_size_lock(inode, 0);
1154 lsm = ll_i2info(inode)->lli_smd;
1156 GOTO(out, rc = -ENOENT);
1157 lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
1158 (lsm->lsm_stripe_count));
1160 OBD_ALLOC(lsm2, lsm_size);
1162 GOTO(out, rc = -ENOMEM);
1164 oa->o_id = ucreatp.lrc_id;
1165 oa->o_gr = ucreatp.lrc_group;
/* o_nlink is reused to carry the target OST index for recreate */
1166 oa->o_nlink = ucreatp.lrc_ost_idx;
1167 oa->o_flags |= OBD_FL_RECREATE_OBJS;
1168 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
1169 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
1170 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
1172 memcpy(lsm2, lsm, lsm_size);
1173 rc = obd_create(exp, oa, &lsm2, &oti);
1175 OBD_FREE(lsm2, lsm_size);
1178 ll_inode_size_unlock(inode, 0);
/* Set striping info for @inode by performing an OPEN intent that carries
 * the lov_user_md (@lum).  Fails if a stripe MD already exists; on
 * success the open handle obtained purely for setstripe is released. */
1183 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
1184 int flags, struct lov_user_md *lum, int lum_size)
1186 struct lov_stripe_md *lsm;
1187 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1191 ll_inode_size_lock(inode, 0);
1192 lsm = ll_i2info(inode)->lli_smd;
/* striping is write-once: refuse if it already exists */
1194 ll_inode_size_unlock(inode, 0);
1195 CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
1200 rc = ll_intent_file_open(file, lum, lum_size, &oit);
1203 if (it_disposition(&oit, DISP_LOOKUP_NEG))
1204 GOTO(out_req_free, rc = -ENOENT);
1205 rc = oit.d.lustre.it_status;
1207 GOTO(out_req_free, rc);
1209 ll_release_openhandle(file->f_dentry, &oit);
1212 ll_inode_size_unlock(inode, 0);
1213 ll_intent_release(&oit);
1216 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
/* Fetch the LOV EA (striping metadata) for @filename under directory
 * @inode via md_getattr_name.  Byte-swaps the reply to host endianness
 * when needed and, for the legacy LOV_MAGIC_JOIN format, expands it into
 * a lov_user_md_join with per-stripe extents.  On success *lmmp points
 * into (or is allocated alongside) *request, which the caller must
 * release with ptlrpc_req_finished(). */
1220 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
1221 struct lov_mds_md **lmmp, int *lmm_size,
1222 struct ptlrpc_request **request)
1224 struct ll_sb_info *sbi = ll_i2sbi(inode);
1225 struct mdt_body *body;
1226 struct lov_mds_md *lmm = NULL;
1227 struct ptlrpc_request *req = NULL;
1228 struct obd_capa *oc;
1231 rc = ll_get_max_mdsize(sbi, &lmmsize);
1235 oc = ll_mdscapa_get(inode);
1236 rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(inode),
1237 oc, filename, strlen(filename) + 1,
1238 OBD_MD_FLEASIZE | OBD_MD_FLDIREA, lmmsize,
1239 ll_i2suppgid(inode), &req);
1242 CDEBUG(D_INFO, "md_getattr_name failed "
1243 "on %s: rc %d\n", filename, rc);
1247 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
1248 LASSERT(body != NULL); /* checked by mdc_getattr_name */
1250 lmmsize = body->eadatasize;
1252 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
1254 GOTO(out, rc = -ENODATA);
1257 lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize);
1258 LASSERT(lmm != NULL);
1260 if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) &&
1261 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3)) &&
1262 (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_JOIN))) {
1263 GOTO(out, rc = -EPROTO);
/*
 * This is coming from the MDS, so is probably in
 * little endian. We convert it to host endian before
 * passing it to userspace.
 */
/* big-endian host: the on-wire LE data needs swabbing */
1271 if (LOV_MAGIC != cpu_to_le32(LOV_MAGIC)) {
/* if function called for directory - we should
 * avoid swab not existent lsm objects */
1274 if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
1275 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1276 if (S_ISREG(body->mode))
1277 lustre_swab_lov_user_md_objects(
1278 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1279 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1280 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
1281 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1282 if (S_ISREG(body->mode))
1283 lustre_swab_lov_user_md_objects(
1284 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1285 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1286 } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_JOIN)) {
1287 lustre_swab_lov_user_md_join((struct lov_user_md_join *)lmm);
/* legacy JOIN layout: expand into user-visible per-stripe extents */
1291 if (lmm->lmm_magic == LOV_MAGIC_JOIN) {
1292 struct lov_stripe_md *lsm;
1293 struct lov_user_md_join *lmj;
1294 int lmj_size, i, aindex = 0;
1296 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
1298 GOTO(out, rc = -ENOMEM);
1299 rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, lsm);
1301 GOTO(out_free_memmd, rc);
1303 lmj_size = sizeof(struct lov_user_md_join) +
1304 lsm->lsm_stripe_count *
1305 sizeof(struct lov_user_ost_data_join);
1306 OBD_ALLOC(lmj, lmj_size);
1308 GOTO(out_free_memmd, rc = -ENOMEM);
1310 memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
1311 for (i = 0; i < lsm->lsm_stripe_count; i++) {
1312 struct lov_extent *lex =
1313 &lsm->lsm_array->lai_ext_array[aindex];
/* advance to the extent that covers stripe i */
1315 if (lex->le_loi_idx + lex->le_stripe_count <= i)
1317 CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
1318 LPU64" len %d\n", aindex, i,
1319 lex->le_start, (int)lex->le_len);
1320 lmj->lmm_objects[i].l_extent_start =
/* le_len == -1 marks an extent running to EOF */
1323 if ((int)lex->le_len == -1)
1324 lmj->lmm_objects[i].l_extent_end = -1;
1326 lmj->lmm_objects[i].l_extent_end =
1327 lex->le_start + lex->le_len;
1328 lmj->lmm_objects[i].l_object_id =
1329 lsm->lsm_oinfo[i]->loi_id;
1330 lmj->lmm_objects[i].l_object_gr =
1331 lsm->lsm_oinfo[i]->loi_gr;
1332 lmj->lmm_objects[i].l_ost_gen =
1333 lsm->lsm_oinfo[i]->loi_ost_gen;
1334 lmj->lmm_objects[i].l_ost_idx =
1335 lsm->lsm_oinfo[i]->loi_ost_idx;
1337 lmm = (struct lov_mds_md *)lmj;
1340 obd_free_memmd(sbi->ll_dt_exp, &lsm);
1344 *lmm_size = lmmsize;
/*
 * ioctl helper (LL_IOC_LOV_SETEA): copy a caller-supplied LOV EA from
 * userspace and apply it to the file via ll_lov_setstripe_ea_info().
 * Requires CAP_SYS_ADMIN since MDS_OPEN_HAS_OBJS trusts the given objects.
 * NOTE(review): some lines (rc declaration, RETURN paths) are elided here.
 */
1349 static int ll_lov_setea(struct inode *inode, struct file *file,
1352 int flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE;
1353 struct lov_user_md *lump;
/* Buffer sized for one lov_user_md plus a single OST object entry. */
1354 int lum_size = sizeof(struct lov_user_md) +
1355 sizeof(struct lov_user_ost_data);
1359 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
1362 OBD_ALLOC(lump, lum_size);
1366 if (copy_from_user(lump, (struct lov_user_md *)arg, lum_size)) {
1367 OBD_FREE(lump, lum_size);
1371 rc = ll_lov_setstripe_ea_info(inode, file, flags, lump, lum_size);
/* Kernel copy is no longer needed once the EA has been applied. */
1373 OBD_FREE(lump, lum_size);
/*
 * ioctl helper (LL_IOC_LOV_SETSTRIPE): read a v1 or v3 lov_user_md from
 * userspace and set the file striping, then report the result back via
 * LL_IOC_LOV_GETSTRIPE so the caller sees what was actually created.
 * NOTE(review): lum_size/rc declarations and error RETURNs are elided.
 */
1377 static int ll_lov_setstripe(struct inode *inode, struct file *file,
1380 struct lov_user_md_v3 lumv3;
/* lumv3 doubles as storage for the smaller v1 layout. */
1381 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
1382 struct lov_user_md_v1 *lumv1p = (struct lov_user_md_v1 *)arg;
1383 struct lov_user_md_v3 *lumv3p = (struct lov_user_md_v3 *)arg;
1386 int flags = FMODE_WRITE;
1389 /* first try with v1 which is smaller than v3 */
1390 lum_size = sizeof(struct lov_user_md_v1);
1391 if (copy_from_user(lumv1, lumv1p, lum_size))
/* If the magic says v3, re-copy the full v3 structure. */
1394 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
1395 lum_size = sizeof(struct lov_user_md_v3);
1396 if (copy_from_user(&lumv3, lumv3p, lum_size))
1400 rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
/* On success, zero the user's stripe_count then echo the resulting
 * layout back through the GETSTRIPE path. */
1402 put_user(0, &lumv1p->lmm_stripe_count);
1403 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
1404 0, ll_i2info(inode)->lli_smd,
/*
 * ioctl helper (LL_IOC_LOV_GETSTRIPE): return the file's striping to
 * userspace by delegating to the data (LOV) export's iocontrol handler.
 */
1410 static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
1412 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1417 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
/*
 * Take a group lock (gid = @arg) on the file for LL_IOC_GROUP_LOCK.
 * Only one group lock per file descriptor is allowed; the LL_FILE_*
 * flags and the grouplock itself are protected by lli->lli_lock.
 */
1421 int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1423 struct ll_inode_info *lli = ll_i2info(inode);
1424 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1425 struct ccc_grouplock grouplock;
/* Reject a second group lock on the same file descriptor. */
1429 spin_lock(&lli->lli_lock);
1430 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1431 CERROR("group lock already existed with gid %lu\n",
1432 fd->fd_grouplock.cg_gid);
1433 spin_unlock(&lli->lli_lock);
1436 LASSERT(fd->fd_grouplock.cg_lock == NULL);
1437 spin_unlock(&lli->lli_lock);
/* Acquire the cl-layer group lock outside the spinlock; honours
 * O_NONBLOCK by not waiting for a conflicting group. */
1439 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1440 arg, (file->f_flags & O_NONBLOCK), &grouplock);
/* Re-check under the lock: another thread may have raced us here. */
1444 spin_lock(&lli->lli_lock);
1445 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1446 spin_unlock(&lli->lli_lock);
1447 CERROR("another thread just won the race\n");
1448 cl_put_grouplock(&grouplock);
1452 fd->fd_flags |= (LL_FILE_GROUP_LOCKED | LL_FILE_IGNORE_LOCK);
1453 fd->fd_grouplock = grouplock;
1454 spin_unlock(&lli->lli_lock);
1456 CDEBUG(D_INFO, "group lock %lu obtained\n", arg);
/*
 * Release the group lock held on @file for LL_IOC_GROUP_UNLOCK.
 * Fails if no group lock is held or if @arg does not match the gid
 * that was used to take the lock.
 */
1460 int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg)
1462 struct ll_inode_info *lli = ll_i2info(inode);
1463 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1464 struct ccc_grouplock grouplock;
1467 spin_lock(&lli->lli_lock);
1468 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
1469 spin_unlock(&lli->lli_lock);
1470 CERROR("no group lock held\n");
1473 LASSERT(fd->fd_grouplock.cg_lock != NULL);
/* The caller must release with the same gid it locked with. */
1475 if (fd->fd_grouplock.cg_gid != arg) {
1476 CERROR("group lock %lu doesn't match current id %lu\n",
1477 arg, fd->fd_grouplock.cg_gid);
1478 spin_unlock(&lli->lli_lock);
/* Detach the grouplock from the fd under the spinlock, then drop the
 * cl-layer reference outside it. */
1482 grouplock = fd->fd_grouplock;
1483 fd->fd_grouplock.cg_env = NULL;
1484 fd->fd_grouplock.cg_lock = NULL;
1485 fd->fd_grouplock.cg_gid = 0;
1486 fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED | LL_FILE_IGNORE_LOCK);
1487 spin_unlock(&lli->lli_lock);
1489 cl_put_grouplock(&grouplock);
1490 CDEBUG(D_INFO, "group lock %lu released\n", arg);
1494 #if LUSTRE_FIX >= 50
/*
 * Validate that @tail may be joined onto @head: the server must support
 * join, both inodes must be regular files, they must differ, and head's
 * size must be a multiple of JOIN_FILE_ALIGN (64K).
 * NOTE(review): the error RETURNs after each CERROR are elided here.
 */
1495 static int join_sanity_check(struct inode *head, struct inode *tail)
1498 if ((ll_i2sbi(head)->ll_flags & LL_SBI_JOIN) == 0) {
1499 CERROR("server do not support join \n");
1502 if (!S_ISREG(tail->i_mode) || !S_ISREG(head->i_mode)) {
1503 CERROR("tail ino %lu and ino head %lu must be regular\n",
1504 head->i_ino, tail->i_ino);
1507 if (head->i_ino == tail->i_ino) {
1508 CERROR("file %lu can not be joined to itself \n", head->i_ino);
1511 if (i_size_read(head) % JOIN_FILE_ALIGN) {
1512 CERROR("hsize %llu must be times of 64K\n", i_size_read(head));
/*
 * Perform the MDS join operation: enqueue an IT_OPEN intent with
 * M_JOIN_FILE so the MDS appends the tail file onto @head_inode.
 * Any lock or open handle granted as a side effect is released again
 * before returning.
 * NOTE(review): rc/data declarations and some error paths are elided.
 */
1518 static int join_file(struct inode *head_inode, struct file *head_filp,
1519 struct file *tail_filp)
1521 struct dentry *tail_dentry = tail_filp->f_dentry;
1522 struct lookup_intent oit = {.it_op = IT_OPEN,
1523 .it_flags = head_filp->f_flags,
1524 .it_create_mode = M_JOIN_FILE};
1525 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CW,
1526 ll_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL };
1528 struct lustre_handle lockh;
1529 struct md_op_data *op_data;
1534 tail_dentry = tail_filp->f_dentry;
/* Pass head's current size to the MDS via the op_data cookie. */
1536 data = i_size_read(head_inode);
1537 op_data = ll_prep_md_op_data(NULL, head_inode,
1538 tail_dentry->d_parent->d_inode,
1539 tail_dentry->d_name.name,
1540 tail_dentry->d_name.len, 0,
1541 LUSTRE_OPC_ANY, &data);
1542 if (IS_ERR(op_data))
1543 RETURN(PTR_ERR(op_data));
1545 rc = md_enqueue(ll_i2mdexp(head_inode), &einfo, &oit,
1546 op_data, &lockh, NULL, 0, NULL, 0);
1548 ll_finish_md_op_data(op_data);
1552 rc = oit.d.lustre.it_status;
/* Open failed on the MDS side: free the reply and bail out. */
1554 if (rc < 0 || it_open_error(DISP_OPEN_OPEN, &oit)) {
1555 rc = rc ? rc : it_open_error(DISP_OPEN_OPEN, &oit);
1556 ptlrpc_req_finished((struct ptlrpc_request *)
1557 oit.d.lustre.it_data);
1561 if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
1563 ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
1564 oit.d.lustre.it_lock_mode = 0;
/* Drop the request reference and close the open handle the intent
 * created; join does not keep the file open. */
1566 ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
1567 it_clear_disposition(&oit, DISP_ENQ_COMPLETE);
1568 ll_release_openhandle(head_filp->f_dentry, &oit);
1570 ll_intent_release(&oit);
/*
 * Top-level LL_IOC_JOIN implementation: open the tail file, take EX
 * extent locks on both inodes in ino order (to avoid ABBA deadlock),
 * sanity-check the pair, and ask the MDS to join them. Cleanup is
 * staged through cleanup_phase so each acquired resource is undone.
 * NOTE(review): phase labels/increments and RETURNs are elided here.
 */
1574 static int ll_file_join(struct inode *head, struct file *filp,
1575 char *filename_tail)
1577 struct inode *tail = NULL, *first = NULL, *second = NULL;
1578 struct dentry *tail_dentry;
1579 struct file *tail_filp, *first_filp, *second_filp;
1580 struct ll_lock_tree first_tree, second_tree;
1581 struct ll_lock_tree_node *first_node, *second_node;
1582 struct ll_inode_info *hlli = ll_i2info(head);
1583 int rc = 0, cleanup_phase = 0;
1586 CDEBUG(D_VFSTRACE, "VFS Op:head=%lu/%u(%p) tail %s\n",
1587 head->i_ino, head->i_generation, head, filename_tail);
1589 tail_filp = filp_open(filename_tail, O_WRONLY, 0644);
1590 if (IS_ERR(tail_filp)) {
1591 CERROR("Can not open tail file %s", filename_tail);
1592 rc = PTR_ERR(tail_filp);
/* Hold our own inode reference on the tail for the duration. */
1595 tail = igrab(tail_filp->f_dentry->d_inode);
1597 tail_dentry = tail_filp->f_dentry;
1598 LASSERT(tail_dentry);
1601 /*reorder the inode for lock sequence*/
/* Lock the higher-ino inode first, then the lower one, so two racing
 * joins always acquire in the same order. */
1602 first = head->i_ino > tail->i_ino ? head : tail;
1603 second = head->i_ino > tail->i_ino ? tail : head;
1604 first_filp = head->i_ino > tail->i_ino ? filp : tail_filp;
1605 second_filp = head->i_ino > tail->i_ino ? tail_filp : filp;
1607 CDEBUG(D_INFO, "reorder object from %lu:%lu to %lu:%lu \n",
1608 head->i_ino, tail->i_ino, first->i_ino, second->i_ino);
/* EX lock over the whole file [0, EOF] on the first inode. */
1609 first_node = ll_node_from_inode(first, 0, OBD_OBJECT_EOF, LCK_EX);
1610 if (IS_ERR(first_node)){
1611 rc = PTR_ERR(first_node);
1614 first_tree.lt_fd = first_filp->private_data;
1615 rc = ll_tree_lock(&first_tree, first_node, NULL, 0, 0);
/* Same full-file EX lock on the second inode. */
1620 second_node = ll_node_from_inode(second, 0, OBD_OBJECT_EOF, LCK_EX);
1621 if (IS_ERR(second_node)){
1622 rc = PTR_ERR(second_node);
1625 second_tree.lt_fd = second_filp->private_data;
1626 rc = ll_tree_lock(&second_tree, second_node, NULL, 0, 0);
1631 rc = join_sanity_check(head, tail);
1635 rc = join_file(head, filp, tail_filp);
/* Unwind in reverse order of acquisition; fall-through between the
 * phase cases is intentional (case labels elided in this excerpt). */
1639 switch (cleanup_phase) {
1641 ll_tree_unlock(&second_tree);
1642 obd_cancel_unused(ll_i2dtexp(second),
1643 ll_i2info(second)->lli_smd, 0, NULL);
1645 ll_tree_unlock(&first_tree);
1646 obd_cancel_unused(ll_i2dtexp(first),
1647 ll_i2info(first)->lli_smd, 0, NULL);
1649 filp_close(tail_filp, 0);
/* On success the head's cached stripe md is stale (the layout
 * changed on the MDS) — free it so it is refetched. */
1652 if (head && rc == 0) {
1653 obd_free_memmd(ll_i2sbi(head)->ll_dt_exp,
1655 hlli->lli_smd = NULL;
1660 CERROR("invalid cleanup_phase %d\n", cleanup_phase);
1665 #endif /* LUSTRE_FIX >= 50 */
1668 * Close inode open handle
1670 * \param dentry [in] dentry which contains the inode
1671 * \param it [in,out] intent which contains open info and result
1674 * \retval <0 failure
/*
 * Close the MDS open handle carried in @it (if any). Used when an
 * intent opened a file but the caller does not want to keep it open
 * (e.g. after a join, or glimpse-style opens).
 * NOTE(review): rc declaration, RETURN and "out:" label are elided.
 */
1676 int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
1678 struct inode *inode = dentry->d_inode;
1679 struct obd_client_handle *och;
1685 /* Root ? Do nothing. */
1686 if (dentry->d_inode->i_sb->s_root == dentry)
1689 /* No open handle to close? Move away */
1690 if (!it_disposition(it, DISP_OPEN_OPEN))
1693 LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0);
1695 OBD_ALLOC(och, sizeof(*och));
1697 GOTO(out, rc = -ENOMEM);
/* Fill a client handle from the intent, then close it on the MDS. */
1699 ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
1700 ll_i2info(inode), it, och);
1702 rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
1705 /* this one is in place of ll_file_open */
1706 if (it_disposition(it, DISP_ENQ_OPEN_REF))
1707 ptlrpc_req_finished(it->d.lustre.it_data);
1708 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
1713 * Get size for inode for which FIEMAP mapping is requested.
1714 * Make the FIEMAP get_info call and returns the result.
/*
 * Fill @fiemap with the extent mapping of @inode by sending a
 * KEY_FIEMAP get_info request to the data (OSC/LOV) export.
 * @num_bytes bounds the reply (header + requested extent slots).
 */
1716 int ll_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
1719 struct obd_export *exp = ll_i2dtexp(inode);
1720 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1721 struct ll_fiemap_info_key fm_key = { .name = KEY_FIEMAP, };
1722 int vallen = num_bytes;
1726 /* If the stripe_count > 1 and the application does not understand
1727 * DEVICE_ORDER flag, then it cannot interpret the extents correctly.
1729 if (lsm->lsm_stripe_count > 1 &&
1730 !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER))
/* Identify the object the OSTs should map. */
1733 fm_key.oa.o_id = lsm->lsm_object_id;
1734 fm_key.oa.o_gr = lsm->lsm_object_gr;
1735 fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
1737 obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
1740 /* If filesize is 0, then there would be no objects for mapping */
1741 if (fm_key.oa.o_size == 0) {
1742 fiemap->fm_mapped_extents = 0;
/* Ship the caller's fiemap request header along with the key. */
1746 memcpy(&fm_key.fiemap, fiemap, sizeof(*fiemap));
1748 rc = obd_get_info(exp, sizeof(fm_key), &fm_key, &vallen, fiemap, lsm);
1750 CERROR("obd_get_info failed: rc = %d\n", rc);
/*
 * OBD_IOC_FID2PATH: resolve a FID to a path name via the MDC.
 * Copies the caller's getinfo_fid2path header in, allocates an output
 * buffer large enough for gf_pathlen, and copies the result back.
 * NOTE(review): outsize/rc declarations and error RETURNs are elided.
 */
1755 int ll_fid2path(struct obd_export *exp, void *arg)
1757 struct getinfo_fid2path *gfout, *gfin;
1761 /* Need to get the buflen */
1762 OBD_ALLOC_PTR(gfin);
1765 if (copy_from_user(gfin, arg, sizeof(*gfin))) {
/* Output = fixed header + user-requested path buffer length. */
1770 outsize = sizeof(*gfout) + gfin->gf_pathlen;
1771 OBD_ALLOC(gfout, outsize);
1772 if (gfout == NULL) {
1776 memcpy(gfout, gfin, sizeof(*gfout));
1779 /* Call mdc_iocontrol */
1780 rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL);
1783 if (copy_to_user(arg, gfout, outsize))
1787 OBD_FREE(gfout, outsize);
/*
 * Main ioctl dispatcher for regular Lustre files. Handles the LL_IOC_*
 * family, selected EXT3_IOC_* compatibility ioctls, FIEMAP, and finally
 * falls through to dynamically registered handlers / obd_iocontrol.
 * NOTE(review): the switch statement open/close, rc/flags declarations
 * and several RETURNs are elided in this excerpt.
 */
1791 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
1794 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1798 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%x\n", inode->i_ino,
1799 inode->i_generation, inode, cmd);
1800 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
1802 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1803 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1807 case LL_IOC_GETFLAGS:
1808 /* Get the current value of the file flags */
1809 return put_user(fd->fd_flags, (int *)arg);
1810 case LL_IOC_SETFLAGS:
1811 case LL_IOC_CLRFLAGS:
1812 /* Set or clear specific file flags */
1813 /* XXX This probably needs checks to ensure the flags are
1814 * not abused, and to handle any flag side effects.
1816 if (get_user(flags, (int *) arg))
/* LL_FILE_IGNORE_LOCK may only be set on O_DIRECT files. */
1819 if (cmd == LL_IOC_SETFLAGS) {
1820 if ((flags & LL_FILE_IGNORE_LOCK) &&
1821 !(file->f_flags & O_DIRECT)) {
1822 CERROR("%s: unable to disable locking on "
1823 "non-O_DIRECT file\n", current->comm);
1827 fd->fd_flags |= flags;
1829 fd->fd_flags &= ~flags;
1832 case LL_IOC_LOV_SETSTRIPE:
1833 RETURN(ll_lov_setstripe(inode, file, arg));
1834 case LL_IOC_LOV_SETEA:
1835 RETURN(ll_lov_setea(inode, file, arg));
1836 case LL_IOC_LOV_GETSTRIPE:
1837 RETURN(ll_lov_getstripe(inode, arg));
1838 case LL_IOC_RECREATE_OBJ:
1839 RETURN(ll_lov_recreate_obj(inode, file, arg));
1840 case EXT3_IOC_FIEMAP: {
1841 struct ll_user_fiemap *fiemap_s;
1842 size_t num_bytes, ret_bytes;
1843 unsigned int extent_count;
1846 /* Get the extent count so we can calculate the size of
1847 * required fiemap buffer */
1848 if (get_user(extent_count,
1849 &((struct ll_user_fiemap __user *)arg)->fm_extent_count))
1851 num_bytes = sizeof(*fiemap_s) + (extent_count *
1852 sizeof(struct ll_fiemap_extent));
/* vmalloc: extent_count is user-controlled, may be large. */
1853 OBD_VMALLOC(fiemap_s, num_bytes);
1854 if (fiemap_s == NULL)
1857 if (copy_from_user(fiemap_s,(struct ll_user_fiemap __user *)arg,
1859 GOTO(error, rc = -EFAULT);
/* Reject unknown flags but first report back which flags we do
 * support, per the fiemap ABI. */
1861 if (fiemap_s->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) {
1862 fiemap_s->fm_flags = fiemap_s->fm_flags &
1863 ~LUSTRE_FIEMAP_FLAGS_COMPAT;
1864 if (copy_to_user((char *)arg, fiemap_s,
1866 GOTO(error, rc = -EFAULT);
1868 GOTO(error, rc = -EBADR);
1871 /* If fm_extent_count is non-zero, read the first extent since
1872 * it is used to calculate end_offset and device from previous
1875 if (copy_from_user(&fiemap_s->fm_extents[0],
1876 (char __user *)arg + sizeof(*fiemap_s),
1877 sizeof(struct ll_fiemap_extent)))
1878 GOTO(error, rc = -EFAULT);
/* FIEMAP_FLAG_SYNC: flush dirty pages before mapping. */
1881 if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC) {
1884 rc = filemap_fdatawrite(inode->i_mapping);
1889 rc = ll_fiemap(inode, fiemap_s, num_bytes);
/* Copy back header plus only the extents actually mapped. */
1893 ret_bytes = sizeof(struct ll_user_fiemap);
1895 if (extent_count != 0)
1896 ret_bytes += (fiemap_s->fm_mapped_extents *
1897 sizeof(struct ll_fiemap_extent));
1899 if (copy_to_user((void *)arg, fiemap_s, ret_bytes))
1903 OBD_VFREE(fiemap_s, num_bytes);
1906 case EXT3_IOC_GETFLAGS:
1907 case EXT3_IOC_SETFLAGS:
1908 RETURN(ll_iocontrol(inode, file, cmd, arg));
1909 case EXT3_IOC_GETVERSION_OLD:
1910 case EXT3_IOC_GETVERSION:
1911 RETURN(put_user(inode->i_generation, (int *)arg));
1913 #if LUSTRE_FIX >= 50
1914 /* Allow file join in beta builds to allow debugging */
1918 ftail = getname((const char *)arg);
1920 RETURN(PTR_ERR(ftail));
1921 rc = ll_file_join(inode, file, ftail);
1925 CWARN("file join is not supported in this version of Lustre\n");
1929 case LL_IOC_GROUP_LOCK:
1930 RETURN(ll_get_grouplock(inode, file, arg));
1931 case LL_IOC_GROUP_UNLOCK:
1932 RETURN(ll_put_grouplock(inode, file, arg));
1933 case IOC_OBD_STATFS:
1934 RETURN(ll_obd_statfs(inode, (void *)arg));
1936 /* We need to special case any other ioctls we want to handle,
1937 * to send them to the MDS/OST as appropriate and to properly
1938 * network encode the arg field.
1939 case EXT3_IOC_SETVERSION_OLD:
1940 case EXT3_IOC_SETVERSION:
1942 case LL_IOC_FLUSHCTX:
1943 RETURN(ll_flush_ctx(inode));
1944 case LL_IOC_PATH2FID: {
1945 if (copy_to_user((void *)arg, &ll_i2info(inode)->lli_fid,
1946 sizeof(struct lu_fid)))
1951 case OBD_IOC_FID2PATH:
1952 RETURN(ll_fid2path(ll_i2mdexp(inode), (void *)arg));
/* Unknown command: try dynamically registered handlers, then hand
 * the ioctl to the underlying data export. */
1958 ll_iocontrol_call(inode, file, cmd, arg, &err))
1961 RETURN(obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL,
/*
 * llseek for Lustre files. SEEK_END must glimpse the current file size
 * from the OSTs (under the inode size lock) before applying the offset;
 * SEEK_CUR/SEEK_SET are purely local. The result is range-checked
 * against the filesystem's maximum file size.
 * NOTE(review): retval declaration and some RETURN paths are elided.
 */
1967 loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
1969 struct inode *inode = file->f_dentry->d_inode;
/* Computed only for the trace message below. */
1972 retval = offset + ((origin == 2) ? i_size_read(inode) :
1973 (origin == 1) ? file->f_pos : 0);
1974 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), to=%Lu=%#Lx(%s)\n",
1975 inode->i_ino, inode->i_generation, inode, retval, retval,
1976 origin == 2 ? "SEEK_END": origin == 1 ? "SEEK_CUR" : "SEEK_SET");
1977 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1);
1979 if (origin == 2) { /* SEEK_END */
/* nonblock is computed but its use is on an elided line —
 * presumably passed to the glimpse; verify against full source. */
1980 int nonblock = 0, rc;
1982 if (file->f_flags & O_NONBLOCK)
1983 nonblock = LDLM_FL_BLOCK_NOWAIT;
/* Refresh i_size from the OSTs before using it. */
1985 rc = cl_glimpse_size(inode);
1989 ll_inode_size_lock(inode, 0);
1990 offset += i_size_read(inode);
1991 ll_inode_size_unlock(inode, 0);
1992 } else if (origin == 1) { /* SEEK_CUR */
1993 offset += file->f_pos;
1997 if (offset >= 0 && offset <= ll_file_maxbytes(inode)) {
1998 if (offset != file->f_pos) {
1999 file->f_pos = offset;
/*
 * fsync for Lustre files: wait for in-flight page I/O, surface any
 * recorded async writeback errors, sync metadata through the MDC, and
 * (for striped files) sync data on the OSTs via obd_sync.
 * NOTE(review): rc/err declarations, the OBDO allocation and several
 * error/RETURN paths are elided in this excerpt.
 */
2007 int ll_fsync(struct file *file, struct dentry *dentry, int data)
2009 struct inode *inode = dentry->d_inode;
2010 struct ll_inode_info *lli = ll_i2info(inode);
2011 struct lov_stripe_md *lsm = lli->lli_smd;
2012 struct ptlrpc_request *req;
2013 struct obd_capa *oc;
2016 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
2017 inode->i_generation, inode);
2018 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
2020 /* fsync's caller has already called _fdata{sync,write}, we want
2021 * that IO to finish before calling the osc and mdc sync methods */
2022 rc = filemap_fdatawait(inode->i_mapping);
2024 /* catch async errors that were recorded back when async writeback
2025 * failed for pages in this mapping. */
2026 err = lli->lli_async_rc;
2027 lli->lli_async_rc = 0;
2031 err = lov_test_and_clear_async_rc(lsm);
/* Metadata sync on the MDS for this inode's FID. */
2036 oc = ll_mdscapa_get(inode);
2037 err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), oc,
2043 ptlrpc_req_finished(req);
2050 RETURN(rc ? rc : -ENOMEM);
/* Build an obdo describing the data objects and sync the full
 * [0, EOF] range on the OSTs. */
2052 oa->o_id = lsm->lsm_object_id;
2053 oa->o_gr = lsm->lsm_object_gr;
2054 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
2055 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
2056 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
2059 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
2060 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
2061 0, OBD_OBJECT_EOF, oc);
/*
 * flock/fcntl byte-range locking: translate the kernel file_lock into
 * an LDLM_FLOCK enqueue on the MDS, then mirror the result into the
 * local lock tables so the VFS bookkeeping stays consistent.
 * NOTE(review): rc/flags declarations, several case labels and the
 * final RETURN are elided in this excerpt.
 */
2071 int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
2073 struct inode *inode = file->f_dentry->d_inode;
2074 struct ll_sb_info *sbi = ll_i2sbi(inode);
2075 struct ldlm_enqueue_info einfo = { .ei_type = LDLM_FLOCK,
2076 .ei_cb_cp =ldlm_flock_completion_ast,
2077 .ei_cbdata = file_lock };
2078 struct md_op_data *op_data;
2079 struct lustre_handle lockh = {0};
2080 ldlm_policy_data_t flock;
2085 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu file_lock=%p\n",
2086 inode->i_ino, file_lock);
2088 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1);
2090 if (file_lock->fl_flags & FL_FLOCK) {
2091 LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK));
2092 /* set missing params for flock() calls */
2093 file_lock->fl_end = OFFSET_MAX;
2094 file_lock->fl_pid = current->tgid;
/* Wire-format policy: owner pid plus byte range. */
2096 flock.l_flock.pid = file_lock->fl_pid;
2097 flock.l_flock.start = file_lock->fl_start;
2098 flock.l_flock.end = file_lock->fl_end;
/* Map POSIX lock types onto LDLM modes (case labels elided):
 * read -> PR, unlock -> NL, write -> PW. */
2100 switch (file_lock->fl_type) {
2102 einfo.ei_mode = LCK_PR;
2105 /* An unlock request may or may not have any relation to
2106 * existing locks so we may not be able to pass a lock handle
2107 * via a normal ldlm_lock_cancel() request. The request may even
2108 * unlock a byte range in the middle of an existing lock. In
2109 * order to process an unlock request we need all of the same
2110 * information that is given with a normal read or write record
2111 * lock request. To avoid creating another ldlm unlock (cancel)
2112 * message we'll treat a LCK_NL flock request as an unlock. */
2113 einfo.ei_mode = LCK_NL;
2116 einfo.ei_mode = LCK_PW;
2119 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
/* Non-blocking set -> BLOCK_NOWAIT; F_GETLK -> TEST_LOCK. */
2134 flags = LDLM_FL_BLOCK_NOWAIT;
2140 flags = LDLM_FL_TEST_LOCK;
2141 /* Save the old mode so that if the mode in the lock changes we
2142 * can decrement the appropriate reader or writer refcount. */
2143 file_lock->fl_type = einfo.ei_mode;
2146 CERROR("unknown fcntl lock command: %d\n", cmd);
2150 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2151 LUSTRE_OPC_ANY, NULL);
2152 if (IS_ERR(op_data))
2153 RETURN(PTR_ERR(op_data));
2155 CDEBUG(D_DLMTRACE, "inode=%lu, pid=%u, flags=%#x, mode=%u, "
2156 "start="LPU64", end="LPU64"\n", inode->i_ino, flock.l_flock.pid,
2157 flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
2159 rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
2160 op_data, &lockh, &flock, 0, NULL /* req */, flags);
2162 ll_finish_md_op_data(op_data);
/* Keep the kernel's lock lists in sync with what the MDS granted. */
2164 if ((file_lock->fl_flags & FL_FLOCK) &&
2165 (rc == 0 || file_lock->fl_type == F_UNLCK))
2166 ll_flock_lock_file_wait(file, file_lock, (cmd == F_SETLKW));
2167 #ifdef HAVE_F_OP_FLOCK
2168 if ((file_lock->fl_flags & FL_POSIX) &&
2169 (rc == 0 || file_lock->fl_type == F_UNLCK) &&
2170 !(flags & LDLM_FL_TEST_LOCK))
2171 posix_lock_file_wait(file, file_lock);
/*
 * Lock handler for -o noflock mounts; body elided in this excerpt
 * (per the comment at the noflock file_operations table it returns
 * an error for all flock calls).
 */
2177 int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
/*
 * Test (without taking a reference) whether this client already holds
 * an MDS inodebits lock covering @bits on @inode, in any of the
 * CR/CW/PR/PW modes. LDLM_FL_TEST_LOCK makes the match side-effect free.
 */
2184 int ll_have_md_lock(struct inode *inode, __u64 bits)
2186 struct lustre_handle lockh;
2187 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2195 fid = &ll_i2info(inode)->lli_fid;
2196 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2198 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
2199 if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2200 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Like ll_have_md_lock() but actually takes a reference on a matching
 * MDS inodebits lock (no LDLM_FL_TEST_LOCK), returning the matched
 * mode via rc and the handle via @lockh. Caller must drop the ref.
 */
2206 ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
2207 struct lustre_handle *lockh)
2209 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
2215 fid = &ll_i2info(inode)->lli_fid;
2216 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
2218 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
2219 rc = md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
2220 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Post-process a revalidation result: -ENOENT on an already-unlinked
 * inode is treated as success (with nlink bookkeeping on an elided
 * line); any other error is logged.
 */
2224 static int ll_inode_revalidate_fini(struct inode *inode, int rc) {
2225 if (rc == -ENOENT) { /* Already unlinked. Just update nlink
2226 * and return success */
2228 /* This path cannot be hit for regular files unless in
2229 * case of obscure races, so no need to to validate
2231 if (!S_ISREG(inode->i_mode) &&
2232 !S_ISDIR(inode->i_mode))
2237 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/*
 * Revalidate @dentry's inode attributes against the MDS.
 * If the server supports getattr-by-FID (OBD_CONNECT_ATTRFID), issue an
 * IT_GETATTR intent lock; otherwise, when no covering inodebits lock is
 * cached locally, do a plain md_getattr and refresh the inode from the
 * reply. @ibits selects which lock bits count as "already valid".
 * NOTE(review): rc/req error checks and GOTO/RETURN paths are elided.
 */
2245 int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
2248 struct inode *inode = dentry->d_inode;
2249 struct ptlrpc_request *req = NULL;
2250 struct ll_sb_info *sbi;
2251 struct obd_export *exp;
/* Guard for a NULL inode (condition elided) — should not happen. */
2256 CERROR("REPORT THIS LINE TO PETER\n");
2259 sbi = ll_i2sbi(inode);
2261 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
2262 inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
2264 exp = ll_i2mdexp(inode);
2266 if (exp->exp_connect_flags & OBD_CONNECT_ATTRFID) {
2267 struct lookup_intent oit = { .it_op = IT_GETATTR };
2268 struct md_op_data *op_data;
2270 /* Call getattr by fid, so do not provide name at all. */
2271 op_data = ll_prep_md_op_data(NULL, dentry->d_parent->d_inode,
2272 dentry->d_inode, NULL, 0, 0,
2273 LUSTRE_OPC_ANY, NULL);
2274 if (IS_ERR(op_data))
2275 RETURN(PTR_ERR(op_data));
/* M_CHECK_STALE asks the MDS to verify the inode is current. */
2277 oit.it_create_mode |= M_CHECK_STALE;
2278 rc = md_intent_lock(exp, op_data, NULL, 0,
2279 /* we are not interested in name
2282 ll_md_blocking_ast, 0);
2283 ll_finish_md_op_data(op_data);
2284 oit.it_create_mode &= ~M_CHECK_STALE;
2286 rc = ll_inode_revalidate_fini(inode, rc);
2290 rc = ll_revalidate_it_finish(req, &oit, dentry);
2292 ll_intent_release(&oit);
2296 /* Unlinked? Unhash dentry, so it is not picked up later by
2297 do_lookup() -> ll_revalidate_it(). We cannot use d_drop
2298 here to preserve get_cwd functionality on 2.6.
2300 if (!dentry->d_inode->i_nlink) {
2301 spin_lock(&ll_lookup_lock);
2302 spin_lock(&dcache_lock);
2303 ll_drop_dentry(dentry);
2304 spin_unlock(&dcache_lock);
2305 spin_unlock(&ll_lookup_lock);
2308 ll_lookup_finish_locks(&oit, dentry);
2309 } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
/* No cached lock: fetch attributes with a plain getattr. */
2311 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
2312 obd_valid valid = OBD_MD_FLGETATTR;
2313 struct obd_capa *oc;
/* Regular files also need their EA, sized to the MDS maximum. */
2316 if (S_ISREG(inode->i_mode)) {
2317 rc = ll_get_max_mdsize(sbi, &ealen);
2320 valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE;
2322 /* Once OBD_CONNECT_ATTRFID is not supported, we can't find one
2323 * capa for this inode. Because we only keep capas of dirs
2325 oc = ll_mdscapa_get(inode);
2326 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, valid,
2330 rc = ll_inode_revalidate_fini(inode, rc);
2334 rc = ll_prep_inode(&inode, req, NULL);
2337 ptlrpc_req_finished(req);
/*
 * Revalidate metadata (UPDATE|LOOKUP bits) and then, if the file has
 * allocated objects, refresh its size from the OSTs with a glimpse.
 */
2341 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
2346 rc = __ll_inode_revalidate_it(dentry, it, MDS_INODELOCK_UPDATE |
2347 MDS_INODELOCK_LOOKUP);
2349 /* if object not yet allocated, don't validate size */
2350 if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
2353 /* cl_glimpse_size will prefer locally cached writes if they extend
2357 rc = cl_glimpse_size(dentry->d_inode);
/*
 * getattr with an explicit intent: revalidate the inode, then copy its
 * attributes into @stat. Size/blocks are read under the inode size
 * lock for consistency with concurrent size updates.
 */
2362 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
2363 struct lookup_intent *it, struct kstat *stat)
2365 struct inode *inode = de->d_inode;
2368 res = ll_inode_revalidate_it(de, it);
2369 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
2374 stat->dev = inode->i_sb->s_dev;
2375 stat->ino = inode->i_ino;
2376 stat->mode = inode->i_mode;
2377 stat->nlink = inode->i_nlink;
2378 stat->uid = inode->i_uid;
2379 stat->gid = inode->i_gid;
2380 stat->rdev = kdev_t_to_nr(inode->i_rdev);
2381 stat->atime = inode->i_atime;
2382 stat->mtime = inode->i_mtime;
2383 stat->ctime = inode->i_ctime;
2384 #ifdef HAVE_INODE_BLKSIZE
2385 stat->blksize = inode->i_blksize;
2387 stat->blksize = 1 << inode->i_blkbits;
/* i_size/i_blocks may be updated by racing writers; serialize. */
2390 ll_inode_size_lock(inode, 0);
2391 stat->size = i_size_read(inode);
2392 stat->blocks = inode->i_blocks;
2393 ll_inode_size_unlock(inode, 0);
/* VFS ->getattr entry point: wrap ll_getattr_it() with a fresh
 * IT_GETATTR intent. */
2397 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
2399 struct lookup_intent it = { .it_op = IT_GETATTR };
2401 return ll_getattr_it(mnt, de, &it, stat);
/*
 * Check @mask against the POSIX ACL cached on the inode (duplicated
 * under lli_lock so the check runs on a stable copy). Compiled out
 * when CONFIG_FS_POSIX_ACL is disabled.
 */
2405 int lustre_check_acl(struct inode *inode, int mask)
2407 #ifdef CONFIG_FS_POSIX_ACL
2408 struct ll_inode_info *lli = ll_i2info(inode);
2409 struct posix_acl *acl;
2413 spin_lock(&lli->lli_lock);
2414 acl = posix_acl_dup(lli->lli_posix_acl);
2415 spin_unlock(&lli->lli_lock);
2420 rc = posix_acl_permission(inode, acl, mask);
2421 posix_acl_release(acl);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
/*
 * ->permission for kernels >= 2.6.10: revalidate the root inode if
 * needed, delegate to the remote-permission path on RMT_CLIENT mounts,
 * otherwise use generic_permission() with lustre_check_acl as the
 * ACL callback.
 */
2430 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2435 /* as root inode are NOT getting validated in lookup operation,
2436 * need to do it before permission check. */
2438 if (inode == inode->i_sb->s_root->d_inode) {
2439 struct lookup_intent it = { .it_op = IT_GETATTR };
2441 rc = __ll_inode_revalidate_it(inode->i_sb->s_root, &it,
2442 MDS_INODELOCK_LOOKUP);
2447 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), inode mode %x mask %o\n",
2448 inode->i_ino, inode->i_generation, inode, inode->i_mode, mask);
2450 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2451 return lustre_check_remote_perm(inode, mask);
2453 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
2454 rc = generic_permission(inode, mask, lustre_check_acl);
/*
 * ->permission fallback for kernels < 2.6.10: open-coded equivalent of
 * generic_permission() — owner/group/other mode bits, ACL check, then
 * capability overrides (DAC_OVERRIDE / DAC_READ_SEARCH).
 * NOTE(review): several branch bodies/labels are elided in this excerpt.
 */
2459 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
2461 int mode = inode->i_mode;
2464 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), mask %o\n",
2465 inode->i_ino, inode->i_generation, inode, mask);
2467 if (ll_i2sbi(inode)->ll_flags & LL_SBI_RMT_CLIENT)
2468 return lustre_check_remote_perm(inode, mask);
2470 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
/* Read-only filesystem forbids writes to regular/dir/symlink;
 * immutable inodes forbid writes entirely. */
2472 if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
2473 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
2475 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
2477 if (current->fsuid == inode->i_uid) {
2480 if (((mode >> 3) & mask & S_IRWXO) != mask)
2482 rc = lustre_check_acl(inode, mask);
2486 goto check_capabilities;
2490 if (in_group_p(inode->i_gid))
2493 if ((mode & mask & S_IRWXO) == mask)
/* Capability overrides: exec needs at least one x bit or a dir. */
2497 if (!(mask & MAY_EXEC) ||
2498 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
2499 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
2502 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH) && ((mask == MAY_READ) ||
2503 (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
/* Select vectored-I/O method names for the file_operations tables
 * below: older kernels expose readv/writev, newer ones aio_read/
 * aio_write. */
#ifdef HAVE_FILE_READV
2511 #define READ_METHOD readv
2512 #define READ_FUNCTION ll_file_readv
2513 #define WRITE_METHOD writev
2514 #define WRITE_FUNCTION ll_file_writev
2516 #define READ_METHOD aio_read
2517 #define READ_FUNCTION ll_file_aio_read
2518 #define WRITE_METHOD aio_write
2519 #define WRITE_FUNCTION ll_file_aio_write
/* -o localflock - only provides locally consistent flock locks */
/* Default file_operations: no .flock/.lock entries, so flock calls use
 * the kernel's local (single-node) semantics. */
2523 struct file_operations ll_file_operations = {
2524 .read = ll_file_read,
2525 .READ_METHOD = READ_FUNCTION,
2526 .write = ll_file_write,
2527 .WRITE_METHOD = WRITE_FUNCTION,
2528 .ioctl = ll_file_ioctl,
2529 .open = ll_file_open,
2530 .release = ll_file_release,
2531 .mmap = ll_file_mmap,
2532 .llseek = ll_file_seek,
2533 .sendfile = ll_file_sendfile,
/* file_operations for mounts with cluster-wide flock: identical to the
 * default table plus .flock/.lock wired to ll_file_flock (field name
 * depends on kernel support for f_op->flock). */
2537 struct file_operations ll_file_operations_flock = {
2538 .read = ll_file_read,
2539 .READ_METHOD = READ_FUNCTION,
2540 .write = ll_file_write,
2541 .WRITE_METHOD = WRITE_FUNCTION,
2542 .ioctl = ll_file_ioctl,
2543 .open = ll_file_open,
2544 .release = ll_file_release,
2545 .mmap = ll_file_mmap,
2546 .llseek = ll_file_seek,
2547 .sendfile = ll_file_sendfile,
2549 #ifdef HAVE_F_OP_FLOCK
2550 .flock = ll_file_flock,
2552 .lock = ll_file_flock
/* These are for -o noflock - to return ENOSYS on flock calls */
2556 struct file_operations ll_file_operations_noflock = {
2557 .read = ll_file_read,
2558 .READ_METHOD = READ_FUNCTION,
2559 .write = ll_file_write,
2560 .WRITE_METHOD = WRITE_FUNCTION,
2561 .ioctl = ll_file_ioctl,
2562 .open = ll_file_open,
2563 .release = ll_file_release,
2564 .mmap = ll_file_mmap,
2565 .llseek = ll_file_seek,
2566 .sendfile = ll_file_sendfile,
2568 #ifdef HAVE_F_OP_FLOCK
2569 .flock = ll_file_noflock,
2571 .lock = ll_file_noflock
/* inode_operations for regular Lustre files; setattr entry depends on
 * whether the kernel carries the VFS intent patches. */
2574 struct inode_operations ll_file_inode_operations = {
2575 #ifdef HAVE_VFS_INTENT_PATCHES
2576 .setattr_raw = ll_setattr_raw,
2578 .setattr = ll_setattr,
2579 .truncate = ll_truncate,
2580 .getattr = ll_getattr,
2581 .permission = ll_inode_permission,
2582 .setxattr = ll_setxattr,
2583 .getxattr = ll_getxattr,
2584 .listxattr = ll_listxattr,
2585 .removexattr = ll_removexattr,
/* dynamic ioctl number support routins */
/* Registry of dynamically registered ioctl handlers: a list of
 * llioc_data entries guarded by an rw_semaphore (readers dispatch,
 * writers register/unregister). */
2589 static struct llioc_ctl_data {
2590 struct rw_semaphore ioc_sem;
2591 struct list_head ioc_head;
2593 __RWSEM_INITIALIZER(llioc.ioc_sem),
2594 CFS_LIST_HEAD_INIT(llioc.ioc_head)
/* One registered handler: callback plus the ioctl numbers it claims
 * (iocd_cmd is a trailing variable-length array). */
2599 struct list_head iocd_list;
2600 unsigned int iocd_size;
2601 llioc_callback_t iocd_cb;
2602 unsigned int iocd_count;
2603 unsigned int iocd_cmd[0];
/*
 * Register a dynamic ioctl handler: @cb will be consulted for the
 * @count ioctl numbers in @cmd. Returns an opaque cookie (the
 * allocated llioc_data, per the unregister logic) to pass to
 * ll_iocontrol_unregister(), or NULL on bad args / allocation failure.
 */
2606 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
2609 struct llioc_data *in_data = NULL;
2612 if (cb == NULL || cmd == NULL ||
2613 count > LLIOC_MAX_CMD || count < 0)
2616 size = sizeof(*in_data) + count * sizeof(unsigned int);
2617 OBD_ALLOC(in_data, size);
2618 if (in_data == NULL)
2621 memset(in_data, 0, sizeof(*in_data));
2622 in_data->iocd_size = size;
2623 in_data->iocd_cb = cb;
2624 in_data->iocd_count = count;
2625 memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count);
/* Publish under the write lock so dispatchers see a complete entry. */
2627 down_write(&llioc.ioc_sem);
2628 list_add_tail(&in_data->iocd_list, &llioc.ioc_head);
2629 up_write(&llioc.ioc_sem);
/*
 * Remove a handler previously registered with ll_iocontrol_register(),
 * matching by the returned cookie (@magic). Warns if no entry matches.
 * NOTE(review): the magic-comparison condition is on an elided line.
 */
2634 void ll_iocontrol_unregister(void *magic)
2636 struct llioc_data *tmp;
2641 down_write(&llioc.ioc_sem);
2642 list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) {
2644 unsigned int size = tmp->iocd_size;
2646 list_del(&tmp->iocd_list);
/* Drop the semaphore before freeing; the entry is already unlinked. */
2647 up_write(&llioc.ioc_sem);
2649 OBD_FREE(tmp, size);
2653 up_write(&llioc.ioc_sem);
2655 CWARN("didn't find iocontrol register block with magic: %p\n", magic);
/* Export the dynamic-ioctl registration API to other modules. */
2658 EXPORT_SYMBOL(ll_iocontrol_register);
2659 EXPORT_SYMBOL(ll_iocontrol_unregister);
2661 enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
2662 unsigned int cmd, unsigned long arg, int *rcp)
2664 enum llioc_iter ret = LLIOC_CONT;
2665 struct llioc_data *data;
2666 int rc = -EINVAL, i;
2668 down_read(&llioc.ioc_sem);
2669 list_for_each_entry(data, &llioc.ioc_head, iocd_list) {
2670 for (i = 0; i < data->iocd_count; i++) {
2671 if (cmd != data->iocd_cmd[i])
2674 ret = data->iocd_cb(inode, file, cmd, arg, data, &rc);
2678 if (ret == LLIOC_STOP)
2681 up_read(&llioc.ioc_sem);