1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Lustre Light Super operations
6 * Copyright (c) 2002-2005 Cluster File Systems, Inc.
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LLITE
26 #include <linux/module.h>
27 #include <linux/types.h>
28 #include <linux/random.h>
29 #include <linux/version.h>
31 #include <lustre_lite.h>
32 #include <lustre_ha.h>
33 #include <lustre_dlm.h>
34 #include <lprocfs_status.h>
35 #include <lustre_disk.h>
36 #include "llite_internal.h"
38 kmem_cache_t *ll_file_data_slab;
40 LIST_HEAD(ll_super_blocks);
41 spinlock_t ll_sb_lock = SPIN_LOCK_UNLOCKED;
43 extern struct address_space_operations ll_aops;
44 extern struct address_space_operations ll_dir_aops;
47 #define log2(n) ffz(~(n))
51 struct ll_sb_info *ll_init_sbi(void)
53 struct ll_sb_info *sbi = NULL;
57 OBD_ALLOC(sbi, sizeof(*sbi));
61 spin_lock_init(&sbi->ll_lock);
62 spin_lock_init(&sbi->ll_lco.lco_lock);
63 INIT_LIST_HEAD(&sbi->ll_pglist);
64 sbi->ll_pglist_gen = 0;
65 if (num_physpages >> (20 - PAGE_SHIFT) < 512)
66 sbi->ll_async_page_max = num_physpages / 2;
68 sbi->ll_async_page_max = (num_physpages / 4) * 3;
69 sbi->ll_ra_info.ra_max_pages = min(num_physpages / 8,
70 SBI_DEFAULT_READAHEAD_MAX);
71 sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
72 SBI_DEFAULT_READAHEAD_WHOLE_MAX;
74 INIT_LIST_HEAD(&sbi->ll_conn_chain);
75 INIT_HLIST_HEAD(&sbi->ll_orphan_dentry_list);
77 class_generate_random_uuid(uuid);
78 class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
79 CDEBUG(D_HA, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid);
81 spin_lock(&ll_sb_lock);
82 list_add_tail(&sbi->ll_list, &ll_super_blocks);
83 spin_unlock(&ll_sb_lock);
85 INIT_LIST_HEAD(&sbi->ll_deathrow);
86 spin_lock_init(&sbi->ll_deathrow_lock);
90 void ll_free_sbi(struct super_block *sb)
92 struct ll_sb_info *sbi = ll_s2sbi(sb);
96 spin_lock(&ll_sb_lock);
97 list_del(&sbi->ll_list);
98 spin_unlock(&ll_sb_lock);
99 OBD_FREE(sbi, sizeof(*sbi));
104 static struct dentry_operations ll_d_root_ops = {
105 .d_compare = ll_dcompare,
108 /* Initialize the default and maximum LOV EA and cookie sizes. This allows
109 * us to make MDS RPCs with large enough reply buffers to hold the
110 * maximum-sized (= maximum striped) EA and cookie without having to
111 * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */
112 static int ll_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
114 struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC };
115 __u32 valsize = sizeof(struct lov_desc);
116 int rc, easize, def_easize, cookiesize;
117 struct lov_desc desc;
121 rc = obd_get_info(dt_exp, strlen(KEY_LOVDESC) + 1, KEY_LOVDESC,
126 stripes = min(desc.ld_tgt_count, (__u32)LOV_MAX_STRIPE_COUNT);
127 lsm.lsm_stripe_count = stripes;
128 easize = obd_size_diskmd(dt_exp, &lsm);
130 lsm.lsm_stripe_count = desc.ld_default_stripe_count;
131 def_easize = obd_size_diskmd(dt_exp, &lsm);
133 cookiesize = stripes * sizeof(struct llog_cookie);
135 CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n",
138 rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize);
142 int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
144 struct inode *root = 0;
145 struct ll_sb_info *sbi = ll_s2sbi(sb);
146 struct obd_device *obd;
147 struct lu_fid rootfid;
148 struct obd_statfs osfs;
149 struct ptlrpc_request *request = NULL;
150 struct lustre_handle osc_conn = {0, };
151 struct lustre_handle md_conn = {0, };
152 struct obd_connect_data *data = NULL;
157 obd = class_name2obd(mdc);
159 CERROR("MDC %s: not setup or attached\n", mdc);
167 if (proc_lustre_fs_root) {
168 err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
171 CERROR("could not register mount in /proc/lustre");
174 /* indicate that inodebits locking is supported by this client */
175 data->ocd_connect_flags |= OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH;
176 data->ocd_ibits_known = MDS_INODELOCK_FULL;
178 if (sb->s_flags & MS_RDONLY)
179 data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
180 if (sbi->ll_flags & LL_SBI_USER_XATTR)
181 data->ocd_connect_flags |= OBD_CONNECT_XATTR;
182 data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_JOIN;
184 if (sbi->ll_flags & LL_SBI_FLOCK) {
185 sbi->ll_fop = &ll_file_operations_flock;
187 sbi->ll_fop = &ll_file_operations;
190 data->ocd_connect_flags |= OBD_CONNECT_VERSION;
191 data->ocd_version = LUSTRE_VERSION_CODE;
194 data->ocd_connect_flags |= OBD_CONNECT_REAL;
196 err = obd_connect(&md_conn, obd, &sbi->ll_sb_uuid, data);
198 CERROR("An MDT (mdc %s) is performing recovery, of which this"
199 " client is not a part. Please wait for recovery to "
200 "complete, abort, or time out.\n", mdc);
203 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
206 sbi->ll_md_exp = class_conn2export(&md_conn);
208 err = obd_statfs(obd, &osfs, jiffies - HZ);
212 LASSERT(osfs.os_bsize);
213 sb->s_blocksize = osfs.os_bsize;
214 sb->s_blocksize_bits = log2(osfs.os_bsize);
215 sb->s_magic = LL_SUPER_MAGIC;
216 sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
217 sbi->ll_namelen = osfs.os_namelen;
219 if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
220 !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
221 LCONSOLE_INFO("Disabling user_xattr feature because "
222 "it is not supported on the server\n");
223 sbi->ll_flags &= ~LL_SBI_USER_XATTR;
226 if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
228 sb->s_flags |= MS_POSIXACL;
230 sbi->ll_flags |= LL_SBI_ACL;
232 sbi->ll_flags &= ~LL_SBI_ACL;
234 if (data->ocd_connect_flags & OBD_CONNECT_JOIN)
235 sbi->ll_flags |= LL_SBI_JOIN;
237 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
238 /* We set sb->s_dev equal on all lustre clients in order to support
239 * NFS export clustering. NFSD requires that the FSID be the same
241 /* s_dev is also used in lt_compare() to compare two fs, but that is
242 * only a node-local comparison. */
243 sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid,
244 strlen(sbi2mdc(sbi)->cl_target_uuid.uuid));
247 obd = class_name2obd(osc);
249 CERROR("OSC %s: not setup or attached\n", osc);
250 GOTO(out_mdc, err = -ENODEV);
253 data->ocd_connect_flags =
254 OBD_CONNECT_GRANT | OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL;
256 CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
257 "ocd_grant: %d\n", data->ocd_connect_flags,
258 data->ocd_version, data->ocd_grant);
260 obd->obd_upcall.onu_owner = &sbi->ll_lco;
261 obd->obd_upcall.onu_upcall = ll_ocd_update;
263 err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, data);
265 CERROR("An OST (osc %s) is performing recovery, of which this"
266 " client is not a part. Please wait for recovery to "
267 "complete, abort, or time out.\n", osc);
270 CERROR("cannot connect to %s: rc = %d\n", osc, err);
274 sbi->ll_dt_exp = class_conn2export(&osc_conn);
276 spin_lock(&sbi->ll_lco.lco_lock);
277 sbi->ll_lco.lco_flags = data->ocd_connect_flags;
278 spin_unlock(&sbi->ll_lco.lco_lock);
280 ll_init_ea_size(sbi->ll_md_exp, sbi->ll_dt_exp);
282 err = obd_prep_async_page(sbi->ll_dt_exp, NULL, NULL, NULL,
283 0, NULL, NULL, NULL);
285 LCONSOLE_ERROR("There are no OST's in this filesystem. "
286 "There must be at least one active OST for "
287 "a client to start.\n");
291 if (!ll_async_page_slab) {
292 ll_async_page_slab_size =
293 size_round(sizeof(struct ll_async_page)) + err;
294 ll_async_page_slab = kmem_cache_create("ll_async_page",
295 ll_async_page_slab_size,
297 if (!ll_async_page_slab)
298 GOTO(out_osc, -ENOMEM);
301 err = md_getstatus(sbi->ll_md_exp, &rootfid);
303 CERROR("cannot mds_connect: rc = %d\n", err);
306 CDEBUG(D_SUPER, "rootfid "DFID3"\n", PFID3(&rootfid));
307 sbi->ll_root_fid = rootfid;
309 sb->s_op = &lustre_super_operations;
310 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
311 sb->s_export_op = &lustre_export_operations;
315 * XXX: move this to after cbd setup? */
316 err = md_getattr(sbi->ll_md_exp, &rootfid,
317 OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS |
318 (sbi->ll_flags & LL_SBI_ACL ? OBD_MD_FLACL : 0),
321 CERROR("md_getattr failed for root: rc = %d\n", err);
325 err = md_get_lustre_md(sbi->ll_md_exp, request, 0, sbi->ll_dt_exp, &md);
327 CERROR("failed to understand root inode md: rc = %d\n", err);
328 ptlrpc_req_finished (request);
332 LASSERT(fid_oid(&sbi->ll_root_fid) != 0);
333 root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &md);
334 ptlrpc_req_finished(request);
336 if (root == NULL || is_bad_inode(root)) {
337 md_free_lustre_md(sbi->ll_dt_exp, &md);
338 CERROR("lustre_lite: bad iget4 for root\n");
339 GOTO(out_root, err = -EBADF);
342 err = ll_close_thread_start(&sbi->ll_lcq);
344 CERROR("cannot start close thread: rc %d\n", err);
348 /* making vm readahead 0 for 2.4.x. In the case of 2.6.x,
349 backing dev info assigned to inode mapping is used for
350 determining maximal readahead. */
351 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
352 !defined(KERNEL_HAS_AS_MAX_READAHEAD)
353 /* bug 2805 - set VM readahead to zero */
354 vm_max_readahead = vm_min_readahead = 0;
357 sb->s_root = d_alloc_root(root);
359 OBD_FREE(data, sizeof(*data));
360 sb->s_root->d_op = &ll_d_root_ops;
367 obd_disconnect(sbi->ll_dt_exp);
369 obd_disconnect(sbi->ll_md_exp);
373 lprocfs_unregister_mountpoint(sbi);
377 int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
381 *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL);
383 rc = obd_get_info(sbi->ll_md_exp, strlen("max_easize"), "max_easize",
386 CERROR("Get max mdsize error rc %d \n", rc);
391 void ll_dump_inode(struct inode *inode)
393 struct list_head *tmp;
394 int dentry_count = 0;
396 LASSERT(inode != NULL);
398 list_for_each(tmp, &inode->i_dentry)
401 CERROR("inode %p dump: dev=%s ino=%lu mode=%o count=%u, %d dentries\n",
402 inode, ll_i2mdexp(inode)->exp_obd->obd_name, inode->i_ino,
403 inode->i_mode, atomic_read(&inode->i_count), dentry_count);
406 void lustre_dump_dentry(struct dentry *dentry, int recur)
408 struct list_head *tmp;
411 LASSERT(dentry != NULL);
413 list_for_each(tmp, &dentry->d_subdirs)
416 CERROR("dentry %p dump: name=%.*s parent=%.*s (%p), inode=%p, count=%u,"
417 " flags=0x%x, fsdata=%p, %d subdirs\n", dentry,
418 dentry->d_name.len, dentry->d_name.name,
419 dentry->d_parent->d_name.len, dentry->d_parent->d_name.name,
420 dentry->d_parent, dentry->d_inode, atomic_read(&dentry->d_count),
421 dentry->d_flags, dentry->d_fsdata, subdirs);
422 if (dentry->d_inode != NULL)
423 ll_dump_inode(dentry->d_inode);
428 list_for_each(tmp, &dentry->d_subdirs) {
429 struct dentry *d = list_entry(tmp, struct dentry, d_child);
430 lustre_dump_dentry(d, recur - 1);
434 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
435 void lustre_throw_orphan_dentries(struct super_block *sb)
437 struct hlist_node *tmp, *next;
438 struct ll_sb_info *sbi = ll_s2sbi(sb);
440 /* Do this to get rid of orphaned dentries. That is not really true. */
441 hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) {
442 struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash);
443 CWARN("found orphan dentry %.*s (%p->%p) at unmount, dumping "
444 "before and after shrink_dcache_parent\n",
445 dentry->d_name.len, dentry->d_name.name, dentry, next);
446 lustre_dump_dentry(dentry, 1);
447 shrink_dcache_parent(dentry);
448 lustre_dump_dentry(dentry, 1);
452 #define lustre_throw_orphan_dentries(sb)
455 static void prune_deathrow(struct ll_sb_info *sbi, int try)
457 LIST_HEAD(throw_away);
462 locked = spin_trylock(&sbi->ll_deathrow_lock);
464 spin_lock(&sbi->ll_deathrow_lock);
473 list_splice_init(&sbi->ll_deathrow, &throw_away);
474 spin_unlock(&sbi->ll_deathrow_lock);
476 while (!list_empty(&throw_away)) {
477 struct ll_inode_info *lli;
480 lli = list_entry(throw_away.next, struct ll_inode_info,
482 list_del_init(&lli->lli_dead_list);
484 inode = ll_info2i(lli);
485 d_prune_aliases(inode);
487 CDEBUG(D_INODE, "prune duplicate inode %p inum %lu count %u\n",
488 inode, inode->i_ino, atomic_read(&inode->i_count));
494 void client_common_put_super(struct super_block *sb)
496 struct ll_sb_info *sbi = ll_s2sbi(sb);
499 ll_close_thread_shutdown(sbi->ll_lcq);
501 /* destroy inodes in deathrow */
502 prune_deathrow(sbi, 0);
504 list_del(&sbi->ll_conn_chain);
505 obd_disconnect(sbi->ll_dt_exp);
507 lprocfs_unregister_mountpoint(sbi);
508 if (sbi->ll_proc_root) {
509 lprocfs_remove(sbi->ll_proc_root);
510 sbi->ll_proc_root = NULL;
513 obd_disconnect(sbi->ll_md_exp);
515 lustre_throw_orphan_dentries(sb);
519 char *ll_read_opt(const char *opt, char *data)
525 CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
526 if (strncmp(opt, data, strlen(opt)))
528 if ((value = strchr(data, '=')) == NULL)
532 OBD_ALLOC(retval, strlen(value) + 1);
534 CERROR("out of memory!\n");
538 memcpy(retval, value, strlen(value)+1);
539 CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
543 static inline int ll_set_opt(const char *opt, char *data, int fl)
545 if (strncmp(opt, data, strlen(opt)) != 0)
551 /* non-client-specific mount options are parsed in lmd_parse */
552 void ll_options(char *options, int *flags)
555 char *s1 = options, *s2;
563 CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
566 CDEBUG(D_SUPER, "next opt=%s\n", s1);
567 tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
572 tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
577 tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK);
582 tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
587 tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
592 tmp = ll_set_opt("acl", s1, LL_SBI_ACL);
594 /* Ignore deprecated mount option. The client will
595 * always try to mount with ACL support, whether this
596 * is used depends on whether server supports it. */
599 tmp = ll_set_opt("noacl", s1, LL_SBI_ACL);
606 s2 = strchr(s1, ',');
614 void ll_lli_init(struct ll_inode_info *lli)
616 sema_init(&lli->lli_open_sem, 1);
617 sema_init(&lli->lli_size_sem, 1);
619 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
620 spin_lock_init(&lli->lli_lock);
621 INIT_LIST_HEAD(&lli->lli_pending_write_llaps);
622 lli->lli_inode_magic = LLI_INODE_MAGIC;
623 INIT_LIST_HEAD(&lli->lli_dead_list);
626 int ll_fill_super(struct super_block *sb)
628 struct lustre_profile *lprof;
629 struct lustre_sb_info *lsi = s2lsi(sb);
630 struct ll_sb_info *sbi;
633 char *profilenm = get_profile_name(sb);
634 struct config_llog_instance cfg;
635 char ll_instance[sizeof(sb) * 2 + 1];
639 CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
641 /* client additional sb info */
642 lsi->lsi_llsbi = sbi = ll_init_sbi();
646 ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags);
648 /* Generate a string unique to this super, in case some joker tries
649 to mount the same fs at two mount points.
650 Use the address of the super itself.*/
651 sprintf(ll_instance, "%p", sb);
652 cfg.cfg_instance = ll_instance;
653 cfg.cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
654 cfg.cfg_last_idx = 0;
656 /* set up client obds */
657 err = lustre_process_log(sb, profilenm, &cfg);
659 CERROR("Unable to process log: %d\n", err);
663 lprof = class_get_profile(profilenm);
665 CERROR("No profile found: %s\n", profilenm);
666 GOTO(out_free, err = -EINVAL);
668 CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
669 lprof->lp_mdc, lprof->lp_osc);
671 OBD_ALLOC(osc, strlen(lprof->lp_osc) +
672 strlen(ll_instance) + 2);
674 GOTO(out_free, err = -ENOMEM);
675 sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance);
677 OBD_ALLOC(mdc, strlen(lprof->lp_mdc) +
678 strlen(ll_instance) + 2);
680 GOTO(out_free, err = -ENOMEM);
681 sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance);
683 /* connections, registrations, sb setup */
684 err = client_common_fill_super(sb, mdc, osc);
688 OBD_FREE(mdc, strlen(mdc) + 1);
690 OBD_FREE(osc, strlen(osc) + 1);
692 struct obd_device *obd;
694 /* like ll_put_super below */
695 lustre_end_log(sb, NULL, &cfg);
696 while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next))
698 class_manual_cleanup(obd);
700 class_del_profile(profilenm);
702 lsi->lsi_llsbi = NULL;
703 lustre_common_put_super(sb);
706 } /* ll_fill_super */
709 void ll_put_super(struct super_block *sb)
711 struct config_llog_instance cfg;
712 char ll_instance[sizeof(sb) * 2 + 1];
713 struct obd_device *obd;
714 struct lustre_sb_info *lsi = s2lsi(sb);
715 struct ll_sb_info *sbi = ll_s2sbi(sb);
716 char *profilenm = get_profile_name(sb);
720 CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
722 sprintf(ll_instance, "%p", sb);
723 cfg.cfg_instance = ll_instance;
724 lustre_end_log(sb, NULL, &cfg);
726 obd = class_exp2obd(sbi->ll_md_exp);
728 int force = obd->obd_no_recov;
729 /* We need to set force before the lov_disconnect in
730 lustre_common_put_super, since l_d cleans up osc's as well. */
732 while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next))
734 obd->obd_force = force;
738 client_common_put_super(sb);
741 while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) {
742 class_manual_cleanup(obd);
746 class_del_profile(profilenm);
749 lsi->lsi_llsbi = NULL;
751 lustre_common_put_super(sb);
753 LCONSOLE_WARN("client umount complete\n");
755 } /* client_put_super */
757 #ifdef HAVE_REGISTER_CACHE
758 #include <linux/cache_def.h>
759 #ifdef HAVE_CACHE_RETURN_INT
764 ll_shrink_cache(int priority, unsigned int gfp_mask)
766 struct ll_sb_info *sbi;
769 list_for_each_entry(sbi, &ll_super_blocks, ll_list)
770 count += llap_shrink_cache(sbi, priority);
772 #ifdef HAVE_CACHE_RETURN_INT
777 struct cache_definition ll_cache_definition = {
778 .name = "llap_cache",
779 .shrink = ll_shrink_cache
781 #endif /* HAVE_REGISTER_CACHE */
783 struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
785 struct inode *inode = NULL;
786 l_lock(&lock->l_resource->lr_namespace->ns_lock);
787 if (lock->l_ast_data) {
788 struct ll_inode_info *lli = ll_i2info(lock->l_ast_data);
789 if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
790 inode = igrab(lock->l_ast_data);
792 inode = lock->l_ast_data;
793 if (inode->i_state & I_FREEING)
794 __LDLM_DEBUG(D_INFO, lock,
795 "l_ast_data %p is bogus: magic %08x",
796 lock->l_ast_data, lli->lli_inode_magic);
798 __LDLM_DEBUG(D_WARNING, lock,
799 "l_ast_data %p is bogus: magic %08x",
800 lock->l_ast_data, lli->lli_inode_magic);
805 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
809 static int null_if_equal(struct ldlm_lock *lock, void *data)
811 if (data == lock->l_ast_data) {
812 lock->l_ast_data = NULL;
814 if (lock->l_req_mode != lock->l_granted_mode)
815 LDLM_ERROR(lock,"clearing inode with ungranted lock");
818 return LDLM_ITER_CONTINUE;
821 void ll_clear_inode(struct inode *inode)
823 struct ll_inode_info *lli = ll_i2info(inode);
824 struct ll_sb_info *sbi = ll_i2sbi(inode);
827 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
828 inode->i_generation, inode);
830 clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(ll_i2info(inode)->lli_flags));
831 md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
832 null_if_equal, inode);
835 obd_change_cbdata(sbi->ll_dt_exp, lli->lli_smd,
836 null_if_equal, inode);
838 obd_free_memmd(sbi->ll_dt_exp, &lli->lli_smd);
842 if (lli->lli_symlink_name) {
843 OBD_FREE(lli->lli_symlink_name,
844 strlen(lli->lli_symlink_name) + 1);
845 lli->lli_symlink_name = NULL;
848 #ifdef CONFIG_FS_POSIX_ACL
849 if (lli->lli_posix_acl) {
850 LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
851 posix_acl_release(lli->lli_posix_acl);
852 lli->lli_posix_acl = NULL;
856 lli->lli_inode_magic = LLI_INODE_DEAD;
858 spin_lock(&sbi->ll_deathrow_lock);
859 list_del_init(&lli->lli_dead_list);
860 spin_unlock(&sbi->ll_deathrow_lock);
865 /* If this inode has objects allocated to it (lsm != NULL), then the OST
866 * object(s) determine the file size and mtime. Otherwise, the MDS will
867 * keep these values until such a time that objects are allocated for it.
868 * We do the MDS operations first, as it is checking permissions for us.
869 * We don't do the MDS RPC if there is nothing that we want to store there,
870 * otherwise there is no harm in updating mtime/atime on the MDS if we are
871 * going to do an RPC anyways.
873 * If we are doing a truncate, we will send the mtime and ctime updates
874 * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
875 * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
878 int ll_setattr_raw(struct inode *inode, struct iattr *attr)
880 struct ll_inode_info *lli = ll_i2info(inode);
881 struct lov_stripe_md *lsm = lli->lli_smd;
882 struct ll_sb_info *sbi = ll_i2sbi(inode);
883 struct ptlrpc_request *request = NULL;
884 struct md_op_data op_data = { { 0 } };
885 int ia_valid = attr->ia_valid;
889 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu valid %x\n", inode->i_ino,
891 lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_SETATTR);
893 if (ia_valid & ATTR_SIZE) {
894 if (attr->ia_size > ll_file_maxbytes(inode)) {
895 CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
896 attr->ia_size, ll_file_maxbytes(inode));
900 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
903 /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
904 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
905 if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
909 /* We mark all of the fields "set" so MDS/OST does not re-set them */
910 if (attr->ia_valid & ATTR_CTIME) {
911 attr->ia_ctime = CURRENT_TIME;
912 attr->ia_valid |= ATTR_CTIME_SET;
914 if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
915 attr->ia_atime = CURRENT_TIME;
916 attr->ia_valid |= ATTR_ATIME_SET;
918 if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
919 attr->ia_mtime = CURRENT_TIME;
920 attr->ia_valid |= ATTR_MTIME_SET;
923 if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
924 CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
925 LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
928 /* NB: ATTR_SIZE will only be set after this point if the size
929 * resides on the MDS, ie, this file has no objects. */
931 attr->ia_valid &= ~ATTR_SIZE;
933 /* If only OST attributes being set on objects, don't do MDS RPC.
934 * In that case, we need to check permissions and update the local
935 * inode ourselves so we can call obdo_from_inode() always. */
936 if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
938 ll_prepare_md_op_data(&op_data, inode, NULL, NULL, 0, 0);
940 rc = md_setattr(sbi->ll_md_exp, &op_data,
941 attr, NULL, 0, NULL, 0, &request);
944 ptlrpc_req_finished(request);
947 /* Unlinked special device node? Or just a race?
948 * Pretend we did everything. */
949 if (!S_ISREG(inode->i_mode) &&
950 !S_ISDIR(inode->i_mode) &&
951 !S_ISDIR(inode->i_mode))
952 rc = inode_setattr(inode, attr);
953 } else if (rc != -EPERM && rc != -EACCES) {
954 CERROR("mdcsetattr fails: rc = %d\n", rc);
959 rc = md_get_lustre_md(sbi->ll_md_exp, request, 0, sbi->ll_dt_exp, &md);
961 ptlrpc_req_finished(request);
965 /* We call inode_setattr to adjust timestamps.
966 * If there is at least some data in file, we cleared ATTR_SIZE
967 * above to avoid invoking vmtruncate, otherwise it is important
968 * to call vmtruncate in inode_setattr to update inode->i_size
970 rc = inode_setattr(inode, attr);
972 ll_update_inode(inode, &md);
973 ptlrpc_req_finished(request);
975 if (!lsm || !S_ISREG(inode->i_mode)) {
976 CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
980 /* The OST doesn't check permissions, but the alternative is
981 * a gratuitous RPC to the MDS. We already rely on the client
982 * to do read/write/truncate permission checks, so is mtime OK?
984 if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
985 /* from sys_utime() */
986 if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
987 if (current->fsuid != inode->i_uid &&
988 (rc=ll_permission(inode,MAY_WRITE,NULL))!=0)
991 /* from inode_change_ok() */
992 if (current->fsuid != inode->i_uid &&
993 !capable(CAP_FOWNER))
998 /* Won't invoke vmtruncate, as we already cleared ATTR_SIZE */
999 rc = inode_setattr(inode, attr);
1002 /* We really need to get our PW lock before we change inode->i_size.
1003 * If we don't we can race with other i_size updaters on our node, like
1004 * ll_file_read. We can also race with i_size propagation to other
1005 * nodes through dirtying and writeback of final cached pages. This
1006 * last one is especially bad for racing o_append users on other
1008 if (ia_valid & ATTR_SIZE) {
1009 ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
1011 struct lustre_handle lockh = { 0 };
1012 int err, ast_flags = 0;
1013 /* XXX when we fix the AST intents to pass the discard-range
1014 * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
1016 if (attr->ia_size == 0)
1017 ast_flags = LDLM_AST_DISCARD_DATA;
1019 UNLOCK_INODE_MUTEX(inode);
1020 UP_WRITE_I_ALLOC_SEM(inode);
1021 rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy, &lockh,
1023 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
1024 DOWN_WRITE_I_ALLOC_SEM(inode);
1025 LOCK_INODE_MUTEX(inode);
1027 LOCK_INODE_MUTEX(inode);
1028 DOWN_WRITE_I_ALLOC_SEM(inode);
1033 /* Only ll_inode_size_lock is taken at this level.
1034 * lov_stripe_lock() is grabbed by ll_truncate() only over
1035 * call to obd_adjust_kms(). If vmtruncate returns 0, then
1036 * ll_truncate dropped ll_inode_size_lock() */
1037 ll_inode_size_lock(inode, 0);
1038 rc = vmtruncate(inode, attr->ia_size);
1040 LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
1041 ll_inode_size_unlock(inode, 0);
1044 err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
1046 CERROR("ll_extent_unlock failed: %d\n", err);
1050 } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
1054 CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
1055 inode->i_ino, LTIME_S(attr->ia_mtime));
1057 oa.o_id = lsm->lsm_object_id;
1058 oa.o_valid = OBD_MD_FLID;
1060 flags = OBD_MD_FLTYPE | OBD_MD_FLATIME |
1061 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
1062 OBD_MD_FLFID | OBD_MD_FLGENER;
1064 obdo_from_inode(&oa, inode, flags);
1065 rc = obd_setattr(sbi->ll_dt_exp, &oa, lsm, NULL);
1067 CERROR("obd_setattr fails: rc=%d\n", rc);
1072 int ll_setattr(struct dentry *de, struct iattr *attr)
1074 return ll_setattr_raw(de->d_inode, attr);
1077 int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
1078 unsigned long max_age)
1080 struct ll_sb_info *sbi = ll_s2sbi(sb);
1081 struct obd_statfs obd_osfs;
1085 rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age);
1087 CERROR("md_statfs fails: rc = %d\n", rc);
1091 osfs->os_type = sb->s_magic;
1093 CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1094 osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
1096 rc = obd_statfs(class_exp2obd(sbi->ll_dt_exp), &obd_osfs, max_age);
1098 CERROR("obd_statfs fails: rc = %d\n", rc);
1102 CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1103 obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
1106 osfs->os_blocks = obd_osfs.os_blocks;
1107 osfs->os_bfree = obd_osfs.os_bfree;
1108 osfs->os_bavail = obd_osfs.os_bavail;
1110 /* If we don't have as many objects free on the OST as inodes
1111 * on the MDS, we reduce the total number of inodes to
1112 * compensate, so that the "inodes in use" number is correct.
1114 if (obd_osfs.os_ffree < osfs->os_ffree) {
1115 osfs->os_files = (osfs->os_files - osfs->os_ffree) +
1117 osfs->os_ffree = obd_osfs.os_ffree;
1123 int ll_statfs(struct super_block *sb, struct kstatfs *sfs)
1125 struct obd_statfs osfs;
1128 CDEBUG(D_VFSTRACE, "VFS Op:\n");
1129 lprocfs_counter_incr(ll_s2sbi(sb)->ll_stats, LPROC_LL_STAFS);
1131 /* For now we will always get up-to-date statfs values, but in the
1132 * future we may allow some amount of caching on the client (e.g.
1133 * from QOS or lprocfs updates). */
1134 rc = ll_statfs_internal(sb, &osfs, jiffies - 1);
1138 statfs_unpack(sfs, &osfs);
1140 if (sizeof(sfs->f_blocks) == 4) {
1141 while (osfs.os_blocks > ~0UL) {
1144 osfs.os_blocks >>= 1;
1145 osfs.os_bfree >>= 1;
1146 osfs.os_bavail >>= 1;
1150 sfs->f_blocks = osfs.os_blocks;
1151 sfs->f_bfree = osfs.os_bfree;
1152 sfs->f_bavail = osfs.os_bavail;
1157 void ll_inode_size_lock(struct inode *inode, int lock_lsm)
1159 struct ll_inode_info *lli;
1160 struct lov_stripe_md *lsm;
1162 lli = ll_i2info(inode);
1163 LASSERT(lli->lli_size_sem_owner != current);
1164 down(&lli->lli_size_sem);
1165 LASSERT(lli->lli_size_sem_owner == NULL);
1166 lli->lli_size_sem_owner = current;
1168 LASSERTF(lsm != NULL || lock_lsm == 0, "lsm %p, lock_lsm %d\n",
1171 lov_stripe_lock(lsm);
1174 void ll_inode_size_unlock(struct inode *inode, int unlock_lsm)
1176 struct ll_inode_info *lli;
1177 struct lov_stripe_md *lsm;
1179 lli = ll_i2info(inode);
1181 LASSERTF(lsm != NULL || unlock_lsm == 0, "lsm %p, lock_lsm %d\n",
1184 lov_stripe_unlock(lsm);
1185 LASSERT(lli->lli_size_sem_owner == current);
1186 lli->lli_size_sem_owner = NULL;
1187 up(&lli->lli_size_sem);
1190 static void ll_replace_lsm(struct inode *inode, struct lov_stripe_md *lsm)
1192 struct ll_inode_info *lli = ll_i2info(inode);
1194 dump_lsm(D_INODE, lsm);
1195 dump_lsm(D_INODE, lli->lli_smd);
1196 LASSERTF(lsm->lsm_magic == LOV_MAGIC_JOIN,
1197 "lsm must be joined lsm %p\n", lsm);
1198 obd_free_memmd(ll_i2dtexp(inode), &lli->lli_smd);
1199 CDEBUG(D_INODE, "replace lsm %p to lli_smd %p for inode %lu%u(%p)\n",
1200 lsm, lli->lli_smd, inode->i_ino, inode->i_generation, inode);
1202 lli->lli_maxbytes = lsm->lsm_maxbytes;
1203 if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
1204 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
/*
 * Copy attributes from a lustre_md into an in-core inode.
 *
 * inode: the inode to update.
 * md:    source of the attributes.  md->body (a struct mdt_body) supplies
 *        the validity bits and the attribute values, md->lsm the optional
 *        stripe metadata and md->posix_acl the optional ACL.
 *
 * An attribute is applied only when its OBD_MD_FL bit is set in
 * body->valid.  The function returns nothing.
 *
 * NOTE(review): the original line numbers embedded in this listing skip
 * values (for example 1214, 1219-1220, 1226, 1230, 1241-1243, 1251), so
 * some statements, braces and else / preprocessor lines are not visible
 * here.  The comments below describe only the statements that are visible.
 */
1207 void ll_update_inode(struct inode *inode, struct lustre_md *md)
1209 struct ll_inode_info *lli = ll_i2info(inode);
1210 struct mdt_body *body = md->body;
1211 struct lov_stripe_md *lsm = md->lsm;
/* Stripe metadata must be present exactly when OBD_MD_FLEASIZE is valid. */
1213 LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
/* The inode has no stripe metadata yet. */
1215 if (lli->lli_smd == NULL) {
/* Unrecognized magic: dump the lsm at D_ERROR level (the follow-up action
 * is not visible in this listing). */
1216 if (lsm->lsm_magic != LOV_MAGIC &&
1217 lsm->lsm_magic != LOV_MAGIC_JOIN) {
1218 dump_lsm(D_ERROR, lsm);
1221 CDEBUG(D_INODE, "adding lsm %p to inode %lu/%u(%p)\n",
1222 lsm, inode->i_ino, inode->i_generation, inode);
1223 /* ll_inode_size_lock() requires it is only called
1224 * with lli_smd != NULL or lock_lsm == 0 or we can
1225 * race between lock/unlock. bug 9547 */
/* Take the size limit from the lsm, clamped to PAGE_CACHE_MAXBYTES. */
1227 lli->lli_maxbytes = lsm->lsm_maxbytes;
1228 if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
1229 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
/* Compare the inode's existing stripe metadata with the incoming one: when
 * magic and stripe count agree, a non-zero lov_stripe_md_cmp() result is
 * reported as a mismatch and both descriptors are dumped. */
1231 if (lli->lli_smd->lsm_magic == lsm->lsm_magic &&
1232 lli->lli_smd->lsm_stripe_count ==
1233 lsm->lsm_stripe_count) {
1234 if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
1235 CERROR("lsm mismatch for inode %ld\n",
1237 CERROR("lli_smd:\n");
1238 dump_lsm(D_ERROR, lli->lli_smd);
1240 dump_lsm(D_ERROR, lsm);
/* NOTE(review): presumably the else arm of the magic/stripe-count test
 * above; the line selecting this path is not visible - confirm. */
1244 ll_replace_lsm(inode, lsm);
1246 /* bug 2844 - limit i_blksize for broken user-space apps */
1247 LASSERTF(lsm->lsm_xfersize != 0, "%lu\n", lsm->lsm_xfersize);
1248 inode->i_blksize = min(lsm->lsm_xfersize, LL_MAX_BLKSIZE);
/* Release the incoming lsm when the inode does not point at it. */
1249 if (lli->lli_smd != lsm)
1250 obd_free_memmd(ll_i2dtexp(inode), &lsm);
/* Keep i_blksize at least as large as the superblock block size. */
1252 inode->i_blksize = max(inode->i_blksize,
1253 inode->i_sb->s_blocksize);
1256 #ifdef CONFIG_FS_POSIX_ACL
/* An ACL may only be supplied together with OBD_MD_FLACL.  Under lli_lock,
 * drop the reference to any previous ACL and store the one from md. */
1257 LASSERT(!md->posix_acl || (body->valid & OBD_MD_FLACL));
1258 if (body->valid & OBD_MD_FLACL) {
1259 spin_lock(&lli->lli_lock);
1260 if (lli->lli_posix_acl)
1261 posix_acl_release(lli->lli_posix_acl);
1262 lli->lli_posix_acl = md->posix_acl;
1263 spin_unlock(&lli->lli_lock);
/* Timestamps only move forward: a value from the body is taken only when
 * it is newer than the one already in the inode. */
1267 if (body->valid & OBD_MD_FLATIME &&
1268 body->atime > LTIME_S(inode->i_atime))
1269 LTIME_S(inode->i_atime) = body->atime;
1270 if (body->valid & OBD_MD_FLMTIME &&
1271 body->mtime > LTIME_S(inode->i_mtime)) {
1272 CDEBUG(D_INODE, "setting ino %lu mtime from %lu to "LPU64"\n",
1273 inode->i_ino, LTIME_S(inode->i_mtime), body->mtime);
1274 LTIME_S(inode->i_mtime) = body->mtime;
1276 if (body->valid & OBD_MD_FLCTIME &&
1277 body->ctime > LTIME_S(inode->i_ctime))
1278 LTIME_S(inode->i_ctime) = body->ctime;
/* FLMODE replaces the non-type bits of i_mode and keeps the S_IFMT bits. */
1279 if (body->valid & OBD_MD_FLMODE)
1280 inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
/* FLTYPE replaces the S_IFMT bits of i_mode and keeps the others. */
1281 if (body->valid & OBD_MD_FLTYPE)
1282 inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
1283 if (body->valid & OBD_MD_FLUID)
1284 inode->i_uid = body->uid;
1285 if (body->valid & OBD_MD_FLGID)
1286 inode->i_gid = body->gid;
1287 if (body->valid & OBD_MD_FLFLAGS)
1288 inode->i_flags = body->flags;
1289 if (body->valid & OBD_MD_FLNLINK)
1290 inode->i_nlink = body->nlink;
/* Device number: stored directly on kernels older than 2.5.0; the second
 * assignment goes through old_decode_dev() (presumably the other arm of
 * the preprocessor conditional, whose remaining lines are not visible). */
1291 if (body->valid & OBD_MD_FLRDEV)
1292 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
1293 inode->i_rdev = body->rdev;
1295 inode->i_rdev = old_decode_dev(body->rdev);
1297 if (body->valid & OBD_MD_FLSIZE)
1298 inode->i_size = body->size;
1299 if (body->valid & OBD_MD_FLBLOCKS)
1300 inode->i_blocks = body->blocks;
/* A valid size in the body also sets LLI_F_HAVE_MDS_SIZE_LOCK. */
1302 if (body->valid & OBD_MD_FLSIZE)
1303 set_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
1305 if (body->valid & OBD_MD_FLID)
1306 lli->lli_fid = body->fid1;
/* The inode's fid must carry a non-zero sequence at this point. */
1308 LASSERT(fid_seq(&lli->lli_fid) != 0);
/*
 * Backing-device descriptor, defined only for kernels >= 2.5.0.  It is
 * assigned to inode->i_mapping->backing_dev_info in ll_read_inode2().
 * Kernels >= 2.6.12 initialise the .capabilities field; the
 * .memory_backed initialiser is presumably the arm for older kernels
 * (the else / endif lines and the closing brace are not visible in this
 * listing).
 */
1311 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
1312 static struct backing_dev_info ll_backing_dev_info = {
1313 .ra_pages = 0, /* No readahead */
1314 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12))
1315 .capabilities = 0, /* Does contribute to dirty memory */
1317 .memory_backed = 0, /* Does contribute to dirty memory */
/*
 * Initialise a new in-core inode from a lustre_md.
 *
 * inode:  the inode to fill in; it must not have stripe metadata yet.
 * opaque: pointer to the struct lustre_md describing the inode.
 *
 * The inode times are zeroed, ll_update_inode() applies the attributes,
 * and then the inode, file and address-space operations are chosen from
 * the file type in i_mode.
 *
 * NOTE(review): the embedded line numbering skips values, so braces, the
 * final else line and the else / endif of the version conditional near
 * the end are not visible in this listing.
 */
1322 void ll_read_inode2(struct inode *inode, void *opaque)
1324 struct lustre_md *md = opaque;
1325 struct ll_inode_info *lli = ll_i2info(inode);
1328 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
1329 inode->i_ino, inode->i_generation, inode);
1333 LASSERT(!lli->lli_smd);
1335 /* Core attributes from the MDS first. This is a new inode, and
1336 * the VFS doesn't zero times in the core inode so we have to do
1337 * it ourselves. They will be overwritten by either MDS or OST
1338 * attributes - we just need to make sure they aren't newer. */
1339 LTIME_S(inode->i_mtime) = 0;
1340 LTIME_S(inode->i_atime) = 0;
1341 LTIME_S(inode->i_ctime) = 0;
1343 ll_update_inode(inode, md);
1345 /* OIDEBUG(inode); */
/* Regular file: the file operations come from the superblock info. */
1347 if (S_ISREG(inode->i_mode)) {
1348 struct ll_sb_info *sbi = ll_i2sbi(inode);
1349 inode->i_op = &ll_file_inode_operations;
1350 inode->i_fop = sbi->ll_fop;
1351 inode->i_mapping->a_ops = &ll_aops;
/* Directory: directory-specific inode, file and address-space ops. */
1353 } else if (S_ISDIR(inode->i_mode)) {
1354 inode->i_op = &ll_dir_inode_operations;
1355 inode->i_fop = &ll_dir_operations;
1356 inode->i_mapping->a_ops = &ll_dir_aops;
/* Symbolic link: only the inode operations are set here. */
1358 } else if (S_ISLNK(inode->i_mode)) {
1359 inode->i_op = &ll_fast_symlink_inode_operations;
/* Remaining file types (presumably a final else arm - confirm): special
 * inode operations plus init_special_inode().  Kernels newer than 2.5.0
 * convert i_rdev with kdev_t_to_nr() and attach ll_backing_dev_info; the
 * last call passes i_rdev unchanged. */
1362 inode->i_op = &ll_special_inode_operations;
1364 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
1365 init_special_inode(inode, inode->i_mode,
1366 kdev_t_to_nr(inode->i_rdev));
1368 /* initializing backing dev info. */
1369 inode->i_mapping->backing_dev_info = &ll_backing_dev_info;
1371 init_special_inode(inode, inode->i_mode, inode->i_rdev);
/*
 * Inode deletion hook: calls obd_fid_delete() for the inode's fid on the
 * metadata export (sbi->ll_md_exp) and logs an error message carrying the
 * return code.
 *
 * NOTE(review): the declaration of rc, the condition guarding the CERROR
 * and any further statements are not visible in this listing (the
 * embedded line numbering skips values).
 */
1377 void ll_delete_inode(struct inode *inode)
1379 struct ll_sb_info *sbi = ll_i2sbi(inode);
1383 rc = obd_fid_delete(sbi->ll_md_exp, ll_inode2fid(inode));
1385 CERROR("fid_delete() failed, rc %d\n", rc);
/*
 * ioctl handler for the ext3-style inode flag commands.
 *
 * inode: inode the ioctl is issued on.
 * file:  open file (not referenced by the visible statements).
 * cmd:   command; the visible cases are EXT3_IOC_GETFLAGS and
 *        EXT3_IOC_SETFLAGS.
 * arg:   user-space address of an int holding the flags.
 *
 * NOTE(review): the embedded line numbering skips values, so the switch
 * statement, the declarations of rc, flags and oa, the allocation and
 * release of oa, the error returns and any default case are not visible
 * in this listing.
 */
1391 int ll_iocontrol(struct inode *inode, struct file *file,
1392 unsigned int cmd, unsigned long arg)
1394 struct ll_sb_info *sbi = ll_i2sbi(inode);
1395 struct ptlrpc_request *req = NULL;
/* GETFLAGS: fetch the flags attribute from the metadata export, translate
 * S_APPEND / S_IMMUTABLE / S_NOATIME into the EXT3 flag bits and store the
 * result at the user address. */
1400 case EXT3_IOC_GETFLAGS: {
1401 struct mdt_body *body;
1403 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
1404 OBD_MD_FLFLAGS, 0, &req);
1406 CERROR("failure %d inode %lu\n", rc, inode->i_ino);
/* NOTE(review): no NULL check on body is visible before it is
 * dereferenced below - confirm against the complete source. */
1410 body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body));
1412 if (body->flags & S_APPEND)
1413 flags |= EXT3_APPEND_FL;
1414 if (body->flags & S_IMMUTABLE)
1415 flags |= EXT3_IMMUTABLE_FL;
1416 if (body->flags & S_NOATIME)
1417 flags |= EXT3_NOATIME_FL;
1419 ptlrpc_req_finished (req);
1421 RETURN(put_user(flags, (int *)arg));
/* SETFLAGS: read the flags from user space, send them to the metadata
 * export with ATTR_ATTR_FLAG set, then (when the inode has stripe
 * metadata and the first call succeeded) send them to the data export,
 * and finally mirror them into inode->i_flags. */
1423 case EXT3_IOC_SETFLAGS: {
1424 struct md_op_data op_data = { { 0 } };
1425 struct ll_iattr_struct attr;
1427 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
1429 if (get_user(flags, (int *)arg))
1436 ll_prepare_md_op_data(&op_data, inode, NULL, NULL, 0, 0);
1438 memset(&attr, 0x0, sizeof(attr));
1439 attr.ia_attr_flags = flags;
1440 ((struct iattr *)&attr)->ia_valid |= ATTR_ATTR_FLAG;
1442 rc = md_setattr(sbi->ll_md_exp, &op_data,
1443 (struct iattr *)&attr, NULL, 0, NULL, 0, &req);
/* Stop after the metadata update on error or when there is no stripe
 * metadata (what is returned here is not visible). */
1444 if (rc || lsm == NULL) {
1445 ptlrpc_req_finished(req);
1449 ptlrpc_req_finished(req);
/* Describe the object and the new flags in oa for the data export. */
1451 oa->o_id = lsm->lsm_object_id;
1452 oa->o_flags = flags;
1453 oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS;
1455 obdo_from_inode(oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER);
1456 rc = obd_setattr(sbi->ll_dt_exp, oa, lsm, NULL);
/* -EPERM and -EACCES are not logged. */
1459 if (rc != -EPERM && rc != -EACCES)
1460 CERROR("md_setattr fails: rc = %d\n", rc);
/* Mirror each EXT3 flag into the matching S_ bit of i_flags; the clearing
 * statements are presumably the else arms (else lines not visible). */
1464 if (flags & EXT3_APPEND_FL)
1465 inode->i_flags |= S_APPEND;
1467 inode->i_flags &= ~S_APPEND;
1468 if (flags & EXT3_IMMUTABLE_FL)
1469 inode->i_flags |= S_IMMUTABLE;
1471 inode->i_flags &= ~S_IMMUTABLE;
1472 if (flags & EXT3_NOATIME_FL)
1473 inode->i_flags |= S_NOATIME;
1475 inode->i_flags &= ~S_NOATIME;
/*
 * Forced-unmount hook for a super block.
 *
 * Sets LSI_UMOUNT_FORCE in the lustre_sb_info flags, then, for the obd
 * behind the metadata export (ll_md_exp) and the obd behind the data
 * export (ll_dt_exp), sets obd_no_recov and issues an IOC_OSC_SET_ACTIVE
 * ioctl on the export.  An error is logged with the export handle cookie
 * for an invalid connection handle (the tests guarding those messages are
 * not visible).
 *
 * NOTE(review): the embedded line numbering skips values, so the NULL
 * tests on obd, the remaining obd_iocontrol() arguments, the closing
 * delimiter of the final block comment and the end of the function are
 * not visible in this listing.
 */
1486 /* umount -f client means force down, don't save state */
1487 void ll_umount_begin(struct super_block *sb)
1489 struct lustre_sb_info *lsi = s2lsi(sb);
1490 struct ll_sb_info *sbi = ll_s2sbi(sb);
1491 struct obd_device *obd;
1492 struct obd_ioctl_data ioc_data = { 0 };
1495 /* Tell the MGC we got umount -f */
1496 lsi->lsi_flags |= LSI_UMOUNT_FORCE;
1498 CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
1499 sb->s_count, atomic_read(&sb->s_active));
/* Metadata export first. */
1501 obd = class_exp2obd(sbi->ll_md_exp);
1503 CERROR("Invalid MDC connection handle "LPX64"\n",
1504 sbi->ll_md_exp->exp_handle.h_cookie);
1508 obd->obd_no_recov = 1;
1509 obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_md_exp, sizeof ioc_data,
/* Then the data export, handled the same way. */
1512 obd = class_exp2obd(sbi->ll_dt_exp);
1514 CERROR("Invalid LOV connection handle "LPX64"\n",
1515 sbi->ll_dt_exp->exp_handle.h_cookie);
1520 obd->obd_no_recov = 1;
1521 obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp, sizeof ioc_data,
1524 /* Really, we'd like to wait until there are no requests outstanding,
1525 * and then continue. For now, we just invalidate the requests,
1526 * schedule, and hope.
/*
 * Remount hook for a super block.
 *
 * sb:    super block being remounted.
 * flags: requested mount flags; only MS_RDONLY is examined in the visible
 *        statements.
 * data:  mount option string (not referenced by the visible statements).
 *
 * When the requested MS_RDONLY bit differs from the one in sb->s_flags,
 * the new value is sent to the metadata export under the key "read-only"
 * via obd_set_info_async(), a failure is logged, and sb->s_flags is
 * updated.
 *
 * NOTE(review): the embedded line numbering skips values, so the
 * declarations of err and read_only, the last obd_set_info_async()
 * arguments, the error test, the test choosing between setting and
 * clearing MS_RDONLY and the return statements are not visible here.
 */
1533 int ll_remount_fs(struct super_block *sb, int *flags, char *data)
1535 struct ll_sb_info *sbi = ll_s2sbi(sb);
/* Act only when the read-only state actually changes. */
1539 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
1540 read_only = *flags & MS_RDONLY;
1541 err = obd_set_info_async(sbi->ll_md_exp, strlen("read-only"),
1542 "read-only", sizeof(read_only),
1545 CERROR("Failed to change the read-only flag during "
1546 "remount: %d\n", err);
/* Record the new state in the super block flags. */
1551 sb->s_flags |= MS_RDONLY;
1553 sb->s_flags &= ~MS_RDONLY;
/*
 * Update or create an in-core inode from the reply of an RPC.
 *
 * inode:  in/out; when *inode is already set it is passed to
 *         ll_update_inode(), otherwise the result of ll_iget() is stored
 *         in *inode (the test choosing between the two is not visible).
 * req:    request whose reply is unpacked by md_get_lustre_md().
 * offset: offset passed through to md_get_lustre_md().
 * sb:     super block; at least one of *inode and sb must be non-NULL,
 *         and sb is asserted non-NULL before ll_iget() is called.
 *
 * The inode number given to ll_iget() is built from md.body->fid1 with
 * ll_fid_build_ino().  When ll_iget() yields NULL or a bad inode the
 * lustre_md is released with md_free_lustre_md() and an error is logged.
 * The last visible statement runs obd_checkmd() on the inode's stripe
 * metadata.
 *
 * NOTE(review): the embedded line numbering skips values, so the
 * declaration of rc, the error tests, the return statements and the
 * closing delimiter of the fid comment are not visible in this listing.
 */
1558 int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
1559 int offset, struct super_block *sb)
1561 struct ll_sb_info *sbi = NULL;
1562 struct lustre_md md;
1566 LASSERT(*inode || sb);
1567 sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
1568 prune_deathrow(sbi, 1);
1570 rc = md_get_lustre_md(sbi->ll_md_exp, req, offset,
1571 sbi->ll_dt_exp, &md);
1576 ll_update_inode(*inode, &md);
1578 LASSERT(sb != NULL);
1580 /* at this point server answers to client's RPC with same fid as
1581 * client generated for creating some inode. So using ->fid1 is
1583 LASSERT(fid_num(&md.body->fid1) != 0);
1585 *inode = ll_iget(sb, ll_fid_build_ino(sbi, &md.body->fid1), &md);
1586 if (*inode == NULL || is_bad_inode(*inode)) {
1587 md_free_lustre_md(sbi->ll_dt_exp, &md);
1589 CERROR("new_inode -fatal: rc %d\n", rc);
1594 rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp,
1595 ll_i2info(*inode)->lli_smd);
/*
 * Two-character tags indexed by the LLAP_ORIGIN constants.  The closing
 * brace of the initialiser is not visible in this listing.
 */
1600 char *llap_origins[] = {
1601 [LLAP_ORIGIN_UNKNOWN] = "--",
1602 [LLAP_ORIGIN_READPAGE] = "rp",
1603 [LLAP_ORIGIN_READAHEAD] = "ra",
1604 [LLAP_ORIGIN_COMMIT_WRITE] = "cw",
1605 [LLAP_ORIGIN_WRITEPAGE] = "wp",
/*
 * Walk the entries following 'list' and look at each one as a
 * struct ll_async_page linked through llap_pglist_item.
 *
 * sbi:  superblock info; &sbi->ll_pglist is tested for specially during
 *       the walk.
 * list: list position to start from.
 *
 * NOTE(review): the embedded line numbering skips values, so the actions
 * taken when the position equals &sbi->ll_pglist or when llap_page is
 * NULL, and the return statements, are not visible in this listing.
 * Presumably the head terminates the walk and entries without a page are
 * skipped - confirm against the complete source.
 */
1608 struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
1609 struct list_head *list)
1611 struct ll_async_page *llap;
1612 struct list_head *pos;
1614 list_for_each(pos, list) {
1615 if (pos == &sbi->ll_pglist)
1617 llap = list_entry(pos, struct ll_async_page, llap_pglist_item);
1618 if (llap->llap_page == NULL)
/*
 * ioctl helper that reports obd_statfs data for one client obd.
 *
 * inode: inode used to find the superblock info; -EINVAL when it or the
 *        superblock info is missing.
 * arg:   user ioctl argument, unpacked with obd_ioctl_getdata().
 *
 * Inline buffer 1 carries a __u32 type and inline buffer 2 a __u32 index.
 * LL_STATFS_MDC selects the obd behind the metadata export and
 * LL_STATFS_LOV the obd behind target 'index' of the LOV; -ENODEV is used
 * for an index at or beyond ld_tgt_count and -ENODATA for an inactive
 * target.  The statfs result is copied to user buffer 1 and the value of
 * obd2cli_tgt(client_obd) to user buffer 2.
 *
 * NOTE(review): the embedded line numbering skips values, so the
 * declarations of rc, buf, len, type and index, the assignment of data,
 * several error tests, the out_uuid / out_statfs labels and the end of
 * the function are not visible in this listing.
 */
1626 int ll_obd_statfs(struct inode *inode, void *arg)
1628 struct ll_sb_info *sbi = NULL;
1629 struct obd_device *client_obd = NULL, *lov_obd = NULL;
1630 struct lov_obd *lov = NULL;
1631 struct obd_statfs stat_buf = {0};
1633 struct obd_ioctl_data *data = NULL;
1637 if (!inode || !(sbi = ll_i2sbi(inode)))
1638 GOTO(out_statfs, rc = -EINVAL);
1640 rc = obd_ioctl_getdata(&buf, &len, arg);
1642 GOTO(out_statfs, rc);
/* All four buffers (two inline, two user pointers) are required. */
1645 if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
1646 !data->ioc_pbuf1 || !data->ioc_pbuf2)
1647 GOTO(out_statfs, rc = -EINVAL);
1649 memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
1650 memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
/* Pick the obd to query from the requested type. */
1652 if (type == LL_STATFS_MDC) {
1654 GOTO(out_statfs, rc = -ENODEV);
1655 client_obd = class_exp2obd(sbi->ll_md_exp);
1656 } else if (type == LL_STATFS_LOV) {
1657 lov_obd = class_exp2obd(sbi->ll_dt_exp);
1658 lov = &lov_obd->u.lov;
1660 if (index >= lov->desc.ld_tgt_count)
1661 GOTO(out_statfs, rc = -ENODEV);
1663 client_obd = class_exp2obd(lov->tgts[index].ltd_exp);
1664 if (!lov->tgts[index].active)
1665 GOTO(out_uuid, rc = -ENODATA);
/* Presumably the arm for any other type value (guard not visible). */
1669 GOTO(out_statfs, rc = -EINVAL);
1671 rc = obd_statfs(client_obd, &stat_buf, jiffies - 1);
1673 GOTO(out_statfs, rc);
/* NOTE(review): the copy length is the caller-supplied data->ioc_plen1
 * while the source is the on-stack stat_buf; no check bounding ioc_plen1
 * by sizeof(stat_buf) is visible here - confirm against the complete
 * source. */
1675 if (copy_to_user(data->ioc_pbuf1, &stat_buf, data->ioc_plen1))
1676 GOTO(out_statfs, rc = -EFAULT);
1679 if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(client_obd),
/* Release the buffer obtained from obd_ioctl_getdata(). */
1685 obd_ioctl_freedata(buf, len);