Commit OST AMD support to HEAD so we can begin running with a common code base.

[fs/lustre-release.git] / lustre / llite / llite_lib.c
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c

index 267c61c..906e978 100644 (file)
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -24,6 +24,7 @@
  #define DEBUG_SUBSYSTEM S_LLITE
  
  #include <linux/module.h>
+#include <linux/types.h>
  #include <linux/random.h>
  #include <linux/version.h>
  
@@ -31,19 +32,19 @@
  #include <linux/lustre_ha.h>
  #include <linux/lustre_dlm.h>
  #include <linux/lprocfs_status.h>
+#include <linux/lustre_snap.h>
  #include "llite_internal.h"
  
  kmem_cache_t *ll_file_data_slab;
  
  extern struct address_space_operations ll_aops;
  extern struct address_space_operations ll_dir_aops;
-extern struct super_operations ll_super_operations;
  
  #ifndef log2
  #define log2(n) ffz(~(n))
  #endif
  
-struct ll_sb_info *lustre_init_sbi(struct super_block *sb) 
+struct ll_sb_info *lustre_init_sbi(struct super_block *sb)
  {
          struct ll_sb_info *sbi = NULL;
          class_uuid_t uuid;
@@ -53,29 +54,59 @@ struct ll_sb_info *lustre_init_sbi(struct super_block *sb)
          if (!sbi)
                  RETURN(NULL);
  
-        spin_lock_init(&sbi->ll_pglist_lock);
+        spin_lock_init(&sbi->ll_lock);
          INIT_LIST_HEAD(&sbi->ll_pglist);
          sbi->ll_pglist_gen = 0;
+        sbi->ll_max_read_ahead_pages = SBI_DEFAULT_RA_MAX;
          INIT_LIST_HEAD(&sbi->ll_conn_chain);
          INIT_HLIST_HEAD(&sbi->ll_orphan_dentry_list);
-        ll_s2sbi(sb) = sbi;
+        INIT_LIST_HEAD(&sbi->ll_mnt_list);
+        sema_init(&sbi->ll_gns_sem, 1);
+        init_completion(&sbi->ll_gns_completion);
+        sbi->ll_gns_state = LL_GNS_STATE_IDLE;
+        sbi->ll_gns_timer.data = (unsigned long)sbi;
+        sbi->ll_gns_timer.function = ll_gns_timer_callback;
+        init_timer(&sbi->ll_gns_timer);
+        INIT_LIST_HEAD(&sbi->ll_gns_sbi_head);
+
+        ll_set_sbi(sb, sbi);
  
          generate_random_uuid(uuid);
          class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
          RETURN(sbi);
  }
  
-void lustre_free_sbi(struct super_block *sb) 
+void lustre_free_sbi(struct super_block *sb)
  {
          struct ll_sb_info *sbi = ll_s2sbi(sb);
          ENTRY;
  
-        if (sbi != NULL)
+        if (sbi != NULL) {
+                list_del(&sbi->ll_gns_sbi_head);
+                del_timer(&sbi->ll_gns_timer);
                  OBD_FREE(sbi, sizeof(*sbi));
-        ll_s2sbi(sb) = NULL;
+        }
+        ll_set_sbi(sb, NULL);
          EXIT;
  }
  
+int lustre_init_ea_size(struct ll_sb_info *sbi)
+{
+        struct lov_desc desc;
+        int rc, valsize;
+        
+        /* OSC may have larger ea size */
+        valsize = sizeof(desc);
+        rc = obd_get_info(sbi->ll_osc_exp, strlen("lovdesc") + 1, "lovdesc", 
+                          &valsize, &desc);
+        if (rc)
+                RETURN(rc);
+        obd_init_ea_size(sbi->ll_mdc_exp, obd_size_diskmd(sbi->ll_osc_exp, NULL),
+                         desc.ld_tgt_count*sizeof(struct llog_cookie));
+
+        RETURN(rc);
+}
+
  int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
  {
          struct inode *root = 0;
@@ -103,9 +134,7 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
                          CERROR("could not register mount in /proc/lustre");
          }
  
-        mdc_init_ea_size(obd, osc);
-
-        err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid);
+        err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, 0);
          if (err == -EBUSY) {
                  CERROR("An MDS (mdc %s) is performing recovery, of which this"
                         " client is not a part.  Please wait for recovery to "
@@ -126,9 +155,10 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
          sb->s_blocksize_bits = log2(osfs.os_bsize);
          sb->s_magic = LL_SUPER_MAGIC;
          sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-        
-        devno = get_uuid2int(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid, 
-                             strlen(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid));
+       
+        devno = get_uuid2int(sbi->ll_mdc_exp->exp_obd->obd_uuid.uuid, 
+                             strlen(sbi->ll_mdc_exp->exp_obd->obd_uuid.uuid));
+
          sb->s_dev = devno;
  
          obd = class_name2obd(osc);
@@ -137,7 +167,7 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
                  GOTO(out_mdc, err);
          }
  
-        err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid);
+        err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, 0);
          if (err == -EBUSY) {
                  CERROR("An OST (osc %s) is performing recovery, of which this"
                         " client is not a part.  Please wait for recovery to "
@@ -149,7 +179,9 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
          }
          sbi->ll_osc_exp = class_conn2export(&osc_conn);
  
-        err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
+        lustre_init_ea_size(sbi);
+
+        err = md_getstatus(sbi->ll_mdc_exp, &rootfid);
          if (err) {
                  CERROR("cannot mds_connect: rc = %d\n", err);
                  GOTO(out_osc, err);
@@ -159,16 +191,17 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
  
          sb->s_op = &lustre_super_operations;
  
-        /* make root inode 
+        /* make root inode
           * XXX: move this to after cbd setup? */
-        err = mdc_getattr(sbi->ll_mdc_exp, &rootfid,
-                          OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
+        err = md_getattr(sbi->ll_mdc_exp, &rootfid,
+                         OBD_MD_FLNOTOBD | OBD_MD_FLBLOCKS, 0, &request);
          if (err) {
-                CERROR("mdc_getattr failed for root: rc = %d\n", err);
+                CERROR("md_getattr failed for root: rc = %d\n", err);
                  GOTO(out_osc, err);
          }
  
-        err = mdc_req2lustre_md(request, 0, sbi->ll_osc_exp, &md);
+        err = mdc_req2lustre_md(sbi->ll_mdc_exp, request, 0, 
+                                sbi->ll_osc_exp, &md);
          if (err) {
                  CERROR("failed to understand root inode md: rc = %d\n",err);
                  ptlrpc_req_finished (request);
@@ -192,7 +225,23 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
                  GOTO(out_root, err);
          }
  
+        ll_gns_add_timer(sbi);
+
+        /* making vm readahead 0 for 2.4.x. In the case of 2.6.x,
+           backing dev info assigned to inode mapping is used for
+           determining maximal readahead. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
+        /* bug 2805 - set VM readahead to zero */
+        vm_max_readahead = vm_min_readahead = 0;
+#endif
+
          sb->s_root = d_alloc_root(root);
+
+#ifdef S_PDIROPS
+        CWARN("Enabling PDIROPS\n");
+        sb->s_flags |= S_PDIROPS;
+#endif
+
          RETURN(err);
  
  out_root:
@@ -213,6 +262,8 @@ void lustre_common_put_super(struct super_block *sb)
          struct hlist_node *tmp, *next;
          ENTRY;
  
+        ll_gns_del_timer(sbi);
+
          ll_close_thread_shutdown(sbi->ll_lcq);
  
          list_del(&sbi->ll_conn_chain);
@@ -230,6 +281,8 @@ void lustre_common_put_super(struct super_block *sb)
          spin_lock(&dcache_lock);
          hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) {
                  struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash);
+                CWARN("orphan dentry %*s (%p) at unmount\n",
+                      dentry->d_name.len, dentry->d_name.name, dentry);
                  shrink_dcache_parent(dentry);
          }
          spin_unlock(&dcache_lock);
@@ -272,7 +325,8 @@ int ll_set_opt(const char *opt, char *data, int fl)
                  RETURN(fl);
  }
  
-void ll_options(char *options, char **ost, char **mdc, int *flags)
+void ll_options(char *options, char **ost, char **mdc, int *flags, 
+                char **clone_opts)
  {
          char *this_char;
  #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
@@ -297,12 +351,16 @@ void ll_options(char *options, char **ost, char **mdc, int *flags)
                          continue;
                  if (!*mdc && (*mdc = ll_read_opt("mdc", this_char)))
                          continue;
+                if (!*clone_opts && (*clone_opts = ll_read_opt("clone", 
+                                                                this_char))) 
+                        continue; 
                  if (!(*flags & LL_SBI_NOLCK) &&
                      ((*flags) = (*flags) |
                                  ll_set_opt("nolock", this_char,
                                             LL_SBI_NOLCK)))
                          continue;
          }
+        
          EXIT;
  }
  
@@ -313,6 +371,7 @@ void ll_lli_init(struct ll_inode_info *lli)
          lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
          spin_lock_init(&lli->lli_lock);
          INIT_LIST_HEAD(&lli->lli_pending_write_llaps);
+        lli->lli_inode_magic = LLI_INODE_MAGIC;
  }
  
  int ll_fill_super(struct super_block *sb, void *data, int silent)
@@ -320,6 +379,7 @@ int ll_fill_super(struct super_block *sb, void *data, int silent)
          struct ll_sb_info *sbi;
          char *osc = NULL;
          char *mdc = NULL;
+        char *clone_opts = NULL;
          int err;
          ENTRY;
  
@@ -330,7 +390,7 @@ int ll_fill_super(struct super_block *sb, void *data, int silent)
                  RETURN(-ENOMEM);
  
          sbi->ll_flags |= LL_SBI_READAHEAD;
-        ll_options(data, &osc, &mdc, &sbi->ll_flags);
+        ll_options(data, &osc, &mdc, &sbi->ll_flags, &clone_opts);
  
          if (!osc) {
                  CERROR("no osc\n");
@@ -351,25 +411,14 @@ out:
                  OBD_FREE(mdc, strlen(mdc) + 1);
          if (osc)
                  OBD_FREE(osc, strlen(osc) + 1);
+        if (clone_opts)
+                OBD_FREE(clone_opts, strlen(clone_opts) + 1);
  
          RETURN(err);
  } /* ll_read_super */
  
-void ll_put_super(struct super_block *sb)
-{
-        ENTRY;
-
-        CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
-
-        lustre_common_put_super(sb);
-
-        lustre_free_sbi(sb);
-
-        EXIT;
-} /* ll_put_super */
-
-int lustre_process_log(struct lustre_mount_data *lmd, char * profile,
-                       struct config_llog_instance *cfg)
+static int lustre_process_log(struct lustre_mount_data *lmd, char *profile,
+                              struct config_llog_instance *cfg, int allow_recov)
  {
          struct lustre_cfg lcfg;
          struct portals_cfg pcfg;
@@ -385,6 +434,9 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile,
          int err;
          ENTRY;
  
+        if (lmd_bad_magic(lmd))
+                RETURN(-EINVAL);
+
          generate_random_uuid(uuid);
          class_uuid_unparse(uuid, &mdc_uuid);
  
@@ -392,7 +444,7 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile,
                  PCFG_INIT(pcfg, NAL_CMD_REGISTER_MYNID);
                  pcfg.pcfg_nal = lmd->lmd_nal;
                  pcfg.pcfg_nid = lmd->lmd_local_nid;
-                err = kportal_nal_cmd(&pcfg);
+                err = libcfs_nal_cmd(&pcfg);
                  if (err <0)
                          GOTO(out, err);
          }
@@ -405,7 +457,7 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile,
                  pcfg.pcfg_misc    = lmd->lmd_port;
                  pcfg.pcfg_size    = 8388608;
                  pcfg.pcfg_flags   = 0x4; /*share*/
-                err = kportal_nal_cmd(&pcfg);
+                err = libcfs_nal_cmd(&pcfg);
                  if (err <0)
                          GOTO(out, err);
          }
@@ -436,24 +488,30 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile,
          err = class_process_config(&lcfg);
          if (err < 0)
                  GOTO(out_detach, err);
-        
+
          obd = class_name2obd(name);
          if (obd == NULL)
                  GOTO(out_cleanup, err = -EINVAL);
  
-        err = obd_connect(&mdc_conn, obd, &mdc_uuid);
+        /* Disable initial recovery on this import */
+        err = obd_set_info(obd->obd_self_export,
+                           strlen("initial_recov"), "initial_recov",
+                           sizeof(allow_recov), &allow_recov);
+        if (err)
+                GOTO(out_cleanup, err);
+
+        err = obd_connect(&mdc_conn, obd, &mdc_uuid, 0);
          if (err) {
                  CERROR("cannot connect to %s: rc = %d\n", lmd->lmd_mds, err);
                  GOTO(out_cleanup, err);
          }
-        
+
          exp = class_conn2export(&mdc_conn);
-        
-        ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
-        rc = class_config_parse_llog(ctxt, profile, cfg);
-        if (rc) {
-                CERROR("class_config_parse_llog failed: rc = %d\n", rc);
-        }
+
+        ctxt = llog_get_context(&exp->exp_obd->obd_llogs,LLOG_CONFIG_REPL_CTXT);
+        rc = class_config_process_llog(ctxt, profile, cfg);
+        if (rc)
+                CERROR("class_config_process_llog failed: rc = %d\n", rc);
  
          err = obd_disconnect(exp, 0);
  
@@ -482,14 +540,14 @@ out_del_conn:
                  pcfg.pcfg_nid     = lmd->lmd_server_nid;
                  pcfg.pcfg_id      = lmd->lmd_server_ipaddr;
                  pcfg.pcfg_flags   = 1; /*share*/
-                err = kportal_nal_cmd(&pcfg);
+                err = libcfs_nal_cmd(&pcfg);
                  if (err <0)
                          GOTO(out, err);
          }
  out:
          if (rc == 0)
                  rc = err;
-        
+
          RETURN(rc);
  }
  
@@ -503,6 +561,9 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
          ENTRY;
  
          CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
+        if (lmd_bad_magic(lmd))
+                RETURN(-EINVAL);
+
          sbi = lustre_init_sbi(sb);
          if (!sbi)
                  RETURN(-ENOMEM);
@@ -514,31 +575,30 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
                  struct config_llog_instance cfg;
                  int len;
  
-                if (!lmd->lmd_mds) {
+                if (lmd->lmd_mds[0] == '\0') {
                          CERROR("no mds name\n");
                          GOTO(out_free, err = -EINVAL);
                  }
  
                  OBD_ALLOC(sbi->ll_lmd, sizeof(*sbi->ll_lmd));
-                if (sbi->ll_lmd == NULL) 
+                if (sbi->ll_lmd == NULL)
                          GOTO(out_free, err = -ENOMEM);
                  memcpy(sbi->ll_lmd, lmd, sizeof(*lmd));
  
                  /* generate a string unique to this super, let's try
                   the address of the super itself.*/
-                len = (sizeof(sb) * 2) + 1; 
+                len = (sizeof(sb) * 2) + 1;
                  OBD_ALLOC(sbi->ll_instance, len);
-                if (sbi->ll_instance == NULL) 
+                if (sbi->ll_instance == NULL)
                          GOTO(out_free, err = -ENOMEM);
                  sprintf(sbi->ll_instance, "%p", sb);
  
                  cfg.cfg_instance = sbi->ll_instance;
                  cfg.cfg_uuid = sbi->ll_sb_uuid;
                  cfg.cfg_local_nid = lmd->lmd_local_nid;
-                err = lustre_process_log(lmd, lmd->lmd_profile, &cfg);
+                err = lustre_process_log(lmd, lmd->lmd_profile, &cfg, 0);
                  if (err < 0) {
                          CERROR("Unable to process log: %s\n", lmd->lmd_profile);
-
                          GOTO(out_free, err);
                  }
  
@@ -549,13 +609,13 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
                  }
                  if (osc)
                          OBD_FREE(osc, strlen(osc) + 1);
-                OBD_ALLOC(osc, strlen(lprof->lp_osc) + 
+                OBD_ALLOC(osc, strlen(lprof->lp_osc) +
                            strlen(sbi->ll_instance) + 2);
                  sprintf(osc, "%s-%s", lprof->lp_osc, sbi->ll_instance);
  
                  if (mdc)
                          OBD_FREE(mdc, strlen(mdc) + 1);
-                OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + 
+                OBD_ALLOC(mdc, strlen(lprof->lp_mdc) +
                            strlen(sbi->ll_instance) + 2);
                  sprintf(mdc, "%s-%s", lprof->lp_mdc, sbi->ll_instance);
          }
@@ -569,12 +629,12 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
                  CERROR("no mdc\n");
                  GOTO(out_free, err = -EINVAL);
          }
-        
+
          err = lustre_common_fill_super(sb, mdc, osc);
-        
+
          if (err)
                  GOTO(out_free, err);
-
+        
  out_dev:
          if (mdc)
                  OBD_FREE(mdc, strlen(mdc) + 1);
@@ -589,6 +649,7 @@ out_free:
                  int err;
  
                  if (sbi->ll_instance != NULL) {
+                        struct lustre_mount_data *lmd = sbi->ll_lmd;
                          char * cln_prof;
                          struct config_llog_instance cfg;
  
@@ -596,10 +657,10 @@ out_free:
                          cfg.cfg_uuid = sbi->ll_sb_uuid;
  
                          OBD_ALLOC(cln_prof, len);
-                        sprintf(cln_prof, "%s-clean", sbi->ll_lmd->lmd_profile);
+                        sprintf(cln_prof, "%s-clean", lmd->lmd_profile);
  
-                        err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg);
-                        if (err < 0) 
+                        err = lustre_process_log(lmd, cln_prof, &cfg, 0);
+                        if (err < 0)
                                  CERROR("Unable to process log: %s\n", cln_prof);
                          OBD_FREE(cln_prof, len);
                          OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance)+ 1);
@@ -611,32 +672,76 @@ out_free:
          goto out_dev;
  } /* lustre_fill_super */
  
+static void lustre_manual_cleanup(struct ll_sb_info *sbi)
+{
+        struct lustre_cfg lcfg;
+        struct obd_device *obd;
+        int next = 0;
+
+        while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
+        {
+                int err;
+
+                LCFG_INIT(lcfg, LCFG_CLEANUP, obd->obd_name);
+                err = class_process_config(&lcfg);
+                if (err) {
+                        CERROR("cleanup failed: %s\n", obd->obd_name);
+                        //continue;
+                }
+
+                LCFG_INIT(lcfg, LCFG_DETACH, obd->obd_name);
+                err = class_process_config(&lcfg);
+                if (err) {
+                        CERROR("detach failed: %s\n", obd->obd_name);
+                        //continue;
+                }
+        }
+
+        if (sbi->ll_lmd != NULL)
+                class_del_profile(sbi->ll_lmd->lmd_profile);
+}
+
  void lustre_put_super(struct super_block *sb)
  {
+        struct obd_device *obd;
          struct ll_sb_info *sbi = ll_s2sbi(sb);
+        int force_umount = 0;
          ENTRY;
  
          CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
+        obd = class_exp2obd(sbi->ll_mdc_exp);
+        if (obd)
+                force_umount = obd->obd_no_recov;
+        obd = NULL;
  
          lustre_common_put_super(sb);
-
          if (sbi->ll_lmd != NULL) {
                  char * cln_prof;
                  int len = strlen(sbi->ll_lmd->lmd_profile) + sizeof("-clean")+1;
                  int err;
                  struct config_llog_instance cfg;
  
+                if (force_umount) {
+                        CERROR("force umount, doing manual cleanup\n");
+                        lustre_manual_cleanup(sbi);
+                        GOTO(free_lmd, 0);
+                }
+
                  cfg.cfg_instance = sbi->ll_instance;
                  cfg.cfg_uuid = sbi->ll_sb_uuid;
  
                  OBD_ALLOC(cln_prof, len);
                  sprintf(cln_prof, "%s-clean", sbi->ll_lmd->lmd_profile);
  
-                err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg);
-                if (err < 0)
-                        CERROR("Unable to process log: %s\n", cln_prof);
+                err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg, 0);
+                if (err < 0) {
+                        CERROR("Unable to process log: %s, doing manual cleanup"
+                               "\n", cln_prof);
+                        lustre_manual_cleanup(sbi);
+                }
  
                  OBD_FREE(cln_prof, len);
+        free_lmd:
                  OBD_FREE(sbi->ll_lmd, sizeof(*sbi->ll_lmd));
                  OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance) + 1);
          }
@@ -646,26 +751,101 @@ void lustre_put_super(struct super_block *sb)
          EXIT;
  } /* lustre_put_super */
  
+int ll_process_config_update(struct ll_sb_info *sbi, int clean)
+{
+        struct obd_export *mdc_exp = sbi->ll_mdc_exp;
+        struct lustre_mount_data *lmd = sbi->ll_lmd;
+        struct llog_ctxt *ctxt;
+        struct config_llog_instance cfg;
+        char *profile = lmd->lmd_profile, *name = NULL;
+        int rc, namelen =  0, version;
+        ENTRY;
+
+        if (profile == NULL)
+                RETURN(0);
+        if (lmd == NULL) {
+                CERROR("Client not mounted with zero-conf; cannot process "
+                       "update log.\n");
+                RETURN(0);
+        }
+
+        rc = ldlm_cli_cancel_unused(mdc_exp->exp_obd->obd_namespace, NULL,
+                                    LDLM_FL_CONFIG_CHANGE, NULL);
+        if (rc != 0)
+                CWARN("ldlm_cli_cancel_unused(mdc): %d\n", rc);
+
+        rc = obd_cancel_unused(sbi->ll_osc_exp, NULL, LDLM_FL_CONFIG_CHANGE,
+                               NULL);
+        if (rc != 0)
+                CWARN("obd_cancel_unused(lov): %d\n", rc);
+
+        cfg.cfg_instance = sbi->ll_instance;
+        cfg.cfg_uuid = sbi->ll_sb_uuid;
+        cfg.cfg_local_nid = lmd->lmd_local_nid;
+
+        namelen = strlen(profile) + 20; /* -clean-######### */
+        OBD_ALLOC(name, namelen);
+        if (name == NULL)
+                RETURN(-ENOMEM);
+
+        if (clean) {
+                version = sbi->ll_config_version - 1;
+                sprintf(name, "%s-clean-%d", profile, version);
+        } else {
+                version = sbi->ll_config_version + 1;
+                sprintf(name, "%s-%d", profile, version);
+        }
+
+        CWARN("Applying configuration log %s\n", name);
+
+        ctxt = llog_get_context(&mdc_exp->exp_obd->obd_llogs,
+                                LLOG_CONFIG_REPL_CTXT);
+        rc = class_config_process_llog(ctxt, name, &cfg);
+        if (rc == 0)
+                sbi->ll_config_version = version;
+        CWARN("Finished applying configuration log %s: %d\n", name, rc);
+
+        if (rc == 0 && clean == 0) {
+                struct lov_desc desc;
+                int rc, valsize;
+                valsize = sizeof(desc);
+                rc = obd_get_info(sbi->ll_osc_exp, strlen("lovdesc") + 1,
+                                  "lovdesc", &valsize, &desc);
+
+                rc = obd_init_ea_size(mdc_exp,
+                                      obd_size_diskmd(sbi->ll_osc_exp, NULL),
+                                      (desc.ld_tgt_count *
+                                       sizeof(struct llog_cookie)));
+        }
+        OBD_FREE(name, namelen);
+        RETURN(rc);
+}
  
  struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
  {
-        struct inode *inode;
+        struct inode *inode = NULL;
          l_lock(&lock->l_resource->lr_namespace->ns_lock);
-        if (lock->l_ast_data)
-                inode = igrab(lock->l_ast_data);
-        else
-                inode = NULL;
+        if (lock->l_ast_data) {
+                struct ll_inode_info *lli = ll_i2info(lock->l_ast_data);
+                if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
+                        inode = igrab(lock->l_ast_data);
+                } else {
+                        CERROR("DEBUG: l_ast_data %p is bogus: magic %x\n",
+                               lock->l_ast_data, lli->lli_inode_magic);
+                }
+        }
          l_unlock(&lock->l_resource->lr_namespace->ns_lock);
          return inode;
  }
  
-static int null_if_equal(struct ldlm_lock *lock, void *data)
+int null_if_equal(struct ldlm_lock *lock, void *data)
  {
-        if (data == lock->l_ast_data)
+        if (data == lock->l_ast_data) {
                  lock->l_ast_data = NULL;
  
-        if (lock->l_req_mode != lock->l_granted_mode)
-                return LDLM_ITER_STOP;
+                if (lock->l_req_mode != lock->l_granted_mode)
+                        LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
+        }
  
          return LDLM_ITER_CONTINUE;
  }
@@ -680,9 +860,10 @@ void ll_clear_inode(struct inode *inode)
          CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
                 inode->i_generation, inode);
  
+        lli->lli_inode_magic = LLI_INODE_DEAD;
          ll_inode2fid(&fid, inode);
          clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(ll_i2info(inode)->lli_flags));
-        mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
+        md_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
  
          if (lli->lli_smd)
                  obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
@@ -693,6 +874,13 @@ void ll_clear_inode(struct inode *inode)
                  lli->lli_smd = NULL;
          }
  
+        if (lli->lli_mea) {
+                /* FIXME: change cbdata for mea here */
+                obd_free_memmd(sbi->ll_mdc_exp,
+                               (struct lov_stripe_md **) &lli->lli_mea);
+                lli->lli_mea = NULL;
+        }
+
          if (lli->lli_symlink_name) {
                  OBD_FREE(lli->lli_symlink_name,
                           strlen(lli->lli_symlink_name) + 1);
@@ -772,17 +960,17 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                  struct lustre_md md;
                  ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
  
-                rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
-                                 attr, NULL, 0, NULL, 0, &request);
-
+                rc = md_setattr(sbi->ll_mdc_exp, &op_data,
+                                attr, NULL, 0, NULL, 0, &request);
                  if (rc) {
                          ptlrpc_req_finished(request);
                          if (rc != -EPERM && rc != -EACCES)
-                                CERROR("mdc_setattr fails: rc = %d\n", rc);
+                                CERROR("md_setattr fails: rc = %d\n", rc);
                          RETURN(rc);
                  }
  
-                rc = mdc_req2lustre_md(request, 0, sbi->ll_osc_exp, &md);
+                rc = mdc_req2lustre_md(sbi->ll_mdc_exp, request, 0, 
+                                       sbi->ll_osc_exp, &md);
                  if (rc) {
                          ptlrpc_req_finished(request);
                          RETURN(rc);
@@ -791,7 +979,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                  /* Won't invoke vmtruncate as we already cleared ATTR_SIZE,
                   * but needed to set timestamps backwards on utime. */
                  inode_setattr(inode, attr);
-                ll_update_inode(inode, md.body, md.lsm);
+                ll_update_inode(inode, &md);
                  ptlrpc_req_finished(request);
  
                  if (!lsm || !S_ISREG(inode->i_mode)) {
@@ -807,13 +995,13 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                          /* from sys_utime() */
                          if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
                                  if (current->fsuid != inode->i_uid &&
-                                    (rc = ll_permission(inode, MAY_WRITE, NULL)) != 0)
+                                    (rc=ll_permission(inode,MAY_WRITE,NULL))!=0)
                                          RETURN(rc);
                          } else {
-                               /* from inode_change_ok() */
-                               if (current->fsuid != inode->i_uid &&
-                                   !capable(CAP_FOWNER))
-                                       RETURN(-EPERM);
+                                /* from inode_change_ok() */
+                                if (current->fsuid != inode->i_uid &&
+                                    !capable(CAP_FOWNER))
+                                        RETURN(-EPERM);
                          }
                  }
  
@@ -825,11 +1013,11 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
           * If we don't we can race with other i_size updaters on our node, like
           * ll_file_read.  We can also race with i_size propogation to other
           * nodes through dirtying and writeback of final cached pages.  This
-         * last one is especially bad for racing o_append users on other 
+         * last one is especially bad for racing o_append users on other
           * nodes. */
          if (ia_valid & ATTR_SIZE) {
-                struct ldlm_extent extent = { .start = attr->ia_size,
-                                              .end = OBD_OBJECT_EOF };
+                ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
+                                                           OBD_OBJECT_EOF } };
                  struct lustre_handle lockh = { 0 };
                  int err, ast_flags = 0;
                  /* XXX when we fix the AST intents to pass the discard-range
@@ -841,22 +1029,21 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                  /* bug 1639: avoid write/truncate i_sem/DLM deadlock */
                  LASSERT(atomic_read(&inode->i_sem.count) <= 0);
                  up(&inode->i_sem);
-                rc = ll_extent_lock_no_validate(NULL, inode, lsm, LCK_PW,
-                                                &extent, &lockh, ast_flags);
+                rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy, &lockh,
+                                    ast_flags);
                  down(&inode->i_sem);
-                if (rc != ELDLM_OK)
+                if (rc != 0)
                          RETURN(rc);
  
                  rc = vmtruncate(inode, attr->ia_size);
-                if (rc == 0)
-                        set_bit(LLI_F_HAVE_OST_SIZE_LOCK,
-                                &ll_i2info(inode)->lli_flags);
-
-                //ll_try_done_writing(inode);
  
+                /* We need to drop the semaphore here, because this unlock may
+                 * result in a cancellation, which will need the i_sem */
+                up(&inode->i_sem);
                  /* unlock now as we don't mind others file lockers racing with
                   * the mds updates below? */
                  err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
+                down(&inode->i_sem);
                  if (err) {
                          CERROR("ll_extent_unlock failed: %d\n", err);
                          if (!rc)
@@ -868,7 +1055,8 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                  CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
                         inode->i_ino, LTIME_S(attr->ia_mtime));
                  oa.o_id = lsm->lsm_object_id;
-                oa.o_valid = OBD_MD_FLID;
+                oa.o_gr = lsm->lsm_object_gr;
+                oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
                  obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
                                              OBD_MD_FLMTIME | OBD_MD_FLCTIME);
                  rc = obd_setattr(sbi->ll_osc_exp, &oa, lsm, NULL);
@@ -898,6 +1086,8 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
                  RETURN(rc);
          }
  
+        osfs->os_type = sb->s_magic;
+
          CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
                 osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
  
@@ -962,27 +1152,25 @@ int ll_statfs(struct super_block *sb, struct kstatfs *sfs)
          return 0;
  }
  
-void dump_lsm(int level, struct lov_stripe_md *lsm)
-{
-        CDEBUG(level, "objid "LPX64", maxbytes "LPX64", magic 0x%08X, "
-               "stripe_size %u, stripe_count %u\n",
-               lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic,
-               lsm->lsm_stripe_size, lsm->lsm_stripe_count);
-}
-
-void ll_update_inode(struct inode *inode, struct mds_body *body,
-                     struct lov_stripe_md *lsm)
+void ll_update_inode(struct inode *inode, struct lustre_md *md)
  {
          struct ll_inode_info *lli = ll_i2info(inode);
+        struct mds_body *body = md->body;
+        struct lov_stripe_md *lsm = md->lsm;
+        struct mea *mea = md->mea;
+        ENTRY;
  
-        LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
+        LASSERT((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
+        LASSERT((mea != NULL) == ((body->valid & OBD_MD_FLDIREA) != 0));
          if (lsm != NULL) {
+                LASSERT(lsm->lsm_object_gr > 0);
                  if (lli->lli_smd == NULL) {
                          lli->lli_smd = lsm;
                          lli->lli_maxbytes = lsm->lsm_maxbytes;
                          if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
                                  lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
                  } else {
+                        int i;
                          if (memcmp(lli->lli_smd, lsm, sizeof(*lsm))) {
                                  CERROR("lsm mismatch for inode %ld\n",
                                         inode->i_ino);
@@ -992,11 +1180,42 @@ void ll_update_inode(struct inode *inode, struct mds_body *body,
                                  dump_lsm(D_ERROR, lsm);
                                  LBUG();
                          }
+                        /* XXX FIXME -- We should decide on a safer (atomic) and
+                         * more elegant way to update the lsm */
+                        for (i = 0; i < lsm->lsm_stripe_count; i++) {
+                                lli->lli_smd->lsm_oinfo[i].loi_id =
+                                        lsm->lsm_oinfo[i].loi_id;
+                                lli->lli_smd->lsm_oinfo[i].loi_gr =
+                                        lsm->lsm_oinfo[i].loi_gr;
+                                lli->lli_smd->lsm_oinfo[i].loi_ost_idx =
+                                        lsm->lsm_oinfo[i].loi_ost_idx;
+                                lli->lli_smd->lsm_oinfo[i].loi_ost_gen =
+                                        lsm->lsm_oinfo[i].loi_ost_gen;
+                        }
                  }
+                /* bug 2844 - limit i_blksize for broken user-space apps */
+                LASSERTF(lsm->lsm_xfersize != 0, "%lu\n", lsm->lsm_xfersize);
+                inode->i_blksize = min(lsm->lsm_xfersize, LL_MAX_BLKSIZE);
                  if (lli->lli_smd != lsm)
                          obd_free_memmd(ll_i2obdexp(inode), &lsm);
          }
  
+        if (mea != NULL) {
+                if (lli->lli_mea == NULL) {
+                        lli->lli_mea = mea;
+                } else {
+                        if (memcmp(lli->lli_mea, mea, body->eadatasize)) {
+                                CERROR("mea mismatch for inode %lu\n",
+                                        inode->i_ino);
+                                LBUG();
+                        }
+                }
+                if (lli->lli_mea != mea)
+                        obd_free_memmd(ll_i2mdcexp(inode),
+                                        (struct lov_stripe_md **) &mea);
+                        
+        }
+
          if (body->valid & OBD_MD_FLID)
                  inode->i_ino = body->ino;
          if (body->valid & OBD_MD_FLATIME)
@@ -1037,8 +1256,19 @@ void ll_update_inode(struct inode *inode, struct mds_body *body,
  
          if (body->valid & OBD_MD_FLSIZE)
                  set_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
+
+        lli->lli_mds = body->mds;
+        inode->i_dev = (kdev_t) body->mds;
+        LASSERT(body->mds < 1000);
  }
  
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+static struct backing_dev_info ll_backing_dev_info = {
+        .ra_pages       = 0,    /* No readahead */
+        .memory_backed  = 0,    /* Does contribute to dirty memory */
+}; /* installed on special inodes' i_mapping by ll_read_inode2() */
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) */
+
  void ll_read_inode2(struct inode *inode, void *opaque)
  {
          struct lustre_md *md = opaque;
@@ -1059,7 +1289,9 @@ void ll_read_inode2(struct inode *inode, void *opaque)
          LTIME_S(inode->i_mtime) = 0;
          LTIME_S(inode->i_atime) = 0;
          LTIME_S(inode->i_ctime) = 0;
-        ll_update_inode(inode, md->body, md->lsm);
+
+        inode->i_rdev = 0;
+        ll_update_inode(inode, md);
  
          /* OIDEBUG(inode); */
  
@@ -1078,16 +1310,57 @@ void ll_read_inode2(struct inode *inode, void *opaque)
                  EXIT;
          } else {
                  inode->i_op = &ll_special_inode_operations;
+
  #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
                  init_special_inode(inode, inode->i_mode,
                                     kdev_t_to_nr(inode->i_rdev));
+
+                /* initializing backing dev info. */
+                inode->i_mapping->backing_dev_info = &ll_backing_dev_info;
  #else
                  init_special_inode(inode, inode->i_mode, inode->i_rdev);
  #endif
+                lli->ll_save_ifop = inode->i_fop;
+
+                if (S_ISCHR(inode->i_mode))
+                        inode->i_fop = &ll_special_chr_inode_fops;
+                else if (S_ISBLK(inode->i_mode))
+                        inode->i_fop = &ll_special_blk_inode_fops;
+                else if (S_ISFIFO(inode->i_mode))
+                        inode->i_fop = &ll_special_fifo_inode_fops;
+                else if (S_ISSOCK(inode->i_mode))
+                        inode->i_fop = &ll_special_sock_inode_fops;
+
+                CWARN("saved %p, replaced with %p\n", lli->ll_save_ifop,
+                      inode->i_fop);
+
+                if (lli->ll_save_ifop->owner) {
+                        CWARN("%p has owner %p\n", lli->ll_save_ifop,
+                              lli->ll_save_ifop->owner);
+                }
                  EXIT;
          }
  }
  
+void ll_delete_inode(struct inode *inode)
+{
+        int rc;
+        struct ll_fid fid;
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        ENTRY;
+        /* md_delete_object() on this inode's fid, then clear_inode(). */
+        ll_inode2fid(&fid, inode);
+        /* The call goes through the sbi's ll_mdc_exp export. */
+        rc = md_delete_object(sbi->ll_mdc_exp, &fid);
+        if (rc) {
+                CERROR("md_delete_object() failed, error %d.\n",
+                       rc);
+        }
+        /* A failure is only logged; the inode is cleared regardless. */
+        clear_inode(inode);
+        EXIT;
+}
+
  int ll_iocontrol(struct inode *inode, struct file *file,
                   unsigned int cmd, unsigned long arg)
  {
@@ -1103,7 +1376,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
                  struct mds_body *body;
  
                  ll_inode2fid(&fid, inode);
-                rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, 0, &req);
+                rc = md_getattr(sbi->ll_mdc_exp, &fid, valid, 0, &req);
                  if (rc) {
                          CERROR("failure %d inode %lu\n", rc, inode->i_ino);
                          RETURN(-abs(rc));
@@ -1141,26 +1414,27 @@ int ll_iocontrol(struct inode *inode, struct file *file,
                  attr.ia_attr_flags = flags;
                  attr.ia_valid |= ATTR_ATTR_FLAG;
  
-                rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
-                                 &attr, NULL, 0, NULL, 0, &req);
+                rc = md_setattr(sbi->ll_mdc_exp, &op_data,
+                                &attr, NULL, 0, NULL, 0, &req);
                  if (rc) {
                          ptlrpc_req_finished(req);
                          if (rc != -EPERM && rc != -EACCES)
-                                CERROR("mdc_setattr fails: rc = %d\n", rc);
+                                CERROR("md_setattr fails: rc = %d\n", rc);
                          obdo_free(oa);
                          RETURN(rc);
                  }
                  ptlrpc_req_finished(req);
  
                  oa->o_id = lsm->lsm_object_id;
+                oa->o_gr = lsm->lsm_object_gr;
                  oa->o_flags = flags;
-                oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS;
+                oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
  
                  rc = obd_setattr(sbi->ll_osc_exp, oa, lsm, NULL);
                  obdo_free(oa);
                  if (rc) {
                          if (rc != -EPERM && rc != -EACCES)
-                                CERROR("mdc_setattr fails: rc = %d\n", rc);
+                                CERROR("md_setattr fails: rc = %d\n", rc);
                          RETURN(rc);
                  }
  
@@ -1226,25 +1500,29 @@ void ll_umount_begin(struct super_block *sb)
          EXIT;
  }
  
-int ll_prep_inode(struct obd_export *exp, struct inode **inode,
-                  struct ptlrpc_request *req, int offset,struct super_block *sb)
+int ll_prep_inode(struct obd_export *osc_exp, struct obd_export *mdc_exp,
+                  struct inode **inode, struct ptlrpc_request *req,
+                  int offset, struct super_block *sb)
  {
          struct lustre_md md;
          int rc = 0;
  
-        rc = mdc_req2lustre_md(req, offset, exp, &md);
+        rc = mdc_req2lustre_md(mdc_exp, req, offset, osc_exp, &md);
          if (rc)
                  RETURN(rc);
  
          if (*inode) {
-                ll_update_inode(*inode, md.body, md.lsm);
+                ll_update_inode(*inode, &md);
          } else {
                  LASSERT(sb);
                  *inode = ll_iget(sb, md.body->ino, &md);
                  if (*inode == NULL || is_bad_inode(*inode)) {
                          /* free the lsm if we allocated one above */
                          if (md.lsm != NULL)
-                                obd_free_memmd(exp, &md.lsm);
+                                obd_free_memmd(osc_exp, &md.lsm);
+                        if (md.mea != NULL)
+                                obd_free_memmd(mdc_exp,
+                                               (struct lov_stripe_md**)&md.mea);
                          rc = -ENOMEM;
                          CERROR("new_inode -fatal: rc %d\n", rc);
                  }