LU-1247 obdfilter: fix invalid check of precreate objects

[fs/lustre-release.git] / lustre / obdfilter / filter.c
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c

index 69e06be..75fd01b 100644 (file)
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -28,6 +28,8 @@
  /*
   * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
   * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
   */
  /*
   * This file is part of Lustre, http://www.lustre.org/
@@ -135,12 +137,12 @@ int filter_finish_transno(struct obd_export *exp, struct inode *inode,
          if (!exp->exp_obd->obd_replayable || oti == NULL)
                  RETURN(rc);
  
-        cfs_mutex_down(&ted->ted_lcd_lock);
+        cfs_mutex_lock(&ted->ted_lcd_lock);
          lcd = ted->ted_lcd;
          /* if the export has already been disconnected, we have no last_rcvd slot,
           * update server data with latest transno then */
          if (lcd == NULL) {
-                cfs_mutex_up(&ted->ted_lcd_lock);
+                cfs_mutex_unlock(&ted->ted_lcd_lock);
                  CWARN("commit transaction for disconnected client %s: rc %d\n",
                        exp->exp_client_uuid.uuid, rc);
                  err = filter_update_server_data(exp->exp_obd);
@@ -152,14 +154,25 @@ int filter_finish_transno(struct obd_export *exp, struct inode *inode,
          if (oti->oti_transno == 0) {
                  last_rcvd = le64_to_cpu(lsd->lsd_last_transno) + 1;
                  lsd->lsd_last_transno = cpu_to_le64(last_rcvd);
+                LASSERT(last_rcvd >= le64_to_cpu(lcd->lcd_last_transno));
          } else {
                  last_rcvd = oti->oti_transno;
                  if (last_rcvd > le64_to_cpu(lsd->lsd_last_transno))
                          lsd->lsd_last_transno = cpu_to_le64(last_rcvd);
+                if (unlikely(last_rcvd < le64_to_cpu(lcd->lcd_last_transno))) {
+                        CERROR("Trying to overwrite bigger transno, on-disk: "
+                               LPU64", new: "LPU64"\n",
+                               le64_to_cpu(lcd->lcd_last_transno), last_rcvd);
+                        cfs_spin_lock(&exp->exp_lock);
+                        exp->exp_vbr_failed = 1;
+                        cfs_spin_unlock(&exp->exp_lock);
+                        cfs_spin_unlock(&obt->obt_lut->lut_translock);
+                        cfs_mutex_unlock(&ted->ted_lcd_lock);
+                        RETURN(-EOVERFLOW);
+                }
          }
          oti->oti_transno = last_rcvd;
  
-        LASSERT(last_rcvd >= le64_to_cpu(lcd->lcd_last_transno));
          lcd->lcd_last_transno = cpu_to_le64(last_rcvd);
          lcd->lcd_pre_versions[0] = cpu_to_le64(oti->oti_pre_version);
          lcd->lcd_last_xid = cpu_to_le64(oti->oti_xid);
@@ -196,7 +209,7 @@ int filter_finish_transno(struct obd_export *exp, struct inode *inode,
  
          CDEBUG(log_pri, "wrote trans "LPU64" for client %s at #%d: err = %d\n",
                 last_rcvd, lcd->lcd_uuid, ted->ted_lr_idx, err);
-        cfs_mutex_up(&ted->ted_lcd_lock);
+        cfs_mutex_unlock(&ted->ted_lcd_lock);
          RETURN(rc);
  }
  
@@ -241,7 +254,6 @@ static int lprocfs_init_rw_stats(struct obd_device *obd,
     plus the procfs overhead :( */
  static int filter_export_stats_init(struct obd_device *obd,
                                      struct obd_export *exp,
-                                    int reconnect,
                                      void *client_nid)
  {
          int rc, newnid = 0;
@@ -251,7 +263,7 @@ static int filter_export_stats_init(struct obd_device *obd,
                  /* Self-export gets no proc entry */
                  RETURN(0);
  
-        rc = lprocfs_exp_setup(exp, client_nid, reconnect, &newnid);
+        rc = lprocfs_exp_setup(exp, client_nid, &newnid);
          if (rc) {
                  /* Mask error for already created
                   * /proc entries */
@@ -290,7 +302,6 @@ static int filter_export_stats_init(struct obd_device *obd,
  
          RETURN(0);
   clean:
-        lprocfs_exp_cleanup(exp);
          return rc;
  }
  
@@ -343,7 +354,7 @@ static int filter_client_add(struct obd_device *obd, struct obd_export *exp,
          ted->ted_lr_idx = cl_idx;
          ted->ted_lr_off = le32_to_cpu(lsd->lsd_client_start) +
                            cl_idx * le16_to_cpu(lsd->lsd_client_size);
-        cfs_init_mutex(&ted->ted_lcd_lock);
+        cfs_mutex_init(&ted->ted_lcd_lock);
          LASSERTF(ted->ted_lr_off > 0, "ted_lr_off = %llu\n", ted->ted_lr_off);
  
          CDEBUG(D_INFO, "client at index %d (%llu) with UUID '%s' added\n",
@@ -444,12 +455,12 @@ static int filter_client_del(struct obd_export *exp)
           * be in server data or in client data in case of failure */
          filter_update_server_data(exp->exp_obd);
  
-        cfs_mutex_down(&ted->ted_lcd_lock);
+        cfs_mutex_lock(&ted->ted_lcd_lock);
          memset(ted->ted_lcd->lcd_uuid, 0, sizeof ted->ted_lcd->lcd_uuid);
          rc = fsfilt_write_record(exp->exp_obd, obt->obt_rcvd_filp,
                                   ted->ted_lcd,
                                   sizeof(*ted->ted_lcd), &off, 0);
-        cfs_mutex_up(&ted->ted_lcd_lock);
+        cfs_mutex_unlock(&ted->ted_lcd_lock);
          pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
  
          CDEBUG(rc == 0 ? D_INFO : D_ERROR,
@@ -636,17 +647,28 @@ static void filter_fmd_cleanup(struct obd_export *exp)
  static int filter_init_export(struct obd_export *exp)
  {
          int rc;
+        ENTRY;
+
          cfs_spin_lock_init(&exp->exp_filter_data.fed_lock);
          CFS_INIT_LIST_HEAD(&exp->exp_filter_data.fed_mod_list);
  
          cfs_spin_lock(&exp->exp_lock);
          exp->exp_connecting = 1;
          cfs_spin_unlock(&exp->exp_lock);
+
+        /* self-export doesn't need client data and ldlm initialization */
+        if (unlikely(obd_uuid_equals(&exp->exp_obd->obd_uuid,
+                                     &exp->exp_client_uuid)))
+                RETURN(0);
+
          rc = lut_client_alloc(exp);
          if (rc == 0)
                  rc = ldlm_init_export(exp);
+        if (rc)
+                CERROR("%s: Can't initialize export: rc %d\n",
+                       exp->exp_obd->obd_name, rc);
  
-        return rc;
+        RETURN(rc);
  }
  
  static int filter_free_server_data(struct obd_device_target *obt)
@@ -870,7 +892,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
                  fed = &exp->exp_filter_data;
                  *fed->fed_ted.ted_lcd = *lcd;
                  fed->fed_group = 0; /* will be assigned at connect */
-                filter_export_stats_init(obd, exp, 0, NULL);
+                filter_export_stats_init(obd, exp, NULL);
                  rc = filter_client_add(obd, exp, cl_idx);
                  /* can't fail for existing client */
                  LASSERTF(rc == 0, "rc = %d\n", rc);
@@ -892,6 +914,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
          obd->obd_last_committed = le64_to_cpu(lsd->lsd_last_transno);
  out:
          obd->u.obt.obt_mount_count = mount_count + 1;
+        obd->u.obt.obt_instance = (__u32)obd->u.obt.obt_mount_count;
          lsd->lsd_mount_count = cpu_to_le64(obd->u.obt.obt_mount_count);
  
          /* save it, so mount count and last_transno is current */
@@ -1175,14 +1198,14 @@ static int filter_read_groups(struct obd_device *obd, int last_group,
          struct filter_obd *filter = &obd->u.filter;
          int old_count, group, rc = 0;
  
-        cfs_down(&filter->fo_init_lock);
+        cfs_mutex_lock(&filter->fo_init_lock);
          old_count = filter->fo_group_count;
          for (group = old_count; group <= last_group; group++) {
                  rc = filter_read_group_internal(obd, group, create);
                  if (rc != 0)
                          break;
          }
-        cfs_up(&filter->fo_init_lock);
+        cfs_mutex_unlock(&filter->fo_init_lock);
          return rc;
  }
  
@@ -1196,12 +1219,12 @@ static int filter_prep_groups(struct obd_device *obd)
          loff_t off = 0;
          ENTRY;
  
-        O_dentry = simple_mkdir(current->fs->pwd, obd->u.obt.obt_vfsmnt,
+        O_dentry = simple_mkdir(cfs_fs_pwd(current->fs), obd->u.obt.obt_vfsmnt,
                                  "O", 0700, 1);
-        CDEBUG(D_INODE, "got/created O: %p\n", O_dentry);
+        CDEBUG(D_INODE, "%s: got/created O: %p\n", obd->obd_name, O_dentry);
          if (IS_ERR(O_dentry)) {
                  rc = PTR_ERR(O_dentry);
-                CERROR("cannot open/create O: rc = %d\n", rc);
+                CERROR("%s: cannot open/create O: rc = %d\n", obd->obd_name,rc);
                  GOTO(cleanup, rc);
          }
          filter->fo_dentry_O = O_dentry;
@@ -1211,22 +1234,24 @@ static int filter_prep_groups(struct obd_device *obd)
           * clients because they may send create/destroy for any group -bzzz */
          filp = filp_open("LAST_GROUP", O_CREAT | O_RDWR, 0700);
          if (IS_ERR(filp)) {
-                CERROR("cannot create LAST_GROUP: rc = %ld\n", PTR_ERR(filp));
+                CERROR("%s: cannot create LAST_GROUP: rc = %ld\n",
+                       obd->obd_name, PTR_ERR(filp));
                  GOTO(cleanup, rc = PTR_ERR(filp));
          }
          cleanup_phase = 2; /* filp */
  
          rc = fsfilt_read_record(obd, filp, &last_group, sizeof(__u32), &off);
          if (rc) {
-                CDEBUG(D_INODE, "error reading LAST_GROUP: rc %d\n", rc);
+                CERROR("%s: error reading LAST_GROUP: rc %d\n",
+                       obd->obd_name, rc);
                  GOTO(cleanup, rc);
          }
  
          if (off == 0)
                  last_group = FID_SEQ_OST_MDT0;
  
-        CWARN("%s: initialize groups [%d,%d]\n", obd->obd_name,
-              FID_SEQ_OST_MDT0, last_group);
+        CDEBUG(D_INODE, "%s: initialize group %u (max %u)\n", obd->obd_name,
+               FID_SEQ_OST_MDT0, last_group);
          filter->fo_committed_group = last_group;
          rc = filter_read_groups(obd, last_group, 1);
          if (rc)
@@ -1407,7 +1432,9 @@ struct dentry *filter_parent(struct obd_device *obd, obd_seq group, obd_id objid
  {
          struct filter_obd *filter = &obd->u.filter;
          struct filter_subdirs *subdirs;
-        LASSERT(group < filter->fo_group_count); /* FIXME: object groups */
+
+        if (group >= filter->fo_group_count) /* FIXME: object groups */
+                return ERR_PTR(-EBADF);
  
          if (!fid_seq_is_mdt(group) || filter->fo_subdir_count == 0)
                  return filter->fo_dentry_O_groups[group];
@@ -1461,7 +1488,7 @@ struct dentry *filter_fid2dentry(struct obd_device *obd,
              obd->u.filter.fo_destroys_in_progress == 0) {
                  /* don't fail lookups for orphan recovery, it causes
                   * later LBUGs when objects still exist during precreate */
-                CDEBUG(D_INFO, "*** obd_fail_loc=%x ***\n",OBD_FAIL_OST_ENOENT);
+                CDEBUG(D_INFO, "*** cfs_fail_loc=%x ***\n",OBD_FAIL_OST_ENOENT);
                  RETURN(ERR_PTR(-ENOENT));
          }
          if (id == 0) {
@@ -1481,7 +1508,8 @@ struct dentry *filter_fid2dentry(struct obd_device *obd,
          }
          CDEBUG(D_INODE, "looking up object O/%.*s/%s\n",
                 dparent->d_name.len, dparent->d_name.name, name);
-        dchild = /*ll_*/lookup_one_len(name, dparent, len);
+        /* dparent is already locked here, so we cannot use ll_lookup_one_len() */
+        dchild = lookup_one_len(name, dparent, len);
          if (dir_dentry == NULL)
                  filter_parent_unlock(dparent);
          if (IS_ERR(dchild)) {
@@ -1562,15 +1590,13 @@ int filter_vfs_unlink(struct inode *dir, struct dentry *dentry,
                  GOTO(out, rc = -EPERM);
  
          /* check_sticky() */
-        if ((dentry->d_inode->i_uid != current->fsuid &&
+        if ((dentry->d_inode->i_uid != cfs_curproc_fsuid() &&
               !cfs_capable(CFS_CAP_FOWNER)) || IS_APPEND(dentry->d_inode) ||
              IS_IMMUTABLE(dentry->d_inode))
                  GOTO(out, rc = -EPERM);
  
-        /* NOTE: This might need to go outside i_mutex, though it isn't clear if
-         *       that was done because of journal_start (which is already done
-         *       here) or some other ordering issue. */
-        DQUOT_INIT(dir);
+        /* Locking order: i_mutex -> journal_lock -> dqptr_sem. LU-952 */
+        ll_vfs_dq_init(dir);
  
          rc = ll_security_inode_unlink(dir, dentry, mnt);
          if (rc)
@@ -1596,7 +1622,10 @@ static int filter_destroy_internal(struct obd_device *obd, obd_id objid,
          struct inode *inode = dchild->d_inode;
          int rc;
  
-        if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) {
+        /* There should be 2 references to the inode:
+         *  1) taken by filter_prepare_destroy
+         *  2) taken by filter_destroy */
+        if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 2) {
                  CERROR("destroying objid %.*s ino %lu nlink %lu count %d\n",
                         dchild->d_name.len, dchild->d_name.name, inode->i_ino,
                         (unsigned long)inode->i_nlink,
@@ -1661,7 +1690,6 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
                                  struct ldlm_lock **lockp, void *req_cookie,
                                  ldlm_mode_t mode, int flags, void *data)
  {
-        CFS_LIST_HEAD(rpc_list);
          struct ptlrpc_request *req = req_cookie;
          struct ldlm_lock *lock = *lockp, *l = NULL;
          struct ldlm_resource *res = lock->l_resource;
@@ -1700,29 +1728,23 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
           * lock, and should not be granted if the lock will be blocked.
           */
  
-        LASSERT(ns == res->lr_namespace);
-        lock_res(res);
-        rc = policy(lock, &tmpflags, 0, &err, &rpc_list);
-        check_res_locked(res);
+        if (flags & LDLM_FL_BLOCK_NOWAIT) {
+                OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_AGL_DELAY, 5);
  
-        /* FIXME: we should change the policy function slightly, to not make
-         * this list at all, since we just turn around and free it */
-        while (!cfs_list_empty(&rpc_list)) {
-                struct ldlm_lock *wlock =
-                        cfs_list_entry(rpc_list.next, struct ldlm_lock,
-                                       l_cp_ast);
-                LASSERT((lock->l_flags & LDLM_FL_AST_SENT) == 0);
-                LASSERT(lock->l_flags & LDLM_FL_CP_REQD);
-                lock->l_flags &= ~LDLM_FL_CP_REQD;
-                cfs_list_del_init(&wlock->l_cp_ast);
-                LDLM_LOCK_RELEASE(wlock);
+                if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_AGL_NOLOCK))
+                        RETURN(ELDLM_LOCK_ABORTED);
          }
  
+        LASSERT(ns == ldlm_res_to_ns(res));
+        lock_res(res);
+        rc = policy(lock, &tmpflags, 0, &err, NULL);
+        check_res_locked(res);
+
          /* The lock met with no resistance; we're finished. */
          if (rc == LDLM_ITER_CONTINUE) {
                  /* do not grant locks to the liblustre clients: they cannot
                   * handle ASTs robustly.  We need to do this while still
-                 * holding ns_lock to avoid the lock remaining on the res_link
+                 * holding lr_lock to avoid the lock remaining on the res_link
                   * list (and potentially being added to l_pending_list by an
                   * AST) when we are going to drop this lock ASAP. */
                  if (lock->l_export->exp_libclient ||
@@ -1734,6 +1756,12 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
                  }
                  unlock_res(res);
                  RETURN(err);
+        } else if (flags & LDLM_FL_BLOCK_NOWAIT) {
+                /* LDLM_FL_BLOCK_NOWAIT means it is for AGL. Do not send glimpse
+                 * callback for glimpse size. The real size user will trigger
+                 * the glimpse callback when necessary. */
+                unlock_res(res);
+                RETURN(ELDLM_LOCK_ABORTED);
          }
  
          /* Do not grant any lock, but instead send GL callbacks.  The extent
@@ -1745,11 +1773,11 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
          *reply_lvb = *res_lvb;
  
          /*
-         * ->ns_lock guarantees that no new locks are granted, and,
+         * lr_lock guarantees that no new locks are granted, and,
           * therefore, that res->lr_lvb_data cannot increase beyond the
           * end of already granted lock. As a result, it is safe to
           * check against "stale" reply_lvb->lvb_size value without
-         * res->lr_lvb_sem.
+         * res->lr_lvb_mutex.
           */
          arg.size = reply_lvb->lvb_size;
          arg.victim = &l;
@@ -1798,13 +1826,6 @@ static int filter_intent_policy(struct ldlm_namespace *ns,
  
          LASSERTF(l->l_glimpse_ast != NULL, "l == %p", l);
          rc = l->l_glimpse_ast(l, NULL); /* this will update the LVB */
-        /* Update the LVB from disk if the AST failed (this is a legal race) */
-        /*
-         * XXX nikita: situation when ldlm_server_glimpse_ast() failed before
-         * sending ast is not handled. This can result in lost client writes.
-         */
-        if (rc != 0)
-                ldlm_res_lvbo_update(res, NULL, 1);
  
          lock_res(res);
          *reply_lvb = *res_lvb;
@@ -1948,12 +1969,13 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
  {
          struct filter_obd *filter = &obd->u.filter;
          struct vfsmount *mnt;
+        struct file_system_type *type;
          struct lustre_mount_info *lmi;
          struct obd_uuid uuid;
          __u8 *uuid_ptr;
          char *str, *label;
          char ns_name[48];
-        request_queue_t *q;
+        struct request_queue *q;
          int rc, i;
          ENTRY;
  
@@ -1969,20 +1991,17 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
                  struct lustre_sb_info *lsi = s2lsi(lmi->lmi_sb);
                  mnt = lmi->lmi_mnt;
                  obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
-
-                /* gets recovery timeouts from mount data */
-                if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_soft)
-                        obd->obd_recovery_timeout =
-                                lsi->lsi_lmd->lmd_recovery_time_soft;
-                if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_hard)
-                        obd->obd_recovery_time_hard =
-                                lsi->lsi_lmd->lmd_recovery_time_hard;
          } else {
                  /* old path - used by lctl */
                  CERROR("Using old MDS mount method\n");
-                mnt = ll_kern_mount(lustre_cfg_string(lcfg, 2),
-                                    MS_NOATIME|MS_NODIRATIME,
-                                    lustre_cfg_string(lcfg, 1), option);
+                type = get_fs_type(lustre_cfg_string(lcfg, 2));
+                if (!type) {
+                        CERROR("get_fs_type failed\n");
+                        RETURN(-ENODEV);
+                }
+                mnt = vfs_kern_mount(type, MS_NOATIME|MS_NODIRATIME,
+                                     lustre_cfg_string(lcfg, 1), option);
+                cfs_module_put(type->owner);
                  if (IS_ERR(mnt)) {
                          rc = PTR_ERR(mnt);
                          LCONSOLE_ERROR_MSG(0x135, "Can't mount disk %s (%d)\n",
@@ -2008,6 +2027,9 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
          /* failover is the default */
          obd->obd_replayable = 1;
  
+        /* disable connection until configuration finishes */
+        obd->obd_no_conn = 1;
+
          if (lcfg->lcfg_bufcount > 3 && LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
                  str = lustre_cfg_string(lcfg, 3);
                  if (strchr(str, 'n')) {
@@ -2016,13 +2038,15 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
                  }
          }
  
+        obd->u.obt.obt_magic = OBT_MAGIC;
          obd->u.obt.obt_vfsmnt = mnt;
          obd->u.obt.obt_sb = mnt->mnt_sb;
-        obd->u.obt.obt_magic = OBT_MAGIC;
          filter->fo_fstype = mnt->mnt_sb->s_type->name;
          CDEBUG(D_SUPER, "%s: mnt = %p\n", filter->fo_fstype, mnt);
  
-        fsfilt_setup(obd, obd->u.obt.obt_sb);
+        rc = fsfilt_setup(obd, obd->u.obt.obt_sb);
+        if (rc)
+                GOTO(err_ops, rc);
  
          OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
          obd->obd_lvfs_ctxt.pwdmnt = mnt;
@@ -2030,15 +2054,15 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
          obd->obd_lvfs_ctxt.fs = get_ds();
          obd->obd_lvfs_ctxt.cb_ops = filter_lvfs_ops;
  
-        cfs_init_mutex(&filter->fo_init_lock);
+        cfs_mutex_init(&filter->fo_init_lock);
          filter->fo_committed_group = 0;
          filter->fo_destroys_in_progress = 0;
          for (i = 0; i < 32; i++)
-                cfs_sema_init(&filter->fo_create_locks[i], 1);
+                cfs_mutex_init(&filter->fo_create_locks[i]);
  
          cfs_spin_lock_init(&filter->fo_objidlock);
          CFS_INIT_LIST_HEAD(&filter->fo_export_list);
-        cfs_sema_init(&filter->fo_alloc_lock, 1);
+        cfs_mutex_init(&filter->fo_alloc_lock);
          init_brw_stats(&filter->fo_filter_stats);
          cfs_spin_lock_init(&filter->fo_flags_lock);
          filter->fo_read_cache = 1; /* enable read-only cache by default */
@@ -2064,8 +2088,10 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
                  GOTO(err_post, rc = -ENOMEM);
  
          sprintf(ns_name, "filter-%s", obd->obd_uuid.uuid);
-        obd->obd_namespace = ldlm_namespace_new(obd, ns_name, LDLM_NAMESPACE_SERVER,
-                                                LDLM_NAMESPACE_GREEDY);
+        obd->obd_namespace = ldlm_namespace_new(obd, ns_name,
+                                                LDLM_NAMESPACE_SERVER,
+                                                LDLM_NAMESPACE_GREEDY,
+                                                LDLM_NS_TYPE_OST);
          if (obd->obd_namespace == NULL)
                  GOTO(err_post, rc = -ENOMEM);
          obd->obd_namespace->ns_lvbp = obd;
@@ -2091,13 +2117,13 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
                  GOTO(err_post, rc);
  
          q = bdev_get_queue(mnt->mnt_sb->s_bdev);
-        if (q->max_sectors < q->max_hw_sectors &&
-            q->max_sectors < PTLRPC_MAX_BRW_SIZE >> 9)
+        if (queue_max_sectors(q) < queue_max_hw_sectors(q) &&
+            queue_max_sectors(q) < PTLRPC_MAX_BRW_SIZE >> 9)
                  LCONSOLE_INFO("%s: underlying device %s should be tuned "
                                "for larger I/O requests: max_sectors = %u "
                                "could be up to max_hw_sectors=%u\n",
                                obd->obd_name, mnt->mnt_sb->s_id,
-                              q->max_sectors, q->max_hw_sectors);
+                              queue_max_sectors(q), queue_max_hw_sectors(q));
  
          uuid_ptr = fsfilt_uuid(obd, obd->u.obt.obt_sb);
          if (uuid_ptr != NULL) {
@@ -2113,17 +2139,6 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
                        lmi ? s2lsi(lmi->lmi_sb)->lsi_lmd->lmd_dev : "",
                        obd->obd_replayable ? "enabled" : "disabled");
  
-        if (obd->obd_recovering)
-                LCONSOLE_WARN("%s: Will be in recovery for at least %d:%.02d, "
-                              "or until %d client%s reconnect%s\n",
-                              obd->obd_name,
-                              obd->obd_recovery_timeout / 60,
-                              obd->obd_recovery_timeout % 60,
-                              obd->obd_max_recoverable_clients,
-                              (obd->obd_max_recoverable_clients == 1) ? "" : "s",
-                              (obd->obd_max_recoverable_clients == 1) ? "s": "");
-
-
          RETURN(0);
  
  err_post:
@@ -2140,9 +2155,11 @@ err_mntput:
  static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
  {
          struct lprocfs_static_vars lvars;
+        cfs_proc_dir_entry_t *entry;
          unsigned long addr;
          struct page *page;
          int rc;
+        ENTRY;
  
          CLASSERT(offsetof(struct obd_device, u.obt) ==
                   offsetof(struct obd_device, u.filter.fo_obt));
@@ -2150,69 +2167,89 @@ static int filter_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
          if (!LUSTRE_CFG_BUFLEN(lcfg, 1) || !LUSTRE_CFG_BUFLEN(lcfg, 2))
                  RETURN(-EINVAL);
  
-        /* 2.6.9 selinux wants a full option page for do_kern_mount (bug6471) */
-        OBD_PAGE_ALLOC(page, CFS_ALLOC_STD);
-        if (!page)
-                RETURN(-ENOMEM);
-        addr = (unsigned long)cfs_page_address(page);
-        clear_page((void *)addr);
-
          /* lprocfs must be setup before the filter so state can be safely added
           * to /proc incrementally as the filter is setup */
          lprocfs_filter_init_vars(&lvars);
-        if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0 &&
-            lprocfs_alloc_obd_stats(obd, LPROC_FILTER_LAST) == 0) {
-                /* Init obdfilter private stats here */
-                lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_READ_BYTES,
-                                     LPROCFS_CNTR_AVGMINMAX,
-                                     "read_bytes", "bytes");
-                lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_WRITE_BYTES,
-                                     LPROCFS_CNTR_AVGMINMAX,
-                                     "write_bytes", "bytes");
-                lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_GET_PAGE,
-                                     LPROCFS_CNTR_AVGMINMAX|LPROCFS_CNTR_STDDEV,
-                                     "get_page", "usec");
-                lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_NO_PAGE,
-                                     LPROCFS_CNTR_AVGMINMAX,
-                                     "get_page_failures", "num");
-                lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_CACHE_ACCESS,
-                                     LPROCFS_CNTR_AVGMINMAX,
-                                     "cache_access", "pages");
-                lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_CACHE_HIT,
-                                     LPROCFS_CNTR_AVGMINMAX,
-                                     "cache_hit", "pages");
-                lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_CACHE_MISS,
-                                     LPROCFS_CNTR_AVGMINMAX,
-                                     "cache_miss", "pages");
-
-                lproc_filter_attach_seqstat(obd);
-                obd->obd_proc_exports_entry = lprocfs_register("exports",
-                                                        obd->obd_proc_entry,
-                                                        NULL, NULL);
-                if (IS_ERR(obd->obd_proc_exports_entry)) {
-                        rc = PTR_ERR(obd->obd_proc_exports_entry);
-                        CERROR("error %d setting up lprocfs for %s\n",
-                               rc, "exports");
-                        obd->obd_proc_exports_entry = NULL;
-                }
+        rc = lprocfs_obd_setup(obd, lvars.obd_vars);
+        if (rc) {
+                CERROR("%s: lprocfs_obd_setup failed: %d.\n",
+                       obd->obd_name, rc);
+                RETURN(rc);
+        }
+
+        rc = lprocfs_alloc_obd_stats(obd, LPROC_FILTER_LAST);
+        if (rc) {
+                CERROR("%s: lprocfs_alloc_obd_stats failed: %d.\n",
+                       obd->obd_name, rc);
+                GOTO(obd_cleanup, rc);
+        }
+
+        /* Init obdfilter private stats here */
+        lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_READ_BYTES,
+                             LPROCFS_CNTR_AVGMINMAX, "read_bytes", "bytes");
+        lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_WRITE_BYTES,
+                             LPROCFS_CNTR_AVGMINMAX, "write_bytes", "bytes");
+        lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_GET_PAGE,
+                             LPROCFS_CNTR_AVGMINMAX|LPROCFS_CNTR_STDDEV,
+                             "get_page", "usec");
+        lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_NO_PAGE,
+                             LPROCFS_CNTR_AVGMINMAX, "get_page_failures", "num");
+        lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_CACHE_ACCESS,
+                             LPROCFS_CNTR_AVGMINMAX, "cache_access", "pages");
+        lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_CACHE_HIT,
+                             LPROCFS_CNTR_AVGMINMAX, "cache_hit", "pages");
+        lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_CACHE_MISS,
+                             LPROCFS_CNTR_AVGMINMAX, "cache_miss", "pages");
+
+        rc = lproc_filter_attach_seqstat(obd);
+        if (rc) {
+                CERROR("%s: create seqstat failed: %d.\n", obd->obd_name, rc);
+                GOTO(free_obd_stats, rc);
+        }
+
+        entry = lprocfs_register("exports", obd->obd_proc_entry, NULL, NULL);
+        if (IS_ERR(entry)) {
+                rc = PTR_ERR(entry);
+                CERROR("%s: error %d setting up lprocfs for %s\n",
+                       obd->obd_name, rc, "exports");
+                GOTO(free_obd_stats, rc);
          }
-        if (obd->obd_proc_exports_entry)
-                lprocfs_add_simple(obd->obd_proc_exports_entry, "clear",
+        obd->obd_proc_exports_entry = entry;
+
+        entry = lprocfs_add_simple(obd->obd_proc_exports_entry, "clear",
                                     lprocfs_nid_stats_clear_read,
                                     lprocfs_nid_stats_clear_write, obd, NULL);
+        if (IS_ERR(entry)) {
+                rc = PTR_ERR(entry);
+                CERROR("%s: add proc entry 'clear' failed: %d.\n",
+                       obd->obd_name, rc);
+                GOTO(free_obd_stats, rc);
+        }
  
+        /* 2.6.9 selinux wants a full option page for do_kern_mount (bug6471) */
+        OBD_PAGE_ALLOC(page, CFS_ALLOC_STD);
+        if (!page)
+                GOTO(remove_entry_clear, rc = -ENOMEM);
+        addr = (unsigned long)cfs_page_address(page);
+        clear_page((void *)addr);
          memcpy((void *)addr, lustre_cfg_buf(lcfg, 4),
                 LUSTRE_CFG_BUFLEN(lcfg, 4));
          rc = filter_common_setup(obd, lcfg, (void *)addr);
          OBD_PAGE_FREE(page);
-
          if (rc) {
-                lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
-                lprocfs_free_per_client_stats(obd);
-                lprocfs_free_obd_stats(obd);
-                lprocfs_obd_cleanup(obd);
+                CERROR("%s: filter_common_setup failed: %d.\n",
+                       obd->obd_name, rc);
+                GOTO(remove_entry_clear, rc);
          }
  
+        RETURN(0);
+
+remove_entry_clear:
+        lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
+free_obd_stats:
+        lprocfs_free_obd_stats(obd);
+obd_cleanup:
+        lprocfs_obd_cleanup(obd);
          return rc;
  }
  
@@ -2373,20 +2410,20 @@ static int filter_llog_finish(struct obd_device *obd, int count)
                   * This is safe to do, as llog is already synchronized
                   * and its import may go.
                   */
-                cfs_mutex_down(&ctxt->loc_sem);
+                cfs_mutex_lock(&ctxt->loc_mutex);
                  if (ctxt->loc_imp) {
                          class_import_put(ctxt->loc_imp);
                          ctxt->loc_imp = NULL;
                  }
-                cfs_mutex_up(&ctxt->loc_sem);
+                cfs_mutex_unlock(&ctxt->loc_mutex);
                  llog_ctxt_put(ctxt);
          }
  
          if (filter->fo_lcm) {
-                cfs_mutex_down(&ctxt->loc_sem);
+                cfs_mutex_lock(&ctxt->loc_mutex);
                  llog_recov_thread_fini(filter->fo_lcm, obd->obd_force);
                  filter->fo_lcm = NULL;
-                cfs_mutex_up(&ctxt->loc_sem);
+                cfs_mutex_unlock(&ctxt->loc_mutex);
          }
          RETURN(filter_olg_fini(&obd->obd_olg));
  }
@@ -2433,7 +2470,7 @@ struct obd_llog_group *filter_find_olg(struct obd_device *obd, int group)
   */
  struct obd_llog_group *filter_find_create_olg(struct obd_device *obd, int group)
  {
-        struct obd_llog_group *olg = NULL;
+        struct obd_llog_group *olg = NULL, *olg_new = NULL;
          struct filter_obd *filter;
          int rc;
  
@@ -2442,6 +2479,10 @@ struct obd_llog_group *filter_find_create_olg(struct obd_device *obd, int group)
          if (group == FID_SEQ_LLOG)
                  RETURN(&obd->obd_olg);
  
+        OBD_ALLOC_PTR(olg_new);
+        if (olg_new == NULL)
+               RETURN(ERR_PTR(-ENOMEM));
+
          cfs_spin_lock(&filter->fo_llog_list_lock);
          olg = filter_find_olg_internal(filter, group);
          if (olg) {
@@ -2450,10 +2491,11 @@ struct obd_llog_group *filter_find_create_olg(struct obd_device *obd, int group)
                  } else {
                          GOTO(out_unlock, olg);
                  }
+        } else {
+                /* set as the newly allocated one */
+                olg = olg_new;
+                olg_new = NULL;
          }
-        OBD_ALLOC_PTR(olg);
-        if (olg == NULL)
-               GOTO(out_unlock, olg = ERR_PTR(-ENOMEM));
  
          llog_group_init(olg, group);
          cfs_list_add(&olg->olg_list, &filter->fo_llog_list);
@@ -2478,7 +2520,9 @@ out:
  
  out_unlock:
          cfs_spin_unlock(&filter->fo_llog_list_lock);
-        GOTO(out, olg);
+        if (olg_new)
+               OBD_FREE_PTR(olg_new);
+        goto out;
  }
  
  static int filter_llog_connect(struct obd_export *exp,
@@ -2571,7 +2615,16 @@ static int filter_precleanup(struct obd_device *obd,
          case OBD_CLEANUP_EXPORTS:
                  /* Stop recovery before namespace cleanup. */
                  target_recovery_fini(obd);
+
+                obd_exports_barrier(obd);
+                obd_zombie_barrier();
+
                  rc = filter_llog_preclean(obd);
+                lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
+                lprocfs_free_per_client_stats(obd);
+                lprocfs_obd_cleanup(obd);
+                lprocfs_free_obd_stats(obd);
+                lquota_cleanup(filter_quota_interface_ref, obd);
                  break;
          }
          RETURN(rc);
@@ -2586,15 +2639,6 @@ static int filter_cleanup(struct obd_device *obd)
                  LCONSOLE_WARN("%s: shutting down for failover; client state "
                                "will be preserved.\n", obd->obd_name);
  
-        obd_exports_barrier(obd);
-        obd_zombie_barrier();
-
-        lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
-        lprocfs_free_per_client_stats(obd);
-        lprocfs_free_obd_stats(obd);
-        lprocfs_obd_cleanup(obd);
-        lquota_cleanup(filter_quota_interface_ref, obd);
-
          ldlm_namespace_free(obd->obd_namespace, NULL, obd->obd_force);
          obd->obd_namespace = NULL;
  
@@ -2605,7 +2649,7 @@ static int filter_cleanup(struct obd_device *obd)
  
          filter_post(obd);
  
-        LL_DQUOT_OFF(obd->u.obt.obt_sb);
+        ll_vfs_dq_off(obd->u.obt.obt_sb, 0);
          shrink_dcache_sb(obd->u.obt.obt_sb);
  
          server_put_mount(obd->obd_name, obd->u.obt.obt_vfsmnt);
@@ -2639,7 +2683,7 @@ static int filter_connect_internal(struct obd_export *exp,
                  CWARN("!!! This export (nid %s) used object group %d "
                         "earlier; now it's trying to use group %d!  This could "
                         "be a bug in the MDS. Please report to "
-                       "http://bugzilla.lustre.org/\n",
+                       "http://bugs.whamcloud.com/\n",
                         obd_export_nid2str(exp), fed->fed_group,data->ocd_group);
                  RETURN(-EPROTO);
          }
@@ -2650,8 +2694,10 @@ static int filter_connect_internal(struct obd_export *exp,
          data->ocd_version = LUSTRE_VERSION_CODE;
  
          /* Kindly make sure the SKIP_ORPHAN flag is from MDS. */
-        if (!ergo(data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN,
-                  data->ocd_connect_flags & OBD_CONNECT_MDS))
+        if (data->ocd_connect_flags & OBD_CONNECT_MDS)
+                CWARN("%s: Received MDS connection for group %u\n",
+                      exp->exp_obd->obd_name, data->ocd_group);
+        else if (data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN)
                  RETURN(-EPROTO);
  
          if (exp->exp_connect_flags & OBD_CONNECT_GRANT) {
@@ -2705,7 +2751,18 @@ static int filter_connect_internal(struct obd_export *exp,
          } else if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) {
                  data->ocd_brw_size = min(data->ocd_brw_size,
                                 (__u32)(PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT));
-                LASSERT(data->ocd_brw_size);
+                if (data->ocd_brw_size == 0) {
+                        CERROR("%s: cli %s/%p ocd_connect_flags: "LPX64
+                               " ocd_version: %x ocd_grant: %d ocd_index: %u "
+                               "ocd_brw_size is unexpectedly zero, "
+                               "network data corruption?"
+                               "Refusing connection of this client\n",
+                                exp->exp_obd->obd_name,
+                                exp->exp_client_uuid.uuid,
+                                exp, data->ocd_connect_flags, data->ocd_version,
+                                data->ocd_grant, data->ocd_index);
+                        RETURN(-EPROTO);
+                }
          }
  
          if (data->ocd_connect_flags & OBD_CONNECT_CKSUM) {
@@ -2714,9 +2771,10 @@ static int filter_connect_internal(struct obd_export *exp,
                  /* The client set in ocd_cksum_types the checksum types it
                   * supports. We have to mask off the algorithms that we don't
                   * support */
-                if (cksum_types & OBD_CKSUM_ALL)
-                        data->ocd_cksum_types &= OBD_CKSUM_ALL;
-                else
+                data->ocd_cksum_types &= cksum_types_supported();
+
+                /* 1.6.4- only support CRC32 and didn't set ocd_cksum_types */
+                if (unlikely(data->ocd_cksum_types == 0))
                          data->ocd_cksum_types = OBD_CKSUM_CRC32;
  
                  CDEBUG(D_RPCTRACE, "%s: cli %s supports cksum type %x, return "
@@ -2732,6 +2790,9 @@ static int filter_connect_internal(struct obd_export *exp,
                                     obd_export_nid2str(exp));
          }
  
+        if (data->ocd_connect_flags & OBD_CONNECT_MAXBYTES)
+                data->ocd_maxbytes = exp->exp_obd->u.obt.obt_sb->s_maxbytes;
+
          RETURN(0);
  }
  
@@ -2749,12 +2810,11 @@ static int filter_reconnect(const struct lu_env *env,
  
          rc = filter_connect_internal(exp, data, 1);
          if (rc == 0)
-                filter_export_stats_init(obd, exp, 1, localdata);
+                filter_export_stats_init(obd, exp, localdata);
  
          RETURN(rc);
  }
  
-/* nearly identical to mds_connect */
  static int filter_connect(const struct lu_env *env,
                            struct obd_export **exp, struct obd_device *obd,
                            struct obd_uuid *cluuid,
@@ -2763,7 +2823,6 @@ static int filter_connect(const struct lu_env *env,
          struct lvfs_run_ctxt saved;
          struct lustre_handle conn = { 0 };
          struct obd_export *lexp;
-        __u32 group;
          int rc;
          ENTRY;
  
@@ -2780,7 +2839,7 @@ static int filter_connect(const struct lu_env *env,
          if (rc)
                  GOTO(cleanup, rc);
  
-        filter_export_stats_init(obd, lexp, 0, localdata);
+        filter_export_stats_init(obd, lexp, localdata);
          if (obd->obd_replayable) {
                  struct lsd_client_data *lcd = lexp->exp_target_data.ted_lcd;
                  LASSERT(lcd);
@@ -2790,16 +2849,11 @@ static int filter_connect(const struct lu_env *env,
                          GOTO(cleanup, rc);
          }
  
-        group = data->ocd_group;
-
-        CWARN("%s: Received MDS connection ("LPX64"); group %d\n",
-              obd->obd_name, lexp->exp_handle.h_cookie, group);
-
          push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-        rc = filter_read_groups(obd, group, 1);
+        rc = filter_read_groups(obd, data->ocd_group, 1);
          pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
          if (rc != 0) {
-                CERROR("can't read group %u\n", group);
+                CERROR("can't read group %u\n", data->ocd_group);
                  GOTO(cleanup, rc);
          }
  
@@ -2808,7 +2862,6 @@ static int filter_connect(const struct lu_env *env,
  cleanup:
          if (rc) {
                  class_disconnect(lexp);
-                lprocfs_exp_cleanup(lexp);
                  *exp = NULL;
          } else {
                  *exp = lexp;
@@ -2938,12 +2991,14 @@ static int filter_destroy_export(struct obd_export *exp)
          lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd);
  
          target_destroy_export(exp);
+
+        if (unlikely(obd_uuid_equals(&exp->exp_obd->obd_uuid,
+                                     &exp->exp_client_uuid)))
+               RETURN(0);
+
          ldlm_destroy_export(exp);
          lut_client_free(exp);
  
-        if (obd_uuid_equals(&exp->exp_client_uuid, &exp->exp_obd->obd_uuid))
-                RETURN(0);
-
          if (!exp->exp_obd->obd_replayable)
                  fsfilt_sync(exp->exp_obd, exp->exp_obd->u.obt.obt_sb);
  
@@ -3088,6 +3143,22 @@ struct dentry *__filter_oa2dentry(struct obd_device *obd, struct ost_id *ostid,
                  RETURN(ERR_PTR(-ENOENT));
          }
  
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2,7,50,0)
+        /* Try to correct for a bug in 2.1.0 (LU-221) that caused negative
+         * timestamps to appear to be in the far future, due to old timestamp
+         * being stored on disk as an unsigned value.  This fixes up any
+         * bad values stored on disk before returning them to the client,
+         * and ensures any timestamp updates are correct.  LU-1042 */
+        if (unlikely(LTIME_S(dchild->d_inode->i_atime) == LU221_BAD_TIME))
+                LTIME_S(dchild->d_inode->i_atime) = 0;
+        if (unlikely(LTIME_S(dchild->d_inode->i_mtime) == LU221_BAD_TIME))
+                LTIME_S(dchild->d_inode->i_mtime) = 0;
+        if (unlikely(LTIME_S(dchild->d_inode->i_ctime) == LU221_BAD_TIME))
+                LTIME_S(dchild->d_inode->i_ctime) = 0;
+#else
+#warning "remove old LU-221/LU-1042 workaround code"
+#endif
+
          return dchild;
  }
  
@@ -3095,6 +3166,7 @@ static int filter_getattr(struct obd_export *exp, struct obd_info *oinfo)
  {
          struct dentry *dentry = NULL;
          struct obd_device *obd;
+        __u64 curr_version;
          int rc = 0;
          ENTRY;
  
@@ -3115,7 +3187,14 @@ static int filter_getattr(struct obd_export *exp, struct obd_info *oinfo)
  
          /* Limit the valid bits in the return data to what we actually use */
          oinfo->oi_oa->o_valid = OBD_MD_FLID;
-        obdo_from_inode(oinfo->oi_oa, dentry->d_inode, NULL, FILTER_VALID_FLAGS);
+        obdo_from_inode(oinfo->oi_oa, dentry->d_inode, FILTER_VALID_FLAGS);
+
+        /* Store inode version in reply */
+        curr_version = fsfilt_get_version(exp->exp_obd, dentry->d_inode);
+        if ((__s64)curr_version != -EOPNOTSUPP) {
+                oinfo->oi_oa->o_valid |= OBD_MD_FLDATAVERSION;
+                oinfo->oi_oa->o_data_version = curr_version;
+        }
  
          f_dput(dentry);
          RETURN(rc);
@@ -3192,13 +3271,14 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
                          *fcc = oa->o_lcookie;
          }
          if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) {
-                DQUOT_INIT(inode);
+                unsigned long now = jiffies;
                  /* Filter truncates and writes are serialized by
                   * i_alloc_sem, see the comment in
                   * filter_preprw_write.*/
                  if (ia_valid & ATTR_SIZE)
                          down_write(&inode->i_alloc_sem);
                  LOCK_INODE_MUTEX(inode);
+                fsfilt_check_slow(exp->exp_obd, now, "i_alloc_sem and i_mutex");
                  old_size = i_size_read(inode);
          }
  
@@ -3261,6 +3341,11 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
                  if (IS_ERR(handle))
                          GOTO(out_unlock, rc = PTR_ERR(handle));
          }
+
+        /* Locking order: i_mutex -> journal_lock -> dqptr_sem. LU-952 */
+        if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID))
+                ll_vfs_dq_init(inode);
+
          if (oa->o_valid & OBD_MD_FLFLAGS) {
                  rc = fsfilt_iocontrol(exp->exp_obd, dentry,
                                        FSFILT_IOC_SETFLAGS, (long)&oa->o_flags);
@@ -3280,7 +3365,10 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
          * we have two left for the last_rcvd and VBR inode version updates. */
          err = fsfilt_extend(exp->exp_obd, inode, 2, handle);
  
-        rc = filter_finish_transno(exp, inode, oti, rc, sync);
+        /* Update inode version only if data has changed => size has changed */
+        rc = filter_finish_transno(exp, ia_valid & ATTR_SIZE ? inode : NULL,
+                                   oti, rc, sync);
+
          if (sync) {
                  filter_cancel_cookies_cb(exp->exp_obd, 0, fcc, rc);
                  fcc = NULL;
@@ -3346,7 +3434,7 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
          int rc;
          ENTRY;
  
-        if (oa->o_valid & OBD_FL_TRUNC)
+        if (oinfo->oi_flags & OBD_FL_PUNCH)
                  opc |= CAPA_OPC_OSS_TRUNC;
  
          rc = filter_auth_capa(exp, NULL, oa->o_seq, capa, opc);
@@ -3395,7 +3483,9 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
           */
          if (oa->o_valid &
              (OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME)) {
+                unsigned long now = jiffies;
                  down_write(&dentry->d_inode->i_alloc_sem);
+                fsfilt_check_slow(exp->exp_obd, now, "i_alloc_sem");
                  fmd = filter_fmd_get(exp, oa->o_id, oa->o_seq);
                  if (fmd && fmd->fmd_mactime_xid < oti->oti_xid)
                          fmd->fmd_mactime_xid = oti->oti_xid;
@@ -3421,7 +3511,7 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
          oa->o_valid = OBD_MD_FLID;
  
          /* Quota release need uid/gid info */
-        obdo_from_inode(oa, dentry->d_inode, NULL,
+        obdo_from_inode(oa, dentry->d_inode,
                          FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID);
  
          EXIT;
@@ -3482,7 +3572,7 @@ static int filter_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
                  LASSERT((*lsmp)->lsm_object_id);
          }
  
-        (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+        (*lsmp)->lsm_maxbytes = exp->exp_obd->u.obt.obt_sb->s_maxbytes;
  
          RETURN(lsm_size);
  }
@@ -3497,7 +3587,7 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
          int skip_orphan;
          ENTRY;
  
-        LASSERT(down_trylock(&filter->fo_create_locks[oa->o_seq]) != 0);
+        LASSERT_MUTEX_LOCKED(&filter->fo_create_locks[oa->o_seq]);
  
          memset(&doa, 0, sizeof(doa));
  
@@ -3541,7 +3631,14 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
                  filter_set_last_id(filter, id, doa.o_seq);
                  rc = filter_update_last_objid(exp->exp_obd, doa.o_seq, 1);
          } else {
-                /* don't reuse orphan object, return last used objid */
+                /*
+                 * We have destroyed orphan objects, but don't want to reuse
+                 * them. Therefore we don't reset last_id to the last created
+                 * object. Instead, we report back to the MDS the object id
+                 * of the last orphan, so that the MDS can restart allocating
+                 * objects from this id + 1 and thus skip the whole orphan
+                 * object id range.
+                 */
                  oa->o_id = last;
                  rc = 0;
          }
@@ -3572,18 +3669,18 @@ static int filter_handle_precreate(struct obd_export *exp, struct obdo *oa,
                  }
                  /* This causes inflight precreates to abort and drop lock */
                  cfs_set_bit(group, &filter->fo_destroys_in_progress);
-                cfs_down(&filter->fo_create_locks[group]);
+                cfs_mutex_lock(&filter->fo_create_locks[group]);
                  if (!cfs_test_bit(group, &filter->fo_destroys_in_progress)) {
                          CERROR("%s:["LPU64"] destroys_in_progress already cleared\n",
                                 exp->exp_obd->obd_name, group);
-                        cfs_up(&filter->fo_create_locks[group]);
+                        cfs_mutex_unlock(&filter->fo_create_locks[group]);
                          RETURN(0);
                  }
                  diff = oa->o_id - last;
                  CDEBUG(D_HA, "filter_last_id() = "LPU64" -> diff = %d\n",
                         last, diff);
  
-                if (-diff > OST_MAX_PRECREATE) {
+                if (-diff > (OST_MAX_PRECREATE * 3) / 2) {
                          CERROR("%s: ignoring bogus orphan destroy request: "
                                 "obdid "LPU64" last_id "LPU64"\n", obd->obd_name,
                                 oa->o_id, last);
@@ -3601,7 +3698,7 @@ static int filter_handle_precreate(struct obd_export *exp, struct obdo *oa,
                          cfs_clear_bit(group, &filter->fo_destroys_in_progress);
                  }
          } else {
-                cfs_down(&filter->fo_create_locks[group]);
+                cfs_mutex_lock(&filter->fo_create_locks[group]);
                  if (oti->oti_conn_cnt < exp->exp_conn_cnt) {
                          CERROR("%s: dropping old precreate request\n",
                                 obd->obd_name);
@@ -3624,13 +3721,13 @@ static int filter_handle_precreate(struct obd_export *exp, struct obdo *oa,
                  rc = filter_precreate(obd, oa, group, &diff);
                  oa->o_id = filter_last_id(&obd->u.filter, group);
                  oa->o_seq = group;
-                oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
+                oa->o_valid |= (OBD_MD_FLID | OBD_MD_FLGROUP);
                  GOTO(out, rc);
          }
          /* else diff == 0 */
          GOTO(out, rc = 0);
  out:
-        cfs_up(&filter->fo_create_locks[group]);
+        cfs_mutex_unlock(&filter->fo_create_locks[group]);
          return rc;
  }
  
@@ -3639,6 +3736,7 @@ static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
  {
          struct filter_obd *filter = &obd->u.filter;
          int blockbits = obd->u.obt.obt_sb->s_blocksize_bits;
+        struct lr_server_data *lsd = class_server_data(obd);
          int rc;
          ENTRY;
  
@@ -3662,16 +3760,13 @@ static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
                                 ((filter->fo_tot_dirty + filter->fo_tot_pending +
                                   osfs->os_bsize - 1) >> blockbits));
  
-        if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOSPC)) {
-                struct lr_server_data *lsd = class_server_data(obd);
-                int index = le32_to_cpu(lsd->lsd_ost_index);
+        if (OBD_FAIL_CHECK_VALUE(OBD_FAIL_OST_ENOSPC,
+                                 le32_to_cpu(lsd->lsd_ost_index)))
+                osfs->os_bfree = osfs->os_bavail = 2;
  
-                if (obd_fail_val == -1 ||
-                    index == obd_fail_val)
-                        osfs->os_bfree = osfs->os_bavail = 2;
-                else if (obd_fail_loc & OBD_FAIL_ONCE)
-                        obd_fail_loc &= ~OBD_FAILED; /* reset flag */
-        }
+        if (OBD_FAIL_CHECK_VALUE(OBD_FAIL_OST_ENOINO,
+                                 le32_to_cpu(lsd->lsd_ost_index)))
+                osfs->os_ffree = 0;
  
          /* set EROFS to state field if FS is mounted as RDONLY. The goal is to
           * stop creating files on MDS if OST is not good shape to create
@@ -3741,6 +3836,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
          struct dentry *dchild = NULL, *dparent = NULL;
          struct filter_obd *filter;
          struct obd_statfs *osfs;
+        struct iattr iattr;
          int err = 0, rc = 0, recreate_obj = 0, i;
          cfs_time_t enough_time = cfs_time_shift(DISK_TIMEOUT/2);
          __u64 os_ffree;
@@ -3750,7 +3846,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
  
          filter = &obd->u.filter;
  
-        LASSERT(down_trylock(&filter->fo_create_locks[group]) != 0);
+        LASSERT_MUTEX_LOCKED(&filter->fo_create_locks[group]);
  
          OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_PRECREATE, obd_timeout / 2);
  
@@ -3769,6 +3865,13 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                                 LPU64"\n", obd->obd_name, osfs->os_bavail <<
                                 obd->u.obt.obt_vfsmnt->mnt_sb->s_blocksize_bits);
                          *num = 0;
+                        if (oa->o_valid & OBD_MD_FLFLAGS)
+                                oa->o_flags |= OBD_FL_NOSPC_BLK;
+                        else {
+                                oa->o_valid |= OBD_MD_FLFLAGS;
+                                oa->o_flags = OBD_FL_NOSPC_BLK;
+                        }
+
                          rc = -ENOSPC;
                  }
                  OBD_FREE(osfs, sizeof(*osfs));
@@ -3802,10 +3905,15 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                  } else
                          next_id = filter_last_id(filter, group) + 1;
  
-                /* Temporary solution for oid in CMD before fid-on-OST */
-                if ((fid_seq_is_mdt0(oa->o_seq) && next_id >= IDIF_MAX_OID) &&
-                    (fid_seq_is_cmd(oa->o_seq) && next_id >= OBIF_MAX_OID)) {
-                        CERROR("%s:"POSTID" hit the max IDIF_MAX_OID(1<<48)!\n",
+                /* Don't create objects beyond the valid range for this SEQ */
+                if (unlikely(fid_seq_is_mdt0(group) &&
+                            next_id >= IDIF_MAX_OID)) {
+                        CERROR("%s:"POSTID" hit the IDIF_MAX_OID (1<<48)!\n",
+                                obd->obd_name, next_id, group);
+                        GOTO(cleanup, rc = -ENOSPC);
+               } else if (unlikely(!fid_seq_is_mdt0(group) &&
+                                   next_id >= OBIF_MAX_OID)) {
+                        CERROR("%s:"POSTID" hit the OBIF_MAX_OID (1<<32)!\n",
                                  obd->obd_name, next_id, group);
                          GOTO(cleanup, rc = -ENOSPC);
                  }
@@ -3831,7 +3939,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                          } else {
                                  /* Use these existing objects if they are
                                   * zero length. */
-                                if (dchild->d_inode->i_size == 0) {
+                                if (i_size_read(dchild->d_inode) == 0) {
                                          rc = filter_use_existing_obj(obd,dchild,
                                                        &handle, &cleanup_phase);
                                          if (rc == 0)
@@ -3865,12 +3973,24 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                  rc = ll_vfs_create(dparent->d_inode, dchild,
                                     S_IFREG |  S_ISUID | S_ISGID | 0666, NULL);
                  if (rc) {
-                        CERROR("create failed rc = %d\n", rc);
+                        CWARN("%s: create failed: rc = %d\n", obd->obd_name,rc);
                          if (rc == -ENOSPC) {
                                  os_ffree = filter_calc_free_inodes(obd);
-                                if (os_ffree != -1)
-                                        CERROR("%s: free inode "LPU64"\n",
-                                               obd->obd_name, os_ffree);
+                                if (os_ffree == -1)
+                                        GOTO(cleanup, rc);
+
+                                if (obd->obd_osfs.os_bavail <
+                                    (obd->obd_osfs.os_blocks >> 10)) {
+                                        if (oa->o_valid & OBD_MD_FLFLAGS) {
+                                                oa->o_flags |= OBD_FL_NOSPC_BLK;
+                                        } else {
+                                                oa->o_valid |= OBD_MD_FLFLAGS;
+                                                oa->o_flags = OBD_FL_NOSPC_BLK;
+                                        }
+
+                                        CWARN("%s: free inode "LPU64"\n",
+                                              obd->obd_name, os_ffree);
+                                }
                          }
                          GOTO(cleanup, rc);
                  }
@@ -3880,12 +4000,27 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                                         dchild->d_inode->i_ino);
  
  set_last_id:
+                /* Initialize a/c/m time so any client timestamp will always
+                 * be newer and update the inode. ctime = 0 is also handled
+                 * specially in fsfilt_ext3_setattr(). See LU-221, LU-1042 */
+                iattr.ia_valid = ATTR_ATIME | ATTR_MTIME | ATTR_CTIME;
+                LTIME_S(iattr.ia_atime) = 0;
+                LTIME_S(iattr.ia_mtime) = 0;
+                LTIME_S(iattr.ia_ctime) = 0;
+                err = fsfilt_setattr(obd, dchild, handle, &iattr, 0);
+                 if (err)
+                        CWARN("%s: unable to initialize a/c/m time of newly "
+                              "created object %.*s: rc = %d\n",
+                              obd->obd_name, dchild->d_name.len,
+                              dchild->d_name.name, err);
+
                  if (!recreate_obj) {
                          filter_set_last_id(filter, next_id, group);
                          err = filter_update_last_objid(obd, group, 0);
                          if (err)
-                                CERROR("unable to write lastobjid "
-                                       "but file created\n");
+                                CERROR("%s: unable to write lastobjid "
+                                       "but file created: rc = %d\n",
+                                       obd->obd_name, err);
                  }
  
          cleanup:
@@ -3941,7 +4076,17 @@ int filter_create(struct obd_export *exp, struct obdo *oa,
          fed = &exp->exp_filter_data;
          filter = &obd->u.filter;
  
-        if (fed->fed_group != oa->o_seq) {
+        /* A 1.8 client doesn't carry the ocd_group in its connect request,
+         * so the fed_group will always be zero for 1.8 clients. */
+        if (!(exp->exp_connect_flags & OBD_CONNECT_FULL20)) {
+                if (oa->o_seq != FID_SEQ_OST_MDT0 &&
+                    oa->o_seq != FID_SEQ_LLOG &&
+                    oa->o_seq != FID_SEQ_ECHO) {
+                        CERROR("The request from older client has invalid"
+                               " group "LPU64"!\n", oa->o_seq);
+                        RETURN(-EINVAL);
+                }
+        } else if (fed->fed_group != oa->o_seq) {
                  CERROR("%s: this export (nid %s) used object group %d "
                          "earlier; now it's trying to use group "LPU64"!"
                          " This could be a bug in the MDS. Please report to "
@@ -3971,9 +4116,9 @@ int filter_create(struct obd_export *exp, struct obdo *oa,
                          rc = -EINVAL;
                  } else {
                          diff = 1;
-                        cfs_down(&filter->fo_create_locks[oa->o_seq]);
+                        cfs_mutex_lock(&filter->fo_create_locks[oa->o_seq]);
                          rc = filter_precreate(obd, oa, oa->o_seq, &diff);
-                        cfs_up(&filter->fo_create_locks[oa->o_seq]);
+                        cfs_mutex_unlock(&filter->fo_create_locks[oa->o_seq]);
                  }
          } else {
                  rc = filter_handle_precreate(exp, oa, oa->o_seq, oti);
@@ -4008,6 +4153,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
          struct llog_cookie *fcc = NULL;
          int rc, rc2, cleanup_phase = 0, sync = 0;
          struct iattr iattr;
+        unsigned long now;
          ENTRY;
  
          rc = filter_auth_capa(exp, NULL, oa->o_seq,
@@ -4062,7 +4208,6 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
                  if (fcc != NULL)
                          *fcc = oa->o_lcookie;
          }
-        DQUOT_INIT(dchild->d_inode);
  
          /* we're gonna truncate it first in order to avoid possible deadlock:
           *      P1                      P2
@@ -4076,8 +4221,10 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
           * between page lock, i_mutex & starting new journal handle.
           * (see bug 20321) -johann
           */
+        now = jiffies;
          down_write(&dchild->d_inode->i_alloc_sem);
          LOCK_INODE_MUTEX(dchild->d_inode);
+        fsfilt_check_slow(exp->exp_obd, now, "i_alloc_sem and i_mutex");
  
          /* VBR: version recovery check */
          rc = filter_version_get_check(exp, oti, dchild->d_inode);
@@ -4095,6 +4242,9 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
                  GOTO(cleanup, rc = PTR_ERR(handle));
          }
  
+        /* Locking order: i_mutex -> journal_lock -> dqptr_sem. LU-952 */
+        ll_vfs_dq_init(dchild->d_inode);
+
          iattr.ia_valid = ATTR_SIZE;
          iattr.ia_size = 0;
          rc = fsfilt_setattr(obd, dchild, handle, &iattr, 1);
@@ -4124,7 +4274,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
          cleanup_phase = 4; /* fsfilt_commit */
  
          /* Quota release need uid/gid of inode */
-        obdo_from_inode(oa, dchild->d_inode, NULL, OBD_MD_FLUID|OBD_MD_FLGID);
+        obdo_from_inode(oa, dchild->d_inode, OBD_MD_FLUID | OBD_MD_FLGID);
  
          filter_fmd_drop(exp, oa->o_id, oa->o_seq);
  
@@ -4203,15 +4353,13 @@ static int filter_truncate(struct obd_export *exp, struct obd_info *oinfo,
                  oinfo->oi_policy.l_extent.start);
  
          oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start;
-        oinfo->oi_oa->o_valid |= OBD_FL_TRUNC;
          rc = filter_setattr(exp, oinfo, oti);
-        oinfo->oi_oa->o_valid &= ~OBD_FL_TRUNC;
          RETURN(rc);
  }
  
-static int filter_sync(struct obd_export *exp, struct obdo *oa,
-                       struct lov_stripe_md *lsm, obd_off start, obd_off end,
-                       void *capa)
+static int filter_sync(struct obd_export *exp, struct obd_info *oinfo,
+                       obd_off start, obd_off end,
+                       struct ptlrpc_request_set *set)
  {
          struct lvfs_run_ctxt saved;
          struct obd_device_target *obt;
@@ -4219,22 +4367,23 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa,
          int rc, rc2;
          ENTRY;
  
-        rc = filter_auth_capa(exp, NULL, oa->o_seq,
-                              (struct lustre_capa *)capa, CAPA_OPC_OSS_WRITE);
+        rc = filter_auth_capa(exp, NULL, oinfo->oi_oa->o_seq,
+                              (struct lustre_capa *)oinfo->oi_capa,
+                              CAPA_OPC_OSS_WRITE);
          if (rc)
                  RETURN(rc);
  
          obt = &exp->exp_obd->u.obt;
  
          /* An objid of zero is taken to mean "sync whole filesystem" */
-        if (!oa || !(oa->o_valid & OBD_MD_FLID)) {
+        if (!oinfo->oi_oa || !(oinfo->oi_oa->o_valid & OBD_MD_FLID)) {
                  rc = fsfilt_sync(exp->exp_obd, obt->obt_sb);
                  /* Flush any remaining cancel messages out to the target */
                  filter_sync_llogs(exp->exp_obd, exp);
                  RETURN(rc);
          }
  
-        dentry = filter_oa2dentry(exp->exp_obd, &oa->o_oi);
+        dentry = filter_oa2dentry(exp->exp_obd, &oinfo->oi_oa->o_oi);
          if (IS_ERR(dentry))
                  RETURN(PTR_ERR(dentry));
  
@@ -4256,8 +4405,8 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa,
          }
          UNLOCK_INODE_MUTEX(dentry->d_inode);
  
-        oa->o_valid = OBD_MD_FLID;
-        obdo_from_inode(oa, dentry->d_inode, NULL, FILTER_VALID_FLAGS);
+        oinfo->oi_oa->o_valid = OBD_MD_FLID;
+        obdo_from_inode(oinfo->oi_oa, dentry->d_inode, FILTER_VALID_FLAGS);
  
          pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
  
@@ -4498,8 +4647,8 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp,
                  CDEBUG(D_HA, "syncing ost %s\n", obd->obd_name);
                  rc = fsfilt_sync(obd, obd->u.obt.obt_sb);
  
-                lvfs_set_rdonly(obd, obd->u.obt.obt_sb);
-                RETURN(0);
+                rc = lvfs_set_rdonly(obd, obd->u.obt.obt_sb);
+                RETURN(rc);
          }
  
          case OBD_IOC_CATLOGLIST: {
@@ -4579,6 +4728,24 @@ static int filter_process_config(struct obd_device *obd, obd_count len,
          return rc;
  }
  
+static int filter_notify(struct obd_device *obd,
+                         struct obd_device *unused,
+                         enum obd_notify_event ev, void *data)
+{
+        switch (ev) {
+        case OBD_NOTIFY_CONFIG:
+                LASSERT(obd->obd_no_conn);
+                cfs_spin_lock(&obd->obd_dev_lock);
+                obd->obd_no_conn = 0;
+                cfs_spin_unlock(&obd->obd_dev_lock);
+                break;
+        default:
+                CDEBUG(D_INFO, "%s: Unhandled notification %#x\n",
+                       obd->obd_name, ev);
+        }
+        return 0;
+}
+
  static struct lvfs_callback_ops filter_lvfs_ops = {
          l_fid2dentry:     filter_lvfs_fid2dentry,
  };
@@ -4602,7 +4769,6 @@ static struct obd_ops filter_obd_ops = {
          .o_create         = filter_create,
          .o_setattr        = filter_setattr,
          .o_destroy        = filter_destroy,
-        .o_brw            = filter_brw,
          .o_punch          = filter_truncate,
          .o_sync           = filter_sync,
          .o_preprw         = filter_preprw,
@@ -4613,6 +4779,7 @@ static struct obd_ops filter_obd_ops = {
          .o_iocontrol      = filter_iocontrol,
          .o_health_check   = filter_health_check,
          .o_process_config = filter_process_config,
+        .o_notify         = filter_notify,
  };
  
  quota_interface_t *filter_quota_interface_ref;