Whamcloud - gitweb
r=adilger,phil
[fs/lustre-release.git] / lustre / llite / llite_lib.c
index deaa355..c9cf119 100644 (file)
@@ -43,7 +43,7 @@ extern struct super_operations ll_super_operations;
 #define log2(n) ffz(~(n))
 #endif
 
-struct ll_sb_info *lustre_init_sbi(struct super_block *sb) 
+struct ll_sb_info *lustre_init_sbi(struct super_block *sb)
 {
         struct ll_sb_info *sbi = NULL;
         class_uuid_t uuid;
@@ -53,6 +53,9 @@ struct ll_sb_info *lustre_init_sbi(struct super_block *sb)
         if (!sbi)
                 RETURN(NULL);
 
+        spin_lock_init(&sbi->ll_pglist_lock);
+        INIT_LIST_HEAD(&sbi->ll_pglist);
+        sbi->ll_pglist_gen = 0;
         INIT_LIST_HEAD(&sbi->ll_conn_chain);
         INIT_HLIST_HEAD(&sbi->ll_orphan_dentry_list);
         ll_s2sbi(sb) = sbi;
@@ -62,7 +65,7 @@ struct ll_sb_info *lustre_init_sbi(struct super_block *sb)
         RETURN(sbi);
 }
 
-void lustre_free_sbi(struct super_block *sb) 
+void lustre_free_sbi(struct super_block *sb)
 {
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         ENTRY;
@@ -103,7 +106,12 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         mdc_init_ea_size(obd, osc);
 
         err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
+        if (err == -EBUSY) {
+                CERROR("An MDS (mdc %s) is performing recovery, of which this"
+                       " client is not a part.  Please wait for recovery to "
+                       "complete, abort, or time out.\n", mdc);
+                GOTO(out, err);
+        } else if (err) {
                 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
                 GOTO(out, err);
         }
@@ -118,8 +126,8 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         sb->s_blocksize_bits = log2(osfs.os_bsize);
         sb->s_magic = LL_SUPER_MAGIC;
         sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-        
-        devno = get_uuid2int(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid, 
+
+        devno = get_uuid2int(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid,
                              strlen(sbi2mdc(sbi)->cl_import->imp_target_uuid.uuid));
         sb->s_dev = devno;
 
@@ -130,7 +138,12 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         }
 
         err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid);
-        if (err) {
+        if (err == -EBUSY) {
+                CERROR("An OST (osc %s) is performing recovery, of which this"
+                       " client is not a part.  Please wait for recovery to "
+                       "complete, abort, or time out.\n", osc);
+                GOTO(out, err);
+        } else if (err) {
                 CERROR("cannot connect to %s: rc = %d\n", osc, err);
                 GOTO(out_mdc, err);
         }
@@ -146,7 +159,7 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
 
         sb->s_op = &lustre_super_operations;
 
-        /* make root inode 
+        /* make root inode
          * XXX: move this to after cbd setup? */
         err = mdc_getattr(sbi->ll_mdc_exp, &rootfid,
                           OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
@@ -179,6 +192,8 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc)
                 GOTO(out_root, err);
         }
 
+        /* bug 2805 - set VM readahead to zero */
+        vm_max_readahead = vm_min_readahead = 0;
         sb->s_root = d_alloc_root(root);
         RETURN(err);
 
@@ -356,7 +371,7 @@ void ll_put_super(struct super_block *sb)
 } /* ll_put_super */
 
 int lustre_process_log(struct lustre_mount_data *lmd, char * profile,
-                       struct config_llog_instance *cfg)
+                       struct config_llog_instance *cfg, int allow_recov)
 {
         struct lustre_cfg lcfg;
         struct portals_cfg pcfg;
@@ -372,6 +387,9 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile,
         int err;
         ENTRY;
 
+        if (lmd_bad_magic(lmd))
+                RETURN(-EINVAL);
+
         generate_random_uuid(uuid);
         class_uuid_unparse(uuid, &mdc_uuid);
 
@@ -390,7 +408,7 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile,
                 pcfg.pcfg_nid     = lmd->lmd_server_nid;
                 pcfg.pcfg_id      = lmd->lmd_server_ipaddr;
                 pcfg.pcfg_misc    = lmd->lmd_port;
-                pcfg.pcfg_size    = 0;
+                pcfg.pcfg_size    = 8388608;
                 pcfg.pcfg_flags   = 0x4; /*share*/
                 err = kportal_nal_cmd(&pcfg);
                 if (err <0)
@@ -423,21 +441,35 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile,
         err = class_process_config(&lcfg);
         if (err < 0)
                 GOTO(out_detach, err);
-        
+
         obd = class_name2obd(name);
         if (obd == NULL)
                 GOTO(out_cleanup, err = -EINVAL);
 
+        /* Disable initial recovery on this import */
+        err = obd_set_info(obd->obd_self_export,
+                           strlen("initial_recov"), "initial_recov",
+                           sizeof(allow_recov), &allow_recov);
+        if (err)
+                GOTO(out_cleanup, err);
+
         err = obd_connect(&mdc_conn, obd, &mdc_uuid);
         if (err) {
                 CERROR("cannot connect to %s: rc = %d\n", lmd->lmd_mds, err);
                 GOTO(out_cleanup, err);
         }
-        
+
         exp = class_conn2export(&mdc_conn);
-        
+
         ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT);
+#if 1
         rc = class_config_parse_llog(ctxt, profile, cfg);
+#else
+        /*
+         * For debugging, it's useful to just dump the log
+         */
+        rc = class_config_dump_llog(ctxt, profile, cfg);
+#endif
         if (rc) {
                 CERROR("class_config_parse_llog failed: rc = %d\n", rc);
         }
@@ -476,7 +508,7 @@ out_del_conn:
 out:
         if (rc == 0)
                 rc = err;
-        
+
         RETURN(rc);
 }
 
@@ -490,6 +522,9 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
+        if (lmd_bad_magic(lmd))
+                RETURN(-EINVAL);
+
         sbi = lustre_init_sbi(sb);
         if (!sbi)
                 RETURN(-ENOMEM);
@@ -507,22 +542,22 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
                 }
 
                 OBD_ALLOC(sbi->ll_lmd, sizeof(*sbi->ll_lmd));
-                if (sbi->ll_lmd == NULL) 
+                if (sbi->ll_lmd == NULL)
                         GOTO(out_free, err = -ENOMEM);
                 memcpy(sbi->ll_lmd, lmd, sizeof(*lmd));
 
                 /* generate a string unique to this super, let's try
                  the address of the super itself.*/
-                len = (sizeof(sb) * 2) + 1; 
+                len = (sizeof(sb) * 2) + 1;
                 OBD_ALLOC(sbi->ll_instance, len);
-                if (sbi->ll_instance == NULL) 
+                if (sbi->ll_instance == NULL)
                         GOTO(out_free, err = -ENOMEM);
                 sprintf(sbi->ll_instance, "%p", sb);
 
                 cfg.cfg_instance = sbi->ll_instance;
                 cfg.cfg_uuid = sbi->ll_sb_uuid;
                 cfg.cfg_local_nid = lmd->lmd_local_nid;
-                err = lustre_process_log(lmd, lmd->lmd_profile, &cfg);
+                err = lustre_process_log(lmd, lmd->lmd_profile, &cfg, 0);
                 if (err < 0) {
                         CERROR("Unable to process log: %s\n", lmd->lmd_profile);
 
@@ -536,13 +571,13 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
                 }
                 if (osc)
                         OBD_FREE(osc, strlen(osc) + 1);
-                OBD_ALLOC(osc, strlen(lprof->lp_osc) + 
+                OBD_ALLOC(osc, strlen(lprof->lp_osc) +
                           strlen(sbi->ll_instance) + 2);
                 sprintf(osc, "%s-%s", lprof->lp_osc, sbi->ll_instance);
 
                 if (mdc)
                         OBD_FREE(mdc, strlen(mdc) + 1);
-                OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + 
+                OBD_ALLOC(mdc, strlen(lprof->lp_mdc) +
                           strlen(sbi->ll_instance) + 2);
                 sprintf(mdc, "%s-%s", lprof->lp_mdc, sbi->ll_instance);
         }
@@ -556,9 +591,9 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
                 CERROR("no mdc\n");
                 GOTO(out_free, err = -EINVAL);
         }
-        
+
         err = lustre_common_fill_super(sb, mdc, osc);
-        
+
         if (err)
                 GOTO(out_free, err);
 
@@ -585,8 +620,9 @@ out_free:
                         OBD_ALLOC(cln_prof, len);
                         sprintf(cln_prof, "%s-clean", sbi->ll_lmd->lmd_profile);
 
-                        err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg);
-                        if (err < 0) 
+                        err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg,
+                                                 0);
+                        if (err < 0)
                                 CERROR("Unable to process log: %s\n", cln_prof);
                         OBD_FREE(cln_prof, len);
                         OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance)+ 1);
@@ -598,12 +634,47 @@ out_free:
         goto out_dev;
 } /* lustre_fill_super */
 
+static void lustre_manual_cleanup(struct ll_sb_info *sbi)
+{       /*
+         * Manually tear down every obd device that class_devices_in_group()
+         * returns for this superblock's uuid (sbi->ll_sb_uuid).  For each
+         * device an LCFG_CLEANUP and then an LCFG_DETACH config command is
+         * passed to class_process_config().  A failure of either command is
+         * logged with CERROR and otherwise ignored, so the remaining step
+         * and the remaining devices are still processed.  Afterwards the
+         * profile named by sbi->ll_lmd->lmd_profile is deleted, provided
+         * sbi->ll_lmd is set.
+         *
+         * lustre_put_super() calls this on a forced umount and when
+         * processing the "-clean" config log fails.
+         */
+        struct lustre_cfg lcfg;
+        struct obd_device *obd;
+        int next = 0;   /* handed to class_devices_in_group() by address;
+                         * presumably its iteration position -- TODO confirm */
+
+        while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
+        {
+                int err;
+
+                LCFG_INIT(lcfg, LCFG_CLEANUP, obd->obd_name);
+                err = class_process_config(&lcfg);
+                if (err) {
+                        CERROR("cleanup failed: %s\n", obd->obd_name);
+                        /* no "continue": the detach below is still tried */
+                }
+
+                LCFG_INIT(lcfg, LCFG_DETACH, obd->obd_name);
+                err = class_process_config(&lcfg);
+                if (err) {
+                        CERROR("detach failed: %s\n", obd->obd_name);
+                        /* error is only logged; move on to the next obd */
+                }
+        }
+
+        if (sbi->ll_lmd != NULL)
+                class_del_profile(sbi->ll_lmd->lmd_profile);
+}
+
 void lustre_put_super(struct super_block *sb)
 {
+        struct obd_device *obd;
         struct ll_sb_info *sbi = ll_s2sbi(sb);
+        int force_umount = 0;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
+        obd = class_exp2obd(sbi->ll_mdc_exp);
+        if (obd)
+                force_umount = obd->obd_no_recov;
+        obd = NULL;
 
         lustre_common_put_super(sb);
 
@@ -613,17 +684,27 @@ void lustre_put_super(struct super_block *sb)
                 int err;
                 struct config_llog_instance cfg;
 
+                if (force_umount) {
+                        CERROR("force umount, doing manual cleanup\n");
+                        lustre_manual_cleanup(sbi);
+                        GOTO(free_lmd, 0);
+                }
+
                 cfg.cfg_instance = sbi->ll_instance;
                 cfg.cfg_uuid = sbi->ll_sb_uuid;
 
                 OBD_ALLOC(cln_prof, len);
                 sprintf(cln_prof, "%s-clean", sbi->ll_lmd->lmd_profile);
 
-                err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg);
-                if (err < 0)
-                        CERROR("Unable to process log: %s\n", cln_prof);
+                err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg, 0);
+                if (err < 0) {
+                        CERROR("Unable to process log: %s, doing manual cleanup"
+                               "\n", cln_prof);
+                        lustre_manual_cleanup(sbi);
+                }
 
                 OBD_FREE(cln_prof, len);
+        free_lmd:
                 OBD_FREE(sbi->ll_lmd, sizeof(*sbi->ll_lmd));
                 OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance) + 1);
         }
@@ -794,7 +875,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                         /* from sys_utime() */
                         if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
                                 if (current->fsuid != inode->i_uid &&
-                                    (rc = ll_permission(inode, MAY_WRITE, NULL)) != 0)
+                                    (rc=ll_permission(inode,MAY_WRITE,NULL))!=0)
                                         RETURN(rc);
                         } else {
                                /* from inode_change_ok() */
@@ -812,11 +893,11 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
          * If we don't we can race with other i_size updaters on our node, like
          * ll_file_read.  We can also race with i_size propagation to other
          * nodes through dirtying and writeback of final cached pages.  This
-         * last one is especially bad for racing o_append users on other 
+         * last one is especially bad for racing o_append users on other
          * nodes. */
         if (ia_valid & ATTR_SIZE) {
-                struct ldlm_extent extent = { .start = attr->ia_size,
-                                              .end = OBD_OBJECT_EOF };
+                ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
+                                                           OBD_OBJECT_EOF } };
                 struct lustre_handle lockh = { 0 };
                 int err, ast_flags = 0;
                 /* XXX when we fix the AST intents to pass the discard-range
@@ -828,22 +909,21 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                 /* bug 1639: avoid write/truncate i_sem/DLM deadlock */
                 LASSERT(atomic_read(&inode->i_sem.count) <= 0);
                 up(&inode->i_sem);
-                rc = ll_extent_lock_no_validate(NULL, inode, lsm, LCK_PW,
-                                                &extent, &lockh, ast_flags);
+                rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy, &lockh,
+                                    ast_flags);
                 down(&inode->i_sem);
                 if (rc != ELDLM_OK)
                         RETURN(rc);
 
                 rc = vmtruncate(inode, attr->ia_size);
-                if (rc == 0)
-                        set_bit(LLI_F_HAVE_OST_SIZE_LOCK,
-                                &ll_i2info(inode)->lli_flags);
-
-                //ll_try_done_writing(inode);
 
+                /* We need to drop the semaphore here, because this unlock may
+                 * result in a cancellation, which will need the i_sem */
+                up(&inode->i_sem);
                 /* unlock now as we don't mind other file lockers racing with
                  * the mds updates below? */
                 err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
+                down(&inode->i_sem);
                 if (err) {
                         CERROR("ll_extent_unlock failed: %d\n", err);
                         if (!rc)
@@ -885,6 +965,8 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
                 RETURN(rc);
         }
 
+        osfs->os_type = sb->s_magic;
+
         CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
                osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
 
@@ -980,6 +1062,9 @@ void ll_update_inode(struct inode *inode, struct mds_body *body,
                                 LBUG();
                         }
                 }
+                /* bug 2844 - limit i_blksize for broken user-space apps */
+                LASSERTF(lsm->lsm_xfersize != 0, "%lu\n", lsm->lsm_xfersize);
+                inode->i_blksize = min(lsm->lsm_xfersize, LL_MAX_BLKSIZE);
                 if (lli->lli_smd != lsm)
                         obd_free_memmd(ll_i2obdexp(inode), &lsm);
         }
@@ -1015,7 +1100,7 @@ void ll_update_inode(struct inode *inode, struct mds_body *body,
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
                 inode->i_rdev = body->rdev;
 #else
-                inode->i_rdev = old_encode_dev(body->rdev);
+                inode->i_rdev = old_decode_dev(body->rdev);
 #endif
         if (body->valid & OBD_MD_FLSIZE)
                 inode->i_size = body->size;
@@ -1066,29 +1151,17 @@ void ll_read_inode2(struct inode *inode, void *opaque)
         } else {
                 inode->i_op = &ll_special_inode_operations;
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-                init_special_inode(inode, inode->i_mode, 
+                init_special_inode(inode, inode->i_mode,
                                    kdev_t_to_nr(inode->i_rdev));
 #else
                 init_special_inode(inode, inode->i_mode, inode->i_rdev);
-                
-                lli->ll_save_ifop = inode->i_fop;
-                if (S_ISCHR(inode->i_mode)) {
-                        inode->i_fop = &ll_special_chr_inode_fops;                                                         
-                }else if (S_ISBLK(inode->i_mode)) {
-                        inode->i_fop = &ll_special_blk_inode_fops; 
-                }else if (S_ISFIFO(inode->i_mode)){
-                        inode->i_fop = &ll_special_fifo_inode_fops;
-                }else if (S_ISSOCK(inode->i_mode)){ 
-                        inode->i_fop = &ll_special_sock_inode_fops;
-                }                                               
-                inode->i_fop->owner = lli->ll_save_ifop->owner;
 #endif
                 EXIT;
         }
 }
 
 int ll_iocontrol(struct inode *inode, struct file *file,
-                        unsigned int cmd, unsigned long arg)
+                 unsigned int cmd, unsigned long arg)
 {
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct ptlrpc_request *req = NULL;