Whamcloud - gitweb
Change e2fsck to detect and offer to delete or rename duplicate
authorTheodore Ts'o <tytso@mit.edu>
Fri, 14 Mar 2003 06:43:56 +0000 (01:43 -0500)
committerTheodore Ts'o <tytso@mit.edu>
Fri, 14 Mar 2003 06:43:56 +0000 (01:43 -0500)
filenames in directories when rebuilding directories using
"e2fsck -fD /dev/XXX"

e2fsck/ChangeLog
e2fsck/e2fsck.h
e2fsck/pass3.c
e2fsck/problem.c
e2fsck/problem.h
e2fsck/rehash.c
e2fsck/unix.c

index 792f71c..e74a622 100644 (file)
@@ -1,3 +1,23 @@
+2003-03-14  Theodore Ts'o  <tytso@mit.edu>
+
+       * rehash.c (duplicate_search_and_fix): Now search for duplicates
+               filenames, and either prompt to remove a complete
+               duplicate entry, or to rename a duplicate filename.
+               (e2fsck_rehash_dir): Use a progress bar to report
+               progress, and don't print all of the directory inodes as
+               they are optimized.
+
+       * problem.c, problem.h (PR_2_DUPLICATE_DIRENT,
+               PR_2_NON_UNIQUE_FILE):  New problem codes.
+       
+       * unix.c (e2fsck_simple_progress), e2fsck.h: New function which
+               can be called to provide specialized progress bars that
+               are not related to the top-level pass-based completion
+               percentage.
+
+       * pass3.c (e2fsck_adjust_inode_count), e2fsck.h: Export previously
+               static function.
+
 2003-03-06    <tytso@mit.edu>
 
        * e2fsck.8.in: Fix minor nit in the -C option.  (Addresses Debian
index 3526c71..67d4b95 100644 (file)
@@ -409,6 +409,8 @@ extern int e2fsck_reconnect_file(e2fsck_t ctx, ext2_ino_t inode);
 extern errcode_t e2fsck_expand_directory(e2fsck_t ctx, ext2_ino_t dir,
                                         int num, int gauranteed_size);
 extern ext2_ino_t e2fsck_get_lost_and_found(e2fsck_t ctx, int fix);
+extern errcode_t e2fsck_adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino, 
+                                          int adj);
 
 
 /* region.c */
@@ -456,3 +458,5 @@ extern int ext2_file_type(unsigned int mode);
 
 /* unix.c */
 extern void e2fsck_clear_progbar(e2fsck_t ctx);
+extern int e2fsck_simple_progress(e2fsck_t ctx, char *label,
+                                 float percent, unsigned int dpynum);
index 1e68b1e..86f2e62 100644 (file)
@@ -45,7 +45,6 @@ static void check_root(e2fsck_t ctx);
 static int check_directory(e2fsck_t ctx, struct dir_info *dir,
                           struct problem_context *pctx);
 static void fix_dotdot(e2fsck_t ctx, struct dir_info *dir, ext2_ino_t parent);
-static errcode_t adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino, int adj);
 
 static ext2fs_inode_bitmap inode_loop_detect = 0;
 static ext2fs_inode_bitmap inode_done_map = 0;
@@ -406,7 +405,7 @@ ext2_ino_t e2fsck_get_lost_and_found(e2fsck_t ctx, int fix)
                dirinfo = e2fsck_get_dir_info(ctx, ino);
                if (dirinfo)
                        dirinfo->parent = 0;
-               adjust_inode_count(ctx, ino, -1);
+               e2fsck_adjust_inode_count(ctx, ino, -1);
        } else if (retval != EXT2_ET_FILE_NOT_FOUND) {
                pctx.errcode = retval;
                fix_problem(ctx, PR_3_ERR_FIND_LPF, &pctx);
@@ -435,7 +434,7 @@ ext2_ino_t e2fsck_get_lost_and_found(e2fsck_t ctx, int fix)
        /*
         * Next find a free inode.
         */
-       retval = ext2fs_new_inode(fs, EXT2_ROOT_INO, 040755,
+       retval = ext2fs_new_inode(fs, EXT2_ROOT_INO, 040700,
                                  ctx->inode_used_map, &ino);
        if (retval) {
                pctx.errcode = retval;
@@ -498,7 +497,7 @@ ext2_ino_t e2fsck_get_lost_and_found(e2fsck_t ctx, int fix)
         * Miscellaneous bookkeeping that needs to be kept straight.
         */
        e2fsck_add_dir_info(ctx, ino, EXT2_ROOT_INO);
-       adjust_inode_count(ctx, EXT2_ROOT_INO, 1);
+       e2fsck_adjust_inode_count(ctx, EXT2_ROOT_INO, 1);
        ext2fs_icount_store(ctx->inode_count, ino, 2);
        ext2fs_icount_store(ctx->inode_link_info, ino, 2);
        ctx->lost_and_found = ino;
@@ -554,7 +553,7 @@ int e2fsck_reconnect_file(e2fsck_t ctx, ext2_ino_t ino)
                fix_problem(ctx, PR_3_CANT_RECONNECT, &pctx);
                return 1;
        }
-       adjust_inode_count(ctx, ino, 1);
+       e2fsck_adjust_inode_count(ctx, ino, 1);
 
        return 0;
 }
@@ -562,7 +561,7 @@ int e2fsck_reconnect_file(e2fsck_t ctx, ext2_ino_t ino)
 /*
  * Utility routine to adjust the inode counts on an inode.
  */
-static errcode_t adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino, int adj)
+errcode_t e2fsck_adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino, int adj)
 {
        ext2_filsys fs = ctx->fs;
        errcode_t               retval;
@@ -628,12 +627,12 @@ static int fix_dotdot_proc(struct ext2_dir_entry *dirent,
 
        clear_problem_context(&pctx);
        
-       retval = adjust_inode_count(fp->ctx, dirent->inode, -1);
+       retval = e2fsck_adjust_inode_count(fp->ctx, dirent->inode, -1);
        if (retval) {
                pctx.errcode = retval;
                fix_problem(fp->ctx, PR_3_ADJUST_INODE, &pctx);
        }
-       retval = adjust_inode_count(fp->ctx, fp->parent, 1);
+       retval = e2fsck_adjust_inode_count(fp->ctx, fp->parent, 1);
        if (retval) {
                pctx.errcode = retval;
                fix_problem(fp->ctx, PR_3_ADJUST_INODE, &pctx);
index 29cd462..3870fd4 100644 (file)
@@ -1087,6 +1087,16 @@ static const struct e2fsck_problem problem_table[] = {
          N_("@p @h %d: node (%B) has bad depth\n"),
          PROMPT_NONE, 0 },
        
+       /* Duplicate directory entry found */
+       { PR_2_DUPLICATE_DIRENT,
+         N_("Duplicate @E found.  "),
+         PROMPT_CLEAR, 0 },
+       
+       /* Non-unique filename found */
+       { PR_2_NON_UNIQUE_FILE,
+         N_("@E has a non-unique filename.\nRename to %s"),
+         PROMPT_NULL, 0 },
+       
        /* Pass 3 errors */
 
        /* Pass 3: Checking directory connectivity */
index f4a87d2..3c405ef 100644 (file)
@@ -643,7 +643,13 @@ struct problem_context {
 #define PR_2_HTREE_HASH_ORDER  0x02003F
 
 /* Node in HTREE directory has bad depth */
-#define PR_2_HTREE_BAD_DEPTH   0x020040        
+#define PR_2_HTREE_BAD_DEPTH   0x020040
+
+/* Duplicate directory entry found */
+#define PR_2_DUPLICATE_DIRENT  0x020041
+
+/* Non-unique filename found */
+#define PR_2_NON_UNIQUE_FILE   0x020042
 
 /*
  * Pass 3 errors
index d8bfb40..9c185b4 100644 (file)
@@ -157,20 +157,22 @@ static int fill_dir_block(ext2_filsys fs,
 }
 
 /* Used for sorting the hash entry */
-static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
+static EXT2_QSORT_TYPE name_cmp(const void *a, const void *b)
 {
        const struct hash_entry *he_a = (const struct hash_entry *) a;
        const struct hash_entry *he_b = (const struct hash_entry *) b;
        int     ret;
-       
-       if (he_a->hash > he_b->hash)
-               ret = 1;
-       else if (he_a->hash < he_b->hash)
-               ret = -1;
-       else {
-               if (he_a->minor_hash > he_b->minor_hash)
+       int     min_len;
+
+       min_len = he_a->dir->name_len;
+       if (min_len > he_b->dir->name_len)
+               min_len = he_b->dir->name_len;
+
+       ret = strncmp(he_a->dir->name, he_b->dir->name, min_len);
+       if (ret == 0) {
+               if (he_a->dir->name_len > he_b->dir->name_len)
                        ret = 1;
-               else if (he_a->minor_hash < he_b->minor_hash)
+               else if (he_a->dir->name_len < he_b->dir->name_len)
                        ret = -1;
                else
                        ret = 0;
@@ -179,25 +181,23 @@ static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
 }
 
 /* Used for sorting the hash entry */
-static EXT2_QSORT_TYPE name_cmp(const void *a, const void *b)
+static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
 {
        const struct hash_entry *he_a = (const struct hash_entry *) a;
        const struct hash_entry *he_b = (const struct hash_entry *) b;
        int     ret;
-       int     min_len;
-
-       min_len = he_a->dir->name_len;
-       if (min_len > he_b->dir->name_len)
-               min_len = he_b->dir->name_len;
-
-       ret = strncmp(he_a->dir->name, he_b->dir->name, min_len);
-       if (ret == 0) {
-               if (he_a->dir->name_len > he_b->dir->name_len)
+       
+       if (he_a->hash > he_b->hash)
+               ret = 1;
+       else if (he_a->hash < he_b->hash)
+               ret = -1;
+       else {
+               if (he_a->minor_hash > he_b->minor_hash)
                        ret = 1;
-               else if (he_a->dir->name_len < he_b->dir->name_len)
+               else if (he_a->minor_hash < he_b->minor_hash)
                        ret = -1;
                else
-                       ret = 0;
+                       ret = name_cmp(a, b);
        }
        return ret;
 }
@@ -249,6 +249,127 @@ static errcode_t get_next_block(ext2_filsys fs, struct out_dir *outdir,
        return 0;
 }
 
+/*
+ * This function is used to make a unique filename.  We do this by
+ * appending ~0, and then incrementing the number.  However, we cannot
+ * expand the length of the filename beyond the padding available in
+ * the directory entry.
+ */
+static void mutate_name(char *str, __u16 *len)
+{
+       int     i;
+       __u16   l = *len & 0xFF, h = *len & 0xff00;
+       
+       /*
+        * First check to see if it looks the name has been mutated
+        * already
+        */
+       for (i = l-1; i > 0; i--) {
+               if (!isdigit(str[i]))
+                       break;
+       }
+       if ((i == l-1) || (str[i] != '~')) {
+               if (((l-1) & 3) < 2)
+                       l += 2;
+               else
+                       l = (l+3) & ~3;
+               str[l-2] = '~';
+               str[l-1] = '0';
+               *len = l | h;
+               return;
+       }
+       for (i = l-1; i >= 0; i--) {
+               if (isdigit(str[i])) {
+                       if (str[i] == '9')
+                               str[i] = '0';
+                       else {
+                               str[i]++;
+                               return;
+                       }
+                       continue;
+               }
+               if (i == 1) {
+                       if (str[0] == 'z')
+                               str[0] = 'A';
+                       else if (str[0] == 'Z') {
+                               str[0] = '~';
+                               str[1] = '0';
+                       } else
+                               str[0]++;
+               } else if (i > 0) {
+                       str[i] = '1';
+                       str[i-1] = '~';
+               } else {
+                       if (str[0] == '~')
+                               str[0] = 'a';
+                       else 
+                               str[0]++;
+               }
+               break;
+       }
+}
+
+static int duplicate_search_and_fix(e2fsck_t ctx, ext2_filsys fs,
+                                   ext2_ino_t ino,
+                                   struct fill_dir_struct *fd)
+{
+       struct problem_context  pctx;
+       struct hash_entry       *ent, *prev;
+       int                     i, j;
+       int                     fixed = 0;
+       char                    new_name[256];
+       __u16                   new_len;
+       
+       clear_problem_context(&pctx);
+       pctx.ino = ino;
+
+       for (i=1; i < fd->num_array; i++) {
+               ent = fd->harray + i;
+               prev = ent - 1;
+               if (!ent->dir->inode ||
+                   ((ent->dir->name_len & 0xFF) !=
+                    (prev->dir->name_len & 0xFF)) ||
+                   (strncmp(ent->dir->name, prev->dir->name,
+                            ent->dir->name_len & 0xFF)))
+                       continue;
+               pctx.dirent = ent->dir;
+               if ((ent->dir->inode == prev->dir->inode) &&
+                   fix_problem(ctx, PR_2_DUPLICATE_DIRENT, &pctx)) {
+                       e2fsck_adjust_inode_count(ctx, ent->dir->inode, -1);
+                       ent->dir->inode = 0;
+                       fixed++;
+                       continue;
+               }
+               memcpy(new_name, ent->dir->name, ent->dir->name_len & 0xFF);
+               new_len = ent->dir->name_len;
+               mutate_name(new_name, &new_len);
+               for (j=0; j < fd->num_array; j++) {
+                       if ((i==j) ||
+                           ((ent->dir->name_len & 0xFF) !=
+                            (fd->harray[j].dir->name_len & 0xFF)) ||
+                           (strncmp(new_name, fd->harray[j].dir->name,
+                                    new_len & 0xFF)))
+                               continue;
+                       mutate_name(new_name, &new_len);
+                       
+                       j = -1;
+               }
+               new_name[new_len & 0xFF] = 0;
+               pctx.str = new_name;
+               if (fix_problem(ctx, PR_2_NON_UNIQUE_FILE, &pctx)) {
+                       memcpy(ent->dir->name, new_name, new_len & 0xFF);
+                       ent->dir->name_len = new_len;
+                       ext2fs_dirhash(fs->super->s_def_hash_version,
+                                      ent->dir->name,
+                                      ent->dir->name_len & 0xFF,
+                                      fs->super->s_hash_seed,
+                                      &ent->hash, &ent->minor_hash);
+                       fixed++;
+               }
+       }
+       return fixed;
+}
+
 
 static errcode_t copy_dir_entries(ext2_filsys fs,
                                  struct fill_dir_struct *fd,
@@ -277,6 +398,8 @@ static errcode_t copy_dir_entries(ext2_filsys fs,
        left = fs->blocksize;
        for (i=0; i < fd->num_array; i++) {
                ent = fd->harray + i;
+               if (ent->dir->inode == 0)
+                       continue;
                rec_len = EXT2_DIR_REC_LEN(ent->dir->name_len & 0xFF);
                if (rec_len > left) {
                        if (left)
@@ -575,6 +698,7 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino)
 #endif
 
        /* Sort the list */
+resort:
        if (fd.compress)
                qsort(fd.harray+2, fd.num_array-2,
                      sizeof(struct hash_entry), name_cmp);
@@ -583,6 +707,12 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino)
                      sizeof(struct hash_entry), hash_cmp);
 
        /*
+        * Look for duplicates
+        */
+       if (duplicate_search_and_fix(ctx, fs, ino, &fd))
+               goto resort;
+
+       /*
         * Copy the directory entries.  In a htree directory these
         * will become the leaf nodes.
         */
@@ -623,7 +753,7 @@ void e2fsck_rehash_directories(e2fsck_t ctx)
        ext2_u32_iterate        iter;
        ext2_ino_t              ino;
        errcode_t               retval;
-       int                     i, all_dirs, dir_index, first = 1;
+       int                     i, cur, max, all_dirs, dir_index, first = 1;
 
 #ifdef RESOURCE_TRACK
        init_resource_track(&rtrack);
@@ -639,9 +769,11 @@ void e2fsck_rehash_directories(e2fsck_t ctx)
        clear_problem_context(&pctx);
 
        dir_index = ctx->fs->super->s_feature_compat & EXT2_FEATURE_COMPAT_DIR_INDEX;
-       if (all_dirs)
+       cur = 0;
+       if (all_dirs) {
                i = 0;
-       else {
+               max = e2fsck_get_num_dirinfo(ctx);
+       } else {
                retval = ext2fs_u32_list_iterate_begin(ctx->dirs_to_hash, 
                                                       &iter);
                if (retval) {
@@ -649,6 +781,7 @@ void e2fsck_rehash_directories(e2fsck_t ctx)
                        fix_problem(ctx, PR_3A_OPTIMIZE_ITER, &pctx);
                        return;
                }
+               max = ext2fs_u32_list_count(ctx->dirs_to_hash);
        }
        while (1) {
                if (all_dirs) {
@@ -666,12 +799,16 @@ void e2fsck_rehash_directories(e2fsck_t ctx)
                        fix_problem(ctx, PR_3A_PASS_HEADER, &pctx);
                        first = 0;
                }
+#if 0
                fix_problem(ctx, PR_3A_OPTIMIZE_DIR, &pctx);
+#endif
                pctx.errcode = e2fsck_rehash_dir(ctx, ino);
                if (pctx.errcode) {
                        end_problem_latch(ctx, PR_LATCH_OPTIMIZE_DIR);
                        fix_problem(ctx, PR_3A_OPTIMIZE_DIR_ERR, &pctx);
                }
+               e2fsck_simple_progress(ctx, "Rebuilding directory",
+                                      (float) (++cur) / (float) max, ino);
        }
        end_problem_latch(ctx, PR_LATCH_OPTIMIZE_DIR);
        if (!all_dirs)
index ec1821a..492ba4a 100644 (file)
@@ -328,6 +328,71 @@ extern void e2fsck_clear_progbar(e2fsck_t ctx)
        ctx->flags &= ~E2F_FLAG_PROG_BAR;
 }
 
+int e2fsck_simple_progress(e2fsck_t ctx, char *label, float percent,
+                          unsigned int dpynum)
+{
+       static const char spinner[] = "\\|/-";
+       char buf[80];
+       int     i;
+       int     tick;
+       struct timeval  tv;
+       int dpywidth;
+
+       if (ctx->flags & E2F_FLAG_PROG_SUPPRESS)
+               return 0;
+
+       /*
+        * Calculate the new progress position.  If the
+        * percentage hasn't changed, then we skip out right
+        * away. 
+        */
+       if (ctx->progress_last_percent == (int) 10 * percent)
+               return 0;
+       ctx->progress_last_percent = (int) 10 * percent;
+
+       /*
+        * If we've already updated the spinner once within
+        * the last 1/8th of a second, no point doing it
+        * again.
+        */
+       gettimeofday(&tv, NULL);
+       tick = (tv.tv_sec << 3) + (tv.tv_usec / (1000000 / 8));
+       if ((tick == ctx->progress_last_time) &&
+           (percent != 0.0) && (percent != 100.0))
+               return 0;
+       ctx->progress_last_time = tick;
+
+       /*
+        * Advance the spinner, and note that the progress bar
+        * will be on the screen
+        */
+       ctx->progress_pos = (ctx->progress_pos+1) & 3;
+       ctx->flags |= E2F_FLAG_PROG_BAR;
+
+       dpywidth = 66 - strlen(label);
+       dpywidth = 8 * (dpywidth / 8);
+       if (dpynum)
+               dpywidth -= 8;
+
+       i = ((percent * dpywidth) + 50) / 100;
+       printf("%s: |%s%s", label, bar + (sizeof(bar) - (i+1)),
+              spaces + (sizeof(spaces) - (dpywidth - i + 1)));
+       if (percent == 100.0)
+               fputc('|', stdout);
+       else
+               fputc(spinner[ctx->progress_pos & 3], stdout);
+       if (dpynum)
+               printf(" %4.1f%%  %u\r", percent, dpynum);
+       else
+               printf(" %4.1f%%   \r", percent);
+       
+       if (percent == 100.0)
+               e2fsck_clear_progbar(ctx);
+       fflush(stdout);
+
+       return 0;
+}
+
 static int e2fsck_update_progress(e2fsck_t ctx, int pass,
                                  unsigned long cur, unsigned long max)
 {
@@ -346,53 +411,9 @@ static int e2fsck_update_progress(e2fsck_t ctx, int pass,
                sprintf(buf, "%d %lu %lu\n", pass, cur, max);
                write(ctx->progress_fd, buf, strlen(buf));
        } else {
-               if (ctx->flags & E2F_FLAG_PROG_SUPPRESS)
-                       return 0;
-               if (dpywidth == 0) {
-                       dpywidth = 66 - strlen(ctx->device_name);
-                       dpywidth = 8 * (dpywidth / 8);
-               }
-               /*
-                * Calculate the new progress position.  If the
-                * percentage hasn't changed, then we skip out right
-                * away. 
-                */
                percent = calc_percent(&e2fsck_tbl, pass, cur, max);
-               if (ctx->progress_last_percent == (int) 10 * percent)
-                       return 0;
-               ctx->progress_last_percent = (int) 10 * percent;
-
-               /*
-                * If we've already updated the spinner once within
-                * the last 1/8th of a second, no point doing it
-                * again.
-                */
-               gettimeofday(&tv, NULL);
-               tick = (tv.tv_sec << 3) + (tv.tv_usec / (1000000 / 8));
-               if ((tick == ctx->progress_last_time) &&
-                   (cur != max) && (cur != 0))
-                       return 0;
-               ctx->progress_last_time = tick;
-
-               /*
-                * Advance the spinner, and note that the progress bar
-                * will be on the screen
-                */
-               ctx->progress_pos = (ctx->progress_pos+1) & 3;
-               ctx->flags |= E2F_FLAG_PROG_BAR;
-               
-               i = ((percent * dpywidth) + 50) / 100;
-               printf("%s: |%s%s", ctx->device_name,
-                      bar + (sizeof(bar) - (i+1)),
-                      spaces + (sizeof(spaces) - (dpywidth - i + 1)));
-               if (percent == 100.0)
-                       fputc('|', stdout);
-               else
-                       fputc(spinner[ctx->progress_pos & 3], stdout);
-               printf(" %4.1f%%   \r", percent);
-               if (percent == 100.0)
-                       e2fsck_clear_progbar(ctx);
-               fflush(stdout);
+               e2fsck_simple_progress(ctx, ctx->device_name,
+                                      percent, 0);
        }
        return 0;
 }