>From 31658599d6ff1cd3f02b8ff4c7473d44d9da76a9 Mon Sep 17 00:00:00 2001 From: Pavel Raiskup Date: Fri, 11 Jul 2014 07:36:58 +0200 Subject: [PATCH 1/3] tar: avoid recursion in directory traversal This commit avoids unnecessary recursion from dump_file() call. Directory "stack" is created on heap space instead to avoid segfaults in case of rarely deep directory structures. Note that the recursion still exists in case of incremental archives. Relevant tread: http://www.mail-archive.com/address@hidden/msg04542.html * src/create.c (dump_dir0): Rename to dump_dir as the wrapper itself caused that additional calls to free() had to exist. (dump_dir): Return bool. Use the tour_t context and call tour_plan_dir/tour_plan_file to emulate recursion from now. (open_failure_recover): Rather dup() (sensitively) the directory descriptor for fdopendir() purposes which allows us to save a lot of memory in case of deep dir structures. (create_archive): Use dump_file_incr for incremental dumps and dump_member for usual dumps instead of dump_file in general. (dump_file0): Reuse tour_t context. Let the tar_stat_close on "tour" mechanism. (dump_file): Rename to dump_member. (dump_member): The member name parameter is enough from now. Use the tour_t & tour_* handlers. * update.c (update_archive): Call dump_member from now. * src/tour.c: New file. * src/common.h: Export needed symbols from xlist.c. * gnulib.modules: Use array-list and xlist. * src/Makefile.am: Compile also tour.c file. * NEWS: Document. --- NEWS | 4 + gnulib.modules | 2 + src/Makefile.am | 1 + src/common.h | 31 +++++++- src/create.c | 150 +++++++++++++++++------------------- src/tour.c | 234 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/unlink.c | 6 +- src/update.c | 4 +- 8 files changed, 347 insertions(+), 85 deletions(-) create mode 100644 src/tour.c diff --git a/NEWS b/NEWS index 2dd9f19..616ec02 100644 --- a/NEWS +++ b/NEWS @@ -111,6 +111,10 @@ speed up archivation. * Tar refuses to read input from and write output to a tty device. +* Tar avoids calling recursive rutines for directory traversal (for usual + scenarios) to save stack-space and thus avoid segfaults for rarely deep + archived directories. + * Manpages This release includes official tar(1) and rmt(8) manpages. diff --git a/gnulib.modules b/gnulib.modules index fe5ab73..f2ce146 100644 --- a/gnulib.modules +++ b/gnulib.modules @@ -23,6 +23,7 @@ areadlinkat-with-size argmatch argp argp-version-etc +array-list backupfile closeout configmake @@ -102,5 +103,6 @@ version-etc-fsf xalloc xalloc-die xgetcwd +xlist xstrtoumax xvasprintf diff --git a/src/Makefile.am b/src/Makefile.am index a0bacfd..1ab218c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -40,6 +40,7 @@ tar_SOURCES = \ suffix.c\ system.c\ tar.c\ + tour.c\ transform.c\ unlink.c\ update.c\ diff --git a/src/common.h b/src/common.h index 72ad4c1..8c69f29 100644 --- a/src/common.h +++ b/src/common.h @@ -484,7 +484,8 @@ char *get_directory_entries (struct tar_stat_info *st); void create_archive (void); void pad_archive (off_t size_left); -void dump_file (struct tar_stat_info *parent, char const *name, +void dump_member (char const *member); +void dump_file_incr (struct tar_stat_info *parent, char const *name, char const *fullname); union block *start_header (struct tar_stat_info *st); void finish_header (struct tar_stat_info *st, union block *header, @@ -958,5 +959,33 @@ int owner_map_translate (uid_t uid, uid_t *new_uid, char const **new_name); void group_map_read (char const *file); int group_map_translate (gid_t gid, gid_t *new_gid, char const **new_name); +/* Module tour.c */ + +/* hide TOUR definition in tour.c */ +typedef struct tour *tour_t; + +/* One tour_node_t represents single directory level in FS hierarchy, + i.e. list of files. At one moment we keep in memory only one path + representation. */ +typedef struct +{ + struct tar_stat_info *parent; /* used to fill child's stat info */ + char *items; /* output from readdir */ + + /* per-directory volatile data to avoid reallocation for each file in + * directory */ + const char *item; /* processed filename (ptr into ITEMS) */ + struct tar_stat_info st; /* stat of processed file */ + char *namebuf; /* buffer for constructing full name */ + size_t buflen; /* length of NAMEBUF */ +} tour_node_t; + +tour_t tour_init (const char *, struct tar_stat_info *); +bool tour_next (tour_t, const char **, const char **); +tour_node_t *tour_current (tour_t); +void tour_plan_dir (tour_t, char *); +void tour_plan_file (tour_t, const char *); +bool tour_has_child (tour_t t); +void tour_free (tour_t t); _GL_INLINE_HEADER_END diff --git a/src/create.c b/src/create.c index b7c19ab..edb9ca2 100644 --- a/src/create.c +++ b/src/create.c @@ -1,7 +1,7 @@ /* Create a tar archive. Copyright 1985, 1992-1994, 1996-1997, 1999-2001, 2003-2007, - 2009-2010, 2012-2014 Free Software Foundation, Inc. + 2009-2010, 2012-2015 Free Software Foundation, Inc. This file is part of GNU tar. @@ -1097,12 +1097,15 @@ dump_regular_file (int fd, struct tar_stat_info *st) } -/* Copy info from the directory identified by ST into the archive. - DIRECTORY contains the directory's entries. */ - -static void -dump_dir0 (struct tar_stat_info *st, char const *directory) +/* Dump currently precessed directory in T. Return true if successful, + false (emitting diagnostics) otherwise. Get ST's entries, recurse + through its subfiles, and clean up file descriptors afterwards. + */ +static bool +dump_dir (tour_t t) { + tour_node_t *n = tour_current (t); + struct tar_stat_info *st = &n->st; bool top_level = ! st->parent; const char *tag_file_name; union block *blk = NULL; @@ -1112,7 +1115,7 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) blk = start_header (st); if (!blk) - return; + return false; info_attach_exclist (st); @@ -1167,11 +1170,11 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) set_next_block_after (blk + (bufsize - 1) / BLOCKSIZE); } } - return; + return true; } if (!recursion_option) - return; + return true; if (one_file_system_option && !top_level @@ -1185,9 +1188,6 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) } else { - char *name_buf; - size_t name_size; - switch (check_exclusion_tags (st, &tag_file_name)) { case exclusion_tag_all: @@ -1196,40 +1196,20 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) case exclusion_tag_none: { - char const *entry; - size_t entry_len; - size_t name_len; - - name_buf = xstrdup (st->orig_file_name); - name_size = name_len = strlen (name_buf); - - /* Now output all the files in the directory. */ - for (entry = directory; (entry_len = strlen (entry)) != 0; - entry += entry_len + 1) - { - if (name_size < name_len + entry_len) - { - name_size = name_len + entry_len; - name_buf = xrealloc (name_buf, name_size + 1); - } - strcpy (name_buf + name_len, entry); - if (!excluded_name (name_buf, st)) - dump_file (st, entry, name_buf); - } - - free (name_buf); + char *directory = get_directory_entries (st); + if (! directory) + { + savedir_diag (st->orig_file_name); + return false; + } + tour_plan_dir (t, directory); } break; case exclusion_tag_contents: exclusion_tag_warning (st->orig_file_name, tag_file_name, _("contents not dumped")); - name_size = strlen (st->orig_file_name) + strlen (tag_file_name) + 1; - name_buf = xmalloc (name_size); - strcpy (name_buf, st->orig_file_name); - strcat (name_buf, tag_file_name); - dump_file (st, tag_file_name, name_buf); - free (name_buf); + tour_plan_file (t, tag_file_name); break; case exclusion_tag_under: @@ -1238,6 +1218,7 @@ dump_dir0 (struct tar_stat_info *st, char const *directory) break; } } + return true; } /* Ensure exactly one trailing slash. */ @@ -1288,30 +1269,19 @@ open_failure_recover (struct tar_stat_info const *dir) char * get_directory_entries (struct tar_stat_info *st) { - while (! (st->dirstream = fdopendir (st->fd))) + int newfd; + while ((newfd = dup (st->fd)) == -1) if (! open_failure_recover (st)) return 0; - return streamsavedir (st->dirstream, savedir_sort_order); -} -/* Dump the directory ST. Return true if successful, false (emitting - diagnostics) otherwise. Get ST's entries, recurse through its - subdirectories, and clean up file descriptors afterwards. */ -static bool -dump_dir (struct tar_stat_info *st) -{ - char *directory = get_directory_entries (st); - if (! directory) - { - savedir_diag (st->orig_file_name); - return false; - } - - dump_dir0 (st, directory); + while (! (st->dirstream = fdopendir (newfd))) + if (! open_failure_recover (st)) + return 0; - restore_parent_fd (st); - free (directory); - return true; + char *x = streamsavedir (st->dirstream, savedir_sort_order); + closedir (st->dirstream); + st->dirstream = 0; + return x; } @@ -1343,7 +1313,7 @@ create_archive (void) while ((p = name_from_list ()) != NULL) if (!excluded_name (p->name, NULL)) - dump_file (0, p->name, p->name); + dump_file_incr (0, p->name, p->name); blank_name_list (); while ((p = name_from_list ()) != NULL) @@ -1392,7 +1362,7 @@ create_archive (void) buffer = xrealloc (buffer, buffer_size); } strcpy (buffer + plen, q + 1); - dump_file (&st, q + 1, buffer); + dump_file_incr (&st, q + 1, buffer); } q += qlen + 1; } @@ -1405,7 +1375,7 @@ create_archive (void) const char *name; while ((name = name_next (1)) != NULL) if (!excluded_name (name, NULL)) - dump_file (0, name, name); + dump_member (name); } write_eot (); @@ -1630,9 +1600,13 @@ restore_parent_fd (struct tar_stat_info const *st) /* FIXME: One should make sure that for *every* path leading to setting exit_status to failure, a clear diagnostic has been issued. */ +#include static void -dump_file0 (struct tar_stat_info *st, char const *name, char const *p) +dump_file0 (tour_t t, char const *name, char const *p) { + tour_node_t *n = tour_current (t); + assert (n); + struct tar_stat_info *st = &n->st; union block *header; char type; off_t original_size; @@ -1640,7 +1614,7 @@ dump_file0 (struct tar_stat_info *st, char const *name, char const *p) off_t block_ordinal = -1; int fd = 0; bool is_dir; - struct tar_stat_info const *parent = st->parent; + struct tar_stat_info const *parent = st->parent = n->parent; bool top_level = ! parent; int parentfd = top_level ? chdir_fd : parent->fd; void (*diag) (char const *) = 0; @@ -1751,7 +1725,7 @@ dump_file0 (struct tar_stat_info *st, char const *name, char const *p) return; } - ok = dump_dir (st); + ok = dump_dir (t); fd = st->fd; parentfd = top_level ? chdir_fd : parent->fd; @@ -1829,7 +1803,6 @@ dump_file0 (struct tar_stat_info *st, char const *name, char const *p) utime_error (p); } - ok &= tar_stat_close (st); if (ok && remove_files_option) queue_deferred_unlink (p, is_dir); @@ -1936,20 +1909,37 @@ dump_file0 (struct tar_stat_info *st, char const *name, char const *p) queue_deferred_unlink (p, false); } -/* Dump a file, recursively. PARENT describes the file's parent - directory, NAME is the file's name relative to PARENT, and FULLNAME - its full name, possibly relative to the working directory. NAME - may contain slashes at the top level of invocation. */ - +/* Dump a file, recursively. The local TRINFO keeps gradually enlarging stack + of files to be processed in case of directories are present in processed path + starting from MEMBER. */ void -dump_file (struct tar_stat_info *parent, char const *name, - char const *fullname) +dump_member (char const *member) { - struct tar_stat_info st; - tar_stat_init (&st); - st.parent = parent; - dump_file0 (&st, name, fullname); - if (parent && listed_incremental_option) + const char *fullname; + const char *name; + + tour_t t = tour_init (member, 0); + while (tour_next (t, &name, &fullname)) + { + tour_node_t *n = tour_current (t); + if (n->parent && excluded_name (fullname, n->parent)) + continue; + + dump_file0 (t, name, fullname); + } + tour_free (t); +} + +void dump_file_incr (struct tar_stat_info *parent, char const *name, + const char *fullname) +{ + /* one-shot traversal; we are in incremental mode so no sub-folders are going + to be added to tour_t directory stack */ + tour_t t = tour_init (name, parent); + + dump_file0 (t, name, fullname); + if (parent) update_parent_directory (parent); - tar_stat_destroy (&st); + + tour_free (t); } diff --git a/src/tour.c b/src/tour.c new file mode 100644 index 0000000..a53e09d --- /dev/null +++ b/src/tour.c @@ -0,0 +1,234 @@ +/* Create a tar archive. + + Copyright 2015 Free Software Foundation, Inc. + + This file is part of GNU tar. + + GNU tar is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + GNU tar is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Routines making the directory traversal without function recursion. This + avoids segfaults due to small stack space for rarely deep directory + hierarchies. + + Written by Pavel Raiskup , on 2014-07-15. */ + + +#include + +#include "common.h" + +#include +#include +#include + +struct tour +{ + gl_list_t list; + gl_list_node_t current; +}; + + +static tour_node_t * +tour_node_new (void) +{ + tour_node_t *node = xzalloc (sizeof (tour_node_t)); + tar_stat_init (&node->st); + return node; +} + +tour_t +tour_init (const char *initial_name, struct tar_stat_info * parent) +{ + size_t len = strlen (initial_name); + tour_node_t *node; + tour_t t; + + node = tour_node_new (); + node->items = xmalloc (len + 2); + node->items[len + 1] = 0; /* double 0 */ + strcpy (node->items, initial_name); + node->parent = parent; + + /* current item is the first one */ + node->item = node->items; + + t = xzalloc (sizeof (struct tour)); + t->list = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, 1); + + t->current = gl_list_add_last (t->list, node); + return t; +} + +void +tour_plan_dir (tour_t t, char *names) +{ + tour_node_t *node, *curr; + + curr = tour_current (t); + + if (strlen (names) == 0) + { + /* no child planned */ + free (names); + return; + } + + node = tour_node_new (); + node->items = names; + node->parent = &curr->st; + node->item = node->items; + + gl_list_add_last (t->list, node); +} + +void +tour_plan_file (tour_t t, const char *name) +{ + size_t nlen = strlen (name); + char *dirlist = xmalloc (nlen + 2); + dirlist[nlen + 1] = 0; + strcpy (dirlist, name); + + tour_plan_dir (t, dirlist); +} + +tour_node_t * +tour_current (tour_t t) +{ + if (!t->current) + return NULL; + return (tour_node_t *) gl_list_node_value (t->list, t->current); +} + +bool +tour_has_child (tour_t t) +{ + return !!gl_list_next_node (t->list, t->current); +} + +static tour_node_t * +tour_next_node (tour_t t) +{ + gl_list_node_t next = gl_list_next_node (t->list, t->current); + if (!next) + return NULL; + + t->current = next; + return (tour_node_t *) gl_list_node_value (t->list, next); +} + +static void +tour_destroy_node (tour_node_t * n) +{ + tar_stat_destroy (&n->st); + free (n->namebuf); + free (n->items); + free (n); +} + +static tour_node_t * +tour_prev_node (tour_t t) +{ + tour_node_t *data = tour_current (t); + + /* we are going to drop current node -> make sure parent + fd is opened */ + if (data->parent) + restore_parent_fd (data->parent); + + gl_list_node_t n = gl_list_previous_node (t->list, t->current);; + gl_list_remove_node (t->list, t->current); + t->current = n; + + data->st.parent = data->parent; + + tour_destroy_node (data); + + return tour_current (t); +} + +/* Obtain the next NAME and FULLNAME (full path) of file in tour_t to bue dumped + (in correct order). */ +bool +tour_next (tour_t t, const char **name, const char **fullname) +{ + size_t parent_nlen = 0, nlen = 0, len = 0; + tour_node_t *curr, *next; + + curr = tour_current (t); + + /* go to the next node when exists (go down to subfolder) */ + if ((next = tour_next_node (t))) + curr = next; + + /* when ended up the current directory, try to go up in directory */ + while (!curr->item[0]) + { + if ((curr = tour_prev_node (t)) == NULL) + /* last member-item has been processed */ + return false; + } + + /* now curr points to correct node/item */ + + /* re-init ST, TODO: here is good place to save some malloc/free calls? */ + tar_stat_destroy (&curr->st); + curr->st.parent = curr->parent; + + nlen = strlen (curr->item); + if (curr->parent) + parent_nlen = strlen (curr->parent->orig_file_name); + + len = nlen + parent_nlen + 1; + + if (!curr->namebuf) + { + /* first item of particular tour_node_t is handled */ + curr->namebuf = xmalloc (len); + if (curr->parent) + strcpy (curr->namebuf, curr->parent->orig_file_name); + curr->buflen = len; + } + else if (curr->buflen < len) + { + curr->namebuf = xrealloc (curr->namebuf, len); + curr->buflen = len; + } + + strcpy (curr->namebuf + parent_nlen, curr->item); + *name = curr->item; + if (fullname) + *fullname = curr->namebuf; + + /* move the pointer for successive call tour () call */ + curr->item += nlen + 1; + + return true; +} + +void +tour_free (tour_t t) +{ + const void *data; + + gl_list_iterator_t it = gl_list_iterator (t->list); + while (gl_list_iterator_next (&it, &data, NULL)) + { + tour_node_t *n = (tour_node_t *) data; + tour_destroy_node (n); + } + + gl_list_free (t->list); + free (t); +} diff --git a/src/unlink.c b/src/unlink.c index 509daf3..7566c40 100644 --- a/src/unlink.c +++ b/src/unlink.c @@ -1,6 +1,6 @@ /* Unlink files. - Copyright 2009, 2013-2014 Free Software Foundation, Inc. + Copyright 2009, 2013-2015 Free Software Foundation, Inc. This file is part of GNU tar. @@ -46,7 +46,9 @@ static size_t dunlink_count; static struct deferred_unlink *dunlink_avail; /* Delay (number of records written) between adding entry to the - list and its actual removal. */ + list and its actual removal. + TODO: remove? (unused currently) + */ static size_t deferred_unlink_delay = 0; static struct deferred_unlink * diff --git a/src/update.c b/src/update.c index c8fca0c..ce12b60 100644 --- a/src/update.c +++ b/src/update.c @@ -1,7 +1,7 @@ /* Update a tar archive. Copyright 1988, 1992, 1994, 1996-1997, 1999-2001, 2003-2005, 2007, - 2010, 2013-2014 Free Software Foundation, Inc. + 2010, 2013-2015 Free Software Foundation, Inc. This file is part of GNU tar. @@ -223,7 +223,7 @@ update_archive (void) if (subcommand_option == CAT_SUBCOMMAND) append_file (file_name); else - dump_file (0, file_name, file_name); + dump_member (file_name); } } -- 2.5.0