# HG changeset patch
# User Soren Hauberg
# Date 1255976380 -7200
# Node ID 733359750f7c0eec057342aa9dd27a329e6e6c2a
# Parent 78ac37d735577196763c36249727557a1e9ce3da
scripts/io/strread.m scripts/io/textread.m: new functions
diff -r 78ac37d73557 -r 733359750f7c scripts/ChangeLog
--- a/scripts/ChangeLog Mon Oct 19 12:12:58 2009 +0200
+++ b/scripts/ChangeLog Mon Oct 19 20:19:40 2009 +0200
@@ -1,3 +1,7 @@
+2009-10-19 Soren Hauberg
+
+ * io/strread.m io/textread.m: New functions.
+
2009-10-15 David Bateman
* plot/__go_draw_axes__.m: Allow line and surface markerfacecolor
diff -r 78ac37d73557 -r 733359750f7c scripts/io/Makefile.in
--- a/scripts/io/Makefile.in Mon Oct 19 12:12:58 2009 +0200
+++ b/scripts/io/Makefile.in Mon Oct 19 20:19:40 2009 +0200
@@ -32,7 +32,7 @@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_DATA = @INSTALL_DATA@
-SOURCES = beep.m csvread.m csvwrite.m dlmwrite.m
+SOURCES = beep.m csvread.m csvwrite.m dlmwrite.m strread.m textread.m
DISTFILES = $(addprefix $(srcdir)/, Makefile.in $(SOURCES))
diff -r 78ac37d73557 -r 733359750f7c scripts/io/strread.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/io/strread.m Mon Oct 19 20:19:40 2009 +0200
@@ -0,0 +1,248 @@
+## Copyright (C) 2009 Eric Chassande-Mottin, CNRS (France)
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, see
+## .
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {[@var{a}, @dots{}] =} strread (@var{str})
+## @deftypefnx {Function File} {[@var{a}, @dots{}] =} strread (@var{str}, @var{format})
+## @deftypefnx {Function File} {[@var{a}, @dots{}] =} strread (@var{str}, @var{format}, @
+## @var{prop1}, @var{value1}, @dots{})
+## Read data from a string.
+##
+## The string @var{str} is split into words that are repeatedly matched to the
+## specifiers in @var{format}. The first word is matched to the first specifier,
+## the second to the second specifier and so forth. If there are more words than
+## specifiers, the process is repeated until all words have been processed.
+##
+## The string @var{format} describes how the words in @var{str} should be parsed.
+## It may contain any combination of the following specifiers:
+## @table @code
+## @item %s
+## The word is parsed as a string.
+## @item %d
+## @itemx %f
+## The word is parsed as a number.
+## @item %*
+## The word is skipped.
+## @end table
+##
+## Parsed words corresponding to the first specifier are returned in the first
+## output argument and likewise for the rest of the specifiers.
+##
+## By default, @var{format} is @t{"%f"}, meaning that numbers are read from @var{str}.
+##
+## For example, the string
+##
+## @example
+## @group
+## @var{str} = "\
+## Bunny Bugs 5.5\n\
+## Duck Daffy -7.5e-5\n\
+## Penguin Tux 6"
+## @end group
+## @end example
+##
+## can be read using
+##
+## @example
+## [@var{a}, @var{b}, @var{c}] = strread (@var{str}, "%s %s %f");
+## @end example
+##
+## The behaviour of @code{strread} can be changed via the following
+## properties, given as (@var{prop}, @var{value}) pairs:
+## @itemize @code
+## @item "headerlines"
+## The first @var{value} number of lines of @var{str} are skipped.
+## @item "commentstyle":
+## Parts of @var{str} are considered comments and will be skipped.
+## @var{value} is the comment style and can be any of the following.
+## @itemize
+## @item "shell"
+## Everything from @code{#} characters to the nearest end-line is skipped.
+## @item "c"
+## Everything between @code{/*} and @code{*/} is skipped.
+## @item "c++"
+## Everything from @code{//} characters to the nearest end-line is skipped.
+## @item "matlab"
+## Everything from @code{%} characters to the nearest end-line is skipped.
+## @end itemize
+## @item "delimiter"
+## Any character in @var{value} will be used to split @var{str} into words.
+## @item "emptyvalue"
+## Parts of the output where no word is available are filled with @var{value}.
+## @end itemize
+##
+## @seealso{textread, load, dlmread, fscanf}
+## @end deftypefn
+
+function varargout = strread (str, formatstr = "%f", varargin)
+  ## Split STR into words and distribute them over the specifiers found in
+  ## FORMATSTR; every specifier that is not "%*" produces one output column.
+  ## VARARGIN holds property/value pairs, handled by the option parser below.
+
+  ## Check input
+  if (nargin < 1)
+    print_usage ();
+  endif
+
+  ## Both STR and FORMATSTR have to be strings.
+  if (!ischar (str) || !ischar (formatstr))
+    error ("strread: first and second input arguments must be strings");
+  endif
+
+  ## Parse options
+  comment_flag = false;
+  header_skip = 0;
+  numeric_fill_value = 0;
+  white_spaces = " \n\r\t\b";
+  delimiter_str = "";
+  for n = 1:2:length (varargin)
+    switch (lower (varargin{n}))
+      case "commentstyle"
+        comment_flag = true;
+        switch (lower (varargin{n+1}))
+          case "c"
+            comment_specif = {"/*", "*/"};
+          case "c++"
+            comment_specif = {"//", "\n"};
+          case "shell"
+            comment_specif = {"#", "\n"};
+          case "matlab"
+            comment_specif = {"%", "\n"};
+          otherwise
+            ## Unknown style: warn and switch comment stripping off, so the
+            ## code below never touches an undefined comment_specif.
+            warning ("strread: unknown comment style '%s'", varargin{n+1});
+            comment_flag = false;
+        endswitch
+      case "headerlines"
+        header_skip = varargin{n+1};
+      case "delimiter"
+        delimiter_str = varargin{n+1};
+      case "emptyvalue"
+        numeric_fill_value = varargin{n+1};
+      case "bufsize"
+        ## XXX: We could synthesize this, but that just seems weird...
+        warning ("strread: property \"bufsize\" is not implemented");
+      case "whitespace"
+        white_spaces = varargin{n+1};
+      case "expchars"
+        warning ("strread: property \"expchars\" is not implemented");
+      otherwise
+        warning ("strread: unknown property \"%s\"", varargin{n});
+    endswitch
+  endfor
+
+  ## Parse format string.  IDX is turned into a column so that indexing
+  ## FORMATSTR with [idx, idx+1] gives one row per specifier, holding "%"
+  ## and the character that follows it.
+  idx = strfind (formatstr, "%")';
+  specif = formatstr ([idx, idx+1]);
+  nspecif = length (idx);
+  idx_star = strfind (formatstr, "%*");
+  nfields = length (idx) - length (idx_star);
+
+  if (max (nargout, 1) != nfields)
+    error ("strread: the number of output variables must match that of format specifiers");
+  endif
+
+  ## Remove header
+  if (header_skip > 0)
+    e = find (str == "\n", header_skip);
+    if (length (e) >= header_skip)
+      str = str (e (end)+1:end);
+    else
+      ## We don't have enough data so we discard it all
+      str = "";
+    endif
+  endif
+
+  ## Remove comments
+  if (comment_flag)
+    cstart = strfind (str, comment_specif{1});
+    cstop = strfind (str, comment_specif{2});
+    if (length (cstart) > 0)
+      ## Ignore nested openers.
+      [idx, cidx] = unique (lookup (cstop, cstart), "first");
+      if (idx(end) == length (cstop))
+        cidx(end) = []; # Drop the last one if orphaned.
+      endif
+      cstart = cstart(cidx);
+    endif
+    if (length (cstop) > 0)
+      ## Ignore nested closers.
+      [idx, cidx] = unique (lookup (cstart, cstop), "first");
+      if (idx(1) == 0)
+        cidx(1) = []; # Drop the first one if orphaned.
+      endif
+      cstop = cstop(cidx);
+    endif
+    ## Keep only the text lying between the comments and glue it together.
+    len = length (str);
+    c2len = length (comment_specif{2});
+    str = cellslices (str, [1, cstop + c2len], [cstart - 1, len]);
+    str = [str{:}];
+  endif
+
+  ## Split 'str' into words.  Characters given through the "delimiter"
+  ## property separate words in addition to the white space characters.
+  words = split_by (str, [white_spaces(:).', delimiter_str(:).']);
+  num_words = numel (words);
+  num_lines = ceil (num_words / nspecif);
+
+  ## For each specifier: take every nspecif-th word starting at word m.
+  k = 1;
+  for m = 1:nspecif
+    data = words (m:nspecif:end);
+
+    ## Map to format
+    switch specif (m, :)
+      case "%s"
+        ## Pad short columns with empty strings.
+        data (end+1:num_lines) = {""};
+        varargout {k} = data';
+        k++;
+      case {"%d", "%f"}
+        data = str2double (data);
+        ## Pad short columns with the "emptyvalue" fill value.
+        data (end+1:num_lines) = numeric_fill_value;
+        varargout {k} = data.';
+        k++;
+      case "%*"
+        ## do nothing
+    endswitch
+  endfor
+endfunction
+
+function out = split_by (text, sep)
+  ## Cut TEXT into pieces using SEP, then trim each piece with strtrim.
+  ## NOTE(review): the third strsplit argument is presumably the legacy
+  ## "strip_empty" flag (discard empty pieces) -- confirm against the
+  ## strsplit version this code targets.
+  pieces = strsplit (text, sep, true);
+  out = strtrim (pieces);
+endfunction
+
+%!test
+%! ## Two shell-style comment lines are stripped, leaving the words 1 2 3.
+%! ## With two specifiers the second row is incomplete, so the string
+%! ## column is padded with an empty string.
+%! str = "# comment\n# comment\n1 2 3";
+%! [a, b] = strread (str, '%d %s', 'commentstyle', 'shell');
+%! assert (a, [1; 3]);
+%! assert (b, {"2"; ""});
+
+%!test
+%! ## Build ten lines of "<random number> <random letter>" and check that
+%! ## both columns are read back (numbers are printed with six decimals,
+%! ## hence the 1e-5 tolerance).
+%! str = '';
+%! a = rand (10, 1);
+%! b = char (round (65 + 20 * rand (10, 1)));
+%! for k = 1:10
+%! str = sprintf ('%s %.6f %s\n', str, a (k), b (k));
+%! endfor
+%! [aa, bb] = strread (str, '%f %s');
+%! assert (a, aa, 1e-5);
+%! assert (cellstr (b), bb);
+
+%!test
+%! ## Same data as above, but the letter column is skipped with "%*s",
+%! ## so only the numeric column is returned.
+%! str = '';
+%! a = rand (10, 1);
+%! b = char (round (65 + 20 * rand (10, 1)));
+%! for k = 1:10
+%! str = sprintf ('%s %.6f %s\n', str, a (k), b (k));
+%! endfor
+%! aa = strread (str, '%f %*s');
+%! assert (a, aa, 1e-5);
+
+%!test
+%! ## A C-style comment spanning two lines is removed before parsing.
+%! str = sprintf ('/* this is\nacomment*/ 1 2 3');
+%! a = strread (str, '%f', 'commentstyle', 'c');
+%! assert (a, [1; 2; 3]);
+
diff -r 78ac37d73557 -r 733359750f7c scripts/io/textread.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/io/textread.m Mon Oct 19 20:19:40 2009 +0200
@@ -0,0 +1,51 @@
+## Copyright (C) 2009 Eric Chassande-Mottin, CNRS (France)
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {[@var{a}, @dots{}] =} textread (@var{filename})
+## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format})
+## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format}, @
+## @var{prop1}, @var{value1}, @dots{})
+## Read data from a text file.
+##
+## The file @var{filename} is read and parsed according to @var{format}. The
+## function behaves like @code{strread} except it works by parsing a file instead
+## of a string. See the documentation of @code{strread} for details.
+## @seealso{strread, load, dlmread, fscanf}
+## @end deftypefn
+
+function varargout = textread (filename, formatstr = "%f", varargin)
+  ## Read the whole file FILENAME into a string and hand it, together with
+  ## FORMATSTR and any property/value pairs in VARARGIN, to strread.
+
+  ## Check input
+  if (nargin < 1)
+    print_usage ();
+  endif
+
+  ## Both FILENAME and FORMATSTR have to be strings.
+  if (!ischar (filename) || !ischar (formatstr))
+    error ("textread: first and second input arguments must be strings");
+  endif
+
+  ## Read file
+  fid = fopen (filename, "r");
+  if (fid == -1)
+    error ("textread: could not open '%s' for reading", filename);
+  endif
+
+  ## Close the file even when reading fails, so the handle is not leaked.
+  unwind_protect
+    str = char (fread (fid, "char")');
+  unwind_protect_cleanup
+    fclose (fid);
+  end_unwind_protect
+
+  ## Call strread to make it do the real work
+  [varargout{1:max (nargout, 1)}] = strread (str, formatstr, varargin {:});
+endfunction