# HG changeset patch # User Soren Hauberg # Date 1255976380 -7200 # Node ID 733359750f7c0eec057342aa9dd27a329e6e6c2a # Parent 78ac37d735577196763c36249727557a1e9ce3da scripts/io/strread.m scripts/io/textread.m: new functions diff -r 78ac37d73557 -r 733359750f7c scripts/ChangeLog --- a/scripts/ChangeLog Mon Oct 19 12:12:58 2009 +0200 +++ b/scripts/ChangeLog Mon Oct 19 20:19:40 2009 +0200 @@ -1,3 +1,7 @@ +2009-10-19 Soren Hauberg + + * io/strread.m io/textread.m: New functions. + 2009-10-15 David Bateman * plot/__go_draw_axes__.m: Allow line and surface markerfacecolor diff -r 78ac37d73557 -r 733359750f7c scripts/io/Makefile.in --- a/scripts/io/Makefile.in Mon Oct 19 12:12:58 2009 +0200 +++ b/scripts/io/Makefile.in Mon Oct 19 20:19:40 2009 +0200 @@ -32,7 +32,7 @@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_DATA = @INSTALL_DATA@ -SOURCES = beep.m csvread.m csvwrite.m dlmwrite.m +SOURCES = beep.m csvread.m csvwrite.m dlmwrite.m strread.m textread.m DISTFILES = $(addprefix $(srcdir)/, Makefile.in $(SOURCES)) diff -r 78ac37d73557 -r 733359750f7c scripts/io/strread.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/io/strread.m Mon Oct 19 20:19:40 2009 +0200 @@ -0,0 +1,248 @@ +## Copyright (C) 2009 Eric Chassande-Mottin, CNRS (France) +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, see +## . 
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {[@var{a}, @dots{}] =} strread (@var{str})
+## @deftypefnx {Function File} {[@var{a}, @dots{}] =} strread (@var{str}, @var{format})
+## @deftypefnx {Function File} {[@var{a}, @dots{}] =} strread (@var{str}, @var{format}, @
+## @var{prop1}, @var{value1}, @dots{})
+## Read data from a string.
+##
+## The string @var{str} is split into words that are repeatedly matched to the
+## specifiers in @var{format}. The first word is matched to the first specifier,
+## the second to the second specifier and so forth. If there are more words than
+## specifiers, the process is repeated until all words have been processed.
+##
+## The string @var{format} describes how the words in @var{str} should be parsed.
+## It may contain any combination of the following specifiers:
+## @table @code
+## @item %s
+## The word is parsed as a string.
+## @item %d
+## @itemx %f
+## The word is parsed as a number.
+## @item %*
+## The word is skipped.
+## @end table
+##
+## Parsed words corresponding to the first specifier are returned in the first
+## output argument and likewise for the rest of the specifiers.
+##
+## By default, @var{format} is @t{"%f"}, meaning that numbers are read from @var{str}.
+##
+## For example, the string
+##
+## @example
+## @group
+## @var{str} = "\
+## Bunny Bugs 5.5\n\
+## Duck Daffy -7.5e-5\n\
+## Penguin Tux 6"
+## @end group
+## @end example
+##
+## can be read using
+##
+## @example
+## [@var{a}, @var{b}, @var{c}] = strread (@var{str}, "%s %s %f");
+## @end example
+##
+## The behaviour of @code{strread} can be changed via properties @var{prop}
+## @itemize @code
+## @item "headerlines"
+## The first @var{value} number of lines of @var{str} are skipped.
+## @item "commentstyle"
+## Parts of @var{str} are considered comments and will be skipped.
+## @var{value} is the comment style and can be any of the following.
+## @itemize
+## @item "shell"
+## Everything from @code{#} characters to the nearest end-line is skipped.
+## @item "c"
+## Everything between @code{/*} and @code{*/} is skipped.
+## @item "c++"
+## Everything from @code{//} characters to the nearest end-line is skipped.
+## @item "matlab"
+## Everything from @code{%} characters to the nearest end-line is skipped.
+## @end itemize
+## An unknown style produces a warning and no comments are removed.
+## @item "delimiter"
+## Any character in @var{value} will be used, in addition to the white space
+## characters, to split @var{str} into words.
+## @item "whitespace"
+## Any character in @var{value} is treated as white space separating words.
+## The default is @t{" \n\r\t\b"}.
+## @item "emptyvalue"
+## Parts of the numeric output where no word is available are filled with
+## @var{value}.
+## @end itemize
+##
+## @seealso{textread, load, dlmread, fscanf}
+## @end deftypefn
+
+function varargout = strread (str, formatstr = "%f", varargin)
+  ## Check input
+  if (nargin < 1)
+    print_usage ();
+  endif
+
+  ## Both the data and the format must be strings
+  if (! ischar (str) || ! ischar (formatstr))
+    error ("strread: first and second input arguments must be strings");
+  endif
+
+  ## Parse options (given as property/value pairs)
+  comment_flag = false;
+  header_skip = 0;
+  numeric_fill_value = 0;
+  white_spaces = " \n\r\t\b";
+  delimiter_str = "";
+  for n = 1:2:length (varargin)
+    switch (lower (varargin {n}))
+      case "commentstyle"
+        comment_flag = true;
+        ## comment_specif holds the {opening, closing} markers of a comment
+        switch (lower (varargin {n+1}))
+          case "c"
+            comment_specif = {"/*", "*/"};
+          case "c++"
+            comment_specif = {"//", "\n"};
+          case "shell"
+            comment_specif = {"#", "\n"};
+          case "matlab"
+            comment_specif = {"%", "\n"};
+          otherwise
+            ## No markers are known for this style, so comment removal
+            ## must stay disabled.
+            comment_flag = false;
+            warning ("strread: unknown comment style '%s'", varargin {n+1});
+        endswitch
+      case "headerlines"
+        header_skip = varargin {n+1};
+      case "delimiter"
+        delimiter_str = varargin {n+1};
+      case "emptyvalue"
+        numeric_fill_value = varargin {n+1};
+      case "bufsize"
+        ## XXX: We could synthesize this, but that just seems weird...
+        warning ("strread: property \"bufsize\" is not implemented");
+      case "whitespace"
+        white_spaces = varargin {n+1};
+      case "expchars"
+        warning ("strread: property \"expchars\" is not implemented");
+      otherwise
+        warning ("strread: unknown property \"%s\"", varargin {n});
+    endswitch
+  endfor
+
+  ## Parse format string.  A specifier is identified by the "%" and the
+  ## character that follows it, so 'specif' has one two-character row per
+  ## specifier.
+  idx = strfind (formatstr, "%")';
+  specif = formatstr ([idx, idx+1]);
+  nspecif = length (idx);
+  ## Skipped fields ("%*") do not produce an output argument
+  idx_star = strfind (formatstr, "%*");
+  nfields = length (idx) - length (idx_star);
+
+  if (max (nargout, 1) != nfields)
+    error ("strread: the number of output variables must match that of format specifiers");
+  endif
+
+  ## Remove header
+  if (header_skip > 0)
+    e = find (str == "\n", header_skip);
+    if (length (e) >= header_skip)
+      str = str (e (end)+1:end);
+    else
+      ## We don't have enough data so we discard it all
+      str = "";
+    endif
+  endif
+
+  ## Remove comments
+  if (comment_flag)
+    cstart = strfind (str, comment_specif{1});
+    cstop = strfind (str, comment_specif{2});
+    if (length (cstart) > 0)
+      ## Ignore nested openers.
+      [idx, cidx] = unique (lookup (cstop, cstart), "first");
+      if (idx(end) == length (cstop))
+        cidx(end) = []; # Drop the last one if orphaned.
+      endif
+      cstart = cstart(cidx);
+    endif
+    if (length (cstop) > 0)
+      ## Ignore nested closers.
+      [idx, cidx] = unique (lookup (cstart, cstop), "first");
+      if (idx(1) == 0)
+        cidx(1) = []; # Drop the first one if orphaned.
+      endif
+      cstop = cstop(cidx);
+    endif
+    ## Keep only the text lying between the end of one comment and the
+    ## start of the next one.
+    len = length (str);
+    c2len = length (comment_specif{2});
+    str = cellslices (str, [1, cstop + c2len], [cstart - 1, len]);
+    str = [str{:}];
+  endif
+
+  ## Split 'str' into words.  The characters given through the "delimiter"
+  ## property separate words in addition to the white space characters.
+  words = split_by (str, [white_spaces, delimiter_str]);
+  num_words = numel (words);
+  num_lines = ceil (num_words / nspecif);
+
+  ## For each specifier
+  k = 1;
+  for m = 1:nspecif
+    ## Every nspecif-th word, starting at word m, belongs to specifier m
+    data = words (m:nspecif:end);
+
+    ## Map to format
+    switch specif (m, :)
+      case "%s"
+        ## Pad short columns with empty strings
+        data (end+1:num_lines) = {""};
+        varargout {k} = data';
+        k++;
+      case {"%d", "%f"}
+        data = str2double (data);
+        ## Pad short columns with the "emptyvalue" property
+        data (end+1:num_lines) = numeric_fill_value;
+        varargout {k} = data.';
+        k++;
+      case "%*"
+        ## do nothing
+      otherwise
+        ## Fail early instead of leaving an output argument undefined
+        error ("strread: unsupported format specifier \"%s\"", specif (m, :));
+    endswitch
+  endfor
+endfunction
+
+## Split TEXT at any of the characters in SEP, drop empty words and trim
+## the remaining ones.
+function out = split_by (text, sep)
+  out = strtrim (strsplit (text, sep, true));
+endfunction
+
+%!test
+%! str = "# comment\n# comment\n1 2 3";
+%! [a, b] = strread (str, '%d %s', 'commentstyle', 'shell');
+%! assert (a, [1; 3]);
+%! assert (b, {"2"; ""});
+
+%!test
+%! str = '';
+%! a = rand (10, 1);
+%! b = char (round (65 + 20 * rand (10, 1)));
+%! for k = 1:10
+%!   str = sprintf ('%s %.6f %s\n', str, a (k), b (k));
+%! endfor
+%! [aa, bb] = strread (str, '%f %s');
+%! assert (a, aa, 1e-5);
+%! assert (cellstr (b), bb);
+
+%!test
+%! str = '';
+%! a = rand (10, 1);
+%! b = char (round (65 + 20 * rand (10, 1)));
+%! for k = 1:10
+%!   str = sprintf ('%s %.6f %s\n', str, a (k), b (k));
+%! endfor
+%! aa = strread (str, '%f %*s');
+%! assert (a, aa, 1e-5);
+
+%!test
+%! a = strread ("1,2,3", "%f", "delimiter", ",");
+%! assert (a, [1; 2; 3]);
+
+%!error <must be strings> strread ("1 2 3", 1);
+
+%!test
+%! str = sprintf ('/* this is\nacomment*/ 1 2 3');
+%! a = strread (str, '%f', 'commentstyle', 'c');
+%! 
assert (a, [1; 2; 3]);
+
diff -r 78ac37d73557 -r 733359750f7c scripts/io/textread.m
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/io/textread.m	Mon Oct 19 20:19:40 2009 +0200
@@ -0,0 +1,56 @@
+## Copyright (C) 2009 Eric Chassande-Mottin, CNRS (France)
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {[@var{a}, @dots{}] =} textread (@var{filename})
+## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format})
+## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format}, @
+## @var{prop1}, @var{value1}, @dots{})
+## Read data from a text file.
+##
+## The file @var{filename} is read and parsed according to @var{format}. The
+## function behaves like @code{strread} except it works by parsing a file instead
+## of a string. See the documentation of @code{strread} for details.
+## @seealso{strread, load, dlmread, fscanf}
+## @end deftypefn
+
+function varargout = textread (filename, formatstr = "%f", varargin)
+  ## Check input
+  if (nargin < 1)
+    print_usage ();
+  endif
+
+  ## Both the file name and the format must be strings
+  if (! ischar (filename) || ! ischar (formatstr))
+    error ("textread: first and second input arguments must be strings");
+  endif
+
+  ## Read the whole file into a string
+  fid = fopen (filename, "r");
+  if (fid == -1)
+    error ("textread: could not open '%s' for reading", filename);
+  endif
+
+  ## Close the file even if reading it fails
+  unwind_protect
+    str = char (fread (fid, "char")');
+  unwind_protect_cleanup
+    fclose (fid);
+  end_unwind_protect
+
+  ## Call strread to make it do the real work
+  [varargout{1:max (nargout, 1)}] = strread (str, formatstr, varargin {:});
+endfunction