function [yf,Ak] = interpolsvd(y,nembed,nsmo,ncomp,niter,displ) % [yf,Ak] = interpolsvd(y,nembed,nsmo,ncomp,niter,displ) % % INTERPOLSVD uses the SVD and embedding to interpolate a % monovariate or multivariate data set expectation-maximisation. % INTERPOLSVD decomposes the data into two time scales, which are processed % seperately and then merged at the end. The cutoff time scale (nsmo) is % expressed in number of samples. % Monovariate data must be embedded first (nembed>1). % In the initial data set, gaps must be filled in with NaNs % Example for daily solar data with a cutoff at 81 days % yf = interpolsvd(y,3,81,0,20,1); % % y array or matrix of data with gaps % nembed embedding dimension Default(0) is 1 % nsmo cutoff time scale scale (in nr of samples). Set nsmo<=1 if only one % single time scale is desired % niter max number of iterations Default (0) is 20 % ncomp number of significant components, to be specified if running % in automatic mode. Default (0) leads to manual selection % displ automatic mode with no display if displ=0 % otherwise (default), manual mode is used % yf same data set as y, but with gaps filled % Ak weight distrtibution of the SVD % ThDdW 11/2011 if nargin<2 nembed = 1; end if nargin<3, nsmo = 60; end if nargin<4, ncomp = 0; end if nargin<5, niter = 30; end if nargin<6, displ = 1; end if niter<1, niter = 30; end if nembed<1 nembed = 1; end if nsmo<1 nsmo = 60; end Emax = 95; % max cumulative energy (%) for selecting nr of significant components threshold1 = 1e-4; % iterations stop after relative energy change is < threshold % detect shape of input array and transpose to have more rows than columns (faster) [nrow0,ncol0] = size(y); swap = 0; if ncol0>nrow0, y=y'; swap = 1; end [nrow0,ncol0] = size(y); % get unbiased estimate of average and standard deviation yave = zeros(1,ncol0); ystr = zeros(1,ncol0); for i=1:ncol0 w = find(~isnan(y(:,i))); if length(w)>1 yave(i) = mean(y(w,i)); ystd(i) = std(y(w,i)); y(:,i) = (y(:,i)-yave(i))/ystd(i); else yave(i) = 0; ystd(i) = 1; end end % exclude records that contain NaNs only: they cannot be interpolated killy = find(all(isnan(y))); keepy = (1:ncol0); keepy(killy) = []; y(:,killy) = []; [nrow,ncol] = size(y); if ncol<2 & nembed<2 error('** embedding dimension must be >1 for time series **') end % embed data if nembed>1 x = embedy(y,nembed,1,0); else x = y; end [nrowx,ncolx] = size(x); % weight each record according to number of NaNs % larger weight is given to records with few gaps weight = zeros(1,ncolx); for i=1:ncolx n = sum(isnan(x(:,i))); weight(i) = (nrowx - n)/nrowx; end weight = weight/max(weight); weight = weight.*weight; for i=1:ncolx x(:,i) = x(:,i)*weight(i); end % for display, choose the record that contains the largest # of gaps s = sum(isnan(x)); [s,ind] = sort(s); ind = ind(end); % first iteration: start by filling in gaps by linear interpolation xlp = x; for i=1:ncolx w = find(~isnan(x(:,i))); avex = mean(x(w,i)); u = interp1([0; w; nrowx+1],[0; x(w,i); 0],(1:nrowx)','linear'); if nsmo>1 u = smooth_gauss(u,nsmo); u = interp1([0; w; nrowx+1],[0; u(w); 0],(1:nrowx)','linear'); xlp(:,i) = smooth_gauss(u,nsmo); else xlp(:,i) = u; end end xnew = x; kill = find(isnan(x)); xnew(kill) = xlp(kill); % subtract mean again avex = mean(xnew); xnew = xnew - ones(nrowx,1)*avex; if ncomp<1 [V,S,U] = svd(xnew,0); % do the SVD Ak = diag(S); % singuar values of the SVD E = Ak.*Ak; E = 100*E/sum(E); % fraction amount of energy for each SVD mode nE = length(E); if displ % display nr of components clf semilogy(1:nE,E,'r+-') kmax = min([nE+0.5,100]); Emin = max([min(E),1e-6]); axis([0 kmax Emin 100]) grid ylabel('E_k [%]') xlabel('mode k') ncomp2 = input('number of significant components ncomp ='); if any(ncomp2>nE), error(['** ncomp must not exceed ',int2str(nE),' **']); end hold on semilogy(1:ncomp2,E(1:ncomp2,1),'bo-'); hold off else ncomp2 = 3; end disp(['using ',int2str(ncomp2),' components out of ',int2str(nE)]) else ncomp2 = ncomp; if nargout>1, [V,S,U] = svd(xnew,0); Ak = diag(S); end end % now start the main loop if nsmo>1 for k=1:ncomp2 for i=1:niter xlp = smooth_gauss(xnew,nsmo); xhp = xnew - xlp; [Vlp,Slp,Ulp] = svd(xlp,0); [Vhp,Shp,Uhp] = svd(xhp,0); xlp = Vlp(:,1:k)*Slp(1:k,1:k)*Ulp(:,1:k)'; xhp = Vhp(:,1:k)*Shp(1:k,1:k)*Uhp(:,1:k)'; xold = xnew; xnew(kill) = xlp(kill) + xhp(kill); ave = mean(xnew); xnew = xnew - ones(nrowx,1)*ave; avex = avex+ave; e = xold(kill)-xnew(kill); err(i) = sqrt(e'*e)/sqrt(xold(kill)'*xold(kill)); if displ clf plot([xnew(:,ind) x(:,ind)]) title(['channel ',int2str(ind)]) drawnow disp(['ncomp= ',int2str(k),' iteration ',int2str(i),' rel. error= ',num2str(err(i),3)]) end if err(i)1 yf = deembedy(xnew,ncol,1,0); else yf = xnew; end % add channels that had NaNs only, if any if killy y = zeros(nrow0,ncol0) + NaN; y(:,keepy) = yf; yf = y; end % restore mean and stdev for i=1:ncol0 yf(:,i) = yf(:,i)*ystd(i) + yave(i); end if swap, yf = yf'; end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function [y,nrowy] = embedy(x,D,T,displ) % [y,nrowy] = embedy(x,D) % [y,nrowy] = embedy(x,D,T,displ) % % EMBEDY embeds a set of time series [x1 x2 ... xm] into an D-dimensional % space by taking as state vectors the consecutive sequences % z(k,:) = [x1(k) x1(k+T) ... x1(k+D*T-1) x2(k) x2(k+T) ... xm(k+D*T-1)] % for k=1 to n-D*T+1, where n is the length of x and T is the embedding % delay % format % x input matrix (records are columns) [n,m] % D embedding dimension [1,1] % T embedding delay (integer>0), default is 1 [1,1] % displ set displ=0 to prevent size from being displayed % default is with display [1,1] % % y embedded matrix [n-D+1, D*m] % nrowy number of rows in y [1,1] % % ThDdW 4/2011 if nargin<4, displ = 1; end if nargin<3, T = 1; end T = fix(T); if T<=0, error('** T must be > 0 **'); end [nrow,ncol] = size(x); nrowy = nrow - (D-1)*T; if nrowy<2, error(['** embedding dimension D must be < ',int2str((nrow-1)/T),' **']); end if displ, disp(['size of embedded matrix is : ',int2str(nrowy),' x ',int2str(D*ncol)]) end y = zeros(nrowy,D*ncol); for j=1:ncol, for i=1:D, y(:,i+(j-1)*D) = x((1:nrowy)+(i-1)*T,j); end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function y = smooth_gauss(x,ns) % y = SMOOTH_GAUSS(x,ns,typ) % % SMOOTH_GAUS smooths data in x using either a gaussian window % which has a total width given by 2*ns+1 (each side contains ns data points) % x data matrix to smooth (smoothing is done columnwise) % ns smoothing width % y smoothed data % ThDdW 2/11 ns = ceil(ns); [n,m] = size(x); ns = min(floor(n/2-1),ns); % ns must not exceed n/2 if ns<=1, y = x; return end w = (-ns:ns)'/ns; w = exp(-3 * w.^2); w = w/sum(w); % there several possibilities to treat the edges. One can % extrapolate linearly, keep a fixed value, take a mirror image % take this to use a mirror image % x=[2*ones(ns,1)*x(1,:)-x(ns:-1:1,:); x; ... % 2*ones(ns,1)*x(n,:)-x(n:-1:n-ns+1,:)]; % take this to extrapolate linearly % M1 = [-(ns-1:-1:0)' ones(ns,1)]; % M2 = [(1:ns)' ones(ns,1)]; % x = [M1*(M2\x(1:ns,:)); x; M2*(M1\x(n-ns+1:n,:))]; x = [ones(ns,1)*mean(x(1:ns,:)); x; ... ones(ns,1)*mean(x(n-ns:n,:))]; y = zeros(n+2*ns,m); for i=1:m % do the convolution y(:,i)=filter(w,1,x(:,i)); end y=y((1:n)+2*ns,:); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function x = deembedy(y,nset,T,displ) % x = deembedy(y,nset,T,displ) % % DEEMBEDY deembeds an m-dimensional space which has been created % by EMBEDY. It returns the averaged state vector x % format % y embedded matrix [n,m] % nset # of data sets used to build y, default is 1 % T embedding delay (integer), default is 1 % displ set displ=0 to prevent matrix size from being displayed % default is with display % x averaged state vector [n+(m-1)T,1] % % ThDdW 10/93 if nargin<4, displ = 1; end if nargin<3, T = 1; end if nargin<2, nset = 1; end [n1,n2] = size(y); m = nset; % original size of unembedded matrix M = n2/nset; % embedding dimension n = n1 + (M-1)*T; if any(M-fix(M)), error('** nset does not match the size of the data matrix **'); end if displ, disp(['embedding dimension : ',int2str(M)]) disp(['length of array : ',int2str(n)]) end x = zeros(n,m); dx = x; xx = zeros(n,M); for j=1:m, for i=1:M xx(:,i) = [zeros((i-1)*T,1);y(:,i+(j-1)*M);zeros((M-i)*T,1)]; end nor = fix((1:M*T)/T-0.5) + 1; x(:,j) = mean(xx')'./[nor M*ones(1,n-2*M*T) fliplr(nor)]'*M; end