clear all;
NN = 2.^(2:23); % data sizes
MM = 2.^(0:10); % number of bins
xx = randn(max(NN), 1);
Te = Tl = Tc = Ts = zeros(length(NN), length(MM));
for iN = 1:length(NN)
x = xx(1:NN(iN));
for iM = 1:length(MM)
nbins = MM(iM);
tic;
min_val = min (x);
max_val = max (x);
edges = min_val + (max_val - min_val) * (0:nbins) / nbins;
binwidth = edges(2) - edges(1);
bin = 1 + floor ((x - min_val) / binwidth); % compute bin indices in O(Nx)
bin(x > edges(end)) = 0; % set too large entries to zero
idx = bin(bin != 0); % remove all out-of-scope entries
n = accumarray (idx, 1, [nbins+1, 1])'; % cout number of identical indices to get the histogram, in O(Nx)
n = [n(1:end-2), n(end-1)+n(end)]; % last bin should also contain right edge
Te(iN, iM) = toc;
end
end
save -ascii 'hist_bench_equi.dat' Te;
for iN = 1:length(NN)
x = xx(1:NN(iN));
for iM = 1:length(MM)
nbins = MM(iM);
tic;
min_val = min (x);
max_val = max (x);
edges = min_val + (max_val - min_val) * (0:nbins) / nbins;
bin = lookup (edges, x); % compute bin indices in O(Nx*log(nbins)), all x < edges(1) will be zero
bin(x > edges(end)) = 0; % set too large entries to zero
idx = bin(bin != 0); % remove all out-of-scope entries
n = accumarray (idx, 1, [nbins+1, 1])'; % cout number of identical indices to get the histogram, in O(Nx)
n = [n(1:end-2), n(end-1)+n(end)]; % last bin should also contain right edge
Tl(iN, iM) = toc;
end
end
save -ascii 'hist_bench_lookup.dat' Tl;
for iN = 1:length(NN)
x = xx(1:NN(iN));
for iM = 1:length(MM)
nbins = MM(iM);
tic;
len = NN(iN);
min_val = min (x);
max_val = max (x);
m = [0.5:nbins]'/nbins;
m = (max_val - min_val) * m + min_val * ones (size (m));
cutoff = (m(1:end-1,:) + m(2:end,:)) / 2;
[s, idx] = sort ([x; cutoff]);
chist = cumsum (idx <= len);
chist = [0; chist(idx > len); chist(end)-sum(isnan(x))];
freq = diff (chist);
Ts(iN, iM) = toc;
end
end
save -ascii 'hist_bench_sort.dat' Ts;
for iN = 1:length(NN)
x = xx(1:NN(iN));
for iM = 1:length(MM)
nbins = MM(iM);
tic;
min_val = min (x);
max_val = max (x);
m = [0.5:nbins]'/nbins;
m = (max_val - min_val) * m + min_val * ones (size (m));
cutoff = (m(1:end-1,:) + m(2:end,:)) / 2;
chist = zeros (nbins+1, 1);
for i = 1:nbins-1
chist(i+1,:) = sum (x <= cutoff(i));
endfor
chist(nbins+1,:) = sum (! isnan (x));
freq = diff (chist);
Tc(iN, iM) = toc;
end
end
save -ascii 'hist_bench_cdf.dat' Tc;
loglog(NN,Te,'b', NN,Tl,'g', NN,Tc,'r', NN,Ts,'k')