Sabtu, 17 Agustus 2013

Matlab Code of Householder-QL Method for PCA-Part for K-Means initialization

function varargout = HHQL(mat,k)
%--------------------------------------------------------------------------
% Author: Brian Moore
% brimoor@umich.edu
% Modified: Erie Sadewo
% erie@bps.go.id
% Date: September 2013
%--------------------------------------------------------------------------

[m n] = size(mat);
if (m ~= n)
error('Input matrix must be square');
elseif n==2
[V D]=eig(mat);
varargout{1} = V;
varargout{1} = varargout{1}(:,2);
varargout{1} = varargout{1}/max(varargout{1});
varargout{2} = max(diag(D));
else
[d od Q] = myTriDiagHouseholder(mat,'true');
if k<=27 [V D] = myImplicitTriQL(d,od,Q); varargout{1} = V; varargout{1} = -(varargout{1}(:,1)); varargout{1} = varargout{1}/max(varargout{1}); varargout{2} = max(diag(D)); if any(isnan(varargout{1}))==1 [V D] = myImplicitTriQR(d,od,Q); varargout{1} = V; varargout{1} = varargout{1}(:,1); varargout{2} = max(diag(D)); end else [V D] = myImplicitTriQR(d,od,Q); varargout{1} = V; varargout{1} = varargout{1}(:,1); varargout{2} = max(diag(D)); end end end function varargout = myTriDiagHouseholder(mat,varargin) symTol = 1e-9; [m,n] = size(mat); if (m ~= n) error('Input matrix must be square'); end if ~isreal(mat) error('Input matrix must be real valued'); end if (max(max(abs(mat - mat'))) > symTol)
error('Input matrix is not symmetric to working precision');
end
if (nargin == 2)
if strcmpi(varargin{1},'true')
OutputQ = 1;
else
OutputQ = 0;
end
else
OutputQ = 0;
end
d = zeros(n,1);
od = zeros(n,1);
Q = mat;
for i = n:-1:2
l = i - 1;
h = 0.0;
scale = 0.0;
if (l > 1)
for k = 1:l
scale = scale + abs(Q(i,k));
end
if (scale == 0.0)
od(i) = Q(i,l);
else
for k = 1:l
Q(i,k) = Q(i,k) / scale;
h = h + Q(i,k) * Q(i,k);
end
f = Q(i,l);
if (f >= 0.0)
g = -sqrt(h);
else
g = sqrt(h);
end
od(i) = scale * g;
h = h - f * g;
Q(i,l) = f - g;
f = 0.0;
for j = 1:l
if (OutputQ == 1)
Q(j,i) = Q(i,j) / h;
end
g = 0.0;
for k = 1:j
g = g + Q(j,k) * Q(i,k);
end
for k = (j+1):l
g = g + Q(k,j) * Q(i,k);
end
od(j) = g / h;
f = f + od(j) * Q(i,j);
end
hh = f / (h + h);
for j = 1:l
f = Q(i,j);
g = od(j) - hh * f;
od(j) = g;
for k = 1:j
Q(j,k) = Q(j,k) - (f * od(k) + g * Q(i,k));
end
end
end
else
od(i) = Q(i,l);
end
d(i) = h;
end
d(1) = 0.0;
od = od(2:end);
if (OutputQ == 1)
for i = 1:n
l = i - 1;
if (d(i) ~= 0)
for j = 1:l
g = 0.0;
for k = 1:l
g = g + Q(i,k) * Q(k,j);
end
for k = 1:l
Q(k,j) = Q(k,j) - g * Q(k,i);
end
end
end
d(i) = Q(i,i);
Q(i,i) = 1.0;
for j = 1:l
Q(j,i) = 0.0;
Q(i,j) = 0.0;
end
end
else
for i = 1:n
d(i) = Q(i,i);
end
end

if (nargout == 2)
if (OutputQ == 1)
varargout{1} = diag(d) + diag(od,-1) + diag(od,1);
varargout{2} = Q;
else
varargout{1} = d;
varargout{2} = od;
end
elseif (nargout == 3)
varargout{1} = d;
varargout{2} = od;
varargout{3} = Q;
else
varargout{1} = diag(d) + diag(od,-1) + diag(od,1);
end
end

function varargout = myImplicitTriQL(varargin)
maxIters = 30;

% Process input arguments
if (nargin == 1)
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
Q = eye(n);
NUM = n;
elseif (nargin == 2)
[m1 n1] = size(varargin{1});
[m2 n2] = size(varargin{2});
if ((m1 > 1) && (n1 > 1))
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
if ((m2 > 1) && (n2 > 1))
Q = varargin{2};
NUM = n;
else
Q = eye(n);
NUM = varargin{2};
end
else
d = varargin{1};
od = varargin{2};
n = length(d);
Q = eye(n);
NUM = n;
end
elseif (nargin == 3)
[m1 n1] = size(varargin{1});
[m3 n3] = size(varargin{3});
if ((m1 > 1) && (n1 > 1))
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
Q = varargin{2};
NUM = varargin{3};
else
d = varargin{1};
od = varargin{2};
n = length(d);
if ((m3 > 1) && (n3 > 1))
Q = varargin{3};
NUM = n;
else
Q = eye(n);
NUM = varargin{3};
end
end
elseif (nargin == 4)
d = varargin{1};
od = varargin{2};
n = length(d);
Q = varargin{3};
NUM = varargin{4};
else
error('Input syntax error. Type ''help myImplicitTriQL'' for assistance');
end

m = 0;
for l = 1:NUM
iter = 0;
while (m ~= l)
for m = l:(n-1)
dd = abs(d(m)) + abs(d(m+1));
if (abs(od(m)) <= eps * dd) break; end end if (m ~= l) iter = iter + 1; if (iter >= maxIters)
error(['myImplicitTriQL() didn''t converge after ' num2str(maxIters) ' iterations']);
end
g = (d(l+1) - d(l)) / (2.0 * od(l));
r = SafeDistance(g,1.0);
g = d(m) - d(l) + od(l) / (g + abs(r) * sign(g));
s = 1.0;
c = 1.0;
p = 0.0;
for i = (m-1):-1:l
f = s * od(i);
b = c * od(i);
r = SafeDistance(f,g);
od(i+1) = r;
if (r == 0.0)
d(i+1) = d(i+1) - p;
od(m) = 0.0;
break;
end
s = f / r;
c = g / r;
g = d(i+1) - p;
r = (d(i) - g) * s + 2.0 * c * b;
p = s * r;
d(i+1) = g + p;
g = c * r - b;
for k = 1:n
f = Q(k,i+1);
Q(k,i+1) = s * Q(k,i) + c * f;
Q(k,i) = c * Q(k,i) - s * f;
end
end
if ((r == 0.0) && (i >= l))
continue;
end
d(l) = d(l) - p;
od(l) = g;
od(m) = 0.0;
end
end
end

[d ind] = sort(d,'descend');
d = d(1:NUM);

varargout{1} = Q(:,ind(1:NUM));
varargout{2} = diag(d);
end

function varargout = myImplicitTriQR(varargin)
eps = 1e-6;
maxIters = 30;

% Process input arguments
if (nargin == 1)
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
Q = eye(n);
NUM = n;
elseif (nargin == 2)
[m1 n1] = size(varargin{1});
[m2 n2] = size(varargin{2});
if ((m1 > 1) && (n1 > 1))
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
if ((m2 > 1) && (n2 > 1))
Q = varargin{2};
NUM = n;
else
Q = eye(n);
NUM = varargin{2};
end
else
d = varargin{1};
od = varargin{2};
n = length(d);
Q = eye(n);
NUM = n;
end
elseif (nargin == 3)
[m1 n1] = size(varargin{1});
[m3 n3] = size(varargin{3});
if ((m1 > 1) && (n1 > 1))
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
Q = varargin{2};
NUM = varargin{3};
else
d = varargin{1};
od = varargin{2};
n = length(d);
if ((m3 > 1) && (n3 > 1))
Q = varargin{3};
NUM = n;
else
Q = eye(n);
NUM = varargin{3};
end
end
elseif (nargin == 4)
d = varargin{1};
od = varargin{2};
n = length(d);
Q = varargin{3};
NUM = varargin{4};
else
error('Input syntax error. Type ''help myImplicitTriQR'' for assistance');
end

od = [0;od(:)];
m = n;
iter = 0;
while (m > 1)
iter = iter + 1;
g = (d(m-1) - d(m)) / 2;
if (g == 0)
s = d(m) - abs(od(m));
else
s = d(m) - od(m) * od(m) / (g + sign(g) * SafeDistance(g,od(m)));
end
x = d(1) - s;
y = od(2);
for k = 1:(m-1)
if (m > 2)
xydist = SafeDistance(x,y);
c = x / xydist;
s = -y / xydist;
else
alpha = (d(1) - d(2))/od(2);
denom = SafeDistance(1,alpha);
c = alpha / denom;
s = -1 / denom;
end
w = c * x - s * y;
g = d(k) - d(k+1);
z = (2 * c * od(k+1) + g * s) * s;
d(k) = d(k) - z;
d(k+1) = d(k+1) + z;
od(k+1) = g * c * s + (c * c - s * s) * od(k+1);
x = od(k+1);
if (k > 1)
od(k) = w;
end
if (k < (m-1)) y = -s * od(k+2); od(k+2) = c * od(k+2); end Q(:,k:(k+1)) = Q(:,k:(k+1)) * [c s;-s c]; end if ((abs(od(m)) < eps * (abs(d(m-1)) + abs(d(m)))) || (iter >= maxIters))
if (iter >= maxIters)
warning('myImplicitTriQR:instability',['myImplicitTriQR() didn''t converge for m = ' num2str(m) ' after ' num2str(maxIters) ' iterations']);
end
m = m - 1;
iter = 0;
end
end

[d ind] = sort(d,'descend');
d = d(1:NUM);

varargout{1} = Q(:,ind(1:NUM));
varargout{2} = max(max(diag(d)));
end

function dist = SafeDistance(a,b)
abs_a = abs(a);
abs_b = abs(b);
if (abs_a > abs_b)
dist = abs_a * sqrt(1.0 + abs_b * abs_b / (abs_a * abs_a));
else
if (abs_b == 0)
dist = 0;
else
dist = abs_b * sqrt(1.0 + abs_a * abs_a / (abs_b * abs_b));
end
end
end

Simplified Matlab Code of Truncated Power Method for PCA-Part for K-Means Initialization

function [V D] = tpower(A)
%Input: A (vector ora matrice of the data)
%output: Maximum eigenvalue and eigenvector
%Source:
%Yuan, X.T., Zhang T., 2013. Truncated Power Method for Sparse Eigenvalue Problems. Journal of Machine Learning Research, 14.pp. 899-925
optTol = 1e-6;
maxIter = 100;
cardinality = max(size(A));

[~,idx]=max(diag(A));
x0 = zeros(size(A,1),1);
x0(idx) = 1;

V = sparse(x0);
s = A*V;
g = 2*s;
D = V'*s;
V = truncate_operator(g, cardinality);
D_old = D;

i = 1;
while i <= maxIter

s = A*V;
g = 2*s;
V = truncate_operator(g, cardinality);
D = V'*s;

if ( abs(D - D_old) < optTol )
break;
end

D_old = D;
i = i+1;
end
V=V/(max(V));
end

function u = truncate_operator(v , k)

u = zeros(length(v), 1);
[~, idx] = sort(abs(v), 'descend');

v_restrict = v(idx(1:k));
u(idx(1:k)) = v_restrict / norm(v_restrict);

u = sparse(u);
end