Minggu, 15 September 2013

Matlab Code of PCA-Part for K-Means initialization

function idx=PCAPartKmeans(data,k,method)
% input:
% k= number of clusters
% method:
% 1=HHQL (Householder-QL), is modified function taken from myEig in LAPACK (http://eriesadewo.blogspot.com/2014/05/matlab-code-of-householder-ql-method.html)
% 2=simplified truncated power method (http://eriesadewo.blogspot.com/2013/08/simplified-code-of-truncated-power.html),
% output:
% data indexed on cluster number
% Source:
% Su, T., Dy, J., 2004. A Deterministic Method for Initializing K-means Clustering. In: Proc. 16th IEEE International Conference on Tools with Tools with Artificial Intelligence (ICTAI2004), pp.784-786.

% Last Modified: 1 Februari 2014

% Erie Sadewo & A. Syahrul Choir
% erie@bps.go.id

% The user of this function is requested to cite:
% Sadewo, E., Mashuri, M., Barakbah, A.R., 2014. Pengaruh penggunaan metode power dan truncated power pada PCA-Part untuk inisialisasi K-Means. Prosiding Seminar Nasional Sains & Teknologi V, pp. 1326-1335.

centroid=PCAInitCentroid(data,k,method);
idx=kmeans(data,k,'Start',centroid);

function centroid=PCAInitCentroid(data,k,method)
newdata=data;
imaxSSE=1;
SSE=1;
for i=1:(k-1)
clust{imaxSSE}=[];
SSE(imaxSSE)=[];
[C1 C2 SSE1 SSE2]=pca_part(newdata,k,method);
clust{imaxSSE}=C1;
clust{i+1}=C2;
SSE(imaxSSE)=SSE1;
SSE(i+1)=SSE2;
imaxSSE=find(SSE==max(SSE));
newdata=clust{imaxSSE};
end

for i=1:k
m=size(clust{i},1);
if m==1;
centroid{i}=clust{i};
else
centroid{i}=mean(clust{i});
end
end
centroid=cell2mat((centroid)');

function [C1 C2 SSE1 SSE2]=pca_part(data,k,method)
S=cov(data);
if method==1
[V ~]=HHQL(S,k);
elseif method==2
[V ~]=tpower(S);
end
[C1 C2 SSE1 SSE2]=binary_part(data,method,V);

function [C1 C2 SSE1 SSE2]=binary_part(data,method,V)
mu=mean(data);
PC=data*V;
PCmu=mu*V;

if method==1
ind1=find(PC>PCmu);
ind2=find(PCPCmu);
end
ind3=find(PC==PCmu);

C1=data(ind1,:);
C2=data(ind2,:);
if size(C1,1)==1;
mu1=C1;
else
mu1=mean(C1);
end
if size(C2,1)==1;
mu2=C2;
else
mu2=mean(C2);
end
PCmu1=mu1*V;
PCmu2=mu2*V;

if isempty(ind3)==0;
jarak1= sqrt((PCmu1-PCmu).^2);
jarak2= sqrt((PCmu2-PCmu).^2);
if jarak1 C1=[C1;data(ind3,:)];
ind1=[ind1 ind3];
C1=data(ind1,:);
mu1=mean(C1);
else
C2=[C2;data(ind3,:)];
ind2=[ind2 ind3];
C2=data(ind2,:);
mu2=mean(C2);
end
end

SSE1= sum(sqrt(sum((C1-repmat(mu1,(size(C1,1)),1)).^2,2)).^2);
SSE2= sum(sqrt(sum((C2-repmat(mu2,(size(C2,1)),1)).^2,2)).^2);

Sabtu, 17 Agustus 2013

Matlab Code of Householder-QL Method for PCA-Part for K-Means initialization

function varargout = HHQL(mat,k)
%--------------------------------------------------------------------------
% Author: Brian Moore
% brimoor@umich.edu
% Modified: Erie Sadewo
% erie@bps.go.id
% Date: September 2013
%--------------------------------------------------------------------------

[m n] = size(mat);
if (m ~= n)
error('Input matrix must be square');
elseif n==2
[V D]=eig(mat);
varargout{1} = V;
varargout{1} = varargout{1}(:,2);
varargout{1} = varargout{1}/max(varargout{1});
varargout{2} = max(diag(D));
else
[d od Q] = myTriDiagHouseholder(mat,'true');
if k<=27 [V D] = myImplicitTriQL(d,od,Q); varargout{1} = V; varargout{1} = -(varargout{1}(:,1)); varargout{1} = varargout{1}/max(varargout{1}); varargout{2} = max(diag(D)); if any(isnan(varargout{1}))==1 [V D] = myImplicitTriQR(d,od,Q); varargout{1} = V; varargout{1} = varargout{1}(:,1); varargout{2} = max(diag(D)); end else [V D] = myImplicitTriQR(d,od,Q); varargout{1} = V; varargout{1} = varargout{1}(:,1); varargout{2} = max(diag(D)); end end end function varargout = myTriDiagHouseholder(mat,varargin) symTol = 1e-9; [m,n] = size(mat); if (m ~= n) error('Input matrix must be square'); end if ~isreal(mat) error('Input matrix must be real valued'); end if (max(max(abs(mat - mat'))) > symTol)
error('Input matrix is not symmetric to working precision');
end
if (nargin == 2)
if strcmpi(varargin{1},'true')
OutputQ = 1;
else
OutputQ = 0;
end
else
OutputQ = 0;
end
d = zeros(n,1);
od = zeros(n,1);
Q = mat;
for i = n:-1:2
l = i - 1;
h = 0.0;
scale = 0.0;
if (l > 1)
for k = 1:l
scale = scale + abs(Q(i,k));
end
if (scale == 0.0)
od(i) = Q(i,l);
else
for k = 1:l
Q(i,k) = Q(i,k) / scale;
h = h + Q(i,k) * Q(i,k);
end
f = Q(i,l);
if (f >= 0.0)
g = -sqrt(h);
else
g = sqrt(h);
end
od(i) = scale * g;
h = h - f * g;
Q(i,l) = f - g;
f = 0.0;
for j = 1:l
if (OutputQ == 1)
Q(j,i) = Q(i,j) / h;
end
g = 0.0;
for k = 1:j
g = g + Q(j,k) * Q(i,k);
end
for k = (j+1):l
g = g + Q(k,j) * Q(i,k);
end
od(j) = g / h;
f = f + od(j) * Q(i,j);
end
hh = f / (h + h);
for j = 1:l
f = Q(i,j);
g = od(j) - hh * f;
od(j) = g;
for k = 1:j
Q(j,k) = Q(j,k) - (f * od(k) + g * Q(i,k));
end
end
end
else
od(i) = Q(i,l);
end
d(i) = h;
end
d(1) = 0.0;
od = od(2:end);
if (OutputQ == 1)
for i = 1:n
l = i - 1;
if (d(i) ~= 0)
for j = 1:l
g = 0.0;
for k = 1:l
g = g + Q(i,k) * Q(k,j);
end
for k = 1:l
Q(k,j) = Q(k,j) - g * Q(k,i);
end
end
end
d(i) = Q(i,i);
Q(i,i) = 1.0;
for j = 1:l
Q(j,i) = 0.0;
Q(i,j) = 0.0;
end
end
else
for i = 1:n
d(i) = Q(i,i);
end
end

if (nargout == 2)
if (OutputQ == 1)
varargout{1} = diag(d) + diag(od,-1) + diag(od,1);
varargout{2} = Q;
else
varargout{1} = d;
varargout{2} = od;
end
elseif (nargout == 3)
varargout{1} = d;
varargout{2} = od;
varargout{3} = Q;
else
varargout{1} = diag(d) + diag(od,-1) + diag(od,1);
end
end

function varargout = myImplicitTriQL(varargin)
maxIters = 30;

% Process input arguments
if (nargin == 1)
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
Q = eye(n);
NUM = n;
elseif (nargin == 2)
[m1 n1] = size(varargin{1});
[m2 n2] = size(varargin{2});
if ((m1 > 1) && (n1 > 1))
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
if ((m2 > 1) && (n2 > 1))
Q = varargin{2};
NUM = n;
else
Q = eye(n);
NUM = varargin{2};
end
else
d = varargin{1};
od = varargin{2};
n = length(d);
Q = eye(n);
NUM = n;
end
elseif (nargin == 3)
[m1 n1] = size(varargin{1});
[m3 n3] = size(varargin{3});
if ((m1 > 1) && (n1 > 1))
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
Q = varargin{2};
NUM = varargin{3};
else
d = varargin{1};
od = varargin{2};
n = length(d);
if ((m3 > 1) && (n3 > 1))
Q = varargin{3};
NUM = n;
else
Q = eye(n);
NUM = varargin{3};
end
end
elseif (nargin == 4)
d = varargin{1};
od = varargin{2};
n = length(d);
Q = varargin{3};
NUM = varargin{4};
else
error('Input syntax error. Type ''help myImplicitTriQL'' for assistance');
end

m = 0;
for l = 1:NUM
iter = 0;
while (m ~= l)
for m = l:(n-1)
dd = abs(d(m)) + abs(d(m+1));
if (abs(od(m)) <= eps * dd) break; end end if (m ~= l) iter = iter + 1; if (iter >= maxIters)
error(['myImplicitTriQL() didn''t converge after ' num2str(maxIters) ' iterations']);
end
g = (d(l+1) - d(l)) / (2.0 * od(l));
r = SafeDistance(g,1.0);
g = d(m) - d(l) + od(l) / (g + abs(r) * sign(g));
s = 1.0;
c = 1.0;
p = 0.0;
for i = (m-1):-1:l
f = s * od(i);
b = c * od(i);
r = SafeDistance(f,g);
od(i+1) = r;
if (r == 0.0)
d(i+1) = d(i+1) - p;
od(m) = 0.0;
break;
end
s = f / r;
c = g / r;
g = d(i+1) - p;
r = (d(i) - g) * s + 2.0 * c * b;
p = s * r;
d(i+1) = g + p;
g = c * r - b;
for k = 1:n
f = Q(k,i+1);
Q(k,i+1) = s * Q(k,i) + c * f;
Q(k,i) = c * Q(k,i) - s * f;
end
end
if ((r == 0.0) && (i >= l))
continue;
end
d(l) = d(l) - p;
od(l) = g;
od(m) = 0.0;
end
end
end

[d ind] = sort(d,'descend');
d = d(1:NUM);

varargout{1} = Q(:,ind(1:NUM));
varargout{2} = diag(d);
end

function varargout = myImplicitTriQR(varargin)
eps = 1e-6;
maxIters = 30;

% Process input arguments
if (nargin == 1)
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
Q = eye(n);
NUM = n;
elseif (nargin == 2)
[m1 n1] = size(varargin{1});
[m2 n2] = size(varargin{2});
if ((m1 > 1) && (n1 > 1))
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
if ((m2 > 1) && (n2 > 1))
Q = varargin{2};
NUM = n;
else
Q = eye(n);
NUM = varargin{2};
end
else
d = varargin{1};
od = varargin{2};
n = length(d);
Q = eye(n);
NUM = n;
end
elseif (nargin == 3)
[m1 n1] = size(varargin{1});
[m3 n3] = size(varargin{3});
if ((m1 > 1) && (n1 > 1))
T = varargin{1};
d = diag(T);
od = diag(T,1);
n = length(d);
Q = varargin{2};
NUM = varargin{3};
else
d = varargin{1};
od = varargin{2};
n = length(d);
if ((m3 > 1) && (n3 > 1))
Q = varargin{3};
NUM = n;
else
Q = eye(n);
NUM = varargin{3};
end
end
elseif (nargin == 4)
d = varargin{1};
od = varargin{2};
n = length(d);
Q = varargin{3};
NUM = varargin{4};
else
error('Input syntax error. Type ''help myImplicitTriQR'' for assistance');
end

od = [0;od(:)];
m = n;
iter = 0;
while (m > 1)
iter = iter + 1;
g = (d(m-1) - d(m)) / 2;
if (g == 0)
s = d(m) - abs(od(m));
else
s = d(m) - od(m) * od(m) / (g + sign(g) * SafeDistance(g,od(m)));
end
x = d(1) - s;
y = od(2);
for k = 1:(m-1)
if (m > 2)
xydist = SafeDistance(x,y);
c = x / xydist;
s = -y / xydist;
else
alpha = (d(1) - d(2))/od(2);
denom = SafeDistance(1,alpha);
c = alpha / denom;
s = -1 / denom;
end
w = c * x - s * y;
g = d(k) - d(k+1);
z = (2 * c * od(k+1) + g * s) * s;
d(k) = d(k) - z;
d(k+1) = d(k+1) + z;
od(k+1) = g * c * s + (c * c - s * s) * od(k+1);
x = od(k+1);
if (k > 1)
od(k) = w;
end
if (k < (m-1)) y = -s * od(k+2); od(k+2) = c * od(k+2); end Q(:,k:(k+1)) = Q(:,k:(k+1)) * [c s;-s c]; end if ((abs(od(m)) < eps * (abs(d(m-1)) + abs(d(m)))) || (iter >= maxIters))
if (iter >= maxIters)
warning('myImplicitTriQR:instability',['myImplicitTriQR() didn''t converge for m = ' num2str(m) ' after ' num2str(maxIters) ' iterations']);
end
m = m - 1;
iter = 0;
end
end

[d ind] = sort(d,'descend');
d = d(1:NUM);

varargout{1} = Q(:,ind(1:NUM));
varargout{2} = max(max(diag(d)));
end

function dist = SafeDistance(a,b)
abs_a = abs(a);
abs_b = abs(b);
if (abs_a > abs_b)
dist = abs_a * sqrt(1.0 + abs_b * abs_b / (abs_a * abs_a));
else
if (abs_b == 0)
dist = 0;
else
dist = abs_b * sqrt(1.0 + abs_a * abs_a / (abs_b * abs_b));
end
end
end

Simplified Matlab Code of Truncated Power Method for PCA-Part for K-Means Initialization

function [V D] = tpower(A)
%Input: A (vector ora matrice of the data)
%output: Maximum eigenvalue and eigenvector
%Source:
%Yuan, X.T., Zhang T., 2013. Truncated Power Method for Sparse Eigenvalue Problems. Journal of Machine Learning Research, 14.pp. 899-925
optTol = 1e-6;
maxIter = 100;
cardinality = max(size(A));

[~,idx]=max(diag(A));
x0 = zeros(size(A,1),1);
x0(idx) = 1;

V = sparse(x0);
s = A*V;
g = 2*s;
D = V'*s;
V = truncate_operator(g, cardinality);
D_old = D;

i = 1;
while i <= maxIter

s = A*V;
g = 2*s;
V = truncate_operator(g, cardinality);
D = V'*s;

if ( abs(D - D_old) < optTol )
break;
end

D_old = D;
i = i+1;
end
V=V/(max(V));
end

function u = truncate_operator(v , k)

u = zeros(length(v), 1);
[~, idx] = sort(abs(v), 'descend');

v_restrict = v(idx(1:k));
u(idx(1:k)) = v_restrict / norm(v_restrict);

u = sparse(u);
end

Senin, 01 Juli 2013

Penimbang Kota SBH 1996, 2002 Dan 2007

penimbang dapat digunakan untuk melihat peranan kota/wilayah terhadap inflasi nasional, atau untuk melakukan agregasi inflasi pada level yang lebih tinggi (provinsi, regional, dsb)