|
E:\home\projects\trilogy\raptor\Clustering\src\com\trilogy\fs\raptor\clustering\ClusteringAlgorithm.java
|
package com.trilogy.fs.raptor.clustering;
import cern.colt.matrix.DoubleFactory2D;
import cern.colt.matrix.DoubleMatrix1D;
import cern.colt.matrix.DoubleMatrix2D;
import cern.colt.matrix.linalg.Algebra;
import cern.colt.matrix.linalg.CholeskyDecomposition;
import cern.colt.matrix.linalg.EigenvalueDecomposition;
import cern.colt.matrix.linalg.LanczosDecomposition;
import cern.jet.math.Functions;
import java.util.ArrayList;
import java.util.List;
import java.util.Iterator;
/**
* @author <A HREF="mailto:razvan.surdulescu@trilogy.com">Razvan Surdulescu</A>, (C) Trilogy 2003
*/
public class ClusteringAlgorithm {
public static final int DEFAULT_SPATIAL_RADIUS = 20;
private static final int DEFAULT_SPATIAL_DELTA = 10;
public static final String DEFAULT_ALGORITHM = "Lanczos";
private static final int LANCZOS_FLOOR = 10;
private static final double LANCZOS_LOAD = 0.05;
private static final int LANCZOS_CEILING = 30;
private boolean m_log = false;
private String m_algorithm = DEFAULT_ALGORITHM;
private double m_radius = DEFAULT_SPATIAL_RADIUS;
private double m_delta = DEFAULT_SPATIAL_DELTA;
private List m_cluster1 = new ArrayList(), m_cluster2 = new ArrayList();
public void setLog(boolean log) {
m_log = log;
}
public void setAlgorithm(String algorithm) {
m_algorithm = algorithm;
}
public void setRadius(double radius) {
m_radius = radius;
}
private double aff(List a, List b) {
double sum = 0;
for (Iterator aIterator = a.iterator(); aIterator.hasNext(); ) {
Dot aDot = (Dot) aIterator.next();
for (Iterator bIterator = b.iterator(); bIterator.hasNext(); ) {
Dot bDot = (Dot) bIterator.next();
sum += aDot.getWeight(bDot, m_radius, m_delta);
}
}
return sum;
}
public double ncut(List a, List b, List v) {
if (a.size() == 0 || b.size() == 0 || v.size() == 0) {
return Double.MAX_VALUE;
}
double x = aff(a, b);
return (x / aff(a, v) + x / aff(b, v));
}
public void split(List dots) {
split(dots, new Timer());
}
public void split(List dots, Timer timer) {
m_cluster1 = new ArrayList();
m_cluster2 = new ArrayList();
if (dots.size() <= 1) {
return;
}
timer.start("Generate W, D");
DoubleMatrix2D weightMatrix = DoubleFactory2D.dense.make(dots.size(), dots.size());
//DoubleMatrix2D weightMatrix = DoubleFactory2D.sparse.make(dots.size(), dots.size());
for (int row = 0; row < dots.size(); row++) {
Dot a = (Dot) dots.get(row);
for (int col = 0; col < dots.size(); col++) {
Dot b = (Dot) dots.get(col);
weightMatrix.set(row, col, a.getWeight(b, m_radius, m_delta));
}
}
if (m_log) {
System.out.println("Weight matrix:" + weightMatrix);
}
DoubleMatrix2D distanceMatrix = DoubleFactory2D.dense.make(dots.size(), dots.size());
//DoubleMatrix2D distanceMatrix = DoubleFactory2D.sparse.make(dots.size(), dots.size());
for (int row = 0; row < dots.size(); row++) {
double sum = 0;
for (int col = 0; col < dots.size(); col++) {
sum += weightMatrix.get(row, col);
}
distanceMatrix.set(row, row, sum);
}
timer.end();
if (m_log) {
System.out.println("Distance matrix:" + distanceMatrix);
}
timer.start("Compute difference matrix");
// we want to solve the following generalized eigensystem (1):
// differenceMatrix * x = lambda * distanceMatrix * x
DoubleMatrix2D differenceMatrix = distanceMatrix.copy().assign(weightMatrix, Functions.minus);
timer.end();
timer.start("Cholesky decomposition");
// the distance matrix is symmetric, positive definite so this decomposition
// is always possible
CholeskyDecomposition choleskyDecomposition = new CholeskyDecomposition(distanceMatrix);
DoubleMatrix2D rootDistanceMatrix = choleskyDecomposition.getL();
timer.end();
if (m_log) {
System.out.println("Cholesky decomposition matrix:" + rootDistanceMatrix);
}
timer.start("Compute eigen matrix");
// the following matrix (2) is symmetric, positive definite, has the
// same eigenvalues as (1) and has eigenvectors x' = rootDistanceMatrix^T * x:
// eigenMatrix = rootDistanceMatrix^(-1) * differenceMatrix * (rootDistanceMatrix^(-1))^T
// the efficient way to compute (2) is to solve:
// half * rootDistanceMatrix^T = differenceMatrix
// and then solve:
// rootDistanceMatrix * eigenMatrix = half
DoubleMatrix2D half = Algebra.DEFAULT.transpose(
Algebra.DEFAULT.solveTranspose(Algebra.DEFAULT.transpose(rootDistanceMatrix), differenceMatrix));
DoubleMatrix2D eigenMatrix = Algebra.DEFAULT.solve(rootDistanceMatrix, half);
timer.end();
if (m_log) {
System.out.println("Eigenmatrix:" + eigenMatrix);
}
DoubleMatrix1D eigenvalues;
DoubleMatrix2D eigenvectors;
if (m_algorithm.equals("Lanczos")) {
timer.start("Eigenvalue decomposition");
LanczosDecomposition lanczosDecomposition = new LanczosDecomposition(eigenMatrix, getLanczosK(eigenMatrix), m_log);
timer.end();
eigenvalues = lanczosDecomposition.getRealEigenvalues();
eigenvectors = lanczosDecomposition.getV();
} else if (m_algorithm.equals("QL")) {
timer.start("Eigenvalue decomposition");
EigenvalueDecomposition eigenvalueDecomposition = new EigenvalueDecomposition(eigenMatrix);
timer.end();
eigenvalues = eigenvalueDecomposition.getRealEigenvalues();
eigenvectors = eigenvalueDecomposition.getV();
} else {
throw new IllegalArgumentException("Unrecognized algorithm: " + m_algorithm);
}
// symmetric, positive definite matrices have only real eigenvalues (no imaginary)
if (m_log) {
System.out.println("Eigenvalues:" + eigenvalues);
}
// find the appropriate eigenvalue and eigenvector of (2)
timer.start("Select eigenvector");
double minEigenvalue = eigenvalues.get(0);
int minEigenvalueIndex = 0;
for (int index = 1; index < eigenvalues.size(); index++) {
if (eigenvalues.get(index) < minEigenvalue) {
minEigenvalue = eigenvalues.get(index);
minEigenvalueIndex = index;
}
}
// compute the eigenvector of (1) from the corresponding eigenvector of (2)
DoubleMatrix1D eigenvector = Algebra.DEFAULT.mult(
Algebra.DEFAULT.inverse(Algebra.DEFAULT.transpose(rootDistanceMatrix)),
eigenvectors.viewColumn(minEigenvalueIndex));
timer.end();
if (m_log) {
System.out.println("Selected eigenvalue:" + eigenvalues.get(minEigenvalueIndex));
System.out.println("Selected eigenvector:" + eigenvector);
}
timer.start("Cluster");
// pick the split to be the average of min and max components
// in the eigenvector
double maxComponent = eigenvector.get(0), minComponent = eigenvector.get(0);
for (int index = 1; index < eigenvector.size(); index++) {
if (maxComponent < eigenvector.get(index)) {
maxComponent = eigenvector.get(index);
}
if (minComponent > eigenvector.get(index)) {
minComponent = eigenvector.get(index);
}
}
double split = (maxComponent + minComponent) / 2;
if (m_log) {
System.out.println("Split value:" + split);
}
// "left" of the split = cluster 1, "right" of the split = cluster 2
for (int index = 0; index < eigenvector.size(); index++) {
Dot dot = (Dot) dots.get(index);
if (eigenvector.get(index) <= split) {
m_cluster1.add(dot);
} else {
m_cluster2.add(dot);
}
}
timer.end();
}
private int getLanczosK(DoubleMatrix2D matrix) {
int k = (int) (matrix.columns() * LANCZOS_LOAD);
if (k < LANCZOS_FLOOR) {
return Math.min(matrix.columns(), LANCZOS_FLOOR);
} else if (k > LANCZOS_CEILING) {
return Math.min(matrix.columns(), LANCZOS_CEILING);
} else {
return k;
}
}
public List getCluster1() {
return m_cluster1;
}
public List getCluster2() {
return m_cluster2;
}
}