mirror of https://github.com/commaai/openpilot.git
remove fastcluster (#29352)
* remove fastcluster
* lock
* rm there
* and from release files
old-commit-hash: 00a11a1a2b
This commit is contained in:
parent
b3f734283d
commit
cf1d402531
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6e04d095e34a8f906a3ee501288b4a47d176ecf1c7ec43cbb8601f86cda84780
|
||||
size 478413
|
||||
oid sha256:ae0baf97dbbf561510d724711488fbe0a01f6b7bdb079dc52057fbb92456cf35
|
||||
size 470684
|
||||
|
|
|
@ -76,7 +76,6 @@ carla = { url = "https://github.com/commaai/carla/releases/download/3.11.4/carla
|
|||
control = "*"
|
||||
coverage = "*"
|
||||
dictdiffer = "*"
|
||||
fastcluster = "*"
|
||||
ft4222 = "*"
|
||||
hexdump = "*"
|
||||
hypothesis = "==6.46.7"
|
||||
|
|
|
@ -418,8 +418,6 @@ selfdrive/assets/navigation/*
|
|||
third_party/.gitignore
|
||||
third_party/SConscript
|
||||
|
||||
third_party/cluster/*
|
||||
|
||||
third_party/linux/**
|
||||
third_party/opencl/**
|
||||
|
||||
|
|
|
@ -1,140 +0,0 @@
|
|||
# pylint: skip-file
|
||||
|
||||
import time
|
||||
import unittest
|
||||
import numpy as np
|
||||
from fastcluster import linkage_vector
|
||||
from scipy.cluster import _hierarchy
|
||||
from scipy.spatial.distance import pdist
|
||||
|
||||
from third_party.cluster.fastcluster_py import hclust, ffi
|
||||
from third_party.cluster.fastcluster_py import cluster_points_centroid
|
||||
|
||||
|
||||
def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None):
|
||||
# supersimplified function to get fast clustering. Got it from scipy
|
||||
Z = np.asarray(Z, order='c')
|
||||
n = Z.shape[0] + 1
|
||||
T = np.zeros((n,), dtype='i')
|
||||
_hierarchy.cluster_dist(Z, T, float(t), int(n))
|
||||
return T
|
||||
|
||||
|
||||
TRACK_PTS = np.array([[59.26000137, -9.35999966, -5.42500019],
|
||||
[91.61999817, -0.31999999, -2.75],
|
||||
[31.38000031, 0.40000001, -0.2],
|
||||
[89.57999725, -8.07999992, -18.04999924],
|
||||
[53.42000122, 0.63999999, -0.175],
|
||||
[31.38000031, 0.47999999, -0.2],
|
||||
[36.33999939, 0.16, -0.2],
|
||||
[53.33999939, 0.95999998, -0.175],
|
||||
[59.26000137, -9.76000023, -5.44999981],
|
||||
[33.93999977, 0.40000001, -0.22499999],
|
||||
[106.74000092, -5.76000023, -18.04999924]])
|
||||
|
||||
CORRECT_LINK = np.array([[2., 5., 0.07999998, 2.],
|
||||
[4., 7., 0.32984889, 2.],
|
||||
[0., 8., 0.40078104, 2.],
|
||||
[6., 9., 2.41209933, 2.],
|
||||
[11., 14., 3.76342275, 4.],
|
||||
[12., 13., 13.02297651, 4.],
|
||||
[1., 3., 17.27626057, 2.],
|
||||
[10., 17., 17.92918845, 3.],
|
||||
[15., 16., 23.68525366, 8.],
|
||||
[18., 19., 52.52351319, 11.]])
|
||||
|
||||
CORRECT_LABELS = np.array([7, 1, 4, 2, 6, 4, 5, 6, 7, 5, 3], dtype=np.int32)
|
||||
|
||||
|
||||
def plot_cluster(pts, idx_old, idx_new):
|
||||
import matplotlib.pyplot as plt
|
||||
m = 'Set1'
|
||||
|
||||
plt.figure()
|
||||
plt.subplot(1, 2, 1)
|
||||
plt.scatter(pts[:, 0], pts[:, 1], c=idx_old, cmap=m)
|
||||
plt.title("Old")
|
||||
plt.colorbar()
|
||||
plt.subplot(1, 2, 2)
|
||||
plt.scatter(pts[:, 0], pts[:, 1], c=idx_new, cmap=m)
|
||||
plt.title("New")
|
||||
plt.colorbar()
|
||||
|
||||
plt.show()
|
||||
|
||||
|
||||
def same_clusters(correct, other):
|
||||
correct = np.asarray(correct)
|
||||
other = np.asarray(other)
|
||||
if len(correct) != len(other):
|
||||
return False
|
||||
|
||||
for i in range(len(correct)):
|
||||
c = np.where(correct == correct[i])
|
||||
o = np.where(other == other[i])
|
||||
if not np.array_equal(c, o):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
class TestClustering(unittest.TestCase):
|
||||
def test_scipy_clustering(self):
|
||||
old_link = linkage_vector(TRACK_PTS, method='centroid')
|
||||
old_cluster_idxs = fcluster(old_link, 2.5, criterion='distance')
|
||||
|
||||
np.testing.assert_allclose(old_link, CORRECT_LINK)
|
||||
np.testing.assert_allclose(old_cluster_idxs, CORRECT_LABELS)
|
||||
|
||||
def test_pdist(self):
|
||||
pts = np.ascontiguousarray(TRACK_PTS, dtype=np.float64)
|
||||
pts_ptr = ffi.cast("double *", pts.ctypes.data)
|
||||
|
||||
n, m = pts.shape
|
||||
out = np.zeros((n * (n - 1) // 2, ), dtype=np.float64)
|
||||
out_ptr = ffi.cast("double *", out.ctypes.data)
|
||||
hclust.hclust_pdist(n, m, pts_ptr, out_ptr)
|
||||
|
||||
np.testing.assert_allclose(out, np.power(pdist(TRACK_PTS), 2))
|
||||
|
||||
def test_cpp_clustering(self):
|
||||
pts = np.ascontiguousarray(TRACK_PTS, dtype=np.float64)
|
||||
pts_ptr = ffi.cast("double *", pts.ctypes.data)
|
||||
n, m = pts.shape
|
||||
|
||||
labels = np.zeros((n, ), dtype=np.int32)
|
||||
labels_ptr = ffi.cast("int *", labels.ctypes.data)
|
||||
hclust.cluster_points_centroid(n, m, pts_ptr, 2.5**2, labels_ptr)
|
||||
self.assertTrue(same_clusters(CORRECT_LABELS, labels))
|
||||
|
||||
def test_cpp_wrapper_clustering(self):
|
||||
labels = cluster_points_centroid(TRACK_PTS, 2.5)
|
||||
self.assertTrue(same_clusters(CORRECT_LABELS, labels))
|
||||
|
||||
def test_random_cluster(self):
|
||||
np.random.seed(1337)
|
||||
N = 1000
|
||||
|
||||
t_old = 0.
|
||||
t_new = 0.
|
||||
|
||||
for _ in range(N):
|
||||
n = int(np.random.uniform(2, 32))
|
||||
x = np.random.uniform(-10, 50, (n, 1))
|
||||
y = np.random.uniform(-5, 5, (n, 1))
|
||||
vrel = np.random.uniform(-5, 5, (n, 1))
|
||||
pts = np.hstack([x, y, vrel])
|
||||
|
||||
t = time.time()
|
||||
old_link = linkage_vector(pts, method='centroid')
|
||||
old_cluster_idx = fcluster(old_link, 2.5, criterion='distance')
|
||||
t_old += time.time() - t
|
||||
|
||||
t = time.time()
|
||||
cluster_idx = cluster_points_centroid(pts, 2.5)
|
||||
t_new += time.time() - t
|
||||
|
||||
self.assertTrue(same_clusters(old_cluster_idx, cluster_idx))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -4,5 +4,3 @@ env.Library('json11', ['json11/json11.cpp'], CCFLAGS=env['CCFLAGS'] + ['-Wno-unq
|
|||
env.Append(CPPPATH=[Dir('json11')])
|
||||
|
||||
env.Library('kaitai', ['kaitai/kaitaistream.cpp'], CPPDEFINES=['KS_STR_ENCODING_NONE'])
|
||||
|
||||
SConscript(['cluster/SConscript'])
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
test
|
|
@ -1,13 +0,0 @@
|
|||
Copyright:
|
||||
* fastcluster_dm.cpp & fastcluster_R_dm.cpp:
|
||||
© 2011 Daniel Müllner <http://danifold.net>
|
||||
* fastcluster.(h|cpp) & demo.cpp & plotresult.r:
|
||||
© 2018 Christoph Dalitz <http://www.hsnr.de/ipattern/>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -1,79 +0,0 @@
|
|||
C++ interface to fast hierarchical clustering algorithms
|
||||
========================================================
|
||||
|
||||
This is a simplified C++ interface to fast implementations of hierarchical
|
||||
clustering by Daniel Müllner. The original library with interfaces to R
|
||||
and Python is described in:
|
||||
|
||||
Daniel Müllner: "fastcluster: Fast Hierarchical, Agglomerative Clustering
|
||||
Routines for R and Python." Journal of Statistical Software 53 (2013),
|
||||
no. 9, pp. 1–18, http://www.jstatsoft.org/v53/i09/
|
||||
|
||||
|
||||
Usage of the library
|
||||
--------------------
|
||||
|
||||
For using the library, the following source files are needed:
|
||||
|
||||
fastcluster_dm.cpp, fastcluster_R_dm.cpp
|
||||
original code by Daniel Müllner
|
||||
these are included by fastcluster.cpp via #include, and therefore
|
||||
need not be compiled to object code
|
||||
|
||||
fastcluster.[h|cpp]
|
||||
simplified C++ interface
|
||||
fastcluster.cpp is the only file that must be compiled
|
||||
|
||||
The library provides the clustering function *hclust_fast* for
|
||||
creating the dendrogram information in an encoding as used by the
|
||||
R function *hclust*. For a description of the parameters, see fastcluster.h.
|
||||
Its parameter *method* can be one of
|
||||
|
||||
HCLUST_METHOD_SINGLE
|
||||
single link with the minimum spanning tree algorithm (Rohlf, 1973)
|
||||
|
||||
HHCLUST_METHOD_COMPLETE
|
||||
complete link with the nearest-neighbor-chain algorithm (Murtagh, 1984)
|
||||
|
||||
HCLUST_METHOD_AVERAGE
|
||||
complete link with the nearest-neighbor-chain algorithm (Murtagh, 1984)
|
||||
|
||||
HCLUST_METHOD_MEDIAN
|
||||
median link with the generic algorithm (Müllner, 2011)
|
||||
|
||||
For splitting the dendrogram into clusters, the two functions *cutree_k*
|
||||
and *cutree_cdist* are provided.
|
||||
|
||||
Note that output parameters must be allocated beforehand, e.g.
|
||||
int* merge = new int[2*(npoints-1)];
|
||||
For a complete usage example, see lines 135-142 of demo.cpp.
|
||||
|
||||
|
||||
Demonstration program
|
||||
---------------------
|
||||
|
||||
A simple demo is implemented in demo.cpp, which can be compiled and run with
|
||||
|
||||
make
|
||||
./hclust-demo -m complete lines.csv
|
||||
|
||||
It creates two clusters of line segments such that the segment angle between
|
||||
line segments of different clusters have a maximum (cosine) dissimilarity.
|
||||
For visualizing the result, plotresult.r can be used as follows
|
||||
(requires R <https://r-project.org> to be installed):
|
||||
|
||||
./hclust-demo -m complete lines.csv | Rscript plotresult.r
|
||||
|
||||
|
||||
Authors & Copyright
|
||||
-------------------
|
||||
|
||||
Daniel Müllner, 2011, <http://danifold.net>
|
||||
Christoph Dalitz, 2018, <http://www.hsnr.de/ipattern/>
|
||||
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
This code is provided under a BSD-style license.
|
||||
See the file LICENSE for details.
|
|
@ -1,8 +0,0 @@
|
|||
Import('env')
|
||||
|
||||
fc = env.SharedLibrary("fastcluster", "fastcluster.cpp")
|
||||
|
||||
# TODO: how do I gate on test
|
||||
#env.Program("test", ["test.cpp"], LIBS=[fc])
|
||||
#valgrind --leak-check=full ./test
|
||||
|
|
@ -1,218 +0,0 @@
|
|||
//
|
||||
// C++ standalone verion of fastcluster by Daniel Müllner
|
||||
//
|
||||
// Copyright: Christoph Dalitz, 2018
|
||||
// Daniel Müllner, 2011
|
||||
// License: BSD style license
|
||||
// (see the file LICENSE for details)
|
||||
//
|
||||
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
|
||||
extern "C" {
|
||||
#include "fastcluster.h"
|
||||
}
|
||||
|
||||
// Code by Daniel Müllner
|
||||
// workaround to make it usable as a standalone version (without R)
|
||||
bool fc_isnan(double x) { return false; }
|
||||
#include "fastcluster_dm.cpp"
|
||||
#include "fastcluster_R_dm.cpp"
|
||||
|
||||
extern "C" {
|
||||
//
|
||||
// Assigns cluster labels (0, ..., nclust-1) to the n points such
|
||||
// that the cluster result is split into nclust clusters.
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// merge = clustering result in R format
|
||||
// nclust = number of clusters
|
||||
// Output arguments:
|
||||
// labels = allocated integer array of size n for result
|
||||
//
|
||||
void cutree_k(int n, const int* merge, int nclust, int* labels) {
|
||||
|
||||
int k,m1,m2,j,l;
|
||||
|
||||
if (nclust > n || nclust < 2) {
|
||||
for (j=0; j<n; j++) labels[j] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// assign to each observable the number of its last merge step
|
||||
// beware: indices of observables in merge start at 1 (R convention)
|
||||
std::vector<int> last_merge(n, 0);
|
||||
for (k=1; k<=(n-nclust); k++) {
|
||||
// (m1,m2) = merge[k,]
|
||||
m1 = merge[k-1];
|
||||
m2 = merge[n-1+k-1];
|
||||
if (m1 < 0 && m2 < 0) { // both single observables
|
||||
last_merge[-m1-1] = last_merge[-m2-1] = k;
|
||||
}
|
||||
else if (m1 < 0 || m2 < 0) { // one is a cluster
|
||||
if(m1 < 0) { j = -m1; m1 = m2; } else j = -m2;
|
||||
// merging single observable and cluster
|
||||
for(l = 0; l < n; l++)
|
||||
if (last_merge[l] == m1)
|
||||
last_merge[l] = k;
|
||||
last_merge[j-1] = k;
|
||||
}
|
||||
else { // both cluster
|
||||
for(l=0; l < n; l++) {
|
||||
if( last_merge[l] == m1 || last_merge[l] == m2 )
|
||||
last_merge[l] = k;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// assign cluster labels
|
||||
int label = 0;
|
||||
std::vector<int> z(n,-1);
|
||||
for (j=0; j<n; j++) {
|
||||
if (last_merge[j] == 0) { // still singleton
|
||||
labels[j] = label++;
|
||||
} else {
|
||||
if (z[last_merge[j]] < 0) {
|
||||
z[last_merge[j]] = label++;
|
||||
}
|
||||
labels[j] = z[last_merge[j]];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Assigns cluster labels (0, ..., nclust-1) to the n points such
|
||||
// that the hierarchical clustering is stopped when cluster distance >= cdist
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// merge = clustering result in R format
|
||||
// height = cluster distance at each merge step
|
||||
// cdist = cutoff cluster distance
|
||||
// Output arguments:
|
||||
// labels = allocated integer array of size n for result
|
||||
//
|
||||
void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels) {
|
||||
|
||||
int k;
|
||||
|
||||
for (k=0; k<(n-1); k++) {
|
||||
if (height[k] >= cdist) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
cutree_k(n, merge, n-k, labels);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Hierarchical clustering with one of Daniel Muellner's fast algorithms
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing
|
||||
// the upper triangle (without diagonal elements) of the distance
|
||||
// matrix, e.g. for n=4:
|
||||
// d00 d01 d02 d03
|
||||
// d10 d11 d12 d13 -> d01 d02 d03 d12 d13 d23
|
||||
// d20 d21 d22 d23
|
||||
// d30 d31 d32 d33
|
||||
// method = cluster metric (see enum method_code)
|
||||
// Output arguments:
|
||||
// merge = allocated (n-1)x2 matrix (2*(n-1) array) for storing result.
|
||||
// Result follows R hclust convention:
|
||||
// - observabe indices start with one
|
||||
// - merge[i][] contains the merged nodes in step i
|
||||
// - merge[i][j] is negative when the node is an atom
|
||||
// height = allocated (n-1) array with distances at each merge step
|
||||
// Return code:
|
||||
// 0 = ok
|
||||
// 1 = invalid method
|
||||
//
|
||||
int hclust_fast(int n, double* distmat, int method, int* merge, double* height) {
|
||||
|
||||
// call appropriate culstering function
|
||||
cluster_result Z2(n-1);
|
||||
if (method == HCLUST_METHOD_SINGLE) {
|
||||
// single link
|
||||
MST_linkage_core(n, distmat, Z2);
|
||||
}
|
||||
else if (method == HCLUST_METHOD_COMPLETE) {
|
||||
// complete link
|
||||
NN_chain_core<METHOD_METR_COMPLETE, t_float>(n, distmat, NULL, Z2);
|
||||
}
|
||||
else if (method == HCLUST_METHOD_AVERAGE) {
|
||||
// best average distance
|
||||
double* members = new double[n];
|
||||
for (int i=0; i<n; i++) members[i] = 1;
|
||||
NN_chain_core<METHOD_METR_AVERAGE, t_float>(n, distmat, members, Z2);
|
||||
delete[] members;
|
||||
}
|
||||
else if (method == HCLUST_METHOD_MEDIAN) {
|
||||
// best median distance (beware: O(n^3))
|
||||
generic_linkage<METHOD_METR_MEDIAN, t_float>(n, distmat, NULL, Z2);
|
||||
}
|
||||
else if (method == HCLUST_METHOD_CENTROID) {
|
||||
// best centroid distance (beware: O(n^3))
|
||||
double* members = new double[n];
|
||||
for (int i=0; i<n; i++) members[i] = 1;
|
||||
generic_linkage<METHOD_METR_CENTROID, t_float>(n, distmat, members, Z2);
|
||||
delete[] members;
|
||||
}
|
||||
else {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int* order = new int[n];
|
||||
if (method == HCLUST_METHOD_MEDIAN || method == HCLUST_METHOD_CENTROID) {
|
||||
generate_R_dendrogram<true>(merge, height, order, Z2, n);
|
||||
} else {
|
||||
generate_R_dendrogram<false>(merge, height, order, Z2, n);
|
||||
}
|
||||
delete[] order; // only needed for visualization
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Build condensed distance matrix
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// m = dimension of observable
|
||||
// Output arguments:
|
||||
// out = allocated integer array of size n * (n - 1) / 2 for result
|
||||
void hclust_pdist(int n, int m, double* pts, double* out) {
|
||||
int ii = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
for (int j = i + 1; j < n; j++) {
|
||||
// Compute euclidian distance
|
||||
double d = 0;
|
||||
for (int k = 0; k < m; k ++) {
|
||||
double error = pts[i * m + k] - pts[j * m + k];
|
||||
d += (error * error);
|
||||
}
|
||||
out[ii] = d;//sqrt(d);
|
||||
ii++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cluster_points_centroid(int n, int m, double* pts, double dist, int* idx) {
|
||||
double* pdist = new double[n * (n - 1) / 2];
|
||||
int* merge = new int[2 * (n - 1)];
|
||||
double* height = new double[n - 1];
|
||||
|
||||
hclust_pdist(n, m, pts, pdist);
|
||||
hclust_fast(n, pdist, HCLUST_METHOD_CENTROID, merge, height);
|
||||
cutree_cdist(n, merge, height, dist, idx);
|
||||
|
||||
delete[] pdist;
|
||||
delete[] merge;
|
||||
delete[] height;
|
||||
}
|
||||
}
|
|
@ -1,77 +0,0 @@
|
|||
//
|
||||
// C++ standalone verion of fastcluster by Daniel Muellner
|
||||
//
|
||||
// Copyright: Daniel Muellner, 2011
|
||||
// Christoph Dalitz, 2018
|
||||
// License: BSD style license
|
||||
// (see the file LICENSE for details)
|
||||
//
|
||||
|
||||
#ifndef fastclustercpp_H
|
||||
#define fastclustercpp_H
|
||||
|
||||
//
|
||||
// Assigns cluster labels (0, ..., nclust-1) to the n points such
|
||||
// that the cluster result is split into nclust clusters.
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// merge = clustering result in R format
|
||||
// nclust = number of clusters
|
||||
// Output arguments:
|
||||
// labels = allocated integer array of size n for result
|
||||
//
|
||||
void cutree_k(int n, const int* merge, int nclust, int* labels);
|
||||
|
||||
//
|
||||
// Assigns cluster labels (0, ..., nclust-1) to the n points such
|
||||
// that the hierarchical clsutering is stopped at cluster distance cdist
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// merge = clustering result in R format
|
||||
// height = cluster distance at each merge step
|
||||
// cdist = cutoff cluster distance
|
||||
// Output arguments:
|
||||
// labels = allocated integer array of size n for result
|
||||
//
|
||||
void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels);
|
||||
|
||||
//
|
||||
// Hierarchical clustering with one of Daniel Muellner's fast algorithms
|
||||
//
|
||||
// Input arguments:
|
||||
// n = number of observables
|
||||
// distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing
|
||||
// the upper triangle (without diagonal elements) of the distance
|
||||
// matrix, e.g. for n=4:
|
||||
// d00 d01 d02 d03
|
||||
// d10 d11 d12 d13 -> d01 d02 d03 d12 d13 d23
|
||||
// d20 d21 d22 d23
|
||||
// d30 d31 d32 d33
|
||||
// method = cluster metric (see enum method_code)
|
||||
// Output arguments:
|
||||
// merge = allocated (n-1)x2 matrix (2*(n-1) array) for storing result.
|
||||
// Result follows R hclust convention:
|
||||
// - observabe indices start with one
|
||||
// - merge[i][] contains the merged nodes in step i
|
||||
// - merge[i][j] is negative when the node is an atom
|
||||
// height = allocated (n-1) array with distances at each merge step
|
||||
// Return code:
|
||||
// 0 = ok
|
||||
// 1 = invalid method
|
||||
//
|
||||
int hclust_fast(int n, double* distmat, int method, int* merge, double* height);
|
||||
enum hclust_fast_methods {
|
||||
HCLUST_METHOD_SINGLE = 0,
|
||||
HCLUST_METHOD_COMPLETE = 1,
|
||||
HCLUST_METHOD_AVERAGE = 2,
|
||||
HCLUST_METHOD_MEDIAN = 3,
|
||||
HCLUST_METHOD_CENTROID = 5,
|
||||
};
|
||||
|
||||
void hclust_pdist(int n, int m, double* pts, double* out);
|
||||
void cluster_points_centroid(int n, int m, double* pts, double dist, int* idx);
|
||||
|
||||
|
||||
#endif
|
|
@ -1,115 +0,0 @@
|
|||
//
|
||||
// Excerpt from fastcluster_R.cpp
|
||||
//
|
||||
// Copyright: Daniel Müllner, 2011 <http://danifold.net>
|
||||
//
|
||||
|
||||
struct pos_node {
|
||||
t_index pos;
|
||||
int node;
|
||||
};
|
||||
|
||||
void order_nodes(const int N, const int * const merge, const t_index * const node_size, int * const order) {
|
||||
/* Parameters:
|
||||
N : number of data points
|
||||
merge : (N-1)×2 array which specifies the node indices which are
|
||||
merged in each step of the clustering procedure.
|
||||
Negative entries -1...-N point to singleton nodes, while
|
||||
positive entries 1...(N-1) point to nodes which are themselves
|
||||
parents of other nodes.
|
||||
node_size : array of node sizes - makes it easier
|
||||
order : output array of size N
|
||||
|
||||
Runtime: Θ(N)
|
||||
*/
|
||||
auto_array_ptr<pos_node> queue(N/2);
|
||||
|
||||
int parent;
|
||||
int child;
|
||||
t_index pos = 0;
|
||||
|
||||
queue[0].pos = 0;
|
||||
queue[0].node = N-2;
|
||||
t_index idx = 1;
|
||||
|
||||
do {
|
||||
--idx;
|
||||
pos = queue[idx].pos;
|
||||
parent = queue[idx].node;
|
||||
|
||||
// First child
|
||||
child = merge[parent];
|
||||
if (child<0) { // singleton node, write this into the 'order' array.
|
||||
order[pos] = -child;
|
||||
++pos;
|
||||
}
|
||||
else { /* compound node: put it on top of the queue and decompose it
|
||||
in a later iteration. */
|
||||
queue[idx].pos = pos;
|
||||
queue[idx].node = child-1; // convert index-1 based to index-0 based
|
||||
++idx;
|
||||
pos += node_size[child-1];
|
||||
}
|
||||
// Second child
|
||||
child = merge[parent+N-1];
|
||||
if (child<0) {
|
||||
order[pos] = -child;
|
||||
}
|
||||
else {
|
||||
queue[idx].pos = pos;
|
||||
queue[idx].node = child-1;
|
||||
++idx;
|
||||
}
|
||||
} while (idx>0);
|
||||
}
|
||||
|
||||
#define size_(r_) ( ((r_<N) ? 1 : node_size[r_-N]) )
|
||||
|
||||
template <const bool sorted>
|
||||
void generate_R_dendrogram(int * const merge, double * const height, int * const order, cluster_result & Z2, const int N) {
|
||||
// The array "nodes" is a union-find data structure for the cluster
|
||||
// identites (only needed for unsorted cluster_result input).
|
||||
union_find nodes(sorted ? 0 : N);
|
||||
if (!sorted) {
|
||||
std::stable_sort(Z2[0], Z2[N-1]);
|
||||
}
|
||||
|
||||
t_index node1, node2;
|
||||
auto_array_ptr<t_index> node_size(N-1);
|
||||
|
||||
for (t_index i=0; i<N-1; ++i) {
|
||||
// Get two data points whose clusters are merged in step i.
|
||||
// Find the cluster identifiers for these points.
|
||||
if (sorted) {
|
||||
node1 = Z2[i]->node1;
|
||||
node2 = Z2[i]->node2;
|
||||
}
|
||||
else {
|
||||
node1 = nodes.Find(Z2[i]->node1);
|
||||
node2 = nodes.Find(Z2[i]->node2);
|
||||
// Merge the nodes in the union-find data structure by making them
|
||||
// children of a new node.
|
||||
nodes.Union(node1, node2);
|
||||
}
|
||||
// Sort the nodes in the output array.
|
||||
if (node1>node2) {
|
||||
t_index tmp = node1;
|
||||
node1 = node2;
|
||||
node2 = tmp;
|
||||
}
|
||||
/* Conversion between labeling conventions.
|
||||
Input: singleton nodes 0,...,N-1
|
||||
compound nodes N,...,2N-2
|
||||
Output: singleton nodes -1,...,-N
|
||||
compound nodes 1,...,N
|
||||
*/
|
||||
merge[i] = (node1<N) ? -static_cast<int>(node1)-1
|
||||
: static_cast<int>(node1)-N+1;
|
||||
merge[i+N-1] = (node2<N) ? -static_cast<int>(node2)-1
|
||||
: static_cast<int>(node2)-N+1;
|
||||
height[i] = Z2[i]->dist;
|
||||
node_size[i] = size_(node1) + size_(node2);
|
||||
}
|
||||
|
||||
order_nodes(N, merge, node_size, order);
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,28 +0,0 @@
|
|||
import os
|
||||
import numpy as np
|
||||
|
||||
from cffi import FFI
|
||||
from common.ffi_wrapper import suffix
|
||||
|
||||
cluster_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)))
|
||||
cluster_fn = os.path.join(cluster_dir, "libfastcluster"+suffix())
|
||||
|
||||
ffi = FFI()
|
||||
ffi.cdef("""
|
||||
int hclust_fast(int n, double* distmat, int method, int* merge, double* height);
|
||||
void cutree_cdist(int n, const int* merge, double* height, double cdist, int* labels);
|
||||
void hclust_pdist(int n, int m, double* pts, double* out);
|
||||
void cluster_points_centroid(int n, int m, double* pts, double dist, int* idx);
|
||||
""")
|
||||
|
||||
hclust = ffi.dlopen(cluster_fn)
|
||||
|
||||
|
||||
def cluster_points_centroid(pts, dist):
|
||||
pts = np.ascontiguousarray(pts, dtype=np.float64)
|
||||
pts_ptr = ffi.cast("double *", pts.ctypes.data)
|
||||
n, m = pts.shape
|
||||
|
||||
labels_ptr = ffi.new("int[]", n)
|
||||
hclust.cluster_points_centroid(n, m, pts_ptr, dist**2, labels_ptr)
|
||||
return list(labels_ptr)
|
|
@ -1,35 +0,0 @@
|
|||
#include <cassert>
|
||||
|
||||
extern "C" {
|
||||
#include "fastcluster.h"
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, const char* argv[]) {
|
||||
const int n = 11;
|
||||
const int m = 3;
|
||||
double* pts = new double[n*m]{59.26000137, -9.35999966, -5.42500019,
|
||||
91.61999817, -0.31999999, -2.75,
|
||||
31.38000031, 0.40000001, -0.2,
|
||||
89.57999725, -8.07999992, -18.04999924,
|
||||
53.42000122, 0.63999999, -0.175,
|
||||
31.38000031, 0.47999999, -0.2,
|
||||
36.33999939, 0.16, -0.2,
|
||||
53.33999939, 0.95999998, -0.175,
|
||||
59.26000137, -9.76000023, -5.44999981,
|
||||
33.93999977, 0.40000001, -0.22499999,
|
||||
106.74000092, -5.76000023, -18.04999924};
|
||||
|
||||
int * idx = new int[n];
|
||||
int * correct_idx = new int[n]{0, 1, 2, 3, 4, 2, 5, 4, 0, 5, 6};
|
||||
|
||||
cluster_points_centroid(n, m, pts, 2.5 * 2.5, idx);
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
assert(idx[i] == correct_idx[i]);
|
||||
}
|
||||
|
||||
delete[] idx;
|
||||
delete[] correct_idx;
|
||||
delete[] pts;
|
||||
}
|
Loading…
Reference in New Issue