Skip to content

Commit

Permalink
Merge pull request scikit-learn#2 from jjerphan/pairwise_aggregation_cython-oop
Browse files Browse the repository at this point in the history

ParallelReduction class hierarchy (POC for scikit-learn#20254)
  • Loading branch information
jjerphan committed Jul 1, 2021
2 parents cac7313 + ad496f0 commit 3cdc476
Show file tree
Hide file tree
Showing 23 changed files with 958 additions and 626 deletions.
4 changes: 2 additions & 2 deletions sklearn/cluster/_agglomerative.py
Expand Up @@ -16,8 +16,8 @@

from ..base import BaseEstimator, ClusterMixin
from ..metrics.pairwise import paired_distances, pairwise_distances
from ..neighbors import DistanceMetric
from ..neighbors._dist_metrics import METRIC_MAPPING
from ..metrics import DistanceMetric
from ..metrics._dist_metrics import METRIC_MAPPING
from ..utils import check_array
from ..utils._fast_dict import IntFloatDict
from ..utils.fixes import _astype_copy_false
Expand Down
15 changes: 7 additions & 8 deletions sklearn/cluster/_hierarchical_fast.pyx
Expand Up @@ -13,7 +13,7 @@ ctypedef np.int8_t INT8

np.import_array()

from ..neighbors._dist_metrics cimport DistanceMetric
from ..metrics._dist_metrics cimport DistanceMetric
from ..utils._fast_dict cimport IntFloatDict

# C++
Expand Down Expand Up @@ -236,8 +236,8 @@ def max_merge(IntFloatDict a, IntFloatDict b,
def average_merge(IntFloatDict a, IntFloatDict b,
np.ndarray[ITYPE_t, ndim=1] mask,
ITYPE_t n_a, ITYPE_t n_b):
"""Merge two IntFloatDicts with the average strategy: when the
same key is present in the two dicts, the weighted average of the two
"""Merge two IntFloatDicts with the average strategy: when the
same key is present in the two dicts, the weighted average of the two
values is used.
Parameters
Expand Down Expand Up @@ -290,13 +290,13 @@ def average_merge(IntFloatDict a, IntFloatDict b,


###############################################################################
# An edge object for fast comparisons
# An edge object for fast comparisons

cdef class WeightedEdge:
cdef public ITYPE_t a
cdef public ITYPE_t b
cdef public DTYPE_t weight

def __init__(self, DTYPE_t weight, ITYPE_t a, ITYPE_t b):
self.weight = weight
self.a = a
Expand Down Expand Up @@ -326,7 +326,7 @@ cdef class WeightedEdge:
return self.weight > other.weight
elif op == 5:
return self.weight >= other.weight

def __repr__(self):
return "%s(weight=%f, a=%i, b=%i)" % (self.__class__.__name__,
self.weight,
Expand Down Expand Up @@ -475,7 +475,7 @@ def mst_linkage_core(
dist_metric: DistanceMetric
A DistanceMetric object conforming to the API from
``sklearn.neighbors._dist_metrics.pxd`` that will be
``sklearn.metrics._dist_metrics.pxd`` that will be
used to compute distances.
Returns
Expand Down Expand Up @@ -534,4 +534,3 @@ def mst_linkage_core(
current_node = new_node

return np.array(result)

5 changes: 3 additions & 2 deletions sklearn/cluster/tests/test_hierarchical.py
Expand Up @@ -16,7 +16,7 @@
from scipy.cluster import hierarchy

from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.neighbors.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS
from sklearn.metrics.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS
from sklearn.utils._testing import assert_almost_equal, create_memmap_backed_data
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import ignore_warnings
Expand All @@ -30,14 +30,15 @@
_fix_connectivity,
)
from sklearn.feature_extraction.image import grid_to_graph
from sklearn.metrics import DistanceMetric
from sklearn.metrics.pairwise import (
PAIRED_DISTANCES,
cosine_distances,
manhattan_distances,
pairwise_distances,
)
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.neighbors import kneighbors_graph, DistanceMetric
from sklearn.neighbors import kneighbors_graph
from sklearn.cluster._hierarchical_fast import (
average_merge,
max_merge,
Expand Down
3 changes: 3 additions & 0 deletions sklearn/metrics/__init__.py
Expand Up @@ -36,6 +36,8 @@
from ._classification import brier_score_loss
from ._classification import multilabel_confusion_matrix

from ._dist_metrics import DistanceMetric

from . import cluster
from .cluster import adjusted_mutual_info_score
from .cluster import adjusted_rand_score
Expand Down Expand Up @@ -113,6 +115,7 @@
"davies_bouldin_score",
"DetCurveDisplay",
"det_curve",
"DistanceMetric",
"euclidean_distances",
"explained_variance_score",
"f1_score",
Expand Down

0 comments on commit 3cdc476

Please sign in to comment.