6 Copyright (C) e2its - All Rights Reserved 7 * Unauthorized copying of this file, via any medium, is strictly prohibited 8 * Proprietary and confidential 10 * This file is part of the gDayF project. 12 * Written by Jose L. Sanchez <e2its.es@gmail.com>, 2016-2019 17 $betweenss: is the between-cluster sum of squares. In fact, it is the mean of distances between cluster centers. 18 One expects this ratio to be as high as possible, since we would like to have heterogeneous clusters. 19 2 · ( ∑m ∑n | CmP - CnP |² ) / ( p · (p - 1) ) 21 $withinss: is the within-cluster sum of squares, so it results in a vector with one number for each cluster. 22 One expects this ratio to be as low as possible for each cluster, 23 since we would like to have homogeneity within the clusters. 24 ( ∑m | Xm - C |² ) / p 26 Some equalities may help to understand: 27 $tot.withinss = sum ( $withinss ) 28 $totss = $tot.withinss + $betweenss 31 from collections
import OrderedDict
44 ClusteringMetricMetadata.__init__(self)
51 for parameter, _
in self.items():
53 if perf_metrics
is not None:
54 if parameter ==
'centroid_stats':
55 self[parameter] = perf_metrics._metric_json[parameter].as_data_frame()
56 self[
'k'] = int(self[parameter][
'centroid'].max())
57 self[parameter] = json.loads(self[parameter].to_json(orient=
'split'),
58 object_pairs_hook=OrderedDict)
60 self[parameter] = perf_metrics._metric_json[parameter]
61 except KeyError
as kexecution_error:
64 except AttributeError
as aexecution_error:
65 print(
'Trace: ' + repr(aexecution_error))
66 except TypeError
as texecution_error:
67 print(
'Trace: ' + repr(texecution_error))