DayF core  1.2.1.2
DayF (Decision at your Fingertips) is a freeware AutoML development framework that lets developers work with Machine Learning models without any prior knowledge of AI, simply by providing a CSV dataset and the objective column
clusteringmetricmetadata.py
1 
4 
5 '''
6 Copyright (C) e2its - All Rights Reserved
7  * Unauthorized copying of this file, via any medium is strictly prohibited
8  * Proprietary and confidential
9  *
10  * This file is part of gDayF project.
11  *
12  * Written by Jose L. Sanchez <e2its.es@gmail.com>, 2016-2019
13 '''
14 
15 '''
16 The basic ones
17 $betweenss: is the between-clusters sum of squares. In fact, it is the mean of distances between cluster centers.
18 One expects this ratio to be as high as possible, since we would like to have heterogeneous clusters.
19 2 · ( ∑m ∑n | CmP - CnP |2 ) / p · p - 1
20 
21 $withinss: is the within-cluster sum of squares, so it results in a vector with one number for each cluster.
22 One expects this ratio to be as low as possible for each cluster,
23 since we would like to have homogeneity within the clusters.
24 ( ∑m | Xm - C |2 ) / p
25 
26 Some equalities may help to understand:
27 $tot.withinss = sum ( $withinss )
28 $totss = $tot.withinss + $betweenss
29 '''
30 from gdayf.metrics.metricmetadata import MetricMetadata
31 from pandas import DataFrame
32 from collections import OrderedDict
33 import json
34 import time
35 
36 
37 # Class Base for Clustering metrics as OrderedDict
38 #
39 # Base Metrics for Clustering
40 # [No expanded metrics]
42 
43 
45  def __init__(self):
46  MetricMetadata.__init__(self)
47  self['betweenss'] = None
48  self['tot_withinss'] = None
49  self['totss'] = None
50  self['centroid_stats'] = None
51 
52 
54  def set_precision(self, threshold):
55  pass
56 
57 
60  def set_h2ometrics(self, perf_metrics):
61  for parameter, _ in self.items():
62  try:
63  if perf_metrics is not None:
64  if parameter == 'centroid_stats':
65  self[parameter] = perf_metrics._metric_json[parameter].as_data_frame()
66  self['k'] = int(self[parameter]['centroid'].max())
67  self[parameter] = json.loads(self[parameter].to_json(orient='split'),
68  object_pairs_hook=OrderedDict)
69  else:
70  self[parameter] = perf_metrics._metric_json[parameter]
71  except KeyError as kexecution_error:
72  pass
73  #print('Trace: ' + repr(kexecution_error))
74  except AttributeError as aexecution_error:
75  print('Trace: ' + repr(aexecution_error))
76  except TypeError as texecution_error:
77  print('Trace: ' + repr(texecution_error))
78 
79 
84  def set_sparkmetrics(self, model, data):
85 
86  start = time.time()
87  if model is not None and data is not None:
88  self['clusterCenters'] = DataFrame(model.clusterCenters())
89  self['k'] = self['clusterCenters'].shape[0]
90  self['clusterCenters'] = json.loads(DataFrame(model.clusterCenters()).to_json(orient='split'),
91  object_pairs_hook=OrderedDict)
92  self['bettweenss'] = 1e15 # Need to be implemented
93  self['totss'] = 1e15 # Need to be implemented
94  self['tot_withinss'] = model.computeCost(data)
95  self['nobs'] = data.count()
96  self['model_category'] = 'Clustering'
97  self['predictions'] = None
98  self['RMSE'] = 10e+308
99  self['scoring_time'] = int(time.time() - start)
Defines the base Metric object as an OrderedDict() of common measures for all metric types in a unified way...
Class Base for metrics as OrderedDict.
def set_precision(self, threshold)
Method to set the precision measure. Not implemented yet.
def set_sparkmetrics(self, model, data)
Method to load Clustering metrics from a Spark clustering model and its dataset.
def set_h2ometrics(self, perf_metrics)
Method to load Clustering metrics from H2OClusteringModelMetrics class.