DayF core  1.2.1.2
DayF (Decision at your Fingertips) is a freeware AutoML development framework that lets developers work with Machine Learning models without any prior knowledge of AI, simply by providing a CSV dataset and the objective column
sparkmultinomialmetricmetadata.py
1 
4 
5 '''
6 Copyright (C) e2its - All Rights Reserved
7  * Unauthorized copying of this file, via any medium is strictly prohibited
8  * Proprietary and confidential
9  *
10  * This file is part of gDayF project.
11  *
12  * Written by Jose L. Sanchez <e2its.es@gmail.com>, 2016-2019
13 '''
14 
15 from collections import OrderedDict
16 from gdayf.metrics.multinomialmetricmetadata import MultinomialMetricMetadata
17 from pandas import DataFrame
18 import numpy as np
19 import time
20 import json
21 from pyspark.mllib.evaluation import MulticlassMetrics
22 
23 
24 # Base class for Multinomial metrics as OrderedDict
25 #
26 # Base Metrics for Multinomial
27 # [hit_ratio_table (as json_dataframe(orient=split)), cm (as json_dataframe(orient=split) structure)]
29 
def __init__(self):
    """Initialise the metric container by delegating to the base class.

    No Spark-specific state is added here; everything is set up by
    ``MultinomialMetricMetadata.__init__``.
    """
    # Explicit base-class call (not super()) to keep the original dispatch.
    MultinomialMetricMetadata.__init__(self)
33 
34 
def set_metrics(self, evaluator, data, objective_column):
    """Load multinomial metrics from a Spark evaluator and scored data.

    Stores 'f1', 'weightedPrecision', 'weightedRecall', 'accuracy', 'nobs',
    'model_category', 'RMSE', 'cm' and 'scoring_time' on this mapping.

    :param evaluator: object exposing ``evaluate(data, params)`` and a
        ``metricName`` param (presumably a Spark
        MulticlassClassificationEvaluator -- confirm against callers)
    :param data: scored Spark DataFrame containing a "prediction" column
        and the objective column
    :param objective_column: name of the label column in ``data``
    """
    start = time.time()
    # NOTE(review): the listing this was recovered from lost its indentation;
    # every assignment below (including 'scoring_time') is assumed to belong
    # to the "evaluator and data are present" branch -- confirm.
    if evaluator is None or data is None:
        return

    # One evaluator pass per metric, stored under the metric's own name.
    for metric in ('f1', 'weightedPrecision', 'weightedRecall', 'accuracy'):
        self[metric] = evaluator.evaluate(data, {evaluator.metricName: metric})
    self['nobs'] = data.count()
    self['model_category'] = 'Multinomial'
    # NOTE(review): 10e+308 is outside the float range and evaluates to inf;
    # presumably a "worst possible" placeholder -- confirm before changing.
    self['RMSE'] = 10e+308

    # Generating ConfusionMatrix from (prediction, label) pairs.
    scored = data.select("prediction", data[objective_column].cast('float'))\
        .toDF("prediction", objective_column).rdd.map(tuple)
    matrix = MulticlassMetrics(scored).confusionMatrix()
    # Size the frame from the matrix itself so the reshape always matches
    # and no separate distinct().count() job is needed.
    dim = matrix.numRows
    # NOTE(review): DenseMatrix.values is documented as column-major, so this
    # row-major reshape may be the transpose of Spark's confusion matrix.
    # Orientation is kept exactly as before -- confirm what consumers expect.
    pdf = DataFrame(data=np.array(matrix.values).reshape((dim, dim)),
                    columns=range(0, dim))
    pdf['total'] = pdf.sum(axis=1)
    # DataFrame.append was removed in pandas 2.0; .loc enlargement adds the
    # same 'total' row (column sums, including the 'total' column).
    pdf.loc['total'] = pdf.sum(axis=0)
    self['cm'] = json.loads(pdf.to_json(orient='split'), object_pairs_hook=OrderedDict)

    self['scoring_time'] = int(time.time() - start)
Defines the Multinomial Metric object as an OrderedDict() of common measures for all frameworks on a unified...
def set_metrics(self, evaluator, data, objective_column)
Method to load Multinomial metrics from Spark MulticlassClassificationEvaluator class.