6 Copyright (C) e2its - All Rights Reserved 7 * Unauthorized copying of this file, via any medium is strictly prohibited 8 * Proprietary and confidential 10 * This file is part of gDayF project. 12 * Written by Jose L. Sanchez <e2its.es@gmail.com>, 2016-2019 15 from collections
import OrderedDict
17 from pandas
import DataFrame
21 from pyspark.mllib.evaluation
import MulticlassMetrics
32 MultinomialMetricMetadata.__init__(self)
# Fill this metric mapping with multinomial metrics computed from a Spark
# evaluator and a scored Spark DataFrame, then record the elapsed time.
#
# Names read here but bound outside the visible chunk: `self`, `evaluator`,
# `data`, `objective_column`, `start`.
# NOTE(review): the enclosing method header is not visible in this chunk and
# the original indentation was lost; the nesting below is reconstructed from
# data dependencies (everything that dereferences `data` sits under the None
# guard) -- confirm against the full file.
if evaluator is not None and data is not None:
    # One evaluator pass per metric; the metric is selected by overriding the
    # evaluator's `metricName` param for that single call.
    for metric_name in ('f1', 'weightedPrecision', 'weightedRecall', 'accuracy'):
        self[metric_name] = evaluator.evaluate(data, {evaluator.metricName: metric_name})

    self['nobs'] = data.count()
    self['model_category'] = 'Multinomial'
    # 10e+308 (= 1e309) is beyond the float64 range, so this literal evaluates
    # to float('inf'). Left unchanged.
    self['RMSE'] = 10e+308

    # Confusion matrix, built from (prediction, objective cast to float) pairs.
    dimcount = data.select(objective_column).distinct().count()
    tp = data.select("prediction", data[objective_column].cast('float'))\
        .toDF("prediction", objective_column).rdd.map(tuple)
    metrics = MulticlassMetrics(tp)
    # NOTE(review): Spark documents DenseMatrix storage as column-major, while
    # this reshape is row-major, so rows/columns may be swapped relative to
    # confusionMatrix() -- confirm which orientation consumers of 'cm' expect
    # before changing it.
    pdf = DataFrame(data=np.array(metrics.confusionMatrix().values).reshape((dimcount, dimcount)),
                    columns=range(0, dimcount))
    pdf['total'] = pdf.sum(axis=1)
    # NOTE(review): `index` is not read by any statement visible in this chunk;
    # kept in case code outside the chunk uses it.
    index = pdf.index.tolist()

    # Append the per-column totals as a final row. DataFrame.append() was
    # removed in pandas 2.0; enlarging via .loc with the next integer label
    # gives the same row that append(..., ignore_index=True) produced, because
    # the frame still carries its default 0..n-1 index at this point.
    pdf.loc[len(pdf)] = pdf.sum(axis=0)

    # Round-trip through JSON so the stored value is plain ordered mappings
    # and lists in pandas' 'split' layout (columns / index / data).
    self['cm'] = json.loads(pdf.to_json(orient='split'), object_pairs_hook=OrderedDict)

self['scoring_time'] = int(time.time() - start)