6 Copyright (C) e2its - All Rights Reserved 7 * Unauthorized copying of this file, via any medium is strictly prohibited 8 * Proprietary and confidential 10 * This file is part of gDayF project. 12 * Written by Jose L. Sanchez <e2its.es@gmail.com>, 2016-2019 15 from collections
import OrderedDict
17 from pandas
import DataFrame
20 from pyspark.mllib.evaluation
import MulticlassMetrics
35 BinomialMetricMetadata.__init__(self)
45 if evaluator
is not None and data
is not None:
46 self[
'AUC'] = evaluator.evaluate(data, {evaluator.metricName:
"areaUnderROC"})
47 self[
'AUPR'] = evaluator.evaluate(data, {evaluator.metricName:
"areaUnderPR"})
48 self[
'nobs'] = data.count()
49 self[
'model_category'] =
'Binomial' 50 self[
'max_criteria_and_metric_scores'] =
None 51 self[
'RMSE'] = 10e+308
54 tp = data.select(
"prediction", data[objective_column].cast(
'float'))\
55 .toDF(
"prediction", objective_column).rdd.map(tuple)
56 metrics = MulticlassMetrics(tp)
57 pdf = DataFrame(data=np.array(metrics.confusionMatrix().values).reshape((2, 2)),
59 pdf[
'total'] = pdf.sum(axis=1)
60 index = pdf.index.tolist()
62 pdf = pdf.append(pdf.sum(axis=0), ignore_index=
True)
64 self[
'cm'] = json.loads(pdf.to_json(orient=
'split'), object_pairs_hook=OrderedDict)
66 self[
'scoring_time'] = int(time.time() - start)