DayF core  1.2.1.2
DayF (Decision at your Fingertips) is an AutoML freeware development framework that lets developers work with Machine Learning models without any knowledge of AI, simply by taking a csv dataset and the objective column
adviserbase.py
1 
5 
6 '''
7 Copyright (C) e2its - All Rights Reserved
8  * Unauthorized copying of this file, via any medium is strictly prohibited
9  * Proprietary and confidential
10  *
11  * This file is part of gDayF project.
12  *
13  * Written by Jose L. Sanchez <e2its.es@gmail.com>, 2016-2019
14 '''
15 
16 from gdayf.models.frameworkmetadata import FrameworkMetadata
17 from gdayf.common.armetadata import ArMetadata
18 from gdayf.normalizer.normalizer import Normalizer
19 from gdayf.models.atypesmetadata import ATypesMetadata
20 from gdayf.logs.logshandler import LogsHandler
21 from gdayf.common.utils import compare_sorted_list_dict
22 from gdayf.common.utils import get_model_fw
23 from gdayf.common.constants import *
24 from collections import OrderedDict
25 from time import time
26 from hashlib import md5 as md5
27 from json import dumps
28 from copy import deepcopy
29 import importlib
30 
31 
32 
class Adviser(object):
    '''
    Base class for A*-driven AutoML analysis planning: builds, filters and
    prioritizes candidate model analyses over successive iterations.
    '''
    # Iteration depth; class-level default of 1, advanced per instance via
    # ``self.deepness += 1`` at the end of each analysis pass.
    deepness = 1
39 
46 
47  def __init__(self, e_c, deep_impact=5, metric='accuracy', dataframe_name='', hash_dataframe=''):
48  self._ec = e_c
49  self._labels = self._ec.labels.get_config()['messages']['adviser']
50  self._config = self._ec.config.get_config()['optimizer']
51  self._frameworks = self._ec.config.get_config()['frameworks']
52  self._logging = LogsHandler(self._ec)
53  self.timestamp = time()
54  self.an_objective = None
55  self.deep_impact = deep_impact
56  self.analysis_recommendation_order = list()
57  self.analyzed_models = list()
58  self.excluded_models = list()
59  self.next_analysis_list = list()
60  self.metric = metric
61  self.dataframe_name = dataframe_name
62  self.hash_dataframe = hash_dataframe
63 
64 
65 
72  def set_recommendations(self, dataframe_metadata, objective_column, amode=POC, atype=None):
73  supervised = True
74  if objective_column is None:
75  supervised = False
76  self._logging.log_exec(self._ec.get_id_analysis(), 'AdviserAStar',
77  self._labels["ana_type"],
78  str(atype) + ' (' + str(self.deepness) + ')')
79  if supervised:
80  if self.deepness == 1:
81  self.an_objective = self.get_analysis_objective(dataframe_metadata,
82  objective_column=objective_column,
83  atype=atype)
84  if amode == POC:
85  return self.analysispoc(dataframe_metadata, objective_column, amode=FAST)
86  if amode in [FAST, NORMAL]:
87  return self.analysisnormal(dataframe_metadata, objective_column, amode=amode)
88  elif amode in [FAST_PARANOIAC, PARANOIAC]:
89  return self.analysisparanoiac(dataframe_metadata, objective_column, amode=amode)
90  else:
91  if amode in [ANOMALIES]:
92  self.an_objective = ATypesMetadata(anomalies=True)
93  return self.analysisanomalies(dataframe_metadata, objective_column, amode=amode)
94  elif amode in [CLUSTERING]:
95  self.an_objective = ATypesMetadata(clustering=True)
96  return self.analysisclustering(dataframe_metadata, objective_column, amode=amode)
97 
98 
104  def analysisnormal(self, dataframe_metadata, objective_column, amode):
105  self.next_analysis_list.clear()
106  if self.deepness == 1:
107  self.base_iteration(amode, dataframe_metadata, objective_column)
108  elif self.deepness > self.deep_impact:
109  self.next_analysis_list = None
110  elif self.deepness == 2:
111  fw_model_list = list()
112  # Added 31/08/2017
113  best_models = list()
114  # End - Added 31/08/2017
115  aux_loop_controller = len(self.analysis_recommendation_order)
116  for indexer in range(0, aux_loop_controller):
117  try:
118  model = self.analysis_recommendation_order[indexer]
119  if model['status'] == 'Executed':
120  model_type = model['model_parameters'][get_model_fw(model)]['model']
121  if model_type not in best_models and len(best_models) < self._config['adviser_L2_wide']:
122  fw_model_list.extend(self.optimize_models(self.analysis_recommendation_order[indexer]))
123  best_models.append(model_type)
124  except TypeError:
125  ''' If all optimize_models doesn't return new models
126  register it as evaluated and seleted'''
127  best_models.append(model_type)
128  self.next_analysis_list.extend(fw_model_list)
129  if len(self.next_analysis_list) == 0:
130  self.next_analysis_list = None
131  elif self.next_analysis_list is not None:
132  fw_model_list = list()
133  # Added 31/08/2017
134  best_models = list()
135  # End - Added 31/08/2017
136  aux_loop_controller = len(self.analysis_recommendation_order)
137  for indexer in range(0, aux_loop_controller):
138  try:
139  model = self.analysis_recommendation_order[indexer]
140  if model['status'] == 'Executed':
141  model_type = model['model_parameters'][get_model_fw(model)]['model']
142  if model_type not in best_models and len(best_models) < self._config['adviser_normal_wide']:
143  fw_model_list.extend(self.optimize_models(self.analysis_recommendation_order[indexer]))
144  #print("Trace:%s-%s" % (model_type, best_models))
145  best_models.append(model_type)
146  except TypeError:
147  ''' If all optimize_models doesn't return new models
148  register it as evaluated and seleted'''
149  best_models.append(model_type)
150 
151  '''' Modified 20/09/2017
152  # Get two most potential best models
153  fw_model_list = list()
154  for indexer in range(0, 2):
155  try:
156  fw_model_list.extend(self.optimize_models(self.analysis_recommendation_order[indexer]))
157  except TypeError:
158  pass
159  #if fw_model_list is not None:'''
160  self.next_analysis_list.extend(fw_model_list)
161  if len(self.next_analysis_list) == 0:
162  self.next_analysis_list = None
163  self.deepness += 1
164  return self._ec.get_id_analysis(), self.next_analysis_list
165 
166 
172  def analysispoc(self, dataframe_metadata, objective_column, amode):
173  self.next_analysis_list.clear()
174  if self.deepness == 1:
175  self.base_iteration(amode, dataframe_metadata, objective_column)
176  elif self.deepness > self.deep_impact:
177  self.next_analysis_list = None
178  elif self.next_analysis_list is not None:
179  # Get two most potential best models
180  fw_model_list = list()
181  for indexer in range(0, 1):
182  try:
183  if self.analysis_recommendation_order[indexer]['status'] == 'Executed':
184  fw_model_list.extend(self.optimize_models(self.analysis_recommendation_order[indexer]))
185  except TypeError:
186  pass
187  # if fw_model_list is not None:
188  self.next_analysis_list.extend(fw_model_list)
189  if len(self.next_analysis_list) == 0:
190  self.next_analysis_list = None
191  self.deepness += 1
192  return self._ec.get_id_analysis(), self.next_analysis_list
193 
194 
199  def analysis_specific(self, dataframe_metadata, list_ar_metadata):
200  self.next_analysis_list.clear()
201  if self.deepness == 1:
202  #Check_dataframe_metadata compatibility
203  self.base_specific(dataframe_metadata, list_ar_metadata)
204  # Added 22/09/1974
205  elif self.deepness > self.deep_impact:
206  self.next_analysis_list = None
207  elif self.next_analysis_list is not None:
208  fw_model_list = list()
209  # Added 31/08/2017
210  best_models = list()
211  # End - Added 31/08/2017
212  aux_loop_controller = len(self.analysis_recommendation_order)
213  for indexer in range(0, aux_loop_controller):
214  try:
215  # Modified 31/08/2017
216  model = self.analysis_recommendation_order[indexer]
217  if model['status'] == 'Executed':
218  model_type = model['model_parameters'][get_model_fw(model)]['model']
219  if model_type not in best_models:
220  fw_model_list.extend(self.optimize_models(self.analysis_recommendation_order[indexer]))
221  #print("Trace:%s-%s" % (model_type, best_models))
222  best_models.append(model_type)
223  # End - Modified 31/08/2017
224  except TypeError:
225  ''' If all optimize_models doesn't return new models
226  pass and look for next best model on this type'''
227  pass
228  # if fw_model_list is not None:
229  self.next_analysis_list.extend(fw_model_list)
230  if len(self.next_analysis_list) == 0:
231  self.next_analysis_list = None
232  self.deepness += 1
233  return self._ec.get_id_analysis(), self.next_analysis_list
234 
235 
241  def analysisparanoiac(self, dataframe_metadata, objective_column, amode):
242  self.next_analysis_list.clear()
243  if self.deepness == 1:
244  self.base_iteration(amode, dataframe_metadata, objective_column)
245  elif self.deepness > self.deep_impact:
246  self.next_analysis_list = None
247  elif self.next_analysis_list is not None:
248  fw_model_list = list()
249  # Added 31/08/2017
250  best_models = list()
251  # End - Added 31/08/2017
252  aux_loop_controller = len(self.analysis_recommendation_order)
253  for indexer in range(0, aux_loop_controller):
254  try:
255  # Modified 31/08/2017
256  model = self.analysis_recommendation_order[indexer]
257  if model['status'] == 'Executed':
258  model_type = model['model_parameters'][get_model_fw(model)]['model']
259  if model_type not in best_models:
260  fw_model_list.extend(self.optimize_models(self.analysis_recommendation_order[indexer]))
261  #print("Trace:%s-%s" % (model_type, best_models))
262  best_models.append(model_type)
263  # End - Modified 31/08/2017
264  except TypeError:
265  ''' If all optimize_models doesn't return new models
266  pass and look for next best model on this type'''
267  pass
268  #if fw_model_list is not None:
269  self.next_analysis_list.extend(fw_model_list)
270  if len(self.next_analysis_list) == 0:
271  self.next_analysis_list = None
272  self.deepness += 1
273  return self._ec.get_id_analysis(), self.next_analysis_list
274 
275 
281 
282  def analysisanomalies(self, dataframe_metadata, objective_column, amode):
283  self.next_analysis_list.clear()
284  if self.deepness == 1:
285  self.base_iteration(amode, dataframe_metadata, objective_column)
286  elif self.deepness > self.deep_impact:
287  self.next_analysis_list = None
288  elif self.next_analysis_list is not None:
289  fw_model_list = list()
290  # Added 31/08/2017
291  best_models = list()
292  # End - Added 31/08/2017
293  aux_loop_controller = len(self.analysis_recommendation_order)
294  for indexer in range(0, aux_loop_controller):
295  try:
296  # Modified 31/08/2017
297  model = self.analysis_recommendation_order[indexer]
298  if model['status'] == 'Executed':
299  model_type = model['model_parameters'][get_model_fw(model)]['model']
300  if model_type not in best_models:
301  #print("Trace:%s-%s"%(model_type, best_models))
302  fw_model_list.extend(self.optimize_models(self.analysis_recommendation_order[indexer]))
303  best_models.append(model_type)
304  # End - Modified 31/08/2017
305  except TypeError:
306  ''' If all optimize_models doesn't return new models
307  pass and look for next best model on this type'''
308  pass
309  #if fw_model_list is not None:
310  self.next_analysis_list.extend(fw_model_list)
311  if len(self.next_analysis_list) == 0:
312  self.next_analysis_list = None
313  self.deepness += 1
314  return self._ec.get_id_analysis(), self.next_analysis_list
315 
316 
322 
323  def analysisclustering(self, dataframe_metadata, objective_column, amode):
324  self.next_analysis_list.clear()
325  if self.deepness == 1:
326  self.base_iteration(amode, dataframe_metadata, objective_column)
327  elif self.deepness > self.deep_impact:
328  self.next_analysis_list = None
329  elif self.next_analysis_list is not None:
330  fw_model_list = list()
331  # Added 31/08/2017
332  best_models = list()
333  # End - Added 31/08/2017
334  aux_loop_controller = len(self.analysis_recommendation_order)
335  for indexer in range(0, aux_loop_controller):
336  try:
337  # Modified 31/08/2017
338  model = self.analysis_recommendation_order[indexer]
339  if model['status'] == 'Executed':
340  model_type = model['model_parameters'][get_model_fw(model)]['model']
341  #if model_type not in best_models:
342  #print("Trace:%s-%s"%(model_type, best_models))
343  fw_model_list.extend(self.optimize_models(self.analysis_recommendation_order[indexer]))
344  #best_models.append(model_type)
345  # End - Modified 31/08/2017
346  except TypeError:
347  ''' If all optimize_models doesn't return new models
348  pass and look for next best model on this type'''
349  best_models.append(model_type)
350  #if fw_model_list is not None:
351  self.next_analysis_list.extend(fw_model_list)
352  if len(self.next_analysis_list) == 0:
353  self.next_analysis_list = None
354  self.deepness += 1
355  return self._ec.get_id_analysis(), self.next_analysis_list
356 
357 
361  def base_specific(self, dataframe_metadata, list_ar_metadata):
362  version = self._ec.config.get_config()['common']['version']
363  for ar_metadata in list_ar_metadata:
364 
365  ar_structure = ArMetadata()
366  if ar_metadata['dataset_hash_value'] == self.hash_dataframe:
367  self._ec.set_id_analysis(ar_metadata['model_id'])
368  ar_structure['predecessor'] = ar_metadata['model_parameters'][get_model_fw(ar_metadata)] \
369  ['parameters']['model_id']['value']
370  ar_structure['round'] = int(ar_metadata['round']) + 1
371  else:
372  ar_structure['predecessor'] = 'root'
373 
374  ar_structure['model_id'] = self._ec.get_id_analysis()
375  ar_structure['version'] = version
376  ar_structure['user_id'] = self._ec.get_id_user()
377  ar_structure['workflow_id'] = ar_metadata['workflow_id']
378  ar_structure['objective_column'] = ar_metadata['objective_column']
379  ar_structure['timestamp'] = self.timestamp
380  ar_structure['normalizations_set'] = ar_metadata['normalizations_set']
381  ar_structure['dataset'] = self.dataframe_name
382  ar_structure['dataset_hash_value'] = self.hash_dataframe
383  ar_structure['data_initial'] = dataframe_metadata
384  ar_structure['data_normalized'] = None
385  ar_structure['model_parameters'] = ar_metadata['model_parameters']
386  ar_structure['ignored_parameters'] = None
387  ar_structure['full_parameters_stack'] = None
388  ar_structure['status'] = -1
389  self.next_analysis_list.append(ar_structure)
390  self.analyzed_models.append(self.generate_vectors(ar_structure, ar_metadata['normalizations_set']))
391 
392 
397  def base_iteration(self, amode, dataframe_metadata, objective_column):
398  version = self._ec.config.get_config()['common']['version']
399  supervised = True
400  if objective_column is None:
401  supervised = False
402 
403  increment = self.get_size_increment(dataframe_metadata)
404  fw_model_list = self.get_candidate_models(self.an_objective, amode, increment=increment)
405 
406  aux_model_list = list()
407  norm = Normalizer(self._ec)
408  #modified 11/09/2017
409  #minimal_nmd = [norm.define_minimal_norm(objective_column=objective_column)]
410  minimal_nmd = norm.define_minimal_norm(dataframe_metadata=dataframe_metadata,
411  objective_column=objective_column,
412  an_objective=self.an_objective)
413  for fw, model, _ in fw_model_list:
414  aux_model_list.append((fw, model, deepcopy(minimal_nmd)))
415  fw_model_list = aux_model_list
416 
417  self.applicability(fw_model_list, nrows=dataframe_metadata['rowcount'], ncols=dataframe_metadata['cols'])
418 
419  nmd = norm.define_normalizations(dataframe_metadata=dataframe_metadata,
420  objective_column=objective_column,
421  an_objective=self.an_objective)
422 
423  if nmd is not None:
424  nmdlist = list()
425  for fw, model, _ in fw_model_list:
426  if minimal_nmd is not None and len(minimal_nmd) > 0:
427  whole_nmd = deepcopy(minimal_nmd)
428  whole_nmd.extend(deepcopy(nmd))
429  nmdlist.append((fw, model, whole_nmd))
430  else:
431  nmdlist.append((fw, model, deepcopy(nmd)))
432 
433  fw_model_list.extend(nmdlist)
434 
435  for fw, model_params, norm_sets in fw_model_list:
436  #Included 26/05/2018: Changeset: "only_standardize"
437  if not(norm_sets is not None and len(norm_sets) > 0 and compare_sorted_list_dict(norm_sets, minimal_nmd) \
438  and model_params['only_standardize'])\
439  or ((norm_sets is None or len(norm_sets) == 0) and model_params['only_standardize']):
440  ar_structure = ArMetadata()
441  ar_structure['model_id'] = self._ec.get_id_analysis()
442  ar_structure['version'] = version
443  ar_structure['user_id'] = self._ec.get_id_user()
444  ar_structure['workflow_id'] = self._ec.get_id_workflow()
445  ar_structure['objective_column'] = objective_column
446  ar_structure['timestamp'] = self.timestamp
447  ar_structure['normalizations_set'] = norm_sets
448  ar_structure['dataset'] = self.dataframe_name
449  ar_structure['dataset_hash_value'] = self.hash_dataframe
450  ar_structure['data_initial'] = dataframe_metadata
451  ar_structure['data_normalized'] = None
452  ar_structure['model_parameters'] = OrderedDict()
453  ar_structure['model_parameters'][fw] = model_params
454  ar_structure['ignored_parameters'] = None
455  ar_structure['full_parameters_stack'] = None
456  ar_structure['predecessor'] = 'root'
457  ar_structure['status'] = -1
458  self.next_analysis_list.append(ar_structure)
459  self.analyzed_models.append(self.generate_vectors(ar_structure, norm_sets))
460 
461 
464  def load_frameworks(self):
465  return FrameworkMetadata(self._ec)
466 
467 
473  def get_analysis_objective(self, dataframe_metadata, objective_column, atype=None):
474  config = self._config['AdviserStart_rules']['common']
475  for each_column in dataframe_metadata['columns']:
476  if each_column['name'] == objective_column:
477 
478  if each_column['missed'] != 0:
479  cardinality = int(each_column['cardinality']) - 1
480  else:
481  cardinality = int(each_column['cardinality'])
482 
483  if cardinality == 2 and (atype == 'binomial' or atype is None):
484  if atype is not None:
485  self._logging.log_info(self._ec.get_id_analysis(), 'AdviserAStar',
486  self._labels["sucess_specific"], '%s-%s' % (cardinality, atype))
487  return ATypesMetadata(binomial=True)
488  elif atype is not None:
489  if atype == 'regression':
490  self._logging.log_info(self._ec.get_id_analysis(), 'AdviserAStar',
491  self._labels["sucess_specific"], '%s-%s' % (cardinality, atype))
492  return ATypesMetadata(regression=True)
493  if atype == 'multinomial':
494  self._logging.log_info(self._ec.get_id_analysis(), 'AdviserAStar',
495  self._labels["sucess_specific"], '%s-%s' % (cardinality, atype))
496  return ATypesMetadata(multinomial=True)
497  else:
498  self._logging.log_info(self._ec.get_id_analysis(), 'AdviserAStar',
499  self._labels["failed_specific"], '%s-%s' % (cardinality, atype))
500 
501  if each_column['type'] not in DTYPES:
502  if cardinality > 2:
503  return ATypesMetadata(multinomial=True)
504  elif cardinality <= config['multi_cardinality_limit'] \
505  and cardinality <= (dataframe_metadata['rowcount']*config['multi_limit']):
506  return ATypesMetadata(multinomial=True)
507  else:
508  return ATypesMetadata(regression=True)
509 
510  self._logging.log_critical(self._ec.get_id_analysis(), 'AdviserAStar',
511  self._labels["failed_mselection"], '%s-%s' % (cardinality, atype))
512  return None
513 
514 
518  def get_size_increment(self, df_metadata):
519  base = self._config['common']['base_increment']
520  increment = 1.0
521  variabilizations = df_metadata['rowcount'] * df_metadata['cols']
522  for _, pvalue in base.items():
523  if variabilizations > pvalue['base'] and increment < pvalue['increment']:
524  increment = pvalue['increment']
525  self._logging.log_info(self._ec.get_id_analysis(), 'AdviserAStar', self._labels["inc_application"],
526  increment)
527  return increment
528 
529 
535  def get_candidate_models(self, atype, amode, increment=1.0):
536  defaultframeworks = self.load_frameworks()
537  model_list = list()
538  for fw, fw_value in defaultframeworks.items():
539  if fw_value['conf']['enabled']:
540  wfw_module = importlib.import_module(self._frameworks[fw]['conf']['framework_metadata_module'])
541  wfw = eval('wfw_module.' + self._frameworks[fw]['conf']['framework_metadata_class']
542  + '(defaultframeworks)')
543  for each_base_model in wfw.get_default():
544  if each_base_model['enabled']:
545  for each_type in each_base_model['types']:
546  if each_type['active'] and each_type['type'] == atype[0]['type']:
547  model_module = importlib.import_module(self._frameworks[fw]['conf']['model_metadata_module'])
548  modelbase = eval('model_module.' + self._frameworks[fw]['conf']['model_metadata_class']
549  + '(self._ec)')
550  model = modelbase.generate_models(each_base_model['model'], atype, amode, increment)
551  wfw.models.append(model)
552  model_list.append((fw, model, None))
553  return model_list
554 
555 
561  def applicability(self, model_list, nrows, ncols):
562  fw_config = self._ec.config.get_config()['frameworks']
563  exclude_model = list()
564  for iterator in range(0, len(model_list)):
565  fw = model_list[iterator][0]
566  model = model_list[iterator][1]
567  if fw_config[fw]['conf']['min_rows_enabled'] and (nrows < model['min_rows_applicability']):
568  self._logging.log_info(self._ec.get_id_analysis(), 'AdviserAStar', self._labels["exc_applicability"],
569  model['model'] + ' - ' + 'rows < ' +
570  str(model['min_rows_applicability']))
571  exclude_model.append(model_list[iterator])
572  if fw_config[fw]['conf']['max_cols_enabled'] and model['max_cols_applicability'] is not None \
573  and(ncols > model['max_cols_applicability']):
574  self._logging.log_info(self._ec.get_id_analysis(), 'AdviserAStar', self._labels["exc_applicability"],
575  model['model'] + ' - ' + 'cols > ' +
576  str(model['max_cols_applicability']))
577  exclude_model.append(model_list[iterator])
578  for model in exclude_model:
579  model_list.remove(model)
580 
581 
584  @staticmethod
585  def get_train_accuracy(model):
586  try:
587  return float(model['metrics']['accuracy']['train']),\
588  1/float(model['metrics']['execution']['train']['RMSE']),\
589  1.0
590  except ZeroDivisionError:
591  return float(model['metrics']['accuracy']['train']), \
592  -1.0, \
593  1.0
594  except KeyError:
595  return -1.0, -1.0, 1.0
596  except Exception:
597  return -1.0, -1.0, 1.0
598 
599 
602  @staticmethod
603  def get_test_accuracy(model):
604  try:
605  return float(model['metrics']['accuracy']['test']),\
606  1/float(model['metrics']['execution']['test']['RMSE']),\
607  1.0
608  except ZeroDivisionError:
609  return float(model['metrics']['accuracy']['test']), \
610  -1.0, \
611  1.0
612  except KeyError:
613  return -1.0, -1.0, 1.0
614  except Exception:
615  return -1.0, -1.0, 1.0
616 
617 
620  @staticmethod
622  try:
623  return float(model['metrics']['accuracy']['combined']),\
624  1/float(model['metrics']['execution']['train']['RMSE']),\
625  1.0
626  except ZeroDivisionError:
627  return float(model['metrics']['accuracy']['combined']), \
628  -1.0, \
629  1.0
630  except KeyError:
631  return -1.0, -1.0, 1.0
632  except Exception:
633  return -1.0, -1.0, 1.0
634 
635 
638  @staticmethod
639  def get_train_rmse(model):
640  if str(float(model['metrics']['execution']['train']['RMSE'])).lower() == 'nan':
641  rmse = 1e+16
642  else:
643  rmse = float(model['metrics']['execution']['train']['RMSE'])
644  try:
645  return rmse,\
646  1/float(model['metrics']['accuracy']['combined']),\
647  0.0
648  except ZeroDivisionError:
649  return rmse,\
650  1e+16, \
651  0.0
652  except KeyError:
653  return 1e+16, 1e+16, 0.0
654  except Exception:
655  return 1e+16, 1e+16, 0.0
656 
657 
660  @staticmethod
661  def get_test_rmse(model):
662  if str(float(model['metrics']['execution']['test']['RMSE'])).lower() == 'nan':
663  rmse = 1e+16
664  else:
665  rmse = float(model['metrics']['execution']['test']['RMSE'])
666  try:
667  return rmse,\
668  1/float(model['metrics']['accuracy']['combined']),\
669  0.0
670  except ZeroDivisionError:
671  return rmse,\
672  1e+16, \
673  0.0
674  except KeyError:
675  return 1e+16, 1e+16, 0.0
676  except Exception:
677  return 1e+16, 1e+16, 0.0
678 
679 
683  @staticmethod
684  def get_cdistance(model):
685  try:
686  return float(model['metrics']['execution']['train']['tot_withinss']), \
687  1/float(model['metrics']['execution']['train']['betweenss']), \
688  0.0
689  except ZeroDivisionError:
690  return float(model['metrics']['execution']['train']['tot_withinss']), \
691  1e+16, \
692  0.0
693  except TypeError:
694  return float(model['metrics']['execution']['train']['tot_withinss']), \
695  1e+16, \
696  0.0
697  except KeyError:
698  return 1e+16, 1e+16, 0.0
699 
700 
703  @staticmethod
704  def get_train_r2(model):
705  try:
706  return float(model['metrics']['execution']['train']['r2']),\
707  1/float(model['metrics']['execution']['train']['RMSE']),\
708  1.0
709  except ZeroDivisionError:
710  return float(model['metrics']['execution']['train']['r2']), \
711  -1.0, \
712  1.0
713  except KeyError:
714  return -1.0, -1.0, 1.0
715  except Exception:
716  return -1.0, -1.0, 1.0
717 
718 
721  @staticmethod
722  def get_test_r2(model):
723  try:
724  return float(model['metrics']['execution']['test']['r2']),\
725  1/float(model['metrics']['execution']['test']['RMSE']),\
726  1.0
727  except ZeroDivisionError:
728  return float(model['metrics']['execution']['test']['r2']), \
729  -1.0, \
730  1.0
731  except KeyError:
732  return -1.0, -1.0, 1.0
733  except Exception:
734  return -1.0, -1.0, 1.0
735 
736 
740  def priorize_models(self, model_list):
741  if self.metric == 'train_accuracy':
742  return sorted(model_list, key=self.get_train_accuracy, reverse=True)
743  elif self.metric == 'test_accuracy':
744  return sorted(model_list, key=self.get_test_accuracy, reverse=True)
745  elif self.metric == 'combined_accuracy':
746  return sorted(model_list, key=self.get_combined_accuracy, reverse=True)
747  elif self.metric == 'cdistance':
748  return sorted(model_list, key=self.get_cdistance)
749  elif self.metric == 'train_rmse':
750  return sorted(model_list, key=self.get_train_rmse)
751  elif self.metric == 'test_rmse':
752  return sorted(model_list, key=self.get_test_rmse)
753  elif self.metric == 'train_r2':
754  return sorted(model_list, key=self.get_train_r2, reverse=True)
755  elif self.metric == 'test_r2':
756  return sorted(model_list, key=self.get_test_r2, reverse=True)
757  else:
758  return model_list
759 
760 
764  def generate_vectors(self, model, normalization_set):
765  vector = list()
766  norm_vector = list()
767  fw = get_model_fw(model)
768  for parm, parm_value in model['model_parameters'][fw]['parameters'].items():
769  if isinstance(parm_value, OrderedDict) and parm != 'model_id':
770  vector.append(parm_value['value'])
771  #added 31/08/2017
772  if normalization_set == [None]:
773  norm_vector = normalization_set
774  else:
775  for normalization in normalization_set:
776  norm_vector.append(md5(dumps(normalization).encode('utf8')).hexdigest())
777  #print("Trace:%s-%s-%s-%s"%(fw, model['model_parameters'][fw]['model'], vector, norm_vector))
778  return fw, model['model_parameters'][fw]['model'], vector, norm_vector
779 
780 
783  def is_executed(self, vector):
784  aux_analized_models = deepcopy(self.analyzed_models)
785  analyzed = False
786  while not analyzed and len(aux_analized_models) > 0:
787  analyzed = analyzed or self.compare_vectors(vector, aux_analized_models.pop())
788  return analyzed
789 
790 
794  @ staticmethod
795  def compare_vectors(vector1, vector2):
796  return vector1[0] == vector2[0] and vector1[1] == vector2[1] \
797  and vector1[2] == vector2[2] and vector1[3] == vector2[3]
798 
799 
802  def safe_append(self, model_list, model):
803  vector = self.generate_vectors(model, model['normalizations_set'])
804  if not self.is_executed(vector):
805  model_list.append(model)
806  self.analyzed_models.append(vector)
807  self._logging.log_info(self._ec.get_id_analysis(), 'AdviserAStar', self._labels["new_vector"], str(vector))
808  else:
809  self.excluded_models.append(vector)
810  self._logging.log_info(self._ec.get_id_analysis(), 'AdviserAStar', self._labels["exc_vector"], str(vector))
811 
Generate OrderedDict() from analysis types accepted returning the structure to be added on ModelMetad...
Define all objects, functions and structured related to Analysis_Results for one execution (final jso...
Definition: armetadata.py:1
def analysisnormal(self, dataframe_metadata, objective_column, amode)
Method oriented to execute smart normal and fast analysis.
Definition: adviserbase.py:104
Class focused on execute A* based analysis on three modalities of working Fast: 1 level analysis over...
Definition: adviserbase.py:36
def get_train_r2(model)
Method get train r2 for generic model.
Definition: adviserbase.py:704
def compare_vectors(vector1, vector2)
Compare to execution vectors.
Definition: adviserbase.py:795
Define Base Framework methods and members on an unified way.
def applicability(self, model_list, nrows, ncols)
Method oriented to select applicability of models over min_rows_limit.
Definition: adviserbase.py:561
def generate_vectors(self, model, normalization_set)
Store executed model base parameters to check past executions.
Definition: adviserbase.py:764
Define all objects, functions and structs related to common utilities not associated to one concrete ...
Definition: utils.py:1
def get_cdistance(model)
Method get clustering distance for generic model.
Definition: adviserbase.py:684
def base_iteration(self, amode, dataframe_metadata, objective_column)
Method oriented to select initial candidate models.
Definition: adviserbase.py:397
Define all objects, functions and structures related to logging event on DayF product logs...
Definition: logshandler.py:1
Class oriented to manage all messages and interaction with DayF product logs.
Definition: logshandler.py:23
def get_combined_accuracy(model)
Method get averaged train and test accuracy for generic model.
Definition: adviserbase.py:621
Generate Framework base Class and base members.
def safe_append(self, model_list, model)
Check if model is previously executed.
Definition: adviserbase.py:802
def get_train_rmse(model)
Method get rmse for generic model.
Definition: adviserbase.py:639
def analysis_specific(self, dataframe_metadata, list_ar_metadata)
Method oriented to execute new analysis.
Definition: adviserbase.py:199
def analysisclustering(self, dataframe_metadata, objective_column, amode)
Method oriented to execute unsupervised clustering models.
Definition: adviserbase.py:323
def is_executed(self, vector)
Check if model has been executed or is planned to execute.
Definition: adviserbase.py:783
def get_test_r2(model)
Method get test r2 for generic model.
Definition: adviserbase.py:722
def analysisanomalies(self, dataframe_metadata, objective_column, amode)
Method oriented to execute unsupervised anomalies models.
Definition: adviserbase.py:282
def base_specific(self, dataframe_metadata, list_ar_metadata)
Method oriented to generate specific candidate metadata.
Definition: adviserbase.py:361
def priorize_models(self, model_list)
Method managing scoring algorithm results params: results for Handlers (gdayf.handlers) ...
Definition: adviserbase.py:740
def get_size_increment(self, df_metadata)
Method oriented to analyze get increments on effort based on DF_metadata structure.
Definition: adviserbase.py:518
def get_analysis_objective(self, dataframe_metadata, objective_column, atype=None)
Method oriented to analyze DFmetadata and select analysis objective.
Definition: adviserbase.py:473
def analysisparanoiac(self, dataframe_metadata, objective_column, amode)
Method oriented to execute smart normal and fast analysis.
Definition: adviserbase.py:241
Define Analysis Types for DayF product on an unified way.
def get_test_accuracy(model)
Method get test accuracy for generic model.
Definition: adviserbase.py:603
def __init__(self, e_c, deep_impact=5, metric='accuracy', dataframe_name='', hash_dataframe='')
Constructor.
Definition: adviserbase.py:47
Class ArMetadata manage the Analysis results structs on OrderedDict format and exportable to json...
Definition: armetadata.py:22
Class oriented to manage normalizations on dataframes for improvements on accuracy.
Definition: normalizer.py:26
def analysispoc(self, dataframe_metadata, objective_column, amode)
Method oriented to execute poc analysis.
Definition: adviserbase.py:172
def get_train_accuracy(model)
Method get train accuracy for generic model.
Definition: adviserbase.py:585
def load_frameworks(self)
Method oriented to get frameworks default values from config.
Definition: adviserbase.py:464
def get_candidate_models(self, atype, amode, increment=1.0)
Method oriented to analyze choose models candidate and select analysis objective. ...
Definition: adviserbase.py:535
def get_test_rmse(model)
Method get test rmse for generic model.
Definition: adviserbase.py:661
def set_recommendations(self, dataframe_metadata, objective_column, amode=POC, atype=None)
Main method oriented to execute smart analysis.
Definition: adviserbase.py:72