DayF core  1.2.1.2
DayF (Decision at your Fingertips) is an AutoML freeware development framework that lets developers work with Machine Learning models without any knowledge of AI, simply by taking a CSV dataset and the objective column
controller.py
1 
4 
5 '''
6 Copyright (C) e2its - All Rights Reserved
7  * Unauthorized copying of this file, via any medium is strictly prohibited
8  * Proprietary and confidential
9  *
10  * This file is part of gDayF project.
11  *
12  * Written by Jose L. Sanchez <e2its.es@gmail.com>, 2016-2019
13 '''
14 
15 from gdayf.handlers.inputhandler import inputHandlerCSV
16 from gdayf.common.dfmetada import DFMetada
17 
18 from gdayf.logs.logshandler import LogsHandler
19 from gdayf.common.utils import get_model_fw
20 from gdayf.common.constants import *
21 from gdayf.common.utils import pandas_split_data, hash_key
22 from gdayf.common.armetadata import ArMetadata
23 from gdayf.common.armetadata import deep_ordered_copy
24 from gdayf.persistence.persistencehandler import PersistenceHandler
25 from gdayf.common.storagemetadata import StorageMetadata
26 from collections import OrderedDict
27 from pathlib import Path
28 from pandas import DataFrame
29 import importlib
30 from json.decoder import JSONDecodeError
31 from time import time
32 from pymongo import MongoClient
33 from pymongo.errors import *
34 # import bson
35 # from bson.codec_options import CodecOptions
36 from hashlib import md5
37 from gdayf.core.experiment_context import Experiment_Context as E_C
38 
39 
class Controller(object):
    """Core class oriented to manage the communication and execution
    message passing for all components on the system.
    """

    def __init__(self, e_c=None, user_id='PoC_gDayF', workflow_id='default'):
        """Constructor.

        :param e_c: existing Experiment_Context to reuse; when None a new one is built
        :param user_id: user identifier used to build a new Experiment_Context
        :param workflow_id: workflow identifier; the literal default value gets the
                            creation timestamp appended so each run stays unique
        """
        self.timestamp = str(time())
        if e_c is None:
            if workflow_id == 'default':
                # Make the default workflow id unique per Controller instance
                self._ec = E_C(user_id=user_id, workflow_id=workflow_id + '_' + self.timestamp)
            else:
                self._ec = E_C(user_id=user_id, workflow_id=workflow_id)
        else:
            self._ec = e_c
        self._config = self._ec.config.get_config()
        self._labels = self._ec.labels.get_config()['messages']['controller']
        self._frameworks = self._ec.config.get_config()['frameworks']
        self._logging = LogsHandler(self._ec)
        self.analysis_list = OrderedDict()  # For future multi-analysis uses
        self.model_handler = OrderedDict()  # per-framework handler registry
        # Adviser implementation is configurable; resolved dynamically from config
        self.adviser = importlib.import_module(self._config['optimizer']['adviser_classpath'])
        self._logging.log_info('gDayF', "Controller", self._labels["loading_adviser"],
                               self._config['optimizer']['adviser_classpath'])
62 
63 
    def config_checks(self):
        """Run coherence checks over the storage configuration.

        Validates the filesystem engines (localfs, hdfs) and the mongoDB
        engine, then verifies that the primary json path, at least one load
        path and at least one log path are backed by a usable engine.

        :return: True when the whole storage configuration is usable,
                 False (after a critical log entry) otherwise
        """
        storage_conf = self._config['storage']
        grants = storage_conf['grants']
        # Engine-level availability flags; short-circuit skips the check
        # entirely when the engine is not configured at all
        localfs = (storage_conf['localfs'] is not None) \
                  and self._coherence_fs_checks(storage_conf['localfs'], grants=grants)
        hdfs = (storage_conf['hdfs'] is not None) \
               and self._coherence_fs_checks(storage_conf['hdfs'], grants=grants)
        mongoDB = (storage_conf['mongoDB'] is not None) \
                  and self._coherence_db_checks(storage_conf['mongoDB'])
        self._logging.log_info('gDayF', "Controller", self._labels["primary_path"],
                               str(storage_conf['primary_path']))

        ''' Checking primary Json storage Paths'''
        primary = False
        #if storage_conf['primary_path'] in ['localfs', 'hdfs']:
        for storage in StorageMetadata(self._ec).get_json_path():
            if storage_conf['primary_path'] == storage['type']:
                primary = True
            # Every configured json path must be backed by a usable engine
            if storage['type'] == 'mongoDB':
                if not mongoDB:
                    self._logging.log_critical('gDayF', "Controller", self._labels["failed_json"],
                                               str(storage))
                    return False
            elif storage['type'] == 'localfs':
                if not localfs:
                    self._logging.log_critical('gDayF', "Controller", self._labels["failed_json"],
                                               str(storage))
                    return False
            elif storage['type'] == 'hdfs':
                if not hdfs:
                    self._logging.log_critical('gDayF', "Controller", self._labels["failed_json"],
                                               str(storage))
                    return False

        if not primary:
            self._logging.log_critical('gDayF', "Controller", self._labels["no_primary"],
                                       str(storage_conf[storage_conf['primary_path']]))
            return False

        ''' Checking Load storage Paths'''
        at_least_on = False
        for storage in StorageMetadata(self._ec).get_load_path():
            # mongoDB is not a valid file storage for model load paths
            if storage['type'] == 'mongoDB':
                self._logging.log_critical('gDayF', "Controller", self._labels["failed_file_storage"],
                                           str(storage))
                return False
            elif storage['type'] == 'localfs':
                if not localfs:
                    self._logging.log_critical('gDayF', "Controller", self._labels["failed_load"],
                                               str(storage))
                    return False
                else:
                    at_least_on = at_least_on or True
            elif storage['type'] == 'hdfs':
                if not hdfs:
                    self._logging.log_critical('gDayF', "Controller", self._labels["failed_load"],
                                               str(storage))
                    return False
                else:
                    at_least_on = at_least_on or True

        if not at_least_on:
            self._logging.log_critical('gDayF', "Controller", self._labels["no_primary"],
                                       str(storage_conf[storage_conf['primary_path']]))
            return False

        ''' Checking log storage Paths'''
        at_least_on = False
        for storage in StorageMetadata(self._ec).get_log_path():
            # mongoDB is not a valid file storage for log paths either
            if storage['type'] == 'mongoDB':
                self._logging.log_critical('gDayF', "Controller", self._labels["failed_file_storage"],
                                           str(storage))
                return False
            elif storage['type'] == 'localfs':
                if not localfs:
                    self._logging.log_critical('gDayF', "Controller", self._labels["failed_log"],
                                               str(storage))
                    return False
                else:
                    at_least_on = at_least_on or True
            elif storage['type'] == 'hdfs':
                if not hdfs:
                    self._logging.log_critical('gDayF', "Controller", self._labels["failed_log"],
                                               str(storage))
                    return False
                else:
                    at_least_on = at_least_on or True
        if not at_least_on:
            self._logging.log_critical('gDayF', "Controller", self._labels["no_primary"],
                                       str(storage_conf[storage_conf['primary_path']]))
            return False

        ''' If all things OK'''
        return True
160 
161 
166  def _coherence_fs_checks(self, storage, grants):
167  persistence = PersistenceHandler(self._ec)
168  try:
169  if persistence.mkdir(type=storage['type'], path=str(storage['value']), grants=grants):
170  return False
171  except OSError:
172  self._logging.log_critical('gDayF', "Controller", self._labels["failed_json_path"],
173  str(storage['value']))
174  return False
175  if storage['hash_type'] not in ['MD5', 'SHA256']:
176  self._logging.log_critical('gDayF', "Controller", self._labels["failed_hash_method"],
177  str(storage))
178  return False
179  return True
180 
181 
185  def _coherence_db_checks(self, storage):
186  if storage['type'] == 'mongoDB':
187  try:
188  client = MongoClient(host=storage['url'],
189  port=int(storage['port']),
190  document_class=OrderedDict)
191  except ConnectionFailure as cexecution_error:
192  print(repr(cexecution_error))
193  return False
194  try:
195  db = client[storage['value']]
196  collection = db[self._ec.get_id_user()]
197  test_insert = collection.insert_one({'test': 'connection.check.dot.bson'}).inserted_id
198  collection.delete_one({"_id": test_insert})
199  except PyMongoError as wexecution_error:
200  print(repr(wexecution_error))
201  return False
202  finally:
203  client.close()
204  return True
205 
206 
211  def exec_prediction(self, datapath, armetadata=None, model_file=None):
212 
213  self._logging.log_info('gDayF', "Controller", self._labels["ana_mode"], 'prediction')
214  if armetadata is None and model_file is None:
215  self._logging.log_critical('gDayF', "Controller", self._labels["failed_model"], datapath)
216  return self._labels["failed_model"]
217  elif armetadata is not None:
218  try:
219  assert isinstance(armetadata, ArMetadata)
220  base_ar = deep_ordered_copy(armetadata)
221  except AssertionError:
222  self._logging.log_critical('gDayF', "Controller", self._labels["failed_model"], armetadata)
223  return self._labels["failed_model"]
224  elif model_file is not None:
225  try:
226  #json_file = open(model_file)
227  persistence = PersistenceHandler(self._ec)
228  invalid, base_ar = persistence.get_ar_from_engine(model_file)
229  del persistence
230 
231  if invalid:
232  self._logging.log_critical('gDayF', "Controller", self._labels["failed_model"], model_file)
233  return self._labels["failed_model"]
234  except IOError as iexecution_error:
235  print(repr(iexecution_error))
236  self._logging.log_critical('gDayF', "Controller", self._labels["failed_model"], model_file)
237  return self._labels["failed_model"]
238  except OSError as oexecution_error:
239  print(repr(oexecution_error))
240  self._logging.log_critical('gDayF', "Controller", self._labels["failed_model"], model_file)
241  return self._labels["failed_model"]
242 
243  if isinstance(datapath, str):
244  try:
245  self._logging.log_info('gDayF', "Controller", self._labels["input_param"], datapath)
246  pd_dataset = inputHandlerCSV().inputCSV(filename=datapath)
247  except [IOError, OSError, JSONDecodeError]:
248  self._logging.log_critical('gDayF', "Controller", self._labels["failed_input"], datapath)
249  return self._labels['failed_input']
250  elif isinstance(datapath, DataFrame):
251  pd_dataset = datapath
252  self._logging.log_info('gDayF', "Controller", self._labels["input_param"], str(datapath.shape))
253  else:
254  self._logging.log_critical('gDayF', "Controller", self._labels["failed_input"], datapath)
255  return self._labels['failed_input']
256 
257  fw = get_model_fw(base_ar)
258 
259  self.init_handler(fw)
260 
261  prediction_frame = None
262  try:
263  prediction_frame, _ = self.model_handler[fw]['handler'].predict(predict_frame=pd_dataset,
264  base_ar=base_ar)
265  except TypeError:
266  self._logging.log_critical('gDayF', "Controller", self._labels["failed_model"], model_file)
267 
268  self.clean_handler(fw)
269 
270  self._logging.log_info('gDayF', 'controller', self._labels["pred_end"])
271 
272  return prediction_frame
273 
274 
276  def clean_handler(self, fw):
277  if self.model_handler[fw]['handler'] is not None:
278  self.model_handler[fw]['handler'].delete_frames()
279  self.model_handler[fw]['handler'] = None
280 
281 
283  def init_handler(self, fw):
284  try:
285  if self.model_handler[fw]['handler'] is None:
286  handler = importlib.import_module(self._frameworks[fw]['conf']['handler_module'])
287  self.model_handler[fw]['handler'] = \
288  eval('handler.' + self._frameworks[fw]['conf']['handler_class'] + '(e_c=self._ec)')
289  except KeyError:
290  self.model_handler[fw] = OrderedDict()
291  handler = importlib.import_module(self._frameworks[fw]['conf']['handler_module'])
292  self.model_handler[fw]['handler'] = \
293  eval('handler.' + self._frameworks[fw]['conf']['handler_class'] + '(e_c=self._ec)')
294  self.model_handler[fw]['initiated'] = False
295  if not self.model_handler[fw]['handler'].is_alive():
296  initiated = self.model_handler[fw]['handler'].connect()
297  self.model_handler[fw]['initiated'] = (self.model_handler[fw]['initiated'] or initiated)
298 
299 
300  def clean_handlers(self):
301  for fw, each_handlers in self.model_handler.items():
302  if each_handlers['handler'] is not None:
303  #self.model_handler[fw][each_handlers['handler']].clean_handler(fw)
304  self.clean_handler(fw)
305  self._logging.log_exec('gDayF', "Controller", self._labels["cleaning"], fw)
306  if each_handlers['initiated']:
307  handler = importlib.import_module(self._frameworks[fw]['conf']['handler_module'])
308  self.model_handler[fw]['handler'] = \
309  eval('handler.' + self._frameworks[fw]['conf']['handler_class']
310  + '(e_c=self._ec).shutdown_cluster()')
311  self._logging.log_exec('gDayF', "Controller", self._labels["shuttingdown"], fw)
312 
313 
    def exec_analysis(self, datapath, objective_column, amode=POC, metric='test_accuracy', deep_impact=3, **kwargs):
        """Method leading and controlling analysis's executions on all frameworks.

        :param datapath: csv file path or pandas DataFrame containing the training data
        :param objective_column: name of the target column, or None for unsupervised analysis
        :param amode: analysis mode constant (e.g. POC) from common.constants
        :param metric: metric used to prioritize models
        :param deep_impact: maximum optimization depth for the adviser
        :param kwargs: optional 'k' (int) and 'estimate_k' (bool) clustering settings,
                       and 'atype' (forced analysis type, one of atypes)
        :return: (status label, ordered list of analyzed model ArMetadata),
                 or a failure label on bad input
        """
        # Clustering variables
        k = None
        estimate_k = False

        #Force analysis variable
        atype = None

        hash_dataframe = ''

        for pname, pvalue in kwargs.items():
            if pname == 'k':
                assert isinstance(pvalue, int)
                k = pvalue
            elif pname == 'estimate_k':
                assert isinstance(pvalue, bool)
                estimate_k = pvalue
            elif pname == 'atype':
                assert pvalue in atypes
                atype = pvalue

        # No objective column means an unsupervised analysis.
        # NOTE(review): 'supervised' is computed but not read anywhere in this
        # method — confirm whether it is dead code.
        supervised = True
        if objective_column is None:
            supervised = False

        self._logging.log_info('gDayF', "Controller", self._labels["start"])
        self._logging.log_info('gDayF', "Controller", self._labels["ana_param"], metric)
        self._logging.log_info('gDayF', "Controller", self._labels["dep_param"], deep_impact)
        self._logging.log_info('gDayF', "Controller", self._labels["ana_mode"], amode)

        if isinstance(datapath, str):
            try:
                self._logging.log_info('gDayF', "Controller", self._labels["input_param"], datapath)
                pd_dataset = inputHandlerCSV().inputCSV(filename=datapath)
                id_datapath = Path(datapath).name
                hash_dataframe = hash_key('MD5', datapath)
            except IOError:
                self._logging.log_critical('gDayF', "Controller", self._labels["failed_input"], datapath)
                return self._labels['failed_input']
            except OSError:
                self._logging.log_critical('gDayF', "Controller", self._labels["failed_input"], datapath)
                return self._labels['failed_input']
            except JSONDecodeError:
                self._logging.log_critical('gDayF', "Controller", self._labels["failed_input"], datapath)
                return self._labels['failed_input']
        elif isinstance(datapath, DataFrame):
            self._logging.log_info('gDayF', "Controller", self._labels["input_param"], str(datapath.shape))
            pd_dataset = datapath
            # Synthetic dataset id built from the frame's dimensions
            id_datapath = 'Dataframe' + \
                          '_' + str(pd_dataset.size) + \
                          '_' + str(pd_dataset.shape[0]) + \
                          '_' + str(pd_dataset.shape[1])
            # NOTE(review): DataFrame.to_msgpack was removed in pandas 1.0 —
            # confirm the pinned pandas version supports it.
            hash_dataframe = md5(datapath.to_msgpack()).hexdigest()
        else:
            self._logging.log_critical('gDayF', "Controller", self._labels["failed_input"], datapath)
            return self._labels['failed_input'], None

        # Split off a held-out test frame when the dataset is big enough and
        # the metric benefits from a separate evaluation
        pd_test_dataset = None
        ''' Changed 05/04/2018
        if metric == 'combined_accuracy' or 'test_accuracy':'''
        if self._config['common']['minimal_test_split'] <= len(pd_dataset.index) \
                and (metric in ACCURACY_METRICS or metric in REGRESSION_METRICS):
            pd_dataset, pd_test_dataset = pandas_split_data(pd_dataset,
                                                            train_perc=self._config['common']['test_frame_ratio'])

        df = DFMetada().getDataFrameMetadata(pd_dataset, 'pandas')

        self._ec.set_id_analysis(self._ec.get_id_user() + '_' + id_datapath + '_' + str(time()))
        adviser = self.adviser.AdviserAStar(e_c=self._ec,
                                            metric=metric,
                                            deep_impact=deep_impact, dataframe_name=id_datapath,
                                            hash_dataframe=hash_dataframe)

        adviser.set_recommendations(dataframe_metadata=df, objective_column=objective_column, amode=amode, atype=atype)

        # Iterate until the adviser produces no further candidate models
        while adviser.next_analysis_list is not None:
            for each_model in adviser.next_analysis_list:
                fw = get_model_fw(each_model)

                # Inject user-supplied clustering settings when the model supports them
                if k is not None:
                    try:
                        each_model["model_parameters"][fw]["parameters"]["k"]["value"] = k
                        each_model["model_parameters"][fw]["parameters"]["k"]["seleccionable"] = True
                        each_model["model_parameters"][fw]["parameters"]["estimate_k"]["value"] = estimate_k
                        each_model["model_parameters"][fw]["parameters"]["estimate_k"]["seleccionable"] = True
                    except KeyError:
                        # Model has no 'k' parameter: not a clustering model
                        pass

                self.init_handler(fw)
                if pd_test_dataset is not None:
                    _, analyzed_model = self.model_handler[fw]['handler'].order_training(training_pframe=pd_dataset,
                                                                                         base_ar=each_model,
                                                                                         test_frame=pd_test_dataset,
                                                                                         filtering='STANDARDIZE')
                else:
                    # Without a split, the training frame doubles as the test frame
                    _, analyzed_model = self.model_handler[fw]['handler'].order_training(training_pframe=pd_dataset,
                                                                                         base_ar=each_model,
                                                                                         test_frame=pd_dataset,
                                                                                         filtering='STANDARDIZE')

                if analyzed_model is not None:
                    adviser.analysis_recommendation_order.append(analyzed_model)
            adviser.next_analysis_list.clear()
            adviser.analysis_recommendation_order = adviser.priorize_models(model_list=
                                                                            adviser.analysis_recommendation_order)
            adviser.set_recommendations(dataframe_metadata=df, objective_column=objective_column, amode=amode)

        self._logging.log_info(self._ec.get_id_analysis(), 'controller',
                               self._labels["ana_models"], str(len(adviser.analyzed_models)))
        self._logging.log_info(self._ec.get_id_analysis(), 'controller',
                               self._labels["exc_models"], str(len(adviser.excluded_models)))

        self._logging.log_exec(self._ec.get_id_analysis(), 'controller', self._labels["end"])

        self.clean_handlers()

        adviser.analysis_recommendation_order = adviser.priorize_models(model_list=
                                                                        adviser.analysis_recommendation_order)

        return self._labels['success_op'], adviser.analysis_recommendation_order
443 
444 
    def log_model_list(self, ar_list, metric):
        """Method oriented to log the leaderboard against the selected metric.

        :param ar_list: list of ArMetadata model descriptions
        :param metric: metric driving the leaderboard ordering
        :return: None (output goes to the product logs)
        """
        best_check = True
        ordered_list = self.priorize_list(arlist=ar_list, metric=metric)
        for model in ordered_list:
            # First entry of the prioritized list is the best model
            if best_check:
                self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["best_model"],
                                       model['model_parameters'][get_model_fw(model)]['parameters']['model_id']['value'])
                best_check = False
            else:
                self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["res_model"],
                                       model['model_parameters'][get_model_fw(model)]['parameters']['model_id']['value'])

            self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["round_reach"], model['round'])
            if model["normalizations_set"] is None:
                self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["norm_app"], [])
            else:
                self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["norm_app"],
                                       model["normalizations_set"])

            # Accuracy/regression leaderboards: accuracy, RMSE and r2 per split
            if metric in ACCURACY_METRICS or metric in REGRESSION_METRICS:
                self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["ametric_order"],
                                       model['metrics']['accuracy'])
                self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["pmetric_order"],
                                       model['metrics']['execution']['train']['RMSE'])
                self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["pmetric_order"],
                                       model['metrics']['execution']['test']['RMSE'])
                self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["rmetric_order"],
                                       model['metrics']['execution']['train']['r2'])
                self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["rmetric_order"],
                                       model['metrics']['execution']['test']['r2'])
            # Clustering leaderboards: k (0 when unavailable), intra/inter cluster sums
            if metric in CLUSTERING_METRICS:
                try:
                    self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["ckmetric_order"],
                                           model['metrics']['execution']['train']['k'])
                except KeyError:
                    self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["ckmetric_order"],
                                           "0")
                self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["ctmetric_order"],
                                       model['metrics']['execution']['train']['tot_withinss'])
                self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["cbmetric_order"],
                                       model['metrics']['execution']['train']['betweenss'])
488 
489 
494 
495  def table_model_list(self, ar_list, metric):
496  dataframe = list()
497  normal_cols = ['Model', 'Train_accuracy', 'Test_accuracy', 'Combined_accuracy', 'train_rmse', 'test_rmse']
498  cluster_cols = ['Model', 'k', 'tot_withinss', 'betweenss']
499 
500  ordered_list = self.priorize_list(arlist=ar_list, metric=metric)
501  for model in ordered_list:
502  if metric in ACCURACY_METRICS or metric in REGRESSION_METRICS:
503  try:
504  dataframe.append(
505  {'Model': model['model_parameters'][get_model_fw(model)]['parameters']['model_id']['value'],
506  'Round': model['round'],
507  'train_accuracy': model['metrics']['accuracy']['train'],
508  'test_accuracy': model['metrics']['accuracy']['test'],
509  'combined_accuracy': model['metrics']['accuracy']['combined'],
510  'train_rmse': model['metrics']['execution']['train']['RMSE'],
511  'test_rmse': model['metrics']['execution']['test']['RMSE'],
512  'train_r2': model['metrics']['execution']['train']['r2'],
513  'test_r2': model['metrics']['execution']['test']['r2'],
514  'path': model['json_path'][0]['value']
515  }
516  )
517  # AutoEncoders metrics
518  except KeyError:
519  dataframe.append(
520  {'Model': model['model_parameters'][get_model_fw(model)]['parameters']['model_id']['value'],
521  'Round': model['round'],
522  'train_accuracy': model['metrics']['accuracy']['train'],
523  'test_accuracy': model['metrics']['accuracy']['test'],
524  'combined_accuracy': model['metrics']['accuracy']['combined'],
525  'train_rmse': model['metrics']['execution']['train']['RMSE'],
526  'path': model['json_path'][0]['value']
527  }
528  )
529 
530  if metric in CLUSTERING_METRICS:
531  try:
532  aux = model['metrics']['execution']['train']['k']
533  except KeyError:
534  aux = 0
535 
536  dataframe.append(
537  {'Model': model['model_parameters'][get_model_fw(model)]['parameters']['model_id']['value'],
538  'Round': model['round'],
539  'k': aux,
540  'tot_withinss':model['metrics']['execution']['train']['tot_withinss'],
541  'betweenss':model['metrics']['execution']['train']['betweenss'],
542  'path': model['json_path'][0]['value']
543  }
544  )
545  return DataFrame(dataframe)
546 
547 
    def exec_sanalysis(self, datapath, list_ar_metadata, metric='combined_accuracy', deep_impact=1, **kwargs):
        """Method leading and controlling analysis's executions on specific models.

        :param datapath: csv file path or pandas DataFrame containing the training data
        :param list_ar_metadata: list of ArMetadata descriptions to (re)analyze
        :param metric: metric used to prioritize models
        :param deep_impact: maximum optimization depth for the adviser
        :param kwargs: unused here, kept for interface compatibility
        :return: (status label, ordered list of analyzed model ArMetadata),
                 or a failure label on bad input
        """
        self._logging.log_info('gDayF', "Controller", self._labels["start"])
        self._logging.log_info('gDayF', "Controller", self._labels["ana_param"], metric)
        self._logging.log_info('gDayF', "Controller", self._labels["dep_param"], deep_impact)

        if isinstance(datapath, str):
            try:
                self._logging.log_info('gDayF', "Controller", self._labels["input_param"], datapath)
                pd_dataset = inputHandlerCSV().inputCSV(filename=datapath)
                id_datapath = Path(datapath).name
                hash_dataframe = hash_key('MD5', datapath)
            except IOError:
                self._logging.log_critical('gDayF', "Controller", self._labels["failed_input"], datapath)
                return self._labels['failed_input']
            except OSError:
                self._logging.log_critical('gDayF', "Controller", self._labels["failed_input"], datapath)
                return self._labels['failed_input']
            except JSONDecodeError:
                self._logging.log_critical('gDayF', "Controller", self._labels["failed_input"], datapath)
                return self._labels['failed_input']
        elif isinstance(datapath, DataFrame):
            hash_dataframe = None
            # NOTE(review): log_critical for a plain input trace looks unintended —
            # exec_analysis uses log_info on the equivalent line; confirm.
            self._logging.log_critical('gDayF', "Controller", self._labels["input_param"], str(datapath.shape))
            pd_dataset = datapath
            # Synthetic dataset id built from the frame's dimensions
            id_datapath = 'Dataframe' + \
                          '_' + str(pd_dataset.size) + \
                          '_' + str(pd_dataset.shape[0]) + \
                          '_' + str(pd_dataset.shape[1])
        else:
            self._logging.log_critical('gDayF', "Controller", self._labels["failed_input"], datapath)
            return self._labels['failed_input'], None

        # Split off a held-out test frame when the dataset is big enough and
        # the metric benefits from a separate evaluation
        pd_test_dataset = None
        if self._config['common']['minimal_test_split'] <= len(pd_dataset.index) \
                and (metric in ACCURACY_METRICS or metric in REGRESSION_METRICS):
            pd_dataset, pd_test_dataset = pandas_split_data(pd_dataset,
                                                            train_perc=self._config['common']['test_frame_ratio'])

        df = DFMetada().getDataFrameMetadata(pd_dataset, 'pandas')
        self._ec.set_id_analysis(self._ec.get_id_user() + '_' + id_datapath + '_' + str(time()))
        adviser = self.adviser.AdviserAStar(e_c=self._ec,
                                            metric=metric,
                                            deep_impact=deep_impact, dataframe_name=id_datapath,
                                            hash_dataframe=hash_dataframe)

        adviser.analysis_specific(dataframe_metadata=df, list_ar_metadata=list_ar_metadata)

        # Iterate until the adviser produces no further candidate models
        while adviser.next_analysis_list is not None:

            for each_model in adviser.next_analysis_list:
                fw = get_model_fw(each_model)

                self.init_handler(fw)

                if pd_test_dataset is not None:
                    _, analyzed_model = self.model_handler[fw]['handler'].order_training(
                        training_pframe=pd_dataset,
                        base_ar=each_model,
                        test_frame=pd_test_dataset, filtering='NONE')
                else:
                    _, analyzed_model = self.model_handler[fw]['handler'].order_training(
                        training_pframe=pd_dataset,
                        base_ar=each_model, filtering='NONE')
                if analyzed_model is not None:
                    adviser.analysis_recommendation_order.append(analyzed_model)

            adviser.next_analysis_list.clear()
            adviser.analysis_recommendation_order = adviser.priorize_models(model_list=
                                                                            adviser.analysis_recommendation_order)
            adviser.analysis_specific(dataframe_metadata=df, list_ar_metadata=adviser.analysis_recommendation_order)

        self._logging.log_info(self._ec.get_id_analysis(), 'controller',
                               self._labels["ana_models"], str(len(adviser.analyzed_models)))
        self._logging.log_info(self._ec.get_id_analysis(), 'controller',
                               self._labels["exc_models"], str(len(adviser.excluded_models)))

        self.log_model_list(adviser.analysis_recommendation_order, metric)

        self._logging.log_info(self._ec.get_id_analysis(), 'controller', self._labels["end"])

        self.clean_handlers()

        adviser.analysis_recommendation_order = adviser.priorize_models(model_list=
                                                                        adviser.analysis_recommendation_order)

        return self._labels['success_op'], adviser.analysis_recommendation_order
641 
642 
647  def get_external_model(self, armetadata, type='pojo'):
648  fw = get_model_fw(armetadata)
649  self.init_handler(fw)
650  results = self.model_handler[fw]['handler'].get_external_model(armetadata, type)
651  self.clean_handler(fw)
652  return results
653 
654 
659  def save_models(self, arlist, mode=BEST, metric='accuracy'):
660  if mode == BEST:
661  model_list = [arlist[0]]
662  elif mode == BEST_3:
663  model_list = arlist[0:3]
664  elif mode == EACH_BEST:
665  exclusion = list()
666  model_list = list()
667  for model in arlist:
668  if (get_model_fw(model), model['model_parameters'][get_model_fw(model)]['model'],
669  model['normalizations_set']) not in exclusion:
670  model_list.append(model)
671  exclusion.append((get_model_fw(model), model['model_parameters'][get_model_fw(model)]['model'],
672  model['normalizations_set']))
673  elif mode == ALL:
674  model_list = arlist
675  elif mode == NONE:
676  model_list = list()
677  for fw in self._config['frameworks'].keys():
678  self.init_handler(fw)
679  for each_model in model_list:
680  if fw in each_model['model_parameters'].keys():
681  self.model_handler[fw]['handler'].store_model(each_model, user=self._ec.get_id_user())
682  self.clean_handler(fw)
683 
684 
688  def load_models(self, arlist):
689  model_loaded = list()
690  for fw in self._config['frameworks'].keys():
691  self.init_handler(fw)
692  for each_model in arlist:
693  if fw in each_model['model_parameters'].keys():
694  model_load = self.model_handler[fw]['handler'].load_model(each_model)
695  if model_load is not None:
696  model_loaded.append(model_load)
697  self.clean_handler(fw)
698  return model_loaded
699 
700 
704  def remove_models(self, arlist, mode=ALL):
705  if mode == BEST:
706  model_list = arlist[1:]
707  elif mode == BEST_3:
708  model_list = arlist[3:]
709  elif mode == EACH_BEST:
710  exclusion = list()
711  model_list = list()
712  for model in arlist:
713  if (get_model_fw(model), model['model_parameters'][get_model_fw(model)]['model'],
714  model['normalizations_set']) not in exclusion:
715  exclusion.append((get_model_fw(model), model['model_parameters'][get_model_fw(model)]['model'],
716  model['normalizations_set']))
717  else:
718  model_list.append(model)
719  elif mode == ALL:
720  model_list = arlist
721  elif mode == NONE:
722  model_list = list()
723  fw_list = list()
724  for models in model_list:
725  if get_model_fw(models) not in fw_list:
726  fw_list.append(get_model_fw(models))
727 
728  for fw in fw_list:
729  self.init_handler(fw)
730  self.model_handler[fw]['handler'].remove_models(model_list)
731  self.clean_handler(fw)
732 
733 
740  def reconstruct_execution_tree(self, arlist=None, metric='combined', store=True):
741  if (arlist is None or len(arlist) == 0) and self._ec.get_id_analysis() is None:
742  self._logging.log_critical('gDayF', 'controller', self._labels["failed_model"])
743  return None
744  elif self._ec.get_id_analysis() is not None and self._ec.get_id_user() != 'guest':
745  new_arlist = PersistenceHandler(self._ec).recover_experiment_mongoDB()
746  else:
747  analysis_id = arlist[0]['model_id']
748  new_arlist = arlist
749 
750  ordered_list = self.priorize_list(arlist=new_arlist, metric=metric)
751 
752  root = OrderedDict()
753  root['data'] = None
754  root['ranking'] = 0
755  root['successors'] = OrderedDict()
756  variable_dict = OrderedDict()
757  variable_dict[0] = {'root': root}
758 
759  ranking = 1
760  for new_tree_structure in ordered_list:
761  new_model = deep_ordered_copy(new_tree_structure)
762  model_id = new_tree_structure['model_parameters'][get_model_fw(new_tree_structure)]\
763  ['parameters']['model_id']['value']
764  level = new_tree_structure['round']
765  if level not in variable_dict.keys():
766  variable_dict[level] = OrderedDict()
767 
768  new_tree_structure = OrderedDict()
769  new_tree_structure['ranking'] = ranking
770  new_tree_structure['data'] = new_model
771  new_tree_structure['successors'] = OrderedDict()
772  variable_dict[level][model_id] = new_tree_structure
773 
774  ranking += 1
775 
776  level = 1
777  max_level = max(variable_dict.keys())
778  while level in range(1, max_level+1):
779  for model_id, new_tree_structure in variable_dict[level].items():
780  counter = 1
781  found = False
782  while not found or (level - counter) == 0:
783  if new_tree_structure['data']['predecessor'] in variable_dict[level-counter].keys():
784  container = eval('variable_dict[level-counter][new_tree_structure[\'data\'][\'predecessor\']]')
785  container['successors'][model_id] = new_tree_structure
786  found = True
787  counter += 1
788  if not found:
789  self._logging.log_debug(self._ec.get_id_analysis(), 'controller', self._labels['fail_reconstruct'],
790  model_id)
791  level += 1
792 
793  #Store_json on primary path
794  if store and self._config['storage']['primary_path'] != 'mongoDB':
795  primary_path = self._config['storage']['primary_path']
796  fstype = self._config['storage'][primary_path]['type']
797 
798  datafile = list()
799  datafile.append(self._config['storage'][primary_path]['value'])
800  datafile.append('/')
801  datafile.append(self._ec.get_id_user())
802  datafile.append('/')
803  datafile.append(self._ec.get_id_workflow())
804  datafile.append('/')
805  datafile.append(self._config['common']['execution_tree_dir'])
806  datafile.append('/')
807  datafile.append(self._ec.get_id_analysis())
808  datafile.append('.json')
809 
810  if self._config['persistence']['compress_json']:
811  datafile.append('.gz')
812 
813  storage = StorageMetadata(self._ec)
814  storage.append(value=''.join(datafile), fstype=fstype)
815  PersistenceHandler(self._ec).store_json(storage, root)
816  return root
817 
818 
824  def priorize_list(self, arlist, metric):
825  adviser = self.adviser.AdviserAStar(e_c=self._ec, metric=metric)
826  ordered_list = adviser.priorize_models(arlist)
827  del adviser
828  return ordered_list
829 
830 
834  def get_ar_from_engine(self, path):
835  persistence = PersistenceHandler(self._ec)
836  failed, armetadata = persistence.get_ar_from_engine(path=path)
837  del persistence
838  return failed, armetadata
839 
840 
841 
842 
Define all objects, functions and structured related to Analysis_Results for one execution (final jso...
Definition: armetadata.py:1
def reconstruct_execution_tree(self, arlist=None, metric='combined', store=True)
Method oriented to generate execution tree for visualizations and analysis issues.
Definition: controller.py:740
Class DFMetadata manage the Data Analysis results structs on OrderedDict format and exportable to jso...
Definition: dfmetada.py:28
def __init__(self, e_c=None, user_id='PoC_gDayF', workflow_id='default')
Constructor.
Definition: controller.py:44
Define all objects, functions and structured related to Data Analysis of input data on OrderedDict fo...
Definition: dfmetada.py:1
def get_external_model(self, armetadata, type='pojo')
Method leading and controlling conversion to a Java model.
Definition: controller.py:647
def exec_analysis(self, datapath, objective_column, amode=POC, metric='test_accuracy', deep_impact=3, kwargs)
Method leading and controlling analysis&#39;s executions on all frameworks.
Definition: controller.py:321
Define all objects, functions and structs related to common utilities not associated to one concrete ...
Definition: utils.py:1
Define all objects, functions and structures related to logging event on DayF product logs...
Definition: logshandler.py:1
Class storage metadata format [{value: , fstype:[&#39;localfs&#39;, &#39;hdfs&#39;, &#39;mongoDB&#39;], hash_value : ""...
def clean_handler(self, fw)
Method focus on cleaning handler objects.
Definition: controller.py:276
Class oriented to manage all messages and interaction with DayF product logs.
Definition: logshandler.py:23
def exec_prediction(self, datapath, armetadata=None, model_file=None)
Method leading and controlling prediction&#39;s executions on all frameworks.
Definition: controller.py:211
def load_models(self, arlist)
Method leading and controlling model loads.
Definition: controller.py:688
def clean_handlers(self)
Method oriented to shutdown localClusters.
Definition: controller.py:300
Define all objects, functions and structures related to physically store information on persistence s...
def log_model_list(self, ar_list, metric)
Method oriented to log leaderboard against selected metrics.
Definition: controller.py:447
def priorize_list(self, arlist, metric)
Method oriented to priorize ARlist.
Definition: controller.py:824
Core class oriented to manage the communication and execution message passing for all components on the system.
Definition: controller.py:41
def get_ar_from_engine(self, path)
Method base to get an ArMetadata Structure from file.
Definition: controller.py:834
def _coherence_fs_checks(self, storage, grants)
Method leading configurations coherence checks on fs engines.
Definition: controller.py:166
def table_model_list(self, ar_list, metric)
Method oriented to log leaderboard against selected metrics on dataframe.
Definition: controller.py:495
def init_handler(self, fw)
Method oriented to init handler objects.
Definition: controller.py:283
def config_checks(self)
Method leading configurations coherence checks.
Definition: controller.py:66
Define all global objects, functions and structs related to a specific experiment.
Define all objects, functions and structured related to adding storage information metadata (json str...
def _coherence_db_checks(self, storage)
Method leading configurations coherence checks on fs engines.
Definition: controller.py:185
Class to manage transient information between all persistence options and models in a unified way...
def save_models(self, arlist, mode=BEST, metric='accuracy')
Method leading and controlling model savings.
Definition: controller.py:659
def exec_sanalysis(self, datapath, list_ar_metadata, metric='combined_accuracy', deep_impact=1, kwargs)
Method leading and controlling analysis&#39;s executions on specific analysis.
Definition: controller.py:554
def remove_models(self, arlist, mode=ALL)
Method leading and controlling model removing from server.
Definition: controller.py:704