6 Copyright (C) e2its - All Rights Reserved 7 * Unauthorized copying of this file, via any medium is strictly prohibited 8 * Proprietary and confidential 10 * This file is part of gDayF project. 12 * Written by Jose L. Sanchez <e2its.es@gmail.com>, 2016-2019 26 from collections
import OrderedDict
27 from pathlib
import Path
28 from pandas
import DataFrame
30 from json.decoder
import JSONDecodeError
32 from pymongo
import MongoClient
36 from hashlib
import md5
44 def __init__(self, e_c=None, user_id='PoC_gDayF', workflow_id='default'):
47 if workflow_id ==
'default':
48 self.
_ec = E_C(user_id=user_id, workflow_id=workflow_id +
'_' + self.
timestamp)
50 self.
_ec = E_C(user_id=user_id, workflow_id=workflow_id)
54 self.
_labels = self.
_ec.labels.get_config()[
'messages'][
'controller']
59 self.
adviser = importlib.import_module(self.
_config[
'optimizer'][
'adviser_classpath'])
60 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"loading_adviser"],
61 self.
_config[
'optimizer'][
'adviser_classpath'])
67 storage_conf = self.
_config[
'storage']
68 grants = storage_conf[
'grants']
69 localfs = (storage_conf[
'localfs']
is not None) \
71 hdfs = (storage_conf[
'hdfs']
is not None) \
73 mongoDB = (storage_conf[
'mongoDB']
is not None) \
75 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"primary_path"],
76 str(storage_conf[
'primary_path']))
78 ''' Checking primary Json storage Paths''' 82 if storage_conf[
'primary_path'] == storage[
'type']:
84 if storage[
'type'] ==
'mongoDB':
86 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_json"],
89 elif storage[
'type'] ==
'localfs':
91 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_json"],
94 elif storage[
'type'] ==
'hdfs':
96 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_json"],
101 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"no_primary"],
102 str(storage_conf[storage_conf[
'primary_path']]))
105 ''' Checking Load storage Paths''' 108 if storage[
'type'] ==
'mongoDB':
109 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_file_storage"],
112 elif storage[
'type'] ==
'localfs':
114 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_load"],
118 at_least_on = at_least_on
or True 119 elif storage[
'type'] ==
'hdfs':
121 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_load"],
125 at_least_on = at_least_on
or True 128 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"no_primary"],
129 str(storage_conf[storage_conf[
'primary_path']]))
132 ''' Checking log storage Paths''' 135 if storage[
'type'] ==
'mongoDB':
136 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_file_storage"],
139 elif storage[
'type'] ==
'localfs':
141 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_log"],
145 at_least_on = at_least_on
or True 146 elif storage[
'type'] ==
'hdfs':
148 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_log"],
152 at_least_on = at_least_on
or True 154 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"no_primary"],
155 str(storage_conf[storage_conf[
'primary_path']]))
158 ''' If all things OK''' 166 def _coherence_fs_checks(self, storage, grants):
169 if persistence.mkdir(type=storage[
'type'], path=str(storage[
'value']), grants=grants):
172 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_json_path"],
173 str(storage[
'value']))
175 if storage[
'hash_type']
not in [
'MD5',
'SHA256']:
176 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_hash_method"],
185 def _coherence_db_checks(self, storage):
186 if storage[
'type'] ==
'mongoDB':
188 client = MongoClient(host=storage[
'url'],
189 port=int(storage[
'port']),
190 document_class=OrderedDict)
191 except ConnectionFailure
as cexecution_error:
192 print(repr(cexecution_error))
195 db = client[storage[
'value']]
196 collection = db[self.
_ec.get_id_user()]
197 test_insert = collection.insert_one({
'test':
'connection.check.dot.bson'}).inserted_id
198 collection.delete_one({
"_id": test_insert})
199 except PyMongoError
as wexecution_error:
200 print(repr(wexecution_error))
213 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"ana_mode"],
'prediction')
214 if armetadata
is None and model_file
is None:
215 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_model"], datapath)
216 return self.
_labels[
"failed_model"]
217 elif armetadata
is not None:
219 assert isinstance(armetadata, ArMetadata)
220 base_ar = deep_ordered_copy(armetadata)
221 except AssertionError:
222 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_model"], armetadata)
223 return self.
_labels[
"failed_model"]
224 elif model_file
is not None:
228 invalid, base_ar = persistence.get_ar_from_engine(model_file)
232 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_model"], model_file)
233 return self.
_labels[
"failed_model"]
234 except IOError
as iexecution_error:
235 print(repr(iexecution_error))
236 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_model"], model_file)
237 return self.
_labels[
"failed_model"]
238 except OSError
as oexecution_error:
239 print(repr(oexecution_error))
240 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_model"], model_file)
241 return self.
_labels[
"failed_model"]
243 if isinstance(datapath, str):
245 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"input_param"], datapath)
247 except [IOError, OSError, JSONDecodeError]:
248 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_input"], datapath)
249 return self.
_labels[
'failed_input']
250 elif isinstance(datapath, DataFrame):
251 pd_dataset = datapath
252 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"input_param"], str(datapath.shape))
254 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_input"], datapath)
255 return self.
_labels[
'failed_input']
257 fw = get_model_fw(base_ar)
261 prediction_frame =
None 263 prediction_frame, _ = self.
model_handler[fw][
'handler'].predict(predict_frame=pd_dataset,
266 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_model"], model_file)
270 self.
_logging.log_info(
'gDayF',
'controller', self.
_labels[
"pred_end"])
272 return prediction_frame
286 handler = importlib.import_module(self.
_frameworks[fw][
'conf'][
'handler_module'])
288 eval(
'handler.' + self.
_frameworks[fw][
'conf'][
'handler_class'] +
'(e_c=self._ec)')
291 handler = importlib.import_module(self.
_frameworks[fw][
'conf'][
'handler_module'])
293 eval(
'handler.' + self.
_frameworks[fw][
'conf'][
'handler_class'] +
'(e_c=self._ec)')
302 if each_handlers[
'handler']
is not None:
305 self.
_logging.log_exec(
'gDayF',
"Controller", self.
_labels[
"cleaning"], fw)
306 if each_handlers[
'initiated']:
307 handler = importlib.import_module(self.
_frameworks[fw][
'conf'][
'handler_module'])
309 eval(
'handler.' + self.
_frameworks[fw][
'conf'][
'handler_class']
310 +
'(e_c=self._ec).shutdown_cluster()')
311 self.
_logging.log_exec(
'gDayF',
"Controller", self.
_labels[
"shuttingdown"], fw)
321 def exec_analysis(self, datapath, objective_column, amode=POC, metric='test_accuracy', deep_impact=3, **kwargs):
331 for pname, pvalue
in kwargs.items():
333 assert isinstance(pvalue, int)
335 elif pname ==
'estimate_k':
336 assert isinstance(pvalue, bool)
338 elif pname ==
'atype':
339 assert pvalue
in atypes
344 if objective_column
is None:
348 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"ana_param"], metric)
349 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"dep_param"], deep_impact)
350 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"ana_mode"], amode)
353 if isinstance(datapath, str):
355 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"input_param"], datapath)
357 id_datapath = Path(datapath).name
358 hash_dataframe = hash_key(
'MD5', datapath)
360 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_input"], datapath)
361 return self.
_labels[
'failed_input']
363 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_input"], datapath)
364 return self.
_labels[
'failed_input']
365 except JSONDecodeError:
366 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_input"], datapath)
367 return self.
_labels[
'failed_input']
368 elif isinstance(datapath, DataFrame):
369 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"input_param"], str(datapath.shape))
370 pd_dataset = datapath
371 id_datapath =
'Dataframe' + \
372 '_' + str(pd_dataset.size) + \
373 '_' + str(pd_dataset.shape[0]) + \
374 '_' + str(pd_dataset.shape[1])
375 hash_dataframe = md5(datapath.to_msgpack()).hexdigest()
377 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_input"], datapath)
378 return self.
_labels[
'failed_input'],
None 380 pd_test_dataset =
None 381 ''' Changed 05/04/2018 382 if metric == 'combined_accuracy' or 'test_accuracy':''' 383 if self.
_config[
'common'][
'minimal_test_split'] <= len(pd_dataset.index) \
384 and (metric
in ACCURACY_METRICS
or metric
in REGRESSION_METRICS):
385 pd_dataset, pd_test_dataset = pandas_split_data(pd_dataset,
386 train_perc=self.
_config[
'common'][
'test_frame_ratio'])
388 df =
DFMetada().getDataFrameMetadata(pd_dataset,
'pandas')
390 self.
_ec.set_id_analysis(self.
_ec.get_id_user() +
'_' + id_datapath +
'_' + str(time()))
391 adviser = self.
adviser.AdviserAStar(e_c=self.
_ec,
393 deep_impact=deep_impact, dataframe_name=id_datapath,
394 hash_dataframe=hash_dataframe)
396 adviser.set_recommendations(dataframe_metadata=df, objective_column=objective_column, amode=amode, atype=atype)
398 while adviser.next_analysis_list
is not None:
399 for each_model
in adviser.next_analysis_list:
400 fw = get_model_fw(each_model)
404 each_model[
"model_parameters"][fw][
"parameters"][
"k"][
"value"] = k
405 each_model[
"model_parameters"][fw][
"parameters"][
"k"][
"seleccionable"] =
True 406 each_model[
"model_parameters"][fw][
"parameters"][
"estimate_k"][
"value"] = estimate_k
407 each_model[
"model_parameters"][fw][
"parameters"][
"estimate_k"][
"seleccionable"] =
True 412 if pd_test_dataset
is not None:
413 _, analyzed_model = self.
model_handler[fw][
'handler'].order_training(training_pframe=pd_dataset,
415 test_frame=pd_test_dataset,
416 filtering=
'STANDARDIZE')
418 _, analyzed_model = self.
model_handler[fw][
'handler'].order_training(training_pframe=pd_dataset,
420 test_frame=pd_dataset,
421 filtering=
'STANDARDIZE')
423 if analyzed_model
is not None:
424 adviser.analysis_recommendation_order.append(analyzed_model)
425 adviser.next_analysis_list.clear()
426 adviser.analysis_recommendation_order = adviser.priorize_models(model_list=
427 adviser.analysis_recommendation_order)
428 adviser.set_recommendations(dataframe_metadata=df, objective_column=objective_column, amode=amode)
430 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller',
431 self.
_labels[
"ana_models"], str(len(adviser.analyzed_models)))
432 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller',
433 self.
_labels[
"exc_models"], str(len(adviser.excluded_models)))
439 adviser.analysis_recommendation_order = adviser.priorize_models(model_list=
440 adviser.analysis_recommendation_order)
442 return self.
_labels[
'success_op'], adviser.analysis_recommendation_order
449 ordered_list = self.
priorize_list(arlist=ar_list, metric=metric)
450 for model
in ordered_list:
452 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"best_model"],
453 model[
'model_parameters'][get_model_fw(model)][
'parameters'][
'model_id'][
'value'])
456 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"res_model"],
457 model[
'model_parameters'][get_model_fw(model)][
'parameters'][
'model_id'][
'value'])
459 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"round_reach"], model[
'round'])
460 if model[
"normalizations_set"]
is None:
461 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"norm_app"], [])
463 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"norm_app"],
464 model[
"normalizations_set"])
466 if metric
in ACCURACY_METRICS
or metric
in REGRESSION_METRICS:
467 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"ametric_order"],
468 model[
'metrics'][
'accuracy'])
469 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"pmetric_order"],
470 model[
'metrics'][
'execution'][
'train'][
'RMSE'])
471 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"pmetric_order"],
472 model[
'metrics'][
'execution'][
'test'][
'RMSE'])
473 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"rmetric_order"],
474 model[
'metrics'][
'execution'][
'train'][
'r2'])
475 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"rmetric_order"],
476 model[
'metrics'][
'execution'][
'test'][
'r2'])
477 if metric
in CLUSTERING_METRICS:
479 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"ckmetric_order"],
480 model[
'metrics'][
'execution'][
'train'][
'k'])
482 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"ckmetric_order"],
484 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"ctmetric_order"],
485 model[
'metrics'][
'execution'][
'train'][
'tot_withinss'])
486 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
"cbmetric_order"],
487 model[
'metrics'][
'execution'][
'train'][
'betweenss'])
497 normal_cols = [
'Model',
'Train_accuracy',
'Test_accuracy',
'Combined_accuracy',
'train_rmse',
'test_rmse']
498 cluster_cols = [
'Model',
'k',
'tot_withinss',
'betweenss']
500 ordered_list = self.
priorize_list(arlist=ar_list, metric=metric)
501 for model
in ordered_list:
502 if metric
in ACCURACY_METRICS
or metric
in REGRESSION_METRICS:
505 {
'Model': model[
'model_parameters'][get_model_fw(model)][
'parameters'][
'model_id'][
'value'],
506 'Round': model[
'round'],
507 'train_accuracy': model[
'metrics'][
'accuracy'][
'train'],
508 'test_accuracy': model[
'metrics'][
'accuracy'][
'test'],
509 'combined_accuracy': model[
'metrics'][
'accuracy'][
'combined'],
510 'train_rmse': model[
'metrics'][
'execution'][
'train'][
'RMSE'],
511 'test_rmse': model[
'metrics'][
'execution'][
'test'][
'RMSE'],
512 'train_r2': model[
'metrics'][
'execution'][
'train'][
'r2'],
513 'test_r2': model[
'metrics'][
'execution'][
'test'][
'r2'],
514 'path': model[
'json_path'][0][
'value']
520 {
'Model': model[
'model_parameters'][get_model_fw(model)][
'parameters'][
'model_id'][
'value'],
521 'Round': model[
'round'],
522 'train_accuracy': model[
'metrics'][
'accuracy'][
'train'],
523 'test_accuracy': model[
'metrics'][
'accuracy'][
'test'],
524 'combined_accuracy': model[
'metrics'][
'accuracy'][
'combined'],
525 'train_rmse': model[
'metrics'][
'execution'][
'train'][
'RMSE'],
526 'path': model[
'json_path'][0][
'value']
530 if metric
in CLUSTERING_METRICS:
532 aux = model[
'metrics'][
'execution'][
'train'][
'k']
537 {
'Model': model[
'model_parameters'][get_model_fw(model)][
'parameters'][
'model_id'][
'value'],
538 'Round': model[
'round'],
540 'tot_withinss':model[
'metrics'][
'execution'][
'train'][
'tot_withinss'],
541 'betweenss':model[
'metrics'][
'execution'][
'train'][
'betweenss'],
542 'path': model[
'json_path'][0][
'value']
545 return DataFrame(dataframe)
554 def exec_sanalysis(self, datapath, list_ar_metadata, metric='combined_accuracy', deep_impact=1, **kwargs):
557 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"ana_param"], metric)
558 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"dep_param"], deep_impact)
560 if isinstance(datapath, str):
562 self.
_logging.log_info(
'gDayF',
"Controller", self.
_labels[
"input_param"], datapath)
564 id_datapath = Path(datapath).name
565 hash_dataframe = hash_key(
'MD5', datapath)
567 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_input"], datapath)
568 return self.
_labels[
'failed_input']
570 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_input"], datapath)
571 return self.
_labels[
'failed_input']
572 except JSONDecodeError:
573 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_input"], datapath)
574 return self.
_labels[
'failed_input']
575 elif isinstance(datapath, DataFrame):
576 hash_dataframe =
None 577 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"input_param"], str(datapath.shape))
578 pd_dataset = datapath
579 id_datapath =
'Dataframe' + \
580 '_' + str(pd_dataset.size) + \
581 '_' + str(pd_dataset.shape[0]) + \
582 '_' + str(pd_dataset.shape[1])
584 self.
_logging.log_critical(
'gDayF',
"Controller", self.
_labels[
"failed_input"], datapath)
585 return self.
_labels[
'failed_input'],
None 587 pd_test_dataset =
None 588 if self.
_config[
'common'][
'minimal_test_split'] <= len(pd_dataset.index) \
589 and (metric
in ACCURACY_METRICS
or metric
in REGRESSION_METRICS):
590 pd_dataset, pd_test_dataset = pandas_split_data(pd_dataset,
591 train_perc=self.
_config[
'common'][
'test_frame_ratio'])
593 df =
DFMetada().getDataFrameMetadata(pd_dataset,
'pandas')
594 self.
_ec.set_id_analysis(self.
_ec.get_id_user() +
'_' + id_datapath +
'_' + str(time()))
595 adviser = self.
adviser.AdviserAStar(e_c=self.
_ec,
597 deep_impact=deep_impact, dataframe_name=id_datapath,
598 hash_dataframe=hash_dataframe)
600 adviser.analysis_specific(dataframe_metadata=df, list_ar_metadata=list_ar_metadata)
602 while adviser.next_analysis_list
is not None:
604 for each_model
in adviser.next_analysis_list:
605 fw = get_model_fw(each_model)
609 if pd_test_dataset
is not None:
610 _, analyzed_model = self.
model_handler[fw][
'handler'].order_training(
611 training_pframe=pd_dataset,
613 test_frame=pd_test_dataset, filtering=
'NONE')
615 _, analyzed_model = self.
model_handler[fw][
'handler'].order_training(
616 training_pframe=pd_dataset,
617 base_ar=each_model, filtering=
'NONE')
618 if analyzed_model
is not None:
619 adviser.analysis_recommendation_order.append(analyzed_model)
621 adviser.next_analysis_list.clear()
622 adviser.analysis_recommendation_order = adviser.priorize_models(model_list=
623 adviser.analysis_recommendation_order)
624 adviser.analysis_specific(dataframe_metadata=df, list_ar_metadata=adviser.analysis_recommendation_order)
626 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller',
627 self.
_labels[
"ana_models"], str(len(adviser.analyzed_models)))
628 self.
_logging.log_info(self.
_ec.get_id_analysis(),
'controller',
629 self.
_labels[
"exc_models"], str(len(adviser.excluded_models)))
631 self.
log_model_list(adviser.analysis_recommendation_order, metric)
637 adviser.analysis_recommendation_order = adviser.priorize_models(model_list=
638 adviser.analysis_recommendation_order)
640 return self.
_labels[
'success_op'], adviser.analysis_recommendation_order
648 fw = get_model_fw(armetadata)
661 model_list = [arlist[0]]
663 model_list = arlist[0:3]
664 elif mode == EACH_BEST:
668 if (get_model_fw(model), model[
'model_parameters'][get_model_fw(model)][
'model'],
669 model[
'normalizations_set'])
not in exclusion:
670 model_list.append(model)
671 exclusion.append((get_model_fw(model), model[
'model_parameters'][get_model_fw(model)][
'model'],
672 model[
'normalizations_set']))
677 for fw
in self.
_config[
'frameworks'].keys():
679 for each_model
in model_list:
680 if fw
in each_model[
'model_parameters'].keys():
681 self.
model_handler[fw][
'handler'].store_model(each_model, user=self.
_ec.get_id_user())
689 model_loaded = list()
690 for fw
in self.
_config[
'frameworks'].keys():
692 for each_model
in arlist:
693 if fw
in each_model[
'model_parameters'].keys():
694 model_load = self.
model_handler[fw][
'handler'].load_model(each_model)
695 if model_load
is not None:
696 model_loaded.append(model_load)
706 model_list = arlist[1:]
708 model_list = arlist[3:]
709 elif mode == EACH_BEST:
713 if (get_model_fw(model), model[
'model_parameters'][get_model_fw(model)][
'model'],
714 model[
'normalizations_set'])
not in exclusion:
715 exclusion.append((get_model_fw(model), model[
'model_parameters'][get_model_fw(model)][
'model'],
716 model[
'normalizations_set']))
718 model_list.append(model)
724 for models
in model_list:
725 if get_model_fw(models)
not in fw_list:
726 fw_list.append(get_model_fw(models))
741 if (arlist
is None or len(arlist) == 0)
and self.
_ec.get_id_analysis()
is None:
742 self.
_logging.log_critical(
'gDayF',
'controller', self.
_labels[
"failed_model"])
744 elif self.
_ec.get_id_analysis()
is not None and self.
_ec.get_id_user() !=
'guest':
747 analysis_id = arlist[0][
'model_id']
750 ordered_list = self.
priorize_list(arlist=new_arlist, metric=metric)
755 root[
'successors'] = OrderedDict()
756 variable_dict = OrderedDict()
757 variable_dict[0] = {
'root': root}
760 for new_tree_structure
in ordered_list:
761 new_model = deep_ordered_copy(new_tree_structure)
762 model_id = new_tree_structure[
'model_parameters'][get_model_fw(new_tree_structure)]\
763 [
'parameters'][
'model_id'][
'value']
764 level = new_tree_structure[
'round']
765 if level
not in variable_dict.keys():
766 variable_dict[level] = OrderedDict()
768 new_tree_structure = OrderedDict()
769 new_tree_structure[
'ranking'] = ranking
770 new_tree_structure[
'data'] = new_model
771 new_tree_structure[
'successors'] = OrderedDict()
772 variable_dict[level][model_id] = new_tree_structure
777 max_level = max(variable_dict.keys())
778 while level
in range(1, max_level+1):
779 for model_id, new_tree_structure
in variable_dict[level].items():
782 while not found
or (level - counter) == 0:
783 if new_tree_structure[
'data'][
'predecessor']
in variable_dict[level-counter].keys():
784 container = eval(
'variable_dict[level-counter][new_tree_structure[\'data\'][\'predecessor\']]')
785 container[
'successors'][model_id] = new_tree_structure
789 self.
_logging.log_debug(self.
_ec.get_id_analysis(),
'controller', self.
_labels[
'fail_reconstruct'],
794 if store
and self.
_config[
'storage'][
'primary_path'] !=
'mongoDB':
795 primary_path = self.
_config[
'storage'][
'primary_path']
796 fstype = self.
_config[
'storage'][primary_path][
'type']
799 datafile.append(self.
_config[
'storage'][primary_path][
'value'])
801 datafile.append(self.
_ec.get_id_user())
803 datafile.append(self.
_ec.get_id_workflow())
805 datafile.append(self.
_config[
'common'][
'execution_tree_dir'])
807 datafile.append(self.
_ec.get_id_analysis())
808 datafile.append(
'.json')
810 if self.
_config[
'persistence'][
'compress_json']:
811 datafile.append(
'.gz')
814 storage.append(value=
''.join(datafile), fstype=fstype)
825 adviser = self.
adviser.AdviserAStar(e_c=self.
_ec, metric=metric)
826 ordered_list = adviser.priorize_models(arlist)
836 failed, armetadata = persistence.get_ar_from_engine(path=path)
838 return failed, armetadata
def reconstruct_execution_tree(self, arlist=None, metric='combined', store=True)
Method oriented to generate execution tree for visualizations and analysis issues.
def __init__(self, e_c=None, user_id='PoC_gDayF', workflow_id='default')
Constructor.
def get_external_model(self, armetadata, type='pojo')
Method leading and controlling conversion to a Java model.
def exec_analysis(self, datapath, objective_column, amode=POC, metric='test_accuracy', deep_impact=3, kwargs)
Method leading and controlling analysis's executions on all frameworks.
Define all objects, functions and structs related to common utilities not associated to one concrete ...
Define all objects, functions and structures related to logging event on DayF product logs...
def clean_handler(self, fw)
Method focus on cleaning handler objects.
Class oriented to manage all messages and interaction with DayF product logs.
def exec_prediction(self, datapath, armetadata=None, model_file=None)
Method leading and controlling prediction's executions on all frameworks.
def load_models(self, arlist)
Method leading and controlling model loads.
def clean_handlers(self)
Method oriented to shutdown localClusters.
Define all objects, functions and structures related to physically store information on persistence s...
def log_model_list(self, ar_list, metric)
Method oriented to log leaderboard against selected metrics.
def priorize_list(self, arlist, metric)
Method oriented to priorize ARlist.
Core class oriented to manage the communication and execution message passing for all components on syst...
def get_ar_from_engine(self, path)
Method base to get an ArMetadata Structure from file.
def _coherence_fs_checks(self, storage, grants)
Method leading configurations coherence checks on fs engines.
def table_model_list(self, ar_list, metric)
Method oriented to log leaderboard against selected metrics on dataframe.
def init_handler(self, fw)
Method oriented to init handler objects.
def config_checks(self)
Method leading configurations coherence checks.
Define all global objects, functions and structs related to a specific experiment.
def _coherence_db_checks(self, storage)
Method leading configurations coherence checks on db engines.
Class to manage transient information between all persistence options and models in a unified way...
def save_models(self, arlist, mode=BEST, metric='accuracy')
Method leading and controlling model savings.
def exec_sanalysis(self, datapath, list_ar_metadata, metric='combined_accuracy', deep_impact=1, kwargs)
Method leading and controlling analysis's executions on specific analysis.
def remove_models(self, arlist, mode=ALL)
Method leading and controlling model removing from server.