DayF core  1.2.1.2
DayF (Decision at your Fingertips) is a freeware AutoML development framework that lets developers work with Machine Learning models without any prior knowledge of AI, simply by providing a CSV dataset and the objective column
h2o_optimizer.py
1 
5 
6 '''
7 Copyright (C) e2its - All Rights Reserved
8  * Unauthorized copying of this file, via any medium is strictly prohibited
9  * Proprietary and confidential
10  *
11  * This file is part of gDayF project.
12  *
13  * Written by Jose L. Sanchez <e2its.es@gmail.com>, 2016-2019
14 '''
15 
16 from gdayf.common.utils import get_model_fw
17 from gdayf.conf.loadconfig import LoadConfig
18 from gdayf.common.utils import decode_ordered_dict_to_dataframe
19 from gdayf.models.parametersmetadata import ParameterMetadata
20 
class Optimizer(object):
    """Propose tuned H2O model candidates derived from an already executed model.

    The tuning rules (limits, increments, divisors and value grids) are read
    from the ``optimizer -> AdviserStart_rules -> h2o`` section of the
    framework configuration.
    """

    def __init__(self, e_c):
        """Store the execution context and load labels and optimizer rules.

        :param e_c: execution context; must expose ``labels.get_config()``.
        """
        self._ec = e_c
        self._labels = self._ec.labels.get_config()['messages']['adviser']
        self._config = LoadConfig().get_config()['optimizer']['AdviserStart_rules']['h2o']

    @staticmethod
    def _clone(armetadata, **copy_kwargs):
        """Copy *armetadata* and return ``(copy, h2o parameter mapping of the copy)``.

        Extra keyword arguments are forwarded to ``copy_template``.
        """
        candidate = armetadata.copy_template(**copy_kwargs)
        return candidate, candidate['model_parameters']['h2o']['parameters']

    def optimize_models(self, armetadata, metric_value, objective, deepness, deep_impact):
        """Build the list of candidate models that may improve on *armetadata*.

        :param armetadata: dict-like model descriptor offering ``copy_template()``.
        :param metric_value: metric reached by the analysed model.
        :param objective: target metric; nothing is proposed once it is reached.
        :param deepness: current optimization depth; selects which rules apply.
        :param deep_impact: depth bound used by the structural (layer) rules.
        :return: list of new descriptors, or ``None`` when the model is not an
            H2O one, already meets the objective, is flagged as failed, no rule
            applies, or a required key is missing (``KeyError``).
        """
        model_list = []
        framework = get_model_fw(armetadata)
        model = armetadata['model_parameters'][framework]
        config = self._config

        # Guard clause: only unfinished, non-failed H2O models are optimized.
        if framework != 'h2o' or metric_value == objective \
                or armetadata['status'] == self._labels['failed_op']:
            return None

        try:
            model_metric = decode_ordered_dict_to_dataframe(armetadata['metrics']['model'])
            model_name = model['model']
            # Naive Bayes is the only estimator whose scoring history is not decoded.
            if model_name != 'H2ONaiveBayesEstimator':
                scoring_metric = decode_ordered_dict_to_dataframe(armetadata['metrics']['scoring'])

            # Every rule is read up front so that a missing configuration key
            # yields ``None`` regardless of the estimator being optimized.
            nfold_limit = config['nfold_limit']
            min_rows_limit = config['min_rows_limit']
            cols_breakdown = config['cols_breakdown']
            nfold_increment = config['nfold_increment']
            min_rows_increment = config['min_rows_increment']
            max_interactions_rows_breakdown = config['max_interactions_rows_breakdown']
            max_interactions_increment = config['max_interactions_increment']
            max_depth_increment = config['max_depth_increment']
            ntrees_increment = config['ntrees_increment']
            dpl_rcount_limit = config['dpl_rcount_limit']
            dpl_divisor = config['dpl_divisor']
            h_dropout_ratio = config['h_dropout_ratio']
            epochs_increment = config['epochs_increment']
            dpl_min_batch_size = config['dpl_min_batch_size']
            dpl_batch_reduced_divisor = config['dpl_batch_reduced_divisor']
            deeper_increment = config['deeper_increment']
            wider_increment = config['wider_increment']
            learning_conf = config['learning_conf']
            rho_conf = config['rho_conf']
            nv_laplace = config['nv_laplace']
            nv_min_prob = config['nv_min_prob']
            nv_min_sdev = config['nv_min_sdev']
            nv_improvement = config['nv_improvement']
            nv_divisor = config['nv_divisor']
            clustering_increment = config['clustering_increment']
            sample_rate = config['sample_rate']

            if model_name == 'H2OGradientBoostingEstimator':
                if deepness == 2 and model['types'][0]['type'] == 'regression':
                    for tweedie_power in [1.1, 1.5, 1.9]:
                        candidate, params = self._clone(armetadata)
                        params['distribution']['value'] = 'tweedie'
                        params['tweedie_power'] = ParameterMetadata()
                        params['tweedie_power'].set_value(tweedie_power)
                        model_list.append(candidate)
                if deepness == 2:
                    for learning in learning_conf:
                        candidate, params = self._clone(armetadata)
                        params['learn_rate']['value'] = learning['learn']
                        params['learn_rate_annealing']['value'] = learning['improvement']
                        model_list.append(candidate)
                # The model used every allowed tree / level: give it more room.
                if model_metric['number_of_trees'][0] >= model['parameters']['ntrees']['value']:
                    candidate, params = self._clone(armetadata)
                    params['ntrees']['value'] *= ntrees_increment
                    model_list.append(candidate)
                if model_metric['max_depth'][0] >= model['parameters']['max_depth']['value']:
                    candidate, params = self._clone(armetadata)
                    params['max_depth']['value'] *= max_depth_increment
                    model_list.append(candidate)
                if model['parameters']['nfolds']['value'] < nfold_limit:
                    candidate, params = self._clone(armetadata)
                    params['nfolds']['value'] += nfold_increment
                    model_list.append(candidate)
                if model['parameters']['min_rows']['value'] > min_rows_limit:
                    candidate, params = self._clone(armetadata)
                    params['min_rows']['value'] = round(params['min_rows']['value'] / min_rows_increment, 0)
                    model_list.append(candidate)

            elif model_name == 'H2OGeneralizedLinearEstimator':
                # Raise the iteration budget only when the previous run exhausted it.
                if model_metric['number_of_iterations'][0] >= model['parameters']['max_iterations']['value']:
                    if deepness == 2:
                        max_iterations = model['parameters']['max_iterations']['value'] * max(
                            round(armetadata['data_initial']['rowcount'] / max_interactions_rows_breakdown), 1)
                    else:
                        max_iterations = model['parameters']['max_iterations']['value'] * max_interactions_increment
                else:
                    max_iterations = model['parameters']['max_iterations']['value']

                if deepness == 2 and model['types'][0]['type'] == 'regression':
                    for tweedie_power in [1.0, 1.5, 2.0, 2.5, 3.0]:
                        candidate, params = self._clone(armetadata)
                        params['tweedie_variance_power']['value'] = tweedie_power
                        params['max_iterations']['value'] = max_iterations
                        model_list.append(candidate)
                if deepness == 2:
                    # NOTE(review): both alpha variants are assumed to belong to
                    # the deepness == 2 rule - confirm against the original layout.
                    for alpha in (0.0, 1.0):
                        candidate, params = self._clone(armetadata)
                        params['alpha']['value'] = alpha
                        params['max_iterations']['value'] = max_iterations
                        model_list.append(candidate)
                if armetadata['data_initial']['cols'] > cols_breakdown:
                    candidate, params = self._clone(armetadata)
                    params['solver']['value'] = 'L_BFGS'
                    params['max_iterations']['value'] = max_iterations
                    model_list.append(candidate)
                if deepness == 2:
                    candidate, params = self._clone(armetadata)
                    params['balance_classes']['value'] = not params['balance_classes']['value']
                    params['max_iterations']['value'] = max_iterations
                    model_list.append(candidate)
                if model['parameters']['nfolds']['value'] < nfold_limit:
                    candidate, params = self._clone(armetadata)
                    params['nfolds']['value'] += nfold_increment
                    params['max_iterations']['value'] = max_iterations
                    model_list.append(candidate)

            elif model_name == 'H2ODeepLearningEstimator':
                # Train longer when there is no scoring history or every epoch was used.
                if scoring_metric.shape[0] == 0 or \
                        scoring_metric['epochs'].max() >= model['parameters']['epochs']['value']:
                    epochs = model['parameters']['epochs']['value'] * epochs_increment
                else:
                    epochs = model['parameters']['epochs']['value']

                if deepness == 2:
                    # NOTE(review): the whole single/two-layer exploration is assumed
                    # to run only at deepness == 2 - confirm against the original layout.
                    # First family: a single hidden layer.
                    candidate, params = self._clone(armetadata)
                    if armetadata['data_initial']['rowcount'] > dpl_rcount_limit:
                        # Fix: keep ``hidden`` a list. The bare number stored before
                        # broke the ``len()`` / indexing done on later passes.
                        params['hidden']['value'] = \
                            [round(armetadata['data_initial']['rowcount'] / (dpl_divisor * 0.5))]
                    else:
                        params['hidden']['value'][0] = \
                            round(model['parameters']['hidden']['value'][0] * wider_increment)
                    model_list.append(candidate)

                    # Each rho/epsilon variant is copied from the previous candidate.
                    for learning in rho_conf:
                        candidate, params = self._clone(candidate, increment=0)
                        params['rho']['value'] = learning['learn']
                        params['epsilon']['value'] = learning['improvement']
                        params['epochs']['value'] = epochs
                        model_list.append(candidate)

                    # Second family: two hidden layers with dropout on both.
                    candidate, params = self._clone(armetadata)
                    if armetadata['data_initial']['rowcount'] > dpl_rcount_limit:
                        params['hidden']['value'] = [
                            round(armetadata['data_initial']['rowcount'] / (dpl_divisor * 0.5)),
                            round(armetadata['data_initial']['rowcount'] / (dpl_divisor * deep_impact)),
                        ]
                    else:
                        params['hidden']['value'] = [
                            model['parameters']['hidden']['value'][0],
                            round(model['parameters']['hidden']['value'][0] / wider_increment),
                        ]
                    params['hidden_dropout_ratios']['value'] = [h_dropout_ratio, h_dropout_ratio]
                    model_list.append(candidate)

                    for learning in rho_conf:
                        candidate, params = self._clone(candidate, increment=0)
                        params['rho']['value'] = learning['learn']
                        params['epsilon']['value'] = learning['improvement']
                        params['epochs']['value'] = epochs
                        model_list.append(candidate)

                if deepness == 3 and model['types'][0]['type'] == 'regression':
                    for tweedie_power in [1.1, 1.5, 1.9]:
                        candidate, params = self._clone(armetadata)
                        params['distribution']['value'] = 'tweedie'
                        params['tweedie_power'] = ParameterMetadata()
                        params['tweedie_power'].set_value(tweedie_power)
                        params['activation']['value'] = 'tanh_with_dropout'
                        params['epochs']['value'] = epochs
                        model_list.append(candidate)

                if deepness == 3 and not model['parameters']['sparse']['value']:
                    candidate, params = self._clone(armetadata)
                    params['sparse']['value'] = not params['sparse']['value']
                    params['epochs']['value'] = epochs
                    model_list.append(candidate)

                # The rule switching rectifier_with_dropout to tanh_with_dropout
                # was removed on 19/09/2017.

                # Try the other initial weight distribution.
                if deepness == 3 and model['parameters']['initial_weight_distribution']['value'] == "normal":
                    candidate, params = self._clone(armetadata)
                    params['initial_weight_distribution']['value'] = "uniform"
                    params['epochs']['value'] = epochs
                    model_list.append(candidate)
                elif deepness == 3:
                    candidate, params = self._clone(armetadata)
                    params['initial_weight_distribution']['value'] = "normal"
                    params['epochs']['value'] = epochs
                    model_list.append(candidate)

                if 2 < deepness <= deep_impact:
                    # Deeper network: add one layer on the wider side of the topology.
                    if len(armetadata['model_parameters']['h2o']['parameters']['hidden']['value']) < 4:
                        candidate, params = self._clone(armetadata)
                        hidden = params['hidden']['value']
                        if len(hidden) > 1 and hidden[0] > hidden[1]:
                            hidden.insert(0, round(hidden[0] * deeper_increment))
                            params['hidden_dropout_ratios']['value'].insert(0, h_dropout_ratio)
                        elif len(hidden) > 1 and hidden[0] < hidden[1]:
                            hidden.append(round(hidden[-1] * deeper_increment))
                            params['hidden_dropout_ratios']['value'].append(h_dropout_ratio)
                        elif len(hidden) == 1:
                            hidden[0] = round(hidden[0] * deeper_increment)
                        # When the first two layers are equal only ``epochs`` changes.
                        params['epochs']['value'] = epochs
                        model_list.append(candidate)

                    # Wider network: scale every layer.
                    # NOTE(review): assumed to sit under the deepness bound above,
                    # mirroring the auto-encoder rule - confirm.
                    candidate, params = self._clone(armetadata)
                    # Fix: round the scaled size (as the auto-encoder rule does); the
                    # previous ``int(round(x) * w)`` truncated the product instead.
                    params['hidden']['value'] = [int(round(units * wider_increment))
                                                 for units in params['hidden']['value']]
                    params['epochs']['value'] = epochs
                    model_list.append(candidate)

                if model['parameters']['mini_batch_size']['value'] >= dpl_min_batch_size:
                    candidate, params = self._clone(armetadata)
                    params['mini_batch_size']['value'] = \
                        round(params['mini_batch_size']['value'] / dpl_batch_reduced_divisor)
                    params['epochs']['value'] = epochs
                    model_list.append(candidate)

            elif model_name == 'H2ORandomForestEstimator':
                if deepness == 2:
                    # Full grid of row sampling x column sampling rates.
                    for size in sample_rate:
                        for size2 in sample_rate:
                            candidate, params = self._clone(armetadata)
                            params['sample_rate']['value'] = size['size']
                            params['col_sample_rate_per_tree']['value'] = size2['size']
                            model_list.append(candidate)

                if model_metric['number_of_trees'][0] == model['parameters']['ntrees']['value']:
                    candidate, params = self._clone(armetadata)
                    params['ntrees']['value'] *= ntrees_increment
                    model_list.append(candidate)
                if model_metric['max_depth'][0] == model['parameters']['max_depth']['value']:
                    candidate, params = self._clone(armetadata)
                    params['max_depth']['value'] *= max_depth_increment
                    model_list.append(candidate)
                if model['parameters']['nfolds']['value'] < nfold_limit:
                    candidate, params = self._clone(armetadata)
                    params['nfolds']['value'] += nfold_increment
                    model_list.append(candidate)
                mtries_options = [round(armetadata['data_initial']['cols'] / 2),
                                  round(armetadata['data_initial']['cols'] * 3 / 4)]
                if model['parameters']['mtries']['value'] not in mtries_options:
                    for mtries in mtries_options:
                        candidate, params = self._clone(armetadata)
                        params['mtries']['value'] = mtries
                        model_list.append(candidate)
                if model['parameters']['min_rows']['value'] > (min_rows_limit / 2):
                    candidate, params = self._clone(armetadata)
                    params['min_rows']['value'] = round(params['min_rows']['value'] / min_rows_increment, 0)
                    model_list.append(candidate)

            elif model_name == 'H2ONaiveBayesEstimator':
                if deepness == 2:
                    for laplace in nv_laplace:
                        for min_prob in nv_min_prob:
                            for min_sdev in nv_min_sdev:
                                candidate, params = self._clone(armetadata)
                                params['laplace']['value'] = laplace
                                params['min_prob']['value'] = min_prob
                                params['min_sdev']['value'] = min_sdev
                                model_list.append(candidate)
                elif deepness >= 2:
                    # NOTE(review): the three rules below are assumed to be siblings
                    # inside this branch - confirm against the original layout.
                    if deepness == deep_impact:
                        candidate, params = self._clone(armetadata)
                        params['balance_classes']['value'] = not params['balance_classes']['value']
                        model_list.append(candidate)
                    if model['parameters']['nfolds']['value'] < nfold_limit:
                        candidate, params = self._clone(armetadata)
                        params['nfolds']['value'] += nfold_increment
                        model_list.append(candidate)

                    # One candidate with a larger and one with a smaller laplace value.
                    for laplace in ['improvement', 'decrement']:
                        candidate, params = self._clone(armetadata)
                        if laplace == 'improvement':
                            params['laplace']['value'] = params['laplace']['value'] * (1 + nv_improvement)
                        else:
                            params['laplace']['value'] = params['laplace']['value'] * (1 - nv_divisor)
                        model_list.append(candidate)

            elif model_name == 'H2OAutoEncoderEstimator':
                if scoring_metric.shape[0] == 0 or \
                        scoring_metric['epochs'].max() >= model['parameters']['epochs']['value']:
                    epochs = model['parameters']['epochs']['value'] * epochs_increment
                else:
                    epochs = model['parameters']['epochs']['value']

                if deepness == 2:
                    for learning in rho_conf:
                        candidate, params = self._clone(armetadata)
                        params['rho']['value'] = learning['learn']
                        params['epsilon']['value'] = learning['improvement']
                        params['epochs']['value'] = epochs
                        model_list.append(candidate)

                if deepness == 3:
                    candidate, params = self._clone(armetadata)
                    params['sparse']['value'] = not params['sparse']['value']
                    params['epochs']['value'] = epochs
                    model_list.append(candidate)
                if deepness > 1 and model['parameters']['activation']['value'] == "rectifier_with_dropout":
                    candidate, params = self._clone(armetadata)
                    params['activation']['value'] = 'tanh_with_dropout'
                    params['epochs']['value'] = epochs
                    model_list.append(candidate)
                if deepness == 3 and model['parameters']['initial_weight_distribution']['value'] == "normal":
                    candidate, params = self._clone(armetadata)
                    params['initial_weight_distribution']['value'] = "uniform"
                    params['epochs']['value'] = epochs
                    model_list.append(candidate)
                elif deepness == 3:
                    candidate, params = self._clone(armetadata)
                    params['initial_weight_distribution']['value'] = "normal"
                    params['epochs']['value'] = epochs
                    model_list.append(candidate)

                if deepness <= deep_impact:
                    # Wider: scale every layer except the middle (bottleneck) one.
                    candidate, params = self._clone(armetadata)
                    hidden = params['hidden']['value']
                    middle = int((float(len(hidden)) / 2) - 0.5)
                    for position, units in enumerate(hidden):
                        if position != middle:
                            hidden[position] = int(round(units * wider_increment, 0))
                    params['epochs']['value'] = epochs
                    model_list.append(candidate)

                    # Deeper: wrap the topology with one extra layer on each side.
                    if len(hidden) < 5:
                        candidate, params = self._clone(armetadata)
                        next_hidden = int(round(params['hidden']['value'][0] * deeper_increment, 0))
                        params['hidden']['value'].insert(0, next_hidden)
                        params['hidden_dropout_ratios']['value'].insert(0, h_dropout_ratio)
                        params['hidden']['value'].append(next_hidden)
                        params['hidden_dropout_ratios']['value'].append(h_dropout_ratio)
                        params['epochs']['value'] = epochs
                        model_list.append(candidate)

                if model['parameters']['mini_batch_size']['value'] >= dpl_min_batch_size:
                    candidate, params = self._clone(armetadata)
                    params['mini_batch_size']['value'] = \
                        round(params['mini_batch_size']['value'] / dpl_batch_reduced_divisor)
                    params['epochs']['value'] = epochs
                    model_list.append(candidate)

            elif model_name == 'H2OKMeansEstimator':
                # ``.iloc[-1]`` reads the last scoring row as a scalar; converting a
                # one-element Series with ``int()`` is deprecated in recent pandas.
                # Assumes the decoded metrics are a pandas DataFrame - TODO confirm.
                # NOTE(review): ``>= 0`` holds for any non-negative count, so this
                # rule always fires; ``> 0`` may have been intended - confirm.
                if scoring_metric.shape[0] == 0 or \
                        int(scoring_metric['number_of_reassigned_observations'].iloc[-1]) >= 0:
                    candidate, params = self._clone(armetadata)
                    params['max_iterations']['value'] = \
                        int(params['max_iterations']['value'] * clustering_increment)
                    model_list.append(candidate)
        except KeyError:
            # A missing metric, parameter or configuration key means no proposal.
            return None

        return model_list or None
Class that gets the config file placed in the default location and loads all parameters into an internal variab...
Definition: loadconfig.py:22
def optimize_models(self, armetadata, metric_value, objective, deepness, deep_impact)
Method managing the generation of possible optimized H2O models, loaded dynamically on adviserclass params...
Define all objects, functions and structs related to common utilities not associated to one concrete ...
Definition: utils.py:1
def __init__(self, e_c)
Constructor: initializes all framework variables and starts or connects to a Spark cluster. Additionally sta...
Define all objects, functions and structs related to managing the Model Parameters structure: OrderedDic...
Define all objects, functions and structs related to loading into the system all configuration parameters from ...
Definition: loadconfig.py:1