DayF core  1.2.1.2
DayF (Decision at your Fingertips) is an AutoML freeware development framework that lets developers work with Machine Learning models without any AI expertise, simply by taking a CSV dataset and the objective column
spark_optimizer.py
1 
5 
6 from gdayf.common.utils import get_model_fw
7 from gdayf.conf.loadconfig import LoadConfig
8 from gdayf.common.utils import decode_ordered_dict_to_dataframe
9 
class Optimizer(object):
    """Candidate generator for Spark model hyper-parameter optimization.

    Given the metadata of a previously executed Spark model, propose a list
    of mutated copies with tweaked hyper-parameters (regularization, tree
    depth, iteration budgets, smoothing, ...). Mutation rules, limits and
    candidate grids come from the 'optimizer' -> 'AdviserStart_rules' ->
    'spark' section of the framework configuration.
    """

    def __init__(self, e_c):
        """Initialize the optimizer bound to an execution context.

        :param e_c: execution context; must expose ``labels.get_config()``
        """
        self._ec = e_c
        # Adviser status labels (e.g. the 'failed_op' marker checked later)
        self._labels = self._ec.labels.get_config()['messages']['adviser']
        # Spark-specific tuning rules: increments, limits and candidate grids
        self._config = LoadConfig().get_config()['optimizer']['AdviserStart_rules']['spark']

    def optimize_models(self, armetadata, metric_value, objective, deepness, deep_impact):
        """Generate optimized variants of a previously executed Spark model.

        :param armetadata: model metadata; must expose ``copy_template()``
            and dict-style access ('model_parameters', 'metrics', 'status')
        :param metric_value: metric achieved by the executed model
        :param objective: target metric value; nothing is generated once met
        :param deepness: current adviser search depth; configured parameter
            grids (regParam, stepSize, ...) are only swept at ``deepness == 2``
        :param deep_impact: maximum search depth (kept for interface
            compatibility; not consulted by the Spark rules)
        :return: list of new armetadata candidates, or ``None`` when the model
            is not Spark-based, already met the objective, failed, or no
            candidate could be produced
        """
        model_list = list()
        model = armetadata['model_parameters'][get_model_fw(armetadata)]
        config = self._config

        if get_model_fw(armetadata) == 'spark' and metric_value != objective \
                and armetadata['status'] != self._labels['failed_op']:
            try:
                scoring_metric = decode_ordered_dict_to_dataframe(armetadata['metrics']['scoring'])
            except ValueError:
                # Bugfix: 'model' is a dict; "str + dict" raised TypeError
                # inside this handler. Trace the model name instead.
                print("TRACE: Not scoring: " + model['model'])
            # Shortcuts to the configured tuning rules
            min_rows_limit = config['min_rows_limit']
            min_rows_increment = config['min_rows_increment']
            max_interactions_increment = config['max_interactions_increment']
            interactions_increment = config['interactions_increment']
            max_depth_increment = config['max_depth_increment']
            ntrees_increment = config['ntrees_increment']
            stepSize = config['stepSize']
            aggregationDepth_increment = config['aggregationDepth_increment']
            regParam = config['regParam']
            elastic_variation = config['elastic_variation']
            nv_smoothing = config['nv_smoothing']
            nv_improvement = config['nv_improvement']
            nv_divisor = config['nv_divisor']
            clustering_increment = config['clustering_increment']
            initstep_increment = config['initstep_increment']

            # NOTE(review): several branches below eval() type strings coming
            # from the model parameter templates. These are framework-owned
            # (not user input), but eval on config data is fragile — consider
            # ast.literal_eval. Left as-is to preserve behavior.

            if model['model'] == 'LinearSVC':
                # Sweep configured regParam candidates once, at depth 2
                if deepness == 2 and len(regParam) != 0:
                    for elastic in regParam:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['regParam']['value'] = elastic['value']
                        model_list.append(new_armetadata)

                # Raise maxIter when the model used its whole iteration budget
                try:
                    if model['parameters']['maxIter']['value'] \
                            >= scoring_metric['totalIterations'][0] and \
                            scoring_metric['totalIterations'][0] <= max_interactions_increment:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['maxIter']['value'] *= interactions_increment
                        model_list.append(new_armetadata)
                except KeyError:
                    # No 'totalIterations' metric: fall back to the ceiling only
                    if model['parameters']['maxIter']['value'] \
                            <= max_interactions_increment:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['maxIter']['value'] *= interactions_increment
                        model_list.append(new_armetadata)

                new_armetadata = armetadata.copy_template()
                model_aux = new_armetadata['model_parameters']['spark']
                model_aux['parameters']['aggregationDepth']['value'] *= aggregationDepth_increment
                model_list.append(new_armetadata)

            elif model['model'] == 'LogisticRegression' or model['model'] == 'LinearRegression':
                if deepness == 2 and len(regParam) != 0:
                    for elastic in regParam:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['regParam']['value'] = elastic['value']
                        model_list.append(new_armetadata)

                # Explore elasticNetParam up and down by elastic_variation,
                # keeping the value inside [0.0, 1.0]
                if model['parameters']['elasticNetParam']['value'] \
                        * (1 + elastic_variation) <= 1.0:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['elasticNetParam']['value'] = \
                        model_aux['parameters']['elasticNetParam']['value'] * (1 + elastic_variation)
                    model_list.append(new_armetadata)
                if model['parameters']['elasticNetParam']['value'] \
                        * (1 - elastic_variation) >= 0.0:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    # Bugfix: this is the decrease branch; the original
                    # multiplied by (1 + elastic_variation) (copy-paste),
                    # duplicating the increase candidate above.
                    model_aux['parameters']['elasticNetParam']['value'] = \
                        model_aux['parameters']['elasticNetParam']['value'] * (1 - elastic_variation)
                    model_list.append(new_armetadata)

                try:
                    if model['parameters']['maxIter']['value'] \
                            >= scoring_metric['totalIterations'][0] and \
                            scoring_metric['totalIterations'][0] <= max_interactions_increment:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['maxIter']['value'] *= interactions_increment
                        model_list.append(new_armetadata)
                except KeyError:
                    if model['parameters']['maxIter']['value'] \
                            <= max_interactions_increment:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['maxIter']['value'] *= interactions_increment
                        model_list.append(new_armetadata)

                new_armetadata = armetadata.copy_template()
                model_aux = new_armetadata['model_parameters']['spark']
                model_aux['parameters']['aggregationDepth']['value'] *= aggregationDepth_increment
                model_list.append(new_armetadata)

            elif model['model'] == 'DecisionTreeClassifier' or model['model'] == 'DecisionTreeRegressor':

                # Loosen the leaf-size constraint while above half the limit
                if model['parameters']['minInstancesPerNode']['value'] > (min_rows_limit / 2):
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['minInstancesPerNode']['value'] = round(
                        model_aux['parameters']['minInstancesPerNode']['value']
                        / min_rows_increment, 0)
                    model_list.append(new_armetadata)

                # Deepen the tree when the built depth hit the configured cap
                if scoring_metric['max_depth'][0] >= model['parameters']['maxDepth']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxDepth']['value'] = \
                        model_aux['parameters']['maxDepth']['value'] * max_depth_increment
                    model_list.append(new_armetadata)

            elif model['model'] == 'GBTRegressor':
                if deepness == 2 and len(stepSize) != 0 and len(eval(model['parameters']['lossType']['type'])) != 0:
                    # Cross-product sweep of stepSize and lossType candidates
                    for stepsize in stepSize:
                        for element in eval(model['parameters']['lossType']['type']):
                            new_armetadata = armetadata.copy_template()
                            model_aux = new_armetadata['model_parameters']['spark']
                            model_aux['parameters']['lossType']['value'] = element
                            model_aux['parameters']['stepSize']['value'] = stepsize['learn']
                            model_list.append(new_armetadata)
                elif deepness == 2 and len(stepSize) != 0:
                    for stepsize in stepSize:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['stepSize']['value'] = stepsize['learn']
                        model_list.append(new_armetadata)
                elif deepness == 2 and len(eval(model['parameters']['lossType']['type'])) != 0:
                    for element in eval(model['parameters']['lossType']['type']):
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        # Bugfix: original wrote the lossType candidate into
                        # 'impurity'; the combined branch above assigns
                        # 'lossType', which is what is iterated here.
                        model_aux['parameters']['lossType']['value'] = element
                        model_list.append(new_armetadata)

                if model['parameters']['minInstancesPerNode']['value'] > (min_rows_limit / 2):
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['minInstancesPerNode']['value'] = round(
                        model_aux['parameters']['minInstancesPerNode']['value']
                        / min_rows_increment, 0)
                    model_list.append(new_armetadata)

                # 05/07/2018. Included platform base restriction maxDepth <= 30
                if scoring_metric['max_depth'][0] >= model['parameters']['maxDepth']['value'] \
                        and model['parameters']['maxDepth']['value'] != 30:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    if model_aux['parameters']['maxDepth']['value'] * max_depth_increment > 30:
                        model_aux['parameters']['maxDepth']['value'] = 30
                    else:
                        model_aux['parameters']['maxDepth']['value'] *= max_depth_increment

                    model_list.append(new_armetadata)

                # Grow the ensemble when all allowed trees were built
                if scoring_metric['trees'][0] >= model['parameters']['maxIter']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxIter']['value'] *= ntrees_increment
                    model_list.append(new_armetadata)

            elif model['model'] == 'GBTClassifier':
                if deepness == 2 and len(stepSize) != 0 and len(eval(model['parameters']['lossType']['type'])) != 0:
                    for stepsize in stepSize:
                        for element in eval(model['parameters']['lossType']['type']):
                            new_armetadata = armetadata.copy_template()
                            model_aux = new_armetadata['model_parameters']['spark']
                            model_aux['parameters']['lossType']['value'] = element
                            model_aux['parameters']['stepSize']['value'] = stepsize['learn']
                            model_list.append(new_armetadata)
                elif deepness == 2 and len(stepSize) != 0:
                    for stepsize in stepSize:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['stepSize']['value'] = stepsize['learn']
                        model_list.append(new_armetadata)
                # Bugfix: original guarded on 'impurity' candidates but then
                # iterated and assigned 'lossType'; guard on lossType instead.
                elif deepness == 2 and len(eval(model['parameters']['lossType']['type'])) != 0:
                    for element in eval(model['parameters']['lossType']['type']):
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['lossType']['value'] = element
                        model_list.append(new_armetadata)

                if model['parameters']['minInstancesPerNode']['value'] > (min_rows_limit / 2):
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['minInstancesPerNode']['value'] = round(
                        model_aux['parameters']['minInstancesPerNode']['value']
                        / min_rows_increment, 0)
                    model_list.append(new_armetadata)

                if scoring_metric['max_depth'][0] >= model['parameters']['maxDepth']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxDepth']['value'] *= max_depth_increment
                    model_list.append(new_armetadata)

                if scoring_metric['trees'][0] >= model['parameters']['maxIter']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxIter']['value'] *= ntrees_increment
                    model_list.append(new_armetadata)

            elif model['model'] == 'RandomForestClassifier' or model['model'] == 'RandomForestRegressor':

                if deepness == 2 and len(eval(model['parameters']['featureSubsetStrategy']['type'])) != 0 \
                        and len(eval(model['parameters']['impurity']['type'])) != 0:
                    for featuresubsetstrategy in eval(model['parameters']['featureSubsetStrategy']['type']):
                        for element in eval(model['parameters']['impurity']['type']):
                            new_armetadata = armetadata.copy_template()
                            model_aux = new_armetadata['model_parameters']['spark']
                            model_aux['parameters']['impurity']['value'] = element
                            model_aux['parameters']['featureSubsetStrategy']['value'] = featuresubsetstrategy
                            model_list.append(new_armetadata)
                elif deepness == 2 and len(eval(model['parameters']['featureSubsetStrategy']['type'])) != 0:
                    for featuresubsetstrategy in eval(model['parameters']['featureSubsetStrategy']['type']):
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['featureSubsetStrategy']['value'] = featuresubsetstrategy
                        model_list.append(new_armetadata)
                elif deepness == 2 and len(eval(model['parameters']['impurity']['type'])) != 0:
                    # Bugfix: original iterated the raw type *string* (missing
                    # eval), yielding one candidate per character.
                    for element in eval(model['parameters']['impurity']['type']):
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['impurity']['value'] = element
                        model_list.append(new_armetadata)

                if model['parameters']['minInstancesPerNode']['value'] > (min_rows_limit / 2):
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['minInstancesPerNode']['value'] = round(
                        model_aux['parameters']['minInstancesPerNode']['value']
                        / min_rows_increment, 0)
                    model_list.append(new_armetadata)

                if scoring_metric['max_depth'][0] >= model['parameters']['maxDepth']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxDepth']['value'] *= max_depth_increment
                    model_list.append(new_armetadata)

                if scoring_metric['trees'][0] >= model['parameters']['numTrees']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['numTrees']['value'] *= ntrees_increment
                    model_list.append(new_armetadata)

            elif model['model'] == 'GeneralizedLinearRegression':
                if deepness == 2 and len(regParam) != 0:
                    for elastic in regParam:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['regParam']['value'] = elastic['value']
                        model_list.append(new_armetadata)
                if deepness == 2:
                    # Bugfix: default to an empty sweep; the original left
                    # 'linklist' unbound (NameError) for families outside the
                    # chain below (e.g. 'binomial').
                    linklist = []
                    if model['parameters']['family']['value'] in ['gaussian', 'gamma']:
                        linklist = ['log', 'inverse']
                    elif model['parameters']['family']['value'] in ['poisson']:
                        linklist = ['log', 'sqrt']
                    elif model['parameters']['family']['value'] in ['tweedie']:
                        linklist = []
                    for linkin in linklist:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['link']['value'] = linkin
                        model_list.append(new_armetadata)

                if model['parameters']['maxIter']['value'] \
                        <= max_interactions_increment:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxIter']['value'] *= interactions_increment
                    model_list.append(new_armetadata)

            elif model['model'] == 'NaiveBayes':
                if deepness == 2 and len(nv_smoothing) != 0:
                    for elastic in nv_smoothing:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['smoothing']['value'] = elastic['value']
                        model_list.append(new_armetadata)

                # Always propose one increased and one decreased smoothing
                for adjusting in ['improvement', 'decrement']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    if adjusting == 'improvement':
                        model_aux['parameters']['smoothing']['value'] = model_aux['parameters']['smoothing'][
                            'value'] * (1 + nv_improvement)
                    else:
                        model_aux['parameters']['smoothing']['value'] = model_aux['parameters']['smoothing'][
                            'value'] * (1 - nv_divisor)
                    model_list.append(new_armetadata)

            elif model['model'] == 'BisectingKMeans':

                new_armetadata = armetadata.copy_template()
                model_aux = new_armetadata['model_parameters']['spark']
                model_aux['parameters']['maxIter']['value'] = \
                    int(model_aux['parameters']['maxIter']['value'] * clustering_increment)
                model_list.append(new_armetadata)

            elif model['model'] == 'KMeans':

                if deepness == 2 and len(eval(model['parameters']['initMode']['type'])) != 0:
                    for element in eval(model['parameters']['initMode']['type']):
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['initMode']['value'] = element
                        model_list.append(new_armetadata)

                new_armetadata = armetadata.copy_template()
                model_aux = new_armetadata['model_parameters']['spark']
                model_aux['parameters']['maxIter']['value'] = \
                    int(model_aux['parameters']['maxIter']['value'] * clustering_increment)
                model_list.append(new_armetadata)

            else:
                return None

        if len(model_list) == 0:
            return None
        else:
            return model_list
Class getting the config file placed at the default location and loading all parameters into an internal variab...
Definition: loadconfig.py:22
Define all objects, functions and structs related to common utilities not associated to one concrete ...
Definition: utils.py:1
def __init__(self, e_c)
Constructor: initializes all framework variables and starts or connects to the Spark cluster. Additionally sta...
def optimize_models(self, armetadata, metric_value, objective, deepness, deep_impact)
Method managing generation of possible optimized Spark models, loaded dynamically on adviser class params...
Define all objects, functions and structs related to load on system all configuration parameter from ...
Definition: loadconfig.py:1