        self._labels = self._ec.labels.get_config()['messages']['adviser']
        self._config = LoadConfig().get_config()['optimizer']['AdviserStart_rules']['spark']

    def optimize_models(self, armetadata, metric_value, objective, deepness, deep_impact):
        model_list = list()
        model = armetadata['model_parameters'][get_model_fw(armetadata)]
        # Spark tuning rules loaded in the constructor; read below as `config`.
        config = self._config
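        # The adviser's Spark rule set ('AdviserStart_rules' -> 'spark') drives every
        # adjustment below. A plausible shape, inferred from how the values are used
        # (entries are illustrative, not the shipped defaults):
        #   regParam:     [{'value': 0.01}, {'value': 0.1}]
        #   stepSize:     [{'learn': 0.05}, {'learn': 0.1}]
        #   nv_smoothing: [{'value': 0.5}, {'value': 2.0}]
        #   min_rows_limit: 10, max_depth_increment: 2, ...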

        if get_model_fw(armetadata) == 'spark' and metric_value != objective \
                and armetadata['status'] != self._labels['failed_op']:
            try:
                scoring_metric = decode_ordered_dict_to_dataframe(armetadata['metrics']['scoring'])
            except KeyError:
                # No stored scoring metrics for this model: trace it and fall through.
                print("TRACE: Not scoring: " + str(model))
            min_rows_limit = config['min_rows_limit']
            min_rows_increment = config['min_rows_increment']
            max_interactions_increment = config['max_interactions_increment']
            interactions_increment = config['interactions_increment']
            max_depth_increment = config['max_depth_increment']
            ntrees_increment = config['ntrees_increment']
            stepSize = config['stepSize']
            aggregationDepth_increment = config['aggregationDepth_increment']
            regParam = config['regParam']
            elastic_variation = config['elastic_variation']
            nv_smoothing = config['nv_smoothing']
            nv_improvement = config['nv_improvement']
            nv_divisor = config['nv_divisor']
            clustering_increment = config['clustering_increment']
            initstep_increment = config['initstep_increment']
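
            # LinearSVC: at depth 2 sweep the regParam grid, then widen the iteration
            # budget when the solver exhausted it, and deepen the treeAggregate level
            # via aggregationDepth.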
            if model['model'] == 'LinearSVC':
                if deepness == 2 and len(regParam) != 0:
                    for elastic in regParam:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['regParam']['value'] = elastic['value']
                        model_list.append(new_armetadata)
                if model['parameters']['maxIter']['value'] >= scoring_metric['totalIterations'][0] \
                        and scoring_metric['totalIterations'][0] <= max_interactions_increment:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxIter']['value'] *= interactions_increment
                    model_list.append(new_armetadata)
                if model['parameters']['maxIter']['value'] <= max_interactions_increment:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxIter']['value'] *= interactions_increment
                    model_list.append(new_armetadata)
                new_armetadata = armetadata.copy_template()
                model_aux = new_armetadata['model_parameters']['spark']
                model_aux['parameters']['aggregationDepth']['value'] *= aggregationDepth_increment
                model_list.append(new_armetadata)
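
            # LogisticRegression / LinearRegression: same regParam sweep, plus two
            # elastic-net variants nudging elasticNetParam up and down by
            # elastic_variation while keeping it inside [0, 1].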
            elif model['model'] == 'LogisticRegression' or model['model'] == 'LinearRegression':
                if deepness == 2 and len(regParam) != 0:
                    for elastic in regParam:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['regParam']['value'] = elastic['value']
                        model_list.append(new_armetadata)
                if model['parameters']['elasticNetParam']['value'] * (1 + elastic_variation) <= 1.0:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['elasticNetParam']['value'] = \
                        model_aux['parameters']['elasticNetParam']['value'] * (1 + elastic_variation)
                    model_list.append(new_armetadata)
                if model['parameters']['elasticNetParam']['value'] * (1 - elastic_variation) >= 0.0:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    # Mirror of the branch above: the guard tests the decreased value,
                    # so apply (1 - elastic_variation) here.
                    model_aux['parameters']['elasticNetParam']['value'] = \
                        model_aux['parameters']['elasticNetParam']['value'] * (1 - elastic_variation)
                    model_list.append(new_armetadata)
                if model['parameters']['maxIter']['value'] >= scoring_metric['totalIterations'][0] \
                        and scoring_metric['totalIterations'][0] <= max_interactions_increment:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxIter']['value'] *= interactions_increment
                    model_list.append(new_armetadata)
                if model['parameters']['maxIter']['value'] <= max_interactions_increment:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxIter']['value'] *= interactions_increment
                    model_list.append(new_armetadata)
                new_armetadata = armetadata.copy_template()
                model_aux = new_armetadata['model_parameters']['spark']
                model_aux['parameters']['aggregationDepth']['value'] *= aggregationDepth_increment
                model_list.append(new_armetadata)
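
            # Decision trees: relax the leaf-size floor while it stays above half of
            # min_rows_limit, and deepen maxDepth when the fitted tree hit the limit.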
            elif model['model'] == 'DecisionTreeClassifier' or model['model'] == 'DecisionTreeRegressor':
                if model['parameters']['minInstancesPerNode']['value'] > (min_rows_limit / 2):
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['minInstancesPerNode']['value'] = round(
                        model_aux['parameters']['minInstancesPerNode']['value']
                        / min_rows_increment, 0)
                    model_list.append(new_armetadata)
                if scoring_metric['max_depth'][0] >= model['parameters']['maxDepth']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxDepth']['value'] = \
                        model_aux['parameters']['maxDepth']['value'] * max_depth_increment
                    model_list.append(new_armetadata)
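
            # GBTRegressor: at depth 2 cross stepSize with the admissible lossType
            # values (the parameter's 'type' field holds a string-encoded list, hence
            # the eval); later rounds relax leaf size, depth (Spark caps it at 30)
            # and the boosting iteration count.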
            elif model['model'] == 'GBTRegressor':
                if deepness == 2 and len(stepSize) != 0 \
                        and len(eval(model['parameters']['lossType']['type'])) != 0:
                    for stepsize in stepSize:
                        for element in eval(model['parameters']['lossType']['type']):
                            new_armetadata = armetadata.copy_template()
                            model_aux = new_armetadata['model_parameters']['spark']
                            model_aux['parameters']['lossType']['value'] = element
                            model_aux['parameters']['stepSize']['value'] = stepsize['learn']
                            model_list.append(new_armetadata)
                elif deepness == 2 and len(stepSize) != 0:
                    for stepsize in stepSize:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['stepSize']['value'] = stepsize['learn']
                        model_list.append(new_armetadata)
                elif deepness == 2 and len(eval(model['parameters']['lossType']['type'])) != 0:
                    for element in eval(model['parameters']['lossType']['type']):
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        # The loop walks lossType alternatives, so lossType is the
                        # parameter updated here.
                        model_aux['parameters']['lossType']['value'] = element
                        model_list.append(new_armetadata)
                if model['parameters']['minInstancesPerNode']['value'] > (min_rows_limit / 2):
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['minInstancesPerNode']['value'] = round(
                        model_aux['parameters']['minInstancesPerNode']['value']
                        / min_rows_increment, 0)
                    model_list.append(new_armetadata)
                if scoring_metric['max_depth'][0] >= model['parameters']['maxDepth']['value'] \
                        and model['parameters']['maxDepth']['value'] != 30:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    if model_aux['parameters']['maxDepth']['value'] * max_depth_increment > 30:
                        # Clamp to Spark ML's maximum supported tree depth.
                        model_aux['parameters']['maxDepth']['value'] = 30
                    else:
                        model_aux['parameters']['maxDepth']['value'] *= max_depth_increment
                    model_list.append(new_armetadata)
                if scoring_metric['trees'][0] >= model['parameters']['maxIter']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxIter']['value'] *= ntrees_increment
                    model_list.append(new_armetadata)
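
            # GBTClassifier: the same stepSize x lossType exploration as the
            # regressor, followed by leaf-size, depth and iteration growth.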
            elif model['model'] == 'GBTClassifier':
                if deepness == 2 and len(stepSize) != 0 \
                        and len(eval(model['parameters']['lossType']['type'])) != 0:
                    for stepsize in stepSize:
                        for element in eval(model['parameters']['lossType']['type']):
                            new_armetadata = armetadata.copy_template()
                            model_aux = new_armetadata['model_parameters']['spark']
                            model_aux['parameters']['lossType']['value'] = element
                            model_aux['parameters']['stepSize']['value'] = stepsize['learn']
                            model_list.append(new_armetadata)
                elif deepness == 2 and len(stepSize) != 0:
                    for stepsize in stepSize:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['stepSize']['value'] = stepsize['learn']
                        model_list.append(new_armetadata)
                elif deepness == 2 and len(eval(model['parameters']['lossType']['type'])) != 0:
                    # Guard and loop both read the lossType alternatives.
                    for element in eval(model['parameters']['lossType']['type']):
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['lossType']['value'] = element
                        model_list.append(new_armetadata)
                if model['parameters']['minInstancesPerNode']['value'] > (min_rows_limit / 2):
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['minInstancesPerNode']['value'] = round(
                        model_aux['parameters']['minInstancesPerNode']['value']
                        / min_rows_increment, 0)
                    model_list.append(new_armetadata)
                if scoring_metric['max_depth'][0] >= model['parameters']['maxDepth']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxDepth']['value'] *= max_depth_increment
                    model_list.append(new_armetadata)
                if scoring_metric['trees'][0] >= model['parameters']['maxIter']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxIter']['value'] *= ntrees_increment
                    model_list.append(new_armetadata)
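
            # Random forests: at depth 2 cross featureSubsetStrategy with impurity,
            # then relax leaf size, deepen trees and grow numTrees.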
            elif model['model'] == 'RandomForestClassifier' or model['model'] == 'RandomForestRegressor':
                if deepness == 2 and len(eval(model['parameters']['featureSubsetStrategy']['type'])) != 0 \
                        and len(eval(model['parameters']['impurity']['type'])) != 0:
                    for featuresubsetstrategy in eval(model['parameters']['featureSubsetStrategy']['type']):
                        for element in eval(model['parameters']['impurity']['type']):
                            new_armetadata = armetadata.copy_template()
                            model_aux = new_armetadata['model_parameters']['spark']
                            model_aux['parameters']['impurity']['value'] = element
                            model_aux['parameters']['featureSubsetStrategy']['value'] = featuresubsetstrategy
                            model_list.append(new_armetadata)
                elif deepness == 2 and len(eval(model['parameters']['featureSubsetStrategy']['type'])) != 0:
                    for featuresubsetstrategy in eval(model['parameters']['featureSubsetStrategy']['type']):
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['featureSubsetStrategy']['value'] = featuresubsetstrategy
                        model_list.append(new_armetadata)
                elif deepness == 2 and len(eval(model['parameters']['impurity']['type'])) != 0:
                    # Decode the string-encoded list before iterating; looping over the
                    # raw string would walk it character by character.
                    for element in eval(model['parameters']['impurity']['type']):
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['impurity']['value'] = element
                        model_list.append(new_armetadata)
                if model['parameters']['minInstancesPerNode']['value'] > (min_rows_limit / 2):
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['minInstancesPerNode']['value'] = round(
                        model_aux['parameters']['minInstancesPerNode']['value']
                        / min_rows_increment, 0)
                    model_list.append(new_armetadata)
                if scoring_metric['max_depth'][0] >= model['parameters']['maxDepth']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxDepth']['value'] *= max_depth_increment
                    model_list.append(new_armetadata)
                if scoring_metric['trees'][0] >= model['parameters']['numTrees']['value']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['numTrees']['value'] *= ntrees_increment
                    model_list.append(new_armetadata)
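
            # Generalized linear regression: regParam sweep, family-specific link
            # alternatives, then a larger iteration budget.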
            elif model['model'] == 'GeneralizedLinearRegression':
                if deepness == 2 and len(regParam) != 0:
                    for elastic in regParam:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['regParam']['value'] = elastic['value']
                        model_list.append(new_armetadata)
                if model['parameters']['family']['value'] in ['gaussian', 'gamma']:
                    linklist = ['log', 'inverse']
                elif model['parameters']['family']['value'] in ['poisson']:
                    linklist = ['log', 'sqrt']
                else:
                    # Assumption: remaining families (e.g. 'tweedie') get no
                    # alternative links, so the loop below is a no-op instead of
                    # raising a NameError on an unbound linklist.
                    linklist = []
                for linkin in linklist:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['link']['value'] = linkin
                    model_list.append(new_armetadata)
                if model['parameters']['maxIter']['value'] <= max_interactions_increment:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    model_aux['parameters']['maxIter']['value'] *= interactions_increment
                    model_list.append(new_armetadata)
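
            # Naive Bayes: sweep the smoothing grid at depth 2, then emit one
            # increased and one decreased smoothing variant.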
            elif model['model'] == 'NaiveBayes':
                if deepness == 2 and len(nv_smoothing) != 0:
                    for elastic in nv_smoothing:
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['smoothing']['value'] = elastic['value']
                        model_list.append(new_armetadata)
                for adjusting in ['improvement', 'decrement']:
                    new_armetadata = armetadata.copy_template()
                    model_aux = new_armetadata['model_parameters']['spark']
                    if adjusting == 'improvement':
                        model_aux['parameters']['smoothing']['value'] *= (1 + nv_improvement)
                    else:
                        model_aux['parameters']['smoothing']['value'] *= (1 - nv_divisor)
                    model_list.append(new_armetadata)
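
            # BisectingKMeans: only the iteration budget is scaled.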
            elif model['model'] == 'BisectingKMeans':
                new_armetadata = armetadata.copy_template()
                model_aux = new_armetadata['model_parameters']['spark']
                model_aux['parameters']['maxIter']['value'] = \
                    int(model_aux['parameters']['maxIter']['value'] * clustering_increment)
                model_list.append(new_armetadata)
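
            # KMeans: try the alternative initMode values at depth 2, then scale the
            # iteration budget.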
            elif model['model'] == 'KMeans':
                if deepness == 2 and len(eval(model['parameters']['initMode']['type'])) != 0:
                    for element in eval(model['parameters']['initMode']['type']):
                        new_armetadata = armetadata.copy_template()
                        model_aux = new_armetadata['model_parameters']['spark']
                        model_aux['parameters']['initMode']['value'] = element
                        model_list.append(new_armetadata)
                new_armetadata = armetadata.copy_template()
                model_aux = new_armetadata['model_parameters']['spark']
                model_aux['parameters']['maxIter']['value'] = \
                    int(model_aux['parameters']['maxIter']['value'] * clustering_increment)
                model_list.append(new_armetadata)

        if len(model_list) == 0:
            # Assumption: with no derivable variants the adviser signals the caller
            # with None; otherwise it hands back the candidate list.
            return None
        return model_list
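
# A minimal usage sketch (hypothetical wiring; `adviser`, `armetadata` and the
# numbers are placeholders, not values shipped with the framework):
#   candidates = adviser.optimize_models(armetadata, metric_value=0.83,
#                                        objective=0.95, deepness=2, deep_impact=4)
#   for variant in candidates or []:
#       ...  # hand each derived ArMetadata variant back to the execution engine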