# DayF core 1.2.1.2
# DayF (Decision at your Fingertips) is an AutoML freeware development
# framework that lets developers work with Machine Learning models without
# any knowledge of AI, simply by taking a csv dataset and the objective column.
# persistencehandler.py
5 '''
6 Copyright (C) e2its - All Rights Reserved
7  * Unauthorized copying of this file, via any medium is strictly prohibited
8  * Proprietary and confidential
9  *
10  * This file is part of gDayF project.
11  *
12  * Written by Jose L. Sanchez <e2its.es@gmail.com>, 2016-2019
13 '''
14 
15 from json import dump, dumps, load, loads
16 from collections import OrderedDict
17 import codecs
18 from os import path as ospath, chmod, remove
19 from os import path
20 from shutil import rmtree, copy2
21 from pathlib import Path
22 from gdayf.common.utils import hash_key
23 from gdayf.conf.loadconfig import LoadConfig
24 from gdayf.common.utils import get_model_fw
25 from gdayf.common.storagemetadata import StorageMetadata
26 import gzip
27 import mimetypes
28 from hdfs import InsecureClient as Client, HdfsError
29 from pymongo import MongoClient
30 from pymongo.errors import *
31 #from pymongo import *
32 from copy import deepcopy
33 import bson
34 from bson.codec_options import CodecOptions
35 
36 
37 
38 class PersistenceHandler(object):
39 
41  def __init__(self, e_c):
42  self._ec = e_c
43  self._config = self._ec.config.get_config()['storage']
44  self._persistence = self._ec.config.get_config()['persistence']
45 
46 
52  def store_file(self, storage_json, filename):
53  global_op = 0
54 
55  '''try:
56  file = open(filename, 'rb')
57  mmap_ = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
58  except IOError:
59  return 1'''
60 
61  for each_storage_type in storage_json:
62  if each_storage_type['type'] == 'localfs':
63  result, each_storage_type['hash_value'] = self._store_file_to_localfs(each_storage_type, filename)
64  global_op += result
65  elif each_storage_type['type'] == 'hdfs':
66  result, each_storage_type['hash_value'] = self._store_file_to_hdfs(each_storage_type, filename)
67  global_op += result
68  '''mmap_.close()'''
69  return global_op
70 
71 
77  def _store_file_to_hdfs(self, storage_json, filename):
78  try:
79  client = Client(url=self._config['hdfs']['url'])
80  except HdfsError as hexecution_error:
81  print(repr(hexecution_error))
82  return 1, None
83  except IOError as iexecution_error:
84  print(repr(iexecution_error))
85  return 1, None
86  except OSError as oexecution_error:
87  print(repr(oexecution_error))
88  return 1, None
89  try:
90  self._mkdir_hdfs(path=path.dirname(storage_json['value']),
91  grants=self._config['grants'],
92  client=client)
93  #client.write(storage_json['value'], data=mmap_, encoding='utf-8', overwrite=True)
94  client.upload(storage_json['value'], filename, overwrite=True)
95 
96  '''with client.write(storage_json['value'],overwrite=True) as wfile:
97  mmap_.seek(0)
98  iterator = 0
99  while iterator < mmap_.size():
100  wfile.write(mmap_.read())
101  wfile.flush()
102  iterator += 1
103  wfile.close()'''
104 
105  except HdfsError as hexecution_error:
106  print(repr(hexecution_error))
107  return 1, None
108  except IOError as iexecution_error:
109  print(repr(iexecution_error))
110  return 1, None
111  except OSError as oexecution_error:
112  print(repr(oexecution_error))
113  return 1, None
114  finally:
115  del client
116 
117  return 0, None
118 
119 
126  def _store_file_to_localfs(self, storage_json, filename):
127  if not ospath.exists(path=storage_json['value']):
128  try:
129  self._mkdir_localfs(path=path.dirname(storage_json['value']), grants=int(self._config['grants'], 8))
130  copy2(filename, storage_json['value'])
131  '''with open(storage_json['value'], 'wb') as wfile:
132  mmap_.seek(0)
133  iterator = 0
134  while iterator < mmap_.size():
135  wfile.write(mmap_.read())
136  wfile.flush()
137  iterator += 1
138  wfile.close()'''
139  chmod(storage_json['value'], int(self._config['grants'], 8))
140  except IOError:
141  return 1, None
142 
143  return 0, hash_key(hash_type=storage_json['hash_type'], filename=storage_json['value'])
144 
145 
150  def remove_file(self, load_path):
151  global_op = 0
152 
153  storage_metadata = StorageMetadata(self._ec)
154  for each_storage_type in load_path:
155  if each_storage_type['type'] == 'localfs':
156  result, storage = self._remove_file_to_localfs(each_storage_type)
157  if storage is not None:
158  storage_metadata.append(storage)
159  global_op += result
160  elif each_storage_type['type'] == 'hdfs':
161  result, storage = self._remove_file_to_hdfs(each_storage_type)
162  if storage is not None:
163  storage_metadata.append(storage)
164  global_op += result
165  return global_op, storage_metadata.copy()
166 
167 
171  def _remove_file_to_hdfs(self,storage_json):
172  path = storage_json['value']
173  url_beginning = path.find('//') + 2
174  url_ending = path.find('/', url_beginning)
175  path = path[url_ending:]
176 
177  try:
178  client = Client(url=self._config['hdfs']['url'])
179  except HdfsError as hexecution_error:
180  print(repr(hexecution_error))
181  return 1, None
182  except IOError as iexecution_error:
183  print(repr(iexecution_error))
184  return 1, None
185  except OSError as oexecution_error:
186  print(repr(oexecution_error))
187  return 1, None
188  try:
189  if client.delete(hdfs_path=path, recursive=True):
190  return 0, None
191  else:
192  return 1, storage_json
193 
194  except HdfsError as hexecution_error:
195  print(repr(hexecution_error))
196  return 1, storage_json
197  except IOError as iexecution_error:
198  print(repr(iexecution_error))
199  return 1, storage_json
200  except OSError as oexecution_error:
201  print(repr(oexecution_error))
202  return 1, storage_json
203  finally:
204  del client
205 
206 
207 
210  @staticmethod
211  def _remove_file_to_localfs(storage_json):
212  if not ospath.exists(path=storage_json['value']):
213  return 0, None
214  else:
215  try:
216  if ospath.isdir(storage_json['value']):
217  rmtree(storage_json['value'])
218  else:
219  remove(storage_json['value'])
220  return 0, None
221  except OSError:
222  return 1, storage_json
223 
224 
231  def store_json(self, storage_json, ar_json, other=None):
232  '''assert isinstance(storage_json, StorageMetadata)'''
233  global_op = 0
234  for each_storage_type in storage_json:
235  if each_storage_type['type'] == 'localfs':
236  if other is None:
237  global_op += self._store_json_to_localfs(each_storage_type, ar_json)
238  else:
239  global_op += self._store_json_to_localfs(each_storage_type, other)
240  elif each_storage_type['type'] == 'hdfs':
241  if other is None:
242  global_op += self._store_json_to_hdfs(each_storage_type, ar_json)
243  else:
244  global_op += self._store_json_to_hdfs(each_storage_type, other)
245  elif each_storage_type['type'] == 'mongoDB':
246  if other is None:
247  global_op += self._store_json_to_mongoDB(each_storage_type, ar_json)
248  else:
249  global_op += self._store_other_to_mongoDB(each_storage_type, other)
250  return global_op
251 
252 
259  def _store_json_to_localfs(self, storage_json, ar_json):
260  compress = self._persistence['compress_json']
261  #if not ospath.exists(storage_json['value']):
262  try:
263  self._mkdir_localfs(path=path.dirname(storage_json['value']), grants=int(self._config['grants'], 8))
264  if compress:
265  file = gzip.GzipFile(storage_json['value'], 'w')
266  json_str = dumps(ar_json, indent=4)
267  json_bytes = json_str.encode('utf-8')
268  file.write(json_bytes)
269  else:
270  # file = open(storage_json['value'], 'w')
271  file = codecs.open(storage_json['value'], 'w', encoding='utf-8')
272  dump(ar_json, file, indent=4, ensure_ascii=False)
273  file.close()
274  chmod(storage_json['value'], int(self._config['grants'], 8))
275  return 0
276  except IOError as iexecution_error:
277  print(repr(iexecution_error))
278  return 1
279 
280 
288  def _store_json_to_hdfs(self, storage_json, ar_json, client=None):
289  remove_client = False
290  if client is None:
291  client = Client(url=self._config['hdfs']['url'])
292  remove_client = True
293  compress = self._persistence['compress_json']
294  #if not ospath.exists(storage_json['value']):
295  try:
296  self._mkdir_hdfs(path=path.dirname(storage_json['value']),
297  grants=self._config['grants'],
298  client=client)
299  if compress:
300  json_str = dumps(ar_json, indent=4)
301  json_bytes = json_str.encode('utf-8')
302  client.write(storage_json['value'],
303  data=gzip.compress(json_bytes),
304  overwrite=True)
305  else:
306  client.write(storage_json['value'], data=dumps(ar_json, indent=4, ensure_ascii=False),
307  encoding='utf-8', overwrite=True)
308  return 0
309  except HdfsError as hexecution_error:
310  print(repr(hexecution_error))
311  return 1
312  except IOError as iexecution_error:
313  print(repr(iexecution_error))
314  return 1
315  except OSError as oexecution_error:
316  print(repr(oexecution_error))
317  return 1
318  finally:
319  if remove_client:
320  del client
321 
322 
329  def _store_json_to_mongoDB(self, storage_json, ar_json, client=None):
330  remove_client = False
331  if client is None or not isinstance(client(MongoClient)):
332  try:
333  client = MongoClient(host=self._config['mongoDB']['url'],
334  port=int(self._config['mongoDB']['port']),
335  document_class=OrderedDict)
336  remove_client = True
337  except ConnectionFailure as cexecution_error:
338  print(repr(cexecution_error))
339  return 1
340  try:
341  description = Path(storage_json['value']).parts
342  db = client[self._config['mongoDB']['value']]
343  collection = db[description[1]]
344  model_id = ar_json['model_parameters'][get_model_fw(ar_json)]['parameters']['model_id']['value']
345  filter_cond = "model_parameters." + get_model_fw(ar_json) + ".parameters.model_id.value"
346  cond = [{filter_cond: model_id}, {"type": ar_json['type']},
347  {"model_id": ar_json['model_id']}, {"timestamp": ar_json['timestamp']}]
348  query = {"$and": cond}
349 
350  count = collection.find(query).count()
351  new_ar_json = OrderedDict(ar_json)
352  if count == 1:
353  collection.delete_one(query)
354  collection.insert(new_ar_json, check_keys=False)
355  return 0
356  elif count == 0:
357  collection.insert(new_ar_json, check_keys=False)
358  return 0
359  else:
360  print("Trace: Duplicate Model %s" % model_id)
361  return 1
362  finally:
363  if remove_client:
364  client.close()
365 
366 
373  def _store_other_to_mongoDB(self, storage_json, other, client=None):
374 
375  remove_client = False
376  if client is None or not isinstance(client(MongoClient)):
377  try:
378  client = MongoClient(host=self._config['mongoDB']['url'],
379  port=int(self._config['mongoDB']['port']),
380  document_class=OrderedDict)
381  remove_client = True
382  except ConnectionFailure as cexecution_error:
383  print(repr(cexecution_error))
384  return 1
385  try:
386  db = client[self._config['mongoDB']['value']]
387  collection = db[self._ec.get_id_user() + '_' + storage_json['value']]
388  new_ar_json = OrderedDict(other)
389  try:
390  collection.insert(new_ar_json, check_keys=False)
391  return 0
392  except Exception as execution_error:
393  print(repr(execution_error))
394  return 1
395  finally:
396  if remove_client:
397  client.close()
398 
399 
400 
405  def recover_experiment_mongoDB(self, client=None):
406  execution_list = list()
407  remove_client = False
408  if client is None or not isinstance(client(MongoClient)):
409  try:
410  client = MongoClient(host=self._config['mongoDB']['url'],
411  port=int(self._config['mongoDB']['port']),
412  document_class=OrderedDict)
413  remove_client = True
414  except ConnectionFailure as cexecution_error:
415  print(repr(cexecution_error))
416  return execution_list
417  try:
418  db = client[self._config['mongoDB']['value']]
419  collection = db[self._ec.get_id_user()]
420  query = {"$and": [{"model_id": self._ec.get_id_analysis()}, {"type": "train"}]}
421  for element in collection.find(query):
422  execution_list.append(element)
423  for element in execution_list:
424  element.pop('_id')
425  #print(execution_list)
426  except PyMongoError as pexecution_error:
427  print(repr(pexecution_error))
428  finally:
429  if remove_client:
430  client.close()
431  return deepcopy(execution_list)
432 
433 
440  def mkdir(self, type, path, grants):
441  if type == 'localfs':
442  return self._mkdir_localfs(path=path, grants=int(grants, 8))
443  elif type == 'hdfs':
444  return self._mkdir_hdfs(path=path, grants=grants)
445  elif type == 'mongoDB':
446  return self._mkdir_mongoDB(path=path, grants=grants)
447 
448 
454  @staticmethod
455  def _mkdir_localfs(path, grants):
456  try:
457  Path(path).mkdir(mode=grants, parents=True, exist_ok=True)
458  return 0
459  except IOError:
460  return 1
461 
462 
470  def _mkdir_hdfs(self, path, grants, client=None):
471  remove_client = False
472  if client is None:
473  client = Client(url=self._config['hdfs']['url'])
474  remove_client = True
475  try:
476  if client.status(hdfs_path=path, strict=False) is None:
477  client.makedirs(hdfs_path=path, permission=grants)
478  return 0
479  except HdfsError as hexecution_error:
480  print(repr(hexecution_error))
481  return 1
482  except IOError as iexecution_error:
483  print(repr(iexecution_error))
484  return 1
485  except OSError as oexecution_error:
486  print(repr(oexecution_error))
487  return 1
488  finally:
489  if remove_client:
490  del client
491 
492 
499  @staticmethod
500  def _mkdir_mongoDB(path, grants):
501  try:
502  return 0
503  except IOError:
504  return 1
505 
506 
510  def get_ar_from_engine(self, path):
511  found = False
512  for storage in ['localfs', 'hdfs', 'mongoDB']:
513  if storage == 'localfs' and not found:
514  if ospath.exists(path):
515  _, type = mimetypes.guess_type(path)
516  if type == 'gzip':
517  file = gzip.GzipFile(filename=path, mode='r')
518  json_bytes = file.read()
519  json_str = json_bytes.decode('utf-8')
520  ar_metadata = loads(json_str, object_hook=OrderedDict)
521  else:
522  #Bug pending
523  file = open(path, 'r')
524  ar_metadata = load(file, object_hook=OrderedDict)
525  file.close()
526  return 0, ar_metadata
527  elif storage == 'hdfs' and not found:
528  url = self._config[storage]['url']
529  client = Client(url=url)
530  remove_client = True
531  try:
532  if client.status(hdfs_path=path, strict=False) is not None:
533  _, type = mimetypes.guess_type(path)
534  if type == 'gzip':
535  with client.read(path) as file_hdfs:
536  file = gzip.GzipFile(fileobj=file_hdfs)
537  json_bytes = file.read()
538  json_str = json_bytes.decode('utf-8')
539  ar_metadata = loads(json_str, object_hook=OrderedDict)
540  file.close()
541  else:
542  with client.read(path) as file_hdfs:
543  json_bytes = file_hdfs.read()
544  json_str = json_bytes.decode('utf-8')
545  ar_metadata = loads(json_str, object_hook=OrderedDict)
546  return 0, ar_metadata
547  except HdfsError as hexecution_error:
548  print(repr(hexecution_error))
549  return 1, repr(hexecution_error)
550  except IOError as iexecution_error:
551  print(repr(iexecution_error))
552  return 1, repr(iexecution_error)
553  except OSError as oexecution_error:
554  print(repr(oexecution_error))
555  return 1, repr(oexecution_error)
556  finally:
557  if remove_client:
558  del client
559  elif storage == 'mongoDB' and not found:
560  try:
561  client = MongoClient(host=self._config['mongoDB']['url'],
562  port=int(self._config['mongoDB']['port']),
563  document_class=OrderedDict)
564  remove_client = True
565  except ConnectionFailure as cexecution_error:
566  print(repr(cexecution_error))
567  return 1, repr(cexecution_error)
568  try:
569  db = client[self._config['mongoDB']['value']]
570  description = Path(path).parts
571  if description[1] is not None \
572  and description[2] is not None \
573  and description[3] is not None \
574  and description[4] is not None \
575  and description[1] in db.collection_names():
576 
577  collection = db[description[1]]
578  query1 = {"$and": [{"model_id": description[3]},
579  {"workflow_id": description[2]},
580  {'type': 'train'}]
581  }
582  for element in collection.find(query1):
583  # Return the first element found
584  if element['model_parameters'][get_model_fw(element)]['parameters']['model_id']['value'] \
585  == description[4]:
586  element.pop('_id')
587  print(element)
588  return 0, element
589  return 1, None
590 
591  else:
592  return 1, None
593  except PyMongoError as pexecution_error:
594  print(repr(pexecution_error))
595  return 1, repr(pexecution_error)
596  finally:
597  if remove_client:
598  client.close()
599 
600  return 1, None
def recover_experiment_mongoDB(self, client=None)
Method used to recover an experiment as [ar_metadata], oriented to store full Analysis_results json structures.
def remove_file(self, load_path)
Method used to remove a file on one persistence system ['localfs', 'hdfs'].
def _mkdir_hdfs(self, path, grants, client=None)
Protected method used to check and make a directory on ['hdfs'].
Define all objects, functions and structs related to common utilities not associated to one concrete package.
Definition: utils.py:1
def _store_file_to_localfs(self, storage_json, filename)
Protected method used to store a file on ['localfs'].
def _store_json_to_mongoDB(self, storage_json, ar_json, client=None)
Protected method used to store an ar_json on the ['mongoDB'] persistence system, oriented to store full Analysis_results json structures.
def store_file(self, storage_json, filename)
Method used to store a file on one persistence system ['localfs', 'hdfs'].
def _store_json_to_localfs(self, storage_json, ar_json)
Protected method used to store a json on the ['localfs'] persistence system, oriented to store full Analysis_results json structures.
Class storage metadata format: [{value: , fstype: ['localfs', 'hdfs', 'mongoDB'], hash_value: ""}, ...].
def store_json(self, storage_json, ar_json, other=None)
Method used to store a json on all persistence systems ['localfs', 'hdfs', 'mongoDB'], oriented to store full Analysis_results json structures.
def _store_other_to_mongoDB(self, storage_json, other, client=None)
Protected method used to store a dayf-compatible json on the ['mongoDB'] persistence system.
def _store_json_to_hdfs(self, storage_json, ar_json, client=None)
Method used to store a json on the ['hdfs'] persistence system, oriented to store full Analysis_results json structures.
def _remove_file_to_hdfs(self, storage_json)
Method used to remove a file on one persistence system ['hdfs'].
def get_ar_from_engine(self, path)
Base method used to get an ArMetadata structure from file.
def mkdir(self, type, path, grants)
Method used to check and make directories or similar path structures on all persistence systems ['localfs', 'hdfs', 'mongoDB'].
def _mkdir_localfs(path, grants)
Static protected method used to check and make a directory on ['localfs'].
Define all objects, functions and structures related to adding storage information metadata (json structures).
Class to manage transient information between all persistence options and models in a unified way.
def _mkdir_mongoDB(path, grants)
Static protected method used to check and make a directory on ['mongoDB']. Not necessary through pymongo.
Define all objects, functions and structs related to loading all system configuration parameters from file.
Definition: loadconfig.py:1
def _remove_file_to_localfs(storage_json)
Method used to remove a file on one persistence system ['localfs'].
def _store_file_to_hdfs(self, storage_json, filename)
Protected method used to store a file on ['hdfs'].