'''
Copyright (C) e2its - All Rights Reserved
 * Unauthorized copying of this file, via any medium is strictly prohibited
 * Proprietary and confidential

 * This file is part of gDayF project.

 * Written by Jose L. Sanchez <e2its.es@gmail.com>, 2016-2019
'''

# Standard library
import codecs
import gzip
import mimetypes
import mmap
from collections import OrderedDict
from copy import deepcopy
from json import dump, dumps, load, loads
from os import path  # bare 'path' is used below (e.g. path.dirname); keep both bindings
from os import path as ospath, chmod, remove
from pathlib import Path
from shutil import rmtree, copy2

# Third-party
from bson.codec_options import CodecOptions
from hdfs import InsecureClient as Client, HdfsError
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure, PyMongoError

# Project-local helpers referenced in this module (original import lines were
# lost in extraction; module path inferred from the gDayF layout — TODO confirm)
from gdayf.common.utils import hash_key, get_model_fw
def __init__(self, e_c):
    """Class constructor.

    :param e_c: experiment/execution context object; only
        ``e_c.config.get_config()['storage']`` is read here, and the context
        itself is kept for later use (``self._ec``).
    """
    # NOTE(review): surrounding lines were lost in extraction; storing the
    # context on self is inferred from later ``self._ec`` usages — TODO confirm.
    self._ec = e_c
    self._config = self._ec.config.get_config()['storage']
def store_file(self, storage_json, filename):
    """Store *filename* on each persistence system listed in *storage_json*
    (['localfs', 'hdfs']).

    Dispatches per entry on ``each_storage_type['type']`` and records the
    resulting hash in the entry's ``hash_value`` field.

    NOTE(review): several original lines were lost in extraction; the
    localfs dispatch target and the return value are reconstructed by
    symmetry with the visible hdfs dispatch — TODO confirm.
    """
    file = open(filename, 'rb')
    mmap_ = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
    for each_storage_type in storage_json:
        if each_storage_type['type'] == 'localfs':
            result, each_storage_type['hash_value'] = \
                self._store_file_to_localfs(each_storage_type, filename)
        elif each_storage_type['type'] == 'hdfs':
            result, each_storage_type['hash_value'] = \
                self._store_file_to_hdfs(each_storage_type, filename)
    # Close the mapping and the file handle (the mmap itself is only needed
    # by the commented-out writer paths kept in the helpers).
    mmap_.close()
    file.close()
    return storage_json
def _store_file_to_hdfs(self, storage_json, filename):
    """Store *filename* on HDFS at ``storage_json['value']``.

    :param storage_json: storage descriptor with 'value' (target path),
        'hash_type' keys.
    :param filename: local file to upload.
    :return: tuple ``(result_code, hash_value)`` — the original return lines
        were lost in extraction and are reconstructed (TODO confirm).
    """
    try:
        client = Client(url=self._config['hdfs']['url'])
    except HdfsError as hexecution_error:
        print(repr(hexecution_error))
        return 1, None
    except IOError as iexecution_error:
        print(repr(iexecution_error))
        return 1, None
    except OSError as oexecution_error:
        print(repr(oexecution_error))
        return 1, None

    # Ensure the target directory exists before uploading.
    self._mkdir_hdfs(path=path.dirname(storage_json['value']),
                     grants=int(self._config['grants'], 8), client=client)
    try:
        client.upload(storage_json['value'], filename, overwrite=True)
        '''with client.write(storage_json['value'], overwrite=True) as wfile:
               while iterator < mmap_.size():
                   wfile.write(mmap_.read())'''  # original mmap writer, kept disabled in source
    except HdfsError as hexecution_error:
        print(repr(hexecution_error))
        return 1, None
    except IOError as iexecution_error:
        print(repr(iexecution_error))
        return 1, None
    except OSError as oexecution_error:
        print(repr(oexecution_error))
        return 1, None
    # Hash is computed over the local source file — TODO confirm (the remote
    # copy cannot be hashed with the local hash_key helper).
    return 0, hash_key(hash_type=storage_json['hash_type'], filename=filename)
def _store_file_to_localfs(self, storage_json, filename):
    """Store *filename* on the local filesystem at ``storage_json['value']``.

    :return: tuple ``(0, hash)`` where *hash* is computed by the project's
        ``hash_key`` helper over the stored copy.
    """
    # Copy only when the target does not already exist (nesting reconstructed
    # — original indentation was lost in extraction; TODO confirm).
    if not ospath.exists(path=storage_json['value']):
        copy2(filename, storage_json['value'])
        '''with open(storage_json['value'], 'wb') as wfile:
               while iterator < mmap_.size():
                   wfile.write(mmap_.read())'''  # original mmap writer, kept disabled in source
        # 'grants' is an octal-string permission mask from configuration.
        chmod(storage_json['value'], int(self._config['grants'], 8))
    return 0, hash_key(hash_type=storage_json['hash_type'],
                       filename=storage_json['value'])
def remove_file(self, load_path):
    """Remove a file from every persistence system listed in *load_path*
    (['localfs', 'hdfs']).

    :return: tuple ``(global_op, entries)`` where *entries* collects the
        storage descriptors the helpers reported back (non-None).
        NOTE(review): the initialisation lines and the per-type helper
        invocations were lost in extraction and are reconstructed — TODO
        confirm the exact accumulation of the per-helper result codes.
    """
    global_op = 0
    storage_metadata = list()
    for each_storage_type in load_path:
        if each_storage_type['type'] == 'localfs':
            op, storage = self._remove_file_to_localfs(each_storage_type)
            global_op += op
            if storage is not None:
                storage_metadata.append(storage)
        elif each_storage_type['type'] == 'hdfs':
            op, storage = self._remove_file_to_hdfs(each_storage_type)
            global_op += op
            if storage is not None:
                storage_metadata.append(storage)
    return global_op, storage_metadata.copy()
def _remove_file_to_hdfs(self, storage_json):
    """Remove ``storage_json['value']`` from HDFS.

    The stored value is a full URL; the ``scheme://host[:port]`` prefix is
    stripped so only the HDFS path remains.

    :return: ``(0, None)`` on successful delete (reconstructed — the
        success-path lines were lost in extraction, TODO confirm), or
        ``(1, storage_json)`` on any failure.
    """
    path = storage_json['value']
    # Keep everything from the first '/' after the authority part.
    url_beginning = path.find('//') + 2
    url_ending = path.find('/', url_beginning)
    path = path[url_ending:]
    try:
        client = Client(url=self._config['hdfs']['url'])
    except HdfsError as hexecution_error:
        print(repr(hexecution_error))
        return 1, storage_json
    except IOError as iexecution_error:
        print(repr(iexecution_error))
        return 1, storage_json
    except OSError as oexecution_error:
        print(repr(oexecution_error))
        return 1, storage_json
    try:
        if client.delete(hdfs_path=path, recursive=True):
            return 0, None
        return 1, storage_json
    except HdfsError as hexecution_error:
        print(repr(hexecution_error))
        return 1, storage_json
    except IOError as iexecution_error:
        print(repr(iexecution_error))
        return 1, storage_json
    except OSError as oexecution_error:
        print(repr(oexecution_error))
        return 1, storage_json
211 def _remove_file_to_localfs(storage_json):
212 if not ospath.exists(path=storage_json[
'value']):
216 if ospath.isdir(storage_json[
'value']):
217 rmtree(storage_json[
'value'])
219 remove(storage_json[
'value'])
222 return 1, storage_json
def store_json(self, storage_json, ar_json, other=None):
    """Store *ar_json* on every persistence system listed in *storage_json*
    (['localfs', 'hdfs', 'mongoDB']).

    :param other: optional extra gDayF-compatible document, additionally
        stored to MongoDB when present.
    NOTE(review): the per-type helper invocations were lost in extraction;
    dispatch targets are reconstructed from the helper names defined in
    this module — TODO confirm arguments (especially the use of *other*).
    """
    '''assert isinstance(storage_json, StorageMetadata)'''
    for each_storage_type in storage_json:
        if each_storage_type['type'] == 'localfs':
            self._store_json_to_localfs(each_storage_type, ar_json)
        elif each_storage_type['type'] == 'hdfs':
            self._store_json_to_hdfs(each_storage_type, ar_json)
        elif each_storage_type['type'] == 'mongoDB':
            self._store_json_to_mongoDB(each_storage_type, ar_json)
            if other is not None:
                self._store_other_to_mongoDB(each_storage_type, other)
259 def _store_json_to_localfs(self, storage_json, ar_json):
265 file = gzip.GzipFile(storage_json[
'value'],
'w')
266 json_str = dumps(ar_json, indent=4)
267 json_bytes = json_str.encode(
'utf-8')
268 file.write(json_bytes)
271 file = codecs.open(storage_json[
'value'],
'w', encoding=
'utf-8')
272 dump(ar_json, file, indent=4, ensure_ascii=
False)
274 chmod(storage_json[
'value'], int(self.
_config[
'grants'], 8))
276 except IOError
as iexecution_error:
277 print(repr(iexecution_error))
def _store_json_to_hdfs(self, storage_json, ar_json, client=None):
    """Store *ar_json* on HDFS at ``storage_json['value']`` (gzip or plain).

    :param client: optional pre-built hdfs client; one is created when
        absent and the fact recorded in *remove_client*.
    NOTE(review): the client-reuse condition, the compression condition and
    the cleanup of a locally created client were lost in extraction and are
    reconstructed — TODO confirm.
    """
    remove_client = False
    if client is None or not isinstance(client, Client):
        try:
            client = Client(url=self._config['hdfs']['url'])
            remove_client = True
        except HdfsError as hexecution_error:
            print(repr(hexecution_error))
            return 1
    # Ensure the target directory exists before writing.
    self._mkdir_hdfs(path=path.dirname(storage_json['value']),
                     grants=int(self._config['grants'], 8), client=client)
    try:
        if storage_json.get('compress', False):
            json_str = dumps(ar_json, indent=4)
            json_bytes = json_str.encode('utf-8')
            client.write(storage_json['value'],
                         data=gzip.compress(json_bytes),
                         overwrite=True)
        else:
            client.write(storage_json['value'],
                         data=dumps(ar_json, indent=4, ensure_ascii=False),
                         encoding='utf-8', overwrite=True)
    except HdfsError as hexecution_error:
        print(repr(hexecution_error))
    except IOError as iexecution_error:
        print(repr(iexecution_error))
    except OSError as oexecution_error:
        print(repr(oexecution_error))
    finally:
        if remove_client:
            del client  # TODO confirm original cleanup semantics
def _store_json_to_mongoDB(self, storage_json, ar_json, client=None):
    """Store (or replace) the analysis document *ar_json* in MongoDB.

    The collection is derived from ``storage_json['value']`` path parts;
    the document is keyed by the framework-specific model_id plus type,
    model_id and timestamp fields.

    Fix: the visible source called ``isinstance(client(MongoClient))``,
    which raises TypeError at runtime; corrected to
    ``isinstance(client, MongoClient)``.

    :param client: optional pre-built MongoClient; one is created (and
        closed afterwards) when absent.
    """
    remove_client = False
    if client is None or not isinstance(client, MongoClient):
        try:
            client = MongoClient(host=self._config['mongoDB']['url'],
                                 port=int(self._config['mongoDB']['port']),
                                 document_class=OrderedDict)
            remove_client = True
        except ConnectionFailure as cexecution_error:
            print(repr(cexecution_error))
            return 1
    description = Path(storage_json['value']).parts
    db = client[self._config['mongoDB']['value']]
    collection = db[description[1]]
    model_id = ar_json['model_parameters'][get_model_fw(ar_json)][
        'parameters']['model_id']['value']
    filter_cond = "model_parameters." + get_model_fw(ar_json) + \
                  ".parameters.model_id.value"
    cond = [{filter_cond: model_id}, {"type": ar_json['type']},
            {"model_id": ar_json['model_id']},
            {"timestamp": ar_json['timestamp']}]
    query = {"$and": cond}
    try:
        count = collection.find(query).count()
        new_ar_json = OrderedDict(ar_json)
        # Branch conditions reconstructed from the visible delete+insert /
        # insert / trace sequence — TODO confirm exact thresholds.
        if count == 1:
            collection.delete_one(query)
            collection.insert(new_ar_json, check_keys=False)
        elif count == 0:
            collection.insert(new_ar_json, check_keys=False)
        else:
            print("Trace: Duplicate Model %s" % model_id)
    except PyMongoError as pexecution_error:
        print(repr(pexecution_error))
    finally:
        if remove_client:
            client.close()
def _store_other_to_mongoDB(self, storage_json, other, client=None):
    """Store a gDayF-compatible document (*other*) in MongoDB under the
    per-user collection ``'<user_id>_<storage value>'``.

    Fix: the visible source called ``isinstance(client(MongoClient))``,
    which raises TypeError at runtime; corrected to
    ``isinstance(client, MongoClient)``.

    :param client: optional pre-built MongoClient; one is created (and
        closed afterwards) when absent.
    """
    remove_client = False
    if client is None or not isinstance(client, MongoClient):
        try:
            client = MongoClient(host=self._config['mongoDB']['url'],
                                 port=int(self._config['mongoDB']['port']),
                                 document_class=OrderedDict)
            remove_client = True
        except ConnectionFailure as cexecution_error:
            print(repr(cexecution_error))
            return 1
    db = client[self._config['mongoDB']['value']]
    collection = db[self._ec.get_id_user() + '_' + storage_json['value']]
    new_ar_json = OrderedDict(other)
    try:
        collection.insert(new_ar_json, check_keys=False)
    except Exception as execution_error:
        print(repr(execution_error))
    finally:
        if remove_client:
            client.close()
def recover_experiment_mongoDB(self, client=None):
    """Recover the current analysis' 'train' documents from MongoDB as a
    list of [ar_metadata].

    Fix: the visible source called ``isinstance(client(MongoClient))``,
    which raises TypeError at runtime; corrected to
    ``isinstance(client, MongoClient)``.

    :param client: optional pre-built MongoClient; one is created (and
        closed afterwards) when absent.
    :return: deep copy of the recovered document list (empty on failure).
    """
    execution_list = list()
    remove_client = False
    if client is None or not isinstance(client, MongoClient):
        try:
            client = MongoClient(host=self._config['mongoDB']['url'],
                                 port=int(self._config['mongoDB']['port']),
                                 document_class=OrderedDict)
            remove_client = True
        except ConnectionFailure as cexecution_error:
            print(repr(cexecution_error))
            return execution_list
    db = client[self._config['mongoDB']['value']]
    collection = db[self._ec.get_id_user()]
    query = {"$and": [{"model_id": self._ec.get_id_analysis()},
                      {"type": "train"}]}
    try:
        for element in collection.find(query):
            execution_list.append(element)
        for element in execution_list:
            # Presumably strips Mongo's internal _id before handing the
            # documents back (original lines lost in extraction — TODO
            # confirm against the repository).
            element.pop('_id', None)
    except PyMongoError as pexecution_error:
        print(repr(pexecution_error))
    finally:
        if remove_client:
            client.close()
    return deepcopy(execution_list)
def mkdir(self, type, path, grants):
    """Check and create *path* on persistence system *type*
    (['localfs', 'hdfs', 'mongoDB']).

    NOTE(review): the branch bodies — and the 'hdfs' branch itself — were
    lost in extraction; dispatch to the matching helpers is reconstructed
    from the helper names defined in this module — TODO confirm.
    """
    if type == 'localfs':
        return self._mkdir_localfs(path, grants)
    elif type == 'hdfs':
        return self._mkdir_hdfs(path=path, grants=grants)
    elif type == 'mongoDB':
        return self._mkdir_mongoDB(path, grants)
455 def _mkdir_localfs(path, grants):
457 Path(path).
mkdir(mode=grants, parents=
True, exist_ok=
True)
def _mkdir_hdfs(self, path, grants, client=None):
    """Check and make directory *path* on HDFS with permission *grants*.

    :param client: optional pre-built hdfs client; a new one is created
        when absent (*remove_client* records that fact).
    NOTE(review): client-creation guard and cleanup were partially lost in
    extraction and are reconstructed — TODO confirm.
    """
    remove_client = False
    if client is None:
        try:
            client = Client(url=self._config['hdfs']['url'])
            remove_client = True
        except HdfsError as hexecution_error:
            print(repr(hexecution_error))
            return 1
    try:
        # Only create when the path does not exist (status is None).
        if client.status(hdfs_path=path, strict=False) is None:
            client.makedirs(hdfs_path=path, permission=grants)
    except HdfsError as hexecution_error:
        print(repr(hexecution_error))
    except IOError as iexecution_error:
        print(repr(iexecution_error))
    except OSError as oexecution_error:
        print(repr(oexecution_error))
    finally:
        if remove_client:
            del client  # TODO confirm original cleanup semantics
500 def _mkdir_mongoDB(path, grants):
def get_ar_from_engine(self, path):
    """Get an ArMetadata structure from the first persistence system that
    holds *path*, trying 'localfs', then 'hdfs', then 'mongoDB'.

    :return: ``(0, ar_metadata)`` on success, ``(1, error_repr_or_None)``
        on error / not found.  NOTE(review): initialisation lines and
        several branch conditions were lost in extraction and are
        reconstructed — TODO confirm against the repository.
    """
    found = False
    for storage in ['localfs', 'hdfs', 'mongoDB']:
        if storage == 'localfs' and not found:
            if ospath.exists(path):
                # guess_type returns (type, encoding); the ENCODING element
                # drives the gzip branch ('gzip' for *.gz files).
                _, type = mimetypes.guess_type(path)
                if type == 'gzip':
                    file = gzip.GzipFile(filename=path, mode='r')
                    json_bytes = file.read()
                    json_str = json_bytes.decode('utf-8')
                    ar_metadata = loads(json_str, object_hook=OrderedDict)
                    file.close()
                else:
                    file = open(path, 'r')
                    ar_metadata = load(file, object_hook=OrderedDict)
                    file.close()
                return 0, ar_metadata
        elif storage == 'hdfs' and not found:
            url = self._config[storage]['url']
            client = Client(url=url)
            try:
                if client.status(hdfs_path=path, strict=False) is not None:
                    _, type = mimetypes.guess_type(path)
                    if type == 'gzip':
                        with client.read(path) as file_hdfs:
                            file = gzip.GzipFile(fileobj=file_hdfs)
                            json_bytes = file.read()
                            json_str = json_bytes.decode('utf-8')
                            ar_metadata = loads(json_str,
                                                object_hook=OrderedDict)
                    else:
                        with client.read(path) as file_hdfs:
                            json_bytes = file_hdfs.read()
                            json_str = json_bytes.decode('utf-8')
                            ar_metadata = loads(json_str,
                                                object_hook=OrderedDict)
                    return 0, ar_metadata
            except HdfsError as hexecution_error:
                print(repr(hexecution_error))
                return 1, repr(hexecution_error)
            except IOError as iexecution_error:
                print(repr(iexecution_error))
                return 1, repr(iexecution_error)
            except OSError as oexecution_error:
                print(repr(oexecution_error))
                return 1, repr(oexecution_error)
        elif storage == 'mongoDB' and not found:
            try:
                client = MongoClient(host=self._config['mongoDB']['url'],
                                     port=int(self._config['mongoDB']['port']),
                                     document_class=OrderedDict)
            except ConnectionFailure as cexecution_error:
                print(repr(cexecution_error))
                return 1, repr(cexecution_error)
            try:
                db = client[self._config['mongoDB']['value']]
                description = Path(path).parts
                if description[1] is not None \
                        and description[2] is not None \
                        and description[3] is not None \
                        and description[4] is not None \
                        and description[1] in db.collection_names():
                    collection = db[description[1]]
                    query1 = {"$and": [{"model_id": description[3]},
                                       {"workflow_id": description[2]}]}
                    for element in collection.find(query1):
                        # Match the framework-specific model_id embedded in
                        # the document (comparison target lost in extraction
                        # — TODO confirm it is description[4]).
                        if element['model_parameters'][get_model_fw(element)][
                                'parameters']['model_id']['value'] \
                                == description[4]:
                            return 0, deepcopy(element)
            except PyMongoError as pexecution_error:
                print(repr(pexecution_error))
                return 1, repr(pexecution_error)
    # Nothing found on any storage (reconstructed fall-through).
    return 1, None
def recover_experiment_mongoDB(self, client=None)
Method used to recover an experiment as [ar_metadata] oriented to store full Analysis_results json bu...
def remove_file(self, load_path)
Method used to remove a file on one persistence system ['localfs', 'hdfs'].
def __init__(self, e_c)
Class Constructor.
def _mkdir_hdfs(self, path, grants, client=None)
Static protected method used to check and make directory on ['hdfs'] Not implemented yet! ...
Define all objects, functions and structs related to common utilities not associated to one concrete ...
def _store_file_to_localfs(self, storage_json, filename)
Protected method used to store a file on ['localfs'] using mmap structure to mana...
def _store_json_to_mongoDB(self, storage_json, ar_json, client=None)
Protected method used to store an ar_json on ['mongoDB'] persistence system oriented to store full An...
def store_file(self, storage_json, filename)
Method used to store a file on one persistence system ['localfs', ' hdfs'] using mmap structure to ma...
def _store_json_to_localfs(self, storage_json, ar_json)
Protected method used to store a json on ['localfs'] persistence system oriented to store full Analys...
def store_json(self, storage_json, ar_json, other=None)
Method used to store a json on all persistence system ['localfs', ' hdfs', ' mongoDB'] oriented to st...
def _store_other_to_mongoDB(self, storage_json, other, client=None)
Protected method used to store a dayf compatible_json on ['mongoDB'] persistence system oriented to s...
def _store_json_to_hdfs(self, storage_json, ar_json, client=None)
Method used to store a json on ['hdfs'] persistence system oriented to store full Analysis_results js...
def _remove_file_to_hdfs(self, storage_json)
Method used to remove a file on one persistence system ['hdfs'].
def get_ar_from_engine(self, path)
Method base to get an ArMetadata Structure from file.
def mkdir(self, type, path, grants)
Method used to check and make directory os similar path structures on all persistence system ['localf...
def _mkdir_localfs(path, grants)
Static protected method used to check and make directory on ['localfs'].
Class to manage transient information between all persistence options and models in a unified way...
def _mkdir_mongoDB(path, grants)
Static protected method used to check and make directory on ['mongoDB'] Not necessary through pymong...
Define all objects, functions and structs related to load on system all configuration parameter from ...
def _remove_file_to_localfs(storage_json)
Method used to remove a file on one persistence system ['localfs'].
def _store_file_to_hdfs(self, storage_json, filename)
Protected method used to store a file on ['hdfs'] using mmap structure to manage multi-persistence...