From 32b4c92e6ab084d0a1cdd633c6c7dd519a6ab296 Mon Sep 17 00:00:00 2001 From: Erik Franz Date: Sun, 7 Oct 2018 13:34:28 +0200 Subject: [PATCH 1/8] added optional block data (ID) support for tile block creation added optional Label data support changed signatures of constructor, "addData" and "selectRandomTiles" renamed "low" and "high" to "main" and "scaled" data renamed "highIsLabel" option to "useScaledData", can now completely omit scaled data renamed "dim_t" param to "blockSize" changed "selectRandomTiles" return data format removed premade tiles option restructured addData input checks --- tensorflow/tools/tilecreator_t.py | 624 ++++++++++++++++++++---------- 1 file changed, 421 insertions(+), 203 deletions(-) diff --git a/tensorflow/tools/tilecreator_t.py b/tensorflow/tools/tilecreator_t.py index 4dce986..a4e64ee 100644 --- a/tensorflow/tools/tilecreator_t.py +++ b/tensorflow/tools/tilecreator_t.py @@ -34,8 +34,14 @@ C_KEY_VORTICITY = 'x' C_KEY_POSITION = 'p' -DATA_KEY_LOW = 0 -DATA_KEY_HIGH= 1 +DATA_KEY_MAIN = 0 +DATA_KEY_SCALED = 1 +DATA_KEY_BLOCK = 2 +DATA_KEY_BLOCK_OFFSET = 3 +DATA_KEY_LABEL= 4 + +DATA_FLAG_ACTIVE='active' +DATA_FLAG_CHANNELS='channels' #keys for augmentation operations AOPS_KEY_ROTATE = 'rot' @@ -50,12 +56,16 @@ 'dens':C_KEY_DEFAULT, 'dens_vel':'d,vx,vy,vz' } + +LOG_LEVEL_INFO=0 +LOG_LEVEL_WARNING=1 +LOG_LEVEL_ERROR=2 class TileCreator(object): - def __init__(self, tileSizeLow, simSizeLow=64, upres=2, dim=2, dim_t=1, overlapping=0, densityMinimum=0.02, premadeTiles=False, partTrain=0.8, partTest=0.2, partVal=0, channelLayout_low=C_LAYOUT['dens_vel'], channelLayout_high=C_LAYOUT['dens'], highIsLabel=False, loadPN=False, padding=0): + def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, dim_t=1, overlapping=0, densityMinimum=0.02, partTrain=0.8, partTest=0.2, partVal=0, channelLayout_main=C_LAYOUT['dens_vel'], channelLayout_scaled=C_LAYOUT['dens'], useScaledData=True, useDataBlocks=False, useLabels=False, padding=0, 
logLevel=LOG_LEVEL_WARNING): ''' - tileSizeLow, simSizeLow: int, [int,int] if 2D, [int,int,int] + tileSize, simSize: int, [int,int] if 2D, [int,int,int] channelLayout: 'key,key,...' the keys are NOT case sensitive and leading and trailing whitespace characters are REMOVED. key: @@ -66,49 +76,57 @@ def __init__(self, tileSizeLow, simSizeLow=64, upres=2, dim=2, dim_t=1, overlapp if x does not exist y,z will be ignored (treaded as 'd'). rest is not yet supported - premadeTiles: cut regular tiles when loading data, can't use data augmentation part(Train|Test|Val): relative size of the different data sets - highIsLabel: high data is not augmented + useScaledData: second structured data set, formerly high res data + useDataBlocks: additional variable size dimension for use in tile-block creation. causes data to be sorted by their id + useLabels: arbitrary data, no augmentation loadHigh: simPath: path to the uni simulation files loadPath: packed simulations are stored here + logLevel: ''' - + self.logLevel = logLevel # DATA DIMENSION self.dim_t = dim_t # same for hi_res or low_res if dim!=2 and dim!=3: self.TCError('Data dimension must be 2 or 3.') self.dim = dim + + # TODO support different input layout (in addData) + # this is the internal data representation used + # TODO extend to 4th dimention 'T' + # N=B, Z=D, Y=H, X=W + self.dimLayout = 'NZYXC' # 'NZYXTC' # TILE SIZE - if np.isscalar(tileSizeLow): - self.tileSizeLow = [tileSizeLow, tileSizeLow, tileSizeLow] - elif len(tileSizeLow)==2 and self.dim==2: - self.tileSizeLow = [1]+tileSizeLow - elif len(tileSizeLow)==3: - self.tileSizeLow = tileSizeLow + if np.isscalar(tileSize): + self.tileSizeLow = [tileSize, tileSize, tileSize] + elif len(tileSize)==2 and self.dim==2: + self.tileSizeLow = [1]+tileSize + elif len(tileSize)==3: + self.tileSizeLow = tileSize else: self.TCError('Tile size mismatch.') self.tileSizeLow = np.asarray(self.tileSizeLow) #SIM SIZE - if np.isscalar(simSizeLow): - self.simSizeLow = [simSizeLow, 
simSizeLow, simSizeLow] - elif len(simSizeLow)==2 and self.dim==2: - self.simSizeLow = [1]+simSizeLow - elif len(simSizeLow)==3: - self.simSizeLow = simSizeLow + if np.isscalar(simSize): + self.simSizeLow = [simSize, simSize, simSize] + elif len(simSize)==2 and self.dim==2: + self.simSizeLow = [1]+simSize + elif len(simSize)==3: + self.simSizeLow = simSize else: self.TCError('Simulation size mismatch.') self.simSizeLow = np.asarray(self.simSizeLow) - if upres < 1: - self.TCError('Upres must be at least 1.') - self.upres = upres - if not highIsLabel: - self.tileSizeHigh = self.tileSizeLow*upres - self.simSizeHigh = self.simSizeLow*upres - else: - self.tileSizeHigh = np.asarray([1]) - self.simSizeHigh = np.asarray([1]) + #if scaleFactor < 1: + # self.TCError('Upres must be at least 1.') + self.upres = scaleFactor + if useScaledData: + self.tileSizeHigh = self.tileSizeLow*scaleFactor + self.simSizeHigh = self.simSizeLow*scaleFactor + #else: + # self.tileSizeHigh = np.asarray([1]) + # self.simSizeHigh = np.asarray([1]) if self.dim==2: self.tileSizeLow[0]=1 @@ -122,80 +140,101 @@ def __init__(self, tileSizeLow, simSizeLow=64, upres=2, dim=2, dim_t=1, overlapp if densityMinimum<0.: self.TCError('densityMinimum can not be negative.') self.densityMinimum = densityMinimum - self.premadeTiles = premadeTiles self.useDataAug = False #CHANNELS self.c_lists = {} - self.c_low, self.c_lists[DATA_KEY_LOW] = self.parseChannels(channelLayout_low) - self.c_high, self.c_lists[DATA_KEY_HIGH] = self.parseChannels(channelLayout_high) + self.c_low, self.c_lists[DATA_KEY_MAIN] = self.parseChannels(channelLayout_low) + self.c_high, self.c_lists[DATA_KEY_SCALED] = self.parseChannels(channelLayout_high) - # print info - print('\n') - print('Dimension: {}, time dimension: {}'.format(self.dim,self.dim_t)) - print('Low-res data:') - print(' channel layout: {}'.format(self.c_low)) - print(' default channels: {}'.format(self.c_lists[DATA_KEY_LOW][C_KEY_DEFAULT])) - if 
len(self.c_lists[DATA_KEY_LOW][C_KEY_VELOCITY])>0: - print(' velocity channels: {}'.format(self.c_lists[DATA_KEY_LOW][C_KEY_VELOCITY])) - if len(self.c_lists[DATA_KEY_LOW][C_KEY_VORTICITY])>0: - print(' vorticity channels: {}'.format(self.c_lists[DATA_KEY_LOW][C_KEY_VORTICITY])) - print('High-res data:') - if highIsLabel: - print(' is Label') - print(' channel layout: {}'.format(self.c_high)) - print(' default channels: {}'.format(self.c_lists[DATA_KEY_HIGH][C_KEY_DEFAULT])) - if len(self.c_lists[DATA_KEY_HIGH][C_KEY_VELOCITY])>0: - print(' velocity channels: {}'.format(self.c_lists[DATA_KEY_HIGH][C_KEY_VELOCITY])) - if len(self.c_lists[DATA_KEY_HIGH][C_KEY_VORTICITY])>0: - print(' vorticity channels: {}'.format(self.c_lists[DATA_KEY_HIGH][C_KEY_VORTICITY])) + #self.channels=len(self.c) self.data_flags = { - DATA_KEY_LOW:{ - 'isLabel':False, - 'channels':len(self.c_low), - C_KEY_VELOCITY:len(self.c_lists[DATA_KEY_LOW][C_KEY_VELOCITY])>0, - C_KEY_VORTICITY:len(self.c_lists[DATA_KEY_LOW][C_KEY_VORTICITY])>0, + DATA_KEY_MAIN:{ + DATA_FLAG_ACTIVE:True, + DATA_FLAG_CHANNELS:len(self.c_low), + C_KEY_VELOCITY:len(self.c_lists[DATA_KEY_MAIN][C_KEY_VELOCITY])>0, + C_KEY_VORTICITY:len(self.c_lists[DATA_KEY_MAIN][C_KEY_VORTICITY])>0, C_KEY_POSITION:False }, - DATA_KEY_HIGH:{ - 'isLabel':highIsLabel, - 'channels':len(self.c_high), - C_KEY_VELOCITY:len(self.c_lists[DATA_KEY_HIGH][C_KEY_VELOCITY])>0, - C_KEY_VORTICITY:len(self.c_lists[DATA_KEY_HIGH][C_KEY_VORTICITY])>0, + DATA_KEY_SCALED:{ + DATA_FLAG_ACTIVE:useScaledData, + DATA_FLAG_CHANNELS:len(self.c_high), + C_KEY_VELOCITY:len(self.c_lists[DATA_KEY_SCALED][C_KEY_VELOCITY])>0, + C_KEY_VORTICITY:len(self.c_lists[DATA_KEY_SCALED][C_KEY_VORTICITY])>0, C_KEY_POSITION:False } + DATA_KEY_BLOCK:{ + DATA_FLAG_ACTIVE:useDataBlocks + } + DATA_KEY_LABEL:{ + DATA_FLAG_ACTIVE:useLabels + } } - if loadPN: - self.TCError('prev and next tiles not supported.') - self.hasPN = loadPN self.padding=padding #if self.hasPN: #[z,y,x, velocities 
an/or position if enabled (density,vel,vel,vel, pos, pos [,pos])] #DATA SHAPES - self.tile_shape_low = np.append(self.tileSizeLow, [self.data_flags[DATA_KEY_LOW]['channels']]) - self.frame_shape_low = np.append(self.simSizeLow, [self.data_flags[DATA_KEY_LOW]['channels']]) - if not self.data_flags[DATA_KEY_HIGH]['isLabel']: - self.tile_shape_high = np.append(self.tileSizeHigh, [self.data_flags[DATA_KEY_HIGH]['channels']]) - self.frame_shape_high = np.append(self.simSizeHigh, [self.data_flags[DATA_KEY_HIGH]['channels']]) - else: - self.tile_shape_high = self.tileSizeHigh[:] - self.frame_shape_high = self.simSizeHigh[:] + self.tile_shape_low = np.append(self.tileSizeLow, [self.data_flags[DATA_KEY_MAIN][DATA_FLAG_CHANNELS]]) + self.frame_shape_low = np.append(self.simSizeLow, [self.data_flags[DATA_KEY_MAIN][DATA_FLAG_CHANNELS]]) + if dataIsActive(DATA_KEY_SCALED): + self.tile_shape_high = np.append(self.tileSizeHigh, [self.data_flags[DATA_KEY_SCALED][DATA_FLAG_CHANNELS]]) + self.frame_shape_high = np.append(self.simSizeHigh, [self.data_flags[DATA_KEY_SCALED][DATA_FLAG_CHANNELS]]) + #else: + # self.tile_shape_high = self.tileSizeHigh[:] + # self.frame_shape_high = self.simSizeHigh[:] self.densityThreshold = (self.densityMinimum * self.tile_shape_low[0] * self.tile_shape_low[1] * self.tile_shape_low[2]) self.data = { - DATA_KEY_LOW:[], - DATA_KEY_HIGH:[] + DATA_KEY_MAIN:[], + DATA_KEY_SCALED:[], + DATA_KEY_BLOCK:[], + DATA_KEY_BLOCK_OFFSET:[], + DATA_KEY_LABEL:[] } all=partTrain+partTest+partVal self.part_train=partTrain/all - self.part_test=partTest/all self.part_validation=partVal/all + self.part_test=partTest/all + + # PRINT INFO + print('\n') + #print('Dimension: {}, time dimension: {}'.format(self.dim,self.dim_t)) + print('Main data:') + print(' channel layout: {}'.format(self.c_low)) + print(' default channels: {}'.format(self.c_lists[DATA_KEY_MAIN][C_KEY_DEFAULT])) + if len(self.c_lists[DATA_KEY_MAIN][C_KEY_VELOCITY])>0: + print(' velocity channels: 
{}'.format(self.c_lists[DATA_KEY_MAIN][C_KEY_VELOCITY])) + if len(self.c_lists[DATA_KEY_MAIN][C_KEY_VORTICITY])>0: + print(' vorticity channels: {}'.format(self.c_lists[DATA_KEY_MAIN][C_KEY_VORTICITY])) + + print('Scaled data:') + if not useScaledData: + print(' not in use') + else: + print(' channel layout: {}'.format(self.c_high)) + print(' default channels: {}'.format(self.c_lists[DATA_KEY_SCALED][C_KEY_DEFAULT])) + if len(self.c_lists[DATA_KEY_SCALED][C_KEY_VELOCITY])>0: + print(' velocity channels: {}'.format(self.c_lists[DATA_KEY_SCALED][C_KEY_VELOCITY])) + if len(self.c_lists[DATA_KEY_SCALED][C_KEY_VORTICITY])>0: + print(' vorticity channels: {}'.format(self.c_lists[DATA_KEY_SCALED][C_KEY_VORTICITY])) + + print('Labels:') + if not useLabels: + print(' not in use') + else: + print(' active') + + print('Data Block ID:') + if not useDataBlocks: + print(' not in use') + else: + print(' active') def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): ''' @@ -214,7 +253,7 @@ def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): ** Any calculation relay on neighborhood will be wrong, for e.g., spacial scaling (zoom). """ self.aops = { - DATA_KEY_LOW:{ + DATA_KEY_MAIN:{ AOPS_KEY_ROTATE:{ C_KEY_VELOCITY:self.rotateVelocities, C_KEY_VORTICITY:self.rotateVelocities @@ -233,7 +272,7 @@ def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): } }, - DATA_KEY_HIGH:{ + DATA_KEY_SCALED:{ AOPS_KEY_ROTATE:{ C_KEY_VELOCITY:self.rotateVelocities, C_KEY_VORTICITY:self.rotateVelocities @@ -292,72 +331,152 @@ def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): - def addData(self, low, high): + def addData(self, low, high=None, blocks=None, labels=None): ''' - add data, tiles if premadeTiles, frames otherwise. 
- low, high: list of or single 3D data np arrays + low: list of or single 3D data np arrays + high: list of or single 3D data np arrays, optional + blocks: list of int or single int, optional + labels: arbitrary data, same amount as low, optional ''' # check data shape - low = np.asarray(low) - high = np.asarray(high) - if not self.data_flags[DATA_KEY_HIGH]['isLabel']: - if len(low.shape)!=len(high.shape): #high-low mismatch - self.TCError('Data shape mismatch. Dimensions: {} low vs {} high. Dimensions must match or use highIsLabel.'.format(len(low.shape),len(high.shape)) ) + # low data checks, low data defines input + low = np.asarray(low) if not (len(low.shape)==4 or len(low.shape)==5): #not single frame or sequence of frames - self.TCError('Input must be single 3D data or sequence of 3D data. Format: ([batch,] z, y, x, channels). For 2D use z=1.') - - if (low.shape[-1]!=(self.dim_t * self.data_flags[DATA_KEY_LOW]['channels'])): - self.TCError('Dim_t ({}) * Channels ({}, {}) configured for LOW-res data don\'t match channels ({}) of input data.'.format(self.dim_t, self.data_flags[DATA_KEY_LOW]['channels'], self.c_low, low.shape[-1]) ) - if not self.data_flags[DATA_KEY_HIGH]['isLabel']: - if (high.shape[-1]!=(self.dim_t * self.data_flags[DATA_KEY_HIGH]['channels'])): - self.TCError('Dim_t ({}) * Channels ({}, {}) configured for HIGH-res data don\'t match channels ({}) of input data.'.format(self.dim_t, self.data_flags[DATA_KEY_HIGH]['channels'], self.c_high, high.shape[-1]) ) + self.TCError('Input must be single 3D data or sequence of 3D data. Dimensions: ([batch,] z, y, x, channels). 
For 2D use z=1.') low_shape = low.shape - high_shape = high.shape + num_data = 1 + single_datum = True if len(low.shape)==5: #sequence - if low.shape[0]!=high.shape[0]: #check amount - self.TCError('Unequal amount of low ({}) and high ({}) data.'.format(low.shape[1], high.shape[1])) # get single data shape low_shape = low_shape[1:] - if not self.data_flags[DATA_KEY_HIGH]['isLabel']: - high_shape = high_shape[1:] - else: high_shape = [1] + num_data = low_shape[0] + single_datum = False else: #single low = [low] - high = [high] + if dataIsActive(DATA_KEY_SCALED): - if self.premadeTiles: - if not (self.dim_t == 1): - self.TCError('Currently, Dim_t = {} > 1 is not supported by premade tiles'.format(self.dim_t)) - if not np.array_equal(low_shape, self.tile_shape_low) or not np.array_equal(high_shape,self.tile_shape_high): - self.TCError('Tile shape mismatch: is - specified\n\tlow: {} - {}\n\thigh {} - {}'.format(low_shape, self.tile_shape_low, high_shape,self.tile_shape_high)) - else: - single_frame_low_shape = list(low_shape) - single_frame_high_shape = list(high_shape) - single_frame_low_shape[-1] = low_shape[-1] // self.dim_t - if not self.data_flags[DATA_KEY_HIGH]['isLabel']: - single_frame_high_shape[-1] = high_shape[-1] // self.dim_t + single_frame_low_shape = list(low_shape) + if not np.array_equal(single_frame_low_shape, self.frame_shape_low): # or not np.array_equal(single_frame_high_shape,self.frame_shape_high): + self.TCError('Low Frame shape mismatch: is {} - specified {}'.format(single_frame_low_shape, self.frame_shape_low))#, single_frame_high_shape,self.frame_shape_high)) + + # high data checks + if dataIsActive(DATA_KEY_SCALED): + if high==None: + self.TCError('High data is active but no high data was provided in addData.') + high = np.asarray(high) + if len(low.shape)!=len(high.shape): #high-low mismatch + self.TCError('Data shape mismatch. Dimensions: {} low vs {} high. 
Dimensions must match.'.format(len(low.shape),len(high.shape)) ) + + if (high.shape[-1]!=(self.dim_t * self.data_flags[DATA_KEY_SCALED][DATA_FLAG_CHANNELS])): + self.TCError('Dim_t ({}) * Channels ({}, {}) configured for HIGH-res data don\'t match channels ({}) of input data.'.format(self.dim_t, self.data_flags[DATA_KEY_SCALED][DATA_FLAG_CHANNELS], self.c_high, high.shape[-1]) ) + high_shape = high.shape + if single_datum: + high = [high] + else: + if low.shape[0]!=high.shape[0]: #check amount + high_shape = high_shape[1:] - if not np.array_equal(single_frame_low_shape, self.frame_shape_low) or not np.array_equal(single_frame_high_shape,self.frame_shape_high): - self.TCError('Frame shape mismatch: is - specified\n\tlow: {} - {}\n\thigh: {} - {}, given dim_t as {}'.format(single_frame_low_shape, self.frame_shape_low, single_frame_high_shape,self.frame_shape_high, self.dim_t)) - - self.data[DATA_KEY_LOW].extend(low) - self.data[DATA_KEY_HIGH].extend(high) + single_frame_high_shape = list(high_shape) + if not np.array_equal(single_frame_high_shape,self.frame_shape_high): + self.TCError('High Frame shape mismatch: is {} - specified {}'.format(single_frame_high_shape,self.frame_shape_high)) + + # block data checks + if dataIsActive(DATA_KEY_BLOCK): + if blocks==None: + self.TCError('Block data is active but no Block data was provided in addData.') + if single_datum: + if not np.isscalar(blocks): #would be a scalar in case of single datum + self.TCError('single datum input needs a scalar Block.') + label = [label] + else: + if np.isscalar(blocks) or len(blocks)!= low.shape[0]: + self.TCError('Unequal amount of low ({}) and Block data.'.format(low.shape[1]) + + + # label data checks + if dataIsActive(DATA_KEY_LABEL): + if labels==None: + self.TCError('Label data is active but no Label data was provided in addData.') + if not single_datum: + if np.isscalar(labels) or len(labels)!= low.shape[0]: + self.TCError('Unequal amount of low ({}) and Label 
data.'.format(low.shape[1]) + + + if not np.array_equal(single_frame_low_shape, self.frame_shape_low) or not np.array_equal(single_frame_high_shape,self.frame_shape_high): + self.TCError('Frame shape mismatch: is - specified\n\tlow: {} - {}\n\thigh: {} - {}'.format(single_frame_low_shape, self.frame_shape_low, single_frame_high_shape,self.frame_shape_high)) + + + + self.data[DATA_KEY_MAIN].extend(low) + if dataIsActive(DATA_KEY_SCALED): + self.data[DATA_KEY_SCALED].extend(high) + + if dataIsActive(DATA_KEY_BLOCK): # using blocks + self.data[DATA_KEY_BLOCK].extend(blocks) + # sort data by blocks + self.data[DATA_KEY_MAIN] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_MAIN]))] + + if dataIsActive(DATA_KEY_SCALED): + self.data[DATA_KEY_SCALED] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_SCALED]))] + + if dataIsActive(DATA_KEY_LABEL): + self.data[DATA_KEY_LABEL] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_LABEL]))] + + self.data[DATA_KEY_BLOCK] = sorted(self.data[DATA_KEY_BLOCK]) + + # blocks, offset, amount + self.data[DATA_KEY_LABEL_OFFSET] = [] + label_set = list(set(self.data[DATA_KEY_BLOCK])) + for i in range(len(label_set)-1): + idx = self.data[DATA_KEY_BLOCK].index(label_set[i]) + n_idx = self.data[DATA_KEY_BLOCK].index(label_set[i+1]) + self.data[DATA_KEY_BLOCK_OFFSET].append((label_set[i], idx, n_idx - idx)) + idx = self.data[DATA_KEY_BLOCK].index(label_set[-1]) + self.data[DATA_KEY_BLOCK_OFFSET].append((label_set[i], idx, len(self.data[DATA_KEY_BLOCK]) - idx)) + + self.data[DATA_KEY_BLOCK_OFFSET] = np.asarray(self.data[DATA_KEY_BLOCK_OFFSET]) + + print('\n') - print('Added {} datasets. Total: {}'.format(low.shape[0], len(self.data[DATA_KEY_LOW]))) + print('Added {} datasets. 
Total: {}'.format(low.shape[0], len(self.data[DATA_KEY_MAIN]))) self.splitSets() def splitSets(self): ''' calculate the set borders for training, testing and validation set ''' - length = len(self.data[DATA_KEY_LOW]) + length = len(self.data[DATA_KEY_MAIN]) + end_train = int( length * self.part_train ) end_test = end_train + int( length * self.part_test ) + # TODO handle block data + # if active strip whole blocks from the end + if dataIsActive(DATA_KEY_BLOCK): + ''' + # get the block the border is in + block_train = self.data[DATA_KEY_BLOCK_OFFSET][np.argmax(self.data[DATA_KEY_BLOCK_OFFSET][:,1:2] > end_train)] + block_test = self.data[DATA_KEY_BLOCK_OFFSET][np.argmax(self.data[DATA_KEY_BLOCK_OFFSET][:,1:2] > end_test)] + # get nearest block border + if block_train[2]/2 < (end_train - block_train[1]): + end_train = block_train[1] + block_train[2] + else: + end_train = block_train[1] + if block_test[2]/2 < (end_test - block_test[1]): + end_test = block_test[1] + block_test[2] + else: + end_test = block_test[1] + ''' + end_train = np.argmax(self.data[DATA_KEY_BLOCK_OFFSET][:,1:2] > end_train) + end_test = np.argmax(self.data[DATA_KEY_BLOCK_OFFSET][:,1:2] > end_test) + #(or simply split by block count...) 
+ + #just store the borders of the different sets to avoid data duplication + # if using block data the index of the first block in DATA_KEY_BLOCK_OFFSET of the set is stored self.setBorders = [end_train, end_test, length] print('Training set: {}'.format(self.setBorders[0])) @@ -369,8 +488,11 @@ def clearData(self): clears the data buffer ''' self.data = { - DATA_KEY_LOW:[], - DATA_KEY_HIGH:[] + DATA_KEY_MAIN:[], + DATA_KEY_SCALED:[], + DATA_KEY_BLOCK:[], + DATA_KEY_BLOCK_OFFSET:[], + DATA_KEY_LABEL:[] } def createTiles(self, data, tileShape, strides=-1): @@ -391,7 +513,8 @@ def createTiles(self, data, tileShape, strides=-1): channels = dataShape[3] noTiles = [ (dataShape[0]-tileShape[0])//strides[0]+1, (dataShape[1]-tileShape[1])//strides[1]+1, (dataShape[2]-tileShape[2])//strides[2]+1 ] tiles = [] - + + # TODO support 4th T dim for tileZ in range(0, noTiles[0]): for tileY in range(0, noTiles[1]): for tileX in range(0, noTiles[2]): @@ -408,6 +531,7 @@ def cutTile(self, data, tileShape, offset=[0,0,0]): ''' cut a tile of with shape and offset ''' + # TODO support 4th T dim offset = np.asarray(offset) tileShape = np.asarray(tileShape) tileShape[-1] = data.shape[-1] @@ -425,11 +549,11 @@ def cutTile(self, data, tileShape, offset=[0,0,0]): ##################################################################################### - def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, tile_t = 1): + def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, blockSize = 1): # ,ouputBlockID=False ''' main method to create baches Return: - shape: [selectionSize, z, y, x, channels * tile_t] + shape: [selectionSize, blockSize, z, y, x, channels] if 2D z = 1 channels: density, [vel x, vel y, vel z], [pos x, pox y, pos z] ''' @@ -439,59 +563,92 @@ def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, tile_ else: if (self.setBorders[1] - self.setBorders[0])<1: self.TCError('no test data.') - if(tile_t > self.dim_t): - 
self.TCError('not enough coherent frames. Requested {}, available {}'.format(tile_t, self.dim_t)) - batch_low = [] - batch_high = [] + # TODO check/handle set borders + if dataIsActive(DATA_KEY_BLOCK) and blockSize > 1: + # check available label block sizes + maxBlockSize = np.max(self.data[DATA_KEY_BLOCK_OFFSET][:,2:]) + if maxBlockSize < blockSize: + self.TCError('No label block with size {} available. Max size is {}.'.format(blockSize, maxBlockSize)) + availableBlocks = np.sum(self.data[DATA_KEY_BLOCK_OFFSET][:,2:] >= blockSize) + if (availableBlocks / self.data[DATA_KEY_BLOCK_OFFSET].shape[0]) < 0.15: + self.TCInfo('only {} of {} labels have a block size at least {}.'.format(availableBlocks, self.data[DATA_KEY_BLOCK_OFFSET].shape[0], blockSize)) + + + batch = {DATA_KEY_MAIN:[]} + #batch_main = [] + if dataIsActive(DATA_KEY_SCALED): + batch[DATA_KEY_SCALED]=[] + if dataIsActive(DATA_KEY_LABEL): + batch[DATA_KEY_LABEL]=[] + #batch_scaled = [] + #batch_label = [] for i in range(selectionSize): if augment and self.useDataAug: #data augmentation - low, high = self.generateTile(isTraining, tile_t) + data = self.generateTile(isTraining, blockSize) else: #cut random tile without augmentation - low, high = self.getRandomDatum(isTraining, tile_t) - if not self.premadeTiles: - low, high = self.getRandomTile(low, high) - batch_low.append(low) - batch_high.append(high) + if augment: + self.TCInfo('Augmentation flag is ingored if data augmentation is not initialized.') + data = self.getRandomDatumDict(isTraining, blockSize) + data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED]) + + for data_key in batch: + batch[data_key].append(data[data_key]) + #batch_main.append(low) + #if dataIsActive(DATA_KEY_SCALED): + # batch_scaled.append(high) + #if dataIsActive(DATA_KEY_LABEL): + # batch_label.append(data[DATA_KEY_LABEL]) - return np.asarray(batch_low), np.asarray(batch_high) - - def generateTile(self, isTraining=True, tile_t = 
1): + batch = [batch[DATA_KEY_MAIN]] + if dataIsActive(DATA_KEY_SCALED): + batch.append(batch[DATA_KEY_SCALED]) + if dataIsActive(DATA_KEY_LABEL): + batch.append(batch[DATA_KEY_LABEL]) + #if dataIsActive(DATA_KEY_BLOCK) and ouputBlockID: + # batch.append(batch[DATA_KEY_BLOCK]) + + #TODO collapse blockSize=1 ? + return batch + + def generateTile(self, isTraining=True, blockSize = 1): ''' - generates a random low-high pair of tiles (data augmentation) + generates random tiles (data augmentation) ''' # get a frame, is a copy to avoid transormations affecting the raw dataset - data = {} - data[DATA_KEY_LOW], data[DATA_KEY_HIGH] = self.getRandomDatum(isTraining, tile_t) - - if not self.premadeTiles: - #cut a tile for faster transformation - if self.do_scaling or self.do_rotation: - factor = 1 - if self.do_rotation: # or self.do_scaling: - factor*=1.5 # scaling: to avoid size errors caused by rounding - if self.do_scaling: - scaleFactor = np.random.uniform(self.scaleFactor[0], self.scaleFactor[1]) - factor/= scaleFactor - tileShapeLow = np.ceil(self.tile_shape_low*factor) - if self.dim==2: - tileShapeLow[0] = 1 - data[DATA_KEY_LOW], data[DATA_KEY_HIGH] = self.getRandomTile(data[DATA_KEY_LOW], data[DATA_KEY_HIGH], tileShapeLow.astype(int)) - - - #random scaling, changes resolution + #data = {} + #: main, [scaled, block, label] + #data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomDatum(isTraining, blockSize) + data = self.getRandomDatumDict(isTraining, blockSize) + + + #cut a tile for faster transformation + if self.do_scaling or self.do_rotation: + factor = 1 + if self.do_rotation: # or self.do_scaling: + factor*=1.5 # scaling: to avoid size errors caused by rounding if self.do_scaling: - data = self.scale(data, scaleFactor) + scaleFactor = np.random.uniform(self.scaleFactor[0], self.scaleFactor[1]) + factor/= scaleFactor + tileShapeLow = np.ceil(self.tile_shape_low*factor) + if self.dim==2: + tileShapeLow[0] = 1 + data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = 
self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED], tileShapeLow.astype(int)) + + + #random scaling, changes resolution + if self.do_scaling: + data = self.scale(data, scaleFactor) - bounds = np.zeros(4) + bounds = np.zeros(4) - #rotate - if self.do_rotation: - bounds = np.array(data[DATA_KEY_LOW].shape)*0.16 #bounds applied on all sides, 1.5*(1-2*0.16)~1 - data = self.rotate(data) + #rotate + if self.do_rotation: + bounds = np.array(data[DATA_KEY_MAIN].shape)*0.16 #bounds applied on all sides, 1.5*(1-2*0.16)~1 + data = self.rotate(data) - #get a tile - data[DATA_KEY_LOW], data[DATA_KEY_HIGH] = self.getRandomTile(data[DATA_KEY_LOW], data[DATA_KEY_HIGH], bounds=bounds) #includes "shifting" + #get a tile + data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED], bounds=bounds) #includes "shifting" if self.do_rot90: rot = np.random.choice(self.cube_rot[self.dim]) @@ -506,31 +663,57 @@ def generateTile(self, isTraining=True, tile_t = 1): # check tile size target_shape_low = np.copy(self.tile_shape_low) - target_shape_high = np.copy(self.tile_shape_high) - target_shape_low[-1] *= tile_t - target_shape_high[-1] *= tile_t + if not np.array_equal(data[DATA_KEY_MAIN].shape,target_shape_low): + self.TCError('Wrong MAIN tile shape after data augmentation. is: {}. goal: {}.'.format(data[DATA_KEY_MAIN].shape, target_shape_low)) - if not np.array_equal(data[DATA_KEY_LOW].shape,target_shape_low) or (not np.array_equal(data[DATA_KEY_HIGH].shape,target_shape_high) and not self.data_flags[DATA_KEY_HIGH]['isLabel']): - self.TCError('Wrong tile shape after data augmentation. is: {},{}. goal: {},{}.'.format(data[DATA_KEY_LOW].shape, data[DATA_KEY_HIGH].shape, target_shape_low, target_shape_high)) + if dataIsActive(DATA_KEY_SCALED): + target_shape_high = np.copy(self.tile_shape_high) + if not np.array_equal(data[DATA_KEY_SCALED].shape,target_shape_high): + self.TCError('Wrong SCALED tile shape after data augmentation. is: {}. 
goal: {}.'.format(data[DATA_KEY_SCALED].shape, target_shape_high)) + + return data - return data[DATA_KEY_LOW], data[DATA_KEY_HIGH] + def getRandomDatumDict(self, isTraining=True, blockSize=1): + data = {} + data[DATA_KEY_MAIN], data[DATA_KEY_SCALED], data[DATA_KEY_BLOCK], data[DATA_KEY_LABEL] = self.getRandomDatum(isTraining, blockSize) + return data - def getRandomDatum(self, isTraining=True, tile_t = 1): - '''returns a copy of a random frame''' - if isTraining: - randNo = randrange(0, self.setBorders[0]) - else: - randNo = randrange(self.setBorders[0], self.setBorders[1]) - randFrame = 0 - if tile_t= blockSize) + randBlock = np.random.choice(blockSet p=availableBlocks) # p=availableBlocks/np.sum(availableBlocks) ? + randOffset = randrange(0, randBlock[2] - blockSize) + randNo = randBlock[1] + randOffset + # random block, random offset in set + pass else: - tile_t = self.dim_t - - return self.getDatum(randNo*self.dim_t+randFrame, tile_t) - - def getDatum(self, index, tile_t = 1): - '''returns a copy of the indicated frame or tile''' + if blockSize!=1: + self.TCWarning('Block size is ignored if block data is inactive.') + blockSize = 1 + if isTraining: + randNo = randrange(0, self.setBorders[0]) + else: + randNo = randrange(self.setBorders[0], self.setBorders[1]) + #randFrame = 0 + #if tile_t 1): begin_ch = (index % self.dim_t) * self.tile_shape_low[-1] @@ -539,14 +722,26 @@ def getDatum(self, index, tile_t = 1): if(self.dim_t > 1): begin_ch_y = (index % self.dim_t) * self.tile_shape_high[-1] end_c_h_y = begin_ch_y + tile_t * self.tile_shape_high[-1] + ''' - if not self.data_flags[DATA_KEY_HIGH]['isLabel']: - return np.copy(self.data[DATA_KEY_LOW][index//self.dim_t][:,:,:,begin_ch:end_ch]), np.copy(self.data[DATA_KEY_HIGH][index//self.dim_t][:,:,:,begin_ch_y:end_c_h_y]) + ret = [np.copy(self.data[DATA_KEY_MAIN][index:index+blockSize])] + if dataIsActive(DATA_KEY_SCALED): + #return np.copy(self.data[DATA_KEY_MAIN][index//self.dim_t][:,:,:,begin_ch:end_ch]), 
np.copy(self.data[DATA_KEY_SCALED][index//self.dim_t][:,:,:,begin_ch_y:end_c_h_y]) + ret.append(np.copy(self.data[DATA_KEY_SCALED][index:index+blockSize])) else: - return np.copy(self.data[DATA_KEY_LOW][index//self.dim_t][:,:,:,begin_ch:end_ch]), np.copy(self.data[DATA_KEY_HIGH][index//self.dim_t]) + ret.append(None) + if dataIsActive(DATA_KEY_BLOCK): + ret.append(np.copy(self.data[DATA_KEY_BLOCK][index:index+blockSize])) + else: + ret.append(None) + if dataIsActive(DATA_KEY_LABEL): + ret.append(np.copy(self.data[DATA_KEY_LABEL][index:index+blockSize])) + else: + ret.append(None) + return ret - def getRandomTile(self, low, high, tileShapeLow=None, bounds=[0,0,0,0]): #bounds to avoid mirrored parts + def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #bounds to avoid mirrored parts ''' cut a random tile (low and high) from a given frame, considers densityMinimum bounds: ignore edges of frames, used to discard mirrored parts after rotation @@ -559,7 +754,7 @@ def getRandomTile(self, low, high, tileShapeLow=None, bounds=[0,0,0,0]): #bounds frameShapeLow = np.asarray(low.shape) if len(low.shape)!=4 or len(tileShapeLow)!=4: self.TCError('Data shape mismatch.') - if len(high.shape)!=4 and not self.data_flags[DATA_KEY_HIGH]['isLabel']: + if high!=None and len(high.shape)!=4: self.TCError('Data shape mismatch.') start = np.ceil(bounds) @@ -584,10 +779,10 @@ def getRandomTile(self, low, high, tileShapeLow=None, bounds=[0,0,0,0]): #bounds offset = np.asarray([randrange(start[0], end[0]), randrange(start[1], end[1]), randrange(start[2], end[2])]) lowTile = self.cutTile(low, tileShapeLow, offset) offset *= offset_up - if not self.data_flags[DATA_KEY_HIGH]['isLabel']: + if high!=None: highTile = self.cutTile(high, tileShapeHigh, offset) else: - highTile = high + highTile = None hasMinDensity = self.hasMinDensity(lowTile) i+=1 return lowTile, highTile @@ -601,11 +796,11 @@ def special_aug(self, data, ops_key, param): wrapper to call the augmentation 
operations specified in self.aops in initAugmentation """ for data_key in data: - if self.data_flags[data_key]['isLabel']: continue + if not dataCanAugment(data_key): continue orig_shape = data[data_key].shape - tile_t = orig_shape[-1] // self.data_flags[data_key]['channels'] + tile_t = orig_shape[-1] // self.data_flags[data_key][DATA_FLAG_CHANNELS] data_array = data[data_key] - if(tile_t > 1): data_array = data[data_key].reshape( (-1, tile_t, self.data_flags[data_key]['channels']) ) + if(tile_t > 1): data_array = data[data_key].reshape( (-1, tile_t, self.data_flags[data_key][DATA_FLAG_CHANNELS]) ) for c_key, op in self.aops[data_key][ops_key].items(): if self.data_flags[data_key][c_key]: data_array = op(data_array, self.c_lists[data_key][c_key], param) @@ -641,7 +836,7 @@ def rotate(self, data): data = self.special_aug(data, AOPS_KEY_ROTATE, rotation_matrix) for data_key in data: - if not self.data_flags[data_key]['isLabel']: + if dataCanAugment(data_key): data[data_key] = self.applyTransform(data[data_key], rotation_matrix.T) @@ -694,7 +889,7 @@ def rotate90(self, data, axes): self.TCError('need 2 axes for rotate90.') for data_key in data: - if not self.data_flags[data_key]['isLabel']: + if dataCanAugment(data_key): data[data_key] = np.rot90(data[data_key], axes=axes) data = self.special_aug(data, AOPS_KEY_ROT90, axes) @@ -725,7 +920,7 @@ def flip(self, data, axes, isFrame=True): #axes=list, flip multiple at once #flip tiles/frames for axis in axes: for data_key in data: - if not self.data_flags[data_key]['isLabel']: + if dataCanAugment(data_key): data[data_key] = np.flip(data[data_key], axis) @@ -765,8 +960,8 @@ def scale(self, data, factor): scale[0] = 1 # to ensure high/low ration stays the same - scale = np.round(np.array(data[DATA_KEY_LOW].shape) * scale )/np.array(data[DATA_KEY_LOW].shape) - if len(data[DATA_KEY_LOW].shape)==5: #frame sequence + scale = np.round(np.array(data[DATA_KEY_MAIN].shape) * scale )/np.array(data[DATA_KEY_MAIN].shape) + if 
len(data[DATA_KEY_MAIN].shape)==5: #frame sequence scale = np.append([1],scale) #apply transform @@ -775,7 +970,7 @@ def scale(self, data, factor): #changes the size of the frame. should work well with getRandomTile(), no bounds needed for data_key in data: - if not self.data_flags[data_key]['isLabel']: + if dataCanAugment(data_key): data[data_key] = scipy.ndimage.zoom( data[data_key], scale, order=self.interpolation_order, mode=self.fill_mode, cval=0.0) #necessary? @@ -858,10 +1053,16 @@ def hasMinDensity(self, tile): return self.getTileDensity(tile) >= (self.densityMinimum * tile.shape[0] * tile.shape[1] * tile.shape[2]) def getTileDensity(self, tile): - if self.data_flags[DATA_KEY_LOW]['channels'] > 1: + if self.data_flags[DATA_KEY_MAIN][DATA_FLAG_CHANNELS] > 1: tile = np.split(tile, [1], axis=-1)[0] return tile.sum( dtype=np.float64 ) + def dataIsActive(self, data_key): + return self.data_flags[data_key][DATA_FLAG_ACTIVE] + + def dataCanAugment(self, data_key): + return (dataIsActive(data_key) and (data_key in self.aops)) + def getFrameTiles(self, index): ''' returns the frame as tiles''' low, high = self.getDatum(index) @@ -960,8 +1161,17 @@ def parseCVorticity(self, c, i, c_types): ##################################################################################### # ERROR HANDLING ##################################################################################### + def TCInfo(self, msg): + if self.logLevel <= LOG_LEVEL_INFO: + print('TC INFO: {}'.format(msg)) + + def TCWarning(self, msg): + if self.logLevel <= LOG_LEVEL_WARNING: + print('TC WARNING: {}'.format(msg)) def TCError(self, msg): + if self.logLevel <= LOG_LEVEL_ERROR: + print('TC ERROR: {}'.format(msg)) raise TilecreatorError(msg) class TilecreatorError(Exception): @@ -1015,9 +1225,9 @@ def savePngsBatch(low,high, TC, path, batchCounter=-1, save_vels=False, dscale=1 # plot velocities , for individual samples if save_vels: for i in range(low.shape[0]): - saveVelChannels(low[i], 
TC.c_lists[DATA_KEY_LOW][C_KEY_VELOCITY], path=path+'low_vel_i{:02d}_'.format(i), name="", scale=vscale ) + saveVelChannels(low[i], TC.c_lists[DATA_KEY_MAIN][C_KEY_VELOCITY], path=path+'low_vel_i{:02d}_'.format(i), name="", scale=vscale ) for i in range(high.shape[0]): - saveVelChannels(high[i], TC.c_lists[DATA_KEY_HIGH][C_KEY_VELOCITY], path=path+'high_vel_i{:02d}_'.format(i), name="", scale=vscale ) + saveVelChannels(high[i], TC.c_lists[DATA_KEY_SCALED][C_KEY_VELOCITY], path=path+'high_vel_i{:02d}_'.format(i), name="", scale=vscale ) # simpler function to output multiple tiles into grayscale pngs @@ -1125,6 +1335,10 @@ def saveRGBChannels(data, path, channel_list, imageCounter=0, value_interval=[-1 img = np.concatenate([channels[i[0]], channels[i[1]], channels[i[2]]], -1) scipy.misc.toimage(img, cmin=-1.0, cmax=1.0).save(path + 'img_rgb_{:04d}.png'.format(imageCounter)) +##################################################################################### +# UNI OUTPUT +##################################################################################### + def save3DasUni(tiles, path, motherUniPath, imageCounter=0, tiles_in_image=[1,1]): ''' tiles_in_image: (y,x) @@ -1182,6 +1396,10 @@ def TDarrayToUni(input, savePath, motherUniPath, imageHeight, imageWidth, imageD uniio.writeUni(savePath, head, fixedArray) +##################################################################################### +# TEMPO BATCH CREATION +##################################################################################### + # ****************************************************************************** # faster functions, batch operations # @@ -1290,7 +1508,7 @@ def selectRandomTempoTiles(self, selectionSize, isTraining=True, augment=False, vel_pos_high_inter = None if adv_flag: # TODO check velocity channels and 3D - macgrid_input = ori_input_shape[:, :, :, :, self.c_lists[DATA_KEY_LOW][C_KEY_VELOCITY][0]] + macgrid_input = ori_input_shape[:, :, :, :, 
self.c_lists[DATA_KEY_MAIN][C_KEY_VELOCITY][0]] macgrid_input = macgrid_input.reshape( (real_batch_sz, self.tileSizeLow[0], self.tileSizeLow[1], self.tileSizeLow[2], 3)) dtArray = np.array([i * dt for i in range(n_t // 2, -n_t // 2, -1)] * batch_sz, dtype=np.float32) if (self.dim == 2): From 4d58ad24edfa66b84261ec513f5bd1c65f721207 Mon Sep 17 00:00:00 2001 From: Erik Franz Date: Fri, 12 Oct 2018 15:57:52 +0200 Subject: [PATCH 2/8] syntax error fixes internal changes (block data handling in tile-cutting) fixes from testing updated test script --- tensorflow/tools/tc_test.py | 38 ++++--- tensorflow/tools/tilecreator_t.py | 179 +++++++++++++++++------------- 2 files changed, 124 insertions(+), 93 deletions(-) diff --git a/tensorflow/tools/tc_test.py b/tensorflow/tools/tc_test.py index 448983e..9bda989 100644 --- a/tensorflow/tools/tc_test.py +++ b/tensorflow/tools/tc_test.py @@ -16,11 +16,14 @@ import fluiddataloader as fdl import numpy as np +dim=2 upRes=4 simSize=256 tileSize=64 recursionDepth = 0 +augment=False + sim_path = '../data_sim/' out_path = '../test_out/' @@ -32,8 +35,12 @@ highfilename = "density_high_%04d.npz" # no high res data in TC, using high data in TC's low res -highIsLabel = True -if highIsLabel: +useScaledData = True +useLabelData = False +useDataBlocks = False +blockSize = 1 + +if not useScaledData: lowfilename = highfilename highfilename = None else: @@ -43,35 +50,38 @@ #load data mfl = ["density", "velocity"]#, "density", "velocity", "density", "velocity" ] mol = [0,0] -mfh = None if highIsLabel else ["density", "velocity"] -moh = None if highIsLabel else [0,0] +mfh = ["density", "velocity"] if useScaledData else None +moh = [0,0] if useScaledData else None floader = fdl.FluidDataLoader( print_info=1, base_path=sim_path, filename=lowfilename, oldNamingScheme=False, filename_y=highfilename, filename_index_max=200, indices=dirIDs, data_fraction=0.5, multi_file_list=mfl, multi_file_idxOff=mol, multi_file_list_y=mfh , multi_file_idxOff_y=moh) x, 
y, xFilenames = floader.get() print(x.shape) -if not highIsLabel: print(y.shape) +if useScaledData: print(y.shape) #save ref: if False: print('Output reference') tileShape = (x.shape[0],simSize,simSize,x.shape[-1]) tiles = np.reshape(x, tileShape) tc.savePngsGrayscale(tiles[:1], out_path + 'ref_low_',imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[2,3]], rgb_interval=[-2,2]) - if not highIsLabel: + if useScaledData: tileShape = (y.shape[0],simSize*upRes,simSize*upRes,y.shape[-1]) tiles = np.reshape(y, tileShape) tc.savePngsGrayscale(tiles[:1], out_path + 'ref_high_',imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[2,3]], rgb_interval=[-2,2]) # tilecreator -TC = tc.TileCreator(tileSizeLow=tileSize, simSizeLow=simSize , dim =2, dim_t = 1,densityMinimum=0.1, upres=upRes, channelLayout_low='d,vx,vy', channelLayout_high='d,vx,vy', premadeTiles=False, highIsLabel=highIsLabel) -TC.initDataAugmentation(2) +TC = tc.TileCreator(tileSize=tileSize, simSize=simSize , dim=dim, densityMinimum=0.1, scaleFactor=upRes, channelLayout_main='d,vx,vy', channelLayout_scaled='d,vx,vy', useScaledData=useScaledData, useLabels=useLabelData, useDataBlocks=useDataBlocks) +if augment: + TC.initDataAugmentation(2) # strip zero z vel of 2D data -x,_ = np.split(x, [3], axis=-1) -if not highIsLabel: y,_ = np.split(y, [3], axis=-1) +if dim==2: + x,_ = np.split(x, [3], axis=-1) + if useScaledData: + y,_ = np.split(y, [3], axis=-1) # add low data with dummy labels -TC.addData(x,np.zeros(x.shape[0]) if highIsLabel else y) +TC.addData(x, y if useScaledData else None) #bx,by = TC.selectRandomTiles(64, True, augment=True) @@ -82,14 +92,14 @@ #test batch: if True: print('Output normal batch') - batch_x, batch_y = TC.selectRandomTiles(selectionSize = 8, augment=True, isTraining=True) + batch_x, batch_y = TC.selectRandomTiles(selectionSize = 8, augment=augment, isTraining=True) print('batch_x shape: {}'.format(batch_x.shape)) 
print('batch_y shape: {}'.format(batch_y.shape)) tileShape = (batch_x.shape[0],tileSize,tileSize,batch_x.shape[-1]) tiles = np.reshape(batch_x, tileShape) tc.savePngsGrayscale(tiles, out_path,imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[1,2]], rgb_interval=[-2,2]) - if not highIsLabel: + if useScaledData: tileShape = (batch_y.shape[0],tileSize*upRes,tileSize*upRes,batch_y.shape[-1]) tiles = np.reshape(batch_y, tileShape) tc.savePngsGrayscale(tiles, out_path + 'high_',imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[1,2]], rgb_interval=[-2,2]) @@ -99,7 +109,7 @@ if False: factors=[upRes**r for r in range(1, recursionDepth+1)] # can be arbitrary, 1 is always included print('Output online scaled batch. factors: {}'.format(factors)) - batch_scaled = TC.selectRandomTilesRecScale(selectionSize = 4, factors=[upRes**r for r in range(1, recursionDepth+1)], augment=True, isTraining=True) + batch_scaled = TC.selectRandomTilesRecScale(selectionSize = 4, factors=[upRes**r for r in range(1, recursionDepth+1)], augment=augment, isTraining=True) for r in range(recursionDepth +1): print('batch {}: {}'.format(r, batch_scaled[r].shape)) imgSz = batch_scaled[r].shape[2]#int((batch_scaled[r].shape[1]//4)**(1.0/2) + 0.5) diff --git a/tensorflow/tools/tilecreator_t.py b/tensorflow/tools/tilecreator_t.py index a4e64ee..4b4c8c8 100644 --- a/tensorflow/tools/tilecreator_t.py +++ b/tensorflow/tools/tilecreator_t.py @@ -63,7 +63,7 @@ class TileCreator(object): - def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, dim_t=1, overlapping=0, densityMinimum=0.02, partTrain=0.8, partTest=0.2, partVal=0, channelLayout_main=C_LAYOUT['dens_vel'], channelLayout_scaled=C_LAYOUT['dens'], useScaledData=True, useDataBlocks=False, useLabels=False, padding=0, logLevel=LOG_LEVEL_WARNING): + def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, overlapping=0, densityMinimum=0.02, partTrain=0.8, partTest=0.2, 
partVal=0, channelLayout_main=C_LAYOUT['dens_vel'], channelLayout_scaled=C_LAYOUT['dens'], useScaledData=True, useDataBlocks=False, useLabels=False, padding=0, logLevel=LOG_LEVEL_WARNING): ''' tileSize, simSize: int, [int,int] if 2D, [int,int,int] channelLayout: 'key,key,...' @@ -87,7 +87,7 @@ def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, dim_t=1, overlapp ''' self.logLevel = logLevel # DATA DIMENSION - self.dim_t = dim_t # same for hi_res or low_res + #self.dim_t = dim_t # same for hi_res or low_res if dim!=2 and dim!=3: self.TCError('Data dimension must be 2 or 3.') self.dim = dim @@ -144,8 +144,8 @@ def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, dim_t=1, overlapp #CHANNELS self.c_lists = {} - self.c_low, self.c_lists[DATA_KEY_MAIN] = self.parseChannels(channelLayout_low) - self.c_high, self.c_lists[DATA_KEY_SCALED] = self.parseChannels(channelLayout_high) + self.c_low, self.c_lists[DATA_KEY_MAIN] = self.parseChannels(channelLayout_main) + self.c_high, self.c_lists[DATA_KEY_SCALED] = self.parseChannels(channelLayout_scaled) #self.channels=len(self.c) @@ -164,10 +164,10 @@ def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, dim_t=1, overlapp C_KEY_VELOCITY:len(self.c_lists[DATA_KEY_SCALED][C_KEY_VELOCITY])>0, C_KEY_VORTICITY:len(self.c_lists[DATA_KEY_SCALED][C_KEY_VORTICITY])>0, C_KEY_POSITION:False - } + }, DATA_KEY_BLOCK:{ DATA_FLAG_ACTIVE:useDataBlocks - } + }, DATA_KEY_LABEL:{ DATA_FLAG_ACTIVE:useLabels } @@ -180,7 +180,7 @@ def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, dim_t=1, overlapp #DATA SHAPES self.tile_shape_low = np.append(self.tileSizeLow, [self.data_flags[DATA_KEY_MAIN][DATA_FLAG_CHANNELS]]) self.frame_shape_low = np.append(self.simSizeLow, [self.data_flags[DATA_KEY_MAIN][DATA_FLAG_CHANNELS]]) - if dataIsActive(DATA_KEY_SCALED): + if self.dataIsActive(DATA_KEY_SCALED): self.tile_shape_high = np.append(self.tileSizeHigh, [self.data_flags[DATA_KEY_SCALED][DATA_FLAG_CHANNELS]]) 
self.frame_shape_high = np.append(self.simSizeHigh, [self.data_flags[DATA_KEY_SCALED][DATA_FLAG_CHANNELS]]) #else: @@ -331,7 +331,7 @@ def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): - def addData(self, low, high=None, blocks=None, labels=None): + def addData(self, main, scaled=None, blocks=None, labels=None): ''' low: list of or single 3D data np arrays high: list of or single 3D data np arrays, optional @@ -341,88 +341,88 @@ def addData(self, low, high=None, blocks=None, labels=None): # check data shape # low data checks, low data defines input - low = np.asarray(low) - if not (len(low.shape)==4 or len(low.shape)==5): #not single frame or sequence of frames + main = np.asarray(main) + if not (len(main.shape)==4 or len(main.shape)==5): #not single frame or sequence of frames self.TCError('Input must be single 3D data or sequence of 3D data. Dimensions: ([batch,] z, y, x, channels). For 2D use z=1.') - low_shape = low.shape + low_shape = main.shape num_data = 1 single_datum = True - if len(low.shape)==5: #sequence + if len(main.shape)==5: #sequence # get single data shape low_shape = low_shape[1:] num_data = low_shape[0] single_datum = False else: #single - low = [low] - if dataIsActive(DATA_KEY_SCALED): + main = [main] single_frame_low_shape = list(low_shape) if not np.array_equal(single_frame_low_shape, self.frame_shape_low): # or not np.array_equal(single_frame_high_shape,self.frame_shape_high): - self.TCError('Low Frame shape mismatch: is {} - specified {}'.format(single_frame_low_shape, self.frame_shape_low))#, single_frame_high_shape,self.frame_shape_high)) - - # high data checks - if dataIsActive(DATA_KEY_SCALED): - if high==None: - self.TCError('High data is active but no high data was provided in addData.') - high = np.asarray(high) - if len(low.shape)!=len(high.shape): #high-low mismatch - self.TCError('Data shape mismatch. Dimensions: {} low vs {} high. 
Dimensions must match.'.format(len(low.shape),len(high.shape)) ) + self.TCError('Main data frame shape mismatch: is {} - specified {}'.format(single_frame_low_shape, self.frame_shape_low))#, single_frame_high_shape,self.frame_shape_high)) + + # scaled data checks + if self.dataIsActive(DATA_KEY_SCALED): + if scaled is None: #not type(scaled) is np.ndarray: #scaled==None: + self.TCError('Scaled data is active but no scaled data NDarray was provided in addData.') + scaled = np.asarray(scaled) + if len(main.shape)!=len(scaled.shape): #high-low mismatch + self.TCError('Data shape mismatch. Dimensions: {} main vs {} scaled. Dimensions must match.'.format(len(main.shape),len(scaled.shape)) ) - if (high.shape[-1]!=(self.dim_t * self.data_flags[DATA_KEY_SCALED][DATA_FLAG_CHANNELS])): - self.TCError('Dim_t ({}) * Channels ({}, {}) configured for HIGH-res data don\'t match channels ({}) of input data.'.format(self.dim_t, self.data_flags[DATA_KEY_SCALED][DATA_FLAG_CHANNELS], self.c_high, high.shape[-1]) ) - high_shape = high.shape + if (scaled.shape[-1]!=self.data_flags[DATA_KEY_SCALED][DATA_FLAG_CHANNELS]): + self.TCError('Channels ({}, {}) configured for SCALED data don\'t match channels ({}) of input data.'.format(self.data_flags[DATA_KEY_SCALED][DATA_FLAG_CHANNELS], self.c_high, scaled.shape[-1]) ) + high_shape = scaled.shape if single_datum: - high = [high] + scaled = [scaled] else: - if low.shape[0]!=high.shape[0]: #check amount + if main.shape[0]!=scaled.shape[0]: #check amount + self.TCError('Unequal amount of main ({}) and Scaled data.'.format(main.shape[1])) high_shape = high_shape[1:] single_frame_high_shape = list(high_shape) if not np.array_equal(single_frame_high_shape,self.frame_shape_high): - self.TCError('High Frame shape mismatch: is {} - specified {}'.format(single_frame_high_shape,self.frame_shape_high)) + self.TCError('Scaled data frame shape mismatch: is {} - specified {}'.format(single_frame_high_shape,self.frame_shape_high)) # block data checks - if 
dataIsActive(DATA_KEY_BLOCK): - if blocks==None: + if self.dataIsActive(DATA_KEY_BLOCK): + if blocks is None: self.TCError('Block data is active but no Block data was provided in addData.') if single_datum: if not np.isscalar(blocks): #would be a scalar in case of single datum self.TCError('single datum input needs a scalar Block.') label = [label] else: - if np.isscalar(blocks) or len(blocks)!= low.shape[0]: - self.TCError('Unequal amount of low ({}) and Block data.'.format(low.shape[1]) + if np.isscalar(blocks) or len(blocks)!= main.shape[0]: + self.TCError('Unequal amount of main ({}) and Block data.'.format(main.shape[1])) # label data checks - if dataIsActive(DATA_KEY_LABEL): - if labels==None: + if self.dataIsActive(DATA_KEY_LABEL): + if labels is None: self.TCError('Label data is active but no Label data was provided in addData.') if not single_datum: - if np.isscalar(labels) or len(labels)!= low.shape[0]: - self.TCError('Unequal amount of low ({}) and Label data.'.format(low.shape[1]) + if np.isscalar(labels) or len(labels)!= main.shape[0]: + self.TCError('Unequal amount of main ({}) and Label data.'.format(main.shape[1])) - if not np.array_equal(single_frame_low_shape, self.frame_shape_low) or not np.array_equal(single_frame_high_shape,self.frame_shape_high): + if (not np.array_equal(single_frame_low_shape, self.frame_shape_low) or not np.array_equal(single_frame_high_shape,self.frame_shape_high)): self.TCError('Frame shape mismatch: is - specified\n\tlow: {} - {}\n\thigh: {} - {}'.format(single_frame_low_shape, self.frame_shape_low, single_frame_high_shape,self.frame_shape_high)) - self.data[DATA_KEY_MAIN].extend(low) - if dataIsActive(DATA_KEY_SCALED): - self.data[DATA_KEY_SCALED].extend(high) + self.data[DATA_KEY_MAIN].extend(main) + if self.dataIsActive(DATA_KEY_SCALED): + self.data[DATA_KEY_SCALED].extend(scaled) - if dataIsActive(DATA_KEY_BLOCK): # using blocks + if self.dataIsActive(DATA_KEY_BLOCK): # using blocks 
self.data[DATA_KEY_BLOCK].extend(blocks) # sort data by blocks self.data[DATA_KEY_MAIN] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_MAIN]))] - if dataIsActive(DATA_KEY_SCALED): + if self.dataIsActive(DATA_KEY_SCALED): self.data[DATA_KEY_SCALED] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_SCALED]))] - if dataIsActive(DATA_KEY_LABEL): + if self.dataIsActive(DATA_KEY_LABEL): self.data[DATA_KEY_LABEL] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_LABEL]))] self.data[DATA_KEY_BLOCK] = sorted(self.data[DATA_KEY_BLOCK]) @@ -441,7 +441,7 @@ def addData(self, low, high=None, blocks=None, labels=None): print('\n') - print('Added {} datasets. Total: {}'.format(low.shape[0], len(self.data[DATA_KEY_MAIN]))) + print('Added {} datasets. Total: {}'.format(main.shape[0], len(self.data[DATA_KEY_MAIN]))) self.splitSets() def splitSets(self): @@ -455,7 +455,7 @@ def splitSets(self): end_test = end_train + int( length * self.part_test ) # TODO handle block data # if active strip whole blocks from the end - if dataIsActive(DATA_KEY_BLOCK): + if self.dataIsActive(DATA_KEY_BLOCK): ''' # get the block the border is in block_train = self.data[DATA_KEY_BLOCK_OFFSET][np.argmax(self.data[DATA_KEY_BLOCK_OFFSET][:,1:2] > end_train)] @@ -530,17 +530,18 @@ def createTiles(self, data, tileShape, strides=-1): def cutTile(self, data, tileShape, offset=[0,0,0]): ''' cut a tile of with shape and offset + data shape: (block,z,y,x,c), tileShape: (z,y,x,c) ''' # TODO support 4th T dim offset = np.asarray(offset) tileShape = np.asarray(tileShape) tileShape[-1] = data.shape[-1] - if np.less(data.shape[:3], tileShape[:3]+offset[:3]).any(): - self.TCError('Can\'t cut tile with shape {} and offset{} from data with shape {}.'.format(tileShape, offset, data.shape)) + if np.less(data.shape[1:4], tileShape[:3]+offset[:3]).any(): + self.TCError('Can\'t cut tile with shape {} and offset{} from data with shape 
{}.'.format(tileShape, offset, data.shape[1:])) - tile = data[offset[0]:offset[0]+tileShape[0], offset[1]:offset[1]+tileShape[1], offset[2]:offset[2]+tileShape[2], :] + tile = data[:, offset[0]:offset[0]+tileShape[0], offset[1]:offset[1]+tileShape[1], offset[2]:offset[2]+tileShape[2], :] - if not np.array_equal(tile.shape,tileShape): + if not np.array_equal(tile.shape[1:],tileShape): self.TCError('Wrong tile shape after cutting. is: {}. goal: {}.'.format(tile.shape,tileShape)) return tile @@ -564,7 +565,10 @@ def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, block if (self.setBorders[1] - self.setBorders[0])<1: self.TCError('no test data.') # TODO check/handle set borders - if dataIsActive(DATA_KEY_BLOCK) and blockSize > 1: + if not self.dataIsActive(DATA_KEY_BLOCK) and blockSize > 1: + self.TCWarning('Block data is inactive, blockSize {} will be ignored.'.format(blockSize)) + blockSize = 1 + if self.dataIsActive(DATA_KEY_BLOCK) and blockSize > 1: # check available label block sizes maxBlockSize = np.max(self.data[DATA_KEY_BLOCK_OFFSET][:,2:]) if maxBlockSize < blockSize: @@ -576,9 +580,9 @@ def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, block batch = {DATA_KEY_MAIN:[]} #batch_main = [] - if dataIsActive(DATA_KEY_SCALED): + if self.dataIsActive(DATA_KEY_SCALED): batch[DATA_KEY_SCALED]=[] - if dataIsActive(DATA_KEY_LABEL): + if self.dataIsActive(DATA_KEY_LABEL): batch[DATA_KEY_LABEL]=[] #batch_scaled = [] #batch_label = [] @@ -594,21 +598,31 @@ def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, block for data_key in batch: batch[data_key].append(data[data_key]) #batch_main.append(low) - #if dataIsActive(DATA_KEY_SCALED): + #if self.dataIsActive(DATA_KEY_SCALED): # batch_scaled.append(high) - #if dataIsActive(DATA_KEY_LABEL): + #if self.dataIsActive(DATA_KEY_LABEL): # batch_label.append(data[DATA_KEY_LABEL]) - batch = [batch[DATA_KEY_MAIN]] - if dataIsActive(DATA_KEY_SCALED): - 
batch.append(batch[DATA_KEY_SCALED]) - if dataIsActive(DATA_KEY_LABEL): - batch.append(batch[DATA_KEY_LABEL]) - #if dataIsActive(DATA_KEY_BLOCK) and ouputBlockID: - # batch.append(batch[DATA_KEY_BLOCK]) + #TODO collapse blockSize=1 + if not self.dataIsActive(DATA_KEY_BLOCK): + #(tiles, block, z,y,x,c) + batch[DATA_KEY_MAIN] = np.squeeze(batch[DATA_KEY_MAIN], axis=1) + if self.dataIsActive(DATA_KEY_SCALED): + batch[DATA_KEY_SCALED] = np.squeeze(batch[DATA_KEY_SCALED], axis=1) + + + + ret_list = [batch[DATA_KEY_MAIN]] + if self.dataIsActive(DATA_KEY_SCALED): + ret_list.append(batch[DATA_KEY_SCALED]) + if self.dataIsActive(DATA_KEY_LABEL): + ret_list.append(batch[DATA_KEY_LABEL]) + #if self.dataIsActive(DATA_KEY_BLOCK) and ouputBlockID: + # ret_list.append(batch[DATA_KEY_BLOCK]) - #TODO collapse blockSize=1 ? - return batch + + + return ret_list def generateTile(self, isTraining=True, blockSize = 1): ''' @@ -666,7 +680,7 @@ def generateTile(self, isTraining=True, blockSize = 1): if not np.array_equal(data[DATA_KEY_MAIN].shape,target_shape_low): self.TCError('Wrong MAIN tile shape after data augmentation. is: {}. goal: {}.'.format(data[DATA_KEY_MAIN].shape, target_shape_low)) - if dataIsActive(DATA_KEY_SCALED): + if self.dataIsActive(DATA_KEY_SCALED): target_shape_high = np.copy(self.tile_shape_high) if not np.array_equal(data[DATA_KEY_SCALED].shape,target_shape_high): self.TCError('Wrong SCALED tile shape after data augmentation. is: {}. 
goal: {}.'.format(data[DATA_KEY_SCALED].shape, target_shape_high)) @@ -680,14 +694,14 @@ def getRandomDatumDict(self, isTraining=True, blockSize=1): def getRandomDatum(self, isTraining=True, blockSize = 1): '''returns a copy of a random frame: main, scaled, block, label''' - if dataIsActive(DATA_KEY_BLOCK): + if self.dataIsActive(DATA_KEY_BLOCK): # find block with sufficient size (should be garanteed by caller that there is one) if isTraining: blockSet = self.data[DATA_KEY_BLOCK_OFFSET][0:self.setBorders[0]] else: blockSet = self.data[DATA_KEY_BLOCK_OFFSET][self.setBorders[0]:self.setBorders[1]] availableBlocks = np.where(blockSet[:,2:] >= blockSize) - randBlock = np.random.choice(blockSet p=availableBlocks) # p=availableBlocks/np.sum(availableBlocks) ? + randBlock = np.random.choice(blockSet, p=availableBlocks) # p=availableBlocks/np.sum(availableBlocks) ? randOffset = randrange(0, randBlock[2] - blockSize) randNo = randBlock[1] + randOffset # random block, random offset in set @@ -725,16 +739,16 @@ def getDatum(self, index, blockSize = 1): ''' ret = [np.copy(self.data[DATA_KEY_MAIN][index:index+blockSize])] - if dataIsActive(DATA_KEY_SCALED): + if self.dataIsActive(DATA_KEY_SCALED): #return np.copy(self.data[DATA_KEY_MAIN][index//self.dim_t][:,:,:,begin_ch:end_ch]), np.copy(self.data[DATA_KEY_SCALED][index//self.dim_t][:,:,:,begin_ch_y:end_c_h_y]) ret.append(np.copy(self.data[DATA_KEY_SCALED][index:index+blockSize])) else: ret.append(None) - if dataIsActive(DATA_KEY_BLOCK): + if self.dataIsActive(DATA_KEY_BLOCK): ret.append(np.copy(self.data[DATA_KEY_BLOCK][index:index+blockSize])) else: ret.append(None) - if dataIsActive(DATA_KEY_LABEL): + if self.dataIsActive(DATA_KEY_LABEL): ret.append(np.copy(self.data[DATA_KEY_LABEL][index:index+blockSize])) else: ret.append(None) @@ -744,6 +758,7 @@ def getDatum(self, index, blockSize = 1): def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #bounds to avoid mirrored parts ''' cut a random tile 
(low and high) from a given frame, considers densityMinimum + input array shape is (block, z,y,x,c), tile shape is (z,y,x,c) bounds: ignore edges of frames, used to discard mirrored parts after rotation ''' @@ -751,12 +766,12 @@ def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #b tileShapeLow = np.copy(self.tile_shape_low) # use copy is very important!!! tileShapeHigh = tileShapeLow*self.upres - frameShapeLow = np.asarray(low.shape) - if len(low.shape)!=4 or len(tileShapeLow)!=4: - self.TCError('Data shape mismatch.') - if high!=None and len(high.shape)!=4: - self.TCError('Data shape mismatch.') - + if len(low.shape)!=5 or len(tileShapeLow)!=4: + self.TCErrorInternal('Data shape mismatch.') + if (not high is None) and len(high.shape)!=5: + self.TCErrorInternal('Data shape mismatch.') + + frameShapeLow = np.asarray(low.shape[1:]) start = np.ceil(bounds) end = frameShapeLow - tileShapeLow + np.ones(4) - start @@ -770,7 +785,7 @@ def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #b # check if possible to cut tile if np.amin((end-start)[:3]) < 0: - self.TCError('Can\'t cut tile {} from frame {} with bounds {}.'.format(tileShapeLow, frameShapeLow, start)) + self.TCErrorInternal('Can\'t cut tile {} from frame {} with bounds {}.'.format(tileShapeLow, frameShapeLow, start)) # cut tile hasMinDensity = False @@ -779,7 +794,7 @@ def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #b offset = np.asarray([randrange(start[0], end[0]), randrange(start[1], end[1]), randrange(start[2], end[2])]) lowTile = self.cutTile(low, tileShapeLow, offset) offset *= offset_up - if high!=None: + if high is not None: highTile = self.cutTile(high, tileShapeHigh, offset) else: highTile = None @@ -1061,7 +1076,7 @@ def dataIsActive(self, data_key): return self.data_flags[data_key][DATA_FLAG_ACTIVE] def dataCanAugment(self, data_key): - return (dataIsActive(data_key) and (data_key in self.aops)) + return 
(self.dataIsActive(data_key) and (data_key in self.aops)) def getFrameTiles(self, index): ''' returns the frame as tiles''' @@ -1174,6 +1189,12 @@ def TCError(self, msg): print('TC ERROR: {}'.format(msg)) raise TilecreatorError(msg) + + def TCErrorInternal(self, msg): + if self.logLevel <= LOG_LEVEL_ERROR: + print('TC INTERNAL ERROR: {}'.format(msg)) + raise TilecreatorError(msg) + class TilecreatorError(Exception): ''' Tilecreator errors ''' From 97509e12120b67b2ff1f7bac052fc75463e9ab95 Mon Sep 17 00:00:00 2001 From: Erik Franz Date: Thu, 1 Nov 2018 15:56:17 +0100 Subject: [PATCH 3/8] added option to selectRandomTiles to remove unused (size=1) dimensions added function to extract data blocks from channels (as provided by fluiddataloader) renamed savePngsGrayscale to savePngs (as it is no longer grayscale only) added different data format support to image output (savePngs) savePngs now returns an updated imageCounter value fixed augmentation handling of block data fixes from testing paramhelper: implemented missing flush function for logger added error logger added a small guide for the TileCreator --- tensorflow/tools/README_TileCreator.txt | 60 ++++ tensorflow/tools/paramhelpers.py | 17 +- tensorflow/tools/tc_test.py | 142 +++++--- tensorflow/tools/tilecreator_t.py | 411 ++++++++++++++++-------- 4 files changed, 454 insertions(+), 176 deletions(-) create mode 100644 tensorflow/tools/README_TileCreator.txt diff --git a/tensorflow/tools/README_TileCreator.txt b/tensorflow/tools/README_TileCreator.txt new file mode 100644 index 0000000..ec4aa5d --- /dev/null +++ b/tensorflow/tools/README_TileCreator.txt @@ -0,0 +1,60 @@ + +TileCreator +create tiles from 2D or 3D simulation data (or other structured data) with different channels. +data augmentation: scaling, shifting and rotation. 
scaling and rotation is also applied to vector data + +optional second set of sturctured data with dimensionality and size matching the main data + +arbitrary label alongside the structured data + + + + +1. Setup +import tilecreator_t as tc +TC = tc.TileCreator(tileSize, simSize=64, dim=2, densityMinimum=0.02, channelLayout_main=C_LAYOUT['dens_vel'], useScaledData=True, channelLayout_scaled=C_LAYOUT['dens'], scaleFactor=2, useDataBlocks=False, useLabels=False, partTrain=0.8, partTest=0.2, partVal=0, logLevel=LOG_LEVEL_WARNING) + +tileSize: size of the tiles to create. must be less or equal the simulation size. Assumed to have enough bounds for rotation augmentation if active. (previously lowTileSize) +simSize: size of the input simulation data. (previously lowSimSize) +dim: dimension of main (and scaled) data. can be 2 or 3. +densityMinimum: minimum avg. density in a tile. To prevent generating empty tiles +channelLayout_main: what type of data the different channels contain as a comma separaed sring of channel keys. used in augmentation. Keys: + d: default. not augmented + v: vector data. needs 2 (2D; x,y) or 3 (2D, 3D; x,y,z) components with matching labels. the order does not matter and one empty label is accepted. format: v[label](x|y|z). examples: '...,vx,vy,vz,...', 'd,vVELx,vVELy,d,vVortz,vVortx,d,vVorty' + +useScaledData: an optional second structured dataset with a fixed scaling factor to the main data. will be augmented to fit the augmentation of the main data (e.g. same rotation) +channelLayout_scaled: same as channelLayout_main for active scaled data +scaleFactor: the scaling factor between main and scaled data, can be 1 + +useDataBlocks: an optional grouping of data using block ids. the ids have to be provided when adding data to the tilecrator. enables the creation of blocks of tiles with matching augmentation (i.e. like an additional dimension that is not agmented). 
can be used to create augmented (time-) sequences of data
+useLabels: an optional set of data that is not augmented. no type or structure is assumed
+partTrain, partTest (, partVal): the relative sizes of data sets for train and testing mode (machine learning). val data is currently unused and inaccessible and should be left at 0.
+logLevel: how much information to print.
+
+1.1 Setup Data Augmentation
+TC.initDataAugmentation(rot=2, minScale=0.85, maxScale=1.15 ,flip=True)
+
+rot: type of rotation augmentation (NOT an angle limit). 1: fixed 90° rotations, 2: full rotation (assumes enough space for boundaries in data), else: no rotation
+minScale, maxScale: limits for scaling. set both to 1 to disable scaling.
+flip: flipping (mirroring) of data
+
+2. Adding data
+TC.addData(main, scaled=None, labels=None, blocks=None)
+
+main: the main data, must match simSize and channels specified in the constructor
+scaled: required when useScaledData==True, ignored otherwise. must match simSize*scaleFactor and channels specified in the constructor. must be the same amount as the main data.
+labels: required when useLabels==True, ignored otherwise. list/iterable of arbitrary data. must be the same amount as the main data.
+blocks: required when useDataBlocks==True, ignored otherwise. list of block ids (int). can be unsorted. will be sorted according to the id, the order of data with the same id (within the same block) is preserved.
+
+3. Batch creation
+TC.selectRandomTiles(selectionSize, isTraining=True, augment=False, blockSize = 1, squeezeZ=False, squeezeBlocks=True)
+
+selectionSize: number of tiles to create
+isTraining: whether to use data from the training or testing set
+augment: whether to augment the data. requires data augmentation to be initialized (TC.initDataAugmentation).
+blockSize: what block size to use if block data is active. ignored otherwise
+squeezeZ: whether to squeeze/collapse the z dimension/axis of main (and scaled) data when using 2D data. 
+squeezeBlocks: whether to squeeze/collapse the block dimension/axis of main (and scaled) when block size is 1 or block data is inactive + +returns: +main[,scaled][,labels]: main and scaled are np.ndarray with shape: n[,b][,z],y,x,c with z,y,x mathching the (scaled) tile size and c channels. labels is a list \ No newline at end of file diff --git a/tensorflow/tools/paramhelpers.py b/tensorflow/tools/paramhelpers.py index e92d606..93c1f03 100644 --- a/tensorflow/tools/paramhelpers.py +++ b/tensorflow/tools/paramhelpers.py @@ -90,6 +90,19 @@ def write(self, message): self.log.write(message) def flush(self): - # to avoid errormsg, " AttributeError: 'Logger' object has no attribute 'flush' " - pass + self.terminal.flush() + self.log.flush() + +class ErrorLogger(object): + def __init__(self, test_path): + self.terminal = sys.stderr + self.log = open(test_path + "logfile.log", "a") + + def write(self, message): + self.terminal.write(message) + self.log.write(message) + + def flush(self): + self.terminal.flush() + self.log.flush() diff --git a/tensorflow/tools/tc_test.py b/tensorflow/tools/tc_test.py index 9bda989..2572436 100644 --- a/tensorflow/tools/tc_test.py +++ b/tensorflow/tools/tc_test.py @@ -12,20 +12,51 @@ #****************************************************************************** import os,sys +from itertools import repeat import tilecreator_t as tc import fluiddataloader as fdl import numpy as np +import paramhelpers as ph -dim=2 -upRes=4 -simSize=256 -tileSize=64 -recursionDepth = 0 -augment=False +out_path = ph.getParam( "basePath", '../test_out/' ) +sim_path = ph.getParam( "basePath", '../data_sim/' ) +randSeed = int(ph.getParam( "randSeed", 1 )) # seed for np and tf initialization + +simSize = int(ph.getParam( "simSize", 256 )) # tiles of low res sim +tileSize = int(ph.getParam( "tileSize", 64 )) # size of low res tiles +upRes = int(ph.getParam( "upRes", 4 )) # single generator scaling factor +dim = int(ph.getParam( "dim", 2 )) # dimension of dataset + 
+augment = int(ph.getParam( "aug", 1 )) # use dataAugmentation or not + +# no high res data in TC, using high data in TC's low res +useScaledData = int(ph.getParam( "scaled", 0 )) +useLabelData = int(ph.getParam( "label", 0 )) +useDataBlocks = int(ph.getParam( "block", 0 )) +blockSize = int(ph.getParam( "blockSize", 1 )) + +batchCount = int(ph.getParam( "batchCount", 1 )) + + +ph.checkUnusedParams() +np.random.seed(randSeed) +#tf.set_random_seed(randSeed) + -sim_path = '../data_sim/' -out_path = '../test_out/' +#if not os.path.exists(out_path): +# os.makedirs(out_path) +test_path,_ = ph.getNextTestPath(0, out_path) +sys.stdout = ph.Logger(test_path) +sys.stderr = ph.ErrorLogger(test_path) + +print('') +print('--- TEST STARTED ---') +print('') + +print("\nUsing parameters:\n"+ph.paramsToString()) + +recursionDepth = 0 fromSim = 1018 toSim = fromSim @@ -34,11 +65,6 @@ lowfilename = "density_low_%04d.npz" highfilename = "density_high_%04d.npz" -# no high res data in TC, using high data in TC's low res -useScaledData = True -useLabelData = False -useDataBlocks = False -blockSize = 1 if not useScaledData: lowfilename = highfilename @@ -48,29 +74,54 @@ tileSize = tileSize//upRes #load data -mfl = ["density", "velocity"]#, "density", "velocity", "density", "velocity" ] -mol = [0,0] -mfh = ["density", "velocity"] if useScaledData else None -moh = [0,0] if useScaledData else None +mfl = ["density", "velocity"] if not useDataBlocks else ["density", "velocity", "density", "velocity", "density", "velocity" ] +mol = [0,0] if not useDataBlocks else [0,0,1,1,2,2] +mfh = None +moh = None +if useScaledData: + mfh = ["density", "velocity"] if not useDataBlocks else ["density", "velocity", "density", "velocity", "density", "velocity" ] + moh = [0,0] if not useDataBlocks else [0,0,1,1,2,2] floader = fdl.FluidDataLoader( print_info=1, base_path=sim_path, filename=lowfilename, oldNamingScheme=False, filename_y=highfilename, filename_index_max=200, indices=dirIDs, data_fraction=0.5, 
multi_file_list=mfl, multi_file_idxOff=mol, multi_file_list_y=mfh , multi_file_idxOff_y=moh) x, y, xFilenames = floader.get() -print(x.shape) -if useScaledData: print(y.shape) +tile_format='NYXC' +z_axis = 1 +if useDataBlocks: + tile_format='NBYXC' + z_axis = 2 + +print('Loaded x shape: {}'.format(x.shape)) +if useScaledData: print('Loaded y shape: {}'.format(y.shape)) +l=None +if useLabelData: + l = list(range(len(x))) + print('\tUsing label data:\n{}'.format(l)) +b=None +if useDataBlocks: + b = [i for item in range(len(x)) for i in repeat(item, 3)] + x = tc.blockFromChannelsToSequence(x, 3) + print('\tUsing block data:\n{}'.format(b)) + print('Extracted blocks x shape: {}'.format(x.shape)) + if useScaledData: + y = tc.blockFromChannelsToSequence(y, 3) + print('Extracted blocks y shape: {}'.format(y.shape)) + if useLabelData: + l = b[:] + #save ref: if False: print('Output reference') tileShape = (x.shape[0],simSize,simSize,x.shape[-1]) tiles = np.reshape(x, tileShape) - tc.savePngsGrayscale(tiles[:1], out_path + 'ref_low_',imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[2,3]], rgb_interval=[-2,2]) + tc.savePngs(tiles[:1], test_path + 'ref_low_',imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[2,3]], rgb_interval=[-2,2]) if useScaledData: tileShape = (y.shape[0],simSize*upRes,simSize*upRes,y.shape[-1]) tiles = np.reshape(y, tileShape) - tc.savePngsGrayscale(tiles[:1], out_path + 'ref_high_',imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[2,3]], rgb_interval=[-2,2]) + tc.savePngs(tiles[:1], test_path + 'ref_high_',imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[2,3]], rgb_interval=[-2,2]) # tilecreator -TC = tc.TileCreator(tileSize=tileSize, simSize=simSize , dim=dim, densityMinimum=0.1, scaleFactor=upRes, channelLayout_main='d,vx,vy', channelLayout_scaled='d,vx,vy', useScaledData=useScaledData, useLabels=useLabelData, 
useDataBlocks=useDataBlocks) +TC = tc.TileCreator(tileSize=tileSize, simSize=simSize , dim=dim, densityMinimum=0.1, scaleFactor=upRes, channelLayout_main='d,vx,vy', channelLayout_scaled='d,vx,vy', useScaledData=useScaledData, useLabels=useLabelData, useDataBlocks=useDataBlocks, logLevel=10) if augment: TC.initDataAugmentation(2) @@ -81,28 +132,39 @@ y,_ = np.split(y, [3], axis=-1) # add low data with dummy labels -TC.addData(x, y if useScaledData else None) +TC.addData(x, y if useScaledData else None, l, b) #bx,by = TC.selectRandomTiles(64, True, augment=True) -if not os.path.exists(out_path): - os.makedirs(out_path) #test batch: if True: - print('Output normal batch') - batch_x, batch_y = TC.selectRandomTiles(selectionSize = 8, augment=augment, isTraining=True) - print('batch_x shape: {}'.format(batch_x.shape)) - print('batch_y shape: {}'.format(batch_y.shape)) - - tileShape = (batch_x.shape[0],tileSize,tileSize,batch_x.shape[-1]) - tiles = np.reshape(batch_x, tileShape) - tc.savePngsGrayscale(tiles, out_path,imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[1,2]], rgb_interval=[-2,2]) - if useScaledData: - tileShape = (batch_y.shape[0],tileSize*upRes,tileSize*upRes,batch_y.shape[-1]) - tiles = np.reshape(batch_y, tileShape) - tc.savePngsGrayscale(tiles, out_path + 'high_',imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[1,2]], rgb_interval=[-2,2]) + imageCounter=0 + for batch_number in range(batchCount): + i=0 + print('\nOutput batch {}'.format(batch_number)) + batch = TC.selectRandomTiles(selectionSize = 8, augment=augment, isTraining=True, blockSize=blockSize, squeezeZ=True) + print('batch_x shape: {}'.format(batch[0].shape)) + + #tileShape = (batch[0].shape[0],tileSize,tileSize,batch[0].shape[-1]) + tiles = batch[i] + i+=1 + #print('tiles_x shape: {}'.format(tiles.shape)) + ic=tc.savePngs(tiles, test_path, tile_format=tile_format,imageCounter=imageCounter, tiles_in_image=[1,1], 
plot_vel_x_y=False, channels=[0], save_rgb = [[1,2]], rgb_interval=[-2,2]) + if useScaledData: + print('batch_y shape: {}'.format(batch[1].shape)) + #tileShape = (batch[1].shape[0],tileSize*upRes,tileSize*upRes,batch[1].shape[-1]) + tiles = batch[i] + i+=1 + #print('tiles_y shape: {}'.format(tiles.shape)) + tc.savePngs(tiles, test_path + 'high_', tile_format=tile_format, imageCounter=imageCounter, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[1,2]], rgb_interval=[-2,2]) + if useLabelData: + tiles = batch[i] + i+=1 + print('labels: {}'.format(tiles)) + print('-> images {} to {}'.format(imageCounter, ic-1)) + imageCounter=ic #test online scaled batch # NOT YET IMPLEMENTED @@ -116,6 +178,8 @@ tileShape = (batch_scaled[r].shape[0], imgSz,imgSz,3) tile = np.reshape(batch_scaled[r], tileShape) print('rec {}: tile shape: {}'.format(r, tile.shape)) - tc.savePngsGrayscale(tile, out_path + 'rec_{}_'.format(r),imageCounter=0, tiles_in_image=[1,1], channels=[0], save_rgb = [[1,2]], rgb_interval=[-2,2], plot_vel_x_y=False) - + tc.savePngsGrayscale(tile, test_path + 'rec_{}_'.format(r),imageCounter=0, tiles_in_image=[1,1], channels=[0], save_rgb = [[1,2]], rgb_interval=[-2,2], plot_vel_x_y=False) +print('') +print('--- TEST FINISHED ---') +print('') diff --git a/tensorflow/tools/tilecreator_t.py b/tensorflow/tools/tilecreator_t.py index 4b4c8c8..270f667 100644 --- a/tensorflow/tools/tilecreator_t.py +++ b/tensorflow/tools/tilecreator_t.py @@ -36,13 +36,16 @@ DATA_KEY_MAIN = 0 DATA_KEY_SCALED = 1 -DATA_KEY_BLOCK = 2 -DATA_KEY_BLOCK_OFFSET = 3 -DATA_KEY_LABEL= 4 +DATA_KEY_LABEL= 2 +DATA_KEY_BLOCK = 3 +DATA_KEY_BLOCK_OFFSET = 4 DATA_FLAG_ACTIVE='active' DATA_FLAG_CHANNELS='channels' +DATA_DIM_LENGTH_SEQUENCE = 5 # seq,z,y,x,c +DATA_DIM_LENGTH_SINGLE = 4 # z,y,x,c + #keys for augmentation operations AOPS_KEY_ROTATE = 'rot' AOPS_KEY_SCALE = 'scale' @@ -57,13 +60,14 @@ 'dens_vel':'d,vx,vy,vz' } -LOG_LEVEL_INFO=0 +LOG_LEVEL_ERROR=0 LOG_LEVEL_WARNING=1 
-LOG_LEVEL_ERROR=2 +LOG_LEVEL_INFO=2 +LOG_LEVEL_DEBUG=3 class TileCreator(object): - def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, overlapping=0, densityMinimum=0.02, partTrain=0.8, partTest=0.2, partVal=0, channelLayout_main=C_LAYOUT['dens_vel'], channelLayout_scaled=C_LAYOUT['dens'], useScaledData=True, useDataBlocks=False, useLabels=False, padding=0, logLevel=LOG_LEVEL_WARNING): + def __init__(self, tileSize, simSize=64, dim=2, densityMinimum=0.02, channelLayout_main=C_LAYOUT['dens_vel'], useScaledData=True, channelLayout_scaled=C_LAYOUT['dens'], scaleFactor=2, useDataBlocks=False, useLabels=False, partTrain=0.8, partTest=0.2, partVal=0, logLevel=LOG_LEVEL_WARNING): ''' tileSize, simSize: int, [int,int] if 2D, [int,int,int] channelLayout: 'key,key,...' @@ -118,21 +122,23 @@ def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, overlapping=0, de self.TCError('Simulation size mismatch.') self.simSizeLow = np.asarray(self.simSizeLow) + if self.dim==2: + self.tileSizeLow[0]=1 + self.simSizeLow[0]=1 + #if scaleFactor < 1: # self.TCError('Upres must be at least 1.') self.upres = scaleFactor if useScaledData: self.tileSizeHigh = self.tileSizeLow*scaleFactor self.simSizeHigh = self.simSizeLow*scaleFactor + if self.dim==2: + self.tileSizeHigh[0]=1 + self.simSizeHigh[0]=1 #else: # self.tileSizeHigh = np.asarray([1]) # self.simSizeHigh = np.asarray([1]) - if self.dim==2: - self.tileSizeLow[0]=1 - self.tileSizeHigh[0]=1 - self.simSizeLow[0]=1 - self.simSizeHigh[0]=1 if np.less(self.simSizeLow, self.tileSizeLow).any(): self.TCError('Tile size {} can not be larger than sim size {}, {}.'.format(self.tileSizeLow,self.simSizeLow)) @@ -165,14 +171,14 @@ def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, overlapping=0, de C_KEY_VORTICITY:len(self.c_lists[DATA_KEY_SCALED][C_KEY_VORTICITY])>0, C_KEY_POSITION:False }, - DATA_KEY_BLOCK:{ - DATA_FLAG_ACTIVE:useDataBlocks - }, DATA_KEY_LABEL:{ DATA_FLAG_ACTIVE:useLabels + }, + DATA_KEY_BLOCK:{ + 
DATA_FLAG_ACTIVE:useDataBlocks } } - self.padding=padding + self.padding= 0 #padding #no longer used #if self.hasPN: #[z,y,x, velocities an/or position if enabled (density,vel,vel,vel, pos, pos [,pos])] @@ -192,9 +198,9 @@ def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, overlapping=0, de self.data = { DATA_KEY_MAIN:[], DATA_KEY_SCALED:[], + DATA_KEY_LABEL:[], DATA_KEY_BLOCK:[], - DATA_KEY_BLOCK_OFFSET:[], - DATA_KEY_LABEL:[] + DATA_KEY_BLOCK_OFFSET:[] } all=partTrain+partTest+partVal @@ -203,43 +209,50 @@ def __init__(self, tileSize, simSize=64, scaleFactor=2, dim=2, overlapping=0, de self.part_test=partTest/all # PRINT INFO - print('\n') + TC_setup_info = '\n' + def addInfoLine(line): + nonlocal TC_setup_info + TC_setup_info += line + '\n' + #print('Dimension: {}, time dimension: {}'.format(self.dim,self.dim_t)) - print('Main data:') - print(' channel layout: {}'.format(self.c_low)) - print(' default channels: {}'.format(self.c_lists[DATA_KEY_MAIN][C_KEY_DEFAULT])) + addInfoLine('Main data:') + + addInfoLine(' channel layout: {}'.format(self.c_low)) + addInfoLine(' default channels: {}'.format(self.c_lists[DATA_KEY_MAIN][C_KEY_DEFAULT])) if len(self.c_lists[DATA_KEY_MAIN][C_KEY_VELOCITY])>0: - print(' velocity channels: {}'.format(self.c_lists[DATA_KEY_MAIN][C_KEY_VELOCITY])) + addInfoLine(' velocity channels: {}'.format(self.c_lists[DATA_KEY_MAIN][C_KEY_VELOCITY])) if len(self.c_lists[DATA_KEY_MAIN][C_KEY_VORTICITY])>0: - print(' vorticity channels: {}'.format(self.c_lists[DATA_KEY_MAIN][C_KEY_VORTICITY])) + addInfoLine(' vorticity channels: {}'.format(self.c_lists[DATA_KEY_MAIN][C_KEY_VORTICITY])) - print('Scaled data:') + addInfoLine('Scaled data:') if not useScaledData: - print(' not in use') + addInfoLine(' not in use') else: - print(' channel layout: {}'.format(self.c_high)) - print(' default channels: {}'.format(self.c_lists[DATA_KEY_SCALED][C_KEY_DEFAULT])) + addInfoLine(' channel layout: {}'.format(self.c_high)) + addInfoLine(' default 
channels: {}'.format(self.c_lists[DATA_KEY_SCALED][C_KEY_DEFAULT])) if len(self.c_lists[DATA_KEY_SCALED][C_KEY_VELOCITY])>0: - print(' velocity channels: {}'.format(self.c_lists[DATA_KEY_SCALED][C_KEY_VELOCITY])) + addInfoLine(' velocity channels: {}'.format(self.c_lists[DATA_KEY_SCALED][C_KEY_VELOCITY])) if len(self.c_lists[DATA_KEY_SCALED][C_KEY_VORTICITY])>0: - print(' vorticity channels: {}'.format(self.c_lists[DATA_KEY_SCALED][C_KEY_VORTICITY])) + addInfoLine(' vorticity channels: {}'.format(self.c_lists[DATA_KEY_SCALED][C_KEY_VORTICITY])) - print('Labels:') + addInfoLine('Labels:') if not useLabels: - print(' not in use') + addInfoLine(' not in use') else: - print(' active') + addInfoLine(' active') - print('Data Block ID:') + addInfoLine('Data Block ID:') if not useDataBlocks: - print(' not in use') + addInfoLine(' not in use') else: - print(' active') + addInfoLine(' active') + + self.TCInfo(TC_setup_info) def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): ''' set up data augmentation - rot: 1: 90 degree rotations; 2: full rotation; else: nop rotation + rot: 1: 90 degree rotations; 2: full rotation; else: no rotation Scale: if both 1 disable scaling ''' self.useDataAug = True @@ -292,7 +305,7 @@ def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): } } - msg = 'data augmentation: ' + msg = 'setup data augmentation: ' if rot==2: self.do_rotation = True @@ -325,13 +338,13 @@ def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): if self.do_flip: msg += 'flip' msg += '.' 
- print(msg) + self.TCInfo(msg) self.interpolation_order = 1 self.fill_mode = 'constant' - def addData(self, main, scaled=None, blocks=None, labels=None): + def addData(self, main, scaled=None, labels=None, blocks=None): ''' low: list of or single 3D data np arrays high: list of or single 3D data np arrays, optional @@ -389,10 +402,12 @@ def addData(self, main, scaled=None, blocks=None, labels=None): if single_datum: if not np.isscalar(blocks): #would be a scalar in case of single datum self.TCError('single datum input needs a scalar Block.') - label = [label] + blocks = [blocks] else: - if np.isscalar(blocks) or len(blocks)!= main.shape[0]: - self.TCError('Unequal amount of main ({}) and Block data.'.format(main.shape[1])) + if np.isscalar(blocks): + self.TCError('Block data must be an array-like') + if len(blocks)!= main.shape[0]: + self.TCError('Unequal amount of main ({}) and Block ({}) data.'.format(main.shape[0], len(blocks))) # label data checks @@ -405,19 +420,21 @@ def addData(self, main, scaled=None, blocks=None, labels=None): - if (not np.array_equal(single_frame_low_shape, self.frame_shape_low) or not np.array_equal(single_frame_high_shape,self.frame_shape_high)): - self.TCError('Frame shape mismatch: is - specified\n\tlow: {} - {}\n\thigh: {} - {}'.format(single_frame_low_shape, self.frame_shape_low, single_frame_high_shape,self.frame_shape_high)) + #if (not np.array_equal(single_frame_low_shape, self.frame_shape_low) or not np.array_equal(single_frame_high_shape,self.frame_shape_high)): + # self.TCError('Frame shape mismatch: is - specified\n\tlow: {} - {}\n\thigh: {} - {}'.format(single_frame_low_shape, self.frame_shape_low, single_frame_high_shape,self.frame_shape_high)) self.data[DATA_KEY_MAIN].extend(main) if self.dataIsActive(DATA_KEY_SCALED): self.data[DATA_KEY_SCALED].extend(scaled) + if self.dataIsActive(DATA_KEY_LABEL): + self.data[DATA_KEY_LABEL].extend(labels) if self.dataIsActive(DATA_KEY_BLOCK): # using blocks 
self.data[DATA_KEY_BLOCK].extend(blocks) # sort data by blocks - self.data[DATA_KEY_MAIN] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_MAIN]))] + self.data[DATA_KEY_MAIN] = [x for (_, x) in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_MAIN]), key= lambda i: i[0])] if self.dataIsActive(DATA_KEY_SCALED): self.data[DATA_KEY_SCALED] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_SCALED]))] @@ -428,7 +445,7 @@ def addData(self, main, scaled=None, blocks=None, labels=None): self.data[DATA_KEY_BLOCK] = sorted(self.data[DATA_KEY_BLOCK]) # blocks, offset, amount - self.data[DATA_KEY_LABEL_OFFSET] = [] + self.data[DATA_KEY_BLOCK_OFFSET] = [] label_set = list(set(self.data[DATA_KEY_BLOCK])) for i in range(len(label_set)-1): idx = self.data[DATA_KEY_BLOCK].index(label_set[i]) @@ -439,9 +456,13 @@ def addData(self, main, scaled=None, blocks=None, labels=None): self.data[DATA_KEY_BLOCK_OFFSET] = np.asarray(self.data[DATA_KEY_BLOCK_OFFSET]) + self.TCDebug('Created data blocks: [id, offset, size]\n{}'.format(self.data[DATA_KEY_BLOCK_OFFSET])) - print('\n') - print('Added {} datasets. Total: {}'.format(main.shape[0], len(self.data[DATA_KEY_MAIN]))) + #print('\n') + if not self.dataIsActive(DATA_KEY_BLOCK): + self.TCInfo('Added {} datasets. Total: {}'.format(main.shape[0], len(self.data[DATA_KEY_MAIN]))) + else: + self.TCInfo('Added {} datasets. 
Total: {}, Blocks: {}'.format(main.shape[0], len(self.data[DATA_KEY_MAIN]), len(self.data[DATA_KEY_BLOCK_OFFSET]))) self.splitSets() def splitSets(self): @@ -455,6 +476,7 @@ def splitSets(self): end_test = end_train + int( length * self.part_test ) # TODO handle block data # if active strip whole blocks from the end + self.TCDebug('set borders {}'.format([end_train, end_test, length])) if self.dataIsActive(DATA_KEY_BLOCK): ''' # get the block the border is in @@ -470,8 +492,19 @@ def splitSets(self): else: end_test = block_test[1] ''' - end_train = np.argmax(self.data[DATA_KEY_BLOCK_OFFSET][:,1:2] > end_train) - end_test = np.argmax(self.data[DATA_KEY_BLOCK_OFFSET][:,1:2] > end_test) + length = len(self.data[DATA_KEY_BLOCK_OFFSET]) + end_train = (self.data[DATA_KEY_BLOCK_OFFSET][:,1:2] > end_train) + if end_train.any(): + end_train = np.argmax(end_train) + else: + end_train = length + end_test = (self.data[DATA_KEY_BLOCK_OFFSET][:,1:2] > end_test) + if end_test.any(): + end_test = np.argmax(end_test) + else: + end_test = length + + self.TCDebug('set borders blocks {}'.format([end_train, end_test, length])) #(or simply split by block count...) 
@@ -479,9 +512,10 @@ def splitSets(self): # if using block data the index of the first block in DATA_KEY_BLOCK_OFFSET of the set is stored self.setBorders = [end_train, end_test, length] - print('Training set: {}'.format(self.setBorders[0])) - print('Testing set: {}'.format(self.setBorders[1]-self.setBorders[0])) - print('Validation set: {}'.format(self.setBorders[2]-self.setBorders[1])) + self.TCInfo('Set sizes: training: {}, validation: {}, testing: {}, '.format(self.setBorders[0], self.setBorders[1]-self.setBorders[0], self.setBorders[2]-self.setBorders[1])) + #print('Training set: {}'.format(self.setBorders[0])) + #print('Testing set: {}'.format(self.setBorders[1]-self.setBorders[0])) + #print('Validation set: {}'.format(self.setBorders[2]-self.setBorders[1])) def clearData(self): ''' @@ -494,6 +528,7 @@ def clearData(self): DATA_KEY_BLOCK_OFFSET:[], DATA_KEY_LABEL:[] } + self.TCInfo('Data cleared') def createTiles(self, data, tileShape, strides=-1): ''' @@ -550,12 +585,13 @@ def cutTile(self, data, tileShape, offset=[0,0,0]): ##################################################################################### - def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, blockSize = 1): # ,ouputBlockID=False + def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, blockSize = 1, squeezeZ=False, squeezeBlocks=True): # ,ouputBlockID=False ''' main method to create baches Return: - shape: [selectionSize, blockSize, z, y, x, channels] - if 2D z = 1 + shape: selectionSize,[ blockSize,][ z,] y, x, channels + if 2D z = 1 unless squeezeZ is set + block dimension is removed by default when block data is not active channels: density, [vel x, vel y, vel z], [pos x, pox y, pos z] ''' if isTraining: @@ -564,7 +600,7 @@ def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, block else: if (self.setBorders[1] - self.setBorders[0])<1: self.TCError('no test data.') - # TODO check/handle set borders + # check/handle set 
borders if not self.dataIsActive(DATA_KEY_BLOCK) and blockSize > 1: self.TCWarning('Block data is inactive, blockSize {} will be ignored.'.format(blockSize)) blockSize = 1 @@ -575,44 +611,51 @@ def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, block self.TCError('No label block with size {} available. Max size is {}.'.format(blockSize, maxBlockSize)) availableBlocks = np.sum(self.data[DATA_KEY_BLOCK_OFFSET][:,2:] >= blockSize) if (availableBlocks / self.data[DATA_KEY_BLOCK_OFFSET].shape[0]) < 0.15: - self.TCInfo('only {} of {} labels have a block size at least {}.'.format(availableBlocks, self.data[DATA_KEY_BLOCK_OFFSET].shape[0], blockSize)) + self.TCInfo('only {} of {} data blocks have a block size of at least {}.'.format(availableBlocks, self.data[DATA_KEY_BLOCK_OFFSET].shape[0], blockSize)) batch = {DATA_KEY_MAIN:[]} - #batch_main = [] if self.dataIsActive(DATA_KEY_SCALED): batch[DATA_KEY_SCALED]=[] if self.dataIsActive(DATA_KEY_LABEL): batch[DATA_KEY_LABEL]=[] - #batch_scaled = [] - #batch_label = [] for i in range(selectionSize): if augment and self.useDataAug: #data augmentation data = self.generateTile(isTraining, blockSize) else: #cut random tile without augmentation if augment: - self.TCInfo('Augmentation flag is ingored if data augmentation is not initialized.') + self.TCWarning('Augmentation flag is ingored if data augmentation is not initialized.') data = self.getRandomDatumDict(isTraining, blockSize) data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED]) - for data_key in batch: batch[data_key].append(data[data_key]) - #batch_main.append(low) - #if self.dataIsActive(DATA_KEY_SCALED): - # batch_scaled.append(high) - #if self.dataIsActive(DATA_KEY_LABEL): - # batch_label.append(data[DATA_KEY_LABEL]) - - #TODO collapse blockSize=1 - if not self.dataIsActive(DATA_KEY_BLOCK): + + batch[DATA_KEY_MAIN] = np.asarray(batch[DATA_KEY_MAIN]) + if self.dataIsActive(DATA_KEY_SCALED): + 
batch[DATA_KEY_SCALED] = np.asarray(batch[DATA_KEY_SCALED]) + + if self.dim==2 and squeezeZ: + TCDebug('squeeze z dimension') + #(tiles, block, z,y,x,c) + batch[DATA_KEY_MAIN] = np.squeeze(batch[DATA_KEY_MAIN], axis=2) + if self.dataIsActive(DATA_KEY_SCALED): + batch[DATA_KEY_SCALED] = np.squeeze(batch[DATA_KEY_SCALED], axis=2) + + #collapse blockSize=1 + if squeezeBlocks and blockSize==1: + TCDebug('squeeze block dimension') #(tiles, block, z,y,x,c) batch[DATA_KEY_MAIN] = np.squeeze(batch[DATA_KEY_MAIN], axis=1) if self.dataIsActive(DATA_KEY_SCALED): batch[DATA_KEY_SCALED] = np.squeeze(batch[DATA_KEY_SCALED], axis=1) + # for labels too? + elif squeezeBlocks: + self.TCWarning('can\'t squeeze block dimension with size {}.'.format(blockSize)) + #self.TCInfo('batch shape: {}'.format(batch[DATA_KEY_MAIN].shape)) - ret_list = [batch[DATA_KEY_MAIN]] + ret_list = [np.asarray(batch[DATA_KEY_MAIN])] if self.dataIsActive(DATA_KEY_SCALED): ret_list.append(batch[DATA_KEY_SCALED]) if self.dataIsActive(DATA_KEY_LABEL): @@ -658,7 +701,7 @@ def generateTile(self, isTraining=True, blockSize = 1): #rotate if self.do_rotation: - bounds = np.array(data[DATA_KEY_MAIN].shape)*0.16 #bounds applied on all sides, 1.5*(1-2*0.16)~1 + bounds = np.array(self.getTileShape(data[DATA_KEY_MAIN]))*0.16 #bounds applied on all sides, 1.5*(1-2*0.16)~1 data = self.rotate(data) #get a tile @@ -677,13 +720,13 @@ def generateTile(self, isTraining=True, blockSize = 1): # check tile size target_shape_low = np.copy(self.tile_shape_low) - if not np.array_equal(data[DATA_KEY_MAIN].shape,target_shape_low): - self.TCError('Wrong MAIN tile shape after data augmentation. is: {}. goal: {}.'.format(data[DATA_KEY_MAIN].shape, target_shape_low)) + if not np.array_equal(self.getTileShape(data[DATA_KEY_MAIN]),target_shape_low): + self.TCErrorInternal('Wrong MAIN tile shape after data augmentation. is: {}. 
goal: {}.'.format(data[DATA_KEY_MAIN].shape, target_shape_low)) if self.dataIsActive(DATA_KEY_SCALED): target_shape_high = np.copy(self.tile_shape_high) - if not np.array_equal(data[DATA_KEY_SCALED].shape,target_shape_high): - self.TCError('Wrong SCALED tile shape after data augmentation. is: {}. goal: {}.'.format(data[DATA_KEY_SCALED].shape, target_shape_high)) + if not np.array_equal(self.getTileShape(data[DATA_KEY_SCALED]),target_shape_high): + self.TCErrorInternal('Wrong SCALED tile shape after data augmentation. is: {}. goal: {}.'.format(data[DATA_KEY_SCALED].shape, target_shape_high)) return data @@ -700,9 +743,11 @@ def getRandomDatum(self, isTraining=True, blockSize = 1): blockSet = self.data[DATA_KEY_BLOCK_OFFSET][0:self.setBorders[0]] else: blockSet = self.data[DATA_KEY_BLOCK_OFFSET][self.setBorders[0]:self.setBorders[1]] - availableBlocks = np.where(blockSet[:,2:] >= blockSize) - randBlock = np.random.choice(blockSet, p=availableBlocks) # p=availableBlocks/np.sum(availableBlocks) ? - randOffset = randrange(0, randBlock[2] - blockSize) + availableBlocks = np.resize(np.where(blockSet[:,2] >= blockSize), (-1)) + #self.TCInfo('available blocks {}'.format(availableBlocks)) + randBlock = np.random.choice(availableBlocks) #len(blockSet), p=availableBlocks/np.sum(availableBlocks)) # p=availableBlocks/np.sum(availableBlocks) ? 
+ randBlock = blockSet[randBlock] + randOffset = randrange(0, randBlock[2] - (blockSize-1)) randNo = randBlock[1] + randOffset # random block, random offset in set pass @@ -749,7 +794,7 @@ def getDatum(self, index, blockSize = 1): else: ret.append(None) if self.dataIsActive(DATA_KEY_LABEL): - ret.append(np.copy(self.data[DATA_KEY_LABEL][index:index+blockSize])) + ret.append(self.data[DATA_KEY_LABEL][index:index+blockSize]) else: ret.append(None) return ret @@ -765,11 +810,12 @@ def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #b if tileShapeLow is None: tileShapeLow = np.copy(self.tile_shape_low) # use copy is very important!!! tileShapeHigh = tileShapeLow*self.upres - - if len(low.shape)!=5 or len(tileShapeLow)!=4: - self.TCErrorInternal('Data shape mismatch.') - if (not high is None) and len(high.shape)!=5: - self.TCErrorInternal('Data shape mismatch.') + if not self.isValidMainTileShape(tileShapeLow): + self.TCErrorInternal('Invalid tile shape') + if not self.isFrameSequence(low): #len(low.shape)!=5 or len(tileShapeLow)!=4: + self.TCErrorInternal('MAIN data is no sequence') + if (not high is None) and not self.isFrameSequence(high): + self.TCErrorInternal('SCALED data is no sequence') frameShapeLow = np.asarray(low.shape[1:]) start = np.ceil(bounds) @@ -811,7 +857,7 @@ def special_aug(self, data, ops_key, param): wrapper to call the augmentation operations specified in self.aops in initAugmentation """ for data_key in data: - if not dataCanAugment(data_key): continue + if not self.dataCanAugment(data_key): continue orig_shape = data[data_key].shape tile_t = orig_shape[-1] // self.data_flags[data_key][DATA_FLAG_CHANNELS] data_array = data[data_key] @@ -827,6 +873,8 @@ def rotate(self, data): random uniform rotation of low and high data of a given frame ''' #check if single frame + if len(data[DATA_KEY_MAIN].shape)==4: + pass #2D: if self.dim==2: @@ -851,8 +899,12 @@ def rotate(self, data): data = self.special_aug(data, 
AOPS_KEY_ROTATE, rotation_matrix) for data_key in data: - if dataCanAugment(data_key): - data[data_key] = self.applyTransform(data[data_key], rotation_matrix.T) + if self.dataCanAugment(data_key): + if self.isFrameSequence(data[data_key]): + for i in range(len(data[data_key])): + data[data_key][i]=self.applyTransform(data[data_key][i], rotation_matrix.T) + else: + data[data_key] = self.applyTransform(data[data_key], rotation_matrix.T) return data @@ -877,7 +929,7 @@ def rotateVelocities(self, datum, c_list, rotationMatrix): rotation2 = rotationMatrix[1:3, 1:3] channels = np.split(datum, datum.shape[-1], -1) for v in c_list: - if len(v) == 3: # currently always ends here!! even for 2D, #z,y,x to match rotation matrix + if len(v) == 3: #z,y,x to match rotation matrix vel = np.stack([channels[v[2]].flatten(),channels[v[1]].flatten(),channels[v[0]].flatten()]) vel = rotation3.dot(vel) channels[v[2]] = np.reshape(vel[0], channels[v[2]].shape) @@ -904,7 +956,7 @@ def rotate90(self, data, axes): self.TCError('need 2 axes for rotate90.') for data_key in data: - if dataCanAugment(data_key): + if self.dataCanAugment(data_key): data[data_key] = np.rot90(data[data_key], axes=axes) data = self.special_aug(data, AOPS_KEY_ROT90, axes) @@ -928,14 +980,14 @@ def flip(self, data, axes, isFrame=True): #axes=list, flip multiple at once axes: list of axis indices 0,1,2-> z,y,x ''' # axis: 0,1,2 -> z,y,x - - if not isFrame: - axes = np.asarray(axes) + np.ones(axes.shape) + axes = np.asarray(axes, dtype=np.int32) + if self.isFrameSequence(data[DATA_KEY_MAIN]): # not isFrame: #only for dims, not vectors + axes_m = axes + np.ones(axes.shape, dtype=np.int32) #flip tiles/frames - for axis in axes: + for axis in axes_m: for data_key in data: - if dataCanAugment(data_key): + if self.dataCanAugment(data_key): data[data_key] = np.flip(data[data_key], axis) @@ -974,10 +1026,10 @@ def scale(self, data, factor): if self.dim==2: scale[0] = 1 - # to ensure high/low ration stays the same - scale = 
np.round(np.array(data[DATA_KEY_MAIN].shape) * scale )/np.array(data[DATA_KEY_MAIN].shape) if len(data[DATA_KEY_MAIN].shape)==5: #frame sequence scale = np.append([1],scale) + # to ensure high/low ration stays the same + scale = np.round(np.array(data[DATA_KEY_MAIN].shape) * scale )/np.array(data[DATA_KEY_MAIN].shape) #apply transform #low = self.applyTransform(low, zoom_matrix) @@ -985,7 +1037,7 @@ def scale(self, data, factor): #changes the size of the frame. should work well with getRandomTile(), no bounds needed for data_key in data: - if dataCanAugment(data_key): + if self.dataCanAugment(data_key): data[data_key] = scipy.ndimage.zoom( data[data_key], scale, order=self.interpolation_order, mode=self.fill_mode, cval=0.0) #necessary? @@ -1007,7 +1059,10 @@ def scaleVelocities(self, datum, c_list, factor): def applyTransform(self, data, transform_matrix, data_dim=3): # change axis order from z,y,x to x,y,z? (invert axis order +channel) if len(data.shape)!=4: - self.TCError('Data shape mismatch.') + if len(data.shape)!=5: + self.TCErrorInternal('Data shape mismatch.') + else: # data sequence + self.TCErrorInternal('TODO: implement.') #set transform to center; from fluiddatagenerator.py offset = np.array(data.shape) / 2 - np.array([0.5, 0.5, 0.5, 0]) @@ -1078,6 +1133,29 @@ def dataIsActive(self, data_key): def dataCanAugment(self, data_key): return (self.dataIsActive(data_key) and (data_key in self.aops)) + def checkDims(self, shape, dim): + return len(shape)==dim + + def isFrameSequence(self, data): + return len(data.shape)==DATA_DIM_LENGTH_SEQUENCE + #return checkDims(data.shape, DATA_DIM_LENGTH_SEQUENCE) + + def isFrameSingle(self, data): + return len(data.shape)==DATA_DIM_LENGTH_SINGLE + + def isValidMainTileShape(self, shape): + if len(shape)!=DATA_DIM_LENGTH_SINGLE: return False + # smaller than frame + self.frame_shape_low + if np.less(self.simSizeLow, shape[:-1]).any(): return False + return True + + def getTileShape(self, data): + if not type(data) is 
np.ndarray: self.TCError('Can\'t get tile shape from not np-array data') + if self.isFrameSequence(data): return data.shape[1:] + elif self.isFrameSingle(data): return data.shape[:] + else: self.TCError('Can\'t get tile shape from data with shape {}'.format(data.shape)) + def getFrameTiles(self, index): ''' returns the frame as tiles''' low, high = self.getDatum(index) @@ -1176,24 +1254,27 @@ def parseCVorticity(self, c, i, c_types): ##################################################################################### # ERROR HANDLING ##################################################################################### - def TCInfo(self, msg): - if self.logLevel <= LOG_LEVEL_INFO: - print('TC INFO: {}'.format(msg)) - - def TCWarning(self, msg): - if self.logLevel <= LOG_LEVEL_WARNING: - print('TC WARNING: {}'.format(msg)) + def TCErrorInternal(self, msg): + if self.logLevel >= LOG_LEVEL_ERROR: + print('TC INTERNAL ERROR: {}'.format(msg)) + raise TilecreatorError(msg) def TCError(self, msg): - if self.logLevel <= LOG_LEVEL_ERROR: + if self.logLevel >= LOG_LEVEL_ERROR: print('TC ERROR: {}'.format(msg)) raise TilecreatorError(msg) - - def TCErrorInternal(self, msg): - if self.logLevel <= LOG_LEVEL_ERROR: - print('TC INTERNAL ERROR: {}'.format(msg)) - raise TilecreatorError(msg) + def TCWarning(self, msg): + if self.logLevel >= LOG_LEVEL_WARNING: + print('TC WARNING: {}'.format(msg)) + + def TCInfo(self, msg): + if self.logLevel >= LOG_LEVEL_INFO: + print('TC INFO: {}'.format(msg)) + + def TCDebug(self, msg): + if self.logLevel >= LOG_LEVEL_DEBUG: + print('TC DEBUG: {}'.format(msg)) class TilecreatorError(Exception): ''' Tilecreator errors ''' @@ -1239,9 +1320,9 @@ def savePngsBatch(low,high, TC, path, batchCounter=-1, save_vels=False, dscale=1 # note - outputs all channels as images, also vel channels... 
clout = np.arange(low.shape[4]) - savePngsGrayscale(lowD, path+'low_'+suff, tiles_in_image=[batch//tileX,tileX], channels=clout ) + savePngs(lowD, path+'low_'+suff, tiles_in_image=[batch//tileX,tileX], channels=clout ) chout = np.arange(high.shape[4]) - savePngsGrayscale(tiles=highD, path=path+'high_'+suff, imageCounter=0, tiles_in_image=[batch//tileX,tileX], channels=chout ) + savePngs(tiles=highD, path=path+'high_'+suff, imageCounter=0, tiles_in_image=[batch//tileX,tileX], channels=chout ) # plot velocities , for individual samples if save_vels: @@ -1252,17 +1333,39 @@ def savePngsBatch(low,high, TC, path, batchCounter=-1, save_vels=False, dscale=1 # simpler function to output multiple tiles into grayscale pngs -def savePngsGrayscale(tiles, path, imageCounter=0, tiles_in_image=[1,1], channels=[0], save_gif=False, plot_vel_x_y=False, save_rgb=None, rgb_interval=[-1,1]): +def savePngs(tiles, path, tile_format='NYXC', imageCounter=0, tiles_in_image=[1,1], channels=[0], save_gif=False, plot_vel_x_y=False, save_rgb=None, rgb_interval=[-1,1]): ''' + tiles: np-array with shape matching format param + tile_format: YXC, NYXC, BYXC, NBYXC tiles_in_image: (y,x) - tiles: shape: (tile,y,x,c) + + return: updated image counter ''' + tile_format = tile_format.lower() + valid_formats = ['yx','yxc','nyx','nyxc','byxc', 'nbyxc'] + full_format = valid_formats[-1] + tiles = np.asarray(tiles) + shape = tiles.shape + if (not tile_format in valid_formats) or len(shape)!=len(tile_format): + msg = 'Invalid tile format \'{}\' for tiles with shape {}'.format(tile_format, shape) + print('TC ERROR: {}'.format(msg)) + return tilesInImage = tiles_in_image[0]*tiles_in_image[1] - if len(tiles)%tilesInImage!=0: - print('ERROR: number of tiles does not match tiles per image') + + if shape[0]%tilesInImage!=0: + msg = 'Number of tiles does not match tiles per image' + print('TC ERROR: {}'.format(msg)) return - tiles = np.asarray(tiles) - noImages = len(tiles)//tilesInImage + + tileShape = [1,1,1,1,1] 
+ for dim in tile_format: + tileShape[full_format.index(dim)] = shape[tile_format.index(dim)] + + # to full 'nbyxc' format + tiles = np.reshape(tiles, tileShape) + + + noImages = shape[0]//tilesInImage if save_gif: gif=[] @@ -1273,20 +1376,30 @@ def savePngsGrayscale(tiles, path, imageCounter=0, tiles_in_image=[1,1], channel offset=image*tilesInImage + y*tiles_in_image[1] img.append(np.concatenate(tiles[offset:offset+tiles_in_image[1]],axis=1)) #combine x img = np.concatenate(img, axis=0) #combine y - # move channels to first dim. - img_c = np.rollaxis(img, -1, 0) - if len(img_c)>1 and (plot_vel_x_y or save_rgb!=None): - if plot_vel_x_y: saveVel(img, path, imageCounter+image) - if save_rgb!=None: saveRGBChannels(img,path, save_rgb,value_interval=rgb_interval, imageCounter=imageCounter+image) - if len(channels) == 1: - scipy.misc.toimage(img_c[channels[0]], cmin=0.0, cmax=1.0).save(path + 'img_{:04d}.png'.format(imageCounter*noImages+image)) - else: - for i in channels: - scipy.misc.toimage(img_c[i], cmin=0.0, cmax=1.0).save(path + 'img_{:04d}_c{:04d}.png'.format(imageCounter*noImages+image, i)) + # format from here: byxc + block_count = len(img) + for block_number in range(len(img)): + block = img[block_number] #actually one image-slice of the block + # format from here: yxc + # move channels to first dim. 
+ img_c = np.rollaxis(block, -1, 0) + img_name = 'img_{:04d}'.format(imageCounter+image) + if block_count>1: + img_name += '_b{:04d}'.format(block_number) + + if len(img_c)>1 and (plot_vel_x_y or save_rgb!=None): + if plot_vel_x_y: saveVel(block, path, name=img_name+'_vel-x-y.png') + if save_rgb!=None: saveRGBChannels(block,path, save_rgb,value_interval=rgb_interval, filename=img_name + '_rgb_{:04d}.png') + if len(channels) == 1: + scipy.misc.toimage(img_c[channels[0]], cmin=0.0, cmax=1.0).save(path + '{}.png'.format(img_name)) + else: + for i in channels: + scipy.misc.toimage(img_c[i], cmin=0.0, cmax=1.0).save(path + '{}_c{:04d}.png'.format(img_name, i)) + return imageCounter + noImages # store velocity as quiver plot -def saveVel(tile, path, imageCounter=0, name='vel-x-y'): +def saveVel(tile, path, name='vel-x-y'): # origin is in upper left corner, transform acordingly y, x = np.mgrid[-tile.shape[0]:0, 0:tile.shape[1]] vx = None; vy = None @@ -1301,7 +1414,7 @@ def saveVel(tile, path, imageCounter=0, name='vel-x-y'): if found_matplotlib: matplotlib.pyplot.quiver(x,y,vx.flatten(),vy.flatten(), units = 'xy', scale = 1) matplotlib.pyplot.axis('equal') - matplotlib.pyplot.savefig(path + '{}_{:04d}.png'.format(name,imageCounter)) + matplotlib.pyplot.savefig(path + name) matplotlib.pyplot.clf() # save velocity channels from the tilecreator with multiple axis projections (uses saveVel) @@ -1338,7 +1451,7 @@ def saveVelChannels(data, c_idx, path, average=False, scale=1.0, normalize=True, saveVel(vavg, path=vpath, name='_yz' ) -def saveRGBChannels(data, path, channel_list, imageCounter=0, value_interval=[-1,1]): +def saveRGBChannels(data, path, channel_list, value_interval=[-1,1], filename = 'img_rgb_{:04d}.png'): """ data: shape[y,x,c] channels: list of triples of channel ids saved as RGB image @@ -1349,12 +1462,14 @@ def saveRGBChannels(data, path, channel_list, imageCounter=0, value_interval=[-1 num_channels = data.shape[-1] channels = np.split(data, num_channels, -1) 
+ c = 0 for i in channel_list: if len(i)==2: img = np.concatenate([channels[i[0]], channels[i[1]], np.ones_like(channels[i[0]])*cmin], -1) else: img = np.concatenate([channels[i[0]], channels[i[1]], channels[i[2]]], -1) - scipy.misc.toimage(img, cmin=-1.0, cmax=1.0).save(path + 'img_rgb_{:04d}.png'.format(imageCounter)) + scipy.misc.toimage(img, cmin=-1.0, cmax=1.0).save(path + filename.format(c)) + c+=1 ##################################################################################### # UNI OUTPUT @@ -1421,6 +1536,32 @@ def TDarrayToUni(input, savePath, motherUniPath, imageHeight, imageWidth, imageD # TEMPO BATCH CREATION ##################################################################################### +def blockFromChannelsToSequence(data, blockLength): + ''' + assumes channels is last dimension, channels of a block are consecutive (blocks of channels) + separate blocks from channels + moved blocks to new first dimension + merges blocks into sequence + ''' + shape = list(data.shape) + if shape[-1] % blockLength != 0: + pass #error + num_channels = shape[-1]//blockLength + shape_block = shape + [num_channels] # actual channels + shape_block[-2] = blockLength + data = np.reshape(data, shape_block) + data = np.rollaxis(data, -2, 1) # blocks after sequence (sequence of blocks) + shape[0] *= blockLength # blocks into sequence + shape[-1] = num_channels + data = np.reshape(data, (shape)) + return data + +def blockToChannels(data): + ''' + merge blocks to channels + ''' + pass + # ****************************************************************************** # faster functions, batch operations # From ad6c5d6d9e10b1c66cd25760bc662e7e7f2f28ed Mon Sep 17 00:00:00 2001 From: thunil Date: Fri, 16 Nov 2018 19:52:21 +0100 Subject: [PATCH 4/8] python2 fixes, smaller formatting changes --- tensorflow/tools/README_TileCreator.txt | 62 ++++++++++++++++++------- tensorflow/tools/tc_test.py | 19 ++++---- tensorflow/tools/tilecreator_t.py | 10 ++-- 3 files changed, 58 
insertions(+), 33 deletions(-) diff --git a/tensorflow/tools/README_TileCreator.txt b/tensorflow/tools/README_TileCreator.txt index ec4aa5d..6d30e3e 100644 --- a/tensorflow/tools/README_TileCreator.txt +++ b/tensorflow/tools/README_TileCreator.txt @@ -1,40 +1,57 @@ TileCreator create tiles from 2D or 3D simulation data (or other structured data) with different channels. -data augmentation: scaling, shifting and rotation. scaling and rotation is also applied to vector data -optional second set of sturctured data with dimensionality and size matching the main data - -arbitrary label alongside the structured data +data augmentation: scaling, shifting and rotation. scaling and rotation is also + applied to vector data +optional second set of structured data with dimensionality and size matching the main data +arbitrary label alongside the structured data +=== 1. Setup import tilecreator_t as tc TC = tc.TileCreator(tileSize, simSize=64, dim=2, densityMinimum=0.02, channelLayout_main=C_LAYOUT['dens_vel'], useScaledData=True, channelLayout_scaled=C_LAYOUT['dens'], scaleFactor=2, useDataBlocks=False, useLabels=False, partTrain=0.8, partTest=0.2, partVal=0, logLevel=LOG_LEVEL_WARNING) -tileSize: size of the tiles to create. must be less or equal the simulation size. Assumed to have enough bounds for rotation augmentation if active. (previously lowTileSize) +tileSize: size of the tiles to create. must be less or equal the simulation + size. Assumed to have enough bounds for rotation augmentation if active. + (previously lowTileSize) simSize: size of the input simulation data. (previously lowSimSize) dim: dimension of main (and scaled) data. can be 2 or 3. -densityMinimum: minimum avg. density in a tile. To prevent generating empty tiles -channelLayout_main: what type of data the different channels contain as a comma separaed sring of channel keys. used in augmentation. Keys: +densityMinimum: minimum avg. density in a tile. 
To prevent generating empty tiles +channelLayout_main: what type of data the different channels contain as a comma + separated string of channel keys. used in augmentation. +Keys: d: default. not augmented - v: vector data. needs 2 (2D; x,y) or 3 (2D, 3D; x,y,z) components with matching labels. the order does not matter and one empty label is accepted. format: v[label](x|y|z). examples: '...,vx,vy,vz,...', 'd,vVELx,vVELy,d,vVortz,vVortx,d,vVorty' - -useScaledData: an optional second structured dataset with a fixed scaling factor to the main data. will be augmented to fit the augmentation of the main data (e.g. same rotation) + v: vector data. needs 2 (2D; x,y) or 3 (2D, 3D; x,y,z) components with + matching labels. the order does not matter and one empty label is accepted. + format: v[label](x|y|z). examples: '...,vx,vy,vz,...', + 'd,vVELx,vVELy,d,vVortz,vVortx,d,vVorty' + +useScaledData: an optional second structured dataset with a fixed scaling + factor to the main data. will be augmented to fit the augmentation of the main + data (e.g. same rotation) channelLayout_scaled: same as channelLayout_main for active scaled data scaleFactor: the scaling factor between main and scaled data, can be 1 -useDataBlocks: an optional grouping of data using block ids. the ids have to be provided when adding data to the tilecrator. enables the creation of blocks of tiles with matching augmentation (i.e. like an additional dimension that is not agmented). can be used to create augmented (time-) sequences of data +useDataBlocks: an optional grouping of data using block ids. the ids have to be + provided when adding data to the tilecreator. enables the creation of blocks of + tiles with matching augmentation (i.e. like an additional dimension that is not + augmented). can be used to create augmented (time-) sequences of data useLabels: an optional set of data that is not augmented. 
no type or structure is assumed -partTrain, partTest (, partVal): the relative sizes of data sets for train and testing mode (machine learing). val data is currently unused and inaccessible and should be left at 0. +partTrain, partTest (, partVal): the relative sizes of data sets for train and + testing mode (machine learning). val data is currently unused and inaccessible + and should be left at 0. logLevel: how much information to print. 1.1 Setup Data Augmentation TC.initDataAugmentation(rot=2, minScale=0.85, maxScale=1.15 ,flip=True) -rot: type of rotation augmentation (NOT an angle limit). 1: fixed 90° roatations, 2: full rotation (assumes enough space for boundaries in data), else: no rotation) +rot: type of rotation augmentation (NOT an angle limit). 1: fixed 90° + rotations, 2: full rotation (assumes enough space for boundaries in data), + else: no rotation) minScale, maxScale: limits for scaling. set both to 1 to disable scaling. flip: flipping (mirroring) of data @@ -42,19 +59,28 @@ flip: flipping (mirroring) of data TC.addData(main, scaled=None, labels=None, blocks=None) main: the main data, must match simSize and channels specified in the constructor -scaled: required when useScaledData==True, ignored otherwise. must match simSize*scaleFactor and channels specified in the constructor. must be the same amount as the main data. -labels: required when useLabels==True, ignored otherwise. list/iterable of arbitrary data. must be the same amount as the main data. -blocks: required when useDataBlocks==True, ignored otherwise. list of block ids (int). can be unsorted. will be sorted according to the id, the order of data with the same id (within the same block) is preserved. +scaled: required when useScaledData==True, ignored otherwise. must match + simSize*scaleFactor and channels specified in the constructor. must be the same + amount as the main data. +labels: required when useLabels==True, ignored otherwise. list/iterable of + arbitrary data. 
must be the same amount as the main data. +blocks: required when useDataBlocks==True, ignored otherwise. list of block ids + (int). can be unsorted. will be sorted according to the id, the order of data + with the same id (within the same block) is preserved. 3. Batch creation TC.selectRandomTiles(selectionSize, isTraining=True, augment=False, blockSize = 1, squeezeZ=False, squeezeBlocks=True) selectionSize: number of tiles to create isTraining: whether to use data from the training or testing set -augment: whether to augment the data. requires data augmentation to be initialized (TC.initDataAugmentation). +augment: whether to augment the data. requires data augmentation to be + initialized (TC.initDataAugmentation). blockSize: what block size to use if block data is active. ignored otherwise squeezeZ: whether to squeeze/collapse the z dimension/axis of main (and scaled) data when using 2D data. squeezeBlocks: whether to squeeze/collapse the block dimension/axis of main (and scaled) when block size is 1 or block data is inactive returns: -main[,scaled][,labels]: main and scaled are np.ndarray with shape: n[,b][,z],y,x,c with z,y,x mathching the (scaled) tile size and c channels. labels is a list \ No newline at end of file +main[,scaled][,labels]: main and scaled are np.ndarray with shape: +n[,b][,z],y,x,c with z,y,x matching the (scaled) tile size and c channels. 
+labels is a list + diff --git a/tensorflow/tools/tc_test.py b/tensorflow/tools/tc_test.py index 2572436..f37ac10 100644 --- a/tensorflow/tools/tc_test.py +++ b/tensorflow/tools/tc_test.py @@ -19,26 +19,28 @@ import paramhelpers as ph -out_path = ph.getParam( "basePath", '../test_out/' ) +out_path = ph.getParam( "outPath", '../test_out/' ) sim_path = ph.getParam( "basePath", '../data_sim/' ) randSeed = int(ph.getParam( "randSeed", 1 )) # seed for np and tf initialization simSize = int(ph.getParam( "simSize", 256 )) # tiles of low res sim tileSize = int(ph.getParam( "tileSize", 64 )) # size of low res tiles -upRes = int(ph.getParam( "upRes", 4 )) # single generator scaling factor -dim = int(ph.getParam( "dim", 2 )) # dimension of dataset +upRes = int(ph.getParam( "upRes", 4 )) # single generator scaling factor +dim = int(ph.getParam( "dim", 2 )) # dimension of dataset augment = int(ph.getParam( "aug", 1 )) # use dataAugmentation or not +fromSim = int(ph.getParam( "fromSim", 1018 )) +toSim = int(ph.getParam( "toSim", fromSim )) +indexMax = int(ph.getParam( "indexMax", 200 )) + # no high res data in TC, using high data in TC's low res useScaledData = int(ph.getParam( "scaled", 0 )) useLabelData = int(ph.getParam( "label", 0 )) useDataBlocks = int(ph.getParam( "block", 0 )) -blockSize = int(ph.getParam( "blockSize", 1 )) - +blockSize = int(ph.getParam( "blockSize", 1 )) batchCount = int(ph.getParam( "batchCount", 1 )) - ph.checkUnusedParams() np.random.seed(randSeed) #tf.set_random_seed(randSeed) @@ -58,9 +60,6 @@ recursionDepth = 0 -fromSim = 1018 -toSim = fromSim - dirIDs = np.linspace(fromSim, toSim, (toSim-fromSim+1),dtype='int16') lowfilename = "density_low_%04d.npz" highfilename = "density_high_%04d.npz" @@ -82,7 +81,7 @@ mfh = ["density", "velocity"] if not useDataBlocks else ["density", "velocity", "density", "velocity", "density", "velocity" ] moh = [0,0] if not useDataBlocks else [0,0,1,1,2,2] -floader = fdl.FluidDataLoader( print_info=1, base_path=sim_path, 
filename=lowfilename, oldNamingScheme=False, filename_y=highfilename, filename_index_max=200, indices=dirIDs, data_fraction=0.5, multi_file_list=mfl, multi_file_idxOff=mol, multi_file_list_y=mfh , multi_file_idxOff_y=moh) +floader = fdl.FluidDataLoader( print_info=1, base_path=sim_path, filename=lowfilename, oldNamingScheme=False, filename_y=highfilename, filename_index_max=indexMax, indices=dirIDs, data_fraction=0.5, multi_file_list=mfl, multi_file_idxOff=mol, multi_file_list_y=mfh , multi_file_idxOff_y=moh) x, y, xFilenames = floader.get() tile_format='NYXC' diff --git a/tensorflow/tools/tilecreator_t.py b/tensorflow/tools/tilecreator_t.py index 270f667..6ee70ac 100644 --- a/tensorflow/tools/tilecreator_t.py +++ b/tensorflow/tools/tilecreator_t.py @@ -209,10 +209,10 @@ def __init__(self, tileSize, simSize=64, dim=2, densityMinimum=0.02, channelLayo self.part_test=partTest/all # PRINT INFO - TC_setup_info = '\n' + TC_setup_info = {'0' : '\n'} # workarond for closures in python2.x def addInfoLine(line): - nonlocal TC_setup_info - TC_setup_info += line + '\n' + #nonlocal TC_setup_info + TC_setup_info['0'] += line + '\n' #print('Dimension: {}, time dimension: {}'.format(self.dim,self.dim_t)) addInfoLine('Main data:') @@ -635,7 +635,7 @@ def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, block batch[DATA_KEY_SCALED] = np.asarray(batch[DATA_KEY_SCALED]) if self.dim==2 and squeezeZ: - TCDebug('squeeze z dimension') + self.TCDebug('squeeze z dimension') #(tiles, block, z,y,x,c) batch[DATA_KEY_MAIN] = np.squeeze(batch[DATA_KEY_MAIN], axis=2) if self.dataIsActive(DATA_KEY_SCALED): @@ -643,7 +643,7 @@ def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, block #collapse blockSize=1 if squeezeBlocks and blockSize==1: - TCDebug('squeeze block dimension') + self.TCDebug('squeeze block dimension') #(tiles, block, z,y,x,c) batch[DATA_KEY_MAIN] = np.squeeze(batch[DATA_KEY_MAIN], axis=1) if self.dataIsActive(DATA_KEY_SCALED): From 
617cb32f308fe03a7a4a9404ace0d956d2594862 Mon Sep 17 00:00:00 2001 From: Erik Franz Date: Tue, 27 Nov 2018 22:10:20 +0100 Subject: [PATCH 5/8] TC added more tile and sim size checks to prevent invalid configurations added checks for valid scaleFactors and tile and sim sizes if scaled data is used added sim and tile size to info output bug fixes TESTING added timing added more options for data used catching TCErrors --- tensorflow/tools/README_TileCreator.txt | 7 +- tensorflow/tools/tc_test.py | 150 ++++++++++++++++++----- tensorflow/tools/tilecreator_t.py | 151 ++++++++++++++++++++---- 3 files changed, 255 insertions(+), 53 deletions(-) diff --git a/tensorflow/tools/README_TileCreator.txt b/tensorflow/tools/README_TileCreator.txt index ec4aa5d..da48d53 100644 --- a/tensorflow/tools/README_TileCreator.txt +++ b/tensorflow/tools/README_TileCreator.txt @@ -38,6 +38,9 @@ rot: type of rotation augmentation (NOT an angle limit). 1: fixed 90 minScale, maxScale: limits for scaling. set both to 1 to disable scaling. flip: flipping (mirroring) of data +1.2 Using block data from fluiddataloader +The fluiddataloader merges blocks into channels. Use tc.blockFromChannelsToSequence(data, blockLength) to convert the data to the required format. + 2. Adding data TC.addData(main, scaled=None, labels=None, blocks=None) @@ -57,4 +60,6 @@ squeezeZ: whether to squeeze/collapse the z dimension/axis of main (and scaled) squeezeBlocks: whether to squeeze/collapse the block dimension/axis of main (and scaled) when block size is 1 or block data is inactive returns: -main[,scaled][,labels]: main and scaled are np.ndarray with shape: n[,b][,z],y,x,c with z,y,x mathching the (scaled) tile size and c channels. labels is a list \ No newline at end of file +main[,scaled][,labels]: main and scaled are np.ndarray with shape: n[,b][,z],y,x,c with z,y,x mathching the (scaled) tile size and c channels. labels is a list + +4. 
Image output \ No newline at end of file diff --git a/tensorflow/tools/tc_test.py b/tensorflow/tools/tc_test.py index 2572436..2aa8ad2 100644 --- a/tensorflow/tools/tc_test.py +++ b/tensorflow/tools/tc_test.py @@ -11,7 +11,8 @@ # #****************************************************************************** -import os,sys +import os,sys,time +import traceback from itertools import repeat import tilecreator_t as tc import fluiddataloader as fdl @@ -23,21 +24,29 @@ sim_path = ph.getParam( "basePath", '../data_sim/' ) randSeed = int(ph.getParam( "randSeed", 1 )) # seed for np and tf initialization -simSize = int(ph.getParam( "simSize", 256 )) # tiles of low res sim -tileSize = int(ph.getParam( "tileSize", 64 )) # size of low res tiles +simSizeHigh = int(ph.getParam( "simSizeHigh", 256 )) # size of high res sim +tileSizeHigh = int(ph.getParam( "tileSizeHigh", 64 )) # size of high res tiles +simSizeLow = int(ph.getParam( "simSizeLow", 64 )) # size of low res sim +tileSizeLow = int(ph.getParam( "tileSizeLow", 16 )) # size of low res tiles upRes = int(ph.getParam( "upRes", 4 )) # single generator scaling factor dim = int(ph.getParam( "dim", 2 )) # dimension of dataset +useVel = int(ph.getParam( "vel", 1 )) augment = int(ph.getParam( "aug", 1 )) # use dataAugmentation or not # no high res data in TC, using high data in TC's low res useScaledData = int(ph.getParam( "scaled", 0 )) +mainIsLow = int(ph.getParam( "mainIsLow", 0 )) + useLabelData = int(ph.getParam( "label", 0 )) useDataBlocks = int(ph.getParam( "block", 0 )) blockSize = int(ph.getParam( "blockSize", 1 )) batchCount = int(ph.getParam( "batchCount", 1 )) +saveImages = int(ph.getParam( "img", 1 )) +saveRef = int(ph.getParam( "ref", 0 )) +fail = int(ph.getParam( "fail", 0 )) ph.checkUnusedParams() np.random.seed(randSeed) @@ -50,6 +59,18 @@ sys.stdout = ph.Logger(test_path) sys.stderr = ph.ErrorLogger(test_path) +def testFailed(msg): + print('\n{}:\n{}'.format(msg, traceback.format_exc())) + if fail: + print('') + 
print('--- TEST FAILED AS INDICATED ---') + print('') + else: + print('') + print('--- TEST FAILED ---') + print('') + exit() + print('') print('--- TEST STARTED ---') print('') @@ -58,20 +79,55 @@ recursionDepth = 0 -fromSim = 1018 +if dim==2: + fromSim = 1018 + index_min = 0 + index_max = 200 + fileType = '.npz' + rgb_channels = [[1,2]] + rgb_range = [-2,2] +elif dim==3: + fromSim = 3006 + if useScaledData: + index_min = 30 + index_max = 60 + else: + index_min = 30 + index_max = 60 + fileType = '.npz' + rgb_channels = [[1,2,3]] + rgb_range = [-2,2]#[-0.1,0.1] +else: + print('dim must be 2 or 3.') + exit() toSim = fromSim dirIDs = np.linspace(fromSim, toSim, (toSim-fromSim+1),dtype='int16') -lowfilename = "density_low_%04d.npz" -highfilename = "density_high_%04d.npz" +lowfilename = "density_low_%04d" + fileType +highfilename = "density_high_%04d" + fileType -if not useScaledData: - lowfilename = highfilename - highfilename = None +if mainIsLow: + if not useScaledData: + highfilename = None + upRes = 1 + simSize = simSizeLow + tileSize = tileSizeLow else: - simSize = simSize//upRes - tileSize = tileSize//upRes + lowfn = lowfilename + lowfilename = highfilename + if useScaledData: + highfilename = lowfn + upRes = 1/upRes + else: + highfilename = None + upRes = 1 + simSize = simSizeHigh + tileSize = tileSizeHigh +#if useVel: +# fl = ["density", "velocity"] +# ol = [0,0] +#else: #load data mfl = ["density", "velocity"] if not useDataBlocks else ["density", "velocity", "density", "velocity", "density", "velocity" ] @@ -82,12 +138,21 @@ mfh = ["density", "velocity"] if not useDataBlocks else ["density", "velocity", "density", "velocity", "density", "velocity" ] moh = [0,0] if not useDataBlocks else [0,0,1,1,2,2] -floader = fdl.FluidDataLoader( print_info=1, base_path=sim_path, filename=lowfilename, oldNamingScheme=False, filename_y=highfilename, filename_index_max=200, indices=dirIDs, data_fraction=0.5, multi_file_list=mfl, multi_file_idxOff=mol, multi_file_list_y=mfh , 
multi_file_idxOff_y=moh) +pt1_start = time.perf_counter() +pt2_start = time.process_time() +floader = fdl.FluidDataLoader( print_info=1, base_path=sim_path, filename=lowfilename, oldNamingScheme=False, filename_y=highfilename, filename_index_min=index_min, filename_index_max=index_max, indices=dirIDs, data_fraction=0.5, multi_file_list=mfl, multi_file_idxOff=mol, multi_file_list_y=mfh , multi_file_idxOff_y=moh) x, y, xFilenames = floader.get() +pt2_end = time.process_time() +pt1_end = time.perf_counter() +pt1 = pt1_end - pt1_start +pt2 = pt2_end - pt2_start +print('Loading Process Time: Total {:.04f}s; avg/frame {:.04f}s'.format(pt2,pt2/len(x))) +print('Loading Time: Total {:.04f}s; avg/frame {:.04f}s'.format(pt1,pt1/len(x))) + tile_format='NYXC' z_axis = 1 -if useDataBlocks: +if useDataBlocks or dim==3: tile_format='NBYXC' z_axis = 2 @@ -110,20 +175,29 @@ l = b[:] #save ref: -if False: +if saveRef: print('Output reference') - tileShape = (x.shape[0],simSize,simSize,x.shape[-1]) - tiles = np.reshape(x, tileShape) - tc.savePngs(tiles[:1], test_path + 'ref_low_',imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[2,3]], rgb_interval=[-2,2]) + #tileShape = (x.shape[0],simSize,simSize,x.shape[-1]) + #tiles = np.reshape(x, tileShape) ,27:34 + tc.savePngs(x[11:12], test_path + 'ref_main_', tile_format=tile_format,imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = rgb_channels, rgb_interval=rgb_range) if useScaledData: - tileShape = (y.shape[0],simSize*upRes,simSize*upRes,y.shape[-1]) - tiles = np.reshape(y, tileShape) - tc.savePngs(tiles[:1], test_path + 'ref_high_',imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[2,3]], rgb_interval=[-2,2]) + #tileShape = (y.shape[0],simSize*upRes,simSize*upRes,y.shape[-1]) + #tiles = np.reshape(y, tileShape) + tc.savePngs(y[:1], test_path + 'ref_scaled_',imageCounter=0, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = 
[[2,3]], rgb_interval=[-2,2]) +channel_layout = 'd,vx,vy' +if dim==3: + channel_layout += ',vz' # tilecreator -TC = tc.TileCreator(tileSize=tileSize, simSize=simSize , dim=dim, densityMinimum=0.1, scaleFactor=upRes, channelLayout_main='d,vx,vy', channelLayout_scaled='d,vx,vy', useScaledData=useScaledData, useLabels=useLabelData, useDataBlocks=useDataBlocks, logLevel=10) +try: + TC = tc.TileCreator(tileSize=tileSize, simSize=simSize , dim=dim, densityMinimum=0.1, scaleFactor=upRes, channelLayout_main=channel_layout, channelLayout_scaled=channel_layout, useScaledData=useScaledData, useLabels=useLabelData, useDataBlocks=useDataBlocks, logLevel=10) +except tc.TilecreatorError as e: + testFailed('TileCreator Error on construction') if augment: - TC.initDataAugmentation(2) + try: + TC.initDataAugmentation(2) + except tc.TilecreatorError as e: + testFailed('TileCreator Error on augmentation init') # strip zero z vel of 2D data if dim==2: @@ -132,8 +206,10 @@ y,_ = np.split(y, [3], axis=-1) # add low data with dummy labels -TC.addData(x, y if useScaledData else None, l, b) - +try: + TC.addData(x, y if useScaledData else None, l, b) +except tc.TilecreatorError as e: + testFailed('TileCreator Error when adding data') #bx,by = TC.selectRandomTiles(64, True, augment=True) @@ -141,30 +217,44 @@ #test batch: if True: imageCounter=0 + pt1_start = time.perf_counter() + pt2_start = time.process_time() for batch_number in range(batchCount): i=0 - print('\nOutput batch {}'.format(batch_number)) - batch = TC.selectRandomTiles(selectionSize = 8, augment=augment, isTraining=True, blockSize=blockSize, squeezeZ=True) + print('\nOutput batch #{}'.format(batch_number)) + try: + batch = TC.selectRandomTiles(selectionSize = 8, augment=augment, isTraining=True, blockSize=blockSize, squeezeZ=True) + except tc.TilecreatorError as e: + testFailed('TileCreator Error when creating batch') print('batch_x shape: {}'.format(batch[0].shape)) #tileShape = 
(batch[0].shape[0],tileSize,tileSize,batch[0].shape[-1]) tiles = batch[i] i+=1 #print('tiles_x shape: {}'.format(tiles.shape)) - ic=tc.savePngs(tiles, test_path, tile_format=tile_format,imageCounter=imageCounter, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[1,2]], rgb_interval=[-2,2]) + if dim==3: tiles = (tiles[2:6,6:8] if useScaledData else tiles[2:6,24:32]) + if saveImages: ic=tc.savePngs(tiles, test_path, tile_format=tile_format,imageCounter=imageCounter, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = rgb_channels, rgb_interval=rgb_range) if useScaledData: print('batch_y shape: {}'.format(batch[1].shape)) #tileShape = (batch[1].shape[0],tileSize*upRes,tileSize*upRes,batch[1].shape[-1]) tiles = batch[i] i+=1 #print('tiles_y shape: {}'.format(tiles.shape)) - tc.savePngs(tiles, test_path + 'high_', tile_format=tile_format, imageCounter=imageCounter, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = [[1,2]], rgb_interval=[-2,2]) + if dim==3: tiles = tiles[2:6,24:32] + if saveImages: tc.savePngs(tiles, test_path + 'high_', tile_format=tile_format, imageCounter=imageCounter, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = rgb_channels, rgb_interval=rgb_range) if useLabelData: tiles = batch[i] i+=1 print('labels: {}'.format(tiles)) - print('-> images {} to {}'.format(imageCounter, ic-1)) - imageCounter=ic + if saveImages: print('-> images {} to {}'.format(imageCounter, ic-1)) + if saveImages: imageCounter=ic + + pt2_end = time.process_time() + pt1_end = time.perf_counter() + pt1 = pt1_end - pt1_start + pt2 = pt2_end - pt2_start + print('Process Time: Total {:.04f}s; avg/batch {:.04f}s; avg/tile {:.04f}s'.format(pt2,pt2/batchCount,pt2/(batchCount*8))) + print('Time: Total {:.04f}s; avg/batch {:.04f}s; avg/tile {:.04f}s'.format(pt1,pt1/batchCount,pt1/(batchCount*8))) #test online scaled batch # NOT YET IMPLEMENTED diff --git a/tensorflow/tools/tilecreator_t.py 
b/tensorflow/tools/tilecreator_t.py index 270f667..a4c5546 100644 --- a/tensorflow/tools/tilecreator_t.py +++ b/tensorflow/tools/tilecreator_t.py @@ -121,26 +121,44 @@ def __init__(self, tileSize, simSize=64, dim=2, densityMinimum=0.02, channelLayo else: self.TCError('Simulation size mismatch.') self.simSizeLow = np.asarray(self.simSizeLow) + #SCALE FACTOR (UPRES) (don't confuse self.upres (this) and self.scaleFactor (augmentation)) + if np.isscalar(scaleFactor): + self.upres = [scaleFactor, scaleFactor, scaleFactor] + elif len(scaleFactor)==2 and self.dim==2: + self.upres = [1]+scaleFactor + elif len(scaleFactor)==3: + self.upres = scaleFactor + else: + self.TCError('Scale factor size mismatch.') + self.upres = np.asarray(self.upres) + if self.dim==2: self.tileSizeLow[0]=1 self.simSizeLow[0]=1 - - #if scaleFactor < 1: - # self.TCError('Upres must be at least 1.') - self.upres = scaleFactor - if useScaledData: - self.tileSizeHigh = self.tileSizeLow*scaleFactor - self.simSizeHigh = self.simSizeLow*scaleFactor - if self.dim==2: - self.tileSizeHigh[0]=1 - self.simSizeHigh[0]=1 + self.upres[0]=1 #else: # self.tileSizeHigh = np.asarray([1]) # self.simSizeHigh = np.asarray([1]) + + # todo ckeck valid main sizes + if not self.isIntArray(self.simSizeLow): + self.TCError('Sim size must be integer.') + if not self.isIntArray(self.tileSizeLow): + self.TCError('Tile size must be integer.') + # check valid scale factor + minUpresStep = 0.125 + if not np.all(np.mod(array,minUpresStep)==0): + self.TCError('Tile size must be a multiple of {}.'.format(minUpresStep)) + #self.TCDebug('Main size: sim {}, tile {}.'.format(self.simSizeLow, self.tileSizeLow)) + + self.tileFactor = self.tileSizeLow/self.simSizeLow + if self.dim==2: + self.tileFactor = self.tileFactor[1:] + if np.less(self.simSizeLow, self.tileSizeLow).any(): - self.TCError('Tile size {} can not be larger than sim size {}, {}.'.format(self.tileSizeLow,self.simSizeLow)) + self.TCError('Tile size {} can not be larger than 
sim size {}.'.format(self.tileSizeLow,self.simSizeLow)) if densityMinimum<0.: @@ -148,6 +166,30 @@ def __init__(self, tileSize, simSize=64, dim=2, densityMinimum=0.02, channelLayo self.densityMinimum = densityMinimum self.useDataAug = False + # SCALED DATA + if useScaledData: + # preliminary checks + # valid scaleFactor + if(np.less(self.upres, np.ones(3)).any): + self.TCError('Scale factor must be at least one for every dimension. Swap main and scaled data and invert the scale factor.') + # main sizes compatible + if not self.isIntArray(self.simSizeLow*self.upres): + self.TCError('Main sim size {} is not compatible with the scale factor {}.'.format(self.simSizeLow, self.upres)) + if not self.isIntArray(self.tileSizeLow*self.upres): + self.TCError('Main tile size {} is not compatible with the scale factor {}.'.format(self.tileSizeLow, self.upres)) + + # calculate scaled sizes + self.tileSizeHigh = self.tileSizeLow*self.upres + self.simSizeHigh = self.simSizeLow*self.upres + if self.dim==2: + self.tileSizeHigh[0]=1 + self.simSizeHigh[0]=1 + # check valid scaled. checked before, but just in case.. + if not self.isIntArray(self.simSizeHigh) or not self.isIntArray(self.tileSizeHigh): + self.TCError('Scale factor {} with main sim size {} and main tile size {} would result in illegal scaled sizes. 
Scaled sim {} and tile {} sizes must be integer.'.format(self.upres, self.simSizeLow, self.tileSizeLow, self.simSizeHigh, self.tileSizeHigh)) + self.tileSizeHigh = self.tileSizeHigh.astype(int) + self.simSizeHigh = self.simSizeHigh.astype(int) + #CHANNELS self.c_lists = {} self.c_low, self.c_lists[DATA_KEY_MAIN] = self.parseChannels(channelLayout_main) @@ -216,7 +258,8 @@ def addInfoLine(line): #print('Dimension: {}, time dimension: {}'.format(self.dim,self.dim_t)) addInfoLine('Main data:') - + addInfoLine(' sim size: {}'.format(self.simSizeLow)) + addInfoLine(' tile size: {}'.format(self.tileSizeLow)) addInfoLine(' channel layout: {}'.format(self.c_low)) addInfoLine(' default channels: {}'.format(self.c_lists[DATA_KEY_MAIN][C_KEY_DEFAULT])) if len(self.c_lists[DATA_KEY_MAIN][C_KEY_VELOCITY])>0: @@ -228,6 +271,8 @@ def addInfoLine(line): if not useScaledData: addInfoLine(' not in use') else: + addInfoLine(' sim size: {}'.format(self.simSizeHigh)) + addInfoLine(' tile size: {}'.format(self.tileSizeHigh)) addInfoLine(' channel layout: {}'.format(self.c_high)) addInfoLine(' default channels: {}'.format(self.c_lists[DATA_KEY_SCALED][C_KEY_DEFAULT])) if len(self.c_lists[DATA_KEY_SCALED][C_KEY_VELOCITY])>0: @@ -308,9 +353,14 @@ def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): msg = 'setup data augmentation: ' if rot==2: + #TODO check tile sizes for bounds + self.rotScaleFactor = 1.6 + if np.greater(self.tileFactor, np.ones_like(self.tileFactor)/self.rotScaleFactor).any(): + self.TCError('Tiles are too large for random rotation (rot=1) augmentation. 
Use smaller tiles or rot=2.') self.do_rotation = True self.do_rot90 = False msg += 'rotation, ' + elif rot==1: self.do_rotation = False self.do_rot90 = True @@ -331,6 +381,9 @@ def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): if (self.scaleFactor[0]==1 and self.scaleFactor[1]==1): self.do_scaling = False else: + #TODO check tile sizes for bounds + if np.greater(self.tileFactor, np.ones_like(self.tileFactor)*self.scaleFactor[0]).any(): + self.TCError('Tiles are too large for minimum scaling augmentation {}. Use smaller tiles or a larger minimum.'.format(self.scaleFactor[0])) self.do_scaling = True msg += 'scaling, ' @@ -635,7 +688,7 @@ def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, block batch[DATA_KEY_SCALED] = np.asarray(batch[DATA_KEY_SCALED]) if self.dim==2 and squeezeZ: - TCDebug('squeeze z dimension') + self.TCDebug('squeeze z dimension') #(tiles, block, z,y,x,c) batch[DATA_KEY_MAIN] = np.squeeze(batch[DATA_KEY_MAIN], axis=2) if self.dataIsActive(DATA_KEY_SCALED): @@ -643,7 +696,7 @@ def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, block #collapse blockSize=1 if squeezeBlocks and blockSize==1: - TCDebug('squeeze block dimension') + self.TCDebug('squeeze block dimension') #(tiles, block, z,y,x,c) batch[DATA_KEY_MAIN] = np.squeeze(batch[DATA_KEY_MAIN], axis=1) if self.dataIsActive(DATA_KEY_SCALED): @@ -681,12 +734,19 @@ def generateTile(self, isTraining=True, blockSize = 1): #cut a tile for faster transformation if self.do_scaling or self.do_rotation: factor = 1 + preCutTileSize = self.tile_shape_low if self.do_rotation: # or self.do_scaling: - factor*=1.5 # scaling: to avoid size errors caused by rounding + factor*=self.rotScaleFactor #1.6 # scaling: to avoid size errors caused by rounding, could be off if data is not square/cubic + preCutTileSize = np.ceil(preCutTileSize * self.rotScaleFactor) if self.do_scaling: scaleFactor = np.random.uniform(self.scaleFactor[0], 
self.scaleFactor[1]) factor/= scaleFactor + preCutTileSize = np.ceil(preCutTileSize / scaleFactor) tileShapeLow = np.ceil(self.tile_shape_low*factor) + self.TCDebug('Pre-cut size old {}, new {}.'.format(tileShapeLow, preCutTileSize)) + if not self.isValidMainTileShapeChannels(tileShapeLow): + self.TCWarning('Augmentation pre-cutting results in larger than frame tile. Trying with frame size...') + tileShapeLow = self.frame_shape_low if self.dim==2: tileShapeLow[0] = 1 data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED], tileShapeLow.astype(int)) @@ -696,12 +756,14 @@ def generateTile(self, isTraining=True, blockSize = 1): if self.do_scaling: data = self.scale(data, scaleFactor) - bounds = np.zeros(4) #rotate if self.do_rotation: bounds = np.array(self.getTileShape(data[DATA_KEY_MAIN]))*0.16 #bounds applied on all sides, 1.5*(1-2*0.16)~1 + # make sure bounds is compatible with upRes + if self.dataIsActive(DATA_KEY_SCALED): + bounds = self.makeValidMainShape(bounds, ceil=True) data = self.rotate(data) #get a tile @@ -810,7 +872,7 @@ def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #b if tileShapeLow is None: tileShapeLow = np.copy(self.tile_shape_low) # use copy is very important!!! 
tileShapeHigh = tileShapeLow*self.upres - if not self.isValidMainTileShape(tileShapeLow): + if not self.isValidMainTileShapeChannels(tileShapeLow): self.TCErrorInternal('Invalid tile shape') if not self.isFrameSequence(low): #len(low.shape)!=5 or len(tileShapeLow)!=4: self.TCErrorInternal('MAIN data is no sequence') @@ -821,7 +883,7 @@ def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #b start = np.ceil(bounds) end = frameShapeLow - tileShapeLow + np.ones(4) - start - offset_up = np.array([self.upres, self.upres, self.upres]) + offset_up = self.upres #np.array([self.upres, self.upres, self.upres]) if self.dim==2: start[0] = 0 @@ -836,16 +898,21 @@ def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #b # cut tile hasMinDensity = False i = 1 - while (not hasMinDensity) and i<20: + while (not hasMinDensity): offset = np.asarray([randrange(start[0], end[0]), randrange(start[1], end[1]), randrange(start[2], end[2])]) + # TODO test; this uses global shapes, not the local ones + offset = self.makeValidMainShape(offset, ceil=False) lowTile = self.cutTile(low, tileShapeLow, offset) - offset *= offset_up + offset = (offset * offset_up).astype(int) if high is not None: highTile = self.cutTile(high, tileShapeHigh, offset) else: highTile = None hasMinDensity = self.hasMinDensity(lowTile) i+=1 + if i>=20: + self.TCInfo('Could not cut tile with minimum average density {} after {} tries. 
Using last one with average density {}.'.format(self.densityMinimum, 20, self.getTileDensity(lowTile))) + break return lowTile, highTile ##################################################################################### @@ -1142,24 +1209,63 @@ def isFrameSequence(self, data): def isFrameSingle(self, data): return len(data.shape)==DATA_DIM_LENGTH_SINGLE + def isValidMainTileShape(self, shape): + ''' + includes channels + ''' + if not self.isIntArray(shape): return False + if len(shape)!=(DATA_DIM_LENGTH_SINGLE-1): return False + # check compatible to scale factor + # smaller than frame + if np.less(self.simSizeLow, shape[:-1]).any(): return False + return True + + def isValidMainTileShapeChannels(self, shape): + ''' + includes channels + ''' + if not self.isIntArray(shape): return False if len(shape)!=DATA_DIM_LENGTH_SINGLE: return False + # check compatible to scale factor # smaller than frame - self.frame_shape_low if np.less(self.simSizeLow, shape[:-1]).any(): return False return True + def isScaleCompatibleMainTileShape(self, shape): + if not self.isValidMainTileShape(shape): return False + # check compatible to scale factor + if not self.isIntArray(self.simSizeLow*self.upres): return False + return True + def getTileShape(self, data): if not type(data) is np.ndarray: self.TCError('Can\'t get tile shape from not np-array data') if self.isFrameSequence(data): return data.shape[1:] elif self.isFrameSingle(data): return data.shape[:] else: self.TCError('Can\'t get tile shape from data with shape {}'.format(data.shape)) + + def getMinValidMainStep(self): + # lcm of 1 and self.upres #TODO test + return (np.lcm(np.ones_like(self.upres), self.upres)/self.upres).astype(int) + + def makeValidMainShape(self, shape, ceil=True): + if self.isValidMainTileShape(shape): return np.array(shape) # nothing to do + # per component: find next larger valid value + minStep = self.getMinValidMainStep() + if ceil: + return (shape * np.ceil(shape / minStep)).astype(int) + else: + 
return (shape * np.floor(shape / minStep)).astype(int) def getFrameTiles(self, index): ''' returns the frame as tiles''' low, high = self.getDatum(index) return self.createTiles(low, self.tile_shape_low), self.createTiles(high, self.tile_shape_high) + + def isIntArray(self, array): + return np.all(np.mod(array,1)==0) + ##################################################################################### # CHANNEL PARSING @@ -1362,10 +1468,11 @@ def savePngs(tiles, path, tile_format='NYXC', imageCounter=0, tiles_in_image=[1, tileShape[full_format.index(dim)] = shape[tile_format.index(dim)] # to full 'nbyxc' format + #print('tile shape {}'.format(tileShape)) tiles = np.reshape(tiles, tileShape) - noImages = shape[0]//tilesInImage + noImages = tileShape[0]//tilesInImage if save_gif: gif=[] From 923c876f5b46183d7ace1e64260d4ddacfa1bd73 Mon Sep 17 00:00:00 2001 From: Erik Franz Date: Wed, 28 Nov 2018 22:05:38 +0100 Subject: [PATCH 6/8] TC bugfixes TESTING added option to use dummy data output formatting --- tensorflow/tools/README_TileCreator.txt | 5 +- tensorflow/tools/tc_test.py | 62 +++++++++++++++++-------- tensorflow/tools/tilecreator_t.py | 51 ++++++++++++++------ 3 files changed, 84 insertions(+), 34 deletions(-) diff --git a/tensorflow/tools/README_TileCreator.txt b/tensorflow/tools/README_TileCreator.txt index da48d53..e41d6f8 100644 --- a/tensorflow/tools/README_TileCreator.txt +++ b/tensorflow/tools/README_TileCreator.txt @@ -8,7 +8,10 @@ optional second set of sturctured data with dimensionality and size matching the arbitrary label alongside the structured data - +0. Dependencies +numpy v1.14.0 (v1.15.4) +scipy v0.18.1 +imageio v2.1.2 1. 
Setup import tilecreator_t as tc diff --git a/tensorflow/tools/tc_test.py b/tensorflow/tools/tc_test.py index 2aa8ad2..e853e60 100644 --- a/tensorflow/tools/tc_test.py +++ b/tensorflow/tools/tc_test.py @@ -21,32 +21,36 @@ out_path = ph.getParam( "basePath", '../test_out/' ) -sim_path = ph.getParam( "basePath", '../data_sim/' ) +sim_path = ph.getParam( "simPath", '../data_sim/' ) randSeed = int(ph.getParam( "randSeed", 1 )) # seed for np and tf initialization simSizeHigh = int(ph.getParam( "simSizeHigh", 256 )) # size of high res sim tileSizeHigh = int(ph.getParam( "tileSizeHigh", 64 )) # size of high res tiles simSizeLow = int(ph.getParam( "simSizeLow", 64 )) # size of low res sim tileSizeLow = int(ph.getParam( "tileSizeLow", 16 )) # size of low res tiles -upRes = int(ph.getParam( "upRes", 4 )) # single generator scaling factor -dim = int(ph.getParam( "dim", 2 )) # dimension of dataset +upRes = float(ph.getParam( "upRes", 4 )) # single generator scaling factor +dim = int(ph.getParam( "dim", 2 )) # dimension of dataset -useVel = int(ph.getParam( "vel", 1 )) -augment = int(ph.getParam( "aug", 1 )) # use dataAugmentation or not +useDummyData = int(ph.getParam( "dummyData", 0 )) # create dummy arrays instead of loading real data +dummySizeLow = int(ph.getParam( "dummyLow", simSizeLow )) # sim size of dummy data if used. use to test size mismatches +dummySizeHigh = int(ph.getParam( "dummyHigh", simSizeHigh )) # + +useVel = int(ph.getParam( "vel", 1 )) # currently not in use +augment = int(ph.getParam( "aug", 1 )) # use dataAugmentation or not # no high res data in TC, using high data in TC's low res useScaledData = int(ph.getParam( "scaled", 0 )) -mainIsLow = int(ph.getParam( "mainIsLow", 0 )) +mainIsLow = int(ph.getParam( "mainIsLow", 1 if useScaledData else 0 )) # use high or low data as main data. TileCreator requires the main data to be the smaller one. 
useLabelData = int(ph.getParam( "label", 0 )) useDataBlocks = int(ph.getParam( "block", 0 )) blockSize = int(ph.getParam( "blockSize", 1 )) -batchCount = int(ph.getParam( "batchCount", 1 )) +batchCount = int(ph.getParam( "batchCount", 1 )) # number of batches to create saveImages = int(ph.getParam( "img", 1 )) saveRef = int(ph.getParam( "ref", 0 )) -fail = int(ph.getParam( "fail", 0 )) +fail = int(ph.getParam( "fail", 0 )) # wether the test is supposed to fail. used for output to indicate a "successful fail". ph.checkUnusedParams() np.random.seed(randSeed) @@ -106,7 +110,7 @@ def testFailed(msg): lowfilename = "density_low_%04d" + fileType highfilename = "density_high_%04d" + fileType - +# this is no longer supported by the TileCreator. main must always be the smaller one. if mainIsLow: if not useScaledData: highfilename = None @@ -138,16 +142,27 @@ def testFailed(msg): mfh = ["density", "velocity"] if not useDataBlocks else ["density", "velocity", "density", "velocity", "density", "velocity" ] moh = [0,0] if not useDataBlocks else [0,0,1,1,2,2] -pt1_start = time.perf_counter() -pt2_start = time.process_time() -floader = fdl.FluidDataLoader( print_info=1, base_path=sim_path, filename=lowfilename, oldNamingScheme=False, filename_y=highfilename, filename_index_min=index_min, filename_index_max=index_max, indices=dirIDs, data_fraction=0.5, multi_file_list=mfl, multi_file_idxOff=mol, multi_file_list_y=mfh , multi_file_idxOff_y=moh) -x, y, xFilenames = floader.get() -pt2_end = time.process_time() -pt1_end = time.perf_counter() -pt1 = pt1_end - pt1_start -pt2 = pt2_end - pt2_start -print('Loading Process Time: Total {:.04f}s; avg/frame {:.04f}s'.format(pt2,pt2/len(x))) -print('Loading Time: Total {:.04f}s; avg/frame {:.04f}s'.format(pt1,pt1/len(x))) +if not useDummyData: + print('\n - LOADING DATA -\n') + pt1_start = time.perf_counter() + pt2_start = time.process_time() + floader = fdl.FluidDataLoader( print_info=1, base_path=sim_path, filename=lowfilename, 
oldNamingScheme=False, filename_y=highfilename, filename_index_min=index_min, filename_index_max=index_max, indices=dirIDs, data_fraction=0.5, multi_file_list=mfl, multi_file_idxOff=mol, multi_file_list_y=mfh , multi_file_idxOff_y=moh) + x, y, xFilenames = floader.get() + pt2_end = time.process_time() + pt1_end = time.perf_counter() + pt1 = pt1_end - pt1_start + pt2 = pt2_end - pt2_start + print('Loading Process Time: Total {:.04f}s; avg/frame {:.04f}s'.format(pt2,pt2/len(x))) + print('Loading Time: Total {:.04f}s; avg/frame {:.04f}s'.format(pt1,pt1/len(x))) + +else: + print('\n - CREATING DATA -\n') + shapeLow = (40 ,(dummySizeLow if dim==3 else 1),dummySizeLow,dummySizeLow, dim+1) + shapeHigh = (40 ,(dummySizeHigh if dim==3 else 1),dummySizeHigh,dummySizeHigh, dim+1) + + x = np.ones(shapeLow if mainIsLow else shapeHigh) + if useScaledData: + y = np.ones(shapeHigh if mainIsLow else shapeLow) tile_format='NYXC' @@ -189,11 +204,13 @@ def testFailed(msg): if dim==3: channel_layout += ',vz' # tilecreator +print('\n - INIT TILECREATOR -\n') try: TC = tc.TileCreator(tileSize=tileSize, simSize=simSize , dim=dim, densityMinimum=0.1, scaleFactor=upRes, channelLayout_main=channel_layout, channelLayout_scaled=channel_layout, useScaledData=useScaledData, useLabels=useLabelData, useDataBlocks=useDataBlocks, logLevel=10) except tc.TilecreatorError as e: testFailed('TileCreator Error on construction') if augment: + print('\n - INIT DATA AUGMENTATION -\n') try: TC.initDataAugmentation(2) except tc.TilecreatorError as e: @@ -206,6 +223,7 @@ def testFailed(msg): y,_ = np.split(y, [3], axis=-1) # add low data with dummy labels +print('\n - ADDING DATA -\n') try: TC.addData(x, y if useScaledData else None, l, b) except tc.TilecreatorError as e: @@ -216,6 +234,7 @@ def testFailed(msg): #test batch: if True: + print('\n - CREATING BATCH -\n') imageCounter=0 pt1_start = time.perf_counter() pt2_start = time.process_time() @@ -271,5 +290,8 @@ def testFailed(msg): 
tc.savePngsGrayscale(tile, test_path + 'rec_{}_'.format(r),imageCounter=0, tiles_in_image=[1,1], channels=[0], save_rgb = [[1,2]], rgb_interval=[-2,2], plot_vel_x_y=False) print('') -print('--- TEST FINISHED ---') +if fail: + print('--- TEST FINISHED DESPITE INDICATED FAILURE---') +else: + print('--- TEST FINISHED ---') print('') diff --git a/tensorflow/tools/tilecreator_t.py b/tensorflow/tools/tilecreator_t.py index a4c5546..d580a58 100644 --- a/tensorflow/tools/tilecreator_t.py +++ b/tensorflow/tools/tilecreator_t.py @@ -149,7 +149,7 @@ def __init__(self, tileSize, simSize=64, dim=2, densityMinimum=0.02, channelLayo self.TCError('Tile size must be integer.') # check valid scale factor minUpresStep = 0.125 - if not np.all(np.mod(array,minUpresStep)==0): + if not np.all(np.mod(self.upres,minUpresStep)==0): self.TCError('Tile size must be a multiple of {}.'.format(minUpresStep)) #self.TCDebug('Main size: sim {}, tile {}.'.format(self.simSizeLow, self.tileSizeLow)) @@ -170,8 +170,8 @@ def __init__(self, tileSize, simSize=64, dim=2, densityMinimum=0.02, channelLayo if useScaledData: # preliminary checks # valid scaleFactor - if(np.less(self.upres, np.ones(3)).any): - self.TCError('Scale factor must be at least one for every dimension. 
Swap main and scaled data and invert the scale factor.') + if np.less(self.upres, np.ones(3)).any(): + self.TCError('Scale factor ({}) must be at least one for every dimension.'.format(self.upres)) # main sizes compatible if not self.isIntArray(self.simSizeLow*self.upres): self.TCError('Main sim size {} is not compatible with the scale factor {}.'.format(self.simSizeLow, self.upres)) @@ -743,12 +743,13 @@ def generateTile(self, isTraining=True, blockSize = 1): factor/= scaleFactor preCutTileSize = np.ceil(preCutTileSize / scaleFactor) tileShapeLow = np.ceil(self.tile_shape_low*factor) + if self.dim==2: + tileShapeLow[0] = 1 + preCutTileSize[0] = 1 self.TCDebug('Pre-cut size old {}, new {}.'.format(tileShapeLow, preCutTileSize)) if not self.isValidMainTileShapeChannels(tileShapeLow): self.TCWarning('Augmentation pre-cutting results in larger than frame tile. Trying with frame size...') tileShapeLow = self.frame_shape_low - if self.dim==2: - tileShapeLow[0] = 1 data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED], tileShapeLow.astype(int)) @@ -761,12 +762,13 @@ def generateTile(self, isTraining=True, blockSize = 1): #rotate if self.do_rotation: bounds = np.array(self.getTileShape(data[DATA_KEY_MAIN]))*0.16 #bounds applied on all sides, 1.5*(1-2*0.16)~1 - # make sure bounds is compatible with upRes - if self.dataIsActive(DATA_KEY_SCALED): - bounds = self.makeValidMainShape(bounds, ceil=True) data = self.rotate(data) #get a tile + # make sure bounds is compatible with upRes + self.TCDebug('Bounds {}.'.format(bounds)) + bounds = self.makeValidMainShape(bounds, ceil=True) + self.TCDebug('Bounds {}.'.format(bounds)) data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED], bounds=bounds) #includes "shifting" if self.do_rot90: @@ -871,7 +873,7 @@ def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #b if tileShapeLow is None: tileShapeLow = 
np.copy(self.tile_shape_low) # use copy is very important!!! - tileShapeHigh = tileShapeLow*self.upres + tileShapeHigh = self.makeHighShape(tileShapeLow, format='zyxc') if not self.isValidMainTileShapeChannels(tileShapeLow): self.TCErrorInternal('Invalid tile shape') if not self.isFrameSequence(low): #len(low.shape)!=5 or len(tileShapeLow)!=4: @@ -1219,7 +1221,7 @@ def isValidMainTileShape(self, shape): if len(shape)!=(DATA_DIM_LENGTH_SINGLE-1): return False # check compatible to scale factor # smaller than frame - if np.less(self.simSizeLow, shape[:-1]).any(): return False + if np.less(self.simSizeLow, shape).any(): return False return True def isValidMainTileShapeChannels(self, shape): @@ -1247,16 +1249,39 @@ def getTileShape(self, data): def getMinValidMainStep(self): # lcm of 1 and self.upres #TODO test - return (np.lcm(np.ones_like(self.upres), self.upres)/self.upres).astype(int) + if self.dataIsActive(DATA_KEY_SCALED): + # TODO lcm does not work with float + return (np.lcm(np.ones_like(self.upres), self.upres)/self.upres).astype(int) + else: + return 1.0 def makeValidMainShape(self, shape, ceil=True): if self.isValidMainTileShape(shape): return np.array(shape) # nothing to do # per component: find next larger valid value minStep = self.getMinValidMainStep() if ceil: - return (shape * np.ceil(shape / minStep)).astype(int) + return (minStep * np.ceil(shape / minStep)).astype(int) + else: + return (minStep * np.floor(shape / minStep)).astype(int) + + def makeHighShape(self, shape, format='zyx'): + format = format.lower() + if len(shape) != len(format): + self.TCErrorInternal('Shape {} does not match shape format \'{}\'.'.format(shape, format)) + upRes = np.copy(self.upres) + + if format == 'zyx': + pass + elif format == 'bzyx': + upRes = np.array((1,upRes[0],upRes[1],upRes[2])) + elif format == 'zyxc': + upRes = np.array((upRes[0],upRes[1],upRes[2],1)) + elif format == 'bzyxc': + upRes = np.array((1,upRes[0],upRes[1],upRes[2],1)) else: - return (shape * 
np.floor(shape / minStep)).astype(int) + self.TCErrorInternal('Unkown shape format \'{}\'.'.format(format)) + + return (shape*upRes).astype(int) def getFrameTiles(self, index): ''' returns the frame as tiles''' From 451fb90e18c3ce30e86863000ac1246fd0a63956 Mon Sep 17 00:00:00 2001 From: Erik Franz Date: Sat, 1 Dec 2018 17:38:09 +0100 Subject: [PATCH 7/8] TC fixed min valid step for offset calculation when using scaled data added shape formatting functions added more debug output bugfixes TESTING added test overview log --- tensorflow/tools/README_TileCreator.txt | 2 +- tensorflow/tools/tc_test.py | 20 ++- tensorflow/tools/tilecreator_t.py | 166 ++++++++++++++++++------ 3 files changed, 142 insertions(+), 46 deletions(-) diff --git a/tensorflow/tools/README_TileCreator.txt b/tensorflow/tools/README_TileCreator.txt index e41d6f8..b0b19f5 100644 --- a/tensorflow/tools/README_TileCreator.txt +++ b/tensorflow/tools/README_TileCreator.txt @@ -9,7 +9,7 @@ arbitrary label alongside the structured data 0. 
Dependencies -numpy v1.14.0 (v1.15.4) +numpy v1.14.0 (v1.15.4 for scaled data) scipy v0.18.1 imageio v2.1.2 diff --git a/tensorflow/tools/tc_test.py b/tensorflow/tools/tc_test.py index e853e60..e966c30 100644 --- a/tensorflow/tools/tc_test.py +++ b/tensorflow/tools/tc_test.py @@ -19,7 +19,7 @@ import numpy as np import paramhelpers as ph - +test_name = ph.getParam( "title", '' ) out_path = ph.getParam( "basePath", '../test_out/' ) sim_path = ph.getParam( "simPath", '../data_sim/' ) randSeed = int(ph.getParam( "randSeed", 1 )) # seed for np and tf initialization @@ -59,20 +59,27 @@ #if not os.path.exists(out_path): # os.makedirs(out_path) -test_path,_ = ph.getNextTestPath(0, out_path) +test_path, test_no = ph.getNextTestPath(0, out_path) sys.stdout = ph.Logger(test_path) sys.stderr = ph.ErrorLogger(test_path) +def writeSummary(msg): + # write summary to test overview + with open(out_path + 'test_overview.log', "a") as text_file: + text_file.write(msg + '\n') + def testFailed(msg): print('\n{}:\n{}'.format(msg, traceback.format_exc())) if fail: print('') print('--- TEST FAILED AS INDICATED ---') print('') + writeSummary('SUCCESS: Test {} \"{}\" failed as indicated with error {}'.format(test_no, test_name, msg)) else: print('') print('--- TEST FAILED ---') print('') + writeSummary('FAILURE: Test {} \"{}\" failed with error {}'.format(test_no, test_name, msg)) exit() print('') @@ -114,7 +121,7 @@ def testFailed(msg): if mainIsLow: if not useScaledData: highfilename = None - upRes = 1 + #upRes = 1 simSize = simSizeLow tileSize = tileSizeLow else: @@ -125,7 +132,7 @@ def testFailed(msg): upRes = 1/upRes else: highfilename = None - upRes = 1 + #upRes = 1 simSize = simSizeHigh tileSize = tileSizeHigh #if useVel: @@ -250,6 +257,7 @@ def testFailed(msg): #tileShape = (batch[0].shape[0],tileSize,tileSize,batch[0].shape[-1]) tiles = batch[i] i+=1 + if len(tiles.shape) != len(tile_format): tile_format = 'NYXC' # squeeded active block data if blockSize=1 #print('tiles_x shape: 
{}'.format(tiles.shape)) if dim==3: tiles = (tiles[2:6,6:8] if useScaledData else tiles[2:6,24:32]) if saveImages: ic=tc.savePngs(tiles, test_path, tile_format=tile_format,imageCounter=imageCounter, tiles_in_image=[1,1], plot_vel_x_y=False, channels=[0], save_rgb = rgb_channels, rgb_interval=rgb_range) @@ -292,6 +300,10 @@ def testFailed(msg): print('') if fail: print('--- TEST FINISHED DESPITE INDICATED FAILURE---') + writeSummary('FAILURE: Test {} \"{}\" finished despite indicated failure'.format(test_no, test_name)) else: print('--- TEST FINISHED ---') + writeSummary('SUCCESS: Test {} \"{}\" finished'.format(test_no, test_name)) print('') + + diff --git a/tensorflow/tools/tilecreator_t.py b/tensorflow/tools/tilecreator_t.py index d580a58..44af426 100644 --- a/tensorflow/tools/tilecreator_t.py +++ b/tensorflow/tools/tilecreator_t.py @@ -52,6 +52,15 @@ AOPS_KEY_ROT90 = 'rot90' AOPS_KEY_FLIP = 'flip' +SCALEFACTOR_GREATEST_DENOMINATOR = 2**4 + +# main and scaled data shape formats +SHAPE_FORMATS = ['nzyxc', 'nzyx', 'zyxc', 'zyx'] +SHAPE_FORMAT_FULL = SHAPE_FORMATS[0] +SHAPE_FORMAT_FULL_NOCHANNEL = SHAPE_FORMATS[1] +SHAPE_FORMAT_SINGLE = SHAPE_FORMATS[2] +SHAPE_FORMAT_SINGLE_NOCHANNEL = SHAPE_FORMATS[3] + seed( 42 ) # default channel layouts @@ -166,12 +175,16 @@ def __init__(self, tileSize, simSize=64, dim=2, densityMinimum=0.02, channelLayo self.densityMinimum = densityMinimum self.useDataAug = False + self.sfD = SCALEFACTOR_GREATEST_DENOMINATOR + # SCALED DATA if useScaledData: # preliminary checks # valid scaleFactor if np.less(self.upres, np.ones(3)).any(): self.TCError('Scale factor ({}) must be at least one for every dimension.'.format(self.upres)) + if not self.isIntArray(self.upres * self.sfD): + self.TCError('Scale factor ({}) must be divisible by {}.'.format(self.upres, self.sfD)) # main sizes compatible if not self.isIntArray(self.simSizeLow*self.upres): self.TCError('Main sim size {} is not compatible with the scale factor {}.'.format(self.simSizeLow, 
self.upres)) @@ -490,10 +503,10 @@ def addData(self, main, scaled=None, labels=None, blocks=None): self.data[DATA_KEY_MAIN] = [x for (_, x) in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_MAIN]), key= lambda i: i[0])] if self.dataIsActive(DATA_KEY_SCALED): - self.data[DATA_KEY_SCALED] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_SCALED]))] + self.data[DATA_KEY_SCALED] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_SCALED]), key= lambda i: i[0])] if self.dataIsActive(DATA_KEY_LABEL): - self.data[DATA_KEY_LABEL] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_LABEL]))] + self.data[DATA_KEY_LABEL] = [x for _, x in sorted(zip(self.data[DATA_KEY_BLOCK], self.data[DATA_KEY_LABEL]), key= lambda i: i[0])] self.data[DATA_KEY_BLOCK] = sorted(self.data[DATA_KEY_BLOCK]) @@ -618,18 +631,20 @@ def createTiles(self, data, tileShape, strides=-1): def cutTile(self, data, tileShape, offset=[0,0,0]): ''' cut a tile of with shape and offset - data shape: (block,z,y,x,c), tileShape: (z,y,x,c) + data shape: (block,z,y,x,c), tileShape: (z,y,x[,c]) ''' # TODO support 4th T dim offset = np.asarray(offset) tileShape = np.asarray(tileShape) - tileShape[-1] = data.shape[-1] + #tileShape[-1] = data.shape[-1] if np.less(data.shape[1:4], tileShape[:3]+offset[:3]).any(): self.TCError('Can\'t cut tile with shape {} and offset{} from data with shape {}.'.format(tileShape, offset, data.shape[1:])) + self.TCDebug('Cut tile with shape {} and offset{} from data with shape {}.'.format(tileShape, offset, data.shape[1:])) + tile = data[:, offset[0]:offset[0]+tileShape[0], offset[1]:offset[1]+tileShape[1], offset[2]:offset[2]+tileShape[2], :] - if not np.array_equal(tile.shape[1:],tileShape): + if not np.array_equal(tile.shape[1:4],tileShape[:3]): self.TCError('Wrong tile shape after cutting. is: {}. 
goal: {}.'.format(tile.shape,tileShape)) return tile @@ -703,7 +718,7 @@ def selectRandomTiles(self, selectionSize, isTraining=True, augment=False, block batch[DATA_KEY_SCALED] = np.squeeze(batch[DATA_KEY_SCALED], axis=1) # for labels too? elif squeezeBlocks: - self.TCWarning('can\'t squeeze block dimension with size {}.'.format(blockSize)) + self.TCInfo('can\'t squeeze block dimension with size {}.'.format(blockSize)) #self.TCInfo('batch shape: {}'.format(batch[DATA_KEY_MAIN].shape)) @@ -724,10 +739,9 @@ def generateTile(self, isTraining=True, blockSize = 1): ''' generates random tiles (data augmentation) ''' - # get a frame, is a copy to avoid transormations affecting the raw dataset - #data = {} - #: main, [scaled, block, label] - #data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomDatum(isTraining, blockSize) + + self.TCDebug('Start generate tile.') + data = self.getRandomDatumDict(isTraining, blockSize) @@ -746,27 +760,27 @@ def generateTile(self, isTraining=True, blockSize = 1): if self.dim==2: tileShapeLow[0] = 1 preCutTileSize[0] = 1 - self.TCDebug('Pre-cut size old {}, new {}.'.format(tileShapeLow, preCutTileSize)) - if not self.isValidMainTileShapeChannels(tileShapeLow): + self.TCDebug('Pre-cut size {} (old would be {}).'.format(preCutTileSize, tileShapeLow)) + if not self.isValidMainTileShapeChannels(preCutTileSize): self.TCWarning('Augmentation pre-cutting results in larger than frame tile. 
Trying with frame size...') - tileShapeLow = self.frame_shape_low - data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED], tileShapeLow.astype(int)) + preCutTileSize = self.frame_shape_low + data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED], preCutTileSize.astype(int)) #random scaling, changes resolution if self.do_scaling: data = self.scale(data, scaleFactor) - bounds = np.zeros(4) + bounds = np.zeros(3) #rotate if self.do_rotation: - bounds = np.array(self.getTileShape(data[DATA_KEY_MAIN]))*0.16 #bounds applied on all sides, 1.5*(1-2*0.16)~1 + bounds = np.array(self.getTileShape(data[DATA_KEY_MAIN], SHAPE_FORMAT_SINGLE_NOCHANNEL))*0.16 #bounds applied on all sides, 1.5*(1-2*0.16)~1 data = self.rotate(data) #get a tile # make sure bounds is compatible with upRes - self.TCDebug('Bounds {}.'.format(bounds)) + self.TCDebug('Bounds raw {}.'.format(bounds)) bounds = self.makeValidMainShape(bounds, ceil=True) self.TCDebug('Bounds {}.'.format(bounds)) data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED], bounds=bounds) #includes "shifting" @@ -791,6 +805,9 @@ def generateTile(self, isTraining=True, blockSize = 1): target_shape_high = np.copy(self.tile_shape_high) if not np.array_equal(self.getTileShape(data[DATA_KEY_SCALED]),target_shape_high): self.TCErrorInternal('Wrong SCALED tile shape after data augmentation. is: {}. goal: {}.'.format(data[DATA_KEY_SCALED].shape, target_shape_high)) + + + self.TCDebug('End generate tile. 
Tile shape {}.\n'.format(self.getTileShape(data[DATA_KEY_MAIN]))) return data @@ -808,7 +825,7 @@ def getRandomDatum(self, isTraining=True, blockSize = 1): else: blockSet = self.data[DATA_KEY_BLOCK_OFFSET][self.setBorders[0]:self.setBorders[1]] availableBlocks = np.resize(np.where(blockSet[:,2] >= blockSize), (-1)) - #self.TCInfo('available blocks {}'.format(availableBlocks)) + self.TCDebug('{} blocks available with block size {}.'.format(len(availableBlocks), blockSize)) randBlock = np.random.choice(availableBlocks) #len(blockSet), p=availableBlocks/np.sum(availableBlocks)) # p=availableBlocks/np.sum(availableBlocks) ? randBlock = blockSet[randBlock] randOffset = randrange(0, randBlock[2] - (blockSize-1)) @@ -817,7 +834,7 @@ def getRandomDatum(self, isTraining=True, blockSize = 1): pass else: if blockSize!=1: - self.TCWarning('Block size is ignored if block data is inactive.') + self.TCInfo('Block size is ignored if block data is inactive.') blockSize = 1 if isTraining: randNo = randrange(0, self.setBorders[0]) @@ -846,7 +863,7 @@ def getDatum(self, index, blockSize = 1): begin_ch_y = (index % self.dim_t) * self.tile_shape_high[-1] end_c_h_y = begin_ch_y + tile_t * self.tile_shape_high[-1] ''' - + self.TCDebug('Fetching datum {} with block size {}'.format(index, blockSize)) ret = [np.copy(self.data[DATA_KEY_MAIN][index:index+blockSize])] if self.dataIsActive(DATA_KEY_SCALED): #return np.copy(self.data[DATA_KEY_MAIN][index//self.dim_t][:,:,:,begin_ch:end_ch]), np.copy(self.data[DATA_KEY_SCALED][index//self.dim_t][:,:,:,begin_ch_y:end_c_h_y]) @@ -864,26 +881,31 @@ def getDatum(self, index, blockSize = 1): return ret - def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #bounds to avoid mirrored parts + def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0]): #bounds to avoid mirrored parts ''' cut a random tile (low and high) from a given frame, considers densityMinimum - input array shape is (block, z,y,x,c), tile shape 
is (z,y,x,c) - bounds: ignore edges of frames, used to discard mirrored parts after rotation + input array shape is (block, z,y,x,c), tile shape is (z,y,x[,c]) + bounds: ignore edges of frames, used to discard mirrored parts after rotation, shape (z,y,x) ''' + bounds = np.array(bounds) if tileShapeLow is None: - tileShapeLow = np.copy(self.tile_shape_low) # use copy is very important!!! - tileShapeHigh = self.makeHighShape(tileShapeLow, format='zyxc') - if not self.isValidMainTileShapeChannels(tileShapeLow): - self.TCErrorInternal('Invalid tile shape') + tileShapeLow = np.copy(self.tile_shape_low[:3]) # use copy is very important!!! + else: + tileShapeLow = np.asarray(tileShapeLow[:3]) + tileShapeHigh = self.makeHighShape(tileShapeLow, format='zyx') + if not self.isValidMainTileShape(tileShapeLow): + self.TCErrorInternal('Invalid main tile shape {}.'.format(tileShapeLow)) if not self.isFrameSequence(low): #len(low.shape)!=5 or len(tileShapeLow)!=4: self.TCErrorInternal('MAIN data is no sequence') if (not high is None) and not self.isFrameSequence(high): self.TCErrorInternal('SCALED data is no sequence') + if len(bounds)!=3: + self.TCErrorInternal('BOUNDS must be length 3') - frameShapeLow = np.asarray(low.shape[1:]) + frameShapeLow = np.asarray(low.shape[1:4]) start = np.ceil(bounds) - end = frameShapeLow - tileShapeLow + np.ones(4) - start + end = frameShapeLow - tileShapeLow + np.ones(3) - start offset_up = self.upres #np.array([self.upres, self.upres, self.upres]) @@ -898,12 +920,15 @@ def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0,0]): #b self.TCErrorInternal('Can\'t cut tile {} from frame {} with bounds {}.'.format(tileShapeLow, frameShapeLow, start)) # cut tile + self.TCDebug('Cut random tile {} from frame {} with bounds {}.'.format(tileShapeLow, frameShapeLow, start)) hasMinDensity = False i = 1 while (not hasMinDensity): offset = np.asarray([randrange(start[0], end[0]), randrange(start[1], end[1]), randrange(start[2], end[2])]) # 
TODO test; this uses global shapes, not the local ones + self.TCDebug('Try {}, raw offset {}.'.format(i, offset)) offset = self.makeValidMainShape(offset, ceil=False) + self.TCDebug('offset {}.'.format(offset)) lowTile = self.cutTile(low, tileShapeLow, offset) offset = (offset * offset_up).astype(int) if high is not None: @@ -965,6 +990,8 @@ def rotate(self, data): [ q[1, 3]-q[2, 0], q[2, 3]+q[1, 0], 1-q[1, 1]-q[2, 2], 0], [ 0, 0, 0, 1]]) + self.TCDebug('Rotate by {}.'.format(q if self.dim==3 else theta)) + data = self.special_aug(data, AOPS_KEY_ROTATE, rotation_matrix) for data_key in data: @@ -994,6 +1021,8 @@ def rotateVelocities(self, datum, c_list, rotationMatrix): rotate vel vectors (channel 1-3) ''' + self.TCDebug('Rotate vectors in channels {}.'.format(c_list)) + rotation3 = rotationMatrix[:3, :3] rotation2 = rotationMatrix[1:3, 1:3] channels = np.split(datum, datum.shape[-1], -1) @@ -1024,6 +1053,8 @@ def rotate90(self, data, axes): if len(axes)!=2: self.TCError('need 2 axes for rotate90.') + self.TCDebug('Rotate 90 degree from axis {} to axis {}.'.format(axes[0], axes[1])) + for data_key in data: if self.dataCanAugment(data_key): data[data_key] = np.rot90(data[data_key], axes=axes) @@ -1035,6 +1066,7 @@ def rotate90(self, data, axes): def rotate90Velocities(self, datum, c_list, axes): if len(axes)!=2: self.TCError('need 2 axes for rotate90.') + self.TCDebug('Rotate vectors 90 degree in channels {}.'.format(c_list)) channels = np.split(datum, datum.shape[-1], -1) for v in c_list: #axes z,y,x -> vel x,y,z: 0,1,2 -> 2,1,0 @@ -1050,6 +1082,8 @@ def flip(self, data, axes, isFrame=True): #axes=list, flip multiple at once ''' # axis: 0,1,2 -> z,y,x axes = np.asarray(axes, dtype=np.int32) + self.TCDebug('Flip axes {}.'.format(axes)) + if self.isFrameSequence(data[DATA_KEY_MAIN]): # not isFrame: #only for dims, not vectors axes_m = axes + np.ones(axes.shape, dtype=np.int32) @@ -1070,6 +1104,7 @@ def flipVelocities(self, datum, c_list, axes): low: data with 
velocity to flip (4 channels: d,vx,vy,vz) axes: list of axis indices 0,1,2-> z,y,x ''' + self.TCDebug('Flip vectors in channels {}.'.format(c_list)) # !axis order: data z,y,x channels = np.split(datum, datum.shape[-1], -1) @@ -1094,6 +1129,8 @@ def scale(self, data, factor): scale = [factor, factor, factor, 1] #single frame if self.dim==2: scale[0] = 1 + + self.TCDebug('Scale with factor {}.'.format(scale)) if len(data[DATA_KEY_MAIN].shape)==5: #frame sequence scale = np.append([1],scale) @@ -1116,6 +1153,7 @@ def scale(self, data, factor): def scaleVelocities(self, datum, c_list, factor): #scale vel? vel*=factor + self.TCDebug('Scale vectors in channels {}.'.format(c_list)) channels = np.split(datum, datum.shape[-1], -1) for v in c_list: # x,y,[z]; 2,1,0 channels[v[0]] *= factor @@ -1220,6 +1258,8 @@ def isValidMainTileShape(self, shape): if not self.isIntArray(shape): return False if len(shape)!=(DATA_DIM_LENGTH_SINGLE-1): return False # check compatible to scale factor + if self.dataIsActive(DATA_KEY_SCALED): + if not self.isIntArray(shape[:3]*self.upres): return False # smaller than frame if np.less(self.simSizeLow, shape).any(): return False return True @@ -1231,6 +1271,8 @@ def isValidMainTileShapeChannels(self, shape): if not self.isIntArray(shape): return False if len(shape)!=DATA_DIM_LENGTH_SINGLE: return False # check compatible to scale factor + if self.dataIsActive(DATA_KEY_SCALED): + if not self.isIntArray(shape[:3]*self.upres): return False # smaller than frame if np.less(self.simSizeLow, shape[:-1]).any(): return False return True @@ -1238,38 +1280,49 @@ def isValidMainTileShapeChannels(self, shape): def isScaleCompatibleMainTileShape(self, shape): if not self.isValidMainTileShape(shape): return False # check compatible to scale factor - if not self.isIntArray(self.simSizeLow*self.upres): return False + if not self.isIntArray(shape*self.upres): return False return True - def getTileShape(self, data): + def getTileShape(self, data, 
format=SHAPE_FORMAT_SINGLE): if not type(data) is np.ndarray: self.TCError('Can\'t get tile shape from not np-array data') - if self.isFrameSequence(data): return data.shape[1:] - elif self.isFrameSingle(data): return data.shape[:] + if self.isFrameSequence(data): shape = data.shape[1:] + elif self.isFrameSingle(data): shape = data.shape[:] else: self.TCError('Can\'t get tile shape from data with shape {}'.format(data.shape)) + return self.formatShape(shape, SHAPE_FORMAT_SINGLE, format) + def getMinValidMainStep(self): - # lcm of 1 and self.upres #TODO test + ''' + minimum valid step size for scaleable shapes + return shape format 'zyx' SHAPE_FORMAT_SINGLE_NOCHANNEL, same as self.upres + ''' if self.dataIsActive(DATA_KEY_SCALED): - # TODO lcm does not work with float - return (np.lcm(np.ones_like(self.upres), self.upres)/self.upres).astype(int) + minStep = self.sfD / np.gcd(self.sfD, (self.sfD * self.upres).astype(int)) + if not self.isIntArray(minStep): + self.TCErrorInternal('Calculated minimum step {} is invalid'.format(minStep)) + #self.TCDebug('Minimum valid main step {}'.format(minStep)) + return minStep.astype(int) else: - return 1.0 + return np.ones(3) - def makeValidMainShape(self, shape, ceil=True): - if self.isValidMainTileShape(shape): return np.array(shape) # nothing to do + def makeValidMainShape(self, shape, format=SHAPE_FORMAT_SINGLE_NOCHANNEL, ceil=True): + if self.isValidMainTileShape(shape): return np.copy(shape) # nothing to do # per component: find next larger valid value minStep = self.getMinValidMainStep() + minStep = self.formatShape(minStep, SHAPE_FORMAT_SINGLE_NOCHANNEL, format) + if ceil: return (minStep * np.ceil(shape / minStep)).astype(int) else: return (minStep * np.floor(shape / minStep)).astype(int) - def makeHighShape(self, shape, format='zyx'): + def makeHighShape(self, shape, format=SHAPE_FORMAT_SINGLE_NOCHANNEL): format = format.lower() if len(shape) != len(format): self.TCErrorInternal('Shape {} does not match shape format 
\'{}\'.'.format(shape, format)) - upRes = np.copy(self.upres) + #upRes = np.copy(self.upres) + ''' if format == 'zyx': pass elif format == 'bzyx': @@ -1280,9 +1333,40 @@ def makeHighShape(self, shape, format='zyx'): upRes = np.array((1,upRes[0],upRes[1],upRes[2],1)) else: self.TCErrorInternal('Unkown shape format \'{}\'.'.format(format)) + ''' + upRes = self.formatShape(self.upres, SHAPE_FORMAT_SINGLE_NOCHANNEL, format) return (shape*upRes).astype(int) + def formatShape(self, srcShape, srcFormat, dstFormat, fill=1): + ''' + transforms one data shape format into another. + dimensions that are not present in the destination format are discarded + dimensions that are not present in the source format are filled with + ''' + if not self.isShapeFormat(srcShape, srcFormat): + self.TCErrorInternal('Shape {} with format {} is invalid.'.format(srcShape, srcFormat)) + if not self.isValidShapeFormat(dstFormat): + self.TCErrorInternal('Shape format {} is invalid.'.format(dstFormat)) + + if srcFormat == dstFormat: return np.copy(srcShape) + + dstShape = [fill] * len(dstFormat) + for dim in dstFormat: + if dim in srcFormat: + dstShape[dstFormat.index(dim)] = srcShape[srcFormat.index(dim)] + + self.TCDebug('Format shape {} with format {} to shape {} with format {}.'.format(srcShape, srcFormat, dstShape, dstFormat)) + return np.asarray(dstShape) + + def isValidShapeFormat(self, format): + return format.lower() in SHAPE_FORMATS + + def isShapeFormat(self, shape, format): + if not self.isValidShapeFormat(format): return False + if len(shape) != len(format): return False + return True + def getFrameTiles(self, index): ''' returns the frame as tiles''' low, high = self.getDatum(index) From 81486821c24cd0ffc49585418d7d6576478f681c Mon Sep 17 00:00:00 2001 From: Erik Franz Date: Sun, 2 Dec 2018 18:58:10 +0100 Subject: [PATCH 8/8] TC added tile size checks for data augmentation updated readme --- tensorflow/tools/README_TileCreator.txt | 35 ++++++++++++++++++++----- 
tensorflow/tools/tilecreator_t.py | 16 +++++++---- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/tensorflow/tools/README_TileCreator.txt b/tensorflow/tools/README_TileCreator.txt index b0b19f5..4255920 100644 --- a/tensorflow/tools/README_TileCreator.txt +++ b/tensorflow/tools/README_TileCreator.txt @@ -1,14 +1,15 @@ TileCreator -create tiles from 2D or 3D simulation data (or other structured data) with different channels. -data augmentation: scaling, shifting and rotation. scaling and rotation is also applied to vector data +Tool to create tiles from 2D or 3D simulation data (or other grid data) with different channels. +Includes data augmentation: scaling, shifting and rotation. scaling and rotation is also applied to vector data in channels. -optional second set of sturctured data with dimensionality and size matching the main data +optional second set of structured data with matching dimensionality and size compatible to the main data arbitrary label alongside the structured data 0. Dependencies +python v3.6 numpy v1.14.0 (v1.15.4 for scaled data) scipy v0.18.1 imageio v2.1.2 @@ -27,25 +28,29 @@ channelLayout_main: what type of data the different channels contain as a comma useScaledData: an optional second structured dataset with a fixed scaling factor to the main data. will be augmented to fit the augmentation of the main data (e.g. same rotation) channelLayout_scaled: same as channelLayout_main for active scaled data -scaleFactor: the scaling factor between main and scaled data, can be 1 +scaleFactor: the scaling factor between main and scaled data. must be greater than or equal to 1. must be a multiple of 1/16 to avoid numerical errors. simSize*scaleFactor and tileSize*scaleFactor must be whole numbers. useDataBlocks: an optional grouping of data using block ids. the ids have to be provided when adding data to the tilecrator. enables the creation of blocks of tiles with matching augmentation (i.e.
like an additional dimension that is not agmented). can be used to create augmented (time-) sequences of data useLabels: an optional set of data that is not augmented. no type or structure is assumed partTrain, partTest (, partVal): the relative sizes of data sets for train and testing mode (machine learing). val data is currently unused and inaccessible and should be left at 0. -logLevel: how much information to print. +logLevel: how much information to print. tc.LOG_LEVEL_ERROR, tc.LOG_LEVEL_WARNING, tc.LOG_LEVEL_INFO, tc.LOG_LEVEL_DEBUG. 1.1 Setup Data Augmentation TC.initDataAugmentation(rot=2, minScale=0.85, maxScale=1.15 ,flip=True) +Enables data augmentation for main (and scaled) data with the specified parameters. Set augment=True when calling TC.selectRandomTiles() to create augmented data. rot: type of rotation augmentation (NOT an angle limit). 1: fixed 90° roatations, 2: full rotation (assumes enough space for boundaries in data), else: no rotation) minScale, maxScale: limits for scaling. set both to 1 to disable scaling. flip: flipping (mirroring) of data +tile size is checked to be compatible with the specified rotation and scaling parameters. + 1.2 Using block data from fluiddataloader The fluiddataloader merges blocks into channels. Use tc.blockFromChannelsToSequence(data, blockLength) to convert the data to the required format. 2. Adding data TC.addData(main, scaled=None, labels=None, blocks=None) +Adds data to the TileCreator. The data is split into training, validation and testing sets afterwards. Data can be cleared with TC.clearData() main: the main data, must match simSize and channels specified in the constructor scaled: required when useScaledData==True, ignored otherwise. must match simSize*scaleFactor and channels specified in the constructor. must be the same amount as the main data.
@@ -65,4 +70,22 @@ squeezeBlocks: whether to squeeze/collapse the block dimension/axis of main (and returns: main[,scaled][,labels]: main and scaled are np.ndarray with shape: n[,b][,z],y,x,c with z,y,x mathching the (scaled) tile size and c channels. labels is a list -4. Image output \ No newline at end of file +4. Image output +tc.savePngs(tiles, path, tile_format='NYXC', imageCounter=0, tiles_in_image=[1,1], channels=[0], save_gif=False, plot_vel_x_y=False, save_rgb=None, rgb_interval=[-1,1]) + +tiles: tile data to save as images (.png). +path: directory to write images to. +tile_format: data format of the input tile. valid formats are 'yx','yxc','nyx','nyxc','byxc', 'nbyxc', not case sensitive. +imageCounter: as this method can output multiple images for sequences of tiles, the images are numbered starting with this number. +tiles_in_image: number of tiles along x and y dimension to combine to a single image. +channels: channels to save as grayscale image. +save_gif: +plot_vel_x_y: save 2nd and 3rd channel as vector quiver plot. +save_rgb: list of lists of length 2 or 3. save these channels as RGB image. +rgb_interval: data values are mapped to [0,1] from this interval when writing RGB images. + +returns: next image counter (imageCounter + number of images written) + + +5. known issues +very small tiles might cause errors when cutting tiles during augmentation due to rounding.
\ No newline at end of file diff --git a/tensorflow/tools/tilecreator_t.py b/tensorflow/tools/tilecreator_t.py index 44af426..1142b0d 100644 --- a/tensorflow/tools/tilecreator_t.py +++ b/tensorflow/tools/tilecreator_t.py @@ -366,7 +366,7 @@ def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): msg = 'setup data augmentation: ' if rot==2: - #TODO check tile sizes for bounds + #check tile sizes for bounds self.rotScaleFactor = 1.6 if np.greater(self.tileFactor, np.ones_like(self.tileFactor)/self.rotScaleFactor).any(): self.TCError('Tiles are too large for random rotation (rot=1) augmentation. Use smaller tiles or rot=2.') @@ -394,12 +394,18 @@ def initDataAugmentation(self, rot=2, minScale=0.85, maxScale=1.15 ,flip=True): if (self.scaleFactor[0]==1 and self.scaleFactor[1]==1): self.do_scaling = False else: - #TODO check tile sizes for bounds + #check tile sizes for bounds if np.greater(self.tileFactor, np.ones_like(self.tileFactor)*self.scaleFactor[0]).any(): self.TCError('Tiles are too large for minimum scaling augmentation {}. Use smaller tiles or a larger minimum.'.format(self.scaleFactor[0])) self.do_scaling = True msg += 'scaling, ' + # check tile size for rot AND min scale + if self.do_rotation and self.do_scaling: + if np.greater(self.tileFactor, np.ones_like(self.tileFactor)*self.scaleFactor[0]/self.rotScaleFactor).any(): + self.TCError('Tiles are too large for combined rotation - minimum scaling ({}) augmentation . Use smaller tiles or a larger minimum.'.format(self.scaleFactor[0])) + + self.do_flip = flip if self.do_flip: msg += 'flip' @@ -764,7 +770,7 @@ def generateTile(self, isTraining=True, blockSize = 1): if not self.isValidMainTileShapeChannels(preCutTileSize): self.TCWarning('Augmentation pre-cutting results in larger than frame tile. 
Trying with frame size...') preCutTileSize = self.frame_shape_low - data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED], preCutTileSize.astype(int)) + else: data[DATA_KEY_MAIN], data[DATA_KEY_SCALED] = self.getRandomTile(data[DATA_KEY_MAIN], data[DATA_KEY_SCALED], preCutTileSize.astype(int)) #random scaling, changes resolution @@ -916,7 +922,7 @@ def getRandomTile(self, low, high=None, tileShapeLow=None, bounds=[0,0,0]): #bou tileShapeHigh[0] = 1 # check if possible to cut tile - if np.amin((end-start)[:3]) < 0: + if np.amin((end-start)[:3]) < 1: self.TCErrorInternal('Can\'t cut tile {} from frame {} with bounds {}.'.format(tileShapeLow, frameShapeLow, start)) # cut tile @@ -1134,7 +1140,7 @@ def scale(self, data, factor): if len(data[DATA_KEY_MAIN].shape)==5: #frame sequence scale = np.append([1],scale) - # to ensure high/low ration stays the same + # to ensure high/low ratio stays the same scale = np.round(np.array(data[DATA_KEY_MAIN].shape) * scale )/np.array(data[DATA_KEY_MAIN].shape) #apply transform