Skip to content

Generator #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 12, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
FROM python:3.6

# Install dependencies in their own layer BEFORE copying source code, so
# that editing a .py file does not invalidate the pip-install cache.
# Destination is absolute (/app/) — the original relative "app/" only
# worked because the base image's default WORKDIR happens to be "/".
COPY requirements.txt /app/
WORKDIR /app
RUN pip install -r requirements.txt

# Application code plus the data artifacts the webservice reads at runtime.
COPY *.py /app/
COPY data/binaries/ /app/data/binaries/
COPY data/models/textBranchNorm.h5 /app/data/models/

EXPOSE 5000
CMD ["python", "./webservice.py"]


### Some commands I used for building this docker container

## Build docker container from 'Dockerfile'
#docker build -t geoloc .

## Execute docker container
#docker run -d -p 5000:5000 --network host geoloc

## Contact docker webservice
#http://127.0.0.1:5000/predictText?text=Montmartre%20is%20truly%20beautiful

## Export docker container
#docker save geoloc > geolocV2.tar

########################### Other commands ###########################
## List images
#docker images -a

## ls for docker
#docker container ls

# Interactive execution of docker container
#docker exec -i -t 3411bb89b103 /bin/bash






32 changes: 18 additions & 14 deletions TrainIndividualModels.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
file = open(binaryPath +"data.obj",'rb')
trainDescription, trainLocation, trainDomain, trainTld, trainSource, trainTexts, trainUserName, trainTZ, trainUtc, trainUserLang, trainCreatedAt= pickle.load(file)

#Shuffle train-data
trainDescription, trainLocation, trainDomain, trainTld, trainSource, trainTexts, trainUserName, trainTZ, trainUtc, trainUserLang, trainCreatedAt, classes = shuffle(trainDescription, trainLocation, trainDomain, trainTld, trainSource, trainTexts, trainUserName, trainTZ, trainUtc, trainUserLang, trainCreatedAt, classes, random_state=1202)

##################Train
# create the model
batch_size = 256
Expand All @@ -41,6 +44,7 @@
textEmbeddings = 100
nameEmbeddings = 100
tzEmbeddings = 50
validation_split = 0.01 #91279 samples for validation

#callbacks = [
# EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=6, verbose=1, restore_best_weights=True),
Expand Down Expand Up @@ -70,7 +74,7 @@
start = time.time()
descriptionHistory = descriptionModel.fit(trainDescription, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("descriptionBranch finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
descriptionModel.save(modelPath +'descriptionBranchNorm.h5')
Expand All @@ -96,7 +100,7 @@
start = time.time()
sourceHistory = domainModel.fit(trainDomain, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("tldBranch finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
domainModel.save(modelPath + 'domainBranch.h5')
Expand All @@ -121,7 +125,7 @@
start = time.time()
sourceHistory = tldBranchModel.fit(trainTld, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("tldBranch finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
tldBranchModel.save(modelPath + 'tldBranch.h5')
Expand All @@ -139,7 +143,7 @@
start = time.time()
sourceHistory = linkModel.fit(np.concatenate((trainDomain, trainTld), axis=1), classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("linkModel finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
linkModel.save(modelPath + 'linkModel.h5')
Expand All @@ -166,7 +170,7 @@
start = time.time()
locationHistory = locationModel.fit(trainLocation, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("locationHistory finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
locationModel.save(modelPath +'locationBranchNorm.h5')
Expand All @@ -191,7 +195,7 @@
start = time.time()
sourceHistory = sourceModel.fit(trainSource, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("sourceBranch finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
sourceModel.save(modelPath +'sourceBranch.h5')
Expand Down Expand Up @@ -219,7 +223,7 @@
start = time.time()
textHistory = textModel.fit(trainTexts, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("textBranch finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
textModel.save(modelPath +'textBranchNorm.h5')
Expand Down Expand Up @@ -247,7 +251,7 @@
start = time.time()
nameHistory = nameModel.fit(trainUserName, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("nameBranch finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
nameModel.save(modelPath +'nameBranchNorm.h5')
Expand All @@ -274,7 +278,7 @@
start = time.time()
tzHistory = tzBranchModel.fit(trainTZ, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("tzBranch finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
tzBranchModel.save(modelPath +'tzBranchNorm.h5')
Expand All @@ -300,7 +304,7 @@
start = time.time()
utcHistory = utcBranchModel.fit(trainUtc, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("utcBranch finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
utcBranchModel.save(modelPath +'utcBranch.h5')
Expand All @@ -323,7 +327,7 @@
start = time.time()
userLangHistory = userLangModel.fit(trainUserLang, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("userLangBranch finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
userLangModel.save(modelPath +'userLangBranch.h5')
Expand All @@ -339,7 +343,7 @@

timeHistory = tweetTimeModel.fit(trainCreatedAt, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("tweetTimeModel finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
tweetTimeModel.save(modelPath + 'tweetTimeBranch.h5')
Expand All @@ -364,7 +368,7 @@

timeHistory = tweetTimeModel.fit(trainCreatedAt, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("tweetTimeModel finished after " +str(datetime.timedelta(seconds=round(time.time() - start))))
tweetTimeModel.save(modelPath + 'tweetTimeBranch.h5')
Expand All @@ -387,7 +391,7 @@

categorialModelHistory = categorialModel.fit(trainData, classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=verbosity
verbose=verbosity, validation_split=validation_split
)
print("categorialModel finished after " +str(datetime.timedelta(time.time() - start)))
categorialModel.save(modelPath + 'categorialModel.h5')
10 changes: 8 additions & 2 deletions TrainMergedModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import time

import os
from sklearn.utils import shuffle
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = ""

Expand Down Expand Up @@ -43,9 +44,14 @@
file = open(binaryPath +"data.obj",'rb')
trainDescription, trainLocation, trainDomain, trainTld, trainSource, trainTexts, trainUserName, trainTZ, trainUtc, trainUserLang, trainCreatedAt= pickle.load(file)

#Shuffle train-data
trainDescription, trainLocation, trainDomain, trainTld, trainSource, trainTexts, trainUserName, trainTZ, trainUtc, trainUserLang, trainCreatedAt, classes = shuffle(trainDescription, trainLocation, trainDomain, trainTld, trainSource, trainTexts, trainUserName, trainTZ, trainUtc, trainUserLang, trainCreatedAt, classes, random_state=1202)


# create the model
batch_size = 256
nb_epoch = 3
validation_split = 0.01 #91279 samples for validation


##Convert data into one hot encodings
Expand Down Expand Up @@ -148,7 +154,7 @@
finalHistory = final_model.fit([trainDescription, trainDomain, trainTld, trainLocation, trainSource, trainTexts, trainUserName, trainTZ, trainUtc, trainUserLang, trainCreatedAt],
classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=2
verbose=2, validation_split=validation_split
)
end = time.time()
print("final_model finished after " +str(datetime.timedelta(seconds=time.time() - start)))
Expand All @@ -168,7 +174,7 @@
finalHistory = final_model.fit([trainDescription, trainDomain, trainTld, trainLocation, trainSource, trainTexts, trainUserName, trainTZ, trainUtc, trainUserLang, trainCreatedAt],
classes,
epochs=nb_epoch, batch_size=batch_size,
verbose=2
verbose=2, validation_split=validation_split
)
end = time.time()
print("final_model finished after " +str(datetime.timedelta(seconds=time.time() - start)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
binaryPath= 'data/binaries/' #Place where the serialized training data is
modelPath= 'data/models/' #Place to store the models
unknownClass = "unknownLocation" #place holder for unknown classes
trainFile="/home/philippe/PycharmProjects/geolocation/train.json.gz"
testFile="/home/philippe/PycharmProjects/geolocation/test.json.gz"
trainFile="/home/philippe/Desktop/train.json.gz"
testFile="/home/philippe/Desktop/test.json.gz"



Expand Down
File renamed without changes.
10 changes: 6 additions & 4 deletions webservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def predictText():
predict = textBranch.predict(textSequences)

# Print the topN
result = []
hits = []
for index in reversed(predict.argsort()[0][-maxCities:]):
print("%s with score=%.3f" % (colnames[index], float(predict[0][index])))
my_dict = {
Expand All @@ -71,9 +71,11 @@ def predictText():
'lat': placeMedian[colnames[index]][0],
'lon': placeMedian[colnames[index]][1]
}
result.append(json.dumps(my_dict, indent=4))
print(result)
return Response(json.dumps(result, indent=4), mimetype='application/json')
hits.append(my_dict)
x= {"query":text,
"results":hits}
print(hits)
return Response(json.dumps(x, indent=4), mimetype='application/json')

#Has some issues with json escape character //
"""
Expand Down